Commit 0424e052 authored by Michael Niedermayer

Merge remote-tracking branch 'ffmpeg-mt/master'

    * ffmpeg-mt/master:
      Update todo.
      h264: add an assert that copied pictures are valid picture pointers
      valgrind-check: run with 1 and 3 threads
      h264: When decoding a packet with multiple PPS/SPS, don't start the next thread until all of them have been read
      Allow some pictures to be released earlier after 51ead6d2c40c5defdd211f435aec49b19f5f6a18
      h264: fix slice threading MC reading uninitialized frame edges.

    Please see ffmpeg-mt for a list of authors of these changes.

    Conflicts:
        libavcodec/h264.c
        mt-work/valgrind-check.sh
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent 5c08c7b2
...@@ -312,7 +312,6 @@ static void chroma_dc_dct_c(DCTELEM *block){ ...@@ -312,7 +312,6 @@ static void chroma_dc_dct_c(DCTELEM *block){
} }
#endif #endif
static void free_tables(H264Context *h, int free_rbsp){ static void free_tables(H264Context *h, int free_rbsp){
int i; int i;
H264Context *hx; H264Context *hx;
...@@ -612,11 +611,15 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx){ ...@@ -612,11 +611,15 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx){
return 0; return 0;
} }
#define IN_RANGE(a, b, size) (((a) >= (b)) && ((a) < ((b)+(size))))
static void copy_picture_range(Picture **to, Picture **from, int count, MpegEncContext *new_base, MpegEncContext *old_base) static void copy_picture_range(Picture **to, Picture **from, int count, MpegEncContext *new_base, MpegEncContext *old_base)
{ {
int i; int i;
for (i=0; i<count; i++){ for (i=0; i<count; i++){
assert((IN_RANGE(from[i], old_base, sizeof(*old_base)) ||
IN_RANGE(from[i], old_base->picture, sizeof(Picture) * old_base->picture_count) ||
!from[i]));
to[i] = REBASE_PICTURE(from[i], new_base, old_base); to[i] = REBASE_PICTURE(from[i], new_base, old_base);
} }
} }
...@@ -796,8 +799,10 @@ int ff_h264_frame_start(H264Context *h){ ...@@ -796,8 +799,10 @@ int ff_h264_frame_start(H264Context *h){
* This includes finding the next displayed frame. * This includes finding the next displayed frame.
* *
* @param h h264 master context * @param h h264 master context
* @param setup_finished enough NALs have been read that we can call
* ff_thread_finish_setup()
*/ */
static void decode_postinit(H264Context *h){ static void decode_postinit(H264Context *h, int setup_finished){
MpegEncContext * const s = &h->s; MpegEncContext * const s = &h->s;
Picture *out = s->current_picture_ptr; Picture *out = s->current_picture_ptr;
Picture *cur = s->current_picture_ptr; Picture *cur = s->current_picture_ptr;
...@@ -809,10 +814,11 @@ static void decode_postinit(H264Context *h){ ...@@ -809,10 +814,11 @@ static void decode_postinit(H264Context *h){
if (h->next_output_pic) return; if (h->next_output_pic) return;
if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) { if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
//FIXME this allows the next thread to start once we encounter the first field of a PAFF packet //FIXME: if we have two PAFF fields in one packet, we can't start the next thread here.
//This works if the next packet contains the second field. It does not work if both fields are //If we have one field per packet, we can. The check in decode_nal_units() is not good enough
//in the same packet. //to find this yet, so we assume the worst for now.
//ff_thread_finish_setup(s->avctx); //if (setup_finished)
// ff_thread_finish_setup(s->avctx);
return; return;
} }
...@@ -943,7 +949,8 @@ static void decode_postinit(H264Context *h){ ...@@ -943,7 +949,8 @@ static void decode_postinit(H264Context *h){
av_log(s->avctx, AV_LOG_DEBUG, "no picture\n"); av_log(s->avctx, AV_LOG_DEBUG, "no picture\n");
} }
ff_thread_finish_setup(s->avctx); if (setup_finished)
ff_thread_finish_setup(s->avctx);
} }
static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
...@@ -2310,7 +2317,7 @@ static int decode_slice_header(H264Context *h, H264Context *h0){ ...@@ -2310,7 +2317,7 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
} }
//FIXME: fix draw_edges+PAFF+frame threads //FIXME: fix draw_edges+PAFF+frame threads
h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE || (!h->sps.frame_mbs_only_flag && s->avctx->active_thread_type&FF_THREAD_FRAME)) ? 0 : 16; h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE || (!h->sps.frame_mbs_only_flag && s->avctx->active_thread_type)) ? 0 : 16;
h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width; h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
if(s->avctx->debug&FF_DEBUG_PICT_INFO){ if(s->avctx->debug&FF_DEBUG_PICT_INFO){
...@@ -2892,10 +2899,13 @@ static void execute_decode_slices(H264Context *h, int context_count){ ...@@ -2892,10 +2899,13 @@ static void execute_decode_slices(H264Context *h, int context_count){
static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
MpegEncContext * const s = &h->s; MpegEncContext * const s = &h->s;
AVCodecContext * const avctx= s->avctx; AVCodecContext * const avctx= s->avctx;
int buf_index=0;
H264Context *hx; ///< thread context H264Context *hx; ///< thread context
int context_count = 0; int buf_index;
int next_avc= h->is_avc ? 0 : buf_size; int context_count;
int next_avc;
int pass = !(avctx->active_thread_type & FF_THREAD_FRAME);
int nals_needed=0; ///< number of NALs that need decoding before the next frame thread starts
int nal_index;
h->max_contexts = (HAVE_THREADS && (s->avctx->active_thread_type&FF_THREAD_SLICE)) ? avctx->thread_count : 1; h->max_contexts = (HAVE_THREADS && (s->avctx->active_thread_type&FF_THREAD_SLICE)) ? avctx->thread_count : 1;
#if 0 #if 0
...@@ -2911,6 +2921,11 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){ ...@@ -2911,6 +2921,11 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
ff_h264_reset_sei(h); ff_h264_reset_sei(h);
} }
for(;pass <= 1;pass++){
buf_index = 0;
context_count = 0;
next_avc = h->is_avc ? 0 : buf_size;
nal_index = 0;
for(;;){ for(;;){
int consumed; int consumed;
int dst_length; int dst_length;
...@@ -2969,6 +2984,19 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){ ...@@ -2969,6 +2984,19 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
} }
buf_index += consumed; buf_index += consumed;
nal_index++;
if(pass == 0) {
// packets can sometimes contain multiple PPS/SPS
// e.g. two PAFF field pictures in one packet, or a demuxer which splits NALs strangely
// if so, when frame threading we can't start the next thread until we've read all of them
switch (hx->nal_unit_type) {
case NAL_SPS:
case NAL_PPS:
nals_needed = nal_index;
}
continue;
}
//FIXME do not discard SEI id //FIXME do not discard SEI id
if(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0) if(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0)
...@@ -2998,7 +3026,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){ ...@@ -2998,7 +3026,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
if (h->current_slice == 1) { if (h->current_slice == 1) {
if(!(s->flags2 & CODEC_FLAG2_CHUNKS)) { if(!(s->flags2 & CODEC_FLAG2_CHUNKS)) {
decode_postinit(h); decode_postinit(h, nal_index >= nals_needed);
} }
if (s->avctx->hwaccel && s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0) if (s->avctx->hwaccel && s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0)
...@@ -3115,6 +3143,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){ ...@@ -3115,6 +3143,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
goto again; goto again;
} }
} }
}
if(context_count) if(context_count)
execute_decode_slices(h, context_count); execute_decode_slices(h, context_count);
return buf_index; return buf_index;
...@@ -3190,7 +3219,7 @@ static int decode_frame(AVCodecContext *avctx, ...@@ -3190,7 +3219,7 @@ static int decode_frame(AVCodecContext *avctx,
if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){ if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
if(s->flags2 & CODEC_FLAG2_CHUNKS) decode_postinit(h); if(s->flags2 & CODEC_FLAG2_CHUNKS) decode_postinit(h, 1);
field_end(h, 0); field_end(h, 0);
......
...@@ -316,7 +316,7 @@ int ff_alloc_picture(MpegEncContext *s, Picture *pic, int shared){ ...@@ -316,7 +316,7 @@ int ff_alloc_picture(MpegEncContext *s, Picture *pic, int shared){
s->prev_pict_types[0]= s->dropable ? AV_PICTURE_TYPE_B : s->pict_type; s->prev_pict_types[0]= s->dropable ? AV_PICTURE_TYPE_B : s->pict_type;
if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == AV_PICTURE_TYPE_B) if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == AV_PICTURE_TYPE_B)
pic->age= INT_MAX; // Skipped MBs in B-frames are quite rare in MPEG-1/2 and it is a bit tricky to skip them anyway. pic->age= INT_MAX; // Skipped MBs in B-frames are quite rare in MPEG-1/2 and it is a bit tricky to skip them anyway.
pic->owner2 = s; pic->owner2 = NULL;
return 0; return 0;
fail: //for the FF_ALLOCZ_OR_GOTO macro fail: //for the FF_ALLOCZ_OR_GOTO macro
...@@ -955,7 +955,7 @@ void ff_release_unused_pictures(MpegEncContext *s, int remove_current) ...@@ -955,7 +955,7 @@ void ff_release_unused_pictures(MpegEncContext *s, int remove_current)
/* release non reference frames */ /* release non reference frames */
for(i=0; i<s->picture_count; i++){ for(i=0; i<s->picture_count; i++){
if(s->picture[i].data[0] && !s->picture[i].reference if(s->picture[i].data[0] && !s->picture[i].reference
&& s->picture[i].owner2 == s && (!s->picture[i].owner2 || s->picture[i].owner2 == s)
&& (remove_current || &s->picture[i] != s->current_picture_ptr) && (remove_current || &s->picture[i] != s->current_picture_ptr)
/*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){ /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
free_frame_buffer(s, &s->picture[i]); free_frame_buffer(s, &s->picture[i]);
......
Todo Todo
-- For other people -- For other people
- Multithread vp8 or vc1. - Multithread vc1.
- Multithread an intra codec like mjpeg (trivial). - Multithread an intra codec like mjpeg (trivial).
- Fix mpeg1 (see below). - Fix mpeg1 (see below).
- Try the first three items under Optimization. - Try the first three items under Optimization.
...@@ -18,11 +18,13 @@ work.) In general testing error paths should be done more. ...@@ -18,11 +18,13 @@ work.) In general testing error paths should be done more.
bugs in vsync in ffmpeg.c, which are currently obscuring real failures. bugs in vsync in ffmpeg.c, which are currently obscuring real failures.
h264: h264:
- Files split at the wrong NAL unit don't (and can't) - Files that aren't parsed (e.g. mp4) and contain PAFF with two
be decoded with threads (e.g. TS split so PPS is after field pictures in the same packet are not optimal. Modify the
the frame, PAFF with two fields in a packet). Scan the nals_needed check so that the second field's first slice is
packet at the start of decode and don't finish setup considered as needed, then uncomment the FIXME code in decode_postinit.
until all PPS/SPS have been encountered. Ex: http://astrange.ithinksw.net/ffmpeg/mt-samples/PAFF-Chalet-Tire.mp4
- The conformance sample MR3_TANDBERG_B.264 has problems (allocated picture overflow).
- One 10-bit sample has problems.
mpeg4: mpeg4:
- Packed B-frames need to be explicitly split up - Packed B-frames need to be explicitly split up
......
#!/bin/bash #!/bin/bash
valgrind --leak-check=full ./ffmpeg_g -threads 3 -vsync 0 -y -t 30 -i "$1" -an -f framecrc /dev/null valgrind --track-origins=yes --leak-check=full ./ffmpeg_g -threads 1 -vsync 0 -y -t 30 -i "$1" -an -f null /dev/null
\ No newline at end of file
valgrind --track-origins=yes --leak-check=full ./ffmpeg_g -threads 3 -vsync 0 -y -t 30 -i "$1" -an -f null /dev/null
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment