/*
 * H.26L/H.264/AVC/JVT/14496-10/... parser
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * H.264 / AVC / MPEG4 part10 parser.
 * @author Michael Niedermayer <michaelni@gmx.at>
 */

#define UNCHECKED_BITSTREAM_READER 1

#include "libavutil/attributes.h"
#include "parser.h"
#include "h264data.h"
#include "golomb.h"
#include "internal.h"
#include "mpegutils.h"


/**
 * Scan a chunk of the bitstream for the end of the current frame.
 *
 * The scan is a byte-wise state machine. Its state is loaded from and stored
 * back to h->parse_context, and the slice-header bytes collected so far live
 * in h->parse_history, so a start code or slice header that straddles two
 * calls is still handled.
 *
 * State values as used below:
 *  - 7       : searching for a start code candidate
 *  - 0..2    : inside a byte run that may turn out to be a start code
 *  - 3..5    : the current byte is treated as a NAL unit header
 *  - above 5 : bytes following a slice NAL header are being collected
 *
 * @param h        parser/decoder context
 * @param buf      data to scan
 * @param buf_size number of bytes in buf
 * @return in AVC (length-prefixed) mode: the offset just past the last NAL
 *         unit whose length prefix was read, or buf_size if a NAL length is
 *         invalid; otherwise END_NOT_FOUND if no frame end was seen, or the
 *         offset of the frame end relative to buf (the caller handles
 *         negative values).
 */
static int h264_find_frame_end(H264Context *h, const uint8_t *buf,
                               int buf_size)
{
    int i, j;
    uint32_t state;
    ParseContext *pc = &h->parse_context;
    /* Offset of the next length prefix; buf_size disables the AVC path. */
    int next_avc = h->is_avc ? 0 : buf_size;

    state = pc->state;
    if (state > 13)
        state = 7;

    if (h->is_avc && !h->nal_length_size)
        av_log(h->avctx, AV_LOG_ERROR, "AVC-parser: nal length size invalid\n");

    for (i = 0; i < buf_size; i++) {
        if (i >= next_avc) {
            int nalsize = 0;
            i = next_avc;
            /* Only read the length prefix when it lies completely inside the
             * buffer; otherwise nalsize stays 0 and is rejected just below. */
            if (h->nal_length_size <= buf_size - i) {
                for (j = 0; j < h->nal_length_size; j++)
                    nalsize = (nalsize << 8) | buf[i++];
            }
            if (nalsize <= 0 || nalsize > buf_size - i) {
                av_log(h->avctx, AV_LOG_ERROR, "AVC-parser: nal size %d remaining %d\n", nalsize, buf_size - i);
                return buf_size;
            }
            next_avc = i + nalsize;
            state    = 5;
        }

        if (state == 7) {
            i += h->h264dsp.h264_find_start_code_candidate(buf + i, next_avc - i);
            if (i < next_avc)
                state = 2;
        } else if (state <= 2) {
            if (buf[i] == 1)
                state ^= 5;            // 2->7, 1->4, 0->5
            else if (buf[i])
                state = 7;
            else
                state >>= 1;           // 2->1, 1->0, 0->0
        } else if (state <= 5) {
            int nalu_type = buf[i] & 0x1F;
            if (nalu_type == NAL_SEI || nalu_type == NAL_SPS ||
                nalu_type == NAL_PPS || nalu_type == NAL_AUD) {
                /* One of these NAL types after a frame has started ends it. */
                if (pc->frame_start_found) {
                    i++;
                    goto found;
                }
            } else if (nalu_type == NAL_SLICE || nalu_type == NAL_DPA ||
                       nalu_type == NAL_IDR_SLICE) {
                /* Switch to collecting the bytes that follow the header. */
                state += 8;
                continue;
            }
            state = 7;
        } else {
            h->parse_history[h->parse_history_count++] = buf[i];
            if (h->parse_history_count > 3) {
                unsigned int mb, last_mb;
                GetBitContext gb;

                /* Decode the first ue(v) value of the collected bytes. */
                init_get_bits(&gb, h->parse_history, 8 * h->parse_history_count);
                h->parse_history_count = 0;
                mb                     = get_ue_golomb_long(&gb);
                last_mb                = h->parse_last_mb;
                h->parse_last_mb       = mb;
                if (pc->frame_start_found) {
                    /* A value that did not increase marks a new picture. */
                    if (mb <= last_mb)
                        goto found;
                } else
                    pc->frame_start_found = 1;
                state = 7;
            }
        }
    }
    pc->state = state;
    if (h->is_avc)
        return next_avc;
    return END_NOT_FOUND;

found:
    pc->state             = 7;
    pc->frame_start_found = 0;
    if (h->is_avc)
        return next_avc;
    /* Rewind from the detection point by an amount derived from the state;
     * presumably this lands on the start code that opens the next frame.
     * The result may be negative. */
    return i - (state & 5) - 3 * (state > 7);
}

/**
 * Skip through the rest of a slice header looking for an MMCO_RESET
 * memory management control operation.
 *
 * Reads from h->gb at its current position and uses the PPS in h->pps and
 * the picture type in s->pict_type.
 *
 * @param s parser context; s->priv_data is the H264Context.
 * @return 1 if MMCO_RESET was found, 0 if not,
 *         AVERROR_INVALIDDATA if the header is invalid.
 */
static int scan_mmco_reset(AVCodecParserContext *s)
{
    H264Context *h = s->priv_data;

    h->slice_type_nos = s->pict_type & 3;

    if (h->pps.redundant_pic_cnt_present)
        get_ue_golomb(&h->gb); // redundant_pic_count

    if (ff_set_ref_count(h) < 0)
        return AVERROR_INVALIDDATA;

    /* Skip the reference picture list reordering commands. */
    if (h->slice_type_nos != AV_PICTURE_TYPE_I) {
        int list;
        for (list = 0; list < h->list_count; list++) {
            if (get_bits1(&h->gb)) {
                int index;
                for (index = 0; ; index++) {
                    unsigned int reordering_of_pic_nums_idc = get_ue_golomb_31(&h->gb);

                    if (reordering_of_pic_nums_idc < 3)
                        get_ue_golomb(&h->gb);
                    else if (reordering_of_pic_nums_idc > 3) {
                        av_log(h->avctx, AV_LOG_ERROR,
                               "illegal reordering_of_pic_nums_idc %u\n",
                               reordering_of_pic_nums_idc);
                        return AVERROR_INVALIDDATA;
                    } else
                        break;

                    if (index >= h->ref_count[list]) {
                        av_log(h->avctx, AV_LOG_ERROR,
                               "reference count %d overflow\n", index);
                        return AVERROR_INVALIDDATA;
                    }
                }
            }
        }
    }

    if ((h->pps.weighted_pred && h->slice_type_nos == AV_PICTURE_TYPE_P) ||
        (h->pps.weighted_bipred_idc == 1 && h->slice_type_nos == AV_PICTURE_TYPE_B))
        ff_pred_weight_table(h);

    if (get_bits1(&h->gb)) { // adaptive_ref_pic_marking_mode_flag
        int i;
        for (i = 0; i < MAX_MMCO_COUNT; i++) {
            MMCOOpcode opcode = get_ue_golomb_31(&h->gb);
            if (opcode > (unsigned) MMCO_LONG) {
                av_log(h->avctx, AV_LOG_ERROR,
                       "illegal memory management control operation %d\n",
                       opcode);
                return AVERROR_INVALIDDATA;
            }
            if (opcode == MMCO_END)
                return 0;
            else if (opcode == MMCO_RESET)
                return 1;

            /* Skip the operands of the remaining opcodes. */
            if (opcode == MMCO_SHORT2UNUSED || opcode == MMCO_SHORT2LONG)
                get_ue_golomb(&h->gb);
            if (opcode == MMCO_SHORT2LONG || opcode == MMCO_LONG2UNUSED ||
                opcode == MMCO_LONG || opcode == MMCO_SET_MAX_LONG)
                get_ue_golomb_31(&h->gb);
        }
    }

    return 0;
}

/**
 * Parse NAL units of found picture and decode some basic information.
 *
 * SPS, PPS and SEI units are decoded as they are met. Parsing stops at the
 * first slice NAL unit, whose header is used to fill in the picture type,
 * key frame flag, picture structure, field order and repeat_pict in s, and
 * profile and level in avctx.
 *
 * @param s parser context.
 * @param avctx codec context.
 * @param buf buffer with field/frame data.
 * @param buf_size size of the buffer.
 * @return 0 if a slice header was parsed, if buf_size is 0, or if the buffer
 *         starts with "Q264"; a negative value if the slice header is invalid
 *         or no slice was found.
 */
static inline int parse_nal_units(AVCodecParserContext *s,
                                  AVCodecContext *avctx,
                                  const uint8_t *buf, int buf_size)
{
    H264Context *h         = s->priv_data;
    const uint8_t *buf_end = buf + buf_size;
    unsigned int pps_id;
    unsigned int slice_type;
    int state = -1, got_reset = 0;
    const uint8_t *ptr;
    int q264 = buf_size >=4 && !memcmp("Q264", buf, 4);
    int field_poc[2];

    /* set some sane default values */
    s->pict_type         = AV_PICTURE_TYPE_I;
    s->key_frame         = 0;
    s->picture_structure = AV_PICTURE_STRUCTURE_UNKNOWN;

    h->avctx = avctx;
    ff_h264_reset_sei(h);
    h->sei_fpa.frame_packing_arrangement_cancel_flag = -1;

    if (!buf_size)
        return 0;

    for (;;) {
        int src_length, dst_length, consumed, nalsize = 0;
        if (h->is_avc) {
            /* Length-prefixed NAL unit: read the big-endian size field. */
            int i;
            if (h->nal_length_size >= buf_end - buf)
                break;
            nalsize = 0;
            for (i = 0; i < h->nal_length_size; i++)
                nalsize = (nalsize << 8) | *buf++;
            if (nalsize <= 0 || nalsize > buf_end - buf) {
                av_log(h->avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
                break;
            }
            src_length = nalsize;
        } else {
            /* Start-code delimited NAL unit. */
            buf = avpriv_find_start_code(buf, buf_end, &state);
            if (buf >= buf_end)
                break;
            --buf;
            src_length = buf_end - buf;
        }
        switch (state & 0x1f) {
        case NAL_SLICE:
        case NAL_IDR_SLICE:
            // Do not walk the whole buffer just to decode slice header
            if ((state & 0x1f) == NAL_IDR_SLICE || ((state >> 5) & 0x3) == 0) {
                /* IDR or disposable slice
                 * No need to decode many bytes because MMCOs shall not be present. */
                if (src_length > 60)
                    src_length = 60;
            } else {
                /* To decode up to MMCOs */
                if (src_length > 1000)
                    src_length = 1000;
            }
            break;
        }
        ptr = ff_h264_decode_nal(h, buf, &dst_length, &consumed, src_length);
        if (!ptr || dst_length < 0)
            break;

        init_get_bits(&h->gb, ptr, 8 * dst_length);
        switch (h->nal_unit_type) {
        case NAL_SPS:
            ff_h264_decode_seq_parameter_set(h);
            break;
        case NAL_PPS:
            ff_h264_decode_picture_parameter_set(h, h->gb.size_in_bits);
            break;
        case NAL_SEI:
            ff_h264_decode_sei(h);
            break;
        case NAL_IDR_SLICE:
            s->key_frame = 1;

            h->prev_frame_num        = 0;
            h->prev_frame_num_offset = 0;
            h->prev_poc_msb          =
            h->prev_poc_lsb          = 0;
        /* fall through */
        case NAL_SLICE:
            get_ue_golomb_long(&h->gb);  // skip first_mb_in_slice
            slice_type   = get_ue_golomb_31(&h->gb);
            s->pict_type = golomb_to_pict_type[slice_type % 5];
            if (h->sei_recovery_frame_cnt >= 0) {
                /* key frame, since recovery_frame_cnt is set */
                s->key_frame = 1;
            }
            pps_id = get_ue_golomb(&h->gb);
            if (pps_id >= MAX_PPS_COUNT) {
                av_log(h->avctx, AV_LOG_ERROR,
                       "pps_id %u out of range\n", pps_id);
                return -1;
            }
            if (!h->pps_buffers[pps_id]) {
                av_log(h->avctx, AV_LOG_ERROR,
                       "non-existing PPS %u referenced\n", pps_id);
                return -1;
            }
            h->pps = *h->pps_buffers[pps_id];
            if (!h->sps_buffers[h->pps.sps_id]) {
                av_log(h->avctx, AV_LOG_ERROR,
                       "non-existing SPS %u referenced\n", h->pps.sps_id);
                return -1;
            }
            h->sps       = *h->sps_buffers[h->pps.sps_id];
            h->frame_num = get_bits(&h->gb, h->sps.log2_max_frame_num);

            if(h->sps.ref_frame_count <= 1 && h->pps.ref_count[0] <= 1 && s->pict_type == AV_PICTURE_TYPE_I)
                s->key_frame = 1;

            avctx->profile = ff_h264_get_profile(&h->sps);
            avctx->level   = h->sps.level_idc;

            if (h->sps.frame_mbs_only_flag) {
                h->picture_structure = PICT_FRAME;
            } else {
                if (get_bits1(&h->gb)) { // field_pic_flag
                    h->picture_structure = PICT_TOP_FIELD + get_bits1(&h->gb); // bottom_field_flag
                } else {
                    h->picture_structure = PICT_FRAME;
                }
            }

            if (h->nal_unit_type == NAL_IDR_SLICE)
                get_ue_golomb(&h->gb); /* idr_pic_id */
            if (h->sps.poc_type == 0) {
                h->poc_lsb = get_bits(&h->gb, h->sps.log2_max_poc_lsb);

                if (h->pps.pic_order_present == 1 &&
                    h->picture_structure == PICT_FRAME)
                    h->delta_poc_bottom = get_se_golomb(&h->gb);
            }

            if (h->sps.poc_type == 1 &&
                !h->sps.delta_pic_order_always_zero_flag) {
                h->delta_poc[0] = get_se_golomb(&h->gb);

                if (h->pps.pic_order_present == 1 &&
                    h->picture_structure == PICT_FRAME)
                    h->delta_poc[1] = get_se_golomb(&h->gb);
            }

            /* Decode POC of this picture.
             * The prev_ values needed for decoding POC of the next picture are not set here. */
            field_poc[0] = field_poc[1] = INT_MAX;
            ff_init_poc(h, field_poc, &s->output_picture_number);

            /* Continue parsing to check if MMCO_RESET is present.
             * FIXME: MMCO_RESET could appear in non-first slice.
             *        Maybe, we should parse all undisposable non-IDR slice of this
             *        picture until encountering MMCO_RESET in a slice of it. */
            if (h->nal_ref_idc && h->nal_unit_type != NAL_IDR_SLICE) {
                got_reset = scan_mmco_reset(s);
                if (got_reset < 0)
                    return got_reset;
            }

            /* Set up the prev_ values for decoding POC of the next picture. */
            h->prev_frame_num        = got_reset ? 0 : h->frame_num;
            h->prev_frame_num_offset = got_reset ? 0 : h->frame_num_offset;
            if (h->nal_ref_idc != 0) {
                if (!got_reset) {
                    h->prev_poc_msb = h->poc_msb;
                    h->prev_poc_lsb = h->poc_lsb;
                } else {
                    h->prev_poc_msb = 0;
                    h->prev_poc_lsb =
                        h->picture_structure == PICT_BOTTOM_FIELD ? 0 : field_poc[0];
                }
            }

            if (h->sps.pic_struct_present_flag) {
                switch (h->sei_pic_struct) {
                case SEI_PIC_STRUCT_TOP_FIELD:
                case SEI_PIC_STRUCT_BOTTOM_FIELD:
                    s->repeat_pict = 0;
                    break;
                case SEI_PIC_STRUCT_FRAME:
                case SEI_PIC_STRUCT_TOP_BOTTOM:
                case SEI_PIC_STRUCT_BOTTOM_TOP:
                    s->repeat_pict = 1;
                    break;
                case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
                case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
                    s->repeat_pict = 2;
                    break;
                case SEI_PIC_STRUCT_FRAME_DOUBLING:
                    s->repeat_pict = 3;
                    break;
                case SEI_PIC_STRUCT_FRAME_TRIPLING:
                    s->repeat_pict = 5;
                    break;
                default:
                    s->repeat_pict = h->picture_structure == PICT_FRAME ? 1 : 0;
                    break;
                }
            } else {
                s->repeat_pict = h->picture_structure == PICT_FRAME ? 1 : 0;
            }

            if (h->picture_structure == PICT_FRAME) {
                s->picture_structure = AV_PICTURE_STRUCTURE_FRAME;
                if (h->sps.pic_struct_present_flag) {
                    switch (h->sei_pic_struct) {
                    case SEI_PIC_STRUCT_TOP_BOTTOM:
                    case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
                        s->field_order = AV_FIELD_TT;
                        break;
                    case SEI_PIC_STRUCT_BOTTOM_TOP:
                    case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
                        s->field_order = AV_FIELD_BB;
                        break;
                    default:
                        s->field_order = AV_FIELD_PROGRESSIVE;
                        break;
                    }
                } else {
                    if (field_poc[0] < field_poc[1])
                        s->field_order = AV_FIELD_TT;
                    else if (field_poc[0] > field_poc[1])
                        s->field_order = AV_FIELD_BB;
                    else
                        s->field_order = AV_FIELD_PROGRESSIVE;
                }
            } else {
                if (h->picture_structure == PICT_TOP_FIELD)
                    s->picture_structure = AV_PICTURE_STRUCTURE_TOP_FIELD;
                else
                    s->picture_structure = AV_PICTURE_STRUCTURE_BOTTOM_FIELD;
                s->field_order = AV_FIELD_UNKNOWN;
            }

            return 0; /* no need to evaluate the rest */
        }
        buf += h->is_avc ? nalsize : consumed;
    }
    if (q264)
        return 0;
    /* didn't find a picture! */
    av_log(h->avctx, AV_LOG_ERROR, "missing picture in access unit with size %d\n", buf_size);
    return -1;
}

/**
 * Parser callback: split the input into frames and extract per-frame
 * information.
 *
 * On the first call, codec extradata (if any) is decoded. Unless
 * PARSER_FLAG_COMPLETE_FRAMES is set, the frame end is located with
 * h264_find_frame_end() and the data is accumulated with ff_combine_frame()
 * until a complete frame is available. The complete frame is then handed to
 * parse_nal_units() and the SEI-derived timing fields of s are updated.
 *
 * @param s            parser context; s->priv_data is the H264Context.
 * @param avctx        codec context.
 * @param poutbuf      set to the frame data, or NULL if no complete frame
 *                     is available yet.
 * @param poutbuf_size set to the size of *poutbuf, or 0.
 * @param buf          input data.
 * @param buf_size     size of the input data.
 * @return buf_size when no complete frame is available yet; otherwise the
 *         frame end offset that was determined.
 */
static int h264_parse(AVCodecParserContext *s,
                      AVCodecContext *avctx,
                      const uint8_t **poutbuf, int *poutbuf_size,
                      const uint8_t *buf, int buf_size)
{
    H264Context *h   = s->priv_data;
    ParseContext *pc = &h->parse_context;
    int next;

    if (!h->got_first) {
        h->got_first = 1;
        if (avctx->extradata_size) {
            h->avctx = avctx;
            // must be done like in decoder, otherwise opening the parser,
            // letting it create extradata and then closing and opening again
            // will cause has_b_frames to be always set.
            // Note that estimate_timings_from_pts does exactly this.
            if (!avctx->has_b_frames)
                h->low_delay = 1;
            ff_h264_decode_extradata(h, avctx->extradata, avctx->extradata_size);
        }
    }

    if (s->flags & PARSER_FLAG_COMPLETE_FRAMES) {
        next = buf_size;
    } else {
        next = h264_find_frame_end(h, buf, buf_size);

        if (ff_combine_frame(pc, next, &buf, &buf_size) < 0) {
            *poutbuf      = NULL;
            *poutbuf_size = 0;
            return buf_size;
        }

        /* A negative offset means the frame ended inside previously buffered
         * data; rescan those trailing bytes so the scanner state is right. */
        if (next < 0 && next != END_NOT_FOUND) {
            av_assert1(pc->last_index + next >= 0);
            h264_find_frame_end(h, &pc->buffer[pc->last_index + next], -next); // update state
        }
    }

    /* The result is not checked: the frame is output even if its headers
     * could not be parsed. */
    parse_nal_units(s, avctx, buf, buf_size);

    if (h->sei_cpb_removal_delay >= 0) {
        s->dts_sync_point    = h->sei_buffering_period_present;
        s->dts_ref_dts_delta = h->sei_cpb_removal_delay;
        s->pts_dts_delta     = h->sei_dpb_output_delay;
    } else {
        s->dts_sync_point    = INT_MIN;
        s->dts_ref_dts_delta = INT_MIN;
        s->pts_dts_delta     = INT_MIN;
    }

    if (s->flags & PARSER_FLAG_ONCE) {
        s->flags &= PARSER_FLAG_COMPLETE_FRAMES;
    }

    *poutbuf      = buf;
    *poutbuf_size = buf_size;
    return next;
}

/**
 * Find where the leading parameter-set header of a start-code delimited
 * buffer ends.
 *
 * The buffer is scanned for 00 00 01 start codes. Once a NAL unit of type 7
 * (the one tracked by has_sps) has been seen, the first following NAL unit
 * whose type is not 7, 8 or 9 terminates the header. Zero bytes directly in
 * front of that unit's start code are counted as part of the start code.
 *
 * @param avctx    codec context (unused).
 * @param buf      data to scan.
 * @param buf_size number of bytes in buf.
 * @return offset of the start code of the terminating NAL unit, or 0 if no
 *         such unit follows a type 7 unit.
 */
static int h264_split(AVCodecContext *avctx,
                      const uint8_t *buf, int buf_size)
{
    int i;
    uint32_t state = -1;
    int has_sps    = 0;

    /* i runs one past the end so that a NAL header in the last byte is still
     * examined. */
    for (i = 0; i <= buf_size; i++) {
        /* Start code prefix plus the 5-bit NAL type of the last 4 bytes. */
        const uint32_t nal = state & 0xFFFFFF1F;

        if (nal == 0x107)
            has_sps = 1;
        if ((state & 0xFFFFFF00) == 0x100 &&
            nal != 0x107 && nal != 0x108 && nal != 0x109) {
            if (has_sps) {
                /* Swallow the zero bytes preceding the 00 00 01 prefix. */
                while (i > 4 && buf[i - 5] == 0)
                    i--;
                return i - 4;
            }
        }
        if (i < buf_size)
            state = (state << 8) | buf[i];
    }
    return 0;
}

/**
 * Parser close callback: release the frame-combining buffer and the
 * resources held by the H.264 context.
 *
 * @param s parser context; s->priv_data is the H264Context.
 */
static void close(AVCodecParserContext *s)
{
    H264Context *h   = s->priv_data;
    ParseContext *pc = &h->parse_context;

    /* av_freep() also resets the pointer, so no dangling reference to the
     * freed buffer is left in the parse context. */
    av_freep(&pc->buffer);
    ff_h264_free_context(h);
}

/**
 * Parser init callback: set up the minimal H264Context state the parser
 * relies on.
 *
 * @param s parser context; s->priv_data is the H264Context.
 * @return 0 always.
 */
static av_cold int init(AVCodecParserContext *s)
{
    H264Context *h = s->priv_data;

    /* The context acts as its own, single, slice/thread context. */
    h->thread_context[0]   = h;
    h->slice_context_count = 1;
    /* NOTE(review): arguments presumably select 8-bit depth and
     * chroma_format_idc 1 - confirm against ff_h264dsp_init(). */
    ff_h264dsp_init(&h->h264dsp, 8, 1);
    return 0;
}
AVCodecParser ff_h264_parser = {
561
    .codec_ids      = { AV_CODEC_ID_H264 },
562 563 564 565 566
    .priv_data_size = sizeof(H264Context),
    .parser_init    = init,
    .parser_parse   = h264_parse,
    .parser_close   = close,
    .split          = h264_split,
567
};