h264.c 165 KB
Newer Older
1
/*
2
 * H.26L/H.264/AVC/JVT/14496-10/... decoder
3 4
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
5 6 7
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
8 9
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
10
 * version 2.1 of the License, or (at your option) any later version.
11
 *
12
 * FFmpeg is distributed in the hope that it will be useful,
13 14 15 16 17
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with FFmpeg; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
 */
21

22
/**
23
 * @file
24 25 26 27
 * H.264 / AVC / MPEG4 part10 codec.
 * @author Michael Niedermayer <michaelni@gmx.at>
 */

28 29
#define UNCHECKED_BITSTREAM_READER 1

30
#include "libavutil/imgutils.h"
31
#include "libavutil/opt.h"
32
#include "internal.h"
33 34
#include "cabac.h"
#include "cabac_functions.h"
35 36 37
#include "dsputil.h"
#include "avcodec.h"
#include "mpegvideo.h"
38
#include "h264.h"
39
#include "h264data.h"
40
#include "h264_mvpred.h"
41
#include "golomb.h"
42
#include "mathops.h"
43
#include "rectangle.h"
44
#include "thread.h"
45
#include "vdpau_internal.h"
46
#include "libavutil/avassert.h"
47

48
// #undef NDEBUG
49 50
#include <assert.h>

/* Per-macroblock sample counts: 256 = 16x16 luma only, then luma plus
 * 2*64 / 2*128 / 2*256 chroma samples — matching the monochrome, 4:2:0,
 * 4:2:2 and 4:4:4 chroma formats respectively. */
const uint16_t ff_h264_mb_sizes[4] = { 256, 384, 512, 768 };

/* Lookup table for qp % 6, valid for qp in [0, QP_MAX_NUM]. */
static const uint8_t rem6[QP_MAX_NUM + 1] = {
    0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
    3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
    3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
    0, 1, 2, 3,
};

/* Lookup table for qp / 6, valid for qp in [0, QP_MAX_NUM]. */
static const uint8_t div6[QP_MAX_NUM + 1] = {
    0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3,  3,  3,
    3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6,  6,  6,
    7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10,
   10,10,10,11,11,11,11,11,11,12,12,12,12,12,12,13,13,13, 13, 13, 13,
   14,14,14,14,
};

/* Candidate pixel formats (hwaccel first, software last) offered to
 * get_format() for full-range (JPEG) 4:2:0 streams; PIX_FMT_NONE terminates
 * the list. */
static const enum PixelFormat hwaccel_pixfmt_list_h264_jpeg_420[] = {
    PIX_FMT_DXVA2_VLD,
    PIX_FMT_VAAPI_VLD,
    PIX_FMT_VDA_VLD,
    PIX_FMT_YUVJ420P,
    PIX_FMT_NONE
};

77 78 79 80 81 82
int avpriv_h264_has_num_reorder_frames(AVCodecContext *avctx)
{
    H264Context *h = avctx->priv_data;
    return h ? h->sps.num_reorder_frames : 0;
}

/**
 * Check if the top & left blocks are available if needed and
 * change the dc mode so it only uses the available blocks.
 *
 * Remaps the cached intra4x4 prediction modes of the top row / left column
 * of the current macroblock when the corresponding neighbour samples are
 * unavailable.
 * @return 0 on success, -1 if a requested mode needs missing neighbours.
 */
int ff_h264_check_intra4x4_pred_mode(H264Context *h)
{
    MpegEncContext *const s     = &h->s;
    /* Remap tables indexed by cached pred mode: -1 = mode is invalid
     * without that neighbour, 0 = mode is fine as-is, >0 = replacement
     * mode that does not use the missing samples. */
    static const int8_t top[12] = {
        -1, 0, LEFT_DC_PRED, -1, -1, -1, -1, -1, 0
    };
    static const int8_t left[12] = {
        0, -1, TOP_DC_PRED, 0, -1, -1, -1, 0, -1, DC_128_PRED
    };
    int i;

    /* Top neighbour samples missing: fix up the four 4x4 blocks of row 0. */
    if (!(h->top_samples_available & 0x8000)) {
        for (i = 0; i < 4; i++) {
            int status = top[h->intra4x4_pred_mode_cache[scan8[0] + i]];
            if (status < 0) {
                av_log(h->s.avctx, AV_LOG_ERROR,
                       "top block unavailable for requested intra4x4 mode %d at %d %d\n",
                       status, s->mb_x, s->mb_y);
                return -1;
            } else if (status) {
                h->intra4x4_pred_mode_cache[scan8[0] + i] = status;
            }
        }
    }

    /* Left neighbour samples partially missing: each mask bit covers one
     * of the four 4x4 blocks in column 0. */
    if ((h->left_samples_available & 0x8888) != 0x8888) {
        static const int mask[4] = { 0x8000, 0x2000, 0x80, 0x20 };
        for (i = 0; i < 4; i++)
            if (!(h->left_samples_available & mask[i])) {
                int status = left[h->intra4x4_pred_mode_cache[scan8[0] + 8 * i]];
                if (status < 0) {
                    av_log(h->s.avctx, AV_LOG_ERROR,
                           "left block unavailable for requested intra4x4 mode %d at %d %d\n",
                           status, s->mb_x, s->mb_y);
                    return -1;
                } else if (status) {
                    h->intra4x4_pred_mode_cache[scan8[0] + 8 * i] = status;
                }
            }
    }

    return 0;
} // FIXME cleanup like ff_h264_check_intra_pred_mode

/**
 * Check if the top & left blocks are available if needed and
 * change the dc mode so it only uses the available blocks.
 *
 * Unlike the intra4x4 variant this operates on a single whole-MB (or
 * chroma 8x8) prediction mode and returns the possibly remapped mode.
 * @return the mode to use, or -1 if the mode is out of range or needs
 *         unavailable neighbours.
 */
int ff_h264_check_intra_pred_mode(H264Context *h, int mode, int is_chroma)
{
    MpegEncContext *const s     = &h->s;
    /* -1 = invalid without that neighbour; other values replace the mode. */
    static const int8_t top[7]  = { LEFT_DC_PRED8x8, 1, -1, -1 };
    static const int8_t left[7] = { TOP_DC_PRED8x8, -1, 2, -1, DC_128_PRED8x8 };

    /* Unsigned comparison: a negative mode converts to a huge unsigned
     * value, so this rejects both mode > 6 and mode < 0. */
    if (mode > 6U) {
        av_log(h->s.avctx, AV_LOG_ERROR,
               "out of range intra chroma pred mode at %d %d\n",
               s->mb_x, s->mb_y);
        return -1;
    }

    if (!(h->top_samples_available & 0x8000)) {
        mode = top[mode];
        if (mode < 0) {
            av_log(h->s.avctx, AV_LOG_ERROR,
                   "top block unavailable for requested intra mode at %d %d\n",
                   s->mb_x, s->mb_y);
            return -1;
        }
    }

    /* 0x8080 checks both halves of the left edge (top and bottom 8 rows). */
    if ((h->left_samples_available & 0x8080) != 0x8080) {
        mode = left[mode];
        if (is_chroma && (h->left_samples_available & 0x8080)) {
            // mad cow disease mode, aka MBAFF + constrained_intra_pred
            mode = ALZHEIMER_DC_L0T_PRED8x8 +
                   (!(h->left_samples_available & 0x8000)) +
                   2 * (mode == DC_128_PRED8x8);
        }
        if (mode < 0) {
            av_log(h->s.avctx, AV_LOG_ERROR,
                   "left block unavailable for requested intra mode at %d %d\n",
                   s->mb_x, s->mb_y);
            return -1;
        }
    }

    return mode;
}

177 178 179
const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src,
                                  int *dst_length, int *consumed, int length)
{
180 181
    int i, si, di;
    uint8_t *dst;
182
    int bufidx;
183

184 185 186
    // src[0]&0x80; // forbidden bit
    h->nal_ref_idc   = src[0] >> 5;
    h->nal_unit_type = src[0] & 0x1F;
187

188 189
    src++;
    length--;
190

191 192 193 194 195 196 197 198
#define STARTCODE_TEST                                                  \
        if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) {     \
            if (src[i + 2] != 3) {                                      \
                /* startcode, so we must be past the end */             \
                length = i;                                             \
            }                                                           \
            break;                                                      \
        }
199
#if HAVE_FAST_UNALIGNED
200 201 202 203 204
#define FIND_FIRST_ZERO                                                 \
        if (i > 0 && !src[i])                                           \
            i--;                                                        \
        while (src[i])                                                  \
            i++
205 206 207 208 209
#if HAVE_FAST_64BIT
    for (i = 0; i + 1 < length; i += 9) {
        if (!((~AV_RN64A(src + i) &
               (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
              0x8000800080008080ULL))
210 211 212 213 214
            continue;
        FIND_FIRST_ZERO;
        STARTCODE_TEST;
        i -= 7;
    }
215 216 217 218 219
#else
    for (i = 0; i + 1 < length; i += 5) {
        if (!((~AV_RN32A(src + i) &
               (AV_RN32A(src + i) - 0x01000101U)) &
              0x80008080U))
220
            continue;
221 222 223 224 225
        FIND_FIRST_ZERO;
        STARTCODE_TEST;
        i -= 3;
    }
#endif
226
#else
227 228 229 230 231
    for (i = 0; i + 1 < length; i += 2) {
        if (src[i])
            continue;
        if (i > 0 && src[i - 1] == 0)
            i--;
232
        STARTCODE_TEST;
233
    }
234
#endif
235

236 237
    // use second escape buffer for inter data
    bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0;
238 239

    si = h->rbsp_buffer_size[bufidx];
240
    av_fast_padded_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+MAX_MBPAIR_SIZE);
241
    dst = h->rbsp_buffer[bufidx];
242

243
    if (dst == NULL)
244 245
        return NULL;

246 247 248
    if(i>=length-1){ //no escaped 0
        *dst_length= length;
        *consumed= length+1; //+1 for the header
249 250 251 252 253 254
        if(h->s.avctx->flags2 & CODEC_FLAG2_FAST){
            return src;
        }else{
            memcpy(dst, src, length);
            return dst;
        }
255 256
    }

257
    // printf("decoding esc\n");
258
    memcpy(dst, src, i);
259 260 261 262 263 264 265 266 267 268 269
    si = di = i;
    while (si + 2 < length) {
        // remove escapes (very rare 1:2^22)
        if (src[si + 2] > 3) {
            dst[di++] = src[si++];
            dst[di++] = src[si++];
        } else if (src[si] == 0 && src[si + 1] == 0) {
            if (src[si + 2] == 3) { // escape
                dst[di++]  = 0;
                dst[di++]  = 0;
                si        += 3;
270
                continue;
271
            } else // next start code
272
                goto nsc;
273 274
        }

275
        dst[di++] = src[si++];
276
    }
277 278
    while (si < length)
        dst[di++] = src[si++];
279
nsc:
280

281
    memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
282

283 284 285 286
    *dst_length = di;
    *consumed   = si + 1; // +1 for the header
    /* FIXME store exact number of bits in the getbitcontext
     * (it is needed for decoding) */
287 288 289
    return dst;
}

290 291 292 293
/**
 * Identify the exact end of the bitstream
 * @return the length of the trailing, or 0 if damaged
 */
294
static int decode_rbsp_trailing(H264Context *h, const uint8_t *src)
295 296
{
    int v = *src;
297 298
    int r;

299
    tprintf(h->s.avctx, "rbsp trailing %X\n", v);
300

301 302 303 304
    for (r = 1; r < 9; r++) {
        if (v & 1)
            return r;
        v >>= 1;
305 306 307 308
    }
    return 0;
}

309 310 311 312 313 314 315 316
static inline int get_lowest_part_list_y(H264Context *h, Picture *pic, int n,
                                         int height, int y_offset, int list)
{
    int raw_my        = h->mv_cache[list][scan8[n]][1];
    int filter_height = (raw_my & 3) ? 2 : 0;
    int full_my       = (raw_my >> 2) + y_offset;
    int top           = full_my - filter_height;
    int bottom        = full_my + filter_height + height;
317 318 319 320

    return FFMAX(abs(top), bottom);
}

/**
 * For block n, record in refs[list][ref] the lowest picture row that motion
 * compensation will read from each used reference picture, and count each
 * newly seen reference in nrefs[list].  Consumed by await_references().
 */
static inline void get_lowest_part_y(H264Context *h, int refs[2][48], int n,
                                     int height, int y_offset, int list0,
                                     int list1, int *nrefs)
{
    MpegEncContext *const s = &h->s;
    int my;

    /* translate the MB-relative offset to an absolute luma row
     * (field-aware: s->mb_y is shifted when decoding a field MB) */
    y_offset += 16 * (s->mb_y >> MB_FIELD);

    if (list0) {
        int ref_n    = h->ref_cache[0][scan8[n]];
        Picture *ref = &h->ref_list[0][ref_n];

        // Error resilience puts the current picture in the ref list.
        // Don't try to wait on these as it will cause a deadlock.
        // Fields can wait on each other, though.
        if (ref->f.thread_opaque   != s->current_picture.f.thread_opaque ||
            (ref->f.reference & 3) != s->picture_structure) {
            my = get_lowest_part_list_y(h, ref, n, height, y_offset, 0);
            /* refs[] starts at -1, so the first hit counts the reference */
            if (refs[0][ref_n] < 0)
                nrefs[0] += 1;
            refs[0][ref_n] = FFMAX(refs[0][ref_n], my);
        }
    }

    if (list1) {
        int ref_n    = h->ref_cache[1][scan8[n]];
        Picture *ref = &h->ref_list[1][ref_n];

        if (ref->f.thread_opaque   != s->current_picture.f.thread_opaque ||
            (ref->f.reference & 3) != s->picture_structure) {
            my = get_lowest_part_list_y(h, ref, n, height, y_offset, 1);
            if (refs[1][ref_n] < 0)
                nrefs[1] += 1;
            refs[1][ref_n] = FFMAX(refs[1][ref_n], my);
        }
    }
}

/**
 * Wait until all reference frames are available for MC operations.
 *
 * Gathers, per reference picture and list, the lowest row any partition of
 * the current MB reads, then blocks on ff_thread_await_progress() until the
 * decoding thread owning each reference has produced that row.
 *
 * @param h the H264 context
 */
static void await_references(H264Context *h)
{
    MpegEncContext *const s = &h->s;
    const int mb_xy   = h->mb_xy;
    const int mb_type = s->current_picture.f.mb_type[mb_xy];
    /* lowest row required from each reference; -1 = reference unused */
    int refs[2][48];
    int nrefs[2] = { 0 };
    int ref, list;

    memset(refs, -1, sizeof(refs));

    /* Walk the partitioning of this MB and accumulate the lowest rows. */
    if (IS_16X16(mb_type)) {
        get_lowest_part_y(h, refs, 0, 16, 0,
                          IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
    } else if (IS_16X8(mb_type)) {
        get_lowest_part_y(h, refs, 0, 8, 0,
                          IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
        get_lowest_part_y(h, refs, 8, 8, 8,
                          IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
    } else if (IS_8X16(mb_type)) {
        get_lowest_part_y(h, refs, 0, 16, 0,
                          IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
        get_lowest_part_y(h, refs, 4, 16, 0,
                          IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
    } else {
        int i;

        assert(IS_8X8(mb_type));

        for (i = 0; i < 4; i++) {
            const int sub_mb_type = h->sub_mb_type[i];
            const int n           = 4 * i;
            int y_offset          = (i & 2) << 2;

            if (IS_SUB_8X8(sub_mb_type)) {
                get_lowest_part_y(h, refs, n, 8, y_offset,
                                  IS_DIR(sub_mb_type, 0, 0),
                                  IS_DIR(sub_mb_type, 0, 1),
                                  nrefs);
            } else if (IS_SUB_8X4(sub_mb_type)) {
                get_lowest_part_y(h, refs, n, 4, y_offset,
                                  IS_DIR(sub_mb_type, 0, 0),
                                  IS_DIR(sub_mb_type, 0, 1),
                                  nrefs);
                get_lowest_part_y(h, refs, n + 2, 4, y_offset + 4,
                                  IS_DIR(sub_mb_type, 0, 0),
                                  IS_DIR(sub_mb_type, 0, 1),
                                  nrefs);
            } else if (IS_SUB_4X8(sub_mb_type)) {
                get_lowest_part_y(h, refs, n, 8, y_offset,
                                  IS_DIR(sub_mb_type, 0, 0),
                                  IS_DIR(sub_mb_type, 0, 1),
                                  nrefs);
                get_lowest_part_y(h, refs, n + 1, 8, y_offset,
                                  IS_DIR(sub_mb_type, 0, 0),
                                  IS_DIR(sub_mb_type, 0, 1),
                                  nrefs);
            } else {
                int j;
                assert(IS_SUB_4X4(sub_mb_type));
                for (j = 0; j < 4; j++) {
                    int sub_y_offset = y_offset + 2 * (j & 2);
                    get_lowest_part_y(h, refs, n + j, 4, sub_y_offset,
                                      IS_DIR(sub_mb_type, 0, 0),
                                      IS_DIR(sub_mb_type, 0, 1),
                                      nrefs);
                }
            }
        }
    }

    /* Now block until every needed reference row has been decoded;
     * nrefs[list] lets us stop early once all refs of a list are done. */
    for (list = h->list_count - 1; list >= 0; list--)
        for (ref = 0; ref < 48 && nrefs[list]; ref++) {
            int row = refs[list][ref];
            if (row >= 0) {
                Picture *ref_pic      = &h->ref_list[list][ref];
                int ref_field         = ref_pic->f.reference - 1;
                int ref_field_picture = ref_pic->field_picture;
                int pic_height        = 16 * s->mb_height >> ref_field_picture;

                row <<= MB_MBAFF;
                nrefs[list]--;

                /* Pick the wait pattern that matches the frame/field
                 * relationship between the current picture and the ref. */
                if (!FIELD_PICTURE && ref_field_picture) { // frame referencing two fields
                    ff_thread_await_progress(&ref_pic->f,
                                             FFMIN((row >> 1) - !(row & 1),
                                                   pic_height - 1),
                                             1);
                    ff_thread_await_progress(&ref_pic->f,
                                             FFMIN((row >> 1), pic_height - 1),
                                             0);
                } else if (FIELD_PICTURE && !ref_field_picture) { // field referencing one field of a frame
                    ff_thread_await_progress(&ref_pic->f,
                                             FFMIN(row * 2 + ref_field,
                                                   pic_height - 1),
                                             0);
                } else if (FIELD_PICTURE) {
                    ff_thread_await_progress(&ref_pic->f,
                                             FFMIN(row, pic_height - 1),
                                             ref_field);
                } else {
                    ff_thread_await_progress(&ref_pic->f,
                                             FFMIN(row, pic_height - 1),
                                             0);
                }
            }
        }
}

/**
 * Motion-compensate one partition of a macroblock in one direction.
 *
 * Runs quarter-pel luma interpolation (qpix_op) and chroma interpolation
 * (chroma_op) from reference picture pic into dest_*, going through
 * emulated_edge_mc whenever the filtered source area crosses the picture
 * border.
 *
 * @param square     nonzero if one qpix_op call covers the whole partition
 * @param delta      byte offset of the second half when !square
 * @param chroma_idc 1 = yuv420, 2 = yuv422, 3 = yuv444
 */
static av_always_inline void mc_dir_part(H264Context *h, Picture *pic,
                                         int n, int square, int height,
                                         int delta, int list,
                                         uint8_t *dest_y, uint8_t *dest_cb,
                                         uint8_t *dest_cr,
                                         int src_x_offset, int src_y_offset,
                                         qpel_mc_func *qpix_op,
                                         h264_chroma_mc_func chroma_op,
                                         int pixel_shift, int chroma_idc)
{
    MpegEncContext *const s = &h->s;
    const int mx      = h->mv_cache[list][scan8[n]][0] + src_x_offset * 8;
    int my            = h->mv_cache[list][scan8[n]][1] + src_y_offset * 8;
    const int luma_xy = (mx & 3) + ((my & 3) << 2); // quarter-pel phase index
    int offset        = ((mx >> 2) << pixel_shift) + (my >> 2) * h->mb_linesize;
    uint8_t *src_y    = pic->f.data[0] + offset;
    uint8_t *src_cb, *src_cr;
    int extra_width  = h->emu_edge_width;
    int extra_height = h->emu_edge_height;
    int emu = 0;
    const int full_mx    = mx >> 2;
    const int full_my    = my >> 2;
    const int pic_width  = 16 * s->mb_width;
    const int pic_height = 16 * s->mb_height >> MB_FIELD;
    int ysh;

    /* sub-pel MVs make the interpolation filter read beyond the block */
    if (mx & 7)
        extra_width -= 3;
    if (my & 7)
        extra_height -= 3;

    /* source area (incl. filter taps) overlaps the border: build a padded
     * copy in edge_emu_buffer and interpolate from there */
    if (full_mx                <          0 - extra_width  ||
        full_my                <          0 - extra_height ||
        full_mx + 16 /*FIXME*/ > pic_width  + extra_width  ||
        full_my + 16 /*FIXME*/ > pic_height + extra_height) {
        s->dsp.emulated_edge_mc(s->edge_emu_buffer,
                                src_y - (2 << pixel_shift) - 2 * h->mb_linesize,
                                h->mb_linesize,
                                16 + 5, 16 + 5 /*FIXME*/, full_mx - 2,
                                full_my - 2, pic_width, pic_height);
        src_y = s->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize;
        emu   = 1;
    }

    qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); // FIXME try variable height perhaps?
    if (!square)
        qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);

    if (CONFIG_GRAY && s->flags & CODEC_FLAG_GRAY)
        return;

    /* 4:4:4 chroma uses the full-resolution luma interpolation path */
    if (chroma_idc == 3 /* yuv444 */) {
        src_cb = pic->f.data[1] + offset;
        if (emu) {
            s->dsp.emulated_edge_mc(s->edge_emu_buffer,
                                    src_cb - (2 << pixel_shift) - 2 * h->mb_linesize,
                                    h->mb_linesize,
                                    16 + 5, 16 + 5 /*FIXME*/,
                                    full_mx - 2, full_my - 2,
                                    pic_width, pic_height);
            src_cb = s->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize;
        }
        qpix_op[luma_xy](dest_cb, src_cb, h->mb_linesize); // FIXME try variable height perhaps?
        if (!square)
            qpix_op[luma_xy](dest_cb + delta, src_cb + delta, h->mb_linesize);

        src_cr = pic->f.data[2] + offset;
        if (emu) {
            s->dsp.emulated_edge_mc(s->edge_emu_buffer,
                                    src_cr - (2 << pixel_shift) - 2 * h->mb_linesize,
                                    h->mb_linesize,
                                    16 + 5, 16 + 5 /*FIXME*/,
                                    full_mx - 2, full_my - 2,
                                    pic_width, pic_height);
            src_cr = s->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize;
        }
        qpix_op[luma_xy](dest_cr, src_cr, h->mb_linesize); // FIXME try variable height perhaps?
        if (!square)
            qpix_op[luma_xy](dest_cr + delta, src_cr + delta, h->mb_linesize);
        return;
    }

    /* vertical chroma shift: 3 for 4:2:0 (1/8-pel), 2 for 4:2:2 */
    ysh = 3 - (chroma_idc == 2 /* yuv422 */);
    if (chroma_idc == 1 /* yuv420 */ && MB_FIELD) {
        // chroma offset when predicting from a field of opposite parity
        my  += 2 * ((s->mb_y & 1) - (pic->f.reference - 1));
        emu |= (my >> 3) < 0 || (my >> 3) + 8 >= (pic_height >> 1);
    }

    src_cb = pic->f.data[1] + ((mx >> 3) << pixel_shift) +
             (my >> ysh) * h->mb_uvlinesize;
    src_cr = pic->f.data[2] + ((mx >> 3) << pixel_shift) +
             (my >> ysh) * h->mb_uvlinesize;

    if (emu) {
        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize,
                                9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
                                pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
        src_cb = s->edge_emu_buffer;
    }
    chroma_op(dest_cb, src_cb, h->mb_uvlinesize,
              height >> (chroma_idc == 1 /* yuv420 */),
              mx & 7, (my << (chroma_idc == 2 /* yuv422 */)) & 7);

    if (emu) {
        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize,
                                9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
                                pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
        src_cr = s->edge_emu_buffer;
    }
    chroma_op(dest_cr, src_cr, h->mb_uvlinesize, height >> (chroma_idc == 1 /* yuv420 */),
              mx & 7, (my << (chroma_idc == 2 /* yuv422 */)) & 7);
}

/**
 * Motion compensation for one partition without weighted prediction.
 *
 * The list0 prediction is written with the put ops; if the partition is
 * bidirectional, the ops are switched to the avg variants afterwards so
 * the list1 pass averages into the same destination.
 */
static av_always_inline void mc_part_std(H264Context *h, int n, int square,
                                         int height, int delta,
                                         uint8_t *dest_y, uint8_t *dest_cb,
                                         uint8_t *dest_cr,
                                         int x_offset, int y_offset,
                                         qpel_mc_func *qpix_put,
                                         h264_chroma_mc_func chroma_put,
                                         qpel_mc_func *qpix_avg,
                                         h264_chroma_mc_func chroma_avg,
                                         int list0, int list1,
                                         int pixel_shift, int chroma_idc)
{
    MpegEncContext *const s       = &h->s;
    qpel_mc_func *qpix_op         = qpix_put;
    h264_chroma_mc_func chroma_op = chroma_put;

    /* advance the destination pointers to this partition; chroma strides
     * and offsets depend on the chroma subsampling */
    dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
    if (chroma_idc == 3 /* yuv444 */) {
        dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
        dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
    } else if (chroma_idc == 2 /* yuv422 */) {
        dest_cb += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize;
        dest_cr += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize;
    } else { /* yuv420 */
        dest_cb += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize;
        dest_cr += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize;
    }
    /* make the offsets absolute (in 8-pixel units) for mc_dir_part */
    x_offset += 8 * s->mb_x;
    y_offset += 8 * (s->mb_y >> MB_FIELD);

    if (list0) {
        Picture *ref = &h->ref_list[0][h->ref_cache[0][scan8[n]]];
        mc_dir_part(h, ref, n, square, height, delta, 0,
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_op, chroma_op, pixel_shift, chroma_idc);

        /* second direction averages with the first */
        qpix_op   = qpix_avg;
        chroma_op = chroma_avg;
    }

    if (list1) {
        Picture *ref = &h->ref_list[1][h->ref_cache[1][scan8[n]]];
        mc_dir_part(h, ref, n, square, height, delta, 1,
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_op, chroma_op, pixel_shift, chroma_idc);
    }
}

/**
 * Motion compensation for one partition with weighted prediction.
 *
 * Bidirectional partitions render list0 into the destination and list1
 * into the obmc scratchpad, then combine them with either implicit
 * weights (use_weight == 2, from h->implicit_weight) or the explicit
 * per-reference weights.  Unidirectional partitions are scaled in place
 * with the explicit weight ops.
 */
static av_always_inline void mc_part_weighted(H264Context *h, int n, int square,
                                              int height, int delta,
                                              uint8_t *dest_y, uint8_t *dest_cb,
                                              uint8_t *dest_cr,
                                              int x_offset, int y_offset,
                                              qpel_mc_func *qpix_put,
                                              h264_chroma_mc_func chroma_put,
                                              h264_weight_func luma_weight_op,
                                              h264_weight_func chroma_weight_op,
                                              h264_biweight_func luma_weight_avg,
                                              h264_biweight_func chroma_weight_avg,
                                              int list0, int list1,
                                              int pixel_shift, int chroma_idc)
{
    MpegEncContext *const s = &h->s;
    int chroma_height;

    /* position the destinations and pick the chroma height / weight ops
     * according to the chroma subsampling */
    dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
    if (chroma_idc == 3 /* yuv444 */) {
        chroma_height     = height;
        /* full-resolution chroma reuses the luma weight functions */
        chroma_weight_avg = luma_weight_avg;
        chroma_weight_op  = luma_weight_op;
        dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
        dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
    } else if (chroma_idc == 2 /* yuv422 */) {
        chroma_height = height;
        dest_cb      += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize;
        dest_cr      += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize;
    } else { /* yuv420 */
        chroma_height = height >> 1;
        dest_cb      += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize;
        dest_cr      += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize;
    }
    /* make the offsets absolute (in 8-pixel units) for mc_dir_part */
    x_offset += 8 * s->mb_x;
    y_offset += 8 * (s->mb_y >> MB_FIELD);

    if (list0 && list1) {
        /* don't optimize for luma-only case, since B-frames usually
         * use implicit weights => chroma too. */
        uint8_t *tmp_cb = s->obmc_scratchpad;
        uint8_t *tmp_cr = s->obmc_scratchpad + (16 << pixel_shift);
        uint8_t *tmp_y  = s->obmc_scratchpad + 16 * h->mb_uvlinesize;
        int refn0       = h->ref_cache[0][scan8[n]];
        int refn1       = h->ref_cache[1][scan8[n]];

        mc_dir_part(h, &h->ref_list[0][refn0], n, square, height, delta, 0,
                    dest_y, dest_cb, dest_cr,
                    x_offset, y_offset, qpix_put, chroma_put,
                    pixel_shift, chroma_idc);
        mc_dir_part(h, &h->ref_list[1][refn1], n, square, height, delta, 1,
                    tmp_y, tmp_cb, tmp_cr,
                    x_offset, y_offset, qpix_put, chroma_put,
                    pixel_shift, chroma_idc);

        if (h->use_weight == 2) {
            /* implicit weighting: weights sum to 64, denominator 2^5 */
            int weight0 = h->implicit_weight[refn0][refn1][s->mb_y & 1];
            int weight1 = 64 - weight0;
            luma_weight_avg(dest_y, tmp_y, h->mb_linesize,
                            height, 5, weight0, weight1, 0);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize,
                              chroma_height, 5, weight0, weight1, 0);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize,
                              chroma_height, 5, weight0, weight1, 0);
        } else {
            /* explicit weighting: [ref][dir][0] = weight, [1] = offset */
            luma_weight_avg(dest_y, tmp_y, h->mb_linesize, height,
                            h->luma_log2_weight_denom,
                            h->luma_weight[refn0][0][0],
                            h->luma_weight[refn1][1][0],
                            h->luma_weight[refn0][0][1] +
                            h->luma_weight[refn1][1][1]);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, chroma_height,
                              h->chroma_log2_weight_denom,
                              h->chroma_weight[refn0][0][0][0],
                              h->chroma_weight[refn1][1][0][0],
                              h->chroma_weight[refn0][0][0][1] +
                              h->chroma_weight[refn1][1][0][1]);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, chroma_height,
                              h->chroma_log2_weight_denom,
                              h->chroma_weight[refn0][0][1][0],
                              h->chroma_weight[refn1][1][1][0],
                              h->chroma_weight[refn0][0][1][1] +
                              h->chroma_weight[refn1][1][1][1]);
        }
    } else {
        /* unidirectional: render, then weight in place */
        int list     = list1 ? 1 : 0;
        int refn     = h->ref_cache[list][scan8[n]];
        Picture *ref = &h->ref_list[list][refn];
        mc_dir_part(h, ref, n, square, height, delta, list,
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put, chroma_put, pixel_shift, chroma_idc);

        luma_weight_op(dest_y, h->mb_linesize, height,
                       h->luma_log2_weight_denom,
                       h->luma_weight[refn][list][0],
                       h->luma_weight[refn][list][1]);
        if (h->use_weight_chroma) {
            chroma_weight_op(dest_cb, h->mb_uvlinesize, chroma_height,
                             h->chroma_log2_weight_denom,
                             h->chroma_weight[refn][list][0][0],
                             h->chroma_weight[refn][list][0][1]);
            chroma_weight_op(dest_cr, h->mb_uvlinesize, chroma_height,
                             h->chroma_log2_weight_denom,
                             h->chroma_weight[refn][list][1][0],
                             h->chroma_weight[refn][list][1][1]);
        }
    }
}

744 745
/**
 * Prefetch reference-picture pixels for the current macroblock's list-0/1
 * motion vector so they are in cache by the time motion compensation runs.
 *
 * @param h           decoder context
 * @param list        reference list to prefetch from (0 or 1)
 * @param pixel_shift 1 for >8-bit pixels (2 bytes/sample), else 0
 * @param chroma_idc  chroma format (3 = 4:4:4, otherwise subsampled)
 */
static av_always_inline void prefetch_motion(H264Context *h, int list,
                                             int pixel_shift, int chroma_idc)
{
    /* fetch pixels for estimated mv 4 macroblocks ahead
     * optimized for 64byte cache lines */
    MpegEncContext *const s = &h->s;
    const int refn = h->ref_cache[list][scan8[0]];
    if (refn >= 0) {
        /* full-pel position of the predicted block, offset half an MB right */
        const int mx  = (h->mv_cache[list][scan8[0]][0] >> 2) + 16 * s->mb_x + 8;
        const int my  = (h->mv_cache[list][scan8[0]][1] >> 2) + 16 * s->mb_y;
        uint8_t **src = h->ref_list[list][refn].f.data;
        /* (s->mb_x & 3) * 4 staggers rows so consecutive MBs touch
         * different cache lines; +64 bytes skips ahead of the block start */
        int off       = (mx << pixel_shift) +
                        (my + (s->mb_x & 3) * 4) * h->mb_linesize +
                        (64 << pixel_shift);
        s->dsp.prefetch(src[0] + off, s->linesize, 4);
        if (chroma_idc == 3 /* yuv444 */) {
            /* chroma planes are full resolution: same offset as luma */
            s->dsp.prefetch(src[1] + off, s->linesize, 4);
            s->dsp.prefetch(src[2] + off, s->linesize, 4);
        } else {
            /* subsampled chroma: halve the coordinates; one call covers both
             * planes via the Cb->Cr plane distance as "stride" */
            off= (((mx>>1)+64)<<pixel_shift) + ((my>>1) + (s->mb_x&7))*s->uvlinesize;
            s->dsp.prefetch(src[1] + off, src[2] - src[1], 2);
        }
    }
}

769 770
/**
 * Free all per-stream tables and, optionally, the raw-bitstream buffers.
 *
 * Shared tables live on the master context; per-thread data (top borders,
 * scratchpads, rbsp buffers) is freed on every slice-thread context, and
 * the thread contexts themselves (except index 0, which aliases h) are
 * released as well.
 *
 * @param h         master decoder context
 * @param free_rbsp nonzero to also free the NAL unescaping buffers
 */
static void free_tables(H264Context *h, int free_rbsp)
{
    int i;
    H264Context *hx;

    av_freep(&h->intra4x4_pred_mode);
    av_freep(&h->chroma_pred_mode_table);
    av_freep(&h->cbp_table);
    av_freep(&h->mvd_table[0]);
    av_freep(&h->mvd_table[1]);
    av_freep(&h->direct_table);
    av_freep(&h->non_zero_count);
    av_freep(&h->slice_table_base);
    /* slice_table points into slice_table_base; clear it so no stale
     * pointer survives the free above */
    h->slice_table = NULL;
    av_freep(&h->list_counts);

    av_freep(&h->mb2b_xy);
    av_freep(&h->mb2br_xy);

    for (i = 0; i < MAX_THREADS; i++) {
        hx = h->thread_context[i];
        if (!hx)
            continue;
        av_freep(&hx->top_borders[1]);
        av_freep(&hx->top_borders[0]);
        av_freep(&hx->s.obmc_scratchpad);
        if (free_rbsp) {
            av_freep(&hx->rbsp_buffer[1]);
            av_freep(&hx->rbsp_buffer[0]);
            hx->rbsp_buffer_size[0] = 0;
            hx->rbsp_buffer_size[1] = 0;
        }
        /* thread_context[0] is h itself — never free it here */
        if (i)
            av_freep(&h->thread_context[i]);
    }
}

806 807 808 809
/**
 * Build the per-QP 8x8 dequantization tables from the PPS scaling lists.
 * Matrices whose scaling lists are byte-identical to an earlier one share
 * that earlier buffer instead of being recomputed.
 */
static void init_dequant8_coeff_table(H264Context *h)
{
    const int max_qp = 51 + 6 * (h->sps.bit_depth_luma - 8);
    int mtx, prev, qp, coef;

    for (mtx = 0; mtx < 6; mtx++) {
        h->dequant8_coeff[mtx] = h->dequant8_buffer[mtx];

        /* alias an earlier matrix with the same scaling list, if any */
        for (prev = 0; prev < mtx; prev++)
            if (!memcmp(h->pps.scaling_matrix8[prev],
                        h->pps.scaling_matrix8[mtx], 64 * sizeof(uint8_t))) {
                h->dequant8_coeff[mtx] = h->dequant8_buffer[prev];
                break;
            }
        if (prev < mtx)
            continue;

        for (qp = 0; qp <= max_qp; qp++) {
            const int shift = div6[qp];
            const int idx   = rem6[qp];
            /* (coef >> 3) | ((coef & 7) << 3) transposes the 8x8 index */
            for (coef = 0; coef < 64; coef++)
                h->dequant8_coeff[mtx][qp][(coef >> 3) | ((coef & 7) << 3)] =
                    ((uint32_t)dequant8_coeff_init[idx][dequant8_coeff_init_scan[((coef >> 1) & 12) | (coef & 3)]] *
                     h->pps.scaling_matrix8[mtx][coef]) << shift;
        }
    }
}

833 834 835 836 837
/**
 * Build the per-QP 4x4 dequantization tables from the PPS scaling lists.
 * Matrices with identical scaling lists share a single buffer.
 */
static void init_dequant4_coeff_table(H264Context *h)
{
    const int max_qp = 51 + 6 * (h->sps.bit_depth_luma - 8);
    int mtx, prev, qp, coef;

    for (mtx = 0; mtx < 6; mtx++) {
        h->dequant4_coeff[mtx] = h->dequant4_buffer[mtx];

        /* alias an earlier identical matrix instead of recomputing */
        for (prev = 0; prev < mtx; prev++)
            if (!memcmp(h->pps.scaling_matrix4[prev],
                        h->pps.scaling_matrix4[mtx], 16 * sizeof(uint8_t))) {
                h->dequant4_coeff[mtx] = h->dequant4_buffer[prev];
                break;
            }
        if (prev < mtx)
            continue;

        for (qp = 0; qp <= max_qp; qp++) {
            const int shift = div6[qp] + 2;
            const int idx   = rem6[qp];
            /* (coef >> 2) | ((coef << 2) & 0xF) transposes the 4x4 index */
            for (coef = 0; coef < 16; coef++)
                h->dequant4_coeff[mtx][qp][(coef >> 2) | ((coef << 2) & 0xF)] =
                    ((uint32_t)dequant4_coeff_init[idx][(coef & 1) + ((coef >> 2) & 1)] *
                     h->pps.scaling_matrix4[mtx][coef]) << shift;
        }
    }
}

859 860 861
/**
 * (Re)build the dequantization look-up tables. In transform-bypass
 * (lossless) mode, qp 0 must dequantize to the identity, so its tables
 * are overwritten with a flat 1 << 6.
 */
static void init_dequant_tables(H264Context *h)
{
    int i, x;

    init_dequant4_coeff_table(h);
    if (h->pps.transform_8x8_mode)
        init_dequant8_coeff_table(h);

    if (!h->sps.transform_bypass)
        return;

    for (i = 0; i < 6; i++)
        for (x = 0; x < 16; x++)
            h->dequant4_coeff[i][0][x] = 1 << 6;

    if (h->pps.transform_8x8_mode)
        for (i = 0; i < 6; i++)
            for (x = 0; x < 64; x++)
                h->dequant8_coeff[i][0][x] = 1 << 6;
}

876 877 878 879
/**
 * Allocate all tables shared between slice threads and initialize the
 * macroblock-to-block index maps and dequant tables.
 *
 * Per-MB tables are sized by big_mb_num (one extra stride row of padding);
 * per-row tables (intra pred modes, mvd) are sized by row_mb_num so each
 * slice thread gets its own two rows.
 *
 * @return 0 on success, -1 on allocation failure (partial allocations
 *         are released via free_tables()).
 */
int ff_h264_alloc_tables(H264Context *h)
{
    MpegEncContext *const s = &h->s;
    const int big_mb_num    = s->mb_stride * (s->mb_height + 1);
    const int row_mb_num    = 2*s->mb_stride*FFMAX(s->avctx->thread_count, 1);
    int x, y;

    /* each macro jumps to 'fail' on allocation failure */
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->intra4x4_pred_mode,
                      row_mb_num * 8 * sizeof(uint8_t), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count,
                      big_mb_num * 48 * sizeof(uint8_t), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->slice_table_base,
                      (big_mb_num + s->mb_stride) * sizeof(*h->slice_table_base), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->cbp_table,
                      big_mb_num * sizeof(uint16_t), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->chroma_pred_mode_table,
                      big_mb_num * sizeof(uint8_t), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[0],
                      16 * row_mb_num * sizeof(uint8_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[1],
                      16 * row_mb_num * sizeof(uint8_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->direct_table,
                      4 * big_mb_num * sizeof(uint8_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->list_counts,
                      big_mb_num * sizeof(uint8_t), fail)

    /* -1 marks "no slice"; slice_table is offset into the base so that
     * neighbours above/left of MB (0,0) are addressable */
    memset(h->slice_table_base, -1,
           (big_mb_num + s->mb_stride) * sizeof(*h->slice_table_base));
    h->slice_table = h->slice_table_base + s->mb_stride * 2 + 1;

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b_xy,
                      big_mb_num * sizeof(uint32_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2br_xy,
                      big_mb_num * sizeof(uint32_t), fail);
    for (y = 0; y < s->mb_height; y++)
        for (x = 0; x < s->mb_width; x++) {
            const int mb_xy = x + y * s->mb_stride;
            const int b_xy  = 4 * x + 4 * y * h->b_stride;

            h->mb2b_xy[mb_xy]  = b_xy;
            /* without FMO the br map only needs two rows and wraps */
            h->mb2br_xy[mb_xy] = 8 * (FMO ? mb_xy : (mb_xy % (2 * s->mb_stride)));
        }

    /* allocated later, once linesize is known (see ff_h264_frame_start) */
    s->obmc_scratchpad = NULL;

    if (!h->dequant4_coeff[0])
        init_dequant_tables(h);

    return 0;

fail:
    free_tables(h, 1);
    return -1;
}

931 932 933
/**
 * Mimic alloc_tables(), but for every context thread.
 */
934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950
/**
 * Mimic alloc_tables(), but for every context thread.
 * Shared per-frame tables are aliased from src; per-row tables get an
 * offset so slice thread number i writes into its own two MB rows.
 */
static void clone_tables(H264Context *dst, H264Context *src, int i)
{
    MpegEncContext *const s = &src->s;
    const int row_offset    = i * 8 * 2 * s->mb_stride;

    /* per-row tables: offset per thread */
    dst->intra4x4_pred_mode     = src->intra4x4_pred_mode + row_offset;
    dst->mvd_table[0]           = src->mvd_table[0] + row_offset;
    dst->mvd_table[1]           = src->mvd_table[1] + row_offset;

    /* per-frame tables: shared outright */
    dst->non_zero_count         = src->non_zero_count;
    dst->slice_table            = src->slice_table;
    dst->cbp_table              = src->cbp_table;
    dst->mb2b_xy                = src->mb2b_xy;
    dst->mb2br_xy               = src->mb2br_xy;
    dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
    dst->direct_table           = src->direct_table;
    dst->list_counts            = src->list_counts;

    dst->s.obmc_scratchpad      = NULL;

    ff_h264_pred_init(&dst->hpc, src->s.codec_id, src->sps.bit_depth_luma,
                      src->sps.chroma_format_idc);
}

/**
 * Init context
 * Allocate buffers which are not shared amongst multiple threads.
 */
957 958 959 960 961 962 963 964 965 966 967 968 969
/**
 * Init context
 * Allocate buffers which are not shared amongst multiple threads.
 *
 * @return 0 on success, -1 on allocation failure (caller's free_tables()
 *         releases whatever was allocated).
 */
static int context_init(H264Context *h)
{
    /* two borders (for MBAFF top/bottom) of 3 full-width 16-pixel planes */
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0],
                      h->s.mb_width * 16 * 3 * sizeof(uint8_t) * 2, fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1],
                      h->s.mb_width * 16 * 3 * sizeof(uint8_t) * 2, fail)

    /* cache slots right of the 8x8/sub-partitions never become valid */
    h->ref_cache[0][scan8[5]  + 1] =
    h->ref_cache[0][scan8[7]  + 1] =
    h->ref_cache[0][scan8[13] + 1] =
    h->ref_cache[1][scan8[5]  + 1] =
    h->ref_cache[1][scan8[7]  + 1] =
    h->ref_cache[1][scan8[13] + 1] = PART_NOT_AVAILABLE;

    return 0;

fail:
    return -1; // free_tables will clean up for us
}

977 978
static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size);

979 980 981
/**
 * One-time initialization shared by the decoder entry points: copies the
 * coded dimensions, sets 8-bit 4:2:0 defaults (updated later when an SPS
 * is parsed) and initializes the DSP/prediction function tables.
 */
static av_cold void common_init(H264Context *h)
{
    MpegEncContext *const s = &h->s;

    s->width    = s->avctx->width;
    s->height   = s->avctx->height;
    s->codec_id = s->avctx->codec->id;

    /* defaults until the SPS tells us otherwise */
    s->avctx->bits_per_raw_sample = 8;
    h->cur_chroma_format_idc = 1;

    ff_h264dsp_init(&h->h264dsp,
                    s->avctx->bits_per_raw_sample, h->cur_chroma_format_idc);
    ff_h264_pred_init(&h->hpc, s->codec_id,
                      s->avctx->bits_per_raw_sample, h->cur_chroma_format_idc);

    /* -1 = no PPS has provided dequant coefficients yet */
    h->dequant_coeff_pps = -1;
    s->unrestricted_mv   = 1;

    s->dsp.dct_bits = 16;
    /* needed so that IDCT permutation is known early */
    ff_dsputil_init(&s->dsp, s->avctx);

    /* flat (all-16) scaling matrices until real ones are parsed */
    memset(h->pps.scaling_matrix4, 16, 6 * 16 * sizeof(uint8_t));
    memset(h->pps.scaling_matrix8, 16, 2 * 64 * sizeof(uint8_t));
}

1006
/**
 * Parse codec extradata: either an avcC configuration record (first byte 1)
 * containing length-prefixed SPS/PPS, or plain Annex-B NAL units.
 *
 * Fix: the original read the 2-byte NAL length (AV_RB16) and the PPS count
 * byte *before* checking they lie inside the buffer, so a truncated avcC
 * caused an out-of-bounds read. Bounds are now checked before every read.
 *
 * @param buf  extradata bytes
 * @param size number of bytes in buf
 * @return size on success, -1 on malformed/truncated data or decode failure
 */
int ff_h264_decode_extradata(H264Context *h, const uint8_t *buf, int size)
{
    AVCodecContext *avctx = h->s.avctx;

    if (!buf || size <= 0)
        return -1;

    if (buf[0] == 1) {
        int i, cnt, nalsize;
        const unsigned char *p = buf;

        h->is_avc = 1;

        if (size < 7) {
            av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
            return -1;
        }
        /* sps and pps in the avcC always have length coded with 2 bytes,
         * so put a fake nal_length_size = 2 while parsing them */
        h->nal_length_size = 2;
        // Decode sps from avcC
        cnt = *(p + 5) & 0x1f; // Number of sps
        p  += 6;
        for (i = 0; i < cnt; i++) {
            /* the 2-byte length field itself must fit in the buffer */
            if (p - buf + 2 > size)
                return -1;
            nalsize = AV_RB16(p) + 2;
            if (nalsize > size - (p - buf))
                return -1;
            if (decode_nal_units(h, p, nalsize) < 0) {
                av_log(avctx, AV_LOG_ERROR,
                       "Decoding sps %d from avcC failed\n", i);
                return -1;
            }
            p += nalsize;
        }
        // Decode pps from avcC
        if (p - buf + 1 > size)
            return -1;
        cnt = *(p++); // Number of pps
        for (i = 0; i < cnt; i++) {
            if (p - buf + 2 > size)
                return -1;
            nalsize = AV_RB16(p) + 2;
            if (nalsize > size - (p - buf))
                return -1;
            if (decode_nal_units(h, p, nalsize) < 0) {
                av_log(avctx, AV_LOG_ERROR,
                       "Decoding pps %d from avcC failed\n", i);
                return -1;
            }
            p += nalsize;
        }
        // Now store right nal length size, that will be used to parse all other nals
        h->nal_length_size = (buf[4] & 0x03) + 1;
    } else {
        h->is_avc = 0;
        if (decode_nal_units(h, buf, size) < 0)
            return -1;
    }
    return size;
}

1063 1064 1065 1066
/**
 * Decoder init entry point: set up MpegEncContext defaults, H.264 defaults
 * (VLC tables, POC state, SEI state) and, if present, parse the extradata.
 *
 * @return 0 on success, -1 if the extradata could not be decoded
 */
av_cold int ff_h264_decode_init(AVCodecContext *avctx)
{
    H264Context *h = avctx->priv_data;
    MpegEncContext *const s = &h->s;
    int i;

    ff_MPV_decode_defaults(s);

    s->avctx = avctx;
    common_init(h);

    s->out_format      = FMT_H264;
    s->workaround_bugs = avctx->workaround_bugs;

    /* set defaults */
    // s->decode_mb = ff_h263_decode_mb;
    s->quarter_sample = 1;
    if (!avctx->has_b_frames)
        s->low_delay = 1;

    avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;

    ff_h264_decode_init_vlc();

    h->pixel_shift = 0;
    h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8;

    h->thread_context[0] = h;
    /* POC bookkeeping starts "empty" */
    h->outputed_poc      = h->next_outputed_poc = INT_MIN;
    for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++)
        h->last_pocs[i] = INT_MIN;
    h->prev_poc_msb = 1 << 16;
    h->prev_frame_num = -1;
    h->x264_build   = -1;
    ff_h264_reset_sei(h);
    if (avctx->codec_id == AV_CODEC_ID_H264) {
        /* H.264 uses 2 ticks per frame (one per field) */
        if (avctx->ticks_per_frame == 1)
            s->avctx->time_base.den *= 2;
        avctx->ticks_per_frame = 2;
    }

    if (avctx->extradata_size > 0 && avctx->extradata &&
        ff_h264_decode_extradata(h, avctx->extradata, avctx->extradata_size) < 0) {
        ff_h264_free_context(h);
        return -1;
    }

    /* an SPS from the extradata may already tell us the reorder depth */
    if (h->sps.bitstream_restriction_flag &&
        s->avctx->has_b_frames < h->sps.num_reorder_frames) {
        s->avctx->has_b_frames = h->sps.num_reorder_frames;
        s->low_delay           = 0;
    }

    return 0;
}

1119 1120 1121 1122 1123
#define IN_RANGE(a, b, size) (((a) >= (b)) && ((a) < ((b) + (size))))

/**
 * Rebase an array of Picture pointers from one MpegEncContext's storage
 * to another's (used when duplicating decoder state across threads).
 * Each source pointer must be NULL or point into old_base / its picture
 * array; REBASE_PICTURE translates it into new_base's storage.
 */
static void copy_picture_range(Picture **to, Picture **from, int count,
                               MpegEncContext *new_base,
                               MpegEncContext *old_base)
{
    int idx;

    for (idx = 0; idx < count; idx++) {
        Picture *pic = from[idx];

        assert(!pic ||
               IN_RANGE(pic, old_base, sizeof(*old_base)) ||
               IN_RANGE(pic, old_base->picture,
                        sizeof(Picture) * old_base->picture_count));
        to[idx] = REBASE_PICTURE(pic, new_base, old_base);
    }
}

/**
 * Synchronize two arrays of heap-allocated parameter sets (SPS/PPS):
 * free destination entries whose source is gone, allocate destination
 * entries the source gained, then copy the payloads.
 *
 * Fix: the original passed the unchecked result of av_malloc() straight
 * to memcpy(), dereferencing NULL on allocation failure. On OOM the entry
 * is now left NULL and the copy skipped.
 */
static void copy_parameter_set(void **to, void **from, int count, int size)
{
    int i;

    for (i = 0; i < count; i++) {
        if (to[i] && !from[i]) {
            av_freep(&to[i]);
        } else if (from[i] && !to[i]) {
            to[i] = av_malloc(size);
            if (!to[i])
                continue; /* OOM: leave this slot empty, don't memcpy to NULL */
        }

        if (from[i])
            memcpy(to[i], from[i], size);
    }
}

1151 1152 1153
/**
 * Frame-thread init hook. For a copied context, clear the SPS/PPS pointer
 * tables: they were duplicated by the shallow copy but are owned by the
 * original thread, so the copy must not alias (or later free) them.
 */
static int decode_init_thread_copy(AVCodecContext *avctx)
{
    H264Context *h = avctx->priv_data;

    if (avctx->internal->is_copy) {
        memset(h->sps_buffers, 0, sizeof(h->sps_buffers));
        memset(h->pps_buffers, 0, sizeof(h->pps_buffers));
    }

    return 0;
}

1163 1164 1165 1166 1167 1168 1169 1170 1171
/* Bulk-copy all H264Context members from start_field up to (but not
 * including) end_field. Relies on the declaration order in h264.h. */
#define copy_fields(to, from, start_field, end_field)                   \
    memcpy(&to->start_field, &from->start_field,                        \
           (char *)&to->end_field - (char *)&to->start_field)

/**
 * Frame-threading update hook: copy decoding state from the source thread's
 * context into dst so the next frame can be decoded against it.
 *
 * On the first call (dst not yet initialized) the whole H264Context tail is
 * memcpy'd and per-thread allocations are redone; afterwards only the state
 * that changes per frame (SPS/PPS, dequant tables, POC, reference lists) is
 * copied, with Picture pointers rebased into dst's storage.
 *
 * @return 0 or a negative error code
 */
static int decode_update_thread_context(AVCodecContext *dst,
                                        const AVCodecContext *src)
{
    H264Context *h = dst->priv_data, *h1 = src->priv_data;
    MpegEncContext *const s = &h->s, *const s1 = &h1->s;
    int inited = s->context_initialized, err;
    int i;

    if (dst == src)
        return 0;

    err = ff_mpeg_update_thread_context(dst, src);
    if (err)
        return err;

    // FIXME handle width/height changing
    if (!inited) {
        for (i = 0; i < MAX_SPS_COUNT; i++)
            av_freep(h->sps_buffers + i);

        for (i = 0; i < MAX_PPS_COUNT; i++)
            av_freep(h->pps_buffers + i);

        // copy all fields after MpegEnc
        memcpy(&h->s + 1, &h1->s + 1,
               sizeof(H264Context) - sizeof(MpegEncContext));
        /* the memcpy above aliased h1's parameter sets; clear so that
         * copy_parameter_set() below makes real per-thread copies */
        memset(h->sps_buffers, 0, sizeof(h->sps_buffers));
        memset(h->pps_buffers, 0, sizeof(h->pps_buffers));

        if (s1->context_initialized) {
        if (ff_h264_alloc_tables(h) < 0) {
            av_log(dst, AV_LOG_ERROR, "Could not allocate memory for h264\n");
            return AVERROR(ENOMEM);
        }
        context_init(h);

        /* frame_start may not be called for the next thread (if it's decoding
         * a bottom field) so this has to be allocated here */
        h->s.obmc_scratchpad = av_malloc(16 * 6 * s->linesize);
        }

        /* rbsp buffers were aliased by the memcpy too; detach them */
        for (i = 0; i < 2; i++) {
            h->rbsp_buffer[i]      = NULL;
            h->rbsp_buffer_size[i] = 0;
        }

        h->thread_context[0] = h;

        s->dsp.clear_blocks(h->mb);
        s->dsp.clear_blocks(h->mb + (24 * 16 << h->pixel_shift));
    }

    // extradata/NAL handling
    h->is_avc = h1->is_avc;

    // SPS/PPS
    copy_parameter_set((void **)h->sps_buffers, (void **)h1->sps_buffers,
                       MAX_SPS_COUNT, sizeof(SPS));
    h->sps = h1->sps;
    copy_parameter_set((void **)h->pps_buffers, (void **)h1->pps_buffers,
                       MAX_PPS_COUNT, sizeof(PPS));
    h->pps = h1->pps;

    // Dequantization matrices
    // FIXME these are big - can they be only copied when PPS changes?
    copy_fields(h, h1, dequant4_buffer, dequant4_coeff);

    /* dequant*_coeff[] point into dequant*_buffer; rebuild the pointers
     * relative to h's own buffers, preserving the aliasing pattern */
    for (i = 0; i < 6; i++)
        h->dequant4_coeff[i] = h->dequant4_buffer[0] +
                               (h1->dequant4_coeff[i] - h1->dequant4_buffer[0]);

    for (i = 0; i < 6; i++)
        h->dequant8_coeff[i] = h->dequant8_buffer[0] +
                               (h1->dequant8_coeff[i] - h1->dequant8_buffer[0]);

    h->dequant_coeff_pps = h1->dequant_coeff_pps;

    // POC timing
    copy_fields(h, h1, poc_lsb, redundant_pic_count);

    // reference lists
    copy_fields(h, h1, ref_count, list_count);
    copy_fields(h, h1, ref_list, intra_gb);
    copy_fields(h, h1, short_ref, cabac_init_idc);

    /* Picture pointers in those ranges still point into s1; rebase them */
    copy_picture_range(h->short_ref, h1->short_ref, 32, s, s1);
    copy_picture_range(h->long_ref, h1->long_ref, 32, s, s1);
    copy_picture_range(h->delayed_pic, h1->delayed_pic,
                       MAX_DELAYED_PIC_COUNT + 2, s, s1);

    h->last_slice_type = h1->last_slice_type;
    h->sync            = h1->sync;

    if (!s->current_picture_ptr)
        return 0;

    if (!s->dropable) {
        err = ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
        h->prev_poc_msb = h->poc_msb;
        h->prev_poc_lsb = h->poc_lsb;
    }
    h->prev_frame_num_offset = h->frame_num_offset;
    h->prev_frame_num        = h->frame_num;
    h->outputed_poc          = h->next_outputed_poc;

    return err;
}

1275 1276 1277
/**
 * Begin decoding a new frame: run the generic MPV/error-resilience frame
 * start, reset per-picture flags, compute the block_offset tables (which
 * depend on linesize) and allocate per-thread scratchpads.
 *
 * @return 0 on success, -1 if ff_MPV_frame_start() failed
 */
int ff_h264_frame_start(H264Context *h)
{
    MpegEncContext *const s = &h->s;
    int i;
    const int pixel_shift = h->pixel_shift;

    if (ff_MPV_frame_start(s, s->avctx) < 0)
        return -1;
    ff_er_frame_start(s);
    /*
     * ff_MPV_frame_start uses pict_type to derive key_frame.
     * This is incorrect for H.264; IDR markings must be used.
     * Zero here; IDR markings per slice in frame or fields are ORed in later.
     * See decode_nal_units().
     */
    s->current_picture_ptr->f.key_frame = 0;
    s->current_picture_ptr->sync        = 0;
    s->current_picture_ptr->mmco_reset  = 0;

    assert(s->linesize && s->uvlinesize);

    /* luma offsets: [0..15] frame-coded, [48..63] field-coded (stride 2x) */
    for (i = 0; i < 16; i++) {
        h->block_offset[i]           = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 4 * s->linesize * ((scan8[i] - scan8[0]) >> 3);
        h->block_offset[48 + i]      = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 8 * s->linesize * ((scan8[i] - scan8[0]) >> 3);
    }
    /* chroma offsets, same frame/field split, shared between Cb and Cr */
    for (i = 0; i < 16; i++) {
        h->block_offset[16 + i]      =
        h->block_offset[32 + i]      = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 4 * s->uvlinesize * ((scan8[i] - scan8[0]) >> 3);
        h->block_offset[48 + 16 + i] =
        h->block_offset[48 + 32 + i] = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 8 * s->uvlinesize * ((scan8[i] - scan8[0]) >> 3);
    }

    /* can't be in alloc_tables because linesize isn't known there.
     * FIXME: redo bipred weight to not require extra buffer? */
    for (i = 0; i < s->slice_context_count; i++)
        if (h->thread_context[i] && !h->thread_context[i]->s.obmc_scratchpad)
            h->thread_context[i]->s.obmc_scratchpad = av_malloc(16 * 6 * s->linesize);

    /* Some macroblocks can be accessed before they're available in case
     * of lost slices, MBAFF or threading. */
    memset(h->slice_table, -1,
           (s->mb_height * s->mb_stride - 1) * sizeof(*h->slice_table));

    // s->decode = (s->flags & CODEC_FLAG_PSNR) || !s->encoding ||
    //             s->current_picture.f.reference /* || h->contains_intra */ || 1;

    /* We mark the current picture as non-reference after allocating it, so
     * that if we break out due to an error it can be released automatically
     * in the next ff_MPV_frame_start().
     * SVQ3 as well as most other codecs have only last/next/current and thus
     * get released even with set reference, besides SVQ3 and others do not
     * mark frames as reference later "naturally". */
    if (s->codec_id != AV_CODEC_ID_SVQ3)
        s->current_picture_ptr->f.reference = 0;

    /* INT_MAX = "POC not yet known"; checked in decode_postinit() */
    s->current_picture_ptr->field_poc[0]     =
        s->current_picture_ptr->field_poc[1] = INT_MAX;

    h->next_output_pic = NULL;

    assert(s->current_picture_ptr->long_ref == 0);

    return 0;
}

1340
/**
1341 1342 1343 1344 1345 1346 1347 1348 1349 1350
 * Run setup operations that must be run after slice header decoding.
 * This includes finding the next displayed frame.
 *
 * @param h h264 master context
 * @param setup_finished enough NALs have been read that we can call
 * ff_thread_finish_setup()
 */
static void decode_postinit(H264Context *h, int setup_finished)
{
    MpegEncContext *const s = &h->s;
1351 1352 1353 1354
    Picture *out = s->current_picture_ptr;
    Picture *cur = s->current_picture_ptr;
    int i, pics, out_of_order, out_idx;

1355 1356
    s->current_picture_ptr->f.qscale_type = FF_QSCALE_TYPE_H264;
    s->current_picture_ptr->f.pict_type   = s->pict_type;
1357

1358 1359
    if (h->next_output_pic)
        return;
1360

1361 1362 1363 1364 1365 1366
    if (cur->field_poc[0] == INT_MAX || cur->field_poc[1] == INT_MAX) {
        /* FIXME: if we have two PAFF fields in one packet, we can't start
         * the next thread here. If we have one field per packet, we can.
         * The check in decode_nal_units() is not good enough to find this
         * yet, so we assume the worst for now. */
        // if (setup_finished)
1367
        //    ff_thread_finish_setup(s->avctx);
1368 1369 1370
        return;
    }

1371 1372
    cur->f.interlaced_frame = 0;
    cur->f.repeat_pict      = 0;
1373 1374

    /* Signal interlacing information externally. */
1375 1376
    /* Prioritize picture timing SEI information over used
     * decoding process if it exists. */
1377

1378 1379
    if (h->sps.pic_struct_present_flag) {
        switch (h->sei_pic_struct) {
1380 1381 1382 1383
        case SEI_PIC_STRUCT_FRAME:
            break;
        case SEI_PIC_STRUCT_TOP_FIELD:
        case SEI_PIC_STRUCT_BOTTOM_FIELD:
1384
            cur->f.interlaced_frame = 1;
1385 1386 1387 1388
            break;
        case SEI_PIC_STRUCT_TOP_BOTTOM:
        case SEI_PIC_STRUCT_BOTTOM_TOP:
            if (FIELD_OR_MBAFF_PICTURE)
1389
                cur->f.interlaced_frame = 1;
1390 1391
            else
                // try to flag soft telecine progressive
1392
                cur->f.interlaced_frame = h->prev_interlaced_frame;
1393 1394 1395
            break;
        case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
        case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
1396 1397 1398
            /* Signal the possibility of telecined film externally
             * (pic_struct 5,6). From these hints, let the applications
             * decide if they apply deinterlacing. */
1399
            cur->f.repeat_pict = 1;
1400 1401
            break;
        case SEI_PIC_STRUCT_FRAME_DOUBLING:
1402
            // Force progressive here, doubling interlaced frame is a bad idea.
1403
            cur->f.repeat_pict = 2;
1404 1405
            break;
        case SEI_PIC_STRUCT_FRAME_TRIPLING:
1406
            cur->f.repeat_pict = 4;
1407 1408 1409
            break;
        }

1410 1411
        if ((h->sei_ct_type & 3) &&
            h->sei_pic_struct <= SEI_PIC_STRUCT_BOTTOM_TOP)
1412
            cur->f.interlaced_frame = (h->sei_ct_type & (1 << 1)) != 0;
1413
    } else {
1414
        /* Derive interlacing flag from used decoding process. */
1415
        cur->f.interlaced_frame = FIELD_OR_MBAFF_PICTURE;
1416
    }
1417
    h->prev_interlaced_frame = cur->f.interlaced_frame;
1418

1419
    if (cur->field_poc[0] != cur->field_poc[1]) {
1420
        /* Derive top_field_first from field pocs. */
1421
        cur->f.top_field_first = cur->field_poc[0] < cur->field_poc[1];
1422
    } else {
1423
        if (cur->f.interlaced_frame || h->sps.pic_struct_present_flag) {
1424 1425 1426 1427
            /* Use picture timing SEI information. Even if it is a
             * information of a past frame, better than nothing. */
            if (h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM ||
                h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
1428
                cur->f.top_field_first = 1;
1429
            else
1430
                cur->f.top_field_first = 0;
1431
        } else {
1432
            /* Most likely progressive */
1433
            cur->f.top_field_first = 0;
1434 1435 1436
        }
    }

1437 1438
    cur->mmco_reset = h->mmco_reset;
    h->mmco_reset = 0;
1439
    // FIXME do something with unavailable reference frames
1440 1441 1442

    /* Sort B-frames into display order */

1443 1444
    if (h->sps.bitstream_restriction_flag &&
        s->avctx->has_b_frames < h->sps.num_reorder_frames) {
1445
        s->avctx->has_b_frames = h->sps.num_reorder_frames;
1446
        s->low_delay           = 0;
1447 1448
    }

1449 1450
    if (s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT &&
        !h->sps.bitstream_restriction_flag) {
1451
        s->avctx->has_b_frames = MAX_DELAYED_PIC_COUNT - 1;
1452
        s->low_delay           = 0;
1453 1454
    }

1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468
    for (i = 0; 1; i++) {
        if(i == MAX_DELAYED_PIC_COUNT || cur->poc < h->last_pocs[i]){
            if(i)
                h->last_pocs[i-1] = cur->poc;
            break;
        } else if(i) {
            h->last_pocs[i-1]= h->last_pocs[i];
        }
    }
    out_of_order = MAX_DELAYED_PIC_COUNT - i;
    if(   cur->f.pict_type == AV_PICTURE_TYPE_B
       || (h->last_pocs[MAX_DELAYED_PIC_COUNT-2] > INT_MIN && h->last_pocs[MAX_DELAYED_PIC_COUNT-1] - h->last_pocs[MAX_DELAYED_PIC_COUNT-2] > 2))
        out_of_order = FFMAX(out_of_order, 1);
    if(s->avctx->has_b_frames < out_of_order && !h->sps.bitstream_restriction_flag){
1469
        av_log(s->avctx, AV_LOG_VERBOSE, "Increasing reorder buffer to %d\n", out_of_order);
1470 1471 1472 1473
        s->avctx->has_b_frames = out_of_order;
        s->low_delay = 0;
    }

1474
    pics = 0;
1475 1476
    while (h->delayed_pic[pics])
        pics++;
1477

1478
    av_assert0(pics <= MAX_DELAYED_PIC_COUNT);
1479 1480

    h->delayed_pic[pics++] = cur;
1481 1482
    if (cur->f.reference == 0)
        cur->f.reference = DELAYED_PIC_REF;
1483 1484 1485

    out = h->delayed_pic[0];
    out_idx = 0;
1486 1487 1488
    for (i = 1; h->delayed_pic[i] &&
                !h->delayed_pic[i]->f.key_frame &&
                !h->delayed_pic[i]->mmco_reset;
1489 1490 1491
         i++)
        if (h->delayed_pic[i]->poc < out->poc) {
            out     = h->delayed_pic[i];
1492 1493
            out_idx = i;
        }
1494
    if (s->avctx->has_b_frames == 0 &&
1495
        (h->delayed_pic[0]->f.key_frame || h->delayed_pic[0]->mmco_reset))
1496
        h->next_outputed_poc = INT_MIN;
1497
    out_of_order = out->poc < h->next_outputed_poc;
1498

1499
    if (out_of_order || pics > s->avctx->has_b_frames) {
1500
        out->f.reference &= ~DELAYED_PIC_REF;
1501 1502 1503 1504 1505
        // for frame threading, the owner must be the second field's thread or
        // else the first thread can release the picture and reuse it unsafely
        out->owner2       = s;
        for (i = out_idx; h->delayed_pic[i]; i++)
            h->delayed_pic[i] = h->delayed_pic[i + 1];
1506
    }
1507
    if (!out_of_order && pics > s->avctx->has_b_frames) {
1508
        h->next_output_pic = out;
1509 1510 1511
        if (out_idx == 0 && h->delayed_pic[0] && (h->delayed_pic[0]->f.key_frame || h->delayed_pic[0]->mmco_reset)) {
            h->next_outputed_poc = INT_MIN;
        } else
1512
            h->next_outputed_poc = out->poc;
1513
    } else {
1514
        av_log(s->avctx, AV_LOG_DEBUG, "no picture %s\n", out_of_order ? "ooo" : "");
1515 1516
    }

1517
    if (h->next_output_pic && h->next_output_pic->sync) {
1518
        h->sync |= 2;
1519 1520
    }

1521 1522
    if (setup_finished)
        ff_thread_finish_setup(s->avctx);
1523 1524
}

1525 1526
static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y,
                                              uint8_t *src_cb, uint8_t *src_cr,
1527 1528
                                              int linesize, int uvlinesize,
                                              int simple)
1529
{
1530
    MpegEncContext *const s = &h->s;
1531
    uint8_t *top_border;
1532
    int top_idx = 1;
1533
    const int pixel_shift = h->pixel_shift;
1534 1535
    int chroma444 = CHROMA444;
    int chroma422 = CHROMA422;
1536

1537
    src_y  -= linesize;
1538 1539 1540
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

1541 1542 1543
    if (!simple && FRAME_MBAFF) {
        if (s->mb_y & 1) {
            if (!MB_MBAFF) {
1544
                top_border = h->top_borders[0][s->mb_x];
1545
                AV_COPY128(top_border, src_y + 15 * linesize);
1546
                if (pixel_shift)
1547 1548 1549 1550 1551 1552 1553 1554
                    AV_COPY128(top_border + 16, src_y + 15 * linesize + 16);
                if (simple || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) {
                    if (chroma444) {
                        if (pixel_shift) {
                            AV_COPY128(top_border + 32, src_cb + 15 * uvlinesize);
                            AV_COPY128(top_border + 48, src_cb + 15 * uvlinesize + 16);
                            AV_COPY128(top_border + 64, src_cr + 15 * uvlinesize);
                            AV_COPY128(top_border + 80, src_cr + 15 * uvlinesize + 16);
1555
                        } else {
1556 1557
                            AV_COPY128(top_border + 16, src_cb + 15 * uvlinesize);
                            AV_COPY128(top_border + 32, src_cr + 15 * uvlinesize);
1558
                        }
1559
                    } else if (chroma422) {
1560
                        if (pixel_shift) {
1561 1562
                            AV_COPY128(top_border + 32, src_cb + 15 * uvlinesize);
                            AV_COPY128(top_border + 48, src_cr + 15 * uvlinesize);
1563
                        } else {
1564 1565
                            AV_COPY64(top_border + 16, src_cb + 15 * uvlinesize);
                            AV_COPY64(top_border + 24, src_cr + 15 * uvlinesize);
1566
                        }
1567
                    } else {
1568
                        if (pixel_shift) {
1569 1570
                            AV_COPY128(top_border + 32, src_cb + 7 * uvlinesize);
                            AV_COPY128(top_border + 48, src_cr + 7 * uvlinesize);
1571
                        } else {
1572 1573
                            AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
                            AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
1574
                        }
1575
                    }
1576 1577
                }
            }
1578
        } else if (MB_MBAFF) {
1579
            top_idx = 0;
1580
        } else
1581
            return;
1582 1583
    }

1584
    top_border = h->top_borders[top_idx][s->mb_x];
1585
    /* There are two lines saved, the line above the top macroblock
1586 1587
     * of a pair, and the line above the bottom macroblock. */
    AV_COPY128(top_border, src_y + 16 * linesize);
1588
    if (pixel_shift)
1589 1590 1591 1592 1593 1594 1595 1596 1597
        AV_COPY128(top_border + 16, src_y + 16 * linesize + 16);

    if (simple || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) {
        if (chroma444) {
            if (pixel_shift) {
                AV_COPY128(top_border + 32, src_cb + 16 * linesize);
                AV_COPY128(top_border + 48, src_cb + 16 * linesize + 16);
                AV_COPY128(top_border + 64, src_cr + 16 * linesize);
                AV_COPY128(top_border + 80, src_cr + 16 * linesize + 16);
1598
            } else {
1599 1600
                AV_COPY128(top_border + 16, src_cb + 16 * linesize);
                AV_COPY128(top_border + 32, src_cr + 16 * linesize);
1601
            }
1602
        } else if (chroma422) {
1603
            if (pixel_shift) {
1604 1605
                AV_COPY128(top_border + 32, src_cb + 16 * uvlinesize);
                AV_COPY128(top_border + 48, src_cr + 16 * uvlinesize);
1606
            } else {
1607 1608
                AV_COPY64(top_border + 16, src_cb + 16 * uvlinesize);
                AV_COPY64(top_border + 24, src_cr + 16 * uvlinesize);
1609
            }
1610
        } else {
1611
            if (pixel_shift) {
1612 1613
                AV_COPY128(top_border + 32, src_cb + 8 * uvlinesize);
                AV_COPY128(top_border + 48, src_cr + 8 * uvlinesize);
1614
            } else {
1615 1616
                AV_COPY64(top_border + 16, src_cb + 8 * uvlinesize);
                AV_COPY64(top_border + 24, src_cr + 8 * uvlinesize);
1617
            }
1618
        }
1619 1620 1621
    }
}

1622
static av_always_inline void xchg_mb_border(H264Context *h, uint8_t *src_y,
1623 1624 1625 1626 1627 1628
                                            uint8_t *src_cb, uint8_t *src_cr,
                                            int linesize, int uvlinesize,
                                            int xchg, int chroma444,
                                            int simple, int pixel_shift)
{
    MpegEncContext *const s = &h->s;
1629
    int deblock_topleft;
1630
    int deblock_top;
1631
    int top_idx = 1;
1632 1633
    uint8_t *top_border_m1;
    uint8_t *top_border;
1634

1635 1636 1637
    if (!simple && FRAME_MBAFF) {
        if (s->mb_y & 1) {
            if (!MB_MBAFF)
1638
                return;
1639
        } else {
1640 1641 1642
            top_idx = MB_MBAFF ? 0 : 1;
        }
    }
1643

1644
    if (h->deblocking_filter == 2) {
1645 1646
        deblock_topleft = h->slice_table[h->mb_xy - 1 - s->mb_stride] == h->slice_num;
        deblock_top     = h->top_type;
1647
    } else {
1648 1649
        deblock_topleft = (s->mb_x > 0);
        deblock_top     = (s->mb_y > !!MB_FIELD);
1650
    }
1651

1652
    src_y  -= linesize   + 1 + pixel_shift;
1653 1654
    src_cb -= uvlinesize + 1 + pixel_shift;
    src_cr -= uvlinesize + 1 + pixel_shift;
1655

1656
    top_border_m1 = h->top_borders[top_idx][s->mb_x - 1];
1657 1658
    top_border    = h->top_borders[top_idx][s->mb_x];

1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675
#define XCHG(a, b, xchg)                        \
    if (pixel_shift) {                          \
        if (xchg) {                             \
            AV_SWAP64(b + 0, a + 0);            \
            AV_SWAP64(b + 8, a + 8);            \
        } else {                                \
            AV_COPY128(b, a);                   \
        }                                       \
    } else if (xchg)                            \
        AV_SWAP64(b, a);                        \
    else                                        \
        AV_COPY64(b, a);

    if (deblock_top) {
        if (deblock_topleft) {
            XCHG(top_border_m1 + (8 << pixel_shift),
                 src_y - (7 << pixel_shift), 1);
1676
        }
1677 1678
        XCHG(top_border + (0 << pixel_shift), src_y + (1 << pixel_shift), xchg);
        XCHG(top_border + (8 << pixel_shift), src_y + (9 << pixel_shift), 1);
1679 1680 1681
        if (s->mb_x + 1 < s->mb_width) {
            XCHG(h->top_borders[top_idx][s->mb_x + 1],
                 src_y + (17 << pixel_shift), 1);
1682
        }
1683
    }
1684 1685 1686
    if (simple || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) {
        if (chroma444) {
            if (deblock_topleft) {
1687 1688 1689 1690 1691 1692 1693
                XCHG(top_border_m1 + (24 << pixel_shift), src_cb - (7 << pixel_shift), 1);
                XCHG(top_border_m1 + (40 << pixel_shift), src_cr - (7 << pixel_shift), 1);
            }
            XCHG(top_border + (16 << pixel_shift), src_cb + (1 << pixel_shift), xchg);
            XCHG(top_border + (24 << pixel_shift), src_cb + (9 << pixel_shift), 1);
            XCHG(top_border + (32 << pixel_shift), src_cr + (1 << pixel_shift), xchg);
            XCHG(top_border + (40 << pixel_shift), src_cr + (9 << pixel_shift), 1);
1694 1695 1696
            if (s->mb_x + 1 < s->mb_width) {
                XCHG(h->top_borders[top_idx][s->mb_x + 1] + (16 << pixel_shift), src_cb + (17 << pixel_shift), 1);
                XCHG(h->top_borders[top_idx][s->mb_x + 1] + (32 << pixel_shift), src_cr + (17 << pixel_shift), 1);
1697 1698
            }
        } else {
1699 1700
            if (deblock_top) {
                if (deblock_topleft) {
1701 1702 1703
                    XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1);
                    XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1);
                }
1704 1705
                XCHG(top_border + (16 << pixel_shift), src_cb + 1 + pixel_shift, 1);
                XCHG(top_border + (24 << pixel_shift), src_cr + 1 + pixel_shift, 1);
1706
            }
1707 1708 1709 1710
        }
    }
}

1711 1712 1713
static av_always_inline int dctcoef_get(DCTELEM *mb, int high_bit_depth,
                                        int index)
{
1714
    if (high_bit_depth) {
1715
        return AV_RN32A(((int32_t *)mb) + index);
1716 1717
    } else
        return AV_RN16A(mb + index);
1718 1719
}

1720 1721 1722
static av_always_inline void dctcoef_set(DCTELEM *mb, int high_bit_depth,
                                         int index, int value)
{
1723
    if (high_bit_depth) {
1724
        AV_WN32A(((int32_t *)mb) + index, value);
1725 1726
    } else
        AV_WN16A(mb + index, value);
1727 1728
}

1729 1730 1731 1732 1733 1734 1735 1736
static av_always_inline void hl_decode_mb_predict_luma(H264Context *h,
                                                       int mb_type, int is_h264,
                                                       int simple,
                                                       int transform_bypass,
                                                       int pixel_shift,
                                                       int *block_offset,
                                                       int linesize,
                                                       uint8_t *dest_y, int p)
1737
{
1738
    MpegEncContext *const s = &h->s;
1739 1740 1741
    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
    void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
    int i;
1742 1743 1744 1745 1746 1747 1748 1749 1750
    int qscale = p == 0 ? s->qscale : h->chroma_qp[p - 1];
    block_offset += 16 * p;
    if (IS_INTRA4x4(mb_type)) {
        if (simple || !s->encoding) {
            if (IS_8x8DCT(mb_type)) {
                if (transform_bypass) {
                    idct_dc_add  =
                    idct_add     = s->dsp.add_pixels8;
                } else {
1751 1752 1753
                    idct_dc_add = h->h264dsp.h264_idct8_dc_add;
                    idct_add    = h->h264dsp.h264_idct8_add;
                }
1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765
                for (i = 0; i < 16; i += 4) {
                    uint8_t *const ptr = dest_y + block_offset[i];
                    const int dir      = h->intra4x4_pred_mode_cache[scan8[i]];
                    if (transform_bypass && h->sps.profile_idc == 244 && dir <= 1) {
                        h->hpc.pred8x8l_add[dir](ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
                    } else {
                        const int nnz = h->non_zero_count_cache[scan8[i + p * 16]];
                        h->hpc.pred8x8l[dir](ptr, (h->topleft_samples_available << i) & 0x8000,
                                             (h->topright_samples_available << i) & 0x4000, linesize);
                        if (nnz) {
                            if (nnz == 1 && dctcoef_get(h->mb, pixel_shift, i * 16 + p * 256))
                                idct_dc_add(ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
1766
                            else
1767
                                idct_add(ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
1768 1769 1770
                        }
                    }
                }
1771 1772 1773 1774 1775
            } else {
                if (transform_bypass) {
                    idct_dc_add  =
                        idct_add = s->dsp.add_pixels4;
                } else {
1776 1777 1778
                    idct_dc_add = h->h264dsp.h264_idct_dc_add;
                    idct_add    = h->h264dsp.h264_idct_add;
                }
1779 1780 1781
                for (i = 0; i < 16; i++) {
                    uint8_t *const ptr = dest_y + block_offset[i];
                    const int dir      = h->intra4x4_pred_mode_cache[scan8[i]];
1782

1783 1784 1785
                    if (transform_bypass && h->sps.profile_idc == 244 && dir <= 1) {
                        h->hpc.pred4x4_add[dir](ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
                    } else {
1786 1787 1788
                        uint8_t *topright;
                        int nnz, tr;
                        uint64_t tr_high;
1789 1790
                        if (dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED) {
                            const int topright_avail = (h->topright_samples_available << i) & 0x8000;
1791
                            assert(s->mb_y || linesize <= block_offset[i]);
1792
                            if (!topright_avail) {
1793
                                if (pixel_shift) {
1794 1795
                                    tr_high  = ((uint16_t *)ptr)[3 - linesize / 2] * 0x0001000100010001ULL;
                                    topright = (uint8_t *)&tr_high;
1796
                                } else {
1797 1798
                                    tr       = ptr[3 - linesize] * 0x01010101u;
                                    topright = (uint8_t *)&tr;
1799
                                }
1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810
                            } else
                                topright = ptr + (4 << pixel_shift) - linesize;
                        } else
                            topright = NULL;

                        h->hpc.pred4x4[dir](ptr, topright, linesize);
                        nnz = h->non_zero_count_cache[scan8[i + p * 16]];
                        if (nnz) {
                            if (is_h264) {
                                if (nnz == 1 && dctcoef_get(h->mb, pixel_shift, i * 16 + p * 256))
                                    idct_dc_add(ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
1811
                                else
1812
                                    idct_add(ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
1813
                            } else if (CONFIG_SVQ3_DECODER)
1814
                                ff_svq3_add_idct_c(ptr, h->mb + i * 16 + p * 256, linesize, qscale, 0);
1815 1816 1817 1818 1819
                        }
                    }
                }
            }
        }
1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838
    } else {
        h->hpc.pred16x16[h->intra16x16_pred_mode](dest_y, linesize);
        if (is_h264) {
            if (h->non_zero_count_cache[scan8[LUMA_DC_BLOCK_INDEX + p]]) {
                if (!transform_bypass)
                    h->h264dsp.h264_luma_dc_dequant_idct(h->mb + (p * 256 << pixel_shift),
                                                         h->mb_luma_dc[p],
                                                         h->dequant4_coeff[p][qscale][0]);
                else {
                    static const uint8_t dc_mapping[16] = {
                         0 * 16,  1 * 16,  4 * 16,  5 * 16,
                         2 * 16,  3 * 16,  6 * 16,  7 * 16,
                         8 * 16,  9 * 16, 12 * 16, 13 * 16,
                        10 * 16, 11 * 16, 14 * 16, 15 * 16 };
                    for (i = 0; i < 16; i++)
                        dctcoef_set(h->mb + (p * 256 << pixel_shift),
                                    pixel_shift, dc_mapping[i],
                                    dctcoef_get(h->mb_luma_dc[p],
                                                pixel_shift, i));
1839 1840
                }
            }
1841
        } else if (CONFIG_SVQ3_DECODER)
1842 1843
            ff_svq3_luma_dc_dequant_idct_c(h->mb + p * 256,
                                           h->mb_luma_dc[p], qscale);
1844 1845 1846
    }
}

1847 1848 1849 1850 1851 1852 1853
static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type,
                                                    int is_h264, int simple,
                                                    int transform_bypass,
                                                    int pixel_shift,
                                                    int *block_offset,
                                                    int linesize,
                                                    uint8_t *dest_y, int p)
1854
{
1855
    MpegEncContext *const s = &h->s;
1856 1857
    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
    int i;
1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875
    block_offset += 16 * p;
    if (!IS_INTRA4x4(mb_type)) {
        if (is_h264) {
            if (IS_INTRA16x16(mb_type)) {
                if (transform_bypass) {
                    if (h->sps.profile_idc == 244 &&
                        (h->intra16x16_pred_mode == VERT_PRED8x8 ||
                         h->intra16x16_pred_mode == HOR_PRED8x8)) {
                        h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset,
                                                                      h->mb + (p * 256 << pixel_shift),
                                                                      linesize);
                    } else {
                        for (i = 0; i < 16; i++)
                            if (h->non_zero_count_cache[scan8[i + p * 16]] ||
                                dctcoef_get(h->mb, pixel_shift, i * 16 + p * 256))
                                s->dsp.add_pixels4(dest_y + block_offset[i],
                                                   h->mb + (i * 16 + p * 256 << pixel_shift),
                                                   linesize);
1876
                    }
1877 1878 1879 1880 1881
                } else {
                    h->h264dsp.h264_idct_add16intra(dest_y, block_offset,
                                                    h->mb + (p * 256 << pixel_shift),
                                                    linesize,
                                                    h->non_zero_count_cache + p * 5 * 8);
1882
                }
1883 1884
            } else if (h->cbp & 15) {
                if (transform_bypass) {
1885
                    const int di = IS_8x8DCT(mb_type) ? 4 : 1;
1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903
                    idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8
                                                  : s->dsp.add_pixels4;
                    for (i = 0; i < 16; i += di)
                        if (h->non_zero_count_cache[scan8[i + p * 16]])
                            idct_add(dest_y + block_offset[i],
                                     h->mb + (i * 16 + p * 256 << pixel_shift),
                                     linesize);
                } else {
                    if (IS_8x8DCT(mb_type))
                        h->h264dsp.h264_idct8_add4(dest_y, block_offset,
                                                   h->mb + (p * 256 << pixel_shift),
                                                   linesize,
                                                   h->non_zero_count_cache + p * 5 * 8);
                    else
                        h->h264dsp.h264_idct_add16(dest_y, block_offset,
                                                   h->mb + (p * 256 << pixel_shift),
                                                   linesize,
                                                   h->non_zero_count_cache + p * 5 * 8);
1904 1905
                }
            }
1906
        } else if (CONFIG_SVQ3_DECODER) {
1907 1908 1909 1910 1911 1912
            for (i = 0; i < 16; i++)
                if (h->non_zero_count_cache[scan8[i + p * 16]] || h->mb[i * 16 + p * 256]) {
                    // FIXME benchmark weird rule, & below
                    uint8_t *const ptr = dest_y + block_offset[i];
                    ff_svq3_add_idct_c(ptr, h->mb + i * 16 + p * 256, linesize,
                                       s->qscale, IS_INTRA(mb_type) ? 1 : 0);
1913 1914 1915 1916 1917
                }
        }
    }
}

1918 1919 1920
#define BITS   8
#define SIMPLE 1
#include "h264_mb_template.c"
1921

1922 1923 1924
#undef  BITS
#define BITS   16
#include "h264_mb_template.c"
1925

1926 1927 1928
#undef  SIMPLE
#define SIMPLE 0
#include "h264_mb_template.c"
1929

1930 1931 1932 1933
void ff_h264_hl_decode_mb(H264Context *h)
{
    MpegEncContext *const s = &h->s;
    const int mb_xy   = h->mb_xy;
1934
    const int mb_type = s->current_picture.f.mb_type[mb_xy];
1935
    int is_complex    = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
1936

1937
    if (CHROMA444) {
1938
        if (is_complex || h->pixel_shift)
1939 1940
            hl_decode_mb_444_complex(h);
        else
1941
            hl_decode_mb_444_simple_8(h);
1942
    } else if (is_complex) {
1943
        hl_decode_mb_complex(h);
1944 1945 1946 1947
    } else if (h->pixel_shift) {
        hl_decode_mb_simple_16(h);
    } else
        hl_decode_mb_simple_8(h);
1948 1949
}

1950 1951 1952
static int pred_weight_table(H264Context *h)
{
    MpegEncContext *const s = &h->s;
1953
    int list, i;
1954
    int luma_def, chroma_def;
1955

1956 1957 1958 1959 1960 1961 1962
    h->use_weight             = 0;
    h->use_weight_chroma      = 0;
    h->luma_log2_weight_denom = get_ue_golomb(&s->gb);
    if (h->sps.chroma_format_idc)
        h->chroma_log2_weight_denom = get_ue_golomb(&s->gb);
    luma_def   = 1 << h->luma_log2_weight_denom;
    chroma_def = 1 << h->chroma_log2_weight_denom;
1963

1964
    for (list = 0; list < 2; list++) {
1965 1966
        h->luma_weight_flag[list]   = 0;
        h->chroma_weight_flag[list] = 0;
1967
        for (i = 0; i < h->ref_count[list]; i++) {
1968
            int luma_weight_flag, chroma_weight_flag;
1969

1970 1971 1972 1973 1974 1975 1976 1977
            luma_weight_flag = get_bits1(&s->gb);
            if (luma_weight_flag) {
                h->luma_weight[i][list][0] = get_se_golomb(&s->gb);
                h->luma_weight[i][list][1] = get_se_golomb(&s->gb);
                if (h->luma_weight[i][list][0] != luma_def ||
                    h->luma_weight[i][list][1] != 0) {
                    h->use_weight             = 1;
                    h->luma_weight_flag[list] = 1;
1978
                }
1979 1980 1981
            } else {
                h->luma_weight[i][list][0] = luma_def;
                h->luma_weight[i][list][1] = 0;
1982 1983
            }

1984 1985 1986
            if (h->sps.chroma_format_idc) {
                chroma_weight_flag = get_bits1(&s->gb);
                if (chroma_weight_flag) {
Michael Niedermayer's avatar
Michael Niedermayer committed
1987
                    int j;
1988 1989 1990 1991 1992 1993 1994
                    for (j = 0; j < 2; j++) {
                        h->chroma_weight[i][list][j][0] = get_se_golomb(&s->gb);
                        h->chroma_weight[i][list][j][1] = get_se_golomb(&s->gb);
                        if (h->chroma_weight[i][list][j][0] != chroma_def ||
                            h->chroma_weight[i][list][j][1] != 0) {
                            h->use_weight_chroma = 1;
                            h->chroma_weight_flag[list] = 1;
1995
                        }
Michael Niedermayer's avatar
Michael Niedermayer committed
1996
                    }
1997
                } else {
Michael Niedermayer's avatar
Michael Niedermayer committed
1998
                    int j;
1999 2000 2001
                    for (j = 0; j < 2; j++) {
                        h->chroma_weight[i][list][j][0] = chroma_def;
                        h->chroma_weight[i][list][j][1] = 0;
Michael Niedermayer's avatar
Michael Niedermayer committed
2002
                    }
2003 2004 2005
                }
            }
        }
2006 2007
        if (h->slice_type_nos != AV_PICTURE_TYPE_B)
            break;
2008
    }
2009
    h->use_weight = h->use_weight || h->use_weight_chroma;
2010 2011 2012
    return 0;
}

2013 2014
/**
 * Initialize implicit_weight table.
2015
 * @param field  0/1 initialize the weight for interlaced MBAFF
2016 2017
 *                -1 initializes the rest
 */
2018 2019 2020
static void implicit_weight_table(H264Context *h, int field)
{
    MpegEncContext *const s = &h->s;
2021
    int ref0, ref1, i, cur_poc, ref_start, ref_count0, ref_count1;
2022

2023 2024 2025 2026 2027
    for (i = 0; i < 2; i++) {
        h->luma_weight_flag[i]   = 0;
        h->chroma_weight_flag[i] = 0;
    }

2028
    if (field < 0) {
2029 2030 2031 2032 2033
        if (s->picture_structure == PICT_FRAME) {
            cur_poc = s->current_picture_ptr->poc;
        } else {
            cur_poc = s->current_picture_ptr->field_poc[s->picture_structure - 1];
        }
2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047
        if (h->ref_count[0] == 1 && h->ref_count[1] == 1 && !FRAME_MBAFF &&
            h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2 * cur_poc) {
            h->use_weight = 0;
            h->use_weight_chroma = 0;
            return;
        }
        ref_start  = 0;
        ref_count0 = h->ref_count[0];
        ref_count1 = h->ref_count[1];
    } else {
        cur_poc    = s->current_picture_ptr->field_poc[field];
        ref_start  = 16;
        ref_count0 = 16 + 2 * h->ref_count[0];
        ref_count1 = 16 + 2 * h->ref_count[1];
2048
    }
2049

2050 2051 2052 2053
    h->use_weight               = 2;
    h->use_weight_chroma        = 2;
    h->luma_log2_weight_denom   = 5;
    h->chroma_log2_weight_denom = 5;
2054

2055
    for (ref0 = ref_start; ref0 < ref_count0; ref0++) {
2056
        int poc0 = h->ref_list[0][ref0].poc;
2057
        for (ref1 = ref_start; ref1 < ref_count1; ref1++) {
2058 2059 2060
            int w = 32;
            if (!h->ref_list[0][ref0].long_ref && !h->ref_list[1][ref1].long_ref) {
                int poc1 = h->ref_list[1][ref1].poc;
2061 2062
                int td   = av_clip(poc1 - poc0, -128, 127);
                if (td) {
2063 2064
                    int tb = av_clip(cur_poc - poc0, -128, 127);
                    int tx = (16384 + (FFABS(td) >> 1)) / td;
2065 2066
                    int dist_scale_factor = (tb * tx + 32) >> 8;
                    if (dist_scale_factor >= -64 && dist_scale_factor <= 128)
2067 2068
                        w = 64 - dist_scale_factor;
                }
2069
            }
2070 2071 2072 2073 2074
            if (field < 0) {
                h->implicit_weight[ref0][ref1][0] =
                h->implicit_weight[ref0][ref1][1] = w;
            } else {
                h->implicit_weight[ref0][ref1][field] = w;
2075
            }
2076 2077 2078 2079
        }
    }
}

2080
/**
Loic Le Loarer's avatar
Loic Le Loarer committed
2081
 * instantaneous decoder refresh.
2082
 */
2083 2084
static void idr(H264Context *h)
{
2085
    int i;
2086
    ff_h264_remove_all_refs(h);
2087 2088
    h->prev_frame_num        = 0;
    h->prev_frame_num_offset = 0;
2089
    h->prev_poc_msb          = 1<<16;
2090
    h->prev_poc_lsb          = 0;
2091 2092
    for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++)
        h->last_pocs[i] = INT_MIN;
2093 2094
}

2095
/* forget old pics after a seek */
2096 2097 2098
static void flush_dpb(AVCodecContext *avctx)
{
    H264Context *h = avctx->priv_data;
2099
    int i;
2100
    for (i=0; i<=MAX_DELAYED_PIC_COUNT; i++) {
2101
        if (h->delayed_pic[i])
2102
            h->delayed_pic[i]->f.reference = 0;
2103
        h->delayed_pic[i] = NULL;
2104
    }
2105
    h->outputed_poc = h->next_outputed_poc = INT_MIN;
2106
    h->prev_interlaced_frame = 1;
2107
    idr(h);
2108
    h->prev_frame_num = -1;
2109
    if (h->s.current_picture_ptr)
2110
        h->s.current_picture_ptr->f.reference = 0;
2111
    h->s.first_field = 0;
2112
    ff_h264_reset_sei(h);
2113
    ff_mpeg_flush(avctx);
2114
    h->recovery_frame= -1;
2115
    h->sync= 0;
2116 2117
}

2118 2119 2120 2121
static int init_poc(H264Context *h)
{
    MpegEncContext *const s = &h->s;
    const int max_frame_num = 1 << h->sps.log2_max_frame_num;
2122
    int field_poc[2];
2123
    Picture *cur = s->current_picture_ptr;
2124

2125 2126
    h->frame_num_offset = h->prev_frame_num_offset;
    if (h->frame_num < h->prev_frame_num)
2127
        h->frame_num_offset += max_frame_num;
2128

2129 2130
    if (h->sps.poc_type == 0) {
        const int max_poc_lsb = 1 << h->sps.log2_max_poc_lsb;
2131

2132
        if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb / 2)
2133
            h->poc_msb = h->prev_poc_msb + max_poc_lsb;
2134
        else if (h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb / 2)
2135 2136 2137
            h->poc_msb = h->prev_poc_msb - max_poc_lsb;
        else
            h->poc_msb = h->prev_poc_msb;
2138
        // printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
2139
        field_poc[0] =
2140
        field_poc[1] = h->poc_msb + h->poc_lsb;
2141
        if (s->picture_structure == PICT_FRAME)
2142
            field_poc[1] += h->delta_poc_bottom;
2143
    } else if (h->sps.poc_type == 1) {
2144 2145 2146
        int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
        int i;

2147
        if (h->sps.poc_cycle_length != 0)
2148 2149 2150 2151
            abs_frame_num = h->frame_num_offset + h->frame_num;
        else
            abs_frame_num = 0;

2152
        if (h->nal_ref_idc == 0 && abs_frame_num > 0)
2153
            abs_frame_num--;
2154

2155
        expected_delta_per_poc_cycle = 0;
2156 2157 2158
        for (i = 0; i < h->sps.poc_cycle_length; i++)
            // FIXME integrate during sps parse
            expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[i];
2159

2160
        if (abs_frame_num > 0) {
2161 2162 2163 2164
            int poc_cycle_cnt          = (abs_frame_num - 1) / h->sps.poc_cycle_length;
            int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;

            expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
2165 2166
            for (i = 0; i <= frame_num_in_poc_cycle; i++)
                expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[i];
2167 2168 2169
        } else
            expectedpoc = 0;

2170
        if (h->nal_ref_idc == 0)
2171
            expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
2172

2173 2174 2175
        field_poc[0] = expectedpoc + h->delta_poc[0];
        field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;

2176
        if (s->picture_structure == PICT_FRAME)
2177
            field_poc[1] += h->delta_poc[1];
2178 2179
    } else {
        int poc = 2 * (h->frame_num_offset + h->frame_num);
2180

2181
        if (!h->nal_ref_idc)
2182
            poc--;
2183

2184 2185
        field_poc[0] = poc;
        field_poc[1] = poc;
2186
    }
2187

2188 2189 2190 2191 2192
    if (s->picture_structure != PICT_BOTTOM_FIELD)
        s->current_picture_ptr->field_poc[0] = field_poc[0];
    if (s->picture_structure != PICT_TOP_FIELD)
        s->current_picture_ptr->field_poc[1] = field_poc[1];
    cur->poc = FFMIN(cur->field_poc[0], cur->field_poc[1]);
2193 2194 2195 2196

    return 0;
}

2197 2198 2199
/**
 * initialize scan tables
 */
2200 2201
static void init_scan_tables(H264Context *h)
{
2202
    int i;
2203 2204
    for (i = 0; i < 16; i++) {
#define T(x) (x >> 2) | ((x << 2) & 0xF)
2205
        h->zigzag_scan[i] = T(zigzag_scan[i]);
2206
        h->field_scan[i]  = T(field_scan[i]);
2207 2208
#undef T
    }
2209 2210
    for (i = 0; i < 64; i++) {
#define T(x) (x >> 3) | ((x & 7) << 3)
2211 2212 2213 2214
        h->zigzag_scan8x8[i]       = T(ff_zigzag_direct[i]);
        h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
        h->field_scan8x8[i]        = T(field_scan8x8[i]);
        h->field_scan8x8_cavlc[i]  = T(field_scan8x8_cavlc[i]);
2215 2216
#undef T
    }
2217
    if (h->sps.transform_bypass) { // FIXME same ugly
2218 2219 2220 2221 2222 2223
        memcpy(h->zigzag_scan_q0          , zigzag_scan             , sizeof(h->zigzag_scan_q0         ));
        memcpy(h->zigzag_scan8x8_q0       , ff_zigzag_direct        , sizeof(h->zigzag_scan8x8_q0      ));
        memcpy(h->zigzag_scan8x8_cavlc_q0 , zigzag_scan8x8_cavlc    , sizeof(h->zigzag_scan8x8_cavlc_q0));
        memcpy(h->field_scan_q0           , field_scan              , sizeof(h->field_scan_q0          ));
        memcpy(h->field_scan8x8_q0        , field_scan8x8           , sizeof(h->field_scan8x8_q0       ));
        memcpy(h->field_scan8x8_cavlc_q0  , field_scan8x8_cavlc     , sizeof(h->field_scan8x8_cavlc_q0 ));
2224
    } else {
2225 2226 2227 2228 2229 2230
        memcpy(h->zigzag_scan_q0          , h->zigzag_scan          , sizeof(h->zigzag_scan_q0         ));
        memcpy(h->zigzag_scan8x8_q0       , h->zigzag_scan8x8       , sizeof(h->zigzag_scan8x8_q0      ));
        memcpy(h->zigzag_scan8x8_cavlc_q0 , h->zigzag_scan8x8_cavlc , sizeof(h->zigzag_scan8x8_cavlc_q0));
        memcpy(h->field_scan_q0           , h->field_scan           , sizeof(h->field_scan_q0          ));
        memcpy(h->field_scan8x8_q0        , h->field_scan8x8        , sizeof(h->field_scan8x8_q0       ));
        memcpy(h->field_scan8x8_cavlc_q0  , h->field_scan8x8_cavlc  , sizeof(h->field_scan8x8_cavlc_q0 ));
2231 2232
    }
}
/**
 * Finish decoding the current field (or frame): report decode progress
 * to other threads, run reference picture marking, notify any hardware
 * accelerator, run error concealment for frame pictures, and close the
 * MpegEncContext frame.
 *
 * @param h        H264 decoder context
 * @param in_setup nonzero when called from slice-header setup (field
 *                 boundary inside a packet) rather than at the end of a
 *                 full decode call
 * @return 0 on success, or the (negative) result of
 *         ff_h264_execute_ref_pic_marking() on failure
 */
static int field_end(H264Context *h, int in_setup)
{
    MpegEncContext *const s     = &h->s;
    AVCodecContext *const avctx = s->avctx;
    int err = 0;
    s->mb_y = 0;

    /* Tell frame-threading consumers this field is fully decoded; skipped
     * during setup (caller will report later) and for dropped pictures. */
    if (!in_setup && !s->dropable)
        ff_thread_report_progress(&s->current_picture_ptr->f, INT_MAX,
                                  s->picture_structure == PICT_BOTTOM_FIELD);

    if (CONFIG_H264_VDPAU_DECODER &&
        s->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU)
        ff_vdpau_h264_set_reference_frames(s);

    /* With frame threading, ref-pic marking and POC/frame_num state roll
     * over happen elsewhere; do it here otherwise (or during setup). */
    if (in_setup || !(avctx->active_thread_type & FF_THREAD_FRAME)) {
        if (!s->dropable) {
            err = ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
            h->prev_poc_msb = h->poc_msb;
            h->prev_poc_lsb = h->poc_lsb;
        }
        h->prev_frame_num_offset = h->frame_num_offset;
        h->prev_frame_num        = h->frame_num;
        h->outputed_poc          = h->next_outputed_poc;
    }

    /* A hwaccel failure is logged but does not abort field completion. */
    if (avctx->hwaccel) {
        if (avctx->hwaccel->end_frame(avctx) < 0)
            av_log(avctx, AV_LOG_ERROR,
                   "hardware accelerator failed to decode picture\n");
    }

    if (CONFIG_H264_VDPAU_DECODER &&
        s->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU)
        ff_vdpau_h264_picture_complete(s);

    /*
     * FIXME: Error handling code does not seem to support interlaced
     * when slices span multiple rows
     * The ff_er_add_slice calls don't work right for bottom
     * fields; they cause massive erroneous error concealing
     * Error marking covers both fields (top and bottom).
     * This causes a mismatched s->error_count
     * and a bad error table. Further, the error count goes to
     * INT_MAX when called for bottom field, because mb_y is
     * past end by one (callers fault) and resync_mb_y != 0
     * causes problems for the first MB line, too.
     */
    if (!FIELD_PICTURE)
        ff_er_frame_end(s);

    ff_MPV_frame_end(s);

    h->current_slice = 0;

    return err;
}
/**
 * Replicate H264 "master" context to thread contexts.
 * Copies the per-frame decoding state (current picture, POC/frame_num
 * history, reference lists and dequant tables) from src into dst so a
 * slice-threaded worker can decode independently.
 */
static void clone_slice(H264Context *dst, H264Context *src)
{
    MpegEncContext *const md = &dst->s;
    MpegEncContext *const ms = &src->s;

    memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));

    /* Shared picture/layout state from the embedded MpegEncContext. */
    md->current_picture_ptr = ms->current_picture_ptr;
    md->current_picture     = ms->current_picture;
    md->linesize            = ms->linesize;
    md->uvlinesize          = ms->uvlinesize;
    md->first_field         = ms->first_field;

    /* POC / frame_num history and short-term reference count. */
    dst->prev_poc_msb          = src->prev_poc_msb;
    dst->prev_poc_lsb          = src->prev_poc_lsb;
    dst->prev_frame_num_offset = src->prev_frame_num_offset;
    dst->prev_frame_num        = src->prev_frame_num;
    dst->short_ref_count       = src->short_ref_count;

    /* Reference picture lists. */
    memcpy(dst->short_ref,        src->short_ref,        sizeof(dst->short_ref));
    memcpy(dst->long_ref,         src->long_ref,         sizeof(dst->long_ref));
    memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
    memcpy(dst->ref_list,         src->ref_list,         sizeof(dst->ref_list));

    /* Dequantization tables. */
    memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
    memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
}
/**
 * Compute profile from profile_idc and constraint_set?_flags.
 *
 * @param sps SPS
 *
 * @return profile as defined by FF_PROFILE_H264_*
 */
int ff_h264_get_profile(SPS *sps)
{
    int profile = sps->profile_idc;

    if (sps->profile_idc == FF_PROFILE_H264_BASELINE) {
        /* constraint_set1_flag set to 1 marks Constrained Baseline */
        if (sps->constraint_set_flags & (1 << 1))
            profile |= FF_PROFILE_H264_CONSTRAINED;
    } else if (sps->profile_idc == FF_PROFILE_H264_HIGH_10  ||
               sps->profile_idc == FF_PROFILE_H264_HIGH_422 ||
               sps->profile_idc == FF_PROFILE_H264_HIGH_444_PREDICTIVE) {
        /* constraint_set3_flag set to 1 marks the Intra variants */
        if (sps->constraint_set_flags & (1 << 3))
            profile |= FF_PROFILE_H264_INTRA;
    }

    return profile;
}

2346
/**
2347
 * Decode a slice header.
2348
 * This will also call ff_MPV_common_init() and frame_start() as needed.
2349 2350
 *
 * @param h h264context
2351 2352
 * @param h0 h264 master context (differs from 'h' when doing sliced based
 *           parallel decoding)
2353
 *
2354
 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
2355
 */
2356 2357 2358 2359
static int decode_slice_header(H264Context *h, H264Context *h0)
{
    MpegEncContext *const s  = &h->s;
    MpegEncContext *const s0 = &h0->s;
2360
    unsigned int first_mb_in_slice;
2361
    unsigned int pps_id;
2362
    int num_ref_idx_active_override_flag;
2363
    unsigned int slice_type, tmp, i, j;
2364
    int default_ref_list_done = 0;
2365
    int last_pic_structure, last_pic_dropable;
2366
    int must_reinit;
2367

2368
    /* FIXME: 2tap qpel isn't implemented for high bit depth. */
2369 2370 2371 2372 2373 2374 2375
    if ((s->avctx->flags2 & CODEC_FLAG2_FAST) &&
        !h->nal_ref_idc && !h->pixel_shift) {
        s->me.qpel_put = s->dsp.put_2tap_qpel_pixels_tab;
        s->me.qpel_avg = s->dsp.avg_2tap_qpel_pixels_tab;
    } else {
        s->me.qpel_put = s->dsp.put_h264_qpel_pixels_tab;
        s->me.qpel_avg = s->dsp.avg_h264_qpel_pixels_tab;
2376 2377
    }

2378
    first_mb_in_slice = get_ue_golomb_long(&s->gb);
2379

2380 2381
    if (first_mb_in_slice == 0) { // FIXME better field boundary detection
        if (h0->current_slice && FIELD_PICTURE) {
2382
            field_end(h, 1);
2383 2384
        }

2385
        h0->current_slice = 0;
2386 2387 2388 2389 2390 2391
        if (!s0->first_field) {
            if (s->current_picture_ptr && !s->dropable &&
                s->current_picture_ptr->owner2 == s) {
                ff_thread_report_progress(&s->current_picture_ptr->f, INT_MAX,
                                          s->picture_structure == PICT_BOTTOM_FIELD);
            }
2392
            s->current_picture_ptr = NULL;
2393
        }
2394 2395
    }

2396 2397 2398 2399 2400
    slice_type = get_ue_golomb_31(&s->gb);
    if (slice_type > 9) {
        av_log(h->s.avctx, AV_LOG_ERROR,
               "slice type too large (%d) at %d %d\n",
               h->slice_type, s->mb_x, s->mb_y);
Loic Le Loarer's avatar
Loic Le Loarer committed
2401
        return -1;
2402
    }
2403
    if (slice_type > 4) {
2404
        slice_type -= 5;
2405 2406 2407
        h->slice_type_fixed = 1;
    } else
        h->slice_type_fixed = 0;
2408

2409 2410 2411
    slice_type = golomb_to_pict_type[slice_type];
    if (slice_type == AV_PICTURE_TYPE_I ||
        (h0->current_slice != 0 && slice_type == h0->last_slice_type)) {
2412 2413
        default_ref_list_done = 1;
    }
2414 2415
    h->slice_type     = slice_type;
    h->slice_type_nos = slice_type & 3;
2416

2417 2418
    // to make a few old functions happy, it's wrong though
    s->pict_type = h->slice_type;
2419

2420 2421
    pps_id = get_ue_golomb(&s->gb);
    if (pps_id >= MAX_PPS_COUNT) {
2422
        av_log(h->s.avctx, AV_LOG_ERROR, "pps_id %d out of range\n", pps_id);
2423 2424
        return -1;
    }
2425 2426 2427 2428
    if (!h0->pps_buffers[pps_id]) {
        av_log(h->s.avctx, AV_LOG_ERROR,
               "non-existing PPS %u referenced\n",
               pps_id);
2429 2430
        return -1;
    }
2431
    h->pps = *h0->pps_buffers[pps_id];
2432

2433 2434 2435 2436
    if (!h0->sps_buffers[h->pps.sps_id]) {
        av_log(h->s.avctx, AV_LOG_ERROR,
               "non-existing SPS %u referenced\n",
               h->pps.sps_id);
2437 2438
        return -1;
    }
2439
    h->sps = *h0->sps_buffers[h->pps.sps_id];
2440

2441
    s->avctx->profile = ff_h264_get_profile(&h->sps);
2442
    s->avctx->level   = h->sps.level_idc;
2443
    s->avctx->refs    = h->sps.ref_frame_count;
2444

2445 2446 2447 2448 2449 2450 2451 2452 2453 2454
    must_reinit = (s->context_initialized &&
                    (   16*h->sps.mb_width != s->avctx->coded_width
                     || 16*h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag) != s->avctx->coded_height
                     || s->avctx->bits_per_raw_sample != h->sps.bit_depth_luma
                     || h->cur_chroma_format_idc != h->sps.chroma_format_idc
                     || av_cmp_q(h->sps.sar, s->avctx->sample_aspect_ratio)));

    if(must_reinit && (h != h0 || (s->avctx->active_thread_type & FF_THREAD_FRAME))) {
        av_log_missing_feature(s->avctx,
                                "Width/height/bit depth/chroma idc changing with threads is", 0);
2455
        return AVERROR_PATCHWELCOME;   // width / height changed during parallelized decoding
2456 2457
    }

2458 2459
    s->mb_width  = h->sps.mb_width;
    s->mb_height = h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
2460

2461
    h->b_stride = s->mb_width * 4;
2462

2463 2464
    s->chroma_y_shift = h->sps.chroma_format_idc <= 1; // 400 uses yuv420p

2465 2466 2467
    s->width  = 16 * s->mb_width;
    s->height = 16 * s->mb_height;

2468
    if(must_reinit) {
2469
        free_tables(h, 0);
2470
        flush_dpb(s->avctx);
2471
        ff_MPV_common_end(s);
2472
        h->list_count = 0;
2473
        h->current_slice = 0;
2474 2475
    }
    if (!s->context_initialized) {
2476
        if (h != h0) {
2477 2478
            av_log(h->s.avctx, AV_LOG_ERROR,
                   "Cannot (re-)initialize context during parallel decoding.\n");
2479 2480
            return -1;
        }
2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493
        if(   FFALIGN(s->avctx->width , 16                                 ) == s->width
           && FFALIGN(s->avctx->height, 16*(2 - h->sps.frame_mbs_only_flag)) == s->height
           && !h->sps.crop_right && !h->sps.crop_bottom
           && (s->avctx->width != s->width || s->avctx->height && s->height)
        ) {
            av_log(h->s.avctx, AV_LOG_DEBUG, "Using externally provided dimensions\n");
            s->avctx->coded_width  = s->width;
            s->avctx->coded_height = s->height;
        } else{
            avcodec_set_dimensions(s->avctx, s->width, s->height);
            s->avctx->width  -= (2>>CHROMA444)*FFMIN(h->sps.crop_right, (8<<CHROMA444)-1);
            s->avctx->height -= (1<<s->chroma_y_shift)*FFMIN(h->sps.crop_bottom, (16>>s->chroma_y_shift)-1) * (2 - h->sps.frame_mbs_only_flag);
        }
2494
        s->avctx->sample_aspect_ratio = h->sps.sar;
2495
        av_assert0(s->avctx->sample_aspect_ratio.den);
2496

2497 2498 2499 2500 2501 2502 2503 2504 2505
        if (s->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU
            && (h->sps.bit_depth_luma != 8 ||
                h->sps.chroma_format_idc > 1)) {
            av_log(s->avctx, AV_LOG_ERROR,
                   "VDPAU decoding does not support video "
                   "colorspace\n");
            return -1;
        }

2506 2507
        if (s->avctx->bits_per_raw_sample != h->sps.bit_depth_luma ||
            h->cur_chroma_format_idc != h->sps.chroma_format_idc) {
2508
            if (h->sps.bit_depth_luma >= 8 && h->sps.bit_depth_luma <= 14 && h->sps.bit_depth_luma != 11 && h->sps.bit_depth_luma != 13 &&
2509
                (h->sps.bit_depth_luma != 9 || !CHROMA422)) {
2510 2511 2512 2513 2514 2515 2516
                s->avctx->bits_per_raw_sample = h->sps.bit_depth_luma;
                h->cur_chroma_format_idc = h->sps.chroma_format_idc;
                h->pixel_shift = h->sps.bit_depth_luma > 8;

                ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma, h->sps.chroma_format_idc);
                ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma, h->sps.chroma_format_idc);
                s->dsp.dct_bits = h->sps.bit_depth_luma > 8 ? 32 : 16;
2517
                ff_dsputil_init(&s->dsp, s->avctx);
2518
            } else {
2519
                av_log(s->avctx, AV_LOG_ERROR, "Unsupported bit depth: %d chroma_idc: %d\n",
2520
                       h->sps.bit_depth_luma, h->sps.chroma_format_idc);
2521 2522 2523 2524
                return -1;
            }
        }

2525
        if (h->sps.video_signal_type_present_flag) {
2526
            s->avctx->color_range = h->sps.full_range>0 ? AVCOL_RANGE_JPEG
2527 2528
                                                      : AVCOL_RANGE_MPEG;
            if (h->sps.colour_description_present_flag) {
2529 2530 2531 2532 2533 2534
                s->avctx->color_primaries = h->sps.color_primaries;
                s->avctx->color_trc       = h->sps.color_trc;
                s->avctx->colorspace      = h->sps.colorspace;
            }
        }

2535 2536 2537
        if (h->sps.timing_info_present_flag) {
            int64_t den = h->sps.time_scale;
            if (h->x264_build < 44U)
2538
                den *= 2;
2539
            av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
2540
                      h->sps.num_units_in_tick, den, 1 << 30);
2541
        }
2542 2543

        switch (h->sps.bit_depth_luma) {
2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565
        case 9:
            if (CHROMA444) {
                if (s->avctx->colorspace == AVCOL_SPC_RGB) {
                    s->avctx->pix_fmt = PIX_FMT_GBRP9;
                } else
                    s->avctx->pix_fmt = PIX_FMT_YUV444P9;
            } else if (CHROMA422)
                s->avctx->pix_fmt = PIX_FMT_YUV422P9;
            else
                s->avctx->pix_fmt = PIX_FMT_YUV420P9;
            break;
        case 10:
            if (CHROMA444) {
                if (s->avctx->colorspace == AVCOL_SPC_RGB) {
                    s->avctx->pix_fmt = PIX_FMT_GBRP10;
                } else
                    s->avctx->pix_fmt = PIX_FMT_YUV444P10;
            } else if (CHROMA422)
                s->avctx->pix_fmt = PIX_FMT_YUV422P10;
            else
                s->avctx->pix_fmt = PIX_FMT_YUV420P10;
            break;
2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587
        case 12:
            if (CHROMA444) {
                if (s->avctx->colorspace == AVCOL_SPC_RGB) {
                    s->avctx->pix_fmt = PIX_FMT_GBRP12;
                } else
                    s->avctx->pix_fmt = PIX_FMT_YUV444P12;
            } else if (CHROMA422)
                s->avctx->pix_fmt = PIX_FMT_YUV422P12;
            else
                s->avctx->pix_fmt = PIX_FMT_YUV420P12;
            break;
        case 14:
            if (CHROMA444) {
                if (s->avctx->colorspace == AVCOL_SPC_RGB) {
                    s->avctx->pix_fmt = PIX_FMT_GBRP14;
                } else
                    s->avctx->pix_fmt = PIX_FMT_YUV444P14;
            } else if (CHROMA422)
                s->avctx->pix_fmt = PIX_FMT_YUV422P14;
            else
                s->avctx->pix_fmt = PIX_FMT_YUV420P14;
            break;
2588 2589 2590 2591
        case 8:
            if (CHROMA444) {
                    s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? PIX_FMT_YUVJ444P
                                                                                  : PIX_FMT_YUV444P;
2592
                    if (s->avctx->colorspace == AVCOL_SPC_RGB) {
2593 2594
                        s->avctx->pix_fmt = PIX_FMT_GBR24P;
                        av_log(h->s.avctx, AV_LOG_DEBUG, "Detected GBR colorspace.\n");
2595 2596
                    } else if (s->avctx->colorspace == AVCOL_SPC_YCGCO) {
                        av_log(h->s.avctx, AV_LOG_WARNING, "Detected unsupported YCgCo colorspace.\n");
2597
                    }
2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613
            } else if (CHROMA422) {
                s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? PIX_FMT_YUVJ422P
                                                                              : PIX_FMT_YUV422P;
            } else {
                s->avctx->pix_fmt = s->avctx->get_format(s->avctx,
                                                         s->avctx->codec->pix_fmts ?
                                                         s->avctx->codec->pix_fmts :
                                                         s->avctx->color_range == AVCOL_RANGE_JPEG ?
                                                         hwaccel_pixfmt_list_h264_jpeg_420 :
                                                         ff_hwaccel_pixfmt_list_420);
            }
            break;
        default:
            av_log(s->avctx, AV_LOG_ERROR,
                   "Unsupported bit depth: %d\n", h->sps.bit_depth_luma);
            return AVERROR_INVALIDDATA;
2614 2615
        }

2616 2617
        s->avctx->hwaccel = ff_find_hwaccel(s->avctx->codec->id,
                                            s->avctx->pix_fmt);
2618

2619 2620
        if (ff_MPV_common_init(s) < 0) {
            av_log(h->s.avctx, AV_LOG_ERROR, "ff_MPV_common_init() failed.\n");
2621
            return -1;
2622
        }
2623
        s->first_field = 0;
2624
        h->prev_interlaced_frame = 1;
2625

2626
        init_scan_tables(h);
2627
        if (ff_h264_alloc_tables(h) < 0) {
2628 2629
            av_log(h->s.avctx, AV_LOG_ERROR,
                   "Could not allocate memory for h264\n");
2630 2631
            return AVERROR(ENOMEM);
        }
2632

2633
        if (!HAVE_THREADS || !(s->avctx->active_thread_type & FF_THREAD_SLICE)) {
2634 2635
            if (context_init(h) < 0) {
                av_log(h->s.avctx, AV_LOG_ERROR, "context_init() failed.\n");
2636
                return -1;
2637
            }
2638
        } else {
2639
            for (i = 1; i < s->slice_context_count; i++) {
2640 2641 2642 2643
                H264Context *c;
                c = h->thread_context[i] = av_malloc(sizeof(H264Context));
                memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
                memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
2644 2645 2646
                c->h264dsp     = h->h264dsp;
                c->sps         = h->sps;
                c->pps         = h->pps;
2647
                c->pixel_shift = h->pixel_shift;
2648
                c->cur_chroma_format_idc = h->cur_chroma_format_idc;
2649 2650 2651 2652
                init_scan_tables(c);
                clone_tables(c, h, i);
            }

2653
            for (i = 0; i < s->slice_context_count; i++)
2654
                if (context_init(h->thread_context[i]) < 0) {
2655 2656
                    av_log(h->s.avctx, AV_LOG_ERROR,
                           "context_init() failed.\n");
2657
                    return -1;
2658
                }
2659
        }
2660 2661
    }

2662
    if (h == h0 && h->dequant_coeff_pps != pps_id) {
2663 2664 2665 2666
        h->dequant_coeff_pps = pps_id;
        init_dequant_tables(h);
    }

2667
    h->frame_num = get_bits(&s->gb, h->sps.log2_max_frame_num);
2668

2669 2670
    h->mb_mbaff        = 0;
    h->mb_aff_frame    = 0;
2671
    last_pic_structure = s0->picture_structure;
2672 2673
    last_pic_dropable  = s->dropable;
    s->dropable        = h->nal_ref_idc == 0;
2674 2675 2676
    if (h->sps.frame_mbs_only_flag) {
        s->picture_structure = PICT_FRAME;
    } else {
2677
        if (!h->sps.direct_8x8_inference_flag && slice_type == AV_PICTURE_TYPE_B) {
2678 2679 2680
            av_log(h->s.avctx, AV_LOG_ERROR, "This stream was generated by a broken encoder, invalid 8x8 inference\n");
            return -1;
        }
2681 2682
        if (get_bits1(&s->gb)) { // field_pic_flag
            s->picture_structure = PICT_TOP_FIELD + get_bits1(&s->gb); // bottom_field_flag
2683
        } else {
2684 2685
            s->picture_structure = PICT_FRAME;
            h->mb_aff_frame      = h->sps.mb_aff;
2686
        }
2687
    }
2688
    h->mb_field_decoding_flag = s->picture_structure != PICT_FRAME;
2689

2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700
    if (h0->current_slice != 0) {
        if (last_pic_structure != s->picture_structure ||
            last_pic_dropable  != s->dropable) {
            av_log(h->s.avctx, AV_LOG_ERROR,
                   "Changing field mode (%d -> %d) between slices is not allowed\n",
                   last_pic_structure, s->picture_structure);
            s->picture_structure = last_pic_structure;
            s->dropable          = last_pic_dropable;
            return AVERROR_INVALIDDATA;
        }
    } else {
2701 2702
        /* Shorten frame num gaps so we don't have to allocate reference
         * frames just to throw them away */
2703
        if (h->frame_num != h->prev_frame_num && h->prev_frame_num >= 0) {
2704 2705
            int unwrap_prev_frame_num = h->prev_frame_num;
            int max_frame_num         = 1 << h->sps.log2_max_frame_num;
2706

2707 2708
            if (unwrap_prev_frame_num > h->frame_num)
                unwrap_prev_frame_num -= max_frame_num;
2709 2710 2711 2712 2713 2714 2715 2716 2717

            if ((h->frame_num - unwrap_prev_frame_num) > h->sps.ref_frame_count) {
                unwrap_prev_frame_num = (h->frame_num - h->sps.ref_frame_count) - 1;
                if (unwrap_prev_frame_num < 0)
                    unwrap_prev_frame_num += max_frame_num;

                h->prev_frame_num = unwrap_prev_frame_num;
            }
        }
2718

2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784
        /* See if we have a decoded first field looking for a pair...
         * Here, we're using that to see if we should mark previously
         * decode frames as "finished".
         * We have to do that before the "dummy" in-between frame allocation,
         * since that can modify s->current_picture_ptr. */
        if (s0->first_field) {
            assert(s0->current_picture_ptr);
            assert(s0->current_picture_ptr->f.data[0]);
            assert(s0->current_picture_ptr->f.reference != DELAYED_PIC_REF);

            /* Mark old field/frame as completed */
            if (!last_pic_dropable && s0->current_picture_ptr->owner2 == s0) {
                ff_thread_report_progress(&s0->current_picture_ptr->f, INT_MAX,
                                          last_pic_structure == PICT_BOTTOM_FIELD);
            }

            /* figure out if we have a complementary field pair */
            if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
                /* Previous field is unmatched. Don't display it, but let it
                 * remain for reference if marked as such. */
                if (!last_pic_dropable && last_pic_structure != PICT_FRAME) {
                    ff_thread_report_progress(&s0->current_picture_ptr->f, INT_MAX,
                                              last_pic_structure == PICT_TOP_FIELD);
                }
            } else {
                if (s0->current_picture_ptr->frame_num != h->frame_num) {
                    /* This and previous field were reference, but had
                     * different frame_nums. Consider this field first in
                     * pair. Throw away previous field except for reference
                     * purposes. */
                    if (!last_pic_dropable && last_pic_structure != PICT_FRAME) {
                        ff_thread_report_progress(&s0->current_picture_ptr->f, INT_MAX,
                                                  last_pic_structure == PICT_TOP_FIELD);
                    }
                } else {
                    /* Second field in complementary pair */
                    if (!((last_pic_structure   == PICT_TOP_FIELD &&
                           s->picture_structure == PICT_BOTTOM_FIELD) ||
                          (last_pic_structure   == PICT_BOTTOM_FIELD &&
                           s->picture_structure == PICT_TOP_FIELD))) {
                        av_log(s->avctx, AV_LOG_ERROR,
                               "Invalid field mode combination %d/%d\n",
                               last_pic_structure, s->picture_structure);
                        s->picture_structure = last_pic_structure;
                        s->dropable          = last_pic_dropable;
                        return AVERROR_INVALIDDATA;
                    } else if (last_pic_dropable != s->dropable) {
                        av_log(s->avctx, AV_LOG_ERROR,
                               "Cannot combine reference and non-reference fields in the same frame\n");
                        av_log_ask_for_sample(s->avctx, NULL);
                        s->picture_structure = last_pic_structure;
                        s->dropable          = last_pic_dropable;
                        return AVERROR_INVALIDDATA;
                    }

                    /* Take ownership of this buffer. Note that if another thread owned
                     * the first field of this buffer, we're not operating on that pointer,
                     * so the original thread is still responsible for reporting progress
                     * on that first field (or if that was us, we just did that above).
                     * By taking ownership, we assign responsibility to ourselves to
                     * report progress on the second field. */
                    s0->current_picture_ptr->owner2 = s0;
                }
            }
        }

2785
        while (h->frame_num != h->prev_frame_num && h->prev_frame_num >= 0 &&
2786
               h->frame_num != (h->prev_frame_num + 1) % (1 << h->sps.log2_max_frame_num)) {
2787
            Picture *prev = h->short_ref_count ? h->short_ref[0] : NULL;
2788 2789
            av_log(h->s.avctx, AV_LOG_DEBUG, "Frame num gap %d %d\n",
                   h->frame_num, h->prev_frame_num);
2790
            if (ff_h264_frame_start(h) < 0)
2791
                return -1;
2792
            h->prev_frame_num++;
2793 2794
            h->prev_frame_num %= 1 << h->sps.log2_max_frame_num;
            s->current_picture_ptr->frame_num = h->prev_frame_num;
2795 2796
            ff_thread_report_progress(&s->current_picture_ptr->f, INT_MAX, 0);
            ff_thread_report_progress(&s->current_picture_ptr->f, INT_MAX, 1);
2797
            ff_generate_sliding_window_mmcos(h);
2798
            if (ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index) < 0 &&
2799
                (s->avctx->err_recognition & AV_EF_EXPLODE))
2800
                return AVERROR_INVALIDDATA;
2801 2802 2803 2804
            /* Error concealment: if a ref is missing, copy the previous ref in its place.
             * FIXME: avoiding a memcpy would be nice, but ref handling makes many assumptions
             * about there being no actual duplicates.
             * FIXME: this doesn't copy padding for out-of-frame motion vectors.  Given we're
2805
             * concealing a lost frame, this probably isn't noticeable by comparison, but it should
2806
             * be fixed. */
2807 2808
            if (h->short_ref_count) {
                if (prev) {
2809
                    av_image_copy(h->short_ref[0]->f.data, h->short_ref[0]->f.linesize,
2810 2811 2812
                                  (const uint8_t **)prev->f.data, prev->f.linesize,
                                  s->avctx->pix_fmt, s->mb_width * 16, s->mb_height * 16);
                    h->short_ref[0]->poc = prev->poc + 2;
2813 2814 2815
                }
                h->short_ref[0]->frame_num = h->prev_frame_num;
            }
2816 2817
        }

2818 2819 2820
        /* See if we have a decoded first field looking for a pair...
         * We're using that to see whether to continue decoding in that
         * frame, or to allocate a new one. */
2821 2822
        if (s0->first_field) {
            assert(s0->current_picture_ptr);
2823
            assert(s0->current_picture_ptr->f.data[0]);
2824
            assert(s0->current_picture_ptr->f.reference != DELAYED_PIC_REF);
2825 2826 2827

            /* figure out if we have a complementary field pair */
            if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
2828 2829
                /* Previous field is unmatched. Don't display it, but let it
                 * remain for reference if marked as such. */
2830
                s0->current_picture_ptr = NULL;
2831
                s0->first_field         = FIELD_PICTURE;
2832
            } else {
2833
                if (s0->current_picture_ptr->frame_num != h->frame_num) {
2834 2835
                    ff_thread_report_progress((AVFrame*)s0->current_picture_ptr, INT_MAX,
                                              s0->picture_structure==PICT_BOTTOM_FIELD);
2836 2837 2838
                    /* This and the previous field had different frame_nums.
                     * Consider this field first in pair. Throw away previous
                     * one except for reference purposes. */
2839
                    s0->first_field         = 1;
2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850
                    s0->current_picture_ptr = NULL;
                } else {
                    /* Second field in complementary pair */
                    s0->first_field = 0;
                }
            }
        } else {
            /* Frame or first field in a potentially complementary pair */
            s0->first_field = FIELD_PICTURE;
        }

2851
        if (!FIELD_PICTURE || s0->first_field) {
2852 2853 2854 2855 2856 2857
            if (ff_h264_frame_start(h) < 0) {
                s0->first_field = 0;
                return -1;
            }
        } else {
            ff_release_unused_pictures(s, 0);
2858
        }
2859
    }
2860
    if (h != h0)
2861 2862
        clone_slice(h, h0);

2863
    s->current_picture_ptr->frame_num = h->frame_num; // FIXME frame_num cleanup
2864

2865
    assert(s->mb_num == s->mb_width * s->mb_height);
2866 2867
    if (first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
        first_mb_in_slice >= s->mb_num) {
2868
        av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
Michael Niedermayer's avatar
Michael Niedermayer committed
2869 2870
        return -1;
    }
2871
    s->resync_mb_x = s->mb_x =  first_mb_in_slice % s->mb_width;
2872 2873 2874
    s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
    if (s->picture_structure == PICT_BOTTOM_FIELD)
        s->resync_mb_y = s->mb_y = s->mb_y + 1;
2875
    assert(s->mb_y < s->mb_height);
2876

2877 2878 2879 2880 2881 2882
    if (s->picture_structure == PICT_FRAME) {
        h->curr_pic_num = h->frame_num;
        h->max_pic_num  = 1 << h->sps.log2_max_frame_num;
    } else {
        h->curr_pic_num = 2 * h->frame_num + 1;
        h->max_pic_num  = 1 << (h->sps.log2_max_frame_num + 1);
2883
    }
2884

2885
    if (h->nal_unit_type == NAL_IDR_SLICE)
Fabrice Bellard's avatar
Fabrice Bellard committed
2886
        get_ue_golomb(&s->gb); /* idr_pic_id */
2887

2888 2889
    if (h->sps.poc_type == 0) {
        h->poc_lsb = get_bits(&s->gb, h->sps.log2_max_poc_lsb);
2890

2891 2892
        if (h->pps.pic_order_present == 1 && s->picture_structure == PICT_FRAME)
            h->delta_poc_bottom = get_se_golomb(&s->gb);
2893
    }
2894

2895 2896
    if (h->sps.poc_type == 1 && !h->sps.delta_pic_order_always_zero_flag) {
        h->delta_poc[0] = get_se_golomb(&s->gb);
2897

2898 2899
        if (h->pps.pic_order_present == 1 && s->picture_structure == PICT_FRAME)
            h->delta_poc[1] = get_se_golomb(&s->gb);
2900
    }
2901

2902
    init_poc(h);
2903

2904 2905
    if (h->pps.redundant_pic_cnt_present)
        h->redundant_pic_count = get_ue_golomb(&s->gb);
2906

2907 2908 2909
    // set defaults, might be overridden a few lines later
    h->ref_count[0] = h->pps.ref_count[0];
    h->ref_count[1] = h->pps.ref_count[1];
2910

2911
    if (h->slice_type_nos != AV_PICTURE_TYPE_I) {
2912 2913
        unsigned max[2];
        max[0] = max[1] = s->picture_structure == PICT_FRAME ? 15 : 31;
2914

2915 2916 2917
        if (h->slice_type_nos == AV_PICTURE_TYPE_B)
            h->direct_spatial_mv_pred = get_bits1(&s->gb);
        num_ref_idx_active_override_flag = get_bits1(&s->gb);
2918

2919 2920 2921 2922
        if (num_ref_idx_active_override_flag) {
            h->ref_count[0] = get_ue_golomb(&s->gb) + 1;
            if (h->slice_type_nos == AV_PICTURE_TYPE_B)
                h->ref_count[1] = get_ue_golomb(&s->gb) + 1;
2923 2924 2925
            else
                // full range is spec-ok in this case, even for frames
                max[1] = 31;
2926
        }
2927

2928 2929
        if (h->ref_count[0]-1 > max[0] || h->ref_count[1]-1 > max[1]){
            av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow %u > %u or %u > %u\n", h->ref_count[0]-1, max[0], h->ref_count[1]-1, max[1]);
2930 2931
            h->ref_count[0] = h->ref_count[1] = 1;
            return AVERROR_INVALIDDATA;
2932
        }
2933

2934 2935
        if (h->slice_type_nos == AV_PICTURE_TYPE_B)
            h->list_count = 2;
2936
        else
2937 2938
            h->list_count = 1;
    } else
2939
        h->ref_count[1]= h->ref_count[0]= h->list_count= 0;
2940

2941
    if (!default_ref_list_done)
2942
        ff_h264_fill_default_ref_list(h);
2943

2944 2945 2946
    if (h->slice_type_nos != AV_PICTURE_TYPE_I &&
        ff_h264_decode_ref_pic_list_reordering(h) < 0) {
        h->ref_count[1] = h->ref_count[0] = 0;
2947
        return -1;
2948
    }
2949

2950 2951
    if (h->slice_type_nos != AV_PICTURE_TYPE_I) {
        s->last_picture_ptr = &h->ref_list[0][0];
2952
        ff_copy_picture(&s->last_picture, s->last_picture_ptr);
2953
    }
2954 2955
    if (h->slice_type_nos == AV_PICTURE_TYPE_B) {
        s->next_picture_ptr = &h->ref_list[1][0];
2956
        ff_copy_picture(&s->next_picture, s->next_picture_ptr);
2957 2958
    }

2959 2960 2961
    if ((h->pps.weighted_pred && h->slice_type_nos == AV_PICTURE_TYPE_P) ||
        (h->pps.weighted_bipred_idc == 1 &&
         h->slice_type_nos == AV_PICTURE_TYPE_B))
2962
        pred_weight_table(h);
2963 2964
    else if (h->pps.weighted_bipred_idc == 2 &&
             h->slice_type_nos == AV_PICTURE_TYPE_B) {
2965
        implicit_weight_table(h, -1);
2966
    } else {
2967
        h->use_weight = 0;
2968 2969 2970 2971 2972
        for (i = 0; i < 2; i++) {
            h->luma_weight_flag[i]   = 0;
            h->chroma_weight_flag[i] = 0;
        }
    }
2973

2974 2975
    if (h->nal_ref_idc && ff_h264_decode_ref_pic_marking(h0, &s->gb) < 0 &&
        (s->avctx->err_recognition & AV_EF_EXPLODE))
2976
        return AVERROR_INVALIDDATA;
2977

2978
    if (FRAME_MBAFF) {
2979
        ff_h264_fill_mbaff_ref_list(h);
2980

2981
        if (h->pps.weighted_bipred_idc == 2 && h->slice_type_nos == AV_PICTURE_TYPE_B) {
2982 2983 2984 2985 2986
            implicit_weight_table(h, 0);
            implicit_weight_table(h, 1);
        }
    }

2987
    if (h->slice_type_nos == AV_PICTURE_TYPE_B && !h->direct_spatial_mv_pred)
2988 2989
        ff_h264_direct_dist_scale_factor(h);
    ff_h264_direct_ref_list_init(h);
2990

2991
    if (h->slice_type_nos != AV_PICTURE_TYPE_I && h->pps.cabac) {
2992
        tmp = get_ue_golomb_31(&s->gb);
2993
        if (tmp > 2) {
2994 2995 2996
            av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
            return -1;
        }
2997
        h->cabac_init_idc = tmp;
2998
    }
2999 3000

    h->last_qscale_diff = 0;
3001
    tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3002
    if (tmp > 51 + 6 * (h->sps.bit_depth_luma - 8)) {
3003
        av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3004 3005
        return -1;
    }
3006
    s->qscale       = tmp;
3007 3008
    h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
    h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3009 3010
    // FIXME qscale / qp ... stuff
    if (h->slice_type == AV_PICTURE_TYPE_SP)
Fabrice Bellard's avatar
Fabrice Bellard committed
3011
        get_bits1(&s->gb); /* sp_for_switch_flag */
3012 3013
    if (h->slice_type == AV_PICTURE_TYPE_SP ||
        h->slice_type == AV_PICTURE_TYPE_SI)
Fabrice Bellard's avatar
Fabrice Bellard committed
3014
        get_se_golomb(&s->gb); /* slice_qs_delta */
3015

3016
    h->deblocking_filter     = 1;
3017
    h->slice_alpha_c0_offset = 52;
3018 3019 3020 3021 3022 3023
    h->slice_beta_offset     = 52;
    if (h->pps.deblocking_filter_parameters_present) {
        tmp = get_ue_golomb_31(&s->gb);
        if (tmp > 2) {
            av_log(s->avctx, AV_LOG_ERROR,
                   "deblocking_filter_idc %u out of range\n", tmp);
3024 3025
            return -1;
        }
3026 3027 3028
        h->deblocking_filter = tmp;
        if (h->deblocking_filter < 2)
            h->deblocking_filter ^= 1;  // 1<->0
3029

3030
        if (h->deblocking_filter) {
3031 3032
            h->slice_alpha_c0_offset += get_se_golomb(&s->gb) << 1;
            h->slice_beta_offset     += get_se_golomb(&s->gb) << 1;
3033 3034 3035 3036 3037
            if (h->slice_alpha_c0_offset > 104U ||
                h->slice_beta_offset     > 104U) {
                av_log(s->avctx, AV_LOG_ERROR,
                       "deblocking filter parameters %d %d out of range\n",
                       h->slice_alpha_c0_offset, h->slice_beta_offset);
3038 3039
                return -1;
            }
3040
        }
3041
    }
3042

3043 3044 3045 3046 3047 3048 3049 3050
    if (s->avctx->skip_loop_filter >= AVDISCARD_ALL ||
        (s->avctx->skip_loop_filter >= AVDISCARD_NONKEY &&
         h->slice_type_nos != AV_PICTURE_TYPE_I) ||
        (s->avctx->skip_loop_filter >= AVDISCARD_BIDIR  &&
         h->slice_type_nos == AV_PICTURE_TYPE_B) ||
        (s->avctx->skip_loop_filter >= AVDISCARD_NONREF &&
         h->nal_ref_idc == 0))
        h->deblocking_filter = 0;
3051

3052 3053
    if (h->deblocking_filter == 1 && h0->max_contexts > 1) {
        if (s->avctx->flags2 & CODEC_FLAG2_FAST) {
3054
            /* Cheat slightly for speed:
3055
             * Do not bother to deblock across slices. */
3056 3057
            h->deblocking_filter = 2;
        } else {
Andreas Öman's avatar
Andreas Öman committed
3058
            h0->max_contexts = 1;
3059 3060 3061
            if (!h0->single_decode_warning) {
                av_log(s->avctx, AV_LOG_INFO,
                       "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
Andreas Öman's avatar
Andreas Öman committed
3062 3063
                h0->single_decode_warning = 1;
            }
3064
            if (h != h0) {
3065 3066
                av_log(h->s.avctx, AV_LOG_ERROR,
                       "Deblocking switched inside frame.\n");
3067 3068
                return 1;
            }
3069
        }
3070
    }
3071 3072 3073 3074 3075 3076
    h->qp_thresh = 15 + 52 -
                   FFMIN(h->slice_alpha_c0_offset, h->slice_beta_offset) -
                   FFMAX3(0,
                          h->pps.chroma_qp_index_offset[0],
                          h->pps.chroma_qp_index_offset[1]) +
                   6 * (h->sps.bit_depth_luma - 8);
3077

3078 3079
    h0->last_slice_type = slice_type;
    h->slice_num = ++h0->current_slice;
3080

3081
    if (h->slice_num)
3082 3083 3084 3085 3086
        h0->slice_row[(h->slice_num-1)&(MAX_SLICES-1)]= s->resync_mb_y;
    if (   h0->slice_row[h->slice_num&(MAX_SLICES-1)] + 3 >= s->resync_mb_y
        && h0->slice_row[h->slice_num&(MAX_SLICES-1)] <= s->resync_mb_y
        && h->slice_num >= MAX_SLICES) {
        //in case of ASO this check needs to be updated depending on how we decide to assign slice numbers in this case
3087
        av_log(s->avctx, AV_LOG_WARNING, "Possibly too many slices (%d >= %d), increase MAX_SLICES and recompile if there are artifacts\n", h->slice_num, MAX_SLICES);
3088
    }
Loic Le Loarer's avatar
Loic Le Loarer committed
3089

3090
    for (j = 0; j < 2; j++) {
3091
        int id_list[16];
3092 3093 3094
        int *ref2frm = h->ref2frm[h->slice_num & (MAX_SLICES - 1)][j];
        for (i = 0; i < 16; i++) {
            id_list[i] = 60;
3095
            if (h->ref_list[j][i].f.data[0]) {
3096
                int k;
3097
                uint8_t *base = h->ref_list[j][i].f.base[0];
3098
                for (k = 0; k < h->short_ref_count; k++)
3099
                    if (h->short_ref[k]->f.base[0] == base) {
3100
                        id_list[i] = k;
3101 3102
                        break;
                    }
3103
                for (k = 0; k < h->long_ref_count; k++)
3104
                    if (h->long_ref[k] && h->long_ref[k]->f.base[0] == base) {
3105
                        id_list[i] = h->short_ref_count + k;
3106 3107 3108 3109 3110
                        break;
                    }
            }
        }

3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132
        ref2frm[0]     =
            ref2frm[1] = -1;
        for (i = 0; i < 16; i++)
            ref2frm[i + 2] = 4 * id_list[i] +
                             (h->ref_list[j][i].f.reference & 3);
        ref2frm[18 + 0]     =
            ref2frm[18 + 1] = -1;
        for (i = 16; i < 48; i++)
            ref2frm[i + 4] = 4 * id_list[(i - 16) >> 1] +
                             (h->ref_list[j][i].f.reference & 3);
    }

    // FIXME: fix draw_edges + PAFF + frame threads
    h->emu_edge_width  = (s->flags & CODEC_FLAG_EMU_EDGE ||
                          (!h->sps.frame_mbs_only_flag &&
                           s->avctx->active_thread_type))
                         ? 0 : 16;
    h->emu_edge_height = (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;

    if (s->avctx->debug & FF_DEBUG_PICT_INFO) {
        av_log(h->s.avctx, AV_LOG_DEBUG,
               "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
3133
               h->slice_num,
3134
               (s->picture_structure == PICT_FRAME ? "F" : s->picture_structure == PICT_TOP_FIELD ? "T" : "B"),
3135
               first_mb_in_slice,
3136 3137 3138
               av_get_picture_type_char(h->slice_type),
               h->slice_type_fixed ? " fix" : "",
               h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
3139
               pps_id, h->frame_num,
3140 3141
               s->current_picture_ptr->field_poc[0],
               s->current_picture_ptr->field_poc[1],
3142 3143
               h->ref_count[0], h->ref_count[1],
               s->qscale,
3144 3145
               h->deblocking_filter,
               h->slice_alpha_c0_offset / 2 - 26, h->slice_beta_offset / 2 - 26,
3146
               h->use_weight,
3147 3148
               h->use_weight == 1 && h->use_weight_chroma ? "c" : "",
               h->slice_type == AV_PICTURE_TYPE_B ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : "");
3149 3150 3151 3152 3153
    }

    return 0;
}

3154
int ff_h264_get_slice_type(const H264Context *h)
3155 3156
{
    switch (h->slice_type) {
3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168
    case AV_PICTURE_TYPE_P:
        return 0;
    case AV_PICTURE_TYPE_B:
        return 1;
    case AV_PICTURE_TYPE_I:
        return 2;
    case AV_PICTURE_TYPE_SP:
        return 3;
    case AV_PICTURE_TYPE_SI:
        return 4;
    default:
        return -1;
3169 3170 3171
    }
}

3172 3173 3174 3175 3176 3177 3178
/**
 * Prime the motion-vector and reference caches used by the loop filter
 * for one prediction list of the current macroblock.
 *
 * First the cache borders are filled from the bottom row of the top
 * neighbour and the right column of the left neighbour, then the 4x4
 * interior is filled from the current picture.  Reference indices are
 * translated through the per-slice ref2frm tables so that references
 * belonging to different slices compare consistently.
 */
static av_always_inline void fill_filter_caches_inter(H264Context *h,
                                                      MpegEncContext *const s,
                                                      int mb_type, int top_xy,
                                                      int left_xy[LEFT_MBS],
                                                      int top_type,
                                                      int left_type[LEFT_MBS],
                                                      int mb_xy, int list)
{
    const int b_stride    = h->b_stride;
    int16_t(*mv_cache)[2] = &h->mv_cache[list][scan8[0]];
    int8_t *ref_cache     = &h->ref_cache[list][scan8[0]];

    if (IS_INTER(mb_type) || IS_DIRECT(mb_type)) {
        /* Top border: last MV row / bottom ref pair of the MB above. */
        if (USES_LIST(top_type, list)) {
            const int b_xy  = h->mb2b_xy[top_xy] + 3 * b_stride;
            const int b8_xy = 4 * top_xy + 2;
            int (*ref2frm)[64] = (void *)(h->ref2frm[h->slice_table[top_xy] & (MAX_SLICES - 1)][0] + (MB_MBAFF ? 20 : 2));
            AV_COPY128(mv_cache - 1 * 8, s->current_picture.f.motion_val[list][b_xy + 0]);
            ref_cache[0 - 1 * 8] =
            ref_cache[1 - 1 * 8] = ref2frm[list][s->current_picture.f.ref_index[list][b8_xy + 0]];
            ref_cache[2 - 1 * 8] =
            ref_cache[3 - 1 * 8] = ref2frm[list][s->current_picture.f.ref_index[list][b8_xy + 1]];
        } else {
            AV_ZERO128(mv_cache - 1 * 8);
            AV_WN32A(&ref_cache[0 - 1 * 8],
                     ((LIST_NOT_USED) & 0xFF) * 0x01010101u);
        }

        /* Left border: only valid when both MBs use the same frame/field
         * coding; the mismatched MBAFF case is handled elsewhere. */
        if (!IS_INTERLACED(mb_type ^ left_type[LTOP])) {
            if (USES_LIST(left_type[LTOP], list)) {
                const int b_xy  = h->mb2b_xy[left_xy[LTOP]] + 3;
                const int b8_xy = 4 * left_xy[LTOP] + 1;
                int (*ref2frm)[64] = (void *)(h->ref2frm[h->slice_table[left_xy[LTOP]] & (MAX_SLICES - 1)][0] + (MB_MBAFF ? 20 : 2));
                AV_COPY32(mv_cache - 1 +  0, s->current_picture.f.motion_val[list][b_xy + b_stride * 0]);
                AV_COPY32(mv_cache - 1 +  8, s->current_picture.f.motion_val[list][b_xy + b_stride * 1]);
                AV_COPY32(mv_cache - 1 + 16, s->current_picture.f.motion_val[list][b_xy + b_stride * 2]);
                AV_COPY32(mv_cache - 1 + 24, s->current_picture.f.motion_val[list][b_xy + b_stride * 3]);
                ref_cache[-1 +  0] =
                ref_cache[-1 +  8] = ref2frm[list][s->current_picture.f.ref_index[list][b8_xy + 2 * 0]];
                ref_cache[-1 + 16] =
                ref_cache[-1 + 24] = ref2frm[list][s->current_picture.f.ref_index[list][b8_xy + 2 * 1]];
            } else {
                AV_ZERO32(mv_cache - 1 +  0);
                AV_ZERO32(mv_cache - 1 +  8);
                AV_ZERO32(mv_cache - 1 + 16);
                AV_ZERO32(mv_cache - 1 + 24);
                ref_cache[-1 +  0] =
                ref_cache[-1 +  8] =
                ref_cache[-1 + 16] =
                ref_cache[-1 + 24] = LIST_NOT_USED;
            }
        }
    }

    if (!USES_LIST(mb_type, list)) {
        /* The current MB does not predict from this list: flag every
         * interior cache entry as unused and stop here. */
        fill_rectangle(mv_cache, 4, 4, 8, pack16to32(0, 0), 4);
        AV_WN32A(&ref_cache[0 * 8], ((LIST_NOT_USED) & 0xFF) * 0x01010101u);
        AV_WN32A(&ref_cache[1 * 8], ((LIST_NOT_USED) & 0xFF) * 0x01010101u);
        AV_WN32A(&ref_cache[2 * 8], ((LIST_NOT_USED) & 0xFF) * 0x01010101u);
        AV_WN32A(&ref_cache[3 * 8], ((LIST_NOT_USED) & 0xFF) * 0x01010101u);
        return;
    }

    {
        /* Interior refs: each 8x8 reference index covers a 2x2 cache area. */
        int8_t *ref = &s->current_picture.f.ref_index[list][4 * mb_xy];
        int (*ref2frm)[64] = (void *)(h->ref2frm[h->slice_num & (MAX_SLICES - 1)][0] + (MB_MBAFF ? 20 : 2));
        uint32_t ref01 = (pack16to32(ref2frm[list][ref[0]], ref2frm[list][ref[1]]) & 0x00FF00FF) * 0x0101;
        uint32_t ref23 = (pack16to32(ref2frm[list][ref[2]], ref2frm[list][ref[3]]) & 0x00FF00FF) * 0x0101;
        AV_WN32A(&ref_cache[0 * 8], ref01);
        AV_WN32A(&ref_cache[1 * 8], ref01);
        AV_WN32A(&ref_cache[2 * 8], ref23);
        AV_WN32A(&ref_cache[3 * 8], ref23);
    }

    {
        /* Interior MVs: row-by-row copy out of the current picture. */
        int16_t(*mv_src)[2] = &s->current_picture.f.motion_val[list][4 * s->mb_x + 4 * s->mb_y * b_stride];
        AV_COPY128(mv_cache + 8 * 0, mv_src + 0 * b_stride);
        AV_COPY128(mv_cache + 8 * 1, mv_src + 1 * b_stride);
        AV_COPY128(mv_cache + 8 * 2, mv_src + 2 * b_stride);
        AV_COPY128(mv_cache + 8 * 3, mv_src + 3 * b_stride);
    }
}

3253 3254
/**
 *
3255
 * @return non zero if the loop filter can be skipped
3256
 */
3257 3258 3259 3260
static int fill_filter_caches(H264Context *h, int mb_type)
{
    MpegEncContext *const s = &h->s;
    const int mb_xy = h->mb_xy;
3261 3262
    int top_xy, left_xy[LEFT_MBS];
    int top_type, left_type[LEFT_MBS];
3263 3264
    uint8_t *nnz;
    uint8_t *nnz_cache;
3265

3266
    top_xy = mb_xy - (s->mb_stride << MB_FIELD);
3267 3268 3269 3270

    /* Wow, what a mess, why didn't they simplify the interlacing & intra
     * stuff, I can't imagine that these complex rules are worth it. */

3271 3272 3273 3274 3275 3276
    left_xy[LBOT] = left_xy[LTOP] = mb_xy - 1;
    if (FRAME_MBAFF) {
        const int left_mb_field_flag = IS_INTERLACED(s->current_picture.f.mb_type[mb_xy - 1]);
        const int curr_mb_field_flag = IS_INTERLACED(mb_type);
        if (s->mb_y & 1) {
            if (left_mb_field_flag != curr_mb_field_flag)
3277
                left_xy[LTOP] -= s->mb_stride;
3278 3279 3280 3281 3282
        } else {
            if (curr_mb_field_flag)
                top_xy += s->mb_stride &
                    (((s->current_picture.f.mb_type[top_xy] >> 7) & 1) - 1);
            if (left_mb_field_flag != curr_mb_field_flag)
3283
                left_xy[LBOT] += s->mb_stride;
3284 3285 3286
        }
    }

3287
    h->top_mb_xy        = top_xy;
3288 3289
    h->left_mb_xy[LTOP] = left_xy[LTOP];
    h->left_mb_xy[LBOT] = left_xy[LBOT];
3290
    {
3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301
        /* For sufficiently low qp, filtering wouldn't do anything.
         * This is a conservative estimate: could also check beta_offset
         * and more accurate chroma_qp. */
        int qp_thresh = h->qp_thresh; // FIXME strictly we should store qp_thresh for each mb of a slice
        int qp        = s->current_picture.f.qscale_table[mb_xy];
        if (qp <= qp_thresh &&
            (left_xy[LTOP] < 0 ||
             ((qp + s->current_picture.f.qscale_table[left_xy[LTOP]] + 1) >> 1) <= qp_thresh) &&
            (top_xy < 0 ||
             ((qp + s->current_picture.f.qscale_table[top_xy] + 1) >> 1) <= qp_thresh)) {
            if (!FRAME_MBAFF)
3302
                return 1;
3303 3304 3305 3306
            if ((left_xy[LTOP] < 0 ||
                 ((qp + s->current_picture.f.qscale_table[left_xy[LBOT]] + 1) >> 1) <= qp_thresh) &&
                (top_xy < s->mb_stride ||
                 ((qp + s->current_picture.f.qscale_table[top_xy - s->mb_stride] + 1) >> 1) <= qp_thresh))
3307 3308 3309 3310
                return 1;
        }
    }

3311 3312 3313
    top_type        = s->current_picture.f.mb_type[top_xy];
    left_type[LTOP] = s->current_picture.f.mb_type[left_xy[LTOP]];
    left_type[LBOT] = s->current_picture.f.mb_type[left_xy[LBOT]];
3314 3315 3316 3317 3318 3319 3320 3321 3322 3323
    if (h->deblocking_filter == 2) {
        if (h->slice_table[top_xy] != h->slice_num)
            top_type = 0;
        if (h->slice_table[left_xy[LBOT]] != h->slice_num)
            left_type[LTOP] = left_type[LBOT] = 0;
    } else {
        if (h->slice_table[top_xy] == 0xFFFF)
            top_type = 0;
        if (h->slice_table[left_xy[LBOT]] == 0xFFFF)
            left_type[LTOP] = left_type[LBOT] = 0;
3324
    }
3325 3326 3327
    h->top_type        = top_type;
    h->left_type[LTOP] = left_type[LTOP];
    h->left_type[LBOT] = left_type[LBOT];
3328

3329
    if (IS_INTRA(mb_type))
3330 3331
        return 0;

3332 3333 3334 3335 3336
    fill_filter_caches_inter(h, s, mb_type, top_xy, left_xy,
                             top_type, left_type, mb_xy, 0);
    if (h->list_count == 2)
        fill_filter_caches_inter(h, s, mb_type, top_xy, left_xy,
                                 top_type, left_type, mb_xy, 1);
3337

3338
    nnz       = h->non_zero_count[mb_xy];
3339
    nnz_cache = h->non_zero_count_cache;
3340 3341 3342 3343 3344
    AV_COPY32(&nnz_cache[4 + 8 * 1], &nnz[0]);
    AV_COPY32(&nnz_cache[4 + 8 * 2], &nnz[4]);
    AV_COPY32(&nnz_cache[4 + 8 * 3], &nnz[8]);
    AV_COPY32(&nnz_cache[4 + 8 * 4], &nnz[12]);
    h->cbp = h->cbp_table[mb_xy];
3345

3346
    if (top_type) {
3347
        nnz = h->non_zero_count[top_xy];
3348
        AV_COPY32(&nnz_cache[4 + 8 * 0], &nnz[3 * 4]);
3349 3350
    }

3351
    if (left_type[LTOP]) {
3352
        nnz = h->non_zero_count[left_xy[LTOP]];
3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366
        nnz_cache[3 + 8 * 1] = nnz[3 + 0 * 4];
        nnz_cache[3 + 8 * 2] = nnz[3 + 1 * 4];
        nnz_cache[3 + 8 * 3] = nnz[3 + 2 * 4];
        nnz_cache[3 + 8 * 4] = nnz[3 + 3 * 4];
    }

    /* CAVLC 8x8dct requires NNZ values for residual decoding that differ
     * from what the loop filter needs */
    if (!CABAC && h->pps.transform_8x8_mode) {
        if (IS_8x8DCT(top_type)) {
            nnz_cache[4 + 8 * 0]     =
                nnz_cache[5 + 8 * 0] = (h->cbp_table[top_xy] & 0x4000) >> 12;
            nnz_cache[6 + 8 * 0]     =
                nnz_cache[7 + 8 * 0] = (h->cbp_table[top_xy] & 0x8000) >> 12;
3367
        }
3368 3369 3370
        if (IS_8x8DCT(left_type[LTOP])) {
            nnz_cache[3 + 8 * 1]     =
                nnz_cache[3 + 8 * 2] = (h->cbp_table[left_xy[LTOP]] & 0x2000) >> 12; // FIXME check MBAFF
3371
        }
3372 3373 3374
        if (IS_8x8DCT(left_type[LBOT])) {
            nnz_cache[3 + 8 * 3]     =
                nnz_cache[3 + 8 * 4] = (h->cbp_table[left_xy[LBOT]] & 0x8000) >> 12; // FIXME check MBAFF
3375 3376
        }

3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396
        if (IS_8x8DCT(mb_type)) {
            nnz_cache[scan8[0]] =
            nnz_cache[scan8[1]] =
            nnz_cache[scan8[2]] =
            nnz_cache[scan8[3]] = (h->cbp & 0x1000) >> 12;

            nnz_cache[scan8[0 + 4]] =
            nnz_cache[scan8[1 + 4]] =
            nnz_cache[scan8[2 + 4]] =
            nnz_cache[scan8[3 + 4]] = (h->cbp & 0x2000) >> 12;

            nnz_cache[scan8[0 + 8]] =
            nnz_cache[scan8[1 + 8]] =
            nnz_cache[scan8[2 + 8]] =
            nnz_cache[scan8[3 + 8]] = (h->cbp & 0x4000) >> 12;

            nnz_cache[scan8[0 + 12]] =
            nnz_cache[scan8[1 + 12]] =
            nnz_cache[scan8[2 + 12]] =
            nnz_cache[scan8[3 + 12]] = (h->cbp & 0x8000) >> 12;
3397 3398 3399 3400 3401 3402
        }
    }

    return 0;
}

3403 3404 3405 3406
/**
 * Run the in-loop deblocking filter over the macroblock columns
 * [start_x, end_x) of the current MB row (both rows of a pair when
 * MBAFF is active), then restore the per-slice state clobbered by
 * the traversal.
 */
static void loop_filter(H264Context *h, int start_x, int end_x)
{
    MpegEncContext *const s = &h->s;
    uint8_t *dst_y, *dst_cb, *dst_cr;
    int y_stride, uv_stride, mb_col, mb_row;
    const int end_mb_y       = s->mb_y + FRAME_MBAFF;
    const int old_slice_type = h->slice_type;
    const int pixel_shift    = h->pixel_shift;
    const int block_h        = 16 >> s->chroma_y_shift;

    if (h->deblocking_filter) {
        for (mb_col = start_x; mb_col < end_x; mb_col++)
            for (mb_row = end_mb_y - FRAME_MBAFF; mb_row <= end_mb_y; mb_row++) {
                int mb_xy, mb_type;

                mb_xy         = h->mb_xy = mb_col + mb_row * s->mb_stride;
                h->slice_num  = h->slice_table[mb_xy];
                mb_type       = s->current_picture.f.mb_type[mb_xy];
                h->list_count = h->list_counts[mb_xy];

                if (FRAME_MBAFF)
                    h->mb_mbaff               =
                    h->mb_field_decoding_flag = !!IS_INTERLACED(mb_type);

                s->mb_x = mb_col;
                s->mb_y = mb_row;
                dst_y  = s->current_picture.f.data[0] +
                         ((mb_col << pixel_shift) + mb_row * s->linesize) * 16;
                dst_cb = s->current_picture.f.data[1] +
                         (mb_col << pixel_shift) * (8 << CHROMA444) +
                         mb_row * s->uvlinesize * block_h;
                dst_cr = s->current_picture.f.data[2] +
                         (mb_col << pixel_shift) * (8 << CHROMA444) +
                         mb_row * s->uvlinesize * block_h;
                // FIXME simplify above

                if (MB_FIELD) {
                    y_stride  = h->mb_linesize   = s->linesize   * 2;
                    uv_stride = h->mb_uvlinesize = s->uvlinesize * 2;
                    if (mb_row & 1) { // FIXME move out of this function?
                        dst_y  -= s->linesize   * 15;
                        dst_cb -= s->uvlinesize * (block_h - 1);
                        dst_cr -= s->uvlinesize * (block_h - 1);
                    }
                } else {
                    y_stride  = h->mb_linesize   = s->linesize;
                    uv_stride = h->mb_uvlinesize = s->uvlinesize;
                }
                backup_mb_border(h, dst_y, dst_cb, dst_cr, y_stride,
                                 uv_stride, 0);
                /* Skip the MB when the qp is provably below the filter
                 * threshold. */
                if (fill_filter_caches(h, mb_type))
                    continue;
                h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.f.qscale_table[mb_xy]);
                h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.f.qscale_table[mb_xy]);

                if (FRAME_MBAFF) {
                    ff_h264_filter_mb(h, mb_col, mb_row, dst_y, dst_cb, dst_cr,
                                      y_stride, uv_stride);
                } else {
                    ff_h264_filter_mb_fast(h, mb_col, mb_row, dst_y, dst_cb,
                                           dst_cr, y_stride, uv_stride);
                }
            }
    }
    /* Restore state for the caller. */
    h->slice_type   = old_slice_type;
    s->mb_x         = end_x;
    s->mb_y         = end_mb_y - FRAME_MBAFF;
    h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
    h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
}

3473 3474 3475 3476 3477 3478 3479 3480 3481
/**
 * Guess the field decoding flag of a missing/skipped MB from its left
 * (preferred) or top neighbour, provided that neighbour belongs to the
 * same slice; otherwise assume frame coding.
 */
static void predict_field_decoding_flag(H264Context *h)
{
    MpegEncContext *const s = &h->s;
    const int mb_xy    = s->mb_x + s->mb_y * s->mb_stride;
    int neighbour_type = 0;

    if (h->slice_table[mb_xy - 1] == h->slice_num)
        neighbour_type = s->current_picture.f.mb_type[mb_xy - 1];
    else if (h->slice_table[mb_xy - s->mb_stride] == h->slice_num)
        neighbour_type = s->current_picture.f.mb_type[mb_xy - s->mb_stride];

    h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(neighbour_type) ? 1 : 0;
}

3484 3485 3486
/**
 * Draw edges and report progress for the last MB row.
 */
3487 3488 3489 3490 3491 3492
static void decode_finish_row(H264Context *h)
{
    MpegEncContext *const s = &h->s;
    int top            = 16 * (s->mb_y      >> FIELD_PICTURE);
    int pic_height     = 16 *  s->mb_height >> FIELD_PICTURE;
    int height         =  16      << FRAME_MBAFF;
3493 3494 3495
    int deblock_border = (16 + 4) << FRAME_MBAFF;

    if (h->deblocking_filter) {
3496
        if ((top + height) >= pic_height)
3497 3498 3499 3500 3501 3502 3503 3504 3505
            height += deblock_border;
        top -= deblock_border;
    }

    if (top >= pic_height || (top + height) < h->emu_edge_height)
        return;

    height = FFMIN(height, pic_height - top);
    if (top < h->emu_edge_height) {
3506 3507
        height = top + height;
        top    = 0;
3508 3509 3510 3511
    }

    ff_draw_horiz_band(s, top, height);

3512 3513
    if (s->dropable)
        return;
3514

3515 3516
    ff_thread_report_progress(&s->current_picture_ptr->f, top + height - 1,
                              s->picture_structure == PICT_BOTTOM_FIELD);
3517 3518
}

/**
 * Decode the macroblocks of one slice.
 *
 * Entry point used directly and via avctx->execute() for slice threading.
 *
 * @param avctx the codec context
 * @param arg   pointer to the slice's H264Context (thread context)
 * @return 0 when the slice end is reached, -1 on decoding error
 */
static int decode_slice(struct AVCodecContext *avctx, void *arg)
{
    H264Context *h = *(void **)arg;
    MpegEncContext *const s = &h->s;
    /* Mask applied to the error codes passed to ff_er_add_slice(). */
    const int part_mask     = s->partitioned_frame ? (ER_AC_END | ER_AC_ERROR)
                                                   : 0x7F;
    /* First MB column not yet run through the loop filter. */
    int lf_x_start = s->mb_x;

    s->mb_skip_run = -1;

    h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME ||
                    s->codec_id != AV_CODEC_ID_H264 ||
                    (CONFIG_GRAY && (s->flags & CODEC_FLAG_GRAY));

    if (h->pps.cabac) {
        /* realign */
        align_get_bits(&s->gb);

        /* init cabac */
        ff_init_cabac_states();
        ff_init_cabac_decoder(&h->cabac,
                              s->gb.buffer + get_bits_count(&s->gb) / 8,
                              (get_bits_left(&s->gb) + 7) / 8);

        ff_h264_init_cabac_states(h);

        for (;;) {
            // START_TIMER
            int ret = ff_h264_decode_mb_cabac(h);
            int eos;
            // STOP_TIMER("decode_mb_cabac")

            if (ret >= 0)
                ff_h264_hl_decode_mb(h);

            // FIXME optimal? or let mb_decode decode 16x32 ?
            /* In MBAFF frames MBs come in vertical pairs: decode the
             * bottom MB of the pair before advancing horizontally. */
            if (ret >= 0 && FRAME_MBAFF) {
                s->mb_y++;

                ret = ff_h264_decode_mb_cabac(h);

                if (ret >= 0)
                    ff_h264_hl_decode_mb(h);
                s->mb_y--;
            }
            /* CABAC end-of-slice terminator bit. */
            eos = get_cabac_terminate(&h->cabac);

            if ((s->workaround_bugs & FF_BUG_TRUNCATED) &&
                h->cabac.bytestream > h->cabac.bytestream_end + 2) {
                /* Known-truncated stream: treat the overread as slice end. */
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x - 1,
                                s->mb_y, ER_MB_END & part_mask);
                if (s->mb_x >= lf_x_start)
                    loop_filter(h, lf_x_start, s->mb_x + 1);
                return 0;
            }
            if (h->cabac.bytestream > h->cabac.bytestream_end + 2 )
                av_log(h->s.avctx, AV_LOG_DEBUG, "bytestream overread %td\n", h->cabac.bytestream_end - h->cabac.bytestream);
            if (ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 4) {
                av_log(h->s.avctx, AV_LOG_ERROR,
                       "error while decoding MB %d %d, bytestream (%td)\n",
                       s->mb_x, s->mb_y,
                       h->cabac.bytestream_end - h->cabac.bytestream);
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x,
                                s->mb_y, ER_MB_ERROR & part_mask);
                return -1;
            }

            /* End of MB row: deblock it, then step to the next row. */
            if (++s->mb_x >= s->mb_width) {
                loop_filter(h, lf_x_start, s->mb_x);
                s->mb_x = lf_x_start = 0;
                decode_finish_row(h);
                ++s->mb_y;
                if (FIELD_OR_MBAFF_PICTURE) {
                    /* Rows advance by two (field/MB pair). */
                    ++s->mb_y;
                    if (FRAME_MBAFF && s->mb_y < s->mb_height)
                        predict_field_decoding_flag(h);
                }
            }

            if (eos || s->mb_y >= s->mb_height) {
                tprintf(s->avctx, "slice end %d %d\n",
                        get_bits_count(&s->gb), s->gb.size_in_bits);
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x - 1,
                                s->mb_y, ER_MB_END & part_mask);
                if (s->mb_x > lf_x_start)
                    loop_filter(h, lf_x_start, s->mb_x);
                return 0;
            }
        }
    } else {
        /* CAVLC path — same structure as the CABAC loop above. */
        for (;;) {
            int ret = ff_h264_decode_mb_cavlc(h);

            if (ret >= 0)
                ff_h264_hl_decode_mb(h);

            // FIXME optimal? or let mb_decode decode 16x32 ?
            if (ret >= 0 && FRAME_MBAFF) {
                s->mb_y++;
                ret = ff_h264_decode_mb_cavlc(h);

                if (ret >= 0)
                    ff_h264_hl_decode_mb(h);
                s->mb_y--;
            }

            if (ret < 0) {
                av_log(h->s.avctx, AV_LOG_ERROR,
                       "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x,
                                s->mb_y, ER_MB_ERROR & part_mask);
                return -1;
            }

            if (++s->mb_x >= s->mb_width) {
                loop_filter(h, lf_x_start, s->mb_x);
                s->mb_x = lf_x_start = 0;
                decode_finish_row(h);
                ++s->mb_y;
                if (FIELD_OR_MBAFF_PICTURE) {
                    ++s->mb_y;
                    if (FRAME_MBAFF && s->mb_y < s->mb_height)
                        predict_field_decoding_flag(h);
                }
                if (s->mb_y >= s->mb_height) {
                    tprintf(s->avctx, "slice end %d %d\n",
                            get_bits_count(&s->gb), s->gb.size_in_bits);

                    /* Leftover bits are tolerated unless the caller asked
                     * for aggressive error detection. */
                    if (   get_bits_left(&s->gb) == 0
                        || get_bits_left(&s->gb) > 0 && !(s->avctx->err_recognition & AV_EF_AGGRESSIVE)) {
                        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y,
                                        s->mb_x - 1, s->mb_y,
                                        ER_MB_END & part_mask);

                        return 0;
                    } else {
                        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y,
                                        s->mb_x, s->mb_y,
                                        ER_MB_END & part_mask);

                        return -1;
                    }
                }
            }

            /* Bitstream exhausted before the last MB row. */
            if (get_bits_left(&s->gb) <= 0 && s->mb_skip_run <= 0) {
                tprintf(s->avctx, "slice end %d %d\n",
                        get_bits_count(&s->gb), s->gb.size_in_bits);
                if (get_bits_left(&s->gb) == 0) {
                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y,
                                    s->mb_x - 1, s->mb_y,
                                    ER_MB_END & part_mask);
                    if (s->mb_x > lf_x_start)
                        loop_filter(h, lf_x_start, s->mb_x);

                    return 0;
                } else {
                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x,
                                    s->mb_y, ER_MB_ERROR & part_mask);

                    return -1;
                }
            }
        }
    }
}

3686 3687 3688 3689 3690 3691
/**
 * Call decode_slice() for each context.
 *
 * @param h h264 master context
 * @param context_count number of contexts to execute
 */
3692 3693 3694 3695
static int execute_decode_slices(H264Context *h, int context_count)
{
    MpegEncContext *const s     = &h->s;
    AVCodecContext *const avctx = s->avctx;
3696 3697 3698
    H264Context *hx;
    int i;

3699 3700
    if (s->avctx->hwaccel ||
        s->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU)
3701
        return 0;
3702
    if (context_count == 1) {
3703
        return decode_slice(avctx, &h);
3704
    } else {
3705 3706
        for (i = 1; i < context_count; i++) {
            hx                    = h->thread_context[i];
3707
            hx->s.err_recognition = avctx->err_recognition;
3708
            hx->s.error_count     = 0;
3709
            hx->x264_build        = h->x264_build;
3710 3711
        }

3712 3713
        avctx->execute(avctx, decode_slice, h->thread_context,
                       NULL, context_count, sizeof(void *));
3714 3715

        /* pull back stuff from slices to master context */
3716 3717 3718 3719
        hx                   = h->thread_context[context_count - 1];
        s->mb_x              = hx->s.mb_x;
        s->mb_y              = hx->s.mb_y;
        s->dropable          = hx->s.dropable;
3720
        s->picture_structure = hx->s.picture_structure;
3721
        for (i = 1; i < context_count; i++)
3722 3723
            h->s.error_count += h->thread_context[i]->s.error_count;
    }
3724 3725

    return 0;
3726 3727
}

/**
 * Split the input buffer into NAL units and decode them.
 *
 * With frame threading enabled this runs in two passes: pass 0 only scans
 * the buffer to count how many NALs must be decoded before the next frame
 * thread may start; pass 1 does the actual decoding. Without frame
 * threading only pass 1 runs.
 *
 * @return number of bytes consumed, or -1 on error
 */
static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size)
{
    MpegEncContext *const s     = &h->s;
    AVCodecContext *const avctx = s->avctx;
    H264Context *hx; ///< thread context
    int buf_index;
    int context_count;
    int next_avc;     ///< end of the current length-prefixed NAL (whole buffer for Annex B)
    int pass = !(avctx->active_thread_type & FF_THREAD_FRAME);
    int nals_needed = 0; ///< number of NALs that need decoding before the next frame thread starts
    int nal_index;

    h->nal_unit_type= 0;

    if(!s->slice_context_count)
         s->slice_context_count= 1;
    h->max_contexts = s->slice_context_count;
    if (!(s->flags2 & CODEC_FLAG2_CHUNKS)) {
        /* Complete-frame input: start a fresh picture and SEI state. */
        h->current_slice = 0;
        if (!s->first_field)
            s->current_picture_ptr = NULL;
        ff_h264_reset_sei(h);
    }

    for (; pass <= 1; pass++) {
        buf_index     = 0;
        context_count = 0;
        next_avc      = h->is_avc ? 0 : buf_size;
        nal_index     = 0;
        for (;;) {
            int consumed;
            int dst_length;
            int bit_length;
            const uint8_t *ptr;
            int i, nalsize = 0;
            int err;

            if (buf_index >= next_avc) {
                /* AVC/mp4 style: read the big-endian NAL length prefix. */
                if (buf_index >= buf_size - h->nal_length_size)
                    break;
                nalsize = 0;
                for (i = 0; i < h->nal_length_size; i++)
                    nalsize = (nalsize << 8) | buf[buf_index++];
                if (nalsize <= 0 || nalsize > buf_size - buf_index) {
                    av_log(h->s.avctx, AV_LOG_ERROR,
                           "AVC: nal size %d\n", nalsize);
                    break;
                }
                next_avc = buf_index + nalsize;
            } else {
                // start code prefix search
                for (; buf_index + 3 < next_avc; buf_index++)
                    // This should always succeed in the first iteration.
                    if (buf[buf_index]     == 0 &&
                        buf[buf_index + 1] == 0 &&
                        buf[buf_index + 2] == 1)
                        break;

                if (buf_index + 3 >= buf_size)
                    break;

                buf_index += 3;
                if (buf_index >= next_avc)
                    continue;
            }

            hx = h->thread_context[context_count];

            ptr = ff_h264_decode_nal(hx, buf + buf_index, &dst_length,
                                     &consumed, next_avc - buf_index);
            if (ptr == NULL || dst_length < 0) {
                buf_index = -1;
                goto end;
            }
            /* Detect streams truncated after an 0x000001E0 marker. */
            i = buf_index + consumed;
            if ((s->workaround_bugs & FF_BUG_AUTODETECT) && i + 3 < next_avc &&
                buf[i]     == 0x00 && buf[i + 1] == 0x00 &&
                buf[i + 2] == 0x01 && buf[i + 3] == 0xE0)
                s->workaround_bugs |= FF_BUG_TRUNCATED;

            if (!(s->workaround_bugs & FF_BUG_TRUNCATED))
                while(dst_length > 0 && ptr[dst_length - 1] == 0)
                    dst_length--;
            bit_length = !dst_length ? 0
                                     : (8 * dst_length -
                                        decode_rbsp_trailing(h, ptr + dst_length - 1));

            if (s->avctx->debug & FF_DEBUG_STARTCODE)
                av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d/%d at %d/%d length %d pass %d\n", hx->nal_unit_type, hx->nal_ref_idc, buf_index, buf_size, dst_length, pass);

            if (h->is_avc && (nalsize != consumed) && nalsize)
                av_log(h->s.avctx, AV_LOG_DEBUG,
                       "AVC: Consumed only %d bytes instead of %d\n",
                       consumed, nalsize);

            buf_index += consumed;
            nal_index++;

            if (pass == 0) {
                /* packets can sometimes contain multiple PPS/SPS,
                 * e.g. two PAFF field pictures in one packet, or a demuxer
                 * which splits NALs strangely if so, when frame threading we
                 * can't start the next thread until we've read all of them */
                switch (hx->nal_unit_type) {
                case NAL_SPS:
                case NAL_PPS:
                    nals_needed = nal_index;
                    break;
                case NAL_IDR_SLICE:
                case NAL_SLICE:
                    init_get_bits(&hx->s.gb, ptr, bit_length);
                    /* first_mb_in_slice == 0 marks a new picture start. */
                    if (!get_ue_golomb(&hx->s.gb))
                        nals_needed = nal_index;
                }
                continue;
            }

            // FIXME do not discard SEI id
            if (avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0)
                continue;

again:
            err = 0;
            switch (hx->nal_unit_type) {
            case NAL_IDR_SLICE:
                if (h->nal_unit_type != NAL_IDR_SLICE) {
                    av_log(h->s.avctx, AV_LOG_ERROR,
                           "Invalid mix of idr and non-idr slices\n");
                    buf_index = -1;
                    goto end;
                }
                idr(h); // FIXME ensure we don't lose some frames if there is reordering
                /* fall through — an IDR slice is decoded like a regular slice */
            case NAL_SLICE:
                init_get_bits(&hx->s.gb, ptr, bit_length);
                hx->intra_gb_ptr        =
                    hx->inter_gb_ptr    = &hx->s.gb;
                hx->s.data_partitioning = 0;

                if ((err = decode_slice_header(hx, h)))
                    break;

                if (h->sei_recovery_frame_cnt >= 0 && (h->frame_num != h->sei_recovery_frame_cnt || hx->slice_type_nos != AV_PICTURE_TYPE_I))
                    h->valid_recovery_point = 1;

                /* Track the frame at which SEI recovery-point decoding
                 * becomes clean (modulo max_frame_num wraparound). */
                if (   h->sei_recovery_frame_cnt >= 0
                    && (   h->recovery_frame<0
                        || ((h->recovery_frame - h->frame_num) & ((1 << h->sps.log2_max_frame_num)-1)) > h->sei_recovery_frame_cnt)) {
                    h->recovery_frame = (h->frame_num + h->sei_recovery_frame_cnt) %
                                        (1 << h->sps.log2_max_frame_num);

                    if (!h->valid_recovery_point)
                        h->recovery_frame = h->frame_num;
                }

                s->current_picture_ptr->f.key_frame |=
                        (hx->nal_unit_type == NAL_IDR_SLICE);

                if (h->recovery_frame == h->frame_num) {
                    s->current_picture_ptr->sync |= 1;
                    h->recovery_frame = -1;
                }

                h->sync |= !!s->current_picture_ptr->f.key_frame;
                h->sync |= 3*!!(s->flags2 & CODEC_FLAG2_SHOW_ALL);
                s->current_picture_ptr->sync |= h->sync;

                if (h->current_slice == 1) {
                    /* First slice of the picture: finish frame setup and
                     * notify any hardware acceleration. */
                    if (!(s->flags2 & CODEC_FLAG2_CHUNKS))
                        decode_postinit(h, nal_index >= nals_needed);

                    if (s->avctx->hwaccel &&
                        s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0)
                        return -1;
                    if (CONFIG_H264_VDPAU_DECODER &&
                        s->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU)
                        ff_vdpau_h264_picture_start(s);
                }

                /* Decode the slice unless the skip_frame policy drops it. */
                if (hx->redundant_pic_count == 0 &&
                    (avctx->skip_frame < AVDISCARD_NONREF ||
                     hx->nal_ref_idc) &&
                    (avctx->skip_frame < AVDISCARD_BIDIR  ||
                     hx->slice_type_nos != AV_PICTURE_TYPE_B) &&
                    (avctx->skip_frame < AVDISCARD_NONKEY ||
                     hx->slice_type_nos == AV_PICTURE_TYPE_I) &&
                    avctx->skip_frame < AVDISCARD_ALL) {
                    if (avctx->hwaccel) {
                        if (avctx->hwaccel->decode_slice(avctx,
                                                         &buf[buf_index - consumed],
                                                         consumed) < 0)
                            return -1;
                    } else if (CONFIG_H264_VDPAU_DECODER &&
                               s->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU) {
                        static const uint8_t start_code[] = {
                            0x00, 0x00, 0x01 };
                        ff_vdpau_add_data_chunk(s, start_code,
                                                sizeof(start_code));
                        ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed],
                                                consumed);
                    } else
                        context_count++;
                }
                break;
            case NAL_DPA:
                init_get_bits(&hx->s.gb, ptr, bit_length);
                hx->intra_gb_ptr =
                hx->inter_gb_ptr = NULL;

                if ((err = decode_slice_header(hx, h)) < 0)
                    break;

                hx->s.data_partitioning = 1;
                break;
            case NAL_DPB:
                init_get_bits(&hx->intra_gb, ptr, bit_length);
                hx->intra_gb_ptr = &hx->intra_gb;
                break;
            case NAL_DPC:
                init_get_bits(&hx->inter_gb, ptr, bit_length);
                hx->inter_gb_ptr = &hx->inter_gb;

                av_log(h->s.avctx, AV_LOG_ERROR, "Partitioned H.264 support is incomplete\n");
                return AVERROR_PATCHWELCOME;

                /* NOTE: unreachable until data partitioning is implemented. */
                if (hx->redundant_pic_count == 0 &&
                    hx->intra_gb_ptr &&
                    hx->s.data_partitioning &&
                    s->context_initialized &&
                    (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc) &&
                    (avctx->skip_frame < AVDISCARD_BIDIR  ||
                     hx->slice_type_nos != AV_PICTURE_TYPE_B) &&
                    (avctx->skip_frame < AVDISCARD_NONKEY ||
                     hx->slice_type_nos == AV_PICTURE_TYPE_I) &&
                    avctx->skip_frame < AVDISCARD_ALL)
                    context_count++;
                break;
            case NAL_SEI:
                init_get_bits(&s->gb, ptr, bit_length);
                ff_h264_decode_sei(h);
                break;
            case NAL_SPS:
                init_get_bits(&s->gb, ptr, bit_length);
                if (ff_h264_decode_seq_parameter_set(h) < 0 && (h->is_avc ? (nalsize != consumed) && nalsize : 1)) {
                    /* Retry with the full NAL in case escape-code removal
                     * consumed less than the declared size. */
                    av_log(h->s.avctx, AV_LOG_DEBUG,
                           "SPS decoding failure, trying again with the complete NAL\n");
                    if (h->is_avc)
                        av_assert0(next_avc - buf_index + consumed == nalsize);
                    init_get_bits(&s->gb, &buf[buf_index + 1 - consumed],
                                  8*(next_avc - buf_index + consumed - 1));
                    ff_h264_decode_seq_parameter_set(h);
                }

                if (s->flags & CODEC_FLAG_LOW_DELAY ||
                    (h->sps.bitstream_restriction_flag &&
                     !h->sps.num_reorder_frames))
                    s->low_delay = 1;
                if (avctx->has_b_frames < 2)
                    avctx->has_b_frames = !s->low_delay;
                break;
            case NAL_PPS:
                init_get_bits(&s->gb, ptr, bit_length);
                ff_h264_decode_picture_parameter_set(h, bit_length);
                break;
            case NAL_AUD:
            case NAL_END_SEQUENCE:
            case NAL_END_STREAM:
            case NAL_FILLER_DATA:
            case NAL_SPS_EXT:
            case NAL_AUXILIARY_SLICE:
                break;
            default:
                av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n",
                       hx->nal_unit_type, bit_length);
            }

            if (context_count == h->max_contexts) {
                execute_decode_slices(h, context_count);
                context_count = 0;
            }

            if (err < 0)
                av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
            else if (err == 1) {
                /* Slice could not be decoded in parallel mode, copy down
                 * NAL unit stuff to context 0 and restart. Note that
                 * rbsp_buffer is not transferred, but since we no longer
                 * run in parallel mode this should not be an issue. */
                h->nal_unit_type = hx->nal_unit_type;
                h->nal_ref_idc   = hx->nal_ref_idc;
                hx               = h;
                goto again;
            }
        }
    }
    if (context_count)
        execute_decode_slices(h, context_count);

end:
    /* clean up */
    if (s->current_picture_ptr && s->current_picture_ptr->owner2 == s &&
        !s->dropable) {
        ff_thread_report_progress(&s->current_picture_ptr->f, INT_MAX,
                                  s->picture_structure == PICT_BOTTOM_FIELD);
    }

    return buf_index;
}

/**
4037
 * Return the number of bytes consumed for building the current frame.
4038
 */
4039 4040 4041 4042 4043 4044
static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size)
{
    if (pos == 0)
        pos = 1;          // avoid infinite loops (i doubt that is needed but ...)
    if (pos + 10 > buf_size)
        pos = buf_size;                   // oops ;)
4045

4046
    return pos;
4047 4048
}

/**
 * Decode one access unit; AVCodec.decode entry point.
 *
 * @param avctx     codec context
 * @param data      output AVFrame
 * @param data_size set to sizeof(AVFrame) when a frame is returned, else 0
 * @param avpkt     input packet (empty packet flushes delayed pictures)
 * @return number of bytes consumed, or a negative value on error
 */
static int decode_frame(AVCodecContext *avctx, void *data,
                        int *data_size, AVPacket *avpkt)
{
    const uint8_t *buf = avpkt->data;
    int buf_size       = avpkt->size;
    H264Context *h     = avctx->priv_data;
    MpegEncContext *s  = &h->s;
    AVFrame *pict      = data;
    int buf_index      = 0;
    Picture *out;
    int i, out_idx;

    s->flags  = avctx->flags;
    s->flags2 = avctx->flags2;

    /* end of stream, output what is still in the buffers */
    if (buf_size == 0) {
 out:

        s->current_picture_ptr = NULL;

        // FIXME factorize this with the output code below
        /* Pick the delayed picture with the smallest POC up to the next
         * keyframe/mmco_reset boundary. */
        out     = h->delayed_pic[0];
        out_idx = 0;
        for (i = 1;
             h->delayed_pic[i] &&
             !h->delayed_pic[i]->f.key_frame &&
             !h->delayed_pic[i]->mmco_reset;
             i++)
            if (h->delayed_pic[i]->poc < out->poc) {
                out     = h->delayed_pic[i];
                out_idx = i;
            }

        for (i = out_idx; h->delayed_pic[i]; i++)
            h->delayed_pic[i] = h->delayed_pic[i + 1];

        if (out) {
            *data_size = sizeof(AVFrame);
            *pict      = out->f;
        }

        return buf_index;
    }
    /* Heuristic: detect avcC-style extradata (SPS 0x67 / PPS 0x68 NALs
     * with 16-bit length prefixes) sent in-band and parse it as extradata. */
    if(h->is_avc && buf_size >= 9 && buf[0]==1 && buf[2]==0 && (buf[4]&0xFC)==0xFC && (buf[5]&0x1F) && buf[8]==0x67){
        int cnt= buf[5]&0x1f;
        const uint8_t *p= buf+6;
        while(cnt--){
            int nalsize= AV_RB16(p) + 2;
            if(nalsize > buf_size - (p-buf) || p[2]!=0x67)
                goto not_extra;
            p += nalsize;
        }
        cnt = *(p++);
        if(!cnt)
            goto not_extra;
        while(cnt--){
            int nalsize= AV_RB16(p) + 2;
            if(nalsize > buf_size - (p-buf) || p[2]!=0x68)
                goto not_extra;
            p += nalsize;
        }

        return ff_h264_decode_extradata(h, buf, buf_size);
    }
not_extra:

    buf_index = decode_nal_units(h, buf, buf_size);
    if (buf_index < 0)
        return -1;

    /* End-of-sequence with no pending picture: flush delayed output. */
    if (!s->current_picture_ptr && h->nal_unit_type == NAL_END_SEQUENCE) {
        av_assert0(buf_index <= buf_size);
        goto out;
    }

    if (!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr) {
        /* "Q264" padding packets are silently swallowed. */
        if (avctx->skip_frame >= AVDISCARD_NONREF ||
            buf_size >= 4 && !memcmp("Q264", buf, 4))
            return buf_size;
        av_log(avctx, AV_LOG_ERROR, "no frame!\n");
        return -1;
    }

    if (!(s->flags2 & CODEC_FLAG2_CHUNKS) ||
        (s->mb_y >= s->mb_height && s->mb_height)) {
        if (s->flags2 & CODEC_FLAG2_CHUNKS)
            decode_postinit(h, 1);

        field_end(h, 0);

        /* Wait for second field. */
        *data_size = 0;
        if (h->next_output_pic && (h->next_output_pic->sync || h->sync>1)) {
            *data_size = sizeof(AVFrame);
            *pict      = h->next_output_pic->f;
        }
    }

    assert(pict->data[0] || !*data_size);
    ff_print_debug_info(s, pict);
    // printf("out %d\n", (int)pict->data[0]);

    return get_consumed_bytes(s, buf_index, buf_size);
}

4155
av_cold void ff_h264_free_context(H264Context *h)
4156
{
4157
    int i;
4158

4159
    free_tables(h, 1); // FIXME cleanup init stuff perhaps
4160

4161
    for (i = 0; i < MAX_SPS_COUNT; i++)
4162 4163
        av_freep(h->sps_buffers + i);

4164
    for (i = 0; i < MAX_PPS_COUNT; i++)
4165
        av_freep(h->pps_buffers + i);
4166 4167
}

4168
static av_cold int h264_decode_end(AVCodecContext *avctx)
4169
{
4170
    H264Context *h    = avctx->priv_data;
4171 4172
    MpegEncContext *s = &h->s;

4173
    ff_h264_remove_all_refs(h);
4174
    ff_h264_free_context(h);
4175

4176
    ff_MPV_common_end(s);
4177

4178
    // memset(h, 0, sizeof(H264Context));
4179

4180 4181 4182
    return 0;
}

/* H.264 profiles recognized by this decoder, exposed via AVCodec.profiles. */
static const AVProfile profiles[] = {
    { FF_PROFILE_H264_BASELINE,             "Baseline"              },
    { FF_PROFILE_H264_CONSTRAINED_BASELINE, "Constrained Baseline"  },
    { FF_PROFILE_H264_MAIN,                 "Main"                  },
    { FF_PROFILE_H264_EXTENDED,             "Extended"              },
    { FF_PROFILE_H264_HIGH,                 "High"                  },
    { FF_PROFILE_H264_HIGH_10,              "High 10"               },
    { FF_PROFILE_H264_HIGH_10_INTRA,        "High 10 Intra"         },
    { FF_PROFILE_H264_HIGH_422,             "High 4:2:2"            },
    { FF_PROFILE_H264_HIGH_422_INTRA,       "High 4:2:2 Intra"      },
    { FF_PROFILE_H264_HIGH_444,             "High 4:4:4"            },
    { FF_PROFILE_H264_HIGH_444_PREDICTIVE,  "High 4:4:4 Predictive" },
    { FF_PROFILE_H264_HIGH_444_INTRA,       "High 4:4:4 Intra"      },
    { FF_PROFILE_H264_CAVLC_444,            "CAVLC 4:4:4"           },
    { FF_PROFILE_UNKNOWN },
};
/* Private decoder options (mapped onto H264Context fields). */
static const AVOption h264_options[] = {
    {"is_avc", "is avc", offsetof(H264Context, is_avc), FF_OPT_TYPE_INT, {.dbl = 0}, 0, 1, 0},
    {"nal_length_size", "nal_length_size", offsetof(H264Context, nal_length_size), FF_OPT_TYPE_INT, {.dbl = 0}, 0, 4, 0},
    {NULL}
};

/* AVClass for the software decoder. */
static const AVClass h264_class = {
    "H264 Decoder",
    av_default_item_name,
    h264_options,
    LIBAVUTIL_VERSION_INT,
};

/* AVClass for the VDPAU-accelerated decoder (same options). */
static const AVClass h264_vdpau_class = {
    "H264 VDPAU Decoder",
    av_default_item_name,
    h264_options,
    LIBAVUTIL_VERSION_INT,
};

/* Registration of the software H.264 decoder. */
AVCodec ff_h264_decoder = {
    .name                  = "h264",
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_H264,
    .priv_data_size        = sizeof(H264Context),
    .init                  = ff_h264_decode_init,
    .close                 = h264_decode_end,
    .decode                = decode_frame,
    .capabilities          = /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 |
                             CODEC_CAP_DELAY | CODEC_CAP_SLICE_THREADS |
                             CODEC_CAP_FRAME_THREADS,
    .flush                 = flush_dpb,
    .long_name             = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(decode_update_thread_context),
    .profiles              = NULL_IF_CONFIG_SMALL(profiles),
    .priv_class            = &h264_class,
};

#if CONFIG_H264_VDPAU_DECODER
/* Registration of the VDPAU-accelerated H.264 decoder. */
AVCodec ff_h264_vdpau_decoder = {
    .name           = "h264_vdpau",
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_H264,
    .priv_data_size = sizeof(H264Context),
    .init           = ff_h264_decode_init,
    .close          = h264_decode_end,
    .decode         = decode_frame,
    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
    .flush          = flush_dpb,
    .long_name      = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
    .pix_fmts       = (const enum PixelFormat[]) { PIX_FMT_VDPAU_H264,
                                                   PIX_FMT_NONE},
    .profiles       = NULL_IF_CONFIG_SMALL(profiles),
    .priv_class     = &h264_vdpau_class,
};
#endif