/*
 * HEVC video Decoder
 *
 * Copyright (C) 2012 - 2013 Guillaume Martres
 * Copyright (C) 2012 - 2013 Mickael Raulet
 * Copyright (C) 2012 - 2013 Gildas Cocherel
 * Copyright (C) 2012 - 2013 Wassim Hamidouche
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/attributes.h"
#include "libavutil/common.h"
#include "libavutil/display.h"
#include "libavutil/internal.h"
#include "libavutil/mastering_display_metadata.h"
#include "libavutil/md5.h"
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
#include "libavutil/stereo3d.h"

#include "bswapdsp.h"
#include "bytestream.h"
#include "cabac_functions.h"
#include "golomb.h"
#include "hevc.h"
#include "hevc_data.h"
#include "hevc_parse.h"
#include "hevcdec.h"
#include "hwaccel.h"
#include "profiles.h"

/*
 * Sparse lookup table: the sizes 2, 4, 6, 8, 12, 16, 24, 32, 48 and 64 map
 * to the consecutive indices 0..9. Every slot not listed here is implicitly
 * zero-initialized.
 */
const uint8_t ff_hevc_pel_weight[65] = {
    [2]  = 0,
    [4]  = 1,
    [6]  = 2,
    [8]  = 3,
    [12] = 4,
    [16] = 5,
    [24] = 6,
    [32] = 7,
    [48] = 8,
    [64] = 9,
};

/**
 * NOTE: Each function hls_foo corresponds to the function foo in the
 * specification (HLS stands for High Level Syntax).
 */

/**
 * Section 5.7
 */

/**
 * Free everything allocated by pic_arrays_init().
 *
 * In addition to the per-picture tables this also releases the slice header
 * entry point arrays (entry_point_offset, size, offset) and tears down the
 * tab_mvf / rpl_tab buffer pools. All members are released through
 * av_freep() / av_buffer_pool_uninit(), which take the address of the
 * pointer being released.
 *
 * @param s decoder context owning the arrays
 */
static void pic_arrays_free(HEVCContext *s)
{
    av_freep(&s->sao);
    av_freep(&s->deblock);

    av_freep(&s->skip_flag);
    av_freep(&s->tab_ct_depth);

    av_freep(&s->tab_ipm);
    av_freep(&s->cbf_luma);
    av_freep(&s->is_pcm);

    av_freep(&s->qp_y_tab);
    av_freep(&s->tab_slice_address);
    av_freep(&s->filter_slice_edges);

    av_freep(&s->horizontal_bs);
    av_freep(&s->vertical_bs);

    av_freep(&s->sh.entry_point_offset);
    av_freep(&s->sh.size);
    av_freep(&s->sh.offset);

    av_buffer_pool_uninit(&s->tab_mvf_pool);
    av_buffer_pool_uninit(&s->rpl_tab_pool);
}

/* allocate arrays that depend on frame dimensions */
87
static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
88
{
89 90 91 92 93 94 95
    int log2_min_cb_size = sps->log2_min_cb_size;
    int width            = sps->width;
    int height           = sps->height;
    int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
                           ((height >> log2_min_cb_size) + 1);
    int ctb_count        = sps->ctb_width * sps->ctb_height;
    int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
96

97 98
    s->bs_width  = (width  >> 2) + 1;
    s->bs_height = (height >> 2) + 1;
99 100 101

    s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
    s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
102
    if (!s->sao || !s->deblock)
103 104
        goto fail;

105
    s->skip_flag    = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
106
    s->tab_ct_depth = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
107 108 109
    if (!s->skip_flag || !s->tab_ct_depth)
        goto fail;

110
    s->cbf_luma = av_malloc_array(sps->min_tb_width, sps->min_tb_height);
111
    s->tab_ipm  = av_mallocz(min_pu_size);
112
    s->is_pcm   = av_malloc_array(sps->min_pu_width + 1, sps->min_pu_height + 1);
113 114 115
    if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
        goto fail;

116
    s->filter_slice_edges = av_mallocz(ctb_count);
117
    s->tab_slice_address  = av_malloc_array(pic_size_in_ctb,
118
                                      sizeof(*s->tab_slice_address));
119
    s->qp_y_tab           = av_malloc_array(pic_size_in_ctb,
120
                                      sizeof(*s->qp_y_tab));
121 122 123
    if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
        goto fail;

124 125
    s->horizontal_bs = av_mallocz_array(s->bs_width, s->bs_height);
    s->vertical_bs   = av_mallocz_array(s->bs_width, s->bs_height);
126 127 128
    if (!s->horizontal_bs || !s->vertical_bs)
        goto fail;

Anton Khirnov's avatar
Anton Khirnov committed
129
    s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
130
                                          av_buffer_allocz);
131 132
    s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
                                          av_buffer_allocz);
133
    if (!s->tab_mvf_pool || !s->rpl_tab_pool)
134 135 136
        goto fail;

    return 0;
137

138 139 140 141 142
fail:
    pic_arrays_free(s);
    return AVERROR(ENOMEM);
}

143
static int pred_weight_table(HEVCContext *s, GetBitContext *gb)
144 145 146 147 148 149 150
{
    int i = 0;
    int j = 0;
    uint8_t luma_weight_l0_flag[16];
    uint8_t chroma_weight_l0_flag[16];
    uint8_t luma_weight_l1_flag[16];
    uint8_t chroma_weight_l1_flag[16];
151
    int luma_log2_weight_denom;
152

153
    luma_log2_weight_denom = get_ue_golomb_long(gb);
154
    if (luma_log2_weight_denom < 0 || luma_log2_weight_denom > 7) {
155
        av_log(s->avctx, AV_LOG_ERROR, "luma_log2_weight_denom %d is invalid\n", luma_log2_weight_denom);
156 157
        return AVERROR_INVALIDDATA;
    }
158
    s->sh.luma_log2_weight_denom = av_clip_uintp2(luma_log2_weight_denom, 3);
159
    if (s->ps.sps->chroma_format_idc != 0) {
160 161 162 163 164 165
        int64_t chroma_log2_weight_denom = luma_log2_weight_denom + (int64_t)get_se_golomb(gb);
        if (chroma_log2_weight_denom < 0 || chroma_log2_weight_denom > 7) {
            av_log(s->avctx, AV_LOG_ERROR, "chroma_log2_weight_denom %"PRId64" is invalid\n", chroma_log2_weight_denom);
            return AVERROR_INVALIDDATA;
        }
        s->sh.chroma_log2_weight_denom = chroma_log2_weight_denom;
166 167 168 169 170 171 172 173 174
    }

    for (i = 0; i < s->sh.nb_refs[L0]; i++) {
        luma_weight_l0_flag[i] = get_bits1(gb);
        if (!luma_weight_l0_flag[i]) {
            s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
            s->sh.luma_offset_l0[i] = 0;
        }
    }
175
    if (s->ps.sps->chroma_format_idc != 0) {
176
        for (i = 0; i < s->sh.nb_refs[L0]; i++)
177 178
            chroma_weight_l0_flag[i] = get_bits1(gb);
    } else {
179
        for (i = 0; i < s->sh.nb_refs[L0]; i++)
180 181 182 183 184 185 186 187 188 189 190 191
            chroma_weight_l0_flag[i] = 0;
    }
    for (i = 0; i < s->sh.nb_refs[L0]; i++) {
        if (luma_weight_l0_flag[i]) {
            int delta_luma_weight_l0 = get_se_golomb(gb);
            s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
            s->sh.luma_offset_l0[i] = get_se_golomb(gb);
        }
        if (chroma_weight_l0_flag[i]) {
            for (j = 0; j < 2; j++) {
                int delta_chroma_weight_l0 = get_se_golomb(gb);
                int delta_chroma_offset_l0 = get_se_golomb(gb);
192 193 194 195 196 197

                if (   (int8_t)delta_chroma_weight_l0 != delta_chroma_weight_l0
                    || delta_chroma_offset_l0 < -(1<<17) || delta_chroma_offset_l0 > (1<<17)) {
                    return AVERROR_INVALIDDATA;
                }

198
                s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
199
                s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
200
                                                                                    >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
201 202 203 204 205 206 207 208
            }
        } else {
            s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
            s->sh.chroma_offset_l0[i][0] = 0;
            s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
            s->sh.chroma_offset_l0[i][1] = 0;
        }
    }
209
    if (s->sh.slice_type == HEVC_SLICE_B) {
210 211 212 213 214 215 216
        for (i = 0; i < s->sh.nb_refs[L1]; i++) {
            luma_weight_l1_flag[i] = get_bits1(gb);
            if (!luma_weight_l1_flag[i]) {
                s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
                s->sh.luma_offset_l1[i] = 0;
            }
        }
217
        if (s->ps.sps->chroma_format_idc != 0) {
218
            for (i = 0; i < s->sh.nb_refs[L1]; i++)
219 220
                chroma_weight_l1_flag[i] = get_bits1(gb);
        } else {
221
            for (i = 0; i < s->sh.nb_refs[L1]; i++)
222 223 224 225 226 227 228 229 230 231 232 233
                chroma_weight_l1_flag[i] = 0;
        }
        for (i = 0; i < s->sh.nb_refs[L1]; i++) {
            if (luma_weight_l1_flag[i]) {
                int delta_luma_weight_l1 = get_se_golomb(gb);
                s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
                s->sh.luma_offset_l1[i] = get_se_golomb(gb);
            }
            if (chroma_weight_l1_flag[i]) {
                for (j = 0; j < 2; j++) {
                    int delta_chroma_weight_l1 = get_se_golomb(gb);
                    int delta_chroma_offset_l1 = get_se_golomb(gb);
234 235 236 237 238 239

                    if (   (int8_t)delta_chroma_weight_l1 != delta_chroma_weight_l1
                        || delta_chroma_offset_l1 < -(1<<17) || delta_chroma_offset_l1 > (1<<17)) {
                        return AVERROR_INVALIDDATA;
                    }

240
                    s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
241
                    s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
242
                                                                                        >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
243 244 245 246 247 248 249 250 251
                }
            } else {
                s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
                s->sh.chroma_offset_l1[i][0] = 0;
                s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
                s->sh.chroma_offset_l1[i][1] = 0;
            }
        }
    }
252
    return 0;
253 254 255 256
}

/**
 * Parse the long-term reference picture set of a slice header into @p rps.
 *
 * The first nb_sps entries are taken from the candidate list stored in the
 * SPS (selected by a coded index), the remaining nb_sh entries are coded
 * explicitly in the slice header. When an entry carries a POC MSB delta, the
 * stored POC is converted from an LSB value to a full POC relative to s->poc.
 *
 * @param s   decoder context; s->ps.sps, s->poc and s->sh.pic_order_cnt_lsb
 *            are read
 * @param rps output set; rps->nb_refs is reset to 0 before anything is parsed
 * @param gb  bit reader positioned at the long-term RPS syntax
 * @return 0 on success (including when the SPS disables long-term refs),
 *         AVERROR_INVALIDDATA if a count or resulting POC is out of range
 */
static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
{
    const HEVCSPS *sps = s->ps.sps;
    int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
    int prev_delta_msb = 0;
    unsigned int nb_sps = 0, nb_sh;
    int i;

    rps->nb_refs = 0;
    if (!sps->long_term_ref_pics_present_flag)
        return 0;

    if (sps->num_long_term_ref_pics_sps > 0)
        nb_sps = get_ue_golomb_long(gb);
    nb_sh = get_ue_golomb_long(gb);

    if (nb_sps > sps->num_long_term_ref_pics_sps)
        return AVERROR_INVALIDDATA;
    /* sum in 64 bits so two large unsigned counts cannot wrap around */
    if (nb_sh + (uint64_t)nb_sps > FF_ARRAY_ELEMS(rps->poc))
        return AVERROR_INVALIDDATA;

    rps->nb_refs = nb_sh + nb_sps;

    for (i = 0; i < rps->nb_refs; i++) {
        uint8_t delta_poc_msb_present;

        if (i < nb_sps) {
            uint8_t lt_idx_sps = 0;

            /* the index is only coded when there is more than one candidate */
            if (sps->num_long_term_ref_pics_sps > 1)
                lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));

            rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
            rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
        } else {
            rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
            rps->used[i] = get_bits1(gb);
        }

        delta_poc_msb_present = get_bits1(gb);
        if (delta_poc_msb_present) {
            int64_t delta = get_ue_golomb_long(gb);
            int64_t poc;

            /* deltas accumulate within each group (SPS-derived entries and
             * slice-coded entries); the first entry of a group starts fresh */
            if (i && i != nb_sps)
                delta += prev_delta_msb;

            poc = rps->poc[i] + s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
            /* reject values that do not fit the 32-bit POC storage */
            if (poc != (int32_t)poc)
                return AVERROR_INVALIDDATA;
            rps->poc[i] = poc;
            prev_delta_msb = delta;
        }
    }

    return 0;
}

313 314
static void export_stream_params(AVCodecContext *avctx, const HEVCParamSets *ps,
                                 const HEVCSPS *sps)
315
{
316
    const HEVCVPS *vps = (const HEVCVPS*)ps->vps_list[sps->vps_id]->data;
317
    const HEVCWindow *ow = &sps->output_window;
318 319 320 321 322
    unsigned int num = 0, den = 0;

    avctx->pix_fmt             = sps->pix_fmt;
    avctx->coded_width         = sps->width;
    avctx->coded_height        = sps->height;
323 324
    avctx->width               = sps->width  - ow->left_offset - ow->right_offset;
    avctx->height              = sps->height - ow->top_offset  - ow->bottom_offset;
325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359
    avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
    avctx->profile             = sps->ptl.general_ptl.profile_idc;
    avctx->level               = sps->ptl.general_ptl.level_idc;

    ff_set_sar(avctx, sps->vui.sar);

    if (sps->vui.video_signal_type_present_flag)
        avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
                                                            : AVCOL_RANGE_MPEG;
    else
        avctx->color_range = AVCOL_RANGE_MPEG;

    if (sps->vui.colour_description_present_flag) {
        avctx->color_primaries = sps->vui.colour_primaries;
        avctx->color_trc       = sps->vui.transfer_characteristic;
        avctx->colorspace      = sps->vui.matrix_coeffs;
    } else {
        avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
        avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
        avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
    }

    if (vps->vps_timing_info_present_flag) {
        num = vps->vps_num_units_in_tick;
        den = vps->vps_time_scale;
    } else if (sps->vui.vui_timing_info_present_flag) {
        num = sps->vui.vui_num_units_in_tick;
        den = sps->vui.vui_time_scale;
    }

    if (num != 0 && den != 0)
        av_reduce(&avctx->framerate.den, &avctx->framerate.num,
                  num, den, 1 << 30);
}

360
static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
361
{
362 363
#define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + \
                     CONFIG_HEVC_D3D11VA_HWACCEL * 2 + \
James Almer's avatar
James Almer committed
364
                     CONFIG_HEVC_NVDEC_HWACCEL + \
365 366 367
                     CONFIG_HEVC_VAAPI_HWACCEL + \
                     CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL + \
                     CONFIG_HEVC_VDPAU_HWACCEL)
368
    enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
369

370 371 372
    switch (sps->pix_fmt) {
    case AV_PIX_FMT_YUV420P:
    case AV_PIX_FMT_YUVJ420P:
Hendrik Leppkes's avatar
Hendrik Leppkes committed
373 374
#if CONFIG_HEVC_DXVA2_HWACCEL
        *fmt++ = AV_PIX_FMT_DXVA2_VLD;
375 376 377
#endif
#if CONFIG_HEVC_D3D11VA_HWACCEL
        *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
378
        *fmt++ = AV_PIX_FMT_D3D11;
379
#endif
380 381 382
#if CONFIG_HEVC_VAAPI_HWACCEL
        *fmt++ = AV_PIX_FMT_VAAPI;
#endif
383 384
#if CONFIG_HEVC_VDPAU_HWACCEL
        *fmt++ = AV_PIX_FMT_VDPAU;
385
#endif
James Almer's avatar
James Almer committed
386 387 388
#if CONFIG_HEVC_NVDEC_HWACCEL
        *fmt++ = AV_PIX_FMT_CUDA;
#endif
389 390
#if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
        *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
Hendrik Leppkes's avatar
Hendrik Leppkes committed
391
#endif
392 393 394 395 396 397 398
        break;
    case AV_PIX_FMT_YUV420P10:
#if CONFIG_HEVC_DXVA2_HWACCEL
        *fmt++ = AV_PIX_FMT_DXVA2_VLD;
#endif
#if CONFIG_HEVC_D3D11VA_HWACCEL
        *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
399
        *fmt++ = AV_PIX_FMT_D3D11;
400 401 402
#endif
#if CONFIG_HEVC_VAAPI_HWACCEL
        *fmt++ = AV_PIX_FMT_VAAPI;
403 404 405
#endif
#if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
        *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
406
#endif
407 408 409 410 411
#if CONFIG_HEVC_NVDEC_HWACCEL
        *fmt++ = AV_PIX_FMT_CUDA;
#endif
        break;
    case AV_PIX_FMT_YUV420P12:
412 413 414
    case AV_PIX_FMT_YUV444P:
    case AV_PIX_FMT_YUV444P10:
    case AV_PIX_FMT_YUV444P12:
James Almer's avatar
James Almer committed
415
#if CONFIG_HEVC_NVDEC_HWACCEL
416
        *fmt++ = AV_PIX_FMT_CUDA;
417 418
#endif
        break;
Hendrik Leppkes's avatar
Hendrik Leppkes committed
419 420
    }

421 422
    *fmt++ = sps->pix_fmt;
    *fmt = AV_PIX_FMT_NONE;
423

424
    return ff_thread_get_format(s->avctx, pix_fmts);
425 426 427 428 429
}

/**
 * Activate a new SPS (or deactivate the current one when @p sps is NULL).
 *
 * Frees the arrays sized for the previous SPS, allocates them again for the
 * new geometry, exports the stream parameters to the codec context,
 * re-initializes the prediction/DSP function tables for the new bit depth and
 * (for software decoding with SAO enabled) allocates the SAO line buffers.
 *
 * @param s       decoder context
 * @param sps     SPS to activate, or NULL to only release the current state
 * @param pix_fmt pixel format stored in s->avctx->pix_fmt, overriding the
 *                value written by export_stream_params()
 * @return 0 on success, a negative AVERROR code on failure; on failure the
 *         per-picture arrays and SAO buffers are freed and s->ps.sps is NULL
 */
static int set_sps(HEVCContext *s, const HEVCSPS *sps,
                   enum AVPixelFormat pix_fmt)
{
    int ret, i;

    pic_arrays_free(s);
    s->ps.sps = NULL;
    s->ps.vps = NULL;

    if (!sps)
        return 0;

    ret = pic_arrays_init(s, sps);
    if (ret < 0)
        goto fail;

    export_stream_params(s->avctx, &s->ps, sps);

    s->avctx->pix_fmt = pix_fmt;

    ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
    ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
    ff_videodsp_init (&s->vdsp,    sps->bit_depth);

    /* drop the SAO buffers of the previous SPS before sizing new ones */
    for (i = 0; i < 3; i++) {
        av_freep(&s->sao_pixel_buffer_h[i]);
        av_freep(&s->sao_pixel_buffer_v[i]);
    }

    if (sps->sao_enabled && !s->avctx->hwaccel) {
        int c_count = (sps->chroma_format_idc != 0) ? 3 : 1;
        int c_idx;

        for(c_idx = 0; c_idx < c_count; c_idx++) {
            int w = sps->width >> sps->hshift[c_idx];
            int h = sps->height >> sps->vshift[c_idx];
            s->sao_pixel_buffer_h[c_idx] =
                av_malloc((w * 2 * sps->ctb_height) <<
                          sps->pixel_shift);
            s->sao_pixel_buffer_v[c_idx] =
                av_malloc((h * 2 * sps->ctb_width) <<
                          sps->pixel_shift);
            /* a failed allocation must not be reported as a usable SPS */
            if (!s->sao_pixel_buffer_h[c_idx] ||
                !s->sao_pixel_buffer_v[c_idx]) {
                ret = AVERROR(ENOMEM);
                goto fail;
            }
        }
    }

    s->ps.sps = sps;
    s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data;

    return 0;

fail:
    pic_arrays_free(s);
    /* release any SAO buffers that were allocated before the failure */
    for (i = 0; i < 3; i++) {
        av_freep(&s->sao_pixel_buffer_h[i]);
        av_freep(&s->sao_pixel_buffer_v[i]);
    }
    s->ps.sps = NULL;
    return ret;
}

483 484
static int hls_slice_header(HEVCContext *s)
{
485
    GetBitContext *gb = &s->HEVClc->gb;
486
    SliceHeader *sh   = &s->sh;
487
    int i, ret;
488 489 490 491 492

    // Coded parameters
    sh->first_slice_in_pic_flag = get_bits1(gb);
    if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
        s->seq_decode = (s->seq_decode + 1) & 0xff;
493
        s->max_ra     = INT_MAX;
494 495 496
        if (IS_IDR(s))
            ff_hevc_clear_refs(s);
    }
497
    sh->no_output_of_prior_pics_flag = 0;
498
    if (IS_IRAP(s))
499 500
        sh->no_output_of_prior_pics_flag = get_bits1(gb);

501
    sh->pps_id = get_ue_golomb_long(gb);
502
    if (sh->pps_id >= HEVC_MAX_PPS_COUNT || !s->ps.pps_list[sh->pps_id]) {
503 504 505
        av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
        return AVERROR_INVALIDDATA;
    }
Anton Khirnov's avatar
Anton Khirnov committed
506
    if (!sh->first_slice_in_pic_flag &&
507
        s->ps.pps != (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data) {
Anton Khirnov's avatar
Anton Khirnov committed
508 509 510
        av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
        return AVERROR_INVALIDDATA;
    }
511
    s->ps.pps = (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data;
James Almer's avatar
James Almer committed
512
    if (s->nal_unit_type == HEVC_NAL_CRA_NUT && s->last_eos == 1)
Mickaël Raulet's avatar
Mickaël Raulet committed
513
        sh->no_output_of_prior_pics_flag = 1;
514

515
    if (s->ps.sps != (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data) {
516
        const HEVCSPS *sps = (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data;
517
        const HEVCSPS *last_sps = s->ps.sps;
518 519
        enum AVPixelFormat pix_fmt;

James Almer's avatar
James Almer committed
520
        if (last_sps && IS_IRAP(s) && s->nal_unit_type != HEVC_NAL_CRA_NUT) {
521 522
            if (sps->width != last_sps->width || sps->height != last_sps->height ||
                sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering !=
Mickaël Raulet's avatar
Mickaël Raulet committed
523
                last_sps->temporal_layer[last_sps->max_sub_layers - 1].max_dec_pic_buffering)
524 525
                sh->no_output_of_prior_pics_flag = 0;
        }
526
        ff_hevc_clear_refs(s);
527

528 529 530 531
        ret = set_sps(s, sps, sps->pix_fmt);
        if (ret < 0)
            return ret;

532
        pix_fmt = get_format(s, sps);
533 534
        if (pix_fmt < 0)
            return pix_fmt;
535
        s->avctx->pix_fmt = pix_fmt;
536

537 538
        s->seq_decode = (s->seq_decode + 1) & 0xff;
        s->max_ra     = INT_MAX;
539 540 541 542 543 544
    }

    sh->dependent_slice_segment_flag = 0;
    if (!sh->first_slice_in_pic_flag) {
        int slice_address_length;

545
        if (s->ps.pps->dependent_slice_segments_enabled_flag)
546 547
            sh->dependent_slice_segment_flag = get_bits1(gb);

548 549
        slice_address_length = av_ceil_log2(s->ps.sps->ctb_width *
                                            s->ps.sps->ctb_height);
550
        sh->slice_segment_addr = get_bitsz(gb, slice_address_length);
551
        if (sh->slice_segment_addr >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
552 553
            av_log(s->avctx, AV_LOG_ERROR,
                   "Invalid slice segment address: %u.\n",
554 555 556 557 558 559 560 561 562 563
                   sh->slice_segment_addr);
            return AVERROR_INVALIDDATA;
        }

        if (!sh->dependent_slice_segment_flag) {
            sh->slice_addr = sh->slice_segment_addr;
            s->slice_idx++;
        }
    } else {
        sh->slice_segment_addr = sh->slice_addr = 0;
564 565
        s->slice_idx           = 0;
        s->slice_initialized   = 0;
566 567 568 569 570
    }

    if (!sh->dependent_slice_segment_flag) {
        s->slice_initialized = 0;

571
        for (i = 0; i < s->ps.pps->num_extra_slice_header_bits; i++)
Anton Khirnov's avatar
Anton Khirnov committed
572
            skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
573

574
        sh->slice_type = get_ue_golomb_long(gb);
575 576 577
        if (!(sh->slice_type == HEVC_SLICE_I ||
              sh->slice_type == HEVC_SLICE_P ||
              sh->slice_type == HEVC_SLICE_B)) {
578 579 580 581
            av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
                   sh->slice_type);
            return AVERROR_INVALIDDATA;
        }
582
        if (IS_IRAP(s) && sh->slice_type != HEVC_SLICE_I) {
Anton Khirnov's avatar
Anton Khirnov committed
583 584 585
            av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
            return AVERROR_INVALIDDATA;
        }
586

587
        // when flag is not present, picture is inferred to be output
588
        sh->pic_output_flag = 1;
589
        if (s->ps.pps->output_flag_present_flag)
590 591
            sh->pic_output_flag = get_bits1(gb);

592
        if (s->ps.sps->separate_colour_plane_flag)
593 594 595
            sh->colour_plane_id = get_bits(gb, 2);

        if (!IS_IDR(s)) {
596
            int poc, pos;
597

598
            sh->pic_order_cnt_lsb = get_bits(gb, s->ps.sps->log2_max_poc_lsb);
599
            poc = ff_hevc_compute_poc(s->ps.sps, s->pocTid0, sh->pic_order_cnt_lsb, s->nal_unit_type);
600 601 602 603 604 605 606 607 608
            if (!sh->first_slice_in_pic_flag && poc != s->poc) {
                av_log(s->avctx, AV_LOG_WARNING,
                       "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
                if (s->avctx->err_recognition & AV_EF_EXPLODE)
                    return AVERROR_INVALIDDATA;
                poc = s->poc;
            }
            s->poc = poc;

609
            sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
610
            pos = get_bits_left(gb);
611
            if (!sh->short_term_ref_pic_set_sps_flag) {
612
                ret = ff_hevc_decode_short_term_rps(gb, s->avctx, &sh->slice_rps, s->ps.sps, 1);
613 614 615 616 617 618 619
                if (ret < 0)
                    return ret;

                sh->short_term_rps = &sh->slice_rps;
            } else {
                int numbits, rps_idx;

620
                if (!s->ps.sps->nb_st_rps) {
621 622 623 624
                    av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
                    return AVERROR_INVALIDDATA;
                }

625
                numbits = av_ceil_log2(s->ps.sps->nb_st_rps);
626
                rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
627
                sh->short_term_rps = &s->ps.sps->st_rps[rps_idx];
628
            }
629
            sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
630

631
            pos = get_bits_left(gb);
632 633 634 635 636 637
            ret = decode_lt_rps(s, &sh->long_term_rps, gb);
            if (ret < 0) {
                av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
                if (s->avctx->err_recognition & AV_EF_EXPLODE)
                    return AVERROR_INVALIDDATA;
            }
638
            sh->long_term_ref_pic_set_size = pos - get_bits_left(gb);
639

640
            if (s->ps.sps->sps_temporal_mvp_enabled_flag)
641 642 643 644 645
                sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
            else
                sh->slice_temporal_mvp_enabled_flag = 0;
        } else {
            s->sh.short_term_rps = NULL;
646
            s->poc               = 0;
647 648
        }

649
        /* 8.3.1 */
650
        if (sh->first_slice_in_pic_flag && s->temporal_id == 0 &&
651 652 653 654 655 656 657
            s->nal_unit_type != HEVC_NAL_TRAIL_N &&
            s->nal_unit_type != HEVC_NAL_TSA_N   &&
            s->nal_unit_type != HEVC_NAL_STSA_N  &&
            s->nal_unit_type != HEVC_NAL_RADL_N  &&
            s->nal_unit_type != HEVC_NAL_RADL_R  &&
            s->nal_unit_type != HEVC_NAL_RASL_N  &&
            s->nal_unit_type != HEVC_NAL_RASL_R)
658 659
            s->pocTid0 = s->poc;

660
        if (s->ps.sps->sao_enabled) {
661
            sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
662
            if (s->ps.sps->chroma_format_idc) {
663 664 665
                sh->slice_sample_adaptive_offset_flag[1] =
                sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
            }
666 667 668 669 670 671 672
        } else {
            sh->slice_sample_adaptive_offset_flag[0] = 0;
            sh->slice_sample_adaptive_offset_flag[1] = 0;
            sh->slice_sample_adaptive_offset_flag[2] = 0;
        }

        sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
673
        if (sh->slice_type == HEVC_SLICE_P || sh->slice_type == HEVC_SLICE_B) {
674 675
            int nb_refs;

676
            sh->nb_refs[L0] = s->ps.pps->num_ref_idx_l0_default_active;
677
            if (sh->slice_type == HEVC_SLICE_B)
678
                sh->nb_refs[L1] = s->ps.pps->num_ref_idx_l1_default_active;
679 680

            if (get_bits1(gb)) { // num_ref_idx_active_override_flag
681
                sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
682
                if (sh->slice_type == HEVC_SLICE_B)
683
                    sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
684
            }
685
            if (sh->nb_refs[L0] > HEVC_MAX_REFS || sh->nb_refs[L1] > HEVC_MAX_REFS) {
686 687 688 689 690 691 692 693 694 695 696 697 698
                av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
                       sh->nb_refs[L0], sh->nb_refs[L1]);
                return AVERROR_INVALIDDATA;
            }

            sh->rpl_modification_flag[0] = 0;
            sh->rpl_modification_flag[1] = 0;
            nb_refs = ff_hevc_frame_nb_refs(s);
            if (!nb_refs) {
                av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
                return AVERROR_INVALIDDATA;
            }

699
            if (s->ps.pps->lists_modification_present_flag && nb_refs > 1) {
700 701 702 703 704 705
                sh->rpl_modification_flag[0] = get_bits1(gb);
                if (sh->rpl_modification_flag[0]) {
                    for (i = 0; i < sh->nb_refs[L0]; i++)
                        sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
                }

706
                if (sh->slice_type == HEVC_SLICE_B) {
707 708 709 710 711 712 713
                    sh->rpl_modification_flag[1] = get_bits1(gb);
                    if (sh->rpl_modification_flag[1] == 1)
                        for (i = 0; i < sh->nb_refs[L1]; i++)
                            sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
                }
            }

714
            if (sh->slice_type == HEVC_SLICE_B)
715 716
                sh->mvd_l1_zero_flag = get_bits1(gb);

717
            if (s->ps.pps->cabac_init_present_flag)
718 719 720 721 722 723 724
                sh->cabac_init_flag = get_bits1(gb);
            else
                sh->cabac_init_flag = 0;

            sh->collocated_ref_idx = 0;
            if (sh->slice_temporal_mvp_enabled_flag) {
                sh->collocated_list = L0;
725
                if (sh->slice_type == HEVC_SLICE_B)
726 727 728
                    sh->collocated_list = !get_bits1(gb);

                if (sh->nb_refs[sh->collocated_list] > 1) {
729
                    sh->collocated_ref_idx = get_ue_golomb_long(gb);
730 731
                    if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
                        av_log(s->avctx, AV_LOG_ERROR,
732 733
                               "Invalid collocated_ref_idx: %d.\n",
                               sh->collocated_ref_idx);
734 735 736 737 738
                        return AVERROR_INVALIDDATA;
                    }
                }
            }

739 740
            if ((s->ps.pps->weighted_pred_flag   && sh->slice_type == HEVC_SLICE_P) ||
                (s->ps.pps->weighted_bipred_flag && sh->slice_type == HEVC_SLICE_B)) {
741 742 743
                int ret = pred_weight_table(s, gb);
                if (ret < 0)
                    return ret;
744 745
            }

746
            sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
747 748 749 750 751 752
            if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
                av_log(s->avctx, AV_LOG_ERROR,
                       "Invalid number of merging MVP candidates: %d.\n",
                       sh->max_num_merge_cand);
                return AVERROR_INVALIDDATA;
            }
753 754 755
        }

        sh->slice_qp_delta = get_se_golomb(gb);
756

757
        if (s->ps.pps->pic_slice_level_chroma_qp_offsets_present_flag) {
758 759 760 761 762 763 764
            sh->slice_cb_qp_offset = get_se_golomb(gb);
            sh->slice_cr_qp_offset = get_se_golomb(gb);
        } else {
            sh->slice_cb_qp_offset = 0;
            sh->slice_cr_qp_offset = 0;
        }

765
        if (s->ps.pps->chroma_qp_offset_list_enabled_flag)
766 767 768 769
            sh->cu_chroma_qp_offset_enabled_flag = get_bits1(gb);
        else
            sh->cu_chroma_qp_offset_enabled_flag = 0;

770
        if (s->ps.pps->deblocking_filter_control_present_flag) {
771 772
            int deblocking_filter_override_flag = 0;

773
            if (s->ps.pps->deblocking_filter_override_enabled_flag)
774 775 776 777 778
                deblocking_filter_override_flag = get_bits1(gb);

            if (deblocking_filter_override_flag) {
                sh->disable_deblocking_filter_flag = get_bits1(gb);
                if (!sh->disable_deblocking_filter_flag) {
779 780 781 782 783 784 785 786 787 788 789
                    int beta_offset_div2 = get_se_golomb(gb);
                    int tc_offset_div2   = get_se_golomb(gb) ;
                    if (beta_offset_div2 < -6 || beta_offset_div2 > 6 ||
                        tc_offset_div2   < -6 || tc_offset_div2   > 6) {
                        av_log(s->avctx, AV_LOG_ERROR,
                            "Invalid deblock filter offsets: %d, %d\n",
                            beta_offset_div2, tc_offset_div2);
                        return AVERROR_INVALIDDATA;
                    }
                    sh->beta_offset = beta_offset_div2 * 2;
                    sh->tc_offset   =   tc_offset_div2 * 2;
790 791
                }
            } else {
792 793 794
                sh->disable_deblocking_filter_flag = s->ps.pps->disable_dbf;
                sh->beta_offset                    = s->ps.pps->beta_offset;
                sh->tc_offset                      = s->ps.pps->tc_offset;
795 796 797
            }
        } else {
            sh->disable_deblocking_filter_flag = 0;
798 799
            sh->beta_offset                    = 0;
            sh->tc_offset                      = 0;
800 801
        }

802
        if (s->ps.pps->seq_loop_filter_across_slices_enabled_flag &&
803 804 805 806 807
            (sh->slice_sample_adaptive_offset_flag[0] ||
             sh->slice_sample_adaptive_offset_flag[1] ||
             !sh->disable_deblocking_filter_flag)) {
            sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
        } else {
808
            sh->slice_loop_filter_across_slices_enabled_flag = s->ps.pps->seq_loop_filter_across_slices_enabled_flag;
809 810 811 812 813 814 815
        }
    } else if (!s->slice_initialized) {
        av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
        return AVERROR_INVALIDDATA;
    }

    sh->num_entry_point_offsets = 0;
816
    if (s->ps.pps->tiles_enabled_flag || s->ps.pps->entropy_coding_sync_enabled_flag) {
817 818
        unsigned num_entry_point_offsets = get_ue_golomb_long(gb);
        // It would be possible to bound this tighter but this here is simpler
819
        if (num_entry_point_offsets > get_bits_left(gb)) {
820 821 822 823 824
            av_log(s->avctx, AV_LOG_ERROR, "num_entry_point_offsets %d is invalid\n", num_entry_point_offsets);
            return AVERROR_INVALIDDATA;
        }

        sh->num_entry_point_offsets = num_entry_point_offsets;
825
        if (sh->num_entry_point_offsets > 0) {
826
            int offset_len = get_ue_golomb_long(gb) + 1;
827 828 829 830 831 832 833

            if (offset_len < 1 || offset_len > 32) {
                sh->num_entry_point_offsets = 0;
                av_log(s->avctx, AV_LOG_ERROR, "offset_len %d is invalid\n", offset_len);
                return AVERROR_INVALIDDATA;
            }

834 835 836
            av_freep(&sh->entry_point_offset);
            av_freep(&sh->offset);
            av_freep(&sh->size);
837
            sh->entry_point_offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(unsigned));
838 839
            sh->offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
            sh->size = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
840 841 842 843 844
            if (!sh->entry_point_offset || !sh->offset || !sh->size) {
                sh->num_entry_point_offsets = 0;
                av_log(s->avctx, AV_LOG_ERROR, "Failed to allocate memory\n");
                return AVERROR(ENOMEM);
            }
845
            for (i = 0; i < sh->num_entry_point_offsets; i++) {
846
                unsigned val = get_bits_long(gb, offset_len);
847 848
                sh->entry_point_offset[i] = val + 1; // +1; // +1 to get the size
            }
849
            if (s->threads_number > 1 && (s->ps.pps->num_tile_rows > 1 || s->ps.pps->num_tile_columns > 1)) {
850 851 852 853 854 855
                s->enable_parallel_tiles = 0; // TODO: you can enable tiles in parallel here
                s->threads_number = 1;
            } else
                s->enable_parallel_tiles = 0;
        } else
            s->enable_parallel_tiles = 0;
856 857
    }

858
    if (s->ps.pps->slice_header_extension_present_flag) {
859
        unsigned int length = get_ue_golomb_long(gb);
860 861 862 863
        if (length*8LL > get_bits_left(gb)) {
            av_log(s->avctx, AV_LOG_ERROR, "too many slice_header_extension_data_bytes\n");
            return AVERROR_INVALIDDATA;
        }
864
        for (i = 0; i < length; i++)
865
            skip_bits(gb, 8);  // slice_header_extension_data_byte
866 867 868
    }

    // Inferred parameters
869
    sh->slice_qp = 26U + s->ps.pps->pic_init_qp_minus26 + sh->slice_qp_delta;
870
    if (sh->slice_qp > 51 ||
871
        sh->slice_qp < -s->ps.sps->qp_bd_offset) {
872 873 874 875
        av_log(s->avctx, AV_LOG_ERROR,
               "The slice_qp %d is outside the valid range "
               "[%d, 51].\n",
               sh->slice_qp,
876
               -s->ps.sps->qp_bd_offset);
877 878 879
        return AVERROR_INVALIDDATA;
    }

880 881
    sh->slice_ctb_addr_rs = sh->slice_segment_addr;

882 883 884 885 886
    if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
        av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
        return AVERROR_INVALIDDATA;
    }

887 888 889 890 891 892
    if (get_bits_left(gb) < 0) {
        av_log(s->avctx, AV_LOG_ERROR,
               "Overread slice header by %d bits\n", -get_bits_left(gb));
        return AVERROR_INVALIDDATA;
    }

893
    s->HEVClc->first_qp_group = !s->sh.dependent_slice_segment_flag;
894

895
    if (!s->ps.pps->cu_qp_delta_enabled_flag)
896
        s->HEVClc->qp_y = s->sh.slice_qp;
897 898

    s->slice_initialized = 1;
899 900
    s->HEVClc->tu.cu_qp_offset_cb = 0;
    s->HEVClc->tu.cu_qp_offset_cr = 0;
901 902 903 904

    return 0;
}

905
#define CTB(tab, x, y) ((tab)[(y) * s->ps.sps->ctb_width + (x)])
906 907 908 909 910 911 912 913 914 915 916 917 918 919 920

#define SET_SAO(elem, value)                            \
do {                                                    \
    if (!sao_merge_up_flag && !sao_merge_left_flag)     \
        sao->elem = value;                              \
    else if (sao_merge_left_flag)                       \
        sao->elem = CTB(s->sao, rx-1, ry).elem;         \
    else if (sao_merge_up_flag)                         \
        sao->elem = CTB(s->sao, rx, ry-1).elem;         \
    else                                                \
        sao->elem = 0;                                  \
} while (0)

static void hls_sao_param(HEVCContext *s, int rx, int ry)
{
921
    HEVCLocalContext *lc    = s->HEVClc;
922 923
    int sao_merge_left_flag = 0;
    int sao_merge_up_flag   = 0;
924
    SAOParams *sao          = &CTB(s->sao, rx, ry);
925 926 927 928 929 930 931 932 933 934 935 936 937 938
    int c_idx, i;

    if (s->sh.slice_sample_adaptive_offset_flag[0] ||
        s->sh.slice_sample_adaptive_offset_flag[1]) {
        if (rx > 0) {
            if (lc->ctb_left_flag)
                sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
        }
        if (ry > 0 && !sao_merge_left_flag) {
            if (lc->ctb_up_flag)
                sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
        }
    }

939 940 941
    for (c_idx = 0; c_idx < (s->ps.sps->chroma_format_idc ? 3 : 1); c_idx++) {
        int log2_sao_offset_scale = c_idx == 0 ? s->ps.pps->log2_sao_offset_scale_luma :
                                                 s->ps.pps->log2_sao_offset_scale_chroma;
942

943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963
        if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
            sao->type_idx[c_idx] = SAO_NOT_APPLIED;
            continue;
        }

        if (c_idx == 2) {
            sao->type_idx[2] = sao->type_idx[1];
            sao->eo_class[2] = sao->eo_class[1];
        } else {
            SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
        }

        if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
            continue;

        for (i = 0; i < 4; i++)
            SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));

        if (sao->type_idx[c_idx] == SAO_BAND) {
            for (i = 0; i < 4; i++) {
                if (sao->offset_abs[c_idx][i]) {
964 965
                    SET_SAO(offset_sign[c_idx][i],
                            ff_hevc_sao_offset_sign_decode(s));
966 967 968 969 970 971 972 973 974 975
                } else {
                    sao->offset_sign[c_idx][i] = 0;
                }
            }
            SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
        } else if (c_idx != 2) {
            SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
        }

        // Inferred parameters
976
        sao->offset_val[c_idx][0] = 0;
977
        for (i = 0; i < 4; i++) {
978
            sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i];
979 980 981 982 983 984
            if (sao->type_idx[c_idx] == SAO_EDGE) {
                if (i > 1)
                    sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
            } else if (sao->offset_sign[c_idx][i]) {
                sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
            }
985
            sao->offset_val[c_idx][i + 1] *= 1 << log2_sao_offset_scale;
986 987 988 989 990 991 992
        }
    }
}

#undef SET_SAO
#undef CTB

993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008
/**
 * Decode the cross-component prediction syntax for one chroma component and
 * store the resulting residual scale in the local TU state (tu.res_scale_val).
 *
 * The scale is 0 when the decoded log2 magnitude (plus one) is 0; otherwise
 * it is 2^(value - 1) multiplied by (1 - 2 * sign flag).
 *
 * @param s   HEVC decoding context
 * @param idx component selector forwarded to the syntax element decoders
 * @return always 0
 */
static int hls_cross_component_pred(HEVCContext *s, int idx)
{
    HEVCLocalContext *lc = s->HEVClc;
    const int log2_abs_plus1 = ff_hevc_log2_res_scale_abs(s, idx);
    int res_scale = 0;

    if (log2_abs_plus1) {
        /* The sign flag is only present for a non-zero magnitude. */
        const int sign_flag = ff_hevc_res_scale_sign_flag(s, idx);
        const int magnitude = 1 << (log2_abs_plus1 - 1);

        res_scale = magnitude * (1 - 2 * sign_flag);
    }

    lc->tu.res_scale_val = res_scale;

    return 0;
}

1009 1010 1011
static int hls_transform_unit(HEVCContext *s, int x0, int y0,
                              int xBase, int yBase, int cb_xBase, int cb_yBase,
                              int log2_cb_size, int log2_trafo_size,
1012
                              int blk_idx, int cbf_luma, int *cbf_cb, int *cbf_cr)
1013
{
1014
    HEVCLocalContext *lc = s->HEVClc;
1015
    const int log2_trafo_size_c = log2_trafo_size - s->ps.sps->hshift[1];
1016
    int i;
1017 1018 1019 1020 1021

    if (lc->cu.pred_mode == MODE_INTRA) {
        int trafo_size = 1 << log2_trafo_size;
        ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);

1022
        s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
1023 1024
    }

1025
    if (cbf_luma || cbf_cb[0] || cbf_cr[0] ||
1026
        (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
1027 1028
        int scan_idx   = SCAN_DIAG;
        int scan_idx_c = SCAN_DIAG;
1029
        int cbf_chroma = cbf_cb[0] || cbf_cr[0] ||
1030
                         (s->ps.sps->chroma_format_idc == 2 &&
1031
                         (cbf_cb[1] || cbf_cr[1]));
1032

1033
        if (s->ps.pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1034 1035 1036 1037 1038
            lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
            if (lc->tu.cu_qp_delta != 0)
                if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
                    lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
            lc->tu.is_cu_qp_delta_coded = 1;
1039

1040 1041
            if (lc->tu.cu_qp_delta < -(26 + s->ps.sps->qp_bd_offset / 2) ||
                lc->tu.cu_qp_delta >  (25 + s->ps.sps->qp_bd_offset / 2)) {
1042 1043 1044 1045
                av_log(s->avctx, AV_LOG_ERROR,
                       "The cu_qp_delta %d is outside the valid range "
                       "[%d, %d].\n",
                       lc->tu.cu_qp_delta,
1046 1047
                       -(26 + s->ps.sps->qp_bd_offset / 2),
                        (25 + s->ps.sps->qp_bd_offset / 2));
1048 1049 1050
                return AVERROR_INVALIDDATA;
            }

1051
            ff_hevc_set_qPy(s, cb_xBase, cb_yBase, log2_cb_size);
1052 1053
        }

1054 1055 1056 1057 1058
        if (s->sh.cu_chroma_qp_offset_enabled_flag && cbf_chroma &&
            !lc->cu.cu_transquant_bypass_flag  &&  !lc->tu.is_cu_chroma_qp_offset_coded) {
            int cu_chroma_qp_offset_flag = ff_hevc_cu_chroma_qp_offset_flag(s);
            if (cu_chroma_qp_offset_flag) {
                int cu_chroma_qp_offset_idx  = 0;
1059
                if (s->ps.pps->chroma_qp_offset_list_len_minus1 > 0) {
1060 1061 1062 1063
                    cu_chroma_qp_offset_idx = ff_hevc_cu_chroma_qp_offset_idx(s);
                    av_log(s->avctx, AV_LOG_ERROR,
                        "cu_chroma_qp_offset_idx not yet tested.\n");
                }
1064 1065
                lc->tu.cu_qp_offset_cb = s->ps.pps->cb_qp_offset_list[cu_chroma_qp_offset_idx];
                lc->tu.cu_qp_offset_cr = s->ps.pps->cr_qp_offset_list[cu_chroma_qp_offset_idx];
1066 1067 1068 1069 1070 1071 1072
            } else {
                lc->tu.cu_qp_offset_cb = 0;
                lc->tu.cu_qp_offset_cr = 0;
            }
            lc->tu.is_cu_chroma_qp_offset_coded = 1;
        }

1073
        if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1074 1075
            if (lc->tu.intra_pred_mode >= 6 &&
                lc->tu.intra_pred_mode <= 14) {
1076
                scan_idx = SCAN_VERT;
1077 1078
            } else if (lc->tu.intra_pred_mode >= 22 &&
                       lc->tu.intra_pred_mode <= 30) {
1079 1080 1081
                scan_idx = SCAN_HORIZ;
            }

1082 1083
            if (lc->tu.intra_pred_mode_c >=  6 &&
                lc->tu.intra_pred_mode_c <= 14) {
1084
                scan_idx_c = SCAN_VERT;
1085 1086
            } else if (lc->tu.intra_pred_mode_c >= 22 &&
                       lc->tu.intra_pred_mode_c <= 30) {
1087 1088 1089 1090
                scan_idx_c = SCAN_HORIZ;
            }
        }

1091
        lc->tu.cross_pf = 0;
1092 1093

        if (cbf_luma)
1094
            ff_hevc_hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1095 1096 1097 1098
        if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
            int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
            int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
            lc->tu.cross_pf  = (s->ps.pps->cross_component_prediction_enabled_flag && cbf_luma &&
1099 1100
                                (lc->cu.pred_mode == MODE_INTER ||
                                 (lc->tu.chroma_mode_c ==  4)));
1101

1102 1103 1104
            if (lc->tu.cross_pf) {
                hls_cross_component_pred(s, 0);
            }
1105
            for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1106 1107 1108 1109
                if (lc->cu.pred_mode == MODE_INTRA) {
                    ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
                    s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 1);
                }
1110
                if (cbf_cb[i])
1111 1112
                    ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
                                                log2_trafo_size_c, scan_idx_c, 1);
1113 1114 1115
                else
                    if (lc->tu.cross_pf) {
                        ptrdiff_t stride = s->frame->linesize[1];
1116 1117
                        int hshift = s->ps.sps->hshift[1];
                        int vshift = s->ps.sps->vshift[1];
1118 1119
                        int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
                        int16_t *coeffs   = (int16_t*)lc->edge_emu_buffer2;
1120 1121 1122
                        int size = 1 << log2_trafo_size_c;

                        uint8_t *dst = &s->frame->data[1][(y0 >> vshift) * stride +
1123
                                                              ((x0 >> hshift) << s->ps.sps->pixel_shift)];
1124 1125 1126
                        for (i = 0; i < (size * size); i++) {
                            coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
                        }
1127
                        s->hevcdsp.add_residual[log2_trafo_size_c-2](dst, coeffs, stride);
1128
                    }
1129 1130
            }

1131 1132 1133
            if (lc->tu.cross_pf) {
                hls_cross_component_pred(s, 1);
            }
1134
            for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1135 1136 1137 1138
                if (lc->cu.pred_mode == MODE_INTRA) {
                    ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
                    s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 2);
                }
1139
                if (cbf_cr[i])
1140 1141
                    ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
                                                log2_trafo_size_c, scan_idx_c, 2);
1142 1143 1144
                else
                    if (lc->tu.cross_pf) {
                        ptrdiff_t stride = s->frame->linesize[2];
1145 1146
                        int hshift = s->ps.sps->hshift[2];
                        int vshift = s->ps.sps->vshift[2];
1147 1148
                        int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
                        int16_t *coeffs   = (int16_t*)lc->edge_emu_buffer2;
1149 1150 1151
                        int size = 1 << log2_trafo_size_c;

                        uint8_t *dst = &s->frame->data[2][(y0 >> vshift) * stride +
1152
                                                          ((x0 >> hshift) << s->ps.sps->pixel_shift)];
1153 1154 1155
                        for (i = 0; i < (size * size); i++) {
                            coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
                        }
1156
                        s->hevcdsp.add_residual[log2_trafo_size_c-2](dst, coeffs, stride);
1157
                    }
1158
            }
1159
        } else if (s->ps.sps->chroma_format_idc && blk_idx == 3) {
1160
            int trafo_size_h = 1 << (log2_trafo_size + 1);
1161 1162
            int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
            for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1163 1164 1165 1166 1167
                if (lc->cu.pred_mode == MODE_INTRA) {
                    ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
                                                    trafo_size_h, trafo_size_v);
                    s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 1);
                }
1168
                if (cbf_cb[i])
1169 1170 1171
                    ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
                                                log2_trafo_size, scan_idx_c, 1);
            }
1172
            for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1173 1174 1175 1176 1177
                if (lc->cu.pred_mode == MODE_INTRA) {
                    ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
                                                trafo_size_h, trafo_size_v);
                    s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 2);
                }
1178
                if (cbf_cr[i])
1179 1180 1181 1182
                    ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
                                                log2_trafo_size, scan_idx_c, 2);
            }
        }
1183 1184 1185 1186
    } else if (s->ps.sps->chroma_format_idc && lc->cu.pred_mode == MODE_INTRA) {
        if (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3) {
            int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
            int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
1187 1188 1189
            ff_hevc_set_neighbour_available(s, x0, y0, trafo_size_h, trafo_size_v);
            s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 1);
            s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 2);
1190
            if (s->ps.sps->chroma_format_idc == 2) {
1191 1192 1193 1194 1195 1196 1197
                ff_hevc_set_neighbour_available(s, x0, y0 + (1 << log2_trafo_size_c),
                                                trafo_size_h, trafo_size_v);
                s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 1);
                s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 2);
            }
        } else if (blk_idx == 3) {
            int trafo_size_h = 1 << (log2_trafo_size + 1);
1198
            int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1199 1200 1201 1202
            ff_hevc_set_neighbour_available(s, xBase, yBase,
                                            trafo_size_h, trafo_size_v);
            s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
            s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1203
            if (s->ps.sps->chroma_format_idc == 2) {
1204 1205 1206 1207 1208
                ff_hevc_set_neighbour_available(s, xBase, yBase + (1 << (log2_trafo_size)),
                                                trafo_size_h, trafo_size_v);
                s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 1);
                s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 2);
            }
1209 1210
        }
    }
1211

1212
    return 0;
1213 1214 1215 1216
}

/**
 * Store the value 2 in s->is_pcm for every minimum-size PU cell covered by
 * the given block, clipped to the picture dimensions. hls_transform_tree()
 * calls this for blocks coded with cu_transquant_bypass_flag set.
 *
 * @param s            HEVC decoding context
 * @param x0           horizontal luma position of the block
 * @param y0           vertical luma position of the block
 * @param log2_cb_size log2 of the block size
 */
static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
{
    int cb_size          = 1 << log2_cb_size;
    int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
    int min_pu_width     = s->ps.sps->min_pu_width;
    /* Clip the block to the picture so no cell outside it is touched. */
    int x_end = FFMIN(x0 + cb_size, s->ps.sps->width);
    int y_end = FFMIN(y0 + cb_size, s->ps.sps->height);
    int i, j;

    for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
        for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
            s->is_pcm[i + j * min_pu_width] = 2;
}

1230 1231 1232
static int hls_transform_tree(HEVCContext *s, int x0, int y0,
                              int xBase, int yBase, int cb_xBase, int cb_yBase,
                              int log2_cb_size, int log2_trafo_size,
1233
                              int trafo_depth, int blk_idx,
1234
                              const int *base_cbf_cb, const int *base_cbf_cr)
1235
{
1236
    HEVCLocalContext *lc = s->HEVClc;
1237
    uint8_t split_transform_flag;
1238 1239
    int cbf_cb[2];
    int cbf_cr[2];
1240
    int ret;
1241

1242 1243 1244 1245
    cbf_cb[0] = base_cbf_cb[0];
    cbf_cb[1] = base_cbf_cb[1];
    cbf_cr[0] = base_cbf_cr[0];
    cbf_cr[1] = base_cbf_cr[1];
1246 1247

    if (lc->cu.intra_split_flag) {
1248 1249
        if (trafo_depth == 1) {
            lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[blk_idx];
1250
            if (s->ps.sps->chroma_format_idc == 3) {
1251 1252 1253 1254 1255 1256 1257
                lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[blk_idx];
                lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[blk_idx];
            } else {
                lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
                lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
            }
        }
1258
    } else {
1259 1260 1261
        lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[0];
        lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
        lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
1262 1263
    }

1264 1265
    if (log2_trafo_size <= s->ps.sps->log2_max_trafo_size &&
        log2_trafo_size >  s->ps.sps->log2_min_tb_size    &&
1266
        trafo_depth     < lc->cu.max_trafo_depth       &&
1267 1268 1269
        !(lc->cu.intra_split_flag && trafo_depth == 0)) {
        split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
    } else {
1270
        int inter_split = s->ps.sps->max_transform_hierarchy_depth_inter == 0 &&
1271 1272 1273 1274
                          lc->cu.pred_mode == MODE_INTER &&
                          lc->cu.part_mode != PART_2Nx2N &&
                          trafo_depth == 0;

1275
        split_transform_flag = log2_trafo_size > s->ps.sps->log2_max_trafo_size ||
1276
                               (lc->cu.intra_split_flag && trafo_depth == 0) ||
1277
                               inter_split;
1278 1279
    }

1280
    if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
1281 1282
        if (trafo_depth == 0 || cbf_cb[0]) {
            cbf_cb[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1283
            if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1284
                cbf_cb[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1285
            }
1286 1287
        }

1288 1289
        if (trafo_depth == 0 || cbf_cr[0]) {
            cbf_cr[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1290
            if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1291
                cbf_cr[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1292
            }
1293 1294 1295 1296
        }
    }

    if (split_transform_flag) {
1297 1298 1299 1300 1301 1302 1303
        const int trafo_size_split = 1 << (log2_trafo_size - 1);
        const int x1 = x0 + trafo_size_split;
        const int y1 = y0 + trafo_size_split;

#define SUBDIVIDE(x, y, idx)                                                    \
do {                                                                            \
    ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1304 1305
                             log2_trafo_size - 1, trafo_depth + 1, idx,         \
                             cbf_cb, cbf_cr);                                   \
1306 1307 1308
    if (ret < 0)                                                                \
        return ret;                                                             \
} while (0)
1309

1310 1311 1312 1313 1314 1315
        SUBDIVIDE(x0, y0, 0);
        SUBDIVIDE(x1, y0, 1);
        SUBDIVIDE(x0, y1, 2);
        SUBDIVIDE(x1, y1, 3);

#undef SUBDIVIDE
1316
    } else {
1317 1318 1319
        int min_tu_size      = 1 << s->ps.sps->log2_min_tb_size;
        int log2_min_tu_size = s->ps.sps->log2_min_tb_size;
        int min_tu_width     = s->ps.sps->min_tb_width;
1320
        int cbf_luma         = 1;
1321 1322

        if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1323
            cbf_cb[0] || cbf_cr[0] ||
1324
            (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
1325
            cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1326 1327
        }

1328
        ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1329
                                 log2_cb_size, log2_trafo_size,
1330
                                 blk_idx, cbf_luma, cbf_cb, cbf_cr);
1331 1332
        if (ret < 0)
            return ret;
1333
        // TODO: store cbf_luma somewhere else
1334
        if (cbf_luma) {
1335
            int i, j;
1336 1337 1338 1339
            for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
                for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
                    int x_tu = (x0 + j) >> log2_min_tu_size;
                    int y_tu = (y0 + i) >> log2_min_tu_size;
1340
                    s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1341
                }
1342
        }
1343
        if (!s->sh.disable_deblocking_filter_flag) {
1344
            ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1345
            if (s->ps.pps->transquant_bypass_enable_flag &&
1346
                lc->cu.cu_transquant_bypass_flag)
1347 1348 1349
                set_deblocking_bypass(s, x0, y0, log2_trafo_size);
        }
    }
1350
    return 0;
1351 1352 1353 1354
}

/**
 * Read the raw PCM samples of a coding block from the bitstream and write
 * them into the current frame.
 *
 * @param s            HEVC decoding context
 * @param x0           horizontal luma position of the coding block
 * @param y0           vertical luma position of the coding block
 * @param log2_cb_size log2 of the coding block size
 * @return 0 on success, a negative error code if the bit reader cannot be
 *         initialised
 */
static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
{
    HEVCLocalContext *lc = s->HEVClc;
    GetBitContext gb;
    int cb_size   = 1 << log2_cb_size;
    ptrdiff_t stride0 = s->frame->linesize[0];
    ptrdiff_t stride1 = s->frame->linesize[1];
    ptrdiff_t stride2 = s->frame->linesize[2];
    uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->ps.sps->pixel_shift)];
    uint8_t *dst1 = &s->frame->data[1][(y0 >> s->ps.sps->vshift[1]) * stride1 + ((x0 >> s->ps.sps->hshift[1]) << s->ps.sps->pixel_shift)];
    uint8_t *dst2 = &s->frame->data[2][(y0 >> s->ps.sps->vshift[2]) * stride2 + ((x0 >> s->ps.sps->hshift[2]) << s->ps.sps->pixel_shift)];

    /* Total payload in bits: luma samples plus both chroma planes.
     * NOTE(review): the chroma term is included even when chroma_format_idc
     * is 0, although chroma is only written below for non-zero
     * chroma_format_idc -- confirm this is intended for monochrome streams. */
    int length         = cb_size * cb_size * s->ps.sps->pcm.bit_depth +
                         (((cb_size >> s->ps.sps->hshift[1]) * (cb_size >> s->ps.sps->vshift[1])) +
                          ((cb_size >> s->ps.sps->hshift[2]) * (cb_size >> s->ps.sps->vshift[2]))) *
                          s->ps.sps->pcm.bit_depth_chroma;
    /* Skip the payload (rounded up to whole bytes) in the CABAC reader and
     * keep the pointer returned by skip_bytes() for the plain bit reader. */
    const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
    int ret;

    if (!s->sh.disable_deblocking_filter_flag)
        ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);

    ret = init_get_bits(&gb, pcm, length);
    if (ret < 0)
        return ret;

    s->hevcdsp.put_pcm(dst0, stride0, cb_size, cb_size,     &gb, s->ps.sps->pcm.bit_depth);
    if (s->ps.sps->chroma_format_idc) {
        s->hevcdsp.put_pcm(dst1, stride1,
                           cb_size >> s->ps.sps->hshift[1],
                           cb_size >> s->ps.sps->vshift[1],
                           &gb, s->ps.sps->pcm.bit_depth_chroma);
        s->hevcdsp.put_pcm(dst2, stride2,
                           cb_size >> s->ps.sps->hshift[2],
                           cb_size >> s->ps.sps->vshift[2],
                           &gb, s->ps.sps->pcm.bit_depth_chroma);
    }

    return 0;
}

/**
 * 8.5.3.2.2.1 Luma sample unidirectional interpolation process
 *
 * @param s HEVC decoding context
 * @param dst target buffer for block data at block position
 * @param dststride stride of the dst buffer
 * @param ref reference picture buffer at origin (0, 0)
 * @param mv motion vector (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block
 * @param block_h height of block
 * @param luma_weight weighting factor applied to the luma prediction
 * @param luma_offset additive offset applied to the luma prediction value
 */

static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
                        AVFrame *ref, const Mv *mv, int x_off, int y_off,
                        int block_w, int block_h, int luma_weight, int luma_offset)
1413
{
1414
    HEVCLocalContext *lc = s->HEVClc;
1415 1416
    uint8_t *src         = ref->data[0];
    ptrdiff_t srcstride  = ref->linesize[0];
1417 1418
    int pic_width        = s->ps.sps->width;
    int pic_height       = s->ps.sps->height;
1419 1420
    int mx               = mv->x & 3;
    int my               = mv->y & 3;
James Almer's avatar
James Almer committed
1421 1422
    int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
                           (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1423
    int idx              = ff_hevc_pel_weight[block_w];
1424 1425 1426

    x_off += mv->x >> 2;
    y_off += mv->y >> 2;
1427
    src   += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
1428

1429 1430 1431
    if (x_off < QPEL_EXTRA_BEFORE || y_off < QPEL_EXTRA_AFTER ||
        x_off >= pic_width - block_w - QPEL_EXTRA_AFTER ||
        y_off >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1432
        const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1433 1434
        int offset     = QPEL_EXTRA_BEFORE * srcstride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
        int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1435

1436
        s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1437
                                 edge_emu_stride, srcstride,
1438 1439 1440
                                 block_w + QPEL_EXTRA,
                                 block_h + QPEL_EXTRA,
                                 x_off - QPEL_EXTRA_BEFORE, y_off - QPEL_EXTRA_BEFORE,
1441
                                 pic_width, pic_height);
1442 1443
        src = lc->edge_emu_buffer + buf_offset;
        srcstride = edge_emu_stride;
1444
    }
1445 1446 1447 1448 1449 1450 1451 1452

    if (!weight_flag)
        s->hevcdsp.put_hevc_qpel_uni[idx][!!my][!!mx](dst, dststride, src, srcstride,
                                                      block_h, mx, my, block_w);
    else
        s->hevcdsp.put_hevc_qpel_uni_w[idx][!!my][!!mx](dst, dststride, src, srcstride,
                                                        block_h, s->sh.luma_log2_weight_denom,
                                                        luma_weight, luma_offset, mx, my, block_w);
1453 1454 1455
}

/**
 * 8.5.3.2.2.1 Luma sample bidirectional interpolation process
 *
 * @param s HEVC decoding context
 * @param dst target buffer for block data at block position
 * @param dststride stride of the dst buffer
 * @param ref0 reference picture0 buffer at origin (0, 0)
 * @param mv0 motion vector0 (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block
 * @param block_h height of block
 * @param ref1 reference picture1 buffer at origin (0, 0)
 * @param mv1 motion vector1 (relative to block position) to get pixel data from
 * @param current_mv current motion vector structure
 */
static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
                       AVFrame *ref0, const Mv *mv0, int x_off, int y_off,
                       int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv)
{
    HEVCLocalContext *lc = s->HEVClc;
    ptrdiff_t src0stride  = ref0->linesize[0];
    ptrdiff_t src1stride  = ref1->linesize[0];
    int pic_width        = s->ps.sps->width;
    int pic_height       = s->ps.sps->height;
    /* the two low bits of each vector component select the fractional phase */
    int mx0              = mv0->x & 3;
    int my0              = mv0->y & 3;
    int mx1              = mv1->x & 3;
    int my1              = mv1->y & 3;
    int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
                           (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
    /* whole-sample position of the block in each reference */
    int x_off0           = x_off + (mv0->x >> 2);
    int y_off0           = y_off + (mv0->y >> 2);
    int x_off1           = x_off + (mv1->x >> 2);
    int y_off1           = y_off + (mv1->y >> 2);
    /* DSP function tables are indexed by block width through this lookup */
    int idx              = ff_hevc_pel_weight[block_w];

    /* the shift is done on an unsigned value so that a negative x offset
     * is not left-shifted as a signed int */
    uint8_t *src0  = ref0->data[0] + y_off0 * src0stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
    uint8_t *src1  = ref1->data[0] + y_off1 * src1stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);

    /* reference 0: route through lc->edge_emu_buffer when the block plus
     * the interpolation margin is not safely inside the picture.
     * NOTE(review): the top test compares against QPEL_EXTRA_AFTER although
     * the fetched region starts QPEL_EXTRA_BEFORE rows above the block;
     * kept exactly as in the original. */
    if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER ||
        x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
        y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
        const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
        /* byte offsets built from ptrdiff_t strides stay in ptrdiff_t */
        ptrdiff_t offset     = QPEL_EXTRA_BEFORE * src0stride      + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
        ptrdiff_t buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);

        s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset,
                                 edge_emu_stride, src0stride,
                                 block_w + QPEL_EXTRA,
                                 block_h + QPEL_EXTRA,
                                 x_off0 - QPEL_EXTRA_BEFORE, y_off0 - QPEL_EXTRA_BEFORE,
                                 pic_width, pic_height);
        src0 = lc->edge_emu_buffer + buf_offset;
        src0stride = edge_emu_stride;
    }

    /* reference 1: same treatment, using the second emulation buffer so the
     * data prepared for reference 0 is not overwritten */
    if (x_off1 < QPEL_EXTRA_BEFORE || y_off1 < QPEL_EXTRA_AFTER ||
        x_off1 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
        y_off1 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
        const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
        ptrdiff_t offset     = QPEL_EXTRA_BEFORE * src1stride      + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
        ptrdiff_t buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);

        s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src1 - offset,
                                 edge_emu_stride, src1stride,
                                 block_w + QPEL_EXTRA,
                                 block_h + QPEL_EXTRA,
                                 x_off1 - QPEL_EXTRA_BEFORE, y_off1 - QPEL_EXTRA_BEFORE,
                                 pic_width, pic_height);
        src1 = lc->edge_emu_buffer2 + buf_offset;
        src1stride = edge_emu_stride;
    }

    /* the reference-0 prediction goes to lc->tmp; the *_bi call below takes
     * lc->tmp together with reference 1 and writes dst */
    s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](lc->tmp, src0, src0stride,
                                                block_h, mx0, my0, block_w);
    if (!weight_flag)
        s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
                                                       block_h, mx1, my1, block_w);
    else
        s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
                                                         block_h, s->sh.luma_log2_weight_denom,
                                                         s->sh.luma_weight_l0[current_mv->ref_idx[0]],
                                                         s->sh.luma_weight_l1[current_mv->ref_idx[1]],
                                                         s->sh.luma_offset_l0[current_mv->ref_idx[0]],
                                                         s->sh.luma_offset_l1[current_mv->ref_idx[1]],
                                                         mx1, my1, block_w);

}

/**
 * 8.5.3.2.2.2 Chroma sample uniprediction interpolation process
1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557
 *
 * @param s HEVC decoding context
 * @param dst1 target buffer for block data at block position (U plane)
 * @param dst2 target buffer for block data at block position (V plane)
 * @param dststride stride of the dst1 and dst2 buffers
 * @param ref reference picture buffer at origin (0, 0)
 * @param mv motion vector (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block
 * @param block_h height of block
1558 1559
 * @param chroma_weight weighting factor applied to the chroma prediction
 * @param chroma_offset additive offset applied to the chroma prediction value
1560
 */
1561 1562 1563 1564

static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0,
                          ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist,
                          int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset)
1565
{
1566
    HEVCLocalContext *lc = s->HEVClc;
1567 1568
    int pic_width        = s->ps.sps->width >> s->ps.sps->hshift[1];
    int pic_height       = s->ps.sps->height >> s->ps.sps->vshift[1];
1569
    const Mv *mv         = &current_mv->mv[reflist];
James Almer's avatar
James Almer committed
1570 1571
    int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
                           (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1572
    int idx              = ff_hevc_pel_weight[block_w];
1573 1574
    int hshift           = s->ps.sps->hshift[1];
    int vshift           = s->ps.sps->vshift[1];
1575 1576
    intptr_t mx          = av_mod_uintp2(mv->x, 2 + hshift);
    intptr_t my          = av_mod_uintp2(mv->y, 2 + vshift);
1577 1578 1579 1580 1581
    intptr_t _mx         = mx << (1 - hshift);
    intptr_t _my         = my << (1 - vshift);

    x_off += mv->x >> (2 + hshift);
    y_off += mv->y >> (2 + vshift);
1582
    src0  += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
1583

1584 1585 1586
    if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
        x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
        y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1587
        const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1588
        int offset0 = EPEL_EXTRA_BEFORE * (srcstride + (1 << s->ps.sps->pixel_shift));
1589
        int buf_offset0 = EPEL_EXTRA_BEFORE *
1590
                          (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634
        s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset0,
                                 edge_emu_stride, srcstride,
                                 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
                                 x_off - EPEL_EXTRA_BEFORE,
                                 y_off - EPEL_EXTRA_BEFORE,
                                 pic_width, pic_height);

        src0 = lc->edge_emu_buffer + buf_offset0;
        srcstride = edge_emu_stride;
    }
    if (!weight_flag)
        s->hevcdsp.put_hevc_epel_uni[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
                                                  block_h, _mx, _my, block_w);
    else
        s->hevcdsp.put_hevc_epel_uni_w[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
                                                        block_h, s->sh.chroma_log2_weight_denom,
                                                        chroma_weight, chroma_offset, _mx, _my, block_w);
}

/**
 * 8.5.3.2.2.2 Chroma sample bidirectional interpolation process
 *
 * Handles a single chroma plane per call.
 *
 * @param s HEVC decoding context
 * @param dst0 target buffer for block data at block position
 * @param dststride stride of the dst0 buffer
 * @param ref0 reference picture0; plane cidx + 1 is read
 * @param ref1 reference picture1; plane cidx + 1 is read
 * @param x_off horizontal position of block from origin (0, 0), in chroma samples
 * @param y_off vertical position of block from origin (0, 0), in chroma samples
 * @param block_w width of block
 * @param block_h height of block
 * @param current_mv current motion vector structure; mv[0] is applied to
 *                   ref0 and mv[1] to ref1
 * @param cidx chroma component(cb, cr)
 */
static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1,
                         int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx)
{
    HEVCLocalContext *lc = s->HEVClc;
    uint8_t *src1        = ref0->data[cidx+1];
    uint8_t *src2        = ref1->data[cidx+1];
    ptrdiff_t src1stride = ref0->linesize[cidx+1];
    ptrdiff_t src2stride = ref1->linesize[cidx+1];
    int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
                           (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
    int pic_width        = s->ps.sps->width >> s->ps.sps->hshift[1];
    int pic_height       = s->ps.sps->height >> s->ps.sps->vshift[1];
    Mv *mv0              = &current_mv->mv[0];
    Mv *mv1              = &current_mv->mv[1];
    int hshift = s->ps.sps->hshift[1];
    int vshift = s->ps.sps->vshift[1];

    /* the low (2 + shift) bits of each vector are the fractional phase on
     * the chroma grid ... */
    intptr_t mx0 = av_mod_uintp2(mv0->x, 2 + hshift);
    intptr_t my0 = av_mod_uintp2(mv0->y, 2 + vshift);
    intptr_t mx1 = av_mod_uintp2(mv1->x, 2 + hshift);
    intptr_t my1 = av_mod_uintp2(mv1->y, 2 + vshift);
    /* ... rescaled by (1 - shift) bits before being handed to the DSP
     * functions */
    intptr_t _mx0 = mx0 << (1 - hshift);
    intptr_t _my0 = my0 << (1 - vshift);
    intptr_t _mx1 = mx1 << (1 - hshift);
    intptr_t _my1 = my1 << (1 - vshift);

    /* whole-sample position of the block in each reference */
    int x_off0 = x_off + (mv0->x >> (2 + hshift));
    int y_off0 = y_off + (mv0->y >> (2 + vshift));
    int x_off1 = x_off + (mv1->x >> (2 + hshift));
    int y_off1 = y_off + (mv1->y >> (2 + vshift));
    /* DSP function tables are indexed by block width through this lookup */
    int idx = ff_hevc_pel_weight[block_w];
    /* the shift is done on an unsigned value so that a negative x offset
     * is not left-shifted as a signed int */
    src1  += y_off0 * src1stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
    src2  += y_off1 * src2stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);

    /* reference 0: route through lc->edge_emu_buffer when the block plus
     * the interpolation margin is not safely inside the plane.
     * NOTE(review): the top test compares against EPEL_EXTRA_AFTER although
     * the fetched region starts EPEL_EXTRA_BEFORE rows above the block;
     * kept exactly as in the original. */
    if (x_off0 < EPEL_EXTRA_BEFORE || y_off0 < EPEL_EXTRA_AFTER ||
        x_off0 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
        y_off0 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
        /* stride and byte offsets use ptrdiff_t, matching src1stride,
         * instead of being narrowed to int */
        const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
        ptrdiff_t offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->ps.sps->pixel_shift));
        ptrdiff_t buf_offset1 = EPEL_EXTRA_BEFORE *
                                (edge_emu_stride + (1 << s->ps.sps->pixel_shift));

        s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
                                 edge_emu_stride, src1stride,
                                 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
                                 x_off0 - EPEL_EXTRA_BEFORE,
                                 y_off0 - EPEL_EXTRA_BEFORE,
                                 pic_width, pic_height);

        src1 = lc->edge_emu_buffer + buf_offset1;
        src1stride = edge_emu_stride;
    }

    /* reference 1: same treatment, using the second emulation buffer so the
     * data prepared for reference 0 is not overwritten */
    if (x_off1 < EPEL_EXTRA_BEFORE || y_off1 < EPEL_EXTRA_AFTER ||
        x_off1 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
        y_off1 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
        const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
        ptrdiff_t offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->ps.sps->pixel_shift));
        ptrdiff_t buf_offset2 = EPEL_EXTRA_BEFORE *
                                (edge_emu_stride + (1 << s->ps.sps->pixel_shift));

        s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src2 - offset2,
                                 edge_emu_stride, src2stride,
                                 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
                                 x_off1 - EPEL_EXTRA_BEFORE,
                                 y_off1 - EPEL_EXTRA_BEFORE,
                                 pic_width, pic_height);

        src2 = lc->edge_emu_buffer2 + buf_offset2;
        src2stride = edge_emu_stride;
    }

    /* the reference-0 prediction goes to lc->tmp; the *_bi call below takes
     * lc->tmp together with reference 1 and writes dst0 */
    s->hevcdsp.put_hevc_epel[idx][!!my0][!!mx0](lc->tmp, src1, src1stride,
                                                block_h, _mx0, _my0, block_w);
    /* dststride is honoured here; the callers in hls_prediction_unit() pass
     * s->frame->linesize[cidx + 1], which is what was previously hard-coded */
    if (!weight_flag)
        s->hevcdsp.put_hevc_epel_bi[idx][!!my1][!!mx1](dst0, dststride,
                                                       src2, src2stride, lc->tmp,
                                                       block_h, _mx1, _my1, block_w);
    else
        s->hevcdsp.put_hevc_epel_bi_w[idx][!!my1][!!mx1](dst0, dststride,
                                                         src2, src2stride, lc->tmp,
                                                         block_h,
                                                         s->sh.chroma_log2_weight_denom,
                                                         s->sh.chroma_weight_l0[current_mv->ref_idx[0]][cidx],
                                                         s->sh.chroma_weight_l1[current_mv->ref_idx[1]][cidx],
                                                         s->sh.chroma_offset_l0[current_mv->ref_idx[0]][cidx],
                                                         s->sh.chroma_offset_l1[current_mv->ref_idx[1]][cidx],
                                                         _mx1, _my1, block_w);
}

static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1718
                                const Mv *mv, int y0, int height)
1719
{
1720 1721
    if (s->threads_type == FF_THREAD_FRAME ) {
        int y = FFMAX(0, (mv->y >> 2) + y0 + height + 9);
1722

1723
        ff_thread_await_progress(&ref->tf, y, 0);
1724
    }
1725 1726
}

1727
/**
 * Parse the motion data of a non-merge prediction unit and build its
 * motion vectors: for each reference list in use, read the reference
 * index, the motion vector difference and the predictor flag, derive the
 * predictor with ff_hevc_luma_mv_mvp_mode() and add the difference.
 *
 * The order of the decode calls follows the bitstream and must not change.
 *
 * @param s HEVC decoding context
 * @param x0 horizontal position of the prediction block
 * @param y0 vertical position of the prediction block
 * @param nPbW width of the prediction block
 * @param nPbH height of the prediction block
 * @param log2_cb_size log2 of the coding block size
 * @param part_idx partition index, forwarded to the predictor derivation
 * @param merge_idx merge index, forwarded to the predictor derivation
 * @param mv motion field to fill (pred_flag, ref_idx[] and mv[])
 */
static void hevc_luma_mv_mvp_mode(HEVCContext *s, int x0, int y0, int nPbW,
                                  int nPbH, int log2_cb_size, int part_idx,
                                  int merge_idx, MvField *mv)
{
    HEVCLocalContext *lc = s->HEVClc;
    enum InterPredIdc inter_pred_idc;
    int mvp_lx_flag;

    ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
    mv->pred_flag = 0;

    /* only B slices code the prediction direction; otherwise list 0 is used */
    if (s->sh.slice_type == HEVC_SLICE_B)
        inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
    else
        inter_pred_idc = PRED_L0;

    /* list 0 is used by everything except pure list-1 prediction */
    if (inter_pred_idc != PRED_L1) {
        /* ref_idx[0] keeps the caller's value when the list is empty */
        if (s->sh.nb_refs[L0])
            mv->ref_idx[0] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);

        mv->pred_flag = PF_L0;
        ff_hevc_hls_mvd_coding(s, x0, y0, 0);
        mvp_lx_flag = ff_hevc_mvp_lx_flag_decode(s);
        ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
                                 part_idx, merge_idx, mv, mvp_lx_flag, 0);
        /* predictor + decoded difference */
        mv->mv[0].x += lc->pu.mvd.x;
        mv->mv[0].y += lc->pu.mvd.y;
    }

    /* list 1 is used by everything except pure list-0 prediction */
    if (inter_pred_idc != PRED_L0) {
        if (s->sh.nb_refs[L1])
            mv->ref_idx[1] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);

        /* with mvd_l1_zero_flag set, bi-predicted blocks carry no list-1
         * difference in the bitstream: it is forced to zero instead */
        if (inter_pred_idc != PRED_BI || s->sh.mvd_l1_zero_flag != 1) {
            ff_hevc_hls_mvd_coding(s, x0, y0, 1);
        } else {
            AV_ZERO32(&lc->pu.mvd);
        }

        /* added on top of PF_L0 when list 0 was used as well */
        mv->pred_flag += PF_L1;
        mvp_lx_flag = ff_hevc_mvp_lx_flag_decode(s);
        ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
                                 part_idx, merge_idx, mv, mvp_lx_flag, 1);
        mv->mv[1].x += lc->pu.mvd.x;
        mv->mv[1].y += lc->pu.mvd.y;
    }
}

1772 1773
/**
 * Decode the motion data of one prediction unit, store it in the motion
 * field table of the current frame and run motion compensation for luma
 * and, when present, both chroma planes.
 *
 * @param s HEVC decoding context
 * @param x0 horizontal position of the prediction block (luma samples)
 * @param y0 vertical position of the prediction block (luma samples)
 * @param nPbW width of the prediction block (luma samples)
 * @param nPbH height of the prediction block (luma samples)
 * @param log2_cb_size log2 of the enclosing coding block size
 * @param partIdx partition index, forwarded to the motion vector derivation
 * @param idx not referenced in this function body
 */
static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
                                int nPbW, int nPbH,
                                int log2_cb_size, int partIdx, int idx)
{
/* address of sample (x, y), given in luma coordinates, in plane c_idx of
 * the current frame */
#define POS(c_idx, x, y)                                                              \
    &s->frame->data[c_idx][((y) >> s->ps.sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
                           (((x) >> s->ps.sps->hshift[c_idx]) << s->ps.sps->pixel_shift)]
    HEVCLocalContext *lc      = s->HEVClc;
    struct MvField current_mv = {{{ 0 }}};
    MvField *tab_mvf          = s->ref->tab_mvf;
    RefPicList *refPicList    = s->ref->refPicList;
    HEVCFrame *ref0           = NULL;
    HEVCFrame *ref1           = NULL;
    uint8_t *dst0             = POS(0, x0, y0);
    uint8_t *dst1             = POS(1, x0, y0);
    uint8_t *dst2             = POS(2, x0, y0);
    int min_pu_width          = s->ps.sps->min_pu_width;
    int log2_min_cb_size      = s->ps.sps->log2_min_cb_size;
    /* NOTE(review): has no explicit use below, so SAMPLE_CTB() presumably
     * picks it up from scope -- keep the name and confirm before removing */
    int min_cb_width          = s->ps.sps->min_cb_width;
    int x_cb                  = x0 >> log2_min_cb_size;
    int y_cb                  = y0 >> log2_min_cb_size;
    int skip_flag             = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
    /* block geometry on the chroma grid, shared by all prediction modes */
    int x0_c                  = x0   >> s->ps.sps->hshift[1];
    int y0_c                  = y0   >> s->ps.sps->vshift[1];
    int nPbW_c                = nPbW >> s->ps.sps->hshift[1];
    int nPbH_c                = nPbH >> s->ps.sps->vshift[1];
    int merge_idx             = 0;
    int x_pu, y_pu, pu_cols, pu_rows;
    int row, col;

    /* skipped blocks carry no merge flag; lc->pu.merge_flag is then left
     * untouched */
    if (!skip_flag)
        lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);

    if (skip_flag || lc->pu.merge_flag) {
        /* the merge index is only coded when there is more than one candidate */
        merge_idx = s->sh.max_num_merge_cand > 1 ? ff_hevc_merge_idx_decode(s) : 0;

        ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
                                   partIdx, merge_idx, &current_mv);
    } else {
        hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
                              partIdx, merge_idx, &current_mv);
    }

    /* record the motion data for every minimum-size PU the block covers */
    x_pu    = x0   >> s->ps.sps->log2_min_pu_size;
    y_pu    = y0   >> s->ps.sps->log2_min_pu_size;
    pu_cols = nPbW >> s->ps.sps->log2_min_pu_size;
    pu_rows = nPbH >> s->ps.sps->log2_min_pu_size;

    for (row = 0; row < pu_rows; row++) {
        MvField *mvf_row = &tab_mvf[(y_pu + row) * min_pu_width + x_pu];

        for (col = 0; col < pu_cols; col++)
            mvf_row[col] = current_mv;
    }

    /* look up the reference frames in use; give up on the block when one is
     * missing, otherwise wait until it is decoded far enough */
    if (current_mv.pred_flag & PF_L0) {
        ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
        if (!ref0)
            return;
        hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
    }
    if (current_mv.pred_flag & PF_L1) {
        ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
        if (!ref1)
            return;
        hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
    }

    if (current_mv.pred_flag == PF_L0) {
        /* uni-prediction from list 0 */
        luma_mc_uni(s, dst0, s->frame->linesize[0], ref0->frame,
                    &current_mv.mv[0], x0, y0, nPbW, nPbH,
                    s->sh.luma_weight_l0[current_mv.ref_idx[0]],
                    s->sh.luma_offset_l0[current_mv.ref_idx[0]]);

        if (s->ps.sps->chroma_format_idc) {
            chroma_mc_uni(s, dst1, s->frame->linesize[1],
                          ref0->frame->data[1], ref0->frame->linesize[1],
                          0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0]);
            chroma_mc_uni(s, dst2, s->frame->linesize[2],
                          ref0->frame->data[2], ref0->frame->linesize[2],
                          0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1]);
        }
    } else if (current_mv.pred_flag == PF_L1) {
        /* uni-prediction from list 1 */
        luma_mc_uni(s, dst0, s->frame->linesize[0], ref1->frame,
                    &current_mv.mv[1], x0, y0, nPbW, nPbH,
                    s->sh.luma_weight_l1[current_mv.ref_idx[1]],
                    s->sh.luma_offset_l1[current_mv.ref_idx[1]]);

        if (s->ps.sps->chroma_format_idc) {
            chroma_mc_uni(s, dst1, s->frame->linesize[1],
                          ref1->frame->data[1], ref1->frame->linesize[1],
                          1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0]);
            chroma_mc_uni(s, dst2, s->frame->linesize[2],
                          ref1->frame->data[2], ref1->frame->linesize[2],
                          1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1]);
        }
    } else if (current_mv.pred_flag == PF_BI) {
        /* bi-prediction from both lists */
        luma_mc_bi(s, dst0, s->frame->linesize[0], ref0->frame,
                   &current_mv.mv[0], x0, y0, nPbW, nPbH,
                   ref1->frame, &current_mv.mv[1], &current_mv);

        if (s->ps.sps->chroma_format_idc) {
            chroma_mc_bi(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame,
                         x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 0);
            chroma_mc_bi(s, dst2, s->frame->linesize[2], ref0->frame, ref1->frame,
                         x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 1);
        }
    }
}

/**
 * 8.4.1
 *
 * Derive the luma intra prediction mode of one prediction block from its
 * left and upper neighbours, store it in s->tab_ipm and mark the covered
 * entries of the motion field table as intra.
 *
 * @param s HEVC decoding context
 * @param x0 horizontal position of the prediction block
 * @param y0 vertical position of the prediction block
 * @param pu_size size of the prediction block
 * @param prev_intra_luma_pred_flag non-zero when the mode is one of the
 *        three candidates, selected by lc->pu.mpm_idx; zero when it is
 *        coded through lc->pu.rem_intra_luma_pred_mode
 * @return the derived intra prediction mode
 */
static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
                                int prev_intra_luma_pred_flag)
{
    HEVCLocalContext *lc = s->HEVClc;
    MvField *tab_mvf     = s->ref->tab_mvf;
    int min_pu_width     = s->ps.sps->min_pu_width;
    int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
    int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
    int size_in_pus      = pu_size >> s->ps.sps->log2_min_pu_size;
    /* position of the block inside its CTB */
    int x0b              = av_mod_uintp2(x0, s->ps.sps->log2_ctb_size);
    int y0b              = av_mod_uintp2(y0, s->ps.sps->log2_ctb_size);
    /* y coordinate of the first row of the CTB containing the block */
    int y_ctb            = (y0 >> (s->ps.sps->log2_ctb_size)) << (s->ps.sps->log2_ctb_size);
    /* a neighbour is read when it lies inside the current CTB or the
     * adjacent CTB is flagged available; otherwise DC stands in for it */
    int cand_up          = (lc->ctb_up_flag || y0b) ?
                           s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
    int cand_left        = (lc->ctb_left_flag || x0b) ?
                           s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
    int candidate[3];
    int intra_pred_mode;
    int row, col, k;

    // intra_pred_mode prediction does not cross vertical CTB boundaries
    if ((y0 - 1) < y_ctb)
        cand_up = INTRA_DC;

    /* build the three candidate modes */
    if (cand_left != cand_up) {
        candidate[0] = cand_left;
        candidate[1] = cand_up;
        /* third candidate: first of planar, DC, angular 26 not already used */
        if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR)
            candidate[2] = INTRA_PLANAR;
        else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC)
            candidate[2] = INTRA_DC;
        else
            candidate[2] = INTRA_ANGULAR_26;
    } else if (cand_left < 2) {
        /* both neighbours share a mode below 2: use the default set */
        candidate[0] = INTRA_PLANAR;
        candidate[1] = INTRA_DC;
        candidate[2] = INTRA_ANGULAR_26;
    } else {
        /* both neighbours share a mode of 2 or above: that mode plus its
         * two neighbours, wrapping around within the range 2..33 */
        candidate[0] = cand_left;
        candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
        candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
    }

    if (prev_intra_luma_pred_flag) {
        intra_pred_mode = candidate[lc->pu.mpm_idx];
    } else {
        /* sort the candidates in ascending order ... */
        if (candidate[0] > candidate[1])
            FFSWAP(uint8_t, candidate[0], candidate[1]);
        if (candidate[0] > candidate[2])
            FFSWAP(uint8_t, candidate[0], candidate[2]);
        if (candidate[1] > candidate[2])
            FFSWAP(uint8_t, candidate[1], candidate[2]);

        /* ... then step the coded remainder over each candidate at or
         * below it, since the remainder skips the candidates */
        intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
        for (k = 0; k < 3; k++)
            if (intra_pred_mode >= candidate[k])
                intra_pred_mode++;
    }

    /* write the intra prediction units into the mv array */
    if (!size_in_pus)
        size_in_pus = 1;

    for (row = 0; row < size_in_pus; row++)
        memset(&s->tab_ipm[(y_pu + row) * min_pu_width + x_pu],
               intra_pred_mode, size_in_pus);

    for (row = 0; row < size_in_pus; row++)
        for (col = 0; col < size_in_pus; col++)
            tab_mvf[(y_pu + row) * min_pu_width + x_pu + col].pred_flag = PF_INTRA;

    return intra_pred_mode;
}

/**
 * Store ct_depth in s->tab_ct_depth for every minimum-size coding block
 * covered by the coding block at (x0, y0).
 *
 * @param s HEVC decoding context
 * @param x0 horizontal position of the coding block
 * @param y0 vertical position of the coding block
 * @param log2_cb_size log2 of the coding block size
 * @param ct_depth value to store
 */
static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
                                          int log2_cb_size, int ct_depth)
{
    const int log2_min_cb = s->ps.sps->log2_min_cb_size;
    /* side of the coding block in minimum coding block units */
    const int span        = (1 << log2_cb_size) >> log2_min_cb;
    const int x_cb        = x0 >> log2_min_cb;
    const int y_cb_end    = (y0 >> log2_min_cb) + span;
    int y_cb;

    /* the table has min_cb_width entries per row; fill one row at a time */
    for (y_cb = y0 >> log2_min_cb; y_cb < y_cb_end; y_cb++)
        memset(&s->tab_ct_depth[y_cb * s->ps.sps->min_cb_width + x_cb],
               ct_depth, span);
}

1991 1992 1993 1994
/*
 * 35-entry remapping table indexed 0..34.
 * NOTE(review): the code that reads it lies outside this part of the file;
 * presumably it maps an intra prediction mode index to another one --
 * confirm against the user before editing values.
 */
static const uint8_t tab_mode_idx[] = {
     0,  1,  2,  2,  2,  2,  3,  5,  7,
     8, 10, 12, 13, 15, 17, 18, 19, 20,
    21, 22, 23, 23, 24, 24, 25, 25, 26,
    27, 27, 28, 28, 29, 29, 30, 31,
};

1995 1996
/**
 * Decode the intra prediction modes of one coding unit.
 *
 * With PART_NxN the unit is handled as a 2x2 grid of prediction blocks,
 * otherwise as a single block. All prev_intra_luma_pred_flag values are
 * decoded first, then for each block either mpm_idx or
 * rem_intra_luma_pred_mode is decoded and the luma mode is derived through
 * luma_intra_pred_mode(). The chroma mode(s) are decoded afterwards; how
 * many and how they are stored depends on sps->chroma_format_idc.
 *
 * Results are written to lc->pu.intra_pred_mode[], lc->pu.intra_pred_mode_c[]
 * and (for chroma_format_idc 2 and 3) lc->pu.chroma_mode_c[].
 *
 * @param s            decoder context
 * @param x0           horizontal position of the coding unit
 * @param y0           vertical position of the coding unit
 * @param log2_cb_size log2 of the coding unit side
 */
static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
                                  int log2_cb_size)
{
    HEVCLocalContext *lc = s->HEVClc;
    /* prediction mode selected by a decoded chroma mode of 0..3 */
    static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
    uint8_t prev_intra_luma_pred_flag[4];
    int split   = lc->cu.part_mode == PART_NxN;
    int pb_size = (1 << log2_cb_size) >> split;
    int side    = split + 1;
    int chroma_mode;
    int i, j;

    /* the flags of all blocks are read before any mode index */
    for (i = 0; i < side; i++)
        for (j = 0; j < side; j++)
            prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);

    for (i = 0; i < side; i++) {
        for (j = 0; j < side; j++) {
            if (prev_intra_luma_pred_flag[2 * i + j])
                lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
            else
                lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);

            lc->pu.intra_pred_mode[2 * i + j] =
                luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
                                     prev_intra_luma_pred_flag[2 * i + j]);
        }
    }

    /*
     * In every branch below a decoded chroma mode of 4 means "reuse the luma
     * mode"; otherwise the mode comes from intra_chroma_table, replaced by 34
     * when it would be identical to the luma mode.
     */
    if (s->ps.sps->chroma_format_idc == 3) {
        /* one chroma mode per prediction block */
        for (i = 0; i < side; i++) {
            for (j = 0; j < side; j++) {
                lc->pu.chroma_mode_c[2 * i + j] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
                if (chroma_mode != 4) {
                    if (lc->pu.intra_pred_mode[2 * i + j] == intra_chroma_table[chroma_mode])
                        lc->pu.intra_pred_mode_c[2 * i + j] = 34;
                    else
                        lc->pu.intra_pred_mode_c[2 * i + j] = intra_chroma_table[chroma_mode];
                } else {
                    lc->pu.intra_pred_mode_c[2 * i + j] = lc->pu.intra_pred_mode[2 * i + j];
                }
            }
        }
    } else if (s->ps.sps->chroma_format_idc == 2) {
        /* single chroma mode, additionally remapped through tab_mode_idx */
        int mode_idx;
        lc->pu.chroma_mode_c[0] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
        if (chroma_mode != 4) {
            if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
                mode_idx = 34;
            else
                mode_idx = intra_chroma_table[chroma_mode];
        } else {
            mode_idx = lc->pu.intra_pred_mode[0];
        }
        lc->pu.intra_pred_mode_c[0] = tab_mode_idx[mode_idx];
    } else if (s->ps.sps->chroma_format_idc != 0) {
        /* single chroma mode, stored as derived; nothing is decoded when
         * chroma_format_idc is 0 */
        chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
        if (chroma_mode != 4) {
            if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
                lc->pu.intra_pred_mode_c[0] = 34;
            else
                lc->pu.intra_pred_mode_c[0] = intra_chroma_table[chroma_mode];
        } else {
            lc->pu.intra_pred_mode_c[0] = lc->pu.intra_pred_mode[0];
        }
    }
}

2063 2064 2065
/**
 * Fill the per-PU intra tables with default values for a coding unit whose
 * intra prediction modes are not decoded.
 *
 * Every entry of s->tab_ipm covered by the unit is set to INTRA_DC. When the
 * unit's pred_mode is MODE_INTRA, the covered entries of the current
 * frame's motion-field table are additionally flagged PF_INTRA.
 *
 * @param s            decoder context
 * @param x0           horizontal position of the coding unit
 * @param y0           vertical position of the coding unit
 * @param log2_cb_size log2 of the coding unit side
 */
static void intra_prediction_unit_default_value(HEVCContext *s,
                                                int x0, int y0,
                                                int log2_cb_size)
{
    HEVCLocalContext *lc = s->HEVClc;
    int pb_size          = 1 << log2_cb_size;
    int size_in_pus      = pb_size >> s->ps.sps->log2_min_pu_size;
    int min_pu_width     = s->ps.sps->min_pu_width;
    MvField *tab_mvf     = s->ref->tab_mvf;
    int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
    int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
    int j, k;

    /* a unit smaller than the minimum PU size still owns one table entry */
    if (size_in_pus == 0)
        size_in_pus = 1;
    for (j = 0; j < size_in_pus; j++)
        memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
    if (lc->cu.pred_mode == MODE_INTRA)
        for (j = 0; j < size_in_pus; j++)
            for (k = 0; k < size_in_pus; k++)
                tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].pred_flag = PF_INTRA;
}

/**
 * Decode one coding unit.
 *
 * Resets the per-CU state in the local context, decodes the optional
 * transquant-bypass and skip flags, then decodes the prediction information
 * (skip / PCM / intra / inter partitions) and, unless the unit is skipped or
 * PCM coded, the residual transform tree. Finally the luma QP table and the
 * coding-tree depth table are updated for the area covered by the unit.
 *
 * @param s            decoder context
 * @param x0           horizontal position of the coding unit
 * @param y0           vertical position of the coding unit
 * @param log2_cb_size log2 of the coding unit side
 * @return 0 on success, or the negative error code returned by
 *         hls_pcm_sample() or hls_transform_tree()
 */
static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
{
    int cb_size          = 1 << log2_cb_size;
    HEVCLocalContext *lc = s->HEVClc;
    int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
    int length           = cb_size >> log2_min_cb_size;
    int min_cb_width     = s->ps.sps->min_cb_width;
    int x_cb             = x0 >> log2_min_cb_size;
    int y_cb             = y0 >> log2_min_cb_size;
    int idx              = log2_cb_size - 2;
    int qp_block_mask    = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
    int x, y, ret;

    /* defaults used when the corresponding syntax elements are not coded */
    lc->cu.x                = x0;
    lc->cu.y                = y0;
    lc->cu.pred_mode        = MODE_INTRA;
    lc->cu.part_mode        = PART_2Nx2N;
    lc->cu.intra_split_flag = 0;

    SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
    for (x = 0; x < 4; x++)
        lc->pu.intra_pred_mode[x] = 1;
    if (s->ps.pps->transquant_bypass_enable_flag) {
        lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
        if (lc->cu.cu_transquant_bypass_flag)
            set_deblocking_bypass(s, x0, y0, log2_cb_size);
    } else
        lc->cu.cu_transquant_bypass_flag = 0;

    /* the skip flag is only coded outside I slices; either way the skip
     * table is written for the whole area of the unit */
    if (s->sh.slice_type != HEVC_SLICE_I) {
        uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);

        x = y_cb * min_cb_width + x_cb;
        for (y = 0; y < length; y++) {
            memset(&s->skip_flag[x], skip_flag, length);
            x += min_cb_width;
        }
        lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
    } else {
        x = y_cb * min_cb_width + x_cb;
        for (y = 0; y < length; y++) {
            memset(&s->skip_flag[x], 0, length);
            x += min_cb_width;
        }
    }

    if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
        /* skipped unit: a single 2Nx2N prediction unit and no residual */
        hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
        intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);

        if (!s->sh.disable_deblocking_filter_flag)
            ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
    } else {
        int pcm_flag = 0;

        if (s->sh.slice_type != HEVC_SLICE_I)
            lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
        /* part_mode is decoded for every non-intra unit, and for intra
         * units only at the minimum coding block size */
        if (lc->cu.pred_mode != MODE_INTRA ||
            log2_cb_size == s->ps.sps->log2_min_cb_size) {
            lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
            lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
                                      lc->cu.pred_mode == MODE_INTRA;
        }

        if (lc->cu.pred_mode == MODE_INTRA) {
            if (lc->cu.part_mode == PART_2Nx2N && s->ps.sps->pcm_enabled_flag &&
                log2_cb_size >= s->ps.sps->pcm.log2_min_pcm_cb_size &&
                log2_cb_size <= s->ps.sps->pcm.log2_max_pcm_cb_size) {
                pcm_flag = ff_hevc_pcm_flag_decode(s);
            }
            if (pcm_flag) {
                intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
                ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
                /* the bypass marking is applied before the error check */
                if (s->ps.sps->pcm.loop_filter_disable_flag)
                    set_deblocking_bypass(s, x0, y0, log2_cb_size);

                if (ret < 0)
                    return ret;
            } else {
                intra_prediction_unit(s, x0, y0, log2_cb_size);
            }
        } else {
            intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
            /* one hls_prediction_unit() call per partition of the unit */
            switch (lc->cu.part_mode) {
            case PART_2Nx2N:
                hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
                break;
            case PART_2NxN:
                hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0, idx);
                hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1, idx);
                break;
            case PART_Nx2N:
                hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0, idx - 1);
                hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1, idx - 1);
                break;
            case PART_2NxnU:
                hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0, idx);
                hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1, idx);
                break;
            case PART_2NxnD:
                hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0, idx);
                hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1, idx);
                break;
            case PART_nLx2N:
                hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0, idx - 2);
                hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1, idx - 2);
                break;
            case PART_nRx2N:
                hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0, idx - 2);
                hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1, idx - 2);
                break;
            case PART_NxN:
                hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0, idx - 1);
                hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1, idx - 1);
                hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2, idx - 1);
                hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3, idx - 1);
                break;
            }
        }

        if (!pcm_flag) {
            int rqt_root_cbf = 1;

            /* the root cbf is only coded for non-intra units that are not a
             * merged 2Nx2N unit; otherwise a residual is assumed present */
            if (lc->cu.pred_mode != MODE_INTRA &&
                !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
                rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
            }
            if (rqt_root_cbf) {
                /* all-zero cbf arrays handed to the root of the transform tree */
                static const int cbf[2] = { 0 };
                lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
                                         s->ps.sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
                                         s->ps.sps->max_transform_hierarchy_depth_inter;
                ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
                                         log2_cb_size,
                                         log2_cb_size, 0, 0, cbf, cbf);
                if (ret < 0)
                    return ret;
            } else {
                if (!s->sh.disable_deblocking_filter_flag)
                    ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
            }
        }
    }

    /* derive the QP here if no cu_qp_delta was coded inside this unit */
    if (s->ps.pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
        ff_hevc_set_qPy(s, x0, y0, log2_cb_size);

    x = y_cb * min_cb_width + x_cb;
    for (y = 0; y < length; y++) {
        memset(&s->qp_y_tab[x], lc->qp_y, length);
        x += min_cb_width;
    }

    /* update the QP predictor once the bottom-right corner of this unit is
     * aligned to the block size described by qp_block_mask */
    if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
       ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) {
        lc->qPy_pred = lc->qp_y;
    }

    set_ct_depth(s, x0, y0, log2_cb_size, lc->ct_depth);

    return 0;
}

2249 2250
/**
 * Recursively decode one node of the coding quadtree.
 *
 * The split flag is decoded only when the block lies completely inside the
 * picture and is larger than the minimum coding block size; otherwise a
 * split is inferred whenever the block is larger than the minimum size.
 * A split node recurses into the sub-blocks whose origin lies inside the
 * picture; a leaf is decoded with hls_coding_unit().
 *
 * @param s            decoder context
 * @param x0           horizontal position of the block
 * @param y0           vertical position of the block
 * @param log2_cb_size log2 of the block side
 * @param cb_depth     depth of this node, stored in lc->ct_depth
 * @return a negative error code on failure, 0 when no more data follows,
 *         non-zero when decoding should continue
 */
static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
                               int log2_cb_size, int cb_depth)
{
    HEVCLocalContext *lc = s->HEVClc;
    const int cb_size    = 1 << log2_cb_size;
    int ret;
    int split_cu;

    lc->ct_depth = cb_depth;
    if (x0 + cb_size <= s->ps.sps->width  &&
        y0 + cb_size <= s->ps.sps->height &&
        log2_cb_size > s->ps.sps->log2_min_cb_size) {
        split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
    } else {
        split_cu = (log2_cb_size > s->ps.sps->log2_min_cb_size);
    }
    /* reset the QP delta state at blocks at least as large as the size
     * derived from diff_cu_qp_delta_depth */
    if (s->ps.pps->cu_qp_delta_enabled_flag &&
        log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth) {
        lc->tu.is_cu_qp_delta_coded = 0;
        lc->tu.cu_qp_delta          = 0;
    }

    if (s->sh.cu_chroma_qp_offset_enabled_flag &&
        log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_chroma_qp_offset_depth) {
        lc->tu.is_cu_chroma_qp_offset_coded = 0;
    }

    if (split_cu) {
        int qp_block_mask = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
        const int cb_size_split = cb_size >> 1;
        const int x1 = x0 + cb_size_split;
        const int y1 = y0 + cb_size_split;

        int more_data = 0;

        /* the top-left sub-block is always decoded; the other three only
         * while data remains and their origin is inside the picture */
        more_data = hls_coding_quadtree(s, x0, y0, log2_cb_size - 1, cb_depth + 1);
        if (more_data < 0)
            return more_data;

        if (more_data && x1 < s->ps.sps->width) {
            more_data = hls_coding_quadtree(s, x1, y0, log2_cb_size - 1, cb_depth + 1);
            if (more_data < 0)
                return more_data;
        }
        if (more_data && y1 < s->ps.sps->height) {
            more_data = hls_coding_quadtree(s, x0, y1, log2_cb_size - 1, cb_depth + 1);
            if (more_data < 0)
                return more_data;
        }
        if (more_data && x1 < s->ps.sps->width &&
            y1 < s->ps.sps->height) {
            more_data = hls_coding_quadtree(s, x1, y1, log2_cb_size - 1, cb_depth + 1);
            if (more_data < 0)
                return more_data;
        }

        if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
            ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0)
            lc->qPy_pred = lc->qp_y;

        if (more_data)
            return ((x1 + cb_size_split) < s->ps.sps->width ||
                    (y1 + cb_size_split) < s->ps.sps->height);
        else
            return 0;
    } else {
        ret = hls_coding_unit(s, x0, y0, log2_cb_size);
        if (ret < 0)
            return ret;
        /* end_of_slice_flag is read only when this unit ends, in both
         * directions, on a CTB boundary or at/after the picture edge */
        if ((!((x0 + cb_size) %
               (1 << (s->ps.sps->log2_ctb_size))) ||
             (x0 + cb_size >= s->ps.sps->width)) &&
            (!((y0 + cb_size) %
               (1 << (s->ps.sps->log2_ctb_size))) ||
             (y0 + cb_size >= s->ps.sps->height))) {
            int end_of_slice_flag = ff_hevc_end_of_slice_flag_decode(s);
            return !end_of_slice_flag;
        } else {
            return 1;
        }
    }

    return 0;
}

2334 2335
/**
 * Set up the per-CTB neighbourhood state in the local context before a CTB
 * is decoded.
 *
 * Records the slice address of the CTB in s->tab_slice_address, updates
 * lc->end_of_tiles_x / lc->end_of_tiles_y and lc->first_qp_group, computes
 * lc->boundary_flags (slice and tile boundaries on the left and upper side)
 * and the four lc->ctb_*_flag neighbour flags.
 *
 * @param s           decoder context
 * @param x_ctb       horizontal position of the CTB, in the same units as
 *                    sps->width
 * @param y_ctb       vertical position of the CTB, in the same units as
 *                    sps->height
 * @param ctb_addr_ts address of the CTB in tile-scan order
 */
static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
                                 int ctb_addr_ts)
{
    HEVCLocalContext *lc  = s->HEVClc;
    int ctb_size          = 1 << s->ps.sps->log2_ctb_size;
    int ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
    int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;

    s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;

    if (s->ps.pps->entropy_coding_sync_enabled_flag) {
        if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
            lc->first_qp_group = 1;
        lc->end_of_tiles_x = s->ps.sps->width;
    } else if (s->ps.pps->tiles_enabled_flag) {
        /* end_of_tiles_x is refreshed only when the tile id differs from
         * that of the previous CTB in tile-scan order */
        if (ctb_addr_ts && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[ctb_addr_ts - 1]) {
            int idxX = s->ps.pps->col_idxX[x_ctb >> s->ps.sps->log2_ctb_size];
            lc->end_of_tiles_x   = x_ctb + (s->ps.pps->column_width[idxX] << s->ps.sps->log2_ctb_size);
            lc->first_qp_group   = 1;
        }
    } else {
        lc->end_of_tiles_x = s->ps.sps->width;
    }

    lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->ps.sps->height);

    lc->boundary_flags = 0;
    if (s->ps.pps->tiles_enabled_flag) {
        if (x_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
            lc->boundary_flags |= BOUNDARY_LEFT_TILE;
        if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
            lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
        if (y_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->ps.sps->ctb_width]])
            lc->boundary_flags |= BOUNDARY_UPPER_TILE;
        if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->ps.sps->ctb_width])
            lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
    } else {
        /* without tiles, slice boundaries follow from the CTB's offset
         * from the slice address alone */
        if (ctb_addr_in_slice <= 0)
            lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
        if (ctb_addr_in_slice < s->ps.sps->ctb_width)
            lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
    }

    /* each flag requires the neighbour to be inside the picture, at or after
     * the slice address, and in the same tile as the current CTB */
    lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
    lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->ps.sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
    lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->ps.sps->ctb_width]]));
    lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->ps.sps->ctb_width]]));
}

2383
/**
 * Decode the CTBs of the current slice segment sequentially.
 *
 * Starting at the CTB given by sh.slice_ctb_addr_rs, CTBs are decoded in
 * tile-scan order until hls_coding_quadtree() reports that no more data
 * follows or the address reaches sps->ctb_size. For each CTB the neighbour
 * state and CABAC are initialised, SAO parameters are parsed, the deblocking
 * parameters of the slice are recorded, and the loop filters are invoked.
 *
 * @param avctxt         codec context; priv_data is the HEVCContext
 * @param isFilterThread unused
 * @return the tile-scan address following the last decoded CTB, or a
 *         negative error code
 */
static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread)
{
    HEVCContext *s  = avctxt->priv_data;
    int ctb_size    = 1 << s->ps.sps->log2_ctb_size;
    int more_data   = 1;
    int x_ctb       = 0;
    int y_ctb       = 0;
    int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
    int ret;

    /* a dependent slice segment needs a predecessor, so it cannot start at
     * tile-scan address 0 (this also keeps ctb_addr_ts - 1 below valid) */
    if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) {
        av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->sh.dependent_slice_segment_flag) {
        int prev_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
        if (s->tab_slice_address[prev_rs] != s->sh.slice_addr) {
            av_log(s->avctx, AV_LOG_ERROR, "Previous slice segment missing\n");
            return AVERROR_INVALIDDATA;
        }
    }

    while (more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
        int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];

        x_ctb = (ctb_addr_rs % ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
        y_ctb = (ctb_addr_rs / ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
        hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);

        ret = ff_hevc_cabac_init(s, ctb_addr_ts);
        if (ret < 0) {
            /* undo the slice address written by hls_decode_neighbour() */
            s->tab_slice_address[ctb_addr_rs] = -1;
            return ret;
        }

        hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);

        s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
        s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
        s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;

        more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
        if (more_data < 0) {
            s->tab_slice_address[ctb_addr_rs] = -1;
            return more_data;
        }


        ctb_addr_ts++;
        ff_hevc_save_states(s, ctb_addr_ts);
        ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
    }

    /* the last decoded CTB reaches the bottom-right corner of the picture */
    if (x_ctb + ctb_size >= s->ps.sps->width &&
        y_ctb + ctb_size >= s->ps.sps->height)
        ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);

    return ctb_addr_ts;
}

2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458
/**
 * Decode the slice data on a single job.
 *
 * Runs hls_decode_entry() once through the codec context's execute()
 * callback and returns the result stored for that job.
 *
 * @param s decoder context
 * @return the value execute() stored in the first result slot
 */
static int hls_slice_data(HEVCContext *s)
{
    int arg[2] = { 0, 1 };
    int ret[2];

    /* one job, with sizeof(int) as the argument stride */
    s->avctx->execute(s->avctx, hls_decode_entry, arg, ret, 1, sizeof(int));

    return ret[0];
}
/**
 * Decode one CTB row of the current slice as a job of the execute2()
 * callback (see hls_slice_data_wpp()).
 *
 * The row index is taken from input_ctb_row[job]. Decoding uses the context
 * copy s1->sList[self_id]; the error flag wpp_err is shared through the
 * main context s1. For every row except the first, the bitstream reader and
 * the CABAC decoder are initialised from the entry-point offset and size
 * stored in the slice header.
 *
 * @param avctxt        codec context; priv_data is the main HEVCContext
 * @param input_ctb_row array of row indices, indexed by job
 * @param job           index of this job
 * @param self_id       index into s1->sList selecting the context to use
 * @return the next tile-scan address when the last CTB of the picture was
 *         decoded, 0 when the row ended or another job reported an error,
 *         or a negative error code
 */
static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int job, int self_id)
{
    HEVCContext *s1  = avctxt->priv_data, *s;
    HEVCLocalContext *lc;
    int ctb_size    = 1<< s1->ps.sps->log2_ctb_size;
    int more_data   = 1;
    int *ctb_row_p    = input_ctb_row;
    int ctb_row = ctb_row_p[job];
    int ctb_addr_rs = s1->sh.slice_ctb_addr_rs + ctb_row * ((s1->ps.sps->width + ctb_size - 1) >> s1->ps.sps->log2_ctb_size);
    int ctb_addr_ts = s1->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs];
    int thread = ctb_row % s1->threads_number;
    int ret;

    s = s1->sList[self_id];
    lc = s->HEVClc;

    if(ctb_row) {
        ret = init_get_bits8(&lc->gb, s->data + s->sh.offset[ctb_row - 1], s->sh.size[ctb_row - 1]);
        if (ret < 0)
            goto error;
        ff_init_cabac_decoder(&lc->cc, s->data + s->sh.offset[(ctb_row)-1], s->sh.size[ctb_row - 1]);
    }

    while(more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
        int x_ctb = (ctb_addr_rs % s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
        int y_ctb = (ctb_addr_rs / s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;

        hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);

        /* NOTE(review): presumably blocks until the row above is
         * SHIFT_CTB_WPP CTBs ahead; confirm against the threading API */
        ff_thread_await_progress2(s->avctx, ctb_row, thread, SHIFT_CTB_WPP);

        /* another job failed: publish progress and stop */
        if (atomic_load(&s1->wpp_err)) {
            ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
            return 0;
        }

        ret = ff_hevc_cabac_init(s, ctb_addr_ts);
        if (ret < 0)
            goto error;
        hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
        more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);

        if (more_data < 0) {
            ret = more_data;
            goto error;
        }

        ctb_addr_ts++;

        ff_hevc_save_states(s, ctb_addr_ts);
        ff_thread_report_progress2(s->avctx, ctb_row, thread, 1);
        ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);

        /* the data ended before the end of a row that is not the last
         * entry point: flag the error for the other jobs */
        if (!more_data && (x_ctb+ctb_size) < s->ps.sps->width && ctb_row != s->sh.num_entry_point_offsets) {
            atomic_store(&s1->wpp_err, 1);
            ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
            return 0;
        }

        /* bottom-right CTB of the picture has been decoded */
        if ((x_ctb+ctb_size) >= s->ps.sps->width && (y_ctb+ctb_size) >= s->ps.sps->height ) {
            ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
            ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
            return ctb_addr_ts;
        }
        ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
        x_ctb+=ctb_size;

        /* end of this CTB row */
        if(x_ctb >= s->ps.sps->width) {
            break;
        }
    }
    ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);

    return 0;
error:
    s->tab_slice_address[ctb_addr_rs] = -1;
    atomic_store(&s1->wpp_err, 1);
    ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
    return ret;
}

2536
/**
 * Decode the slice data with one job per entry point (CTB row), using
 * hls_decode_entry_wpp() through the codec context's execute2() callback.
 *
 * Allocates the per-thread context copies on first use, converts the
 * entry_point_offset[] values of the slice header into byte offsets and
 * sizes inside nal->data (s->sh.offset[] / s->sh.size[]), refreshes the
 * per-thread context copies from the main context and then runs the jobs
 * (only when pps->entropy_coding_sync_enabled_flag is set).
 *
 * @param s   decoder context
 * @param nal NAL unit holding the slice data
 * @return the sum of the per-job return values, or a negative error code
 *         (AVERROR(ENOMEM) on allocation failure, AVERROR_INVALIDDATA when
 *         the entry points do not fit the picture or the NAL unit)
 */
static int hls_slice_data_wpp(HEVCContext *s, const H2645NAL *nal)
{
    const uint8_t *data = nal->data;
    int length          = nal->size;
    HEVCLocalContext *lc = s->HEVClc;
    int *ret = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
    int *arg = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
    int64_t offset;
    int64_t startheader, cmpt = 0;
    int i, j, res = 0;

    if (!ret || !arg) {
        av_free(ret);
        av_free(arg);
        return AVERROR(ENOMEM);
    }

    /* the last row addressed by the entry points must lie inside the picture */
    if (s->sh.slice_ctb_addr_rs + s->sh.num_entry_point_offsets * s->ps.sps->ctb_width >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
        av_log(s->avctx, AV_LOG_ERROR, "WPP ctb addresses are wrong (%d %d %d %d)\n",
            s->sh.slice_ctb_addr_rs, s->sh.num_entry_point_offsets,
            s->ps.sps->ctb_width, s->ps.sps->ctb_height
        );
        res = AVERROR_INVALIDDATA;
        goto error;
    }

    ff_alloc_entries(s->avctx, s->sh.num_entry_point_offsets + 1);

    /*
     * Allocate the per-thread contexts that do not exist yet. Both
     * allocations are checked before use; on failure the pair is released
     * and reset so that no half-initialised slot is left behind.
     */
    for (i = 1; i < s->threads_number; i++) {
        if (s->sList[i] && s->HEVClcList[i])
            continue;
        av_free(s->sList[i]);
        av_free(s->HEVClcList[i]);
        s->sList[i]      = av_malloc(sizeof(HEVCContext));
        s->HEVClcList[i] = av_mallocz(sizeof(HEVCLocalContext));
        if (!s->sList[i] || !s->HEVClcList[i]) {
            av_free(s->sList[i]);
            av_free(s->HEVClcList[i]);
            s->sList[i]      = NULL;
            s->HEVClcList[i] = NULL;
            res = AVERROR(ENOMEM);
            goto error;
        }
        memcpy(s->sList[i], s, sizeof(HEVCContext));
        s->sList[i]->HEVClc = s->HEVClcList[i];
    }

    /* byte position of the bitstream reader inside the NAL payload */
    offset = (lc->gb.index >> 3);

    /*
     * cmpt counts the entries of nal->skipped_bytes_pos that fall inside the
     * current segment; each segment is shortened by that count.
     */
    for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < nal->skipped_bytes; j++) {
        if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
            startheader--;
            cmpt++;
        }
    }

    for (i = 1; i < s->sh.num_entry_point_offsets; i++) {
        offset += (s->sh.entry_point_offset[i - 1] - cmpt);
        for (j = 0, cmpt = 0, startheader = offset
             + s->sh.entry_point_offset[i]; j < nal->skipped_bytes; j++) {
            if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
                startheader--;
                cmpt++;
            }
        }
        s->sh.size[i - 1] = s->sh.entry_point_offset[i] - cmpt;
        s->sh.offset[i - 1] = offset;

    }
    if (s->sh.num_entry_point_offsets != 0) {
        /* the last segment extends to the end of the NAL unit */
        offset += s->sh.entry_point_offset[s->sh.num_entry_point_offsets - 1] - cmpt;
        if (length < offset) {
            av_log(s->avctx, AV_LOG_ERROR, "entry_point_offset table is corrupted\n");
            res = AVERROR_INVALIDDATA;
            goto error;
        }
        s->sh.size[s->sh.num_entry_point_offsets - 1] = length - offset;
        s->sh.offset[s->sh.num_entry_point_offsets - 1] = offset;

    }
    s->data = data;

    /* refresh every per-thread copy from the main context; the memcpy
     * overwrites HEVClc, so it is pointed back at the thread's own local
     * context afterwards */
    for (i = 1; i < s->threads_number; i++) {
        s->sList[i]->HEVClc->first_qp_group = 1;
        s->sList[i]->HEVClc->qp_y = s->sList[0]->HEVClc->qp_y;
        memcpy(s->sList[i], s, sizeof(HEVCContext));
        s->sList[i]->HEVClc = s->HEVClcList[i];
    }

    atomic_store(&s->wpp_err, 0);
    ff_reset_entries(s->avctx);

    for (i = 0; i <= s->sh.num_entry_point_offsets; i++) {
        arg[i] = i;
        ret[i] = 0;
    }

    if (s->ps.pps->entropy_coding_sync_enabled_flag)
        s->avctx->execute2(s->avctx, hls_decode_entry_wpp, arg, ret, s->sh.num_entry_point_offsets + 1);

    for (i = 0; i <= s->sh.num_entry_point_offsets; i++)
        res += ret[i];
error:
    av_free(ret);
    av_free(arg);
    return res;
}

2634 2635 2636 2637
static int set_side_data(HEVCContext *s)
{
    AVFrame *out = s->ref->frame;

2638 2639 2640 2641 2642
    if (s->sei.frame_packing.present &&
        s->sei.frame_packing.arrangement_type >= 3 &&
        s->sei.frame_packing.arrangement_type <= 5 &&
        s->sei.frame_packing.content_interpretation_type > 0 &&
        s->sei.frame_packing.content_interpretation_type < 3) {
2643 2644 2645 2646
        AVStereo3D *stereo = av_stereo3d_create_side_data(out);
        if (!stereo)
            return AVERROR(ENOMEM);

2647
        switch (s->sei.frame_packing.arrangement_type) {
2648
        case 3:
2649
            if (s->sei.frame_packing.quincunx_subsampling)
2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661
                stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
            else
                stereo->type = AV_STEREO3D_SIDEBYSIDE;
            break;
        case 4:
            stereo->type = AV_STEREO3D_TOPBOTTOM;
            break;
        case 5:
            stereo->type = AV_STEREO3D_FRAMESEQUENCE;
            break;
        }

2662
        if (s->sei.frame_packing.content_interpretation_type == 2)
2663
            stereo->flags = AV_STEREO3D_FLAG_INVERT;
2664 2665 2666 2667 2668 2669 2670

        if (s->sei.frame_packing.arrangement_type == 5) {
            if (s->sei.frame_packing.current_frame_is_frame0_flag)
                stereo->view = AV_STEREO3D_VIEW_LEFT;
            else
                stereo->view = AV_STEREO3D_VIEW_RIGHT;
        }
2671 2672
    }

2673 2674 2675 2676
    if (s->sei.display_orientation.present &&
        (s->sei.display_orientation.anticlockwise_rotation ||
         s->sei.display_orientation.hflip || s->sei.display_orientation.vflip)) {
        double angle = s->sei.display_orientation.anticlockwise_rotation * 360 / (double) (1 << 16);
2677 2678 2679 2680 2681 2682 2683 2684
        AVFrameSideData *rotation = av_frame_new_side_data(out,
                                                           AV_FRAME_DATA_DISPLAYMATRIX,
                                                           sizeof(int32_t) * 9);
        if (!rotation)
            return AVERROR(ENOMEM);

        av_display_rotation_set((int32_t *)rotation->data, angle);
        av_display_matrix_flip((int32_t *)rotation->data,
2685 2686
                               s->sei.display_orientation.hflip,
                               s->sei.display_orientation.vflip);
2687 2688
    }

2689 2690
    // Decrement the mastering display flag when IRAP frame has no_rasl_output_flag=1
    // so the side data persists for the entire coded video sequence.
2691
    if (s->sei.mastering_display.present > 0 &&
2692
        IS_IRAP(s) && s->no_rasl_output_flag) {
2693
        s->sei.mastering_display.present--;
2694
    }
2695
    if (s->sei.mastering_display.present) {
2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707
        // HEVC uses a g,b,r ordering, which we convert to a more natural r,g,b
        const int mapping[3] = {2, 0, 1};
        const int chroma_den = 50000;
        const int luma_den = 10000;
        int i;
        AVMasteringDisplayMetadata *metadata =
            av_mastering_display_metadata_create_side_data(out);
        if (!metadata)
            return AVERROR(ENOMEM);

        for (i = 0; i < 3; i++) {
            const int j = mapping[i];
2708
            metadata->display_primaries[i][0].num = s->sei.mastering_display.display_primaries[j][0];
2709
            metadata->display_primaries[i][0].den = chroma_den;
2710
            metadata->display_primaries[i][1].num = s->sei.mastering_display.display_primaries[j][1];
2711 2712
            metadata->display_primaries[i][1].den = chroma_den;
        }
2713
        metadata->white_point[0].num = s->sei.mastering_display.white_point[0];
2714
        metadata->white_point[0].den = chroma_den;
2715
        metadata->white_point[1].num = s->sei.mastering_display.white_point[1];
2716 2717
        metadata->white_point[1].den = chroma_den;

2718
        metadata->max_luminance.num = s->sei.mastering_display.max_luminance;
2719
        metadata->max_luminance.den = luma_den;
2720
        metadata->min_luminance.num = s->sei.mastering_display.min_luminance;
2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738
        metadata->min_luminance.den = luma_den;
        metadata->has_luminance = 1;
        metadata->has_primaries = 1;

        av_log(s->avctx, AV_LOG_DEBUG, "Mastering Display Metadata:\n");
        av_log(s->avctx, AV_LOG_DEBUG,
               "r(%5.4f,%5.4f) g(%5.4f,%5.4f) b(%5.4f %5.4f) wp(%5.4f, %5.4f)\n",
               av_q2d(metadata->display_primaries[0][0]),
               av_q2d(metadata->display_primaries[0][1]),
               av_q2d(metadata->display_primaries[1][0]),
               av_q2d(metadata->display_primaries[1][1]),
               av_q2d(metadata->display_primaries[2][0]),
               av_q2d(metadata->display_primaries[2][1]),
               av_q2d(metadata->white_point[0]), av_q2d(metadata->white_point[1]));
        av_log(s->avctx, AV_LOG_DEBUG,
               "min_luminance=%f, max_luminance=%f\n",
               av_q2d(metadata->min_luminance), av_q2d(metadata->max_luminance));
    }
2739 2740
    // Decrement the mastering display flag when IRAP frame has no_rasl_output_flag=1
    // so the side data persists for the entire coded video sequence.
2741
    if (s->sei.content_light.present > 0 &&
2742
        IS_IRAP(s) && s->no_rasl_output_flag) {
2743
        s->sei.content_light.present--;
2744
    }
2745
    if (s->sei.content_light.present) {
2746 2747 2748 2749
        AVContentLightMetadata *metadata =
            av_content_light_metadata_create_side_data(out);
        if (!metadata)
            return AVERROR(ENOMEM);
2750 2751
        metadata->MaxCLL  = s->sei.content_light.max_content_light_level;
        metadata->MaxFALL = s->sei.content_light.max_pic_average_light_level;
2752 2753 2754 2755 2756

        av_log(s->avctx, AV_LOG_DEBUG, "Content Light Level Metadata:\n");
        av_log(s->avctx, AV_LOG_DEBUG, "MaxCLL=%d, MaxFALL=%d\n",
               metadata->MaxCLL, metadata->MaxFALL);
    }
2757

2758
    if (s->sei.a53_caption.a53_caption) {
2759 2760
        AVFrameSideData* sd = av_frame_new_side_data(out,
                                                     AV_FRAME_DATA_A53_CC,
2761
                                                     s->sei.a53_caption.a53_caption_size);
2762
        if (sd)
2763 2764 2765
            memcpy(sd->data, s->sei.a53_caption.a53_caption, s->sei.a53_caption.a53_caption_size);
        av_freep(&s->sei.a53_caption.a53_caption);
        s->sei.a53_caption.a53_caption_size = 0;
2766 2767 2768
        s->avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
    }

2769 2770 2771
    if (s->sei.alternative_transfer.present &&
        av_color_transfer_name(s->sei.alternative_transfer.preferred_transfer_characteristics) &&
        s->sei.alternative_transfer.preferred_transfer_characteristics != AVCOL_TRC_UNSPECIFIED) {
2772
        s->avctx->color_trc = out->color_trc = s->sei.alternative_transfer.preferred_transfer_characteristics;
2773 2774
    }

2775 2776 2777
    return 0;
}

2778 2779
/**
 * Set up the decoder state for the first slice of a new picture.
 *
 * Clears the per-picture tables, allocates the new reference frame, builds
 * the frame RPS, exports side data and fetches a frame for output.
 *
 * @return 0 on success; on failure the current reference (if any) is
 *         released, s->ref is reset to NULL and a negative error code is
 *         returned
 */
static int hevc_frame_start(HEVCContext *s)
{
    HEVCLocalContext *lc      = s->HEVClc;
    const int min_cb_cols     = (s->ps.sps->width  >> s->ps.sps->log2_min_cb_size) + 1;
    const int min_cb_rows     = (s->ps.sps->height >> s->ps.sps->log2_min_cb_size) + 1;
    const int pic_size_in_ctb = min_cb_cols * min_cb_rows;
    int ret;

    /* wipe the tables that are filled while decoding the picture */
    memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
    memset(s->vertical_bs,   0, s->bs_width * s->bs_height);
    memset(s->cbf_luma,      0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
    memset(s->is_pcm,        0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1));
    memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));

    s->is_decoded     = 0;
    s->first_nal_type = s->nal_unit_type;

    s->no_rasl_output_flag = IS_IDR(s) || IS_BLA(s) || (s->nal_unit_type == HEVC_NAL_CRA_NUT && s->last_eos);

    if (s->ps.pps->tiles_enabled_flag)
        lc->end_of_tiles_x = s->ps.pps->column_width[0] << s->ps.sps->log2_ctb_size;

    if ((ret = ff_hevc_set_new_ref(s, &s->frame, s->poc)) < 0)
        goto fail;

    if ((ret = ff_hevc_frame_rps(s)) < 0) {
        av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
        goto fail;
    }

    s->ref->frame->key_frame = IS_IRAP(s);

    if ((ret = set_side_data(s)) < 0)
        goto fail;

    s->frame->pict_type = 3 - s->sh.slice_type;

    if (!IS_IRAP(s))
        ff_hevc_bump_frame(s);

    av_frame_unref(s->output_frame);
    if ((ret = ff_hevc_output_frame(s, s->output_frame, 0)) < 0)
        goto fail;

    if (!s->avctx->hwaccel)
        ff_thread_finish_setup(s->avctx);

    return 0;

fail:
    if (s->ref)
        ff_hevc_unref_frame(s, s->ref, ~0);
    s->ref = NULL;
    return ret;
}

2837
/**
 * Forward a parameter set or SEI NAL unit to the hardware accelerator when
 * one is active and implements the decode_params callback.
 *
 * @return 0 when no such callback is available, otherwise the value returned
 *         by the callback
 */
static int hevc_hwaccel_decode_params(HEVCContext *s, const H2645NAL *nal)
{
    if (!s->avctx->hwaccel || !s->avctx->hwaccel->decode_params)
        return 0;

    return s->avctx->hwaccel->decode_params(s->avctx, nal->type,
                                            nal->raw_data, nal->raw_size);
}

/**
 * Decode a single NAL unit.
 *
 * Parameter sets and SEI messages are parsed into the context; slice NAL
 * units are decoded in software or handed to the hardware accelerator. EOS
 * and EOB units advance the sequence counter and reset max_ra; AUD and
 * filler data are ignored; other types are skipped with an INFO message.
 *
 * @param s   decoder context
 * @param nal NAL unit to decode
 * @return 0 on success, and also for errors that reach the fail label while
 *         AV_EF_EXPLODE is not set. A negative error code is returned when
 *         AV_EF_EXPLODE is set, and unconditionally for slice header
 *         failures, frame start failures and mismatching VCL NAL unit types.
 */
static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal)
{
    HEVCLocalContext *lc = s->HEVClc;
    GetBitContext *gb    = &lc->gb;
    int ctb_addr_ts, ret;

    *gb              = nal->gb;
    s->nal_unit_type = nal->type;
    s->temporal_id   = nal->temporal_id;

    switch (s->nal_unit_type) {
    case HEVC_NAL_VPS:
        ret = hevc_hwaccel_decode_params(s, nal);
        if (ret < 0)
            goto fail;
        ret = ff_hevc_decode_nal_vps(gb, s->avctx, &s->ps);
        if (ret < 0)
            goto fail;
        break;
    case HEVC_NAL_SPS:
        ret = hevc_hwaccel_decode_params(s, nal);
        if (ret < 0)
            goto fail;
        ret = ff_hevc_decode_nal_sps(gb, s->avctx, &s->ps,
                                     s->apply_defdispwin);
        if (ret < 0)
            goto fail;
        break;
    case HEVC_NAL_PPS:
        ret = hevc_hwaccel_decode_params(s, nal);
        if (ret < 0)
            goto fail;
        ret = ff_hevc_decode_nal_pps(gb, s->avctx, &s->ps);
        if (ret < 0)
            goto fail;
        break;
    case HEVC_NAL_SEI_PREFIX:
    case HEVC_NAL_SEI_SUFFIX:
        ret = hevc_hwaccel_decode_params(s, nal);
        if (ret < 0)
            goto fail;
        ret = ff_hevc_decode_nal_sei(gb, s->avctx, &s->sei, &s->ps, s->nal_unit_type);
        if (ret < 0)
            goto fail;
        break;
    case HEVC_NAL_TRAIL_R:
    case HEVC_NAL_TRAIL_N:
    case HEVC_NAL_TSA_N:
    case HEVC_NAL_TSA_R:
    case HEVC_NAL_STSA_N:
    case HEVC_NAL_STSA_R:
    case HEVC_NAL_BLA_W_LP:
    case HEVC_NAL_BLA_W_RADL:
    case HEVC_NAL_BLA_N_LP:
    case HEVC_NAL_IDR_W_RADL:
    case HEVC_NAL_IDR_N_LP:
    case HEVC_NAL_CRA_NUT:
    case HEVC_NAL_RADL_N:
    case HEVC_NAL_RADL_R:
    case HEVC_NAL_RASL_N:
    case HEVC_NAL_RASL_R:
        ret = hls_slice_header(s);
        if (ret < 0)
            return ret;

        /* honour the caller's skip_frame request */
        if (
            (s->avctx->skip_frame >= AVDISCARD_BIDIR && s->sh.slice_type == HEVC_SLICE_B) ||
            (s->avctx->skip_frame >= AVDISCARD_NONINTRA && s->sh.slice_type != HEVC_SLICE_I) ||
            (s->avctx->skip_frame >= AVDISCARD_NONKEY && !IS_IRAP(s))) {
            break;
        }

        if (s->sh.first_slice_in_pic_flag) {
            /* max_ra == INT_MAX means no random access point was seen yet */
            if (s->max_ra == INT_MAX) {
                if (s->nal_unit_type == HEVC_NAL_CRA_NUT || IS_BLA(s)) {
                    s->max_ra = s->poc;
                } else {
                    if (IS_IDR(s))
                        s->max_ra = INT_MIN;
                }
            }

            /* drop RASL pictures whose POC does not exceed max_ra */
            if ((s->nal_unit_type == HEVC_NAL_RASL_R || s->nal_unit_type == HEVC_NAL_RASL_N) &&
                s->poc <= s->max_ra) {
                s->is_decoded = 0;
                break;
            } else {
                if (s->nal_unit_type == HEVC_NAL_RASL_R && s->poc > s->max_ra)
                    s->max_ra = INT_MIN;
            }

            s->overlap ++;
            ret = hevc_frame_start(s);
            if (ret < 0)
                return ret;
        } else if (!s->ref) {
            av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
            /* ret still holds the non-negative slice header result here;
             * set a real error code so that AV_EF_EXPLODE reports it */
            ret = AVERROR_INVALIDDATA;
            goto fail;
        }

        if (s->nal_unit_type != s->first_nal_type) {
            av_log(s->avctx, AV_LOG_ERROR,
                   "Non-matching NAL types of the VCL NALUs: %d %d\n",
                   s->first_nal_type, s->nal_unit_type);
            return AVERROR_INVALIDDATA;
        }

        if (!s->sh.dependent_slice_segment_flag &&
            s->sh.slice_type != HEVC_SLICE_I) {
            ret = ff_hevc_slice_rpl(s);
            if (ret < 0) {
                av_log(s->avctx, AV_LOG_WARNING,
                       "Error constructing the reference lists for the current slice.\n");
                goto fail;
            }
        }

        if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) {
            ret = s->avctx->hwaccel->start_frame(s->avctx, NULL, 0);
            if (ret < 0)
                goto fail;
        }

        if (s->avctx->hwaccel) {
            ret = s->avctx->hwaccel->decode_slice(s->avctx, nal->raw_data, nal->raw_size);
            if (ret < 0)
                goto fail;
        } else {
            if (s->threads_number > 1 && s->sh.num_entry_point_offsets > 0)
                ctb_addr_ts = hls_slice_data_wpp(s, nal);
            else
                ctb_addr_ts = hls_slice_data(s);
            /* the picture is complete once the last CTB has been reached */
            if (ctb_addr_ts >= (s->ps.sps->ctb_width * s->ps.sps->ctb_height)) {
                s->is_decoded = 1;
            }

            if (ctb_addr_ts < 0) {
                ret = ctb_addr_ts;
                goto fail;
            }
        }
        break;
    case HEVC_NAL_EOS_NUT:
    case HEVC_NAL_EOB_NUT:
        s->seq_decode = (s->seq_decode + 1) & 0xff;
        s->max_ra     = INT_MAX;
        break;
    case HEVC_NAL_AUD:
    case HEVC_NAL_FD_NUT:
        break;
    default:
        av_log(s->avctx, AV_LOG_INFO,
               "Skipping NAL unit %d\n", s->nal_unit_type);
        break;
    }

    return 0;
fail:
    /* errors are only propagated when the user asked for strict behaviour */
    if (s->avctx->err_recognition & AV_EF_EXPLODE)
        return ret;
    return 0;
}

/**
 * Split a packet into NAL units and decode them in order.
 *
 * Before decoding, the packet is scanned for EOS/EOB NAL units: those that
 * precede every other NAL unit set last_eos, later ones set eos.
 *
 * @param s      decoder context
 * @param buf    packet data
 * @param length size of buf in bytes
 * @return the non-negative result of the last operation on success, a
 *         negative error code if splitting or decoding failed
 */
static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
{
    int idx, ret = 0;
    int eos_at_start = 1;

    s->ref      = NULL;
    s->last_eos = s->eos;
    s->eos      = 0;
    s->overlap  = 0;

    /* split the input packet into NAL units, so we know the upper bound on the
     * number of slices in the frame */
    ret = ff_h2645_packet_split(&s->pkt, buf, length, s->avctx, s->is_nalff,
                                s->nal_length_size, s->avctx->codec_id, 1, 0);
    if (ret < 0) {
        av_log(s->avctx, AV_LOG_ERROR,
               "Error splitting the input into NAL units.\n");
        return ret;
    }

    /* classify end-of-sequence/bitstream markers by their position */
    for (idx = 0; idx < s->pkt.nb_nals; idx++) {
        const H2645NAL *cur = &s->pkt.nals[idx];

        if (cur->type != HEVC_NAL_EOB_NUT &&
            cur->type != HEVC_NAL_EOS_NUT) {
            eos_at_start = 0;
            continue;
        }

        if (eos_at_start)
            s->last_eos = 1;
        else
            s->eos = 1;
    }

    /* decode the NAL units */
    for (idx = 0; idx < s->pkt.nb_nals; idx++) {
        H2645NAL *nal = &s->pkt.nals[idx];

        if (s->avctx->skip_frame >= AVDISCARD_ALL)
            continue;
        if (s->avctx->skip_frame >= AVDISCARD_NONREF &&
            ff_hevc_nal_is_nonref(nal->type))
            continue;

        ret = decode_nal_unit(s, nal);
        /* more than two frame starts within one packet is treated as invalid */
        if (ret >= 0 && s->overlap > 2)
            ret = AVERROR_INVALIDDATA;
        if (ret < 0) {
            av_log(s->avctx, AV_LOG_WARNING,
                   "Error parsing NAL unit #%d.\n", idx);
            break;
        }
    }

    /* with frame threading, mark the current frame as fully progressed */
    if (s->ref && s->threads_type == FF_THREAD_FRAME)
        ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);

    return ret;
}

3078
/**
 * Log a 16-byte MD5 digest as 32 lowercase hexadecimal digits, one av_log()
 * call per byte and without a trailing newline.
 */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *cur       = md5;
    const uint8_t *const end = md5 + 16;

    while (cur < end) {
        av_log(log_ctx, level, "%02"PRIx8, *cur);
        cur++;
    }
}

/**
 * Compare the MD5 of every plane of a decoded frame with the digests carried
 * by the picture hash SEI.
 *
 * Each plane is hashed row by row over the coded dimensions of the codec
 * context, with planes 1 and 2 scaled by the chroma subsampling shifts.
 *
 * @param s     decoder context holding the expected digests
 * @param frame frame to verify
 * @return 0 if all planes match, AVERROR_INVALIDDATA on the first mismatch,
 *         AVERROR(EINVAL) for an unknown pixel format, AVERROR(ENOMEM) if
 *         the byte-swap scratch buffer cannot be allocated
 */
static int verify_md5(HEVCContext *s, AVFrame *frame)
{
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
    int pixel_shift;
    int i, j;

    if (!desc)
        return AVERROR(EINVAL);

    /* 1 when samples take two bytes, 0 otherwise */
    pixel_shift = desc->comp[0].depth > 8;

    av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
           s->poc);

    /* the checksums are LE, so we have to byteswap for >8bpp formats
     * on BE arches */
#if HAVE_BIGENDIAN
    if (pixel_shift) {
        /* Ask for the buffer on every call: av_fast_malloc() leaves a buffer
         * that is already large enough untouched and grows it otherwise, so
         * a later frame with a larger linesize cannot overflow a buffer that
         * was sized for an earlier one. */
        av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
                       FFMAX3(frame->linesize[0], frame->linesize[1],
                              frame->linesize[2]));
        if (!s->checksum_buf)
            return AVERROR(ENOMEM);
    }
#endif

    for (i = 0; frame->data[i]; i++) {
        int width  = s->avctx->coded_width;
        int height = s->avctx->coded_height;
        int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
        int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
        uint8_t md5[16];

        av_md5_init(s->md5_ctx);
        for (j = 0; j < h; j++) {
            const uint8_t *src = frame->data[i] + j * frame->linesize[i];
#if HAVE_BIGENDIAN
            if (pixel_shift) {
                s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
                                    (const uint16_t *) src, w);
                src = s->checksum_buf;
            }
#endif
            av_md5_update(s->md5_ctx, src, w << pixel_shift);
        }
        av_md5_final(s->md5_ctx, md5);

        if (!memcmp(md5, s->sei.picture_hash.md5[i], 16)) {
            av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
            print_md5(s->avctx, AV_LOG_DEBUG, md5);
            av_log   (s->avctx, AV_LOG_DEBUG, "; ");
        } else {
            av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
            print_md5(s->avctx, AV_LOG_ERROR, md5);
            av_log   (s->avctx, AV_LOG_ERROR, " != ");
            print_md5(s->avctx, AV_LOG_ERROR, s->sei.picture_hash.md5[i]);
            av_log   (s->avctx, AV_LOG_ERROR, "\n");
            return AVERROR_INVALIDDATA;
        }
    }

    av_log(s->avctx, AV_LOG_DEBUG, "\n");

    return 0;
}

3151
/**
 * Parse extradata into the parameter sets and SEI state of the context.
 *
 * @param s      decoder context
 * @param buf    extradata bytes
 * @param length size of buf in bytes
 * @param first  when non-zero, the stream parameters of the first SPS found
 *               in the SPS list are exported to the codec context
 * @return 0 on success, a negative error code if parsing fails
 */
static int hevc_decode_extradata(HEVCContext *s, uint8_t *buf, int length, int first)
{
    int ret, idx;

    ret = ff_hevc_decode_extradata(buf, length, &s->ps, &s->sei, &s->is_nalff,
                                   &s->nal_length_size, s->avctx->err_recognition,
                                   s->apply_defdispwin, s->avctx);
    if (ret < 0)
        return ret;

    if (!first)
        return 0;

    /* export stream parameters from the first SPS */
    for (idx = 0; idx < FF_ARRAY_ELEMS(s->ps.sps_list); idx++) {
        const HEVCSPS *sps;

        if (!s->ps.sps_list[idx])
            continue;

        sps = (const HEVCSPS*)s->ps.sps_list[idx]->data;
        export_stream_params(s->avctx, &s->ps, sps);
        break;
    }

    return 0;
}

3173 3174 3175 3176
/**
 * Decode one packet and possibly return one frame.
 *
 * An empty packet drains the decoder. Otherwise, extradata carried as packet
 * side data is applied, the NAL units are decoded, the hardware accelerator
 * is asked to finish the frame or the picture hash is checked, and a pending
 * output frame is moved to the caller.
 *
 * @param avctx      codec context
 * @param data       destination AVFrame
 * @param got_output set to non-zero when a frame was written to data
 * @param avpkt      input packet
 * @return avpkt->size on success, 0 when draining, a negative error code on
 *         failure
 */
static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
                             AVPacket *avpkt)
{
    HEVCContext *s = avctx->priv_data;
    uint8_t *new_extradata;
    int new_extradata_size;
    int ret;

    /* empty packet: flush a buffered frame, if any */
    if (!avpkt->size) {
        ret = ff_hevc_output_frame(s, data, 1);
        if (ret < 0)
            return ret;

        *got_output = ret;
        return 0;
    }

    new_extradata = av_packet_get_side_data(avpkt, AV_PKT_DATA_NEW_EXTRADATA,
                                            &new_extradata_size);
    if (new_extradata && new_extradata_size > 0) {
        ret = hevc_decode_extradata(s, new_extradata, new_extradata_size, 0);
        if (ret < 0)
            return ret;
    }

    s->ref = NULL;
    ret    = decode_nal_units(s, avpkt->data, avpkt->size);
    if (ret < 0)
        return ret;

    if (avctx->hwaccel) {
        if (s->ref) {
            ret = avctx->hwaccel->end_frame(avctx);
            if (ret < 0) {
                av_log(avctx, AV_LOG_ERROR,
                       "hardware accelerator failed to decode picture\n");
                ff_hevc_unref_frame(s, s->ref, ~0);
                return ret;
            }
        }
    } else if ((avctx->err_recognition & AV_EF_CRCCHECK) && s->is_decoded &&
               s->sei.picture_hash.is_md5) {
        /* verify the SEI checksum */
        ret = verify_md5(s, s->ref->frame);
        if (ret < 0 && (avctx->err_recognition & AV_EF_EXPLODE)) {
            ff_hevc_unref_frame(s, s->ref, ~0);
            return ret;
        }
    }
    s->sei.picture_hash.is_md5 = 0;

    if (s->is_decoded) {
        av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
        s->is_decoded = 0;
    }

    if (s->output_frame->buf[0]) {
        av_frame_move_ref(data, s->output_frame);
        *got_output = 1;
    }

    return avpkt->size;
}

/**
 * Make dst a new reference to the picture held by src.
 *
 * References the thread frame and the tab_mvf, rpl_tab, rpl and (when
 * present) hwaccel private buffers, and copies poc, ctb_count, flags and
 * sequence.
 *
 * @return 0 on success, the error of ff_thread_ref_frame() if that call
 *         fails, or AVERROR(ENOMEM) after dst has been unreferenced if a
 *         buffer reference cannot be created
 */
static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
{
    int ret = ff_thread_ref_frame(&dst->tf, &src->tf);

    if (ret < 0)
        return ret;

    if (!(dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf)))
        goto fail;
    dst->tab_mvf = src->tab_mvf;

    if (!(dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf)))
        goto fail;
    dst->rpl_tab = src->rpl_tab;

    if (!(dst->rpl_buf = av_buffer_ref(src->rpl_buf)))
        goto fail;

    dst->poc       = src->poc;
    dst->ctb_count = src->ctb_count;
    dst->flags     = src->flags;
    dst->sequence  = src->sequence;

    if (src->hwaccel_picture_private) {
        if (!(dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf)))
            goto fail;
        dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
    }

    return 0;

fail:
    ff_hevc_unref_frame(s, dst, ~0);
    return AVERROR(ENOMEM);
}

/**
 * Release everything owned by the decoder context.
 *
 * Used as the codec close callback and as the cleanup path of
 * hevc_init_context(), so it has to cope with a partially initialised
 * context.
 *
 * @return always 0
 */
static av_cold int hevc_decode_free(AVCodecContext *avctx)
{
    HEVCContext       *s = avctx->priv_data;
    int i;

    pic_arrays_free(s);

    av_freep(&s->md5_ctx);

    av_freep(&s->cabac_state);

    for (i = 0; i < 3; i++) {
        av_freep(&s->sao_pixel_buffer_h[i]);
        av_freep(&s->sao_pixel_buffer_v[i]);
    }
    av_frame_free(&s->output_frame);

    for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
        ff_hevc_unref_frame(s, &s->DPB[i], ~0);
        av_frame_free(&s->DPB[i].frame);
    }

    ff_hevc_ps_uninit(&s->ps);

    av_freep(&s->sh.entry_point_offset);
    av_freep(&s->sh.offset);
    av_freep(&s->sh.size);

    /* Caption bytes parsed from an SEI are otherwise only released by
     * set_side_data() at the next frame start; free them here so they do not
     * leak when the decoder is closed before that happens. */
    av_freep(&s->sei.a53_caption.a53_caption);
    s->sei.a53_caption.a53_caption_size = 0;

    /* slot 0 is handled separately below */
    for (i = 1; i < s->threads_number; i++) {
        HEVCLocalContext *lc = s->HEVClcList[i];
        if (lc) {
            av_freep(&s->HEVClcList[i]);
            av_freep(&s->sList[i]);
        }
    }
    /* do not leave HEVClc dangling when it aliases the entry freed next */
    if (s->HEVClc == s->HEVClcList[0])
        s->HEVClc = NULL;
    av_freep(&s->HEVClcList[0]);

    ff_h2645_packet_uninit(&s->pkt);

    return 0;
}

/**
 * Allocate the parts of the decoder context that every context needs: the
 * local context, the CABAC state, the output frame, one AVFrame per DPB slot
 * and the MD5 context.
 *
 * @return 0 on success; on allocation failure the context is released with
 *         hevc_decode_free() and AVERROR(ENOMEM) is returned
 */
static av_cold int hevc_init_context(AVCodecContext *avctx)
{
    HEVCContext *s = avctx->priv_data;
    int slot;

    s->avctx = avctx;

    if (!(s->HEVClc = av_mallocz(sizeof(HEVCLocalContext))))
        goto fail;
    s->HEVClcList[0] = s->HEVClc;
    s->sList[0]      = s;

    if (!(s->cabac_state = av_malloc(HEVC_CONTEXTS)))
        goto fail;

    if (!(s->output_frame = av_frame_alloc()))
        goto fail;

    for (slot = 0; slot < FF_ARRAY_ELEMS(s->DPB); slot++) {
        if (!(s->DPB[slot].frame = av_frame_alloc()))
            goto fail;
        s->DPB[slot].tf.f = s->DPB[slot].frame;
    }

    s->max_ra = INT_MAX;

    if (!(s->md5_ctx = av_md5_alloc()))
        goto fail;

    ff_bswapdsp_init(&s->bdsp);

    s->context_initialized = 1;
    s->eos                 = 0;

    ff_hevc_reset_sei(&s->sei);

    return 0;

fail:
    hevc_decode_free(avctx);
    return AVERROR(ENOMEM);
}

3368
#if HAVE_THREADS
/**
 * Bring the decoder context of dst in line with the one of src.
 *
 * Re-references the DPB and the VPS/SPS/PPS lists, activates the SPS of the
 * source if it differs, and copies the sequence state and the SEI data that
 * has to be carried over.
 *
 * @return 0 on success, a negative error code on failure
 */
static int hevc_update_thread_context(AVCodecContext *dst,
                                      const AVCodecContext *src)
{
    HEVCContext *s  = dst->priv_data;
    HEVCContext *s0 = src->priv_data;
    int idx, ret;

    if (!s->context_initialized) {
        ret = hevc_init_context(dst);
        if (ret < 0)
            return ret;
    }

    /* mirror the DPB of the source context */
    for (idx = 0; idx < FF_ARRAY_ELEMS(s->DPB); idx++) {
        ff_hevc_unref_frame(s, &s->DPB[idx], ~0);
        if (!s0->DPB[idx].frame->buf[0])
            continue;
        ret = hevc_ref_frame(s, &s->DPB[idx], &s0->DPB[idx]);
        if (ret < 0)
            return ret;
    }

    /* forget the active SPS before the lists backing it are replaced */
    if (s->ps.sps != s0->ps.sps)
        s->ps.sps = NULL;

    for (idx = 0; idx < FF_ARRAY_ELEMS(s->ps.vps_list); idx++) {
        av_buffer_unref(&s->ps.vps_list[idx]);
        if (!s0->ps.vps_list[idx])
            continue;
        if (!(s->ps.vps_list[idx] = av_buffer_ref(s0->ps.vps_list[idx])))
            return AVERROR(ENOMEM);
    }

    for (idx = 0; idx < FF_ARRAY_ELEMS(s->ps.sps_list); idx++) {
        av_buffer_unref(&s->ps.sps_list[idx]);
        if (!s0->ps.sps_list[idx])
            continue;
        if (!(s->ps.sps_list[idx] = av_buffer_ref(s0->ps.sps_list[idx])))
            return AVERROR(ENOMEM);
    }

    for (idx = 0; idx < FF_ARRAY_ELEMS(s->ps.pps_list); idx++) {
        av_buffer_unref(&s->ps.pps_list[idx]);
        if (!s0->ps.pps_list[idx])
            continue;
        if (!(s->ps.pps_list[idx] = av_buffer_ref(s0->ps.pps_list[idx])))
            return AVERROR(ENOMEM);
    }

    if (s->ps.sps != s0->ps.sps) {
        ret = set_sps(s, s0->ps.sps, src->pix_fmt);
        if (ret < 0)
            return ret;
    }

    s->seq_decode          = s0->seq_decode;
    s->seq_output          = s0->seq_output;
    s->pocTid0             = s0->pocTid0;
    s->max_ra              = s0->max_ra;
    s->eos                 = s0->eos;
    s->no_rasl_output_flag = s0->no_rasl_output_flag;

    s->is_nalff        = s0->is_nalff;
    s->nal_length_size = s0->nal_length_size;

    s->threads_number = s0->threads_number;
    s->threads_type   = s0->threads_type;

    /* the source saw an end of sequence: start a new one in this context */
    if (s0->eos) {
        s->seq_decode = (s->seq_decode + 1) & 0xff;
        s->max_ra     = INT_MAX;
    }

    s->sei.frame_packing        = s0->sei.frame_packing;
    s->sei.display_orientation  = s0->sei.display_orientation;
    s->sei.mastering_display    = s0->sei.mastering_display;
    s->sei.content_light        = s0->sei.content_light;
    s->sei.alternative_transfer = s0->sei.alternative_transfer;

    return 0;
}
#endif
3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462

/**
 * Codec init callback: allocate the context, select the threading setup and
 * parse the extradata, if any.
 *
 * @return 0 on success, a negative error code on failure
 */
static av_cold int hevc_decode_init(AVCodecContext *avctx)
{
    HEVCContext *s = avctx->priv_data;
    int ret;

    avctx->internal->allocate_progress = 1;

    if ((ret = hevc_init_context(avctx)) < 0)
        return ret;

    s->enable_parallel_tiles             = 0;
    s->sei.picture_timing.picture_struct = 0;
    s->eos                               = 1;

    atomic_init(&s->wpp_err, 0);

    /* several slice threads are only used with slice threading */
    s->threads_number = (avctx->active_thread_type & FF_THREAD_SLICE)
                        ? avctx->thread_count : 1;

    if (avctx->extradata_size > 0 && avctx->extradata) {
        ret = hevc_decode_extradata(s, avctx->extradata, avctx->extradata_size, 1);
        if (ret < 0) {
            hevc_decode_free(avctx);
            return ret;
        }
    }

    /* frame threading needs more than one thread, else fall back to slices */
    s->threads_type = ((avctx->active_thread_type & FF_THREAD_FRAME) &&
                       avctx->thread_count > 1)
                      ? FF_THREAD_FRAME : FF_THREAD_SLICE;

    return 0;
}

3490
#if HAVE_THREADS
3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503
/**
 * init_thread_copy callback: wipe the private context of a thread copy and
 * run hevc_init_context() on it.
 *
 * @param avctx codec context of the thread copy
 * @return 0 on success, or the negative value returned by hevc_init_context()
 */
static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
{
    HEVCContext *ctx = avctx->priv_data;
    int err;

    /* Start from an all-zero context before initialising it. */
    memset(ctx, 0, sizeof(*ctx));

    err = hevc_init_context(avctx);

    /* Only negative results are propagated; anything else reports success. */
    return err < 0 ? err : 0;
}
3504
#endif
3505 3506 3507 3508 3509 3510

/**
 * Flush callback: calls ff_hevc_flush_dpb() on the decoder context, then
 * sets eos to 1 and max_ra to INT_MAX.
 *
 * @param avctx codec context whose priv_data is the HEVCContext to flush
 */
static void hevc_decode_flush(AVCodecContext *avctx)
{
    HEVCContext *ctx = avctx->priv_data;

    ff_hevc_flush_dpb(ctx);

    ctx->eos    = 1;
    ctx->max_ra = INT_MAX;
}

/* Byte offset of a member inside HEVCContext, for the AVOption table. */
#define OFFSET(field) offsetof(HEVCContext, field)
/* Flag set shared by the options below: video + decoding parameter. */
#define PAR (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM)
gcocherel's avatar
gcocherel committed
3516

3517
static const AVOption options[] = {
3518
    { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3519
        AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, PAR },
3520
    { "strict-displaywin", "stricly apply default display window size", OFFSET(apply_defdispwin),
3521
        AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, PAR },
3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542
    { NULL },
};

/* AVClass for the decoder's private context; exposes the options table. */
static const AVClass hevc_decoder_class = {
    .class_name = "HEVC decoder",
    .version    = LIBAVUTIL_VERSION_INT,
    .item_name  = av_default_item_name,
    .option     = options,
};

AVCodec ff_hevc_decoder = {
    .name                  = "hevc",
    .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_HEVC,
    .priv_data_size        = sizeof(HEVCContext),
    .priv_class            = &hevc_decoder_class,
    .init                  = hevc_decode_init,
    .close                 = hevc_decode_free,
    .decode                = hevc_decode_frame,
    .flush                 = hevc_decode_flush,
3543 3544
    .update_thread_context = ONLY_IF_THREADS_ENABLED(hevc_update_thread_context),
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(hevc_init_thread_copy),
3545
    .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
3546
                             AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS,
3547
    .caps_internal         = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_EXPORTS_CROPPING,
3548
    .profiles              = NULL_IF_CONFIG_SMALL(ff_hevc_profiles),
3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572
    .hw_configs            = (const AVCodecHWConfigInternal*[]) {
#if CONFIG_HEVC_DXVA2_HWACCEL
                               HWACCEL_DXVA2(hevc),
#endif
#if CONFIG_HEVC_D3D11VA_HWACCEL
                               HWACCEL_D3D11VA(hevc),
#endif
#if CONFIG_HEVC_D3D11VA2_HWACCEL
                               HWACCEL_D3D11VA2(hevc),
#endif
#if CONFIG_HEVC_NVDEC_HWACCEL
                               HWACCEL_NVDEC(hevc),
#endif
#if CONFIG_HEVC_VAAPI_HWACCEL
                               HWACCEL_VAAPI(hevc),
#endif
#if CONFIG_HEVC_VDPAU_HWACCEL
                               HWACCEL_VDPAU(hevc),
#endif
#if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
                               HWACCEL_VIDEOTOOLBOX(hevc),
#endif
                               NULL
                           },
3573
};