dnxhdenc.c 37.2 KB
Newer Older
1 2 3
/*
 * VC3/DNxHD encoder
 * Copyright (c) 2007 Baptiste Coudurier <baptiste dot coudurier at smartjog dot com>
 * Copyright (c) 2011 MirriAd Ltd
 *
 * VC-3 encoder funded by the British Broadcasting Corporation
 * 10 bit support added by MirriAd Ltd, Joseph Artsimovich <joseph@mirriad.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#define RC_VARIANCE 1 // use variance or ssd for fast rc

28
#include "libavutil/attributes.h"
29
#include "libavutil/internal.h"
30
#include "libavutil/opt.h"
31
#include "libavutil/timer.h"
32 33
#include "avcodec.h"
#include "dsputil.h"
34
#include "internal.h"
35
#include "mpegvideo.h"
36
#include "dnxhdenc.h"
37

38
/* Flags applied to every entry of options[]: a video, encoding-side parameter.
 * Parenthesized so the expansion stays a single operand in any expression. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
39
#define DNX10BIT_QMAT_SHIFT 18 // The largest value that will not lead to overflow for 10bit samples.
40 41

static const AVOption options[]={
42
    {"nitris_compat", "encode with Avid Nitris compatibility", offsetof(DNXHDEncContext, nitris_compat), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, VE},
43 44
{NULL}
};
45

46
static const AVClass dnxhd_class = {
47 48 49 50 51
    .class_name = "dnxhd",
    .item_name  = av_default_item_name,
    .option     = options,
    .version    = LIBAVUTIL_VERSION_INT,
};
52

53 54
/* Left-shift applied to lambda and to SSD in the RDO score (fixed-point scale). */
#define LAMBDA_FRAC_BITS 10

55
/**
 * Load an 8x4 strip of 8-bit samples into the top half of an 8x8 block and
 * fill the bottom half with a vertical mirror of it.
 *
 * @param block     destination, 64 coefficients in row-major order
 * @param pixels    first source sample of the strip
 * @param line_size distance between two source rows, in bytes
 */
static void dnxhd_8bit_get_pixels_8x4_sym(int16_t *av_restrict block, const uint8_t *pixels, int line_size)
{
    int row, col;

    for (row = 0; row < 4; row++) {
        for (col = 0; col < 8; col++)
            block[row * 8 + col] = pixels[col];
        pixels += line_size;
    }

    /* Rows 4..7 are copies of rows 3..0. */
    for (row = 0; row < 4; row++)
        memcpy(block + (4 + row) * 8, block + (3 - row) * 8, sizeof(*block) * 8);
}

72
/**
 * Load an 8x4 strip of 16-bit samples into the top half of an 8x8 block and
 * fill the bottom half with a vertical mirror of it.
 *
 * @param block     destination, 64 coefficients in row-major order
 * @param pixels    first source sample of the strip (read as uint16_t)
 * @param line_size distance between two source rows, in bytes
 */
static av_always_inline void dnxhd_10bit_get_pixels_8x4_sym(int16_t *av_restrict block, const uint8_t *pixels, int line_size)
{
    const uint16_t *src = (const uint16_t *)pixels;
    const int stride    = line_size >> 1; /* bytes -> 16-bit samples */
    int row, col;

    for (row = 0; row < 4; row++) {
        for (col = 0; col < 8; col++)
            block[row * 8 + col] = src[col];
        src += stride;
    }

    /* Rows 4..7 are copies of rows 3..0. */
    for (row = 0; row < 4; row++)
        memcpy(block + (4 + row) * 8, block + (3 - row) * 8, sizeof(*block) * 8);
}

Diego Biurrun's avatar
Diego Biurrun committed
92
/**
 * Forward-transform and quantize one 8x8 block for 10-bit content.
 *
 * @param ctx      MPEG encoder context supplying the DCT, scantable and matrices
 * @param block    64 samples in, quantized coefficients out (in place)
 * @param n        block selector: values below 4 use the luma matrix,
 *                 others the chroma matrix
 * @param qscale   quantizer scale, used as index into the matrix tables
 * @param overflow not written by this implementation
 * @return scan position of the last non-zero AC coefficient (0 if none)
 */
static int dnxhd_10bit_dct_quantize(MpegEncContext *ctx, int16_t *block,
                                    int n, int qscale, int *overflow)
{
    const uint8_t *scan = ctx->intra_scantable.scantable;
    const int *qmat;
    int last_nz = 0;
    int pos;

    if (n < 4)
        qmat = ctx->q_intra_matrix[qscale];
    else
        qmat = ctx->q_chroma_intra_matrix[qscale];

    ctx->dsp.fdct(block);

    // Divide by 4 with rounding, to compensate scaling of DCT coefficients
    block[0] = (block[0] + 2) >> 2;

    for (pos = 1; pos < 64; ++pos) {
        const int idx      = scan[pos];
        const int coeff    = block[idx];
        const int negative = coeff < 0;
        int mag            = negative ? -coeff : coeff;

        /* Quantize the magnitude, then restore the sign. */
        mag = mag * qmat[idx] >> DNX10BIT_QMAT_SHIFT;
        block[idx] = negative ? -mag : mag;
        if (mag)
            last_nz = pos;
    }

    return last_nz;
}

118
/**
 * Build the encoder-side VLC lookup tables from the CID table.
 *
 * vlc_codes/vlc_bits are indexed by (level * 2 | run_flag) for level in
 * [-max_level, max_level); both pointers are advanced by max_level * 2 after
 * allocation so that negative indices are valid. run_codes/run_bits are
 * indexed directly by run length.
 *
 * @return 0 on success, AVERROR(ENOMEM) if an allocation fails
 */
static av_cold int dnxhd_init_vlc(DNXHDEncContext *ctx)
{
    int i, j, level, run;
    int max_level = 1 << (ctx->cid_table->bit_depth + 2);

    FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->vlc_codes, max_level*4*sizeof(*ctx->vlc_codes), fail);
    FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->vlc_bits,  max_level*4*sizeof(*ctx->vlc_bits) , fail);
    FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->run_codes, 63*2,                                fail);
    FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->run_bits,  63,                                  fail);

    /* Re-base so that the tables can be indexed with negative levels. */
    ctx->vlc_codes += max_level*2;
    ctx->vlc_bits  += max_level*2;
    for (level = -max_level; level < max_level; level++) {
        for (run = 0; run < 2; run++) {
            /* Multiply instead of shifting: left-shifting a negative value
             * is undefined behaviour in C. */
            int index = level * (1 << 1) | run;
            int sign, offset = 0, alevel = level;

            MASK_ABS(sign, alevel);
            if (alevel > 64) {
                offset = (alevel - 1) >> 6;
                alevel -= offset << 6;
            }
            for (j = 0; j < 257; j++) {
                if (ctx->cid_table->ac_level[j] >> 1 == alevel &&
                    (!offset || (ctx->cid_table->ac_flags[j] & 1)) &&
                    (!run    || (ctx->cid_table->ac_flags[j] & 2))) {
                    av_assert1(!ctx->vlc_codes[index]);
                    if (alevel) {
                        /* Non-zero levels carry one extra sign bit. */
                        ctx->vlc_codes[index] = (ctx->cid_table->ac_codes[j] << 1) | (sign & 1);
                        ctx->vlc_bits [index] = ctx->cid_table->ac_bits[j] + 1;
                    } else {
                        ctx->vlc_codes[index] = ctx->cid_table->ac_codes[j];
                        ctx->vlc_bits [index] = ctx->cid_table->ac_bits [j];
                    }
                    break;
                }
            }
            av_assert0(!alevel || j < 257);
            if (offset) {
                /* Append the index field that carries the level offset. */
                ctx->vlc_codes[index] = (ctx->vlc_codes[index] << ctx->cid_table->index_bits) | offset;
                ctx->vlc_bits [index] += ctx->cid_table->index_bits;
            }
        }
    }
    for (i = 0; i < 62; i++) {
        int run = ctx->cid_table->run[i];
        av_assert0(run < 63);
        ctx->run_codes[run] = ctx->cid_table->run_codes[i];
        ctx->run_bits [run] = ctx->cid_table->run_bits[i];
    }
    return 0;
 fail:
    return AVERROR(ENOMEM);
}

173
/**
 * Allocate and fill the luma/chroma quantization matrices for every qscale
 * in [1, qmax] and publish them in the embedded MpegEncContext.
 *
 * @param lbias unused here; ctx->m.intra_quant_bias is passed to
 *              ff_convert_matrix instead
 * @param cbias unused here
 * @return 0 on success, AVERROR(ENOMEM) if an allocation fails
 */
static av_cold int dnxhd_init_qmat(DNXHDEncContext *ctx, int lbias, int cbias)
{
    // first element is 1 so that convert_matrix never divides by 0
    uint16_t weight_matrix[64] = { 1, }; // convert_matrix needs uint16_t*
    const uint8_t *luma_weight_table   = ctx->cid_table->luma_weight;
    const uint8_t *chroma_weight_table = ctx->cid_table->chroma_weight;
    const int qmax = ctx->m.avctx->qmax;
    int qscale, i;

    FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_l,   (qmax + 1) * 64 *     sizeof(int),      fail);
    FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_c,   (qmax + 1) * 64 *     sizeof(int),      fail);
    FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_l16, (qmax + 1) * 64 * 2 * sizeof(uint16_t), fail);
    FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_c16, (qmax + 1) * 64 * 2 * sizeof(uint16_t), fail);

    if (ctx->cid_table->bit_depth != 8) {
        // 10-bit
        //
        // The quantization formula from the VC-3 standard is:
        //   quantized = sign(block[i]) * floor(abs(block[i]/s) * p / (qscale * weight_table[i]))
        // where p is 32 for 8-bit samples and 8 for 10-bit ones.
        // The s factor compensates the scaling done by the DCT routines and
        // is therefore not part of the standard: 8 for 8-bit, 4 for 10-bit.
        // The matrix entries are therefore
        //   ((1 << DNX10BIT_QMAT_SHIFT) * (p / s)) / (qscale * weight_table[i])
        // and for 10-bit samples p / s == 2.
        const int numerator = 1 << (DNX10BIT_QMAT_SHIFT + 1);

        for (qscale = 1; qscale <= qmax; qscale++) {
            for (i = 1; i < 64; i++) {
                int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]];

                ctx->qmatrix_l[qscale][j] = numerator / (qscale * luma_weight_table[i]);
                ctx->qmatrix_c[qscale][j] = numerator / (qscale * chroma_weight_table[i]);
            }
        }
    } else {
        for (i = 1; i < 64; i++) {
            int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]];
            weight_matrix[j] = luma_weight_table[i];
        }
        ff_convert_matrix(&ctx->m.dsp, ctx->qmatrix_l, ctx->qmatrix_l16, weight_matrix,
                          ctx->m.intra_quant_bias, 1, qmax, 1);

        for (i = 1; i < 64; i++) {
            int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]];
            weight_matrix[j] = chroma_weight_table[i];
        }
        ff_convert_matrix(&ctx->m.dsp, ctx->qmatrix_c, ctx->qmatrix_c16, weight_matrix,
                          ctx->m.intra_quant_bias, 1, qmax, 1);

        /* Scale every generated entry by 4. */
        for (qscale = 1; qscale <= qmax; qscale++) {
            for (i = 0; i < 64; i++) {
                ctx->qmatrix_l  [qscale]   [i] <<= 2;
                ctx->qmatrix_c  [qscale]   [i] <<= 2;
                ctx->qmatrix_l16[qscale][0][i] <<= 2;
                ctx->qmatrix_l16[qscale][1][i] <<= 2;
                ctx->qmatrix_c16[qscale][0][i] <<= 2;
                ctx->qmatrix_c16[qscale][1][i] <<= 2;
            }
        }
    }

    ctx->m.q_intra_matrix          = ctx->qmatrix_l;
    ctx->m.q_intra_matrix16        = ctx->qmatrix_l16;
    ctx->m.q_chroma_intra_matrix   = ctx->qmatrix_c;
    ctx->m.q_chroma_intra_matrix16 = ctx->qmatrix_c16;

    return 0;
 fail:
    return AVERROR(ENOMEM);
}

237
/**
 * Allocate the rate-control tables and set the initial rate-control state.
 *
 * mb_cmp is only allocated when mb_decision is not FF_MB_DECISION_RD.
 *
 * @return 0 on success, AVERROR(ENOMEM) if an allocation fails
 */
static av_cold int dnxhd_init_rc(DNXHDEncContext *ctx)
{
    FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->mb_rc,
                      8160 * (ctx->m.avctx->qmax + 1) * sizeof(RCEntry), fail);

    if (ctx->m.avctx->mb_decision != FF_MB_DECISION_RD) {
        FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->mb_cmp,
                          ctx->m.mb_num * sizeof(RCCMPEntry), fail);
    }

    /* Bit budget: coding unit size minus 640, 4 and the minimum padding
     * (all in bytes), converted to bits. */
    ctx->frame_bits = (ctx->cid_table->coding_unit_size - 640 - 4 - ctx->min_padding) * 8;
    ctx->lambda     = 2 << LAMBDA_FRAC_BITS; // qscale 2
    ctx->qscale     = 1;
    return 0;
 fail:
    return AVERROR(ENOMEM);
}

251
/**
 * Encoder init callback: select the DNxHD profile (CID) that matches the
 * stream parameters, set up DCT/quantizer function pointers, allocate all
 * tables and create the per-thread context copies.
 *
 * @return 0 on success; AVERROR(EINVAL) for unsupported pixel format,
 *         no matching profile, too many threads or qmax < 2;
 *         AVERROR(ENOMEM) if an allocation fails
 */
static av_cold int dnxhd_encode_init(AVCodecContext *avctx)
{
    DNXHDEncContext *ctx = avctx->priv_data;
    int i, index, bit_depth, ret;

    switch (avctx->pix_fmt) {
    case AV_PIX_FMT_YUV422P:
        bit_depth = 8;
        break;
    case AV_PIX_FMT_YUV422P10:
        bit_depth = 10;
        break;
    default:
        av_log(avctx, AV_LOG_ERROR, "pixel format is incompatible with DNxHD\n");
        return AVERROR(EINVAL);
    }

    ctx->cid = ff_dnxhd_find_cid(avctx, bit_depth);
    if (!ctx->cid) {
        av_log(avctx, AV_LOG_ERROR, "video parameters incompatible with DNxHD. Valid DNxHD profiles:\n");
        ff_dnxhd_print_profiles(avctx, AV_LOG_ERROR);
        return AVERROR(EINVAL);
    }
    av_log(avctx, AV_LOG_DEBUG, "cid %d\n", ctx->cid);

    index = ff_dnxhd_get_cid_table(ctx->cid);
    av_assert0(index >= 0);
    ctx->cid_table = &ff_dnxhd_cid_table[index];

    ctx->m.avctx = avctx;
    ctx->m.mb_intra = 1;
    ctx->m.h263_aic = 1;

    avctx->bits_per_raw_sample = ctx->cid_table->bit_depth;

    ff_dct_common_init(&ctx->m);
    ff_dct_encode_init(&ctx->m);

    if (!ctx->m.dct_quantize)
        ctx->m.dct_quantize = ff_dct_quantize_c;

    if (ctx->cid_table->bit_depth == 10) {
       ctx->m.dct_quantize = dnxhd_10bit_dct_quantize;
       ctx->get_pixels_8x4_sym = dnxhd_10bit_get_pixels_8x4_sym;
       ctx->block_width_l2 = 4;
    } else {
       ctx->get_pixels_8x4_sym = dnxhd_8bit_get_pixels_8x4_sym;
       ctx->block_width_l2 = 3;
    }

    if (ARCH_X86)
        ff_dnxhdenc_init_x86(ctx);

    ctx->m.mb_height = (avctx->height + 15) / 16;
    ctx->m.mb_width  = (avctx->width  + 15) / 16;

    if (avctx->flags & CODEC_FLAG_INTERLACED_DCT) {
        ctx->interlaced = 1;
        ctx->m.mb_height /= 2;
    }

    ctx->m.mb_num = ctx->m.mb_height * ctx->m.mb_width;

    if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
        ctx->m.intra_quant_bias = avctx->intra_quant_bias;

    /* Validate user parameters before anything is allocated: the table
     * sizes below depend on qmax, and failing early avoids leaving
     * half-initialized allocations behind on an EINVAL return. */
    if (avctx->thread_count > MAX_THREADS) {
        av_log(avctx, AV_LOG_ERROR, "too many threads\n");
        return AVERROR(EINVAL);
    }

    if (avctx->qmax <= 1) {
        av_log(avctx, AV_LOG_ERROR, "qmax must be at least 2\n");
        return AVERROR(EINVAL);
    }

    if ((ret = dnxhd_init_qmat(ctx, ctx->m.intra_quant_bias, 0)) < 0) // XXX tune lbias/cbias
        return ret;

    // Avid Nitris hardware decoder requires a minimum amount of padding in the coding unit payload
    if (ctx->nitris_compat)
        ctx->min_padding = 1600;

    if ((ret = dnxhd_init_vlc(ctx)) < 0)
        return ret;
    if ((ret = dnxhd_init_rc(ctx)) < 0)
        return ret;

    FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->slice_size, ctx->m.mb_height*sizeof(uint32_t), fail);
    FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->slice_offs, ctx->m.mb_height*sizeof(uint32_t), fail);
    FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->mb_bits,    ctx->m.mb_num   *sizeof(uint16_t), fail);
    FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->mb_qscale,  ctx->m.mb_num   *sizeof(uint8_t),  fail);

    avctx->coded_frame = av_frame_alloc();
    if (!avctx->coded_frame)
        return AVERROR(ENOMEM);

    avctx->coded_frame->key_frame = 1;
    avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;

    /* Slot 0 is the main context; every further thread gets its own copy. */
    ctx->thread[0] = ctx;
    for (i = 1; i < avctx->thread_count; i++) {
        ctx->thread[i] = av_malloc(sizeof(DNXHDEncContext));
        if (!ctx->thread[i])
            goto fail;
        memcpy(ctx->thread[i], ctx, sizeof(DNXHDEncContext));
    }

    return 0;
 fail: // for FF_ALLOCZ_OR_GOTO and the thread context allocation
    return AVERROR(ENOMEM);
}

/**
 * Write the 640-byte DNxHD frame header into buf and remember where the
 * table at offset 0x170 starts (ctx->msip).
 *
 * @param buf output buffer, at least 640 bytes
 * @return always 0
 */
static int dnxhd_write_header(AVCodecContext *avctx, uint8_t *buf)
{
    static const uint8_t header_prefix[5] = { 0x00, 0x00, 0x02, 0x80, 0x01 };
    DNXHDEncContext *ctx = avctx->priv_data;
    const int is_10bit   = ctx->cid_table->bit_depth == 10;

    memset(buf, 0, 640);
    memcpy(buf, header_prefix, sizeof(header_prefix));

    if (ctx->interlaced)
        buf[5] = ctx->cur_field + 2;
    else
        buf[5] = 0x01;
    buf[6] = 0x80; // crc flag off
    buf[7] = 0xa0; // reserved

    AV_WB16(buf + 0x18, avctx->height >> ctx->interlaced); // ALPF
    AV_WB16(buf + 0x1a, avctx->width);                     // SPL
    AV_WB16(buf + 0x1d, avctx->height >> ctx->interlaced); // NAL

    buf[0x21] = is_10bit ? 0x58 : 0x38;
    buf[0x22] = 0x88 + (ctx->interlaced << 2);
    AV_WB32(buf + 0x28, ctx->cid); // CID
    if (ctx->interlaced)
        buf[0x2c] = 0;
    else
        buf[0x2c] = 0x80;

    buf[0x5f] = 0x01; // UDL

    buf[0x167] = 0x02; // reserved
    AV_WB16(buf + 0x16a, ctx->m.mb_height * 4 + 4); // MSIPS
    buf[0x16d] = ctx->m.mb_height; // Ns
    buf[0x16f] = 0x10; // reserved

    ctx->msip = buf + 0x170;
    return 0;
}

/**
 * Write one DC difference: the size-category code from the CID table
 * followed by the nbits low bits of the (sign-adjusted) difference.
 */
static av_always_inline void dnxhd_encode_dc(DNXHDEncContext *ctx, int diff)
{
    const int negative = diff < 0;
    const int nbits    = av_log2_16bit(negative ? -2 * diff : 2 * diff);

    /* Negative differences are stored decremented by one. */
    if (negative)
        diff--;

    put_bits(&ctx->m.pb, ctx->cid_table->dc_bits[nbits] + nbits,
             (ctx->cid_table->dc_codes[nbits] << nbits) + (diff & ((1 << nbits) - 1)));
}

Diego Biurrun's avatar
Diego Biurrun committed
405
/**
 * Entropy-code one quantized 8x8 block into ctx->m.pb.
 *
 * The DC coefficient is coded as a difference against ctx->m.last_dc[n];
 * each non-zero AC coefficient is coded with the level VLC (whose index
 * carries a "preceded by a zero run" flag) followed by the run code when the
 * run is non-zero. The block is terminated with the code stored at index 0.
 *
 * @param block      quantized coefficients
 * @param last_index scan position of the last non-zero coefficient
 * @param n          component index selecting the DC predictor
 */
static av_always_inline void dnxhd_encode_block(DNXHDEncContext *ctx, int16_t *block, int last_index, int n)
{
    int last_non_zero = 0;
    int slevel, i, j;

    dnxhd_encode_dc(ctx, block[0] - ctx->m.last_dc[n]);
    ctx->m.last_dc[n] = block[0];

    for (i = 1; i <= last_index; i++) {
        j = ctx->m.intra_scantable.permutated[i];
        slevel = block[j];
        if (slevel) {
            int run_level = i - last_non_zero - 1;
            /* Multiply instead of shifting: slevel may be negative and
             * left-shifting a negative value is undefined behaviour. */
            int rlevel = slevel * (1 << 1) | !!run_level;
            put_bits(&ctx->m.pb, ctx->vlc_bits[rlevel], ctx->vlc_codes[rlevel]);
            if (run_level)
                put_bits(&ctx->m.pb, ctx->run_bits[run_level], ctx->run_codes[run_level]);
            last_non_zero = i;
        }
    }
    put_bits(&ctx->m.pb, ctx->vlc_bits[0], ctx->vlc_codes[0]); // EOB
}

Diego Biurrun's avatar
Diego Biurrun committed
428
/**
 * Dequantize the AC coefficients of one block in place.
 *
 * @param block      quantized coefficients, overwritten with reconstructed values
 * @param n          block index; bit 1 set selects the chroma weight table
 * @param qscale     quantizer scale used when the block was quantized
 * @param last_index scan position of the last non-zero coefficient
 */
static av_always_inline void dnxhd_unquantize_c(DNXHDEncContext *ctx, int16_t *block, int n, int qscale, int last_index)
{
    const uint8_t *weight_matrix;
    const int is_10bit = ctx->cid_table->bit_depth == 10;
    /* Weight value for which no rounding offset is added; the same value is
     * also the rounding offset for every other weight. */
    const int neutral  = is_10bit ? 8 : 32;
    const int shift    = is_10bit ? 4 : 6;
    int i;

    if (n & 2)
        weight_matrix = ctx->cid_table->chroma_weight;
    else
        weight_matrix = ctx->cid_table->luma_weight;

    for (i = 1; i <= last_index; i++) {
        const int j = ctx->m.intra_scantable.permutated[i];
        int level   = block[j];
        int negative;

        if (!level)
            continue;

        /* Work on the magnitude, restore the sign at the end. */
        negative = level < 0;
        if (negative)
            level = -level;

        level = (2 * level + 1) * qscale * weight_matrix[i];
        if (weight_matrix[i] != neutral)
            level += neutral;
        level >>= shift;

        block[j] = negative ? -level : level;
    }
}

Diego Biurrun's avatar
Diego Biurrun committed
469
/** Sum of squared differences between two 64-coefficient blocks. */
static av_always_inline int dnxhd_ssd_block(int16_t *qblock, int16_t *block)
{
    int total = 0;
    int k;

    for (k = 0; k < 64; k++) {
        const int delta = block[k] - qblock[k];
        total += delta * delta;
    }
    return total;
}

Diego Biurrun's avatar
Diego Biurrun committed
478
/**
 * Count the bits needed for the AC coefficients of one quantized block,
 * using the same level/run table lookups as dnxhd_encode_block but without
 * writing anything. The end-of-block code is not included.
 *
 * @param block      quantized coefficients
 * @param last_index scan position of the last non-zero coefficient
 * @return number of bits
 */
static av_always_inline int dnxhd_calc_ac_bits(DNXHDEncContext *ctx, int16_t *block, int last_index)
{
    int last_non_zero = 0;
    int bits = 0;
    int i, j, level;

    for (i = 1; i <= last_index; i++) {
        j = ctx->m.intra_scantable.permutated[i];
        level = block[j];
        if (level) {
            int run_level = i - last_non_zero - 1;
            /* Multiply instead of shifting: level may be negative and
             * left-shifting a negative value is undefined behaviour. */
            bits += ctx->vlc_bits[level * (1 << 1) | !!run_level] + ctx->run_bits[run_level];
            last_non_zero = i;
        }
    }
    return bits;
}

/**
 * Load the eight 8x8 blocks of macroblock (mb_x, mb_y) into ctx->blocks[].
 *
 * Blocks 0-3 are the upper Y, Y, U, V blocks; blocks 4-7 are the lower ones.
 * For the last macroblock row of a 1080-line picture the lower blocks are
 * either mirrored from four source rows (interlaced) or cleared (progressive).
 */
static av_always_inline void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, int mb_y)
{
    const int bs = ctx->block_width_l2;
    const int bw = 1 << bs;
    const uint8_t *ptr_y = ctx->thread[0]->src[0] + ((mb_y << 4) * ctx->m.linesize)   + (mb_x << (bs + 1));
    const uint8_t *ptr_u = ctx->thread[0]->src[1] + ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs);
    const uint8_t *ptr_v = ctx->thread[0]->src[2] + ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs);
    DSPContext *dsp = &ctx->m.dsp;
    int k;

    /* Upper half of the macroblock. */
    dsp->get_pixels(ctx->blocks[0], ptr_y,      ctx->m.linesize);
    dsp->get_pixels(ctx->blocks[1], ptr_y + bw, ctx->m.linesize);
    dsp->get_pixels(ctx->blocks[2], ptr_u,      ctx->m.uvlinesize);
    dsp->get_pixels(ctx->blocks[3], ptr_v,      ctx->m.uvlinesize);

    /* Lower half: regular case first. */
    if (!(mb_y + 1 == ctx->m.mb_height && ctx->m.avctx->height == 1080)) {
        dsp->get_pixels(ctx->blocks[4], ptr_y + ctx->dct_y_offset,      ctx->m.linesize);
        dsp->get_pixels(ctx->blocks[5], ptr_y + ctx->dct_y_offset + bw, ctx->m.linesize);
        dsp->get_pixels(ctx->blocks[6], ptr_u + ctx->dct_uv_offset,     ctx->m.uvlinesize);
        dsp->get_pixels(ctx->blocks[7], ptr_v + ctx->dct_uv_offset,     ctx->m.uvlinesize);
        return;
    }

    /* Last macroblock row of a 1080-line picture. */
    if (!ctx->interlaced) {
        for (k = 4; k < 8; k++)
            dsp->clear_block(ctx->blocks[k]);
        return;
    }

    ctx->get_pixels_8x4_sym(ctx->blocks[4], ptr_y + ctx->dct_y_offset,      ctx->m.linesize);
    ctx->get_pixels_8x4_sym(ctx->blocks[5], ptr_y + ctx->dct_y_offset + bw, ctx->m.linesize);
    ctx->get_pixels_8x4_sym(ctx->blocks[6], ptr_u + ctx->dct_uv_offset,     ctx->m.uvlinesize);
    ctx->get_pixels_8x4_sym(ctx->blocks[7], ptr_v + ctx->dct_uv_offset,     ctx->m.uvlinesize);
}

/**
 * Map a block index (0..7) within a macroblock to its component index
 * (0, 1 or 2), which callers use to index ctx->m.last_dc[].
 */
static av_always_inline int dnxhd_switch_matrix(DNXHDEncContext *ctx, int i)
{
    static const uint8_t block_component[8] = {
        0, 0, 1, 2,
        0, 0, 1, 2,
    };

    return block_component[i];
}

535
/**
 * execute2() worker: for macroblock row jobnr, quantize every block at the
 * main context's current qscale and record the resulting bit count (and,
 * when RD decision is enabled or RC_VARIANCE is 0, the SSD) in
 * ctx->mb_rc[qscale][mb].
 *
 * @return always 0
 */
static int dnxhd_calc_bits_thread(AVCodecContext *avctx, void *arg, int jobnr, int threadnr)
{
    DNXHDEncContext *ctx = avctx->priv_data;
    int mb_y = jobnr, mb_x;
    int qscale = ctx->qscale; /* read from the main context */
    LOCAL_ALIGNED_16(int16_t, block, [64]);
    ctx = ctx->thread[threadnr];

    /* Reset the DC predictors at the start of the row. */
    ctx->m.last_dc[0] =
    ctx->m.last_dc[1] =
    ctx->m.last_dc[2] = 1 << (ctx->cid_table->bit_depth + 2);

    for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
        unsigned mb = mb_y * ctx->m.mb_width + mb_x;
        int ssd     = 0;
        int ac_bits = 0;
        int dc_bits = 0;
        int i;

        dnxhd_get_blocks(ctx, mb_x, mb_y);

        for (i = 0; i < 8; i++) {
            int16_t *src_block = ctx->blocks[i];
            int overflow, nbits, diff, last_index;
            int n = dnxhd_switch_matrix(ctx, i);

            /* Quantize a copy so the source block stays available for SSD. */
            memcpy(block, src_block, 64 * sizeof(*block));
            last_index = ctx->m.dct_quantize(&ctx->m, block, 4 & (2 * i), qscale, &overflow);
            ac_bits   += dnxhd_calc_ac_bits(ctx, block, last_index);

            diff  = block[0] - ctx->m.last_dc[n];
            nbits = av_log2_16bit(diff < 0 ? -2 * diff : 2 * diff);

            av_assert1(nbits < ctx->cid_table->bit_depth + 4);
            dc_bits += ctx->cid_table->dc_bits[nbits] + nbits;

            ctx->m.last_dc[n] = block[0];

            if (avctx->mb_decision == FF_MB_DECISION_RD || !RC_VARIANCE) {
                dnxhd_unquantize_c(ctx, block, i, qscale, last_index);
                ctx->m.dsp.idct(block);
                ssd += dnxhd_ssd_block(block, src_block);
            }
        }

        ctx->mb_rc[qscale][mb].ssd  = ssd;
        /* 12 bits are written per macroblock before its blocks, plus one
         * end-of-block code for each of the 8 blocks. */
        ctx->mb_rc[qscale][mb].bits = ac_bits + dc_bits + 12 + 8 * ctx->vlc_bits[0];
    }
    return 0;
}

586
/**
 * execute2() worker: encode macroblock row jobnr into its slice inside the
 * output buffer (arg), starting 640 bytes plus ctx->slice_offs[jobnr] in.
 * The slice is zero-padded to a multiple of 32 bits.
 *
 * @return always 0
 */
static int dnxhd_encode_thread(AVCodecContext *avctx, void *arg, int jobnr, int threadnr)
{
    DNXHDEncContext *ctx = avctx->priv_data;
    int mb_y = jobnr, mb_x;
    int tail;

    ctx = ctx->thread[threadnr];
    init_put_bits(&ctx->m.pb, (uint8_t *)arg + 640 + ctx->slice_offs[jobnr], ctx->slice_size[jobnr]);

    /* Reset the DC predictors at the start of the row. */
    ctx->m.last_dc[0] =
    ctx->m.last_dc[1] =
    ctx->m.last_dc[2] = 1 << (ctx->cid_table->bit_depth + 2);

    for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
        unsigned mb = mb_y * ctx->m.mb_width + mb_x;
        int qscale  = ctx->mb_qscale[mb];
        int i;

        put_bits(&ctx->m.pb, 12, qscale << 1);

        dnxhd_get_blocks(ctx, mb_x, mb_y);

        for (i = 0; i < 8; i++) {
            int16_t *block = ctx->blocks[i];
            int overflow;
            int n          = dnxhd_switch_matrix(ctx, i);
            int last_index = ctx->m.dct_quantize(&ctx->m, block, 4 & (2 * i), qscale, &overflow);

            dnxhd_encode_block(ctx, block, last_index, n);
        }
    }

    /* Pad with zero bits up to the next 32-bit boundary. */
    tail = put_bits_count(&ctx->m.pb) & 31;
    if (tail)
        put_bits(&ctx->m.pb, 32 - tail, 0);
    flush_put_bits(&ctx->m.pb);
    return 0;
}

620
/**
 * Compute size and offset (both in bytes) of every macroblock-row slice from
 * the per-macroblock bit counts in ctx->mb_bits. Each slice size is rounded
 * up to a multiple of 32 bits; offsets are the running sum of the sizes.
 */
static void dnxhd_setup_threads_slices(DNXHDEncContext *ctx)
{
    int mb_y, mb_x;
    int offset = 0;

    for (mb_y = 0; mb_y < ctx->m.mb_height; mb_y++) {
        const unsigned first_mb = mb_y * ctx->m.mb_width;
        uint32_t row_bits = 0;

        for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++)
            row_bits += ctx->mb_bits[first_mb + mb_x];

        row_bits = (row_bits + 31) & ~31; /* round up to 32 bits */
        row_bits >>= 3;                   /* bits -> bytes */

        ctx->slice_offs[mb_y] = offset;
        ctx->slice_size[mb_y] = row_bits;
        offset += (int)row_bits;
    }
}

639
/**
 * execute2() worker: compute a luma variance measure for every macroblock
 * of row jobnr and store it, together with the macroblock index, in
 * ctx->mb_cmp[].
 *
 * Macroblocks that extend past the right or bottom picture edge are
 * measured over their visible part only, in both the 8-bit and the 10-bit
 * path, so no samples outside the picture are read.
 *
 * @return always 0
 */
static int dnxhd_mb_var_thread(AVCodecContext *avctx, void *arg, int jobnr, int threadnr)
{
    DNXHDEncContext *ctx = avctx->priv_data;
    int mb_y = jobnr, mb_x, x, y;
    int partial_last_row = (mb_y == ctx->m.mb_height - 1) &&
                           ((avctx->height >> ctx->interlaced) & 0xF);

    ctx = ctx->thread[threadnr];
    if (ctx->cid_table->bit_depth == 8) {
        uint8_t *pix = ctx->thread[0]->src[0] + ((mb_y<<4) * ctx->m.linesize);
        for (mb_x = 0; mb_x < ctx->m.mb_width; ++mb_x, pix += 16) {
            unsigned mb  = mb_y * ctx->m.mb_width + mb_x;
            int sum;
            int varc;

            if (!partial_last_row && mb_x * 16 <= avctx->width - 16) {
                /* Full 16x16 macroblock: use the DSP routines. */
                sum  = ctx->m.dsp.pix_sum(pix, ctx->m.linesize);
                varc = ctx->m.dsp.pix_norm1(pix, ctx->m.linesize);
            } else {
                /* Partial macroblock: accumulate over the visible area. */
                int bw = FFMIN(avctx->width - 16 * mb_x, 16);
                int bh = FFMIN((avctx->height >> ctx->interlaced) - 16 * mb_y, 16);
                sum = varc = 0;
                for (y = 0; y < bh; y++) {
                    for (x = 0; x < bw; x++) {
                        uint8_t val = pix[x + y * ctx->m.linesize];
                        sum  += val;
                        varc += val * val;
                    }
                }
            }
            varc = (varc - (((unsigned)sum * sum) >> 8) + 128) >> 8;

            ctx->mb_cmp[mb].value = varc;
            ctx->mb_cmp[mb].mb = mb;
        }
    } else { // 10-bit
        int const linesize = ctx->m.linesize >> 1;
        for (mb_x = 0; mb_x < ctx->m.mb_width; ++mb_x) {
            uint16_t *pix = (uint16_t*)ctx->thread[0]->src[0] + ((mb_y << 4) * linesize) + (mb_x << 4);
            unsigned mb  = mb_y * ctx->m.mb_width + mb_x;
            int sum = 0;
            int sqsum = 0;
            /* Clamp to the visible part, exactly like the 8-bit path. */
            int bw = FFMIN(avctx->width - 16 * mb_x, 16);
            int bh = FFMIN((avctx->height >> ctx->interlaced) - 16 * mb_y, 16);
            int mean, sqmean;

            // Macroblocks are 16x16 pixels, unlike DCT blocks which are 8x8.
            for (y = 0; y < bh; ++y) {
                for (x = 0; x < bw; ++x) {
                    // Turn 16-bit pixels into 10-bit ones.
                    int const sample = (unsigned)pix[x] >> 6;
                    sum += sample;
                    sqsum += sample * sample;
                    // 2^10 * 2^10 * 16 * 16 = 2^28, which is less than INT_MAX
                }
                pix += linesize;
            }
            mean = sum >> 8; // 16*16 == 2^8
            sqmean = sqsum >> 8;
            ctx->mb_cmp[mb].value = sqmean - mean * mean;
            ctx->mb_cmp[mb].mb = mb;
        }
    }
    return 0;
}

/**
 * Rate-distortion rate control.
 *
 * First measures bits and SSD of every macroblock for each qscale in
 * [1, qmax). Then searches for a lambda such that choosing, per macroblock,
 * the qscale minimizing bits * lambda + (ssd << LAMBDA_FRAC_BITS) keeps the
 * frame within ctx->frame_bits. The chosen qscale and bit count of every
 * macroblock are left in ctx->mb_qscale[] / ctx->mb_bits[], and the final
 * lambda in ctx->lambda.
 *
 * @return 0 on success, AVERROR(EINVAL) if no lambda fits the budget
 */
static int dnxhd_encode_rdo(AVCodecContext *avctx, DNXHDEncContext *ctx)
{
    int lambda, up_step, down_step;
    int last_lower = INT_MAX, last_higher = 0;
    int x, y, q;

    for (q = 1; q < avctx->qmax; q++) {
        ctx->qscale = q;
        avctx->execute2(avctx, dnxhd_calc_bits_thread, NULL, NULL, ctx->m.mb_height);
    }

    up_step   = 2 << LAMBDA_FRAC_BITS;
    down_step = 2 << LAMBDA_FRAC_BITS;
    lambda    = ctx->lambda;

    for (;;) {
        int bits       = 0;
        int final_pass = 0;

        if (lambda == last_higher) {
            /* The search interval has collapsed: step just above the
             * largest lambda known to overshoot and run one last pass to
             * set the final qscales/bits. */
            lambda++;
            final_pass = 1;
        }

        for (y = 0; y < ctx->m.mb_height; y++) {
            for (x = 0; x < ctx->m.mb_width; x++) {
                unsigned best_score = UINT_MAX;
                int best_q = 1;
                int mb = y * ctx->m.mb_width + x;

                for (q = 1; q < avctx->qmax; q++) {
                    unsigned score = ctx->mb_rc[q][mb].bits*lambda+
                        ((unsigned)ctx->mb_rc[q][mb].ssd<<LAMBDA_FRAC_BITS);
                    if (score < best_score) {
                        best_score = score;
                        best_q     = q;
                    }
                }
                bits += ctx->mb_rc[best_q][mb].bits;
                ctx->mb_qscale[mb] = best_q;
                ctx->mb_bits[mb]   = ctx->mb_rc[best_q][mb].bits;
            }
            bits = (bits + 31) & ~31; // padding
            if (bits > ctx->frame_bits)
                break;
        }

        if (final_pass) {
            if (bits > ctx->frame_bits)
                return AVERROR(EINVAL);
            break;
        }

        if (bits >= ctx->frame_bits) {
            /* Budget reached or exceeded: lambda has to grow. */
            last_higher = FFMAX(lambda, last_higher);
            if (last_lower != INT_MAX)
                lambda = (lambda + last_lower) >> 1;
            else if ((int64_t)lambda + up_step > INT_MAX)
                return AVERROR(EINVAL);
            else
                lambda += up_step;
            up_step   = FFMIN((int64_t)up_step * 5, INT_MAX);
            down_step = 1 << LAMBDA_FRAC_BITS;
        } else {
            /* Under budget: try a smaller lambda. */
            last_lower = FFMIN(lambda, last_lower);
            if (last_higher != 0)
                lambda = (lambda + last_higher) >> 1;
            else
                lambda -= down_step;
            down_step = FFMIN((int64_t)down_step * 5, INT_MAX);
            up_step   = 1 << LAMBDA_FRAC_BITS;
            lambda    = FFMAX(1, lambda);
            if (lambda == last_lower)
                break;
        }
    }

    ctx->lambda = lambda;
    return 0;
}

/**
 * Search for the frame-wide quantiser scale that brackets the bit budget.
 *
 * Starting from ctx->qscale, the frame size is re-measured (via
 * dnxhd_calc_bits_thread) for each candidate qscale. The candidate first
 * moves in growing steps, then is bisected once both a candidate that is
 * under budget and one that is not have been seen.
 *
 * @param ctx encoder context; ctx->qscale is read as the starting point and
 *            overwritten with every candidate that is tried
 * @return 0 when the search stops on a qscale that is not below
 *         ctx->frame_bits while qscale + 1 is below it,
 *         1 when qscale 1 is already below the budget,
 *         AVERROR(EINVAL) when the candidate reaches avctx->qmax
 */
static int dnxhd_find_qscale(DNXHDEncContext *ctx)
{
    int q         = ctx->qscale;
    int q_over    = 0;       // largest qscale measured at/over budget (0: none yet)
    int q_under   = INT_MAX; // smallest qscale measured under budget (INT_MAX: none yet)
    int step_up   = 1;
    int step_down = 1;

    while (1) {
        int total = 0;
        int row, col;

        ctx->qscale = q;
        // XXX avoid recalculating bits
        ctx->m.avctx->execute2(ctx->m.avctx, dnxhd_calc_bits_thread, NULL, NULL, ctx->m.mb_height);

        /* Sum the macroblock sizes row by row; every row is padded to a
         * multiple of 32 bits, and counting stops once the budget is blown. */
        for (row = 0; row < ctx->m.mb_height; row++) {
            const int row_start = row * ctx->m.mb_width;

            for (col = 0; col < ctx->m.mb_width; col++)
                total += ctx->mb_rc[q][row_start + col].bits;
            total = (total + 31) & ~31; // padding
            if (total > ctx->frame_bits)
                break;
        }
        //av_dlog(ctx->m.avctx, "%d, qscale %d, bits %d, frame %d, higher %d, lower %d\n",
        //        ctx->m.avctx->frame_number, q, total, ctx->frame_bits, q_over, q_under);

        if (total >= ctx->frame_bits) {
            /* At or over budget: the quantiser has to grow. */
            if (q_under == q + 1)
                break; // the next qscale is already known to be under budget
            q_over = FFMAX(q, q_over);
            if (q_under == INT_MAX) {
                q += step_up;
                step_up++;
            } else {
                q = (q + q_under) >> 1;
            }
            step_down = 1;
            if (q >= ctx->m.avctx->qmax)
                return AVERROR(EINVAL);
            continue;
        }

        /* Under budget: try a smaller quantiser. */
        if (q == 1)
            return 1;
        if (q_over == q - 1) {
            q = q_over;
            break;
        }
        q_under = FFMIN(q, q_under);
        if (q_over == 0) {
            q -= step_down;
            step_down++;
        } else {
            q = (q + q_over) >> 1;
        }
        q = FFMAX(q, 1); // never go below qscale 1
        step_up = 1;
    }
    //av_dlog(ctx->m.avctx, "out qscale %d\n", q);
    ctx->qscale = q;
    return 0;
}

837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857
#define BUCKET_BITS 8                /* bits consumed per radix digit */
#define RADIX_PASSES 4               /* number of digits examined per value */
#define NBUCKETS (1 << BUCKET_BITS)  /* distinct values of one digit */

/**
 * Map one radix digit of a value to its bucket index.
 *
 * The digit is the BUCKET_BITS-wide field of value starting at bit shift.
 * The index is mirrored (NBUCKETS - 1 - digit), so larger digits land in
 * lower-numbered buckets.
 */
static inline int get_bucket(int value, int shift)
{
    const int digit = (value >> shift) & (NBUCKETS - 1);

    return (NBUCKETS - 1) - digit;
}

/**
 * Build the per-pass bucket start offsets used by radix_sort_pass().
 *
 * For each of the RADIX_PASSES digits, the entries falling into every bucket
 * are counted, then the counts are converted in place into the index at
 * which that bucket begins in the output array.
 *
 * @param data    entries to be sorted (only .value is read)
 * @param size    number of entries in data
 * @param buckets output table, fully overwritten
 */
static void radix_count(const RCCMPEntry *data, int size, int buckets[RADIX_PASSES][NBUCKETS])
{
    int idx, pass;

    memset(buckets, 0, sizeof(buckets[0][0]) * RADIX_PASSES * NBUCKETS);

    /* Histogram every digit of every value in a single sweep. */
    for (idx = 0; idx < size; idx++) {
        int rest = data[idx].value;

        for (pass = 0; pass < RADIX_PASSES; pass++) {
            buckets[pass][get_bucket(rest, 0)] += 1;
            rest >>= BUCKET_BITS;
        }
        av_assert1(!rest); // all bits must have been consumed by the passes
    }

    /* Walk each histogram from the last bucket down, turning the counts
     * into start positions. */
    for (pass = 0; pass < RADIX_PASSES; pass++) {
        int start = size;
        int b     = NBUCKETS;

        while (b-- > 0) {
            start -= buckets[pass][b];
            buckets[pass][b] = start;
        }
        av_assert1(!buckets[pass][0]);
    }
}

/**
 * Perform one stable distribution pass of the radix sort.
 *
 * @param dst     destination array, receives all size entries
 * @param data    source array
 * @param size    number of entries
 * @param buckets start offset of every bucket for this pass; each offset is
 *                advanced as entries are placed
 * @param pass    index of the digit to sort on (0 = least significant)
 */
static void radix_sort_pass(RCCMPEntry *dst, const RCCMPEntry *data, int size, int buckets[NBUCKETS], int pass)
{
    const int shift = pass * BUCKET_BITS;
    int n;

    for (n = 0; n < size; n++) {
        const RCCMPEntry *entry = &data[n];
        int *cursor = &buckets[get_bucket(entry->value, shift)];

        dst[*cursor] = *entry;
        ++*cursor;
    }
}

/**
 * Sort rate-control entries in place with a stable LSD radix sort.
 *
 * Because get_bucket() mirrors the digit, the result is ordered by
 * decreasing .value, compared as an unsigned bit pattern.
 *
 * @param data entries to sort; reordered in place
 * @param size number of entries in data
 *
 * The function cannot report errors. If the scratch buffer cannot be
 * allocated, it falls back to a stable in-place insertion sort that yields
 * the same ordering (slower, but it never dereferences a NULL buffer).
 */
static void radix_sort(RCCMPEntry *data, int size)
{
    int buckets[RADIX_PASSES][NBUCKETS];
    RCCMPEntry *tmp;

    if (size < 2)
        return; // zero or one entry is already sorted

    tmp = av_malloc(sizeof(*tmp) * size);
    if (!tmp) {
        /* Out of memory: sort without scratch space. Moving an entry only
         * past strictly smaller keys keeps equal keys in their original
         * order, matching the stability of the radix passes. */
        int i;

        for (i = 1; i < size; i++) {
            RCCMPEntry cur = data[i];
            int cur_value  = cur.value;
            int j          = i;

            while (j > 0) {
                int prev_value = data[j - 1].value;

                if ((unsigned)prev_value >= (unsigned)cur_value)
                    break;
                data[j] = data[j - 1];
                j--;
            }
            data[j] = cur;
        }
        return;
    }

    radix_count(data, size, buckets);
    radix_sort_pass(tmp, data, size, buckets[0], 0);
    radix_sort_pass(data, tmp, size, buckets[1], 1);
    /* The last bucket holds digit 0; a non-zero start offset for it means
     * some value has a non-zero digit in that pass. Only then are the two
     * upper passes needed. */
    if (buckets[2][NBUCKETS - 1] || buckets[3][NBUCKETS - 1]) {
        radix_sort_pass(tmp, data, size, buckets[2], 2);
        radix_sort_pass(data, tmp, size, buckets[3], 3);
    }
    av_free(tmp);
}

893
/**
 * Fast rate control: pick one frame-wide qscale, then raise individual
 * macroblocks to qscale + 1 until the frame fits the bit budget.
 *
 * @param avctx codec context, used to run dnxhd_mb_var_thread
 * @param ctx   encoder context; mb_qscale[], mb_bits[] and (when
 *              RC_VARIANCE is 0) mb_cmp[] are filled in
 * @return 0 on success, or the negative error from dnxhd_find_qscale()
 */
static int dnxhd_encode_fast(AVCodecContext *avctx, DNXHDEncContext *ctx)
{
    int worst_bits = 0; // frame size assuming worst-case padding on every row
    int row, col, n;
    int ret;

    ret = dnxhd_find_qscale(ctx);
    if (ret < 0)
        return ret;

    /* Start with every macroblock at the frame-wide qscale. */
    for (row = 0; row < ctx->m.mb_height; row++) {
        for (col = 0; col < ctx->m.mb_width; col++) {
            const int mb = row * ctx->m.mb_width + col;
            int cur_bits;

            ctx->mb_qscale[mb] = ctx->qscale;
            cur_bits           = ctx->mb_rc[ctx->qscale][mb].bits;
            ctx->mb_bits[mb]   = cur_bits;
            worst_bits        += cur_bits;

            if (!RC_VARIANCE) {
                /* Rank by distortion increase per bit saved when moving
                 * this macroblock to qscale + 1. */
                int delta_bits = ctx->mb_rc[ctx->qscale][mb].bits-ctx->mb_rc[ctx->qscale+1][mb].bits;

                ctx->mb_cmp[mb].mb = mb;
                if (delta_bits)
                    ctx->mb_cmp[mb].value =
                        ((ctx->mb_rc[ctx->qscale][mb].ssd-ctx->mb_rc[ctx->qscale+1][mb].ssd)*100)/delta_bits;
                else
                    ctx->mb_cmp[mb].value = INT_MIN; //avoid increasing qscale
            }
        }
        worst_bits += 31; //worst padding
    }

    /* A positive result from dnxhd_find_qscale() means qscale 1 was already
     * under budget, so there is nothing to trade off. */
    if (ret)
        return 0;

    if (RC_VARIANCE)
        avctx->execute2(avctx, dnxhd_mb_var_thread, NULL, NULL, ctx->m.mb_height);
    radix_sort(ctx->mb_cmp, ctx->m.mb_num);

    /* Walk the sorted list, bumping macroblocks to qscale + 1 until the
     * worst-case size no longer exceeds the budget. */
    for (n = 0; n < ctx->m.mb_num && worst_bits > ctx->frame_bits; n++) {
        const int mb        = ctx->mb_cmp[n].mb;
        const int next_bits = ctx->mb_rc[ctx->qscale+1][mb].bits;

        worst_bits        -= ctx->mb_rc[ctx->qscale][mb].bits - next_bits;
        ctx->mb_qscale[mb] = ctx->qscale+1;
        ctx->mb_bits[mb]   = next_bits;
    }
    return 0;
}

Michael Niedermayer's avatar
Michael Niedermayer committed
930
/**
 * Propagate the input frame's geometry to all per-thread contexts.
 *
 * Sets the luma/chroma line sizes (shifted left by ctx->interlaced) and the
 * DCT row offsets of every thread context, copies the interlaced flag to
 * the coded frame and selects the field to encode first.
 *
 * @param ctx   encoder context
 * @param frame input picture; only linesize[] and the interlacing flags are
 *              read here
 */
static void dnxhd_load_picture(DNXHDEncContext *ctx, const AVFrame *frame)
{
    const int luma_stride   = frame->linesize[0] << ctx->interlaced;
    const int chroma_stride = frame->linesize[1] << ctx->interlaced;
    int t;

    for (t = 0; t < ctx->m.avctx->thread_count; t++) {
        DNXHDEncContext *slot = ctx->thread[t];

        slot->m.linesize    = luma_stride;
        slot->m.uvlinesize  = chroma_stride;
        /* NOTE(review): the offsets are derived from ctx->m, not slot->m;
         * presumably ctx->thread[0] is ctx itself so these were just
         * refreshed above — confirm against the init code. */
        slot->dct_y_offset  = ctx->m.linesize  *8;
        slot->dct_uv_offset = ctx->m.uvlinesize*8;
    }

    ctx->m.avctx->coded_frame->interlaced_frame = frame->interlaced_frame;
    /* Field 1 goes first when the frame is interlaced and not top-field-first. */
    ctx->cur_field = frame->interlaced_frame && !frame->top_field_first;
}

945 946
/**
 * Encode one frame into a packet.
 *
 * One coding unit is produced for a progressive frame; when ctx->interlaced
 * is set, a second coding unit for the other field is written directly
 * after the first one.
 *
 * @param avctx      codec context
 * @param pkt        output packet, allocated here with the CID's frame_size
 * @param frame      input picture
 * @param got_packet set to 1 on success
 * @return 0 on success, a negative error code if the packet cannot be
 *         allocated or rate control fails
 */
static int dnxhd_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
                                const AVFrame *frame, int *got_packet)
{
    DNXHDEncContext *ctx = avctx->priv_data;
    int first_field = 1;
    uint8_t *buf;
    int ret;

    ret = ff_alloc_packet2(avctx, pkt, ctx->cid_table->frame_size);
    if (ret < 0)
        return ret;
    buf = pkt->data;

    dnxhd_load_picture(ctx, frame);

    /* One iteration per coding unit. */
    for (;;) {
        int plane, row;
        int offset = 0;

        /* Point at the first line of the field being coded. */
        for (plane = 0; plane < 3; plane++) {
            ctx->src[plane] = frame->data[plane];
            if (ctx->interlaced && ctx->cur_field)
                ctx->src[plane] += frame->linesize[plane];
        }

        dnxhd_write_header(avctx, buf);

        ret = avctx->mb_decision == FF_MB_DECISION_RD ? dnxhd_encode_rdo(avctx, ctx)
                                                      : dnxhd_encode_fast(avctx, ctx);
        if (ret < 0) {
            av_log(avctx, AV_LOG_ERROR,
                   "picture could not fit ratecontrol constraints, increase qmax\n");
            return ret;
        }

        dnxhd_setup_threads_slices(ctx);

        /* Record the running offset of every macroblock row in ctx->msip. */
        for (row = 0; row < ctx->m.mb_height; row++) {
            AV_WB32(ctx->msip + row * 4, offset);
            offset += ctx->slice_size[row];
            av_assert1(!(ctx->slice_size[row] & 3));
        }

        avctx->execute2(avctx, dnxhd_encode_thread, buf, NULL, ctx->m.mb_height);

        /* Zero the gap between the coded rows (which start 640 bytes into
         * the coding unit) and the 4-byte trailer. */
        av_assert1(640 + offset + 4 <= ctx->cid_table->coding_unit_size);
        memset(buf + 640 + offset, 0, ctx->cid_table->coding_unit_size - 4 - offset - 640);

        AV_WB32(buf + ctx->cid_table->coding_unit_size - 4, 0x600DC0DE); // EOF

        if (!ctx->interlaced || !first_field)
            break;

        /* Interlaced: go around once more for the other field. */
        first_field     = 0;
        ctx->cur_field ^= 1;
        buf            += ctx->cid_table->coding_unit_size;
    }

    avctx->coded_frame->quality = ctx->qscale * FF_QP2LAMBDA;

    pkt->flags |= AV_PKT_FLAG_KEY;
    *got_packet = 1;
    return 0;
}

1008
/**
 * Release everything owned by the encoder context.
 *
 * @param avctx codec context whose priv_data is the DNXHDEncContext
 * @return always 0
 *
 * The function tolerates a context in which the CID table or the VLC tables
 * were never set up, and leaves no dangling table pointers behind.
 */
static av_cold int dnxhd_encode_end(AVCodecContext *avctx)
{
    DNXHDEncContext *ctx = avctx->priv_data;
    int i;

    /* vlc_codes / vlc_bits are released at an offset of -max_level*2, so
     * they are presumably stored pre-advanced by the init code (not visible
     * here — confirm). Undo that offset only for pointers that exist:
     * arithmetic on NULL would hand av_free() a bogus address. */
    if (ctx->cid_table) {
        int max_level = 1<<(ctx->cid_table->bit_depth+2);

        if (ctx->vlc_codes)
            av_free(ctx->vlc_codes-max_level*2);
        if (ctx->vlc_bits)
            av_free(ctx->vlc_bits -max_level*2);
    }
    ctx->vlc_codes = NULL;
    ctx->vlc_bits  = NULL;
    av_freep(&ctx->run_codes);
    av_freep(&ctx->run_bits);

    av_freep(&ctx->mb_bits);
    av_freep(&ctx->mb_qscale);
    av_freep(&ctx->mb_rc);
    av_freep(&ctx->mb_cmp);
    av_freep(&ctx->slice_size);
    av_freep(&ctx->slice_offs);

    av_freep(&ctx->qmatrix_c);
    av_freep(&ctx->qmatrix_l);
    av_freep(&ctx->qmatrix_c16);
    av_freep(&ctx->qmatrix_l16);

    /* Slot 0 is skipped; only the additional thread contexts are freed. */
    for (i = 1; i < avctx->thread_count; i++)
        av_freep(&ctx->thread[i]);

    av_frame_free(&avctx->coded_frame);

    return 0;
}

1039 1040 1041 1042 1043
/* Codec-specific option defaults, attached to the encoder through the
 * .defaults field of ff_dnxhd_encoder. */
static const AVCodecDefault dnxhd_defaults[] = {
    { "qmax", "1024" }, /* Maximum quantization scale factor allowed for VC-3 */
    { NULL }, /* end-of-list marker */
};

1044
AVCodec ff_dnxhd_encoder = {
1045
    .name           = "dnxhd",
1046
    .long_name      = NULL_IF_CONFIG_SMALL("VC3/DNxHD"),
1047
    .type           = AVMEDIA_TYPE_VIDEO,
1048
    .id             = AV_CODEC_ID_DNXHD,
1049 1050
    .priv_data_size = sizeof(DNXHDEncContext),
    .init           = dnxhd_encode_init,
1051
    .encode2        = dnxhd_encode_picture,
1052
    .close          = dnxhd_encode_end,
1053
    .capabilities   = CODEC_CAP_SLICE_THREADS,
1054 1055 1056
    .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV422P,
                                                  AV_PIX_FMT_YUV422P10,
                                                  AV_PIX_FMT_NONE },
1057
    .priv_class     = &dnxhd_class,
1058
    .defaults       = dnxhd_defaults,
1059
};