/*
 * Copyright (c) 2002-2014 Michael Niedermayer <michaelni@gmx.at>
 *
 * see http://www.pcisys.net/~melanson/codecs/huffyuv.txt for a description of
 * the algorithm used
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * yuva, gray, 4:4:4, 4:1:1, 4:1:0 and >8 bit per sample support sponsored by NOA
 */

/**
 * @file
 * huffyuv encoder
 */

#include "avcodec.h"
#include "huffyuv.h"
#include "huffman.h"
34
#include "huffyuvencdsp.h"
35
#include "internal.h"
36
#include "put_bits.h"
37
#include "libavutil/opt.h"
38
#include "libavutil/pixdesc.h"
39

40 41 42 43
/**
 * Forward a row-difference request to the DSP routine that matches the
 * configured sample depth.
 *
 * Depths of up to 8 bits go to the byte routine in s->hencdsp.  Deeper
 * formats reinterpret all three buffers as uint16_t and go to the 16-bit
 * routine in s->llviddsp, which additionally receives the mask s->n - 1.
 *
 * @param s    encoder context (reads bps, n and the DSP function tables)
 * @param dst  destination row
 * @param src0 first source row
 * @param src1 second source row
 * @param w    element count passed through to the DSP routine
 */
static inline void diff_bytes(HYuvContext *s, uint8_t *dst,
                              const uint8_t *src0, const uint8_t *src1, int w)
{
    if (s->bps > 8) {
        const uint16_t *a16 = (const uint16_t *)src0;
        const uint16_t *b16 = (const uint16_t *)src1;

        s->llviddsp.diff_int16((uint16_t *)dst, a16, b16, s->n - 1, w);
        return;
    }

    s->hencdsp.diff_bytes(dst, src0, src1, w);
}

50
/**
 * Replace each sample of a row by its difference to the sample on its left.
 *
 * Rows narrower than 32 samples are handled entirely by the scalar loop.
 * Wider rows use the scalar loop for the first 16 samples only and hand the
 * remainder to the DSP difference routine, using the row shifted by one
 * sample as the second operand.
 *
 * @param s    encoder context (reads bps, n and the DSP function tables)
 * @param dst  destination row; treated as uint16_t when s->bps > 8
 * @param src  source row; treated as uint16_t when s->bps > 8
 * @param w    number of samples in the row
 * @param left predictor for the first sample
 * @return the last source sample of the row (or left unchanged when the
 *         scalar-only path runs with w <= 0)
 */
static inline int sub_left_prediction(HYuvContext *s, uint8_t *dst,
                                      const uint8_t *src, int w, int left)
{
    const int short_row  = w < 32;
    const int scalar_len = short_row ? w : 16;
    int k;

    if (s->bps <= 8) {
        for (k = 0; k < scalar_len; k++) {
            const int cur = src[k];
            dst[k] = cur - left;
            left   = cur;
        }
        if (short_row)
            return left;

        s->hencdsp.diff_bytes(dst + 16, src + 16, src + 15, w - 16);
        return src[w-1];
    } else {
        const uint16_t *in16  = (const uint16_t *)src;
        uint16_t       *out16 = (      uint16_t *)dst;

        for (k = 0; k < scalar_len; k++) {
            const int cur = in16[k];
            out16[k] = cur - left;
            left     = cur;
        }
        if (short_row)
            return left;

        s->llviddsp.diff_int16(out16 + 16, in16 + 16, in16 + 15, s->n - 1, w - 16);
        return in16[w-1];
    }
}

/**
 * Left-predict one row of packed 32-bit pixels, one component at a time.
 *
 * The first (up to) four pixels are differenced in scalar code against the
 * running per-component predictors.  Any remaining bytes are handed to the
 * DSP byte-difference routine with the source shifted back by one pixel
 * (4 bytes) as the second operand.
 *
 * @param s     encoder context (supplies the DSP function table)
 * @param dst   destination row, 4 bytes per pixel
 * @param src   source row, 4 bytes per pixel
 * @param w     row width in pixels
 * @param red   in: predictor for the first pixel; out: last pixel's R value
 * @param green in: predictor for the first pixel; out: last pixel's G value
 * @param blue  in: predictor for the first pixel; out: last pixel's B value
 * @param alpha in: predictor for the first pixel; out: last pixel's A value
 */
static inline void sub_left_prediction_bgr32(HYuvContext *s, uint8_t *dst,
                                             const uint8_t *src, int w,
                                             int *red, int *green, int *blue,
                                             int *alpha)
{
    int i;
    int r, g, b, a;

    /* An empty row has no last pixel to read back; leave the predictors
     * untouched instead of indexing before the start of src. */
    if (w <= 0)
        return;

    r = *red;
    g = *green;
    b = *blue;
    a = *alpha;

    for (i = 0; i < FFMIN(w, 4); i++) {
        const int rt = src[i * 4 + R];
        const int gt = src[i * 4 + G];
        const int bt = src[i * 4 + B];
        const int at = src[i * 4 + A];
        dst[i * 4 + R] = rt - r;
        dst[i * 4 + G] = gt - g;
        dst[i * 4 + B] = bt - b;
        dst[i * 4 + A] = at - a;
        r = rt;
        g = gt;
        b = bt;
        a = at;
    }

    /* Only call the DSP routine when bytes remain after the scalar prologue;
     * for narrower rows the length w * 4 - 16 would be zero or negative. */
    if (w > 4)
        s->hencdsp.diff_bytes(dst + 16, src + 16, src + 12, w * 4 - 16);

    *red   = src[(w - 1) * 4 + R];
    *green = src[(w - 1) * 4 + G];
    *blue  = src[(w - 1) * 4 + B];
    *alpha = src[(w - 1) * 4 + A];
}

128 129 130 131
/**
 * Left-predict one row of packed 24-bit pixels, one component at a time.
 *
 * The first (up to) sixteen pixels are differenced in scalar code against the
 * running per-component predictors.  Any remaining bytes are handed to the
 * DSP byte-difference routine with the source shifted back by one pixel
 * (3 bytes) as the second operand.
 *
 * @param s     encoder context (supplies the DSP function table)
 * @param dst   destination row, 3 bytes per pixel
 * @param src   source row, 3 bytes per pixel; only read
 * @param w     row width in pixels
 * @param red   in: predictor for byte 0 of the first pixel; out: byte 0 of
 *              the last pixel
 * @param green in: predictor for byte 1 of the first pixel; out: byte 1 of
 *              the last pixel
 * @param blue  in: predictor for byte 2 of the first pixel; out: byte 2 of
 *              the last pixel
 */
static inline void sub_left_prediction_rgb24(HYuvContext *s, uint8_t *dst,
                                             const uint8_t *src, int w,
                                             int *red, int *green, int *blue)
{
    int i;
    int r, g, b;

    /* An empty row has no last pixel to read back; leave the predictors
     * untouched instead of indexing before the start of src. */
    if (w <= 0)
        return;

    r = *red;
    g = *green;
    b = *blue;
    for (i = 0; i < FFMIN(w, 16); i++) {
        const int rt = src[i * 3 + 0];
        const int gt = src[i * 3 + 1];
        const int bt = src[i * 3 + 2];
        dst[i * 3 + 0] = rt - r;
        dst[i * 3 + 1] = gt - g;
        dst[i * 3 + 2] = bt - b;
        r = rt;
        g = gt;
        b = bt;
    }

    /* Only call the DSP routine when bytes remain after the scalar prologue;
     * for narrower rows the length w * 3 - 48 would be zero or negative. */
    if (w > 16)
        s->hencdsp.diff_bytes(dst + 48, src + 48, src + 48 - 3, w * 3 - 48);

    *red   = src[(w - 1) * 3 + 0];
    *green = src[(w - 1) * 3 + 1];
    *blue  = src[(w - 1) * 3 + 2];
}

156 157 158
/**
 * Forward a median-prediction request to the DSP routine that matches the
 * configured sample depth.
 *
 * Depths of up to 8 bits go to the byte routine in s->hencdsp.  Deeper
 * formats reinterpret the buffers as uint16_t and go to the 16-bit routine in
 * s->llviddsp, which additionally receives the mask s->n - 1.
 *
 * @param s        encoder context (reads bps, n and the DSP function tables)
 * @param dst      destination row
 * @param src1     first source row
 * @param src2     second source row
 * @param w        element count passed through to the DSP routine
 * @param left     predictor state passed through to the DSP routine
 * @param left_top predictor state passed through to the DSP routine
 */
static void sub_median_prediction(HYuvContext *s, uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top)
{
    if (s->bps > 8) {
        uint16_t       *out16 = (uint16_t *)dst;
        const uint16_t *a16   = (const uint16_t *)src1;
        const uint16_t *b16   = (const uint16_t *)src2;

        s->llviddsp.sub_hfyu_median_pred_int16(out16, a16, b16, s->n - 1, w, left, left_top);
        return;
    }

    s->hencdsp.sub_hfyu_median_pred(dst, src1, src2, w, left, left_top);
}

165 166 167 168
/**
 * Run-length encode a table of code lengths into buf.
 *
 * Consecutive equal lengths are grouped into runs of at most 255 entries.
 * A run of up to 7 entries is stored in one byte (length in the low 5 bits,
 * run count in the high 3 bits); a longer run is stored as two bytes, the
 * length followed by the run count.
 *
 * @param s   encoder context; s->vlc_n is the number of table entries
 * @param len code lengths, each asserted to be in 1..31
 * @param buf output buffer; the caller must provide enough room
 * @return number of bytes written to buf
 */
static int store_table(HYuvContext *s, const uint8_t *len, uint8_t *buf)
{
    const int n  = s->vlc_n;
    uint8_t *out = buf;
    int pos      = 0;

    while (pos < n) {
        const int val = len[pos];
        int repeat    = 0;

        /* Extend the run while the length stays the same, capped at 255. */
        while (pos < n && len[pos] == val && repeat < 255) {
            pos++;
            repeat++;
        }

        av_assert0(val < 32 && val >0 && repeat < 256 && repeat>0);

        if (repeat <= 7) {
            *out++ = val | (repeat << 5);
        } else {
            *out++ = val;
            *out++ = repeat;
        }
    }

    return (int)(out - buf);
}

190 191
/**
 * Build the Huffman tables from the current statistics and serialise them.
 *
 * For each table, code lengths are generated from s->stats[i], the code bits
 * are derived from the lengths, and the lengths are appended to buf in the
 * run-length format produced by store_table().  Three tables are written for
 * version <= 2; for later versions the count is 1 + alpha + 2 * chroma.
 *
 * @param s   encoder context (reads version, alpha, chroma, vlc_n and stats;
 *            writes len and bits)
 * @param buf output buffer; the caller must provide enough room
 * @return number of bytes written to buf, or the negative value returned by
 *         the failing table generator
 */
static int store_huffman_tables(HYuvContext *s, uint8_t *buf)
{
    int i, ret;
    int size = 0;
    int count = 3;

    if (s->version > 2)
        count = 1 + s->alpha + 2*s->chroma;

    for (i = 0; i < count; i++) {
        ret = ff_huff_gen_len_table(s->len[i], s->stats[i], s->vlc_n, 0);
        if (ret < 0)
            return ret;

        /* Propagate the callee's error code, as done for the length table. */
        ret = ff_huffyuv_generate_bits_table(s->bits[i], s->len[i], s->vlc_n);
        if (ret < 0)
            return ret;

        size += store_table(s, s->len[i], buf + size);
    }
    return size;
}

212 213 214 215
/**
 * Initialise the encoder: validate the pixel format and options, write the
 * 4-byte extradata header followed by the initial Huffman tables, and seed
 * the symbol statistics.
 *
 * @param avctx codec context; its priv_data is the HYuvContext being set up
 * @return 0 on success, a negative AVERROR code on failure
 */
static av_cold int encode_init(AVCodecContext *avctx)
{
    HYuvContext *s = avctx->priv_data;
    int i, j;
    int ret;
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);

    ff_huffyuv_common_init(avctx);
    ff_huffyuvencdsp_init(&s->hencdsp);

    avctx->extradata = av_mallocz(3*MAX_N + 4);
    if (!avctx->extradata)
        return AVERROR(ENOMEM);
    if (s->flags&CODEC_FLAG_PASS1) {
#define STATS_OUT_SIZE 21*MAX_N*3 + 4
        avctx->stats_out = av_mallocz(STATS_OUT_SIZE); // 21*256*3(%llu ) + 3(\n) + 1(0) = 16132
        if (!avctx->stats_out)
            return AVERROR(ENOMEM);
    }
    s->version = 2;

    avctx->coded_frame = av_frame_alloc();
    if (!avctx->coded_frame)
        return AVERROR(ENOMEM);

    avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
    avctx->coded_frame->key_frame = 1;

    /* Derive the sample layout from the pixel format descriptor. */
    s->bps = desc->comp[0].depth_minus1 + 1;
    s->yuv = !(desc->flags & AV_PIX_FMT_FLAG_RGB) && desc->nb_components >= 2;
    s->chroma = desc->nb_components > 2;
    s->alpha = !!(desc->flags & AV_PIX_FMT_FLAG_ALPHA);
    av_pix_fmt_get_chroma_sub_sample(avctx->pix_fmt,
                                     &s->chroma_h_shift,
                                     &s->chroma_v_shift);

    /* YUV420P/YUV422P and packed RGB24/RGB32 keep version 2 and set
     * bitstream_bpp; every other accepted format selects version 3. */
    switch (avctx->pix_fmt) {
    case AV_PIX_FMT_YUV420P:
    case AV_PIX_FMT_YUV422P:
        if (s->width & 1) {
            av_log(avctx, AV_LOG_ERROR, "Width must be even for this colorspace.\n");
            return AVERROR(EINVAL);
        }
        s->bitstream_bpp = avctx->pix_fmt == AV_PIX_FMT_YUV420P ? 12 : 16;
        break;
    case AV_PIX_FMT_YUV444P:
    case AV_PIX_FMT_YUV410P:
    case AV_PIX_FMT_YUV411P:
    case AV_PIX_FMT_YUV440P:
    case AV_PIX_FMT_GBRP:
    case AV_PIX_FMT_GBRP9:
    case AV_PIX_FMT_GBRP10:
    case AV_PIX_FMT_GBRP12:
    case AV_PIX_FMT_GBRP14:
    case AV_PIX_FMT_GBRP16:
    case AV_PIX_FMT_GRAY8:
    case AV_PIX_FMT_GRAY16:
    case AV_PIX_FMT_YUVA444P:
    case AV_PIX_FMT_YUVA420P:
    case AV_PIX_FMT_YUVA422P:
    case AV_PIX_FMT_GBRAP:
    case AV_PIX_FMT_GRAY8A:
    case AV_PIX_FMT_YUV420P9:
    case AV_PIX_FMT_YUV420P10:
    case AV_PIX_FMT_YUV420P12:
    case AV_PIX_FMT_YUV420P14:
    case AV_PIX_FMT_YUV420P16:
    case AV_PIX_FMT_YUV422P9:
    case AV_PIX_FMT_YUV422P10:
    case AV_PIX_FMT_YUV422P12:
    case AV_PIX_FMT_YUV422P14:
    case AV_PIX_FMT_YUV422P16:
    case AV_PIX_FMT_YUV444P9:
    case AV_PIX_FMT_YUV444P10:
    case AV_PIX_FMT_YUV444P12:
    case AV_PIX_FMT_YUV444P14:
    case AV_PIX_FMT_YUV444P16:
    case AV_PIX_FMT_YUVA420P9:
    case AV_PIX_FMT_YUVA420P10:
    case AV_PIX_FMT_YUVA420P16:
    case AV_PIX_FMT_YUVA422P9:
    case AV_PIX_FMT_YUVA422P10:
    case AV_PIX_FMT_YUVA422P16:
    case AV_PIX_FMT_YUVA444P9:
    case AV_PIX_FMT_YUVA444P10:
    case AV_PIX_FMT_YUVA444P16:
        s->version = 3;
        break;
    case AV_PIX_FMT_RGB32:
        s->bitstream_bpp = 32;
        break;
    case AV_PIX_FMT_RGB24:
        s->bitstream_bpp = 24;
        break;
    default:
        av_log(avctx, AV_LOG_ERROR, "format not supported\n");
        return AVERROR(EINVAL);
    }
    s->n = 1<<s->bps;
    s->vlc_n = FFMIN(s->n, MAX_VLC_N);

    avctx->bits_per_coded_sample = s->bitstream_bpp;
    s->decorrelate = s->bitstream_bpp >= 24 && !s->yuv && !(desc->flags & AV_PIX_FMT_FLAG_PLANAR);
    s->predictor = avctx->prediction_method;
    s->interlaced = avctx->flags&CODEC_FLAG_INTERLACED_ME ? 1 : 0;
    if (avctx->context_model == 1) {
        s->context = avctx->context_model;
        if (s->flags & (CODEC_FLAG_PASS1|CODEC_FLAG_PASS2)) {
            av_log(avctx, AV_LOG_ERROR,
                   "context=1 is not compatible with "
                   "2 pass huffyuv encoding\n");
            return AVERROR(EINVAL);
        }
    }else s->context= 0;

    /* Restrictions that apply only when encoding as AV_CODEC_ID_HUFFYUV. */
    if (avctx->codec->id == AV_CODEC_ID_HUFFYUV) {
        if (avctx->pix_fmt == AV_PIX_FMT_YUV420P) {
            av_log(avctx, AV_LOG_ERROR,
                   "Error: YV12 is not supported by huffyuv; use "
                   "vcodec=ffvhuff or format=422p\n");
            return AVERROR(EINVAL);
        }
        if (avctx->context_model) {
            av_log(avctx, AV_LOG_ERROR,
                   "Error: per-frame huffman tables are not supported "
                   "by huffyuv; use vcodec=ffvhuff\n");
            return AVERROR(EINVAL);
        }
        if (s->version > 2) {
            av_log(avctx, AV_LOG_ERROR,
                   "Error: ver>2 is not supported "
                   "by huffyuv; use vcodec=ffvhuff\n");
            return AVERROR(EINVAL);
        }
        if (s->interlaced != ( s->height > 288 ))
            av_log(avctx, AV_LOG_INFO,
                   "using huffyuv 2.2.0 or newer interlacing flag\n");
    }

    if (s->version > 3 && avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
        av_log(avctx, AV_LOG_ERROR, "Ver > 3 is under development, files encoded with it may not be decodable with future versions!!!\n"
               "Use vstrict=-2 / -strict -2 to use it anyway.\n");
        return AVERROR(EINVAL);
    }

    if (s->bitstream_bpp >= 24 && s->predictor == MEDIAN && s->version <= 2) {
        av_log(avctx, AV_LOG_ERROR,
               "Error: RGB is incompatible with median predictor\n");
        return AVERROR(EINVAL);
    }

    /* Extradata header:
     *   byte 0: predictor, with the decorrelate flag in bit 6
     *   byte 1: bitstream_bpp (version < 3), otherwise bps - 1 in the high
     *           nibble and the chroma shifts in the low nibble
     *   byte 2: 0x10 / 0x20 for interlaced / not interlaced, 0x40 for
     *           context, plus chroma and alpha bits for version >= 3
     *   byte 3: 0 for version < 3, 1 otherwise */
    ((uint8_t*)avctx->extradata)[0] = s->predictor | (s->decorrelate << 6);
    ((uint8_t*)avctx->extradata)[2] = s->interlaced ? 0x10 : 0x20;
    if (s->context)
        ((uint8_t*)avctx->extradata)[2] |= 0x40;
    if (s->version < 3) {
        ((uint8_t*)avctx->extradata)[1] = s->bitstream_bpp;
        ((uint8_t*)avctx->extradata)[3] = 0;
    } else {
        ((uint8_t*)avctx->extradata)[1] = ((s->bps-1)<<4) | s->chroma_h_shift | (s->chroma_v_shift<<2);
        if (s->chroma)
            ((uint8_t*)avctx->extradata)[2] |= s->yuv ? 1 : 2;
        if (s->alpha)
            ((uint8_t*)avctx->extradata)[2] |= 4;
        ((uint8_t*)avctx->extradata)[3] = 1;
    }
    s->avctx->extradata_size = 4;

    if (avctx->stats_in) {
        char *p = avctx->stats_in;

        /* Start every count at 1, then add the counts parsed from stats_in. */
        for (i = 0; i < 4; i++)
            for (j = 0; j < s->vlc_n; j++)
                s->stats[i][j] = 1;

        for (;;) {
            for (i = 0; i < 4; i++) {
                char *next;

                for (j = 0; j < s->vlc_n; j++) {
                    s->stats[i][j] += strtol(p, &next, 0);
                    /* No number could be parsed: the statistics string is
                     * malformed, so report it like other bad parameters. */
                    if (next == p) return AVERROR(EINVAL);
                    p = next;
                }
            }
            if (p[0] == 0 || p[1] == 0 || p[2] == 0) break;
        }
    } else {
        /* No first-pass statistics: use a synthetic distribution that
         * decreases with the distance d of the symbol from 0 (modulo vlc_n). */
        for (i = 0; i < 4; i++)
            for (j = 0; j < s->vlc_n; j++) {
                int d = FFMIN(j, s->vlc_n - j);

                s->stats[i][j] = 100000000 / (d*d + 1);
            }
    }

    /* Append the initial Huffman tables after the 4-byte header. */
    ret = store_huffman_tables(s, s->avctx->extradata + s->avctx->extradata_size);
    if (ret < 0)
        return ret;
    s->avctx->extradata_size += ret;

    if (s->context) {
        /* Re-seed the statistics scaled to the picture size; tables other
         * than the first get a quarter of the weight. */
        for (i = 0; i < 4; i++) {
            int pels = s->width * s->height / (i ? 40 : 10);
            for (j = 0; j < s->vlc_n; j++) {
                int d = FFMIN(j, s->vlc_n - j);
                s->stats[i][j] = pels/(d*d + 1);
            }
        }
    } else {
        for (i = 0; i < 4; i++)
            for (j = 0; j < s->vlc_n; j++)
                s->stats[i][j]= 0;
    }

    if (ff_huffyuv_alloc_temp(s)) {
        ff_huffyuv_common_end(s);
        return AVERROR(ENOMEM);
    }

    s->picture_number=0;

    return 0;
}
/**
 * Huffman-code a run of 4:2:2 residuals from s->temp[] in Y0 U Y1 V order.
 *
 * Luma uses table 0, U table 1 and V table 2.  With CODEC_FLAG_PASS1 the
 * symbol statistics are gathered first; with CODEC_FLAG2_NO_OUTPUT nothing
 * is written.  When s->context is set, statistics are also updated while
 * writing.
 *
 * @param s      encoder context
 * @param offset luma offset into s->temp[0]; chroma uses offset / 2
 * @param count  number of luma samples to code (count / 2 pairs are coded)
 * @return 0 on success, -1 if the output buffer may be too small
 */
static int encode_422_bitstream(HYuvContext *s, int offset, int count)
{
    const uint8_t *y = s->temp[0] + offset;
    const uint8_t *u = s->temp[1] + offset / 2;
    const uint8_t *v = s->temp[2] + offset / 2;
    const int pairs  = count / 2;
    int i;

    /* Refuse to write unless 2 * 4 bytes per input sample remain free. */
    if (2 * 4 * count > s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3)) {
        av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
        return -1;
    }

#define LOAD4\
            int y0 = y[2 * i];\
            int y1 = y[2 * i + 1];\
            int u0 = u[i];\
            int v0 = v[i];

    if (s->flags & CODEC_FLAG_PASS1) {
        for (i = 0; i < pairs; i++) {
            LOAD4;
            s->stats[0][y0]++;
            s->stats[1][u0]++;
            s->stats[0][y1]++;
            s->stats[2][v0]++;
        }
    }

    if (s->avctx->flags2 & CODEC_FLAG2_NO_OUTPUT)
        return 0;

    if (!s->context) {
        /* Plain output: write the codes without touching the statistics. */
        for (i = 0; i < pairs; i++) {
            LOAD4;
            put_bits(&s->pb, s->len[0][y0], s->bits[0][y0]);
            put_bits(&s->pb, s->len[1][u0], s->bits[1][u0]);
            put_bits(&s->pb, s->len[0][y1], s->bits[0][y1]);
            put_bits(&s->pb, s->len[2][v0], s->bits[2][v0]);
        }
        return 0;
    }

    /* Context mode: count each symbol as it is written. */
    for (i = 0; i < pairs; i++) {
        LOAD4;
        s->stats[0][y0]++;
        put_bits(&s->pb, s->len[0][y0], s->bits[0][y0]);
        s->stats[1][u0]++;
        put_bits(&s->pb, s->len[1][u0], s->bits[1][u0]);
        s->stats[0][y1]++;
        put_bits(&s->pb, s->len[0][y1], s->bits[0][y1]);
        s->stats[2][v0]++;
        put_bits(&s->pb, s->len[2][v0], s->bits[2][v0]);
    }
    return 0;
}

491
/**
 * Huffman-code one row of residuals of a single plane from s->temp[0]
 * (or s->temp16[0] for depths above 8 bits) using table number plane.
 *
 * Samples are processed in pairs, followed by one trailing sample when width
 * is odd.  Three depth classes are handled:
 *  - bps <= 8:  symbols are read from s->temp[0] as-is;
 *  - bps <= 14: symbols are read from s->temp16[0] and masked with s->n - 1;
 *  - otherwise: the value shifted right by 2 is Huffman coded and the low
 *               two bits are written raw.
 *
 * With CODEC_FLAG_PASS1 the symbol statistics are gathered first; with
 * CODEC_FLAG2_NO_OUTPUT nothing is written.  When s->context is set,
 * statistics are also updated while writing.
 *
 * @param s     encoder context
 * @param width number of samples in the row
 * @param plane index of the statistics / code table to use
 * @return 0 on success, -1 if the output buffer may be too small
 */
static int encode_plane_bitstream(HYuvContext *s, int width, int plane)
{
    const int pass1     = (s->flags & CODEC_FLAG_PASS1) != 0;
    const int no_output = (s->avctx->flags2 & CODEC_FLAG2_NO_OUTPUT) != 0;
    const int odd_tail  = width & 1;
    int i, count = width/2;

    if (count * s->bps / 2 > s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3)) {
        av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
        return -1;
    }

#define LOADEND\
            int y0 = s->temp[0][width-1];
#define LOADEND_14\
            int y0 = s->temp16[0][width-1] & mask;
#define LOADEND_16\
            int y0 = s->temp16[0][width-1];
#define STATEND\
            s->stats[plane][y0]++;
#define STATEND_16\
            s->stats[plane][y0>>2]++;
#define WRITEEND\
            put_bits(&s->pb, s->len[plane][y0], s->bits[plane][y0]);
#define WRITEEND_16\
            put_bits(&s->pb, s->len[plane][y0>>2], s->bits[plane][y0>>2]);\
            put_bits(&s->pb, 2, y0&3);

#define LOAD2\
            int y0 = s->temp[0][2 * i];\
            int y1 = s->temp[0][2 * i + 1];
#define LOAD2_14\
            int y0 = s->temp16[0][2 * i] & mask;\
            int y1 = s->temp16[0][2 * i + 1] & mask;
#define LOAD2_16\
            int y0 = s->temp16[0][2 * i];\
            int y1 = s->temp16[0][2 * i + 1];
#define STAT2\
            s->stats[plane][y0]++;\
            s->stats[plane][y1]++;
#define STAT2_16\
            s->stats[plane][y0>>2]++;\
            s->stats[plane][y1>>2]++;
#define WRITE2\
            put_bits(&s->pb, s->len[plane][y0], s->bits[plane][y0]);\
            put_bits(&s->pb, s->len[plane][y1], s->bits[plane][y1]);
#define WRITE2_16\
            put_bits(&s->pb, s->len[plane][y0>>2], s->bits[plane][y0>>2]);\
            put_bits(&s->pb, 2, y0&3);\
            put_bits(&s->pb, s->len[plane][y1>>2], s->bits[plane][y1>>2]);\
            put_bits(&s->pb, 2, y1&3);

    if (s->bps <= 8) {
        /* 8-bit samples: symbols are used directly as table indices. */
        if (pass1) {
            for (i = 0; i < count; i++) {
                LOAD2;
                STAT2;
            }
            if (odd_tail) {
                LOADEND;
                STATEND;
            }
        }
        if (no_output)
            return 0;

        if (s->context) {
            for (i = 0; i < count; i++) {
                LOAD2;
                STAT2;
                WRITE2;
            }
            if (odd_tail) {
                LOADEND;
                STATEND;
                WRITEEND;
            }
        } else {
            for (i = 0; i < count; i++) {
                LOAD2;
                WRITE2;
            }
            if (odd_tail) {
                LOADEND;
                WRITEEND;
            }
        }
    } else if (s->bps <= 14) {
        /* 9..14-bit samples: mask each symbol to the table size first. */
        int mask = s->n - 1;

        if (pass1) {
            for (i = 0; i < count; i++) {
                LOAD2_14;
                STAT2;
            }
            if (odd_tail) {
                LOADEND_14;
                STATEND;
            }
        }
        if (no_output)
            return 0;

        if (s->context) {
            for (i = 0; i < count; i++) {
                LOAD2_14;
                STAT2;
                WRITE2;
            }
            if (odd_tail) {
                LOADEND_14;
                STATEND;
                WRITEEND;
            }
        } else {
            for (i = 0; i < count; i++) {
                LOAD2_14;
                WRITE2;
            }
            if (odd_tail) {
                LOADEND_14;
                WRITEEND;
            }
        }
    } else {
        /* Deeper samples: code value >> 2, then emit the low 2 bits raw. */
        if (pass1) {
            for (i = 0; i < count; i++) {
                LOAD2_16;
                STAT2_16;
            }
            if (odd_tail) {
                LOADEND_16;
                STATEND_16;
            }
        }
        if (no_output)
            return 0;

        if (s->context) {
            for (i = 0; i < count; i++) {
                LOAD2_16;
                STAT2_16;
                WRITE2_16;
            }
            if (odd_tail) {
                LOADEND_16;
                STATEND_16;
                WRITEEND_16;
            }
        } else {
            for (i = 0; i < count; i++) {
                LOAD2_16;
                WRITE2_16;
            }
            if (odd_tail) {
                LOADEND_16;
                WRITEEND_16;
            }
        }
    }
#undef LOAD2
#undef STAT2
#undef WRITE2
    return 0;
}

653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697
/**
 * Huffman-code a run of single-plane residuals from s->temp[0] with table 0.
 *
 * Samples are processed in pairs (count / 2 pairs).  With CODEC_FLAG_PASS1
 * the symbol statistics are gathered first; with CODEC_FLAG2_NO_OUTPUT
 * nothing is written.  When s->context is set, statistics are also updated
 * while writing.
 *
 * @param s     encoder context
 * @param count number of samples to code
 * @return 0 on success, -1 if the output buffer may be too small
 */
static int encode_gray_bitstream(HYuvContext *s, int count)
{
    const int pairs = count / 2;
    int i;

    /* Refuse to write unless 4 bytes per input sample remain free. */
    if (4 * count > s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3)) {
        av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
        return -1;
    }

#define LOAD2\
            int y0 = s->temp[0][2 * i];\
            int y1 = s->temp[0][2 * i + 1];
#define STAT2\
            s->stats[0][y0]++;\
            s->stats[0][y1]++;
#define WRITE2\
            put_bits(&s->pb, s->len[0][y0], s->bits[0][y0]);\
            put_bits(&s->pb, s->len[0][y1], s->bits[0][y1]);

    if (s->flags & CODEC_FLAG_PASS1) {
        for (i = 0; i < pairs; i++) {
            LOAD2;
            STAT2;
        }
    }

    if (s->avctx->flags2 & CODEC_FLAG2_NO_OUTPUT)
        return 0;

    if (!s->context) {
        /* Plain output: write the codes without touching the statistics. */
        for (i = 0; i < pairs; i++) {
            LOAD2;
            WRITE2;
        }
        return 0;
    }

    /* Context mode: count each symbol as it is written. */
    for (i = 0; i < pairs; i++) {
        LOAD2;
        STAT2;
        WRITE2;
    }
    return 0;
}

698
/**
 * Huffman-code a run of packed RGB(A) residual pixels from s->temp[0].
 *
 * For each pixel G is taken as stored, while B and R are coded as their
 * difference to G modulo 256.  Codes are written in the order G, B, R and,
 * for four planes, A.  B uses table 0, G table 1, and R and A both use
 * table 2.
 *
 * Statistics only are gathered when both CODEC_FLAG_PASS1 and
 * CODEC_FLAG2_NO_OUTPUT are set; statistics and output are produced when
 * s->context or CODEC_FLAG_PASS1 is set; otherwise only output is produced.
 *
 * @param s      encoder context
 * @param count  number of pixels to code
 * @param planes components per pixel (3 or 4)
 * @return 0 on success, -1 if the output buffer may be too small
 */
static inline int encode_bgra_bitstream(HYuvContext *s, int count, int planes)
{
    const int pass1     = (s->flags & CODEC_FLAG_PASS1) != 0;
    const int no_output = (s->avctx->flags2 & CODEC_FLAG2_NO_OUTPUT) != 0;
    int i;

    /* Refuse to write unless 4 bytes per component remain free. */
    if (4 * planes * count >
        s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3)) {
        av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
        return -1;
    }

#define LOAD_GBRA                                                       \
    int g = s->temp[0][planes == 3 ? 3 * i + 1 : 4 * i + G];            \
    int b =(s->temp[0][planes == 3 ? 3 * i + 2 : 4 * i + B] - g) & 0xFF;\
    int r =(s->temp[0][planes == 3 ? 3 * i + 0 : 4 * i + R] - g) & 0xFF;\
    int a = s->temp[0][planes * i + A];

#define STAT_BGRA                                                       \
    s->stats[0][b]++;                                                   \
    s->stats[1][g]++;                                                   \
    s->stats[2][r]++;                                                   \
    if (planes == 4)                                                    \
        s->stats[2][a]++;

#define WRITE_GBRA                                                      \
    put_bits(&s->pb, s->len[1][g], s->bits[1][g]);                      \
    put_bits(&s->pb, s->len[0][b], s->bits[0][b]);                      \
    put_bits(&s->pb, s->len[2][r], s->bits[2][r]);                      \
    if (planes == 4)                                                    \
        put_bits(&s->pb, s->len[2][a], s->bits[2][a]);

    /* First pass without output: statistics only. */
    if (pass1 && no_output) {
        for (i = 0; i < count; i++) {
            LOAD_GBRA;
            STAT_BGRA;
        }
        return 0;
    }

    /* Adaptive tables or first pass with output: statistics and codes. */
    if (s->context || pass1) {
        for (i = 0; i < count; i++) {
            LOAD_GBRA;
            STAT_BGRA;
            WRITE_GBRA;
        }
        return 0;
    }

    /* Plain output: codes only. */
    for (i = 0; i < count; i++) {
        LOAD_GBRA;
        WRITE_GBRA;
    }
    return 0;
}

/**
 * Encode one picture into an output packet.
 *
 * The packet is allocated here. When s->context is set, the Huffman tables
 * are stored at the start of the packet and the running statistics are
 * halved. The picture is then written through the put-bits writer; the
 * first row of every plane is always left-predicted, the remaining rows use
 * left, plane or median prediction depending on s->predictor. Finally the
 * stream is padded, byte-swapped in 32-bit units (unless output is
 * suppressed) and the packet size is set.
 *
 * NOTE(review): the return values of the encode_*_bitstream() helpers are
 * not checked here; encode_bgra_bitstream() at least can fail with
 * "encoded frame too large" -- confirm whether that should be propagated.
 *
 * @param avctx      codec context; avctx->priv_data is the HYuvContext
 * @param pkt        packet that receives the encoded frame
 * @param pict       picture to encode
 * @param got_packet set to 1 when a packet was produced
 * @return 0 on success, a negative error code otherwise
 */
static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
                        const AVFrame *pict, int *got_packet)
{
    HYuvContext *s = avctx->priv_data;
    const int width = s->width;
    const int width2 = s->width>>1;
    const int height = s->height;
    /* For interlaced content the "previous line" used by the vertical
     * predictors is two picture lines away. */
    const int fake_ystride = s->interlaced ? pict->linesize[0]*2  : pict->linesize[0];
    const int fake_ustride = s->interlaced ? pict->linesize[1]*2  : pict->linesize[1];
    const int fake_vstride = s->interlaced ? pict->linesize[2]*2  : pict->linesize[2];
    const AVFrame * const p = pict;
    int i, j, size = 0, ret;

    if ((ret = ff_alloc_packet2(avctx, pkt, width * height * 3 * 4 + FF_MIN_BUFFER_SIZE)) < 0)
        return ret;

    if (s->context) {
        /* Adaptive tables: emit the current tables in front of the frame
         * data, then age the statistics by halving every counter. */
        size = store_huffman_tables(s, pkt->data);
        if (size < 0)
            return size;

        for (i = 0; i < 4; i++)
            for (j = 0; j < s->vlc_n; j++)
                s->stats[i][j] >>= 1;
    }

    init_put_bits(&s->pb, pkt->data + size, pkt->size - size);

    if (avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
        avctx->pix_fmt == AV_PIX_FMT_YUV420P) {
        int lefty, leftu, leftv, y, cy;

        /* Four raw bytes: V[0], Y[1], U[0], Y[0]. */
        put_bits(&s->pb, 8, leftv = p->data[2][0]);
        put_bits(&s->pb, 8, lefty = p->data[0][1]);
        put_bits(&s->pb, 8, leftu = p->data[1][0]);
        put_bits(&s->pb, 8,         p->data[0][0]);

        lefty = sub_left_prediction(s, s->temp[0], p->data[0], width , 0);
        leftu = sub_left_prediction(s, s->temp[1], p->data[1], width2, 0);
        leftv = sub_left_prediction(s, s->temp[2], p->data[2], width2, 0);

        /* Offset 2 / count width-2: presumably skips the samples that were
         * already written raw above -- confirm against encode_422_bitstream. */
        encode_422_bitstream(s, 2, width-2);

        if (s->predictor==MEDIAN) {
            int lefttopy, lefttopu, lefttopv;
            cy = y = 1;
            if (s->interlaced) {
                /* Second picture line is still left-predicted. */
                lefty = sub_left_prediction(s, s->temp[0], p->data[0] + p->linesize[0], width , lefty);
                leftu = sub_left_prediction(s, s->temp[1], p->data[1] + p->linesize[1], width2, leftu);
                leftv = sub_left_prediction(s, s->temp[2], p->data[2] + p->linesize[2], width2, leftv);

                encode_422_bitstream(s, 0, width);
                y++; cy++;
            }

            /* First 4 luma / 2 chroma samples of the next line use left
             * prediction; the remainder of that line switches to median. */
            lefty = sub_left_prediction(s, s->temp[0], p->data[0] + fake_ystride, 4, lefty);
            leftu = sub_left_prediction(s, s->temp[1], p->data[1] + fake_ustride, 2, leftu);
            leftv = sub_left_prediction(s, s->temp[2], p->data[2] + fake_vstride, 2, leftv);

            encode_422_bitstream(s, 0, 4);

            lefttopy = p->data[0][3];
            lefttopu = p->data[1][1];
            lefttopv = p->data[2][1];
            s->hencdsp.sub_hfyu_median_pred(s->temp[0], p->data[0] + 4, p->data[0] + fake_ystride + 4, width  - 4, &lefty, &lefttopy);
            s->hencdsp.sub_hfyu_median_pred(s->temp[1], p->data[1] + 2, p->data[1] + fake_ustride + 2, width2 - 2, &leftu, &lefttopu);
            s->hencdsp.sub_hfyu_median_pred(s->temp[2], p->data[2] + 2, p->data[2] + fake_vstride + 2, width2 - 2, &leftv, &lefttopv);
            encode_422_bitstream(s, 0, width - 4);
            y++; cy++;

            for (; y < height; y++,cy++) {
                uint8_t *ydst, *udst, *vdst;

                if (s->bitstream_bpp == 12) {
                    /* Emit luma-only lines until the luma row index catches
                     * up with twice the chroma row index. */
                    while (2 * cy > y) {
                        ydst = p->data[0] + p->linesize[0] * y;
                        s->hencdsp.sub_hfyu_median_pred(s->temp[0], ydst - fake_ystride, ydst, width, &lefty, &lefttopy);
                        encode_gray_bitstream(s, width);
                        y++;
                    }
                    if (y >= height) break;
                }
                ydst = p->data[0] + p->linesize[0] * y;
                udst = p->data[1] + p->linesize[1] * cy;
                vdst = p->data[2] + p->linesize[2] * cy;

                s->hencdsp.sub_hfyu_median_pred(s->temp[0], ydst - fake_ystride, ydst, width,  &lefty, &lefttopy);
                s->hencdsp.sub_hfyu_median_pred(s->temp[1], udst - fake_ustride, udst, width2, &leftu, &lefttopu);
                s->hencdsp.sub_hfyu_median_pred(s->temp[2], vdst - fake_vstride, vdst, width2, &leftv, &lefttopv);

                encode_422_bitstream(s, 0, width);
            }
        } else {
            for (cy = y = 1; y < height; y++, cy++) {
                uint8_t *ydst, *udst, *vdst;

                /* encode a luma only line & y++ */
                if (s->bitstream_bpp == 12) {
                    ydst = p->data[0] + p->linesize[0] * y;

                    if (s->predictor == PLANE && s->interlaced < y) {
                        /* Vertical difference first, then left prediction. */
                        s->hencdsp.diff_bytes(s->temp[1], ydst, ydst - fake_ystride, width);

                        lefty = sub_left_prediction(s, s->temp[0], s->temp[1], width , lefty);
                    } else {
                        lefty = sub_left_prediction(s, s->temp[0], ydst, width , lefty);
                    }
                    encode_gray_bitstream(s, width);
                    y++;
                    if (y >= height) break;
                }

                ydst = p->data[0] + p->linesize[0] * y;
                udst = p->data[1] + p->linesize[1] * cy;
                vdst = p->data[2] + p->linesize[2] * cy;

                if (s->predictor == PLANE && s->interlaced < cy) {
                    /* temp[2] holds the U differences followed by the V
                     * differences at offset width2. */
                    s->hencdsp.diff_bytes(s->temp[1],          ydst, ydst - fake_ystride, width);
                    s->hencdsp.diff_bytes(s->temp[2],          udst, udst - fake_ustride, width2);
                    s->hencdsp.diff_bytes(s->temp[2] + width2, vdst, vdst - fake_vstride, width2);

                    lefty = sub_left_prediction(s, s->temp[0], s->temp[1], width , lefty);
                    leftu = sub_left_prediction(s, s->temp[1], s->temp[2], width2, leftu);
                    leftv = sub_left_prediction(s, s->temp[2], s->temp[2] + width2, width2, leftv);
                } else {
                    lefty = sub_left_prediction(s, s->temp[0], ydst, width , lefty);
                    leftu = sub_left_prediction(s, s->temp[1], udst, width2, leftu);
                    leftv = sub_left_prediction(s, s->temp[2], vdst, width2, leftv);
                }

                encode_422_bitstream(s, 0, width);
            }
        }
    } else if(avctx->pix_fmt == AV_PIX_FMT_RGB32) {
        /* Rows are walked from the last picture row to the first: start at
         * the last row and step with a negative stride. */
        uint8_t *data = p->data[0] + (height - 1) * p->linesize[0];
        const int stride = -p->linesize[0];
        const int fake_stride = -fake_ystride;
        int y;
        int leftr, leftg, leftb, lefta;

        /* First pixel is stored raw. */
        put_bits(&s->pb, 8, lefta = data[A]);
        put_bits(&s->pb, 8, leftr = data[R]);
        put_bits(&s->pb, 8, leftg = data[G]);
        put_bits(&s->pb, 8, leftb = data[B]);

        sub_left_prediction_bgr32(s, s->temp[0], data + 4, width - 1,
                                  &leftr, &leftg, &leftb, &lefta);
        encode_bgra_bitstream(s, width - 1, 4);

        for (y = 1; y < height; y++) {
            uint8_t *dst = data + y*stride;
            if (s->predictor == PLANE && s->interlaced < y) {
                s->hencdsp.diff_bytes(s->temp[1], dst, dst - fake_stride, width * 4);
                sub_left_prediction_bgr32(s, s->temp[0], s->temp[1], width,
                                          &leftr, &leftg, &leftb, &lefta);
            } else {
                sub_left_prediction_bgr32(s, s->temp[0], dst, width,
                                          &leftr, &leftg, &leftb, &lefta);
            }
            encode_bgra_bitstream(s, width, 4);
        }
    } else if (avctx->pix_fmt == AV_PIX_FMT_RGB24) {
        /* Same row order as the RGB32 branch, 3 bytes per pixel. */
        uint8_t *data = p->data[0] + (height - 1) * p->linesize[0];
        const int stride = -p->linesize[0];
        const int fake_stride = -fake_ystride;
        int y;
        int leftr, leftg, leftb;

        /* First pixel raw, followed by one zero byte. */
        put_bits(&s->pb, 8, leftr = data[0]);
        put_bits(&s->pb, 8, leftg = data[1]);
        put_bits(&s->pb, 8, leftb = data[2]);
        put_bits(&s->pb, 8, 0);

        sub_left_prediction_rgb24(s, s->temp[0], data + 3, width - 1,
                                  &leftr, &leftg, &leftb);
        encode_bgra_bitstream(s, width-1, 3);

        for (y = 1; y < height; y++) {
            uint8_t *dst = data + y * stride;
            if (s->predictor == PLANE && s->interlaced < y) {
                s->hencdsp.diff_bytes(s->temp[1], dst, dst - fake_stride,
                                      width * 3);
                sub_left_prediction_rgb24(s, s->temp[0], s->temp[1], width,
                                          &leftr, &leftg, &leftb);
            } else {
                sub_left_prediction_rgb24(s, s->temp[0], dst, width,
                                          &leftr, &leftg, &leftb);
            }
            encode_bgra_bitstream(s, width, 3);
        }
    } else if (s->version > 2) {
        /* Generic planar path: each plane is coded on its own. */
        int plane;
        for (plane = 0; plane < 1 + 2*s->chroma + s->alpha; plane++) {
            int left, y;
            int w = width;
            int h = height;
            int fake_stride = fake_ystride;

            if (s->chroma && (plane == 1 || plane == 2)) {
                /* Chroma planes use the subsampled dimensions. */
                w >>= s->chroma_h_shift;
                h >>= s->chroma_v_shift;
                fake_stride = plane == 1 ? fake_ustride : fake_vstride;
            }

            left = sub_left_prediction(s, s->temp[0], p->data[plane], w , 0);

            encode_plane_bitstream(s, w, plane);

            if (s->predictor==MEDIAN) {
                int lefttop;
                y = 1;
                if (s->interlaced) {
                    left = sub_left_prediction(s, s->temp[0], p->data[plane] + p->linesize[plane], w , left);

                    encode_plane_bitstream(s, w, plane);
                    y++;
                }

                lefttop = p->data[plane][0];

                for (; y < h; y++) {
                    uint8_t *dst = p->data[plane] + p->linesize[plane] * y;

                    sub_median_prediction(s, s->temp[0], dst - fake_stride, dst, w , &left, &lefttop);

                    encode_plane_bitstream(s, w, plane);
                }
            } else {
                for (y = 1; y < h; y++) {
                    uint8_t *dst = p->data[plane] + p->linesize[plane] * y;

                    if (s->predictor == PLANE && s->interlaced < y) {
                        diff_bytes(s, s->temp[1], dst, dst - fake_stride, w);

                        left = sub_left_prediction(s, s->temp[0], s->temp[1], w , left);
                    } else {
                        left = sub_left_prediction(s, s->temp[0], dst, w , left);
                    }

                    encode_plane_bitstream(s, w, plane);
                }
            }
        }
    } else {
        /* Nothing was encoded: fail instead of reporting success with a
         * packet that carries no picture data. */
        av_log(avctx, AV_LOG_ERROR, "Format not supported!\n");
        return AVERROR(EINVAL);
    }
    emms_c();

    /* Round the byte count up, append 31 zero bits of padding, and convert
     * size to a count of 32-bit words. */
    size += (put_bits_count(&s->pb) + 31) / 8;
    put_bits(&s->pb, 16, 0);
    put_bits(&s->pb, 15, 0);
    size /= 4;

    if ((s->flags&CODEC_FLAG_PASS1) && (s->picture_number & 31) == 0) {
        /* Every 32nd picture of a first pass: dump the counters as text
         * into stats_out (one line per table) and reset them. */
        char *out = avctx->stats_out;
        char *end = out + STATS_OUT_SIZE;
        for (i = 0; i < 4; i++) {
            for (j = 0; j < s->vlc_n; j++) {
                snprintf(out, end-out, "%"PRIu64" ", s->stats[i][j]);
                out += strlen(out);
                s->stats[i][j]= 0;
            }
            snprintf(out, end-out, "\n");
            out++;
            if (end <= out)
                return AVERROR(ENOMEM);
        }
    } else if (avctx->stats_out)
        avctx->stats_out[0] = '\0';
    if (!(s->avctx->flags2 & CODEC_FLAG2_NO_OUTPUT)) {
        flush_put_bits(&s->pb);
        /* In-place swap over the whole packet, size counted in 32-bit words. */
        s->bdsp.bswap_buf((uint32_t *) pkt->data, (uint32_t *) pkt->data, size);
    }

    s->picture_number++;

    pkt->size   = size * 4;
    pkt->flags |= AV_PKT_FLAG_KEY;
    *got_packet = 1;

    return 0;
}

/**
 * Encoder close callback.
 *
 * Runs the common huffyuv teardown on the private context and frees the
 * extradata, stats_out and coded_frame members of the codec context.
 *
 * @param avctx codec context being closed
 * @return always 0
 */
static av_cold int encode_end(AVCodecContext *avctx)
{
    /* Shared huffyuv state goes first. */
    ff_huffyuv_common_end(avctx->priv_data);

    /* Then the members hanging off the codec context itself. */
    av_frame_free(&avctx->coded_frame);
    av_freep(&avctx->stats_out);
    av_freep(&avctx->extradata);

    return 0;
}

1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067
/* Private encoder options; referenced by both AVClass definitions in this
 * file (normal_class and ff_class). */
static const AVOption options[] = {
    /* Integer option stored in HYuvContext.non_determ, default 1,
     * accepted range 0..1. */
    { "non_deterministic", "Allow multithreading for e.g. context=1 at the expense of determinism",
      offsetof(HYuvContext, non_determ), AV_OPT_TYPE_INT, { .i64 = 1 },
      0, 1, AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM },
    /* Terminating entry. */
    { NULL },
};

/* AVClass attached to the "huffyuv" encoder; exposes the option table. */
static const AVClass normal_class = {
    .version    = LIBAVUTIL_VERSION_INT,
    .class_name = "huffyuv",
    .option     = options,
    .item_name  = av_default_item_name,
};

/* AVClass attached to the "ffvhuff" encoder; exposes the same option table. */
static const AVClass ff_class = {
    .version    = LIBAVUTIL_VERSION_INT,
    .class_name = "ffvhuff",
    .option     = options,
    .item_name  = av_default_item_name,
};

1068 1069
AVCodec ff_huffyuv_encoder = {
    .name           = "huffyuv",
1070
    .long_name      = NULL_IF_CONFIG_SMALL("Huffyuv / HuffYUV"),
1071 1072 1073 1074 1075 1076
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_HUFFYUV,
    .priv_data_size = sizeof(HYuvContext),
    .init           = encode_init,
    .encode2        = encode_frame,
    .close          = encode_end,
1077
    .capabilities   = CODEC_CAP_FRAME_THREADS | CODEC_CAP_INTRA_ONLY,
1078
    .priv_class     = &normal_class,
1079
    .pix_fmts       = (const enum AVPixelFormat[]){
1080 1081
        AV_PIX_FMT_YUV422P, AV_PIX_FMT_RGB24,
        AV_PIX_FMT_RGB32, AV_PIX_FMT_NONE
1082 1083 1084 1085 1086 1087
    },
};

#if CONFIG_FFVHUFF_ENCODER
/* Codec registration entry for the "ffvhuff" encoder; it shares the init,
 * encode and close callbacks with the huffyuv encoder. */
AVCodec ff_ffvhuff_encoder = {
    /* identity */
    .id             = AV_CODEC_ID_FFVHUFF,
    .type           = AVMEDIA_TYPE_VIDEO,
    .name           = "ffvhuff",
    .long_name      = NULL_IF_CONFIG_SMALL("Huffyuv FFmpeg variant"),

    /* private state and its option class */
    .priv_data_size = sizeof(HYuvContext),
    .priv_class     = &ff_class,

    /* callbacks */
    .init           = encode_init,
    .encode2        = encode_frame,
    .close          = encode_end,

    .capabilities   = CODEC_CAP_INTRA_ONLY | CODEC_CAP_FRAME_THREADS,

    /* accepted input pixel formats, AV_PIX_FMT_NONE-terminated;
     * the order of the entries is kept as in the original list */
    .pix_fmts       = (const enum AVPixelFormat[]){
        /* planar YUV */
        AV_PIX_FMT_YUV420P,
        AV_PIX_FMT_YUV422P,
        AV_PIX_FMT_YUV444P,
        AV_PIX_FMT_YUV411P,
        AV_PIX_FMT_YUV410P,
        AV_PIX_FMT_YUV440P,
        /* planar GBR */
        AV_PIX_FMT_GBRP,
        AV_PIX_FMT_GBRP9,
        AV_PIX_FMT_GBRP10,
        AV_PIX_FMT_GBRP12,
        AV_PIX_FMT_GBRP14,
        /* gray */
        AV_PIX_FMT_GRAY8,
        AV_PIX_FMT_GRAY16,
        /* formats carrying an alpha channel */
        AV_PIX_FMT_YUVA420P,
        AV_PIX_FMT_YUVA422P,
        AV_PIX_FMT_YUVA444P,
        AV_PIX_FMT_GBRAP,
        AV_PIX_FMT_GRAY8A,
        /* planar YUV, 9 to 16 bits */
        AV_PIX_FMT_YUV420P9,
        AV_PIX_FMT_YUV420P10,
        AV_PIX_FMT_YUV420P12,
        AV_PIX_FMT_YUV420P14,
        AV_PIX_FMT_YUV420P16,
        AV_PIX_FMT_YUV422P9,
        AV_PIX_FMT_YUV422P10,
        AV_PIX_FMT_YUV422P12,
        AV_PIX_FMT_YUV422P14,
        AV_PIX_FMT_YUV422P16,
        AV_PIX_FMT_YUV444P9,
        AV_PIX_FMT_YUV444P10,
        AV_PIX_FMT_YUV444P12,
        AV_PIX_FMT_YUV444P14,
        AV_PIX_FMT_YUV444P16,
        /* planar YUV with alpha, 9 to 16 bits */
        AV_PIX_FMT_YUVA420P9,
        AV_PIX_FMT_YUVA420P10,
        AV_PIX_FMT_YUVA420P16,
        AV_PIX_FMT_YUVA422P9,
        AV_PIX_FMT_YUVA422P10,
        AV_PIX_FMT_YUVA422P16,
        AV_PIX_FMT_YUVA444P9,
        AV_PIX_FMT_YUVA444P10,
        AV_PIX_FMT_YUVA444P16,
        /* packed RGB */
        AV_PIX_FMT_RGB24,
        AV_PIX_FMT_RGB32,
        AV_PIX_FMT_NONE
    },
};
#endif