/*
 * Copyright (C) 2003-2004 the ffmpeg project
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * On2 VP3 Video Decoder
 *
 * VP3 Video Decoder by Mike Melanson (mike at multimedia.cx)
 * For more information about the VP3 coding process, visit:
 *   http://wiki.multimedia.cx/index.php?title=On2_VP3
 *
 * Theora decoder by Alex Beregszaszi
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

36
#include "libavutil/imgutils.h"
37
#include "avcodec.h"
38
#include "internal.h"
39
#include "dsputil.h"
40
#include "get_bits.h"
41
#include "hpeldsp.h"
42
#include "videodsp.h"
43
#include "vp3data.h"
44
#include "vp3dsp.h"
45
#include "xiph.h"
46
#include "thread.h"
47 48 49

/* NOTE(review): presumably the edge length, in pixels, of one fragment
 * (8x8 block); it is not referenced in the part of the file reviewed here. */
#define FRAGMENT_PIXELS 8

//FIXME split things out into their own arrays
/**
 * Per-fragment decoding state; one entry per fragment in
 * Vp3DecodeContext.all_fragments.
 */
typedef struct Vp3Fragment {
    int16_t dc;            ///< DC value stored for this fragment
    uint8_t coding_method; ///< one of the MODE_* constants (MODE_COPY when not coded)
    uint8_t qpi;           ///< quantizer index selector, advanced by unpack_block_qpis()
} Vp3Fragment;

/* superblock coding states, as stored in Vp3DecodeContext.superblock_coding[] */
#define SB_NOT_CODED        0
#define SB_PARTIALLY_CODED  1
#define SB_FULLY_CODED      2

// This is the maximum length of a single long bit run that can be encoded
// for superblock coding or block qps. Theora special-cases this to read a
// bit instead of flipping the current bit to allow for runs longer than 4129.
#define MAXIMUM_LONG_BIT_RUN 4129

/* macroblock / fragment coding modes as they appear in the bitstream */
#define MODE_INTER_NO_MV      0
#define MODE_INTRA            1
#define MODE_INTER_PLUS_MV    2
#define MODE_INTER_LAST_MV    3
#define MODE_INTER_PRIOR_LAST 4
#define MODE_USING_GOLDEN     5
#define MODE_GOLDEN_MV        6
#define MODE_INTER_FOURMV     7
#define CODING_MODE_COUNT     8

/* special internal mode: never read from the bitstream, marks fragments and
 * macroblocks that are not coded in the current frame */
#define MODE_COPY             8

/* There are 6 preset schemes, plus a free-form scheme */
80
static const int ModeAlphabet[6][CODING_MODE_COUNT] =
81 82
{
    /* scheme 1: Last motion vector dominates */
83
    {    MODE_INTER_LAST_MV,    MODE_INTER_PRIOR_LAST,
84
         MODE_INTER_PLUS_MV,    MODE_INTER_NO_MV,
85
         MODE_INTRA,            MODE_USING_GOLDEN,
86 87 88
         MODE_GOLDEN_MV,        MODE_INTER_FOURMV },

    /* scheme 2 */
89
    {    MODE_INTER_LAST_MV,    MODE_INTER_PRIOR_LAST,
90
         MODE_INTER_NO_MV,      MODE_INTER_PLUS_MV,
91
         MODE_INTRA,            MODE_USING_GOLDEN,
92 93 94
         MODE_GOLDEN_MV,        MODE_INTER_FOURMV },

    /* scheme 3 */
95
    {    MODE_INTER_LAST_MV,    MODE_INTER_PLUS_MV,
96
         MODE_INTER_PRIOR_LAST, MODE_INTER_NO_MV,
97
         MODE_INTRA,            MODE_USING_GOLDEN,
98 99 100
         MODE_GOLDEN_MV,        MODE_INTER_FOURMV },

    /* scheme 4 */
101
    {    MODE_INTER_LAST_MV,    MODE_INTER_PLUS_MV,
102
         MODE_INTER_NO_MV,      MODE_INTER_PRIOR_LAST,
103
         MODE_INTRA,            MODE_USING_GOLDEN,
104 105 106
         MODE_GOLDEN_MV,        MODE_INTER_FOURMV },

    /* scheme 5: No motion vector dominates */
107
    {    MODE_INTER_NO_MV,      MODE_INTER_LAST_MV,
108
         MODE_INTER_PRIOR_LAST, MODE_INTER_PLUS_MV,
109
         MODE_INTRA,            MODE_USING_GOLDEN,
110 111 112
         MODE_GOLDEN_MV,        MODE_INTER_FOURMV },

    /* scheme 6 */
113
    {    MODE_INTER_NO_MV,      MODE_USING_GOLDEN,
114
         MODE_INTER_LAST_MV,    MODE_INTER_PRIOR_LAST,
115
         MODE_INTER_PLUS_MV,    MODE_INTRA,
116 117 118 119
         MODE_GOLDEN_MV,        MODE_INTER_FOURMV },

};

/**
 * Position of each of the 16 fragments of a superblock, in coding order.
 * Entry i is { x offset, y offset } in fragment units relative to the
 * superblock origin (see init_block_mapping()).
 */
static const uint8_t hilbert_offset[16][2] = {
    {0,0}, {1,0}, {1,1}, {0,1},
    {0,2}, {0,3}, {1,3}, {1,2},
    {2,2}, {2,3}, {3,3}, {3,2},
    {3,1}, {2,1}, {2,0}, {3,0}
};

127 128 129 130
#define MIN_DEQUANT_VAL 2

typedef struct Vp3DecodeContext {
    AVCodecContext *avctx;
131
    int theora, theora_tables;
132
    int version;
133
    int width, height;
134
    int chroma_x_shift, chroma_y_shift;
135 136 137
    ThreadFrame golden_frame;
    ThreadFrame last_frame;
    ThreadFrame current_frame;
138
    int keyframe;
139
    uint8_t idct_permutation[64];
140
    uint8_t idct_scantable[64];
141
    HpelDSPContext hdsp;
142
    VideoDSPContext vdsp;
143
    VP3DSPContext vp3dsp;
Diego Biurrun's avatar
Diego Biurrun committed
144
    DECLARE_ALIGNED(16, int16_t, block)[64];
145
    int flipped_image;
146
    int last_slice_end;
147
    int skip_loop_filter;
148

149 150 151
    int qps[3];
    int nqps;
    int last_qps[3];
152 153

    int superblock_count;
154 155
    int y_superblock_width;
    int y_superblock_height;
156
    int y_superblock_count;
157 158
    int c_superblock_width;
    int c_superblock_height;
159
    int c_superblock_count;
160 161 162 163 164 165 166 167 168
    int u_superblock_start;
    int v_superblock_start;
    unsigned char *superblock_coding;

    int macroblock_count;
    int macroblock_width;
    int macroblock_height;

    int fragment_count;
169 170
    int fragment_width[2];
    int fragment_height[2];
171 172

    Vp3Fragment *all_fragments;
Michael Niedermayer's avatar
Michael Niedermayer committed
173
    int fragment_start[3];
174
    int data_offset[3];
175

176 177
    int8_t (*motion_val[2])[2];

178 179
    /* tables */
    uint16_t coded_dc_scale_factor[64];
180
    uint32_t coded_ac_scale_factor[64];
181 182 183 184
    uint8_t base_matrix[384][64];
    uint8_t qr_count[2][3];
    uint8_t qr_size [2][3][64];
    uint16_t qr_base[2][3][64];
185

186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214
    /**
     * This is a list of all tokens in bitstream order. Reordering takes place
     * by pulling from each level during IDCT. As a consequence, IDCT must be
     * in Hilbert order, making the minimum slice height 64 for 4:2:0 and 32
     * otherwise. The 32 different tokens with up to 12 bits of extradata are
     * collapsed into 3 types, packed as follows:
     *   (from the low to high bits)
     *
     * 2 bits: type (0,1,2)
     *   0: EOB run, 14 bits for run length (12 needed)
     *   1: zero run, 7 bits for run length
     *                7 bits for the next coefficient (3 needed)
     *   2: coefficient, 14 bits (11 needed)
     *
     * Coefficients are signed, so are packed in the highest bits for automatic
     * sign extension.
     */
    int16_t *dct_tokens[3][64];
    int16_t *dct_tokens_base;
#define TOKEN_EOB(eob_run)              ((eob_run) << 2)
#define TOKEN_ZERO_RUN(coeff, zero_run) (((coeff) << 9) + ((zero_run) << 2) + 1)
#define TOKEN_COEFF(coeff)              (((coeff) << 2) + 2)

    /**
     * number of blocks that contain DCT coefficients at the given level or higher
     */
    int num_coded_frags[3][64];
    int total_num_coded_frags;

215
    /* this is a list of indexes into the all_fragments array indicating
216
     * which of the fragments are coded */
217
    int *coded_fragment_list[3];
218

219 220 221 222 223 224
    VLC dc_vlc[16];
    VLC ac_vlc_1[16];
    VLC ac_vlc_2[16];
    VLC ac_vlc_3[16];
    VLC ac_vlc_4[16];

225 226 227 228 229
    VLC superblock_run_length_vlc;
    VLC fragment_run_length_vlc;
    VLC mode_code_vlc;
    VLC motion_vector_vlc;

230 231
    /* these arrays need to be on 16-byte boundaries since SSE2 operations
     * index into them */
232
    DECLARE_ALIGNED(16, int16_t, qmat)[3][2][3][64];     ///< qmat[qpi][is_inter][plane]
233 234

    /* This table contains superblock_count * 16 entries. Each set of 16
235
     * numbers corresponds to the fragment indexes 0..15 of the superblock.
236 237 238 239
     * An entry will be -1 to indicate that no entry corresponds to that
     * index. */
    int *superblock_fragments;

240
    /* This is an array that indicates how a particular macroblock
241
     * is coded. */
242
    unsigned char *macroblock_coding;
243

244
    uint8_t *edge_emu_buffer;
245

246 247 248 249 250
    /* Huffman decode */
    int hti;
    unsigned int hbits;
    int entries;
    int huff_code_size;
251
    uint32_t huffman_table[80][32][2];
252

253
    uint8_t filter_limit_values[64];
254
    DECLARE_ALIGNED(8, int, bounding_values_array)[256+2];
255 256 257 258 259 260
} Vp3DecodeContext;

/************************************************************************
 * VP3 specific functions
 ************************************************************************/

261 262 263 264
static void vp3_decode_flush(AVCodecContext *avctx)
{
    Vp3DecodeContext *s = avctx->priv_data;

265
    if (s->golden_frame.f)
266
        ff_thread_release_buffer(avctx, &s->golden_frame);
267
    if (s->last_frame.f)
268
        ff_thread_release_buffer(avctx, &s->last_frame);
269
    if (s->current_frame.f)
270 271 272 273 274 275 276 277
        ff_thread_release_buffer(avctx, &s->current_frame);
}

/**
 * Free everything owned by the decoder context.
 *
 * The per-context buffers and the three frames are always freed. The VLC
 * tables are freed only when this context is not a copy
 * (avctx->internal->is_copy is 0).
 *
 * @return always 0
 */
static av_cold int vp3_decode_end(AVCodecContext *avctx)
{
    Vp3DecodeContext *s = avctx->priv_data;
    int i;

    av_freep(&s->superblock_coding);
    av_freep(&s->all_fragments);
    av_freep(&s->coded_fragment_list[0]);
    av_freep(&s->dct_tokens_base);
    av_freep(&s->superblock_fragments);
    av_freep(&s->macroblock_coding);
    av_freep(&s->motion_val[0]);
    av_freep(&s->motion_val[1]);
    av_freep(&s->edge_emu_buffer);

    /* release all frames */
    vp3_decode_flush(avctx);
    av_frame_free(&s->current_frame.f);
    av_frame_free(&s->last_frame.f);
    av_frame_free(&s->golden_frame.f);

    /* copies skip the VLC cleanup below */
    if (avctx->internal->is_copy)
        return 0;

    for (i = 0; i < 16; i++) {
        ff_free_vlc(&s->dc_vlc[i]);
        ff_free_vlc(&s->ac_vlc_1[i]);
        ff_free_vlc(&s->ac_vlc_2[i]);
        ff_free_vlc(&s->ac_vlc_3[i]);
        ff_free_vlc(&s->ac_vlc_4[i]);
    }

    ff_free_vlc(&s->superblock_run_length_vlc);
    ff_free_vlc(&s->fragment_run_length_vlc);
    ff_free_vlc(&s->mode_code_vlc);
    ff_free_vlc(&s->motion_vector_vlc);

    return 0;
}

314 315 316 317
/*
 * This function sets up all of the various blocks mappings:
 * superblocks <-> fragments, macroblocks <-> fragments,
 * superblocks <-> macroblocks
318
 *
319
 * @return 0 is successful; returns 1 if *anything* went wrong.
320
 */
321
static int init_block_mapping(Vp3DecodeContext *s)
322
{
323 324 325 326 327 328
    int sb_x, sb_y, plane;
    int x, y, i, j = 0;

    for (plane = 0; plane < 3; plane++) {
        int sb_width    = plane ? s->c_superblock_width  : s->y_superblock_width;
        int sb_height   = plane ? s->c_superblock_height : s->y_superblock_height;
329 330
        int frag_width  = s->fragment_width[!!plane];
        int frag_height = s->fragment_height[!!plane];
331 332 333 334 335 336 337 338 339 340 341 342

        for (sb_y = 0; sb_y < sb_height; sb_y++)
            for (sb_x = 0; sb_x < sb_width; sb_x++)
                for (i = 0; i < 16; i++) {
                    x = 4*sb_x + hilbert_offset[i][0];
                    y = 4*sb_y + hilbert_offset[i][1];

                    if (x < frag_width && y < frag_height)
                        s->superblock_fragments[j++] = s->fragment_start[plane] + y*frag_width + x;
                    else
                        s->superblock_fragments[j++] = -1;
                }
343 344
    }

345
    return 0;  /* successful path out */
346 347 348
}

/*
349
 * This function sets up the dequantization tables used for a particular
350 351
 * frame.
 */
352
static void init_dequantizer(Vp3DecodeContext *s, int qpi)
353
{
354 355
    int ac_scale_factor = s->coded_ac_scale_factor[s->qps[qpi]];
    int dc_scale_factor = s->coded_dc_scale_factor[s->qps[qpi]];
356
    int i, plane, inter, qri, bmi, bmj, qistart;
357

358 359 360 361 362
    for(inter=0; inter<2; inter++){
        for(plane=0; plane<3; plane++){
            int sum=0;
            for(qri=0; qri<s->qr_count[inter][plane]; qri++){
                sum+= s->qr_size[inter][plane][qri];
363
                if(s->qps[qpi] <= sum)
364 365 366 367 368 369
                    break;
            }
            qistart= sum - s->qr_size[inter][plane][qri];
            bmi= s->qr_base[inter][plane][qri  ];
            bmj= s->qr_base[inter][plane][qri+1];
            for(i=0; i<64; i++){
370 371
                int coeff= (  2*(sum    -s->qps[qpi])*s->base_matrix[bmi][i]
                            - 2*(qistart-s->qps[qpi])*s->base_matrix[bmj][i]
372 373 374
                            + s->qr_size[inter][plane][qri])
                           / (2*s->qr_size[inter][plane][qri]);

Michael Niedermayer's avatar
Michael Niedermayer committed
375
                int qmin= 8<<(inter + !i);
376 377
                int qscale= i ? ac_scale_factor : dc_scale_factor;

378 379
                s->qmat[qpi][inter][plane][s->idct_permutation[i]] =
                    av_clip((qscale * coeff) / 100 * 4, qmin, 4096);
380
            }
381 382
            // all DC coefficients use the same quant so as not to interfere with DC prediction
            s->qmat[qpi][inter][plane][0] = s->qmat[0][inter][plane][0];
383
        }
384 385 386
    }
}

387 388 389
/*
 * This function initializes the loop filter boundary limits if the frame's
 * quality index is different from the previous frame's.
390 391
 *
 * The filter_limit_values may not be larger than 127.
392 393 394 395 396 397
 */
static void init_loop_filter(Vp3DecodeContext *s)
{
    int *bounding_values= s->bounding_values_array+127;
    int filter_limit;
    int x;
398
    int value;
399

400
    filter_limit = s->filter_limit_values[s->qps[0]];
401
    assert(filter_limit < 128);
402 403 404 405 406 407 408

    /* set up the bounding values */
    memset(s->bounding_values_array, 0, 256 * sizeof(int));
    for (x = 0; x < filter_limit; x++) {
        bounding_values[-x] = -x;
        bounding_values[x] = x;
    }
409 410 411 412 413 414
    for (x = value = filter_limit; x < 128 && value; x++, value--) {
        bounding_values[ x] =  value;
        bounding_values[-x] = -value;
    }
    if (value)
        bounding_values[128] = value;
David Conrad's avatar
David Conrad committed
415
    bounding_values[129] = bounding_values[130] = filter_limit * 0x02020202;
416 417
}

418
/*
419
 * This function unpacks all of the superblock/macroblock/fragment coding
420 421
 * information from the bitstream.
 */
422
static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb)
423
{
David Conrad's avatar
David Conrad committed
424
    int superblock_starts[3] = { 0, s->u_superblock_start, s->v_superblock_start };
425 426 427
    int bit = 0;
    int current_superblock = 0;
    int current_run = 0;
428
    int num_partial_superblocks = 0;
429 430 431

    int i, j;
    int current_fragment;
432
    int plane;
433 434 435 436 437 438 439

    if (s->keyframe) {
        memset(s->superblock_coding, SB_FULLY_CODED, s->superblock_count);

    } else {

        /* unpack the list of partially-coded superblocks */
440 441 442
        bit = get_bits1(gb) ^ 1;
        current_run = 0;

443
        while (current_superblock < s->superblock_count && get_bits_left(gb) > 0) {
444 445 446 447 448
            if (s->theora && current_run == MAXIMUM_LONG_BIT_RUN)
                bit = get_bits1(gb);
            else
                bit ^= 1;

449
                current_run = get_vlc2(gb,
450 451
                    s->superblock_run_length_vlc.table, 6, 2) + 1;
                if (current_run == 34)
452
                    current_run += get_bits(gb, 12);
453

454 455 456 457 458 459 460 461
            if (current_superblock + current_run > s->superblock_count) {
                av_log(s->avctx, AV_LOG_ERROR, "Invalid partially coded superblock run length\n");
                return -1;
            }

            memset(s->superblock_coding + current_superblock, bit, current_run);

            current_superblock += current_run;
462 463
            if (bit)
                num_partial_superblocks += current_run;
464 465 466 467
        }

        /* unpack the list of fully coded superblocks if any of the blocks were
         * not marked as partially coded in the previous step */
468 469
        if (num_partial_superblocks < s->superblock_count) {
            int superblocks_decoded = 0;
470 471

            current_superblock = 0;
472 473 474
            bit = get_bits1(gb) ^ 1;
            current_run = 0;

475 476
            while (superblocks_decoded < s->superblock_count - num_partial_superblocks
                   && get_bits_left(gb) > 0) {
477 478 479 480 481 482

                if (s->theora && current_run == MAXIMUM_LONG_BIT_RUN)
                    bit = get_bits1(gb);
                else
                    bit ^= 1;

483
                        current_run = get_vlc2(gb,
484 485
                            s->superblock_run_length_vlc.table, 6, 2) + 1;
                        if (current_run == 34)
486
                            current_run += get_bits(gb, 12);
487 488 489 490 491

                for (j = 0; j < current_run; current_superblock++) {
                    if (current_superblock >= s->superblock_count) {
                        av_log(s->avctx, AV_LOG_ERROR, "Invalid fully coded superblock run length\n");
                        return -1;
492
                    }
493 494 495

                /* skip any superblocks already marked as partially coded */
                if (s->superblock_coding[current_superblock] == SB_NOT_CODED) {
496
                    s->superblock_coding[current_superblock] = 2*bit;
497 498
                    j++;
                }
499
                }
500
                superblocks_decoded += current_run;
501 502 503 504 505
            }
        }

        /* if there were partial blocks, initialize bitstream for
         * unpacking fragment codings */
506
        if (num_partial_superblocks) {
507 508

            current_run = 0;
509
            bit = get_bits1(gb);
510
            /* toggle the bit because as soon as the first run length is
511 512 513 514 515 516 517
             * fetched the bit will be toggled again */
            bit ^= 1;
        }
    }

    /* figure out which fragments are coded; iterate through each
     * superblock (all planes) */
518
    s->total_num_coded_frags = 0;
519
    memset(s->macroblock_coding, MODE_COPY, s->macroblock_count);
520 521

    for (plane = 0; plane < 3; plane++) {
David Conrad's avatar
David Conrad committed
522
        int sb_start = superblock_starts[plane];
523
        int sb_end = sb_start + (plane ? s->c_superblock_count : s->y_superblock_count);
524
        int num_coded_frags = 0;
525

526
    for (i = sb_start; i < sb_end && get_bits_left(gb) > 0; i++) {
527 528 529 530 531 532 533

        /* iterate through all 16 fragments in a superblock */
        for (j = 0; j < 16; j++) {

            /* if the fragment is in bounds, check its coding status */
            current_fragment = s->superblock_fragments[i * 16 + j];
            if (current_fragment != -1) {
534
                int coded = s->superblock_coding[i];
535

536
                if (s->superblock_coding[i] == SB_PARTIALLY_CODED) {
537 538 539

                    /* fragment may or may not be coded; this is the case
                     * that cares about the fragment coding runs */
540
                    if (current_run-- == 0) {
541
                        bit ^= 1;
542
                        current_run = get_vlc2(gb,
543
                            s->fragment_run_length_vlc.table, 5, 2);
544
                    }
545 546
                    coded = bit;
                }
547

548
                    if (coded) {
549
                        /* default mode; actual mode will be decoded in
550
                         * the next phase */
551
                        s->all_fragments[current_fragment].coding_method =
552
                            MODE_INTER_NO_MV;
553
                        s->coded_fragment_list[plane][num_coded_frags++] =
554 555 556 557 558 559 560 561 562
                            current_fragment;
                    } else {
                        /* not coded; copy this fragment from the prior frame */
                        s->all_fragments[current_fragment].coding_method =
                            MODE_COPY;
                    }
            }
        }
    }
563 564 565 566 567
        s->total_num_coded_frags += num_coded_frags;
        for (i = 0; i < 64; i++)
            s->num_coded_frags[plane][i] = num_coded_frags;
        if (plane < 2)
            s->coded_fragment_list[plane+1] = s->coded_fragment_list[plane] + num_coded_frags;
568
    }
569
    return 0;
570 571 572 573 574 575
}

/*
 * This function unpacks all the coding mode data for individual macroblocks
 * from the bitstream.
 */
576
static int unpack_modes(Vp3DecodeContext *s, GetBitContext *gb)
577
{
578
    int i, j, k, sb_x, sb_y;
579 580 581 582
    int scheme;
    int current_macroblock;
    int current_fragment;
    int coding_mode;
583
    int custom_mode_alphabet[CODING_MODE_COUNT];
584
    const int *alphabet;
585
    Vp3Fragment *frag;
586 587 588 589 590 591 592 593 594 595 596 597

    if (s->keyframe) {
        for (i = 0; i < s->fragment_count; i++)
            s->all_fragments[i].coding_method = MODE_INTRA;

    } else {

        /* fetch the mode coding scheme for this frame */
        scheme = get_bits(gb, 3);

        /* is it a custom coding scheme? */
        if (scheme == 0) {
598 599
            for (i = 0; i < 8; i++)
                custom_mode_alphabet[i] = MODE_INTER_NO_MV;
600
            for (i = 0; i < 8; i++)
601
                custom_mode_alphabet[get_bits(gb, 3)] = i;
602 603 604
            alphabet = custom_mode_alphabet;
        } else
            alphabet = ModeAlphabet[scheme-1];
605 606 607

        /* iterate through all of the macroblocks that contain 1 or more
         * coded fragments */
608 609
        for (sb_y = 0; sb_y < s->y_superblock_height; sb_y++) {
            for (sb_x = 0; sb_x < s->y_superblock_width; sb_x++) {
610 611
                if (get_bits_left(gb) <= 0)
                    return -1;
612 613

            for (j = 0; j < 4; j++) {
614 615 616 617
                int mb_x = 2*sb_x +   (j>>1);
                int mb_y = 2*sb_y + (((j>>1)+j)&1);
                current_macroblock = mb_y * s->macroblock_width + mb_x;

618
                if (mb_x >= s->macroblock_width || mb_y >= s->macroblock_height)
619 620
                    continue;

621 622
#define BLOCK_X (2*mb_x + (k&1))
#define BLOCK_Y (2*mb_y + (k>>1))
623 624 625
                /* coding modes are only stored if the macroblock has at least one
                 * luma block coded, otherwise it must be INTER_NO_MV */
                for (k = 0; k < 4; k++) {
626
                    current_fragment = BLOCK_Y*s->fragment_width[0] + BLOCK_X;
627 628 629 630 631 632 633
                    if (s->all_fragments[current_fragment].coding_method != MODE_COPY)
                        break;
                }
                if (k == 4) {
                    s->macroblock_coding[current_macroblock] = MODE_INTER_NO_MV;
                    continue;
                }
634

635 636 637 638
                /* mode 7 means get 3 bits for each coding mode */
                if (scheme == 7)
                    coding_mode = get_bits(gb, 3);
                else
639
                    coding_mode = alphabet
640
                        [get_vlc2(gb, s->mode_code_vlc.table, 3, 3)];
641

642
                s->macroblock_coding[current_macroblock] = coding_mode;
643
                for (k = 0; k < 4; k++) {
644 645 646
                    frag = s->all_fragments + BLOCK_Y*s->fragment_width[0] + BLOCK_X;
                    if (frag->coding_method != MODE_COPY)
                        frag->coding_method = coding_mode;
647
                }
648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668

#define SET_CHROMA_MODES \
    if (frag[s->fragment_start[1]].coding_method != MODE_COPY) \
        frag[s->fragment_start[1]].coding_method = coding_mode;\
    if (frag[s->fragment_start[2]].coding_method != MODE_COPY) \
        frag[s->fragment_start[2]].coding_method = coding_mode;

                if (s->chroma_y_shift) {
                    frag = s->all_fragments + mb_y*s->fragment_width[1] + mb_x;
                    SET_CHROMA_MODES
                } else if (s->chroma_x_shift) {
                    frag = s->all_fragments + 2*mb_y*s->fragment_width[1] + mb_x;
                    for (k = 0; k < 2; k++) {
                        SET_CHROMA_MODES
                        frag += s->fragment_width[1];
                    }
                } else {
                    for (k = 0; k < 4; k++) {
                        frag = s->all_fragments + BLOCK_Y*s->fragment_width[1] + BLOCK_X;
                        SET_CHROMA_MODES
                    }
669 670
                }
            }
671
            }
672 673
        }
    }
674 675

    return 0;
676 677
}

678 679 680 681
/*
 * This function unpacks all the motion vectors for the individual
 * macroblocks from the bitstream.
 */
682
static int unpack_vectors(Vp3DecodeContext *s, GetBitContext *gb)
683
{
684
    int j, k, sb_x, sb_y;
685
    int coding_mode;
686 687
    int motion_x[4];
    int motion_y[4];
688 689 690 691 692 693
    int last_motion_x = 0;
    int last_motion_y = 0;
    int prior_last_motion_x = 0;
    int prior_last_motion_y = 0;
    int current_macroblock;
    int current_fragment;
694
    int frag;
695

David Conrad's avatar
David Conrad committed
696
    if (s->keyframe)
697
        return 0;
David Conrad's avatar
David Conrad committed
698

David Conrad's avatar
David Conrad committed
699 700
    /* coding mode 0 is the VLC scheme; 1 is the fixed code scheme */
    coding_mode = get_bits1(gb);
701

David Conrad's avatar
David Conrad committed
702 703
    /* iterate through all of the macroblocks that contain 1 or more
     * coded fragments */
704 705
    for (sb_y = 0; sb_y < s->y_superblock_height; sb_y++) {
        for (sb_x = 0; sb_x < s->y_superblock_width; sb_x++) {
706 707
            if (get_bits_left(gb) <= 0)
                return -1;
708

David Conrad's avatar
David Conrad committed
709
        for (j = 0; j < 4; j++) {
710 711 712 713 714
            int mb_x = 2*sb_x +   (j>>1);
            int mb_y = 2*sb_y + (((j>>1)+j)&1);
            current_macroblock = mb_y * s->macroblock_width + mb_x;

            if (mb_x >= s->macroblock_width || mb_y >= s->macroblock_height ||
David Conrad's avatar
David Conrad committed
715 716
                (s->macroblock_coding[current_macroblock] == MODE_COPY))
                continue;
717

David Conrad's avatar
David Conrad committed
718 719 720 721 722 723 724 725 726 727 728
            switch (s->macroblock_coding[current_macroblock]) {

            case MODE_INTER_PLUS_MV:
            case MODE_GOLDEN_MV:
                /* all 6 fragments use the same motion vector */
                if (coding_mode == 0) {
                    motion_x[0] = motion_vector_table[get_vlc2(gb, s->motion_vector_vlc.table, 6, 2)];
                    motion_y[0] = motion_vector_table[get_vlc2(gb, s->motion_vector_vlc.table, 6, 2)];
                } else {
                    motion_x[0] = fixed_motion_vector_table[get_bits(gb, 6)];
                    motion_y[0] = fixed_motion_vector_table[get_bits(gb, 6)];
729
                }
730

David Conrad's avatar
David Conrad committed
731 732 733
                /* vector maintenance, only on MODE_INTER_PLUS_MV */
                if (s->macroblock_coding[current_macroblock] ==
                    MODE_INTER_PLUS_MV) {
734 735
                    prior_last_motion_x = last_motion_x;
                    prior_last_motion_y = last_motion_y;
David Conrad's avatar
David Conrad committed
736 737 738 739 740 741 742 743 744 745 746 747 748
                    last_motion_x = motion_x[0];
                    last_motion_y = motion_y[0];
                }
                break;

            case MODE_INTER_FOURMV:
                /* vector maintenance */
                prior_last_motion_x = last_motion_x;
                prior_last_motion_y = last_motion_y;

                /* fetch 4 vectors from the bitstream, one for each
                 * Y fragment, then average for the C fragment vectors */
                for (k = 0; k < 4; k++) {
749
                    current_fragment = BLOCK_Y*s->fragment_width[0] + BLOCK_X;
750
                    if (s->all_fragments[current_fragment].coding_method != MODE_COPY) {
David Conrad's avatar
David Conrad committed
751 752 753
                        if (coding_mode == 0) {
                            motion_x[k] = motion_vector_table[get_vlc2(gb, s->motion_vector_vlc.table, 6, 2)];
                            motion_y[k] = motion_vector_table[get_vlc2(gb, s->motion_vector_vlc.table, 6, 2)];
754
                        } else {
David Conrad's avatar
David Conrad committed
755 756
                            motion_x[k] = fixed_motion_vector_table[get_bits(gb, 6)];
                            motion_y[k] = fixed_motion_vector_table[get_bits(gb, 6)];
757
                        }
David Conrad's avatar
David Conrad committed
758 759 760 761 762
                        last_motion_x = motion_x[k];
                        last_motion_y = motion_y[k];
                    } else {
                        motion_x[k] = 0;
                        motion_y[k] = 0;
763
                    }
David Conrad's avatar
David Conrad committed
764 765 766 767 768 769 770
                }
                break;

            case MODE_INTER_LAST_MV:
                /* all 6 fragments use the last motion vector */
                motion_x[0] = last_motion_x;
                motion_y[0] = last_motion_y;
771

David Conrad's avatar
David Conrad committed
772 773 774 775 776 777 778 779 780
                /* no vector maintenance (last vector remains the
                 * last vector) */
                break;

            case MODE_INTER_PRIOR_LAST:
                /* all 6 fragments use the motion vector prior to the
                 * last motion vector */
                motion_x[0] = prior_last_motion_x;
                motion_y[0] = prior_last_motion_y;
781

David Conrad's avatar
David Conrad committed
782 783 784 785 786 787
                /* vector maintenance */
                prior_last_motion_x = last_motion_x;
                prior_last_motion_y = last_motion_y;
                last_motion_x = motion_x[0];
                last_motion_y = motion_y[0];
                break;
788

David Conrad's avatar
David Conrad committed
789 790
            default:
                /* covers intra, inter without MV, golden without MV */
791 792
                motion_x[0] = 0;
                motion_y[0] = 0;
793

David Conrad's avatar
David Conrad committed
794 795 796
                /* no vector maintenance */
                break;
            }
797

David Conrad's avatar
David Conrad committed
798
            /* assign the motion vectors to the correct fragments */
799
            for (k = 0; k < 4; k++) {
David Conrad's avatar
David Conrad committed
800
                current_fragment =
801
                    BLOCK_Y*s->fragment_width[0] + BLOCK_X;
802
                if (s->macroblock_coding[current_macroblock] == MODE_INTER_FOURMV) {
803 804
                    s->motion_val[0][current_fragment][0] = motion_x[k];
                    s->motion_val[0][current_fragment][1] = motion_y[k];
805
                } else {
806 807
                    s->motion_val[0][current_fragment][0] = motion_x[0];
                    s->motion_val[0][current_fragment][1] = motion_y[0];
808
                }
809
            }
810 811

            if (s->chroma_y_shift) {
812 813 814 815
                if (s->macroblock_coding[current_macroblock] == MODE_INTER_FOURMV) {
                    motion_x[0] = RSHIFT(motion_x[0] + motion_x[1] + motion_x[2] + motion_x[3], 2);
                    motion_y[0] = RSHIFT(motion_y[0] + motion_y[1] + motion_y[2] + motion_y[3], 2);
                }
816 817
                motion_x[0] = (motion_x[0]>>1) | (motion_x[0]&1);
                motion_y[0] = (motion_y[0]>>1) | (motion_y[0]&1);
818 819 820
                frag = mb_y*s->fragment_width[1] + mb_x;
                s->motion_val[1][frag][0] = motion_x[0];
                s->motion_val[1][frag][1] = motion_y[0];
821 822 823 824 825 826 827 828 829 830 831 832 833
            } else if (s->chroma_x_shift) {
                if (s->macroblock_coding[current_macroblock] == MODE_INTER_FOURMV) {
                    motion_x[0] = RSHIFT(motion_x[0] + motion_x[1], 1);
                    motion_y[0] = RSHIFT(motion_y[0] + motion_y[1], 1);
                    motion_x[1] = RSHIFT(motion_x[2] + motion_x[3], 1);
                    motion_y[1] = RSHIFT(motion_y[2] + motion_y[3], 1);
                } else {
                    motion_x[1] = motion_x[0];
                    motion_y[1] = motion_y[0];
                }
                motion_x[0] = (motion_x[0]>>1) | (motion_x[0]&1);
                motion_x[1] = (motion_x[1]>>1) | (motion_x[1]&1);

834
                frag = 2*mb_y*s->fragment_width[1] + mb_x;
835
                for (k = 0; k < 2; k++) {
836 837
                    s->motion_val[1][frag][0] = motion_x[k];
                    s->motion_val[1][frag][1] = motion_y[k];
838 839 840 841
                    frag += s->fragment_width[1];
                }
            } else {
                for (k = 0; k < 4; k++) {
842
                    frag = BLOCK_Y*s->fragment_width[1] + BLOCK_X;
843
                    if (s->macroblock_coding[current_macroblock] == MODE_INTER_FOURMV) {
844 845
                        s->motion_val[1][frag][0] = motion_x[k];
                        s->motion_val[1][frag][1] = motion_y[k];
846
                    } else {
847 848
                        s->motion_val[1][frag][0] = motion_x[0];
                        s->motion_val[1][frag][1] = motion_y[0];
849 850
                    }
                }
851
            }
852
        }
853
        }
David Conrad's avatar
David Conrad committed
854
    }
855 856

    return 0;
857 858
}

859 860 861
/*
 * Unpack the per-block quantizer index (qpi) selection from the bitstream.
 *
 * For every qpi level below s->nqps - 1, a sequence of run lengths is read.
 * Each run applies one bit to the next run_length coded fragments whose
 * qpi still equals the current level: the bit is added to the fragment's
 * qpi, so a set bit promotes the fragment to the next level. Fragments
 * that stay at the current level are removed from the count considered by
 * the following level.
 *
 * @return 0 on success, -1 if a run extends past the list of coded fragments
 */
static int unpack_block_qpis(Vp3DecodeContext *s, GetBitContext *gb)
{
    int qpi, i, j, bit, run_length, blocks_decoded, num_blocks_at_qpi;
    int num_blocks = s->total_num_coded_frags;

    for (qpi = 0; qpi < s->nqps-1 && num_blocks > 0; qpi++) {
        i = blocks_decoded = num_blocks_at_qpi = 0;

        /* pre-inverted because the first loop iteration toggles it back */
        bit = get_bits1(gb) ^ 1;
        run_length = 0;

        do {
            /* after a maximum-length run the bit is coded explicitly,
             * otherwise it simply alternates between runs */
            if (run_length == MAXIMUM_LONG_BIT_RUN)
                bit = get_bits1(gb);
            else
                bit ^= 1;

            run_length = get_vlc2(gb, s->superblock_run_length_vlc.table, 6, 2) + 1;
            if (run_length == 34)
                run_length += get_bits(gb, 12);
            blocks_decoded += run_length;

            if (!bit)
                num_blocks_at_qpi += run_length;

            /* i walks every coded fragment; j only counts the ones that are
             * still at the current qpi level */
            for (j = 0; j < run_length; i++) {
                Vp3Fragment *frag;

                if (i >= s->total_num_coded_frags)
                    return -1;

                frag = &s->all_fragments[s->coded_fragment_list[0][i]];
                if (frag->qpi == qpi) {
                    frag->qpi += bit;
                    j++;
                }
            }
        } while (blocks_decoded < num_blocks && get_bits_left(gb) > 0);

        num_blocks -= num_blocks_at_qpi;
    }

    return 0;
}

901
/*
 * This function is called by unpack_dct_coeffs() to extract the VLCs from
 * the bitstream. The VLCs encode tokens which are used to unpack DCT
 * data. Each call unpacks all the VLCs of one plane at one coefficient
 * level; it is called for DC coefficients or different AC coefficient
 * levels (since different coefficient types require different VLC tables).
 *
 * The decoded tokens are appended to s->dct_tokens[plane][coeff_index] and
 * the start of the next token buffer (next plane, or plane 0 of the next
 * level) is set up before returning.
 *
 * This function returns a residual eob run. E.g, if a particular token gave
 * instructions to EOB the next 5 fragments and there were only 2 fragments
 * left in the current fragment range, 3 would be returned so that it could
 * be passed into the next call to this same function.
 *
 * A negative return value signals an error: either the number of coded
 * fragments at this level went negative or an invalid token was decoded.
 */
static int unpack_vlcs(Vp3DecodeContext *s, GetBitContext *gb,
                        VLC *table, int coeff_index,
                        int plane,
                        int eob_run)
{
    int i, j = 0;
    int token;
    int zero_run = 0;
    int16_t coeff = 0;
    int bits_to_get;
    int blocks_ended;
    int coeff_i = 0;
    int num_coeffs = s->num_coded_frags[plane][coeff_index];
    int16_t *dct_tokens = s->dct_tokens[plane][coeff_index];

    /* local references to structure members to avoid repeated dereferences */
    int *coded_fragment_list = s->coded_fragment_list[plane];
    Vp3Fragment *all_fragments = s->all_fragments;
    VLC_TYPE (*vlc_table)[2] = table->table;

    /* A negative count means earlier levels consumed more fragments than
     * exist. Continuing would run the bookkeeping below with negative
     * values (including a left shift of a negative number), so bail out. */
    if (num_coeffs < 0) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid number of coefficents at level %d\n", coeff_index);
        return -1;
    }

    if (eob_run > num_coeffs) {
        coeff_i = blocks_ended = num_coeffs;
        eob_run -= num_coeffs;
    } else {
        coeff_i = blocks_ended = eob_run;
        eob_run = 0;
    }

    // insert fake EOB token to cover the split between planes or zzi
    if (blocks_ended)
        dct_tokens[j++] = blocks_ended << 2;

    while (coeff_i < num_coeffs && get_bits_left(gb) > 0) {
        /* decode a VLC into a token */
        token = get_vlc2(gb, vlc_table, 11, 3);
        /* use the token to get a zero run, a coefficient, and an eob run */
        if ((unsigned) token <= 6U) {
            eob_run = eob_run_base[token];
            if (eob_run_get_bits[token])
                eob_run += get_bits(gb, eob_run_get_bits[token]);

            // record only the number of blocks ended in this plane,
            // any spill will be recorded in the next plane.
            if (eob_run > num_coeffs - coeff_i) {
                dct_tokens[j++] = TOKEN_EOB(num_coeffs - coeff_i);
                blocks_ended   += num_coeffs - coeff_i;
                eob_run        -= num_coeffs - coeff_i;
                coeff_i         = num_coeffs;
            } else {
                dct_tokens[j++] = TOKEN_EOB(eob_run);
                blocks_ended   += eob_run;
                coeff_i        += eob_run;
                eob_run = 0;
            }
        } else if (token >= 0) {
            bits_to_get = coeff_get_bits[token];
            if (bits_to_get)
                bits_to_get = get_bits(gb, bits_to_get);
            coeff = coeff_tables[token][bits_to_get];

            zero_run = zero_run_base[token];
            if (zero_run_get_bits[token])
                zero_run += get_bits(gb, zero_run_get_bits[token]);

            if (zero_run) {
                dct_tokens[j++] = TOKEN_ZERO_RUN(coeff, zero_run);
            } else {
                // Save DC into the fragment structure. DC prediction is
                // done in raster order, so the actual DC can't be in with
                // other tokens. We still need the token in dct_tokens[]
                // however, or else the structure collapses on itself.
                if (!coeff_index)
                    all_fragments[coded_fragment_list[coeff_i]].dc = coeff;

                dct_tokens[j++] = TOKEN_COEFF(coeff);
            }

            if (coeff_index + zero_run > 64) {
                av_log(s->avctx, AV_LOG_DEBUG, "Invalid zero run of %d with"
                       " %d coeffs left\n", zero_run, 64-coeff_index);
                zero_run = 64 - coeff_index;
            }

            // zero runs code multiple coefficients,
            // so don't try to decode coeffs for those higher levels.
            // A run that reaches (or was clamped to) the end of the block
            // gives coeff_index + zero_run == 64; levels only run 0..63
            // (see the EOB adjustment loop below), so stop before index 64.
            // NOTE(review): assumes num_coded_frags holds 64 entries per
            // plane -- confirm against the Vp3DecodeContext declaration.
            for (i = coeff_index+1; i <= coeff_index+zero_run && i < 64; i++)
                s->num_coded_frags[plane][i]--;
            coeff_i++;
        } else {
            av_log(s->avctx, AV_LOG_ERROR,
                   "Invalid token %d\n", token);
            return -1;
        }
    }

    if (blocks_ended > s->num_coded_frags[plane][coeff_index])
        av_log(s->avctx, AV_LOG_ERROR, "More blocks ended than coded!\n");

    // decrement the number of blocks that have higher coefficients for each
    // EOB run at this level
    if (blocks_ended)
        for (i = coeff_index+1; i < 64; i++)
            s->num_coded_frags[plane][i] -= blocks_ended;

    // setup the next buffer
    if (plane < 2)
        s->dct_tokens[plane+1][coeff_index] = dct_tokens + j;
    else if (coeff_index < 63)
        s->dct_tokens[0][coeff_index+1] = dct_tokens + j;

    return eob_run;
}

1029 1030 1031 1032
/* Forward declaration: unpack_dct_coeffs() below calls this function,
 * which is defined further down in this file. */
static void reverse_dc_prediction(Vp3DecodeContext *s,
                                  int first_fragment,
                                  int fragment_width,
                                  int fragment_height);
1033 1034 1035 1036
/*
 * This function unpacks all of the DCT coefficient data from the
 * bitstream.
 */
1037
static int unpack_dct_coeffs(Vp3DecodeContext *s, GetBitContext *gb)
1038 1039 1040 1041 1042 1043 1044
{
    int i;
    int dc_y_table;
    int dc_c_table;
    int ac_y_table;
    int ac_c_table;
    int residual_eob_run = 0;
1045 1046
    VLC *y_tables[64];
    VLC *c_tables[64];
1047

1048 1049
    s->dct_tokens[0][0] = s->dct_tokens_base;

1050
    /* fetch the DC table indexes */
1051 1052 1053 1054
    dc_y_table = get_bits(gb, 4);
    dc_c_table = get_bits(gb, 4);

    /* unpack the Y plane DC coefficients */
1055
    residual_eob_run = unpack_vlcs(s, gb, &s->dc_vlc[dc_y_table], 0,
1056
        0, residual_eob_run);
1057 1058
    if (residual_eob_run < 0)
        return residual_eob_run;
1059

1060
    /* reverse prediction of the Y-plane DC coefficients */
1061
    reverse_dc_prediction(s, 0, s->fragment_width[0], s->fragment_height[0]);
1062

1063 1064
    /* unpack the C plane DC coefficients */
    residual_eob_run = unpack_vlcs(s, gb, &s->dc_vlc[dc_c_table], 0,
1065
        1, residual_eob_run);
1066 1067
    if (residual_eob_run < 0)
        return residual_eob_run;
1068 1069
    residual_eob_run = unpack_vlcs(s, gb, &s->dc_vlc[dc_c_table], 0,
        2, residual_eob_run);
1070 1071
    if (residual_eob_run < 0)
        return residual_eob_run;
1072

1073 1074 1075 1076
    /* reverse prediction of the C-plane DC coefficients */
    if (!(s->avctx->flags & CODEC_FLAG_GRAY))
    {
        reverse_dc_prediction(s, s->fragment_start[1],
1077
            s->fragment_width[1], s->fragment_height[1]);
1078
        reverse_dc_prediction(s, s->fragment_start[2],
1079
            s->fragment_width[1], s->fragment_height[1]);
1080 1081
    }

1082
    /* fetch the AC table indexes */
1083 1084 1085
    ac_y_table = get_bits(gb, 4);
    ac_c_table = get_bits(gb, 4);

1086
    /* build tables of AC VLC tables */
1087
    for (i = 1; i <= 5; i++) {
1088 1089
        y_tables[i] = &s->ac_vlc_1[ac_y_table];
        c_tables[i] = &s->ac_vlc_1[ac_c_table];
1090 1091
    }
    for (i = 6; i <= 14; i++) {
1092 1093
        y_tables[i] = &s->ac_vlc_2[ac_y_table];
        c_tables[i] = &s->ac_vlc_2[ac_c_table];
1094 1095
    }
    for (i = 15; i <= 27; i++) {
1096 1097
        y_tables[i] = &s->ac_vlc_3[ac_y_table];
        c_tables[i] = &s->ac_vlc_3[ac_c_table];
1098 1099
    }
    for (i = 28; i <= 63; i++) {
1100 1101 1102 1103 1104 1105 1106
        y_tables[i] = &s->ac_vlc_4[ac_y_table];
        c_tables[i] = &s->ac_vlc_4[ac_c_table];
    }

    /* decode all AC coefficents */
    for (i = 1; i <= 63; i++) {
            residual_eob_run = unpack_vlcs(s, gb, y_tables[i], i,
1107
                0, residual_eob_run);
1108 1109
            if (residual_eob_run < 0)
                return residual_eob_run;
1110

1111
            residual_eob_run = unpack_vlcs(s, gb, c_tables[i], i,
1112
                1, residual_eob_run);
1113 1114
            if (residual_eob_run < 0)
                return residual_eob_run;
1115 1116
            residual_eob_run = unpack_vlcs(s, gb, c_tables[i], i,
                2, residual_eob_run);
1117 1118
            if (residual_eob_run < 0)
                return residual_eob_run;
1119
    }
1120 1121

    return 0;
1122 1123 1124 1125
}

/*
 * This function reverses the DC prediction for each coded fragment in
1126
 * the frame. Much of this function is adapted directly from the original
1127 1128 1129 1130
 * VP3 source code.
 */
#define COMPATIBLE_FRAME(x) \
  (compatible_frame[s->all_fragments[x].coding_method] == current_frame_type)
1131
#define DC_COEFF(u) s->all_fragments[u].dc
1132 1133 1134 1135

static void reverse_dc_prediction(Vp3DecodeContext *s,
                                  int first_fragment,
                                  int fragment_width,
1136
                                  int fragment_height)
1137 1138 1139 1140 1141 1142 1143 1144 1145 1146
{

#define PUL 8
#define PU 4
#define PUR 2
#define PL 1

    int x, y;
    int i = first_fragment;

1147
    int predicted_dc;
1148 1149 1150 1151

    /* DC values for the left, up-left, up, and up-right fragments */
    int vl, vul, vu, vur;

1152
    /* indexes for the left, up-left, up, and up-right fragments */
1153 1154
    int l, ul, u, ur;

1155
    /*
1156 1157 1158 1159 1160 1161
     * The 6 fields mean:
     *   0: up-left multiplier
     *   1: up multiplier
     *   2: up-right multiplier
     *   3: left multiplier
     */
1162
    static const int predictor_transform[16][4] = {
1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178
        {  0,  0,  0,  0},
        {  0,  0,  0,128},        // PL
        {  0,  0,128,  0},        // PUR
        {  0,  0, 53, 75},        // PUR|PL
        {  0,128,  0,  0},        // PU
        {  0, 64,  0, 64},        // PU|PL
        {  0,128,  0,  0},        // PU|PUR
        {  0,  0, 53, 75},        // PU|PUR|PL
        {128,  0,  0,  0},        // PUL
        {  0,  0,  0,128},        // PUL|PL
        { 64,  0, 64,  0},        // PUL|PUR
        {  0,  0, 53, 75},        // PUL|PUR|PL
        {  0,128,  0,  0},        // PUL|PU
       {-104,116,  0,116},        // PUL|PU|PL
        { 24, 80, 24,  0},        // PUL|PU|PUR
       {-104,116,  0,116}         // PUL|PU|PUR|PL
1179 1180 1181 1182 1183
    };

    /* This table shows which types of blocks can use other blocks for
     * prediction. For example, INTRA is the only mode in this table to
     * have a frame number of 0. That means INTRA blocks can only predict
1184
     * from other INTRA blocks. There are 2 golden frame coding types;
1185 1186
     * blocks encoding in these modes can only predict from other blocks
     * that were encoded with these 1 of these 2 modes. */
1187
    static const unsigned char compatible_frame[9] = {
1188 1189 1190 1191 1192 1193 1194
        1,    /* MODE_INTER_NO_MV */
        0,    /* MODE_INTRA */
        1,    /* MODE_INTER_PLUS_MV */
        1,    /* MODE_INTER_LAST_MV */
        1,    /* MODE_INTER_PRIOR_MV */
        2,    /* MODE_USING_GOLDEN */
        2,    /* MODE_GOLDEN_MV */
1195 1196
        1,    /* MODE_INTER_FOUR_MV */
        3     /* MODE_COPY */
1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216
    };
    int current_frame_type;

    /* there is a last DC predictor for each of the 3 frame types */
    short last_dc[3];

    int transform = 0;

    vul = vu = vur = vl = 0;
    last_dc[0] = last_dc[1] = last_dc[2] = 0;

    /* for each fragment row... */
    for (y = 0; y < fragment_height; y++) {

        /* for each fragment in a row... */
        for (x = 0; x < fragment_width; x++, i++) {

            /* reverse prediction if this block was coded */
            if (s->all_fragments[i].coding_method != MODE_COPY) {

1217
                current_frame_type =
1218 1219
                    compatible_frame[s->all_fragments[i].coding_method];

Michael Niedermayer's avatar
Michael Niedermayer committed
1220 1221 1222
                transform= 0;
                if(x){
                    l= i-1;
1223
                    vl = DC_COEFF(l);
1224
                    if(COMPATIBLE_FRAME(l))
1225
                        transform |= PL;
Michael Niedermayer's avatar
Michael Niedermayer committed
1226 1227 1228
                }
                if(y){
                    u= i-fragment_width;
1229
                    vu = DC_COEFF(u);
1230
                    if(COMPATIBLE_FRAME(u))
1231
                        transform |= PU;
Michael Niedermayer's avatar
Michael Niedermayer committed
1232 1233 1234
                    if(x){
                        ul= i-fragment_width-1;
                        vul = DC_COEFF(ul);
1235
                        if(COMPATIBLE_FRAME(ul))
1236
                            transform |= PUL;
Michael Niedermayer's avatar
Michael Niedermayer committed
1237 1238 1239 1240
                    }
                    if(x + 1 < fragment_width){
                        ur= i-fragment_width+1;
                        vur = DC_COEFF(ur);
1241
                        if(COMPATIBLE_FRAME(ur))
1242
                            transform |= PUR;
Michael Niedermayer's avatar
Michael Niedermayer committed
1243
                    }
1244 1245 1246 1247 1248 1249
                }

                if (transform == 0) {

                    /* if there were no fragments to predict from, use last
                     * DC saved */
1250
                    predicted_dc = last_dc[current_frame_type];
1251 1252 1253 1254 1255 1256 1257 1258 1259
                } else {

                    /* apply the appropriate predictor transform */
                    predicted_dc =
                        (predictor_transform[transform][0] * vul) +
                        (predictor_transform[transform][1] * vu) +
                        (predictor_transform[transform][2] * vur) +
                        (predictor_transform[transform][3] * vl);

Michael Niedermayer's avatar
Michael Niedermayer committed
1260
                    predicted_dc /= 128;
1261 1262 1263

                    /* check for outranging on the [ul u l] and
                     * [ul u ur l] predictors */
1264
                    if ((transform == 15) || (transform == 13)) {
1265
                        if (FFABS(predicted_dc - vu) > 128)
1266
                            predicted_dc = vu;
1267
                        else if (FFABS(predicted_dc - vl) > 128)
1268
                            predicted_dc = vl;
1269
                        else if (FFABS(predicted_dc - vul) > 128)
1270 1271 1272 1273
                            predicted_dc = vul;
                    }
                }

1274
                /* at long last, apply the predictor */
1275
                DC_COEFF(i) += predicted_dc;
1276
                /* save the DC */
1277
                last_dc[current_frame_type] = DC_COEFF(i);
1278 1279 1280 1281 1282
            }
        }
    }
}

1283
/*
 * Run the loop (deblocking) filter over fragment rows ystart..yend-1 of
 * one plane of the current frame.
 *
 * Only edges of fragments that are not MODE_COPY are filtered. The left
 * and top edges are always filtered (except on the plane border); the
 * right and bottom edges are filtered here only when the neighbour on
 * that side is MODE_COPY, since a coded neighbour filters the shared edge
 * itself when it is visited.
 */
static void apply_loop_filter(Vp3DecodeContext *s, int plane, int ystart, int yend)
{
    int x, y;
    int *bounding_values= s->bounding_values_array+127;

    int width           = s->fragment_width[!!plane];
    int height          = s->fragment_height[!!plane];
    int fragment        = s->fragment_start        [plane] + ystart * width;
    int stride          = s->current_frame.f->linesize[plane];
    uint8_t *plane_data = s->current_frame.f->data    [plane];
    if (!s->flipped_image) stride = -stride;
    plane_data += s->data_offset[plane] + 8*ystart*stride;

    for (y = ystart; y < yend; y++) {

        for (x = 0; x < width; x++) {
            /* This code basically just deblocks on the edges of coded blocks.
             * However, it has to be much more complicated because of the
             * braindamaged deblock ordering used in VP3/Theora. Order matters
             * because some pixels get filtered twice. */
            if( s->all_fragments[fragment].coding_method != MODE_COPY )
            {
                /* do not perform left edge filter for left columns frags */
                if (x > 0) {
                    s->vp3dsp.h_loop_filter(
                        plane_data + 8*x,
                        stride, bounding_values);
                }

                /* do not perform top edge filter for top row fragments */
                if (y > 0) {
                    s->vp3dsp.v_loop_filter(
                        plane_data + 8*x,
                        stride, bounding_values);
                }

                /* do not perform right edge filter for right column
                 * fragments or if right fragment neighbor is also coded
                 * in this frame (it will be filtered in next iteration) */
                if ((x < width - 1) &&
                    (s->all_fragments[fragment + 1].coding_method == MODE_COPY)) {
                    s->vp3dsp.h_loop_filter(
                        plane_data + 8*x + 8,
                        stride, bounding_values);
                }

                /* do not perform bottom edge filter for bottom row
                 * fragments or if bottom fragment neighbor is also coded
                 * in this frame (it will be filtered in the next row) */
                if ((y < height - 1) &&
                    (s->all_fragments[fragment + width].coding_method == MODE_COPY)) {
                    s->vp3dsp.v_loop_filter(
                        plane_data + 8*x + 8*stride,
                        stride, bounding_values);
                }
            }

            fragment++;
        }
        plane_data += 8*stride;
    }
}

1346
/**
 * Pull DCT tokens from the 64 levels to decode and dequant the coefficients
 * for the next block in coding order
 *
 * The per-level token pointers in s->dct_tokens[plane][] are advanced as
 * tokens are consumed; an EOB run token is decremented in place until it
 * is used up. block[0] is always overwritten with the fragment's DC value
 * scaled by the DC entry of the qpi 0 matrix, except on the error paths.
 *
 * @return the coefficient level at which decoding stopped. A value above
 *         63 is returned when a zero run overflows the block; callers in
 *         this file treat that as an error and skip the fragment.
 */
static inline int vp3_dequant(Vp3DecodeContext *s, Vp3Fragment *frag,
                              int plane, int inter, int16_t block[64])
{
    int16_t *dequantizer = s->qmat[frag->qpi][inter][plane];
    uint8_t *perm = s->idct_scantable;
    int i = 0;

    do {
        int token = *s->dct_tokens[plane][i];
        /* the two low bits select the token type */
        switch (token & 3) {
        case 0: // EOB
            if (--token < 4) // 0-3 are token types, so the EOB run must now be 0
                s->dct_tokens[plane][i]++;
            else
                *s->dct_tokens[plane][i] = token & ~3;
            goto end;
        case 1: // zero run
            s->dct_tokens[plane][i]++;
            i += (token >> 2) & 0x7f;
            if (i > 63) {
                av_log(s->avctx, AV_LOG_ERROR, "Coefficient index overflow\n");
                return i;
            }
            block[perm[i]] = (token >> 9) * dequantizer[perm[i]];
            i++;
            break;
        case 2: // coeff
            block[perm[i]] = (token >> 2) * dequantizer[perm[i]];
            s->dct_tokens[plane][i++]++;
            break;
        default: // shouldn't happen
            return i;
        }
    } while (i < 64);
    // return value is expected to be a valid level
    i--;
end:
    // the actual DC+prediction is in the fragment structure
    block[0] = frag->dc * s->qmat[0][inter][plane][0];
    return i;
}

1392 1393 1394 1395 1396
/**
 * called when all pixels up to row y are complete
 *
 * With frame threading active, decoding progress is reported for the
 * current frame first. Then, if the user installed a draw_horiz_band
 * callback, it is invoked for the rows finished since the previous call
 * (tracked in s->last_slice_end).
 */
static void vp3_draw_horiz_band(Vp3DecodeContext *s, int y)
{
    int h, cy, i;
    int offset[AV_NUM_DATA_POINTERS];

    if (HAVE_THREADS && s->avctx->active_thread_type&FF_THREAD_FRAME) {
        int y_flipped = s->flipped_image ? s->avctx->height-y : y;

        // At the end of the frame, report INT_MAX instead of the height of the frame.
        // This makes the other threads' ff_thread_await_progress() calls cheaper, because
        // they don't have to clip their values.
        ff_thread_report_progress(&s->current_frame, y_flipped==s->avctx->height ? INT_MAX : y_flipped-1, 0);
    }

    if(s->avctx->draw_horiz_band==NULL)
        return;

    /* h = number of newly completed rows, y = first of those rows */
    h= y - s->last_slice_end;
    s->last_slice_end= y;
    y -= h;

    if (!s->flipped_image) {
        y = s->avctx->height - y - h;
    }

    cy = y >> s->chroma_y_shift;
    offset[0] = s->current_frame.f->linesize[0]*y;
    offset[1] = s->current_frame.f->linesize[1]*cy;
    offset[2] = s->current_frame.f->linesize[2]*cy;
    for (i = 3; i < AV_NUM_DATA_POINTERS; i++)
        offset[i] = 0;

    emms_c();
    s->avctx->draw_horiz_band(s->avctx, s->current_frame.f, offset, y, 3, h);
}

1431 1432 1433 1434 1435 1436
/**
 * Wait for the reference frame of the current fragment.
 * The progress value is in luma pixel rows.
 *
 * The golden frame is awaited for MODE_USING_GOLDEN / MODE_GOLDEN_MV
 * fragments, the last frame for every other coding method.
 *
 * @param motion_y vertical motion vector in half-pel units (bit 0 is the
 *                 half-pel flag, the remaining bits the full-pel offset)
 * @param y        row the fragment starts at
 */
static void await_reference_row(Vp3DecodeContext *s, Vp3Fragment *fragment, int motion_y, int y)
{
    ThreadFrame *ref_frame;
    int ref_row;
    /* a half-pel vertical vector needs one extra source row */
    int border = motion_y&1;

    if (fragment->coding_method == MODE_USING_GOLDEN ||
        fragment->coding_method == MODE_GOLDEN_MV)
        ref_frame = &s->golden_frame;
    else
        ref_frame = &s->last_frame;

    ref_row = y + (motion_y>>1);
    ref_row = FFMAX(FFABS(ref_row), ref_row + 8 + border);

    ff_thread_await_progress(ref_frame, ref_row, 0);
}

1453 1454
/*
 * Perform the final rendering for a particular slice of data.
 * The slice number ranges from 0..(c_superblock_height - 1).
 *
 * For each of the three planes, every fragment of the superblock row(s)
 * belonging to the slice is reconstructed in Hilbert order: MODE_COPY
 * fragments are copied from the last frame, all others are motion
 * compensated (unless intra) and then get their dequantized DCT block
 * put/added. After each superblock row the loop filter is applied (unless
 * skipped) and finally the completed rows are reported through
 * vp3_draw_horiz_band().
 */
static void render_slice(Vp3DecodeContext *s, int slice)
{
    int x, y, i, j, fragment;
    int16_t *block = s->block;
    int motion_x = 0xdeadbeef, motion_y = 0xdeadbeef;
    int motion_halfpel_index;
    uint8_t *motion_source;
    int plane, first_pixel;

    if (slice >= s->c_superblock_height)
        return;

    for (plane = 0; plane < 3; plane++) {
        uint8_t *output_plane = s->current_frame.f->data    [plane] + s->data_offset[plane];
        uint8_t *  last_plane = s->   last_frame.f->data    [plane] + s->data_offset[plane];
        uint8_t *golden_plane = s-> golden_frame.f->data    [plane] + s->data_offset[plane];
        int stride            = s->current_frame.f->linesize[plane];
        int plane_width       = s->width  >> (plane && s->chroma_x_shift);
        int plane_height      = s->height >> (plane && s->chroma_y_shift);
        int8_t (*motion_val)[2] = s->motion_val[!!plane];

        /* with vertically subsampled chroma one slice covers two luma
         * superblock rows */
        int sb_x, sb_y        = slice << (!plane && s->chroma_y_shift);
        int slice_height      = sb_y + 1 + (!plane && s->chroma_y_shift);
        int slice_width       = plane ? s->c_superblock_width : s->y_superblock_width;

        int fragment_width    = s->fragment_width[!!plane];
        int fragment_height   = s->fragment_height[!!plane];
        int fragment_start    = s->fragment_start[plane];
        int do_await          = !plane && HAVE_THREADS && (s->avctx->active_thread_type&FF_THREAD_FRAME);

        if (!s->flipped_image) stride = -stride;
        if (CONFIG_GRAY && plane && (s->avctx->flags & CODEC_FLAG_GRAY))
            continue;

        /* for each superblock row in the slice (both of them)... */
        for (; sb_y < slice_height; sb_y++) {

            /* for each superblock in a row... */
            for (sb_x = 0; sb_x < slice_width; sb_x++) {

                /* for each block in a superblock... */
                for (j = 0; j < 16; j++) {
                    x = 4*sb_x + hilbert_offset[j][0];
                    y = 4*sb_y + hilbert_offset[j][1];
                    fragment = y*fragment_width + x;

                    i = fragment_start + fragment;

                    // bounds check
                    if (x >= fragment_width || y >= fragment_height)
                        continue;

                    first_pixel = 8*y*stride + 8*x;

                    if (do_await && s->all_fragments[i].coding_method != MODE_INTRA)
                        await_reference_row(s, &s->all_fragments[i], motion_val[fragment][1], (16*y) >> s->chroma_y_shift);

                    /* transform if this block was coded */
                    if (s->all_fragments[i].coding_method != MODE_COPY) {
                        if ((s->all_fragments[i].coding_method == MODE_USING_GOLDEN) ||
                            (s->all_fragments[i].coding_method == MODE_GOLDEN_MV))
                            motion_source= golden_plane;
                        else
                            motion_source= last_plane;

                        motion_source += first_pixel;
                        motion_halfpel_index = 0;

                        /* sort out the motion vector if this fragment is coded
                         * using a motion vector method */
                        if ((s->all_fragments[i].coding_method > MODE_INTRA) &&
                            (s->all_fragments[i].coding_method != MODE_USING_GOLDEN)) {
                            int src_x, src_y;
                            motion_x = motion_val[fragment][0];
                            motion_y = motion_val[fragment][1];

                            src_x= (motion_x>>1) + 8*x;
                            src_y= (motion_y>>1) + 8*y;

                            motion_halfpel_index = motion_x & 0x01;
                            motion_source += (motion_x >> 1);

                            motion_halfpel_index |= (motion_y & 0x01) << 1;
                            motion_source += ((motion_y >> 1) * stride);

                            /* source block reaches outside the plane: build
                             * it in the edge emulation buffer instead */
                            if(src_x<0 || src_y<0 || src_x + 9 >= plane_width || src_y + 9 >= plane_height){
                                uint8_t *temp= s->edge_emu_buffer;
                                if(stride<0) temp -= 8*stride;

                                s->vdsp.emulated_edge_mc(temp, motion_source, stride, 9, 9, src_x, src_y, plane_width, plane_height);
                                motion_source= temp;
                            }
                        }


                        /* first, take care of copying a block from either the
                         * previous or the golden frame */
                        if (s->all_fragments[i].coding_method != MODE_INTRA) {
                            /* Note, it is possible to implement all MC cases with
                               put_no_rnd_pixels_l2 which would look more like the
                               VP3 source but this would be slower as
                               put_no_rnd_pixels_tab is better optimized */
                            if(motion_halfpel_index != 3){
                                s->hdsp.put_no_rnd_pixels_tab[1][motion_halfpel_index](
                                    output_plane + first_pixel,
                                    motion_source, stride, 8);
                            }else{
                                int d= (motion_x ^ motion_y)>>31; // d is 0 if motion_x and _y have the same sign, else -1
                                s->vp3dsp.put_no_rnd_pixels_l2(
                                    output_plane + first_pixel,
                                    motion_source - d,
                                    motion_source + stride + 1 + d,
                                    stride, 8);
                            }
                        }

                        /* invert DCT and place (or add) in final output */

                        if (s->all_fragments[i].coding_method == MODE_INTRA) {
                            int index;
                            index = vp3_dequant(s, s->all_fragments + i, plane, 0, block);
                            if (index > 63)
                                continue;
                            s->vp3dsp.idct_put(
                                output_plane + first_pixel,
                                stride,
                                block);
                        } else {
                            int index = vp3_dequant(s, s->all_fragments + i, plane, 1, block);
                            if (index > 63)
                                continue;
                            if (index > 0) {
                                s->vp3dsp.idct_add(
                                    output_plane + first_pixel,
                                    stride,
                                    block);
                            } else {
                                /* only the DC coefficient is present */
                                s->vp3dsp.idct_dc_add(output_plane + first_pixel, stride, block);
                            }
                        }
                    } else {

                        /* copy directly from the previous frame */
                        s->hdsp.put_pixels_tab[1][0](
                            output_plane + first_pixel,
                            last_plane + first_pixel,
                            stride, 8);

                    }
                }
            }

            // Filter up to the last row in the superblock row
            if (!s->skip_loop_filter)
                apply_loop_filter(s, plane, 4*sb_y - !!sb_y, FFMIN(4*sb_y+3, fragment_height-1));
        }
    }

     /* this looks like a good place for slice dispatch... */
     /* algorithm:
      *   if (slice == s->macroblock_height - 1)
      *     dispatch (both last slice & 2nd-to-last slice);
      *   else if (slice > 0)
      *     dispatch (slice - 1);
      */

    vp3_draw_horiz_band(s, FFMIN((32 << s->chroma_y_shift) * (slice + 1) -16, s->height-16));
}

1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657
/**
 * Allocate tables for per-frame data in Vp3DecodeContext.
 *
 * All sizes are derived from the superblock, macroblock and fragment counts
 * already stored in the context, so those must be set before calling this.
 * If any allocation fails, vp3_decode_end() is invoked to release whatever
 * was obtained so far.
 *
 * @param avctx codec context whose priv_data is a Vp3DecodeContext
 * @return 0 on success, AVERROR(ENOMEM) if any allocation failed
 */
static av_cold int allocate_tables(AVCodecContext *avctx)
{
    Vp3DecodeContext *s = avctx->priv_data;
    int y_fragment_count, c_fragment_count;

    y_fragment_count = s->fragment_width[0] * s->fragment_height[0];
    c_fragment_count = s->fragment_width[1] * s->fragment_height[1];

    s->superblock_coding = av_malloc(s->superblock_count);
    s->all_fragments = av_malloc(s->fragment_count * sizeof(Vp3Fragment));
    s->coded_fragment_list[0] = av_malloc(s->fragment_count * sizeof(int));
    s->dct_tokens_base = av_malloc(64*s->fragment_count * sizeof(*s->dct_tokens_base));
    s->motion_val[0] = av_malloc(y_fragment_count * sizeof(*s->motion_val[0]));
    s->motion_val[1] = av_malloc(c_fragment_count * sizeof(*s->motion_val[1]));

    /* work out the block mapping tables */
    s->superblock_fragments = av_malloc(s->superblock_count * 16 * sizeof(int));
    s->macroblock_coding = av_malloc(s->macroblock_count + 1);

    if (!s->superblock_coding || !s->all_fragments || !s->dct_tokens_base ||
        !s->coded_fragment_list[0] || !s->superblock_fragments || !s->macroblock_coding ||
        !s->motion_val[0] || !s->motion_val[1]) {
        vp3_decode_end(avctx);
        /* report out-of-memory with a proper error code, like init_frames() */
        return AVERROR(ENOMEM);
    }

    init_block_mapping(s);

    return 0;
}

1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673
/**
 * Allocate the AVFrame holders for the current, last and golden frames.
 *
 * On failure every holder is freed again, so the caller has nothing to
 * clean up.
 *
 * @return 0 on success, AVERROR(ENOMEM) if any frame could not be allocated
 */
static av_cold int init_frames(Vp3DecodeContext *s)
{
    s->current_frame.f = av_frame_alloc();
    s->last_frame.f    = av_frame_alloc();
    s->golden_frame.f  = av_frame_alloc();

    if (s->current_frame.f && s->last_frame.f && s->golden_frame.f)
        return 0;

    /* at least one allocation failed: drop all three */
    av_frame_free(&s->current_frame.f);
    av_frame_free(&s->last_frame.f);
    av_frame_free(&s->golden_frame.f);

    return AVERROR(ENOMEM);
}

1674
/**
 * Initialize the decoder context.
 *
 * Allocates the frame holders, derives superblock/macroblock/fragment
 * geometry from the coded dimensions, installs either the built-in VP3.1
 * tables or the tables previously parsed from Theora headers
 * (s->theora_tables), builds all VLCs and finally allocates the per-frame
 * tables.
 *
 * @param avctx codec context whose priv_data is a Vp3DecodeContext
 * @return 0 on success, a negative value on failure
 */
static av_cold int vp3_decode_init(AVCodecContext *avctx)
{
    Vp3DecodeContext *s = avctx->priv_data;
    int i, inter, plane, ret;
    int c_width;
    int c_height;
    int y_fragment_count, c_fragment_count;

    ret = init_frames(s);
    if (ret < 0)
        return ret;

    avctx->internal->allocate_progress = 1;

    if (avctx->codec_tag == MKTAG('V','P','3','0'))
        s->version = 0;
    else
        s->version = 1;

    s->avctx = avctx;
    s->width = FFALIGN(avctx->width, 16);
    s->height = FFALIGN(avctx->height, 16);
    if (avctx->pix_fmt == AV_PIX_FMT_NONE)
        avctx->pix_fmt = AV_PIX_FMT_YUV420P;
    avctx->chroma_sample_location = AVCHROMA_LOC_CENTER;
    ff_hpeldsp_init(&s->hdsp, avctx->flags | CODEC_FLAG_BITEXACT);
    ff_videodsp_init(&s->vdsp, 8);
    ff_vp3dsp_init(&s->vp3dsp, avctx->flags);

    /* T() swaps the row (high 3 bits) and column (low 3 bits) of a 6-bit index */
    for (i = 0; i < 64; i++) {
#define T(x) (((x) >> 3) | (((x) & 7) << 3))
        s->idct_permutation[i] = T(i);
        s->idct_scantable[i] = T(ff_zigzag_direct[i]);
#undef T
    }

    /* initialize to an impossible value which will force a recalculation
     * in the first frame decode */
    for (i = 0; i < 3; i++)
        s->qps[i] = -1;

    av_pix_fmt_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_x_shift,
                                     &s->chroma_y_shift);

    s->y_superblock_width = (s->width + 31) / 32;
    s->y_superblock_height = (s->height + 31) / 32;
    s->y_superblock_count = s->y_superblock_width * s->y_superblock_height;

    /* work out the dimensions for the C planes */
    c_width = s->width >> s->chroma_x_shift;
    c_height = s->height >> s->chroma_y_shift;
    s->c_superblock_width = (c_width + 31) / 32;
    s->c_superblock_height = (c_height + 31) / 32;
    s->c_superblock_count = s->c_superblock_width * s->c_superblock_height;

    s->superblock_count = s->y_superblock_count + (s->c_superblock_count * 2);
    s->u_superblock_start = s->y_superblock_count;
    s->v_superblock_start = s->u_superblock_start + s->c_superblock_count;

    s->macroblock_width = (s->width + 15) / 16;
    s->macroblock_height = (s->height + 15) / 16;
    s->macroblock_count = s->macroblock_width * s->macroblock_height;

    s->fragment_width[0] = s->width / FRAGMENT_PIXELS;
    s->fragment_height[0] = s->height / FRAGMENT_PIXELS;
    s->fragment_width[1]  = s->fragment_width[0]  >> s->chroma_x_shift;
    s->fragment_height[1] = s->fragment_height[0] >> s->chroma_y_shift;

    /* fragment count covers all 8x8 blocks for all 3 planes */
    y_fragment_count     = s->fragment_width[0] * s->fragment_height[0];
    c_fragment_count     = s->fragment_width[1] * s->fragment_height[1];
    s->fragment_count    = y_fragment_count + 2*c_fragment_count;
    s->fragment_start[1] = y_fragment_count;
    s->fragment_start[2] = y_fragment_count + c_fragment_count;

    if (!s->theora_tables) {
        /* no tables were parsed from headers: use the built-in VP3.1 ones */
        for (i = 0; i < 64; i++) {
            s->coded_dc_scale_factor[i] = vp31_dc_scale_factor[i];
            s->coded_ac_scale_factor[i] = vp31_ac_scale_factor[i];
            s->base_matrix[0][i] = vp31_intra_y_dequant[i];
            s->base_matrix[1][i] = vp31_intra_c_dequant[i];
            s->base_matrix[2][i] = vp31_inter_dequant[i];
            s->filter_limit_values[i] = vp31_filter_limit_values[i];
        }

        for (inter = 0; inter < 2; inter++) {
            for (plane = 0; plane < 3; plane++) {
                s->qr_count[inter][plane] = 1;
                s->qr_size [inter][plane][0] = 63;
                s->qr_base [inter][plane][0] =
                s->qr_base [inter][plane][1] = 2*inter + (!!plane)*!inter;
            }
        }

        /* init VLC tables */
        for (i = 0; i < 16; i++) {

            /* DC histograms */
            init_vlc(&s->dc_vlc[i], 11, 32,
                &dc_bias[i][0][1], 4, 2,
                &dc_bias[i][0][0], 4, 2, 0);

            /* group 1 AC histograms */
            init_vlc(&s->ac_vlc_1[i], 11, 32,
                &ac_bias_0[i][0][1], 4, 2,
                &ac_bias_0[i][0][0], 4, 2, 0);

            /* group 2 AC histograms */
            init_vlc(&s->ac_vlc_2[i], 11, 32,
                &ac_bias_1[i][0][1], 4, 2,
                &ac_bias_1[i][0][0], 4, 2, 0);

            /* group 3 AC histograms */
            init_vlc(&s->ac_vlc_3[i], 11, 32,
                &ac_bias_2[i][0][1], 4, 2,
                &ac_bias_2[i][0][0], 4, 2, 0);

            /* group 4 AC histograms */
            init_vlc(&s->ac_vlc_4[i], 11, 32,
                &ac_bias_3[i][0][1], 4, 2,
                &ac_bias_3[i][0][0], 4, 2, 0);
        }
    } else {
        /* tables came from the bitstream, so VLC construction may fail */
        for (i = 0; i < 16; i++) {
            /* DC histograms */
            if (init_vlc(&s->dc_vlc[i], 11, 32,
                &s->huffman_table[i][0][1], 8, 4,
                &s->huffman_table[i][0][0], 8, 4, 0) < 0)
                goto vlc_fail;

            /* group 1 AC histograms */
            if (init_vlc(&s->ac_vlc_1[i], 11, 32,
                &s->huffman_table[i+16][0][1], 8, 4,
                &s->huffman_table[i+16][0][0], 8, 4, 0) < 0)
                goto vlc_fail;

            /* group 2 AC histograms */
            if (init_vlc(&s->ac_vlc_2[i], 11, 32,
                &s->huffman_table[i+16*2][0][1], 8, 4,
                &s->huffman_table[i+16*2][0][0], 8, 4, 0) < 0)
                goto vlc_fail;

            /* group 3 AC histograms */
            if (init_vlc(&s->ac_vlc_3[i], 11, 32,
                &s->huffman_table[i+16*3][0][1], 8, 4,
                &s->huffman_table[i+16*3][0][0], 8, 4, 0) < 0)
                goto vlc_fail;

            /* group 4 AC histograms */
            if (init_vlc(&s->ac_vlc_4[i], 11, 32,
                &s->huffman_table[i+16*4][0][1], 8, 4,
                &s->huffman_table[i+16*4][0][0], 8, 4, 0) < 0)
                goto vlc_fail;
        }
    }

    init_vlc(&s->superblock_run_length_vlc, 6, 34,
        &superblock_run_length_vlc_table[0][1], 4, 2,
        &superblock_run_length_vlc_table[0][0], 4, 2, 0);

    init_vlc(&s->fragment_run_length_vlc, 5, 30,
        &fragment_run_length_vlc_table[0][1], 4, 2,
        &fragment_run_length_vlc_table[0][0], 4, 2, 0);

    init_vlc(&s->mode_code_vlc, 3, 8,
        &mode_code_vlc_table[0][1], 2, 1,
        &mode_code_vlc_table[0][0], 2, 1, 0);

    init_vlc(&s->motion_vector_vlc, 6, 63,
        &motion_vector_vlc_table[0][1], 2, 1,
        &motion_vector_vlc_table[0][0], 2, 1, 0);

    return allocate_tables(avctx);

vlc_fail:
    av_log(avctx, AV_LOG_FATAL, "Invalid huffman table\n");
    /* Do not leak the frames from init_frames() or the VLCs built so far.
     * vp3_decode_end() is the codec's close callback and allocate_tables()
     * already relies on it for cleanup of a partially set up context. */
    vp3_decode_end(avctx);
    return -1;
}

1855
/// Release and shuffle frames after decode finishes
1856
static int update_frames(AVCodecContext *avctx)
1857 1858
{
    Vp3DecodeContext *s = avctx->priv_data;
1859
    int ret = 0;
1860 1861 1862


    /* shuffle frames (last = current) */
1863 1864 1865 1866
    ff_thread_release_buffer(avctx, &s->last_frame);
    ret = ff_thread_ref_frame(&s->last_frame, &s->current_frame);
    if (ret < 0)
        goto fail;
1867 1868

    if (s->keyframe) {
1869 1870
        ff_thread_release_buffer(avctx, &s->golden_frame);
        ret = ff_thread_ref_frame(&s->golden_frame, &s->current_frame);
1871 1872
    }

1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893
fail:
    ff_thread_release_buffer(avctx, &s->current_frame);
    return ret;
}

/**
 * Replace dst with a new reference to src.
 *
 * dst is always released first; if src holds no picture data, dst is simply
 * left empty.
 *
 * @return 0 if src was empty, otherwise the result of ff_thread_ref_frame()
 */
static int ref_frame(Vp3DecodeContext *s, ThreadFrame *dst, ThreadFrame *src)
{
    ff_thread_release_buffer(s->avctx, dst);

    if (!src->f->data[0])
        return 0;

    return ff_thread_ref_frame(dst, src);
}

/**
 * Make dst reference the current, golden and last frames of src, in that
 * order, stopping at the first failure.
 *
 * @return 0 on success, the negative error of the failing step otherwise
 */
static int ref_frames(Vp3DecodeContext *dst, Vp3DecodeContext *src)
{
    int ret;

    ret = ref_frame(dst, &dst->current_frame, &src->current_frame);
    if (ret < 0)
        return ret;

    ret = ref_frame(dst, &dst->golden_frame, &src->golden_frame);
    if (ret < 0)
        return ret;

    ret = ref_frame(dst, &dst->last_frame, &src->last_frame);
    if (ret < 0)
        return ret;

    return 0;
}

/**
 * Frame-threading callback: bring the context of dst up to date with src.
 *
 * Copies frame references, the keyframe flag and, where the quantizer
 * indices differ, the dequantization matrices, loop filter bounding values
 * and the qps..superblock_count field range. Finishes by rotating the frames
 * of dst through update_frames().
 *
 * @return a negative value if src has no decoded frame, if the dimensions
 *         differ or if an allocation/reference fails; otherwise the result
 *         of update_frames()
 */
static int vp3_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
{
    Vp3DecodeContext *s = dst->priv_data, *s1 = src->priv_data;
    int qps_changed = 0, i, err;

/* copy the contiguous struct members [start_field, end_field) from one context to another */
#define copy_fields(to, from, start_field, end_field) memcpy(&to->start_field, &from->start_field, (char*)&to->end_field - (char*)&to->start_field)

    if (!s1->current_frame.f->data[0] ||
        s->width  != s1->width ||
        s->height != s1->height) {
        /* nothing usable to inherit; still mirror the frame references */
        if (s != s1)
            ref_frames(s, s1);
        return -1;
    }

    if (s != s1) {
        // init tables if the first frame hasn't been decoded
        if (!s->current_frame.f->data[0]) {
            int y_fragment_count, c_fragment_count;
            s->avctx = dst;
            err = allocate_tables(dst);
            if (err)
                return err;
            y_fragment_count = s->fragment_width[0] * s->fragment_height[0];
            c_fragment_count = s->fragment_width[1] * s->fragment_height[1];
            memcpy(s->motion_val[0], s1->motion_val[0], y_fragment_count * sizeof(*s->motion_val[0]));
            memcpy(s->motion_val[1], s1->motion_val[1], c_fragment_count * sizeof(*s->motion_val[1]));
        }

        // copy previous frame data
        if ((err = ref_frames(s, s1)) < 0)
            return err;

        s->keyframe = s1->keyframe;

        // copy qscale data if necessary
        for (i = 0; i < 3; i++) {
            /* compare each quantizer index with its counterpart in the
             * source; comparing against qps[1] for every i could leave
             * qmat[i] stale while qps itself is overwritten below */
            if (s->qps[i] != s1->qps[i]) {
                qps_changed = 1;
                memcpy(&s->qmat[i], &s1->qmat[i], sizeof(s->qmat[i]));
            }
        }

        /* must run before copy_fields() overwrites s->qps[0] */
        if (s->qps[0] != s1->qps[0])
            memcpy(&s->bounding_values_array, &s1->bounding_values_array, sizeof(s->bounding_values_array));

        if (qps_changed)
            copy_fields(s, s1, qps, superblock_count);
    }
#undef copy_fields

    return update_frames(dst);
}

1950
static int vp3_decode_frame(AVCodecContext *avctx,
1951
                            void *data, int *got_frame,
1952
                            AVPacket *avpkt)
1953
{
1954 1955
    const uint8_t *buf = avpkt->data;
    int buf_size = avpkt->size;
1956 1957
    Vp3DecodeContext *s = avctx->priv_data;
    GetBitContext gb;
1958
    int i, ret;
1959 1960

    init_get_bits(&gb, buf, buf_size * 8);
1961

1962 1963
    if (s->theora && get_bits1(&gb))
    {
1964 1965
        av_log(avctx, AV_LOG_ERROR, "Header packet passed to frame decoder, skipping\n");
        return -1;
1966
    }
1967 1968 1969

    s->keyframe = !get_bits1(&gb);
    if (!s->theora)
1970
        skip_bits(&gb, 1);
1971 1972
    for (i = 0; i < 3; i++)
        s->last_qps[i] = s->qps[i];
1973

1974
    s->nqps=0;
1975
    do{
1976 1977 1978 1979
        s->qps[s->nqps++]= get_bits(&gb, 6);
    } while(s->theora >= 0x030200 && s->nqps<3 && get_bits1(&gb));
    for (i = s->nqps; i < 3; i++)
        s->qps[i] = -1;
1980

1981
    if (s->avctx->debug & FF_DEBUG_PICT_INFO)
1982
        av_log(s->avctx, AV_LOG_INFO, " VP3 %sframe #%d: Q index = %d\n",
1983
            s->keyframe?"key":"", avctx->frame_number+1, s->qps[0]);
1984

1985 1986 1987
    s->skip_loop_filter = !s->filter_limit_values[s->qps[0]] ||
        avctx->skip_loop_filter >= (s->keyframe ? AVDISCARD_ALL : AVDISCARD_NONKEY);

1988
    if (s->qps[0] != s->last_qps[0])
1989
        init_loop_filter(s);
1990 1991 1992 1993 1994 1995

    for (i = 0; i < s->nqps; i++)
        // reinit all dequantizers if the first one changed, because
        // the DC of the first quantizer must be used for all matrices
        if (s->qps[i] != s->last_qps[i] || s->qps[0] != s->last_qps[0])
            init_dequantizer(s, i);
1996

1997 1998 1999
    if (avctx->skip_frame >= AVDISCARD_NONKEY && !s->keyframe)
        return buf_size;

2000 2001
    s->current_frame.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
    if (ff_thread_get_buffer(avctx, &s->current_frame, AV_GET_BUFFER_FLAG_REF) < 0) {
2002
        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
2003
        goto error;
2004 2005
    }

2006
    if (!s->edge_emu_buffer)
2007
        s->edge_emu_buffer = av_malloc(9*FFABS(s->current_frame.f->linesize[0]));
2008

2009
    if (s->keyframe) {
2010 2011 2012 2013 2014 2015 2016
        if (!s->theora)
        {
            skip_bits(&gb, 4); /* width code */
            skip_bits(&gb, 4); /* height code */
            if (s->version)
            {
                s->version = get_bits(&gb, 5);
2017
                if (avctx->frame_number == 0)
2018 2019 2020 2021 2022 2023 2024 2025 2026
                    av_log(s->avctx, AV_LOG_DEBUG, "VP version: %d\n", s->version);
            }
        }
        if (s->version || s->theora)
        {
                if (get_bits1(&gb))
                    av_log(s->avctx, AV_LOG_ERROR, "Warning, unsupported keyframe coding type?!\n");
            skip_bits(&gb, 2); /* reserved? */
        }
2027
    } else {
2028
        if (!s->golden_frame.f->data[0]) {
2029
            av_log(s->avctx, AV_LOG_WARNING, "vp3: first frame not a keyframe\n");
David Conrad's avatar
David Conrad committed
2030

2031 2032
            s->golden_frame.f->pict_type = AV_PICTURE_TYPE_I;
            if (ff_thread_get_buffer(avctx, &s->golden_frame, AV_GET_BUFFER_FLAG_REF) < 0) {
2033 2034 2035
                av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
                goto error;
            }
2036 2037 2038
            ff_thread_release_buffer(avctx, &s->last_frame);
            if ((ret = ff_thread_ref_frame(&s->last_frame, &s->golden_frame)) < 0)
                goto error;
2039
            ff_thread_report_progress(&s->last_frame, INT_MAX, 0);
2040 2041 2042
        }
    }

2043
    memset(s->all_fragments, 0, s->fragment_count * sizeof(Vp3Fragment));
2044
    ff_thread_finish_setup(avctx);
2045

2046 2047
    if (unpack_superblocks(s, &gb)){
        av_log(s->avctx, AV_LOG_ERROR, "error in unpack_superblocks\n");
2048
        goto error;
2049 2050 2051
    }
    if (unpack_modes(s, &gb)){
        av_log(s->avctx, AV_LOG_ERROR, "error in unpack_modes\n");
2052
        goto error;
2053 2054 2055
    }
    if (unpack_vectors(s, &gb)){
        av_log(s->avctx, AV_LOG_ERROR, "error in unpack_vectors\n");
2056
        goto error;
2057
    }
2058 2059
    if (unpack_block_qpis(s, &gb)){
        av_log(s->avctx, AV_LOG_ERROR, "error in unpack_block_qpis\n");
2060
        goto error;
2061
    }
2062 2063
    if (unpack_dct_coeffs(s, &gb)){
        av_log(s->avctx, AV_LOG_ERROR, "error in unpack_dct_coeffs\n");
2064
        goto error;
2065
    }
2066 2067

    for (i = 0; i < 3; i++) {
2068
        int height = s->height >> (i && s->chroma_y_shift);
2069 2070 2071
        if (s->flipped_image)
            s->data_offset[i] = 0;
        else
2072
            s->data_offset[i] = (height-1) * s->current_frame.f->linesize[i];
2073
    }
2074

2075
    s->last_slice_end = 0;
2076
    for (i = 0; i < s->c_superblock_height; i++)
2077
        render_slice(s, i);
2078

2079 2080
    // filter the last row
    for (i = 0; i < 3; i++) {
2081
        int row = (s->height >> (3+(i && s->chroma_y_shift))) - 1;
2082 2083
        apply_loop_filter(s, i, row, row+1);
    }
2084
    vp3_draw_horiz_band(s, s->avctx->height);
2085

2086 2087
    if ((ret = av_frame_ref(data, s->current_frame.f)) < 0)
        return ret;
2088
    *got_frame = 1;
2089

2090 2091 2092 2093 2094
    if (!HAVE_THREADS || !(s->avctx->active_thread_type&FF_THREAD_FRAME)) {
        ret = update_frames(avctx);
        if (ret < 0)
            return ret;
    }
2095 2096

    return buf_size;
2097 2098

error:
2099 2100
    ff_thread_report_progress(&s->current_frame, INT_MAX, 0);

2101
    if (!HAVE_THREADS || !(s->avctx->active_thread_type&FF_THREAD_FRAME))
2102
        av_frame_unref(s->current_frame.f);
2103

2104
    return -1;
2105 2106
}

2107 2108 2109 2110
/**
 * Recursively read one Huffman tree from the bitstream into
 * s->huffman_table[s->hti].
 *
 * A set bit introduces a leaf: a 5-bit token follows and is recorded with
 * the code bits (s->hbits) and code length (s->huff_code_size) accumulated
 * so far. A clear bit introduces an inner node whose two subtrees follow,
 * first the one with a 0 appended to the code, then the one with a 1.
 *
 * @return 0 on success, -1 if the tree has more than 32 entries or a code
 *         would grow beyond 32 bits
 */
static int read_huffman_tree(AVCodecContext *avctx, GetBitContext *gb)
{
    Vp3DecodeContext *s = avctx->priv_data;
    int token;

    if (!get_bits1(gb)) {
        /* inner node: descend into both branches */
        if (s->huff_code_size >= 32) { /* overflow */
            av_log(avctx, AV_LOG_ERROR, "huffman tree overflow\n");
            return -1;
        }
        s->huff_code_size++;
        s->hbits <<= 1;
        if (read_huffman_tree(avctx, gb))
            return -1;
        s->hbits |= 1;
        if (read_huffman_tree(avctx, gb))
            return -1;
        /* restore the code prefix of the parent node */
        s->hbits >>= 1;
        s->huff_code_size--;
        return 0;
    }

    /* leaf: store the code for this token */
    if (s->entries >= 32) { /* overflow */
        av_log(avctx, AV_LOG_ERROR, "huffman tree overflow\n");
        return -1;
    }
    token = get_bits(gb, 5);
    av_dlog(avctx, "hti %d hbits %x token %d entry : %d size %d\n",
            s->hti, s->hbits, token, s->entries, s->huff_code_size);
    s->huffman_table[s->hti][token][0] = s->hbits;
    s->huffman_table[s->hti][token][1] = s->huff_code_size;
    s->entries++;

    return 0;
}

2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155
/**
 * Frame-threading callback: prepare the context of a thread copy.
 *
 * Clears every table pointer and the edge emulation buffer pointer in this
 * context, then allocates frame holders for it.
 *
 * @return the result of init_frames()
 */
static int vp3_init_thread_copy(AVCodecContext *avctx)
{
    Vp3DecodeContext *s = avctx->priv_data;

    /* tables otherwise set up by allocate_tables() */
    s->superblock_coding      = NULL;
    s->all_fragments          = NULL;
    s->coded_fragment_list[0] = NULL;
    s->dct_tokens_base        = NULL;
    s->motion_val[0]          = NULL;
    s->motion_val[1]          = NULL;
    s->superblock_fragments   = NULL;
    s->macroblock_coding      = NULL;

    /* allocated lazily by the frame decoder */
    s->edge_emu_buffer        = NULL;

    return init_frames(s);
}

2159
#if CONFIG_THEORA_DECODER
2160 2161
/* Pixel formats indexed by the 2-bit pixel format field read in
 * theora_decode_header(); index 1 has no corresponding format. */
static const enum AVPixelFormat theora_pix_fmts[4] = {
    AV_PIX_FMT_YUV420P,
    AV_PIX_FMT_NONE,
    AV_PIX_FMT_YUV422P,
    AV_PIX_FMT_YUV444P
};

2164
/**
 * Parse the Theora header packet of type 0x80.
 *
 * Reads the bitstream version, coded and visible dimensions, frame rate,
 * sample aspect ratio, colorspace and (for version >= 3.2.0) the pixel
 * format, and stores them in the decoder and codec contexts.
 *
 * @return 0 on success, -1 if the coded dimensions are invalid
 */
static int theora_decode_header(AVCodecContext *avctx, GetBitContext *gb)
{
    Vp3DecodeContext *s = avctx->priv_data;
    int visible_width, visible_height, colorspace;
    int offset_x = 0, offset_y = 0;
    int use_visible_size;
    AVRational fps, aspect;

    s->theora = get_bits_long(gb, 24);
    av_log(avctx, AV_LOG_DEBUG, "Theora bitstream version %X\n", s->theora);

    /* 3.2.0 aka alpha3 has the same frame orientation as original vp3 */
    /* but previous versions have the image flipped relative to vp3 */
    if (s->theora < 0x030200) {
        s->flipped_image = 1;
        av_log(avctx, AV_LOG_DEBUG, "Old (<alpha3) Theora bitstream, flipped image\n");
    }

    /* coded size is stored in units of 16 pixels */
    s->width       = get_bits(gb, 16) << 4;
    visible_width  = s->width;
    s->height      = get_bits(gb, 16) << 4;
    visible_height = s->height;

    if (av_image_check_size(s->width, s->height, 0, avctx)) {
        av_log(avctx, AV_LOG_ERROR, "Invalid dimensions (%dx%d)\n", s->width, s->height);
        s->width = s->height = 0;
        return -1;
    }

    if (s->theora >= 0x030200) {
        visible_width  = get_bits_long(gb, 24);
        visible_height = get_bits_long(gb, 24);

        offset_x = get_bits(gb, 8); /* offset x */
        offset_y = get_bits(gb, 8); /* offset y, from bottom */
    }

    fps.num = get_bits_long(gb, 32);
    fps.den = get_bits_long(gb, 32);
    if (fps.num && fps.den) {
        /* the time base is the inverse of the frame rate */
        av_reduce(&avctx->time_base.num, &avctx->time_base.den,
                  fps.den, fps.num, 1<<30);
    }

    aspect.num = get_bits_long(gb, 24);
    aspect.den = get_bits_long(gb, 24);
    if (aspect.num && aspect.den) {
        av_reduce(&avctx->sample_aspect_ratio.num,
                  &avctx->sample_aspect_ratio.den,
                  aspect.num, aspect.den, 1<<30);
    }

    if (s->theora < 0x030200)
        skip_bits(gb, 5); /* keyframe frequency force */
    colorspace = get_bits(gb, 8);
    skip_bits(gb, 24); /* bitrate */

    skip_bits(gb, 6); /* quality hint */

    if (s->theora >= 0x030200) {
        skip_bits(gb, 5); /* keyframe frequency force */
        avctx->pix_fmt = theora_pix_fmts[get_bits(gb, 2)];
        skip_bits(gb, 3); /* reserved */
    }

    /* The visible size is only exported when it is less than 16 pixels
     * smaller than the coded size in each direction, offset_x is zero and
     * offset_y equals the height difference. */
    use_visible_size = visible_width  <= s->width  &&
                       visible_width  >  s->width - 16 &&
                       visible_height <= s->height &&
                       visible_height >  s->height - 16 &&
                       !offset_x &&
                       offset_y == s->height - visible_height;

    if (use_visible_size)
        avcodec_set_dimensions(avctx, visible_width, visible_height);
    else
        avcodec_set_dimensions(avctx, s->width, s->height);

    switch (colorspace) {
    case 1:
        avctx->color_primaries = AVCOL_PRI_BT470M;
        break;
    case 2:
        avctx->color_primaries = AVCOL_PRI_BT470BG;
        break;
    default:
        break;
    }
    if (colorspace == 1 || colorspace == 2) {
        avctx->colorspace = AVCOL_SPC_BT470BG;
        avctx->color_trc  = AVCOL_TRC_BT709;
    }

    return 0;
}

2250
/**
 * Parse the Theora header packet of type 0x82.
 *
 * Fills the loop filter limits, AC/DC scale factors, base matrices, the
 * qr_count/qr_size/qr_base tables and the 80 Huffman tables of the context,
 * then sets s->theora_tables so that vp3_decode_init() uses them instead of
 * the built-in VP3.1 tables.
 *
 * @return 0 on success, -1 on invalid data
 */
static int theora_decode_tables(AVCodecContext *avctx, GetBitContext *gb)
{
    Vp3DecodeContext *s = avctx->priv_data;
    /* streams from version 3.2.0 on carry explicit field widths and counts */
    const int new_syntax = s->theora >= 0x030200;
    int i, n, matrices, inter, plane;

    if (new_syntax) {
        n = get_bits(gb, 3);
        /* loop filter limit values table */
        if (n) {
            for (i = 0; i < 64; i++)
                s->filter_limit_values[i] = get_bits(gb, n);
        }
    }

    /* quality threshold table */
    n = new_syntax ? get_bits(gb, 4) + 1 : 16;
    for (i = 0; i < 64; i++)
        s->coded_ac_scale_factor[i] = get_bits(gb, n);

    /* dc scale factor table */
    n = new_syntax ? get_bits(gb, 4) + 1 : 16;
    for (i = 0; i < 64; i++)
        s->coded_dc_scale_factor[i] = get_bits(gb, n);

    matrices = new_syntax ? get_bits(gb, 9) + 1 : 3;

    if (matrices > 384) {
        av_log(avctx, AV_LOG_ERROR, "invalid number of base matrixes\n");
        return -1;
    }

    for (n = 0; n < matrices; n++) {
        for (i = 0; i < 64; i++)
            s->base_matrix[n][i] = get_bits(gb, 8);
    }

    for (inter = 0; inter < 2; inter++) {
        for (plane = 0; plane < 3; plane++) {
            /* the very first set (intra, plane 0) is always coded explicitly */
            int explicit_set = 1;

            if (inter || plane > 0)
                explicit_set = get_bits1(gb);

            if (explicit_set) {
                int qri = 0;
                int qi  = 0;

                /* read base matrix indices and range sizes until the
                 * sizes add up to 63 */
                for (;;) {
                    i = get_bits(gb, av_log2(matrices-1)+1);
                    if (i >= matrices) {
                        av_log(avctx, AV_LOG_ERROR, "invalid base matrix index\n");
                        return -1;
                    }
                    s->qr_base[inter][plane][qri] = i;
                    if (qi >= 63)
                        break;
                    i = get_bits(gb, av_log2(63-qi)+1) + 1;
                    s->qr_size[inter][plane][qri++] = i;
                    qi += i;
                }

                if (qi > 63) {
                    av_log(avctx, AV_LOG_ERROR, "invalid qi %d > 63\n", qi);
                    return -1;
                }
                s->qr_count[inter][plane] = qri;
            } else {
                /* copy the set from an earlier (type, plane) combination */
                int src_inter, src_plane;

                if (inter && get_bits1(gb)) {
                    src_inter = 0;
                    src_plane = plane;
                } else {
                    src_inter = (3*inter + plane - 1) / 3;
                    src_plane = (plane + 2) % 3;
                }
                s->qr_count[inter][plane] = s->qr_count[src_inter][src_plane];
                memcpy(s->qr_size[inter][plane], s->qr_size[src_inter][src_plane], sizeof(s->qr_size[0][0]));
                memcpy(s->qr_base[inter][plane], s->qr_base[src_inter][src_plane], sizeof(s->qr_base[0][0]));
            }
        }
    }

    /* Huffman tables */
    for (s->hti = 0; s->hti < 80; s->hti++) {
        s->entries = 0;
        s->huff_code_size = 1;
        if (get_bits1(gb))
            continue;

        /* the root is an inner node: read its two subtrees */
        s->hbits = 0;
        if (read_huffman_tree(avctx, gb))
            return -1;
        s->hbits = 1;
        if (read_huffman_tree(avctx, gb))
            return -1;
    }

    s->theora_tables = 1;

    return 0;
}

2357
/**
 * Initialize the Theora decoder.
 *
 * Splits the extradata into its Xiph header packets, parses the packets of
 * type 0x80 and 0x82 and then hands over to vp3_decode_init().
 *
 * @return the result of vp3_decode_init(), or -1 if the extradata is
 *         missing or corrupt or a header packet fails to parse
 */
static av_cold int theora_decode_init(AVCodecContext *avctx)
{
    Vp3DecodeContext *s = avctx->priv_data;
    GetBitContext gb;
    int ptype;
    uint8_t *header_start[3];
    int header_len[3];
    int i;

    s->theora = 1;

    if (!avctx->extradata_size) {
        av_log(avctx, AV_LOG_ERROR, "Missing extradata!\n");
        return -1;
    }

    if (avpriv_split_xiph_headers(avctx->extradata, avctx->extradata_size,
                                  42, header_start, header_len) < 0) {
        av_log(avctx, AV_LOG_ERROR, "Corrupt extradata\n");
        return -1;
    }

    for (i = 0; i < 3; i++) {
        if (header_len[i] <= 0)
            continue;
        init_get_bits(&gb, header_start[i], header_len[i] * 8);

        ptype = get_bits(&gb, 8);

        /* a missing header flag is only reported, not treated as fatal */
        if (!(ptype & 0x80))
            av_log(avctx, AV_LOG_ERROR, "Invalid extradata!\n");

        // FIXME: Check for this as well.
        skip_bits_long(&gb, 6*8); /* "theora" */

        switch (ptype) {
        case 0x80:
            /* do not go on to initialize the decoder when the header was
             * rejected (e.g. invalid dimensions) */
            if (theora_decode_header(avctx, &gb) < 0)
                return -1;
            break;
        case 0x81:
            /* comment packet: intentionally not parsed */
            break;
        case 0x82:
            if (theora_decode_tables(avctx, &gb))
                return -1;
            break;
        default:
            av_log(avctx, AV_LOG_ERROR, "Unknown Theora config packet: %d\n", ptype&~0x80);
            break;
        }
        if (ptype != 0x81 && 8*header_len[i] != get_bits_count(&gb))
            av_log(avctx, AV_LOG_WARNING, "%d bits left in packet %X\n", 8*header_len[i] - get_bits_count(&gb), ptype);
        /* for versions before 3.2.0 stop after the first non-empty packet */
        if (s->theora < 0x030200)
            break;
    }

    return vp3_decode_init(avctx);
}

2422
AVCodec ff_theora_decoder = {
2423 2424
    .name                  = "theora",
    .type                  = AVMEDIA_TYPE_VIDEO,
2425
    .id                    = AV_CODEC_ID_THEORA,
2426 2427 2428 2429 2430 2431 2432 2433
    .priv_data_size        = sizeof(Vp3DecodeContext),
    .init                  = theora_decode_init,
    .close                 = vp3_decode_end,
    .decode                = vp3_decode_frame,
    .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_DRAW_HORIZ_BAND |
                             CODEC_CAP_FRAME_THREADS,
    .flush                 = vp3_decode_flush,
    .long_name             = NULL_IF_CONFIG_SMALL("Theora"),
2434
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp3_init_thread_copy),
2435
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp3_update_thread_context)
2436
};
2437
#endif
2438

2439
AVCodec ff_vp3_decoder = {
2440 2441
    .name                  = "vp3",
    .type                  = AVMEDIA_TYPE_VIDEO,
2442
    .id                    = AV_CODEC_ID_VP3,
2443 2444 2445 2446 2447 2448 2449 2450
    .priv_data_size        = sizeof(Vp3DecodeContext),
    .init                  = vp3_decode_init,
    .close                 = vp3_decode_end,
    .decode                = vp3_decode_frame,
    .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_DRAW_HORIZ_BAND |
                             CODEC_CAP_FRAME_THREADS,
    .flush                 = vp3_decode_flush,
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP3"),
2451
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp3_init_thread_copy),
2452
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp3_update_thread_context),
2453
};