/*
 * VP8 compatible video decoder
 *
 * Copyright (C) 2010 David Conrad
 * Copyright (C) 2010 Ronald S. Bultje
 * Copyright (C) 2010 Jason Garrett-Glaser
 * Copyright (C) 2012 Daniel Kang
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

26
#include "libavutil/imgutils.h"
David Conrad's avatar
David Conrad committed
27
#include "avcodec.h"
28
#include "internal.h"
29
#include "vp8.h"
David Conrad's avatar
David Conrad committed
30 31
#include "vp8data.h"
#include "rectangle.h"
32
#include "thread.h"
David Conrad's avatar
David Conrad committed
33

34 35 36 37
#if ARCH_ARM
#   include "arm/vp8.h"
#endif

38 39
/**
 * Release the work buffers hanging off the decoder context.
 *
 * If the per-thread data array exists, each of its MAX_THREADS entries has
 * its condition variable and mutex destroyed (only when built with
 * HAVE_THREADS) and its filter_strength array freed. The array itself and
 * the macroblock, intra-4x4 top prediction, top non-zero and top border
 * buffers are then freed, and s->macroblocks is reset to NULL.
 */
static void free_buffers(VP8Context *s)
{
    if (s->thread_data) {
        int t;

        for (t = 0; t < MAX_THREADS; t++) {
            VP8ThreadData *td = &s->thread_data[t];
#if HAVE_THREADS
            pthread_cond_destroy(&td->cond);
            pthread_mutex_destroy(&td->lock);
#endif
            av_freep(&td->filter_strength);
        }
    }

    av_freep(&s->thread_data);
    av_freep(&s->macroblocks_base);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->top_border);

    // s->macroblocks pointed into macroblocks_base, which is gone now
    s->macroblocks = NULL;
}

58
/**
 * Obtain a picture buffer and a zeroed segmentation map for a frame.
 *
 * @param s   decoder context
 * @param f   frame to fill in
 * @param ref non-zero to request the buffer with AV_GET_BUFFER_FLAG_REF
 * @return 0 on success, the negative error from ff_thread_get_buffer(), or
 *         AVERROR(ENOMEM) if the segmentation map cannot be allocated (the
 *         picture buffer is released again in that case)
 */
static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
{
    const int flags = ref ? AV_GET_BUFFER_FLAG_REF : 0;
    int err         = ff_thread_get_buffer(s->avctx, &f->tf, flags);

    if (err < 0)
        return err;

    // one zero-initialised byte per macroblock
    f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height);
    if (f->seg_map)
        return 0;

    ff_thread_release_buffer(s->avctx, &f->tf);
    return AVERROR(ENOMEM);
}

71
/**
 * Drop the references a frame holds: first its segmentation map, then its
 * picture buffer.
 */
static void vp8_release_frame(VP8Context *s, VP8Frame *f)
{
    AVCodecContext *avctx = s->avctx;

    av_buffer_unref(&f->seg_map);
    ff_thread_release_buffer(avctx, &f->tf);
}

/**
 * Make dst a new reference to the picture and segmentation map of src.
 *
 * Whatever dst held before is released first. If src has no segmentation
 * map, only the picture is referenced.
 *
 * @return 0 on success, the negative error from ff_thread_ref_frame(), or
 *         AVERROR(ENOMEM) if the segmentation map could not be referenced
 *         (dst is released again in that case)
 */
static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
{
    int err;

    vp8_release_frame(s, dst);

    err = ff_thread_ref_frame(&dst->tf, &src->tf);
    if (err < 0)
        return err;

    if (!src->seg_map)
        return 0;

    dst->seg_map = av_buffer_ref(src->seg_map);
    if (!dst->seg_map) {
        vp8_release_frame(s, dst);
        return AVERROR(ENOMEM);
    }

    return 0;
}

94 95

/**
 * Release every frame held by the decoder and clear the frame pointer table.
 *
 * @param avctx    codec context whose priv_data is the VP8Context
 * @param free_mem when non-zero, the work buffers are freed as well
 */
static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
{
    VP8Context *s         = avctx->priv_data;
    VP8Frame *frame       = s->frames;
    VP8Frame *const limit = s->frames + FF_ARRAY_ELEMS(s->frames);

    for (; frame < limit; frame++)
        vp8_release_frame(s, frame);

    memset(s->framep, 0, sizeof(s->framep));

    if (!free_mem)
        return;

    free_buffers(s);
}

/**
 * Flush the decoder: release all frames but keep the work buffers.
 */
static void vp8_decode_flush(AVCodecContext *avctx)
{
    const int keep_buffers = 0; // passed as free_mem: do not free buffers

    vp8_decode_flush_impl(avctx, keep_buffers);
}

/**
 * (Re)allocate all buffers whose size depends on the frame dimensions.
 *
 * If the requested size differs from the codec context's, or existing
 * buffers were sized for a different macroblock grid, all frames and buffers
 * are flushed and the new dimensions are set on the codec context. The
 * macroblock grid size and layout are then derived and the work buffers and
 * per-thread data are allocated.
 *
 * On any allocation failure everything allocated here is freed again, so
 * s->macroblocks_base is NULL afterwards and a later call can retry cleanly.
 *
 * @param s      decoder context
 * @param width  new frame width in pixels
 * @param height new frame height in pixels
 * @return 0 on success, the negative error from ff_set_dimensions(), or
 *         AVERROR(ENOMEM) if a buffer could not be allocated
 */
static int update_dimensions(VP8Context *s, int width, int height)
{
    AVCodecContext *avctx = s->avctx;
    int i, ret;
    int alloc_failed = 0;

    if (width  != avctx->width  ||
        height != avctx->height ||
        (s->macroblocks_base &&
         ((width  + 15) / 16 != s->mb_width ||
          (height + 15) / 16 != s->mb_height))) {
        vp8_decode_flush_impl(avctx, 1);

        ret = ff_set_dimensions(avctx, width, height);
        if (ret < 0)
            return ret;
    }

    s->mb_width  = (avctx->coded_width  + 15) / 16;
    s->mb_height = (avctx->coded_height + 15) / 16;

    s->mb_layout = (avctx->active_thread_type == FF_THREAD_SLICE) &&
                   (FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1);
    if (!s->mb_layout) { // Frame threading and one thread
        s->macroblocks_base       = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
        s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
    }
    else // Sliced threading
        s->macroblocks_base       = av_mallocz((s->mb_width+2)*(s->mb_height+2)*sizeof(*s->macroblocks));
    s->top_nnz                    = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
    s->top_border                 = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
    s->thread_data                = av_mallocz(MAX_THREADS*sizeof(VP8ThreadData));

    if (s->thread_data) {
        // Always walk the whole array, even after a failed allocation:
        // free_buffers() destroys the mutex/cond of every entry, so every
        // entry must have been initialised before it may be called.
        for (i = 0; i < MAX_THREADS; i++) {
#if HAVE_THREADS
            pthread_mutex_init(&s->thread_data[i].lock, NULL);
            pthread_cond_init(&s->thread_data[i].cond, NULL);
#endif
            s->thread_data[i].filter_strength = av_mallocz(s->mb_width*sizeof(*s->thread_data[0].filter_strength));
            if (!s->thread_data[i].filter_strength)
                alloc_failed = 1;
        }
    } else {
        alloc_failed = 1;
    }

    if (alloc_failed ||
        !s->macroblocks_base || !s->top_nnz || !s->top_border ||
        (!s->intra4x4_pred_mode_top && !s->mb_layout)) {
        // do not leave a half-allocated state behind
        free_buffers(s);
        return AVERROR(ENOMEM);
    }

    // element 0 of macroblocks_base is skipped
    s->macroblocks        = s->macroblocks_base + 1;

    return 0;
}

/**
 * Read the segmentation part of the frame header from the range coder.
 *
 * Reads, in order: the update-map flag, the update-data flag, and if the
 * latter is set, the absolute-values flag followed by four signed 7-bit
 * quantizer values and four signed 6-bit filter levels. If the map is
 * updated, three segment-id probabilities follow, each either coded
 * explicitly as 8 bits or defaulting to 255.
 */
static void parse_segment_info(VP8Context *s)
{
    VP56RangeCoder *rc = &s->c;
    int update_data, n;

    s->segmentation.update_map = vp8_rac_get(rc);
    update_data                = vp8_rac_get(rc);

    if (update_data) { // update segment feature data
        s->segmentation.absolute_vals = vp8_rac_get(rc);

        for (n = 0; n < 4; n++)
            s->segmentation.base_quant[n]   = vp8_rac_get_sint(rc, 7);

        for (n = 0; n < 4; n++)
            s->segmentation.filter_level[n] = vp8_rac_get_sint(rc, 6);
    }

    if (!s->segmentation.update_map)
        return;

    for (n = 0; n < 3; n++) {
        if (vp8_rac_get(rc))
            s->prob->segmentid[n] = vp8_rac_get_uint(rc, 8);
        else
            s->prob->segmentid[n] = 255;
    }
}

/**
 * Read loop filter delta updates from the range coder.
 *
 * For each of the four reference-frame deltas, and then for each mode delta
 * from MODE_I4x4 through VP8_MVMODE_SPLIT, an update flag is read. When set,
 * a 6-bit magnitude and a sign flag follow and the delta is replaced.
 */
static void update_lf_deltas(VP8Context *s)
{
    VP56RangeCoder *rc = &s->c;
    int n;

    for (n = 0; n < 4; n++) {
        if (!vp8_rac_get(rc))
            continue;

        s->lf_delta.ref[n] = vp8_rac_get_uint(rc, 6);
        if (vp8_rac_get(rc))
            s->lf_delta.ref[n] = -s->lf_delta.ref[n];
    }

    for (n = MODE_I4x4; n <= VP8_MVMODE_SPLIT; n++) {
        if (!vp8_rac_get(rc))
            continue;

        s->lf_delta.mode[n] = vp8_rac_get_uint(rc, 6);
        if (vp8_rac_get(rc))
            s->lf_delta.mode[n] = -s->lf_delta.mode[n];
    }
}

/**
 * Set up one range decoder per DCT coefficient partition.
 *
 * The partition count (1, 2, 4 or 8) is read as a 2-bit exponent from the
 * header range coder. buf starts with a table of 3-byte little-endian sizes
 * for all partitions but the last; the last one takes whatever remains.
 *
 * @param s        decoder context
 * @param buf      data following the first (header) partition
 * @param buf_size number of bytes available at buf
 * @return 0 on success, -1 if the size table or a partition does not fit
 */
static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
{
    const uint8_t *size_table = buf;
    int table_bytes, last, p;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
    last                    = s->num_coeff_partitions - 1;
    table_bytes             = 3 * last;

    buf      += table_bytes;
    buf_size -= table_bytes;
    if (buf_size < 0)
        return -1;

    for (p = 0; p < last; p++) {
        int part_size = AV_RL24(size_table + 3*p);

        if (part_size > buf_size)
            return -1;

        ff_vp56_init_range_decoder(&s->coeff_partition[p], buf, part_size);
        buf      += part_size;
        buf_size -= part_size;
    }
    // the final partition has no size entry: it spans the rest of the data
    ff_vp56_init_range_decoder(&s->coeff_partition[p], buf, buf_size);

    return 0;
}

/**
 * Read the quantizer indices from the header and fill the dequantization
 * factors of all four segments.
 *
 * A 7-bit luma AC base index is read, followed by five signed 4-bit deltas
 * (luma DC, second-order luma DC and AC, chroma DC and AC). For each segment
 * the base index is the frame's, the segment's absolute value, or the sum of
 * both, depending on the segmentation settings. Each index is clipped to
 * 7 bits before the table lookup.
 */
static void get_quants(VP8Context *s)
{
    VP56RangeCoder *rc = &s->c;
    int seg, q;

    int yac_qi     = vp8_rac_get_uint(rc, 7);
    int ydc_delta  = vp8_rac_get_sint(rc, 4);
    int y2dc_delta = vp8_rac_get_sint(rc, 4);
    int y2ac_delta = vp8_rac_get_sint(rc, 4);
    int uvdc_delta = vp8_rac_get_sint(rc, 4);
    int uvac_delta = vp8_rac_get_sint(rc, 4);

    for (seg = 0; seg < 4; seg++) {
        if (!s->segmentation.enabled) {
            q = yac_qi;
        } else {
            q = s->segmentation.base_quant[seg];
            if (!s->segmentation.absolute_vals)
                q += yac_qi;
        }

        s->qmat[seg].luma_qmul[0]    =           vp8_dc_qlookup[av_clip_uintp2(q + ydc_delta , 7)];
        s->qmat[seg].luma_qmul[1]    =           vp8_ac_qlookup[av_clip_uintp2(q             , 7)];
        s->qmat[seg].luma_dc_qmul[0] =       2 * vp8_dc_qlookup[av_clip_uintp2(q + y2dc_delta, 7)];
        /* 101581>>16 is equivalent to 155/100 */
        s->qmat[seg].luma_dc_qmul[1] = (101581 * vp8_ac_qlookup[av_clip_uintp2(q + y2ac_delta, 7)]) >> 16;
        s->qmat[seg].chroma_qmul[0]  =           vp8_dc_qlookup[av_clip_uintp2(q + uvdc_delta, 7)];
        s->qmat[seg].chroma_qmul[1]  =           vp8_ac_qlookup[av_clip_uintp2(q + uvac_delta, 7)];

        // lower bound on the second-order AC factor
        if (s->qmat[seg].luma_dc_qmul[1] < 8)
            s->qmat[seg].luma_dc_qmul[1] = 8;
        // upper bound on the chroma DC factor
        if (s->qmat[seg].chroma_qmul[0] > 132)
            s->qmat[seg].chroma_qmul[0] = 132;
    }
}

/**
 * Determine which buffer golden or altref should be updated with after this
 * frame. Per the original author, the spec is unclear here and this follows
 * their understanding of what libvpx does.
 *
 * If the update flag is set, the reference is replaced by the current frame
 * and nothing more is read. Otherwise a 2-bit code is read, meaning:
 *      0: no update
 *      1: VP56_FRAME_PREVIOUS
 *      2: update golden with altref, or update altref with golden
 * Any other code also means no update.
 *
 * @param s      decoder context
 * @param update the already-parsed update flag for this reference
 * @param ref    the reference being decided (golden or altref)
 * @return the frame to copy from, or VP56_FRAME_NONE for no update
 */
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
{
    VP56RangeCoder *rc = &s->c;
    int code;

    if (update)
        return VP56_FRAME_CURRENT;

    code = vp8_rac_get_uint(rc, 2);

    if (code == 1)
        return VP56_FRAME_PREVIOUS;
    if (code == 2) {
        // swap source: golden takes altref, altref takes golden
        if (ref == VP56_FRAME_GOLDEN)
            return VP56_FRAME_GOLDEN2;
        return VP56_FRAME_GOLDEN;
    }
    return VP56_FRAME_NONE;
}

/**
 * Read the golden/altref update information for an inter frame.
 *
 * Both update flags are read first; the optional 2-bit copy codes are read
 * afterwards, golden before altref.
 */
static void update_refs(VP8Context *s)
{
    VP56RangeCoder *rc = &s->c;
    int golden_flag, altref_flag;

    golden_flag = vp8_rac_get(rc);
    altref_flag = vp8_rac_get(rc);

    s->update_golden = ref_to_update(s, golden_flag, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, altref_flag, VP56_FRAME_GOLDEN2);
}

/**
 * Parse the 3-byte frame tag, the keyframe start code and dimensions, and
 * the frame header stored in the first partition.
 *
 * Besides filling the header fields of the context, this sets up the
 * coefficient partition decoders and (re)allocates the dimension-dependent
 * buffers when needed.
 *
 * @param s        decoder context
 * @param buf      start of the frame data
 * @param buf_size number of bytes available at buf
 * @return 0 on success, AVERROR_INVALIDDATA for truncated or malformed
 *         data, or the negative error returned by update_dimensions()
 */
static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, i, j, k, l, m, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    // the 3-byte frame tag is read unconditionally just below
    if (buf_size < 3) {
        av_log(s->avctx, AV_LOG_ERROR, "Insufficient data (%d) for header\n", buf_size);
        return AVERROR_INVALIDDATA;
    }

    s->keyframe  = !(buf[0] & 1);
    s->profile   =  (buf[0]>>1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;
    buf      += 3;
    buf_size -= 3;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
    else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));

    // keyframes carry 7 extra bytes (start code and dimensions) before the
    // first partition
    if (header_size > buf_size - 7*s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        width  = AV_RL16(buf+3) & 0x3fff;
        height = AV_RL16(buf+5) & 0x3fff;
        hscale = buf[4] >> 6;
        vscale = buf[6] >> 6;
        buf      += 7;
        buf_size -= 7;

        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        // a keyframe resets references, probabilities, segmentation and
        // loop filter deltas
        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        for (i = 0; i < 4; i++)
            for (j = 0; j < 16; j++)
                memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                       sizeof(s->prob->token[i][j]));
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc      , vp8_mv_default_prob     , sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    }

    ff_vp56_init_range_decoder(c, buf, header_size);
    buf      += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        if (vp8_rac_get(c))
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        vp8_rac_get(c); // whether we can skip clamping in dsp functions
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c)))
        if (vp8_rac_get(c))
            update_lf_deltas(s);

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height || (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) {
        if ((ret = update_dimensions(s, width, height)) < 0)
            return ret;
    }

    get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    // token probability updates are coded per band and applied to every
    // coefficient position belonging to that band
    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS-1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
                    }

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);

        if (vp8_rac_get(c))
            for (i = 0; i < 4; i++)
                s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
        if (vp8_rac_get(c))
            for (i = 0; i < 3; i++)
                s->prob->pred8x8c[i]  = vp8_rac_get_uint(c, 8);

        // 17.2 MV probability update
        for (i = 0; i < 2; i++)
            for (j = 0; j < 19; j++)
                if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                    s->prob->mvc[i][j] = vp8_rac_get_nn(c);
    }

    return 0;
}

440
/**
 * Copy a motion vector, clipping each component to the range
 * [s->mv_min, s->mv_max].
 */
static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
{
    const int cx = av_clip(src->x, s->mv_min.x, s->mv_max.x);
    const int cy = av_clip(src->y, s->mv_min.y, s->mv_max.y);

    dst->x = cx;
    dst->y = cy;
}

/**
 * Motion vector coding, 17.1.
 *
 * Reads one motion vector component. p[0] selects between the long form
 * (magnitude bits coded individually with p[9..18]) and the short form (a
 * small tree starting at p[2]). p[1] codes the sign, which is only read for
 * a non-zero magnitude.
 *
 * @param c range coder to read from
 * @param p probabilities for this component
 * @return the signed component value
 */
static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    int mag = 0;

    if (vp56_rac_get_prob_branchy(c, p[0])) {
        int b;

        // bits 0..2 first, then bits 9 down to 4
        for (b = 0; b < 3; b++)
            mag += vp56_rac_get_prob(c, p[9 + b]) << b;
        for (b = 9; b > 3; b--)
            mag += vp56_rac_get_prob(c, p[9 + b]) << b;
        // bit 3 is only coded when a higher bit is set; otherwise it is 1
        if (!(mag & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
            mag += 8;
    } else {
        // small_mvtree
        const uint8_t *node = p+2;
        int hi, mid;

        hi    = vp56_rac_get_prob(c, *node);
        node += 1 + 3*hi;
        mid   = vp56_rac_get_prob(c, *node);
        node += 1 + mid;
        mag   = 4*hi + 2*mid;
        mag  += vp56_rac_get_prob(c, *node);
    }

    if (mag && vp56_rac_get_prob(c, p[1]))
        return -mag;
    return mag;
}

477 478
static av_always_inline
const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
David Conrad's avatar
David Conrad committed
479
{
480 481 482
    if (left == top)
        return vp8_submv_prob[4-!!left];
    if (!top)
David Conrad's avatar
David Conrad committed
483
        return vp8_submv_prob[2];
484
    return vp8_submv_prob[1-!!left];
David Conrad's avatar
David Conrad committed
485 486 487 488
}

/**
 * Split motion vector prediction, 16.4.
489
 * @returns the number of motion vectors parsed (2, 4 or 16)
David Conrad's avatar
David Conrad committed
490
 */
491
static av_always_inline
492
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int layout)
David Conrad's avatar
David Conrad committed
493
{
494 495
    int part_idx;
    int n, num;
496
    VP8Macroblock *top_mb;
497 498
    VP8Macroblock *left_mb = &mb[-1];
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
499
                  *mbsplits_top,
500
                  *mbsplits_cur, *firstidx;
501
    VP56mv *top_mv;
502 503
    VP56mv *left_mv = left_mb->bmv;
    VP56mv *cur_mv  = mb->bmv;
David Conrad's avatar
David Conrad committed
504

505 506 507 508 509 510 511
    if (!layout) // layout is inlined, s->mb_layout is not
        top_mb = &mb[2];
    else
        top_mb = &mb[-s->mb_width-1];
    mbsplits_top = vp8_mbsplits[top_mb->partitioning];
    top_mv = top_mb->bmv;

512 513 514 515 516 517 518 519 520 521 522 523 524 525 526
    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
        } else {
            part_idx = VP8_SPLITMVMODE_8x8;
        }
    } else {
        part_idx = VP8_SPLITMVMODE_4x4;
    }

    num = vp8_mbsplit_count[part_idx];
    mbsplits_cur = vp8_mbsplits[part_idx],
    firstidx = vp8_mbfirstidx[part_idx];
    mb->partitioning = part_idx;

David Conrad's avatar
David Conrad committed
527
    for (n = 0; n < num; n++) {
528
        int k = firstidx[n];
529
        uint32_t left, above;
530 531
        const uint8_t *submv_prob;

532 533 534 535 536 537 538 539
        if (!(k & 3))
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
        else
            left  = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
        if (k <= 3)
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
        else
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
540 541

        submv_prob = get_submv_prob(left, above);
David Conrad's avatar
David Conrad committed
542

543 544 545 546 547 548 549 550 551 552 553 554
        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
                    mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
                    mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
                } else {
                    AV_ZERO32(&mb->bmv[n]);
                }
            } else {
                AV_WN32A(&mb->bmv[n], above);
            }
        } else {
555
            AV_WN32A(&mb->bmv[n], left);
David Conrad's avatar
David Conrad committed
556 557
        }
    }
558 559

    return num;
David Conrad's avatar
David Conrad committed
560 561
}

562
static av_always_inline
563
void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout)
564
{
565
    VP8Macroblock *mb_edge[3] = { 0 /* top */,
566
                                  mb - 1 /* left */,
567
                                  0 /* top-left */ };
568
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
569
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
570 571
    int idx = CNT_ZERO;
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
572
    int8_t *sign_bias = s->sign_bias;
573 574 575 576
    VP56mv near_mv[4];
    uint8_t cnt[4] = { 0 };
    VP56RangeCoder *c = &s->c;

577 578 579 580 581 582 583 584 585
    if (!layout) { // layout is inlined (s->mb_layout is not)
        mb_edge[0] = mb + 2;
        mb_edge[2] = mb + 1;
    }
    else {
        mb_edge[0] = mb - s->mb_width-1;
        mb_edge[2] = mb - s->mb_width-2;
    }

586 587
    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
588
    AV_ZERO32(&near_mv[2]);
589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619

    /* Process MB on top, left and top-left */
    #define MV_EDGE_CHECK(n)\
    {\
        VP8Macroblock *edge = mb_edge[n];\
        int edge_ref = edge->ref_frame;\
        if (edge_ref != VP56_FRAME_CURRENT) {\
            uint32_t mv = AV_RN32A(&edge->mv);\
            if (mv) {\
                if (cur_sign_bias != sign_bias[edge_ref]) {\
                    /* SWAR negate of the values in mv. */\
                    mv = ~mv;\
                    mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
                }\
                if (!n || mv != AV_RN32A(&near_mv[idx]))\
                    AV_WN32A(&near_mv[++idx], mv);\
                cnt[idx]      += 1 + (n != 2);\
            } else\
                cnt[CNT_ZERO] += 1 + (n != 2);\
        }\
    }

    MV_EDGE_CHECK(0)
    MV_EDGE_CHECK(1)
    MV_EDGE_CHECK(2)

    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        /* If we have three distinct MVs, merge first and last if they're the same */
620
        if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
621 622 623 624 625 626 627 628 629 630 631 632
            cnt[CNT_NEAREST] += 1;

        /* Swap near and nearest if necessary */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t,     cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
            FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
        }

        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {

                /* Choose the best mv out of 0,0 and the nearest mv */
633
                clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
634 635 636
                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
                                    (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
                                    (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
637 638 639

                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
640
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout) - 1];
641 642 643 644 645 646
                } else {
                    mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
647
                clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
648 649 650
                mb->bmv[0] = mb->mv;
            }
        } else {
651
            clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
652 653 654 655 656 657 658 659 660
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}

661
static av_always_inline
662
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
663
                           int mb_x, int keyframe, int layout)
David Conrad's avatar
David Conrad committed
664
{
665 666
    uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;

667
    if (layout) {
668 669 670
        VP8Macroblock *mb_top = mb - s->mb_width - 1;
        memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
    }
671
    if (keyframe) {
672
        int x, y;
673
        uint8_t* top;
674
        uint8_t* const left = s->intra4x4_pred_mode_left;
675
        if (layout)
676 677 678
            top = mb->intra4x4_pred_mode_top;
        else
            top = s->intra4x4_pred_mode_top + 4 * mb_x;
679 680
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
681 682 683 684 685
                const uint8_t *ctx;
                ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
                left[y] = top[x] = *intra4x4;
                intra4x4++;
David Conrad's avatar
David Conrad committed
686 687
            }
        }
688
    } else {
689
        int i;
690 691
        for (i = 0; i < 16; i++)
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
David Conrad's avatar
David Conrad committed
692 693 694
    }
}

695
static av_always_inline
696 697
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                    uint8_t *segment, uint8_t *ref, int layout)
David Conrad's avatar
David Conrad committed
698 699 700
{
    VP56RangeCoder *c = &s->c;

701 702 703
    if (s->segmentation.update_map) {
        int bit  = vp56_rac_get_prob(c, s->prob->segmentid[0]);
        *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
704
    } else if (s->segmentation.enabled)
705
        *segment = ref ? *ref : *segment;
706
    mb->segment = *segment;
David Conrad's avatar
David Conrad committed
707

708
    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
David Conrad's avatar
David Conrad committed
709 710 711 712 713

    if (s->keyframe) {
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);

        if (mb->mode == MODE_I4x4) {
714
            decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
715 716
        } else {
            const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
717
            if (s->mb_layout)
718 719 720 721
                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
            else
                AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A( s->intra4x4_pred_mode_left, modes);
722
        }
David Conrad's avatar
David Conrad committed
723

724
        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
David Conrad's avatar
David Conrad committed
725
        mb->ref_frame = VP56_FRAME_CURRENT;
726
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
David Conrad's avatar
David Conrad committed
727
        // inter MB, 16.2
728 729
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
            mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
David Conrad's avatar
David Conrad committed
730 731 732
                VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
        else
            mb->ref_frame = VP56_FRAME_PREVIOUS;
733
        s->ref_count[mb->ref_frame-1]++;
David Conrad's avatar
David Conrad committed
734 735

        // motion vectors, 16.3
736
        decode_mvs(s, mb, mb_x, mb_y, layout);
David Conrad's avatar
David Conrad committed
737 738 739 740
    } else {
        // intra MB, 16.1
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);

741
        if (mb->mode == MODE_I4x4)
742
            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
David Conrad's avatar
David Conrad committed
743

744
        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
David Conrad's avatar
David Conrad committed
745
        mb->ref_frame = VP56_FRAME_CURRENT;
746
        mb->partitioning = VP8_SPLITMVMODE_NONE;
747
        AV_ZERO32(&mb->bmv[0]);
David Conrad's avatar
David Conrad committed
748 749 750
    }
}

751
#ifndef decode_block_coeffs_internal
David Conrad's avatar
David Conrad committed
752
/**
753
 * @param r arithmetic bitstream reader context
754 755
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
David Conrad's avatar
David Conrad committed
756
 * @param i initial coeff index, 0 unless a separate DC block is coded
757
 * @param qmul array holding the dc/ac dequant factor at position 0/1
David Conrad's avatar
David Conrad committed
758 759 760
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
Diego Biurrun's avatar
Diego Biurrun committed
761
static int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
762
                                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
763
                                        int i, uint8_t *token_prob, int16_t qmul[2])
David Conrad's avatar
David Conrad committed
764
{
765
    VP56RangeCoder c = *r;
766
    goto skip_eob;
767
    do {
768
        int coeff;
769 770
        if (!vp56_rac_get_prob_branchy(&c, token_prob[0]))   // DCT_EOB
            break;
David Conrad's avatar
David Conrad committed
771

772
skip_eob:
773
        if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
774
            if (++i == 16)
775
                break; // invalid input; blocks should end with EOB
776
            token_prob = probs[i][0];
777
            goto skip_eob;
778 779
        }

780
        if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
781
            coeff = 1;
782
            token_prob = probs[i+1][1];
783
        } else {
784 785
            if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
                coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
786
                if (coeff)
787
                    coeff += vp56_rac_get_prob(&c, token_prob[5]);
788 789 790
                coeff += 2;
            } else {
                // DCT_CAT*
791 792 793
                if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
                    if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
                        coeff  = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
794 795
                    } else {                                    // DCT_CAT2
                        coeff  = 7;
796 797
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
798 799
                    }
                } else {    // DCT_CAT3 and up
800 801
                    int a = vp56_rac_get_prob(&c, token_prob[8]);
                    int b = vp56_rac_get_prob(&c, token_prob[9+a]);
802 803
                    int cat = (a<<1) + b;
                    coeff  = 3 + (8<<cat);
804
                    coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
805 806
                }
            }
807
            token_prob = probs[i+1][2];
808
        }
809
        block[zigzag_scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
810
    } while (++i < 16);
811

812
    *r = c;
813
    return i;
David Conrad's avatar
David Conrad committed
814
}
815
#endif
David Conrad's avatar
David Conrad committed
816

817 818 819 820 821 822 823 824 825 826 827
/**
 * @param c arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param zero_nhood the initial prediction context for number of surrounding
 *                   all-zero blocks (only left/top, so 0-2)
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
828
static av_always_inline
Diego Biurrun's avatar
Diego Biurrun committed
829
int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
830
                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
831 832 833 834 835 836 837 838
                        int i, int zero_nhood, int16_t qmul[2])
{
    uint8_t *token_prob = probs[i][zero_nhood];
    if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
        return 0;
    return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
}

839
static av_always_inline
840
void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Macroblock *mb,
841
                      uint8_t t_nnz[9], uint8_t l_nnz[9])
David Conrad's avatar
David Conrad committed
842 843 844
{
    int i, x, y, luma_start = 0, luma_ctx = 3;
    int nnz_pred, nnz, nnz_total = 0;
845
    int segment = mb->segment;
846
    int block_dc = 0;
David Conrad's avatar
David Conrad committed
847 848 849 850 851

    if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
        nnz_pred = t_nnz[8] + l_nnz[8];

        // decode DC values and do hadamard
852
        nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, nnz_pred,
David Conrad's avatar
David Conrad committed
853 854
                                  s->qmat[segment].luma_dc_qmul);
        l_nnz[8] = t_nnz[8] = !!nnz;
855 856 857 858
        if (nnz) {
            nnz_total += nnz;
            block_dc = 1;
            if (nnz == 1)
859
                s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
860
            else
861
                s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
862
        }
David Conrad's avatar
David Conrad committed
863 864 865 866 867 868 869
        luma_start = 1;
        luma_ctx = 0;
    }

    // luma blocks
    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++) {
870
            nnz_pred = l_nnz[y] + t_nnz[x];
871
            nnz = decode_block_coeffs(c, td->block[y][x], s->prob->token[luma_ctx], luma_start,
872
                                      nnz_pred, s->qmat[segment].luma_qmul);
873
            // nnz+block_dc may be one more than the actual last index, but we don't care
874
            td->non_zero_count_cache[y][x] = nnz + block_dc;
David Conrad's avatar
David Conrad committed
875 876 877 878 879 880 881 882 883 884 885
            t_nnz[x] = l_nnz[y] = !!nnz;
            nnz_total += nnz;
        }

    // chroma blocks
    // TODO: what to do about dimensions? 2nd dim for luma is x,
    // but for chroma it's (y<<1)|x
    for (i = 4; i < 6; i++)
        for (y = 0; y < 2; y++)
            for (x = 0; x < 2; x++) {
                nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
886
                nnz = decode_block_coeffs(c, td->block[i][(y<<1)+x], s->prob->token[2], 0,
David Conrad's avatar
David Conrad committed
887
                                          nnz_pred, s->qmat[segment].chroma_qmul);
888
                td->non_zero_count_cache[i][(y<<1)+x] = nnz;
David Conrad's avatar
David Conrad committed
889 890 891 892 893 894 895 896 897 898 899
                t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
                nnz_total += nnz;
            }

    // if there were no coded coeffs despite the macroblock not being marked skip,
    // we MUST not do the inner loop filter and should not do IDCT
    // Since skip isn't used for bitstream prediction, just manually set it.
    if (!nnz_total)
        mb->skip = 1;
}

900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920
/**
 * Save the bottom pixel row of a macroblock into a top-border entry.
 *
 * Bytes 0-15 receive luma row 15; unless the simple filter is in use,
 * bytes 16-23 and 24-31 receive row 7 of the Cb and Cr blocks.
 */
static av_always_inline
void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
                      int linesize, int uvlinesize, int simple)
{
    AV_COPY128(top_border, src_y + 15*linesize);

    if (simple)
        return;

    AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
    AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
}

/**
 * Exchange or copy the saved top border with the pixel row directly above
 * a macroblock in the frame.
 *
 * @param top_border border entry of this macroblock; the entry 32 bytes
 *                   before it supplies the pixels left of the top row and
 *                   the entry 32 bytes after it the pixels right of it
 * @param src_y      luma block of the macroblock in the frame
 * @param src_cb     Cb block of the macroblock in the frame
 * @param src_cr     Cr block of the macroblock in the frame
 * @param mb_x       macroblock column; the luma pixels right of the top row
 *                   are only touched when this is not the last column
 * @param mb_y       macroblock row
 * @param mb_width   number of macroblock columns
 * @param simple     non-zero for the simple loop filter; chroma is then only
 *                   handled on the first macroblock row
 * @param xchg       for the first 8 bytes of each row and the 8 bytes left
 *                   of it: swap when non-zero, otherwise copy border -> frame;
 *                   all remaining bytes are always swapped
 */
static av_always_inline
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
                    int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
                    int simple, int xchg)
{
    uint8_t *const left_border = top_border - 32;   // for TL prediction
    uint8_t *const above_y     = src_y  - linesize;
    uint8_t *const above_cb    = src_cb - uvlinesize;
    uint8_t *const above_cr    = src_cr - uvlinesize;

#define XCHG(a,b,xchg) do {                     \
        if (xchg) AV_SWAP64(b,a);               \
        else      AV_COPY64(b,a);               \
    } while (0)

    XCHG(left_border+8, above_y-8, xchg);
    XCHG(top_border,    above_y,   xchg);
    XCHG(top_border+8,  above_y+8, 1);
    if (mb_x < mb_width-1)
        XCHG(top_border+32, above_y+16, 1);

    // only copy chroma for normal loop filter
    // or to initialize the top row to 127
    if (!simple || !mb_y) {
        XCHG(left_border+16, above_cb-8, xchg);
        XCHG(left_border+24, above_cr-8, xchg);
        XCHG(top_border+16,  above_cb, 1);
        XCHG(top_border+24,  above_cr, 1);
    }
}

942
static av_always_inline
943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004
int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (!mb_x) {
        return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
    } else {
        return mb_y ? mode : LEFT_DC_PRED8x8;
    }
}

/**
 * Replace TM prediction at the frame edges: vertical prediction in the
 * first column, horizontal prediction in the first row and a constant
 * (DC_129) prediction in the top-left macroblock.
 */
static av_always_inline
int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (mb_x)
        return mb_y ? mode : HOR_PRED8x8;

    return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
}

/**
 * Map a 16x16 luma / 8x8 chroma intra prediction mode to the mode to
 * actually run at macroblock position (mb_x, mb_y), substituting modes
 * whose top or left neighbours lie outside the frame.
 *
 * @return the mode to use; modes not listed below are returned unchanged
 */
static av_always_inline
int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
{
    if (mode == DC_PRED8x8)
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);

    if (mode == VERT_PRED8x8)
        return mb_y ? mode : DC_127_PRED8x8;    // no row above

    if (mode == HOR_PRED8x8)
        return mb_x ? mode : DC_129_PRED8x8;    // no column to the left

    if (mode == PLANE_PRED8x8 /*TM*/)
        return check_tm_pred8x8_mode(mode, mb_x, mb_y);

    return mode;
}

/**
 * 4x4 counterpart of the TM edge substitution: vertical prediction when
 * there is nothing to the left, horizontal prediction when there is
 * nothing above, DC_129 when both are missing.
 */
static av_always_inline
int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
{
    if (mb_x)
        return mb_y ? mode : HOR_VP8_PRED;

    return mb_y ? VERT_VP8_PRED : DC_129_PRED;
}

static av_always_inline
int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
{
    switch (mode) {
    case VERT_PRED:
        if (!mb_x && mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case DIAG_DOWN_LEFT_PRED:
    case VERT_LEFT_PRED:
        return !mb_y ? DC_127_PRED : mode;
    case HOR_PRED:
        if (!mb_y) {
            *copy_buf = 1;
            return mode;
1005
        }
1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017
        /* fall-through */
    case HOR_UP_PRED:
        return !mb_x ? DC_129_PRED : mode;
    case TM_VP8_PRED:
        return check_tm_pred4x4_mode(mode, mb_x, mb_y);
    case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
    case DIAG_DOWN_RIGHT_PRED:
    case VERT_RIGHT_PRED:
    case HOR_DOWN_PRED:
        if (!mb_y || !mb_x)
            *copy_buf = 1;
        return mode;
David Conrad's avatar
David Conrad committed
1018 1019 1020 1021
    }
    return mode;
}

1022
static av_always_inline
1023 1024
void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y)
David Conrad's avatar
David Conrad committed
1025
{
1026 1027
    int x, y, mode, nnz;
    uint32_t tr;
David Conrad's avatar
David Conrad committed
1028

1029 1030
    // for the first row, we need to run xchg_mb_border to init the top edge to 127
    // otherwise, skip it if we aren't going to deblock
1031
    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1032 1033 1034 1035
        xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 1);

David Conrad's avatar
David Conrad committed
1036
    if (mb->mode < MODE_I4x4) {
1037
        mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
David Conrad's avatar
David Conrad committed
1038 1039 1040
        s->hpc.pred16x16[mode](dst[0], s->linesize);
    } else {
        uint8_t *ptr = dst[0];
1041
        uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1042
        uint8_t tr_top[4] = { 127, 127, 127, 127 };
David Conrad's avatar
David Conrad committed
1043 1044 1045 1046 1047 1048 1049

        // all blocks on the right edge of the macroblock use bottom edge
        // the top macroblock for their topright edge
        uint8_t *tr_right = ptr - s->linesize + 16;

        // if we're on the right edge of the frame, said edge is extended
        // from the top macroblock
1050
        if (mb_y &&
1051
            mb_x == s->mb_width-1) {
1052
            tr = tr_right[-1]*0x01010101u;
David Conrad's avatar
David Conrad committed
1053 1054 1055
            tr_right = (uint8_t *)&tr;
        }

1056
        if (mb->skip)
1057
            AV_ZERO128(td->non_zero_count_cache);
1058

David Conrad's avatar
David Conrad committed
1059 1060 1061
        for (y = 0; y < 4; y++) {
            uint8_t *topright = ptr + 4 - s->linesize;
            for (x = 0; x < 4; x++) {
1062 1063 1064 1065
                int copy = 0, linesize = s->linesize;
                uint8_t *dst = ptr+4*x;
                DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];

1066
                if ((y == 0 || x == 3) && mb_y == 0) {
1067 1068
                    topright = tr_top;
                } else if (x == 3)
David Conrad's avatar
David Conrad committed
1069 1070
                    topright = tr_right;

1071 1072 1073 1074 1075 1076 1077 1078 1079
                mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
                if (copy) {
                    dst = copy_dst + 12;
                    linesize = 8;
                    if (!(mb_y + y)) {
                        copy_dst[3] = 127U;
                        AV_WN32A(copy_dst+4, 127U * 0x01010101U);
                    } else {
                        AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
1080
                        if (!(mb_x + x)) {
1081
                            copy_dst[3] = 129U;
1082
                        } else {
1083
                            copy_dst[3] = ptr[4*x-s->linesize-1];
1084 1085
                        }
                    }
1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096
                    if (!(mb_x + x)) {
                        copy_dst[11] =
                        copy_dst[19] =
                        copy_dst[27] =
                        copy_dst[35] = 129U;
                    } else {
                        copy_dst[11] = ptr[4*x              -1];
                        copy_dst[19] = ptr[4*x+s->linesize  -1];
                        copy_dst[27] = ptr[4*x+s->linesize*2-1];
                        copy_dst[35] = ptr[4*x+s->linesize*3-1];
                    }
1097 1098 1099
                }
                s->hpc.pred4x4[mode](dst, topright, linesize);
                if (copy) {
1100 1101 1102 1103
                    AV_COPY32(ptr+4*x              , copy_dst+12);
                    AV_COPY32(ptr+4*x+s->linesize  , copy_dst+20);
                    AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
                    AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
1104
                }
David Conrad's avatar
David Conrad committed
1105

1106
                nnz = td->non_zero_count_cache[y][x];
David Conrad's avatar
David Conrad committed
1107 1108
                if (nnz) {
                    if (nnz == 1)
1109
                        s->vp8dsp.vp8_idct_dc_add(ptr+4*x, td->block[y][x], s->linesize);
David Conrad's avatar
David Conrad committed
1110
                    else
1111
                        s->vp8dsp.vp8_idct_add(ptr+4*x, td->block[y][x], s->linesize);
David Conrad's avatar
David Conrad committed
1112 1113 1114 1115 1116
                }
                topright += 4;
            }

            ptr   += 4*s->linesize;
1117
            intra4x4 += 4;
David Conrad's avatar
David Conrad committed
1118 1119 1120
        }
    }

1121
    mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode, mb_x, mb_y);
David Conrad's avatar
David Conrad committed
1122 1123
    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
1124

1125
    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1126 1127 1128
        xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 0);
David Conrad's avatar
David Conrad committed
1129 1130
}

1131 1132 1133 1134 1135 1136 1137
/* Per-axis lookup used by the motion compensation functions, indexed by the
 * sub-pixel position (0-7) of a motion vector component. Position 0 is the
 * full-pixel case and needs no extra pixels. */
static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
                                // also function pointer index
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
                                // (left and right together)
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
};

David Conrad's avatar
David Conrad committed
1138
/**
1139
 * luma MC function
David Conrad's avatar
David Conrad committed
1140 1141 1142
 *
 * @param s VP8 decoding context
 * @param dst target buffer for block data at block position
1143
 * @param ref reference picture buffer at origin (0, 0)
David Conrad's avatar
David Conrad committed
1144 1145 1146 1147 1148 1149 1150 1151
 * @param mv motion vector (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block (16, 8 or 4)
 * @param block_h height of block (always same as block_w)
 * @param width width of src/dst plane data
 * @param height height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
1152
 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
David Conrad's avatar
David Conrad committed
1153
 */
1154
static av_always_inline
1155
void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1156
                 ThreadFrame *ref, const VP56mv *mv,
1157
                 int x_off, int y_off, int block_w, int block_h,
1158
                 int width, int height, ptrdiff_t linesize,
1159
                 vp8_mc_func mc_func[3][3])
David Conrad's avatar
David Conrad committed
1160
{
1161
    uint8_t *src = ref->f->data[0];
1162

1163
    if (AV_RN32A(mv)) {
1164
        int src_linesize = linesize;
1165 1166 1167 1168 1169 1170

        int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
        int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 2;
        y_off += mv->y >> 2;
1171 1172

        // edge emulation
1173
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1174
        src += y_off * linesize + x_off;
1175 1176
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1177 1178
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src - my_idx * linesize - mx_idx,
1179
                                     EDGE_EMU_LINESIZE, linesize,
1180 1181
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
1182
                                     x_off - mx_idx, y_off - my_idx, width, height);
1183 1184
            src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            src_linesize = EDGE_EMU_LINESIZE;
1185
        }
1186
        mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
1187 1188
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1189
        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
1190
    }
David Conrad's avatar
David Conrad committed
1191 1192
}

1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209
/**
 * chroma MC function
 *
 * @param s VP8 decoding context
 * @param dst1 target buffer for block data at block position (U plane)
 * @param dst2 target buffer for block data at block position (V plane)
 * @param ref reference picture buffer at origin (0, 0)
 * @param mv motion vector (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block (16, 8 or 4)
 * @param block_h height of block (always same as block_w)
 * @param width width of src/dst plane data
 * @param height height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
 */
1210
static av_always_inline
1211
void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst2,
1212
                   ThreadFrame *ref, const VP56mv *mv, int x_off, int y_off,
1213
                   int block_w, int block_h, int width, int height, ptrdiff_t linesize,
1214 1215
                   vp8_mc_func mc_func[3][3])
{
1216
    uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
1217

1218 1219 1220 1221 1222 1223 1224 1225 1226 1227
    if (AV_RN32A(mv)) {
        int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
        int my = mv->y&7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 3;
        y_off += mv->y >> 3;

        // edge emulation
        src1 += y_off * linesize + x_off;
        src2 += y_off * linesize + x_off;
1228
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1229 1230
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1231 1232
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src1 - my_idx * linesize - mx_idx,
1233
                                     EDGE_EMU_LINESIZE, linesize,
1234 1235
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
1236
                                     x_off - mx_idx, y_off - my_idx, width, height);
1237 1238
            src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);
1239

1240 1241
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src2 - my_idx * linesize - mx_idx,
1242
                                     EDGE_EMU_LINESIZE, linesize,
1243 1244
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
1245
                                     x_off - mx_idx, y_off - my_idx, width, height);
1246
            src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1247
            mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
1248 1249 1250 1251 1252
        } else {
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        }
    } else {
1253
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1254 1255 1256 1257 1258
        mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
        mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
    }
}

1259
static av_always_inline
1260
void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1261
                 ThreadFrame *ref_frame, int x_off, int y_off,
1262 1263 1264
                 int bx_off, int by_off,
                 int block_w, int block_h,
                 int width, int height, VP56mv *mv)
1265 1266 1267 1268
{
    VP56mv uvmv = *mv;

    /* Y */
1269
    vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1270
                ref_frame, mv, x_off + bx_off, y_off + by_off,
1271 1272
                block_w, block_h, width, height, s->linesize,
                s->put_pixels_tab[block_w == 8]);
1273 1274 1275 1276 1277 1278 1279 1280 1281 1282

    /* U/V */
    if (s->profile == 3) {
        uvmv.x &= ~7;
        uvmv.y &= ~7;
    }
    x_off   >>= 1; y_off   >>= 1;
    bx_off  >>= 1; by_off  >>= 1;
    width   >>= 1; height  >>= 1;
    block_w >>= 1; block_h >>= 1;
1283
    vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1284 1285
                  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
                  &uvmv, x_off + bx_off, y_off + by_off,
1286 1287
                  block_w, block_h, width, height, s->uvlinesize,
                  s->put_pixels_tab[1 + (block_w == 4)]);
1288 1289
}

1290 1291
/* Fetch pixels for estimated mv 4 macroblocks ahead.
 * Optimized for 64-byte cache lines.  Inspired by ffh264 prefetch_motion. */
1292
static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
1293
{
1294 1295
    /* Don't prefetch refs that haven't been used very often this frame. */
    if (s->ref_count[ref-1] > (mb_xy >> 5)) {
1296
        int x_off = mb_x << 4, y_off = mb_y << 4;
1297 1298
        int mx = (mb->mv.x>>2) + x_off + 8;
        int my = (mb->mv.y>>2) + y_off;
1299
        uint8_t **src= s->framep[ref]->tf.f->data;
1300
        int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
1301 1302 1303
        /* For threading, a ff_thread_await_progress here might be useful, but
         * it actually slows down the decoder. Since a bad prefetch doesn't
         * generate bad decoder output, we don't run it here. */
1304
        s->vdsp.prefetch(src[0]+off, s->linesize, 4);
1305
        off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
1306
        s->vdsp.prefetch(src[1]+off, src[2]-src[1], 2);
1307
    }
1308 1309
}

David Conrad's avatar
David Conrad committed
1310 1311 1312
/**
 * Apply motion vectors to prediction buffer, chapter 18.
 */
1313
static av_always_inline
1314 1315
void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y)
David Conrad's avatar
David Conrad committed
1316 1317 1318
{
    int x_off = mb_x << 4, y_off = mb_y << 4;
    int width = 16*s->mb_width, height = 16*s->mb_height;
1319
    ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
1320
    VP56mv *bmv = mb->bmv;
David Conrad's avatar
David Conrad committed
1321

1322 1323
    switch (mb->partitioning) {
    case VP8_SPLITMVMODE_NONE:
1324
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1325
                    0, 0, 16, 16, width, height, &mb->mv);
1326
        break;
1327
    case VP8_SPLITMVMODE_4x4: {
David Conrad's avatar
David Conrad committed
1328
        int x, y;
1329
        VP56mv uvmv;
David Conrad's avatar
David Conrad committed
1330 1331 1332 1333

        /* Y */
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
1334
                vp8_mc_luma(s, td, dst[0] + 4*y*s->linesize + x*4,
1335
                            ref, &bmv[4*y + x],
1336 1337 1338
                            4*x + x_off, 4*y + y_off, 4, 4,
                            width, height, s->linesize,
                            s->put_pixels_tab[2]);
David Conrad's avatar
David Conrad committed
1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353
            }
        }

        /* U/V */
        x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
        for (y = 0; y < 2; y++) {
            for (x = 0; x < 2; x++) {
                uvmv.x = mb->bmv[ 2*y    * 4 + 2*x  ].x +
                         mb->bmv[ 2*y    * 4 + 2*x+1].x +
                         mb->bmv[(2*y+1) * 4 + 2*x  ].x +
                         mb->bmv[(2*y+1) * 4 + 2*x+1].x;
                uvmv.y = mb->bmv[ 2*y    * 4 + 2*x  ].y +
                         mb->bmv[ 2*y    * 4 + 2*x+1].y +
                         mb->bmv[(2*y+1) * 4 + 2*x  ].y +
                         mb->bmv[(2*y+1) * 4 + 2*x+1].y;
1354 1355
                uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
                uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
David Conrad's avatar
David Conrad committed
1356 1357 1358 1359
                if (s->profile == 3) {
                    uvmv.x &= ~7;
                    uvmv.y &= ~7;
                }
1360
                vp8_mc_chroma(s, td, dst[1] + 4*y*s->uvlinesize + x*4,
1361
                              dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
1362 1363 1364
                              4*x + x_off, 4*y + y_off, 4, 4,
                              width, height, s->uvlinesize,
                              s->put_pixels_tab[2]);
David Conrad's avatar
David Conrad committed
1365 1366
            }
        }
1367 1368 1369
        break;
    }
    case VP8_SPLITMVMODE_16x8:
1370
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1371
                    0, 0, 16, 8, width, height, &bmv[0]);
1372
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1373
                    0, 8, 16, 8, width, height, &bmv[1]);
1374 1375
        break;
    case VP8_SPLITMVMODE_8x16:
1376
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1377
                    0, 0, 8, 16, width, height, &bmv[0]);
1378
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1379
                    8, 0, 8, 16, width, height, &bmv[1]);
1380 1381
        break;
    case VP8_SPLITMVMODE_8x8:
1382
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1383
                    0, 0, 8, 8, width, height, &bmv[0]);
1384
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1385
                    8, 0, 8, 8, width, height, &bmv[1]);
1386
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1387
                    0, 8, 8, 8, width, height, &bmv[2]);
1388
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1389
                    8, 8, 8, 8, width, height, &bmv[3]);
1390
        break;
David Conrad's avatar
David Conrad committed
1391 1392 1393
    }
}

1394 1395
/**
 * Add the residual of a macroblock to its prediction.
 *
 * Luma is skipped for MODE_I4x4 macroblocks. The four non-zero counts of a
 * block row (or of a chroma plane) are read as one 32-bit word: when every
 * count is 0 or 1 a single DC-only call handles all four blocks, otherwise
 * the blocks are processed one by one until no non-zero count is left.
 *
 * @param s   VP8 decoding context
 * @param td  per-thread data holding coefficients and non-zero counts
 * @param dst pointers to the Y, Cb and Cr blocks of the macroblock
 * @param mb  macroblock being reconstructed
 */
static av_always_inline void idct_mb(VP8Context *s, VP8ThreadData *td,
                                     uint8_t *dst[3], VP8Macroblock *mb)
{
    int row, col, plane, blk;

    if (mb->mode != MODE_I4x4) {
        uint8_t *luma_row = dst[0];

        for (row = 0; row < 4; row++, luma_row += 4*s->linesize) {
            uint32_t counts = AV_RL32(td->non_zero_count_cache[row]);

            if (!counts)
                continue;

            if (!(counts&~0x01010101)) {
                // DC-only (or empty) blocks throughout the row
                s->vp8dsp.vp8_idct_dc_add4y(luma_row, td->block[row], s->linesize);
                continue;
            }

            for (col = 0; counts && col < 4; col++, counts >>= 8) {
                if ((uint8_t)counts == 1)
                    s->vp8dsp.vp8_idct_dc_add(luma_row+4*col, td->block[row][col], s->linesize);
                else if ((uint8_t)counts > 1)
                    s->vp8dsp.vp8_idct_add(luma_row+4*col, td->block[row][col], s->linesize);
            }
        }
    }

    for (plane = 0; plane < 2; plane++) {
        uint32_t counts = AV_RL32(td->non_zero_count_cache[4+plane]);
        uint8_t *chroma = dst[1+plane];

        if (!counts)
            continue;

        if (!(counts&~0x01010101)) {
            s->vp8dsp.vp8_idct_dc_add4uv(chroma, td->block[4+plane], s->uvlinesize);
            continue;
        }

        // the four blocks of the plane in raster order: blk = (y<<1)+x
        for (blk = 0; counts && blk < 4; blk++, counts >>= 8) {
            uint8_t *blk_dst = chroma + (blk >> 1)*4*s->uvlinesize + 4*(blk & 1);

            if ((uint8_t)counts == 1)
                s->vp8dsp.vp8_idct_dc_add(blk_dst, td->block[4+plane][blk], s->uvlinesize);
            else if ((uint8_t)counts > 1)
                s->vp8dsp.vp8_idct_add(blk_dst, td->block[4+plane][blk], s->uvlinesize);
        }
    }
}

1447
/**
 * Derive the loop-filter parameters of one macroblock and store them in f.
 *
 * The starting level is the frame-wide s->filter.level, or, when
 * segmentation is enabled, the level of the macroblock's segment (used as is
 * when absolute_vals is set, otherwise added to the frame-wide level). If
 * lf_delta is enabled, the per-reference-frame and per-mode deltas are added.
 * The sum is then clipped with av_clip_uintp2(level, 6).
 *
 * @param s  decoder context; segmentation, lf_delta and filter are read
 * @param mb macroblock; segment, ref_frame, mode and skip are read
 * @param f  receives filter_level, inner_limit and inner_filter
 */
static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
{
    int level, limit;

    if (!s->segmentation.enabled)
        level = s->filter.level;
    else if (s->segmentation.absolute_vals)
        level = s->segmentation.filter_level[mb->segment];
    else
        level = s->segmentation.filter_level[mb->segment] + s->filter.level;

    if (s->lf_delta.enabled)
        level += s->lf_delta.ref[mb->ref_frame] + s->lf_delta.mode[mb->mode];

    level = av_clip_uintp2(level, 6);

    /* The interior limit starts at the level, is reduced according to the
     * sharpness setting, and never drops below 1. */
    limit = level;
    if (s->filter.sharpness) {
        limit >>= (s->filter.sharpness + 3) >> 2;
        if (limit > 9 - s->filter.sharpness)
            limit = 9 - s->filter.sharpness;
    }
    if (limit < 1)
        limit = 1;

    f->filter_level = level;
    f->inner_limit  = limit;
    /* Inner edges are skipped only for a skipped macroblock whose mode is
     * neither MODE_I4x4 nor VP8_MVMODE_SPLIT. */
    f->inner_filter = !(mb->skip && mb->mode != MODE_I4x4 &&
                        mb->mode != VP8_MVMODE_SPLIT);
}

1477
/**
 * Run the normal (non-simple) loop filter on one macroblock.
 *
 * Call order: left macroblock edge (only if mb_x != 0), inner vertical
 * edges, top macroblock edge (only if mb_y != 0), inner horizontal edges.
 * Nothing is done when f->filter_level is 0.
 *
 * @param s    decoder context; linesize, uvlinesize, keyframe and vp8dsp are read
 * @param dst  pointers to the macroblock's top-left sample in each of the 3 planes
 * @param f    filter parameters previously computed for this macroblock
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 */
static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
{
    /* High-edge-variance threshold, indexed by [keyframe][filter_level]. */
    static const uint8_t hev_thresh_lut[2][64] = {
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
          3, 3, 3, 3 },
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          2, 2, 2, 2 }
    };
    const int level    = f->filter_level;
    const int ilimit   = f->inner_limit;
    const int do_inner = f->inner_filter;
    const int ystride  = s->linesize;
    const int cstride  = s->uvlinesize;
    int mb_edge, sub_edge, hev, off;

    if (!level)
        return;

    sub_edge = 2*level + ilimit;
    mb_edge  = sub_edge + 4;
    hev      = hev_thresh_lut[s->keyframe][level];

    /* Left macroblock edge; column 0 has no left neighbour. */
    if (mb_x) {
        s->vp8dsp.vp8_h_loop_filter16y(dst[0], ystride,
                                       mb_edge, ilimit, hev);
        s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], cstride,
                                       mb_edge, ilimit, hev);
    }

    /* Inner vertical edges: luma at x = 4, 8, 12, then chroma at x = 4. */
    if (do_inner) {
        for (off = 4; off < 16; off += 4)
            s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + off, ystride,
                                                 sub_edge, ilimit, hev);
        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
                                             cstride, sub_edge,
                                             ilimit, hev);
    }

    /* Top macroblock edge; row 0 has no top neighbour. */
    if (mb_y) {
        s->vp8dsp.vp8_v_loop_filter16y(dst[0], ystride,
                                       mb_edge, ilimit, hev);
        s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], cstride,
                                       mb_edge, ilimit, hev);
    }

    /* Inner horizontal edges: luma at y = 4, 8, 12, then chroma at y = 4. */
    if (do_inner) {
        for (off = 4; off < 16; off += 4)
            s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + off*ystride,
                                                 ystride, sub_edge,
                                                 ilimit, hev);
        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * cstride,
                                             dst[2] + 4 * cstride,
                                             cstride, sub_edge,
                                             ilimit, hev);
    }
}

1547
/**
 * Run the simple loop filter on the luma plane of one macroblock.
 *
 * Call order: left macroblock edge (only if mb_x != 0), inner vertical
 * edges, top macroblock edge (only if mb_y != 0), inner horizontal edges.
 * Nothing is done when f->filter_level is 0.
 *
 * @param s    decoder context; linesize and vp8dsp are read
 * @param dst  pointer to the macroblock's top-left luma sample
 * @param f    filter parameters previously computed for this macroblock
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 */
static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
{
    const int level    = f->filter_level;
    const int ilimit   = f->inner_limit;
    const int do_inner = f->inner_filter;
    const int stride   = s->linesize;
    int mb_edge, sub_edge, off;

    if (!level)
        return;

    sub_edge = 2*level + ilimit;
    mb_edge  = sub_edge + 4;

    if (mb_x)
        s->vp8dsp.vp8_h_loop_filter_simple(dst, stride, mb_edge);
    if (do_inner) {
        /* vertical edges at x = 4, 8, 12 */
        for (off = 4; off < 16; off += 4)
            s->vp8dsp.vp8_h_loop_filter_simple(dst + off, stride, sub_edge);
    }

    if (mb_y)
        s->vp8dsp.vp8_v_loop_filter_simple(dst, stride, mb_edge);
    if (do_inner) {
        /* horizontal edges at y = 4, 8, 12 */
        for (off = 4; off < 16; off += 4)
            s->vp8dsp.vp8_v_loop_filter_simple(dst + off*stride, stride, sub_edge);
    }
}

1578
#define MARGIN (16 << 2)
1579 1580
static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
                                   VP8Frame *prev_frame)
1581 1582
{
    VP8Context *s = avctx->priv_data;
1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597
    int mb_x, mb_y;

    s->mv_min.y = -MARGIN;
    s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
    for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
        VP8Macroblock *mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
        int mb_xy = mb_y*s->mb_width;

        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);

        s->mv_min.x = -MARGIN;
        s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
        for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
            if (mb_y == 0)
                AV_WN32A((mb-s->mb_width-1)->intra4x4_pred_mode_top, DC_PRED*0x01010101);
1598 1599 1600
            decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL, 1);
1601 1602 1603 1604 1605 1606 1607 1608
            s->mv_min.x -= 64;
            s->mv_max.x -= 64;
        }
        s->mv_min.y -= 64;
        s->mv_max.y -= 64;
    }
}

1609
#if HAVE_THREADS
/*
 * check_thread_pos(td, otd, mb_x_check, mb_y_check)
 *   Block the calling thread (td) until the other thread (otd) has published
 *   a position >= (mb_y_check << 16) | (mb_x_check & 0xFFFF). While waiting,
 *   td->wait_mb_pos holds the awaited position; it is reset to INT_MAX
 *   afterwards. The fast path returns without taking otd->lock.
 *
 * update_pos(td, mb_y, mb_x)
 *   Publish (mb_y << 16) | (mb_x & 0xFFFF) as td->thread_mb_pos and, when
 *   slice threading with more than one job is active, broadcast td->cond if a
 *   neighbouring thread may be waiting for this position. This macro captures
 *   `avctx`, `num_jobs`, `next_td` and `prev_td` from the calling scope.
 *
 * Both macros expand to a single statement (no trailing semicolon) and
 * parenthesize their arguments, so they can be used in unbraced if/else
 * bodies and with arbitrary argument expressions.
 *
 * NOTE(review): thread_mb_pos is read and written outside the mutex here;
 * whether that is race-free depends on the field's declaration, which is not
 * visible in this file section.
 */
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)\
    do {\
        int tmp = ((mb_y_check) << 16) | ((mb_x_check) & 0xFFFF);\
        if ((otd)->thread_mb_pos < tmp) {\
            pthread_mutex_lock(&(otd)->lock);\
            (td)->wait_mb_pos = tmp;\
            do {\
                if ((otd)->thread_mb_pos >= tmp)\
                    break;\
                pthread_cond_wait(&(otd)->cond, &(otd)->lock);\
            } while (1);\
            (td)->wait_mb_pos = INT_MAX;\
            pthread_mutex_unlock(&(otd)->lock);\
        }\
    } while (0)

#define update_pos(td, mb_y, mb_x)\
    do {\
        int pos              = ((mb_y) << 16) | ((mb_x) & 0xFFFF);\
        int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && (num_jobs > 1);\
        int is_null          = (next_td == NULL) || (prev_td == NULL);\
        int pos_check        = (is_null) ? 1 :\
                                (next_td != (td) && pos >= next_td->wait_mb_pos) ||\
                                (prev_td != (td) && pos >= prev_td->wait_mb_pos);\
        (td)->thread_mb_pos = pos;\
        if (sliced_threading && pos_check) {\
            pthread_mutex_lock(&(td)->lock);\
            pthread_cond_broadcast(&(td)->cond);\
            pthread_mutex_unlock(&(td)->lock);\
        }\
    } while (0)
#else
/* Without thread support both macros expand to nothing. */
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)
#define update_pos(td, mb_y, mb_x)
#endif
1645 1646 1647 1648 1649 1650 1651

/**
 * Decode one macroblock row without applying the loop filter.
 *
 * The row index is taken from the upper 16 bits of the calling thread's
 * thread_mb_pos. For each macroblock the function optionally decodes its mode
 * (when s->mb_layout is 0), decodes coefficients, runs intra or inter
 * prediction, adds the residual, and, if deblocking is enabled, computes the
 * filter strength for the later filtering pass. Progress is published through
 * update_pos() after every macroblock.
 *
 * @param avctx    codec context whose priv_data is the VP8Context
 * @param tdata    unused
 * @param jobnr    job index, used to locate the neighbouring threads' data
 * @param threadnr index into s->thread_data for the calling thread
 */
static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                        int jobnr, int threadnr)
{
    VP8Context *s        = avctx->priv_data;
    VP8ThreadData *td    = &s->thread_data[threadnr];
    VP8ThreadData *prev_td, *next_td;   /* captured by name by update_pos() */
    VP8Frame *curframe   = s->curframe;
    VP8Frame *prev_frame = s->prev_frame;
    int num_jobs         = s->num_jobs; /* captured by name by update_pos() */
    int mb_y             = td->thread_mb_pos >> 16;
    int mb_xy            = mb_y * s->mb_width;
    int mb_x;
    VP56RangeCoder *coder = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
    VP8Macroblock *mb;
    uint8_t *dst[3];

    dst[0] = curframe->tf.f->data[0] + 16*mb_y*s->linesize;
    dst[1] = curframe->tf.f->data[1] +  8*mb_y*s->uvlinesize;
    dst[2] = curframe->tf.f->data[2] +  8*mb_y*s->uvlinesize;

    /* The first / last row has no neighbour above / below: point at td itself. */
    prev_td = mb_y == 0
              ? td : &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
    next_td = mb_y == s->mb_height - 1
              ? td : &s->thread_data[(jobnr + 1) % num_jobs];

    if (s->mb_layout == 1) {
        mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
    } else {
        // Make sure the previous frame has read its segmentation map,
        // if we re-use the same map.
        if (prev_frame && s->segmentation.enabled &&
            !s->segmentation.update_map)
            ff_thread_await_progress(&prev_frame->tf, mb_y, 0);

        mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
        memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
    }

    memset(td->left_nnz, 0, sizeof(td->left_nnz));

    s->mv_min.x = -MARGIN;
    s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
        /* Wait for the thread handling the row above to report position
         * (mb_y-1, mb_x+1). For thread 0 the awaited column is offset by
         * mb_width+3, the same offset vp8_filter_mb_row() adds when it
         * reports its progress. */
        if (prev_td != td) {
            if (threadnr == 0) {
                check_thread_pos(td, prev_td, (s->mb_width+3) + (mb_x+1), mb_y-1);
            } else {
                check_thread_pos(td, prev_td, mb_x+1, mb_y-1);
            }
        }

        s->vdsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
        s->vdsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);

        if (!s->mb_layout)
            decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL, 0);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);

        if (!mb->skip)
            decode_mb_coeffs(s, td, coder, mb, s->top_nnz[mb_x], td->left_nnz);

        if (mb->mode > MODE_I4x4)
            inter_predict(s, td, dst, mb, mb_x, mb_y);
        else
            intra_predict(s, td, dst, mb, mb_x, mb_y);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);

        if (mb->skip) {
            AV_ZERO64(td->left_nnz);
            AV_WN64(s->top_nnz[mb_x], 0);   // array of 9, so unaligned

            // Reset DC block predictors if they would exist if the mb had coefficients
            if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
                td->left_nnz[8]     = 0;
                s->top_nnz[mb_x][8] = 0;
            }
        } else {
            idct_mb(s, td, dst, mb);
        }

        if (s->deblock_filter) {
            filter_level_for_mb(s, mb, &td->filter_strength[mb_x]);

            /* With several jobs, the last thread saves the unfiltered border
             * here; with a single job vp8_filter_mb_row() does it. */
            if (num_jobs != 1 && threadnr == num_jobs - 1) {
                if (s->filter.simple)
                    backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
                else
                    backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
            }
        }

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);

        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;
        s->mv_min.x -= 64;
        s->mv_max.x -= 64;

        /* NOTE(review): inside this loop mb_x < s->mb_width always holds, so
         * the first branch looks unreachable -- confirm the intended bound. */
        if (mb_x == s->mb_width+1) {
            update_pos(td, mb_y, s->mb_width+3);
        } else {
            update_pos(td, mb_y, mb_x);
        }
    }
}

1753 1754
/**
 * Apply the loop filter to one macroblock row.
 *
 * The row index is taken from the upper 16 bits of the calling thread's
 * thread_mb_pos. Before each macroblock the function waits for the
 * neighbouring rows' threads where required, and afterwards publishes its
 * own progress with the column offset by mb_width+3.
 *
 * @param avctx    codec context whose priv_data is the VP8Context
 * @param tdata    unused
 * @param jobnr    job index, used to locate the neighbouring threads' data
 * @param threadnr index into s->thread_data for the calling thread
 */
static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    VP8Context *s     = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[threadnr];
    VP8ThreadData *prev_td, *next_td;   /* captured by name by update_pos() */
    AVFrame *pic      = s->curframe->tf.f;
    int num_jobs      = s->num_jobs;    /* captured by name by update_pos() */
    int mb_y          = td->thread_mb_pos >> 16;
    int mb_x;
    VP8Macroblock *mb;
    uint8_t *dst[3];

    dst[0] = pic->data[0] + 16*mb_y*s->linesize;
    dst[1] = pic->data[1] +  8*mb_y*s->uvlinesize;
    dst[2] = pic->data[2] +  8*mb_y*s->uvlinesize;

    /* mb is advanced in step with mb_x but is not otherwise read below. */
    mb = s->mb_layout == 1
         ? s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1)
         : s->macroblocks + (s->mb_height - mb_y - 1) * 2;

    /* The first / last row has no neighbour above / below: point at td itself. */
    prev_td = mb_y == 0
              ? td : &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
    next_td = mb_y == s->mb_height - 1
              ? td : &s->thread_data[(jobnr + 1) % num_jobs];

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
        VP8FilterStrength *strength = &td->filter_strength[mb_x];

        /* The row above must have reported filter position mb_x+1. */
        if (prev_td != td) {
            check_thread_pos(td, prev_td, (mb_x+1) + (s->mb_width+3), mb_y-1);
        }
        /* The row below must have reported position mb_x+1, unless it is
         * handled by thread 0. */
        if (next_td != td && next_td != &s->thread_data[0]) {
            check_thread_pos(td, next_td, mb_x+1, mb_y+1);
        }

        /* With a single job the unfiltered border is saved here, right
         * before it gets filtered. */
        if (num_jobs == 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        if (s->filter.simple)
            filter_mb_simple(s, dst[0], strength, mb_x, mb_y);
        else
            filter_mb(s, dst, strength, mb_x, mb_y);

        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;

        update_pos(td, mb_y, (s->mb_width+3) + mb_x);
    }
}

1807 1808
/**
 * execute2() worker: decode (and optionally loop-filter) every num_jobs-th
 * macroblock row, starting at row jobnr.
 *
 * For each row the row index is stored in the upper 16 bits of
 * td->thread_mb_pos, the row is decoded and filtered, and the end of the row
 * is published through update_pos(). With frame threading active, per-row
 * progress is also reported on the current frame.
 *
 * @param avctx    codec context whose priv_data is the VP8Context
 * @param tdata    forwarded unchanged to the row functions
 * @param jobnr    job index; also the first row handled
 * @param threadnr thread index, stored in td->thread_nr
 * @return always 0
 */
static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                                    int jobnr, int threadnr)
{
    VP8Context *s     = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[jobnr];
    /* update_pos() captures these by name; NULL makes it skip the
     * neighbour comparison. */
    VP8ThreadData *next_td = NULL, *prev_td = NULL;
    VP8Frame *curframe = s->curframe;
    int num_jobs       = s->num_jobs;
    int mb_y           = jobnr;

    td->thread_nr = threadnr;

    while (mb_y < s->mb_height) {
        td->thread_mb_pos = mb_y<<16;

        vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
        if (s->deblock_filter)
            vp8_filter_mb_row(avctx, tdata, jobnr, threadnr);

        /* Mark the whole row as done (column field set to its maximum). */
        update_pos(td, mb_y, INT_MAX & 0xFFFF);

        s->mv_min.y -= 64;
        s->mv_max.y -= 64;

        if (avctx->active_thread_type == FF_THREAD_FRAME)
            ff_thread_report_progress(&curframe->tf, mb_y, 0);

        mb_y += num_jobs;
    }

    return 0;
}

Justin Ruggles's avatar
Justin Ruggles committed
1834 1835
int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                        AVPacket *avpkt)
David Conrad's avatar
David Conrad committed
1836 1837
{
    VP8Context *s = avctx->priv_data;
1838
    int ret, i, referenced, num_jobs;
David Conrad's avatar
David Conrad committed
1839
    enum AVDiscard skip_thresh;
1840
    VP8Frame *av_uninit(curframe), *prev_frame;
1841

David Conrad's avatar
David Conrad committed
1842
    if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
1843
        goto err;
David Conrad's avatar
David Conrad committed
1844

1845 1846
    prev_frame = s->framep[VP56_FRAME_CURRENT];

David Conrad's avatar
David Conrad committed
1847 1848 1849 1850 1851 1852 1853 1854
    referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
                                || s->update_altref == VP56_FRAME_CURRENT;

    skip_thresh = !referenced ? AVDISCARD_NONREF :
                    !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;

    if (avctx->skip_frame >= skip_thresh) {
        s->invisible = 1;
1855
        memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
David Conrad's avatar
David Conrad committed
1856 1857
        goto skip_decode;
    }
1858
    s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
David Conrad's avatar
David Conrad committed
1859

1860 1861
    // release no longer referenced frames
    for (i = 0; i < 5; i++)
1862
        if (s->frames[i].tf.f->data[0] &&
1863 1864 1865 1866
            &s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
1867
            vp8_release_frame(s, &s->frames[i]);
1868 1869 1870 1871 1872

    // find a free buffer
    for (i = 0; i < 5; i++)
        if (&s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
David Conrad's avatar
David Conrad committed
1873 1874 1875 1876 1877
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
            curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
            break;
        }
1878 1879 1880 1881
    if (i == 5) {
        av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
        abort();
    }
1882 1883
    if (curframe->tf.f->data[0])
        vp8_release_frame(s, curframe);
David Conrad's avatar
David Conrad committed
1884

1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895
    // Given that arithmetic probabilities are updated every frame, it's quite likely
    // that the values we have on a random interframe are complete junk if we didn't
    // start decode on a keyframe. So just don't display anything rather than junk.
    if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
                         !s->framep[VP56_FRAME_GOLDEN] ||
                         !s->framep[VP56_FRAME_GOLDEN2])) {
        av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
        ret = AVERROR_INVALIDDATA;
        goto err;
    }

1896 1897
    curframe->tf.f->key_frame = s->keyframe;
    curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
1898
    if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
1899
        goto err;
David Conrad's avatar
David Conrad committed
1900

1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918
    // check if golden and altref are swapped
    if (s->update_altref != VP56_FRAME_NONE) {
        s->next_framep[VP56_FRAME_GOLDEN2]  = s->framep[s->update_altref];
    } else {
        s->next_framep[VP56_FRAME_GOLDEN2]  = s->framep[VP56_FRAME_GOLDEN2];
    }
    if (s->update_golden != VP56_FRAME_NONE) {
        s->next_framep[VP56_FRAME_GOLDEN]   = s->framep[s->update_golden];
    } else {
        s->next_framep[VP56_FRAME_GOLDEN]   = s->framep[VP56_FRAME_GOLDEN];
    }
    if (s->update_last) {
        s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
    } else {
        s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
    }
    s->next_framep[VP56_FRAME_CURRENT]      = curframe;

1919 1920
    ff_thread_finish_setup(avctx);

1921 1922
    s->linesize   = curframe->tf.f->linesize[0];
    s->uvlinesize = curframe->tf.f->linesize[1];
David Conrad's avatar
David Conrad committed
1923 1924

    memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
1925
    /* Zero macroblock structures for top/top-left prediction from outside the frame. */
1926 1927 1928 1929
    if (!s->mb_layout)
        memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
    if (!s->mb_layout && s->keyframe)
        memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
1930

1931
    memset(s->ref_count, 0, sizeof(s->ref_count));
David Conrad's avatar
David Conrad committed
1932 1933


1934 1935 1936 1937 1938 1939
    if (s->mb_layout == 1) {
        // Make sure the previous frame has read its segmentation map,
        // if we re-use the same map.
        if (prev_frame && s->segmentation.enabled &&
            !s->segmentation.update_map)
            ff_thread_await_progress(&prev_frame->tf, 1, 0);
1940
        vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
1941
    }
David Conrad's avatar
David Conrad committed
1942

1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956
    if (avctx->active_thread_type == FF_THREAD_FRAME)
        num_jobs = 1;
    else
        num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
    s->num_jobs   = num_jobs;
    s->curframe   = curframe;
    s->prev_frame = prev_frame;
    s->mv_min.y   = -MARGIN;
    s->mv_max.y   = ((s->mb_height - 1) << 6) + MARGIN;
    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].thread_mb_pos = 0;
        s->thread_data[i].wait_mb_pos = INT_MAX;
    }
    avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, num_jobs);
David Conrad's avatar
David Conrad committed
1957

1958
    ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
1959 1960
    memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);

David Conrad's avatar
David Conrad committed
1961 1962 1963 1964 1965 1966 1967
skip_decode:
    // if future frames don't use the updated probabilities,
    // reset them to the values we saved
    if (!s->update_probabilities)
        s->prob[0] = s->prob[1];

    if (!s->invisible) {
1968 1969
        if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
            return ret;
1970
        *got_frame      = 1;
David Conrad's avatar
David Conrad committed
1971 1972 1973
    }

    return avpkt->size;
1974 1975
err:
    memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
1976
    return ret;
David Conrad's avatar
David Conrad committed
1977 1978
}

Justin Ruggles's avatar
Justin Ruggles committed
1979
/**
 * Codec close callback: flush the decoder state via
 * vp8_decode_flush_impl(avctx, 1) and free the AVFrame of every frame slot.
 *
 * @param avctx codec context whose priv_data is the VP8Context
 * @return always 0
 */
av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int slot = 0;

    vp8_decode_flush_impl(avctx, 1);

    while (slot < FF_ARRAY_ELEMS(s->frames)) {
        av_frame_free(&s->frames[slot].tf.f);
        slot++;
    }

    return 0;
}

/**
 * Allocate an AVFrame for every slot of s->frames.
 *
 * Slots allocated before a failure are left in place; both callers in this
 * file invoke ff_vp8_decode_free() when an error is returned.
 *
 * @param s decoder context
 * @return 0 on success, AVERROR(ENOMEM) if an allocation fails
 */
static av_cold int vp8_init_frames(VP8Context *s)
{
    int slot = 0;

    while (slot < FF_ARRAY_ELEMS(s->frames)) {
        s->frames[slot].tf.f = av_frame_alloc();
        if (s->frames[slot].tf.f == NULL)
            return AVERROR(ENOMEM);
        slot++;
    }

    return 0;
}

Justin Ruggles's avatar
Justin Ruggles committed
2002
/**
 * Codec init callback: set the output pixel format, enable progress
 * allocation, initialise the DSP function tables and allocate the frame
 * slots.
 *
 * @param avctx codec context whose priv_data is the VP8Context
 * @return 0 on success, a negative error code if frame allocation fails
 *         (the decoder is freed again in that case)
 */
av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int ret;

    s->avctx = avctx;
    avctx->pix_fmt = AV_PIX_FMT_YUV420P;
    avctx->internal->allocate_progress = 1;

    ff_videodsp_init(&s->vdsp, 8);
    ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
    ff_vp8dsp_init(&s->vp8dsp, 0);

    ret = vp8_init_frames(s);
    if (ret >= 0)
        return 0;

    /* Allocation failed part-way: release whatever was set up. */
    ff_vp8_decode_free(avctx);
    return ret;
}

2023 2024 2025
/**
 * init_thread_copy callback: bind the context copy to its own avctx and
 * allocate its frame slots.
 *
 * @param avctx codec context of the thread copy
 * @return 0 on success, a negative error code if frame allocation fails
 *         (the copy is freed again in that case)
 */
static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int ret;

    s->avctx = avctx;

    ret = vp8_init_frames(s);
    if (ret >= 0)
        return 0;

    ff_vp8_decode_free(avctx);
    return ret;
}

/*
 * Translate a frame pointer that points into s_src->frames[] into the pointer
 * to the slot with the same index in s->frames[]; NULL maps to NULL.
 * Captures `s` and `s_src` from the calling scope. The argument and the
 * whole expansion are parenthesized so the macro can be used with any
 * argument expression and inside larger expressions. Note that `pic` is
 * evaluated twice.
 */
#define REBASE(pic) \
    ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)

/**
 * update_thread_context callback: copy the state the next frame depends on
 * from the source thread's context into the destination thread's context.
 *
 * Copies probabilities, segmentation, loop-filter deltas and sign biases,
 * re-references every source frame slot that holds data, and rebases the
 * source's next_framep[] pointers onto the destination's frame slots.
 *
 * @param dst destination codec context
 * @param src source codec context
 * @return 0 on success, a negative error code if referencing a frame fails
 */
static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
{
    /* REBASE() captures `s` and `s_src` by name. */
    VP8Context *s = dst->priv_data, *s_src = src->priv_data;
    int i;

    /* Buffers allocated for different dimensions cannot be reused. */
    if (s->macroblocks_base &&
        !(s_src->mb_width  == s->mb_width &&
          s_src->mb_height == s->mb_height)) {
        free_buffers(s);
        s->mb_width  = s_src->mb_width;
        s->mb_height = s_src->mb_height;
    }

    /* Take the saved probabilities (prob[1]) when the source frame did not
     * keep its updated ones. */
    s->prob[0]      = s_src->prob[!s_src->update_probabilities];
    s->segmentation = s_src->segmentation;
    s->lf_delta     = s_src->lf_delta;
    memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));

    for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
        int ret;

        if (!s_src->frames[i].tf.f->data[0])
            continue;

        ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
        if (ret < 0)
            return ret;
    }

    for (i = 0; i < 4; i++)
        s->framep[i] = REBASE(s_src->next_framep[i]);

    return 0;
}

2074
AVCodec ff_vp8_decoder = {
2075
    .name                  = "vp8",
2076
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP8"),
2077
    .type                  = AVMEDIA_TYPE_VIDEO,
2078
    .id                    = AV_CODEC_ID_VP8,
2079
    .priv_data_size        = sizeof(VP8Context),
Justin Ruggles's avatar
Justin Ruggles committed
2080 2081 2082
    .init                  = ff_vp8_decode_init,
    .close                 = ff_vp8_decode_free,
    .decode                = ff_vp8_decode_frame,
2083
    .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
2084
    .flush                 = vp8_decode_flush,
2085 2086
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
David Conrad's avatar
David Conrad committed
2087
};
2088