/*
 * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * H.264 / AVC / MPEG-4 part 10 CAVLC bitstream decoding.
 * @author Michael Niedermayer <michaelni@gmx.at>
 */

28
#define CABAC(h) 0
29
#define UNCHECKED_BITSTREAM_READER 1
30

31 32 33 34 35 36
#include "internal.h"
#include "avcodec.h"
#include "h264.h"
#include "h264data.h" // FIXME FIXME FIXME
#include "h264_mvpred.h"
#include "golomb.h"
37
#include "mpegutils.h"
38
#include "libavutil/avassert.h"
39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64


/*
 * 16-entry remapping table; each value 0..15 appears exactly once.
 * NOTE(review): not referenced in the visible part of this file; the name
 * suggests it maps a Golomb-coded index to a 4-bit coded block pattern for
 * inter macroblocks in gray (no-chroma) streams -- confirm at the use site.
 */
static const uint8_t golomb_to_inter_cbp_gray[16]={
 0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
};

/*
 * 16-entry remapping table; each value 0..15 appears exactly once.
 * NOTE(review): not referenced in the visible part of this file; presumably
 * the intra 4x4 counterpart of golomb_to_inter_cbp_gray -- confirm at the
 * use site.
 */
static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
};

/*
 * Code lengths (in bits) for the chroma DC coeff_token VLC (4*5 entries),
 * handed to init_vlc() in ff_h264_decode_init_vlc().
 * decode_residual() splits the decoded symbol as total_coeff = sym >> 2 and
 * trailing_ones = sym & 3, so each row of four presumably corresponds to one
 * total_coeff value and each column to a trailing_ones count.
 * NOTE(review): a length of 0 presumably marks an unused slot -- confirm
 * against init_vlc().
 */
static const uint8_t chroma_dc_coeff_token_len[4*5]={
 2, 0, 0, 0,
 6, 1, 0, 0,
 6, 6, 3, 0,
 6, 7, 7, 6,
 6, 8, 8, 7,
};

/*
 * Code words matching chroma_dc_coeff_token_len entry for entry.
 */
static const uint8_t chroma_dc_coeff_token_bits[4*5]={
 1, 0, 0, 0,
 7, 1, 0, 0,
 4, 6, 1, 0,
 3, 3, 2, 5,
 2, 3, 2, 0,
};

65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88
/*
 * Code lengths (in bits) for the 4:2:2 chroma DC coeff_token VLC
 * (4*9 entries), handed to init_vlc() in ff_h264_decode_init_vlc().
 * Laid out like chroma_dc_coeff_token_len: rows of four entries, the decoded
 * symbol being split into total_coeff (sym >> 2) and trailing_ones (sym & 3)
 * by decode_residual().
 */
static const uint8_t chroma422_dc_coeff_token_len[4*9]={
  1,  0,  0,  0,
  7,  2,  0,  0,
  7,  7,  3,  0,
  9,  7,  7,  5,
  9,  9,  7,  6,
 10, 10,  9,  7,
 11, 11, 10,  7,
 12, 12, 11, 10,
 13, 12, 12, 11,
};

/*
 * Code words matching chroma422_dc_coeff_token_len entry for entry.
 */
static const uint8_t chroma422_dc_coeff_token_bits[4*9]={
  1,   0,  0, 0,
 15,   1,  0, 0,
 14,  13,  1, 0,
  7,  12, 11, 1,
  6,   5, 10, 1,
  7,   6,  4, 9,
  7,   6,  5, 8,
  7,   6,  5, 4,
  7,   5,  4, 4,
};

89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
/*
 * Code lengths (in bits) for the four luma/AC coeff_token VLCs, 4*17 entries
 * each. decode_residual() picks one of the four tables through
 * coeff_token_table_index[] using the predicted non-zero count, then splits
 * the decoded symbol into total_coeff (sym >> 2) and trailing_ones (sym & 3);
 * each group of four entries therefore presumably belongs to one total_coeff
 * value (0..16).
 * NOTE(review): a length of 0 presumably marks an unused slot -- confirm
 * against init_vlc().
 */
static const uint8_t coeff_token_len[4][4*17]={
{
     1, 0, 0, 0,
     6, 2, 0, 0,     8, 6, 3, 0,     9, 8, 7, 5,    10, 9, 8, 6,
    11,10, 9, 7,    13,11,10, 8,    13,13,11, 9,    13,13,13,10,
    14,14,13,11,    14,14,14,13,    15,15,14,14,    15,15,15,14,
    16,15,15,15,    16,16,16,15,    16,16,16,16,    16,16,16,16,
},
{
     2, 0, 0, 0,
     6, 2, 0, 0,     6, 5, 3, 0,     7, 6, 6, 4,     8, 6, 6, 4,
     8, 7, 7, 5,     9, 8, 8, 6,    11, 9, 9, 6,    11,11,11, 7,
    12,11,11, 9,    12,12,12,11,    12,12,12,11,    13,13,13,12,
    13,13,13,13,    13,14,13,13,    14,14,14,13,    14,14,14,14,
},
{
     4, 0, 0, 0,
     6, 4, 0, 0,     6, 5, 4, 0,     6, 5, 5, 4,     7, 5, 5, 4,
     7, 5, 5, 4,     7, 6, 6, 4,     7, 6, 6, 4,     8, 7, 7, 5,
     8, 8, 7, 6,     9, 8, 8, 7,     9, 9, 8, 8,     9, 9, 9, 8,
    10, 9, 9, 9,    10,10,10,10,    10,10,10,10,    10,10,10,10,
},
{
     /* Fourth table: every used entry is a fixed 6-bit code. */
     6, 0, 0, 0,
     6, 6, 0, 0,     6, 6, 6, 0,     6, 6, 6, 6,     6, 6, 6, 6,
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
}
};

/*
 * Code words matching coeff_token_len table for table and entry for entry.
 */
static const uint8_t coeff_token_bits[4][4*17]={
{
     1, 0, 0, 0,
     5, 1, 0, 0,     7, 4, 1, 0,     7, 6, 5, 3,     7, 6, 5, 3,
     7, 6, 5, 4,    15, 6, 5, 4,    11,14, 5, 4,     8,10,13, 4,
    15,14, 9, 4,    11,10,13,12,    15,14, 9,12,    11,10,13, 8,
    15, 1, 9,12,    11,14,13, 8,     7,10, 9,12,     4, 6, 5, 8,
},
{
     3, 0, 0, 0,
    11, 2, 0, 0,     7, 7, 3, 0,     7,10, 9, 5,     7, 6, 5, 4,
     4, 6, 5, 6,     7, 6, 5, 8,    15, 6, 5, 4,    11,14,13, 4,
    15,10, 9, 4,    11,14,13,12,     8,10, 9, 8,    15,14,13,12,
    11,10, 9,12,     7,11, 6, 8,     9, 8,10, 1,     7, 6, 5, 4,
},
{
    15, 0, 0, 0,
    15,14, 0, 0,    11,15,13, 0,     8,12,14,12,    15,10,11,11,
    11, 8, 9,10,     9,14,13, 9,     8,10, 9, 8,    15,14,13,13,
    11,14,10,12,    15,10,13,12,    11,14, 9,12,     8,10,13, 8,
    13, 7, 9,12,     9,12,11,10,     5, 8, 7, 6,     1, 4, 3, 2,
},
{
     3, 0, 0, 0,
     0, 1, 0, 0,     4, 5, 6, 0,     8, 9,10,11,    12,13,14,15,
    16,17,18,19,    20,21,22,23,    24,25,26,27,    28,29,30,31,
    32,33,34,35,    36,37,38,39,    40,41,42,43,    44,45,46,47,
    48,49,50,51,    52,53,54,55,    56,57,58,59,    60,61,62,63,
}
};

/*
 * Code lengths (in bits) for the total_zeros VLCs of 4x4 blocks.
 * Rows 0..14 are turned into total_zeros_vlc[0..14]; decode_residual()
 * addresses them as (total_zeros_vlc-1)[total_coeff], i.e. row r serves
 * total_coeff == r + 1. The column index is the decoded value (zeros_left).
 * Only 15 of the 16 declared rows have initialisers; the remaining row and
 * the unlisted tail of every shorter row are zero-filled by the compiler.
 */
static const uint8_t total_zeros_len[16][16]= {
    {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
    {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
    {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
    {5,3,4,4,3,3,3,4,3,4,5,5,5},
    {4,4,4,3,3,3,3,3,4,5,4,5},
    {6,5,3,3,3,3,3,3,4,3,6},
    {6,5,3,3,3,2,3,4,3,6},
    {6,4,5,3,2,2,3,3,6},
    {6,6,4,2,2,3,2,5},
    {5,5,3,2,2,2,4},
    {4,4,3,3,1,3},
    {4,4,2,1,3},
    {3,3,1,2},
    {2,2,1},
    {1,1},
};

/*
 * Code words matching total_zeros_len row for row and entry for entry.
 */
static const uint8_t total_zeros_bits[16][16]= {
    {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
    {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
    {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
    {3,7,5,4,6,5,4,3,3,2,2,1,0},
    {5,4,3,7,6,5,4,3,2,1,1,0},
    {1,1,7,6,5,4,3,2,1,1,0},
    {1,1,5,4,3,3,2,1,1,0},
    {1,1,1,3,3,2,2,1,0},
    {1,0,1,3,2,1,1,1},
    {1,0,1,3,2,1,1},
    {0,1,1,2,1,3},
    {0,1,1,1,1},
    {0,1,1,1},
    {0,1,1},
    {0,1},
};

/*
 * Code lengths (in bits) for the chroma DC total_zeros VLCs (max_coeff == 4).
 * decode_residual() addresses the resulting VLCs as
 * (chroma_dc_total_zeros_vlc-1)[total_coeff], so row r serves
 * total_coeff == r + 1; the column index is the decoded zeros_left value.
 */
static const uint8_t chroma_dc_total_zeros_len[3][4]= {
    { 1, 2, 3, 3,},
    { 1, 2, 2, 0,},
    { 1, 1, 0, 0,},
};

/*
 * Code words matching chroma_dc_total_zeros_len entry for entry.
 */
static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
    { 1, 1, 1, 0,},
    { 1, 1, 0, 0,},
    { 1, 0, 0, 0,},
};

199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218
/*
 * Code lengths (in bits) for the 4:2:2 chroma DC total_zeros VLCs
 * (the max_coeff <= 8 but != 4 path of decode_residual()).
 * Addressed as (chroma422_dc_total_zeros_vlc-1)[total_coeff], so row r
 * serves total_coeff == r + 1. Entries missing from shorter rows are
 * zero-filled by the compiler.
 */
static const uint8_t chroma422_dc_total_zeros_len[7][8]= {
    { 1, 3, 3, 4, 4, 4, 5, 5 },
    { 3, 2, 3, 3, 3, 3, 3 },
    { 3, 3, 2, 2, 3, 3 },
    { 3, 2, 2, 2, 3 },
    { 2, 2, 2, 2 },
    { 2, 2, 1 },
    { 1, 1 },
};

/*
 * Code words matching chroma422_dc_total_zeros_len entry for entry.
 */
static const uint8_t chroma422_dc_total_zeros_bits[7][8]= {
    { 1, 2, 3, 2, 3, 1, 1, 0 },
    { 0, 1, 1, 4, 5, 6, 7 },
    { 0, 1, 1, 2, 6, 7 },
    { 6, 0, 1, 2, 7 },
    { 0, 1, 2, 3 },
    { 0, 1, 1 },
    { 0, 1 },
};

219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246
/*
 * Code lengths (in bits) for the run_before VLCs.
 * Rows 0..5 become run_vlc[0..5] (first 7 entries of each row are used) and
 * are addressed as (run_vlc-1)[zeros_left] when zeros_left < 7.
 * Row 6 becomes run7_vlc (16 entries), used when zeros_left >= 7.
 * Entries missing from a row are zero-filled by the compiler.
 */
static const uint8_t run_len[7][16]={
    {1,1},
    {1,2,2},
    {2,2,2,2},
    {2,2,2,3,3},
    {2,2,3,3,3,3},
    {2,3,3,3,3,3,3},
    {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
};

/*
 * Code words matching run_len row for row and entry for entry.
 */
static const uint8_t run_bits[7][16]={
    {1,0},
    {1,1,0},
    {3,2,1,0},
    {3,2,1,1,0},
    {3,2,3,2,1,0},
    {3,0,1,3,2,5,4},
    {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
};

/*
 * The four coeff_token VLCs share one packed backing array; each VLC gets a
 * slice whose length is given by coeff_token_vlc_tables_size[]. The array
 * dimension is the sum of those four sizes, which
 * ff_h264_decode_init_vlc() verifies with an assertion.
 */
static VLC coeff_token_vlc[4];
static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
static const int coeff_token_vlc_tables_size[4]={520,332,280,256};

/* Chroma DC coeff_token VLC and its statically allocated table. */
static VLC chroma_dc_coeff_token_vlc;
static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
static const int chroma_dc_coeff_token_vlc_table_size = 256;

247 248 249 250
/* 4:2:2 chroma DC coeff_token VLC and its statically allocated table. */
static VLC chroma422_dc_coeff_token_vlc;
static VLC_TYPE chroma422_dc_coeff_token_vlc_table[8192][2];
static const int chroma422_dc_coeff_token_vlc_table_size = 8192;

251 252 253 254 255 256 257 258
/* total_zeros VLCs, one per total_coeff value 1..15, with static tables. */
static VLC total_zeros_vlc[15];
static VLC_TYPE total_zeros_vlc_tables[15][512][2];
static const int total_zeros_vlc_tables_size = 512;

/* Chroma DC total_zeros VLCs, one per total_coeff value 1..3. */
static VLC chroma_dc_total_zeros_vlc[3];
static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
static const int chroma_dc_total_zeros_vlc_tables_size = 8;

259 260 261 262
/* 4:2:2 chroma DC total_zeros VLCs, one per total_coeff value 1..7. */
static VLC chroma422_dc_total_zeros_vlc[7];
static VLC_TYPE chroma422_dc_total_zeros_vlc_tables[7][32][2];
static const int chroma422_dc_total_zeros_vlc_tables_size = 32;

263 264 265 266 267 268 269 270 271 272 273
/* run_before VLCs for zeros_left values 1..6. */
static VLC run_vlc[6];
static VLC_TYPE run_vlc_tables[6][8][2];
static const int run_vlc_tables_size = 8;

/* run_before VLC used when zeros_left is 7 or more. */
static VLC run7_vlc;
static VLC_TYPE run7_vlc_table[96][2];
static const int run7_vlc_table_size = 96;

/*
 * Direct level lookup, filled by init_cavlc_level_tab():
 * indexed by [suffix_length][next LEVEL_TAB_BITS bits of the stream];
 * element [0] is the decoded level, or 100 + prefix when the code does not
 * fit in LEVEL_TAB_BITS bits; element [1] is the number of bits to skip.
 */
#define LEVEL_TAB_BITS 8
static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];

274 275 276 277 278 279 280 281
/*
 * Lookup widths (in bits) passed both to init_vlc() when the tables are
 * built and to get_vlc2() when they are read; the two must stay in sync.
 */
#define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
#define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13
#define COEFF_TOKEN_VLC_BITS           8
#define TOTAL_ZEROS_VLC_BITS           9
#define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
#define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5
#define RUN_VLC_BITS                   3
#define RUN7_VLC_BITS                  6
282 283

/**
284
 * Get the predicted number of non-zero coefficients.
285 286
 * @param n block index
 */
287
static inline int pred_non_zero_count(const H264Context *h, H264SliceContext *sl, int n)
288
{
289
    const int index8= scan8[n];
290 291
    const int left = sl->non_zero_count_cache[index8 - 1];
    const int top  = sl->non_zero_count_cache[index8 - 8];
292 293 294 295
    int i= left + top;

    if(i<64) i= (i+1)>>1;

296
    ff_tlog(h->avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
297 298 299 300 301

    return i&31;
}

/**
 * Fill cavlc_level_tab for every suffix length (0..6) and every possible
 * LEVEL_TAB_BITS-wide bit pattern.
 *
 * For each pattern one of three cases is stored:
 *  - prefix, stop bit and suffix all fit in the window:
 *    [0] = signed level, [1] = total code length;
 *  - only prefix and stop bit fit:
 *    [0] = 100 + prefix, [1] = prefix length including the stop bit;
 *  - not even the prefix fits (pattern 0):
 *    [0] = 100 + LEVEL_TAB_BITS, [1] = LEVEL_TAB_BITS.
 */
static av_cold void init_cavlc_level_tab(void){
    int suffix_length;
    unsigned int i;

    for (suffix_length = 0; suffix_length < 7; suffix_length++) {
        for (i = 0; i < (1<<LEVEL_TAB_BITS); i++) {
            int8_t *entry    = cavlc_level_tab[suffix_length][i];
            /* number of leading zero bits of i inside the window */
            const int prefix = LEVEL_TAB_BITS - av_log2(2*i);
            const int full   = prefix + 1 + suffix_length;

            if (full <= LEVEL_TAB_BITS) {
                /* complete code: combine prefix and suffix bits ... */
                int level_code = (prefix << suffix_length) +
                    (i >> (av_log2(i) - suffix_length)) - (1 << suffix_length);
                /* ... and map even/odd codes to positive/negative levels */
                const int sign = -(level_code & 1);

                level_code = (((2 + level_code) >> 1) ^ sign) - sign;
                entry[0]   = level_code;
                entry[1]   = full;
            } else if (prefix + 1 <= LEVEL_TAB_BITS) {
                /* escape: prefix known, suffix must be read separately */
                entry[0] = prefix + 100;
                entry[1] = prefix + 1;
            } else {
                /* escape: prefix continues past the lookup window */
                entry[0] = LEVEL_TAB_BITS + 100;
                entry[1] = LEVEL_TAB_BITS;
            }
        }
    }
}

av_cold void ff_h264_decode_init_vlc(void){
    static int done = 0;

    if (!done) {
        int i;
        int offset;
        done = 1;

        chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
        chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
        init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
                 &chroma_dc_coeff_token_len [0], 1, 1,
                 &chroma_dc_coeff_token_bits[0], 1, 1,
                 INIT_VLC_USE_NEW_STATIC);

342 343 344 345 346 347 348
        chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table;
        chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size;
        init_vlc(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9,
                 &chroma422_dc_coeff_token_len [0], 1, 1,
                 &chroma422_dc_coeff_token_bits[0], 1, 1,
                 INIT_VLC_USE_NEW_STATIC);

349 350 351 352 353 354 355 356 357 358 359 360 361 362 363
        offset = 0;
        for(i=0; i<4; i++){
            coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
            coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
            init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
                     &coeff_token_len [i][0], 1, 1,
                     &coeff_token_bits[i][0], 1, 1,
                     INIT_VLC_USE_NEW_STATIC);
            offset += coeff_token_vlc_tables_size[i];
        }
        /*
         * This is a one time safety check to make sure that
         * the packed static coeff_token_vlc table sizes
         * were initialized correctly.
         */
364
        av_assert0(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
365 366 367 368 369 370 371 372 373 374

        for(i=0; i<3; i++){
            chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
            chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
            init_vlc(&chroma_dc_total_zeros_vlc[i],
                     CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
                     &chroma_dc_total_zeros_len [i][0], 1, 1,
                     &chroma_dc_total_zeros_bits[i][0], 1, 1,
                     INIT_VLC_USE_NEW_STATIC);
        }
375 376 377 378 379 380 381 382 383 384 385

        for(i=0; i<7; i++){
            chroma422_dc_total_zeros_vlc[i].table = chroma422_dc_total_zeros_vlc_tables[i];
            chroma422_dc_total_zeros_vlc[i].table_allocated = chroma422_dc_total_zeros_vlc_tables_size;
            init_vlc(&chroma422_dc_total_zeros_vlc[i],
                     CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8,
                     &chroma422_dc_total_zeros_len [i][0], 1, 1,
                     &chroma422_dc_total_zeros_bits[i][0], 1, 1,
                     INIT_VLC_USE_NEW_STATIC);
        }

386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439
        for(i=0; i<15; i++){
            total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
            total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
            init_vlc(&total_zeros_vlc[i],
                     TOTAL_ZEROS_VLC_BITS, 16,
                     &total_zeros_len [i][0], 1, 1,
                     &total_zeros_bits[i][0], 1, 1,
                     INIT_VLC_USE_NEW_STATIC);
        }

        for(i=0; i<6; i++){
            run_vlc[i].table = run_vlc_tables[i];
            run_vlc[i].table_allocated = run_vlc_tables_size;
            init_vlc(&run_vlc[i],
                     RUN_VLC_BITS, 7,
                     &run_len [i][0], 1, 1,
                     &run_bits[i][0], 1, 1,
                     INIT_VLC_USE_NEW_STATIC);
        }
        run7_vlc.table = run7_vlc_table,
        run7_vlc.table_allocated = run7_vlc_table_size;
        init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
                 &run_len [6][0], 1, 1,
                 &run_bits[6][0], 1, 1,
                 INIT_VLC_USE_NEW_STATIC);

        init_cavlc_level_tab();
    }
}

/**
 * Read a unary-coded level prefix from the bitstream.
 *
 * Looks at the reader cache, determines the position of the first set bit
 * and consumes everything up to and including it.
 * NOTE(review): assumes GET_CACHE() yields the upcoming bits MSB-aligned in
 * a 32-bit word and av_log2() returns the index of the highest set bit --
 * confirm against get_bits.h / libavutil.
 *
 * @param gb bit reader positioned at the start of the prefix
 * @return number of bits consumed minus one
 */
static inline int get_level_prefix(GetBitContext *gb){
    unsigned int cache;
    int len;

    OPEN_READER(re, gb);
    UPDATE_CACHE(re, gb);
    cache = GET_CACHE(re, gb);

    len = 32 - av_log2(cache);
#ifdef TRACE
    print_bin(cache >> (32 - len), len);
    av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", cache >> (32 - len), len, len - 1, get_bits_count(gb), __FILE__);
#endif

    LAST_SKIP_BITS(re, gb, len);
    CLOSE_READER(re, gb);

    return len - 1;
}

/**
440
 * Decode a residual block.
441 442 443 444 445
 * @param n block index
 * @param scantable scantable
 * @param max_coeff number of coefficients in the block
 * @return <0 if an error occurred
 */
446
static int decode_residual(const H264Context *h, H264SliceContext *sl,
447 448 449 450
                           GetBitContext *gb, int16_t *block, int n,
                           const uint8_t *scantable, const uint32_t *qmul,
                           int max_coeff)
{
451 452
    static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
    int level[16];
453
    int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before;
454 455 456

    //FIXME put trailing_onex into the context

457
    if(max_coeff <= 8){
458 459 460 461
        if (max_coeff == 4)
            coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
        else
            coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc.table, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1);
462 463
        total_coeff= coeff_token>>2;
    }else{
464
        if(n >= LUMA_DC_BLOCK_INDEX){
465
            total_coeff= pred_non_zero_count(h, sl, (n - LUMA_DC_BLOCK_INDEX)*16);
466 467 468
            coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
            total_coeff= coeff_token>>2;
        }else{
469
            total_coeff= pred_non_zero_count(h, sl, n);
470 471 472 473
            coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
            total_coeff= coeff_token>>2;
        }
    }
474
    sl->non_zero_count_cache[scan8[n]] = total_coeff;
475 476 477 478 479 480

    //FIXME set last_non_zero?

    if(total_coeff==0)
        return 0;
    if(total_coeff > (unsigned)max_coeff) {
481
        av_log(h->avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", sl->mb_x, sl->mb_y, total_coeff);
482 483 484 485
        return -1;
    }

    trailing_ones= coeff_token&3;
486
    ff_tlog(h->avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
487
    av_assert2(total_coeff<=16);
488 489 490 491 492 493 494 495 496

    i = show_bits(gb, 3);
    skip_bits(gb, trailing_ones);
    level[0] = 1-((i&4)>>1);
    level[1] = 1-((i&2)   );
    level[2] = 1-((i&1)<<1);

    if(trailing_ones<total_coeff) {
        int mask, prefix;
497
        int suffix_length = total_coeff > 10 & trailing_ones < 3;
498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518
        int bitsi= show_bits(gb, LEVEL_TAB_BITS);
        int level_code= cavlc_level_tab[suffix_length][bitsi][0];

        skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
        if(level_code >= 100){
            prefix= level_code - 100;
            if(prefix == LEVEL_TAB_BITS)
                prefix += get_level_prefix(gb);

            //first coefficient has suffix_length equal to 0 or 1
            if(prefix<14){ //FIXME try to build a large unified VLC table for all this
                if(suffix_length)
                    level_code= (prefix<<1) + get_bits1(gb); //part
                else
                    level_code= prefix; //part
            }else if(prefix==14){
                if(suffix_length)
                    level_code= (prefix<<1) + get_bits1(gb); //part
                else
                    level_code= prefix + get_bits(gb, 4); //part
            }else{
519
                level_code= 30;
520 521
                if(prefix>=16){
                    if(prefix > 25+3){
522
                        av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n");
523 524
                        return -1;
                    }
525
                    level_code += (1<<(prefix-3))-4096;
526
                }
527
                level_code += get_bits(gb, prefix-3); //part
528 529 530 531 532 533 534 535
            }

            if(trailing_ones < 3) level_code += 2;

            suffix_length = 2;
            mask= -(level_code&1);
            level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
        }else{
536
            level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
537

538
            suffix_length = 1 + (level_code + 3U > 6U);
539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556
            level[trailing_ones]= level_code;
        }

        //remaining coefficients have suffix_length > 0
        for(i=trailing_ones+1;i<total_coeff;i++) {
            static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
            int bitsi= show_bits(gb, LEVEL_TAB_BITS);
            level_code= cavlc_level_tab[suffix_length][bitsi][0];

            skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
            if(level_code >= 100){
                prefix= level_code - 100;
                if(prefix == LEVEL_TAB_BITS){
                    prefix += get_level_prefix(gb);
                }
                if(prefix<15){
                    level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
                }else{
557 558 559 560 561 562
                    level_code = 15<<suffix_length;
                    if (prefix>=16) {
                        if(prefix > 25+3){
                            av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n");
                            return AVERROR_INVALIDDATA;
                        }
563
                        level_code += (1<<(prefix-3))-4096;
564 565
                    }
                    level_code += get_bits(gb, prefix-3);
566 567 568 569 570
                }
                mask= -(level_code&1);
                level_code= (((2+level_code)>>1) ^ mask) - mask;
            }
            level[i]= level_code;
571
            suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];
572 573 574 575 576 577
        }
    }

    if(total_coeff == max_coeff)
        zeros_left=0;
    else{
578 579 580 581 582 583 584 585
        if (max_coeff <= 8) {
            if (max_coeff == 4)
                zeros_left = get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[total_coeff].table,
                                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
            else
                zeros_left = get_vlc2(gb, (chroma422_dc_total_zeros_vlc-1)[total_coeff].table,
                                      CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1);
        } else {
586
            zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
587
        }
588 589
    }

590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623
#define STORE_BLOCK(type) \
    scantable += zeros_left + total_coeff - 1; \
    if(n >= LUMA_DC_BLOCK_INDEX){ \
        ((type*)block)[*scantable] = level[0]; \
        for(i=1;i<total_coeff && zeros_left > 0;i++) { \
            if(zeros_left < 7) \
                run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
            else \
                run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
            zeros_left -= run_before; \
            scantable -= 1 + run_before; \
            ((type*)block)[*scantable]= level[i]; \
        } \
        for(;i<total_coeff;i++) { \
            scantable--; \
            ((type*)block)[*scantable]= level[i]; \
        } \
    }else{ \
        ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
        for(i=1;i<total_coeff && zeros_left > 0;i++) { \
            if(zeros_left < 7) \
                run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
            else \
                run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
            zeros_left -= run_before; \
            scantable -= 1 + run_before; \
            ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
        } \
        for(;i<total_coeff;i++) { \
            scantable--; \
            ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
        } \
    }

624
    if (h->pixel_shift) {
625 626 627
        STORE_BLOCK(int32_t)
    } else {
        STORE_BLOCK(int16_t)
628 629 630
    }

    if(zeros_left<0){
631
        av_log(h->avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", sl->mb_x, sl->mb_y);
632 633 634 635 636 637
        return -1;
    }

    return 0;
}

638 639 640 641 642 643
/**
 * Decode all residual blocks of one plane of the current macroblock.
 *
 * @param h           decoder context (dequantisation tables)
 * @param sl          slice context (coefficient buffers, nnz cache, qscale)
 * @param gb          bit reader to parse from
 * @param scan        4x4 scan table
 * @param scan8x8     scan table used for the four passes of an 8x8 block
 * @param pixel_shift shift applied to coefficient buffer offsets
 * @param mb_type     macroblock type flags
 * @param cbp         coded block pattern; only the low four bits are used
 * @param p           plane index; 0 uses sl->qscale, otherwise
 *                    sl->chroma_qp[p - 1]
 * @return -1 on a decoding error. For intra 16x16 macroblocks 0xf when
 *         (cbp & 15) is set and 0 otherwise. For all other types a mask
 *         accumulated from the non-zero counts of the coded 8x8 blocks.
 */
static av_always_inline
int decode_luma_residual(const H264Context *h, H264SliceContext *sl,
                         GetBitContext *gb, const uint8_t *scan,
                         const uint8_t *scan8x8, int pixel_shift,
                         int mb_type, int cbp, int p)
{
    const int qscale = p ? sl->chroma_qp[p - 1] : sl->qscale;
    int blk8, blk4;

    if (IS_INTRA16x16(mb_type)) {
        int blk;

        /* clear the DC block before decoding into it */
        AV_ZERO128(sl->mb_luma_dc[p] +  0);
        AV_ZERO128(sl->mb_luma_dc[p] +  8);
        AV_ZERO128(sl->mb_luma_dc[p] + 16);
        AV_ZERO128(sl->mb_luma_dc[p] + 24);

        //FIXME continue if partitioned and other return -1 too
        if (decode_residual(h, sl, gb, sl->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX + p, scan, NULL, 16) < 0)
            return -1;

        av_assert2((cbp & 15) == 0 || (cbp & 15) == 15);

        if (!(cbp & 15)) {
            /* no AC coefficients: zero the whole 4x4 nnz area of this plane */
            fill_rectangle(&sl->non_zero_count_cache[scan8[p * 16]], 4, 4, 8, 0, 1);
            return 0;
        }

        /* 15 AC coefficients per 4x4 block, scan starting after the DC slot */
        for (blk = 0; blk < 16; blk++) {
            const int index = blk + p * 16;

            if (decode_residual(h, sl, gb, sl->mb + (16 * index << pixel_shift),
                                index, scan + 1, h->dequant4_coeff[p][qscale], 15) < 0)
                return -1;
        }
        return 0xf;
    } else {
        const int cqm = (IS_INTRA(mb_type) ? 0 : 3) + p;
        /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */
        int new_cbp = 0;

        for (blk8 = 0; blk8 < 4; blk8++) {
            uint8_t *const nnz = &sl->non_zero_count_cache[scan8[4 * blk8 + p * 16]];

            if (!(cbp & (1 << blk8))) {
                /* 8x8 block not coded: clear its four nnz entries */
                nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
                continue;
            }

            if (IS_8x8DCT(mb_type)) {
                int16_t *buf = &sl->mb[(64 * blk8 + 256 * p) << pixel_shift];

                /* four passes of 16 coefficients into the same 8x8 buffer */
                for (blk4 = 0; blk4 < 4; blk4++) {
                    const int index = blk4 + 4 * blk8 + p * 16;

                    if (decode_residual(h, sl, gb, buf, index, scan8x8 + 16 * blk4,
                                        h->dequant8_coeff[cqm][qscale], 16) < 0)
                        return -1;
                }
                /* fold the four counts into the first entry */
                nnz[0]  += nnz[1] + nnz[8] + nnz[9];
                new_cbp |= !!nnz[0] << blk8;
            } else {
                for (blk4 = 0; blk4 < 4; blk4++) {
                    const int index = blk4 + 4 * blk8 + p * 16;

                    if (decode_residual(h, sl, gb, sl->mb + (16 * index << pixel_shift), index,
                                        scan, h->dequant4_coeff[cqm][qscale], 16) < 0)
                        return -1;
                    new_cbp |= sl->non_zero_count_cache[scan8[index]] << blk8;
                }
            }
        }
        return new_cbp;
    }
}

709
int ff_h264_decode_mb_cavlc(const H264Context *h, H264SliceContext *sl)
710
{
711 712 713 714
    int mb_xy;
    int partition_count;
    unsigned int mb_type, cbp;
    int dct8x8_allowed= h->pps.transform_8x8_mode;
715
    int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
716
    const int pixel_shift = h->pixel_shift;
717

718
    mb_xy = sl->mb_xy = sl->mb_x + sl->mb_y*h->mb_stride;
719

720
    ff_tlog(h->avctx, "pic:%d mb:%d/%d\n", h->frame_num, sl->mb_x, sl->mb_y);
721 722
    cbp = 0; /* avoid warning. FIXME: find a solution without slowing
                down the code */
723
    if (sl->slice_type_nos != AV_PICTURE_TYPE_I) {
724
        if (sl->mb_skip_run == -1)
725
            sl->mb_skip_run = get_ue_golomb_long(&sl->gb);
726

727
        if (sl->mb_skip_run--) {
728
            if (FRAME_MBAFF(h) && (sl->mb_y & 1) == 0) {
729
                if (sl->mb_skip_run == 0)
730
                    sl->mb_mbaff = sl->mb_field_decoding_flag = get_bits1(&sl->gb);
731
            }
732
            decode_mb_skip(h, sl);
733 734 735
            return 0;
        }
    }
736
    if (FRAME_MBAFF(h)) {
737
        if ((sl->mb_y & 1) == 0)
738
            sl->mb_mbaff = sl->mb_field_decoding_flag = get_bits1(&sl->gb);
739 740
    }

741
    sl->prev_mb_skipped = 0;
742

743
    mb_type= get_ue_golomb(&sl->gb);
744
    if (sl->slice_type_nos == AV_PICTURE_TYPE_B) {
745 746 747 748 749 750 751
        if(mb_type < 23){
            partition_count= b_mb_type_info[mb_type].partition_count;
            mb_type=         b_mb_type_info[mb_type].type;
        }else{
            mb_type -= 23;
            goto decode_intra_mb;
        }
752
    } else if (sl->slice_type_nos == AV_PICTURE_TYPE_P) {
753 754 755 756 757 758 759 760
        if(mb_type < 5){
            partition_count= p_mb_type_info[mb_type].partition_count;
            mb_type=         p_mb_type_info[mb_type].type;
        }else{
            mb_type -= 5;
            goto decode_intra_mb;
        }
    }else{
761
       av_assert2(sl->slice_type_nos == AV_PICTURE_TYPE_I);
762
        if (sl->slice_type == AV_PICTURE_TYPE_SI && mb_type)
763 764 765
            mb_type--;
decode_intra_mb:
        if(mb_type > 25){
766
            av_log(h->avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(sl->slice_type), sl->mb_x, sl->mb_y);
767 768 769 770
            return -1;
        }
        partition_count=0;
        cbp= i_mb_type_info[mb_type].cbp;
771
        sl->intra16x16_pred_mode = i_mb_type_info[mb_type].pred_mode;
772 773 774
        mb_type= i_mb_type_info[mb_type].type;
    }

775
    if (MB_FIELD(sl))
776 777
        mb_type |= MB_TYPE_INTERLACED;

778
    h->slice_table[mb_xy] = sl->slice_num;
779 780

    if(IS_INTRA_PCM(mb_type)){
781
        const int mb_size = ff_h264_mb_sizes[h->sps.chroma_format_idc] *
782
                            h->sps.bit_depth_luma;
783 784

        // We assume these blocks are very rare so we do not optimize it.
785 786
        sl->intra_pcm_ptr = align_get_bits(&sl->gb);
        if (get_bits_left(&sl->gb) < mb_size) {
787 788 789
            av_log(h->avctx, AV_LOG_ERROR, "Not enough data for an intra PCM block.\n");
            return AVERROR_INVALIDDATA;
        }
790
        skip_bits_long(&sl->gb, mb_size);
791 792

        // In deblocking, the quantizer is 0
793
        h->cur_pic.qscale_table[mb_xy] = 0;
794
        // All coeffs are present
795
        memset(h->non_zero_count[mb_xy], 16, 48);
796

797
        h->cur_pic.mb_type[mb_xy] = mb_type;
798 799 800
        return 0;
    }

801 802
    fill_decode_neighbors(h, sl, mb_type);
    fill_decode_caches(h, sl, mb_type);
803 804 805 806 807 808 809 810

    //mb_pred
    if(IS_INTRA(mb_type)){
        int pred_mode;
//            init_top_left_availability(h);
        if(IS_INTRA4x4(mb_type)){
            int i;
            int di = 1;
811
            if(dct8x8_allowed && get_bits1(&sl->gb)){
812 813 814 815 816 817
                mb_type |= MB_TYPE_8x8DCT;
                di = 4;
            }

//                fill_intra4x4_pred_table(h);
            for(i=0; i<16; i+=di){
818
                int mode = pred_intra_mode(h, sl, i);
819

820 821
                if(!get_bits1(&sl->gb)){
                    const int rem_mode= get_bits(&sl->gb, 3);
822 823 824 825
                    mode = rem_mode + (rem_mode >= mode);
                }

                if(di==4)
826
                    fill_rectangle(&sl->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1);
827
                else
828
                    sl->intra4x4_pred_mode_cache[scan8[i]] = mode;
829
            }
830 831
            write_back_intra_pred_mode(h, sl);
            if (ff_h264_check_intra4x4_pred_mode(h, sl) < 0)
832 833
                return -1;
        }else{
834
            sl->intra16x16_pred_mode = ff_h264_check_intra_pred_mode(h, sl, sl->intra16x16_pred_mode, 0);
835
            if (sl->intra16x16_pred_mode < 0)
836 837
                return -1;
        }
838
        if(decode_chroma){
839
            pred_mode= ff_h264_check_intra_pred_mode(h, sl, get_ue_golomb_31(&sl->gb), 1);
840 841
            if(pred_mode < 0)
                return -1;
842
            sl->chroma_pred_mode = pred_mode;
843
        } else {
844
            sl->chroma_pred_mode = DC_128_PRED8x8;
845 846 847 848
        }
    }else if(partition_count==4){
        int i, j, sub_partition_count[4], list, ref[2][4];

849
        if (sl->slice_type_nos == AV_PICTURE_TYPE_B) {
850
            for(i=0; i<4; i++){
851
                sl->sub_mb_type[i]= get_ue_golomb_31(&sl->gb);
852
                if(sl->sub_mb_type[i] >=13){
853
                    av_log(h->avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", sl->sub_mb_type[i], sl->mb_x, sl->mb_y);
854 855
                    return -1;
                }
856 857
                sub_partition_count[i]= b_sub_mb_type_info[ sl->sub_mb_type[i] ].partition_count;
                sl->sub_mb_type[i]=      b_sub_mb_type_info[ sl->sub_mb_type[i] ].type;
858
            }
859
            if( IS_DIRECT(sl->sub_mb_type[0]|sl->sub_mb_type[1]|sl->sub_mb_type[2]|sl->sub_mb_type[3])) {
860 861 862 863 864
                ff_h264_pred_direct_motion(h, sl, &mb_type);
                sl->ref_cache[0][scan8[4]] =
                sl->ref_cache[1][scan8[4]] =
                sl->ref_cache[0][scan8[12]] =
                sl->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
865 866
            }
        }else{
867
            av_assert2(sl->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
868
            for(i=0; i<4; i++){
869
                sl->sub_mb_type[i]= get_ue_golomb_31(&sl->gb);
870
                if(sl->sub_mb_type[i] >=4){
871
                    av_log(h->avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", sl->sub_mb_type[i], sl->mb_x, sl->mb_y);
872 873
                    return -1;
                }
874 875
                sub_partition_count[i]= p_sub_mb_type_info[ sl->sub_mb_type[i] ].partition_count;
                sl->sub_mb_type[i]=      p_sub_mb_type_info[ sl->sub_mb_type[i] ].type;
876 877 878
            }
        }

879
        for (list = 0; list < sl->list_count; list++) {
880
            int ref_count = IS_REF0(mb_type) ? 1 : sl->ref_count[list] << MB_MBAFF(sl);
881
            for(i=0; i<4; i++){
882 883
                if(IS_DIRECT(sl->sub_mb_type[i])) continue;
                if(IS_DIR(sl->sub_mb_type[i], 0, list)){
884 885 886 887
                    unsigned int tmp;
                    if(ref_count == 1){
                        tmp= 0;
                    }else if(ref_count == 2){
888
                        tmp= get_bits1(&sl->gb)^1;
889
                    }else{
890
                        tmp= get_ue_golomb_31(&sl->gb);
891
                        if(tmp>=ref_count){
892
                            av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
893 894 895 896 897 898 899 900 901 902 903 904
                            return -1;
                        }
                    }
                    ref[list][i]= tmp;
                }else{
                 //FIXME
                    ref[list][i] = -1;
                }
            }
        }

        if(dct8x8_allowed)
905
            dct8x8_allowed = get_dct8x8_allowed(h, sl);
906

907
        for (list = 0; list < sl->list_count; list++) {
908
            for(i=0; i<4; i++){
909
                if(IS_DIRECT(sl->sub_mb_type[i])) {
910
                    sl->ref_cache[list][ scan8[4*i] ] = sl->ref_cache[list][ scan8[4*i]+1 ];
911 912
                    continue;
                }
913 914
                sl->ref_cache[list][ scan8[4*i]   ]=sl->ref_cache[list][ scan8[4*i]+1 ]=
                sl->ref_cache[list][ scan8[4*i]+8 ]=sl->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
915

916 917
                if(IS_DIR(sl->sub_mb_type[i], 0, list)){
                    const int sub_mb_type= sl->sub_mb_type[i];
918 919 920 921
                    const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
                    for(j=0; j<sub_partition_count[i]; j++){
                        int mx, my;
                        const int index= 4*i + block_width*j;
922 923
                        int16_t (* mv_cache)[2]= &sl->mv_cache[list][ scan8[index] ];
                        pred_motion(h, sl, index, block_width, list, sl->ref_cache[list][ scan8[index] ], &mx, &my);
924 925
                        mx += get_se_golomb(&sl->gb);
                        my += get_se_golomb(&sl->gb);
926
                        ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943

                        if(IS_SUB_8X8(sub_mb_type)){
                            mv_cache[ 1 ][0]=
                            mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
                            mv_cache[ 1 ][1]=
                            mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
                        }else if(IS_SUB_8X4(sub_mb_type)){
                            mv_cache[ 1 ][0]= mx;
                            mv_cache[ 1 ][1]= my;
                        }else if(IS_SUB_4X8(sub_mb_type)){
                            mv_cache[ 8 ][0]= mx;
                            mv_cache[ 8 ][1]= my;
                        }
                        mv_cache[ 0 ][0]= mx;
                        mv_cache[ 0 ][1]= my;
                    }
                }else{
944
                    uint32_t *p= (uint32_t *)&sl->mv_cache[list][ scan8[4*i] ][0];
945 946 947 948 949 950
                    p[0] = p[1]=
                    p[8] = p[9]= 0;
                }
            }
        }
    }else if(IS_DIRECT(mb_type)){
951
        ff_h264_pred_direct_motion(h, sl, &mb_type);
952 953 954 955 956
        dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
    }else{
        int list, mx, my, i;
         //FIXME we should set ref_idx_l? to 0 if we use that later ...
        if(IS_16X16(mb_type)){
957
            for (list = 0; list < sl->list_count; list++) {
958 959
                    unsigned int val;
                    if(IS_DIR(mb_type, 0, list)){
960 961
                        unsigned rc = sl->ref_count[list] << MB_MBAFF(sl);
                        if (rc == 1) {
962
                            val= 0;
963
                        } else if (rc == 2) {
964
                            val= get_bits1(&sl->gb)^1;
965
                        }else{
966
                            val= get_ue_golomb_31(&sl->gb);
967
                            if (val >= rc) {
968
                                av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
969 970 971
                                return -1;
                            }
                        }
972
                    fill_rectangle(&sl->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
973
                    }
974
            }
975
            for (list = 0; list < sl->list_count; list++) {
976
                if(IS_DIR(mb_type, 0, list)){
977
                    pred_motion(h, sl, 0, 4, list, sl->ref_cache[list][ scan8[0] ], &mx, &my);
978 979
                    mx += get_se_golomb(&sl->gb);
                    my += get_se_golomb(&sl->gb);
980
                    ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
981

982
                    fill_rectangle(sl->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
983
                }
984 985 986
            }
        }
        else if(IS_16X8(mb_type)){
987
            for (list = 0; list < sl->list_count; list++) {
988 989 990
                    for(i=0; i<2; i++){
                        unsigned int val;
                        if(IS_DIR(mb_type, i, list)){
991 992
                            unsigned rc = sl->ref_count[list] << MB_MBAFF(sl);
                            if (rc == 1) {
993
                                val= 0;
994
                            } else if (rc == 2) {
995
                                val= get_bits1(&sl->gb)^1;
996
                            }else{
997
                                val= get_ue_golomb_31(&sl->gb);
998
                                if (val >= rc) {
999
                                    av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
1000 1001 1002 1003 1004
                                    return -1;
                                }
                            }
                        }else
                            val= LIST_NOT_USED&0xFF;
1005
                        fill_rectangle(&sl->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
1006 1007
                    }
            }
1008
            for (list = 0; list < sl->list_count; list++) {
1009 1010 1011
                for(i=0; i<2; i++){
                    unsigned int val;
                    if(IS_DIR(mb_type, i, list)){
1012
                        pred_16x8_motion(h, sl, 8*i, list, sl->ref_cache[list][scan8[0] + 16*i], &mx, &my);
1013 1014
                        mx += get_se_golomb(&sl->gb);
                        my += get_se_golomb(&sl->gb);
1015
                        ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
1016 1017 1018 1019

                        val= pack16to32(mx,my);
                    }else
                        val=0;
1020
                    fill_rectangle(sl->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
1021 1022 1023
                }
            }
        }else{
1024
            av_assert2(IS_8X16(mb_type));
1025
            for (list = 0; list < sl->list_count; list++) {
1026 1027 1028
                    for(i=0; i<2; i++){
                        unsigned int val;
                        if(IS_DIR(mb_type, i, list)){ //FIXME optimize
1029 1030
                            unsigned rc = sl->ref_count[list] << MB_MBAFF(sl);
                            if (rc == 1) {
1031
                                val= 0;
1032
                            } else if (rc == 2) {
1033
                                val= get_bits1(&sl->gb)^1;
1034
                            }else{
1035
                                val= get_ue_golomb_31(&sl->gb);
1036
                                if (val >= rc) {
1037
                                    av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
1038 1039 1040 1041 1042
                                    return -1;
                                }
                            }
                        }else
                            val= LIST_NOT_USED&0xFF;
1043
                        fill_rectangle(&sl->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
1044 1045
                    }
            }
1046
            for (list = 0; list < sl->list_count; list++) {
1047 1048 1049
                for(i=0; i<2; i++){
                    unsigned int val;
                    if(IS_DIR(mb_type, i, list)){
1050
                        pred_8x16_motion(h, sl, i*4, list, sl->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
1051 1052
                        mx += get_se_golomb(&sl->gb);
                        my += get_se_golomb(&sl->gb);
1053
                        ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
1054 1055 1056 1057

                        val= pack16to32(mx,my);
                    }else
                        val=0;
1058
                    fill_rectangle(sl->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
1059 1060 1061 1062 1063 1064
                }
            }
        }
    }

    if(IS_INTER(mb_type))
1065
        write_back_motion(h, sl, mb_type);
1066 1067

    if(!IS_INTRA16x16(mb_type)){
1068
        cbp= get_ue_golomb(&sl->gb);
1069

1070 1071
        if(decode_chroma){
            if(cbp > 47){
1072
                av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, sl->mb_x, sl->mb_y);
1073 1074
                return -1;
            }
1075 1076 1077
            if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
            else                     cbp= golomb_to_inter_cbp   [cbp];
        }else{
1078
            if(cbp > 15){
1079
                av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, sl->mb_x, sl->mb_y);
1080 1081
                return -1;
            }
1082 1083 1084
            if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
            else                     cbp= golomb_to_inter_cbp_gray[cbp];
        }
1085 1086
    } else {
        if (!decode_chroma && cbp>15) {
1087
            av_log(h->avctx, AV_LOG_ERROR, "gray chroma\n");
1088 1089
            return AVERROR_INVALIDDATA;
        }
1090 1091 1092
    }

    if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
1093
        mb_type |= MB_TYPE_8x8DCT*get_bits1(&sl->gb);
1094
    }
1095
    sl->cbp=
1096
    h->cbp_table[mb_xy]= cbp;
1097
    h->cur_pic.mb_type[mb_xy] = mb_type;
1098 1099

    if(cbp || IS_INTRA16x16(mb_type)){
1100
        int i4x4, i8x8, chroma_idx;
1101
        int dquant;
1102
        int ret;
1103
        GetBitContext *gb = &sl->gb;
1104
        const uint8_t *scan, *scan8x8;
1105
        const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
1106 1107

        if(IS_INTERLACED(mb_type)){
1108 1109
            scan8x8 = sl->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
            scan    = sl->qscale ? h->field_scan : h->field_scan_q0;
1110
        }else{
1111 1112
            scan8x8 = sl->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
            scan    = sl->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
1113 1114
        }

1115
        dquant= get_se_golomb(&sl->gb);
1116

1117
        sl->qscale += dquant;
1118

1119 1120 1121 1122
        if (((unsigned)sl->qscale) > max_qp){
            if (sl->qscale < 0) sl->qscale += max_qp + 1;
            else                sl->qscale -= max_qp+1;
            if (((unsigned)sl->qscale) > max_qp){
1123
                av_log(h->avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, sl->mb_x, sl->mb_y);
1124 1125
                return -1;
            }
1126 1127
        }

1128 1129
        sl->chroma_qp[0] = get_chroma_qp(h, 0, sl->qscale);
        sl->chroma_qp[1] = get_chroma_qp(h, 1, sl->qscale);
1130

1131
        if ((ret = decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ) {
1132 1133 1134
            return -1;
        }
        h->cbp_table[mb_xy] |= ret << 12;
1135
        if (CHROMA444(h)) {
1136
            if (decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ) {
1137 1138
                return -1;
            }
1139
            if (decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ) {
1140 1141 1142
                return -1;
            }
        } else {
1143 1144
            const int num_c8x8 = h->sps.chroma_format_idc;

1145 1146
            if(cbp&0x30){
                for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1147
                    if (decode_residual(h, sl, gb, sl->mb + ((256 + 16*16*chroma_idx) << pixel_shift),
1148
                                        CHROMA_DC_BLOCK_INDEX+chroma_idx,
1149
                                        CHROMA422(h) ? chroma422_dc_scan : chroma_dc_scan,
1150
                                        NULL, 4*num_c8x8) < 0) {
1151 1152 1153
                        return -1;
                    }
            }
1154

1155 1156
            if(cbp&0x20){
                for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1157
                    const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][sl->chroma_qp[chroma_idx]];
1158
                    int16_t *mb = sl->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
1159 1160 1161
                    for (i8x8 = 0; i8x8<num_c8x8; i8x8++) {
                        for (i4x4 = 0; i4x4 < 4; i4x4++) {
                            const int index = 16 + 16*chroma_idx + 8*i8x8 + i4x4;
1162
                            if (decode_residual(h, sl, gb, mb, index, scan + 1, qmul, 15) < 0)
1163
                                return -1;
1164
                            mb += 16 << pixel_shift;
1165 1166 1167 1168
                        }
                    }
                }
            }else{
1169 1170
                fill_rectangle(&sl->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
                fill_rectangle(&sl->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1171 1172 1173
            }
        }
    }else{
1174 1175 1176
        fill_rectangle(&sl->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
        fill_rectangle(&sl->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
        fill_rectangle(&sl->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1177
    }
1178
    h->cur_pic.qscale_table[mb_xy] = sl->qscale;
1179
    write_back_non_zero_count(h, sl);
1180 1181 1182

    return 0;
}