/*
 * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * H.264 / AVC / MPEG-4 part10 cavlc bitstream decoding.
 * @author Michael Niedermayer <michaelni@gmx.at>
 */

/* This file is the CAVLC decoder, so CABAC(h) is a constant 0 here.
 * NOTE(review): presumably consumed by the shared H.264 headers included
 * below to compile out CABAC-only paths - confirm. */
#define CABAC(h) 0
/* NOTE(review): presumably has to be defined before the bitstream reader
 * header is pulled in so that the unchecked reader variant is used - confirm. */
#define UNCHECKED_BITSTREAM_READER 1

#include "internal.h"
#include "avcodec.h"
#include "h264dec.h"
#include "h264_mvpred.h"
#include "h264data.h"
#include "golomb.h"
#include "mpegutils.h"
#include "libavutil/avassert.h"


/*
 * 16-entry lookup table.
 * NOTE(review): the name suggests it maps an Exp-Golomb code number to the
 * coded block pattern of inter macroblocks when no chroma is coded ("gray");
 * the lookup itself is outside this part of the file - confirm.
 */
static const uint8_t golomb_to_inter_cbp_gray[16] = {
     0,  1,  2,  4,
     8,  3,  5, 10,
    12, 15,  7, 11,
    13, 14,  6,  9,
};

/*
 * 16-entry lookup table.
 * NOTE(review): the name suggests it maps an Exp-Golomb code number to the
 * coded block pattern of intra 4x4 macroblocks when no chroma is coded
 * ("gray"); the lookup itself is outside this part of the file - confirm.
 */
static const uint8_t golomb_to_intra4x4_cbp_gray[16] = {
    15,  0,  7, 11,
    13, 14,  3,  5,
    10, 12,  1,  2,
     4,  8,  6,  9,
};

/*
 * Length table handed to init_vlc() (together with
 * chroma_dc_coeff_token_bits) to build chroma_dc_coeff_token_vlc.
 * One row of four entries per total_coeff value; the column matches the
 * trailing_ones value that decode_residual() extracts from the decoded token
 * (total_coeff = token >> 2, trailing_ones = token & 3).
 * NOTE(review): a 0 entry presumably marks a combination without a code.
 */
static const uint8_t chroma_dc_coeff_token_len[4 * 5] = {
    2, 0, 0, 0,    /* total_coeff 0 */
    6, 1, 0, 0,    /* total_coeff 1 */
    6, 6, 3, 0,    /* total_coeff 2 */
    6, 7, 7, 6,    /* total_coeff 3 */
    6, 8, 8, 7,    /* total_coeff 4 */
};

/*
 * Code table handed to init_vlc() (together with
 * chroma_dc_coeff_token_len) to build chroma_dc_coeff_token_vlc.
 * Same layout as the length table: one row of four entries per total_coeff
 * value, one column per trailing_ones value.
 */
static const uint8_t chroma_dc_coeff_token_bits[4 * 5] = {
    1, 0, 0, 0,    /* total_coeff 0 */
    7, 1, 0, 0,    /* total_coeff 1 */
    4, 6, 1, 0,    /* total_coeff 2 */
    3, 3, 2, 5,    /* total_coeff 3 */
    2, 3, 2, 0,    /* total_coeff 4 */
};

/*
 * Length table handed to init_vlc() (together with
 * chroma422_dc_coeff_token_bits) to build chroma422_dc_coeff_token_vlc.
 * One row of four entries per total_coeff value (0..8); the column matches
 * the trailing_ones value that decode_residual() extracts from the decoded
 * token (total_coeff = token >> 2, trailing_ones = token & 3).
 * NOTE(review): a 0 entry presumably marks a combination without a code.
 */
static const uint8_t chroma422_dc_coeff_token_len[4*9]={
  1,  0,  0,  0,
  7,  2,  0,  0,
  7,  7,  3,  0,
  9,  7,  7,  5,
  9,  9,  7,  6,
 10, 10,  9,  7,
 11, 11, 10,  7,
 12, 12, 11, 10,
 13, 12, 12, 11,
};

/*
 * Code table handed to init_vlc() (together with
 * chroma422_dc_coeff_token_len) to build chroma422_dc_coeff_token_vlc.
 * Same layout as the length table: one row of four entries per total_coeff
 * value, one column per trailing_ones value.
 */
static const uint8_t chroma422_dc_coeff_token_bits[4 * 9] = {
     1,  0,  0,  0,    /* total_coeff 0 */
    15,  1,  0,  0,    /* total_coeff 1 */
    14, 13,  1,  0,    /* total_coeff 2 */
     7, 12, 11,  1,    /* total_coeff 3 */
     6,  5, 10,  1,    /* total_coeff 4 */
     7,  6,  4,  9,    /* total_coeff 5 */
     7,  6,  5,  8,    /* total_coeff 6 */
     7,  6,  5,  4,    /* total_coeff 7 */
     7,  5,  4,  4,    /* total_coeff 8 */
};

/*
 * Length tables handed to init_vlc() (together with coeff_token_bits) to
 * build the four coeff_token_vlc[] tables; decode_residual() picks one of the
 * four via coeff_token_table_index[] from the predicted non-zero count.
 * Within each table, every group of four entries belongs to one total_coeff
 * value (0..16) and the position inside the group matches trailing_ones
 * (total_coeff = token >> 2, trailing_ones = token & 3 in decode_residual()).
 * NOTE(review): a 0 entry presumably marks a combination without a code.
 */
static const uint8_t coeff_token_len[4][4*17]={
{
     1, 0, 0, 0,
     6, 2, 0, 0,     8, 6, 3, 0,     9, 8, 7, 5,    10, 9, 8, 6,
    11,10, 9, 7,    13,11,10, 8,    13,13,11, 9,    13,13,13,10,
    14,14,13,11,    14,14,14,13,    15,15,14,14,    15,15,15,14,
    16,15,15,15,    16,16,16,15,    16,16,16,16,    16,16,16,16,
},
{
     2, 0, 0, 0,
     6, 2, 0, 0,     6, 5, 3, 0,     7, 6, 6, 4,     8, 6, 6, 4,
     8, 7, 7, 5,     9, 8, 8, 6,    11, 9, 9, 6,    11,11,11, 7,
    12,11,11, 9,    12,12,12,11,    12,12,12,11,    13,13,13,12,
    13,13,13,13,    13,14,13,13,    14,14,14,13,    14,14,14,14,
},
{
     4, 0, 0, 0,
     6, 4, 0, 0,     6, 5, 4, 0,     6, 5, 5, 4,     7, 5, 5, 4,
     7, 5, 5, 4,     7, 6, 6, 4,     7, 6, 6, 4,     8, 7, 7, 5,
     8, 8, 7, 6,     9, 8, 8, 7,     9, 9, 8, 8,     9, 9, 9, 8,
    10, 9, 9, 9,    10,10,10,10,    10,10,10,10,    10,10,10,10,
},
{
     6, 0, 0, 0,
     6, 6, 0, 0,     6, 6, 6, 0,     6, 6, 6, 6,     6, 6, 6, 6,
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
}
};

/*
 * Code tables handed to init_vlc() (together with coeff_token_len) to build
 * the four coeff_token_vlc[] tables. The layout mirrors coeff_token_len:
 * every group of four entries belongs to one total_coeff value (0..16) and
 * the position inside the group matches trailing_ones.
 */
static const uint8_t coeff_token_bits[4][4*17]={
{
     1, 0, 0, 0,
     5, 1, 0, 0,     7, 4, 1, 0,     7, 6, 5, 3,     7, 6, 5, 3,
     7, 6, 5, 4,    15, 6, 5, 4,    11,14, 5, 4,     8,10,13, 4,
    15,14, 9, 4,    11,10,13,12,    15,14, 9,12,    11,10,13, 8,
    15, 1, 9,12,    11,14,13, 8,     7,10, 9,12,     4, 6, 5, 8,
},
{
     3, 0, 0, 0,
    11, 2, 0, 0,     7, 7, 3, 0,     7,10, 9, 5,     7, 6, 5, 4,
     4, 6, 5, 6,     7, 6, 5, 8,    15, 6, 5, 4,    11,14,13, 4,
    15,10, 9, 4,    11,14,13,12,     8,10, 9, 8,    15,14,13,12,
    11,10, 9,12,     7,11, 6, 8,     9, 8,10, 1,     7, 6, 5, 4,
},
{
    15, 0, 0, 0,
    15,14, 0, 0,    11,15,13, 0,     8,12,14,12,    15,10,11,11,
    11, 8, 9,10,     9,14,13, 9,     8,10, 9, 8,    15,14,13,13,
    11,14,10,12,    15,10,13,12,    11,14, 9,12,     8,10,13, 8,
    13, 7, 9,12,     9,12,11,10,     5, 8, 7, 6,     1, 4, 3, 2,
},
{
     3, 0, 0, 0,
     0, 1, 0, 0,     4, 5, 6, 0,     8, 9,10,11,    12,13,14,15,
    16,17,18,19,    20,21,22,23,    24,25,26,27,    28,29,30,31,
    32,33,34,35,    36,37,38,39,    40,41,42,43,    44,45,46,47,
    48,49,50,51,    52,53,54,55,    56,57,58,59,    60,61,62,63,
}
};

/*
 * Length tables for the total_zeros VLCs. Row i is handed to init_vlc()
 * (together with total_zeros_bits[i]) to build total_zeros_vlc[i + 1], which
 * decode_residual() selects by total_coeff.
 * Only 15 of the 16 declared rows are initialised, and the rows get shorter
 * as the index grows; all omitted entries are implicitly 0.
 * NOTE(review): the column presumably corresponds to the decoded total_zeros
 * value - confirm against init_vlc()'s symbol numbering.
 */
static const uint8_t total_zeros_len[16][16]= {
    {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
    {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
    {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
    {5,3,4,4,3,3,3,4,3,4,5,5,5},
    {4,4,4,3,3,3,3,3,4,5,4,5},
    {6,5,3,3,3,3,3,3,4,3,6},
    {6,5,3,3,3,2,3,4,3,6},
    {6,4,5,3,2,2,3,3,6},
    {6,6,4,2,2,3,2,5},
    {5,5,3,2,2,2,4},
    {4,4,3,3,1,3},
    {4,4,2,1,3},
    {3,3,1,2},
    {2,2,1},
    {1,1},
};

/*
 * Code tables for the total_zeros VLCs. Row i is handed to init_vlc()
 * (together with total_zeros_len[i]) to build total_zeros_vlc[i + 1].
 * Same ragged layout as total_zeros_len: 15 initialised rows, omitted
 * entries implicitly 0.
 */
static const uint8_t total_zeros_bits[16][16]= {
    {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
    {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
    {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
    {3,7,5,4,6,5,4,3,3,2,2,1,0},
    {5,4,3,7,6,5,4,3,2,1,1,0},
    {1,1,7,6,5,4,3,2,1,1,0},
    {1,1,5,4,3,3,2,1,1,0},
    {1,1,1,3,3,2,2,1,0},
    {1,0,1,3,2,1,1,1},
    {1,0,1,3,2,1,1},
    {0,1,1,2,1,3},
    {0,1,1,1,1},
    {0,1,1,1},
    {0,1,1},
    {0,1},
};

/*
 * Length tables for the chroma DC total_zeros VLCs. Row i is handed to
 * init_vlc() (together with chroma_dc_total_zeros_bits[i]) to build
 * chroma_dc_total_zeros_vlc[i + 1], which decode_residual() selects by
 * total_coeff.
 */
static const uint8_t chroma_dc_total_zeros_len[3][4] = {
    { 1, 2, 3, 3 },    /* total_coeff 1 */
    { 1, 2, 2, 0 },    /* total_coeff 2 */
    { 1, 1, 0, 0 },    /* total_coeff 3 */
};

/*
 * Code tables for the chroma DC total_zeros VLCs. Row i is handed to
 * init_vlc() (together with chroma_dc_total_zeros_len[i]) to build
 * chroma_dc_total_zeros_vlc[i + 1].
 */
static const uint8_t chroma_dc_total_zeros_bits[3][4] = {
    { 1, 1, 1, 0 },    /* total_coeff 1 */
    { 1, 1, 0, 0 },    /* total_coeff 2 */
    { 1, 0, 0, 0 },    /* total_coeff 3 */
};

/*
 * Length tables for the 4:2:2 chroma DC total_zeros VLCs. Row i is handed to
 * init_vlc() (together with chroma422_dc_total_zeros_bits[i]) to build
 * chroma422_dc_total_zeros_vlc[i + 1], which decode_residual() selects by
 * total_coeff. Rows get shorter as the index grows; omitted entries are
 * implicitly 0.
 */
static const uint8_t chroma422_dc_total_zeros_len[7][8]= {
    { 1, 3, 3, 4, 4, 4, 5, 5 },
    { 3, 2, 3, 3, 3, 3, 3 },
    { 3, 3, 2, 2, 3, 3 },
    { 3, 2, 2, 2, 3 },
    { 2, 2, 2, 2 },
    { 2, 2, 1 },
    { 1, 1 },
};

/*
 * Code tables for the 4:2:2 chroma DC total_zeros VLCs. Row i is handed to
 * init_vlc() (together with chroma422_dc_total_zeros_len[i]) to build
 * chroma422_dc_total_zeros_vlc[i + 1]. Omitted trailing entries are
 * implicitly 0.
 */
static const uint8_t chroma422_dc_total_zeros_bits[7][8] = {
    { 1, 2, 3, 2, 3, 1, 1, 0 },    /* total_coeff 1 */
    { 0, 1, 1, 4, 5, 6, 7 },       /* total_coeff 2 */
    { 0, 1, 1, 2, 6, 7 },          /* total_coeff 3 */
    { 6, 0, 1, 2, 7 },             /* total_coeff 4 */
    { 0, 1, 2, 3 },                /* total_coeff 5 */
    { 0, 1, 1 },                   /* total_coeff 6 */
    { 0, 1 },                      /* total_coeff 7 */
};

/*
 * Length tables for the run_before VLCs. Rows 0..5 are handed to init_vlc()
 * (7 codes each, together with run_bits) to build run_vlc[1..6]; row 6
 * (16 codes) builds run7_vlc. decode_residual() uses run_vlc[zeros_left]
 * while zeros_left < 7 and run7_vlc otherwise. Omitted entries are
 * implicitly 0.
 */
static const uint8_t run_len[7][16]={
    {1,1},
    {1,2,2},
    {2,2,2,2},
    {2,2,2,3,3},
    {2,2,3,3,3,3},
    {2,3,3,3,3,3,3},
    {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
};

/*
 * Code tables for the run_before VLCs. Rows 0..5 are handed to init_vlc()
 * (together with run_len) to build run_vlc[1..6]; row 6 builds run7_vlc.
 * Omitted trailing entries are implicitly 0.
 */
static const uint8_t run_bits[7][16] = {
    { 1, 0 },                                          /* run_vlc[1] */
    { 1, 1, 0 },                                       /* run_vlc[2] */
    { 3, 2, 1, 0 },                                    /* run_vlc[3] */
    { 3, 2, 1, 1, 0 },                                 /* run_vlc[4] */
    { 3, 2, 3, 2, 1, 0 },                              /* run_vlc[5] */
    { 3, 0, 1, 3, 2, 5, 4 },                           /* run_vlc[6] */
    { 7, 6, 5, 4, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1 },   /* run7_vlc   */
};

/*
 * Static storage for the VLC lookup tables built by
 * ff_h264_decode_init_vlc(). Each VLC gets a backing array and a matching
 * *_size constant that is stored into VLC.table_allocated before init_vlc()
 * is called.
 */

/* The four coeff_token tables share one packed array; the per-table sizes
 * must sum to the array length (asserted in ff_h264_decode_init_vlc()). */
static VLC coeff_token_vlc[4];
static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
static const int coeff_token_vlc_tables_size[4]={520,332,280,256};

static VLC chroma_dc_coeff_token_vlc;
static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
static const int chroma_dc_coeff_token_vlc_table_size = 256;

static VLC chroma422_dc_coeff_token_vlc;
static VLC_TYPE chroma422_dc_coeff_token_vlc_table[8192][2];
static const int chroma422_dc_coeff_token_vlc_table_size = 8192;

/* The "+1" arrays below are indexed by total_coeff (or zeros_left for
 * run_vlc); element 0 is never initialised by ff_h264_decode_init_vlc(). */
static VLC total_zeros_vlc[15+1];
static VLC_TYPE total_zeros_vlc_tables[15][512][2];
static const int total_zeros_vlc_tables_size = 512;

static VLC chroma_dc_total_zeros_vlc[3+1];
static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
static const int chroma_dc_total_zeros_vlc_tables_size = 8;

static VLC chroma422_dc_total_zeros_vlc[7+1];
static VLC_TYPE chroma422_dc_total_zeros_vlc_tables[7][32][2];
static const int chroma422_dc_total_zeros_vlc_tables_size = 32;

static VLC run_vlc[6+1];
static VLC_TYPE run_vlc_tables[6][8][2];
static const int run_vlc_tables_size = 8;

static VLC run7_vlc;
static VLC_TYPE run7_vlc_table[96][2];
static const int run7_vlc_table_size = 96;

/* Level lookup table filled by init_cavlc_level_tab(): indexed by
 * [suffix_length][next LEVEL_TAB_BITS bits]; element [0] is the level (or an
 * escape value >= 100) and element [1] the number of bits to skip. */
#define LEVEL_TAB_BITS 8
static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];

/* Index widths passed to init_vlc() and get_vlc2() for each VLC above. */
#define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
#define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13
#define COEFF_TOKEN_VLC_BITS           8
#define TOTAL_ZEROS_VLC_BITS           9
#define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
#define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5
#define RUN_VLC_BITS                   3
#define RUN7_VLC_BITS                  6

/**
284
 * Get the predicted number of non-zero coefficients.
285 286
 * @param n block index
 */
287
static inline int pred_non_zero_count(const H264Context *h, H264SliceContext *sl, int n)
288
{
289
    const int index8= scan8[n];
290 291
    const int left = sl->non_zero_count_cache[index8 - 1];
    const int top  = sl->non_zero_count_cache[index8 - 8];
292 293 294 295
    int i= left + top;

    if(i<64) i= (i+1)>>1;

296
    ff_tlog(h->avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
297 298 299 300 301

    return i&31;
}

/**
 * Fill cavlc_level_tab[], the table decode_residual() uses to decode a
 * coefficient level from the next LEVEL_TAB_BITS bits of the bitstream.
 *
 * For every suffix_length (0..6) and every LEVEL_TAB_BITS-bit pattern i the
 * entry is one of:
 *  - [0] = signed level, [1] = prefix + 1 + suffix_length bits consumed, when
 *    the prefix, its stop bit and the suffix all fit into LEVEL_TAB_BITS bits;
 *  - [0] = prefix + 100, [1] = prefix + 1, when only the prefix and its stop
 *    bit fit (decode_residual() treats values >= 100 as this escape);
 *  - [0] = LEVEL_TAB_BITS + 100, [1] = LEVEL_TAB_BITS, when no stop bit is
 *    found inside the pattern.
 */
static av_cold void init_cavlc_level_tab(void){
    int suffix_length;
    unsigned int i;

    for(suffix_length=0; suffix_length<7; suffix_length++){
        for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
            /* Number of leading zero bits of i within LEVEL_TAB_BITS bits.
             * NOTE(review): for i == 0 this relies on av_log2(0) returning 0
             * so that prefix becomes LEVEL_TAB_BITS - confirm. */
            int prefix= LEVEL_TAB_BITS - av_log2(2*i);

            if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
                /* Combine the prefix with the suffix_length bits that follow
                 * the stop bit; subtracting (1 << suffix_length) removes the
                 * stop bit that the shift keeps in front of the suffix. */
                int level_code = (prefix << suffix_length) +
                    (i >> (av_log2(i) - suffix_length)) - (1 << suffix_length);
                /* Even codes map to positive levels, odd codes to negative. */
                int mask = -(level_code&1);
                level_code = (((2 + level_code) >> 1) ^ mask) - mask;
                cavlc_level_tab[suffix_length][i][0]= level_code;
                cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
            }else if(prefix + 1 <= LEVEL_TAB_BITS){
                cavlc_level_tab[suffix_length][i][0]= prefix+100;
                cavlc_level_tab[suffix_length][i][1]= prefix + 1;
            }else{
                cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
                cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
            }
        }
    }
}

av_cold void ff_h264_decode_init_vlc(void){
    static int done = 0;

    if (!done) {
        int i;
        int offset;
        done = 1;

        chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
        chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
        init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
                 &chroma_dc_coeff_token_len [0], 1, 1,
                 &chroma_dc_coeff_token_bits[0], 1, 1,
                 INIT_VLC_USE_NEW_STATIC);

342 343 344 345 346 347 348
        chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table;
        chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size;
        init_vlc(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9,
                 &chroma422_dc_coeff_token_len [0], 1, 1,
                 &chroma422_dc_coeff_token_bits[0], 1, 1,
                 INIT_VLC_USE_NEW_STATIC);

349 350 351 352 353 354 355 356 357 358 359 360 361 362 363
        offset = 0;
        for(i=0; i<4; i++){
            coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
            coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
            init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
                     &coeff_token_len [i][0], 1, 1,
                     &coeff_token_bits[i][0], 1, 1,
                     INIT_VLC_USE_NEW_STATIC);
            offset += coeff_token_vlc_tables_size[i];
        }
        /*
         * This is a one time safety check to make sure that
         * the packed static coeff_token_vlc table sizes
         * were initialized correctly.
         */
364
        av_assert0(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
365 366

        for(i=0; i<3; i++){
367 368 369
            chroma_dc_total_zeros_vlc[i+1].table = chroma_dc_total_zeros_vlc_tables[i];
            chroma_dc_total_zeros_vlc[i+1].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
            init_vlc(&chroma_dc_total_zeros_vlc[i+1],
370 371 372 373 374
                     CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
                     &chroma_dc_total_zeros_len [i][0], 1, 1,
                     &chroma_dc_total_zeros_bits[i][0], 1, 1,
                     INIT_VLC_USE_NEW_STATIC);
        }
375 376

        for(i=0; i<7; i++){
377 378 379
            chroma422_dc_total_zeros_vlc[i+1].table = chroma422_dc_total_zeros_vlc_tables[i];
            chroma422_dc_total_zeros_vlc[i+1].table_allocated = chroma422_dc_total_zeros_vlc_tables_size;
            init_vlc(&chroma422_dc_total_zeros_vlc[i+1],
380 381 382 383 384 385
                     CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8,
                     &chroma422_dc_total_zeros_len [i][0], 1, 1,
                     &chroma422_dc_total_zeros_bits[i][0], 1, 1,
                     INIT_VLC_USE_NEW_STATIC);
        }

386
        for(i=0; i<15; i++){
387 388 389
            total_zeros_vlc[i+1].table = total_zeros_vlc_tables[i];
            total_zeros_vlc[i+1].table_allocated = total_zeros_vlc_tables_size;
            init_vlc(&total_zeros_vlc[i+1],
390 391 392 393 394 395 396
                     TOTAL_ZEROS_VLC_BITS, 16,
                     &total_zeros_len [i][0], 1, 1,
                     &total_zeros_bits[i][0], 1, 1,
                     INIT_VLC_USE_NEW_STATIC);
        }

        for(i=0; i<6; i++){
397 398 399
            run_vlc[i+1].table = run_vlc_tables[i];
            run_vlc[i+1].table_allocated = run_vlc_tables_size;
            init_vlc(&run_vlc[i+1],
400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432
                     RUN_VLC_BITS, 7,
                     &run_len [i][0], 1, 1,
                     &run_bits[i][0], 1, 1,
                     INIT_VLC_USE_NEW_STATIC);
        }
        run7_vlc.table = run7_vlc_table,
        run7_vlc.table_allocated = run7_vlc_table_size;
        init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
                 &run_len [6][0], 1, 1,
                 &run_bits[6][0], 1, 1,
                 INIT_VLC_USE_NEW_STATIC);

        init_cavlc_level_tab();
    }
}

/**
 * Read a level prefix directly from the bitstream cache.
 *
 * Consumes (32 - av_log2(cache)) bits and returns one less than that.
 * NOTE(review): with av_log2() giving the index of the highest set bit of a
 * 32-bit cache word, that is the run of leading zero bits plus the 1 bit
 * that terminates it, and the return value is the number of zeros - confirm
 * the cache width.
 */
static inline int get_level_prefix(GetBitContext *gb){
    unsigned int cache;
    int consumed;

    OPEN_READER(re, gb);
    UPDATE_CACHE(re, gb);
    cache = GET_CACHE(re, gb);

    consumed = 32 - av_log2(cache);

    LAST_SKIP_BITS(re, gb, consumed);
    CLOSE_READER(re, gb);

    return consumed - 1;
}

/**
433
 * Decode a residual block.
434 435 436 437 438
 * @param n block index
 * @param scantable scantable
 * @param max_coeff number of coefficients in the block
 * @return <0 if an error occurred
 */
439
static int decode_residual(const H264Context *h, H264SliceContext *sl,
440 441 442 443
                           GetBitContext *gb, int16_t *block, int n,
                           const uint8_t *scantable, const uint32_t *qmul,
                           int max_coeff)
{
444 445
    static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
    int level[16];
446
    int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before;
447 448 449

    //FIXME put trailing_onex into the context

450
    if(max_coeff <= 8){
451 452 453 454
        if (max_coeff == 4)
            coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
        else
            coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc.table, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1);
455 456
        total_coeff= coeff_token>>2;
    }else{
457
        if(n >= LUMA_DC_BLOCK_INDEX){
458
            total_coeff= pred_non_zero_count(h, sl, (n - LUMA_DC_BLOCK_INDEX)*16);
459 460 461
            coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
            total_coeff= coeff_token>>2;
        }else{
462
            total_coeff= pred_non_zero_count(h, sl, n);
463 464 465 466
            coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
            total_coeff= coeff_token>>2;
        }
    }
467
    sl->non_zero_count_cache[scan8[n]] = total_coeff;
468 469 470 471 472 473

    //FIXME set last_non_zero?

    if(total_coeff==0)
        return 0;
    if(total_coeff > (unsigned)max_coeff) {
474
        av_log(h->avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", sl->mb_x, sl->mb_y, total_coeff);
475 476 477 478
        return -1;
    }

    trailing_ones= coeff_token&3;
479
    ff_tlog(h->avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
480
    av_assert2(total_coeff<=16);
481 482 483 484 485 486 487 488 489

    i = show_bits(gb, 3);
    skip_bits(gb, trailing_ones);
    level[0] = 1-((i&4)>>1);
    level[1] = 1-((i&2)   );
    level[2] = 1-((i&1)<<1);

    if(trailing_ones<total_coeff) {
        int mask, prefix;
490
        int suffix_length = total_coeff > 10 & trailing_ones < 3;
491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511
        int bitsi= show_bits(gb, LEVEL_TAB_BITS);
        int level_code= cavlc_level_tab[suffix_length][bitsi][0];

        skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
        if(level_code >= 100){
            prefix= level_code - 100;
            if(prefix == LEVEL_TAB_BITS)
                prefix += get_level_prefix(gb);

            //first coefficient has suffix_length equal to 0 or 1
            if(prefix<14){ //FIXME try to build a large unified VLC table for all this
                if(suffix_length)
                    level_code= (prefix<<1) + get_bits1(gb); //part
                else
                    level_code= prefix; //part
            }else if(prefix==14){
                if(suffix_length)
                    level_code= (prefix<<1) + get_bits1(gb); //part
                else
                    level_code= prefix + get_bits(gb, 4); //part
            }else{
512
                level_code= 30;
513 514
                if(prefix>=16){
                    if(prefix > 25+3){
515
                        av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n");
516 517
                        return -1;
                    }
518
                    level_code += (1<<(prefix-3))-4096;
519
                }
520
                level_code += get_bits(gb, prefix-3); //part
521 522 523 524 525 526 527 528
            }

            if(trailing_ones < 3) level_code += 2;

            suffix_length = 2;
            mask= -(level_code&1);
            level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
        }else{
529
            level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
530

531
            suffix_length = 1 + (level_code + 3U > 6U);
532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549
            level[trailing_ones]= level_code;
        }

        //remaining coefficients have suffix_length > 0
        for(i=trailing_ones+1;i<total_coeff;i++) {
            static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
            int bitsi= show_bits(gb, LEVEL_TAB_BITS);
            level_code= cavlc_level_tab[suffix_length][bitsi][0];

            skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
            if(level_code >= 100){
                prefix= level_code - 100;
                if(prefix == LEVEL_TAB_BITS){
                    prefix += get_level_prefix(gb);
                }
                if(prefix<15){
                    level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
                }else{
550 551 552 553 554 555
                    level_code = 15<<suffix_length;
                    if (prefix>=16) {
                        if(prefix > 25+3){
                            av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n");
                            return AVERROR_INVALIDDATA;
                        }
556
                        level_code += (1<<(prefix-3))-4096;
557 558
                    }
                    level_code += get_bits(gb, prefix-3);
559 560 561 562 563
                }
                mask= -(level_code&1);
                level_code= (((2+level_code)>>1) ^ mask) - mask;
            }
            level[i]= level_code;
564
            suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];
565 566 567 568 569 570
        }
    }

    if(total_coeff == max_coeff)
        zeros_left=0;
    else{
571 572
        if (max_coeff <= 8) {
            if (max_coeff == 4)
573
                zeros_left = get_vlc2(gb, chroma_dc_total_zeros_vlc[total_coeff].table,
574 575
                                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
            else
576
                zeros_left = get_vlc2(gb, chroma422_dc_total_zeros_vlc[total_coeff].table,
577 578
                                      CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1);
        } else {
579
            zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
580
        }
581 582
    }

583 584 585 586 587 588
#define STORE_BLOCK(type) \
    scantable += zeros_left + total_coeff - 1; \
    if(n >= LUMA_DC_BLOCK_INDEX){ \
        ((type*)block)[*scantable] = level[0]; \
        for(i=1;i<total_coeff && zeros_left > 0;i++) { \
            if(zeros_left < 7) \
589
                run_before= get_vlc2(gb, run_vlc[zeros_left].table, RUN_VLC_BITS, 1); \
590 591 592 593 594 595 596 597 598 599 600 601 602 603
            else \
                run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
            zeros_left -= run_before; \
            scantable -= 1 + run_before; \
            ((type*)block)[*scantable]= level[i]; \
        } \
        for(;i<total_coeff;i++) { \
            scantable--; \
            ((type*)block)[*scantable]= level[i]; \
        } \
    }else{ \
        ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
        for(i=1;i<total_coeff && zeros_left > 0;i++) { \
            if(zeros_left < 7) \
604
                run_before= get_vlc2(gb, run_vlc[zeros_left].table, RUN_VLC_BITS, 1); \
605 606 607 608 609 610 611 612 613 614 615 616
            else \
                run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
            zeros_left -= run_before; \
            scantable -= 1 + run_before; \
            ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
        } \
        for(;i<total_coeff;i++) { \
            scantable--; \
            ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
        } \
    }

617
    if (h->pixel_shift) {
618 619 620
        STORE_BLOCK(int32_t)
    } else {
        STORE_BLOCK(int16_t)
621 622 623
    }

    if(zeros_left<0){
624
        av_log(h->avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", sl->mb_x, sl->mb_y);
625 626 627 628 629 630
        return -1;
    }

    return 0;
}

631 632 633 634 635 636
static av_always_inline
int decode_luma_residual(const H264Context *h, H264SliceContext *sl,
                         GetBitContext *gb, const uint8_t *scan,
                         const uint8_t *scan8x8, int pixel_shift,
                         int mb_type, int cbp, int p)
{
637
    int i4x4, i8x8;
638
    int qscale = p == 0 ? sl->qscale : sl->chroma_qp[p - 1];
639
    if(IS_INTRA16x16(mb_type)){
640 641 642 643
        AV_ZERO128(sl->mb_luma_dc[p]+0);
        AV_ZERO128(sl->mb_luma_dc[p]+8);
        AV_ZERO128(sl->mb_luma_dc[p]+16);
        AV_ZERO128(sl->mb_luma_dc[p]+24);
644
        if (decode_residual(h, sl, gb, sl->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX + p, scan, NULL, 16) < 0) {
645 646 647
            return -1; //FIXME continue if partitioned and other return -1 too
        }

648
        av_assert2((cbp&15) == 0 || (cbp&15) == 15);
649 650 651 652 653

        if(cbp&15){
            for(i8x8=0; i8x8<4; i8x8++){
                for(i4x4=0; i4x4<4; i4x4++){
                    const int index= i4x4 + 4*i8x8 + p*16;
654
                    if( decode_residual(h, sl, gb, sl->mb + (16*index << pixel_shift),
655
                        index, scan + 1, h->ps.pps->dequant4_coeff[p][qscale], 15) < 0 ){
656 657 658 659 660 661
                        return -1;
                    }
                }
            }
            return 0xf;
        }else{
662
            fill_rectangle(&sl->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1);
663 664 665 666 667 668 669 670 671
            return 0;
        }
    }else{
        int cqm = (IS_INTRA( mb_type ) ? 0:3)+p;
        /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */
        int new_cbp = 0;
        for(i8x8=0; i8x8<4; i8x8++){
            if(cbp & (1<<i8x8)){
                if(IS_8x8DCT(mb_type)){
672
                    int16_t *buf = &sl->mb[64*i8x8+256*p << pixel_shift];
673 674 675
                    uint8_t *nnz;
                    for(i4x4=0; i4x4<4; i4x4++){
                        const int index= i4x4 + 4*i8x8 + p*16;
676
                        if( decode_residual(h, sl, gb, buf, index, scan8x8+16*i4x4,
677
                                            h->ps.pps->dequant8_coeff[cqm][qscale], 16) < 0 )
678 679
                            return -1;
                    }
680
                    nnz = &sl->non_zero_count_cache[scan8[4 * i8x8 + p * 16]];
681 682 683 684 685
                    nnz[0] += nnz[1] + nnz[8] + nnz[9];
                    new_cbp |= !!nnz[0] << i8x8;
                }else{
                    for(i4x4=0; i4x4<4; i4x4++){
                        const int index= i4x4 + 4*i8x8 + p*16;
686
                        if( decode_residual(h, sl, gb, sl->mb + (16*index << pixel_shift), index,
687
                                            scan, h->ps.pps->dequant4_coeff[cqm][qscale], 16) < 0 ){
688 689
                            return -1;
                        }
690
                        new_cbp |= sl->non_zero_count_cache[scan8[index]] << i8x8;
691 692 693
                    }
                }
            }else{
694
                uint8_t * const nnz = &sl->non_zero_count_cache[scan8[4 * i8x8 + p * 16]];
695 696 697 698 699 700 701
                nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
            }
        }
        return new_cbp;
    }
}

702
int ff_h264_decode_mb_cavlc(const H264Context *h, H264SliceContext *sl)
703
{
704 705 706
    int mb_xy;
    int partition_count;
    unsigned int mb_type, cbp;
707 708
    int dct8x8_allowed = h->ps.pps->transform_8x8_mode;
    const int decode_chroma = h->ps.sps->chroma_format_idc == 1 || h->ps.sps->chroma_format_idc == 2;
709
    const int pixel_shift = h->pixel_shift;
710

711
    mb_xy = sl->mb_xy = sl->mb_x + sl->mb_y*h->mb_stride;
712

713
    ff_tlog(h->avctx, "pic:%d mb:%d/%d\n", h->poc.frame_num, sl->mb_x, sl->mb_y);
714 715
    cbp = 0; /* avoid warning. FIXME: find a solution without slowing
                down the code */
716
    if (sl->slice_type_nos != AV_PICTURE_TYPE_I) {
717 718 719 720 721 722 723 724
        if (sl->mb_skip_run == -1) {
            unsigned mb_skip_run = get_ue_golomb_long(&sl->gb);
            if (mb_skip_run > h->mb_num) {
                av_log(h->avctx, AV_LOG_ERROR, "mb_skip_run %d is invalid\n", mb_skip_run);
                return AVERROR_INVALIDDATA;
            }
            sl->mb_skip_run = mb_skip_run;
        }
725

726
        if (sl->mb_skip_run--) {
727
            if (FRAME_MBAFF(h) && (sl->mb_y & 1) == 0) {
728
                if (sl->mb_skip_run == 0)
729
                    sl->mb_mbaff = sl->mb_field_decoding_flag = get_bits1(&sl->gb);
730
            }
731
            decode_mb_skip(h, sl);
732 733 734
            return 0;
        }
    }
735
    if (FRAME_MBAFF(h)) {
736
        if ((sl->mb_y & 1) == 0)
737
            sl->mb_mbaff = sl->mb_field_decoding_flag = get_bits1(&sl->gb);
738 739
    }

740
    sl->prev_mb_skipped = 0;
741

742
    mb_type= get_ue_golomb(&sl->gb);
743
    if (sl->slice_type_nos == AV_PICTURE_TYPE_B) {
744
        if(mb_type < 23){
745 746
            partition_count = ff_h264_b_mb_type_info[mb_type].partition_count;
            mb_type         = ff_h264_b_mb_type_info[mb_type].type;
747 748 749 750
        }else{
            mb_type -= 23;
            goto decode_intra_mb;
        }
751
    } else if (sl->slice_type_nos == AV_PICTURE_TYPE_P) {
752
        if(mb_type < 5){
753 754
            partition_count = ff_h264_p_mb_type_info[mb_type].partition_count;
            mb_type         = ff_h264_p_mb_type_info[mb_type].type;
755 756 757 758 759
        }else{
            mb_type -= 5;
            goto decode_intra_mb;
        }
    }else{
760
       av_assert2(sl->slice_type_nos == AV_PICTURE_TYPE_I);
761
        if (sl->slice_type == AV_PICTURE_TYPE_SI && mb_type)
762 763 764
            mb_type--;
decode_intra_mb:
        if(mb_type > 25){
765
            av_log(h->avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(sl->slice_type), sl->mb_x, sl->mb_y);
766 767 768
            return -1;
        }
        partition_count=0;
769 770 771
        cbp                      = ff_h264_i_mb_type_info[mb_type].cbp;
        sl->intra16x16_pred_mode = ff_h264_i_mb_type_info[mb_type].pred_mode;
        mb_type                  = ff_h264_i_mb_type_info[mb_type].type;
772 773
    }

774
    if (MB_FIELD(sl))
775 776
        mb_type |= MB_TYPE_INTERLACED;

777
    h->slice_table[mb_xy] = sl->slice_num;
778 779

    if(IS_INTRA_PCM(mb_type)){
780 781
        const int mb_size = ff_h264_mb_sizes[h->ps.sps->chroma_format_idc] *
                            h->ps.sps->bit_depth_luma;
782 783

        // We assume these blocks are very rare so we do not optimize it.
784 785
        sl->intra_pcm_ptr = align_get_bits(&sl->gb);
        if (get_bits_left(&sl->gb) < mb_size) {
786 787 788
            av_log(h->avctx, AV_LOG_ERROR, "Not enough data for an intra PCM block.\n");
            return AVERROR_INVALIDDATA;
        }
789
        skip_bits_long(&sl->gb, mb_size);
790 791

        // In deblocking, the quantizer is 0
792
        h->cur_pic.qscale_table[mb_xy] = 0;
793
        // All coeffs are present
794
        memset(h->non_zero_count[mb_xy], 16, 48);
795

796
        h->cur_pic.mb_type[mb_xy] = mb_type;
797 798 799
        return 0;
    }

800 801
    fill_decode_neighbors(h, sl, mb_type);
    fill_decode_caches(h, sl, mb_type);
802 803 804 805 806 807 808 809

    //mb_pred
    if(IS_INTRA(mb_type)){
        int pred_mode;
//            init_top_left_availability(h);
        if(IS_INTRA4x4(mb_type)){
            int i;
            int di = 1;
810
            if(dct8x8_allowed && get_bits1(&sl->gb)){
811 812 813 814 815 816
                mb_type |= MB_TYPE_8x8DCT;
                di = 4;
            }

//                fill_intra4x4_pred_table(h);
            for(i=0; i<16; i+=di){
817
                int mode = pred_intra_mode(h, sl, i);
818

819 820
                if(!get_bits1(&sl->gb)){
                    const int rem_mode= get_bits(&sl->gb, 3);
821 822 823 824
                    mode = rem_mode + (rem_mode >= mode);
                }

                if(di==4)
825
                    fill_rectangle(&sl->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1);
826
                else
827
                    sl->intra4x4_pred_mode_cache[scan8[i]] = mode;
828
            }
829
            write_back_intra_pred_mode(h, sl);
830 831
            if (ff_h264_check_intra4x4_pred_mode(sl->intra4x4_pred_mode_cache, h->avctx,
                                                 sl->top_samples_available, sl->left_samples_available) < 0)
832 833
                return -1;
        }else{
834 835
            sl->intra16x16_pred_mode = ff_h264_check_intra_pred_mode(h->avctx, sl->top_samples_available,
                                                                     sl->left_samples_available, sl->intra16x16_pred_mode, 0);
836
            if (sl->intra16x16_pred_mode < 0)
837 838
                return -1;
        }
839
        if(decode_chroma){
840 841
            pred_mode= ff_h264_check_intra_pred_mode(h->avctx, sl->top_samples_available,
                                                     sl->left_samples_available, get_ue_golomb_31(&sl->gb), 1);
842 843
            if(pred_mode < 0)
                return -1;
844
            sl->chroma_pred_mode = pred_mode;
845
        } else {
846
            sl->chroma_pred_mode = DC_128_PRED8x8;
847 848 849 850
        }
    }else if(partition_count==4){
        int i, j, sub_partition_count[4], list, ref[2][4];

851
        if (sl->slice_type_nos == AV_PICTURE_TYPE_B) {
852
            for(i=0; i<4; i++){
853
                sl->sub_mb_type[i]= get_ue_golomb_31(&sl->gb);
854
                if(sl->sub_mb_type[i] >=13){
855
                    av_log(h->avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", sl->sub_mb_type[i], sl->mb_x, sl->mb_y);
856 857
                    return -1;
                }
858 859
                sub_partition_count[i] = ff_h264_b_sub_mb_type_info[sl->sub_mb_type[i]].partition_count;
                sl->sub_mb_type[i]     = ff_h264_b_sub_mb_type_info[sl->sub_mb_type[i]].type;
860
            }
861
            if( IS_DIRECT(sl->sub_mb_type[0]|sl->sub_mb_type[1]|sl->sub_mb_type[2]|sl->sub_mb_type[3])) {
862 863 864 865 866
                ff_h264_pred_direct_motion(h, sl, &mb_type);
                sl->ref_cache[0][scan8[4]] =
                sl->ref_cache[1][scan8[4]] =
                sl->ref_cache[0][scan8[12]] =
                sl->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
867 868
            }
        }else{
869
            av_assert2(sl->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
870
            for(i=0; i<4; i++){
871
                sl->sub_mb_type[i]= get_ue_golomb_31(&sl->gb);
872
                if(sl->sub_mb_type[i] >=4){
873
                    av_log(h->avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", sl->sub_mb_type[i], sl->mb_x, sl->mb_y);
874 875
                    return -1;
                }
876 877
                sub_partition_count[i] = ff_h264_p_sub_mb_type_info[sl->sub_mb_type[i]].partition_count;
                sl->sub_mb_type[i]     = ff_h264_p_sub_mb_type_info[sl->sub_mb_type[i]].type;
878 879 880
            }
        }

881
        for (list = 0; list < sl->list_count; list++) {
882
            int ref_count = IS_REF0(mb_type) ? 1 : sl->ref_count[list] << MB_MBAFF(sl);
883
            for(i=0; i<4; i++){
884 885
                if(IS_DIRECT(sl->sub_mb_type[i])) continue;
                if(IS_DIR(sl->sub_mb_type[i], 0, list)){
886 887 888 889
                    unsigned int tmp;
                    if(ref_count == 1){
                        tmp= 0;
                    }else if(ref_count == 2){
890
                        tmp= get_bits1(&sl->gb)^1;
891
                    }else{
892
                        tmp= get_ue_golomb_31(&sl->gb);
893
                        if(tmp>=ref_count){
894
                            av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
895 896 897 898 899 900 901 902 903 904 905 906
                            return -1;
                        }
                    }
                    ref[list][i]= tmp;
                }else{
                 //FIXME
                    ref[list][i] = -1;
                }
            }
        }

        if(dct8x8_allowed)
907
            dct8x8_allowed = get_dct8x8_allowed(h, sl);
908

909
        for (list = 0; list < sl->list_count; list++) {
910
            for(i=0; i<4; i++){
911
                if(IS_DIRECT(sl->sub_mb_type[i])) {
912
                    sl->ref_cache[list][ scan8[4*i] ] = sl->ref_cache[list][ scan8[4*i]+1 ];
913 914
                    continue;
                }
915 916
                sl->ref_cache[list][ scan8[4*i]   ]=sl->ref_cache[list][ scan8[4*i]+1 ]=
                sl->ref_cache[list][ scan8[4*i]+8 ]=sl->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
917

918 919
                if(IS_DIR(sl->sub_mb_type[i], 0, list)){
                    const int sub_mb_type= sl->sub_mb_type[i];
920 921 922 923
                    const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
                    for(j=0; j<sub_partition_count[i]; j++){
                        int mx, my;
                        const int index= 4*i + block_width*j;
924 925
                        int16_t (* mv_cache)[2]= &sl->mv_cache[list][ scan8[index] ];
                        pred_motion(h, sl, index, block_width, list, sl->ref_cache[list][ scan8[index] ], &mx, &my);
926 927
                        mx += (unsigned)get_se_golomb(&sl->gb);
                        my += (unsigned)get_se_golomb(&sl->gb);
928
                        ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945

                        if(IS_SUB_8X8(sub_mb_type)){
                            mv_cache[ 1 ][0]=
                            mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
                            mv_cache[ 1 ][1]=
                            mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
                        }else if(IS_SUB_8X4(sub_mb_type)){
                            mv_cache[ 1 ][0]= mx;
                            mv_cache[ 1 ][1]= my;
                        }else if(IS_SUB_4X8(sub_mb_type)){
                            mv_cache[ 8 ][0]= mx;
                            mv_cache[ 8 ][1]= my;
                        }
                        mv_cache[ 0 ][0]= mx;
                        mv_cache[ 0 ][1]= my;
                    }
                }else{
946
                    uint32_t *p= (uint32_t *)&sl->mv_cache[list][ scan8[4*i] ][0];
947 948 949 950 951 952
                    p[0] = p[1]=
                    p[8] = p[9]= 0;
                }
            }
        }
    }else if(IS_DIRECT(mb_type)){
953
        ff_h264_pred_direct_motion(h, sl, &mb_type);
954
        dct8x8_allowed &= h->ps.sps->direct_8x8_inference_flag;
955 956 957 958
    }else{
        int list, mx, my, i;
         //FIXME we should set ref_idx_l? to 0 if we use that later ...
        if(IS_16X16(mb_type)){
959
            for (list = 0; list < sl->list_count; list++) {
960 961
                    unsigned int val;
                    if(IS_DIR(mb_type, 0, list)){
962 963
                        unsigned rc = sl->ref_count[list] << MB_MBAFF(sl);
                        if (rc == 1) {
964
                            val= 0;
965
                        } else if (rc == 2) {
966
                            val= get_bits1(&sl->gb)^1;
967
                        }else{
968
                            val= get_ue_golomb_31(&sl->gb);
969
                            if (val >= rc) {
970
                                av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
971 972 973
                                return -1;
                            }
                        }
974
                    fill_rectangle(&sl->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
975
                    }
976
            }
977
            for (list = 0; list < sl->list_count; list++) {
978
                if(IS_DIR(mb_type, 0, list)){
979
                    pred_motion(h, sl, 0, 4, list, sl->ref_cache[list][ scan8[0] ], &mx, &my);
980 981
                    mx += (unsigned)get_se_golomb(&sl->gb);
                    my += (unsigned)get_se_golomb(&sl->gb);
982
                    ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
983

984
                    fill_rectangle(sl->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
985
                }
986 987 988
            }
        }
        else if(IS_16X8(mb_type)){
989
            for (list = 0; list < sl->list_count; list++) {
990 991 992
                    for(i=0; i<2; i++){
                        unsigned int val;
                        if(IS_DIR(mb_type, i, list)){
993 994
                            unsigned rc = sl->ref_count[list] << MB_MBAFF(sl);
                            if (rc == 1) {
995
                                val= 0;
996
                            } else if (rc == 2) {
997
                                val= get_bits1(&sl->gb)^1;
998
                            }else{
999
                                val= get_ue_golomb_31(&sl->gb);
1000
                                if (val >= rc) {
1001
                                    av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
1002 1003 1004 1005 1006
                                    return -1;
                                }
                            }
                        }else
                            val= LIST_NOT_USED&0xFF;
1007
                        fill_rectangle(&sl->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
1008 1009
                    }
            }
1010
            for (list = 0; list < sl->list_count; list++) {
1011 1012 1013
                for(i=0; i<2; i++){
                    unsigned int val;
                    if(IS_DIR(mb_type, i, list)){
1014
                        pred_16x8_motion(h, sl, 8*i, list, sl->ref_cache[list][scan8[0] + 16*i], &mx, &my);
1015 1016
                        mx += (unsigned)get_se_golomb(&sl->gb);
                        my += (unsigned)get_se_golomb(&sl->gb);
1017
                        ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
1018 1019 1020 1021

                        val= pack16to32(mx,my);
                    }else
                        val=0;
1022
                    fill_rectangle(sl->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
1023 1024 1025
                }
            }
        }else{
1026
            av_assert2(IS_8X16(mb_type));
1027
            for (list = 0; list < sl->list_count; list++) {
1028 1029 1030
                    for(i=0; i<2; i++){
                        unsigned int val;
                        if(IS_DIR(mb_type, i, list)){ //FIXME optimize
1031 1032
                            unsigned rc = sl->ref_count[list] << MB_MBAFF(sl);
                            if (rc == 1) {
1033
                                val= 0;
1034
                            } else if (rc == 2) {
1035
                                val= get_bits1(&sl->gb)^1;
1036
                            }else{
1037
                                val= get_ue_golomb_31(&sl->gb);
1038
                                if (val >= rc) {
1039
                                    av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
1040 1041 1042 1043 1044
                                    return -1;
                                }
                            }
                        }else
                            val= LIST_NOT_USED&0xFF;
1045
                        fill_rectangle(&sl->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
1046 1047
                    }
            }
1048
            for (list = 0; list < sl->list_count; list++) {
1049 1050 1051
                for(i=0; i<2; i++){
                    unsigned int val;
                    if(IS_DIR(mb_type, i, list)){
1052
                        pred_8x16_motion(h, sl, i*4, list, sl->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
1053 1054
                        mx += (unsigned)get_se_golomb(&sl->gb);
                        my += (unsigned)get_se_golomb(&sl->gb);
1055
                        ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
1056 1057 1058 1059

                        val= pack16to32(mx,my);
                    }else
                        val=0;
1060
                    fill_rectangle(sl->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
1061 1062 1063 1064 1065 1066
                }
            }
        }
    }

    if(IS_INTER(mb_type))
1067
        write_back_motion(h, sl, mb_type);
1068 1069

    if(!IS_INTRA16x16(mb_type)){
1070
        cbp= get_ue_golomb(&sl->gb);
1071

1072 1073
        if(decode_chroma){
            if(cbp > 47){
1074
                av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, sl->mb_x, sl->mb_y);
1075 1076
                return -1;
            }
1077 1078 1079 1080
            if (IS_INTRA4x4(mb_type))
                cbp = ff_h264_golomb_to_intra4x4_cbp[cbp];
            else
                cbp = ff_h264_golomb_to_inter_cbp[cbp];
1081
        }else{
1082
            if(cbp > 15){
1083
                av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, sl->mb_x, sl->mb_y);
1084 1085
                return -1;
            }
1086 1087 1088
            if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
            else                     cbp= golomb_to_inter_cbp_gray[cbp];
        }
1089 1090
    } else {
        if (!decode_chroma && cbp>15) {
1091
            av_log(h->avctx, AV_LOG_ERROR, "gray chroma\n");
1092 1093
            return AVERROR_INVALIDDATA;
        }
1094 1095 1096
    }

    if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
1097
        mb_type |= MB_TYPE_8x8DCT*get_bits1(&sl->gb);
1098
    }
1099
    sl->cbp=
1100
    h->cbp_table[mb_xy]= cbp;
1101
    h->cur_pic.mb_type[mb_xy] = mb_type;
1102 1103

    if(cbp || IS_INTRA16x16(mb_type)){
1104
        int i4x4, i8x8, chroma_idx;
1105
        int dquant;
1106
        int ret;
1107
        GetBitContext *gb = &sl->gb;
1108
        const uint8_t *scan, *scan8x8;
1109
        const int max_qp = 51 + 6 * (h->ps.sps->bit_depth_luma - 8);
1110

1111
        dquant= get_se_golomb(&sl->gb);
1112

1113
        sl->qscale += (unsigned)dquant;
1114

1115 1116 1117 1118
        if (((unsigned)sl->qscale) > max_qp){
            if (sl->qscale < 0) sl->qscale += max_qp + 1;
            else                sl->qscale -= max_qp+1;
            if (((unsigned)sl->qscale) > max_qp){
1119
                av_log(h->avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, sl->mb_x, sl->mb_y);
1120
                sl->qscale = max_qp;
1121 1122
                return -1;
            }
1123 1124
        }

1125 1126
        sl->chroma_qp[0] = get_chroma_qp(h->ps.pps, 0, sl->qscale);
        sl->chroma_qp[1] = get_chroma_qp(h->ps.pps, 1, sl->qscale);
1127

1128 1129 1130 1131 1132 1133 1134 1135
        if(IS_INTERLACED(mb_type)){
            scan8x8 = sl->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
            scan    = sl->qscale ? h->field_scan : h->field_scan_q0;
        }else{
            scan8x8 = sl->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
            scan    = sl->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
        }

1136
        if ((ret = decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ) {
1137 1138 1139
            return -1;
        }
        h->cbp_table[mb_xy] |= ret << 12;
1140
        if (CHROMA444(h)) {
1141
            if (decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ) {
1142 1143
                return -1;
            }
1144
            if (decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ) {
1145 1146 1147
                return -1;
            }
        } else {
1148
            const int num_c8x8 = h->ps.sps->chroma_format_idc;
1149

1150 1151
            if(cbp&0x30){
                for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1152
                    if (decode_residual(h, sl, gb, sl->mb + ((256 + 16*16*chroma_idx) << pixel_shift),
1153 1154 1155
                                        CHROMA_DC_BLOCK_INDEX + chroma_idx,
                                        CHROMA422(h) ? ff_h264_chroma422_dc_scan : ff_h264_chroma_dc_scan,
                                        NULL, 4 * num_c8x8) < 0) {
1156 1157 1158
                        return -1;
                    }
            }
1159

1160 1161
            if(cbp&0x20){
                for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1162
                    const uint32_t *qmul = h->ps.pps->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][sl->chroma_qp[chroma_idx]];
1163
                    int16_t *mb = sl->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
1164 1165 1166
                    for (i8x8 = 0; i8x8<num_c8x8; i8x8++) {
                        for (i4x4 = 0; i4x4 < 4; i4x4++) {
                            const int index = 16 + 16*chroma_idx + 8*i8x8 + i4x4;
1167
                            if (decode_residual(h, sl, gb, mb, index, scan + 1, qmul, 15) < 0)
1168
                                return -1;
1169
                            mb += 16 << pixel_shift;
1170 1171 1172 1173
                        }
                    }
                }
            }else{
1174 1175
                fill_rectangle(&sl->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
                fill_rectangle(&sl->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1176 1177 1178
            }
        }
    }else{
1179 1180 1181
        fill_rectangle(&sl->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
        fill_rectangle(&sl->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
        fill_rectangle(&sl->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1182
    }
1183
    h->cur_pic.qscale_table[mb_xy] = sl->qscale;
1184
    write_back_non_zero_count(h, sl);
1185 1186 1187

    return 0;
}