intrax8.c 26.7 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * @brief IntraX8 (J-Frame) subdecoder, used by WMV2 and VC-1
 */

24
#include "libavutil/avassert.h"
25
#include "avcodec.h"
26
#include "get_bits.h"
27
#include "idctdsp.h"
28 29 30
#include "msmpeg4data.h"
#include "intrax8huf.h"
#include "intrax8.h"
31
#include "intrax8dsp.h"
32
#include "mpegutils.h"
33

34 35
/* ceil(max_bits / table_bits). Both arguments are parenthesized so that
 * expression arguments (e.g. "a + b") expand correctly. */
#define MAX_TABLE_DEPTH(table_bits, max_bits) \
    (((max_bits) + (table_bits) - 1) / (table_bits))

/* Bit widths handed to init_vlc() / get_vlc2() for each table family. */
#define DC_VLC_BITS 9
#define AC_VLC_BITS 9
#define OR_VLC_BITS 7

/* Depth values passed as the last argument of get_vlc2() for each family. */
#define DC_VLC_MTD MAX_TABLE_DEPTH(DC_VLC_BITS, MAX_DC_VLC_BITS)
#define AC_VLC_MTD MAX_TABLE_DEPTH(AC_VLC_BITS, MAX_AC_VLC_BITS)
#define OR_VLC_MTD MAX_TABLE_DEPTH(OR_VLC_BITS, MAX_OR_VLC_BITS)

45 46 47
/* Shared static VLC tables, filled in by x8_vlc_init(). The decoder context
 * only stores pointers into these arrays (see x8_select_ac_table(),
 * x8_get_dc_rlf() and x8_get_orient_vlc()). In all three arrays the first
 * index is (quant < 13): 0 holds the "highquant" tables, 1 the "lowquant"
 * tables. The last index is the table selector read from the bitstream. */
static VLC j_ac_vlc[2][2][8];  // [quant < 13], [ac mode >> 1], [3-bit select]
static VLC j_dc_vlc[2][8];     // [quant < 13], [3-bit select]
static VLC j_orient_vlc[2][4]; // [quant < 13], [select]; only [0][0..1] and [1][0..3] are initialized
48

49
static av_cold int x8_vlc_init(void)
50
{
51
    int i;
52 53
    int offset = 0;
    int sizeidx = 0;
54
    static const uint16_t sizes[8 * 4 + 8 * 2 + 2 + 4] = {
55 56 57 58 59 60 61 62
        576, 548, 582, 618, 546, 616, 560, 642,
        584, 582, 704, 664, 512, 544, 656, 640,
        512, 648, 582, 566, 532, 614, 596, 648,
        586, 552, 584, 590, 544, 578, 584, 624,

        528, 528, 526, 528, 536, 528, 526, 544,
        544, 512, 512, 528, 528, 544, 512, 544,

63 64
        128, 128, 128, 128, 128, 128,
    };
65 66

    static VLC_TYPE table[28150][2];
67

68 69 70 71 72 73 74 75
// set ac tables
#define init_ac_vlc(dst, src)                                                 \
    do {                                                                      \
        dst.table           = &table[offset];                                 \
        dst.table_allocated = sizes[sizeidx];                                 \
        offset             += sizes[sizeidx++];                               \
        init_vlc(&dst, AC_VLC_BITS, 77, &src[1], 4, 2, &src[0], 4, 2,         \
                 INIT_VLC_USE_NEW_STATIC);                                    \
76
    } while(0)
77 78 79 80 81 82

    for (i = 0; i < 8; i++) {
        init_ac_vlc(j_ac_vlc[0][0][i], x8_ac0_highquant_table[i][0]);
        init_ac_vlc(j_ac_vlc[0][1][i], x8_ac1_highquant_table[i][0]);
        init_ac_vlc(j_ac_vlc[1][0][i], x8_ac0_lowquant_table[i][0]);
        init_ac_vlc(j_ac_vlc[1][1][i], x8_ac1_lowquant_table[i][0]);
83 84 85
    }
#undef init_ac_vlc

86 87 88 89 90 91 92 93
// set dc tables
#define init_dc_vlc(dst, src)                                                 \
    do {                                                                      \
        dst.table           = &table[offset];                                 \
        dst.table_allocated = sizes[sizeidx];                                 \
        offset             += sizes[sizeidx++];                               \
        init_vlc(&dst, DC_VLC_BITS, 34, &src[1], 4, 2, &src[0], 4, 2,         \
                 INIT_VLC_USE_NEW_STATIC);                                    \
94
    } while(0)
95 96 97 98

    for (i = 0; i < 8; i++) {
        init_dc_vlc(j_dc_vlc[0][i], x8_dc_highquant_table[i][0]);
        init_dc_vlc(j_dc_vlc[1][i], x8_dc_lowquant_table[i][0]);
99 100 101
    }
#undef init_dc_vlc

102 103 104 105 106 107 108 109
// set orient tables
#define init_or_vlc(dst, src)                                                 \
    do {                                                                      \
        dst.table           = &table[offset];                                 \
        dst.table_allocated = sizes[sizeidx];                                 \
        offset             += sizes[sizeidx++];                               \
        init_vlc(&dst, OR_VLC_BITS, 12, &src[1], 4, 2, &src[0], 4, 2,         \
                 INIT_VLC_USE_NEW_STATIC);                                    \
110
    } while(0)
111 112 113 114 115 116 117

    for (i = 0; i < 2; i++)
        init_or_vlc(j_orient_vlc[0][i], x8_orient_highquant_table[i][0]);
    for (i = 0; i < 4; i++)
        init_or_vlc(j_orient_vlc[1][i], x8_orient_lowquant_table[i][0]);
#undef init_or_vlc

118
    if (offset != sizeof(table) / sizeof(VLC_TYPE) / 2) {
119
        av_log(NULL, AV_LOG_ERROR, "table size %"SIZE_SPECIFIER" does not match needed %i\n",
120
               sizeof(table) / sizeof(VLC_TYPE) / 2, offset);
121 122 123 124
        return AVERROR_INVALIDDATA;
    }

    return 0;
125 126
}

127 128 129 130 131
/**
 * Forget all per-picture VLC selections, so that the next use of each table
 * family reads a fresh selector from the bitstream.
 */
static void x8_reset_vlc_tables(IntraX8Context *w)
{
    w->j_orient_vlc = NULL;
    memset(w->j_ac_vlc, 0, sizeof(w->j_ac_vlc));
    memset(w->j_dc_vlc, 0, sizeof(w->j_dc_vlc));
}

134 135
/**
 * Make sure an AC VLC is selected for the given mode.
 *
 * If no table is selected yet for this mode, a 3-bit selector is read from
 * the bitstream; otherwise nothing is read.
 *
 * @param mode AC mode, must be < 4
 */
static inline void x8_select_ac_table(IntraX8Context *const w, int mode)
{
    av_assert2(mode < 4);

    if (!w->j_ac_vlc[mode]) {
        const int selector = get_bits(w->gb, 3);

        // modes 0/1 share one table set, modes 2/3 the other (mode >> 1)
        w->j_ac_vlc[mode] = &j_ac_vlc[w->quant < 13][mode >> 1][selector];
        av_assert2(w->j_ac_vlc[mode]);
    }
}

149 150 151
/**
 * Read one orientation symbol.
 *
 * On first use after a reset the table selector is read from the bitstream:
 * 1 bit when quant >= 13, 2 bits when quant < 13.
 *
 * @return the value returned by get_vlc2() for the orientation VLC
 */
static inline int x8_get_orient_vlc(IntraX8Context *w)
{
    if (w->j_orient_vlc == NULL) {
        const int lowquant = w->quant < 13;
        const int selector = get_bits(w->gb, 1 + lowquant);

        w->j_orient_vlc = &j_orient_vlc[lowquant][selector];
    }

    return get_vlc2(w->gb, w->j_orient_vlc->table, OR_VLC_BITS, OR_VLC_MTD);
}

159 160 161 162 163
/*
 * Decode parameters for AC symbols 46..72, indexed by (symbol - 46).
 * Layout of each 32-bit entry, as unpacked by x8_get_ac_rlf():
 *   bits  0.. 7  number of extra bits to read
 *   bits  8..15  mask: 0xFF -> extra bits extend the run,
 *                      0x00 -> extra bits extend the level
 *   bits 16..23  run offset
 *   bits 24..31  level offset
 */
#define AC_EXT_RUN   0xFFu
#define AC_EXT_LEVEL 0x00u
#define AC_ENTRY(nbits, ext, run, level)                                      \
    ((uint32_t) (nbits)         | ((uint32_t) (ext)   <<  8) |                \
     ((uint32_t) (run)   << 16) | ((uint32_t) (level) << 24))
static const uint32_t ac_decode_table[] = {
    /* 46 */ AC_ENTRY(3, AC_EXT_RUN,   16,  0),
    /* 47 */ AC_ENTRY(3, AC_EXT_RUN,   24,  0),
    /* 48 */ AC_ENTRY(2, AC_EXT_RUN,    4,  1),
    /* 49 */ AC_ENTRY(3, AC_EXT_RUN,    8,  1),

    /* 50 */ AC_ENTRY(5, AC_EXT_RUN,   32,  0),
    /* 51 */ AC_ENTRY(4, AC_EXT_RUN,   16,  1),

    /* 52 */ AC_ENTRY(2, AC_EXT_LEVEL,  0,  4),
    /* 53 */ AC_ENTRY(2, AC_EXT_LEVEL,  0,  8),
    /* 54 */ AC_ENTRY(2, AC_EXT_LEVEL,  0, 12),
    /* 55 */ AC_ENTRY(3, AC_EXT_LEVEL,  0, 16),
    /* 56 */ AC_ENTRY(3, AC_EXT_LEVEL,  0, 24),

    /* 57 */ AC_ENTRY(2, AC_EXT_LEVEL,  1,  3),
    /* 58 */ AC_ENTRY(3, AC_EXT_LEVEL,  1,  7),

    /* 59 */ AC_ENTRY(2, AC_EXT_RUN,   16,  0),
    /* 60 */ AC_ENTRY(2, AC_EXT_RUN,   20,  0),
    /* 61 */ AC_ENTRY(2, AC_EXT_RUN,   24,  0),
    /* 62 */ AC_ENTRY(2, AC_EXT_RUN,   28,  0),
    /* 63 */ AC_ENTRY(4, AC_EXT_RUN,   32,  0),
    /* 64 */ AC_ENTRY(4, AC_EXT_RUN,   48,  0),

    /* 65 */ AC_ENTRY(2, AC_EXT_RUN,    4,  1),
    /* 66 */ AC_ENTRY(3, AC_EXT_RUN,    8,  1),
    /* 67 */ AC_ENTRY(4, AC_EXT_RUN,   16,  1),

    /* 68 */ AC_ENTRY(2, AC_EXT_LEVEL,  0,  4),
    /* 69 */ AC_ENTRY(3, AC_EXT_LEVEL,  0,  8),
    /* 70 */ AC_ENTRY(4, AC_EXT_LEVEL,  0, 16),

    /* 71 */ AC_ENTRY(2, AC_EXT_LEVEL,  1,  3),
    /* 72 */ AC_ENTRY(3, AC_EXT_LEVEL,  1,  7),
};
#undef AC_ENTRY
#undef AC_EXT_LEVEL
#undef AC_EXT_RUN

206 207 208 209
/**
 * Read one AC (run, level, final) triple with the table already selected
 * for the given mode (see x8_select_ac_table()).
 *
 * @param[out] run   number of zero coefficients before this one
 * @param[out] level level as coded; the caller adds 1 and applies sign and
 *                   quantizer
 * @param[out] final non-zero if this is the last coefficient of the block
 *
 * When get_vlc2() returns a negative symbol, all three outputs are set to
 * 64 so that the caller's position check fails.
 */
static void x8_get_ac_rlf(IntraX8Context *const w, const int mode,
                          int *const run, int *const level, int *const final)
{
    int sym = get_vlc2(w->gb, w->j_ac_vlc[mode]->table,
                       AC_VLC_BITS, AC_VLC_MTD);

    if (sym < 0) {
        // run = 64 causes an error exit in the caller's AC loop; the other
        // two are set only to avoid 'may be used uninitialized'
        *run   = 64;
        *final = 64;
        *level = 64;
        return;
    }

    if (sym < 46) { // [0-45]: no extra bits
        const int is_final = sym > 22;
        int lvl, run_mask;

        // symbols 23..45 repeat the layout of 0..22 with final set
        sym -= 23 * is_final;

        /*
         * sym == 0-15  run = 0-15 level = 0; run = sym & %01111
         * sym == 16-19 run = 0-3  level = 1; run = sym & %00011
         * sym == 20-21 run = 0-1  level = 2; run = sym & %00001
         * sym == 22    run = 0    level = 3; run = sym & %00000
         *
         * lut_l[sym / 2] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 3 }
         *     11 10'01 01'00 00'00 00'00 00'00 00 => 0xE50000
         */
        lvl = (0xE50000 >> (sym & 0x1E)) & 3;

        /* lut_mask[lvl] = { 0x0f, 0x03, 0x01, 0x00 };
         * as sym < 256 the higher bits do not matter */
        run_mask = 0x01030F >> (lvl << 3);

        *final = is_final;
        *run   = sym & run_mask;
        *level = lvl;
        return;
    }

    if (sym < 73) { // [46-72]: table-driven extra bits
        const uint32_t packed  = ac_decode_table[sym - 46];
        const int      extra   = get_bits(w->gb, packed & 0xF);
        const uint32_t mask    = (packed >> 8) & 0xff;
        const uint32_t offsets = packed >> 16; // run in low byte, level above

        *run   = (offsets & 0xff) + (extra &  mask);
        *level = (offsets >>   8) + (extra & ~mask);
        *final = sym > 58;
        return;
    }

    if (sym < 75) { // [73-74]: 5 extra bits select a packed run/level pair
        static const uint8_t crazy_mix_runlevel[32] = {
            0x22, 0x32, 0x33, 0x53, 0x23, 0x42, 0x43, 0x63,
            0x24, 0x52, 0x34, 0x73, 0x25, 0x62, 0x44, 0x83,
            0x26, 0x72, 0x35, 0x54, 0x27, 0x82, 0x45, 0x64,
            0x28, 0x92, 0x36, 0x74, 0x29, 0xa2, 0x46, 0x84,
        };
        const int extra = get_bits(w->gb, 5);

        *final = !(sym & 1);
        *run   = crazy_mix_runlevel[extra] >> 4;
        *level = crazy_mix_runlevel[extra] & 0x0F;
        return;
    }

    // escape: level, run and final are coded explicitly, in this order
    *level = get_bits(w->gb, 7 - 3 * (sym & 1));
    *run   = get_bits(w->gb, 6);
    *final = get_bits1(w->gb);
}

279 280 281 282 283 284
/*
 * First magnitude covered by each DC symbol; x8_get_dc_rlf() adds
 * (extra_bits >> 1) to it and uses the lowest extra bit as the sign.
 *
 * The matching number of extra bits per symbol is computed in
 * x8_get_dc_rlf() rather than stored; as a table it would be
 *     dc_extra_sbits[] = { 0, 1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7 }
 */
static const uint8_t dc_index_offset[17] = {
      0,
      1,   2,   3,   4, // 1 extra bit
      5,   7,           // 2 extra bits
      9,  13,           // 3 extra bits
     17,  25,           // 4 extra bits
     33,  49,           // 5 extra bits
     65,  97,           // 6 extra bits
    129, 193,           // 7 extra bits
};
285

286 287
/**
 * Read one DC coefficient.
 *
 * On first use for a mode, a 3-bit table selector is read from the
 * bitstream.
 *
 * @param mode       DC mode, must be < 3
 * @param[out] level signed DC level
 * @param[out] final 1 if the block has no AC coefficients, 0 otherwise
 * @return 0 on success, a positive value if get_vlc2() returned a negative
 *         symbol
 */
static int x8_get_dc_rlf(IntraX8Context *const w, const int mode,
                         int *const level, int *const final)
{
    int sym, nbits, extra, magnitude;

    av_assert2(mode < 3);
    if (!w->j_dc_vlc[mode]) {
        const int selector = get_bits(w->gb, 3);

        // every mode picks from the same set of tables
        w->j_dc_vlc[mode] = &j_dc_vlc[w->quant < 13][selector];
    }

    sym = get_vlc2(w->gb, w->j_dc_vlc[mode]->table, DC_VLC_BITS, DC_VLC_MTD);

    // symbols 17..33 repeat 0..16 with the final flag set
    if (sym > 16) {
        *final = 1;
        sym   -= 17;
    } else {
        *final = 0;
    }

    if (sym <= 0) {
        // sym == 0: zero level, success; sym < 0: reported as non-zero return
        *level = 0;
        return -sym;
    }

    // extra bit count: 1, 1, 1, 1, 2, 2, 3, 3, ... for sym = 1, 2, 3, ...
    nbits = (sym + 1) >> 1;
    if (nbits > 1)
        nbits--;

    extra     = get_bits(w->gb, nbits);
    magnitude = dc_index_offset[sym] + (extra >> 1);

    // lowest extra bit is the sign
    *level = (extra & 1) ? -magnitude : magnitude;
    return 0;
}

320 321 322 323
// end of huffman

/**
 * Prepare spatial prediction for the current block and settle w->orient.
 *
 * @param chroma 0 for luma, otherwise the chroma plane index; also used as
 *               index into w->dest[]
 * @return 0 on success, -1 if reading the orientation symbol failed (only
 *         possible for luma; no bits are read for chroma)
 */
static int x8_setup_spatial_predictor(IntraX8Context *const w, const int chroma)
{
    static const uint8_t prediction_table[3][12] = {
        { 0, 8, 4, 10, 11, 2, 6, 9, 1, 3, 5, 7 },
        { 4, 0, 8, 11, 10, 3, 5, 2, 6, 9, 1, 7 },
        { 8, 0, 4, 10, 11, 1, 7, 2, 6, 9, 3, 5 },
    };
    int range, sum;
    int quant;

    w->dsp.setup_spatial_compensation(w->dest[chroma], w->scratchpad,
                                      w->frame->linesize[chroma > 0],
                                      &range, &sum, w->edges);

    quant = chroma ? w->quant_dc_chroma : w->quant;
    if (chroma)
        w->orient = w->chroma_orient;

    w->flat_dc = 0;
    if (range < quant || range < 3) {
        w->orient = 0;

        // yep you read right, a +-1 idct error may break decoding!
        if (range < 3) {
            w->flat_dc      = 1;
            // ((1 << 17) + 9) / (8 + 8 + 1 + 2) = 6899
            w->predicted_dc = (sum + 9) * 6899 >> 17;
        }
    }

    if (chroma)
        return 0;

    av_assert2(w->orient < 3);

    if (range >= 2 * w->quant) {
        // orientation is coded explicitly and remapped by the predicted one
        w->raw_orient = x8_get_orient_vlc(w);
        if (w->raw_orient < 0)
            return -1;
        av_assert2(w->raw_orient < 12);
        av_assert2(w->orient < 3);
        w->orient = prediction_table[w->orient][w->raw_orient];
        return 0;
    }

    // no orientation is coded: derive it from the prediction alone
    if (w->edges & 3)
        w->orient = 0;
    else if (w->orient == 1)
        w->orient = 11;
    else if (w->orient == 2)
        w->orient = 10;
    w->raw_orient = 0;

    return 0;
}

380 381 382
/**
 * Store the prediction data of the current luma block.
 *
 * Each entry packs est_run in the upper bits and a 2-bit orientation class
 * in the lower bits (1 for orient 4, 2 for orient 8, 0 otherwise).
 * Entries of even and odd block rows are interleaved:
 *   y = 2n + 0 -> slots 0 2 4 ...
 *   y = 2n + 1 -> slots 1 3 5 ...
 */
static void x8_update_predictions(IntraX8Context *const w, const int orient,
                                  const int est_run)
{
    const int slot = w->mb_x * 2 + (w->mb_y & 1);
    int packed     = est_run << 2;

    if (orient == 4)
        packed += 1;
    else if (orient == 8)
        packed += 2;

    w->prediction_table[slot] = packed;
}

390 391
/**
 * Compute w->edges and w->chroma_orient for the chroma blocks and reset
 * w->raw_orient to 0.
 */
static void x8_get_prediction_chroma(IntraX8Context *const w)
{
    int edges = 0;

    if (!(w->mb_x >> 1))
        edges |= 1;
    if (!(w->mb_y >> 1))
        edges |= 2;
    // mb_x for chroma would always be odd
    if (w->mb_x >= (2 * w->mb_width - 1))
        edges |= 4;
    w->edges = edges;

    w->raw_orient = 0;

    if (edges & 3) {
        // lut_co[8] = {inv,4,8,8, inv,4,8,8} <- => {1,1,0,0;1,1,0,0} => 0xCC
        w->chroma_orient = 4 << ((0xCC >> edges) & 1);
    } else {
        // block[x - 1][y | 1 - 1)]
        w->chroma_orient = (w->prediction_table[2 * w->mb_x - 2] & 0x03) << 2;
    }
}

406 407 408 409
/**
 * Compute w->edges, w->est_run and the predicted w->orient for the current
 * luma block from the entries stored by x8_update_predictions().
 */
static void x8_get_prediction(IntraX8Context *const w)
{
    const int col       = 2 * w->mb_x;   // first slot of the current column
    const int cur_row   = w->mb_y & 1;   // slot parity of the current row
    const int above_row = !cur_row;      // slot parity of the row above
    int left, above, above_left;
    int est, sel, border;

    w->edges  = 1 * !w->mb_x;
    w->edges |= 2 * !w->mb_y;
    w->edges |= 4 * (w->mb_x >= (2 * w->mb_width - 1));

    border = w->edges & 3;
    if (border == 3) {
        w->est_run = 16;
        w->orient  = 0;
        return;
    }
    if (border == 1) {
        // take the one from the above block[0][y - 1]
        w->est_run = w->prediction_table[above_row] >> 2;
        w->orient  = 1;
        return;
    }
    if (border == 2) {
        // take the one from the previous block[x - 1][0]
        w->est_run = w->prediction_table[col - 2] >> 2;
        w->orient  = 2;
        return;
    }

    // no edge cases
    above      = w->prediction_table[col     + above_row]; // block[x    ][y - 1]
    left       = w->prediction_table[col - 2 + cur_row];   // block[x - 1][y    ]
    above_left = w->prediction_table[col - 2 + above_row]; // block[x - 1][y - 1]

    est = FFMIN(above, left);
    /* This condition has nothing to do with w->edges, even if it looks
     * similar it would trigger if e.g. x = 3; y = 2;
     * I guess somebody wrote something wrong and it became standard. */
    if ((w->mb_x & w->mb_y) != 0)
        est = FFMIN(above_left, est);
    w->est_run = est >> 2;

    // keep only the orientation class of each neighbour
    left       &= 3;
    above      &= 3;
    above_left &= 3;

    /*
     * lut1[b][a] = {
     * ->{ 0, 1, 0, pad },
     *   { 0, 1, X, pad },
     *   { 2, 2, 2, pad }
     * }
     * pad 2  2  2;
     * pad X  1  0;
     * pad 0  1  0 <-
     * -> 11 10 '10 10 '11 11'01 00 '11 00'01 00 => 0xEAF4C4
     * (b = above, a = left; X = 3 means "look at lut2")
     */
    sel = (0xFFEAF4C4 >> (2 * above + 8 * left)) & 3;
    if (sel == 3) {
        /*
         * lut2[q>12][c] = {
         * ->{ 0, 2, 1, pad},
         *   { 2, 2, 2, pad}
         * }
         * pad 2  2  2;
         * pad 1  2  0 <-
         * -> 11 10'10 10 '11 01'10 00 => 0xEAD8
         * (c = above_left)
         */
        sel = (0xFFEAD8 >> (2 * above_left + 8 * (w->quant > 12))) & 3;
    }
    w->orient = sel;
}

475 476
/**
 * Adjust selected AC coefficients of w->block[0] by fixed fractions of the
 * DC value and raise w->block_last_index[0] so the touched coefficients
 * are covered.
 *
 * @param direction 0: adjust coefficients in both the first row and first
 *                  column (plus a few mixed ones); 1: column x = 0 only;
 *                  2: row y = 0 only. Other values leave the block untouched.
 * @param dc_level  DC value the corrections are scaled by
 */
static void x8_ac_compensation(IntraX8Context *const w, const int direction,
                               const int dc_level)
{
    int t;
/* Both helper macros are fully parenthesized and carry no trailing
 * semicolon, so they behave like ordinary expressions. */
#define B(x, y) (w->block[0][w->idct_permutation[(x) + (y) * 8]])
#define T(x)    (((x) * dc_level + 0x8000) >> 16) // 16.16 fixed point, rounded
    switch (direction) {
    case 0:
        t        = T(3811); // h
        B(1, 0) -= t;
        B(0, 1) -= t;

        t        = T(487); // e
        B(2, 0) -= t;
        B(0, 2) -= t;

        t        = T(506); // f
        B(3, 0) -= t;
        B(0, 3) -= t;

        t        = T(135); // c
        B(4, 0) -= t;
        B(0, 4) -= t;
        B(2, 1) += t;
        B(1, 2) += t;
        B(3, 1) += t;
        B(1, 3) += t;

        t        = T(173); // d
        B(5, 0) -= t;
        B(0, 5) -= t;

        t        = T(61); // b
        B(6, 0) -= t;
        B(0, 6) -= t;
        B(5, 1) += t;
        B(1, 5) += t;

        t        = T(42); // a
        B(7, 0) -= t;
        B(0, 7) -= t;
        B(4, 1) += t;
        B(1, 4) += t;
        B(4, 4) += t;

        t        = T(1084); // g
        B(1, 1) += t;

        w->block_last_index[0] = FFMAX(w->block_last_index[0], 7 * 8);
        break;
    case 1:
        B(0, 1) -= T(6269);
        B(0, 3) -= T(708);
        B(0, 5) -= T(172);
        B(0, 7) -= T(73);

        w->block_last_index[0] = FFMAX(w->block_last_index[0], 7 * 8);
        break;
    case 2:
        B(1, 0) -= T(6269);
        B(3, 0) -= T(708);
        B(5, 0) -= T(172);
        B(7, 0) -= T(73);

        w->block_last_index[0] = FFMAX(w->block_last_index[0], 7);
        break;
    }
#undef B
#undef T
}

546
/**
 * Fill an 8x8 block with a single pixel value.
 *
 * @param pix      value written to every pixel
 * @param dst      top-left pixel of the block
 * @param linesize distance in bytes between two consecutive rows
 */
static void dsp_x8_put_solidcolor(const uint8_t pix, uint8_t *dst,
                                  const ptrdiff_t linesize)
{
    int rows_left = 8;

    while (rows_left--) {
        memset(dst, pix, 8);
        dst += linesize;
    }
}

/*
 * Per-position scale factors applied by x8_decode_intra_mb() when the quant
 * matrix is in use: level = (level * quant_table[pos]) >> 8, so 256 leaves
 * the level unchanged. Indexed by the coefficient position in scan order.
 */
static const int16_t quant_table[64] = {
    /*  0 */ 256, 256, 256, 256, 256, 256, 259, 262,
    /*  8 */ 265, 269, 272, 275, 278, 282, 285, 288,
    /* 16 */ 292, 295, 299, 303, 306, 310, 314, 317,
    /* 24 */ 321, 325, 329, 333, 337, 341, 345, 349,
    /* 32 */ 353, 358, 362, 366, 371, 375, 379, 384,
    /* 40 */ 389, 393, 398, 403, 408, 413, 417, 422,
    /* 48 */ 428, 433, 438, 443, 448, 454, 459, 465,
    /* 56 */ 470, 476, 482, 488, 493, 499, 505, 511,
};

567 568 569 570 571 572
/**
 * Decode and reconstruct one 8x8 intra block at w->dest[chroma].
 *
 * Steps: read the DC coefficient, read AC coefficients unless the DC symbol
 * was final, apply AC compensation, draw the spatial prediction (or a flat
 * colour), add the IDCT residual, record prediction data for luma, and run
 * the loop filter if enabled.
 *
 * @param chroma 0 for luma, otherwise the chroma plane index
 * @return 0 on success, -1 on a bitstream error
 */
static int x8_decode_intra_mb(IntraX8Context *const w, const int chroma)
{
    uint8_t *const  dst         = w->dest[chroma];
    const ptrdiff_t stride      = w->frame->linesize[!!chroma];
    const int       block_quant = chroma ? w->quant_dc_chroma : w->quant;
    int dc_level, final;
    int n_coeffs   = 0; // number of AC coefficients decoded
    int zeros_only = 0; // set when the whole residual is zero

    av_assert2(w->orient < 12);
    w->bdsp.clear_block(w->block[0]);

    // dc mode: 2 for chroma, otherwise 0 or 1 depending on est_run
    if (x8_get_dc_rlf(w, chroma ? 2 : !!w->est_run, &dc_level, &final))
        return -1;

    if (final) { // DC only
        w->block_last_index[0] = 0;
        if (w->flat_dc && ((unsigned) (dc_level + 1)) < 3) { // [-1; 1]
            const int32_t divide_quant = chroma ? w->divide_quant_dc_chroma
                                                : w->divide_quant_dc_luma;

            // original intent dc_level += predicted_dc/quant;
            // but it got lost somewhere in the rounding
            dc_level += (w->predicted_dc * divide_quant + (1 << 12)) >> 13;

            dsp_x8_put_solidcolor(av_clip_uint8((dc_level * block_quant + 4) >> 3),
                                  dst, stride);

            goto block_placed;
        }
        zeros_only = dc_level == 0;
    } else { // decode ac
        const uint8_t *scan;
        int use_quant_matrix = w->use_quant_matrix;
        int switch_at        = 64; // 64 is never reached, i.e. no switch
        int pos              = 0;
        int ac_mode, run, level;

        if (chroma) {
            ac_mode = 1;
        } else {
            if (w->raw_orient < 3)
                use_quant_matrix = 0;

            if (w->raw_orient > 4) {
                ac_mode = 0;
            } else if (w->est_run > 1) {
                ac_mode   = 2;
                switch_at = w->est_run;
            } else {
                ac_mode = 3;
            }
        }
        x8_select_ac_table(w, ac_mode);

        /* scantable_selector[12] = { 0, 2, 0, 1, 1, 1, 0, 2, 2, 0, 1, 2 }; <-
         * -> 10'01' 00'10' 10'00' 01'01' 01'00' 10'00 => 0x928548 */
        scan = w->scantable[(0x928548 >> (2 * w->orient)) & 3].permutated;

        do {
            // after switch_at coefficients, continue with mode 3
            if (++n_coeffs >= switch_at) {
                ac_mode = 3;
                x8_select_ac_table(w, 3);
            }

            x8_get_ac_rlf(w, ac_mode, &run, &level, &final);

            pos += run + 1;
            if (pos > 63) {
                // this also handles vlc error in x8_get_ac_rlf
                return -1;
            }

            level = (level + 1) * w->dquant + w->qsum;
            if (get_bits1(w->gb)) // sign bit
                level = -level;

            if (use_quant_matrix)
                level = (level * quant_table[pos]) >> 8;

            w->block[0][scan[pos]] = level;
        } while (!final);

        w->block_last_index[0] = pos;
    }

    w->block[0][0] = dc_level * block_quant;

    // there is !zero_only check in the original, but dc_level check is enough
    if ((unsigned int) (dc_level + 1) >= 3 && (w->edges & 3) != 3) {
        /* ac_comp_direction[orient] = { 0, 3, 3, 1, 1, 0, 0, 0, 2, 2, 2, 1 }; <-
         * -> 01'10' 10'10' 00'00' 00'01' 01'11' 11'00 => 0x6A017C */
        const int direction = (0x6A017C >> (w->orient * 2)) & 3;

        if (direction != 3) {
            // modify block_last[]
            x8_ac_compensation(w, direction, w->block[0][0]);
        }
    }

    if (w->flat_dc)
        dsp_x8_put_solidcolor(w->predicted_dc, dst, stride);
    else
        w->dsp.spatial_compensation[w->orient](w->scratchpad, dst, stride);

    if (!zeros_only)
        w->wdsp.idct_add(dst, stride, w->block[0]);

block_placed:
    if (!chroma)
        x8_update_predictions(w, w->orient, n_coeffs);

    if (w->loopfilter) {
        // skip a filter on the picture edge, or when the residual is all
        // zero and the orientation is 0 or the one named in the test
        if (!(w->edges & 2) && !(zeros_only && (w->orient | 4) == 4))
            w->dsp.h_loop_filter(dst, stride, w->quant);

        if (!(w->edges & 1) && !(zeros_only && (w->orient | 8) == 8))
            w->dsp.v_loop_filter(dst, stride, w->quant);
    }
    return 0;
}

710
// FIXME maybe merge with ff_*
711
/**
 * Point w->dest[] at the first block of block row w->mb_y in frame.
 *
 * Luma advances 8 lines per block row. Chroma advances 4 lines per block
 * row, using w->mb_y with its lowest bit cleared.
 *
 * Offsets are computed by multiplication rather than by shifting, because
 * left-shifting a negative value (negative linesize) is undefined behaviour.
 */
static void x8_init_block_index(IntraX8Context *w, AVFrame *frame)
{
    // not parent codec linesize as this would be wrong for field pics
    // not that IntraX8 has interlacing support ;)
    const ptrdiff_t linesize   = frame->linesize[0];
    const ptrdiff_t uvlinesize = frame->linesize[1];

    w->dest[0] = frame->data[0];
    w->dest[1] = frame->data[1];
    w->dest[2] = frame->data[2];

    w->dest[0] +=  w->mb_y       * linesize   * 8;
    // chroma blocks are on odd rows
    w->dest[1] += (w->mb_y & ~1) * uvlinesize * 4;
    w->dest[2] += (w->mb_y & ~1) * uvlinesize * 4;
}

728 729
/**
 * Initialize an IntraX8 context.
 *
 * Builds the static VLC tables, stores the caller-provided buffers and
 * dimensions, allocates the prediction table and sets up the DSP contexts
 * and scantables.
 *
 * @param block            coefficient buffer; only the pointer is stored
 * @param block_last_index last-index array; only the pointer is stored
 * @param mb_width         picture width in macroblocks
 * @param mb_height        picture height in macroblocks
 * @return 0 on success, a negative error code on failure
 */
av_cold int ff_intrax8_common_init(AVCodecContext *avctx,
                                   IntraX8Context *w, IDCTDSPContext *idsp,
                                   int16_t (*block)[64],
                                   int block_last_index[12],
                                   int mb_width, int mb_height)
{
    // ff_wmv1_scantable[] entry used for each of the three w->scantable[]
    static const uint8_t scan_source[3] = { 0, 2, 3 };
    int i;
    int ret;

    ret = x8_vlc_init();
    if (ret < 0)
        return ret;

    w->avctx            = avctx;
    w->idsp             = *idsp;
    w->mb_width         = mb_width;
    w->mb_height        = mb_height;
    w->block            = block;
    w->block_last_index = block_last_index;

    // two rows, 2 blocks per macroblock
    w->prediction_table = av_mallocz(w->mb_width * 2 * 2);
    if (!w->prediction_table)
        return AVERROR(ENOMEM);

    ff_wmv2dsp_init(&w->wdsp);

    // the permutation depends on the IDCT chosen by ff_wmv2dsp_init()
    ff_init_scantable_permutation(w->idct_permutation,
                                  w->wdsp.idct_perm);

    for (i = 0; i < 3; i++)
        ff_init_scantable(w->idct_permutation, &w->scantable[i],
                          ff_wmv1_scantable[scan_source[i]]);

    ff_intrax8dsp_init(&w->dsp);
    ff_blockdsp_init(&w->bdsp, avctx);

    return 0;
}

768
/**
 * Release what ff_intrax8_common_init() allocated: the prediction table is
 * freed and its pointer cleared by av_freep().
 */
av_cold void ff_intrax8_common_end(IntraX8Context *w)
{
    av_freep(&w->prediction_table);
}

773
int ff_intrax8_decode_picture(IntraX8Context *w, Picture *pict,
774
                              GetBitContext *gb, int *mb_x, int *mb_y,
775 776
                              int dquant, int quant_offset,
                              int loopfilter, int lowdelay)
777
{
778 779
    int mb_xy;

780
    w->gb     = gb;
781 782 783
    w->dquant = dquant;
    w->quant  = dquant >> 1;
    w->qsum   = quant_offset;
784
    w->frame  = pict->f;
785
    w->loopfilter = loopfilter;
786
    w->use_quant_matrix = get_bits1(w->gb);
787

788 789 790
    w->mb_x = *mb_x;
    w->mb_y = *mb_y;

791 792 793
    w->divide_quant_dc_luma = ((1 << 16) + (w->quant >> 1)) / w->quant;
    if (w->quant < 5) {
        w->quant_dc_chroma        = w->quant;
794
        w->divide_quant_dc_chroma = w->divide_quant_dc_luma;
795 796 797
    } else {
        w->quant_dc_chroma        = w->quant + ((w->quant + 3) >> 3);
        w->divide_quant_dc_chroma = ((1 << 16) + (w->quant_dc_chroma >> 1)) / w->quant_dc_chroma;
798 799 800
    }
    x8_reset_vlc_tables(w);

801 802 803 804
    for (w->mb_y = 0; w->mb_y < w->mb_height * 2; w->mb_y++) {
        x8_init_block_index(w, w->frame);
        mb_xy = (w->mb_y >> 1) * (w->mb_width + 1);
        for (w->mb_x = 0; w->mb_x < w->mb_width * 2; w->mb_x++) {
805
            x8_get_prediction(w);
806 807 808 809
            if (x8_setup_spatial_predictor(w, 0))
                goto error;
            if (x8_decode_intra_mb(w, 0))
                goto error;
810

811
            if (w->mb_x & w->mb_y & 1) {
812 813
                x8_get_prediction_chroma(w);

814 815 816 817 818
                /* when setting up chroma, no vlc is read,
                 * so no error condition can be reached */
                x8_setup_spatial_predictor(w, 1);
                if (x8_decode_intra_mb(w, 1))
                    goto error;
819

820 821 822
                x8_setup_spatial_predictor(w, 2);
                if (x8_decode_intra_mb(w, 2))
                    goto error;
823

824 825
                w->dest[1] += 8;
                w->dest[2] += 8;
826

827
                pict->qscale_table[mb_xy] = w->quant;
828 829
                mb_xy++;
            }
830
            w->dest[0] += 8;
831
        }
832
        if (w->mb_y & 1)
833 834 835
            ff_draw_horiz_band(w->avctx, w->frame, w->frame,
                               (w->mb_y - 1) * 8, 16,
                               PICT_FRAME, 0, lowdelay);
836 837 838
    }

error:
839 840 841
    *mb_x = w->mb_x;
    *mb_y = w->mb_y;

842 843
    return 0;
}