/*
 * Copyright (c) 2003 The FFmpeg Project
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/*
 * How to use this decoder:
 * SVQ3 data is transported within Apple Quicktime files. Quicktime files
 * have stsd atoms to describe media trak properties. A stsd atom for a
 * video trak contains 1 or more ImageDescription atoms. These atoms begin
 * with the 4-byte length of the atom followed by the codec fourcc. Some
 * decoders need information in this atom to operate correctly. Such
 * is the case with SVQ3. In order to get the best use out of this decoder,
 * the calling app must make the SVQ3 ImageDescription atom available
 * via the AVCodecContext's extradata[_size] field:
 *
 * AVCodecContext.extradata = pointer to ImageDescription, first characters
 * are expected to be 'S', 'V', 'Q', and '3', NOT the 4-byte atom length
 * AVCodecContext.extradata_size = size of ImageDescription atom memory
 * buffer (which will be the same as the ImageDescription atom size field
 * from the QT file, minus 4 bytes since the length is missing)
 *
 * You will know you have these parameters passed correctly when the decoder
 * correctly decodes this file:
 *  http://samples.mplayerhq.hu/V-codecs/SVQ3/Vertical400kbit.sorenson3.mov
 */

#include <inttypes.h>
#include <string.h>

#include "libavutil/attributes.h"
#include "internal.h"
#include "avcodec.h"
#include "mpegutils.h"
#include "h264.h"

#include "h264data.h" // FIXME FIXME FIXME

#include "h264_mvpred.h"
#include "golomb.h"
#include "hpeldsp.h"
#include "rectangle.h"
#include "tpeldsp.h"
#include "vdpau_internal.h"

#if CONFIG_ZLIB
#include <zlib.h>
#endif

#include "svq1.h"
#include "svq3.h"

/**
 * @file
 * SVQ3 decoder.
 */

typedef struct {
    H264Context h;
74
    HpelDSPContext hdsp;
75
    TpelDSPContext tdsp;
76 77 78
    H264Picture *cur_pic;
    H264Picture *next_pic;
    H264Picture *last_pic;
79 80 81 82 83
    int halfpel_flag;
    int thirdpel_flag;
    int unknown_flag;
    int next_slice_index;
    uint32_t watermark_key;
84 85
    uint8_t *buf;
    int buf_size;
86 87 88 89 90
    int adaptive_quant;
    int next_p_frame_damaged;
    int h_edge_pos;
    int v_edge_pos;
    int last_frame_output;
91 92
} SVQ3Context;

93 94
/* Motion compensation modes, passed as the `mode` argument of svq3_mc_dir(). */
#define FULLPEL_MODE  1
#define HALFPEL_MODE  2
#define THIRDPEL_MODE 3
#define PREDICT_MODE  4

/* dual scan (from some older h264 draft)
 * o-->o-->o   o
 *         |  /|
 * o   o   o / o
 * | / |   |/  |
 * o   o   o   o
 *   /
 * o-->o-->o-->o
 *
 * Each entry is x + y * 4, i.e. a position inside a 4x4 block.
 * Selected by svq3_decode_block() for block type 2.
 */
static const uint8_t svq3_scan[16] = {
    0 + 0 * 4, 1 + 0 * 4, 2 + 0 * 4, 2 + 1 * 4,
    2 + 2 * 4, 3 + 0 * 4, 3 + 1 * 4, 3 + 2 * 4,
    0 + 1 * 4, 0 + 2 * 4, 1 + 1 * 4, 1 + 2 * 4,
    0 + 3 * 4, 1 + 3 * 4, 2 + 3 * 4, 3 + 3 * 4,
};

/*
 * Scan order for the luma DC coefficients (block type 0 in
 * svq3_decode_block()). Entries are written as x * 16 + y * 64, so every
 * value is a multiple of 16.
 */
static const uint8_t luma_dc_zigzag_scan[16] = {
    0 * 16 + 0 * 64, 1 * 16 + 0 * 64, 2 * 16 + 0 * 64, 0 * 16 + 2 * 64,
    3 * 16 + 0 * 64, 0 * 16 + 1 * 64, 1 * 16 + 1 * 64, 2 * 16 + 1 * 64,
    1 * 16 + 2 * 64, 2 * 16 + 2 * 64, 3 * 16 + 2 * 64, 0 * 16 + 3 * 64,
    3 * 16 + 1 * 64, 1 * 16 + 3 * 64, 2 * 16 + 3 * 64, 3 * 16 + 3 * 64,
};

/*
 * Maps a luma prediction code (0..24, range-checked in svq3_decode_mb())
 * to a pair of indices into the innermost dimension of svq3_pred_1, one
 * for each of the two 4x4 blocks decoded per code.
 */
static const uint8_t svq3_pred_0[25][2] = {
    { 0, 0 },
    { 1, 0 }, { 0, 1 },
    { 0, 2 }, { 1, 1 }, { 2, 0 },
    { 3, 0 }, { 2, 1 }, { 1, 2 }, { 0, 3 },
    { 0, 4 }, { 1, 3 }, { 2, 2 }, { 3, 1 }, { 4, 0 },
    { 4, 1 }, { 3, 2 }, { 2, 3 }, { 1, 4 },
    { 2, 4 }, { 3, 3 }, { 4, 2 },
    { 4, 3 }, { 3, 4 },
    { 4, 4 }
};

/*
 * Intra 4x4 prediction mode lookup, indexed as
 * [top mode + 1][left mode + 1][index from svq3_pred_0].
 * An entry of -1 marks a combination that svq3_decode_mb() rejects.
 */
static const int8_t svq3_pred_1[6][6][5] = {
    { { 2, -1, -1, -1, -1 }, { 2, 1, -1, -1, -1 }, { 1, 2, -1, -1, -1 },
      { 2,  1, -1, -1, -1 }, { 1, 2, -1, -1, -1 }, { 1, 2, -1, -1, -1 } },
    { { 0,  2, -1, -1, -1 }, { 0, 2,  1,  4,  3 }, { 0, 1,  2,  4,  3 },
      { 0,  2,  1,  4,  3 }, { 2, 0,  1,  3,  4 }, { 0, 4,  2,  1,  3 } },
    { { 2,  0, -1, -1, -1 }, { 2, 1,  0,  4,  3 }, { 1, 2,  4,  0,  3 },
      { 2,  1,  0,  4,  3 }, { 2, 1,  4,  3,  0 }, { 1, 2,  4,  0,  3 } },
    { { 2,  0, -1, -1, -1 }, { 2, 0,  1,  4,  3 }, { 1, 2,  0,  4,  3 },
      { 2,  1,  0,  4,  3 }, { 2, 1,  3,  4,  0 }, { 2, 4,  1,  0,  3 } },
    { { 0,  2, -1, -1, -1 }, { 0, 2,  1,  3,  4 }, { 1, 2,  3,  0,  4 },
      { 2,  0,  1,  3,  4 }, { 2, 1,  3,  0,  4 }, { 2, 0,  4,  3,  1 } },
    { { 0,  2, -1, -1, -1 }, { 0, 2,  4,  1,  3 }, { 1, 4,  2,  0,  3 },
      { 4,  2,  0,  1,  3 }, { 2, 0,  1,  4,  3 }, { 4, 2,  1,  0,  3 } },
};

/*
 * (run, level) pairs for coefficient codes below 16, indexed as
 * [intra][code] by svq3_decode_block().
 */
static const struct {
    uint8_t run;
    uint8_t level;
} svq3_dct_tables[2][16] = {
    { { 0, 0 }, { 0, 1 }, { 1, 1 }, { 2, 1 }, { 0, 2 }, { 3, 1 }, { 4, 1 }, { 5, 1 },
      { 0, 3 }, { 1, 2 }, { 2, 2 }, { 6, 1 }, { 7, 1 }, { 8, 1 }, { 9, 1 }, { 0, 4 } },
    { { 0, 0 }, { 0, 1 }, { 1, 1 }, { 0, 2 }, { 2, 1 }, { 0, 3 }, { 0, 4 }, { 0, 5 },
      { 3, 1 }, { 4, 1 }, { 1, 2 }, { 1, 3 }, { 0, 6 }, { 0, 7 }, { 0, 8 }, { 0, 9 } }
};

/*
 * Dequantization multipliers, indexed by the quantizer value (0..31) in
 * ff_svq3_luma_dc_dequant_idct_c() and ff_svq3_add_idct_c().
 */
static const uint32_t svq3_dequant_coeff[32] = {
     3881,  4351,  4890,  5481,   6154,   6914,   7761,   8718,
     9781, 10987, 12339, 13828,  15523,  17435,  19561,  21873,
    24552, 27656, 30847, 34870,  38807,  43747,  49103,  54683,
    61694, 68745, 77615, 89113, 100253, 109366, 126635, 141533
};

/* Forward declaration; the definition is not part of this section of the file. */
static int svq3_decode_end(AVCodecContext *avctx);

void ff_svq3_luma_dc_dequant_idct_c(int16_t *output, int16_t *input, int qp)
168
{
169
    const int qmul = svq3_dequant_coeff[qp];
170 171 172
#define stride 16
    int i;
    int temp[16];
173 174 175 176 177 178 179 180 181 182 183 184
    static const uint8_t x_offset[4] = { 0, 1 * stride, 4 * stride, 5 * stride };

    for (i = 0; i < 4; i++) {
        const int z0 = 13 * (input[4 * i + 0] +      input[4 * i + 2]);
        const int z1 = 13 * (input[4 * i + 0] -      input[4 * i + 2]);
        const int z2 =  7 *  input[4 * i + 1] - 17 * input[4 * i + 3];
        const int z3 = 17 *  input[4 * i + 1] +  7 * input[4 * i + 3];

        temp[4 * i + 0] = z0 + z3;
        temp[4 * i + 1] = z1 + z2;
        temp[4 * i + 2] = z1 - z2;
        temp[4 * i + 3] = z0 - z3;
185 186
    }

187 188 189 190 191 192 193
    for (i = 0; i < 4; i++) {
        const int offset = x_offset[i];
        const int z0     = 13 * (temp[4 * 0 + i] +      temp[4 * 2 + i]);
        const int z1     = 13 * (temp[4 * 0 + i] -      temp[4 * 2 + i]);
        const int z2     =  7 *  temp[4 * 1 + i] - 17 * temp[4 * 3 + i];
        const int z3     = 17 *  temp[4 * 1 + i] +  7 * temp[4 * 3 + i];

194 195 196 197
        output[stride *  0 + offset] = (z0 + z3) * qmul + 0x80000 >> 20;
        output[stride *  2 + offset] = (z1 + z2) * qmul + 0x80000 >> 20;
        output[stride *  8 + offset] = (z1 - z2) * qmul + 0x80000 >> 20;
        output[stride * 10 + offset] = (z0 - z3) * qmul + 0x80000 >> 20;
198 199 200 201
    }
}
#undef stride

Diego Biurrun's avatar
Diego Biurrun committed
202
void ff_svq3_add_idct_c(uint8_t *dst, int16_t *block,
203
                        int stride, int qp, int dc)
204
{
205
    const int qmul = svq3_dequant_coeff[qp];
206 207 208
    int i;

    if (dc) {
209 210
        dc       = 13 * 13 * (dc == 1 ? 1538 * block[0]
                                      : qmul * (block[0] >> 3) / 2);
211 212 213
        block[0] = 0;
    }

214
    for (i = 0; i < 4; i++) {
215 216 217 218 219 220 221 222 223
        const int z0 = 13 * (block[0 + 4 * i] +      block[2 + 4 * i]);
        const int z1 = 13 * (block[0 + 4 * i] -      block[2 + 4 * i]);
        const int z2 =  7 *  block[1 + 4 * i] - 17 * block[3 + 4 * i];
        const int z3 = 17 *  block[1 + 4 * i] +  7 * block[3 + 4 * i];

        block[0 + 4 * i] = z0 + z3;
        block[1 + 4 * i] = z1 + z2;
        block[2 + 4 * i] = z1 - z2;
        block[3 + 4 * i] = z0 - z3;
224 225
    }

226
    for (i = 0; i < 4; i++) {
227 228 229 230
        const int z0 = 13 * (block[i + 4 * 0] +      block[i + 4 * 2]);
        const int z1 = 13 * (block[i + 4 * 0] -      block[i + 4 * 2]);
        const int z2 =  7 *  block[i + 4 * 1] - 17 * block[i + 4 * 3];
        const int z3 = 17 *  block[i + 4 * 1] +  7 * block[i + 4 * 3];
231 232
        const int rr = (dc + 0x80000);

233 234 235 236
        dst[i + stride * 0] = av_clip_uint8(dst[i + stride * 0] + ((z0 + z3) * qmul + rr >> 20));
        dst[i + stride * 1] = av_clip_uint8(dst[i + stride * 1] + ((z1 + z2) * qmul + rr >> 20));
        dst[i + stride * 2] = av_clip_uint8(dst[i + stride * 2] + ((z1 - z2) * qmul + rr >> 20));
        dst[i + stride * 3] = av_clip_uint8(dst[i + stride * 3] + ((z0 - z3) * qmul + rr >> 20));
237
    }
238 239

    memset(block, 0, 16 * sizeof(int16_t));
240 241
}

Diego Biurrun's avatar
Diego Biurrun committed
242
/**
 * Decode the run/level coded coefficients of one block from the bitstream.
 *
 * @param gb    bitstream reader
 * @param block coefficient buffer; decoded levels are stored at
 *              block[scan[index]]
 * @param index scan position at which decoding starts
 * @param type  selects the scan pattern and the code tables:
 *              0 = luma_dc_zigzag_scan, 1 = zigzag_scan, 2 = svq3_scan,
 *              3 = chroma_dc_scan
 * @return 0 on success, -1 on an invalid code or when a run would move the
 *         scan position to or past the current limit
 */
static inline int svq3_decode_block(GetBitContext *gb, int16_t *block,
                                    int index, const int type)
{
    static const uint8_t *const scan_patterns[4] =
    { luma_dc_zigzag_scan, zigzag_scan, svq3_scan, chroma_dc_scan };

    int run, level, sign, limit;
    unsigned vlc;
    const int intra           = 3 * type >> 2;
    const uint8_t *const scan = scan_patterns[type];

    /* Only type 2 runs this outer loop more than once: the limit starts at
     * 8 and the second pass continues from position 8 with a limit of 16. */
    for (limit = (16 >> intra); index < 16; index = limit, limit += 8) {
        /* a code of 0 terminates the current pass */
        for (; (vlc = svq3_get_ue_golomb(gb)) != 0; index++) {
            if ((int32_t)vlc < 0)
                return -1;

            /* the lowest bit carries the sign, the rest the run/level code */
            sign     = (vlc & 1) ? 0 : -1;
            vlc      = vlc + 1 >> 1;

            if (type == 3) {
                if (vlc < 3) {
                    run   = 0;
                    level = vlc;
                } else if (vlc < 4) {
                    run   = 1;
                    level = 1;
                } else {
                    run   = vlc & 0x3;
                    level = (vlc + 9 >> 2) - run;
                }
            } else {
                if (vlc < 16U) {
                    run   = svq3_dct_tables[intra][vlc].run;
                    level = svq3_dct_tables[intra][vlc].level;
                } else if (intra) {
                    run   = vlc & 0x7;
                    level = (vlc >> 3) + ((run == 0) ? 8 : ((run < 2) ? 2 : ((run < 5) ? 0 : -1)));
                } else {
                    run   = vlc & 0xF;
                    level = (vlc >> 4) + ((run == 0) ? 4 : ((run < 3) ? 2 : ((run < 10) ? 1 : 0)));
                }
            }

            if ((index += run) >= limit)
                return -1;

            /* (level ^ sign) - sign negates level when sign == -1 */
            block[scan[index]] = (level ^ sign) - sign;
        }

        if (type != 2) {
            break;
        }
    }

    return 0;
}

/**
 * Motion-compensate one partition (luma and, unless CODEC_FLAG_GRAY is set,
 * both chroma planes) from a reference picture into the current picture.
 *
 * @param s        decoder context
 * @param x, y     luma position of the partition in the current picture
 * @param width    partition width in luma pixels (16, 8 or 4)
 * @param height   partition height in luma pixels
 * @param mx, my   integer part of the motion vector, relative to (x, y)
 * @param dxy      sub-pel index into the selected pixel function table
 * @param thirdpel non-zero: use the third-pel functions, else the
 *                 full-/half-pel ones
 * @param dir      0: predict from s->last_pic, otherwise from s->next_pic
 * @param avg      non-zero: average with the destination instead of
 *                 overwriting it
 */
static inline void svq3_mc_dir_part(SVQ3Context *s,
                                    int x, int y, int width, int height,
                                    int mx, int my, int dxy,
                                    int thirdpel, int dir, int avg)
{
    H264Context *h = &s->h;
    const H264Picture *pic = (dir == 0) ? s->last_pic : s->next_pic;
    uint8_t *src, *dest;
    int i, emu = 0;
    int blocksize = 2 - (width >> 3); // 16->0, 8->1, 4->2

    mx += x;
    my += y;

    /* source area not fully inside the edge limits: clip the position and
     * go through the edge emulation buffer */
    if (mx < 0 || mx >= s->h_edge_pos - width  - 1 ||
        my < 0 || my >= s->v_edge_pos - height - 1) {
        emu = 1;
        mx = av_clip(mx, -16, s->h_edge_pos - width  + 15);
        my = av_clip(my, -16, s->v_edge_pos - height + 15);
    }

    /* form component predictions */
    dest = h->cur_pic.f.data[0] + x + y * h->linesize;
    src  = pic->f.data[0] + mx + my * h->linesize;

    if (emu) {
        h->vdsp.emulated_edge_mc(h->edge_emu_buffer, src,
                                 h->linesize, h->linesize,
                                 width + 1, height + 1,
                                 mx, my, s->h_edge_pos, s->v_edge_pos);
        src = h->edge_emu_buffer;
    }
    if (thirdpel)
        (avg ? s->tdsp.avg_tpel_pixels_tab
             : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src, h->linesize,
                                                 width, height);
    else
        (avg ? s->hdsp.avg_pixels_tab
             : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src, h->linesize,
                                                       height);

    if (!(h->flags & CODEC_FLAG_GRAY)) {
        /* chroma: halve position and size, use the next smaller block size */
        mx     = mx + (mx < (int) x) >> 1;
        my     = my + (my < (int) y) >> 1;
        width  = width  >> 1;
        height = height >> 1;
        blocksize++;

        for (i = 1; i < 3; i++) {
            dest = h->cur_pic.f.data[i] + (x >> 1) + (y >> 1) * h->uvlinesize;
            src  = pic->f.data[i] + mx + my * h->uvlinesize;

            if (emu) {
                h->vdsp.emulated_edge_mc(h->edge_emu_buffer, src,
                                         h->uvlinesize, h->uvlinesize,
                                         width + 1, height + 1,
                                         mx, my, (s->h_edge_pos >> 1),
                                         s->v_edge_pos >> 1);
                src = h->edge_emu_buffer;
            }
            if (thirdpel)
                (avg ? s->tdsp.avg_tpel_pixels_tab
                     : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src,
                                                         h->uvlinesize,
                                                         width, height);
            else
                (avg ? s->hdsp.avg_pixels_tab
                     : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src,
                                                               h->uvlinesize,
                                                               height);
        }
    }
}

/**
 * Decode (or derive) the motion vectors of all partitions of the current
 * macroblock for one prediction direction and motion-compensate them.
 *
 * @param s    decoder context
 * @param size partition layout code; partition width and height are derived
 *             from it at the top of the function
 * @param mode FULLPEL_MODE, HALFPEL_MODE, THIRDPEL_MODE or PREDICT_MODE.
 *             In PREDICT_MODE no differential is read from the bitstream;
 *             the vector is scaled from s->next_pic's motion vectors instead.
 * @param dir  prediction direction, forwarded to svq3_mc_dir_part()
 * @param avg  forwarded to svq3_mc_dir_part()
 * @return 0 on success, -1 if an invalid motion vector code was read
 */
static inline int svq3_mc_dir(SVQ3Context *s, int size, int mode,
                              int dir, int avg)
{
    int i, j, k, mx, my, dx, dy, x, y;
    H264Context *h          = &s->h;
    const int part_width    = ((size & 5) == 4) ? 4 : 16 >> (size & 1);
    const int part_height   = 16 >> ((unsigned)(size + 1) / 3);
    const int extra_width   = (mode == PREDICT_MODE) ? -16 * 6 : 0;
    const int h_edge_pos    = 6 * (s->h_edge_pos - part_width)  - extra_width;
    const int v_edge_pos    = 6 * (s->v_edge_pos - part_height) - extra_width;

    for (i = 0; i < 16; i += part_height)
        for (j = 0; j < 16; j += part_width) {
            const int b_xy = (4 * h->mb_x + (j >> 2)) +
                             (4 * h->mb_y + (i >> 2)) * h->b_stride;
            int dxy;
            x = 16 * h->mb_x + j;
            y = 16 * h->mb_y + i;
            k = (j >> 2 & 1) + (i >> 1 & 2) +
                (j >> 1 & 4) + (i      & 8);

            if (mode != PREDICT_MODE) {
                pred_motion(h, k, part_width >> 2, dir, 1, &mx, &my);
            } else {
                /* "* 2" instead of "<< 1": the stored vectors may be
                 * negative and left-shifting a negative value is undefined */
                mx = s->next_pic->motion_val[0][b_xy][0] * 2;
                my = s->next_pic->motion_val[0][b_xy][1] * 2;

                if (dir == 0) {
                    mx = mx * h->frame_num_offset /
                         h->prev_frame_num_offset + 1 >> 1;
                    my = my * h->frame_num_offset /
                         h->prev_frame_num_offset + 1 >> 1;
                } else {
                    mx = mx * (h->frame_num_offset - h->prev_frame_num_offset) /
                         h->prev_frame_num_offset + 1 >> 1;
                    my = my * (h->frame_num_offset - h->prev_frame_num_offset) /
                         h->prev_frame_num_offset + 1 >> 1;
                }
            }

            /* clip motion vector prediction to frame border */
            mx = av_clip(mx, extra_width - 6 * x, h_edge_pos - 6 * x);
            my = av_clip(my, extra_width - 6 * y, v_edge_pos - 6 * y);

            /* get (optional) motion vector differential */
            if (mode == PREDICT_MODE) {
                dx = dy = 0;
            } else {
                dy = svq3_get_se_golomb(&h->gb);
                dx = svq3_get_se_golomb(&h->gb);

                if (dx == INVALID_VLC || dy == INVALID_VLC) {
                    av_log(h->avctx, AV_LOG_ERROR, "invalid MV vlc\n");
                    return -1;
                }
            }

            /* compute motion vector */
            if (mode == THIRDPEL_MODE) {
                int fx, fy;
                mx  = (mx + 1 >> 1) + dx;
                my  = (my + 1 >> 1) + dy;
                /* the bias keeps the dividend non-negative for the unsigned
                 * division and is removed again afterwards */
                fx  = (unsigned)(mx + 0x3000) / 3 - 0x1000;
                fy  = (unsigned)(my + 0x3000) / 3 - 0x1000;
                dxy = (mx - 3 * fx) + 4 * (my - 3 * fy);

                svq3_mc_dir_part(s, x, y, part_width, part_height,
                                 fx, fy, dxy, 1, dir, avg);
                mx += mx;
                my += my;
            } else if (mode == HALFPEL_MODE || mode == PREDICT_MODE) {
                mx  = (unsigned)(mx + 1 + 0x3000) / 3 + dx - 0x1000;
                my  = (unsigned)(my + 1 + 0x3000) / 3 + dy - 0x1000;
                dxy = (mx & 1) + 2 * (my & 1);

                svq3_mc_dir_part(s, x, y, part_width, part_height,
                                 mx >> 1, my >> 1, dxy, 0, dir, avg);
                mx *= 3;
                my *= 3;
            } else {
                mx = (unsigned)(mx + 3 + 0x6000) / 6 + dx - 0x1000;
                my = (unsigned)(my + 3 + 0x6000) / 6 + dy - 0x1000;

                svq3_mc_dir_part(s, x, y, part_width, part_height,
                                 mx, my, 0, 0, dir, avg);
                mx *= 6;
                my *= 6;
            }

            /* update mv_cache */
            if (mode != PREDICT_MODE) {
                int32_t mv = pack16to32(mx, my);

                if (part_height == 8 && i < 8) {
                    AV_WN32A(h->mv_cache[dir][scan8[k] + 1 * 8], mv);

                    if (part_width == 8 && j < 8)
                        AV_WN32A(h->mv_cache[dir][scan8[k] + 1 + 1 * 8], mv);
                }
                if (part_width == 8 && j < 8)
                    AV_WN32A(h->mv_cache[dir][scan8[k] + 1], mv);
                if (part_width == 4 || part_height == 4)
                    AV_WN32A(h->mv_cache[dir][scan8[k]], mv);
            }

            /* write back motion vectors */
            fill_rectangle(h->cur_pic.motion_val[dir][b_xy],
                           part_width >> 2, part_height >> 2, h->b_stride,
                           pack16to32(mx, my), 4);
        }

    return 0;
}

/**
 * Decode one macroblock: prediction information, coded block pattern,
 * optional quantizer delta and the residual coefficients.
 *
 * @param s       decoder context
 * @param mb_type macroblock type code: 0 = skip, 1..7 = inter,
 *                8 or 33 = intra 4x4 (33 uses DC prediction throughout),
 *                anything else = intra 16x16 (looked up in i_mb_type_info
 *                with mb_type - 8)
 * @return 0 on success, a negative value on invalid bitstream data
 */
static int svq3_decode_mb(SVQ3Context *s, unsigned int mb_type)
{
    H264Context *h = &s->h;
    int i, j, k, m, dir, mode;
    int cbp = 0;
    uint32_t vlc;
    int8_t *top, *left;
    const int mb_xy         = h->mb_xy;
    const int b_xy          = 4 * h->mb_x + 4 * h->mb_y * h->b_stride;

    h->top_samples_available      = (h->mb_y == 0) ? 0x33FF : 0xFFFF;
    h->left_samples_available     = (h->mb_x == 0) ? 0x5F5F : 0xFFFF;
    h->topright_samples_available = 0xFFFF;

    if (mb_type == 0) {           /* SKIP */
        if (h->pict_type == AV_PICTURE_TYPE_P ||
            s->next_pic->mb_type[mb_xy] == -1) {
            svq3_mc_dir_part(s, 16 * h->mb_x, 16 * h->mb_y, 16, 16,
                             0, 0, 0, 0, 0, 0);

            if (h->pict_type == AV_PICTURE_TYPE_B)
                svq3_mc_dir_part(s, 16 * h->mb_x, 16 * h->mb_y, 16, 16,
                                 0, 0, 0, 0, 1, 1);

            mb_type = MB_TYPE_SKIP;
        } else {
            /* derive both directions from the co-located macroblock of
             * next_pic */
            mb_type = FFMIN(s->next_pic->mb_type[mb_xy], 6);
            if (svq3_mc_dir(s, mb_type, PREDICT_MODE, 0, 0) < 0)
                return -1;
            if (svq3_mc_dir(s, mb_type, PREDICT_MODE, 1, 1) < 0)
                return -1;

            mb_type = MB_TYPE_16x16;
        }
    } else if (mb_type < 8) {     /* INTER */
        if (s->thirdpel_flag && s->halfpel_flag == !get_bits1(&h->gb))
            mode = THIRDPEL_MODE;
        else if (s->halfpel_flag &&
                 s->thirdpel_flag == !get_bits1(&h->gb))
            mode = HALFPEL_MODE;
        else
            mode = FULLPEL_MODE;

        /* fill caches */
        /* note ref_cache should contain here:
         *  ????????
         *  ???11111
         *  N??11111
         *  N??11111
         *  N??11111
         */

        for (m = 0; m < 2; m++) {
            if (h->mb_x > 0 && h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - 1] + 6] != -1) {
                for (i = 0; i < 4; i++)
                    AV_COPY32(h->mv_cache[m][scan8[0] - 1 + i * 8],
                              h->cur_pic.motion_val[m][b_xy - 1 + i * h->b_stride]);
            } else {
                for (i = 0; i < 4; i++)
                    AV_ZERO32(h->mv_cache[m][scan8[0] - 1 + i * 8]);
            }
            if (h->mb_y > 0) {
                memcpy(h->mv_cache[m][scan8[0] - 1 * 8],
                       h->cur_pic.motion_val[m][b_xy - h->b_stride],
                       4 * 2 * sizeof(int16_t));
                memset(&h->ref_cache[m][scan8[0] - 1 * 8],
                       (h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride]] == -1) ? PART_NOT_AVAILABLE : 1, 4);

                if (h->mb_x < h->mb_width - 1) {
                    AV_COPY32(h->mv_cache[m][scan8[0] + 4 - 1 * 8],
                              h->cur_pic.motion_val[m][b_xy - h->b_stride + 4]);
                    h->ref_cache[m][scan8[0] + 4 - 1 * 8] =
                        (h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride + 1] + 6] == -1 ||
                         h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride]] == -1) ? PART_NOT_AVAILABLE : 1;
                } else
                    h->ref_cache[m][scan8[0] + 4 - 1 * 8] = PART_NOT_AVAILABLE;
                if (h->mb_x > 0) {
                    AV_COPY32(h->mv_cache[m][scan8[0] - 1 - 1 * 8],
                              h->cur_pic.motion_val[m][b_xy - h->b_stride - 1]);
                    h->ref_cache[m][scan8[0] - 1 - 1 * 8] =
                        (h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride - 1] + 3] == -1) ? PART_NOT_AVAILABLE : 1;
                } else
                    h->ref_cache[m][scan8[0] - 1 - 1 * 8] = PART_NOT_AVAILABLE;
            } else
                memset(&h->ref_cache[m][scan8[0] - 1 * 8 - 1],
                       PART_NOT_AVAILABLE, 8);

            /* the second list is only needed for B pictures */
            if (h->pict_type != AV_PICTURE_TYPE_B)
                break;
        }

        /* decode motion vector(s) and form prediction(s) */
        if (h->pict_type == AV_PICTURE_TYPE_P) {
            if (svq3_mc_dir(s, mb_type - 1, mode, 0, 0) < 0)
                return -1;
        } else {        /* AV_PICTURE_TYPE_B */
            if (mb_type != 2) {
                if (svq3_mc_dir(s, 0, mode, 0, 0) < 0)
                    return -1;
            } else {
                for (i = 0; i < 4; i++)
                    memset(h->cur_pic.motion_val[0][b_xy + i * h->b_stride],
                           0, 4 * 2 * sizeof(int16_t));
            }
            if (mb_type != 1) {
                if (svq3_mc_dir(s, 0, mode, 1, mb_type == 3) < 0)
                    return -1;
            } else {
                for (i = 0; i < 4; i++)
                    memset(h->cur_pic.motion_val[1][b_xy + i * h->b_stride],
                           0, 4 * 2 * sizeof(int16_t));
            }
        }

        mb_type = MB_TYPE_16x16;
    } else if (mb_type == 8 || mb_type == 33) {   /* INTRA4x4 */
        memset(h->intra4x4_pred_mode_cache, -1, 8 * 5 * sizeof(int8_t));

        if (mb_type == 8) {
            if (h->mb_x > 0) {
                for (i = 0; i < 4; i++)
                    h->intra4x4_pred_mode_cache[scan8[0] - 1 + i * 8] = h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - 1] + 6 - i];
                if (h->intra4x4_pred_mode_cache[scan8[0] - 1] == -1)
                    h->left_samples_available = 0x5F5F;
            }
            if (h->mb_y > 0) {
                h->intra4x4_pred_mode_cache[4 + 8 * 0] = h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride] + 0];
                h->intra4x4_pred_mode_cache[5 + 8 * 0] = h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride] + 1];
                h->intra4x4_pred_mode_cache[6 + 8 * 0] = h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride] + 2];
                h->intra4x4_pred_mode_cache[7 + 8 * 0] = h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride] + 3];

                if (h->intra4x4_pred_mode_cache[4 + 8 * 0] == -1)
                    h->top_samples_available = 0x33FF;
            }

            /* decode prediction codes for luma blocks */
            for (i = 0; i < 16; i += 2) {
                vlc = svq3_get_ue_golomb(&h->gb);

                if (vlc >= 25U) {
                    av_log(h->avctx, AV_LOG_ERROR,
                           "luma prediction:%"PRIu32"\n", vlc);
                    return -1;
                }

                left = &h->intra4x4_pred_mode_cache[scan8[i] - 1];
                top  = &h->intra4x4_pred_mode_cache[scan8[i] - 8];

                /* one code yields the modes of two horizontally adjacent
                 * 4x4 blocks */
                left[1] = svq3_pred_1[top[0] + 1][left[0] + 1][svq3_pred_0[vlc][0]];
                left[2] = svq3_pred_1[top[1] + 1][left[1] + 1][svq3_pred_0[vlc][1]];

                if (left[1] == -1 || left[2] == -1) {
                    av_log(h->avctx, AV_LOG_ERROR, "weird prediction\n");
                    return -1;
                }
            }
        } else {    /* mb_type == 33, DC_128_PRED block type */
            for (i = 0; i < 4; i++)
                memset(&h->intra4x4_pred_mode_cache[scan8[0] + 8 * i], DC_PRED, 4);
        }

        write_back_intra_pred_mode(h);

        if (mb_type == 8) {
            ff_h264_check_intra4x4_pred_mode(h);

            h->top_samples_available  = (h->mb_y == 0) ? 0x33FF : 0xFFFF;
            h->left_samples_available = (h->mb_x == 0) ? 0x5F5F : 0xFFFF;
        } else {
            for (i = 0; i < 4; i++)
                memset(&h->intra4x4_pred_mode_cache[scan8[0] + 8 * i], DC_128_PRED, 4);

            h->top_samples_available  = 0x33FF;
            h->left_samples_available = 0x5F5F;
        }

        mb_type = MB_TYPE_INTRA4x4;
    } else {                      /* INTRA16x16 */
        dir = i_mb_type_info[mb_type - 8].pred_mode;
        dir = (dir >> 1) ^ 3 * (dir & 1) ^ 1;

        if ((h->intra16x16_pred_mode = ff_h264_check_intra_pred_mode(h, dir, 0)) < 0) {
            av_log(h->avctx, AV_LOG_ERROR, "ff_h264_check_intra_pred_mode < 0\n");
            return h->intra16x16_pred_mode;
        }

        cbp     = i_mb_type_info[mb_type - 8].cbp;
        mb_type = MB_TYPE_INTRA16x16;
    }

    /* intra macroblocks in P/B pictures carry zero motion vectors */
    if (!IS_INTER(mb_type) && h->pict_type != AV_PICTURE_TYPE_I) {
        for (i = 0; i < 4; i++)
            memset(h->cur_pic.motion_val[0][b_xy + i * h->b_stride],
                   0, 4 * 2 * sizeof(int16_t));
        if (h->pict_type == AV_PICTURE_TYPE_B) {
            for (i = 0; i < 4; i++)
                memset(h->cur_pic.motion_val[1][b_xy + i * h->b_stride],
                       0, 4 * 2 * sizeof(int16_t));
        }
    }
    if (!IS_INTRA4x4(mb_type)) {
        memset(h->intra4x4_pred_mode + h->mb2br_xy[mb_xy], DC_PRED, 8);
    }
    if (!IS_SKIP(mb_type) || h->pict_type == AV_PICTURE_TYPE_B) {
        memset(h->non_zero_count_cache + 8, 0, 14 * 8 * sizeof(uint8_t));
    }

    /* coded block pattern (intra 16x16 takes it from i_mb_type_info above) */
    if (!IS_INTRA16x16(mb_type) &&
        (!IS_SKIP(mb_type) || h->pict_type == AV_PICTURE_TYPE_B)) {
        if ((vlc = svq3_get_ue_golomb(&h->gb)) >= 48U){
            av_log(h->avctx, AV_LOG_ERROR, "cbp_vlc=%"PRIu32"\n", vlc);
            return -1;
        }

        cbp = IS_INTRA(mb_type) ? golomb_to_intra4x4_cbp[vlc]
                                : golomb_to_inter_cbp[vlc];
    }
    if (IS_INTRA16x16(mb_type) ||
        (h->pict_type != AV_PICTURE_TYPE_I && s->adaptive_quant && cbp)) {
        h->qscale += svq3_get_se_golomb(&h->gb);

        /* the unsigned comparison also rejects negative values */
        if (h->qscale > 31u) {
            av_log(h->avctx, AV_LOG_ERROR, "qscale:%d\n", h->qscale);
            return -1;
        }
    }
    if (IS_INTRA16x16(mb_type)) {
        AV_ZERO128(h->mb_luma_dc[0] + 0);
        AV_ZERO128(h->mb_luma_dc[0] + 8);
        if (svq3_decode_block(&h->gb, h->mb_luma_dc[0], 0, 1)) {
            av_log(h->avctx, AV_LOG_ERROR,
                   "error while decoding intra luma dc\n");
            return -1;
        }
    }

    if (cbp) {
        const int index = IS_INTRA16x16(mb_type) ? 1 : 0;
        const int type  = ((h->qscale < 24 && IS_INTRA4x4(mb_type)) ? 2 : 1);

        /* luma: one cbp bit per group of four 4x4 blocks */
        for (i = 0; i < 4; i++)
            if ((cbp & (1 << i))) {
                for (j = 0; j < 4; j++) {
                    k = index ? (1 * (j & 1) + 2 * (i & 1) +
                                 2 * (j & 2) + 4 * (i & 2))
                              : (4 * i + j);
                    h->non_zero_count_cache[scan8[k]] = 1;

                    if (svq3_decode_block(&h->gb, &h->mb[16 * k], index, type)) {
                        av_log(h->avctx, AV_LOG_ERROR,
                               "error while decoding block\n");
                        return -1;
                    }
                }
            }

        /* chroma: DC if either of bits 4/5 is set, AC only with bit 5 */
        if ((cbp & 0x30)) {
            for (i = 1; i < 3; ++i)
                if (svq3_decode_block(&h->gb, &h->mb[16 * 16 * i], 0, 3)) {
                    av_log(h->avctx, AV_LOG_ERROR,
                           "error while decoding chroma dc block\n");
                    return -1;
                }

            if ((cbp & 0x20)) {
                for (i = 1; i < 3; i++) {
                    for (j = 0; j < 4; j++) {
                        k                                 = 16 * i + j;
                        h->non_zero_count_cache[scan8[k]] = 1;

                        if (svq3_decode_block(&h->gb, &h->mb[16 * k], 1, 1)) {
                            av_log(h->avctx, AV_LOG_ERROR,
                                   "error while decoding chroma ac block\n");
                            return -1;
                        }
                    }
                }
            }
        }
    }

    h->cbp                              = cbp;
    h->cur_pic.mb_type[mb_xy] = mb_type;

    if (IS_INTRA(mb_type)) {
        /* ff_h264_check_intra_pred_mode() reports failure with a negative
         * value (see the INTRA16x16 branch above); do not keep such a
         * value as the chroma prediction mode. */
        const int chroma_mode = ff_h264_check_intra_pred_mode(h, DC_PRED8x8, 1);

        h->chroma_pred_mode = chroma_mode;
        if (chroma_mode < 0) {
            av_log(h->avctx, AV_LOG_ERROR, "ff_h264_check_intra_pred_mode < 0\n");
            return chroma_mode;
        }
    }

    return 0;
}

/**
 * Parse one slice header from h->gb and prepare the reader for the slice.
 *
 * The first byte must satisfy (header & 0x9F) == 1 or 2, and bits 5-6
 * (the byte length of the following slice-size field) must be non-zero.
 * On success this updates s->next_slice_index, shrinks h->gb.size_in_bits
 * to the end of the current slice, fills in slice_type / mb_skip_run /
 * slice_num / qscale / adaptive_quant, and resets the intra 4x4 predictors
 * that lie around the current macroblock position.
 *
 * @param avctx codec context whose priv_data is the SVQ3Context
 * @return 0 on success, AVERROR_INVALIDDATA on a malformed header
 */
static int svq3_decode_slice_header(AVCodecContext *avctx)
{
    SVQ3Context *s = avctx->priv_data;
    H264Context *h = &s->h;
    const int mb_xy = h->mb_xy;
    int i, header, length;
    unsigned slice_id;

    header = get_bits(&h->gb, 8);

    if (((header & 0x9F) != 1 && (header & 0x9F) != 2) || (header & 0x60) == 0) {
        /* TODO: what? */
        av_log(avctx, AV_LOG_ERROR, "unsupported slice header (%02X)\n", header);
        return AVERROR_INVALIDDATA;
    }

    /* Bits 5-6 of the header; the check above guarantees 1 <= length <= 3. */
    length = header >> 5 & 3;

    s->next_slice_index = get_bits_count(&h->gb) +
                          8 * show_bits(&h->gb, 8 * length) +
                          8 * length;

    if (s->next_slice_index > h->gb.size_in_bits) {
        av_log(avctx, AV_LOG_ERROR, "slice after bitstream end\n");
        return AVERROR_INVALIDDATA;
    }

    h->gb.size_in_bits = s->next_slice_index - 8 * (length - 1);
    skip_bits(&h->gb, 8);

    if (s->watermark_key) {
        /* XOR the 32-bit little-endian word one byte past the read position
         * with the watermark key, in place. */
        uint32_t scrambled = AV_RL32(&h->gb.buffer[(get_bits_count(&h->gb) >> 3) + 1]);
        AV_WL32(&h->gb.buffer[(get_bits_count(&h->gb) >> 3) + 1],
                scrambled ^ s->watermark_key);
    }

    /* Copy the (length - 1) bytes located at the shortened end of the slice
     * to the current read position; regions may overlap, hence memmove. */
    memmove((uint8_t *) &h->gb.buffer[get_bits_count(&h->gb) >> 3],
            &h->gb.buffer[h->gb.size_in_bits >> 3], length - 1);
    skip_bits_long(&h->gb, 0);

    if ((slice_id = svq3_get_ue_golomb(&h->gb)) >= 3) {
        av_log(h->avctx, AV_LOG_ERROR, "illegal slice type %u \n", slice_id);
        return AVERROR_INVALIDDATA;
    }

    h->slice_type = golomb_to_pict_type[slice_id];

    if ((header & 0x9F) == 2) {
        /* Field width depends on the macroblock count, with a minimum of 6. */
        i              = (h->mb_num < 64) ? 6 : (1 + av_log2(h->mb_num - 1));
        h->mb_skip_run = get_bits(&h->gb, i) -
                         (h->mb_y * h->mb_width + h->mb_x);
    } else {
        skip_bits1(&h->gb);
        h->mb_skip_run = 0;
    }

    h->slice_num      = get_bits(&h->gb, 8);
    h->qscale         = get_bits(&h->gb, 5);
    s->adaptive_quant = get_bits1(&h->gb);

    /* unknown fields */
    skip_bits1(&h->gb);

    if (s->unknown_flag)
        skip_bits1(&h->gb);

    skip_bits1(&h->gb);
    skip_bits(&h->gb, 2);

    if (skip_1stop_8data_bits(&h->gb) < 0)
        return AVERROR_INVALIDDATA;

    /* reset intra predictors and invalidate motion vector references */
    if (h->mb_x > 0) {
        memset(h->intra4x4_pred_mode + h->mb2br_xy[mb_xy - 1] + 3,
               -1, 4 * sizeof(int8_t));
        memset(h->intra4x4_pred_mode + h->mb2br_xy[mb_xy - h->mb_x],
               -1, 8 * sizeof(int8_t) * h->mb_x);
    }
    if (h->mb_y > 0) {
        memset(h->intra4x4_pred_mode + h->mb2br_xy[mb_xy - h->mb_stride],
               -1, 8 * sizeof(int8_t) * (h->mb_width - h->mb_x));

        if (h->mb_x > 0)
            h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride - 1] + 3] = -1;
    }

    return 0;
}

869
/**
 * Initialise the SVQ3 decoder.
 *
 * Allocates the three picture slots, initialises the embedded H.264 context
 * and DSP helpers, then searches avctx->extradata for a "SEQH" marker. When
 * the marker is present its payload supplies the frame dimensions, the
 * half/third-pel flags, the low-delay flag and, optionally, a zlib-compressed
 * watermark from which s->watermark_key is derived.
 *
 * On any failure everything allocated so far is released through
 * svq3_decode_end().
 *
 * @param avctx codec context; priv_data must be an SVQ3Context
 * @return 0 on success, a negative value on failure
 */
static av_cold int svq3_decode_init(AVCodecContext *avctx)
{
    SVQ3Context *s = avctx->priv_data;
    H264Context *h = &s->h;
    int m;
    unsigned char *extradata;
    unsigned char *extradata_end;
    unsigned int size;
    int marker_found = 0;
    int ret;

    s->cur_pic  = av_mallocz(sizeof(*s->cur_pic));
    s->last_pic = av_mallocz(sizeof(*s->last_pic));
    s->next_pic = av_mallocz(sizeof(*s->next_pic));
    if (!s->next_pic || !s->last_pic || !s->cur_pic) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }

    if ((ret = ff_h264_decode_init(avctx)) < 0)
        goto fail;

    ff_hpeldsp_init(&s->hdsp, avctx->flags);
    ff_tpeldsp_init(&s->tdsp);

    h->flags                 = avctx->flags;
    h->is_complex            = 1;
    h->sps.chroma_format_idc = 1;
    h->picture_structure     = PICT_FRAME;
    avctx->pix_fmt           = AV_PIX_FMT_YUVJ420P;
    avctx->color_range       = AVCOL_RANGE_JPEG;

    h->chroma_qp[0]   = h->chroma_qp[1]   = 4;
    h->chroma_x_shift = h->chroma_y_shift = 1;

    /* defaults used when no SEQH header is found */
    s->halfpel_flag  = 1;
    s->thirdpel_flag = 1;
    s->unknown_flag  = 0;

    /* prowl for the "SEQH" marker in the extradata */
    extradata     = (unsigned char *)avctx->extradata;
    extradata_end = avctx->extradata + avctx->extradata_size;
    if (extradata) {
        /* m + 8 < size keeps the 4-byte tag and 4-byte length readable */
        for (m = 0; m + 8 < avctx->extradata_size; m++) {
            if (!memcmp(extradata, "SEQH", 4)) {
                marker_found = 1;
                break;
            }
            extradata++;
        }
    }

    /* if a match was found, parse the extra data */
    if (marker_found) {
        GetBitContext gb;
        int frame_size_code;

        /* big-endian payload length follows the tag */
        size = AV_RB32(&extradata[4]);
        if (size > extradata_end - extradata - 8) {
            ret = AVERROR_INVALIDDATA;
            goto fail;
        }
        init_get_bits(&gb, extradata + 8, size * 8);

        /* 'frame size code' and optional 'width, height' */
        frame_size_code = get_bits(&gb, 3);
        switch (frame_size_code) {
        case 0:
            avctx->width  = 160;
            avctx->height = 120;
            break;
        case 1:
            avctx->width  = 128;
            avctx->height =  96;
            break;
        case 2:
            avctx->width  = 176;
            avctx->height = 144;
            break;
        case 3:
            avctx->width  = 352;
            avctx->height = 288;
            break;
        case 4:
            avctx->width  = 704;
            avctx->height = 576;
            break;
        case 5:
            avctx->width  = 240;
            avctx->height = 180;
            break;
        case 6:
            avctx->width  = 320;
            avctx->height = 240;
            break;
        case 7:
            avctx->width  = get_bits(&gb, 12);
            avctx->height = get_bits(&gb, 12);
            break;
        }

        s->halfpel_flag  = get_bits1(&gb);
        s->thirdpel_flag = get_bits1(&gb);

        /* unknown fields */
        skip_bits1(&gb);
        skip_bits1(&gb);
        skip_bits1(&gb);
        skip_bits1(&gb);

        h->low_delay = get_bits1(&gb);

        /* unknown field */
        skip_bits1(&gb);

        if (skip_1stop_8data_bits(&gb) < 0) {
            ret = AVERROR_INVALIDDATA;
            goto fail;
        }

        s->unknown_flag     = get_bits1(&gb);
        avctx->has_b_frames = !h->low_delay;
        if (s->unknown_flag) {
#if CONFIG_ZLIB
            unsigned watermark_width  = svq3_get_ue_golomb(&gb);
            unsigned watermark_height = svq3_get_ue_golomb(&gb);
            int u1                    = svq3_get_ue_golomb(&gb);
            int u2                    = get_bits(&gb, 8);
            int u3                    = get_bits(&gb, 2);
            int u4                    = svq3_get_ue_golomb(&gb);
            /* byte offset of the compressed data, rounded up */
            int offset                = (get_bits_count(&gb) + 7) >> 3;
            unsigned long buf_len;
            uint8_t *buf;

            /* Reject a zero height (division below), a width * height * 4
             * product that does not fit in an unsigned int, and a header
             * that already consumed the whole payload: in the latter case
             * "size - offset" would wrap around and hand uncompress() a
             * bogus source length. */
            if (watermark_height == 0 ||
                (uint64_t)watermark_width * 4 > UINT_MAX / watermark_height ||
                (unsigned)offset >= size) {
                ret = AVERROR_INVALIDDATA;
                goto fail;
            }
            buf_len = watermark_width * watermark_height * 4;

            av_log(avctx, AV_LOG_DEBUG, "watermark size: %ux%u\n",
                   watermark_width, watermark_height);
            av_log(avctx, AV_LOG_DEBUG,
                   "u1: %x u2: %x u3: %x compressed data size: %d offset: %d\n",
                   u1, u2, u3, u4, offset);

            buf = av_malloc(buf_len);
            if (!buf) {
                ret = AVERROR(ENOMEM);
                goto fail;
            }

            if (uncompress(buf, &buf_len, extradata + 8 + offset,
                           size - offset) != Z_OK) {
                av_log(avctx, AV_LOG_ERROR,
                       "could not uncompress watermark logo\n");
                av_free(buf);
                ret = -1;
                goto fail;
            }
            /* the key is the checksum replicated into both 16-bit halves */
            s->watermark_key = ff_svq1_packet_checksum(buf, buf_len, 0);
            s->watermark_key = s->watermark_key << 16 | s->watermark_key;
            av_log(avctx, AV_LOG_DEBUG,
                   "watermark key %#"PRIx32"\n", s->watermark_key);
            av_free(buf);
#else
            av_log(avctx, AV_LOG_ERROR,
                   "this svq3 file contains watermark which need zlib support compiled in\n");
            ret = -1;
            goto fail;
#endif
        }
    }

    /* derive the macroblock geometry from the final frame size */
    h->width      = avctx->width;
    h->height     = avctx->height;
    h->mb_width   = (h->width + 15) / 16;
    h->mb_height  = (h->height + 15) / 16;
    h->mb_stride  = h->mb_width + 1;
    h->mb_num     = h->mb_width * h->mb_height;
    h->b_stride   = 4 * h->mb_width;
    s->h_edge_pos = h->mb_width * 16;
    s->v_edge_pos = h->mb_height * 16;

    if ((ret = ff_h264_alloc_tables(h)) < 0) {
        av_log(avctx, AV_LOG_ERROR, "svq3 memory allocation failed\n");
        goto fail;
    }

    return 0;
fail:
    svq3_decode_end(avctx);
    return ret;
}

1059
/**
 * Drop every buffer reference held by a picture slot.
 *
 * Unreferences the motion-vector, reference-index and mb-type side buffers
 * and the frame itself. The H264Picture structure is not freed.
 *
 * A NULL picture is accepted and ignored: svq3_decode_init() jumps to its
 * failure path (which runs svq3_decode_end()) when allocating a picture slot
 * failed, so this function can be reached with a slot that was never
 * allocated.
 *
 * @param avctx codec context (unused)
 * @param pic   picture slot to release, may be NULL
 */
static void free_picture(AVCodecContext *avctx, H264Picture *pic)
{
    int i;

    if (!pic)
        return;

    for (i = 0; i < 2; i++) {
        av_buffer_unref(&pic->motion_val_buf[i]);
        av_buffer_unref(&pic->ref_index_buf[i]);
    }
    av_buffer_unref(&pic->mb_type_buf);

    av_frame_unref(&pic->f);
}

1071
/**
 * Make a picture slot ready for decoding.
 *
 * The per-picture side tables (mb_type, motion vectors, reference indices)
 * are allocated the first time a slot is used, i.e. while
 * pic->motion_val_buf[0] is still unset. A frame buffer is then requested
 * via ff_get_buffer(), the shared edge emulation buffer is allocated on
 * first use, and the line sizes are copied into the H.264 context.
 *
 * @param avctx codec context
 * @param pic   picture slot to set up
 * @return 0 on success, a negative error code on failure
 */
static int get_buffer(AVCodecContext *avctx, H264Picture *pic)
{
    SVQ3Context *ctx  = avctx->priv_data;
    H264Context *h264 = &ctx->h;
    int err;

    if (!pic->motion_val_buf[0]) {
        const int mb_table_len  = h264->mb_stride * (h264->mb_height + 1) + 1;
        const int ref_table_len = h264->mb_stride * h264->mb_height;
        const int mv_row_len    = h264->mb_width * 4 + 1;
        const int mv_table_len  = mv_row_len * h264->mb_height * 4;
        int list;

        pic->mb_type_buf = av_buffer_allocz((mb_table_len + h264->mb_stride) * sizeof(uint32_t));
        if (!pic->mb_type_buf)
            return AVERROR(ENOMEM);
        /* usable entries start 2 * mb_stride + 1 elements into the table */
        pic->mb_type = (uint32_t*)pic->mb_type_buf->data + 2 * h264->mb_stride + 1;

        for (list = 0; list < 2; list++) {
            pic->motion_val_buf[list] = av_buffer_allocz(2 * (mv_table_len + 4) * sizeof(int16_t));
            pic->ref_index_buf[list]  = av_buffer_allocz(4 * ref_table_len);
            if (!pic->motion_val_buf[list] || !pic->ref_index_buf[list]) {
                err = AVERROR(ENOMEM);
                goto fail;
            }

            /* motion vectors start 4 entries into their buffer */
            pic->motion_val[list] = (int16_t (*)[2])pic->motion_val_buf[list]->data + 4;
            pic->ref_index[list]  = pic->ref_index_buf[list]->data;
        }
    }

    /* every picture type except B is marked as a reference */
    pic->reference = h264->pict_type != AV_PICTURE_TYPE_B;

    err = ff_get_buffer(avctx, &pic->f,
                        pic->reference ? AV_GET_BUFFER_FLAG_REF : 0);
    if (err < 0)
        goto fail;

    if (!h264->edge_emu_buffer &&
        !(h264->edge_emu_buffer = av_mallocz(pic->f.linesize[0] * 17)))
        return AVERROR(ENOMEM);

    h264->linesize   = pic->f.linesize[0];
    h264->uvlinesize = pic->f.linesize[1];

    return 0;

fail:
    free_picture(avctx, pic);
    return err;
}

1123
/**
 * Decode one packet.
 *
 * An empty packet flushes the decoder: the pending next_pic is output once
 * if the stream is not low-delay. Otherwise the slice header is parsed, a
 * buffer is obtained for the current picture, missing reference pictures
 * are replaced by blank frames, and every macroblock is decoded in raster
 * order (re-reading a slice header whenever the current slice is exhausted).
 *
 * @param avctx     codec context
 * @param data      AVFrame that receives the output picture
 * @param got_frame set to 1 when a picture was written to data
 * @param avpkt     input packet
 * @return avpkt->size on success, 0 when flushing or when the frame is
 *         skipped, a negative value on error
 */
static int svq3_decode_frame(AVCodecContext *avctx, void *data,
                             int *got_frame, AVPacket *avpkt)
{
    SVQ3Context *s     = avctx->priv_data;
    H264Context *h     = &s->h;
    int buf_size       = avpkt->size;
    int left;
    uint8_t *buf;
    int ret, m, i;

    /* special case for last picture */
    if (buf_size == 0) {
        if (s->next_pic->f.data[0] && !h->low_delay && !s->last_frame_output) {
            ret = av_frame_ref(data, &s->next_pic->f);
            if (ret < 0)
                return ret;
            s->last_frame_output = 1;
            *got_frame           = 1;
        }
        return 0;
    }

    h->mb_x = h->mb_y = h->mb_xy = 0;

    if (s->watermark_key) {
        /* the slice header parser rewrites the bitstream in place when a
         * watermark key is set, so work on a private padded copy */
        av_fast_padded_malloc(&s->buf, &s->buf_size, buf_size);
        if (!s->buf)
            return AVERROR(ENOMEM);
        memcpy(s->buf, avpkt->data, buf_size);
        buf = s->buf;
    } else {
        buf = avpkt->data;
    }

    init_get_bits(&h->gb, buf, 8 * buf_size);

    if (svq3_decode_slice_header(avctx))
        return -1;

    h->pict_type = h->slice_type;

    if (h->pict_type != AV_PICTURE_TYPE_B)
        FFSWAP(H264Picture*, s->next_pic, s->last_pic);

    av_frame_unref(&s->cur_pic->f);

    /* for skipping the frame */
    s->cur_pic->f.pict_type = h->pict_type;
    s->cur_pic->f.key_frame = (h->pict_type == AV_PICTURE_TYPE_I);

    ret = get_buffer(avctx, s->cur_pic);
    if (ret < 0)
        return ret;

    h->cur_pic_ptr = s->cur_pic;
    av_frame_unref(&h->cur_pic.f);
    h->cur_pic     = *s->cur_pic;
    ret = av_frame_ref(&h->cur_pic.f, &s->cur_pic->f);
    if (ret < 0)
        return ret;

    /* per-block pixel offsets; entries from 48 on use twice the line step */
    for (i = 0; i < 16; i++) {
        h->block_offset[i]           = (4 * ((scan8[i] - scan8[0]) & 7)) + 4 * h->linesize * ((scan8[i] - scan8[0]) >> 3);
        h->block_offset[48 + i]      = (4 * ((scan8[i] - scan8[0]) & 7)) + 8 * h->linesize * ((scan8[i] - scan8[0]) >> 3);
    }
    for (i = 0; i < 16; i++) {
        h->block_offset[16 + i]      =
        h->block_offset[32 + i]      = (4 * ((scan8[i] - scan8[0]) & 7)) + 4 * h->uvlinesize * ((scan8[i] - scan8[0]) >> 3);
        h->block_offset[48 + 16 + i] =
        h->block_offset[48 + 32 + i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 8 * h->uvlinesize * ((scan8[i] - scan8[0]) >> 3);
    }

    if (h->pict_type != AV_PICTURE_TYPE_I) {
        /* substitute a blank frame (luma 0, chroma 0x80) for a missing
         * reference */
        if (!s->last_pic->f.data[0]) {
            av_log(avctx, AV_LOG_ERROR, "Missing reference frame.\n");
            /* av_frame_unref() takes the AVFrame, not the H264Picture */
            av_frame_unref(&s->last_pic->f);
            ret = get_buffer(avctx, s->last_pic);
            if (ret < 0)
                return ret;
            memset(s->last_pic->f.data[0], 0, avctx->height * s->last_pic->f.linesize[0]);
            memset(s->last_pic->f.data[1], 0x80, (avctx->height / 2) *
                   s->last_pic->f.linesize[1]);
            memset(s->last_pic->f.data[2], 0x80, (avctx->height / 2) *
                   s->last_pic->f.linesize[2]);
        }

        if (h->pict_type == AV_PICTURE_TYPE_B && !s->next_pic->f.data[0]) {
            av_log(avctx, AV_LOG_ERROR, "Missing reference frame.\n");
            av_frame_unref(&s->next_pic->f);
            ret = get_buffer(avctx, s->next_pic);
            if (ret < 0)
                return ret;
            memset(s->next_pic->f.data[0], 0, avctx->height * s->next_pic->f.linesize[0]);
            memset(s->next_pic->f.data[1], 0x80, (avctx->height / 2) *
                   s->next_pic->f.linesize[1]);
            memset(s->next_pic->f.data[2], 0x80, (avctx->height / 2) *
                   s->next_pic->f.linesize[2]);
        }
    }

    if (avctx->debug & FF_DEBUG_PICT_INFO)
        av_log(h->avctx, AV_LOG_DEBUG,
               "%c hpel:%d, tpel:%d aqp:%d qp:%d, slice_num:%02X\n",
               av_get_picture_type_char(h->pict_type),
               s->halfpel_flag, s->thirdpel_flag,
               s->adaptive_quant, h->qscale, h->slice_num);

    if (avctx->skip_frame >= AVDISCARD_NONREF && h->pict_type == AV_PICTURE_TYPE_B ||
        avctx->skip_frame >= AVDISCARD_NONKEY && h->pict_type != AV_PICTURE_TYPE_I ||
        avctx->skip_frame >= AVDISCARD_ALL)
        return 0;

    if (s->next_p_frame_damaged) {
        if (h->pict_type == AV_PICTURE_TYPE_B)
            return 0;
        else
            s->next_p_frame_damaged = 0;
    }

    if (h->pict_type == AV_PICTURE_TYPE_B) {
        h->frame_num_offset = h->slice_num - h->prev_frame_num;

        /* slice_num is an 8-bit field, so differences wrap modulo 256 */
        if (h->frame_num_offset < 0)
            h->frame_num_offset += 256;
        if (h->frame_num_offset == 0 ||
            h->frame_num_offset >= h->prev_frame_num_offset) {
            av_log(h->avctx, AV_LOG_ERROR, "error in B-frame picture id\n");
            return -1;
        }
    } else {
        h->prev_frame_num        = h->frame_num;
        h->frame_num             = h->slice_num;
        h->prev_frame_num_offset = h->frame_num - h->prev_frame_num;

        if (h->prev_frame_num_offset < 0)
            h->prev_frame_num_offset += 256;
    }

    for (m = 0; m < 2; m++) {
        for (i = 0; i < 4; i++) {
            int j;
            for (j = -1; j < 4; j++)
                h->ref_cache[m][scan8[0] + 8 * i + j] = 1;
            /* j == 4 here: the entry just right of rows 0..2 */
            if (i < 3)
                h->ref_cache[m][scan8[0] + 8 * i + j] = PART_NOT_AVAILABLE;
        }
    }

    for (h->mb_y = 0; h->mb_y < h->mb_height; h->mb_y++) {
        for (h->mb_x = 0; h->mb_x < h->mb_width; h->mb_x++) {
            unsigned mb_type;
            h->mb_xy = h->mb_x + h->mb_y * h->mb_stride;

            /* Current slice exhausted (less than a byte left and only zero
             * padding bits remaining): jump to the next slice header. */
            if ((get_bits_count(&h->gb) + 7) >= h->gb.size_in_bits &&
                ((get_bits_count(&h->gb) & 7) == 0 ||
                 show_bits(&h->gb, -get_bits_count(&h->gb) & 7) == 0)) {
                skip_bits(&h->gb, s->next_slice_index - get_bits_count(&h->gb));
                h->gb.size_in_bits = 8 * buf_size;

                if (svq3_decode_slice_header(avctx))
                    return -1;

                /* TODO: support h->mb_skip_run */
            }

            mb_type = svq3_get_ue_golomb(&h->gb);

            if (h->pict_type == AV_PICTURE_TYPE_I)
                mb_type += 8;
            else if (h->pict_type == AV_PICTURE_TYPE_B && mb_type >= 4)
                mb_type += 4;
            if (mb_type > 33 || svq3_decode_mb(s, mb_type)) {
                av_log(h->avctx, AV_LOG_ERROR,
                       "error while decoding MB %d %d\n", h->mb_x, h->mb_y);
                return -1;
            }

            if (mb_type != 0 || h->cbp)
                ff_h264_hl_decode_mb(h);

            if (h->pict_type != AV_PICTURE_TYPE_B && !h->low_delay)
                h->cur_pic.mb_type[h->mb_x + h->mb_y * h->mb_stride] =
                    (h->pict_type == AV_PICTURE_TYPE_P && mb_type < 8) ? (mb_type - 1) : -1;
        }

        ff_draw_horiz_band(avctx, &s->cur_pic->f,
                           s->last_pic->f.data[0] ? &s->last_pic->f : NULL,
                           16 * h->mb_y, 16, h->picture_structure, 0,
                           h->low_delay);
    }

    left = buf_size*8 - get_bits_count(&h->gb);

    if (h->mb_y != h->mb_height || h->mb_x != h->mb_width) {
        /* arguments follow the "x ... y ..." order of the format string */
        av_log(avctx, AV_LOG_INFO, "frame num %d incomplete pic x %d y %d left %d\n", avctx->frame_number, h->mb_x, h->mb_y, left);
    }

    if (left < 0) {
        av_log(avctx, AV_LOG_ERROR, "frame num %d left %d\n", avctx->frame_number, left);
        return -1;
    }

    /* B-frames and low-delay streams output the picture just decoded;
     * otherwise the previously decoded picture is output, if there is one */
    if (h->pict_type == AV_PICTURE_TYPE_B || h->low_delay)
        ret = av_frame_ref(data, &s->cur_pic->f);
    else if (s->last_pic->f.data[0])
        ret = av_frame_ref(data, &s->last_pic->f);
    if (ret < 0)
        return ret;

    /* Do not output the last pic after seeking. */
    if (s->last_pic->f.data[0] || h->low_delay)
        *got_frame = 1;

    if (h->pict_type != AV_PICTURE_TYPE_B) {
        FFSWAP(H264Picture*, s->cur_pic, s->next_pic);
    } else {
        av_frame_unref(&s->cur_pic->f);
    }

    return buf_size;
}

1347
/**
 * Release everything owned by the decoder instance.
 *
 * The buffers of the three picture slots are released first, then the slots
 * themselves, the frame reference held in the H.264 context, the H.264
 * context, the private bitstream copy and the edge emulation buffer.
 *
 * @param avctx codec context
 * @return always 0
 */
static av_cold int svq3_decode_end(AVCodecContext *avctx)
{
    SVQ3Context *ctx  = avctx->priv_data;
    H264Context *h264 = &ctx->h;
    H264Picture **slots[3] = { &ctx->cur_pic, &ctx->next_pic, &ctx->last_pic };
    int n;

    /* first drop the buffers of every slot, then free the slots */
    for (n = 0; n < 3; n++)
        free_picture(avctx, *slots[n]);
    for (n = 0; n < 3; n++)
        av_freep(slots[n]);

    av_frame_unref(&h264->cur_pic.f);

    ff_h264_free_context(h264);

    av_freep(&ctx->buf);
    ctx->buf_size = 0;
    av_freep(&h264->edge_emu_buffer);

    return 0;
}
1369

1370
AVCodec ff_svq3_decoder = {
1371
    .name           = "svq3",
1372
    .long_name      = NULL_IF_CONFIG_SMALL("Sorenson Vector Quantizer 3 / Sorenson Video 3 / SVQ3"),
1373
    .type           = AVMEDIA_TYPE_VIDEO,
1374
    .id             = AV_CODEC_ID_SVQ3,
1375 1376 1377 1378
    .priv_data_size = sizeof(SVQ3Context),
    .init           = svq3_decode_init,
    .close          = svq3_decode_end,
    .decode         = svq3_decode_frame,
1379 1380
    .capabilities   = CODEC_CAP_DRAW_HORIZ_BAND |
                      CODEC_CAP_DR1             |
1381
                      CODEC_CAP_DELAY,
1382 1383
    .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUVJ420P,
                                                     AV_PIX_FMT_NONE},
1384
};