Commit cac9877e authored by Michael Niedermayer

Merge remote-tracking branch 'qatar/master'

* qatar/master:
  sws: implement MMX/SSE2/SSSE3/SSE4 versions for horizontal scaling.
  include stdint.h in adpcm_data.h
  mpeg12: reorder functions to avoid ugly forward declarations
  Fixed off by one packet size allocation in the smacker demuxer.
  Check for invalid packet size in the smacker demuxer.
  ape demuxer: fix segfault on memory allocation failure.
  xan: Add some buffer checks
  xan: Remove extra trailing newline
  Fixed size given to init_get_bits() in xan decoder.

Conflicts:
	libavcodec/mpeg12.c
	libswscale/x86/swscale_template.c
Merged-by: Michael Niedermayer <michaelni@gmx.at>
parents 5ad01dec e0c3e073
......@@ -26,6 +26,8 @@
#ifndef AVCODEC_ADPCM_DATA_H
#define AVCODEC_ADPCM_DATA_H
#include <stdint.h>
extern const int8_t ff_adpcm_index_table[16];
extern const int16_t ff_adpcm_step_table[89];
extern const int16_t ff_adpcm_AdaptationTable[];
......
......@@ -50,1072 +50,1070 @@
#define MB_PTYPE_VLC_BITS 6
#define MB_BTYPE_VLC_BITS 6
static inline int mpeg1_decode_block_intra(MpegEncContext *s, DCTELEM *block, int n);
static inline int mpeg1_decode_block_inter(MpegEncContext *s, DCTELEM *block, int n);
static inline int mpeg1_fast_decode_block_inter(MpegEncContext *s, DCTELEM *block, int n);
static inline int mpeg2_decode_block_non_intra(MpegEncContext *s, DCTELEM *block, int n);
static inline int mpeg2_decode_block_intra(MpegEncContext *s, DCTELEM *block, int n);
static inline int mpeg2_fast_decode_block_non_intra(MpegEncContext *s, DCTELEM *block, int n);
static inline int mpeg2_fast_decode_block_intra(MpegEncContext *s, DCTELEM *block, int n);
static int mpeg_decode_motion(MpegEncContext *s, int fcode, int pred);
static void exchange_uv(MpegEncContext *s);
static VLC mv_vlc;
uint8_t ff_mpeg12_static_rl_table_store[2][2][2*MAX_RUN + MAX_LEVEL + 3];
/* as H.263, but only 17 codes */
static int mpeg_decode_motion(MpegEncContext *s, int fcode, int pred)
{
int code, sign, val, l, shift;
code = get_vlc2(&s->gb, mv_vlc.table, MV_VLC_BITS, 2);
if (code == 0) {
return pred;
}
if (code < 0) {
return 0xffff;
}
#define INIT_2D_VLC_RL(rl, static_size)\
{\
static RL_VLC_ELEM rl_vlc_table[static_size];\
INIT_VLC_STATIC(&rl.vlc, TEX_VLC_BITS, rl.n + 2,\
&rl.table_vlc[0][1], 4, 2,\
&rl.table_vlc[0][0], 4, 2, static_size);\
\
rl.rl_vlc[0] = rl_vlc_table;\
init_2d_vlc_rl(&rl);\
sign = get_bits1(&s->gb);
shift = fcode - 1;
val = code;
if (shift) {
val = (val - 1) << shift;
val |= get_bits(&s->gb, shift);
val++;
}
if (sign)
val = -val;
val += pred;
/* modulo decoding */
l = INT_BIT - 5 - shift;
val = (val << l) >> l;
return val;
}
static void init_2d_vlc_rl(RLTable *rl)
static inline int mpeg1_decode_block_intra(MpegEncContext *s, DCTELEM *block, int n)
{
int i;
int level, dc, diff, i, j, run;
int component;
RLTable *rl = &ff_rl_mpeg1;
uint8_t * const scantable = s->intra_scantable.permutated;
const uint16_t *quant_matrix = s->intra_matrix;
const int qscale = s->qscale;
for (i = 0; i < rl->vlc.table_size; i++) {
int code = rl->vlc.table[i][0];
int len = rl->vlc.table[i][1];
int level, run;
/* DC coefficient */
component = (n <= 3 ? 0 : n - 4 + 1);
diff = decode_dc(&s->gb, component);
if (diff >= 0xffff)
return -1;
dc = s->last_dc[component];
dc += diff;
s->last_dc[component] = dc;
block[0] = dc * quant_matrix[0];
av_dlog(s->avctx, "dc=%d diff=%d\n", dc, diff);
i = 0;
{
OPEN_READER(re, &s->gb);
/* now quantify & encode AC coefficients */
for (;;) {
UPDATE_CACHE(re, &s->gb);
GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2, 0);
if (len == 0) { // illegal code
run = 65;
level = MAX_LEVEL;
} else if (len<0) { //more bits needed
run = 0;
level = code;
} else {
if (code == rl->n) { //esc
run = 65;
level = 0;
} else if (code == rl->n+1) { //eob
run = 0;
level = 127;
if (level == 127) {
break;
} else if (level != 0) {
i += run;
j = scantable[i];
level = (level * qscale * quant_matrix[j]) >> 4;
level = (level - 1) | 1;
level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
LAST_SKIP_BITS(re, &s->gb, 1);
} else {
run = rl->table_run [code] + 1;
level = rl->table_level[code];
/* escape */
run = SHOW_UBITS(re, &s->gb, 6) + 1; LAST_SKIP_BITS(re, &s->gb, 6);
UPDATE_CACHE(re, &s->gb);
level = SHOW_SBITS(re, &s->gb, 8); SKIP_BITS(re, &s->gb, 8);
if (level == -128) {
level = SHOW_UBITS(re, &s->gb, 8) - 256; LAST_SKIP_BITS(re, &s->gb, 8);
} else if (level == 0) {
level = SHOW_UBITS(re, &s->gb, 8) ; LAST_SKIP_BITS(re, &s->gb, 8);
}
i += run;
j = scantable[i];
if (level < 0) {
level = -level;
level = (level * qscale * quant_matrix[j]) >> 4;
level = (level - 1) | 1;
level = -level;
} else {
level = (level * qscale * quant_matrix[j]) >> 4;
level = (level - 1) | 1;
}
}
if (i > 63) {
av_log(s->avctx, AV_LOG_ERROR, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y);
return -1;
}
block[j] = level;
}
rl->rl_vlc[0][i].len = len;
rl->rl_vlc[0][i].level = level;
rl->rl_vlc[0][i].run = run;
CLOSE_READER(re, &s->gb);
}
s->block_last_index[n] = i;
return 0;
}
/**
 * Select the DC scale table used for both luma and chroma.
 *
 * Both context fields end up pointing at the entry of
 * ff_mpeg2_dc_scale_table selected by s->intra_dc_precision.
 *
 * @param s codec context whose y_dc_scale_table / c_dc_scale_table are set
 */
void ff_mpeg12_common_init(MpegEncContext *s)
{
    /* Chroma is written first and luma copies it, exactly as a chained
     * assignment would evaluate. */
    s->c_dc_scale_table = ff_mpeg2_dc_scale_table[s->intra_dc_precision];
    s->y_dc_scale_table = s->c_dc_scale_table;
}
void ff_mpeg1_clean_buffers(MpegEncContext *s)
int ff_mpeg1_decode_block_intra(MpegEncContext *s, DCTELEM *block, int n)
{
s->last_dc[0] = 1 << (7 + s->intra_dc_precision);
s->last_dc[1] = s->last_dc[0];
s->last_dc[2] = s->last_dc[0];
memset(s->last_mv, 0, sizeof(s->last_mv));
return mpeg1_decode_block_intra(s, block, n);
}
/******************************************/
/* decoding */
VLC ff_dc_lum_vlc;
VLC ff_dc_chroma_vlc;
static VLC mv_vlc;
static VLC mbincr_vlc;
static VLC mb_ptype_vlc;
static VLC mb_btype_vlc;
static VLC mb_pat_vlc;
av_cold void ff_mpeg12_init_vlcs(void)
static inline int mpeg1_decode_block_inter(MpegEncContext *s, DCTELEM *block, int n)
{
static int done = 0;
if (!done) {
done = 1;
int level, i, j, run;
RLTable *rl = &ff_rl_mpeg1;
uint8_t * const scantable = s->intra_scantable.permutated;
const uint16_t *quant_matrix = s->inter_matrix;
const int qscale = s->qscale;
INIT_VLC_STATIC(&ff_dc_lum_vlc, DC_VLC_BITS, 12,
ff_mpeg12_vlc_dc_lum_bits, 1, 1,
ff_mpeg12_vlc_dc_lum_code, 2, 2, 512);
INIT_VLC_STATIC(&ff_dc_chroma_vlc, DC_VLC_BITS, 12,
ff_mpeg12_vlc_dc_chroma_bits, 1, 1,
ff_mpeg12_vlc_dc_chroma_code, 2, 2, 514);
INIT_VLC_STATIC(&mv_vlc, MV_VLC_BITS, 17,
&ff_mpeg12_mbMotionVectorTable[0][1], 2, 1,
&ff_mpeg12_mbMotionVectorTable[0][0], 2, 1, 518);
INIT_VLC_STATIC(&mbincr_vlc, MBINCR_VLC_BITS, 36,
&ff_mpeg12_mbAddrIncrTable[0][1], 2, 1,
&ff_mpeg12_mbAddrIncrTable[0][0], 2, 1, 538);
INIT_VLC_STATIC(&mb_pat_vlc, MB_PAT_VLC_BITS, 64,
&ff_mpeg12_mbPatTable[0][1], 2, 1,
&ff_mpeg12_mbPatTable[0][0], 2, 1, 512);
{
OPEN_READER(re, &s->gb);
i = -1;
// special case for first coefficient, no need to add second VLC table
UPDATE_CACHE(re, &s->gb);
if (((int32_t)GET_CACHE(re, &s->gb)) < 0) {
level = (3 * qscale * quant_matrix[0]) >> 5;
level = (level - 1) | 1;
if (GET_CACHE(re, &s->gb) & 0x40000000)
level = -level;
block[0] = level;
i++;
SKIP_BITS(re, &s->gb, 2);
if (((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
goto end;
}
/* now quantify & encode AC coefficients */
for (;;) {
GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2, 0);
INIT_VLC_STATIC(&mb_ptype_vlc, MB_PTYPE_VLC_BITS, 7,
&table_mb_ptype[0][1], 2, 1,
&table_mb_ptype[0][0], 2, 1, 64);
INIT_VLC_STATIC(&mb_btype_vlc, MB_BTYPE_VLC_BITS, 11,
&table_mb_btype[0][1], 2, 1,
&table_mb_btype[0][0], 2, 1, 64);
init_rl(&ff_rl_mpeg1, ff_mpeg12_static_rl_table_store[0]);
init_rl(&ff_rl_mpeg2, ff_mpeg12_static_rl_table_store[1]);
if (level != 0) {
i += run;
j = scantable[i];
level = ((level * 2 + 1) * qscale * quant_matrix[j]) >> 5;
level = (level - 1) | 1;
level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
SKIP_BITS(re, &s->gb, 1);
} else {
/* escape */
run = SHOW_UBITS(re, &s->gb, 6) + 1; LAST_SKIP_BITS(re, &s->gb, 6);
UPDATE_CACHE(re, &s->gb);
level = SHOW_SBITS(re, &s->gb, 8); SKIP_BITS(re, &s->gb, 8);
if (level == -128) {
level = SHOW_UBITS(re, &s->gb, 8) - 256; SKIP_BITS(re, &s->gb, 8);
} else if (level == 0) {
level = SHOW_UBITS(re, &s->gb, 8) ; SKIP_BITS(re, &s->gb, 8);
}
i += run;
j = scantable[i];
if (level < 0) {
level = -level;
level = ((level * 2 + 1) * qscale * quant_matrix[j]) >> 5;
level = (level - 1) | 1;
level = -level;
} else {
level = ((level * 2 + 1) * qscale * quant_matrix[j]) >> 5;
level = (level - 1) | 1;
}
}
if (i > 63) {
av_log(s->avctx, AV_LOG_ERROR, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y);
return -1;
}
INIT_2D_VLC_RL(ff_rl_mpeg1, 680);
INIT_2D_VLC_RL(ff_rl_mpeg2, 674);
block[j] = level;
if (((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
break;
UPDATE_CACHE(re, &s->gb);
}
end:
LAST_SKIP_BITS(re, &s->gb, 2);
CLOSE_READER(re, &s->gb);
}
s->block_last_index[n] = i;
return 0;
}
static inline int get_dmv(MpegEncContext *s)
static inline int mpeg1_fast_decode_block_inter(MpegEncContext *s, DCTELEM *block, int n)
{
if (get_bits1(&s->gb))
return 1 - (get_bits1(&s->gb) << 1);
else
return 0;
}
int level, i, j, run;
RLTable *rl = &ff_rl_mpeg1;
uint8_t * const scantable = s->intra_scantable.permutated;
const int qscale = s->qscale;
/**
 * Read a 5-bit quantiser scale code from the bitstream and convert it to
 * the quantiser scale value.
 *
 * @param s codec context; s->gb is the bit reader and s->q_scale_type
 *          selects which mapping is applied
 * @return non_linear_qscale[code] when s->q_scale_type is non-zero,
 *         otherwise the code shifted left by one bit
 */
static inline int get_qscale(MpegEncContext *s)
{
    const int code = get_bits(&s->gb, 5);

    return s->q_scale_type ? non_linear_qscale[code] : code << 1;
}
{
OPEN_READER(re, &s->gb);
i = -1;
// special case for first coefficient, no need to add second VLC table
UPDATE_CACHE(re, &s->gb);
if (((int32_t)GET_CACHE(re, &s->gb)) < 0) {
level = (3 * qscale) >> 1;
level = (level - 1) | 1;
if (GET_CACHE(re, &s->gb) & 0x40000000)
level = -level;
block[0] = level;
i++;
SKIP_BITS(re, &s->gb, 2);
if (((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
goto end;
}
/* motion type (for MPEG-2) */
#define MT_FIELD 1
#define MT_FRAME 2
#define MT_16X8 2
#define MT_DMV 3
/* now quantify & encode AC coefficients */
for (;;) {
GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2, 0);
static int mpeg_decode_mb(MpegEncContext *s, DCTELEM block[12][64])
{
int i, j, k, cbp, val, mb_type, motion_type;
const int mb_block_count = 4 + (1 << s->chroma_format);
if (level != 0) {
i += run;
j = scantable[i];
level = ((level * 2 + 1) * qscale) >> 1;
level = (level - 1) | 1;
level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
SKIP_BITS(re, &s->gb, 1);
} else {
/* escape */
run = SHOW_UBITS(re, &s->gb, 6)+1; LAST_SKIP_BITS(re, &s->gb, 6);
UPDATE_CACHE(re, &s->gb);
level = SHOW_SBITS(re, &s->gb, 8); SKIP_BITS(re, &s->gb, 8);
if (level == -128) {
level = SHOW_UBITS(re, &s->gb, 8) - 256; SKIP_BITS(re, &s->gb, 8);
} else if (level == 0) {
level = SHOW_UBITS(re, &s->gb, 8) ; SKIP_BITS(re, &s->gb, 8);
}
i += run;
j = scantable[i];
if (level < 0) {
level = -level;
level = ((level * 2 + 1) * qscale) >> 1;
level = (level - 1) | 1;
level = -level;
} else {
level = ((level * 2 + 1) * qscale) >> 1;
level = (level - 1) | 1;
}
}
av_dlog(s->avctx, "decode_mb: x=%d y=%d\n", s->mb_x, s->mb_y);
block[j] = level;
if (((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
break;
UPDATE_CACHE(re, &s->gb);
}
end:
LAST_SKIP_BITS(re, &s->gb, 2);
CLOSE_READER(re, &s->gb);
}
s->block_last_index[n] = i;
return 0;
}
assert(s->mb_skipped == 0);
if (s->mb_skip_run-- != 0) {
if (s->pict_type == AV_PICTURE_TYPE_P) {
s->mb_skipped = 1;
s->current_picture.f.mb_type[s->mb_x + s->mb_y * s->mb_stride] = MB_TYPE_SKIP | MB_TYPE_L0 | MB_TYPE_16x16;
} else {
int mb_type;
static inline int mpeg2_decode_block_non_intra(MpegEncContext *s, DCTELEM *block, int n)
{
int level, i, j, run;
RLTable *rl = &ff_rl_mpeg1;
uint8_t * const scantable = s->intra_scantable.permutated;
const uint16_t *quant_matrix;
const int qscale = s->qscale;
int mismatch;
if (s->mb_x)
mb_type = s->current_picture.f.mb_type[s->mb_x + s->mb_y * s->mb_stride - 1];
else
mb_type = s->current_picture.f.mb_type[s->mb_width + (s->mb_y - 1) * s->mb_stride - 1]; // FIXME not sure if this is allowed in MPEG at all
if (IS_INTRA(mb_type))
return -1;
mismatch = 1;
s->current_picture.f.mb_type[s->mb_x + s->mb_y*s->mb_stride] =
mb_type | MB_TYPE_SKIP;
// assert(s->current_picture.f.mb_type[s->mb_x + s->mb_y * s->mb_stride - 1] & (MB_TYPE_16x16 | MB_TYPE_16x8));
{
OPEN_READER(re, &s->gb);
i = -1;
if (n < 4)
quant_matrix = s->inter_matrix;
else
quant_matrix = s->chroma_inter_matrix;
if ((s->mv[0][0][0] | s->mv[0][0][1] | s->mv[1][0][0] | s->mv[1][0][1]) == 0)
s->mb_skipped = 1;
// special case for first coefficient, no need to add second VLC table
UPDATE_CACHE(re, &s->gb);
if (((int32_t)GET_CACHE(re, &s->gb)) < 0) {
level= (3 * qscale * quant_matrix[0]) >> 5;
if (GET_CACHE(re, &s->gb) & 0x40000000)
level = -level;
block[0] = level;
mismatch ^= level;
i++;
SKIP_BITS(re, &s->gb, 2);
if (((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
goto end;
}
return 0;
}
/* now quantify & encode AC coefficients */
for (;;) {
GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2, 0);
switch (s->pict_type) {
default:
case AV_PICTURE_TYPE_I:
if (get_bits1(&s->gb) == 0) {
if (get_bits1(&s->gb) == 0) {
av_log(s->avctx, AV_LOG_ERROR, "invalid mb type in I Frame at %d %d\n", s->mb_x, s->mb_y);
if (level != 0) {
i += run;
j = scantable[i];
level = ((level * 2 + 1) * qscale * quant_matrix[j]) >> 5;
level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
SKIP_BITS(re, &s->gb, 1);
} else {
/* escape */
run = SHOW_UBITS(re, &s->gb, 6) + 1; LAST_SKIP_BITS(re, &s->gb, 6);
UPDATE_CACHE(re, &s->gb);
level = SHOW_SBITS(re, &s->gb, 12); SKIP_BITS(re, &s->gb, 12);
i += run;
j = scantable[i];
if (level < 0) {
level = ((-level * 2 + 1) * qscale * quant_matrix[j]) >> 5;
level = -level;
} else {
level = ((level * 2 + 1) * qscale * quant_matrix[j]) >> 5;
}
}
if (i > 63) {
av_log(s->avctx, AV_LOG_ERROR, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y);
return -1;
}
mb_type = MB_TYPE_QUANT | MB_TYPE_INTRA;
} else {
mb_type = MB_TYPE_INTRA;
}
break;
case AV_PICTURE_TYPE_P:
mb_type = get_vlc2(&s->gb, mb_ptype_vlc.table, MB_PTYPE_VLC_BITS, 1);
if (mb_type < 0) {
av_log(s->avctx, AV_LOG_ERROR, "invalid mb type in P Frame at %d %d\n", s->mb_x, s->mb_y);
return -1;
}
mb_type = ptype2mb_type[mb_type];
break;
case AV_PICTURE_TYPE_B:
mb_type = get_vlc2(&s->gb, mb_btype_vlc.table, MB_BTYPE_VLC_BITS, 1);
if (mb_type < 0) {
av_log(s->avctx, AV_LOG_ERROR, "invalid mb type in B Frame at %d %d\n", s->mb_x, s->mb_y);
return -1;
mismatch ^= level;
block[j] = level;
if (((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
break;
UPDATE_CACHE(re, &s->gb);
}
mb_type = btype2mb_type[mb_type];
break;
end:
LAST_SKIP_BITS(re, &s->gb, 2);
CLOSE_READER(re, &s->gb);
}
av_dlog(s->avctx, "mb_type=%x\n", mb_type);
// motion_type = 0; /* avoid warning */
if (IS_INTRA(mb_type)) {
s->dsp.clear_blocks(s->block[0]);
block[63] ^= (mismatch & 1);
if (!s->chroma_y_shift) {
s->dsp.clear_blocks(s->block[6]);
}
s->block_last_index[n] = i;
return 0;
}
/* compute DCT type */
if (s->picture_structure == PICT_FRAME && // FIXME add an interlaced_dct coded var?
!s->frame_pred_frame_dct) {
s->interlaced_dct = get_bits1(&s->gb);
}
static inline int mpeg2_fast_decode_block_non_intra(MpegEncContext *s,
DCTELEM *block, int n)
{
int level, i, j, run;
RLTable *rl = &ff_rl_mpeg1;
uint8_t * const scantable = s->intra_scantable.permutated;
const int qscale = s->qscale;
OPEN_READER(re, &s->gb);
i = -1;
if (IS_QUANT(mb_type))
s->qscale = get_qscale(s);
// special case for first coefficient, no need to add second VLC table
UPDATE_CACHE(re, &s->gb);
if (((int32_t)GET_CACHE(re, &s->gb)) < 0) {
level = (3 * qscale) >> 1;
if (GET_CACHE(re, &s->gb) & 0x40000000)
level = -level;
block[0] = level;
i++;
SKIP_BITS(re, &s->gb, 2);
if (((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
goto end;
}
if (s->concealment_motion_vectors) {
/* just parse them */
if (s->picture_structure != PICT_FRAME)
skip_bits1(&s->gb); /* field select */
/* now quantify & encode AC coefficients */
for (;;) {
GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2, 0);
s->mv[0][0][0]= s->last_mv[0][0][0]= s->last_mv[0][1][0] =
mpeg_decode_motion(s, s->mpeg_f_code[0][0], s->last_mv[0][0][0]);
s->mv[0][0][1]= s->last_mv[0][0][1]= s->last_mv[0][1][1] =
mpeg_decode_motion(s, s->mpeg_f_code[0][1], s->last_mv[0][0][1]);
if (level != 0) {
i += run;
j = scantable[i];
level = ((level * 2 + 1) * qscale) >> 1;
level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
SKIP_BITS(re, &s->gb, 1);
} else {
/* escape */
run = SHOW_UBITS(re, &s->gb, 6) + 1; LAST_SKIP_BITS(re, &s->gb, 6);
UPDATE_CACHE(re, &s->gb);
level = SHOW_SBITS(re, &s->gb, 12); SKIP_BITS(re, &s->gb, 12);
skip_bits1(&s->gb); /* marker */
} else
memset(s->last_mv, 0, sizeof(s->last_mv)); /* reset mv prediction */
s->mb_intra = 1;
// if 1, we memcpy blocks in xvmcvideo
if (CONFIG_MPEG_XVMC_DECODER && s->avctx->xvmc_acceleration > 1) {
ff_xvmc_pack_pblocks(s, -1); // inter are always full blocks
if (s->swap_uv) {
exchange_uv(s);
i += run;
j = scantable[i];
if (level < 0) {
level = ((-level * 2 + 1) * qscale) >> 1;
level = -level;
} else {
level = ((level * 2 + 1) * qscale) >> 1;
}
}
if (s->codec_id == CODEC_ID_MPEG2VIDEO) {
if (s->flags2 & CODEC_FLAG2_FAST) {
for (i = 0; i < 6; i++) {
mpeg2_fast_decode_block_intra(s, *s->pblocks[i], i);
}
block[j] = level;
if (((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
break;
UPDATE_CACHE(re, &s->gb);
}
end:
LAST_SKIP_BITS(re, &s->gb, 2);
CLOSE_READER(re, &s->gb);
s->block_last_index[n] = i;
return 0;
}
static inline int mpeg2_decode_block_intra(MpegEncContext *s, DCTELEM *block, int n)
{
int level, dc, diff, i, j, run;
int component;
RLTable *rl;
uint8_t * const scantable = s->intra_scantable.permutated;
const uint16_t *quant_matrix;
const int qscale = s->qscale;
int mismatch;
/* DC coefficient */
if (n < 4) {
quant_matrix = s->intra_matrix;
component = 0;
} else {
quant_matrix = s->chroma_intra_matrix;
component = (n & 1) + 1;
}
diff = decode_dc(&s->gb, component);
if (diff >= 0xffff)
return -1;
dc = s->last_dc[component];
dc += diff;
s->last_dc[component] = dc;
block[0] = dc << (3 - s->intra_dc_precision);
av_dlog(s->avctx, "dc=%d\n", block[0]);
mismatch = block[0] ^ 1;
i = 0;
if (s->intra_vlc_format)
rl = &ff_rl_mpeg2;
else
rl = &ff_rl_mpeg1;
{
OPEN_READER(re, &s->gb);
/* now quantify & encode AC coefficients */
for (;;) {
UPDATE_CACHE(re, &s->gb);
GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2, 0);
if (level == 127) {
break;
} else if (level != 0) {
i += run;
j = scantable[i];
level = (level * qscale * quant_matrix[j]) >> 4;
level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
LAST_SKIP_BITS(re, &s->gb, 1);
} else {
for (i = 0; i < mb_block_count; i++) {
if (mpeg2_decode_block_intra(s, *s->pblocks[i], i) < 0)
return -1;
/* escape */
run = SHOW_UBITS(re, &s->gb, 6) + 1; LAST_SKIP_BITS(re, &s->gb, 6);
UPDATE_CACHE(re, &s->gb);
level = SHOW_SBITS(re, &s->gb, 12); SKIP_BITS(re, &s->gb, 12);
i += run;
j = scantable[i];
if (level < 0) {
level = (-level * qscale * quant_matrix[j]) >> 4;
level = -level;
} else {
level = (level * qscale * quant_matrix[j]) >> 4;
}
}
} else {
for (i = 0; i < 6; i++) {
if (mpeg1_decode_block_intra(s, *s->pblocks[i], i) < 0)
return -1;
if (i > 63) {
av_log(s->avctx, AV_LOG_ERROR, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y);
return -1;
}
mismatch ^= level;
block[j] = level;
}
CLOSE_READER(re, &s->gb);
}
block[63] ^= mismatch & 1;
s->block_last_index[n] = i;
return 0;
}
static inline int mpeg2_fast_decode_block_intra(MpegEncContext *s, DCTELEM *block, int n)
{
int level, dc, diff, j, run;
int component;
RLTable *rl;
uint8_t * scantable = s->intra_scantable.permutated;
const uint16_t *quant_matrix;
const int qscale = s->qscale;
/* DC coefficient */
if (n < 4) {
quant_matrix = s->intra_matrix;
component = 0;
} else {
if (mb_type & MB_TYPE_ZERO_MV) {
assert(mb_type & MB_TYPE_CBP);
quant_matrix = s->chroma_intra_matrix;
component = (n & 1) + 1;
}
diff = decode_dc(&s->gb, component);
if (diff >= 0xffff)
return -1;
dc = s->last_dc[component];
dc += diff;
s->last_dc[component] = dc;
block[0] = dc << (3 - s->intra_dc_precision);
if (s->intra_vlc_format)
rl = &ff_rl_mpeg2;
else
rl = &ff_rl_mpeg1;
s->mv_dir = MV_DIR_FORWARD;
if (s->picture_structure == PICT_FRAME) {
if (!s->frame_pred_frame_dct)
s->interlaced_dct = get_bits1(&s->gb);
s->mv_type = MV_TYPE_16X16;
{
OPEN_READER(re, &s->gb);
/* now quantify & encode AC coefficients */
for (;;) {
UPDATE_CACHE(re, &s->gb);
GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2, 0);
if (level == 127) {
break;
} else if (level != 0) {
scantable += run;
j = *scantable;
level = (level * qscale * quant_matrix[j]) >> 4;
level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
LAST_SKIP_BITS(re, &s->gb, 1);
} else {
s->mv_type = MV_TYPE_FIELD;
mb_type |= MB_TYPE_INTERLACED;
s->field_select[0][0] = s->picture_structure - 1;
/* escape */
run = SHOW_UBITS(re, &s->gb, 6) + 1; LAST_SKIP_BITS(re, &s->gb, 6);
UPDATE_CACHE(re, &s->gb);
level = SHOW_SBITS(re, &s->gb, 12); SKIP_BITS(re, &s->gb, 12);
scantable += run;
j = *scantable;
if (level < 0) {
level = (-level * qscale * quant_matrix[j]) >> 4;
level = -level;
} else {
level = (level * qscale * quant_matrix[j]) >> 4;
}
}
if (IS_QUANT(mb_type))
s->qscale = get_qscale(s);
block[j] = level;
}
CLOSE_READER(re, &s->gb);
}
s->last_mv[0][0][0] = 0;
s->last_mv[0][0][1] = 0;
s->last_mv[0][1][0] = 0;
s->last_mv[0][1][1] = 0;
s->mv[0][0][0] = 0;
s->mv[0][0][1] = 0;
s->block_last_index[n] = scantable - s->intra_scantable.permutated;
return 0;
}
uint8_t ff_mpeg12_static_rl_table_store[2][2][2*MAX_RUN + MAX_LEVEL + 3];
/*
 * Build the combined run/level VLC lookup table for an RLTable object.
 *
 * rl          - an RLTable object (not a pointer: the macro uses rl.vlc and &rl)
 * static_size - number of elements in the static backing table; the same
 *               value is passed to INIT_VLC_STATIC as the table size
 *
 * The expansion is a brace-enclosed block that:
 *   1. declares a static RL_VLC_ELEM array of static_size entries,
 *   2. initialises rl.vlc through INIT_VLC_STATIC with TEX_VLC_BITS bits and
 *      rl.n + 2 codes taken from rl.table_vlc,
 *   3. points rl.rl_vlc[0] at the static array,
 *   4. calls init_2d_vlc_rl(&rl) to fill that array.
 */
#define INIT_2D_VLC_RL(rl, static_size)\
{\
static RL_VLC_ELEM rl_vlc_table[static_size];\
INIT_VLC_STATIC(&rl.vlc, TEX_VLC_BITS, rl.n + 2,\
&rl.table_vlc[0][1], 4, 2,\
&rl.table_vlc[0][0], 4, 2, static_size);\
\
rl.rl_vlc[0] = rl_vlc_table;\
init_2d_vlc_rl(&rl);\
}
static void init_2d_vlc_rl(RLTable *rl)
{
int i;
for (i = 0; i < rl->vlc.table_size; i++) {
int code = rl->vlc.table[i][0];
int len = rl->vlc.table[i][1];
int level, run;
if (len == 0) { // illegal code
run = 65;
level = MAX_LEVEL;
} else if (len<0) { //more bits needed
run = 0;
level = code;
} else {
assert(mb_type & MB_TYPE_L0L1);
// FIXME decide if MBs in field pictures are MB_TYPE_INTERLACED
/* get additional motion vector type */
if (s->frame_pred_frame_dct)
motion_type = MT_FRAME;
else {
motion_type = get_bits(&s->gb, 2);
if (s->picture_structure == PICT_FRAME && HAS_CBP(mb_type))
s->interlaced_dct = get_bits1(&s->gb);
if (code == rl->n) { //esc
run = 65;
level = 0;
} else if (code == rl->n+1) { //eob
run = 0;
level = 127;
} else {
run = rl->table_run [code] + 1;
level = rl->table_level[code];
}
}
rl->rl_vlc[0][i].len = len;
rl->rl_vlc[0][i].level = level;
rl->rl_vlc[0][i].run = run;
}
}
if (IS_QUANT(mb_type))
s->qscale = get_qscale(s);
/* motion vectors */
s->mv_dir = (mb_type >> 13) & 3;
av_dlog(s->avctx, "motion_type=%d\n", motion_type);
switch (motion_type) {
case MT_FRAME: /* or MT_16X8 */
if (s->picture_structure == PICT_FRAME) {
mb_type |= MB_TYPE_16x16;
s->mv_type = MV_TYPE_16X16;
for (i = 0; i < 2; i++) {
if (USES_LIST(mb_type, i)) {
/* MT_FRAME */
s->mv[i][0][0]= s->last_mv[i][0][0]= s->last_mv[i][1][0] =
mpeg_decode_motion(s, s->mpeg_f_code[i][0], s->last_mv[i][0][0]);
s->mv[i][0][1]= s->last_mv[i][0][1]= s->last_mv[i][1][1] =
mpeg_decode_motion(s, s->mpeg_f_code[i][1], s->last_mv[i][0][1]);
/* full_pel: only for MPEG-1 */
if (s->full_pel[i]) {
s->mv[i][0][0] <<= 1;
s->mv[i][0][1] <<= 1;
}
}
}
} else {
mb_type |= MB_TYPE_16x8 | MB_TYPE_INTERLACED;
s->mv_type = MV_TYPE_16X8;
for (i = 0; i < 2; i++) {
if (USES_LIST(mb_type, i)) {
/* MT_16X8 */
for (j = 0; j < 2; j++) {
s->field_select[i][j] = get_bits1(&s->gb);
for (k = 0; k < 2; k++) {
val = mpeg_decode_motion(s, s->mpeg_f_code[i][k],
s->last_mv[i][j][k]);
s->last_mv[i][j][k] = val;
s->mv[i][j][k] = val;
}
}
}
}
}
break;
case MT_FIELD:
if(s->progressive_sequence){
av_log(s->avctx, AV_LOG_ERROR, "MT_FIELD in progressive_sequence\n");
return -1;
}
s->mv_type = MV_TYPE_FIELD;
if (s->picture_structure == PICT_FRAME) {
mb_type |= MB_TYPE_16x8 | MB_TYPE_INTERLACED;
for (i = 0; i < 2; i++) {
if (USES_LIST(mb_type, i)) {
for (j = 0; j < 2; j++) {
s->field_select[i][j] = get_bits1(&s->gb);
val = mpeg_decode_motion(s, s->mpeg_f_code[i][0],
s->last_mv[i][j][0]);
s->last_mv[i][j][0] = val;
s->mv[i][j][0] = val;
av_dlog(s->avctx, "fmx=%d\n", val);
val = mpeg_decode_motion(s, s->mpeg_f_code[i][1],
s->last_mv[i][j][1] >> 1);
s->last_mv[i][j][1] = val << 1;
s->mv[i][j][1] = val;
av_dlog(s->avctx, "fmy=%d\n", val);
}
}
}
} else {
mb_type |= MB_TYPE_16x16 | MB_TYPE_INTERLACED;
for (i = 0; i < 2; i++) {
if (USES_LIST(mb_type, i)) {
s->field_select[i][0] = get_bits1(&s->gb);
for (k = 0; k < 2; k++) {
val = mpeg_decode_motion(s, s->mpeg_f_code[i][k],
s->last_mv[i][0][k]);
s->last_mv[i][0][k] = val;
s->last_mv[i][1][k] = val;
s->mv[i][0][k] = val;
}
}
}
}
break;
case MT_DMV:
if(s->progressive_sequence){
av_log(s->avctx, AV_LOG_ERROR, "MT_DMV in progressive_sequence\n");
return -1;
}
s->mv_type = MV_TYPE_DMV;
for (i = 0; i < 2; i++) {
if (USES_LIST(mb_type, i)) {
int dmx, dmy, mx, my, m;
const int my_shift = s->picture_structure == PICT_FRAME;
mx = mpeg_decode_motion(s, s->mpeg_f_code[i][0],
s->last_mv[i][0][0]);
s->last_mv[i][0][0] = mx;
s->last_mv[i][1][0] = mx;
dmx = get_dmv(s);
my = mpeg_decode_motion(s, s->mpeg_f_code[i][1],
s->last_mv[i][0][1] >> my_shift);
dmy = get_dmv(s);
void ff_mpeg12_common_init(MpegEncContext *s)
{
s->y_dc_scale_table =
s->c_dc_scale_table = ff_mpeg2_dc_scale_table[s->intra_dc_precision];
s->last_mv[i][0][1] = my << my_shift;
s->last_mv[i][1][1] = my << my_shift;
}
s->mv[i][0][0] = mx;
s->mv[i][0][1] = my;
s->mv[i][1][0] = mx; // not used
s->mv[i][1][1] = my; // not used
/**
 * Reset the prediction state kept in the codec context.
 *
 * The first three DC predictors are set to 1 << (7 + intra_dc_precision)
 * and the whole last_mv array is cleared to zero.
 *
 * @param s codec context whose last_dc[0..2] and last_mv are reset
 */
void ff_mpeg1_clean_buffers(MpegEncContext *s)
{
    const int dc_reset = 1 << (7 + s->intra_dc_precision);
    int c;

    for (c = 0; c < 3; c++)
        s->last_dc[c] = dc_reset;

    memset(s->last_mv, 0, sizeof(s->last_mv));
}
if (s->picture_structure == PICT_FRAME) {
mb_type |= MB_TYPE_16x16 | MB_TYPE_INTERLACED;
// m = 1 + 2 * s->top_field_first;
m = s->top_field_first ? 1 : 3;
/******************************************/
/* decoding */
/* top -> top pred */
s->mv[i][2][0] = ((mx * m + (mx > 0)) >> 1) + dmx;
s->mv[i][2][1] = ((my * m + (my > 0)) >> 1) + dmy - 1;
m = 4 - m;
s->mv[i][3][0] = ((mx * m + (mx > 0)) >> 1) + dmx;
s->mv[i][3][1] = ((my * m + (my > 0)) >> 1) + dmy + 1;
} else {
mb_type |= MB_TYPE_16x16;
VLC ff_dc_lum_vlc;
VLC ff_dc_chroma_vlc;
s->mv[i][2][0] = ((mx + (mx > 0)) >> 1) + dmx;
s->mv[i][2][1] = ((my + (my > 0)) >> 1) + dmy;
if (s->picture_structure == PICT_TOP_FIELD)
s->mv[i][2][1]--;
else
s->mv[i][2][1]++;
}
}
}
break;
default:
av_log(s->avctx, AV_LOG_ERROR, "00 motion_type at %d %d\n", s->mb_x, s->mb_y);
return -1;
}
}
static VLC mbincr_vlc;
static VLC mb_ptype_vlc;
static VLC mb_btype_vlc;
static VLC mb_pat_vlc;
s->mb_intra = 0;
if (HAS_CBP(mb_type)) {
s->dsp.clear_blocks(s->block[0]);
av_cold void ff_mpeg12_init_vlcs(void)
{
static int done = 0;
cbp = get_vlc2(&s->gb, mb_pat_vlc.table, MB_PAT_VLC_BITS, 1);
if (mb_block_count > 6) {
cbp <<= mb_block_count - 6;
cbp |= get_bits(&s->gb, mb_block_count - 6);
s->dsp.clear_blocks(s->block[6]);
}
if (cbp <= 0) {
av_log(s->avctx, AV_LOG_ERROR, "invalid cbp at %d %d\n", s->mb_x, s->mb_y);
return -1;
}
if (!done) {
done = 1;
//if 1, we memcpy blocks in xvmcvideo
if (CONFIG_MPEG_XVMC_DECODER && s->avctx->xvmc_acceleration > 1) {
ff_xvmc_pack_pblocks(s, cbp);
if (s->swap_uv) {
exchange_uv(s);
}
}
INIT_VLC_STATIC(&ff_dc_lum_vlc, DC_VLC_BITS, 12,
ff_mpeg12_vlc_dc_lum_bits, 1, 1,
ff_mpeg12_vlc_dc_lum_code, 2, 2, 512);
INIT_VLC_STATIC(&ff_dc_chroma_vlc, DC_VLC_BITS, 12,
ff_mpeg12_vlc_dc_chroma_bits, 1, 1,
ff_mpeg12_vlc_dc_chroma_code, 2, 2, 514);
INIT_VLC_STATIC(&mv_vlc, MV_VLC_BITS, 17,
&ff_mpeg12_mbMotionVectorTable[0][1], 2, 1,
&ff_mpeg12_mbMotionVectorTable[0][0], 2, 1, 518);
INIT_VLC_STATIC(&mbincr_vlc, MBINCR_VLC_BITS, 36,
&ff_mpeg12_mbAddrIncrTable[0][1], 2, 1,
&ff_mpeg12_mbAddrIncrTable[0][0], 2, 1, 538);
INIT_VLC_STATIC(&mb_pat_vlc, MB_PAT_VLC_BITS, 64,
&ff_mpeg12_mbPatTable[0][1], 2, 1,
&ff_mpeg12_mbPatTable[0][0], 2, 1, 512);
if (s->codec_id == CODEC_ID_MPEG2VIDEO) {
if (s->flags2 & CODEC_FLAG2_FAST) {
for (i = 0; i < 6; i++) {
if (cbp & 32) {
mpeg2_fast_decode_block_non_intra(s, *s->pblocks[i], i);
} else {
s->block_last_index[i] = -1;
}
cbp += cbp;
}
} else {
cbp <<= 12-mb_block_count;
INIT_VLC_STATIC(&mb_ptype_vlc, MB_PTYPE_VLC_BITS, 7,
&table_mb_ptype[0][1], 2, 1,
&table_mb_ptype[0][0], 2, 1, 64);
INIT_VLC_STATIC(&mb_btype_vlc, MB_BTYPE_VLC_BITS, 11,
&table_mb_btype[0][1], 2, 1,
&table_mb_btype[0][0], 2, 1, 64);
init_rl(&ff_rl_mpeg1, ff_mpeg12_static_rl_table_store[0]);
init_rl(&ff_rl_mpeg2, ff_mpeg12_static_rl_table_store[1]);
for (i = 0; i < mb_block_count; i++) {
if (cbp & (1 << 11)) {
if (mpeg2_decode_block_non_intra(s, *s->pblocks[i], i) < 0)
return -1;
} else {
s->block_last_index[i] = -1;
}
cbp += cbp;
}
}
} else {
if (s->flags2 & CODEC_FLAG2_FAST) {
for (i = 0; i < 6; i++) {
if (cbp & 32) {
mpeg1_fast_decode_block_inter(s, *s->pblocks[i], i);
} else {
s->block_last_index[i] = -1;
}
cbp += cbp;
}
} else {
for (i = 0; i < 6; i++) {
if (cbp & 32) {
if (mpeg1_decode_block_inter(s, *s->pblocks[i], i) < 0)
return -1;
} else {
s->block_last_index[i] = -1;
}
cbp += cbp;
}
}
}
} else {
for (i = 0; i < 12; i++)
s->block_last_index[i] = -1;
}
INIT_2D_VLC_RL(ff_rl_mpeg1, 680);
INIT_2D_VLC_RL(ff_rl_mpeg2, 674);
}
s->current_picture.f.mb_type[s->mb_x + s->mb_y * s->mb_stride] = mb_type;
return 0;
}
/* as H.263, but only 17 codes */
static int mpeg_decode_motion(MpegEncContext *s, int fcode, int pred)
static inline int get_dmv(MpegEncContext *s)
{
int code, sign, val, l, shift;
if (get_bits1(&s->gb))
return 1 - (get_bits1(&s->gb) << 1);
else
return 0;
}
code = get_vlc2(&s->gb, mv_vlc.table, MV_VLC_BITS, 2);
if (code == 0) {
return pred;
}
if (code < 0) {
return 0xffff;
static inline int get_qscale(MpegEncContext *s)
{
int qscale = get_bits(&s->gb, 5);
if (s->q_scale_type) {
return non_linear_qscale[qscale];
} else {
return qscale << 1;
}
}
sign = get_bits1(&s->gb);
shift = fcode - 1;
val = code;
if (shift) {
val = (val - 1) << shift;
val |= get_bits(&s->gb, shift);
val++;
}
if (sign)
val = -val;
val += pred;
static void exchange_uv(MpegEncContext *s)
{
DCTELEM (*tmp)[64];
/* modulo decoding */
l = INT_BIT - 5 - shift;
val = (val << l) >> l;
return val;
tmp = s->pblocks[4];
s->pblocks[4] = s->pblocks[5];
s->pblocks[5] = tmp;
}
static inline int mpeg1_decode_block_intra(MpegEncContext *s, DCTELEM *block, int n)
{
int level, dc, diff, i, j, run;
int component;
RLTable *rl = &ff_rl_mpeg1;
uint8_t * const scantable = s->intra_scantable.permutated;
const uint16_t *quant_matrix = s->intra_matrix;
const int qscale = s->qscale;
/* motion type (for MPEG-2) */
#define MT_FIELD 1
#define MT_FRAME 2
#define MT_16X8 2
#define MT_DMV 3
/* DC coefficient */
component = (n <= 3 ? 0 : n - 4 + 1);
diff = decode_dc(&s->gb, component);
if (diff >= 0xffff)
return -1;
dc = s->last_dc[component];
dc += diff;
s->last_dc[component] = dc;
block[0] = dc * quant_matrix[0];
av_dlog(s->avctx, "dc=%d diff=%d\n", dc, diff);
i = 0;
{
OPEN_READER(re, &s->gb);
/* now quantify & encode AC coefficients */
for (;;) {
UPDATE_CACHE(re, &s->gb);
GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2, 0);
static int mpeg_decode_mb(MpegEncContext *s, DCTELEM block[12][64])
{
int i, j, k, cbp, val, mb_type, motion_type;
const int mb_block_count = 4 + (1 << s->chroma_format);
if (level == 127) {
break;
} else if (level != 0) {
i += run;
j = scantable[i];
level = (level * qscale * quant_matrix[j]) >> 4;
level = (level - 1) | 1;
level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
LAST_SKIP_BITS(re, &s->gb, 1);
} else {
/* escape */
run = SHOW_UBITS(re, &s->gb, 6) + 1; LAST_SKIP_BITS(re, &s->gb, 6);
UPDATE_CACHE(re, &s->gb);
level = SHOW_SBITS(re, &s->gb, 8); SKIP_BITS(re, &s->gb, 8);
if (level == -128) {
level = SHOW_UBITS(re, &s->gb, 8) - 256; LAST_SKIP_BITS(re, &s->gb, 8);
} else if (level == 0) {
level = SHOW_UBITS(re, &s->gb, 8) ; LAST_SKIP_BITS(re, &s->gb, 8);
}
i += run;
j = scantable[i];
if (level < 0) {
level = -level;
level = (level * qscale * quant_matrix[j]) >> 4;
level = (level - 1) | 1;
level = -level;
} else {
level = (level * qscale * quant_matrix[j]) >> 4;
level = (level - 1) | 1;
}
}
if (i > 63) {
av_log(s->avctx, AV_LOG_ERROR, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y);
return -1;
}
av_dlog(s->avctx, "decode_mb: x=%d y=%d\n", s->mb_x, s->mb_y);
block[j] = level;
}
CLOSE_READER(re, &s->gb);
}
s->block_last_index[n] = i;
return 0;
}
assert(s->mb_skipped == 0);
/**
 * Externally visible (non-static) entry point that forwards to the file-local
 * inline mpeg1_decode_block_intra() and returns its result unchanged.
 */
int ff_mpeg1_decode_block_intra(MpegEncContext *s, DCTELEM *block, int n)
{
return mpeg1_decode_block_intra(s, block, n);
}
if (s->mb_skip_run-- != 0) {
if (s->pict_type == AV_PICTURE_TYPE_P) {
s->mb_skipped = 1;
s->current_picture.f.mb_type[s->mb_x + s->mb_y * s->mb_stride] = MB_TYPE_SKIP | MB_TYPE_L0 | MB_TYPE_16x16;
} else {
int mb_type;
static inline int mpeg1_decode_block_inter(MpegEncContext *s, DCTELEM *block, int n)
{
int level, i, j, run;
RLTable *rl = &ff_rl_mpeg1;
uint8_t * const scantable = s->intra_scantable.permutated;
const uint16_t *quant_matrix = s->inter_matrix;
const int qscale = s->qscale;
if (s->mb_x)
mb_type = s->current_picture.f.mb_type[s->mb_x + s->mb_y * s->mb_stride - 1];
else
mb_type = s->current_picture.f.mb_type[s->mb_width + (s->mb_y - 1) * s->mb_stride - 1]; // FIXME not sure if this is allowed in MPEG at all
if (IS_INTRA(mb_type))
return -1;
s->current_picture.f.mb_type[s->mb_x + s->mb_y*s->mb_stride] =
mb_type | MB_TYPE_SKIP;
// assert(s->current_picture.f.mb_type[s->mb_x + s->mb_y * s->mb_stride - 1] & (MB_TYPE_16x16 | MB_TYPE_16x8));
{
OPEN_READER(re, &s->gb);
i = -1;
// special case for first coefficient, no need to add second VLC table
UPDATE_CACHE(re, &s->gb);
if (((int32_t)GET_CACHE(re, &s->gb)) < 0) {
level = (3 * qscale * quant_matrix[0]) >> 5;
level = (level - 1) | 1;
if (GET_CACHE(re, &s->gb) & 0x40000000)
level = -level;
block[0] = level;
i++;
SKIP_BITS(re, &s->gb, 2);
if (((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
goto end;
if ((s->mv[0][0][0] | s->mv[0][0][1] | s->mv[1][0][0] | s->mv[1][0][1]) == 0)
s->mb_skipped = 1;
}
/* now quantify & encode AC coefficients */
for (;;) {
GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2, 0);
if (level != 0) {
i += run;
j = scantable[i];
level = ((level * 2 + 1) * qscale * quant_matrix[j]) >> 5;
level = (level - 1) | 1;
level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
SKIP_BITS(re, &s->gb, 1);
} else {
/* escape */
run = SHOW_UBITS(re, &s->gb, 6) + 1; LAST_SKIP_BITS(re, &s->gb, 6);
UPDATE_CACHE(re, &s->gb);
level = SHOW_SBITS(re, &s->gb, 8); SKIP_BITS(re, &s->gb, 8);
if (level == -128) {
level = SHOW_UBITS(re, &s->gb, 8) - 256; SKIP_BITS(re, &s->gb, 8);
} else if (level == 0) {
level = SHOW_UBITS(re, &s->gb, 8) ; SKIP_BITS(re, &s->gb, 8);
}
i += run;
j = scantable[i];
if (level < 0) {
level = -level;
level = ((level * 2 + 1) * qscale * quant_matrix[j]) >> 5;
level = (level - 1) | 1;
level = -level;
} else {
level = ((level * 2 + 1) * qscale * quant_matrix[j]) >> 5;
level = (level - 1) | 1;
}
}
if (i > 63) {
av_log(s->avctx, AV_LOG_ERROR, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y);
return 0;
}
switch (s->pict_type) {
default:
case AV_PICTURE_TYPE_I:
if (get_bits1(&s->gb) == 0) {
if (get_bits1(&s->gb) == 0) {
av_log(s->avctx, AV_LOG_ERROR, "invalid mb type in I Frame at %d %d\n", s->mb_x, s->mb_y);
return -1;
}
block[j] = level;
if (((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
break;
UPDATE_CACHE(re, &s->gb);
mb_type = MB_TYPE_QUANT | MB_TYPE_INTRA;
} else {
mb_type = MB_TYPE_INTRA;
}
end:
LAST_SKIP_BITS(re, &s->gb, 2);
CLOSE_READER(re, &s->gb);
break;
case AV_PICTURE_TYPE_P:
mb_type = get_vlc2(&s->gb, mb_ptype_vlc.table, MB_PTYPE_VLC_BITS, 1);
if (mb_type < 0) {
av_log(s->avctx, AV_LOG_ERROR, "invalid mb type in P Frame at %d %d\n", s->mb_x, s->mb_y);
return -1;
}
mb_type = ptype2mb_type[mb_type];
break;
case AV_PICTURE_TYPE_B:
mb_type = get_vlc2(&s->gb, mb_btype_vlc.table, MB_BTYPE_VLC_BITS, 1);
if (mb_type < 0) {
av_log(s->avctx, AV_LOG_ERROR, "invalid mb type in B Frame at %d %d\n", s->mb_x, s->mb_y);
return -1;
}
mb_type = btype2mb_type[mb_type];
break;
}
s->block_last_index[n] = i;
return 0;
}
av_dlog(s->avctx, "mb_type=%x\n", mb_type);
// motion_type = 0; /* avoid warning */
if (IS_INTRA(mb_type)) {
s->dsp.clear_blocks(s->block[0]);
static inline int mpeg1_fast_decode_block_inter(MpegEncContext *s, DCTELEM *block, int n)
{
int level, i, j, run;
RLTable *rl = &ff_rl_mpeg1;
uint8_t * const scantable = s->intra_scantable.permutated;
const int qscale = s->qscale;
if (!s->chroma_y_shift) {
s->dsp.clear_blocks(s->block[6]);
}
{
OPEN_READER(re, &s->gb);
i = -1;
// special case for first coefficient, no need to add second VLC table
UPDATE_CACHE(re, &s->gb);
if (((int32_t)GET_CACHE(re, &s->gb)) < 0) {
level = (3 * qscale) >> 1;
level = (level - 1) | 1;
if (GET_CACHE(re, &s->gb) & 0x40000000)
level = -level;
block[0] = level;
i++;
SKIP_BITS(re, &s->gb, 2);
if (((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
goto end;
}
/* now quantify & encode AC coefficients */
for (;;) {
GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2, 0);
if (level != 0) {
i += run;
j = scantable[i];
level = ((level * 2 + 1) * qscale) >> 1;
level = (level - 1) | 1;
level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
SKIP_BITS(re, &s->gb, 1);
} else {
/* escape */
run = SHOW_UBITS(re, &s->gb, 6)+1; LAST_SKIP_BITS(re, &s->gb, 6);
UPDATE_CACHE(re, &s->gb);
level = SHOW_SBITS(re, &s->gb, 8); SKIP_BITS(re, &s->gb, 8);
if (level == -128) {
level = SHOW_UBITS(re, &s->gb, 8) - 256; SKIP_BITS(re, &s->gb, 8);
} else if (level == 0) {
level = SHOW_UBITS(re, &s->gb, 8) ; SKIP_BITS(re, &s->gb, 8);
}
i += run;
j = scantable[i];
if (level < 0) {
level = -level;
level = ((level * 2 + 1) * qscale) >> 1;
level = (level - 1) | 1;
level = -level;
} else {
level = ((level * 2 + 1) * qscale) >> 1;
level = (level - 1) | 1;
}
}
block[j] = level;
if (((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
break;
UPDATE_CACHE(re, &s->gb);
/* compute DCT type */
if (s->picture_structure == PICT_FRAME && // FIXME add an interlaced_dct coded var?
!s->frame_pred_frame_dct) {
s->interlaced_dct = get_bits1(&s->gb);
}
end:
LAST_SKIP_BITS(re, &s->gb, 2);
CLOSE_READER(re, &s->gb);
}
s->block_last_index[n] = i;
return 0;
}
if (IS_QUANT(mb_type))
s->qscale = get_qscale(s);
static inline int mpeg2_decode_block_non_intra(MpegEncContext *s, DCTELEM *block, int n)
{
int level, i, j, run;
RLTable *rl = &ff_rl_mpeg1;
uint8_t * const scantable = s->intra_scantable.permutated;
const uint16_t *quant_matrix;
const int qscale = s->qscale;
int mismatch;
mismatch = 1;
if (s->concealment_motion_vectors) {
/* just parse them */
if (s->picture_structure != PICT_FRAME)
skip_bits1(&s->gb); /* field select */
{
OPEN_READER(re, &s->gb);
i = -1;
if (n < 4)
quant_matrix = s->inter_matrix;
else
quant_matrix = s->chroma_inter_matrix;
s->mv[0][0][0]= s->last_mv[0][0][0]= s->last_mv[0][1][0] =
mpeg_decode_motion(s, s->mpeg_f_code[0][0], s->last_mv[0][0][0]);
s->mv[0][0][1]= s->last_mv[0][0][1]= s->last_mv[0][1][1] =
mpeg_decode_motion(s, s->mpeg_f_code[0][1], s->last_mv[0][0][1]);
// special case for first coefficient, no need to add second VLC table
UPDATE_CACHE(re, &s->gb);
if (((int32_t)GET_CACHE(re, &s->gb)) < 0) {
level= (3 * qscale * quant_matrix[0]) >> 5;
if (GET_CACHE(re, &s->gb) & 0x40000000)
level = -level;
block[0] = level;
mismatch ^= level;
i++;
SKIP_BITS(re, &s->gb, 2);
if (((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
goto end;
skip_bits1(&s->gb); /* marker */
} else
memset(s->last_mv, 0, sizeof(s->last_mv)); /* reset mv prediction */
s->mb_intra = 1;
// if 1, we memcpy blocks in xvmcvideo
if (CONFIG_MPEG_XVMC_DECODER && s->avctx->xvmc_acceleration > 1) {
ff_xvmc_pack_pblocks(s, -1); // inter are always full blocks
if (s->swap_uv) {
exchange_uv(s);
}
}
/* now quantify & encode AC coefficients */
for (;;) {
GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2, 0);
if (level != 0) {
i += run;
j = scantable[i];
level = ((level * 2 + 1) * qscale * quant_matrix[j]) >> 5;
level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
SKIP_BITS(re, &s->gb, 1);
if (s->codec_id == CODEC_ID_MPEG2VIDEO) {
if (s->flags2 & CODEC_FLAG2_FAST) {
for (i = 0; i < 6; i++) {
mpeg2_fast_decode_block_intra(s, *s->pblocks[i], i);
}
} else {
/* escape */
run = SHOW_UBITS(re, &s->gb, 6) + 1; LAST_SKIP_BITS(re, &s->gb, 6);
UPDATE_CACHE(re, &s->gb);
level = SHOW_SBITS(re, &s->gb, 12); SKIP_BITS(re, &s->gb, 12);
i += run;
j = scantable[i];
if (level < 0) {
level = ((-level * 2 + 1) * qscale * quant_matrix[j]) >> 5;
level = -level;
} else {
level = ((level * 2 + 1) * qscale * quant_matrix[j]) >> 5;
for (i = 0; i < mb_block_count; i++) {
if (mpeg2_decode_block_intra(s, *s->pblocks[i], i) < 0)
return -1;
}
}
if (i > 63) {
av_log(s->avctx, AV_LOG_ERROR, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y);
return -1;
} else {
for (i = 0; i < 6; i++) {
if (mpeg1_decode_block_intra(s, *s->pblocks[i], i) < 0)
return -1;
}
mismatch ^= level;
block[j] = level;
if (((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
break;
UPDATE_CACHE(re, &s->gb);
}
end:
LAST_SKIP_BITS(re, &s->gb, 2);
CLOSE_READER(re, &s->gb);
}
block[63] ^= (mismatch & 1);
} else {
if (mb_type & MB_TYPE_ZERO_MV) {
assert(mb_type & MB_TYPE_CBP);
s->block_last_index[n] = i;
return 0;
}
s->mv_dir = MV_DIR_FORWARD;
if (s->picture_structure == PICT_FRAME) {
if (!s->frame_pred_frame_dct)
s->interlaced_dct = get_bits1(&s->gb);
s->mv_type = MV_TYPE_16X16;
} else {
s->mv_type = MV_TYPE_FIELD;
mb_type |= MB_TYPE_INTERLACED;
s->field_select[0][0] = s->picture_structure - 1;
}
static inline int mpeg2_fast_decode_block_non_intra(MpegEncContext *s,
DCTELEM *block, int n)
{
int level, i, j, run;
RLTable *rl = &ff_rl_mpeg1;
uint8_t * const scantable = s->intra_scantable.permutated;
const int qscale = s->qscale;
OPEN_READER(re, &s->gb);
i = -1;
if (IS_QUANT(mb_type))
s->qscale = get_qscale(s);
// special case for first coefficient, no need to add second VLC table
UPDATE_CACHE(re, &s->gb);
if (((int32_t)GET_CACHE(re, &s->gb)) < 0) {
level = (3 * qscale) >> 1;
if (GET_CACHE(re, &s->gb) & 0x40000000)
level = -level;
block[0] = level;
i++;
SKIP_BITS(re, &s->gb, 2);
if (((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
goto end;
}
s->last_mv[0][0][0] = 0;
s->last_mv[0][0][1] = 0;
s->last_mv[0][1][0] = 0;
s->last_mv[0][1][1] = 0;
s->mv[0][0][0] = 0;
s->mv[0][0][1] = 0;
} else {
assert(mb_type & MB_TYPE_L0L1);
// FIXME decide if MBs in field pictures are MB_TYPE_INTERLACED
/* get additional motion vector type */
if (s->frame_pred_frame_dct)
motion_type = MT_FRAME;
else {
motion_type = get_bits(&s->gb, 2);
if (s->picture_structure == PICT_FRAME && HAS_CBP(mb_type))
s->interlaced_dct = get_bits1(&s->gb);
}
/* now quantify & encode AC coefficients */
for (;;) {
GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2, 0);
if (IS_QUANT(mb_type))
s->qscale = get_qscale(s);
if (level != 0) {
i += run;
j = scantable[i];
level = ((level * 2 + 1) * qscale) >> 1;
level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
SKIP_BITS(re, &s->gb, 1);
} else {
/* escape */
run = SHOW_UBITS(re, &s->gb, 6) + 1; LAST_SKIP_BITS(re, &s->gb, 6);
UPDATE_CACHE(re, &s->gb);
level = SHOW_SBITS(re, &s->gb, 12); SKIP_BITS(re, &s->gb, 12);
/* motion vectors */
s->mv_dir = (mb_type >> 13) & 3;
av_dlog(s->avctx, "motion_type=%d\n", motion_type);
switch (motion_type) {
case MT_FRAME: /* or MT_16X8 */
if (s->picture_structure == PICT_FRAME) {
mb_type |= MB_TYPE_16x16;
s->mv_type = MV_TYPE_16X16;
for (i = 0; i < 2; i++) {
if (USES_LIST(mb_type, i)) {
/* MT_FRAME */
s->mv[i][0][0]= s->last_mv[i][0][0]= s->last_mv[i][1][0] =
mpeg_decode_motion(s, s->mpeg_f_code[i][0], s->last_mv[i][0][0]);
s->mv[i][0][1]= s->last_mv[i][0][1]= s->last_mv[i][1][1] =
mpeg_decode_motion(s, s->mpeg_f_code[i][1], s->last_mv[i][0][1]);
/* full_pel: only for MPEG-1 */
if (s->full_pel[i]) {
s->mv[i][0][0] <<= 1;
s->mv[i][0][1] <<= 1;
}
}
}
} else {
mb_type |= MB_TYPE_16x8 | MB_TYPE_INTERLACED;
s->mv_type = MV_TYPE_16X8;
for (i = 0; i < 2; i++) {
if (USES_LIST(mb_type, i)) {
/* MT_16X8 */
for (j = 0; j < 2; j++) {
s->field_select[i][j] = get_bits1(&s->gb);
for (k = 0; k < 2; k++) {
val = mpeg_decode_motion(s, s->mpeg_f_code[i][k],
s->last_mv[i][j][k]);
s->last_mv[i][j][k] = val;
s->mv[i][j][k] = val;
}
}
}
}
}
break;
case MT_FIELD:
if(s->progressive_sequence){
av_log(s->avctx, AV_LOG_ERROR, "MT_FIELD in progressive_sequence\n");
return -1;
}
s->mv_type = MV_TYPE_FIELD;
if (s->picture_structure == PICT_FRAME) {
mb_type |= MB_TYPE_16x8 | MB_TYPE_INTERLACED;
for (i = 0; i < 2; i++) {
if (USES_LIST(mb_type, i)) {
for (j = 0; j < 2; j++) {
s->field_select[i][j] = get_bits1(&s->gb);
val = mpeg_decode_motion(s, s->mpeg_f_code[i][0],
s->last_mv[i][j][0]);
s->last_mv[i][j][0] = val;
s->mv[i][j][0] = val;
av_dlog(s->avctx, "fmx=%d\n", val);
val = mpeg_decode_motion(s, s->mpeg_f_code[i][1],
s->last_mv[i][j][1] >> 1);
s->last_mv[i][j][1] = val << 1;
s->mv[i][j][1] = val;
av_dlog(s->avctx, "fmy=%d\n", val);
}
}
}
} else {
mb_type |= MB_TYPE_16x16 | MB_TYPE_INTERLACED;
for (i = 0; i < 2; i++) {
if (USES_LIST(mb_type, i)) {
s->field_select[i][0] = get_bits1(&s->gb);
for (k = 0; k < 2; k++) {
val = mpeg_decode_motion(s, s->mpeg_f_code[i][k],
s->last_mv[i][0][k]);
s->last_mv[i][0][k] = val;
s->last_mv[i][1][k] = val;
s->mv[i][0][k] = val;
}
}
}
}
break;
case MT_DMV:
if(s->progressive_sequence){
av_log(s->avctx, AV_LOG_ERROR, "MT_DMV in progressive_sequence\n");
return -1;
}
s->mv_type = MV_TYPE_DMV;
for (i = 0; i < 2; i++) {
if (USES_LIST(mb_type, i)) {
int dmx, dmy, mx, my, m;
const int my_shift = s->picture_structure == PICT_FRAME;
i += run;
j = scantable[i];
if (level < 0) {
level = ((-level * 2 + 1) * qscale) >> 1;
level = -level;
} else {
level = ((level * 2 + 1) * qscale) >> 1;
}
}
mx = mpeg_decode_motion(s, s->mpeg_f_code[i][0],
s->last_mv[i][0][0]);
s->last_mv[i][0][0] = mx;
s->last_mv[i][1][0] = mx;
dmx = get_dmv(s);
my = mpeg_decode_motion(s, s->mpeg_f_code[i][1],
s->last_mv[i][0][1] >> my_shift);
dmy = get_dmv(s);
block[j] = level;
if (((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
break;
UPDATE_CACHE(re, &s->gb);
}
end:
LAST_SKIP_BITS(re, &s->gb, 2);
CLOSE_READER(re, &s->gb);
s->block_last_index[n] = i;
return 0;
}
s->last_mv[i][0][1] = my << my_shift;
s->last_mv[i][1][1] = my << my_shift;
static inline int mpeg2_decode_block_intra(MpegEncContext *s, DCTELEM *block, int n)
{
int level, dc, diff, i, j, run;
int component;
RLTable *rl;
uint8_t * const scantable = s->intra_scantable.permutated;
const uint16_t *quant_matrix;
const int qscale = s->qscale;
int mismatch;
s->mv[i][0][0] = mx;
s->mv[i][0][1] = my;
s->mv[i][1][0] = mx; // not used
s->mv[i][1][1] = my; // not used
/* DC coefficient */
if (n < 4) {
quant_matrix = s->intra_matrix;
component = 0;
} else {
quant_matrix = s->chroma_intra_matrix;
component = (n & 1) + 1;
}
diff = decode_dc(&s->gb, component);
if (diff >= 0xffff)
return -1;
dc = s->last_dc[component];
dc += diff;
s->last_dc[component] = dc;
block[0] = dc << (3 - s->intra_dc_precision);
av_dlog(s->avctx, "dc=%d\n", block[0]);
mismatch = block[0] ^ 1;
i = 0;
if (s->intra_vlc_format)
rl = &ff_rl_mpeg2;
else
rl = &ff_rl_mpeg1;
if (s->picture_structure == PICT_FRAME) {
mb_type |= MB_TYPE_16x16 | MB_TYPE_INTERLACED;
{
OPEN_READER(re, &s->gb);
/* now quantify & encode AC coefficients */
for (;;) {
UPDATE_CACHE(re, &s->gb);
GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2, 0);
// m = 1 + 2 * s->top_field_first;
m = s->top_field_first ? 1 : 3;
if (level == 127) {
break;
} else if (level != 0) {
i += run;
j = scantable[i];
level = (level * qscale * quant_matrix[j]) >> 4;
level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
LAST_SKIP_BITS(re, &s->gb, 1);
} else {
/* escape */
run = SHOW_UBITS(re, &s->gb, 6) + 1; LAST_SKIP_BITS(re, &s->gb, 6);
UPDATE_CACHE(re, &s->gb);
level = SHOW_SBITS(re, &s->gb, 12); SKIP_BITS(re, &s->gb, 12);
i += run;
j = scantable[i];
if (level < 0) {
level = (-level * qscale * quant_matrix[j]) >> 4;
level = -level;
} else {
level = (level * qscale * quant_matrix[j]) >> 4;
/* top -> top pred */
s->mv[i][2][0] = ((mx * m + (mx > 0)) >> 1) + dmx;
s->mv[i][2][1] = ((my * m + (my > 0)) >> 1) + dmy - 1;
m = 4 - m;
s->mv[i][3][0] = ((mx * m + (mx > 0)) >> 1) + dmx;
s->mv[i][3][1] = ((my * m + (my > 0)) >> 1) + dmy + 1;
} else {
mb_type |= MB_TYPE_16x16;
s->mv[i][2][0] = ((mx + (mx > 0)) >> 1) + dmx;
s->mv[i][2][1] = ((my + (my > 0)) >> 1) + dmy;
if (s->picture_structure == PICT_TOP_FIELD)
s->mv[i][2][1]--;
else
s->mv[i][2][1]++;
}
}
}
}
if (i > 63) {
av_log(s->avctx, AV_LOG_ERROR, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y);
break;
default:
av_log(s->avctx, AV_LOG_ERROR, "00 motion_type at %d %d\n", s->mb_x, s->mb_y);
return -1;
}
mismatch ^= level;
block[j] = level;
}
CLOSE_READER(re, &s->gb);
}
block[63] ^= mismatch & 1;
s->block_last_index[n] = i;
return 0;
}
s->mb_intra = 0;
if (HAS_CBP(mb_type)) {
s->dsp.clear_blocks(s->block[0]);
static inline int mpeg2_fast_decode_block_intra(MpegEncContext *s, DCTELEM *block, int n)
{
int level, dc, diff, j, run;
int component;
RLTable *rl;
uint8_t * scantable = s->intra_scantable.permutated;
const uint16_t *quant_matrix;
const int qscale = s->qscale;
cbp = get_vlc2(&s->gb, mb_pat_vlc.table, MB_PAT_VLC_BITS, 1);
if (mb_block_count > 6) {
cbp <<= mb_block_count - 6;
cbp |= get_bits(&s->gb, mb_block_count - 6);
s->dsp.clear_blocks(s->block[6]);
}
if (cbp <= 0) {
av_log(s->avctx, AV_LOG_ERROR, "invalid cbp at %d %d\n", s->mb_x, s->mb_y);
return -1;
}
/* DC coefficient */
if (n < 4) {
quant_matrix = s->intra_matrix;
component = 0;
} else {
quant_matrix = s->chroma_intra_matrix;
component = (n & 1) + 1;
}
diff = decode_dc(&s->gb, component);
if (diff >= 0xffff)
return -1;
dc = s->last_dc[component];
dc += diff;
s->last_dc[component] = dc;
block[0] = dc << (3 - s->intra_dc_precision);
if (s->intra_vlc_format)
rl = &ff_rl_mpeg2;
else
rl = &ff_rl_mpeg1;
//if 1, we memcpy blocks in xvmcvideo
if (CONFIG_MPEG_XVMC_DECODER && s->avctx->xvmc_acceleration > 1) {
ff_xvmc_pack_pblocks(s, cbp);
if (s->swap_uv) {
exchange_uv(s);
}
}
{
OPEN_READER(re, &s->gb);
/* now quantify & encode AC coefficients */
for (;;) {
UPDATE_CACHE(re, &s->gb);
GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2, 0);
if (s->codec_id == CODEC_ID_MPEG2VIDEO) {
if (s->flags2 & CODEC_FLAG2_FAST) {
for (i = 0; i < 6; i++) {
if (cbp & 32) {
mpeg2_fast_decode_block_non_intra(s, *s->pblocks[i], i);
} else {
s->block_last_index[i] = -1;
}
cbp += cbp;
}
} else {
cbp <<= 12-mb_block_count;
if (level == 127) {
break;
} else if (level != 0) {
scantable += run;
j = *scantable;
level = (level * qscale * quant_matrix[j]) >> 4;
level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
LAST_SKIP_BITS(re, &s->gb, 1);
for (i = 0; i < mb_block_count; i++) {
if (cbp & (1 << 11)) {
if (mpeg2_decode_block_non_intra(s, *s->pblocks[i], i) < 0)
return -1;
} else {
s->block_last_index[i] = -1;
}
cbp += cbp;
}
}
} else {
/* escape */
run = SHOW_UBITS(re, &s->gb, 6) + 1; LAST_SKIP_BITS(re, &s->gb, 6);
UPDATE_CACHE(re, &s->gb);
level = SHOW_SBITS(re, &s->gb, 12); SKIP_BITS(re, &s->gb, 12);
scantable += run;
j = *scantable;
if (level < 0) {
level = (-level * qscale * quant_matrix[j]) >> 4;
level = -level;
if (s->flags2 & CODEC_FLAG2_FAST) {
for (i = 0; i < 6; i++) {
if (cbp & 32) {
mpeg1_fast_decode_block_inter(s, *s->pblocks[i], i);
} else {
s->block_last_index[i] = -1;
}
cbp += cbp;
}
} else {
level = (level * qscale * quant_matrix[j]) >> 4;
for (i = 0; i < 6; i++) {
if (cbp & 32) {
if (mpeg1_decode_block_inter(s, *s->pblocks[i], i) < 0)
return -1;
} else {
s->block_last_index[i] = -1;
}
cbp += cbp;
}
}
}
block[j] = level;
} else {
for (i = 0; i < 12; i++)
s->block_last_index[i] = -1;
}
CLOSE_READER(re, &s->gb);
}
s->block_last_index[n] = scantable - s->intra_scantable.permutated;
s->current_picture.f.mb_type[s->mb_x + s->mb_y * s->mb_stride] = mb_type;
return 0;
}
......@@ -1610,15 +1608,6 @@ static void mpeg_decode_picture_coding_extension(Mpeg1Context *s1)
av_dlog(s->avctx, "progressive_frame=%d\n", s->progressive_frame);
}
/**
 * Swap the block pointers stored in s->pblocks[4] and s->pblocks[5].
 */
static void exchange_uv(MpegEncContext *s)
{
    DCTELEM (*saved)[64] = s->pblocks[4];

    s->pblocks[4] = s->pblocks[5];
    s->pblocks[5] = saved;
}
static int mpeg_field_start(MpegEncContext *s, const uint8_t *buf, int buf_size)
{
AVCodecContext *avctx = s->avctx;
......
......@@ -97,17 +97,21 @@ static av_cold int xan_decode_init(AVCodecContext *avctx)
return 0;
}
static int xan_huffman_decode(unsigned char *dest, const unsigned char *src,
int dest_len)
static int xan_huffman_decode(unsigned char *dest, int dest_len,
const unsigned char *src, int src_len)
{
unsigned char byte = *src++;
unsigned char ival = byte + 0x16;
const unsigned char * ptr = src + byte*2;
int ptr_len = src_len - 1 - byte*2;
unsigned char val = ival;
unsigned char *dest_end = dest + dest_len;
GetBitContext gb;
init_get_bits(&gb, ptr, 0); // FIXME: no src size available
if (ptr_len < 0)
return AVERROR_INVALIDDATA;
init_get_bits(&gb, ptr, ptr_len * 8);
while ( val != 0x16 ) {
val = src[val - 0x17 + get_bits1(&gb) * byte];
......@@ -246,7 +250,7 @@ static inline void xan_wc3_copy_pixel_run(XanContext *s,
}
}
static void xan_wc3_decode_frame(XanContext *s) {
static int xan_wc3_decode_frame(XanContext *s) {
int width = s->avctx->width;
int height = s->avctx->height;
......@@ -266,13 +270,30 @@ static void xan_wc3_decode_frame(XanContext *s) {
const unsigned char *size_segment;
const unsigned char *vector_segment;
const unsigned char *imagedata_segment;
int huffman_offset, size_offset, vector_offset, imagedata_offset;
if (s->size < 8)
return AVERROR_INVALIDDATA;
huffman_offset = AV_RL16(&s->buf[0]);
size_offset = AV_RL16(&s->buf[2]);
vector_offset = AV_RL16(&s->buf[4]);
imagedata_offset = AV_RL16(&s->buf[6]);
huffman_segment = s->buf + AV_RL16(&s->buf[0]);
size_segment = s->buf + AV_RL16(&s->buf[2]);
vector_segment = s->buf + AV_RL16(&s->buf[4]);
imagedata_segment = s->buf + AV_RL16(&s->buf[6]);
if (huffman_offset >= s->size ||
size_offset >= s->size ||
vector_offset >= s->size ||
imagedata_offset >= s->size)
return AVERROR_INVALIDDATA;
xan_huffman_decode(opcode_buffer, huffman_segment, opcode_buffer_size);
huffman_segment = s->buf + huffman_offset;
size_segment = s->buf + size_offset;
vector_segment = s->buf + vector_offset;
imagedata_segment = s->buf + imagedata_offset;
if (xan_huffman_decode(opcode_buffer, opcode_buffer_size,
huffman_segment, s->size - huffman_offset) < 0)
return AVERROR_INVALIDDATA;
if (imagedata_segment[0] == 2)
xan_unpack(s->buffer2, &imagedata_segment[1], s->buffer2_size);
......@@ -358,6 +379,7 @@ static void xan_wc3_decode_frame(XanContext *s) {
y += (x + size) / width;
x = (x + size) % width;
}
return 0;
}
#if RUNTIME_GAMMA
......@@ -519,7 +541,8 @@ static int xan_decode_frame(AVCodecContext *avctx,
s->buf = buf;
s->size = buf_size;
xan_wc3_decode_frame(s);
if (xan_wc3_decode_frame(s) < 0)
return AVERROR_INVALIDDATA;
/* release the last frame if it is allocated */
if (s->last_frame.data[0])
......@@ -563,4 +586,3 @@ AVCodec ff_xan_wc3_decoder = {
.capabilities = CODEC_CAP_DR1,
.long_name = NULL_IF_CONFIG_SMALL("Wing Commander III / Xan"),
};
......@@ -19,6 +19,7 @@ OBJS-$(HAVE_MMX) += x86/rgb2rgb.o \
x86/swscale_mmx.o \
x86/yuv2rgb_mmx.o
OBJS-$(HAVE_VIS) += sparc/yuv2rgb_vis.o
OBJS-$(HAVE_YASM) += x86/scale.o
TESTPROGS = colorspace swscale
......
;******************************************************************************
;* x86-optimized horizontal line scaling functions
;* Copyright (c) 2011 Ronald S. Bultje <rsbultje@gmail.com>
;*
;* This file is part of Libav.
;*
;* Libav is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* Libav is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with Libav; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%include "x86inc.asm"
%include "x86util.asm"
SECTION_RODATA
; Upper clip bound for the 19-bit output path, as packed integers (used by the
; mmx and sse4 variants) and as packed floats (used by the sse2/ssse3 variants,
; which clamp with minps).
max_19bit_int: times 4 dd 0x7ffff
max_19bit_flt: times 4 dd 524287.0
; Bias subtracted from 16-bit source samples before pmaddwd (loaded into m6).
minshort: times 8 dw 0x8000
; Value added back after the horizontal add for 16-bit sources (loaded into
; m7). 0x20000000 == 0x8000 * 0x4000, i.e. this presumably relies on the
; filter coefficients of one output pixel summing to 1 << 14 -- TODO confirm.
unicoeff: times 4 dd 0x20000000
SECTION .text
;-----------------------------------------------------------------------------
; horizontal line scaling
;
; void hscale<source_width>to<intermediate_nbits>_<filterSize>_<opt>
; (SwsContext *c, int{16,32}_t *dst,
; int dstW, const uint{8,16}_t *src,
; const int16_t *filter,
; const int16_t *filterPos, int filterSize);
;
; Scale one horizontal line. Input samples are stored as either 8-bit or
; 16-bit values ($source_width can be 8, 9, 10 or 16; the difference is whether
; we have to downscale before multiplying). The filter is 14 bits. Output is
; either 15 bits (in int16_t) or 19 bits (in int32_t), as given in
; $intermediate_nbits. Each output pixel is generated from $filterSize input
; pixels; the position of the first pixel is given in filterPos[nOutputPixel].
;-----------------------------------------------------------------------------
; SCALE_FUNC source_width, intermediate_nbits, filtersize, filtersuffix, opt, n_args, n_xmm
;
; Emits one function named hscale<%1>to<%2>_<%4>_<%5>.
;   %1 source_width        bits per input sample (8, 9, 10 or 16)
;   %2 intermediate_nbits  15 or 19; selects the clip/store path at the end
;   %3 filtersize          4 or 8 for the fixed-tap loops, X for the generic loop
;   %4 filtersuffix        suffix used in the symbol name; X4 additionally
;                          enables the trailing 4-sample step of the generic loop
;   %5 opt                 instruction-set suffix (mmx, sse2, ssse3, sse4)
;   %6 n_args              argument count handed to cglobal
;   %7 n_xmm               XMM register count handed to cglobal
;
; Register use, following the argument order of the C prototype:
;   r1 = dst, r2 = dstW, r3 = src, r4 = filter, r5 = filterPos,
;   r6 = filterSize (generic variant only). r0 is overwritten and used as
;   scratch for filterPos values.
;   m2 = 19-bit clip bound, m3 = zero (8-bit sources only),
;   m6/m7 = bias constants (16-bit sources only).
%macro SCALE_FUNC 7
cglobal hscale%1to%2_%4_%5, %6, 7, %7
%ifdef ARCH_X86_64
movsxd r2, r2d ; sign-extend the 32-bit dstW argument
%endif ; x86-64
%if %2 == 19
%if mmsize == 8 ; mmx
mova m2, [max_19bit_int]
%elifidn %5, sse4
mova m2, [max_19bit_int]
%else ; ssse3/sse2
mova m2, [max_19bit_flt]
%endif ; mmx/sse2/ssse3/sse4
%endif ; %2 == 19
%if %1 == 16
mova m6, [minshort]
mova m7, [unicoeff]
%elif %1 == 8
pxor m3, m3
%endif ; %1 == 8/16
; movlh loads 4 source samples, movbh loads mmsize/2 source samples;
; srcmul is the size of one source sample in bytes
%if %1 == 8
%define movlh movd
%define movbh movh
%define srcmul 1
%else ; %1 == 9-16
%define movlh movq
%define movbh movu
%define srcmul 2
%endif ; %1 == 8/9-16
%ifnidn %3, X
; setup loop
%if %3 == 8
shl r2, 1 ; this allows *16 (i.e. now *8) in lea instructions for the 8-tap filter
%define r2shr 1
%else ; %3 == 4
%define r2shr 0
%endif ; %3 == 8
; point filter/dst/filterPos at their ends and count r2 up from -dstW to 0
lea r4, [r4+r2*8]
%if %2 == 15
lea r1, [r1+r2*(2>>r2shr)]
%else ; %2 == 19
lea r1, [r1+r2*(4>>r2shr)]
%endif ; %2 == 15/19
lea r5, [r5+r2*(2>>r2shr)]
neg r2
.loop:
%if %3 == 4 ; filterSize == 4 scaling
; load 2x4 or 4x4 source pixels into m0/m1
movsx r0, word [r5+r2*2+0] ; filterPos[0]
movsx r6, word [r5+r2*2+2] ; filterPos[1]
movlh m0, [r3+r0*srcmul] ; src[filterPos[0] + {0,1,2,3}]
%if mmsize == 8
movlh m1, [r3+r6*srcmul] ; src[filterPos[1] + {0,1,2,3}]
%else ; mmsize == 16
%if %1 > 8
movhps m0, [r3+r6*srcmul] ; src[filterPos[1] + {0,1,2,3}]
%else ; %1 == 8
movd m4, [r3+r6*srcmul] ; src[filterPos[1] + {0,1,2,3}]
%endif
movsx r0, word [r5+r2*2+4] ; filterPos[2]
movsx r6, word [r5+r2*2+6] ; filterPos[3]
movlh m1, [r3+r0*srcmul] ; src[filterPos[2] + {0,1,2,3}]
%if %1 > 8
movhps m1, [r3+r6*srcmul] ; src[filterPos[3] + {0,1,2,3}]
%else ; %1 == 8
movd m5, [r3+r6*srcmul] ; src[filterPos[3] + {0,1,2,3}]
punpckldq m0, m4
punpckldq m1, m5
%endif ; %1 > 8 / %1 == 8
%endif ; mmsize == 8/16
%if %1 == 8
punpcklbw m0, m3 ; byte -> word
punpcklbw m1, m3 ; byte -> word
%endif ; %1 == 8
; multiply with filter coefficients
%if %1 == 16 ; pmaddwd needs signed adds, so this moves unsigned -> signed, we'll
; add back 0x8000 * sum(coeffs) after the horizontal add
psubw m0, m6
psubw m1, m6
%endif ; %1 == 16
pmaddwd m0, [r4+r2*8+mmsize*0] ; *= filter[{0,1,..,6,7}]
pmaddwd m1, [r4+r2*8+mmsize*1] ; *= filter[{8,9,..,14,15}]
; add up horizontally (4 srcpix * 4 coefficients -> 1 dstpix)
%if mmsize == 8 ; mmx
movq m4, m0
punpckldq m0, m1
punpckhdq m4, m1
paddd m0, m4
%elifidn %5, sse2
mova m4, m0
shufps m0, m1, 10001000b
shufps m4, m1, 11011101b
paddd m0, m4
%else ; ssse3/sse4
phaddd m0, m1 ; filter[{ 0, 1, 2, 3}]*src[filterPos[0]+{0,1,2,3}],
; filter[{ 4, 5, 6, 7}]*src[filterPos[1]+{0,1,2,3}],
; filter[{ 8, 9,10,11}]*src[filterPos[2]+{0,1,2,3}],
; filter[{12,13,14,15}]*src[filterPos[3]+{0,1,2,3}]
%endif ; mmx/sse2/ssse3/sse4
%else ; %3 == 8, i.e. filterSize == 8 scaling
; load 2x8 or 4x8 source pixels into m0, m1, m4 and m5
movsx r0, word [r5+r2*1+0] ; filterPos[0]
movsx r6, word [r5+r2*1+2] ; filterPos[1]
movbh m0, [r3+ r0 *srcmul] ; src[filterPos[0] + {0,1,2,3,4,5,6,7}]
%if mmsize == 8
movbh m1, [r3+(r0+4)*srcmul] ; src[filterPos[0] + {4,5,6,7}]
movbh m4, [r3+ r6 *srcmul] ; src[filterPos[1] + {0,1,2,3}]
movbh m5, [r3+(r6+4)*srcmul] ; src[filterPos[1] + {4,5,6,7}]
%else ; mmsize == 16
movbh m1, [r3+ r6 *srcmul] ; src[filterPos[1] + {0,1,2,3,4,5,6,7}]
movsx r0, word [r5+r2*1+4] ; filterPos[2]
movsx r6, word [r5+r2*1+6] ; filterPos[3]
movbh m4, [r3+ r0 *srcmul] ; src[filterPos[2] + {0,1,2,3,4,5,6,7}]
movbh m5, [r3+ r6 *srcmul] ; src[filterPos[3] + {0,1,2,3,4,5,6,7}]
%endif ; mmsize == 8/16
%if %1 == 8
punpcklbw m0, m3 ; byte -> word
punpcklbw m1, m3 ; byte -> word
punpcklbw m4, m3 ; byte -> word
punpcklbw m5, m3 ; byte -> word
%endif ; %1 == 8
; multiply
%if %1 == 16 ; pmaddwd needs signed adds, so this moves unsigned -> signed, we'll
; add back 0x8000 * sum(coeffs) after the horizontal add
psubw m0, m6
psubw m1, m6
psubw m4, m6
psubw m5, m6
%endif ; %1 == 16
pmaddwd m0, [r4+r2*8+mmsize*0] ; *= filter[{0,1,..,6,7}]
pmaddwd m1, [r4+r2*8+mmsize*1] ; *= filter[{8,9,..,14,15}]
pmaddwd m4, [r4+r2*8+mmsize*2] ; *= filter[{16,17,..,22,23}]
pmaddwd m5, [r4+r2*8+mmsize*3] ; *= filter[{24,25,..,30,31}]
; add up horizontally (8 srcpix * 8 coefficients -> 1 dstpix)
%if mmsize == 8
paddd m0, m1
paddd m4, m5
movq m1, m0
punpckldq m0, m4
punpckhdq m1, m4
paddd m0, m1
%elifidn %5, sse2
; mex is a scratch register: m3 holds zero only for 8-bit sources and m6
; holds the bias only for 16-bit sources, so pick whichever is free.
; NOTE: for 8-bit sources this makes the sse2 8-tap function use m0-m6,
; i.e. seven XMM registers, which n_xmm (%7) must cover.
%if %1 == 8
%define mex m6
%else
%define mex m3
%endif
; emulate horizontal add as transpose + vertical add
mova mex, m0
punpckldq m0, m1
punpckhdq mex, m1
paddd m0, mex
mova m1, m4
punpckldq m4, m5
punpckhdq m1, m5
paddd m4, m1
mova m1, m0
punpcklqdq m0, m4
punpckhqdq m1, m4
paddd m0, m1
%else ; ssse3/sse4
; FIXME if we rearrange the filter in pairs of 4, we can
; load pixels likewise and use 2 x paddd + phaddd instead
; of 3 x phaddd here, faster on older cpus
phaddd m0, m1
phaddd m4, m5
phaddd m0, m4 ; filter[{ 0, 1,..., 6, 7}]*src[filterPos[0]+{0,1,...,6,7}],
; filter[{ 8, 9,...,14,15}]*src[filterPos[1]+{0,1,...,6,7}],
; filter[{16,17,...,22,23}]*src[filterPos[2]+{0,1,...,6,7}],
; filter[{24,25,...,30,31}]*src[filterPos[3]+{0,1,...,6,7}]
%endif ; mmx/sse2/ssse3/sse4
%endif ; %3 == 4/8
%else ; %3 == X, i.e. any filterSize scaling
%ifidn %4, X4
%define r6sub 4
%else ; %4 == X || %4 == X8
%define r6sub 0
%endif ; %4 ==/!= X4
; filter2 holds the end pointer for the inner loop: a spare register (r12,
; saved on the stack) on x86-64, the filterSize argument slot (r6m) on x86-32
%ifdef ARCH_X86_64
push r12
movsxd r6, r6d ; filterSize
lea r12, [r3+(r6-r6sub)*srcmul] ; &src[filterSize&~4]
%define src_reg r11
%define r1x r10
%define filter2 r12
%else ; x86-32
lea r0, [r3+(r6-r6sub)*srcmul] ; &src[filterSize&~4]
mov r6m, r0
%define src_reg r3
%define r1x r1
%define filter2 r6m
%endif ; x86-32/64
lea r5, [r5+r2*2]
%if %2 == 15
lea r1, [r1+r2*2]
%else ; %2 == 19
lea r1, [r1+r2*4]
%endif ; %2 == 15/19
movifnidn r1mp, r1
neg r2
.loop:
movsx r0, word [r5+r2*2+0] ; filterPos[0]
movsx r1x, word [r5+r2*2+2] ; filterPos[1]
; FIXME maybe do 4px/iteration on x86-64 (x86-32 wouldn't have enough regs)?
pxor m4, m4
pxor m5, m5
mov src_reg, r3mp
.innerloop:
; load 2x4 (mmx) or 2x8 (sse) source pixels into m0/m1 -> m4/m5
movbh m0, [src_reg+r0 *srcmul] ; src[filterPos[0] + {0,1,2,3(,4,5,6,7)}]
movbh m1, [src_reg+(r1x+r6sub)*srcmul] ; src[filterPos[1] + {0,1,2,3(,4,5,6,7)}]
%if %1 == 8
punpcklbw m0, m3
punpcklbw m1, m3
%endif ; %1 == 8
; multiply
%if %1 == 16 ; pmaddwd needs signed adds, so this moves unsigned -> signed, we'll
; add back 0x8000 * sum(coeffs) after the horizontal add
psubw m0, m6
psubw m1, m6
%endif ; %1 == 16
pmaddwd m0, [r4 ] ; filter[{0,1,2,3(,4,5,6,7)}]
pmaddwd m1, [r4+(r6+r6sub)*2] ; filter[filtersize+{0,1,2,3(,4,5,6,7)}]
paddd m4, m0
paddd m5, m1
add r4, mmsize
add src_reg, srcmul*mmsize/2
cmp src_reg, filter2 ; while (src += 4) < &src[filterSize]
jl .innerloop
%ifidn %4, X4
movsx r1x, word [r5+r2*2+2] ; filterPos[1]
movlh m0, [src_reg+r0 *srcmul] ; split last 4 srcpx of dstpx[0]
sub r1x, r6 ; and first 4 srcpx of dstpx[1]
%if %1 > 8
movhps m0, [src_reg+(r1x+r6sub)*srcmul]
%else ; %1 == 8
movd m1, [src_reg+(r1x+r6sub)*srcmul]
punpckldq m0, m1
%endif ; %1 > 8 / %1 == 8
%if %1 == 8
punpcklbw m0, m3
%endif ; %1 == 8
%if %1 == 16 ; pmaddwd needs signed adds, so this moves unsigned -> signed, we'll
; add back 0x8000 * sum(coeffs) after the horizontal add
psubw m0, m6
%endif ; %1 == 16
pmaddwd m0, [r4]
%endif ; %4 == X4
lea r4, [r4+(r6+r6sub)*2]
; reduce the two accumulators (m4 = dstpx[0], m5 = dstpx[1]) to one sum each,
; leaving the result in m0
%if mmsize == 8 ; mmx
movq m0, m4
punpckldq m4, m5
punpckhdq m0, m5
paddd m0, m4
%else ; mmsize == 16
%ifidn %5, sse2
mova m1, m4
punpcklqdq m4, m5
punpckhqdq m1, m5
paddd m4, m1
%else ; ssse3/sse4
phaddd m4, m5
%endif ; sse2/ssse3/sse4
%ifidn %4, X4
paddd m4, m0
%endif ; %4 == X4
%ifidn %5, sse2
pshufd m4, m4, 11011000b
movhlps m0, m4
paddd m0, m4
%else ; ssse3/sse4
phaddd m4, m4
SWAP 0, 4
%endif ; sse2/ssse3/sse4
%endif ; mmsize == 8/16
%endif ; %3 ==/!= X
%if %1 == 16 ; add 0x8000 * sum(coeffs), i.e. back from signed -> unsigned
paddd m0, m7
%endif ; %1 == 16
; clip, store
; shift = 14 (filter bits) + source bits - intermediate bits
psrad m0, 14 + %1 - %2
%ifidn %3, X
movifnidn r1, r1mp
%endif ; %3 == X
%if %2 == 15
packssdw m0, m0
%ifnidn %3, X
movh [r1+r2*(2>>r2shr)], m0
%else ; %3 == X
movd [r1+r2*2], m0
%endif ; %3 ==/!= X
%else ; %2 == 19
%if mmsize == 8
PMINSD_MMX m0, m2, m4
%elifidn %5, sse4
pminsd m0, m2
%else ; sse2/ssse3
; pminsd is only used for the sse4 variant; clamp in the float domain here
cvtdq2ps m0, m0
minps m0, m2
cvtps2dq m0, m0
%endif ; mmx/sse2/ssse3/sse4
%ifnidn %3, X
movu [r1+r2*(4>>r2shr)], m0
%else ; %3 == X
movq [r1+r2*4], m0
%endif ; %3 ==/!= X
%endif ; %2 == 15/19
%ifnidn %3, X
add r2, (mmsize<<r2shr)/4 ; both 8tap and 4tap really only do 4 pixels (or for mmx: 2 pixels)
; per iteration. see "shl r2,1" above as for why we do this
%else ; %3 == X
add r2, 2 ; the generic loop produces 2 output pixels per iteration
%endif ; %3 ==/!= X
jl .loop
%ifnidn %3, X
REP_RET
%else ; %3 == X
%ifdef ARCH_X86_64
pop r12 ; restore the register pushed in the generic-loop setup
RET
%else ; x86-32
REP_RET
%endif ; x86-32/64
%endif ; %3 ==/!= X
%endmacro
; SCALE_FUNCS source_width, intermediate_nbits, opt, n_xmm
;
; Instantiates every filter-size variant for one source width / output width /
; instruction set: the fixed 4-tap and 8-tap functions (6 arguments), plus the
; generic-filterSize functions (7 arguments, the extra one being filterSize).
; With 8-byte registers (mmsize == 8) a single generic variant "X" is emitted;
; with 16-byte registers two are emitted, "X4" and "X8".
%macro SCALE_FUNCS 4
SCALE_FUNC %1, %2, 4, 4, %3, 6, %4
SCALE_FUNC %1, %2, 8, 8, %3, 6, %4
%if mmsize == 8
SCALE_FUNC %1, %2, X, X, %3, 7, %4
%else
SCALE_FUNC %1, %2, X, X4, %3, 7, %4
SCALE_FUNC %1, %2, X, X8, %3, 7, %4
%endif
%endmacro
; SCALE_FUNCS2 opt, 8_xmm_args, 9to10_xmm_args, 16_xmm_args
;
; Instantiates all source widths (8, 9, 10, 16) for one instruction set, with
; the XMM register count given separately for 8-bit, 9/10-bit and 16-bit
; sources. The 15-bit-output functions are not emitted for sse4: SCALE_FUNC
; only tests for sse4 inside its 19-bit paths, so sse4 15-bit functions would
; be identical to the ssse3 ones.
%macro SCALE_FUNCS2 4
%ifnidn %1, sse4
SCALE_FUNCS 8, 15, %1, %2
SCALE_FUNCS 9, 15, %1, %3
SCALE_FUNCS 10, 15, %1, %3
SCALE_FUNCS 16, 15, %1, %4
%endif ; !sse4
SCALE_FUNCS 8, 19, %1, %2
SCALE_FUNCS 9, 19, %1, %3
SCALE_FUNCS 10, 19, %1, %3
SCALE_FUNCS 16, 19, %1, %4
%endmacro
; Instantiate the scalers for each instruction set. The MMX versions are only
; built on x86-32.
%ifdef ARCH_X86_32
INIT_MMX
SCALE_FUNCS2 mmx, 0, 0, 0
%endif
INIT_XMM
; XMM register counts are (8-bit, 9/10-bit, 16-bit source).
; sse2: the 8-tap horizontal add needs a scratch register (mex). For 8-bit
; sources m3 holds zero, so mex is m6 and the function touches m0-m6, i.e. 7
; registers. For 9/10-bit sources mex is m3 and m0-m5 (6 registers) suffice.
; The count must cover m6, otherwise it is clobbered without being saved on
; ABIs where xmm6 is callee-saved.
SCALE_FUNCS2 sse2, 7, 6, 8
; ssse3/sse4 use phaddd and need no scratch register: m0-m5 for 8/9/10-bit.
; 16-bit sources additionally use m6/m7 for the signed-bias constants.
SCALE_FUNCS2 ssse3, 6, 6, 8
SCALE_FUNCS2 sse4, 6, 6, 8
......@@ -176,6 +176,41 @@ void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufI
}
}
/*
 * Prototypes for the horizontal scalers that are defined outside this file.
 *
 * SCALE_FUNC declares one function named
 *     ff_hscale<from_bpc>to<to_bpc>_<filter_n>_<opt>
 * where filter_n is one of 4, 8, X, X4 or X8 (see SCALE_FUNCS_MMX/_SSE below).
 *
 * The macro intentionally has no trailing semicolon: each use site supplies
 * exactly one, so the expansion never leaves an empty declaration (a stray
 * ';') at file scope, which ISO C does not permit.
 */
#define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
extern void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
                                                SwsContext *c, int16_t *data, \
                                                int dstW, const uint8_t *src, \
                                                const int16_t *filter, \
                                                const int16_t *filterPos, int filterSize)

/* All source (8/9/10/16) x intermediate (15/19) depth pairs for one filter size. */
#define SCALE_FUNCS(filter_n, opt) \
    SCALE_FUNC(filter_n,  8, 15, opt); \
    SCALE_FUNC(filter_n,  9, 15, opt); \
    SCALE_FUNC(filter_n, 10, 15, opt); \
    SCALE_FUNC(filter_n, 16, 15, opt); \
    SCALE_FUNC(filter_n,  8, 19, opt); \
    SCALE_FUNC(filter_n,  9, 19, opt); \
    SCALE_FUNC(filter_n, 10, 19, opt); \
    SCALE_FUNC(filter_n, 16, 19, opt)

/* MMX flavour: fixed 4-tap and 8-tap, plus a single variable-size variant X. */
#define SCALE_FUNCS_MMX(opt) \
    SCALE_FUNCS(4, opt); \
    SCALE_FUNCS(8, opt); \
    SCALE_FUNCS(X, opt)

/* SSE flavours: fixed 4-tap and 8-tap, plus two variable-size variants X4/X8. */
#define SCALE_FUNCS_SSE(opt) \
    SCALE_FUNCS(4, opt); \
    SCALE_FUNCS(8, opt); \
    SCALE_FUNCS(X4, opt); \
    SCALE_FUNCS(X8, opt)

#if ARCH_X86_32
SCALE_FUNCS_MMX(mmx);
#endif
SCALE_FUNCS_SSE(sse2);
SCALE_FUNCS_SSE(ssse3);
/*
 * NOTE(review): this also declares ff_hscale*to15_*_sse4; the init code only
 * ever selects the ssse3 versions for the to-15 case, so those particular
 * declarations appear to be unused - confirm before relying on them.
 */
SCALE_FUNCS_SSE(sse4);
void ff_sws_init_swScale_mmx(SwsContext *c)
{
int cpu_flags = av_get_cpu_flags();
......@@ -186,4 +221,55 @@ void ff_sws_init_swScale_mmx(SwsContext *c)
if (cpu_flags & AV_CPU_FLAG_MMX2)
sws_init_swScale_MMX2(c);
#endif
#if HAVE_YASM
/*
 * Pick the external horizontal scaler matching the context's bit depths and
 * store it in hscalefn.
 *
 * c->srcBpc (8, 9, 10 or 16) selects the source-depth part of the name;
 * c->dstBpc <= 10 selects the "to15" function built with opt2, anything else
 * the "to19" function built with opt1. filtersize is pasted into the name and
 * must therefore be a literal token (4, 8, X, X4 or X8).
 *
 * The 16-bit branch is additionally skipped for PAL8/RGB source formats whose
 * first component has depth_minus1 < 15. When no branch matches, hscalefn is
 * left untouched, so whatever was assigned earlier stays in effect.
 */
#define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \
if (c->srcBpc == 8) { \
hscalefn = c->dstBpc <= 10 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \
ff_hscale8to19_ ## filtersize ## _ ## opt1; \
} else if (c->srcBpc == 9) { \
hscalefn = c->dstBpc <= 10 ? ff_hscale9to15_ ## filtersize ## _ ## opt2 : \
ff_hscale9to19_ ## filtersize ## _ ## opt1; \
} else if (c->srcBpc == 10) { \
hscalefn = c->dstBpc <= 10 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \
ff_hscale10to19_ ## filtersize ## _ ## opt1; \
} else if(c->srcBpc == 16 && !((c->srcFormat==PIX_FMT_PAL8||isAnyRGB(c->srcFormat)) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)) { \
hscalefn = c->dstBpc <= 10 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \
ff_hscale16to19_ ## filtersize ## _ ## opt1; \
} \
} while (0)
/*
 * MMX dispatch on the run-time filter size: dedicated functions for 4 and 8
 * taps, the variable-size "X" function for everything else.
 */
#define ASSIGN_MMX_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
switch (filtersize) { \
case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
default: ASSIGN_SCALE_FUNC2(hscalefn, X, opt1, opt2); break; \
}
#if ARCH_X86_32
if (cpu_flags & AV_CPU_FLAG_MMX) {
ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx);
ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx);
}
#endif
/*
 * SSE dispatch on the run-time filter size: dedicated functions for 4 and 8
 * taps; other sizes use the X4 function when bit 2 of the size is set
 * (filtersize & 4) and the X8 function otherwise.
 */
#define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
switch (filtersize) { \
case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
default: if (filtersize & 4) ASSIGN_SCALE_FUNC2(hscalefn, X4, opt1, opt2); \
else ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2); \
break; \
}
/*
 * The checks below run in order SSE2, SSSE3, SSE4 and each one overwrites the
 * previous assignment, so the last matching CPU flag determines the scaler.
 */
if (cpu_flags & AV_CPU_FLAG_SSE2) {
ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2);
ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2);
}
if (cpu_flags & AV_CPU_FLAG_SSSE3) {
ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3);
ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, ssse3, ssse3);
}
if (cpu_flags & AV_CPU_FLAG_SSE4) {
/* Xto15 don't need special sse4 functions */
/* hence opt2 (used for the to15 names) is ssse3 while opt1 (to19) is sse4 */
ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3);
ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3);
}
#endif
}
......@@ -1951,164 +1951,6 @@ static void RENAME(rgb24ToUV)(int16_t *dstU, int16_t *dstV,
RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_RGB24);
}
#if !COMPILE_TEMPLATE_MMX2
// bilinear / bicubic scaling
/*
 * MMX horizontal scaler for 8-bit input.
 *
 * For every output sample the code reads source bytes starting at
 * src[filterPos[i]], widens them to 16 bit, multiplies them with the matching
 * 16-bit coefficients from filter (pmaddwd), sums the products, shifts the
 * sum right by 7 and stores it to dst with signed 16-bit saturation
 * (packssdw). Each loop iteration produces two output samples.
 *
 * filterSize must be a positive multiple of 4 (asserted). There are three
 * code paths: filterSize == 4, filterSize == 8, and a generic path.
 * The parameter c is not referenced in this function body.
 *
 * In all paths dst and filterPos are advanced past their last element and
 * addressed through a negative byte counter that counts up towards zero;
 * the loops terminate when "add $4" carries (jnc falls through).
 */
static void RENAME(hScale)(SwsContext *c, int16_t *dst, int dstW,
const uint8_t *src, const int16_t *filter,
const int16_t *filterPos, int filterSize)
{
assert(filterSize % 4 == 0 && filterSize>0);
if (filterSize==4) { // Always true for upscaling, sometimes for down, too.
// counter is a negative byte offset into dst/filterPos (2 bytes per sample)
x86_reg counter= -2*dstW;
filter-= counter*2;
filterPos-= counter/2;
dst-= counter/2;
// With PIC the b register is saved/restored by hand instead of being
// listed as a clobber (see the #if !defined(PIC) clobber list below).
__asm__ volatile(
#if defined(PIC)
"push %%"REG_b" \n\t"
#endif
"pxor %%mm7, %%mm7 \n\t"
"push %%"REG_BP" \n\t" // we use 7 regs here ...
"mov %%"REG_a", %%"REG_BP" \n\t"
".p2align 4 \n\t"
"1: \n\t"
"movzwl (%2, %%"REG_BP"), %%eax \n\t"
"movzwl 2(%2, %%"REG_BP"), %%ebx \n\t"
"movq (%1, %%"REG_BP", 4), %%mm1 \n\t"
"movq 8(%1, %%"REG_BP", 4), %%mm3 \n\t"
"movd (%3, %%"REG_a"), %%mm0 \n\t"
"movd (%3, %%"REG_b"), %%mm2 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t"
"pmaddwd %%mm1, %%mm0 \n\t"
"pmaddwd %%mm2, %%mm3 \n\t"
"movq %%mm0, %%mm4 \n\t"
"punpckldq %%mm3, %%mm0 \n\t"
"punpckhdq %%mm3, %%mm4 \n\t"
"paddd %%mm4, %%mm0 \n\t"
"psrad $7, %%mm0 \n\t"
"packssdw %%mm0, %%mm0 \n\t"
"movd %%mm0, (%4, %%"REG_BP") \n\t"
"add $4, %%"REG_BP" \n\t"
" jnc 1b \n\t"
"pop %%"REG_BP" \n\t"
#if defined(PIC)
"pop %%"REG_b" \n\t"
#endif
: "+a" (counter)
: "c" (filter), "d" (filterPos), "S" (src), "D" (dst)
#if !defined(PIC)
: "%"REG_b
#endif
);
} else if (filterSize==8) {
// Same scheme as the 4-tap path, but every output sample uses two groups
// of four coefficients / source bytes whose partial sums are added.
x86_reg counter= -2*dstW;
filter-= counter*4;
filterPos-= counter/2;
dst-= counter/2;
__asm__ volatile(
#if defined(PIC)
"push %%"REG_b" \n\t"
#endif
"pxor %%mm7, %%mm7 \n\t"
"push %%"REG_BP" \n\t" // we use 7 regs here ...
"mov %%"REG_a", %%"REG_BP" \n\t"
".p2align 4 \n\t"
"1: \n\t"
"movzwl (%2, %%"REG_BP"), %%eax \n\t"
"movzwl 2(%2, %%"REG_BP"), %%ebx \n\t"
"movq (%1, %%"REG_BP", 8), %%mm1 \n\t"
"movq 16(%1, %%"REG_BP", 8), %%mm3 \n\t"
"movd (%3, %%"REG_a"), %%mm0 \n\t"
"movd (%3, %%"REG_b"), %%mm2 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t"
"pmaddwd %%mm1, %%mm0 \n\t"
"pmaddwd %%mm2, %%mm3 \n\t"
"movq 8(%1, %%"REG_BP", 8), %%mm1 \n\t"
"movq 24(%1, %%"REG_BP", 8), %%mm5 \n\t"
"movd 4(%3, %%"REG_a"), %%mm4 \n\t"
"movd 4(%3, %%"REG_b"), %%mm2 \n\t"
"punpcklbw %%mm7, %%mm4 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t"
"pmaddwd %%mm1, %%mm4 \n\t"
"pmaddwd %%mm2, %%mm5 \n\t"
"paddd %%mm4, %%mm0 \n\t"
"paddd %%mm5, %%mm3 \n\t"
"movq %%mm0, %%mm4 \n\t"
"punpckldq %%mm3, %%mm0 \n\t"
"punpckhdq %%mm3, %%mm4 \n\t"
"paddd %%mm4, %%mm0 \n\t"
"psrad $7, %%mm0 \n\t"
"packssdw %%mm0, %%mm0 \n\t"
"movd %%mm0, (%4, %%"REG_BP") \n\t"
"add $4, %%"REG_BP" \n\t"
" jnc 1b \n\t"
"pop %%"REG_BP" \n\t"
#if defined(PIC)
"pop %%"REG_b" \n\t"
#endif
: "+a" (counter)
: "c" (filter), "d" (filterPos), "S" (src), "D" (dst)
#if !defined(PIC)
: "%"REG_b
#endif
);
} else {
// Generic path: the inner loop (label 2) consumes four taps per pass for
// both output samples and runs until the source cursor, which starts at
// src, reaches offset (= src + filterSize). Here filter is not pre-biased;
// it is an in/out operand that the asm walks forward: +8 bytes per inner
// pass, then + filterSize*2 bytes to skip the second sample's coefficients,
// which sit filterSize*2 bytes after the first sample's.
const uint8_t *offset = src+filterSize;
x86_reg counter= -2*dstW;
//filter-= counter*filterSize/2;
filterPos-= counter/2;
dst-= counter/2;
__asm__ volatile(
"pxor %%mm7, %%mm7 \n\t"
".p2align 4 \n\t"
"1: \n\t"
"mov %2, %%"REG_c" \n\t"
"movzwl (%%"REG_c", %0), %%eax \n\t"
"movzwl 2(%%"REG_c", %0), %%edx \n\t"
"mov %5, %%"REG_c" \n\t"
"pxor %%mm4, %%mm4 \n\t"
"pxor %%mm5, %%mm5 \n\t"
"2: \n\t"
"movq (%1), %%mm1 \n\t"
"movq (%1, %6), %%mm3 \n\t"
"movd (%%"REG_c", %%"REG_a"), %%mm0 \n\t"
"movd (%%"REG_c", %%"REG_d"), %%mm2 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t"
"pmaddwd %%mm1, %%mm0 \n\t"
"pmaddwd %%mm2, %%mm3 \n\t"
"paddd %%mm3, %%mm5 \n\t"
"paddd %%mm0, %%mm4 \n\t"
"add $8, %1 \n\t"
"add $4, %%"REG_c" \n\t"
"cmp %4, %%"REG_c" \n\t"
" jb 2b \n\t"
"add %6, %1 \n\t"
"movq %%mm4, %%mm0 \n\t"
"punpckldq %%mm5, %%mm4 \n\t"
"punpckhdq %%mm5, %%mm0 \n\t"
"paddd %%mm0, %%mm4 \n\t"
"psrad $7, %%mm4 \n\t"
"packssdw %%mm4, %%mm4 \n\t"
"mov %3, %%"REG_a" \n\t"
"movd %%mm4, (%%"REG_a", %0) \n\t"
"add $4, %0 \n\t"
" jnc 1b \n\t"
: "+r" (counter), "+r" (filter)
: "m" (filterPos), "m" (dst), "m"(offset),
"m" (src), "r" ((x86_reg)filterSize*2)
: "%"REG_a, "%"REG_c, "%"REG_d
);
}
}
#endif /* !COMPILE_TEMPLATE_MMX2 */
static inline void RENAME(hScale16)(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc,
const int16_t *filter, const int16_t *filterPos, long filterSize, int shift)
{
......@@ -2265,7 +2107,6 @@ static inline void RENAME(hScale16)(int16_t *dst, int dstW, const uint16_t *src,
}
}
#if COMPILE_TEMPLATE_MMX2
static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
int dstWidth, const uint8_t *src,
......@@ -2466,10 +2307,6 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
}
if (c->srcBpc == 8 && c->dstBpc <= 10) {
#if !COMPILE_TEMPLATE_MMX2
c->hyScale = c->hcScale = RENAME(hScale );
#endif /* !COMPILE_TEMPLATE_MMX2 */
// Use the new MMX scaler if the MMX2 one can't be used (it is faster than the x86 ASM one).
#if COMPILE_TEMPLATE_MMX2
if (c->flags & SWS_FAST_BILINEAR && c->canMMX2BeUsed)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment