Commit d557838c authored by Roman Shaposhnik

Coalescing the un-weighting and de-quantization steps for faster processing

Originally committed as revision 14691 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent 76c655fb
...@@ -50,7 +50,7 @@ typedef struct DVVideoContext { ...@@ -50,7 +50,7 @@ typedef struct DVVideoContext {
uint8_t *buf; uint8_t *buf;
uint8_t dv_zigzag[2][64]; uint8_t dv_zigzag[2][64];
uint8_t dv_idct_shift[2][2][22][64]; uint32_t dv_idct_factor[2][2][22][64];
void (*get_pixels)(DCTELEM *block, const uint8_t *pixels, int line_size); void (*get_pixels)(DCTELEM *block, const uint8_t *pixels, int line_size);
void (*fdct[2])(DCTELEM *block); void (*fdct[2])(DCTELEM *block);
...@@ -84,25 +84,22 @@ static struct dv_vlc_pair { ...@@ -84,25 +84,22 @@ static struct dv_vlc_pair {
static void dv_build_unquantize_tables(DVVideoContext *s, uint8_t* perm) static void dv_build_unquantize_tables(DVVideoContext *s, uint8_t* perm)
{ {
int i, q, j; int i, q, a;
/* NOTE: max left shift is 6 */ /* NOTE: max left shift is 6 */
for(q = 0; q < 22; q++) { for(q = 0; q < 22; q++) {
/* 88DCT */ /* 88DCT */
for(i = 1; i < 64; i++) { i=1;
/* 88 table */ for(a = 0; a<4; a++) {
j = perm[i]; for(; i < dv_quant_areas[a]; i++) {
s->dv_idct_shift[0][0][q][j] = /* 88 table */
dv_quant_shifts[q][dv_88_areas[i]] + 1; s->dv_idct_factor[0][0][q][i] = dv_iweight_88[i]<<(dv_quant_shifts[q][a] + 1);
s->dv_idct_shift[1][0][q][j] = s->dv_idct_shift[0][0][q][j] + 1; s->dv_idct_factor[1][0][q][i] = s->dv_idct_factor[0][0][q][i]<<1;
}
/* 248 table */
/* 248DCT */ s->dv_idct_factor[0][1][q][i] = dv_iweight_248[i]<<(dv_quant_shifts[q][a] + 1);
for(i = 1; i < 64; i++) { s->dv_idct_factor[1][1][q][i] = s->dv_idct_factor[0][1][q][i]<<1;
/* 248 table */ }
s->dv_idct_shift[0][1][q][i] =
dv_quant_shifts[q][dv_248_areas[i]] + 1;
s->dv_idct_shift[1][1][q][i] = s->dv_idct_shift[0][1][q][i] + 1;
} }
} }
} }
...@@ -247,9 +244,8 @@ static av_cold int dvvideo_init(AVCodecContext *avctx) ...@@ -247,9 +244,8 @@ static av_cold int dvvideo_init(AVCodecContext *avctx)
// #define printf(...) av_log(NULL, AV_LOG_ERROR, __VA_ARGS__) // #define printf(...) av_log(NULL, AV_LOG_ERROR, __VA_ARGS__)
typedef struct BlockInfo { typedef struct BlockInfo {
const uint8_t *shift_table; const uint32_t *factor_table;
const uint8_t *scan_table; const uint8_t *scan_table;
const int *iweight_table;
uint8_t pos; /* position in block */ uint8_t pos; /* position in block */
uint8_t dct_mode; uint8_t dct_mode;
uint8_t partial_bit_count; uint8_t partial_bit_count;
...@@ -281,11 +277,10 @@ static void dv_decode_ac(GetBitContext *gb, BlockInfo *mb, DCTELEM *block) ...@@ -281,11 +277,10 @@ static void dv_decode_ac(GetBitContext *gb, BlockInfo *mb, DCTELEM *block)
{ {
int last_index = gb->size_in_bits; int last_index = gb->size_in_bits;
const uint8_t *scan_table = mb->scan_table; const uint8_t *scan_table = mb->scan_table;
const uint8_t *shift_table = mb->shift_table; const uint32_t *factor_table = mb->factor_table;
const int *iweight_table = mb->iweight_table;
int pos = mb->pos; int pos = mb->pos;
int partial_bit_count = mb->partial_bit_count; int partial_bit_count = mb->partial_bit_count;
int level, pos1, run, vlc_len, index; int level, run, vlc_len, index;
OPEN_READER(re, gb); OPEN_READER(re, gb);
UPDATE_CACHE(re, gb); UPDATE_CACHE(re, gb);
...@@ -330,13 +325,8 @@ static void dv_decode_ac(GetBitContext *gb, BlockInfo *mb, DCTELEM *block) ...@@ -330,13 +325,8 @@ static void dv_decode_ac(GetBitContext *gb, BlockInfo *mb, DCTELEM *block)
if (pos >= 64) if (pos >= 64)
break; break;
pos1 = scan_table[pos]; level = (level*factor_table[pos] + (1 << (dv_iweight_bits-1))) >> dv_iweight_bits;
level <<= shift_table[pos1]; block[scan_table[pos]] = level;
/* unweigh, round, and shift down */
level = (level*iweight_table[pos] + (1 << (dv_iweight_bits-1))) >> dv_iweight_bits;
block[pos1] = level;
UPDATE_CACHE(re, gb); UPDATE_CACHE(re, gb);
} }
...@@ -402,9 +392,8 @@ static inline void dv_decode_video_segment(DVVideoContext *s, ...@@ -402,9 +392,8 @@ static inline void dv_decode_video_segment(DVVideoContext *s,
dct_mode = get_bits1(&gb); dct_mode = get_bits1(&gb);
mb->dct_mode = dct_mode; mb->dct_mode = dct_mode;
mb->scan_table = s->dv_zigzag[dct_mode]; mb->scan_table = s->dv_zigzag[dct_mode];
mb->iweight_table = dct_mode ? dv_iweight_248 : dv_iweight_88;
class1 = get_bits(&gb, 2); class1 = get_bits(&gb, 2);
mb->shift_table = s->dv_idct_shift[class1 == 3][dct_mode] mb->factor_table = s->dv_idct_factor[class1 == 3][dct_mode]
[quant + dv_quant_offset[class1]]; [quant + dv_quant_offset[class1]];
dc = dc << 2; dc = dc << 2;
/* convert to unsigned because 128 is not added in the /* convert to unsigned because 128 is not added in the
......
...@@ -286,28 +286,6 @@ static const uint8_t dv_vlc_level[409] = { ...@@ -286,28 +286,6 @@ static const uint8_t dv_vlc_level[409] = {
}; };
/* unquant tables (not used directly) */ /* unquant tables (not used directly) */
static const uint8_t dv_88_areas[64] = {
0,0,0,1,1,1,2,2,
0,0,1,1,1,2,2,2,
0,1,1,1,2,2,2,3,
1,1,1,2,2,2,3,3,
1,1,2,2,2,3,3,3,
1,2,2,2,3,3,3,3,
2,2,2,3,3,3,3,3,
2,2,3,3,3,3,3,3,
};
static const uint8_t dv_248_areas[64] = {
0,0,1,1,1,2,2,3,
0,0,1,1,2,2,2,3,
0,1,1,2,2,2,3,3,
0,1,1,2,2,2,3,3,
1,1,2,2,2,3,3,3,
1,1,2,2,2,3,3,3,
1,2,2,2,3,3,3,3,
1,2,2,3,3,3,3,3,
};
static const uint8_t dv_quant_shifts[22][4] = { static const uint8_t dv_quant_shifts[22][4] = {
{ 3,3,4,4 }, { 3,3,4,4 },
{ 3,3,4,4 }, { 3,3,4,4 },
...@@ -334,6 +312,7 @@ static const uint8_t dv_quant_shifts[22][4] = { ...@@ -334,6 +312,7 @@ static const uint8_t dv_quant_shifts[22][4] = {
}; };
static const uint8_t dv_quant_offset[4] = { 6, 3, 0, 1 }; static const uint8_t dv_quant_offset[4] = { 6, 3, 0, 1 };
static const uint8_t dv_quant_areas[4] = { 6, 21, 43, 64 };
/* NOTE: I prefer hardcoding the positioning of dv blocks, it is /* NOTE: I prefer hardcoding the positioning of dv blocks, it is
simpler :-) */ simpler :-) */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment