Commit 642ccefb authored by Michael Niedermayer

Move MMX quantizer matrices out of MpegEncContext (23k -> 7k) (no measurable slowdown)

Originally committed as revision 2412 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent 7e4995c3
...@@ -76,12 +76,12 @@ static int RENAME(dct_quantize)(MpegEncContext *s, ...@@ -76,12 +76,12 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
block[0]=0; //avoid fake overflow block[0]=0; //avoid fake overflow
// temp_block[0] = (block[0] + (q >> 1)) / q; // temp_block[0] = (block[0] + (q >> 1)) / q;
last_non_zero_p1 = 1; last_non_zero_p1 = 1;
bias = s->q_intra_matrix16_bias[qscale]; bias = s->q_intra_matrix16[qscale][1];
qmat = s->q_intra_matrix16[qscale]; qmat = s->q_intra_matrix16[qscale][0];
} else { } else {
last_non_zero_p1 = 0; last_non_zero_p1 = 0;
bias = s->q_inter_matrix16_bias[qscale]; bias = s->q_inter_matrix16[qscale][1];
qmat = s->q_inter_matrix16[qscale]; qmat = s->q_inter_matrix16[qscale][0];
} }
if(s->out_format == FMT_H263 && s->mpeg_quant==0){ if(s->out_format == FMT_H263 && s->mpeg_quant==0){
......
...@@ -93,7 +93,7 @@ static uint8_t default_fcode_tab[MAX_MV*2+1]; ...@@ -93,7 +93,7 @@ static uint8_t default_fcode_tab[MAX_MV*2+1];
enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1}; enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16)[64], uint16_t (*qmat16_bias)[64], static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16)[2][64],
const uint16_t *quant_matrix, int bias, int qmin, int qmax) const uint16_t *quant_matrix, int bias, int qmin, int qmax)
{ {
int qscale; int qscale;
...@@ -132,10 +132,10 @@ static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16 ...@@ -132,10 +132,10 @@ static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16
*/ */
qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j])); qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
// qmat [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]); // qmat [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
qmat16[qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]); qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
if(qmat16[qscale][i]==0 || qmat16[qscale][i]==128*256) qmat16[qscale][i]=128*256-1; if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
qmat16_bias[qscale][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][i]); qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
} }
} }
} }
...@@ -445,6 +445,8 @@ int MPV_common_init(MpegEncContext *s) ...@@ -445,6 +445,8 @@ int MPV_common_init(MpegEncContext *s)
CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int)) CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int)) CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
} }
CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture)) CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))
...@@ -565,6 +567,8 @@ void MPV_common_end(MpegEncContext *s) ...@@ -565,6 +567,8 @@ void MPV_common_end(MpegEncContext *s)
av_freep(&s->lambda_table); av_freep(&s->lambda_table);
av_freep(&s->q_intra_matrix); av_freep(&s->q_intra_matrix);
av_freep(&s->q_inter_matrix); av_freep(&s->q_inter_matrix);
av_freep(&s->q_intra_matrix16);
av_freep(&s->q_inter_matrix16);
for(i=0; i<MAX_PICTURE_COUNT; i++){ for(i=0; i<MAX_PICTURE_COUNT; i++){
free_picture(s, &s->picture[i]); free_picture(s, &s->picture[i]);
...@@ -882,9 +886,9 @@ int MPV_encode_init(AVCodecContext *avctx) ...@@ -882,9 +886,9 @@ int MPV_encode_init(AVCodecContext *avctx)
/* precompute matrix */ /* precompute matrix */
/* for mjpeg, we do include qscale in the matrix */ /* for mjpeg, we do include qscale in the matrix */
if (s->out_format != FMT_MJPEG) { if (s->out_format != FMT_MJPEG) {
convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16, s->q_intra_matrix16_bias, convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
s->intra_matrix, s->intra_quant_bias, 1, 31); s->intra_matrix, s->intra_quant_bias, 1, 31);
convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16, s->q_inter_matrix16_bias, convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
s->inter_matrix, s->inter_quant_bias, 1, 31); s->inter_matrix, s->inter_quant_bias, 1, 31);
} }
...@@ -3502,7 +3506,7 @@ static void encode_picture(MpegEncContext *s, int picture_number) ...@@ -3502,7 +3506,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
s->intra_matrix[j] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3); s->intra_matrix[j] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
} }
convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16, convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
s->q_intra_matrix16_bias, s->intra_matrix, s->intra_quant_bias, 8, 8); s->intra_matrix, s->intra_quant_bias, 8, 8);
} }
//FIXME var duplication //FIXME var duplication
......
...@@ -459,11 +459,9 @@ typedef struct MpegEncContext { ...@@ -459,11 +459,9 @@ typedef struct MpegEncContext {
/** precomputed matrix (combine qscale and DCT renorm) */ /** precomputed matrix (combine qscale and DCT renorm) */
int (*q_intra_matrix)[64]; int (*q_intra_matrix)[64];
int (*q_inter_matrix)[64]; int (*q_inter_matrix)[64];
/** identical to the above but for MMX & these are not permutated */ /** identical to the above but for MMX & these are not permutated, second 64 entries are bias*/
uint16_t __align8 q_intra_matrix16[32][64]; uint16_t (*q_intra_matrix16)[2][64];
uint16_t __align8 q_inter_matrix16[32][64]; uint16_t (*q_inter_matrix16)[2][64];
uint16_t __align8 q_intra_matrix16_bias[32][64];
uint16_t __align8 q_inter_matrix16_bias[32][64];
int block_last_index[6]; ///< last non zero coefficient in block int block_last_index[6]; ///< last non zero coefficient in block
/* scantables */ /* scantables */
ScanTable __align8 intra_scantable; ScanTable __align8 intra_scantable;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment