Commit d50635cd authored by Michael Niedermayer

split intra / inter dequantization

Originally committed as revision 2607 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent 7ebfc0ea
...@@ -2766,7 +2766,7 @@ static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *s ...@@ -2766,7 +2766,7 @@ static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *s
memcpy(bak, temp, 64*sizeof(DCTELEM)); memcpy(bak, temp, 64*sizeof(DCTELEM));
s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
s->dct_unquantize(s, temp, 0, s->qscale); s->dct_unquantize_inter(s, temp, 0, s->qscale);
simple_idct(temp); //FIXME simple_idct(temp); //FIXME
for(i=0; i<64; i++) for(i=0; i<64; i++)
...@@ -2839,7 +2839,10 @@ static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int ...@@ -2839,7 +2839,10 @@ static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int
} }
if(last>=0){ if(last>=0){
s->dct_unquantize(s, temp, 0, s->qscale); if(s->mb_intra)
s->dct_unquantize_intra(s, temp, 0, s->qscale);
else
s->dct_unquantize_inter(s, temp, 0, s->qscale);
} }
s->dsp.idct_add(bak, stride, temp); s->dsp.idct_add(bak, stride, temp);
......
...@@ -31,31 +31,92 @@ static const unsigned long long int mm_wabs __attribute__ ((aligned(8))) = 0xfff ...@@ -31,31 +31,92 @@ static const unsigned long long int mm_wabs __attribute__ ((aligned(8))) = 0xfff
static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL; static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL;
static void dct_unquantize_h263_mmx(MpegEncContext *s, static void dct_unquantize_h263_intra_mmx(MpegEncContext *s,
DCTELEM *block, int n, int qscale) DCTELEM *block, int n, int qscale)
{ {
int level, qmul, qadd, nCoeffs; int level, qmul, qadd, nCoeffs;
qmul = qscale << 1; qmul = qscale << 1;
qadd = (qscale - 1) | 1;
assert(s->block_last_index[n]>=0 || s->h263_aic); assert(s->block_last_index[n]>=0 || s->h263_aic);
if (s->mb_intra) { if (!s->h263_aic) {
if (!s->h263_aic) { if (n < 4)
if (n < 4) level = block[0] * s->y_dc_scale;
level = block[0] * s->y_dc_scale; else
else level = block[0] * s->c_dc_scale;
level = block[0] * s->c_dc_scale; qadd = (qscale - 1) | 1;
}else{ }else{
qadd = 0; qadd = 0;
level= block[0]; level= block[0];
}
nCoeffs=63;
} else {
nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
level = 0;/* keep gcc quiet */
} }
if(s->ac_pred)
nCoeffs=63;
else
nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
//printf("%d %d ", qmul, qadd);
asm volatile(
"movd %1, %%mm6 \n\t" //qmul
"packssdw %%mm6, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t"
"movd %2, %%mm5 \n\t" //qadd
"pxor %%mm7, %%mm7 \n\t"
"packssdw %%mm5, %%mm5 \n\t"
"packssdw %%mm5, %%mm5 \n\t"
"psubw %%mm5, %%mm7 \n\t"
"pxor %%mm4, %%mm4 \n\t"
".balign 16\n\t"
"1: \n\t"
"movq (%0, %3), %%mm0 \n\t"
"movq 8(%0, %3), %%mm1 \n\t"
"pmullw %%mm6, %%mm0 \n\t"
"pmullw %%mm6, %%mm1 \n\t"
"movq (%0, %3), %%mm2 \n\t"
"movq 8(%0, %3), %%mm3 \n\t"
"pcmpgtw %%mm4, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
"pcmpgtw %%mm4, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
"pxor %%mm2, %%mm0 \n\t"
"pxor %%mm3, %%mm1 \n\t"
"paddw %%mm7, %%mm0 \n\t"
"paddw %%mm7, %%mm1 \n\t"
"pxor %%mm0, %%mm2 \n\t"
"pxor %%mm1, %%mm3 \n\t"
"pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0
"pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0
"pandn %%mm2, %%mm0 \n\t"
"pandn %%mm3, %%mm1 \n\t"
"movq %%mm0, (%0, %3) \n\t"
"movq %%mm1, 8(%0, %3) \n\t"
"addl $16, %3 \n\t"
"jng 1b \n\t"
::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs))
: "memory"
);
block[0]= level;
}
static void dct_unquantize_h263_inter_mmx(MpegEncContext *s,
DCTELEM *block, int n, int qscale)
{
int level, qmul, qadd, nCoeffs;
qmul = qscale << 1;
qadd = (qscale - 1) | 1;
assert(s->block_last_index[n]>=0 || s->h263_aic);
nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
//printf("%d %d ", qmul, qadd); //printf("%d %d ", qmul, qadd);
asm volatile( asm volatile(
"movd %1, %%mm6 \n\t" //qmul "movd %1, %%mm6 \n\t" //qmul
...@@ -104,8 +165,6 @@ asm volatile( ...@@ -104,8 +165,6 @@ asm volatile(
::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs)) ::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs))
: "memory" : "memory"
); );
if(s->mb_intra)
block[0]= level;
} }
...@@ -138,24 +197,23 @@ asm volatile( ...@@ -138,24 +197,23 @@ asm volatile(
high3:low3 = low1*low2 high3:low3 = low1*low2
high3 += tlow1 high3 += tlow1
*/ */
static void dct_unquantize_mpeg1_mmx(MpegEncContext *s, static void dct_unquantize_mpeg1_intra_mmx(MpegEncContext *s,
DCTELEM *block, int n, int qscale) DCTELEM *block, int n, int qscale)
{ {
int nCoeffs; int nCoeffs;
const uint16_t *quant_matrix; const uint16_t *quant_matrix;
int block0;
assert(s->block_last_index[n]>=0); assert(s->block_last_index[n]>=0);
nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1; nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1;
if (s->mb_intra) { if (n < 4)
int block0; block0 = block[0] * s->y_dc_scale;
if (n < 4) else
block0 = block[0] * s->y_dc_scale; block0 = block[0] * s->c_dc_scale;
else /* XXX: only mpeg1 */
block0 = block[0] * s->c_dc_scale; quant_matrix = s->intra_matrix;
/* XXX: only mpeg1 */
quant_matrix = s->intra_matrix;
asm volatile( asm volatile(
"pcmpeqw %%mm7, %%mm7 \n\t" "pcmpeqw %%mm7, %%mm7 \n\t"
"psrlw $15, %%mm7 \n\t" "psrlw $15, %%mm7 \n\t"
...@@ -205,9 +263,19 @@ asm volatile( ...@@ -205,9 +263,19 @@ asm volatile(
::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs) ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
: "%eax", "memory" : "%eax", "memory"
); );
block[0]= block0; block[0]= block0;
}
static void dct_unquantize_mpeg1_inter_mmx(MpegEncContext *s,
DCTELEM *block, int n, int qscale)
{
int nCoeffs;
const uint16_t *quant_matrix;
assert(s->block_last_index[n]>=0);
nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1;
} else {
quant_matrix = s->inter_matrix; quant_matrix = s->inter_matrix;
asm volatile( asm volatile(
"pcmpeqw %%mm7, %%mm7 \n\t" "pcmpeqw %%mm7, %%mm7 \n\t"
...@@ -262,28 +330,25 @@ asm volatile( ...@@ -262,28 +330,25 @@ asm volatile(
::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs) ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
: "%eax", "memory" : "%eax", "memory"
); );
}
} }
static void dct_unquantize_mpeg2_mmx(MpegEncContext *s, static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s,
DCTELEM *block, int n, int qscale) DCTELEM *block, int n, int qscale)
{ {
int nCoeffs; int nCoeffs;
const uint16_t *quant_matrix; const uint16_t *quant_matrix;
int block0;
assert(s->block_last_index[n]>=0); assert(s->block_last_index[n]>=0);
if(s->alternate_scan) nCoeffs= 63; //FIXME if(s->alternate_scan) nCoeffs= 63; //FIXME
else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
if (s->mb_intra) { if (n < 4)
int block0; block0 = block[0] * s->y_dc_scale;
if (n < 4) else
block0 = block[0] * s->y_dc_scale; block0 = block[0] * s->c_dc_scale;
else quant_matrix = s->intra_matrix;
block0 = block[0] * s->c_dc_scale;
quant_matrix = s->intra_matrix;
asm volatile( asm volatile(
"pcmpeqw %%mm7, %%mm7 \n\t" "pcmpeqw %%mm7, %%mm7 \n\t"
"psrlw $15, %%mm7 \n\t" "psrlw $15, %%mm7 \n\t"
...@@ -329,10 +394,21 @@ asm volatile( ...@@ -329,10 +394,21 @@ asm volatile(
::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs) ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
: "%eax", "memory" : "%eax", "memory"
); );
block[0]= block0; block[0]= block0;
//Note, we dont do mismatch control for intra as errors cannot accumulate //Note, we dont do mismatch control for intra as errors cannot accumulate
}
static void dct_unquantize_mpeg2_inter_mmx(MpegEncContext *s,
DCTELEM *block, int n, int qscale)
{
int nCoeffs;
const uint16_t *quant_matrix;
assert(s->block_last_index[n]>=0);
if(s->alternate_scan) nCoeffs= 63; //FIXME
else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
} else {
quant_matrix = s->inter_matrix; quant_matrix = s->inter_matrix;
asm volatile( asm volatile(
"pcmpeqw %%mm7, %%mm7 \n\t" "pcmpeqw %%mm7, %%mm7 \n\t"
...@@ -397,7 +473,6 @@ asm volatile( ...@@ -397,7 +473,6 @@ asm volatile(
::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "r" (-2*nCoeffs) ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "r" (-2*nCoeffs)
: "%eax", "memory" : "%eax", "memory"
); );
}
} }
/* draw the edges of width 'w' of an image of size width, height /* draw the edges of width 'w' of an image of size width, height
...@@ -505,9 +580,12 @@ void MPV_common_init_mmx(MpegEncContext *s) ...@@ -505,9 +580,12 @@ void MPV_common_init_mmx(MpegEncContext *s)
if (mm_flags & MM_MMX) { if (mm_flags & MM_MMX) {
const int dct_algo = s->avctx->dct_algo; const int dct_algo = s->avctx->dct_algo;
s->dct_unquantize_h263 = dct_unquantize_h263_mmx; s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_mmx;
s->dct_unquantize_mpeg1 = dct_unquantize_mpeg1_mmx; s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_mmx;
s->dct_unquantize_mpeg2 = dct_unquantize_mpeg2_mmx; s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_mmx;
s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_mmx;
s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_mmx;
s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_mmx;
draw_edges = draw_edges_mmx; draw_edges = draw_edges_mmx;
......
This diff is collapsed.
...@@ -645,13 +645,21 @@ typedef struct MpegEncContext { ...@@ -645,13 +645,21 @@ typedef struct MpegEncContext {
#define SLICE_END -2 ///<end marker found #define SLICE_END -2 ///<end marker found
#define SLICE_NOEND -3 ///<no end marker or error found but mb count exceeded #define SLICE_NOEND -3 ///<no end marker or error found but mb count exceeded
void (*dct_unquantize_mpeg1)(struct MpegEncContext *s, void (*dct_unquantize_mpeg1_intra)(struct MpegEncContext *s,
DCTELEM *block/*align 16*/, int n, int qscale); DCTELEM *block/*align 16*/, int n, int qscale);
void (*dct_unquantize_mpeg2)(struct MpegEncContext *s, void (*dct_unquantize_mpeg1_inter)(struct MpegEncContext *s,
DCTELEM *block/*align 16*/, int n, int qscale); DCTELEM *block/*align 16*/, int n, int qscale);
void (*dct_unquantize_h263)(struct MpegEncContext *s, void (*dct_unquantize_mpeg2_intra)(struct MpegEncContext *s,
DCTELEM *block/*align 16*/, int n, int qscale); DCTELEM *block/*align 16*/, int n, int qscale);
void (*dct_unquantize)(struct MpegEncContext *s, // unquantizer to use (mpeg4 can use both) void (*dct_unquantize_mpeg2_inter)(struct MpegEncContext *s,
DCTELEM *block/*align 16*/, int n, int qscale);
void (*dct_unquantize_h263_intra)(struct MpegEncContext *s,
DCTELEM *block/*align 16*/, int n, int qscale);
void (*dct_unquantize_h263_inter)(struct MpegEncContext *s,
DCTELEM *block/*align 16*/, int n, int qscale);
void (*dct_unquantize_intra)(struct MpegEncContext *s, // unquantizer to use (mpeg4 can use both)
DCTELEM *block/*align 16*/, int n, int qscale);
void (*dct_unquantize_inter)(struct MpegEncContext *s, // unquantizer to use (mpeg4 can use both)
DCTELEM *block/*align 16*/, int n, int qscale); DCTELEM *block/*align 16*/, int n, int qscale);
int (*dct_quantize)(struct MpegEncContext *s, DCTELEM *block/*align 16*/, int n, int qscale, int *overflow); int (*dct_quantize)(struct MpegEncContext *s, DCTELEM *block/*align 16*/, int n, int qscale, int *overflow);
int (*fast_dct_quantize)(struct MpegEncContext *s, DCTELEM *block/*align 16*/, int n, int qscale, int *overflow); int (*fast_dct_quantize)(struct MpegEncContext *s, DCTELEM *block/*align 16*/, int n, int qscale, int *overflow);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment