Commit 5773a746 authored by Michael Niedermayer

Port the MMX and SSE2 (SSE2 untested) VP3 IDCTs to the lavc IDCT API.

Originally committed as revision 4260 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent 1482fee8
......@@ -3885,6 +3885,10 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
for(i=0; i<64; i++)
c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
break;
case FF_PARTTRANS_IDCT_PERM:
for(i=0; i<64; i++)
c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
break;
default:
av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
}
......
......@@ -315,6 +315,7 @@ typedef struct DSPContext {
#define FF_LIBMPEG2_IDCT_PERM 2
#define FF_SIMPLE_IDCT_PERM 3
#define FF_TRANSPOSE_IDCT_PERM 4
#define FF_PARTTRANS_IDCT_PERM 5
int (*try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale);
void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale);
......
......@@ -3115,6 +3115,10 @@ static void add_8x8basis_mmx(int16_t rem[64], int16_t basis[64], int scale){
/* Prototypes for IDCT routines that are not defined in this part of the file.
 * Each of the first two takes a single DCTELEM block pointer. */
void ff_mmx_idct(DCTELEM *block);
void ff_mmxext_idct(DCTELEM *block);
/* VP3 IDCT variants; each takes a single int16_t buffer pointer. */
void ff_vp3_idct_sse2(int16_t *input_data);
void ff_vp3_idct_mmx(int16_t *data);
/* Called by dsputil_init_mmx() before it selects the MMX VP3 IDCT. */
void ff_vp3_dsp_init_mmx(void);
/* XXX: those functions should be removed ASAP once all IDCTs are
converted */
static void ff_libmpeg2mmx_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
......@@ -3137,6 +3141,26 @@ static void ff_libmpeg2mmx2_idct_add(uint8_t *dest, int line_size, DCTELEM *bloc
ff_mmxext_idct (block);
add_pixels_clamped_mmx(block, dest, line_size);
}
/**
 * idct_put wrapper for the SSE2 VP3 IDCT.
 *
 * Runs ff_vp3_idct_sse2() on block, then hands the same buffer to
 * put_signed_pixels_clamped_mmx() together with dest and line_size.
 *
 * @param dest      destination pixel pointer, forwarded unchanged
 * @param line_size destination stride argument, forwarded unchanged
 * @param block     coefficient block; transformed before being stored
 */
static void ff_vp3_idct_put_sse2(uint8_t *dest, int line_size, DCTELEM *block)
{
    /* Step 1: inverse transform of the block. */
    ff_vp3_idct_sse2(block);

    /* Step 2: store the transformed block to dest. */
    put_signed_pixels_clamped_mmx(block, dest, line_size);
}
/**
 * idct_add wrapper for the SSE2 VP3 IDCT.
 *
 * Runs ff_vp3_idct_sse2() on block, then hands the same buffer to
 * add_pixels_clamped_mmx() together with dest and line_size.
 *
 * @param dest      destination pixel pointer, forwarded unchanged
 * @param line_size destination stride argument, forwarded unchanged
 * @param block     coefficient block; transformed before being added
 */
static void ff_vp3_idct_add_sse2(uint8_t *dest, int line_size, DCTELEM *block)
{
    /* Step 1: inverse transform of the block. */
    ff_vp3_idct_sse2(block);

    /* Step 2: add the transformed block onto dest. */
    add_pixels_clamped_mmx(block, dest, line_size);
}
/**
 * idct_put wrapper for the MMX VP3 IDCT.
 *
 * Runs ff_vp3_idct_mmx() on block, then hands the same buffer to
 * put_signed_pixels_clamped_mmx() together with dest and line_size.
 *
 * @param dest      destination pixel pointer, forwarded unchanged
 * @param line_size destination stride argument, forwarded unchanged
 * @param block     coefficient block; transformed before being stored
 */
static void ff_vp3_idct_put_mmx(uint8_t *dest, int line_size, DCTELEM *block)
{
    /* Step 1: inverse transform of the block. */
    ff_vp3_idct_mmx(block);

    /* Step 2: store the transformed block to dest. */
    put_signed_pixels_clamped_mmx(block, dest, line_size);
}
/**
 * idct_add wrapper for the MMX VP3 IDCT.
 *
 * Runs ff_vp3_idct_mmx() on block, then hands the same buffer to
 * add_pixels_clamped_mmx() together with dest and line_size.
 *
 * @param dest      destination pixel pointer, forwarded unchanged
 * @param line_size destination stride argument, forwarded unchanged
 * @param block     coefficient block; transformed before being added
 */
static void ff_vp3_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block)
{
    /* Step 1: inverse transform of the block. */
    ff_vp3_idct_mmx(block);

    /* Step 2: add the transformed block onto dest. */
    add_pixels_clamped_mmx(block, dest, line_size);
}
void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
{
......@@ -3196,18 +3220,22 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->idct = ff_mmx_idct;
}
c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
}else if(idct_algo==FF_IDCT_VP3){
if(mm_flags & MM_SSE2){
c->idct_put= ff_vp3_idct_put_sse2;
c->idct_add= ff_vp3_idct_add_sse2;
c->idct = ff_vp3_idct_sse2;
c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM;
}else{
ff_vp3_dsp_init_mmx();
c->idct_put= ff_vp3_idct_put_mmx;
c->idct_add= ff_vp3_idct_add_mmx;
c->idct = ff_vp3_idct_mmx;
c->idct_permutation_type= FF_PARTTRANS_IDCT_PERM;
}
}
}
/* VP3 optimized DSP functions */
// if (mm_flags & MM_SSE2) {
// c->vp3_dsp_init = vp3_dsp_init_sse2;
// c->vp3_idct = vp3_idct_sse2;
// } else {
// c->vp3_dsp_init = vp3_dsp_init_mmx;
// c->vp3_idct = vp3_idct_mmx;
// }
#ifdef CONFIG_ENCODERS
c->get_pixels = get_pixels_mmx;
c->diff_pixels = diff_pixels_mmx;
......
This diff is collapsed.
......@@ -796,24 +796,16 @@ static unsigned short __align16 SSE2_idct_data[7 * 8] =
} /* end of SSE2_Dequantize Macro */
/* Initialisation entry point for the SSE2 VP3 DSP code; performs no work. */
void vp3_dsp_init_sse2(void)
{
    /* nop */
}
void vp3_idct_sse2(int16_t *input_data, int16_t *dequant_matrix,
int coeff_count, int16_t *output_data)
void ff_vp3_idct_sse2(int16_t *input_data)
{
unsigned char *input_bytes = (unsigned char *)input_data;
unsigned char *dequant_matrix_bytes = (unsigned char *)dequant_matrix;
unsigned char *dequant_const_bytes = (unsigned char *)SSE2_dequant_const;
unsigned char *output_data_bytes = (unsigned char *)output_data;
unsigned char *output_data_bytes = (unsigned char *)input_data;
unsigned char *idct_data_bytes = (unsigned char *)SSE2_idct_data;
unsigned char *Eight = (unsigned char *)eight_data;
#define eax input_bytes
#define ebx dequant_matrix_bytes
//#define ebx dequant_matrix_bytes
#define ecx dequant_const_bytes
#define edx idct_data_bytes
......@@ -821,7 +813,7 @@ void vp3_idct_sse2(int16_t *input_data, int16_t *dequant_matrix,
#define O(i) (ebx + 16 * i)
#define C(i) (edx + 16 * (i-1))
SSE2_Dequantize();
// SSE2_Dequantize();
#undef ebx
#define ebx output_data_bytes
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment