Commit 38acbc3c authored by Mike Melanson

hook up support for SSE2-optimized VP3 IDCT

Originally committed as revision 3064 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent 78a16bfe
...@@ -116,7 +116,8 @@ ifeq ($(TARGET_MMX),yes) ...@@ -116,7 +116,8 @@ ifeq ($(TARGET_MMX),yes)
OBJS += i386/fdct_mmx.o i386/cputest.o \ OBJS += i386/fdct_mmx.o i386/cputest.o \
i386/dsputil_mmx.o i386/mpegvideo_mmx.o \ i386/dsputil_mmx.o i386/mpegvideo_mmx.o \
i386/idct_mmx.o i386/motion_est_mmx.o \ i386/idct_mmx.o i386/motion_est_mmx.o \
i386/simple_idct_mmx.o i386/fft_sse.o i386/vp3dsp_mmx.o i386/simple_idct_mmx.o i386/fft_sse.o i386/vp3dsp_mmx.o \
i386/vp3dsp_sse2.o
ifdef TARGET_BUILTIN_VECTOR ifdef TARGET_BUILTIN_VECTOR
i386/fft_sse.o: CFLAGS+= -msse i386/fft_sse.o: CFLAGS+= -msse
depend: CFLAGS+= -msse depend: CFLAGS+= -msse
......
...@@ -73,6 +73,12 @@ void vp3_idct_put_mmx(int16_t *input_data, int16_t *dequant_matrix, ...@@ -73,6 +73,12 @@ void vp3_idct_put_mmx(int16_t *input_data, int16_t *dequant_matrix,
void vp3_idct_add_mmx(int16_t *input_data, int16_t *dequant_matrix, void vp3_idct_add_mmx(int16_t *input_data, int16_t *dequant_matrix,
int coeff_count, uint8_t *dest, int stride); int coeff_count, uint8_t *dest, int stride);
void vp3_dsp_init_sse2(void);
void vp3_idct_put_sse2(int16_t *input_data, int16_t *dequant_matrix,
int coeff_count, uint8_t *dest, int stride);
void vp3_idct_add_sse2(int16_t *input_data, int16_t *dequant_matrix,
int coeff_count, uint8_t *dest, int stride);
/* minimum alignment rules ;) /* minimum alignment rules ;)
if u notice errors in the align stuff, need more alignment for some asm code for some cpu if u notice errors in the align stuff, need more alignment for some asm code for some cpu
...@@ -403,6 +409,7 @@ static inline void emms(void) ...@@ -403,6 +409,7 @@ static inline void emms(void)
} }
#define __align8 __attribute__ ((aligned (8))) #define __align8 __attribute__ ((aligned (8)))
#define __align16 __attribute__ ((aligned (16)))
void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx); void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx);
void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx); void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx);
......
...@@ -2147,9 +2147,15 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) ...@@ -2147,9 +2147,15 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
} }
/* VP3 optimized DSP functions */ /* VP3 optimized DSP functions */
c->vp3_dsp_init = vp3_dsp_init_mmx; if (mm_flags & MM_SSE2) {
c->vp3_idct_put = vp3_idct_put_mmx; c->vp3_dsp_init = vp3_dsp_init_sse2;
c->vp3_idct_add = vp3_idct_add_mmx; c->vp3_idct_put = vp3_idct_put_sse2;
c->vp3_idct_add = vp3_idct_add_sse2;
} else {
c->vp3_dsp_init = vp3_dsp_init_mmx;
c->vp3_idct_put = vp3_idct_put_mmx;
c->vp3_idct_add = vp3_idct_add_mmx;
}
#ifdef CONFIG_ENCODERS #ifdef CONFIG_ENCODERS
c->get_pixels = get_pixels_mmx; c->get_pixels = get_pixels_mmx;
......
...@@ -268,9 +268,11 @@ typedef struct Vp3DecodeContext { ...@@ -268,9 +268,11 @@ typedef struct Vp3DecodeContext {
VLC ac_vlc_3[16]; VLC ac_vlc_3[16];
VLC ac_vlc_4[16]; VLC ac_vlc_4[16];
int16_t intra_y_dequant[64]; /* these arrays need to be on 16-byte boundaries since SSE2 operations
int16_t intra_c_dequant[64]; * index into them */
int16_t inter_dequant[64]; int16_t __align16 intra_y_dequant[64];
int16_t __align16 intra_c_dequant[64];
int16_t __align16 inter_dequant[64];
/* This table contains superblock_count * 16 entries. Each set of 16 /* This table contains superblock_count * 16 entries. Each set of 16
* numbers corresponds to the fragment indices 0..15 of the superblock. * numbers corresponds to the fragment indices 0..15 of the superblock.
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment