Commit a22eff36 authored by Jason Garrett-Glaser's avatar Jason Garrett-Glaser

Port x264 deblocking code to libavcodec.

This includes SSE2 luma deblocking code and both MMXEXT and SSE2 luma
intra deblocking code for H.264 decoding. This assembly is available
under --enable-gpl and speeds decoding of Cathedral by 7%.

Originally committed as revision 16239 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent b9263c94
......@@ -404,7 +404,8 @@ MMX-OBJS-$(CONFIG_VP6_DECODER) += i386/vp3dsp_mmx.o i386/vp3dsp_sse2.o
MMX-OBJS-$(CONFIG_VP6A_DECODER) += i386/vp3dsp_mmx.o i386/vp3dsp_sse2.o
MMX-OBJS-$(CONFIG_VP6F_DECODER) += i386/vp3dsp_mmx.o i386/vp3dsp_sse2.o
MMX-OBJS-$(CONFIG_WMV3_DECODER) += i386/vc1dsp_mmx.o
MMX-OBJS-$(HAVE_YASM) += i386/dsputil_yasm.o
MMX-OBJS-$(HAVE_YASM) += i386/dsputil_yasm.o \
i386/h264_deblock_sse2.o
OBJS-$(HAVE_MMX) += i386/cpuid.o \
i386/dnxhd_mmx.o \
......
......@@ -2327,6 +2327,17 @@ static void float_to_int16_sse2(int16_t *dst, const float *src, long len){
void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len);
void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len);
void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len);
void ff_x264_deblock_v_luma_sse2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0);
void ff_x264_deblock_h_luma_sse2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0);
void ff_x264_deblock_v8_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta);
void ff_x264_deblock_h_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta);
static void ff_x264_deblock_v_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta)
{
ff_x264_deblock_v8_luma_intra_mmxext(pix+0, stride, alpha, beta);
ff_x264_deblock_v8_luma_intra_mmxext(pix+8, stride, alpha, beta);
}
void ff_x264_deblock_v_luma_intra_sse2(uint8_t *pix, int stride, int alpha, int beta);
void ff_x264_deblock_h_luma_intra_sse2(uint8_t *pix, int stride, int alpha, int beta);
#else
#define ff_float_to_int16_interleave6_sse(a,b,c) float_to_int16_interleave_misc_sse(a,b,c,6)
#define ff_float_to_int16_interleave6_3dnow(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6)
......@@ -2853,6 +2864,21 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
}
#endif
#if defined(CONFIG_GPL) && defined(HAVE_YASM)
if( mm_flags&FF_MM_MMXEXT ){
#ifndef ARCH_X86_64
c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_mmxext;
c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_mmxext;
#endif
if( mm_flags&FF_MM_SSE2 ){
c->h264_v_loop_filter_luma = ff_x264_deblock_v_luma_sse2;
c->h264_h_loop_filter_luma = ff_x264_deblock_h_luma_sse2;
c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_sse2;
c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_sse2;
}
}
#endif
#ifdef CONFIG_SNOW_DECODER
if(mm_flags & FF_MM_SSE2 & 0){
c->horizontal_compose97i = ff_snow_horizontal_compose97i_sse2;
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment