Commit a711eb48 authored by Ronald S. Bultje

VP8 H/V inner loopfilter MMX/MMXEXT/SSE2 optimizations.

Originally committed as revision 24250 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent 75148437
...@@ -61,7 +61,7 @@ DECLARE_ALIGNED(8, const uint64_t, ff_pw_96 ) = 0x0060006000600060ULL; ...@@ -61,7 +61,7 @@ DECLARE_ALIGNED(8, const uint64_t, ff_pw_96 ) = 0x0060006000600060ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_pw_128) = 0x0080008000800080ULL; DECLARE_ALIGNED(8, const uint64_t, ff_pw_128) = 0x0080008000800080ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL; DECLARE_ALIGNED(8, const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL;
DECLARE_ALIGNED(8, const uint64_t, ff_pb_1 ) = 0x0101010101010101ULL; DECLARE_ALIGNED(16, const xmm_reg, ff_pb_1 ) = {0x0101010101010101ULL, 0x0101010101010101ULL};
DECLARE_ALIGNED(16, const xmm_reg, ff_pb_3 ) = {0x0303030303030303ULL, 0x0303030303030303ULL}; DECLARE_ALIGNED(16, const xmm_reg, ff_pb_3 ) = {0x0303030303030303ULL, 0x0303030303030303ULL};
DECLARE_ALIGNED(16, const xmm_reg, ff_pb_4 ) = {0x0404040404040404ULL, 0x0404040404040404ULL}; DECLARE_ALIGNED(16, const xmm_reg, ff_pb_4 ) = {0x0404040404040404ULL, 0x0404040404040404ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pb_7 ) = 0x0707070707070707ULL; DECLARE_ALIGNED(8, const uint64_t, ff_pb_7 ) = 0x0707070707070707ULL;
......
...@@ -47,7 +47,7 @@ extern const uint64_t ff_pw_96; ...@@ -47,7 +47,7 @@ extern const uint64_t ff_pw_96;
extern const uint64_t ff_pw_128; extern const uint64_t ff_pw_128;
extern const uint64_t ff_pw_255; extern const uint64_t ff_pw_255;
extern const uint64_t ff_pb_1; extern const xmm_reg ff_pb_1;
extern const xmm_reg ff_pb_3; extern const xmm_reg ff_pb_3;
extern const uint64_t ff_pb_7; extern const uint64_t ff_pb_7;
extern const uint64_t ff_pb_1F; extern const uint64_t ff_pb_1F;
......
...@@ -229,6 +229,19 @@ extern void ff_vp8_v_loop_filter_simple_sse2 (uint8_t *dst, int stride, int fli ...@@ -229,6 +229,19 @@ extern void ff_vp8_v_loop_filter_simple_sse2 (uint8_t *dst, int stride, int fli
extern void ff_vp8_h_loop_filter_simple_mmx (uint8_t *dst, int stride, int flim); extern void ff_vp8_h_loop_filter_simple_mmx (uint8_t *dst, int stride, int flim);
extern void ff_vp8_h_loop_filter_simple_mmxext(uint8_t *dst, int stride, int flim); extern void ff_vp8_h_loop_filter_simple_mmxext(uint8_t *dst, int stride, int flim);
extern void ff_vp8_h_loop_filter_simple_sse2 (uint8_t *dst, int stride, int flim); extern void ff_vp8_h_loop_filter_simple_sse2 (uint8_t *dst, int stride, int flim);
/*
 * Prototypes for the VP8 inner loop-filter routines introduced by this
 * commit: one vertical (v) and one horizontal (h) entry point, each in an
 * MMX, MMXEXT and SSE2 flavour. ff_vp8dsp_init_x86() stores them in
 * c->vp8_v_loop_filter16_inner and c->vp8_h_loop_filter16_inner.
 *
 * All six share the same signature: a pointer into the picture (dst), the
 * line stride, and three integer filter parameters.
 * NOTE(review): e, i and hvt look like the edge limit, interior limit and
 * high-edge-variance threshold -- confirm against the VP8DSPContext
 * declaration before relying on this.
 */
extern void ff_vp8_v_loop_filter16_inner_mmx (uint8_t *dst, int stride,
int e, int i, int hvt);
extern void ff_vp8_v_loop_filter16_inner_mmxext(uint8_t *dst, int stride,
int e, int i, int hvt);
extern void ff_vp8_v_loop_filter16_inner_sse2 (uint8_t *dst, int stride,
int e, int i, int hvt);
extern void ff_vp8_h_loop_filter16_inner_mmx (uint8_t *dst, int stride,
int e, int i, int hvt);
extern void ff_vp8_h_loop_filter16_inner_mmxext(uint8_t *dst, int stride,
int e, int i, int hvt);
extern void ff_vp8_h_loop_filter16_inner_sse2 (uint8_t *dst, int stride,
int e, int i, int hvt);
#endif #endif
#define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \ #define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \
...@@ -270,6 +283,9 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c) ...@@ -270,6 +283,9 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmx; c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmx;
c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmx; c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmx;
c->vp8_v_loop_filter16_inner = ff_vp8_v_loop_filter16_inner_mmx;
c->vp8_h_loop_filter16_inner = ff_vp8_h_loop_filter16_inner_mmx;
} }
/* note that 4-tap width=16 functions are missing because w=16 /* note that 4-tap width=16 functions are missing because w=16
...@@ -285,6 +301,9 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c) ...@@ -285,6 +301,9 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmxext; c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmxext;
c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmxext; c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmxext;
c->vp8_v_loop_filter16_inner = ff_vp8_v_loop_filter16_inner_mmxext;
c->vp8_h_loop_filter16_inner = ff_vp8_h_loop_filter16_inner_mmxext;
} }
if (mm_flags & FF_MM_SSE) { if (mm_flags & FF_MM_SSE) {
...@@ -300,6 +319,9 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c) ...@@ -300,6 +319,9 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2; c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2;
c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2; c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2;
c->vp8_v_loop_filter16_inner = ff_vp8_v_loop_filter16_inner_sse2;
c->vp8_h_loop_filter16_inner = ff_vp8_h_loop_filter16_inner_sse2;
} }
if (mm_flags & FF_MM_SSSE3) { if (mm_flags & FF_MM_SSSE3) {
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment