Commit 7ca422bb authored by Ronald S. Bultje

vp9: add mmxext versions of the single-block (w=4,npx=8) h/v loopfilters.

Each takes about 0.5% of runtime in my profiles, and they didn't have
any SIMD so far (we only had SIMD for the npx=16 double-block versions).
parent 726501a3
......@@ -126,6 +126,7 @@ void ff_vp9_loop_filter_v_##size1##_##size2##_##opt(uint8_t *dst, ptrdiff_t stri
void ff_vp9_loop_filter_h_##size1##_##size2##_##opt(uint8_t *dst, ptrdiff_t stride, \
int E, int I, int H)
lpf_funcs(4, 8, mmxext);
lpf_funcs(16, 16, sse2);
lpf_funcs(16, 16, ssse3);
lpf_funcs(16, 16, avx);
......@@ -281,6 +282,8 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact)
}
if (EXTERNAL_MMXEXT(cpu_flags)) {
dsp->loop_filter_8[0][0] = ff_vp9_loop_filter_h_4_8_mmxext;
dsp->loop_filter_8[0][1] = ff_vp9_loop_filter_v_4_8_mmxext;
init_subpel2(4, 0, 4, put, 8, mmxext);
init_subpel2(4, 1, 4, avg, 8, mmxext);
init_fpel_func(4, 1, 4, avg, _8, mmxext);
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment