Commit 2f9ab159 authored by James Almer

x86/vp9: add avx2 subpel MC SIMD for 10/12bpp

Reviewed-by: Ronald S. Bultje <rsbultje@gmail.com>
Signed-off-by: James Almer <jamrial@gmail.com>
parent 85c343fa
...@@ -33,16 +33,31 @@ extern const int16_t ff_filters_16bpp[3][15][4][16]; ...@@ -33,16 +33,31 @@ extern const int16_t ff_filters_16bpp[3][15][4][16];
decl_mc_funcs(4, sse2, int16_t, 16, BPC); decl_mc_funcs(4, sse2, int16_t, 16, BPC);
decl_mc_funcs(8, sse2, int16_t, 16, BPC); decl_mc_funcs(8, sse2, int16_t, 16, BPC);
decl_mc_funcs(16, avx2, int16_t, 16, BPC);
mc_rep_funcs(16, 8, 16, sse2, int16_t, 16, BPC); mc_rep_funcs(16, 8, 16, sse2, int16_t, 16, BPC);
mc_rep_funcs(32, 16, 32, sse2, int16_t, 16, BPC); mc_rep_funcs(32, 16, 32, sse2, int16_t, 16, BPC);
mc_rep_funcs(64, 32, 64, sse2, int16_t, 16, BPC); mc_rep_funcs(64, 32, 64, sse2, int16_t, 16, BPC);
mc_rep_funcs(32, 16, 32, avx2, int16_t, 16, BPC);
mc_rep_funcs(64, 32, 64, avx2, int16_t, 16, BPC);
filters_8tap_2d_fn2(put, 16, BPC, 2, sse2, sse2, 16bpp) filters_8tap_2d_fn2(put, 16, BPC, 2, sse2, sse2, 16bpp)
filters_8tap_2d_fn2(avg, 16, BPC, 2, sse2, sse2, 16bpp) filters_8tap_2d_fn2(avg, 16, BPC, 2, sse2, sse2, 16bpp)
filters_8tap_2d_fn(put, 64, 32, BPC, 2, avx2, 16bpp)
filters_8tap_2d_fn(avg, 64, 32, BPC, 2, avx2, 16bpp)
filters_8tap_2d_fn(put, 32, 32, BPC, 2, avx2, 16bpp)
filters_8tap_2d_fn(avg, 32, 32, BPC, 2, avx2, 16bpp)
filters_8tap_2d_fn(put, 16, 32, BPC, 2, avx2, 16bpp)
filters_8tap_2d_fn(avg, 16, 32, BPC, 2, avx2, 16bpp)
filters_8tap_1d_fn3(put, BPC, sse2, sse2, 16bpp) filters_8tap_1d_fn3(put, BPC, sse2, sse2, 16bpp)
filters_8tap_1d_fn3(avg, BPC, sse2, sse2, 16bpp) filters_8tap_1d_fn3(avg, BPC, sse2, sse2, 16bpp)
filters_8tap_1d_fn2(put, 64, BPC, avx2, 16bpp)
filters_8tap_1d_fn2(avg, 64, BPC, avx2, 16bpp)
filters_8tap_1d_fn2(put, 32, BPC, avx2, 16bpp)
filters_8tap_1d_fn2(avg, 32, BPC, avx2, 16bpp)
filters_8tap_1d_fn2(put, 16, BPC, avx2, 16bpp)
filters_8tap_1d_fn2(avg, 16, BPC, avx2, 16bpp)
#endif /* HAVE_YASM */ #endif /* HAVE_YASM */
...@@ -56,6 +71,13 @@ av_cold void INIT_FUNC(VP9DSPContext *dsp) ...@@ -56,6 +71,13 @@ av_cold void INIT_FUNC(VP9DSPContext *dsp)
init_subpel3(1, avg, BPC, sse2); init_subpel3(1, avg, BPC, sse2);
} }
if (EXTERNAL_AVX2(cpu_flags)) {
init_subpel3_32_64(0, put, BPC, avx2);
init_subpel3_32_64(1, avg, BPC, avx2);
init_subpel2(2, 0, 16, put, BPC, avx2);
init_subpel2(2, 1, 16, avg, BPC, avx2);
}
#endif /* HAVE_YASM */ #endif /* HAVE_YASM */
ff_vp9dsp_init_16bpp_x86(dsp); ff_vp9dsp_init_16bpp_x86(dsp);
......
...@@ -201,6 +201,9 @@ cglobal vp9_%1_8tap_1d_h_ %+ %%px %+ _12, 6, 6, %2, dst, dstride, src, sstride, ...@@ -201,6 +201,9 @@ cglobal vp9_%1_8tap_1d_h_ %+ %%px %+ _12, 6, 6, %2, dst, dstride, src, sstride,
INIT_XMM sse2 INIT_XMM sse2
filter_h_fn put filter_h_fn put
filter_h_fn avg filter_h_fn avg
INIT_YMM avx2
filter_h_fn put
filter_h_fn avg
%macro filter_v4_fn 1-2 12 %macro filter_v4_fn 1-2 12
%if ARCH_X86_64 %if ARCH_X86_64
...@@ -419,3 +422,6 @@ cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _12, 4, 7, %2, dst, dstride, src, sstride, ...@@ -419,3 +422,6 @@ cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _12, 4, 7, %2, dst, dstride, src, sstride,
INIT_XMM sse2 INIT_XMM sse2
filter_v_fn put filter_v_fn put
filter_v_fn avg filter_v_fn avg
INIT_YMM avx2
filter_v_fn put
filter_v_fn avg
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment