Commit de130842 authored by Muhammad Faiz

swresample/x86/resample: extend resample_double to support avx and fma3

benchmark:
sse2 10.670s
avx   8.763s
fma3  8.380s
Signed-off-by: Muhammad Faiz <mfcc64@gmail.com>
parent 3d5c2169
...@@ -203,7 +203,7 @@ cglobal resample_common_%1, 1, 7, 2, ctx, phase_count, dst, frac, \ ...@@ -203,7 +203,7 @@ cglobal resample_common_%1, 1, 7, 2, ctx, phase_count, dst, frac, \
; horizontal sum & store ; horizontal sum & store
%if mmsize == 32 %if mmsize == 32
vextractf128 xm1, m0, 0x1 vextractf128 xm1, m0, 0x1
addps xm0, xm1 addp%4 xm0, xm1
%endif %endif
movhlps xm1, xm0 movhlps xm1, xm0
%ifidn %1, float %ifidn %1, float
...@@ -489,8 +489,8 @@ cglobal resample_linear_%1, 1, 7, 5, ctx, min_filter_length_x4, filter2, \ ...@@ -489,8 +489,8 @@ cglobal resample_linear_%1, 1, 7, 5, ctx, min_filter_length_x4, filter2, \
%if mmsize == 32 %if mmsize == 32
vextractf128 xm1, m0, 0x1 vextractf128 xm1, m0, 0x1
vextractf128 xm3, m2, 0x1 vextractf128 xm3, m2, 0x1
addps xm0, xm1 addp%4 xm0, xm1
addps xm2, xm3 addp%4 xm2, xm3
%endif %endif
cvtsi2s%4 xm1, fracd cvtsi2s%4 xm1, fracd
subp%4 xm2, xm0 subp%4 xm2, xm0
...@@ -608,3 +608,12 @@ RESAMPLE_FNS int16, 2, 1 ...@@ -608,3 +608,12 @@ RESAMPLE_FNS int16, 2, 1
INIT_XMM sse2 INIT_XMM sse2
RESAMPLE_FNS double, 8, 3, d, pdbl_1 RESAMPLE_FNS double, 8, 3, d, pdbl_1
%if HAVE_AVX_EXTERNAL
INIT_YMM avx
RESAMPLE_FNS double, 8, 3, d, pdbl_1
%endif
%if HAVE_FMA3_EXTERNAL
INIT_YMM fma3
RESAMPLE_FNS double, 8, 3, d, pdbl_1
%endif
...@@ -42,6 +42,8 @@ RESAMPLE_FUNCS(float, avx); ...@@ -42,6 +42,8 @@ RESAMPLE_FUNCS(float, avx);
RESAMPLE_FUNCS(float, fma3); RESAMPLE_FUNCS(float, fma3);
RESAMPLE_FUNCS(float, fma4); RESAMPLE_FUNCS(float, fma4);
RESAMPLE_FUNCS(double, sse2); RESAMPLE_FUNCS(double, sse2);
RESAMPLE_FUNCS(double, avx);
RESAMPLE_FUNCS(double, fma3);
av_cold void swri_resample_dsp_x86_init(ResampleContext *c) av_cold void swri_resample_dsp_x86_init(ResampleContext *c)
{ {
...@@ -85,6 +87,14 @@ av_cold void swri_resample_dsp_x86_init(ResampleContext *c) ...@@ -85,6 +87,14 @@ av_cold void swri_resample_dsp_x86_init(ResampleContext *c)
c->dsp.resample_linear = ff_resample_linear_double_sse2; c->dsp.resample_linear = ff_resample_linear_double_sse2;
c->dsp.resample_common = ff_resample_common_double_sse2; c->dsp.resample_common = ff_resample_common_double_sse2;
} }
if (EXTERNAL_AVX_FAST(mm_flags)) {
c->dsp.resample_linear = ff_resample_linear_double_avx;
c->dsp.resample_common = ff_resample_common_double_avx;
}
if (EXTERNAL_FMA3_FAST(mm_flags)) {
c->dsp.resample_linear = ff_resample_linear_double_fma3;
c->dsp.resample_common = ff_resample_common_double_fma3;
}
break; break;
} }
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment