Commit 5402c188 authored by James Almer

x86/af_afir: add ff_fcmul_add_avx()

fcmul_add_c: 1228.8
fcmul_add_sse3: 334.3
fcmul_add_avx: 186.3

Tested on a Core i5 4460 @ 3.2GHz
Reviewed-by: Paul B Mahol <onemda@gmail.com>
Signed-off-by: James Almer <jamrial@gmail.com>
parent ba89dc27
...@@ -27,7 +27,7 @@ SECTION .text ...@@ -27,7 +27,7 @@ SECTION .text
; void ff_fcmul_add(float *sum, const float *t, const float *c, int len) ; void ff_fcmul_add(float *sum, const float *t, const float *c, int len)
;------------------------------------------------------------------------------ ;------------------------------------------------------------------------------
INIT_XMM sse3 %macro FCMUL_ADD 0
cglobal fcmul_add, 4,4,6, sum, t, c, len cglobal fcmul_add, 4,4,6, sum, t, c, len
shl lend, 3 shl lend, 3
add tq, lenq add tq, lenq
...@@ -61,3 +61,9 @@ ALIGN 16 ...@@ -61,3 +61,9 @@ ALIGN 16
addss xm0, [sumq + lenq] addss xm0, [sumq + lenq]
movss [sumq + lenq], xm0 movss [sumq + lenq], xm0
RET RET
%endmacro
INIT_XMM sse3
FCMUL_ADD
INIT_YMM avx
FCMUL_ADD
...@@ -24,6 +24,8 @@ ...@@ -24,6 +24,8 @@
void ff_fcmul_add_sse3(float *sum, const float *t, const float *c, void ff_fcmul_add_sse3(float *sum, const float *t, const float *c,
ptrdiff_t len); ptrdiff_t len);
void ff_fcmul_add_avx(float *sum, const float *t, const float *c,
ptrdiff_t len);
av_cold void ff_afir_init_x86(AudioFIRDSPContext *s) av_cold void ff_afir_init_x86(AudioFIRDSPContext *s)
{ {
...@@ -32,4 +34,7 @@ av_cold void ff_afir_init_x86(AudioFIRDSPContext *s) ...@@ -32,4 +34,7 @@ av_cold void ff_afir_init_x86(AudioFIRDSPContext *s)
if (EXTERNAL_SSE3(cpu_flags)) { if (EXTERNAL_SSE3(cpu_flags)) {
s->fcmul_add = ff_fcmul_add_sse3; s->fcmul_add = ff_fcmul_add_sse3;
} }
if (EXTERNAL_AVX_FAST(cpu_flags)) {
s->fcmul_add = ff_fcmul_add_avx;
}
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment