Commit 1a400796, authored by Christophe Gisquet, committed by Michael Niedermayer

x86: float dsp: butterflies_float SSE

97c -> 49c
Some codecs could benefit from more unrolling, but AAC doesn't.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent 295ce83e
...@@ -263,3 +263,26 @@ cglobal scalarproduct_float, 3,3,2, v1, v2, offset ...@@ -263,3 +263,26 @@ cglobal scalarproduct_float, 3,3,2, v1, v2, offset
%endif %endif
RET RET
;-----------------------------------------------------------------------------
; void ff_butterflies_float(float *src0, float *src1, int len);
;
; In-place butterfly over two float arrays. For every element i:
;     src0[i] <- src0[i] + src1[i]
;     src1[i] <- src0[i] - src1[i]      (computed from the old src0[i])
;
; Each iteration moves mmsize bytes with mova, so the routine presumably
; expects suitably aligned buffers and a len that is a multiple of the vector
; width -- NOTE(review): confirm against the documented C-side contract.
;-----------------------------------------------------------------------------
INIT_XMM sse
cglobal butterflies_float, 3,3,3, src0, src1, len
    movsxdifnidn lenq, lend             ; x86inc helper; presumably widens lend into lenq where they differ
    test        lenq, lenq
    jz          .done                   ; len == 0: nothing to process
    shl         lenq, 2                 ; element count -> byte count (x4)
    add         src0q, lenq             ; move both bases past the data ...
    add         src1q, lenq
    neg         lenq                    ; ... and index with a negative offset rising to 0
.next_vec:
    mova        m1, [src1q + lenq]
    mova        m0, [src0q + lenq]
    mova        m2, m0                  ; keep the old src0 values for the difference
    addps       m0, m1                  ; m0 = src0 + src1
    subps       m2, m1                  ; m2 = src0 - src1
    mova        [src1q + lenq], m2      ; difference is written to src1 first
    mova        [src0q + lenq], m0      ; sum is written to src0
    add         lenq, mmsize
    jl          .next_vec               ; continue while the offset is still negative
.done:
    REP_RET
...@@ -53,6 +53,8 @@ void ff_vector_fmul_reverse_avx(float *dst, const float *src0, ...@@ -53,6 +53,8 @@ void ff_vector_fmul_reverse_avx(float *dst, const float *src0,
float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order); float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
void ff_butterflies_float_sse(float *src0, float *src1, int len);
#if HAVE_6REGS && HAVE_INLINE_ASM #if HAVE_6REGS && HAVE_INLINE_ASM
static void vector_fmul_window_3dnowext(float *dst, const float *src0, static void vector_fmul_window_3dnowext(float *dst, const float *src0,
const float *src1, const float *win, const float *src1, const float *win,
...@@ -138,6 +140,7 @@ void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp) ...@@ -138,6 +140,7 @@ void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
fdsp->vector_fmul_add = ff_vector_fmul_add_sse; fdsp->vector_fmul_add = ff_vector_fmul_add_sse;
fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_sse; fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_sse;
fdsp->scalarproduct_float = ff_scalarproduct_float_sse; fdsp->scalarproduct_float = ff_scalarproduct_float_sse;
fdsp->butterflies_float = ff_butterflies_float_sse;
} }
if (EXTERNAL_SSE2(mm_flags)) { if (EXTERNAL_SSE2(mm_flags)) {
fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_sse2; fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_sse2;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment