Commit 884e085d, authored by James Almer, committed by Michael Niedermayer

x86/synth_filter: Revert the switch to float ops with SSE2

This reverts the changes that commits 64672098
and 68c3ed93 made to the SSE2 version,
which caused a slowdown of about 5 cycles.
Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent f5d1d1e4
...@@ -199,6 +199,14 @@ INIT_XMM sse ...@@ -199,6 +199,14 @@ INIT_XMM sse
DCA_LFE_FIR 0 DCA_LFE_FIR 0
DCA_LFE_FIR 1 DCA_LFE_FIR 1
; SETZERO reg
;   Clears the vector register passed as %1 by XORing it with itself.
;   Instruction selection depends on the active cpuflags:
;     - SSE2 available and AVX not available -> integer-domain pxor
;     - any other target                     -> xorps
;   NOTE(review): the three-operand xorps spelling presumably relies on the
;   instruction wrappers of the shared x86 include for non-AVX targets --
;   confirm against the file's %include.
%macro SETZERO 1
%if cpuflag(avx) || notcpuflag(sse2)
    xorps   %1, %1, %1
%else
    pxor    %1, %1
%endif
%endmacro
%macro SHUF 3 %macro SHUF 3
%if cpuflag(avx) %if cpuflag(avx)
mova %3, [%2 - 16] mova %3, [%2 - 16]
...@@ -265,7 +273,12 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \ ...@@ -265,7 +273,12 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \
synth_buf, synth_buf2, window, out, off, scale synth_buf, synth_buf2, window, out, off, scale
%define scale m0 %define scale m0
%if ARCH_X86_32 || WIN64 %if ARCH_X86_32 || WIN64
%if cpuflag(sse2) && notcpuflag(avx)
movd m0, scalem
SPLATD m0
%else
VBROADCASTSS m0, scalem VBROADCASTSS m0, scalem
%endif
; Make sure offset is in a register and not on the stack ; Make sure offset is in a register and not on the stack
%define OFFQ r4q %define OFFQ r4q
%else %else
...@@ -290,8 +303,8 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \ ...@@ -290,8 +303,8 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \
%endif %endif
.mainloop .mainloop
; m1 = a m2 = b m3 = c m4 = d ; m1 = a m2 = b m3 = c m4 = d
xorps m3, m3, m3 SETZERO m3
xorps m4, m4, m4 SETZERO m4
mova m1, [buf2 + i] mova m1, [buf2 + i]
mova m2, [buf2 + i + 16 * 4] mova m2, [buf2 + i + 16 * 4]
%if ARCH_X86_32 %if ARCH_X86_32
...@@ -308,8 +321,8 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \ ...@@ -308,8 +321,8 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \
%define ptr2 r7q ; must be loaded %define ptr2 r7q ; must be loaded
%define win r8q %define win r8q
%define j r9q %define j r9q
xorps m9, m9, m9 SETZERO m9
xorps m10, m10, m10 SETZERO m10
mova m7, [buf2 + i + mmsize] mova m7, [buf2 + i + mmsize]
mova m8, [buf2 + i + mmsize + 16 * 4] mova m8, [buf2 + i + mmsize + 16 * 4]
lea win, [windowq + i] lea win, [windowq + i]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment