Commit 0a7a94f2 authored by Diego Biurrun

x86: Refactor PSWAPD fallback implementations and port to cpuflags

parent 9a07c133
...@@ -105,7 +105,8 @@ SECTION_TEXT ...@@ -105,7 +105,8 @@ SECTION_TEXT
pfadd %5, %4 ; {t6,t5} pfadd %5, %4 ; {t6,t5}
pxor %3, [ps_m1p1] ; {t8,t7} pxor %3, [ps_m1p1] ; {t8,t7}
mova %6, %1 mova %6, %1
PSWAPD %3, %3 movd [r0+12], %3
punpckhdq %3, [r0+8]
pfadd %1, %5 ; {r0,i0} pfadd %1, %5 ; {r0,i0}
pfsub %6, %5 ; {r2,i2} pfsub %6, %5 ; {r2,i2}
mova %4, %2 mova %4, %2
...@@ -498,19 +499,6 @@ fft8 %+ SUFFIX: ...@@ -498,19 +499,6 @@ fft8 %+ SUFFIX:
%endmacro %endmacro
%if ARCH_X86_32 %if ARCH_X86_32
; PSWAPD dst, src
; Puts the two 32-bit halves of src into dst in swapped order.
; Uses the native pswapd when 3dnowext is enabled, otherwise one of two
; fallbacks chosen by whether dst and src are the same operand.
; The same-operand fallback stores 4 bytes to [r0+12] and reads the qword
; at [r0+8], so it overwrites memory addressed through r0.
%macro PSWAPD 2
%if cpuflag(3dnowext)
pswapd %1, %2
%elifidn %1, %2
; In-place swap: spill the low dword to [r0+12], then interleave the
; register's high dword with the high dword of the qword at [r0+8],
; which is the value that was just stored.
movd [r0+12], %1
punpckhdq %1, [r0+8]
%else
; Distinct operands: build the swapped value in dst from a copy of src.
movq %1, %2
psrlq %1, 32 ; dst low dword = src high dword, dst high dword = 0
punpckldq %1, %2 ; dst high dword = src low dword
%endif
%endmacro
INIT_MMX 3dnowext INIT_MMX 3dnowext
FFT48_3DNOW FFT48_3DNOW
......
...@@ -246,16 +246,6 @@ FLOAT_TO_INT16_INTERLEAVE2 ...@@ -246,16 +246,6 @@ FLOAT_TO_INT16_INTERLEAVE2
INIT_XMM sse2 INIT_XMM sse2
FLOAT_TO_INT16_INTERLEAVE2 FLOAT_TO_INT16_INTERLEAVE2
; PSWAPD_SSE dst, src
; Dword swap implemented with a single pshufw.
; Immediate 0x4e = 01_00_11_10b selects source words 2,3,0,1 for destination
; words 0,1,2,3, i.e. the upper and lower 32-bit halves trade places.
%macro PSWAPD_SSE 2
pshufw %1, %2, 0x4e
%endmacro
; PSWAPD_3DNOW dst, src
; Dword swap built from movq/psrlq/punpckldq for use without pswapd.
; dst and src must be different operands: the shift discards dst's low dword,
; so with dst == src the final punpckldq would read the already-shifted value.
%macro PSWAPD_3DNOW 2
movq %1, %2
psrlq %1, 32 ; dst low dword = src high dword, dst high dword = 0
punpckldq %1, %2 ; dst high dword = src low dword
%endmacro
%macro FLOAT_TO_INT16_INTERLEAVE6 0 %macro FLOAT_TO_INT16_INTERLEAVE6 0
; void float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len) ; void float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len)
cglobal float_to_int16_interleave6, 2, 8, 0, dst, src, src1, src2, src3, src4, src5, len cglobal float_to_int16_interleave6, 2, 8, 0, dst, src, src1, src2, src3, src4, src5, len
...@@ -285,11 +275,11 @@ cglobal float_to_int16_interleave6, 2, 8, 0, dst, src, src1, src2, src3, src4, s ...@@ -285,11 +275,11 @@ cglobal float_to_int16_interleave6, 2, 8, 0, dst, src, src1, src2, src3, src4, s
packssdw mm0, mm3 packssdw mm0, mm3
packssdw mm1, mm4 packssdw mm1, mm4
packssdw mm2, mm5 packssdw mm2, mm5
pswapd mm3, mm0 PSWAPD mm3, mm0
punpcklwd mm0, mm1 punpcklwd mm0, mm1
punpckhwd mm1, mm2 punpckhwd mm1, mm2
punpcklwd mm2, mm3 punpcklwd mm2, mm3
pswapd mm3, mm0 PSWAPD mm3, mm0
punpckldq mm0, mm2 punpckldq mm0, mm2
punpckhdq mm2, mm1 punpckhdq mm2, mm1
punpckldq mm1, mm3 punpckldq mm1, mm3
...@@ -305,12 +295,9 @@ cglobal float_to_int16_interleave6, 2, 8, 0, dst, src, src1, src2, src3, src4, s ...@@ -305,12 +295,9 @@ cglobal float_to_int16_interleave6, 2, 8, 0, dst, src, src1, src2, src3, src4, s
%endmacro ; FLOAT_TO_INT16_INTERLEAVE6 %endmacro ; FLOAT_TO_INT16_INTERLEAVE6
INIT_MMX sse INIT_MMX sse
%define pswapd PSWAPD_SSE
FLOAT_TO_INT16_INTERLEAVE6 FLOAT_TO_INT16_INTERLEAVE6
INIT_MMX 3dnow INIT_MMX 3dnow
%define pswapd PSWAPD_3DNOW
FLOAT_TO_INT16_INTERLEAVE6 FLOAT_TO_INT16_INTERLEAVE6
%undef pswapd
INIT_MMX 3dnowext INIT_MMX 3dnowext
FLOAT_TO_INT16_INTERLEAVE6 FLOAT_TO_INT16_INTERLEAVE6
......
...@@ -319,6 +319,18 @@ ...@@ -319,6 +319,18 @@
%endif %endif
%endmacro %endmacro
; PSWAPD dst, src
; Puts the two 32-bit halves of src into dst in swapped order, choosing the
; implementation from the active cpuflags:
;   mmxext   - one pshufw using the q1032 shuffle constant (defined elsewhere)
;   3dnowext - native pswapd
;   3dnow    - movq/psrlq/punpckldq sequence; dst and src must be distinct
; Any other instruction set is rejected at assembly time rather than
; silently expanding to nothing and dropping the swap.
%macro PSWAPD 2
%if cpuflag(mmxext)
    pshufw    %1, %2, q1032
%elif cpuflag(3dnowext)
    pswapd    %1, %2
%elif cpuflag(3dnow)
%ifidn %1, %2
    ; With dst == src the psrlq below destroys the low dword before
    ; punpckldq can read it back, producing {hi, hi} instead of {hi, lo}.
    %error "PSWAPD: 3dnow fallback needs distinct dst and src operands"
%endif
    movq      %1, %2
    psrlq     %1, 32        ; dst low dword = src high dword, dst high = 0
    punpckldq %1, %2        ; dst high dword = src low dword
%else
    %error "PSWAPD requires mmxext, 3dnowext or 3dnow"
%endif
%endmacro
%macro DEINTB 5 ; mask, reg1, mask, reg2, optional src to fill masks from %macro DEINTB 5 ; mask, reg1, mask, reg2, optional src to fill masks from
%ifnum %5 %ifnum %5
pand m%3, m%5, m%4 ; src .. y6 .. y4 pand m%3, m%5, m%4 ; src .. y6 .. y4
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment