Commit 3f3d748c authored by James Almer, committed by Michael Niedermayer

x86: Move XOP emulation to x86util

We need the emulation to support the cases where the first
argument is the same as the fourth. To achieve this a fifth
argument working as a temporary may be needed.
Emulation that doesn't obey the original instruction semantics
can't be in x86inc.
Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent 6c6e4dd1
...@@ -44,21 +44,21 @@ ALIGN 16 ...@@ -44,21 +44,21 @@ ALIGN 16
test jq, jq test jq, jq
jz .end_order jz .end_order
.loop_order: .loop_order:
pmacsdql m2, m0, m1, m2 PMACSDQL m2, m0, m1, m2, m0
movd m0, [decodedq+jq*4] movd m0, [decodedq+jq*4]
pmacsdql m3, m1, m0, m3 PMACSDQL m3, m1, m0, m3, m1
movd m1, [coeffsq+jq*4] movd m1, [coeffsq+jq*4]
inc jq inc jq
jl .loop_order jl .loop_order
.end_order: .end_order:
pmacsdql m2, m0, m1, m2 PMACSDQL m2, m0, m1, m2, m0
psrlq m2, m4 psrlq m2, m4
movd m0, [decodedq] movd m0, [decodedq]
paddd m0, m2 paddd m0, m2
movd [decodedq], m0 movd [decodedq], m0
sub lend, 2 sub lend, 2
jl .ret jl .ret
pmacsdql m3, m1, m0, m3 PMACSDQL m3, m1, m0, m3, m1
psrlq m3, m4 psrlq m3, m4
movd m1, [decodedq+4] movd m1, [decodedq+4]
paddd m1, m3 paddd m1, m3
......
...@@ -1407,25 +1407,6 @@ AVX_INSTR pfmul, 1, 0, 1 ...@@ -1407,25 +1407,6 @@ AVX_INSTR pfmul, 1, 0, 1
%undef i %undef i
%undef j %undef j
;-----------------------------------------------------------------------
; FMA_INSTR name, mul_op, add_op
;
; Defines a macro called <name> that takes four operands:
;     <name> dst, src1, src2, src3
; Parameters 5-7 of the generated macro default to <name>, <mul_op> and
; <add_op>, so callers only ever pass the four operands.
;
; With cpuflag(xop) the generated macro emits the single instruction
; v<name>. Otherwise it emits <mul_op> followed by <add_op>.
;
; Caveat: when dst and src3 are the same token, the product is written
; into src1, so src1 is overwritten by the fallback sequence.
;-----------------------------------------------------------------------
%macro FMA_INSTR 3
%macro %1 4-7 %1, %2, %3
%if cpuflag(xop)
; XOP path: one four-operand instruction.
v%5 %1, %2, %3, %4
%elifidn %1, %4
; dst is textually identical to src3 (the accumulator): the product
; cannot go into dst without losing the accumulator, so it is written
; into src1 and then added to dst. This overwrites src1.
%6 %2, %3
%7 %1, %2
%else
; dst differs from src3: build the product in dst, then add src3.
%6 %1, %2, %3
%7 %1, %4
%endif
%endmacro
%endmacro
; Instantiations: generated macro name, multiply op, add op.
FMA_INSTR pmacsdd, pmulld, paddd
FMA_INSTR pmacsww, pmullw, paddw
FMA_INSTR pmacsdql, pmuldq, paddq
FMA_INSTR pmadcswd, pmaddwd, paddd
; tzcnt is equivalent to "rep bsf" and is backwards-compatible with bsf. ; tzcnt is equivalent to "rep bsf" and is backwards-compatible with bsf.
; This lets us use tzcnt without bumping the yasm version requirement yet. ; This lets us use tzcnt without bumping the yasm version requirement yet.
%define tzcnt rep bsf %define tzcnt rep bsf
......
...@@ -666,6 +666,25 @@ ...@@ -666,6 +666,25 @@
%endif %endif
%endmacro %endmacro
;-----------------------------------------------------------------------
; PMA_EMU NAME, xop_op, mul_op, add_op
;
; Defines a macro called NAME that takes five operands:
;     NAME dst, src1, src2, src3, tmp
; Parameters 6-8 of the generated macro default to <xop_op>, <mul_op>
; and <add_op>, so callers only ever pass the five operands.
;
; With cpuflag(xop) the generated macro emits the single instruction
; v<xop_op> dst, src1, src2, src3 and tmp is not used.
; Otherwise it emits <mul_op> followed by <add_op>:
;   - dst identical to src3: the product is built in tmp, then
;     dst = src3 + tmp. tmp is overwritten in this case only.
;   - otherwise: the product is built in dst, then src3 is added to it.
;     tmp is not touched.
;
; NOTE(review): the three-operand forms in the fallback paths presumably
; rely on x86inc's instruction wrappers when AVX is unavailable - confirm.
;-----------------------------------------------------------------------
%macro PMA_EMU 4
%macro %1 5-8 %2, %3, %4
%if cpuflag(xop)
; XOP path: one four-operand instruction; the tmp operand is ignored.
v%6 %1, %2, %3, %4
%elifidn %1, %4
; dst is textually identical to src3 (the accumulator): keep the
; accumulator intact by building the product in tmp, then add.
%7 %5, %2, %3
%8 %1, %4, %5
%else
; dst differs from src3: build the product in dst, then add src3.
%7 %1, %2, %3
%8 %1, %4
%endif
%endmacro
%endmacro
; Instantiations: generated macro name, XOP instruction, multiply op, add op.
PMA_EMU PMACSWW, pmacsww, pmullw, paddw
PMA_EMU PMACSDD, pmacsdd, pmulld, paddd ; sse4 emulation
PMA_EMU PMACSDQL, pmacsdql, pmuldq, paddq ; sse4 emulation
PMA_EMU PMADCSWD, pmadcswd, pmaddwd, paddd
; Wrapper for non-FMA version of fmaddps ; Wrapper for non-FMA version of fmaddps
%macro FMULADD_PS 5 %macro FMULADD_PS 5
%if cpuflag(fma3) || cpuflag(fma4) %if cpuflag(fma3) || cpuflag(fma4)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment