Commit 79687079 authored by Justin Ruggles

x86: add support for fmaddps fma4 instruction with abstraction to avx/sse

parent 0cf7d849
...@@ -242,6 +242,7 @@ Optimization options (experts only): ...@@ -242,6 +242,7 @@ Optimization options (experts only):
--disable-sse disable SSE optimizations --disable-sse disable SSE optimizations
--disable-ssse3 disable SSSE3 optimizations --disable-ssse3 disable SSSE3 optimizations
--disable-avx disable AVX optimizations --disable-avx disable AVX optimizations
--disable-fma4 disable FMA4 optimizations
--disable-armv5te disable armv5te optimizations --disable-armv5te disable armv5te optimizations
--disable-armv6 disable armv6 optimizations --disable-armv6 disable armv6 optimizations
--disable-armv6t2 disable armv6t2 optimizations --disable-armv6t2 disable armv6t2 optimizations
...@@ -1047,6 +1048,7 @@ ARCH_EXT_LIST=' ...@@ -1047,6 +1048,7 @@ ARCH_EXT_LIST='
armv6t2 armv6t2
armvfp armvfp
avx avx
fma4
mmi mmi
mmx mmx
mmx2 mmx2
...@@ -1295,6 +1297,7 @@ mmx2_deps="mmx" ...@@ -1295,6 +1297,7 @@ mmx2_deps="mmx"
sse_deps="mmx" sse_deps="mmx"
ssse3_deps="sse" ssse3_deps="sse"
avx_deps="ssse3" avx_deps="ssse3"
fma4_deps="avx"
aligned_stack_if_any="ppc x86" aligned_stack_if_any="ppc x86"
fast_64bit_if_any="alpha ia64 mips64 parisc64 ppc64 sparc64 x86_64" fast_64bit_if_any="alpha ia64 mips64 parisc64 ppc64 sparc64 x86_64"
...@@ -2865,6 +2868,7 @@ EOF ...@@ -2865,6 +2868,7 @@ EOF
check_yasm "pextrd [eax], xmm0, 1" && enable yasm || check_yasm "pextrd [eax], xmm0, 1" && enable yasm ||
die "yasm not found, use --disable-yasm for a crippled build" die "yasm not found, use --disable-yasm for a crippled build"
check_yasm "vextractf128 xmm0, ymm0, 0" || disable avx check_yasm "vextractf128 xmm0, ymm0, 0" || disable avx
check_yasm "vfmaddps ymm0, ymm1, ymm2, ymm3" || disable fma4
fi fi
case "$cpu" in case "$cpu" in
...@@ -3292,6 +3296,7 @@ if enabled x86; then ...@@ -3292,6 +3296,7 @@ if enabled x86; then
echo "SSE enabled ${sse-no}" echo "SSE enabled ${sse-no}"
echo "SSSE3 enabled ${ssse3-no}" echo "SSSE3 enabled ${ssse3-no}"
echo "AVX enabled ${avx-no}" echo "AVX enabled ${avx-no}"
echo "FMA4 enabled ${fma4-no}"
echo "CMOV enabled ${cmov-no}" echo "CMOV enabled ${cmov-no}"
echo "CMOV is fast ${fast_cmov-no}" echo "CMOV is fast ${fast_cmov-no}"
echo "EBX available ${ebx_available-no}" echo "EBX available ${ebx_available-no}"
......
...@@ -1093,16 +1093,22 @@ AVX_INSTR pfmul, 1, 0, 1 ...@@ -1093,16 +1093,22 @@ AVX_INSTR pfmul, 1, 0, 1
%undef j %undef j
%macro FMA_INSTR 3 %macro FMA_INSTR 3
%macro %1 4-7 %1, %2, %3 %macro %1 5-8 %1, %2, %3
%if cpuflag(xop) %if cpuflag(xop) || cpuflag(fma4)
v%5 %1, %2, %3, %4 v%6 %1, %2, %3, %4
%else %else
%6 %1, %2, %3 %ifidn %1, %4
%7 %1, %4 %7 %5, %2, %3
%8 %1, %4, %5
%else
%7 %1, %2, %3
%8 %1, %4
%endif
%endif %endif
%endmacro %endmacro
%endmacro %endmacro
FMA_INSTR fmaddps, mulps, addps
FMA_INSTR pmacsdd, pmulld, paddd FMA_INSTR pmacsdd, pmulld, paddd
FMA_INSTR pmacsww, pmullw, paddw FMA_INSTR pmacsww, pmullw, paddw
FMA_INSTR pmadcswd, pmaddwd, paddd FMA_INSTR pmadcswd, pmaddwd, paddd
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment