Commit 76ed71a7 authored by James Almer, committed by Michael Niedermayer

x86: move horizontal add macros to x86util

Also port relevant AVX2/XOP optimizations from x264 with permission
to relicense to LGPL from the corresponding authors
Signed-off-by: James Almer <jamrial@gmail.com>
Reviewed-by: "Ronald S. Bultje" <rsbultje@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent 443261cb
......@@ -171,22 +171,6 @@ PRED4x4_HD
;-----------------------------------------------------------------------------
; void ff_pred4x4_dc(pixel *src, const pixel *topright, int stride)
;-----------------------------------------------------------------------------
; HADDD sum, junk
;   Horizontally adds the packed dwords held in %1. The total is left in the
;   low dword of %1; the remaining lanes of %1 hold intermediate values.
;   %2 is a scratch register and is overwritten.
%macro HADDD 2 ; sum junk
%if mmsize != 16
    ; 64-bit registers: copy the upper dword of %1 into the low dword of %2.
    pshufw  %2, %1, 0xE
%else
    ; 128-bit registers: fold the high quadword onto the low quadword first...
    movhlps %2, %1
    paddd   %1, %2
    ; ...then copy dword 1 of the partial sums into the low dword of %2.
    pshuflw %2, %1, 0xE
%endif
    ; Final add, shared by both register widths.
    paddd   %1, %2
%endmacro
; HADDW reg, tmp
;   Horizontal add of the packed words in %1.
;   pmaddwd multiplies the words of %1 by the words of the pw_1 constant and
;   adds adjacent products into dwords; HADDD then reduces those dwords.
;   %2 is handed to HADDD as its scratch register and is overwritten.
;   NOTE(review): pw_1 is defined outside this view; presumably packed words
;   of 1, which would make pmaddwd a plain pairwise word add - confirm.
%macro HADDW 2
pmaddwd %1, [pw_1]
HADDD %1, %2
%endmacro
INIT_MMX mmxext
cglobal pred4x4_dc_10, 3, 3
......
......@@ -273,6 +273,39 @@
%endif
%endmacro
; HADDD sum, junk
;   Horizontally adds the packed dwords of %1, leaving the total in the low
;   dword of %1. %2 is a scratch register and is overwritten.
;   Handles 256-bit operands (sizeof%1 == 32), 128-bit operands with or
;   without XOP, and the mmsize < 16 case.
%macro HADDD 2 ; sum junk
%if sizeof%1 == 32
; 256-bit operand: alias each argument name to its xmm-prefixed form for the
; remainder of the macro, extract the upper 128-bit lane into %2 and add it
; onto the lower lane.
; NOTE(review): this relies on "xmm<arg>" resolving to the matching 128-bit
; register through definitions that live outside this view - confirm.
%define %2 xmm%2
vextracti128 %2, %1, 1
%define %1 xmm%1
paddd %1, %2
%endif
%if mmsize >= 16
%if cpuflag(xop) && sizeof%1 == 16
; XOP with a 128-bit operand: add adjacent dword pairs into qwords.
; NOTE(review): sizeof%1 is formed by token pasting, so it presumably still
; reports 32 for an operand aliased in the branch above - confirm.
vphadddq %1, %1
%endif
; Fold the high quadword of %1 onto the low quadword.
movhlps %2, %1
paddd %1, %2
%endif
; This condition is the complement of the vphadddq test above: when the
; adjacent-pair add was not done there, dword 1 still has to be added to
; dword 0 here.
; NOTE(review): PSHUFLW and q0032 are defined outside this view; q0032 is
; presumably the same shuffle as the 0xE immediate used previously - confirm.
%if notcpuflag(xop) || sizeof%1 != 16
PSHUFLW %2, %1, q0032
paddd %1, %2
%endif
; Remove any aliases created in the 256-bit branch so the argument names
; revert to their previous meaning after the macro.
%undef %1
%undef %2
%endmacro
; HADDW reg, tmp
;   Horizontal add of the packed words in %1, with the total left in the low
;   dword of %1. %2 is a scratch register and is overwritten.
%macro HADDW 2 ; reg, tmp
%if notcpuflag(xop) || sizeof%1 != 16
    ; Generic path: pmaddwd against pw_1 adds adjacent word products into
    ; dwords, then HADDD reduces the dwords.
    pmaddwd  %1, [pw_1]
    HADDD    %1, %2
%else
    ; XOP path, 128-bit operand only: vphaddwq adds each group of four words
    ; into a qword; the two qword partial sums are then combined.
    vphaddwq %1, %1
    movhlps  %2, %1
    paddd    %1, %2
%endif
%endmacro
%macro PALIGNR 4-5
%if cpuflag(ssse3)
%if %0==5
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment