Commit 960490c0 authored by Ronald S. Bultje, committed by Michael Niedermayer

avcodec/x86/videodsp: Small speedups in ff_emulated_edge_mc x86 SIMD.

Don't use word-size multiplications if size == 2, and if we're using
SIMD instructions (size >= 8), complete leftover 4-byte sets using movd,
not mov. Both of these changes lead to minor speedups.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent cd86eb26
...@@ -344,10 +344,6 @@ VERTICAL_EXTEND 16, 22 ...@@ -344,10 +344,6 @@ VERTICAL_EXTEND 16, 22
; obviously not the same on both sides. ; obviously not the same on both sides.
%macro READ_V_PIXEL 2 %macro READ_V_PIXEL 2
%if %1 == 2
movzx valw, byte %2
imul valw, 0x0101
%else
movzx vald, byte %2 movzx vald, byte %2
imul vald, 0x01010101 imul vald, 0x01010101
%if %1 >= 8 %if %1 >= 8
...@@ -356,13 +352,15 @@ VERTICAL_EXTEND 16, 22 ...@@ -356,13 +352,15 @@ VERTICAL_EXTEND 16, 22
pshufd m0, m0, q0000 pshufd m0, m0, q0000
%else %else
punpckldq m0, m0 punpckldq m0, m0
%endif %endif ; mmsize == 16
%endif ; %1 >= 8 %endif ; %1 > 16
%endif
%endmacro ; READ_V_PIXEL %endmacro ; READ_V_PIXEL
%macro WRITE_V_PIXEL 2 %macro WRITE_V_PIXEL 2
%assign %%off 0 %assign %%off 0
%if %1 >= 8
%rep %1/mmsize %rep %1/mmsize
movu [%2+%%off], m0 movu [%2+%%off], m0
%assign %%off %%off+mmsize %assign %%off %%off+mmsize
...@@ -378,27 +376,29 @@ VERTICAL_EXTEND 16, 22 ...@@ -378,27 +376,29 @@ VERTICAL_EXTEND 16, 22
%assign %%off %%off+8 %assign %%off %%off+8
%endif %endif
%endif ; %1-%%off >= 8 %endif ; %1-%%off >= 8
%endif %endif ; mmsize == 16
%if %1-%%off >= 4 %if %1-%%off >= 4
%if %1 > 8 && %1-%%off > 4 %if %1 > 8 && %1-%%off > 4
movq [%2+%1-8], m0 movq [%2+%1-8], m0
%assign %%off %1 %assign %%off %1
%elif %1 >= 8 && %1-%%off >= 4
movd [%2+%%off], m0
%assign %%off %%off+4
%else %else
mov [%2+%%off], vald movd [%2+%%off], m0
%assign %%off %%off+4 %assign %%off %%off+4
%endif %endif
%endif ; %1-%%off >= 4 %endif ; %1-%%off >= 4
%if %1-%%off >= 2 %else ; %1 < 8
%if %1 >= 8
movd [%2+%1-4], m0 %rep %1/4
%else mov [%2+%%off], vald
%assign %%off %%off+4
%endrep ; %1/4
%endif ; %1 >=/< 8
%if %1-%%off == 2
mov [%2+%%off], valw mov [%2+%%off], valw
%endif
%endif ; (%1-%%off)/2 %endif ; (%1-%%off)/2
%endmacro ; WRITE_V_PIXEL %endmacro ; WRITE_V_PIXEL
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment