Commit 81943a10, authored by Christophe Gisquet, committed by Michael Niedermayer

x86: hevc_mc: load less data in epel filters

Before:
5679 decicycles in epel_bi, 2059976 runs, 37176 skips
3468 decicycles in epel_uni, 1040886 runs, 7690 skips

After:
5323 decicycles in epel_bi, 2059493 runs, 37659 skips
3262 decicycles in epel_uni, 1040871 runs, 7705 skips
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent 36284ae9
......@@ -176,15 +176,23 @@ QPEL_TABLE 12, 4, w, sse4
%else
%define rfilterq %2
%endif
movdqu m0, [rfilterq ] ;load 128bit of x
%if (%1 == 8 && %4 <= 4)
%define %%load movd
%elif (%1 == 8 && %4 <= 8) || (%1 > 8 && %4 <= 4)
%define %%load movq
%else
%define %%load movdqu
%endif
%%load m0, [rfilterq ]
%ifnum %3
movdqu m1, [rfilterq+ %3] ;load 128bit of x+stride
movdqu m2, [rfilterq+2*%3] ;load 128bit of x+2*stride
movdqu m3, [rfilterq+3*%3] ;load 128bit of x+3*stride
%%load m1, [rfilterq+ %3]
%%load m2, [rfilterq+2*%3]
%%load m3, [rfilterq+3*%3]
%else
movdqu m1, [rfilterq+ %3q] ;load 128bit of x+stride
movdqu m2, [rfilterq+2*%3q] ;load 128bit of x+2*stride
movdqu m3, [rfilterq+r3srcq] ;load 128bit of x+2*stride
%%load m1, [rfilterq+ %3q]
%%load m2, [rfilterq+2*%3q]
%%load m3, [rfilterq+r3srcq]
%endif
%if %1 == 8
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment