Commit 6e74e963, authored by Ronald S. Bultje, committed by Anton Khirnov

vp9lpf/x86: slightly simplify 44/48/84/88 h stores.

Signed-off-by: Anton Khirnov <anton@khirnov.net>
parent 6411c328
...@@ -725,34 +725,34 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3, dst, stride, mstride, dst2, stri ...@@ -725,34 +725,34 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3, dst, stride, mstride, dst2, stri
SBUTTERFLY bw, 2, 3, 8 SBUTTERFLY bw, 2, 3, 8
SBUTTERFLY wd, 0, 2, 8 SBUTTERFLY wd, 0, 2, 8
SBUTTERFLY wd, 1, 3, 8 SBUTTERFLY wd, 1, 3, 8
SBUTTERFLY dq, 0, 4, 8
SBUTTERFLY dq, 1, 5, 8
SBUTTERFLY dq, 2, 6, 8
SBUTTERFLY dq, 3, 7, 8
movd [P7], m0 movd [P7], m0
punpckhqdq m0, m8
movd [P6], m0
movd [Q0], m1
punpckhqdq m1, m9
movd [Q1], m1
movd [P3], m2 movd [P3], m2
punpckhqdq m2, m10 movd [Q0], m1
movd [P2], m2
movd [Q4], m3 movd [Q4], m3
punpckhqdq m3, m11 psrldq m0, 4
psrldq m1, 4
psrldq m2, 4
psrldq m3, 4
movd [P6], m0
movd [P2], m2
movd [Q1], m1
movd [Q5], m3 movd [Q5], m3
movd [P5], m4 psrldq m0, 4
punpckhqdq m4, m12 psrldq m1, 4
movd [P4], m4 psrldq m2, 4
movd [Q2], m5 psrldq m3, 4
punpckhqdq m5, m13 movd [P5], m0
movd [Q3], m5 movd [P1], m2
movd [P1], m6 movd [Q2], m1
punpckhqdq m6, m14 movd [Q6], m3
movd [P0], m6 psrldq m0, 4
movd [Q6], m7 psrldq m1, 4
punpckhqdq m7, m8 psrldq m2, 4
movd [Q7], m7 psrldq m3, 4
movd [P4], m0
movd [P0], m2
movd [Q3], m1
movd [Q7], m3
%else %else
; the following code do a transpose of 8 full lines to 16 half ; the following code do a transpose of 8 full lines to 16 half
; lines (high part). It is inlined to avoid the need of a staging area ; lines (high part). It is inlined to avoid the need of a staging area
...@@ -777,30 +777,22 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3, dst, stride, mstride, dst2, stri ...@@ -777,30 +777,22 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3, dst, stride, mstride, dst2, stri
SBUTTERFLY dq, 1, 5, 8 SBUTTERFLY dq, 1, 5, 8
SBUTTERFLY dq, 2, 6, 8 SBUTTERFLY dq, 2, 6, 8
SBUTTERFLY dq, 3, 7, 8 SBUTTERFLY dq, 3, 7, 8
movh [P7], m0 movh [P7], m0
punpckhqdq m0, m8 movhps [P6], m0
movh [P6], m0 movh [Q0], m1
movh [Q0], m1 movhps [Q1], m1
punpckhqdq m1, m9 movh [P3], m2
movh [Q1], m1 movhps [P2], m2
movh [P3], m2 movh [Q4], m3
punpckhqdq m2, m10 movhps [Q5], m3
movh [P2], m2 movh [P5], m4
movh [Q4], m3 movhps [P4], m4
punpckhqdq m3, m11 movh [Q2], m5
movh [Q5], m3 movhps [Q3], m5
movh [P5], m4 movh [P1], m6
punpckhqdq m4, m12 movhps [P0], m6
movh [P4], m4 movh [Q6], m7
movh [Q2], m5 movhps [Q7], m7
punpckhqdq m5, m13
movh [Q3], m5
movh [P1], m6
punpckhqdq m6, m14
movh [P0], m6
movh [Q6], m7
punpckhqdq m7, m8
movh [Q7], m7
%endif %endif
%endif %endif
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment