Commit cddbfd2a authored by Clément Bœsch

x86/lossless_videodsp: simplify and explicit aligned/unaligned flags

parent 78a9f185
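As a quick illustration of the mechanism this commit switches to: instead of a boolean is_aligned flag guarded by %if/%else blocks, the macros now take an a/u parameter that NASM pastes straight into the mov%1 token, so one line of macro body expands to either mova or movu; %ifidn can still compare the parameter textually where the two variants genuinely differ. The following is a minimal sketch, not part of the patch; COPY_WORDS is a hypothetical macro, and mova/movu, the mN registers and the q-suffixed argument names are assumed from the x86inc/x86util framework the file already uses.

; Illustrative sketch only; COPY_WORDS is made up for this example.
%macro COPY_WORDS 1 ; %1 = a/u (aligned/unaligned)
    mov%1   m0, [srcq]          ; expands to mova/movu m0, [srcq]
    mov%1   [dstq], m0          ; expands to mova/movu [dstq], m0
%ifidn %1, a                    ; textual comparison of the parameter with "a"
    ; work specific to the aligned instantiation would go here
%endif
%endmacro

; COPY_WORDS a   ; pointers known to be mmsize-aligned: aligned moves
; COPY_WORDS u   ; otherwise: unaligned moves

Compared with the old 0/1 flag, this removes the duplicated %if/%else load and store blocks while keeping the call sites (ADD_INT16_LOOP a versus ADD_INT16_LOOP u) explicit about alignment.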
@@ -31,7 +31,7 @@ pb_zzzzzzzz67676767: db -1,-1,-1,-1,-1,-1,-1,-1, 6, 7, 6, 7, 6, 7, 6, 7
 SECTION_TEXT
-%macro ADD_INT16_LOOP 1 ; %1 = is_aligned
+%macro ADD_INT16_LOOP 1 ; %1 = a/u (aligned/unaligned)
     movd    m4, maskd
     SPLATW  m4, m4
     add     wq, wq
@@ -51,28 +51,16 @@ SECTION_TEXT
     neg     wq
     jz %%.end
 %%.loop:
-%if %1
-    mova    m0, [srcq+wq]
-    mova    m1, [dstq+wq]
-    mova    m2, [srcq+wq+mmsize]
-    mova    m3, [dstq+wq+mmsize]
-%else
-    movu    m0, [srcq+wq]
-    movu    m1, [dstq+wq]
-    movu    m2, [srcq+wq+mmsize]
-    movu    m3, [dstq+wq+mmsize]
-%endif
+    mov%1   m0, [srcq+wq]
+    mov%1   m1, [dstq+wq]
+    mov%1   m2, [srcq+wq+mmsize]
+    mov%1   m3, [dstq+wq+mmsize]
     paddw   m0, m1
     paddw   m2, m3
     pand    m0, m4
     pand    m2, m4
-%if %1
-    mova    [dstq+wq]       , m0
-    mova    [dstq+wq+mmsize], m2
-%else
-    movu    [dstq+wq]       , m0
-    movu    [dstq+wq+mmsize], m2
-%endif
+    mov%1   [dstq+wq]       , m0
+    mov%1   [dstq+wq+mmsize], m2
     add     wq, 2*mmsize
     jl %%.loop
 %%.end:
@@ -81,7 +69,7 @@ SECTION_TEXT
 INIT_MMX mmx
 cglobal add_int16, 4,4,5, dst, src, mask, w
-    ADD_INT16_LOOP 1
+    ADD_INT16_LOOP a
 INIT_XMM sse2
 cglobal add_int16, 4,4,5, dst, src, mask, w
@@ -89,11 +77,11 @@ cglobal add_int16, 4,4,5, dst, src, mask, w
     jnz .unaligned
     test    dstq, mmsize-1
     jnz .unaligned
-    ADD_INT16_LOOP 1
+    ADD_INT16_LOOP a
 .unaligned:
-    ADD_INT16_LOOP 0
+    ADD_INT16_LOOP u
-%macro DIFF_INT16_LOOP 1 ; %1 = is_aligned
+%macro DIFF_INT16_LOOP 1 ; %1 = a/u (aligned/unaligned)
     movd    m4, maskd
     SPLATW  m4, m4
     add     wq, wq
@@ -114,28 +102,16 @@ cglobal add_int16, 4,4,5, dst, src, mask, w
     neg     wq
     jz %%.end
 %%.loop:
-%if %1
-    mova    m0, [src1q+wq]
-    mova    m1, [src2q+wq]
-    mova    m2, [src1q+wq+mmsize]
-    mova    m3, [src2q+wq+mmsize]
-%else
-    movu    m0, [src1q+wq]
-    movu    m1, [src2q+wq]
-    movu    m2, [src1q+wq+mmsize]
-    movu    m3, [src2q+wq+mmsize]
-%endif
+    mov%1   m0, [src1q+wq]
+    mov%1   m1, [src2q+wq]
+    mov%1   m2, [src1q+wq+mmsize]
+    mov%1   m3, [src2q+wq+mmsize]
     psubw   m0, m1
     psubw   m2, m3
     pand    m0, m4
     pand    m2, m4
-%if %1
-    mova    [dstq+wq]       , m0
-    mova    [dstq+wq+mmsize], m2
-%else
-    movu    [dstq+wq]       , m0
-    movu    [dstq+wq+mmsize], m2
-%endif
+    mov%1   [dstq+wq]       , m0
+    mov%1   [dstq+wq+mmsize], m2
     add     wq, 2*mmsize
     jl %%.loop
 %%.end:
@@ -144,7 +120,7 @@ cglobal add_int16, 4,4,5, dst, src, mask, w
 INIT_MMX mmx
 cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w
-    DIFF_INT16_LOOP 1
+    DIFF_INT16_LOOP a
 INIT_XMM sse2
 cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w
@@ -154,22 +130,18 @@ cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w
     jnz .unaligned
     test    dstq, mmsize-1
     jnz .unaligned
-    DIFF_INT16_LOOP 1
+    DIFF_INT16_LOOP a
 .unaligned:
-    DIFF_INT16_LOOP 0
+    DIFF_INT16_LOOP u
-%macro ADD_HFYU_LEFT_LOOP_INT16 2 ; %1 = dst_is_aligned, %2 = src_is_aligned
+%macro ADD_HFYU_LEFT_LOOP_INT16 2 ; %1 = dst alignment (a/u), %2 = src alignment (a/u)
     add     wq, wq
     add     srcq, wq
     add     dstq, wq
     neg     wq
 %%.loop:
-%if %2
-    mova    m1, [srcq+wq]
-%else
-    movu    m1, [srcq+wq]
-%endif
+    mov%2   m1, [srcq+wq]
     mova    m2, m1
     pslld   m1, 16
     paddw   m1, m2
@@ -185,7 +157,7 @@ cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w
 %endif
     paddw   m0, m1
     pand    m0, m7
-%if %1
+%ifidn %1, a
     mova    [dstq+wq], m0
 %else
     movq    [dstq+wq], m0
@@ -214,7 +186,7 @@ cglobal add_hfyu_left_prediction_int16, 4,4,8, dst, src, mask, w, left
     psllq   m0, 48
     movd    m7, maskm
     SPLATW  m7 ,m7
-    ADD_HFYU_LEFT_LOOP_INT16 1, 1
+    ADD_HFYU_LEFT_LOOP_INT16 a, a
 INIT_XMM sse4
 cglobal add_hfyu_left_prediction_int16, 4,4,8, dst, src, mask, w, left
@@ -229,11 +201,11 @@ cglobal add_hfyu_left_prediction_int16, 4,4,8, dst, src, mask, w, left
     jnz .src_unaligned
     test    dstq, 15
     jnz .dst_unaligned
-    ADD_HFYU_LEFT_LOOP_INT16 1, 1
+    ADD_HFYU_LEFT_LOOP_INT16 a, a
 .dst_unaligned:
-    ADD_HFYU_LEFT_LOOP_INT16 0, 1
+    ADD_HFYU_LEFT_LOOP_INT16 u, a
 .src_unaligned:
-    ADD_HFYU_LEFT_LOOP_INT16 0, 0
+    ADD_HFYU_LEFT_LOOP_INT16 u, u
 ; void add_hfyu_median_prediction_mmxext(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int mask, int w, int *left, int *left_top)
 INIT_MMX mmxext
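The SSE2 wrappers above keep both macro instantiations inside one function and pick between them at run time. A control-flow sketch of that dispatch follows, with the assumptions spelled out: the initial test on srcq sits just above the hunk shown in the diff, and the macro body is assumed to end in a return, which is why the aligned instance does not fall through into .unaligned.

; Control-flow sketch only (the test on srcq and the returning macro tail are
; reconstructed assumptions, not visible in the hunks above).
cglobal add_int16, 4,4,5, dst, src, mask, w
    test    srcq, mmsize-1      ; any low bit set -> src not mmsize-aligned
    jnz .unaligned
    test    dstq, mmsize-1      ; any low bit set -> dst not mmsize-aligned
    jnz .unaligned
    ADD_INT16_LOOP a            ; aligned instance: mov%1 expands to mova
.unaligned:
    ADD_INT16_LOOP u            ; unaligned instance: mov%1 expands to movu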