Commit a4a4179e authored by Martin Vignali

avfilter/x86/vf_hflip : merge hflip byte and hflip short to one macro

parent 8fb1d63d
...@@ -29,11 +29,16 @@ pb_flip_short: db 14,15,12,13,10,11,8,9,6,7,4,5,2,3,0,1 ...@@ -29,11 +29,16 @@ pb_flip_short: db 14,15,12,13,10,11,8,9,6,7,4,5,2,3,0,1
SECTION .text SECTION .text
INIT_XMM ssse3 ;%1 byte or short, %2 b or w, %3 size in byte (1 for byte, 2 for short)
cglobal hflip_byte, 3, 5, 3, src, dst, w, r, x %macro HFLIP 3
mova m0, [pb_flip_byte] cglobal hflip_%1, 3, 5, 3, src, dst, w, r, x
mova m0, [pb_flip_%1]
xor xq, xq xor xq, xq
%if %3 == 1
movsxdifnidn wq, wd movsxdifnidn wq, wd
%else ; short
add wd, wd
%endif
mov rq, wq mov rq, wq
and rq, 2 * mmsize - 1 and rq, 2 * mmsize - 1
cmp wq, 2 * mmsize cmp wq, 2 * mmsize
...@@ -42,8 +47,8 @@ cglobal hflip_byte, 3, 5, 3, src, dst, w, r, x ...@@ -42,8 +47,8 @@ cglobal hflip_byte, 3, 5, 3, src, dst, w, r, x
.loop0: .loop0:
neg xq neg xq
movu m1, [srcq + xq - mmsize + 1] movu m1, [srcq + xq - mmsize + %3]
movu m2, [srcq + xq - 2 * mmsize + 1] movu m2, [srcq + xq - 2 * mmsize + %3]
pshufb m1, m0 pshufb m1, m0
pshufb m2, m0 pshufb m2, m0
neg xq neg xq
...@@ -59,49 +64,17 @@ cglobal hflip_byte, 3, 5, 3, src, dst, w, r, x ...@@ -59,49 +64,17 @@ cglobal hflip_byte, 3, 5, 3, src, dst, w, r, x
.loop1: .loop1:
neg xq neg xq
mov rb, [srcq + xq] mov r%2, [srcq + xq]
neg xq neg xq
mov [dstq + xq], rb mov [dstq + xq], r%2
add xq, 1 add xq, %3
cmp xq, wq cmp xq, wq
jl .loop1 jl .loop1
.end: .end:
RET RET
%endmacro
cglobal hflip_short, 3, 5, 3, src, dst, w, r, x INIT_XMM ssse3
mova m0, [pb_flip_short] HFLIP byte, b, 1
xor xq, xq HFLIP short, w, 2
add wd, wd
mov rq, wq
and rq, 2 * mmsize - 1
cmp wq, 2 * mmsize
jl .loop1
sub wq, rq
.loop0:
neg xq
movu m1, [srcq + xq - mmsize + 2]
movu m2, [srcq + xq - 2 * mmsize + 2]
pshufb m1, m0
pshufb m2, m0
neg xq
movu [dstq + xq ], m1
movu [dstq + xq + mmsize], m2
add xq, mmsize * 2
cmp xq, wq
jl .loop0
cmp rq, 0
je .end
add wq, rq
.loop1:
neg xq
mov rw, [srcq + xq]
neg xq
mov [dstq + xq], rw
add xq, 2
cmp xq, wq
jl .loop1
.end:
RET
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment