Commit 0daa1cf0 authored by James Almer

x86/vf_blend: optimize difference and negation functions

Process more pixels per loop.
Reviewed-by: Paul B Mahol <onemda@gmail.com>
Signed-off-by: James Almer <jamrial@gmail.com>
parent fa50d936
...@@ -268,21 +268,25 @@ BLEND_INIT phoenix, 4 ...@@ -268,21 +268,25 @@ BLEND_INIT phoenix, 4
BLEND_END BLEND_END
%macro BLEND_ABS 0 %macro BLEND_ABS 0
BLEND_INIT difference, 3 BLEND_INIT difference, 5
pxor m2, m2 pxor m2, m2
.nextrow: .nextrow:
mov xq, widthq mov xq, widthq
.loop: .loop:
movh m0, [topq + xq] movu m0, [topq + xq]
movh m1, [bottomq + xq] movu m1, [bottomq + xq]
punpckhbw m3, m0, m2
punpcklbw m0, m2 punpcklbw m0, m2
punpckhbw m4, m1, m2
punpcklbw m1, m2 punpcklbw m1, m2
psubw m0, m1 psubw m0, m1
psubw m3, m4
ABS1 m0, m1 ABS1 m0, m1
packuswb m0, m0 ABS1 m3, m4
movh [dstq + xq], m0 packuswb m0, m3
add xq, mmsize / 2 mova [dstq + xq], m0
add xq, mmsize
jl .loop jl .loop
BLEND_END BLEND_END
...@@ -311,26 +315,30 @@ BLEND_INIT extremity, 8 ...@@ -311,26 +315,30 @@ BLEND_INIT extremity, 8
jl .loop jl .loop
BLEND_END BLEND_END
BLEND_INIT negation, 5 BLEND_INIT negation, 8
pxor m2, m2 pxor m2, m2
mova m4, [pw_255] mova m4, [pw_255]
.nextrow: .nextrow:
mov xq, widthq mov xq, widthq
.loop: .loop:
movh m0, [topq + xq] movu m0, [topq + xq]
movh m1, [bottomq + xq] movu m1, [bottomq + xq]
punpckhbw m5, m0, m2
punpcklbw m0, m2 punpcklbw m0, m2
punpckhbw m6, m1, m2
punpcklbw m1, m2 punpcklbw m1, m2
mova m3, m4 psubw m3, m4, m0
psubw m3, m0 psubw m7, m4, m5
psubw m3, m1 psubw m3, m1
psubw m7, m6
ABS1 m3, m1 ABS1 m3, m1
mova m0, m4 ABS1 m7, m1
psubw m0, m3 psubw m0, m4, m3
packuswb m0, m0 psubw m1, m4, m7
movh [dstq + xq], m0 packuswb m0, m1
add xq, mmsize / 2 mova [dstq + xq], m0
add xq, mmsize
jl .loop jl .loop
BLEND_END BLEND_END
%endmacro %endmacro
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment