Commit b9c7f66e authored by Ronald S. Bultje

Fix horizontal/horizontal_up 8x8l intra prediction x86/simd functions.

The original functions did not handle edge pixels correctly (for example,
when CODEC_FLAG_EMU_EDGE is set), which led to corrupt output in applications such as VLC.
Based on a patch by Daniel Kang <daniel d kang gmail com>.

Signed-off-by: Ronald S. Bultje <rsbultje gmail com>
parent c3897d76
...@@ -1249,7 +1249,10 @@ cglobal pred8x8l_horizontal_%1, 4,4 ...@@ -1249,7 +1249,10 @@ cglobal pred8x8l_horizontal_%1, 4,4
sub r0, r3 sub r0, r3
lea r2, [r0+r3*2] lea r2, [r0+r3*2]
movq mm0, [r0+r3*1-8] movq mm0, [r0+r3*1-8]
punpckhbw mm0, [r0+r3*0-8] test r1, r1
lea r1, [r0+r3]
cmovnz r1, r0
punpckhbw mm0, [r1+r3*0-8]
movq mm1, [r2+r3*1-8] movq mm1, [r2+r3*1-8]
punpckhbw mm1, [r0+r3*2-8] punpckhbw mm1, [r0+r3*2-8]
mov r2, r0 mov r2, r0
...@@ -1264,21 +1267,12 @@ cglobal pred8x8l_horizontal_%1, 4,4 ...@@ -1264,21 +1267,12 @@ cglobal pred8x8l_horizontal_%1, 4,4
punpckhdq mm3, mm1 punpckhdq mm3, mm1
lea r0, [r0+r3*2] lea r0, [r0+r3*2]
movq mm0, [r0+r3*0-8] movq mm0, [r0+r3*0-8]
movq mm1, [r2] movq mm1, [r1+r3*0-8]
mov r0, r2 mov r0, r2
movq mm4, mm3 movq mm4, mm3
movq mm2, mm3 movq mm2, mm3
PALIGNR mm4, mm0, 7, mm0 PALIGNR mm4, mm0, 7, mm0
PALIGNR mm1, mm2, 1, mm2 PALIGNR mm1, mm2, 1, mm2
test r1, r1 ; top_left
jnz .do_left
.fix_lt_1:
movq mm5, mm3
pxor mm5, mm4
psrlq mm5, 56
psllq mm5, 48
pxor mm1, mm5
.do_left:
movq mm0, mm4 movq mm0, mm4
PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5
movq mm4, mm0 movq mm4, mm0
...@@ -2153,7 +2147,10 @@ cglobal pred8x8l_horizontal_up_%1, 4,4 ...@@ -2153,7 +2147,10 @@ cglobal pred8x8l_horizontal_up_%1, 4,4
sub r0, r3 sub r0, r3
lea r2, [r0+r3*2] lea r2, [r0+r3*2]
movq mm0, [r0+r3*1-8] movq mm0, [r0+r3*1-8]
punpckhbw mm0, [r0+r3*0-8] test r1, r1
lea r1, [r0+r3]
cmovnz r1, r0
punpckhbw mm0, [r1+r3*0-8]
movq mm1, [r2+r3*1-8] movq mm1, [r2+r3*1-8]
punpckhbw mm1, [r0+r3*2-8] punpckhbw mm1, [r0+r3*2-8]
mov r2, r0 mov r2, r0
...@@ -2168,21 +2165,12 @@ cglobal pred8x8l_horizontal_up_%1, 4,4 ...@@ -2168,21 +2165,12 @@ cglobal pred8x8l_horizontal_up_%1, 4,4
punpckhdq mm3, mm1 punpckhdq mm3, mm1
lea r0, [r0+r3*2] lea r0, [r0+r3*2]
movq mm0, [r0+r3*0-8] movq mm0, [r0+r3*0-8]
movq mm1, [r2] movq mm1, [r1+r3*0-8]
mov r0, r2 mov r0, r2
movq mm4, mm3 movq mm4, mm3
movq mm2, mm3 movq mm2, mm3
PALIGNR mm4, mm0, 7, mm0 PALIGNR mm4, mm0, 7, mm0
PALIGNR mm1, mm2, 1, mm2 PALIGNR mm1, mm2, 1, mm2
test r1, r1
jnz .do_left
.fix_lt_1:
movq mm5, mm3
pxor mm5, mm4
psrlq mm5, 56
psllq mm5, 48
pxor mm1, mm5
.do_left:
movq mm0, mm4 movq mm0, mm4
PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5
movq mm4, mm0 movq mm4, mm0
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment