Commit cbc7d60a authored by Mans Rullgard

arm: dsputil: fix overreads in put/avg_pixels functions

The vertically interpolating variants of these functions read
one line ahead to optimise the loop.  On the last line processed,
this read-ahead can fall outside the source buffer.  Fix these
invalid reads by processing the last line outside the loop.
Signed-off-by: Mans Rullgard <mans@mansr.com>
parent 1de53d00
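In C terms, the change peels the final iteration out of a software-pipelined loop. Below is a minimal scalar sketch of the idea for the 8-pixel-wide y2 (vertical half-pel) case; the function name is illustrative and this is not the libav C fallback. The same peeling is applied in every hunk below; only the epilogue arithmetic differs between the y2 and xy2 variants.

    #include <stdint.h>
    #include <stddef.h>
    #include <string.h>

    /* Sketch of the pattern being fixed (hypothetical helper).
     * Averaging h output rows vertically needs input rows 0..h.
     * The NEON loop is software pipelined: while producing row y it
     * already loads row y+2 for the next iteration.  Before this
     * commit the loop also produced the final row, so its look-ahead
     * fetched row h+1, one line past the buffer.  The fix shortens
     * the loop by one step (the "sub r3, r3, #2" in the two-rows-
     * per-iteration NEON code) and emits the last row afterwards,
     * where no look-ahead load is issued. */
    static void put_pixels8_y2_sketch(uint8_t *dst, const uint8_t *src,
                                      ptrdiff_t stride, int h)
    {
        uint8_t a[8], b[8];

        memcpy(a, src, 8); src += stride;     /* input row 0           */
        memcpy(b, src, 8); src += stride;     /* input row 1           */
        for (int y = 0; y < h - 1; y++) {     /* peeled: stops at h-2  */
            for (int x = 0; x < 8; x++)       /* output row y          */
                dst[x] = (a[x] + b[x] + 1) >> 1;
            dst += stride;
            memcpy(a, b, 8);
            memcpy(b, src, 8); src += stride; /* look-ahead: row y+2   */
        }
        for (int x = 0; x < 8; x++)           /* last row, no look-ahead */
            dst[x] = (a[x] + b[x] + 1) >> 1;
    }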
@@ -95,6 +95,7 @@ endfunc
.endm
.macro pixels16_y2 rnd=1, avg=0
sub r3, r3, #2
vld1.64 {q0}, [r1], r2
vld1.64 {q1}, [r1], r2
1: subs r3, r3, #2
@@ -114,10 +115,25 @@ endfunc
vst1.64 {q2}, [r0,:128], r2
vst1.64 {q3}, [r0,:128], r2
bne 1b
avg q2, q0, q1
vld1.64 {q0}, [r1], r2
avg q3, q0, q1
.if \avg
vld1.8 {q8}, [r0,:128], r2
vld1.8 {q9}, [r0,:128]
vrhadd.u8 q2, q2, q8
vrhadd.u8 q3, q3, q9
sub r0, r0, r2
.endif
vst1.64 {q2}, [r0,:128], r2
vst1.64 {q3}, [r0,:128], r2
bx lr
.endm
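The ".if \avg" epilogue above mirrors what the loop body does for the avg_ variants: read back the destination rows and fold them in with a rounding average (vrhadd.u8) before the final stores. A scalar sketch of that per-row step, with an illustrative name:

    #include <stdint.h>

    /* Rounding average of the interpolated row tmp with the pixels
     * already present in dst -- the scalar equivalent of vrhadd.u8. */
    static void avg_store_row(uint8_t *dst, const uint8_t *tmp, int w)
    {
        for (int x = 0; x < w; x++)
            dst[x] = (dst[x] + tmp[x] + 1) >> 1;
    }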
.macro pixels16_xy2 rnd=1, avg=0
sub r3, r3, #2
vld1.64 {d0-d2}, [r1], r2
vld1.64 {d4-d6}, [r1], r2
.ifeq \rnd
@@ -173,6 +189,42 @@ endfunc
vaddl.u8 q11, d3, d5
vst1.64 {q15}, [r0,:128], r2
bgt 1b
vld1.64 {d0-d2}, [r1], r2
vadd.u16 q12, q8, q9
.ifeq \rnd
vadd.u16 q12, q12, q13
.endif
vext.8 q15, q0, q1, #1
vadd.u16 q1 , q10, q11
shrn d28, q12, #2
.ifeq \rnd
vadd.u16 q1, q1, q13
.endif
shrn d29, q1, #2
.if \avg
vld1.8 {q8}, [r0,:128]
vrhadd.u8 q14, q14, q8
.endif
vaddl.u8 q8, d0, d30
vaddl.u8 q10, d1, d31
vst1.64 {q14}, [r0,:128], r2
vadd.u16 q12, q8, q9
.ifeq \rnd
vadd.u16 q12, q12, q13
.endif
vadd.u16 q0, q10, q11
shrn d30, q12, #2
.ifeq \rnd
vadd.u16 q0, q0, q13
.endif
shrn d31, q0, #2
.if \avg
vld1.8 {q9}, [r0,:128]
vrhadd.u8 q15, q15, q9
.endif
vst1.64 {q15}, [r0,:128], r2
bx lr
.endm
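For the xy2 case each output pixel averages a 2x2 block of input pixels, which is why the epilogue still needs one more input line (the vld1.64 right after the loop) but must not fetch the line after it. A scalar sketch of the per-row arithmetic, assuming the standard dsputil rounding conventions (bias 2 for rnd=1, bias 1 for the no_rnd path that adds q13 before the narrowing shift):

    #include <stdint.h>
    #include <stddef.h>

    /* One output row of 2x2 averaging; a sketch, not the libav C code.
     * Reading src[x + stride + 1] is why output row y needs input rows
     * y and y+1, and why the loop's look-ahead of one further row
     * overran the buffer on the last iteration before this fix. */
    static void xy2_row(uint8_t *dst, const uint8_t *src,
                        ptrdiff_t stride, int w, int rnd)
    {
        int bias = rnd ? 2 : 1;   /* assumed rounding conventions */
        for (int x = 0; x < w; x++)
            dst[x] = (src[x] + src[x + 1] +
                      src[x + stride] + src[x + stride + 1] + bias) >> 2;
    }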
@@ -228,6 +280,7 @@ endfunc
.endm
.macro pixels8_y2 rnd=1, avg=0
sub r3, r3, #2
vld1.64 {d0}, [r1], r2
vld1.64 {d1}, [r1], r2
1: subs r3, r3, #2
@@ -246,10 +299,24 @@ endfunc
vst1.64 {d4}, [r0,:64], r2
vst1.64 {d5}, [r0,:64], r2
bne 1b
avg d4, d0, d1
vld1.64 {d0}, [r1], r2
avg d5, d0, d1
.if \avg
vld1.8 {d2}, [r0,:64], r2
vld1.8 {d3}, [r0,:64]
vrhadd.u8 q2, q2, q1
sub r0, r0, r2
.endif
vst1.64 {d4}, [r0,:64], r2
vst1.64 {d5}, [r0,:64], r2
bx lr
.endm
.macro pixels8_xy2 rnd=1, avg=0
sub r3, r3, #2
vld1.64 {q0}, [r1], r2
vld1.64 {q1}, [r1], r2
.ifeq \rnd
@@ -291,6 +358,31 @@ endfunc
vaddl.u8 q9, d2, d6
vst1.64 {d7}, [r0,:64], r2
bgt 1b
vld1.64 {q0}, [r1], r2
vadd.u16 q10, q8, q9
vext.8 d4, d0, d1, #1
.ifeq \rnd
vadd.u16 q10, q10, q11
.endif
vaddl.u8 q8, d0, d4
shrn d5, q10, #2
vadd.u16 q10, q8, q9
.if \avg
vld1.8 {d7}, [r0,:64]
vrhadd.u8 d5, d5, d7
.endif
.ifeq \rnd
vadd.u16 q10, q10, q11
.endif
vst1.64 {d5}, [r0,:64], r2
shrn d7, q10, #2
.if \avg
vld1.8 {d5}, [r0,:64]
vrhadd.u8 d7, d7, d5
.endif
vst1.64 {d7}, [r0,:64], r2
bx lr
.endm