Commit 94267ddf authored by Mans Rullgard

ARM: clean up NEON put/avg_pixels macros

Although this adds a few lines, the macro calls are less convoluted.
Signed-off-by: Mans Rullgard <mans@mansr.com>
parent ca124013
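The effect of the cleanup is easiest to see in the macro invocations at the end of the diff. For illustration, the lines below are copied from the hunks that follow: the old calls pass a rounding opcode and a bare flag positionally, while the new calls use named rnd/avg arguments with defaults.

    @ before: positional opcode and flag arguments
    pixfunc2 put_ pixels16_xy2, _no_rnd, vshrn.u16, 1
    pixfunc  avg_ pixels16,, 1

    @ after: named arguments (defaults rnd=1, avg=0)
    pixfunc2 put_, pixels16_xy2, avg=0
    pixfunc  avg_, pixels16, avg=1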
@@ -40,10 +40,10 @@ function ff_clear_blocks_neon, export=1
         bx              lr
 endfunc
-.macro pixels16 avg=0
+.macro pixels16 rnd=1, avg=0
 .if \avg
         mov             ip, r0
 .endif
 1:      vld1.64         {d0, d1}, [r1], r2
         vld1.64         {d2, d3}, [r1], r2
         vld1.64         {d4, d5}, [r1], r2
@@ -52,7 +52,7 @@ endfunc
         pld             [r1]
         pld             [r1, r2]
         pld             [r1, r2, lsl #1]
 .if \avg
         vld1.64         {d16,d17}, [ip,:128], r2
         vrhadd.u8       q0, q0, q8
         vld1.64         {d18,d19}, [ip,:128], r2
@@ -61,7 +61,7 @@ endfunc
         vrhadd.u8       q2, q2, q10
         vld1.64         {d22,d23}, [ip,:128], r2
         vrhadd.u8       q3, q3, q11
 .endif
         subs            r3, r3, #4
         vst1.64         {d0, d1}, [r0,:128], r2
         vst1.64         {d2, d3}, [r0,:128], r2
@@ -69,31 +69,31 @@ endfunc
         vst1.64         {d6, d7}, [r0,:128], r2
         bne             1b
         bx              lr
 .endm
-.macro pixels16_x2 vhadd=vrhadd.u8
+.macro pixels16_x2 rnd=1, avg=0
 1:      vld1.64         {d0-d2}, [r1], r2
         vld1.64         {d4-d6}, [r1], r2
         pld             [r1]
         pld             [r1, r2]
         subs            r3, r3, #2
         vext.8          q1, q0, q1, #1
-        \vhadd          q0, q0, q1
+        avg             q0, q0, q1
         vext.8          q3, q2, q3, #1
-        \vhadd          q2, q2, q3
+        avg             q2, q2, q3
         vst1.64         {d0, d1}, [r0,:128], r2
         vst1.64         {d4, d5}, [r0,:128], r2
         bne             1b
         bx              lr
 .endm
-.macro pixels16_y2 vhadd=vrhadd.u8
+.macro pixels16_y2 rnd=1, avg=0
         vld1.64         {d0, d1}, [r1], r2
         vld1.64         {d2, d3}, [r1], r2
 1:      subs            r3, r3, #2
-        \vhadd          q2, q0, q1
+        avg             q2, q0, q1
         vld1.64         {d0, d1}, [r1], r2
-        \vhadd          q3, q0, q1
+        avg             q3, q0, q1
         vld1.64         {d2, d3}, [r1], r2
         pld             [r1]
         pld             [r1, r2]
@@ -101,14 +101,14 @@ endfunc
         vst1.64         {d6, d7}, [r0,:128], r2
         bne             1b
         bx              lr
 .endm
-.macro pixels16_xy2 vshrn=vrshrn.u16 no_rnd=0
+.macro pixels16_xy2 rnd=1, avg=0
         vld1.64         {d0-d2}, [r1], r2
         vld1.64         {d4-d6}, [r1], r2
-.if \no_rnd
+.ifeq \rnd
         vmov.i16        q13, #1
 .endif
         pld             [r1]
         pld             [r1, r2]
         vext.8          q1, q0, q1, #1
@@ -121,40 +121,40 @@ endfunc
         vld1.64         {d0-d2}, [r1], r2
         vadd.u16        q12, q8, q9
         pld             [r1]
-.if \no_rnd
+.ifeq \rnd
         vadd.u16        q12, q12, q13
 .endif
         vext.8          q15, q0, q1, #1
         vadd.u16        q1,  q10, q11
-        \vshrn          d28, q12, #2
+        shrn            d28, q12, #2
-.if \no_rnd
+.ifeq \rnd
         vadd.u16        q1, q1, q13
 .endif
-        \vshrn          d29, q1, #2
+        shrn            d29, q1, #2
         vaddl.u8        q8, d0, d30
         vld1.64         {d2-d4}, [r1], r2
         vaddl.u8        q10, d1, d31
         vst1.64         {d28,d29}, [r0,:128], r2
         vadd.u16        q12, q8, q9
         pld             [r1, r2]
-.if \no_rnd
+.ifeq \rnd
         vadd.u16        q12, q12, q13
 .endif
         vext.8          q2, q1, q2, #1
         vadd.u16        q0, q10, q11
-        \vshrn          d30, q12, #2
+        shrn            d30, q12, #2
-.if \no_rnd
+.ifeq \rnd
         vadd.u16        q0, q0, q13
 .endif
-        \vshrn          d31, q0, #2
+        shrn            d31, q0, #2
         vaddl.u8        q9, d2, d4
         vaddl.u8        q11, d3, d5
         vst1.64         {d30,d31}, [r0,:128], r2
         bgt             1b
         bx              lr
 .endm
-.macro pixels8 avg=0
+.macro pixels8 rnd=1, avg=0
 1:      vld1.64         {d0}, [r1], r2
         vld1.64         {d1}, [r1], r2
         vld1.64         {d2}, [r1], r2
@@ -163,7 +163,7 @@ endfunc
         pld             [r1]
         pld             [r1, r2]
         pld             [r1, r2, lsl #1]
 .if \avg
         vld1.64         {d4}, [r0,:64], r2
         vrhadd.u8       d0, d0, d4
         vld1.64         {d5}, [r0,:64], r2
@@ -173,7 +173,7 @@ endfunc
         vld1.64         {d7}, [r0,:64], r2
         vrhadd.u8       d3, d3, d7
         sub             r0, r0, r2, lsl #2
 .endif
         subs            r3, r3, #4
         vst1.64         {d0}, [r0,:64], r2
         vst1.64         {d1}, [r0,:64], r2
@@ -181,9 +181,9 @@ endfunc
         vst1.64         {d3}, [r0,:64], r2
         bne             1b
         bx              lr
 .endm
-.macro pixels8_x2 vhadd=vrhadd.u8
+.macro pixels8_x2 rnd=1, avg=0
 1:      vld1.64         {d0, d1}, [r1], r2
         vext.8          d1, d0, d1, #1
         vld1.64         {d2, d3}, [r1], r2
@@ -192,20 +192,20 @@ endfunc
         pld             [r1, r2]
         subs            r3, r3, #2
         vswp            d1, d2
-        \vhadd          q0, q0, q1
+        avg             q0, q0, q1
         vst1.64         {d0}, [r0,:64], r2
         vst1.64         {d1}, [r0,:64], r2
         bne             1b
         bx              lr
 .endm
-.macro pixels8_y2 vhadd=vrhadd.u8
+.macro pixels8_y2 rnd=1, avg=0
         vld1.64         {d0}, [r1], r2
         vld1.64         {d1}, [r1], r2
 1:      subs            r3, r3, #2
-        \vhadd          d4, d0, d1
+        avg             d4, d0, d1
         vld1.64         {d0}, [r1], r2
-        \vhadd          d5, d0, d1
+        avg             d5, d0, d1
         vld1.64         {d1}, [r1], r2
         pld             [r1]
         pld             [r1, r2]
@@ -213,14 +213,14 @@ endfunc
         vst1.64         {d5}, [r0,:64], r2
         bne             1b
         bx              lr
 .endm
-.macro pixels8_xy2 vshrn=vrshrn.u16 no_rnd=0
+.macro pixels8_xy2 rnd=1, avg=0
         vld1.64         {d0, d1}, [r1], r2
         vld1.64         {d2, d3}, [r1], r2
-.if \no_rnd
+.ifeq \rnd
         vmov.i16        q11, #1
 .endif
         pld             [r1]
         pld             [r1, r2]
         vext.8          d4, d0, d1, #1
@@ -232,66 +232,83 @@ endfunc
         pld             [r1]
         vadd.u16        q10, q8, q9
         vext.8          d4, d0, d1, #1
-.if \no_rnd
+.ifeq \rnd
         vadd.u16        q10, q10, q11
 .endif
         vaddl.u8        q8, d0, d4
-        \vshrn          d5, q10, #2
+        shrn            d5, q10, #2
         vld1.64         {d2, d3}, [r1], r2
         vadd.u16        q10, q8, q9
         pld             [r1, r2]
-.if \no_rnd
+.ifeq \rnd
         vadd.u16        q10, q10, q11
 .endif
         vst1.64         {d5}, [r0,:64], r2
-        \vshrn          d7, q10, #2
+        shrn            d7, q10, #2
         vext.8          d6, d2, d3, #1
         vaddl.u8        q9, d2, d6
         vst1.64         {d7}, [r0,:64], r2
         bgt             1b
         bx              lr
 .endm
-.macro pixfunc pfx name suf rnd_op args:vararg
+.macro pixfunc pfx, name, suf, rnd=1, avg=0
+.if \rnd
+.macro avg rd, rn, rm
+        vrhadd.u8       \rd, \rn, \rm
+.endm
+.macro shrn rd, rn, rm
+        vrshrn.u16      \rd, \rn, \rm
+.endm
+.else
+.macro avg rd, rn, rm
+        vhadd.u8        \rd, \rn, \rm
+.endm
+.macro shrn rd, rn, rm
+        vshrn.u16       \rd, \rn, \rm
+.endm
+.endif
 function ff_\pfx\name\suf\()_neon, export=1
-        \name           \rnd_op \args
+        \name           \rnd, \avg
 endfunc
-.endm
+        .purgem         avg
+        .purgem         shrn
+.endm
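With this change, pixfunc defines temporary avg and shrn helper macros around each generated function, chosen by the rnd argument, and purges them again afterwards, so the pixel macros no longer receive an opcode as a parameter. As a rough sketch (expanded by hand from the macro bodies above, not part of the commit), a non-rounding instantiation expands approximately to:

    @ pixfunc put_, pixels16_x2, _no_rnd, rnd=0, avg=0  expands roughly to:
    .macro avg rd, rn, rm
            vhadd.u8        \rd, \rn, \rm   @ non-rounding halving add
    .endm
    .macro shrn rd, rn, rm
            vshrn.u16       \rd, \rn, \rm   @ narrowing shift, no rounding
    .endm
    function ff_put_pixels16_x2_no_rnd_neon, export=1
            pixels16_x2     0, 0
    endfunc
    .purgem avg
    .purgem shrn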
-.macro pixfunc2 pfx name args:vararg
-        pixfunc \pfx \name
-        pixfunc \pfx \name \args
+.macro pixfunc2 pfx, name, avg=0
+        pixfunc \pfx, \name, rnd=1, avg=\avg
+        pixfunc \pfx, \name, _no_rnd, rnd=0, avg=\avg
 .endm
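For completeness, pixfunc2 now simply emits both variants of a pixel macro, so a single call such as the one below (taken from the hunks that follow) generates the rounded and the _no_rnd function in one go:

    @ pixfunc2 put_, pixels8_x2, avg=0  expands to:
    pixfunc put_, pixels8_x2, rnd=1, avg=0            @ -> ff_put_pixels8_x2_neon
    pixfunc put_, pixels8_x2, _no_rnd, rnd=0, avg=0   @ -> ff_put_pixels8_x2_no_rnd_neon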
 function ff_put_h264_qpel16_mc00_neon, export=1
         mov             r3, #16
 endfunc
-        pixfunc  put_ pixels16
-        pixfunc2 put_ pixels16_x2,  _no_rnd, vhadd.u8
-        pixfunc2 put_ pixels16_y2,  _no_rnd, vhadd.u8
-        pixfunc2 put_ pixels16_xy2, _no_rnd, vshrn.u16, 1
+        pixfunc  put_, pixels16,     avg=0
+        pixfunc2 put_, pixels16_x2,  avg=0
+        pixfunc2 put_, pixels16_y2,  avg=0
+        pixfunc2 put_, pixels16_xy2, avg=0
 function ff_avg_h264_qpel16_mc00_neon, export=1
         mov             r3, #16
 endfunc
-        pixfunc  avg_ pixels16,, 1
+        pixfunc  avg_, pixels16, avg=1
 function ff_put_h264_qpel8_mc00_neon, export=1
         mov             r3, #8
 endfunc
-        pixfunc  put_ pixels8
-        pixfunc2 put_ pixels8_x2,  _no_rnd, vhadd.u8
-        pixfunc2 put_ pixels8_y2,  _no_rnd, vhadd.u8
-        pixfunc2 put_ pixels8_xy2, _no_rnd, vshrn.u16, 1
+        pixfunc  put_, pixels8,     avg=0
+        pixfunc2 put_, pixels8_x2,  avg=0
+        pixfunc2 put_, pixels8_y2,  avg=0
+        pixfunc2 put_, pixels8_xy2, avg=0
 function ff_avg_h264_qpel8_mc00_neon, export=1
         mov             r3, #8
 endfunc
-        pixfunc  avg_ pixels8,, 1
+        pixfunc  avg_, pixels8, avg=1
 function ff_put_pixels_clamped_neon, export=1
         vld1.64         {d16-d19}, [r0,:128]!
...