Commit 94267ddf authored by Mans Rullgard

ARM: clean up NEON put/avg_pixels macros

Although this adds a few lines, the macro calls are less convoluted.
Signed-off-by: Mans Rullgard <mans@mansr.com>
parent ca124013
......@@ -40,10 +40,10 @@ function ff_clear_blocks_neon, export=1
bx lr
endfunc
.macro pixels16 avg=0
.if \avg
.macro pixels16 rnd=1, avg=0
.if \avg
mov ip, r0
.endif
.endif
1: vld1.64 {d0, d1}, [r1], r2
vld1.64 {d2, d3}, [r1], r2
vld1.64 {d4, d5}, [r1], r2
......@@ -52,7 +52,7 @@ endfunc
pld [r1]
pld [r1, r2]
pld [r1, r2, lsl #1]
.if \avg
.if \avg
vld1.64 {d16,d17}, [ip,:128], r2
vrhadd.u8 q0, q0, q8
vld1.64 {d18,d19}, [ip,:128], r2
......@@ -61,7 +61,7 @@ endfunc
vrhadd.u8 q2, q2, q10
vld1.64 {d22,d23}, [ip,:128], r2
vrhadd.u8 q3, q3, q11
.endif
.endif
subs r3, r3, #4
vst1.64 {d0, d1}, [r0,:128], r2
vst1.64 {d2, d3}, [r0,:128], r2
......@@ -69,31 +69,31 @@ endfunc
vst1.64 {d6, d7}, [r0,:128], r2
bne 1b
bx lr
.endm
.endm
@ pixels16_x2: per loop iteration, load two 24-byte spans from [r1], average
@ each of the first 16 bytes of a span with its right-hand neighbour, and
@ store the two 16-byte results to [r0].
@   r0 = store pointer (stores carry a :128 alignment hint), advanced by r2
@   r1 = load pointer, advanced by r2 after each load
@   r3 = counter, decremented by 2 per iteration; the loop exits when it is 0
@ The body never references the rnd or avg parameters: the averaging
@ instruction comes either from the vhadd argument or from an `avg` helper
@ macro that pixfunc defines before expanding this macro.
@ NOTE(review): the header, each averaging line and the .endm appear twice
@ below (a vhadd-argument form and an avg-helper form). This looks like the
@ before/after lines of a diff whose +/- markers were lost -- confirm which
@ variant is live before assembling.
.macro pixels16_x2 vhadd=vrhadd.u8
.macro pixels16_x2 rnd=1, avg=0
@ d0-d2 and d4-d6 each receive 24 consecutive bytes from the load pointer
1: vld1.64 {d0-d2}, [r1], r2
vld1.64 {d4-d6}, [r1], r2
pld [r1]
pld [r1, r2]
@ flags set here are consumed by the bne at the bottom of the loop
subs r3, r3, #2
@ q1 = bytes 1..16 of the first span (the span shifted by one byte)
vext.8 q1, q0, q1, #1
\vhadd q0, q0, q1
avg q0, q0, q1
@ q3 = bytes 1..16 of the second span
vext.8 q3, q2, q3, #1
\vhadd q2, q2, q3
avg q2, q2, q3
vst1.64 {d0, d1}, [r0,:128], r2
vst1.64 {d4, d5}, [r0,:128], r2
bne 1b
bx lr
.endm
.endm
.macro pixels16_y2 vhadd=vrhadd.u8
.macro pixels16_y2 rnd=1, avg=0
vld1.64 {d0, d1}, [r1], r2
vld1.64 {d2, d3}, [r1], r2
1: subs r3, r3, #2
\vhadd q2, q0, q1
avg q2, q0, q1
vld1.64 {d0, d1}, [r1], r2
\vhadd q3, q0, q1
avg q3, q0, q1
vld1.64 {d2, d3}, [r1], r2
pld [r1]
pld [r1, r2]
......@@ -101,14 +101,14 @@ endfunc
vst1.64 {d6, d7}, [r0,:128], r2
bne 1b
bx lr
.endm
.endm
.macro pixels16_xy2 vshrn=vrshrn.u16 no_rnd=0
.macro pixels16_xy2 rnd=1, avg=0
vld1.64 {d0-d2}, [r1], r2
vld1.64 {d4-d6}, [r1], r2
.if \no_rnd
.ifeq \rnd
vmov.i16 q13, #1
.endif
.endif
pld [r1]
pld [r1, r2]
vext.8 q1, q0, q1, #1
......@@ -121,40 +121,40 @@ endfunc
vld1.64 {d0-d2}, [r1], r2
vadd.u16 q12, q8, q9
pld [r1]
.if \no_rnd
.ifeq \rnd
vadd.u16 q12, q12, q13
.endif
.endif
vext.8 q15, q0, q1, #1
vadd.u16 q1 , q10, q11
\vshrn d28, q12, #2
.if \no_rnd
shrn d28, q12, #2
.ifeq \rnd
vadd.u16 q1, q1, q13
.endif
\vshrn d29, q1, #2
.endif
shrn d29, q1, #2
vaddl.u8 q8, d0, d30
vld1.64 {d2-d4}, [r1], r2
vaddl.u8 q10, d1, d31
vst1.64 {d28,d29}, [r0,:128], r2
vadd.u16 q12, q8, q9
pld [r1, r2]
.if \no_rnd
.ifeq \rnd
vadd.u16 q12, q12, q13
.endif
.endif
vext.8 q2, q1, q2, #1
vadd.u16 q0, q10, q11
\vshrn d30, q12, #2
.if \no_rnd
shrn d30, q12, #2
.ifeq \rnd
vadd.u16 q0, q0, q13
.endif
\vshrn d31, q0, #2
.endif
shrn d31, q0, #2
vaddl.u8 q9, d2, d4
vaddl.u8 q11, d3, d5
vst1.64 {d30,d31}, [r0,:128], r2
bgt 1b
bx lr
.endm
.endm
.macro pixels8 avg=0
.macro pixels8 rnd=1, avg=0
1: vld1.64 {d0}, [r1], r2
vld1.64 {d1}, [r1], r2
vld1.64 {d2}, [r1], r2
......@@ -163,7 +163,7 @@ endfunc
pld [r1]
pld [r1, r2]
pld [r1, r2, lsl #1]
.if \avg
.if \avg
vld1.64 {d4}, [r0,:64], r2
vrhadd.u8 d0, d0, d4
vld1.64 {d5}, [r0,:64], r2
......@@ -173,7 +173,7 @@ endfunc
vld1.64 {d7}, [r0,:64], r2
vrhadd.u8 d3, d3, d7
sub r0, r0, r2, lsl #2
.endif
.endif
subs r3, r3, #4
vst1.64 {d0}, [r0,:64], r2
vst1.64 {d1}, [r0,:64], r2
......@@ -181,9 +181,9 @@ endfunc
vst1.64 {d3}, [r0,:64], r2
bne 1b
bx lr
.endm
.endm
.macro pixels8_x2 vhadd=vrhadd.u8
.macro pixels8_x2 rnd=1, avg=0
1: vld1.64 {d0, d1}, [r1], r2
vext.8 d1, d0, d1, #1
vld1.64 {d2, d3}, [r1], r2
......@@ -192,20 +192,20 @@ endfunc
pld [r1, r2]
subs r3, r3, #2
vswp d1, d2
\vhadd q0, q0, q1
avg q0, q0, q1
vst1.64 {d0}, [r0,:64], r2
vst1.64 {d1}, [r0,:64], r2
bne 1b
bx lr
.endm
.endm
.macro pixels8_y2 vhadd=vrhadd.u8
.macro pixels8_y2 rnd=1, avg=0
vld1.64 {d0}, [r1], r2
vld1.64 {d1}, [r1], r2
1: subs r3, r3, #2
\vhadd d4, d0, d1
avg d4, d0, d1
vld1.64 {d0}, [r1], r2
\vhadd d5, d0, d1
avg d5, d0, d1
vld1.64 {d1}, [r1], r2
pld [r1]
pld [r1, r2]
......@@ -213,14 +213,14 @@ endfunc
vst1.64 {d5}, [r0,:64], r2
bne 1b
bx lr
.endm
.endm
.macro pixels8_xy2 vshrn=vrshrn.u16 no_rnd=0
.macro pixels8_xy2 rnd=1, avg=0
vld1.64 {d0, d1}, [r1], r2
vld1.64 {d2, d3}, [r1], r2
.if \no_rnd
.ifeq \rnd
vmov.i16 q11, #1
.endif
.endif
pld [r1]
pld [r1, r2]
vext.8 d4, d0, d1, #1
......@@ -232,66 +232,83 @@ endfunc
pld [r1]
vadd.u16 q10, q8, q9
vext.8 d4, d0, d1, #1
.if \no_rnd
.ifeq \rnd
vadd.u16 q10, q10, q11
.endif
.endif
vaddl.u8 q8, d0, d4
\vshrn d5, q10, #2
shrn d5, q10, #2
vld1.64 {d2, d3}, [r1], r2
vadd.u16 q10, q8, q9
pld [r1, r2]
.if \no_rnd
.ifeq \rnd
vadd.u16 q10, q10, q11
.endif
.endif
vst1.64 {d5}, [r0,:64], r2
\vshrn d7, q10, #2
shrn d7, q10, #2
vext.8 d6, d2, d3, #1
vaddl.u8 q9, d2, d6
vst1.64 {d7}, [r0,:64], r2
bgt 1b
bx lr
.endm
.macro pixfunc pfx name suf rnd_op args:vararg
.endm
@ pixfunc: emit one exported function named ff_<pfx><name><suf>_neon whose
@ body is the expansion of the macro called <name>.
@   pfx, name, suf - pieces of the function name; name also selects the body
@   rnd            - non-zero: helpers use the rounding instructions
@                    (vrhadd.u8 / vrshrn.u16); zero: the non-rounding ones
@                    (vhadd.u8 / vshrn.u16)
@   avg            - passed through unchanged to the body macro
@ The helper macros `avg` and `shrn` exist only for the duration of one
@ expansion: they are defined before the function and purged after it, so
@ the next pixfunc call can define them again with a different rounding mode.
@ NOTE(review): the line that forwards rnd_op/args and the first of the two
@ closing .endm lines use parameters this signature does not declare; they
@ appear to belong to the older pixfunc signature listed just above this
@ macro -- confirm and keep only one variant.
.macro pixfunc pfx, name, suf, rnd=1, avg=0
.if \rnd
@ rounding variants
.macro avg rd, rn, rm
vrhadd.u8 \rd, \rn, \rm
.endm
.macro shrn rd, rn, rm
vrshrn.u16 \rd, \rn, \rm
.endm
.else
@ non-rounding variants
.macro avg rd, rn, rm
vhadd.u8 \rd, \rn, \rm
.endm
.macro shrn rd, rn, rm
vshrn.u16 \rd, \rn, \rm
.endm
.endif
function ff_\pfx\name\suf\()_neon, export=1
\name \rnd_op \args
\name \rnd, \avg
endfunc
.endm
@ drop the helpers so a later expansion may redefine them
.purgem avg
.purgem shrn
.endm
@ pixfunc2: instantiate the same body macro twice through pixfunc.
@ NOTE(review): two definitions of pixfunc2 follow back to back; they look
@ like the before/after versions of a diff with the +/- markers lost.
@ Confirm which one is live -- only one can remain in an assemblable file.
@
@ Variant taking free-form arguments: the first call passes only pfx and
@ name, the second additionally forwards everything in args.
.macro pixfunc2 pfx name args:vararg
pixfunc \pfx \name
pixfunc \pfx \name \args
.endm
@ Variant taking keyword arguments: the first call uses rnd=1 and no suffix,
@ the second uses the suffix _no_rnd with rnd=0; avg is forwarded to both.
.macro pixfunc2 pfx, name, avg=0
pixfunc \pfx, \name, rnd=1, avg=\avg
pixfunc \pfx, \name, _no_rnd, rnd=0, avg=\avg
.endm
@ 16-pixel-wide instantiations.
@ The qpel16 mc00 entry points only load 16 into r3 (the register the
@ pixels16 loops count down) and contain no return instruction.
@ NOTE(review): presumably execution falls through into the function emitted
@ by the pixfunc call that follows -- this depends on what the function and
@ endfunc macros emit, which is not visible here; confirm.
@ NOTE(review): each pixfunc/pixfunc2 call is listed in two argument styles
@ (positional and keyword, e.g. avg=0). They look like before/after lines of
@ a diff; keeping both would emit every function twice.
function ff_put_h264_qpel16_mc00_neon, export=1
mov r3, #16
endfunc
pixfunc put_ pixels16
pixfunc2 put_ pixels16_x2, _no_rnd, vhadd.u8
pixfunc2 put_ pixels16_y2, _no_rnd, vhadd.u8
pixfunc2 put_ pixels16_xy2, _no_rnd, vshrn.u16, 1
pixfunc put_, pixels16, avg=0
pixfunc2 put_, pixels16_x2, avg=0
pixfunc2 put_, pixels16_y2, avg=0
pixfunc2 put_, pixels16_xy2, avg=0
@ averaging variant: same row count, body expanded with avg enabled
function ff_avg_h264_qpel16_mc00_neon, export=1
mov r3, #16
endfunc
pixfunc avg_ pixels16,, 1
pixfunc avg_, pixels16, avg=1
@ 8-pixel-wide instantiations.
@ The qpel8 mc00 entry points only load 8 into r3 (the register the pixels8
@ loops count down) and contain no return instruction.
@ NOTE(review): presumably execution falls through into the function emitted
@ by the pixfunc call that follows -- this depends on what the function and
@ endfunc macros emit, which is not visible here; confirm.
@ NOTE(review): each pixfunc/pixfunc2 call is listed in two argument styles
@ (positional and keyword, e.g. avg=0). They look like before/after lines of
@ a diff; keeping both would emit every function twice.
function ff_put_h264_qpel8_mc00_neon, export=1
mov r3, #8
endfunc
pixfunc put_ pixels8
pixfunc2 put_ pixels8_x2, _no_rnd, vhadd.u8
pixfunc2 put_ pixels8_y2, _no_rnd, vhadd.u8
pixfunc2 put_ pixels8_xy2, _no_rnd, vshrn.u16, 1
pixfunc put_, pixels8, avg=0
pixfunc2 put_, pixels8_x2, avg=0
pixfunc2 put_, pixels8_y2, avg=0
pixfunc2 put_, pixels8_xy2, avg=0
@ averaging variant: same row count, body expanded with avg enabled
function ff_avg_h264_qpel8_mc00_neon, export=1
mov r3, #8
endfunc
pixfunc avg_ pixels8,, 1
pixfunc avg_, pixels8, avg=1
function ff_put_pixels_clamped_neon, export=1
vld1.64 {d16-d19}, [r0,:128]!
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment