Commit d6b62ce1 authored by James Almer

Merge commit 'cef914e0'

* commit 'cef914e0':
  arm: vp8: Optimize put_epel16_h6v6 with vp8_epel8_v6_y2
Merged-by: James Almer <jamrial@gmail.com>
parents 34a0a974 cef914e0
...@@ -773,23 +773,6 @@ endfunc ...@@ -773,23 +773,6 @@ endfunc
vqrshrun.s16 \d1, q14, #7 vqrshrun.s16 \d1, q14, #7
.endm .endm
@ vp8_epel8_v6: apply a 6-tap filter across six 8-byte source registers and
@ write one 8-byte result.
@   \d0       destination D register (receives 8 unsigned bytes)
@   \s0..\s5  six source D registers, one per filter tap
@ The tap weights are read from the 16-bit lanes d0[0..3] and d1[0..1] of the
@ real NEON registers d0/d1 (i.e. q0), which the caller must have loaded.
@ Note: "\d0" is the macro argument, while a bare "d0[n]" is register d0.
@ Overwrites q8-q13 as scratch.
.macro vp8_epel8_v6 d0, s0, s1, s2, s3, s4, s5
@ Widen every source row from 8-bit to 16-bit lanes.
vmovl.u8 q10, \s2
vmovl.u8 q11, \s3
vmovl.u8 q9, \s1
vmovl.u8 q12, \s4
vmovl.u8 q8, \s0
vmovl.u8 q13, \s5
@ Two independent accumulators (q10 and q11), each taking three taps:
@   q10 = s2*d0[2] - s1*d0[1] + s0*d0[0]
@   q11 = s3*d0[3] - s4*d1[0] + s5*d1[1]
vmul.u16 q10, q10, d0[2]
vmul.u16 q11, q11, d0[3]
vmls.u16 q10, q9, d0[1]
vmls.u16 q11, q12, d1[0]
vmla.u16 q10, q8, d0[0]
vmla.u16 q11, q13, d1[1]
@ Combine the two partial sums with a signed saturating add.
vqadd.s16 q11, q10, q11
@ Rounding shift right by 7, saturating and narrowing to unsigned 8-bit.
vqrshrun.s16 \d0, q11, #7
.endm
.macro vp8_epel8_v6_y2 d0, d1, s0, s1, s2, s3, s4, s5, s6 .macro vp8_epel8_v6_y2 d0, d1, s0, s1, s2, s3, s4, s5, s6
vmovl.u8 q10, \s0 vmovl.u8 q10, \s0
vmovl.u8 q11, \s3 vmovl.u8 q11, \s3
...@@ -909,12 +892,12 @@ function ff_put_vp8_epel16_h6v6_neon, export=1 ...@@ -909,12 +892,12 @@ function ff_put_vp8_epel16_h6v6_neon, export=1
sub r2, r2, r3, lsl #1 sub r2, r2, r3, lsl #1
sub r2, r2, #2 sub r2, r2, #2
push {r4,lr} push {r4,lr}
vpush {d8-d9} vpush {d8-d15}
@ first pass (horizontal): @ first pass (horizontal):
ldr r4, [sp, #28] @ mx ldr r4, [sp, #64+8+4] @ mx
movrel lr, subpel_filters-16 movrel lr, subpel_filters-16
ldr r12, [sp, #24] @ h ldr r12, [sp, #64+8+0] @ h
add r4, lr, r4, lsl #4 add r4, lr, r4, lsl #4
sub sp, sp, #336+16 sub sp, sp, #336+16
vld1.16 {q0}, [r4,:128] vld1.16 {q0}, [r4,:128]
...@@ -931,9 +914,9 @@ function ff_put_vp8_epel16_h6v6_neon, export=1 ...@@ -931,9 +914,9 @@ function ff_put_vp8_epel16_h6v6_neon, export=1
bne 1b bne 1b
@ second pass (vertical): @ second pass (vertical):
ldr r4, [sp, #336+16+32] @ my ldr r4, [sp, #336+16+64+8+8] @ my
movrel lr, subpel_filters-16 movrel lr, subpel_filters-16
ldr r12, [sp, #336+16+24] @ h ldr r12, [sp, #336+16+64+8+0] @ h
add r4, lr, r4, lsl #4 add r4, lr, r4, lsl #4
add lr, sp, #15 add lr, sp, #15
vld1.16 {q0}, [r4,:128] vld1.16 {q0}, [r4,:128]
...@@ -941,18 +924,20 @@ function ff_put_vp8_epel16_h6v6_neon, export=1 ...@@ -941,18 +924,20 @@ function ff_put_vp8_epel16_h6v6_neon, export=1
2: 2:
vld1.8 {d2-d5}, [lr,:128]! vld1.8 {d2-d5}, [lr,:128]!
vld1.8 {d6-d9}, [lr,:128]! vld1.8 {d6-d9}, [lr,:128]!
vld1.8 {d28-d31},[lr,:128] vld1.8 {d10-d13},[lr,:128]!
sub lr, lr, #48 vld1.8 {d14-d15},[lr,:128]
sub lr, lr, #64
vp8_epel8_v6 d2, d2, d4, d6, d8, d28, d30 vp8_epel8_v6_y2 d2, d4, d2, d4, d6, d8, d10, d12, d14
vp8_epel8_v6 d3, d3, d5, d7, d9, d29, d31 vp8_epel8_v6_y2 d3, d5, d3, d5, d7, d9, d11, d13, d15
vst1.8 {d2-d3}, [r0,:128], r1 vst1.8 {d2-d3}, [r0,:128], r1
subs r12, r12, #1 vst1.8 {d4-d5}, [r0,:128], r1
subs r12, r12, #2
bne 2b bne 2b
add sp, sp, #336+16 add sp, sp, #336+16
vpop {d8-d9} vpop {d8-d15}
pop {r4,pc} pop {r4,pc}
endfunc endfunc
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment