Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
f574b4da
Commit
f574b4da
authored
Sep 28, 2013
by
Ronald S. Bultje
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
vp8: use 2 registers for dst_stride and src_stride in neon bilin filter.
parent
c461265a
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
87 additions
and
78 deletions
+87
-78
vp8dsp_neon.S
libavcodec/arm/vp8dsp_neon.S
+87
-78
No files found.
libavcodec/arm/vp8dsp_neon.S
View file @
f574b4da
...
@@ -1576,18 +1576,19 @@ endconst
...
@@ -1576,18 +1576,19 @@ endconst
/* Bilinear MC */
/* Bilinear MC */
function ff_put_vp8_bilin16_h_neon, export=1
function ff_put_vp8_bilin16_h_neon, export=1
ldr r3, [sp, #4] @ mx
push {lr}
rsb r12, r3, #8
ldr lr, [sp, #8] @ mx
vdup.8 d0, r3
rsb r12, lr, #8
vdup.8 d0, lr
vdup.8 d1, r12
vdup.8 d1, r12
ldr r12, [sp
]
@ h
ldr r12, [sp
, #4]
@ h
1:
1:
subs r12, r12, #2
subs r12, r12, #2
vld1.8 {d2-d4}, [r2], r
1
vld1.8 {d2-d4}, [r2], r
3
vext.8 q2, q1, q2, #1
vext.8 q2, q1, q2, #1
vmull.u8 q8, d2, d1
vmull.u8 q8, d2, d1
vmlal.u8 q8, d4, d0
vmlal.u8 q8, d4, d0
vld1.8 {d18-d20},[r2], r
1
vld1.8 {d18-d20},[r2], r
3
vmull.u8 q3, d3, d1
vmull.u8 q3, d3, d1
vmlal.u8 q3, d5, d0
vmlal.u8 q3, d5, d0
vext.8 q10, q9, q10, #1
vext.8 q10, q9, q10, #1
...
@@ -1603,24 +1604,25 @@ function ff_put_vp8_bilin16_h_neon, export=1
...
@@ -1603,24 +1604,25 @@ function ff_put_vp8_bilin16_h_neon, export=1
vst1.8 {q3}, [r0,:128], r1
vst1.8 {q3}, [r0,:128], r1
bgt 1b
bgt 1b
bx lr
pop {pc}
endfunc
endfunc
function ff_put_vp8_bilin16_v_neon, export=1
function ff_put_vp8_bilin16_v_neon, export=1
ldr r3, [sp, #8] @ my
push {lr}
rsb r12, r3, #8
ldr lr, [sp, #12] @ my
vdup.8 d0, r3
rsb r12, lr, #8
vdup.8 d0, lr
vdup.8 d1, r12
vdup.8 d1, r12
ldr r12, [sp
]
@ h
ldr r12, [sp
, #4]
@ h
vld1.8 {q1}, [r2], r
1
vld1.8 {q1}, [r2], r
3
1:
1:
subs r12, r12, #2
subs r12, r12, #2
vld1.8 {q2}, [r2], r
1
vld1.8 {q2}, [r2], r
3
vmull.u8 q3, d2, d1
vmull.u8 q3, d2, d1
vmlal.u8 q3, d4, d0
vmlal.u8 q3, d4, d0
vmull.u8 q8, d3, d1
vmull.u8 q8, d3, d1
vmlal.u8 q8, d5, d0
vmlal.u8 q8, d5, d0
vld1.8 {q1}, [r2], r
1
vld1.8 {q1}, [r2], r
3
vmull.u8 q9, d4, d1
vmull.u8 q9, d4, d1
vmlal.u8 q9, d2, d0
vmlal.u8 q9, d2, d0
vmull.u8 q10, d5, d1
vmull.u8 q10, d5, d1
...
@@ -1633,21 +1635,22 @@ function ff_put_vp8_bilin16_v_neon, export=1
...
@@ -1633,21 +1635,22 @@ function ff_put_vp8_bilin16_v_neon, export=1
vst1.8 {q3}, [r0,:128], r1
vst1.8 {q3}, [r0,:128], r1
bgt 1b
bgt 1b
bx lr
pop {pc}
endfunc
endfunc
function ff_put_vp8_bilin16_hv_neon, export=1
function ff_put_vp8_bilin16_hv_neon, export=1
ldr r3, [sp, #4] @ mx
push {lr}
rsb r12, r3, #8
ldr lr, [sp, #8] @ mx
vdup.8 d0, r3
rsb r12, lr, #8
vdup.8 d0, lr
vdup.8 d1, r12
vdup.8 d1, r12
ldr
r3, [sp, #8]
@ my
ldr
lr, [sp, #12]
@ my
rsb r12,
r3
, #8
rsb r12,
lr
, #8
vdup.8 d2,
r3
vdup.8 d2,
lr
vdup.8 d3, r12
vdup.8 d3, r12
ldr r12, [sp
]
@ h
ldr r12, [sp
, #4]
@ h
vld1.8 {d4-d6}, [r2], r
1
vld1.8 {d4-d6}, [r2], r
3
vext.8 q3, q2, q3, #1
vext.8 q3, q2, q3, #1
vmull.u8 q8, d4, d1
vmull.u8 q8, d4, d1
vmlal.u8 q8, d6, d0
vmlal.u8 q8, d6, d0
...
@@ -1657,11 +1660,11 @@ function ff_put_vp8_bilin16_hv_neon, export=1
...
@@ -1657,11 +1660,11 @@ function ff_put_vp8_bilin16_hv_neon, export=1
vrshrn.u16 d5, q9, #3
vrshrn.u16 d5, q9, #3
1:
1:
subs r12, r12, #2
subs r12, r12, #2
vld1.8 {d18-d20},[r2], r
1
vld1.8 {d18-d20},[r2], r
3
vext.8 q10, q9, q10, #1
vext.8 q10, q9, q10, #1
vmull.u8 q11, d18, d1
vmull.u8 q11, d18, d1
vmlal.u8 q11, d20, d0
vmlal.u8 q11, d20, d0
vld1.8 {d26-d28},[r2], r
1
vld1.8 {d26-d28},[r2], r
3
vmull.u8 q12, d19, d1
vmull.u8 q12, d19, d1
vmlal.u8 q12, d21, d0
vmlal.u8 q12, d21, d0
vext.8 q14, q13, q14, #1
vext.8 q14, q13, q14, #1
...
@@ -1689,22 +1692,23 @@ function ff_put_vp8_bilin16_hv_neon, export=1
...
@@ -1689,22 +1692,23 @@ function ff_put_vp8_bilin16_hv_neon, export=1
vst1.8 {q10}, [r0,:128], r1
vst1.8 {q10}, [r0,:128], r1
bgt 1b
bgt 1b
bx lr
pop {pc}
endfunc
endfunc
function ff_put_vp8_bilin8_h_neon, export=1
function ff_put_vp8_bilin8_h_neon, export=1
ldr r3, [sp, #4] @ mx
push {lr}
rsb r12, r3, #8
ldr lr, [sp, #8] @ mx
vdup.8 d0, r3
rsb r12, lr, #8
vdup.8 d0, lr
vdup.8 d1, r12
vdup.8 d1, r12
ldr r12, [sp
]
@ h
ldr r12, [sp
, #4]
@ h
1:
1:
subs r12, r12, #2
subs r12, r12, #2
vld1.8 {q1}, [r2], r
1
vld1.8 {q1}, [r2], r
3
vext.8 d3, d2, d3, #1
vext.8 d3, d2, d3, #1
vmull.u8 q2, d2, d1
vmull.u8 q2, d2, d1
vmlal.u8 q2, d3, d0
vmlal.u8 q2, d3, d0
vld1.8 {q3}, [r2], r
1
vld1.8 {q3}, [r2], r
3
vext.8 d7, d6, d7, #1
vext.8 d7, d6, d7, #1
vmull.u8 q8, d6, d1
vmull.u8 q8, d6, d1
vmlal.u8 q8, d7, d0
vmlal.u8 q8, d7, d0
...
@@ -1714,22 +1718,23 @@ function ff_put_vp8_bilin8_h_neon, export=1
...
@@ -1714,22 +1718,23 @@ function ff_put_vp8_bilin8_h_neon, export=1
vst1.8 {d16}, [r0,:64], r1
vst1.8 {d16}, [r0,:64], r1
bgt 1b
bgt 1b
bx lr
pop {pc}
endfunc
endfunc
function ff_put_vp8_bilin8_v_neon, export=1
function ff_put_vp8_bilin8_v_neon, export=1
ldr r3, [sp, #8] @ my
push {lr}
rsb r12, r3, #8
ldr lr, [sp, #12] @ my
vdup.8 d0, r3
rsb r12, lr, #8
vdup.8 d0, lr
vdup.8 d1, r12
vdup.8 d1, r12
ldr r12, [sp
]
@ h
ldr r12, [sp
, #4]
@ h
vld1.8 {d2}, [r2], r
1
vld1.8 {d2}, [r2], r
3
1:
1:
subs r12, r12, #2
subs r12, r12, #2
vld1.8 {d3}, [r2], r
1
vld1.8 {d3}, [r2], r
3
vmull.u8 q2, d2, d1
vmull.u8 q2, d2, d1
vmlal.u8 q2, d3, d0
vmlal.u8 q2, d3, d0
vld1.8 {d2}, [r2], r
1
vld1.8 {d2}, [r2], r
3
vmull.u8 q3, d3, d1
vmull.u8 q3, d3, d1
vmlal.u8 q3, d2, d0
vmlal.u8 q3, d2, d0
vrshrn.u16 d4, q2, #3
vrshrn.u16 d4, q2, #3
...
@@ -1738,32 +1743,33 @@ function ff_put_vp8_bilin8_v_neon, export=1
...
@@ -1738,32 +1743,33 @@ function ff_put_vp8_bilin8_v_neon, export=1
vst1.8 {d6}, [r0,:64], r1
vst1.8 {d6}, [r0,:64], r1
bgt 1b
bgt 1b
bx lr
pop {pc}
endfunc
endfunc
function ff_put_vp8_bilin8_hv_neon, export=1
function ff_put_vp8_bilin8_hv_neon, export=1
ldr r3, [sp, #4] @ mx
push {lr}
rsb r12, r3, #8
ldr lr, [sp, #8] @ mx
vdup.8 d0, r3
rsb r12, lr, #8
vdup.8 d0, lr
vdup.8 d1, r12
vdup.8 d1, r12
ldr
r3, [sp, #8]
@ my
ldr
lr, [sp, #12]
@ my
rsb r12,
r3
, #8
rsb r12,
lr
, #8
vdup.8 d2,
r3
vdup.8 d2,
lr
vdup.8 d3, r12
vdup.8 d3, r12
ldr r12, [sp
]
@ h
ldr r12, [sp
, #4]
@ h
vld1.8 {q2}, [r2], r
1
vld1.8 {q2}, [r2], r
3
vext.8 d5, d4, d5, #1
vext.8 d5, d4, d5, #1
vmull.u8 q9, d4, d1
vmull.u8 q9, d4, d1
vmlal.u8 q9, d5, d0
vmlal.u8 q9, d5, d0
vrshrn.u16 d22, q9, #3
vrshrn.u16 d22, q9, #3
1:
1:
subs r12, r12, #2
subs r12, r12, #2
vld1.8 {q3}, [r2], r
1
vld1.8 {q3}, [r2], r
3
vext.8 d7, d6, d7, #1
vext.8 d7, d6, d7, #1
vmull.u8 q8, d6, d1
vmull.u8 q8, d6, d1
vmlal.u8 q8, d7, d0
vmlal.u8 q8, d7, d0
vld1.8 {q2}, [r2], r
1
vld1.8 {q2}, [r2], r
3
vext.8 d5, d4, d5, #1
vext.8 d5, d4, d5, #1
vmull.u8 q9, d4, d1
vmull.u8 q9, d4, d1
vmlal.u8 q9, d5, d0
vmlal.u8 q9, d5, d0
...
@@ -1779,20 +1785,21 @@ function ff_put_vp8_bilin8_hv_neon, export=1
...
@@ -1779,20 +1785,21 @@ function ff_put_vp8_bilin8_hv_neon, export=1
vst1.8 {d23}, [r0,:64], r1
vst1.8 {d23}, [r0,:64], r1
bgt 1b
bgt 1b
bx lr
pop {pc}
endfunc
endfunc
function ff_put_vp8_bilin4_h_neon, export=1
function ff_put_vp8_bilin4_h_neon, export=1
ldr r3, [sp, #4] @ mx
push {lr}
rsb r12, r3, #8
ldr lr, [sp, #8] @ mx
vdup.8 d0, r3
rsb r12, lr, #8
vdup.8 d0, lr
vdup.8 d1, r12
vdup.8 d1, r12
ldr r12, [sp
]
@ h
ldr r12, [sp
, #4]
@ h
1:
1:
subs r12, r12, #2
subs r12, r12, #2
vld1.8 {d2}, [r2], r
1
vld1.8 {d2}, [r2], r
3
vext.8 d3, d2, d3, #1
vext.8 d3, d2, d3, #1
vld1.8 {d6}, [r2], r
1
vld1.8 {d6}, [r2], r
3
vext.8 d7, d6, d7, #1
vext.8 d7, d6, d7, #1
vtrn.32 q1, q3
vtrn.32 q1, q3
vmull.u8 q2, d2, d1
vmull.u8 q2, d2, d1
...
@@ -1802,20 +1809,21 @@ function ff_put_vp8_bilin4_h_neon, export=1
...
@@ -1802,20 +1809,21 @@ function ff_put_vp8_bilin4_h_neon, export=1
vst1.32 {d4[1]}, [r0,:32], r1
vst1.32 {d4[1]}, [r0,:32], r1
bgt 1b
bgt 1b
bx lr
pop {pc}
endfunc
endfunc
function ff_put_vp8_bilin4_v_neon, export=1
function ff_put_vp8_bilin4_v_neon, export=1
ldr r3, [sp, #8] @ my
push {lr}
rsb r12, r3, #8
ldr lr, [sp, #12] @ my
vdup.8 d0, r3
rsb r12, lr, #8
vdup.8 d0, lr
vdup.8 d1, r12
vdup.8 d1, r12
ldr r12, [sp
]
@ h
ldr r12, [sp
, #4]
@ h
vld1.32 {d2[]}, [r2], r
1
vld1.32 {d2[]}, [r2], r
3
1:
1:
vld1.32 {d3[]}, [r2]
vld1.32 {d3[]}, [r2]
vld1.32 {d2[1]}, [r2], r
1
vld1.32 {d2[1]}, [r2], r
3
vld1.32 {d3[1]}, [r2], r
1
vld1.32 {d3[1]}, [r2], r
3
vmull.u8 q2, d2, d1
vmull.u8 q2, d2, d1
vmlal.u8 q2, d3, d0
vmlal.u8 q2, d3, d0
vtrn.32 d3, d2
vtrn.32 d3, d2
...
@@ -1825,30 +1833,31 @@ function ff_put_vp8_bilin4_v_neon, export=1
...
@@ -1825,30 +1833,31 @@ function ff_put_vp8_bilin4_v_neon, export=1
subs r12, r12, #2
subs r12, r12, #2
bgt 1b
bgt 1b
bx lr
pop {pc}
endfunc
endfunc
function ff_put_vp8_bilin4_hv_neon, export=1
function ff_put_vp8_bilin4_hv_neon, export=1
ldr r3, [sp, #4] @ mx
push {lr}
rsb r12, r3, #8
ldr lr, [sp, #8] @ mx
vdup.8 d0, r3
rsb r12, lr, #8
vdup.8 d0, lr
vdup.8 d1, r12
vdup.8 d1, r12
ldr
r3, [sp, #8]
@ my
ldr
lr, [sp, #12]
@ my
rsb r12,
r3
, #8
rsb r12,
lr
, #8
vdup.8 d2,
r3
vdup.8 d2,
lr
vdup.8 d3, r12
vdup.8 d3, r12
ldr r12, [sp
]
@ h
ldr r12, [sp
, #4]
@ h
vld1.8 {d4}, [r2], r
1
vld1.8 {d4}, [r2], r
3
vext.8 d5, d4, d4, #1
vext.8 d5, d4, d4, #1
vmull.u8 q9, d4, d1
vmull.u8 q9, d4, d1
vmlal.u8 q9, d5, d0
vmlal.u8 q9, d5, d0
vrshrn.u16 d22, q9, #3
vrshrn.u16 d22, q9, #3
1:
1:
subs r12, r12, #2
subs r12, r12, #2
vld1.8 {d6}, [r2], r
1
vld1.8 {d6}, [r2], r
3
vext.8 d7, d6, d6, #1
vext.8 d7, d6, d6, #1
vld1.8 {d4}, [r2], r
1
vld1.8 {d4}, [r2], r
3
vext.8 d5, d4, d4, #1
vext.8 d5, d4, d4, #1
vtrn.32 q3, q2
vtrn.32 q3, q2
vmull.u8 q8, d6, d1
vmull.u8 q8, d6, d1
...
@@ -1863,5 +1872,5 @@ function ff_put_vp8_bilin4_hv_neon, export=1
...
@@ -1863,5 +1872,5 @@ function ff_put_vp8_bilin4_hv_neon, export=1
vst1.32 {d20[1]}, [r0,:32], r1
vst1.32 {d20[1]}, [r0,:32], r1
bgt 1b
bgt 1b
bx lr
pop {pc}
endfunc
endfunc
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment