Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
e897a633
Commit
e897a633
authored
Jun 12, 2011
by
Mans Rullgard
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ARM: factor some repetitive code into macros
Signed-off-by:
Mans Rullgard
<
mans@mansr.com
>
parent
11177a4d
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
69 additions
and
174 deletions
+69
-174
mpegvideo_armv5te_s.S
libavcodec/arm/mpegvideo_armv5te_s.S
+23
-38
simple_idct_armv5te.S
libavcodec/arm/simple_idct_armv5te.S
+46
-136
No files found.
libavcodec/arm/mpegvideo_armv5te_s.S
View file @
e897a633
...
@@ -35,6 +35,21 @@
...
@@ -35,6 +35,21 @@
*
*
* Inner loop should take 6 cycles per element on arm926ej-s (Nokia 770)
* Inner loop should take 6 cycles per element on arm926ej-s (Nokia 770)
*/
*/
/*
 * dequant_t dst, src, mul, add, tmp
 *
 * Processes the signed value held in the TOP halfword of src.
 *   top > 0 : dst = top * (bottom halfword of mul) + add
 *   top < 0 : dst = top * (bottom halfword of mul) - add
 *   top == 0: dst is not written; tmp holds the zero result of the test
 *
 * Reads register ip as the value subtracted in the sign test; the
 * description above assumes ip == 0, which is what the visible caller
 * sets up (mov ip, #0) -- NOTE(review): confirm for any other user.
 * Overwrites tmp and the condition flags. dst and tmp may name the same
 * register (the visible call sites do so), in which case a zero input
 * leaves 0 in dst.
 */
.macro dequant_t dst, src, mul, add, tmp
rsbs \tmp, ip, \src, asr #16 /* tmp = (src >> 16, arithmetic) - ip; sets flags for the tests below */
addgt \tmp, \add, #0 /* positive: tmp = +add (flags untouched) */
rsblt \tmp, \add, #0 /* negative: tmp = 0 - add (flags untouched) */
smlatbne \dst, \src, \mul, \tmp /* non-zero only: dst = src.top * mul.bottom + tmp */
.endm
/*
 * dequant_b dst, src, mul, add, tmp
 *
 * Processes the signed value held in the BOTTOM halfword of src.
 *   bottom > 0 : dst = bottom * (bottom halfword of mul) + add
 *   bottom < 0 : dst = bottom * (bottom halfword of mul) - add
 *   bottom == 0: dst is not written and keeps its previous contents
 *
 * The sign test shifts src left by 16 so that the bottom halfword
 * decides the flags. Reads register ip as the value subtracted in that
 * test; the description above assumes ip == 0, which is what the visible
 * caller sets up (mov ip, #0) -- NOTE(review): confirm for any other user.
 * Overwrites tmp and the condition flags.
 */
.macro dequant_b dst, src, mul, add, tmp
rsbs \tmp, ip, \src, lsl #16 /* tmp = (src << 16) - ip; sets flags for the tests below */
addgt \tmp, \add, #0 /* positive: tmp = +add (flags untouched) */
rsblt \tmp, \add, #0 /* negative: tmp = 0 - add (flags untouched) */
smlabbne \dst, \src, \mul, \tmp /* non-zero only: dst = src.bottom * mul.bottom + tmp */
.endm
function ff_dct_unquantize_h263_armv5te, export=1
function ff_dct_unquantize_h263_armv5te, export=1
push {r4-r9,lr}
push {r4-r9,lr}
mov ip, #0
mov ip, #0
...
@@ -44,50 +59,20 @@ function ff_dct_unquantize_h263_armv5te, export=1
...
@@ -44,50 +59,20 @@ function ff_dct_unquantize_h263_armv5te, export=1
1:
1:
ldrd r6, [r0, #8]
ldrd r6, [r0, #8]
rsbs r9, ip, r4, asr #16
dequant_t r9, r4, r1, r2, r9
addgt r9, r2, #0
dequant_t lr, r5, r1, r2, lr
rsblt r9, r2, #0
dequant_b r4, r4, r1, r2, r8
smlatbne r9, r4, r1, r9
dequant_b r5, r5, r1, r2, r8
rsbs lr, ip, r5, asr #16
addgt lr, r2, #0
rsblt lr, r2, #0
smlatbne lr, r5, r1, lr
rsbs r8, ip, r4, asl #16
addgt r8, r2, #0
rsblt r8, r2, #0
smlabbne r4, r4, r1, r8
rsbs r8, ip, r5, asl #16
addgt r8, r2, #0
rsblt r8, r2, #0
smlabbne r5, r5, r1, r8
strh r4, [r0], #2
strh r4, [r0], #2
strh r9, [r0], #2
strh r9, [r0], #2
strh r5, [r0], #2
strh r5, [r0], #2
strh lr, [r0], #2
strh lr, [r0], #2
rsbs r9, ip, r6, asr #16
dequant_t r9, r6, r1, r2, r9
addgt r9, r2, #0
dequant_t lr, r7, r1, r2, lr
rsblt r9, r2, #0
dequant_b r6, r6, r1, r2, r8
smlatbne r9, r6, r1, r9
dequant_b r7, r7, r1, r2, r8
rsbs lr, ip, r7, asr #16
addgt lr, r2, #0
rsblt lr, r2, #0
smlatbne lr, r7, r1, lr
rsbs r8, ip, r6, asl #16
addgt r8, r2, #0
rsblt r8, r2, #0
smlabbne r6, r6, r1, r8
rsbs r8, ip, r7, asl #16
addgt r8, r2, #0
rsblt r8, r2, #0
smlabbne r7, r7, r1, r8
strh r6, [r0], #2
strh r6, [r0], #2
strh r9, [r0], #2
strh r9, [r0], #2
...
...
libavcodec/arm/simple_idct_armv5te.S
View file @
e897a633
...
@@ -333,6 +333,20 @@ function idct_col_armv5te
...
@@ -333,6 +333,20 @@ function idct_col_armv5te
ldr pc, [sp], #4
ldr pc, [sp], #4
endfunc
endfunc
/*
 * clip dst, src...
 *
 * Moves the operand src into dst and clamps the result to 0..255.
 * src is a vararg, so it may carry a shifter operand; the call sites in
 * this file pass forms such as "a2, asr #20".
 * Overwrites dst and the condition flags.
 */
.macro clip dst, src:vararg
movs \dst, \src /* dst = src, setting N for the negative test */
movmi \dst, #0 /* negative result -> 0 */
cmp \dst, #255
movgt \dst, #255 /* result above 255 -> 255 */
.endm
/*
 * aclip dst, src...
 *
 * Adds the operands given in src, stores the sum in dst and clamps it
 * to 0..255. src is a vararg supplying both addends, the second of which
 * may carry a shift; the call sites in this file pass forms such as
 * "v1, a2, asr #20" and "v1, ip, lsr #8".
 * Overwrites dst and the condition flags.
 */
.macro aclip dst, src:vararg
adds \dst, \src /* dst = sum of the src operands, setting N for the negative test */
movmi \dst, #0 /* negative sum -> 0 */
cmp \dst, #255
movgt \dst, #255 /* sum above 255 -> 255 */
.endm
function idct_col_put_armv5te
function idct_col_put_armv5te
str lr, [sp, #-4]!
str lr, [sp, #-4]!
...
@@ -341,27 +355,15 @@ function idct_col_put_armv5te
...
@@ -341,27 +355,15 @@ function idct_col_put_armv5te
ldmfd sp!, {a3, a4}
ldmfd sp!, {a3, a4}
ldr lr, [sp, #32]
ldr lr, [sp, #32]
add a2, a3, v1
add a2, a3, v1
movs a2, a2, asr #20
clip a2, a2, asr #20
movmi a2, #0
cmp a2, #255
movgt a2, #255
add ip, a4, v2
add ip, a4, v2
movs ip, ip, asr #20
clip ip, ip, asr #20
movmi ip, #0
cmp ip, #255
movgt ip, #255
orr a2, a2, ip, lsl #8
orr a2, a2, ip, lsl #8
sub a3, a3, v1
sub a3, a3, v1
movs a3, a3, asr #20
clip a3, a3, asr #20
movmi a3, #0
cmp a3, #255
movgt a3, #255
sub a4, a4, v2
sub a4, a4, v2
movs a4, a4, asr #20
clip a4, a4, asr #20
movmi a4, #0
cmp a4, #255
ldr v1, [sp, #28]
ldr v1, [sp, #28]
movgt a4, #255
strh a2, [v1]
strh a2, [v1]
add a2, v1, #2
add a2, v1, #2
str a2, [sp, #28]
str a2, [sp, #28]
...
@@ -371,79 +373,43 @@ function idct_col_put_armv5te
...
@@ -371,79 +373,43 @@ function idct_col_put_armv5te
strh a2, [v2, v1]!
strh a2, [v2, v1]!
sub a2, a3, v3
sub a2, a3, v3
movs a2, a2, asr #20
clip a2, a2, asr #20
movmi a2, #0
cmp a2, #255
movgt a2, #255
sub ip, a4, v4
sub ip, a4, v4
movs ip, ip, asr #20
clip ip, ip, asr #20
movmi ip, #0
cmp ip, #255
movgt ip, #255
orr a2, a2, ip, lsl #8
orr a2, a2, ip, lsl #8
strh a2, [v1, lr]!
strh a2, [v1, lr]!
add a3, a3, v3
add a3, a3, v3
movs a2, a3, asr #20
clip a2, a3, asr #20
movmi a2, #0
cmp a2, #255
movgt a2, #255
add a4, a4, v4
add a4, a4, v4
movs a4, a4, asr #20
clip a4, a4, asr #20
movmi a4, #0
cmp a4, #255
movgt a4, #255
orr a2, a2, a4, lsl #8
orr a2, a2, a4, lsl #8
ldmfd sp!, {a3, a4}
ldmfd sp!, {a3, a4}
strh a2, [v2, -lr]!
strh a2, [v2, -lr]!
add a2, a3, v5
add a2, a3, v5
movs a2, a2, asr #20
clip a2, a2, asr #20
movmi a2, #0
cmp a2, #255
movgt a2, #255
add ip, a4, v6
add ip, a4, v6
movs ip, ip, asr #20
clip ip, ip, asr #20
movmi ip, #0
cmp ip, #255
movgt ip, #255
orr a2, a2, ip, lsl #8
orr a2, a2, ip, lsl #8
strh a2, [v1, lr]!
strh a2, [v1, lr]!
sub a3, a3, v5
sub a3, a3, v5
movs a2, a3, asr #20
clip a2, a3, asr #20
movmi a2, #0
cmp a2, #255
movgt a2, #255
sub a4, a4, v6
sub a4, a4, v6
movs a4, a4, asr #20
clip a4, a4, asr #20
movmi a4, #0
cmp a4, #255
movgt a4, #255
orr a2, a2, a4, lsl #8
orr a2, a2, a4, lsl #8
ldmfd sp!, {a3, a4}
ldmfd sp!, {a3, a4}
strh a2, [v2, -lr]!
strh a2, [v2, -lr]!
add a2, a3, v7
add a2, a3, v7
movs a2, a2, asr #20
clip a2, a2, asr #20
movmi a2, #0
cmp a2, #255
movgt a2, #255
add ip, a4, fp
add ip, a4, fp
movs ip, ip, asr #20
clip ip, ip, asr #20
movmi ip, #0
cmp ip, #255
movgt ip, #255
orr a2, a2, ip, lsl #8
orr a2, a2, ip, lsl #8
strh a2, [v1, lr]
strh a2, [v1, lr]
sub a3, a3, v7
sub a3, a3, v7
movs a2, a3, asr #20
clip a2, a3, asr #20
movmi a2, #0
cmp a2, #255
movgt a2, #255
sub a4, a4, fp
sub a4, a4, fp
movs a4, a4, asr #20
clip a4, a4, asr #20
movmi a4, #0
cmp a4, #255
movgt a4, #255
orr a2, a2, a4, lsl #8
orr a2, a2, a4, lsl #8
strh a2, [v2, -lr]
strh a2, [v2, -lr]
...
@@ -460,36 +426,22 @@ function idct_col_add_armv5te
...
@@ -460,36 +426,22 @@ function idct_col_add_armv5te
ldmfd sp!, {a3, a4}
ldmfd sp!, {a3, a4}
ldrh ip, [lr]
ldrh ip, [lr]
add a2, a3, v1
add a2, a3, v1
mov a2, a2, asr #20
sub a3, a3, v1
sub a3, a3, v1
and v1, ip, #255
and v1, ip, #255
adds a2, a2, v1
aclip a2, v1, a2, asr #20
movmi a2, #0
cmp a2, #255
movgt a2, #255
add v1, a4, v2
add v1, a4, v2
mov v1, v1, asr #20
mov v1, v1, asr #20
adds v1, v1, ip, lsr #8
aclip v1, v1, ip, lsr #8
movmi v1, #0
cmp v1, #255
movgt v1, #255
orr a2, a2, v1, lsl #8
orr a2, a2, v1, lsl #8
ldr v1, [sp, #32]
ldr v1, [sp, #32]
sub a4, a4, v2
sub a4, a4, v2
rsb v2, v1, v1, lsl #3
rsb v2, v1, v1, lsl #3
ldrh ip, [v2, lr]!
ldrh ip, [v2, lr]!
strh a2, [lr]
strh a2, [lr]
mov a3, a3, asr #20
and a2, ip, #255
and a2, ip, #255
adds a3, a3, a2
aclip a3, a2, a3, asr #20
movmi a3, #0
cmp a3, #255
movgt a3, #255
mov a4, a4, asr #20
mov a4, a4, asr #20
adds a4, a4, ip, lsr #8
aclip a4, a4, ip, lsr #8
movmi a4, #0
cmp a4, #255
movgt a4, #255
add a2, lr, #2
add a2, lr, #2
str a2, [sp, #28]
str a2, [sp, #28]
orr a2, a3, a4, lsl #8
orr a2, a3, a4, lsl #8
...
@@ -498,102 +450,60 @@ function idct_col_add_armv5te
...
@@ -498,102 +450,60 @@ function idct_col_add_armv5te
ldmfd sp!, {a3, a4}
ldmfd sp!, {a3, a4}
ldrh ip, [lr, v1]!
ldrh ip, [lr, v1]!
sub a2, a3, v3
sub a2, a3, v3
mov a2, a2, asr #20
add a3, a3, v3
add a3, a3, v3
and v3, ip, #255
and v3, ip, #255
adds a2, a2, v3
aclip a2, v3, a2, asr #20
movmi a2, #0
cmp a2, #255
movgt a2, #255
sub v3, a4, v4
sub v3, a4, v4
mov v3, v3, asr #20
mov v3, v3, asr #20
adds v3, v3, ip, lsr #8
aclip v3, v3, ip, lsr #8
movmi v3, #0
cmp v3, #255
movgt v3, #255
orr a2, a2, v3, lsl #8
orr a2, a2, v3, lsl #8
add a4, a4, v4
add a4, a4, v4
ldrh ip, [v2, -v1]!
ldrh ip, [v2, -v1]!
strh a2, [lr]
strh a2, [lr]
mov a3, a3, asr #20
and a2, ip, #255
and a2, ip, #255
adds a3, a3, a2
aclip a3, a2, a3, asr #20
movmi a3, #0
cmp a3, #255
movgt a3, #255
mov a4, a4, asr #20
mov a4, a4, asr #20
adds a4, a4, ip, lsr #8
aclip a4, a4, ip, lsr #8
movmi a4, #0
cmp a4, #255
movgt a4, #255
orr a2, a3, a4, lsl #8
orr a2, a3, a4, lsl #8
strh a2, [v2]
strh a2, [v2]
ldmfd sp!, {a3, a4}
ldmfd sp!, {a3, a4}
ldrh ip, [lr, v1]!
ldrh ip, [lr, v1]!
add a2, a3, v5
add a2, a3, v5
mov a2, a2, asr #20
sub a3, a3, v5
sub a3, a3, v5
and v3, ip, #255
and v3, ip, #255
adds a2, a2, v3
aclip a2, v3, a2, asr #20
movmi a2, #0
cmp a2, #255
movgt a2, #255
add v3, a4, v6
add v3, a4, v6
mov v3, v3, asr #20
mov v3, v3, asr #20
adds v3, v3, ip, lsr #8
aclip v3, v3, ip, lsr #8
movmi v3, #0
cmp v3, #255
movgt v3, #255
orr a2, a2, v3, lsl #8
orr a2, a2, v3, lsl #8
sub a4, a4, v6
sub a4, a4, v6
ldrh ip, [v2, -v1]!
ldrh ip, [v2, -v1]!
strh a2, [lr]
strh a2, [lr]
mov a3, a3, asr #20
and a2, ip, #255
and a2, ip, #255
adds a3, a3, a2
aclip a3, a2, a3, asr #20
movmi a3, #0
cmp a3, #255
movgt a3, #255
mov a4, a4, asr #20
mov a4, a4, asr #20
adds a4, a4, ip, lsr #8
aclip a4, a4, ip, lsr #8
movmi a4, #0
cmp a4, #255
movgt a4, #255
orr a2, a3, a4, lsl #8
orr a2, a3, a4, lsl #8
strh a2, [v2]
strh a2, [v2]
ldmfd sp!, {a3, a4}
ldmfd sp!, {a3, a4}
ldrh ip, [lr, v1]!
ldrh ip, [lr, v1]!
add a2, a3, v7
add a2, a3, v7
mov a2, a2, asr #20
sub a3, a3, v7
sub a3, a3, v7
and v3, ip, #255
and v3, ip, #255
adds a2, a2, v3
aclip a2, v3, a2, asr #20
movmi a2, #0
cmp a2, #255
movgt a2, #255
add v3, a4, fp
add v3, a4, fp
mov v3, v3, asr #20
mov v3, v3, asr #20
adds v3, v3, ip, lsr #8
aclip v3, v3, ip, lsr #8
movmi v3, #0
cmp v3, #255
movgt v3, #255
orr a2, a2, v3, lsl #8
orr a2, a2, v3, lsl #8
sub a4, a4, fp
sub a4, a4, fp
ldrh ip, [v2, -v1]!
ldrh ip, [v2, -v1]!
strh a2, [lr]
strh a2, [lr]
mov a3, a3, asr #20
and a2, ip, #255
and a2, ip, #255
adds a3, a3, a2
aclip a3, a2, a3, asr #20
movmi a3, #0
cmp a3, #255
movgt a3, #255
mov a4, a4, asr #20
mov a4, a4, asr #20
adds a4, a4, ip, lsr #8
aclip a4, a4, ip, lsr #8
movmi a4, #0
cmp a4, #255
movgt a4, #255
orr a2, a3, a4, lsl #8
orr a2, a3, a4, lsl #8
strh a2, [v2]
strh a2, [v2]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment