Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
8986fddc
Commit
8986fddc
authored
Jun 14, 2011
by
Mans Rullgard
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ARM: allow building in Thumb2 mode
Signed-off-by:
Mans Rullgard
<
mans@mansr.com
>
parent
9cd7b854
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
30 changed files
with
419 additions
and
146 deletions
+419
-146
configure
configure
+2
-1
aac.h
libavcodec/arm/aac.h
+3
-0
ac3dsp_arm.S
libavcodec/arm/ac3dsp_arm.S
+1
-0
ac3dsp_armv6.S
libavcodec/arm/ac3dsp_armv6.S
+2
-0
ac3dsp_neon.S
libavcodec/arm/ac3dsp_neon.S
+1
-0
asm.S
libavcodec/arm/asm.S
+93
-0
dcadsp_neon.S
libavcodec/arm/dcadsp_neon.S
+1
-0
dsputil_arm.S
libavcodec/arm/dsputil_arm.S
+10
-0
dsputil_armv6.S
libavcodec/arm/dsputil_armv6.S
+38
-38
dsputil_neon.S
libavcodec/arm/dsputil_neon.S
+3
-0
dsputil_vfp.S
libavcodec/arm/dsputil_vfp.S
+21
-0
fmtconvert_neon.S
libavcodec/arm/fmtconvert_neon.S
+4
-0
fmtconvert_vfp.S
libavcodec/arm/fmtconvert_vfp.S
+3
-0
h264dsp_neon.S
libavcodec/arm/h264dsp_neon.S
+54
-44
h264idct_neon.S
libavcodec/arm/h264idct_neon.S
+15
-8
mathops.h
libavcodec/arm/mathops.h
+3
-0
mdct_neon.S
libavcodec/arm/mdct_neon.S
+3
-1
mpegaudiodsp_fixed_armv6.S
libavcodec/arm/mpegaudiodsp_fixed_armv6.S
+3
-3
mpegvideo_armv5te_s.S
libavcodec/arm/mpegvideo_armv5te_s.S
+12
-0
mpegvideo_neon.S
libavcodec/arm/mpegvideo_neon.S
+4
-0
rdft_neon.S
libavcodec/arm/rdft_neon.S
+1
-0
simple_idct_arm.S
libavcodec/arm/simple_idct_arm.S
+26
-6
simple_idct_armv5te.S
libavcodec/arm/simple_idct_armv5te.S
+26
-13
simple_idct_armv6.S
libavcodec/arm/simple_idct_armv6.S
+17
-16
simple_idct_neon.S
libavcodec/arm/simple_idct_neon.S
+5
-1
synth_filter_neon.S
libavcodec/arm/synth_filter_neon.S
+2
-0
vp56_arith.h
libavcodec/arm/vp56_arith.h
+23
-4
vp8_armv6.S
libavcodec/arm/vp8_armv6.S
+33
-3
vp8dsp_neon.S
libavcodec/arm/vp8dsp_neon.S
+8
-8
intmath.h
libavutil/arm/intmath.h
+2
-0
No files found.
configure
View file @
8986fddc
...
...
@@ -967,6 +967,7 @@ CONFIG_LIST="
static
swscale
swscale_alpha
thumb
vaapi
vdpau
version3
...
...
@@ -2607,7 +2608,7 @@ if enabled alpha; then
elif
enabled arm
;
then
check_cflags
-marm
enabled thumb
&&
check_cflags
-mthumb
||
check_cflags
-marm
nogas
=
die
if
check_cpp_condition stddef.h
"defined __ARM_PCS_VFP"
;
then
...
...
libavcodec/arm/aac.h
View file @
8986fddc
...
...
@@ -114,12 +114,15 @@ static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
"vmov d1, %2, %3
\n\t
"
"lsls %6, %6, #1
\n\t
"
"and %0, %5, #1<<31
\n\t
"
"it cs
\n\t
"
"lslcs %5, %5, #1
\n\t
"
"lsls %6, %6, #1
\n\t
"
"and %1, %5, #1<<31
\n\t
"
"it cs
\n\t
"
"lslcs %5, %5, #1
\n\t
"
"lsls %6, %6, #1
\n\t
"
"and %2, %5, #1<<31
\n\t
"
"it cs
\n\t
"
"lslcs %5, %5, #1
\n\t
"
"vmov d4, %0, %1
\n\t
"
"and %3, %5, #1<<31
\n\t
"
...
...
libavcodec/arm/ac3dsp_arm.S
View file @
8986fddc
...
...
@@ -27,6 +27,7 @@ function ff_ac3_update_bap_counts_arm, export=1
lsl r3, lr, #1
ldrh r12, [r0, r3]
subs r2, r2, #1
it gt
ldrbgt lr, [r1], #1
add r12, r12, #1
strh r12, [r0, r3]
...
...
libavcodec/arm/ac3dsp_armv6.S
View file @
8986fddc
...
...
@@ -42,9 +42,11 @@ function ff_ac3_bit_alloc_calc_bap_armv6, export=1
mov r11, r10
ldrb r10, [r4], #1 @ band_start_tab[band++]
subs r9, r9, r5 @ - floor
it lt
movlt r9, #0
cmp r10, r3 @ - end
and r9, r9, r8 @ & 0x1fe0
ite gt
subgt r8, r3, r11
suble r8, r10, r11
add r9, r9, r5 @ + floor => m
...
...
libavcodec/arm/ac3dsp_neon.S
View file @
8986fddc
...
...
@@ -41,6 +41,7 @@ endfunc
function ff_ac3_exponent_min_neon, export=1
cmp r1, #0
it eq
bxeq lr
push {lr}
mov r12, #256
...
...
libavcodec/arm/asm.S
View file @
8986fddc
...
...
@@ -24,9 +24,18 @@
# define ELF
#else
# define ELF @
#endif
/*
 * Per-line instruction-set selectors.
 * A line that starts with A is kept only when CONFIG_THUMB is zero and a
 * line that starts with T only when CONFIG_THUMB is non-zero: the inactive
 * prefix expands to the assembler comment character and the active one
 * expands to nothing.
 */
#if CONFIG_THUMB
# define A @
# define T
#else
# define A
# define T @
#endif
.syntax unified
T .thumb
.macro require8 val=1
ELF .eabi_attribute 24, \val
...
...
@@ -82,6 +91,90 @@ ELF .size \name, . - \name
#endif
.endm
/*
 * ldr_pre rt, rn, rm
 * Load a word into rt from address rn + rm and leave that address in rn
 * (pre-indexed load with writeback).
 * The non-Thumb build uses the single writeback instruction; the Thumb
 * build performs the add and the load as two instructions.
 * rm is a vararg, so extra operand text after the register is passed
 * through unchanged (presumably a shift such as "lsl #1" -- confirm
 * against the call sites).
 */
.macro ldr_pre rt, rn, rm:vararg
A ldr \rt, [\rn, \rm]!
T add \rn, \rn, \rm
T ldr \rt, [\rn]
.endm
/*
 * ldr_post rt, rn, rm
 * Load a word into rt from address rn, then advance rn by rm
 * (post-indexed load).
 * The Thumb build issues the load first and the add second.
 * NOTE(review): in the Thumb sequence rt must not be the same register
 * as rn, otherwise the add would operate on the loaded value -- confirm
 * that no caller does this.
 */
.macro ldr_post rt, rn, rm:vararg
A ldr \rt, [\rn], \rm
T ldr \rt, [\rn]
T add \rn, \rn, \rm
.endm
/*
 * ldrd_reg rt, rt2, rn, rm
 * Load a doubleword into rt/rt2 from address rn + rm. rn is not modified.
 * The Thumb build first forms the address in rt, which is about to be
 * overwritten by the load anyway, so no extra scratch register is used.
 */
.macro ldrd_reg rt, rt2, rn, rm
A ldrd \rt, \rt2, [\rn, \rm]
T add \rt, \rn, \rm
T ldrd \rt, \rt2, [\rt]
.endm
/*
 * ldrd_post rt, rt2, rn, rm
 * Load a doubleword into rt/rt2 from address rn, then advance rn by rm
 * (post-indexed load). The Thumb build uses a load followed by an add.
 */
.macro ldrd_post rt, rt2, rn, rm
A ldrd \rt, \rt2, [\rn], \rm
T ldrd \rt, \rt2, [\rn]
T add \rn, \rn, \rm
.endm
/*
 * ldrh_pre rt, rn, rm
 * Load a halfword into rt from address rn + rm and leave that address in
 * rn (pre-indexed load with writeback). The Thumb build uses an add
 * followed by a plain load.
 */
.macro ldrh_pre rt, rn, rm
A ldrh \rt, [\rn, \rm]!
T add \rn, \rn, \rm
T ldrh \rt, [\rn]
.endm
/*
 * ldrh_dpre rt, rn, rm
 * Decrementing variant of ldrh_pre: load a halfword into rt from address
 * rn - rm and leave that address in rn. The Thumb build uses a sub
 * followed by a plain load.
 */
.macro ldrh_dpre rt, rn, rm
A ldrh \rt, [\rn, -\rm]!
T sub \rn, \rn, \rm
T ldrh \rt, [\rn]
.endm
/*
 * ldrh_post rt, rn, rm
 * Load a halfword into rt from address rn, then advance rn by rm
 * (post-indexed load). The Thumb build uses a load followed by an add.
 */
.macro ldrh_post rt, rn, rm
A ldrh \rt, [\rn], \rm
T ldrh \rt, [\rn]
T add \rn, \rn, \rm
.endm
/*
 * str_post rt, rn, rm
 * Store the word in rt to address rn, then advance rn by rm
 * (post-indexed store). The Thumb build uses a store followed by an add.
 * rm is a vararg, so extra operand text after the register is passed
 * through unchanged.
 */
.macro str_post rt, rn, rm:vararg
A str \rt, [\rn], \rm
T str \rt, [\rn]
T add \rn, \rn, \rm
.endm
/*
 * strb_post rt, rn, rm
 * Store the low byte of rt to address rn, then advance rn by rm
 * (post-indexed store). The Thumb build uses a store followed by an add.
 * rm is a vararg, so extra operand text after the register is passed
 * through unchanged.
 */
.macro strb_post rt, rn, rm:vararg
A strb \rt, [\rn], \rm
T strb \rt, [\rn]
T add \rn, \rn, \rm
.endm
/*
 * strd_post rt, rt2, rn, rm
 * Store the doubleword in rt/rt2 to address rn, then advance rn by rm
 * (post-indexed store). The Thumb build uses a store followed by an add.
 */
.macro strd_post rt, rt2, rn, rm
A strd \rt, \rt2, [\rn], \rm
T strd \rt, \rt2, [\rn]
T add \rn, \rn, \rm
.endm
/*
 * strh_pre rt, rn, rm
 * Store the low halfword of rt to address rn + rm and leave that address
 * in rn (pre-indexed store with writeback). The Thumb build uses an add
 * followed by a plain store.
 */
.macro strh_pre rt, rn, rm
A strh \rt, [\rn, \rm]!
T add \rn, \rn, \rm
T strh \rt, [\rn]
.endm
/*
 * strh_dpre rt, rn, rm
 * Decrementing variant of strh_pre: store the low halfword of rt to
 * address rn - rm and leave that address in rn. The Thumb build uses a
 * sub followed by a plain store.
 */
.macro strh_dpre rt, rn, rm
A strh \rt, [\rn, -\rm]!
T sub \rn, \rn, \rm
T strh \rt, [\rn]
.endm
/*
 * strh_post rt, rn, rm
 * Store the low halfword of rt to address rn, then advance rn by rm
 * (post-indexed store). The Thumb build uses a store followed by an add.
 */
.macro strh_post rt, rn, rm
A strh \rt, [\rn], \rm
T strh \rt, [\rn]
T add \rn, \rn, \rm
.endm
/*
 * strh_dpost rt, rn, rm
 * Decrementing variant of strh_post: store the low halfword of rt to
 * address rn, then move rn back by rm. The Thumb build uses a store
 * followed by a sub.
 */
.macro strh_dpost rt, rn, rm
A strh \rt, [\rn], -\rm
T strh \rt, [\rn]
T sub \rn, \rn, \rm
.endm
#if HAVE_VFP_ARGS
.eabi_attribute 28, 1
# define VFP
...
...
libavcodec/arm/dcadsp_neon.S
View file @
8986fddc
...
...
@@ -27,6 +27,7 @@ function ff_dca_lfe_fir_neon, export=1
add r5, r2, #256*4-16 @ cf1
sub r1, r1, #12
cmp r3, #32
ite eq
moveq r6, #256/32
movne r6, #256/64
NOVFP vldr s0, [sp, #16] @ scale
...
...
libavcodec/arm/dsputil_arm.S
View file @
8986fddc
...
...
@@ -554,10 +554,12 @@ endfunc
and r9, r5, r14
and r10, r6, r14
and r11, r7, r14
it eq
andeq r14, r14, r14, \rnd #1
add r8, r8, r10
add r9, r9, r11
ldr r12, =0xfcfcfcfc >> 2
itt eq
addeq r8, r8, r14
addeq r9, r9, r14
and r4, r12, r4, lsr #2
...
...
@@ -638,8 +640,10 @@ function ff_add_pixels_clamped_arm, export=1
mvn r5, r5
mvn r7, r7
tst r6, #0x100
it ne
movne r6, r5, lsr #24
tst r8, #0x100
it ne
movne r8, r7, lsr #24
mov r9, r6
ldrsh r5, [r0, #4] /* moved from [A] */
...
...
@@ -654,8 +658,10 @@ function ff_add_pixels_clamped_arm, export=1
mvn r5, r5
mvn r7, r7
tst r6, #0x100
it ne
movne r6, r5, lsr #24
tst r8, #0x100
it ne
movne r8, r7, lsr #24
orr r9, r9, r6, lsl #16
ldr r4, [r1, #4] /* moved from [B] */
...
...
@@ -676,8 +682,10 @@ function ff_add_pixels_clamped_arm, export=1
mvn r5, r5
mvn r7, r7
tst r6, #0x100
it ne
movne r6, r5, lsr #24
tst r8, #0x100
it ne
movne r8, r7, lsr #24
mov r9, r6
ldrsh r5, [r0, #12] /* moved from [D] */
...
...
@@ -692,8 +700,10 @@ function ff_add_pixels_clamped_arm, export=1
mvn r5, r5
mvn r7, r7
tst r6, #0x100
it ne
movne r6, r5, lsr #24
tst r8, #0x100
it ne
movne r8, r7, lsr #24
orr r9, r9, r6, lsl #16
add r0, r0, #16 /* moved from [E] */
...
...
libavcodec/arm/dsputil_armv6.S
View file @
8986fddc
...
...
@@ -47,16 +47,16 @@ function ff_put_pixels16_armv6, export=1
ldr r5, [r1, #4]
ldr r6, [r1, #8]
ldr r7, [r1, #12]
ldr
r4, [r1],
r2
ldr
_post r4, r1,
r2
strd r6, r7, [r0, #8]
ldr r9, [r1, #4]
strd
r4, r5, [r0]
, r2
strd
_post r4, r5, r0
, r2
ldr r10, [r1, #8]
ldr r11, [r1, #12]
ldr
r8, [r1],
r2
ldr
_post r8, r1,
r2
strd r10, r11, [r0, #8]
subs r3, r3, #2
strd
r8, r9, [r0]
, r2
strd
_post r8, r9, r0
, r2
bne 1b
pop {r4-r11}
...
...
@@ -67,12 +67,12 @@ function ff_put_pixels8_armv6, export=1
push {r4-r7}
1:
ldr r5, [r1, #4]
ldr
r4, [r1],
r2
ldr
_post r4, r1,
r2
ldr r7, [r1, #4]
strd
r4, r5, [r0]
, r2
ldr
r6, [r1],
r2
strd
_post r4, r5, r0
, r2
ldr
_post r6, r1,
r2
subs r3, r3, #2
strd
r6, r7, [r0]
, r2
strd
_post r6, r7, r0
, r2
bne 1b
pop {r4-r7}
...
...
@@ -90,7 +90,7 @@ function ff_put_pixels8_x2_armv6, export=1
ldr r5, [r1, #4]
ldr r7, [r1, #5]
lsr r6, r4, #8
ldr
r8, [r1, r2]!
ldr
_pre r8, r1, r2
orr r6, r6, r5, lsl #24
ldr r9, [r1, #4]
ldr r11, [r1, #5]
...
...
@@ -112,9 +112,9 @@ function ff_put_pixels8_x2_armv6, export=1
uhadd8 r9, r9, r11
and r6, r6, r12
uadd8 r8, r8, r14
strd
r4, r5, [r0]
, r2
strd
_post r4, r5, r0
, r2
uadd8 r9, r9, r6
strd
r8, r9, [r0]
, r2
strd
_post r8, r9, r0
, r2
bne 1b
pop {r4-r11, pc}
...
...
@@ -127,7 +127,7 @@ function ff_put_pixels8_y2_armv6, export=1
orr r12, r12, r12, lsl #16
ldr r4, [r1]
ldr r5, [r1, #4]
ldr
r6, [r1, r2]!
ldr
_pre r6, r1, r2
ldr r7, [r1, #4]
1:
subs r3, r3, #2
...
...
@@ -136,7 +136,7 @@ function ff_put_pixels8_y2_armv6, export=1
uhadd8 r9, r5, r7
eor r11, r5, r7
and r10, r10, r12
ldr
r4, [r1, r2]!
ldr
_pre r4, r1, r2
uadd8 r8, r8, r10
and r11, r11, r12
uadd8 r9, r9, r11
...
...
@@ -148,11 +148,11 @@ function ff_put_pixels8_y2_armv6, export=1
eor r7, r5, r7
uadd8 r10, r10, r6
and r7, r7, r12
ldr
r6, [r1, r2]!
ldr
_pre r6, r1, r2
uadd8 r11, r11, r7
strd
r8, r9, [r0]
, r2
strd
_post r8, r9, r0
, r2
ldr r7, [r1, #4]
strd
r10, r11, [r0]
, r2
strd
_post r10, r11, r0
, r2
bne 1b
pop {r4-r11}
...
...
@@ -166,7 +166,7 @@ function ff_put_pixels8_x2_no_rnd_armv6, export=1
ldr r4, [r1]
ldr r5, [r1, #4]
ldr r7, [r1, #5]
ldr
r8, [r1, r2]!
ldr
_pre r8, r1, r2
ldr r9, [r1, #4]
ldr r14, [r1, #5]
add r1, r1, r2
...
...
@@ -191,16 +191,16 @@ function ff_put_pixels8_y2_no_rnd_armv6, export=1
push {r4-r9, lr}
ldr r4, [r1]
ldr r5, [r1, #4]
ldr
r6, [r1, r2]!
ldr
_pre r6, r1, r2
ldr r7, [r1, #4]
1:
subs r3, r3, #2
uhadd8 r8, r4, r6
ldr
r4, [r1, r2]!
ldr
_pre r4, r1, r2
uhadd8 r9, r5, r7
ldr r5, [r1, #4]
uhadd8 r12, r4, r6
ldr
r6, [r1, r2]!
ldr
_pre r6, r1, r2
uhadd8 r14, r5, r7
ldr r7, [r1, #4]
stm r0, {r8,r9}
...
...
@@ -220,44 +220,44 @@ function ff_avg_pixels8_armv6, export=1
orr lr, lr, lr, lsl #16
ldrd r4, r5, [r0]
ldr r10, [r1, #4]
ldr
r9, [r1],
r2
ldr
_post r9, r1,
r2
subs r3, r3, #2
1:
pld [r1, r2]
eor r8, r4, r9
uhadd8 r4, r4, r9
eor r12, r5, r10
ldrd
r6, r7, [r0, r2]
ldrd
_reg r6, r7, r0, r2
uhadd8 r5, r5, r10
and r8, r8, lr
ldr r10, [r1, #4]
and r12, r12, lr
uadd8 r4, r4, r8
ldr
r9, [r1],
r2
ldr
_post r9, r1,
r2
eor r8, r6, r9
uadd8 r5, r5, r12
pld [r1, r2, lsl #1]
eor r12, r7, r10
uhadd8 r6, r6, r9
strd
r4, r5, [r0],
r2
strd
_post r4, r5, r0,
r2
uhadd8 r7, r7, r10
beq 2f
and r8, r8, lr
ldrd
r4, r5, [r0, r2]
ldrd
_reg r4, r5, r0, r2
uadd8 r6, r6, r8
ldr r10, [r1, #4]
and r12, r12, lr
subs r3, r3, #2
uadd8 r7, r7, r12
ldr
r9, [r1],
r2
strd
r6, r7, [r0],
r2
ldr
_post r9, r1,
r2
strd
_post r6, r7, r0,
r2
b 1b
2:
and r8, r8, lr
and r12, r12, lr
uadd8 r6, r6, r8
uadd8 r7, r7, r12
strd
r6, r7, [r0],
r2
strd
_post r6, r7, r0,
r2
pop {r4-r10, pc}
endfunc
...
...
@@ -284,7 +284,7 @@ function ff_add_pixels_clamped_armv6, export=1
orr r6, r8, r5, lsl #8
orr r7, r4, lr, lsl #8
subs r3, r3, #1
strd
r6, r7, [r1]
, r2
strd
_post r6, r7, r1
, r2
bgt 1b
pop {r4-r8,pc}
endfunc
...
...
@@ -294,7 +294,7 @@ function ff_get_pixels_armv6, export=1
push {r4-r8, lr}
mov lr, #8
1:
ldrd
r4, r5, [r1]
, r2
ldrd
_post r4, r5, r1
, r2
subs lr, lr, #1
uxtb16 r6, r4
uxtb16 r4, r4, ror #8
...
...
@@ -317,8 +317,8 @@ function ff_diff_pixels_armv6, export=1
push {r4-r9, lr}
mov lr, #8
1:
ldrd
r4, r5, [r1]
, r3
ldrd
r6, r7, [r2]
, r3
ldrd
_post r4, r5, r1
, r3
ldrd
_post r6, r7, r2
, r3
uxtb16 r8, r4
uxtb16 r4, r4, ror #8
uxtb16 r9, r6
...
...
@@ -492,19 +492,19 @@ function ff_pix_abs8_armv6, export=1
push {r4-r9, lr}
mov r0, #0
mov lr, #0
ldrd
r4, r5, [r1],
r3
ldrd
_post r4, r5, r1,
r3
1:
subs r12, r12, #2
ldr r7, [r2, #4]
ldr
r6, [r2],
r3
ldrd
r8, r9, [r1],
r3
ldr
_post r6, r2,
r3
ldrd
_post r8, r9, r1,
r3
usada8 r0, r4, r6, r0
pld [r2, r3]
usada8 lr, r5, r7, lr
ldr r7, [r2, #4]
ldr
r6, [r2],
r3
ldr
_post r6, r2,
r3
beq 2f
ldrd
r4, r5, [r1],
r3
ldrd
_post r4, r5, r1,
r3
usada8 r0, r8, r6, r0
pld [r2, r3]
usada8 lr, r9, r7, lr
...
...
@@ -613,7 +613,7 @@ function ff_pix_sum_armv6, export=1
ldr r7, [r0, #12]
usada8 r2, r6, lr, r2
beq 2f
ldr
r4, [r0, r1]!
ldr
_pre r4, r0, r1
usada8 r3, r7, lr, r3
bgt 1b
2:
...
...
libavcodec/arm/dsputil_neon.S
View file @
8986fddc
...
...
@@ -531,6 +531,7 @@ function ff_vorbis_inverse_coupling_neon, export=1
2: vst1.32 {d2-d3}, [r3, :128]!
vst1.32 {d0-d1}, [r12,:128]!
it lt
bxlt lr
3: vld1.32 {d2-d3}, [r1,:128]
...
...
@@ -575,6 +576,7 @@ NOVFP vdup.32 q8, r2
2: vst1.32 {q2},[r0,:128]!
vst1.32 {q3},[r0,:128]!
ands len, len, #15
it eq
bxeq lr
3: vld1.32 {q0},[r1,:128]!
vmul.f32 q0, q0, q8
...
...
@@ -638,6 +640,7 @@ NOVFP ldr r3, [sp]
2: vst1.32 {q8},[r0,:128]!
vst1.32 {q9},[r0,:128]!
ands r3, r3, #7
it eq
popeq {pc}
3: vld1.32 {q0},[r1,:128]!
ldr r12, [r2], #4
...
...
libavcodec/arm/dsputil_vfp.S
View file @
8986fddc
...
...
@@ -55,18 +55,23 @@ function ff_vector_fmul_vfp, export=1
1:
subs r3, r3, #16
vmul.f32 s12, s4, s12
itttt ge
vldmiage r1!, {s16-s19}
vldmiage r2!, {s24-s27}
vldmiage r1!, {s20-s23}
vldmiage r2!, {s28-s31}
it ge
vmulge.f32 s24, s16, s24
vstmia r0!, {s8-s11}
vstmia r0!, {s12-s15}
it ge
vmulge.f32 s28, s20, s28
itttt gt
vldmiagt r1!, {s0-s3}
vldmiagt r2!, {s8-s11}
vldmiagt r1!, {s4-s7}
vldmiagt r2!, {s12-s15}
ittt ge
vmulge.f32 s8, s0, s8
vstmiage r0!, {s24-s27}
vstmiage r0!, {s28-s31}
...
...
@@ -97,33 +102,49 @@ function ff_vector_fmul_reverse_vfp, export=1
vmul.f32 s11, s0, s11
1:
subs r3, r3, #16
it ge
vldmdbge r2!, {s16-s19}
vmul.f32 s12, s7, s12
it ge
vldmiage r1!, {s24-s27}
vmul.f32 s13, s6, s13
it ge
vldmdbge r2!, {s20-s23}
vmul.f32 s14, s5, s14
it ge
vldmiage r1!, {s28-s31}
vmul.f32 s15, s4, s15
it ge
vmulge.f32 s24, s19, s24
it gt
vldmdbgt r2!, {s0-s3}
it ge
vmulge.f32 s25, s18, s25
vstmia r0!, {s8-s13}
it ge
vmulge.f32 s26, s17, s26
it gt
vldmiagt r1!, {s8-s11}
itt ge
vmulge.f32 s27, s16, s27
vmulge.f32 s28, s23, s28
it gt
vldmdbgt r2!, {s4-s7}
it ge
vmulge.f32 s29, s22, s29
vstmia r0!, {s14-s15}
ittt ge
vmulge.f32 s30, s21, s30
vmulge.f32 s31, s20, s31
vmulge.f32 s8, s3, s8
it gt
vldmiagt r1!, {s12-s15}
itttt ge
vmulge.f32 s9, s2, s9
vmulge.f32 s10, s1, s10
vstmiage r0!, {s24-s27}
vmulge.f32 s11, s0, s11
it ge
vstmiage r0!, {s28-s31}
bgt 1b
...
...
libavcodec/arm/fmtconvert_neon.S
View file @
8986fddc
...
...
@@ -71,6 +71,7 @@ endfunc
function ff_float_to_int16_interleave_neon, export=1
cmp r3, #2
itt lt
ldrlt r1, [r1]
blt ff_float_to_int16_neon
bne 4f
...
...
@@ -196,6 +197,7 @@ function ff_float_to_int16_interleave_neon, export=1
vst1.64 {d3}, [r8], ip
vst1.64 {d7}, [r8], ip
subs r3, r3, #4
it eq
popeq {r4-r8,pc}
cmp r3, #4
add r0, r0, #8
...
...
@@ -305,6 +307,7 @@ function ff_float_to_int16_interleave_neon, export=1
vst1.32 {d23[1]}, [r8], ip
8: subs r3, r3, #2
add r0, r0, #4
it eq
popeq {r4-r8,pc}
@ 1 channel
...
...
@@ -354,6 +357,7 @@ function ff_float_to_int16_interleave_neon, export=1
vst1.16 {d2[3]}, [r5,:16], ip
vst1.16 {d3[1]}, [r5,:16], ip
vst1.16 {d3[3]}, [r5,:16], ip
it eq
popeq {r4-r8,pc}
vld1.64 {d0-d1}, [r4,:128]!
vcvt.s32.f32 q0, q0, #16
...
...
libavcodec/arm/fmtconvert_vfp.S
View file @
8986fddc
...
...
@@ -46,6 +46,7 @@ function ff_float_to_int16_vfp, export=1
vmov r5, r6, s2, s3
vmov r7, r8, s4, s5
vmov ip, lr, s6, s7
it gt
vldmiagt r1!, {s16-s23}
ssat r4, #16, r4
ssat r3, #16, r3
...
...
@@ -53,10 +54,12 @@ function ff_float_to_int16_vfp, export=1
ssat r5, #16, r5
pkhbt r3, r3, r4, lsl #16
pkhbt r4, r5, r6, lsl #16
itttt gt
vcvtgt.s32.f32 s0, s16
vcvtgt.s32.f32 s1, s17
vcvtgt.s32.f32 s2, s18
vcvtgt.s32.f32 s3, s19
itttt gt
vcvtgt.s32.f32 s4, s20
vcvtgt.s32.f32 s5, s21
vcvtgt.s32.f32 s6, s22
...
...
libavcodec/arm/h264dsp_neon.S
View file @
8986fddc
This diff is collapsed.
Click to expand it.
libavcodec/arm/h264idct_neon.S
View file @
8986fddc
...
...
@@ -106,10 +106,12 @@ function ff_h264_idct_add16_neon, export=1
blt 2f
ldrsh lr, [r1]
add r0, r0, r4
it ne
movne lr, #0
cmp lr, #0
adrne lr, ff_h264_idct_dc_add_neon
adreq lr, ff_h264_idct_add_neon
ite ne
adrne lr, ff_h264_idct_dc_add_neon + CONFIG_THUMB
adreq lr, ff_h264_idct_add_neon + CONFIG_THUMB
blx lr
2: subs ip, ip, #1
add r1, r1, #32
...
...
@@ -132,8 +134,9 @@ function ff_h264_idct_add16intra_neon, export=1
add r0, r0, r4
cmp r8, #0
ldrsh r8, [r1]
adrne lr, ff_h264_idct_add_neon
adreq lr, ff_h264_idct_dc_add_neon
iteet ne
adrne lr, ff_h264_idct_add_neon + CONFIG_THUMB
adreq lr, ff_h264_idct_dc_add_neon + CONFIG_THUMB
cmpeq r8, #0
blxne lr
subs ip, ip, #1
...
...
@@ -159,12 +162,14 @@ function ff_h264_idct_add8_neon, export=1
add r1, r3, r12, lsl #5
cmp r8, #0
ldrsh r8, [r1]
adrne lr, ff_h264_idct_add_neon
adreq lr, ff_h264_idct_dc_add_neon
iteet ne
adrne lr, ff_h264_idct_add_neon + CONFIG_THUMB
adreq lr, ff_h264_idct_dc_add_neon + CONFIG_THUMB
cmpeq r8, #0
blxne lr
add r12, r12, #1
cmp r12, #4
itt eq
moveq r12, #16
moveq r4, r9
cmp r12, #20
...
...
@@ -365,10 +370,12 @@ function ff_h264_idct8_add4_neon, export=1
blt 2f
ldrsh lr, [r1]
add r0, r0, r4
it ne
movne lr, #0
cmp lr, #0
adrne lr, ff_h264_idct8_dc_add_neon
adreq lr, ff_h264_idct8_add_neon
ite ne
adrne lr, ff_h264_idct8_dc_add_neon + CONFIG_THUMB
adreq lr, ff_h264_idct8_add_neon + CONFIG_THUMB
blx lr
2: subs r12, r12, #4
add r1, r1, #128
...
...
libavcodec/arm/mathops.h
View file @
8986fddc
...
...
@@ -64,11 +64,14 @@ static inline av_const int mid_pred(int a, int b, int c)
__asm__
(
"mov %0, %2
\n\t
"
"cmp %1, %2
\n\t
"
"itt gt
\n\t
"
"movgt %0, %1
\n\t
"
"movgt %1, %2
\n\t
"
"cmp %1, %3
\n\t
"
"it le
\n\t
"
"movle %1, %3
\n\t
"
"cmp %0, %1
\n\t
"
"it gt
\n\t
"
"movgt %0, %1
\n\t
"
:
"=&r"
(
m
),
"+r"
(
a
)
:
"r"
(
b
),
"r"
(
c
)
...
...
libavcodec/arm/mdct_neon.S
View file @
8986fddc
...
...
@@ -191,7 +191,9 @@ function ff_mdct_calc_neon, export=1
vadd.f32 d17, d17, d3 @ in2u+in1d -I
1:
vmul.f32 d7, d0, d21 @ I*s
ldr r10, [r3, lr, lsr #1]
A ldr r10, [r3, lr, lsr #1]
T lsr r10, lr, #1
T ldr r10, [r3, r10]
vmul.f32 d6, d1, d20 @ -R*c
ldr r6, [r3, #4]!
vmul.f32 d4, d1, d21 @ -R*s
...
...
libavcodec/arm/mpegaudiodsp_fixed_armv6.S
View file @
8986fddc
...
...
@@ -75,7 +75,7 @@ function ff_mpadsp_apply_window_fixed_armv6, export=1
sum8 r8, r9, r1, r0, r10, r11, r12, lr
sum8 r8, r9, r1, r2, r10, r11, r12, lr, rsb, 32
round r10, r8, r9
strh
r10, [r3],
r4
strh
_post r10, r3,
r4
mov lr, #15
1:
...
...
@@ -127,10 +127,10 @@ function ff_mpadsp_apply_window_fixed_armv6, export=1
round r10, r8, r9
adds r8, r8, r4
adc r9, r9, r7
strh
r10, [r3],
r12
strh
_post r10, r3,
r12
round r11, r8, r9
subs lr, lr, #1
strh
r11, [r5], -
r12
strh
_dpost r11, r5,
r12
bgt 1b
sum8 r8, r9, r1, r0, r10, r11, r12, lr, rsb, 33
...
...
libavcodec/arm/mpegvideo_armv5te_s.S
View file @
8986fddc
...
...
@@ -38,15 +38,21 @@
/*
 * dequant_t dst, src, mul, add, tmp
 * Operates on the top halfword of src:
 *   tmp = (src >> 16, arithmetic) - ip, setting the flags
 *   if (src >> 16) >  ip:  tmp = add
 *   if (src >> 16) <  ip:  tmp = -add
 *   if (src >> 16) != ip:  dst = top16(src) * bottom16(mul) + tmp
 * When the two are equal dst is left unchanged. The conditional moves do
 * not update the flags, so all three conditions refer to the same
 * comparison. Each conditional instruction is preceded by its own "it",
 * as Thumb-2 requires.
 * NOTE(review): ip is set up outside this macro (presumably to 0, which
 * would make this a sign test) -- confirm in the calling function.
 */
.macro dequant_t dst, src, mul, add, tmp
rsbs \tmp, ip, \src, asr #16
it gt
addgt \tmp, \add, #0
it lt
rsblt \tmp, \add, #0
it ne
smlatbne \dst, \src, \mul, \tmp
.endm
/*
 * dequant_b dst, src, mul, add, tmp
 * Operates on the bottom halfword of src:
 *   tmp = (src << 16) - ip, setting the flags
 *   if (src << 16) >  ip:  tmp = add
 *   if (src << 16) <  ip:  tmp = -add
 *   if (src << 16) != ip:  dst = bottom16(src) * bottom16(mul) + tmp
 * When the two are equal dst is left unchanged. The conditional moves do
 * not update the flags, so all three conditions refer to the same
 * comparison. Each conditional instruction is preceded by its own "it",
 * as Thumb-2 requires.
 * NOTE(review): ip is set up outside this macro (presumably to 0, which
 * would make this a sign test) -- confirm in the calling function.
 */
.macro dequant_b dst, src, mul, add, tmp
rsbs \tmp, ip, \src, lsl #16
it gt
addgt \tmp, \add, #0
it lt
rsblt \tmp, \add, #0
it ne
smlabbne \dst, \src, \mul, \tmp
.endm
...
...
@@ -80,21 +86,27 @@ function ff_dct_unquantize_h263_armv5te, export=1
strh lr, [r0], #2
subs r3, r3, #8
it gt
ldrdgt r4, [r0, #0] /* load data early to avoid load/use pipeline stall */
bgt 1b
adds r3, r3, #2
it le
pople {r4-r9,pc}
2:
ldrsh r9, [r0, #0]
ldrsh lr, [r0, #2]
mov r8, r2
cmp r9, #0
it lt
rsblt r8, r2, #0
it ne
smlabbne r9, r9, r1, r8
mov r8, r2
cmp lr, #0
it lt
rsblt r8, r2, #0
it ne
smlabbne lr, lr, r1, r8
strh r9, [r0], #2
strh lr, [r0], #2
...
...
libavcodec/arm/mpegvideo_neon.S
View file @
8986fddc
...
...
@@ -57,6 +57,7 @@ function ff_dct_unquantize_h263_neon, export=1
subs r3, r3, #16
vst1.16 {q0}, [r1,:128]!
vst1.16 {q8}, [r1,:128]!
it le
bxle lr
cmp r3, #8
bgt 1b
...
...
@@ -78,6 +79,7 @@ function ff_dct_unquantize_h263_intra_neon, export=1
ldr r6, [r0, #AC_PRED]
add lr, r0, #INTER_SCANTAB_RASTER_END
cmp r6, #0
it ne
movne r12, #63
bne 1f
ldr r12, [r12, r2, lsl #2]
...
...
@@ -86,9 +88,11 @@ function ff_dct_unquantize_h263_intra_neon, export=1
ldrsh r4, [r1]
cmp r5, #0
mov r5, r1
it ne
movne r2, #0
bne 2f
cmp r2, #4
it ge
addge r0, r0, #4
sub r2, r3, #1
ldr r6, [r0, #Y_DC_SCALE]
...
...
libavcodec/arm/rdft_neon.S
View file @
8986fddc
...
...
@@ -137,6 +137,7 @@ function ff_rdft_calc_neon, export=1
vst1.32 {d22}, [r5,:64]
cmp r6, #0
it eq
popeq {r4-r8,pc}
vmul.f32 d22, d22, d18
...
...
libavcodec/arm/simple_idct_arm.S
View file @
8986fddc
...
...
@@ -121,11 +121,13 @@ __b_evaluation:
ldr r11, [r12, #offW7] @ R11=W7
mul r5, r10, r7 @ R5=W5*ROWr16[1]=b2 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
mul r7, r11, r7 @ R7=W7*ROWr16[1]=b3 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
teq r2, #0 @ if null avoid muls
mlane r0, r9, r2, r0 @ R0+=W3*ROWr16[3]=b0 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
teq r2, #0 @ if null avoid muls
itttt ne
mlane r0, r9, r2, r0 @ R0+=W3*ROWr16[3]=b0 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
rsbne r2, r2, #0 @ R2=-ROWr16[3]
mlane r1, r11, r2, r1 @ R1-=W7*ROWr16[3]=b1 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
mlane r5, r8, r2, r5 @ R5-=W1*ROWr16[3]=b2 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
it ne
mlane r7, r10, r2, r7 @ R7-=W5*ROWr16[3]=b3 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
@@ at this point, R0=b0, R1=b1, R2 (free), R3=ROWr32[2], R4=ROWr32[3],
...
...
@@ -148,19 +150,23 @@ __b_evaluation:
@@ MAC16(b3, -W1, row[7]);
@@ MAC16(b1, -W5, row[7]);
mov r3, r3, asr #16 @ R3=ROWr16[5]
teq r3, #0 @ if null avoid muls
teq r3, #0 @ if null avoid muls
it ne
mlane r0, r10, r3, r0 @ R0+=W5*ROWr16[5]=b0
mov r4, r4, asr #16 @ R4=ROWr16[7]
itttt ne
mlane r5, r11, r3, r5 @ R5+=W7*ROWr16[5]=b2
mlane r7, r9, r3, r7 @ R7+=W3*ROWr16[5]=b3
rsbne r3, r3, #0 @ R3=-ROWr16[5]
mlane r1, r8, r3, r1 @ R1-=W1*ROWr16[5]=b1
@@ R3 is free now
teq r4, #0 @ if null avoid muls
teq r4, #0 @ if null avoid muls
itttt ne
mlane r0, r11, r4, r0 @ R0+=W7*ROWr16[7]=b0
mlane r5, r9, r4, r5 @ R5+=W3*ROWr16[7]=b2
rsbne r4, r4, #0 @ R4=-ROWr16[7]
mlane r7, r8, r4, r7 @ R7-=W1*ROWr16[7]=b3
it ne
mlane r1, r10, r4, r1 @ R1-=W5*ROWr16[7]=b1
@@ R4 is free now
__end_b_evaluation:
...
...
@@ -204,16 +210,19 @@ __a_evaluation:
@@ a2 -= W4*row[4]
@@ a3 += W4*row[4]
ldrsh r11, [r14, #8] @ R11=ROWr16[4]
teq r11, #0 @ if null avoid muls
teq r11, #0 @ if null avoid muls
it ne
mulne r11, r9, r11 @ R11=W4*ROWr16[4]
@@ R9 is free now
ldrsh r9, [r14, #12] @ R9=ROWr16[6]
itttt ne
addne r6, r6, r11 @ R6+=W4*ROWr16[4] (a0)
subne r2, r2, r11 @ R2-=W4*ROWr16[4] (a1)
subne r3, r3, r11 @ R3-=W4*ROWr16[4] (a2)
addne r4, r4, r11 @ R4+=W4*ROWr16[4] (a3)
@@ W6 alone is no more useful, save W2*ROWr16[6] in it instead
teq r9, #0 @ if null avoid muls
teq r9, #0 @ if null avoid muls
itttt ne
mulne r11, r10, r9 @ R11=W6*ROWr16[6]
addne r6, r6, r11 @ R6+=W6*ROWr16[6] (a0)
mulne r10, r8, r9 @ R10=W2*ROWr16[6]
...
...
@@ -222,6 +231,7 @@ __a_evaluation:
@@ a1 -= W2*row[6];
@@ a2 += W2*row[6];
subne r4, r4, r11 @ R4-=W6*ROWr16[6] (a3)
itt ne
subne r2, r2, r10 @ R2-=W2*ROWr16[6] (a1)
addne r3, r3, r10 @ R3+=W2*ROWr16[6] (a2)
...
...
@@ -323,10 +333,12 @@ __b_evaluation2:
ldrsh r2, [r14, #48]
mul r7, r11, r7 @ R7=W7*ROWr16[1]=b3 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
teq r2, #0 @ if 0, then avoid muls
itttt ne
mlane r0, r9, r2, r0 @ R0+=W3*ROWr16[3]=b0 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
rsbne r2, r2, #0 @ R2=-ROWr16[3]
mlane r1, r11, r2, r1 @ R1-=W7*ROWr16[3]=b1 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
mlane r5, r8, r2, r5 @ R5-=W1*ROWr16[3]=b2 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
it ne
mlane r7, r10, r2, r7 @ R7-=W5*ROWr16[3]=b3 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
@@ at this point, R0=b0, R1=b1, R2 (free), R3 (free), R4 (free),
...
...
@@ -342,18 +354,22 @@ __b_evaluation2:
@@ MAC16(b1, -W5, col[7x8]);
ldrsh r3, [r14, #80] @ R3=COLr16[5x8]
teq r3, #0 @ if 0 then avoid muls
itttt ne
mlane r0, r10, r3, r0 @ R0+=W5*ROWr16[5x8]=b0
mlane r5, r11, r3, r5 @ R5+=W7*ROWr16[5x8]=b2
mlane r7, r9, r3, r7 @ R7+=W3*ROWr16[5x8]=b3
rsbne r3, r3, #0 @ R3=-ROWr16[5x8]
ldrsh r4, [r14, #112] @ R4=COLr16[7x8]
it ne
mlane r1, r8, r3, r1 @ R1-=W1*ROWr16[5x8]=b1
@@ R3 is free now
teq r4, #0 @ if 0 then avoid muls
itttt ne
mlane r0, r11, r4, r0 @ R0+=W7*ROWr16[7x8]=b0
mlane r5, r9, r4, r5 @ R5+=W3*ROWr16[7x8]=b2
rsbne r4, r4, #0 @ R4=-ROWr16[7x8]
mlane r7, r8, r4, r7 @ R7-=W1*ROWr16[7x8]=b3
it ne
mlane r1, r10, r4, r1 @ R1-=W5*ROWr16[7x8]=b1
@@ R4 is free now
__end_b_evaluation2:
...
...
@@ -390,15 +406,18 @@ __a_evaluation2:
@@ a3 += W4*row[4]
ldrsh r11, [r14, #64] @ R11=ROWr16[4]
teq r11, #0 @ if null avoid muls
itttt ne
mulne r11, r9, r11 @ R11=W4*ROWr16[4]
@@ R9 is free now
addne r6, r6, r11 @ R6+=W4*ROWr16[4] (a0)
subne r2, r2, r11 @ R2-=W4*ROWr16[4] (a1)
subne r3, r3, r11 @ R3-=W4*ROWr16[4] (a2)
ldrsh r9, [r14, #96] @ R9=ROWr16[6]
it ne
addne r4, r4, r11 @ R4+=W4*ROWr16[4] (a3)
@@ W6 alone is no more useful, save W2*ROWr16[6] in it instead
teq r9, #0 @ if null avoid muls
itttt ne
mulne r11, r10, r9 @ R11=W6*ROWr16[6]
addne r6, r6, r11 @ R6+=W6*ROWr16[6] (a0)
mulne r10, r8, r9 @ R10=W2*ROWr16[6]
...
...
@@ -407,6 +426,7 @@ __a_evaluation2:
@@ a1 -= W2*row[6];
@@ a2 += W2*row[6];
subne r4, r4, r11 @ R4-=W6*ROWr16[6] (a3)
itt ne
subne r2, r2, r10 @ R2-=W2*ROWr16[6] (a1)
addne r3, r3, r10 @ R3+=W2*ROWr16[6] (a2)
__end_a_evaluation2:
...
...
libavcodec/arm/simple_idct_armv5te.S
View file @
8986fddc
...
...
@@ -49,6 +49,7 @@ function idct_row_armv5te
ldrd v1, [a1, #8]
ldrd a3, [a1] /* a3 = row[1:0], a4 = row[3:2] */
orrs v1, v1, v2
itt eq
cmpeq v1, a4
cmpeq v1, a3, lsr #16
beq row_dc_only
...
...
@@ -269,6 +270,7 @@ function idct_col_armv5te
ldmfd sp!, {a3, a4}
adds a2, a3, v1
mov a2, a2, lsr #20
it mi
orrmi a2, a2, #0xf000
add ip, a4, v2
mov ip, ip, asr #20
...
...
@@ -276,6 +278,7 @@ function idct_col_armv5te
str a2, [a1]
subs a3, a3, v1
mov a2, a3, lsr #20
it mi
orrmi a2, a2, #0xf000
sub a4, a4, v2
mov a4, a4, asr #20
...
...
@@ -285,6 +288,7 @@ function idct_col_armv5te
subs a2, a3, v3
mov a2, a2, lsr #20
it mi
orrmi a2, a2, #0xf000
sub ip, a4, v4
mov ip, ip, asr #20
...
...
@@ -292,6 +296,7 @@ function idct_col_armv5te
str a2, [a1, #(16*1)]
adds a3, a3, v3
mov a2, a3, lsr #20
it mi
orrmi a2, a2, #0xf000
add a4, a4, v4
mov a4, a4, asr #20
...
...
@@ -301,6 +306,7 @@ function idct_col_armv5te
adds a2, a3, v5
mov a2, a2, lsr #20
it mi
orrmi a2, a2, #0xf000
add ip, a4, v6
mov ip, ip, asr #20
...
...
@@ -308,6 +314,7 @@ function idct_col_armv5te
str a2, [a1, #(16*2)]
subs a3, a3, v5
mov a2, a3, lsr #20
it mi
orrmi a2, a2, #0xf000
sub a4, a4, v6
mov a4, a4, asr #20
...
...
@@ -317,6 +324,7 @@ function idct_col_armv5te
adds a2, a3, v7
mov a2, a2, lsr #20
it mi
orrmi a2, a2, #0xf000
add ip, a4, fp
mov ip, ip, asr #20
...
...
@@ -324,6 +332,7 @@ function idct_col_armv5te
str a2, [a1, #(16*3)]
subs a3, a3, v7
mov a2, a3, lsr #20
it mi
orrmi a2, a2, #0xf000
sub a4, a4, fp
mov a4, a4, asr #20
...
...
@@ -335,15 +344,19 @@ endfunc
/*
 * clip dst, src
 * dst = src clamped to the range 0..255.
 * src is a vararg, so it may be a full flexible operand (the call sites
 * in this file pass a register with an "asr #20" shift).
 * The movs sets the flags so a negative value is replaced by 0; the
 * following compare replaces anything above 255 by 255.
 * Clobbers the condition flags.
 */
.macro clip dst, src:vararg
movs \dst, \src
it mi
movmi \dst, #0
cmp \dst, #255
it gt
movgt \dst, #255
.endm
/*
 * aclip dst, src
 * Add-and-clip: src is the complete operand list of an add (the call
 * sites in this file pass two operands, the second one shifted), and the
 * sum written to dst is clamped to the range 0..255.
 * The adds sets the flags so a negative sum is replaced by 0; the
 * following compare replaces anything above 255 by 255.
 * Clobbers the condition flags.
 */
.macro aclip dst, src:vararg
adds \dst, \src
it mi
movmi \dst, #0
cmp \dst, #255
it gt
movgt \dst, #255
.endm
...
...
@@ -370,35 +383,35 @@ function idct_col_put_armv5te
orr a2, a3, a4, lsl #8
rsb v2, lr, lr, lsl #3
ldmfd sp!, {a3, a4}
strh
a2, [v2, v1]!
strh
_pre a2, v2, v1
sub a2, a3, v3
clip a2, a2, asr #20
sub ip, a4, v4
clip ip, ip, asr #20
orr a2, a2, ip, lsl #8
strh
a2, [v1, lr]!
strh
_pre a2, v1, lr
add a3, a3, v3
clip a2, a3, asr #20
add a4, a4, v4
clip a4, a4, asr #20
orr a2, a2, a4, lsl #8
ldmfd sp!, {a3, a4}
strh
a2, [v2, -lr]!
strh
_dpre a2, v2, lr
add a2, a3, v5
clip a2, a2, asr #20
add ip, a4, v6
clip ip, ip, asr #20
orr a2, a2, ip, lsl #8
strh
a2, [v1, lr]!
strh
_pre a2, v1, lr
sub a3, a3, v5
clip a2, a3, asr #20
sub a4, a4, v6
clip a4, a4, asr #20
orr a2, a2, a4, lsl #8
ldmfd sp!, {a3, a4}
strh
a2, [v2, -lr]!
strh
_dpre a2, v2, lr
add a2, a3, v7
clip a2, a2, asr #20
...
...
@@ -411,7 +424,7 @@ function idct_col_put_armv5te
sub a4, a4, fp
clip a4, a4, asr #20
orr a2, a2, a4, lsl #8
strh
a2, [v2, -lr]
strh
_dpre a2, v2, lr
ldr pc, [sp], #4
endfunc
...
...
@@ -436,7 +449,7 @@ function idct_col_add_armv5te
ldr v1, [sp, #32]
sub a4, a4, v2
rsb v2, v1, v1, lsl #3
ldrh
ip, [v2, lr]!
ldrh
_pre ip, v2, lr
strh a2, [lr]
and a2, ip, #255
aclip a3, a2, a3, asr #20
...
...
@@ -448,7 +461,7 @@ function idct_col_add_armv5te
strh a2, [v2]
ldmfd sp!, {a3, a4}
ldrh
ip, [lr, v1]!
ldrh
_pre ip, lr, v1
sub a2, a3, v3
add a3, a3, v3
and v3, ip, #255
...
...
@@ -458,7 +471,7 @@ function idct_col_add_armv5te
aclip v3, v3, ip, lsr #8
orr a2, a2, v3, lsl #8
add a4, a4, v4
ldrh
ip, [v2, -v1]!
ldrh
_dpre ip, v2, v1
strh a2, [lr]
and a2, ip, #255
aclip a3, a2, a3, asr #20
...
...
@@ -468,7 +481,7 @@ function idct_col_add_armv5te
strh a2, [v2]
ldmfd sp!, {a3, a4}
ldrh
ip, [lr, v1]!
ldrh
_pre ip, lr, v1
add a2, a3, v5
sub a3, a3, v5
and v3, ip, #255
...
...
@@ -478,7 +491,7 @@ function idct_col_add_armv5te
aclip v3, v3, ip, lsr #8
orr a2, a2, v3, lsl #8
sub a4, a4, v6
ldrh
ip, [v2, -v1]!
ldrh
_dpre ip, v2, v1
strh a2, [lr]
and a2, ip, #255
aclip a3, a2, a3, asr #20
...
...
@@ -488,7 +501,7 @@ function idct_col_add_armv5te
strh a2, [v2]
ldmfd sp!, {a3, a4}
ldrh
ip, [lr, v1]!
ldrh
_pre ip, lr, v1
add a2, a3, v7
sub a3, a3, v7
and v3, ip, #255
...
...
@@ -498,7 +511,7 @@ function idct_col_add_armv5te
aclip v3, v3, ip, lsr #8
orr a2, a2, v3, lsl #8
sub a4, a4, fp
ldrh
ip, [v2, -v1]!
ldrh
_dpre ip, v2, v1
strh a2, [lr]
and a2, ip, #255
aclip a3, a2, a3, asr #20
...
...
libavcodec/arm/simple_idct_armv6.S
View file @
8986fddc
...
...
@@ -200,6 +200,7 @@ function idct_row_armv6
ldr r3, [r0, #8] /* r3 = row[3,1] */
ldr r2, [r0] /* r2 = row[2,0] */
orrs lr, lr, ip
itt eq
cmpeq lr, r3
cmpeq lr, r2, lsr #16
beq 1f
...
...
@@ -282,14 +283,14 @@ function idct_col_put_armv6
pop {r1, r2}
idct_finish_shift_sat COL_SHIFT
strb
r4, [r1]
, r2
strb
r5, [r1]
, r2
strb
r6, [r1]
, r2
strb
r7, [r1]
, r2
strb
r11,[r1]
, r2
strb
r10,[r1]
, r2
strb
r9, [r1]
, r2
strb
r8, [r1]
, r2
strb
_post r4, r1
, r2
strb
_post r5, r1
, r2
strb
_post r6, r1
, r2
strb
_post r7, r1
, r2
strb
_post r11,r1
, r2
strb
_post r10,r1
, r2
strb
_post r9, r1
, r2
strb
_post r8, r1
, r2
sub r1, r1, r2, lsl #3
...
...
@@ -318,16 +319,16 @@ function idct_col_add_armv6
add ip, r3, ip, asr #COL_SHIFT
usat ip, #8, ip
add r4, r7, r4, asr #COL_SHIFT
strb
ip, [r1]
, r2
strb
_post ip, r1
, r2
ldrb ip, [r1, r2]
usat r4, #8, r4
ldrb r11,[r1, r2, lsl #2]
add r5, ip, r5, asr #COL_SHIFT
usat r5, #8, r5
strb
r4, [r1]
, r2
strb
_post r4, r1
, r2
ldrb r3, [r1, r2]
ldrb ip, [r1, r2, lsl #2]
strb
r5, [r1]
, r2
strb
_post r5, r1
, r2
ldrb r7, [r1, r2]
ldrb r4, [r1, r2, lsl #2]
add r6, r3, r6, asr #COL_SHIFT
...
...
@@ -340,11 +341,11 @@ function idct_col_add_armv6
usat r8, #8, r8
add lr, r4, lr, asr #COL_SHIFT
usat lr, #8, lr
strb
r6, [r1]
, r2
strb
r10,[r1]
, r2
strb
r9, [r1]
, r2
strb
r8, [r1]
, r2
strb
lr, [r1]
, r2
strb
_post r6, r1
, r2
strb
_post r10,r1
, r2
strb
_post r9, r1
, r2
strb
_post r8, r1
, r2
strb
_post lr, r1
, r2
sub r1, r1, r2, lsl #3
...
...
libavcodec/arm/simple_idct_neon.S
View file @
8986fddc
...
...
@@ -71,7 +71,7 @@ function idct_row4_pld_neon
add r3, r0, r1, lsl #2
pld [r0, r1]
pld [r0, r1, lsl #1]
pld [r3, -r1]
A
pld [r3, -r1]
pld [r3]
pld [r3, r1]
add r3, r3, r1, lsl #1
...
...
@@ -164,6 +164,7 @@ function idct_col4_neon
orrs r4, r4, r5
idct_col4_top
it eq
addeq r2, r2, #16
beq 1f
...
...
@@ -176,6 +177,7 @@ function idct_col4_neon
1: orrs r6, r6, r7
ldrd r4, [r2, #16]
it eq
addeq r2, r2, #16
beq 2f
...
...
@@ -187,6 +189,7 @@ function idct_col4_neon
2: orrs r4, r4, r5
ldrd r4, [r2, #16]
it eq
addeq r2, r2, #16
beq 3f
...
...
@@ -199,6 +202,7 @@ function idct_col4_neon
vadd.i32 q13, q13, q8
3: orrs r4, r4, r5
it eq
addeq r2, r2, #16
beq 4f
...
...
libavcodec/arm/synth_filter_neon.S
View file @
8986fddc
...
...
@@ -100,9 +100,11 @@ NOVFP vldr s0, [sp, #12*4] @ scale
vst1.32 {q9}, [r2,:128]
subs r1, r1, #1
it eq
popeq {r4-r11,pc}
cmp r4, #0
itt eq
subeq r8, r8, #512*4
subeq r9, r9, #512*4
sub r5, r5, #512*4
...
...
libavcodec/arm/vp56_arith.h
View file @
8986fddc
...
...
@@ -21,6 +21,14 @@
#ifndef AVCODEC_ARM_VP56_ARITH_H
#define AVCODEC_ARM_VP56_ARITH_H
#if CONFIG_THUMB
# define A(x)
# define T(x) x
#else
# define A(x) x
# define T(x)
#endif
#if HAVE_ARMV6 && HAVE_INLINE_ASM
#define vp56_rac_get_prob vp56_rac_get_prob_armv6
...
...
@@ -32,15 +40,21 @@ static inline int vp56_rac_get_prob_armv6(VP56RangeCoder *c, int pr)
unsigned bit;
__asm__ ("adds %3, %3, %0 \n"
"itt cs \n"
"cmpcs %7, %4 \n"
"ldrcsh %2, [%4], #2 \n"
A("ldrcsh %2, [%4], #2 \n")
T("ldrhcs %2, [%4], #2 \n")
"rsb %0, %6, #256 \n"
"smlabb %0, %5, %6, %0 \n"
T("itttt cs \n")
"rev16cs %2, %2 \n"
"orrcs %1, %1, %2, lsl %3 \n"
T("lslcs %2, %2, %3 \n")
T("orrcs %1, %1, %2 \n")
A("orrcs %1, %1, %2, lsl %3 \n")
"subcs %3, %3, #16 \n"
"lsr %0, %0, #8 \n"
"cmp %1, %0, lsl #16 \n"
"ittte ge \n"
"subge %1, %1, %0, lsl #16 \n"
"subge %0, %5, %0 \n"
"movge %2, #1 \n"
...
...
@@ -64,12 +78,17 @@ static inline int vp56_rac_get_prob_branchy_armv6(VP56RangeCoder *c, int pr)
unsigned tmp;
__asm__ ("adds %3, %3, %0 \n"
"itt cs \n"
"cmpcs %7, %4 \n"
"ldrcsh %2, [%4], #2 \n"
A("ldrcsh %2, [%4], #2 \n")
T("ldrhcs %2, [%4], #2 \n")
"rsb %0, %6, #256 \n"
"smlabb %0, %5, %6, %0 \n"
T("itttt cs \n")
"rev16cs %2, %2 \n"
"orrcs %1, %1, %2, lsl %3 \n"
T("lslcs %2, %2, %3 \n")
T("orrcs %1, %1, %2 \n")
A("orrcs %1, %1, %2, lsl %3 \n")
"subcs %3, %3, #16 \n"
"lsr %0, %0, #8 \n"
"lsl %2, %0, #16 \n"
...
...
libavcodec/arm/vp8_armv6.S
View file @
8986fddc
...
...
@@ -25,13 +25,18 @@
lsl \cw, \cw, \t0
lsl \t0, \h, \t0
rsb \h, \pr, #256
it cs
ldrhcs \t1, [\buf], #2
smlabb \h, \t0, \pr, \h
T itttt cs
rev16cs \t1, \t1
orrcs \cw, \cw, \t1, lsl \bs
A orrcs \cw, \cw, \t1, lsl \bs
T lslcs \t1, \t1, \bs
T orrcs \cw, \cw, \t1
subcs \bs, \bs, #16
lsr \h, \h, #8
cmp \cw, \h, lsl #16
itt ge
subge \cw, \cw, \h, lsl #16
subge \h, \t0, \h
.endm
...
...
@@ -40,14 +45,20 @@
adds \bs, \bs, \t0
lsl \cw, \cw, \t0
lsl \t0, \h, \t0
it cs
ldrhcs \t1, [\buf], #2
mov \h, #128
it cs
rev16cs \t1, \t1
add \h, \h, \t0, lsl #7
orrcs \cw, \cw, \t1, lsl \bs
A orrcs \cw, \cw, \t1, lsl \bs
T ittt cs
T lslcs \t1, \t1, \bs
T orrcs \cw, \cw, \t1
subcs \bs, \bs, #16
lsr \h, \h, #8
cmp \cw, \h, lsl #16
itt ge
subge \cw, \cw, \h, lsl #16
subge \h, \t0, \h
.endm
...
...
@@ -59,6 +70,7 @@ function ff_decode_block_coeffs_armv6, export=1
cmp r3, #0
ldr r11, [r5]
ldm r0, {r5-r7} @ high, bits, buf
it ne
pkhtbne r11, r11, r11, asr #16
ldr r8, [r0, #16] @ code_word
0:
...
...
@@ -80,19 +92,26 @@ function ff_decode_block_coeffs_armv6, export=1
adds r6, r6, r9
add r4, r4, #11
lsl r8, r8, r9
it cs
ldrhcs r10, [r7], #2
lsl r9, r5, r9
mov r5, #128
it cs
rev16cs r10, r10
add r5, r5, r9, lsl #7
orrcs r8, r8, r10, lsl r6
T ittt cs
T lslcs r10, r10, r6
T orrcs r8, r8, r10
A orrcs r8, r8, r10, lsl r6
subcs r6, r6, #16
lsr r5, r5, #8
cmp r8, r5, lsl #16
movrel r10, zigzag_scan-1
itt ge
subge r8, r8, r5, lsl #16
subge r5, r9, r5
ldrb r10, [r10, r3]
it ge
rsbge r12, r12, #0
cmp r3, #16
strh r12, [r1, r10]
...
...
@@ -108,6 +127,7 @@ function ff_decode_block_coeffs_armv6, export=1
ldr r0, [sp]
ldr r9, [r0, #12]
cmp r7, r9
it hi
movhi r7, r9
stm r0, {r5-r7} @ high, bits, buf
str r8, [r0, #16] @ code_word
...
...
@@ -131,11 +151,13 @@ function ff_decode_block_coeffs_armv6, export=1
mov r12, #2
ldrb r0, [r4, #4]
rac_get_prob r5, r6, r7, r8, r0, r9, r10
it ge
addge r12, #1
ldrb r9, [lr, r5]
blt 4f
ldrb r0, [r4, #5]
rac_get_prob r5, r6, r7, r8, r0, r9, r10
it ge
addge r12, #1
ldrb r9, [lr, r5]
b 4f
...
...
@@ -153,6 +175,7 @@ function ff_decode_block_coeffs_armv6, export=1
mov r12, #5
mov r0, #159
rac_get_prob r5, r6, r7, r8, r0, r9, r10
it ge
addge r12, r12, #1
ldrb r9, [lr, r5]
b 4f
...
...
@@ -160,23 +183,28 @@ function ff_decode_block_coeffs_armv6, export=1
mov r12, #7
mov r0, #165
rac_get_prob r5, r6, r7, r8, r0, r9, r10
it ge
addge r12, r12, #2
ldrb r9, [lr, r5]
mov r0, #145
rac_get_prob r5, r6, r7, r8, r0, r9, r10
it ge
addge r12, r12, #1
ldrb r9, [lr, r5]
b 4f
3:
ldrb r0, [r4, #8]
rac_get_prob r5, r6, r7, r8, r0, r9, r10
it ge
addge r4, r4, #1
ldrb r9, [lr, r5]
ite ge
movge r12, #2
movlt r12, #0
ldrb r0, [r4, #9]
rac_get_prob r5, r6, r7, r8, r0, r9, r10
mov r9, #8
it ge
addge r12, r12, #1
movrel r4, X(ff_vp8_dct_cat_prob)
lsl r9, r9, r12
...
...
@@ -189,6 +217,7 @@ function ff_decode_block_coeffs_armv6, export=1
lsl r1, r1, #1
rac_get_prob r5, r6, r7, r8, r0, r9, r10
ldrb r0, [r4], #1
it ge
addge r1, r1, #1
cmp r0, #0
bne 1b
...
...
@@ -200,6 +229,7 @@ function ff_decode_block_coeffs_armv6, export=1
add r4, r2, r4
add r4, r4, #22
rac_get_128 r5, r6, r7, r8, r9, r10
it ge
rsbge r12, r12, #0
smulbb r12, r12, r11
movrel r9, zigzag_scan-1
...
...
libavcodec/arm/vp8dsp_neon.S
View file @
8986fddc
...
...
@@ -746,14 +746,14 @@ function ff_put_vp8_pixels4_neon, export=1
push {r4-r6,lr}
1:
subs r12, r12, #4
ldr r4, [r2], r3
ldr r5, [r2], r3
ldr r6, [r2], r3
ldr lr, [r2], r3
str r4, [r0], r1
str r5, [r0], r1
str r6, [r0], r1
str lr, [r0], r1
ldr_post r4, r2, r3
ldr_post r5, r2, r3
ldr_post r6, r2, r3
ldr_post lr, r2, r3
str_post r4, r0, r1
str_post r5, r0, r1
str_post r6, r0, r1
str_post lr, r0, r1
bgt 1b
pop {r4-r6,pc}
endfunc
...
...
libavutil/arm/intmath.h
View file @
8986fddc
...
...
@@ -36,6 +36,7 @@ static av_always_inline av_const int FASTDIV(int a, int b)
int r;
__asm__ ("cmp %2, #2 \n\t"
"ldr %0, [%3, %2, lsl #2] \n\t"
"ite le \n\t"
"lsrle %0, %1, #1 \n\t"
"smmulgt %0, %0, %1 \n\t"
: "=&r"(r) : "r"(a), "r"(b), "r"(ff_inverse) : "cc");
...
...
@@ -101,6 +102,7 @@ static av_always_inline av_const int32_t av_clipl_int32_arm(int64_t a)
{
int x, y;
__asm__ ("adds %1, %R2, %Q2, lsr #31 \n\t"
"itet ne \n\t"
"mvnne %1, #1<<31 \n\t"
"moveq %0, %Q2 \n\t"
"eorne %0, %1, %R2, asr #31 \n\t"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment