Commit a5dfeb61, authored by Ronald S. Bultje, committed by Michael Niedermayer

VP8: armv6 optimizations.

From 52.503s (~40fps) to 27.973s (~80fps) decoding of 480p sintel
trailer, i.e. a ~2x speedup overall, on a Nexus S.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent c3a77496
...@@ -11,7 +11,8 @@ ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o ...@@ -11,7 +11,8 @@ ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o
OBJS-$(CONFIG_VP5_DECODER) += arm/vp56dsp_init_arm.o OBJS-$(CONFIG_VP5_DECODER) += arm/vp56dsp_init_arm.o
OBJS-$(CONFIG_VP6_DECODER) += arm/vp56dsp_init_arm.o OBJS-$(CONFIG_VP6_DECODER) += arm/vp56dsp_init_arm.o
OBJS-$(CONFIG_VP8_DECODER) += arm/vp8dsp_init_arm.o OBJS-$(CONFIG_VP8_DECODER) += arm/vp8dsp_init_arm.o
ARMV6-OBJS-$(CONFIG_VP8_DECODER) += arm/vp8_armv6.o ARMV6-OBJS-$(CONFIG_VP8_DECODER) += arm/vp8_armv6.o \
arm/vp8dsp_armv6.o
OBJS-$(CONFIG_H264DSP) += arm/h264dsp_init_arm.o OBJS-$(CONFIG_H264DSP) += arm/h264dsp_init_arm.o
OBJS-$(CONFIG_H264PRED) += arm/h264pred_init_arm.o OBJS-$(CONFIG_H264PRED) += arm/h264pred_init_arm.o
......
...@@ -97,6 +97,12 @@ T add \rn, \rn, \rm ...@@ -97,6 +97,12 @@ T add \rn, \rn, \rm
T ldr \rt, [\rn] T ldr \rt, [\rn]
.endm .endm
@ ldr_dpren rt, rn, rm
@ Load a word into \rt from the address (\rn - \rm). \rn is not written back.
@ \rm is declared vararg, so it may carry extra operand text after the register.
@ NOTE(review): the A/T line prefixes are defined outside this excerpt;
@ presumably A lines are assembled for ARM mode and T lines for Thumb mode.
.macro ldr_dpren rt, rn, rm:vararg
@ Single instruction using a negative register offset.
A ldr \rt, [\rn, -\rm]
@ Two-step equivalent: \rt temporarily holds the computed address,
@ then is overwritten with the loaded value.
T sub \rt, \rn, \rm
T ldr \rt, [\rt]
.endm
.macro ldr_post rt, rn, rm:vararg .macro ldr_post rt, rn, rm:vararg
A ldr \rt, [\rn], \rm A ldr \rt, [\rn], \rm
T ldr \rt, [\rn] T ldr \rt, [\rn]
...@@ -133,6 +139,12 @@ T ldrh \rt, [\rn] ...@@ -133,6 +139,12 @@ T ldrh \rt, [\rn]
T add \rn, \rn, \rm T add \rn, \rn, \rm
.endm .endm
@ ldrb_post rt, rn, rm
@ Load one byte into \rt from [\rn], then advance \rn by \rm.
@ NOTE(review): the A/T line prefixes are defined outside this excerpt;
@ presumably A lines are assembled for ARM mode and T lines for Thumb mode.
.macro ldrb_post rt, rn, rm
@ Single post-indexed byte load.
A ldrb \rt, [\rn], \rm
@ Same effect as a plain byte load followed by an explicit pointer update.
T ldrb \rt, [\rn]
T add \rn, \rn, \rm
.endm
.macro str_post rt, rn, rm:vararg .macro str_post rt, rn, rm:vararg
A str \rt, [\rn], \rm A str \rt, [\rn], \rm
T str \rt, [\rn] T str \rt, [\rn]
......
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
...@@ -76,18 +76,6 @@ function ff_vp8_luma_dc_wht_neon, export=1 ...@@ -76,18 +76,6 @@ function ff_vp8_luma_dc_wht_neon, export=1
bx lr bx lr
endfunc endfunc
@ ff_vp8_luma_dc_wht_dc_neon
@ Reads one signed 16-bit value from [r1], clears that location, and writes
@ (value + 3) >> 3 as a 16-bit value to 16 locations starting at r0,
@ spaced 32 bytes apart.
@ In:    r0 = destination base, r1 = address of the 16-bit input
@        (presumably the per-block coefficient array and the DC input
@        - TODO confirm against the C prototype)
@ Out:   nothing returned; r0 ends 16 * 32 bytes past its entry value
@ Clobb: r2, r3
function ff_vp8_luma_dc_wht_dc_neon, export=1
@ r2 = sign-extended input halfword
ldrsh r2, [r1]
mov r3, #0
@ add the rounding bias before the shift below
add r2, r2, #3
@ zero the input halfword now that it has been read
strh r3, [r1]
@ r2 = (input + 3) >> 3, arithmetic shift
asr r2, r2, #3
@ 16 unrolled stores, each post-incrementing r0 by 32 bytes
.rept 16
strh r2, [r0], #32
.endr
bx lr
endfunc
function ff_vp8_idct_add_neon, export=1 function ff_vp8_idct_add_neon, export=1
vld1.16 {q0-q1}, [r1,:128] vld1.16 {q0-q1}, [r1,:128]
movw r3, #20091 movw r3, #20091
...@@ -741,23 +729,6 @@ function ff_put_vp8_pixels8_neon, export=1 ...@@ -741,23 +729,6 @@ function ff_put_vp8_pixels8_neon, export=1
bx lr bx lr
endfunc endfunc
@ ff_put_vp8_pixels4_neon
@ Copies rows of one 32-bit word each from a source to a destination,
@ four rows per loop iteration.
@ In:    r0 = destination pointer, r1 = destination step per row
@        r2 = source pointer,      r3 = source step per row
@        [sp] on entry = h, the row count
@ The counter is decremented by 4 and tested after the fact, so the loop
@ body always runs at least once; h is presumably a positive multiple of 4
@ - TODO confirm with callers.
@ r0 and r2 are advanced past the copied rows; r12 is clobbered.
function ff_put_vp8_pixels4_neon, export=1
@ r12 = h, fetched before the push below moves sp
ldr r12, [sp, #0] @ h
@ r4-r6 and lr are used as data registers in the loop, so save them
push {r4-r6,lr}
1:
@ h -= 4; the flags set here are consumed by the bgt at the loop end
subs r12, r12, #4
@ load four rows, stepping the source pointer by r3 after each load
ldr_post r4, r2, r3
ldr_post r5, r2, r3
ldr_post r6, r2, r3
ldr_post lr, r2, r3
@ store the four rows, stepping the destination pointer by r1 after each
str_post r4, r0, r1
str_post r5, r0, r1
str_post r6, r0, r1
str_post lr, r0, r1
@ repeat while rows remain (counter still > 0 after the subs above)
bgt 1b
@ restore r4-r6 and return by popping the saved lr into pc
pop {r4-r6,pc}
endfunc
/* 4/6-tap 8th-pel MC */ /* 4/6-tap 8th-pel MC */
.macro vp8_epel8_h6 d, a, b .macro vp8_epel8_h6 d, a, b
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment