Commit a7e7d40c authored by Måns Rullgård

ARM: set size of asm functions in object files

Originally committed as revision 22404 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent db76ca7f
...@@ -35,6 +35,11 @@ ELF .eabi_attribute 25, \val ...@@ -35,6 +35,11 @@ ELF .eabi_attribute 25, \val
.endm .endm
.macro function name, export=0 .macro function name, export=0
/* endfunc: closes the function opened by the enclosing "function" macro.
 * It is defined anew on every "function" invocation, so the name argument
 * used below is the one given to that invocation. */
.macro endfunc
/* Set the ELF symbol size to (current location - start of the symbol). */
.size \name, . - \name
/* gas directive that ends a function's debug scope; the matching .func is
 * presumably emitted elsewhere in the enclosing macro -- TODO confirm. */
.endfunc
/* Remove this definition so the next "function" invocation can define
 * endfunc again without a macro-redefinition error. */
.purgem endfunc
.endm
.if \export .if \export
.global EXTERN_ASM\name .global EXTERN_ASM\name
EXTERN_ASM\name: EXTERN_ASM\name:
......
...@@ -36,7 +36,7 @@ function ff_prefetch_arm, export=1 ...@@ -36,7 +36,7 @@ function ff_prefetch_arm, export=1
add r0, r0, r1 add r0, r0, r1
bne ff_prefetch_arm bne ff_prefetch_arm
bx lr bx lr
.endfunc endfunc
#endif #endif
.macro ALIGN_QWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4 .macro ALIGN_QWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
...@@ -151,7 +151,7 @@ function ff_put_pixels16_arm, export=1 ...@@ -151,7 +151,7 @@ function ff_put_pixels16_arm, export=1
add r0, r0, r2 add r0, r0, r2
bne 4b bne 4b
pop {r4-r11,pc} pop {r4-r11,pc}
.endfunc endfunc
@ ---------------------------------------------------------------- @ ----------------------------------------------------------------
.align 5 .align 5
...@@ -203,7 +203,7 @@ function ff_put_pixels8_arm, export=1 ...@@ -203,7 +203,7 @@ function ff_put_pixels8_arm, export=1
add r0, r0, r2 add r0, r0, r2
bne 4b bne 4b
pop {r4-r5,pc} pop {r4-r5,pc}
.endfunc endfunc
@ ---------------------------------------------------------------- @ ----------------------------------------------------------------
.align 5 .align 5
...@@ -263,7 +263,7 @@ function ff_put_pixels8_x2_arm, export=1 ...@@ -263,7 +263,7 @@ function ff_put_pixels8_x2_arm, export=1
add r0, r0, r2 add r0, r0, r2
bne 4b bne 4b
pop {r4-r10,pc} pop {r4-r10,pc}
.endfunc endfunc
.align 5 .align 5
function ff_put_no_rnd_pixels8_x2_arm, export=1 function ff_put_no_rnd_pixels8_x2_arm, export=1
...@@ -322,7 +322,7 @@ function ff_put_no_rnd_pixels8_x2_arm, export=1 ...@@ -322,7 +322,7 @@ function ff_put_no_rnd_pixels8_x2_arm, export=1
add r0, r0, r2 add r0, r0, r2
bne 4b bne 4b
pop {r4-r10,pc} pop {r4-r10,pc}
.endfunc endfunc
@ ---------------------------------------------------------------- @ ----------------------------------------------------------------
...@@ -422,7 +422,7 @@ function ff_put_pixels8_y2_arm, export=1 ...@@ -422,7 +422,7 @@ function ff_put_pixels8_y2_arm, export=1
add r0, r0, r2 add r0, r0, r2
bne 6b bne 6b
pop {r4-r11,pc} pop {r4-r11,pc}
.endfunc endfunc
.align 5 .align 5
function ff_put_no_rnd_pixels8_y2_arm, export=1 function ff_put_no_rnd_pixels8_y2_arm, export=1
...@@ -520,7 +520,7 @@ function ff_put_no_rnd_pixels8_y2_arm, export=1 ...@@ -520,7 +520,7 @@ function ff_put_no_rnd_pixels8_y2_arm, export=1
add r0, r0, r2 add r0, r0, r2
bne 6b bne 6b
pop {r4-r11,pc} pop {r4-r11,pc}
.endfunc endfunc
.ltorg .ltorg
...@@ -603,7 +603,7 @@ function ff_put_pixels8_xy2_arm, export=1 ...@@ -603,7 +603,7 @@ function ff_put_pixels8_xy2_arm, export=1
3: RND_XY2_EXPAND 2, lsl 3: RND_XY2_EXPAND 2, lsl
.align 5 .align 5
4: RND_XY2_EXPAND 3, lsl 4: RND_XY2_EXPAND 3, lsl
.endfunc endfunc
.align 5 .align 5
function ff_put_no_rnd_pixels8_xy2_arm, export=1 function ff_put_no_rnd_pixels8_xy2_arm, export=1
...@@ -619,7 +619,7 @@ function ff_put_no_rnd_pixels8_xy2_arm, export=1 ...@@ -619,7 +619,7 @@ function ff_put_no_rnd_pixels8_xy2_arm, export=1
3: RND_XY2_EXPAND 2, lsr 3: RND_XY2_EXPAND 2, lsr
.align 5 .align 5
4: RND_XY2_EXPAND 3, lsr 4: RND_XY2_EXPAND 3, lsr
.endfunc endfunc
.align 5 .align 5
@ void ff_add_pixels_clamped_arm(int16_t *block, uint8_t *dest, int stride) @ void ff_add_pixels_clamped_arm(int16_t *block, uint8_t *dest, int stride)
...@@ -709,4 +709,4 @@ function ff_add_pixels_clamped_arm, export=1 ...@@ -709,4 +709,4 @@ function ff_add_pixels_clamped_arm, export=1
pop {r4-r10} pop {r4-r10}
bx lr bx lr
.endfunc endfunc
...@@ -32,7 +32,7 @@ function ff_\type\()_pixels16\subp\()_armv6, export=1 ...@@ -32,7 +32,7 @@ function ff_\type\()_pixels16\subp\()_armv6, export=1
add r0, r0, #8 add r0, r0, #8
add r1, r1, #8 add r1, r1, #8
b ff_\type\()_pixels8\subp\()_armv6 b ff_\type\()_pixels8\subp\()_armv6
.endfunc endfunc
.endm .endm
call_2x_pixels avg call_2x_pixels avg
...@@ -61,7 +61,7 @@ function ff_put_pixels16_armv6, export=1 ...@@ -61,7 +61,7 @@ function ff_put_pixels16_armv6, export=1
pop {r4-r11} pop {r4-r11}
bx lr bx lr
.endfunc endfunc
function ff_put_pixels8_armv6, export=1 function ff_put_pixels8_armv6, export=1
push {r4-r7} push {r4-r7}
...@@ -77,7 +77,7 @@ function ff_put_pixels8_armv6, export=1 ...@@ -77,7 +77,7 @@ function ff_put_pixels8_armv6, export=1
pop {r4-r7} pop {r4-r7}
bx lr bx lr
.endfunc endfunc
function ff_put_pixels8_x2_armv6, export=1 function ff_put_pixels8_x2_armv6, export=1
push {r4-r11, lr} push {r4-r11, lr}
...@@ -118,7 +118,7 @@ function ff_put_pixels8_x2_armv6, export=1 ...@@ -118,7 +118,7 @@ function ff_put_pixels8_x2_armv6, export=1
bne 1b bne 1b
pop {r4-r11, pc} pop {r4-r11, pc}
.endfunc endfunc
function ff_put_pixels8_y2_armv6, export=1 function ff_put_pixels8_y2_armv6, export=1
push {r4-r11} push {r4-r11}
...@@ -157,7 +157,7 @@ function ff_put_pixels8_y2_armv6, export=1 ...@@ -157,7 +157,7 @@ function ff_put_pixels8_y2_armv6, export=1
pop {r4-r11} pop {r4-r11}
bx lr bx lr
.endfunc endfunc
function ff_put_pixels8_x2_no_rnd_armv6, export=1 function ff_put_pixels8_x2_no_rnd_armv6, export=1
push {r4-r9, lr} push {r4-r9, lr}
...@@ -185,7 +185,7 @@ function ff_put_pixels8_x2_no_rnd_armv6, export=1 ...@@ -185,7 +185,7 @@ function ff_put_pixels8_x2_no_rnd_armv6, export=1
bne 1b bne 1b
pop {r4-r9, pc} pop {r4-r9, pc}
.endfunc endfunc
function ff_put_pixels8_y2_no_rnd_armv6, export=1 function ff_put_pixels8_y2_no_rnd_armv6, export=1
push {r4-r9, lr} push {r4-r9, lr}
...@@ -210,7 +210,7 @@ function ff_put_pixels8_y2_no_rnd_armv6, export=1 ...@@ -210,7 +210,7 @@ function ff_put_pixels8_y2_no_rnd_armv6, export=1
bne 1b bne 1b
pop {r4-r9, pc} pop {r4-r9, pc}
.endfunc endfunc
function ff_avg_pixels8_armv6, export=1 function ff_avg_pixels8_armv6, export=1
pld [r1, r2] pld [r1, r2]
...@@ -260,7 +260,7 @@ function ff_avg_pixels8_armv6, export=1 ...@@ -260,7 +260,7 @@ function ff_avg_pixels8_armv6, export=1
strd r6, r7, [r0], r2 strd r6, r7, [r0], r2
pop {r4-r10, pc} pop {r4-r10, pc}
.endfunc endfunc
function ff_add_pixels_clamped_armv6, export=1 function ff_add_pixels_clamped_armv6, export=1
push {r4-r8,lr} push {r4-r8,lr}
...@@ -287,7 +287,7 @@ function ff_add_pixels_clamped_armv6, export=1 ...@@ -287,7 +287,7 @@ function ff_add_pixels_clamped_armv6, export=1
strd r6, r7, [r1], r2 strd r6, r7, [r1], r2
bgt 1b bgt 1b
pop {r4-r8,pc} pop {r4-r8,pc}
.endfunc endfunc
function ff_get_pixels_armv6, export=1 function ff_get_pixels_armv6, export=1
pld [r1, r2] pld [r1, r2]
...@@ -309,7 +309,7 @@ function ff_get_pixels_armv6, export=1 ...@@ -309,7 +309,7 @@ function ff_get_pixels_armv6, export=1
bgt 1b bgt 1b
pop {r4-r8, pc} pop {r4-r8, pc}
.endfunc endfunc
function ff_diff_pixels_armv6, export=1 function ff_diff_pixels_armv6, export=1
pld [r1, r3] pld [r1, r3]
...@@ -342,7 +342,7 @@ function ff_diff_pixels_armv6, export=1 ...@@ -342,7 +342,7 @@ function ff_diff_pixels_armv6, export=1
bgt 1b bgt 1b
pop {r4-r9, pc} pop {r4-r9, pc}
.endfunc endfunc
function ff_pix_abs16_armv6, export=1 function ff_pix_abs16_armv6, export=1
ldr r0, [sp] ldr r0, [sp]
...@@ -371,7 +371,7 @@ function ff_pix_abs16_armv6, export=1 ...@@ -371,7 +371,7 @@ function ff_pix_abs16_armv6, export=1
2: 2:
add r0, r12, lr add r0, r12, lr
pop {r4-r9, pc} pop {r4-r9, pc}
.endfunc endfunc
function ff_pix_abs16_x2_armv6, export=1 function ff_pix_abs16_x2_armv6, export=1
ldr r12, [sp] ldr r12, [sp]
...@@ -426,7 +426,7 @@ function ff_pix_abs16_x2_armv6, export=1 ...@@ -426,7 +426,7 @@ function ff_pix_abs16_x2_armv6, export=1
bgt 1b bgt 1b
pop {r4-r11, pc} pop {r4-r11, pc}
.endfunc endfunc
.macro usad_y2 p0, p1, p2, p3, n0, n1, n2, n3 .macro usad_y2 p0, p1, p2, p3, n0, n1, n2, n3
ldr \n0, [r2] ldr \n0, [r2]
...@@ -484,7 +484,7 @@ function ff_pix_abs16_y2_armv6, export=1 ...@@ -484,7 +484,7 @@ function ff_pix_abs16_y2_armv6, export=1
bgt 1b bgt 1b
pop {r4-r11, pc} pop {r4-r11, pc}
.endfunc endfunc
function ff_pix_abs8_armv6, export=1 function ff_pix_abs8_armv6, export=1
pld [r2, r3] pld [r2, r3]
...@@ -514,7 +514,7 @@ function ff_pix_abs8_armv6, export=1 ...@@ -514,7 +514,7 @@ function ff_pix_abs8_armv6, export=1
usada8 lr, r9, r7, lr usada8 lr, r9, r7, lr
add r0, r0, lr add r0, r0, lr
pop {r4-r9, pc} pop {r4-r9, pc}
.endfunc endfunc
function ff_sse16_armv6, export=1 function ff_sse16_armv6, export=1
ldr r12, [sp] ldr r12, [sp]
...@@ -565,7 +565,7 @@ function ff_sse16_armv6, export=1 ...@@ -565,7 +565,7 @@ function ff_sse16_armv6, export=1
bgt 1b bgt 1b
pop {r4-r9, pc} pop {r4-r9, pc}
.endfunc endfunc
function ff_pix_norm1_armv6, export=1 function ff_pix_norm1_armv6, export=1
push {r4-r6, lr} push {r4-r6, lr}
...@@ -595,7 +595,7 @@ function ff_pix_norm1_armv6, export=1 ...@@ -595,7 +595,7 @@ function ff_pix_norm1_armv6, export=1
mov r0, lr mov r0, lr
pop {r4-r6, pc} pop {r4-r6, pc}
.endfunc endfunc
function ff_pix_sum_armv6, export=1 function ff_pix_sum_armv6, export=1
push {r4-r7, lr} push {r4-r7, lr}
...@@ -620,4 +620,4 @@ function ff_pix_sum_armv6, export=1 ...@@ -620,4 +620,4 @@ function ff_pix_sum_armv6, export=1
usada8 r3, r7, lr, r3 usada8 r3, r7, lr, r3
add r0, r2, r3 add r0, r2, r3
pop {r4-r7, pc} pop {r4-r7, pc}
.endfunc endfunc
...@@ -240,7 +240,7 @@ ...@@ -240,7 +240,7 @@
.macro pixfunc pfx name suf rnd_op args:vararg .macro pixfunc pfx name suf rnd_op args:vararg
function ff_\pfx\name\suf\()_neon, export=1 function ff_\pfx\name\suf\()_neon, export=1
\name \rnd_op \args \name \rnd_op \args
.endfunc endfunc
.endm .endm
.macro pixfunc2 pfx name args:vararg .macro pixfunc2 pfx name args:vararg
...@@ -250,7 +250,7 @@ function ff_\pfx\name\suf\()_neon, export=1 ...@@ -250,7 +250,7 @@ function ff_\pfx\name\suf\()_neon, export=1
function ff_put_h264_qpel16_mc00_neon, export=1 function ff_put_h264_qpel16_mc00_neon, export=1
mov r3, #16 mov r3, #16
.endfunc endfunc
pixfunc put_ pixels16 pixfunc put_ pixels16
pixfunc2 put_ pixels16_x2, _no_rnd, vhadd.u8 pixfunc2 put_ pixels16_x2, _no_rnd, vhadd.u8
...@@ -259,13 +259,13 @@ function ff_put_h264_qpel16_mc00_neon, export=1 ...@@ -259,13 +259,13 @@ function ff_put_h264_qpel16_mc00_neon, export=1
function ff_avg_h264_qpel16_mc00_neon, export=1 function ff_avg_h264_qpel16_mc00_neon, export=1
mov r3, #16 mov r3, #16
.endfunc endfunc
pixfunc avg_ pixels16,, 1 pixfunc avg_ pixels16,, 1
function ff_put_h264_qpel8_mc00_neon, export=1 function ff_put_h264_qpel8_mc00_neon, export=1
mov r3, #8 mov r3, #8
.endfunc endfunc
pixfunc put_ pixels8 pixfunc put_ pixels8
pixfunc2 put_ pixels8_x2, _no_rnd, vhadd.u8 pixfunc2 put_ pixels8_x2, _no_rnd, vhadd.u8
...@@ -274,7 +274,7 @@ function ff_put_h264_qpel8_mc00_neon, export=1 ...@@ -274,7 +274,7 @@ function ff_put_h264_qpel8_mc00_neon, export=1
function ff_avg_h264_qpel8_mc00_neon, export=1 function ff_avg_h264_qpel8_mc00_neon, export=1
mov r3, #8 mov r3, #8
.endfunc endfunc
pixfunc avg_ pixels8,, 1 pixfunc avg_ pixels8,, 1
...@@ -300,7 +300,7 @@ function ff_put_pixels_clamped_neon, export=1 ...@@ -300,7 +300,7 @@ function ff_put_pixels_clamped_neon, export=1
vst1.64 {d6}, [r1,:64], r2 vst1.64 {d6}, [r1,:64], r2
vst1.64 {d7}, [r1,:64], r2 vst1.64 {d7}, [r1,:64], r2
bx lr bx lr
.endfunc endfunc
function ff_put_signed_pixels_clamped_neon, export=1 function ff_put_signed_pixels_clamped_neon, export=1
vmov.u8 d31, #128 vmov.u8 d31, #128
...@@ -337,7 +337,7 @@ function ff_put_signed_pixels_clamped_neon, export=1 ...@@ -337,7 +337,7 @@ function ff_put_signed_pixels_clamped_neon, export=1
vst1.64 {d6}, [r1,:64], r2 vst1.64 {d6}, [r1,:64], r2
vst1.64 {d7}, [r1,:64], r2 vst1.64 {d7}, [r1,:64], r2
bx lr bx lr
.endfunc endfunc
function ff_add_pixels_clamped_neon, export=1 function ff_add_pixels_clamped_neon, export=1
mov r3, r1 mov r3, r1
...@@ -382,7 +382,7 @@ function ff_add_pixels_clamped_neon, export=1 ...@@ -382,7 +382,7 @@ function ff_add_pixels_clamped_neon, export=1
vst1.64 {d4}, [r3,:64], r2 vst1.64 {d4}, [r3,:64], r2
vst1.64 {d6}, [r3,:64], r2 vst1.64 {d6}, [r3,:64], r2
bx lr bx lr
.endfunc endfunc
function ff_float_to_int16_neon, export=1 function ff_float_to_int16_neon, export=1
subs r2, r2, #8 subs r2, r2, #8
...@@ -426,7 +426,7 @@ function ff_float_to_int16_neon, export=1 ...@@ -426,7 +426,7 @@ function ff_float_to_int16_neon, export=1
vshrn.s32 d5, q9, #16 vshrn.s32 d5, q9, #16
vst1.64 {d4-d5}, [r0,:128]! vst1.64 {d4-d5}, [r0,:128]!
bx lr bx lr
.endfunc endfunc
function ff_float_to_int16_interleave_neon, export=1 function ff_float_to_int16_interleave_neon, export=1
cmp r3, #2 cmp r3, #2
...@@ -719,7 +719,7 @@ function ff_float_to_int16_interleave_neon, export=1 ...@@ -719,7 +719,7 @@ function ff_float_to_int16_interleave_neon, export=1
vld1.64 {d2-d3}, [r4,:128]! vld1.64 {d2-d3}, [r4,:128]!
vcvt.s32.f32 q1, q1, #16 vcvt.s32.f32 q1, q1, #16
b 6b b 6b
.endfunc endfunc
function ff_vector_fmul_neon, export=1 function ff_vector_fmul_neon, export=1
mov r3, r0 mov r3, r0
...@@ -759,7 +759,7 @@ function ff_vector_fmul_neon, export=1 ...@@ -759,7 +759,7 @@ function ff_vector_fmul_neon, export=1
vmul.f32 q9, q1, q3 vmul.f32 q9, q1, q3
3: vst1.64 {d16-d19},[r3,:128]! 3: vst1.64 {d16-d19},[r3,:128]!
bx lr bx lr
.endfunc endfunc
function ff_vector_fmul_window_neon, export=1 function ff_vector_fmul_window_neon, export=1
VFP vdup.32 q8, d0[0] VFP vdup.32 q8, d0[0]
...@@ -811,7 +811,7 @@ NOVFP ldr lr, [sp, #16] ...@@ -811,7 +811,7 @@ NOVFP ldr lr, [sp, #16]
vst1.64 {d20,d21},[r0,:128]! vst1.64 {d20,d21},[r0,:128]!
vst1.64 {d22,d23},[ip,:128], r5 vst1.64 {d22,d23},[ip,:128], r5
pop {r4,r5,pc} pop {r4,r5,pc}
.endfunc endfunc
#if CONFIG_VORBIS_DECODER #if CONFIG_VORBIS_DECODER
function ff_vorbis_inverse_coupling_neon, export=1 function ff_vorbis_inverse_coupling_neon, export=1
...@@ -872,7 +872,7 @@ function ff_vorbis_inverse_coupling_neon, export=1 ...@@ -872,7 +872,7 @@ function ff_vorbis_inverse_coupling_neon, export=1
vst1.32 {d2-d3}, [r0,:128]! vst1.32 {d2-d3}, [r0,:128]!
vst1.32 {d0-d1}, [r1,:128]! vst1.32 {d0-d1}, [r1,:128]!
bx lr bx lr
.endfunc endfunc
#endif #endif
function ff_vector_fmul_scalar_neon, export=1 function ff_vector_fmul_scalar_neon, export=1
...@@ -910,7 +910,7 @@ NOVFP vdup.32 q8, r2 ...@@ -910,7 +910,7 @@ NOVFP vdup.32 q8, r2
bgt 3b bgt 3b
bx lr bx lr
.unreq len .unreq len
.endfunc endfunc
function ff_vector_fmul_sv_scalar_2_neon, export=1 function ff_vector_fmul_sv_scalar_2_neon, export=1
VFP vdup.32 d16, d0[0] VFP vdup.32 d16, d0[0]
...@@ -936,7 +936,7 @@ NOVFP ldr r3, [sp] ...@@ -936,7 +936,7 @@ NOVFP ldr r3, [sp]
2: vst1.32 {d4},[r0,:64]! 2: vst1.32 {d4},[r0,:64]!
vst1.32 {d5},[r0,:64]! vst1.32 {d5},[r0,:64]!
bx lr bx lr
.endfunc endfunc
function ff_vector_fmul_sv_scalar_4_neon, export=1 function ff_vector_fmul_sv_scalar_4_neon, export=1
VFP vdup.32 q10, d0[0] VFP vdup.32 q10, d0[0]
...@@ -975,7 +975,7 @@ NOVFP ldr r3, [sp] ...@@ -975,7 +975,7 @@ NOVFP ldr r3, [sp]
subs r3, r3, #4 subs r3, r3, #4
bgt 3b bgt 3b
pop {pc} pop {pc}
.endfunc endfunc
function ff_sv_fmul_scalar_2_neon, export=1 function ff_sv_fmul_scalar_2_neon, export=1
VFP len .req r2 VFP len .req r2
...@@ -998,7 +998,7 @@ NOVFP vdup.32 q8, r2 ...@@ -998,7 +998,7 @@ NOVFP vdup.32 q8, r2
2: vst1.32 {q1},[r0,:128]! 2: vst1.32 {q1},[r0,:128]!
bx lr bx lr
.unreq len .unreq len
.endfunc endfunc
function ff_sv_fmul_scalar_4_neon, export=1 function ff_sv_fmul_scalar_4_neon, export=1
VFP len .req r2 VFP len .req r2
...@@ -1013,7 +1013,7 @@ NOVFP vdup.32 q8, r2 ...@@ -1013,7 +1013,7 @@ NOVFP vdup.32 q8, r2
bgt 1b bgt 1b
bx lr bx lr
.unreq len .unreq len
.endfunc endfunc
function ff_butterflies_float_neon, export=1 function ff_butterflies_float_neon, export=1
1: vld1.32 {q0},[r0,:128] 1: vld1.32 {q0},[r0,:128]
...@@ -1025,7 +1025,7 @@ function ff_butterflies_float_neon, export=1 ...@@ -1025,7 +1025,7 @@ function ff_butterflies_float_neon, export=1
subs r2, r2, #4 subs r2, r2, #4
bgt 1b bgt 1b
bx lr bx lr
.endfunc endfunc
function ff_scalarproduct_float_neon, export=1 function ff_scalarproduct_float_neon, export=1
vmov.f32 q2, #0.0 vmov.f32 q2, #0.0
...@@ -1038,7 +1038,7 @@ function ff_scalarproduct_float_neon, export=1 ...@@ -1038,7 +1038,7 @@ function ff_scalarproduct_float_neon, export=1
vpadd.f32 d0, d0, d0 vpadd.f32 d0, d0, d0
NOVFP vmov.32 r0, d0[0] NOVFP vmov.32 r0, d0[0]
bx lr bx lr
.endfunc endfunc
function ff_int32_to_float_fmul_scalar_neon, export=1 function ff_int32_to_float_fmul_scalar_neon, export=1
VFP vdup.32 q0, d0[0] VFP vdup.32 q0, d0[0]
...@@ -1066,7 +1066,7 @@ NOVFP len .req r3 ...@@ -1066,7 +1066,7 @@ NOVFP len .req r3
vst1.32 {q10},[r0,:128]! vst1.32 {q10},[r0,:128]!
bx lr bx lr
.unreq len .unreq len
.endfunc endfunc
function ff_vector_fmul_reverse_neon, export=1 function ff_vector_fmul_reverse_neon, export=1
add r2, r2, r3, lsl #2 add r2, r2, r3, lsl #2
...@@ -1090,7 +1090,7 @@ function ff_vector_fmul_reverse_neon, export=1 ...@@ -1090,7 +1090,7 @@ function ff_vector_fmul_reverse_neon, export=1
b 1b b 1b
2: vst1.32 {q8-q9}, [r0,:128]! 2: vst1.32 {q8-q9}, [r0,:128]!
bx lr bx lr
.endfunc endfunc
function ff_vector_fmul_add_neon, export=1 function ff_vector_fmul_add_neon, export=1
ldr r12, [sp] ldr r12, [sp]
...@@ -1117,7 +1117,7 @@ function ff_vector_fmul_add_neon, export=1 ...@@ -1117,7 +1117,7 @@ function ff_vector_fmul_add_neon, export=1
b 1b b 1b
2: vst1.32 {q12-q13},[r0,:128]! 2: vst1.32 {q12-q13},[r0,:128]!
bx lr bx lr
.endfunc endfunc
function ff_vector_clipf_neon, export=1 function ff_vector_clipf_neon, export=1
VFP vdup.32 q1, d0[1] VFP vdup.32 q1, d0[1]
...@@ -1143,4 +1143,4 @@ NOVFP ldr r2, [sp] ...@@ -1143,4 +1143,4 @@ NOVFP ldr r2, [sp]
2: vst1.f32 {q8},[r0,:128]! 2: vst1.f32 {q8},[r0,:128]!
vst1.f32 {q9},[r0,:128]! vst1.f32 {q9},[r0,:128]!
bx lr bx lr
.endfunc endfunc
...@@ -78,7 +78,7 @@ function ff_vector_fmul_vfp, export=1 ...@@ -78,7 +78,7 @@ function ff_vector_fmul_vfp, export=1
fmxr fpscr, r12 fmxr fpscr, r12
vpop {d8-d15} vpop {d8-d15}
bx lr bx lr
.endfunc endfunc
/** /**
* ARM VFP optimized implementation of 'vector_fmul_reverse_c' function. * ARM VFP optimized implementation of 'vector_fmul_reverse_c' function.
...@@ -131,7 +131,7 @@ function ff_vector_fmul_reverse_vfp, export=1 ...@@ -131,7 +131,7 @@ function ff_vector_fmul_reverse_vfp, export=1
vpop {d8-d15} vpop {d8-d15}
bx lr bx lr
.endfunc endfunc
#if HAVE_ARMV6 #if HAVE_ARMV6
/** /**
...@@ -185,5 +185,5 @@ function ff_float_to_int16_vfp, export=1 ...@@ -185,5 +185,5 @@ function ff_float_to_int16_vfp, export=1
vpop {d8-d11} vpop {d8-d11}
pop {r4-r8,pc} pop {r4-r8,pc}
.endfunc endfunc
#endif #endif
...@@ -43,7 +43,7 @@ function fft4_neon ...@@ -43,7 +43,7 @@ function fft4_neon
vst1.32 {d0-d3}, [r0,:128] vst1.32 {d0-d3}, [r0,:128]
bx lr bx lr
.endfunc endfunc
function fft8_neon function fft8_neon
mov r1, r0 mov r1, r0
...@@ -96,7 +96,7 @@ function fft8_neon ...@@ -96,7 +96,7 @@ function fft8_neon
vst1.32 {d0-d3}, [r0,:128] vst1.32 {d0-d3}, [r0,:128]
bx lr bx lr
.endfunc endfunc
function fft16_neon function fft16_neon
movrel r1, mppm movrel r1, mppm
...@@ -198,7 +198,7 @@ function fft16_neon ...@@ -198,7 +198,7 @@ function fft16_neon
vst2.32 {d26-d27},[r0,:128], r1 vst2.32 {d26-d27},[r0,:128], r1
vst2.32 {d30-d31},[r0,:128] vst2.32 {d30-d31},[r0,:128]
bx lr bx lr
.endfunc endfunc
function fft_pass_neon function fft_pass_neon
push {r4-r6,lr} push {r4-r6,lr}
...@@ -274,7 +274,7 @@ function fft_pass_neon ...@@ -274,7 +274,7 @@ function fft_pass_neon
bne 1b bne 1b
pop {r4-r6,pc} pop {r4-r6,pc}
.endfunc endfunc
.macro def_fft n, n2, n4 .macro def_fft n, n2, n4
.align 6 .align 6
...@@ -291,7 +291,7 @@ function fft\n\()_neon ...@@ -291,7 +291,7 @@ function fft\n\()_neon
movrel r1, X(ff_cos_\n) movrel r1, X(ff_cos_\n)
mov r2, #\n4/2 mov r2, #\n4/2
b fft_pass_neon b fft_pass_neon
.endfunc endfunc
.endm .endm
def_fft 32, 16, 8 def_fft 32, 16, 8
...@@ -314,7 +314,7 @@ function ff_fft_calc_neon, export=1 ...@@ -314,7 +314,7 @@ function ff_fft_calc_neon, export=1
ldr r3, [r3, r2, lsl #2] ldr r3, [r3, r2, lsl #2]
mov r0, r1 mov r0, r1
bx r3 bx r3
.endfunc endfunc
function ff_fft_permute_neon, export=1 function ff_fft_permute_neon, export=1
push {r4,lr} push {r4,lr}
...@@ -344,7 +344,7 @@ function ff_fft_permute_neon, export=1 ...@@ -344,7 +344,7 @@ function ff_fft_permute_neon, export=1
bgt 1b bgt 1b
pop {r4,pc} pop {r4,pc}
.endfunc endfunc
.section .rodata .section .rodata
.align 4 .align 4
......
This diff is collapsed.
...@@ -69,7 +69,7 @@ function ff_h264_idct_add_neon, export=1 ...@@ -69,7 +69,7 @@ function ff_h264_idct_add_neon, export=1
vst1.32 {d1[0]}, [r0,:32], r2 vst1.32 {d1[0]}, [r0,:32], r2
bx lr bx lr
.endfunc endfunc
function ff_h264_idct_dc_add_neon, export=1 function ff_h264_idct_dc_add_neon, export=1
vld1.16 {d2[],d3[]}, [r1,:16] vld1.16 {d2[],d3[]}, [r1,:16]
...@@ -88,7 +88,7 @@ function ff_h264_idct_dc_add_neon, export=1 ...@@ -88,7 +88,7 @@ function ff_h264_idct_dc_add_neon, export=1
vst1.32 {d1[0]}, [r0,:32], r2 vst1.32 {d1[0]}, [r0,:32], r2
vst1.32 {d1[1]}, [r0,:32], r2 vst1.32 {d1[1]}, [r0,:32], r2
bx lr bx lr
.endfunc endfunc
function ff_h264_idct_add16_neon, export=1 function ff_h264_idct_add16_neon, export=1
push {r4-r8,lr} push {r4-r8,lr}
...@@ -115,7 +115,7 @@ function ff_h264_idct_add16_neon, export=1 ...@@ -115,7 +115,7 @@ function ff_h264_idct_add16_neon, export=1
add r1, r1, #32 add r1, r1, #32
bne 1b bne 1b
pop {r4-r8,pc} pop {r4-r8,pc}
.endfunc endfunc
function ff_h264_idct_add16intra_neon, export=1 function ff_h264_idct_add16intra_neon, export=1
push {r4-r8,lr} push {r4-r8,lr}
...@@ -140,7 +140,7 @@ function ff_h264_idct_add16intra_neon, export=1 ...@@ -140,7 +140,7 @@ function ff_h264_idct_add16intra_neon, export=1
add r1, r1, #32 add r1, r1, #32
bne 1b bne 1b
pop {r4-r8,pc} pop {r4-r8,pc}
.endfunc endfunc
function ff_h264_idct_add8_neon, export=1 function ff_h264_idct_add8_neon, export=1
push {r4-r10,lr} push {r4-r10,lr}
...@@ -167,7 +167,7 @@ function ff_h264_idct_add8_neon, export=1 ...@@ -167,7 +167,7 @@ function ff_h264_idct_add8_neon, export=1
add r1, r1, #32 add r1, r1, #32
bne 1b bne 1b
pop {r4-r10,pc} pop {r4-r10,pc}
.endfunc endfunc
.section .rodata .section .rodata
scan8: .byte 4+1*8, 5+1*8, 4+2*8, 5+2*8 scan8: .byte 4+1*8, 5+1*8, 4+2*8, 5+2*8
......
...@@ -45,7 +45,7 @@ ...@@ -45,7 +45,7 @@
function ff_pred16x16_128_dc_neon, export=1 function ff_pred16x16_128_dc_neon, export=1
vmov.i8 q0, #128 vmov.i8 q0, #128
b .L_pred16x16_dc_end b .L_pred16x16_dc_end
.endfunc endfunc
function ff_pred16x16_top_dc_neon, export=1 function ff_pred16x16_top_dc_neon, export=1
sub r2, r0, r1 sub r2, r0, r1
...@@ -54,7 +54,7 @@ function ff_pred16x16_top_dc_neon, export=1 ...@@ -54,7 +54,7 @@ function ff_pred16x16_top_dc_neon, export=1
vrshrn.u16 d0, q0, #4 vrshrn.u16 d0, q0, #4
vdup.8 q0, d0[0] vdup.8 q0, d0[0]
b .L_pred16x16_dc_end b .L_pred16x16_dc_end
.endfunc endfunc
function ff_pred16x16_left_dc_neon, export=1 function ff_pred16x16_left_dc_neon, export=1
sub r2, r0, #1 sub r2, r0, #1
...@@ -64,7 +64,7 @@ function ff_pred16x16_left_dc_neon, export=1 ...@@ -64,7 +64,7 @@ function ff_pred16x16_left_dc_neon, export=1
vrshrn.u16 d0, q0, #4 vrshrn.u16 d0, q0, #4
vdup.8 q0, d0[0] vdup.8 q0, d0[0]
b .L_pred16x16_dc_end b .L_pred16x16_dc_end
.endfunc endfunc
function ff_pred16x16_dc_neon, export=1 function ff_pred16x16_dc_neon, export=1
sub r2, r0, r1 sub r2, r0, r1
...@@ -87,7 +87,7 @@ function ff_pred16x16_dc_neon, export=1 ...@@ -87,7 +87,7 @@ function ff_pred16x16_dc_neon, export=1
subs r3, r3, #1 subs r3, r3, #1
bne 6b bne 6b
bx lr bx lr
.endfunc endfunc
function ff_pred16x16_hor_neon, export=1 function ff_pred16x16_hor_neon, export=1
sub r2, r0, #1 sub r2, r0, #1
...@@ -97,7 +97,7 @@ function ff_pred16x16_hor_neon, export=1 ...@@ -97,7 +97,7 @@ function ff_pred16x16_hor_neon, export=1
subs r3, r3, #1 subs r3, r3, #1
bne 1b bne 1b
bx lr bx lr
.endfunc endfunc
function ff_pred16x16_vert_neon, export=1 function ff_pred16x16_vert_neon, export=1
sub r0, r0, r1 sub r0, r0, r1
...@@ -108,7 +108,7 @@ function ff_pred16x16_vert_neon, export=1 ...@@ -108,7 +108,7 @@ function ff_pred16x16_vert_neon, export=1
subs r3, r3, #1 subs r3, r3, #1
bne 1b bne 1b
bx lr bx lr
.endfunc endfunc
function ff_pred16x16_plane_neon, export=1 function ff_pred16x16_plane_neon, export=1
sub r3, r0, r1 sub r3, r0, r1
...@@ -164,7 +164,7 @@ function ff_pred16x16_plane_neon, export=1 ...@@ -164,7 +164,7 @@ function ff_pred16x16_plane_neon, export=1
subs r3, r3, #1 subs r3, r3, #1
bne 1b bne 1b
bx lr bx lr
.endfunc endfunc
.section .rodata .section .rodata
.align 4 .align 4
...@@ -181,7 +181,7 @@ function ff_pred8x8_hor_neon, export=1 ...@@ -181,7 +181,7 @@ function ff_pred8x8_hor_neon, export=1
subs r3, r3, #1 subs r3, r3, #1
bne 1b bne 1b
bx lr bx lr
.endfunc endfunc
function ff_pred8x8_vert_neon, export=1 function ff_pred8x8_vert_neon, export=1
sub r0, r0, r1 sub r0, r0, r1
...@@ -192,7 +192,7 @@ function ff_pred8x8_vert_neon, export=1 ...@@ -192,7 +192,7 @@ function ff_pred8x8_vert_neon, export=1
subs r3, r3, #1 subs r3, r3, #1
bne 1b bne 1b
bx lr bx lr
.endfunc endfunc
function ff_pred8x8_plane_neon, export=1 function ff_pred8x8_plane_neon, export=1
sub r3, r0, r1 sub r3, r0, r1
...@@ -244,12 +244,12 @@ function ff_pred8x8_plane_neon, export=1 ...@@ -244,12 +244,12 @@ function ff_pred8x8_plane_neon, export=1
subs r3, r3, #1 subs r3, r3, #1
bne 1b bne 1b
bx lr bx lr
.endfunc endfunc
function ff_pred8x8_128_dc_neon, export=1 function ff_pred8x8_128_dc_neon, export=1
vmov.i8 q0, #128 vmov.i8 q0, #128
b .L_pred8x8_dc_end b .L_pred8x8_dc_end
.endfunc endfunc
function ff_pred8x8_top_dc_neon, export=1 function ff_pred8x8_top_dc_neon, export=1
sub r2, r0, r1 sub r2, r0, r1
...@@ -261,7 +261,7 @@ function ff_pred8x8_top_dc_neon, export=1 ...@@ -261,7 +261,7 @@ function ff_pred8x8_top_dc_neon, export=1
vdup.8 d0, d0[0] vdup.8 d0, d0[0]
vtrn.32 d0, d1 vtrn.32 d0, d1
b .L_pred8x8_dc_end b .L_pred8x8_dc_end
.endfunc endfunc
function ff_pred8x8_left_dc_neon, export=1 function ff_pred8x8_left_dc_neon, export=1
sub r2, r0, #1 sub r2, r0, #1
...@@ -272,7 +272,7 @@ function ff_pred8x8_left_dc_neon, export=1 ...@@ -272,7 +272,7 @@ function ff_pred8x8_left_dc_neon, export=1
vdup.8 d1, d0[1] vdup.8 d1, d0[1]
vdup.8 d0, d0[0] vdup.8 d0, d0[0]
b .L_pred8x8_dc_end b .L_pred8x8_dc_end
.endfunc endfunc
function ff_pred8x8_dc_neon, export=1 function ff_pred8x8_dc_neon, export=1
sub r2, r0, r1 sub r2, r0, r1
...@@ -298,7 +298,7 @@ function ff_pred8x8_dc_neon, export=1 ...@@ -298,7 +298,7 @@ function ff_pred8x8_dc_neon, export=1
subs r3, r3, #1 subs r3, r3, #1
bne 6b bne 6b
bx lr bx lr
.endfunc endfunc
function ff_pred8x8_l0t_dc_neon, export=1 function ff_pred8x8_l0t_dc_neon, export=1
sub r2, r0, r1 sub r2, r0, r1
...@@ -316,7 +316,7 @@ function ff_pred8x8_l0t_dc_neon, export=1 ...@@ -316,7 +316,7 @@ function ff_pred8x8_l0t_dc_neon, export=1
vdup.8 q2, d3[2] vdup.8 q2, d3[2]
vtrn.32 q0, q2 vtrn.32 q0, q2
b .L_pred8x8_dc_end b .L_pred8x8_dc_end
.endfunc endfunc
function ff_pred8x8_l00_dc_neon, export=1 function ff_pred8x8_l00_dc_neon, export=1
sub r2, r0, #1 sub r2, r0, #1
...@@ -327,7 +327,7 @@ function ff_pred8x8_l00_dc_neon, export=1 ...@@ -327,7 +327,7 @@ function ff_pred8x8_l00_dc_neon, export=1
vmov.i8 d1, #128 vmov.i8 d1, #128
vdup.8 d0, d0[0] vdup.8 d0, d0[0]
b .L_pred8x8_dc_end b .L_pred8x8_dc_end
.endfunc endfunc
function ff_pred8x8_0lt_dc_neon, export=1 function ff_pred8x8_0lt_dc_neon, export=1
sub r2, r0, r1 sub r2, r0, r1
...@@ -347,7 +347,7 @@ function ff_pred8x8_0lt_dc_neon, export=1 ...@@ -347,7 +347,7 @@ function ff_pred8x8_0lt_dc_neon, export=1
vdup.8 d5, d2[5] vdup.8 d5, d2[5]
vtrn.32 q0, q2 vtrn.32 q0, q2
b .L_pred8x8_dc_end b .L_pred8x8_dc_end
.endfunc endfunc
function ff_pred8x8_0l0_dc_neon, export=1 function ff_pred8x8_0l0_dc_neon, export=1
add r2, r0, r1, lsl #2 add r2, r0, r1, lsl #2
...@@ -359,4 +359,4 @@ function ff_pred8x8_0l0_dc_neon, export=1 ...@@ -359,4 +359,4 @@ function ff_pred8x8_0l0_dc_neon, export=1
vmov.i8 d0, #128 vmov.i8 d0, #128
vdup.8 d1, d1[0] vdup.8 d1, d1[0]
b .L_pred8x8_dc_end b .L_pred8x8_dc_end
.endfunc endfunc
...@@ -75,7 +75,7 @@ function ff_scalarproduct_int16_neon, export=1 ...@@ -75,7 +75,7 @@ function ff_scalarproduct_int16_neon, export=1
vpaddl.s32 d3, d2 vpaddl.s32 d3, d2
vmov.32 r0, d3[0] vmov.32 r0, d3[0]
bx lr bx lr
.endfunc endfunc
@ scalarproduct_and_madd_int16(/*aligned*/v0,v1,v2,order,mul) @ scalarproduct_and_madd_int16(/*aligned*/v0,v1,v2,order,mul)
function ff_scalarproduct_and_madd_int16_neon, export=1 function ff_scalarproduct_and_madd_int16_neon, export=1
...@@ -115,4 +115,4 @@ function ff_scalarproduct_and_madd_int16_neon, export=1 ...@@ -115,4 +115,4 @@ function ff_scalarproduct_and_madd_int16_neon, export=1
vpaddl.s32 d3, d2 vpaddl.s32 d3, d2
vmov.32 r0, d3[0] vmov.32 r0, d3[0]
bx lr bx lr
.endfunc endfunc
...@@ -122,7 +122,7 @@ function ff_imdct_half_neon, export=1 ...@@ -122,7 +122,7 @@ function ff_imdct_half_neon, export=1
vst2.32 {d5,d7}, [r8,:128] vst2.32 {d5,d7}, [r8,:128]
pop {r4-r8,pc} pop {r4-r8,pc}
.endfunc endfunc
function ff_imdct_calc_neon, export=1 function ff_imdct_calc_neon, export=1
push {r4-r6,lr} push {r4-r6,lr}
...@@ -158,7 +158,7 @@ function ff_imdct_calc_neon, export=1 ...@@ -158,7 +158,7 @@ function ff_imdct_calc_neon, export=1
bgt 1b bgt 1b
pop {r4-r6,pc} pop {r4-r6,pc}
.endfunc endfunc
function ff_mdct_calc_neon, export=1 function ff_mdct_calc_neon, export=1
push {r4-r10,lr} push {r4-r10,lr}
...@@ -300,4 +300,4 @@ function ff_mdct_calc_neon, export=1 ...@@ -300,4 +300,4 @@ function ff_mdct_calc_neon, export=1
vst2.32 {d5,d7}, [r8,:128] vst2.32 {d5,d7}, [r8,:128]
pop {r4-r10,pc} pop {r4-r10,pc}
.endfunc endfunc
...@@ -114,4 +114,4 @@ function ff_dct_unquantize_h263_armv5te, export=1 ...@@ -114,4 +114,4 @@ function ff_dct_unquantize_h263_armv5te, export=1
strh r9, [r0], #2 strh r9, [r0], #2
strh lr, [r0], #2 strh lr, [r0], #2
pop {r4-r9,pc} pop {r4-r9,pc}
.endfunc endfunc
...@@ -147,7 +147,7 @@ row_dc_only: ...@@ -147,7 +147,7 @@ row_dc_only:
strd a3, [a1, #8] strd a3, [a1, #8]
ldr pc, [sp], #4 ldr pc, [sp], #4
.endfunc endfunc
.macro idct_col .macro idct_col
ldr a4, [a1] /* a4 = col[1:0] */ ldr a4, [a1] /* a4 = col[1:0] */
...@@ -331,7 +331,7 @@ function idct_col_armv5te ...@@ -331,7 +331,7 @@ function idct_col_armv5te
str a2, [a1, #(16*4)] str a2, [a1, #(16*4)]
ldr pc, [sp], #4 ldr pc, [sp], #4
.endfunc endfunc
function idct_col_put_armv5te function idct_col_put_armv5te
str lr, [sp, #-4]! str lr, [sp, #-4]!
...@@ -448,7 +448,7 @@ function idct_col_put_armv5te ...@@ -448,7 +448,7 @@ function idct_col_put_armv5te
strh a2, [v2, -lr] strh a2, [v2, -lr]
ldr pc, [sp], #4 ldr pc, [sp], #4
.endfunc endfunc
function idct_col_add_armv5te function idct_col_add_armv5te
str lr, [sp, #-4]! str lr, [sp, #-4]!
...@@ -598,7 +598,7 @@ function idct_col_add_armv5te ...@@ -598,7 +598,7 @@ function idct_col_add_armv5te
strh a2, [v2] strh a2, [v2]
ldr pc, [sp], #4 ldr pc, [sp], #4
.endfunc endfunc
function ff_simple_idct_armv5te, export=1 function ff_simple_idct_armv5te, export=1
stmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, lr} stmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, lr}
...@@ -630,7 +630,7 @@ function ff_simple_idct_armv5te, export=1 ...@@ -630,7 +630,7 @@ function ff_simple_idct_armv5te, export=1
bl idct_col_armv5te bl idct_col_armv5te
ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc} ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
.endfunc endfunc
function ff_simple_idct_add_armv5te, export=1 function ff_simple_idct_add_armv5te, export=1
stmfd sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr} stmfd sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr}
...@@ -665,7 +665,7 @@ function ff_simple_idct_add_armv5te, export=1 ...@@ -665,7 +665,7 @@ function ff_simple_idct_add_armv5te, export=1
add sp, sp, #8 add sp, sp, #8
ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc} ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
.endfunc endfunc
function ff_simple_idct_put_armv5te, export=1 function ff_simple_idct_put_armv5te, export=1
stmfd sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr} stmfd sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr}
...@@ -700,4 +700,4 @@ function ff_simple_idct_put_armv5te, export=1 ...@@ -700,4 +700,4 @@ function ff_simple_idct_put_armv5te, export=1
add sp, sp, #8 add sp, sp, #8
ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc} ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
.endfunc endfunc
...@@ -237,7 +237,7 @@ function idct_row_armv6 ...@@ -237,7 +237,7 @@ function idct_row_armv6
strh r2, [r1, #(16*5)] strh r2, [r1, #(16*5)]
strh r2, [r1, #(16*7)] strh r2, [r1, #(16*7)]
pop {pc} pop {pc}
.endfunc endfunc
/* /*
Compute IDCT of single column, read as row. Compute IDCT of single column, read as row.
...@@ -264,7 +264,7 @@ function idct_col_armv6 ...@@ -264,7 +264,7 @@ function idct_col_armv6
strh r8, [r1, #(16*7)] strh r8, [r1, #(16*7)]
pop {pc} pop {pc}
.endfunc endfunc
/* /*
Compute IDCT of single column, read as row, store saturated 8-bit. Compute IDCT of single column, read as row, store saturated 8-bit.
...@@ -294,7 +294,7 @@ function idct_col_put_armv6 ...@@ -294,7 +294,7 @@ function idct_col_put_armv6
sub r1, r1, r2, lsl #3 sub r1, r1, r2, lsl #3
pop {pc} pop {pc}
.endfunc endfunc
/* /*
Compute IDCT of single column, read as row, add/store saturated 8-bit. Compute IDCT of single column, read as row, add/store saturated 8-bit.
...@@ -349,7 +349,7 @@ function idct_col_add_armv6 ...@@ -349,7 +349,7 @@ function idct_col_add_armv6
sub r1, r1, r2, lsl #3 sub r1, r1, r2, lsl #3
pop {pc} pop {pc}
.endfunc endfunc
/* /*
Compute 8 IDCT row transforms. Compute 8 IDCT row transforms.
...@@ -396,7 +396,7 @@ function ff_simple_idct_armv6, export=1 ...@@ -396,7 +396,7 @@ function ff_simple_idct_armv6, export=1
add sp, sp, #128 add sp, sp, #128
pop {r4-r11, pc} pop {r4-r11, pc}
.endfunc endfunc
/* ff_simple_idct_add_armv6(uint8_t *dest, int line_size, DCTELEM *data); */ /* ff_simple_idct_add_armv6(uint8_t *dest, int line_size, DCTELEM *data); */
function ff_simple_idct_add_armv6, export=1 function ff_simple_idct_add_armv6, export=1
...@@ -413,7 +413,7 @@ function ff_simple_idct_add_armv6, export=1 ...@@ -413,7 +413,7 @@ function ff_simple_idct_add_armv6, export=1
add sp, sp, #(128+8) add sp, sp, #(128+8)
pop {r4-r11, pc} pop {r4-r11, pc}
.endfunc endfunc
/* ff_simple_idct_put_armv6(uint8_t *dest, int line_size, DCTELEM *data); */ /* ff_simple_idct_put_armv6(uint8_t *dest, int line_size, DCTELEM *data); */
function ff_simple_idct_put_armv6, export=1 function ff_simple_idct_put_armv6, export=1
...@@ -430,4 +430,4 @@ function ff_simple_idct_put_armv6, export=1 ...@@ -430,4 +430,4 @@ function ff_simple_idct_put_armv6, export=1
add sp, sp, #(128+8) add sp, sp, #(128+8)
pop {r4-r11, pc} pop {r4-r11, pc}
.endfunc endfunc
...@@ -77,7 +77,7 @@ function idct_row4_pld_neon ...@@ -77,7 +77,7 @@ function idct_row4_pld_neon
add r3, r3, r1, lsl #1 add r3, r3, r1, lsl #1
pld [r3] pld [r3]
pld [r3, r1] pld [r3, r1]
.endfunc endfunc
function idct_row4_neon function idct_row4_neon
vmov.i32 q15, #(1<<(ROW_SHIFT-1)) vmov.i32 q15, #(1<<(ROW_SHIFT-1))
...@@ -147,7 +147,7 @@ function idct_row4_neon ...@@ -147,7 +147,7 @@ function idct_row4_neon
vst1.64 {d6-d9}, [r2,:128]! vst1.64 {d6-d9}, [r2,:128]!
bx lr bx lr
.endfunc endfunc
function idct_col4_neon function idct_col4_neon
mov ip, #16 mov ip, #16
...@@ -218,7 +218,7 @@ function idct_col4_neon ...@@ -218,7 +218,7 @@ function idct_col4_neon
vsubhn.i32 d6, q14, q6 vsubhn.i32 d6, q14, q6
bx lr bx lr
.endfunc endfunc
.align 6 .align 6
...@@ -237,7 +237,7 @@ function idct_col4_st8_neon ...@@ -237,7 +237,7 @@ function idct_col4_st8_neon
vst1.32 {d5[1]}, [r0,:32], r1 vst1.32 {d5[1]}, [r0,:32], r1
bx lr bx lr
.endfunc endfunc
.section .rodata .section .rodata
.align 4 .align 4
...@@ -275,7 +275,7 @@ function ff_simple_idct_put_neon, export=1 ...@@ -275,7 +275,7 @@ function ff_simple_idct_put_neon, export=1
bl idct_col4_st8_neon bl idct_col4_st8_neon
idct_end idct_end
.endfunc endfunc
.align 6 .align 6
...@@ -312,7 +312,7 @@ function idct_col4_add8_neon ...@@ -312,7 +312,7 @@ function idct_col4_add8_neon
vst1.32 {d5[1]}, [ip,:32], r1 vst1.32 {d5[1]}, [ip,:32], r1
bx lr bx lr
.endfunc endfunc
/* void ff_simple_idct_add_neon(uint8_t *dst, int line_size, DCTELEM *data); */ /* void ff_simple_idct_add_neon(uint8_t *dst, int line_size, DCTELEM *data); */
function ff_simple_idct_add_neon, export=1 function ff_simple_idct_add_neon, export=1
...@@ -330,7 +330,7 @@ function ff_simple_idct_add_neon, export=1 ...@@ -330,7 +330,7 @@ function ff_simple_idct_add_neon, export=1
bl idct_col4_add8_neon bl idct_col4_add8_neon
idct_end idct_end
.endfunc endfunc
.align 6 .align 6
...@@ -351,7 +351,7 @@ function idct_col4_st16_neon ...@@ -351,7 +351,7 @@ function idct_col4_st16_neon
vst1.64 {d9}, [r2,:64], ip vst1.64 {d9}, [r2,:64], ip
bx lr bx lr
.endfunc endfunc
/* void ff_simple_idct_neon(DCTELEM *data); */ /* void ff_simple_idct_neon(DCTELEM *data); */
function ff_simple_idct_neon, export=1 function ff_simple_idct_neon, export=1
...@@ -370,4 +370,4 @@ function ff_simple_idct_neon, export=1 ...@@ -370,4 +370,4 @@ function ff_simple_idct_neon, export=1
bl idct_col4_st16_neon bl idct_col4_st16_neon
idct_end idct_end
.endfunc endfunc
...@@ -74,7 +74,7 @@ function ff_vp3_v_loop_filter_neon, export=1 ...@@ -74,7 +74,7 @@ function ff_vp3_v_loop_filter_neon, export=1
vst1.64 {d0}, [ip,:64], r1 vst1.64 {d0}, [ip,:64], r1
vst1.64 {d1}, [ip,:64], r1 vst1.64 {d1}, [ip,:64], r1
bx lr bx lr
.endfunc endfunc
function ff_vp3_h_loop_filter_neon, export=1 function ff_vp3_h_loop_filter_neon, export=1
sub ip, r0, #1 sub ip, r0, #1
...@@ -107,7 +107,7 @@ function ff_vp3_h_loop_filter_neon, export=1 ...@@ -107,7 +107,7 @@ function ff_vp3_h_loop_filter_neon, export=1
vst1.16 {d0[3]}, [ip], r1 vst1.16 {d0[3]}, [ip], r1
vst1.16 {d1[3]}, [ip], r1 vst1.16 {d1[3]}, [ip], r1
bx lr bx lr
.endfunc endfunc
function vp3_idct_start_neon function vp3_idct_start_neon
...@@ -120,7 +120,7 @@ function vp3_idct_start_neon ...@@ -120,7 +120,7 @@ function vp3_idct_start_neon
vadd.s16 q1, q8, q12 vadd.s16 q1, q8, q12
vsub.s16 q8, q8, q12 vsub.s16 q8, q8, q12
vld1.64 {d28-d31}, [r2,:128]! vld1.64 {d28-d31}, [r2,:128]!
.endfunc endfunc
function vp3_idct_core_neon function vp3_idct_core_neon
vmull.s16 q2, d18, xC1S7 // (ip[1] * C1) << 16 vmull.s16 q2, d18, xC1S7 // (ip[1] * C1) << 16
...@@ -211,7 +211,7 @@ function vp3_idct_core_neon ...@@ -211,7 +211,7 @@ function vp3_idct_core_neon
vadd.s16 q10, q1, q2 // Ad = (A - C) * C4 vadd.s16 q10, q1, q2 // Ad = (A - C) * C4
vsub.s16 q14, q4, q3 // H = ip[2] * C6 - ip[6] * C2 vsub.s16 q14, q4, q3 // H = ip[2] * C6 - ip[6] * C2
bx lr bx lr
.endfunc endfunc
.macro VP3_IDCT_END type .macro VP3_IDCT_END type
function vp3_idct_end_\type\()_neon function vp3_idct_end_\type\()_neon
...@@ -259,7 +259,7 @@ function vp3_idct_end_\type\()_neon ...@@ -259,7 +259,7 @@ function vp3_idct_end_\type\()_neon
vswp d23, d30 vswp d23, d30
.endif .endif
bx lr bx lr
.endfunc endfunc
.endm .endm
VP3_IDCT_END row VP3_IDCT_END row
...@@ -289,7 +289,7 @@ function ff_vp3_idct_neon, export=1 ...@@ -289,7 +289,7 @@ function ff_vp3_idct_neon, export=1
vst1.64 {d24-d27}, [r0,:128]! vst1.64 {d24-d27}, [r0,:128]!
vst1.64 {d28-d31}, [r0,:128]! vst1.64 {d28-d31}, [r0,:128]!
bx lr bx lr
.endfunc endfunc
function ff_vp3_idct_put_neon, export=1 function ff_vp3_idct_put_neon, export=1
mov ip, lr mov ip, lr
...@@ -319,7 +319,7 @@ function ff_vp3_idct_put_neon, export=1 ...@@ -319,7 +319,7 @@ function ff_vp3_idct_put_neon, export=1
vst1.64 {d6}, [r0,:64], r1 vst1.64 {d6}, [r0,:64], r1
vst1.64 {d7}, [r0,:64], r1 vst1.64 {d7}, [r0,:64], r1
bx lr bx lr
.endfunc endfunc
function ff_vp3_idct_add_neon, export=1 function ff_vp3_idct_add_neon, export=1
mov ip, lr mov ip, lr
...@@ -373,4 +373,4 @@ function ff_vp3_idct_add_neon, export=1 ...@@ -373,4 +373,4 @@ function ff_vp3_idct_add_neon, export=1
vst1.64 {d6}, [r2,:64], r1 vst1.64 {d6}, [r2,:64], r1
vst1.64 {d7}, [r2,:64], r1 vst1.64 {d7}, [r2,:64], r1
bx lr bx lr
.endfunc endfunc
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment