Commit 99817091 authored by Mans Rullgard

ARM: use standard syntax for all LDRD/STRD instructions

The standard syntax requires two destination registers for
LDRD/STRD instructions.  Some versions of the GNU assembler
allow using only one with the second implicit, others are
more strict.
Signed-off-by: Mans Rullgard <mans@mansr.com>
parent b6a3849a
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
.macro h264_chroma_mc8 type, codec=h264 .macro h264_chroma_mc8 type, codec=h264
function ff_\type\()_\codec\()_chroma_mc8_neon, export=1 function ff_\type\()_\codec\()_chroma_mc8_neon, export=1
push {r4-r7, lr} push {r4-r7, lr}
ldrd r4, [sp, #20] ldrd r4, r5, [sp, #20]
.ifc \type,avg .ifc \type,avg
mov lr, r0 mov lr, r0
.endif .endif
...@@ -182,7 +182,7 @@ endfunc ...@@ -182,7 +182,7 @@ endfunc
.macro h264_chroma_mc4 type, codec=h264 .macro h264_chroma_mc4 type, codec=h264
function ff_\type\()_\codec\()_chroma_mc4_neon, export=1 function ff_\type\()_\codec\()_chroma_mc4_neon, export=1
push {r4-r7, lr} push {r4-r7, lr}
ldrd r4, [sp, #20] ldrd r4, r5, [sp, #20]
.ifc \type,avg .ifc \type,avg
mov lr, r0 mov lr, r0
.endif .endif
......
...@@ -886,7 +886,7 @@ T mov sp, r0 ...@@ -886,7 +886,7 @@ T mov sp, r0
mov r12, #8 mov r12, #8
vpush {d8-d15} vpush {d8-d15}
bl put_h264_qpel8_h_lowpass_neon bl put_h264_qpel8_h_lowpass_neon
ldrd r0, [r11], #8 ldrd r0, r1, [r11], #8
mov r3, r2 mov r3, r2
add r12, sp, #64 add r12, sp, #64
sub r1, r1, r2, lsl #1 sub r1, r1, r2, lsl #1
...@@ -913,7 +913,7 @@ T mov sp, r0 ...@@ -913,7 +913,7 @@ T mov sp, r0
vpush {d8-d15} vpush {d8-d15}
bl put_h264_qpel8_h_lowpass_neon bl put_h264_qpel8_h_lowpass_neon
mov r4, r0 mov r4, r0
ldrd r0, [r11], #8 ldrd r0, r1, [r11], #8
sub r1, r1, r2, lsl #1 sub r1, r1, r2, lsl #1
sub r1, r1, #2 sub r1, r1, #2
mov r3, r2 mov r3, r2
...@@ -958,7 +958,7 @@ T mov sp, r0 ...@@ -958,7 +958,7 @@ T mov sp, r0
vpush {d8-d15} vpush {d8-d15}
bl put_h264_qpel8_v_lowpass_neon bl put_h264_qpel8_v_lowpass_neon
mov r4, r0 mov r4, r0
ldrd r0, [r11], #8 ldrd r0, r1, [r11], #8
sub r1, r1, r3, lsl #1 sub r1, r1, r3, lsl #1
sub r1, r1, #2 sub r1, r1, #2
sub r2, r4, #64 sub r2, r4, #64
...@@ -1071,7 +1071,7 @@ T mov sp, r0 ...@@ -1071,7 +1071,7 @@ T mov sp, r0
mov r3, #16 mov r3, #16
vpush {d8-d15} vpush {d8-d15}
bl put_h264_qpel16_h_lowpass_neon bl put_h264_qpel16_h_lowpass_neon
ldrd r0, [r11], #8 ldrd r0, r1, [r11], #8
mov r3, r2 mov r3, r2
add r12, sp, #64 add r12, sp, #64
sub r1, r1, r2, lsl #1 sub r1, r1, r2, lsl #1
...@@ -1096,7 +1096,7 @@ T mov sp, r0 ...@@ -1096,7 +1096,7 @@ T mov sp, r0
vpush {d8-d15} vpush {d8-d15}
bl put_h264_qpel16_h_lowpass_neon_packed bl put_h264_qpel16_h_lowpass_neon_packed
mov r4, r0 mov r4, r0
ldrd r0, [r11], #8 ldrd r0, r1, [r11], #8
sub r1, r1, r2, lsl #1 sub r1, r1, r2, lsl #1
sub r1, r1, #2 sub r1, r1, #2
mov r3, r2 mov r3, r2
...@@ -1139,7 +1139,7 @@ T mov sp, r0 ...@@ -1139,7 +1139,7 @@ T mov sp, r0
vpush {d8-d15} vpush {d8-d15}
bl put_h264_qpel16_v_lowpass_neon_packed bl put_h264_qpel16_v_lowpass_neon_packed
mov r4, r0 mov r4, r0
ldrd r0, [r11], #8 ldrd r0, r1, [r11], #8
sub r1, r1, r3, lsl #1 sub r1, r1, r3, lsl #1
sub r1, r1, #2 sub r1, r1, #2
mov r2, r3 mov r2, r3
......
...@@ -61,9 +61,9 @@ function ff_dct_unquantize_h263_armv5te, export=1 ...@@ -61,9 +61,9 @@ function ff_dct_unquantize_h263_armv5te, export=1
mov ip, #0 mov ip, #0
subs r3, r3, #2 subs r3, r3, #2
ble 2f ble 2f
ldrd r4, [r0, #0] ldrd r4, r5, [r0, #0]
1: 1:
ldrd r6, [r0, #8] ldrd r6, r7, [r0, #8]
dequant_t r9, r4, r1, r2, r9 dequant_t r9, r4, r1, r2, r9
dequant_t lr, r5, r1, r2, lr dequant_t lr, r5, r1, r2, lr
...@@ -87,7 +87,7 @@ function ff_dct_unquantize_h263_armv5te, export=1 ...@@ -87,7 +87,7 @@ function ff_dct_unquantize_h263_armv5te, export=1
subs r3, r3, #8 subs r3, r3, #8
it gt it gt
ldrdgt r4, [r0, #0] /* load data early to avoid load/use pipeline stall */ ldrdgt r4, r5, [r0, #0] /* load data early to avoid load/use pipeline stall */
bgt 1b bgt 1b
adds r3, r3, #2 adds r3, r3, #2
......
...@@ -46,8 +46,8 @@ w57: .long W57 ...@@ -46,8 +46,8 @@ w57: .long W57
function idct_row_armv5te function idct_row_armv5te
str lr, [sp, #-4]! str lr, [sp, #-4]!
ldrd v1, [a1, #8] ldrd v1, v2, [a1, #8]
ldrd a3, [a1] /* a3 = row[1:0], a4 = row[3:2] */ ldrd a3, a4, [a1] /* a3 = row[1:0], a4 = row[3:2] */
orrs v1, v1, v2 orrs v1, v1, v2
itt eq itt eq
cmpeq v1, a4 cmpeq v1, a4
...@@ -78,7 +78,7 @@ function idct_row_armv5te ...@@ -78,7 +78,7 @@ function idct_row_armv5te
smultt fp, lr, a3 smultt fp, lr, a3
sub v7, v7, a2 sub v7, v7, a2
smulbt a2, lr, a4 smulbt a2, lr, a4
ldrd a3, [a1, #8] /* a3=row[5:4] a4=row[7:6] */ ldrd a3, a4, [a1, #8] /* a3=row[5:4] a4=row[7:6] */
sub fp, fp, a2 sub fp, fp, a2
orrs a2, a3, a4 orrs a2, a3, a4
...@@ -121,7 +121,7 @@ function idct_row_armv5te ...@@ -121,7 +121,7 @@ function idct_row_armv5te
add a2, v4, fp add a2, v4, fp
mov a2, a2, lsr #11 mov a2, a2, lsr #11
add a4, a4, a2, lsl #16 add a4, a4, a2, lsl #16
strd a3, [a1] strd a3, a4, [a1]
sub a2, v4, fp sub a2, v4, fp
mov a3, a2, lsr #11 mov a3, a2, lsr #11
...@@ -135,7 +135,7 @@ function idct_row_armv5te ...@@ -135,7 +135,7 @@ function idct_row_armv5te
sub a2, v1, v5 sub a2, v1, v5
mov a2, a2, lsr #11 mov a2, a2, lsr #11
add a4, a4, a2, lsl #16 add a4, a4, a2, lsl #16
strd a3, [a1, #8] strd a3, a4, [a1, #8]
ldr pc, [sp], #4 ldr pc, [sp], #4
...@@ -144,8 +144,8 @@ row_dc_only: ...@@ -144,8 +144,8 @@ row_dc_only:
bic a3, a3, #0xe000 bic a3, a3, #0xe000
mov a3, a3, lsl #3 mov a3, a3, lsl #3
mov a4, a3 mov a4, a3
strd a3, [a1] strd a3, a4, [a1]
strd a3, [a1, #8] strd a3, a4, [a1, #8]
ldr pc, [sp], #4 ldr pc, [sp], #4
endfunc endfunc
......
...@@ -159,8 +159,8 @@ function idct_col4_neon ...@@ -159,8 +159,8 @@ function idct_col4_neon
vmull.s16 q15, d30, w4 /* q15 = W4*(col[0]+(1<<COL_SHIFT-1)/W4)*/ vmull.s16 q15, d30, w4 /* q15 = W4*(col[0]+(1<<COL_SHIFT-1)/W4)*/
vld1.64 {d8}, [r2,:64], ip /* d5 = col[3] */ vld1.64 {d8}, [r2,:64], ip /* d5 = col[3] */
ldrd r4, [r2] ldrd r4, r5, [r2]
ldrd r6, [r2, #16] ldrd r6, r7, [r2, #16]
orrs r4, r4, r5 orrs r4, r4, r5
idct_col4_top idct_col4_top
...@@ -176,7 +176,7 @@ function idct_col4_neon ...@@ -176,7 +176,7 @@ function idct_col4_neon
vadd.i32 q14, q14, q7 vadd.i32 q14, q14, q7
1: orrs r6, r6, r7 1: orrs r6, r6, r7
ldrd r4, [r2, #16] ldrd r4, r5, [r2, #16]
it eq it eq
addeq r2, r2, #16 addeq r2, r2, #16
beq 2f beq 2f
...@@ -188,7 +188,7 @@ function idct_col4_neon ...@@ -188,7 +188,7 @@ function idct_col4_neon
vmlal.s16 q6, d5, w3 /* q6 += W3 * col[5] */ vmlal.s16 q6, d5, w3 /* q6 += W3 * col[5] */
2: orrs r4, r4, r5 2: orrs r4, r4, r5
ldrd r4, [r2, #16] ldrd r4, r5, [r2, #16]
it eq it eq
addeq r2, r2, #16 addeq r2, r2, #16
beq 3f beq 3f
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment