Commit 87b8e950 authored by Derek Buitenhuis

Merge commit 'cdb1665f'

* commit 'cdb1665f':
  aarch64: Make transpose_4x4H do a regular transpose
Merged-by: Derek Buitenhuis <derek.buitenhuis@gmail.com>
parents 4fe4c5c3 cdb1665f
......@@ -33,25 +33,25 @@ function ff_h264_idct_add_neon, export=1
sshr v17.4H, v3.4H, #1
st1 {v30.8H}, [x1], #16
sub v5.4H, v0.4H, v2.4H
add v6.4H, v1.4H, v17.4H
sub v7.4H, v16.4H, v3.4H
add v0.4H, v4.4H, v6.4H
add v1.4H, v5.4H, v7.4H
sub v3.4H, v4.4H, v6.4H
sub v2.4H, v5.4H, v7.4H
sub v6.4H, v16.4H, v3.4H
add v7.4H, v1.4H, v17.4H
add v0.4H, v4.4H, v7.4H
add v1.4H, v5.4H, v6.4H
sub v2.4H, v5.4H, v6.4H
sub v3.4H, v4.4H, v7.4H
transpose_4x4H v0, v1, v2, v3, v4, v5, v6, v7
add v4.4H, v0.4H, v3.4H
add v4.4H, v0.4H, v2.4H
ld1 {v18.S}[0], [x0], x2
sshr v16.4H, v2.4H, #1
sshr v16.4H, v3.4H, #1
sshr v17.4H, v1.4H, #1
ld1 {v19.S}[1], [x0], x2
sub v5.4H, v0.4H, v3.4H
ld1 {v18.S}[1], [x0], x2
sub v5.4H, v0.4H, v2.4H
ld1 {v19.S}[1], [x0], x2
add v6.4H, v16.4H, v1.4H
ins v4.D[1], v5.D[0]
sub v7.4H, v2.4H, v17.4H
sub v7.4H, v17.4H, v3.4H
ld1 {v19.S}[0], [x0], x2
ins v6.D[1], v7.D[0]
sub x0, x0, x2, lsl #2
......@@ -68,8 +68,8 @@ function ff_h264_idct_add_neon, export=1
sqxtun v1.8B, v1.8H
st1 {v0.S}[0], [x0], x2
st1 {v1.S}[1], [x0], x2
st1 {v0.S}[1], [x0], x2
st1 {v1.S}[1], [x0], x2
st1 {v1.S}[0], [x0], x2
sub x1, x1, #32
......
......@@ -107,12 +107,12 @@
// transpose_4x4H r0, r1, r2, r3, r4, r5, r6, r7
//
// In-place regular transpose of a 4x4 matrix of 16-bit elements.
//
//   r0-r3  in:  rows 0..3 of the matrix, one row per register (.4H view)
//          out: columns 0..3 of the input, i.e. rows 0..3 of the transpose
//   r4-r7  scratch; overwritten with intermediate results
//
// All arguments are register names without an arrangement suffix (e.g. v0);
// the macro appends the .4H / .2S arrangement itself.
//
// With input rows a, b, c, d (element k of row a written a[k]):
//   stage 1 interleaves 16-bit lanes of adjacent row pairs,
//   stage 2 interleaves the resulting 32-bit lane pairs.
.macro transpose_4x4H r0, r1, r2, r3, r4, r5, r6, r7
        // Stage 1: 16-bit interleave of rows (0,1) and rows (2,3).
        trn1            \r4\().4H, \r0\().4H, \r1\().4H     // r4 = a0 b0 a2 b2
        trn2            \r5\().4H, \r0\().4H, \r1\().4H     // r5 = a1 b1 a3 b3
        trn1            \r6\().4H, \r2\().4H, \r3\().4H     // r6 = c0 d0 c2 d2
        trn2            \r7\().4H, \r2\().4H, \r3\().4H     // r7 = c1 d1 c3 d3
        // Stage 2: 32-bit interleave; outputs land in natural order r0..r3.
        trn1            \r0\().2S, \r4\().2S, \r6\().2S     // r0 = a0 b0 c0 d0
        trn2            \r2\().2S, \r4\().2S, \r6\().2S     // r2 = a2 b2 c2 d2
        trn1            \r1\().2S, \r5\().2S, \r7\().2S     // r1 = a1 b1 c1 d1
        trn2            \r3\().2S, \r5\().2S, \r7\().2S     // r3 = a3 b3 c3 d3
.endm
.macro transpose_8x8H r0, r1, r2, r3, r4, r5, r6, r7, r8, r9
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment