Commit 6eda85e1, authored by Reimar Döffinger, committed by Ronald S. Bultje

sbrdsp.asm: convert all instructions to float/SSE ones.

Since the values are floats, using the float operations
makes sense, improves performance on some CPUs and
makes the code SSE compatible instead of needing SSE2.

Based on suggestion by Jason.
Signed-off-by: Reimar Döffinger <Reimar.Doeffinger@gmx.de>
Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
parent 3416d080
...@@ -82,14 +82,14 @@ cglobal sbr_hf_g_filt, 5, 6, 5 ...@@ -82,14 +82,14 @@ cglobal sbr_hf_g_filt, 5, 6, 5
lea r0, [r0 + r3*8] lea r0, [r0 + r3*8]
neg r3 neg r3
.loop4: .loop4:
movq m0, [r2 + 4*r3 + 0] movlps m0, [r2 + 4*r3 + 0]
movq m1, [r2 + 4*r3 + 8] movlps m1, [r2 + 4*r3 + 8]
movq m2, [r1 + 0*STEP] movlps m2, [r1 + 0*STEP]
movq m3, [r1 + 2*STEP] movlps m3, [r1 + 2*STEP]
movhps m2, [r1 + 1*STEP] movhps m2, [r1 + 1*STEP]
movhps m3, [r1 + 3*STEP] movhps m3, [r1 + 3*STEP]
punpckldq m0, m0 unpcklps m0, m0
punpckldq m1, m1 unpcklps m1, m1
mulps m0, m2 mulps m0, m2
mulps m1, m3 mulps m1, m3
movu [r0 + 8*r3 + 0], m0 movu [r0 + 8*r3 + 0], m0
...@@ -101,8 +101,8 @@ cglobal sbr_hf_g_filt, 5, 6, 5 ...@@ -101,8 +101,8 @@ cglobal sbr_hf_g_filt, 5, 6, 5
jz .end jz .end
.loop1: ; element 0 and 1 can be computed at the same time .loop1: ; element 0 and 1 can be computed at the same time
movss m0, [r2] movss m0, [r2]
movq m2, [r1] movlps m2, [r1]
punpckldq m0, m0 unpcklps m0, m0
mulps m2, m0 mulps m2, m0
movlps [r0], m2 movlps [r0], m2
add r0, 8 add r0, 8
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment