Commit bcbe9e44 authored by James Almer

x86/sbrdsp: zero extend m_max in apply_noise_main

Tested-by: Michael Niedermayer <michael@niedermayer.cc>
Signed-off-by: James Almer <jamrial@gmail.com>
parent 44028547
...@@ -378,24 +378,24 @@ cglobal sbr_hf_apply_noise_3, 5,5+NREGS+UNIX64,8, Y,s_m,q_filt,noise,kx,m_max ...@@ -378,24 +378,24 @@ cglobal sbr_hf_apply_noise_3, 5,5+NREGS+UNIX64,8, Y,s_m,q_filt,noise,kx,m_max
apply_noise_main: apply_noise_main:
%if ARCH_X86_64 == 0 || WIN64 %if ARCH_X86_64 == 0 || WIN64
mov kxd, m_maxm mov kxd, m_maxm
%define count kxq DEFINE_ARGS Y, s_m, q_filt, noise, count
%else %else
%define count m_maxq DEFINE_ARGS Y, s_m, q_filt, noise, kx, count
%endif %endif
movsxdifnidn noiseq, noised movsxdifnidn noiseq, noised
dec noiseq dec noiseq
shl count, 2 shl countd, 2
%ifdef PIC %ifdef PIC
lea NOISE_TABLE, [sbr_noise_table] lea NOISE_TABLE, [sbr_noise_table]
%endif %endif
lea Yq, [Yq + 2*count] lea Yq, [Yq + 2*countq]
add s_mq, count add s_mq, countq
add q_filtq, count add q_filtq, countq
shl noiseq, 3 shl noiseq, 3
pxor m5, m5 pxor m5, m5
neg count neg countq
.loop: .loop:
mova m1, [q_filtq + count] mova m1, [q_filtq + countq]
movu m3, [noiseq + NOISE_TABLE + 1*mmsize] movu m3, [noiseq + NOISE_TABLE + 1*mmsize]
movu m4, [noiseq + NOISE_TABLE + 2*mmsize] movu m4, [noiseq + NOISE_TABLE + 2*mmsize]
add noiseq, 2*mmsize add noiseq, 2*mmsize
...@@ -404,7 +404,7 @@ apply_noise_main: ...@@ -404,7 +404,7 @@ apply_noise_main:
punpckldq m1, m1 punpckldq m1, m1
mulps m1, m3 ; m2 = q_filt[m] * ff_sbr_noise_table[noise] mulps m1, m3 ; m2 = q_filt[m] * ff_sbr_noise_table[noise]
mulps m2, m4 ; m2 = q_filt[m] * ff_sbr_noise_table[noise] mulps m2, m4 ; m2 = q_filt[m] * ff_sbr_noise_table[noise]
mova m3, [s_mq + count] mova m3, [s_mq + countq]
; TODO: replace by a vpermd in AVX2 ; TODO: replace by a vpermd in AVX2
punpckhdq m4, m3, m3 punpckhdq m4, m3, m3
punpckldq m3, m3 punpckldq m3, m3
...@@ -414,15 +414,15 @@ apply_noise_main: ...@@ -414,15 +414,15 @@ apply_noise_main:
mulps m4, m0 ; s_m[m] * phi_sign mulps m4, m0 ; s_m[m] * phi_sign
pand m1, m6 pand m1, m6
pand m2, m7 pand m2, m7
movu m6, [Yq + 2*count] movu m6, [Yq + 2*countq]
movu m7, [Yq + 2*count + mmsize] movu m7, [Yq + 2*countq + mmsize]
addps m3, m1 addps m3, m1
addps m4, m2 addps m4, m2
addps m6, m3 addps m6, m3
addps m7, m4 addps m7, m4
movu [Yq + 2*count], m6 movu [Yq + 2*countq], m6
movu [Yq + 2*count + mmsize], m7 movu [Yq + 2*countq + mmsize], m7
add count, mmsize add countq, mmsize
jl .loop jl .loop
RET RET
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment