Commit c728518b, authored by Ronald S. Bultje and committed by Justin Ruggles

x86: fft: fix imdct_half() for AVX

Some calculations were changed in b6a3849a to use mmsize, which was not correct
for the AVX version, which uses INIT_YMM and therefore has mmsize == 32.

Fixes Bug 341.
Signed-off-by: Justin Ruggles <justin.ruggles@gmail.com>
parent 150adea6
...@@ -1009,7 +1009,11 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i ...@@ -1009,7 +1009,11 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i
push rrevtab push rrevtab
%endif %endif
sub r3, mmsize/4 %if mmsize == 8
sub r3, 2
%else
sub r3, 4
%endif
%if ARCH_X86_64 || mmsize == 8 %if ARCH_X86_64 || mmsize == 8
xor r4, r4 xor r4, r4
sub r4, r3 sub r4, r3
...@@ -1036,7 +1040,9 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i ...@@ -1036,7 +1040,9 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i
mova [r1+r5*8], m0 mova [r1+r5*8], m0
mova [r1+r6*8], m2 mova [r1+r6*8], m2
add r4, 2 add r4, 2
%elif ARCH_X86_64 sub r4, 2
%else
%if ARCH_X86_64
movzx r5, word [rrevtab+r4-4] movzx r5, word [rrevtab+r4-4]
movzx r6, word [rrevtab+r4-2] movzx r6, word [rrevtab+r4-2]
movzx r10, word [rrevtab+r3] movzx r10, word [rrevtab+r3]
...@@ -1057,7 +1063,8 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i ...@@ -1057,7 +1063,8 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i
movlps [r1+r5*8], xmm1 movlps [r1+r5*8], xmm1
movhps [r1+r4*8], xmm1 movhps [r1+r4*8], xmm1
%endif %endif
sub r3, mmsize/4 sub r3, 4
%endif
jns .pre jns .pre
mov r5, r0 mov r5, r0
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment