Commit 30b45d9c authored by Ronald S. Bultje

x86inc: automatically insert vzeroupper for YMM functions.

parent 8ea1459b
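
The pattern this commit removes: any function that runs YMM (AVX, mmsize == 32) code must execute vzeroupper before returning, otherwise subsequent legacy SSE code pays a large AVX-SSE state-transition penalty. Previously every such function open-coded the check; now the RET macro in x86inc.asm emits vzeroupper itself, and REP_RET forwards to RET whenever mmsize == 32, so function epilogues collapse to a single macro. A minimal sketch of the old versus new epilogue (illustrative only, not a function from this diff):

; before: each YMM-capable macro carried its own epilogue logic
%if mmsize == 32
    vzeroupper  ; zero the upper YMM halves before any SSE code runs
    RET
%else
    REP_RET     ; single "rep ret" for the XMM/SSE instantiation
%endif

; after: one line, the x86inc macros decide what to emit
    REP_RET     ; becomes vzeroupper + ret when mmsize == 32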
@@ -1158,12 +1158,7 @@ ALIGN 16
     add src1q, 2*mmsize
     sub lenq, 2*mmsize
     jge .loop
-%if mmsize == 32
-    vzeroupper
-    RET
-%else
     REP_RET
-%endif
 %endmacro

 INIT_XMM sse
@@ -1193,12 +1188,7 @@ ALIGN 16
     sub lenq, 2*mmsize
     jge .loop
-%if mmsize == 32
-    vzeroupper
-    RET
-%else
     REP_RET
-%endif
 %endmacro

 INIT_XMM sse
@@ -1243,10 +1233,6 @@ cglobal butterflies_float_interleave, 4,4,3, dst, src0, src1, len
 %endif
     add lenq, mmsize
     jl .loop
-%if mmsize == 32
-    vzeroupper
-    RET
-%endif
 .end:
     REP_RET
 %endmacro
......
@@ -749,9 +749,6 @@ section .text
 ; The others pass args in registers and don't spill anything.
 cglobal fft_dispatch%2, 2,5,8, z, nbits
     FFT_DISPATCH fullsuffix, nbits
-%if mmsize == 32
-    vzeroupper
-%endif
     RET
 %endmacro ; DECL_FFT
@@ -956,9 +953,6 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *input
     %1 r0, r1, r6, rtcos, rtsin
 %if ARCH_X86_64 == 0
     add esp, 12
-%endif
-%if mmsize == 32
-    vzeroupper
 %endif
     RET
 %endmacro
......
@@ -145,12 +145,7 @@ cglobal conv_s32_to_flt, 3,3,3, dst, src, len
     mova [dstq+lenq+mmsize], m2
     add lenq, mmsize*2
     jl .loop
-%if mmsize == 32
-    vzeroupper
-    RET
-%else
     REP_RET
-%endif
 %endmacro

 INIT_XMM sse2
@@ -218,12 +213,7 @@ cglobal conv_flt_to_s32, 3,3,5, dst, src, len
     mova [dstq+lenq+3*mmsize], m3
     add lenq, mmsize*4
     jl .loop
-%if mmsize == 32
-    vzeroupper
-    RET
-%else
     REP_RET
-%endif
 %endmacro

 INIT_XMM sse2
......
@@ -51,12 +51,7 @@ cglobal mix_2_to_1_fltp_flt, 3,4,6, src, matrix, len, src1
     add srcq, mmsize*2
     sub lend, mmsize*2/4
     jg .loop
-%if mmsize == 32
-    vzeroupper
-    RET
-%else
     REP_RET
-%endif
 %endmacro

 INIT_XMM sse
@@ -175,12 +170,7 @@ cglobal mix_1_to_2_fltp_flt, 3,5,4, src0, matrix0, len, src1, matrix1
     add src0q, mmsize
     sub lend, mmsize/4
     jg .loop
-%if mmsize == 32
-    vzeroupper
-    RET
-%else
     REP_RET
-%endif
 %endmacro

 INIT_XMM sse
......
@@ -40,12 +40,7 @@ ALIGN 16
     sub lenq, 2*mmsize
     jge .loop
-%if mmsize == 32
-    vzeroupper
-    RET
-%else
     REP_RET
-%endif
 %endmacro

 INIT_XMM sse
@@ -86,12 +81,7 @@ cglobal vector_fmac_scalar, 4,4,3, dst, src, mul, len
     mova [dstq+lenq+mmsize], m2
     sub lenq, 2*mmsize
     jge .loop
-%if mmsize == 32
-    vzeroupper
-    RET
-%else
     REP_RET
-%endif
 %endmacro

 INIT_XMM sse
......
@@ -369,11 +369,14 @@ DECLARE_REG 14, R15, R15D, R15W, R15B, 120
 %macro RET 0
     WIN64_RESTORE_XMM_INTERNAL rsp
     POP_IF_USED 14, 13, 12, 11, 10, 9, 8, 7
+%if mmsize == 32
+    vzeroupper
+%endif
     ret
 %endmacro

 %macro REP_RET 0
-%if regs_used > 7 || xmm_regs_used > 6
+%if regs_used > 7 || xmm_regs_used > 6 || mmsize == 32
     RET
 %else
     rep ret
@@ -410,11 +413,14 @@ DECLARE_REG 14, R15, R15D, R15W, R15B, 72
 %macro RET 0
     POP_IF_USED 14, 13, 12, 11, 10, 9
+%if mmsize == 32
+    vzeroupper
+%endif
     ret
 %endmacro

 %macro REP_RET 0
-%if regs_used > 9
+%if regs_used > 9 || mmsize == 32
     RET
 %else
     rep ret
@@ -456,11 +462,14 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
 %macro RET 0
     POP_IF_USED 6, 5, 4, 3
+%if mmsize == 32
+    vzeroupper
+%endif
     ret
 %endmacro

 %macro REP_RET 0
-%if regs_used > 3
+%if regs_used > 3 || mmsize == 32
     RET
 %else
     rep ret
......
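
Why REP_RET needs the extra condition: rep ret exists only as a workaround for a branch-predictor quirk on older AMD CPUs, where a bare one-byte ret that is also a jump target predicts poorly, and it is a drop-in replacement for ret only when the return site needs nothing else. Once a vzeroupper has to precede the return, the epilogue is no longer a single instruction anyway, so REP_RET simply expands to the full RET macro. Roughly, for an INIT_YMM function on unix64 (a sketch of the expansion, per the hunk above):

    ; REP_RET -> RET, which now expands to approximately:
    ;   POP_IF_USED 14, 13, 12, 11, 10, 9  ; restore pushed callee-saved GPRs, if any
    vzeroupper  ; inserted automatically when mmsize == 32
    ret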