Commit be923ed6 authored by Diego Biurrun

x86: fmtconvert: port to cpuflags

parent 588fafe7
...@@ -26,11 +26,11 @@ SECTION_TEXT ...@@ -26,11 +26,11 @@ SECTION_TEXT
;--------------------------------------------------------------------------------- ;---------------------------------------------------------------------------------
; void int32_to_float_fmul_scalar(float *dst, const int *src, float mul, int len); ; void int32_to_float_fmul_scalar(float *dst, const int *src, float mul, int len);
;--------------------------------------------------------------------------------- ;---------------------------------------------------------------------------------
%macro INT32_TO_FLOAT_FMUL_SCALAR 2 %macro INT32_TO_FLOAT_FMUL_SCALAR 1
%if UNIX64 %if UNIX64
cglobal int32_to_float_fmul_scalar_%1, 3,3,%2, dst, src, len cglobal int32_to_float_fmul_scalar, 3, 3, %1, dst, src, len
%else %else
cglobal int32_to_float_fmul_scalar_%1, 4,4,%2, dst, src, mul, len cglobal int32_to_float_fmul_scalar, 4, 4, %1, dst, src, mul, len
%endif %endif
%if WIN64 %if WIN64
SWAP 0, 2 SWAP 0, 2
...@@ -43,7 +43,7 @@ cglobal int32_to_float_fmul_scalar_%1, 4,4,%2, dst, src, mul, len ...@@ -43,7 +43,7 @@ cglobal int32_to_float_fmul_scalar_%1, 4,4,%2, dst, src, mul, len
add dstq, lenq add dstq, lenq
neg lenq neg lenq
.loop: .loop:
%ifidn %1, sse2 %if cpuflag(sse2)
cvtdq2ps m1, [srcq+lenq ] cvtdq2ps m1, [srcq+lenq ]
cvtdq2ps m2, [srcq+lenq+16] cvtdq2ps m2, [srcq+lenq+16]
%else %else
...@@ -63,27 +63,26 @@ cglobal int32_to_float_fmul_scalar_%1, 4,4,%2, dst, src, mul, len ...@@ -63,27 +63,26 @@ cglobal int32_to_float_fmul_scalar_%1, 4,4,%2, dst, src, mul, len
REP_RET REP_RET
%endmacro %endmacro
; Instantiations of INT32_TO_FLOAT_FMUL_SCALAR (side-by-side diff: left half of
; a line = text before the change, right half = text after it).
;   before: the macro is invoked as "sse, 5" and "sse2, 3", and movdqa is
;           aliased to movaps around the sse instantiation only
;   after:  the instruction set is named on the INIT_XMM line and the macro
;           receives just the numeric argument (5 for sse, 3 for sse2)
; SPLATD is pointed at SPLATD_SSE / SPLATD_SSE2 before the respective
; instantiation and undefined once both are emitted.
; NOTE(review): on the next lines the two columns are not aligned statement
; for statement; read each column top to bottom on its own.
INIT_XMM INIT_XMM sse
%define SPLATD SPLATD_SSE %define SPLATD SPLATD_SSE
%define movdqa movaps INT32_TO_FLOAT_FMUL_SCALAR 5
INT32_TO_FLOAT_FMUL_SCALAR sse, 5 INIT_XMM sse2
%undef movdqa
%define SPLATD SPLATD_SSE2 %define SPLATD SPLATD_SSE2
INT32_TO_FLOAT_FMUL_SCALAR sse2, 3 INT32_TO_FLOAT_FMUL_SCALAR 3
%undef SPLATD %undef SPLATD
;------------------------------------------------------------------------------ ;------------------------------------------------------------------------------
; void ff_float_to_int16(int16_t *dst, const float *src, long len); ; void ff_float_to_int16(int16_t *dst, const float *src, long len);
;------------------------------------------------------------------------------ ;------------------------------------------------------------------------------
%macro FLOAT_TO_INT16 2 %macro FLOAT_TO_INT16 1
cglobal float_to_int16_%1, 3,3,%2, dst, src, len cglobal float_to_int16, 3, 3, %1, dst, src, len
add lenq, lenq add lenq, lenq
lea srcq, [srcq+2*lenq] lea srcq, [srcq+2*lenq]
add dstq, lenq add dstq, lenq
neg lenq neg lenq
.loop: .loop:
%ifidn %1, sse2 %if cpuflag(sse2)
cvtps2dq m0, [srcq+2*lenq ] cvtps2dq m0, [srcq+2*lenq ]
cvtps2dq m1, [srcq+2*lenq+16] cvtps2dq m1, [srcq+2*lenq+16]
packssdw m0, m1 packssdw m0, m1
...@@ -100,31 +99,32 @@ cglobal float_to_int16_%1, 3,3,%2, dst, src, len ...@@ -100,31 +99,32 @@ cglobal float_to_int16_%1, 3,3,%2, dst, src, len
%endif %endif
add lenq, 16 add lenq, 16
js .loop js .loop
%ifnidn %1, sse2 %if mmsize == 8
emms emms
%endif %endif
REP_RET REP_RET
%endmacro %endmacro
; Instantiations of FLOAT_TO_INT16 (side-by-side diff: left = before, right =
; after). Three variants are emitted:
;   sse2  - under INIT_XMM, numeric argument 2
;   sse   - under INIT_MMX, numeric argument 0
;   3dnow - under INIT_MMX, numeric argument 0, with cvtps2pi aliased to pf2id
;           for the duration of that instantiation
; After the change the variant name moves from the macro argument list to the
; INIT_* line; the 3dnow variant therefore gains its own "INIT_MMX 3dnow".
INIT_XMM INIT_XMM sse2
FLOAT_TO_INT16 sse2, 2 FLOAT_TO_INT16 2
INIT_MMX INIT_MMX sse
FLOAT_TO_INT16 sse, 0 FLOAT_TO_INT16 0
%define cvtps2pi pf2id %define cvtps2pi pf2id
FLOAT_TO_INT16 3dnow, 0 INIT_MMX 3dnow
FLOAT_TO_INT16 0
%undef cvtps2pi %undef cvtps2pi
;------------------------------------------------------------------------------ ;------------------------------------------------------------------------------
; void ff_float_to_int16_step(int16_t *dst, const float *src, long len, long step); ; void ff_float_to_int16_step(int16_t *dst, const float *src, long len, long step);
;------------------------------------------------------------------------------ ;------------------------------------------------------------------------------
%macro FLOAT_TO_INT16_STEP 2 %macro FLOAT_TO_INT16_STEP 1
cglobal float_to_int16_step_%1, 4,7,%2, dst, src, len, step, step3, v1, v2 cglobal float_to_int16_step, 4, 7, %1, dst, src, len, step, step3, v1, v2
add lenq, lenq add lenq, lenq
lea srcq, [srcq+2*lenq] lea srcq, [srcq+2*lenq]
lea step3q, [stepq*3] lea step3q, [stepq*3]
neg lenq neg lenq
.loop: .loop:
%ifidn %1, sse2 %if cpuflag(sse2)
cvtps2dq m0, [srcq+2*lenq ] cvtps2dq m0, [srcq+2*lenq ]
cvtps2dq m1, [srcq+2*lenq+16] cvtps2dq m1, [srcq+2*lenq+16]
packssdw m0, m1 packssdw m0, m1
...@@ -179,25 +179,26 @@ cglobal float_to_int16_step_%1, 4,7,%2, dst, src, len, step, step3, v1, v2 ...@@ -179,25 +179,26 @@ cglobal float_to_int16_step_%1, 4,7,%2, dst, src, len, step, step3, v1, v2
%endif %endif
add lenq, 16 add lenq, 16
js .loop js .loop
%ifnidn %1, sse2 %if mmsize == 8
emms emms
%endif %endif
REP_RET REP_RET
%endmacro %endmacro
; Instantiations of FLOAT_TO_INT16_STEP (side-by-side diff: left = before,
; right = after). Same three variants as the non-step conversion:
;   sse2  - under INIT_XMM, numeric argument 2
;   sse   - under INIT_MMX, numeric argument 0
;   3dnow - under INIT_MMX, numeric argument 0, with cvtps2pi aliased to pf2id
; After the change the variant name is given on the INIT_* line instead of as
; the first macro argument.
INIT_XMM INIT_XMM sse2
FLOAT_TO_INT16_STEP sse2, 2 FLOAT_TO_INT16_STEP 2
INIT_MMX INIT_MMX sse
FLOAT_TO_INT16_STEP sse, 0 FLOAT_TO_INT16_STEP 0
%define cvtps2pi pf2id %define cvtps2pi pf2id
FLOAT_TO_INT16_STEP 3dnow, 0 INIT_MMX 3dnow
FLOAT_TO_INT16_STEP 0
%undef cvtps2pi %undef cvtps2pi
;------------------------------------------------------------------------------- ;-------------------------------------------------------------------------------
; void ff_float_to_int16_interleave2(int16_t *dst, const float **src, long len); ; void ff_float_to_int16_interleave2(int16_t *dst, const float **src, long len);
;------------------------------------------------------------------------------- ;-------------------------------------------------------------------------------
%macro FLOAT_TO_INT16_INTERLEAVE2 1 %macro FLOAT_TO_INT16_INTERLEAVE2 0
cglobal float_to_int16_interleave2_%1, 3,4,2, dst, src0, src1, len cglobal float_to_int16_interleave2, 3, 4, 2, dst, src0, src1, len
lea lenq, [4*r2q] lea lenq, [4*r2q]
mov src1q, [src0q+gprsize] mov src1q, [src0q+gprsize]
mov src0q, [src0q] mov src0q, [src0q]
...@@ -206,7 +207,7 @@ cglobal float_to_int16_interleave2_%1, 3,4,2, dst, src0, src1, len ...@@ -206,7 +207,7 @@ cglobal float_to_int16_interleave2_%1, 3,4,2, dst, src0, src1, len
add src1q, lenq add src1q, lenq
neg lenq neg lenq
.loop: .loop:
%ifidn %1, sse2 %if cpuflag(sse2)
cvtps2dq m0, [src0q+lenq] cvtps2dq m0, [src0q+lenq]
cvtps2dq m1, [src1q+lenq] cvtps2dq m1, [src1q+lenq]
packssdw m0, m1 packssdw m0, m1
...@@ -228,21 +229,20 @@ cglobal float_to_int16_interleave2_%1, 3,4,2, dst, src0, src1, len ...@@ -228,21 +229,20 @@ cglobal float_to_int16_interleave2_%1, 3,4,2, dst, src0, src1, len
%endif %endif
add lenq, 16 add lenq, 16
js .loop js .loop
%ifnidn %1, sse2 %if mmsize == 8
emms emms
%endif %endif
REP_RET REP_RET
%endmacro %endmacro
; Instantiations of FLOAT_TO_INT16_INTERLEAVE2 (side-by-side diff: left =
; before, right = after). Variants, in order: 3dnow, sse, sse2.
;   before: the variant name is the macro's only argument; movdqa is aliased
;           to movaps around the sse instantiation; INIT_XMM precedes sse2
;   after:  the macro takes no arguments; each variant is selected by its own
;           INIT_MMX 3dnow / INIT_MMX sse / INIT_XMM sse2 line and the movdqa
;           alias is gone
; cvtps2pi is aliased to pf2id for the 3dnow instantiation only.
; NOTE(review): from the movdqa line onward the two columns are shifted against
; each other; read each column top to bottom on its own.
INIT_MMX INIT_MMX 3dnow
%define cvtps2pi pf2id %define cvtps2pi pf2id
FLOAT_TO_INT16_INTERLEAVE2 3dnow FLOAT_TO_INT16_INTERLEAVE2
%undef cvtps2pi %undef cvtps2pi
%define movdqa movaps INIT_MMX sse
FLOAT_TO_INT16_INTERLEAVE2 sse FLOAT_TO_INT16_INTERLEAVE2
%undef movdqa INIT_XMM sse2
INIT_XMM FLOAT_TO_INT16_INTERLEAVE2
FLOAT_TO_INT16_INTERLEAVE2 sse2
%macro PSWAPD_SSE 2 %macro PSWAPD_SSE 2
...@@ -254,9 +254,9 @@ FLOAT_TO_INT16_INTERLEAVE2 sse2 ...@@ -254,9 +254,9 @@ FLOAT_TO_INT16_INTERLEAVE2 sse2
punpckldq %1, %2 punpckldq %1, %2
%endmacro %endmacro
%macro FLOAT_TO_INT16_INTERLEAVE6 1 %macro FLOAT_TO_INT16_INTERLEAVE6 0
; void float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len) ; void float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len)
cglobal float_to_int16_interleave6_%1, 2,8,0, dst, src, src1, src2, src3, src4, src5, len cglobal float_to_int16_interleave6, 2, 8, 0, dst, src, src1, src2, src3, src4, src5, len
%if ARCH_X86_64 %if ARCH_X86_64
mov lend, r2d mov lend, r2d
%else %else
...@@ -302,21 +302,24 @@ cglobal float_to_int16_interleave6_%1, 2,8,0, dst, src, src1, src2, src3, src4, ...@@ -302,21 +302,24 @@ cglobal float_to_int16_interleave6_%1, 2,8,0, dst, src, src1, src2, src3, src4,
RET RET
%endmacro ; FLOAT_TO_INT16_INTERLEAVE6 %endmacro ; FLOAT_TO_INT16_INTERLEAVE6
; Instantiations of FLOAT_TO_INT16_INTERLEAVE6 (side-by-side diff: left =
; before, right = after; lines with a single statement exist on one side only).
; Variants, in order:
;   sse      - pswapd aliased to PSWAPD_SSE
;   3dnow    - cvtps2pi aliased to pf2id, pswapd aliased to PSWAPD_3DNOW
;   3dnowext - cvtps2pi still aliased to pf2id, pswapd alias removed first
; After the change the macro takes no arguments and every variant is preceded
; by its own INIT_MMX <variant> line.
INIT_MMX sse
%define pswapd PSWAPD_SSE %define pswapd PSWAPD_SSE
FLOAT_TO_INT16_INTERLEAVE6 sse FLOAT_TO_INT16_INTERLEAVE6
INIT_MMX 3dnow
%define cvtps2pi pf2id %define cvtps2pi pf2id
%define pswapd PSWAPD_3DNOW %define pswapd PSWAPD_3DNOW
FLOAT_TO_INT16_INTERLEAVE6 3dnow FLOAT_TO_INT16_INTERLEAVE6
%undef pswapd %undef pswapd
FLOAT_TO_INT16_INTERLEAVE6 3dnowext INIT_MMX 3dnowext
FLOAT_TO_INT16_INTERLEAVE6
%undef cvtps2pi %undef cvtps2pi
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void ff_float_interleave6(float *dst, const float **src, unsigned int len); ; void ff_float_interleave6(float *dst, const float **src, unsigned int len);
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
%macro FLOAT_INTERLEAVE6 2 %macro FLOAT_INTERLEAVE6 1
cglobal float_interleave6_%1, 2,8,%2, dst, src, src1, src2, src3, src4, src5, len cglobal float_interleave6, 2, 8, %1, dst, src, src1, src2, src3, src4, src5, len
%if ARCH_X86_64 %if ARCH_X86_64
mov lend, r2d mov lend, r2d
%else %else
...@@ -334,7 +337,7 @@ cglobal float_interleave6_%1, 2,8,%2, dst, src, src1, src2, src3, src4, src5, le ...@@ -334,7 +337,7 @@ cglobal float_interleave6_%1, 2,8,%2, dst, src, src1, src2, src3, src4, src5, le
sub src4q, srcq sub src4q, srcq
sub src5q, srcq sub src5q, srcq
.loop: .loop:
%ifidn %1, sse %if cpuflag(sse)
movaps m0, [srcq] movaps m0, [srcq]
movaps m1, [srcq+src1q] movaps m1, [srcq+src1q]
movaps m2, [srcq+src2q] movaps m2, [srcq+src2q]
...@@ -383,62 +386,60 @@ cglobal float_interleave6_%1, 2,8,%2, dst, src, src1, src2, src3, src4, src5, le ...@@ -383,62 +386,60 @@ cglobal float_interleave6_%1, 2,8,%2, dst, src, src1, src2, src3, src4, src5, le
add dstq, mmsize*6 add dstq, mmsize*6
sub lend, mmsize/4 sub lend, mmsize/4
jg .loop jg .loop
%ifidn %1, mmx %if mmsize == 8
emms emms
%endif %endif
REP_RET REP_RET
%endmacro %endmacro
; Instantiations of FLOAT_INTERLEAVE6 (side-by-side diff: left = before,
; right = after). Two variants are emitted:
;   mmx - under INIT_MMX, numeric argument 0
;   sse - under INIT_XMM, numeric argument 7
; After the change the variant name is given on the INIT_* line and only the
; numeric argument is passed to the macro.
INIT_MMX INIT_MMX mmx
FLOAT_INTERLEAVE6 mmx, 0 FLOAT_INTERLEAVE6 0
INIT_XMM INIT_XMM sse
FLOAT_INTERLEAVE6 sse, 7 FLOAT_INTERLEAVE6 7
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void ff_float_interleave2(float *dst, const float **src, unsigned int len); ; void ff_float_interleave2(float *dst, const float **src, unsigned int len);
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; FLOAT_INTERLEAVE2: emits the body of float_interleave2, which reads two
; source pointers from the src array and writes their contents to dst with the
; PUNPCKLDQ / PUNPCKHDQ results of each vector pair stored back to back.
; This listing is a side-by-side diff: the left half of each line is the text
; before the change, the right half is the text after it.
;   before: %1 = function-name suffix, %2 = third numeric cglobal argument
;   after:  %1 = third numeric cglobal argument (name suffix no longer passed)
; NOTE(review): that numeric argument is presumably the vector register count
; expected by cglobal -- confirm against x86inc.
; PUNPCKLDQ / PUNPCKHDQ (and, before the change, MOVPS) are not defined inside
; the macro; they must be %define'd before it is invoked.
%macro FLOAT_INTERLEAVE2 2 %macro FLOAT_INTERLEAVE2 1
cglobal float_interleave2_%1, 3,4,%2, dst, src, len, src1 cglobal float_interleave2, 3, 4, %1, dst, src, len, src1
; src1q = second pointer in the src array, srcq = first pointer; src1q is then
; turned into the distance between the two so that [srcq+src1q] addresses the
; second source while only srcq has to be advanced.
mov src1q, [srcq+gprsize] mov src1q, [srcq+gprsize]
mov srcq, [srcq ] mov srcq, [srcq ]
sub src1q, srcq sub src1q, srcq
; The loop body runs before len is tested, i.e. at least once.
.loop: .loop:
; Load 2*mmsize bytes from each of the two sources.
MOVPS m0, [srcq ] mova m0, [srcq ]
MOVPS m1, [srcq+src1q ] mova m1, [srcq+src1q ]
MOVPS m3, [srcq +mmsize] mova m3, [srcq +mmsize]
MOVPS m4, [srcq+src1q+mmsize] mova m4, [srcq+src1q+mmsize]
; Combine each pair: m0/m2 receive the low/high unpack of the first pair,
; m3/m1 the low/high unpack of the second pair (copies are taken first because
; the unpack overwrites its destination).
MOVPS m2, m0 mova m2, m0
PUNPCKLDQ m0, m1 PUNPCKLDQ m0, m1
PUNPCKHDQ m2, m1 PUNPCKHDQ m2, m1
MOVPS m1, m3 mova m1, m3
PUNPCKLDQ m3, m4 PUNPCKLDQ m3, m4
PUNPCKHDQ m1, m4 PUNPCKHDQ m1, m4
; Store the four result vectors in output order: 4*mmsize bytes per iteration.
MOVPS [dstq ], m0 mova [dstq ], m0
MOVPS [dstq+1*mmsize], m2 mova [dstq+1*mmsize], m2
MOVPS [dstq+2*mmsize], m3 mova [dstq+2*mmsize], m3
MOVPS [dstq+3*mmsize], m1 mova [dstq+3*mmsize], m1
; Advance past the 2*mmsize bytes consumed per source and the 4*mmsize bytes
; produced; len drops by mmsize/2 per iteration and the loop repeats while the
; remaining count is greater than zero (signed comparison).
add srcq, mmsize*2 add srcq, mmsize*2
add dstq, mmsize*4 add dstq, mmsize*4
sub lend, mmsize/2 sub lend, mmsize/2
jg .loop jg .loop
; emms is emitted only for the MMX build: the old text keyed this on the
; suffix argument being "mmx", the new text on the register size being 8 bytes.
%ifidn %1, mmx %if mmsize == 8
emms emms
%endif %endif
REP_RET REP_RET
%endmacro %endmacro
; Instantiations of FLOAT_INTERLEAVE2 (side-by-side diff: left = before,
; right = after; the two MOVPS defines exist on the left side only).
;   mmx - under INIT_MMX, numeric argument 0, unpack via punpckldq/punpckhdq
;   sse - under INIT_XMM, numeric argument 5, unpack via unpcklps/unpckhps
; Before the change MOVPS selected movq or movaps per variant; after it the
; macro body uses mova and the MOVPS defines are dropped.
INIT_MMX INIT_MMX mmx
%define MOVPS movq
%define PUNPCKLDQ punpckldq %define PUNPCKLDQ punpckldq
%define PUNPCKHDQ punpckhdq %define PUNPCKHDQ punpckhdq
FLOAT_INTERLEAVE2 mmx, 0 FLOAT_INTERLEAVE2 0
INIT_XMM INIT_XMM sse
%define MOVPS movaps
%define PUNPCKLDQ unpcklps %define PUNPCKLDQ unpcklps
%define PUNPCKHDQ unpckhps %define PUNPCKHDQ unpckhps
FLOAT_INTERLEAVE2 sse, 5 FLOAT_INTERLEAVE2 5
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment