Commit b8f02f5b authored by Justin Ruggles

dsputil: use cpuflags in x86 versions of vector_clip_int32()

parent f2bd8a07
...@@ -2419,9 +2419,9 @@ void ff_vector_clip_int32_mmx (int32_t *dst, const int32_t *src, int32_t min ...@@ -2419,9 +2419,9 @@ void ff_vector_clip_int32_mmx (int32_t *dst, const int32_t *src, int32_t min
int32_t max, unsigned int len); int32_t max, unsigned int len);
void ff_vector_clip_int32_sse2 (int32_t *dst, const int32_t *src, int32_t min, void ff_vector_clip_int32_sse2 (int32_t *dst, const int32_t *src, int32_t min,
int32_t max, unsigned int len); int32_t max, unsigned int len);
void ff_vector_clip_int32_sse2_int(int32_t *dst, const int32_t *src, int32_t min, void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src, int32_t min,
int32_t max, unsigned int len); int32_t max, unsigned int len);
void ff_vector_clip_int32_sse41 (int32_t *dst, const int32_t *src, int32_t min, void ff_vector_clip_int32_sse4 (int32_t *dst, const int32_t *src, int32_t min,
int32_t max, unsigned int len); int32_t max, unsigned int len);
void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
...@@ -2877,7 +2877,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) ...@@ -2877,7 +2877,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->scalarproduct_int16 = ff_scalarproduct_int16_sse2; c->scalarproduct_int16 = ff_scalarproduct_int16_sse2;
c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2; c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2;
if (mm_flags & AV_CPU_FLAG_ATOM) { if (mm_flags & AV_CPU_FLAG_ATOM) {
c->vector_clip_int32 = ff_vector_clip_int32_sse2_int; c->vector_clip_int32 = ff_vector_clip_int32_int_sse2;
} else { } else {
c->vector_clip_int32 = ff_vector_clip_int32_sse2; c->vector_clip_int32 = ff_vector_clip_int32_sse2;
} }
...@@ -2909,7 +2909,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) ...@@ -2909,7 +2909,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) { if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) {
#if HAVE_YASM #if HAVE_YASM
c->vector_clip_int32 = ff_vector_clip_int32_sse41; c->vector_clip_int32 = ff_vector_clip_int32_sse4;
#endif #endif
} }
......
...@@ -1055,9 +1055,14 @@ emu_edge mmx ...@@ -1055,9 +1055,14 @@ emu_edge mmx
; int32_t max, unsigned int len) ; int32_t max, unsigned int len)
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
%macro VECTOR_CLIP_INT32 4 ; %1 = number of xmm registers used
cglobal vector_clip_int32_%1, 5,5,%2, dst, src, min, max, len ; %2 = number of inline load/process/store loops per asm loop
%ifidn %1, sse2 ; %3 = process 4*mmsize (%3=0) or 8*mmsize (%3=1) bytes per loop
; %4 = CLIPD function takes min/max as float instead of int (CLIPD_SSE2)
; %5 = suffix
%macro VECTOR_CLIP_INT32 4-5
cglobal vector_clip_int32%5, 5,5,%2, dst, src, min, max, len
%if %4
cvtsi2ss m4, minm cvtsi2ss m4, minm
cvtsi2ss m5, maxm cvtsi2ss m5, maxm
%else %else
...@@ -1068,12 +1073,12 @@ cglobal vector_clip_int32_%1, 5,5,%2, dst, src, min, max, len ...@@ -1068,12 +1073,12 @@ cglobal vector_clip_int32_%1, 5,5,%2, dst, src, min, max, len
SPLATD m5 SPLATD m5
.loop: .loop:
%assign %%i 1 %assign %%i 1
%rep %3 %rep %2
mova m0, [srcq+mmsize*0*%%i] mova m0, [srcq+mmsize*0*%%i]
mova m1, [srcq+mmsize*1*%%i] mova m1, [srcq+mmsize*1*%%i]
mova m2, [srcq+mmsize*2*%%i] mova m2, [srcq+mmsize*2*%%i]
mova m3, [srcq+mmsize*3*%%i] mova m3, [srcq+mmsize*3*%%i]
%if %4 %if %3
mova m7, [srcq+mmsize*4*%%i] mova m7, [srcq+mmsize*4*%%i]
mova m8, [srcq+mmsize*5*%%i] mova m8, [srcq+mmsize*5*%%i]
mova m9, [srcq+mmsize*6*%%i] mova m9, [srcq+mmsize*6*%%i]
...@@ -1083,7 +1088,7 @@ cglobal vector_clip_int32_%1, 5,5,%2, dst, src, min, max, len ...@@ -1083,7 +1088,7 @@ cglobal vector_clip_int32_%1, 5,5,%2, dst, src, min, max, len
CLIPD m1, m4, m5, m6 CLIPD m1, m4, m5, m6
CLIPD m2, m4, m5, m6 CLIPD m2, m4, m5, m6
CLIPD m3, m4, m5, m6 CLIPD m3, m4, m5, m6
%if %4 %if %3
CLIPD m7, m4, m5, m6 CLIPD m7, m4, m5, m6
CLIPD m8, m4, m5, m6 CLIPD m8, m4, m5, m6
CLIPD m9, m4, m5, m6 CLIPD m9, m4, m5, m6
...@@ -1093,7 +1098,7 @@ cglobal vector_clip_int32_%1, 5,5,%2, dst, src, min, max, len ...@@ -1093,7 +1098,7 @@ cglobal vector_clip_int32_%1, 5,5,%2, dst, src, min, max, len
mova [dstq+mmsize*1*%%i], m1 mova [dstq+mmsize*1*%%i], m1
mova [dstq+mmsize*2*%%i], m2 mova [dstq+mmsize*2*%%i], m2
mova [dstq+mmsize*3*%%i], m3 mova [dstq+mmsize*3*%%i], m3
%if %4 %if %3
mova [dstq+mmsize*4*%%i], m7 mova [dstq+mmsize*4*%%i], m7
mova [dstq+mmsize*5*%%i], m8 mova [dstq+mmsize*5*%%i], m8
mova [dstq+mmsize*6*%%i], m9 mova [dstq+mmsize*6*%%i], m9
...@@ -1101,25 +1106,26 @@ cglobal vector_clip_int32_%1, 5,5,%2, dst, src, min, max, len ...@@ -1101,25 +1106,26 @@ cglobal vector_clip_int32_%1, 5,5,%2, dst, src, min, max, len
%endif %endif
%assign %%i %%i+1 %assign %%i %%i+1
%endrep %endrep
add srcq, mmsize*4*(%3+%4) add srcq, mmsize*4*(%2+%3)
add dstq, mmsize*4*(%3+%4) add dstq, mmsize*4*(%2+%3)
sub lend, mmsize*(%3+%4) sub lend, mmsize*(%2+%3)
jg .loop jg .loop
REP_RET REP_RET
%endmacro %endmacro
INIT_MMX INIT_MMX mmx
%define SPLATD SPLATD_MMX %define SPLATD SPLATD_MMX
%define CLIPD CLIPD_MMX %define CLIPD CLIPD_MMX
VECTOR_CLIP_INT32 mmx, 0, 1, 0 VECTOR_CLIP_INT32 0, 1, 0, 0
INIT_XMM INIT_XMM sse2
%define SPLATD SPLATD_SSE2 %define SPLATD SPLATD_SSE2
VECTOR_CLIP_INT32 sse2_int, 6, 1, 0 VECTOR_CLIP_INT32 6, 1, 0, 0, _int
%define CLIPD CLIPD_SSE2 %define CLIPD CLIPD_SSE2
VECTOR_CLIP_INT32 sse2, 6, 2, 0 VECTOR_CLIP_INT32 6, 2, 0, 1
INIT_XMM sse4
%define CLIPD CLIPD_SSE41 %define CLIPD CLIPD_SSE41
%ifdef m8 %ifdef m8
VECTOR_CLIP_INT32 sse41, 11, 1, 1 VECTOR_CLIP_INT32 11, 1, 1, 0
%else %else
VECTOR_CLIP_INT32 sse41, 6, 1, 0 VECTOR_CLIP_INT32 6, 1, 0, 0
%endif %endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment