Commit 95a98ab3 authored by Justin Ruggles

ac3dsp: simplify x86 versions of ac3_max_msb_abs_int16

Simplifies the code by using cpuflags and a new macro.
Also fixes the invalid use of the MMX2 pshufw operation in the MMX-only
function.
parent 11e33402
...@@ -91,12 +91,36 @@ AC3_EXPONENT_MIN sse2 ...@@ -91,12 +91,36 @@ AC3_EXPONENT_MIN sse2
; This is used for mmxext and sse2 because they have pminsw/pmaxsw. ; This is used for mmxext and sse2 because they have pminsw/pmaxsw.
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
%macro AC3_MAX_MSB_ABS_INT16 2 ; logical 'or' of 4 or 8 words in an mmx or xmm register into the low word
cglobal ac3_max_msb_abs_int16_%1, 2,2,5, src, len %macro OR_WORDS_HORIZ 2 ; src, tmp
; Horizontal OR: fold every 16-bit word of %1 into the low word of %1.
; %2 is a scratch register and is overwritten. On exit only the low word of
; %1 is meaningful; the other words hold partial results, so the caller must
; mask the value it extracts.
%if cpuflag(sse2)
; xmm register holding 8 words (w0..w7)
movhlps %2, %1 ; low half of %2 = high half of %1 (w4..w7)
por %1, %2 ; low 4 words: w0|w4, w1|w5, w2|w6, w3|w7
pshuflw %2, %1, q0032 ; bring words 2,3 down to positions 0,1
por %1, %2 ; word 0 = OR of even words, word 1 = OR of odd words
pshuflw %2, %1, q0001 ; bring word 1 down to position 0
por %1, %2 ; word 0 = OR of all 8 words
%elif cpuflag(mmx2)
; mmx register holding 4 words (w0..w3); pshufw is available from MMX2 on
pshufw %2, %1, q0032 ; bring words 2,3 down to positions 0,1
por %1, %2 ; word 0 = w0|w2, word 1 = w1|w3
pshufw %2, %1, q0001 ; bring word 1 down to position 0
por %1, %2 ; word 0 = OR of all 4 words
%else ; mmx
; plain MMX has no pshufw, so emulate the two shuffles with 64-bit shifts
movq %2, %1
psrlq %2, 32 ; words 2,3 shifted down to positions 0,1
por %1, %2 ; word 0 = w0|w2, word 1 = w1|w3
movq %2, %1
psrlq %2, 16 ; word 1 shifted down to position 0
por %1, %2 ; word 0 = OR of all 4 words
%endif
%endmacro
%macro AC3_MAX_MSB_ABS_INT16 1
cglobal ac3_max_msb_abs_int16, 2,2,5, src, len
pxor m2, m2 pxor m2, m2
pxor m3, m3 pxor m3, m3
.loop: .loop:
%ifidn %2, min_max %ifidn %1, min_max
mova m0, [srcq] mova m0, [srcq]
mova m1, [srcq+mmsize] mova m1, [srcq+mmsize]
pminsw m2, m0 pminsw m2, m0
...@@ -104,7 +128,7 @@ cglobal ac3_max_msb_abs_int16_%1, 2,2,5, src, len ...@@ -104,7 +128,7 @@ cglobal ac3_max_msb_abs_int16_%1, 2,2,5, src, len
pmaxsw m3, m0 pmaxsw m3, m0
pmaxsw m3, m1 pmaxsw m3, m1
%else ; or_abs %else ; or_abs
%ifidn %1, mmx %if notcpuflag(ssse3)
mova m0, [srcq] mova m0, [srcq]
mova m1, [srcq+mmsize] mova m1, [srcq+mmsize]
ABS2 m0, m1, m3, m4 ABS2 m0, m1, m3, m4
...@@ -119,34 +143,27 @@ cglobal ac3_max_msb_abs_int16_%1, 2,2,5, src, len ...@@ -119,34 +143,27 @@ cglobal ac3_max_msb_abs_int16_%1, 2,2,5, src, len
add srcq, mmsize*2 add srcq, mmsize*2
sub lend, mmsize sub lend, mmsize
ja .loop ja .loop
%ifidn %2, min_max %ifidn %1, min_max
ABS2 m2, m3, m0, m1 ABS2 m2, m3, m0, m1
por m2, m3 por m2, m3
%endif %endif
%ifidn mmsize, 16 OR_WORDS_HORIZ m2, m0
movhlps m0, m2
por m2, m0
%endif
PSHUFLW m0, m2, 0xe
por m2, m0
PSHUFLW m0, m2, 0x1
por m2, m0
movd eax, m2 movd eax, m2
and eax, 0xFFFF and eax, 0xFFFF
RET RET
%endmacro %endmacro
INIT_MMX INIT_MMX mmx
%define ABS2 ABS2_MMX %define ABS2 ABS2_MMX
%define PSHUFLW pshufw AC3_MAX_MSB_ABS_INT16 or_abs
AC3_MAX_MSB_ABS_INT16 mmx, or_abs INIT_MMX mmx2
%define ABS2 ABS2_MMX2 %define ABS2 ABS2_MMX2
AC3_MAX_MSB_ABS_INT16 mmxext, min_max AC3_MAX_MSB_ABS_INT16 min_max
INIT_XMM INIT_XMM sse2
%define PSHUFLW pshuflw AC3_MAX_MSB_ABS_INT16 min_max
AC3_MAX_MSB_ABS_INT16 sse2, min_max INIT_XMM ssse3
%define ABS2 ABS2_SSSE3 %define ABS2 ABS2_SSSE3
AC3_MAX_MSB_ABS_INT16 ssse3, or_abs AC3_MAX_MSB_ABS_INT16 or_abs
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; macro used for ff_ac3_lshift_int16() and ff_ac3_rshift_int32() ; macro used for ff_ac3_lshift_int16() and ff_ac3_rshift_int32()
......
...@@ -27,10 +27,10 @@ extern void ff_ac3_exponent_min_mmx (uint8_t *exp, int num_reuse_blocks, int n ...@@ -27,10 +27,10 @@ extern void ff_ac3_exponent_min_mmx (uint8_t *exp, int num_reuse_blocks, int n
extern void ff_ac3_exponent_min_mmxext(uint8_t *exp, int num_reuse_blocks, int nb_coefs); extern void ff_ac3_exponent_min_mmxext(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
extern void ff_ac3_exponent_min_sse2 (uint8_t *exp, int num_reuse_blocks, int nb_coefs); extern void ff_ac3_exponent_min_sse2 (uint8_t *exp, int num_reuse_blocks, int nb_coefs);
extern int ff_ac3_max_msb_abs_int16_mmx (const int16_t *src, int len); extern int ff_ac3_max_msb_abs_int16_mmx (const int16_t *src, int len);
extern int ff_ac3_max_msb_abs_int16_mmxext(const int16_t *src, int len); extern int ff_ac3_max_msb_abs_int16_mmx2 (const int16_t *src, int len);
extern int ff_ac3_max_msb_abs_int16_sse2 (const int16_t *src, int len); extern int ff_ac3_max_msb_abs_int16_sse2 (const int16_t *src, int len);
extern int ff_ac3_max_msb_abs_int16_ssse3 (const int16_t *src, int len); extern int ff_ac3_max_msb_abs_int16_ssse3(const int16_t *src, int len);
extern void ff_ac3_lshift_int16_mmx (int16_t *src, unsigned int len, unsigned int shift); extern void ff_ac3_lshift_int16_mmx (int16_t *src, unsigned int len, unsigned int shift);
extern void ff_ac3_lshift_int16_sse2(int16_t *src, unsigned int len, unsigned int shift); extern void ff_ac3_lshift_int16_sse2(int16_t *src, unsigned int len, unsigned int shift);
...@@ -67,7 +67,7 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact) ...@@ -67,7 +67,7 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
} }
if (mm_flags & AV_CPU_FLAG_MMX2 && HAVE_MMX2) { if (mm_flags & AV_CPU_FLAG_MMX2 && HAVE_MMX2) {
c->ac3_exponent_min = ff_ac3_exponent_min_mmxext; c->ac3_exponent_min = ff_ac3_exponent_min_mmxext;
c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmxext; c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx2;
} }
if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) { if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) {
c->float_to_fixed24 = ff_float_to_fixed24_sse; c->float_to_fixed24 = ff_float_to_fixed24_sse;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment