Commit d721f67d authored by Justin Ruggles

lavr: Add x86-optimized functions for s16 to flt conversion

parent 1168e29d
......@@ -22,6 +22,11 @@
%include "x86inc.asm"
%include "x86util.asm"
%include "util.asm"
SECTION_RODATA 32
pf_s16_inv_scale: times 4 dd 0x38000000
SECTION_TEXT
......@@ -47,6 +52,37 @@ cglobal conv_s16_to_s32, 3,3,3, dst, src, len
jl .loop
REP_RET
;------------------------------------------------------------------------------
; void ff_conv_s16_to_flt(float *dst, const int16_t *src, int len);
;
; Converts signed 16-bit samples at src to floats at dst, multiplying each
; converted value by the pf_s16_inv_scale constant (0x38000000, which is 2^-15
; when read as an IEEE-754 single).
;
; Each loop iteration consumes mmsize bytes of input (mmsize/2 samples) and
; writes 2*mmsize bytes of output. The loop body always runs at least once and
; has no tail handling.
;
; NOTE(review): every load/store uses mova, so this appears to expect
; mmsize-aligned src/dst and a positive len that is a multiple of mmsize/2
; samples - confirm against the caller's registration parameters.
;------------------------------------------------------------------------------
%macro CONV_S16_TO_FLT 0
; x86inc prologue: 3 arguments, 3 general-purpose registers, 3 vector
; registers (m0-m2 are the only vector registers used below)
cglobal conv_s16_to_flt, 3,3,3, dst, src, len
; lenq = 2*len = size of the int16_t input in bytes
lea lenq, [2*lend]
; srcq now points one past the end of the input
add srcq, lenq
; dstq now points one past the end of the output
; (2*lenq = 4*len bytes, i.e. len 4-byte floats)
lea dstq, [dstq + 2*lenq]
; lenq becomes a negative byte offset from the end of src; it counts up to 0
neg lenq
; m2 = scale factor, loaded once and reused by every iteration
mova m2, [pf_s16_inv_scale]
ALIGN 16
.loop:
; m0 = next mmsize bytes (mmsize/2 samples) of 16-bit input
mova m0, [srcq+lenq]
; Widen the words of m0 to dwords spread over m0 and m1.
; NOTE(review): S16_TO_S32_SX is defined outside this view; from the way the
; results are stored below it presumably sign-extends, leaving the first half
; of the samples in m0 and the second half in m1 - confirm in util.asm.
S16_TO_S32_SX 0, 1
; int32 -> float
cvtdq2ps m0, m0
cvtdq2ps m1, m1
; apply the scale factor
mulps m0, m2
mulps m1, m2
; the output byte offset is twice the input byte offset (4-byte floats vs
; 2-byte ints), so the same counter indexes both buffers
mova [dstq+2*lenq ], m0
mova [dstq+2*lenq+mmsize], m1
; advance by the mmsize input bytes just consumed; keep looping while the
; offset is still negative (i.e. the end of the input has not been reached)
add lenq, mmsize
jl .loop
REP_RET
%endmacro
; Instantiate the macro once per target instruction set. The C side declares
; the results as ff_conv_s16_to_flt_sse2 and ff_conv_s16_to_flt_sse4.
INIT_XMM sse2
CONV_S16_TO_FLT
INIT_XMM sse4
CONV_S16_TO_FLT
;-----------------------------------------------------------------------------
; void ff_conv_fltp_to_flt_6ch(float *dst, float *const *src, int len,
; int channels);
......
......@@ -24,6 +24,9 @@
/* Prototypes for the x86 assembly sample-format conversion routines.
 * Each takes the destination buffer, the source buffer and a length. */

/* s16 -> s32: writes 32-bit samples, reads 16-bit samples. The element types
 * were previously declared the wrong way round (int16_t dst / int32_t src),
 * contradicting both the function name and its registration as
 * AV_SAMPLE_FMT_S16 input -> AV_SAMPLE_FMT_S32 output. */
extern void ff_conv_s16_to_s32_sse2(int32_t *dst, const int16_t *src, int len);

/* s16 -> float */
extern void ff_conv_s16_to_flt_sse2(float *dst, const int16_t *src, int len);
extern void ff_conv_s16_to_flt_sse4(float *dst, const int16_t *src, int len);

/* planar float -> interleaved float, 6 channels; src is an array of
 * per-channel pointers.
 * NOTE(review): the assembly header comment for ff_conv_fltp_to_flt_6ch lists
 * a fourth "int channels" parameter that these prototypes omit - confirm
 * which signature the callers actually use. */
extern void ff_conv_fltp_to_flt_6ch_mmx (float *dst, float *const *src, int len);
extern void ff_conv_fltp_to_flt_6ch_sse4(float *dst, float *const *src, int len);
extern void ff_conv_fltp_to_flt_6ch_avx (float *dst, float *const *src, int len);
......@@ -48,6 +51,12 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE) {
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_S16,
0, 16, 8, "SSE2", ff_conv_s16_to_s32_sse2);
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16,
0, 16, 8, "SSE2", ff_conv_s16_to_flt_sse2);
}
if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) {
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16,
0, 16, 8, "SSE4", ff_conv_s16_to_flt_sse4);
}
#endif
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment