Commit 31d0d718 authored by Justin Ruggles

lavr: x86: optimized 2-channel flt to s16p conversion

parent 6092dafb
...@@ -1045,3 +1045,49 @@ CONV_S16_TO_FLTP_6CH ...@@ -1045,3 +1045,49 @@ CONV_S16_TO_FLTP_6CH
INIT_XMM avx INIT_XMM avx
CONV_S16_TO_FLTP_6CH CONV_S16_TO_FLTP_6CH
%endif %endif
;------------------------------------------------------------------------------
; void ff_conv_flt_to_s16p_2ch(int16_t *const *dst, float *src, int len,
; int channels);
;
; Converts interleaved 2-channel float samples from src into two separate
; int16_t planes, dst[0] and dst[1].
;
; dst      - array holding the two output plane pointers
; src      - interleaved float input
; len      - number of samples per channel (each plane receives 2*len bytes)
; channels - not read: only three arguments are named in cglobal and the
;            fourth register is reused to hold dst[1]
;
; Each loop iteration reads 4*mmsize bytes of input and writes mmsize bytes to
; each plane; with XMM registers (mmsize = 16, TODO confirm against x86inc)
; that is 8 samples per channel.
; NOTE(review): the loop body runs before the counter is tested, so len is
; assumed to be > 0 and a multiple of the per-iteration sample count; all
; loads/stores use mova, presumably the aligned move, so src and both planes
; are assumed to be mmsize-aligned -- confirm against the caller.
;------------------------------------------------------------------------------
%macro CONV_FLT_TO_S16P_2CH 0
; 3,4,6 is presumably x86inc's "3 args loaded, 4 GPRs, 6 vector registers";
; cglobal itself is defined outside this view.
cglobal conv_flt_to_s16p_2ch, 3,4,6, dst0, src, len, dst1
; lenq = 2*len = size in bytes of one int16_t output plane
lea lenq, [2*lend]
; fetch both plane pointers; dst[1] must be read before dst0q is overwritten
mov dst1q, [dst0q+gprsize]
mov dst0q, [dst0q ]
; point src and both planes at their ends (input is 4*lenq bytes long), then
; index them with a negative byte offset that counts up towards zero
lea srcq, [srcq+4*lenq]
add dst0q, lenq
add dst1q, lenq
neg lenq
; m5 = scale factor applied to every float before conversion (constant is
; defined elsewhere; presumably maps floats onto the int16_t range)
mova m5, [pf_s16_scale]
.loop:
; load four vectors of interleaved input; lenq counts output bytes per plane,
; and the input offset is 4x that
mova m0, [srcq+4*lenq ]
mova m1, [srcq+4*lenq+ mmsize]
mova m2, [srcq+4*lenq+2*mmsize]
mova m3, [srcq+4*lenq+3*mmsize]
; DEINT2_PS is defined elsewhere; judging by the stores below it leaves
; channel-0 samples in the first register and channel-1 samples in the second,
; using m4 as scratch -- TODO confirm
DEINT2_PS 0, 1, 4
DEINT2_PS 2, 3, 4
; scale the floats
mulps m0, m0, m5
mulps m1, m1, m5
mulps m2, m2, m5
mulps m3, m3, m5
; float -> int32, rounded according to the current MXCSR rounding mode
cvtps2dq m0, m0
cvtps2dq m1, m1
cvtps2dq m2, m2
cvtps2dq m3, m3
; int32 -> int16 with signed saturation; m0/m2 merge into the vector stored
; to dst0, m1/m3 into the vector stored to dst1
packssdw m0, m2
packssdw m1, m3
mova [dst0q+lenq], m0
mova [dst1q+lenq], m1
; advance by one output vector; keep looping while the offset is negative
add lenq, mmsize
jl .loop
REP_RET
%endmacro
; Instantiate the function once for SSE2 and, when AVX support is built in,
; once more for AVX (still with XMM registers).
INIT_XMM sse2
CONV_FLT_TO_S16P_2CH
%if HAVE_AVX
INIT_XMM avx
CONV_FLT_TO_S16P_2CH
%endif
...@@ -120,6 +120,11 @@ extern void ff_conv_s16_to_fltp_6ch_sse4 (float *const *dst, int16_t *src, ...@@ -120,6 +120,11 @@ extern void ff_conv_s16_to_fltp_6ch_sse4 (float *const *dst, int16_t *src,
extern void ff_conv_s16_to_fltp_6ch_avx (float *const *dst, int16_t *src, extern void ff_conv_s16_to_fltp_6ch_avx (float *const *dst, int16_t *src,
int len, int channels); int len, int channels);
/* SSE2 and AVX builds of the 2-channel flt to s16p conversion; both are
 * generated from the CONV_FLT_TO_S16P_2CH assembly macro. */
extern void ff_conv_flt_to_s16p_2ch_sse2(int16_t *const *dst, float *src,
int len, int channels);
extern void ff_conv_flt_to_s16p_2ch_avx (int16_t *const *dst, float *src,
int len, int channels);
av_cold void ff_audio_convert_init_x86(AudioConvert *ac) av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
{ {
#if HAVE_YASM #if HAVE_YASM
...@@ -175,6 +180,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) ...@@ -175,6 +180,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
2, 16, 8, "SSE2", ff_conv_s16_to_fltp_2ch_sse2); 2, 16, 8, "SSE2", ff_conv_s16_to_fltp_2ch_sse2);
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16, ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16,
6, 16, 4, "SSE2", ff_conv_s16_to_fltp_6ch_sse2); 6, 16, 4, "SSE2", ff_conv_s16_to_fltp_6ch_sse2);
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT,
2, 16, 8, "SSE2", ff_conv_flt_to_s16p_2ch_sse2);
} }
if (mm_flags & AV_CPU_FLAG_SSSE3 && HAVE_SSE) { if (mm_flags & AV_CPU_FLAG_SSSE3 && HAVE_SSE) {
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P, ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
...@@ -219,6 +226,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) ...@@ -219,6 +226,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
2, 16, 8, "AVX", ff_conv_s16_to_fltp_2ch_avx); 2, 16, 8, "AVX", ff_conv_s16_to_fltp_2ch_avx);
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16, ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16,
6, 16, 4, "AVX", ff_conv_s16_to_fltp_6ch_avx); 6, 16, 4, "AVX", ff_conv_s16_to_fltp_6ch_avx);
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT,
2, 16, 8, "AVX", ff_conv_flt_to_s16p_2ch_avx);
} }
#endif #endif
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment