Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
d5b4e50c
Commit
d5b4e50c
authored
May 02, 2012
by
Justin Ruggles
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
lavr: x86: optimized 6-channel fltp to s16 conversion
parent
a58a0139
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
129 additions
and
0 deletions
+129
-0
audio_convert.asm
libavresample/x86/audio_convert.asm
+114
-0
audio_convert_init.c
libavresample/x86/audio_convert_init.c
+15
-0
No files found.
libavresample/x86/audio_convert.asm
View file @
d5b4e50c
...
@@ -581,6 +581,120 @@ CONV_FLTP_TO_S16_2CH
...
@@ -581,6 +581,120 @@ CONV_FLTP_TO_S16_2CH
INIT_XMM
ssse3
INIT_XMM
ssse3
CONV_FLTP_TO_S16_2CH
CONV_FLTP_TO_S16_2CH
;------------------------------------------------------------------------------
; void ff_conv_fltp_to_s16_6ch(int16_t *dst, float *const *src, int len,
;                              int channels);
;
; Converts six planar float channels into one interleaved int16 stream.
;
;   dst      - interleaved output; advanced by 3*mmsize bytes per iteration
;   src      - array of six channel pointers (src[0] .. src[5])
;   len      - sample count per channel; decremented by mmsize/4 per iteration.
;              The loop body always runs at least once and repeats while the
;              remaining count is > 0, so len is presumably a positive multiple
;              of mmsize/4 -- TODO confirm against the caller.
;   channels - not read by this code
;
; Each sample is multiplied by the packed constant at pf_s16_scale, converted
; to a 32-bit integer and narrowed to 16 bits with signed saturation
; (packssdw).  The lane comments below name every 16-bit word by its index in
; the interleaved output of the current iteration:
;   index = sample_in_iteration * 6 + channel
;------------------------------------------------------------------------------
%macro CONV_FLTP_TO_S16_6CH 0
%if ARCH_X86_64
cglobal conv_fltp_to_s16_6ch, 3,8,7, dst, src, len, src1, src2, src3, src4, src5
%else
; Only dst and src are loaded into registers on x86-32; the remaining GPRs hold
; the per-channel offsets, so the length is accessed through r2m.
cglobal conv_fltp_to_s16_6ch, 2,7,7, dst, src, src1, src2, src3, src4, src5
%define lend dword r2m
%endif
    ; Fetch the pointers of channels 1-5, then replace srcq by the channel 0
    ; pointer.  All five loads must precede the overwrite of srcq.
    mov        src1q, [srcq+1*gprsize]
    mov        src2q, [srcq+2*gprsize]
    mov        src3q, [srcq+3*gprsize]
    mov        src4q, [srcq+4*gprsize]
    mov        src5q, [srcq+5*gprsize]
    mov         srcq, [srcq]
    ; Turn the channel 1-5 pointers into offsets relative to channel 0, so that
    ; advancing srcq alone steps through all six planes.
    sub        src1q, srcq
    sub        src2q, srcq
    sub        src3q, srcq
    sub        src4q, srcq
    sub        src5q, srcq
    ; Scale factor stays in xmm6 for the whole loop.  The register is spelled
    ; xmm6 (not m6) because the SSE/MMX instantiation needs an XMM register here.
    ; NOTE(review): xmm6 is callee-saved on Win64 -- confirm that cglobal
    ; preserves it when this macro is instantiated under INIT_MMX.
    movaps      xmm6, [pf_s16_scale]
.loop:
%if cpuflag(sse2)
    ; ---- SSE2 / AVX path: 4 samples x 6 channels per iteration ----
    ; scale: mN = channel N, samples 0..3
    mulps         m0, m6, [srcq]
    mulps         m1, m6, [srcq+src1q]
    mulps         m2, m6, [srcq+src2q]
    mulps         m3, m6, [srcq+src3q]
    mulps         m4, m6, [srcq+src4q]
    mulps         m5, m6, [srcq+src5q]
    ; float -> int32
    cvtps2dq      m0, m0
    cvtps2dq      m1, m1
    cvtps2dq      m2, m2
    cvtps2dq      m3, m3
    cvtps2dq      m4, m4
    cvtps2dq      m5, m5
    ; int32 -> int16 with saturation, pairing channel N with channel N+3
    packssdw      m0, m3                ; m0 = { 0,  6, 12, 18 |  3,  9, 15, 21}
    packssdw      m1, m4                ; m1 = { 1,  7, 13, 19 |  4, 10, 16, 22}
    packssdw      m2, m5                ; m2 = { 2,  8, 14, 20 |  5, 11, 17, 23}
    ; interleave 16-bit words into adjacent-channel pairs
    movhlps       m3, m0                ; m3 = { 3,  9, 15, 21 | upper half unused}
    punpcklwd     m0, m1                ; m0 = { 0,  1,  6,  7, 12, 13, 18, 19}
    punpckhwd     m1, m2                ; m1 = { 4,  5, 10, 11, 16, 17, 22, 23}
    punpcklwd     m2, m3                ; m2 = { 2,  3,  8,  9, 14, 15, 20, 21}
    ; gather matching 32-bit pairs from two registers at a time
    shufps        m3, m0, m2, q2020     ; m3 = { 0,  1, 12, 13,  2,  3, 14, 15}
    shufps        m0, m1, q2031         ; m0 = { 6,  7, 18, 19,  4,  5, 16, 17}
    shufps        m2, m1, q3131         ; m2 = { 8,  9, 20, 21, 10, 11, 22, 23}
    ; final 32-bit reordering into output order
    shufps        m1, m2, m3, q3120     ; m1 = { 8 ..  15}  second output vector
    shufps        m3, m0, q0220         ; m3 = { 0 ..   7}  first output vector
    shufps        m0, m2, q3113         ; m0 = {16 ..  23}  third output vector
    mova  [dstq], m3
    mova  [dstq+1*mmsize], m1
    mova  [dstq+2*mmsize], m0
%else
    ; ---- SSE path (MMX integer registers): 2 samples x 6 channels ----
    ; Only the low two floats of each XMM register are loaded; the high two
    ; lanes are multiplied as well, but cvtps2pi below reads only the low pair.
    movlps      xmm0, [srcq]
    movlps      xmm1, [srcq+src1q]
    movlps      xmm2, [srcq+src2q]
    movlps      xmm3, [srcq+src3q]
    movlps      xmm4, [srcq+src4q]
    movlps      xmm5, [srcq+src5q]
    mulps       xmm0, xmm6
    mulps       xmm1, xmm6
    mulps       xmm2, xmm6
    mulps       xmm3, xmm6
    mulps       xmm4, xmm6
    mulps       xmm5, xmm6
    ; low two floats -> two int32 in the matching MMX register
    cvtps2pi     mm0, xmm0
    cvtps2pi     mm1, xmm1
    cvtps2pi     mm2, xmm2
    cvtps2pi     mm3, xmm3
    cvtps2pi     mm4, xmm4
    cvtps2pi     mm5, xmm5
    ; int32 -> int16 with saturation, pairing channel N with channel N+3
    packssdw     mm0, mm3               ; mm0 = { 0,  6 |  3,  9}
    packssdw     mm1, mm4               ; mm1 = { 1,  7 |  4, 10}
    packssdw     mm2, mm5               ; mm2 = { 2,  8 |  5, 11}
    ; interleave 16-bit words
    pshufw       mm3, mm0, q1032        ; mm3 = { 3,  9,  0,  6}  halves swapped
    punpcklwd    mm0, mm1               ; mm0 = { 0,  1,  6,  7}
    punpckhwd    mm1, mm2               ; mm1 = { 4,  5, 10, 11}
    punpcklwd    mm2, mm3               ; mm2 = { 2,  3,  8,  9}
    ; interleave 32-bit pairs
    pshufw       mm3, mm0, q1032        ; mm3 = { 6,  7,  0,  1}  halves swapped
    punpckldq    mm0, mm2               ; mm0 = { 0,  1,  2,  3}  first output
    punpckhdq    mm2, mm1               ; mm2 = { 8,  9, 10, 11}  third output
    punpckldq    mm1, mm3               ; mm1 = { 4,  5,  6,  7}  second output
    mova  [dstq], mm0
    mova  [dstq+1*mmsize], mm1
    mova  [dstq+2*mmsize], mm2
%endif
    ; Advance: mmsize bytes consumed per plane, 3*mmsize bytes produced,
    ; mmsize/4 samples per channel done.
    add         srcq, mmsize
    add         dstq, 3*mmsize
    sub         lend, mmsize/4
    jg .loop
%if mmsize == 8
    ; MMX registers were used: leave MMX state before returning.
    emms
    RET
%else
    REP_RET
%endif
%endmacro

; Instantiate one function per instruction-set variant.
INIT_MMX sse
CONV_FLTP_TO_S16_6CH
INIT_XMM sse2
CONV_FLTP_TO_S16_6CH
%if HAVE_AVX
INIT_XMM avx
CONV_FLTP_TO_S16_6CH
%endif
;-----------------------------------------------------------------------------
;-----------------------------------------------------------------------------
; void ff_conv_fltp_to_flt_6ch(float *dst, float *const *src, int len,
; void ff_conv_fltp_to_flt_6ch(float *dst, float *const *src, int len,
; int channels);
; int channels);
...
...
libavresample/x86/audio_convert_init.c
View file @
d5b4e50c
...
@@ -71,6 +71,13 @@ extern void ff_conv_fltp_to_s16_2ch_sse2 (int16_t *dst, float *const *src,
...
@@ -71,6 +71,13 @@ extern void ff_conv_fltp_to_s16_2ch_sse2 (int16_t *dst, float *const *src,
extern
void
ff_conv_fltp_to_s16_2ch_ssse3
(
int16_t
*
dst
,
float
*
const
*
src
,
extern
void
ff_conv_fltp_to_s16_2ch_ssse3
(
int16_t
*
dst
,
float
*
const
*
src
,
int
len
,
int
channels
);
int
len
,
int
channels
);
/* 6-channel planar float -> interleaved int16 converters, one prototype per
 * instruction-set variant (SSE, SSE2, AVX). */
extern void ff_conv_fltp_to_s16_6ch_sse(int16_t *dst,
                                        float *const *src,
                                        int len,
                                        int channels);
extern void ff_conv_fltp_to_s16_6ch_sse2(int16_t *dst,
                                         float *const *src,
                                         int len,
                                         int channels);
extern void ff_conv_fltp_to_s16_6ch_avx(int16_t *dst,
                                        float *const *src,
                                        int len,
                                        int channels);
extern
void
ff_conv_fltp_to_flt_6ch_mmx
(
float
*
dst
,
float
*
const
*
src
,
int
len
,
extern
void
ff_conv_fltp_to_flt_6ch_mmx
(
float
*
dst
,
float
*
const
*
src
,
int
len
,
int
channels
);
int
channels
);
extern
void
ff_conv_fltp_to_flt_6ch_sse4
(
float
*
dst
,
float
*
const
*
src
,
int
len
,
extern
void
ff_conv_fltp_to_flt_6ch_sse4
(
float
*
dst
,
float
*
const
*
src
,
int
len
,
...
@@ -89,12 +96,18 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
...
@@ -89,12 +96,18 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
ff_audio_convert_set_func
(
ac
,
AV_SAMPLE_FMT_FLT
,
AV_SAMPLE_FMT_FLTP
,
ff_audio_convert_set_func
(
ac
,
AV_SAMPLE_FMT_FLT
,
AV_SAMPLE_FMT_FLTP
,
6
,
1
,
4
,
"MMX"
,
ff_conv_fltp_to_flt_6ch_mmx
);
6
,
1
,
4
,
"MMX"
,
ff_conv_fltp_to_flt_6ch_mmx
);
}
}
if
(
mm_flags
&
AV_CPU_FLAG_SSE
&&
HAVE_SSE
)
{
ff_audio_convert_set_func
(
ac
,
AV_SAMPLE_FMT_S16
,
AV_SAMPLE_FMT_FLTP
,
6
,
1
,
2
,
"SSE"
,
ff_conv_fltp_to_s16_6ch_sse
);
}
if
(
mm_flags
&
AV_CPU_FLAG_SSE2
&&
HAVE_SSE
)
{
if
(
mm_flags
&
AV_CPU_FLAG_SSE2
&&
HAVE_SSE
)
{
if
(
!
(
mm_flags
&
AV_CPU_FLAG_SSE2SLOW
))
{
if
(
!
(
mm_flags
&
AV_CPU_FLAG_SSE2SLOW
))
{
ff_audio_convert_set_func
(
ac
,
AV_SAMPLE_FMT_S16
,
AV_SAMPLE_FMT_S32
,
ff_audio_convert_set_func
(
ac
,
AV_SAMPLE_FMT_S16
,
AV_SAMPLE_FMT_S32
,
0
,
16
,
16
,
"SSE2"
,
ff_conv_s32_to_s16_sse2
);
0
,
16
,
16
,
"SSE2"
,
ff_conv_s32_to_s16_sse2
);
ff_audio_convert_set_func
(
ac
,
AV_SAMPLE_FMT_S16
,
AV_SAMPLE_FMT_S16P
,
ff_audio_convert_set_func
(
ac
,
AV_SAMPLE_FMT_S16
,
AV_SAMPLE_FMT_S16P
,
6
,
16
,
8
,
"SSE2"
,
ff_conv_s16p_to_s16_6ch_sse2
);
6
,
16
,
8
,
"SSE2"
,
ff_conv_s16p_to_s16_6ch_sse2
);
ff_audio_convert_set_func
(
ac
,
AV_SAMPLE_FMT_S16
,
AV_SAMPLE_FMT_FLTP
,
6
,
16
,
4
,
"SSE2"
,
ff_conv_fltp_to_s16_6ch_sse2
);
}
else
{
}
else
{
ff_audio_convert_set_func
(
ac
,
AV_SAMPLE_FMT_S16
,
AV_SAMPLE_FMT_S16P
,
ff_audio_convert_set_func
(
ac
,
AV_SAMPLE_FMT_S16
,
AV_SAMPLE_FMT_S16P
,
6
,
1
,
4
,
"SSE2SLOW"
,
ff_conv_s16p_to_s16_6ch_sse2slow
);
6
,
1
,
4
,
"SSE2SLOW"
,
ff_conv_s16p_to_s16_6ch_sse2slow
);
...
@@ -143,6 +156,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
...
@@ -143,6 +156,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
2
,
16
,
8
,
"AVX"
,
ff_conv_s16p_to_flt_2ch_avx
);
2
,
16
,
8
,
"AVX"
,
ff_conv_s16p_to_flt_2ch_avx
);
ff_audio_convert_set_func
(
ac
,
AV_SAMPLE_FMT_FLT
,
AV_SAMPLE_FMT_S16P
,
ff_audio_convert_set_func
(
ac
,
AV_SAMPLE_FMT_FLT
,
AV_SAMPLE_FMT_S16P
,
6
,
16
,
4
,
"AVX"
,
ff_conv_s16p_to_flt_6ch_avx
);
6
,
16
,
4
,
"AVX"
,
ff_conv_s16p_to_flt_6ch_avx
);
ff_audio_convert_set_func
(
ac
,
AV_SAMPLE_FMT_S16
,
AV_SAMPLE_FMT_FLTP
,
6
,
16
,
4
,
"AVX"
,
ff_conv_fltp_to_s16_6ch_avx
);
ff_audio_convert_set_func
(
ac
,
AV_SAMPLE_FMT_FLT
,
AV_SAMPLE_FMT_FLTP
,
ff_audio_convert_set_func
(
ac
,
AV_SAMPLE_FMT_FLT
,
AV_SAMPLE_FMT_FLTP
,
6
,
16
,
4
,
"AVX"
,
ff_conv_fltp_to_flt_6ch_avx
);
6
,
16
,
4
,
"AVX"
,
ff_conv_fltp_to_flt_6ch_avx
);
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment