Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
b30a3633
Commit
b30a3633
authored
Sep 25, 2012
by
Justin Ruggles
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
x86: af_volume: add SSE2/SSSE3/AVX-optimized s32 volume scaling
parent
f96f1e06
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
106 additions
and
3 deletions
+106
-3
af_volume.asm
libavfilter/x86/af_volume.asm
+85
-3
af_volume_init.c
libavfilter/x86/af_volume_init.c
+20
-0
x86inc.asm
libavutil/x86/x86inc.asm
+1
-0
No files found.
libavfilter/x86/af_volume.asm
View file @
b30a3633
...
...
@@ -19,12 +19,15 @@
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%include
"libavutil/x86/x86
inc
.asm"
%include
"libavutil/x86/x86
util
.asm"
; Read-only constants, placed in a section requested with 32-byte alignment.
; The order of the entries is kept as-is: the two 32-byte pd_* tables come
; first so that they stay at 32-byte-aligned offsets within the section.
SECTION_RODATA 32

; packed doubles, 4 lanes each
pd_1_256:       times 4 dq 0x3F70000000000000   ; IEEE-754 double 2^-8 (1.0/256)
pd_int32_max:   times 4 dq 0x41DFFFFFFFC00000   ; IEEE-754 double 2147483647.0

; packed 16-bit words, 8 lanes each
pw_1:           times 8 dw 1
pw_128:         times 8 dw 128

; packed 64-bit integers, 2 lanes
pq_128:         times 2 dq 128                  ; added to each 64-bit product
                                                ; in the ssse3/atom s32 code
SECTION_TEXT
...
...
@@ -54,3 +57,82 @@ cglobal scale_samples_s16, 4,4,4, dst, src, len, volume
sub
lenq
,
mmsize
jge
.
loop
REP_RET
;------------------------------------------------------------------------------
; void ff_scale_samples_s32(uint8_t *dst, const uint8_t *src, int len,
;                           int volume)
;
; Floating-point implementation, instantiated below for SSE2 (XMM) and
; AVX (YMM).  Each 32-bit sample is converted to double, multiplied by
; volume * (1.0 / 256), clamped from above against pd_int32_max and converted
; back to a 32-bit integer.
;
; The buffers are walked from the end towards the start, mmsize bytes
; (mmsize/4 samples) per iteration.
; NOTE(review): len is presumably a positive multiple of mmsize/4 (see
; samples_align in the C init code) - confirm against the caller.
;------------------------------------------------------------------------------

%macro SCALE_SAMPLES_S32 0
cglobal scale_samples_s32, 4,4,4, dst, src, len, volume
    ; m2 = upper clamp, one copy per double lane
    mova             m2, [pd_int32_max]

    ; xmm3 = volume replicated into all four dwords
%if ARCH_X86_32 && cpuflag(avx)
    vbroadcastss   xmm3, volumem
%else
    movd           xmm3, volumed
    pshufd         xmm3, xmm3, 0
%endif

    ; m3 = (double)volume * (1.0 / 256) in every lane
    CVTDQ2PD         m3, xmm3
    mulpd            m3, m3, [pd_1_256]

    ; lenq = byte offset of the last mmsize-sized chunk
    lea            lenq, [lend*4-mmsize]

.next_chunk:
    ; load both halves of the chunk before anything is written back
    CVTDQ2PD         m0, [srcq+lenq]
    CVTDQ2PD         m1, [srcq+lenq+mmsize/2]

    ; lower half: scale, clamp, back to int32
    mulpd            m0, m0, m3
    minpd            m0, m0, m2
    cvtpd2dq       xmm0, m0

    ; upper half: scale, clamp, back to int32
    mulpd            m1, m1, m3
    minpd            m1, m1, m2
    cvtpd2dq       xmm1, m1

    ; Only the upper bound is clamped explicitly.
    ; NOTE(review): cvtpd2dq is documented to return 0x80000000 for
    ; out-of-range input, which presumably covers the negative side - confirm.
%if cpuflag(avx)
    ; YMM build: each conversion produced four dwords (16 bytes); vmovdqa
    ; needs the destination address to be 16-byte aligned
    vmovdqa [dstq+lenq], xmm0
    vmovdqa [dstq+lenq+mmsize/2], xmm1
%else
    ; XMM build: each conversion produced two dwords (8 bytes)
    movq    [dstq+lenq], xmm0
    movq    [dstq+lenq+mmsize/2], xmm1
%endif

    ; step back one chunk; keep looping while the offset is still >= 0
    sub            lenq, mmsize
    jge .next_chunk
    REP_RET
%endmacro

; CVTDQ2PD selects the int32 -> double conversion mnemonic for each build.

; SSE2 build (128-bit registers)
%define CVTDQ2PD cvtdq2pd
INIT_XMM sse2
SCALE_SAMPLES_S32

; AVX build (256-bit registers)
INIT_YMM avx
%define CVTDQ2PD vcvtdq2pd
SCALE_SAMPLES_S32

%undef CVTDQ2PD
;------------------------------------------------------------------------------
; Integer-only variant of scale_samples_s32, built for SSSE3 with the "atom"
; suffix.
;
; Per sample it computes, on the absolute value of the input,
;     dst[i] = av_clipl_int32((src[i] * volume + 128) >> 8)
; and then re-applies the sign of src[i].
;
; NOTE: This is not bit-identical with the C version because it clips to
;       [-INT_MAX, INT_MAX] instead of [INT_MIN, INT_MAX]
;
; Register roles:
;   m4 = current source vector (kept for the final sign restore)
;   m5 = volume in every dword      m6 = 128 in every qword
;   m7 = zero                       m0-m3 = scratch
; The instruction order is deliberately left exactly as in the original.
;------------------------------------------------------------------------------
INIT_XMM ssse3, atom
cglobal scale_samples_s32, 4,4,8, dst, src, len, volume
    movd             m5, volumem
    pshufd           m5, m5, 0              ; broadcast volume
    mova             m6, [pq_128]           ; rounding term
    pxor             m7, m7                 ; all-zero compare operand
    lea            lenq, [lend*4-mmsize]    ; byte offset of the last vector
.next_vec:
    mova             m4, [srcq+lenq]
    pabsd            m3, m4                 ; m3 = |src|
    ; place |s0|,|s1| (m0) and |s2|,|s3| (m1) in the low dword of each qword
    pshufd           m0, m3, q0100
    pshufd           m1, m3, q0302
    ; 32x32 -> 64-bit unsigned products with volume
    pmuludq          m0, m5
    pmuludq          m1, m5
    paddq            m0, m6                 ; + 128
    paddq            m1, m6
    ; shift by 7 only; the last bit is dropped after saturation below
    psrlq            m0, 7
    psrlq            m1, 7
    shufps           m2, m0, m1, q3131      ; m2 = high dwords of the 4 results
    shufps           m0, m0, m1, q2020      ; m0 = low dwords of the 4 results
    pcmpgtd          m2, m7                 ; all-ones where the high part > 0
    por              m0, m2                 ; overflowed lanes -> 0xFFFFFFFF
    psrld            m0, 1                  ; final shift (8 in total)
    psignd           m0, m4                 ; restore the sign of the input
    mova  [dstq+lenq], m0
    sub            lenq, mmsize
    jge .next_vec
    REP_RET
libavfilter/x86/af_volume_init.c
View file @
b30a3633
...
...
@@ -25,6 +25,13 @@
/*
 * Prototypes of the x86 sample-scaling routines.  All of them share the
 * same signature:
 *   dst    - output buffer
 *   src    - input buffer
 *   len    - sample count
 *   volume - integer volume factor
 * NOTE(review): these are presumably the "ff_"-prefixed symbols produced by
 * the cglobal entries in af_volume.asm -- confirm against the build.
 */

/* 16-bit samples */
void ff_scale_samples_s16_sse2(uint8_t *dst, const uint8_t *src,
                               int len, int volume);

/* 32-bit samples */
void ff_scale_samples_s32_sse2(uint8_t *dst, const uint8_t *src,
                               int len, int volume);
void ff_scale_samples_s32_ssse3_atom(uint8_t *dst, const uint8_t *src,
                                     int len, int volume);
void ff_scale_samples_s32_avx(uint8_t *dst, const uint8_t *src,
                              int len, int volume);
void
ff_volume_init_x86
(
VolumeContext
*
vol
)
{
int
mm_flags
=
av_get_cpu_flags
();
...
...
@@ -35,5 +42,18 @@ void ff_volume_init_x86(VolumeContext *vol)
vol
->
scale_samples
=
ff_scale_samples_s16_sse2
;
vol
->
samples_align
=
8
;
}
}
else
if
(
sample_fmt
==
AV_SAMPLE_FMT_S32
)
{
if
(
EXTERNAL_SSE2
(
mm_flags
))
{
vol
->
scale_samples
=
ff_scale_samples_s32_sse2
;
vol
->
samples_align
=
4
;
}
if
(
EXTERNAL_SSSE3
(
mm_flags
)
&&
mm_flags
&
AV_CPU_FLAG_ATOM
)
{
vol
->
scale_samples
=
ff_scale_samples_s32_ssse3_atom
;
vol
->
samples_align
=
4
;
}
if
(
EXTERNAL_AVX
(
mm_flags
))
{
vol
->
scale_samples
=
ff_scale_samples_s32_avx
;
vol
->
samples_align
=
8
;
}
}
}
libavutil/x86/x86inc.asm
View file @
b30a3633
...
...
@@ -956,6 +956,7 @@ AVX_INSTR cmpps, 1, 0, 0
AVX_INSTR
cmpsd
,
1
,
0
,
0
AVX_INSTR
cmpss
,
1
,
0
,
0
AVX_INSTR
cvtdq2ps
,
1
,
0
,
0
AVX_INSTR
cvtpd2dq
,
1
,
0
,
0
AVX_INSTR
cvtps2dq
,
1
,
0
,
0
AVX_INSTR
divpd
,
1
,
0
,
0
AVX_INSTR
divps
,
1
,
0
,
0
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment