Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
ac7eb4cb
Commit
ac7eb4cb
authored
Sep 24, 2012
by
Justin Ruggles
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
float_dsp: add vector_dmul_scalar() to multiply a vector of doubles
Include x86-optimized versions for SSE2 and AVX.
parent
da025d11
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
89 additions
and
0 deletions
+89
-0
float_dsp.c
libavutil/float_dsp.c
+9
-0
float_dsp.h
libavutil/float_dsp.h
+15
-0
float_dsp.asm
libavutil/x86/float_dsp.asm
+45
-0
float_dsp_init.c
libavutil/x86/float_dsp_init.c
+9
-0
x86util.asm
libavutil/x86/x86util.asm
+11
-0
No files found.
libavutil/float_dsp.c
View file @
ac7eb4cb
...
...
@@ -44,11 +44,20 @@ static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
dst
[
i
]
=
src
[
i
]
*
mul
;
}
/**
 * Portable C implementation of vector_dmul_scalar.
 *
 * Writes src[k] * mul into dst[k] for every k in [0, len). Nothing is
 * written when len is zero or negative.
 *
 * @param dst  output vector
 * @param src  input vector
 * @param mul  scalar factor applied to every element
 * @param len  number of elements to process
 */
static void vector_dmul_scalar_c(double *dst, const double *src, double mul,
                                 int len)
{
    int idx = 0;

    while (idx < len) {
        dst[idx] = src[idx] * mul;
        idx++;
    }
}
void
avpriv_float_dsp_init
(
AVFloatDSPContext
*
fdsp
,
int
bit_exact
)
{
fdsp
->
vector_fmul
=
vector_fmul_c
;
fdsp
->
vector_fmac_scalar
=
vector_fmac_scalar_c
;
fdsp
->
vector_fmul_scalar
=
vector_fmul_scalar_c
;
fdsp
->
vector_dmul_scalar
=
vector_dmul_scalar_c
;
#if ARCH_ARM
ff_float_dsp_init_arm
(
fdsp
);
...
...
libavutil/float_dsp.h
View file @
ac7eb4cb
...
...
@@ -66,6 +66,21 @@ typedef struct AVFloatDSPContext {
*/
void
(
*
vector_fmul_scalar
)(
float
*
dst
,
const
float
*
src
,
float
mul
,
int
len
);
/**
* Multiply a vector of double by a scalar double. Source and
* destination vectors must overlap exactly or not at all.
*
* @param dst result vector
* constraints: 32-byte aligned
* @param src input vector
* constraints: 32-byte aligned
* @param mul scalar value
* @param len length of vector
* constraints: multiple of 8
*/
void
(
*
vector_dmul_scalar
)(
double
*
dst
,
const
double
*
src
,
double
mul
,
int
len
);
}
AVFloatDSPContext
;
/**
...
...
libavutil/x86/float_dsp.asm
View file @
ac7eb4cb
...
...
@@ -114,3 +114,48 @@ cglobal vector_fmul_scalar, 4,4,3, dst, src, mul, len
INIT_XMM
sse
VECTOR_FMUL_SCALAR
;------------------------------------------------------------------------------
; void ff_vector_dmul_scalar(double *dst, const double *src, double mul,
;                            int len)
;
; dst[i] = src[i] * mul
;
; Each loop iteration handles 2*mmsize bytes with aligned stores (mova),
; walking from the end of the vectors towards the start.
;------------------------------------------------------------------------------

%macro VECTOR_DMUL_SCALAR 0
%if ARCH_X86_32
; All arguments live on the stack here, and the 8-byte double 'mul' takes two
; 4-byte slots. 'len' is therefore in the fifth slot, not the fourth: name that
; slot 'lenaddr' and load it by hand into the 'len' register. Only three
; arguments are auto-loaded; the fourth register is reserved for len.
cglobal vector_dmul_scalar, 3,4,3, dst, src, mul, len, lenaddr
    mov          lenq, lenaddrm
%elif UNIX64
; mul is not an integer argument, so len is the third integer argument.
cglobal vector_dmul_scalar, 3,3,3, dst, src, len
%else
cglobal vector_dmul_scalar, 4,4,3, dst, src, mul, len
%endif
%if ARCH_X86_32
    ; Broadcast from the stack into the full-width register m0. Using xmm0
    ; here would make the AVX build emit vbroadcastsd with an xmm destination,
    ; which is not encodable and would leave the upper lane unfilled.
    VBROADCASTSD   m0, mulm
%else
%if WIN64
    ; mul is taken from xmm2: duplicate the low double across the register,
    ; then rename so the loop below can refer to it as m0.
    movlhps      xmm2, xmm2
%if cpuflag(avx)
    vinsertf128  ymm2, ymm2, xmm2, 1
%endif
    SWAP 0, 2
%else
    ; mul is taken from xmm0: duplicate the low double across the register.
    movlhps      xmm0, xmm0
%if cpuflag(avx)
    vinsertf128  ymm0, ymm0, xmm0, 1
%endif
%endif
%endif
    ; lenq = byte offset of the last 2*mmsize-byte chunk.
    lea          lenq, [lend*8-2*mmsize]
.loop:
    mulpd          m1, m0, [srcq+lenq       ]
    mulpd          m2, m0, [srcq+lenq+mmsize]
    mova   [dstq+lenq       ], m1
    mova   [dstq+lenq+mmsize], m2
    sub          lenq, 2*mmsize
    jge .loop
    REP_RET
%endmacro
; Expand VECTOR_DMUL_SCALAR once per instruction set: the SSE2 version is
; always built, the AVX version only when HAVE_AVX_EXTERNAL is set.
INIT_XMM sse2
VECTOR_DMUL_SCALAR
%if HAVE_AVX_EXTERNAL
INIT_YMM avx
VECTOR_DMUL_SCALAR
%endif
libavutil/x86/float_dsp_init.c
View file @
ac7eb4cb
...
...
@@ -35,6 +35,11 @@ extern void ff_vector_fmac_scalar_avx(float *dst, const float *src, float mul,
extern
void
ff_vector_fmul_scalar_sse
(
float
*
dst
,
const
float
*
src
,
float
mul
,
int
len
);
extern
void
ff_vector_dmul_scalar_sse2
(
double
*
dst
,
const
double
*
src
,
double
mul
,
int
len
);
extern
void
ff_vector_dmul_scalar_avx
(
double
*
dst
,
const
double
*
src
,
double
mul
,
int
len
);
void
ff_float_dsp_init_x86
(
AVFloatDSPContext
*
fdsp
)
{
int
mm_flags
=
av_get_cpu_flags
();
...
...
@@ -44,8 +49,12 @@ void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
fdsp
->
vector_fmac_scalar
=
ff_vector_fmac_scalar_sse
;
fdsp
->
vector_fmul_scalar
=
ff_vector_fmul_scalar_sse
;
}
if
(
EXTERNAL_SSE2
(
mm_flags
))
{
fdsp
->
vector_dmul_scalar
=
ff_vector_dmul_scalar_sse2
;
}
if
(
EXTERNAL_AVX
(
mm_flags
))
{
fdsp
->
vector_fmul
=
ff_vector_fmul_avx
;
fdsp
->
vector_fmac_scalar
=
ff_vector_fmac_scalar_avx
;
fdsp
->
vector_dmul_scalar
=
ff_vector_dmul_scalar_avx
;
}
}
libavutil/x86/x86util.asm
View file @
ac7eb4cb
...
...
@@ -631,6 +631,17 @@
%endif
%endmacro
; Load one 64-bit double from memory and replicate it across the destination
; register.
;   %1: destination register
;   %2: 64-bit memory source
; NOTE: the instruction is chosen from the active cpuflags and mmsize, not
; from the width of %1. With avx and mmsize == 32 the destination is expected
; to be a ymm register (pass m0 rather than xmm0).
%macro VBROADCASTSD 2 ; dst xmm/ymm, src m64
%if cpuflag(avx) && mmsize == 32
    vbroadcastsd %1, %2
%elif cpuflag(sse3)
    movddup      %1, %2
%else ; sse2
    ; No single broadcast instruction: load the low half, then copy it to the
    ; high half.
    movsd        %1, %2
    movlhps      %1, %1
%endif
%endmacro
%macro
SHUFFLE_MASK_W
8
%
rep
8
%
if
%1
>=
0x80
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment