Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
55aa03b9
Commit
55aa03b9
authored
Jan 20, 2013
by
Ronald S. Bultje
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
floatdsp: move vector_fmul_add from dsputil to avfloatdsp.
parent
0881cbf3
Hide whitespace changes
Inline
Side-by-side
Showing
18 changed files
with
131 additions
and
108 deletions
+131
-108
aacsbr.c
libavcodec/aacsbr.c
+5
-5
dsputil_init_neon.c
libavcodec/arm/dsputil_init_neon.c
+0
-3
dsputil_neon.S
libavcodec/arm/dsputil_neon.S
+0
-27
dsputil.c
libavcodec/dsputil.c
+0
-7
dsputil.h
libavcodec/dsputil.h
+0
-2
float_altivec.c
libavcodec/ppc/float_altivec.c
+0
-25
wmadec.c
libavcodec/wmadec.c
+4
-4
dsputil.asm
libavcodec/x86/dsputil.asm
+0
-28
dsputil_mmx.c
libavcodec/x86/dsputil_mmx.c
+0
-7
float_dsp_init_neon.c
libavutil/arm/float_dsp_init_neon.c
+4
-0
float_dsp_neon.S
libavutil/arm/float_dsp_neon.S
+27
-0
float_dsp.c
libavutil/float_dsp.c
+9
-0
float_dsp.h
libavutil/float_dsp.h
+18
-0
float_dsp_altivec.c
libavutil/ppc/float_dsp_altivec.c
+24
-0
float_dsp_altivec.h
libavutil/ppc/float_dsp_altivec.h
+4
-0
float_dsp_init.c
libavutil/ppc/float_dsp_init.c
+1
-0
float_dsp.asm
libavutil/x86/float_dsp.asm
+28
-0
float_dsp_init.c
libavutil/x86/float_dsp_init.c
+7
-0
No files found.
libavcodec/aacsbr.c
View file @
55aa03b9
...
...
@@ -1172,8 +1172,8 @@ static void sbr_qmf_analysis(DSPContext *dsp, FFTContext *mdct,
* Synthesis QMF Bank (14496-3 sp04 p206) and Downsampled Synthesis QMF Bank
* (14496-3 sp04 p206)
*/
static
void
sbr_qmf_synthesis
(
DSPContext
*
dsp
,
FFTContext
*
mdct
,
SBRDSPContext
*
sbrdsp
,
AVFloatDSPContext
*
f
dsp
,
static
void
sbr_qmf_synthesis
(
FFTContext
*
mdct
,
SBRDSPContext
*
sbrdsp
,
AVFloatDSPContext
*
dsp
,
float
*
out
,
float
X
[
2
][
38
][
64
],
float
mdct_buf
[
2
][
64
],
float
*
v0
,
int
*
v_off
,
const
unsigned
int
div
)
...
...
@@ -1204,7 +1204,7 @@ static void sbr_qmf_synthesis(DSPContext *dsp, FFTContext *mdct,
mdct
->
imdct_half
(
mdct
,
mdct_buf
[
1
],
X
[
1
][
i
]);
sbrdsp
->
qmf_deint_bfly
(
v
,
mdct_buf
[
1
],
mdct_buf
[
0
]);
}
fdsp
->
vector_fmul
(
out
,
v
,
sbr_qmf_window
,
64
>>
div
);
dsp
->
vector_fmul
(
out
,
v
,
sbr_qmf_window
,
64
>>
div
);
dsp
->
vector_fmul_add
(
out
,
v
+
(
192
>>
div
),
sbr_qmf_window
+
(
64
>>
div
),
out
,
64
>>
div
);
dsp
->
vector_fmul_add
(
out
,
v
+
(
256
>>
div
),
sbr_qmf_window
+
(
128
>>
div
),
out
,
64
>>
div
);
dsp
->
vector_fmul_add
(
out
,
v
+
(
448
>>
div
),
sbr_qmf_window
+
(
192
>>
div
),
out
,
64
>>
div
);
...
...
@@ -1702,13 +1702,13 @@ void ff_sbr_apply(AACContext *ac, SpectralBandReplication *sbr, int id_aac,
nch
=
2
;
}
sbr_qmf_synthesis
(
&
ac
->
dsp
,
&
sbr
->
mdct
,
&
sbr
->
dsp
,
&
ac
->
fdsp
,
sbr_qmf_synthesis
(
&
sbr
->
mdct
,
&
sbr
->
dsp
,
&
ac
->
fdsp
,
L
,
sbr
->
X
[
0
],
sbr
->
qmf_filter_scratch
,
sbr
->
data
[
0
].
synthesis_filterbank_samples
,
&
sbr
->
data
[
0
].
synthesis_filterbank_samples_offset
,
downsampled
);
if
(
nch
==
2
)
sbr_qmf_synthesis
(
&
ac
->
dsp
,
&
sbr
->
mdct
,
&
sbr
->
dsp
,
&
ac
->
fdsp
,
sbr_qmf_synthesis
(
&
sbr
->
mdct
,
&
sbr
->
dsp
,
&
ac
->
fdsp
,
R
,
sbr
->
X
[
1
],
sbr
->
qmf_filter_scratch
,
sbr
->
data
[
1
].
synthesis_filterbank_samples
,
&
sbr
->
data
[
1
].
synthesis_filterbank_samples_offset
,
...
...
libavcodec/arm/dsputil_init_neon.c
View file @
55aa03b9
...
...
@@ -146,8 +146,6 @@ void ff_butterflies_float_neon(float *v1, float *v2, int len);
float
ff_scalarproduct_float_neon
(
const
float
*
v1
,
const
float
*
v2
,
int
len
);
void
ff_vector_fmul_reverse_neon
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
int
len
);
void
ff_vector_fmul_add_neon
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
len
);
void
ff_vector_clipf_neon
(
float
*
dst
,
const
float
*
src
,
float
min
,
float
max
,
int
len
);
...
...
@@ -301,7 +299,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
c
->
butterflies_float
=
ff_butterflies_float_neon
;
c
->
scalarproduct_float
=
ff_scalarproduct_float_neon
;
c
->
vector_fmul_reverse
=
ff_vector_fmul_reverse_neon
;
c
->
vector_fmul_add
=
ff_vector_fmul_add_neon
;
c
->
vector_clipf
=
ff_vector_clipf_neon
;
c
->
vector_clip_int32
=
ff_vector_clip_int32_neon
;
...
...
libavcodec/arm/dsputil_neon.S
View file @
55aa03b9
...
...
@@ -580,33 +580,6 @@ function ff_vector_fmul_reverse_neon, export=1
bx lr
endfunc
@ ff_vector_fmul_add_neon(dst, src0, src1, src2, len):
@   dst[i] = src0[i] * src1[i] + src2[i]
@ Register use as read from the code below: r0 = dst (store pointer),
@ r1 = src0 and r2 = src1 (multiplied together), r3 = src2 (added),
@ r12 = element count loaded from the stack.
@ Every load and store carries a :128 alignment qualifier, so all four
@ pointers have to be 16-byte aligned. Eight floats are handled per iteration
@ and the loop only exits when the count reaches exactly zero, so len has to
@ be a positive multiple of 8.
function ff_vector_fmul_add_neon, export=1
@ r12 = len (read from the stack)
ldr r12, [sp]
@ Prologue: fetch the first 8 elements of every input and form the products.
vld1.32 {q0-q1}, [r1,:128]!
vld1.32 {q8-q9}, [r2,:128]!
vld1.32 {q2-q3}, [r3,:128]!
vmul.f32 q10, q0, q8
vmul.f32 q11, q1, q9
@ Loop head: finish the current group, q12-q13 = src2 + src0 * src1.
1: vadd.f32 q12, q2, q10
vadd.f32 q13, q3, q11
@ Prefetch hints for the three input streams.
pld [r1, #16]
pld [r2, #16]
pld [r3, #16]
@ One group of 8 accounted for; leave the loop when nothing remains.
subs r12, r12, #8
beq 2f
@ Load and multiply the next group before the current result is stored,
@ so the q10-q11 products are ready when the loop head runs again.
vld1.32 {q0}, [r1,:128]!
vld1.32 {q8}, [r2,:128]!
vmul.f32 q10, q0, q8
vld1.32 {q1}, [r1,:128]!
vld1.32 {q9}, [r2,:128]!
vmul.f32 q11, q1, q9
vld1.32 {q2-q3}, [r3,:128]!
vst1.32 {q12-q13},[r0,:128]!
b 1b
@ Epilogue: store the final group and return.
2: vst1.32 {q12-q13},[r0,:128]!
bx lr
endfunc
function ff_vector_clipf_neon, export=1
VFP vdup.32 q1, d0[1]
VFP vdup.32 q0, d0[0]
...
...
libavcodec/dsputil.c
View file @
55aa03b9
...
...
@@ -2360,12 +2360,6 @@ static void vector_fmul_reverse_c(float *dst, const float *src0, const float *sr
dst
[
i
]
=
src0
[
i
]
*
src1
[
-
i
];
}
/**
 * Plain C multiply-add over float vectors:
 * dst[n] = src0[n] * src1[n] + src2[n] for every n in [0, len).
 *
 * Element n of the output depends only on element n of the inputs.
 * Nothing is written when len is zero or negative.
 */
static void vector_fmul_add_c(float *dst, const float *src0, const float *src1,
                              const float *src2, int len)
{
    int idx;

    for (idx = 0; idx < len; ++idx) {
        dst[idx] = src0[idx] * src1[idx] + src2[idx];
    }
}
static
void
butterflies_float_c
(
float
*
restrict
v1
,
float
*
restrict
v2
,
int
len
)
{
...
...
@@ -2714,7 +2708,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
c
->
add_8x8basis
=
add_8x8basis_c
;
c
->
vector_fmul_reverse
=
vector_fmul_reverse_c
;
c
->
vector_fmul_add
=
vector_fmul_add_c
;
c
->
vector_clipf
=
vector_clipf_c
;
c
->
scalarproduct_int16
=
scalarproduct_int16_c
;
c
->
scalarproduct_and_madd_int16
=
scalarproduct_and_madd_int16_c
;
...
...
libavcodec/dsputil.h
View file @
55aa03b9
...
...
@@ -342,8 +342,6 @@ typedef struct DSPContext {
/* assume len is a multiple of 16, and arrays are 32-byte aligned */
void
(
*
vector_fmul_reverse
)(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
int
len
);
/* assume len is a multiple of 8, and src arrays are 16-byte aligned */
void
(
*
vector_fmul_add
)(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
len
);
/* assume len is a multiple of 8, and arrays are 16-byte aligned */
void
(
*
vector_clipf
)(
float
*
dst
/* align 16 */
,
const
float
*
src
/* align 16 */
,
float
min
,
float
max
,
int
len
/* align 16 */
);
/**
...
...
libavcodec/ppc/float_altivec.c
View file @
55aa03b9
...
...
@@ -51,32 +51,7 @@ static void vector_fmul_reverse_altivec(float *dst, const float *src0,
}
}
static
void
vector_fmul_add_altivec
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
len
)
{
int
i
;
vector
float
d
,
s0
,
s1
,
s2
,
t0
,
t1
,
edges
;
vector
unsigned
char
align
=
vec_lvsr
(
0
,
dst
),
mask
=
vec_lvsl
(
0
,
dst
);
for
(
i
=
0
;
i
<
len
-
3
;
i
+=
4
)
{
t0
=
vec_ld
(
0
,
dst
+
i
);
t1
=
vec_ld
(
15
,
dst
+
i
);
s0
=
vec_ld
(
0
,
src0
+
i
);
s1
=
vec_ld
(
0
,
src1
+
i
);
s2
=
vec_ld
(
0
,
src2
+
i
);
edges
=
vec_perm
(
t1
,
t0
,
mask
);
d
=
vec_madd
(
s0
,
s1
,
s2
);
t1
=
vec_perm
(
d
,
edges
,
align
);
t0
=
vec_perm
(
edges
,
d
,
align
);
vec_st
(
t1
,
15
,
dst
+
i
);
vec_st
(
t0
,
0
,
dst
+
i
);
}
}
/**
 * Point the float vector entries of a DSPContext at the AltiVec routines
 * defined in this file.
 *
 * @param c     context whose vector_fmul_reverse and vector_fmul_add
 *              pointers are overwritten
 * @param avctx not used by this function
 */
void ff_float_init_altivec(DSPContext *c, AVCodecContext *avctx)
{
    c->vector_fmul_add     = vector_fmul_add_altivec;
    c->vector_fmul_reverse = vector_fmul_reverse_altivec;
}
libavcodec/wmadec.c
View file @
55aa03b9
...
...
@@ -379,16 +379,16 @@ static void wma_window(WMACodecContext *s, float *out)
block_len
=
s
->
block_len
;
bsize
=
s
->
frame_len_bits
-
s
->
block_len_bits
;
s
->
dsp
.
vector_fmul_add
(
out
,
in
,
s
->
windows
[
bsize
],
out
,
block_len
);
s
->
f
dsp
.
vector_fmul_add
(
out
,
in
,
s
->
windows
[
bsize
],
out
,
block_len
);
}
else
{
block_len
=
1
<<
s
->
prev_block_len_bits
;
n
=
(
s
->
block_len
-
block_len
)
/
2
;
bsize
=
s
->
frame_len_bits
-
s
->
prev_block_len_bits
;
s
->
dsp
.
vector_fmul_add
(
out
+
n
,
in
+
n
,
s
->
windows
[
bsize
],
out
+
n
,
block_len
);
s
->
f
dsp
.
vector_fmul_add
(
out
+
n
,
in
+
n
,
s
->
windows
[
bsize
],
out
+
n
,
block_len
);
memcpy
(
out
+
n
+
block_len
,
in
+
n
+
block_len
,
n
*
sizeof
(
float
));
}
...
...
libavcodec/x86/dsputil.asm
View file @
55aa03b9
...
...
@@ -604,34 +604,6 @@ VECTOR_FMUL_REVERSE
INIT_YMM
avx
VECTOR_FMUL_REVERSE
;-----------------------------------------------------------------------------
; void vector_fmul_add(float *dst, const float *src0, const float *src1,
;                      const float *src2, int len)
;
; dst[i] = src0[i] * src1[i] + src2[i]
;
; The arrays are walked from the end towards the start, two vector registers
; (2*mmsize bytes) per iteration, using aligned moves (mova). len is therefore
; expected to be a positive multiple of 2*mmsize/4 floats (8 for the SSE
; build, 16 for the AVX build) and the buffers mmsize-aligned.
;-----------------------------------------------------------------------------
%macro VECTOR_FMUL_ADD 0
cglobal vector_fmul_add, 5,5,2, dst, src0, src1, src2, len
    ; lenq = byte offset of the last 2*mmsize-byte group
    lea     lenq, [lend*4 - 2*mmsize]
ALIGN 16
.loop:
    mova    m0, [src0q + lenq]
    mova    m1, [src0q + lenq + mmsize]
    mulps   m0, m0, [src1q + lenq]
    mulps   m1, m1, [src1q + lenq + mmsize]
    addps   m0, m0, [src2q + lenq]
    addps   m1, m1, [src2q + lenq + mmsize]
    mova    [dstq + lenq], m0
    mova    [dstq + lenq + mmsize], m1

    ; step back one group; keep going while the offset is still >= 0
    sub     lenq, 2*mmsize
    jge     .loop
    REP_RET
%endmacro

INIT_XMM sse
VECTOR_FMUL_ADD
INIT_YMM avx
VECTOR_FMUL_ADD
; %1 = aligned/unaligned
%macro
BSWAP_LOOPS
1
mov
r3
,
r2
...
...
libavcodec/x86/dsputil_mmx.c
View file @
55aa03b9
...
...
@@ -1853,11 +1853,6 @@ void ff_vector_fmul_reverse_sse(float *dst, const float *src0,
void
ff_vector_fmul_reverse_avx
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
int
len
);
void
ff_vector_fmul_add_sse
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
len
);
void
ff_vector_fmul_add_avx
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
len
);
void
ff_vector_clip_int32_mmx
(
int32_t
*
dst
,
const
int32_t
*
src
,
int32_t
min
,
int32_t
max
,
unsigned
int
len
);
void
ff_vector_clip_int32_sse2
(
int32_t
*
dst
,
const
int32_t
*
src
,
...
...
@@ -2141,7 +2136,6 @@ static void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, int mm_flags)
#if HAVE_YASM
c
->
vector_fmul_reverse
=
ff_vector_fmul_reverse_sse
;
c
->
vector_fmul_add
=
ff_vector_fmul_add_sse
;
c
->
scalarproduct_float
=
ff_scalarproduct_float_sse
;
#endif
/* HAVE_YASM */
...
...
@@ -2295,7 +2289,6 @@ static void dsputil_init_avx(DSPContext *c, AVCodecContext *avctx, int mm_flags)
}
}
c
->
vector_fmul_reverse
=
ff_vector_fmul_reverse_avx
;
c
->
vector_fmul_add
=
ff_vector_fmul_add_avx
;
#endif
/* HAVE_AVX_EXTERNAL */
}
...
...
libavutil/arm/float_dsp_init_neon.c
View file @
55aa03b9
...
...
@@ -35,10 +35,14 @@ void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul,
void
ff_vector_fmul_window_neon
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
win
,
int
len
);
void
ff_vector_fmul_add_neon
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
len
);
/**
 * Install the NEON routines into an AVFloatDSPContext.
 *
 * Only the five function pointers assigned below are modified; every other
 * field of the context is left as the caller set it.
 */
void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp)
{
    fdsp->vector_fmul_add    = ff_vector_fmul_add_neon;
    fdsp->vector_fmul_window = ff_vector_fmul_window_neon;
    fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_neon;
    fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_neon;
    fdsp->vector_fmul        = ff_vector_fmul_neon;
}
libavutil/arm/float_dsp_neon.S
View file @
55aa03b9
...
...
@@ -193,3 +193,30 @@ function ff_vector_fmul_window_neon, export=1
vst1.32 {d22,d23},[ip,:128], r5
pop {r4,r5,pc}
endfunc
@ ff_vector_fmul_add_neon(dst, src0, src1, src2, len):
@   dst[i] = src0[i] * src1[i] + src2[i]
@ Register use as read from the code below: r0 = dst (store pointer),
@ r1 = src0 and r2 = src1 (multiplied together), r3 = src2 (added),
@ r12 = element count loaded from the stack.
@ Every load and store carries a :128 alignment qualifier, so all four
@ pointers have to be 16-byte aligned. Eight floats are handled per iteration
@ and the loop only exits when the count reaches exactly zero, so len has to
@ be a positive multiple of 8.
function ff_vector_fmul_add_neon, export=1
@ r12 = len (read from the stack)
ldr r12, [sp]
@ Prologue: fetch the first 8 elements of every input and form the products.
vld1.32 {q0-q1}, [r1,:128]!
vld1.32 {q8-q9}, [r2,:128]!
vld1.32 {q2-q3}, [r3,:128]!
vmul.f32 q10, q0, q8
vmul.f32 q11, q1, q9
@ Loop head: finish the current group, q12-q13 = src2 + src0 * src1.
1: vadd.f32 q12, q2, q10
vadd.f32 q13, q3, q11
@ Prefetch hints for the three input streams.
pld [r1, #16]
pld [r2, #16]
pld [r3, #16]
@ One group of 8 accounted for; leave the loop when nothing remains.
subs r12, r12, #8
beq 2f
@ Load and multiply the next group before the current result is stored,
@ so the q10-q11 products are ready when the loop head runs again.
vld1.32 {q0}, [r1,:128]!
vld1.32 {q8}, [r2,:128]!
vmul.f32 q10, q0, q8
vld1.32 {q1}, [r1,:128]!
vld1.32 {q9}, [r2,:128]!
vmul.f32 q11, q1, q9
vld1.32 {q2-q3}, [r3,:128]!
vst1.32 {q12-q13},[r0,:128]!
b 1b
@ Epilogue: store the final group and return.
2: vst1.32 {q12-q13},[r0,:128]!
bx lr
endfunc
libavutil/float_dsp.c
View file @
55aa03b9
...
...
@@ -71,6 +71,14 @@ static void vector_fmul_window_c(float *dst, const float *src0,
}
}
/**
 * Reference C implementation of vector_fmul_add:
 * dst[k] = src0[k] * src1[k] + src2[k] for k = 0 .. len - 1.
 *
 * Each output element is computed from the input elements at the same index
 * only. A len of zero or less leaves dst untouched.
 */
static void vector_fmul_add_c(float *dst, const float *src0, const float *src1,
                              const float *src2, int len)
{
    int k = 0;

    while (k < len) {
        dst[k] = src0[k] * src1[k] + src2[k];
        k++;
    }
}
void
avpriv_float_dsp_init
(
AVFloatDSPContext
*
fdsp
,
int
bit_exact
)
{
fdsp
->
vector_fmul
=
vector_fmul_c
;
...
...
@@ -78,6 +86,7 @@ void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact)
fdsp
->
vector_fmul_scalar
=
vector_fmul_scalar_c
;
fdsp
->
vector_dmul_scalar
=
vector_dmul_scalar_c
;
fdsp
->
vector_fmul_window
=
vector_fmul_window_c
;
fdsp
->
vector_fmul_add
=
vector_fmul_add_c
;
#if ARCH_ARM
ff_float_dsp_init_arm
(
fdsp
);
...
...
libavutil/float_dsp.h
View file @
55aa03b9
...
...
@@ -100,6 +100,24 @@ typedef struct AVFloatDSPContext {
*/
void
(
*
vector_fmul_window
)(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
win
,
int
len
);
/**
* Calculate the product of two vectors of floats, add a third vector of
* floats and store the result in a vector of floats.
*
* @param dst output vector
* constraints: 32-byte aligned
* @param src0 first input vector
* constraints: 32-byte aligned
* @param src1 second input vector
* constraints: 32-byte aligned
* @param src2 third input vector
* constraints: 32-byte aligned
* @param len number of elements in the input
* constraints: multiple of 16
*/
void
(
*
vector_fmul_add
)(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
len
);
}
AVFloatDSPContext
;
/**
...
...
libavutil/ppc/float_dsp_altivec.c
View file @
55aa03b9
...
...
@@ -69,3 +69,27 @@ void ff_vector_fmul_window_altivec(float *dst, const float *src0,
vec_st
(
t1
,
j
,
dst
);
}
}
void
ff_vector_fmul_add_altivec
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
len
)
{
int
i
;
vector
float
d
,
s0
,
s1
,
s2
,
t0
,
t1
,
edges
;
vector
unsigned
char
align
=
vec_lvsr
(
0
,
dst
),
mask
=
vec_lvsl
(
0
,
dst
);
for
(
i
=
0
;
i
<
len
-
3
;
i
+=
4
)
{
t0
=
vec_ld
(
0
,
dst
+
i
);
t1
=
vec_ld
(
15
,
dst
+
i
);
s0
=
vec_ld
(
0
,
src0
+
i
);
s1
=
vec_ld
(
0
,
src1
+
i
);
s2
=
vec_ld
(
0
,
src2
+
i
);
edges
=
vec_perm
(
t1
,
t0
,
mask
);
d
=
vec_madd
(
s0
,
s1
,
s2
);
t1
=
vec_perm
(
d
,
edges
,
align
);
t0
=
vec_perm
(
edges
,
d
,
align
);
vec_st
(
t1
,
15
,
dst
+
i
);
vec_st
(
t0
,
0
,
dst
+
i
);
}
}
libavutil/ppc/float_dsp_altivec.h
View file @
55aa03b9
...
...
@@ -28,4 +28,8 @@ extern void ff_vector_fmul_window_altivec(float *dst, const float *src0,
const
float
*
src1
,
const
float
*
win
,
int
len
);
extern
void
ff_vector_fmul_add_altivec
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
len
);
#endif
/* AVUTIL_PPC_FLOAT_DSP_ALTIVEC_H */
libavutil/ppc/float_dsp_init.c
View file @
55aa03b9
...
...
@@ -32,6 +32,7 @@ void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int bit_exact)
return
;
fdsp
->
vector_fmul
=
ff_vector_fmul_altivec
;
fdsp
->
vector_fmul_add
=
ff_vector_fmul_add_altivec
;
if
(
!
bit_exact
)
{
fdsp
->
vector_fmul_window
=
ff_vector_fmul_window_altivec
;
...
...
libavutil/x86/float_dsp.asm
View file @
55aa03b9
...
...
@@ -162,3 +162,31 @@ VECTOR_DMUL_SCALAR
INIT_YMM
avx
VECTOR_DMUL_SCALAR
%endif
;-----------------------------------------------------------------------------
; void vector_fmul_add(float *dst, const float *src0, const float *src1,
;                      const float *src2, int len)
;
; dst[i] = src0[i] * src1[i] + src2[i]
;
; The arrays are walked from the end towards the start, two vector registers
; (2*mmsize bytes) per iteration, using aligned moves (mova). len is therefore
; expected to be a positive multiple of 2*mmsize/4 floats (8 for the SSE
; build, 16 for the AVX build) and the buffers mmsize-aligned.
;-----------------------------------------------------------------------------
%macro VECTOR_FMUL_ADD 0
cglobal vector_fmul_add, 5,5,2, dst, src0, src1, src2, len
    ; lenq = byte offset of the last 2*mmsize-byte group
    lea     lenq, [lend*4 - 2*mmsize]
ALIGN 16
.loop:
    mova    m0, [src0q + lenq]
    mova    m1, [src0q + lenq + mmsize]
    mulps   m0, m0, [src1q + lenq]
    mulps   m1, m1, [src1q + lenq + mmsize]
    addps   m0, m0, [src2q + lenq]
    addps   m1, m1, [src2q + lenq + mmsize]
    mova    [dstq + lenq], m0
    mova    [dstq + lenq + mmsize], m1

    ; step back one group; keep going while the offset is still >= 0
    sub     lenq, 2*mmsize
    jge     .loop
    REP_RET
%endmacro

INIT_XMM sse
VECTOR_FMUL_ADD
INIT_YMM avx
VECTOR_FMUL_ADD
libavutil/x86/float_dsp_init.c
View file @
55aa03b9
...
...
@@ -41,6 +41,11 @@ extern void ff_vector_dmul_scalar_sse2(double *dst, const double *src,
extern
void
ff_vector_dmul_scalar_avx
(
double
*
dst
,
const
double
*
src
,
double
mul
,
int
len
);
void
ff_vector_fmul_add_sse
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
len
);
void
ff_vector_fmul_add_avx
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
len
);
#if HAVE_6REGS && HAVE_INLINE_ASM
static
void
vector_fmul_window_3dnowext
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
win
,
...
...
@@ -123,6 +128,7 @@ void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
fdsp
->
vector_fmul
=
ff_vector_fmul_sse
;
fdsp
->
vector_fmac_scalar
=
ff_vector_fmac_scalar_sse
;
fdsp
->
vector_fmul_scalar
=
ff_vector_fmul_scalar_sse
;
fdsp
->
vector_fmul_add
=
ff_vector_fmul_add_sse
;
}
if
(
EXTERNAL_SSE2
(
mm_flags
))
{
fdsp
->
vector_dmul_scalar
=
ff_vector_dmul_scalar_sse2
;
...
...
@@ -131,5 +137,6 @@ void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
fdsp
->
vector_fmul
=
ff_vector_fmul_avx
;
fdsp
->
vector_fmac_scalar
=
ff_vector_fmac_scalar_avx
;
fdsp
->
vector_dmul_scalar
=
ff_vector_dmul_scalar_avx
;
fdsp
->
vector_fmul_add
=
ff_vector_fmul_add_avx
;
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment