Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
6eabb0d3
Commit
6eabb0d3
authored
Jan 13, 2011
by
Justin Ruggles
Committed by
Mans Rullgard
Jan 22, 2011
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Change DSPContext.vector_fmul() from dst=dst*src to dst=src0*src1.
Signed-off-by:
Mans Rullgard
<
mans@mansr.com
>
parent
fcb7e535
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
67 additions
and
68 deletions
+67
-68
aacenc.c
libavcodec/aacenc.c
+1
-1
dsputil_init_neon.c
libavcodec/arm/dsputil_init_neon.c
+1
-1
dsputil_init_vfp.c
libavcodec/arm/dsputil_init_vfp.c
+2
-1
dsputil_neon.S
libavcodec/arm/dsputil_neon.S
+22
-23
dsputil_vfp.S
libavcodec/arm/dsputil_vfp.S
+14
-15
atrac3.c
libavcodec/atrac3.c
+1
-1
dsputil.c
libavcodec/dsputil.c
+2
-2
dsputil.h
libavcodec/dsputil.h
+1
-1
nellymoserenc.c
libavcodec/nellymoserenc.c
+3
-3
float_altivec.c
libavcodec/ppc/float_altivec.c
+5
-5
twinvq.c
libavcodec/twinvq.c
+2
-2
vorbis_dec.c
libavcodec/vorbis_dec.c
+1
-1
dsputil_mmx.c
libavcodec/x86/dsputil_mmx.c
+12
-12
No files found.
libavcodec/aacenc.c
View file @
6eabb0d3
...
...
@@ -256,7 +256,7 @@ static void apply_window_and_mdct(AVCodecContext *avctx, AACEncContext *s,
s
->
output
[
i
-
448
-
k
]
=
(
i
<
1024
)
?
sce
->
saved
[
i
]
:
audio
[(
i
-
1024
)
*
chans
];
s
->
dsp
.
vector_fmul
(
s
->
output
,
k
?
swindow
:
pwindow
,
128
);
s
->
dsp
.
vector_fmul
(
s
->
output
,
s
->
output
,
k
?
swindow
:
pwindow
,
128
);
s
->
dsp
.
vector_fmul_reverse
(
s
->
output
+
128
,
s
->
output
+
128
,
swindow
,
128
);
ff_mdct_calc
(
&
s
->
mdct128
,
sce
->
coeffs
+
k
,
s
->
output
);
}
...
...
libavcodec/arm/dsputil_init_neon.c
View file @
6eabb0d3
...
...
@@ -138,7 +138,7 @@ void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int);
void
ff_vp3_v_loop_filter_neon
(
uint8_t
*
,
int
,
int
*
);
void
ff_vp3_h_loop_filter_neon
(
uint8_t
*
,
int
,
int
*
);
void
ff_vector_fmul_neon
(
float
*
dst
,
const
float
*
src
,
int
len
);
void
ff_vector_fmul_neon
(
float
*
dst
,
const
float
*
src
0
,
const
float
*
src1
,
int
len
);
void
ff_vector_fmul_window_neon
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
win
,
float
add_bias
,
int
len
);
...
...
libavcodec/arm/dsputil_init_vfp.c
View file @
6eabb0d3
...
...
@@ -21,7 +21,8 @@
#include "libavcodec/dsputil.h"
#include "dsputil_arm.h"
void
ff_vector_fmul_vfp
(
float
*
dst
,
const
float
*
src
,
int
len
);
void
ff_vector_fmul_vfp
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
int
len
);
void
ff_vector_fmul_reverse_vfp
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
int
len
);
void
ff_float_to_int16_vfp
(
int16_t
*
dst
,
const
float
*
src
,
long
len
);
...
...
libavcodec/arm/dsputil_neon.S
View file @
6eabb0d3
...
...
@@ -738,42 +738,41 @@ function ff_float_to_int16_interleave_neon, export=1
endfunc
function ff_vector_fmul_neon, export=1
mov r3, r0
subs r2, r2, #8
vld1.64 {d0-d3}, [r0,:128]!
vld1.64 {d4-d7}, [r1,:128]!
subs r3, r3, #8
vld1.64 {d0-d3}, [r1,:128]!
vld1.64 {d4-d7}, [r2,:128]!
vmul.f32 q8, q0, q2
vmul.f32 q9, q1, q3
beq 3f
bics ip, r
2
, #15
bics ip, r
3
, #15
beq 2f
1: subs ip, ip, #16
vld1.64 {d0-d1}, [r
0
,:128]!
vld1.64 {d4-d5}, [r
1
,:128]!
vld1.64 {d0-d1}, [r
1
,:128]!
vld1.64 {d4-d5}, [r
2
,:128]!
vmul.f32 q10, q0, q2
vld1.64 {d2-d3}, [r
0
,:128]!
vld1.64 {d6-d7}, [r
1
,:128]!
vld1.64 {d2-d3}, [r
1
,:128]!
vld1.64 {d6-d7}, [r
2
,:128]!
vmul.f32 q11, q1, q3
vst1.64 {d16-d19},[r
3
,:128]!
vld1.64 {d0-d1}, [r
0
,:128]!
vld1.64 {d4-d5}, [r
1
,:128]!
vst1.64 {d16-d19},[r
0
,:128]!
vld1.64 {d0-d1}, [r
1
,:128]!
vld1.64 {d4-d5}, [r
2
,:128]!
vmul.f32 q8, q0, q2
vld1.64 {d2-d3}, [r
0
,:128]!
vld1.64 {d6-d7}, [r
1
,:128]!
vld1.64 {d2-d3}, [r
1
,:128]!
vld1.64 {d6-d7}, [r
2
,:128]!
vmul.f32 q9, q1, q3
vst1.64 {d20-d23},[r
3
,:128]!
vst1.64 {d20-d23},[r
0
,:128]!
bne 1b
ands r
2, r2
, #15
ands r
3, r3
, #15
beq 3f
2: vld1.64 {d0-d1}, [r
0
,:128]!
vld1.64 {d4-d5}, [r
1
,:128]!
vst1.64 {d16-d17},[r
3
,:128]!
2: vld1.64 {d0-d1}, [r
1
,:128]!
vld1.64 {d4-d5}, [r
2
,:128]!
vst1.64 {d16-d17},[r
0
,:128]!
vmul.f32 q8, q0, q2
vld1.64 {d2-d3}, [r
0
,:128]!
vld1.64 {d6-d7}, [r
1
,:128]!
vst1.64 {d18-d19},[r
3
,:128]!
vld1.64 {d2-d3}, [r
1
,:128]!
vld1.64 {d6-d7}, [r
2
,:128]!
vst1.64 {d18-d19},[r
0
,:128]!
vmul.f32 q9, q1, q3
3: vst1.64 {d16-d19},[r
3
,:128]!
3: vst1.64 {d16-d19},[r
0
,:128]!
bx lr
endfunc
...
...
libavcodec/arm/dsputil_vfp.S
View file @
6eabb0d3
...
...
@@ -41,34 +41,33 @@
* ARM VFP optimized implementation of 'vector_fmul_c' function.
* Assumes that len is a positive number and a multiple of 8
*/
@ void ff_vector_fmul_vfp(float *dst, const float *src, int len)
@ void ff_vector_fmul_vfp(float *dst, const float *src0, const float *src1, int len)
function ff_vector_fmul_vfp, export=1
vpush {d8-d15}
mov r3, r0
fmrx r12, fpscr
orr r12, r12, #(3 << 16) /* set vector size to 4 */
fmxr fpscr, r12
vldmia r
3
!, {s0-s3}
vldmia r
1
!, {s8-s11}
vldmia r
3
!, {s4-s7}
vldmia r
1
!, {s12-s15}
vldmia r
1
!, {s0-s3}
vldmia r
2
!, {s8-s11}
vldmia r
1
!, {s4-s7}
vldmia r
2
!, {s12-s15}
vmul.f32 s8, s0, s8
1:
subs r
2, r2
, #16
subs r
3, r3
, #16
vmul.f32 s12, s4, s12
vldmiage r
3
!, {s16-s19}
vldmiage r
1
!, {s24-s27}
vldmiage r
3
!, {s20-s23}
vldmiage r
1
!, {s28-s31}
vldmiage r
1
!, {s16-s19}
vldmiage r
2
!, {s24-s27}
vldmiage r
1
!, {s20-s23}
vldmiage r
2
!, {s28-s31}
vmulge.f32 s24, s16, s24
vstmia r0!, {s8-s11}
vstmia r0!, {s12-s15}
vmulge.f32 s28, s20, s28
vldmiagt r
3
!, {s0-s3}
vldmiagt r
1
!, {s8-s11}
vldmiagt r
3
!, {s4-s7}
vldmiagt r
1
!, {s12-s15}
vldmiagt r
1
!, {s0-s3}
vldmiagt r
2
!, {s8-s11}
vldmiagt r
1
!, {s4-s7}
vldmiagt r
2
!, {s12-s15}
vmulge.f32 s8, s0, s8
vstmiage r0!, {s24-s27}
vstmiage r0!, {s28-s31}
...
...
libavcodec/atrac3.c
View file @
6eabb0d3
...
...
@@ -159,7 +159,7 @@ static void IMLT(ATRAC3Context *q, float *pInput, float *pOutput, int odd_band)
ff_imdct_calc
(
&
q
->
mdct_ctx
,
pOutput
,
pInput
);
/* Perform windowing on the output. */
dsp
.
vector_fmul
(
pOutput
,
mdct_window
,
512
);
dsp
.
vector_fmul
(
pOutput
,
pOutput
,
mdct_window
,
512
);
}
...
...
libavcodec/dsputil.c
View file @
6eabb0d3
...
...
@@ -3750,10 +3750,10 @@ WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WRAPPER8_16_SQ
(
rd8x8_c
,
rd16_c
)
WRAPPER8_16_SQ
(
bit8x8_c
,
bit16_c
)
static
void
vector_fmul_c
(
float
*
dst
,
const
float
*
src
,
int
len
){
static
void
vector_fmul_c
(
float
*
dst
,
const
float
*
src
0
,
const
float
*
src1
,
int
len
){
int
i
;
for
(
i
=
0
;
i
<
len
;
i
++
)
dst
[
i
]
*=
src
[
i
];
dst
[
i
]
=
src0
[
i
]
*
src1
[
i
];
}
static
void
vector_fmul_reverse_c
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
int
len
){
...
...
libavcodec/dsputil.h
View file @
6eabb0d3
...
...
@@ -375,7 +375,7 @@ typedef struct DSPContext {
void
(
*
vorbis_inverse_coupling
)(
float
*
mag
,
float
*
ang
,
int
blocksize
);
void
(
*
ac3_downmix
)(
float
(
*
samples
)[
256
],
float
(
*
matrix
)[
2
],
int
out_ch
,
int
in_ch
,
int
len
);
/* assume len is a multiple of 8, and arrays are 16-byte aligned */
void
(
*
vector_fmul
)(
float
*
dst
,
const
float
*
src
,
int
len
);
void
(
*
vector_fmul
)(
float
*
dst
,
const
float
*
src
0
,
const
float
*
src1
,
int
len
);
void
(
*
vector_fmul_reverse
)(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
int
len
);
/* assume len is a multiple of 8, and src arrays are 16-byte aligned */
void
(
*
vector_fmul_add
)(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
len
);
...
...
libavcodec/nellymoserenc.c
View file @
6eabb0d3
...
...
@@ -113,13 +113,13 @@ static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
static
void
apply_mdct
(
NellyMoserEncodeContext
*
s
)
{
memcpy
(
s
->
in_buff
,
s
->
buf
[
s
->
bufsel
],
NELLY_BUF_LEN
*
sizeof
(
float
));
s
->
dsp
.
vector_fmul
(
s
->
in_buff
,
ff_sine_128
,
NELLY_BUF_LEN
);
s
->
dsp
.
vector_fmul
(
s
->
in_buff
,
s
->
buf
[
s
->
bufsel
],
ff_sine_128
,
NELLY_BUF_LEN
);
s
->
dsp
.
vector_fmul_reverse
(
s
->
in_buff
+
NELLY_BUF_LEN
,
s
->
buf
[
s
->
bufsel
]
+
NELLY_BUF_LEN
,
ff_sine_128
,
NELLY_BUF_LEN
);
ff_mdct_calc
(
&
s
->
mdct_ctx
,
s
->
mdct_out
,
s
->
in_buff
);
s
->
dsp
.
vector_fmul
(
s
->
buf
[
s
->
bufsel
]
+
NELLY_BUF_LEN
,
ff_sine_128
,
NELLY_BUF_LEN
);
s
->
dsp
.
vector_fmul
(
s
->
buf
[
s
->
bufsel
]
+
NELLY_BUF_LEN
,
s
->
buf
[
s
->
bufsel
]
+
NELLY_BUF_LEN
,
ff_sine_128
,
NELLY_BUF_LEN
);
s
->
dsp
.
vector_fmul_reverse
(
s
->
buf
[
s
->
bufsel
]
+
2
*
NELLY_BUF_LEN
,
s
->
buf
[
1
-
s
->
bufsel
],
ff_sine_128
,
NELLY_BUF_LEN
);
ff_mdct_calc
(
&
s
->
mdct_ctx
,
s
->
mdct_out
+
NELLY_BUF_LEN
,
s
->
buf
[
s
->
bufsel
]
+
NELLY_BUF_LEN
);
...
...
libavcodec/ppc/float_altivec.c
View file @
6eabb0d3
...
...
@@ -23,16 +23,16 @@
#include "dsputil_altivec.h"
#include "util_altivec.h"
static
void
vector_fmul_altivec
(
float
*
dst
,
const
float
*
src
,
int
len
)
static
void
vector_fmul_altivec
(
float
*
dst
,
const
float
*
src
0
,
const
float
*
src1
,
int
len
)
{
int
i
;
vector
float
d0
,
d1
,
s
,
zero
=
(
vector
float
)
vec_splat_u32
(
0
);
for
(
i
=
0
;
i
<
len
-
7
;
i
+=
8
)
{
d0
=
vec_ld
(
0
,
dst
+
i
);
s
=
vec_ld
(
0
,
src
+
i
);
d1
=
vec_ld
(
16
,
dst
+
i
);
d0
=
vec_ld
(
0
,
src0
+
i
);
s
=
vec_ld
(
0
,
src
1
+
i
);
d1
=
vec_ld
(
16
,
src0
+
i
);
d0
=
vec_madd
(
d0
,
s
,
zero
);
d1
=
vec_madd
(
d1
,
vec_ld
(
16
,
src
+
i
),
zero
);
d1
=
vec_madd
(
d1
,
vec_ld
(
16
,
src
1
+
i
),
zero
);
vec_st
(
d0
,
0
,
dst
+
i
);
vec_st
(
d1
,
16
,
dst
+
i
);
}
...
...
libavcodec/twinvq.c
View file @
6eabb0d3
...
...
@@ -783,7 +783,7 @@ static void read_and_decode_spectrum(TwinContext *tctx, GetBitContext *gb,
dec_bark_env
(
tctx
,
bark1
[
i
][
j
],
bark_use_hist
[
i
][
j
],
i
,
tctx
->
tmp_buf
,
gain
[
sub
*
i
+
j
],
ftype
);
tctx
->
dsp
.
vector_fmul
(
chunk
+
block_size
*
j
,
tctx
->
tmp_buf
,
tctx
->
dsp
.
vector_fmul
(
chunk
+
block_size
*
j
,
chunk
+
block_size
*
j
,
tctx
->
tmp_buf
,
block_size
);
}
...
...
@@ -805,7 +805,7 @@ static void read_and_decode_spectrum(TwinContext *tctx, GetBitContext *gb,
dec_lpc_spectrum_inv
(
tctx
,
lsp
,
ftype
,
tctx
->
tmp_buf
);
for
(
j
=
0
;
j
<
mtab
->
fmode
[
ftype
].
sub
;
j
++
)
{
tctx
->
dsp
.
vector_fmul
(
chunk
,
tctx
->
tmp_buf
,
block_size
);
tctx
->
dsp
.
vector_fmul
(
chunk
,
chunk
,
tctx
->
tmp_buf
,
block_size
);
chunk
+=
block_size
;
}
}
...
...
libavcodec/vorbis_dec.c
View file @
6eabb0d3
...
...
@@ -1578,7 +1578,7 @@ static int vorbis_parse_audio_packet(vorbis_context *vc)
for
(
j
=
vc
->
audio_channels
-
1
;
j
>=
0
;
j
--
)
{
ch_floor_ptr
=
vc
->
channel_floors
+
j
*
blocksize
/
2
;
ch_res_ptr
=
vc
->
channel_residues
+
res_chan
[
j
]
*
blocksize
/
2
;
vc
->
dsp
.
vector_fmul
(
ch_floor_ptr
,
ch_res_ptr
,
blocksize
/
2
);
vc
->
dsp
.
vector_fmul
(
ch_floor_ptr
,
ch_
floor_ptr
,
ch_
res_ptr
,
blocksize
/
2
);
ff_imdct_half
(
&
vc
->
mdct
[
blockflag
],
ch_res_ptr
,
ch_floor_ptr
);
}
...
...
libavcodec/x86/dsputil_mmx.c
View file @
6eabb0d3
...
...
@@ -2074,38 +2074,38 @@ static void ac3_downmix_sse(float (*samples)[256], float (*matrix)[2], int out_c
}
}
static
void
vector_fmul_3dnow
(
float
*
dst
,
const
float
*
src
,
int
len
){
static
void
vector_fmul_3dnow
(
float
*
dst
,
const
float
*
src
0
,
const
float
*
src1
,
int
len
){
x86_reg
i
=
(
len
-
4
)
*
4
;
__asm__
volatile
(
"1:
\n\t
"
"movq (%
1
,%0), %%mm0
\n\t
"
"movq 8(%
1
,%0), %%mm1
\n\t
"
"pfmul (%
2
,%0), %%mm0
\n\t
"
"pfmul 8(%
2
,%0), %%mm1
\n\t
"
"movq (%
2
,%0), %%mm0
\n\t
"
"movq 8(%
2
,%0), %%mm1
\n\t
"
"pfmul (%
3
,%0), %%mm0
\n\t
"
"pfmul 8(%
3
,%0), %%mm1
\n\t
"
"movq %%mm0, (%1,%0)
\n\t
"
"movq %%mm1, 8(%1,%0)
\n\t
"
"sub $16, %0
\n\t
"
"jge 1b
\n\t
"
"femms
\n\t
"
:
"+r"
(
i
)
:
"r"
(
dst
),
"r"
(
src
)
:
"r"
(
dst
),
"r"
(
src
0
),
"r"
(
src1
)
:
"memory"
);
}
static
void
vector_fmul_sse
(
float
*
dst
,
const
float
*
src
,
int
len
){
static
void
vector_fmul_sse
(
float
*
dst
,
const
float
*
src
0
,
const
float
*
src1
,
int
len
){
x86_reg
i
=
(
len
-
8
)
*
4
;
__asm__
volatile
(
"1:
\n\t
"
"movaps (%
1
,%0), %%xmm0
\n\t
"
"movaps 16(%
1
,%0), %%xmm1
\n\t
"
"mulps (%
2
,%0), %%xmm0
\n\t
"
"mulps 16(%
2
,%0), %%xmm1
\n\t
"
"movaps (%
2
,%0), %%xmm0
\n\t
"
"movaps 16(%
2
,%0), %%xmm1
\n\t
"
"mulps (%
3
,%0), %%xmm0
\n\t
"
"mulps 16(%
3
,%0), %%xmm1
\n\t
"
"movaps %%xmm0, (%1,%0)
\n\t
"
"movaps %%xmm1, 16(%1,%0)
\n\t
"
"sub $32, %0
\n\t
"
"jge 1b
\n\t
"
:
"+r"
(
i
)
:
"r"
(
dst
),
"r"
(
src
)
:
"r"
(
dst
),
"r"
(
src
0
),
"r"
(
src1
)
:
"memory"
);
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment