Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
e034cc6c
Commit
e034cc6c
authored
Jan 07, 2013
by
Justin Ruggles
Committed by
Luca Barbato
Jan 16, 2013
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
lavc: Move vector_fmul_window to AVFloatDSPContext
Signed-off-by:
Luca Barbato
<
lu_zero@gentoo.org
>
parent
a7ba3244
Hide whitespace changes
Inline
Side-by-side
Showing
22 changed files
with
251 additions
and
225 deletions
+251
-225
aacdec.c
libavcodec/aacdec.c
+18
-18
ac3dec.c
libavcodec/ac3dec.c
+5
-4
ac3dec.h
libavcodec/ac3dec.h
+2
-0
dsputil_init_neon.c
libavcodec/arm/dsputil_init_neon.c
+0
-3
dsputil_neon.S
libavcodec/arm/dsputil_neon.S
+0
-47
atrac1.c
libavcodec/atrac1.c
+5
-4
dsputil.c
libavcodec/dsputil.c
+0
-18
dsputil.h
libavcodec/dsputil.h
+0
-2
nellymoserdec.c
libavcodec/nellymoserdec.c
+6
-3
float_altivec.c
libavcodec/ppc/float_altivec.c
+0
-35
twinvq.c
libavcodec/twinvq.c
+4
-5
vorbisdec.c
libavcodec/vorbisdec.c
+3
-3
wmaprodec.c
libavcodec/wmaprodec.c
+2
-2
dsputil_mmx.c
libavcodec/x86/dsputil_mmx.c
+0
-81
float_dsp_init_neon.c
libavutil/arm/float_dsp_init_neon.c
+4
-0
float_dsp_neon.S
libavutil/arm/float_dsp_neon.S
+47
-0
float_dsp.c
libavutil/float_dsp.c
+20
-0
float_dsp.h
libavutil/float_dsp.h
+19
-0
float_dsp_altivec.c
libavutil/ppc/float_dsp_altivec.c
+33
-0
float_dsp_altivec.h
libavutil/ppc/float_dsp_altivec.h
+4
-0
float_dsp_init.c
libavutil/ppc/float_dsp_init.c
+4
-0
float_dsp_init.c
libavutil/x86/float_dsp_init.c
+75
-0
No files found.
libavcodec/aacdec.c
View file @
e034cc6c
...
...
@@ -2173,35 +2173,35 @@ static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce)
*/
if
((
ics
->
window_sequence
[
1
]
==
ONLY_LONG_SEQUENCE
||
ics
->
window_sequence
[
1
]
==
LONG_STOP_SEQUENCE
)
&&
(
ics
->
window_sequence
[
0
]
==
ONLY_LONG_SEQUENCE
||
ics
->
window_sequence
[
0
]
==
LONG_START_SEQUENCE
))
{
ac
->
dsp
.
vector_fmul_window
(
out
,
saved
,
buf
,
lwindow_prev
,
512
);
ac
->
f
dsp
.
vector_fmul_window
(
out
,
saved
,
buf
,
lwindow_prev
,
512
);
}
else
{
memcpy
(
out
,
saved
,
448
*
sizeof
(
float
));
memcpy
(
out
,
saved
,
448
*
sizeof
(
float
));
if
(
ics
->
window_sequence
[
0
]
==
EIGHT_SHORT_SEQUENCE
)
{
ac
->
dsp
.
vector_fmul_window
(
out
+
448
+
0
*
128
,
saved
+
448
,
buf
+
0
*
128
,
swindow_prev
,
64
);
ac
->
dsp
.
vector_fmul_window
(
out
+
448
+
1
*
128
,
buf
+
0
*
128
+
64
,
buf
+
1
*
128
,
swindow
,
64
);
ac
->
dsp
.
vector_fmul_window
(
out
+
448
+
2
*
128
,
buf
+
1
*
128
+
64
,
buf
+
2
*
128
,
swindow
,
64
);
ac
->
dsp
.
vector_fmul_window
(
out
+
448
+
3
*
128
,
buf
+
2
*
128
+
64
,
buf
+
3
*
128
,
swindow
,
64
);
ac
->
dsp
.
vector_fmul_window
(
temp
,
buf
+
3
*
128
+
64
,
buf
+
4
*
128
,
swindow
,
64
);
memcpy
(
out
+
448
+
4
*
128
,
temp
,
64
*
sizeof
(
float
));
ac
->
f
dsp
.
vector_fmul_window
(
out
+
448
+
0
*
128
,
saved
+
448
,
buf
+
0
*
128
,
swindow_prev
,
64
);
ac
->
f
dsp
.
vector_fmul_window
(
out
+
448
+
1
*
128
,
buf
+
0
*
128
+
64
,
buf
+
1
*
128
,
swindow
,
64
);
ac
->
f
dsp
.
vector_fmul_window
(
out
+
448
+
2
*
128
,
buf
+
1
*
128
+
64
,
buf
+
2
*
128
,
swindow
,
64
);
ac
->
f
dsp
.
vector_fmul_window
(
out
+
448
+
3
*
128
,
buf
+
2
*
128
+
64
,
buf
+
3
*
128
,
swindow
,
64
);
ac
->
f
dsp
.
vector_fmul_window
(
temp
,
buf
+
3
*
128
+
64
,
buf
+
4
*
128
,
swindow
,
64
);
memcpy
(
out
+
448
+
4
*
128
,
temp
,
64
*
sizeof
(
float
));
}
else
{
ac
->
dsp
.
vector_fmul_window
(
out
+
448
,
saved
+
448
,
buf
,
swindow_prev
,
64
);
memcpy
(
out
+
576
,
buf
+
64
,
448
*
sizeof
(
float
));
ac
->
f
dsp
.
vector_fmul_window
(
out
+
448
,
saved
+
448
,
buf
,
swindow_prev
,
64
);
memcpy
(
out
+
576
,
buf
+
64
,
448
*
sizeof
(
float
));
}
}
// buffer update
if
(
ics
->
window_sequence
[
0
]
==
EIGHT_SHORT_SEQUENCE
)
{
memcpy
(
saved
,
temp
+
64
,
64
*
sizeof
(
float
));
ac
->
dsp
.
vector_fmul_window
(
saved
+
64
,
buf
+
4
*
128
+
64
,
buf
+
5
*
128
,
swindow
,
64
);
ac
->
dsp
.
vector_fmul_window
(
saved
+
192
,
buf
+
5
*
128
+
64
,
buf
+
6
*
128
,
swindow
,
64
);
ac
->
dsp
.
vector_fmul_window
(
saved
+
320
,
buf
+
6
*
128
+
64
,
buf
+
7
*
128
,
swindow
,
64
);
memcpy
(
saved
+
448
,
buf
+
7
*
128
+
64
,
64
*
sizeof
(
float
));
memcpy
(
saved
,
temp
+
64
,
64
*
sizeof
(
float
));
ac
->
f
dsp
.
vector_fmul_window
(
saved
+
64
,
buf
+
4
*
128
+
64
,
buf
+
5
*
128
,
swindow
,
64
);
ac
->
f
dsp
.
vector_fmul_window
(
saved
+
192
,
buf
+
5
*
128
+
64
,
buf
+
6
*
128
,
swindow
,
64
);
ac
->
f
dsp
.
vector_fmul_window
(
saved
+
320
,
buf
+
6
*
128
+
64
,
buf
+
7
*
128
,
swindow
,
64
);
memcpy
(
saved
+
448
,
buf
+
7
*
128
+
64
,
64
*
sizeof
(
float
));
}
else
if
(
ics
->
window_sequence
[
0
]
==
LONG_START_SEQUENCE
)
{
memcpy
(
saved
,
buf
+
512
,
448
*
sizeof
(
float
));
memcpy
(
saved
+
448
,
buf
+
7
*
128
+
64
,
64
*
sizeof
(
float
));
memcpy
(
saved
,
buf
+
512
,
448
*
sizeof
(
float
));
memcpy
(
saved
+
448
,
buf
+
7
*
128
+
64
,
64
*
sizeof
(
float
));
}
else
{
// LONG_STOP or ONLY_LONG
memcpy
(
saved
,
buf
+
512
,
512
*
sizeof
(
float
));
memcpy
(
saved
,
buf
+
512
,
512
*
sizeof
(
float
));
}
}
...
...
libavcodec/ac3dec.c
View file @
e034cc6c
...
...
@@ -170,6 +170,7 @@ static av_cold int ac3_decode_init(AVCodecContext *avctx)
ff_mdct_init
(
&
s
->
imdct_512
,
9
,
1
,
1
.
0
);
ff_kbd_window_init
(
s
->
window
,
5
.
0
,
256
);
ff_dsputil_init
(
&
s
->
dsp
,
avctx
);
avpriv_float_dsp_init
(
&
s
->
fdsp
,
avctx
->
flags
&
CODEC_FLAG_BITEXACT
);
ff_ac3dsp_init
(
&
s
->
ac3dsp
,
avctx
->
flags
&
CODEC_FLAG_BITEXACT
);
ff_fmt_convert_init
(
&
s
->
fmt_conv
,
avctx
);
av_lfg_init
(
&
s
->
dith_state
,
0
);
...
...
@@ -606,15 +607,15 @@ static inline void do_imdct(AC3DecodeContext *s, int channels)
for
(
i
=
0
;
i
<
128
;
i
++
)
x
[
i
]
=
s
->
transform_coeffs
[
ch
][
2
*
i
];
s
->
imdct_256
.
imdct_half
(
&
s
->
imdct_256
,
s
->
tmp_output
,
x
);
s
->
dsp
.
vector_fmul_window
(
s
->
outptr
[
ch
-
1
],
s
->
delay
[
ch
-
1
],
s
->
tmp_output
,
s
->
window
,
128
);
s
->
f
dsp
.
vector_fmul_window
(
s
->
outptr
[
ch
-
1
],
s
->
delay
[
ch
-
1
],
s
->
tmp_output
,
s
->
window
,
128
);
for
(
i
=
0
;
i
<
128
;
i
++
)
x
[
i
]
=
s
->
transform_coeffs
[
ch
][
2
*
i
+
1
];
s
->
imdct_256
.
imdct_half
(
&
s
->
imdct_256
,
s
->
delay
[
ch
-
1
],
x
);
}
else
{
s
->
imdct_512
.
imdct_half
(
&
s
->
imdct_512
,
s
->
tmp_output
,
s
->
transform_coeffs
[
ch
]);
s
->
dsp
.
vector_fmul_window
(
s
->
outptr
[
ch
-
1
],
s
->
delay
[
ch
-
1
],
s
->
tmp_output
,
s
->
window
,
128
);
s
->
f
dsp
.
vector_fmul_window
(
s
->
outptr
[
ch
-
1
],
s
->
delay
[
ch
-
1
],
s
->
tmp_output
,
s
->
window
,
128
);
memcpy
(
s
->
delay
[
ch
-
1
],
s
->
tmp_output
+
128
,
128
*
sizeof
(
float
));
}
}
...
...
libavcodec/ac3dec.h
View file @
e034cc6c
...
...
@@ -50,6 +50,7 @@
#ifndef AVCODEC_AC3DEC_H
#define AVCODEC_AC3DEC_H
#include "libavutil/float_dsp.h"
#include "libavutil/lfg.h"
#include "ac3.h"
#include "ac3dsp.h"
...
...
@@ -193,6 +194,7 @@ typedef struct AC3DecodeContext {
///@name Optimization
DSPContext
dsp
;
///< for optimization
AVFloatDSPContext
fdsp
;
AC3DSPContext
ac3dsp
;
FmtConvertContext
fmt_conv
;
///< optimized conversion functions
///@}
...
...
libavcodec/arm/dsputil_init_neon.c
View file @
e034cc6c
...
...
@@ -142,8 +142,6 @@ void ff_avg_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
void
ff_avg_h264_chroma_mc4_neon
(
uint8_t
*
,
uint8_t
*
,
int
,
int
,
int
,
int
);
void
ff_avg_h264_chroma_mc2_neon
(
uint8_t
*
,
uint8_t
*
,
int
,
int
,
int
,
int
);
void
ff_vector_fmul_window_neon
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
win
,
int
len
);
void
ff_butterflies_float_neon
(
float
*
v1
,
float
*
v2
,
int
len
);
float
ff_scalarproduct_float_neon
(
const
float
*
v1
,
const
float
*
v2
,
int
len
);
void
ff_vector_fmul_reverse_neon
(
float
*
dst
,
const
float
*
src0
,
...
...
@@ -302,7 +300,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
c
->
avg_h264_qpel_pixels_tab
[
1
][
15
]
=
ff_avg_h264_qpel8_mc33_neon
;
}
c
->
vector_fmul_window
=
ff_vector_fmul_window_neon
;
c
->
butterflies_float
=
ff_butterflies_float_neon
;
c
->
scalarproduct_float
=
ff_scalarproduct_float_neon
;
c
->
vector_fmul_reverse
=
ff_vector_fmul_reverse_neon
;
...
...
libavcodec/arm/dsputil_neon.S
View file @
e034cc6c
...
...
@@ -532,53 +532,6 @@ function ff_add_pixels_clamped_neon, export=1
bx lr
endfunc
function ff_vector_fmul_window_neon, export=1
push {r4,r5,lr}
ldr lr, [sp, #12]
sub r2, r2, #8
sub r5, lr, #2
add r2, r2, r5, lsl #2
add r4, r3, r5, lsl #3
add ip, r0, r5, lsl #3
mov r5, #-16
vld1.32 {d0,d1}, [r1,:128]!
vld1.32 {d2,d3}, [r2,:128], r5
vld1.32 {d4,d5}, [r3,:128]!
vld1.32 {d6,d7}, [r4,:128], r5
1: subs lr, lr, #4
vmul.f32 d22, d0, d4
vrev64.32 q3, q3
vmul.f32 d23, d1, d5
vrev64.32 q1, q1
vmul.f32 d20, d0, d7
vmul.f32 d21, d1, d6
beq 2f
vmla.f32 d22, d3, d7
vld1.32 {d0,d1}, [r1,:128]!
vmla.f32 d23, d2, d6
vld1.32 {d18,d19},[r2,:128], r5
vmls.f32 d20, d3, d4
vld1.32 {d24,d25},[r3,:128]!
vmls.f32 d21, d2, d5
vld1.32 {d6,d7}, [r4,:128], r5
vmov q1, q9
vrev64.32 q11, q11
vmov q2, q12
vswp d22, d23
vst1.32 {d20,d21},[r0,:128]!
vst1.32 {d22,d23},[ip,:128], r5
b 1b
2: vmla.f32 d22, d3, d7
vmla.f32 d23, d2, d6
vmls.f32 d20, d3, d4
vmls.f32 d21, d2, d5
vrev64.32 q11, q11
vswp d22, d23
vst1.32 {d20,d21},[r0,:128]!
vst1.32 {d22,d23},[ip,:128], r5
pop {r4,r5,pc}
endfunc
#if CONFIG_VORBIS_DECODER
function ff_vorbis_inverse_coupling_neon, export=1
vmov.i32 q10, #1<<31
...
...
libavcodec/atrac1.c
View file @
e034cc6c
...
...
@@ -32,6 +32,7 @@
#include <stddef.h>
#include <stdio.h>
#include "libavutil/float_dsp.h"
#include "avcodec.h"
#include "get_bits.h"
#include "dsputil.h"
...
...
@@ -81,7 +82,7 @@ typedef struct {
DECLARE_ALIGNED
(
32
,
float
,
high
)[
512
];
float
*
bands
[
3
];
FFTContext
mdct_ctx
[
3
];
DSPContext
dsp
;
AVFloatDSPContext
f
dsp
;
}
AT1Ctx
;
/** size of the transform in samples in the long mode for each QMF band */
...
...
@@ -141,8 +142,8 @@ static int at1_imdct_block(AT1SUCtx* su, AT1Ctx *q)
at1_imdct
(
q
,
&
q
->
spec
[
pos
],
&
su
->
spectrum
[
0
][
ref_pos
+
start_pos
],
nbits
,
band_num
);
/* overlap and window */
q
->
dsp
.
vector_fmul_window
(
&
q
->
bands
[
band_num
][
start_pos
],
prev_buf
,
&
su
->
spectrum
[
0
][
ref_pos
+
start_pos
],
ff_sine_32
,
16
);
q
->
f
dsp
.
vector_fmul_window
(
&
q
->
bands
[
band_num
][
start_pos
],
prev_buf
,
&
su
->
spectrum
[
0
][
ref_pos
+
start_pos
],
ff_sine_32
,
16
);
prev_buf
=
&
su
->
spectrum
[
0
][
ref_pos
+
start_pos
+
16
];
start_pos
+=
block_size
;
...
...
@@ -357,7 +358,7 @@ static av_cold int atrac1_decode_init(AVCodecContext *avctx)
ff_atrac_generate_tables
();
ff_dsputil_init
(
&
q
->
dsp
,
avctx
);
avpriv_float_dsp_init
(
&
q
->
fdsp
,
avctx
->
flags
&
CODEC_FLAG_BITEXACT
);
q
->
bands
[
0
]
=
q
->
low
;
q
->
bands
[
1
]
=
q
->
mid
;
...
...
libavcodec/dsputil.c
View file @
e034cc6c
...
...
@@ -2367,23 +2367,6 @@ static void vector_fmul_add_c(float *dst, const float *src0, const float *src1,
dst
[
i
]
=
src0
[
i
]
*
src1
[
i
]
+
src2
[
i
];
}
static
void
vector_fmul_window_c
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
win
,
int
len
)
{
int
i
,
j
;
dst
+=
len
;
win
+=
len
;
src0
+=
len
;
for
(
i
=-
len
,
j
=
len
-
1
;
i
<
0
;
i
++
,
j
--
)
{
float
s0
=
src0
[
i
];
float
s1
=
src1
[
j
];
float
wi
=
win
[
i
];
float
wj
=
win
[
j
];
dst
[
i
]
=
s0
*
wj
-
s1
*
wi
;
dst
[
j
]
=
s0
*
wi
+
s1
*
wj
;
}
}
static
void
butterflies_float_c
(
float
*
restrict
v1
,
float
*
restrict
v2
,
int
len
)
{
...
...
@@ -2839,7 +2822,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
#endif
c
->
vector_fmul_reverse
=
vector_fmul_reverse_c
;
c
->
vector_fmul_add
=
vector_fmul_add_c
;
c
->
vector_fmul_window
=
vector_fmul_window_c
;
c
->
vector_clipf
=
vector_clipf_c
;
c
->
scalarproduct_int16
=
scalarproduct_int16_c
;
c
->
scalarproduct_and_madd_int16
=
scalarproduct_and_madd_int16_c
;
...
...
libavcodec/dsputil.h
View file @
e034cc6c
...
...
@@ -352,8 +352,6 @@ typedef struct DSPContext {
void
(
*
vector_fmul_reverse
)(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
int
len
);
/* assume len is a multiple of 8, and src arrays are 16-byte aligned */
void
(
*
vector_fmul_add
)(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
len
);
/* assume len is a multiple of 4, and arrays are 16-byte aligned */
void
(
*
vector_fmul_window
)(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
win
,
int
len
);
/* assume len is a multiple of 8, and arrays are 16-byte aligned */
void
(
*
vector_clipf
)(
float
*
dst
/* align 16 */
,
const
float
*
src
/* align 16 */
,
float
min
,
float
max
,
int
len
/* align 16 */
);
/**
...
...
libavcodec/nellymoserdec.c
View file @
e034cc6c
...
...
@@ -32,6 +32,7 @@
*/
#include "libavutil/channel_layout.h"
#include "libavutil/float_dsp.h"
#include "libavutil/lfg.h"
#include "libavutil/random_seed.h"
#include "avcodec.h"
...
...
@@ -52,7 +53,7 @@ typedef struct NellyMoserDecodeContext {
AVLFG
random_state
;
GetBitContext
gb
;
float
scale_bias
;
DSPContext
dsp
;
AVFloatDSPContext
f
dsp
;
FFTContext
imdct_ctx
;
DECLARE_ALIGNED
(
32
,
float
,
imdct_buf
)[
2
][
NELLY_BUF_LEN
];
float
*
imdct_out
;
...
...
@@ -107,7 +108,9 @@ static void nelly_decode_block(NellyMoserDecodeContext *s,
(
NELLY_BUF_LEN
-
NELLY_FILL_LEN
)
*
sizeof
(
float
));
s
->
imdct_ctx
.
imdct_half
(
&
s
->
imdct_ctx
,
s
->
imdct_out
,
aptr
);
s
->
dsp
.
vector_fmul_window
(
aptr
,
s
->
imdct_prev
+
NELLY_BUF_LEN
/
2
,
s
->
imdct_out
,
ff_sine_128
,
NELLY_BUF_LEN
/
2
);
s
->
fdsp
.
vector_fmul_window
(
aptr
,
s
->
imdct_prev
+
NELLY_BUF_LEN
/
2
,
s
->
imdct_out
,
ff_sine_128
,
NELLY_BUF_LEN
/
2
);
FFSWAP
(
float
*
,
s
->
imdct_out
,
s
->
imdct_prev
);
}
}
...
...
@@ -121,7 +124,7 @@ static av_cold int decode_init(AVCodecContext * avctx) {
av_lfg_init
(
&
s
->
random_state
,
0
);
ff_mdct_init
(
&
s
->
imdct_ctx
,
8
,
1
,
1
.
0
);
ff_dsputil_init
(
&
s
->
dsp
,
avctx
);
avpriv_float_dsp_init
(
&
s
->
fdsp
,
avctx
->
flags
&
CODEC_FLAG_BITEXACT
);
s
->
scale_bias
=
1
.
0
/
(
32768
*
8
);
avctx
->
sample_fmt
=
AV_SAMPLE_FMT_FLT
;
...
...
libavcodec/ppc/float_altivec.c
View file @
e034cc6c
...
...
@@ -75,43 +75,8 @@ static void vector_fmul_add_altivec(float *dst, const float *src0,
}
}
static
void
vector_fmul_window_altivec
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
win
,
int
len
)
{
vector
float
zero
,
t0
,
t1
,
s0
,
s1
,
wi
,
wj
;
const
vector
unsigned
char
reverse
=
vcprm
(
3
,
2
,
1
,
0
);
int
i
,
j
;
dst
+=
len
;
win
+=
len
;
src0
+=
len
;
zero
=
(
vector
float
)
vec_splat_u32
(
0
);
for
(
i
=-
len
*
4
,
j
=
len
*
4
-
16
;
i
<
0
;
i
+=
16
,
j
-=
16
)
{
s0
=
vec_ld
(
i
,
src0
);
s1
=
vec_ld
(
j
,
src1
);
wi
=
vec_ld
(
i
,
win
);
wj
=
vec_ld
(
j
,
win
);
s1
=
vec_perm
(
s1
,
s1
,
reverse
);
wj
=
vec_perm
(
wj
,
wj
,
reverse
);
t0
=
vec_madd
(
s0
,
wj
,
zero
);
t0
=
vec_nmsub
(
s1
,
wi
,
t0
);
t1
=
vec_madd
(
s0
,
wi
,
zero
);
t1
=
vec_madd
(
s1
,
wj
,
t1
);
t1
=
vec_perm
(
t1
,
t1
,
reverse
);
vec_st
(
t0
,
i
,
dst
);
vec_st
(
t1
,
j
,
dst
);
}
}
void
ff_float_init_altivec
(
DSPContext
*
c
,
AVCodecContext
*
avctx
)
{
c
->
vector_fmul_reverse
=
vector_fmul_reverse_altivec
;
c
->
vector_fmul_add
=
vector_fmul_add_altivec
;
if
(
!
(
avctx
->
flags
&
CODEC_FLAG_BITEXACT
))
{
c
->
vector_fmul_window
=
vector_fmul_window_altivec
;
}
}
libavcodec/twinvq.c
View file @
e034cc6c
...
...
@@ -650,11 +650,10 @@ static void imdct_and_window(TwinContext *tctx, enum FrameType ftype, int wtype,
mdct
->
imdct_half
(
mdct
,
buf1
+
bsize
*
j
,
in
+
bsize
*
j
);
tctx
->
dsp
.
vector_fmul_window
(
out2
,
prev_buf
+
(
bsize
-
wsize
)
/
2
,
buf1
+
bsize
*
j
,
ff_sine_windows
[
av_log2
(
wsize
)],
wsize
/
2
);
tctx
->
fdsp
.
vector_fmul_window
(
out2
,
prev_buf
+
(
bsize
-
wsize
)
/
2
,
buf1
+
bsize
*
j
,
ff_sine_windows
[
av_log2
(
wsize
)],
wsize
/
2
);
out2
+=
wsize
;
memcpy
(
out2
,
buf1
+
bsize
*
j
+
wsize
/
2
,
(
bsize
-
wsize
/
2
)
*
sizeof
(
float
));
...
...
libavcodec/vorbisdec.c
View file @
e034cc6c
...
...
@@ -1620,13 +1620,13 @@ static int vorbis_parse_audio_packet(vorbis_context *vc, float **floor_ptr)
const
float
*
win
=
vc
->
win
[
blockflag
&
previous_window
];
if
(
blockflag
==
previous_window
)
{
vc
->
dsp
.
vector_fmul_window
(
ret
,
saved
,
buf
,
win
,
blocksize
/
4
);
vc
->
f
dsp
.
vector_fmul_window
(
ret
,
saved
,
buf
,
win
,
blocksize
/
4
);
}
else
if
(
blockflag
>
previous_window
)
{
vc
->
dsp
.
vector_fmul_window
(
ret
,
saved
,
buf
,
win
,
bs0
/
4
);
vc
->
f
dsp
.
vector_fmul_window
(
ret
,
saved
,
buf
,
win
,
bs0
/
4
);
memcpy
(
ret
+
bs0
/
2
,
buf
+
bs0
/
4
,
((
bs1
-
bs0
)
/
4
)
*
sizeof
(
float
));
}
else
{
memcpy
(
ret
,
saved
,
((
bs1
-
bs0
)
/
4
)
*
sizeof
(
float
));
vc
->
dsp
.
vector_fmul_window
(
ret
+
(
bs1
-
bs0
)
/
4
,
saved
+
(
bs1
-
bs0
)
/
4
,
buf
,
win
,
bs0
/
4
);
vc
->
f
dsp
.
vector_fmul_window
(
ret
+
(
bs1
-
bs0
)
/
4
,
saved
+
(
bs1
-
bs0
)
/
4
,
buf
,
win
,
bs0
/
4
);
}
memcpy
(
saved
,
buf
+
blocksize
/
4
,
blocksize
/
4
*
sizeof
(
float
));
}
...
...
libavcodec/wmaprodec.c
View file @
e034cc6c
...
...
@@ -1046,8 +1046,8 @@ static void wmapro_window(WMAProDecodeCtx *s)
winlen
>>=
1
;
s
->
dsp
.
vector_fmul_window
(
start
,
start
,
start
+
winlen
,
window
,
winlen
);
s
->
f
dsp
.
vector_fmul_window
(
start
,
start
,
start
+
winlen
,
window
,
winlen
);
s
->
channel
[
c
].
prev_block_len
=
s
->
subframe_len
;
}
...
...
libavcodec/x86/dsputil_mmx.c
View file @
e034cc6c
...
...
@@ -1892,72 +1892,6 @@ static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize)
}
}
#if HAVE_6REGS
static
void
vector_fmul_window_3dnowext
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
win
,
int
len
)
{
x86_reg
i
=
-
len
*
4
;
x86_reg
j
=
len
*
4
-
8
;
__asm__
volatile
(
"1:
\n
"
"pswapd (%5, %1), %%mm1
\n
"
"movq (%5, %0), %%mm0
\n
"
"pswapd (%4, %1), %%mm5
\n
"
"movq (%3, %0), %%mm4
\n
"
"movq %%mm0, %%mm2
\n
"
"movq %%mm1, %%mm3
\n
"
"pfmul %%mm4, %%mm2
\n
"
// src0[len + i] * win[len + i]
"pfmul %%mm5, %%mm3
\n
"
// src1[j] * win[len + j]
"pfmul %%mm4, %%mm1
\n
"
// src0[len + i] * win[len + j]
"pfmul %%mm5, %%mm0
\n
"
// src1[j] * win[len + i]
"pfadd %%mm3, %%mm2
\n
"
"pfsub %%mm0, %%mm1
\n
"
"pswapd %%mm2, %%mm2
\n
"
"movq %%mm1, (%2, %0)
\n
"
"movq %%mm2, (%2, %1)
\n
"
"sub $8, %1
\n
"
"add $8, %0
\n
"
"jl 1b
\n
"
"femms
\n
"
:
"+r"
(
i
),
"+r"
(
j
)
:
"r"
(
dst
+
len
),
"r"
(
src0
+
len
),
"r"
(
src1
),
"r"
(
win
+
len
)
);
}
static
void
vector_fmul_window_sse
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
win
,
int
len
)
{
x86_reg
i
=
-
len
*
4
;
x86_reg
j
=
len
*
4
-
16
;
__asm__
volatile
(
"1:
\n
"
"movaps (%5, %1), %%xmm1
\n
"
"movaps (%5, %0), %%xmm0
\n
"
"movaps (%4, %1), %%xmm5
\n
"
"movaps (%3, %0), %%xmm4
\n
"
"shufps $0x1b, %%xmm1, %%xmm1
\n
"
"shufps $0x1b, %%xmm5, %%xmm5
\n
"
"movaps %%xmm0, %%xmm2
\n
"
"movaps %%xmm1, %%xmm3
\n
"
"mulps %%xmm4, %%xmm2
\n
"
// src0[len + i] * win[len + i]
"mulps %%xmm5, %%xmm3
\n
"
// src1[j] * win[len + j]
"mulps %%xmm4, %%xmm1
\n
"
// src0[len + i] * win[len + j]
"mulps %%xmm5, %%xmm0
\n
"
// src1[j] * win[len + i]
"addps %%xmm3, %%xmm2
\n
"
"subps %%xmm0, %%xmm1
\n
"
"shufps $0x1b, %%xmm2, %%xmm2
\n
"
"movaps %%xmm1, (%2, %0)
\n
"
"movaps %%xmm2, (%2, %1)
\n
"
"sub $16, %1
\n
"
"add $16, %0
\n
"
"jl 1b
\n
"
:
"+r"
(
i
),
"+r"
(
j
)
:
"r"
(
dst
+
len
),
"r"
(
src0
+
len
),
"r"
(
src1
),
"r"
(
win
+
len
)
);
}
#endif
/* HAVE_6REGS */
static
void
vector_clipf_sse
(
float
*
dst
,
const
float
*
src
,
float
min
,
float
max
,
int
len
)
{
...
...
@@ -2320,14 +2254,6 @@ static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx,
#endif
/* HAVE_YASM */
}
static
void
dsputil_init_3dnowext
(
DSPContext
*
c
,
AVCodecContext
*
avctx
,
int
mm_flags
)
{
#if HAVE_AMD3DNOWEXT_INLINE && HAVE_6REGS
c
->
vector_fmul_window
=
vector_fmul_window_3dnowext
;
#endif
}
static
void
dsputil_init_sse
(
DSPContext
*
c
,
AVCodecContext
*
avctx
,
int
mm_flags
)
{
const
int
high_bit_depth
=
avctx
->
bits_per_raw_sample
>
8
;
...
...
@@ -2343,10 +2269,6 @@ static void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, int mm_flags)
c
->
vorbis_inverse_coupling
=
vorbis_inverse_coupling_sse
;
#if HAVE_6REGS
c
->
vector_fmul_window
=
vector_fmul_window_sse
;
#endif
c
->
vector_clipf
=
vector_clipf_sse
;
#endif
/* HAVE_INLINE_ASM */
...
...
@@ -2530,9 +2452,6 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx)
if
(
mm_flags
&
AV_CPU_FLAG_3DNOW
)
dsputil_init_3dnow
(
c
,
avctx
,
mm_flags
);
if
(
mm_flags
&
AV_CPU_FLAG_3DNOWEXT
)
dsputil_init_3dnowext
(
c
,
avctx
,
mm_flags
);
if
(
mm_flags
&
AV_CPU_FLAG_SSE
)
dsputil_init_sse
(
c
,
avctx
,
mm_flags
);
...
...
libavutil/arm/float_dsp_init_neon.c
View file @
e034cc6c
...
...
@@ -32,9 +32,13 @@ void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul,
void
ff_vector_fmul_scalar_neon
(
float
*
dst
,
const
float
*
src
,
float
mul
,
int
len
);
void
ff_vector_fmul_window_neon
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
win
,
int
len
);
void
ff_float_dsp_init_neon
(
AVFloatDSPContext
*
fdsp
)
{
fdsp
->
vector_fmul
=
ff_vector_fmul_neon
;
fdsp
->
vector_fmac_scalar
=
ff_vector_fmac_scalar_neon
;
fdsp
->
vector_fmul_scalar
=
ff_vector_fmul_scalar_neon
;
fdsp
->
vector_fmul_window
=
ff_vector_fmul_window_neon
;
}
libavutil/arm/float_dsp_neon.S
View file @
e034cc6c
...
...
@@ -146,3 +146,50 @@ NOVFP vdup.32 q8, r2
bx lr
.unreq len
endfunc
function ff_vector_fmul_window_neon, export=1
push {r4,r5,lr}
ldr lr, [sp, #12]
sub r2, r2, #8
sub r5, lr, #2
add r2, r2, r5, lsl #2
add r4, r3, r5, lsl #3
add ip, r0, r5, lsl #3
mov r5, #-16
vld1.32 {d0,d1}, [r1,:128]!
vld1.32 {d2,d3}, [r2,:128], r5
vld1.32 {d4,d5}, [r3,:128]!
vld1.32 {d6,d7}, [r4,:128], r5
1: subs lr, lr, #4
vmul.f32 d22, d0, d4
vrev64.32 q3, q3
vmul.f32 d23, d1, d5
vrev64.32 q1, q1
vmul.f32 d20, d0, d7
vmul.f32 d21, d1, d6
beq 2f
vmla.f32 d22, d3, d7
vld1.32 {d0,d1}, [r1,:128]!
vmla.f32 d23, d2, d6
vld1.32 {d18,d19},[r2,:128], r5
vmls.f32 d20, d3, d4
vld1.32 {d24,d25},[r3,:128]!
vmls.f32 d21, d2, d5
vld1.32 {d6,d7}, [r4,:128], r5
vmov q1, q9
vrev64.32 q11, q11
vmov q2, q12
vswp d22, d23
vst1.32 {d20,d21},[r0,:128]!
vst1.32 {d22,d23},[ip,:128], r5
b 1b
2: vmla.f32 d22, d3, d7
vmla.f32 d23, d2, d6
vmls.f32 d20, d3, d4
vmls.f32 d21, d2, d5
vrev64.32 q11, q11
vswp d22, d23
vst1.32 {d20,d21},[r0,:128]!
vst1.32 {d22,d23},[ip,:128], r5
pop {r4,r5,pc}
endfunc
libavutil/float_dsp.c
View file @
e034cc6c
...
...
@@ -52,12 +52,32 @@ static void vector_dmul_scalar_c(double *dst, const double *src, double mul,
dst
[
i
]
=
src
[
i
]
*
mul
;
}
static
void
vector_fmul_window_c
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
win
,
int
len
)
{
int
i
,
j
;
dst
+=
len
;
win
+=
len
;
src0
+=
len
;
for
(
i
=
-
len
,
j
=
len
-
1
;
i
<
0
;
i
++
,
j
--
)
{
float
s0
=
src0
[
i
];
float
s1
=
src1
[
j
];
float
wi
=
win
[
i
];
float
wj
=
win
[
j
];
dst
[
i
]
=
s0
*
wj
-
s1
*
wi
;
dst
[
j
]
=
s0
*
wi
+
s1
*
wj
;
}
}
void
avpriv_float_dsp_init
(
AVFloatDSPContext
*
fdsp
,
int
bit_exact
)
{
fdsp
->
vector_fmul
=
vector_fmul_c
;
fdsp
->
vector_fmac_scalar
=
vector_fmac_scalar_c
;
fdsp
->
vector_fmul_scalar
=
vector_fmul_scalar_c
;
fdsp
->
vector_dmul_scalar
=
vector_dmul_scalar_c
;
fdsp
->
vector_fmul_window
=
vector_fmul_window_c
;
#if ARCH_ARM
ff_float_dsp_init_arm
(
fdsp
);
...
...
libavutil/float_dsp.h
View file @
e034cc6c
...
...
@@ -81,6 +81,25 @@ typedef struct AVFloatDSPContext {
*/
void
(
*
vector_dmul_scalar
)(
double
*
dst
,
const
double
*
src
,
double
mul
,
int
len
);
/**
* Overlap/add with window function.
* Used primarily by MDCT-based audio codecs.
* Source and destination vectors must overlap exactly or not at all.
*
* @param dst result vector
* constraints: 16-byte aligned
* @param src0 first source vector
* constraints: 16-byte aligned
* @param src1 second source vector
* constraints: 16-byte aligned
* @param win half-window vector
* constraints: 16-byte aligned
* @param len length of vector
* constraints: multiple of 4
*/
void
(
*
vector_fmul_window
)(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
win
,
int
len
);
}
AVFloatDSPContext
;
/**
...
...
libavutil/ppc/float_dsp_altivec.c
View file @
e034cc6c
...
...
@@ -36,3 +36,36 @@ void ff_vector_fmul_altivec(float *dst, const float *src0, const float *src1,
vec_st
(
d1
,
16
,
dst
+
i
);
}
}
void
ff_vector_fmul_window_altivec
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
win
,
int
len
)
{
vector
float
zero
,
t0
,
t1
,
s0
,
s1
,
wi
,
wj
;
const
vector
unsigned
char
reverse
=
vcprm
(
3
,
2
,
1
,
0
);
int
i
,
j
;
dst
+=
len
;
win
+=
len
;
src0
+=
len
;
zero
=
(
vector
float
)
vec_splat_u32
(
0
);
for
(
i
=
-
len
*
4
,
j
=
len
*
4
-
16
;
i
<
0
;
i
+=
16
,
j
-=
16
)
{
s0
=
vec_ld
(
i
,
src0
);
s1
=
vec_ld
(
j
,
src1
);
wi
=
vec_ld
(
i
,
win
);
wj
=
vec_ld
(
j
,
win
);
s1
=
vec_perm
(
s1
,
s1
,
reverse
);
wj
=
vec_perm
(
wj
,
wj
,
reverse
);
t0
=
vec_madd
(
s0
,
wj
,
zero
);
t0
=
vec_nmsub
(
s1
,
wi
,
t0
);
t1
=
vec_madd
(
s0
,
wi
,
zero
);
t1
=
vec_madd
(
s1
,
wj
,
t1
);
t1
=
vec_perm
(
t1
,
t1
,
reverse
);
vec_st
(
t0
,
i
,
dst
);
vec_st
(
t1
,
j
,
dst
);
}
}
libavutil/ppc/float_dsp_altivec.h
View file @
e034cc6c
...
...
@@ -24,4 +24,8 @@
extern
void
ff_vector_fmul_altivec
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
int
len
);
extern
void
ff_vector_fmul_window_altivec
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
win
,
int
len
);
#endif
/* AVUTIL_PPC_FLOAT_DSP_ALTIVEC_H */
libavutil/ppc/float_dsp_init.c
View file @
e034cc6c
...
...
@@ -32,5 +32,9 @@ void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int bit_exact)
return
;
fdsp
->
vector_fmul
=
ff_vector_fmul_altivec
;
if
(
!
bit_exact
)
{
fdsp
->
vector_fmul_window
=
ff_vector_fmul_window_altivec
;
}
#endif
}
libavutil/x86/float_dsp_init.c
View file @
e034cc6c
...
...
@@ -21,6 +21,7 @@
#include "libavutil/cpu.h"
#include "libavutil/float_dsp.h"
#include "cpu.h"
#include "asm.h"
extern
void
ff_vector_fmul_sse
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
int
len
);
...
...
@@ -40,10 +41,84 @@ extern void ff_vector_dmul_scalar_sse2(double *dst, const double *src,
extern
void
ff_vector_dmul_scalar_avx
(
double
*
dst
,
const
double
*
src
,
double
mul
,
int
len
);
#if HAVE_6REGS
static
void
vector_fmul_window_3dnowext
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
win
,
int
len
)
{
x86_reg
i
=
-
len
*
4
;
x86_reg
j
=
len
*
4
-
8
;
__asm__
volatile
(
"1:
\n
"
"pswapd (%5, %1), %%mm1
\n
"
"movq (%5, %0), %%mm0
\n
"
"pswapd (%4, %1), %%mm5
\n
"
"movq (%3, %0), %%mm4
\n
"
"movq %%mm0, %%mm2
\n
"
"movq %%mm1, %%mm3
\n
"
"pfmul %%mm4, %%mm2
\n
"
// src0[len + i] * win[len + i]
"pfmul %%mm5, %%mm3
\n
"
// src1[j] * win[len + j]
"pfmul %%mm4, %%mm1
\n
"
// src0[len + i] * win[len + j]
"pfmul %%mm5, %%mm0
\n
"
// src1[j] * win[len + i]
"pfadd %%mm3, %%mm2
\n
"
"pfsub %%mm0, %%mm1
\n
"
"pswapd %%mm2, %%mm2
\n
"
"movq %%mm1, (%2, %0)
\n
"
"movq %%mm2, (%2, %1)
\n
"
"sub $8, %1
\n
"
"add $8, %0
\n
"
"jl 1b
\n
"
"femms
\n
"
:
"+r"
(
i
),
"+r"
(
j
)
:
"r"
(
dst
+
len
),
"r"
(
src0
+
len
),
"r"
(
src1
),
"r"
(
win
+
len
)
);
}
static
void
vector_fmul_window_sse
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
win
,
int
len
)
{
x86_reg
i
=
-
len
*
4
;
x86_reg
j
=
len
*
4
-
16
;
__asm__
volatile
(
"1:
\n
"
"movaps (%5, %1), %%xmm1
\n
"
"movaps (%5, %0), %%xmm0
\n
"
"movaps (%4, %1), %%xmm5
\n
"
"movaps (%3, %0), %%xmm4
\n
"
"shufps $0x1b, %%xmm1, %%xmm1
\n
"
"shufps $0x1b, %%xmm5, %%xmm5
\n
"
"movaps %%xmm0, %%xmm2
\n
"
"movaps %%xmm1, %%xmm3
\n
"
"mulps %%xmm4, %%xmm2
\n
"
// src0[len + i] * win[len + i]
"mulps %%xmm5, %%xmm3
\n
"
// src1[j] * win[len + j]
"mulps %%xmm4, %%xmm1
\n
"
// src0[len + i] * win[len + j]
"mulps %%xmm5, %%xmm0
\n
"
// src1[j] * win[len + i]
"addps %%xmm3, %%xmm2
\n
"
"subps %%xmm0, %%xmm1
\n
"
"shufps $0x1b, %%xmm2, %%xmm2
\n
"
"movaps %%xmm1, (%2, %0)
\n
"
"movaps %%xmm2, (%2, %1)
\n
"
"sub $16, %1
\n
"
"add $16, %0
\n
"
"jl 1b
\n
"
:
"+r"
(
i
),
"+r"
(
j
)
:
"r"
(
dst
+
len
),
"r"
(
src0
+
len
),
"r"
(
src1
),
"r"
(
win
+
len
)
);
}
#endif
/* HAVE_6REGS */
void
ff_float_dsp_init_x86
(
AVFloatDSPContext
*
fdsp
)
{
int
mm_flags
=
av_get_cpu_flags
();
#if HAVE_6REGS
if
(
INLINE_AMD3DNOWEXT
(
mm_flags
))
{
fdsp
->
vector_fmul_window
=
vector_fmul_window_3dnowext
;
}
if
(
INLINE_SSE
(
mm_flags
))
{
fdsp
->
vector_fmul_window
=
vector_fmul_window_sse
;
}
#endif
if
(
EXTERNAL_SSE
(
mm_flags
))
{
fdsp
->
vector_fmul
=
ff_vector_fmul_sse
;
fdsp
->
vector_fmac_scalar
=
ff_vector_fmac_scalar_sse
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment