Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
9d35fa52
Commit
9d35fa52
authored
Apr 25, 2011
by
Vitor Sessak
Committed by
Reinhard Tartler
Apr 26, 2011
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add AVX FFT implementation.
Signed-off-by:
Reinhard Tartler
<
siretart@tauware.de
>
parent
13dfce3d
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
23 changed files
with
450 additions
and
207 deletions
+450
-207
Changelog
Changelog
+1
-1
aac.h
libavcodec/aac.h
+5
-5
aacenc.h
libavcodec/aacenc.h
+1
-1
ac3dec.h
libavcodec/ac3dec.h
+5
-5
ac3enc.c
libavcodec/ac3enc.c
+1
-1
atrac1.c
libavcodec/atrac1.c
+10
-10
atrac3.c
libavcodec/atrac3.c
+3
-3
binkaudio.c
libavcodec/binkaudio.c
+1
-1
cook.c
libavcodec/cook.c
+1
-1
dca.c
libavcodec/dca.c
+5
-5
fft.c
libavcodec/fft.c
+48
-5
fft.h
libavcodec/fft.h
+2
-1
imc.c
libavcodec/imc.c
+1
-1
nellymoserdec.c
libavcodec/nellymoserdec.c
+2
-2
nellymoserenc.c
libavcodec/nellymoserenc.c
+3
-3
qdm2.c
libavcodec/qdm2.c
+1
-1
wma.h
libavcodec/wma.h
+4
-4
wmaprodec.c
libavcodec/wmaprodec.c
+2
-2
wmavoice.c
libavcodec/wmavoice.c
+3
-3
fft.c
libavcodec/x86/fft.c
+8
-1
fft.h
libavcodec/x86/fft.h
+2
-0
fft_mmx.asm
libavcodec/x86/fft_mmx.asm
+334
-150
fft_sse.c
libavcodec/x86/fft_sse.c
+7
-1
No files found.
Changelog
View file @
9d35fa52
...
...
@@ -5,7 +5,7 @@ releases are sorted from youngest to oldest.
version <next>:
- Lots of deprecated API cruft removed
- fft and imdct optimizations for AVX (Sandy Bridge) processors
version 0.7_beta1:
...
...
libavcodec/aac.h
View file @
9d35fa52
...
...
@@ -223,9 +223,9 @@ typedef struct {
float
sf
[
120
];
///< scalefactors
int
sf_idx
[
128
];
///< scalefactor indices (used by encoder)
uint8_t
zeroes
[
128
];
///< band is not coded (used by encoder)
DECLARE_ALIGNED
(
16
,
float
,
coeffs
)[
1024
];
///< coefficients for IMDCT
DECLARE_ALIGNED
(
16
,
float
,
saved
)[
1024
];
///< overlap
DECLARE_ALIGNED
(
16
,
float
,
ret
)[
2048
];
///< PCM output
DECLARE_ALIGNED
(
32
,
float
,
coeffs
)[
1024
];
///< coefficients for IMDCT
DECLARE_ALIGNED
(
32
,
float
,
saved
)[
1024
];
///< overlap
DECLARE_ALIGNED
(
32
,
float
,
ret
)[
2048
];
///< PCM output
DECLARE_ALIGNED
(
16
,
int16_t
,
ltp_state
)[
3072
];
///< time signal for LTP
PredictorState
predictor_state
[
MAX_PREDICTORS
];
}
SingleChannelElement
;
...
...
@@ -272,7 +272,7 @@ typedef struct {
* @defgroup temporary aligned temporary buffers (We do not want to have these on the stack.)
* @{
*/
DECLARE_ALIGNED
(
16
,
float
,
buf_mdct
)[
1024
];
DECLARE_ALIGNED
(
32
,
float
,
buf_mdct
)[
1024
];
/** @} */
/**
...
...
@@ -296,7 +296,7 @@ typedef struct {
int
sf_offset
;
///< offset into pow2sf_tab as appropriate for dsp.float_to_int16
/** @} */
DECLARE_ALIGNED
(
16
,
float
,
temp
)[
128
];
DECLARE_ALIGNED
(
32
,
float
,
temp
)[
128
];
enum
OCStatus
output_configured
;
}
AACContext
;
...
...
libavcodec/aacenc.h
View file @
9d35fa52
...
...
@@ -64,7 +64,7 @@ typedef struct AACEncContext {
int
last_frame
;
float
lambda
;
DECLARE_ALIGNED
(
16
,
int
,
qcoefs
)[
96
];
///< quantized coefficients
DECLARE_ALIGNED
(
16
,
float
,
scoefs
)[
1024
];
///< scaled coefficients
DECLARE_ALIGNED
(
32
,
float
,
scoefs
)[
1024
];
///< scaled coefficients
}
AACEncContext
;
#endif
/* AVCODEC_AACENC_H */
libavcodec/ac3dec.h
View file @
9d35fa52
...
...
@@ -200,11 +200,11 @@ typedef struct {
///@defgroup arrays aligned arrays
DECLARE_ALIGNED
(
16
,
int
,
fixed_coeffs
)[
AC3_MAX_CHANNELS
][
AC3_MAX_COEFS
];
///< fixed-point transform coefficients
DECLARE_ALIGNED
(
16
,
float
,
transform_coeffs
)[
AC3_MAX_CHANNELS
][
AC3_MAX_COEFS
];
///< transform coefficients
DECLARE_ALIGNED
(
16
,
float
,
delay
)[
AC3_MAX_CHANNELS
][
AC3_BLOCK_SIZE
];
///< delay - added to the next block
DECLARE_ALIGNED
(
16
,
float
,
window
)[
AC3_BLOCK_SIZE
];
///< window coefficients
DECLARE_ALIGNED
(
16
,
float
,
tmp_output
)[
AC3_BLOCK_SIZE
];
///< temporary storage for output before windowing
DECLARE_ALIGNED
(
16
,
float
,
output
)[
AC3_MAX_CHANNELS
][
AC3_BLOCK_SIZE
];
///< output after imdct transform and windowing
DECLARE_ALIGNED
(
32
,
float
,
transform_coeffs
)[
AC3_MAX_CHANNELS
][
AC3_MAX_COEFS
];
///< transform coefficients
DECLARE_ALIGNED
(
32
,
float
,
delay
)[
AC3_MAX_CHANNELS
][
AC3_BLOCK_SIZE
];
///< delay - added to the next block
DECLARE_ALIGNED
(
32
,
float
,
window
)[
AC3_BLOCK_SIZE
];
///< window coefficients
DECLARE_ALIGNED
(
32
,
float
,
tmp_output
)[
AC3_BLOCK_SIZE
];
///< temporary storage for output before windowing
DECLARE_ALIGNED
(
32
,
float
,
output
)[
AC3_MAX_CHANNELS
][
AC3_BLOCK_SIZE
];
///< output after imdct transform and windowing
///@}
}
AC3DecodeContext
;
...
...
libavcodec/ac3enc.c
View file @
9d35fa52
...
...
@@ -201,7 +201,7 @@ typedef struct AC3EncodeContext {
uint8_t
exp_strategy
[
AC3_MAX_CHANNELS
][
AC3_MAX_BLOCKS
];
///< exponent strategies
DECLARE_ALIGNED
(
16
,
SampleType
,
windowed_samples
)[
AC3_WINDOW_SIZE
];
DECLARE_ALIGNED
(
32
,
SampleType
,
windowed_samples
)[
AC3_WINDOW_SIZE
];
}
AC3EncodeContext
;
typedef
struct
AC3Mant
{
...
...
libavcodec/atrac1.c
View file @
9d35fa52
...
...
@@ -60,11 +60,11 @@ typedef struct {
int
log2_block_count
[
AT1_QMF_BANDS
];
///< log2 number of blocks in a band
int
num_bfus
;
///< number of Block Floating Units
float
*
spectrum
[
2
];
DECLARE_ALIGNED
(
16
,
float
,
spec1
)[
AT1_SU_SAMPLES
];
///< mdct buffer
DECLARE_ALIGNED
(
16
,
float
,
spec2
)[
AT1_SU_SAMPLES
];
///< mdct buffer
DECLARE_ALIGNED
(
16
,
float
,
fst_qmf_delay
)[
46
];
///< delay line for the 1st stacked QMF filter
DECLARE_ALIGNED
(
16
,
float
,
snd_qmf_delay
)[
46
];
///< delay line for the 2nd stacked QMF filter
DECLARE_ALIGNED
(
16
,
float
,
last_qmf_delay
)[
256
+
23
];
///< delay line for the last stacked QMF filter
DECLARE_ALIGNED
(
32
,
float
,
spec1
)[
AT1_SU_SAMPLES
];
///< mdct buffer
DECLARE_ALIGNED
(
32
,
float
,
spec2
)[
AT1_SU_SAMPLES
];
///< mdct buffer
DECLARE_ALIGNED
(
32
,
float
,
fst_qmf_delay
)[
46
];
///< delay line for the 1st stacked QMF filter
DECLARE_ALIGNED
(
32
,
float
,
snd_qmf_delay
)[
46
];
///< delay line for the 2nd stacked QMF filter
DECLARE_ALIGNED
(
32
,
float
,
last_qmf_delay
)[
256
+
23
];
///< delay line for the last stacked QMF filter
}
AT1SUCtx
;
/**
...
...
@@ -72,13 +72,13 @@ typedef struct {
*/
typedef
struct
{
AT1SUCtx
SUs
[
AT1_MAX_CHANNELS
];
///< channel sound unit
DECLARE_ALIGNED
(
16
,
float
,
spec
)[
AT1_SU_SAMPLES
];
///< the mdct spectrum buffer
DECLARE_ALIGNED
(
32
,
float
,
spec
)[
AT1_SU_SAMPLES
];
///< the mdct spectrum buffer
DECLARE_ALIGNED
(
16
,
float
,
low
)[
256
];
DECLARE_ALIGNED
(
16
,
float
,
mid
)[
256
];
DECLARE_ALIGNED
(
16
,
float
,
high
)[
512
];
DECLARE_ALIGNED
(
32
,
float
,
low
)[
256
];
DECLARE_ALIGNED
(
32
,
float
,
mid
)[
256
];
DECLARE_ALIGNED
(
32
,
float
,
high
)[
512
];
float
*
bands
[
3
];
DECLARE_ALIGNED
(
16
,
float
,
out_samples
)[
AT1_MAX_CHANNELS
][
AT1_SU_SAMPLES
];
DECLARE_ALIGNED
(
32
,
float
,
out_samples
)[
AT1_MAX_CHANNELS
][
AT1_SU_SAMPLES
];
FFTContext
mdct_ctx
[
3
];
int
channels
;
DSPContext
dsp
;
...
...
libavcodec/atrac3.c
View file @
9d35fa52
...
...
@@ -74,8 +74,8 @@ typedef struct {
int
gcBlkSwitch
;
gain_block
gainBlock
[
2
];
DECLARE_ALIGNED
(
16
,
float
,
spectrum
)[
1024
];
DECLARE_ALIGNED
(
16
,
float
,
IMDCT_buf
)[
1024
];
DECLARE_ALIGNED
(
32
,
float
,
spectrum
)[
1024
];
DECLARE_ALIGNED
(
32
,
float
,
IMDCT_buf
)[
1024
];
float
delayBuf1
[
46
];
///<qmf delay buffers
float
delayBuf2
[
46
];
...
...
@@ -122,7 +122,7 @@ typedef struct {
FFTContext
mdct_ctx
;
}
ATRAC3Context
;
static
DECLARE_ALIGNED
(
16
,
float
,
mdct_window
)[
512
];
static
DECLARE_ALIGNED
(
32
,
float
,
mdct_window
)[
512
];
static
VLC
spectral_coeff_tab
[
7
];
static
float
gain_tab1
[
16
];
static
float
gain_tab2
[
31
];
...
...
libavcodec/binkaudio.c
View file @
9d35fa52
...
...
@@ -55,7 +55,7 @@ typedef struct {
int
num_bands
;
unsigned
int
*
bands
;
float
root
;
DECLARE_ALIGNED
(
16
,
FFTSample
,
coeffs
)[
BINK_BLOCK_MAX_SIZE
];
DECLARE_ALIGNED
(
32
,
FFTSample
,
coeffs
)[
BINK_BLOCK_MAX_SIZE
];
DECLARE_ALIGNED
(
16
,
short
,
previous
)[
BINK_BLOCK_MAX_SIZE
/
16
];
///< coeffs from previous audio block
float
*
coeffs_ptr
[
MAX_CHANNELS
];
///< pointers to the coeffs arrays for float_to_int16_interleave
union
{
...
...
libavcodec/cook.c
View file @
9d35fa52
...
...
@@ -153,7 +153,7 @@ typedef struct cook {
/* data buffers */
uint8_t
*
decoded_bytes_buffer
;
DECLARE_ALIGNED
(
16
,
float
,
mono_mdct_output
)[
2048
];
DECLARE_ALIGNED
(
32
,
float
,
mono_mdct_output
)[
2048
];
float
decode_buffer_1
[
1024
];
float
decode_buffer_2
[
1024
];
float
decode_buffer_0
[
1060
];
/* static allocation for joint decode */
...
...
libavcodec/dca.c
View file @
9d35fa52
...
...
@@ -321,16 +321,16 @@ typedef struct {
/* Subband samples history (for ADPCM) */
float
subband_samples_hist
[
DCA_PRIM_CHANNELS_MAX
][
DCA_SUBBANDS
][
4
];
DECLARE_ALIGNED
(
16
,
float
,
subband_fir_hist
)[
DCA_PRIM_CHANNELS_MAX
][
512
];
DECLARE_ALIGNED
(
16
,
float
,
subband_fir_noidea
)[
DCA_PRIM_CHANNELS_MAX
][
32
];
DECLARE_ALIGNED
(
32
,
float
,
subband_fir_hist
)[
DCA_PRIM_CHANNELS_MAX
][
512
];
DECLARE_ALIGNED
(
32
,
float
,
subband_fir_noidea
)[
DCA_PRIM_CHANNELS_MAX
][
32
];
int
hist_index
[
DCA_PRIM_CHANNELS_MAX
];
DECLARE_ALIGNED
(
16
,
float
,
raXin
)[
32
];
DECLARE_ALIGNED
(
32
,
float
,
raXin
)[
32
];
int
output
;
///< type of output
float
scale_bias
;
///< output scale
DECLARE_ALIGNED
(
16
,
float
,
subband_samples
)[
DCA_BLOCKS_MAX
][
DCA_PRIM_CHANNELS_MAX
][
DCA_SUBBANDS
][
8
];
DECLARE_ALIGNED
(
16
,
float
,
samples
)[(
DCA_PRIM_CHANNELS_MAX
+
1
)
*
256
];
DECLARE_ALIGNED
(
32
,
float
,
subband_samples
)[
DCA_BLOCKS_MAX
][
DCA_PRIM_CHANNELS_MAX
][
DCA_SUBBANDS
][
8
];
DECLARE_ALIGNED
(
32
,
float
,
samples
)[(
DCA_PRIM_CHANNELS_MAX
+
1
)
*
256
];
const
float
*
samples_chanptr
[
DCA_PRIM_CHANNELS_MAX
+
1
];
uint8_t
dca_buffer
[
DCA_MAX_FRAME_SIZE
+
DCA_MAX_EXSS_HEADER_SIZE
+
DCA_BUFFER_PADDING_SIZE
];
...
...
libavcodec/fft.c
View file @
9d35fa52
...
...
@@ -93,6 +93,44 @@ av_cold void ff_init_ff_cos_tabs(int index)
#endif
}
/*
 * 16-entry offset table. fft_perm_avx() adds avx_tab[k] to the base index of
 * a 16-element chunk when is_second_half_of_fft32() reports true for that
 * chunk. Laid out here four entries per row.
 */
static const int avx_tab[] = {
     0,  4,  1,  5,
     8, 12,  9, 13,
     2,  6,  3,  7,
    10, 14, 11, 15
};
static
int
is_second_half_of_fft32
(
int
i
,
int
n
)
{
if
(
n
<=
32
)
return
i
>=
16
;
else
if
(
i
<
n
/
2
)
return
is_second_half_of_fft32
(
i
,
n
/
2
);
else
if
(
i
<
3
*
n
/
4
)
return
is_second_half_of_fft32
(
i
-
n
/
2
,
n
/
4
);
else
return
is_second_half_of_fft32
(
i
-
3
*
n
/
4
,
n
/
4
);
}
/**
 * Fill s->revtab for the FF_FFT_PERM_AVX layout.
 *
 * The 1 << s->nbits indices are processed in chunks of 16. For every source
 * index the slot written is
 *     -split_radix_permutation(index, n, s->inverse) & (n - 1)
 * and the value stored depends on the chunk:
 *   - chunk flagged by is_second_half_of_fft32(): chunk base + avx_tab[k]
 *   - any other chunk: the index with its low three bits rearranged
 *     (bits 1..2 move down to bits 0..1, bit 0 moves up to bit 2)
 *
 * @param s FFT context; reads nbits and inverse, writes revtab
 */
static av_cold void fft_perm_avx(FFTContext *s)
{
    const int n = 1 << s->nbits;
    int base, k;

    for (base = 0; base < n; base += 16) {
        /* the flag is the same for all 16 entries of a chunk */
        const int second_half = is_second_half_of_fft32(base, n);

        for (k = 0; k < 16; k++) {
            const int src  = base + k;
            const int slot = -split_radix_permutation(src, n, s->inverse) & (n - 1);
            int dst;

            if (second_half)
                dst = base + avx_tab[k];
            else
                dst = (src & ~7) | ((src >> 1) & 3) | ((src << 2) & 4);

            s->revtab[slot] = dst;
        }
    }
}
av_cold
int
ff_fft_init
(
FFTContext
*
s
,
int
nbits
,
int
inverse
)
{
int
i
,
j
,
n
;
...
...
@@ -132,11 +170,16 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
for
(
j
=
4
;
j
<=
nbits
;
j
++
)
{
ff_init_ff_cos_tabs
(
j
);
}
for
(
i
=
0
;
i
<
n
;
i
++
)
{
int
j
=
i
;
if
(
s
->
fft_permutation
==
FF_FFT_PERM_SWAP_LSBS
)
j
=
(
j
&~
3
)
|
((
j
>>
1
)
&
1
)
|
((
j
<<
1
)
&
2
);
s
->
revtab
[
-
split_radix_permutation
(
i
,
n
,
s
->
inverse
)
&
(
n
-
1
)]
=
j
;
if
(
s
->
fft_permutation
==
FF_FFT_PERM_AVX
)
{
fft_perm_avx
(
s
);
}
else
{
for
(
i
=
0
;
i
<
n
;
i
++
)
{
int
j
=
i
;
if
(
s
->
fft_permutation
==
FF_FFT_PERM_SWAP_LSBS
)
j
=
(
j
&~
3
)
|
((
j
>>
1
)
&
1
)
|
((
j
<<
1
)
&
2
);
s
->
revtab
[
-
split_radix_permutation
(
i
,
n
,
s
->
inverse
)
&
(
n
-
1
)]
=
j
;
}
}
return
0
;
...
...
libavcodec/fft.h
View file @
9d35fa52
...
...
@@ -85,6 +85,7 @@ struct FFTContext {
int
fft_permutation
;
#define FF_FFT_PERM_DEFAULT 0
#define FF_FFT_PERM_SWAP_LSBS 1
#define FF_FFT_PERM_AVX 2
int
mdct_permutation
;
#define FF_MDCT_PERM_NONE 0
#define FF_MDCT_PERM_INTERLEAVE 1
...
...
@@ -97,7 +98,7 @@ struct FFTContext {
#endif
#define COSTABLE(size) \
COSTABLE_CONST DECLARE_ALIGNED(
16
, FFTSample, FFT_NAME(ff_cos_##size))[size/2]
COSTABLE_CONST DECLARE_ALIGNED(
32
, FFTSample, FFT_NAME(ff_cos_##size))[size/2]
extern
COSTABLE
(
16
);
extern
COSTABLE
(
32
);
...
...
libavcodec/imc.c
View file @
9d35fa52
...
...
@@ -88,7 +88,7 @@ typedef struct {
DSPContext
dsp
;
FFTContext
fft
;
DECLARE_ALIGNED
(
16
,
FFTComplex
,
samples
)[
COEFFS
/
2
];
DECLARE_ALIGNED
(
32
,
FFTComplex
,
samples
)[
COEFFS
/
2
];
float
*
out_samples
;
}
IMCContext
;
...
...
libavcodec/nellymoserdec.c
View file @
9d35fa52
...
...
@@ -47,7 +47,7 @@
typedef
struct
NellyMoserDecodeContext
{
AVCodecContext
*
avctx
;
DECLARE_ALIGNED
(
16
,
float
,
float_buf
)[
NELLY_SAMPLES
];
DECLARE_ALIGNED
(
32
,
float
,
float_buf
)[
NELLY_SAMPLES
];
float
state
[
128
];
AVLFG
random_state
;
GetBitContext
gb
;
...
...
@@ -55,7 +55,7 @@ typedef struct NellyMoserDecodeContext {
DSPContext
dsp
;
FFTContext
imdct_ctx
;
FmtConvertContext
fmt_conv
;
DECLARE_ALIGNED
(
16
,
float
,
imdct_out
)[
NELLY_BUF_LEN
*
2
];
DECLARE_ALIGNED
(
32
,
float
,
imdct_out
)[
NELLY_BUF_LEN
*
2
];
}
NellyMoserDecodeContext
;
static
void
overlap_and_window
(
NellyMoserDecodeContext
*
s
,
float
*
state
,
float
*
audio
,
float
*
a_in
)
...
...
libavcodec/nellymoserenc.c
View file @
9d35fa52
...
...
@@ -55,9 +55,9 @@ typedef struct NellyMoserEncodeContext {
int
have_saved
;
DSPContext
dsp
;
FFTContext
mdct_ctx
;
DECLARE_ALIGNED
(
16
,
float
,
mdct_out
)[
NELLY_SAMPLES
];
DECLARE_ALIGNED
(
16
,
float
,
in_buff
)[
NELLY_SAMPLES
];
DECLARE_ALIGNED
(
16
,
float
,
buf
)[
2
][
3
*
NELLY_BUF_LEN
];
///< sample buffer
DECLARE_ALIGNED
(
32
,
float
,
mdct_out
)[
NELLY_SAMPLES
];
DECLARE_ALIGNED
(
32
,
float
,
in_buff
)[
NELLY_SAMPLES
];
DECLARE_ALIGNED
(
32
,
float
,
buf
)[
2
][
3
*
NELLY_BUF_LEN
];
///< sample buffer
float
(
*
opt
)[
NELLY_BANDS
];
uint8_t
(
*
path
)[
NELLY_BANDS
];
}
NellyMoserEncodeContext
;
...
...
libavcodec/qdm2.c
View file @
9d35fa52
...
...
@@ -120,7 +120,7 @@ typedef struct {
}
FFTCoefficient
;
typedef
struct
{
DECLARE_ALIGNED
(
16
,
QDM2Complex
,
complex
)[
MPA_MAX_CHANNELS
][
256
];
DECLARE_ALIGNED
(
32
,
QDM2Complex
,
complex
)[
MPA_MAX_CHANNELS
][
256
];
}
QDM2FFT
;
/**
...
...
libavcodec/wma.h
View file @
9d35fa52
...
...
@@ -113,15 +113,15 @@ typedef struct WMACodecContext {
uint8_t
ms_stereo
;
///< true if mid/side stereo mode
uint8_t
channel_coded
[
MAX_CHANNELS
];
///< true if channel is coded
int
exponents_bsize
[
MAX_CHANNELS
];
///< log2 ratio frame/exp. length
DECLARE_ALIGNED
(
16
,
float
,
exponents
)[
MAX_CHANNELS
][
BLOCK_MAX_SIZE
];
DECLARE_ALIGNED
(
32
,
float
,
exponents
)[
MAX_CHANNELS
][
BLOCK_MAX_SIZE
];
float
max_exponent
[
MAX_CHANNELS
];
WMACoef
coefs1
[
MAX_CHANNELS
][
BLOCK_MAX_SIZE
];
DECLARE_ALIGNED
(
16
,
float
,
coefs
)[
MAX_CHANNELS
][
BLOCK_MAX_SIZE
];
DECLARE_ALIGNED
(
16
,
FFTSample
,
output
)[
BLOCK_MAX_SIZE
*
2
];
DECLARE_ALIGNED
(
32
,
float
,
coefs
)[
MAX_CHANNELS
][
BLOCK_MAX_SIZE
];
DECLARE_ALIGNED
(
32
,
FFTSample
,
output
)[
BLOCK_MAX_SIZE
*
2
];
FFTContext
mdct_ctx
[
BLOCK_NB_SIZES
];
float
*
windows
[
BLOCK_NB_SIZES
];
/* output buffer for one frame and the last for IMDCT windowing */
DECLARE_ALIGNED
(
16
,
float
,
frame_out
)[
MAX_CHANNELS
][
BLOCK_MAX_SIZE
*
2
];
DECLARE_ALIGNED
(
32
,
float
,
frame_out
)[
MAX_CHANNELS
][
BLOCK_MAX_SIZE
*
2
];
/* last frame info */
uint8_t
last_superframe
[
MAX_CODED_SUPERFRAME_SIZE
+
4
];
/* padding added */
int
last_bitoffset
;
...
...
libavcodec/wmaprodec.c
View file @
9d35fa52
...
...
@@ -145,7 +145,7 @@ typedef struct {
uint8_t
table_idx
;
///< index in sf_offsets for the scale factor reference block
float
*
coeffs
;
///< pointer to the subframe decode buffer
uint16_t
num_vec_coeffs
;
///< number of vector coded coefficients
DECLARE_ALIGNED
(
16
,
float
,
out
)[
WMAPRO_BLOCK_MAX_SIZE
+
WMAPRO_BLOCK_MAX_SIZE
/
2
];
///< output buffer
DECLARE_ALIGNED
(
32
,
float
,
out
)[
WMAPRO_BLOCK_MAX_SIZE
+
WMAPRO_BLOCK_MAX_SIZE
/
2
];
///< output buffer
}
WMAProChannelCtx
;
/**
...
...
@@ -170,7 +170,7 @@ typedef struct WMAProDecodeCtx {
FF_INPUT_BUFFER_PADDING_SIZE
];
///< compressed frame data
PutBitContext
pb
;
///< context for filling the frame_data buffer
FFTContext
mdct_ctx
[
WMAPRO_BLOCK_SIZES
];
///< MDCT context per block size
DECLARE_ALIGNED
(
16
,
float
,
tmp
)[
WMAPRO_BLOCK_MAX_SIZE
];
///< IMDCT output buffer
DECLARE_ALIGNED
(
32
,
float
,
tmp
)[
WMAPRO_BLOCK_MAX_SIZE
];
///< IMDCT output buffer
float
*
windows
[
WMAPRO_BLOCK_SIZES
];
///< windows for the different block sizes
/* frame size dependent frame information (set during initialization) */
...
...
libavcodec/wmavoice.c
View file @
9d35fa52
...
...
@@ -275,11 +275,11 @@ typedef struct {
///< by postfilter
float
denoise_filter_cache
[
MAX_FRAMESIZE
];
int
denoise_filter_cache_size
;
///< samples in #denoise_filter_cache
DECLARE_ALIGNED
(
16
,
float
,
tilted_lpcs_pf
)[
0x80
];
DECLARE_ALIGNED
(
32
,
float
,
tilted_lpcs_pf
)[
0x80
];
///< aligned buffer for LPC tilting
DECLARE_ALIGNED
(
16
,
float
,
denoise_coeffs_pf
)[
0x80
];
DECLARE_ALIGNED
(
32
,
float
,
denoise_coeffs_pf
)[
0x80
];
///< aligned buffer for denoise coefficients
DECLARE_ALIGNED
(
16
,
float
,
synth_filter_out_buf
)[
0x80
+
MAX_LSPS_ALIGN16
];
DECLARE_ALIGNED
(
32
,
float
,
synth_filter_out_buf
)[
0x80
+
MAX_LSPS_ALIGN16
];
///< aligned buffer for postfilter speech
///< synthesis
/**
...
...
libavcodec/x86/fft.c
View file @
9d35fa52
...
...
@@ -25,7 +25,14 @@ av_cold void ff_fft_init_mmx(FFTContext *s)
{
#if HAVE_YASM
int
has_vectors
=
av_get_cpu_flags
();
if
(
has_vectors
&
AV_CPU_FLAG_SSE
&&
HAVE_SSE
)
{
if
(
has_vectors
&
AV_CPU_FLAG_AVX
&&
HAVE_AVX
&&
s
->
nbits
>=
5
)
{
/* AVX for SB */
s
->
imdct_calc
=
ff_imdct_calc_sse
;
s
->
imdct_half
=
ff_imdct_half_avx
;
s
->
fft_permute
=
ff_fft_permute_sse
;
s
->
fft_calc
=
ff_fft_calc_avx
;
s
->
fft_permutation
=
FF_FFT_PERM_AVX
;
}
else
if
(
has_vectors
&
AV_CPU_FLAG_SSE
&&
HAVE_SSE
)
{
/* SSE for P3/P4/K8 */
s
->
imdct_calc
=
ff_imdct_calc_sse
;
s
->
imdct_half
=
ff_imdct_half_sse
;
...
...
libavcodec/x86/fft.h
View file @
9d35fa52
...
...
@@ -22,6 +22,7 @@
#include "libavcodec/fft.h"
void
ff_fft_permute_sse
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_fft_calc_avx
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_fft_calc_sse
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_fft_calc_3dn
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_fft_calc_3dn2
(
FFTContext
*
s
,
FFTComplex
*
z
);
...
...
@@ -32,6 +33,7 @@ void ff_imdct_calc_3dn2(FFTContext *s, FFTSample *output, const FFTSample *input
void
ff_imdct_half_3dn2
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_calc_sse
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_sse
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_avx
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_dct32_float_sse
(
FFTSample
*
out
,
const
FFTSample
*
in
);
#endif
libavcodec/x86/fft_mmx.asm
View file @
9d35fa52
This diff is collapsed.
Click to expand it.
libavcodec/x86/fft_sse.c
View file @
9d35fa52
...
...
@@ -28,6 +28,12 @@ DECLARE_ASM_CONST(16, int, ff_m1m1m1m1)[4] =
void
ff_fft_dispatch_sse
(
FFTComplex
*
z
,
int
nbits
);
void
ff_fft_dispatch_interleave_sse
(
FFTComplex
*
z
,
int
nbits
);
void
ff_fft_dispatch_interleave_avx
(
FFTComplex
*
z
,
int
nbits
);
/**
 * Hand the buffer z and the context's nbits value to
 * ff_fft_dispatch_interleave_avx().
 *
 * @param s FFT context; only s->nbits is read here
 * @param z buffer passed through unchanged to the dispatch routine
 */
void ff_fft_calc_avx(FFTContext *s, FFTComplex *z)
{
    const int nbits = s->nbits;

    ff_fft_dispatch_interleave_avx(z, nbits);
}
void
ff_fft_calc_sse
(
FFTContext
*
s
,
FFTComplex
*
z
)
{
...
...
@@ -77,7 +83,7 @@ void ff_imdct_calc_sse(FFTContext *s, FFTSample *output, const FFTSample *input)
long
n
=
s
->
mdct_size
;
long
n4
=
n
>>
2
;
ff_imdct_half_sse
(
s
,
output
+
n4
,
input
);
s
->
imdct_half
(
s
,
output
+
n4
,
input
);
j
=
-
n
;
k
=
n
-
16
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment