Commit ee2bc597 authored by Janne Grunau's avatar Janne Grunau

aarch64: NEON float (i)MDCT

Approximately as fast as the ARM NEON version on Apple's A7.
parent 650c4300
......@@ -18,3 +18,4 @@ NEON-OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_neon.o \
NEON-OBJS-$(CONFIG_H264QPEL) += aarch64/h264qpel_neon.o \
aarch64/hpeldsp_neon.o
NEON-OBJS-$(CONFIG_HPELDSP) += aarch64/hpeldsp_neon.o
NEON-OBJS-$(CONFIG_MDCT) += aarch64/mdct_neon.o
......@@ -26,6 +26,10 @@
void ff_fft_permute_neon(FFTContext *s, FFTComplex *z);
void ff_fft_calc_neon(FFTContext *s, FFTComplex *z);
void ff_imdct_calc_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_half_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_mdct_calc_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
av_cold void ff_fft_init_aarch64(FFTContext *s)
{
int cpu_flags = av_get_cpu_flags();
......@@ -33,5 +37,11 @@ av_cold void ff_fft_init_aarch64(FFTContext *s)
if (have_neon(cpu_flags)) {
s->fft_permute = ff_fft_permute_neon;
s->fft_calc = ff_fft_calc_neon;
#if CONFIG_MDCT
s->imdct_calc = ff_imdct_calc_neon;
s->imdct_half = ff_imdct_half_neon;
s->mdct_calc = ff_mdct_calc_neon;
s->mdct_permutation = FF_MDCT_PERM_INTERLEAVE;
#endif
}
}
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment