Commit 68b8e21b authored by Michael Niedermayer

avcodec/aacdec: Use avpriv_float_dsp_alloc()

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent 46b64e30
...@@ -295,7 +295,7 @@ struct AACContext { ...@@ -295,7 +295,7 @@ struct AACContext {
FFTContext mdct_ld; FFTContext mdct_ld;
FFTContext mdct_ltp; FFTContext mdct_ltp;
FmtConvertContext fmt_conv; FmtConvertContext fmt_conv;
AVFloatDSPContext fdsp; AVFloatDSPContext *fdsp;
int random_state; int random_state;
/** @} */ /** @} */
......
...@@ -1137,7 +1137,10 @@ static av_cold int aac_decode_init(AVCodecContext *avctx) ...@@ -1137,7 +1137,10 @@ static av_cold int aac_decode_init(AVCodecContext *avctx)
ff_aac_sbr_init(); ff_aac_sbr_init();
ff_fmt_convert_init(&ac->fmt_conv, avctx); ff_fmt_convert_init(&ac->fmt_conv, avctx);
avpriv_float_dsp_init(&ac->fdsp, avctx->flags & CODEC_FLAG_BITEXACT); ac->fdsp = avpriv_float_dsp_alloc(avctx->flags & CODEC_FLAG_BITEXACT);
if (!ac->fdsp) {
return AVERROR(ENOMEM);
}
ac->random_state = 0x1f2e3d4c; ac->random_state = 0x1f2e3d4c;
...@@ -1641,9 +1644,9 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024], ...@@ -1641,9 +1644,9 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
cfo[k] = ac->random_state; cfo[k] = ac->random_state;
} }
band_energy = ac->fdsp.scalarproduct_float(cfo, cfo, off_len); band_energy = ac->fdsp->scalarproduct_float(cfo, cfo, off_len);
scale = sf[idx] / sqrtf(band_energy); scale = sf[idx] / sqrtf(band_energy);
ac->fdsp.vector_fmul_scalar(cfo, cfo, scale, off_len); ac->fdsp->vector_fmul_scalar(cfo, cfo, scale, off_len);
} }
} else { } else {
const float *vq = ff_aac_codebook_vector_vals[cbt_m1]; const float *vq = ff_aac_codebook_vector_vals[cbt_m1];
...@@ -1789,7 +1792,7 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024], ...@@ -1789,7 +1792,7 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
} }
} while (len -= 2); } while (len -= 2);
ac->fdsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len); ac->fdsp->vector_fmul_scalar(cfo, cfo, sf[idx], off_len);
} }
} }
...@@ -2002,7 +2005,7 @@ static void apply_mid_side_stereo(AACContext *ac, ChannelElement *cpe) ...@@ -2002,7 +2005,7 @@ static void apply_mid_side_stereo(AACContext *ac, ChannelElement *cpe)
cpe->ch[0].band_type[idx] < NOISE_BT && cpe->ch[0].band_type[idx] < NOISE_BT &&
cpe->ch[1].band_type[idx] < NOISE_BT) { cpe->ch[1].band_type[idx] < NOISE_BT) {
for (group = 0; group < ics->group_len[g]; group++) { for (group = 0; group < ics->group_len[g]; group++) {
ac->fdsp.butterflies_float(ch0 + group * 128 + offsets[i], ac->fdsp->butterflies_float(ch0 + group * 128 + offsets[i],
ch1 + group * 128 + offsets[i], ch1 + group * 128 + offsets[i],
offsets[i+1] - offsets[i]); offsets[i+1] - offsets[i]);
} }
...@@ -2041,7 +2044,7 @@ static void apply_intensity_stereo(AACContext *ac, ...@@ -2041,7 +2044,7 @@ static void apply_intensity_stereo(AACContext *ac,
c *= 1 - 2 * cpe->ms_mask[idx]; c *= 1 - 2 * cpe->ms_mask[idx];
scale = c * sce1->sf[idx]; scale = c * sce1->sf[idx];
for (group = 0; group < ics->group_len[g]; group++) for (group = 0; group < ics->group_len[g]; group++)
ac->fdsp.vector_fmul_scalar(coef1 + group * 128 + offsets[i], ac->fdsp->vector_fmul_scalar(coef1 + group * 128 + offsets[i],
coef0 + group * 128 + offsets[i], coef0 + group * 128 + offsets[i],
scale, scale,
offsets[i + 1] - offsets[i]); offsets[i + 1] - offsets[i]);
...@@ -2409,15 +2412,15 @@ static void windowing_and_mdct_ltp(AACContext *ac, float *out, ...@@ -2409,15 +2412,15 @@ static void windowing_and_mdct_ltp(AACContext *ac, float *out,
const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128; const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
if (ics->window_sequence[0] != LONG_STOP_SEQUENCE) { if (ics->window_sequence[0] != LONG_STOP_SEQUENCE) {
ac->fdsp.vector_fmul(in, in, lwindow_prev, 1024); ac->fdsp->vector_fmul(in, in, lwindow_prev, 1024);
} else { } else {
memset(in, 0, 448 * sizeof(float)); memset(in, 0, 448 * sizeof(float));
ac->fdsp.vector_fmul(in + 448, in + 448, swindow_prev, 128); ac->fdsp->vector_fmul(in + 448, in + 448, swindow_prev, 128);
} }
if (ics->window_sequence[0] != LONG_START_SEQUENCE) { if (ics->window_sequence[0] != LONG_START_SEQUENCE) {
ac->fdsp.vector_fmul_reverse(in + 1024, in + 1024, lwindow, 1024); ac->fdsp->vector_fmul_reverse(in + 1024, in + 1024, lwindow, 1024);
} else { } else {
ac->fdsp.vector_fmul_reverse(in + 1024 + 448, in + 1024 + 448, swindow, 128); ac->fdsp->vector_fmul_reverse(in + 1024 + 448, in + 1024 + 448, swindow, 128);
memset(in + 1024 + 576, 0, 448 * sizeof(float)); memset(in + 1024 + 576, 0, 448 * sizeof(float));
} }
ac->mdct_ltp.mdct_calc(&ac->mdct_ltp, out, in); ac->mdct_ltp.mdct_calc(&ac->mdct_ltp, out, in);
...@@ -2470,17 +2473,17 @@ static void update_ltp(AACContext *ac, SingleChannelElement *sce) ...@@ -2470,17 +2473,17 @@ static void update_ltp(AACContext *ac, SingleChannelElement *sce)
if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
memcpy(saved_ltp, saved, 512 * sizeof(float)); memcpy(saved_ltp, saved, 512 * sizeof(float));
memset(saved_ltp + 576, 0, 448 * sizeof(float)); memset(saved_ltp + 576, 0, 448 * sizeof(float));
ac->fdsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64); ac->fdsp->vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64);
for (i = 0; i < 64; i++) for (i = 0; i < 64; i++)
saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * swindow[63 - i]; saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * swindow[63 - i];
} else if (ics->window_sequence[0] == LONG_START_SEQUENCE) { } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
memcpy(saved_ltp, ac->buf_mdct + 512, 448 * sizeof(float)); memcpy(saved_ltp, ac->buf_mdct + 512, 448 * sizeof(float));
memset(saved_ltp + 576, 0, 448 * sizeof(float)); memset(saved_ltp + 576, 0, 448 * sizeof(float));
ac->fdsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64); ac->fdsp->vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64);
for (i = 0; i < 64; i++) for (i = 0; i < 64; i++)
saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * swindow[63 - i]; saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * swindow[63 - i];
} else { // LONG_STOP or ONLY_LONG } else { // LONG_STOP or ONLY_LONG
ac->fdsp.vector_fmul_reverse(saved_ltp, ac->buf_mdct + 512, &lwindow[512], 512); ac->fdsp->vector_fmul_reverse(saved_ltp, ac->buf_mdct + 512, &lwindow[512], 512);
for (i = 0; i < 512; i++) for (i = 0; i < 512; i++)
saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * lwindow[511 - i]; saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * lwindow[511 - i];
} }
...@@ -2521,19 +2524,19 @@ static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce) ...@@ -2521,19 +2524,19 @@ static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce)
*/ */
if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) && if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
(ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) { (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
ac->fdsp.vector_fmul_window( out, saved, buf, lwindow_prev, 512); ac->fdsp->vector_fmul_window( out, saved, buf, lwindow_prev, 512);
} else { } else {
memcpy( out, saved, 448 * sizeof(float)); memcpy( out, saved, 448 * sizeof(float));
if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
ac->fdsp.vector_fmul_window(out + 448 + 0*128, saved + 448, buf + 0*128, swindow_prev, 64); ac->fdsp->vector_fmul_window(out + 448 + 0*128, saved + 448, buf + 0*128, swindow_prev, 64);
ac->fdsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow, 64); ac->fdsp->vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow, 64);
ac->fdsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow, 64); ac->fdsp->vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow, 64);
ac->fdsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow, 64); ac->fdsp->vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow, 64);
ac->fdsp.vector_fmul_window(temp, buf + 3*128 + 64, buf + 4*128, swindow, 64); ac->fdsp->vector_fmul_window(temp, buf + 3*128 + 64, buf + 4*128, swindow, 64);
memcpy( out + 448 + 4*128, temp, 64 * sizeof(float)); memcpy( out + 448 + 4*128, temp, 64 * sizeof(float));
} else { } else {
ac->fdsp.vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, 64); ac->fdsp->vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, 64);
memcpy( out + 576, buf + 64, 448 * sizeof(float)); memcpy( out + 576, buf + 64, 448 * sizeof(float));
} }
} }
...@@ -2541,9 +2544,9 @@ static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce) ...@@ -2541,9 +2544,9 @@ static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce)
// buffer update // buffer update
if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
memcpy( saved, temp + 64, 64 * sizeof(float)); memcpy( saved, temp + 64, 64 * sizeof(float));
ac->fdsp.vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 64); ac->fdsp->vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 64);
ac->fdsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64); ac->fdsp->vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64);
ac->fdsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64); ac->fdsp->vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64);
memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float)); memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float));
} else if (ics->window_sequence[0] == LONG_START_SEQUENCE) { } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
memcpy( saved, buf + 512, 448 * sizeof(float)); memcpy( saved, buf + 512, 448 * sizeof(float));
...@@ -2568,10 +2571,10 @@ static void imdct_and_windowing_ld(AACContext *ac, SingleChannelElement *sce) ...@@ -2568,10 +2571,10 @@ static void imdct_and_windowing_ld(AACContext *ac, SingleChannelElement *sce)
if (ics->use_kb_window[1]) { if (ics->use_kb_window[1]) {
// AAC LD uses a low overlap sine window instead of a KBD window // AAC LD uses a low overlap sine window instead of a KBD window
memcpy(out, saved, 192 * sizeof(float)); memcpy(out, saved, 192 * sizeof(float));
ac->fdsp.vector_fmul_window(out + 192, saved + 192, buf, ff_sine_128, 64); ac->fdsp->vector_fmul_window(out + 192, saved + 192, buf, ff_sine_128, 64);
memcpy( out + 320, buf + 64, 192 * sizeof(float)); memcpy( out + 320, buf + 64, 192 * sizeof(float));
} else { } else {
ac->fdsp.vector_fmul_window(out, saved, buf, ff_sine_512, 256); ac->fdsp->vector_fmul_window(out, saved, buf, ff_sine_512, 256);
} }
// buffer update // buffer update
...@@ -3167,6 +3170,7 @@ static av_cold int aac_decode_close(AVCodecContext *avctx) ...@@ -3167,6 +3170,7 @@ static av_cold int aac_decode_close(AVCodecContext *avctx)
ff_mdct_end(&ac->mdct_small); ff_mdct_end(&ac->mdct_small);
ff_mdct_end(&ac->mdct_ld); ff_mdct_end(&ac->mdct_ld);
ff_mdct_end(&ac->mdct_ltp); ff_mdct_end(&ac->mdct_ltp);
av_freep(&ac->fdsp);
return 0; return 0;
} }
......
...@@ -1700,7 +1700,7 @@ void ff_sbr_apply(AACContext *ac, SpectralBandReplication *sbr, int id_aac, ...@@ -1700,7 +1700,7 @@ void ff_sbr_apply(AACContext *ac, SpectralBandReplication *sbr, int id_aac,
} }
for (ch = 0; ch < nch; ch++) { for (ch = 0; ch < nch; ch++) {
/* decode channel */ /* decode channel */
sbr_qmf_analysis(&ac->fdsp, &sbr->mdct_ana, &sbr->dsp, ch ? R : L, sbr->data[ch].analysis_filterbank_samples, sbr_qmf_analysis(ac->fdsp, &sbr->mdct_ana, &sbr->dsp, ch ? R : L, sbr->data[ch].analysis_filterbank_samples,
(float*)sbr->qmf_filter_scratch, (float*)sbr->qmf_filter_scratch,
sbr->data[ch].W, sbr->data[ch].Ypos); sbr->data[ch].W, sbr->data[ch].Ypos);
sbr->c.sbr_lf_gen(ac, sbr, sbr->X_low, sbr->c.sbr_lf_gen(ac, sbr, sbr->X_low,
...@@ -1746,13 +1746,13 @@ void ff_sbr_apply(AACContext *ac, SpectralBandReplication *sbr, int id_aac, ...@@ -1746,13 +1746,13 @@ void ff_sbr_apply(AACContext *ac, SpectralBandReplication *sbr, int id_aac,
nch = 2; nch = 2;
} }
sbr_qmf_synthesis(&sbr->mdct, &sbr->dsp, &ac->fdsp, sbr_qmf_synthesis(&sbr->mdct, &sbr->dsp, ac->fdsp,
L, sbr->X[0], sbr->qmf_filter_scratch, L, sbr->X[0], sbr->qmf_filter_scratch,
sbr->data[0].synthesis_filterbank_samples, sbr->data[0].synthesis_filterbank_samples,
&sbr->data[0].synthesis_filterbank_samples_offset, &sbr->data[0].synthesis_filterbank_samples_offset,
downsampled); downsampled);
if (nch == 2) if (nch == 2)
sbr_qmf_synthesis(&sbr->mdct, &sbr->dsp, &ac->fdsp, sbr_qmf_synthesis(&sbr->mdct, &sbr->dsp, ac->fdsp,
R, sbr->X[1], sbr->qmf_filter_scratch, R, sbr->X[1], sbr->qmf_filter_scratch,
sbr->data[1].synthesis_filterbank_samples, sbr->data[1].synthesis_filterbank_samples,
&sbr->data[1].synthesis_filterbank_samples_offset, &sbr->data[1].synthesis_filterbank_samples_offset,
......
...@@ -90,7 +90,7 @@ static void imdct_and_windowing_mips(AACContext *ac, SingleChannelElement *sce) ...@@ -90,7 +90,7 @@ static void imdct_and_windowing_mips(AACContext *ac, SingleChannelElement *sce)
*/ */
if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) && if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
(ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) { (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
ac->fdsp.vector_fmul_window( out, saved, buf, lwindow_prev, 512); ac->fdsp->vector_fmul_window( out, saved, buf, lwindow_prev, 512);
} else { } else {
{ {
float *buf1 = saved; float *buf1 = saved;
...@@ -199,7 +199,7 @@ static void imdct_and_windowing_mips(AACContext *ac, SingleChannelElement *sce) ...@@ -199,7 +199,7 @@ static void imdct_and_windowing_mips(AACContext *ac, SingleChannelElement *sce)
} }
} }
} else { } else {
ac->fdsp.vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, 64); ac->fdsp->vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, 64);
{ {
float *buf1 = buf + 64; float *buf1 = buf + 64;
float *buf2 = out + 576; float *buf2 = out + 576;
...@@ -248,9 +248,9 @@ static void imdct_and_windowing_mips(AACContext *ac, SingleChannelElement *sce) ...@@ -248,9 +248,9 @@ static void imdct_and_windowing_mips(AACContext *ac, SingleChannelElement *sce)
// buffer update // buffer update
if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
ac->fdsp.vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 64); ac->fdsp->vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 64);
ac->fdsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64); ac->fdsp->vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64);
ac->fdsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64); ac->fdsp->vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64);
{ {
float *buf1 = buf + 7*128 + 64; float *buf1 = buf + 7*128 + 64;
float *buf2 = saved + 448; float *buf2 = saved + 448;
...@@ -561,7 +561,7 @@ static void update_ltp_mips(AACContext *ac, SingleChannelElement *sce) ...@@ -561,7 +561,7 @@ static void update_ltp_mips(AACContext *ac, SingleChannelElement *sce)
: "memory" : "memory"
); );
ac->fdsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64); ac->fdsp->vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64);
for (i = 0; i < 16; i++){ for (i = 0; i < 16; i++){
/* loop unrolled 4 times */ /* loop unrolled 4 times */
__asm__ volatile ( __asm__ volatile (
...@@ -646,7 +646,7 @@ static void update_ltp_mips(AACContext *ac, SingleChannelElement *sce) ...@@ -646,7 +646,7 @@ static void update_ltp_mips(AACContext *ac, SingleChannelElement *sce)
: [loop_end]"r"(loop_end) : [loop_end]"r"(loop_end)
: "memory" : "memory"
); );
ac->fdsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64); ac->fdsp->vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64);
for (i = 0; i < 16; i++){ for (i = 0; i < 16; i++){
/* loop unrolled 8 times */ /* loop unrolled 8 times */
__asm__ volatile ( __asm__ volatile (
...@@ -683,7 +683,7 @@ static void update_ltp_mips(AACContext *ac, SingleChannelElement *sce) ...@@ -683,7 +683,7 @@ static void update_ltp_mips(AACContext *ac, SingleChannelElement *sce)
} }
} else { // LONG_STOP or ONLY_LONG } else { // LONG_STOP or ONLY_LONG
float *ptr1, *ptr2, *ptr3; float *ptr1, *ptr2, *ptr3;
ac->fdsp.vector_fmul_reverse(saved_ltp, ac->buf_mdct + 512, &lwindow[512], 512); ac->fdsp->vector_fmul_reverse(saved_ltp, ac->buf_mdct + 512, &lwindow[512], 512);
ptr1 = &saved_ltp[512]; ptr1 = &saved_ltp[512];
ptr2 = &ac->buf_mdct[1023]; ptr2 = &ac->buf_mdct[1023];
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment