Commit 2b6a8187 authored by Mirjana Vulin, committed by Michael Niedermayer

mips: optimization for float aac decoder (core module)

Signed-off-by: Mirjana Vulin <mvulin@mips.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent 9df9420d
......@@ -257,10 +257,12 @@ typedef struct ChannelElement {
SpectralBandReplication sbr;
} ChannelElement;
typedef struct AACContext AACContext;
/**
* main AAC context
*/
typedef struct AACContext {
struct AACContext {
AVClass *class;
AVCodecContext *avctx;
AVFrame frame;
......@@ -317,6 +319,18 @@ typedef struct AACContext {
OutputConfiguration oc[2];
int warned_num_aac_frames;
} AACContext;
/* aacdec function pointers */
void (*imdct_and_windowing)(AACContext *ac, SingleChannelElement *sce);
void (*apply_ltp)(AACContext *ac, SingleChannelElement *sce);
void (*apply_tns)(float coef[1024], TemporalNoiseShaping *tns,
IndividualChannelStream *ics, int decode);
void (*windowing_and_mdct_ltp)(AACContext *ac, float *out,
float *in, IndividualChannelStream *ics);
void (*update_ltp)(AACContext *ac, SingleChannelElement *sce);
};
void ff_aacdec_init_mips(AACContext *c);
#endif /* AVCODEC_AAC_H */
......@@ -108,6 +108,8 @@
#if ARCH_ARM
# include "arm/aac.h"
#elif ARCH_MIPS
# include "mips/aacdec_mips.h"
#endif
static VLC vlc_scalefactors;
......@@ -872,6 +874,8 @@ static void reset_predictor_group(PredictorState *ps, int group_num)
ff_aac_spectral_codes[num], sizeof(ff_aac_spectral_codes[num][0]), sizeof(ff_aac_spectral_codes[num][0]), \
size);
static void aacdec_init(AACContext *ac);
static av_cold int aac_decode_init(AVCodecContext *avctx)
{
AACContext *ac = avctx->priv_data;
......@@ -879,6 +883,8 @@ static av_cold int aac_decode_init(AVCodecContext *avctx)
ac->avctx = avctx;
ac->oc[1].m4ac.sample_rate = avctx->sample_rate;
aacdec_init(ac);
avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
if (avctx->extradata_size > 0) {
......@@ -2165,10 +2171,10 @@ static void apply_ltp(AACContext *ac, SingleChannelElement *sce)
predTime[i] = sce->ltp_state[i + 2048 - ltp->lag] * ltp->coef;
memset(&predTime[i], 0, (2048 - i) * sizeof(float));
windowing_and_mdct_ltp(ac, predFreq, predTime, &sce->ics);
ac->windowing_and_mdct_ltp(ac, predFreq, predTime, &sce->ics);
if (sce->tns.present)
apply_tns(predFreq, &sce->tns, &sce->ics, 0);
ac->apply_tns(predFreq, &sce->tns, &sce->ics, 0);
for (sfb = 0; sfb < FFMIN(sce->ics.max_sfb, MAX_LTP_LONG_SFB); sfb++)
if (ltp->used[sfb])
......@@ -2380,25 +2386,25 @@ static void spectral_to_sample(AACContext *ac)
if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) {
if (che->ch[0].ics.predictor_present) {
if (che->ch[0].ics.ltp.present)
apply_ltp(ac, &che->ch[0]);
ac->apply_ltp(ac, &che->ch[0]);
if (che->ch[1].ics.ltp.present && type == TYPE_CPE)
apply_ltp(ac, &che->ch[1]);
ac->apply_ltp(ac, &che->ch[1]);
}
}
if (che->ch[0].tns.present)
apply_tns(che->ch[0].coeffs, &che->ch[0].tns, &che->ch[0].ics, 1);
ac->apply_tns(che->ch[0].coeffs, &che->ch[0].tns, &che->ch[0].ics, 1);
if (che->ch[1].tns.present)
apply_tns(che->ch[1].coeffs, &che->ch[1].tns, &che->ch[1].ics, 1);
ac->apply_tns(che->ch[1].coeffs, &che->ch[1].tns, &che->ch[1].ics, 1);
if (type <= TYPE_CPE)
apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, apply_dependent_coupling);
if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT) {
imdct_and_windowing(ac, &che->ch[0]);
ac->imdct_and_windowing(ac, &che->ch[0]);
if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP)
update_ltp(ac, &che->ch[0]);
ac->update_ltp(ac, &che->ch[0]);
if (type == TYPE_CPE) {
imdct_and_windowing(ac, &che->ch[1]);
ac->imdct_and_windowing(ac, &che->ch[1]);
if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP)
update_ltp(ac, &che->ch[1]);
ac->update_ltp(ac, &che->ch[1]);
}
if (ac->oc[1].m4ac.sbr > 0) {
ff_sbr_apply(ac, &che->sbr, type, che->ch[0].ret, che->ch[1].ret);
......@@ -2979,6 +2985,17 @@ static av_cold int latm_decode_init(AVCodecContext *avctx)
return ret;
}
/**
 * Populate the decoder's function-pointer table.
 *
 * Each pointer is first set to the same-named static routine of this file.
 * When ARCH_MIPS is non-zero, ff_aacdec_init_mips() is then called on the
 * same context (presumably to substitute MIPS-specific versions; its body is
 * not visible here).
 *
 * @param c AAC decoder context whose function pointers are initialized
 */
static void aacdec_init(AACContext *c)
{
    /* Generic defaults; the assignments are independent of each other. */
    c->update_ltp             = update_ltp;
    c->windowing_and_mdct_ltp = windowing_and_mdct_ltp;
    c->apply_tns              = apply_tns;
    c->apply_ltp              = apply_ltp;
    c->imdct_and_windowing    = imdct_and_windowing;

    /* Architecture hook runs last so it sees the defaults already in place. */
    if (ARCH_MIPS) {
        ff_aacdec_init_mips(c);
    }
}
/**
* AVOptions for Japanese DTV specific extensions (ADTS only)
*/
......
......@@ -13,3 +13,4 @@ OBJS-$(CONFIG_FFT) += mips/fft_init_table.o
MIPSFPU-OBJS-$(CONFIG_FFT) += mips/fft_mips.o
MIPSFPU-OBJS += mips/fmtconvert_mips.o
OBJS-$(CONFIG_AC3DSP) += mips/ac3dsp_mips.o
OBJS-$(CONFIG_AAC_DECODER) += mips/aacdec_mips.o
This diff is collapsed.
This diff is collapsed.
......@@ -106,6 +106,43 @@ static void vector_fmul_mips(float *dst, const float *src0, const float *src1,
}
}
/**
 * Multiply a vector of floats by a scalar using MIPS FPU inline assembly.
 *
 * Each loop iteration loads four consecutive floats from src, multiplies
 * each by mul, and stores the four products to dst.
 *
 * @param dst destination vector (written through; advanced inside the asm)
 * @param src source vector (only read)
 * @param mul scalar multiplier
 * @param len number of floats to process
 *
 * NOTE(review): the loop body executes before its exit test and only stops
 * when the source pointer is exactly equal to src + len, so this appears to
 * require len to be a positive multiple of 4 -- confirm against callers.
 */
static void vector_fmul_scalar_mips(float *dst, const float *src, float mul,
int len)
{
float temp0, temp1, temp2, temp3;
/* const is cast away because the pointer is a read-write ("+r") asm operand
 * that the loop increments; the data it points to is only loaded. */
float *local_src = (float*)src;
/* One-past-the-end source address used as the loop termination sentinel. */
float *end = local_src + len;
/* loop unrolled 4 times */
__asm__ volatile(
".set push \n\t"
/* noreorder: the instruction following the branch below is its delay slot. */
".set noreorder \n\t"
"1: \n\t"
"lwc1 %[temp0], 0(%[src]) \n\t"
"lwc1 %[temp1], 4(%[src]) \n\t"
"lwc1 %[temp2], 8(%[src]) \n\t"
"lwc1 %[temp3], 12(%[src]) \n\t"
/* dst is advanced before the stores, hence the negative store offsets. */
"addiu %[dst], %[dst], 16 \n\t"
"mul.s %[temp0], %[temp0], %[mul] \n\t"
"mul.s %[temp1], %[temp1], %[mul] \n\t"
"mul.s %[temp2], %[temp2], %[mul] \n\t"
"mul.s %[temp3], %[temp3], %[mul] \n\t"
"addiu %[src], %[src], 16 \n\t"
"swc1 %[temp0], -16(%[dst]) \n\t"
"swc1 %[temp1], -12(%[dst]) \n\t"
"swc1 %[temp2], -8(%[dst]) \n\t"
/* Loop while src != end; the fourth store sits in the branch delay slot. */
"bne %[src], %[end], 1b \n\t"
" swc1 %[temp3], -4(%[dst]) \n\t"
".set pop \n\t"
: [temp0]"=&f"(temp0), [temp1]"=&f"(temp1),
[temp2]"=&f"(temp2), [temp3]"=&f"(temp3),
[dst]"+r"(dst), [src]"+r"(local_src)
: [end]"r"(end), [mul]"f"(mul)
/* The asm stores through dst, which the operand list alone does not convey. */
: "memory"
);
}
static void vector_fmul_window_mips(float *dst, const float *src0,
const float *src1, const float *win, int len)
{
......@@ -216,11 +253,91 @@ static void vector_fmul_window_mips(float *dst, const float *src0,
);
}
}
/**
 * In-place butterfly of two float vectors using MIPS FPU inline assembly.
 *
 * For every index i, the sum v1[i] + v2[i] is written back to v1[i] and the
 * difference v1[i] - v2[i] (both from the values read before the update) is
 * written back to v2[i].
 *
 * Elements are handled in groups of four. The loop is software-pipelined:
 * the first group is loaded before the loop, each iteration computes the
 * current group, loads the next group, then stores the current results, and
 * the last group is computed and stored after the loop at label 2.
 *
 * @param v1  first vector; receives the sums
 * @param v2  second vector; receives the differences
 * @param len number of floats in each vector
 *
 * NOTE(review): the first four elements of each vector are loaded
 * unconditionally and the tail at label 2 always stores four results, so
 * this appears to require len to be a positive multiple of 4 -- confirm
 * against callers.
 */
static void butterflies_float_mips(float *av_restrict v1, float *av_restrict v2,
int len)
{
float temp0, temp1, temp2, temp3, temp4;
float temp5, temp6, temp7, temp8, temp9;
float temp10, temp11, temp12, temp13, temp14, temp15;
int pom;
/* Number of pipelined loop iterations: all 4-element groups except the last,
 * which is finished by the code at label 2. */
pom = (len >> 2)-1;
/* loop unrolled 4 times */
__asm__ volatile (
/* Prologue: preload the first group (temp0-3 from v1, temp4-7 from v2). */
"lwc1 %[temp0], 0(%[v1]) \n\t"
"lwc1 %[temp1], 4(%[v1]) \n\t"
"lwc1 %[temp2], 8(%[v1]) \n\t"
"lwc1 %[temp3], 12(%[v1]) \n\t"
"lwc1 %[temp4], 0(%[v2]) \n\t"
"lwc1 %[temp5], 4(%[v2]) \n\t"
"lwc1 %[temp6], 8(%[v2]) \n\t"
"lwc1 %[temp7], 12(%[v2]) \n\t"
/* Only one group in total: skip the loop and go straight to the tail. */
"beq %[pom], $zero, 2f \n\t"
"1: \n\t"
/* Even-numbered temps 8..14 hold differences, odd-numbered 9..15 hold sums. */
"sub.s %[temp8], %[temp0], %[temp4] \n\t"
"add.s %[temp9], %[temp0], %[temp4] \n\t"
"sub.s %[temp10], %[temp1], %[temp5] \n\t"
"add.s %[temp11], %[temp1], %[temp5] \n\t"
"sub.s %[temp12], %[temp2], %[temp6] \n\t"
"add.s %[temp13], %[temp2], %[temp6] \n\t"
"sub.s %[temp14], %[temp3], %[temp7] \n\t"
"add.s %[temp15], %[temp3], %[temp7] \n\t"
"addiu %[v1], %[v1], 16 \n\t"
"addiu %[v2], %[v2], 16 \n\t"
"addiu %[pom], %[pom], -1 \n\t"
/* Load the next group before storing the results of the current one. */
"lwc1 %[temp0], 0(%[v1]) \n\t"
"lwc1 %[temp1], 4(%[v1]) \n\t"
"lwc1 %[temp2], 8(%[v1]) \n\t"
"lwc1 %[temp3], 12(%[v1]) \n\t"
"lwc1 %[temp4], 0(%[v2]) \n\t"
"lwc1 %[temp5], 4(%[v2]) \n\t"
"lwc1 %[temp6], 8(%[v2]) \n\t"
"lwc1 %[temp7], 12(%[v2]) \n\t"
/* Pointers were already advanced, so the previous group is at -16..-4. */
"swc1 %[temp9], -16(%[v1]) \n\t"
"swc1 %[temp8], -16(%[v2]) \n\t"
"swc1 %[temp11], -12(%[v1]) \n\t"
"swc1 %[temp10], -12(%[v2]) \n\t"
"swc1 %[temp13], -8(%[v1]) \n\t"
"swc1 %[temp12], -8(%[v2]) \n\t"
"swc1 %[temp15], -4(%[v1]) \n\t"
"swc1 %[temp14], -4(%[v2]) \n\t"
"bgtz %[pom], 1b \n\t"
"2: \n\t"
/* Tail: compute and store the final, already-loaded group in place. */
"sub.s %[temp8], %[temp0], %[temp4] \n\t"
"add.s %[temp9], %[temp0], %[temp4] \n\t"
"sub.s %[temp10], %[temp1], %[temp5] \n\t"
"add.s %[temp11], %[temp1], %[temp5] \n\t"
"sub.s %[temp12], %[temp2], %[temp6] \n\t"
"add.s %[temp13], %[temp2], %[temp6] \n\t"
"sub.s %[temp14], %[temp3], %[temp7] \n\t"
"add.s %[temp15], %[temp3], %[temp7] \n\t"
"swc1 %[temp9], 0(%[v1]) \n\t"
"swc1 %[temp8], 0(%[v2]) \n\t"
"swc1 %[temp11], 4(%[v1]) \n\t"
"swc1 %[temp10], 4(%[v2]) \n\t"
"swc1 %[temp13], 8(%[v1]) \n\t"
"swc1 %[temp12], 8(%[v2]) \n\t"
"swc1 %[temp15], 12(%[v1]) \n\t"
"swc1 %[temp14], 12(%[v2]) \n\t"
: [v1]"+r"(v1), [v2]"+r"(v2), [pom]"+r"(pom), [temp0] "=&f" (temp0),
[temp1]"=&f"(temp1), [temp2]"=&f"(temp2), [temp3]"=&f"(temp3),
[temp4]"=&f"(temp4), [temp5]"=&f"(temp5), [temp6]"=&f"(temp6),
[temp7]"=&f"(temp7), [temp8]"=&f"(temp8), [temp9]"=&f"(temp9),
[temp10]"=&f"(temp10), [temp11]"=&f"(temp11), [temp12]"=&f"(temp12),
[temp13]"=&f"(temp13), [temp14]"=&f"(temp14), [temp15]"=&f"(temp15)
:
/* The asm reads and writes memory through v1 and v2. */
: "memory"
);
}
#endif /* HAVE_INLINE_ASM && HAVE_MIPSFPU */
/**
 * Install the MIPS float DSP routines into a float DSP context.
 *
 * When both HAVE_INLINE_ASM and HAVE_MIPSFPU are enabled, four entries of
 * the context are pointed at the inline-assembly implementations defined in
 * this file; otherwise the context is left untouched.
 *
 * @param fdsp float DSP context to update
 */
void ff_float_dsp_init_mips(AVFloatDSPContext *fdsp)
{
#if HAVE_INLINE_ASM && HAVE_MIPSFPU
    /* Independent assignments; each replaces one function pointer. */
    fdsp->butterflies_float  = butterflies_float_mips;
    fdsp->vector_fmul_window = vector_fmul_window_mips;
    fdsp->vector_fmul_scalar = vector_fmul_scalar_mips;
    fdsp->vector_fmul        = vector_fmul_mips;
#endif /* HAVE_INLINE_ASM && HAVE_MIPSFPU */
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment