Commit 81824fe0 authored by Alex Converse's avatar Alex Converse

aacdec: Only load and write each predictor variable once.

This is slightly faster and opens the door for further optimization.

Originally committed as revision 24475 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent 70c99adb
......@@ -1240,23 +1240,26 @@ static av_always_inline void predict(PredictorState *ps, float *coef,
float e0, e1;
float pv;
float k1, k2;
float r0 = ps->r0, r1 = ps->r1;
float cor0 = ps->cor0, cor1 = ps->cor1;
float var0 = ps->var0, var1 = ps->var1;
k1 = ps->var0 > 1 ? ps->cor0 * flt16_even(a / ps->var0) : 0;
k2 = ps->var1 > 1 ? ps->cor1 * flt16_even(a / ps->var1) : 0;
k1 = var0 > 1 ? cor0 * flt16_even(a / var0) : 0;
k2 = var1 > 1 ? cor1 * flt16_even(a / var1) : 0;
pv = flt16_round(k1 * ps->r0 + k2 * ps->r1);
pv = flt16_round(k1 * r0 + k2 * r1);
if (output_enable)
*coef += pv * sf_scale;
e0 = *coef * inv_sf_scale;
e1 = e0 - k1 * ps->r0;
e1 = e0 - k1 * r0;
ps->cor1 = flt16_trunc(alpha * ps->cor1 + ps->r1 * e1);
ps->var1 = flt16_trunc(alpha * ps->var1 + 0.5f * (ps->r1 * ps->r1 + e1 * e1));
ps->cor0 = flt16_trunc(alpha * ps->cor0 + ps->r0 * e0);
ps->var0 = flt16_trunc(alpha * ps->var0 + 0.5f * (ps->r0 * ps->r0 + e0 * e0));
ps->cor1 = flt16_trunc(alpha * cor1 + r1 * e1);
ps->var1 = flt16_trunc(alpha * var1 + 0.5f * (r1 * r1 + e1 * e1));
ps->cor0 = flt16_trunc(alpha * cor0 + r0 * e0);
ps->var0 = flt16_trunc(alpha * var0 + 0.5f * (r0 * r0 + e0 * e0));
ps->r1 = flt16_trunc(a * (ps->r0 - k1 * e0));
ps->r1 = flt16_trunc(a * (r0 - k1 * e0));
ps->r0 = flt16_trunc(a * e0);
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment