Commit ce4a29c0 authored by Michael Niedermayer

optimize antialias

switch to the integer antialias code as the default, as it's faster now

Originally committed as revision 3925 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent d04728bb
...@@ -224,7 +224,6 @@ static inline int l2_unscale_group(int steps, int mant, int scale_factor) ...@@ -224,7 +224,6 @@ static inline int l2_unscale_group(int steps, int mant, int scale_factor)
/* compute value^(4/3) * 2^(exponent/4). It normalized to FRAC_BITS */ /* compute value^(4/3) * 2^(exponent/4). It normalized to FRAC_BITS */
static inline int l3_unscale(int value, int exponent) static inline int l3_unscale(int value, int exponent)
{ {
unsigned int m; unsigned int m;
int e; int e;
...@@ -323,7 +322,7 @@ static int decode_init(AVCodecContext * avctx) ...@@ -323,7 +322,7 @@ static int decode_init(AVCodecContext * avctx)
avctx->sample_fmt= SAMPLE_FMT_S16; avctx->sample_fmt= SAMPLE_FMT_S16;
#endif #endif
if(avctx->antialias_algo == FF_AA_INT) if(avctx->antialias_algo != FF_AA_FLOAT)
s->compute_antialias= compute_antialias_integer; s->compute_antialias= compute_antialias_integer;
else else
s->compute_antialias= compute_antialias_float; s->compute_antialias= compute_antialias_float;
...@@ -450,10 +449,10 @@ static int decode_init(AVCodecContext * avctx) ...@@ -450,10 +449,10 @@ static int decode_init(AVCodecContext * avctx)
ci = ci_table[i]; ci = ci_table[i];
cs = 1.0 / sqrt(1.0 + ci * ci); cs = 1.0 / sqrt(1.0 + ci * ci);
ca = cs * ci; ca = cs * ci;
csa_table[i][0] = FIX(cs); csa_table[i][0] = FIXHR(cs/4);
csa_table[i][1] = FIX(ca); csa_table[i][1] = FIXHR(ca/4);
csa_table[i][2] = FIX(ca) + FIX(cs); csa_table[i][2] = FIXHR(ca/4) + FIXHR(cs/4);
csa_table[i][3] = FIX(ca) - FIX(cs); csa_table[i][3] = FIXHR(ca/4) - FIXHR(cs/4);
csa_table_float[i][0] = cs; csa_table_float[i][0] = cs;
csa_table_float[i][1] = ca; csa_table_float[i][1] = ca;
csa_table_float[i][2] = ca + cs; csa_table_float[i][2] = ca + cs;
...@@ -1911,8 +1910,8 @@ static void compute_stereo(MPADecodeContext *s, ...@@ -1911,8 +1910,8 @@ static void compute_stereo(MPADecodeContext *s,
static void compute_antialias_integer(MPADecodeContext *s, static void compute_antialias_integer(MPADecodeContext *s,
GranuleDef *g) GranuleDef *g)
{ {
int32_t *ptr, *p0, *p1, *csa; int32_t *ptr, *csa;
int n, i, j; int n, i;
/* we antialias only "long" bands */ /* we antialias only "long" bands */
if (g->block_type == 2) { if (g->block_type == 2) {
...@@ -1926,35 +1925,24 @@ static void compute_antialias_integer(MPADecodeContext *s, ...@@ -1926,35 +1925,24 @@ static void compute_antialias_integer(MPADecodeContext *s,
ptr = g->sb_hybrid + 18; ptr = g->sb_hybrid + 18;
for(i = n;i > 0;i--) { for(i = n;i > 0;i--) {
p0 = ptr - 1; int tmp0, tmp1, tmp2;
p1 = ptr;
csa = &csa_table[0][0]; csa = &csa_table[0][0];
for(j=0;j<4;j++) { #define INT_AA(j) \
int tmp0 = *p0; tmp0 = 4*(ptr[-1-j]);\
int tmp1 = *p1; tmp1 = 4*(ptr[ j]);\
#if 0 tmp2= MULH(tmp0 + tmp1, csa[0+4*j]);\
*p0 = FRAC_RND(MUL64(tmp0, csa[0]) - MUL64(tmp1, csa[1])); ptr[-1-j] = tmp2 - MULH(tmp1, csa[2+4*j]);\
*p1 = FRAC_RND(MUL64(tmp0, csa[1]) + MUL64(tmp1, csa[0])); ptr[ j] = tmp2 + MULH(tmp0, csa[3+4*j]);
#else
int64_t tmp2= MUL64(tmp0 + tmp1, csa[0]); INT_AA(0)
*p0 = FRAC_RND(tmp2 - MUL64(tmp1, csa[2])); INT_AA(1)
*p1 = FRAC_RND(tmp2 + MUL64(tmp0, csa[3])); INT_AA(2)
#endif INT_AA(3)
p0--; p1++; INT_AA(4)
csa += 4; INT_AA(5)
tmp0 = *p0; INT_AA(6)
tmp1 = *p1; INT_AA(7)
#if 0
*p0 = FRAC_RND(MUL64(tmp0, csa[0]) - MUL64(tmp1, csa[1]));
*p1 = FRAC_RND(MUL64(tmp0, csa[1]) + MUL64(tmp1, csa[0]));
#else
tmp2= MUL64(tmp0 + tmp1, csa[0]);
*p0 = FRAC_RND(tmp2 - MUL64(tmp1, csa[2]));
*p1 = FRAC_RND(tmp2 + MUL64(tmp0, csa[3]));
#endif
p0--; p1++;
csa += 4;
}
ptr += 18; ptr += 18;
} }
} }
...@@ -1962,8 +1950,8 @@ static void compute_antialias_integer(MPADecodeContext *s, ...@@ -1962,8 +1950,8 @@ static void compute_antialias_integer(MPADecodeContext *s,
static void compute_antialias_float(MPADecodeContext *s, static void compute_antialias_float(MPADecodeContext *s,
GranuleDef *g) GranuleDef *g)
{ {
int32_t *ptr, *p0, *p1; int32_t *ptr;
int n, i, j; int n, i;
/* we antialias only "long" bands */ /* we antialias only "long" bands */
if (g->block_type == 2) { if (g->block_type == 2) {
...@@ -1977,35 +1965,23 @@ static void compute_antialias_float(MPADecodeContext *s, ...@@ -1977,35 +1965,23 @@ static void compute_antialias_float(MPADecodeContext *s,
ptr = g->sb_hybrid + 18; ptr = g->sb_hybrid + 18;
for(i = n;i > 0;i--) { for(i = n;i > 0;i--) {
float tmp0, tmp1;
float *csa = &csa_table_float[0][0]; float *csa = &csa_table_float[0][0];
p0 = ptr - 1; #define FLOAT_AA(j)\
p1 = ptr; tmp0= ptr[-1-j];\
for(j=0;j<4;j++) { tmp1= ptr[ j];\
float tmp0 = *p0; ptr[-1-j] = lrintf(tmp0 * csa[0+4*j] - tmp1 * csa[1+4*j]);\
float tmp1 = *p1; ptr[ j] = lrintf(tmp0 * csa[1+4*j] + tmp1 * csa[0+4*j]);
#if 1
*p0 = lrintf(tmp0 * csa[0] - tmp1 * csa[1]); FLOAT_AA(0)
*p1 = lrintf(tmp0 * csa[1] + tmp1 * csa[0]); FLOAT_AA(1)
#else FLOAT_AA(2)
float tmp2= (tmp0 + tmp1) * csa[0]; FLOAT_AA(3)
*p0 = lrintf(tmp2 - tmp1 * csa[2]); FLOAT_AA(4)
*p1 = lrintf(tmp2 + tmp0 * csa[3]); FLOAT_AA(5)
#endif FLOAT_AA(6)
p0--; p1++; FLOAT_AA(7)
csa += 4;
tmp0 = *p0;
tmp1 = *p1;
#if 1
*p0 = lrintf(tmp0 * csa[0] - tmp1 * csa[1]);
*p1 = lrintf(tmp0 * csa[1] + tmp1 * csa[0]);
#else
tmp2= (tmp0 + tmp1) * csa[0];
*p0 = lrintf(tmp2 - tmp1 * csa[2]);
*p1 = lrintf(tmp2 + tmp0 * csa[3]);
#endif
p0--; p1++;
csa += 4;
}
ptr += 18; ptr += 18;
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment