Commit 16d4c28c authored by Ganesh Ajjanagadde

avcodec/mpegaudio_tablegen: speed up dynamic table creation

This makes several miscellaneous changes, mainly avoiding the use of pow(),
to achieve significant speedups. Table creation is not speed critical, but
the old code added unnecessary latency and wasted cycles for the user.

All tables were tested and are identical to the old ones
(bit-exact, even in the floating-point case).

Sample benchmark (x86-64, Haswell, GNU/Linux):
old:
102329530 decicycles in mpegaudio_tableinit,       1 runs,      0 skips

new:
34111900 decicycles in mpegaudio_tableinit,       1 runs,      0 skips
Reviewed-by: Ronald S. Bultje <rsbultje@gmail.com>
Signed-off-by: Ganesh Ajjanagadde <gajjanagadde@gmail.com>
parent 538e8ab5
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include <stdlib.h> #include <stdlib.h>
#define CONFIG_HARDCODED_TABLES 0 #define CONFIG_HARDCODED_TABLES 0
#include "mpegaudio_tablegen.h" #include "mpegaudio_tablegen.h"
#include "libavutil/tablegen.h"
#include "tableprint.h" #include "tableprint.h"
int main(void) int main(void)
......
...@@ -45,12 +45,21 @@ static float expval_table_float[512][16]; ...@@ -45,12 +45,21 @@ static float expval_table_float[512][16];
static av_cold void mpegaudio_tableinit(void) static av_cold void mpegaudio_tableinit(void)
{ {
int i, value, exponent; int i, value, exponent;
double exp2_lut[4] = {
1.00000000000000000000, /* 2 ^ (0 * 0.25) */
1.18920711500272106672, /* 2 ^ (1 * 0.25) */
M_SQRT2 , /* 2 ^ (2 * 0.25) */
1.68179283050742908606, /* 2 ^ (3 * 0.25) */
};
double cbrt_lut[16];
for (i = 0; i < 16; ++i)
cbrt_lut[i] = cbrt(i);
for (i = 1; i < TABLE_4_3_SIZE; i++) { for (i = 1; i < TABLE_4_3_SIZE; i++) {
double value = i / 4; double value = i / 4;
double f, fm; double f, fm;
int e, m; int e, m;
/* cbrtf() isn't available on all systems, so we use powf(). */ f = value / IMDCT_SCALAR * cbrt(value) * exp2_lut[i & 3];
f = value / IMDCT_SCALAR * pow(value, 1.0 / 3.0) * pow(2, (i & 3) * 0.25);
fm = frexp(f, &e); fm = frexp(f, &e);
m = (uint32_t)(fm * (1LL << 31) + 0.5); m = (uint32_t)(fm * (1LL << 31) + 0.5);
e += FRAC_BITS - 31 + 5 - 100; e += FRAC_BITS - 31 + 5 - 100;
...@@ -61,10 +70,8 @@ static av_cold void mpegaudio_tableinit(void) ...@@ -61,10 +70,8 @@ static av_cold void mpegaudio_tableinit(void)
} }
for (exponent = 0; exponent < 512; exponent++) { for (exponent = 0; exponent < 512; exponent++) {
for (value = 0; value < 16; value++) { for (value = 0; value < 16; value++) {
/* cbrtf() isn't available on all systems, so we use powf(). */ double f = value * cbrt_lut[value] * pow(2, (exponent - 400) * 0.25 + FRAC_BITS + 5) / IMDCT_SCALAR;
double f = (double)value * pow(value, 1.0 / 3.0) * pow(2, (exponent - 400) * 0.25 + FRAC_BITS + 5) / IMDCT_SCALAR; expval_table_fixed[exponent][value] = (f < 0xFFFFFFFF ? llrint(f) : 0xFFFFFFFF);
/* llrint() isn't always available, so round and cast manually. */
expval_table_fixed[exponent][value] = (long long int) (f < 0xFFFFFFFF ? floor(f + 0.5) : 0xFFFFFFFF);
expval_table_float[exponent][value] = f; expval_table_float[exponent][value] = f;
} }
exp_table_fixed[exponent] = expval_table_fixed[exponent][1]; exp_table_fixed[exponent] = expval_table_fixed[exponent][1];
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment