Commit 2f17f9c1 authored by Loren Merritt

1.5x faster ff_vorbis_floor1_render_list, 5% faster vorbis decoding on Core2.

1.3x and 3% on G4.
Though I think only part of this speedup is due to my optimizations per se;
some of it is that I got a better roll on the GCC random code generator.
Trivial reorderings of this function have a disproportionate effect on speed.

Originally committed as revision 19726 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent ae925315
...@@ -146,25 +146,51 @@ void ff_vorbis_ready_floor1_list(vorbis_floor1_entry * list, int values) { ...@@ -146,25 +146,51 @@ void ff_vorbis_ready_floor1_list(vorbis_floor1_entry * list, int values) {
} }
} }
/**
 * Write the interior samples of a shallow line segment into buf.
 *
 * render_line() calls this only when 2*ady <= adx and has already stored the
 * sample at the starting x itself. This function fills the positions
 * (x + 1) .. (x1 - 1) of buf with ff_vorbis_floor1_inverse_db_table[y],
 * stepping y by sy whenever the error accumulator becomes non-negative.
 *
 * @param x    start position; the sample at x itself is not written here
 * @param y    table index for the start position
 * @param x1   end position (exclusive)
 * @param sy   amount added to y on each step (render_line() passes -1 or +1)
 * @param ady  absolute vertical distance of the segment
 * @param adx  horizontal distance of the segment
 * @param buf  output buffer, indexed by position
 *
 * y is used as a table index without any range check in this function.
 */
static inline void render_line_unrolled(intptr_t x, intptr_t y, int x1, intptr_t sy, int ady, int adx, float * buf) {
    int err = -adx;

    /* Rebase x and buf so the last position to fill (x1 - 1) sits at index 0;
     * x then runs over negative offsets and the loop test is a compare
     * against zero. */
    x -= x1-1;
    buf += x1-1;

    while (++x < 0) {
        err += ady;
        if (err >= 0) {
            /* y steps here. With 2*ady <= adx the following sample can never
             * step again, so it is emitted right away with the same y; the
             * error update adds its ady and subtracts adx for the step. */
            err += ady - adx;
            y += sy;
            buf[x++] = ff_vorbis_floor1_inverse_db_table[y];
        }
        buf[x] = ff_vorbis_floor1_inverse_db_table[y];
    }

    /* The loop leaves x at 0 or 1. x == 1 means the last position was already
     * written as the second half of a pair; x == 0 means it is still pending. */
    if (x <= 0) {
        if (err + ady >= 0)
            y += sy;
        buf[x] = ff_vorbis_floor1_inverse_db_table[y];
    }
}
/*
 * render_line: store ff_vorbis_floor1_inverse_db_table[y] into buf[x] for
 * x = x0 .. x1-1, with y starting at y0 and moving towards y1.
 *
 * NOTE(review): this span appears to be a flattened side-by-side diff. Where a
 * line carries two statements, the first looks like the pre-commit text and
 * the second the post-commit text; lines with a single statement exist on one
 * side only. The comments here describe the post-commit side.
 *
 * buf[x0] is written first. When 2*|dy| <= adx the remaining samples are
 * delegated to render_line_unrolled(); otherwise the general loop runs.
 * The general path divides by adx, so it assumes x1 != x0 - TODO confirm
 * against the caller.
 */
static void render_line(int x0, int y0, int x1, int y1, float * buf) { static void render_line(int x0, int y0, int x1, int y1, float * buf) {
int dy = y1 - y0; int dy = y1 - y0;
int adx = x1 - x0; int adx = x1 - x0;
int ady = FFABS(dy);
int sy = dy<0 ? -1 : 1;
buf[x0] = ff_vorbis_floor1_inverse_db_table[y0];
if(ady*2<=adx) { // optimized common case
render_line_unrolled(x0, y0, x1, sy, ady, adx, buf);
} else {
/* base is the truncated per-sample slope; it is added to y on every sample. */
int base = dy / adx; int base = dy / adx;
int ady = FFABS(dy) - FFABS(base) * adx;
int x = x0; int x = x0;
int y = y0; int y = y0;
/* Post-commit side starts err at -adx so the step test is err >= 0;
 * pre-commit side starts at 0 and tests err >= adx. */
int err = 0; int err = -adx;
/* After this, ady holds what is left of |dy| once base*adx is removed. */
int sy = dy<0 ? -1 : 1; ady -= FFABS(base) * adx;
buf[x] = ff_vorbis_floor1_inverse_db_table[y];
while (++x < x1) { while (++x < x1) {
y += base;
err += ady; err += ady;
if (err >= adx) { if (err >= 0) {
err -= adx; err -= adx;
y += sy; y += sy;
} }
y += base;
buf[x] = ff_vorbis_floor1_inverse_db_table[y]; buf[x] = ff_vorbis_floor1_inverse_db_table[y];
} }
}
} }
void ff_vorbis_floor1_render_list(vorbis_floor1_entry * list, int values, uint_fast16_t * y_list, int * flag, int multiplier, float * out, int samples) { void ff_vorbis_floor1_render_list(vorbis_floor1_entry * list, int values, uint_fast16_t * y_list, int * flag, int multiplier, float * out, int samples) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment