Commit f82e8f34 authored by Martin Storsjö's avatar Martin Storsjö

adpcm: Store the trellis nodes in a heap instead of a sorted array

This avoids having to memmove the large parts of the array when inserting into
it.

For -trellis 8, this lowers the run time from 245 seconds to 190 seconds,
for a 30 second 44 kHz mono sample, on my machine.

Originally committed as revision 25731 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent 5d6e4c16
...@@ -352,6 +352,7 @@ static void adpcm_compress_trellis(AVCodecContext *avctx, const short *samples, ...@@ -352,6 +352,7 @@ static void adpcm_compress_trellis(AVCodecContext *avctx, const short *samples,
TrellisNode *t = node_buf + frontier*(i&1); TrellisNode *t = node_buf + frontier*(i&1);
TrellisNode **u; TrellisNode **u;
int sample = samples[i*stride]; int sample = samples[i*stride];
int heap_pos = 0;
memset(nodes_next, 0, frontier*sizeof(TrellisNode*)); memset(nodes_next, 0, frontier*sizeof(TrellisNode*));
for(j=0; j<frontier && nodes[j]; j++) { for(j=0; j<frontier && nodes[j]; j++) {
// higher j have higher ssd already, so they're unlikely to use a suboptimal next sample too // higher j have higher ssd already, so they're unlikely to use a suboptimal next sample too
...@@ -369,11 +370,11 @@ static void adpcm_compress_trellis(AVCodecContext *avctx, const short *samples, ...@@ -369,11 +370,11 @@ static void adpcm_compress_trellis(AVCodecContext *avctx, const short *samples,
#define STORE_NODE(NAME, STEP_INDEX)\ #define STORE_NODE(NAME, STEP_INDEX)\
int d;\ int d;\
uint32_t ssd;\ uint32_t ssd;\
int pos;\
TrellisNode *u;\
dec_sample = av_clip_int16(dec_sample);\ dec_sample = av_clip_int16(dec_sample);\
d = sample - dec_sample;\ d = sample - dec_sample;\
ssd = nodes[j]->ssd + d*d;\ ssd = nodes[j]->ssd + d*d;\
if(nodes_next[frontier-1] && ssd >= nodes_next[frontier-1]->ssd)\
continue;\
/* Collapse any two states with the same previous sample value. \ /* Collapse any two states with the same previous sample value. \
* One could also distinguish states by step and by 2nd to last * One could also distinguish states by step and by 2nd to last
* sample, but the effects of that are negligible. */\ * sample, but the effects of that are negligible. */\
...@@ -383,12 +384,28 @@ static void adpcm_compress_trellis(AVCodecContext *avctx, const short *samples, ...@@ -383,12 +384,28 @@ static void adpcm_compress_trellis(AVCodecContext *avctx, const short *samples,
goto next_##NAME;\ goto next_##NAME;\
}\ }\
}\ }\
for(k=0; k<frontier; k++) {\ if (heap_pos < frontier) {\
if(!nodes_next[k] || ssd < nodes_next[k]->ssd) {\ pos = heap_pos++;\
TrellisNode *u = nodes_next[frontier-1];\ } else {\
/* Find the largest node in the heap, which is one \
* of the leaf nodes. */\
int maxpos = 0;\
uint32_t max_ssd = 0;\
for (k = frontier >> 1; k < frontier; k++) {\
if (nodes_next[k]->ssd > max_ssd) {\
maxpos = k;\
max_ssd = nodes_next[k]->ssd;\
}\
}\
pos = maxpos;\
if (ssd > max_ssd)\
goto next_##NAME;\
}\
u = nodes_next[pos];\
if(!u) {\ if(!u) {\
assert(pathn < FREEZE_INTERVAL<<avctx->trellis);\ assert(pathn < FREEZE_INTERVAL<<avctx->trellis);\
u = t++;\ u = t++;\
nodes_next[pos] = u;\
u->path = pathn++;\ u->path = pathn++;\
}\ }\
u->ssd = ssd;\ u->ssd = ssd;\
...@@ -397,10 +414,14 @@ static void adpcm_compress_trellis(AVCodecContext *avctx, const short *samples, ...@@ -397,10 +414,14 @@ static void adpcm_compress_trellis(AVCodecContext *avctx, const short *samples,
u->sample1 = dec_sample;\ u->sample1 = dec_sample;\
paths[u->path].nibble = nibble;\ paths[u->path].nibble = nibble;\
paths[u->path].prev = nodes[j]->path;\ paths[u->path].prev = nodes[j]->path;\
memmove(&nodes_next[k+1], &nodes_next[k], (frontier-k-1)*sizeof(TrellisNode*));\ /* Sift the newly inserted node down in the heap to \
nodes_next[k] = u;\ * restore the heap property. */\
while (pos > 0) {\
int parent = (pos - 1) >> 1;\
if (nodes_next[parent]->ssd <= ssd)\
break;\ break;\
}\ FFSWAP(TrellisNode*, nodes_next[parent], nodes_next[pos]);\
pos = parent;\
}\ }\
next_##NAME:; next_##NAME:;
STORE_NODE(ms, FFMAX(16, (AdaptationTable[nibble] * step) >> 8)); STORE_NODE(ms, FFMAX(16, (AdaptationTable[nibble] * step) >> 8));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment