Commit 9b538537, authored by Ronald S. Bultje, committed by Michael Niedermayer

Rewrite main resampling loop (common and linear).

This removes a branch at a performance-sensitive point (in the middle
of the loop). In fate-swr-resample-s32p-8000-2626, this makes the code
about 10% faster. It also simplifies the loops, allowing us to rewrite
them in yasm at some later point.

The compensation_distance != 0 code and index < 0 code are still kind
of hairy. For compensation_distance != 0, this should likely be handled
in the caller, so that it calls swri_resample twice (once until the
dst_incr switch-point, and once with the remainder of the samples). For
index < 0, the code should probably be rewritten to break out of the
loop once sample_index >= 0, and then continue (e.g. via a tail-call) in
the common or linear resampling loops.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent e91f27cb
...@@ -134,37 +134,69 @@ int RENAME(swri_resample)(ResampleContext *c, DELEM *dst, const DELEM *src, int ...@@ -134,37 +134,69 @@ int RENAME(swri_resample)(ResampleContext *c, DELEM *dst, const DELEM *src, int
av_assert2(index >= 0); av_assert2(index >= 0);
*consumed= index; *consumed= index;
index = 0; index = 0;
}else if(compensation_distance == 0 && !c->linear && index >= 0){ } else if (compensation_distance == 0 && index >= 0) {
int sample_index = 0; int64_t end_index = (1 + src_size - c->filter_length) << c->phase_shift;
for(dst_index=0; dst_index < dst_size; dst_index++){ int64_t delta_frac = (end_index - index) * c->src_incr - c->frac;
FELEM *filter; int delta_n = (delta_frac + c->dst_incr - 1) / c->dst_incr;
sample_index += index >> c->phase_shift; int n = FFMIN(dst_size, delta_n);
int sample_index;
if (!c->linear) {
sample_index = index >> c->phase_shift;
index &= c->phase_mask; index &= c->phase_mask;
filter= ((FELEM*)c->filter_bank) + c->filter_alloc*index; for (dst_index = 0; dst_index < n; dst_index++) {
FELEM *filter = ((FELEM *) c->filter_bank) + c->filter_alloc * index;
if(sample_index + c->filter_length > src_size){
break;
}else{
#ifdef COMMON_CORE #ifdef COMMON_CORE
COMMON_CORE COMMON_CORE
#else #else
FELEM2 val=0; FELEM2 val=0;
for(i=0; i<c->filter_length; i++){ for (i = 0; i < c->filter_length; i++) {
val += src[sample_index + i] * (FELEM2)filter[i]; val += src[sample_index + i] * (FELEM2)filter[i];
} }
OUT(dst[dst_index], val); OUT(dst[dst_index], val);
#endif #endif
frac += dst_incr_frac;
index += dst_incr;
if (frac >= c->src_incr) {
frac -= c->src_incr;
index++;
}
sample_index += index >> c->phase_shift;
index &= c->phase_mask;
} }
} else {
sample_index = index >> c->phase_shift;
index &= c->phase_mask;
for (dst_index = 0; dst_index < n; dst_index++) {
FELEM *filter = ((FELEM *) c->filter_bank) + c->filter_alloc * index;
FELEM2 val=0, v2 = 0;
frac += dst_incr_frac; #ifdef LINEAR_CORE
index += dst_incr; LINEAR_CORE
if(frac >= c->src_incr){ #else
frac -= c->src_incr; for (i = 0; i < c->filter_length; i++) {
index++; val += src[sample_index + i] * (FELEM2)filter[i];
v2 += src[sample_index + i] * (FELEM2)filter[i + c->filter_alloc];
}
#endif
val += (v2 - val) * (FELEML) frac / c->src_incr;
OUT(dst[dst_index], val);
frac += dst_incr_frac;
index += dst_incr;
if (frac >= c->src_incr) {
frac -= c->src_incr;
index++;
}
sample_index += index >> c->phase_shift;
index &= c->phase_mask;
} }
} }
*consumed = sample_index; *consumed = sample_index;
}else{ } else {
int sample_index = 0; int sample_index = 0;
for(dst_index=0; dst_index < dst_size; dst_index++){ for(dst_index=0; dst_index < dst_size; dst_index++){
FELEM *filter; FELEM *filter;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment