Commit 0c142e4c authored by Michael Niedermayer

swr: introduce filter_alloc in preparation of SIMD resample optimisations

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent 80e857c9
...@@ -37,6 +37,7 @@ typedef struct ResampleContext { ...@@ -37,6 +37,7 @@ typedef struct ResampleContext {
const AVClass *av_class; const AVClass *av_class;
uint8_t *filter_bank; uint8_t *filter_bank;
int filter_length; int filter_length;
int filter_alloc;
int ideal_dst_incr; int ideal_dst_incr;
int dst_incr; int dst_incr;
int index; int index;
...@@ -89,7 +90,7 @@ static double bessel(double x){ ...@@ -89,7 +90,7 @@ static double bessel(double x){
* @param type 0->cubic, 1->blackman nuttall windowed sinc, 2..16->kaiser windowed sinc beta=2..16 * @param type 0->cubic, 1->blackman nuttall windowed sinc, 2..16->kaiser windowed sinc beta=2..16
* @return 0 on success, negative on error * @return 0 on success, negative on error
*/ */
static int build_filter(ResampleContext *c, void *filter, double factor, int tap_count, int phase_count, int scale, int type){ static int build_filter(ResampleContext *c, void *filter, double factor, int tap_count, int alloc, int phase_count, int scale, int type){
int ph, i; int ph, i;
double x, y, w; double x, y, w;
double *tab = av_malloc(tap_count * sizeof(*tab)); double *tab = av_malloc(tap_count * sizeof(*tab));
...@@ -133,19 +134,19 @@ static int build_filter(ResampleContext *c, void *filter, double factor, int tap ...@@ -133,19 +134,19 @@ static int build_filter(ResampleContext *c, void *filter, double factor, int tap
switch(c->format){ switch(c->format){
case AV_SAMPLE_FMT_S16P: case AV_SAMPLE_FMT_S16P:
for(i=0;i<tap_count;i++) for(i=0;i<tap_count;i++)
((int16_t*)filter)[ph * tap_count + i] = av_clip(lrintf(tab[i] * scale / norm), INT16_MIN, INT16_MAX); ((int16_t*)filter)[ph * alloc + i] = av_clip(lrintf(tab[i] * scale / norm), INT16_MIN, INT16_MAX);
break; break;
case AV_SAMPLE_FMT_S32P: case AV_SAMPLE_FMT_S32P:
for(i=0;i<tap_count;i++) for(i=0;i<tap_count;i++)
((int32_t*)filter)[ph * tap_count + i] = av_clip(lrintf(tab[i] * scale / norm), INT32_MIN, INT32_MAX); ((int32_t*)filter)[ph * alloc + i] = av_clip(lrintf(tab[i] * scale / norm), INT32_MIN, INT32_MAX);
break; break;
case AV_SAMPLE_FMT_FLTP: case AV_SAMPLE_FMT_FLTP:
for(i=0;i<tap_count;i++) for(i=0;i<tap_count;i++)
((float*)filter)[ph * tap_count + i] = tab[i] * scale / norm; ((float*)filter)[ph * alloc + i] = tab[i] * scale / norm;
break; break;
case AV_SAMPLE_FMT_DBLP: case AV_SAMPLE_FMT_DBLP:
for(i=0;i<tap_count;i++) for(i=0;i<tap_count;i++)
((double*)filter)[ph * tap_count + i] = tab[i] * scale / norm; ((double*)filter)[ph * alloc + i] = tab[i] * scale / norm;
break; break;
} }
} }
...@@ -225,13 +226,14 @@ ResampleContext *swri_resample_init(ResampleContext *c, int out_rate, int in_rat ...@@ -225,13 +226,14 @@ ResampleContext *swri_resample_init(ResampleContext *c, int out_rate, int in_rat
c->linear = linear; c->linear = linear;
c->factor = factor; c->factor = factor;
c->filter_length = FFMAX((int)ceil(filter_size/factor), 1); c->filter_length = FFMAX((int)ceil(filter_size/factor), 1);
c->filter_bank = av_mallocz(c->filter_length*(phase_count+1)*c->felem_size); c->filter_alloc = FFALIGN(c->filter_length, 8);
c->filter_bank = av_mallocz(c->filter_alloc*(phase_count+1)*c->felem_size);
if (!c->filter_bank) if (!c->filter_bank)
goto error; goto error;
if (build_filter(c, (void*)c->filter_bank, factor, c->filter_length, phase_count, 1<<c->filter_shift, WINDOW_TYPE)) if (build_filter(c, (void*)c->filter_bank, factor, c->filter_length, c->filter_alloc, phase_count, 1<<c->filter_shift, WINDOW_TYPE))
goto error; goto error;
memcpy(c->filter_bank + (c->filter_length*phase_count+1)*c->felem_size, c->filter_bank, (c->filter_length-1)*c->felem_size); memcpy(c->filter_bank + (c->filter_alloc*phase_count+1)*c->felem_size, c->filter_bank, (c->filter_alloc-1)*c->felem_size);
memcpy(c->filter_bank + (c->filter_length*phase_count )*c->felem_size, c->filter_bank + (c->filter_length - 1)*c->felem_size, c->felem_size); memcpy(c->filter_bank + (c->filter_alloc*phase_count )*c->felem_size, c->filter_bank + (c->filter_alloc - 1)*c->felem_size, c->felem_size);
} }
c->compensation_distance= 0; c->compensation_distance= 0;
......
...@@ -50,7 +50,7 @@ int RENAME(swri_resample)(ResampleContext *c, DELEM *dst, const DELEM *src, int ...@@ -50,7 +50,7 @@ int RENAME(swri_resample)(ResampleContext *c, DELEM *dst, const DELEM *src, int
frac = (frac + dst_index * (int64_t)dst_incr_frac) % c->src_incr; frac = (frac + dst_index * (int64_t)dst_incr_frac) % c->src_incr;
}else if(compensation_distance == 0 && !c->linear && index >= 0){ }else if(compensation_distance == 0 && !c->linear && index >= 0){
for(dst_index=0; dst_index < dst_size; dst_index++){ for(dst_index=0; dst_index < dst_size; dst_index++){
FELEM *filter= ((FELEM*)c->filter_bank) + c->filter_length*(index & c->phase_mask); FELEM *filter= ((FELEM*)c->filter_bank) + c->filter_alloc*(index & c->phase_mask);
int sample_index= index >> c->phase_shift; int sample_index= index >> c->phase_shift;
if(sample_index + c->filter_length > src_size){ if(sample_index + c->filter_length > src_size){
...@@ -72,7 +72,7 @@ int RENAME(swri_resample)(ResampleContext *c, DELEM *dst, const DELEM *src, int ...@@ -72,7 +72,7 @@ int RENAME(swri_resample)(ResampleContext *c, DELEM *dst, const DELEM *src, int
} }
}else{ }else{
for(dst_index=0; dst_index < dst_size; dst_index++){ for(dst_index=0; dst_index < dst_size; dst_index++){
FELEM *filter= ((FELEM*)c->filter_bank) + c->filter_length*(index & c->phase_mask); FELEM *filter= ((FELEM*)c->filter_bank) + c->filter_alloc*(index & c->phase_mask);
int sample_index= index >> c->phase_shift; int sample_index= index >> c->phase_shift;
FELEM2 val=0; FELEM2 val=0;
...@@ -85,7 +85,7 @@ int RENAME(swri_resample)(ResampleContext *c, DELEM *dst, const DELEM *src, int ...@@ -85,7 +85,7 @@ int RENAME(swri_resample)(ResampleContext *c, DELEM *dst, const DELEM *src, int
FELEM2 v2=0; FELEM2 v2=0;
for(i=0; i<c->filter_length; i++){ for(i=0; i<c->filter_length; i++){
val += src[sample_index + i] * (FELEM2)filter[i]; val += src[sample_index + i] * (FELEM2)filter[i];
v2 += src[sample_index + i] * (FELEM2)filter[i + c->filter_length]; v2 += src[sample_index + i] * (FELEM2)filter[i + c->filter_alloc];
} }
val+=(v2-val)*(FELEML)frac / c->src_incr; val+=(v2-val)*(FELEML)frac / c->src_incr;
}else{ }else{
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment