Commit da64bd6a authored by Rostislav Pehlivanov's avatar Rostislav Pehlivanov

aaccoder: tweak PNS implementation further

This commit changes a few things about the noise substitution
logic:
 - Brings back the quantization factor (reduced to 3) during
   scalefactor index calculations.
 - Rejects any zeroed bands. They should be inaudiable and it's
   a waste transmitting the scalefactor indices for these.
 - Uses swb_offsets instead of incrementing a 'start' with every
   window group size.
 - Rejects all PNS during short windows.
Overall improves quality. There was a plan to use the lfg system
to create the random numbers instead of using whatever the decoder
uses but for now this works fine. Entropy is far from important here.
Signed-off-by: 's avatarRostislav Pehlivanov <atomnuker@gmail.com>
parent b421455e
...@@ -51,9 +51,12 @@ ...@@ -51,9 +51,12 @@
/** Frequency in Hz for lower limit of noise substitution **/ /** Frequency in Hz for lower limit of noise substitution **/
#define NOISE_LOW_LIMIT 4000 #define NOISE_LOW_LIMIT 4000
/** Pointless to substitute very high short lived inaudiable frequencies **/
#define NOISE_HIGH_LIMIT 18120
/* Parameter of f(x) = a*(lambda/100), defines the maximum fourier spread /* Parameter of f(x) = a*(lambda/100), defines the maximum fourier spread
* beyond which no PNS is used (since the SFBs contain tone rather than noise) */ * beyond which no PNS is used (since the SFBs contain tone rather than noise) */
#define NOISE_SPREAD_THRESHOLD 0.9673f #define NOISE_SPREAD_THRESHOLD 0.5073f
/* Parameter of f(x) = a*(100/lambda), defines how much PNS is allowed to /* Parameter of f(x) = a*(100/lambda), defines how much PNS is allowed to
* replace low energy non zero bands */ * replace low energy non zero bands */
...@@ -335,7 +338,7 @@ static void set_special_band_scalefactors(AACEncContext *s, SingleChannelElement ...@@ -335,7 +338,7 @@ static void set_special_band_scalefactors(AACEncContext *s, SingleChannelElement
minscaler_i = FFMIN(minscaler_i, sce->sf_idx[w*16+g]); minscaler_i = FFMIN(minscaler_i, sce->sf_idx[w*16+g]);
bands++; bands++;
} else if (sce->band_type[w*16+g] == NOISE_BT) { } else if (sce->band_type[w*16+g] == NOISE_BT) {
sce->sf_idx[w*16+g] = av_clip(roundf(log2f(sce->pns_ener[w*16+g])*2), -100, 155); sce->sf_idx[w*16+g] = av_clip(3+ceilf(log2f(sce->pns_ener[w*16+g])*2), -100, 155);
minscaler_n = FFMIN(minscaler_n, sce->sf_idx[w*16+g]); minscaler_n = FFMIN(minscaler_n, sce->sf_idx[w*16+g]);
bands++; bands++;
} }
...@@ -863,7 +866,7 @@ static void search_for_quantizers_fast(AVCodecContext *avctx, AACEncContext *s, ...@@ -863,7 +866,7 @@ static void search_for_quantizers_fast(AVCodecContext *avctx, AACEncContext *s,
static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce) static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce)
{ {
FFPsyBand *band; FFPsyBand *band;
int w, g, w2, i, start, count = 0; int w, g, w2, i;
float *PNS = &s->scoefs[0*128], *PNS34 = &s->scoefs[1*128]; float *PNS = &s->scoefs[0*128], *PNS34 = &s->scoefs[1*128];
float *NOR34 = &s->scoefs[3*128]; float *NOR34 = &s->scoefs[3*128];
const float lambda = s->lambda; const float lambda = s->lambda;
...@@ -871,20 +874,20 @@ static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChanne ...@@ -871,20 +874,20 @@ static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChanne
const float thr_mult = NOISE_LAMBDA_REPLACE*(100.0f/lambda); const float thr_mult = NOISE_LAMBDA_REPLACE*(100.0f/lambda);
const float spread_threshold = NOISE_SPREAD_THRESHOLD*(lambda/100.f); const float spread_threshold = NOISE_SPREAD_THRESHOLD*(lambda/100.f);
if (sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE)
return;
for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
start = 0;
for (g = 0; g < sce->ics.num_swb; g++) { for (g = 0; g < sce->ics.num_swb; g++) {
int noise_sfi, try_pns = 0; int noise_sfi;
float dist1 = 0.0f, dist2 = 0.0f, noise_amp; float dist1 = 0.0f, dist2 = 0.0f, noise_amp;
float pns_energy = 0.0f, energy_ratio, dist_thresh; float pns_energy = 0.0f, energy_ratio, dist_thresh;
float sfb_energy = 0.0f, threshold = 0.0f, spread = 0.0f; float sfb_energy = 0.0f, threshold = 0.0f, spread = 0.0f;
float freq_boost = FFMAX(0.88f*start*freq_mult/NOISE_LOW_LIMIT, 1.0f); const int start = sce->ics.swb_offset[w*16+g];
if (start*freq_mult < NOISE_LOW_LIMIT) { const float freq = start*freq_mult;
start += sce->ics.swb_sizes[g]; const float freq_boost = FFMAX(0.88f*freq/NOISE_LOW_LIMIT, 1.0f);
if (freq < NOISE_LOW_LIMIT)
continue; continue;
} else {
dist_thresh = FFMIN(0.008f*(NOISE_LOW_LIMIT/start*freq_mult), 1.11f);
}
for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g]; band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
sfb_energy += band->energy; sfb_energy += band->energy;
...@@ -892,18 +895,12 @@ static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChanne ...@@ -892,18 +895,12 @@ static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChanne
threshold += band->threshold; threshold += band->threshold;
} }
if (sce->zeroes[w*16+g]) { /* Ramps down at ~8000Hz and loosens the dist threshold */
try_pns = 1; dist_thresh = FFMIN(2.5f*NOISE_LOW_LIMIT/freq, 1.27f);
} else if (sfb_energy < threshold*freq_boost) {
try_pns = 1;
} else if (spread > spread_threshold) {
try_pns = 0;
} else if (sfb_energy < threshold*thr_mult*freq_boost) {
try_pns = 1;
}
if (!try_pns || !sfb_energy) { if (sce->zeroes[w*16+g] || spread < spread_threshold ||
start += sce->ics.swb_sizes[g]; sfb_energy > threshold*thr_mult*freq_boost || !sfb_energy) {
sce->pns_ener[w*16+g] = sfb_energy;
continue; continue;
} }
...@@ -911,16 +908,17 @@ static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChanne ...@@ -911,16 +908,17 @@ static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChanne
noise_amp = -ff_aac_pow2sf_tab[noise_sfi + POW_SF2_ZERO]; /* Dequantize */ noise_amp = -ff_aac_pow2sf_tab[noise_sfi + POW_SF2_ZERO]; /* Dequantize */
for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
float band_energy, scale; float band_energy, scale;
band = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g]; const int start_c = sce->ics.swb_offset[(w+w2)*16+g];
band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
for (i = 0; i < sce->ics.swb_sizes[g]; i++) for (i = 0; i < sce->ics.swb_sizes[g]; i++)
PNS[i] = s->random_state = lcg_random(s->random_state); PNS[i] = s->random_state = lcg_random(s->random_state);
band_energy = s->fdsp->scalarproduct_float(PNS, PNS, sce->ics.swb_sizes[g]); band_energy = s->fdsp->scalarproduct_float(PNS, PNS, sce->ics.swb_sizes[g]);
scale = noise_amp/sqrtf(band_energy); scale = noise_amp/sqrtf(band_energy);
s->fdsp->vector_fmul_scalar(PNS, PNS, scale, sce->ics.swb_sizes[g]); s->fdsp->vector_fmul_scalar(PNS, PNS, scale, sce->ics.swb_sizes[g]);
pns_energy += s->fdsp->scalarproduct_float(PNS, PNS, sce->ics.swb_sizes[g]); pns_energy += s->fdsp->scalarproduct_float(PNS, PNS, sce->ics.swb_sizes[g]);
abs_pow34_v(NOR34, &sce->coeffs[start+(w+w2)*128], sce->ics.swb_sizes[g]); abs_pow34_v(NOR34, &sce->coeffs[start_c], sce->ics.swb_sizes[g]);
abs_pow34_v(PNS34, PNS, sce->ics.swb_sizes[g]); abs_pow34_v(PNS34, PNS, sce->ics.swb_sizes[g]);
dist1 += quantize_band_cost(s, &sce->coeffs[start + (w+w2)*128], dist1 += quantize_band_cost(s, &sce->coeffs[start_c],
NOR34, NOR34,
sce->ics.swb_sizes[g], sce->ics.swb_sizes[g],
sce->sf_idx[(w+w2)*16+g], sce->sf_idx[(w+w2)*16+g],
...@@ -935,7 +933,7 @@ static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChanne ...@@ -935,7 +933,7 @@ static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChanne
} }
energy_ratio = sfb_energy/pns_energy; /* Compensates for quantization error */ energy_ratio = sfb_energy/pns_energy; /* Compensates for quantization error */
sce->pns_ener[w*16+g] = energy_ratio*sfb_energy; sce->pns_ener[w*16+g] = energy_ratio*sfb_energy;
if (energy_ratio > 0.80f && energy_ratio < 1.20f && dist1/dist2 > dist_thresh) { if (energy_ratio > 0.85f && energy_ratio < 1.25f && dist1/dist2 > dist_thresh) {
sce->band_type[w*16+g] = NOISE_BT; sce->band_type[w*16+g] = NOISE_BT;
sce->zeroes[w*16+g] = 0; sce->zeroes[w*16+g] = 0;
if (sce->band_type[w*16+g-1] != NOISE_BT && /* Prevent holes */ if (sce->band_type[w*16+g-1] != NOISE_BT && /* Prevent holes */
...@@ -943,9 +941,7 @@ static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChanne ...@@ -943,9 +941,7 @@ static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChanne
sce->band_type[w*16+g-1] = NOISE_BT; sce->band_type[w*16+g-1] = NOISE_BT;
sce->zeroes[w*16+g-1] = 0; sce->zeroes[w*16+g-1] = 0;
} }
count++;
} }
start += sce->ics.swb_sizes[g];
} }
} }
} }
......
...@@ -163,7 +163,7 @@ fate-aac-pns-encode: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-re ...@@ -163,7 +163,7 @@ fate-aac-pns-encode: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-re
fate-aac-pns-encode: CMP = stddev fate-aac-pns-encode: CMP = stddev
fate-aac-pns-encode: REF = $(SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav fate-aac-pns-encode: REF = $(SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav
fate-aac-pns-encode: CMP_SHIFT = -4096 fate-aac-pns-encode: CMP_SHIFT = -4096
fate-aac-pns-encode: CMP_TARGET = 637.66 fate-aac-pns-encode: CMP_TARGET = 633.77
fate-aac-pns-encode: SIZE_TOLERANCE = 3560 fate-aac-pns-encode: SIZE_TOLERANCE = 3560
fate-aac-pns-encode: FUZZ = 5 fate-aac-pns-encode: FUZZ = 5
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment