Commit fa4d900c authored by Rostislav Pehlivanov

aacenc_tns: rework TNS decision logic

Changes:
 - strongly prefer dual filters to a single filter
 - less strict about using 2 filters w.r.t. energy
 - scrap the usage of threshold and spread, useless
 - use odd-shaped windows to set the filter direction
 - use 4 bits instead of 3 bits for short windows
 - simplify and reduce the main loop to a single level
 - add stricter regulations for short windows

All of this now makes the TNS implementation operate
as well as it can, and it definitely shows. The frequency
thresholds are now even better defined by looking at
the spectra, and the overall sound has been improved at
the price of just a few bits that are well worth it.
parent bf39beca
...@@ -31,10 +31,22 @@ ...@@ -31,10 +31,22 @@
#include "aacenc_utils.h" #include "aacenc_utils.h"
#include "aacenc_quantization.h" #include "aacenc_quantization.h"
/* Could be set to 3 to save an additional bit at the cost of little quality */
#define TNS_Q_BITS 4
/* Coefficient resolution in short windows */
#define TNS_Q_BITS_IS8 4
/* Define this to save a bit, be warned decoders can't deal with it /* Define this to save a bit, be warned decoders can't deal with it
* so it is not lossless despite what the specifications say */ * so it is not lossless despite what the specifications say */
// #define TNS_ENABLE_COEF_COMPRESSION // #define TNS_ENABLE_COEF_COMPRESSION
/* TNS will only be used if the LPC gain is within these margins */
#define TNS_GAIN_THRESHOLD_LOW 1.477f
#define TNS_GAIN_THRESHOLD_HIGH 7.0f
#define TNS_GAIN_THRESHOLD_LOW_IS8 0.16f*TNS_GAIN_THRESHOLD_LOW
#define TNS_GAIN_THRESHOLD_HIGH_IS8 0.26f*TNS_GAIN_THRESHOLD_HIGH
static inline int compress_coeffs(int *coef, int order, int c_bits) static inline int compress_coeffs(int *coef, int order, int c_bits)
{ {
int i; int i;
...@@ -160,21 +172,22 @@ static inline void quantize_coefs(double *coef, int *idx, float *lpc, int order, ...@@ -160,21 +172,22 @@ static inline void quantize_coefs(double *coef, int *idx, float *lpc, int order,
void ff_aac_search_for_tns(AACEncContext *s, SingleChannelElement *sce) void ff_aac_search_for_tns(AACEncContext *s, SingleChannelElement *sce)
{ {
TemporalNoiseShaping *tns = &sce->tns; TemporalNoiseShaping *tns = &sce->tns;
double gain, coefs[MAX_LPC_ORDER];
int w, w2, g, count = 0; int w, w2, g, count = 0;
const int mmm = FFMIN(sce->ics.tns_max_bands, sce->ics.max_sfb); const int mmm = FFMIN(sce->ics.tns_max_bands, sce->ics.max_sfb);
const int is8 = sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE; const int is8 = sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE;
const int c_bits = is8 ? TNS_Q_BITS_SHORT == 4 : TNS_Q_BITS == 4; const int c_bits = is8 ? TNS_Q_BITS_IS8 == 4 : TNS_Q_BITS == 4;
const int slant = sce->ics.window_sequence[0] == LONG_STOP_SEQUENCE ? 1 :
sce->ics.window_sequence[0] == LONG_START_SEQUENCE ? 0 : 2;
int sfb_start = av_clip(tns_min_sfb[is8][s->samplerate_index], 0, mmm); int sfb_start = av_clip(tns_min_sfb[is8][s->samplerate_index], 0, mmm);
int sfb_end = av_clip(sce->ics.num_swb, 0, mmm); int sfb_end = av_clip(sce->ics.num_swb, 0, mmm);
int order = is8 ? 5 : s->profile == FF_PROFILE_AAC_LOW ? 12 : TNS_MAX_ORDER;
for (w = 0; w < sce->ics.num_windows; w++) { for (w = 0; w < sce->ics.num_windows; w++) {
int use_tns; float en[2] = {0.0f, 0.0f};
int order = is8 ? 5 : s->profile == FF_PROFILE_AAC_LOW ? 12 : TNS_MAX_ORDER;
int coef_start = w*sce->ics.num_swb + sce->ics.swb_offset[sfb_start]; int coef_start = w*sce->ics.num_swb + sce->ics.swb_offset[sfb_start];
int coef_len = sce->ics.swb_offset[sfb_end] - sce->ics.swb_offset[sfb_start]; int coef_len = sce->ics.swb_offset[sfb_end] - sce->ics.swb_offset[sfb_start];
float e_ratio = 0.0f, threshold = 0.0f, spread = 0.0f, en[2] = {0.0, 0.0f};
double gain = 0.0f, coefs[MAX_LPC_ORDER] = {0};
for (g = 0; g < sce->ics.num_swb; g++) { for (g = 0; g < sce->ics.num_swb; g++) {
if (w*16+g < sfb_start || w*16+g > sfb_end) if (w*16+g < sfb_start || w*16+g > sfb_end)
...@@ -185,8 +198,6 @@ void ff_aac_search_for_tns(AACEncContext *s, SingleChannelElement *sce) ...@@ -185,8 +198,6 @@ void ff_aac_search_for_tns(AACEncContext *s, SingleChannelElement *sce)
en[1] += band->energy; en[1] += band->energy;
else else
en[0] += band->energy; en[0] += band->energy;
threshold += band->threshold;
spread += band->spread;
} }
} }
...@@ -198,37 +209,31 @@ void ff_aac_search_for_tns(AACEncContext *s, SingleChannelElement *sce) ...@@ -198,37 +209,31 @@ void ff_aac_search_for_tns(AACEncContext *s, SingleChannelElement *sce)
coef_len, order, coefs); coef_len, order, coefs);
if (!order || gain < TNS_GAIN_THRESHOLD_LOW || gain > TNS_GAIN_THRESHOLD_HIGH) if (!order || gain < TNS_GAIN_THRESHOLD_LOW || gain > TNS_GAIN_THRESHOLD_HIGH)
use_tns = 0; continue;
else if ((en[0]+en[1]) < TNS_GAIN_THRESHOLD_LOW*threshold || spread < TNS_SPREAD_THRESHOLD) if (is8 && (gain < TNS_GAIN_THRESHOLD_LOW_IS8 || gain > TNS_GAIN_THRESHOLD_HIGH_IS8))
use_tns = 0; continue;
else if (is8 || order < 2) {
use_tns = 1; tns->n_filt[w] = 1;
for (g = 0; g < tns->n_filt[w]; g++) {
if (use_tns) { tns->length[w][g] = sfb_end - sfb_start;
e_ratio = en[0]/en[1]; tns->direction[w][g] = slant != 2 ? slant : en[0] < en[1];
if (is8 || order < 2 || (e_ratio > TNS_E_RATIO_LOW && e_ratio < TNS_E_RATIO_HIGH)) { tns->order[w][g] = order;
tns->n_filt[w] = 1; quantize_coefs(coefs, tns->coef_idx[w][g], tns->coef[w][g],
for (g = 0; g < tns->n_filt[w]; g++) { order, c_bits);
tns->length[w][g] = sfb_end - sfb_start; }
tns->direction[w][g] = en[0] < en[1]; } else { /* 2 filters due to energy disbalance */
tns->order[w][g] = order; tns->n_filt[w] = 2;
quantize_coefs(coefs, tns->coef_idx[w][g], tns->coef[w][g], for (g = 0; g < tns->n_filt[w]; g++) {
order, c_bits); tns->direction[w][g] = slant != 2 ? slant : en[g] < en[!g];
} tns->order[w][g] = !g ? order/2 : order - tns->order[w][g-1];
} else { /* 2 filters due to energy disbalance */ tns->length[w][g] = !g ? (sfb_end - sfb_start)/2 : \
tns->n_filt[w] = 2;
for (g = 0; g < tns->n_filt[w]; g++) {
tns->direction[w][g] = en[g] < en[!g];
tns->order[w][g] = !g ? order/2 : order - tns->order[w][g-1];
tns->length[w][g] = !g ? (sfb_end - sfb_start)/2 : \
(sfb_end - sfb_start) - tns->length[w][g-1]; (sfb_end - sfb_start) - tns->length[w][g-1];
quantize_coefs(&coefs[!g ? 0 : order - tns->order[w][g-1]], quantize_coefs(&coefs[!g ? 0 : order - tns->order[w][g-1]],
tns->coef_idx[w][g], tns->coef[w][g], tns->coef_idx[w][g], tns->coef[w][g],
tns->order[w][g], c_bits); tns->order[w][g], c_bits);
}
} }
count++;
} }
count += tns->n_filt[w];
} }
sce->tns.present = !!count; sce->tns.present = !!count;
} }
...@@ -30,24 +30,6 @@ ...@@ -30,24 +30,6 @@
#include "aacenc.h" #include "aacenc.h"
/* Could be set to 3 to save an additional bit at the cost of little quality */
#define TNS_Q_BITS 4
/* Coefficient resolution in short windows */
#define TNS_Q_BITS_SHORT 3
/* TNS will only be used if the LPC gain is within these margins */
#define TNS_GAIN_THRESHOLD_LOW 1.437f
#define TNS_GAIN_THRESHOLD_HIGH 21.19f
/* If the energy ratio between the low SFBs vs the high SFBs is not between
* those two values, use 2 filters instead */
#define TNS_E_RATIO_LOW 0.77
#define TNS_E_RATIO_HIGH 1.23
/* Do not use TNS if the psy band spread is below this value */
#define TNS_SPREAD_THRESHOLD 0.5f
void ff_aac_encode_tns_info(AACEncContext *s, SingleChannelElement *sce); void ff_aac_encode_tns_info(AACEncContext *s, SingleChannelElement *sce);
void ff_aac_apply_tns(AACEncContext *s, SingleChannelElement *sce); void ff_aac_apply_tns(AACEncContext *s, SingleChannelElement *sce);
void ff_aac_search_for_tns(AACEncContext *s, SingleChannelElement *sce); void ff_aac_search_for_tns(AACEncContext *s, SingleChannelElement *sce);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment