Commit f20b6717 authored by Rostislav Pehlivanov

aacenc_tns: rework the way coefficients are calculated

This commit abandons the way the specifications state to
quantize the coefficients, makes use of the new LPC float
functions and is much better.

The original way of converting non-normalized float samples
to int32_t which our LPC system expects was wrong and it was
wrong to assume the coefficients that are generated are also
valid. It was essentially a full garbage-in, garbage-out
system and it definitely shows when looking at spectrals
and listening. The high frequencies were very overattenuated.
The new LPC function performs the analysis directly.

The specifications state to quantize the coefficients into
four bit index values using an asin() function which of course
had to have ugly ternary operators because the function turns
negative if the coefficients are negative, which when encoding
causes an invalid bitstream to be generated.

This commit deviates from that by using the direct TNS tables, which
are fairly small since you only have 4 bits at most for index
values. The LPC values are directly quantized against the tables
and are then used to perform filtering after the requantization,
which simply fetches the array values.

The end result is that TNS works much better now and doesn't
attenuate anything but the actual signal, i.e. TNS removes
quantization errors and does its job correctly now.

It might be enabled by default soon since it doesn't hurt and
helps reduce nastiness at low bitrates.
Signed-off-by: Rostislav Pehlivanov <atomnuker@gmail.com>
parent 1cd5daee
...@@ -964,6 +964,7 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = { ...@@ -964,6 +964,7 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
ff_aac_encode_main_pred, ff_aac_encode_main_pred,
ff_aac_adjust_common_prediction, ff_aac_adjust_common_prediction,
ff_aac_apply_main_pred, ff_aac_apply_main_pred,
ff_aac_apply_tns,
set_special_band_scalefactors, set_special_band_scalefactors,
search_for_pns, search_for_pns,
ff_aac_search_for_tns, ff_aac_search_for_tns,
...@@ -979,6 +980,7 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = { ...@@ -979,6 +980,7 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
ff_aac_encode_main_pred, ff_aac_encode_main_pred,
ff_aac_adjust_common_prediction, ff_aac_adjust_common_prediction,
ff_aac_apply_main_pred, ff_aac_apply_main_pred,
ff_aac_apply_tns,
set_special_band_scalefactors, set_special_band_scalefactors,
search_for_pns, search_for_pns,
ff_aac_search_for_tns, ff_aac_search_for_tns,
...@@ -994,6 +996,7 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = { ...@@ -994,6 +996,7 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
ff_aac_encode_main_pred, ff_aac_encode_main_pred,
ff_aac_adjust_common_prediction, ff_aac_adjust_common_prediction,
ff_aac_apply_main_pred, ff_aac_apply_main_pred,
ff_aac_apply_tns,
set_special_band_scalefactors, set_special_band_scalefactors,
search_for_pns, search_for_pns,
ff_aac_search_for_tns, ff_aac_search_for_tns,
...@@ -1009,6 +1012,7 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = { ...@@ -1009,6 +1012,7 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
ff_aac_encode_main_pred, ff_aac_encode_main_pred,
ff_aac_adjust_common_prediction, ff_aac_adjust_common_prediction,
ff_aac_apply_main_pred, ff_aac_apply_main_pred,
ff_aac_apply_tns,
set_special_band_scalefactors, set_special_band_scalefactors,
search_for_pns, search_for_pns,
ff_aac_search_for_tns, ff_aac_search_for_tns,
......
...@@ -404,10 +404,9 @@ static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s, ...@@ -404,10 +404,9 @@ static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s,
encode_band_info(s, sce); encode_band_info(s, sce);
encode_scale_factors(avctx, s, sce); encode_scale_factors(avctx, s, sce);
encode_pulses(s, &sce->pulse); encode_pulses(s, &sce->pulse);
put_bits(&s->pb, 1, !!sce->tns.present);
if (s->coder->encode_tns_info) if (s->coder->encode_tns_info)
s->coder->encode_tns_info(s, sce); s->coder->encode_tns_info(s, sce);
else
put_bits(&s->pb, 1, 0);
put_bits(&s->pb, 1, 0); //ssr put_bits(&s->pb, 1, 0); //ssr
encode_spectral_coeffs(s, sce); encode_spectral_coeffs(s, sce);
return 0; return 0;
...@@ -609,6 +608,8 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, ...@@ -609,6 +608,8 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
s->coder->search_for_pns(s, avctx, sce); s->coder->search_for_pns(s, avctx, sce);
if (s->options.tns && s->coder->search_for_tns) if (s->options.tns && s->coder->search_for_tns)
s->coder->search_for_tns(s, sce); s->coder->search_for_tns(s, sce);
if (s->options.tns && s->coder->apply_tns_filt)
s->coder->apply_tns_filt(sce);
if (sce->tns.present) if (sce->tns.present)
tns_mode = 1; tns_mode = 1;
} }
......
...@@ -63,6 +63,7 @@ typedef struct AACCoefficientsEncoder { ...@@ -63,6 +63,7 @@ typedef struct AACCoefficientsEncoder {
void (*encode_main_pred)(struct AACEncContext *s, SingleChannelElement *sce); void (*encode_main_pred)(struct AACEncContext *s, SingleChannelElement *sce);
void (*adjust_common_prediction)(struct AACEncContext *s, ChannelElement *cpe); void (*adjust_common_prediction)(struct AACEncContext *s, ChannelElement *cpe);
void (*apply_main_pred)(struct AACEncContext *s, SingleChannelElement *sce); void (*apply_main_pred)(struct AACEncContext *s, SingleChannelElement *sce);
void (*apply_tns_filt)(SingleChannelElement *sce);
void (*set_special_band_scalefactors)(struct AACEncContext *s, SingleChannelElement *sce); void (*set_special_band_scalefactors)(struct AACEncContext *s, SingleChannelElement *sce);
void (*search_for_pns)(struct AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce); void (*search_for_pns)(struct AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce);
void (*search_for_tns)(struct AACEncContext *s, SingleChannelElement *sce); void (*search_for_tns)(struct AACEncContext *s, SingleChannelElement *sce);
......
...@@ -31,62 +31,30 @@ ...@@ -31,62 +31,30 @@
#include "aacenc_utils.h" #include "aacenc_utils.h"
#include "aacenc_quantization.h" #include "aacenc_quantization.h"
static inline void conv_to_int32(int32_t *loc, float *samples, int num, float norm)
{
int i;
for (i = 0; i < num; i++)
loc[i] = ceilf((samples[i]/norm)*INT32_MAX);
}
static inline void conv_to_float(float *arr, int32_t *cof, int num)
{
int i;
for (i = 0; i < num; i++)
arr[i] = (float)cof[i]/INT32_MAX;
}
/* Input: quantized 4 bit coef, output: 1 if first (MSB) 2 bits are the same */
static inline int coef_test_compression(int coef)
{
int tmp = coef >> 2;
int res = ff_ctz(tmp);
if (res > 1)
return 1; /* ...00 -> compressable */
else if (res == 1)
return 0; /* ...10 -> uncompressable */
else if (ff_ctz(tmp >> 1) > 0)
return 0; /* ...0 1 -> uncompressable */
else
return 1; /* ...1 1 -> compressable */
}
static inline int compress_coef(int *coefs, int num) static inline int compress_coef(int *coefs, int num)
{ {
int i, res = 0; int i, c = 0;
for (i = 0; i < num; i++) for (i = 0; i < num; i++)
res += coef_test_compression(coefs[i]); c += coefs[i] < 4 || coefs[i] > 11;
return res == num ? 1 : 0; return c == num;
} }
/** /**
* Encode TNS data. * Encode TNS data.
* Coefficient compression saves a single bit. * Coefficient compression saves a single bit per coefficient.
*/ */
void ff_aac_encode_tns_info(AACEncContext *s, SingleChannelElement *sce) void ff_aac_encode_tns_info(AACEncContext *s, SingleChannelElement *sce)
{ {
int i, w, filt, coef_len, coef_compress; int i, w, filt, coef_len, coef_compress;
const int coef_res = MAX_LPC_PRECISION == 4 ? 1 : 0;
const int is8 = sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE; const int is8 = sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE;
put_bits(&s->pb, 1, !!sce->tns.present);
if (!sce->tns.present) if (!sce->tns.present)
return; return;
for (i = 0; i < sce->ics.num_windows; i++) { for (i = 0; i < sce->ics.num_windows; i++) {
put_bits(&s->pb, 2 - is8, sce->tns.n_filt[i]); put_bits(&s->pb, 2 - is8, sce->tns.n_filt[i]);
if (sce->tns.n_filt[i]) { if (sce->tns.n_filt[i]) {
put_bits(&s->pb, 1, !!coef_res); put_bits(&s->pb, 1, 1);
for (filt = 0; filt < sce->tns.n_filt[i]; filt++) { for (filt = 0; filt < sce->tns.n_filt[i]; filt++) {
put_bits(&s->pb, 6 - 2 * is8, sce->tns.length[i][filt]); put_bits(&s->pb, 6 - 2 * is8, sce->tns.length[i][filt]);
put_bits(&s->pb, 5 - 2 * is8, sce->tns.order[i][filt]); put_bits(&s->pb, 5 - 2 * is8, sce->tns.order[i][filt]);
...@@ -95,7 +63,7 @@ void ff_aac_encode_tns_info(AACEncContext *s, SingleChannelElement *sce) ...@@ -95,7 +63,7 @@ void ff_aac_encode_tns_info(AACEncContext *s, SingleChannelElement *sce)
sce->tns.order[i][filt]); sce->tns.order[i][filt]);
put_bits(&s->pb, 1, !!sce->tns.direction[i][filt]); put_bits(&s->pb, 1, !!sce->tns.direction[i][filt]);
put_bits(&s->pb, 1, !!coef_compress); put_bits(&s->pb, 1, !!coef_compress);
coef_len = coef_res + 3 - coef_compress; coef_len = 4 - coef_compress;
for (w = 0; w < sce->tns.order[i][filt]; w++) for (w = 0; w < sce->tns.order[i][filt]; w++)
put_bits(&s->pb, coef_len, sce->tns.coef_idx[i][filt][w]); put_bits(&s->pb, coef_len, sce->tns.coef_idx[i][filt][w]);
} }
...@@ -104,24 +72,25 @@ void ff_aac_encode_tns_info(AACEncContext *s, SingleChannelElement *sce) ...@@ -104,24 +72,25 @@ void ff_aac_encode_tns_info(AACEncContext *s, SingleChannelElement *sce)
} }
} }
static int process_tns_coeffs(TemporalNoiseShaping *tns, float *tns_coefs_raw, static void process_tns_coeffs(TemporalNoiseShaping *tns, double *coef_raw,
int order, int w, int filt) int *order_p, int w, int filt)
{ {
int i, j; int i, j, order = *order_p;
int *idx = tns->coef_idx[w][filt]; int *idx = tns->coef_idx[w][filt];
float *lpc = tns->coef[w][filt]; float *lpc = tns->coef[w][filt];
const int iqfac_p = ((1 << (MAX_LPC_PRECISION-1)) - 0.5)/(M_PI/2.0);
const int iqfac_m = ((1 << (MAX_LPC_PRECISION-1)) + 0.5)/(M_PI/2.0);
float temp[TNS_MAX_ORDER] = {0.0f}, out[TNS_MAX_ORDER] = {0.0f}; float temp[TNS_MAX_ORDER] = {0.0f}, out[TNS_MAX_ORDER] = {0.0f};
/* Quantization */ if (!order)
return;
/* Not what the specs say, but it's better */
for (i = 0; i < order; i++) { for (i = 0; i < order; i++) {
idx[i] = ceilf(asin(tns_coefs_raw[i])*((tns_coefs_raw[i] >= 0) ? iqfac_p : iqfac_m)); idx[i] = quant_array_idx(coef_raw[i], tns_tmp2_map_0_4, 16);
lpc[i] = 2*sin(idx[i]/((idx[i] >= 0) ? iqfac_p : iqfac_m)); lpc[i] = tns_tmp2_map_0_4[idx[i]];
} }
/* Trim any coeff less than 0.1f from the end */ /* Trim any coeff less than 0.1f from the end */
for (i = order; i > -1; i--) { for (i = order-1; i > -1; i--) {
lpc[i] = (fabs(lpc[i]) > 0.1f) ? lpc[i] : 0.0f; lpc[i] = (fabs(lpc[i]) > 0.1f) ? lpc[i] : 0.0f;
if (lpc[i] != 0.0 ) { if (lpc[i] != 0.0 ) {
order = i; order = i;
...@@ -129,9 +98,6 @@ static int process_tns_coeffs(TemporalNoiseShaping *tns, float *tns_coefs_raw, ...@@ -129,9 +98,6 @@ static int process_tns_coeffs(TemporalNoiseShaping *tns, float *tns_coefs_raw,
} }
} }
if (!order)
return 0;
/* Step up procedure, convert to LPC coeffs */ /* Step up procedure, convert to LPC coeffs */
out[0] = 1.0f; out[0] = 1.0f;
for (i = 1; i <= order; i++) { for (i = 1; i <= order; i++) {
...@@ -143,35 +109,59 @@ static int process_tns_coeffs(TemporalNoiseShaping *tns, float *tns_coefs_raw, ...@@ -143,35 +109,59 @@ static int process_tns_coeffs(TemporalNoiseShaping *tns, float *tns_coefs_raw,
} }
out[i] = lpc[i-1]; out[i] = lpc[i-1];
} }
*order_p = order;
memcpy(lpc, out, TNS_MAX_ORDER*sizeof(float)); memcpy(lpc, out, TNS_MAX_ORDER*sizeof(float));
return order;
} }
static void apply_tns_filter(float *out, float *in, int order, int direction, /* Apply TNS filter */
float *tns_coefs, int ltp_used, int w, int filt, void ff_aac_apply_tns(SingleChannelElement *sce)
int start_i, int len)
{ {
int i, j, inc, start = start_i; const int mmm = FFMIN(sce->ics.tns_max_bands, sce->ics.max_sfb);
float tmp[TNS_MAX_ORDER+1]; float *coef = sce->pcoeffs;
if (direction) { TemporalNoiseShaping *tns = &sce->tns;
int w, filt, m, i;
int bottom, top, order, start, end, size, inc;
float *lpc, tmp[TNS_MAX_ORDER+1];
return;
for (w = 0; w < sce->ics.num_windows; w++) {
bottom = sce->ics.num_swb;
for (filt = 0; filt < tns->n_filt[w]; filt++) {
top = bottom;
bottom = FFMAX(0, top - tns->length[w][filt]);
order = tns->order[w][filt];
lpc = tns->coef[w][filt];
if (!order)
continue;
start = sce->ics.swb_offset[FFMIN(bottom, mmm)];
end = sce->ics.swb_offset[FFMIN( top, mmm)];
if ((size = end - start) <= 0)
continue;
if (tns->direction[w][filt]) {
inc = -1; inc = -1;
start = (start + len) - 1; start = end - 1;
} else { } else {
inc = 1; inc = 1;
} }
if (!ltp_used) { /* AR filter */ start += w * 128;
for (i = 0; i < len; i++, start += inc)
out[i] = in[start]; if (!sce->ics.ltp.present) {
for (j = 1; j <= FFMIN(i, order); j++) // ar filter
out[i] += tns_coefs[j]*in[start - j*inc]; for (m = 0; m < size; m++, start += inc)
} else { /* MA filter */ for (i = 1; i <= FFMIN(m, order); i++)
for (i = 0; i < len; i++, start += inc) { coef[start] += coef[start - i * inc]*lpc[i - 1];
tmp[0] = out[i] = in[start]; } else {
for (j = 1; j <= FFMIN(i, order); j++) // ma filter
out[i] += tmp[j]*tns_coefs[j]; for (m = 0; m < size; m++, start += inc) {
for (j = order; j > 0; j--) tmp[0] = coef[start];
tmp[j] = tmp[j - 1]; for (i = 1; i <= FFMIN(m, order); i++)
coef[start] += tmp[i]*lpc[i - 1];
for (i = order; i > 0; i--)
tmp[i] = tmp[i - 1];
}
}
} }
} }
} }
...@@ -179,57 +169,54 @@ static void apply_tns_filter(float *out, float *in, int order, int direction, ...@@ -179,57 +169,54 @@ static void apply_tns_filter(float *out, float *in, int order, int direction,
void ff_aac_search_for_tns(AACEncContext *s, SingleChannelElement *sce) void ff_aac_search_for_tns(AACEncContext *s, SingleChannelElement *sce)
{ {
TemporalNoiseShaping *tns = &sce->tns; TemporalNoiseShaping *tns = &sce->tns;
int w, g, order, sfb_start, sfb_len, coef_start, shift[MAX_LPC_ORDER], count = 0; int w, g, w2, prev_end_sfb = 0, count = 0;
const int is8 = sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE; const int is8 = sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE;
const int tns_max_order = is8 ? 7 : s->profile == FF_PROFILE_AAC_LOW ? 12 : TNS_MAX_ORDER; const int tns_max_order = is8 ? 7 : s->profile == FF_PROFILE_AAC_LOW ? 12 : TNS_MAX_ORDER;
const float freq_mult = mpeg4audio_sample_rates[s->samplerate_index]/(1024.0f/sce->ics.num_windows)/2.0f;
float max_coef = 0.0f;
sce->tns.present = 0;
return;
for (coef_start = 0; coef_start < 1024; coef_start++) for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
max_coef = FFMAX(max_coef, sce->pcoeffs[coef_start]); int order = 0, filters = 1;
int sfb_start = 0, sfb_len = 0;
for (w = 0; w < sce->ics.num_windows; w++) { int coef_start = 0, coef_len = 0;
int filters = 1, start = 0, coef_len = 0; float energy = 0.0f, threshold = 0.0f;
int32_t conv_coeff[1024] = {0}; double coefs[MAX_LPC_ORDER][MAX_LPC_ORDER] = {{0}};
int32_t coefs_t[MAX_LPC_ORDER][MAX_LPC_ORDER] = {{0}};
/* Determine start sfb + coef - excludes anything below threshold */
for (g = 0; g < sce->ics.num_swb; g++) { for (g = 0; g < sce->ics.num_swb; g++) {
if (start*freq_mult > TNS_LOW_LIMIT) { if (!sfb_start && w*16+g > TNS_LOW_LIMIT && w*16+g > prev_end_sfb) {
sfb_start = w*16+g; sfb_start = w*16+g;
sfb_len = (w+1)*16 + g - sfb_start;
coef_start = sce->ics.swb_offset[sfb_start]; coef_start = sce->ics.swb_offset[sfb_start];
}
if (sfb_start) {
for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
if (!sfb_len && band->energy < band->threshold*1.3f) {
sfb_len = (w+w2)*16+g - sfb_start;
prev_end_sfb = sfb_start + sfb_len;
coef_len = sce->ics.swb_offset[sfb_start + sfb_len] - coef_start; coef_len = sce->ics.swb_offset[sfb_start + sfb_len] - coef_start;
break; break;
} }
start += sce->ics.swb_sizes[g]; energy += band->energy;
threshold += band->threshold;
}
if (!sfb_len) {
sfb_len = (w+sce->ics.group_len[w])*16+g - sfb_start;
coef_len = sce->ics.swb_offset[sfb_start + sfb_len] - coef_start;
}
}
} }
if (coef_len <= 0) if (sfb_len <= 0 || coef_len <= 0)
continue; continue;
if (coef_start + coef_len > 1024)
conv_to_int32(conv_coeff, &sce->pcoeffs[coef_start], coef_len, max_coef); coef_len = 1024 - coef_start;
/* LPC */ /* LPC */
order = ff_lpc_calc_coefs(&s->lpc, conv_coeff, coef_len, order = ff_lpc_calc_levinsion(&s->lpc, &sce->coeffs[coef_start], coef_len,
TNS_MIN_PRED_ORDER, tns_max_order, coefs, 0, tns_max_order, ORDER_METHOD_LOG);
32, coefs_t, shift,
FF_LPC_TYPE_LEVINSON, 10, if (energy > threshold) {
ORDER_METHOD_EST, MAX_LPC_SHIFT, 0) - 1;
/* Works surprisingly well, remember to tweak MAX_LPC_SHIFT if you want to play around with this */
if (shift[order] > 3) {
int direction = 0; int direction = 0;
float tns_coefs_raw[TNS_MAX_ORDER];
tns->n_filt[w] = filters++; tns->n_filt[w] = filters++;
conv_to_float(tns_coefs_raw, coefs_t[order], order);
for (g = 0; g < tns->n_filt[w]; g++) { for (g = 0; g < tns->n_filt[w]; g++) {
process_tns_coeffs(tns, tns_coefs_raw, order, w, g); process_tns_coeffs(tns, coefs[order], &order, w, g);
apply_tns_filter(&sce->coeffs[coef_start], sce->pcoeffs, order, direction, tns->coef[w][g],
sce->ics.ltp.present, w, g, coef_start, coef_len);
tns->order[w][g] = order; tns->order[w][g] = order;
tns->length[w][g] = sfb_len; tns->length[w][g] = sfb_len;
tns->direction[w][g] = direction; tns->direction[w][g] = direction;
......
...@@ -30,16 +30,11 @@ ...@@ -30,16 +30,11 @@
#include "aacenc.h" #include "aacenc.h"
/** Frequency in Hz for lower limit of TNS **/ /** Lower limit of TNS in SFBs **/
#define TNS_LOW_LIMIT 2150 #define TNS_LOW_LIMIT 24
/** LPC settings */
#define TNS_MIN_PRED_ORDER 0
#define MAX_LPC_PRECISION 4 /* 4 bits ltp coeff precision */
#define TNS_LPC_PASSES 2
#define MAX_LPC_SHIFT 4
void ff_aac_encode_tns_info(AACEncContext *s, SingleChannelElement *sce); void ff_aac_encode_tns_info(AACEncContext *s, SingleChannelElement *sce);
void ff_aac_apply_tns(SingleChannelElement *sce);
void ff_aac_search_for_tns(AACEncContext *s, SingleChannelElement *sce); void ff_aac_search_for_tns(AACEncContext *s, SingleChannelElement *sce);
#endif /* AVCODEC_AACENC_TNS_H */ #endif /* AVCODEC_AACENC_TNS_H */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment