Commit 6711aa21 authored by Claudio Freire

AAC encoder: various fixes in M/S coding

1. Fix sf_idx and band_type addressing to address only the first
subwindow in the group (others could hold garbage values)

2. Don't step on ms_mask when is_mask is set. I/S selection
already sets the ms_mask properly and shouldn't be overridden.

3. Use mid/sid cb/sf when computing coding error, since those
are the cb/sfs that will eventually be set.

4. Fix distortion computation on multi-subwindow groups (was
subtracting the bits terms multiple times)

5. Clear ms_mask when one side uses PNS and the other doesn't.
When using PNS, ms_mask signals correlated noise, which can be
detected just like regular M/S detection, so we don't skip
noise bands. But when only one side uses PNS, setting the flag
can confuse some decoders, so avoid that.
parent 4dcb69cc
...@@ -831,8 +831,9 @@ static void search_for_ms(AACEncContext *s, ChannelElement *cpe) ...@@ -831,8 +831,9 @@ static void search_for_ms(AACEncContext *s, ChannelElement *cpe)
start = 0; start = 0;
for (g = 0; g < sce0->ics.num_swb; g++) { for (g = 0; g < sce0->ics.num_swb; g++) {
float bmax = bval2bmax(g * 17.0f / sce0->ics.num_swb) / 0.0045f; float bmax = bval2bmax(g * 17.0f / sce0->ics.num_swb) / 0.0045f;
cpe->ms_mask[w*16+g] = 0; if (!cpe->is_mask[w*16+g])
if (!sce0->zeroes[w*16+g] && !sce1->zeroes[w*16+g]) { cpe->ms_mask[w*16+g] = 0;
if (!sce0->zeroes[w*16+g] && !sce1->zeroes[w*16+g] && !cpe->is_mask[w*16+g]) {
float Mmax = 0.0f, Smax = 0.0f; float Mmax = 0.0f, Smax = 0.0f;
/* Must compute mid/side SF and book for the whole window group */ /* Must compute mid/side SF and book for the whole window group */
...@@ -861,7 +862,7 @@ static void search_for_ms(AACEncContext *s, ChannelElement *cpe) ...@@ -861,7 +862,7 @@ static void search_for_ms(AACEncContext *s, ChannelElement *cpe)
minidx = FFMIN(sce0->sf_idx[w*16+g], sce1->sf_idx[w*16+g]); minidx = FFMIN(sce0->sf_idx[w*16+g], sce1->sf_idx[w*16+g]);
mididx = av_clip(minidx, 0, SCALE_MAX_POS - SCALE_DIV_512); mididx = av_clip(minidx, 0, SCALE_MAX_POS - SCALE_DIV_512);
sididx = av_clip(minidx - sid_sf_boost * 3, 0, SCALE_MAX_POS - SCALE_DIV_512); sididx = av_clip(minidx - sid_sf_boost * 3, 0, SCALE_MAX_POS - SCALE_DIV_512);
if (!cpe->is_mask[w*16+g] && sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT if (sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT
&& ( !ff_sfdelta_can_replace(sce0, nextband0, prev_mid, mididx, w*16+g) && ( !ff_sfdelta_can_replace(sce0, nextband0, prev_mid, mididx, w*16+g)
|| !ff_sfdelta_can_replace(sce1, nextband1, prev_side, sididx, w*16+g))) { || !ff_sfdelta_can_replace(sce1, nextband1, prev_side, sididx, w*16+g))) {
/* scalefactor range violation, bad stuff, will decrease quality unacceptably */ /* scalefactor range violation, bad stuff, will decrease quality unacceptably */
...@@ -894,40 +895,42 @@ static void search_for_ms(AACEncContext *s, ChannelElement *cpe) ...@@ -894,40 +895,42 @@ static void search_for_ms(AACEncContext *s, ChannelElement *cpe)
dist1 += quantize_band_cost(s, &sce0->coeffs[start + (w+w2)*128], dist1 += quantize_band_cost(s, &sce0->coeffs[start + (w+w2)*128],
L34, L34,
sce0->ics.swb_sizes[g], sce0->ics.swb_sizes[g],
sce0->sf_idx[(w+w2)*16+g], sce0->sf_idx[w*16+g],
sce0->band_type[(w+w2)*16+g], sce0->band_type[w*16+g],
lambda / band0->threshold, INFINITY, &b1, NULL, 0); lambda / band0->threshold, INFINITY, &b1, NULL, 0);
dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128], dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128],
R34, R34,
sce1->ics.swb_sizes[g], sce1->ics.swb_sizes[g],
sce1->sf_idx[(w+w2)*16+g], sce1->sf_idx[w*16+g],
sce1->band_type[(w+w2)*16+g], sce1->band_type[w*16+g],
lambda / band1->threshold, INFINITY, &b2, NULL, 0); lambda / band1->threshold, INFINITY, &b2, NULL, 0);
dist2 += quantize_band_cost(s, M, dist2 += quantize_band_cost(s, M,
M34, M34,
sce0->ics.swb_sizes[g], sce0->ics.swb_sizes[g],
sce0->sf_idx[(w+w2)*16+g], mididx,
sce0->band_type[(w+w2)*16+g], midcb,
lambda / minthr, INFINITY, &b3, NULL, 0); lambda / minthr, INFINITY, &b3, NULL, 0);
dist2 += quantize_band_cost(s, S, dist2 += quantize_band_cost(s, S,
S34, S34,
sce1->ics.swb_sizes[g], sce1->ics.swb_sizes[g],
sce1->sf_idx[(w+w2)*16+g], sididx,
sce1->band_type[(w+w2)*16+g], sidcb,
mslambda / (minthr * bmax), INFINITY, &b4, NULL, 0); mslambda / (minthr * bmax), INFINITY, &b4, NULL, 0);
B0 += b1+b2; B0 += b1+b2;
B1 += b3+b4; B1 += b3+b4;
dist1 -= B0; dist1 -= b1+b2;
dist2 -= B1; dist2 -= b3+b4;
} }
cpe->ms_mask[w*16+g] = dist2 <= dist1 && B1 < B0; cpe->ms_mask[w*16+g] = dist2 <= dist1 && B1 < B0;
if (cpe->ms_mask[w*16+g]) { if (cpe->ms_mask[w*16+g]) {
/* Setting the M/S mask is useful with I/S or PNS, but only the flag */ if (sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT) {
if (!cpe->is_mask[w*16+g] && sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT) {
sce0->sf_idx[w*16+g] = mididx; sce0->sf_idx[w*16+g] = mididx;
sce1->sf_idx[w*16+g] = sididx; sce1->sf_idx[w*16+g] = sididx;
sce0->band_type[w*16+g] = midcb; sce0->band_type[w*16+g] = midcb;
sce1->band_type[w*16+g] = sidcb; sce1->band_type[w*16+g] = sidcb;
} else if ((sce0->band_type[w*16+g] != NOISE_BT) ^ (sce1->band_type[w*16+g] != NOISE_BT)) {
/* ms_mask unneeded, and it confuses some decoders */
cpe->ms_mask[w*16+g] = 0;
} }
break; break;
} else if (B1 > B0) { } else if (B1 > B0) {
......
...@@ -2357,8 +2357,9 @@ static void search_for_ms_mips(AACEncContext *s, ChannelElement *cpe) ...@@ -2357,8 +2357,9 @@ static void search_for_ms_mips(AACEncContext *s, ChannelElement *cpe)
start = 0; start = 0;
for (g = 0; g < sce0->ics.num_swb; g++) { for (g = 0; g < sce0->ics.num_swb; g++) {
float bmax = bval2bmax(g * 17.0f / sce0->ics.num_swb) / 0.0045f; float bmax = bval2bmax(g * 17.0f / sce0->ics.num_swb) / 0.0045f;
cpe->ms_mask[w*16+g] = 0; if (!cpe->is_mask[w*16+g])
if (!sce0->zeroes[w*16+g] && !sce1->zeroes[w*16+g]) { cpe->ms_mask[w*16+g] = 0;
if (!sce0->zeroes[w*16+g] && !sce1->zeroes[w*16+g] && !cpe->is_mask[w*16+g]) {
float Mmax = 0.0f, Smax = 0.0f; float Mmax = 0.0f, Smax = 0.0f;
/* Must compute mid/side SF and book for the whole window group */ /* Must compute mid/side SF and book for the whole window group */
...@@ -2387,7 +2388,7 @@ static void search_for_ms_mips(AACEncContext *s, ChannelElement *cpe) ...@@ -2387,7 +2388,7 @@ static void search_for_ms_mips(AACEncContext *s, ChannelElement *cpe)
minidx = FFMIN(sce0->sf_idx[w*16+g], sce1->sf_idx[w*16+g]); minidx = FFMIN(sce0->sf_idx[w*16+g], sce1->sf_idx[w*16+g]);
mididx = av_clip(minidx, 0, SCALE_MAX_POS - SCALE_DIV_512); mididx = av_clip(minidx, 0, SCALE_MAX_POS - SCALE_DIV_512);
sididx = av_clip(minidx - sid_sf_boost * 3, 0, SCALE_MAX_POS - SCALE_DIV_512); sididx = av_clip(minidx - sid_sf_boost * 3, 0, SCALE_MAX_POS - SCALE_DIV_512);
if (!cpe->is_mask[w*16+g] && sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT if (sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT
&& ( !ff_sfdelta_can_replace(sce0, nextband0, prev_mid, mididx, w*16+g) && ( !ff_sfdelta_can_replace(sce0, nextband0, prev_mid, mididx, w*16+g)
|| !ff_sfdelta_can_replace(sce1, nextband1, prev_side, sididx, w*16+g))) { || !ff_sfdelta_can_replace(sce1, nextband1, prev_side, sididx, w*16+g))) {
/* scalefactor range violation, bad stuff, will decrease quality unacceptably */ /* scalefactor range violation, bad stuff, will decrease quality unacceptably */
...@@ -2420,40 +2421,42 @@ static void search_for_ms_mips(AACEncContext *s, ChannelElement *cpe) ...@@ -2420,40 +2421,42 @@ static void search_for_ms_mips(AACEncContext *s, ChannelElement *cpe)
dist1 += quantize_band_cost(s, &sce0->coeffs[start + (w+w2)*128], dist1 += quantize_band_cost(s, &sce0->coeffs[start + (w+w2)*128],
L34, L34,
sce0->ics.swb_sizes[g], sce0->ics.swb_sizes[g],
sce0->sf_idx[(w+w2)*16+g], sce0->sf_idx[w*16+g],
sce0->band_type[(w+w2)*16+g], sce0->band_type[w*16+g],
lambda / band0->threshold, INFINITY, &b1, NULL, 0); lambda / band0->threshold, INFINITY, &b1, NULL, 0);
dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128], dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128],
R34, R34,
sce1->ics.swb_sizes[g], sce1->ics.swb_sizes[g],
sce1->sf_idx[(w+w2)*16+g], sce1->sf_idx[w*16+g],
sce1->band_type[(w+w2)*16+g], sce1->band_type[w*16+g],
lambda / band1->threshold, INFINITY, &b2, NULL, 0); lambda / band1->threshold, INFINITY, &b2, NULL, 0);
dist2 += quantize_band_cost(s, M, dist2 += quantize_band_cost(s, M,
M34, M34,
sce0->ics.swb_sizes[g], sce0->ics.swb_sizes[g],
sce0->sf_idx[(w+w2)*16+g], mididx,
sce0->band_type[(w+w2)*16+g], midcb,
lambda / minthr, INFINITY, &b3, NULL, 0); lambda / minthr, INFINITY, &b3, NULL, 0);
dist2 += quantize_band_cost(s, S, dist2 += quantize_band_cost(s, S,
S34, S34,
sce1->ics.swb_sizes[g], sce1->ics.swb_sizes[g],
sce1->sf_idx[(w+w2)*16+g], sididx,
sce1->band_type[(w+w2)*16+g], sidcb,
mslambda / (minthr * bmax), INFINITY, &b4, NULL, 0); mslambda / (minthr * bmax), INFINITY, &b4, NULL, 0);
B0 += b1+b2; B0 += b1+b2;
B1 += b3+b4; B1 += b3+b4;
dist1 -= B0; dist1 -= b1+b2;
dist2 -= B1; dist2 -= b3+b4;
} }
cpe->ms_mask[w*16+g] = dist2 <= dist1 && B1 < B0; cpe->ms_mask[w*16+g] = dist2 <= dist1 && B1 < B0;
if (cpe->ms_mask[w*16+g]) { if (cpe->ms_mask[w*16+g]) {
/* Setting the M/S mask is useful with I/S or PNS, but only the flag */ if (sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT) {
if (!cpe->is_mask[w*16+g] && sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT) {
sce0->sf_idx[w*16+g] = mididx; sce0->sf_idx[w*16+g] = mididx;
sce1->sf_idx[w*16+g] = sididx; sce1->sf_idx[w*16+g] = sididx;
sce0->band_type[w*16+g] = midcb; sce0->band_type[w*16+g] = midcb;
sce1->band_type[w*16+g] = sidcb; sce1->band_type[w*16+g] = sidcb;
} else if ((sce0->band_type[w*16+g] != NOISE_BT) ^ (sce1->band_type[w*16+g] != NOISE_BT)) {
/* ms_mask unneeded, and it confuses some decoders */
cpe->ms_mask[w*16+g] = 0;
} }
break; break;
} else if (B1 > B0) { } else if (B1 > B0) {
......
...@@ -146,7 +146,7 @@ fate-aac-aref-encode: CMD = enc_dec_pcm adts wav s16le $(REF) -strict -2 -c:a aa ...@@ -146,7 +146,7 @@ fate-aac-aref-encode: CMD = enc_dec_pcm adts wav s16le $(REF) -strict -2 -c:a aa
fate-aac-aref-encode: CMP = stddev fate-aac-aref-encode: CMP = stddev
fate-aac-aref-encode: REF = ./tests/data/asynth-44100-2.wav fate-aac-aref-encode: REF = ./tests/data/asynth-44100-2.wav
fate-aac-aref-encode: CMP_SHIFT = -4096 fate-aac-aref-encode: CMP_SHIFT = -4096
fate-aac-aref-encode: CMP_TARGET = 586 fate-aac-aref-encode: CMP_TARGET = 669
fate-aac-aref-encode: SIZE_TOLERANCE = 2464 fate-aac-aref-encode: SIZE_TOLERANCE = 2464
fate-aac-aref-encode: FUZZ = 89 fate-aac-aref-encode: FUZZ = 89
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment