aaccoder.c 35.3 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
/*
 * AAC coefficients encoder
 * Copyright (C) 2008-2009 Konstantin Shishkov
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
23
 * @file
24 25 26 27 28 29 30 31 32
 * AAC coefficients encoder
 */

/***********************************
 *              TODOs:
 * speedup quantizer selection
 * add sane pulse detection
 ***********************************/

33 34
#include "libavutil/libm.h" // brought forward to work around cygwin header breakage

35
#include <float.h>
36

37
#include "libavutil/mathematics.h"
38
#include "mathops.h"
39 40 41 42 43
#include "avcodec.h"
#include "put_bits.h"
#include "aac.h"
#include "aacenc.h"
#include "aactab.h"
44
#include "aacenctab.h"
45
#include "aacenc_utils.h"
46
#include "aacenc_quantization.h"
47

48
#include "aacenc_is.h"
49
#include "aacenc_tns.h"
50
#include "aacenc_ltp.h"
51
#include "aacenc_pred.h"
52

53 54
#include "libavcodec/aaccoder_twoloop.h"

55 56
/* Parameter of f(x) = a*(lambda/100), defines the maximum fourier spread
 * beyond which no PNS is used (since the SFBs contain tone rather than noise) */
57
#define NOISE_SPREAD_THRESHOLD 0.9f
58

59 60 61
/* Parameter of f(x) = a*(100/lambda), defines how much PNS is allowed to
 * replace low energy non zero bands */
#define NOISE_LAMBDA_REPLACE 1.948f
62

63 64
#include "libavcodec/aaccoder_trellis.h"

65 66 67 68 69 70 71 72 73 74 75 76 77 78 79
/**
 * structure used in optimal codebook search
 */
typedef struct BandCodingPath {
    int prev_idx; ///< pointer to the previous path point
    float cost;   ///< path cost
    int run;
} BandCodingPath;

/**
 * Encode band info for single window group bands.
 */
static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce,
                                     int win, int group_len, const float lambda)
{
80
    BandCodingPath path[120][CB_TOT_ALL];
Mans Rullgard's avatar
Mans Rullgard committed
81
    int w, swb, cb, start, size;
82
    int i, j;
83
    const int max_sfb  = sce->ics.max_sfb;
84
    const int run_bits = sce->ics.num_windows == 1 ? 5 : 3;
85
    const int run_esc  = (1 << run_bits) - 1;
86 87 88 89 90 91 92
    int idx, ppos, count;
    int stackrun[120], stackcb[120], stack_len;
    float next_minrd = INFINITY;
    int next_mincb = 0;

    abs_pow34_v(s->scoefs, sce->coeffs, 1024);
    start = win*128;
93
    for (cb = 0; cb < CB_TOT_ALL; cb++) {
94
        path[0][cb].cost     = 0.0f;
95
        path[0][cb].prev_idx = -1;
96
        path[0][cb].run      = 0;
97
    }
98
    for (swb = 0; swb < max_sfb; swb++) {
99
        size = sce->ics.swb_sizes[swb];
100
        if (sce->zeroes[win*16 + swb]) {
101
            for (cb = 0; cb < CB_TOT_ALL; cb++) {
102
                path[swb+1][cb].prev_idx = cb;
103 104
                path[swb+1][cb].cost     = path[swb][cb].cost;
                path[swb+1][cb].run      = path[swb][cb].run + 1;
105
            }
106
        } else {
107 108 109 110
            float minrd = next_minrd;
            int mincb = next_mincb;
            next_minrd = INFINITY;
            next_mincb = 0;
111
            for (cb = 0; cb < CB_TOT_ALL; cb++) {
112 113
                float cost_stay_here, cost_get_here;
                float rd = 0.0f;
114 115 116 117 118 119 120
                if (cb >= 12 && sce->band_type[win*16+swb] < aac_cb_out_map[cb] ||
                    cb  < aac_cb_in_map[sce->band_type[win*16+swb]] && sce->band_type[win*16+swb] > aac_cb_out_map[cb]) {
                    path[swb+1][cb].prev_idx = -1;
                    path[swb+1][cb].cost     = INFINITY;
                    path[swb+1][cb].run      = path[swb][cb].run + 1;
                    continue;
                }
121
                for (w = 0; w < group_len; w++) {
122
                    FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(win+w)*16+swb];
123 124
                    rd += quantize_band_cost(s, &sce->coeffs[start + w*128],
                                             &s->scoefs[start + w*128], size,
125
                                             sce->sf_idx[(win+w)*16+swb], aac_cb_out_map[cb],
126
                                             lambda / band->threshold, INFINITY, NULL, NULL, 0);
127 128 129
                }
                cost_stay_here = path[swb][cb].cost + rd;
                cost_get_here  = minrd              + rd + run_bits + 4;
130
                if (   run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run]
131
                    != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1])
132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152
                    cost_stay_here += run_bits;
                if (cost_get_here < cost_stay_here) {
                    path[swb+1][cb].prev_idx = mincb;
                    path[swb+1][cb].cost     = cost_get_here;
                    path[swb+1][cb].run      = 1;
                } else {
                    path[swb+1][cb].prev_idx = cb;
                    path[swb+1][cb].cost     = cost_stay_here;
                    path[swb+1][cb].run      = path[swb][cb].run + 1;
                }
                if (path[swb+1][cb].cost < next_minrd) {
                    next_minrd = path[swb+1][cb].cost;
                    next_mincb = cb;
                }
            }
        }
        start += sce->ics.swb_sizes[swb];
    }

    //convert resulting path from backward-linked list
    stack_len = 0;
153
    idx       = 0;
154
    for (cb = 1; cb < CB_TOT_ALL; cb++)
155
        if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
156 157
            idx = cb;
    ppos = max_sfb;
158
    while (ppos > 0) {
159
        av_assert1(idx >= 0);
160 161 162 163 164 165 166 167 168
        cb = idx;
        stackrun[stack_len] = path[ppos][cb].run;
        stackcb [stack_len] = cb;
        idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx;
        ppos -= path[ppos][cb].run;
        stack_len++;
    }
    //perform actual band info encoding
    start = 0;
169
    for (i = stack_len - 1; i >= 0; i--) {
170 171
        cb = aac_cb_out_map[stackcb[i]];
        put_bits(&s->pb, 4, cb);
172
        count = stackrun[i];
173
        memset(sce->zeroes + win*16 + start, !cb, count);
174
        //XXX: memset when band_type is also uint8_t
175
        for (j = 0; j < count; j++) {
176
            sce->band_type[win*16 + start] = cb;
177 178
            start++;
        }
179
        while (count >= run_esc) {
180 181 182 183 184 185 186
            put_bits(&s->pb, run_bits, run_esc);
            count -= run_esc;
        }
        put_bits(&s->pb, run_bits, count);
    }
}

187

188 189 190 191 192
typedef struct TrellisPath {
    float cost;
    int prev;
} TrellisPath;

193
#define TRELLIS_STAGES 121
194
#define TRELLIS_STATES (SCALE_MAX_DIFF+1)
195

196 197
static void set_special_band_scalefactors(AACEncContext *s, SingleChannelElement *sce)
{
198 199
    int w, g;
    int prevscaler_n = -255, prevscaler_i = 0;
200 201 202 203
    int bands = 0;

    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
        for (g = 0;  g < sce->ics.num_swb; g++) {
204 205
            if (sce->zeroes[w*16+g])
                continue;
206
            if (sce->band_type[w*16+g] == INTENSITY_BT || sce->band_type[w*16+g] == INTENSITY_BT2) {
207
                sce->sf_idx[w*16+g] = av_clip(roundf(log2f(sce->is_ener[w*16+g])*2), -155, 100);
208 209
                bands++;
            } else if (sce->band_type[w*16+g] == NOISE_BT) {
210
                sce->sf_idx[w*16+g] = av_clip(3+ceilf(log2f(sce->pns_ener[w*16+g])*2), -100, 155);
211 212
                if (prevscaler_n == -255)
                    prevscaler_n = sce->sf_idx[w*16+g];
213 214 215 216 217 218 219 220 221 222 223
                bands++;
            }
        }
    }

    if (!bands)
        return;

    /* Clip the scalefactor indices */
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
        for (g = 0;  g < sce->ics.num_swb; g++) {
224 225
            if (sce->zeroes[w*16+g])
                continue;
226
            if (sce->band_type[w*16+g] == INTENSITY_BT || sce->band_type[w*16+g] == INTENSITY_BT2) {
227
                sce->sf_idx[w*16+g] = prevscaler_i = av_clip(sce->sf_idx[w*16+g], prevscaler_i - SCALE_MAX_DIFF, prevscaler_i + SCALE_MAX_DIFF);
228
            } else if (sce->band_type[w*16+g] == NOISE_BT) {
229
                sce->sf_idx[w*16+g] = prevscaler_n = av_clip(sce->sf_idx[w*16+g], prevscaler_n - SCALE_MAX_DIFF, prevscaler_n + SCALE_MAX_DIFF);
230 231 232 233 234
            }
        }
    }
}

235
static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s,
236 237
                                       SingleChannelElement *sce,
                                       const float lambda)
238 239
{
    int q, w, w2, g, start = 0;
240
    int i, j;
241
    int idx;
242 243
    TrellisPath paths[TRELLIS_STAGES][TRELLIS_STATES];
    int bandaddr[TRELLIS_STAGES];
244 245
    int minq;
    float mincost;
246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265
    float q0f = FLT_MAX, q1f = 0.0f, qnrgf = 0.0f;
    int q0, q1, qcnt = 0;

    for (i = 0; i < 1024; i++) {
        float t = fabsf(sce->coeffs[i]);
        if (t > 0.0f) {
            q0f = FFMIN(q0f, t);
            q1f = FFMAX(q1f, t);
            qnrgf += t*t;
            qcnt++;
        }
    }

    if (!qcnt) {
        memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
        memset(sce->zeroes, 1, sizeof(sce->zeroes));
        return;
    }

    //minimum scalefactor index is when minimum nonzero coefficient after quantizing is not clipped
266
    q0 = av_clip(coef2minsf(q0f), 0, SCALE_MAX_POS-1);
267
    //maximum scalefactor index is when maximum coefficient after quantizing is still not zero
268
    q1 = av_clip(coef2maxsf(q1f), 1, SCALE_MAX_POS);
269 270 271 272
    if (q1 - q0 > 60) {
        int q0low  = q0;
        int q1high = q1;
        //minimum scalefactor index is when maximum nonzero coefficient after quantizing is not clipped
273
        int qnrg = av_clip_uint8(log2f(sqrtf(qnrgf/qcnt))*4 - 31 + SCALE_ONE_POS - SCALE_DIV_512);
274 275 276 277 278 279 280 281 282 283
        q1 = qnrg + 30;
        q0 = qnrg - 30;
        if (q0 < q0low) {
            q1 += q0low - q0;
            q0  = q0low;
        } else if (q1 > q1high) {
            q0 -= q1 - q1high;
            q1  = q1high;
        }
    }
284 285 286 287 288 289
    // q0 == q1 isn't really a legal situation
    if (q0 == q1) {
        // the following is indirect but guarantees q1 != q0 && q1 near q0
        q1 = av_clip(q0+1, 1, SCALE_MAX_POS);
        q0 = av_clip(q1-1, 0, SCALE_MAX_POS - 1);
    }
290

291
    for (i = 0; i < TRELLIS_STATES; i++) {
292 293
        paths[0][i].cost    = 0.0f;
        paths[0][i].prev    = -1;
294
    }
295 296
    for (j = 1; j < TRELLIS_STAGES; j++) {
        for (i = 0; i < TRELLIS_STATES; i++) {
297 298 299
            paths[j][i].cost    = INFINITY;
            paths[j][i].prev    = -2;
        }
300
    }
301
    idx = 1;
302
    abs_pow34_v(s->scoefs, sce->coeffs, 1024);
303
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
304
        start = w*128;
305
        for (g = 0; g < sce->ics.num_swb; g++) {
306
            const float *coefs = &sce->coeffs[start];
307 308 309
            float qmin, qmax;
            int nz = 0;

310
            bandaddr[idx] = w * 16 + g;
311 312
            qmin = INT_MAX;
            qmax = 0.0f;
313
            for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
314
                FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
315
                if (band->energy <= band->threshold || band->threshold == 0.0f) {
316 317 318 319 320
                    sce->zeroes[(w+w2)*16+g] = 1;
                    continue;
                }
                sce->zeroes[(w+w2)*16+g] = 0;
                nz = 1;
321
                for (i = 0; i < sce->ics.swb_sizes[g]; i++) {
322
                    float t = fabsf(coefs[w2*128+i]);
323
                    if (t > 0.0f)
324 325
                        qmin = FFMIN(qmin, t);
                    qmax = FFMAX(qmax, t);
326 327
                }
            }
328
            if (nz) {
329 330
                int minscale, maxscale;
                float minrd = INFINITY;
331
                float maxval;
332
                //minimum scalefactor index is when minimum nonzero coefficient after quantizing is not clipped
333
                minscale = coef2minsf(qmin);
334
                //maximum scalefactor index is when maximum coefficient after quantizing is still not zero
335
                maxscale = coef2maxsf(qmax);
336 337
                minscale = av_clip(minscale - q0, 0, TRELLIS_STATES - 1);
                maxscale = av_clip(maxscale - q0, 0, TRELLIS_STATES);
338 339 340 341
                if (minscale == maxscale) {
                    maxscale = av_clip(minscale+1, 1, TRELLIS_STATES);
                    minscale = av_clip(maxscale-1, 0, TRELLIS_STATES - 1);
                }
342
                maxval = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], s->scoefs+start);
343
                for (q = minscale; q < maxscale; q++) {
344
                    float dist = 0;
345
                    int cb = find_min_book(maxval, sce->sf_idx[w*16+g]);
346
                    for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
347
                        FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
348
                        dist += quantize_band_cost(s, coefs + w2*128, s->scoefs + start + w2*128, sce->ics.swb_sizes[g],
349
                                                   q + q0, cb, lambda / band->threshold, INFINITY, NULL, NULL, 0);
350
                    }
351
                    minrd = FFMIN(minrd, dist);
352

353
                    for (i = 0; i < q1 - q0; i++) {
354
                        float cost;
355
                        cost = paths[idx - 1][i].cost + dist
356
                               + ff_aac_scalefactor_bits[q - i + SCALE_DIFF_ZERO];
357
                        if (cost < paths[idx][q].cost) {
358 359
                            paths[idx][q].cost    = cost;
                            paths[idx][q].prev    = i;
360 361 362
                        }
                    }
                }
363
            } else {
364
                for (q = 0; q < q1 - q0; q++) {
Alex Converse's avatar
Alex Converse committed
365 366
                    paths[idx][q].cost = paths[idx - 1][q].cost + 1;
                    paths[idx][q].prev = q;
367 368 369 370
                }
            }
            sce->zeroes[w*16+g] = !nz;
            start += sce->ics.swb_sizes[g];
371
            idx++;
372 373
        }
    }
374 375 376
    idx--;
    mincost = paths[idx][0].cost;
    minq    = 0;
377
    for (i = 1; i < TRELLIS_STATES; i++) {
378 379 380
        if (paths[idx][i].cost < mincost) {
            mincost = paths[idx][i].cost;
            minq = i;
381 382
        }
    }
383
    while (idx) {
384
        sce->sf_idx[bandaddr[idx]] = minq + q0;
385
        minq = FFMAX(paths[idx][minq].prev, 0);
386
        idx--;
387 388
    }
    //set the same quantizers inside window groups
389 390 391
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
        for (g = 0;  g < sce->ics.num_swb; g++)
            for (w2 = 1; w2 < sce->ics.group_len[w]; w2++)
392 393 394 395
                sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g];
}

static void search_for_quantizers_fast(AVCodecContext *avctx, AACEncContext *s,
396 397
                                       SingleChannelElement *sce,
                                       const float lambda)
398
{
Mans Rullgard's avatar
Mans Rullgard committed
399
    int i, w, w2, g;
400 401 402
    int minq = 255;

    memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
403 404 405
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
        for (g = 0; g < sce->ics.num_swb; g++) {
            for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
406
                FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
407
                if (band->energy <= band->threshold) {
408 409
                    sce->sf_idx[(w+w2)*16+g] = 218;
                    sce->zeroes[(w+w2)*16+g] = 1;
410
                } else {
411
                    sce->sf_idx[(w+w2)*16+g] = av_clip(SCALE_ONE_POS - SCALE_DIV_512 + log2f(band->threshold), 80, 218);
412 413 414 415 416 417
                    sce->zeroes[(w+w2)*16+g] = 0;
                }
                minq = FFMIN(minq, sce->sf_idx[(w+w2)*16+g]);
            }
        }
    }
418
    for (i = 0; i < 128; i++) {
419 420
        sce->sf_idx[i] = 140;
        //av_clip(sce->sf_idx[i], minq, minq + SCALE_MAX_DIFF - 1);
421 422
    }
    //set the same quantizers inside window groups
423 424 425
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
        for (g = 0;  g < sce->ics.num_swb; g++)
            for (w2 = 1; w2 < sce->ics.group_len[w]; w2++)
426 427 428
                sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g];
}

429
static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce)
430
{
431
    FFPsyBand *band;
432
    int w, g, w2, i;
433 434
    int wlen = 1024 / sce->ics.num_windows;
    int bandwidth, cutoff;
435 436
    float *PNS = &s->scoefs[0*128], *PNS34 = &s->scoefs[1*128];
    float *NOR34 = &s->scoefs[3*128];
437
    uint8_t nextband[128];
438
    const float lambda = s->lambda;
439
    const float freq_mult = avctx->sample_rate*0.5f/wlen;
440
    const float thr_mult = NOISE_LAMBDA_REPLACE*(100.0f/lambda);
441 442 443 444 445 446 447 448 449 450
    const float spread_threshold = FFMIN(0.75f, NOISE_SPREAD_THRESHOLD*FFMAX(0.5f, lambda/100.f));
    const float dist_bias = av_clipf(4.f * 120 / lambda, 0.25f, 4.0f);
    const float pns_transient_energy_r = FFMIN(0.7f, lambda / 140.f);

    int refbits = avctx->bit_rate * 1024.0 / avctx->sample_rate
        / ((avctx->flags & CODEC_FLAG_QSCALE) ? 2.0f : avctx->channels)
        * (lambda / 120.f);

    /** Keep this in sync with twoloop's cutoff selection */
    float rate_bandwidth_multiplier = 1.5f;
451
    int prev = -1000, prev_sf = -1;
452 453 454 455 456 457 458 459 460 461 462 463 464
    int frame_bit_rate = (avctx->flags & CODEC_FLAG_QSCALE)
        ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024)
        : (avctx->bit_rate / avctx->channels);

    frame_bit_rate *= 1.15f;

    if (avctx->cutoff > 0) {
        bandwidth = avctx->cutoff;
    } else {
        bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate));
    }

    cutoff = bandwidth * 2 * wlen / avctx->sample_rate;
465

466
    memcpy(sce->band_alt, sce->band_type, sizeof(sce->band_type));
467
    ff_init_nextband_map(sce, nextband);
468
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
469
        int wstart = w*128;
470
        for (g = 0;  g < sce->ics.num_swb; g++) {
471
            int noise_sfi;
472
            float dist1 = 0.0f, dist2 = 0.0f, noise_amp;
473
            float pns_energy = 0.0f, pns_tgt_energy, energy_ratio, dist_thresh;
474 475
            float sfb_energy = 0.0f, threshold = 0.0f, spread = 2.0f;
            float min_energy = -1.0f, max_energy = 0.0f;
476
            const int start = wstart+sce->ics.swb_offset[g];
477
            const float freq = (start-wstart)*freq_mult;
478
            const float freq_boost = FFMAX(0.88f*freq/NOISE_LOW_LIMIT, 1.0f);
479 480 481
            if (freq < NOISE_LOW_LIMIT || (start-wstart) >= cutoff) {
                if (!sce->zeroes[w*16+g])
                    prev_sf = sce->sf_idx[w*16+g];
482
                continue;
483
            }
484 485
            for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
                band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
486
                sfb_energy += band->energy;
487
                spread     = FFMIN(spread, band->spread);
488
                threshold  += band->threshold;
489 490 491 492 493 494
                if (!w2) {
                    min_energy = max_energy = band->energy;
                } else {
                    min_energy = FFMIN(min_energy, band->energy);
                    max_energy = FFMAX(max_energy, band->energy);
                }
495 496
            }

497
            /* Ramps down at ~8000Hz and loosens the dist threshold */
498 499 500 501 502 503 504 505
            dist_thresh = av_clipf(2.5f*NOISE_LOW_LIMIT/freq, 0.5f, 2.5f) * dist_bias;

            /* PNS is acceptable when all of these are true:
             * 1. high spread energy (noise-like band)
             * 2. near-threshold energy (high PE means the random nature of PNS content will be noticed)
             * 3. on short window groups, all windows have similar energy (variations in energy would be destroyed by PNS)
             *
             * At this stage, point 2 is relaxed for zeroed bands near the noise threshold (hole avoidance is more important)
506
             */
507 508
            if ((!sce->zeroes[w*16+g] && !ff_sfdelta_can_remove_band(sce, nextband, prev_sf, w*16+g)) ||
                ((sce->zeroes[w*16+g] || !sce->band_alt[w*16+g]) && sfb_energy < threshold*sqrtf(1.0f/freq_boost)) || spread < spread_threshold ||
509 510
                (!sce->zeroes[w*16+g] && sce->band_alt[w*16+g] && sfb_energy > threshold*thr_mult*freq_boost) ||
                min_energy < pns_transient_energy_r * max_energy ) {
511
                sce->pns_ener[w*16+g] = sfb_energy;
512 513
                if (!sce->zeroes[w*16+g])
                    prev_sf = sce->sf_idx[w*16+g];
514 515 516
                continue;
            }

517
            pns_tgt_energy = sfb_energy*FFMIN(1.0f, spread*spread);
518
            noise_sfi = av_clip(roundf(log2f(pns_tgt_energy)*2), -100, 155); /* Quantize */
519
            noise_amp = -ff_aac_pow2sf_tab[noise_sfi + POW_SF2_ZERO];    /* Dequantize */
520 521 522 523 524 525 526 527
            if (prev != -1000) {
                int noise_sfdiff = noise_sfi - prev + SCALE_DIFF_ZERO;
                if (noise_sfdiff < 0 || noise_sfdiff > 2*SCALE_MAX_DIFF) {
                    if (!sce->zeroes[w*16+g])
                        prev_sf = sce->sf_idx[w*16+g];
                    continue;
                }
            }
528
            for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
529
                float band_energy, scale, pns_senergy;
530
                const int start_c = (w+w2)*128+sce->ics.swb_offset[g];
531
                band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
532 533 534 535 536 537
                for (i = 0; i < sce->ics.swb_sizes[g]; i+=2) {
                    double rnd[2];
                    av_bmg_get(&s->lfg, rnd);
                    PNS[i+0] = (float)rnd[0];
                    PNS[i+1] = (float)rnd[1];
                }
538 539 540
                band_energy = s->fdsp->scalarproduct_float(PNS, PNS, sce->ics.swb_sizes[g]);
                scale = noise_amp/sqrtf(band_energy);
                s->fdsp->vector_fmul_scalar(PNS, PNS, scale, sce->ics.swb_sizes[g]);
541 542
                pns_senergy = s->fdsp->scalarproduct_float(PNS, PNS, sce->ics.swb_sizes[g]);
                pns_energy += pns_senergy;
543
                abs_pow34_v(NOR34, &sce->coeffs[start_c], sce->ics.swb_sizes[g]);
544
                abs_pow34_v(PNS34, PNS, sce->ics.swb_sizes[g]);
545
                dist1 += quantize_band_cost(s, &sce->coeffs[start_c],
546 547 548 549
                                            NOR34,
                                            sce->ics.swb_sizes[g],
                                            sce->sf_idx[(w+w2)*16+g],
                                            sce->band_alt[(w+w2)*16+g],
550 551 552 553
                                            lambda/band->threshold, INFINITY, NULL, NULL, 0);
                /* Estimate rd on average as 5 bits for SF, 4 for the CB, plus spread energy * lambda/thr */
                dist2 += band->energy/(band->spread*band->spread)*lambda*dist_thresh/band->threshold;
            }
554
            if (g && sce->band_type[w*16+g-1] == NOISE_BT) {
555 556 557
                dist2 += 5;
            } else {
                dist2 += 9;
558
            }
559 560
            energy_ratio = pns_tgt_energy/pns_energy; /* Compensates for quantization error */
            sce->pns_ener[w*16+g] = energy_ratio*pns_tgt_energy;
561
            if (sce->zeroes[w*16+g] || !sce->band_alt[w*16+g] || (energy_ratio > 0.85f && energy_ratio < 1.25f && dist2 < dist1)) {
562 563
                sce->band_type[w*16+g] = NOISE_BT;
                sce->zeroes[w*16+g] = 0;
564
                prev = noise_sfi;
565 566 567
            } else {
                if (!sce->zeroes[w*16+g])
                    prev_sf = sce->sf_idx[w*16+g];
568 569 570 571 572
            }
        }
    }
}

573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643
static void mark_pns(AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce)
{
    FFPsyBand *band;
    int w, g, w2;
    int wlen = 1024 / sce->ics.num_windows;
    int bandwidth, cutoff;
    const float lambda = s->lambda;
    const float freq_mult = avctx->sample_rate*0.5f/wlen;
    const float spread_threshold = FFMIN(0.75f, NOISE_SPREAD_THRESHOLD*FFMAX(0.5f, lambda/100.f));
    const float pns_transient_energy_r = FFMIN(0.7f, lambda / 140.f);

    int refbits = avctx->bit_rate * 1024.0 / avctx->sample_rate
        / ((avctx->flags & CODEC_FLAG_QSCALE) ? 2.0f : avctx->channels)
        * (lambda / 120.f);

    /** Keep this in sync with twoloop's cutoff selection */
    float rate_bandwidth_multiplier = 1.5f;
    int frame_bit_rate = (avctx->flags & CODEC_FLAG_QSCALE)
        ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024)
        : (avctx->bit_rate / avctx->channels);

    frame_bit_rate *= 1.15f;

    if (avctx->cutoff > 0) {
        bandwidth = avctx->cutoff;
    } else {
        bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate));
    }

    cutoff = bandwidth * 2 * wlen / avctx->sample_rate;

    memcpy(sce->band_alt, sce->band_type, sizeof(sce->band_type));
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
        for (g = 0;  g < sce->ics.num_swb; g++) {
            float sfb_energy = 0.0f, threshold = 0.0f, spread = 2.0f;
            float min_energy = -1.0f, max_energy = 0.0f;
            const int start = sce->ics.swb_offset[g];
            const float freq = start*freq_mult;
            const float freq_boost = FFMAX(0.88f*freq/NOISE_LOW_LIMIT, 1.0f);
            if (freq < NOISE_LOW_LIMIT || start >= cutoff) {
                sce->can_pns[w*16+g] = 0;
                continue;
            }
            for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
                band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
                sfb_energy += band->energy;
                spread     = FFMIN(spread, band->spread);
                threshold  += band->threshold;
                if (!w2) {
                    min_energy = max_energy = band->energy;
                } else {
                    min_energy = FFMIN(min_energy, band->energy);
                    max_energy = FFMAX(max_energy, band->energy);
                }
            }

            /* PNS is acceptable when all of these are true:
             * 1. high spread energy (noise-like band)
             * 2. near-threshold energy (high PE means the random nature of PNS content will be noticed)
             * 3. on short window groups, all windows have similar energy (variations in energy would be destroyed by PNS)
             */
            sce->pns_ener[w*16+g] = sfb_energy;
            if (sfb_energy < threshold*sqrtf(1.5f/freq_boost) || spread < spread_threshold || min_energy < pns_transient_energy_r * max_energy) {
                sce->can_pns[w*16+g] = 0;
            } else {
                sce->can_pns[w*16+g] = 1;
            }
        }
    }
}

644
static void search_for_ms(AACEncContext *s, ChannelElement *cpe)
645
{
646 647
    int start = 0, i, w, w2, g, sid_sf_boost, prev_mid, prev_side;
    uint8_t nextband0[128], nextband1[128];
648 649
    float M[128], S[128];
    float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3;
650
    const float lambda = s->lambda;
651
    const float mslambda = FFMIN(1.0f, lambda / 120.f);
652 653
    SingleChannelElement *sce0 = &cpe->ch[0];
    SingleChannelElement *sce1 = &cpe->ch[1];
654
    if (!cpe->common_window)
655
        return;
656

657 658 659 660 661 662 663
    /** Scout out next nonzero bands */
    ff_init_nextband_map(sce0, nextband0);
    ff_init_nextband_map(sce1, nextband1);

    prev_mid = sce0->sf_idx[0];
    prev_side = sce1->sf_idx[0];
    for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
664
        start = 0;
665
        for (g = 0;  g < sce0->ics.num_swb; g++) {
666
            float bmax = bval2bmax(g * 17.0f / sce0->ics.num_swb) / 0.0045f;
667 668 669
            if (!cpe->is_mask[w*16+g])
                cpe->ms_mask[w*16+g] = 0;
            if (!sce0->zeroes[w*16+g] && !sce1->zeroes[w*16+g] && !cpe->is_mask[w*16+g]) {
670 671 672
                float Mmax = 0.0f, Smax = 0.0f;

                /* Must compute mid/side SF and book for the whole window group */
673 674
                for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
                    for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
675 676
                        M[i] = (sce0->coeffs[start+(w+w2)*128+i]
                              + sce1->coeffs[start+(w+w2)*128+i]) * 0.5;
677
                        S[i] =  M[i]
678
                              - sce1->coeffs[start+(w+w2)*128+i];
679
                    }
680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695
                    abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
                    abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
                    for (i = 0; i < sce0->ics.swb_sizes[g]; i++ ) {
                        Mmax = FFMAX(Mmax, M34[i]);
                        Smax = FFMAX(Smax, S34[i]);
                    }
                }

                for (sid_sf_boost = 0; sid_sf_boost < 4; sid_sf_boost++) {
                    float dist1 = 0.0f, dist2 = 0.0f;
                    int B0 = 0, B1 = 0;
                    int minidx;
                    int mididx, sididx;
                    int midcb, sidcb;

                    minidx = FFMIN(sce0->sf_idx[w*16+g], sce1->sf_idx[w*16+g]);
696 697
                    mididx = av_clip(minidx, 0, SCALE_MAX_POS - SCALE_DIV_512);
                    sididx = av_clip(minidx - sid_sf_boost * 3, 0, SCALE_MAX_POS - SCALE_DIV_512);
698
                    if (sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT
699 700
                        && (   !ff_sfdelta_can_replace(sce0, nextband0, prev_mid, mididx, w*16+g)
                            || !ff_sfdelta_can_replace(sce1, nextband1, prev_side, sididx, w*16+g))) {
701 702 703 704
                        /* scalefactor range violation, bad stuff, will decrease quality unacceptably */
                        continue;
                    }

705 706 707
                    midcb = find_min_book(Mmax, mididx);
                    sidcb = find_min_book(Smax, sididx);

708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730
                    /* No CB can be zero */
                    midcb = FFMAX(1,midcb);
                    sidcb = FFMAX(1,sidcb);

                    for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
                        FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
                        FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
                        float minthr = FFMIN(band0->threshold, band1->threshold);
                        int b1,b2,b3,b4;
                        for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
                            M[i] = (sce0->coeffs[start+(w+w2)*128+i]
                                  + sce1->coeffs[start+(w+w2)*128+i]) * 0.5;
                            S[i] =  M[i]
                                  - sce1->coeffs[start+(w+w2)*128+i];
                        }

                        abs_pow34_v(L34, sce0->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
                        abs_pow34_v(R34, sce1->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
                        abs_pow34_v(M34, M,                         sce0->ics.swb_sizes[g]);
                        abs_pow34_v(S34, S,                         sce0->ics.swb_sizes[g]);
                        dist1 += quantize_band_cost(s, &sce0->coeffs[start + (w+w2)*128],
                                                    L34,
                                                    sce0->ics.swb_sizes[g],
731 732
                                                    sce0->sf_idx[w*16+g],
                                                    sce0->band_type[w*16+g],
733 734 735 736
                                                    lambda / band0->threshold, INFINITY, &b1, NULL, 0);
                        dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128],
                                                    R34,
                                                    sce1->ics.swb_sizes[g],
737 738
                                                    sce1->sf_idx[w*16+g],
                                                    sce1->band_type[w*16+g],
739 740 741 742
                                                    lambda / band1->threshold, INFINITY, &b2, NULL, 0);
                        dist2 += quantize_band_cost(s, M,
                                                    M34,
                                                    sce0->ics.swb_sizes[g],
743 744
                                                    mididx,
                                                    midcb,
745 746 747 748
                                                    lambda / minthr, INFINITY, &b3, NULL, 0);
                        dist2 += quantize_band_cost(s, S,
                                                    S34,
                                                    sce1->ics.swb_sizes[g],
749 750
                                                    sididx,
                                                    sidcb,
751 752 753
                                                    mslambda / (minthr * bmax), INFINITY, &b4, NULL, 0);
                        B0 += b1+b2;
                        B1 += b3+b4;
754 755
                        dist1 -= b1+b2;
                        dist2 -= b3+b4;
756 757 758
                    }
                    cpe->ms_mask[w*16+g] = dist2 <= dist1 && B1 < B0;
                    if (cpe->ms_mask[w*16+g]) {
759
                        if (sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT) {
760 761 762 763
                            sce0->sf_idx[w*16+g] = mididx;
                            sce1->sf_idx[w*16+g] = sididx;
                            sce0->band_type[w*16+g] = midcb;
                            sce1->band_type[w*16+g] = sidcb;
764 765 766
                        } else if ((sce0->band_type[w*16+g] != NOISE_BT) ^ (sce1->band_type[w*16+g] != NOISE_BT)) {
                            /* ms_mask unneeded, and it confuses some decoders */
                            cpe->ms_mask[w*16+g] = 0;
767 768 769 770 771 772
                        }
                        break;
                    } else if (B1 > B0) {
                        /* More boost won't fix this */
                        break;
                    }
773 774
                }
            }
775 776 777 778
            if (!sce0->zeroes[w*16+g] && sce0->band_type[w*16+g] < RESERVED_BT)
                prev_mid = sce0->sf_idx[w*16+g];
            if (!sce1->zeroes[w*16+g] && !cpe->is_mask[w*16+g] && sce1->band_type[w*16+g] < RESERVED_BT)
                prev_side = sce1->sf_idx[w*16+g];
779 780 781 782 783
            start += sce0->ics.swb_sizes[g];
        }
    }
}

784
AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
785
    [AAC_CODER_ANMR] = {
786 787 788
        search_for_quantizers_anmr,
        encode_window_bands_info,
        quantize_and_encode_band,
789
        ff_aac_encode_tns_info,
790
        ff_aac_encode_ltp_info,
791
        ff_aac_encode_main_pred,
792
        ff_aac_adjust_common_pred,
793
        ff_aac_adjust_common_ltp,
794
        ff_aac_apply_main_pred,
795
        ff_aac_apply_tns,
796 797
        ff_aac_update_ltp,
        ff_aac_ltp_insert_new_frame,
798
        set_special_band_scalefactors,
799
        search_for_pns,
800
        mark_pns,
801
        ff_aac_search_for_tns,
802
        ff_aac_search_for_ltp,
803
        search_for_ms,
804 805
        ff_aac_search_for_is,
        ff_aac_search_for_pred,
806
    },
807
    [AAC_CODER_TWOLOOP] = {
808
        search_for_quantizers_twoloop,
809
        codebook_trellis_rate,
810
        quantize_and_encode_band,
811
        ff_aac_encode_tns_info,
812
        ff_aac_encode_ltp_info,
813
        ff_aac_encode_main_pred,
814
        ff_aac_adjust_common_pred,
815
        ff_aac_adjust_common_ltp,
816
        ff_aac_apply_main_pred,
817
        ff_aac_apply_tns,
818 819
        ff_aac_update_ltp,
        ff_aac_ltp_insert_new_frame,
820
        set_special_band_scalefactors,
821
        search_for_pns,
822
        mark_pns,
823
        ff_aac_search_for_tns,
824
        ff_aac_search_for_ltp,
825
        search_for_ms,
826 827
        ff_aac_search_for_is,
        ff_aac_search_for_pred,
828
    },
829
    [AAC_CODER_FAST] = {
830 831 832
        search_for_quantizers_fast,
        encode_window_bands_info,
        quantize_and_encode_band,
833
        ff_aac_encode_tns_info,
834
        ff_aac_encode_ltp_info,
835
        ff_aac_encode_main_pred,
836
        ff_aac_adjust_common_pred,
837
        ff_aac_adjust_common_ltp,
838
        ff_aac_apply_main_pred,
839
        ff_aac_apply_tns,
840 841
        ff_aac_update_ltp,
        ff_aac_ltp_insert_new_frame,
842
        set_special_band_scalefactors,
843
        search_for_pns,
844
        mark_pns,
845
        ff_aac_search_for_tns,
846
        ff_aac_search_for_ltp,
847
        search_for_ms,
848 849
        ff_aac_search_for_is,
        ff_aac_search_for_pred,
850 851
    },
};