aaccoder.c 40 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
/*
 * AAC coefficients encoder
 * Copyright (C) 2008-2009 Konstantin Shishkov
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
23
 * @file
24 25 26 27 28 29 30 31 32
 * AAC coefficients encoder
 */

/***********************************
 *              TODOs:
 * speedup quantizer selection
 * add sane pulse detection
 ***********************************/

33 34
#include "libavutil/libm.h" // brought forward to work around cygwin header breakage

35
#include <float.h>
36

37
#include "libavutil/mathematics.h"
38
#include "mathops.h"
39 40 41 42 43
#include "avcodec.h"
#include "put_bits.h"
#include "aac.h"
#include "aacenc.h"
#include "aactab.h"
44
#include "aacenctab.h"
45
#include "aacenc_utils.h"
46
#include "aacenc_quantization.h"
47

48
#include "aacenc_is.h"
49
#include "aacenc_tns.h"
50
#include "aacenc_ltp.h"
51
#include "aacenc_pred.h"
52

53 54
#include "libavcodec/aaccoder_twoloop.h"

55 56
/* Parameter of f(x) = a*(lambda/100), defines the maximum fourier spread
 * beyond which no PNS is used (since the SFBs contain tone rather than noise) */
57
#define NOISE_SPREAD_THRESHOLD 0.9f
58

59 60 61
/* Parameter of f(x) = a*(100/lambda), defines how much PNS is allowed to
 * replace low energy non zero bands */
#define NOISE_LAMBDA_REPLACE 1.948f
62

63 64
#include "libavcodec/aaccoder_trellis.h"

65 66 67 68 69 70 71 72 73 74 75 76 77 78 79
/**
 * structure used in optimal codebook search
 */
typedef struct BandCodingPath {
    int prev_idx; ///< pointer to the previous path point
    float cost;   ///< path cost
    int run;
} BandCodingPath;

/**
 * Encode band info for single window group bands.
 */
static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce,
                                     int win, int group_len, const float lambda)
{
80
    BandCodingPath path[120][CB_TOT_ALL];
Mans Rullgard's avatar
Mans Rullgard committed
81
    int w, swb, cb, start, size;
82
    int i, j;
83
    const int max_sfb  = sce->ics.max_sfb;
84
    const int run_bits = sce->ics.num_windows == 1 ? 5 : 3;
85
    const int run_esc  = (1 << run_bits) - 1;
86 87 88 89 90
    int idx, ppos, count;
    int stackrun[120], stackcb[120], stack_len;
    float next_minrd = INFINITY;
    int next_mincb = 0;

91
    s->abs_pow34(s->scoefs, sce->coeffs, 1024);
92
    start = win*128;
93
    for (cb = 0; cb < CB_TOT_ALL; cb++) {
94
        path[0][cb].cost     = 0.0f;
95
        path[0][cb].prev_idx = -1;
96
        path[0][cb].run      = 0;
97
    }
98
    for (swb = 0; swb < max_sfb; swb++) {
99
        size = sce->ics.swb_sizes[swb];
100
        if (sce->zeroes[win*16 + swb]) {
101
            for (cb = 0; cb < CB_TOT_ALL; cb++) {
102
                path[swb+1][cb].prev_idx = cb;
103 104
                path[swb+1][cb].cost     = path[swb][cb].cost;
                path[swb+1][cb].run      = path[swb][cb].run + 1;
105
            }
106
        } else {
107 108 109 110
            float minrd = next_minrd;
            int mincb = next_mincb;
            next_minrd = INFINITY;
            next_mincb = 0;
111
            for (cb = 0; cb < CB_TOT_ALL; cb++) {
112 113
                float cost_stay_here, cost_get_here;
                float rd = 0.0f;
114 115 116 117 118 119 120
                if (cb >= 12 && sce->band_type[win*16+swb] < aac_cb_out_map[cb] ||
                    cb  < aac_cb_in_map[sce->band_type[win*16+swb]] && sce->band_type[win*16+swb] > aac_cb_out_map[cb]) {
                    path[swb+1][cb].prev_idx = -1;
                    path[swb+1][cb].cost     = INFINITY;
                    path[swb+1][cb].run      = path[swb][cb].run + 1;
                    continue;
                }
121
                for (w = 0; w < group_len; w++) {
122
                    FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(win+w)*16+swb];
123 124
                    rd += quantize_band_cost(s, &sce->coeffs[start + w*128],
                                             &s->scoefs[start + w*128], size,
125
                                             sce->sf_idx[(win+w)*16+swb], aac_cb_out_map[cb],
126
                                             lambda / band->threshold, INFINITY, NULL, NULL, 0);
127 128 129
                }
                cost_stay_here = path[swb][cb].cost + rd;
                cost_get_here  = minrd              + rd + run_bits + 4;
130
                if (   run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run]
131
                    != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1])
132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152
                    cost_stay_here += run_bits;
                if (cost_get_here < cost_stay_here) {
                    path[swb+1][cb].prev_idx = mincb;
                    path[swb+1][cb].cost     = cost_get_here;
                    path[swb+1][cb].run      = 1;
                } else {
                    path[swb+1][cb].prev_idx = cb;
                    path[swb+1][cb].cost     = cost_stay_here;
                    path[swb+1][cb].run      = path[swb][cb].run + 1;
                }
                if (path[swb+1][cb].cost < next_minrd) {
                    next_minrd = path[swb+1][cb].cost;
                    next_mincb = cb;
                }
            }
        }
        start += sce->ics.swb_sizes[swb];
    }

    //convert resulting path from backward-linked list
    stack_len = 0;
153
    idx       = 0;
154
    for (cb = 1; cb < CB_TOT_ALL; cb++)
155
        if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
156 157
            idx = cb;
    ppos = max_sfb;
158
    while (ppos > 0) {
159
        av_assert1(idx >= 0);
160 161 162 163 164 165 166 167 168
        cb = idx;
        stackrun[stack_len] = path[ppos][cb].run;
        stackcb [stack_len] = cb;
        idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx;
        ppos -= path[ppos][cb].run;
        stack_len++;
    }
    //perform actual band info encoding
    start = 0;
169
    for (i = stack_len - 1; i >= 0; i--) {
170 171
        cb = aac_cb_out_map[stackcb[i]];
        put_bits(&s->pb, 4, cb);
172
        count = stackrun[i];
173
        memset(sce->zeroes + win*16 + start, !cb, count);
174
        //XXX: memset when band_type is also uint8_t
175
        for (j = 0; j < count; j++) {
176
            sce->band_type[win*16 + start] = cb;
177 178
            start++;
        }
179
        while (count >= run_esc) {
180 181 182 183 184 185 186
            put_bits(&s->pb, run_bits, run_esc);
            count -= run_esc;
        }
        put_bits(&s->pb, run_bits, count);
    }
}

187

188 189 190 191 192
typedef struct TrellisPath {
    float cost;
    int prev;
} TrellisPath;

193
#define TRELLIS_STAGES 121
194
#define TRELLIS_STATES (SCALE_MAX_DIFF+1)
195

196 197
static void set_special_band_scalefactors(AACEncContext *s, SingleChannelElement *sce)
{
198 199
    int w, g;
    int prevscaler_n = -255, prevscaler_i = 0;
200 201 202
    int bands = 0;

    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
203
        for (g = 0; g < sce->ics.num_swb; g++) {
204 205
            if (sce->zeroes[w*16+g])
                continue;
206
            if (sce->band_type[w*16+g] == INTENSITY_BT || sce->band_type[w*16+g] == INTENSITY_BT2) {
207
                sce->sf_idx[w*16+g] = av_clip(roundf(log2f(sce->is_ener[w*16+g])*2), -155, 100);
208 209
                bands++;
            } else if (sce->band_type[w*16+g] == NOISE_BT) {
210
                sce->sf_idx[w*16+g] = av_clip(3+ceilf(log2f(sce->pns_ener[w*16+g])*2), -100, 155);
211 212
                if (prevscaler_n == -255)
                    prevscaler_n = sce->sf_idx[w*16+g];
213 214 215 216 217 218 219 220 221 222
                bands++;
            }
        }
    }

    if (!bands)
        return;

    /* Clip the scalefactor indices */
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
223
        for (g = 0; g < sce->ics.num_swb; g++) {
224 225
            if (sce->zeroes[w*16+g])
                continue;
226
            if (sce->band_type[w*16+g] == INTENSITY_BT || sce->band_type[w*16+g] == INTENSITY_BT2) {
227
                sce->sf_idx[w*16+g] = prevscaler_i = av_clip(sce->sf_idx[w*16+g], prevscaler_i - SCALE_MAX_DIFF, prevscaler_i + SCALE_MAX_DIFF);
228
            } else if (sce->band_type[w*16+g] == NOISE_BT) {
229
                sce->sf_idx[w*16+g] = prevscaler_n = av_clip(sce->sf_idx[w*16+g], prevscaler_n - SCALE_MAX_DIFF, prevscaler_n + SCALE_MAX_DIFF);
230 231 232 233 234
            }
        }
    }
}

235
static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s,
236 237
                                       SingleChannelElement *sce,
                                       const float lambda)
238 239
{
    int q, w, w2, g, start = 0;
240
    int i, j;
241
    int idx;
242 243
    TrellisPath paths[TRELLIS_STAGES][TRELLIS_STATES];
    int bandaddr[TRELLIS_STAGES];
244 245
    int minq;
    float mincost;
246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265
    float q0f = FLT_MAX, q1f = 0.0f, qnrgf = 0.0f;
    int q0, q1, qcnt = 0;

    for (i = 0; i < 1024; i++) {
        float t = fabsf(sce->coeffs[i]);
        if (t > 0.0f) {
            q0f = FFMIN(q0f, t);
            q1f = FFMAX(q1f, t);
            qnrgf += t*t;
            qcnt++;
        }
    }

    if (!qcnt) {
        memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
        memset(sce->zeroes, 1, sizeof(sce->zeroes));
        return;
    }

    //minimum scalefactor index is when minimum nonzero coefficient after quantizing is not clipped
266
    q0 = av_clip(coef2minsf(q0f), 0, SCALE_MAX_POS-1);
267
    //maximum scalefactor index is when maximum coefficient after quantizing is still not zero
268
    q1 = av_clip(coef2maxsf(q1f), 1, SCALE_MAX_POS);
269 270 271 272
    if (q1 - q0 > 60) {
        int q0low  = q0;
        int q1high = q1;
        //minimum scalefactor index is when maximum nonzero coefficient after quantizing is not clipped
273
        int qnrg = av_clip_uint8(log2f(sqrtf(qnrgf/qcnt))*4 - 31 + SCALE_ONE_POS - SCALE_DIV_512);
274 275 276 277 278 279 280 281 282 283
        q1 = qnrg + 30;
        q0 = qnrg - 30;
        if (q0 < q0low) {
            q1 += q0low - q0;
            q0  = q0low;
        } else if (q1 > q1high) {
            q0 -= q1 - q1high;
            q1  = q1high;
        }
    }
284 285 286 287 288 289
    // q0 == q1 isn't really a legal situation
    if (q0 == q1) {
        // the following is indirect but guarantees q1 != q0 && q1 near q0
        q1 = av_clip(q0+1, 1, SCALE_MAX_POS);
        q0 = av_clip(q1-1, 0, SCALE_MAX_POS - 1);
    }
290

291
    for (i = 0; i < TRELLIS_STATES; i++) {
292 293
        paths[0][i].cost    = 0.0f;
        paths[0][i].prev    = -1;
294
    }
295 296
    for (j = 1; j < TRELLIS_STAGES; j++) {
        for (i = 0; i < TRELLIS_STATES; i++) {
297 298 299
            paths[j][i].cost    = INFINITY;
            paths[j][i].prev    = -2;
        }
300
    }
301
    idx = 1;
302
    s->abs_pow34(s->scoefs, sce->coeffs, 1024);
303
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
304
        start = w*128;
305
        for (g = 0; g < sce->ics.num_swb; g++) {
306
            const float *coefs = &sce->coeffs[start];
307 308 309
            float qmin, qmax;
            int nz = 0;

310
            bandaddr[idx] = w * 16 + g;
311 312
            qmin = INT_MAX;
            qmax = 0.0f;
313
            for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
314
                FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
315
                if (band->energy <= band->threshold || band->threshold == 0.0f) {
316 317 318 319 320
                    sce->zeroes[(w+w2)*16+g] = 1;
                    continue;
                }
                sce->zeroes[(w+w2)*16+g] = 0;
                nz = 1;
321
                for (i = 0; i < sce->ics.swb_sizes[g]; i++) {
322
                    float t = fabsf(coefs[w2*128+i]);
323
                    if (t > 0.0f)
324 325
                        qmin = FFMIN(qmin, t);
                    qmax = FFMAX(qmax, t);
326 327
                }
            }
328
            if (nz) {
329 330
                int minscale, maxscale;
                float minrd = INFINITY;
331
                float maxval;
332
                //minimum scalefactor index is when minimum nonzero coefficient after quantizing is not clipped
333
                minscale = coef2minsf(qmin);
334
                //maximum scalefactor index is when maximum coefficient after quantizing is still not zero
335
                maxscale = coef2maxsf(qmax);
336 337
                minscale = av_clip(minscale - q0, 0, TRELLIS_STATES - 1);
                maxscale = av_clip(maxscale - q0, 0, TRELLIS_STATES);
338 339 340 341
                if (minscale == maxscale) {
                    maxscale = av_clip(minscale+1, 1, TRELLIS_STATES);
                    minscale = av_clip(maxscale-1, 0, TRELLIS_STATES - 1);
                }
342
                maxval = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], s->scoefs+start);
343
                for (q = minscale; q < maxscale; q++) {
344
                    float dist = 0;
345
                    int cb = find_min_book(maxval, sce->sf_idx[w*16+g]);
346
                    for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
347
                        FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
348
                        dist += quantize_band_cost(s, coefs + w2*128, s->scoefs + start + w2*128, sce->ics.swb_sizes[g],
349
                                                   q + q0, cb, lambda / band->threshold, INFINITY, NULL, NULL, 0);
350
                    }
351
                    minrd = FFMIN(minrd, dist);
352

353
                    for (i = 0; i < q1 - q0; i++) {
354
                        float cost;
355
                        cost = paths[idx - 1][i].cost + dist
356
                               + ff_aac_scalefactor_bits[q - i + SCALE_DIFF_ZERO];
357
                        if (cost < paths[idx][q].cost) {
358 359
                            paths[idx][q].cost    = cost;
                            paths[idx][q].prev    = i;
360 361 362
                        }
                    }
                }
363
            } else {
364
                for (q = 0; q < q1 - q0; q++) {
Alex Converse's avatar
Alex Converse committed
365 366
                    paths[idx][q].cost = paths[idx - 1][q].cost + 1;
                    paths[idx][q].prev = q;
367 368 369 370
                }
            }
            sce->zeroes[w*16+g] = !nz;
            start += sce->ics.swb_sizes[g];
371
            idx++;
372 373
        }
    }
374 375 376
    idx--;
    mincost = paths[idx][0].cost;
    minq    = 0;
377
    for (i = 1; i < TRELLIS_STATES; i++) {
378 379 380
        if (paths[idx][i].cost < mincost) {
            mincost = paths[idx][i].cost;
            minq = i;
381 382
        }
    }
383
    while (idx) {
384
        sce->sf_idx[bandaddr[idx]] = minq + q0;
385
        minq = FFMAX(paths[idx][minq].prev, 0);
386
        idx--;
387 388
    }
    //set the same quantizers inside window groups
389
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
390
        for (g = 0; g < sce->ics.num_swb; g++)
391
            for (w2 = 1; w2 < sce->ics.group_len[w]; w2++)
392 393 394 395
                sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g];
}

static void search_for_quantizers_fast(AVCodecContext *avctx, AACEncContext *s,
396 397
                                       SingleChannelElement *sce,
                                       const float lambda)
398
{
399 400 401 402 403 404 405 406 407 408 409 410
    int start = 0, i, w, w2, g;
    int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / avctx->channels * (lambda / 120.f);
    float dists[128] = { 0 }, uplims[128] = { 0 };
    float maxvals[128];
    int fflag, minscaler;
    int its  = 0;
    int allz = 0;
    float minthr = INFINITY;

    // for values above this the decoder might end up in an endless loop
    // due to always having more bits than what can be encoded.
    destbits = FFMIN(destbits, 5800);
411
    //some heuristic to determine initial quantizers will reduce search time
412
    //determine zero bands and upper limits
413
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
414
        start = 0;
415
        for (g = 0; g < sce->ics.num_swb; g++) {
416 417
            int nz = 0;
            float uplim = 0.0f, energy = 0.0f;
418
            for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
419
                FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
420 421 422
                uplim += band->threshold;
                energy += band->energy;
                if (band->energy <= band->threshold || band->threshold == 0.0f) {
423
                    sce->zeroes[(w+w2)*16+g] = 1;
424
                    continue;
425
                }
426
                nz = 1;
427
            }
428 429 430 431 432 433 434
            uplims[w*16+g] = uplim *512;
            sce->band_type[w*16+g] = 0;
            sce->zeroes[w*16+g] = !nz;
            if (nz)
                minthr = FFMIN(minthr, uplim);
            allz |= nz;
            start += sce->ics.swb_sizes[g];
435 436
        }
    }
437
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
438
        for (g = 0; g < sce->ics.num_swb; g++) {
439 440 441 442 443 444
            if (sce->zeroes[w*16+g]) {
                sce->sf_idx[w*16+g] = SCALE_ONE_POS;
                continue;
            }
            sce->sf_idx[w*16+g] = SCALE_ONE_POS + FFMIN(log2f(uplims[w*16+g]/minthr)*4,59);
        }
445
    }
446 447 448

    if (!allz)
        return;
449
    s->abs_pow34(s->scoefs, sce->coeffs, 1024);
450 451 452 453
    ff_quantize_band_cost_cache_init(s);

    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
        start = w*128;
454
        for (g = 0; g < sce->ics.num_swb; g++) {
455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472
            const float *scaled = s->scoefs + start;
            maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled);
            start += sce->ics.swb_sizes[g];
        }
    }

    //perform two-loop search
    //outer loop - improve quality
    do {
        int tbits, qstep;
        minscaler = sce->sf_idx[0];
        //inner loop - quantize spectrum to fit into given number of bits
        qstep = its ? 1 : 32;
        do {
            int prev = -1;
            tbits = 0;
            for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
                start = w*128;
473
                for (g = 0; g < sce->ics.num_swb; g++) {
474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540
                    const float *coefs = sce->coeffs + start;
                    const float *scaled = s->scoefs + start;
                    int bits = 0;
                    int cb;
                    float dist = 0.0f;

                    if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
                        start += sce->ics.swb_sizes[g];
                        continue;
                    }
                    minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
                    cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
                    for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
                        int b;
                        dist += quantize_band_cost_cached(s, w + w2, g,
                                                          coefs + w2*128,
                                                          scaled + w2*128,
                                                          sce->ics.swb_sizes[g],
                                                          sce->sf_idx[w*16+g],
                                                          cb, 1.0f, INFINITY,
                                                          &b, NULL, 0);
                        bits += b;
                    }
                    dists[w*16+g] = dist - bits;
                    if (prev != -1) {
                        bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
                    }
                    tbits += bits;
                    start += sce->ics.swb_sizes[g];
                    prev = sce->sf_idx[w*16+g];
                }
            }
            if (tbits > destbits) {
                for (i = 0; i < 128; i++)
                    if (sce->sf_idx[i] < 218 - qstep)
                        sce->sf_idx[i] += qstep;
            } else {
                for (i = 0; i < 128; i++)
                    if (sce->sf_idx[i] > 60 - qstep)
                        sce->sf_idx[i] -= qstep;
            }
            qstep >>= 1;
            if (!qstep && tbits > destbits*1.02 && sce->sf_idx[0] < 217)
                qstep = 1;
        } while (qstep);

        fflag = 0;
        minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF);

        for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
            for (g = 0; g < sce->ics.num_swb; g++) {
                int prevsc = sce->sf_idx[w*16+g];
                if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g] > 60) {
                    if (find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1))
                        sce->sf_idx[w*16+g]--;
                    else //Try to make sure there is some energy in every band
                        sce->sf_idx[w*16+g]-=2;
                }
                sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF);
                sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], 219);
                if (sce->sf_idx[w*16+g] != prevsc)
                    fflag = 1;
                sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
            }
        }
        its++;
    } while (fflag && its < 10);
541 542
}

543
static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce)
544
{
545
    FFPsyBand *band;
546
    int w, g, w2, i;
547 548
    int wlen = 1024 / sce->ics.num_windows;
    int bandwidth, cutoff;
549 550
    float *PNS = &s->scoefs[0*128], *PNS34 = &s->scoefs[1*128];
    float *NOR34 = &s->scoefs[3*128];
551
    uint8_t nextband[128];
552
    const float lambda = s->lambda;
553
    const float freq_mult = avctx->sample_rate*0.5f/wlen;
554
    const float thr_mult = NOISE_LAMBDA_REPLACE*(100.0f/lambda);
555 556 557 558 559
    const float spread_threshold = FFMIN(0.75f, NOISE_SPREAD_THRESHOLD*FFMAX(0.5f, lambda/100.f));
    const float dist_bias = av_clipf(4.f * 120 / lambda, 0.25f, 4.0f);
    const float pns_transient_energy_r = FFMIN(0.7f, lambda / 140.f);

    int refbits = avctx->bit_rate * 1024.0 / avctx->sample_rate
560
        / ((avctx->flags & AV_CODEC_FLAG_QSCALE) ? 2.0f : avctx->channels)
561 562 563 564
        * (lambda / 120.f);

    /** Keep this in sync with twoloop's cutoff selection */
    float rate_bandwidth_multiplier = 1.5f;
565
    int prev = -1000, prev_sf = -1;
566
    int frame_bit_rate = (avctx->flags & AV_CODEC_FLAG_QSCALE)
567 568 569 570 571 572 573 574 575 576 577 578
        ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024)
        : (avctx->bit_rate / avctx->channels);

    frame_bit_rate *= 1.15f;

    if (avctx->cutoff > 0) {
        bandwidth = avctx->cutoff;
    } else {
        bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate));
    }

    cutoff = bandwidth * 2 * wlen / avctx->sample_rate;
579

580
    memcpy(sce->band_alt, sce->band_type, sizeof(sce->band_type));
581
    ff_init_nextband_map(sce, nextband);
582
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
583
        int wstart = w*128;
584
        for (g = 0; g < sce->ics.num_swb; g++) {
585
            int noise_sfi;
586
            float dist1 = 0.0f, dist2 = 0.0f, noise_amp;
587
            float pns_energy = 0.0f, pns_tgt_energy, energy_ratio, dist_thresh;
588 589
            float sfb_energy = 0.0f, threshold = 0.0f, spread = 2.0f;
            float min_energy = -1.0f, max_energy = 0.0f;
590
            const int start = wstart+sce->ics.swb_offset[g];
591
            const float freq = (start-wstart)*freq_mult;
592
            const float freq_boost = FFMAX(0.88f*freq/NOISE_LOW_LIMIT, 1.0f);
593 594 595
            if (freq < NOISE_LOW_LIMIT || (start-wstart) >= cutoff) {
                if (!sce->zeroes[w*16+g])
                    prev_sf = sce->sf_idx[w*16+g];
596
                continue;
597
            }
598 599
            for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
                band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
600
                sfb_energy += band->energy;
601
                spread     = FFMIN(spread, band->spread);
602
                threshold  += band->threshold;
603 604 605 606 607 608
                if (!w2) {
                    min_energy = max_energy = band->energy;
                } else {
                    min_energy = FFMIN(min_energy, band->energy);
                    max_energy = FFMAX(max_energy, band->energy);
                }
609 610
            }

611
            /* Ramps down at ~8000Hz and loosens the dist threshold */
612 613 614 615 616 617 618 619
            dist_thresh = av_clipf(2.5f*NOISE_LOW_LIMIT/freq, 0.5f, 2.5f) * dist_bias;

            /* PNS is acceptable when all of these are true:
             * 1. high spread energy (noise-like band)
             * 2. near-threshold energy (high PE means the random nature of PNS content will be noticed)
             * 3. on short window groups, all windows have similar energy (variations in energy would be destroyed by PNS)
             *
             * At this stage, point 2 is relaxed for zeroed bands near the noise threshold (hole avoidance is more important)
620
             */
621 622
            if ((!sce->zeroes[w*16+g] && !ff_sfdelta_can_remove_band(sce, nextband, prev_sf, w*16+g)) ||
                ((sce->zeroes[w*16+g] || !sce->band_alt[w*16+g]) && sfb_energy < threshold*sqrtf(1.0f/freq_boost)) || spread < spread_threshold ||
623 624
                (!sce->zeroes[w*16+g] && sce->band_alt[w*16+g] && sfb_energy > threshold*thr_mult*freq_boost) ||
                min_energy < pns_transient_energy_r * max_energy ) {
625
                sce->pns_ener[w*16+g] = sfb_energy;
626 627
                if (!sce->zeroes[w*16+g])
                    prev_sf = sce->sf_idx[w*16+g];
628 629 630
                continue;
            }

631
            pns_tgt_energy = sfb_energy*FFMIN(1.0f, spread*spread);
632
            noise_sfi = av_clip(roundf(log2f(pns_tgt_energy)*2), -100, 155); /* Quantize */
633
            noise_amp = -ff_aac_pow2sf_tab[noise_sfi + POW_SF2_ZERO];    /* Dequantize */
634 635 636 637 638 639 640 641
            if (prev != -1000) {
                int noise_sfdiff = noise_sfi - prev + SCALE_DIFF_ZERO;
                if (noise_sfdiff < 0 || noise_sfdiff > 2*SCALE_MAX_DIFF) {
                    if (!sce->zeroes[w*16+g])
                        prev_sf = sce->sf_idx[w*16+g];
                    continue;
                }
            }
642
            for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
643
                float band_energy, scale, pns_senergy;
644
                const int start_c = (w+w2)*128+sce->ics.swb_offset[g];
645
                band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
646 647 648
                for (i = 0; i < sce->ics.swb_sizes[g]; i++) {
                    s->random_state  = lcg_random(s->random_state);
                    PNS[i] = s->random_state;
649
                }
650 651 652
                band_energy = s->fdsp->scalarproduct_float(PNS, PNS, sce->ics.swb_sizes[g]);
                scale = noise_amp/sqrtf(band_energy);
                s->fdsp->vector_fmul_scalar(PNS, PNS, scale, sce->ics.swb_sizes[g]);
653 654
                pns_senergy = s->fdsp->scalarproduct_float(PNS, PNS, sce->ics.swb_sizes[g]);
                pns_energy += pns_senergy;
655 656
                s->abs_pow34(NOR34, &sce->coeffs[start_c], sce->ics.swb_sizes[g]);
                s->abs_pow34(PNS34, PNS, sce->ics.swb_sizes[g]);
657
                dist1 += quantize_band_cost(s, &sce->coeffs[start_c],
658 659 660 661
                                            NOR34,
                                            sce->ics.swb_sizes[g],
                                            sce->sf_idx[(w+w2)*16+g],
                                            sce->band_alt[(w+w2)*16+g],
662 663 664 665
                                            lambda/band->threshold, INFINITY, NULL, NULL, 0);
                /* Estimate rd on average as 5 bits for SF, 4 for the CB, plus spread energy * lambda/thr */
                dist2 += band->energy/(band->spread*band->spread)*lambda*dist_thresh/band->threshold;
            }
666
            if (g && sce->band_type[w*16+g-1] == NOISE_BT) {
667 668 669
                dist2 += 5;
            } else {
                dist2 += 9;
670
            }
671 672
            energy_ratio = pns_tgt_energy/pns_energy; /* Compensates for quantization error */
            sce->pns_ener[w*16+g] = energy_ratio*pns_tgt_energy;
673
            if (sce->zeroes[w*16+g] || !sce->band_alt[w*16+g] || (energy_ratio > 0.85f && energy_ratio < 1.25f && dist2 < dist1)) {
674 675
                sce->band_type[w*16+g] = NOISE_BT;
                sce->zeroes[w*16+g] = 0;
676
                prev = noise_sfi;
677 678 679
            } else {
                if (!sce->zeroes[w*16+g])
                    prev_sf = sce->sf_idx[w*16+g];
680 681 682 683 684
            }
        }
    }
}

685 686 687 688 689 690 691 692 693 694 695 696
static void mark_pns(AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce)
{
    FFPsyBand *band;
    int w, g, w2;
    int wlen = 1024 / sce->ics.num_windows;
    int bandwidth, cutoff;
    const float lambda = s->lambda;
    const float freq_mult = avctx->sample_rate*0.5f/wlen;
    const float spread_threshold = FFMIN(0.75f, NOISE_SPREAD_THRESHOLD*FFMAX(0.5f, lambda/100.f));
    const float pns_transient_energy_r = FFMIN(0.7f, lambda / 140.f);

    int refbits = avctx->bit_rate * 1024.0 / avctx->sample_rate
697
        / ((avctx->flags & AV_CODEC_FLAG_QSCALE) ? 2.0f : avctx->channels)
698 699 700 701
        * (lambda / 120.f);

    /** Keep this in sync with twoloop's cutoff selection */
    float rate_bandwidth_multiplier = 1.5f;
702
    int frame_bit_rate = (avctx->flags & AV_CODEC_FLAG_QSCALE)
703 704 705 706 707 708 709 710 711 712 713 714 715 716 717
        ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024)
        : (avctx->bit_rate / avctx->channels);

    frame_bit_rate *= 1.15f;

    if (avctx->cutoff > 0) {
        bandwidth = avctx->cutoff;
    } else {
        bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate));
    }

    cutoff = bandwidth * 2 * wlen / avctx->sample_rate;

    memcpy(sce->band_alt, sce->band_type, sizeof(sce->band_type));
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
718
        for (g = 0; g < sce->ics.num_swb; g++) {
719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755
            float sfb_energy = 0.0f, threshold = 0.0f, spread = 2.0f;
            float min_energy = -1.0f, max_energy = 0.0f;
            const int start = sce->ics.swb_offset[g];
            const float freq = start*freq_mult;
            const float freq_boost = FFMAX(0.88f*freq/NOISE_LOW_LIMIT, 1.0f);
            if (freq < NOISE_LOW_LIMIT || start >= cutoff) {
                sce->can_pns[w*16+g] = 0;
                continue;
            }
            for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
                band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
                sfb_energy += band->energy;
                spread     = FFMIN(spread, band->spread);
                threshold  += band->threshold;
                if (!w2) {
                    min_energy = max_energy = band->energy;
                } else {
                    min_energy = FFMIN(min_energy, band->energy);
                    max_energy = FFMAX(max_energy, band->energy);
                }
            }

            /* PNS is acceptable when all of these are true:
             * 1. high spread energy (noise-like band)
             * 2. near-threshold energy (high PE means the random nature of PNS content will be noticed)
             * 3. on short window groups, all windows have similar energy (variations in energy would be destroyed by PNS)
             */
            sce->pns_ener[w*16+g] = sfb_energy;
            if (sfb_energy < threshold*sqrtf(1.5f/freq_boost) || spread < spread_threshold || min_energy < pns_transient_energy_r * max_energy) {
                sce->can_pns[w*16+g] = 0;
            } else {
                sce->can_pns[w*16+g] = 1;
            }
        }
    }
}

756
static void search_for_ms(AACEncContext *s, ChannelElement *cpe)
757
{
758 759
    int start = 0, i, w, w2, g, sid_sf_boost, prev_mid, prev_side;
    uint8_t nextband0[128], nextband1[128];
760 761 762
    float *M   = s->scoefs + 128*0, *S   = s->scoefs + 128*1;
    float *L34 = s->scoefs + 128*2, *R34 = s->scoefs + 128*3;
    float *M34 = s->scoefs + 128*4, *S34 = s->scoefs + 128*5;
763
    const float lambda = s->lambda;
764
    const float mslambda = FFMIN(1.0f, lambda / 120.f);
765 766
    SingleChannelElement *sce0 = &cpe->ch[0];
    SingleChannelElement *sce1 = &cpe->ch[1];
767
    if (!cpe->common_window)
768
        return;
769

770 771 772 773 774 775 776
    /** Scout out next nonzero bands */
    ff_init_nextband_map(sce0, nextband0);
    ff_init_nextband_map(sce1, nextband1);

    prev_mid = sce0->sf_idx[0];
    prev_side = sce1->sf_idx[0];
    for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
777
        start = 0;
778
        for (g = 0; g < sce0->ics.num_swb; g++) {
779
            float bmax = bval2bmax(g * 17.0f / sce0->ics.num_swb) / 0.0045f;
780 781 782
            if (!cpe->is_mask[w*16+g])
                cpe->ms_mask[w*16+g] = 0;
            if (!sce0->zeroes[w*16+g] && !sce1->zeroes[w*16+g] && !cpe->is_mask[w*16+g]) {
783 784 785
                float Mmax = 0.0f, Smax = 0.0f;

                /* Must compute mid/side SF and book for the whole window group */
786 787
                for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
                    for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
788 789
                        M[i] = (sce0->coeffs[start+(w+w2)*128+i]
                              + sce1->coeffs[start+(w+w2)*128+i]) * 0.5;
790
                        S[i] =  M[i]
791
                              - sce1->coeffs[start+(w+w2)*128+i];
792
                    }
793 794
                    s->abs_pow34(M34, M, sce0->ics.swb_sizes[g]);
                    s->abs_pow34(S34, S, sce0->ics.swb_sizes[g]);
795 796 797 798 799 800 801 802 803 804 805 806 807 808
                    for (i = 0; i < sce0->ics.swb_sizes[g]; i++ ) {
                        Mmax = FFMAX(Mmax, M34[i]);
                        Smax = FFMAX(Smax, S34[i]);
                    }
                }

                for (sid_sf_boost = 0; sid_sf_boost < 4; sid_sf_boost++) {
                    float dist1 = 0.0f, dist2 = 0.0f;
                    int B0 = 0, B1 = 0;
                    int minidx;
                    int mididx, sididx;
                    int midcb, sidcb;

                    minidx = FFMIN(sce0->sf_idx[w*16+g], sce1->sf_idx[w*16+g]);
809 810
                    mididx = av_clip(minidx, 0, SCALE_MAX_POS - SCALE_DIV_512);
                    sididx = av_clip(minidx - sid_sf_boost * 3, 0, SCALE_MAX_POS - SCALE_DIV_512);
811
                    if (sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT
812 813
                        && (   !ff_sfdelta_can_replace(sce0, nextband0, prev_mid, mididx, w*16+g)
                            || !ff_sfdelta_can_replace(sce1, nextband1, prev_side, sididx, w*16+g))) {
814 815 816 817
                        /* scalefactor range violation, bad stuff, will decrease quality unacceptably */
                        continue;
                    }

818 819 820
                    midcb = find_min_book(Mmax, mididx);
                    sidcb = find_min_book(Smax, sididx);

821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836
                    /* No CB can be zero */
                    midcb = FFMAX(1,midcb);
                    sidcb = FFMAX(1,sidcb);

                    for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
                        FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
                        FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
                        float minthr = FFMIN(band0->threshold, band1->threshold);
                        int b1,b2,b3,b4;
                        for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
                            M[i] = (sce0->coeffs[start+(w+w2)*128+i]
                                  + sce1->coeffs[start+(w+w2)*128+i]) * 0.5;
                            S[i] =  M[i]
                                  - sce1->coeffs[start+(w+w2)*128+i];
                        }

837 838 839 840
                        s->abs_pow34(L34, sce0->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
                        s->abs_pow34(R34, sce1->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
                        s->abs_pow34(M34, M,                         sce0->ics.swb_sizes[g]);
                        s->abs_pow34(S34, S,                         sce0->ics.swb_sizes[g]);
841 842 843
                        dist1 += quantize_band_cost(s, &sce0->coeffs[start + (w+w2)*128],
                                                    L34,
                                                    sce0->ics.swb_sizes[g],
844 845
                                                    sce0->sf_idx[w*16+g],
                                                    sce0->band_type[w*16+g],
846 847 848 849
                                                    lambda / band0->threshold, INFINITY, &b1, NULL, 0);
                        dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128],
                                                    R34,
                                                    sce1->ics.swb_sizes[g],
850 851
                                                    sce1->sf_idx[w*16+g],
                                                    sce1->band_type[w*16+g],
852 853 854 855
                                                    lambda / band1->threshold, INFINITY, &b2, NULL, 0);
                        dist2 += quantize_band_cost(s, M,
                                                    M34,
                                                    sce0->ics.swb_sizes[g],
856 857
                                                    mididx,
                                                    midcb,
858 859 860 861
                                                    lambda / minthr, INFINITY, &b3, NULL, 0);
                        dist2 += quantize_band_cost(s, S,
                                                    S34,
                                                    sce1->ics.swb_sizes[g],
862 863
                                                    sididx,
                                                    sidcb,
864 865 866
                                                    mslambda / (minthr * bmax), INFINITY, &b4, NULL, 0);
                        B0 += b1+b2;
                        B1 += b3+b4;
867 868
                        dist1 -= b1+b2;
                        dist2 -= b3+b4;
869 870 871
                    }
                    cpe->ms_mask[w*16+g] = dist2 <= dist1 && B1 < B0;
                    if (cpe->ms_mask[w*16+g]) {
872
                        if (sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT) {
873 874 875 876
                            sce0->sf_idx[w*16+g] = mididx;
                            sce1->sf_idx[w*16+g] = sididx;
                            sce0->band_type[w*16+g] = midcb;
                            sce1->band_type[w*16+g] = sidcb;
877 878 879
                        } else if ((sce0->band_type[w*16+g] != NOISE_BT) ^ (sce1->band_type[w*16+g] != NOISE_BT)) {
                            /* ms_mask unneeded, and it confuses some decoders */
                            cpe->ms_mask[w*16+g] = 0;
880 881 882 883 884 885
                        }
                        break;
                    } else if (B1 > B0) {
                        /* More boost won't fix this */
                        break;
                    }
886 887
                }
            }
888 889 890 891
            if (!sce0->zeroes[w*16+g] && sce0->band_type[w*16+g] < RESERVED_BT)
                prev_mid = sce0->sf_idx[w*16+g];
            if (!sce1->zeroes[w*16+g] && !cpe->is_mask[w*16+g] && sce1->band_type[w*16+g] < RESERVED_BT)
                prev_side = sce1->sf_idx[w*16+g];
892 893 894 895 896
            start += sce0->ics.swb_sizes[g];
        }
    }
}

897
AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
898
    [AAC_CODER_ANMR] = {
899 900 901
        search_for_quantizers_anmr,
        encode_window_bands_info,
        quantize_and_encode_band,
902
        ff_aac_encode_tns_info,
903
        ff_aac_encode_ltp_info,
904
        ff_aac_encode_main_pred,
905
        ff_aac_adjust_common_pred,
906
        ff_aac_adjust_common_ltp,
907
        ff_aac_apply_main_pred,
908
        ff_aac_apply_tns,
909 910
        ff_aac_update_ltp,
        ff_aac_ltp_insert_new_frame,
911
        set_special_band_scalefactors,
912
        search_for_pns,
913
        mark_pns,
914
        ff_aac_search_for_tns,
915
        ff_aac_search_for_ltp,
916
        search_for_ms,
917 918
        ff_aac_search_for_is,
        ff_aac_search_for_pred,
919
    },
920
    [AAC_CODER_TWOLOOP] = {
921
        search_for_quantizers_twoloop,
922
        codebook_trellis_rate,
923
        quantize_and_encode_band,
924
        ff_aac_encode_tns_info,
925
        ff_aac_encode_ltp_info,
926
        ff_aac_encode_main_pred,
927
        ff_aac_adjust_common_pred,
928
        ff_aac_adjust_common_ltp,
929
        ff_aac_apply_main_pred,
930
        ff_aac_apply_tns,
931 932
        ff_aac_update_ltp,
        ff_aac_ltp_insert_new_frame,
933
        set_special_band_scalefactors,
934
        search_for_pns,
935
        mark_pns,
936
        ff_aac_search_for_tns,
937
        ff_aac_search_for_ltp,
938
        search_for_ms,
939 940
        ff_aac_search_for_is,
        ff_aac_search_for_pred,
941
    },
942
    [AAC_CODER_FAST] = {
943
        search_for_quantizers_fast,
944
        codebook_trellis_rate,
945
        quantize_and_encode_band,
946
        ff_aac_encode_tns_info,
947
        ff_aac_encode_ltp_info,
948
        ff_aac_encode_main_pred,
949
        ff_aac_adjust_common_pred,
950
        ff_aac_adjust_common_ltp,
951
        ff_aac_apply_main_pred,
952
        ff_aac_apply_tns,
953 954
        ff_aac_update_ltp,
        ff_aac_ltp_insert_new_frame,
955
        set_special_band_scalefactors,
956
        search_for_pns,
957
        mark_pns,
958
        ff_aac_search_for_tns,
959
        ff_aac_search_for_ltp,
960
        search_for_ms,
961 962
        ff_aac_search_for_is,
        ff_aac_search_for_pred,
963 964
    },
};