/*
 * AAC coefficients encoder
 * Copyright (C) 2008-2009 Konstantin Shishkov
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * AAC coefficients encoder
 */

/***********************************
 *              TODOs:
 * speedup quantizer selection
 * add sane pulse detection
 ***********************************/

#include "libavutil/libm.h" // brought forward to work around cygwin header breakage

#include <float.h>
#include "libavutil/mathematics.h"
#include "avcodec.h"
#include "put_bits.h"
#include "aac.h"
#include "aacenc.h"
#include "aactab.h"
#include "aacenctab.h"
#include "aacenc_utils.h"
#include "aacenc_quantization.h"
#include "aac_tablegen_decl.h"

#include "aacenc_is.h"
#include "aacenc_tns.h"
#include "aacenc_pred.h"

/** Frequency in Hz for lower limit of noise substitution **/
#define NOISE_LOW_LIMIT 4500

/* Energy spread threshold value below which no PNS is used. This typically
 * corresponds to around 17 kHz, after which PNS usage decays, ending at 19 kHz. */
#define NOISE_SPREAD_THRESHOLD 0.5f

/* This constant gets divided by lambda to return ~1.65 which when multiplied
 * by the band->threshold and compared to band->energy is the boundary between
 * excessive PNS and little PNS usage. */
#define NOISE_LAMBDA_NUMERATOR 252.1f

/**
 * structure used in optimal codebook search
 */
typedef struct BandCodingPath {
    int prev_idx; ///< pointer to the previous path point
    float cost;   ///< path cost
    int run;      ///< number of consecutive bands, ending at this point, that share this codebook
} BandCodingPath;

/**
 * Encode band info for single window group bands.
 *
 * Runs a dynamic-programming search over (band, codebook) states. The cost of
 * a state is the accumulated quantize_band_cost() result plus the bits needed
 * to signal a codebook change. The cheapest path is then written to s->pb as
 * a sequence of (4-bit codebook, run length) pairs, and sce->band_type[] and
 * sce->zeroes[] are updated to match.
 *
 * @param s         encoder context; s->scoefs is overwritten and s->pb
 *                  receives the coded section data
 * @param sce       channel element whose bands are being coded
 * @param win       index of the first window of the group
 * @param group_len number of windows in the group
 * @param lambda    rate-distortion multiplier; divided by each band's
 *                  threshold before being handed to quantize_band_cost()
 */
static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce,
                                     int win, int group_len, const float lambda)
{
    BandCodingPath path[120][CB_TOT_ALL];
    int w, swb, cb, start, size;
    int i, j;
    const int max_sfb  = sce->ics.max_sfb;
    /* run lengths are written in 5-bit units for a single window, 3-bit otherwise */
    const int run_bits = sce->ics.num_windows == 1 ? 5 : 3;
    const int run_esc  = (1 << run_bits) - 1;
    int idx, ppos, count;
    int stackrun[120], stackcb[120], stack_len;
    float next_minrd = INFINITY;
    int next_mincb = 0;

    abs_pow34_v(s->scoefs, sce->coeffs, 1024);
    start = win*128;
    for (cb = 0; cb < CB_TOT_ALL; cb++) {
        path[0][cb].cost     = 0.0f;
        path[0][cb].prev_idx = -1;
        path[0][cb].run      = 0;
    }
    for (swb = 0; swb < max_sfb; swb++) {
        size = sce->ics.swb_sizes[swb];
        if (sce->zeroes[win*16 + swb]) {
            /* zeroed band: every codebook simply extends its run at no extra cost */
            for (cb = 0; cb < CB_TOT_ALL; cb++) {
                path[swb+1][cb].prev_idx = cb;
                path[swb+1][cb].cost     = path[swb][cb].cost;
                path[swb+1][cb].run      = path[swb][cb].run + 1;
            }
        } else {
            /* best state of the previous evaluated band, used as the source of a switch */
            float minrd = next_minrd;
            int mincb = next_mincb;
            next_minrd = INFINITY;
            next_mincb = 0;
            for (cb = 0; cb < CB_TOT_ALL; cb++) {
                float cost_stay_here, cost_get_here;
                float rd = 0.0f;
                /* codebooks that are not permitted for this band's current type
                 * are marked with an infinite cost */
                if ((cb >= 12 && sce->band_type[win*16+swb] < aac_cb_out_map[cb]) ||
                    (cb  < aac_cb_in_map[sce->band_type[win*16+swb]] && sce->band_type[win*16+swb] > aac_cb_out_map[cb])) {
                    path[swb+1][cb].prev_idx = -1;
                    path[swb+1][cb].cost     = INFINITY;
                    path[swb+1][cb].run      = path[swb][cb].run + 1;
                    continue;
                }
                for (w = 0; w < group_len; w++) {
                    FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(win+w)*16+swb];
                    rd += quantize_band_cost(s, &sce->coeffs[start + w*128],
                                             &s->scoefs[start + w*128], size,
                                             sce->sf_idx[(win+w)*16+swb], aac_cb_out_map[cb],
                                             lambda / band->threshold, INFINITY, NULL, 0);
                }
                cost_stay_here = path[swb][cb].cost + rd;
                /* switching costs a new 4-bit codebook id plus one run-length field */
                cost_get_here  = minrd              + rd + run_bits + 4;
                /* extending the run costs an extra field when it crosses an escape boundary */
                if (   run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run]
                    != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1])
                    cost_stay_here += run_bits;
                if (cost_get_here < cost_stay_here) {
                    path[swb+1][cb].prev_idx = mincb;
                    path[swb+1][cb].cost     = cost_get_here;
                    path[swb+1][cb].run      = 1;
                } else {
                    path[swb+1][cb].prev_idx = cb;
                    path[swb+1][cb].cost     = cost_stay_here;
                    path[swb+1][cb].run      = path[swb][cb].run + 1;
                }
                if (path[swb+1][cb].cost < next_minrd) {
                    next_minrd = path[swb+1][cb].cost;
                    next_mincb = cb;
                }
            }
        }
        start += sce->ics.swb_sizes[swb];
    }

    //convert resulting path from backward-linked list
    stack_len = 0;
    idx       = 0;
    for (cb = 1; cb < CB_TOT_ALL; cb++)
        if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
            idx = cb;
    ppos = max_sfb;
    while (ppos > 0) {
        av_assert1(idx >= 0);
        cb = idx;
        stackrun[stack_len] = path[ppos][cb].run;
        stackcb [stack_len] = cb;
        /* the predecessor is stored at the first band of the current run */
        idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx;
        ppos -= path[ppos][cb].run;
        stack_len++;
    }
    //perform actual band info encoding
    start = 0;
    for (i = stack_len - 1; i >= 0; i--) {
        cb = aac_cb_out_map[stackcb[i]];
        put_bits(&s->pb, 4, cb);
        count = stackrun[i];
        memset(sce->zeroes + win*16 + start, !cb, count);
        //XXX: memset when band_type is also uint8_t
        for (j = 0; j < count; j++) {
            sce->band_type[win*16 + start] = cb;
            start++;
        }
        /* run lengths of run_esc or more are sent as escape values followed by the remainder */
        while (count >= run_esc) {
            put_bits(&s->pb, run_bits, run_esc);
            count -= run_esc;
        }
        put_bits(&s->pb, run_bits, count);
    }
}

185 186 187
static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce,
                                  int win, int group_len, const float lambda)
{
188
    BandCodingPath path[120][CB_TOT_ALL];
Mans Rullgard's avatar
Mans Rullgard committed
189
    int w, swb, cb, start, size;
190 191 192 193 194 195
    int i, j;
    const int max_sfb  = sce->ics.max_sfb;
    const int run_bits = sce->ics.num_windows == 1 ? 5 : 3;
    const int run_esc  = (1 << run_bits) - 1;
    int idx, ppos, count;
    int stackrun[120], stackcb[120], stack_len;
196
    float next_minbits = INFINITY;
197 198 199 200
    int next_mincb = 0;

    abs_pow34_v(s->scoefs, sce->coeffs, 1024);
    start = win*128;
201
    for (cb = 0; cb < CB_TOT_ALL; cb++) {
202 203 204 205 206 207 208
        path[0][cb].cost     = run_bits+4;
        path[0][cb].prev_idx = -1;
        path[0][cb].run      = 0;
    }
    for (swb = 0; swb < max_sfb; swb++) {
        size = sce->ics.swb_sizes[swb];
        if (sce->zeroes[win*16 + swb]) {
209
            float cost_stay_here = path[swb][0].cost;
210
            float cost_get_here  = next_minbits + run_bits + 4;
211 212 213 214 215 216 217 218 219 220 221 222
            if (   run_value_bits[sce->ics.num_windows == 8][path[swb][0].run]
                != run_value_bits[sce->ics.num_windows == 8][path[swb][0].run+1])
                cost_stay_here += run_bits;
            if (cost_get_here < cost_stay_here) {
                path[swb+1][0].prev_idx = next_mincb;
                path[swb+1][0].cost     = cost_get_here;
                path[swb+1][0].run      = 1;
            } else {
                path[swb+1][0].prev_idx = 0;
                path[swb+1][0].cost     = cost_stay_here;
                path[swb+1][0].run      = path[swb][0].run + 1;
            }
223
            next_minbits = path[swb+1][0].cost;
224
            next_mincb = 0;
225
            for (cb = 1; cb < CB_TOT_ALL; cb++) {
226 227 228
                path[swb+1][cb].cost = 61450;
                path[swb+1][cb].prev_idx = -1;
                path[swb+1][cb].run = 0;
229 230
            }
        } else {
231
            float minbits = next_minbits;
232 233
            int mincb = next_mincb;
            int startcb = sce->band_type[win*16+swb];
234
            startcb = aac_cb_in_map[startcb];
235
            next_minbits = INFINITY;
236 237 238 239 240 241
            next_mincb = 0;
            for (cb = 0; cb < startcb; cb++) {
                path[swb+1][cb].cost = 61450;
                path[swb+1][cb].prev_idx = -1;
                path[swb+1][cb].run = 0;
            }
242
            for (cb = startcb; cb < CB_TOT_ALL; cb++) {
243
                float cost_stay_here, cost_get_here;
244
                float bits = 0.0f;
245
                if (cb >= 12 && sce->band_type[win*16+swb] != aac_cb_out_map[cb]) {
246 247 248 249 250
                    path[swb+1][cb].cost = 61450;
                    path[swb+1][cb].prev_idx = -1;
                    path[swb+1][cb].run = 0;
                    continue;
                }
251
                for (w = 0; w < group_len; w++) {
252 253
                    bits += quantize_band_cost(s, &sce->coeffs[start + w*128],
                                               &s->scoefs[start + w*128], size,
254
                                               sce->sf_idx[win*16+swb],
255
                                               aac_cb_out_map[cb],
256
                                               0, INFINITY, NULL, 0);
257
                }
258 259
                cost_stay_here = path[swb][cb].cost + bits;
                cost_get_here  = minbits            + bits + run_bits + 4;
260 261 262 263 264 265 266 267 268 269 270 271
                if (   run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run]
                    != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1])
                    cost_stay_here += run_bits;
                if (cost_get_here < cost_stay_here) {
                    path[swb+1][cb].prev_idx = mincb;
                    path[swb+1][cb].cost     = cost_get_here;
                    path[swb+1][cb].run      = 1;
                } else {
                    path[swb+1][cb].prev_idx = cb;
                    path[swb+1][cb].cost     = cost_stay_here;
                    path[swb+1][cb].run      = path[swb][cb].run + 1;
                }
272 273
                if (path[swb+1][cb].cost < next_minbits) {
                    next_minbits = path[swb+1][cb].cost;
274 275 276 277 278 279 280 281 282 283
                    next_mincb = cb;
                }
            }
        }
        start += sce->ics.swb_sizes[swb];
    }

    //convert resulting path from backward-linked list
    stack_len = 0;
    idx       = 0;
284
    for (cb = 1; cb < CB_TOT_ALL; cb++)
285 286 287 288
        if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
            idx = cb;
    ppos = max_sfb;
    while (ppos > 0) {
289
        av_assert1(idx >= 0);
290 291 292 293 294 295 296 297 298 299
        cb = idx;
        stackrun[stack_len] = path[ppos][cb].run;
        stackcb [stack_len] = cb;
        idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx;
        ppos -= path[ppos][cb].run;
        stack_len++;
    }
    //perform actual band info encoding
    start = 0;
    for (i = stack_len - 1; i >= 0; i--) {
300 301
        cb = aac_cb_out_map[stackcb[i]];
        put_bits(&s->pb, 4, cb);
302
        count = stackrun[i];
303
        memset(sce->zeroes + win*16 + start, !cb, count);
304 305
        //XXX: memset when band_type is also uint8_t
        for (j = 0; j < count; j++) {
306
            sce->band_type[win*16 + start] = cb;
307 308 309 310 311 312 313 314 315 316
            start++;
        }
        while (count >= run_esc) {
            put_bits(&s->pb, run_bits, run_esc);
            count -= run_esc;
        }
        put_bits(&s->pb, run_bits, count);
    }
}

/**
 * One node of the scalefactor trellis used by search_for_quantizers_anmr().
 */
typedef struct TrellisPath {
    float cost; ///< accumulated cost of the best path reaching this node
    int prev;   ///< state index in the previous stage; -1 in the initial stage, -2 while unreached
} TrellisPath;

/** Number of trellis stages: stage 0 is the starting point, later stages map to bands. */
#define TRELLIS_STAGES 121
/** Number of candidate scalefactor offsets per stage. */
#define TRELLIS_STATES (SCALE_MAX_DIFF+1)

325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361
static void set_special_band_scalefactors(AACEncContext *s, SingleChannelElement *sce)
{
    int w, g, start = 0;
    int minscaler_n = sce->sf_idx[0], minscaler_i = sce->sf_idx[0];
    int bands = 0;

    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
        start = 0;
        for (g = 0;  g < sce->ics.num_swb; g++) {
            if (sce->band_type[w*16+g] == INTENSITY_BT || sce->band_type[w*16+g] == INTENSITY_BT2) {
                sce->sf_idx[w*16+g] = av_clip(ceilf(log2f(sce->is_ener[w*16+g])*2), -155, 100);
                minscaler_i = FFMIN(minscaler_i, sce->sf_idx[w*16+g]);
                bands++;
            } else if (sce->band_type[w*16+g] == NOISE_BT) {
                sce->sf_idx[w*16+g] = av_clip(4+log2f(sce->pns_ener[w*16+g])*2, -100, 155);
                minscaler_n = FFMIN(minscaler_n, sce->sf_idx[w*16+g]);
                bands++;
            }
            start += sce->ics.swb_sizes[g];
        }
    }

    if (!bands)
        return;

    /* Clip the scalefactor indices */
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
        for (g = 0;  g < sce->ics.num_swb; g++) {
            if (sce->band_type[w*16+g] == INTENSITY_BT || sce->band_type[w*16+g] == INTENSITY_BT2) {
                sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler_i, minscaler_i + SCALE_MAX_DIFF);
            } else if (sce->band_type[w*16+g] == NOISE_BT) {
                sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler_n, minscaler_n + SCALE_MAX_DIFF);
            }
        }
    }
}

362
static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s,
363 364
                                       SingleChannelElement *sce,
                                       const float lambda)
365 366
{
    int q, w, w2, g, start = 0;
367
    int i, j;
368
    int idx;
369 370
    TrellisPath paths[TRELLIS_STAGES][TRELLIS_STATES];
    int bandaddr[TRELLIS_STAGES];
371 372
    int minq;
    float mincost;
373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392
    float q0f = FLT_MAX, q1f = 0.0f, qnrgf = 0.0f;
    int q0, q1, qcnt = 0;

    for (i = 0; i < 1024; i++) {
        float t = fabsf(sce->coeffs[i]);
        if (t > 0.0f) {
            q0f = FFMIN(q0f, t);
            q1f = FFMAX(q1f, t);
            qnrgf += t*t;
            qcnt++;
        }
    }

    if (!qcnt) {
        memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
        memset(sce->zeroes, 1, sizeof(sce->zeroes));
        return;
    }

    //minimum scalefactor index is when minimum nonzero coefficient after quantizing is not clipped
393
    q0 = coef2minsf(q0f);
394
    //maximum scalefactor index is when maximum coefficient after quantizing is still not zero
395
    q1 = coef2maxsf(q1f);
396 397 398 399
    if (q1 - q0 > 60) {
        int q0low  = q0;
        int q1high = q1;
        //minimum scalefactor index is when maximum nonzero coefficient after quantizing is not clipped
400
        int qnrg = av_clip_uint8(log2f(sqrtf(qnrgf/qcnt))*4 - 31 + SCALE_ONE_POS - SCALE_DIV_512);
401 402 403 404 405 406 407 408 409 410
        q1 = qnrg + 30;
        q0 = qnrg - 30;
        if (q0 < q0low) {
            q1 += q0low - q0;
            q0  = q0low;
        } else if (q1 > q1high) {
            q0 -= q1 - q1high;
            q1  = q1high;
        }
    }
411

412
    for (i = 0; i < TRELLIS_STATES; i++) {
413 414
        paths[0][i].cost    = 0.0f;
        paths[0][i].prev    = -1;
415
    }
416 417
    for (j = 1; j < TRELLIS_STAGES; j++) {
        for (i = 0; i < TRELLIS_STATES; i++) {
418 419 420
            paths[j][i].cost    = INFINITY;
            paths[j][i].prev    = -2;
        }
421
    }
422
    idx = 1;
423
    abs_pow34_v(s->scoefs, sce->coeffs, 1024);
424
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
425
        start = w*128;
426
        for (g = 0; g < sce->ics.num_swb; g++) {
427
            const float *coefs = &sce->coeffs[start];
428 429 430
            float qmin, qmax;
            int nz = 0;

431
            bandaddr[idx] = w * 16 + g;
432 433
            qmin = INT_MAX;
            qmax = 0.0f;
434
            for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
435
                FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
436
                if (band->energy <= band->threshold || band->threshold == 0.0f) {
437 438 439 440 441
                    sce->zeroes[(w+w2)*16+g] = 1;
                    continue;
                }
                sce->zeroes[(w+w2)*16+g] = 0;
                nz = 1;
442
                for (i = 0; i < sce->ics.swb_sizes[g]; i++) {
443
                    float t = fabsf(coefs[w2*128+i]);
444
                    if (t > 0.0f)
445 446
                        qmin = FFMIN(qmin, t);
                    qmax = FFMAX(qmax, t);
447 448
                }
            }
449
            if (nz) {
450 451
                int minscale, maxscale;
                float minrd = INFINITY;
452
                float maxval;
453
                //minimum scalefactor index is when minimum nonzero coefficient after quantizing is not clipped
454
                minscale = coef2minsf(qmin);
455
                //maximum scalefactor index is when maximum coefficient after quantizing is still not zero
456
                maxscale = coef2maxsf(qmax);
457 458
                minscale = av_clip(minscale - q0, 0, TRELLIS_STATES - 1);
                maxscale = av_clip(maxscale - q0, 0, TRELLIS_STATES);
459
                maxval = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], s->scoefs+start);
460
                for (q = minscale; q < maxscale; q++) {
461
                    float dist = 0;
462
                    int cb = find_min_book(maxval, sce->sf_idx[w*16+g]);
463
                    for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
464
                        FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
465
                        dist += quantize_band_cost(s, coefs + w2*128, s->scoefs + start + w2*128, sce->ics.swb_sizes[g],
466
                                                   q + q0, cb, lambda / band->threshold, INFINITY, NULL, 0);
467
                    }
468
                    minrd = FFMIN(minrd, dist);
469

470
                    for (i = 0; i < q1 - q0; i++) {
471
                        float cost;
472
                        cost = paths[idx - 1][i].cost + dist
473
                               + ff_aac_scalefactor_bits[q - i + SCALE_DIFF_ZERO];
474
                        if (cost < paths[idx][q].cost) {
475 476
                            paths[idx][q].cost    = cost;
                            paths[idx][q].prev    = i;
477 478 479
                        }
                    }
                }
480
            } else {
481
                for (q = 0; q < q1 - q0; q++) {
Alex Converse's avatar
Alex Converse committed
482 483
                    paths[idx][q].cost = paths[idx - 1][q].cost + 1;
                    paths[idx][q].prev = q;
484 485 486 487
                }
            }
            sce->zeroes[w*16+g] = !nz;
            start += sce->ics.swb_sizes[g];
488
            idx++;
489 490
        }
    }
491 492 493
    idx--;
    mincost = paths[idx][0].cost;
    minq    = 0;
494
    for (i = 1; i < TRELLIS_STATES; i++) {
495 496 497
        if (paths[idx][i].cost < mincost) {
            mincost = paths[idx][i].cost;
            minq = i;
498 499
        }
    }
500
    while (idx) {
501
        sce->sf_idx[bandaddr[idx]] = minq + q0;
502 503
        minq = paths[idx][minq].prev;
        idx--;
504 505
    }
    //set the same quantizers inside window groups
506 507 508
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
        for (g = 0;  g < sce->ics.num_swb; g++)
            for (w2 = 1; w2 < sce->ics.group_len[w]; w2++)
509 510 511 512 513 514
                sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g];
}

/**
 * two-loop quantizers search taken from ISO 13818-7 Appendix C
 */
515 516 517 518
static void search_for_quantizers_twoloop(AVCodecContext *avctx,
                                          AACEncContext *s,
                                          SingleChannelElement *sce,
                                          const float lambda)
519 520
{
    int start = 0, i, w, w2, g;
521
    int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / avctx->channels * (lambda / 120.f);
522
    float dists[128] = { 0 }, uplims[128] = { 0 };
523
    float maxvals[128];
524
    int fflag, minscaler;
525
    int its  = 0;
526 527 528
    int allz = 0;
    float minthr = INFINITY;

529 530 531
    // for values above this the decoder might end up in an endless loop
    // due to always having more bits than what can be encoded.
    destbits = FFMIN(destbits, 5800);
532 533
    //XXX: some heuristic to determine initial quantizers will reduce search time
    //determine zero bands and upper limits
534 535
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
        for (g = 0;  g < sce->ics.num_swb; g++) {
536
            int nz = 0;
537
            float uplim = 0.0f, energy = 0.0f;
538
            for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
539
                FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
540
                uplim  += band->threshold;
541
                energy += band->energy;
542
                if (band->energy <= band->threshold || band->threshold == 0.0f) {
543 544 545 546 547 548 549
                    sce->zeroes[(w+w2)*16+g] = 1;
                    continue;
                }
                nz = 1;
            }
            uplims[w*16+g] = uplim *512;
            sce->zeroes[w*16+g] = !nz;
550
            if (nz)
551
                minthr = FFMIN(minthr, uplim);
552
            allz |= nz;
553 554
        }
    }
555 556 557
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
        for (g = 0;  g < sce->ics.num_swb; g++) {
            if (sce->zeroes[w*16+g]) {
558 559 560
                sce->sf_idx[w*16+g] = SCALE_ONE_POS;
                continue;
            }
561
            sce->sf_idx[w*16+g] = SCALE_ONE_POS + FFMIN(log2f(uplims[w*16+g]/minthr)*4,59);
562 563 564
        }
    }

565
    if (!allz)
566 567
        return;
    abs_pow34_v(s->scoefs, sce->coeffs, 1024);
568 569 570 571 572 573 574 575 576 577

    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
        start = w*128;
        for (g = 0;  g < sce->ics.num_swb; g++) {
            const float *scaled = s->scoefs + start;
            maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled);
            start += sce->ics.swb_sizes[g];
        }
    }

578 579
    //perform two-loop search
    //outer loop - improve quality
580
    do {
581 582 583 584
        int tbits, qstep;
        minscaler = sce->sf_idx[0];
        //inner loop - quantize spectrum to fit into given number of bits
        qstep = its ? 1 : 32;
585
        do {
586 587
            int prev = -1;
            tbits = 0;
588
            for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
589
                start = w*128;
590
                for (g = 0;  g < sce->ics.num_swb; g++) {
591 592
                    const float *coefs = &sce->coeffs[start];
                    const float *scaled = &s->scoefs[start];
593 594
                    int bits = 0;
                    int cb;
595
                    float dist = 0.0f;
596

597
                    if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
598
                        start += sce->ics.swb_sizes[g];
599
                        continue;
600
                    }
601
                    minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
602
                    cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
Alex Converse's avatar
Alex Converse committed
603 604 605 606 607 608 609
                    for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
                        int b;
                        dist += quantize_band_cost(s, coefs + w2*128,
                                                   scaled + w2*128,
                                                   sce->ics.swb_sizes[g],
                                                   sce->sf_idx[w*16+g],
                                                   cb,
610
                                                   1.0f,
Alex Converse's avatar
Alex Converse committed
611
                                                   INFINITY,
612 613
                                                   &b,
                                                   0);
Alex Converse's avatar
Alex Converse committed
614 615
                        bits += b;
                    }
616
                    dists[w*16+g] = dist - bits;
617
                    if (prev != -1) {
618 619 620 621 622 623 624
                        bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
                    }
                    tbits += bits;
                    start += sce->ics.swb_sizes[g];
                    prev = sce->sf_idx[w*16+g];
                }
            }
625
            if (tbits > destbits) {
626 627
                for (i = 0; i < 128; i++)
                    if (sce->sf_idx[i] < 218 - qstep)
628
                        sce->sf_idx[i] += qstep;
629
            } else {
630 631
                for (i = 0; i < 128; i++)
                    if (sce->sf_idx[i] > 60 - qstep)
632 633 634
                        sce->sf_idx[i] -= qstep;
            }
            qstep >>= 1;
635
            if (!qstep && tbits > destbits*1.02 && sce->sf_idx[0] < 217)
636
                qstep = 1;
637
        } while (qstep);
638 639 640

        fflag = 0;
        minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF);
641

642 643
        for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
            for (g = 0; g < sce->ics.num_swb; g++) {
644
                int prevsc = sce->sf_idx[w*16+g];
645
                if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g] > 60) {
646
                    if (find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1))
Young Han Lee's avatar
Young Han Lee committed
647
                        sce->sf_idx[w*16+g]--;
648 649 650
                    else //Try to make sure there is some energy in every band
                        sce->sf_idx[w*16+g]-=2;
                }
651 652
                sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF);
                sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], 219);
653
                if (sce->sf_idx[w*16+g] != prevsc)
654
                    fflag = 1;
655
                sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
656 657 658
            }
        }
        its++;
659
    } while (fflag && its < 10);
660 661 662
}

static void search_for_quantizers_faac(AVCodecContext *avctx, AACEncContext *s,
663 664
                                       SingleChannelElement *sce,
                                       const float lambda)
665 666 667 668 669 670 671
{
    int start = 0, i, w, w2, g;
    float uplim[128], maxq[128];
    int minq, maxsf;
    float distfact = ((sce->ics.num_windows > 1) ? 85.80 : 147.84) / lambda;
    int last = 0, lastband = 0, curband = 0;
    float avg_energy = 0.0;
672
    if (sce->ics.num_windows == 1) {
673
        start = 0;
674 675
        for (i = 0; i < 1024; i++) {
            if (i - start >= sce->ics.swb_sizes[curband]) {
676 677 678
                start += sce->ics.swb_sizes[curband];
                curband++;
            }
679
            if (sce->coeffs[i]) {
680 681 682 683 684
                avg_energy += sce->coeffs[i] * sce->coeffs[i];
                last = i;
                lastband = curband;
            }
        }
685 686
    } else {
        for (w = 0; w < 8; w++) {
687
            const float *coeffs = &sce->coeffs[w*128];
688
            curband = start = 0;
689 690
            for (i = 0; i < 128; i++) {
                if (i - start >= sce->ics.swb_sizes[curband]) {
691 692 693
                    start += sce->ics.swb_sizes[curband];
                    curband++;
                }
694
                if (coeffs[i]) {
695 696 697 698 699 700 701 702 703
                    avg_energy += coeffs[i] * coeffs[i];
                    last = FFMAX(last, i);
                    lastband = FFMAX(lastband, curband);
                }
            }
        }
    }
    last++;
    avg_energy /= last;
704 705
    if (avg_energy == 0.0f) {
        for (i = 0; i < FF_ARRAY_ELEMS(sce->sf_idx); i++)
706 707 708
            sce->sf_idx[i] = SCALE_ONE_POS;
        return;
    }
709
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
710
        start = w*128;
711
        for (g = 0; g < sce->ics.num_swb; g++) {
712
            float *coefs   = &sce->coeffs[start];
713 714 715 716
            const int size = sce->ics.swb_sizes[g];
            int start2 = start, end2 = start + size, peakpos = start;
            float maxval = -1, thr = 0.0f, t;
            maxq[w*16+g] = 0.0f;
717
            if (g > lastband) {
718 719
                maxq[w*16+g] = 0.0f;
                start += size;
720
                for (w2 = 0; w2 < sce->ics.group_len[w]; w2++)
721 722 723
                    memset(coefs + w2*128, 0, sizeof(coefs[0])*size);
                continue;
            }
724 725
            for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
                for (i = 0; i < size; i++) {
726
                    float t = coefs[w2*128+i]*coefs[w2*128+i];
727
                    maxq[w*16+g] = FFMAX(maxq[w*16+g], fabsf(coefs[w2*128 + i]));
728
                    thr += t;
729
                    if (sce->ics.num_windows == 1 && maxval < t) {
730
                        maxval  = t;
731 732 733 734
                        peakpos = start+i;
                    }
                }
            }
735
            if (sce->ics.num_windows == 1) {
736 737
                start2 = FFMAX(peakpos - 2, start2);
                end2   = FFMIN(peakpos + 3, end2);
738
            } else {
739 740 741 742 743
                start2 -= start;
                end2   -= start;
            }
            start += size;
            thr = pow(thr / (avg_energy * (end2 - start2)), 0.3 + 0.1*(lastband - g) / lastband);
744
            t   = 1.0 - (1.0 * start2 / last);
745 746 747 748 749
            uplim[w*16+g] = distfact / (1.4 * thr + t*t*t + 0.075);
        }
    }
    memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
    abs_pow34_v(s->scoefs, sce->coeffs, 1024);
750
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
751
        start = w*128;
752
        for (g = 0;  g < sce->ics.num_swb; g++) {
753 754
            const float *coefs  = &sce->coeffs[start];
            const float *scaled = &s->scoefs[start];
755
            const int size      = sce->ics.swb_sizes[g];
756
            int scf, prev_scf, step;
757
            int min_scf = -1, max_scf = 256;
758
            float curdiff;
759
            if (maxq[w*16+g] < 21.544) {
760 761 762 763 764
                sce->zeroes[w*16+g] = 1;
                start += size;
                continue;
            }
            sce->zeroes[w*16+g] = 0;
765
            scf  = prev_scf = av_clip(SCALE_ONE_POS - SCALE_DIV_512 - log2f(1/maxq[w*16+g])*16/3, 60, 218);
766
            for (;;) {
767 768 769
                float dist = 0.0f;
                int quant_max;

770
                for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
771 772 773 774 775 776
                    int b;
                    dist += quantize_band_cost(s, coefs + w2*128,
                                               scaled + w2*128,
                                               sce->ics.swb_sizes[g],
                                               scf,
                                               ESC_BT,
777
                                               lambda,
778
                                               INFINITY,
779 780
                                               &b,
                                               0);
781 782
                    dist -= b;
                }
783
                dist *= 1.0f / 512.0f / lambda;
784
                quant_max = quant(maxq[w*16+g], ff_aac_pow2sf_tab[POW_SF2_ZERO - scf + SCALE_ONE_POS - SCALE_DIV_512], ROUND_STANDARD);
785
                if (quant_max >= 8191) { // too much, return to the previous quantizer
786 787 788 789 790
                    sce->sf_idx[w*16+g] = prev_scf;
                    break;
                }
                prev_scf = scf;
                curdiff = fabsf(dist - uplim[w*16+g]);
791
                if (curdiff <= 1.0f)
792 793
                    step = 0;
                else
794
                    step = log2f(curdiff);
795
                if (dist > uplim[w*16+g])
796
                    step = -step;
797
                scf += step;
798
                scf = av_clip_uint8(scf);
799
                step = scf - prev_scf;
800
                if (FFABS(step) <= 1 || (step > 0 && scf >= max_scf) || (step < 0 && scf <= min_scf)) {
801
                    sce->sf_idx[w*16+g] = av_clip(scf, min_scf, max_scf);
802 803
                    break;
                }
804
                if (step > 0)
805
                    min_scf = prev_scf;
806
                else
807
                    max_scf = prev_scf;
808 809 810 811 812
            }
            start += size;
        }
    }
    minq = sce->sf_idx[0] ? sce->sf_idx[0] : INT_MAX;
813 814
    for (i = 1; i < 128; i++) {
        if (!sce->sf_idx[i])
815 816 817 818
            sce->sf_idx[i] = sce->sf_idx[i-1];
        else
            minq = FFMIN(minq, sce->sf_idx[i]);
    }
819 820
    if (minq == INT_MAX)
        minq = 0;
821 822
    minq = FFMIN(minq, SCALE_MAX_POS);
    maxsf = FFMIN(minq + SCALE_MAX_DIFF, SCALE_MAX_POS);
823 824
    for (i = 126; i >= 0; i--) {
        if (!sce->sf_idx[i])
825 826 827 828 829 830
            sce->sf_idx[i] = sce->sf_idx[i+1];
        sce->sf_idx[i] = av_clip(sce->sf_idx[i], minq, maxsf);
    }
}

static void search_for_quantizers_fast(AVCodecContext *avctx, AACEncContext *s,
831 832
                                       SingleChannelElement *sce,
                                       const float lambda)
833
{
Mans Rullgard's avatar
Mans Rullgard committed
834
    int i, w, w2, g;
835 836 837
    int minq = 255;

    memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
838 839 840
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
        for (g = 0; g < sce->ics.num_swb; g++) {
            for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
841
                FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
842
                if (band->energy <= band->threshold) {
843 844
                    sce->sf_idx[(w+w2)*16+g] = 218;
                    sce->zeroes[(w+w2)*16+g] = 1;
845
                } else {
846
                    sce->sf_idx[(w+w2)*16+g] = av_clip(SCALE_ONE_POS - SCALE_DIV_512 + log2f(band->threshold), 80, 218);
847 848 849 850 851 852
                    sce->zeroes[(w+w2)*16+g] = 0;
                }
                minq = FFMIN(minq, sce->sf_idx[(w+w2)*16+g]);
            }
        }
    }
853
    for (i = 0; i < 128; i++) {
854 855
        sce->sf_idx[i] = 140;
        //av_clip(sce->sf_idx[i], minq, minq + SCALE_MAX_DIFF - 1);
856 857
    }
    //set the same quantizers inside window groups
858 859 860
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
        for (g = 0;  g < sce->ics.num_swb; g++)
            for (w2 = 1; w2 < sce->ics.group_len[w]; w2++)
861 862 863
                sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g];
}

864
/**
 * Select the bands that are coded with the NOISE_BT band type.
 *
 * Only bands whose start frequency lies above a lambda-scaled
 * NOISE_LOW_LIMIT are considered. For such a band the psychoacoustic energy,
 * threshold and spread are summed over the windows of the group; the band is
 * switched to NOISE_BT when the spread is large enough and either the band
 * was zeroed although its energy reaches the threshold, or its energy is
 * below the lambda-scaled threshold.
 *
 * For every selected band this sets sce->band_type[] to NOISE_BT, stores the
 * mean per-window energy in sce->pns_ener[] and clears sce->zeroes[].
 *
 * @param s     encoder context; supplies lambda and the psy bands of
 *              s->cur_channel
 * @param avctx codec context; only sample_rate is read
 * @param sce   channel element to update
 */
static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce)
{
    int start = 0, w, w2, g;
    const float lambda = s->lambda;
    /* Multiplier that maps a coefficient index to a frequency in Hz. */
    const float freq_mult = avctx->sample_rate/(1024.0f/sce->ics.num_windows)/2.0f;
    const float spread_threshold = NOISE_SPREAD_THRESHOLD*(lambda/120.f);
    const float thr_mult = NOISE_LAMBDA_NUMERATOR/lambda;

    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
        start = 0;
        for (g = 0;  g < sce->ics.num_swb; g++) {
            if (start*freq_mult > NOISE_LOW_LIMIT*(lambda/170.0f)) {
                float energy = 0.0f, threshold = 0.0f, spread = 0.0f;
                /* Accumulate the psy model figures over the whole group. */
                for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
                    FFPsyBand *band = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
                    energy += band->energy;
                    threshold += band->threshold;
                    spread += band->spread;
                }
                if (spread > spread_threshold*sce->ics.group_len[w] &&
                    ((sce->zeroes[w*16+g] && energy >= threshold) ||
                    energy < threshold*thr_mult*sce->ics.group_len[w])) {
                    sce->band_type[w*16+g] = NOISE_BT;
                    sce->pns_ener[w*16+g] = energy / sce->ics.group_len[w];
                    sce->zeroes[w*16+g] = 0;
                }
            }
            start += sce->ics.swb_sizes[g];
        }
    }
}

896
/**
 * Decide per band whether mid/side coding is preferable to left/right.
 *
 * Does nothing unless the channel pair uses a common window. For every band
 * that is not zeroed in either channel, the cost returned by
 * quantize_band_cost() is summed over the windows of the group, once for the
 * original left/right coefficients and once for the derived mid/side
 * coefficients. cpe->ms_mask[] is set to 1 when the mid/side total is
 * strictly smaller, and to 0 otherwise; bands that are zeroed in either
 * channel keep their previous mask value.
 *
 * @param s   encoder context; s->scoefs is overwritten as scratch space and
 *            the psy bands of s->cur_channel and s->cur_channel+1 are read
 * @param cpe channel pair element to analyse
 */
static void search_for_ms(AACEncContext *s, ChannelElement *cpe)
{
    int start = 0, i, w, w2, g;
    float M[128], S[128];
    /* Four 128-entry scratch areas carved out of s->scoefs. */
    float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3;
    const float lambda = s->lambda;
    SingleChannelElement *sce0 = &cpe->ch[0];
    SingleChannelElement *sce1 = &cpe->ch[1];

    if (!cpe->common_window)
        return;
    for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
        start = 0;
        for (g = 0;  g < sce0->ics.num_swb; g++) {
            if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) {
                /* dist1: left/right cost, dist2: mid/side cost. */
                float dist1 = 0.0f, dist2 = 0.0f;
                for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
                    FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
                    FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
                    float minthr = FFMIN(band0->threshold, band1->threshold);
                    float maxthr = FFMAX(band0->threshold, band1->threshold);
                    for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
                        /* M = (L + R) / 2, S = M - R = (L - R) / 2 */
                        M[i] = (sce0->coeffs[start+(w+w2)*128+i]
                              + sce1->coeffs[start+(w+w2)*128+i]) * 0.5;
                        S[i] =  M[i]
                              - sce1->coeffs[start+(w+w2)*128+i];
                    }
                    abs_pow34_v(L34, sce0->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
                    abs_pow34_v(R34, sce1->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
                    abs_pow34_v(M34, M,                         sce0->ics.swb_sizes[g]);
                    abs_pow34_v(S34, S,                         sce0->ics.swb_sizes[g]);
                    dist1 += quantize_band_cost(s, &sce0->coeffs[start + (w+w2)*128],
                                                L34,
                                                sce0->ics.swb_sizes[g],
                                                sce0->sf_idx[(w+w2)*16+g],
                                                sce0->band_type[(w+w2)*16+g],
                                                lambda / band0->threshold, INFINITY, NULL, 0);
                    dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128],
                                                R34,
                                                sce1->ics.swb_sizes[g],
                                                sce1->sf_idx[(w+w2)*16+g],
                                                sce1->band_type[(w+w2)*16+g],
                                                lambda / band1->threshold, INFINITY, NULL, 0);
                    /* Mid is weighted by the larger threshold of the pair,
                     * side by the smaller one. */
                    dist2 += quantize_band_cost(s, M,
                                                M34,
                                                sce0->ics.swb_sizes[g],
                                                sce0->sf_idx[(w+w2)*16+g],
                                                sce0->band_type[(w+w2)*16+g],
                                                lambda / maxthr, INFINITY, NULL, 0);
                    dist2 += quantize_band_cost(s, S,
                                                S34,
                                                sce1->ics.swb_sizes[g],
                                                sce1->sf_idx[(w+w2)*16+g],
                                                sce1->band_type[(w+w2)*16+g],
                                                lambda / minthr, INFINITY, NULL, 0);
                }
                cpe->ms_mask[w*16+g] = dist2 < dist1;
            }
            start += sce0->ics.swb_sizes[g];
        }
    }
}

958
AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
959
    [AAC_CODER_FAAC] = {
960
        search_for_quantizers_faac,
961
        encode_window_bands_info,
962
        quantize_and_encode_band,
963 964 965 966 967
        ff_aac_encode_tns_info,
        ff_aac_encode_main_pred,
        ff_aac_adjust_common_prediction,
        ff_aac_apply_main_pred,
        ff_aac_update_main_pred,
968
        set_special_band_scalefactors,
969
        search_for_pns,
970
        ff_aac_search_for_tns,
971
        search_for_ms,
972 973
        ff_aac_search_for_is,
        ff_aac_search_for_pred,
974
    },
975
    [AAC_CODER_ANMR] = {
976 977 978
        search_for_quantizers_anmr,
        encode_window_bands_info,
        quantize_and_encode_band,
979 980 981 982 983
        ff_aac_encode_tns_info,
        ff_aac_encode_main_pred,
        ff_aac_adjust_common_prediction,
        ff_aac_apply_main_pred,
        ff_aac_update_main_pred,
984
        set_special_band_scalefactors,
985
        search_for_pns,
986
        ff_aac_search_for_tns,
987
        search_for_ms,
988 989
        ff_aac_search_for_is,
        ff_aac_search_for_pred,
990
    },
991
    [AAC_CODER_TWOLOOP] = {
992
        search_for_quantizers_twoloop,
993
        codebook_trellis_rate,
994
        quantize_and_encode_band,
995 996 997 998 999
        ff_aac_encode_tns_info,
        ff_aac_encode_main_pred,
        ff_aac_adjust_common_prediction,
        ff_aac_apply_main_pred,
        ff_aac_update_main_pred,
1000
        set_special_band_scalefactors,
1001
        search_for_pns,
1002
        ff_aac_search_for_tns,
1003
        search_for_ms,
1004 1005
        ff_aac_search_for_is,
        ff_aac_search_for_pred,
1006
    },
1007
    [AAC_CODER_FAST] = {
1008 1009 1010
        search_for_quantizers_fast,
        encode_window_bands_info,
        quantize_and_encode_band,
1011 1012 1013 1014 1015
        ff_aac_encode_tns_info,
        ff_aac_encode_main_pred,
        ff_aac_adjust_common_prediction,
        ff_aac_apply_main_pred,
        ff_aac_update_main_pred,
1016
        set_special_band_scalefactors,
1017
        search_for_pns,
1018
        ff_aac_search_for_tns,
1019
        search_for_ms,
1020 1021
        ff_aac_search_for_is,
        ff_aac_search_for_pred,
1022 1023
    },
};