/*
 * AAC decoder
 * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
 * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * AAC decoder
 * @author Oded Shimon  ( ods15 ods15 dyndns org )
 * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
 */

/*
 * supported tools
 *
 * Support?             Name
 * N (code in SoC repo) gain control
 * Y                    block switching
 * Y                    window shapes - standard
 * N                    window shapes - Low Delay
 * Y                    filterbank - standard
 * N (code in SoC repo) filterbank - Scalable Sample Rate
 * Y                    Temporal Noise Shaping
 * N (code in SoC repo) Long Term Prediction
 * Y                    intensity stereo
 * Y                    channel coupling
 * Y                    frequency domain prediction
 * Y                    Perceptual Noise Substitution
 * Y                    Mid/Side stereo
 * N                    Scalable Inverse AAC Quantization
 * N                    Frequency Selective Switch
 * N                    upsampling filter
 * Y                    quantization & coding - AAC
 * N                    quantization & coding - TwinVQ
 * N                    quantization & coding - BSAC
 * N                    AAC Error Resilience tools
 * N                    Error Resilience payload syntax
 * N                    Error Protection tool
 * N                    CELP
 * N                    Silence Compression
 * N                    HVXC
 * N                    HVXC 4kbits/s VR
 * N                    Structured Audio tools
 * N                    Structured Audio Sample Bank Format
 * N                    MIDI
 * N                    Harmonic and Individual Lines plus Noise
 * N                    Text-To-Speech Interface
 * Y                    Spectral Band Replication
 * Y (not in this code) Layer-1
 * Y (not in this code) Layer-2
 * Y (not in this code) Layer-3
 * N                    SinuSoidal Coding (Transient, Sinusoid, Noise)
 * Y                    Parametric Stereo
 * N                    Direct Stream Transfer
 *
 * Note: - HE AAC v1 comprises LC AAC with Spectral Band Replication.
 *       - HE AAC v2 comprises LC AAC with Spectral Band Replication and
 *         Parametric Stereo.
 */


#include "avcodec.h"
80
#include "internal.h"
81
#include "get_bits.h"
82
#include "dsputil.h"
83
#include "fft.h"
84
#include "lpc.h"
85 86 87

#include "aac.h"
#include "aactab.h"
88
#include "aacdectab.h"
89
#include "cbrt_tablegen.h"
90 91
#include "sbr.h"
#include "aacsbr.h"
92
#include "mpeg4audio.h"
93
#include "aacadtsdec.h"
94 95 96 97 98 99

#include <assert.h>
#include <errno.h>
#include <math.h>
#include <string.h>

100 101 102 103
#if ARCH_ARM
#   include "arm/aac.h"
#endif

104 105 106 107
/**
 * Overlay of a float and a 32-bit unsigned integer sharing the same storage.
 * NOTE(review): presumably used to access the bit pattern of a float; no use
 * of this union is visible in this part of the file -- confirm against users.
 */
union float754 {
    float f;    /* floating point view */
    uint32_t i; /* 32-bit integer view of the same bytes */
};
108

109 110 111
/* File-wide VLC tables; both are set up in aac_decode_init(). */
static VLC vlc_scalefactors; /* built from ff_aac_scalefactor_bits/code, read via get_vlc2() in decode_scalefactors() */
static VLC vlc_spectral[11]; /* entries 0..10 built by AAC_INIT_VLC_STATIC from ff_aac_spectral_bits/codes[num] */

112 113
/* Shared log message emitted when a parser detects that it would read past the end of the input. */
static const char overread_err[] = "Input buffer exhausted before END element found\n";

114 115
/**
 * Look up the channel element that a syntactic element of the given type and
 * instance tag refers to. For the default channel configurations the mapping
 * is built lazily, in the order the elements are encountered.
 *
 * @param   ac      decoder context
 * @param   type    syntactic element type (TYPE_SCE, TYPE_CPE, TYPE_LFE, ...)
 * @param   elem_id element instance tag
 *
 * @return  the mapped channel element, or NULL if no element can be assigned
 */
static ChannelElement *get_che(AACContext *ac, int type, int elem_id)
{
    /* Some buggy encoders appear to set all elem_ids to zero and rely on
    channels always occurring in the same order. This is expressly forbidden
    by the spec but we will try to work around it.
    */
    int err_printed = 0;
    /* The bound must be tested first: indexing with elem_id == MAX_ELEM_ID
     * would read past the end of the tags_seen_this_frame row. */
    while (elem_id < MAX_ELEM_ID && ac->tags_seen_this_frame[type][elem_id]) {
        if (ac->output_configured < OC_LOCKED && !err_printed) {
            av_log(ac->avctx, AV_LOG_WARNING, "Duplicate channel tag found, attempting to remap.\n");
            err_printed = 1;
        }
        elem_id++;
    }
    if (elem_id >= MAX_ELEM_ID)
        return NULL;
    ac->tags_seen_this_frame[type][elem_id] = 1;

    /* Already mapped: reuse the existing assignment. */
    if (ac->tag_che_map[type][elem_id]) {
        return ac->tag_che_map[type][elem_id];
    }
    /* Every element of the default configuration has been handed out. */
    if (ac->tags_mapped >= tags_per_config[ac->m4ac.chan_config]) {
        return NULL;
    }
    /* Each case handles the element(s) specific to its configuration and then
     * deliberately falls through to the smaller configurations it extends. */
    switch (ac->m4ac.chan_config) {
    case 7:
        if (ac->tags_mapped == 3 && type == TYPE_CPE) {
            ac->tags_mapped++;
            return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][2];
        }
        /* fall through */
    case 6:
        /* Some streams incorrectly code 5.1 audio as SCE[0] CPE[0] CPE[1] SCE[1]
           instead of SCE[0] CPE[0] CPE[1] LFE[0]. If we seem to have
           encountered such a stream, transfer the LFE[0] element to the SCE[1]'s mapping */
        if (ac->tags_mapped == tags_per_config[ac->m4ac.chan_config] - 1 && (type == TYPE_LFE || type == TYPE_SCE)) {
            ac->tags_mapped++;
            return ac->tag_che_map[type][elem_id] = ac->che[TYPE_LFE][0];
        }
        /* fall through */
    case 5:
        if (ac->tags_mapped == 2 && type == TYPE_CPE) {
            ac->tags_mapped++;
            return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][1];
        }
        /* fall through */
    case 4:
        if (ac->tags_mapped == 2 && ac->m4ac.chan_config == 4 && type == TYPE_SCE) {
            ac->tags_mapped++;
            return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][1];
        }
        /* fall through */
    case 3:
    case 2:
        if (ac->tags_mapped == (ac->m4ac.chan_config != 2) && type == TYPE_CPE) {
            ac->tags_mapped++;
            return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][0];
        } else if (ac->m4ac.chan_config == 2) {
            return NULL;
        }
        /* fall through */
    case 1:
        if (!ac->tags_mapped && type == TYPE_SCE) {
            ac->tags_mapped++;
            return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][0];
        }
        /* fall through */
    default:
        return NULL;
    }
}

180 181 182 183 184 185 186 187 188 189 190 191
/**
 * Check for the channel element in the current channel position configuration.
 * If it exists, make sure the appropriate element is allocated and map the
 * channel order to match the internal FFmpeg channel layout.
 *
 * @param   che_pos current channel position configuration
 * @param   type channel element type
 * @param   id channel element id
 * @param   channels count of the number of channels in the configuration
 *
 * @return  Returns error status. 0 - OK, !0 - error
 */
192
static av_cold int che_configure(AACContext *ac,
193 194 195 196 197 198 199
                         enum ChannelPosition che_pos[4][MAX_ELEM_ID],
                         int type, int id,
                         int *channels)
{
    if (che_pos[type][id]) {
        if (!ac->che[type][id] && !(ac->che[type][id] = av_mallocz(sizeof(ChannelElement))))
            return AVERROR(ENOMEM);
200
        ff_aac_sbr_ctx_init(&ac->che[type][id]->sbr);
201 202
        if (type != TYPE_CCE) {
            ac->output_data[(*channels)++] = ac->che[type][id]->ch[0].ret;
203 204
            if (type == TYPE_CPE ||
                (type == TYPE_SCE && ac->m4ac.ps == 1)) {
205 206 207
                ac->output_data[(*channels)++] = ac->che[type][id]->ch[1].ret;
            }
        }
208 209 210
    } else {
        if (ac->che[type][id])
            ff_aac_sbr_ctx_close(&ac->che[type][id]->sbr);
211
        av_freep(&ac->che[type][id]);
212
    }
213 214 215
    return 0;
}

216 217 218 219 220 221 222 223
/**
 * Configure output channel order based on the current program configuration element.
 *
 * @param   che_pos current channel position configuration
 * @param   new_che_pos New channel position configuration - we only do something if it differs from the current one.
 *
 * @return  Returns error status. 0 - OK, !0 - error
 */
224
static av_cold int output_configure(AACContext *ac,
225 226
                            enum ChannelPosition che_pos[4][MAX_ELEM_ID],
                            enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
227
                            int channel_config, enum OCStatus oc_type)
228
{
229
    AVCodecContext *avctx = ac->avctx;
230
    int i, type, channels = 0, ret;
231

232
    if (new_che_pos != che_pos)
233 234
    memcpy(che_pos, new_che_pos, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));

235 236
    if (channel_config) {
        for (i = 0; i < tags_per_config[channel_config]; i++) {
237 238 239 240 241
            if ((ret = che_configure(ac, che_pos,
                                     aac_channel_layout_map[channel_config - 1][i][0],
                                     aac_channel_layout_map[channel_config - 1][i][1],
                                     &channels)))
                return ret;
242 243 244 245 246 247 248
        }

        memset(ac->tag_che_map, 0,       4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
        ac->tags_mapped = 0;

        avctx->channel_layout = aac_channel_layout[channel_config - 1];
    } else {
249 250 251 252 253 254 255 256 257 258 259
        /* Allocate or free elements depending on if they are in the
         * current program configuration.
         *
         * Set up default 1:1 output mapping.
         *
         * For a 5.1 stream the output order will be:
         *    [ Center ] [ Front Left ] [ Front Right ] [ LFE ] [ Surround Left ] [ Surround Right ]
         */

        for (i = 0; i < MAX_ELEM_ID; i++) {
            for (type = 0; type < 4; type++) {
260 261
                if ((ret = che_configure(ac, che_pos, type, i, &channels)))
                    return ret;
262
            }
263 264
        }

265
        memcpy(ac->tag_che_map, ac->che, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
266
        ac->tags_mapped = 4 * MAX_ELEM_ID;
267 268

        avctx->channel_layout = 0;
269 270
    }

271
    avctx->channels = channels;
272

273
    ac->output_configured = oc_type;
274

275 276 277
    return 0;
}

278 279 280 281 282 283 284 285
/**
 * Decode an array of 4 bit element IDs, optionally interleaved with a stereo/mono switching bit.
 *
 * @param cpe_map Stereo (Channel Pair Element) map, NULL if stereo bit is not present.
 * @param sce_map mono (Single Channel Element) map
 * @param type speaker type/position for these channels
 * @param gb bit reader positioned at the first entry
 * @param n number of entries to read
 */
static void decode_channel_map(enum ChannelPosition *cpe_map,
                               enum ChannelPosition *sce_map,
                               enum ChannelPosition type,
                               GetBitContext *gb, int n)
{
    for (; n != 0; n--) {
        /* The stereo/mono bit is only read when a stereo map was supplied. */
        enum ChannelPosition *dst = sce_map;
        if (cpe_map && get_bits1(gb))
            dst = cpe_map;
        dst[get_bits(gb, 4)] = type;
    }
}

/**
 * Decode program configuration element; reference: table 4.2.
 *
 * @param   new_che_pos New channel position configuration - we only do something if it differs from the current one.
 *
 * @return  Returns error status. 0 - OK, !0 - error
 */
303 304 305
static int decode_pce(AACContext *ac, enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
                      GetBitContext *gb)
{
306
    int num_front, num_side, num_back, num_lfe, num_assoc_data, num_cc, sampling_index;
307
    int comment_len;
308 309 310

    skip_bits(gb, 2);  // object_type

311
    sampling_index = get_bits(gb, 4);
312
    if (ac->m4ac.sampling_index != sampling_index)
313
        av_log(ac->avctx, AV_LOG_WARNING, "Sample rate index in program config element does not match the sample rate index configured by the container.\n");
314

315 316 317 318 319 320 321
    num_front       = get_bits(gb, 4);
    num_side        = get_bits(gb, 4);
    num_back        = get_bits(gb, 4);
    num_lfe         = get_bits(gb, 2);
    num_assoc_data  = get_bits(gb, 3);
    num_cc          = get_bits(gb, 4);

322 323 324 325
    if (get_bits1(gb))
        skip_bits(gb, 4); // mono_mixdown_tag
    if (get_bits1(gb))
        skip_bits(gb, 4); // stereo_mixdown_tag
326

327 328
    if (get_bits1(gb))
        skip_bits(gb, 3); // mixdown_coeff_index and pseudo_surround
329

330 331 332 333
    decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_FRONT, gb, num_front);
    decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_SIDE,  gb, num_side );
    decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_BACK,  gb, num_back );
    decode_channel_map(NULL,                  new_che_pos[TYPE_LFE], AAC_CHANNEL_LFE,   gb, num_lfe  );
334 335 336

    skip_bits_long(gb, 4 * num_assoc_data);

337
    decode_channel_map(new_che_pos[TYPE_CCE], new_che_pos[TYPE_CCE], AAC_CHANNEL_CC,    gb, num_cc   );
338 339 340 341

    align_get_bits(gb);

    /* comment field, first byte is length */
342 343
    comment_len = get_bits(gb, 8) * 8;
    if (get_bits_left(gb) < comment_len) {
344
        av_log(ac->avctx, AV_LOG_ERROR, overread_err);
345 346 347
        return -1;
    }
    skip_bits_long(gb, comment_len);
348 349
    return 0;
}
350

351 352 353 354 355 356 357 358
/**
 * Set up channel positions based on a default channel configuration
 * as specified in table 1.17.
 *
 * @param   new_che_pos New channel position configuration - we only do something if it differs from the current one.
 *
 * @return  Returns error status. 0 - OK, !0 - error
 */
359
static av_cold int set_default_channel_config(AACContext *ac,
360 361
                                      enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
                                      int channel_config)
362
{
363
    if (channel_config < 1 || channel_config > 7) {
364
        av_log(ac->avctx, AV_LOG_ERROR, "invalid default channel configuration (%d)\n",
365 366 367 368 369 370 371 372 373 374 375 376 377 378 379
               channel_config);
        return -1;
    }

    /* default channel configurations:
     *
     * 1ch : front center (mono)
     * 2ch : L + R (stereo)
     * 3ch : front center + L + R
     * 4ch : front center + L + R + back center
     * 5ch : front center + L + R + back stereo
     * 6ch : front center + L + R + back stereo + LFE
     * 7ch : front center + L + R + outer front left + outer front right + back stereo + LFE
     */

380
    if (channel_config != 2)
381
        new_che_pos[TYPE_SCE][0] = AAC_CHANNEL_FRONT; // front center (or mono)
382
    if (channel_config > 1)
383
        new_che_pos[TYPE_CPE][0] = AAC_CHANNEL_FRONT; // L + R (or stereo)
384
    if (channel_config == 4)
385
        new_che_pos[TYPE_SCE][1] = AAC_CHANNEL_BACK;  // back center
386
    if (channel_config > 4)
387
        new_che_pos[TYPE_CPE][(channel_config == 7) + 1]
388 389
        = AAC_CHANNEL_BACK;  // back stereo
    if (channel_config > 5)
390
        new_che_pos[TYPE_LFE][0] = AAC_CHANNEL_LFE;   // LFE
391
    if (channel_config == 7)
392 393 394 395 396
        new_che_pos[TYPE_CPE][1] = AAC_CHANNEL_FRONT; // outer front left + outer front right

    return 0;
}

397 398 399 400 401
/**
 * Decode GA "General Audio" specific configuration; reference: table 4.1.
 *
 * @return  Returns error status. 0 - OK, !0 - error
 */
402 403 404
static int decode_ga_specific_config(AACContext *ac, GetBitContext *gb,
                                     int channel_config)
{
405 406 407
    enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
    int extension_flag, ret;

408
    if (get_bits1(gb)) { // frameLengthFlag
409
        av_log_missing_feature(ac->avctx, "960/120 MDCT window is", 1);
410 411 412 413 414 415 416
        return -1;
    }

    if (get_bits1(gb))       // dependsOnCoreCoder
        skip_bits(gb, 14);   // coreCoderDelay
    extension_flag = get_bits1(gb);

417 418
    if (ac->m4ac.object_type == AOT_AAC_SCALABLE ||
        ac->m4ac.object_type == AOT_ER_AAC_SCALABLE)
419 420 421 422 423
        skip_bits(gb, 3);     // layerNr

    memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
    if (channel_config == 0) {
        skip_bits(gb, 4);  // element_instance_tag
424
        if ((ret = decode_pce(ac, new_che_pos, gb)))
425 426
            return ret;
    } else {
427
        if ((ret = set_default_channel_config(ac, new_che_pos, channel_config)))
428 429
            return ret;
    }
430
    if ((ret = output_configure(ac, ac->che_pos, new_che_pos, channel_config, OC_GLOBAL_HDR)))
431 432 433 434
        return ret;

    if (extension_flag) {
        switch (ac->m4ac.object_type) {
435 436 437 438 439 440 441 442 443
        case AOT_ER_BSAC:
            skip_bits(gb, 5);    // numOfSubFrame
            skip_bits(gb, 11);   // layer_length
            break;
        case AOT_ER_AAC_LC:
        case AOT_ER_AAC_LTP:
        case AOT_ER_AAC_SCALABLE:
        case AOT_ER_AAC_LD:
            skip_bits(gb, 3);  /* aacSectionDataResilienceFlag
444 445 446
                                    * aacScalefactorDataResilienceFlag
                                    * aacSpectralDataResilienceFlag
                                    */
447
            break;
448 449 450 451 452 453 454 455 456 457 458 459 460 461
        }
        skip_bits1(gb);    // extensionFlag3 (TBD in version 3)
    }
    return 0;
}

/**
 * Decode audio specific configuration; reference: table 1.13.
 *
 * @param   data        pointer to AVCodecContext extradata
 * @param   data_size   size of AVCCodecContext extradata
 *
 * @return  Returns error status. 0 - OK, !0 - error
 */
462 463 464
static int decode_audio_specific_config(AACContext *ac, void *data,
                                        int data_size)
{
465 466 467 468 469
    GetBitContext gb;
    int i;

    init_get_bits(&gb, data, data_size * 8);

470
    if ((i = ff_mpeg4audio_get_config(&ac->m4ac, data, data_size)) < 0)
471
        return -1;
472
    if (ac->m4ac.sampling_index > 12) {
473
        av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->m4ac.sampling_index);
474 475
        return -1;
    }
476 477
    if (ac->m4ac.sbr == 1 && ac->m4ac.ps == -1)
        ac->m4ac.ps = 1;
478 479 480 481

    skip_bits_long(&gb, i);

    switch (ac->m4ac.object_type) {
482
    case AOT_AAC_MAIN:
483 484 485 486 487
    case AOT_AAC_LC:
        if (decode_ga_specific_config(ac, &gb, ac->m4ac.chan_config))
            return -1;
        break;
    default:
488
        av_log(ac->avctx, AV_LOG_ERROR, "Audio object type %s%d is not supported.\n",
489 490 491 492 493 494
               ac->m4ac.sbr == 1? "SBR+" : "", ac->m4ac.object_type);
        return -1;
    }
    return 0;
}

495 496 497 498 499 500 501
/**
 * linear congruential pseudorandom number generator
 *
 * @param   previous_val    current state of the generator (passed by value)
 *
 * @return  Returns a 32-bit pseudorandom integer
 */
static av_always_inline int lcg_random(int previous_val)
{
    /* The recurrence relies on wrap-around; signed overflow is undefined in C,
     * so the arithmetic is done in unsigned and the bits are read back as int. */
    union { unsigned u; int s; } v = { previous_val * 1664525u + 1013904223u };
    return v.s;
}

507
/**
 * Put a single predictor back into its initial state: r0, r1, cor0 and cor1
 * are cleared to zero, var0 and var1 are set to one.
 */
static av_always_inline void reset_predict_state(PredictorState *ps)
{
    ps->var0 = ps->var1 = 1.0f;
    ps->cor0 = ps->cor1 = 0.0f;
    ps->r0   = ps->r1   = 0.0f;
}

517 518
/**
 * Reset all MAX_PREDICTORS predictor states of the array starting at ps.
 */
static void reset_all_predictors(PredictorState *ps)
{
    PredictorState *const end = ps + MAX_PREDICTORS;

    for (; ps < end; ps++)
        reset_predict_state(ps);
}

524 525
/**
 * Reset the predictors belonging to one reset group: every 30th predictor,
 * starting at index group_num - 1.
 */
static void reset_predictor_group(PredictorState *ps, int group_num)
{
    int pos = group_num - 1;

    while (pos < MAX_PREDICTORS) {
        reset_predict_state(&ps[pos]);
        pos += 30;
    }
}

531 532 533 534 535 536
/**
 * Set up vlc_spectral[num] through INIT_VLC_STATIC from the
 * ff_aac_spectral_sizes/bits/codes[num] tables.
 * `size` is forwarded as the last INIT_VLC_STATIC argument
 * (presumably the static table size -- confirm against INIT_VLC_STATIC).
 * Note that the expansion already ends in a semicolon.
 */
#define AAC_INIT_VLC_STATIC(num, size) \
    INIT_VLC_STATIC(&vlc_spectral[num], 8, ff_aac_spectral_sizes[num], \
         ff_aac_spectral_bits[num], sizeof( ff_aac_spectral_bits[num][0]), sizeof( ff_aac_spectral_bits[num][0]), \
        ff_aac_spectral_codes[num], sizeof(ff_aac_spectral_codes[num][0]), sizeof(ff_aac_spectral_codes[num][0]), \
        size);

537
/**
 * Decoder init callback: parses the extradata (when present), sets the output
 * sample format and initialises the VLC tables, SBR, DSP helpers, MDCT
 * contexts, windows and lookup tables used by the decoder.
 *
 * @return  0 on success, -1 if the extradata could not be decoded
 */
static av_cold int aac_decode_init(AVCodecContext *avctx)
{
    AACContext *ac = avctx->priv_data;

    ac->avctx            = avctx;
    ac->m4ac.sample_rate = avctx->sample_rate;

    /* Extradata, when present, is parsed as the audio specific config. */
    if (avctx->extradata_size > 0 &&
        decode_audio_specific_config(ac, avctx->extradata, avctx->extradata_size))
        return -1;

    avctx->sample_fmt = SAMPLE_FMT_S16;

    /* spectral VLC tables 0..10 */
    AAC_INIT_VLC_STATIC( 0, 304);
    AAC_INIT_VLC_STATIC( 1, 270);
    AAC_INIT_VLC_STATIC( 2, 550);
    AAC_INIT_VLC_STATIC( 3, 300);
    AAC_INIT_VLC_STATIC( 4, 328);
    AAC_INIT_VLC_STATIC( 5, 294);
    AAC_INIT_VLC_STATIC( 6, 306);
    AAC_INIT_VLC_STATIC( 7, 268);
    AAC_INIT_VLC_STATIC( 8, 510);
    AAC_INIT_VLC_STATIC( 9, 366);
    AAC_INIT_VLC_STATIC(10, 462);

    ff_aac_sbr_init();

    dsputil_init(&ac->dsp, avctx);

    ac->random_state = 0x1f2e3d4c;

    // -1024 - Compensate wrong IMDCT method.
    // 32768 - Required to scale values to the correct range for the bias method
    //         for float to int16 conversion.
    if (ac->dsp.float_to_int16_interleave != ff_float_to_int16_interleave_c) {
        /* a non-C float to int16 conversion routine is installed */
        ac->add_bias  = 0.0f;
        ac->sf_scale  = 1. / -1024.;
        ac->sf_offset = 60;
    } else {
        /* plain C conversion: use the bias method */
        ac->add_bias  = 385.0f;
        ac->sf_scale  = 1. / (-1024. * 32768.);
        ac->sf_offset = 0;
    }

    ff_aac_tableinit();

    INIT_VLC_STATIC(&vlc_scalefactors,7,FF_ARRAY_ELEMS(ff_aac_scalefactor_code),
                    ff_aac_scalefactor_bits, sizeof(ff_aac_scalefactor_bits[0]), sizeof(ff_aac_scalefactor_bits[0]),
                    ff_aac_scalefactor_code, sizeof(ff_aac_scalefactor_code[0]), sizeof(ff_aac_scalefactor_code[0]),
                    352);

    ff_mdct_init(&ac->mdct,       11, 1, 1.0);
    ff_mdct_init(&ac->mdct_small,  8, 1, 1.0);

    // window initialization
    ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
    ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
    ff_init_ff_sine_windows(10);
    ff_init_ff_sine_windows( 7);

    cbrt_tableinit();

    return 0;
}

603 604 605
/**
 * Skip data_stream_element; reference: table 4.10.
 */
606
static int skip_data_stream_element(AACContext *ac, GetBitContext *gb)
607
{
608 609 610 611 612 613
    int byte_align = get_bits1(gb);
    int count = get_bits(gb, 8);
    if (count == 255)
        count += get_bits(gb, 8);
    if (byte_align)
        align_get_bits(gb);
614 615

    if (get_bits_left(gb) < 8 * count) {
616
        av_log(ac->avctx, AV_LOG_ERROR, overread_err);
617 618
        return -1;
    }
619
    skip_bits_long(gb, 8 * count);
620
    return 0;
621 622
}

623 624 625
/**
 * Read the prediction side information of an individual channel stream:
 * an optional predictor reset group followed by one "prediction used" flag
 * per scalefactor band.
 *
 * @return  0 on success, -1 if the predictor reset group is out of range
 */
static int decode_prediction(AACContext *ac, IndividualChannelStream *ics,
                             GetBitContext *gb)
{
    int band, band_count;

    if (get_bits1(gb)) {
        ics->predictor_reset_group = get_bits(gb, 5);
        /* only groups 1..30 are accepted */
        if (ics->predictor_reset_group == 0 || ics->predictor_reset_group > 30) {
            av_log(ac->avctx, AV_LOG_ERROR, "Invalid Predictor Reset Group.\n");
            return -1;
        }
    }

    /* Flags are present for max_sfb bands, capped by the per sample rate limit. */
    band_count = FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[ac->m4ac.sampling_index]);
    for (band = 0; band < band_count; band++)
        ics->prediction_used[band] = get_bits1(gb);

    return 0;
}

640 641 642 643 644
/**
 * Decode Individual Channel Stream info; reference: table 4.6.
 *
 * @param   common_window   Channels have independent [0], or shared [1], Individual Channel Stream information.
 */
645 646 647
static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics,
                           GetBitContext *gb, int common_window)
{
648
    if (get_bits1(gb)) {
649
        av_log(ac->avctx, AV_LOG_ERROR, "Reserved bit set.\n");
650 651 652 653 654
        memset(ics, 0, sizeof(IndividualChannelStream));
        return -1;
    }
    ics->window_sequence[1] = ics->window_sequence[0];
    ics->window_sequence[0] = get_bits(gb, 2);
655 656 657 658
    ics->use_kb_window[1]   = ics->use_kb_window[0];
    ics->use_kb_window[0]   = get_bits1(gb);
    ics->num_window_groups  = 1;
    ics->group_len[0]       = 1;
659 660 661 662 663
    if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
        int i;
        ics->max_sfb = get_bits(gb, 4);
        for (i = 0; i < 7; i++) {
            if (get_bits1(gb)) {
664
                ics->group_len[ics->num_window_groups - 1]++;
665 666
            } else {
                ics->num_window_groups++;
667
                ics->group_len[ics->num_window_groups - 1] = 1;
668 669
            }
        }
670 671 672 673
        ics->num_windows       = 8;
        ics->swb_offset        =    ff_swb_offset_128[ac->m4ac.sampling_index];
        ics->num_swb           =   ff_aac_num_swb_128[ac->m4ac.sampling_index];
        ics->tns_max_bands     = ff_tns_max_bands_128[ac->m4ac.sampling_index];
674
        ics->predictor_present = 0;
675
    } else {
676 677 678 679 680 681
        ics->max_sfb               = get_bits(gb, 6);
        ics->num_windows           = 1;
        ics->swb_offset            =    ff_swb_offset_1024[ac->m4ac.sampling_index];
        ics->num_swb               =   ff_aac_num_swb_1024[ac->m4ac.sampling_index];
        ics->tns_max_bands         = ff_tns_max_bands_1024[ac->m4ac.sampling_index];
        ics->predictor_present     = get_bits1(gb);
682 683 684 685 686 687 688 689
        ics->predictor_reset_group = 0;
        if (ics->predictor_present) {
            if (ac->m4ac.object_type == AOT_AAC_MAIN) {
                if (decode_prediction(ac, ics, gb)) {
                    memset(ics, 0, sizeof(IndividualChannelStream));
                    return -1;
                }
            } else if (ac->m4ac.object_type == AOT_AAC_LC) {
690
                av_log(ac->avctx, AV_LOG_ERROR, "Prediction is not allowed in AAC-LC.\n");
691 692 693
                memset(ics, 0, sizeof(IndividualChannelStream));
                return -1;
            } else {
694
                av_log_missing_feature(ac->avctx, "Predictor bit set but LTP is", 1);
695 696
                memset(ics, 0, sizeof(IndividualChannelStream));
                return -1;
697
            }
698 699 700
        }
    }

701
    if (ics->max_sfb > ics->num_swb) {
702
        av_log(ac->avctx, AV_LOG_ERROR,
703 704
               "Number of scalefactor bands in group (%d) exceeds limit (%d).\n",
               ics->max_sfb, ics->num_swb);
705 706 707 708
        memset(ics, 0, sizeof(IndividualChannelStream));
        return -1;
    }

709 710 711 712 713 714 715 716 717 718 719
    return 0;
}

/**
 * Decode band types (section_data payload); reference: table 4.46.
 *
 * @param   band_type           array of the used band type
 * @param   band_type_run_end   array of the last scalefactor band of a band type run
 *
 * @return  Returns error status. 0 - OK, !0 - error
 */
720 721 722 723
static int decode_band_types(AACContext *ac, enum BandType band_type[120],
                             int band_type_run_end[120], GetBitContext *gb,
                             IndividualChannelStream *ics)
{
724 725 726 727 728
    int g, idx = 0;
    const int bits = (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) ? 3 : 5;
    for (g = 0; g < ics->num_window_groups; g++) {
        int k = 0;
        while (k < ics->max_sfb) {
729
            uint8_t sect_end = k;
730 731 732
            int sect_len_incr;
            int sect_band_type = get_bits(gb, 4);
            if (sect_band_type == 12) {
733
                av_log(ac->avctx, AV_LOG_ERROR, "invalid band type\n");
734 735
                return -1;
            }
736
            while ((sect_len_incr = get_bits(gb, bits)) == (1 << bits) - 1)
737 738
                sect_end += sect_len_incr;
            sect_end += sect_len_incr;
739
            if (get_bits_left(gb) < 0) {
740
                av_log(ac->avctx, AV_LOG_ERROR, overread_err);
741 742
                return -1;
            }
743
            if (sect_end > ics->max_sfb) {
744
                av_log(ac->avctx, AV_LOG_ERROR,
745
                       "Number of bands (%d) exceeds limit (%d).\n",
746
                       sect_end, ics->max_sfb);
747 748
                return -1;
            }
749
            for (; k < sect_end; k++) {
750
                band_type        [idx]   = sect_band_type;
751
                band_type_run_end[idx++] = sect_end;
752
            }
753 754 755 756
        }
    }
    return 0;
}
757

758 759
/**
 * Decode scalefactors; reference: table 4.47.
760 761 762 763 764 765 766 767
 *
 * @param   global_gain         first scalefactor value as scalefactors are differentially coded
 * @param   band_type           array of the used band type
 * @param   band_type_run_end   array of the last scalefactor band of a band type run
 * @param   sf                  array of scalefactors or intensity stereo positions
 *
 * @return  Returns error status. 0 - OK, !0 - error
 */
768 769 770 771 772 773
static int decode_scalefactors(AACContext *ac, float sf[120], GetBitContext *gb,
                               unsigned int global_gain,
                               IndividualChannelStream *ics,
                               enum BandType band_type[120],
                               int band_type_run_end[120])
{
774 775 776 777 778 779 780 781 782
    const int sf_offset = ac->sf_offset + (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE ? 12 : 0);
    int g, i, idx = 0;
    int offset[3] = { global_gain, global_gain - 90, 100 };
    int noise_flag = 1;
    static const char *sf_str[3] = { "Global gain", "Noise gain", "Intensity stereo position" };
    for (g = 0; g < ics->num_window_groups; g++) {
        for (i = 0; i < ics->max_sfb;) {
            int run_end = band_type_run_end[idx];
            if (band_type[idx] == ZERO_BT) {
783
                for (; i < run_end; i++, idx++)
784
                    sf[idx] = 0.;
785 786
            } else if ((band_type[idx] == INTENSITY_BT) || (band_type[idx] == INTENSITY_BT2)) {
                for (; i < run_end; i++, idx++) {
787
                    offset[2] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
788
                    if (offset[2] > 255U) {
789
                        av_log(ac->avctx, AV_LOG_ERROR,
790
                               "%s (%d) out of range.\n", sf_str[2], offset[2]);
791 792
                        return -1;
                    }
793
                    sf[idx] = ff_aac_pow2sf_tab[-offset[2] + 300];
794
                }
795 796 797
            } else if (band_type[idx] == NOISE_BT) {
                for (; i < run_end; i++, idx++) {
                    if (noise_flag-- > 0)
798 799 800
                        offset[1] += get_bits(gb, 9) - 256;
                    else
                        offset[1] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
801
                    if (offset[1] > 255U) {
802
                        av_log(ac->avctx, AV_LOG_ERROR,
803
                               "%s (%d) out of range.\n", sf_str[1], offset[1]);
804 805
                        return -1;
                    }
806
                    sf[idx] = -ff_aac_pow2sf_tab[offset[1] + sf_offset + 100];
807
                }
808 809
            } else {
                for (; i < run_end; i++, idx++) {
810
                    offset[0] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
811
                    if (offset[0] > 255U) {
812
                        av_log(ac->avctx, AV_LOG_ERROR,
813
                               "%s (%d) out of range.\n", sf_str[0], offset[0]);
814 815 816 817 818 819 820 821 822 823 824 825 826
                        return -1;
                    }
                    sf[idx] = -ff_aac_pow2sf_tab[ offset[0] + sf_offset];
                }
            }
        }
    }
    return 0;
}

/**
 * Decode pulse data; reference: table 4.7.
 */
827 828 829
static int decode_pulses(Pulse *pulse, GetBitContext *gb,
                         const uint16_t *swb_offset, int num_swb)
{
830
    int i, pulse_swb;
831
    pulse->num_pulse = get_bits(gb, 2) + 1;
832 833 834 835
    pulse_swb        = get_bits(gb, 6);
    if (pulse_swb >= num_swb)
        return -1;
    pulse->pos[0]    = swb_offset[pulse_swb];
836
    pulse->pos[0]   += get_bits(gb, 5);
837 838
    if (pulse->pos[0] > 1023)
        return -1;
839 840
    pulse->amp[0]    = get_bits(gb, 4);
    for (i = 1; i < pulse->num_pulse; i++) {
841
        pulse->pos[i] = get_bits(gb, 5) + pulse->pos[i - 1];
842 843
        if (pulse->pos[i] > 1023)
            return -1;
844
        pulse->amp[i] = get_bits(gb, 4);
845
    }
846
    return 0;
847 848
}

849 850 851 852 853
/**
 * Decode Temporal Noise Shaping data; reference: table 4.48.
 *
 * @return  Returns error status. 0 - OK, !0 - error
 */
854 855 856
static int decode_tns(AACContext *ac, TemporalNoiseShaping *tns,
                      GetBitContext *gb, const IndividualChannelStream *ics)
{
857 858 859 860
    int w, filt, i, coef_len, coef_res, coef_compress;
    const int is8 = ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE;
    const int tns_max_order = is8 ? 7 : ac->m4ac.object_type == AOT_AAC_MAIN ? 20 : 12;
    for (w = 0; w < ics->num_windows; w++) {
861
        if ((tns->n_filt[w] = get_bits(gb, 2 - is8))) {
862 863
            coef_res = get_bits1(gb);

864 865
            for (filt = 0; filt < tns->n_filt[w]; filt++) {
                int tmp2_idx;
866
                tns->length[w][filt] = get_bits(gb, 6 - 2 * is8);
867

868
                if ((tns->order[w][filt] = get_bits(gb, 5 - 2 * is8)) > tns_max_order) {
869
                    av_log(ac->avctx, AV_LOG_ERROR, "TNS filter order %d is greater than maximum %d.\n",
870 871 872 873
                           tns->order[w][filt], tns_max_order);
                    tns->order[w][filt] = 0;
                    return -1;
                }
874
                if (tns->order[w][filt]) {
875 876 877
                    tns->direction[w][filt] = get_bits1(gb);
                    coef_compress = get_bits1(gb);
                    coef_len = coef_res + 3 - coef_compress;
878
                    tmp2_idx = 2 * coef_compress + coef_res;
879

880 881
                    for (i = 0; i < tns->order[w][filt]; i++)
                        tns->coef[w][filt][i] = tns_tmp2_map[tmp2_idx][get_bits(gb, coef_len)];
882
                }
883
            }
884
        }
885 886 887 888
    }
    return 0;
}

889 890 891 892 893 894 895
/**
 * Decode Mid/Side data; reference: table 4.54.
 *
 * @param   ms_present  Indicates mid/side stereo presence. [0] mask is all 0s;
 *                      [1] mask is decoded from bitstream; [2] mask is all 1s;
 *                      [3] reserved for scalable AAC
 */
896 897 898
static void decode_mid_side_stereo(ChannelElement *cpe, GetBitContext *gb,
                                   int ms_present)
{
899 900 901 902 903 904 905 906
    int idx;
    if (ms_present == 1) {
        for (idx = 0; idx < cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb; idx++)
            cpe->ms_mask[idx] = get_bits1(gb);
    } else if (ms_present == 2) {
        memset(cpe->ms_mask, 1, cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb * sizeof(cpe->ms_mask[0]));
    }
}
907

908
#ifndef VMUL2
/**
 * Look up two table values and write them, scaled, to dst.
 *
 * @param   dst    output; two floats are written
 * @param   v      lookup table, indexed with 4-bit values
 * @param   idx    packed indices: bits 0-3 select the first value,
 *                 bits 4-7 the second
 * @param   scale  pointer to the common scale factor
 *
 * @return  dst advanced past the two written values
 */
static inline float *VMUL2(float *dst, const float *v, unsigned idx,
                           const float *scale)
{
    const float gain = *scale;

    dst[0] = v[ idx       & 15] * gain;
    dst[1] = v[(idx >> 4) & 15] * gain;

    return dst + 2;
}
#endif
918

919
#ifndef VMUL4
/**
 * Look up four table values and write them, scaled, to dst.
 *
 * @param   dst    output; four floats are written
 * @param   v      lookup table, indexed with 2-bit values
 * @param   idx    packed indices: two bits per output, lowest bits first
 * @param   scale  pointer to the common scale factor
 *
 * @return  dst advanced past the four written values
 */
static inline float *VMUL4(float *dst, const float *v, unsigned idx,
                           const float *scale)
{
    const float gain = *scale;

    dst[0] = v[ idx       & 3] * gain;
    dst[1] = v[(idx >> 2) & 3] * gain;
    dst[2] = v[(idx >> 4) & 3] * gain;
    dst[3] = v[(idx >> 6) & 3] * gain;

    return dst + 4;
}
#endif
931

932
#ifndef VMUL2S
933 934 935 936 937 938 939 940 941 942 943 944 945 946
static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
                            unsigned sign, const float *scale)
{
    union float754 s0, s1;

    s0.f = s1.f = *scale;
    s0.i ^= sign >> 1 << 31;
    s1.i ^= sign      << 31;

    *dst++ = v[idx    & 15] * s0.f;
    *dst++ = v[idx>>4 & 15] * s1.f;

    return dst;
}
947
#endif
948

949
#ifndef VMUL4S
950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973
static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
                            unsigned sign, const float *scale)
{
    unsigned nz = idx >> 12;
    union float754 s = { .f = *scale };
    union float754 t;

    t.i = s.i ^ (sign & 1<<31);
    *dst++ = v[idx    & 3] * t.f;

    sign <<= nz & 1; nz >>= 1;
    t.i = s.i ^ (sign & 1<<31);
    *dst++ = v[idx>>2 & 3] * t.f;

    sign <<= nz & 1; nz >>= 1;
    t.i = s.i ^ (sign & 1<<31);
    *dst++ = v[idx>>4 & 3] * t.f;

    sign <<= nz & 1; nz >>= 1;
    t.i = s.i ^ (sign & 1<<31);
    *dst++ = v[idx>>6 & 3] * t.f;

    return dst;
}
974
#endif
975

976 977 978 979 980 981 982 983 984 985 986 987
/**
 * Decode spectral data; reference: table 4.50.
 * Dequantize and scale spectral data; reference: 4.6.3.3.
 *
 * @param   coef            array of dequantized, scaled spectral data
 * @param   sf              array of scalefactors or intensity stereo positions
 * @param   pulse_present   set if pulses are present
 * @param   pulse           pointer to pulse data struct
 * @param   band_type       array of the used band type
 *
 * @return  Returns error status. 0 - OK, !0 - error
 */
988
static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
989
                                       GetBitContext *gb, const float sf[120],
990 991 992 993
                                       int pulse_present, const Pulse *pulse,
                                       const IndividualChannelStream *ics,
                                       enum BandType band_type[120])
{
994
    int i, k, g, idx = 0;
995 996
    const int c = 1024 / ics->num_windows;
    const uint16_t *offsets = ics->swb_offset;
997 998 999
    float *coef_base = coef;

    for (g = 0; g < ics->num_windows; g++)
1000
        memset(coef + g * 128 + offsets[ics->max_sfb], 0, sizeof(float) * (c - offsets[ics->max_sfb]));
1001 1002

    for (g = 0; g < ics->num_window_groups; g++) {
1003 1004
        unsigned g_len = ics->group_len[g];

1005
        for (i = 0; i < ics->max_sfb; i++, idx++) {
1006 1007 1008
            const unsigned cbt_m1 = band_type[idx] - 1;
            float *cfo = coef + offsets[i];
            int off_len = offsets[i + 1] - offsets[i];
1009
            int group;
1010 1011 1012 1013

            if (cbt_m1 >= INTENSITY_BT2 - 1) {
                for (group = 0; group < g_len; group++, cfo+=128) {
                    memset(cfo, 0, off_len * sizeof(float));
1014
                }
1015 1016
            } else if (cbt_m1 == NOISE_BT - 1) {
                for (group = 0; group < g_len; group++, cfo+=128) {
1017
                    float scale;
1018
                    float band_energy;
1019

1020
                    for (k = 0; k < off_len; k++) {
1021
                        ac->random_state  = lcg_random(ac->random_state);
1022
                        cfo[k] = ac->random_state;
1023
                    }
1024

1025
                    band_energy = ac->dsp.scalarproduct_float(cfo, cfo, off_len);
1026
                    scale = sf[idx] / sqrtf(band_energy);
1027
                    ac->dsp.vector_fmul_scalar(cfo, cfo, scale, off_len);
1028
                }
1029
            } else {
1030 1031 1032
                const float *vq = ff_aac_codebook_vector_vals[cbt_m1];
                const uint16_t *cb_vector_idx = ff_aac_codebook_vector_idx[cbt_m1];
                VLC_TYPE (*vlc_tab)[2] = vlc_spectral[cbt_m1].table;
1033
                OPEN_READER(re, gb);
1034

1035 1036 1037 1038 1039
                switch (cbt_m1 >> 1) {
                case 0:
                    for (group = 0; group < g_len; group++, cfo+=128) {
                        float *cf = cfo;
                        int len = off_len;
1040

1041
                        do {
1042
                            int code;
1043 1044
                            unsigned cb_idx;

1045 1046 1047
                            UPDATE_CACHE(re, gb);
                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
                            cb_idx = cb_vector_idx[code];
1048 1049
                            cf = VMUL4(cf, vq, cb_idx, sf + idx);
                        } while (len -= 4);
1050 1051 1052 1053 1054 1055 1056 1057
                    }
                    break;

                case 1:
                    for (group = 0; group < g_len; group++, cfo+=128) {
                        float *cf = cfo;
                        int len = off_len;

1058
                        do {
1059
                            int code;
1060 1061 1062 1063
                            unsigned nnz;
                            unsigned cb_idx;
                            uint32_t bits;

1064 1065 1066 1067 1068 1069
                            UPDATE_CACHE(re, gb);
                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
#if MIN_CACHE_BITS < 20
                            UPDATE_CACHE(re, gb);
#endif
                            cb_idx = cb_vector_idx[code];
1070
                            nnz = cb_idx >> 8 & 15;
1071 1072
                            bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
                            LAST_SKIP_BITS(re, gb, nnz);
1073 1074
                            cf = VMUL4S(cf, vq, cb_idx, bits, sf + idx);
                        } while (len -= 4);
1075 1076 1077 1078 1079 1080 1081 1082
                    }
                    break;

                case 2:
                    for (group = 0; group < g_len; group++, cfo+=128) {
                        float *cf = cfo;
                        int len = off_len;

1083
                        do {
1084
                            int code;
1085 1086
                            unsigned cb_idx;

1087 1088 1089
                            UPDATE_CACHE(re, gb);
                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
                            cb_idx = cb_vector_idx[code];
1090 1091
                            cf = VMUL2(cf, vq, cb_idx, sf + idx);
                        } while (len -= 2);
1092 1093 1094 1095 1096 1097 1098 1099 1100
                    }
                    break;

                case 3:
                case 4:
                    for (group = 0; group < g_len; group++, cfo+=128) {
                        float *cf = cfo;
                        int len = off_len;

1101
                        do {
1102
                            int code;
1103 1104 1105 1106
                            unsigned nnz;
                            unsigned cb_idx;
                            unsigned sign;

1107 1108 1109
                            UPDATE_CACHE(re, gb);
                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
                            cb_idx = cb_vector_idx[code];
1110
                            nnz = cb_idx >> 8 & 15;
1111 1112
                            sign = SHOW_UBITS(re, gb, nnz) << (cb_idx >> 12);
                            LAST_SKIP_BITS(re, gb, nnz);
1113 1114
                            cf = VMUL2S(cf, vq, cb_idx, sign, sf + idx);
                        } while (len -= 2);
1115 1116 1117 1118 1119 1120 1121 1122 1123
                    }
                    break;

                default:
                    for (group = 0; group < g_len; group++, cfo+=128) {
                        float *cf = cfo;
                        uint32_t *icf = (uint32_t *) cf;
                        int len = off_len;

1124
                        do {
1125
                            int code;
1126 1127 1128 1129 1130
                            unsigned nzt, nnz;
                            unsigned cb_idx;
                            uint32_t bits;
                            int j;

1131 1132 1133 1134
                            UPDATE_CACHE(re, gb);
                            GET_VLC(code, re, gb, vlc_tab, 8, 2);

                            if (!code) {
1135 1136
                                *icf++ = 0;
                                *icf++ = 0;
1137 1138 1139
                                continue;
                            }

1140
                            cb_idx = cb_vector_idx[code];
1141 1142
                            nnz = cb_idx >> 12;
                            nzt = cb_idx >> 8;
1143 1144
                            bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
                            LAST_SKIP_BITS(re, gb, nnz);
1145 1146 1147

                            for (j = 0; j < 2; j++) {
                                if (nzt & 1<<j) {
1148 1149
                                    uint32_t b;
                                    int n;
1150 1151
                                    /* The total length of escape_sequence must be < 22 bits according
                                       to the specification (i.e. max is 111111110xxxxxxxxxxxx). */
1152 1153 1154 1155 1156
                                    UPDATE_CACHE(re, gb);
                                    b = GET_CACHE(re, gb);
                                    b = 31 - av_log2(~b);

                                    if (b > 8) {
1157
                                        av_log(ac->avctx, AV_LOG_ERROR, "error in spectral data, ESC overflow\n");
1158 1159
                                        return -1;
                                    }
1160 1161 1162 1163 1164 1165 1166 1167 1168 1169

#if MIN_CACHE_BITS < 21
                                    LAST_SKIP_BITS(re, gb, b + 1);
                                    UPDATE_CACHE(re, gb);
#else
                                    SKIP_BITS(re, gb, b + 1);
#endif
                                    b += 4;
                                    n = (1 << b) + SHOW_UBITS(re, gb, b);
                                    LAST_SKIP_BITS(re, gb, b);
1170
                                    *icf++ = cbrt_tab[n] | (bits & 1<<31);
1171 1172 1173
                                    bits <<= 1;
                                } else {
                                    unsigned v = ((const uint32_t*)vq)[cb_idx & 15];
1174
                                    *icf++ = (bits & 1<<31) | v;
1175
                                    bits <<= !!v;
1176
                                }
1177
                                cb_idx >>= 4;
1178
                            }
1179
                        } while (len -= 2);
1180

1181
                        ac->dsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len);
1182
                    }
1183
                }
1184 1185

                CLOSE_READER(re, gb);
1186 1187
            }
        }
1188
        coef += g_len << 7;
1189 1190 1191
    }

    if (pulse_present) {
1192
        idx = 0;
1193 1194 1195
        for (i = 0; i < pulse->num_pulse; i++) {
            float co = coef_base[ pulse->pos[i] ];
            while (offsets[idx + 1] <= pulse->pos[i])
1196 1197
                idx++;
            if (band_type[idx] != NOISE_BT && sf[idx]) {
1198 1199 1200 1201 1202 1203
                float ico = -pulse->amp[i];
                if (co) {
                    co /= sf[idx];
                    ico = co / sqrtf(sqrtf(fabsf(co))) + (co > 0 ? -ico : ico);
                }
                coef_base[ pulse->pos[i] ] = cbrtf(fabsf(ico)) * ico * sf[idx];
1204
            }
1205 1206 1207 1208 1209
        }
    }
    return 0;
}

1210 1211
static av_always_inline float flt16_round(float pf)
{
1212 1213 1214 1215
    union float754 tmp;
    tmp.f = pf;
    tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
    return tmp.f;
1216 1217
}

1218 1219
static av_always_inline float flt16_even(float pf)
{
1220 1221
    union float754 tmp;
    tmp.f = pf;
1222
    tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
1223
    return tmp.f;
1224 1225
}

1226 1227
static av_always_inline float flt16_trunc(float pf)
{
1228 1229 1230 1231
    union float754 pun;
    pun.f = pf;
    pun.i &= 0xFFFF0000U;
    return pun.f;
1232 1233
}

1234
/**
 * Run one step of the per-coefficient predictor and update its state.
 *
 * @param   ac             decoder context; sf_scale converts between the
 *                         coefficient scale and the predictor's scale
 * @param   ps             predictor state for this coefficient (updated)
 * @param   coef           coefficient; the prediction is added to it only
 *                         when output_enable is set
 * @param   output_enable  nonzero to apply the prediction to *coef
 *
 * The state update runs regardless of output_enable.
 */
static av_always_inline void predict(AACContext *ac, PredictorState *ps, float *coef,
                    int output_enable)
{
    const float a     = 0.953125; // 61.0 / 64
    const float alpha = 0.90625;  // 29.0 / 32
    float k1 = 0;
    float k2 = 0;
    float predicted;
    float err0, err1;

    /* Coefficients stay zero unless the matching variance exceeds 1. */
    if (ps->var0 > 1)
        k1 = ps->cor0 * flt16_even(a / ps->var0);
    if (ps->var1 > 1)
        k2 = ps->cor1 * flt16_even(a / ps->var1);

    predicted = flt16_round(k1 * ps->r0 + k2 * ps->r1);
    if (output_enable)
        *coef += predicted * ac->sf_scale;

    err0 = *coef / ac->sf_scale;
    err1 = err0 - k1 * ps->r0;

    ps->cor1 = flt16_trunc(alpha * ps->cor1 + ps->r1 * err1);
    ps->var1 = flt16_trunc(alpha * ps->var1 + 0.5f * (ps->r1 * ps->r1 + err1 * err1));
    ps->cor0 = flt16_trunc(alpha * ps->cor0 + ps->r0 * err0);
    ps->var0 = flt16_trunc(alpha * ps->var0 + 0.5f * (ps->r0 * ps->r0 + err0 * err0));

    /* r1 must be derived from the old r0 before r0 is overwritten. */
    ps->r1 = flt16_trunc(a * (ps->r0 - k1 * err0));
    ps->r0 = flt16_trunc(a * err0);
}

/**
 * Apply AAC-Main style frequency domain prediction.
 */
1265 1266
static void apply_prediction(AACContext *ac, SingleChannelElement *sce)
{
1267 1268 1269
    int sfb, k;

    if (!sce->ics.predictor_initialized) {
1270
        reset_all_predictors(sce->predictor_state);
1271 1272 1273 1274 1275 1276
        sce->ics.predictor_initialized = 1;
    }

    if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
        for (sfb = 0; sfb < ff_aac_pred_sfb_max[ac->m4ac.sampling_index]; sfb++) {
            for (k = sce->ics.swb_offset[sfb]; k < sce->ics.swb_offset[sfb + 1]; k++) {
1277
                predict(ac, &sce->predictor_state[k], &sce->coeffs[k],
1278
                        sce->ics.predictor_present && sce->ics.prediction_used[sfb]);
1279 1280 1281
            }
        }
        if (sce->ics.predictor_reset_group)
1282
            reset_predictor_group(sce->predictor_state, sce->ics.predictor_reset_group);
1283
    } else
1284
        reset_all_predictors(sce->predictor_state);
1285 1286
}

1287
/**
1288 1289 1290 1291 1292 1293 1294
 * Decode an individual_channel_stream payload; reference: table 4.44.
 *
 * @param   common_window   Channels have independent [0], or shared [1], Individual Channel Stream information.
 * @param   scale_flag      scalable [1] or non-scalable [0] AAC (Unused until scalable AAC is implemented.)
 *
 * @return  Returns error status. 0 - OK, !0 - error
 */
1295 1296 1297
static int decode_ics(AACContext *ac, SingleChannelElement *sce,
                      GetBitContext *gb, int common_window, int scale_flag)
{
1298
    Pulse pulse;
1299 1300 1301
    TemporalNoiseShaping    *tns = &sce->tns;
    IndividualChannelStream *ics = &sce->ics;
    float *out = sce->coeffs;
1302 1303
    int global_gain, pulse_present = 0;

1304 1305
    /* This assignment is to silence a GCC warning about the variable being used
     * uninitialized when in fact it always is.
1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324
     */
    pulse.num_pulse = 0;

    global_gain = get_bits(gb, 8);

    if (!common_window && !scale_flag) {
        if (decode_ics_info(ac, ics, gb, 0) < 0)
            return -1;
    }

    if (decode_band_types(ac, sce->band_type, sce->band_type_run_end, gb, ics) < 0)
        return -1;
    if (decode_scalefactors(ac, sce->sf, gb, global_gain, ics, sce->band_type, sce->band_type_run_end) < 0)
        return -1;

    pulse_present = 0;
    if (!scale_flag) {
        if ((pulse_present = get_bits1(gb))) {
            if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1325
                av_log(ac->avctx, AV_LOG_ERROR, "Pulse tool not allowed in eight short sequence.\n");
1326 1327
                return -1;
            }
1328
            if (decode_pulses(&pulse, gb, ics->swb_offset, ics->num_swb)) {
1329
                av_log(ac->avctx, AV_LOG_ERROR, "Pulse data corrupt or invalid.\n");
1330 1331
                return -1;
            }
1332 1333 1334 1335
        }
        if ((tns->present = get_bits1(gb)) && decode_tns(ac, tns, gb, ics))
            return -1;
        if (get_bits1(gb)) {
1336
            av_log_missing_feature(ac->avctx, "SSR", 1);
1337 1338 1339 1340
            return -1;
        }
    }

1341
    if (decode_spectrum_and_dequant(ac, out, gb, sce->sf, pulse_present, &pulse, ics, sce->band_type) < 0)
1342
        return -1;
1343

1344
    if (ac->m4ac.object_type == AOT_AAC_MAIN && !common_window)
1345 1346
        apply_prediction(ac, sce);

1347 1348 1349
    return 0;
}

1350 1351 1352
/**
 * Mid/Side stereo decoding; reference: 4.6.8.1.3.
 */
1353
static void apply_mid_side_stereo(AACContext *ac, ChannelElement *cpe)
1354 1355
{
    const IndividualChannelStream *ics = &cpe->ch[0].ics;
1356 1357
    float *ch0 = cpe->ch[0].coeffs;
    float *ch1 = cpe->ch[1].coeffs;
1358
    int g, i, group, idx = 0;
1359
    const uint16_t *offsets = ics->swb_offset;
1360 1361 1362
    for (g = 0; g < ics->num_window_groups; g++) {
        for (i = 0; i < ics->max_sfb; i++, idx++) {
            if (cpe->ms_mask[idx] &&
1363
                    cpe->ch[0].band_type[idx] < NOISE_BT && cpe->ch[1].band_type[idx] < NOISE_BT) {
1364
                for (group = 0; group < ics->group_len[g]; group++) {
1365 1366 1367
                    ac->dsp.butterflies_float(ch0 + group * 128 + offsets[i],
                                              ch1 + group * 128 + offsets[i],
                                              offsets[i+1] - offsets[i]);
1368 1369 1370
                }
            }
        }
1371 1372
        ch0 += ics->group_len[g] * 128;
        ch1 += ics->group_len[g] * 128;
1373 1374 1375 1376 1377 1378 1379 1380 1381 1382
    }
}

/**
 * intensity stereo decoding; reference: 4.6.8.2.3
 *
 * @param   ms_present  Indicates mid/side stereo presence. [0] mask is all 0s;
 *                      [1] mask is decoded from bitstream; [2] mask is all 1s;
 *                      [3] reserved for scalable AAC
 */
1383 1384 1385 1386
static void apply_intensity_stereo(ChannelElement *cpe, int ms_present)
{
    const IndividualChannelStream *ics = &cpe->ch[1].ics;
    SingleChannelElement         *sce1 = &cpe->ch[1];
1387
    float *coef0 = cpe->ch[0].coeffs, *coef1 = cpe->ch[1].coeffs;
1388
    const uint16_t *offsets = ics->swb_offset;
1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401
    int g, group, i, k, idx = 0;
    int c;
    float scale;
    for (g = 0; g < ics->num_window_groups; g++) {
        for (i = 0; i < ics->max_sfb;) {
            if (sce1->band_type[idx] == INTENSITY_BT || sce1->band_type[idx] == INTENSITY_BT2) {
                const int bt_run_end = sce1->band_type_run_end[idx];
                for (; i < bt_run_end; i++, idx++) {
                    c = -1 + 2 * (sce1->band_type[idx] - 14);
                    if (ms_present)
                        c *= 1 - 2 * cpe->ms_mask[idx];
                    scale = c * sce1->sf[idx];
                    for (group = 0; group < ics->group_len[g]; group++)
1402 1403
                        for (k = offsets[i]; k < offsets[i + 1]; k++)
                            coef1[group * 128 + k] = scale * coef0[group * 128 + k];
1404 1405 1406 1407 1408 1409 1410
                }
            } else {
                int bt_run_end = sce1->band_type_run_end[idx];
                idx += bt_run_end - i;
                i    = bt_run_end;
            }
        }
1411 1412
        coef0 += ics->group_len[g] * 128;
        coef1 += ics->group_len[g] * 128;
1413 1414 1415
    }
}

1416 1417 1418 1419 1420
/**
 * Decode a channel_pair_element; reference: table 4.4.
 *
 * @return  Returns error status. 0 - OK, !0 - error
 */
1421 1422
static int decode_cpe(AACContext *ac, GetBitContext *gb, ChannelElement *cpe)
{
1423 1424 1425 1426 1427 1428 1429 1430 1431 1432
    int i, ret, common_window, ms_present = 0;

    common_window = get_bits1(gb);
    if (common_window) {
        if (decode_ics_info(ac, &cpe->ch[0].ics, gb, 1))
            return -1;
        i = cpe->ch[1].ics.use_kb_window[0];
        cpe->ch[1].ics = cpe->ch[0].ics;
        cpe->ch[1].ics.use_kb_window[1] = i;
        ms_present = get_bits(gb, 2);
1433
        if (ms_present == 3) {
1434
            av_log(ac->avctx, AV_LOG_ERROR, "ms_present = 3 is reserved.\n");
1435
            return -1;
1436
        } else if (ms_present)
1437 1438 1439 1440 1441 1442 1443
            decode_mid_side_stereo(cpe, gb, ms_present);
    }
    if ((ret = decode_ics(ac, &cpe->ch[0], gb, common_window, 0)))
        return ret;
    if ((ret = decode_ics(ac, &cpe->ch[1], gb, common_window, 0)))
        return ret;

1444 1445
    if (common_window) {
        if (ms_present)
1446
            apply_mid_side_stereo(ac, cpe);
1447 1448 1449 1450 1451
        if (ac->m4ac.object_type == AOT_AAC_MAIN) {
            apply_prediction(ac, &cpe->ch[0]);
            apply_prediction(ac, &cpe->ch[1]);
        }
    }
1452

1453
    apply_intensity_stereo(cpe, ms_present);
1454 1455 1456
    return 0;
}

1457 1458 1459 1460 1461 1462 1463
/* Base of the coupling gain step; decode_cce() selects an entry with a
 * 2-bit value read from the bitstream and raises it to the negated gain. */
static const float cce_scale[] = {
    1.09050773266525765921, //2^(1/8)
    1.18920711500272106672, //2^(1/4)
    M_SQRT2,                //2^(1/2)
    2,                      //2^1
};

1464 1465 1466 1467 1468
/**
 * Decode coupling_channel_element; reference: table 4.8.
 *
 * @return  Returns error status. 0 - OK, !0 - error
 */
1469 1470
static int decode_cce(AACContext *ac, GetBitContext *gb, ChannelElement *che)
{
1471
    int num_gain = 0;
1472
    int c, g, sfb, ret;
1473 1474
    int sign;
    float scale;
1475 1476
    SingleChannelElement *sce = &che->ch[0];
    ChannelCoupling     *coup = &che->coup;
1477

1478
    coup->coupling_point = 2 * get_bits1(gb);
1479 1480 1481 1482 1483 1484 1485 1486 1487 1488
    coup->num_coupled = get_bits(gb, 3);
    for (c = 0; c <= coup->num_coupled; c++) {
        num_gain++;
        coup->type[c] = get_bits1(gb) ? TYPE_CPE : TYPE_SCE;
        coup->id_select[c] = get_bits(gb, 4);
        if (coup->type[c] == TYPE_CPE) {
            coup->ch_select[c] = get_bits(gb, 2);
            if (coup->ch_select[c] == 3)
                num_gain++;
        } else
1489
            coup->ch_select[c] = 2;
1490
    }
1491
    coup->coupling_point += get_bits1(gb) || (coup->coupling_point >> 1);
1492

1493
    sign  = get_bits(gb, 1);
1494
    scale = cce_scale[get_bits(gb, 2)];
1495 1496 1497 1498 1499

    if ((ret = decode_ics(ac, sce, gb, 0, 0)))
        return ret;

    for (c = 0; c < num_gain; c++) {
1500 1501
        int idx  = 0;
        int cge  = 1;
1502 1503 1504 1505 1506
        int gain = 0;
        float gain_cache = 1.;
        if (c) {
            cge = coup->coupling_point == AFTER_IMDCT ? 1 : get_bits1(gb);
            gain = cge ? get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60: 0;
1507
            gain_cache = powf(scale, -gain);
1508
        }
1509 1510 1511
        if (coup->coupling_point == AFTER_IMDCT) {
            coup->gain[c][0] = gain_cache;
        } else {
1512 1513 1514 1515 1516
            for (g = 0; g < sce->ics.num_window_groups; g++) {
                for (sfb = 0; sfb < sce->ics.max_sfb; sfb++, idx++) {
                    if (sce->band_type[idx] != ZERO_BT) {
                        if (!cge) {
                            int t = get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
1517
                            if (t) {
1518 1519 1520 1521 1522 1523
                                int s = 1;
                                t = gain += t;
                                if (sign) {
                                    s  -= 2 * (t & 0x1);
                                    t >>= 1;
                                }
1524
                                gain_cache = powf(scale, -t) * s;
1525 1526
                            }
                        }
1527
                        coup->gain[c][idx] = gain_cache;
1528 1529
                    }
                }
1530 1531
            }
        }
1532 1533 1534 1535 1536 1537 1538 1539 1540
    }
    return 0;
}

/**
 * Parse whether channels are to be excluded from Dynamic Range Compression; reference: table 4.53.
 *
 * @return  Returns number of bytes consumed.
 */
1541 1542 1543
static int decode_drc_channel_exclusions(DynamicRangeControl *che_drc,
                                         GetBitContext *gb)
{
1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554
    int i;
    int num_excl_chan = 0;

    do {
        for (i = 0; i < 7; i++)
            che_drc->exclude_mask[num_excl_chan++] = get_bits1(gb);
    } while (num_excl_chan < MAX_CHANNELS - 7 && get_bits1(gb));

    return num_excl_chan / 7;
}

1555 1556 1557 1558 1559 1560 1561
/**
 * Decode dynamic range information; reference: table 4.52.
 *
 * @param   cnt length of TYPE_FIL syntactic element in bytes
 *
 * @return  Returns number of bytes consumed.
 */
1562 1563 1564 1565
static int decode_dynamic_range(DynamicRangeControl *che_drc,
                                GetBitContext *gb, int cnt)
{
    int n             = 1;
1566 1567 1568 1569
    int drc_num_bands = 1;
    int i;

    /* pce_tag_present? */
1570
    if (get_bits1(gb)) {
1571 1572 1573 1574 1575 1576
        che_drc->pce_instance_tag  = get_bits(gb, 4);
        skip_bits(gb, 4); // tag_reserved_bits
        n++;
    }

    /* excluded_chns_present? */
1577
    if (get_bits1(gb)) {
1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615
        n += decode_drc_channel_exclusions(che_drc, gb);
    }

    /* drc_bands_present? */
    if (get_bits1(gb)) {
        che_drc->band_incr            = get_bits(gb, 4);
        che_drc->interpolation_scheme = get_bits(gb, 4);
        n++;
        drc_num_bands += che_drc->band_incr;
        for (i = 0; i < drc_num_bands; i++) {
            che_drc->band_top[i] = get_bits(gb, 8);
            n++;
        }
    }

    /* prog_ref_level_present? */
    if (get_bits1(gb)) {
        che_drc->prog_ref_level = get_bits(gb, 7);
        skip_bits1(gb); // prog_ref_level_reserved_bits
        n++;
    }

    for (i = 0; i < drc_num_bands; i++) {
        che_drc->dyn_rng_sgn[i] = get_bits1(gb);
        che_drc->dyn_rng_ctl[i] = get_bits(gb, 7);
        n++;
    }

    return n;
}

/**
 * Decode extension data (incomplete); reference: table 4.51.
 *
 * @param   cnt length of TYPE_FIL syntactic element in bytes
 *
 * @return Returns number of bytes consumed
 */
1616 1617
static int decode_extension_payload(AACContext *ac, GetBitContext *gb, int cnt,
                                    ChannelElement *che, enum RawDataBlockType elem_type)
1618
{
1619 1620 1621
    int crc_flag = 0;
    int res = cnt;
    switch (get_bits(gb, 4)) { // extension type
1622 1623 1624
    case EXT_SBR_DATA_CRC:
        crc_flag++;
    case EXT_SBR_DATA:
1625
        if (!che) {
1626
            av_log(ac->avctx, AV_LOG_ERROR, "SBR was found before the first channel element.\n");
1627 1628
            return res;
        } else if (!ac->m4ac.sbr) {
1629
            av_log(ac->avctx, AV_LOG_ERROR, "SBR signaled to be not-present but was found in the bitstream.\n");
1630 1631 1632
            skip_bits_long(gb, 8 * cnt - 4);
            return res;
        } else if (ac->m4ac.sbr == -1 && ac->output_configured == OC_LOCKED) {
1633
            av_log(ac->avctx, AV_LOG_ERROR, "Implicit SBR was found with a first occurrence after the first frame.\n");
1634 1635
            skip_bits_long(gb, 8 * cnt - 4);
            return res;
1636 1637 1638 1639
        } else if (ac->m4ac.ps == -1 && ac->output_configured < OC_LOCKED && ac->avctx->channels == 1) {
            ac->m4ac.sbr = 1;
            ac->m4ac.ps = 1;
            output_configure(ac, ac->che_pos, ac->che_pos, ac->m4ac.chan_config, ac->output_configured);
1640 1641 1642 1643
        } else {
            ac->m4ac.sbr = 1;
        }
        res = ff_decode_sbr_extension(ac, &che->sbr, gb, crc_flag, cnt, elem_type);
1644 1645 1646 1647 1648 1649 1650 1651 1652 1653
        break;
    case EXT_DYNAMIC_RANGE:
        res = decode_dynamic_range(&ac->che_drc, gb, cnt);
        break;
    case EXT_FILL:
    case EXT_FILL_DATA:
    case EXT_DATA_ELEMENT:
    default:
        skip_bits_long(gb, 8 * cnt - 4);
        break;
1654 1655 1656 1657
    };
    return res;
}

1658 1659 1660 1661 1662 1663
/**
 * Decode Temporal Noise Shaping filter coefficients and apply all-pole filters; reference: 4.6.9.3.
 *
 * @param   decode  1 if tool is used normally, 0 if tool is used in LTP.
 * @param   coef    spectral coefficients
 */
1664 1665 1666 1667
static void apply_tns(float coef[1024], TemporalNoiseShaping *tns,
                      IndividualChannelStream *ics, int decode)
{
    const int mmm = FFMIN(ics->tns_max_bands, ics->max_sfb);
Robert Swain's avatar
Robert Swain committed
1668
    int w, filt, m, i;
1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680
    int bottom, top, order, start, end, size, inc;
    float lpc[TNS_MAX_ORDER];

    for (w = 0; w < ics->num_windows; w++) {
        bottom = ics->num_swb;
        for (filt = 0; filt < tns->n_filt[w]; filt++) {
            top    = bottom;
            bottom = FFMAX(0, top - tns->length[w][filt]);
            order  = tns->order[w][filt];
            if (order == 0)
                continue;

1681 1682
            // tns_decode_coef
            compute_lpc_coefs(tns->coef[w][filt], order, lpc, 0, 0, 0);
1683

1684 1685 1686 1687 1688
            start = ics->swb_offset[FFMIN(bottom, mmm)];
            end   = ics->swb_offset[FFMIN(   top, mmm)];
            if ((size = end - start) <= 0)
                continue;
            if (tns->direction[w][filt]) {
1689 1690
                inc = -1;
                start = end - 1;
1691 1692 1693 1694 1695 1696 1697 1698
            } else {
                inc = 1;
            }
            start += w * 128;

            // ar filter
            for (m = 0; m < size; m++, start += inc)
                for (i = 1; i <= FFMIN(m, order); i++)
1699
                    coef[start] -= coef[start - i * inc] * lpc[i - 1];
1700 1701 1702 1703
        }
    }
}

1704 1705 1706
/**
 * Conduct IMDCT and windowing.
 */
1707
static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce, float bias)
1708 1709 1710 1711 1712 1713 1714 1715 1716 1717
{
    IndividualChannelStream *ics = &sce->ics;
    float *in    = sce->coeffs;
    float *out   = sce->ret;
    float *saved = sce->saved;
    const float *swindow      = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
    const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
    const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
    float *buf  = ac->buf_mdct;
    float *temp = ac->temp;
1718 1719
    int i;

1720
    // imdct
1721
    if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1722 1723
        for (i = 0; i < 1024; i += 128)
            ff_imdct_half(&ac->mdct_small, buf + i, in + i);
1724
    } else
1725
        ff_imdct_half(&ac->mdct, buf, in);
1726 1727 1728 1729 1730 1731 1732 1733

    /* window overlapping
     * NOTE: To simplify the overlapping code, all 'meaningless' short to long
     * and long to short transitions are considered to be short to short
     * transitions. This leaves just two cases (long to long and short to short)
     * with a little special sauce for EIGHT_SHORT_SEQUENCE.
     */
    if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
1734
            (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
1735
        ac->dsp.vector_fmul_window(    out,               saved,            buf,         lwindow_prev, bias, 512);
1736
    } else {
1737
        for (i = 0; i < 448; i++)
1738
            out[i] = saved[i] + bias;
1739

1740
        if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1741 1742 1743 1744 1745
            ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448,      buf + 0*128, swindow_prev, bias, 64);
            ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow,      bias, 64);
            ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow,      bias, 64);
            ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow,      bias, 64);
            ac->dsp.vector_fmul_window(temp,              buf + 3*128 + 64, buf + 4*128, swindow,      bias, 64);
1746
            memcpy(                    out + 448 + 4*128, temp, 64 * sizeof(float));
1747
        } else {
1748
            ac->dsp.vector_fmul_window(out + 448,         saved + 448,      buf,         swindow_prev, bias, 64);
1749
            for (i = 576; i < 1024; i++)
1750
                out[i] = buf[i-512] + bias;
1751 1752
        }
    }
1753

1754 1755
    // buffer update
    if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1756
        for (i = 0; i < 64; i++)
1757
            saved[i] = temp[64 + i] - bias;
1758 1759 1760 1761
        ac->dsp.vector_fmul_window(saved + 64,  buf + 4*128 + 64, buf + 5*128, swindow, 0, 64);
        ac->dsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 0, 64);
        ac->dsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 0, 64);
        memcpy(                    saved + 448, buf + 7*128 + 64,  64 * sizeof(float));
1762
    } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
1763 1764
        memcpy(                    saved,       buf + 512,        448 * sizeof(float));
        memcpy(                    saved + 448, buf + 7*128 + 64,  64 * sizeof(float));
1765
    } else { // LONG_STOP or ONLY_LONG
1766
        memcpy(                    saved,       buf + 512,        512 * sizeof(float));
1767 1768 1769
    }
}

1770 1771 1772 1773 1774
/**
 * Apply dependent channel coupling (applied before IMDCT).
 *
 * @param   index   index into coupling gain array
 */
1775 1776 1777 1778 1779 1780 1781 1782
static void apply_dependent_coupling(AACContext *ac,
                                     SingleChannelElement *target,
                                     ChannelElement *cce, int index)
{
    IndividualChannelStream *ics = &cce->ch[0].ics;
    const uint16_t *offsets = ics->swb_offset;
    float *dest = target->coeffs;
    const float *src = cce->ch[0].coeffs;
1783
    int g, i, group, k, idx = 0;
1784
    if (ac->m4ac.object_type == AOT_AAC_LTP) {
1785
        av_log(ac->avctx, AV_LOG_ERROR,
1786 1787 1788 1789 1790
               "Dependent coupling is not supported together with LTP\n");
        return;
    }
    for (g = 0; g < ics->num_window_groups; g++) {
        for (i = 0; i < ics->max_sfb; i++, idx++) {
1791
            if (cce->ch[0].band_type[idx] != ZERO_BT) {
1792
                const float gain = cce->coup.gain[index][idx];
1793
                for (group = 0; group < ics->group_len[g]; group++) {
1794
                    for (k = offsets[i]; k < offsets[i + 1]; k++) {
1795
                        // XXX dsputil-ize
1796
                        dest[group * 128 + k] += gain * src[group * 128 + k];
1797 1798 1799 1800
                    }
                }
            }
        }
1801 1802
        dest += ics->group_len[g] * 128;
        src  += ics->group_len[g] * 128;
1803 1804 1805 1806 1807 1808 1809 1810
    }
}

/**
 * Apply independent channel coupling (applied after IMDCT).
 *
 * @param   index   index into coupling gain array
 */
1811 1812 1813 1814
static void apply_independent_coupling(AACContext *ac,
                                       SingleChannelElement *target,
                                       ChannelElement *cce, int index)
{
1815
    int i;
1816 1817
    const float gain = cce->coup.gain[index][0];
    const float bias = ac->add_bias;
1818 1819
    const float *src = cce->ch[0].ret;
    float *dest = target->ret;
1820
    const int len = 1024 << (ac->m4ac.sbr == 1);
1821

1822
    for (i = 0; i < len; i++)
1823
        dest[i] += gain * (src[i] - bias);
1824 1825
}

1826 1827 1828 1829 1830
/**
 * channel coupling transformation interface
 *
 * @param   apply_coupling_method   pointer to (in)dependent coupling function
 */
1831 1832 1833 1834
static void apply_channel_coupling(AACContext *ac, ChannelElement *cc,
                                   enum RawDataBlockType type, int elem_id,
                                   enum CouplingPoint coupling_point,
                                   void (*apply_coupling_method)(AACContext *ac, SingleChannelElement *target, ChannelElement *cce, int index))
1835
{
1836 1837 1838 1839 1840 1841 1842
    int i, c;

    for (i = 0; i < MAX_ELEM_ID; i++) {
        ChannelElement *cce = ac->che[TYPE_CCE][i];
        int index = 0;

        if (cce && cce->coup.coupling_point == coupling_point) {
1843
            ChannelCoupling *coup = &cce->coup;
1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855

            for (c = 0; c <= coup->num_coupled; c++) {
                if (coup->type[c] == type && coup->id_select[c] == elem_id) {
                    if (coup->ch_select[c] != 1) {
                        apply_coupling_method(ac, &cc->ch[0], cce, index);
                        if (coup->ch_select[c] != 0)
                            index++;
                    }
                    if (coup->ch_select[c] != 2)
                        apply_coupling_method(ac, &cc->ch[1], cce, index++);
                } else
                    index += 1 + (coup->ch_select[c] == 3);
1856 1857 1858 1859 1860 1861 1862 1863
            }
        }
    }
}

/**
 * Convert spectral data to float samples, applying all supported tools as appropriate.
 */
1864 1865
static void spectral_to_sample(AACContext *ac)
{
1866
    int i, type;
1867
    float imdct_bias = (ac->m4ac.sbr <= 0) ? ac->add_bias : 0.0f;
1868
    for (type = 3; type >= 0; type--) {
1869
        for (i = 0; i < MAX_ELEM_ID; i++) {
1870
            ChannelElement *che = ac->che[type][i];
1871 1872
            if (che) {
                if (type <= TYPE_CPE)
1873
                    apply_channel_coupling(ac, che, type, i, BEFORE_TNS, apply_dependent_coupling);
1874
                if (che->ch[0].tns.present)
1875
                    apply_tns(che->ch[0].coeffs, &che->ch[0].tns, &che->ch[0].ics, 1);
1876
                if (che->ch[1].tns.present)
1877
                    apply_tns(che->ch[1].coeffs, &che->ch[1].tns, &che->ch[1].ics, 1);
1878
                if (type <= TYPE_CPE)
1879
                    apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, apply_dependent_coupling);
1880
                if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT) {
1881
                    imdct_and_windowing(ac, &che->ch[0], imdct_bias);
Alex Converse's avatar
Alex Converse committed
1882 1883 1884
                    if (type == TYPE_CPE) {
                        imdct_and_windowing(ac, &che->ch[1], imdct_bias);
                    }
1885 1886 1887
                    if (ac->m4ac.sbr > 0) {
                        ff_sbr_apply(ac, &che->sbr, type, che->ch[0].ret, che->ch[1].ret);
                    }
1888
                }
1889
                if (type <= TYPE_CCE)
1890
                    apply_channel_coupling(ac, che, type, i, AFTER_IMDCT, apply_independent_coupling);
1891 1892 1893 1894 1895
            }
        }
    }
}

1896 1897
/**
 * Parse an ADTS frame header and update the decoder configuration from it.
 *
 * While the output configuration is not locked, a non-zero channel
 * configuration in the header triggers a trial output configuration, a zero
 * one resets the configuration state, and the SBR/PS states are set to -1.
 * Sample rate, sampling index and object type are always taken from the
 * header.
 *
 * @param   ac  decoder context to update
 * @param   gb  bitstream reader positioned at the ADTS header
 *
 * @return  the value returned by ff_aac_parse_header() on success or when
 *          that value is not positive, -7 if the channel configuration
 *          could not be applied, -1 if the frame carries more than one
 *          raw data block
 */
static int parse_adts_frame_header(AACContext *ac, GetBitContext *gb)
{
    AACADTSHeaderInfo hdr;
    const int frame_bytes = ff_aac_parse_header(gb, &hdr);

    if (frame_bytes <= 0)
        return frame_bytes;

    if (ac->output_configured != OC_LOCKED) {
        if (hdr.chan_config) {
            enum ChannelPosition layout[4][MAX_ELEM_ID];

            memset(layout, 0, sizeof(layout));
            ac->m4ac.chan_config = hdr.chan_config;
            if (set_default_channel_config(ac, layout, hdr.chan_config) ||
                output_configure(ac, ac->che_pos, layout, hdr.chan_config, OC_TRIAL_FRAME))
                return -7;
        } else {
            ac->output_configured = OC_NONE;
        }
    }

    /* checked again: the configuration state may have been changed above */
    if (ac->output_configured != OC_LOCKED) {
        ac->m4ac.sbr = -1;
        ac->m4ac.ps  = -1;
    }

    ac->m4ac.sample_rate    = hdr.sample_rate;
    ac->m4ac.sampling_index = hdr.sampling_index;
    ac->m4ac.object_type    = hdr.object_type;
    if (!ac->avctx->sample_rate)
        ac->avctx->sample_rate = hdr.sample_rate;

    if (hdr.num_aac_frames != 1) {
        av_log_missing_feature(ac->avctx, "More than one AAC RDB per ADTS frame is", 0);
        return -1;
    }

    /* step over the 16-bit CRC when one is present */
    if (!hdr.crc_absent)
        skip_bits(gb, 16);

    return frame_bytes;
}

1934
/**
 * Decode one packet of AAC data into interleaved 16-bit samples.
 *
 * If the packet starts with the 12-bit pattern 0xfff an ADTS header is
 * parsed first. Syntactic elements are then decoded until TYPE_END, the
 * spectral data is converted to samples and written to data.
 *
 * @param   avctx      codec context; priv_data holds the AACContext
 * @param   data       output buffer for the interleaved samples
 * @param   data_size  in: capacity of data in bytes, out: bytes written
 * @param   avpkt      input packet
 *
 * @return  number of input bytes consumed (the whole packet when only zero
 *          bytes follow the decoded data), or a non-zero error value from
 *          element decoding / -1 on other failures
 */
static int aac_decode_frame(AVCodecContext *avctx, void *data,
                            int *data_size, AVPacket *avpkt)
{
    AACContext *ac           = avctx->priv_data;
    const uint8_t *pkt_bytes = avpkt->data;
    const int pkt_size       = avpkt->size;
    ChannelElement *cur_che  = NULL;
    ChannelElement *last_che = NULL;
    enum RawDataBlockType cur_type;
    enum RawDataBlockType last_type = TYPE_END;
    GetBitContext gb;
    int frame_samples = 0;
    int sbr_shift, out_bytes, consumed, scan;

    init_get_bits(&gb, pkt_bytes, pkt_size * 8);

    /* a leading 0xfff pattern marks an ADTS header */
    if (show_bits(&gb, 12) == 0xfff) {
        if (parse_adts_frame_header(ac, &gb) < 0) {
            av_log(avctx, AV_LOG_ERROR, "Error decoding AAC frame header.\n");
            return -1;
        }
        if (ac->m4ac.sampling_index > 12) {
            av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->m4ac.sampling_index);
            return -1;
        }
    }

    memset(ac->tags_seen_this_frame, 0, sizeof(ac->tags_seen_this_frame));

    // parse
    for (;;) {
        int status;
        int id;

        cur_type = get_bits(&gb, 3);
        if (cur_type == TYPE_END)
            break;
        id = get_bits(&gb, 4);

        /* element types below TYPE_DSE need an allocated channel element */
        if (cur_type < TYPE_DSE) {
            cur_che = get_che(ac, cur_type, id);
            if (!cur_che) {
                av_log(ac->avctx, AV_LOG_ERROR, "channel element %d.%d is not allocated\n",
                       cur_type, id);
                return -1;
            }
            frame_samples = 1024;
        }

        switch (cur_type) {

        case TYPE_SCE:
        case TYPE_LFE:
            status = decode_ics(ac, &cur_che->ch[0], &gb, 0, 0);
            break;

        case TYPE_CPE:
            status = decode_cpe(ac, &gb, cur_che);
            break;

        case TYPE_CCE:
            status = decode_cce(ac, &gb, cur_che);
            break;

        case TYPE_DSE:
            status = skip_data_stream_element(ac, &gb);
            break;

        case TYPE_PCE: {
            enum ChannelPosition pce_layout[4][MAX_ELEM_ID];

            memset(pce_layout, 0, sizeof(pce_layout));
            status = decode_pce(ac, pce_layout, &gb);
            if (!status) {
                if (ac->output_configured > OC_TRIAL_PCE)
                    av_log(avctx, AV_LOG_ERROR,
                           "Not evaluating a further program_config_element as this construct is dubious at best.\n");
                else
                    status = output_configure(ac, ac->che_pos, pce_layout, 0, OC_TRIAL_PCE);
            }
            break;
        }

        case TYPE_FIL:
            /* for fill elements the id field is the payload length in
             * bytes; 15 means an extra 8-bit length field follows */
            if (id == 15)
                id += get_bits(&gb, 8) - 1;
            if (get_bits_left(&gb) < 8 * id) {
                av_log(avctx, AV_LOG_ERROR, overread_err);
                return -1;
            }
            /* extensions apply to the element decoded just before */
            while (id > 0)
                id -= decode_extension_payload(ac, &gb, id, last_che, last_type);
            status = 0; /* FIXME */
            break;

        default:
            status = -1; /* should not happen, but keeps compiler happy */
            break;
        }

        if (status)
            return status;

        last_che  = cur_che;
        last_type = cur_type;

        /* the next element type needs at least three more bits */
        if (get_bits_left(&gb) < 3) {
            av_log(avctx, AV_LOG_ERROR, overread_err);
            return -1;
        }
    }

    spectral_to_sample(ac);

    /* output length and rate double when SBR is active and the extension
     * sample rate exceeds the core sample rate */
    sbr_shift = ac->m4ac.sbr == 1 && ac->m4ac.ext_sample_rate > ac->m4ac.sample_rate;
    frame_samples <<= sbr_shift;
    if (ac->output_configured < OC_LOCKED) {
        avctx->sample_rate = ac->m4ac.sample_rate << sbr_shift;
        avctx->frame_size  = frame_samples;
    }

    out_bytes = frame_samples * avctx->channels * sizeof(int16_t);
    if (*data_size < out_bytes) {
        av_log(avctx, AV_LOG_ERROR,
               "Output buffer too small (%d) or trying to output too many samples (%d) for this frame.\n",
               *data_size, out_bytes);
        return -1;
    }
    *data_size = out_bytes;

    if (frame_samples)
        ac->dsp.float_to_int16_interleave(data, (const float **)ac->output_data, frame_samples, avctx->channels);

    /* any configuration in effect is locked after this frame */
    if (ac->output_configured)
        ac->output_configured = OC_LOCKED;

    /* if nothing but zero bytes follows the parsed data, report the whole
     * packet as consumed */
    consumed = (get_bits_count(&gb) + 7) >> 3;
    scan     = consumed;
    while (scan < pkt_size && !pkt_bytes[scan])
        scan++;

    return pkt_size > scan ? consumed : pkt_size;
}

2071
/**
 * Release the decoder's resources.
 *
 * Every allocated channel element has its SBR context closed and is then
 * freed; afterwards both MDCT contexts are released.
 *
 * @param   avctx  codec context; priv_data holds the AACContext
 *
 * @return  always 0
 */
static av_cold int aac_decode_close(AVCodecContext *avctx)
{
    AACContext *ac = avctx->priv_data;
    int id, kind;

    for (id = 0; id < MAX_ELEM_ID; id++) {
        for (kind = 0; kind < 4; kind++) {
            ChannelElement **slot = &ac->che[kind][id];

            if (*slot)
                ff_aac_sbr_ctx_close(&(*slot)->sbr);
            av_freep(slot);
        }
    }

    ff_mdct_end(&ac->mdct);
    ff_mdct_end(&ac->mdct_small);

    return 0;
}

AVCodec aac_decoder = {
    "aac",
2091
    AVMEDIA_TYPE_AUDIO,
2092 2093 2094 2095 2096 2097 2098
    CODEC_ID_AAC,
    sizeof(AACContext),
    aac_decode_init,
    NULL,
    aac_decode_close,
    aac_decode_frame,
    .long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"),
2099
    .sample_fmts = (const enum SampleFormat[]) {
2100 2101
        SAMPLE_FMT_S16,SAMPLE_FMT_NONE
    },
2102
    .channel_layouts = aac_channel_layout,
2103
};