/*
 * AAC decoder
 * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
 * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file libavcodec/aac.c
 * AAC decoder
 * @author Oded Shimon  ( ods15 ods15 dyndns org )
 * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
 */

/*
 * supported tools
 *
 * Support?             Name
 * N (code in SoC repo) gain control
 * Y                    block switching
 * Y                    window shapes - standard
 * N                    window shapes - Low Delay
 * Y                    filterbank - standard
 * N (code in SoC repo) filterbank - Scalable Sample Rate
 * Y                    Temporal Noise Shaping
 * N (code in SoC repo) Long Term Prediction
 * Y                    intensity stereo
 * Y                    channel coupling
 * Y                    frequency domain prediction
 * Y                    Perceptual Noise Substitution
 * Y                    Mid/Side stereo
 * N                    Scalable Inverse AAC Quantization
 * N                    Frequency Selective Switch
 * N                    upsampling filter
 * Y                    quantization & coding - AAC
 * N                    quantization & coding - TwinVQ
 * N                    quantization & coding - BSAC
 * N                    AAC Error Resilience tools
 * N                    Error Resilience payload syntax
 * N                    Error Protection tool
 * N                    CELP
 * N                    Silence Compression
 * N                    HVXC
 * N                    HVXC 4kbits/s VR
 * N                    Structured Audio tools
 * N                    Structured Audio Sample Bank Format
 * N                    MIDI
 * N                    Harmonic and Individual Lines plus Noise
 * N                    Text-To-Speech Interface
 * N (in progress)      Spectral Band Replication
 * Y (not in this code) Layer-1
 * Y (not in this code) Layer-2
 * Y (not in this code) Layer-3
 * N                    SinuSoidal Coding (Transient, Sinusoid, Noise)
 * N (planned)          Parametric Stereo
 * N                    Direct Stream Transfer
 *
 * Note: - HE AAC v1 comprises LC AAC with Spectral Band Replication.
 *       - HE AAC v2 comprises LC AAC with Spectral Band Replication and
 *         Parametric Stereo.
 */


#include "avcodec.h"
#include "internal.h"
#include "get_bits.h"
#include "dsputil.h"
#include "lpc.h"

#include "aac.h"
#include "aactab.h"
#include "aacdectab.h"
#include "mpeg4audio.h"
#include "aac_parser.h"

#include <assert.h>
#include <errno.h>
#include <math.h>
#include <string.h>

#if ARCH_ARM
#   include "arm/aac.h"
#endif

/** Overlay giving access to the raw 32-bit pattern of a float. */
union float754 {
    float f;
    uint32_t i;
};

/** VLC for scalefactor codes, built in aac_decode_init(). */
static VLC vlc_scalefactors;
/** One VLC per spectral codebook (indices 0..10 are initialised in aac_decode_init()). */
static VLC vlc_spectral[11];

/** Bit patterns of cbrtf(i) * i for i in [0, 1<<13), filled in aac_decode_init(). */
static uint32_t cbrt_tab[1<<13];

110 111
static ChannelElement *get_che(AACContext *ac, int type, int elem_id)
{
112 113 114 115 116 117 118
    if (ac->tag_che_map[type][elem_id]) {
        return ac->tag_che_map[type][elem_id];
    }
    if (ac->tags_mapped >= tags_per_config[ac->m4ac.chan_config]) {
        return NULL;
    }
    switch (ac->m4ac.chan_config) {
119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147
    case 7:
        if (ac->tags_mapped == 3 && type == TYPE_CPE) {
            ac->tags_mapped++;
            return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][2];
        }
    case 6:
        /* Some streams incorrectly code 5.1 audio as SCE[0] CPE[0] CPE[1] SCE[1]
           instead of SCE[0] CPE[0] CPE[0] LFE[0]. If we seem to have
           encountered such a stream, transfer the LFE[0] element to SCE[1] */
        if (ac->tags_mapped == tags_per_config[ac->m4ac.chan_config] - 1 && (type == TYPE_LFE || type == TYPE_SCE)) {
            ac->tags_mapped++;
            return ac->tag_che_map[type][elem_id] = ac->che[TYPE_LFE][0];
        }
    case 5:
        if (ac->tags_mapped == 2 && type == TYPE_CPE) {
            ac->tags_mapped++;
            return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][1];
        }
    case 4:
        if (ac->tags_mapped == 2 && ac->m4ac.chan_config == 4 && type == TYPE_SCE) {
            ac->tags_mapped++;
            return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][1];
        }
    case 3:
    case 2:
        if (ac->tags_mapped == (ac->m4ac.chan_config != 2) && type == TYPE_CPE) {
            ac->tags_mapped++;
            return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][0];
        } else if (ac->m4ac.chan_config == 2) {
148
            return NULL;
149 150 151 152 153 154 155 156
        }
    case 1:
        if (!ac->tags_mapped && type == TYPE_SCE) {
            ac->tags_mapped++;
            return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][0];
        }
    default:
        return NULL;
157 158 159
    }
}

160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190
/**
 * Check for the channel element in the current channel position configuration.
 * If it exists, make sure the appropriate element is allocated and map the
 * channel order to match the internal FFmpeg channel layout.
 *
 * @param   che_pos current channel position configuration
 * @param   type channel element type
 * @param   id channel element id
 * @param   channels count of the number of channels in the configuration
 *
 * @return  Returns error status. 0 - OK, !0 - error
 */
static int che_configure(AACContext *ac,
                         enum ChannelPosition che_pos[4][MAX_ELEM_ID],
                         int type, int id,
                         int *channels)
{
    if (che_pos[type][id]) {
        if (!ac->che[type][id] && !(ac->che[type][id] = av_mallocz(sizeof(ChannelElement))))
            return AVERROR(ENOMEM);
        if (type != TYPE_CCE) {
            ac->output_data[(*channels)++] = ac->che[type][id]->ch[0].ret;
            if (type == TYPE_CPE) {
                ac->output_data[(*channels)++] = ac->che[type][id]->ch[1].ret;
            }
        }
    } else
        av_freep(&ac->che[type][id]);
    return 0;
}

191 192 193 194 195 196 197 198
/**
 * Configure output channel order based on the current program configuration element.
 *
 * @param   che_pos current channel position configuration
 * @param   new_che_pos New channel position configuration - we only do something if it differs from the current one.
 *
 * @return  Returns error status. 0 - OK, !0 - error
 */
199 200 201
static int output_configure(AACContext *ac,
                            enum ChannelPosition che_pos[4][MAX_ELEM_ID],
                            enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
202
                            int channel_config, enum OCStatus oc_type)
203
{
204
    AVCodecContext *avctx = ac->avccontext;
205
    int i, type, channels = 0, ret;
206 207 208

    memcpy(che_pos, new_che_pos, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));

209 210
    if (channel_config) {
        for (i = 0; i < tags_per_config[channel_config]; i++) {
211 212 213 214 215
            if ((ret = che_configure(ac, che_pos,
                                     aac_channel_layout_map[channel_config - 1][i][0],
                                     aac_channel_layout_map[channel_config - 1][i][1],
                                     &channels)))
                return ret;
216 217 218 219 220 221 222
        }

        memset(ac->tag_che_map, 0,       4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
        ac->tags_mapped = 0;

        avctx->channel_layout = aac_channel_layout[channel_config - 1];
    } else {
223 224 225 226 227 228 229 230 231 232 233
        /* Allocate or free elements depending on if they are in the
         * current program configuration.
         *
         * Set up default 1:1 output mapping.
         *
         * For a 5.1 stream the output order will be:
         *    [ Center ] [ Front Left ] [ Front Right ] [ LFE ] [ Surround Left ] [ Surround Right ]
         */

        for (i = 0; i < MAX_ELEM_ID; i++) {
            for (type = 0; type < 4; type++) {
234 235
                if ((ret = che_configure(ac, che_pos, type, i, &channels)))
                    return ret;
236
            }
237 238
        }

239
        memcpy(ac->tag_che_map, ac->che, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
240
        ac->tags_mapped = 4 * MAX_ELEM_ID;
241 242

        avctx->channel_layout = 0;
243 244
    }

245
    avctx->channels = channels;
246

247
    ac->output_configured = oc_type;
248

249 250 251
    return 0;
}

252 253 254 255 256 257 258 259
/**
 * Decode an array of 4 bit element IDs, optionally interleaved with a stereo/mono switching bit.
 *
 * @param cpe_map Stereo (Channel Pair Element) map, NULL if stereo bit is not present.
 * @param sce_map mono (Single Channel Element) map
 * @param type speaker type/position for these channels
 */
static void decode_channel_map(enum ChannelPosition *cpe_map,
260 261 262 263 264
                               enum ChannelPosition *sce_map,
                               enum ChannelPosition type,
                               GetBitContext *gb, int n)
{
    while (n--) {
265 266 267 268 269 270 271 272 273 274 275 276
        enum ChannelPosition *map = cpe_map && get_bits1(gb) ? cpe_map : sce_map; // stereo or mono map
        map[get_bits(gb, 4)] = type;
    }
}

/**
 * Decode program configuration element; reference: table 4.2.
 *
 * @param   new_che_pos New channel position configuration - we only do something if it differs from the current one.
 *
 * @return  Returns error status. 0 - OK, !0 - error
 */
277 278 279
static int decode_pce(AACContext *ac, enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
                      GetBitContext *gb)
{
280
    int num_front, num_side, num_back, num_lfe, num_assoc_data, num_cc, sampling_index;
281 282 283

    skip_bits(gb, 2);  // object_type

284
    sampling_index = get_bits(gb, 4);
285 286 287
    if (ac->m4ac.sampling_index != sampling_index)
        av_log(ac->avccontext, AV_LOG_WARNING, "Sample rate index in program config element does not match the sample rate index configured by the container.\n");

288 289 290 291 292 293 294
    num_front       = get_bits(gb, 4);
    num_side        = get_bits(gb, 4);
    num_back        = get_bits(gb, 4);
    num_lfe         = get_bits(gb, 2);
    num_assoc_data  = get_bits(gb, 3);
    num_cc          = get_bits(gb, 4);

295 296 297 298
    if (get_bits1(gb))
        skip_bits(gb, 4); // mono_mixdown_tag
    if (get_bits1(gb))
        skip_bits(gb, 4); // stereo_mixdown_tag
299

300 301
    if (get_bits1(gb))
        skip_bits(gb, 3); // mixdown_coeff_index and pseudo_surround
302

303 304 305 306
    decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_FRONT, gb, num_front);
    decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_SIDE,  gb, num_side );
    decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_BACK,  gb, num_back );
    decode_channel_map(NULL,                  new_che_pos[TYPE_LFE], AAC_CHANNEL_LFE,   gb, num_lfe  );
307 308 309

    skip_bits_long(gb, 4 * num_assoc_data);

310
    decode_channel_map(new_che_pos[TYPE_CCE], new_che_pos[TYPE_CCE], AAC_CHANNEL_CC,    gb, num_cc   );
311 312 313 314 315

    align_get_bits(gb);

    /* comment field, first byte is length */
    skip_bits_long(gb, 8 * get_bits(gb, 8));
316 317
    return 0;
}
318

319 320 321 322 323 324 325 326
/**
 * Set up channel positions based on a default channel configuration
 * as specified in table 1.17.
 *
 * @param   new_che_pos New channel position configuration - we only do something if it differs from the current one.
 *
 * @return  Returns error status. 0 - OK, !0 - error
 */
327 328 329
static int set_default_channel_config(AACContext *ac,
                                      enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
                                      int channel_config)
330
{
331
    if (channel_config < 1 || channel_config > 7) {
332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347
        av_log(ac->avccontext, AV_LOG_ERROR, "invalid default channel configuration (%d)\n",
               channel_config);
        return -1;
    }

    /* default channel configurations:
     *
     * 1ch : front center (mono)
     * 2ch : L + R (stereo)
     * 3ch : front center + L + R
     * 4ch : front center + L + R + back center
     * 5ch : front center + L + R + back stereo
     * 6ch : front center + L + R + back stereo + LFE
     * 7ch : front center + L + R + outer front left + outer front right + back stereo + LFE
     */

348
    if (channel_config != 2)
349
        new_che_pos[TYPE_SCE][0] = AAC_CHANNEL_FRONT; // front center (or mono)
350
    if (channel_config > 1)
351
        new_che_pos[TYPE_CPE][0] = AAC_CHANNEL_FRONT; // L + R (or stereo)
352
    if (channel_config == 4)
353
        new_che_pos[TYPE_SCE][1] = AAC_CHANNEL_BACK;  // back center
354
    if (channel_config > 4)
355
        new_che_pos[TYPE_CPE][(channel_config == 7) + 1]
356 357
        = AAC_CHANNEL_BACK;  // back stereo
    if (channel_config > 5)
358
        new_che_pos[TYPE_LFE][0] = AAC_CHANNEL_LFE;   // LFE
359
    if (channel_config == 7)
360 361 362 363 364
        new_che_pos[TYPE_CPE][1] = AAC_CHANNEL_FRONT; // outer front left + outer front right

    return 0;
}

365 366 367 368 369
/**
 * Decode GA "General Audio" specific configuration; reference: table 4.1.
 *
 * @return  Returns error status. 0 - OK, !0 - error
 */
370 371 372
static int decode_ga_specific_config(AACContext *ac, GetBitContext *gb,
                                     int channel_config)
{
373 374 375
    enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
    int extension_flag, ret;

376
    if (get_bits1(gb)) { // frameLengthFlag
377
        av_log_missing_feature(ac->avccontext, "960/120 MDCT window is", 1);
378 379 380 381 382 383 384
        return -1;
    }

    if (get_bits1(gb))       // dependsOnCoreCoder
        skip_bits(gb, 14);   // coreCoderDelay
    extension_flag = get_bits1(gb);

385 386
    if (ac->m4ac.object_type == AOT_AAC_SCALABLE ||
        ac->m4ac.object_type == AOT_ER_AAC_SCALABLE)
387 388 389 390 391
        skip_bits(gb, 3);     // layerNr

    memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
    if (channel_config == 0) {
        skip_bits(gb, 4);  // element_instance_tag
392
        if ((ret = decode_pce(ac, new_che_pos, gb)))
393 394
            return ret;
    } else {
395
        if ((ret = set_default_channel_config(ac, new_che_pos, channel_config)))
396 397
            return ret;
    }
398
    if ((ret = output_configure(ac, ac->che_pos, new_che_pos, channel_config, OC_GLOBAL_HDR)))
399 400 401 402
        return ret;

    if (extension_flag) {
        switch (ac->m4ac.object_type) {
403 404 405 406 407 408 409 410 411
        case AOT_ER_BSAC:
            skip_bits(gb, 5);    // numOfSubFrame
            skip_bits(gb, 11);   // layer_length
            break;
        case AOT_ER_AAC_LC:
        case AOT_ER_AAC_LTP:
        case AOT_ER_AAC_SCALABLE:
        case AOT_ER_AAC_LD:
            skip_bits(gb, 3);  /* aacSectionDataResilienceFlag
412 413 414
                                    * aacScalefactorDataResilienceFlag
                                    * aacSpectralDataResilienceFlag
                                    */
415
            break;
416 417 418 419 420 421 422 423 424 425 426 427 428 429
        }
        skip_bits1(gb);    // extensionFlag3 (TBD in version 3)
    }
    return 0;
}

/**
 * Decode audio specific configuration; reference: table 1.13.
 *
 * @param   data        pointer to AVCodecContext extradata
 * @param   data_size   size of AVCCodecContext extradata
 *
 * @return  Returns error status. 0 - OK, !0 - error
 */
430 431 432
static int decode_audio_specific_config(AACContext *ac, void *data,
                                        int data_size)
{
433 434 435 436 437
    GetBitContext gb;
    int i;

    init_get_bits(&gb, data, data_size * 8);

438
    if ((i = ff_mpeg4audio_get_config(&ac->m4ac, data, data_size)) < 0)
439
        return -1;
440
    if (ac->m4ac.sampling_index > 12) {
441 442 443 444 445 446 447
        av_log(ac->avccontext, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->m4ac.sampling_index);
        return -1;
    }

    skip_bits_long(&gb, i);

    switch (ac->m4ac.object_type) {
448
    case AOT_AAC_MAIN:
449 450 451 452 453 454 455 456 457 458 459 460
    case AOT_AAC_LC:
        if (decode_ga_specific_config(ac, &gb, ac->m4ac.chan_config))
            return -1;
        break;
    default:
        av_log(ac->avccontext, AV_LOG_ERROR, "Audio object type %s%d is not supported.\n",
               ac->m4ac.sbr == 1? "SBR+" : "", ac->m4ac.object_type);
        return -1;
    }
    return 0;
}

/**
 * linear congruential pseudorandom number generator
 *
 * The multiply-add is carried out on uint32_t so that it wraps modulo 2^32;
 * doing it on int would be undefined behaviour on overflow.
 *
 * @param   previous_val    current state of the generator
 *
 * @return  Returns a 32-bit pseudorandom integer
 */
static av_always_inline int lcg_random(int previous_val)
{
    const uint32_t next = (uint32_t)previous_val * 1664525u + 1013904223u;
    return (int)next;
}

473 474 475 476
static void reset_predict_state(PredictorState *ps)
{
    ps->r0   = 0.0f;
    ps->r1   = 0.0f;
477 478 479 480 481 482
    ps->cor0 = 0.0f;
    ps->cor1 = 0.0f;
    ps->var0 = 1.0f;
    ps->var1 = 1.0f;
}

483 484
static void reset_all_predictors(PredictorState *ps)
{
485 486 487 488 489
    int i;
    for (i = 0; i < MAX_PREDICTORS; i++)
        reset_predict_state(&ps[i]);
}

490 491
static void reset_predictor_group(PredictorState *ps, int group_num)
{
492
    int i;
493
    for (i = group_num - 1; i < MAX_PREDICTORS; i += 30)
494 495 496
        reset_predict_state(&ps[i]);
}

497 498 499
static av_cold int aac_decode_init(AVCodecContext *avccontext)
{
    AACContext *ac = avccontext->priv_data;
500 501 502 503
    int i;

    ac->avccontext = avccontext;

504
    if (avccontext->extradata_size > 0) {
505
        if (decode_audio_specific_config(ac, avccontext->extradata, avccontext->extradata_size))
506 507 508 509 510
            return -1;
        avccontext->sample_rate = ac->m4ac.sample_rate;
    } else if (avccontext->channels > 0) {
        ac->m4ac.sample_rate = avccontext->sample_rate;
    }
511

512 513
    avccontext->sample_fmt = SAMPLE_FMT_S16;
    avccontext->frame_size = 1024;
514

515 516 517 518 519 520 521 522 523 524 525
    AAC_INIT_VLC_STATIC( 0, 304);
    AAC_INIT_VLC_STATIC( 1, 270);
    AAC_INIT_VLC_STATIC( 2, 550);
    AAC_INIT_VLC_STATIC( 3, 300);
    AAC_INIT_VLC_STATIC( 4, 328);
    AAC_INIT_VLC_STATIC( 5, 294);
    AAC_INIT_VLC_STATIC( 6, 306);
    AAC_INIT_VLC_STATIC( 7, 268);
    AAC_INIT_VLC_STATIC( 8, 510);
    AAC_INIT_VLC_STATIC( 9, 366);
    AAC_INIT_VLC_STATIC(10, 462);
526 527 528

    dsputil_init(&ac->dsp, avccontext);

529 530
    ac->random_state = 0x1f2e3d4c;

531 532 533 534
    // -1024 - Compensate wrong IMDCT method.
    // 32768 - Required to scale values to the correct range for the bias method
    //         for float to int16 conversion.

535
    if (ac->dsp.float_to_int16_interleave == ff_float_to_int16_interleave_c) {
536 537
        ac->add_bias  = 385.0f;
        ac->sf_scale  = 1. / (-1024. * 32768.);
538 539
        ac->sf_offset = 0;
    } else {
540 541
        ac->add_bias  = 0.0f;
        ac->sf_scale  = 1. / -1024.;
542 543 544
        ac->sf_offset = 60;
    }

545
#if !CONFIG_HARDCODED_TABLES
546
    for (i = 0; i < 428; i++)
547
        ff_aac_pow2sf_tab[i] = pow(2, (i - 200) / 4.);
548 549
#endif /* CONFIG_HARDCODED_TABLES */

550
    INIT_VLC_STATIC(&vlc_scalefactors,7,FF_ARRAY_ELEMS(ff_aac_scalefactor_code),
551 552 553
                    ff_aac_scalefactor_bits, sizeof(ff_aac_scalefactor_bits[0]), sizeof(ff_aac_scalefactor_bits[0]),
                    ff_aac_scalefactor_code, sizeof(ff_aac_scalefactor_code[0]), sizeof(ff_aac_scalefactor_code[0]),
                    352);
554

555 556
    ff_mdct_init(&ac->mdct, 11, 1, 1.0);
    ff_mdct_init(&ac->mdct_small, 8, 1, 1.0);
557 558 559
    // window initialization
    ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
    ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
560 561
    ff_init_ff_sine_windows(10);
    ff_init_ff_sine_windows( 7);
562

563 564 565 566 567 568 569
    if (!cbrt_tab[(1<<13) - 1]) {
        for (i = 0; i < 1<<13; i++) {
            union float754 f;
            f.f = cbrtf(i) * i;
            cbrt_tab[i] = f.i;
        }
    }
570

571 572 573
    return 0;
}

574 575 576
/**
 * Skip data_stream_element; reference: table 4.10.
 */
577 578
static void skip_data_stream_element(GetBitContext *gb)
{
579 580 581 582 583 584 585 586 587
    int byte_align = get_bits1(gb);
    int count = get_bits(gb, 8);
    if (count == 255)
        count += get_bits(gb, 8);
    if (byte_align)
        align_get_bits(gb);
    skip_bits_long(gb, 8 * count);
}

588 589 590
static int decode_prediction(AACContext *ac, IndividualChannelStream *ics,
                             GetBitContext *gb)
{
591 592 593 594 595 596 597 598 599 600 601 602 603 604
    int sfb;
    if (get_bits1(gb)) {
        ics->predictor_reset_group = get_bits(gb, 5);
        if (ics->predictor_reset_group == 0 || ics->predictor_reset_group > 30) {
            av_log(ac->avccontext, AV_LOG_ERROR, "Invalid Predictor Reset Group.\n");
            return -1;
        }
    }
    for (sfb = 0; sfb < FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[ac->m4ac.sampling_index]); sfb++) {
        ics->prediction_used[sfb] = get_bits1(gb);
    }
    return 0;
}

605 606 607 608 609
/**
 * Decode Individual Channel Stream info; reference: table 4.6.
 *
 * @param   common_window   Channels have independent [0], or shared [1], Individual Channel Stream information.
 */
610 611 612
static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics,
                           GetBitContext *gb, int common_window)
{
613 614 615 616 617 618 619
    if (get_bits1(gb)) {
        av_log(ac->avccontext, AV_LOG_ERROR, "Reserved bit set.\n");
        memset(ics, 0, sizeof(IndividualChannelStream));
        return -1;
    }
    ics->window_sequence[1] = ics->window_sequence[0];
    ics->window_sequence[0] = get_bits(gb, 2);
620 621 622 623
    ics->use_kb_window[1]   = ics->use_kb_window[0];
    ics->use_kb_window[0]   = get_bits1(gb);
    ics->num_window_groups  = 1;
    ics->group_len[0]       = 1;
624 625 626 627 628
    if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
        int i;
        ics->max_sfb = get_bits(gb, 4);
        for (i = 0; i < 7; i++) {
            if (get_bits1(gb)) {
629
                ics->group_len[ics->num_window_groups - 1]++;
630 631
            } else {
                ics->num_window_groups++;
632
                ics->group_len[ics->num_window_groups - 1] = 1;
633 634
            }
        }
635 636 637 638
        ics->num_windows       = 8;
        ics->swb_offset        =    ff_swb_offset_128[ac->m4ac.sampling_index];
        ics->num_swb           =   ff_aac_num_swb_128[ac->m4ac.sampling_index];
        ics->tns_max_bands     = ff_tns_max_bands_128[ac->m4ac.sampling_index];
639
        ics->predictor_present = 0;
640
    } else {
641 642 643 644 645 646
        ics->max_sfb               = get_bits(gb, 6);
        ics->num_windows           = 1;
        ics->swb_offset            =    ff_swb_offset_1024[ac->m4ac.sampling_index];
        ics->num_swb               =   ff_aac_num_swb_1024[ac->m4ac.sampling_index];
        ics->tns_max_bands         = ff_tns_max_bands_1024[ac->m4ac.sampling_index];
        ics->predictor_present     = get_bits1(gb);
647 648 649 650 651 652 653 654 655 656 657 658
        ics->predictor_reset_group = 0;
        if (ics->predictor_present) {
            if (ac->m4ac.object_type == AOT_AAC_MAIN) {
                if (decode_prediction(ac, ics, gb)) {
                    memset(ics, 0, sizeof(IndividualChannelStream));
                    return -1;
                }
            } else if (ac->m4ac.object_type == AOT_AAC_LC) {
                av_log(ac->avccontext, AV_LOG_ERROR, "Prediction is not allowed in AAC-LC.\n");
                memset(ics, 0, sizeof(IndividualChannelStream));
                return -1;
            } else {
659
                av_log_missing_feature(ac->avccontext, "Predictor bit set but LTP is", 1);
660 661
                memset(ics, 0, sizeof(IndividualChannelStream));
                return -1;
662
            }
663 664 665
        }
    }

666
    if (ics->max_sfb > ics->num_swb) {
667
        av_log(ac->avccontext, AV_LOG_ERROR,
668 669
               "Number of scalefactor bands in group (%d) exceeds limit (%d).\n",
               ics->max_sfb, ics->num_swb);
670 671 672 673
        memset(ics, 0, sizeof(IndividualChannelStream));
        return -1;
    }

674 675 676 677 678 679 680 681 682 683 684
    return 0;
}

/**
 * Decode band types (section_data payload); reference: table 4.46.
 *
 * @param   band_type           array of the used band type
 * @param   band_type_run_end   array of the last scalefactor band of a band type run
 *
 * @return  Returns error status. 0 - OK, !0 - error
 */
685 686 687 688
static int decode_band_types(AACContext *ac, enum BandType band_type[120],
                             int band_type_run_end[120], GetBitContext *gb,
                             IndividualChannelStream *ics)
{
689 690 691 692 693
    int g, idx = 0;
    const int bits = (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) ? 3 : 5;
    for (g = 0; g < ics->num_window_groups; g++) {
        int k = 0;
        while (k < ics->max_sfb) {
694
            uint8_t sect_end = k;
695 696 697 698 699 700
            int sect_len_incr;
            int sect_band_type = get_bits(gb, 4);
            if (sect_band_type == 12) {
                av_log(ac->avccontext, AV_LOG_ERROR, "invalid band type\n");
                return -1;
            }
701
            while ((sect_len_incr = get_bits(gb, bits)) == (1 << bits) - 1)
702 703 704
                sect_end += sect_len_incr;
            sect_end += sect_len_incr;
            if (sect_end > ics->max_sfb) {
705
                av_log(ac->avccontext, AV_LOG_ERROR,
706
                       "Number of bands (%d) exceeds limit (%d).\n",
707
                       sect_end, ics->max_sfb);
708 709
                return -1;
            }
710
            for (; k < sect_end; k++) {
711
                band_type        [idx]   = sect_band_type;
712
                band_type_run_end[idx++] = sect_end;
713
            }
714 715 716 717
        }
    }
    return 0;
}
718

719 720
/**
 * Decode scalefactors; reference: table 4.47.
721 722 723 724 725 726 727 728
 *
 * @param   global_gain         first scalefactor value as scalefactors are differentially coded
 * @param   band_type           array of the used band type
 * @param   band_type_run_end   array of the last scalefactor band of a band type run
 * @param   sf                  array of scalefactors or intensity stereo positions
 *
 * @return  Returns error status. 0 - OK, !0 - error
 */
729 730 731 732 733 734
static int decode_scalefactors(AACContext *ac, float sf[120], GetBitContext *gb,
                               unsigned int global_gain,
                               IndividualChannelStream *ics,
                               enum BandType band_type[120],
                               int band_type_run_end[120])
{
735 736 737 738 739 740 741 742 743
    const int sf_offset = ac->sf_offset + (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE ? 12 : 0);
    int g, i, idx = 0;
    int offset[3] = { global_gain, global_gain - 90, 100 };
    int noise_flag = 1;
    static const char *sf_str[3] = { "Global gain", "Noise gain", "Intensity stereo position" };
    for (g = 0; g < ics->num_window_groups; g++) {
        for (i = 0; i < ics->max_sfb;) {
            int run_end = band_type_run_end[idx];
            if (band_type[idx] == ZERO_BT) {
744
                for (; i < run_end; i++, idx++)
745
                    sf[idx] = 0.;
746 747
            } else if ((band_type[idx] == INTENSITY_BT) || (band_type[idx] == INTENSITY_BT2)) {
                for (; i < run_end; i++, idx++) {
748
                    offset[2] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
749
                    if (offset[2] > 255U) {
750
                        av_log(ac->avccontext, AV_LOG_ERROR,
751
                               "%s (%d) out of range.\n", sf_str[2], offset[2]);
752 753
                        return -1;
                    }
754
                    sf[idx] = ff_aac_pow2sf_tab[-offset[2] + 300];
755
                }
756 757 758
            } else if (band_type[idx] == NOISE_BT) {
                for (; i < run_end; i++, idx++) {
                    if (noise_flag-- > 0)
759 760 761
                        offset[1] += get_bits(gb, 9) - 256;
                    else
                        offset[1] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
762
                    if (offset[1] > 255U) {
763
                        av_log(ac->avccontext, AV_LOG_ERROR,
764
                               "%s (%d) out of range.\n", sf_str[1], offset[1]);
765 766
                        return -1;
                    }
767
                    sf[idx] = -ff_aac_pow2sf_tab[offset[1] + sf_offset + 100];
768
                }
769 770
            } else {
                for (; i < run_end; i++, idx++) {
771
                    offset[0] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
772
                    if (offset[0] > 255U) {
773
                        av_log(ac->avccontext, AV_LOG_ERROR,
774
                               "%s (%d) out of range.\n", sf_str[0], offset[0]);
775 776 777 778 779 780 781 782 783 784 785 786 787
                        return -1;
                    }
                    sf[idx] = -ff_aac_pow2sf_tab[ offset[0] + sf_offset];
                }
            }
        }
    }
    return 0;
}

/**
 * Decode pulse data; reference: table 4.7.
 */
788 789 790
static int decode_pulses(Pulse *pulse, GetBitContext *gb,
                         const uint16_t *swb_offset, int num_swb)
{
791
    int i, pulse_swb;
792
    pulse->num_pulse = get_bits(gb, 2) + 1;
793 794 795 796
    pulse_swb        = get_bits(gb, 6);
    if (pulse_swb >= num_swb)
        return -1;
    pulse->pos[0]    = swb_offset[pulse_swb];
797
    pulse->pos[0]   += get_bits(gb, 5);
798 799
    if (pulse->pos[0] > 1023)
        return -1;
800 801
    pulse->amp[0]    = get_bits(gb, 4);
    for (i = 1; i < pulse->num_pulse; i++) {
802
        pulse->pos[i] = get_bits(gb, 5) + pulse->pos[i - 1];
803 804
        if (pulse->pos[i] > 1023)
            return -1;
805
        pulse->amp[i] = get_bits(gb, 4);
806
    }
807
    return 0;
808 809
}

810 811 812 813 814
/**
 * Decode Temporal Noise Shaping data; reference: table 4.48.
 *
 * @return  Returns error status. 0 - OK, !0 - error
 */
815 816 817
static int decode_tns(AACContext *ac, TemporalNoiseShaping *tns,
                      GetBitContext *gb, const IndividualChannelStream *ics)
{
818 819 820 821
    int w, filt, i, coef_len, coef_res, coef_compress;
    const int is8 = ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE;
    const int tns_max_order = is8 ? 7 : ac->m4ac.object_type == AOT_AAC_MAIN ? 20 : 12;
    for (w = 0; w < ics->num_windows; w++) {
822
        if ((tns->n_filt[w] = get_bits(gb, 2 - is8))) {
823 824
            coef_res = get_bits1(gb);

825 826
            for (filt = 0; filt < tns->n_filt[w]; filt++) {
                int tmp2_idx;
827
                tns->length[w][filt] = get_bits(gb, 6 - 2 * is8);
828

829
                if ((tns->order[w][filt] = get_bits(gb, 5 - 2 * is8)) > tns_max_order) {
830 831 832 833 834
                    av_log(ac->avccontext, AV_LOG_ERROR, "TNS filter order %d is greater than maximum %d.",
                           tns->order[w][filt], tns_max_order);
                    tns->order[w][filt] = 0;
                    return -1;
                }
835
                if (tns->order[w][filt]) {
836 837 838
                    tns->direction[w][filt] = get_bits1(gb);
                    coef_compress = get_bits1(gb);
                    coef_len = coef_res + 3 - coef_compress;
839
                    tmp2_idx = 2 * coef_compress + coef_res;
840

841 842
                    for (i = 0; i < tns->order[w][filt]; i++)
                        tns->coef[w][filt][i] = tns_tmp2_map[tmp2_idx][get_bits(gb, coef_len)];
843
                }
844
            }
845
        }
846 847 848 849
    }
    return 0;
}

850 851 852 853 854 855 856
/**
 * Decode Mid/Side data; reference: table 4.54.
 *
 * @param   ms_present  Indicates mid/side stereo presence. [0] mask is all 0s;
 *                      [1] mask is decoded from bitstream; [2] mask is all 1s;
 *                      [3] reserved for scalable AAC
 */
857 858 859
static void decode_mid_side_stereo(ChannelElement *cpe, GetBitContext *gb,
                                   int ms_present)
{
860 861 862 863 864 865 866 867
    int idx;
    if (ms_present == 1) {
        for (idx = 0; idx < cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb; idx++)
            cpe->ms_mask[idx] = get_bits1(gb);
    } else if (ms_present == 2) {
        memset(cpe->ms_mask, 1, cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb * sizeof(cpe->ms_mask[0]));
    }
}
868

869
#ifndef VMUL2
/**
 * Write two scaled table values to dst.
 *
 * The low nibble of idx selects the first entry of v, the next nibble the
 * second; both are multiplied by *scale.
 *
 * @return  pointer just past the two values written
 */
static inline float *VMUL2(float *dst, const float *v, unsigned idx,
                           const float *scale)
{
    const float s = *scale;

    dst[0] = v[ idx       & 15] * s;
    dst[1] = v[(idx >> 4) & 15] * s;
    return dst + 2;
}
#endif
879

880
#ifndef VMUL4
/**
 * Write four scaled table values to dst.
 *
 * Four consecutive 2-bit fields of idx, lowest first, each select an entry
 * of v; every selected value is multiplied by *scale.
 *
 * @return  pointer just past the four values written
 */
static inline float *VMUL4(float *dst, const float *v, unsigned idx,
                           const float *scale)
{
    const float s = *scale;

    dst[0] = v[ idx       & 3] * s;
    dst[1] = v[(idx >> 2) & 3] * s;
    dst[2] = v[(idx >> 4) & 3] * s;
    dst[3] = v[(idx >> 6) & 3] * s;
    return dst + 4;
}
#endif
892

893
#ifndef VMUL2S
894 895 896 897 898 899 900 901 902 903 904 905 906 907
static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
                            unsigned sign, const float *scale)
{
    union float754 s0, s1;

    s0.f = s1.f = *scale;
    s0.i ^= sign >> 1 << 31;
    s1.i ^= sign      << 31;

    *dst++ = v[idx    & 15] * s0.f;
    *dst++ = v[idx>>4 & 15] * s1.f;

    return dst;
}
908
#endif
909

910
#ifndef VMUL4S
911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934
static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
                            unsigned sign, const float *scale)
{
    unsigned nz = idx >> 12;
    union float754 s = { .f = *scale };
    union float754 t;

    t.i = s.i ^ (sign & 1<<31);
    *dst++ = v[idx    & 3] * t.f;

    sign <<= nz & 1; nz >>= 1;
    t.i = s.i ^ (sign & 1<<31);
    *dst++ = v[idx>>2 & 3] * t.f;

    sign <<= nz & 1; nz >>= 1;
    t.i = s.i ^ (sign & 1<<31);
    *dst++ = v[idx>>4 & 3] * t.f;

    sign <<= nz & 1; nz >>= 1;
    t.i = s.i ^ (sign & 1<<31);
    *dst++ = v[idx>>6 & 3] * t.f;

    return dst;
}
935
#endif
936

937 938 939 940 941 942 943 944 945 946 947 948
/**
 * Decode spectral data; reference: table 4.50.
 * Dequantize and scale spectral data; reference: 4.6.3.3.
 *
 * @param   coef            array of dequantized, scaled spectral data
 * @param   sf              array of scalefactors or intensity stereo positions
 * @param   pulse_present   set if pulses are present
 * @param   pulse           pointer to pulse data struct
 * @param   band_type       array of the used band type
 *
 * @return  Returns error status. 0 - OK, !0 - error
 */
949
static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
950
                                       GetBitContext *gb, const float sf[120],
951 952 953 954
                                       int pulse_present, const Pulse *pulse,
                                       const IndividualChannelStream *ics,
                                       enum BandType band_type[120])
{
955
    int i, k, g, idx = 0;
956 957
    const int c = 1024 / ics->num_windows;
    const uint16_t *offsets = ics->swb_offset;
958
    float *coef_base = coef;
959
    int err_idx;
960 961

    for (g = 0; g < ics->num_windows; g++)
962
        memset(coef + g * 128 + offsets[ics->max_sfb], 0, sizeof(float) * (c - offsets[ics->max_sfb]));
963 964

    for (g = 0; g < ics->num_window_groups; g++) {
965 966
        unsigned g_len = ics->group_len[g];

967
        for (i = 0; i < ics->max_sfb; i++, idx++) {
968 969 970
            const unsigned cbt_m1 = band_type[idx] - 1;
            float *cfo = coef + offsets[i];
            int off_len = offsets[i + 1] - offsets[i];
971
            int group;
972 973 974 975

            if (cbt_m1 >= INTENSITY_BT2 - 1) {
                for (group = 0; group < g_len; group++, cfo+=128) {
                    memset(cfo, 0, off_len * sizeof(float));
976
                }
977 978
            } else if (cbt_m1 == NOISE_BT - 1) {
                for (group = 0; group < g_len; group++, cfo+=128) {
979
                    float scale;
980
                    float band_energy;
981

982
                    for (k = 0; k < off_len; k++) {
983
                        ac->random_state  = lcg_random(ac->random_state);
984
                        cfo[k] = ac->random_state;
985
                    }
986

987
                    band_energy = ac->dsp.scalarproduct_float(cfo, cfo, off_len);
988
                    scale = sf[idx] / sqrtf(band_energy);
989
                    ac->dsp.vector_fmul_scalar(cfo, cfo, scale, off_len);
990
                }
991
            } else {
992 993 994 995
                const float *vq = ff_aac_codebook_vector_vals[cbt_m1];
                const uint16_t *cb_vector_idx = ff_aac_codebook_vector_idx[cbt_m1];
                VLC_TYPE (*vlc_tab)[2] = vlc_spectral[cbt_m1].table;
                const int cb_size = ff_aac_spectral_sizes[cbt_m1];
996
                OPEN_READER(re, gb);
997

998 999 1000 1001 1002
                switch (cbt_m1 >> 1) {
                case 0:
                    for (group = 0; group < g_len; group++, cfo+=128) {
                        float *cf = cfo;
                        int len = off_len;
1003

1004
                        do {
1005
                            int code;
1006 1007
                            unsigned cb_idx;

1008 1009 1010 1011 1012
                            UPDATE_CACHE(re, gb);
                            GET_VLC(code, re, gb, vlc_tab, 8, 2);

                            if (code >= cb_size) {
                                err_idx = code;
1013 1014 1015
                                goto err_cb_overflow;
                            }

1016
                            cb_idx = cb_vector_idx[code];
1017 1018
                            cf = VMUL4(cf, vq, cb_idx, sf + idx);
                        } while (len -= 4);
1019 1020 1021 1022 1023 1024 1025 1026
                    }
                    break;

                case 1:
                    for (group = 0; group < g_len; group++, cfo+=128) {
                        float *cf = cfo;
                        int len = off_len;

1027
                        do {
1028
                            int code;
1029 1030 1031 1032
                            unsigned nnz;
                            unsigned cb_idx;
                            uint32_t bits;

1033 1034 1035 1036 1037
                            UPDATE_CACHE(re, gb);
                            GET_VLC(code, re, gb, vlc_tab, 8, 2);

                            if (code >= cb_size) {
                                err_idx = code;
1038 1039 1040
                                goto err_cb_overflow;
                            }

1041 1042 1043 1044
#if MIN_CACHE_BITS < 20
                            UPDATE_CACHE(re, gb);
#endif
                            cb_idx = cb_vector_idx[code];
1045
                            nnz = cb_idx >> 8 & 15;
1046 1047
                            bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
                            LAST_SKIP_BITS(re, gb, nnz);
1048 1049
                            cf = VMUL4S(cf, vq, cb_idx, bits, sf + idx);
                        } while (len -= 4);
1050 1051 1052 1053 1054 1055 1056 1057
                    }
                    break;

                case 2:
                    for (group = 0; group < g_len; group++, cfo+=128) {
                        float *cf = cfo;
                        int len = off_len;

1058
                        do {
1059
                            int code;
1060 1061
                            unsigned cb_idx;

1062 1063 1064 1065 1066
                            UPDATE_CACHE(re, gb);
                            GET_VLC(code, re, gb, vlc_tab, 8, 2);

                            if (code >= cb_size) {
                                err_idx = code;
1067
                                goto err_cb_overflow;
1068
                            }
1069

1070
                            cb_idx = cb_vector_idx[code];
1071 1072
                            cf = VMUL2(cf, vq, cb_idx, sf + idx);
                        } while (len -= 2);
1073 1074 1075 1076 1077 1078 1079 1080 1081
                    }
                    break;

                case 3:
                case 4:
                    for (group = 0; group < g_len; group++, cfo+=128) {
                        float *cf = cfo;
                        int len = off_len;

1082
                        do {
1083
                            int code;
1084 1085 1086 1087
                            unsigned nnz;
                            unsigned cb_idx;
                            unsigned sign;

1088 1089 1090 1091 1092
                            UPDATE_CACHE(re, gb);
                            GET_VLC(code, re, gb, vlc_tab, 8, 2);

                            if (code >= cb_size) {
                                err_idx = code;
1093 1094 1095
                                goto err_cb_overflow;
                            }

1096
                            cb_idx = cb_vector_idx[code];
1097
                            nnz = cb_idx >> 8 & 15;
1098 1099
                            sign = SHOW_UBITS(re, gb, nnz) << (cb_idx >> 12);
                            LAST_SKIP_BITS(re, gb, nnz);
1100 1101
                            cf = VMUL2S(cf, vq, cb_idx, sign, sf + idx);
                        } while (len -= 2);
1102 1103 1104 1105 1106 1107 1108 1109 1110
                    }
                    break;

                default:
                    for (group = 0; group < g_len; group++, cfo+=128) {
                        float *cf = cfo;
                        uint32_t *icf = (uint32_t *) cf;
                        int len = off_len;

1111
                        do {
1112
                            int code;
1113 1114 1115 1116 1117
                            unsigned nzt, nnz;
                            unsigned cb_idx;
                            uint32_t bits;
                            int j;

1118 1119 1120 1121
                            UPDATE_CACHE(re, gb);
                            GET_VLC(code, re, gb, vlc_tab, 8, 2);

                            if (!code) {
1122 1123
                                *icf++ = 0;
                                *icf++ = 0;
1124 1125 1126
                                continue;
                            }

1127 1128
                            if (code >= cb_size) {
                                err_idx = code;
1129 1130 1131
                                goto err_cb_overflow;
                            }

1132
                            cb_idx = cb_vector_idx[code];
1133 1134
                            nnz = cb_idx >> 12;
                            nzt = cb_idx >> 8;
1135 1136
                            bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
                            LAST_SKIP_BITS(re, gb, nnz);
1137 1138 1139

                            for (j = 0; j < 2; j++) {
                                if (nzt & 1<<j) {
1140 1141
                                    uint32_t b;
                                    int n;
1142 1143
                                    /* The total length of escape_sequence must be < 22 bits according
                                       to the specification (i.e. max is 111111110xxxxxxxxxxxx). */
1144 1145 1146 1147 1148
                                    UPDATE_CACHE(re, gb);
                                    b = GET_CACHE(re, gb);
                                    b = 31 - av_log2(~b);

                                    if (b > 8) {
1149 1150 1151
                                        av_log(ac->avccontext, AV_LOG_ERROR, "error in spectral data, ESC overflow\n");
                                        return -1;
                                    }
1152 1153 1154 1155 1156 1157 1158 1159 1160 1161

#if MIN_CACHE_BITS < 21
                                    LAST_SKIP_BITS(re, gb, b + 1);
                                    UPDATE_CACHE(re, gb);
#else
                                    SKIP_BITS(re, gb, b + 1);
#endif
                                    b += 4;
                                    n = (1 << b) + SHOW_UBITS(re, gb, b);
                                    LAST_SKIP_BITS(re, gb, b);
1162
                                    *icf++ = cbrt_tab[n] | (bits & 1<<31);
1163 1164 1165
                                    bits <<= 1;
                                } else {
                                    unsigned v = ((const uint32_t*)vq)[cb_idx & 15];
1166
                                    *icf++ = (bits & 1<<31) | v;
1167
                                    bits <<= !!v;
1168
                                }
1169
                                cb_idx >>= 4;
1170
                            }
1171
                        } while (len -= 2);
1172

1173
                        ac->dsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len);
1174
                    }
1175
                }
1176 1177

                CLOSE_READER(re, gb);
1178 1179
            }
        }
1180
        coef += g_len << 7;
1181 1182 1183
    }

    if (pulse_present) {
1184
        idx = 0;
1185 1186 1187
        for (i = 0; i < pulse->num_pulse; i++) {
            float co = coef_base[ pulse->pos[i] ];
            while (offsets[idx + 1] <= pulse->pos[i])
1188 1189
                idx++;
            if (band_type[idx] != NOISE_BT && sf[idx]) {
1190 1191 1192 1193 1194 1195
                float ico = -pulse->amp[i];
                if (co) {
                    co /= sf[idx];
                    ico = co / sqrtf(sqrtf(fabsf(co))) + (co > 0 ? -ico : ico);
                }
                coef_base[ pulse->pos[i] ] = cbrtf(fabsf(ico)) * ico * sf[idx];
1196
            }
1197 1198 1199
        }
    }
    return 0;
1200 1201 1202 1203 1204 1205

err_cb_overflow:
    av_log(ac->avccontext, AV_LOG_ERROR,
           "Read beyond end of ff_aac_codebook_vectors[%d][]. index %d >= %d\n",
           band_type[idx], err_idx, ff_aac_spectral_sizes[band_type[idx]]);
    return -1;
1206 1207
}

1208 1209
static av_always_inline float flt16_round(float pf)
{
1210 1211 1212 1213
    union float754 tmp;
    tmp.f = pf;
    tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
    return tmp.f;
1214 1215
}

1216 1217
static av_always_inline float flt16_even(float pf)
{
1218 1219
    union float754 tmp;
    tmp.f = pf;
1220
    tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
1221
    return tmp.f;
1222 1223
}

1224 1225
static av_always_inline float flt16_trunc(float pf)
{
1226 1227 1228 1229
    union float754 pun;
    pun.f = pf;
    pun.i &= 0xFFFF0000U;
    return pun.f;
1230 1231
}

1232 1233 1234 1235 1236
/**
 * Run one predictor for one spectral coefficient.
 *
 * Computes the predicted value from the state in ps, adds it (scaled by
 * ac->sf_scale) to *coef when output_enable is set, then updates the
 * correlations, variances and delayed values in ps from the resulting
 * coefficient.
 *
 * @param   ac             decoder context, supplies sf_scale
 * @param   ps             predictor state, read and updated
 * @param   coef           coefficient to predict, modified only if output_enable
 * @param   output_enable  non-zero to add the prediction to *coef
 */
static void predict(AACContext *ac, PredictorState *ps, float *coef,
                    int output_enable)
{
    const float a     = 0.953125; // 61.0 / 64
    const float alpha = 0.90625;  // 29.0 / 32
    float k1 = 0, k2 = 0;
    float pv;
    float e0, e1;

    /* Prediction gains are only used while the variance exceeds 1. */
    if (ps->var0 > 1)
        k1 = ps->cor0 * flt16_even(a / ps->var0);
    if (ps->var1 > 1)
        k2 = ps->cor1 * flt16_even(a / ps->var1);

    pv = flt16_round(k1 * ps->r0 + k2 * ps->r1);
    if (output_enable)
        *coef += pv * ac->sf_scale;

    e0 = *coef / ac->sf_scale;
    e1 = e0 - k1 * ps->r0;

    /* Second stage first: it still needs the old r1. */
    ps->cor1 = flt16_trunc(alpha * ps->cor1 + ps->r1 * e1);
    ps->var1 = flt16_trunc(alpha * ps->var1 + 0.5 * (ps->r1 * ps->r1 + e1 * e1));
    ps->cor0 = flt16_trunc(alpha * ps->cor0 + ps->r0 * e0);
    ps->var0 = flt16_trunc(alpha * ps->var0 + 0.5 * (ps->r0 * ps->r0 + e0 * e0));

    /* r1 is derived from the old r0, so update it before r0. */
    ps->r1 = flt16_trunc(a * (ps->r0 - k1 * e0));
    ps->r0 = flt16_trunc(a * e0);
}

/**
 * Apply AAC-Main style frequency domain prediction.
 */
1263 1264
static void apply_prediction(AACContext *ac, SingleChannelElement *sce)
{
1265 1266 1267
    int sfb, k;

    if (!sce->ics.predictor_initialized) {
1268
        reset_all_predictors(sce->predictor_state);
1269 1270 1271 1272 1273 1274
        sce->ics.predictor_initialized = 1;
    }

    if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
        for (sfb = 0; sfb < ff_aac_pred_sfb_max[ac->m4ac.sampling_index]; sfb++) {
            for (k = sce->ics.swb_offset[sfb]; k < sce->ics.swb_offset[sfb + 1]; k++) {
1275
                predict(ac, &sce->predictor_state[k], &sce->coeffs[k],
1276
                        sce->ics.predictor_present && sce->ics.prediction_used[sfb]);
1277 1278 1279
            }
        }
        if (sce->ics.predictor_reset_group)
1280
            reset_predictor_group(sce->predictor_state, sce->ics.predictor_reset_group);
1281
    } else
1282
        reset_all_predictors(sce->predictor_state);
1283 1284
}

1285
/**
1286 1287 1288 1289 1290 1291 1292
 * Decode an individual_channel_stream payload; reference: table 4.44.
 *
 * @param   common_window   Channels have independent [0], or shared [1], Individual Channel Stream information.
 * @param   scale_flag      scalable [1] or non-scalable [0] AAC (Unused until scalable AAC is implemented.)
 *
 * @return  Returns error status. 0 - OK, !0 - error
 */
1293 1294 1295
static int decode_ics(AACContext *ac, SingleChannelElement *sce,
                      GetBitContext *gb, int common_window, int scale_flag)
{
1296
    Pulse pulse;
1297 1298 1299
    TemporalNoiseShaping    *tns = &sce->tns;
    IndividualChannelStream *ics = &sce->ics;
    float *out = sce->coeffs;
1300 1301
    int global_gain, pulse_present = 0;

1302 1303
    /* This assignment is to silence a GCC warning about the variable being used
     * uninitialized when in fact it always is.
1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325
     */
    pulse.num_pulse = 0;

    global_gain = get_bits(gb, 8);

    if (!common_window && !scale_flag) {
        if (decode_ics_info(ac, ics, gb, 0) < 0)
            return -1;
    }

    if (decode_band_types(ac, sce->band_type, sce->band_type_run_end, gb, ics) < 0)
        return -1;
    if (decode_scalefactors(ac, sce->sf, gb, global_gain, ics, sce->band_type, sce->band_type_run_end) < 0)
        return -1;

    pulse_present = 0;
    if (!scale_flag) {
        if ((pulse_present = get_bits1(gb))) {
            if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
                av_log(ac->avccontext, AV_LOG_ERROR, "Pulse tool not allowed in eight short sequence.\n");
                return -1;
            }
1326 1327 1328 1329
            if (decode_pulses(&pulse, gb, ics->swb_offset, ics->num_swb)) {
                av_log(ac->avccontext, AV_LOG_ERROR, "Pulse data corrupt or invalid.\n");
                return -1;
            }
1330 1331 1332 1333
        }
        if ((tns->present = get_bits1(gb)) && decode_tns(ac, tns, gb, ics))
            return -1;
        if (get_bits1(gb)) {
1334
            av_log_missing_feature(ac->avccontext, "SSR", 1);
1335 1336 1337 1338
            return -1;
        }
    }

1339
    if (decode_spectrum_and_dequant(ac, out, gb, sce->sf, pulse_present, &pulse, ics, sce->band_type) < 0)
1340
        return -1;
1341

1342
    if (ac->m4ac.object_type == AOT_AAC_MAIN && !common_window)
1343 1344
        apply_prediction(ac, sce);

1345 1346 1347
    return 0;
}

1348 1349 1350
/**
 * Mid/Side stereo decoding; reference: 4.6.8.1.3.
 */
1351
static void apply_mid_side_stereo(AACContext *ac, ChannelElement *cpe)
1352 1353
{
    const IndividualChannelStream *ics = &cpe->ch[0].ics;
1354 1355
    float *ch0 = cpe->ch[0].coeffs;
    float *ch1 = cpe->ch[1].coeffs;
1356
    int g, i, group, idx = 0;
1357
    const uint16_t *offsets = ics->swb_offset;
1358 1359 1360
    for (g = 0; g < ics->num_window_groups; g++) {
        for (i = 0; i < ics->max_sfb; i++, idx++) {
            if (cpe->ms_mask[idx] &&
1361
                    cpe->ch[0].band_type[idx] < NOISE_BT && cpe->ch[1].band_type[idx] < NOISE_BT) {
1362
                for (group = 0; group < ics->group_len[g]; group++) {
1363 1364 1365
                    ac->dsp.butterflies_float(ch0 + group * 128 + offsets[i],
                                              ch1 + group * 128 + offsets[i],
                                              offsets[i+1] - offsets[i]);
1366 1367 1368
                }
            }
        }
1369 1370
        ch0 += ics->group_len[g] * 128;
        ch1 += ics->group_len[g] * 128;
1371 1372 1373 1374 1375 1376 1377 1378 1379 1380
    }
}

/**
 * intensity stereo decoding; reference: 4.6.8.2.3
 *
 * @param   ms_present  Indicates mid/side stereo presence. [0] mask is all 0s;
 *                      [1] mask is decoded from bitstream; [2] mask is all 1s;
 *                      [3] reserved for scalable AAC
 */
1381 1382 1383 1384
static void apply_intensity_stereo(ChannelElement *cpe, int ms_present)
{
    const IndividualChannelStream *ics = &cpe->ch[1].ics;
    SingleChannelElement         *sce1 = &cpe->ch[1];
1385
    float *coef0 = cpe->ch[0].coeffs, *coef1 = cpe->ch[1].coeffs;
1386
    const uint16_t *offsets = ics->swb_offset;
1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399
    int g, group, i, k, idx = 0;
    int c;
    float scale;
    for (g = 0; g < ics->num_window_groups; g++) {
        for (i = 0; i < ics->max_sfb;) {
            if (sce1->band_type[idx] == INTENSITY_BT || sce1->band_type[idx] == INTENSITY_BT2) {
                const int bt_run_end = sce1->band_type_run_end[idx];
                for (; i < bt_run_end; i++, idx++) {
                    c = -1 + 2 * (sce1->band_type[idx] - 14);
                    if (ms_present)
                        c *= 1 - 2 * cpe->ms_mask[idx];
                    scale = c * sce1->sf[idx];
                    for (group = 0; group < ics->group_len[g]; group++)
1400 1401
                        for (k = offsets[i]; k < offsets[i + 1]; k++)
                            coef1[group * 128 + k] = scale * coef0[group * 128 + k];
1402 1403 1404 1405 1406 1407 1408
                }
            } else {
                int bt_run_end = sce1->band_type_run_end[idx];
                idx += bt_run_end - i;
                i    = bt_run_end;
            }
        }
1409 1410
        coef0 += ics->group_len[g] * 128;
        coef1 += ics->group_len[g] * 128;
1411 1412 1413
    }
}

1414 1415 1416 1417 1418 1419 1420
/**
 * Decode a channel_pair_element; reference: table 4.4.
 *
 * @param   elem_id Identifies the instance of a syntax element.
 *
 * @return  Returns error status. 0 - OK, !0 - error
 */
1421 1422
static int decode_cpe(AACContext *ac, GetBitContext *gb, ChannelElement *cpe)
{
1423 1424 1425 1426 1427 1428 1429 1430 1431 1432
    int i, ret, common_window, ms_present = 0;

    common_window = get_bits1(gb);
    if (common_window) {
        if (decode_ics_info(ac, &cpe->ch[0].ics, gb, 1))
            return -1;
        i = cpe->ch[1].ics.use_kb_window[0];
        cpe->ch[1].ics = cpe->ch[0].ics;
        cpe->ch[1].ics.use_kb_window[1] = i;
        ms_present = get_bits(gb, 2);
1433
        if (ms_present == 3) {
1434 1435
            av_log(ac->avccontext, AV_LOG_ERROR, "ms_present = 3 is reserved.\n");
            return -1;
1436
        } else if (ms_present)
1437 1438 1439 1440 1441 1442 1443
            decode_mid_side_stereo(cpe, gb, ms_present);
    }
    if ((ret = decode_ics(ac, &cpe->ch[0], gb, common_window, 0)))
        return ret;
    if ((ret = decode_ics(ac, &cpe->ch[1], gb, common_window, 0)))
        return ret;

1444 1445
    if (common_window) {
        if (ms_present)
1446
            apply_mid_side_stereo(ac, cpe);
1447 1448 1449 1450 1451
        if (ac->m4ac.object_type == AOT_AAC_MAIN) {
            apply_prediction(ac, &cpe->ch[0]);
            apply_prediction(ac, &cpe->ch[1]);
        }
    }
1452

1453
    apply_intensity_stereo(cpe, ms_present);
1454 1455 1456
    return 0;
}

1457 1458 1459 1460 1461 1462 1463
/**
 * Decode coupling_channel_element; reference: table 4.8.
 *
 * @param   elem_id Identifies the instance of a syntax element.
 *
 * @return  Returns error status. 0 - OK, !0 - error
 */
1464 1465
static int decode_cce(AACContext *ac, GetBitContext *gb, ChannelElement *che)
{
1466
    int num_gain = 0;
1467
    int c, g, sfb, ret;
1468 1469
    int sign;
    float scale;
1470 1471
    SingleChannelElement *sce = &che->ch[0];
    ChannelCoupling     *coup = &che->coup;
1472

1473
    coup->coupling_point = 2 * get_bits1(gb);
1474 1475 1476 1477 1478 1479 1480 1481 1482 1483
    coup->num_coupled = get_bits(gb, 3);
    for (c = 0; c <= coup->num_coupled; c++) {
        num_gain++;
        coup->type[c] = get_bits1(gb) ? TYPE_CPE : TYPE_SCE;
        coup->id_select[c] = get_bits(gb, 4);
        if (coup->type[c] == TYPE_CPE) {
            coup->ch_select[c] = get_bits(gb, 2);
            if (coup->ch_select[c] == 3)
                num_gain++;
        } else
1484
            coup->ch_select[c] = 2;
1485
    }
1486
    coup->coupling_point += get_bits1(gb) || (coup->coupling_point >> 1);
1487

1488
    sign  = get_bits(gb, 1);
1489
    scale = pow(2., pow(2., (int)get_bits(gb, 2) - 3));
1490 1491 1492 1493 1494

    if ((ret = decode_ics(ac, sce, gb, 0, 0)))
        return ret;

    for (c = 0; c < num_gain; c++) {
1495 1496
        int idx  = 0;
        int cge  = 1;
1497 1498 1499 1500 1501
        int gain = 0;
        float gain_cache = 1.;
        if (c) {
            cge = coup->coupling_point == AFTER_IMDCT ? 1 : get_bits1(gb);
            gain = cge ? get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60: 0;
1502
            gain_cache = pow(scale, -gain);
1503
        }
1504 1505 1506
        if (coup->coupling_point == AFTER_IMDCT) {
            coup->gain[c][0] = gain_cache;
        } else {
1507 1508 1509 1510 1511
            for (g = 0; g < sce->ics.num_window_groups; g++) {
                for (sfb = 0; sfb < sce->ics.max_sfb; sfb++, idx++) {
                    if (sce->band_type[idx] != ZERO_BT) {
                        if (!cge) {
                            int t = get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
1512
                            if (t) {
1513 1514 1515 1516 1517 1518 1519
                                int s = 1;
                                t = gain += t;
                                if (sign) {
                                    s  -= 2 * (t & 0x1);
                                    t >>= 1;
                                }
                                gain_cache = pow(scale, -t) * s;
1520 1521
                            }
                        }
1522
                        coup->gain[c][idx] = gain_cache;
1523 1524
                    }
                }
1525 1526
            }
        }
1527 1528 1529 1530
    }
    return 0;
}

1531 1532
/**
 * Decode Spectral Band Replication extension data; reference: table 4.55.
1533 1534 1535
 *
 * @param   crc flag indicating the presence of CRC checksum
 * @param   cnt length of TYPE_FIL syntactic element in bytes
1536
 *
1537 1538
 * @return  Returns number of bytes consumed from the TYPE_FIL element.
 */
1539 1540 1541
static int decode_sbr_extension(AACContext *ac, GetBitContext *gb,
                                int crc, int cnt)
{
1542
    // TODO : sbr_extension implementation
1543
    av_log_missing_feature(ac->avccontext, "SBR", 0);
1544
    skip_bits_long(gb, 8 * cnt - 4); // -4 due to reading extension type
1545 1546 1547
    return cnt;
}

1548 1549 1550 1551 1552
/**
 * Parse whether channels are to be excluded from Dynamic Range Compression; reference: table 4.53.
 *
 * @return  Returns number of bytes consumed.
 */
1553 1554 1555
static int decode_drc_channel_exclusions(DynamicRangeControl *che_drc,
                                         GetBitContext *gb)
{
1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566
    int i;
    int num_excl_chan = 0;

    do {
        for (i = 0; i < 7; i++)
            che_drc->exclude_mask[num_excl_chan++] = get_bits1(gb);
    } while (num_excl_chan < MAX_CHANNELS - 7 && get_bits1(gb));

    return num_excl_chan / 7;
}

1567 1568 1569 1570 1571 1572 1573
/**
 * Decode dynamic range information; reference: table 4.52.
 *
 * @param   cnt length of TYPE_FIL syntactic element in bytes
 *
 * @return  Returns number of bytes consumed.
 */
1574 1575 1576 1577
static int decode_dynamic_range(DynamicRangeControl *che_drc,
                                GetBitContext *gb, int cnt)
{
    int n             = 1;
1578 1579 1580 1581
    int drc_num_bands = 1;
    int i;

    /* pce_tag_present? */
1582
    if (get_bits1(gb)) {
1583 1584 1585 1586 1587 1588
        che_drc->pce_instance_tag  = get_bits(gb, 4);
        skip_bits(gb, 4); // tag_reserved_bits
        n++;
    }

    /* excluded_chns_present? */
1589
    if (get_bits1(gb)) {
1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627
        n += decode_drc_channel_exclusions(che_drc, gb);
    }

    /* drc_bands_present? */
    if (get_bits1(gb)) {
        che_drc->band_incr            = get_bits(gb, 4);
        che_drc->interpolation_scheme = get_bits(gb, 4);
        n++;
        drc_num_bands += che_drc->band_incr;
        for (i = 0; i < drc_num_bands; i++) {
            che_drc->band_top[i] = get_bits(gb, 8);
            n++;
        }
    }

    /* prog_ref_level_present? */
    if (get_bits1(gb)) {
        che_drc->prog_ref_level = get_bits(gb, 7);
        skip_bits1(gb); // prog_ref_level_reserved_bits
        n++;
    }

    for (i = 0; i < drc_num_bands; i++) {
        che_drc->dyn_rng_sgn[i] = get_bits1(gb);
        che_drc->dyn_rng_ctl[i] = get_bits(gb, 7);
        n++;
    }

    return n;
}

/**
 * Decode extension data (incomplete); reference: table 4.51.
 *
 * @param   cnt length of TYPE_FIL syntactic element in bytes
 *
 * @return Returns number of bytes consumed
 */
1628 1629
static int decode_extension_payload(AACContext *ac, GetBitContext *gb, int cnt)
{
1630 1631 1632
    int crc_flag = 0;
    int res = cnt;
    switch (get_bits(gb, 4)) { // extension type
1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646
    case EXT_SBR_DATA_CRC:
        crc_flag++;
    case EXT_SBR_DATA:
        res = decode_sbr_extension(ac, gb, crc_flag, cnt);
        break;
    case EXT_DYNAMIC_RANGE:
        res = decode_dynamic_range(&ac->che_drc, gb, cnt);
        break;
    case EXT_FILL:
    case EXT_FILL_DATA:
    case EXT_DATA_ELEMENT:
    default:
        skip_bits_long(gb, 8 * cnt - 4);
        break;
1647 1648 1649 1650
    };
    return res;
}

1651 1652 1653 1654 1655 1656
/**
 * Decode Temporal Noise Shaping filter coefficients and apply all-pole filters; reference: 4.6.9.3.
 *
 * @param   decode  1 if tool is used normally, 0 if tool is used in LTP.
 * @param   coef    spectral coefficients
 */
1657 1658 1659 1660
static void apply_tns(float coef[1024], TemporalNoiseShaping *tns,
                      IndividualChannelStream *ics, int decode)
{
    const int mmm = FFMIN(ics->tns_max_bands, ics->max_sfb);
Robert Swain's avatar
Robert Swain committed
1661
    int w, filt, m, i;
1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673
    int bottom, top, order, start, end, size, inc;
    float lpc[TNS_MAX_ORDER];

    for (w = 0; w < ics->num_windows; w++) {
        bottom = ics->num_swb;
        for (filt = 0; filt < tns->n_filt[w]; filt++) {
            top    = bottom;
            bottom = FFMAX(0, top - tns->length[w][filt]);
            order  = tns->order[w][filt];
            if (order == 0)
                continue;

1674 1675
            // tns_decode_coef
            compute_lpc_coefs(tns->coef[w][filt], order, lpc, 0, 0, 0);
1676

1677 1678 1679 1680 1681
            start = ics->swb_offset[FFMIN(bottom, mmm)];
            end   = ics->swb_offset[FFMIN(   top, mmm)];
            if ((size = end - start) <= 0)
                continue;
            if (tns->direction[w][filt]) {
1682 1683
                inc = -1;
                start = end - 1;
1684 1685 1686 1687 1688 1689 1690 1691
            } else {
                inc = 1;
            }
            start += w * 128;

            // ar filter
            for (m = 0; m < size; m++, start += inc)
                for (i = 1; i <= FFMIN(m, order); i++)
1692
                    coef[start] -= coef[start - i * inc] * lpc[i - 1];
1693 1694 1695 1696
        }
    }
}

1697 1698 1699
/**
 * Conduct IMDCT and windowing.
 */
1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710
static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce)
{
    IndividualChannelStream *ics = &sce->ics;
    float *in    = sce->coeffs;
    float *out   = sce->ret;
    float *saved = sce->saved;
    const float *swindow      = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
    const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
    const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
    float *buf  = ac->buf_mdct;
    float *temp = ac->temp;
1711 1712
    int i;

1713
    // imdct
1714 1715 1716 1717 1718
    if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
        if (ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE)
            av_log(ac->avccontext, AV_LOG_WARNING,
                   "Transition from an ONLY_LONG or LONG_STOP to an EIGHT_SHORT sequence detected. "
                   "If you heard an audible artifact, please submit the sample to the FFmpeg developers.\n");
1719 1720
        for (i = 0; i < 1024; i += 128)
            ff_imdct_half(&ac->mdct_small, buf + i, in + i);
1721
    } else
1722
        ff_imdct_half(&ac->mdct, buf, in);
1723 1724 1725 1726 1727 1728 1729 1730

    /* window overlapping
     * NOTE: To simplify the overlapping code, all 'meaningless' short to long
     * and long to short transitions are considered to be short to short
     * transitions. This leaves just two cases (long to long and short to short)
     * with a little special sauce for EIGHT_SHORT_SEQUENCE.
     */
    if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
1731
            (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
1732
        ac->dsp.vector_fmul_window(    out,               saved,            buf,         lwindow_prev, ac->add_bias, 512);
1733
    } else {
1734 1735
        for (i = 0; i < 448; i++)
            out[i] = saved[i] + ac->add_bias;
1736

1737
        if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1738 1739 1740 1741 1742 1743
            ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448,      buf + 0*128, swindow_prev, ac->add_bias, 64);
            ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow,      ac->add_bias, 64);
            ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow,      ac->add_bias, 64);
            ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow,      ac->add_bias, 64);
            ac->dsp.vector_fmul_window(temp,              buf + 3*128 + 64, buf + 4*128, swindow,      ac->add_bias, 64);
            memcpy(                    out + 448 + 4*128, temp, 64 * sizeof(float));
1744
        } else {
1745
            ac->dsp.vector_fmul_window(out + 448,         saved + 448,      buf,         swindow_prev, ac->add_bias, 64);
1746
            for (i = 576; i < 1024; i++)
1747
                out[i] = buf[i-512] + ac->add_bias;
1748 1749
        }
    }
1750

1751 1752
    // buffer update
    if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1753 1754 1755 1756 1757 1758
        for (i = 0; i < 64; i++)
            saved[i] = temp[64 + i] - ac->add_bias;
        ac->dsp.vector_fmul_window(saved + 64,  buf + 4*128 + 64, buf + 5*128, swindow, 0, 64);
        ac->dsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 0, 64);
        ac->dsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 0, 64);
        memcpy(                    saved + 448, buf + 7*128 + 64,  64 * sizeof(float));
1759
    } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
1760 1761
        memcpy(                    saved,       buf + 512,        448 * sizeof(float));
        memcpy(                    saved + 448, buf + 7*128 + 64,  64 * sizeof(float));
1762
    } else { // LONG_STOP or ONLY_LONG
1763
        memcpy(                    saved,       buf + 512,        512 * sizeof(float));
1764 1765 1766
    }
}

1767 1768 1769 1770 1771
/**
 * Apply dependent channel coupling (applied before IMDCT).
 *
 * @param   index   index into coupling gain array
 */
1772 1773 1774 1775 1776 1777 1778 1779
static void apply_dependent_coupling(AACContext *ac,
                                     SingleChannelElement *target,
                                     ChannelElement *cce, int index)
{
    IndividualChannelStream *ics = &cce->ch[0].ics;
    const uint16_t *offsets = ics->swb_offset;
    float *dest = target->coeffs;
    const float *src = cce->ch[0].coeffs;
1780
    int g, i, group, k, idx = 0;
1781
    if (ac->m4ac.object_type == AOT_AAC_LTP) {
1782 1783 1784 1785 1786 1787
        av_log(ac->avccontext, AV_LOG_ERROR,
               "Dependent coupling is not supported together with LTP\n");
        return;
    }
    for (g = 0; g < ics->num_window_groups; g++) {
        for (i = 0; i < ics->max_sfb; i++, idx++) {
1788
            if (cce->ch[0].band_type[idx] != ZERO_BT) {
1789
                const float gain = cce->coup.gain[index][idx];
1790
                for (group = 0; group < ics->group_len[g]; group++) {
1791
                    for (k = offsets[i]; k < offsets[i + 1]; k++) {
1792
                        // XXX dsputil-ize
1793
                        dest[group * 128 + k] += gain * src[group * 128 + k];
1794 1795 1796 1797
                    }
                }
            }
        }
1798 1799
        dest += ics->group_len[g] * 128;
        src  += ics->group_len[g] * 128;
1800 1801 1802 1803 1804 1805 1806 1807
    }
}

/**
 * Apply independent channel coupling (applied after IMDCT).
 *
 * @param   index   index into coupling gain array
 */
1808 1809 1810 1811
static void apply_independent_coupling(AACContext *ac,
                                       SingleChannelElement *target,
                                       ChannelElement *cce, int index)
{
1812
    int i;
1813 1814
    const float gain = cce->coup.gain[index][0];
    const float bias = ac->add_bias;
1815 1816
    const float *src = cce->ch[0].ret;
    float *dest = target->ret;
1817

1818
    for (i = 0; i < 1024; i++)
1819
        dest[i] += gain * (src[i] - bias);
1820 1821
}

1822 1823 1824 1825 1826 1827
/**
 * channel coupling transformation interface
 *
 * @param   index   index into coupling gain array
 * @param   apply_coupling_method   pointer to (in)dependent coupling function
 */
1828 1829 1830 1831
static void apply_channel_coupling(AACContext *ac, ChannelElement *cc,
                                   enum RawDataBlockType type, int elem_id,
                                   enum CouplingPoint coupling_point,
                                   void (*apply_coupling_method)(AACContext *ac, SingleChannelElement *target, ChannelElement *cce, int index))
1832
{
1833 1834 1835 1836 1837 1838 1839
    int i, c;

    for (i = 0; i < MAX_ELEM_ID; i++) {
        ChannelElement *cce = ac->che[TYPE_CCE][i];
        int index = 0;

        if (cce && cce->coup.coupling_point == coupling_point) {
1840
            ChannelCoupling *coup = &cce->coup;
1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852

            for (c = 0; c <= coup->num_coupled; c++) {
                if (coup->type[c] == type && coup->id_select[c] == elem_id) {
                    if (coup->ch_select[c] != 1) {
                        apply_coupling_method(ac, &cc->ch[0], cce, index);
                        if (coup->ch_select[c] != 0)
                            index++;
                    }
                    if (coup->ch_select[c] != 2)
                        apply_coupling_method(ac, &cc->ch[1], cce, index++);
                } else
                    index += 1 + (coup->ch_select[c] == 3);
1853 1854 1855 1856 1857 1858 1859 1860
            }
        }
    }
}

/**
 * Convert spectral data to float samples, applying all supported tools as appropriate.
 */
1861 1862
static void spectral_to_sample(AACContext *ac)
{
1863 1864
    int i, type;
    for (type = 3; type >= 0; type--) {
1865
        for (i = 0; i < MAX_ELEM_ID; i++) {
1866
            ChannelElement *che = ac->che[type][i];
1867 1868
            if (che) {
                if (type <= TYPE_CPE)
1869
                    apply_channel_coupling(ac, che, type, i, BEFORE_TNS, apply_dependent_coupling);
1870
                if (che->ch[0].tns.present)
1871
                    apply_tns(che->ch[0].coeffs, &che->ch[0].tns, &che->ch[0].ics, 1);
1872
                if (che->ch[1].tns.present)
1873
                    apply_tns(che->ch[1].coeffs, &che->ch[1].tns, &che->ch[1].ics, 1);
1874
                if (type <= TYPE_CPE)
1875
                    apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, apply_dependent_coupling);
1876
                if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT)
1877
                    imdct_and_windowing(ac, &che->ch[0]);
1878
                if (type == TYPE_CPE)
1879
                    imdct_and_windowing(ac, &che->ch[1]);
1880
                if (type <= TYPE_CCE)
1881
                    apply_channel_coupling(ac, che, type, i, AFTER_IMDCT, apply_independent_coupling);
1882 1883 1884 1885 1886
            }
        }
    }
}

1887 1888
/**
 * Parse an ADTS frame header and update the decoder configuration from it.
 *
 * While the output configuration is not locked, a nonzero channel
 * configuration in the header triggers a trial output configuration; a zero
 * one resets the configuration state to OC_NONE.
 *
 * @param   ac  decoder context to update
 * @param   gb  bitstream reader positioned at the ADTS header
 *
 * @return  the value returned by ff_aac_parse_header() (header fields are
 *          only applied when it is positive), -7 if the channel
 *          configuration could not be set up, or -1 if the frame carries
 *          more than one raw data block.
 */
static int parse_adts_frame_header(AACContext *ac, GetBitContext *gb)
{
    int size;
    AACADTSHeaderInfo hdr_info;

    size = ff_aac_parse_header(gb, &hdr_info);
    if (size > 0) {
        if (ac->output_configured != OC_LOCKED && hdr_info.chan_config) {
            enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
            memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
            ac->m4ac.chan_config = hdr_info.chan_config;
            if (set_default_channel_config(ac, new_che_pos, hdr_info.chan_config))
                return -7;
            if (output_configure(ac, ac->che_pos, new_che_pos, hdr_info.chan_config, OC_TRIAL_FRAME))
                return -7;
        } else if (ac->output_configured != OC_LOCKED) {
            ac->output_configured = OC_NONE;
        }
        if (ac->output_configured != OC_LOCKED)
            ac->m4ac.sbr = -1;
        ac->m4ac.sample_rate     = hdr_info.sample_rate;
        ac->m4ac.sampling_index  = hdr_info.sampling_index;
        ac->m4ac.object_type     = hdr_info.object_type;
        /* only fill in the codec context's sample rate if it is still unset */
        if (!ac->avccontext->sample_rate)
            ac->avccontext->sample_rate = hdr_info.sample_rate;
        if (hdr_info.num_aac_frames == 1) {
            /* skip the 16-bit CRC when the header signals one */
            if (!hdr_info.crc_absent)
                skip_bits(gb, 16);
        } else {
            av_log_missing_feature(ac->avccontext, "More than one AAC RDB per ADTS frame is", 0);
            return -1;
        }
    }
    return size;
}

1923 1924 1925
/**
 * Decode one packet of AAC data into interleaved 16-bit samples.
 *
 * An ADTS header is parsed first when the packet starts with the 0xfff
 * syncword. Syntactic elements are then decoded until TYPE_END, the
 * spectral data is converted to samples, and 1024 samples per channel are
 * written to data. The very first successfully decoded frame produces no
 * output (*data_size is set to 0).
 *
 * @param   avccontext  codec context
 * @param   data        output buffer for interleaved int16_t samples
 * @param   data_size   in: size of data in bytes; out: number of bytes written
 * @param   avpkt       input packet
 *
 * @return  the packet size on success, -1 or the failing element decoder's
 *          nonzero error code on failure.
 */
static int aac_decode_frame(AVCodecContext *avccontext, void *data,
                            int *data_size, AVPacket *avpkt)
{
    const uint8_t *buf = avpkt->data;
    int buf_size = avpkt->size;
    AACContext *ac = avccontext->priv_data;
    ChannelElement *che = NULL;
    GetBitContext gb;
    enum RawDataBlockType elem_type;
    int err, elem_id, data_size_tmp;

    init_get_bits(&gb, buf, buf_size * 8);

    /* 12-bit ADTS syncword */
    if (show_bits(&gb, 12) == 0xfff) {
        if (parse_adts_frame_header(ac, &gb) < 0) {
            av_log(avccontext, AV_LOG_ERROR, "Error decoding AAC frame header.\n");
            return -1;
        }
        if (ac->m4ac.sampling_index > 12) {
            av_log(ac->avccontext, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->m4ac.sampling_index);
            return -1;
        }
    }

    // parse
    while ((elem_type = get_bits(&gb, 3)) != TYPE_END) {
        elem_id = get_bits(&gb, 4);

        /* element types below TYPE_DSE carry channel data and need an
         * allocated channel element */
        if (elem_type < TYPE_DSE && !(che=get_che(ac, elem_type, elem_id))) {
            av_log(ac->avccontext, AV_LOG_ERROR, "channel element %d.%d is not allocated\n", elem_type, elem_id);
            return -1;
        }

        switch (elem_type) {

        case TYPE_SCE:
            err = decode_ics(ac, &che->ch[0], &gb, 0, 0);
            break;

        case TYPE_CPE:
            err = decode_cpe(ac, &gb, che);
            break;

        case TYPE_CCE:
            err = decode_cce(ac, &gb, che);
            break;

        case TYPE_LFE:
            err = decode_ics(ac, &che->ch[0], &gb, 0, 0);
            break;

        case TYPE_DSE:
            skip_data_stream_element(&gb);
            err = 0;
            break;

        case TYPE_PCE: {
            enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
            memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
            if ((err = decode_pce(ac, new_che_pos, &gb)))
                break;
            if (ac->output_configured > OC_TRIAL_PCE)
                av_log(avccontext, AV_LOG_ERROR,
                       "Not evaluating a further program_config_element as this construct is dubious at best.\n");
            else
                err = output_configure(ac, ac->che_pos, new_che_pos, 0, OC_TRIAL_PCE);
            break;
        }

        case TYPE_FIL:
            /* for fill elements the 4-bit field is a byte count; the
             * value 15 means an extra 8-bit count follows */
            if (elem_id == 15)
                elem_id += get_bits(&gb, 8) - 1;
            /* each payload reports how many bytes it consumed */
            while (elem_id > 0)
                elem_id -= decode_extension_payload(ac, &gb, elem_id);
            err = 0; /* FIXME */
            break;

        default:
            err = -1; /* should not happen, but keeps compiler happy */
            break;
        }

        if (err)
            return err;
    }

    spectral_to_sample(ac);

    /* the first decoded frame is not output; only the is_saved flag is set */
    if (!ac->is_saved) {
        ac->is_saved = 1;
        *data_size = 0;
        return buf_size;
    }

    data_size_tmp = 1024 * avccontext->channels * sizeof(int16_t);
    if (*data_size < data_size_tmp) {
        av_log(avccontext, AV_LOG_ERROR,
               "Output buffer too small (%d) or trying to output too many samples (%d) for this frame.\n",
               *data_size, data_size_tmp);
        return -1;
    }
    *data_size = data_size_tmp;

    ac->dsp.float_to_int16_interleave(data, (const float **)ac->output_data, 1024, avccontext->channels);

    /* any configuration that produced output is locked from here on */
    if (ac->output_configured)
        ac->output_configured = OC_LOCKED;

    return buf_size;
}

2034 2035 2036
/**
 * Release the decoder's resources: every allocated channel element and
 * both MDCT contexts.
 *
 * @param   avccontext  codec context whose private data is the AACContext
 *
 * @return  always 0
 */
static av_cold int aac_decode_close(AVCodecContext *avccontext)
{
    AACContext *ac = avccontext->priv_data;
    int i, type;

    for (i = 0; i < MAX_ELEM_ID; i++) {
        for (type = 0; type < 4; type++)
            av_freep(&ac->che[type][i]);
    }

    ff_mdct_end(&ac->mdct);
    ff_mdct_end(&ac->mdct_small);
    return 0;
}

AVCodec aac_decoder = {
    "aac",
    CODEC_TYPE_AUDIO,
    CODEC_ID_AAC,
    sizeof(AACContext),
    aac_decode_init,
    NULL,
    aac_decode_close,
    aac_decode_frame,
    .long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"),
2059
    .sample_fmts = (const enum SampleFormat[]) {
2060 2061
        SAMPLE_FMT_S16,SAMPLE_FMT_NONE
    },
2062
    .channel_layouts = aac_channel_layout,
2063
};