/*
 * AAC decoder
 * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
 * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
 * Copyright (c) 2008-2013 Alex Converse <alex.converse@gmail.com>
 *
 * AAC LATM decoder
 * Copyright (c) 2008-2010 Paul Kendall <paul@kcbbs.gen.nz>
 * Copyright (c) 2010      Janne Grunau <janne-libav@jannau.net>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * AAC decoder
 * @author Oded Shimon  ( ods15 ods15 dyndns org )
 * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
 */

/* Configuration macros; they are defined before any of the headers below
 * are included. */
#define FFT_FLOAT 1
#define FFT_FIXED_32 0
#define USE_FIXED 0

#include "libavutil/float_dsp.h"
#include "libavutil/opt.h"
#include "avcodec.h"
#include "internal.h"
#include "get_bits.h"
#include "fft.h"
#include "mdct15.h"
#include "lpc.h"
#include "kbdwin.h"
#include "sinewin.h"

#include "aac.h"
#include "aactab.h"
#include "aacdectab.h"
#include "adts_header.h"
#include "cbrt_data.h"
#include "sbr.h"
#include "aacsbr.h"
#include "mpeg4audio.h"
#include "profiles.h"
#include "libavutil/intfloat.h"

#include <errno.h>
#include <math.h>
#include <stdint.h>
#include <string.h>

/* Architecture-specific headers. The VMUL* helpers further down are wrapped in
 * #ifndef guards, so a header included here may supply its own versions. */
#if ARCH_ARM
#   include "arm/aac.h"
#elif ARCH_MIPS
#   include "mips/aacdec_mips.h"
#endif

/**
 * Reset a predictor state to its initial values.
 *
 * r0, r1, cor0 and cor1 are cleared to zero; var0 and var1 are set to one.
 *
 * @param   ps  predictor state to reset
 */
static av_always_inline void reset_predict_state(PredictorState *ps)
{
    ps->r0   = 0.0f;
    ps->r1   = 0.0f;
    ps->cor0 = 0.0f;
    ps->cor1 = 0.0f;
    ps->var0 = 1.0f;
    ps->var1 = 1.0f;
}

#ifndef VMUL2
/**
 * Write two scaled table entries selected by two 4-bit indices packed in idx.
 *
 * @param   dst     output position; two floats are written
 * @param   v       lookup table, indexed with values 0..15
 * @param   idx     packed indices: bits 0-3 select the first entry,
 *                  bits 4-7 select the second
 * @param   scale   pointer to the factor both entries are multiplied by
 * @return  dst advanced past the two written values
 */
static inline float *VMUL2(float *dst, const float *v, unsigned idx,
                           const float *scale)
{
    float s = *scale;
    *dst++ = v[idx    & 15] * s;
    *dst++ = v[idx>>4 & 15] * s;
    return dst;
}
#endif

#ifndef VMUL4
/**
 * Write four scaled table entries selected by four 2-bit indices packed in idx.
 *
 * @param   dst     output position; four floats are written
 * @param   v       lookup table, indexed with values 0..3
 * @param   idx     packed indices: bits 0-1, 2-3, 4-5 and 6-7 select the
 *                  first to fourth entry respectively
 * @param   scale   pointer to the factor all entries are multiplied by
 * @return  dst advanced past the four written values
 */
static inline float *VMUL4(float *dst, const float *v, unsigned idx,
                           const float *scale)
{
    float s = *scale;
    *dst++ = v[idx    & 3] * s;
    *dst++ = v[idx>>2 & 3] * s;
    *dst++ = v[idx>>4 & 3] * s;
    *dst++ = v[idx>>6 & 3] * s;
    return dst;
}
#endif

#ifndef VMUL2S
107 108 109
static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
                            unsigned sign, const float *scale)
{
110
    union av_intfloat32 s0, s1;
111 112 113 114 115 116 117 118 119 120

    s0.f = s1.f = *scale;
    s0.i ^= sign >> 1 << 31;
    s1.i ^= sign      << 31;

    *dst++ = v[idx    & 15] * s0.f;
    *dst++ = v[idx>>4 & 15] * s1.f;

    return dst;
}
121
#endif
122

123
#ifndef VMUL4S
124 125 126 127
static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
                            unsigned sign, const float *scale)
{
    unsigned nz = idx >> 12;
128 129
    union av_intfloat32 s = { .f = *scale };
    union av_intfloat32 t;
130

131
    t.i = s.i ^ (sign & 1U<<31);
132 133 134
    *dst++ = v[idx    & 3] * t.f;

    sign <<= nz & 1; nz >>= 1;
135
    t.i = s.i ^ (sign & 1U<<31);
136 137 138
    *dst++ = v[idx>>2 & 3] * t.f;

    sign <<= nz & 1; nz >>= 1;
139 140
    t.i = s.i ^ (sign & 1U<<31);
    *dst++ = v[idx>>4 & 3] * t.f;
141

142 143 144
    sign <<= nz & 1;
    t.i = s.i ^ (sign & 1U<<31);
    *dst++ = v[idx>>6 & 3] * t.f;
145

146
    return dst;
147
}
148
#endif
149

150
static av_always_inline float flt16_round(float pf)
151
{
152 153 154 155
    union av_intfloat32 tmp;
    tmp.f = pf;
    tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
    return tmp.f;
156 157
}

158
static av_always_inline float flt16_even(float pf)
159
{
160 161 162 163
    union av_intfloat32 tmp;
    tmp.f = pf;
    tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
    return tmp.f;
164 165
}

166
static av_always_inline float flt16_trunc(float pf)
167
{
168 169 170 171
    union av_intfloat32 pun;
    pun.f = pf;
    pun.i &= 0xFFFF0000U;
    return pun.f;
172 173
}

174 175
/**
 * Run one step of the predictor for a single coefficient.
 *
 * A predicted value is computed from the stored state. When output_enable is
 * set it is added to *coef. The state is then updated from the resulting
 * coefficient value, whether or not the prediction was applied.
 *
 * @param   ps             predictor state, read and updated
 * @param   coef           coefficient; modified only when output_enable is set
 * @param   output_enable  non-zero to add the predicted value to *coef
 */
static av_always_inline void predict(PredictorState *ps, float *coef,
                                     int output_enable)
{
    const float a     = 0.953125; // 61.0 / 64
    const float alpha = 0.90625;  // 29.0 / 32
    float e0, e1;
    float pv;
    float k1, k2;
    /* Work on copies so that every update below uses the old state. */
    float   r0 = ps->r0,     r1 = ps->r1;
    float cor0 = ps->cor0, cor1 = ps->cor1;
    float var0 = ps->var0, var1 = ps->var1;

    /* Gains are forced to zero while the variance is not above 1. */
    k1 = var0 > 1 ? cor0 * flt16_even(a / var0) : 0;
    k2 = var1 > 1 ? cor1 * flt16_even(a / var1) : 0;

    pv = flt16_round(k1 * r0 + k2 * r1);
    if (output_enable)
        *coef += pv;

    e0 = *coef;
    e1 = e0 - k1 * r0;

    ps->cor1 = flt16_trunc(alpha * cor1 + r1 * e1);
    ps->var1 = flt16_trunc(alpha * var1 + 0.5f * (r1 * r1 + e1 * e1));
    ps->cor0 = flt16_trunc(alpha * cor0 + r0 * e0);
    ps->var0 = flt16_trunc(alpha * var0 + 0.5f * (r0 * r0 + e0 * e0));

    ps->r1 = flt16_trunc(a * (r0 - k1 * e0));
    ps->r0 = flt16_trunc(a * e0);
}

/**
 * Apply dependent channel coupling (applied before IMDCT).
 *
 * @param   index   index into coupling gain array
 */
210 211 212 213 214 215 216 217
static void apply_dependent_coupling(AACContext *ac,
                                     SingleChannelElement *target,
                                     ChannelElement *cce, int index)
{
    IndividualChannelStream *ics = &cce->ch[0].ics;
    const uint16_t *offsets = ics->swb_offset;
    float *dest = target->coeffs;
    const float *src = cce->ch[0].coeffs;
218
    int g, i, group, k, idx = 0;
219
    if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) {
220
        av_log(ac->avctx, AV_LOG_ERROR,
221 222 223 224 225
               "Dependent coupling is not supported together with LTP\n");
        return;
    }
    for (g = 0; g < ics->num_window_groups; g++) {
        for (i = 0; i < ics->max_sfb; i++, idx++) {
226
            if (cce->ch[0].band_type[idx] != ZERO_BT) {
227
                const float gain = cce->coup.gain[index][idx];
228
                for (group = 0; group < ics->group_len[g]; group++) {
229
                    for (k = offsets[i]; k < offsets[i + 1]; k++) {
230
                        // FIXME: SIMDify
231
                        dest[group * 128 + k] += gain * src[group * 128 + k];
232 233 234 235
                    }
                }
            }
        }
236 237
        dest += ics->group_len[g] * 128;
        src  += ics->group_len[g] * 128;
238 239 240 241 242 243 244 245
    }
}

/**
 * Apply independent channel coupling (applied after IMDCT).
 *
 * @param   index   index into coupling gain array
 */
246 247 248 249
static void apply_independent_coupling(AACContext *ac,
                                       SingleChannelElement *target,
                                       ChannelElement *cce, int index)
{
250
    int i;
251
    const float gain = cce->coup.gain[index][0];
252 253
    const float *src = cce->ch[0].ret;
    float *dest = target->ret;
254
    const int len = 1024 << (ac->oc[1].m4ac.sbr == 1);
255

256
    for (i = 0; i < len; i++)
257
        dest[i] += gain * src[i];
258 259
}

260
#include "aacdec_template.c"

#define LOAS_SYNC_WORD   0x2b7       ///< 11 bits LOAS sync word

struct LATMContext {
265
    AACContext aac_ctx;     ///< containing AACContext
266
    int initialized;        ///< initialized after a valid extradata was seen
267 268

    // parser data
269 270 271
    int audio_mux_version_A; ///< LATM syntax version
    int frame_length_type;   ///< 0/1 variable/fixed frame length
    int frame_length;        ///< frame length for fixed frame length
272 273 274 275 276 277 278 279 280 281
};

/**
 * Read a variable-length value: a 2-bit prefix gives the number of bytes
 * minus one, and that many bytes follow and form the value.
 *
 * @param   b   bit reader positioned at the 2-bit prefix
 * @return  the value read (8, 16, 24 or 32 bits wide)
 */
static inline uint32_t latm_get_value(GetBitContext *b)
{
    const int bytes_minus_one = get_bits(b, 2);
    const int value_bits      = (bytes_minus_one+1)*8;

    return get_bits_long(b, value_bits);
}

static int latm_decode_audio_specific_config(struct LATMContext *latmctx,
282
                                             GetBitContext *gb, int asclen)
283
{
284 285
    AACContext *ac        = &latmctx->aac_ctx;
    AVCodecContext *avctx = ac->avctx;
286
    MPEG4AudioConfig m4ac = { 0 };
287
    GetBitContext gbc;
288 289
    int config_start_bit  = get_bits_count(gb);
    int sync_extension    = 0;
290
    int bits_consumed, esize, i;
291

292
    if (asclen > 0) {
293 294
        sync_extension = 1;
        asclen         = FFMIN(asclen, get_bits_left(gb));
295 296 297 298 299
        init_get_bits(&gbc, gb->buffer, config_start_bit + asclen);
        skip_bits_long(&gbc, config_start_bit);
    } else if (asclen == 0) {
        gbc = *gb;
    } else {
300
        return AVERROR_INVALIDDATA;
301
    }
302

303 304
    if (get_bits_left(gb) <= 0)
        return AVERROR_INVALIDDATA;
305 306 307 308 309 310

    bits_consumed = decode_audio_specific_config_gb(NULL, avctx, &m4ac,
                                                    &gbc, config_start_bit,
                                                    sync_extension);

    if (bits_consumed < config_start_bit)
311
        return AVERROR_INVALIDDATA;
312
    bits_consumed -= config_start_bit;
313

314 315 316
    if (asclen == 0)
      asclen = bits_consumed;

317 318
    if (!latmctx->initialized ||
        ac->oc[1].m4ac.sample_rate != m4ac.sample_rate ||
319
        ac->oc[1].m4ac.chan_config != m4ac.chan_config) {
320

321 322 323
        if(latmctx->initialized) {
            av_log(avctx, AV_LOG_INFO, "audio config changed\n");
        } else {
324
            av_log(avctx, AV_LOG_DEBUG, "initializing latmctx\n");
325
        }
326
        latmctx->initialized = 0;
327

328
        esize = (asclen + 7) / 8;
329

330
        if (avctx->extradata_size < esize) {
331
            av_free(avctx->extradata);
332
            avctx->extradata = av_malloc(esize + AV_INPUT_BUFFER_PADDING_SIZE);
333 334 335 336 337
            if (!avctx->extradata)
                return AVERROR(ENOMEM);
        }

        avctx->extradata_size = esize;
338 339 340 341
        gbc = *gb;
        for (i = 0; i < esize; i++) {
          avctx->extradata[i] = get_bits(&gbc, 8);
        }
342
        memset(avctx->extradata+esize, 0, AV_INPUT_BUFFER_PADDING_SIZE);
343
    }
344
    skip_bits_long(gb, asclen);
345

346
    return 0;
347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366
}

/**
 * Parse a stream mux configuration from gb.
 *
 * Only a single program with a single layer is supported. When the
 * audioMuxVersionA bit is set, nothing beyond the version bits is read.
 *
 * @param   latmctx  LATM context; audio_mux_version_A, frame_length_type and
 *                   frame_length are updated
 * @param   gb       bit reader positioned at the start of the configuration
 * @return  0 on success, AVERROR_PATCHWELCOME for multiple programs or
 *          layers, or another negative AVERROR code on invalid data
 */
static int read_stream_mux_config(struct LATMContext *latmctx,
                                  GetBitContext *gb)
{
    int ret, audio_mux_version = get_bits(gb, 1);

    latmctx->audio_mux_version_A = 0;
    if (audio_mux_version)
        latmctx->audio_mux_version_A = get_bits(gb, 1);

    if (!latmctx->audio_mux_version_A) {

        if (audio_mux_version)
            latm_get_value(gb);                 // taraFullness

        skip_bits(gb, 1);                       // allStreamSameTimeFraming
        skip_bits(gb, 6);                       // numSubFrames
        if (get_bits(gb, 4)) {                  // numPrograms
            avpriv_request_sample(latmctx->aac_ctx.avctx, "Multiple programs");
            return AVERROR_PATCHWELCOME;
        }

        // for each program (which there is only one in DVB)

        // for each layer (which there is only one in DVB)
        if (get_bits(gb, 3)) {                   // numLayer
            avpriv_request_sample(latmctx->aac_ctx.avctx, "Multiple layers");
            return AVERROR_PATCHWELCOME;
        }

        // for all but first stream: use_same_config = get_bits(gb, 1);
        if (!audio_mux_version) {
            if ((ret = latm_decode_audio_specific_config(latmctx, gb, 0)) < 0)
                return ret;
        } else {
            int ascLen = latm_get_value(gb);
            if ((ret = latm_decode_audio_specific_config(latmctx, gb, ascLen)) < 0)
                return ret;
        }

        latmctx->frame_length_type = get_bits(gb, 3);
        switch (latmctx->frame_length_type) {
        case 0:
            skip_bits(gb, 8);       // latmBufferFullness
            break;
        case 1:
            latmctx->frame_length = get_bits(gb, 9);
            break;
        case 3:
        case 4:
        case 5:
            skip_bits(gb, 6);       // CELP frame length table index
            break;
        case 6:
        case 7:
            skip_bits(gb, 1);       // HVXC frame length table index
            break;
        }

        if (get_bits(gb, 1)) {                  // other data
            if (audio_mux_version) {
                latm_get_value(gb);             // other_data_bits
            } else {
                int esc;
                do {
                    /* Each round needs 1 + 8 bits; stop on truncated input
                     * instead of looping on whatever lies past the end. */
                    if (get_bits_left(gb) < 9)
                        return AVERROR_INVALIDDATA;
                    esc = get_bits(gb, 1);
                    skip_bits(gb, 8);
                } while (esc);
            }
        }

        if (get_bits(gb, 1))                     // crc present
            skip_bits(gb, 8);                    // config_crc
    }

    return 0;
}

/**
 * Read the payload length information for the current frame length type.
 *
 * - type 0: sums bytes read from gb; a byte of 255 means another byte follows
 * - type 1: returns ctx->frame_length unchanged, nothing is read
 * - types 3, 5, 7: skips the 2-bit mux_slot_length_coded and returns 0
 * - any other type: returns 0, nothing is read
 *
 * @param   ctx  LATM context supplying frame_length_type and frame_length
 * @param   gb   bit reader
 * @return  the length as described above, or AVERROR_INVALIDDATA when fewer
 *          than 8 bits are left while reading a type 0 length
 */
static int read_payload_length_info(struct LATMContext *ctx, GetBitContext *gb)
{
    uint8_t tmp;

    if (ctx->frame_length_type == 0) {
        int mux_slot_length = 0;
        do {
            if (get_bits_left(gb) < 8)
                return AVERROR_INVALIDDATA;
            tmp = get_bits(gb, 8);
            mux_slot_length += tmp;
        } while (tmp == 255);
        return mux_slot_length;
    } else if (ctx->frame_length_type == 1) {
        return ctx->frame_length;
    } else if (ctx->frame_length_type == 3 ||
               ctx->frame_length_type == 5 ||
               ctx->frame_length_type == 7) {
        skip_bits(gb, 2);          // mux_slot_length_coded
    }
    return 0;
}

/**
 * Parse an audio mux element header: the use-same-mux flag, an optional
 * stream mux configuration, and the payload length information.
 *
 * @param   latmctx  LATM context
 * @param   gb       bit reader positioned at the start of the element
 * @return  0 on success; 1 when the previous configuration should be reused
 *          but no extradata is available yet; a negative AVERROR code when
 *          the configuration is invalid or the announced payload length does
 *          not fit the remaining data
 */
static int read_audio_mux_element(struct LATMContext *latmctx,
                                  GetBitContext *gb)
{
    int err;
    uint8_t use_same_mux = get_bits(gb, 1);
    if (!use_same_mux) {
        if ((err = read_stream_mux_config(latmctx, gb)) < 0)
            return err;
    } else if (!latmctx->aac_ctx.avctx->extradata) {
        av_log(latmctx->aac_ctx.avctx, AV_LOG_DEBUG,
               "no decoder config found\n");
        return 1;
    }
    if (latmctx->audio_mux_version_A == 0) {
        int mux_slot_length_bytes = read_payload_length_info(latmctx, gb);
        /* The payload must fit into the remaining bits ... */
        if (mux_slot_length_bytes < 0 || mux_slot_length_bytes * 8LL > get_bits_left(gb)) {
            av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR, "incomplete frame\n");
            return AVERROR_INVALIDDATA;
        /* ... and may leave at most 256 bits unused after it. */
        } else if (mux_slot_length_bytes * 8 + 256 < get_bits_left(gb)) {
            av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR,
                   "frame length mismatch %d << %d\n",
                   mux_slot_length_bytes * 8, get_bits_left(gb));
            return AVERROR_INVALIDDATA;
        }
    }
    return 0;
}


/**
 * Decode one LOAS/LATM packet.
 *
 * Verifies the 11-bit LOAS sync word, reads the 13-bit length field, parses
 * the audio mux element and, once a decoder configuration is available,
 * hands the payload to the AAC frame decoder matching the object type.
 *
 * @param   avctx          codec context; priv_data is a struct LATMContext
 * @param   out            passed through to the AAC frame decoder
 * @param   got_frame_ptr  passed through to the AAC frame decoder; set to 0
 *                         here when no extradata is available yet
 * @param   avpkt          input packet
 * @return  the length field value plus 3 after a frame was handed to the
 *          AAC decoder; avpkt->size when the packet is skipped because no
 *          configuration is available; a negative AVERROR code on error
 */
static int latm_decode_frame(AVCodecContext *avctx, void *out,
                             int *got_frame_ptr, AVPacket *avpkt)
{
    struct LATMContext *latmctx = avctx->priv_data;
    int                 muxlength, err;
    GetBitContext       gb;

    if ((err = init_get_bits8(&gb, avpkt->data, avpkt->size)) < 0)
        return err;

    // check for LOAS sync word
    if (get_bits(&gb, 11) != LOAS_SYNC_WORD)
        return AVERROR_INVALIDDATA;

    muxlength = get_bits(&gb, 13) + 3;
    // not enough data, the parser should have sorted this out
    if (muxlength > avpkt->size)
        return AVERROR_INVALIDDATA;

    /* A positive result means "no configuration yet": consume the packet. */
    if ((err = read_audio_mux_element(latmctx, &gb)))
        return (err < 0) ? err : avpkt->size;

    if (!latmctx->initialized) {
        if (!avctx->extradata) {
            *got_frame_ptr = 0;
            return avpkt->size;
        } else {
            /* Apply the stored extradata; the pushed configuration is popped
             * again if parsing it fails. */
            push_output_configuration(&latmctx->aac_ctx);
            if ((err = decode_audio_specific_config(
                    &latmctx->aac_ctx, avctx, &latmctx->aac_ctx.oc[1].m4ac,
                    avctx->extradata, avctx->extradata_size*8LL, 1)) < 0) {
                pop_output_configuration(&latmctx->aac_ctx);
                return err;
            }
            latmctx->initialized = 1;
        }
    }

    if (show_bits(&gb, 12) == 0xfff) {
        av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR,
               "ADTS header detected, probably as result of configuration "
               "misparsing\n");
        return AVERROR_INVALIDDATA;
    }

    switch (latmctx->aac_ctx.oc[1].m4ac.object_type) {
    case AOT_ER_AAC_LC:
    case AOT_ER_AAC_LTP:
    case AOT_ER_AAC_LD:
    case AOT_ER_AAC_ELD:
        err = aac_decode_er_frame(avctx, out, got_frame_ptr, &gb);
        break;
    default:
        err = aac_decode_frame_int(avctx, out, got_frame_ptr, &gb, avpkt);
    }
    if (err < 0)
        return err;

    return muxlength;
}

/**
 * Initialize the LATM decoder by running the plain AAC decoder init.
 *
 * When extradata was supplied (extradata_size > 0), the LATM context is
 * marked initialized exactly when aac_decode_init() returned 0.
 *
 * @param   avctx   codec context; priv_data is a struct LATMContext
 * @return  the return value of aac_decode_init()
 */
static av_cold int latm_decode_init(AVCodecContext *avctx)
{
    struct LATMContext *latmctx = avctx->priv_data;
    int ret = aac_decode_init(avctx);

    if (avctx->extradata_size > 0)
        latmctx->initialized = !ret;

    return ret;
}

AVCodec ff_aac_decoder = {
552
    .name            = "aac",
553
    .long_name       = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
554
    .type            = AVMEDIA_TYPE_AUDIO,
555
    .id              = AV_CODEC_ID_AAC,
556 557 558 559 560
    .priv_data_size  = sizeof(AACContext),
    .init            = aac_decode_init,
    .close           = aac_decode_close,
    .decode          = aac_decode_frame,
    .sample_fmts     = (const enum AVSampleFormat[]) {
561
        AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE
562
    },
563
    .capabilities    = AV_CODEC_CAP_CHANNEL_CONF | AV_CODEC_CAP_DR1,
564
    .caps_internal   = FF_CODEC_CAP_INIT_THREADSAFE,
565
    .channel_layouts = aac_channel_layout,
566
    .flush = flush,
567
    .priv_class      = &aac_decoder_class,
568
    .profiles        = NULL_IF_CONFIG_SMALL(ff_aac_profiles),
569
};
570 571 572 573 574 575

/*
    Note: This decoder filter is intended to decode LATM streams transferred
    in MPEG transport streams which only contain one program.
    To do a more complex LATM demuxing a separate LATM demuxer should be used.
*/
576
AVCodec ff_aac_latm_decoder = {
577
    .name            = "aac_latm",
578
    .long_name       = NULL_IF_CONFIG_SMALL("AAC LATM (Advanced Audio Coding LATM syntax)"),
579
    .type            = AVMEDIA_TYPE_AUDIO,
580
    .id              = AV_CODEC_ID_AAC_LATM,
581 582 583 584 585
    .priv_data_size  = sizeof(struct LATMContext),
    .init            = latm_decode_init,
    .close           = aac_decode_close,
    .decode          = latm_decode_frame,
    .sample_fmts     = (const enum AVSampleFormat[]) {
586
        AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE
587
    },
588
    .capabilities    = AV_CODEC_CAP_CHANNEL_CONF | AV_CODEC_CAP_DR1,
589
    .caps_internal   = FF_CODEC_CAP_INIT_THREADSAFE,
590
    .channel_layouts = aac_channel_layout,
591
    .flush = flush,
592
    .profiles        = NULL_IF_CONFIG_SMALL(ff_aac_profiles),
593
};