binkaudio.c 11 KB
Newer Older
Peter Ross's avatar
Peter Ross committed
1 2
/*
 * Bink Audio decoder
 * Copyright (c) 2007-2011 Peter Ross (pross@xvid.org)
 * Copyright (c) 2009 Daniel Verkamp (daniel@drv.nu)
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * Bink Audio decoder
 *
 * Technical details here:
 *  http://wiki.multimedia.cx/index.php?title=Bink_Audio
 */

31
#include <limits.h>

#include "libavutil/channel_layout.h"
#include "libavutil/intfloat.h"

#include "avcodec.h"
#define BITSTREAM_READER_LE
#include "get_bits.h"
#include "dct.h"
#include "rdft.h"
#include "internal.h"
#include "wma_freqs.h"
40

41
/*
 * Dequantisation scale factors, indexed by the per-band 8-bit quantiser value
 * after clamping to 95. Filled in by decode_init() and pre-multiplied by the
 * initialising context's root.
 * NOTE(review): the table is file-static but its contents depend on
 * per-context state, so decoders initialised with different frame lengths
 * overwrite each other's values -- confirm whether this matters for callers.
 */
static float quant_table[96];
42

Peter Ross's avatar
Peter Ross committed
43 44 45
/* Largest channel count decode_init() accepts. */
#define MAX_CHANNELS 2
/* Largest transform size in samples: 2^11 per channel for MAX_CHANNELS channels. */
#define BINK_BLOCK_MAX_SIZE (MAX_CHANNELS << 11)

46
typedef struct BinkAudioContext {
Peter Ross's avatar
Peter Ross committed
47
    GetBitContext gb;
48
    int version_b;          ///< Bink version 'b'
Peter Ross's avatar
Peter Ross committed
49 50 51 52 53 54 55 56
    int first;
    int channels;
    int frame_len;          ///< transform size (samples)
    int overlap_len;        ///< overlap size (samples)
    int block_size;
    int num_bands;
    unsigned int *bands;
    float root;
57
    DECLARE_ALIGNED(32, FFTSample, coeffs)[BINK_BLOCK_MAX_SIZE];
58
    float previous[MAX_CHANNELS][BINK_BLOCK_MAX_SIZE / 16];  ///< coeffs from previous audio block
59
    uint8_t *packet_buffer;
Peter Ross's avatar
Peter Ross committed
60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83
    union {
        RDFTContext rdft;
        DCTContext dct;
    } trans;
} BinkAudioContext;


/**
 * Set up the decoder context from the codec parameters.
 *
 * Picks the transform size from the sample rate, validates the channel count,
 * selects the output sample format, fills the shared quantiser table, builds
 * the band table and initialises the inverse transform.
 *
 * @param avctx codec context; sample_rate, channels, extradata and codec->id
 *              are read, channel_layout and sample_fmt are written
 * @return 0 on success, a negative AVERROR code on failure. On failure no
 *         allocation made here is left behind.
 */
static av_cold int decode_init(AVCodecContext *avctx)
{
    BinkAudioContext *s = avctx->priv_data;
    int sample_rate = avctx->sample_rate;
    int sample_rate_half;
    int i, ret;
    int frame_len_bits;

    /* determine frame length */
    if (avctx->sample_rate < 22050) {
        frame_len_bits = 9;
    } else if (avctx->sample_rate < 44100) {
        frame_len_bits = 10;
    } else {
        frame_len_bits = 11;
    }

    if (avctx->channels < 1 || avctx->channels > MAX_CHANNELS) {
        av_log(avctx, AV_LOG_ERROR, "invalid number of channels: %d\n", avctx->channels);
        return AVERROR_INVALIDDATA;
    }
    avctx->channel_layout = avctx->channels == 1 ? AV_CH_LAYOUT_MONO :
                                                   AV_CH_LAYOUT_STEREO;

    s->version_b = avctx->extradata_size >= 4 && avctx->extradata[3] == 'b';

    if (avctx->codec->id == AV_CODEC_ID_BINKAUDIO_RDFT) {
        // audio is already interleaved for the RDFT format variant
        avctx->sample_fmt = AV_SAMPLE_FMT_FLT;
        /* refuse rates whose interleaved equivalent does not fit in an int */
        if (sample_rate > INT_MAX / avctx->channels)
            return AVERROR_INVALIDDATA;
        sample_rate  *= avctx->channels;
        s->channels = 1;
        if (!s->version_b)
            frame_len_bits += av_log2(avctx->channels);
    } else {
        s->channels = avctx->channels;
        avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
    }

    s->frame_len     = 1 << frame_len_bits;
    s->overlap_len   = s->frame_len / 16;
    s->block_size    = (s->frame_len - s->overlap_len) * s->channels;
    /* widen before adding so that sample_rate == INT_MAX cannot overflow */
    sample_rate_half = (sample_rate + 1LL) / 2;
    if (avctx->codec->id == AV_CODEC_ID_BINKAUDIO_RDFT)
        s->root = 2.0 / (sqrt(s->frame_len) * 32768.0);
    else
        s->root = s->frame_len / (sqrt(s->frame_len) * 32768.0);
    /* NOTE(review): quant_table is file-static yet scaled by this context's
     * root, so instances with a different root overwrite each other. */
    for (i = 0; i < 96; i++) {
        /* constant is result of 0.066399999/log10(M_E) */
        quant_table[i] = expf(i * 0.15289164787221953823f) * s->root;
    }

    /* calculate number of bands */
    for (s->num_bands = 1; s->num_bands < 25; s->num_bands++)
        if (sample_rate_half <= ff_wma_critical_freqs[s->num_bands - 1])
            break;

    s->bands = av_malloc((s->num_bands + 1) * sizeof(*s->bands));
    if (!s->bands)
        return AVERROR(ENOMEM);

    /* populate bands data */
    s->bands[0] = 2;
    for (i = 1; i < s->num_bands; i++)
        s->bands[i] = (ff_wma_critical_freqs[i - 1] * s->frame_len / sample_rate_half) & ~1;
    s->bands[s->num_bands] = s->frame_len;

    s->first = 1;

    if (CONFIG_BINKAUDIO_RDFT_DECODER && avctx->codec->id == AV_CODEC_ID_BINKAUDIO_RDFT)
        ret = ff_rdft_init(&s->trans.rdft, frame_len_bits, DFT_C2R);
    else if (CONFIG_BINKAUDIO_DCT_DECODER)
        ret = ff_dct_init(&s->trans.dct, frame_len_bits, DCT_III);
    else
        ret = AVERROR_BUG; /* neither decoder variant is compiled in */

    if (ret < 0) {
        /* the codec entries do not request cleanup on init failure,
         * so release the band table here */
        av_freep(&s->bands);
        return ret;
    }

    return 0;
}

/**
 * Read a 29-bit packed float: 5-bit exponent, 23-bit mantissa, 1 sign bit.
 *
 * @param gb bit reader to consume from
 * @return mantissa * 2^(exponent - 23), negated when the sign bit is set
 */
static float get_float(GetBitContext *gb)
{
    const int exponent          = get_bits(gb, 5);
    const unsigned int mantissa = get_bits_long(gb, 23);
    const float magnitude       = ldexpf(mantissa, exponent - 23);

    return get_bits1(gb) ? -magnitude : magnitude;
}

/* Run lengths, in groups of 8 coefficients, selected by the 4-bit run code. */
static const uint8_t rle_length_tab[16] = {
     2,  3,  4,  5,
     6,  8,  9, 10,
    11, 12, 13, 14,
    15, 16, 32, 64,
};

/**
 * Decode Bink Audio block
 * @param[out] out Output buffer (must contain s->block_size elements)
161
 * @return 0 on success, negative error code on failure
Peter Ross's avatar
Peter Ross committed
162
 */
163
static int decode_block(BinkAudioContext *s, float **out, int use_dct)
Peter Ross's avatar
Peter Ross committed
164 165 166 167 168 169 170 171 172 173
{
    int ch, i, j, k;
    float q, quant[25];
    int width, coeff;
    GetBitContext *gb = &s->gb;

    if (use_dct)
        skip_bits(gb, 2);

    for (ch = 0; ch < s->channels; ch++) {
174 175
        FFTSample *coeffs = out[ch];

176
        if (s->version_b) {
177 178
            if (get_bits_left(gb) < 64)
                return AVERROR_INVALIDDATA;
179 180
            coeffs[0] = av_int2float(get_bits_long(gb, 32)) * s->root;
            coeffs[1] = av_int2float(get_bits_long(gb, 32)) * s->root;
181
        } else {
182 183
            if (get_bits_left(gb) < 58)
                return AVERROR_INVALIDDATA;
184 185 186
            coeffs[0] = get_float(gb) * s->root;
            coeffs[1] = get_float(gb) * s->root;
        }
Peter Ross's avatar
Peter Ross committed
187

188 189
        if (get_bits_left(gb) < s->num_bands * 8)
            return AVERROR_INVALIDDATA;
Peter Ross's avatar
Peter Ross committed
190 191
        for (i = 0; i < s->num_bands; i++) {
            int value = get_bits(gb, 8);
192
            quant[i]  = quant_table[FFMIN(value, 95)];
Peter Ross's avatar
Peter Ross committed
193 194
        }

195 196
        k = 0;
        q = quant[0];
Peter Ross's avatar
Peter Ross committed
197 198 199 200

        // parse coefficients
        i = 2;
        while (i < s->frame_len) {
201 202
            if (s->version_b) {
                j = i + 16;
Peter Ross's avatar
Peter Ross committed
203
            } else {
204
                int v = get_bits1(gb);
205
                if (v) {
206
                    v = get_bits(gb, 4);
207 208 209 210
                    j = i + rle_length_tab[v] * 8;
                } else {
                    j = i + 8;
                }
Peter Ross's avatar
Peter Ross committed
211 212 213 214
            }

            j = FFMIN(j, s->frame_len);

215
            width = get_bits(gb, 4);
Peter Ross's avatar
Peter Ross committed
216 217 218
            if (width == 0) {
                memset(coeffs + i, 0, (j - i) * sizeof(*coeffs));
                i = j;
219
                while (s->bands[k] < i)
Peter Ross's avatar
Peter Ross committed
220 221 222
                    q = quant[k++];
            } else {
                while (i < j) {
223
                    if (s->bands[k] == i)
Peter Ross's avatar
Peter Ross committed
224
                        q = quant[k++];
225
                    coeff = get_bits(gb, width);
Peter Ross's avatar
Peter Ross committed
226
                    if (coeff) {
227
                        int v;
228
                        v = get_bits1(gb);
229
                        if (v)
Peter Ross's avatar
Peter Ross committed
230 231 232 233 234 235 236 237 238 239 240
                            coeffs[i] = -q * coeff;
                        else
                            coeffs[i] =  q * coeff;
                    } else {
                        coeffs[i] = 0.0f;
                    }
                    i++;
                }
            }
        }

241 242
        if (CONFIG_BINKAUDIO_DCT_DECODER && use_dct) {
            coeffs[0] /= 0.5;
243
            s->trans.dct.dct_calc(&s->trans.dct,  coeffs);
244
        }
245
        else if (CONFIG_BINKAUDIO_RDFT_DECODER)
246
            s->trans.rdft.rdft_calc(&s->trans.rdft, coeffs);
Peter Ross's avatar
Peter Ross committed
247 248
    }

249 250
    for (ch = 0; ch < s->channels; ch++) {
        int j;
Peter Ross's avatar
Peter Ross committed
251
        int count = s->overlap_len * s->channels;
252 253 254 255 256
        if (!s->first) {
            j = ch;
            for (i = 0; i < s->overlap_len; i++, j += s->channels)
                out[ch][i] = (s->previous[ch][i] * (count - j) +
                                      out[ch][i] *          j) / count;
Peter Ross's avatar
Peter Ross committed
257
        }
258 259
        memcpy(s->previous[ch], &out[ch][s->frame_len - s->overlap_len],
               s->overlap_len * sizeof(*s->previous[ch]));
Peter Ross's avatar
Peter Ross committed
260 261 262
    }

    s->first = 0;
263 264

    return 0;
Peter Ross's avatar
Peter Ross committed
265 266 267 268 269 270
}

/**
 * Release everything owned by the decoder context: the band table, the
 * packet copy and the inverse transform matching the codec id.
 *
 * @return always 0
 */
static av_cold int decode_end(AVCodecContext *avctx)
{
    BinkAudioContext *ctx = avctx->priv_data;

    av_freep(&ctx->bands);
    av_freep(&ctx->packet_buffer);

    if (CONFIG_BINKAUDIO_RDFT_DECODER && avctx->codec->id == AV_CODEC_ID_BINKAUDIO_RDFT) {
        ff_rdft_end(&ctx->trans.rdft);
    } else if (CONFIG_BINKAUDIO_DCT_DECODER) {
        ff_dct_end(&ctx->trans.dct);
    }

    return 0;
}

/** Advance the bit reader to the next multiple of 32 bits, if not already there. */
static void get_bits_align32(GetBitContext *s)
{
    const int padding = (-get_bits_count(s)) & 31;

    if (padding != 0)
        skip_bits(s, padding);
}

286 287
/**
 * Decode one audio block per call.
 *
 * A packet may hold several blocks, so the packet is copied into the context
 * and consumed across calls; a new packet is only read once the buffered one
 * is exhausted.
 *
 * @param avctx         codec context
 * @param data          AVFrame receiving the decoded samples
 * @param got_frame_ptr set to 1 when a frame was produced, 0 at end of stream
 * @param avpkt         input packet; only read when no buffered data remains
 * @return number of bytes taken from avpkt (0 when the block came from
 *         previously buffered data), or a negative AVERROR code
 */
static int decode_frame(AVCodecContext *avctx, void *data,
                        int *got_frame_ptr, AVPacket *avpkt)
{
    BinkAudioContext *s = avctx->priv_data;
    AVFrame *frame      = data;
    GetBitContext *gb   = &s->gb;
    int ret, consumed = 0;

    /* "<= 0" rather than "== 0": the reader can end up past the end of the
     * buffer, and that must also count as "buffered packet exhausted" */
    if (get_bits_left(gb) <= 0) {
        uint8_t *buf;
        /* handle end-of-stream */
        if (!avpkt->size) {
            *got_frame_ptr = 0;
            return 0;
        }
        if (avpkt->size < 4) {
            av_log(avctx, AV_LOG_ERROR, "Packet is too small\n");
            return AVERROR_INVALIDDATA;
        }
        buf = av_realloc(s->packet_buffer, avpkt->size + AV_INPUT_BUFFER_PADDING_SIZE);
        if (!buf)
            return AVERROR(ENOMEM);
        memset(buf + avpkt->size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
        s->packet_buffer = buf;
        memcpy(s->packet_buffer, avpkt->data, avpkt->size);
        if ((ret = init_get_bits8(gb, s->packet_buffer, avpkt->size)) < 0)
            return ret;
        consumed = avpkt->size;

        /* skip reported size */
        skip_bits_long(gb, 32);
    }

    /* get output buffer */
    frame->nb_samples = s->frame_len;
    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
        return ret;

    ret = decode_block(s, (float **)frame->extended_data,
                       avctx->codec->id == AV_CODEC_ID_BINKAUDIO_DCT);
    if (ret < 0) {
        av_log(avctx, AV_LOG_ERROR, "Incomplete packet\n");
        /* drop the undecodable remainder so the next call reads a fresh
         * packet instead of failing on the same leftover bits forever */
        memset(gb, 0, sizeof(*gb));
        return ret;
    }
    get_bits_align32(gb);

    frame->nb_samples = s->block_size / avctx->channels;
    *got_frame_ptr    = 1;

    return consumed;
}

337
AVCodec ff_binkaudio_rdft_decoder = {
338
    .name           = "binkaudio_rdft",
339
    .long_name      = NULL_IF_CONFIG_SMALL("Bink Audio (RDFT)"),
340
    .type           = AVMEDIA_TYPE_AUDIO,
341
    .id             = AV_CODEC_ID_BINKAUDIO_RDFT,
342 343 344 345
    .priv_data_size = sizeof(BinkAudioContext),
    .init           = decode_init,
    .close          = decode_end,
    .decode         = decode_frame,
346
    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_DR1,
Peter Ross's avatar
Peter Ross committed
347 348
};

349
AVCodec ff_binkaudio_dct_decoder = {
350
    .name           = "binkaudio_dct",
351
    .long_name      = NULL_IF_CONFIG_SMALL("Bink Audio (DCT)"),
352
    .type           = AVMEDIA_TYPE_AUDIO,
353
    .id             = AV_CODEC_ID_BINKAUDIO_DCT,
354 355 356 357
    .priv_data_size = sizeof(BinkAudioContext),
    .init           = decode_init,
    .close          = decode_end,
    .decode         = decode_frame,
358
    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_DR1,
Peter Ross's avatar
Peter Ross committed
359
};