/*
 * MP3 demuxer
 * Copyright (c) 2003 Fabrice Bellard
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

22
#include "libavutil/opt.h"
23
#include "libavutil/avstring.h"
24
#include "libavutil/intreadwrite.h"
25
#include "libavutil/crc.h"
26
#include "libavutil/dict.h"
27
#include "libavutil/mathematics.h"
28
#include "avformat.h"
29
#include "internal.h"
30
#include "avio_internal.h"
31
#include "id3v2.h"
32
#include "id3v1.h"
33 34
#include "replaygain.h"

35
#include "libavcodec/avcodec.h"
36 37
#include "libavcodec/mpegaudiodecheader.h"

38 39
/* Bits of the flags word that follows the "Xing"/"Info" magic. */
#define XING_FLAG_FRAMES 0x01   /* frame count field is present */
#define XING_FLAG_SIZE   0x02   /* stream size field is present */
#define XING_FLAG_TOC    0x04   /* seek table (TOC) is present  */
#define XING_FLAG_QSCALE 0x08   /* VBR quality field is present */
/* Historical misspelling, kept so existing users keep compiling. */
#define XING_FLAC_QSCALE XING_FLAG_QSCALE

/* Number of one-byte entries in the Xing TOC. */
#define XING_TOC_COUNT 100

/*
 * Header bits that must match between two consecutive frames for them to be
 * accepted as belonging to the same stream: the top 11 bits (0xffe00000) plus
 * the 2-bit fields at bit offsets 19, 17 and 10.
 * NOTE(review): in the MPEG audio frame header these are the sync word,
 * version ID, layer and sampling-rate index - confirm against the spec.
 */
#define SAME_HEADER_MASK \
   (0xffe00000 | (3 << 19) | (3 << 17) | (3 << 10))

48
typedef struct {
49
    AVClass *class;
50
    int64_t filesize;
51
    int xing_toc;
52 53
    int start_pad;
    int end_pad;
54
    int usetoc;
55
    unsigned frames; /* Total number of frames in file */
56
    unsigned header_filesize;   /* Total number of bytes in the stream */
57
    int is_cbr;
58
} MP3DecContext;
59

60
/* Forward declaration: check() is defined further down but is already needed
 * by mp3_read_header(). */
static int check(AVIOContext *pb, int64_t pos, uint32_t *header);
61

62
/* mp3 read */
Måns Rullgård's avatar
Måns Rullgård committed
63 64 65

/**
 * Score how likely the probe buffer is to contain MPEG audio.
 *
 * Leading zero bytes are skipped, then every byte offset is tried as the
 * start of a chain of consecutive valid frame headers. The length of the
 * longest chain (max_frames) and of the chain starting right after the
 * leading zeros (first_frames) select the returned score.
 *
 * @param p probe data
 * @return a probe score, 0 if nothing matched, or AVERROR(ENOMEM) if the
 *         temporary codec context cannot be allocated
 */
static int mp3_read_probe(AVProbeData *p)
{
    int max_frames = 0, first_frames = 0;
    int fsize, frames;
    uint32_t header;
    const uint8_t *buf, *buf0, *buf2, *end;
    AVCodecContext *avctx = avcodec_alloc_context3(NULL);

    if (!avctx)
        return AVERROR(ENOMEM);

    buf0 = p->buf;
    /* "end" is the limit for positions at which a 32-bit header is read.
     * For buffers shorter than one header it is clamped to the buffer start,
     * so that no pointer before the buffer is ever formed. */
    end = p->buf;
    if (p->buf_size >= (int)sizeof(uint32_t))
        end += p->buf_size - (int)sizeof(uint32_t);

    while (buf0 < end && !*buf0)
        buf0++;

    for (buf = buf0; buf < end; buf = buf2 + 1) {
        buf2 = buf;
        if (ff_mpa_check_header(AV_RB32(buf2)))
            continue;

        /* Follow the chain of frames starting at buf. */
        for (frames = 0; buf2 < end; frames++) {
            int dummy;
            header = AV_RB32(buf2);
            fsize = avpriv_mpa_decode_header(avctx, header,
                                             &dummy, &dummy, &dummy, &dummy);
            if (fsize < 0)
                break;
            /* A frame reaching past "end" terminates the chain either way;
             * stop exactly at "end" instead of stepping far outside the
             * buffer. */
            buf2 += FFMIN(fsize, end - buf2);
        }
        max_frames = FFMAX(max_frames, frames);
        if (buf == buf0)
            first_frames = frames;
    }
    avcodec_free_context(&avctx);

    // keep this in sync with ac3 probe, both need to avoid
    // issues with MPEG-files!
    if (first_frames >= 7)
        return AVPROBE_SCORE_EXTENSION + 1;
    else if (max_frames > 200)
        return AVPROBE_SCORE_EXTENSION;
    else if (max_frames >= 4 && max_frames >= p->buf_size / 10000)
        return AVPROBE_SCORE_EXTENSION / 2;
    else if (ff_id3v2_match(buf0, ID3v2_DEFAULT_MAGIC) &&
             2 * ff_id3v2_tag_len(buf0) >= p->buf_size)
        return p->buf_size < PROBE_BUF_MAX ? AVPROBE_SCORE_EXTENSION / 4
                                           : AVPROBE_SCORE_EXTENSION - 2;
    else if (max_frames >= 1 && max_frames >= p->buf_size / 10000)
        return 1;
    else
        return 0;
    // mpegps_mp3_unrecognized_format.mpg has max_frames=3
}

114
/**
 * Read the XING_TOC_COUNT one-byte entries of the Xing seek table and, when
 * requested, turn them into index entries on stream 0.
 *
 * The TOC bytes are always consumed from s->pb so that the caller's read
 * position stays correct, even when no index is built.
 *
 * @param s        demuxer context
 * @param filesize stream size from the tag; 0 means "ask the I/O layer"
 * @param duration stream duration in stream time base; must be > 0 for the
 *                 index to be filled
 */
static void read_xing_toc(AVFormatContext *s, int64_t filesize, int64_t duration)
{
    int i;
    MP3DecContext *mp3 = s->priv_data;
    int fast_seek = s->flags & AVFMT_FLAG_FAST_SEEK;
    int fill_index = (mp3->usetoc || fast_seek) && duration > 0;

    /* avio_size() may return a negative error code; a non-positive size
     * cannot be used to scale the TOC entries. */
    if (!filesize &&
        (filesize = avio_size(s->pb)) <= 0) {
        av_log(s, AV_LOG_WARNING, "Cannot determine file size, skipping TOC table.\n");
        fill_index = 0;
        filesize   = 0;
    }

    for (i = 0; i < XING_TOC_COUNT; i++) {
        uint8_t b = avio_r8(s->pb);
        if (fill_index)
            /* entry i: position b/256 of the file, time i/XING_TOC_COUNT of
             * the duration */
            av_add_index_entry(s->streams[0],
                           av_rescale(b, filesize, 256),
                           av_rescale(i, duration, XING_TOC_COUNT),
                           0, 0, AVINDEX_KEYFRAME);
    }
    if (fill_index)
        mp3->xing_toc = 1;
}

139
static void mp3_parse_info_tag(AVFormatContext *s, AVStream *st,
140
                               MPADecodeHeader *c, uint32_t spf)
141
{
142 143 144 145
#define LAST_BITS(k, n) ((k) & ((1 << (n)) - 1))
#define MIDDLE_BITS(k, m, n) LAST_BITS((k) >> (m), ((n) - (m)))

    uint16_t crc;
146
    uint32_t v;
147 148 149 150 151 152

    char version[10];

    uint32_t peak   = 0;
    int32_t  r_gain = INT32_MIN, a_gain = INT32_MIN;

153
    MP3DecContext *mp3 = s->priv_data;
154
    static const int64_t xing_offtbl[2][2] = {{32, 17}, {17,9}};
155
    uint64_t fsize = avio_size(s->pb);
156
    fsize = fsize >= avio_tell(s->pb) ? fsize - avio_tell(s->pb) : 0;
157

158
    /* Check for Xing / Info tag */
159
    avio_skip(s->pb, xing_offtbl[c->lsf == 1][c->nb_channels == 1]);
160
    v = avio_rb32(s->pb);
161
    mp3->is_cbr = v == MKBETAG('I', 'n', 'f', 'o');
162 163 164 165 166 167 168
    if (v != MKBETAG('X', 'i', 'n', 'g') && !mp3->is_cbr)
        return;

    v = avio_rb32(s->pb);
    if (v & XING_FLAG_FRAMES)
        mp3->frames = avio_rb32(s->pb);
    if (v & XING_FLAG_SIZE)
169
        mp3->header_filesize = avio_rb32(s->pb);
170 171 172 173 174 175
    if (fsize && mp3->header_filesize) {
        uint64_t min, delta;
        min = FFMIN(fsize, mp3->header_filesize);
        delta = FFMAX(fsize, mp3->header_filesize) - min;
        if (fsize > mp3->header_filesize && delta > min >> 4) {
            mp3->frames = 0;
176 177
            av_log(s, AV_LOG_WARNING,
                   "invalid concatenated file detected - using bitrate for duration\n");
178 179 180 181 182
        } else if (delta > min >> 4) {
            av_log(s, AV_LOG_WARNING,
                   "filesize and duration do not match (growing file?)\n");
        }
    }
183
    if (v & XING_FLAG_TOC)
184
        read_xing_toc(s, mp3->header_filesize, av_rescale_q(mp3->frames,
185 186
                                       (AVRational){spf, c->sample_rate},
                                       st->time_base));
187
    /* VBR quality */
188 189
    if (v & XING_FLAC_QSCALE)
        avio_rb32(s->pb);
190

191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231
    /* Encoder short version string */
    memset(version, 0, sizeof(version));
    avio_read(s->pb, version, 9);

    /* Info Tag revision + VBR method */
    avio_r8(s->pb);

    /* Lowpass filter value */
    avio_r8(s->pb);

    /* ReplayGain peak */
    v    = avio_rb32(s->pb);
    peak = av_rescale(v, 100000, 1 << 23);

    /* Radio ReplayGain */
    v = avio_rb16(s->pb);

    if (MIDDLE_BITS(v, 13, 15) == 1) {
        r_gain = MIDDLE_BITS(v, 0, 8) * 10000;

        if (v & (1 << 9))
            r_gain *= -1;
    }

    /* Audiophile ReplayGain */
    v = avio_rb16(s->pb);

    if (MIDDLE_BITS(v, 13, 15) == 2) {
        a_gain = MIDDLE_BITS(v, 0, 8) * 10000;

        if (v & (1 << 9))
            a_gain *= -1;
    }

    /* Encoding flags + ATH Type */
    avio_r8(s->pb);

    /* if ABR {specified bitrate} else {minimal bitrate} */
    avio_r8(s->pb);

    /* Encoder delays */
232 233
    v= avio_rb24(s->pb);
    if(AV_RB32(version) == MKBETAG('L', 'A', 'M', 'E')
234 235 236
        || AV_RB32(version) == MKBETAG('L', 'a', 'v', 'f')
        || AV_RB32(version) == MKBETAG('L', 'a', 'v', 'c')
    ) {
237 238 239

        mp3->start_pad = v>>12;
        mp3->  end_pad = v&4095;
240
        st->start_skip_samples = mp3->start_pad + 528 + 1;
241 242 243 244
        if (mp3->frames) {
            st->first_discard_sample = -mp3->end_pad + 528 + 1 + mp3->frames * (int64_t)spf;
            st->last_discard_sample = mp3->frames * (int64_t)spf;
        }
245
        if (!st->start_time)
246
            st->start_time = av_rescale_q(st->start_skip_samples,
247 248 249 250
                                            (AVRational){1, c->sample_rate},
                                            st->time_base);
        av_log(s, AV_LOG_DEBUG, "pad %d %d\n", mp3->start_pad, mp3->  end_pad);
    }
251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273

    /* Misc */
    avio_r8(s->pb);

    /* MP3 gain */
    avio_r8(s->pb);

    /* Preset and surround info */
    avio_rb16(s->pb);

    /* Music length */
    avio_rb32(s->pb);

    /* Music CRC */
    avio_rb16(s->pb);

    /* Info Tag CRC */
    crc = ffio_get_checksum(s->pb);
    v   = avio_rb16(s->pb);

    if (v == crc) {
        ff_replaygain_export_raw(st, r_gain, peak, a_gain, 0);
        av_dict_set(&st->metadata, "encoder", version, 0);
274
    }
275 276 277 278 279 280
}

/**
 * Look for a version 1 "VBRI" tag and, if found, take the stream size and
 * frame count from it.
 *
 * @param s    demuxer context
 * @param st   the audio stream (unused here)
 * @param base file offset of the first frame header
 */
static void mp3_parse_vbri_tag(AVFormatContext *s, AVStream *st, int64_t base)
{
    MP3DecContext *ctx = s->priv_data;
    AVIOContext *pb    = s->pb;

    /* The VBRI tag always sits 32 bytes after the end of the 4-byte
     * mpegaudio header. */
    avio_seek(pb, base + 4 + 32, SEEK_SET);
    if (avio_rb32(pb) != MKBETAG('V', 'B', 'R', 'I'))
        return;

    /* Only tag version 1 is understood. */
    if (avio_rb16(pb) != 1)
        return;

    /* Delay and quality are not needed. */
    avio_skip(pb, 4);
    ctx->header_filesize = avio_rb32(pb);
    ctx->frames          = avio_rb32(pb);
}
295

296
/**
297
 * Try to find Xing/Info/VBRI tags and compute duration from info therein
298
 */
299
static int mp3_parse_vbr_tags(AVFormatContext *s, AVStream *st, int64_t base)
300
{
301
    uint32_t v, spf;
302
    MPADecodeHeader c;
303
    int vbrtag_size = 0;
304
    MP3DecContext *mp3 = s->priv_data;
305

306 307
    ffio_init_checksum(s->pb, ff_crcA001_update, 0);

308
    v = avio_rb32(s->pb);
309
    if(ff_mpa_check_header(v) < 0)
310
      return -1;
311

312
    if (avpriv_mpegaudio_decode_header(&c, v) == 0)
313
        vbrtag_size = c.frame_size;
314
    if(c.layer != 3)
315
        return -1;
316

317 318
    spf = c.lsf ? 576 : 1152; /* Samples per frame, layer 3 */

319
    mp3->frames = 0;
320
    mp3->header_filesize   = 0;
321

322
    mp3_parse_info_tag(s, st, &c, spf);
323
    mp3_parse_vbri_tag(s, st, base);
324

325
    if (!mp3->frames && !mp3->header_filesize)
326 327 328
        return -1;

    /* Skip the vbr tag frame */
329
    avio_seek(s->pb, base + vbrtag_size, SEEK_SET);
330

331 332
    if (mp3->frames)
        st->duration = av_rescale_q(mp3->frames, (AVRational){spf, c.sample_rate},
333
                                    st->time_base);
334 335
    if (mp3->header_filesize && mp3->frames && !mp3->is_cbr)
        st->codec->bit_rate = av_rescale(mp3->header_filesize, 8 * c.sample_rate, mp3->frames * (int64_t)spf);
336

337
    return 0;
338 339
}

340
/**
 * Set up the single audio stream, read ID3v1 and VBR tags, export ReplayGain
 * and skip up to 64 KiB of junk in front of the first pair of consecutive
 * frames with matching headers.
 *
 * @param s demuxer context
 * @return 0 on success, a negative AVERROR code on failure
 */
static int mp3_read_header(AVFormatContext *s)
{
    MP3DecContext *mp3 = s->priv_data;
    AVStream *st;
    int64_t off;
    int64_t seek_ret;
    int64_t data_start;
    int ret;
    int i;

    st = avformat_new_stream(s, NULL);
    if (!st)
        return AVERROR(ENOMEM);

    st->codec->codec_type = AVMEDIA_TYPE_AUDIO;
    st->codec->codec_id = AV_CODEC_ID_MP3;
    st->need_parsing = AVSTREAM_PARSE_FULL_RAW;
    st->start_time = 0;

    // lcm of all mp3 sample rates
    avpriv_set_pts_info(st, 64, 1, 14112000);

    s->pb->maxsize = -1;
    off = avio_tell(s->pb);

    /* Only fall back to ID3v1 when no metadata has been found so far. */
    if (!av_dict_get(s->metadata, "", NULL, AV_DICT_IGNORE_SUFFIX))
        ff_id3v1_read(s);

    if (s->pb->seekable)
        mp3->filesize = avio_size(s->pb);

    if (mp3_parse_vbr_tags(s, st, off) < 0)
        avio_seek(s->pb, off, SEEK_SET);

    ret = ff_replaygain_export(st, s->metadata);
    if (ret < 0)
        return ret;

    off = avio_tell(s->pb);
    for (i = 0; i < 64 * 1024; i++) {
        uint32_t header, header2;
        int frame_size;
        if (!(i&1023))
            ffio_ensure_seekback(s->pb, i + 1024 + 4);
        frame_size = check(s->pb, off + i, &header);
        if (frame_size > 0) {
            /* A failed seek leaves the read position undefined; do not go on
             * demuxing from an unknown offset. */
            seek_ret = avio_seek(s->pb, off, SEEK_SET);
            if (seek_ret < 0)
                return (int)seek_ret;
            ffio_ensure_seekback(s->pb, i + 1024 + frame_size + 4);
            /* Accept the candidate only if a compatible frame follows it. */
            if (check(s->pb, off + i + frame_size, &header2) >= 0 &&
                (header & SAME_HEADER_MASK) == (header2 & SAME_HEADER_MASK))
            {
                av_log(s, AV_LOG_INFO, "Skipping %d bytes of junk at %"PRId64".\n", i, off);
                seek_ret = avio_seek(s->pb, off + i, SEEK_SET);
                if (seek_ret < 0)
                    return (int)seek_ret;
                break;
            }
        }
        seek_ret = avio_seek(s->pb, off, SEEK_SET);
        if (seek_ret < 0)
            return (int)seek_ret;
    }

    // the seek index is relative to the end of the xing vbr headers
    data_start = avio_tell(s->pb);
    for (i = 0; i < st->nb_index_entries; i++)
        st->index_entries[i].pos += data_start;

    /* the parameters will be extracted from the compressed bitstream */
    return 0;
}

/* Number of bytes requested per packet. */
#define MP3_PACKET_SIZE 1024

/**
 * Read the next chunk of raw stream data into a packet.
 *
 * @param s   demuxer context
 * @param pkt packet to fill
 * @return number of bytes read (> 0), AVERROR_EOF when nothing could be
 *         read, or a negative AVERROR code
 */
static int mp3_read_packet(AVFormatContext *s, AVPacket *pkt)
{
    MP3DecContext *mp3 = s->priv_data;
    int64_t pos        = avio_tell(s->pb);
    int size           = MP3_PACKET_SIZE;
    int ret;

    /* With a known file size, do not request more than what is left. */
    if (mp3->filesize > ID3v1_TAG_SIZE && pos < mp3->filesize)
        size = FFMIN(size, mp3->filesize - pos);

    ret = av_get_packet(s->pb, pkt, size);
    if (ret < 0)
        return ret;
    if (ret == 0)
        return AVERROR_EOF;

    pkt->flags       &= ~AV_PKT_FLAG_CORRUPT;
    pkt->stream_index = 0;

    return ret;
}

431
#define SEEK_WINDOW 4096
432

433
static int check(AVIOContext *pb, int64_t pos, uint32_t *ret_header)
434
{
435
    int64_t ret = avio_seek(pb, pos, SEEK_SET);
436 437 438 439
    unsigned header;
    MPADecodeHeader sd;
    if (ret < 0)
        return ret;
440

441
    header = avio_rb32(pb);
442 443 444 445
    if (ff_mpa_check_header(header) < 0)
        return -1;
    if (avpriv_mpegaudio_decode_header(&sd, header) == 1)
        return -1;
446

447 448
    if (ret_header)
        *ret_header = header;
449 450 451
    return sd.frame_size;
}

452
/**
 * Find a frame boundary near target_pos and seek to it.
 *
 * Up to SEEK_WINDOW start positions are tried; a position qualifies when
 * MIN_VALID consecutive frame headers can be chained from it. Among the
 * frames of such a chain that do not lie before (forward seek) or after
 * (backward seek) the target, the one closest to the middle of the chain is
 * preferred.
 *
 * @param s          demuxer context
 * @param target_pos approximate byte position
 * @param flags      seek flags; only AVSEEK_FLAG_BACKWARD is examined
 * @return result of the final avio_seek(), or a negative error from the
 *         initial seek
 */
static int64_t mp3_sync(AVFormatContext *s, int64_t target_pos, int flags)
{
    const int dir = (flags & AVSEEK_FLAG_BACKWARD) ? -1 : 1;
    int64_t best_pos = target_pos;
    int best_score = 999;
    int64_t ret;
    int i;

    avio_seek(s->pb, FFMAX(target_pos - SEEK_WINDOW, 0), SEEK_SET);
    ret = avio_seek(s->pb, target_pos, SEEK_SET);
    if (ret < 0)
        return ret;

#define MIN_VALID 3
    for (i = 0; i < SEEK_WINDOW; i++) {
        /* Forward: start a quarter window before the target and walk up.
         * Backward: walk down from the target. */
        int64_t pos = target_pos + (dir > 0 ? i - SEEK_WINDOW/4 : -i);
        int64_t candidate = -1;
        int score = 999;
        int chained = 0;

        if (pos < 0)
            continue;

        while (chained < MIN_VALID) {
            int dist;

            ret = check(s->pb, pos, NULL);
            if (ret < 0)
                break;

            dist = abs(MIN_VALID/2 - chained);
            if ((target_pos - pos)*dir <= 0 && dist < score) {
                candidate = pos;
                score     = dist;
            }
            pos += ret;
            chained++;
        }

        /* Reject incomplete chains and chains that are no improvement. */
        if (chained != MIN_VALID || score >= best_score)
            continue;

        best_pos   = candidate;
        best_score = score;
        if (!score)
            break;
    }

    return avio_seek(s->pb, best_pos, SEEK_SET);
}

/**
 * Seek to a timestamp using either the Xing TOC index or linear scaling of
 * the timestamp onto the file size, then resynchronise on a frame boundary.
 *
 * @param s            demuxer context
 * @param stream_index unused; stream 0 is always used
 * @param timestamp    target timestamp in stream time base
 * @param flags        AVSEEK_FLAG_* flags
 * @return 0 on success, a negative value on failure; -1 in particular when
 *         neither method applies, so that the generic index code is used
 */
static int mp3_seek(AVFormatContext *s, int stream_index, int64_t timestamp,
                    int flags)
{
    MP3DecContext *mp3 = s->priv_data;
    AVIndexEntry *ie, ie1;
    AVStream *st = s->streams[0];
    int64_t best_pos;
    int fast_seek = s->flags & AVFMT_FLAG_FAST_SEEK;
    int64_t filesize = mp3->header_filesize;

    /* No size in the tag: derive it from the real file size. */
    if (filesize <= 0) {
        int64_t size = avio_size(s->pb);
        if (size > 0 && size > s->internal->data_offset)
            filesize = size - s->internal->data_offset;
    }

    if (mp3->xing_toc && (mp3->usetoc || (fast_seek && !mp3->is_cbr))) {
        int64_t ret = av_index_search_timestamp(st, timestamp, flags);

        // NOTE: The MP3 TOC is not a precise lookup table. Accuracy is worse
        // for bigger files.
        av_log(s, AV_LOG_WARNING, "Using MP3 TOC to seek; may be imprecise.\n");

        if (ret < 0)
            return ret;

        ie = &st->index_entries[ret];
    } else if (fast_seek && st->duration > 0 && filesize > 0) {
        if (!mp3->is_cbr)
            av_log(s, AV_LOG_WARNING, "Using scaling to seek VBR MP3; may be imprecise.\n");

        ie = &ie1;
        timestamp = av_clip64(timestamp, 0, st->duration);
        ie->timestamp = timestamp;
        ie->pos       = av_rescale(timestamp, filesize, st->duration) + s->internal->data_offset;
    } else {
        return -1; // generic index code
    }

    best_pos = mp3_sync(s, ie->pos, flags);
    if (best_pos < 0)
        return best_pos;

    /* For CBR, recompute the timestamp from the frame actually reached.
     * "filesize" is used as divisor: it equals mp3->header_filesize when
     * that is non-zero and is guaranteed > 0 on this path, whereas
     * mp3->header_filesize may be 0 if the tag carried no size field. */
    if (mp3->is_cbr && ie == &ie1 && mp3->frames) {
        int frame_duration = av_rescale(st->duration, 1, mp3->frames);
        ie1.timestamp = frame_duration * av_rescale(best_pos - s->internal->data_offset, mp3->frames, filesize);
    }

    ff_update_cur_dts(s, st, ie->timestamp);
    return 0;
}

548
static const AVOption options[] = {
549
    { "usetoc", "use table of contents", offsetof(MP3DecContext, usetoc), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AV_OPT_FLAG_DECODING_PARAM},
550 551 552 553 554 555 556 557 558 559 560
    { NULL },
};

/* AVClass tying the option table above to the demuxer's private context. */
static const AVClass demuxer_class = {
    .class_name = "mp3",
    .category   = AV_CLASS_CATEGORY_DEMUXER,
    .version    = LIBAVUTIL_VERSION_INT,
    .item_name  = av_default_item_name,
    .option     = options,
};

561
AVInputFormat ff_mp3_demuxer = {
562
    .name           = "mp3",
563
    .long_name      = NULL_IF_CONFIG_SMALL("MP2/3 (MPEG audio layer 2/3)"),
564 565 566
    .read_probe     = mp3_read_probe,
    .read_header    = mp3_read_header,
    .read_packet    = mp3_read_packet,
567
    .read_seek      = mp3_seek,
568
    .priv_data_size = sizeof(MP3DecContext),
569
    .flags          = AVFMT_GENERIC_INDEX,
570
    .extensions     = "mp2,mp3,m2a,mpa", /* XXX: use probe */
571
    .priv_class     = &demuxer_class,
572
};