/*
 * General DV muxer/demuxer
 * Copyright (c) 2003 Roman Shaposhnik
 *
 * Many thanks to Dan Dennedy <dan@dennedy.org> for providing wealth
 * of DV technical info.
 *
 * Raw DV format
 * Copyright (c) 2002 Fabrice Bellard
 *
 * 50 Mbps (DVCPRO50) support
 * Copyright (c) 2006 Daniel Maas <dmaas@maasdigital.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include <time.h>
31
#include <stdarg.h>
32

33
#include "avformat.h"
34
#include "internal.h"
35
#include "libavcodec/dv_profile.h"
36
#include "libavcodec/dv.h"
37
#include "dv.h"
38
#include "libavutil/avassert.h"
39
#include "libavutil/fifo.h"
40
#include "libavutil/mathematics.h"
41 42
#include "libavutil/intreadwrite.h"
#include "libavutil/opt.h"
43
#include "libavutil/timecode.h"
44

45
#define MAX_AUDIO_FRAME_SIZE 192000 // 1 second of 48 kHz audio at 4 bytes per sample (e.g. 16-bit stereo)
46

47
struct DVMuxContext {
48
    AVClass          *av_class;
49
    const AVDVProfile*  sys;           /* current DV profile, e.g.: 525/60, 625/50 */
Diego Biurrun's avatar
Diego Biurrun committed
50 51
    int               n_ast;         /* number of stereo audio streams (up to 2) */
    AVStream         *ast[2];        /* stereo audio streams */
52
    AVFifoBuffer     *audio_data[2]; /* FIFO for storing excessive amounts of PCM */
Diego Biurrun's avatar
Diego Biurrun committed
53
    int               frames;        /* current frame number */
54
    int64_t           start_time;    /* recording start time */
Diego Biurrun's avatar
Diego Biurrun committed
55 56 57
    int               has_audio;     /* frame under construction has audio */
    int               has_video;     /* frame under construction has video */
    uint8_t           frame_buf[DV_MAX_FRAME_SIZE]; /* frame under construction */
58
    AVTimecode        tc;            /* timecode context */
59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
};

/*
 * Pack id written into each of the 9 audio DIF blocks of a DIF sequence,
 * indexed as [sequence][audio block]. The entries are handed to
 * dv_write_pack() as pack ids; 0xff selects its default branch, which
 * fills the pack payload with 0xff. Rows simply alternate between two
 * layouts.
 */
#define DV_AAUX_ROW_A { 0xff, 0xff, 0xff, 0x50, 0x51, 0x52, 0x53, 0xff, 0xff }
#define DV_AAUX_ROW_B { 0x50, 0x51, 0x52, 0x53, 0xff, 0xff, 0xff, 0xff, 0xff }
static const int dv_aaux_packs_dist[12][9] = {
    DV_AAUX_ROW_A, DV_AAUX_ROW_B,
    DV_AAUX_ROW_A, DV_AAUX_ROW_B,
    DV_AAUX_ROW_A, DV_AAUX_ROW_B,
    DV_AAUX_ROW_A, DV_AAUX_ROW_B,
    DV_AAUX_ROW_A, DV_AAUX_ROW_B,
    DV_AAUX_ROW_A, DV_AAUX_ROW_B,
};
#undef DV_AAUX_ROW_A
#undef DV_AAUX_ROW_B

76
static int dv_audio_frame_size(const AVDVProfile* sys, int frame, int sample_rate)
77
{
78 79 80 81 82 83 84 85
    if ((sys->time_base.den == 25 || sys->time_base.den == 50) && sys->time_base.num == 1) {
        if      (sample_rate == 32000) return 1280;
        else if (sample_rate == 44100) return 1764;
        else                           return 1920;
    }

    av_assert0(sample_rate == 48000);

Diego Biurrun's avatar
Diego Biurrun committed
86
    return sys->audio_samples_dist[frame % (sizeof(sys->audio_samples_dist) /
87 88 89
                                            sizeof(sys->audio_samples_dist[0]))];
}

90
/* Encode v as two decimal digits: tens shifted into bits 4 and up, units in bits 0-3. */
static int dv_two_digits(int v)
{
    return ((v / 10) << 4) | (v % 10);
}

/*
 * Break down the time stamp of the current frame: start_time plus the frame
 * count rescaled by the profile time base, rounded down.
 */
static void dv_current_time(DVMuxContext *c, struct tm *out)
{
    time_t now = c->start_time + av_rescale_rnd(c->frames, c->sys->time_base.num,
                                                c->sys->time_base.den, AV_ROUND_DOWN);
    ff_brktimegm(now, out);
}

/*
 * Write one 5-byte pack (id byte followed by 4 payload bytes) at buf.
 *
 * Only dv_audio_source reads variadic arguments: first an int channel
 * index into c->ast[], then an int whose truth value becomes the audio
 * mode bit. Pack ids without a dedicated case get a payload of 0xff bytes.
 *
 * Returns the number of bytes written, which is always 5.
 */
static int dv_write_pack(enum dv_pack_type pack_id, DVMuxContext *c, uint8_t* buf, ...)
{
    struct tm when;
    uint32_t smpte;
    va_list args;
    int rate_code = 0; /* frequency field: 0 -- 48kHz, 1 -- 44,1kHz, 2 -- 32kHz */
    int ch, rate, mode, speed;

    buf[0] = (uint8_t)pack_id;

    switch (pack_id) {
    case dv_timecode:
        smpte  = av_timecode_get_smpte_from_framenum(&c->tc, c->frames);
        smpte |= 1<<23 | 1<<15 | 1<<7 | 1<<6; // biphase and binary group flags
        AV_WB32(buf + 1, smpte);
        break;

    case dv_audio_source:  /* AAUX source pack */
        va_start(args, buf);
        ch   = va_arg(args, int);
        mode = va_arg(args, int);
        va_end(args);

        rate = c->ast[ch]->codecpar->sample_rate;
        if (rate == 44100)
            rate_code = 1;
        else if (rate == 32000)
            rate_code = 2;

        /* locked mode (SMPTE only supports locked mode), reserved bit,
         * then the sample count relative to the profile minimum */
        buf[1] = (1 << 7) |
                 (1 << 6) |
                 (dv_audio_frame_size(c->sys, c->frames, rate) -
                  c->sys->audio_min_samples[rate_code]);
        /* multi-stereo 0, one channel per block, one pair of channels,
         * audio mode in the lowest bit */
        buf[2] = (0 << 7) |
                 (0 << 5) |
                 (0 << 4) |
                 !!mode;
        /* reserved, multi-language flag, 60/50 fields system,
         * definition: 0 -- 25Mbps, 2 -- 50Mbps */
        buf[3] = (1 << 7) |
                 (1 << 6) |
                 (c->sys->dsf << 5) |
                 (c->sys->n_difchan & 2);
        /* emphasis off, reserved time constant, sampling frequency code,
         * quantization 0 -- 16-bit linear */
        buf[4] = (1 << 7) |
                 (0 << 6) |
                 (rate_code << 3) |
                  0;
        break;

    case dv_audio_control:
        if (c->sys->pix_fmt == AV_PIX_FMT_YUV420P)
            speed = 0x20;
        else
            speed = c->sys->ltc_divisor * 4;

        /* unrestricted copying, digital input source, compression
         * "no information", misc. info / SMPTE emphasis off */
        buf[1] = (0 << 6) |
                 (1 << 4) |
                 (3 << 2) |
                  0;
        /* no recording start point, no recording end point,
         * recording mode "original" */
        buf[2] = (1 << 7) |
                 (1 << 6) |
                 (1 << 3) |
                  7;
        /* forward direction plus speed */
        buf[3] = (1 << 7) | speed;
        /* reserved bit plus genre category */
        buf[4] = (1 << 7) |
                  0x7f;
        break;

    case dv_audio_recdate:
    case dv_video_recdate:  /* VAUX recording date */
        dv_current_time(c, &when);
        /* ds, tm, tens/units of time zone: 0xff is very likely "unknown" */
        buf[1] = 0xff;
        buf[2] = (3 << 6) |                          /* reserved -- always 1 */
                 dv_two_digits(when.tm_mday);        /* day */
        /* the high bits are left at 0 here; the week might belong there */
        buf[3] = dv_two_digits(when.tm_mon);         /* month */
        buf[4] = (((when.tm_year % 100) / 10) << 4) | /* tens of year */
                 (when.tm_year % 10);                 /* units of year */
        break;

    case dv_audio_rectime:  /* AAUX recording time */
    case dv_video_rectime:  /* VAUX recording time */
        dv_current_time(c, &when);
        buf[1] = (3 << 6) |                          /* reserved -- always 1 */
                 0x3f;                               /* frame digits: 0x3f - "unknown" ? */
        buf[2] = (1 << 7) |                          /* reserved -- always 1 */
                 dv_two_digits(when.tm_sec);         /* seconds */
        buf[3] = (1 << 7) |                          /* reserved -- always 1 */
                 dv_two_digits(when.tm_min);         /* minutes */
        buf[4] = (3 << 6) |                          /* reserved -- always 1 */
                 dv_two_digits(when.tm_hour);        /* hours */
        break;

    default:
        buf[1] = 0xff;
        buf[2] = 0xff;
        buf[3] = 0xff;
        buf[4] = 0xff;
    }

    return 5;
}

/*
 * Place the audio of stream `channel` for the current frame into the frame
 * at frame_ptr.
 *
 * For every DIF sequence and each of its 9 audio blocks this writes the
 * AAUX pack selected by dv_aaux_packs_dist and then fills the sample area
 * from the stream's FIFO (peeking only, nothing is consumed here), using
 * the profile's shuffle table and stride to pick source samples. Positions
 * whose source offset lies beyond this frame's audio are left untouched.
 */
static void dv_inject_audio(DVMuxContext *c, int channel, uint8_t* frame_ptr)
{
    const int nbytes = 4 * dv_audio_frame_size(c->sys, c->frames,
                                               c->ast[channel]->codecpar->sample_rate);
    uint8_t *dif = frame_ptr + channel * c->sys->difseg_size * 150 * 80;
    int seq, blk, pos, src;

    for (seq = 0; seq < c->sys->difseg_size; seq++) {
        dif += 6 * 80; /* step over the DIF segment header */

        for (blk = 0; blk < 9; blk++) {
            dv_write_pack(dv_aaux_packs_dist[seq][blk], c, &dif[3],
                          channel, seq >= c->sys->difseg_size/2);

            for (pos = 8; pos < 80; pos += 2) {
                src = c->sys->audio_shuffle[seq][blk] + (pos - 8)/2 * c->sys->audio_stride;
                if (src*2 < nbytes) {
                    /* the two bytes of each FIFO sample are stored swapped */
                    dif[pos]     = *av_fifo_peek2(c->audio_data[channel], src*2+1);
                    dif[pos + 1] = *av_fifo_peek2(c->audio_data[channel], src*2);
                }
            }

            dif += 16 * 80; /* 15 Video DIFs + 1 Audio DIF */
        }
    }
}

/*
 * Stamp timecode and recording date/time packs into every 150-block
 * (150 * 80 byte) unit of the frame.
 *
 * In the two subcode blocks (2nd and 3rd DIF) six timecode packs are
 * written; for units whose sequence index (modulo difseg_size) is above 5,
 * four of those slots are then overwritten with recording date/time packs.
 * The three VAUX blocks (4th to 6th DIF) each get two date/time pairs.
 */
static void dv_inject_metadata(DVMuxContext *c, uint8_t* frame)
{
    uint8_t *seq;
    int dif, slot;

    for (seq = frame; seq < frame + c->sys->frame_size; seq += 150 * 80) {
        /* FIXME: is this really needed ? */
        const int with_rec_info =
            ((long)(seq-frame)/(c->sys->frame_size/(c->sys->difseg_size*c->sys->n_difchan))%c->sys->difseg_size) > 5;

        /* DV subcode: 2nd and 3rd DIFs */
        for (dif = 1; dif < 3; dif++) {
            uint8_t *subcode = seq + dif * 80;

            for (slot = 0; slot < 6; slot++)
                dv_write_pack(dv_timecode, c, subcode + 6 + 8 * slot);

            if (with_rec_info) {
                dv_write_pack(dv_video_recdate, c, subcode + 14);
                dv_write_pack(dv_video_rectime, c, subcode + 22);
                dv_write_pack(dv_video_recdate, c, subcode + 38);
                dv_write_pack(dv_video_rectime, c, subcode + 46);
            }
        }

        /* DV VAUX: 4th, 5th and 6th DIFs */
        for (dif = 3; dif < 6; dif++) {
            uint8_t *vaux = seq + dif * 80 + 3;

            dv_write_pack(dv_video_recdate, c, vaux + 5*2);
            dv_write_pack(dv_video_rectime, c, vaux + 5*3);
            dv_write_pack(dv_video_recdate, c, vaux + 5*11);
            dv_write_pack(dv_video_rectime, c, vaux + 5*12);
        }
    }
}

/*
 * The following 3 functions constitute our interface to the world
 */

242 243
/*
 * Feed one packet into the DV frame under construction.
 *
 * Video packets are copied into c->frame_buf; audio packets are queued in
 * the FIFO of the matching audio stream. Once video is present and every
 * audio stream has enough PCM queued for the current frame, metadata and
 * audio are injected, the consumed audio is drained and the frame counter
 * advances.
 *
 * @param s         muxer context, used for logging
 * @param c         DV muxer state
 * @param st        stream the packet belongs to
 * @param data      packet payload
 * @param data_size payload size in bytes
 * @param frame     set to the start of c->frame_buf
 * @return the frame size when a complete frame is ready in *frame,
 *         0 when more data is needed, or a negative AVERROR code when the
 *         packet was rejected (wrong video frame size, unknown audio
 *         stream, audio FIFO full)
 */
static int dv_assemble_frame(AVFormatContext *s,
                             DVMuxContext *c, AVStream* st,
                             uint8_t* data, int data_size, uint8_t** frame)
{
    int i, reqasize;

    *frame = &c->frame_buf[0];

    switch (st->codecpar->codec_type) {
    case AVMEDIA_TYPE_VIDEO:
        /* FIXME: we have to have more sensible approach than this one */
        if (c->has_video)
            av_log(s, AV_LOG_ERROR, "Can't process DV frame #%d. Insufficient audio data or severe sync problem.\n", c->frames);
        if (data_size != c->sys->frame_size) {
            av_log(s, AV_LOG_ERROR, "Unexpected frame size, %d != %d\n",
                   data_size, c->sys->frame_size);
            return AVERROR(ENOSYS);
        }

        memcpy(*frame, data, c->sys->frame_size);
        c->has_video = 1;
        break;
    case AVMEDIA_TYPE_AUDIO:
        for (i = 0; i < c->n_ast && st != c->ast[i]; i++);

        /* Without this check audio_data[] would be indexed with n_ast. */
        if (i == c->n_ast) {
            av_log(s, AV_LOG_ERROR, "Audio packet for a stream unknown to the DV muxer.\n");
            return AVERROR(EINVAL);
        }

        /* FIXME: we have to have more sensible approach than this one */
        /* The FIFO is allocated with 100*MAX_AUDIO_FRAME_SIZE bytes; writing
         * past that would clobber PCM that has not been muxed yet, so the
         * packet is rejected instead of being queued. */
        if (av_fifo_size(c->audio_data[i]) + data_size >= 100*MAX_AUDIO_FRAME_SIZE) {
            av_log(s, AV_LOG_ERROR, "Can't process DV frame #%d. Insufficient video data or severe sync problem.\n", c->frames);
            return AVERROR(EINVAL);
        }
        av_fifo_generic_write(c->audio_data[i], data, data_size, NULL);

        reqasize = 4 * dv_audio_frame_size(c->sys, c->frames, st->codecpar->sample_rate);

        /* Let us see if we've got enough audio for one DV frame. */
        c->has_audio |= ((reqasize <= av_fifo_size(c->audio_data[i])) << i);

        break;
    default:
        break;
    }

    /* Let us see if we have enough data to construct one DV frame:
     * video present and the has_audio bit of every audio stream set. */
    if (c->has_video == 1 && c->has_audio + 1 == 1 << c->n_ast) {
        dv_inject_metadata(c, *frame);
        c->has_audio = 0;
        for (i=0; i < c->n_ast; i++) {
            dv_inject_audio(c, i, *frame);
            reqasize = 4 * dv_audio_frame_size(c->sys, c->frames, c->ast[i]->codecpar->sample_rate);
            av_fifo_drain(c->audio_data[i], reqasize);
            /* whatever is left may already cover the next frame */
            c->has_audio |= ((reqasize <= av_fifo_size(c->audio_data[i])) << i);
        }

        c->has_video = 0;

        c->frames++;

        return c->sys->frame_size;
    }

    return 0;
}

303
/*
 * Validate the stream layout of s and set up the muxer state in
 * s->priv_data.
 *
 * Accepted input: exactly one DV video stream plus up to two audio streams
 * of 2-channel PCM S16LE at 48000, 44100 or 32000 Hz. Profiles whose time
 * base is not 1/25 or 1/50 only take 48000 Hz, and a second audio stream
 * requires a profile with at least two DIF channels. On success the
 * counters are reset, the start time is read from the creation-time
 * metadata and one FIFO per audio stream is allocated.
 *
 * Returns the muxer context, or NULL if the layout is unsupported or a
 * FIFO could not be allocated (FIFOs allocated so far are freed).
 */
static DVMuxContext* dv_init_mux(AVFormatContext* s)
{
    DVMuxContext *c = s->priv_data;
    AVStream *video = NULL;
    int i;

    /* we support at most 1 video and 2 audio streams */
    if (s->nb_streams > 3)
        return NULL;

    c->n_ast  = 0;
    c->ast[0] = NULL;
    c->ast[1] = NULL;

    /* Sort the streams into the video slot and the audio slots. */
    for (i = 0; i < s->nb_streams; i++) {
        AVStream *st = s->streams[i];
        const enum AVMediaType type = st->codecpar->codec_type;

        if (type == AVMEDIA_TYPE_VIDEO) {
            if (video)
                return NULL;
            video = st;
        } else if (type == AVMEDIA_TYPE_AUDIO) {
            if (c->n_ast > 1)
                return NULL;
            c->ast[c->n_ast++] = st;
        } else {
            return NULL;
        }
    }

    /* Some checks -- DV format is very picky about its incoming streams */
    if (!video || video->codecpar->codec_id != AV_CODEC_ID_DVVIDEO)
        return NULL;

    for (i = 0; i < c->n_ast; i++) {
        const AVCodecParameters *par;

        if (!c->ast[i])
            continue;
        par = c->ast[i]->codecpar;

        if (par->codec_id != AV_CODEC_ID_PCM_S16LE || par->channels != 2)
            return NULL;

        switch (par->sample_rate) {
        case 48000:
        case 44100:
        case 32000:
            break;
        default:
            return NULL;
        }
    }

    c->sys = av_dv_codec_profile2(video->codecpar->width, video->codecpar->height,
                                  video->codecpar->format, video->time_base);
    if (!c->sys)
        return NULL;

    /* Outside the 1/25 and 1/50 time bases only 48 kHz audio is accepted. */
    if (c->sys->time_base.num != 1 ||
        (c->sys->time_base.den != 25 && c->sys->time_base.den != 50)) {
        for (i = 0; i < 2; i++) {
            if (c->ast[i] && c->ast[i]->codecpar->sample_rate != 48000)
                return NULL;
        }
    }

    /* only 1 stereo pair is allowed in 25Mbps mode */
    if (c->n_ast > 1 && c->sys->n_difchan < 2)
        return NULL;

    /* Ok, everything seems to be in working order */
    c->frames    = 0;
    c->has_audio = 0;
    c->has_video = 0;
    ff_parse_creation_time_metadata(s, &c->start_time, 1);

    for (i = 0; i < c->n_ast; i++) {
        if (!c->ast[i])
            continue;

        c->audio_data[i] = av_fifo_alloc_array(100, MAX_AUDIO_FRAME_SIZE);
        if (!c->audio_data[i]) {
            /* undo the allocations made for the earlier streams */
            while (i-- > 0)
                av_fifo_freep(&c->audio_data[i]);
            return NULL;
        }
    }

    return c;
}

385
/* Free the audio FIFO of every audio stream registered in c. */
static void dv_delete_mux(DVMuxContext *c)
{
    int idx = 0;

    while (idx < c->n_ast) {
        av_fifo_freep(&c->audio_data[idx]);
        idx++;
    }
}

/*
 * write_header callback: initialise the muxer state and the timecode.
 *
 * The timecode rate is the profile's ltc_divisor. A "timecode" metadata
 * entry is looked up on the container first and then on each stream; if
 * none is found, or it cannot be parsed, the timecode starts from zero.
 *
 * Returns 0 on success, -1 if the stream layout is rejected, or the result
 * of av_timecode_init() when the fallback is used.
 */
static int dv_write_header(AVFormatContext *s)
{
    DVMuxContext *dvc = s->priv_data;
    AVDictionaryEntry *tcr = av_dict_get(s->metadata, "timecode", NULL, 0);
    AVRational rate;
    int i;

    if (!dv_init_mux(s)) {
        av_log(s, AV_LOG_ERROR, "Can't initialize DV format!\n"
                    "Make sure that you supply exactly two streams:\n"
                    "     video: 25fps or 29.97fps, audio: 2ch/48|44|32kHz/PCM\n"
                    "     (50Mbps allows an optional second audio stream)\n");
        return -1;
    }

    rate = (AVRational){ dvc->sys->ltc_divisor, 1 };

    /* no global timecode: take the first one found on a stream */
    for (i = 0; !tcr && i < s->nb_streams; i++)
        tcr = av_dict_get(s->streams[i]->metadata, "timecode", NULL, 0);

    if (!tcr || av_timecode_init_from_string(&dvc->tc, rate, tcr->value, s) < 0)
        return av_timecode_init(&dvc->tc, rate, 0, 0, s);

    return 0;
}

/*
 * write_packet callback: hand the packet to dv_assemble_frame() and write
 * the frame to the output once one is complete.
 *
 * Returns 0 when the packet was consumed (whether or not a frame was
 * emitted), or the negative error code reported by dv_assemble_frame().
 */
static int dv_write_packet(struct AVFormatContext *s, AVPacket *pkt)
{
    uint8_t* frame;
    int fsize;

    fsize = dv_assemble_frame(s, s->priv_data, s->streams[pkt->stream_index],
                              pkt->data, pkt->size, &frame);
    /* A rejected packet must not be reported as success to the caller. */
    if (fsize < 0)
        return fsize;
    if (fsize > 0)
        avio_write(s->pb, frame, fsize);

    return 0;
}

/*
 * We might end up with some extra A/V data without matching counterpart.
 * E.g. video data without enough audio to write the complete frame.
 * Currently we simply drop the last frame. I don't know whether this
 * is the best strategy of all
 */
static int dv_write_trailer(struct AVFormatContext *s)
{
441
    dv_delete_mux(s->priv_data);
442 443 444
    return 0;
}

445
AVOutputFormat ff_dv_muxer = {
446
    .name              = "dv",
447
    .long_name         = NULL_IF_CONFIG_SMALL("DV (Digital Video)"),
448 449
    .extensions        = "dv",
    .priv_data_size    = sizeof(DVMuxContext),
450 451
    .audio_codec       = AV_CODEC_ID_PCM_S16LE,
    .video_codec       = AV_CODEC_ID_DVVIDEO,
452 453 454
    .write_header      = dv_write_header,
    .write_packet      = dv_write_packet,
    .write_trailer     = dv_write_trailer,
455
};