/*
 * General DV muxer/demuxer
 * Copyright (c) 2003 Roman Shaposhnik
 *
 * Many thanks to Dan Dennedy <dan@dennedy.org> for providing wealth
 * of DV technical info.
 *
 * Raw DV format
 * Copyright (c) 2002 Fabrice Bellard
 *
 * 50 Mbps (DVCPRO50) support
 * Copyright (c) 2006 Daniel Maas <dmaas@maasdigital.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include <stdarg.h>
#include <string.h>
#include <time.h>

#include "avformat.h"
#include "internal.h"
#include "libavcodec/dv_profile.h"
#include "libavcodec/dv.h"
#include "dv.h"
#include "libavutil/avassert.h"
#include "libavutil/fifo.h"
#include "libavutil/mathematics.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/opt.h"
#include "libavutil/timecode.h"

44 45
/* Byte count of one second of 48 kHz audio at 4 bytes per sample frame
 * (48000 * 4 = 192000). Each per-stream audio FIFO in this file is sized
 * to 100 of these units, and the overflow check uses the same product. */
#define MAX_AUDIO_FRAME_SIZE 192000 // 1 second of 48khz 32bit audio

46
struct DVMuxContext {
47
    AVClass          *av_class;
48
    const AVDVProfile*  sys;           /* current DV profile, e.g.: 525/60, 625/50 */
Diego Biurrun's avatar
Diego Biurrun committed
49 50
    int               n_ast;         /* number of stereo audio streams (up to 2) */
    AVStream         *ast[2];        /* stereo audio streams */
51
    AVFifoBuffer     *audio_data[2]; /* FIFO for storing excessive amounts of PCM */
Diego Biurrun's avatar
Diego Biurrun committed
52
    int               frames;        /* current frame number */
53
    int64_t           start_time;    /* recording start time */
Diego Biurrun's avatar
Diego Biurrun committed
54 55 56
    int               has_audio;     /* frame under construction has audio */
    int               has_video;     /* frame under construction has video */
    uint8_t           frame_buf[DV_MAX_FRAME_SIZE]; /* frame under construction */
57
    AVTimecode        tc;            /* timecode context */
58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74
};

/*
 * Pack id written into each of the 9 audio DIF blocks (columns) of a DIF
 * sequence (rows); dv_inject_audio() hands every entry to dv_write_pack().
 * Rows alternate between two layouts: even rows carry ids 0x50..0x53 in
 * blocks 3..6, odd rows carry them in blocks 0..3. Every other slot is 0xff.
 */
#define DV_AAUX_ROW_EVEN { 0xff, 0xff, 0xff, 0x50, 0x51, 0x52, 0x53, 0xff, 0xff }
#define DV_AAUX_ROW_ODD  { 0x50, 0x51, 0x52, 0x53, 0xff, 0xff, 0xff, 0xff, 0xff }
static const int dv_aaux_packs_dist[12][9] = {
    DV_AAUX_ROW_EVEN, DV_AAUX_ROW_ODD,
    DV_AAUX_ROW_EVEN, DV_AAUX_ROW_ODD,
    DV_AAUX_ROW_EVEN, DV_AAUX_ROW_ODD,
    DV_AAUX_ROW_EVEN, DV_AAUX_ROW_ODD,
    DV_AAUX_ROW_EVEN, DV_AAUX_ROW_ODD,
    DV_AAUX_ROW_EVEN, DV_AAUX_ROW_ODD,
};
#undef DV_AAUX_ROW_EVEN
#undef DV_AAUX_ROW_ODD

75
static int dv_audio_frame_size(const AVDVProfile* sys, int frame, int sample_rate)
76
{
77 78 79 80 81 82 83 84
    if ((sys->time_base.den == 25 || sys->time_base.den == 50) && sys->time_base.num == 1) {
        if      (sample_rate == 32000) return 1280;
        else if (sample_rate == 44100) return 1764;
        else                           return 1920;
    }

    av_assert0(sample_rate == 48000);

Diego Biurrun's avatar
Diego Biurrun committed
85
    return sys->audio_samples_dist[frame % (sizeof(sys->audio_samples_dist) /
86 87 88
                                            sizeof(sys->audio_samples_dist[0]))];
}

89
/**
 * Write one 5-byte DV pack (1 id byte + 4 payload bytes) at buf.
 *
 * @param pack_id kind of pack to emit; it is also stored verbatim in buf[0]
 * @param c       muxer state (profile, frame counter, timecode, streams)
 * @param buf     destination, at least 5 writable bytes
 * @param ...     only read for dv_audio_source: first an int with the audio
 *                stream index, then an int that is non-zero to set the audio
 *                mode bit
 * @return always 5, the number of bytes written
 *
 * Unknown pack ids produce a payload of four 0xff bytes.
 */
static int dv_write_pack(enum dv_pack_type pack_id, DVMuxContext *c, uint8_t *buf, ...)
{
    struct tm tc;
    time_t ct;
    uint32_t timecode;
    va_list ap;
    int audio_type = 0;
    int audio_mode;
    int channel;

    buf[0] = (uint8_t)pack_id;
    switch (pack_id) {
    case dv_timecode:
        timecode  = av_timecode_get_smpte_from_framenum(&c->tc, c->frames);
        timecode |= 1<<23 | 1<<15 | 1<<7 | 1<<6; // biphase and binary group flags
        AV_WB32(buf + 1, timecode);
        break;
    case dv_audio_source:  /* AAUX source pack */
        /* Both variadic arguments are fetched up front, in call order. */
        va_start(ap, buf);
        channel    = va_arg(ap, int);
        audio_mode = va_arg(ap, int);
        va_end(ap);

        if (c->ast[channel]->codec->sample_rate == 44100) {
            audio_type = 1;
        } else if (c->ast[channel]->codec->sample_rate == 32000)
            audio_type = 2;
        buf[1] = (1 << 7) | /* locked mode -- SMPTE only supports locked mode */
                 (1 << 6) | /* reserved -- always 1 */
                 (dv_audio_frame_size(c->sys, c->frames, c->ast[channel]->codec->sample_rate) -
                  c->sys->audio_min_samples[audio_type]);
                            /* # of samples      */
        buf[2] = (0 << 7) | /* multi-stereo      */
                 (0 << 5) | /* #of audio channels per block: 0 -- 1 channel */
                 (0 << 4) | /* pair bit: 0 -- one pair of channels */
                 !!audio_mode; /* audio mode        */
        buf[3] = (1 << 7) | /* res               */
                 (1 << 6) | /* multi-language flag */
                 (c->sys->dsf << 5) | /*  system: 60fields/50fields */
                 (c->sys->n_difchan & 2); /* definition: 0 -- 25Mbps, 2 -- 50Mbps */
        buf[4] = (1 << 7) | /* emphasis: 1 -- off */
                 (0 << 6) | /* emphasis time constant: 0 -- reserved */
                 (audio_type << 3) | /* frequency: 0 -- 48kHz, 1 -- 44,1kHz, 2 -- 32kHz */
                  0;        /* quantization: 0 -- 16bit linear, 1 -- 12bit nonlinear */
        break;
    case dv_audio_control:
        buf[1] = (0 << 6) | /* copy protection: 0 -- unrestricted */
                 (1 << 4) | /* input source: 1 -- digital input */
                 (3 << 2) | /* compression: 3 -- no information */
                  0;        /* misc. info/SMPTE emphasis off */
        buf[2] = (1 << 7) | /* recording start point: 1 -- no */
                 (1 << 6) | /* recording end point: 1 -- no */
                 (1 << 3) | /* recording mode: 1 -- original */
                  7;
        buf[3] = (1 << 7) | /* direction: 1 -- forward */
                 (c->sys->pix_fmt == AV_PIX_FMT_YUV420P ? 0x20 : /* speed */
                                                       c->sys->ltc_divisor * 4);
        buf[4] = (1 << 7) | /* reserved -- always 1 */
                  0x7f;     /* genre category */
        break;
    case dv_audio_recdate:
    case dv_video_recdate:  /* VAUX recording date */
        /* Wall-clock time of this frame: start time plus elapsed whole seconds. */
        ct = c->start_time + av_rescale_rnd(c->frames, c->sys->time_base.num,
                                            c->sys->time_base.den, AV_ROUND_DOWN);
        /* NOTE(review): the BCD packing below relies on the month/year
         * convention of ff_brktimegm(), which is not visible here -- confirm. */
        ff_brktimegm(ct, &tc);
        buf[1] = 0xff; /* ds, tm, tens of time zone, units of time zone */
                       /* 0xff is very likely to be "unknown" */
        buf[2] = (3 << 6) | /* reserved -- always 1 */
                 ((tc.tm_mday / 10) << 4) | /* Tens of day */
                 (tc.tm_mday % 10);         /* Units of day */
        buf[3] = /* we set high 4 bits to 0, shouldn't we set them to week? */
                 ((tc.tm_mon / 10) << 4) |    /* Tens of month */
                 (tc.tm_mon  % 10);           /* Units of month */
        buf[4] = (((tc.tm_year % 100) / 10) << 4) | /* Tens of year */
                 (tc.tm_year % 10);                 /* Units of year */
        break;
    case dv_audio_rectime:  /* AAUX recording time */
    case dv_video_rectime:  /* VAUX recording time */
        ct = c->start_time + av_rescale_rnd(c->frames, c->sys->time_base.num,
                                                       c->sys->time_base.den, AV_ROUND_DOWN);
        ff_brktimegm(ct, &tc);
        buf[1] = (3 << 6) | /* reserved -- always 1 */
                 0x3f; /* tens of frame, units of frame: 0x3f - "unknown" ? */
        buf[2] = (1 << 7) | /* reserved -- always 1 */
                 ((tc.tm_sec / 10) << 4) | /* Tens of seconds */
                 (tc.tm_sec % 10);         /* Units of seconds */
        buf[3] = (1 << 7) | /* reserved -- always 1 */
                 ((tc.tm_min / 10) << 4) | /* Tens of minutes */
                 (tc.tm_min % 10);         /* Units of minutes */
        buf[4] = (3 << 6) | /* reserved -- always 1 */
                 ((tc.tm_hour / 10) << 4) | /* Tens of hours */
                 (tc.tm_hour % 10);         /* Units of hours */
        break;
    default:
        buf[1] = buf[2] = buf[3] = buf[4] = 0xff;
    }
    return 5;
}

/**
 * Copy one frame worth of PCM for an audio stream into the audio DIF blocks
 * of the frame under construction and write the AAUX pack of each block.
 *
 * @param c         muxer state; samples are peeked from c->audio_data[channel]
 *                  and are NOT drained here
 * @param channel   index of the audio stream (0 or 1)
 * @param frame_ptr start of the DV frame buffer
 *
 * Samples are placed according to the profile's audio_shuffle table and
 * audio_stride. Slots whose source offset lies beyond the PCM available for
 * this frame are left untouched.
 */
static void dv_inject_audio(DVMuxContext *c, int channel, uint8_t *frame_ptr)
{
    int i, j, d, of, size;

    /* bytes of PCM that belong to this frame */
    size = 4 * dv_audio_frame_size(c->sys, c->frames, c->ast[channel]->codec->sample_rate);
    /* each audio stream starts difseg_size DIF sequences (150 * 80 bytes each) further in */
    frame_ptr += channel * c->sys->difseg_size * 150 * 80;
    for (i = 0; i < c->sys->difseg_size; i++) {
        frame_ptr += 6 * 80; /* skip DIF segment header */
        for (j = 0; j < 9; j++) {
            /* last argument: audio mode flag, set for the second half of the DIF sequences */
            dv_write_pack(dv_aaux_packs_dist[i][j], c, &frame_ptr[3], channel, i >= c->sys->difseg_size/2);
            for (d = 8; d < 80; d+=2) {
                of = c->sys->audio_shuffle[i][j] + (d - 8)/2 * c->sys->audio_stride;
                if (of*2 >= size)
                    continue;

                /* the two bytes of each 16-bit sample are swapped on the way in */
                frame_ptr[d]   = *av_fifo_peek2(c->audio_data[channel], of*2+1); // FIXME: maybe we have to admit
                frame_ptr[d+1] = *av_fifo_peek2(c->audio_data[channel], of*2);   //        that DV is a big-endian PCM
            }
            frame_ptr += 16 * 80; /* 15 Video DIFs + 1 Audio DIF */
        }
    }
}

/**
 * Stamp timecode and recording date/time packs into every DIF sequence of a
 * frame.
 *
 * @param c     muxer state used by dv_write_pack()
 * @param frame start of the DV frame buffer (c->sys->frame_size bytes)
 */
static void dv_inject_metadata(DVMuxContext *c, uint8_t* frame)
{
    uint8_t *const frame_end = frame + c->sys->frame_size;
    uint8_t *seq;

    /* one DIF sequence is 150 DIF blocks of 80 bytes */
    for (seq = frame; seq < frame_end; seq += 150 * 80) {
        /* position of this sequence within its channel */
        const long seq_idx = (long)(seq - frame) /
                             (c->sys->frame_size / (c->sys->difseg_size * c->sys->n_difchan)) %
                             c->sys->difseg_size;
        int dif, slot;

        /* DV subcode: 2nd and 3rd DIF blocks */
        for (dif = 1; dif <= 2; dif++) {
            uint8_t *subcode = seq + 80 * dif;

            /* six packs per block, 8 bytes apart, first one at offset 6 */
            for (slot = 0; slot < 6; slot++)
                dv_write_pack(dv_timecode, c, &subcode[6 + 8 * slot]);

            if (seq_idx > 5) { /* FIXME: is this really needed ? */
                dv_write_pack(dv_video_recdate, c, &subcode[14]);
                dv_write_pack(dv_video_rectime, c, &subcode[22]);
                dv_write_pack(dv_video_recdate, c, &subcode[38]);
                dv_write_pack(dv_video_rectime, c, &subcode[46]);
            }
        }

        /* DV VAUX: 4th, 5th and 6th DIF blocks, packs start 3 bytes in */
        for (dif = 3; dif < 6; dif++) {
            uint8_t *vaux = seq + 80 * dif + 3;

            dv_write_pack(dv_video_recdate, c, &vaux[5 * 2]);
            dv_write_pack(dv_video_rectime, c, &vaux[5 * 3]);
            dv_write_pack(dv_video_recdate, c, &vaux[5 * 11]);
            dv_write_pack(dv_video_rectime, c, &vaux[5 * 12]);
        }
    }
}

/*
 * The following 3 functions constitute our interface to the world
 */

/**
 * Feed one packet into the frame under construction and, once video and
 * enough audio for every stream are present, finish the DV frame.
 *
 * @param c         muxer state
 * @param st        stream the packet belongs to
 * @param data      packet payload (a whole DV video frame, or PCM)
 * @param data_size payload size in bytes
 * @param frame     set to the internal frame buffer; only meaningful when
 *                  the return value is positive
 * @return size of the finished frame in bytes, 0 if more data is needed,
 *         or a negative value if the packet had to be rejected (same -1
 *         convention as dv_write_header())
 *
 * A packet is rejected when a video packet is shorter than the profile's
 * frame size, when an audio packet belongs to no known audio stream, or when
 * queueing it would overflow the stream's FIFO.
 */
static int dv_assemble_frame(DVMuxContext *c, AVStream *st,
                             uint8_t *data, int data_size, uint8_t **frame)
{
    int i, reqasize;

    *frame = &c->frame_buf[0];

    switch (st->codec->codec_type) {
    case AVMEDIA_TYPE_VIDEO:
        /* The copy below always reads frame_size bytes; never read past a short packet. */
        if (data_size < c->sys->frame_size) {
            av_log(st->codec, AV_LOG_ERROR, "Unexpected frame size, %d < %d\n",
                   data_size, c->sys->frame_size);
            return -1;
        }

        /* FIXME: we have to have more sensible approach than this one */
        if (c->has_video)
            av_log(st->codec, AV_LOG_ERROR, "Can't process DV frame #%d. Insufficient audio data or severe sync problem.\n", c->frames);

        memcpy(*frame, data, c->sys->frame_size);
        c->has_video = 1;
        break;
    case AVMEDIA_TYPE_AUDIO:
        for (i = 0; i < c->n_ast && st != c->ast[i]; i++);
        /* Guard against indexing audio_data[] with a stream we do not own. */
        if (i == c->n_ast) {
            av_log(st->codec, AV_LOG_ERROR, "Audio packet does not belong to a muxed audio stream.\n");
            return -1;
        }

        /* FIXME: we have to have more sensible approach than this one */
        /* Writing into a full FIFO would overwrite PCM that has not been
         * consumed yet, so the packet is dropped instead. */
        if (av_fifo_size(c->audio_data[i]) + data_size >= 100*MAX_AUDIO_FRAME_SIZE) {
            av_log(st->codec, AV_LOG_ERROR, "Can't process DV frame #%d. Insufficient video data or severe sync problem.\n", c->frames);
            return -1;
        }
        av_fifo_generic_write(c->audio_data[i], data, data_size, NULL);

        reqasize = 4 * dv_audio_frame_size(c->sys, c->frames, st->codec->sample_rate);

        /* Let us see if we've got enough audio for one DV frame. */
        c->has_audio |= ((reqasize <= av_fifo_size(c->audio_data[i])) << i);

        break;
    default:
        break;
    }

    /* Let us see if we have enough data to construct one DV frame. */
    /* has_audio is a bitmask; all n_ast low bits set means every stream is ready */
    if (c->has_video == 1 && c->has_audio + 1 == 1 << c->n_ast) {
        dv_inject_metadata(c, *frame);
        c->has_audio = 0;
        for (i = 0; i < c->n_ast; i++) {
            dv_inject_audio(c, i, *frame);
            reqasize = 4 * dv_audio_frame_size(c->sys, c->frames, c->ast[i]->codec->sample_rate);
            av_fifo_drain(c->audio_data[i], reqasize);
            /* leftover PCM may already cover the next frame */
            c->has_audio |= ((reqasize <= av_fifo_size(c->audio_data[i])) << i);
        }

        c->has_video = 0;

        c->frames++;

        return c->sys->frame_size;
    }

    return 0;
}

296
/**
 * Validate the streams of a muxing context and set up the DV muxer state.
 *
 * Accepted layout: exactly one DV video stream plus up to two audio streams,
 * each PCM S16LE, 2 channels, 48000/44100/32000 Hz. Profiles whose time base
 * is not 1/25 or 1/50 only accept 48000 Hz, and a second audio stream needs a
 * profile with at least two DIF channels.
 *
 * @param s muxing context; its priv_data is the DVMuxContext to initialize
 * @return the initialized context, or NULL if the streams are unsupported or
 *         an audio FIFO could not be allocated (FIFOs allocated so far are
 *         freed again in that case)
 */
static DVMuxContext *dv_init_mux(AVFormatContext *s)
{
    DVMuxContext *c = s->priv_data;
    AVStream *vst = NULL;
    AVDictionaryEntry *t;
    int i;

    /* we support at most 1 video and 2 audio streams */
    if (s->nb_streams > 3)
        return NULL;

    c->n_ast  = 0;
    c->ast[0] = c->ast[1] = NULL;

    /* We have to sort out where audio and where video stream is */
    for (i = 0; i < s->nb_streams; i++) {
        switch (s->streams[i]->codec->codec_type) {
        case AVMEDIA_TYPE_VIDEO:
            if (vst)
                goto bail_out;
            vst = s->streams[i];
            break;
        case AVMEDIA_TYPE_AUDIO:
            if (c->n_ast > 1)
                goto bail_out;
            c->ast[c->n_ast++] = s->streams[i];
            break;
        default:
            goto bail_out;
        }
    }

    /* Some checks -- DV format is very picky about its incoming streams */
    if (!vst || vst->codec->codec_id != AV_CODEC_ID_DVVIDEO)
        goto bail_out;
    for (i = 0; i < c->n_ast; i++) {
        if (c->ast[i]) {
            if (c->ast[i]->codec->codec_id != AV_CODEC_ID_PCM_S16LE ||
                c->ast[i]->codec->channels != 2)
                goto bail_out;
            if (c->ast[i]->codec->sample_rate != 48000 &&
                c->ast[i]->codec->sample_rate != 44100 &&
                c->ast[i]->codec->sample_rate != 32000)
                goto bail_out;
        }
    }
    c->sys = av_dv_codec_profile2(vst->codec->width, vst->codec->height,
                                  vst->codec->pix_fmt, vst->codec->time_base);
    if (!c->sys)
        goto bail_out;

    /* outside the 1/25 and 1/50 time bases only 48 kHz audio is supported */
    if ((c->sys->time_base.den != 25 && c->sys->time_base.den != 50) || c->sys->time_base.num != 1) {
        if (c->ast[0] && c->ast[0]->codec->sample_rate != 48000)
            goto bail_out;
        if (c->ast[1] && c->ast[1]->codec->sample_rate != 48000)
            goto bail_out;
    }

    if ((c->n_ast > 1) && (c->sys->n_difchan < 2)) {
        /* only 1 stereo pair is allowed in 25Mbps mode */
        goto bail_out;
    }

    /* Ok, everything seems to be in working order */
    c->frames     = 0;
    c->has_audio  = 0;
    c->has_video  = 0;
    t = av_dict_get(s->metadata, "creation_time", NULL, 0);
    if (t)
        c->start_time = ff_iso8601_to_unix_time(t->value);

    for (i = 0; i < c->n_ast; i++) {
        if (c->ast[i] && !(c->audio_data[i] = av_fifo_alloc_array(100, MAX_AUDIO_FRAME_SIZE))) {
            /* undo the allocations that already succeeded */
            while (i > 0) {
                i--;
                av_fifo_freep(&c->audio_data[i]);
            }
            goto bail_out;
        }
    }

    return c;

bail_out:
    return NULL;
}

380
/**
 * Release the audio FIFOs owned by the muxer state.
 *
 * @param c muxer state; the FIFO of each of its n_ast audio streams is
 *          passed to av_fifo_freep()
 */
static void dv_delete_mux(DVMuxContext *c)
{
    int i;

    for (i = 0; i < c->n_ast; i++)
        av_fifo_freep(&c->audio_data[i]);
}

/**
 * Muxer write_header callback: validate the streams, initialize the muxer
 * state and set up the timecode generator.
 *
 * The start timecode is taken from the "timecode" metadata of the container
 * or, failing that, of the first stream that has one. If none is present, or
 * it cannot be parsed, the timecode starts at frame 0.
 *
 * @param s muxing context
 * @return 0 on success, a negative value on failure
 */
static int dv_write_header(AVFormatContext *s)
{
    AVRational rate;
    DVMuxContext *dvc = s->priv_data;
    AVDictionaryEntry *tcr = av_dict_get(s->metadata, "timecode", NULL, 0);
    int ret;

    if (!dv_init_mux(s)) {
        av_log(s, AV_LOG_ERROR, "Can't initialize DV format!\n"
                    "Make sure that you supply exactly two streams:\n"
                    "     video: 25fps or 29.97fps, audio: 2ch/48|44|32kHz/PCM\n"
                    "     (50Mbps allows an optional second audio stream)\n");
        return -1;
    }
    rate.num = dvc->sys->ltc_divisor;
    rate.den = 1;
    if (!tcr) { // no global timecode, look into the streams
        int i;
        for (i = 0; i < s->nb_streams; i++) {
            tcr = av_dict_get(s->streams[i]->metadata, "timecode", NULL, 0);
            if (tcr)
                break;
        }
    }
    if (tcr && av_timecode_init_from_string(&dvc->tc, rate, tcr->value, s) >= 0)
        return 0;

    ret = av_timecode_init(&dvc->tc, rate, 0, 0, s);
    /* dv_init_mux() has already allocated the audio FIFOs; do not leak them
     * when the header cannot be completed. */
    if (ret < 0)
        dv_delete_mux(dvc);
    return ret;
}

/**
 * Muxer write_packet callback: hand the packet to dv_assemble_frame() and
 * write the DV frame to the output once one has been completed.
 *
 * @param s   muxing context
 * @param pkt packet of one of the muxed streams
 * @return 0 when the packet was consumed (whether or not a frame was
 *         written), or the negative value reported by dv_assemble_frame()
 */
static int dv_write_packet(struct AVFormatContext *s, AVPacket *pkt)
{
    uint8_t *frame;
    int fsize;

    fsize = dv_assemble_frame(s->priv_data, s->streams[pkt->stream_index],
                              pkt->data, pkt->size, &frame);
    /* do not report success for a packet the assembler rejected */
    if (fsize < 0)
        return fsize;
    if (fsize > 0)
        avio_write(s->pb, frame, fsize);
    return 0;
}

/*
 * We might end up with some extra A/V data without a matching counterpart,
 * e.g. video data without enough audio to write the complete frame.
 * Currently we simply drop the last frame. I don't know whether this
 * is the best strategy of all.
 */
static int dv_write_trailer(struct AVFormatContext *s)
{
436
    dv_delete_mux(s->priv_data);
437 438 439
    return 0;
}

440
AVOutputFormat ff_dv_muxer = {
441
    .name              = "dv",
442
    .long_name         = NULL_IF_CONFIG_SMALL("DV (Digital Video)"),
443 444
    .extensions        = "dv",
    .priv_data_size    = sizeof(DVMuxContext),
445 446
    .audio_codec       = AV_CODEC_ID_PCM_S16LE,
    .video_codec       = AV_CODEC_ID_DVVIDEO,
447 448 449
    .write_header      = dv_write_header,
    .write_packet      = dv_write_packet,
    .write_trailer     = dv_write_trailer,
450
};