/*
 * SubRip subtitle decoder
 * Copyright (c) 2010  Aurelien Jacobs <aurel@gnuage.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/avstring.h"
#include "libavutil/common.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/parseutils.h"
#include "avcodec.h"
#include "ass.h"

/**
 * Parse the value of an HTML colour attribute into a packed integer.
 *
 * The value is taken to end at the first '"', ' ' or '>' in str (or at the
 * end of the string) and is handed to av_parse_color().
 *
 * @param avctx  passed to av_parse_color() as its logging context
 * @param str    start of the colour value
 * @return rgba[0] in bits 0-7, rgba[1] in bits 8-15 and rgba[2] in bits
 *         16-23, or -1 if av_parse_color() fails
 */
static int html_color_parse(AVCodecContext *avctx, const char *str)
{
    uint8_t rgba[4];
    int packed;
    const size_t value_len = strcspn(str, "\" >");

    if (av_parse_color(rgba, str, value_len, avctx) < 0)
        return -1;

    packed  = rgba[0];
    packed |= rgba[1] << 8;
    packed |= rgba[2] << 16;
    return packed;
}

/* Indices into SrtStack.param[], one per tracked <font> attribute. */
enum {
    PARAM_UNKNOWN = -1,  /* not referenced by the code in this file section */
    PARAM_SIZE    =  0,  /* size=  -> {\fs..} */
    PARAM_COLOR   =  1,  /* color= -> {\c..}  */
    PARAM_FACE    =  2,  /* face=  -> {\fn..} */
    PARAM_NUMBER  =  3   /* number of tracked attributes (array dimension) */
};

/* One entry of the open-tag stack maintained by srt_to_ass(). */
typedef struct {
    /* Name of the opening tag (e.g. "font"); compared against the name of a
     * closing tag before the entry is popped. */
    char tag[128];
    /* ASS override text produced for each PARAM_* attribute of this tag;
     * an empty string means the attribute was not set by this tag. */
    char param[PARAM_NUMBER][128];
} SrtStack;

/**
 * Convert the text of one SubRip event into ASS dialogue text.
 *
 * Input is consumed until a NUL byte, an empty line, or until the output
 * buffer is full. HTML-like tags are translated as follows:
 *  - <b>, <i>, <s>, <u> become {\b1}/{\b0} etc.;
 *  - <font size= color= face=> becomes {\fs}, {\c}, {\fn} overrides, and
 *    </font> re-emits the value of the nearest enclosing entry on the stack;
 *  - any other tag is swallowed only if its closing tag appears later in the
 *    input, otherwise it is copied through literally.
 * "{\...}" override blocks and MicroDVD-like "{Y:...}" blocks are dropped,
 * with the {\an<digit>} handling described in the '{' case below.
 * Line breaks become "\N"; leading spaces on a line and "\N"/spaces at the
 * end of the event are removed. The result is terminated with "\r\n".
 *
 * @param avctx    logging context forwarded to html_color_parse()
 * @param out      start of the output buffer
 * @param out_end  one past the end of the output buffer; the buffer must be
 *                 at least 3 bytes long
 * @param in       NUL-terminated input text
 * @param x1,y1    if both are >= 0, a {\an1}{\pos(x1,y1)} prefix is emitted
 * @param x2,y2    if additionally both are >= 0 and differ from (x1,y1), a
 *                 {\an1}{\move(x1,y1,x2,y2)} prefix is emitted instead
 * @return pointer into in just after the consumed text
 */
static const char *srt_to_ass(AVCodecContext *avctx, char *out, char *out_end,
                              const char *in, int x1, int y1, int x2, int y2)
{
    /* lower bound for every backward scan over the output */
    char *const out_start = out;
    char *param, buffer[128], tmp[128];
    int len, tag_close, sptr = 1, line_start = 1, an = 0, end = 0;
    SrtStack stack[16];

    /* stack[0] holds the "reset to default" overrides used when the
     * outermost <font> tag is closed */
    stack[0].tag[0] = 0;
    strcpy(stack[0].param[PARAM_SIZE],  "{\\fs}");
    strcpy(stack[0].param[PARAM_COLOR], "{\\c}");
    strcpy(stack[0].param[PARAM_FACE],  "{\\fn}");

    if (x1 >= 0 && y1 >= 0) {
        if (x2 >= 0 && y2 >= 0 && (x2 != x1 || y2 != y1))
            snprintf(out, out_end-out,
                            "{\\an1}{\\move(%d,%d,%d,%d)}", x1, y1, x2, y2);
        else
            snprintf(out, out_end-out, "{\\an1}{\\pos(%d,%d)}", x1, y1);
        out += strlen(out);
    }

    for (; out < out_end && !end && *in; in++) {
        switch (*in) {
        case '\r':
            break;
        case '\n':
            if (line_start) {
                /* empty line: end of this event */
                end = 1;
                break;
            }
            /* drop trailing spaces of the line, never stepping before the
             * start of the buffer (nothing may have been written yet if the
             * line consisted only of skipped tags) */
            while (out > out_start && out[-1] == ' ')
                out--;
            snprintf(out, out_end-out, "\\N");
            if(out<out_end) out += strlen(out);
            line_start = 1;
            break;
        case ' ':
            if (!line_start)
                *out++ = *in;
            break;
        case '{':    /* skip all {\xxx} substrings except for {\an%d}
                        and all microdvd like styles such as {Y:xxx} */
            len = 0;
            /* an counts the {\an<digit>} blocks seen so far; {\...} blocks
             * are dropped only while that count is not exactly 1 */
            an += sscanf(in, "{\\an%*1u}%n", &len) >= 0 && len > 0;
            if ((an != 1 && (len = 0, sscanf(in, "{\\%*[^}]}%n", &len) >= 0 && len > 0)) ||
                (len = 0, sscanf(in, "{%*1[CcFfoPSsYy]:%*[^}]}%n", &len) >= 0 && len > 0)) {
                in += len - 1;
            } else
                *out++ = *in;
            break;
        case '<':
            tag_close = in[1] == '/';
            len = 0;
            if (sscanf(in+tag_close+1, "%127[^>]>%n", buffer, &len) >= 1 && len > 0) {
                /* split "name attr=..." into the tag name and its attributes */
                if ((param = strchr(buffer, ' ')))
                    *param++ = 0;
                if ((!tag_close && sptr < FF_ARRAY_ELEMS(stack)) ||
                    ( tag_close && sptr > 0 && !strcmp(stack[sptr-1].tag, buffer))) {
                    int i, j, unknown = 0;
                    in += len + tag_close;
                    if (!tag_close)
                        memset(stack+sptr, 0, sizeof(*stack));
                    if (!strcmp(buffer, "font")) {
                        if (tag_close) {
                            /* for every attribute the closed tag had set,
                             * restore the value of the nearest enclosing
                             * stack entry that defines it */
                            for (i=PARAM_NUMBER-1; i>=0; i--)
                                if (stack[sptr-1].param[i][0])
                                    for (j=sptr-2; j>=0; j--)
                                        if (stack[j].param[i][0]) {
                                            snprintf(out, out_end-out,
                                                            "%s", stack[j].param[i]);
                                            if(out<out_end) out += strlen(out);
                                            break;
                                        }
                        } else {
                            while (param) {
                                if (!strncmp(param, "size=", 5)) {
                                    unsigned font_size;
                                    param += 5 + (param[5] == '"');
                                    if (sscanf(param, "%u", &font_size) == 1) {
                                        snprintf(stack[sptr].param[PARAM_SIZE],
                                             sizeof(stack[0].param[PARAM_SIZE]),
                                             "{\\fs%u}", font_size);
                                    }
                                } else if (!strncmp(param, "color=", 6)) {
                                    int color;
                                    param += 6 + (param[6] == '"');
                                    color = html_color_parse(avctx, param);
                                    /* an unparsable colour yields -1; emit
                                     * no override rather than formatting -1
                                     * as a colour value */
                                    if (color >= 0)
                                        snprintf(stack[sptr].param[PARAM_COLOR],
                                             sizeof(stack[0].param[PARAM_COLOR]),
                                             "{\\c&H%X&}", color);
                                } else if (!strncmp(param, "face=", 5)) {
                                    param += 5 + (param[5] == '"');
                                    /* a quoted face name runs to the closing
                                     * quote, an unquoted one to the next space */
                                    len = strcspn(param,
                                                  param[-1] == '"' ? "\"" :" ");
                                    av_strlcpy(tmp, param,
                                               FFMIN(sizeof(tmp), len+1));
                                    param += len;
                                    snprintf(stack[sptr].param[PARAM_FACE],
                                             sizeof(stack[0].param[PARAM_FACE]),
                                             "{\\fn%s}", tmp);
                                }
                                if ((param = strchr(param, ' ')))
                                    param++;
                            }
                            for (i=0; i<PARAM_NUMBER; i++)
                                if (stack[sptr].param[i][0]) {
                                    snprintf(out, out_end-out,
                                                    "%s", stack[sptr].param[i]);
                                    if(out<out_end) out += strlen(out);
                                }
                        }
                    } else if (!buffer[1] && strspn(buffer, "bisu") == 1) {
                        snprintf(out, out_end-out,
                                        "{\\%c%d}", buffer[0], !tag_close);
                        if(out<out_end) out += strlen(out);
                    } else {
                        unknown = 1;
                        snprintf(tmp, sizeof(tmp), "</%s>", buffer);
                    }
                    if (tag_close) {
                        sptr--;
                    } else if (unknown && !strstr(in, tmp)) {
                        /* unknown tag that is never closed: rewind and copy
                         * the '<' through as plain text */
                        in -= len + tag_close;
                        *out++ = *in;
                    } else
                        av_strlcpy(stack[sptr++].tag, buffer,
                                   sizeof(stack[0].tag));
                    break;
                }
            }
            /* not a tag we handle: fall through and copy '<' literally */
        default:
            *out++ = *in;
            break;
        }
        if (*in != ' ' && *in != '\r' && *in != '\n')
            line_start = 0;
    }

    /* leave room for "\r\n" plus the terminating NUL */
    out = FFMIN(out, out_end-3);
    /* strip trailing "\N" and spaces without reading before the buffer */
    while (out - out_start >= 2 && !strncmp(out-2, "\\N", 2))
        out -= 2;
    while (out > out_start && out[-1] == ' ')
        out--;
    snprintf(out, out_end-out, "\r\n");
    return in;
}

/**
 * Parse the SubRip timing line of an event.
 *
 * The timing line ("H:MM:SS,mmm --> H:MM:SS,mmm", optionally followed by
 * " X1:.. X2:.. Y1:.. Y2:..") is looked for on the first line of buf and,
 * failing that, on the second one.
 *
 * @param buf       NUL-terminated event text
 * @param ts_start  receives the start time in 1/100 s
 * @param ts_end    receives the end time in 1/100 s
 * @param x1,y1,x2,y2  receive the optional coordinates; each is written only
 *                  if its field was matched
 * @return pointer to the start of the line following the timing line (or to
 *         the terminating NUL if there is none), NULL if no timing line was
 *         found in the first two lines
 */
static const char *read_ts(const char *buf, int *ts_start, int *ts_end,
                           int *x1, int *y1, int *x2, int *y2)
{
    int i, hs, ms, ss, he, me, se;

    for (i=0; i<2; i++) {
        /* try to read timestamps in either the first or second line */
        int c = sscanf(buf, "%d:%2d:%2d%*1[,.]%3d --> %d:%2d:%2d%*1[,.]%3d"
                       "%*[ ]X1:%d X2:%d Y1:%d Y2:%d",
                       &hs, &ms, &ss, ts_start, &he, &me, &se, ts_end,
                       x1, x2, y1, y2);
        /* advance to the next line, but never step over the terminating
         * NUL when the current line is the last one */
        buf += strcspn(buf, "\n");
        buf += !!*buf;
        if (c >= 8) {
            /* the fourth field of each timestamp was read as milliseconds */
            *ts_start = 100*(ss + 60*(ms + 60*hs)) + *ts_start/10;
            *ts_end   = 100*(se + 60*(me + 60*he)) + *ts_end  /10;
            return buf;
        }
    }
    return NULL;
}

/**
 * Decode one subtitle packet into ASS rectangles added to an AVSubtitle.
 *
 * For AV_CODEC_ID_SRT the timing of every event is parsed from the packet
 * text with read_ts(); for any other codec id it is derived from the packet
 * pts and duration. Each event's text is converted with srt_to_ass() and
 * handed to ff_ass_add_rect().
 *
 * @param avctx        codec context
 * @param data         the AVSubtitle to fill
 * @param got_sub_ptr  set to non-zero if the subtitle holds at least one
 *                     rectangle afterwards
 * @param avpkt        input packet; AV_PKT_DATA_SUBTITLE_POSITION side data
 *                     of 16 bytes, if present, supplies x1, y1, x2, y2
 * @return avpkt->size (also when it is <= 0), or the negative value returned
 *         by ff_ass_add_rect() on failure
 */
static int srt_decode_frame(AVCodecContext *avctx,
                            void *data, int *got_sub_ptr, AVPacket *avpkt)
{
    AVSubtitle *sub = data;
    int ts_start, ts_end, x1 = -1, y1 = -1, x2 = -1, y2 = -1;
    char buffer[2048];
    const char *ptr = (const char *)avpkt->data;
    const char *end = (const char *)avpkt->data + avpkt->size;
    int size, ret;
    const uint8_t *p = av_packet_get_side_data(avpkt, AV_PKT_DATA_SUBTITLE_POSITION, &size);

    /* four little-endian 32-bit values: x1, y1, x2, y2 */
    if (p && size == 16) {
        x1 = AV_RL32(p     );
        y1 = AV_RL32(p +  4);
        x2 = AV_RL32(p +  8);
        y2 = AV_RL32(p + 12);
    }

    if (avpkt->size <= 0)
        return avpkt->size;

    while (ptr < end && *ptr) {
        if (avctx->codec->id == AV_CODEC_ID_SRT) {
            /* timing (and optional coordinates) are embedded in the text */
            ptr = read_ts(ptr, &ts_start, &ts_end, &x1, &y1, &x2, &y2);
            if (!ptr)
                break;
        } else {
            /* timing comes from the packet, rescaled to 1/100 s units */
            ts_start = av_rescale_q(avpkt->pts,
                                    avctx->time_base,
                                    (AVRational){1,100});
            ts_end   = av_rescale_q(avpkt->pts + avpkt->duration,
                                    avctx->time_base,
                                    (AVRational){1,100});
        }
        ptr = srt_to_ass(avctx, buffer, buffer+sizeof(buffer), ptr,
                         x1, y1, x2, y2);
        /* NOTE(review): assumes ff_ass_add_rect() reports failure with a
         * negative return value - confirm against ass.h */
        ret = ff_ass_add_rect(sub, buffer, ts_start, ts_end-ts_start, 0);
        if (ret < 0)
            return ret;
    }

    *got_sub_ptr = sub->num_rects > 0;
    return avpkt->size;
}

#if CONFIG_SRT_DECODER
/* deprecated decoder */
/* Registered under AV_CODEC_ID_SRT: srt_decode_frame() reads each event's
 * timing from the packet text for this codec id. */
AVCodec ff_srt_decoder = {
    .name         = "srt",
    .long_name    = NULL_IF_CONFIG_SMALL("SubRip subtitle with embedded timing"),
    .type         = AVMEDIA_TYPE_SUBTITLE,
    .id           = AV_CODEC_ID_SRT,
    .init         = ff_ass_subtitle_header_default,
    .decode       = srt_decode_frame,
};
#endif

#if CONFIG_SUBRIP_DECODER
/* Registered under AV_CODEC_ID_SUBRIP: srt_decode_frame() takes the event
 * timing from the packet pts and duration for this codec id. */
AVCodec ff_subrip_decoder = {
    .name         = "subrip",
    .long_name    = NULL_IF_CONFIG_SMALL("SubRip subtitle"),
    .type         = AVMEDIA_TYPE_SUBTITLE,
    .id           = AV_CODEC_ID_SUBRIP,
    .init         = ff_ass_subtitle_header_default,
    .decode       = srt_decode_frame,
};
#endif