samidec.c 5.62 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29
/*
 * Copyright (c) 2012 Clément Bœsch
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * SAMI subtitle decoder
 * @see http://msdn.microsoft.com/en-us/library/ms971327.aspx
 */

#include "ass.h"
#include "libavutil/avstring.h"
#include "libavutil/bprint.h"
30
#include "htmlsubtitles.h"
31 32 33 34

typedef struct {
    AVBPrint source;
    AVBPrint content;
35 36
    AVBPrint encoded_source;
    AVBPrint encoded_content;
37
    AVBPrint full;
38
    int readorder;
39 40 41 42 43 44 45 46 47
} SAMIContext;

static int sami_paragraph_to_ass(AVCodecContext *avctx, const char *src)
{
    SAMIContext *sami = avctx->priv_data;
    int ret = 0;
    char *tag = NULL;
    char *dupsrc = av_strdup(src);
    char *p = dupsrc;
48 49
    AVBPrint *dst_content = &sami->encoded_content;
    AVBPrint *dst_source = &sami->encoded_source;
50

51
    av_bprint_clear(&sami->encoded_content);
52
    av_bprint_clear(&sami->content);
53
    av_bprint_clear(&sami->encoded_source);
54 55 56 57 58 59 60 61 62
    for (;;) {
        char *saveptr = NULL;
        int prev_chr_is_space = 0;
        AVBPrint *dst = &sami->content;

        /* parse & extract paragraph tag */
        p = av_stristr(p, "<P");
        if (!p)
            break;
63
        if (p[2] != '>' && !av_isspace(p[2])) { // avoid confusion with tags such as <PRE>
64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
            p++;
            continue;
        }
        if (dst->len) // add a separator with the previous paragraph if there was one
            av_bprintf(dst, "\\N");
        tag = av_strtok(p, ">", &saveptr);
        if (!tag || !saveptr)
            break;
        p = saveptr;

        /* check if the current paragraph is the "source" (speaker name) */
        if (av_stristr(tag, "ID=Source") || av_stristr(tag, "ID=\"Source\"")) {
            dst = &sami->source;
            av_bprint_clear(dst);
        }

        /* if empty event -> skip subtitle */
81
        while (av_isspace(*p))
82 83 84 85 86 87 88 89 90
            p++;
        if (!strncmp(p, "&nbsp;", 6)) {
            ret = -1;
            goto end;
        }

        /* extract the text, stripping most of the tags */
        while (*p) {
            if (*p == '<') {
91
                if (!av_strncasecmp(p, "<P", 2) && (p[2] == '>' || av_isspace(p[2])))
92
                    break;
93 94
            }
            if (!av_strncasecmp(p, "<BR", 3)) {
95
                av_bprintf(dst, "\\N");
96 97 98 99 100 101 102
                p++;
                while (*p && *p != '>')
                    p++;
                if (!*p)
                    break;
                if (*p == '>')
                    p++;
103
                continue;
104
            }
105
            if (!av_isspace(*p))
106 107 108
                av_bprint_chars(dst, *p, 1);
            else if (!prev_chr_is_space)
                av_bprint_chars(dst, ' ', 1);
109
            prev_chr_is_space = av_isspace(*p);
110 111 112 113 114
            p++;
        }
    }

    av_bprint_clear(&sami->full);
115 116 117 118 119 120
    if (sami->source.len) {
        ff_htmlmarkup_to_ass(avctx, dst_source, sami->source.str);
        av_bprintf(&sami->full, "{\\i1}%s{\\i0}\\N", sami->encoded_source.str);
    }
    ff_htmlmarkup_to_ass(avctx, dst_content, sami->content.str);
    av_bprintf(&sami->full, "%s", sami->encoded_content.str);
121 122 123 124 125 126 127 128 129 130 131 132 133 134

end:
    av_free(dupsrc);
    return ret;
}

static int sami_decode_frame(AVCodecContext *avctx,
                             void *data, int *got_sub_ptr, AVPacket *avpkt)
{
    AVSubtitle *sub = data;
    const char *ptr = avpkt->data;
    SAMIContext *sami = avctx->priv_data;

    if (ptr && avpkt->size > 0 && !sami_paragraph_to_ass(avctx, ptr)) {
135 136
        // TODO: pass escaped sami->encoded_source.str as source
        int ret = ff_ass_add_rect(sub, sami->full.str, sami->readorder++, 0, NULL, NULL);
137 138
        if (ret < 0)
            return ret;
139 140 141 142 143 144 145 146 147 148
    }
    *got_sub_ptr = sub->num_rects > 0;
    return avpkt->size;
}

static av_cold int sami_init(AVCodecContext *avctx)
{
    SAMIContext *sami = avctx->priv_data;
    av_bprint_init(&sami->source,  0, 2048);
    av_bprint_init(&sami->content, 0, 2048);
149 150
    av_bprint_init(&sami->encoded_source,  0, 2048);
    av_bprint_init(&sami->encoded_content, 0, 2048);
151 152 153 154 155 156 157 158 159
    av_bprint_init(&sami->full,    0, 2048);
    return ff_ass_subtitle_header_default(avctx);
}

static av_cold int sami_close(AVCodecContext *avctx)
{
    SAMIContext *sami = avctx->priv_data;
    av_bprint_finalize(&sami->source,  NULL);
    av_bprint_finalize(&sami->content, NULL);
160 161
    av_bprint_finalize(&sami->encoded_source,  NULL);
    av_bprint_finalize(&sami->encoded_content, NULL);
162 163 164 165
    av_bprint_finalize(&sami->full,    NULL);
    return 0;
}

166 167 168
static void sami_flush(AVCodecContext *avctx)
{
    SAMIContext *sami = avctx->priv_data;
169 170
    if (!(avctx->flags2 & AV_CODEC_FLAG2_RO_FLUSH_NOOP))
        sami->readorder = 0;
171 172
}

173 174 175 176
AVCodec ff_sami_decoder = {
    .name           = "sami",
    .long_name      = NULL_IF_CONFIG_SMALL("SAMI subtitle"),
    .type           = AVMEDIA_TYPE_SUBTITLE,
177
    .id             = AV_CODEC_ID_SAMI,
178 179 180 181
    .priv_data_size = sizeof(SAMIContext),
    .init           = sami_init,
    .close          = sami_close,
    .decode         = sami_decode_frame,
182
    .flush          = sami_flush,
183
};