Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
16001373
Commit
16001373
authored
Jun 05, 2012
by
Nicolas George
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
lavf: TED Talks JSON captions demuxer.
parent
18eb3196
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
446 additions
and
1 deletion
+446
-1
Changelog
Changelog
+1
-0
demuxers.texi
doc/demuxers.texi
+21
-0
general.texi
doc/general.texi
+1
-0
Makefile
libavformat/Makefile
+1
-0
allformats.c
libavformat/allformats.c
+1
-0
tedcaptionsdec.c
libavformat/tedcaptionsdec.c
+365
-0
version.h
libavformat/version.h
+1
-1
bookmarklets.html
tools/bookmarklets.html
+55
-0
No files found.
Changelog
View file @
16001373
...
...
@@ -39,6 +39,7 @@ version <next>:
- Paris Audio File demuxer
- Virtual concatenation demuxer
- VobSub demuxer
- JSON captions for TED talks decoding support
version 1.0:
...
...
doc/demuxers.texi
View file @
16001373
...
...
@@ -215,4 +215,25 @@ backslash or single quotes.
@end table
@section tedcaptions
JSON captions used for @url{http://www.ted.com/, TED Talks}.
TED does not provide links to the captions, but they can be guessed from the
page. The file @file{tools/bookmarklets.html} from the FFmpeg source tree
contains a bookmarklet to expose them.
This demuxer accepts the following option:
@table @option
@item start_time
Set the start time of the TED talk, in milliseconds. The default is 15000
(15s). It is used to sync the captions with the downloadable videos, because
they include a 15s intro.
@end table
Example: convert the captions to a format most players understand:
@example
ffmpeg -i http://www.ted.com/talks/subtitles/id/1/lang/en talk1-en.srt
@end example
@c man end DEMUXERS
doc/general.texi
View file @
16001373
...
...
@@ -924,6 +924,7 @@ performance on systems without hardware floating point support).
@item SAMI @tab @tab X @tab @tab X
@item SubRip (SRT) @tab X @tab X @tab X @tab X
@item SubViewer @tab @tab X @tab @tab X
@item TED Talks captions @tab @tab X @tab @tab X
@item VobSub (IDX+SUB) @tab @tab X @tab @tab X
@item 3GPP Timed Text @tab @tab @tab X @tab X
@item WebVTT @tab @tab X @tab @tab X
...
...
libavformat/Makefile
View file @
16001373
...
...
@@ -346,6 +346,7 @@ OBJS-$(CONFIG_SUBVIEWER_DEMUXER) += subviewerdec.o
OBJS-$(CONFIG_SWF_DEMUXER)
+=
swfdec.o
swf.o
OBJS-$(CONFIG_SWF_MUXER)
+=
swfenc.o
swf.o
OBJS-$(CONFIG_TAK_DEMUXER)
+=
takdec.o
apetag.o
img2.o
rawdec.o
OBJS-$(CONFIG_TEDCAPTIONS_DEMUXER)
+=
tedcaptionsdec.o
OBJS-$(CONFIG_THP_DEMUXER)
+=
thp.o
OBJS-$(CONFIG_TIERTEXSEQ_DEMUXER)
+=
tiertexseq.o
OBJS-$(CONFIG_MKVTIMESTAMP_V2_MUXER)
+=
mkvtimestamp_v2.o
...
...
libavformat/allformats.c
View file @
16001373
...
...
@@ -244,6 +244,7 @@ void av_register_all(void)
REGISTER_DEMUXER
(
SUBVIEWER
,
subviewer
);
REGISTER_MUXDEMUX
(
SWF
,
swf
);
REGISTER_DEMUXER
(
TAK
,
tak
);
REGISTER_DEMUXER
(
TEDCAPTIONS
,
tedcaptions
);
REGISTER_MUXER
(
TG2
,
tg2
);
REGISTER_MUXER
(
TGP
,
tgp
);
REGISTER_DEMUXER
(
THP
,
thp
);
...
...
libavformat/tedcaptionsdec.c
0 → 100644
View file @
16001373
/*
* TED Talks captions format decoder
* Copyright (c) 2012 Nicolas George
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/bprint.h"
#include "libavutil/log.h"
#include "libavutil/opt.h"
#include "avformat.h"
#include "internal.h"
#include "subtitles.h"
/**
 * Private context of the TED Talks captions demuxer.
 */
typedef struct {
    AVClass *class;             /* class descriptor; NOTE(review): presumably
                                 * filled in by libavformat from .priv_class */
    int64_t start_time;         /* "start_time" option: offset in ms that
                                 * read_header adds to every caption pts */
    FFDemuxSubtitlesQueue subs; /* queue holding all parsed caption packets */
} TEDCaptionsDemuxer;
/**
 * Options understood by the demuxer.
 *
 * start_time: signed 64-bit offset in milliseconds, default 15000, stored in
 * TEDCaptionsDemuxer.start_time.
 */
static const AVOption tedcaptions_options[] = {
    { "start_time", "set the start time (offset) of the subtitles, in ms",
      offsetof(TEDCaptionsDemuxer, start_time), FF_OPT_TYPE_INT64,
      { .i64 = 15000 }, INT64_MIN, INT64_MAX,
      AV_OPT_FLAG_SUBTITLE_PARAM | AV_OPT_FLAG_DECODING_PARAM },
    { NULL }, /* terminator entry */
};
/**
 * AVClass describing the demuxer's private context; it ties the context to
 * tedcaptions_options and is referenced by ff_tedcaptions_demuxer.priv_class.
 */
static const AVClass tedcaptions_demuxer_class = {
    .class_name = "tedcaptions_demuxer",
    .item_name  = av_default_item_name,
    .option     = tedcaptions_options,
    .version    = LIBAVUTIL_VERSION_INT,
};
/* True when amin <= a <= amax; the unsigned cast folds both bounds into a
 * single comparison, so negative values (error codes) are never in range. */
#define BETWEEN(a, amin, amax) ((unsigned)((a) - (amin)) <= (amax) - (amin))
/* True for '0'-'9', 'a'-'f' and 'A'-'F'; "| 32" folds ASCII upper case onto
 * lower case. */
#define HEX_DIGIT_TEST(c) (BETWEEN(c, '0', '9') || BETWEEN((c) | 32, 'a', 'f'))
/* Numeric value of a character already accepted by HEX_DIGIT_TEST(). */
#define HEX_DIGIT_VAL(c) ((c) <= '9' ? (c) - '0' : ((c) | 32) - 'a' + 10)
/* Turn the current byte into an error code: a negative value is already an
 * error (I/O error or EOF) and is passed through, anything else is a syntax
 * error. */
#define ERR_CODE(c) ((c) < 0 ? (c) : AVERROR_INVALIDDATA)
/**
 * Append the UTF-8 encoding of a code point to a print buffer.
 *
 * @param bp  destination buffer
 * @param c   code point to encode
 *
 * The number of continuation bytes is chosen from the standard UTF-8 range
 * limits (2 bytes up to U+07FF, 3 up to U+FFFF, 4 up to U+1FFFFF, ...).
 * The previous log2-based formula selected too few continuation bytes for
 * U+0800..U+0FFF and produced invalid sequences for them.
 */
static void av_bprint_utf8(AVBPrint *bp, unsigned c)
{
    int bytes, i;

    if (c <= 0x7F) {
        av_bprint_chars(bp, c, 1);
        return;
    }
    /* bytes = number of continuation bytes following the lead byte */
    if (c < 0x800)
        bytes = 1;
    else if (c < 0x10000)
        bytes = 2;
    else if (c < 0x200000)
        bytes = 3;
    else if (c < 0x4000000)
        bytes = 4;
    else
        bytes = 5;
    /* lead byte: length marker in the high bits, top payload bits below */
    av_bprint_chars(bp, (c >> (bytes * 6)) | ((0xFF80 >> bytes) & 0xFF), 1);
    /* continuation bytes: 10xxxxxx, most significant group first */
    for (i = bytes - 1; i >= 0; i--)
        av_bprint_chars(bp, ((c >> (i * 6)) & 0x3F) | 0x80, 1);
}
/**
 * Read one byte from pb into *cur_byte.
 *
 * On success *cur_byte holds the byte value (0..255). If avio_read() returns
 * 0 it is set to AVERROR_EOF; if avio_read() returns a negative value, that
 * value is stored unchanged.
 */
static void next_byte(AVIOContext *pb, int *cur_byte)
{
    uint8_t byte;
    int nread = avio_read(pb, &byte, 1);

    if (nread > 0)
        *cur_byte = byte;
    else if (nread == 0)
        *cur_byte = AVERROR_EOF;
    else
        *cur_byte = nread;
}
/**
 * Advance past any run of space, tab, line feed or carriage return,
 * leaving *cur_byte on the first other byte (or on an error code).
 */
static void skip_spaces(AVIOContext *pb, int *cur_byte)
{
    for (;;) {
        switch (*cur_byte) {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
            next_byte(pb, cur_byte);
            break;
        default:
            return;
        }
    }
}
/**
 * Skip whitespace, then require the current byte to be c and consume it.
 *
 * @return 0 when c was found and consumed, otherwise ERR_CODE() of the
 *         offending byte; nothing is consumed in that case
 */
static int expect_byte(AVIOContext *pb, int *cur_byte, uint8_t c)
{
    skip_spaces(pb, cur_byte);
    if (*cur_byte == c) {
        next_byte(pb, cur_byte);
        return 0;
    }
    return ERR_CODE(*cur_byte);
}
/**
 * Parse a double-quoted JSON string into bp.
 *
 * bp is (re)initialized here. Backslash escapes are decoded: \uXXXX is
 * converted to UTF-8, \b \f \n \r \t become the corresponding control
 * characters, and any other escaped byte (\" \\ \/ ...) is copied as is.
 * Each \uXXXX escape is encoded on its own; surrogate pairs are not combined.
 *
 * @param pb        input
 * @param cur_byte  current look-ahead byte, updated
 * @param bp        output buffer; left initialized on success, finalized on
 *                  failure
 * @param full      non-zero: unlimited buffer, and truncation is reported as
 *                  ENOMEM; zero: automatic-size buffer, long strings are
 *                  silently truncated
 * @return 0 on success, a negative error code otherwise
 */
static int parse_string(AVIOContext *pb, int *cur_byte, AVBPrint *bp, int full)
{
    int ret;

    av_bprint_init(bp, 0, full ? -1 : 1);
    ret = expect_byte(pb, cur_byte, '"');
    if (ret < 0)
        goto fail;
    while (*cur_byte > 0 && *cur_byte != '"') {
        if (*cur_byte == '\\') {
            next_byte(pb, cur_byte);
            if (*cur_byte < 0) {
                ret = AVERROR_INVALIDDATA;
                goto fail;
            }
            if ((*cur_byte | 32) == 'u') {
                unsigned chr = 0, i;
                for (i = 0; i < 4; i++) {
                    next_byte(pb, cur_byte);
                    if (!HEX_DIGIT_TEST(*cur_byte)) {
                        ret = ERR_CODE(*cur_byte);
                        goto fail;
                    }
                    chr = chr * 16 + HEX_DIGIT_VAL(*cur_byte);
                }
                av_bprint_utf8(bp, chr);
            } else {
                /* single-character escapes: map the JSON control escapes,
                 * copy everything else verbatim */
                switch (*cur_byte) {
                case 'b': av_bprint_chars(bp, '\b', 1); break;
                case 'f': av_bprint_chars(bp, '\f', 1); break;
                case 'n': av_bprint_chars(bp, '\n', 1); break;
                case 'r': av_bprint_chars(bp, '\r', 1); break;
                case 't': av_bprint_chars(bp, '\t', 1); break;
                default:  av_bprint_chars(bp, *cur_byte, 1); break;
                }
            }
        } else {
            av_bprint_chars(bp, *cur_byte, 1);
        }
        next_byte(pb, cur_byte);
    }
    /* the loop also stops on EOF/error or a NUL byte: require the quote */
    ret = expect_byte(pb, cur_byte, '"');
    if (ret < 0)
        goto fail;
    if (full && !av_bprint_is_complete(bp)) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }
    return 0;

fail:
    av_bprint_finalize(bp, NULL);
    return ret;
}
/**
 * Parse an object key: a quoted string (stored in bp, automatic-size
 * buffer) followed by a ':' separator.
 *
 * @return 0 on success, the negative error code of the failing step otherwise
 */
static int parse_label(AVIOContext *pb, int *cur_byte, AVBPrint *bp)
{
    int err = parse_string(pb, cur_byte, bp, 0);

    if (err < 0)
        return err;
    err = expect_byte(pb, cur_byte, ':');
    return err < 0 ? err : 0;
}
/**
 * Parse the literal "false" or "true" (after optional whitespace).
 *
 * The literal must not be directly followed by an ASCII letter.
 *
 * @param result  set to 0 for "false", 1 for "true"
 * @return 0 on success, AVERROR_INVALIDDATA otherwise
 */
static int parse_boolean(AVIOContext *pb, int *cur_byte, int *result)
{
    static const char *const literals[] = { "false", "true" };
    const char *lit;
    int value;

    skip_spaces(pb, cur_byte);

    /* the first character selects which literal is expected */
    if (*cur_byte == literals[0][0])
        value = 0;
    else if (*cur_byte == literals[1][0])
        value = 1;
    else
        return AVERROR_INVALIDDATA;

    lit = literals[value];
    while (*lit) {
        if (*cur_byte != *lit)
            return AVERROR_INVALIDDATA;
        lit++;
        next_byte(pb, cur_byte);
    }
    /* reject identifiers that merely start with the literal, e.g. "truex" */
    if (BETWEEN(*cur_byte | 32, 'a', 'z'))
        return AVERROR_INVALIDDATA;

    *result = value;
    return 0;
}
/**
 * Parse a non-negative decimal integer (after optional whitespace).
 *
 * @param result  receives the parsed value
 * @return 0 on success; AVERROR_INVALIDDATA if no digit is present or the
 *         value does not fit in int64_t
 */
static int parse_int(AVIOContext *pb, int *cur_byte, int64_t *result)
{
    int64_t val = 0;

    skip_spaces(pb, cur_byte);
    if (!BETWEEN(*cur_byte, '0', '9'))
        return AVERROR_INVALIDDATA;
    while (BETWEEN(*cur_byte, '0', '9')) {
        int digit = *cur_byte - '0';

        /* refuse values that would overflow instead of invoking signed
         * overflow on attacker-controlled input */
        if (val > (INT64_MAX - digit) / 10)
            return AVERROR_INVALIDDATA;
        val = val * 10 + digit;
        next_byte(pb, cur_byte);
    }
    *result = val;
    return 0;
}
/**
 * Parse a whole captions document and queue one packet per caption.
 *
 * Expected layout:
 *   { "captions": [ { "startOfParagraph": bool, "content": string,
 *                     "startTime": int, "duration": int }, ... ] }
 * The keys of a caption object may come in any order; "content",
 * "startTime" and "duration" are mandatory, any unknown key is an error.
 * The array must contain at least one caption object.
 *
 * For each caption a packet is inserted into subs with pos set to the
 * position recorded right after the opening brace, pts = startTime and
 * duration = duration (start_time offset is not applied here).
 *
 * @return 0 on success, a negative error code otherwise; packets queued
 *         before a failure stay in subs and must be released by the caller
 */
static int parse_file(AVIOContext *pb, FFDemuxSubtitlesQueue *subs)
{
    int ret, cur_byte, start_of_par;
    int have_content = 0; /* content currently holds a parsed string */
    AVBPrint label, content;
    int64_t pos, start, duration;
    AVPacket *pkt;

    next_byte(pb, &cur_byte);
    ret = expect_byte(pb, &cur_byte, '{');
    if (ret < 0)
        return AVERROR_INVALIDDATA;
    ret = parse_label(pb, &cur_byte, &label);
    if (ret < 0 || strcmp(label.str, "captions"))
        return AVERROR_INVALIDDATA;
    ret = expect_byte(pb, &cur_byte, '[');
    if (ret < 0)
        return AVERROR_INVALIDDATA;
    while (1) {
        start = duration = AV_NOPTS_VALUE;
        ret = expect_byte(pb, &cur_byte, '{');
        if (ret < 0)
            goto fail;
        pos = avio_tell(pb) - 1;
        while (1) {
            ret = parse_label(pb, &cur_byte, &label);
            if (ret < 0)
                goto fail;
            if (!strcmp(label.str, "startOfParagraph")) {
                /* parsed for syntax only; the value is not used */
                ret = parse_boolean(pb, &cur_byte, &start_of_par);
                if (ret < 0)
                    goto fail;
            } else if (!strcmp(label.str, "content")) {
                /* a repeated key replaces the earlier value; release it
                 * first because parse_string() re-initializes the buffer */
                if (have_content) {
                    av_bprint_finalize(&content, NULL);
                    have_content = 0;
                }
                /* on failure parse_string() has already finalized content */
                ret = parse_string(pb, &cur_byte, &content, 1);
                if (ret < 0)
                    goto fail;
                have_content = 1;
            } else if (!strcmp(label.str, "startTime")) {
                ret = parse_int(pb, &cur_byte, &start);
                if (ret < 0)
                    goto fail;
            } else if (!strcmp(label.str, "duration")) {
                ret = parse_int(pb, &cur_byte, &duration);
                if (ret < 0)
                    goto fail;
            } else {
                ret = AVERROR_INVALIDDATA;
                goto fail;
            }
            skip_spaces(pb, &cur_byte);
            if (cur_byte != ',')
                break;
            next_byte(pb, &cur_byte);
        }
        ret = expect_byte(pb, &cur_byte, '}');
        if (ret < 0)
            goto fail;

        if (!have_content || start == AV_NOPTS_VALUE ||
            duration == AV_NOPTS_VALUE) {
            ret = AVERROR_INVALIDDATA;
            goto fail;
        }
        pkt = ff_subtitles_queue_insert(subs, content.str, content.len, 0);
        if (!pkt) {
            ret = AVERROR(ENOMEM);
            goto fail;
        }
        pkt->pos      = pos;
        pkt->pts      = start;
        pkt->duration = duration;
        av_bprint_finalize(&content, NULL);
        have_content = 0;

        skip_spaces(pb, &cur_byte);
        if (cur_byte != ',')
            break;
        next_byte(pb, &cur_byte);
    }
    ret = expect_byte(pb, &cur_byte, ']');
    if (ret < 0)
        return ret;
    ret = expect_byte(pb, &cur_byte, '}');
    if (ret < 0)
        return ret;
    /* nothing but whitespace may follow the document */
    skip_spaces(pb, &cur_byte);
    if (cur_byte != AVERROR_EOF)
        return ERR_CODE(cur_byte);
    return 0;

fail:
    if (have_content)
        av_bprint_finalize(&content, NULL);
    return ret;
}
/**
 * Read the whole captions file, queue all captions and create the single
 * subtitle stream.
 *
 * Every queued packet gets the start_time option added to its pts. The
 * stream uses a 1/1000 time base and its duration is the end time of the
 * last queued caption.
 *
 * @return 0 on success, a negative error code otherwise; on any failure the
 *         subtitle queue is emptied before returning
 */
static av_cold int tedcaptions_read_header(AVFormatContext *avf)
{
    TEDCaptionsDemuxer *tc = avf->priv_data;
    AVStream *st;
    int ret, i;
    AVPacket *last;

    ret = parse_file(avf->pb, &tc->subs);
    if (ret < 0) {
        if (ret == AVERROR_INVALIDDATA)
            av_log(avf, AV_LOG_ERROR, "Syntax error near offset %"PRId64".\n",
                   avio_tell(avf->pb));
        ff_subtitles_queue_clean(&tc->subs);
        return ret;
    }
    ff_subtitles_queue_finalize(&tc->subs);
    for (i = 0; i < tc->subs.nb_subs; i++)
        tc->subs.subs[i].pts += tc->start_time;

    /* parse_file() only succeeds after queueing at least one caption */
    last = &tc->subs.subs[tc->subs.nb_subs - 1];
    st = avformat_new_stream(avf, NULL);
    if (!st) {
        /* release the queued packets, as done on the parse-failure path */
        ff_subtitles_queue_clean(&tc->subs);
        return AVERROR(ENOMEM);
    }
    st->codec->codec_type = AVMEDIA_TYPE_SUBTITLE;
    st->codec->codec_id   = CODEC_ID_TEXT;
    avpriv_set_pts_info(st, 64, 1, 1000);
    st->probe_packets = 0;
    st->start_time    = 0;
    st->duration      = last->pts + last->duration;
    st->cur_dts       = 0;

    return 0;
}
/**
 * Deliver the next packet by delegating to the subtitle queue.
 *
 * @return whatever ff_subtitles_queue_read_packet() returns
 */
static int tedcaptions_read_packet(AVFormatContext *avf, AVPacket *packet)
{
    TEDCaptionsDemuxer *ctx = avf->priv_data;
    FFDemuxSubtitlesQueue *queue = &ctx->subs;

    return ff_subtitles_queue_read_packet(queue, packet);
}
/**
 * Release the subtitle queue held by the demuxer.
 *
 * @return always 0
 */
static int tedcaptions_read_close(AVFormatContext *avf)
{
    TEDCaptionsDemuxer *ctx = avf->priv_data;
    FFDemuxSubtitlesQueue *queue = &ctx->subs;

    ff_subtitles_queue_clean(queue);
    return 0;
}
/**
 * Probe for a TED captions document.
 *
 * The buffer must start (after optional whitespace) with '{'. Each of the
 * five known keys that occurs somewhere in the buffer, quoted and followed
 * by optional whitespace and ':', counts as one hit.
 *
 * The whitespace set is the one skip_spaces() accepts (space, tab, CR, LF),
 * so that input the parser would accept is not rejected by the probe.
 *
 * @return AVPROBE_SCORE_MAX if all keys were found, half of it if at least
 *         one was found, 0 otherwise
 */
static av_cold int tedcaptions_read_probe(AVProbeData *p)
{
    static const char *const tags[] = {
        "\"captions\"", "\"duration\"", "\"content\"",
        "\"startOfParagraph\"", "\"startTime\"",
    };
    static const char spaces[] = " \t\r\n";
    /* NOTE(review): assumes p->buf is NUL-terminated, as the str* calls in
     * the original already required */
    const char *buf = (const char *)p->buf;
    unsigned i, count = 0;
    const char *t;

    if (buf[strspn(buf, spaces)] != '{')
        return 0;
    for (i = 0; i < FF_ARRAY_ELEMS(tags); i++) {
        if (!(t = strstr(buf, tags[i])))
            continue;
        t += strlen(tags[i]);
        t += strspn(t, spaces);
        if (*t == ':')
            count++;
    }
    return count == FF_ARRAY_ELEMS(tags) ? AVPROBE_SCORE_MAX :
           count                         ? AVPROBE_SCORE_MAX / 2 :
                                           0;
}
/**
 * Seek by delegating to the subtitle queue with the arguments unchanged.
 *
 * @return whatever ff_subtitles_queue_seek() returns
 */
static int tedcaptions_read_seek(AVFormatContext *avf, int stream_index,
                                 int64_t min_ts, int64_t ts, int64_t max_ts,
                                 int flags)
{
    TEDCaptionsDemuxer *ctx = avf->priv_data;
    FFDemuxSubtitlesQueue *queue = &ctx->subs;

    return ff_subtitles_queue_seek(queue, avf, stream_index,
                                   min_ts, ts, max_ts, flags);
}
/**
 * Demuxer descriptor for the "tedcaptions" format: wires the private
 * context, its option class and the callbacks defined in this file.
 */
AVInputFormat ff_tedcaptions_demuxer = {
    .name           = "tedcaptions",
    .long_name      = NULL_IF_CONFIG_SMALL("TED Talks captions"),
    .priv_data_size = sizeof(TEDCaptionsDemuxer),
    .priv_class     = &tedcaptions_demuxer_class,
    .read_header    = tedcaptions_read_header,
    .read_packet    = tedcaptions_read_packet,
    .read_close     = tedcaptions_read_close,
    .read_probe     = tedcaptions_read_probe,
    .read_seek2     = tedcaptions_read_seek,
};
libavformat/version.h
View file @
16001373
...
...
@@ -30,7 +30,7 @@
#include "libavutil/avutil.h"
#define LIBAVFORMAT_VERSION_MAJOR 54
#define LIBAVFORMAT_VERSION_MINOR 4
8
#define LIBAVFORMAT_VERSION_MINOR 4
9
#define LIBAVFORMAT_VERSION_MICRO 100
#define LIBAVFORMAT_VERSION_INT AV_VERSION_INT(LIBAVFORMAT_VERSION_MAJOR, \
...
...
tools/bookmarklets.html
0 → 100644
View file @
16001373
<!DOCTYPE html>
<html>
<head>
<!--
This file is part of FFmpeg.
All scripts contained in this file can be considered public domain.
-->
<title>
FFmpeg bookmarklets
</title>
<meta
charset=
"UTF-8"
>
<script
type=
"text/javascript"
>
/*
 * Turn the source text of a script into a "javascript:" bookmarklet URL.
 *
 * Block comments are removed (including ones spanning several lines), every
 * run of whitespace is collapsed to a single space, trailing whitespace is
 * dropped, and the result is wrapped in an immediately-invoked function
 * before being escaped.
 */
function convert(js) {
    js = js.replace(/\/\*[\s\S]*?\*\//g, ""); /* comments */
    js = js.replace(/\s+/g, " ");
    /* "$" anchors at the end of the string; "\z" is not an end anchor in
       JavaScript and would match a literal "z" */
    js = js.replace(/\s+$/, "");
    js = "(function(){" + js + "})();void 0";
    return "javascript:" + escape(js);
}
/*
 * For every "pre" element in the page, look up the element whose id is the
 * pre's id followed by "-link" and set its href to the bookmarklet URL built
 * from the pre's text.
 */
function init() {
    var blocks = document.getElementsByTagName("pre");
    var idx = 0;
    /* written with ">" so that the script contains no "less than" sign */
    while (blocks.length > idx) {
        var source = blocks[idx];
        document.getElementById(source.id + "-link").href =
            convert(source.textContent);
        idx++;
    }
}
</script>
<style
type=
"text/css"
>
pre
{
border
:
solid
black
1px
;
padding
:
0.2ex
;
font-size
:
80%
}
</style>
</head>
<body
onload=
"init()"
>
<h1>
Introduction
</h1>
The scripts in this page are
<a
href=
"http://en.wikipedia.org/wiki/Bookmarklet"
>
bookmarklets
</a>
: store
their link version in a bookmark, and later activate the bookmark on a page
to run the script.
<h1>
TED Talks captions
</h1>
<p><a
id=
"ted_talks_captions-link"
href=
"#"
>
Get links to the captions
</a></p>
<pre
id=
"ted_talks_captions"
>
d = window.open("", "sub", "width=256,height=512,resizable=yes,scrollbars=yes").document;
l = document.getElementById("languageCode").getElementsByTagName("option");
for (i = 1; i
<
l.length ; i++) {
d.body.appendChild(p = d.createElement("p"));
p.appendChild(a = d.createElement("a"));
a.appendChild(d.createTextNode(l[i].textContent));
a.href="http://www.ted.com/talks/subtitles/id/" + talkID+"/lang/" + l[i].value;
}
</pre>
</body>
</html>
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment