Browse code

lavf: TED Talks JSON captions demuxer.

Nicolas George authored on 2012/06/05 23:34:01
Showing 8 changed files
... ...
@@ -39,6 +39,7 @@ version <next>:
39 39
 - Paris Audio File demuxer
40 40
 - Virtual concatenation demuxer
41 41
 - VobSub demuxer
42
+- JSON captions for TED talks decoding support
42 43
 
43 44
 
44 45
 version 1.0:
... ...
@@ -215,4 +215,25 @@ backslash or single quotes.
215 215
 
216 216
 @end table
217 217
 
218
+@section tedcaptions
219
+
220
+JSON captions used for @url{http://www.ted.com/, TED Talks}.
221
+
222
+TED does not provide links to the captions, but they can be guessed from the
223
+page. The file @file{tools/bookmarklets.html} from the FFmpeg source tree
224
+contains a bookmarklet to expose them.
225
+
226
+This demuxer accepts the following option:
227
+@table @option
228
+@item start_time
229
+Set the start time of the TED talk, in milliseconds. The default is 15000
230
+(15s). It is used to sync the captions with the downloadable videos, because
231
+they include a 15s intro.
232
+@end table
233
+
234
+Example: convert the captions to a format most players understand:
235
+@example
236
+ffmpeg -i http://www.ted.com/talks/subtitles/id/1/lang/en talk1-en.srt
237
+@end example
238
+
218 239
 @c man end INPUT DEVICES
... ...
@@ -924,6 +924,7 @@ performance on systems without hardware floating point support).
924 924
 @item SAMI             @tab   @tab X @tab   @tab X
925 925
 @item SubRip (SRT)     @tab X @tab X @tab X @tab X
926 926
 @item SubViewer        @tab   @tab X @tab   @tab X
927
+@item TED Talks captions @tab @tab X @tab   @tab X
927 928
 @item VobSub (IDX+SUB) @tab   @tab X @tab   @tab X
928 929
 @item 3GPP Timed Text  @tab   @tab   @tab X @tab X
929 930
 @item WebVTT           @tab   @tab X @tab   @tab X
... ...
@@ -346,6 +346,7 @@ OBJS-$(CONFIG_SUBVIEWER_DEMUXER)         += subviewerdec.o
346 346
 OBJS-$(CONFIG_SWF_DEMUXER)               += swfdec.o swf.o
347 347
 OBJS-$(CONFIG_SWF_MUXER)                 += swfenc.o swf.o
348 348
 OBJS-$(CONFIG_TAK_DEMUXER)               += takdec.o apetag.o img2.o rawdec.o
349
+OBJS-$(CONFIG_TEDCAPTIONS_DEMUXER)       += tedcaptionsdec.o
349 350
 OBJS-$(CONFIG_THP_DEMUXER)               += thp.o
350 351
 OBJS-$(CONFIG_TIERTEXSEQ_DEMUXER)        += tiertexseq.o
351 352
 OBJS-$(CONFIG_MKVTIMESTAMP_V2_MUXER)     += mkvtimestamp_v2.o
... ...
@@ -244,6 +244,7 @@ void av_register_all(void)
244 244
     REGISTER_DEMUXER  (SUBVIEWER, subviewer);
245 245
     REGISTER_MUXDEMUX (SWF, swf);
246 246
     REGISTER_DEMUXER  (TAK, tak);
247
+    REGISTER_DEMUXER  (TEDCAPTIONS, tedcaptions);
247 248
     REGISTER_MUXER    (TG2, tg2);
248 249
     REGISTER_MUXER    (TGP, tgp);
249 250
     REGISTER_DEMUXER  (THP, thp);
250 251
new file mode 100644
... ...
@@ -0,0 +1,365 @@
0
+/*
1
+ * TED Talks captions format decoder
2
+ * Copyright (c) 2012 Nicolas George
3
+ *
4
+ * This file is part of FFmpeg.
5
+ *
6
+ * FFmpeg is free software; you can redistribute it and/or
7
+ * modify it under the terms of the GNU Lesser General Public
8
+ * License as published by the Free Software Foundation; either
9
+ * version 2.1 of the License, or (at your option) any later version.
10
+ *
11
+ * FFmpeg is distributed in the hope that it will be useful,
12
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
+ * Lesser General Public License for more details.
15
+ *
16
+ * You should have received a copy of the GNU Lesser General Public
17
+ * License along with FFmpeg; if not, write to the Free Software
18
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
+ */
20
+
21
+#include "libavutil/bprint.h"
22
+#include "libavutil/log.h"
23
+#include "libavutil/opt.h"
24
+#include "avformat.h"
25
+#include "internal.h"
26
+#include "subtitles.h"
27
+
28
+typedef struct {
29
+    AVClass *class;
30
+    int64_t start_time;
31
+    FFDemuxSubtitlesQueue subs;
32
+} TEDCaptionsDemuxer;
33
+
34
+static const AVOption tedcaptions_options[] = {
35
+    { "start_time", "set the start time (offset) of the subtitles, in ms",
36
+      offsetof(TEDCaptionsDemuxer, start_time), FF_OPT_TYPE_INT64,
37
+      { .i64 = 15000 }, INT64_MIN, INT64_MAX,
38
+      AV_OPT_FLAG_SUBTITLE_PARAM | AV_OPT_FLAG_DECODING_PARAM },
39
+    { NULL },
40
+};
41
+
42
+static const AVClass tedcaptions_demuxer_class = {
43
+    .class_name = "tedcaptions_demuxer",
44
+    .item_name  = av_default_item_name,
45
+    .option     = tedcaptions_options,
46
+    .version    = LIBAVUTIL_VERSION_INT,
47
+};
48
+
49
/* True when amin <= a <= amax, using a single unsigned comparison. */
#define BETWEEN(a, amin, amax) ((unsigned)((a) - (amin)) <= (amax) - (amin))

/* True for '0'..'9', 'a'..'f' and 'A'..'F' only ("| 32" folds ASCII case). */
#define HEX_DIGIT_TEST(c) (BETWEEN(c, '0', '9') || BETWEEN((c) | 32, 'a', 'f'))
/* Numeric value of a character already accepted by HEX_DIGIT_TEST. */
#define HEX_DIGIT_VAL(c) ((c) <= '9' ? (c) - '0' : ((c) | 32) - 'a' + 10)
/* Propagate a negative error code stored in c, otherwise report bad data. */
#define ERR_CODE(c) ((c) < 0 ? (c) : AVERROR_INVALIDDATA)
54
+
55
+static void av_bprint_utf8(AVBPrint *bp, unsigned c)
56
+{
57
+    int bytes, i;
58
+
59
+    if (c <= 0x7F) {
60
+        av_bprint_chars(bp, c, 1);
61
+        return;
62
+    }
63
+    bytes = (av_log2(c) - 2) / 5;
64
+    av_bprint_chars(bp, (c >> (bytes * 6)) | ((0xFF80 >> bytes) & 0xFF), 1);
65
+    for (i = bytes - 1; i >= 0; i--)
66
+        av_bprint_chars(bp, ((c >> (i * 6)) & 0x3F) | 0x80, 1);
67
+}
68
+
69
+static void next_byte(AVIOContext *pb, int *cur_byte)
70
+{
71
+    uint8_t b;
72
+    int ret = avio_read(pb, &b, 1);
73
+    *cur_byte = ret > 0 ? b : ret == 0 ? AVERROR_EOF : ret;
74
+}
75
+
76
+static void skip_spaces(AVIOContext *pb, int *cur_byte)
77
+{
78
+    while (*cur_byte == ' '  || *cur_byte == '\t' ||
79
+           *cur_byte == '\n' || *cur_byte == '\r')
80
+        next_byte(pb, cur_byte);
81
+}
82
+
83
+static int expect_byte(AVIOContext *pb, int *cur_byte, uint8_t c)
84
+{
85
+    skip_spaces(pb, cur_byte);
86
+    if (*cur_byte != c)
87
+        return ERR_CODE(*cur_byte);
88
+    next_byte(pb, cur_byte);
89
+    return 0;
90
+}
91
+
92
+static int parse_string(AVIOContext *pb, int *cur_byte, AVBPrint *bp, int full)
93
+{
94
+    int ret;
95
+
96
+    av_bprint_init(bp, 0, full ? -1 : 1);
97
+    ret = expect_byte(pb, cur_byte, '"');
98
+    if (ret < 0)
99
+        goto fail;
100
+    while (*cur_byte > 0 && *cur_byte != '"') {
101
+        if (*cur_byte == '\\') {
102
+            next_byte(pb, cur_byte);
103
+            if (*cur_byte < 0) {
104
+                ret = AVERROR_INVALIDDATA;
105
+                goto fail;
106
+            }
107
+            if ((*cur_byte | 32) == 'u') {
108
+                unsigned chr = 0, i;
109
+                for (i = 0; i < 4; i++) {
110
+                    next_byte(pb, cur_byte);
111
+                    if (!HEX_DIGIT_TEST(*cur_byte)) {
112
+                        ret = ERR_CODE(*cur_byte);
113
+                        goto fail;
114
+                    }
115
+                    chr = chr * 16 + HEX_DIGIT_VAL(*cur_byte);
116
+                }
117
+                av_bprint_utf8(bp, chr);
118
+            } else {
119
+                av_bprint_chars(bp, *cur_byte, 1);
120
+            }
121
+        } else {
122
+            av_bprint_chars(bp, *cur_byte, 1);
123
+        }
124
+        next_byte(pb, cur_byte);
125
+    }
126
+    ret = expect_byte(pb, cur_byte, '"');
127
+    if (ret < 0)
128
+        goto fail;
129
+    if (full && !av_bprint_is_complete(bp)) {
130
+        ret = AVERROR(ENOMEM);
131
+        goto fail;
132
+    }
133
+    return 0;
134
+
135
+fail:
136
+    av_bprint_finalize(bp, NULL);
137
+    return ret;
138
+}
139
+
140
+static int parse_label(AVIOContext *pb, int *cur_byte, AVBPrint *bp)
141
+{
142
+    int ret;
143
+
144
+    ret = parse_string(pb, cur_byte, bp, 0);
145
+    if (ret < 0)
146
+        return ret;
147
+    ret = expect_byte(pb, cur_byte, ':');
148
+    if (ret < 0)
149
+        return ret;
150
+    return 0;
151
+}
152
+
153
+static int parse_boolean(AVIOContext *pb, int *cur_byte, int *result)
154
+{
155
+    const char *text[] = { "false", "true" }, *p;
156
+    int i;
157
+
158
+    skip_spaces(pb, cur_byte);
159
+    for (i = 0; i < 2; i++) {
160
+        p = text[i];
161
+        if (*cur_byte != *p)
162
+            continue;
163
+        for (; *p; p++, next_byte(pb, cur_byte))
164
+            if (*cur_byte != *p)
165
+                return AVERROR_INVALIDDATA;
166
+        if (BETWEEN(*cur_byte | 32, 'a', 'z'))
167
+            return AVERROR_INVALIDDATA;
168
+        *result = i;
169
+        return 0;
170
+    }
171
+    return AVERROR_INVALIDDATA;
172
+}
173
+
174
+static int parse_int(AVIOContext *pb, int *cur_byte, int64_t *result)
175
+{
176
+    int64_t val = 0;
177
+
178
+    skip_spaces(pb, cur_byte);
179
+    if ((unsigned)*cur_byte - '0' > 9)
180
+        return AVERROR_INVALIDDATA;
181
+    while (BETWEEN(*cur_byte, '0', '9')) {
182
+        val = val * 10 + (*cur_byte - '0');
183
+        next_byte(pb, cur_byte);
184
+    }
185
+    *result = val;
186
+    return 0;
187
+}
188
+
189
+static int parse_file(AVIOContext *pb, FFDemuxSubtitlesQueue *subs)
190
+{
191
+    int ret, cur_byte, start_of_par;
192
+    AVBPrint label, content;
193
+    int64_t pos, start, duration;
194
+    AVPacket *pkt;
195
+
196
+    next_byte(pb, &cur_byte);
197
+    ret = expect_byte(pb, &cur_byte, '{');
198
+    if (ret < 0)
199
+        return AVERROR_INVALIDDATA;
200
+    ret = parse_label(pb, &cur_byte, &label);
201
+    if (ret < 0 || strcmp(label.str, "captions"))
202
+        return AVERROR_INVALIDDATA;
203
+    ret = expect_byte(pb, &cur_byte, '[');
204
+    if (ret < 0)
205
+        return AVERROR_INVALIDDATA;
206
+    while (1) {
207
+        content.size = 0;
208
+        start = duration = AV_NOPTS_VALUE;
209
+        ret = expect_byte(pb, &cur_byte, '{');
210
+        if (ret < 0)
211
+            return ret;
212
+        pos = avio_tell(pb) - 1;
213
+        while (1) {
214
+            ret = parse_label(pb, &cur_byte, &label);
215
+            if (ret < 0)
216
+                return ret;
217
+            if (!strcmp(label.str, "startOfParagraph")) {
218
+                ret = parse_boolean(pb, &cur_byte, &start_of_par);
219
+                if (ret < 0)
220
+                    return ret;
221
+            } else if (!strcmp(label.str, "content")) {
222
+                ret = parse_string(pb, &cur_byte, &content, 1);
223
+                if (ret < 0)
224
+                    return ret;
225
+            } else if (!strcmp(label.str, "startTime")) {
226
+                ret = parse_int(pb, &cur_byte, &start);
227
+                if (ret < 0)
228
+                    return ret;
229
+            } else if (!strcmp(label.str, "duration")) {
230
+                ret = parse_int(pb, &cur_byte, &duration);
231
+                if (ret < 0)
232
+                    return ret;
233
+            } else {
234
+                return AVERROR_INVALIDDATA;
235
+            }
236
+            skip_spaces(pb, &cur_byte);
237
+            if (cur_byte != ',')
238
+                break;
239
+            next_byte(pb, &cur_byte);
240
+        }
241
+        ret = expect_byte(pb, &cur_byte, '}');
242
+        if (ret < 0)
243
+            return ret;
244
+
245
+        if (!content.size || start == AV_NOPTS_VALUE ||
246
+            duration == AV_NOPTS_VALUE)
247
+            return AVERROR_INVALIDDATA;
248
+        pkt = ff_subtitles_queue_insert(subs, content.str, content.len, 0);
249
+        if (!pkt)
250
+            return AVERROR(ENOMEM);
251
+        pkt->pos      = pos;
252
+        pkt->pts      = start;
253
+        pkt->duration = duration;
254
+        av_bprint_finalize(&content, NULL);
255
+
256
+        skip_spaces(pb, &cur_byte);
257
+        if (cur_byte != ',')
258
+            break;
259
+        next_byte(pb, &cur_byte);
260
+    }
261
+    ret = expect_byte(pb, &cur_byte, ']');
262
+    if (ret < 0)
263
+        return ret;
264
+    ret = expect_byte(pb, &cur_byte, '}');
265
+    if (ret < 0)
266
+        return ret;
267
+    skip_spaces(pb, &cur_byte);
268
+    if (cur_byte != AVERROR_EOF)
269
+        return ERR_CODE(cur_byte);
270
+    return 0;
271
+}
272
+
273
+static av_cold int tedcaptions_read_header(AVFormatContext *avf)
274
+{
275
+    TEDCaptionsDemuxer *tc = avf->priv_data;
276
+    AVStream *st;
277
+    int ret, i;
278
+    AVPacket *last;
279
+
280
+    ret = parse_file(avf->pb, &tc->subs);
281
+    if (ret < 0) {
282
+        if (ret == AVERROR_INVALIDDATA)
283
+            av_log(avf, AV_LOG_ERROR, "Syntax error near offset %"PRId64".\n",
284
+                   avio_tell(avf->pb));
285
+        ff_subtitles_queue_clean(&tc->subs);
286
+        return ret;
287
+    }
288
+    ff_subtitles_queue_finalize(&tc->subs);
289
+    for (i = 0; i < tc->subs.nb_subs; i++)
290
+        tc->subs.subs[i].pts += tc->start_time;
291
+
292
+    last = &tc->subs.subs[tc->subs.nb_subs - 1];
293
+    st = avformat_new_stream(avf, NULL);
294
+    if (!st)
295
+        return AVERROR(ENOMEM);
296
+    st->codec->codec_type     = AVMEDIA_TYPE_SUBTITLE;
297
+    st->codec->codec_id       = CODEC_ID_TEXT;
298
+    avpriv_set_pts_info(st, 64, 1, 1000);
299
+    st->probe_packets = 0;
300
+    st->start_time    = 0;
301
+    st->duration      = last->pts + last->duration;
302
+    st->cur_dts       = 0;
303
+
304
+    return 0;
305
+}
306
+
307
+static int tedcaptions_read_packet(AVFormatContext *avf, AVPacket *packet)
308
+{
309
+    TEDCaptionsDemuxer *tc = avf->priv_data;
310
+
311
+    return ff_subtitles_queue_read_packet(&tc->subs, packet);
312
+}
313
+
314
+static int tedcaptions_read_close(AVFormatContext *avf)
315
+{
316
+    TEDCaptionsDemuxer *tc = avf->priv_data;
317
+
318
+    ff_subtitles_queue_clean(&tc->subs);
319
+    return 0;
320
+}
321
+
322
+static av_cold int tedcaptions_read_probe(AVProbeData *p)
323
+{
324
+    static const char *const tags[] = {
325
+        "\"captions\"", "\"duration\"", "\"content\"",
326
+        "\"startOfParagraph\"", "\"startTime\"",
327
+    };
328
+    unsigned i, count = 0;
329
+    const char *t;
330
+
331
+    if (p->buf[strspn(p->buf, " \t\r\n")] != '{')
332
+        return 0;
333
+    for (i = 0; i < FF_ARRAY_ELEMS(tags); i++) {
334
+        if (!(t = strstr(p->buf, tags[i])))
335
+            continue;
336
+        t += strlen(tags[i]);
337
+        t += strspn(t, " \t\r\n");
338
+        if (*t == ':')
339
+            count++;
340
+    }
341
+    return count == FF_ARRAY_ELEMS(tags) ? AVPROBE_SCORE_MAX :
342
+           count                         ? AVPROBE_SCORE_MAX / 2 : 0;
343
+}
344
+
345
+static int tedcaptions_read_seek(AVFormatContext *avf, int stream_index,
346
+                                 int64_t min_ts, int64_t ts, int64_t max_ts,
347
+                                 int flags)
348
+{
349
+    TEDCaptionsDemuxer *tc = avf->priv_data;
350
+    return ff_subtitles_queue_seek(&tc->subs, avf, stream_index,
351
+                                   min_ts, ts, max_ts, flags);
352
+}
353
+
354
+AVInputFormat ff_tedcaptions_demuxer = {
355
+    .name           = "tedcaptions",
356
+    .long_name      = NULL_IF_CONFIG_SMALL("TED Talks captions"),
357
+    .priv_data_size = sizeof(TEDCaptionsDemuxer),
358
+    .priv_class     = &tedcaptions_demuxer_class,
359
+    .read_header    = tedcaptions_read_header,
360
+    .read_packet    = tedcaptions_read_packet,
361
+    .read_close     = tedcaptions_read_close,
362
+    .read_probe     = tedcaptions_read_probe,
363
+    .read_seek2     = tedcaptions_read_seek,
364
+};
... ...
@@ -30,7 +30,7 @@
30 30
 #include "libavutil/avutil.h"
31 31
 
32 32
 #define LIBAVFORMAT_VERSION_MAJOR 54
33
-#define LIBAVFORMAT_VERSION_MINOR 48
33
+#define LIBAVFORMAT_VERSION_MINOR 49
34 34
 #define LIBAVFORMAT_VERSION_MICRO 100
35 35
 
36 36
 #define LIBAVFORMAT_VERSION_INT AV_VERSION_INT(LIBAVFORMAT_VERSION_MAJOR, \
37 37
new file mode 100644
... ...
@@ -0,0 +1,55 @@
0
+<!DOCTYPE html>
1
+<html>
2
+<head>
3
+<!--
4
+    This file is part of FFmpeg.
5
+
6
+    All scripts contained in this file can be considered public domain.
7
+  -->
8
+<title>FFmpeg bookmarklets</title>
9
+<meta charset="UTF-8">
10
+<script type="text/javascript">
11
/*
 * Turn a script into a "javascript:" bookmarklet URL.
 *
 * Block comments are removed, whitespace runs are collapsed to one space,
 * trailing whitespace is trimmed, and the result is wrapped in an
 * immediately-invoked function followed by "void 0".
 */
function convert(js) {
  /* [\s\S] instead of "." so that comments spanning several lines match */
  js = js.replace(/\/\*[\s\S]*?\*\//g, "");
  js = js.replace(/\s+/g, " ");
  /* "$" anchors at the end; "\z" is not an anchor in JavaScript and would
     match a literal "z" */
  js = js.replace(/\s+$/, "");
  js = "(function(){" + js + "})();void 0";
  return "javascript:" + escape(js);
}
18
/*
 * For every <pre> element, set the href of the element whose id is the
 * <pre>'s id plus "-link" to the bookmarklet built from the <pre>'s text.
 */
function init() {
  var blocks = document.getElementsByTagName("pre");
  Array.prototype.forEach.call(blocks, function (block) {
    var link = document.getElementById(block.id + "-link");
    link.href = convert(block.textContent);
  });
}
24
+</script>
25
+<style type="text/css">
26
+pre { border: solid black 1px; padding: 0.2ex; font-size: 80% }
27
+</style>
28
+</head>
29
+<body onload="init()">
30
+
31
+<h1>Introduction</h1>
32
+
33
+The scripts in this page are
34
+<a href="http://en.wikipedia.org/wiki/Bookmarklet">bookmarklets</a>: store
35
+their link version in a bookmark, and later activate the bookmark on a page
36
+to run the script.
37
+
38
+<h1>TED Talks captions</h1>
39
+
40
+<p><a id="ted_talks_captions-link" href="#">Get links to the captions</a></p>
41
+
42
+<pre id="ted_talks_captions">
43
+d = window.open("", "sub", "width=256,height=512,resizable=yes,scrollbars=yes").document;
44
+l = document.getElementById("languageCode").getElementsByTagName("option");
45
+for (i = 1; i &lt; l.length ; i++) {
46
+  d.body.appendChild(p = d.createElement("p"));
47
+  p.appendChild(a = d.createElement("a"));
48
+  a.appendChild(d.createTextNode(l[i].textContent));
49
+  a.href="http://www.ted.com/talks/subtitles/id/" + talkID+"/lang/" + l[i].value;
50
+}
51
+</pre>
52
+
53
+</body>
54
+</html>