Browse code

avformat/hls: add support for alternative renditions

HLS protocol version 4 added alternative renditions to the
specification (e.g. alternative audio tracks).

The EXT-X-MEDIA tags can also contain metadata for "renditions" (i.e.
tracks) of the main Media Playlist.

Add support for those.

Note that the same rendition (AVStream) may be associated with multiple
variants (AVPrograms).

Alternative subtitle tracks will require additional work and are
therefore not enabled yet.

Signed-off-by: Anssi Hannula <anssi.hannula@iki.fi>

Anssi Hannula authored on 2013/12/28 07:09:45
Showing 2 changed files
... ...
@@ -15,6 +15,7 @@ version <next>:
15 15
 - improvements to OpenEXR image decoder
16 16
 - support decoding 16-bit RLE SGI images
17 17
 - GDI screen grabbing for Windows
18
+- alternative rendition support for HTTP Live Streaming
18 19
 
19 20
 
20 21
 version 2.2:
... ...
@@ -1,6 +1,7 @@
1 1
 /*
2 2
  * Apple HTTP Live Streaming demuxer
3 3
  * Copyright (c) 2010 Martin Storsjo
4
+ * Copyright (c) 2013 Anssi Hannula
4 5
  *
5 6
  * This file is part of FFmpeg.
6 7
  *
... ...
@@ -38,6 +39,9 @@
38 38
 
39 39
 #define INITIAL_BUFFER_SIZE 32768
40 40
 
41
+#define MAX_FIELD_LEN 64
42
+#define MAX_CHARACTERISTICS_LEN 512
43
+
41 44
 /*
42 45
  * An apple http stream consists of a playlist with media segment files,
43 46
  * played sequentially. There may be several playlists with the same
... ...
@@ -63,6 +67,8 @@ struct segment {
63 63
     uint8_t iv[16];
64 64
 };
65 65
 
66
+struct rendition;
67
+
66 68
 /*
67 69
  * Each playlist has its own demuxer. If it currently is active,
68 70
  * it has an open AVIOContext too, and potentially an AVPacket
... ...
@@ -90,12 +96,40 @@ struct playlist {
90 90
 
91 91
     char key_url[MAX_URL_SIZE];
92 92
     uint8_t key[16];
93
+
94
+    /* Renditions associated with this playlist, if any.
95
+     * Alternative rendition playlists have a single rendition associated
96
+     * with them, and variant main Media Playlists may have
97
+     * multiple (playlist-less) renditions associated with them. */
98
+    int n_renditions;
99
+    struct rendition **renditions;
100
+};
101
+
102
+/*
103
+ * Renditions are e.g. alternative subtitle or audio streams.
104
+ * The rendition may either be an external playlist or it may be
105
+ * contained in the main Media Playlist of the variant (in which case
106
+ * playlist is NULL).
107
+ */
108
+struct rendition {
109
+    enum AVMediaType type;
110
+    struct playlist *playlist;
111
+    char group_id[MAX_FIELD_LEN];
112
+    char language[MAX_FIELD_LEN];
113
+    char name[MAX_FIELD_LEN];
114
+    int disposition;
93 115
 };
94 116
 
95 117
 struct variant {
96 118
     int bandwidth;
119
+
120
+    /* every variant contains at least the main Media Playlist in index 0 */
97 121
     int n_playlists;
98 122
     struct playlist **playlists;
123
+
124
+    char audio_group[MAX_FIELD_LEN];
125
+    char video_group[MAX_FIELD_LEN];
126
+    char subtitles_group[MAX_FIELD_LEN];
99 127
 };
100 128
 
101 129
 typedef struct HLSContext {
... ...
@@ -103,6 +137,8 @@ typedef struct HLSContext {
103 103
     struct variant **variants;
104 104
     int n_playlists;
105 105
     struct playlist **playlists;
106
+    int n_renditions;
107
+    struct rendition **renditions;
106 108
 
107 109
     int cur_seq_no;
108 110
     int end_of_segment;
... ...
@@ -139,6 +175,7 @@ static void free_playlist_list(HLSContext *c)
139 139
     for (i = 0; i < c->n_playlists; i++) {
140 140
         struct playlist *pls = c->playlists[i];
141 141
         free_segment_list(pls);
142
+        av_freep(&pls->renditions);
142 143
         av_free_packet(&pls->pkt);
143 144
         av_free(pls->pb.buffer);
144 145
         if (pls->input)
... ...
@@ -167,6 +204,15 @@ static void free_variant_list(HLSContext *c)
167 167
     c->n_variants = 0;
168 168
 }
169 169
 
170
+static void free_rendition_list(HLSContext *c)
171
+{
172
+    int i;
173
+    for (i = 0; i < c->n_renditions; i++)
174
+        av_free(c->renditions[i]);
175
+    av_freep(&c->renditions);
176
+    c->n_renditions = 0;
177
+}
178
+
170 179
 /*
171 180
  * Used to reset a statically allocated AVPacket to a clean slate,
172 181
  * containing no data.
... ...
@@ -189,7 +235,15 @@ static struct playlist *new_playlist(HLSContext *c, const char *url,
189 189
     return pls;
190 190
 }
191 191
 
192
-static struct variant *new_variant(HLSContext *c, int bandwidth,
192
+struct variant_info {
193
+    char bandwidth[20];
194
+    /* variant group ids: */
195
+    char audio[MAX_FIELD_LEN];
196
+    char video[MAX_FIELD_LEN];
197
+    char subtitles[MAX_FIELD_LEN];
198
+};
199
+
200
+static struct variant *new_variant(HLSContext *c, struct variant_info *info,
193 201
                                    const char *url, const char *base)
194 202
 {
195 203
     struct variant *var;
... ...
@@ -203,22 +257,33 @@ static struct variant *new_variant(HLSContext *c, int bandwidth,
203 203
     if (!var)
204 204
         return NULL;
205 205
 
206
-    var->bandwidth = bandwidth;
206
+    if (info) {
207
+        var->bandwidth = atoi(info->bandwidth);
208
+        strcpy(var->audio_group, info->audio);
209
+        strcpy(var->video_group, info->video);
210
+        strcpy(var->subtitles_group, info->subtitles);
211
+    }
212
+
207 213
     dynarray_add(&c->variants, &c->n_variants, var);
208 214
     dynarray_add(&var->playlists, &var->n_playlists, pls);
209 215
     return var;
210 216
 }
211 217
 
212
-struct variant_info {
213
-    char bandwidth[20];
214
-};
215
-
216 218
 static void handle_variant_args(struct variant_info *info, const char *key,
217 219
                                 int key_len, char **dest, int *dest_len)
218 220
 {
219 221
     if (!strncmp(key, "BANDWIDTH=", key_len)) {
220 222
         *dest     =        info->bandwidth;
221 223
         *dest_len = sizeof(info->bandwidth);
224
+    } else if (!strncmp(key, "AUDIO=", key_len)) {
225
+        *dest     =        info->audio;
226
+        *dest_len = sizeof(info->audio);
227
+    } else if (!strncmp(key, "VIDEO=", key_len)) {
228
+        *dest     =        info->video;
229
+        *dest_len = sizeof(info->video);
230
+    } else if (!strncmp(key, "SUBTITLES=", key_len)) {
231
+        *dest     =        info->subtitles;
232
+        *dest_len = sizeof(info->subtitles);
222 233
     }
223 234
 }
224 235
 
... ...
@@ -243,10 +308,137 @@ static void handle_key_args(struct key_info *info, const char *key,
243 243
     }
244 244
 }
245 245
 
246
+struct rendition_info {
247
+    char type[16];
248
+    char uri[MAX_URL_SIZE];
249
+    char group_id[MAX_FIELD_LEN];
250
+    char language[MAX_FIELD_LEN];
251
+    char assoc_language[MAX_FIELD_LEN];
252
+    char name[MAX_FIELD_LEN];
253
+    char defaultr[4];
254
+    char forced[4];
255
+    char characteristics[MAX_CHARACTERISTICS_LEN];
256
+};
257
+
258
+static struct rendition *new_rendition(HLSContext *c, struct rendition_info *info,
259
+                                      const char *url_base)
260
+{
261
+    struct rendition *rend;
262
+    enum AVMediaType type = AVMEDIA_TYPE_UNKNOWN;
263
+    char *characteristic;
264
+    char *chr_ptr;
265
+    char *saveptr;
266
+
267
+    if (!strcmp(info->type, "AUDIO"))
268
+        type = AVMEDIA_TYPE_AUDIO;
269
+    else if (!strcmp(info->type, "VIDEO"))
270
+        type = AVMEDIA_TYPE_VIDEO;
271
+    else if (!strcmp(info->type, "SUBTITLES"))
272
+        type = AVMEDIA_TYPE_SUBTITLE;
273
+    else if (!strcmp(info->type, "CLOSED-CAPTIONS"))
274
+        /* CLOSED-CAPTIONS is ignored since we do not support CEA-608 CC in
275
+         * AVC SEI RBSP anyway */
276
+        return NULL;
277
+
278
+    if (type == AVMEDIA_TYPE_UNKNOWN)
279
+        return NULL;
280
+
281
+    /* URI is mandatory for subtitles as per spec */
282
+    if (type == AVMEDIA_TYPE_SUBTITLE && !info->uri[0])
283
+        return NULL;
284
+
285
+    /* TODO: handle subtitles (each segment has to be parsed separately) */
286
+    if (type == AVMEDIA_TYPE_SUBTITLE)
287
+        return NULL;
288
+
289
+    rend = av_mallocz(sizeof(struct rendition));
290
+    if (!rend)
291
+        return NULL;
292
+
293
+    dynarray_add(&c->renditions, &c->n_renditions, rend);
294
+
295
+    rend->type = type;
296
+    strcpy(rend->group_id, info->group_id);
297
+    strcpy(rend->language, info->language);
298
+    strcpy(rend->name, info->name);
299
+
300
+    /* add the playlist if this is an external rendition */
301
+    if (info->uri[0]) {
302
+        rend->playlist = new_playlist(c, info->uri, url_base);
303
+        if (rend->playlist)
304
+            dynarray_add(&rend->playlist->renditions,
305
+                         &rend->playlist->n_renditions, rend);
306
+    }
307
+
308
+    if (info->assoc_language[0]) {
309
+        int langlen = strlen(rend->language);
310
+        if (langlen < sizeof(rend->language) - 3) {
311
+            rend->language[langlen] = ',';
312
+            strncpy(rend->language + langlen + 1, info->assoc_language,
313
+                    sizeof(rend->language) - langlen - 2);
314
+        }
315
+    }
316
+
317
+    if (!strcmp(info->defaultr, "YES"))
318
+        rend->disposition |= AV_DISPOSITION_DEFAULT;
319
+    if (!strcmp(info->forced, "YES"))
320
+        rend->disposition |= AV_DISPOSITION_FORCED;
321
+
322
+    chr_ptr = info->characteristics;
323
+    while ((characteristic = av_strtok(chr_ptr, ",", &saveptr))) {
324
+        if (!strcmp(characteristic, "public.accessibility.describes-music-and-sound"))
325
+            rend->disposition |= AV_DISPOSITION_HEARING_IMPAIRED;
326
+        else if (!strcmp(characteristic, "public.accessibility.describes-video"))
327
+            rend->disposition |= AV_DISPOSITION_VISUAL_IMPAIRED;
328
+
329
+        chr_ptr = NULL;
330
+    }
331
+
332
+    return rend;
333
+}
334
+
335
+static void handle_rendition_args(struct rendition_info *info, const char *key,
336
+                                  int key_len, char **dest, int *dest_len)
337
+{
338
+    if (!strncmp(key, "TYPE=", key_len)) {
339
+        *dest     =        info->type;
340
+        *dest_len = sizeof(info->type);
341
+    } else if (!strncmp(key, "URI=", key_len)) {
342
+        *dest     =        info->uri;
343
+        *dest_len = sizeof(info->uri);
344
+    } else if (!strncmp(key, "GROUP-ID=", key_len)) {
345
+        *dest     =        info->group_id;
346
+        *dest_len = sizeof(info->group_id);
347
+    } else if (!strncmp(key, "LANGUAGE=", key_len)) {
348
+        *dest     =        info->language;
349
+        *dest_len = sizeof(info->language);
350
+    } else if (!strncmp(key, "ASSOC-LANGUAGE=", key_len)) {
351
+        *dest     =        info->assoc_language;
352
+        *dest_len = sizeof(info->assoc_language);
353
+    } else if (!strncmp(key, "NAME=", key_len)) {
354
+        *dest     =        info->name;
355
+        *dest_len = sizeof(info->name);
356
+    } else if (!strncmp(key, "DEFAULT=", key_len)) {
357
+        *dest     =        info->defaultr;
358
+        *dest_len = sizeof(info->defaultr);
359
+    } else if (!strncmp(key, "FORCED=", key_len)) {
360
+        *dest     =        info->forced;
361
+        *dest_len = sizeof(info->forced);
362
+    } else if (!strncmp(key, "CHARACTERISTICS=", key_len)) {
363
+        *dest     =        info->characteristics;
364
+        *dest_len = sizeof(info->characteristics);
365
+    }
366
+    /*
367
+     * ignored:
368
+     * - AUTOSELECT: client may autoselect based on e.g. system language
369
+     * - INSTREAM-ID: EIA-608 closed caption number ("CC1".."CC4")
370
+     */
371
+}
372
+
246 373
 static int parse_playlist(HLSContext *c, const char *url,
247 374
                           struct playlist *pls, AVIOContext *in)
248 375
 {
249
-    int ret = 0, is_segment = 0, is_variant = 0, bandwidth = 0;
376
+    int ret = 0, is_segment = 0, is_variant = 0;
250 377
     int64_t duration = 0;
251 378
     enum KeyType key_type = KEY_NONE;
252 379
     uint8_t iv[16] = "";
... ...
@@ -256,6 +448,7 @@ static int parse_playlist(HLSContext *c, const char *url,
256 256
     const char *ptr;
257 257
     int close_in = 0;
258 258
     uint8_t *new_url = NULL;
259
+    struct variant_info variant_info;
259 260
 
260 261
     if (!in) {
261 262
         AVDictionary *opts = NULL;
... ...
@@ -291,11 +484,10 @@ static int parse_playlist(HLSContext *c, const char *url,
291 291
     while (!url_feof(in)) {
292 292
         read_chomp_line(in, line, sizeof(line));
293 293
         if (av_strstart(line, "#EXT-X-STREAM-INF:", &ptr)) {
294
-            struct variant_info info = {{0}};
295 294
             is_variant = 1;
295
+            memset(&variant_info, 0, sizeof(variant_info));
296 296
             ff_parse_key_value(ptr, (ff_parse_key_val_cb) handle_variant_args,
297
-                               &info);
298
-            bandwidth = atoi(info.bandwidth);
297
+                               &variant_info);
299 298
         } else if (av_strstart(line, "#EXT-X-KEY:", &ptr)) {
300 299
             struct key_info info = {{0}};
301 300
             ff_parse_key_value(ptr, (ff_parse_key_val_cb) handle_key_args,
... ...
@@ -309,9 +501,14 @@ static int parse_playlist(HLSContext *c, const char *url,
309 309
                 has_iv = 1;
310 310
             }
311 311
             av_strlcpy(key, info.uri, sizeof(key));
312
+        } else if (av_strstart(line, "#EXT-X-MEDIA:", &ptr)) {
313
+            struct rendition_info info = {{0}};
314
+            ff_parse_key_value(ptr, (ff_parse_key_val_cb) handle_rendition_args,
315
+                               &info);
316
+            new_rendition(c, &info, url);
312 317
         } else if (av_strstart(line, "#EXT-X-TARGETDURATION:", &ptr)) {
313 318
             if (!pls) {
314
-                if (!new_variant(c, 0, url, NULL)) {
319
+                if (!new_variant(c, NULL, url, NULL)) {
315 320
                     ret = AVERROR(ENOMEM);
316 321
                     goto fail;
317 322
                 }
... ...
@@ -320,7 +517,7 @@ static int parse_playlist(HLSContext *c, const char *url,
320 320
             pls->target_duration = atoi(ptr) * AV_TIME_BASE;
321 321
         } else if (av_strstart(line, "#EXT-X-MEDIA-SEQUENCE:", &ptr)) {
322 322
             if (!pls) {
323
-                if (!new_variant(c, 0, url, NULL)) {
323
+                if (!new_variant(c, NULL, url, NULL)) {
324 324
                     ret = AVERROR(ENOMEM);
325 325
                     goto fail;
326 326
                 }
... ...
@@ -337,12 +534,11 @@ static int parse_playlist(HLSContext *c, const char *url,
337 337
             continue;
338 338
         } else if (line[0]) {
339 339
             if (is_variant) {
340
-                if (!new_variant(c, bandwidth, line, url)) {
340
+                if (!new_variant(c, &variant_info, line, url)) {
341 341
                     ret = AVERROR(ENOMEM);
342 342
                     goto fail;
343 343
                 }
344 344
                 is_variant = 0;
345
-                bandwidth  = 0;
346 345
             }
347 346
             if (is_segment) {
348 347
                 struct segment *seg;
... ...
@@ -543,6 +739,60 @@ static int playlist_in_multiple_variants(HLSContext *c, struct playlist *pls)
543 543
     return variant_count >= 2;
544 544
 }
545 545
 
546
+static void add_renditions_to_variant(HLSContext *c, struct variant *var,
547
+                                      enum AVMediaType type, const char *group_id)
548
+{
549
+    int i;
550
+
551
+    for (i = 0; i < c->n_renditions; i++) {
552
+        struct rendition *rend = c->renditions[i];
553
+
554
+        if (rend->type == type && !strcmp(rend->group_id, group_id)) {
555
+
556
+            if (rend->playlist)
557
+                /* rendition is an external playlist
558
+                 * => add the playlist to the variant */
559
+                dynarray_add(&var->playlists, &var->n_playlists, rend->playlist);
560
+            else
561
+                /* rendition is part of the variant main Media Playlist
562
+                 * => add the rendition to the main Media Playlist */
563
+                dynarray_add(&var->playlists[0]->renditions,
564
+                             &var->playlists[0]->n_renditions,
565
+                             rend);
566
+        }
567
+    }
568
+}
569
+
570
+static void add_metadata_from_renditions(AVFormatContext *s, struct playlist *pls,
571
+                                         enum AVMediaType type)
572
+{
573
+    int rend_idx = 0;
574
+    int i;
575
+
576
+    for (i = 0; i < pls->ctx->nb_streams; i++) {
577
+        AVStream *st = s->streams[pls->stream_offset + i];
578
+
579
+        if (st->codec->codec_type != type)
580
+            continue;
581
+
582
+        for (; rend_idx < pls->n_renditions; rend_idx++) {
583
+            struct rendition *rend = pls->renditions[rend_idx];
584
+
585
+            if (rend->type != type)
586
+                continue;
587
+
588
+            if (rend->language[0])
589
+                av_dict_set(&st->metadata, "language", rend->language, 0);
590
+            if (rend->name[0])
591
+                av_dict_set(&st->metadata, "comment", rend->name, 0);
592
+
593
+            st->disposition |= rend->disposition;
594
+        }
595
+        if (rend_idx >=pls->n_renditions)
596
+            break;
597
+    }
598
+}
599
+
546 600
 static int hls_read_header(AVFormatContext *s)
547 601
 {
548 602
     URLContext *u = (s->flags & AVFMT_FLAG_CUSTOM_IO) ? NULL : s->pb->opaque;
... ...
@@ -605,6 +855,18 @@ static int hls_read_header(AVFormatContext *s)
605 605
         s->duration = duration;
606 606
     }
607 607
 
608
+    /* Associate renditions with variants */
609
+    for (i = 0; i < c->n_variants; i++) {
610
+        struct variant *var = c->variants[i];
611
+
612
+        if (var->audio_group[0])
613
+            add_renditions_to_variant(c, var, AVMEDIA_TYPE_AUDIO, var->audio_group);
614
+        if (var->video_group[0])
615
+            add_renditions_to_variant(c, var, AVMEDIA_TYPE_VIDEO, var->video_group);
616
+        if (var->subtitles_group[0])
617
+            add_renditions_to_variant(c, var, AVMEDIA_TYPE_SUBTITLE, var->subtitles_group);
618
+    }
619
+
608 620
     /* Open the demuxer for each playlist */
609 621
     for (i = 0; i < c->n_playlists; i++) {
610 622
         struct playlist *pls = c->playlists[i];
... ...
@@ -668,6 +930,10 @@ static int hls_read_header(AVFormatContext *s)
668 668
             avcodec_copy_context(st->codec, pls->ctx->streams[j]->codec);
669 669
         }
670 670
 
671
+        add_metadata_from_renditions(s, pls, AVMEDIA_TYPE_AUDIO);
672
+        add_metadata_from_renditions(s, pls, AVMEDIA_TYPE_VIDEO);
673
+        add_metadata_from_renditions(s, pls, AVMEDIA_TYPE_SUBTITLE);
674
+
671 675
         stream_offset += pls->ctx->nb_streams;
672 676
     }
673 677
 
... ...
@@ -709,6 +975,7 @@ static int hls_read_header(AVFormatContext *s)
709 709
 fail:
710 710
     free_playlist_list(c);
711 711
     free_variant_list(c);
712
+    free_rendition_list(c);
712 713
     return ret;
713 714
 }
714 715
 
... ...
@@ -850,6 +1117,7 @@ static int hls_close(AVFormatContext *s)
850 850
 
851 851
     free_playlist_list(c);
852 852
     free_variant_list(c);
853
+    free_rendition_list(c);
853 854
     return 0;
854 855
 }
855 856