Browse code

ffmpeg: insert bitmap subtitles as video in filters.

With this feature, it becomes possible to perform commonly
requested tasks, such as hardcoding bitmap subtitles.

This will be reverted once libavfilter has proper support
for subtitles. All the changes have the string "sub2video"
in them, which makes the relevant parts easy to spot.

Nicolas George authored on 2012/07/27 02:29:27
Showing 3 changed files
... ...
@@ -39,6 +39,7 @@ version next:
39 39
 - concat filter
40 40
 - flite filter
41 41
 - Canopus Lossless Codec decoder
42
+- bitmap subtitles in filters (experimental and temporary)
42 43
 
43 44
 
44 45
 version 0.11:
... ...
@@ -989,6 +989,22 @@ ffmpeg -i video.mkv -i image.png -filter_complex 'overlay' out.mkv
989 989
 @end example
990 990
 @end table
991 991
 
992
+As a special exception, you can use a bitmap subtitle stream as input: it
993
+will be converted into a video with the same size as the largest video in
994
+the file, or 720×576 if no video is present. Note that this is an
995
+experimental and temporary solution. It will be removed once libavfilter has
996
+proper support for subtitles.
997
+
998
+For example, to hardcode subtitles on top of a DVB-T recording stored in
999
+MPEG-TS format, delaying the subtitles by 1 second:
1000
+@example
1001
+ffmpeg -i input.ts -filter_complex \
1002
+  '[#0x2ef] setpts=PTS+1/TB [sub] ; [#0x2d0] [sub] overlay' \
1003
+  -sn -map '#0x2dc' output.mkv
1004
+@end example
1005
+(0x2d0, 0x2dc and 0x2ef are the MPEG-TS PIDs of the video, audio and
1006
+subtitle streams, respectively; 0:0, 0:3 and 0:7 would have worked too.)
1007
+
992 1008
 @section Preset files
993 1009
 A preset file contains a sequence of @var{option}=@var{value} pairs,
994 1010
 one for each line, specifying a sequence of options which would be
... ...
@@ -249,6 +249,12 @@ typedef struct InputStream {
249 249
     int      resample_channels;
250 250
     uint64_t resample_channel_layout;
251 251
 
252
+    struct sub2video {
253
+        int64_t last_pts;
254
+        AVFilterBufferRef *ref;
255
+        int w, h;
256
+    } sub2video;
257
+
252 258
     /* a pool of free buffers for decoded data */
253 259
     FrameBuffer *buffer_pool;
254 260
     int dr1;
... ...
@@ -504,6 +510,155 @@ static void update_benchmark(const char *fmt, ...)
504 504
     }
505 505
 }
506 506
 
507
+/* sub2video hack:
508
+   Convert subtitles to video with alpha to insert them in filter graphs.
509
+   This is a temporary solution until libavfilter gets real subtitles support.
510
+ */
511
+
512
+
513
+static int sub2video_prepare(InputStream *ist)
514
+{
515
+    AVFormatContext *avf = input_files[ist->file_index]->ctx;
516
+    int i, ret, w, h;
517
+    uint8_t *image[4];
518
+    int linesize[4];
519
+
520
+    /* Compute the size of the canvas for the subtitles stream.
521
+       If the subtitles codec has set a size, use it. Otherwise use the
522
+       maximum dimensions of the video streams in the same file. */
523
+    w = ist->st->codec->width;
524
+    h = ist->st->codec->height;
525
+    if (!(w && h)) {
526
+        for (i = 0; i < avf->nb_streams; i++) {
527
+            if (avf->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
528
+                w = FFMAX(w, avf->streams[i]->codec->width);
529
+                h = FFMAX(h, avf->streams[i]->codec->height);
530
+            }
531
+        }
532
+        if (!(w && h)) {
533
+            w = FFMAX(w, 720);
534
+            h = FFMAX(h, 576);
535
+        }
536
+        av_log(avf, AV_LOG_INFO, "sub2video: using %dx%d canvas\n", w, h);
537
+    }
538
+    ist->sub2video.w = ist->st->codec->width  = w;
539
+    ist->sub2video.h = ist->st->codec->height = h;
540
+
541
+    /* rectangles are PIX_FMT_PAL8, but we have no guarantee that the
542
+       palettes for all rectangles are identical or compatible */
543
+    ist->st->codec->pix_fmt = PIX_FMT_RGB32;
544
+
545
+    ret = av_image_alloc(image, linesize, w, h, PIX_FMT_RGB32, 32);
546
+    if (ret < 0)
547
+        return ret;
548
+    memset(image[0], 0, h * linesize[0]);
549
+    ist->sub2video.ref = avfilter_get_video_buffer_ref_from_arrays(
550
+            image, linesize, AV_PERM_READ | AV_PERM_PRESERVE,
551
+            w, h, PIX_FMT_RGB32);
552
+    if (!ist->sub2video.ref) {
553
+        av_free(image[0]);
554
+        return AVERROR(ENOMEM);
555
+    }
556
+    return 0;
557
+}
558
+
559
+static void sub2video_copy_rect(uint8_t *dst, int dst_linesize, int w, int h,
560
+                                AVSubtitleRect *r)
561
+{
562
+    uint32_t *pal, *dst2;
563
+    uint8_t *src, *src2;
564
+    int x, y;
565
+
566
+    if (r->type != SUBTITLE_BITMAP) {
567
+        av_log(NULL, AV_LOG_WARNING, "sub2video: non-bitmap subtitle\n");
568
+        return;
569
+    }
570
+    if (r->x < 0 || r->x + r->w > w || r->y < 0 || r->y + r->h > h) {
571
+        av_log(NULL, AV_LOG_WARNING, "sub2video: rectangle overflowing\n");
572
+        return;
573
+    }
574
+
575
+    dst += r->y * dst_linesize + r->x * 4;
576
+    src = r->pict.data[0];
577
+    pal = (uint32_t *)r->pict.data[1];
578
+    for (y = 0; y < r->h; y++) {
579
+        dst2 = (uint32_t *)dst;
580
+        src2 = src;
581
+        for (x = 0; x < r->w; x++)
582
+            *(dst2++) = pal[*(src2++)];
583
+        dst += dst_linesize;
584
+        src += r->pict.linesize[0];
585
+    }
586
+}
587
+
588
+static void sub2video_push_ref(InputStream *ist, int64_t pts)
589
+{
590
+    AVFilterBufferRef *ref = ist->sub2video.ref;
591
+    int i;
592
+
593
+    ist->sub2video.last_pts = ref->pts = pts;
594
+    for (i = 0; i < ist->nb_filters; i++)
595
+        av_buffersrc_add_ref(ist->filters[i]->filter,
596
+                             avfilter_ref_buffer(ref, ~0),
597
+                             AV_BUFFERSRC_FLAG_NO_CHECK_FORMAT |
598
+                             AV_BUFFERSRC_FLAG_NO_COPY);
599
+}
600
+
601
+static void sub2video_update(InputStream *ist, AVSubtitle *sub, int64_t pts)
602
+{
603
+    int w = ist->sub2video.w, h = ist->sub2video.h;
604
+    AVFilterBufferRef *ref = ist->sub2video.ref;
605
+    int8_t *dst;
606
+    int     dst_linesize;
607
+    int i;
608
+
609
+    if (!ref)
610
+        return;
611
+    dst          = ref->data    [0];
612
+    dst_linesize = ref->linesize[0];
613
+    memset(dst, 0, h * dst_linesize);
614
+    for (i = 0; i < sub->num_rects; i++)
615
+        sub2video_copy_rect(dst, dst_linesize, w, h, sub->rects[i]);
616
+    sub2video_push_ref(ist, pts);
617
+}
618
+
619
+static void sub2video_heartbeat(InputStream *ist, int64_t pts)
620
+{
621
+    InputFile *infile = input_files[ist->file_index];
622
+    int i, j, nb_reqs;
623
+    int64_t pts2;
624
+
625
+    /* When a frame is read from a file, examine all sub2video streams in
626
+       the same file and send the sub2video frame again. Otherwise, decoded
627
+       video frames could be accumulating in the filter graph while a filter
628
+       (possibly overlay) is desperately waiting for a subtitle frame. */
629
+    for (i = 0; i < infile->nb_streams; i++) {
630
+        InputStream *ist2 = input_streams[infile->ist_index + i];
631
+        if (!ist2->sub2video.ref)
632
+            continue;
633
+        /* subtitles seem to be usually muxed ahead of other streams;
634
+           if not, substracting a larger time here is necessary */
635
+        pts2 = av_rescale_q(pts, ist->st->time_base, ist2->st->time_base) - 1;
636
+        /* do not send the heartbeat frame if the subtitle is already ahead */
637
+        if (pts2 <= ist2->sub2video.last_pts)
638
+            continue;
639
+        for (j = 0, nb_reqs = 0; j < ist2->nb_filters; j++)
640
+            nb_reqs += av_buffersrc_get_nb_failed_requests(ist2->filters[j]->filter);
641
+        if (nb_reqs)
642
+            sub2video_push_ref(ist2, pts2);
643
+    }
644
+}
645
+
646
+static void sub2video_flush(InputStream *ist)
647
+{
648
+    int i;
649
+
650
+    for (i = 0; i < ist->nb_filters; i++)
651
+        av_buffersrc_add_ref(ist->filters[i]->filter, NULL, 0);
652
+}
653
+
654
+/* end of sub2video hack */
655
+
507 656
 static void reset_options(OptionsContext *o, int is_input)
508 657
 {
509 658
     const OptionDef *po = options;
... ...
@@ -745,7 +900,10 @@ static void init_input_filter(FilterGraph *fg, AVFilterInOut *in)
745 745
         s = input_files[file_idx]->ctx;
746 746
 
747 747
         for (i = 0; i < s->nb_streams; i++) {
748
-            if (s->streams[i]->codec->codec_type != type)
748
+            enum AVMediaType stream_type = s->streams[i]->codec->codec_type;
749
+            if (stream_type != type &&
750
+                !(stream_type == AVMEDIA_TYPE_SUBTITLE &&
751
+                  type == AVMEDIA_TYPE_VIDEO /* sub2video hack */))
749 752
                 continue;
750 753
             if (check_stream_specifier(s, s->streams[i], *p == ':' ? p + 1 : p) == 1) {
751 754
                 st = s->streams[i];
... ...
@@ -1025,6 +1183,12 @@ static int configure_input_video_filter(FilterGraph *fg, InputFilter *ifilter,
1025 1025
     int pad_idx = in->pad_idx;
1026 1026
     int ret;
1027 1027
 
1028
+    if (ist->st->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
1029
+        ret = sub2video_prepare(ist);
1030
+        if (ret < 0)
1031
+            return ret;
1032
+    }
1033
+
1028 1034
     sar = ist->st->sample_aspect_ratio.num ?
1029 1035
           ist->st->sample_aspect_ratio :
1030 1036
           ist->st->codec->sample_aspect_ratio;
... ...
@@ -1413,6 +1577,7 @@ void av_noreturn exit_program(int ret)
1413 1413
         av_freep(&input_streams[i]->decoded_frame);
1414 1414
         av_dict_free(&input_streams[i]->opts);
1415 1415
         free_buffer_pool(&input_streams[i]->buffer_pool);
1416
+        avfilter_unref_bufferp(&input_streams[i]->sub2video.ref);
1416 1417
         av_freep(&input_streams[i]->filters);
1417 1418
         av_freep(&input_streams[i]);
1418 1419
     }
... ...
@@ -2636,13 +2801,16 @@ static int transcode_subtitles(InputStream *ist, AVPacket *pkt, int *got_output)
2636 2636
     AVSubtitle subtitle;
2637 2637
     int i, ret = avcodec_decode_subtitle2(ist->st->codec,
2638 2638
                                           &subtitle, got_output, pkt);
2639
-    if (ret < 0)
2640
-        return ret;
2641
-    if (!*got_output)
2639
+    if (ret < 0 || !*got_output) {
2640
+        if (!pkt->size)
2641
+            sub2video_flush(ist);
2642 2642
         return ret;
2643
+    }
2643 2644
 
2644 2645
     rate_emu_sleep(ist);
2645 2646
 
2647
+    sub2video_update(ist, &subtitle, pkt->pts);
2648
+
2646 2649
     for (i = 0; i < nb_output_streams; i++) {
2647 2650
         OutputStream *ost = output_streams[i];
2648 2651
 
... ...
@@ -3847,6 +4015,8 @@ static int transcode(void)
3847 3847
             }
3848 3848
         }
3849 3849
 
3850
+        sub2video_heartbeat(ist, pkt.pts);
3851
+
3850 3852
         // fprintf(stderr,"read #%d.%d size=%d\n", ist->file_index, ist->st->index, pkt.size);
3851 3853
         if ((ret = output_packet(ist, &pkt)) < 0 ||
3852 3854
             ((ret = poll_filters()) < 0 && ret != AVERROR_EOF)) {