libavcodec/samidec.c
53640f42
 /*
  * Copyright (c) 2012 Clément Bœsch
  *
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
  * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /**
  * @file
  * SAMI subtitle decoder
  * @see http://msdn.microsoft.com/en-us/library/ms971327.aspx
  */
 
 #include "ass.h"
 #include "libavutil/avstring.h"
 #include "libavutil/bprint.h"
87f90be4
 #include "htmlsubtitles.h"
53640f42
 
 /**
  * Private state of the SAMI decoder.
  *
  * All text buffers are set up in sami_init() and released in sami_close().
  */
 typedef struct SAMIContext {
     /** Raw text of the most recent "Source" (speaker name) paragraph. */
     AVBPrint source;
     /** Raw text gathered from the other paragraphs of the current event. */
     AVBPrint content;
     /** Output of ff_htmlmarkup_to_ass() for source. */
     AVBPrint encoded_source;
     /** Output of ff_htmlmarkup_to_ass() for content. */
     AVBPrint encoded_content;
     /** Assembled ASS text that is handed to ff_ass_add_rect(). */
     AVBPrint full;
     /** Counter passed to ff_ass_add_rect() and incremented per decoded event. */
     int readorder;
 } SAMIContext;
 
 /**
  * Convert the <P> paragraphs of one SAMI event into ASS text.
  *
  * The text of every <P> paragraph found in src is collected, with runs of
  * whitespace collapsed into a single space and <BR...> replaced by "\N".
  * Paragraphs whose tag contains ID=Source (quoted or not) are stored in
  * sami->source, all others are appended to sami->content. Both buffers are
  * then run through ff_htmlmarkup_to_ass() and the result is assembled in
  * sami->full: the source (if any) in italics followed by "\N", then the
  * content.
  *
  * sami->source is only cleared when a new Source paragraph is met, so a
  * speaker name set by an earlier call is reused by later calls.
  *
  * @param avctx codec context; avctx->priv_data is the SAMIContext to fill
  * @param src   NUL-terminated SAMI text; it is duplicated, never modified
  * @return AVERROR(ENOMEM) if src cannot be duplicated, -1 if the text of a
  *         paragraph starts with "&nbsp;" (empty event; sami->full is left
  *         untouched in that case), a negative value if
  *         ff_htmlmarkup_to_ass() fails, otherwise the value returned by the
  *         last ff_htmlmarkup_to_ass() call
  */
 static int sami_paragraph_to_ass(AVCodecContext *avctx, const char *src)
 {
     SAMIContext *sami = avctx->priv_data;
     int ret = 0;
     char *tag = NULL;
     /* work on a private copy: av_strtok() below writes into the string */
     char *dupsrc = av_strdup(src);
     char *p = dupsrc;
87f90be4
     AVBPrint *dst_content = &sami->encoded_content;
     AVBPrint *dst_source = &sami->encoded_source;
53640f42
 
61bbc537
     if (!dupsrc)
         return AVERROR(ENOMEM);
 
87f90be4
     av_bprint_clear(&sami->encoded_content);
53640f42
     av_bprint_clear(&sami->content);
87f90be4
     av_bprint_clear(&sami->encoded_source);
53640f42
     /* one iteration per <P> paragraph */
     for (;;) {
         char *saveptr = NULL;
         int prev_chr_is_space = 0;
         AVBPrint *dst = &sami->content;
 
         /* parse & extract paragraph tag */
         p = av_stristr(p, "<P");
         if (!p)
             break;
88d55b82
         if (p[2] != '>' && !av_isspace(p[2])) { // avoid confusion with tags such as <PRE>
53640f42
             p++;
             continue;
         }
         /* dst still points at sami->content here, so the separator lands in
          * the content even if this paragraph turns out to be the source */
         if (dst->len) // add a separator with the previous paragraph if there was one
             av_bprintf(dst, "\\N");
         /* split the tag ("<P ...") from what follows the closing '>' */
         tag = av_strtok(p, ">", &saveptr);
         /* stop parsing when av_strtok() yields no tag or no remainder */
         if (!tag || !saveptr)
             break;
         p = saveptr;
 
         /* check if the current paragraph is the "source" (speaker name) */
         if (av_stristr(tag, "ID=Source") || av_stristr(tag, "ID=\"Source\"")) {
             dst = &sami->source;
             av_bprint_clear(dst);
         }
 
         /* if empty event -> skip subtitle */
88d55b82
         while (av_isspace(*p))
53640f42
             p++;
         /* NOTE(review): sami_decode_frame() returns this -1 to its caller as
          * an error instead of silently skipping the event -- confirm that
          * this is the intended behaviour */
         if (!strncmp(p, "&nbsp;", 6)) {
             ret = -1;
             goto end;
         }
 
         /* extract the text, stripping most of the tags */
         while (*p) {
             if (*p == '<') {
88d55b82
                 /* the start of the next paragraph ends the current one */
                 if (!av_strncasecmp(p, "<P", 2) && (p[2] == '>' || av_isspace(p[2])))
53640f42
                     break;
87f90be4
             }
             /* <BR...>: emit an ASS line break and skip past the closing '>' */
             if (!av_strncasecmp(p, "<BR", 3)) {
db18b3d6
                 av_bprintf(dst, "\\N");
53640f42
                 p++;
                 while (*p && *p != '>')
                     p++;
                 if (!*p)
                     break;
                 if (*p == '>')
                     p++;
70082a1e
                 continue;
53640f42
             }
88d55b82
             /* copy the character, collapsing runs of whitespace into one space */
             if (!av_isspace(*p))
53640f42
                 av_bprint_chars(dst, *p, 1);
             else if (!prev_chr_is_space)
                 av_bprint_chars(dst, ' ', 1);
88d55b82
             prev_chr_is_space = av_isspace(*p);
53640f42
             p++;
         }
     }
 
     /* assemble the final text: optional italic speaker name, then the content */
     av_bprint_clear(&sami->full);
87f90be4
     if (sami->source.len) {
983e3fbc
         ret = ff_htmlmarkup_to_ass(avctx, dst_source, sami->source.str);
         if (ret < 0)
             goto end;
87f90be4
         av_bprintf(&sami->full, "{\\i1}%s{\\i0}\\N", sami->encoded_source.str);
     }
983e3fbc
     ret = ff_htmlmarkup_to_ass(avctx, dst_content, sami->content.str);
     if (ret < 0)
         goto end;
87f90be4
     av_bprintf(&sami->full, "%s", sami->encoded_content.str);
53640f42
 
 end:
     av_free(dupsrc);
     return ret;
 }
 
 /**
  * Decode one SAMI packet into an ASS rectangle of the output subtitle.
  *
  * A packet with no data or a non-positive size adds nothing. Otherwise the
  * packet text is converted by sami_paragraph_to_ass() and the resulting
  * sami->full string is appended with ff_ass_add_rect(), using and then
  * incrementing sami->readorder.
  *
  * @param avctx       codec context
  * @param data        AVSubtitle to fill
  * @param got_sub_ptr set to non-zero when the subtitle holds at least one
  *                    rectangle; not written on error
  * @param avpkt       input packet
  * @return avpkt->size on success, otherwise the negative value returned by
  *         sami_paragraph_to_ass() or ff_ass_add_rect()
  */
 static int sami_decode_frame(AVCodecContext *avctx,
                              void *data, int *got_sub_ptr, AVPacket *avpkt)
 {
     SAMIContext *sami = avctx->priv_data;
     AVSubtitle *sub = data;
     const char *text = avpkt->data;

     if (text && avpkt->size > 0) {
         int err = sami_paragraph_to_ass(avctx, text);

         // TODO: pass escaped sami->encoded_source.str as source
         if (err >= 0)
             err = ff_ass_add_rect(sub, sami->full.str, sami->readorder++, 0, NULL, NULL);
         if (err < 0)
             return err;
     }

     *got_sub_ptr = sub->num_rects > 0;
     return avpkt->size;
 }
 
 /**
  * Initialise the decoder: set up every text buffer of the private context
  * with av_bprint_init(buf, 0, 2048) and install the default ASS header.
  *
  * @param avctx codec context
  * @return the value returned by ff_ass_subtitle_header_default()
  */
 static av_cold int sami_init(AVCodecContext *avctx)
 {
     SAMIContext *sami = avctx->priv_data;
     AVBPrint *const buffers[] = {
         &sami->source,
         &sami->content,
         &sami->encoded_source,
         &sami->encoded_content,
         &sami->full,
     };
     size_t i;

     for (i = 0; i < sizeof(buffers) / sizeof(buffers[0]); i++)
         av_bprint_init(buffers[i], 0, 2048);

     return ff_ass_subtitle_header_default(avctx);
 }
 
 /**
  * Release every text buffer held by the private context.
  *
  * @param avctx codec context
  * @return always 0
  */
 static av_cold int sami_close(AVCodecContext *avctx)
 {
     SAMIContext *sami = avctx->priv_data;
     AVBPrint *const buffers[] = {
         &sami->source,
         &sami->content,
         &sami->encoded_source,
         &sami->encoded_content,
         &sami->full,
     };
     size_t i;

     /* no result string is requested, the buffers are only discarded */
     for (i = 0; i < sizeof(buffers) / sizeof(buffers[0]); i++)
         av_bprint_finalize(buffers[i], NULL);

     return 0;
 }
 
29412821
 /**
  * Flush callback: restart the read-order counter at 0, unless
  * AV_CODEC_FLAG2_RO_FLUSH_NOOP is set in avctx->flags2.
  *
  * @param avctx codec context
  */
 static void sami_flush(AVCodecContext *avctx)
 {
     SAMIContext *sami = avctx->priv_data;

     if (avctx->flags2 & AV_CODEC_FLAG2_RO_FLUSH_NOOP)
         return;

     sami->readorder = 0;
 }
 
53640f42
 /** Codec descriptor of the SAMI subtitle decoder. */
 AVCodec ff_sami_decoder = {
     /* identification */
     .name           = "sami",
     .long_name      = NULL_IF_CONFIG_SMALL("SAMI subtitle"),
     .id             = AV_CODEC_ID_SAMI,
     .type           = AVMEDIA_TYPE_SUBTITLE,
     /* private context and callbacks */
     .priv_data_size = sizeof(SAMIContext),
     .init           = sami_init,
     .decode         = sami_decode_frame,
     .flush          = sami_flush,
     .close          = sami_close,
 };