libavformat/subtitles.c
7c9f9685
 /*
b1319e14
  * Copyright (c) 2012-2013 Clément Bœsch <u pkh me>
7c9f9685
  *
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
  * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "avformat.h"
 #include "subtitles.h"
3e842617
 #include "avio_internal.h"
2653e125
 #include "libavutil/avassert.h"
d948893d
 #include "libavutil/avstring.h"
7c9f9685
 
19a6431e
 /**
  * Bind a text reader to an AVIOContext and look for a byte order mark.
  *
  * The first two bytes (three when no UTF-16 BOM is present) are read into
  * the reader's internal buffer. A recognised BOM is skipped by advancing
  * buf_pos past it; any other bytes stay buffered and are handed out by the
  * next reads.
  *
  * @param s  logging context for av_log(); may be NULL to stay silent
  * @param r  reader to initialize
  * @param pb I/O context the reader pulls its bytes from
  */
 void ff_text_init_avio(void *s, FFTextReader *r, AVIOContext *pb)
 {
     int is_utf16;

     r->pb      = pb;
     r->buf_len = 0;
     r->buf_pos = 0;
     r->type    = FF_UTF_8;

     /* two bytes are enough to recognise either UTF-16 BOM */
     r->buf[r->buf_len++] = avio_r8(r->pb);
     r->buf[r->buf_len++] = avio_r8(r->pb);

     if (!strncmp("\xFF\xFE", r->buf, 2)) {
         r->type     = FF_UTF16LE;
         r->buf_pos += 2;
     } else if (!strncmp("\xFE\xFF", r->buf, 2)) {
         r->type     = FF_UTF16BE;
         r->buf_pos += 2;
     } else {
         /* not UTF-16: a third byte is needed to test for the UTF-8 BOM */
         r->buf[r->buf_len++] = avio_r8(r->pb);
         if (!strncmp("\xEF\xBB\xBF", r->buf, 3))
             r->buf_pos += 3;
     }

     is_utf16 = r->type == FF_UTF16LE || r->type == FF_UTF16BE;
     if (s && is_utf16)
         av_log(s, AV_LOG_INFO,
                "UTF16 is automatically converted to UTF8, do not specify a character encoding\n");
 }
 
 /**
  * Initialize a text reader on top of a memory buffer.
  *
  * The reader's embedded I/O context (buf_pb) is zeroed, set up over
  * buf/size through ffio_init_context() with no callbacks, and then handed
  * to ff_text_init_avio() without a logging context.
  *
  * @param r    reader to initialize
  * @param buf  memory holding the text
  * @param size number of bytes available in buf
  */
 void ff_text_init_buf(FFTextReader *r, void *buf, size_t size)
 {
     AVIOContext *mem_pb = &r->buf_pb;

     memset(mem_pb, 0, sizeof(r->buf_pb));
     ffio_init_context(mem_pb, buf, size, 0, NULL, NULL, NULL, NULL);
     ff_text_init_avio(NULL, r, mem_pb);
 }
 
 /**
  * Report the reader position: the underlying I/O position minus the
  * buffered bytes (buf_len) plus the part of the buffer already consumed
  * (buf_pos).
  */
 int64_t ff_text_pos(FFTextReader *r)
 {
     const int64_t io_pos = avio_tell(r->pb);

     return io_pos - r->buf_len + r->buf_pos;
 }
 
 /**
  * Read the next byte from the text reader.
  *
  * Bytes still pending in the internal buffer are returned first. Readers
  * that are not UTF-16 then return the next raw byte of the I/O context.
  * UTF-16 readers fetch one code point with GET_UTF16, store it in the
  * internal buffer with PUT_UTF8 and return the first byte of that
  * sequence; the remaining bytes are delivered by subsequent calls.
  *
  * @return the byte read, or 0 when GET_UTF16 runs its error action or the
  *         decoded code point is 0
  */
 int ff_text_r8(FFTextReader *r)
 {
     uint32_t codepoint;
     uint8_t byte;

     if (r->buf_pos < r->buf_len)
         return r->buf[r->buf_pos++];

     if (r->type != FF_UTF16LE && r->type != FF_UTF16BE)
         return avio_r8(r->pb);

     if (r->type == FF_UTF16LE) {
         GET_UTF16(codepoint, avio_rl16(r->pb), return 0;)
     } else {
         GET_UTF16(codepoint, avio_rb16(r->pb), return 0;)
     }
     if (!codepoint)
         return 0;

     /* refill the buffer with the UTF-8 form of the code point */
     r->buf_len = 0;
     r->buf_pos = 0;
     PUT_UTF8(codepoint, byte, r->buf[r->buf_len++] = byte;)
     return r->buf[r->buf_pos++]; // buf_len is at least 1
 }
 
 /**
  * Fill buf with exactly size bytes obtained one at a time from
  * ff_text_r8(). No terminator is appended.
  */
 void ff_text_read(FFTextReader *r, char *buf, size_t size)
 {
     size_t i;

     for (i = 0; i < size; i++)
         buf[i] = ff_text_r8(r);
 }
 
d658ef18
 /**
  * Tell whether the reader is exhausted.
  *
  * @return 1 when the internal buffer is empty and avio_feof() reports end
  *         of file on the underlying I/O context, 0 otherwise
  */
 int ff_text_eof(FFTextReader *r)
 {
     if (r->buf_pos < r->buf_len)
         return 0; /* buffered bytes are still pending */
     return avio_feof(r->pb) != 0;
 }
 
 /**
  * Return the next byte without consuming it.
  *
  * If the internal buffer is empty a byte is obtained through ff_text_r8()
  * and, unless avio_feof() reports end of file, made available again for the
  * next read.
  *
  * @return the byte the next ff_text_r8() call would return
  */
 int ff_text_peek_r8(FFTextReader *r)
 {
     int c;

     if (r->buf_pos < r->buf_len)
         return r->buf[r->buf_pos];

     c = ff_text_r8(r);
     if (!avio_feof(r->pb)) {
         if (r->buf_pos < r->buf_len) {
             /* The buffer was empty before the call, so unread bytes now
              * mean ff_text_r8() stored a multi-byte UTF-8 sequence and
              * returned its first byte. Rewind onto that byte rather than
              * shrinking the buffer to one byte, which would discard the
              * continuation bytes. */
             r->buf_pos = 0;
         } else {
             r->buf_pos = 0;
             r->buf_len = 1;
             r->buf[0] = c;
         }
     }
     return c;
 }
 
7c9f9685
 /**
  * Add a subtitle event to the queue.
  *
  * @param q     queue to modify
  * @param event payload bytes to copy
  * @param len   number of bytes in event
  * @param merge when non-zero and the queue is not empty, append the payload
  *              to the last queued packet instead of creating a new one
  * @return the packet holding the event, or NULL on failure (packet or
  *         array allocation failed, or the queue cannot grow further)
  */
 AVPacket *ff_subtitles_queue_insert(FFDemuxSubtitlesQueue *q,
                                     const uint8_t *event, size_t len, int merge)
 {
     AVPacket *subs, *sub;

     if (merge && q->nb_subs > 0) {
         /* merge with previous event */

         int old_len;
         sub = &q->subs[q->nb_subs - 1];
         old_len = sub->size;
         if (av_grow_packet(sub, len) < 0)
             return NULL;
         memcpy(sub->data + old_len, event, len);
     } else {
         /* new event */

         if (q->nb_subs >= INT_MAX/sizeof(*q->subs) - 1)
             return NULL;
         subs = av_fast_realloc(q->subs, &q->allocated_size,
                                (q->nb_subs + 1) * sizeof(*q->subs));
         if (!subs)
             return NULL;
         q->subs = subs;
         sub = &subs[q->nb_subs];
         if (av_new_packet(sub, len) < 0)
             return NULL;
         /* Count the slot only once it holds a valid packet: the slot comes
          * from a raw reallocation, and a failed av_new_packet() is not
          * guaranteed to leave it in a state av_packet_unref() can handle
          * when the queue is cleaned. */
         q->nb_subs++;
         sub->flags |= AV_PKT_FLAG_KEY;
         sub->pts = sub->dts = 0;
         memcpy(sub->data, event, len);
     }
     return sub;
 }
 
dbfe6110
 static int cmp_pkt_sub_ts_pos(const void *a, const void *b)
7c9f9685
 {
     const AVPacket *s1 = a;
     const AVPacket *s2 = b;
92e483f8
     if (s1->pts == s2->pts)
         return FFDIFFSIGN(s1->pos, s2->pos);
     return FFDIFFSIGN(s1->pts , s2->pts);
7c9f9685
 }
 
dbfe6110
 /**
  * qsort() comparator ordering packets by pos, using pts to break ties.
  */
 static int cmp_pkt_sub_pos_ts(const void *a, const void *b)
 {
     const AVPacket *lhs = a;
     const AVPacket *rhs = b;

     if (lhs->pos != rhs->pos)
         return lhs->pos > rhs->pos ? 1 : -1;
     if (lhs->pts != rhs->pts)
         return lhs->pts > rhs->pts ? 1 : -1;
     return 0;
 }
 
af924fd9
 /**
  * Remove consecutive duplicated events from the queue.
  *
  * An entry is a duplicate when its pts, duration, stream index and payload
  * (compared with strcmp()) all match the last entry that was kept. The
  * surviving entries are compacted towards the start of the array and
  * nb_subs is reduced by the number of dropped entries.
  *
  * @param log_ctx context passed to av_log() for the warning
  * @param q       queue to deduplicate
  */
 static void drop_dups(void *log_ctx, FFDemuxSubtitlesQueue *q)
 {
     int idx, dropped = 0;

     for (idx = 1; idx < q->nb_subs; idx++) {
         AVPacket *cur = &q->subs[idx];
         const int kept_idx = idx - 1 - dropped;
         const AVPacket *kept = &q->subs[kept_idx];
         const int is_dup = cur->pts          == kept->pts          &&
                            cur->duration     == kept->duration     &&
                            cur->stream_index == kept->stream_index &&
                            !strcmp(cur->data, kept->data);

         if (is_dup) {
             av_packet_unref(cur);
             dropped++;
             continue;
         }
         if (dropped) {
             /* close the gap left by the entries dropped so far */
             q->subs[kept_idx + 1] = *cur;
             memset(cur, 0, sizeof(*cur)); // for safety
         }
     }

     if (!dropped)
         return;

     q->nb_subs -= dropped;
     av_log(log_ctx, AV_LOG_WARNING, "Dropping %d duplicated subtitle events\n", dropped);
 }
 
af924fd9
 /**
  * Prepare a filled queue for reading.
  *
  * The packets are sorted (by pts then pos when q->sort is SUB_SORT_TS_POS,
  * by pos then pts otherwise), every negative duration except the one of
  * the last packet is replaced by the distance to the next packet's pts,
  * and duplicates are dropped unless q->keep_duplicates is set.
  *
  * @param log_ctx context used for logging while dropping duplicates
  * @param q       queue to finalize
  */
 void ff_subtitles_queue_finalize(void *log_ctx, FFDemuxSubtitlesQueue *q)
 {
     int i;

     /* Nothing to do for an empty queue; this also keeps a possibly NULL
      * q->subs away from qsort(), which requires a valid pointer even when
      * asked to sort zero elements. */
     if (q->nb_subs <= 0)
         return;

     qsort(q->subs, q->nb_subs, sizeof(*q->subs),
           q->sort == SUB_SORT_TS_POS ? cmp_pkt_sub_ts_pos
                                      : cmp_pkt_sub_pos_ts);
     for (i = 0; i < q->nb_subs; i++)
         if (q->subs[i].duration < 0 && i < q->nb_subs - 1)
             q->subs[i].duration = q->subs[i + 1].pts - q->subs[i].pts;

     if (!q->keep_duplicates)
         drop_dups(log_ctx, q);
 }
 
 /**
  * Hand out the packet at the current queue position and advance.
  *
  * The output packet is a new reference to the queued one, with dts forced
  * to the packet's pts.
  *
  * @return 0 on success, AVERROR_EOF once every packet has been returned,
  *         AVERROR(ENOMEM) if the packet could not be referenced
  */
 int ff_subtitles_queue_read_packet(FFDemuxSubtitlesQueue *q, AVPacket *pkt)
 {
     AVPacket *src = q->subs + q->current_sub_idx;

     if (q->current_sub_idx == q->nb_subs)
         return AVERROR_EOF;

     if (av_packet_ref(pkt, src) < 0)
         return AVERROR(ENOMEM);

     pkt->dts = pkt->pts;
     q->current_sub_idx++;
     return 0;
 }
 
b1319e14
 /**
  * Bisect the queue for an entry around the timestamp ts.
  *
  * The interval [lo, hi] is narrowed by comparing the middle entry's pts
  * with ts until one or two candidates remain.
  *
  * @return an index into q->subs, or AVERROR(ERANGE) if the queue is empty
  */
 static int search_sub_ts(const FFDemuxSubtitlesQueue *q, int64_t ts)
 {
     int lo = 0, hi = q->nb_subs - 1;

     if (hi < lo)
         return AVERROR(ERANGE);

     while (lo != hi) {
         int mid;

         /* NOTE(review): with two candidates left they are compared with
          * each other rather than with ts -- confirm this is intended */
         if (lo == hi - 1)
             return q->subs[lo].pts <= q->subs[hi].pts ? lo : hi;

         mid = (lo + hi) / 2;
         if (q->subs[mid].pts <= ts)
             lo = mid;
         else
             hi = mid;
     }
     return lo;
 }
 
ff3624b1
 /**
  * Move the queue read position according to a seek request.
  *
  * Byte seeking is not supported. With AVSEEK_FLAG_FRAME, ts is used
  * directly as the packet index. Otherwise the packet is chosen by
  * timestamp within [min_ts, max_ts], then moved back over earlier packets
  * whose duration still overlaps the selected timestamp.
  *
  * @param q            queue to seek in
  * @param s            format context (not used by this function)
  * @param stream_index stream to consider, or -1 for any stream
  * @param min_ts       smallest acceptable timestamp
  * @param ts           target timestamp (or packet index in frame mode)
  * @param max_ts       largest acceptable timestamp
  * @param flags        AVSEEK_FLAG_* flags
  * @return 0 on success, AVERROR(ENOSYS) for byte seeking, AVERROR(ERANGE)
  *         when no packet satisfies the request, or the negative error
  *         returned by the timestamp search
  */
 int ff_subtitles_queue_seek(FFDemuxSubtitlesQueue *q, AVFormatContext *s, int stream_index,
                             int64_t min_ts, int64_t ts, int64_t max_ts, int flags)
 {
     int i, idx;
     int64_t ts_selected;

     if (flags & AVSEEK_FLAG_BYTE)
         return AVERROR(ENOSYS);

     if (flags & AVSEEK_FLAG_FRAME) {
         if (ts < 0 || ts >= q->nb_subs)
             return AVERROR(ERANGE);
         q->current_sub_idx = ts;
         return 0;
     }

     idx = search_sub_ts(q, ts);
     if (idx < 0)
         return idx;

     /* walk forward while the entries are still before min_ts ... */
     for (i = idx; i < q->nb_subs && q->subs[i].pts < min_ts; i++) {
         if (stream_index == -1 || q->subs[i].stream_index == stream_index)
             idx = i;
     }
     /* ... and backward while they are still after max_ts */
     for (i = idx; i > 0 && q->subs[i].pts > max_ts; i--) {
         if (stream_index == -1 || q->subs[i].stream_index == stream_index)
             idx = i;
     }

     ts_selected = q->subs[idx].pts;
     if (ts_selected < min_ts || ts_selected > max_ts)
         return AVERROR(ERANGE);

     /* look back in the latest subtitles for overlapping subtitles */
     for (i = idx - 1; i >= 0; i--) {
         const AVPacket *prev = &q->subs[i];
         const int other_stream = stream_index != -1 &&
                                  prev->stream_index != stream_index;

         if (prev->duration <= 0 || other_stream)
             continue;
         /* stop at the first candidate that is out of range or already
          * over at the selected timestamp */
         if (prev->pts < min_ts || prev->pts <= ts_selected - prev->duration)
             break;
         idx = i;
     }

     /* If the queue is used to store multiple subtitles streams (like with
      * VobSub) and the stream index is not specified, we need to make sure
      * to focus on the smallest file position offset for a same timestamp;
      * queue is ordered by pts and then filepos, so we can take the first
      * entry for a given timestamp. */
     if (stream_index == -1) {
         while (idx > 0 && q->subs[idx - 1].pts == q->subs[idx].pts)
             idx--;
     }

     q->current_sub_idx = idx;
     return 0;
 }
 
7c9f9685
 /**
  * Release every queued packet and the packet array, then reset the queue
  * counters (nb_subs, allocated_size, current_sub_idx) to zero.
  */
 void ff_subtitles_queue_clean(FFDemuxSubtitlesQueue *q)
 {
     int idx = 0;

     while (idx < q->nb_subs) {
         av_packet_unref(&q->subs[idx]);
         idx++;
     }
     av_freep(&q->subs);

     q->current_sub_idx = 0;
     q->allocated_size  = 0;
     q->nb_subs         = 0;
 }
d948893d
 
231a514d
 /**
  * Append the next chunk of a SMIL-like stream to buf.
  *
  * A chunk starting with '<' runs up to and including the matching '>';
  * any other chunk runs up to, but not including, the next '<'. In the
  * latter case the '<' that ended the chunk is left in *c so the next call
  * starts from it.
  *
  * @param tr  text reader to pull characters from
  * @param buf print buffer receiving the chunk
  * @param c   one-character cache shared between calls; 0 means empty
  * @return the number of characters consumed for this chunk (the closing
  *         '>' of a tag is appended but not counted), or 0 when a zero
  *         byte is read at the start
  */
 int ff_smil_extract_next_text_chunk(FFTextReader *tr, AVBPrint *buf, char *c)
 {
     int nb_chars = 0;
     char end_chr;

     if (!*c) // cached char?
         *c = ff_text_r8(tr);
     if (!*c)
         return 0;

     if (*c == '<')
         end_chr = '>';
     else
         end_chr = '<';

     for (;;) {
         av_bprint_chars(buf, *c, 1);
         *c = ff_text_r8(tr);
         nb_chars++;
         if (*c == end_chr || !*c)
             break;
     }

     if (end_chr == '>') {
         /* close the tag and leave the cache empty */
         av_bprint_chars(buf, '>', 1);
         *c = 0;
     }
     return nb_chars;
 }
 
 /**
  * Locate the value of an attribute inside a tag string.
  *
  * The string is scanned token by token, where tokens are separated by
  * whitespace found outside double quotes. Each token after the first is
  * compared case-insensitively with "attr=".
  *
  * @param s    tag text to search
  * @param attr attribute name, without the '='
  * @return pointer to the first character of the value (just past an
  *         opening double quote if there is one), or NULL if not found
  */
 const char *ff_smil_get_attr_ptr(const char *s, const char *attr)
 {
     const size_t len = strlen(attr);
     int in_quotes = 0;

     while (*s) {
         const char *value;

         /* skip the current token; whitespace inside quotes belongs to it */
         for (; *s && (in_quotes || !av_isspace(*s)); s++)
             in_quotes ^= *s == '"'; // XXX: support escaping?

         /* skip the separator */
         for (; av_isspace(*s); s++)
             ;

         if (av_strncasecmp(s, attr, len) || s[len] != '=')
             continue;

         value = s + len + 1;
         if (*value == '"')
             value++;
         return value;
     }
     return NULL;
 }
d9ac8d29
 
 /**
  * Tell whether c is a line break character (carriage return or line feed).
  *
  * @return 1 for '\r' or '\n', 0 for anything else
  */
 static inline int is_eol(char c)
 {
     switch (c) {
     case '\r':
     case '\n':
         return 1;
     default:
         return 0;
     }
 }
 
d658ef18
 /**
  * Read one chunk of text into buf.
  *
  * buf is cleared first. Leading line breaks are skipped. Line breaks found
  * inside the chunk are held back and only written out once more data
  * follows them, so the line breaks that terminate the chunk are never
  * appended. Reading stops on a zero byte, when two line breaks have been
  * counted, or when the held-back line break buffer is full.
  *
  * @param tr  text reader to pull characters from
  * @param buf print buffer receiving the chunk
  */
 void ff_subtitles_read_text_chunk(FFTextReader *tr, AVBPrint *buf)
 {
     char pending_eol[5];
     char prev_was_cr = 0;
     int nb_chars = 0, nb_pending = 0, nb_eol = 0;
     char c;

     av_bprint_clear(buf);

     while ((c = ff_text_r8(tr)) != 0) {
         if (is_eol(c)) {
             /* ignore all initial line breaks */
             if (nb_chars == 0)
                 continue;

             /* line break buffering: we don't want to add the trailing \r\n */
             nb_eol += c == '\n' || prev_was_cr;
             if (nb_eol == 2)
                 break;
             pending_eol[nb_pending++] = c;
             if (nb_pending == sizeof(pending_eol) - 1)
                 break;
             prev_was_cr = c == '\r';
             continue;
         }

         /* only one line break followed by data: we flush the line breaks
          * buffer */
         if (nb_pending) {
             pending_eol[nb_pending] = 0;
             av_bprintf(buf, "%s", pending_eol);
             nb_pending = 0;
             nb_eol     = 0;
         }

         av_bprint_chars(buf, c, 1);
         nb_chars++;
     }
 }
d658ef18
 
 /**
  * Read one text chunk straight from an AVIOContext.
  *
  * A temporary reader with an empty buffer and type 0 is set up by hand
  * (no byte order mark detection takes place) and passed to
  * ff_subtitles_read_text_chunk().
  *
  * @param pb  I/O context to read from
  * @param buf print buffer receiving the chunk
  */
 void ff_subtitles_read_chunk(AVIOContext *pb, AVBPrint *buf)
 {
     FFTextReader reader;

     reader.pb      = pb;
     reader.type    = 0;
     reader.buf_len = 0;
     reader.buf_pos = 0;

     ff_subtitles_read_text_chunk(&reader, buf);
 }
 
 /**
  * Read a single line of text into buf.
  *
  * Characters are stored until a line break is read or the buffer is full;
  * the line break itself is not stored. After the line, one '\r' and then
  * one '\n' are consumed if they come next. Whenever size is non-zero, buf
  * is NUL-terminated on return, including when no character was stored.
  *
  * @param tr   text reader to pull characters from
  * @param buf  destination buffer
  * @param size capacity of buf in bytes, terminator included
  * @return the number of bytes stored (terminator excluded), 0 if size is
  *         0, or AVERROR_INVALIDDATA if a zero byte is read while the
  *         reader is not at end of file
  */
 ptrdiff_t ff_subtitles_read_line(FFTextReader *tr, char *buf, size_t size)
 {
     size_t cur = 0;
     if (!size)
         return 0;
     /* Terminate up front: the loop below only writes a terminator after
      * storing a character, so an empty line, an immediate end of file or
      * a one-byte buffer would otherwise leave buf uninitialized. */
     buf[0] = '\0';
     while (cur + 1 < size) {
         unsigned char c = ff_text_r8(tr);
         if (!c)
             return ff_text_eof(tr) ? cur : AVERROR_INVALIDDATA;
         if (c == '\r' || c == '\n')
             break;
         buf[cur++] = c;
         buf[cur] = '\0';
     }
     if (ff_text_peek_r8(tr) == '\r')
         ff_text_r8(tr);
     if (ff_text_peek_r8(tr) == '\n')
         ff_text_r8(tr);
     return cur;
 }