... | ... |
@@ -1390,6 +1390,7 @@ HAVE_LIST=" |
1390 | 1390 |
gnu_as |
1391 | 1391 |
gsm_h |
1392 | 1392 |
ibm_asm |
1393 |
+ iconv |
|
1393 | 1394 |
inet_aton |
1394 | 1395 |
io_h |
1395 | 1396 |
isatty |
... | ... |
@@ -3716,6 +3717,7 @@ check_func getopt |
3716 | 3716 |
check_func getrusage |
3717 | 3717 |
check_struct "sys/time.h sys/resource.h" "struct rusage" ru_maxrss |
3718 | 3718 |
check_func gettimeofday |
3719 |
+check_func iconv |
|
3719 | 3720 |
check_func inet_aton $network_extralibs |
3720 | 3721 |
check_func isatty |
3721 | 3722 |
check_func localtime_r |
... | ... |
@@ -3208,6 +3208,24 @@ typedef struct AVCodecContext { |
3208 | 3208 |
* - encoding: unused |
3209 | 3209 |
*/ |
3210 | 3210 |
AVDictionary *metadata; |
3211 |
+ |
|
3212 |
+ /** |
|
3213 |
+ * Character encoding of the input subtitles file. |
|
3214 |
+ * - decoding: set by user |
|
3215 |
+ * - encoding: unused |
|
3216 |
+ */ |
|
3217 |
+ char *sub_charenc; |
|
3218 |
+ |
|
3219 |
+ /** |
|
3220 |
+ * Subtitles character encoding mode. Formats or codecs might be adjusting |
|
3221 |
+ * this setting (if they are doing the conversion themselves for instance). |
|
3222 |
+ * - decoding: set by libavcodec |
|
3223 |
+ * - encoding: unused |
|
3224 |
+ */ |
|
3225 |
+ int sub_charenc_mode; |
|
3226 |
+#define FF_SUB_CHARENC_MODE_DO_NOTHING -1 ///< do nothing (demuxer outputs a stream supposed to be already in UTF-8, or the codec is bitmap for instance) |
|
3227 |
+#define FF_SUB_CHARENC_MODE_AUTOMATIC 0 ///< libavcodec will select the mode itself |
|
3228 |
+#define FF_SUB_CHARENC_MODE_PRE_DECODER 1 ///< the AVPacket data needs to be recoded to UTF-8 before being fed to the decoder, requires iconv |
|
3211 | 3229 |
} AVCodecContext; |
3212 | 3230 |
|
3213 | 3231 |
AVRational av_codec_get_pkt_timebase (const AVCodecContext *avctx); |
... | ... |
@@ -406,6 +406,11 @@ static const AVOption options[]={ |
406 | 406 |
{"ka", "Karaoke", 0, AV_OPT_TYPE_CONST, {.i64 = AV_AUDIO_SERVICE_TYPE_KARAOKE }, INT_MIN, INT_MAX, A|E, "audio_service_type"}, |
407 | 407 |
{"request_sample_fmt", "sample format audio decoders should prefer", OFFSET(request_sample_fmt), AV_OPT_TYPE_SAMPLE_FMT, {.i64=AV_SAMPLE_FMT_NONE}, -1, AV_SAMPLE_FMT_NB-1, A|D, "request_sample_fmt"}, |
408 | 408 |
{"pkt_timebase", NULL, OFFSET(pkt_timebase), AV_OPT_TYPE_RATIONAL, {.dbl = 0 }, 0, INT_MAX, 0}, |
409 |
+{"sub_charenc", "set input text subtitles character encoding", OFFSET(sub_charenc), AV_OPT_TYPE_STRING, {.str = NULL}, CHAR_MIN, CHAR_MAX, S|D}, |
|
410 |
+{"sub_charenc_mode", "set input text subtitles character encoding mode", OFFSET(sub_charenc_mode), AV_OPT_TYPE_FLAGS, {.i64 = FF_SUB_CHARENC_MODE_AUTOMATIC}, -1, INT_MAX, S|D, "sub_charenc_mode"}, |
|
411 |
+{"do_nothing", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_SUB_CHARENC_MODE_DO_NOTHING}, INT_MIN, INT_MAX, S|D, "sub_charenc_mode"}, |
|
412 |
+{"auto", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_SUB_CHARENC_MODE_AUTOMATIC}, INT_MIN, INT_MAX, S|D, "sub_charenc_mode"}, |
|
413 |
+{"pre_decoder", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_SUB_CHARENC_MODE_PRE_DECODER}, INT_MIN, INT_MAX, S|D, "sub_charenc_mode"}, |
|
409 | 414 |
{NULL}, |
410 | 415 |
}; |
411 | 416 |
|
... | ... |
@@ -48,6 +48,9 @@ |
48 | 48 |
#include <stdarg.h> |
49 | 49 |
#include <limits.h> |
50 | 50 |
#include <float.h> |
51 |
+#if HAVE_ICONV |
|
52 |
+# include <iconv.h> |
|
53 |
+#endif |
|
51 | 54 |
|
52 | 55 |
volatile int ff_avcodec_locked; |
53 | 56 |
static int volatile entangled_thread_counter = 0; |
... | ... |
@@ -1089,6 +1092,32 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *code |
1089 | 1089 |
ret = AVERROR(EINVAL); |
1090 | 1090 |
goto free_and_end; |
1091 | 1091 |
} |
1092 |
+ if (avctx->sub_charenc) { |
|
1093 |
+ if (avctx->codec_type != AVMEDIA_TYPE_SUBTITLE) { |
|
1094 |
+ av_log(avctx, AV_LOG_ERROR, "Character encoding is only " |
|
1095 |
+ "supported with subtitles codecs\n"); |
|
1096 |
+ ret = AVERROR(EINVAL); |
|
1097 |
+ goto free_and_end; |
|
1098 |
+ } else if (avctx->codec_descriptor->props & AV_CODEC_PROP_BITMAP_SUB) { |
|
1099 |
+ av_log(avctx, AV_LOG_WARNING, "Codec '%s' is bitmap-based, " |
|
1100 |
+ "subtitles character encoding will be ignored\n", |
|
1101 |
+ avctx->codec_descriptor->name); |
|
1102 |
+ avctx->sub_charenc_mode = FF_SUB_CHARENC_MODE_DO_NOTHING; |
|
1103 |
+ } else { |
|
1104 |
+ /* input character encoding is set for a text based subtitle |
|
1105 |
+ * codec at this point */ |
|
1106 |
+ if (avctx->sub_charenc_mode == FF_SUB_CHARENC_MODE_AUTOMATIC) |
|
1107 |
+ avctx->sub_charenc_mode = FF_SUB_CHARENC_MODE_PRE_DECODER; |
|
1108 |
+ |
|
1109 |
+ if (!HAVE_ICONV && avctx->sub_charenc_mode == FF_SUB_CHARENC_MODE_PRE_DECODER) { |
|
1110 |
+ av_log(avctx, AV_LOG_ERROR, "Character encoding subtitles " |
|
1111 |
+ "conversion needs a libavcodec built with iconv support " |
|
1112 |
+ "for this codec\n"); |
|
1113 |
+ ret = AVERROR(ENOSYS); |
|
1114 |
+ goto free_and_end; |
|
1115 |
+ } |
|
1116 |
+ } |
|
1117 |
+ } |
|
1092 | 1118 |
} |
1093 | 1119 |
end: |
1094 | 1120 |
ff_unlock_avcodec(); |
... | ... |
@@ -1847,6 +1876,68 @@ int attribute_align_arg avcodec_decode_audio4(AVCodecContext *avctx, |
1847 | 1847 |
return ret; |
1848 | 1848 |
} |
1849 | 1849 |
|
1850 |
+#define UTF8_MAX_BYTES 4 /* 5 and 6 bytes sequences should not be used */ |
|
1851 |
+static int recode_subtitle(AVCodecContext *avctx, |
|
1852 |
+ AVPacket *outpkt, const AVPacket *inpkt) |
|
1853 |
+{ |
|
1854 |
+#if HAVE_ICONV |
|
1855 |
+ iconv_t cd = (iconv_t)-1; |
|
1856 |
+ int ret = 0; |
|
1857 |
+ char *inb, *outb; |
|
1858 |
+ size_t inl, outl; |
|
1859 |
+ AVPacket tmp; |
|
1860 |
+#endif |
|
1861 |
+ |
|
1862 |
+ if (avctx->sub_charenc_mode != FF_SUB_CHARENC_MODE_PRE_DECODER) |
|
1863 |
+ return 0; |
|
1864 |
+ |
|
1865 |
+#if HAVE_ICONV |
|
1866 |
+ cd = iconv_open("UTF-8", avctx->sub_charenc); |
|
1867 |
+ if (cd == (iconv_t)-1) { |
|
1868 |
+ av_log(avctx, AV_LOG_ERROR, "Unable to open iconv context " |
|
1869 |
+ "with input character encoding \"%s\"\n", avctx->sub_charenc); |
|
1870 |
+ ret = AVERROR(errno); |
|
1871 |
+ goto end; |
|
1872 |
+ } |
|
1873 |
+ |
|
1874 |
+ inb = inpkt->data; |
|
1875 |
+ inl = inpkt->size; |
|
1876 |
+ |
|
1877 |
+ if (inl >= INT_MAX / UTF8_MAX_BYTES - FF_INPUT_BUFFER_PADDING_SIZE) { |
|
1878 |
+ av_log(avctx, AV_LOG_ERROR, "Subtitles packet is too big for recoding\n"); |
|
1879 |
+ ret = AVERROR(ENOMEM); |
|
1880 |
+ goto end; |
|
1881 |
+ } |
|
1882 |
+ |
|
1883 |
+ ret = av_new_packet(&tmp, inl * UTF8_MAX_BYTES); |
|
1884 |
+ if (ret < 0) |
|
1885 |
+ goto end; |
|
1886 |
+ outpkt->data = tmp.data; |
|
1887 |
+ outpkt->size = tmp.size; |
|
1888 |
+ outb = outpkt->data; |
|
1889 |
+ outl = outpkt->size; |
|
1890 |
+ |
|
1891 |
+ if (iconv(cd, &inb, &inl, &outb, &outl) == (size_t)-1 || |
|
1892 |
+ iconv(cd, NULL, NULL, &outb, &outl) == (size_t)-1 || |
|
1893 |
+ outl >= outpkt->size || inl != 0) { |
|
1894 |
+ av_log(avctx, AV_LOG_ERROR, "Unable to recode subtitle event \"%s\" " |
|
1895 |
+ "from %s to UTF-8\n", inpkt->data, avctx->sub_charenc); |
|
1896 |
+ av_free_packet(&tmp); |
|
1897 |
+ ret = AVERROR(errno); |
|
1898 |
+ goto end; |
|
1899 |
+ } |
|
1900 |
+ outpkt->size -= outl; |
|
1901 |
+ outpkt->data[outpkt->size - 1] = '\0'; |
|
1902 |
+ |
|
1903 |
+end: |
|
1904 |
+ if (cd != (iconv_t)-1) |
|
1905 |
+ iconv_close(cd); |
|
1906 |
+ return ret; |
|
1907 |
+#else |
|
1908 |
+ av_assert0(!"requesting subtitles recoding without iconv"); |
|
1909 |
+#endif |
|
1910 |
+} |
|
1911 |
+ |
|
1850 | 1912 |
int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub, |
1851 | 1913 |
int *got_sub_ptr, |
1852 | 1914 |
AVPacket *avpkt) |
... | ... |
@@ -1862,19 +1953,28 @@ int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub, |
1862 | 1862 |
avcodec_get_subtitle_defaults(sub); |
1863 | 1863 |
|
1864 | 1864 |
if (avpkt->size) { |
1865 |
+ AVPacket pkt_recoded; |
|
1865 | 1866 |
AVPacket tmp = *avpkt; |
1866 | 1867 |
int did_split = av_packet_split_side_data(&tmp); |
1867 | 1868 |
//apply_param_change(avctx, &tmp); |
1868 | 1869 |
|
1869 |
- avctx->pkt = &tmp; |
|
1870 |
+ pkt_recoded = tmp; |
|
1871 |
+ ret = recode_subtitle(avctx, &pkt_recoded, &tmp); |
|
1872 |
+ if (ret < 0) { |
|
1873 |
+ *got_sub_ptr = 0; |
|
1874 |
+ } else { |
|
1875 |
+ avctx->pkt = &pkt_recoded; |
|
1870 | 1876 |
|
1871 | 1877 |
if (avctx->pkt_timebase.den && avpkt->pts != AV_NOPTS_VALUE) |
1872 | 1878 |
sub->pts = av_rescale_q(avpkt->pts, |
1873 | 1879 |
avctx->pkt_timebase, AV_TIME_BASE_Q); |
1874 |
- ret = avctx->codec->decode(avctx, sub, got_sub_ptr, &tmp); |
|
1880 |
+ ret = avctx->codec->decode(avctx, sub, got_sub_ptr, &pkt_recoded); |
|
1881 |
+ if (tmp.data != pkt_recoded.data) |
|
1882 |
+ av_free(pkt_recoded.data); |
|
1875 | 1883 |
sub->format = !(avctx->codec_descriptor->props & AV_CODEC_PROP_BITMAP_SUB); |
1876 |
- |
|
1877 | 1884 |
avctx->pkt = NULL; |
1885 |
+ } |
|
1886 |
+ |
|
1878 | 1887 |
if (did_split) { |
1879 | 1888 |
ff_packet_free_side_data(&tmp); |
1880 | 1889 |
if(ret == tmp.size) |
... | ... |
@@ -29,8 +29,8 @@ |
29 | 29 |
#include "libavutil/avutil.h" |
30 | 30 |
|
31 | 31 |
#define LIBAVCODEC_VERSION_MAJOR 54 |
32 |
-#define LIBAVCODEC_VERSION_MINOR 91 |
|
33 |
-#define LIBAVCODEC_VERSION_MICRO 103 |
|
32 |
+#define LIBAVCODEC_VERSION_MINOR 92 |
|
33 |
+#define LIBAVCODEC_VERSION_MICRO 100 |
|
34 | 34 |
|
35 | 35 |
#define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \ |
36 | 36 |
LIBAVCODEC_VERSION_MINOR, \ |