... | ... |
@@ -180,6 +180,7 @@ External library support: |
180 | 180 |
and libraw1394 [no] |
181 | 181 |
--enable-libfaac enable AAC encoding via libfaac [no] |
182 | 182 |
--enable-libfdk-aac enable AAC encoding via libfdk-aac [no] |
183 |
+ --enable-libflite enable flite (voice synthesis) support via libflite [no] |
|
183 | 184 |
--enable-libfreetype enable libfreetype [no] |
184 | 185 |
--enable-libgsm enable GSM de/encoding via libgsm [no] |
185 | 186 |
--enable-libiec61883 enable iec61883 via libiec61883 [no] |
... | ... |
@@ -1074,6 +1075,7 @@ CONFIG_LIST=" |
1074 | 1074 |
libdc1394 |
1075 | 1075 |
libfaac |
1076 | 1076 |
libfdk_aac |
1077 |
+ libflite |
|
1077 | 1078 |
libfreetype |
1078 | 1079 |
libgsm |
1079 | 1080 |
libiec61883 |
... | ... |
@@ -1787,6 +1789,7 @@ cropdetect_filter_deps="gpl" |
1787 | 1787 |
delogo_filter_deps="gpl" |
1788 | 1788 |
deshake_filter_deps="avcodec" |
1789 | 1789 |
drawtext_filter_deps="libfreetype" |
1790 |
+flite_filter_deps="libflite" |
|
1790 | 1791 |
frei0r_filter_deps="frei0r dlopen" |
1791 | 1792 |
frei0r_filter_extralibs='$ldl' |
1792 | 1793 |
frei0r_src_filter_deps="frei0r dlopen" |
... | ... |
@@ -3390,6 +3393,8 @@ enabled libcelt && require libcelt celt/celt.h celt_decode -lcelt0 && |
3390 | 3390 |
enabled libcaca && require_pkg_config caca caca.h caca_create_canvas |
3391 | 3391 |
enabled libfaac && require2 libfaac "stdint.h faac.h" faacEncGetVersion -lfaac |
3392 | 3392 |
enabled libfdk_aac && require libfdk_aac fdk-aac/aacenc_lib.h aacEncOpen -lfdk-aac |
3393 |
+flite_libs="-lflite_cmu_time_awb -lflite_cmu_us_awb -lflite_cmu_us_kal -lflite_cmu_us_kal16 -lflite_cmu_us_rms -lflite_cmu_us_slt -lflite_usenglish -lflite_cmulex -lflite" |
|
3394 |
+enabled libflite && require2 libflite "flite/flite.h" flite_init $flite_libs |
|
3393 | 3395 |
enabled libfreetype && require_pkg_config freetype2 "ft2build.h freetype/freetype.h" FT_Init_FreeType |
3394 | 3396 |
enabled libgsm && require libgsm gsm/gsm.h gsm_create -lgsm |
3395 | 3397 |
enabled libilbc && require libilbc ilbc.h WebRtcIlbcfix_InitDecode -lilbc |
... | ... |
@@ -1026,6 +1026,65 @@ Channel layout of the audio data, in the form that can be accepted by |
1026 | 1026 |
|
1027 | 1027 |
All the parameters need to be explicitly defined. |
1028 | 1028 |
|
1029 |
+@section flite |
|
1030 |
+ |
|
1031 |
+Synthesize a voice utterance using the libflite library. |
|
1032 |
+ |
|
1033 |
+To enable compilation of this filter you need to configure FFmpeg with |
|
1034 |
+@code{--enable-libflite}. |
|
1035 |
+ |
|
1036 |
+The source accepts parameters as a list of @var{key}=@var{value} pairs, |
|
1037 |
+separated by ":". |
|
1038 |
+ |
|
1039 |
+The description of the accepted parameters follows. |
|
1040 |
+ |
|
1041 |
+@table @option |
|
1042 |
+ |
|
1043 |
+@item list_voices |
|
1044 |
+If set to 1, list the names of the available voices and exit |
|
1045 |
+immediately. Default value is 0. |
|
1046 |
+ |
|
1047 |
+@item nb_samples, n |
|
1048 |
+Set the maximum number of samples per frame. Default value is 512. |
|
1049 |
+ |
|
1050 |
+@item textfile |
|
1051 |
+Set the filename containing the text to speak. |
|
1052 |
+ |
|
1053 |
+@item text |
|
1054 |
+Set the text to speak. |
|
1055 |
+ |
|
1056 |
+@item voice, v |
|
1057 |
+Set the voice to use for the speech synthesis. Default value is |
|
1058 |
+@code{kal}. See also the @var{list_voices} option. |
|
1059 |
+@end table |
|
1060 |
+ |
|
1061 |
+@subsection Examples |
|
1062 |
+ |
|
1063 |
+@itemize |
|
1064 |
+@item |
|
1065 |
+Read from file @file{speech.txt}, and synthesize the text using the |
|
1066 |
+standard flite voice: |
|
1067 |
+@example |
|
1068 |
+flite=textfile=speech.txt |
|
1069 |
+@end example |
|
1070 |
+ |
|
1071 |
+@item |
|
1072 |
+Read the specified text selecting the @code{slt} voice: |
|
1073 |
+@example |
|
1074 |
+flite=text='So fare thee well, poor devil of a Sub-Sub, whose commentator I am':voice=slt |
|
1075 |
+@end example |
|
1076 |
+ |
|
1077 |
+@item |
|
1078 |
+Make @file{ffplay} speak the specified text, using @code{flite} and |
|
1079 |
+the @code{lavfi} device: |
|
1080 |
+@example |
|
1081 |
+ffplay -f lavfi flite='No more be grieved for which that thou hast done.' |
|
1082 |
+@end example |
|
1083 |
+@end itemize |
|
1084 |
+ |
|
1085 |
+For more information about libflite, check: |
|
1086 |
+@url{http://www.speech.cs.cmu.edu/flite/} |
|
1087 |
+ |
|
1029 | 1088 |
@c man end AUDIO SOURCES |
1030 | 1089 |
|
1031 | 1090 |
@chapter Audio Sinks |
... | ... |
@@ -70,6 +70,7 @@ OBJS-$(CONFIG_VOLUME_FILTER) += af_volume.o |
70 | 70 |
|
71 | 71 |
OBJS-$(CONFIG_AEVALSRC_FILTER) += asrc_aevalsrc.o |
72 | 72 |
OBJS-$(CONFIG_ANULLSRC_FILTER) += asrc_anullsrc.o |
73 |
+OBJS-$(CONFIG_FLITE_FILTER) += asrc_flite.o |
|
73 | 74 |
|
74 | 75 |
OBJS-$(CONFIG_ABUFFERSINK_FILTER) += sink_buffer.o |
75 | 76 |
OBJS-$(CONFIG_ANULLSINK_FILTER) += asink_anullsink.o |
... | ... |
@@ -59,6 +59,7 @@ void avfilter_register_all(void) |
59 | 59 |
|
60 | 60 |
REGISTER_FILTER (AEVALSRC, aevalsrc, asrc); |
61 | 61 |
REGISTER_FILTER (ANULLSRC, anullsrc, asrc); |
62 |
+ REGISTER_FILTER (FLITE, flite, asrc); |
|
62 | 63 |
|
63 | 64 |
REGISTER_FILTER (ABUFFERSINK, abuffersink, asink); |
64 | 65 |
REGISTER_FILTER (ANULLSINK, anullsink, asink); |
65 | 66 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,274 @@ |
0 |
+/* |
|
1 |
+ * Copyright (c) 2012 Stefano Sabatini |
|
2 |
+ * |
|
3 |
+ * This file is part of FFmpeg. |
|
4 |
+ * |
|
5 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
6 |
+ * modify it under the terms of the GNU Lesser General Public |
|
7 |
+ * License as published by the Free Software Foundation; either |
|
8 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
9 |
+ * |
|
10 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
11 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
12 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
13 |
+ * Lesser General Public License for more details. |
|
14 |
+ * |
|
15 |
+ * You should have received a copy of the GNU Lesser General Public |
|
16 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
17 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
18 |
+ */ |
|
19 |
+ |
|
20 |
+/** |
|
21 |
+ * @file |
|
22 |
+ * flite voice synth source |
|
23 |
+ */ |
|
24 |
+ |
|
25 |
+#include <flite/flite.h> |
|
26 |
+#include "libavutil/audioconvert.h" |
|
27 |
+#include "libavutil/file.h" |
|
28 |
+#include "libavutil/opt.h" |
|
29 |
+#include "avfilter.h" |
|
30 |
+#include "audio.h" |
|
31 |
+#include "formats.h" |
|
32 |
+#include "internal.h" |
|
33 |
+ |
|
34 |
+typedef struct { |
|
35 |
+ const AVClass *class; |
|
36 |
+ char *voice_str; |
|
37 |
+ char *textfile; |
|
38 |
+ char *text; |
|
39 |
+ cst_wave *wave; |
|
40 |
+ int16_t *wave_samples; |
|
41 |
+ int wave_nb_samples; |
|
42 |
+ int list_voices; |
|
43 |
+ cst_voice *voice; |
|
44 |
+ int64_t pts; |
|
45 |
+ int frame_nb_samples; ///< number of samples per frame |
|
46 |
+} FliteContext; |
|
47 |
+ |
|
48 |
+#define OFFSET(x) offsetof(FliteContext, x) |
|
49 |
+ |
|
50 |
+static const AVOption flite_options[] = { |
|
51 |
+ { "list_voices", "list voices and exit", OFFSET(list_voices), AV_OPT_TYPE_INT, {.dbl=0}, 0, 1 }, |
|
52 |
+ { "nb_samples", "set number of samples per frame", OFFSET(frame_nb_samples), AV_OPT_TYPE_INT, {.dbl=512}, 0, INT_MAX }, |
|
53 |
+ { "n", "set number of samples per frame", OFFSET(frame_nb_samples), AV_OPT_TYPE_INT, {.dbl=512}, 0, INT_MAX }, |
|
54 |
+ { "text", "set text to speak", OFFSET(text), AV_OPT_TYPE_STRING, {.str=NULL}, CHAR_MIN, CHAR_MAX }, |
|
55 |
+ { "textfile", "set filename of the text to speak", OFFSET(textfile), AV_OPT_TYPE_STRING, {.str=NULL}, CHAR_MIN, CHAR_MAX }, |
|
56 |
+ { "v", "set voice", OFFSET(voice_str), AV_OPT_TYPE_STRING, {.str="kal"}, CHAR_MIN, CHAR_MAX }, |
|
57 |
+ { "voice", "set voice", OFFSET(voice_str), AV_OPT_TYPE_STRING, {.str="kal"}, CHAR_MIN, CHAR_MAX }, |
|
58 |
+ { NULL } |
|
59 |
+}; |
|
60 |
+ |
|
61 |
+AVFILTER_DEFINE_CLASS(flite); |
|
62 |
+ |
|
63 |
+static volatile int flite_inited = 0; |
|
64 |
+ |
|
65 |
+/* declare functions for all the supported voices */ |
|
66 |
+#define DECLARE_REGISTER_VOICE_FN(name) cst_voice *register_cmu_us_## name(const char *) |
|
67 |
+DECLARE_REGISTER_VOICE_FN(awb); |
|
68 |
+DECLARE_REGISTER_VOICE_FN(kal); |
|
69 |
+DECLARE_REGISTER_VOICE_FN(kal16); |
|
70 |
+DECLARE_REGISTER_VOICE_FN(rms); |
|
71 |
+DECLARE_REGISTER_VOICE_FN(slt); |
|
72 |
+ |
|
73 |
+struct voice_entry { |
|
74 |
+ const char *name; |
|
75 |
+ cst_voice * (*register_fn)(const char *); |
|
76 |
+} voice_entry; |
|
77 |
+ |
|
78 |
+static struct voice_entry voice_entries[] = { |
|
79 |
+ { "awb", register_cmu_us_awb }, |
|
80 |
+ { "kal", register_cmu_us_kal }, |
|
81 |
+ { "kal16", register_cmu_us_kal16 }, |
|
82 |
+ { "rms", register_cmu_us_rms }, |
|
83 |
+ { "slt", register_cmu_us_slt }, |
|
84 |
+}; |
|
85 |
+ |
|
86 |
+static void list_voices(void *log_ctx, const char *sep) |
|
87 |
+{ |
|
88 |
+ int i, n = FF_ARRAY_ELEMS(voice_entries); |
|
89 |
+ for (i = 0; i < n; i++) |
|
90 |
+ av_log(log_ctx, AV_LOG_INFO, "%s%s", |
|
91 |
+ voice_entries[i].name, i < (n-1) ? sep : "\n"); |
|
92 |
+} |
|
93 |
+ |
|
94 |
+static int select_voice(cst_voice **voice, const char *voice_name, void *log_ctx) |
|
95 |
+{ |
|
96 |
+ int i; |
|
97 |
+ |
|
98 |
+ for (i = 0; i < FF_ARRAY_ELEMS(voice_entries); i++) { |
|
99 |
+ struct voice_entry *entry = &voice_entries[i]; |
|
100 |
+ if (!strcmp(entry->name, voice_name)) { |
|
101 |
+ *voice = entry->register_fn(NULL); |
|
102 |
+ if (!*voice) { |
|
103 |
+ av_log(log_ctx, AV_LOG_ERROR, |
|
104 |
+ "Could not register voice '%s'\n", voice_name); |
|
105 |
+ return AVERROR_UNKNOWN; |
|
106 |
+ } |
|
107 |
+ return 0; |
|
108 |
+ } |
|
109 |
+ } |
|
110 |
+ |
|
111 |
+ av_log(log_ctx, AV_LOG_ERROR, "Could not find voice '%s'\n", voice_name); |
|
112 |
+ av_log(log_ctx, AV_LOG_INFO, "Choose between the voices: "); |
|
113 |
+ list_voices(log_ctx, ", "); |
|
114 |
+ |
|
115 |
+ return AVERROR(EINVAL); |
|
116 |
+} |
|
117 |
+ |
|
118 |
+static av_cold int init(AVFilterContext *ctx, const char *args) |
|
119 |
+{ |
|
120 |
+ FliteContext *flite = ctx->priv; |
|
121 |
+ int ret = 0; |
|
122 |
+ |
|
123 |
+ flite->class = &flite_class; |
|
124 |
+ av_opt_set_defaults(flite); |
|
125 |
+ |
|
126 |
+ if ((ret = av_set_options_string(flite, args, "=", ":")) < 0) { |
|
127 |
+ av_log(ctx, AV_LOG_ERROR, "Error parsing options string: '%s'\n", args); |
|
128 |
+ return ret; |
|
129 |
+ } |
|
130 |
+ |
|
131 |
+ if (flite->list_voices) { |
|
132 |
+ list_voices(ctx, "\n"); |
|
133 |
+ return AVERROR_EXIT; |
|
134 |
+ } |
|
135 |
+ |
|
136 |
+ if (!flite_inited) { |
|
137 |
+ if (flite_init() < 0) { |
|
138 |
+ av_log(ctx, AV_LOG_ERROR, "flite initialization failed\n"); |
|
139 |
+ return AVERROR_UNKNOWN; |
|
140 |
+ } |
|
141 |
+ flite_inited++; |
|
142 |
+ } |
|
143 |
+ |
|
144 |
+ if ((ret = select_voice(&flite->voice, flite->voice_str, ctx)) < 0) |
|
145 |
+ return ret; |
|
146 |
+ |
|
147 |
+ if (flite->textfile && flite->text) { |
|
148 |
+ av_log(ctx, AV_LOG_ERROR, |
|
149 |
+ "Both text and textfile options set: only one must be specified\n"); |
|
150 |
+ return AVERROR(EINVAL); |
|
151 |
+ } |
|
152 |
+ |
|
153 |
+ if (flite->textfile) { |
|
154 |
+ uint8_t *textbuf; |
|
155 |
+ size_t textbuf_size; |
|
156 |
+ |
|
157 |
+ if ((ret = av_file_map(flite->textfile, &textbuf, &textbuf_size, 0, ctx)) < 0) { |
|
158 |
+ av_log(ctx, AV_LOG_ERROR, |
|
159 |
+ "The text file '%s' could not be read: %s\n", |
|
160 |
+ flite->textfile, av_err2str(ret)); |
|
161 |
+ return ret; |
|
162 |
+ } |
|
163 |
+ |
|
164 |
+ if (!(flite->text = av_malloc(textbuf_size+1))) |
|
165 |
+ return AVERROR(ENOMEM); |
|
166 |
+ memcpy(flite->text, textbuf, textbuf_size); |
|
167 |
+ flite->text[textbuf_size] = 0; |
|
168 |
+ av_file_unmap(textbuf, textbuf_size); |
|
169 |
+ } |
|
170 |
+ |
|
171 |
+ if (!flite->text) { |
|
172 |
+ av_log(ctx, AV_LOG_ERROR, |
|
173 |
+ "No speech text specified, specify the 'text' or 'textfile' option\n"); |
|
174 |
+ return AVERROR(EINVAL); |
|
175 |
+ } |
|
176 |
+ |
|
177 |
+ /* synth all the file data in block */ |
|
178 |
+ flite->wave = flite_text_to_wave(flite->text, flite->voice); |
|
179 |
+ flite->wave_samples = flite->wave->samples; |
|
180 |
+ flite->wave_nb_samples = flite->wave->num_samples; |
|
181 |
+ return 0; |
|
182 |
+} |
|
183 |
+ |
|
184 |
+static av_cold void uninit(AVFilterContext *ctx) |
|
185 |
+{ |
|
186 |
+ FliteContext *flite = ctx->priv; |
|
187 |
+ |
|
188 |
+ av_opt_free(flite); |
|
189 |
+ |
|
190 |
+ delete_voice(flite->voice); |
|
191 |
+ flite->voice = NULL; |
|
192 |
+ delete_wave(flite->wave); |
|
193 |
+ flite->wave = NULL; |
|
194 |
+} |
|
195 |
+ |
|
196 |
+static int query_formats(AVFilterContext *ctx) |
|
197 |
+{ |
|
198 |
+ FliteContext *flite = ctx->priv; |
|
199 |
+ |
|
200 |
+ AVFilterChannelLayouts *chlayouts = NULL; |
|
201 |
+ int64_t chlayout = av_get_default_channel_layout(flite->wave->num_channels); |
|
202 |
+ AVFilterFormats *sample_formats = NULL; |
|
203 |
+ AVFilterFormats *sample_rates = NULL; |
|
204 |
+ |
|
205 |
+ ff_add_channel_layout(&chlayouts, chlayout); |
|
206 |
+ ff_set_common_channel_layouts(ctx, chlayouts); |
|
207 |
+ ff_add_format(&sample_formats, AV_SAMPLE_FMT_S16); |
|
208 |
+ ff_set_common_formats(ctx, sample_formats); |
|
209 |
+ ff_add_format(&sample_rates, flite->wave->sample_rate); |
|
210 |
+ ff_set_common_samplerates (ctx, sample_rates); |
|
211 |
+ |
|
212 |
+ return 0; |
|
213 |
+} |
|
214 |
+ |
|
215 |
+static int config_props(AVFilterLink *outlink) |
|
216 |
+{ |
|
217 |
+ AVFilterContext *ctx = outlink->src; |
|
218 |
+ FliteContext *flite = ctx->priv; |
|
219 |
+ |
|
220 |
+ outlink->sample_rate = flite->wave->sample_rate; |
|
221 |
+ outlink->time_base = (AVRational){1, flite->wave->sample_rate}; |
|
222 |
+ |
|
223 |
+ av_log(ctx, AV_LOG_VERBOSE, "voice:%s fmt:%s sample_rate:%d\n", |
|
224 |
+ flite->voice_str, |
|
225 |
+ av_get_sample_fmt_name(outlink->format), outlink->sample_rate); |
|
226 |
+ return 0; |
|
227 |
+} |
|
228 |
+ |
|
229 |
+static int request_frame(AVFilterLink *outlink) |
|
230 |
+{ |
|
231 |
+ AVFilterBufferRef *samplesref; |
|
232 |
+ FliteContext *flite = outlink->src->priv; |
|
233 |
+ int nb_samples = FFMIN(flite->wave_nb_samples, flite->frame_nb_samples); |
|
234 |
+ |
|
235 |
+ if (!nb_samples) |
|
236 |
+ return AVERROR_EOF; |
|
237 |
+ |
|
238 |
+ samplesref = ff_get_audio_buffer(outlink, AV_PERM_WRITE, nb_samples); |
|
239 |
+ if (!samplesref) |
|
240 |
+ return AVERROR(ENOMEM); |
|
241 |
+ |
|
242 |
+ memcpy(samplesref->data[0], flite->wave_samples, |
|
243 |
+ nb_samples * flite->wave->num_channels * 2); |
|
244 |
+ samplesref->pts = flite->pts; |
|
245 |
+ samplesref->pos = -1; |
|
246 |
+ samplesref->audio->sample_rate = flite->wave->sample_rate; |
|
247 |
+ flite->pts += nb_samples; |
|
248 |
+ flite->wave_samples += nb_samples * flite->wave->num_channels; |
|
249 |
+ flite->wave_nb_samples -= nb_samples; |
|
250 |
+ |
|
251 |
+ return ff_filter_samples(outlink, samplesref); |
|
252 |
+} |
|
253 |
+ |
|
254 |
+AVFilter avfilter_asrc_flite = { |
|
255 |
+ .name = "flite", |
|
256 |
+ .description = NULL_IF_CONFIG_SMALL("Synthesize voice from text using libflite."), |
|
257 |
+ .query_formats = query_formats, |
|
258 |
+ .init = init, |
|
259 |
+ .uninit = uninit, |
|
260 |
+ .priv_size = sizeof(FliteContext), |
|
261 |
+ |
|
262 |
+ .inputs = (const AVFilterPad[]) {{ .name = NULL}}, |
|
263 |
+ |
|
264 |
+ .outputs = (const AVFilterPad[]) { |
|
265 |
+ { |
|
266 |
+ .name = "default", |
|
267 |
+ .type = AVMEDIA_TYPE_AUDIO, |
|
268 |
+ .config_props = config_props, |
|
269 |
+ .request_frame = request_frame, |
|
270 |
+ }, |
|
271 |
+ { .name = NULL } |
|
272 |
+ }, |
|
273 |
+}; |
... | ... |
@@ -29,7 +29,7 @@ |
29 | 29 |
#include "libavutil/avutil.h" |
30 | 30 |
|
31 | 31 |
#define LIBAVFILTER_VERSION_MAJOR 3 |
32 |
-#define LIBAVFILTER_VERSION_MINOR 3 |
|
32 |
+#define LIBAVFILTER_VERSION_MINOR 4 |
|
33 | 33 |
#define LIBAVFILTER_VERSION_MICRO 100 |
34 | 34 |
|
35 | 35 |
#define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \ |