Signed-off-by: Paul B Mahol <onemda@gmail.com>
Paul B Mahol authored on 2016/01/10 22:48:12... | ... |
@@ -2903,6 +2903,8 @@ showspectrumpic_filter_deps="avcodec" |
2903 | 2903 |
showspectrumpic_filter_select="fft" |
2904 | 2904 |
sofalizer_filter_deps="netcdf avcodec" |
2905 | 2905 |
sofalizer_filter_select="fft" |
2906 |
+spectrumsynth_filter_deps="avcodec" |
|
2907 |
+spectrumsynth_filter_select="fft" |
|
2906 | 2908 |
spp_filter_deps="gpl avcodec" |
2907 | 2909 |
spp_filter_select="fft idctdsp fdctdsp me_cmp pixblockdsp" |
2908 | 2910 |
stereo3d_filter_deps="gpl" |
... | ... |
@@ -6081,6 +6083,7 @@ enabled sofalizer_filter && prepend avfilter_deps "avcodec" |
6081 | 6081 |
enabled showfreqs_filter && prepend avfilter_deps "avcodec" |
6082 | 6082 |
enabled showspectrum_filter && prepend avfilter_deps "avcodec" |
6083 | 6083 |
enabled smartblur_filter && prepend avfilter_deps "swscale" |
6084 |
+enabled spectrumsynth_filter && prepend avfilter_deps "avcodec" |
|
6084 | 6085 |
enabled subtitles_filter && prepend avfilter_deps "avformat avcodec" |
6085 | 6086 |
enabled uspp_filter && prepend avfilter_deps "avcodec" |
6086 | 6087 |
|
... | ... |
@@ -14578,6 +14578,7 @@ Default is @code{combined}. |
14578 | 14578 |
|
14579 | 14579 |
@end table |
14580 | 14580 |
|
14581 |
+@anchor{showspectrum} |
|
14581 | 14582 |
@section showspectrum |
14582 | 14583 |
|
14583 | 14584 |
Convert input audio to a video output, representing the audio frequency |
... | ... |
@@ -15003,6 +15004,68 @@ ffmpeg -i audio.mp3 -filter_complex "showwavespic,colorchannelmixer=rr=66/255:gg |
15003 | 15003 |
@end example |
15004 | 15004 |
@end itemize |
15005 | 15005 |
|
15006 |
+@section spectrumsynth |
|
15007 |
+ |
|
15008 |
+Synthesize audio from 2 input video spectrums, first input stream represents |
|
15009 |
+magnitude across time and second represents phase across time. |
|
15010 |
+The filter will transform from frequency domain as displayed in videos back |
|
15011 |
+to time domain as presented in audio output. |
|
15012 |
+ |
|
15013 |
+This filter is primarily created for reversing processed @ref{showspectrum} |
|
15014 |
+filter outputs, but can synthesize sound from other spectrograms too. |
|
15015 |
+But in such case results are going to be poor if the phase data is not |
|
15016 |
+available, because in such cases phase data needs to be recreated, usually |
|
15017 |
+it's just recreated from random noise. |
|
15018 |
+For best results use gray only output (@code{channel} color mode in |
|
15019 |
+@ref{showspectrum} filter) and @code{log} scale for magnitude video and |
|
15020 |
+@code{lin} scale for phase video. To produce phase, for 2nd video, use |
|
15021 |
+@code{data} option. Input videos should generally use @code{fullframe} |
|
15022 |
+slide mode as that saves resources needed for decoding video. |
|
15023 |
+ |
|
15024 |
+The filter accepts the following options: |
|
15025 |
+ |
|
15026 |
+@table @option |
|
15027 |
+@item sample_rate |
|
15028 |
+Specify sample rate of output audio; the sample rate of audio from which |
|
15029 |
+spectrum was generated may differ. |
|
15030 |
+ |
|
15031 |
+@item channels |
|
15032 |
+Set number of channels represented in input video spectrums. |
|
15033 |
+ |
|
15034 |
+@item scale |
|
15035 |
+Set scale which was used when generating magnitude input spectrum. |
|
15036 |
+Can be @code{lin} or @code{log}. Default is @code{log}. |
|
15037 |
+ |
|
15038 |
+@item slide |
|
15039 |
+Set slide which was used when generating input spectrums. |
|
15040 |
+Can be @code{replace}, @code{scroll}, @code{fullframe} or @code{rscroll}. |
|
15041 |
+Default is @code{fullframe}. |
|
15042 |
+ |
|
15043 |
+@item win_func |
|
15044 |
+Set window function used for resynthesis. |
|
15045 |
+ |
|
15046 |
+@item overlap |
|
15047 |
+Set window overlap. In range @code{[0, 1]}. Default is @code{1}, |
|
15048 |
+which means optimal overlap for selected window function will be picked. |
|
15049 |
+ |
|
15050 |
+@item orientation |
|
15051 |
+Set orientation of input videos. Can be @code{vertical} or @code{horizontal}. |
|
15052 |
+Default is @code{vertical}. |
|
15053 |
+@end table |
|
15054 |
+ |
|
15055 |
+@subsection Examples |
|
15056 |
+ |
|
15057 |
+@itemize |
|
15058 |
+@item |
|
15059 |
+First create magnitude and phase videos from audio, assuming audio is stereo with 44100 sample rate, |
|
15060 |
+then resynthesize videos back to audio with spectrumsynth: |
|
15061 |
+@example |
|
15062 |
+ffmpeg -i input.flac -lavfi showspectrum=mode=separate:scale=log:overlap=0.875:color=channel:slide=fullframe:data=magnitude -an -c:v rawvideo magnitude.nut |
|
15063 |
+ffmpeg -i input.flac -lavfi showspectrum=mode=separate:scale=lin:overlap=0.875:color=channel:slide=fullframe:data=phase -an -c:v rawvideo phase.nut |
|
15064 |
+ffmpeg -i magnitude.nut -i phase.nut -lavfi spectrumsynth=channels=2:sample_rate=44100:win_func=hann:overlap=0.875:slide=fullframe output.flac |
|
15065 |
+@end example |
|
15066 |
+@end itemize |
|
15067 |
+ |
|
15006 | 15068 |
@section split, asplit |
15007 | 15069 |
|
15008 | 15070 |
Split input into several identical outputs. |
... | ... |
@@ -290,6 +290,7 @@ OBJS-$(CONFIG_SHOWSPECTRUMPIC_FILTER) += avf_showspectrum.o window_func.o |
290 | 290 |
OBJS-$(CONFIG_SHOWVOLUME_FILTER) += avf_showvolume.o |
291 | 291 |
OBJS-$(CONFIG_SHOWWAVES_FILTER) += avf_showwaves.o |
292 | 292 |
OBJS-$(CONFIG_SHOWWAVESPIC_FILTER) += avf_showwaves.o |
293 |
+OBJS-$(CONFIG_SPECTRUMSYNTH_FILTER) += vaf_spectrumsynth.o window_func.o |
|
293 | 294 |
|
294 | 295 |
# multimedia sources |
295 | 296 |
OBJS-$(CONFIG_AMOVIE_FILTER) += src_movie.o |
... | ... |
@@ -310,6 +310,7 @@ void avfilter_register_all(void) |
310 | 310 |
REGISTER_FILTER(SHOWVOLUME, showvolume, avf); |
311 | 311 |
REGISTER_FILTER(SHOWWAVES, showwaves, avf); |
312 | 312 |
REGISTER_FILTER(SHOWWAVESPIC, showwavespic, avf); |
313 |
+ REGISTER_FILTER(SPECTRUMSYNTH, spectrumsynth, vaf); |
|
313 | 314 |
|
314 | 315 |
/* multimedia sources */ |
315 | 316 |
REGISTER_FILTER(AMOVIE, amovie, avsrc); |
316 | 317 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,533 @@ |
0 |
+/* |
|
1 |
+ * Copyright (c) 2016 Paul B Mahol |
|
2 |
+ * |
|
3 |
+ * This file is part of FFmpeg. |
|
4 |
+ * |
|
5 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
6 |
+ * modify it under the terms of the GNU Lesser General Public |
|
7 |
+ * License as published by the Free Software Foundation; either |
|
8 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
9 |
+ * |
|
10 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
11 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
12 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
13 |
+ * Lesser General Public License for more details. |
|
14 |
+ * |
|
15 |
+ * You should have received a copy of the GNU Lesser General Public |
|
16 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
17 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
18 |
+ */ |
|
19 |
+ |
|
20 |
+/** |
|
21 |
+ * @file |
|
22 |
+ * SpectrumSynth filter |
|
23 |
+ * @todo support float pixel format |
|
24 |
+ */ |
|
25 |
+ |
|
26 |
+#include "libavcodec/avfft.h" |
|
27 |
+#include "libavutil/avassert.h" |
|
28 |
+#include "libavutil/channel_layout.h" |
|
29 |
+#include "libavutil/opt.h" |
|
30 |
+#include "libavutil/parseutils.h" |
|
31 |
+#include "avfilter.h" |
|
32 |
+#include "formats.h" |
|
33 |
+#include "audio.h" |
|
34 |
+#include "video.h" |
|
35 |
+#include "internal.h" |
|
36 |
+#include "window_func.h" |
|
37 |
+ |
|
38 |
+enum MagnitudeScale { LINEAR, LOG, NB_SCALES }; |
|
39 |
+enum SlideMode { REPLACE, SCROLL, FULLFRAME, RSCROLL, NB_SLIDES }; |
|
40 |
+enum Orientation { VERTICAL, HORIZONTAL, NB_ORIENTATIONS }; |
|
41 |
+ |
|
42 |
+typedef struct SpectrumSynthContext { |
|
43 |
+ const AVClass *class; |
|
44 |
+ int sample_rate; |
|
45 |
+ int channels; |
|
46 |
+ int scale; |
|
47 |
+ int sliding; |
|
48 |
+ int win_func; |
|
49 |
+ float overlap; |
|
50 |
+ int orientation; |
|
51 |
+ |
|
52 |
+ AVFrame *magnitude, *phase; |
|
53 |
+ FFTContext *fft; ///< Fast Fourier Transform context |
|
54 |
+ int fft_bits; ///< number of bits (FFT window size = 1<<fft_bits) |
|
55 |
+ FFTComplex **fft_data; ///< bins holder for each (displayed) channels |
|
56 |
+ int win_size; |
|
57 |
+ int size; |
|
58 |
+ int nb_freq; |
|
59 |
+ int hop_size; |
|
60 |
+ int start, end; |
|
61 |
+ int xpos; |
|
62 |
+ int xend; |
|
63 |
+ int64_t pts; |
|
64 |
+ float factor; |
|
65 |
+ AVFrame *buffer; |
|
66 |
+ float *window_func_lut; ///< Window function LUT |
|
67 |
+} SpectrumSynthContext; |
|
68 |
+ |
|
69 |
+#define OFFSET(x) offsetof(SpectrumSynthContext, x) |
|
70 |
+#define A AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_AUDIO_PARAM |
|
71 |
+#define V AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM |
|
72 |
+ |
|
73 |
+static const AVOption spectrumsynth_options[] = { |
|
74 |
+ { "sample_rate", "set sample rate", OFFSET(sample_rate), AV_OPT_TYPE_INT, {.i64 = 44100}, 15, INT_MAX, A }, |
|
75 |
+ { "channels", "set channels", OFFSET(channels), AV_OPT_TYPE_INT, {.i64 = 1}, 1, 8, A }, |
|
76 |
+ { "scale", "set input amplitude scale", OFFSET(scale), AV_OPT_TYPE_INT, {.i64 = LOG}, 0, NB_SCALES-1, V, "scale" }, |
|
77 |
+ { "lin", "linear", 0, AV_OPT_TYPE_CONST, {.i64=LINEAR}, 0, 0, V, "scale" }, |
|
78 |
+ { "log", "logarithmic", 0, AV_OPT_TYPE_CONST, {.i64=LOG}, 0, 0, V, "scale" }, |
|
79 |
+ { "slide", "set input sliding mode", OFFSET(sliding), AV_OPT_TYPE_INT, {.i64 = FULLFRAME}, 0, NB_SLIDES-1, V, "slide" }, |
|
80 |
+ { "replace", "consume old columns with new", 0, AV_OPT_TYPE_CONST, {.i64=REPLACE}, 0, 0, V, "slide" }, |
|
81 |
+ { "scroll", "consume only most right column", 0, AV_OPT_TYPE_CONST, {.i64=SCROLL}, 0, 0, V, "slide" }, |
|
82 |
+ { "fullframe", "consume full frames", 0, AV_OPT_TYPE_CONST, {.i64=FULLFRAME}, 0, 0, V, "slide" }, |
|
83 |
+ { "rscroll", "consume only most left column", 0, AV_OPT_TYPE_CONST, {.i64=RSCROLL}, 0, 0, V, "slide" }, |
|
84 |
+ { "win_func", "set window function", OFFSET(win_func), AV_OPT_TYPE_INT, {.i64 = 0}, 0, NB_WFUNC-1, A, "win_func" }, |
|
85 |
+ { "rect", "Rectangular", 0, AV_OPT_TYPE_CONST, {.i64=WFUNC_RECT}, 0, 0, A, "win_func" }, |
|
86 |
+ { "bartlett", "Bartlett", 0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BARTLETT}, 0, 0, A, "win_func" }, |
|
87 |
+ { "hann", "Hann", 0, AV_OPT_TYPE_CONST, {.i64=WFUNC_HANNING}, 0, 0, A, "win_func" }, |
|
88 |
+ { "hanning", "Hanning", 0, AV_OPT_TYPE_CONST, {.i64=WFUNC_HANNING}, 0, 0, A, "win_func" }, |
|
89 |
+ { "hamming", "Hamming", 0, AV_OPT_TYPE_CONST, {.i64=WFUNC_HAMMING}, 0, 0, A, "win_func" }, |
|
90 |
+ { "sine", "Sine", 0, AV_OPT_TYPE_CONST, {.i64=WFUNC_SINE}, 0, 0, A, "win_func" }, |
|
91 |
+ { "overlap", "set window overlap", OFFSET(overlap), AV_OPT_TYPE_FLOAT, {.dbl=1}, 0, 1, A }, |
|
92 |
+ { "orientation", "set orientation", OFFSET(orientation), AV_OPT_TYPE_INT, {.i64=VERTICAL}, 0, NB_ORIENTATIONS-1, V, "orientation" }, |
|
93 |
+ { "vertical", NULL, 0, AV_OPT_TYPE_CONST, {.i64=VERTICAL}, 0, 0, V, "orientation" }, |
|
94 |
+ { "horizontal", NULL, 0, AV_OPT_TYPE_CONST, {.i64=HORIZONTAL}, 0, 0, V, "orientation" }, |
|
95 |
+ { NULL } |
|
96 |
+}; |
|
97 |
+ |
|
98 |
+AVFILTER_DEFINE_CLASS(spectrumsynth); |
|
99 |
+ |
|
100 |
+static int query_formats(AVFilterContext *ctx) |
|
101 |
+{ |
|
102 |
+ SpectrumSynthContext *s = ctx->priv; |
|
103 |
+ AVFilterFormats *formats = NULL; |
|
104 |
+ AVFilterChannelLayouts *layout = NULL; |
|
105 |
+ AVFilterLink *magnitude = ctx->inputs[0]; |
|
106 |
+ AVFilterLink *phase = ctx->inputs[1]; |
|
107 |
+ AVFilterLink *outlink = ctx->outputs[0]; |
|
108 |
+ static const enum AVSampleFormat sample_fmts[] = { AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE }; |
|
109 |
+ static const enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY16, |
|
110 |
+ AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUVJ444P, |
|
111 |
+ AV_PIX_FMT_YUV444P16, AV_PIX_FMT_NONE }; |
|
112 |
+ int ret, sample_rates[] = { 48000, -1 }; |
|
113 |
+ |
|
114 |
+ formats = ff_make_format_list(sample_fmts); |
|
115 |
+ if ((ret = ff_formats_ref (formats, &outlink->in_formats )) < 0 || |
|
116 |
+ (ret = ff_add_channel_layout (&layout, FF_COUNT2LAYOUT(s->channels))) < 0 || |
|
117 |
+ (ret = ff_channel_layouts_ref (layout , &outlink->in_channel_layouts)) < 0) |
|
118 |
+ return ret; |
|
119 |
+ |
|
120 |
+ sample_rates[0] = s->sample_rate; |
|
121 |
+ formats = ff_make_format_list(sample_rates); |
|
122 |
+ if (!formats) |
|
123 |
+ return AVERROR(ENOMEM); |
|
124 |
+ if ((ret = ff_formats_ref(formats, &outlink->in_samplerates)) < 0) |
|
125 |
+ return ret; |
|
126 |
+ |
|
127 |
+ formats = ff_make_format_list(pix_fmts); |
|
128 |
+ if (!formats) |
|
129 |
+ return AVERROR(ENOMEM); |
|
130 |
+ if ((ret = ff_formats_ref(formats, &magnitude->out_formats)) < 0) |
|
131 |
+ return ret; |
|
132 |
+ |
|
133 |
+ formats = ff_make_format_list(pix_fmts); |
|
134 |
+ if (!formats) |
|
135 |
+ return AVERROR(ENOMEM); |
|
136 |
+ if ((ret = ff_formats_ref(formats, &phase->out_formats)) < 0) |
|
137 |
+ return ret; |
|
138 |
+ |
|
139 |
+ return 0; |
|
140 |
+} |
|
141 |
+ |
|
142 |
+static int config_output(AVFilterLink *outlink) |
|
143 |
+{ |
|
144 |
+ AVFilterContext *ctx = outlink->src; |
|
145 |
+ SpectrumSynthContext *s = ctx->priv; |
|
146 |
+ int width = ctx->inputs[0]->w; |
|
147 |
+ int height = ctx->inputs[0]->h; |
|
148 |
+ AVRational time_base = ctx->inputs[0]->time_base; |
|
149 |
+ AVRational frame_rate = ctx->inputs[0]->frame_rate; |
|
150 |
+ int i, ch, fft_bits; |
|
151 |
+ float factor, overlap; |
|
152 |
+ |
|
153 |
+ outlink->sample_rate = s->sample_rate; |
|
154 |
+ outlink->time_base = (AVRational){1, s->sample_rate}; |
|
155 |
+ |
|
156 |
+ if (width != ctx->inputs[1]->w || |
|
157 |
+ height != ctx->inputs[1]->h) { |
|
158 |
+ av_log(ctx, AV_LOG_ERROR, |
|
159 |
+ "Magnitude and Phase sizes differ (%dx%d vs %dx%d).\n", |
|
160 |
+ width, height, |
|
161 |
+ ctx->inputs[1]->w, ctx->inputs[1]->h); |
|
162 |
+ return AVERROR_INVALIDDATA; |
|
163 |
+ } else if (av_cmp_q(time_base, ctx->inputs[1]->time_base) != 0) { |
|
164 |
+ av_log(ctx, AV_LOG_ERROR, |
|
165 |
+ "Magnitude and Phase time bases differ (%d/%d vs %d/%d).\n", |
|
166 |
+ time_base.num, time_base.den, |
|
167 |
+ ctx->inputs[1]->time_base.num, |
|
168 |
+ ctx->inputs[1]->time_base.den); |
|
169 |
+ return AVERROR_INVALIDDATA; |
|
170 |
+ } else if (av_cmp_q(frame_rate, ctx->inputs[1]->frame_rate) != 0) { |
|
171 |
+ av_log(ctx, AV_LOG_ERROR, |
|
172 |
+ "Magnitude and Phase framerates differ (%d/%d vs %d/%d).\n", |
|
173 |
+ frame_rate.num, frame_rate.den, |
|
174 |
+ ctx->inputs[1]->frame_rate.num, |
|
175 |
+ ctx->inputs[1]->frame_rate.den); |
|
176 |
+ return AVERROR_INVALIDDATA; |
|
177 |
+ } |
|
178 |
+ |
|
179 |
+ s->size = s->orientation == VERTICAL ? height / s->channels : width / s->channels; |
|
180 |
+ s->xend = s->orientation == VERTICAL ? width : height; |
|
181 |
+ |
|
182 |
+ for (fft_bits = 1; 1 << fft_bits < 2 * s->size; fft_bits++); |
|
183 |
+ |
|
184 |
+ s->win_size = 1 << fft_bits; |
|
185 |
+ s->nb_freq = 1 << (fft_bits - 1); |
|
186 |
+ |
|
187 |
+ s->fft = av_fft_init(fft_bits, 1); |
|
188 |
+ if (!s->fft) { |
|
189 |
+ av_log(ctx, AV_LOG_ERROR, "Unable to create FFT context. " |
|
190 |
+ "The window size might be too high.\n"); |
|
191 |
+ return AVERROR(EINVAL); |
|
192 |
+ } |
|
193 |
+ s->fft_data = av_calloc(s->channels, sizeof(*s->fft_data)); |
|
194 |
+ if (!s->fft_data) |
|
195 |
+ return AVERROR(ENOMEM); |
|
196 |
+ for (ch = 0; ch < s->channels; ch++) { |
|
197 |
+ s->fft_data[ch] = av_calloc(s->win_size, sizeof(**s->fft_data)); |
|
198 |
+ if (!s->fft_data[ch]) |
|
199 |
+ return AVERROR(ENOMEM); |
|
200 |
+ } |
|
201 |
+ |
|
202 |
+ s->buffer = ff_get_audio_buffer(outlink, s->win_size * 2); |
|
203 |
+ if (!s->buffer) |
|
204 |
+ return AVERROR(ENOMEM); |
|
205 |
+ |
|
206 |
+ /* pre-calc windowing function */ |
|
207 |
+ s->window_func_lut = av_realloc_f(s->window_func_lut, s->win_size, |
|
208 |
+ sizeof(*s->window_func_lut)); |
|
209 |
+ if (!s->window_func_lut) |
|
210 |
+ return AVERROR(ENOMEM); |
|
211 |
+ ff_generate_window_func(s->window_func_lut, s->win_size, s->win_func, &overlap); |
|
212 |
+ if (s->overlap == 1) |
|
213 |
+ s->overlap = overlap; |
|
214 |
+ s->hop_size = (1 - s->overlap) * s->win_size; |
|
215 |
+ for (factor = 0, i = 0; i < s->win_size; i++) { |
|
216 |
+ factor += s->window_func_lut[i] * s->window_func_lut[i]; |
|
217 |
+ } |
|
218 |
+ s->factor = (factor / s->win_size) / FFMAX(1 / (1 - s->overlap) - 1, 1); |
|
219 |
+ |
|
220 |
+ return 0; |
|
221 |
+} |
|
222 |
+ |
|
223 |
+static int request_frame(AVFilterLink *outlink) |
|
224 |
+{ |
|
225 |
+ AVFilterContext *ctx = outlink->src; |
|
226 |
+ SpectrumSynthContext *s = ctx->priv; |
|
227 |
+ int ret; |
|
228 |
+ |
|
229 |
+ if (!s->magnitude) { |
|
230 |
+ ret = ff_request_frame(ctx->inputs[0]); |
|
231 |
+ if (ret < 0) |
|
232 |
+ return ret; |
|
233 |
+ } |
|
234 |
+ if (!s->phase) { |
|
235 |
+ ret = ff_request_frame(ctx->inputs[1]); |
|
236 |
+ if (ret < 0) |
|
237 |
+ return ret; |
|
238 |
+ } |
|
239 |
+ return 0; |
|
240 |
+} |
|
241 |
+ |
|
242 |
+static void read16_fft_bin(SpectrumSynthContext *s, |
|
243 |
+ int x, int y, int f, int ch) |
|
244 |
+{ |
|
245 |
+ const int m_linesize = s->magnitude->linesize[0]; |
|
246 |
+ const int p_linesize = s->phase->linesize[0]; |
|
247 |
+ const uint16_t *m = (uint16_t *)(s->magnitude->data[0] + y * m_linesize); |
|
248 |
+ const uint16_t *p = (uint16_t *)(s->phase->data[0] + y * p_linesize); |
|
249 |
+ float magnitude, phase; |
|
250 |
+ |
|
251 |
+ switch (s->scale) { |
|
252 |
+ case LINEAR: |
|
253 |
+ magnitude = m[x] / (double)UINT16_MAX; |
|
254 |
+ break; |
|
255 |
+ case LOG: |
|
256 |
+ magnitude = ff_exp10(((m[x] / (double)UINT16_MAX) - 1.) * 6.); |
|
257 |
+ break; |
|
258 |
+ } |
|
259 |
+ phase = ((p[x] / (double)UINT16_MAX) * 2. - 1.) * M_PI; |
|
260 |
+ |
|
261 |
+ s->fft_data[ch][f].re = magnitude * cos(phase); |
|
262 |
+ s->fft_data[ch][f].im = magnitude * sin(phase); |
|
263 |
+} |
|
264 |
+ |
|
265 |
+static void read8_fft_bin(SpectrumSynthContext *s, |
|
266 |
+ int x, int y, int f, int ch) |
|
267 |
+{ |
|
268 |
+ const int m_linesize = s->magnitude->linesize[0]; |
|
269 |
+ const int p_linesize = s->phase->linesize[0]; |
|
270 |
+ const uint8_t *m = (uint8_t *)(s->magnitude->data[0] + y * m_linesize); |
|
271 |
+ const uint8_t *p = (uint8_t *)(s->phase->data[0] + y * p_linesize); |
|
272 |
+ float magnitude, phase; |
|
273 |
+ |
|
274 |
+ switch (s->scale) { |
|
275 |
+ case LINEAR: |
|
276 |
+ magnitude = m[x] / (double)UINT8_MAX; |
|
277 |
+ break; |
|
278 |
+ case LOG: |
|
279 |
+ magnitude = ff_exp10(((m[x] / (double)UINT8_MAX) - 1.) * 6.); |
|
280 |
+ break; |
|
281 |
+ } |
|
282 |
+ phase = ((p[x] / (double)UINT8_MAX) * 2. - 1.) * M_PI; |
|
283 |
+ |
|
284 |
+ s->fft_data[ch][f].re = magnitude * cos(phase); |
|
285 |
+ s->fft_data[ch][f].im = magnitude * sin(phase); |
|
286 |
+} |
|
287 |
+ |
|
288 |
+static void read_fft_data(AVFilterContext *ctx, int x, int h, int ch) |
|
289 |
+{ |
|
290 |
+ SpectrumSynthContext *s = ctx->priv; |
|
291 |
+ AVFilterLink *inlink = ctx->inputs[0]; |
|
292 |
+ int start = h * (s->channels - ch) - 1; |
|
293 |
+ int end = h * (s->channels - ch - 1); |
|
294 |
+ int y, f; |
|
295 |
+ |
|
296 |
+ switch (s->orientation) { |
|
297 |
+ case VERTICAL: |
|
298 |
+ switch (inlink->format) { |
|
299 |
+ case AV_PIX_FMT_YUV444P16: |
|
300 |
+ case AV_PIX_FMT_GRAY16: |
|
301 |
+ for (y = start, f = 0; y >= end; y--, f++) { |
|
302 |
+ read16_fft_bin(s, x, y, f, ch); |
|
303 |
+ } |
|
304 |
+ break; |
|
305 |
+ case AV_PIX_FMT_YUVJ444P: |
|
306 |
+ case AV_PIX_FMT_YUV444P: |
|
307 |
+ case AV_PIX_FMT_GRAY8: |
|
308 |
+ for (y = start, f = 0; y >= end; y--, f++) { |
|
309 |
+ read8_fft_bin(s, x, y, f, ch); |
|
310 |
+ } |
|
311 |
+ break; |
|
312 |
+ } |
|
313 |
+ break; |
|
314 |
+ case HORIZONTAL: |
|
315 |
+ switch (inlink->format) { |
|
316 |
+ case AV_PIX_FMT_YUV444P16: |
|
317 |
+ case AV_PIX_FMT_GRAY16: |
|
318 |
+ for (y = end, f = 0; y <= start; y++, f++) { |
|
319 |
+ read16_fft_bin(s, y, x, f, ch); |
|
320 |
+ } |
|
321 |
+ break; |
|
322 |
+ case AV_PIX_FMT_YUVJ444P: |
|
323 |
+ case AV_PIX_FMT_YUV444P: |
|
324 |
+ case AV_PIX_FMT_GRAY8: |
|
325 |
+ for (y = end, f = 0; y <= start; y++, f++) { |
|
326 |
+ read8_fft_bin(s, y, x, f, ch); |
|
327 |
+ } |
|
328 |
+ break; |
|
329 |
+ } |
|
330 |
+ break; |
|
331 |
+ } |
|
332 |
+} |
|
333 |
+ |
|
334 |
+static void synth_window(AVFilterContext *ctx, int x) |
|
335 |
+{ |
|
336 |
+ SpectrumSynthContext *s = ctx->priv; |
|
337 |
+ const int h = s->size; |
|
338 |
+ int nb = s->win_size; |
|
339 |
+ int y, f, ch; |
|
340 |
+ |
|
341 |
+ for (ch = 0; ch < s->channels; ch++) { |
|
342 |
+ read_fft_data(ctx, x, h, ch); |
|
343 |
+ |
|
344 |
+ for (y = h; y <= s->nb_freq; y++) { |
|
345 |
+ s->fft_data[ch][y].re = 0; |
|
346 |
+ s->fft_data[ch][y].im = 0; |
|
347 |
+ } |
|
348 |
+ |
|
349 |
+ for (y = s->nb_freq + 1, f = s->nb_freq - 1; y < nb; y++, f--) { |
|
350 |
+ s->fft_data[ch][y].re = s->fft_data[ch][f].re; |
|
351 |
+ s->fft_data[ch][y].im = -s->fft_data[ch][f].im; |
|
352 |
+ } |
|
353 |
+ |
|
354 |
+ av_fft_permute(s->fft, s->fft_data[ch]); |
|
355 |
+ av_fft_calc(s->fft, s->fft_data[ch]); |
|
356 |
+ } |
|
357 |
+} |
|
358 |
+ |
|
359 |
+static int try_push_frame(AVFilterContext *ctx, int x) |
|
360 |
+{ |
|
361 |
+ SpectrumSynthContext *s = ctx->priv; |
|
362 |
+ AVFilterLink *outlink = ctx->outputs[0]; |
|
363 |
+ const float factor = s->factor; |
|
364 |
+ int ch, n, i, ret; |
|
365 |
+ int start, end; |
|
366 |
+ AVFrame *out; |
|
367 |
+ |
|
368 |
+ synth_window(ctx, x); |
|
369 |
+ |
|
370 |
+ for (ch = 0; ch < s->channels; ch++) { |
|
371 |
+ float *buf = (float *)s->buffer->extended_data[ch]; |
|
372 |
+ int j, k; |
|
373 |
+ |
|
374 |
+ start = s->start; |
|
375 |
+ end = s->end; |
|
376 |
+ k = end; |
|
377 |
+ for (i = 0, j = start; j < k && i < s->win_size; i++, j++) { |
|
378 |
+ buf[j] += s->fft_data[ch][i].re; |
|
379 |
+ } |
|
380 |
+ |
|
381 |
+ for (; i < s->win_size; i++, j++) { |
|
382 |
+ buf[j] = s->fft_data[ch][i].re; |
|
383 |
+ } |
|
384 |
+ |
|
385 |
+ start += s->hop_size; |
|
386 |
+ end = j; |
|
387 |
+ |
|
388 |
+ if (start >= s->win_size) { |
|
389 |
+ start -= s->win_size; |
|
390 |
+ end -= s->win_size; |
|
391 |
+ |
|
392 |
+ if (ch == s->channels - 1) { |
|
393 |
+ float *dst; |
|
394 |
+ |
|
395 |
+ out = ff_get_audio_buffer(outlink, s->win_size); |
|
396 |
+ if (!out) { |
|
397 |
+ av_frame_free(&s->magnitude); |
|
398 |
+ av_frame_free(&s->phase); |
|
399 |
+ return AVERROR(ENOMEM); |
|
400 |
+ } |
|
401 |
+ |
|
402 |
+ out->pts = s->pts; |
|
403 |
+ s->pts += s->win_size; |
|
404 |
+ for (int c = 0; c < s->channels; c++) { |
|
405 |
+ dst = (float *)out->extended_data[c]; |
|
406 |
+ buf = (float *)s->buffer->extended_data[c]; |
|
407 |
+ |
|
408 |
+ for (n = 0; n < s->win_size; n++) { |
|
409 |
+ dst[n] = buf[n] * factor; |
|
410 |
+ } |
|
411 |
+ memmove(buf, buf + s->win_size, s->win_size * 4); |
|
412 |
+ } |
|
413 |
+ |
|
414 |
+ ret = ff_filter_frame(outlink, out); |
|
415 |
+ } |
|
416 |
+ } |
|
417 |
+ } |
|
418 |
+ |
|
419 |
+ s->start = start; |
|
420 |
+ s->end = end; |
|
421 |
+ |
|
422 |
+ return 0; |
|
423 |
+} |
|
424 |
+ |
|
425 |
+static int try_push_frames(AVFilterContext *ctx) |
|
426 |
+{ |
|
427 |
+ SpectrumSynthContext *s = ctx->priv; |
|
428 |
+ int ret, x; |
|
429 |
+ |
|
430 |
+ if (!(s->magnitude && s->phase)) |
|
431 |
+ return 0; |
|
432 |
+ |
|
433 |
+ switch (s->sliding) { |
|
434 |
+ case REPLACE: |
|
435 |
+ ret = try_push_frame(ctx, s->xpos); |
|
436 |
+ s->xpos++; |
|
437 |
+ if (s->xpos >= s->xend) |
|
438 |
+ s->xpos = 0; |
|
439 |
+ break; |
|
440 |
+ case SCROLL: |
|
441 |
+ s->xpos = s->xend - 1; |
|
442 |
+ ret = try_push_frame(ctx, s->xpos); |
|
443 |
+ case RSCROLL: |
|
444 |
+ s->xpos = 0; |
|
445 |
+ ret = try_push_frame(ctx, s->xpos); |
|
446 |
+ break; |
|
447 |
+ break; |
|
448 |
+ case FULLFRAME: |
|
449 |
+ for (x = 0; x < s->xend; x++) { |
|
450 |
+ ret = try_push_frame(ctx, x); |
|
451 |
+ if (ret < 0) |
|
452 |
+ break; |
|
453 |
+ } |
|
454 |
+ break; |
|
455 |
+ } |
|
456 |
+ |
|
457 |
+ av_frame_free(&s->magnitude); |
|
458 |
+ av_frame_free(&s->phase); |
|
459 |
+ return ret; |
|
460 |
+} |
|
461 |
+ |
|
462 |
+static int filter_frame_magnitude(AVFilterLink *inlink, AVFrame *magnitude) |
|
463 |
+{ |
|
464 |
+ AVFilterContext *ctx = inlink->dst; |
|
465 |
+ SpectrumSynthContext *s = ctx->priv; |
|
466 |
+ |
|
467 |
+ s->magnitude = magnitude; |
|
468 |
+ return try_push_frames(ctx); |
|
469 |
+} |
|
470 |
+ |
|
471 |
+static int filter_frame_phase(AVFilterLink *inlink, AVFrame *phase) |
|
472 |
+{ |
|
473 |
+ AVFilterContext *ctx = inlink->dst; |
|
474 |
+ SpectrumSynthContext *s = ctx->priv; |
|
475 |
+ |
|
476 |
+ s->phase = phase; |
|
477 |
+ return try_push_frames(ctx); |
|
478 |
+} |
|
479 |
+ |
|
480 |
+static av_cold void uninit(AVFilterContext *ctx) |
|
481 |
+{ |
|
482 |
+ SpectrumSynthContext *s = ctx->priv; |
|
483 |
+ int i; |
|
484 |
+ |
|
485 |
+ av_frame_free(&s->magnitude); |
|
486 |
+ av_frame_free(&s->phase); |
|
487 |
+ av_frame_free(&s->buffer); |
|
488 |
+ av_fft_end(s->fft); |
|
489 |
+ if (s->fft_data) { |
|
490 |
+ for (i = 0; i < s->channels; i++) |
|
491 |
+ av_freep(&s->fft_data[i]); |
|
492 |
+ } |
|
493 |
+ av_freep(&s->fft_data); |
|
494 |
+ av_freep(&s->window_func_lut); |
|
495 |
+} |
|
496 |
+ |
|
497 |
+static const AVFilterPad spectrumsynth_inputs[] = { |
|
498 |
+ { |
|
499 |
+ .name = "magnitude", |
|
500 |
+ .type = AVMEDIA_TYPE_VIDEO, |
|
501 |
+ .filter_frame = filter_frame_magnitude, |
|
502 |
+ .needs_fifo = 1, |
|
503 |
+ }, |
|
504 |
+ { |
|
505 |
+ .name = "phase", |
|
506 |
+ .type = AVMEDIA_TYPE_VIDEO, |
|
507 |
+ .filter_frame = filter_frame_phase, |
|
508 |
+ .needs_fifo = 1, |
|
509 |
+ }, |
|
510 |
+ { NULL } |
|
511 |
+}; |
|
512 |
+ |
|
513 |
+static const AVFilterPad spectrumsynth_outputs[] = { |
|
514 |
+ { |
|
515 |
+ .name = "default", |
|
516 |
+ .type = AVMEDIA_TYPE_AUDIO, |
|
517 |
+ .config_props = config_output, |
|
518 |
+ .request_frame = request_frame, |
|
519 |
+ }, |
|
520 |
+ { NULL } |
|
521 |
+}; |
|
522 |
+ |
|
523 |
+AVFilter ff_vaf_spectrumsynth = { |
|
524 |
+ .name = "spectrumsynth", |
|
525 |
+ .description = NULL_IF_CONFIG_SMALL("Convert input spectrum videos to audio output."), |
|
526 |
+ .uninit = uninit, |
|
527 |
+ .query_formats = query_formats, |
|
528 |
+ .priv_size = sizeof(SpectrumSynthContext), |
|
529 |
+ .inputs = spectrumsynth_inputs, |
|
530 |
+ .outputs = spectrumsynth_outputs, |
|
531 |
+ .priv_class = &spectrumsynth_class, |
|
532 |
+}; |
... | ... |
@@ -30,7 +30,7 @@ |
30 | 30 |
#include "libavutil/version.h" |
31 | 31 |
|
32 | 32 |
#define LIBAVFILTER_VERSION_MAJOR 6 |
33 |
-#define LIBAVFILTER_VERSION_MINOR 23 |
|
33 |
+#define LIBAVFILTER_VERSION_MINOR 24 |
|
34 | 34 |
#define LIBAVFILTER_VERSION_MICRO 100 |
35 | 35 |
|
36 | 36 |
#define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \ |