this filter is the same as showspectrum but with constant Q transform,
so frequency is spaced logarithmically
... | ... |
@@ -10133,6 +10133,76 @@ settb=AVTB |
10133 | 10133 |
@end example |
10134 | 10134 |
@end itemize |
10135 | 10135 |
|
10136 |
+@section showcqt |
|
10137 |
+Convert input audio to a video output (at full HD resolution), representing |
|
10138 |
+frequency spectrum logarithmically (using constant Q transform with |
|
10139 |
+Brown-Puckette algorithm), with musical tone scale, from E0 to D#10 (10 octaves). |
|
10140 |
+ |
|
10141 |
+The filter accepts the following options: |
|
10142 |
+ |
|
10143 |
+@table @option |
|
10144 |
+@item volume |
|
10145 |
+Specify the transform volume (multiplier). Acceptable value is [0.1, 100.0]. |
|
10146 |
+Default value is @code{16.0}. |
|
10147 |
+ |
|
10148 |
+@item timeclamp |
|
10149 |
+Specify the transform timeclamp. At low frequency, there is trade-off between |
|
10150 |
+accuracy in time domain and frequency domain. If timeclamp is lower, |
|
10151 |
+event in time domain is represented more accurately (such as fast bass drum), |
|
10152 |
+otherwise event in frequency domain is represented more accurately |
|
10153 |
+(such as bass guitar). Acceptable value is [0.1, 1.0]. Default value is @code{0.17}. |
|
10154 |
+ |
|
10155 |
+@item coeffclamp |
|
10156 |
+Specify the transform coeffclamp. If coeffclamp is lower, transform is |
|
10157 |
+more accurate, otherwise transform is faster. Acceptable value is [0.1, 10.0]. |
|
10158 |
+Default value is @code{1.0}. |
|
10159 |
+ |
|
10160 |
+@item gamma |
|
10161 |
+Specify gamma. Lower gamma makes the spectrum more contrasted, higher gamma |

10162 |
+makes the spectrum have more range. Acceptable value is [1.0, 7.0]. |
|
10163 |
+Default value is @code{3.0}. |
|
10164 |
+ |
|
10165 |
+@item fps |
|
10166 |
+Specify video fps. Default value is @code{25}. |
|
10167 |
+ |
|
10168 |
+@item count |
|
10169 |
+Specify number of transform per frame, so there are fps*count transforms |
|
10170 |
+per second. Note that the audio data rate must be divisible by fps*count. |
|
10171 |
+Default value is @code{6}. |
|
10172 |
+ |
|
10173 |
+@end table |
|
10174 |
+ |
|
10175 |
+@subsection Examples |
|
10176 |
+ |
|
10177 |
+@itemize |
|
10178 |
+@item |
|
10179 |
+Playing audio while showing the spectrum: |
|
10180 |
+@example |
|
10181 |
+ffplay -f lavfi 'amovie=a.mp3, asplit [a][out1]; [a] showcqt [out0]' |
|
10182 |
+@end example |
|
10183 |
+ |
|
10184 |
+@item |
|
10185 |
+Same as above, but with frame rate 30 fps: |
|
10186 |
+@example |
|
10187 |
+ffplay -f lavfi 'amovie=a.mp3, asplit [a][out1]; [a] showcqt=fps=30:count=5 [out0]' |
|
10188 |
+@end example |
|
10189 |
+ |
|
10190 |
+@item |
|
10191 |
+A1 and its harmonics: A1, A2, (near)E3, A3: |
|
10192 |
+@example |
|
10193 |
+ffplay -f lavfi 'aevalsrc=0.1*sin(2*PI*55*t)+0.1*sin(4*PI*55*t)+0.1*sin(6*PI*55*t)+0.1*sin(8*PI*55*t), |
|
10194 |
+ asplit[a][out1]; [a] showcqt [out0]' |
|
10195 |
+@end example |
|
10196 |
+ |
|
10197 |
+@item |
|
10198 |
+Same as above, but with more accuracy in frequency domain (and slower): |
|
10199 |
+@example |
|
10200 |
+ffplay -f lavfi 'aevalsrc=0.1*sin(2*PI*55*t)+0.1*sin(4*PI*55*t)+0.1*sin(6*PI*55*t)+0.1*sin(8*PI*55*t), |
|
10201 |
+ asplit[a][out1]; [a] showcqt=timeclamp=0.5 [out0]' |
|
10202 |
+@end example |
|
10203 |
+ |
|
10204 |
+@end itemize |
|
10205 |
+ |
|
10136 | 10206 |
@section showspectrum |
10137 | 10207 |
|
10138 | 10208 |
Convert input audio to a video output, representing the audio frequency |
... | ... |
@@ -224,6 +224,7 @@ OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_uspp.o |
224 | 224 |
# multimedia filters |
225 | 225 |
OBJS-$(CONFIG_AVECTORSCOPE_FILTER) += avf_avectorscope.o |
226 | 226 |
OBJS-$(CONFIG_CONCAT_FILTER) += avf_concat.o |
227 |
+OBJS-$(CONFIG_SHOWCQT_FILTER) += avf_showcqt.o |
|
227 | 228 |
OBJS-$(CONFIG_SHOWSPECTRUM_FILTER) += avf_showspectrum.o |
228 | 229 |
OBJS-$(CONFIG_SHOWWAVES_FILTER) += avf_showwaves.o |
229 | 230 |
|
... | ... |
@@ -231,6 +231,7 @@ void avfilter_register_all(void) |
231 | 231 |
/* multimedia filters */ |
232 | 232 |
REGISTER_FILTER(AVECTORSCOPE, avectorscope, avf); |
233 | 233 |
REGISTER_FILTER(CONCAT, concat, avf); |
234 |
+ REGISTER_FILTER(SHOWCQT, showcqt, avf); |
|
234 | 235 |
REGISTER_FILTER(SHOWSPECTRUM, showspectrum, avf); |
235 | 236 |
REGISTER_FILTER(SHOWWAVES, showwaves, avf); |
236 | 237 |
|
237 | 238 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,585 @@ |
0 |
+/* |
|
1 |
+ * Copyright (c) 2014 Muhammad Faiz <mfcc64@gmail.com> |
|
2 |
+ * |
|
3 |
+ * This file is part of FFmpeg. |
|
4 |
+ * |
|
5 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
6 |
+ * modify it under the terms of the GNU Lesser General Public |
|
7 |
+ * License as published by the Free Software Foundation; either |
|
8 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
9 |
+ * |
|
10 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
11 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
12 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
13 |
+ * Lesser General Public License for more details. |
|
14 |
+ * |
|
15 |
+ * You should have received a copy of the GNU Lesser General Public |
|
16 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
17 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
18 |
+ */ |
|
19 |
+ |
|
20 |
+#include "libavcodec/avfft.h" |
|
21 |
+#include "libavutil/avassert.h" |
|
22 |
+#include "libavutil/channel_layout.h" |
|
23 |
+#include "libavutil/opt.h" |
|
24 |
+#include "libavutil/xga_font_data.h" |
|
25 |
+#include "libavutil/qsort.h" |
|
26 |
+#include "libavutil/time.h" |
|
27 |
+#include "avfilter.h" |
|
28 |
+#include "internal.h" |
|
29 |
+ |
|
30 |
+#include <math.h> |
|
31 |
+#include <stdlib.h> |
|
32 |
+ |
|
33 |
/* this filter is designed to do 16 bins/semitones constant Q transform with Brown-Puckette algorithm
 * start from E0 to D#10 (10 octaves)
 * so there are 16 bins/semitones * 12 semitones/octaves * 10 octaves = 1920 bins
 * match with full HD resolution */

#define VIDEO_WIDTH 1920                                  /* one pixel column per CQT bin */
#define VIDEO_HEIGHT 1080
#define FONT_HEIGHT 32                                    /* height of the note-name text band */
#define SPECTOGRAM_HEIGHT ((VIDEO_HEIGHT-FONT_HEIGHT)/2)  /* bar graph and sonogram each get this */
#define SPECTOGRAM_START (VIDEO_HEIGHT-SPECTOGRAM_HEIGHT) /* first row of the scrolling sonogram */
#define BASE_FREQ 20.051392800492                         /* frequency of bin 0 (just below E0), Hz */
#define COEFF_CLAMP 1.0e-4                                /* base fraction of kernel energy dropped */
|
45 |
+ |
|
46 |
/* one non-negligible spectral-kernel coefficient: value at FFT bin 'index' */
typedef struct {
    FFTSample value;
    int index;
} SparseCoeff;

/* Comparator for AV_QSORT: ascending order of coefficient magnitude.
 * Never returns 0, so equal-magnitude entries keep a deterministic but
 * arbitrary relative order — acceptable, since only the magnitudes matter
 * for the energy-clamping pass in config_output(). */
static inline int qsort_sparsecoeff(const SparseCoeff *a, const SparseCoeff *b)
{
    if (fabsf(a->value) >= fabsf(b->value))
        return 1;
    else
        return -1;
}
|
58 |
+ |
|
59 |
/* per-instance filter state */
typedef struct {
    const AVClass *class;
    AVFrame *outpicref;                /* reusable output frame; a clone is pushed per emitted frame */
    FFTContext *fft_context;
    FFTComplex *fft_data;              /* FFT input; re = left channel sample, im = right channel sample */
    FFTComplex *fft_result_left;       /* spectrum of the left channel (after de-interleaving) */
    FFTComplex *fft_result_right;      /* spectrum of the right channel */
    SparseCoeff *coeff_sort;           /* scratch buffer used to sort kernel coefficients by magnitude */
    SparseCoeff *coeffs[VIDEO_WIDTH];  /* sparse spectral kernel, one coefficient list per output bin */
    int coeffs_len[VIDEO_WIDTH];       /* number of retained coefficients per bin */
    uint8_t font_color[VIDEO_WIDTH];   /* per-column color weight for the note-name text band */
    uint8_t spectogram[SPECTOGRAM_HEIGHT][VIDEO_WIDTH][3]; /* RGB24 sonogram rows, used as a ring buffer */
    int64_t frame_count;               /* pts of the next output frame, in 1/fps time base */
    int spectogram_count;              /* transforms done since last emitted frame, cycles modulo count */
    int spectogram_index;              /* current write row in the sonogram ring buffer */
    int fft_bits;                      /* log2 of the FFT length */
    int req_fullfilled;                /* set by plot_cqt() once a frame was pushed during request_frame() */
    int remaining_fill;                /* samples still needed before the next transform can run */
    double volume;
    double timeclamp; /* lower timeclamp, time-accurate, higher timeclamp, freq-accurate (at low freq)*/
    float coeffclamp; /* lower coeffclamp, more precise, higher coeffclamp, faster */
    float gamma; /* lower gamma, more contrast, higher gamma, more range */
    int fps; /* the required fps is so strict, so it's enough to be int, but 24000/1001 etc cannot be encoded */
    int count; /* fps * count = transform rate */
} ShowCQTContext;
|
84 |
+ |
|
85 |
#define OFFSET(x) offsetof(ShowCQTContext, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM

/* user options; see the showcqt section in doc/filters.texi */
static const AVOption showcqt_options[] = {
    { "volume", "set volume", OFFSET(volume), AV_OPT_TYPE_DOUBLE, { .dbl = 16 }, 0.1, 100, FLAGS },
    { "timeclamp", "set timeclamp", OFFSET(timeclamp), AV_OPT_TYPE_DOUBLE, { .dbl = 0.17 }, 0.1, 1.0, FLAGS },
    { "coeffclamp", "set coeffclamp", OFFSET(coeffclamp), AV_OPT_TYPE_FLOAT, { .dbl = 1 }, 0.1, 10, FLAGS },
    { "gamma", "set gamma", OFFSET(gamma), AV_OPT_TYPE_FLOAT, { .dbl = 3 }, 1, 7, FLAGS },
    { "fps", "set video fps", OFFSET(fps), AV_OPT_TYPE_INT, { .i64 = 25 }, 10, 100, FLAGS },
    { "count", "set number of transform per frame", OFFSET(count), AV_OPT_TYPE_INT, { .i64 = 6 }, 1, 30, FLAGS },
    { NULL }
};

AVFILTER_DEFINE_CLASS(showcqt);
|
99 |
+ |
|
100 |
/* Free everything allocated in config_output(); safe to call on a
 * partially initialized context since av_freep()/av_frame_free()
 * accept NULL and reset the pointers. */
static av_cold void uninit(AVFilterContext *ctx)
{
    int k;
    ShowCQTContext *s = ctx->priv;
    av_fft_end(s->fft_context);
    s->fft_context = NULL;
    for (k = 0; k < VIDEO_WIDTH; k++)
        av_freep(&s->coeffs[k]);
    av_freep(&s->fft_data);
    av_freep(&s->fft_result_left);
    av_freep(&s->fft_result_right);
    av_freep(&s->coeff_sort);
    av_frame_free(&s->outpicref);
}
|
114 |
+ |
|
115 |
/* Negotiate formats: interleaved-float stereo input at 44100 or 48000 Hz
 * (filter_frame() reads samples as audio_data[2*i] / audio_data[2*i+1]),
 * RGB24 video output.
 * NOTE(review): the ff_*_ref() return values are not checked here —
 * harmless with the current void-returning API, but worth revisiting if
 * the API starts reporting errors. */
static int query_formats(AVFilterContext *ctx)
{
    AVFilterFormats *formats = NULL;
    AVFilterChannelLayouts *layouts = NULL;
    AVFilterLink *inlink = ctx->inputs[0];
    AVFilterLink *outlink = ctx->outputs[0];
    static const enum AVSampleFormat sample_fmts[] = { AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_NONE };
    static const enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_RGB24, AV_PIX_FMT_NONE };
    static const int64_t channel_layouts[] = { AV_CH_LAYOUT_STEREO, AV_CH_LAYOUT_STEREO_DOWNMIX, -1 };
    static const int samplerates[] = { 44100, 48000, -1 };

    /* set input audio formats */
    formats = ff_make_format_list(sample_fmts);
    if (!formats)
        return AVERROR(ENOMEM);
    ff_formats_ref(formats, &inlink->out_formats);

    layouts = avfilter_make_format64_list(channel_layouts);
    if (!layouts)
        return AVERROR(ENOMEM);
    ff_channel_layouts_ref(layouts, &inlink->out_channel_layouts);

    formats = ff_make_format_list(samplerates);
    if (!formats)
        return AVERROR(ENOMEM);
    ff_formats_ref(formats, &inlink->out_samplerates);

    /* set output video format */
    formats = ff_make_format_list(pix_fmts);
    if (!formats)
        return AVERROR(ENOMEM);
    ff_formats_ref(formats, &outlink->in_formats);

    return 0;
}
|
150 |
+ |
|
151 |
/* Configure the output link and precompute the sparse spectral kernel.
 *
 * For each of the 1920 output bins a time-domain windowed complex
 * exponential is built in fft_data, transformed once, and then pruned:
 * coefficients are sorted by magnitude and the smallest ones — whose
 * summed magnitude stays below total * coeffclamp * COEFF_CLAMP — are
 * dropped, leaving a short sparse list per bin used by plot_cqt().
 * Returns 0 on success or a negative AVERROR code. */
static int config_output(AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    AVFilterLink *inlink = ctx->inputs[0];
    ShowCQTContext *s = ctx->priv;
    int fft_len, k, x, y;
    int num_coeffs = 0;
    int rate = inlink->sample_rate;
    double max_len = rate * (double) s->timeclamp;      /* longest kernel, in samples */
    int64_t start_time, end_time;
    s->fft_bits = ceil(log2(max_len));                  /* smallest power of two >= max_len */
    fft_len = 1 << s->fft_bits;

    /* each transform consumes exactly rate/(fps*count) samples, so the
     * division must be exact for a/v sync to hold */
    if (rate % (s->fps * s->count))
    {
        av_log(ctx, AV_LOG_ERROR, "Rate (%u) is not divisible by fps*count (%u*%u)\n", rate, s->fps, s->count);
        return AVERROR(EINVAL);
    }

    s->fft_data = av_malloc_array(fft_len, sizeof(*s->fft_data));
    s->coeff_sort = av_malloc_array(fft_len, sizeof(*s->coeff_sort));
    s->fft_result_left = av_malloc_array(fft_len, sizeof(*s->fft_result_left));
    s->fft_result_right = av_malloc_array(fft_len, sizeof(*s->fft_result_right));
    s->fft_context = av_fft_init(s->fft_bits, 0);

    /* partial allocations are released later by uninit() */
    if (!s->fft_data || !s->coeff_sort || !s->fft_result_left || !s->fft_result_right || !s->fft_context)
        return AVERROR(ENOMEM);

    /* initializing font */
    for (x = 0; x < VIDEO_WIDTH; x++)
    {
        /* color ramp over one octave starting at column (12*3+8)*16 */
        if (x >= (12*3+8)*16 && x < (12*4+8)*16)
        {
            float fx = (x-(12*3+8)*16) * (1.0f/192.0f);
            float sv = sinf(M_PI*fx);
            s->font_color[x] = sv*sv*255.0f + 0.5f;
        }
        else
            s->font_color[x] = 0;
    }

    av_log(ctx, AV_LOG_INFO, "Calculating spectral kernel, please wait\n");
    start_time = av_gettime_relative();
    for (k = 0; k < VIDEO_WIDTH; k++)
    {
        int hlen = fft_len >> 1;
        float total = 0;
        float partial = 0;
        /* bin k's center frequency: 16 bins per semitone, 192 per octave */
        double freq = BASE_FREQ * exp2(k * (1.0/192.0));
        double tlen = rate * (24.0 * 16.0) /freq;       /* ideal window length for this bin */
        /* a window function from Albert H. Nuttall,
         * "Some Windows with Very Good Sidelobe Behavior"
         * -93.32 dB peak sidelobe and 18 dB/octave asymptotic decay
         * coefficient normalized to a0 = 1 */
        double a0 = 0.355768;
        double a1 = 0.487396/a0;
        double a2 = 0.144232/a0;
        double a3 = 0.012604/a0;
        double sv_step, cv_step, sv, cv;
        double sw_step, cw_step, sw, cw, w;

        /* soft-clamp the window length against timeclamp */
        tlen = tlen * max_len / (tlen + max_len);
        s->fft_data[0].re = 0;
        s->fft_data[0].im = 0;
        /* window peak at the buffer center (hlen) */
        s->fft_data[hlen].re = (1.0 + a1 + a2 + a3) * (1.0/tlen) * s->volume * (1.0/fft_len);
        s->fft_data[hlen].im = 0;
        /* recurrence oscillators: (sv,cv) advances the carrier at freq,
         * (sw,cw) advances the window phase — avoids a sin/cos per sample */
        sv_step = sv = sin(2.0*M_PI*freq*(1.0/rate));
        cv_step = cv = cos(2.0*M_PI*freq*(1.0/rate));
        /* also optimizing window func */
        sw_step = sw = sin(2.0*M_PI*(1.0/tlen));
        cw_step = cw = cos(2.0*M_PI*(1.0/tlen));
        for (x = 1; x < 0.5 * tlen; x++)
        {
            double cv_tmp, cw_tmp;
            double cw2, cw3, sw2;

            /* harmonics of the window phase via angle-addition identities */
            cw2 = cw * cw - sw * sw;
            sw2 = cw * sw + sw * cw;
            cw3 = cw * cw2 - sw * sw2;
            w = (1.0 + a1 * cw + a2 * cw2 + a3 * cw3) * (1.0/tlen) * s->volume * (1.0/fft_len);
            s->fft_data[hlen + x].re = w * cv;
            s->fft_data[hlen + x].im = w * sv;
            /* kernel is conjugate-symmetric about the center */
            s->fft_data[hlen - x].re = s->fft_data[hlen + x].re;
            s->fft_data[hlen - x].im = -s->fft_data[hlen + x].im;

            cv_tmp = cv * cv_step - sv * sv_step;
            sv = sv * cv_step + cv * sv_step;
            cv = cv_tmp;
            cw_tmp = cw * cw_step - sw * sw_step;
            sw = sw * cw_step + cw * sw_step;
            cw = cw_tmp;
        }
        /* zero the remainder outside the window support */
        for (; x < hlen; x++)
        {
            s->fft_data[hlen + x].re = 0;
            s->fft_data[hlen + x].im = 0;
            s->fft_data[hlen - x].re = 0;
            s->fft_data[hlen - x].im = 0;
        }
        av_fft_permute(s->fft_context, s->fft_data);
        av_fft_calc(s->fft_context, s->fft_data);

        for (x = 0; x < fft_len; x++)
        {
            s->coeff_sort[x].index = x;
            s->coeff_sort[x].value = s->fft_data[x].re;
        }

        /* ascending magnitude: the smallest coefficients come first */
        AV_QSORT(s->coeff_sort, fft_len, SparseCoeff, qsort_sparsecoeff);
        for (x = 0; x < fft_len; x++)
            total += fabsf(s->coeff_sort[x].value);

        for (x = 0; x < fft_len; x++)
        {
            partial += fabsf(s->coeff_sort[x].value);
            if (partial > (total * s->coeffclamp * COEFF_CLAMP))
            {
                /* keep the fft_len - x largest coefficients */
                s->coeffs_len[k] = fft_len - x;
                num_coeffs += s->coeffs_len[k];
                s->coeffs[k] = av_malloc_array(s->coeffs_len[k], sizeof(*s->coeffs[k]));
                if (!s->coeffs[k])
                    return AVERROR(ENOMEM);
                for (y = 0; y < s->coeffs_len[k]; y++)
                    s->coeffs[k][y] = s->coeff_sort[x+y];
                break;
            }
        }
    }
    end_time = av_gettime_relative();
    av_log(ctx, AV_LOG_INFO, "Elapsed time %.6f s (fft_len=%u, num_coeffs=%u)\n", 1e-6 * (end_time-start_time), fft_len, num_coeffs);

    outlink->w = VIDEO_WIDTH;
    outlink->h = VIDEO_HEIGHT;

    /* reset streaming state */
    s->req_fullfilled = 0;
    s->spectogram_index = 0;
    s->frame_count = 0;
    s->spectogram_count = 0;
    s->remaining_fill = fft_len >> 1;   /* first transform starts half-filled (kernel is centered) */
    memset(s->spectogram, 0, VIDEO_WIDTH * SPECTOGRAM_HEIGHT * 3);
    memset(s->fft_data, 0, fft_len * sizeof(*s->fft_data));

    s->outpicref = ff_get_video_buffer(outlink, outlink->w, outlink->h);
    if (!s->outpicref)
        return AVERROR(ENOMEM);

    outlink->sample_aspect_ratio = av_make_q(1, 1);
    outlink->time_base = av_make_q(1, s->fps);
    outlink->frame_rate = av_make_q(s->fps, 1);
    return 0;
}
|
302 |
+ |
|
303 |
/* Run one transform on the samples currently in fft_data and, every
 * 'count'-th call, render and push one video frame (bar graph, note-name
 * text band, scrolling sonogram).  Returns 0 or the ff_filter_frame()
 * error. */
static int plot_cqt(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    ShowCQTContext *s = ctx->priv;
    AVFilterLink *outlink = ctx->outputs[0];
    int fft_len = 1 << s->fft_bits;
    FFTSample result[VIDEO_WIDTH][4];   /* per bin: [0]=L, [1]=mid, [2]=R, [3]=unclamped mid power */
    int x, y, ret = 0;

    /* real part contains left samples, imaginary part contains right samples */
    memcpy(s->fft_result_left, s->fft_data, fft_len * sizeof(*s->fft_data));
    av_fft_permute(s->fft_context, s->fft_result_left);
    av_fft_calc(s->fft_context, s->fft_result_left);

    /* separate left and right, (and multiply by 2.0) */
    /* standard two-real-FFTs-in-one trick: L spectrum is the even
     * (conjugate-symmetric) part, R spectrum the odd part */
    s->fft_result_right[0].re = 2.0f * s->fft_result_left[0].im;
    s->fft_result_right[0].im = 0;
    s->fft_result_left[0].re = 2.0f * s->fft_result_left[0].re;
    s->fft_result_left[0].im = 0;
    for (x = 1; x <= (fft_len >> 1); x++)
    {
        FFTSample tmpy = s->fft_result_left[fft_len-x].im - s->fft_result_left[x].im;

        s->fft_result_right[x].re = s->fft_result_left[x].im + s->fft_result_left[fft_len-x].im;
        s->fft_result_right[x].im = s->fft_result_left[x].re - s->fft_result_left[fft_len-x].re;
        s->fft_result_right[fft_len-x].re = s->fft_result_right[x].re;
        s->fft_result_right[fft_len-x].im = -s->fft_result_right[x].im;

        s->fft_result_left[x].re = s->fft_result_left[x].re + s->fft_result_left[fft_len-x].re;
        s->fft_result_left[x].im = tmpy;
        s->fft_result_left[fft_len-x].re = s->fft_result_left[x].re;
        s->fft_result_left[fft_len-x].im = -s->fft_result_left[x].im;
    }

    /* calculating cqt */
    for (x = 0; x < VIDEO_WIDTH; x++)
    {
        int u;
        float g = 1.0f / s->gamma;
        FFTComplex l = {0,0};
        FFTComplex r = {0,0};

        /* sparse dot product with the precomputed kernel for this bin */
        for (u = 0; u < s->coeffs_len[x]; u++)
        {
            FFTSample value = s->coeffs[x][u].value;
            int index = s->coeffs[x][u].index;
            l.re += value * s->fft_result_left[index].re;
            l.im += value * s->fft_result_left[index].im;
            r.re += value * s->fft_result_right[index].re;
            r.im += value * s->fft_result_right[index].im;
        }
        /* result is power, not amplitude */
        result[x][0] = l.re * l.re + l.im * l.im;
        result[x][2] = r.re * r.re + r.im * r.im;
        result[x][1] = 0.5f * (result[x][0] + result[x][2]);
        result[x][3] = result[x][1];
        /* gamma-compress and scale to 8-bit range */
        result[x][0] = 255.0f * powf(fminf(1.0f,result[x][0]), g);
        result[x][1] = 255.0f * powf(fminf(1.0f,result[x][1]), g);
        result[x][2] = 255.0f * powf(fminf(1.0f,result[x][2]), g);
    }

    /* write this transform's row into the sonogram ring buffer */
    for (x = 0; x < VIDEO_WIDTH; x++)
    {
        s->spectogram[s->spectogram_index][x][0] = result[x][0] + 0.5f;
        s->spectogram[s->spectogram_index][x][1] = result[x][1] + 0.5f;
        s->spectogram[s->spectogram_index][x][2] = result[x][2] + 0.5f;
    }

    /* drawing */
    if (!s->spectogram_count)
    {
        uint8_t *data = (uint8_t*) s->outpicref->data[0];
        int linesize = s->outpicref->linesize[0];
        float rcp_result[VIDEO_WIDTH];

        /* +0.0001f guards against division by zero for silent bins */
        for (x = 0; x < VIDEO_WIDTH; x++)
            rcp_result[x] = 1.0f / (result[x][3]+0.0001f);

        /* drawing bar */
        for (y = 0; y < SPECTOGRAM_HEIGHT; y++)
        {
            float height = (SPECTOGRAM_HEIGHT - y) * (1.0f/SPECTOGRAM_HEIGHT);
            uint8_t *lineptr = data + y * linesize;
            for (x = 0; x < VIDEO_WIDTH; x++)
            {
                float mul;
                if (result[x][3] <= height)
                {
                    *lineptr++ = 0;
                    *lineptr++ = 0;
                    *lineptr++ = 0;
                }
                else
                {
                    /* fade the bar toward its tip */
                    mul = (result[x][3] - height) * rcp_result[x];
                    *lineptr++ = mul * result[x][0] + 0.5f;
                    *lineptr++ = mul * result[x][1] + 0.5f;
                    *lineptr++ = mul * result[x][2] + 0.5f;
                }
            }

        }

        /* drawing font */
        /* text band background: replicate the current sonogram row */
        for (y = 0; y < FONT_HEIGHT; y++)
        {
            uint8_t *lineptr = data + (SPECTOGRAM_HEIGHT + y) * linesize;
            memcpy(lineptr, s->spectogram[s->spectogram_index], VIDEO_WIDTH*3);
        }
        /* one "EF G A BC D " label per octave, drawn from the 8x16 VGA
         * font scaled 2x in both directions */
        for (x = 0; x < VIDEO_WIDTH; x += VIDEO_WIDTH/10)
        {
            int u;
            static const char str[] = "EF G A BC D ";
            uint8_t *startptr = data + SPECTOGRAM_HEIGHT * linesize + x * 3;
            for (u = 0; str[u]; u++)
            {
                int v;
                for (v = 0; v < 16; v++)
                {
                    uint8_t *p = startptr + 2 * v * linesize + 16 * 3 * u;
                    int ux = x + 16 * u;
                    int mask;
                    for (mask = 0x80; mask; mask >>= 1)
                    {
                        if (mask & avpriv_vga16_font[str[u] * 16 + v])
                        {
                            p[0] = p[linesize] = 255 - s->font_color[ux];
                            p[1] = p[linesize+1] = 0;
                            p[2] = p[linesize+2] = s->font_color[ux];
                            p[3] = p[linesize+3] = 255 - s->font_color[ux+1];
                            p[4] = p[linesize+4] = 0;
                            p[5] = p[linesize+5] = s->font_color[ux+1];
                        }
                        p += 6;
                        ux += 2;
                    }
                }
            }

        }

        /* drawing spectogram/sonogram */
        if (linesize == VIDEO_WIDTH * 3)
        {
            /* tightly packed frame: copy the ring buffer in two memcpys */
            int total_length = VIDEO_WIDTH * SPECTOGRAM_HEIGHT * 3;
            int back_length = VIDEO_WIDTH * s->spectogram_index * 3;
            data += SPECTOGRAM_START * VIDEO_WIDTH * 3;
            memcpy(data, s->spectogram[s->spectogram_index], total_length - back_length);
            data += total_length - back_length;
            if(back_length)
                memcpy(data, s->spectogram[0], back_length);
        }
        else
        {
            /* padded linesize: copy row by row, unrolling the ring buffer */
            for (y = 0; y < SPECTOGRAM_HEIGHT; y++)
                memcpy(data + (SPECTOGRAM_START + y) * linesize, s->spectogram[(s->spectogram_index + y) % SPECTOGRAM_HEIGHT], VIDEO_WIDTH * 3);
        }

        s->outpicref->pts = s->frame_count;
        ret = ff_filter_frame(outlink, av_frame_clone(s->outpicref));
        s->req_fullfilled = 1;
        s->frame_count++;
    }
    s->spectogram_count = (s->spectogram_count + 1) % s->count;
    /* ring buffer scrolls upward: newest row moves toward index 0 */
    s->spectogram_index = (s->spectogram_index + SPECTOGRAM_HEIGHT - 1) % SPECTOGRAM_HEIGHT;
    return ret;
}
|
470 |
+ |
|
471 |
+static int filter_frame(AVFilterLink *inlink, AVFrame *insamples) |
|
472 |
+{ |
|
473 |
+ AVFilterContext *ctx = inlink->dst; |
|
474 |
+ ShowCQTContext *s = ctx->priv; |
|
475 |
+ int step = inlink->sample_rate / (s->fps * s->count); |
|
476 |
+ int fft_len = 1 << s->fft_bits; |
|
477 |
+ int remaining; |
|
478 |
+ float *audio_data; |
|
479 |
+ |
|
480 |
+ if (!insamples) |
|
481 |
+ { |
|
482 |
+ while (s->remaining_fill < (fft_len >> 1)) |
|
483 |
+ { |
|
484 |
+ int ret, x; |
|
485 |
+ memset(&s->fft_data[fft_len - s->remaining_fill], 0, sizeof(*s->fft_data) * s->remaining_fill); |
|
486 |
+ ret = plot_cqt(inlink); |
|
487 |
+ if (ret < 0) |
|
488 |
+ return ret; |
|
489 |
+ for (x = 0; x < (fft_len-step); x++) |
|
490 |
+ s->fft_data[x] = s->fft_data[x+step]; |
|
491 |
+ s->remaining_fill += step; |
|
492 |
+ } |
|
493 |
+ return AVERROR(EOF); |
|
494 |
+ } |
|
495 |
+ |
|
496 |
+ remaining = insamples->nb_samples; |
|
497 |
+ audio_data = (float*) insamples->data[0]; |
|
498 |
+ |
|
499 |
+ while (remaining) |
|
500 |
+ { |
|
501 |
+ if (remaining >= s->remaining_fill) |
|
502 |
+ { |
|
503 |
+ int i = insamples->nb_samples - remaining; |
|
504 |
+ int j = fft_len - s->remaining_fill; |
|
505 |
+ int m, ret; |
|
506 |
+ for (m = 0; m < s->remaining_fill; m++) |
|
507 |
+ { |
|
508 |
+ s->fft_data[j+m].re = audio_data[2*(i+m)]; |
|
509 |
+ s->fft_data[j+m].im = audio_data[2*(i+m)+1]; |
|
510 |
+ } |
|
511 |
+ ret = plot_cqt(inlink); |
|
512 |
+ if (ret < 0) |
|
513 |
+ { |
|
514 |
+ av_frame_free(&insamples); |
|
515 |
+ return ret; |
|
516 |
+ } |
|
517 |
+ remaining -= s->remaining_fill; |
|
518 |
+ for (m = 0; m < fft_len-step; m++) |
|
519 |
+ s->fft_data[m] = s->fft_data[m+step]; |
|
520 |
+ s->remaining_fill = step; |
|
521 |
+ } |
|
522 |
+ else |
|
523 |
+ { |
|
524 |
+ int i = insamples->nb_samples - remaining; |
|
525 |
+ int j = fft_len - s->remaining_fill; |
|
526 |
+ int m; |
|
527 |
+ for (m = 0; m < remaining; m++) |
|
528 |
+ { |
|
529 |
+ s->fft_data[m+j].re = audio_data[2*(i+m)]; |
|
530 |
+ s->fft_data[m+j].im = audio_data[2*(i+m)+1]; |
|
531 |
+ } |
|
532 |
+ s->remaining_fill -= remaining; |
|
533 |
+ remaining = 0; |
|
534 |
+ } |
|
535 |
+ } |
|
536 |
+ av_frame_free(&insamples); |
|
537 |
+ return 0; |
|
538 |
+} |
|
539 |
+ |
|
540 |
/* Pull audio from upstream until plot_cqt() has emitted at least one
 * video frame (req_fullfilled) or the input errors out.  On upstream
 * EOF, flush the partially filled transform buffer via
 * filter_frame(inlink, NULL); the original EOF is still returned so the
 * framework sees end of stream. */
static int request_frame(AVFilterLink *outlink)
{
    ShowCQTContext *s = outlink->src->priv;
    AVFilterLink *inlink = outlink->src->inputs[0];
    int ret;

    s->req_fullfilled = 0;
    do {
        ret = ff_request_frame(inlink);
    } while (!s->req_fullfilled && ret >= 0);

    /* s->outpicref check guards against flushing a never-configured filter */
    if (ret == AVERROR_EOF && s->outpicref)
        filter_frame(inlink, NULL);
    return ret;
}
|
555 |
+ |
|
556 |
/* single stereo-audio input; samples are consumed by filter_frame() */
static const AVFilterPad showcqt_inputs[] = {
    {
        .name = "default",
        .type = AVMEDIA_TYPE_AUDIO,
        .filter_frame = filter_frame,
    },
    { NULL }
};

/* single RGB24 video output, configured in config_output() */
static const AVFilterPad showcqt_outputs[] = {
    {
        .name = "default",
        .type = AVMEDIA_TYPE_VIDEO,
        .config_props = config_output,
        .request_frame = request_frame,
    },
    { NULL }
};

AVFilter ff_avf_showcqt = {
    .name = "showcqt",
    .description = NULL_IF_CONFIG_SMALL("Convert input audio to a CQT (Constant Q Transform) spectrum video output."),
    .uninit = uninit,
    .query_formats = query_formats,
    .priv_size = sizeof(ShowCQTContext),
    .inputs = showcqt_inputs,
    .outputs = showcqt_outputs,
    .priv_class = &showcqt_class,
};
... | ... |
@@ -30,7 +30,7 @@ |
30 | 30 |
#include "libavutil/version.h" |
31 | 31 |
|
32 | 32 |
#define LIBAVFILTER_VERSION_MAJOR 4 |
33 |
-#define LIBAVFILTER_VERSION_MINOR 5 |
|
33 |
+#define LIBAVFILTER_VERSION_MINOR 6 |
|
34 | 34 |
#define LIBAVFILTER_VERSION_MICRO 100 |
35 | 35 |
|
36 | 36 |
#define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \ |