Browse code

avfilter: new multimedia filter avf_showcqt.c

this filter is the same as showspectrum but with constant Q transform,
so frequency is spaced logarithmically

Muhammad Faiz authored on 2014/06/05 13:42:38
Showing 7 changed files
... ...
@@ -26,6 +26,7 @@ version <next>:
26 26
 - native Opus decoder
27 27
 - display matrix export and rotation api
28 28
 - WebVTT encoder
29
+- showcqt multimedia filter
29 30
 
30 31
 
31 32
 version 2.2:
... ...
@@ -341,6 +341,7 @@ Filters:
341 341
   af_ladspa.c                           Paul B Mahol
342 342
   af_pan.c                              Nicolas George
343 343
   avf_avectorscope.c                    Paul B Mahol
344
+  avf_showcqt.c                         Muhammad Faiz
344 345
   vf_blend.c                            Paul B Mahol
345 346
   vf_colorbalance.c                     Paul B Mahol
346 347
   vf_dejudder.c                         Nicholas Robbins
... ...
@@ -10133,6 +10133,76 @@ settb=AVTB
10133 10133
 @end example
10134 10134
 @end itemize
10135 10135
 
10136
+@section showcqt
10137
+Convert input audio to a video output (at full HD resolution), representing
10138
+frequency spectrum logarithmically (using constant Q transform with
10139
+Brown-Puckette algorithm), with musical tone scale, from E0 to D#10 (10 octaves).
10140
+
10141
+The filter accepts the following options:
10142
+
10143
+@table @option
10144
+@item volume
10145
+Specify the transform volume (multiplier). Acceptable value is [0.1, 100.0].
10146
+Default value is @code{16.0}.
10147
+
10148
+@item timeclamp
10149
+Specify the transform timeclamp. At low frequency, there is a trade-off between
10150
+accuracy in the time domain and in the frequency domain. If timeclamp is lower,
10151
+events in the time domain are represented more accurately (such as a fast bass drum),
10152
+otherwise events in the frequency domain are represented more accurately
10153
+(such as a bass guitar). Acceptable value is [0.1, 1.0]. Default value is @code{0.17}.
10154
+
10155
+@item coeffclamp
10156
+Specify the transform coeffclamp. If coeffclamp is lower, transform is
10157
+more accurate, otherwise transform is faster. Acceptable value is [0.1, 10.0].
10158
+Default value is @code{1.0}.
10159
+
10160
+@item gamma
10161
+Specify gamma. Lower gamma gives the spectrum more contrast, higher gamma
10162
+gives the spectrum more range. Acceptable value is [1.0, 7.0].
10163
+Default value is @code{3.0}.
10164
+
10165
+@item fps
10166
+Specify video fps. Default value is @code{25}.
10167
+
10168
+@item count
10169
+Specify the number of transforms per frame, so there are fps*count transforms
10170
+per second. Note that the audio sample rate must be divisible by fps*count.
10171
+Default value is @code{6}.
10172
+
10173
+@end table
10174
+
10175
+@subsection Examples
10176
+
10177
+@itemize
10178
+@item
10179
+Playing audio while showing the spectrum:
10180
+@example
10181
+ffplay -f lavfi 'amovie=a.mp3, asplit [a][out1]; [a] showcqt [out0]'
10182
+@end example
10183
+
10184
+@item
10185
+Same as above, but with frame rate 30 fps:
10186
+@example
10187
+ffplay -f lavfi 'amovie=a.mp3, asplit [a][out1]; [a] showcqt=fps=30:count=5 [out0]'
10188
+@end example
10189
+
10190
+@item
10191
+A1 and its harmonics: A1, A2, (near)E3, A3:
10192
+@example
10193
+ffplay -f lavfi 'aevalsrc=0.1*sin(2*PI*55*t)+0.1*sin(4*PI*55*t)+0.1*sin(6*PI*55*t)+0.1*sin(8*PI*55*t),
10194
+                 asplit[a][out1]; [a] showcqt [out0]'
10195
+@end example
10196
+
10197
+@item
10198
+Same as above, but with more accuracy in frequency domain (and slower):
10199
+@example
10200
+ffplay -f lavfi 'aevalsrc=0.1*sin(2*PI*55*t)+0.1*sin(4*PI*55*t)+0.1*sin(6*PI*55*t)+0.1*sin(8*PI*55*t),
10201
+                 asplit[a][out1]; [a] showcqt=timeclamp=0.5 [out0]'
10202
+@end example
10203
+
10204
+@end itemize
10205
+
10136 10206
 @section showspectrum
10137 10207
 
10138 10208
 Convert input audio to a video output, representing the audio frequency
... ...
@@ -224,6 +224,7 @@ OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_uspp.o
224 224
 # multimedia filters
225 225
 OBJS-$(CONFIG_AVECTORSCOPE_FILTER)           += avf_avectorscope.o
226 226
 OBJS-$(CONFIG_CONCAT_FILTER)                 += avf_concat.o
227
+OBJS-$(CONFIG_SHOWCQT_FILTER)                += avf_showcqt.o
227 228
 OBJS-$(CONFIG_SHOWSPECTRUM_FILTER)           += avf_showspectrum.o
228 229
 OBJS-$(CONFIG_SHOWWAVES_FILTER)              += avf_showwaves.o
229 230
 
... ...
@@ -231,6 +231,7 @@ void avfilter_register_all(void)
231 231
     /* multimedia filters */
232 232
     REGISTER_FILTER(AVECTORSCOPE,   avectorscope,   avf);
233 233
     REGISTER_FILTER(CONCAT,         concat,         avf);
234
+    REGISTER_FILTER(SHOWCQT,        showcqt,        avf);
234 235
     REGISTER_FILTER(SHOWSPECTRUM,   showspectrum,   avf);
235 236
     REGISTER_FILTER(SHOWWAVES,      showwaves,      avf);
236 237
 
237 238
new file mode 100644
... ...
@@ -0,0 +1,585 @@
0
+/*
1
+ * Copyright (c) 2014 Muhammad Faiz <mfcc64@gmail.com>
2
+ *
3
+ * This file is part of FFmpeg.
4
+ *
5
+ * FFmpeg is free software; you can redistribute it and/or
6
+ * modify it under the terms of the GNU Lesser General Public
7
+ * License as published by the Free Software Foundation; either
8
+ * version 2.1 of the License, or (at your option) any later version.
9
+ *
10
+ * FFmpeg is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
+ * Lesser General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Lesser General Public
16
+ * License along with FFmpeg; if not, write to the Free Software
17
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ */
19
+
20
+#include "libavcodec/avfft.h"
21
+#include "libavutil/avassert.h"
22
+#include "libavutil/channel_layout.h"
23
+#include "libavutil/opt.h"
24
+#include "libavutil/xga_font_data.h"
25
+#include "libavutil/qsort.h"
26
+#include "libavutil/time.h"
27
+#include "avfilter.h"
28
+#include "internal.h"
29
+
30
+#include <math.h>
31
+#include <stdlib.h>
32
+
33
+/* This filter performs a constant Q transform with 16 bins per semitone, using
34
+ * the Brown-Puckette algorithm, from E0 to D#10 (10 octaves),
35
+ * so there are 16 bins/semitone * 12 semitones/octave * 10 octaves = 1920 bins,
36
+ * which matches the width of full HD resolution. */
37
+
38
/* Output picture geometry: one pixel column per transform bin. */
#define VIDEO_WIDTH 1920
#define VIDEO_HEIGHT 1080
/* Height in pixels of the strip that carries the note names. */
#define FONT_HEIGHT 32
/* The rows left over after the label strip are split evenly between the
 * bar graph (top) and the scrolling spectrogram (bottom). */
#define SPECTOGRAM_HEIGHT ((VIDEO_HEIGHT-FONT_HEIGHT)/2)
/* First picture row of the scrolling spectrogram. */
#define SPECTOGRAM_START (VIDEO_HEIGHT-SPECTOGRAM_HEIGHT)
/* Centre frequency in Hz of column 0; column k is BASE_FREQ * 2^(k/192).
 * NOTE(review): presumably E0 shifted down by a few bins so that every
 * semitone is centred in its 16 columns -- confirm. */
#define BASE_FREQ 20.051392800492
/* Scale applied to the user's coeffclamp option when pruning kernel
 * coefficients. */
#define COEFF_CLAMP 1.0e-4
45
+
46
+typedef struct {
47
+    FFTSample value;
48
+    int index;
49
+} SparseCoeff;
50
+
51
+static inline int qsort_sparsecoeff(const SparseCoeff *a, const SparseCoeff *b)
52
+{
53
+    if (fabsf(a->value) >= fabsf(b->value))
54
+        return 1;
55
+    else
56
+        return -1;
57
+}
58
+
59
+typedef struct {
60
+    const AVClass *class;
61
+    AVFrame *outpicref;
62
+    FFTContext *fft_context;
63
+    FFTComplex *fft_data;
64
+    FFTComplex *fft_result_left;
65
+    FFTComplex *fft_result_right;
66
+    SparseCoeff *coeff_sort;
67
+    SparseCoeff *coeffs[VIDEO_WIDTH];
68
+    int coeffs_len[VIDEO_WIDTH];
69
+    uint8_t font_color[VIDEO_WIDTH];
70
+    uint8_t spectogram[SPECTOGRAM_HEIGHT][VIDEO_WIDTH][3];
71
+    int64_t frame_count;
72
+    int spectogram_count;
73
+    int spectogram_index;
74
+    int fft_bits;
75
+    int req_fullfilled;
76
+    int remaining_fill;
77
+    double volume;
78
+    double timeclamp;   /* lower timeclamp, time-accurate, higher timeclamp, freq-accurate (at low freq)*/
79
+    float coeffclamp;   /* lower coeffclamp, more precise, higher coeffclamp, faster */
80
+    float gamma;        /* lower gamma, more contrast, higher gamma, more range */
81
+    int fps;            /* the required fps is so strict, so it's enough to be int, but 24000/1001 etc cannot be encoded */
82
+    int count;          /* fps * count = transform rate */
83
+} ShowCQTContext;
84
+
85
+#define OFFSET(x) offsetof(ShowCQTContext, x)
86
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
87
+
88
+static const AVOption showcqt_options[] = {
89
+    { "volume", "set volume", OFFSET(volume), AV_OPT_TYPE_DOUBLE, { .dbl = 16 }, 0.1, 100, FLAGS },
90
+    { "timeclamp", "set timeclamp", OFFSET(timeclamp), AV_OPT_TYPE_DOUBLE, { .dbl = 0.17 }, 0.1, 1.0, FLAGS },
91
+    { "coeffclamp", "set coeffclamp", OFFSET(coeffclamp), AV_OPT_TYPE_FLOAT, { .dbl = 1 }, 0.1, 10, FLAGS },
92
+    { "gamma", "set gamma", OFFSET(gamma), AV_OPT_TYPE_FLOAT, { .dbl = 3 }, 1, 7, FLAGS },
93
+    { "fps", "set video fps", OFFSET(fps), AV_OPT_TYPE_INT, { .i64 = 25 }, 10, 100, FLAGS },
94
+    { "count", "set number of transform per frame", OFFSET(count), AV_OPT_TYPE_INT, { .i64 = 6 }, 1, 30, FLAGS },
95
+    { NULL }
96
+};
97
+
98
+AVFILTER_DEFINE_CLASS(showcqt);
99
+
100
+static av_cold void uninit(AVFilterContext *ctx)
101
+{
102
+    int k;
103
+    ShowCQTContext *s = ctx->priv;
104
+    av_fft_end(s->fft_context);
105
+    s->fft_context = NULL;
106
+    for (k = 0; k < VIDEO_WIDTH; k++)
107
+        av_freep(&s->coeffs[k]);
108
+    av_freep(&s->fft_data);
109
+    av_freep(&s->fft_result_left);
110
+    av_freep(&s->fft_result_right);
111
+    av_freep(&s->coeff_sort);
112
+    av_frame_free(&s->outpicref);
113
+}
114
+
115
+static int query_formats(AVFilterContext *ctx)
116
+{
117
+    AVFilterFormats *formats = NULL;
118
+    AVFilterChannelLayouts *layouts = NULL;
119
+    AVFilterLink *inlink = ctx->inputs[0];
120
+    AVFilterLink *outlink = ctx->outputs[0];
121
+    static const enum AVSampleFormat sample_fmts[] = { AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_NONE };
122
+    static const enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_RGB24, AV_PIX_FMT_NONE };
123
+    static const int64_t channel_layouts[] = { AV_CH_LAYOUT_STEREO, AV_CH_LAYOUT_STEREO_DOWNMIX, -1 };
124
+    static const int samplerates[] = { 44100, 48000, -1 };
125
+
126
+    /* set input audio formats */
127
+    formats = ff_make_format_list(sample_fmts);
128
+    if (!formats)
129
+        return AVERROR(ENOMEM);
130
+    ff_formats_ref(formats, &inlink->out_formats);
131
+
132
+    layouts = avfilter_make_format64_list(channel_layouts);
133
+    if (!layouts)
134
+        return AVERROR(ENOMEM);
135
+    ff_channel_layouts_ref(layouts, &inlink->out_channel_layouts);
136
+
137
+    formats = ff_make_format_list(samplerates);
138
+    if (!formats)
139
+        return AVERROR(ENOMEM);
140
+    ff_formats_ref(formats, &inlink->out_samplerates);
141
+
142
+    /* set output video format */
143
+    formats = ff_make_format_list(pix_fmts);
144
+    if (!formats)
145
+        return AVERROR(ENOMEM);
146
+    ff_formats_ref(formats, &outlink->in_formats);
147
+
148
+    return 0;
149
+}
150
+
151
+static int config_output(AVFilterLink *outlink)
152
+{
153
+    AVFilterContext *ctx = outlink->src;
154
+    AVFilterLink *inlink = ctx->inputs[0];
155
+    ShowCQTContext *s = ctx->priv;
156
+    int fft_len, k, x, y;
157
+    int num_coeffs = 0;
158
+    int rate = inlink->sample_rate;
159
+    double max_len = rate * (double) s->timeclamp;
160
+    int64_t start_time, end_time;
161
+    s->fft_bits = ceil(log2(max_len));
162
+    fft_len = 1 << s->fft_bits;
163
+
164
+    if (rate % (s->fps * s->count))
165
+    {
166
+        av_log(ctx, AV_LOG_ERROR, "Rate (%u) is not divisible by fps*count (%u*%u)\n", rate, s->fps, s->count);
167
+        return AVERROR(EINVAL);
168
+    }
169
+
170
+    s->fft_data = av_malloc_array(fft_len, sizeof(*s->fft_data));
171
+    s->coeff_sort = av_malloc_array(fft_len, sizeof(*s->coeff_sort));
172
+    s->fft_result_left = av_malloc_array(fft_len, sizeof(*s->fft_result_left));
173
+    s->fft_result_right = av_malloc_array(fft_len, sizeof(*s->fft_result_right));
174
+    s->fft_context = av_fft_init(s->fft_bits, 0);
175
+
176
+    if (!s->fft_data || !s->coeff_sort || !s->fft_result_left || !s->fft_result_right || !s->fft_context)
177
+        return AVERROR(ENOMEM);
178
+
179
+    /* initializing font */
180
+    for (x = 0; x < VIDEO_WIDTH; x++)
181
+    {
182
+        if (x >= (12*3+8)*16 && x < (12*4+8)*16)
183
+        {
184
+            float fx = (x-(12*3+8)*16) * (1.0f/192.0f);
185
+            float sv = sinf(M_PI*fx);
186
+            s->font_color[x] = sv*sv*255.0f + 0.5f;
187
+        }
188
+        else
189
+            s->font_color[x] = 0;
190
+    }
191
+
192
+    av_log(ctx, AV_LOG_INFO, "Calculating spectral kernel, please wait\n");
193
+    start_time = av_gettime_relative();
194
+    for (k = 0; k < VIDEO_WIDTH; k++)
195
+    {
196
+        int hlen = fft_len >> 1;
197
+        float total = 0;
198
+        float partial = 0;
199
+        double freq = BASE_FREQ * exp2(k * (1.0/192.0));
200
+        double tlen = rate * (24.0 * 16.0) /freq;
201
+        /* a window function from Albert H. Nuttall,
202
+         * "Some Windows with Very Good Sidelobe Behavior"
203
+         * -93.32 dB peak sidelobe and 18 dB/octave asymptotic decay
204
+         * coefficient normalized to a0 = 1 */
205
+        double a0 = 0.355768;
206
+        double a1 = 0.487396/a0;
207
+        double a2 = 0.144232/a0;
208
+        double a3 = 0.012604/a0;
209
+        double sv_step, cv_step, sv, cv;
210
+        double sw_step, cw_step, sw, cw, w;
211
+
212
+        tlen = tlen * max_len / (tlen + max_len);
213
+        s->fft_data[0].re = 0;
214
+        s->fft_data[0].im = 0;
215
+        s->fft_data[hlen].re = (1.0 + a1 + a2 + a3) * (1.0/tlen) * s->volume * (1.0/fft_len);
216
+        s->fft_data[hlen].im = 0;
217
+        sv_step = sv = sin(2.0*M_PI*freq*(1.0/rate));
218
+        cv_step = cv = cos(2.0*M_PI*freq*(1.0/rate));
219
+        /* also optimizing window func */
220
+        sw_step = sw = sin(2.0*M_PI*(1.0/tlen));
221
+        cw_step = cw = cos(2.0*M_PI*(1.0/tlen));
222
+        for (x = 1; x < 0.5 * tlen; x++)
223
+        {
224
+            double cv_tmp, cw_tmp;
225
+            double cw2, cw3, sw2;
226
+
227
+            cw2 = cw * cw - sw * sw;
228
+            sw2 = cw * sw + sw * cw;
229
+            cw3 = cw * cw2 - sw * sw2;
230
+            w = (1.0 + a1 * cw + a2 * cw2 + a3 * cw3) * (1.0/tlen) * s->volume * (1.0/fft_len);
231
+            s->fft_data[hlen + x].re = w * cv;
232
+            s->fft_data[hlen + x].im = w * sv;
233
+            s->fft_data[hlen - x].re = s->fft_data[hlen + x].re;
234
+            s->fft_data[hlen - x].im = -s->fft_data[hlen + x].im;
235
+
236
+            cv_tmp = cv * cv_step - sv * sv_step;
237
+            sv = sv * cv_step + cv * sv_step;
238
+            cv = cv_tmp;
239
+            cw_tmp = cw * cw_step - sw * sw_step;
240
+            sw = sw * cw_step + cw * sw_step;
241
+            cw = cw_tmp;
242
+        }
243
+        for (; x < hlen; x++)
244
+        {
245
+            s->fft_data[hlen + x].re = 0;
246
+            s->fft_data[hlen + x].im = 0;
247
+            s->fft_data[hlen - x].re = 0;
248
+            s->fft_data[hlen - x].im = 0;
249
+        }
250
+        av_fft_permute(s->fft_context, s->fft_data);
251
+        av_fft_calc(s->fft_context, s->fft_data);
252
+
253
+        for (x = 0; x < fft_len; x++)
254
+        {
255
+            s->coeff_sort[x].index = x;
256
+            s->coeff_sort[x].value = s->fft_data[x].re;
257
+        }
258
+
259
+        AV_QSORT(s->coeff_sort, fft_len, SparseCoeff, qsort_sparsecoeff);
260
+        for (x = 0; x < fft_len; x++)
261
+            total += fabsf(s->coeff_sort[x].value);
262
+
263
+        for (x = 0; x < fft_len; x++)
264
+        {
265
+            partial += fabsf(s->coeff_sort[x].value);
266
+            if (partial > (total * s->coeffclamp * COEFF_CLAMP))
267
+            {
268
+                s->coeffs_len[k] = fft_len - x;
269
+                num_coeffs += s->coeffs_len[k];
270
+                s->coeffs[k] = av_malloc_array(s->coeffs_len[k], sizeof(*s->coeffs[k]));
271
+                if (!s->coeffs[k])
272
+                    return AVERROR(ENOMEM);
273
+                for (y = 0; y < s->coeffs_len[k]; y++)
274
+                    s->coeffs[k][y] = s->coeff_sort[x+y];
275
+                break;
276
+            }
277
+        }
278
+    }
279
+    end_time = av_gettime_relative();
280
+    av_log(ctx, AV_LOG_INFO, "Elapsed time %.6f s (fft_len=%u, num_coeffs=%u)\n", 1e-6 * (end_time-start_time), fft_len, num_coeffs);
281
+
282
+    outlink->w = VIDEO_WIDTH;
283
+    outlink->h = VIDEO_HEIGHT;
284
+
285
+    s->req_fullfilled = 0;
286
+    s->spectogram_index = 0;
287
+    s->frame_count = 0;
288
+    s->spectogram_count = 0;
289
+    s->remaining_fill = fft_len >> 1;
290
+    memset(s->spectogram, 0, VIDEO_WIDTH * SPECTOGRAM_HEIGHT * 3);
291
+    memset(s->fft_data, 0, fft_len * sizeof(*s->fft_data));
292
+
293
+    s->outpicref = ff_get_video_buffer(outlink, outlink->w, outlink->h);
294
+    if (!s->outpicref)
295
+        return AVERROR(ENOMEM);
296
+
297
+    outlink->sample_aspect_ratio = av_make_q(1, 1);
298
+    outlink->time_base = av_make_q(1, s->fps);
299
+    outlink->frame_rate = av_make_q(s->fps, 1);
300
+    return 0;
301
+}
302
+
303
+static int plot_cqt(AVFilterLink *inlink)
304
+{
305
+    AVFilterContext *ctx = inlink->dst;
306
+    ShowCQTContext *s = ctx->priv;
307
+    AVFilterLink *outlink = ctx->outputs[0];
308
+    int fft_len = 1 << s->fft_bits;
309
+    FFTSample result[VIDEO_WIDTH][4];
310
+    int x, y, ret = 0;
311
+
312
+    /* real part contains left samples, imaginary part contains right samples */
313
+    memcpy(s->fft_result_left, s->fft_data, fft_len * sizeof(*s->fft_data));
314
+    av_fft_permute(s->fft_context, s->fft_result_left);
315
+    av_fft_calc(s->fft_context, s->fft_result_left);
316
+
317
+    /* separate left and right, (and multiply by 2.0) */
318
+    s->fft_result_right[0].re = 2.0f * s->fft_result_left[0].im;
319
+    s->fft_result_right[0].im = 0;
320
+    s->fft_result_left[0].re = 2.0f * s->fft_result_left[0].re;
321
+    s->fft_result_left[0].im = 0;
322
+    for (x = 1; x <= (fft_len >> 1); x++)
323
+    {
324
+        FFTSample tmpy = s->fft_result_left[fft_len-x].im - s->fft_result_left[x].im;
325
+
326
+        s->fft_result_right[x].re = s->fft_result_left[x].im + s->fft_result_left[fft_len-x].im;
327
+        s->fft_result_right[x].im = s->fft_result_left[x].re - s->fft_result_left[fft_len-x].re;
328
+        s->fft_result_right[fft_len-x].re = s->fft_result_right[x].re;
329
+        s->fft_result_right[fft_len-x].im = -s->fft_result_right[x].im;
330
+
331
+        s->fft_result_left[x].re = s->fft_result_left[x].re + s->fft_result_left[fft_len-x].re;
332
+        s->fft_result_left[x].im = tmpy;
333
+        s->fft_result_left[fft_len-x].re = s->fft_result_left[x].re;
334
+        s->fft_result_left[fft_len-x].im = -s->fft_result_left[x].im;
335
+    }
336
+
337
+    /* calculating cqt */
338
+    for (x = 0; x < VIDEO_WIDTH; x++)
339
+    {
340
+        int u;
341
+        float g = 1.0f / s->gamma;
342
+        FFTComplex l = {0,0};
343
+        FFTComplex r = {0,0};
344
+
345
+        for (u = 0; u < s->coeffs_len[x]; u++)
346
+        {
347
+            FFTSample value = s->coeffs[x][u].value;
348
+            int index = s->coeffs[x][u].index;
349
+            l.re += value * s->fft_result_left[index].re;
350
+            l.im += value * s->fft_result_left[index].im;
351
+            r.re += value * s->fft_result_right[index].re;
352
+            r.im += value * s->fft_result_right[index].im;
353
+        }
354
+        /* result is power, not amplitude */
355
+        result[x][0] = l.re * l.re + l.im * l.im;
356
+        result[x][2] = r.re * r.re + r.im * r.im;
357
+        result[x][1] = 0.5f * (result[x][0] + result[x][2]);
358
+        result[x][3] = result[x][1];
359
+        result[x][0] = 255.0f * powf(fminf(1.0f,result[x][0]), g);
360
+        result[x][1] = 255.0f * powf(fminf(1.0f,result[x][1]), g);
361
+        result[x][2] = 255.0f * powf(fminf(1.0f,result[x][2]), g);
362
+    }
363
+
364
+    for (x = 0; x < VIDEO_WIDTH; x++)
365
+    {
366
+        s->spectogram[s->spectogram_index][x][0] = result[x][0] + 0.5f;
367
+        s->spectogram[s->spectogram_index][x][1] = result[x][1] + 0.5f;
368
+        s->spectogram[s->spectogram_index][x][2] = result[x][2] + 0.5f;
369
+    }
370
+
371
+    /* drawing */
372
+    if (!s->spectogram_count)
373
+    {
374
+        uint8_t *data = (uint8_t*) s->outpicref->data[0];
375
+        int linesize = s->outpicref->linesize[0];
376
+        float rcp_result[VIDEO_WIDTH];
377
+
378
+        for (x = 0; x < VIDEO_WIDTH; x++)
379
+            rcp_result[x] = 1.0f / (result[x][3]+0.0001f);
380
+
381
+        /* drawing bar */
382
+        for (y = 0; y < SPECTOGRAM_HEIGHT; y++)
383
+        {
384
+            float height = (SPECTOGRAM_HEIGHT - y) * (1.0f/SPECTOGRAM_HEIGHT);
385
+            uint8_t *lineptr = data + y * linesize;
386
+            for (x = 0; x < VIDEO_WIDTH; x++)
387
+            {
388
+                float mul;
389
+                if (result[x][3] <= height)
390
+                {
391
+                    *lineptr++ = 0;
392
+                    *lineptr++ = 0;
393
+                    *lineptr++ = 0;
394
+                }
395
+                else
396
+                {
397
+                    mul = (result[x][3] - height) * rcp_result[x];
398
+                    *lineptr++ = mul * result[x][0] + 0.5f;
399
+                    *lineptr++ = mul * result[x][1] + 0.5f;
400
+                    *lineptr++ = mul * result[x][2] + 0.5f;
401
+                }
402
+            }
403
+
404
+        }
405
+
406
+        /* drawing font */
407
+        for (y = 0; y < FONT_HEIGHT; y++)
408
+        {
409
+            uint8_t *lineptr = data + (SPECTOGRAM_HEIGHT + y) * linesize;
410
+            memcpy(lineptr, s->spectogram[s->spectogram_index], VIDEO_WIDTH*3);
411
+        }
412
+        for (x = 0; x < VIDEO_WIDTH; x += VIDEO_WIDTH/10)
413
+        {
414
+            int u;
415
+            static const char str[] = "EF G A BC D ";
416
+            uint8_t *startptr = data + SPECTOGRAM_HEIGHT * linesize + x * 3;
417
+            for (u = 0; str[u]; u++)
418
+            {
419
+                int v;
420
+                for (v = 0; v < 16; v++)
421
+                {
422
+                    uint8_t *p = startptr + 2 * v * linesize + 16 * 3 * u;
423
+                    int ux = x + 16 * u;
424
+                    int mask;
425
+                    for (mask = 0x80; mask; mask >>= 1)
426
+                    {
427
+                        if (mask & avpriv_vga16_font[str[u] * 16 + v])
428
+                        {
429
+                            p[0] = p[linesize] = 255 - s->font_color[ux];
430
+                            p[1] = p[linesize+1] = 0;
431
+                            p[2] = p[linesize+2] = s->font_color[ux];
432
+                            p[3] = p[linesize+3] = 255 - s->font_color[ux+1];
433
+                            p[4] = p[linesize+4] = 0;
434
+                            p[5] = p[linesize+5] = s->font_color[ux+1];
435
+                        }
436
+                        p += 6;
437
+                        ux += 2;
438
+                    }
439
+                }
440
+            }
441
+
442
+        }
443
+
444
+        /* drawing spectogram/sonogram */
445
+        if (linesize == VIDEO_WIDTH * 3)
446
+        {
447
+            int total_length = VIDEO_WIDTH * SPECTOGRAM_HEIGHT * 3;
448
+            int back_length = VIDEO_WIDTH * s->spectogram_index * 3;
449
+            data += SPECTOGRAM_START * VIDEO_WIDTH * 3;
450
+            memcpy(data, s->spectogram[s->spectogram_index], total_length - back_length);
451
+            data += total_length - back_length;
452
+            if(back_length)
453
+                memcpy(data, s->spectogram[0], back_length);
454
+        }
455
+        else
456
+        {
457
+            for (y = 0; y < SPECTOGRAM_HEIGHT; y++)
458
+                memcpy(data + (SPECTOGRAM_START + y) * linesize, s->spectogram[(s->spectogram_index + y) % SPECTOGRAM_HEIGHT], VIDEO_WIDTH * 3);
459
+        }
460
+
461
+        s->outpicref->pts = s->frame_count;
462
+        ret = ff_filter_frame(outlink, av_frame_clone(s->outpicref));
463
+        s->req_fullfilled = 1;
464
+        s->frame_count++;
465
+    }
466
+    s->spectogram_count = (s->spectogram_count + 1) % s->count;
467
+    s->spectogram_index = (s->spectogram_index + SPECTOGRAM_HEIGHT - 1) % SPECTOGRAM_HEIGHT;
468
+    return ret;
469
+}
470
+
471
+static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
472
+{
473
+    AVFilterContext *ctx = inlink->dst;
474
+    ShowCQTContext *s = ctx->priv;
475
+    int step = inlink->sample_rate / (s->fps * s->count);
476
+    int fft_len = 1 << s->fft_bits;
477
+    int remaining;
478
+    float *audio_data;
479
+
480
+    if (!insamples)
481
+    {
482
+        while (s->remaining_fill < (fft_len >> 1))
483
+        {
484
+            int ret, x;
485
+            memset(&s->fft_data[fft_len - s->remaining_fill], 0, sizeof(*s->fft_data) * s->remaining_fill);
486
+            ret = plot_cqt(inlink);
487
+            if (ret < 0)
488
+                return ret;
489
+            for (x = 0; x < (fft_len-step); x++)
490
+                s->fft_data[x] = s->fft_data[x+step];
491
+            s->remaining_fill += step;
492
+        }
493
+        return AVERROR(EOF);
494
+    }
495
+
496
+    remaining = insamples->nb_samples;
497
+    audio_data = (float*) insamples->data[0];
498
+
499
+    while (remaining)
500
+    {
501
+        if (remaining >= s->remaining_fill)
502
+        {
503
+            int i = insamples->nb_samples - remaining;
504
+            int j = fft_len - s->remaining_fill;
505
+            int m, ret;
506
+            for (m = 0; m < s->remaining_fill; m++)
507
+            {
508
+                s->fft_data[j+m].re = audio_data[2*(i+m)];
509
+                s->fft_data[j+m].im = audio_data[2*(i+m)+1];
510
+            }
511
+            ret = plot_cqt(inlink);
512
+            if (ret < 0)
513
+            {
514
+                av_frame_free(&insamples);
515
+                return ret;
516
+            }
517
+            remaining -= s->remaining_fill;
518
+            for (m = 0; m < fft_len-step; m++)
519
+                s->fft_data[m] = s->fft_data[m+step];
520
+            s->remaining_fill = step;
521
+        }
522
+        else
523
+        {
524
+            int i = insamples->nb_samples - remaining;
525
+            int j = fft_len - s->remaining_fill;
526
+            int m;
527
+            for (m = 0; m < remaining; m++)
528
+            {
529
+                s->fft_data[m+j].re = audio_data[2*(i+m)];
530
+                s->fft_data[m+j].im = audio_data[2*(i+m)+1];
531
+            }
532
+            s->remaining_fill -= remaining;
533
+            remaining = 0;
534
+        }
535
+    }
536
+    av_frame_free(&insamples);
537
+    return 0;
538
+}
539
+
540
+static int request_frame(AVFilterLink *outlink)
541
+{
542
+    ShowCQTContext *s = outlink->src->priv;
543
+    AVFilterLink *inlink = outlink->src->inputs[0];
544
+    int ret;
545
+
546
+    s->req_fullfilled = 0;
547
+    do {
548
+        ret = ff_request_frame(inlink);
549
+    } while (!s->req_fullfilled && ret >= 0);
550
+
551
+    if (ret == AVERROR_EOF && s->outpicref)
552
+        filter_frame(inlink, NULL);
553
+    return ret;
554
+}
555
+
556
+static const AVFilterPad showcqt_inputs[] = {
557
+    {
558
+        .name         = "default",
559
+        .type         = AVMEDIA_TYPE_AUDIO,
560
+        .filter_frame = filter_frame,
561
+    },
562
+    { NULL }
563
+};
564
+
565
+static const AVFilterPad showcqt_outputs[] = {
566
+    {
567
+        .name          = "default",
568
+        .type          = AVMEDIA_TYPE_VIDEO,
569
+        .config_props  = config_output,
570
+        .request_frame = request_frame,
571
+    },
572
+    { NULL }
573
+};
574
+
575
+AVFilter ff_avf_showcqt = {
576
+    .name          = "showcqt",
577
+    .description   = NULL_IF_CONFIG_SMALL("Convert input audio to a CQT (Constant Q Transform) spectrum video output."),
578
+    .uninit        = uninit,
579
+    .query_formats = query_formats,
580
+    .priv_size     = sizeof(ShowCQTContext),
581
+    .inputs        = showcqt_inputs,
582
+    .outputs       = showcqt_outputs,
583
+    .priv_class    = &showcqt_class,
584
+};
... ...
@@ -30,7 +30,7 @@
30 30
 #include "libavutil/version.h"
31 31
 
32 32
 #define LIBAVFILTER_VERSION_MAJOR   4
33
-#define LIBAVFILTER_VERSION_MINOR   5
33
+#define LIBAVFILTER_VERSION_MINOR   6
34 34
 #define LIBAVFILTER_VERSION_MICRO 100
35 35
 
36 36
 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \