Browse code

avfilter/af_firequalizer: add fft2 option

Convolve two channels at a time using a single complex FFT.
This improves speed significantly.

It is not clear whether this should be enabled by default,
so it is disabled by default.

Signed-off-by: Muhammad Faiz <mfcc64@gmail.com>

Muhammad Faiz authored on 2016/11/03 10:11:27
Showing 3 changed files
... ...
@@ -2585,6 +2585,10 @@ Set file for dumping, suitable for gnuplot.
2585 2585
 @item dumpscale
2586 2586
 Set scale for dumpfile. Acceptable values are same with scale option.
2587 2587
 Default is linlog.
2588
+
2589
+@item fft2
2590
+Enable 2-channel convolution using complex FFT. This improves speed significantly.
2591
+Default is disabled.
2588 2592
 @end table
2589 2593
 
2590 2594
 @subsection Examples
... ...
@@ -69,6 +69,7 @@ typedef struct {
69 69
     RDFTContext   *analysis_irdft;
70 70
     RDFTContext   *rdft;
71 71
     RDFTContext   *irdft;
72
+    FFTContext    *fft_ctx;
72 73
     int           analysis_rdft_len;
73 74
     int           rdft_len;
74 75
 
... ...
@@ -97,6 +98,7 @@ typedef struct {
97 97
     int           scale;
98 98
     char          *dumpfile;
99 99
     int           dumpscale;
100
+    int           fft2;
100 101
 
101 102
     int           nb_gain_entry;
102 103
     int           gain_entry_err;
... ...
@@ -132,6 +134,7 @@ static const AVOption firequalizer_options[] = {
132 132
         { "loglog", "logarithmic-freq logarithmic-gain", 0, AV_OPT_TYPE_CONST, { .i64 = SCALE_LOGLOG }, 0, 0, FLAGS, "scale" },
133 133
     { "dumpfile", "set dump file", OFFSET(dumpfile), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
134 134
     { "dumpscale", "set dump scale", OFFSET(dumpscale), AV_OPT_TYPE_INT, { .i64 = SCALE_LINLOG }, 0, NB_SCALE-1, FLAGS, "scale" },
135
+    { "fft2", "set 2-channels fft", OFFSET(fft2), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
135 136
     { NULL }
136 137
 };
137 138
 
... ...
@@ -143,7 +146,9 @@ static void common_uninit(FIREqualizerContext *s)
143 143
     av_rdft_end(s->analysis_irdft);
144 144
     av_rdft_end(s->rdft);
145 145
     av_rdft_end(s->irdft);
146
+    av_fft_end(s->fft_ctx);
146 147
     s->analysis_rdft = s->analysis_irdft = s->rdft = s->irdft = NULL;
148
+    s->fft_ctx = NULL;
147 149
 
148 150
     av_freep(&s->analysis_buf);
149 151
     av_freep(&s->dump_buf);
... ...
@@ -230,6 +235,70 @@ static void fast_convolute(FIREqualizerContext *s, const float *kernel_buf, floa
230 230
     }
231 231
 }
232 232
 
233
+static void fast_convolute2(FIREqualizerContext *s, const float *kernel_buf, FFTComplex *conv_buf,
234
+                            OverlapIndex *idx, float *data0, float *data1, int nsamples)
235
+{ /* Filters two channels in place with one complex FFT: data0 travels in the real parts, data1 in the imaginary parts, and both are multiplied by the same kernel_buf. */
236
+    if (nsamples <= s->nsamples_max) {
237
+        FFTComplex *buf = conv_buf + idx->buf_idx * s->rdft_len; /* region of conv_buf selected by idx->buf_idx; receives this call's input */
238
+        FFTComplex *obuf = conv_buf + !idx->buf_idx * s->rdft_len + idx->overlap_idx; /* the other region, offset by overlap_idx; added into the output below */
239
+        int center = s->fir_len/2;
240
+        int k;
241
+        float tmp;
242
+
243
+        memset(buf, 0, center * sizeof(*buf)); /* zero the first center entries ahead of the input */
244
+        for (k = 0; k < nsamples; k++) {
245
+            buf[center+k].re = data0[k]; /* first channel -> real part */
246
+            buf[center+k].im = data1[k]; /* second channel -> imaginary part */
247
+        }
248
+        memset(buf + center + nsamples, 0, (s->rdft_len - nsamples - center) * sizeof(*buf)); /* zero everything after the input up to rdft_len */
249
+        av_fft_permute(s->fft_ctx, buf);
250
+        av_fft_calc(s->fft_ctx, buf);
251
+
252
+        /* multiply every bin by the kernel while swapping re <-> im, so that the backward fft can be done using the forward fft_ctx */
253
+        /* the 0.5f factor applied to every bin is the normalization */
254
+        tmp = buf[0].re;
255
+        buf[0].re = 0.5f * kernel_buf[0] * buf[0].im;
256
+        buf[0].im = 0.5f * kernel_buf[0] * tmp;
257
+        for (k = 1; k < s->rdft_len/2; k++) {
258
+            int m = s->rdft_len - k; /* mirrored bin index; it is scaled by the same kernel_buf[k] as bin k */
259
+            tmp = buf[k].re;
260
+            buf[k].re = 0.5f * kernel_buf[k] * buf[k].im;
261
+            buf[k].im = 0.5f * kernel_buf[k] * tmp;
262
+            tmp = buf[m].re;
263
+            buf[m].re = 0.5f * kernel_buf[k] * buf[m].im;
264
+            buf[m].im = 0.5f * kernel_buf[k] * tmp;
265
+        }
266
+        tmp = buf[k].re; /* k == s->rdft_len/2 here (loop exit value): the middle bin, which has no mirror */
267
+        buf[k].re = 0.5f * kernel_buf[k] * buf[k].im;
268
+        buf[k].im = 0.5f * kernel_buf[k] * tmp;
269
+
270
+        av_fft_permute(s->fft_ctx, buf);
271
+        av_fft_calc(s->fft_ctx, buf);
272
+
273
+        for (k = 0; k < s->rdft_len - idx->overlap_idx; k++) { /* add what remains in the other region past overlap_idx, i.e. the tail left by the previous call */
274
+            buf[k].re += obuf[k].re;
275
+            buf[k].im += obuf[k].im;
276
+        }
277
+
278
+        /* re <-> im were swapped before the second fft, so data0 is read back from im and data1 from re */
279
+        for (k = 0; k < nsamples; k++) {
280
+            data0[k] = buf[k].im;
281
+            data1[k] = buf[k].re;
282
+        }
283
+        idx->buf_idx = !idx->buf_idx; /* the region just written becomes the "other" region on the next call */
284
+        idx->overlap_idx = nsamples; /* the next call skips the nsamples entries already written to the output */
285
+    } else { /* more than nsamples_max samples: process the input in smaller pieces */
286
+        while (nsamples > s->nsamples_max * 2) { /* peel off full nsamples_max chunks while more than twice that remains */
287
+            fast_convolute2(s, kernel_buf, conv_buf, idx, data0, data1, s->nsamples_max);
288
+            data0 += s->nsamples_max;
289
+            data1 += s->nsamples_max;
290
+            nsamples -= s->nsamples_max;
291
+        }
292
+        fast_convolute2(s, kernel_buf, conv_buf, idx, data0, data1, nsamples/2); /* the rest is at most 2 * nsamples_max: split it into two halves that each fit */
293
+        fast_convolute2(s, kernel_buf, conv_buf, idx, data0 + nsamples/2, data1 + nsamples/2, nsamples - nsamples/2);
294
+    }
295
+}
296
+
233 297
 static void dump_fir(AVFilterContext *ctx, FILE *fp, int ch)
234 298
 {
235 299
     FIREqualizerContext *s = ctx->priv;
... ...
@@ -598,6 +667,9 @@ static int config_input(AVFilterLink *inlink)
598 598
     if (!(s->rdft = av_rdft_init(rdft_bits, DFT_R2C)) || !(s->irdft = av_rdft_init(rdft_bits, IDFT_C2R)))
599 599
         return AVERROR(ENOMEM);
600 600
 
601
+    if (s->fft2 && !s->multi && inlink->channels > 1 && !(s->fft_ctx = av_fft_init(rdft_bits, 0)))
602
+        return AVERROR(ENOMEM);
603
+
601 604
     for ( ; rdft_bits <= RDFT_BITS_MAX; rdft_bits++) {
602 605
         s->analysis_rdft_len = 1 << rdft_bits;
603 606
         if (inlink->sample_rate <= s->accuracy * s->analysis_rdft_len)
... ...
@@ -640,7 +712,13 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
640 640
     FIREqualizerContext *s = ctx->priv;
641 641
     int ch;
642 642
 
643
-    for (ch = 0; ch < inlink->channels; ch++) {
643
+    for (ch = 0; ch + 1 < inlink->channels && s->fft_ctx; ch += 2) {
644
+        fast_convolute2(s, s->kernel_buf, (FFTComplex *)(s->conv_buf + 2 * ch * s->rdft_len),
645
+                        s->conv_idx + ch, (float *) frame->extended_data[ch],
646
+                        (float *) frame->extended_data[ch+1], frame->nb_samples);
647
+    }
648
+
649
+    for ( ; ch < inlink->channels; ch++) {
644 650
         fast_convolute(s, s->kernel_buf + (s->multi ? ch * s->rdft_len : 0),
645 651
                        s->conv_buf + 2 * ch * s->rdft_len, s->conv_idx + ch,
646 652
                        (float *) frame->extended_data[ch], frame->nb_samples);
... ...
@@ -1,4 +1,5 @@
1 1
 firequalizer    =
2
+    fft2        = on:
2 3
     gain        = 'sin(0.001*f) - 1':
3 4
     delay       = 0.05,
4 5
 
... ...
@@ -11,14 +12,15 @@ firequalizer    =
11 11
     wfunc       = nuttall,
12 12
 
13 13
 firequalizer    =
14
+    fft2        = on:
15
+    gain_entry  = 'entry(1000, 0); entry(5000, 0.1); entry(10000, 0.2)',
16
+
17
+firequalizer    =
14 18
     gain        = 'if (ch, -0.3 * sin(0.001*f), -0.8 * sin(0.001*f)) - 1':
15 19
     delay       = 0.05:
16 20
     multi       = on,
17 21
 
18 22
 firequalizer    =
19
-    gain_entry  = 'entry(1000, 0); entry(5000, 0.1); entry(10000, 0.2)',
20
-
21
-firequalizer    =
22 23
     gain_entry  = 'entry(1000, 0.2); entry(5000, 0.1); entry(10000, 0)',
23 24
 
24 25
 volume          = 2.8dB,