Add 2-channel convolution using a complex FFT.
This improves speed significantly.
It is not yet clear whether it should be enabled by default,
so it is disabled by default.
Signed-off-by: Muhammad Faiz <mfcc64@gmail.com>
... | ... |
@@ -2585,6 +2585,10 @@ Set file for dumping, suitable for gnuplot. |
2585 | 2585 |
@item dumpscale |
2586 | 2586 |
Set scale for dumpfile. Acceptable values are the same as for the scale option. |
2587 | 2587 |
Default is linlog. |
2588 |
+ |
|
2589 |
+@item fft2 |
|
2590 |
+Enable 2-channel convolution using complex FFT. This improves speed significantly. It is not used when the input has a single channel or when multi is enabled. |
|
2591 |
+Default is disabled. |
|
2588 | 2592 |
@end table |
2589 | 2593 |
|
2590 | 2594 |
@subsection Examples |
... | ... |
@@ -69,6 +69,7 @@ typedef struct { |
69 | 69 |
RDFTContext *analysis_irdft; |
70 | 70 |
RDFTContext *rdft; |
71 | 71 |
RDFTContext *irdft; |
72 |
+ FFTContext *fft_ctx; |
|
72 | 73 |
int analysis_rdft_len; |
73 | 74 |
int rdft_len; |
74 | 75 |
|
... | ... |
@@ -97,6 +98,7 @@ typedef struct { |
97 | 97 |
int scale; |
98 | 98 |
char *dumpfile; |
99 | 99 |
int dumpscale; |
100 |
+ int fft2; |
|
100 | 101 |
|
101 | 102 |
int nb_gain_entry; |
102 | 103 |
int gain_entry_err; |
... | ... |
@@ -132,6 +134,7 @@ static const AVOption firequalizer_options[] = { |
132 | 132 |
{ "loglog", "logarithmic-freq logarithmic-gain", 0, AV_OPT_TYPE_CONST, { .i64 = SCALE_LOGLOG }, 0, 0, FLAGS, "scale" }, |
133 | 133 |
{ "dumpfile", "set dump file", OFFSET(dumpfile), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS }, |
134 | 134 |
{ "dumpscale", "set dump scale", OFFSET(dumpscale), AV_OPT_TYPE_INT, { .i64 = SCALE_LINLOG }, 0, NB_SCALE-1, FLAGS, "scale" }, |
135 |
+ { "fft2", "set 2-channels fft", OFFSET(fft2), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS }, |
|
135 | 136 |
{ NULL } |
136 | 137 |
}; |
137 | 138 |
|
... | ... |
@@ -143,7 +146,9 @@ static void common_uninit(FIREqualizerContext *s) |
143 | 143 |
av_rdft_end(s->analysis_irdft); |
144 | 144 |
av_rdft_end(s->rdft); |
145 | 145 |
av_rdft_end(s->irdft); |
146 |
+ av_fft_end(s->fft_ctx); |
|
146 | 147 |
s->analysis_rdft = s->analysis_irdft = s->rdft = s->irdft = NULL; |
148 |
+ s->fft_ctx = NULL; |
|
147 | 149 |
|
148 | 150 |
av_freep(&s->analysis_buf); |
149 | 151 |
av_freep(&s->dump_buf); |
... | ... |
@@ -230,6 +235,70 @@ static void fast_convolute(FIREqualizerContext *s, const float *kernel_buf, floa |
230 | 230 |
} |
231 | 231 |
} |
232 | 232 |
|
233 |
+static void fast_convolute2(FIREqualizerContext *s, const float *kernel_buf, FFTComplex *conv_buf, |
|
234
+ OverlapIndex *idx, float *data0, float *data1, int nsamples) |
|
235
+{ /* Filters two channels (data0, data1) in place with one complex FFT: data0 travels in .re, data1 in .im. */ |
|
236
+ if (nsamples <= s->nsamples_max) { /* block fits in one pass; larger inputs are split in the else branch */ |
|
237
+ FFTComplex *buf = conv_buf + idx->buf_idx * s->rdft_len; /* work area selected by idx->buf_idx */ |
|
238
+ FFTComplex *obuf = conv_buf + !idx->buf_idx * s->rdft_len + idx->overlap_idx; /* the other area, starting at the stored overlap_idx */ |
|
239
+ int center = s->fir_len/2; |
|
240
+ int k; |
|
241
+ float tmp; |
|
242
+ |
|
243
+ memset(buf, 0, center * sizeof(*buf)); /* zero the first center entries */ |
|
244
+ for (k = 0; k < nsamples; k++) { |
|
245
+ buf[center+k].re = data0[k]; |
|
246
+ buf[center+k].im = data1[k]; |
|
247
+ } |
|
248
+ memset(buf + center + nsamples, 0, (s->rdft_len - nsamples - center) * sizeof(*buf)); /* zero everything after the copied samples */ |
|
249
+ av_fft_permute(s->fft_ctx, buf); |
|
250
+ av_fft_calc(s->fft_ctx, buf); |
|
251
+ |
|
252
+ /* scale each bin by its kernel_buf coefficient and swap re <-> im, so that the second pass through the forward fft_ctx acts as the backward fft */ |
|
253
+ /* the extra 0.5f factor normalizes the result */ |
|
254
+ tmp = buf[0].re; |
|
255
+ buf[0].re = 0.5f * kernel_buf[0] * buf[0].im; |
|
256
+ buf[0].im = 0.5f * kernel_buf[0] * tmp; |
|
257
+ for (k = 1; k < s->rdft_len/2; k++) { |
|
258
+ int m = s->rdft_len - k; /* mirrored bin; it reuses coefficient kernel_buf[k] */ |
|
259
+ tmp = buf[k].re; |
|
260
+ buf[k].re = 0.5f * kernel_buf[k] * buf[k].im; |
|
261
+ buf[k].im = 0.5f * kernel_buf[k] * tmp; |
|
262
+ tmp = buf[m].re; |
|
263
+ buf[m].re = 0.5f * kernel_buf[k] * buf[m].im; |
|
264
+ buf[m].im = 0.5f * kernel_buf[k] * tmp; |
|
265
+ } |
|
266
+ tmp = buf[k].re; /* after the loop k is s->rdft_len/2 (for rdft_len >= 2): the middle bin, which has no mirrored partner */ |
|
267
+ buf[k].re = 0.5f * kernel_buf[k] * buf[k].im; |
|
268
+ buf[k].im = 0.5f * kernel_buf[k] * tmp; |
|
269
+ |
|
270
+ av_fft_permute(s->fft_ctx, buf); |
|
271
+ av_fft_calc(s->fft_ctx, buf); |
|
272
+ |
|
273
+ for (k = 0; k < s->rdft_len - idx->overlap_idx; k++) { /* add the entries of the other area from overlap_idx to its end */ |
|
274
+ buf[k].re += obuf[k].re; |
|
275
+ buf[k].im += obuf[k].im; |
|
276
+ } |
|
277
+ |
|
278
+ /* re and im were swapped above, so data0 is read back from .im and data1 from .re */ |
|
279
+ for (k = 0; k < nsamples; k++) { |
|
280
+ data0[k] = buf[k].im; |
|
281
+ data1[k] = buf[k].re; |
|
282
+ } |
|
283
+ idx->buf_idx = !idx->buf_idx; /* switch areas for the next pass */ |
|
284
+ idx->overlap_idx = nsamples; /* the next pass reads this area starting at this offset */ |
|
285
+ } else { |
|
286
+ while (nsamples > s->nsamples_max * 2) { /* peel off nsamples_max-sized pieces while more than two pieces' worth remain */ |
|
287
+ fast_convolute2(s, kernel_buf, conv_buf, idx, data0, data1, s->nsamples_max); |
|
288
+ data0 += s->nsamples_max; |
|
289
+ data1 += s->nsamples_max; |
|
290
+ nsamples -= s->nsamples_max; |
|
291
+ } |
|
292
+ fast_convolute2(s, kernel_buf, conv_buf, idx, data0, data1, nsamples/2); /* remainder is split into two halves, each at most nsamples_max */ |
|
293
+ fast_convolute2(s, kernel_buf, conv_buf, idx, data0 + nsamples/2, data1 + nsamples/2, nsamples - nsamples/2); |
|
294
+ } |
|
295
+} |
|
296 |
+ |
|
233 | 297 |
static void dump_fir(AVFilterContext *ctx, FILE *fp, int ch) |
234 | 298 |
{ |
235 | 299 |
FIREqualizerContext *s = ctx->priv; |
... | ... |
@@ -598,6 +667,9 @@ static int config_input(AVFilterLink *inlink) |
598 | 598 |
if (!(s->rdft = av_rdft_init(rdft_bits, DFT_R2C)) || !(s->irdft = av_rdft_init(rdft_bits, IDFT_C2R))) |
599 | 599 |
return AVERROR(ENOMEM); |
600 | 600 |
|
601 |
+ if (s->fft2 && !s->multi && inlink->channels > 1 && !(s->fft_ctx = av_fft_init(rdft_bits, 0))) |
|
602 |
+ return AVERROR(ENOMEM); |
|
603 |
+ |
|
601 | 604 |
for ( ; rdft_bits <= RDFT_BITS_MAX; rdft_bits++) { |
602 | 605 |
s->analysis_rdft_len = 1 << rdft_bits; |
603 | 606 |
if (inlink->sample_rate <= s->accuracy * s->analysis_rdft_len) |
... | ... |
@@ -640,7 +712,13 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame) |
640 | 640 |
FIREqualizerContext *s = ctx->priv; |
641 | 641 |
int ch; |
642 | 642 |
|
643 |
- for (ch = 0; ch < inlink->channels; ch++) { |
|
643 |
+ for (ch = 0; ch + 1 < inlink->channels && s->fft_ctx; ch += 2) { |
|
644 |
+ fast_convolute2(s, s->kernel_buf, (FFTComplex *)(s->conv_buf + 2 * ch * s->rdft_len), |
|
645 |
+ s->conv_idx + ch, (float *) frame->extended_data[ch], |
|
646 |
+ (float *) frame->extended_data[ch+1], frame->nb_samples); |
|
647 |
+ } |
|
648 |
+ |
|
649 |
+ for ( ; ch < inlink->channels; ch++) { |
|
644 | 650 |
fast_convolute(s, s->kernel_buf + (s->multi ? ch * s->rdft_len : 0), |
645 | 651 |
s->conv_buf + 2 * ch * s->rdft_len, s->conv_idx + ch, |
646 | 652 |
(float *) frame->extended_data[ch], frame->nb_samples); |
... | ... |
@@ -1,4 +1,5 @@ |
1 | 1 |
firequalizer = |
2 |
+ fft2 = on: |
|
2 | 3 |
gain = 'sin(0.001*f) - 1': |
3 | 4 |
delay = 0.05, |
4 | 5 |
|
... | ... |
@@ -11,14 +12,15 @@ firequalizer = |
11 | 11 |
wfunc = nuttall, |
12 | 12 |
|
13 | 13 |
firequalizer = |
14 |
+ fft2 = on: |
|
15 |
+ gain_entry = 'entry(1000, 0); entry(5000, 0.1); entry(10000, 0.2)', |
|
16 |
+ |
|
17 |
+firequalizer = |
|
14 | 18 |
gain = 'if (ch, -0.3 * sin(0.001*f), -0.8 * sin(0.001*f)) - 1': |
15 | 19 |
delay = 0.05: |
16 | 20 |
multi = on, |
17 | 21 |
|
18 | 22 |
firequalizer = |
19 |
- gain_entry = 'entry(1000, 0); entry(5000, 0.1); entry(10000, 0.2)', |
|
20 |
- |
|
21 |
-firequalizer = |
|
22 | 23 |
gain_entry = 'entry(1000, 0.2); entry(5000, 0.1); entry(10000, 0)', |
23 | 24 |
|
24 | 25 |
volume = 2.8dB, |