Add atempo audio filter for adjusting audio tempo without affecting
pitch. This filter implements WSOLA algorithm with fast cross
correlation calculation in frequency domain.
Signed-off-by: Pavel Koshevoy <pavel@homestead.aragog.com>
Signed-off-by: Stefano Sabatini <stefasab@gmail.com>
... | ... |
@@ -275,6 +275,7 @@ Video filters: |
275 | 275 |
graphdump.c Nicolas George |
276 | 276 |
af_amerge.c Nicolas George |
277 | 277 |
af_astreamsync.c Nicolas George |
278 |
+ af_atempo.c Pavel Koshevoy |
|
278 | 279 |
af_pan.c Nicolas George |
279 | 280 |
vsrc_mandelbrot.c Michael Niedermayer |
280 | 281 |
vf_yadif.c Michael Niedermayer |
... | ... |
@@ -1702,6 +1702,7 @@ amovie_filter_deps="avcodec avformat" |
1702 | 1702 |
aresample_filter_deps="swresample" |
1703 | 1703 |
ass_filter_deps="libass" |
1704 | 1704 |
asyncts_filter_deps="avresample" |
1705 |
+atempo_filter_deps="avcodec" |
|
1705 | 1706 |
blackframe_filter_deps="gpl" |
1706 | 1707 |
boxblur_filter_deps="gpl" |
1707 | 1708 |
colormatrix_filter_deps="gpl" |
... | ... |
@@ -406,6 +406,24 @@ amovie=file.ogg [a] ; amovie=file.mp3 [b] ; |
406 | 406 |
[a2] [b2] amerge |
407 | 407 |
@end example |
408 | 408 |
|
409 |
+@section atempo |
|
410 |
+ |
|
411 |
+Adjust audio tempo. |
|
412 |
+ |
|
413 |
+The filter accepts exactly one parameter, the audio tempo. If not |
|
414 |
+specified then the filter will assume nominal 1.0 tempo. Tempo must |
|
415 |
+be in the [0.5, 2.0] range. |
|
416 |
+ |
|
417 |
+For example, to slow down audio to 80% tempo: |
|
418 |
+@example |
|
419 |
+atempo=0.8 |
|
420 |
+@end example |
|
421 |
+ |
|
422 |
+For example, to speed up audio to 125% tempo: |
|
423 |
+@example |
|
424 |
+atempo=1.25 |
|
425 |
+@end example |
|
426 |
+ |
|
409 | 427 |
@section earwax |
410 | 428 |
|
411 | 429 |
Make audio easier to listen to on headphones. |
... | ... |
@@ -9,6 +9,7 @@ FFLIBS-$(CONFIG_SCALE_FILTER) += swscale |
9 | 9 |
FFLIBS-$(CONFIG_ACONVERT_FILTER) += swresample |
10 | 10 |
FFLIBS-$(CONFIG_AMOVIE_FILTER) += avformat avcodec |
11 | 11 |
FFLIBS-$(CONFIG_ARESAMPLE_FILTER) += swresample |
12 |
+FFLIBS-$(CONFIG_ATEMPO_FILTER) += avcodec |
|
12 | 13 |
FFLIBS-$(CONFIG_MOVIE_FILTER) += avformat avcodec |
13 | 14 |
FFLIBS-$(CONFIG_PAN_FILTER) += swresample |
14 | 15 |
FFLIBS-$(CONFIG_REMOVELOGO_FILTER) += avformat avcodec |
... | ... |
@@ -56,6 +57,7 @@ OBJS-$(CONFIG_ASHOWINFO_FILTER) += af_ashowinfo.o |
56 | 56 |
OBJS-$(CONFIG_ASPLIT_FILTER) += split.o |
57 | 57 |
OBJS-$(CONFIG_ASTREAMSYNC_FILTER) += af_astreamsync.o |
58 | 58 |
OBJS-$(CONFIG_ASYNCTS_FILTER) += af_asyncts.o |
59 |
+OBJS-$(CONFIG_ATEMPO_FILTER) += af_atempo.o |
|
59 | 60 |
OBJS-$(CONFIG_CHANNELSPLIT_FILTER) += af_channelsplit.o |
60 | 61 |
OBJS-$(CONFIG_EARWAX_FILTER) += af_earwax.o |
61 | 62 |
OBJS-$(CONFIG_PAN_FILTER) += af_pan.o |
62 | 63 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,1160 @@ |
0 |
+/* |
|
1 |
+ * Copyright (c) 2012 Pavel Koshevoy <pkoshevoy at gmail dot com> |
|
2 |
+ * |
|
3 |
+ * This file is part of FFmpeg. |
|
4 |
+ * |
|
5 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
6 |
+ * modify it under the terms of the GNU Lesser General Public |
|
7 |
+ * License as published by the Free Software Foundation; either |
|
8 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
9 |
+ * |
|
10 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
11 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
12 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
13 |
+ * Lesser General Public License for more details. |
|
14 |
+ * |
|
15 |
+ * You should have received a copy of the GNU Lesser General Public |
|
16 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
17 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
18 |
+ */ |
|
19 |
+ |
|
20 |
+/** |
|
21 |
+ * @file |
|
22 |
+ * tempo scaling audio filter -- an implementation of WSOLA algorithm |
|
23 |
+ * |
|
24 |
+ * Based on MIT licensed yaeAudioTempoFilter.h and yaeAudioFragment.h |
|
25 |
+ * from Apprentice Video player by Pavel Koshevoy. |
|
26 |
+ * https://sourceforge.net/projects/apprenticevideo/ |
|
27 |
+ * |
|
28 |
+ * An explanation of SOLA algorithm is available at |
|
29 |
+ * http://www.surina.net/article/time-and-pitch-scaling.html |
|
30 |
+ * |
|
31 |
+ * WSOLA is very similar to SOLA, only one major difference exists between |
|
32 |
+ * these algorithms. SOLA shifts audio fragments along the output stream, |
|
33 |
+ * whereas WSOLA shifts audio fragments along the input stream. |
|
34 |
+ * |
|
35 |
+ * The advantage of WSOLA algorithm is that the overlap region size is |
|
36 |
+ * always the same, therefore the blending function is constant and |
|
37 |
+ * can be precomputed. |
|
38 |
+ */ |
|
39 |
+ |
|
40 |
+#include <float.h> |
|
41 |
+#include "libavcodec/avfft.h" |
|
42 |
+#include "libavutil/avassert.h" |
|
43 |
+#include "libavutil/avstring.h" |
|
44 |
+#include "libavutil/eval.h" |
|
45 |
+#include "libavutil/opt.h" |
|
46 |
+#include "libavutil/samplefmt.h" |
|
47 |
+#include "avfilter.h" |
|
48 |
+#include "audio.h" |
|
49 |
+#include "internal.h" |
|
50 |
+ |
|
51 |
/**
 * A fragment of audio waveform
 */
typedef struct {
    // index of the first sample of this fragment in the overall waveform;
    // 0: input sample position
    // 1: output sample position
    int64_t position[2];

    // original packed multi-channel samples:
    uint8_t *data;

    // number of samples in this fragment:
    int nsamples;

    // rDFT transform of the down-mixed mono fragment, used for
    // fast waveform alignment via correlation in frequency domain:
    FFTSample *xdat;
} AudioFragment;
|
70 |
+ |
|
71 |
/**
 * Filter state machine states
 */
typedef enum {
    YAE_LOAD_FRAGMENT,      // fill the current fragment from input samples
    YAE_ADJUST_POSITION,    // align the fragment with the previous fragment
    YAE_RELOAD_FRAGMENT,    // reload fragment data after position correction
    YAE_OUTPUT_OVERLAP_ADD, // blend the overlap region and emit the result
    YAE_FLUSH_OUTPUT,       // drain remaining buffered data (see yae_flush)
} FilterState;
|
81 |
+ |
|
82 |
/**
 * Filter state machine
 */
typedef struct {
    // ring-buffer of input samples, necessary because sometimes
    // the input fragment position may be adjusted backwards:
    uint8_t *buffer;

    // ring-buffer maximum capacity, expressed in sample rate time base:
    int ring;

    // ring-buffer house keeping:
    int size;
    int head;
    int tail;

    // 0: input sample position corresponding to the ring buffer tail
    // 1: output sample position
    int64_t position[2];

    // sample format:
    enum AVSampleFormat format;

    // number of channels:
    int channels;

    // number of bytes to skip from one sample to the next, across
    // multiple channels;
    // stride = (number-of-channels * bits-per-sample-per-channel) / 8
    int stride;

    // fragment window size, power-of-two integer:
    int window;

    // Hann window coefficients, for feathering
    // (blending) the overlapping fragment region:
    float *hann;

    // tempo scaling factor:
    double tempo;

    // cumulative alignment drift:
    int drift;

    // current/previous fragment ring-buffer:
    AudioFragment frag[2];

    // current fragment index:
    uint64_t nfrag;

    // current state:
    FilterState state;

    // for fast correlation calculation in frequency domain:
    RDFTContext *real_to_complex;
    RDFTContext *complex_to_real;
    FFTSample *correlation;

    // for managing AVFilterPad.request_frame and AVFilterPad.filter_samples
    int request_fulfilled;
    AVFilterBufferRef *dst_buffer;
    uint8_t *dst;
    uint8_t *dst_end;
    uint64_t nsamples_in;
    uint64_t nsamples_out;
} ATempoContext;
|
147 |
+ |
|
148 |
+/** |
|
149 |
+ * Reset filter to initial state, do not deallocate existing local buffers. |
|
150 |
+ */ |
|
151 |
+static void yae_clear(ATempoContext *atempo) |
|
152 |
+{ |
|
153 |
+ atempo->size = 0; |
|
154 |
+ atempo->head = 0; |
|
155 |
+ atempo->tail = 0; |
|
156 |
+ |
|
157 |
+ atempo->drift = 0; |
|
158 |
+ atempo->nfrag = 0; |
|
159 |
+ atempo->state = YAE_LOAD_FRAGMENT; |
|
160 |
+ |
|
161 |
+ atempo->position[0] = 0; |
|
162 |
+ atempo->position[1] = 0; |
|
163 |
+ |
|
164 |
+ atempo->frag[0].position[0] = 0; |
|
165 |
+ atempo->frag[0].position[1] = 0; |
|
166 |
+ atempo->frag[0].nsamples = 0; |
|
167 |
+ |
|
168 |
+ atempo->frag[1].position[0] = 0; |
|
169 |
+ atempo->frag[1].position[1] = 0; |
|
170 |
+ atempo->frag[1].nsamples = 0; |
|
171 |
+ |
|
172 |
+ // shift left position of 1st fragment by half a window |
|
173 |
+ // so that no re-normalization would be required for |
|
174 |
+ // the left half of the 1st fragment: |
|
175 |
+ atempo->frag[0].position[0] = -(int64_t)(atempo->window / 2); |
|
176 |
+ atempo->frag[0].position[1] = -(int64_t)(atempo->window / 2); |
|
177 |
+ |
|
178 |
+ avfilter_unref_bufferp(&atempo->dst_buffer); |
|
179 |
+ atempo->dst = NULL; |
|
180 |
+ atempo->dst_end = NULL; |
|
181 |
+ |
|
182 |
+ atempo->request_fulfilled = 0; |
|
183 |
+ atempo->nsamples_in = 0; |
|
184 |
+ atempo->nsamples_out = 0; |
|
185 |
+} |
|
186 |
+ |
|
187 |
+/** |
|
188 |
+ * Reset filter to initial state and deallocate all buffers. |
|
189 |
+ */ |
|
190 |
+static void yae_release_buffers(ATempoContext *atempo) |
|
191 |
+{ |
|
192 |
+ yae_clear(atempo); |
|
193 |
+ |
|
194 |
+ av_freep(&atempo->frag[0].data); |
|
195 |
+ av_freep(&atempo->frag[1].data); |
|
196 |
+ av_freep(&atempo->frag[0].xdat); |
|
197 |
+ av_freep(&atempo->frag[1].xdat); |
|
198 |
+ |
|
199 |
+ av_freep(&atempo->buffer); |
|
200 |
+ av_freep(&atempo->hann); |
|
201 |
+ av_freep(&atempo->correlation); |
|
202 |
+ |
|
203 |
+ av_rdft_end(atempo->real_to_complex); |
|
204 |
+ atempo->real_to_complex = NULL; |
|
205 |
+ |
|
206 |
+ av_rdft_end(atempo->complex_to_real); |
|
207 |
+ atempo->complex_to_real = NULL; |
|
208 |
+} |
|
209 |
+ |
|
210 |
/**
 * Reallocate a buffer, bailing out of the calling function on failure.
 *
 * NOTE: this macro may only be expanded inside a function that has an
 * ATempoContext *atempo in scope and returns an int error code -- on
 * allocation failure it releases all filter buffers and makes the
 * caller return AVERROR(ENOMEM).
 */
#define REALLOC_OR_FAIL(field, field_size)                      \
    do {                                                        \
        void * new_field = av_realloc(field, (field_size));     \
        if (!new_field) {                                       \
            yae_release_buffers(atempo);                        \
            return AVERROR(ENOMEM);                             \
        }                                                       \
        field = new_field;                                      \
    } while (0)
|
219 |
+ |
|
220 |
/**
 * Prepare filter for processing audio data of given format,
 * sample rate and number of channels.
 *
 * Allocates (or grows) the fragment buffers, the input ring-buffer,
 * the Hann window table and the forward/inverse rDFT contexts, then
 * resets the state machine via yae_clear().
 *
 * @return 0 on success, AVERROR(ENOMEM) on allocation failure.
 */
static int yae_reset(ATempoContext *atempo,
                     enum AVSampleFormat format,
                     int sample_rate,
                     int channels)
{
    const int sample_size = av_get_bytes_per_sample(format);
    uint32_t nlevels = 0;
    uint32_t pot;
    int i;

    atempo->format   = format;
    atempo->channels = channels;
    atempo->stride   = sample_size * channels;

    // pick a segment window size:
    atempo->window = sample_rate / 24;

    // adjust window size to be a power-of-two integer:
    nlevels = av_log2(atempo->window);
    pot = 1 << nlevels;
    av_assert0(pot <= atempo->window);

    if (pot < atempo->window) {
        // round up to the next power of two:
        atempo->window = pot * 2;
        nlevels++;
    }

    // initialize audio fragment buffers;
    // NOTE: xdat is sized in FFTComplex units (2x the window of
    // FFTSamples) -- the unused half is zero-filled in yae_downmix
    // before the in-place rDFT:
    REALLOC_OR_FAIL(atempo->frag[0].data, atempo->window * atempo->stride);
    REALLOC_OR_FAIL(atempo->frag[1].data, atempo->window * atempo->stride);
    REALLOC_OR_FAIL(atempo->frag[0].xdat, atempo->window * sizeof(FFTComplex));
    REALLOC_OR_FAIL(atempo->frag[1].xdat, atempo->window * sizeof(FFTComplex));

    // initialize rDFT contexts:
    av_rdft_end(atempo->real_to_complex);
    atempo->real_to_complex = NULL;

    av_rdft_end(atempo->complex_to_real);
    atempo->complex_to_real = NULL;

    // 2^(nlevels + 1) transform points, matching the doubled xdat size:
    atempo->real_to_complex = av_rdft_init(nlevels + 1, DFT_R2C);
    if (!atempo->real_to_complex) {
        yae_release_buffers(atempo);
        return AVERROR(ENOMEM);
    }

    atempo->complex_to_real = av_rdft_init(nlevels + 1, IDFT_C2R);
    if (!atempo->complex_to_real) {
        yae_release_buffers(atempo);
        return AVERROR(ENOMEM);
    }

    REALLOC_OR_FAIL(atempo->correlation, atempo->window * sizeof(FFTComplex));

    // the ring buffer holds 3 windows worth of input so that fragment
    // positions may be adjusted backwards without losing data:
    atempo->ring = atempo->window * 3;
    REALLOC_OR_FAIL(atempo->buffer, atempo->ring * atempo->stride);

    // initialize the Hann window function:
    REALLOC_OR_FAIL(atempo->hann, atempo->window * sizeof(float));

    for (i = 0; i < atempo->window; i++) {
        double t = (double)i / (double)(atempo->window - 1);
        double h = 0.5 * (1.0 - cos(2.0 * M_PI * t));
        atempo->hann[i] = (float)h;
    }

    yae_clear(atempo);
    return 0;
}
|
293 |
+ |
|
294 |
+static int yae_set_tempo(AVFilterContext *ctx, const char *arg_tempo) |
|
295 |
+{ |
|
296 |
+ ATempoContext *atempo = ctx->priv; |
|
297 |
+ char *tail = NULL; |
|
298 |
+ double tempo = av_strtod(arg_tempo, &tail); |
|
299 |
+ |
|
300 |
+ if (tail && *tail) { |
|
301 |
+ av_log(ctx, AV_LOG_ERROR, "Invalid tempo value '%s'\n", arg_tempo); |
|
302 |
+ return AVERROR(EINVAL); |
|
303 |
+ } |
|
304 |
+ |
|
305 |
+ if (tempo < 0.5 || tempo > 2.0) { |
|
306 |
+ av_log(ctx, AV_LOG_ERROR, "Tempo value %f exceeds [0.5, 2.0] range\n", |
|
307 |
+ tempo); |
|
308 |
+ return AVERROR(EINVAL); |
|
309 |
+ } |
|
310 |
+ |
|
311 |
+ atempo->tempo = tempo; |
|
312 |
+ return 0; |
|
313 |
+} |
|
314 |
+ |
|
315 |
+inline static AudioFragment *yae_curr_frag(ATempoContext *atempo) |
|
316 |
+{ |
|
317 |
+ return &atempo->frag[atempo->nfrag % 2]; |
|
318 |
+} |
|
319 |
+ |
|
320 |
+inline static AudioFragment *yae_prev_frag(ATempoContext *atempo) |
|
321 |
+{ |
|
322 |
+ return &atempo->frag[(atempo->nfrag + 1) % 2]; |
|
323 |
+} |
|
324 |
+ |
|
325 |
/**
 * A helper macro for initializing complex data buffer with scalar data
 * of a given type.
 *
 * Reads frag->nsamples * atempo->channels interleaved samples through
 * the local `src` pointer and stores one FFTSample per sample position
 * into frag->xdat.  Multi-channel input is down-mixed by keeping, for
 * each sample position, the channel value with the largest magnitude
 * (magnitudes are clipped to scalar_max for comparison purposes only;
 * the stored value itself is not clipped).
 *
 * NOTE: expects `atempo`, `frag` and a mutable `src` to be in scope at
 * the expansion site, and advances `src` past the consumed samples
 * (see yae_downmix).
 */
#define yae_init_xdat(scalar_type, scalar_max)                          \
    do {                                                                \
        const uint8_t *src_end = src +                                  \
            frag->nsamples * atempo->channels * sizeof(scalar_type);    \
                                                                        \
        FFTSample *xdat = frag->xdat;                                   \
        scalar_type tmp;                                                \
                                                                        \
        if (atempo->channels == 1) {                                    \
            for (; src < src_end; xdat++) {                             \
                tmp = *(const scalar_type *)src;                        \
                src += sizeof(scalar_type);                             \
                                                                        \
                *xdat = (FFTSample)tmp;                                 \
            }                                                           \
        } else {                                                        \
            FFTSample s, max, ti, si;                                   \
            int i;                                                      \
                                                                        \
            for (; src < src_end; xdat++) {                             \
                tmp = *(const scalar_type *)src;                        \
                src += sizeof(scalar_type);                             \
                                                                        \
                max = (FFTSample)tmp;                                   \
                s = FFMIN((FFTSample)scalar_max,                        \
                          (FFTSample)fabsf(max));                       \
                                                                        \
                for (i = 1; i < atempo->channels; i++) {                \
                    tmp = *(const scalar_type *)src;                    \
                    src += sizeof(scalar_type);                         \
                                                                        \
                    ti = (FFTSample)tmp;                                \
                    si = FFMIN((FFTSample)scalar_max,                   \
                               (FFTSample)fabsf(ti));                   \
                                                                        \
                    if (s < si) {                                       \
                        s = si;                                         \
                        max = ti;                                       \
                    }                                                   \
                }                                                       \
                                                                        \
                *xdat = max;                                            \
            }                                                           \
        }                                                               \
    } while (0)
|
374 |
+ |
|
375 |
+/** |
|
376 |
+ * Initialize complex data buffer of a given audio fragment |
|
377 |
+ * with down-mixed mono data of appropriate scalar type. |
|
378 |
+ */ |
|
379 |
+static void yae_downmix(ATempoContext *atempo, AudioFragment *frag) |
|
380 |
+{ |
|
381 |
+ // shortcuts: |
|
382 |
+ const uint8_t *src = frag->data; |
|
383 |
+ |
|
384 |
+ // init complex data buffer used for FFT and Correlation: |
|
385 |
+ memset(frag->xdat, 0, sizeof(FFTComplex) * atempo->window); |
|
386 |
+ |
|
387 |
+ if (atempo->format == AV_SAMPLE_FMT_U8) { |
|
388 |
+ yae_init_xdat(uint8_t, 127); |
|
389 |
+ } else if (atempo->format == AV_SAMPLE_FMT_S16) { |
|
390 |
+ yae_init_xdat(int16_t, 32767); |
|
391 |
+ } else if (atempo->format == AV_SAMPLE_FMT_S32) { |
|
392 |
+ yae_init_xdat(int, 2147483647); |
|
393 |
+ } else if (atempo->format == AV_SAMPLE_FMT_FLT) { |
|
394 |
+ yae_init_xdat(float, 1); |
|
395 |
+ } else if (atempo->format == AV_SAMPLE_FMT_DBL) { |
|
396 |
+ yae_init_xdat(double, 1); |
|
397 |
+ } |
|
398 |
+} |
|
399 |
+ |
|
400 |
/**
 * Populate the internal data buffer on as-needed basis.
 *
 * Copies samples from *src_ref into the input ring-buffer until the
 * input position reaches stop_here or the source is exhausted;
 * advances *src_ref past the consumed bytes.
 *
 * @return
 *   0 if requested data was already available or was successfully loaded,
 *   AVERROR(EAGAIN) if more input data is required.
 */
static int yae_load_data(ATempoContext *atempo,
                         const uint8_t **src_ref,
                         const uint8_t *src_end,
                         int64_t stop_here)
{
    // shortcut:
    const uint8_t *src = *src_ref;
    const int read_size = stop_here - atempo->position[0];

    if (stop_here <= atempo->position[0]) {
        // requested region is already buffered:
        return 0;
    }

    // samples are not expected to be skipped:
    av_assert0(read_size <= atempo->ring);

    while (atempo->position[0] < stop_here && src < src_end) {
        int src_samples = (src_end - src) / atempo->stride;

        // load data piece-wise, in order to avoid complicating the logic:
        int nsamples = FFMIN(read_size, src_samples);
        int na;
        int nb;

        nsamples = FFMIN(nsamples, atempo->ring);
        // na: contiguous chunk up to the physical end of the ring buffer;
        // nb: remainder that wraps around to the beginning:
        na = FFMIN(nsamples, atempo->ring - atempo->tail);
        nb = FFMIN(nsamples - na, atempo->ring);

        if (na) {
            uint8_t *a = atempo->buffer + atempo->tail * atempo->stride;
            memcpy(a, src, na * atempo->stride);

            src += na * atempo->stride;
            atempo->position[0] += na;

            // once full, the buffer stays full and head tracks tail:
            atempo->size = FFMIN(atempo->size + na, atempo->ring);
            atempo->tail = (atempo->tail + na) % atempo->ring;
            atempo->head =
                atempo->size < atempo->ring ?
                atempo->tail - atempo->size :
                atempo->tail;
        }

        if (nb) {
            uint8_t *b = atempo->buffer;
            memcpy(b, src, nb * atempo->stride);

            src += nb * atempo->stride;
            atempo->position[0] += nb;

            atempo->size = FFMIN(atempo->size + nb, atempo->ring);
            atempo->tail = (atempo->tail + nb) % atempo->ring;
            atempo->head =
                atempo->size < atempo->ring ?
                atempo->tail - atempo->size :
                atempo->tail;
        }
    }

    // pass back the updated source buffer pointer:
    *src_ref = src;

    // sanity check:
    av_assert0(atempo->position[0] <= stop_here);

    return atempo->position[0] == stop_here ? 0 : AVERROR(EAGAIN);
}
|
474 |
+ |
|
475 |
/**
 * Populate current audio fragment data buffer.
 *
 * Pulls up to one window of samples for the current fragment out of the
 * ring buffer (loading further input via yae_load_data when src_ref is
 * non-NULL), substituting zeros for samples that have already been
 * evicted from the ring buffer.
 *
 * @return
 *   0 when the fragment is ready,
 *   AVERROR(EAGAIN) if more input data is required.
 */
static int yae_load_frag(ATempoContext *atempo,
                         const uint8_t **src_ref,
                         const uint8_t *src_end)
{
    // shortcuts:
    AudioFragment *frag = yae_curr_frag(atempo);
    uint8_t *dst;
    int64_t missing, start, zeros;
    uint32_t nsamples;
    const uint8_t *a, *b;
    int i0, i1, n0, n1, na, nb;

    int64_t stop_here = frag->position[0] + atempo->window;
    if (src_ref && yae_load_data(atempo, src_ref, src_end, stop_here) != 0) {
        return AVERROR(EAGAIN);
    }

    // calculate the number of samples we don't have:
    missing =
        stop_here > atempo->position[0] ?
        stop_here - atempo->position[0] : 0;

    nsamples =
        missing < (int64_t)atempo->window ?
        (uint32_t)(atempo->window - missing) : 0;

    // setup the output buffer:
    frag->nsamples = nsamples;
    dst = frag->data;

    // input position of the oldest sample still held in the ring buffer:
    start = atempo->position[0] - atempo->size;
    zeros = 0;

    if (frag->position[0] < start) {
        // what we don't have we substitute with zeros:
        zeros = FFMIN(start - frag->position[0], (int64_t)nsamples);
        av_assert0(zeros != nsamples);

        memset(dst, 0, zeros * atempo->stride);
        dst += zeros * atempo->stride;
    }

    if (zeros == nsamples) {
        return 0;
    }

    // get the remaining data from the ring buffer;
    // na: contiguous samples from head to tail (or physical end),
    // nb: samples wrapped around to the start of the buffer:
    na = (atempo->head < atempo->tail ?
          atempo->tail - atempo->head :
          atempo->ring - atempo->head);

    nb = atempo->head < atempo->tail ? 0 : atempo->tail;

    // sanity check:
    av_assert0(nsamples <= zeros + na + nb);

    a = atempo->buffer + atempo->head * atempo->stride;
    b = atempo->buffer;

    // i0: offset of the first needed sample relative to the ring head;
    // i1: same offset within the wrapped-around region:
    i0 = frag->position[0] + zeros - start;
    i1 = i0 < na ? 0 : i0 - na;

    n0 = i0 < na ? FFMIN(na - i0, (int)(nsamples - zeros)) : 0;
    n1 = nsamples - zeros - n0;

    if (n0) {
        memcpy(dst, a + i0 * atempo->stride, n0 * atempo->stride);
        dst += n0 * atempo->stride;
    }

    if (n1) {
        memcpy(dst, b + i1 * atempo->stride, n1 * atempo->stride);
        dst += n1 * atempo->stride;
    }

    return 0;
}
|
559 |
+ |
|
560 |
+/** |
|
561 |
+ * Prepare for loading next audio fragment. |
|
562 |
+ */ |
|
563 |
+static void yae_advance_to_next_frag(ATempoContext *atempo) |
|
564 |
+{ |
|
565 |
+ const double fragment_step = atempo->tempo * (double)(atempo->window / 2); |
|
566 |
+ |
|
567 |
+ const AudioFragment *prev; |
|
568 |
+ AudioFragment *frag; |
|
569 |
+ |
|
570 |
+ atempo->nfrag++; |
|
571 |
+ prev = yae_prev_frag(atempo); |
|
572 |
+ frag = yae_curr_frag(atempo); |
|
573 |
+ |
|
574 |
+ frag->position[0] = prev->position[0] + (int64_t)fragment_step; |
|
575 |
+ frag->position[1] = prev->position[1] + atempo->window / 2; |
|
576 |
+ frag->nsamples = 0; |
|
577 |
+} |
|
578 |
+ |
|
579 |
/**
 * Calculate cross-correlation via rDFT.
 *
 * Multiply two vectors of complex numbers (result of real_to_complex rDFT)
 * and transform back via complex_to_real rDFT.  Each product is
 * conj(xa[i]) * xb[i], which yields correlation rather than convolution.
 */
static void yae_xcorr_via_rdft(FFTSample *xcorr,
                               RDFTContext *complex_to_real,
                               const FFTComplex *xa,
                               const FFTComplex *xb,
                               const int window)
{
    FFTComplex *xc = (FFTComplex *)xcorr;
    int i;

    // NOTE: first element requires special care -- Given Y = rDFT(X),
    // Im(Y[0]) and Im(Y[N/2]) are always zero, therefore av_rdft_calc
    // stores Re(Y[N/2]) in place of Im(Y[0]).

    // multiply the two packed real values (DC and Nyquist) separately:
    xc->re = xa->re * xb->re;
    xc->im = xa->im * xb->im;
    xa++;
    xb++;
    xc++;

    for (i = 1; i < window; i++, xa++, xb++, xc++) {
        xc->re = (xa->re * xb->re + xa->im * xb->im);
        xc->im = (xa->im * xb->re - xa->re * xb->im);
    }

    // apply inverse rDFT:
    av_rdft_calc(complex_to_real, xcorr);
}
|
612 |
+ |
|
613 |
/**
 * Calculate alignment offset for given fragment
 * relative to the previous fragment.
 *
 * Searches the cross-correlation of the two fragments for its peak
 * within roughly +/- delta_max of the nominal half-window offset,
 * shifted by the accumulated drift.
 *
 * @return alignment offset of current fragment relative to previous.
 */
static int yae_align(AudioFragment *frag,
                     const AudioFragment *prev,
                     const int window,
                     const int delta_max,
                     const int drift,
                     FFTSample *correlation,
                     RDFTContext *complex_to_real)
{
    int best_offset = -drift;
    FFTSample best_metric = -FLT_MAX;
    FFTSample *xcorr;

    int i0;
    int i1;
    int i;

    yae_xcorr_via_rdft(correlation,
                       complex_to_real,
                       (const FFTComplex *)prev->xdat,
                       (const FFTComplex *)frag->xdat,
                       window);

    // identify search window boundaries:
    i0 = FFMAX(window / 2 - delta_max - drift, 0);
    i0 = FFMIN(i0, window);

    i1 = FFMIN(window / 2 + delta_max - drift, window - window / 16);
    i1 = FFMAX(i1, 0);

    // identify cross-correlation peaks within search window:
    xcorr = correlation + i0;

    for (i = i0; i < i1; i++, xcorr++) {
        FFTSample metric = *xcorr;

        // normalize:
        // NOTE(review): weighting the metric by (drift + i) biases the
        // peak search -- presumably to counteract accumulated alignment
        // drift; confirm before changing.
        FFTSample drifti = (FFTSample)(drift + i);
        metric *= drifti;

        if (metric > best_metric) {
            best_metric = metric;
            best_offset = i - window / 2;
        }
    }

    return best_offset;
}
|
666 |
+ |
|
667 |
+/** |
|
668 |
+ * Adjust current fragment position for better alignment |
|
669 |
+ * with previous fragment. |
|
670 |
+ * |
|
671 |
+ * @return alignment correction. |
|
672 |
+ */ |
|
673 |
+static int yae_adjust_position(ATempoContext *atempo) |
|
674 |
+{ |
|
675 |
+ const AudioFragment *prev = yae_prev_frag(atempo); |
|
676 |
+ AudioFragment *frag = yae_curr_frag(atempo); |
|
677 |
+ |
|
678 |
+ const int delta_max = atempo->window / 2; |
|
679 |
+ const int correction = yae_align(frag, |
|
680 |
+ prev, |
|
681 |
+ atempo->window, |
|
682 |
+ delta_max, |
|
683 |
+ atempo->drift, |
|
684 |
+ atempo->correlation, |
|
685 |
+ atempo->complex_to_real); |
|
686 |
+ |
|
687 |
+ if (correction) { |
|
688 |
+ // adjust fragment position: |
|
689 |
+ frag->position[0] -= correction; |
|
690 |
+ |
|
691 |
+ // clear so that the fragment can be reloaded: |
|
692 |
+ frag->nsamples = 0; |
|
693 |
+ |
|
694 |
+ // update cumulative correction drift counter: |
|
695 |
+ atempo->drift += correction; |
|
696 |
+ } |
|
697 |
+ |
|
698 |
+ return correction; |
|
699 |
+} |
|
700 |
+ |
|
701 |
/**
 * A helper macro for blending the overlap region of previous
 * and current audio fragment.
 *
 * NOTE: expects `a`, `b`, `dst`, `dst_end`, `wa`, `wb`, `overlap`,
 * `frag` and `atempo` to be in scope at the expansion site, and
 * advances `dst`, `wa`, `wb` and atempo->position[1]
 * (see yae_overlap_add).
 */
#define yae_blend(scalar_type)                                          \
    do {                                                                \
        const scalar_type *aaa = (const scalar_type *)a;                \
        const scalar_type *bbb = (const scalar_type *)b;                \
                                                                        \
        scalar_type *out = (scalar_type *)dst;                          \
        scalar_type *out_end = (scalar_type *)dst_end;                  \
        int64_t i;                                                      \
                                                                        \
        for (i = 0; i < overlap && out < out_end;                       \
             i++, atempo->position[1]++, wa++, wb++) {                  \
            float w0 = *wa;                                             \
            float w1 = *wb;                                             \
            int j;                                                      \
                                                                        \
            for (j = 0; j < atempo->channels;                           \
                 j++, aaa++, bbb++, out++) {                            \
                float t0 = (float)*aaa;                                 \
                float t1 = (float)*bbb;                                 \
                                                                        \
                /* pass the previous fragment through unblended for */  \
                /* input positions before the start of the stream   */  \
                /* (negative positions occur only for the very 1st  */  \
                /* fragment, see yae_clear):                        */  \
                *out =                                                  \
                    frag->position[0] + i < 0 ?                         \
                    *aaa :                                              \
                    (scalar_type)(t0 * w0 + t1 * w1);                   \
            }                                                           \
        }                                                               \
        dst = (uint8_t *)out;                                           \
    } while (0)
|
733 |
+ |
|
734 |
/**
 * Blend the overlap region of previous and current audio fragment
 * and output the results to the given destination buffer.
 *
 * @return
 *   0 if the overlap region was completely stored in the dst buffer,
 *   AVERROR(EAGAIN) if more destination buffer space is required.
 */
static int yae_overlap_add(ATempoContext *atempo,
                           uint8_t **dst_ref,
                           uint8_t *dst_end)
{
    // shortcuts:
    const AudioFragment *prev = yae_prev_frag(atempo);
    const AudioFragment *frag = yae_curr_frag(atempo);

    // overlap region in output sample coordinates; start from the
    // current output position so that a call interrupted by a full
    // destination buffer can be resumed:
    const int64_t start_here = FFMAX(atempo->position[1],
                                     frag->position[1]);

    const int64_t stop_here = FFMIN(prev->position[1] + prev->nsamples,
                                    frag->position[1] + frag->nsamples);

    const int64_t overlap = stop_here - start_here;

    // sample offsets into the previous/current fragment data:
    const int64_t ia = start_here - prev->position[1];
    const int64_t ib = start_here - frag->position[1];

    // Hann window coefficients used to feather the two fragments:
    const float *wa = atempo->hann + ia;
    const float *wb = atempo->hann + ib;

    const uint8_t *a = prev->data + ia * atempo->stride;
    const uint8_t *b = frag->data + ib * atempo->stride;

    uint8_t *dst = *dst_ref;

    av_assert0(start_here <= stop_here &&
               frag->position[1] <= start_here &&
               overlap <= frag->nsamples);

    // blend with the scalar type matching the sample format:
    if (atempo->format == AV_SAMPLE_FMT_U8) {
        yae_blend(uint8_t);
    } else if (atempo->format == AV_SAMPLE_FMT_S16) {
        yae_blend(int16_t);
    } else if (atempo->format == AV_SAMPLE_FMT_S32) {
        yae_blend(int);
    } else if (atempo->format == AV_SAMPLE_FMT_FLT) {
        yae_blend(float);
    } else if (atempo->format == AV_SAMPLE_FMT_DBL) {
        yae_blend(double);
    }

    // pass-back the updated destination buffer pointer:
    *dst_ref = dst;

    return atempo->position[1] == stop_here ? 0 : AVERROR(EAGAIN);
}
|
790 |
+ |
|
791 |
/**
 * Feed as much data to the filter as it is able to consume
 * and receive as much processed data in the destination buffer
 * as it is able to produce or store.
 *
 * Runs the WSOLA state machine until it stalls, either on input
 * (yae_load_frag needs more source data) or on output space
 * (yae_overlap_add needs more destination room); *src_ref and
 * *dst_ref are advanced past the consumed/produced bytes.
 *
 * NOTE: the states are checked with consecutive ifs (not else-if /
 * switch) on purpose -- a single loop iteration is expected to fall
 * through several states.
 */
static void
yae_apply(ATempoContext *atempo,
          const uint8_t **src_ref,
          const uint8_t *src_end,
          uint8_t **dst_ref,
          uint8_t *dst_end)
{
    while (1) {
        if (atempo->state == YAE_LOAD_FRAGMENT) {
            // load additional data for the current fragment:
            if (yae_load_frag(atempo, src_ref, src_end) != 0) {
                break;
            }

            // down-mix to mono:
            yae_downmix(atempo, yae_curr_frag(atempo));

            // apply rDFT:
            av_rdft_calc(atempo->real_to_complex, yae_curr_frag(atempo)->xdat);

            // must load the second fragment before alignment can start:
            if (!atempo->nfrag) {
                yae_advance_to_next_frag(atempo);
                continue;
            }

            atempo->state = YAE_ADJUST_POSITION;
        }

        if (atempo->state == YAE_ADJUST_POSITION) {
            // adjust position for better alignment:
            if (yae_adjust_position(atempo)) {
                // reload the fragment at the corrected position, so that the
                // Hann window blending would not require normalization:
                atempo->state = YAE_RELOAD_FRAGMENT;
            } else {
                atempo->state = YAE_OUTPUT_OVERLAP_ADD;
            }
        }

        if (atempo->state == YAE_RELOAD_FRAGMENT) {
            // load additional data if necessary due to position adjustment:
            if (yae_load_frag(atempo, src_ref, src_end) != 0) {
                break;
            }

            // down-mix to mono:
            yae_downmix(atempo, yae_curr_frag(atempo));

            // apply rDFT:
            av_rdft_calc(atempo->real_to_complex, yae_curr_frag(atempo)->xdat);

            atempo->state = YAE_OUTPUT_OVERLAP_ADD;
        }

        if (atempo->state == YAE_OUTPUT_OVERLAP_ADD) {
            // overlap-add and output the result:
            if (yae_overlap_add(atempo, dst_ref, dst_end) != 0) {
                break;
            }

            // advance to the next fragment, repeat:
            yae_advance_to_next_frag(atempo);
            atempo->state = YAE_LOAD_FRAGMENT;
        }
    }
}
|
863 |
+ |
|
864 |
/**
 * Flush any buffered data from the filter.
 *
 * Finishes (and, if needed, realigns) the current possibly-partial
 * fragment, overlap-adds its blending region, then copies the remaining
 * tail of the fragment straight into the destination buffer.  *dst_ref
 * is advanced past the bytes written.
 *
 * @param atempo   filter state; left in YAE_FLUSH_OUTPUT state
 * @param dst_ref  in/out: next writable output byte
 * @param dst_end  one past the last writable output byte
 *
 * @return
 *   0 if all data was completely stored in the dst buffer,
 *   AVERROR(EAGAIN) if more destination buffer space is required.
 */
static int yae_flush(ATempoContext *atempo,
                     uint8_t **dst_ref,
                     uint8_t *dst_end)
{
    AudioFragment *frag = yae_curr_frag(atempo);
    int64_t overlap_end;
    int64_t start_here;
    int64_t stop_here;
    int64_t offset;

    const uint8_t *src;
    uint8_t *dst;

    int src_size;
    int dst_size;
    int nbytes;

    atempo->state = YAE_FLUSH_OUTPUT;

    if (atempo->position[0] == frag->position[0] + frag->nsamples &&
        atempo->position[1] == frag->position[1] + frag->nsamples) {
        // the current fragment is already flushed:
        return 0;
    }

    if (frag->position[0] + frag->nsamples < atempo->position[0]) {
        // finish loading the current (possibly partial) fragment:
        yae_load_frag(atempo, NULL, NULL);

        if (atempo->nfrag) {
            // down-mix to mono:
            yae_downmix(atempo, frag);

            // apply rDFT:
            av_rdft_calc(atempo->real_to_complex, frag->xdat);

            // align current fragment to previous fragment:
            if (yae_adjust_position(atempo)) {
                // reload the current fragment due to adjusted position:
                yae_load_frag(atempo, NULL, NULL);
            }
        }
    }

    // flush the overlap region (at most half a window, or the whole
    // fragment if it is shorter than that):
    overlap_end = frag->position[1] + FFMIN(atempo->window / 2,
                                            frag->nsamples);

    while (atempo->position[1] < overlap_end) {
        if (yae_overlap_add(atempo, dst_ref, dst_end) != 0) {
            return AVERROR(EAGAIN);
        }
    }

    // flush the remainder of the current fragment:
    start_here = FFMAX(atempo->position[1], overlap_end);
    stop_here = frag->position[1] + frag->nsamples;
    offset = start_here - frag->position[1];
    av_assert0(start_here <= stop_here && frag->position[1] <= start_here);

    src = frag->data + offset * atempo->stride;
    dst = (uint8_t *)*dst_ref;

    // copy as much of the tail as fits in the destination buffer:
    src_size = (int)(stop_here - start_here) * atempo->stride;
    dst_size = dst_end - dst;
    nbytes = FFMIN(src_size, dst_size);

    memcpy(dst, src, nbytes);
    dst += nbytes;

    atempo->position[1] += (nbytes / atempo->stride);

    // pass-back the updated destination buffer pointer:
    *dst_ref = (uint8_t *)dst;

    return atempo->position[1] == stop_here ? 0 : AVERROR(EAGAIN);
}
|
948 |
+ |
|
949 |
+static av_cold int init(AVFilterContext *ctx, const char *args, void *opaque) |
|
950 |
+{ |
|
951 |
+ ATempoContext *atempo = ctx->priv; |
|
952 |
+ |
|
953 |
+ // NOTE: this assumes that the caller has memset ctx->priv to 0: |
|
954 |
+ atempo->format = AV_SAMPLE_FMT_NONE; |
|
955 |
+ atempo->tempo = 1.0; |
|
956 |
+ atempo->state = YAE_LOAD_FRAGMENT; |
|
957 |
+ |
|
958 |
+ return args ? yae_set_tempo(ctx, args) : 0; |
|
959 |
+} |
|
960 |
+ |
|
961 |
+static av_cold void uninit(AVFilterContext *ctx) |
|
962 |
+{ |
|
963 |
+ ATempoContext *atempo = ctx->priv; |
|
964 |
+ yae_release_buffers(atempo); |
|
965 |
+} |
|
966 |
+ |
|
967 |
+static int query_formats(AVFilterContext *ctx) |
|
968 |
+{ |
|
969 |
+ AVFilterChannelLayouts *layouts = NULL; |
|
970 |
+ AVFilterFormats *formats = NULL; |
|
971 |
+ |
|
972 |
+ // WSOLA necessitates an internal sliding window ring buffer |
|
973 |
+ // for incoming audio stream. |
|
974 |
+ // |
|
975 |
+ // Planar sample formats are too cumbersome to store in a ring buffer, |
|
976 |
+ // therefore planar sample formats are not supported. |
|
977 |
+ // |
|
978 |
+ enum AVSampleFormat sample_fmts[] = { |
|
979 |
+ AV_SAMPLE_FMT_U8, |
|
980 |
+ AV_SAMPLE_FMT_S16, |
|
981 |
+ AV_SAMPLE_FMT_S32, |
|
982 |
+ AV_SAMPLE_FMT_FLT, |
|
983 |
+ AV_SAMPLE_FMT_DBL, |
|
984 |
+ AV_SAMPLE_FMT_NONE |
|
985 |
+ }; |
|
986 |
+ |
|
987 |
+ layouts = ff_all_channel_layouts(); |
|
988 |
+ if (!layouts) { |
|
989 |
+ return AVERROR(ENOMEM); |
|
990 |
+ } |
|
991 |
+ ff_set_common_channel_layouts(ctx, layouts); |
|
992 |
+ |
|
993 |
+ formats = ff_make_format_list(sample_fmts); |
|
994 |
+ if (!formats) { |
|
995 |
+ return AVERROR(ENOMEM); |
|
996 |
+ } |
|
997 |
+ ff_set_common_formats(ctx, formats); |
|
998 |
+ |
|
999 |
+ formats = ff_all_samplerates(); |
|
1000 |
+ if (!formats) { |
|
1001 |
+ return AVERROR(ENOMEM); |
|
1002 |
+ } |
|
1003 |
+ ff_set_common_samplerates(ctx, formats); |
|
1004 |
+ |
|
1005 |
+ return 0; |
|
1006 |
+} |
|
1007 |
+ |
|
1008 |
+static int config_props(AVFilterLink *inlink) |
|
1009 |
+{ |
|
1010 |
+ AVFilterContext *ctx = inlink->dst; |
|
1011 |
+ ATempoContext *atempo = ctx->priv; |
|
1012 |
+ |
|
1013 |
+ enum AVSampleFormat format = inlink->format; |
|
1014 |
+ int sample_rate = (int)inlink->sample_rate; |
|
1015 |
+ int channels = av_get_channel_layout_nb_channels(inlink->channel_layout); |
|
1016 |
+ |
|
1017 |
+ return yae_reset(atempo, format, sample_rate, channels); |
|
1018 |
+} |
|
1019 |
+ |
|
1020 |
+static void push_samples(ATempoContext *atempo, |
|
1021 |
+ AVFilterLink *outlink, |
|
1022 |
+ int n_out) |
|
1023 |
+{ |
|
1024 |
+ atempo->dst_buffer->audio->sample_rate = outlink->sample_rate; |
|
1025 |
+ atempo->dst_buffer->audio->nb_samples = n_out; |
|
1026 |
+ |
|
1027 |
+ // adjust the PTS: |
|
1028 |
+ atempo->dst_buffer->pts = |
|
1029 |
+ av_rescale_q(atempo->nsamples_out, |
|
1030 |
+ (AVRational){ 1, outlink->sample_rate }, |
|
1031 |
+ outlink->time_base); |
|
1032 |
+ |
|
1033 |
+ ff_filter_samples(outlink, atempo->dst_buffer); |
|
1034 |
+ atempo->dst_buffer = NULL; |
|
1035 |
+ atempo->dst = NULL; |
|
1036 |
+ atempo->dst_end = NULL; |
|
1037 |
+ |
|
1038 |
+ atempo->nsamples_out += n_out; |
|
1039 |
+} |
|
1040 |
+ |
|
1041 |
+static void filter_samples(AVFilterLink *inlink, |
|
1042 |
+ AVFilterBufferRef *src_buffer) |
|
1043 |
+{ |
|
1044 |
+ AVFilterContext *ctx = inlink->dst; |
|
1045 |
+ ATempoContext *atempo = ctx->priv; |
|
1046 |
+ AVFilterLink *outlink = ctx->outputs[0]; |
|
1047 |
+ |
|
1048 |
+ int n_in = src_buffer->audio->nb_samples; |
|
1049 |
+ int n_out = (int)(0.5 + ((double)n_in) / atempo->tempo); |
|
1050 |
+ |
|
1051 |
+ const uint8_t *src = src_buffer->data[0]; |
|
1052 |
+ const uint8_t *src_end = src + n_in * atempo->stride; |
|
1053 |
+ |
|
1054 |
+ while (src < src_end) { |
|
1055 |
+ if (!atempo->dst_buffer) { |
|
1056 |
+ atempo->dst_buffer = ff_get_audio_buffer(outlink, |
|
1057 |
+ AV_PERM_WRITE, |
|
1058 |
+ n_out); |
|
1059 |
+ avfilter_copy_buffer_ref_props(atempo->dst_buffer, src_buffer); |
|
1060 |
+ |
|
1061 |
+ atempo->dst = atempo->dst_buffer->data[0]; |
|
1062 |
+ atempo->dst_end = atempo->dst + n_out * atempo->stride; |
|
1063 |
+ } |
|
1064 |
+ |
|
1065 |
+ yae_apply(atempo, &src, src_end, &atempo->dst, atempo->dst_end); |
|
1066 |
+ |
|
1067 |
+ if (atempo->dst == atempo->dst_end) { |
|
1068 |
+ push_samples(atempo, outlink, n_out); |
|
1069 |
+ atempo->request_fulfilled = 1; |
|
1070 |
+ } |
|
1071 |
+ } |
|
1072 |
+ |
|
1073 |
+ atempo->nsamples_in += n_in; |
|
1074 |
+ avfilter_unref_bufferp(&src_buffer); |
|
1075 |
+} |
|
1076 |
+ |
|
1077 |
+static int request_frame(AVFilterLink *outlink) |
|
1078 |
+{ |
|
1079 |
+ AVFilterContext *ctx = outlink->src; |
|
1080 |
+ ATempoContext *atempo = ctx->priv; |
|
1081 |
+ int ret; |
|
1082 |
+ |
|
1083 |
+ atempo->request_fulfilled = 0; |
|
1084 |
+ do { |
|
1085 |
+ ret = avfilter_request_frame(ctx->inputs[0]); |
|
1086 |
+ } |
|
1087 |
+ while (!atempo->request_fulfilled && ret >= 0); |
|
1088 |
+ |
|
1089 |
+ if (ret == AVERROR_EOF) { |
|
1090 |
+ // flush the filter: |
|
1091 |
+ int n_max = atempo->ring; |
|
1092 |
+ int n_out; |
|
1093 |
+ int err = AVERROR(EAGAIN); |
|
1094 |
+ |
|
1095 |
+ while (err == AVERROR(EAGAIN)) { |
|
1096 |
+ if (!atempo->dst_buffer) { |
|
1097 |
+ atempo->dst_buffer = ff_get_audio_buffer(outlink, |
|
1098 |
+ AV_PERM_WRITE, |
|
1099 |
+ n_max); |
|
1100 |
+ |
|
1101 |
+ atempo->dst = atempo->dst_buffer->data[0]; |
|
1102 |
+ atempo->dst_end = atempo->dst + n_max * atempo->stride; |
|
1103 |
+ } |
|
1104 |
+ |
|
1105 |
+ err = yae_flush(atempo, &atempo->dst, atempo->dst_end); |
|
1106 |
+ |
|
1107 |
+ n_out = ((atempo->dst - atempo->dst_buffer->data[0]) / |
|
1108 |
+ atempo->stride); |
|
1109 |
+ |
|
1110 |
+ if (n_out) { |
|
1111 |
+ push_samples(atempo, outlink, n_out); |
|
1112 |
+ } |
|
1113 |
+ } |
|
1114 |
+ |
|
1115 |
+ avfilter_unref_bufferp(&atempo->dst_buffer); |
|
1116 |
+ atempo->dst = NULL; |
|
1117 |
+ atempo->dst_end = NULL; |
|
1118 |
+ |
|
1119 |
+ return AVERROR_EOF; |
|
1120 |
+ } |
|
1121 |
+ |
|
1122 |
+ return ret; |
|
1123 |
+} |
|
1124 |
+ |
|
1125 |
+static int process_command(AVFilterContext *ctx, |
|
1126 |
+ const char *cmd, |
|
1127 |
+ const char *arg, |
|
1128 |
+ char *res, |
|
1129 |
+ int res_len, |
|
1130 |
+ int flags) |
|
1131 |
+{ |
|
1132 |
+ return !strcmp(cmd, "tempo") ? yae_set_tempo(ctx, arg) : AVERROR(ENOSYS); |
|
1133 |
+} |
|
1134 |
+ |
|
1135 |
/* Filter registration: "atempo" adjusts audio tempo without affecting
 * pitch (WSOLA-based time-scale modification; see the yae_* helpers).
 * The tempo can be changed at runtime via the "tempo" command
 * (process_command). */
AVFilter avfilter_af_atempo = {
    .name = "atempo",
    .description = NULL_IF_CONFIG_SMALL("Adjust audio tempo."),
    .init = init,
    .uninit = uninit,
    .query_formats = query_formats,
    .process_command = process_command,
    .priv_size = sizeof(ATempoContext),

    // single audio input; packed (non-planar) formats only, see
    // query_formats():
    .inputs = (const AVFilterPad[]) {
        { .name = "default",
          .type = AVMEDIA_TYPE_AUDIO,
          .filter_samples = filter_samples,
          .config_props = config_props,
          .min_perms = AV_PERM_READ, },
        { .name = NULL}
    },

    // single audio output; request_frame drives pulling and flushing:
    .outputs = (const AVFilterPad[]) {
        { .name = "default",
          .request_frame = request_frame,
          .type = AVMEDIA_TYPE_AUDIO, },
        { .name = NULL}
    },
};
... | ... |
@@ -45,6 +45,7 @@ void avfilter_register_all(void) |
45 | 45 |
REGISTER_FILTER (ASPLIT, asplit, af); |
46 | 46 |
REGISTER_FILTER (ASTREAMSYNC, astreamsync, af); |
47 | 47 |
REGISTER_FILTER (ASYNCTS, asyncts, af); |
48 |
+ REGISTER_FILTER (ATEMPO, atempo, af); |
|
48 | 49 |
REGISTER_FILTER (CHANNELSPLIT,channelsplit,af); |
49 | 50 |
REGISTER_FILTER (EARWAX, earwax, af); |
50 | 51 |
REGISTER_FILTER (PAN, pan, af); |
... | ... |
@@ -29,7 +29,7 @@ |
29 | 29 |
#include "libavutil/avutil.h" |
30 | 30 |
|
31 | 31 |
#define LIBAVFILTER_VERSION_MAJOR 2 |
32 |
-#define LIBAVFILTER_VERSION_MINOR 80 |
|
32 |
+#define LIBAVFILTER_VERSION_MINOR 81 |
|
33 | 33 |
#define LIBAVFILTER_VERSION_MICRO 100 |
34 | 34 |
|
35 | 35 |
#define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \ |