... | ... |
@@ -690,6 +690,46 @@ volume=-12dB |
690 | 690 |
@end example |
691 | 691 |
@end itemize |
692 | 692 |
|
693 |
+@section volumedetect |
|
694 |
+ |
|
695 |
+Detect the volume of the input audio. |
|
696 |
+ |
|
697 |
+The filter has no parameters. The input is not modified. Statistics about |
|
698 |
+the volume will be printed in the log when the input stream end is reached. |
|
699 |
+ |
|
700 |
+In particular it will show the mean volume (root mean square), maximum |
|
701 |
+volume (on a per-sample basis), and the beginning of a histogram of the |
|
702 |
+registered volume values (from the maximum value to a cumulated 1/1000 of |
|
703 |
+the samples). |
|
704 |
+ |
|
705 |
+All volumes are in decibels relative to the maximum PCM value. |
|
706 |
+ |
|
707 |
+Here is an excerpt of the output: |
|
708 |
+@example |
|
709 |
+[Parsed_volumedetect_0 @ 0xa23120] mean_volume: -27 dB |
|
710 |
+[Parsed_volumedetect_0 @ 0xa23120] max_volume: -4 dB |
|
711 |
+[Parsed_volumedetect_0 @ 0xa23120] histogram_4db: 6 |
|
712 |
+[Parsed_volumedetect_0 @ 0xa23120] histogram_5db: 62 |
|
713 |
+[Parsed_volumedetect_0 @ 0xa23120] histogram_6db: 286 |
|
714 |
+[Parsed_volumedetect_0 @ 0xa23120] histogram_7db: 1042 |
|
715 |
+[Parsed_volumedetect_0 @ 0xa23120] histogram_8db: 2551 |
|
716 |
+[Parsed_volumedetect_0 @ 0xa23120] histogram_9db: 4609 |
|
717 |
+[Parsed_volumedetect_0 @ 0xa23120] histogram_10db: 8409 |
|
718 |
+@end example |
|
719 |
+ |
|
720 |
+It means that: |
|
721 |
+@itemize |
|
722 |
+@item |
|
723 |
+The mean square energy is approximately -27 dB, or 10^-2.7. |
|
724 |
+@item |
|
725 |
+The largest sample is at -4 dB, or more precisely between -4 dB and -5 dB. |
|
726 |
+@item |
|
727 |
+There are 6 samples at -4 dB, 62 at -5 dB, 286 at -6 dB, etc. |
|
728 |
+@end itemize |
|
729 |
+ |
|
730 |
+In other words, raising the volume by +4 dB does not cause any clipping, |
|
731 |
+raising it by +5 dB causes clipping for 6 samples, etc. |
|
732 |
+ |
|
693 | 733 |
@section asyncts |
694 | 734 |
Synchronize audio data with timestamps by squeezing/stretching it and/or |
695 | 735 |
dropping samples/adding silence when needed. |
... | ... |
@@ -67,6 +67,7 @@ OBJS-$(CONFIG_PAN_FILTER) += af_pan.o |
67 | 67 |
OBJS-$(CONFIG_RESAMPLE_FILTER) += af_resample.o |
68 | 68 |
OBJS-$(CONFIG_SILENCEDETECT_FILTER) += af_silencedetect.o |
69 | 69 |
OBJS-$(CONFIG_VOLUME_FILTER) += af_volume.o |
70 |
+OBJS-$(CONFIG_VOLUMEDETECT_FILTER) += af_volumedetect.o |
|
70 | 71 |
|
71 | 72 |
OBJS-$(CONFIG_AEVALSRC_FILTER) += asrc_aevalsrc.o |
72 | 73 |
OBJS-$(CONFIG_ANULLSRC_FILTER) += asrc_anullsrc.o |
73 | 74 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,159 @@ |
0 |
+/* |
|
1 |
+ * Copyright (c) 2012 Nicolas George |
|
2 |
+ * |
|
3 |
+ * This file is part of FFmpeg. |
|
4 |
+ * |
|
5 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
6 |
+ * modify it under the terms of the GNU Lesser General Public License |
|
7 |
+ * as published by the Free Software Foundation; either |
|
8 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
9 |
+ * |
|
10 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
11 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
12 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
13 |
+ * GNU Lesser General Public License for more details. |
|
14 |
+ * |
|
15 |
+ * You should have received a copy of the GNU Lesser General Public License |
|
16 |
+ * along with FFmpeg; if not, write to the Free Software Foundation, Inc., |
|
17 |
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
18 |
+ */ |
|
19 |
+ |
|
20 |
+#include "libavutil/audioconvert.h" |
|
21 |
+#include "libavutil/avassert.h" |
|
22 |
+#include "audio.h" |
|
23 |
+#include "avfilter.h" |
|
24 |
+#include "internal.h" |
|
25 |
+ |
|
26 |
+typedef struct { |
|
27 |
+ /** |
|
28 |
+ * Number of samples seen at each signed 16-bit PCM value. |
|
29 |
+ * histogram[0x8000 + i] is the number of samples at value i. |
|
30 |
+ * The extra element (index 0x10000, i.e. value +0x8000) is there for symmetry: print_stats() probes both 0x8000 + max_volume and 0x8000 - max_volume starting at max_volume = 0x8000. |
|
31 |
+ */ |
|
32 |
+ uint64_t histogram[0x10001]; |
|
33 |
+} VolDetectContext; |
|
34 |
+/* Build the list of supported sample formats (S16 and S16P) and set it as the common format list; returns AVERROR(ENOMEM) when ff_make_format_list() returns NULL, 0 otherwise. */ |
|
35 |
+static int query_formats(AVFilterContext *ctx) |
|
36 |
+{ |
|
37 |
+ enum AVSampleFormat sample_fmts[] = { |
|
38 |
+ AV_SAMPLE_FMT_S16, |
|
39 |
+ AV_SAMPLE_FMT_S16P, |
|
40 |
+ AV_SAMPLE_FMT_NONE |
|
41 |
+ }; |
|
42 |
+ AVFilterFormats *formats; |
|
43 |
+ |
|
44 |
+ if (!(formats = ff_make_format_list(sample_fmts))) |
|
45 |
+ return AVERROR(ENOMEM); |
|
46 |
+ ff_set_common_formats(ctx, formats); |
|
47 |
+ |
|
48 |
+ return 0; |
|
49 |
+} |
|
50 |
+/* Count every 16-bit sample of the incoming buffer in vd->histogram, then hand the same buffer to the first output; returns whatever ff_filter_samples() returns. */ |
|
51 |
+static int filter_samples(AVFilterLink *inlink, AVFilterBufferRef *samples) |
|
52 |
+{ |
|
53 |
+ AVFilterContext *ctx = inlink->dst; |
|
54 |
+ VolDetectContext *vd = ctx->priv; |
|
55 |
+ int64_t layout = samples->audio->channel_layout; |
|
56 |
+ int nb_samples = samples->audio->nb_samples; |
|
57 |
+ int nb_channels = av_get_channel_layout_nb_channels(layout); |
|
58 |
+ int nb_planes = nb_channels; /* planar default: one plane per channel */ |
|
59 |
+ int plane, i; |
|
60 |
+ int16_t *pcm; |
|
61 |
+ /* Non-planar audio: a single plane holding nb_channels interleaved values per sample. */ |
|
62 |
+ if (!av_sample_fmt_is_planar(samples->format)) { |
|
63 |
+ nb_samples *= nb_channels; |
|
64 |
+ nb_planes = 1; |
|
65 |
+ } |
|
66 |
+ for (plane = 0; plane < nb_planes; plane++) { |
|
67 |
+ pcm = (int16_t *)samples->extended_data[plane]; |
|
68 |
+ for (i = 0; i < nb_samples; i++) |
|
69 |
+ vd->histogram[pcm[i] + 0x8000]++; |
|
70 |
+ } |
|
71 |
+ |
|
72 |
+ return ff_filter_samples(inlink->dst->outputs[0], samples); |
|
73 |
+} |
|
74 |
+ |
|
75 |
+#define MAX_DB 91 |
|
76 |
+/* Convert a squared amplitude v to decibels below full scale (0x8000 * 0x8000): returns -10 * log10(v / full scale), or MAX_DB when v is 0. */ |
|
77 |
+static inline double logdb(uint64_t v) |
|
78 |
+{ |
|
79 |
+ double d = v / (double)(0x8000 * 0x8000); |
|
80 |
+ if (!v) |
|
81 |
+ return MAX_DB; |
|
82 |
+ return log(d) * -4.3429448190325182765112891891660508229; /* -10/log(10) */ |
|
83 |
+} |
|
84 |
+/* Log the sample count, the mean (RMS) volume, the maximum volume and the top of the per-dB histogram computed from vd->histogram; logs only the count when no sample was seen. */ |
|
85 |
+static void print_stats(AVFilterContext *ctx) |
|
86 |
+{ |
|
87 |
+ VolDetectContext *vd = ctx->priv; |
|
88 |
+ int i, max_volume, shift; |
|
89 |
+ uint64_t nb_samples = 0, power = 0, nb_samples_shift = 0, sum = 0; |
|
90 |
+ uint64_t histdb[MAX_DB + 1] = { 0 }; |
|
91 |
+ /* Total number of samples; the counters are uint64_t, hence PRIu64 below. */ |
|
92 |
+ for (i = 0; i < 0x10000; i++) |
|
93 |
+ nb_samples += vd->histogram[i]; |
|
94 |
+ av_log(ctx, AV_LOG_INFO, "n_samples: %"PRIu64"\n", nb_samples); |
|
95 |
+ if (!nb_samples) |
|
96 |
+ return; |
|
97 |
+ |
|
98 |
+ /* If nb_samples > 1<<34, there is a risk of overflow in the |
|
99 |
+ multiplication or the sum: shift all histogram values to avoid that. |
|
100 |
+ The total number of samples must be recomputed to avoid rounding |
|
101 |
+ errors. */ |
|
102 |
+ shift = av_log2(nb_samples >> 33); |
|
103 |
+ for (i = 0; i < 0x10000; i++) { |
|
104 |
+ nb_samples_shift += vd->histogram[i] >> shift; |
|
105 |
+ power += (i - 0x8000) * (i - 0x8000) * (vd->histogram[i] >> shift); |
|
106 |
+ } |
|
107 |
+ if (!nb_samples_shift) |
|
108 |
+ return; |
|
109 |
+ power = (power + nb_samples_shift / 2) / nb_samples_shift; |
|
110 |
+ av_assert0(power <= 0x8000 * 0x8000); |
|
111 |
+ av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb(power)); |
|
112 |
+ /* Largest absolute sample value with a non-zero count, searched downwards from 0x8000. */ |
|
113 |
+ max_volume = 0x8000; |
|
114 |
+ while (max_volume > 0 && !vd->histogram[0x8000 + max_volume] && |
|
115 |
+ !vd->histogram[0x8000 - max_volume]) |
|
116 |
+ max_volume--; |
|
117 |
+ av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -logdb(max_volume * max_volume)); |
|
118 |
+ /* Fold the per-value counts into 1 dB bins (index = whole dB below full scale), skip the empty leading bins, then print bins until about 1/1000 of the samples are covered. */ |
|
119 |
+ for (i = 0; i < 0x10000; i++) |
|
120 |
+ histdb[(int)logdb((i - 0x8000) * (i - 0x8000))] += vd->histogram[i]; |
|
121 |
+ for (i = 0; i <= MAX_DB && !histdb[i]; i++); |
|
122 |
+ for (; i <= MAX_DB && sum < nb_samples / 1000; i++) { |
|
123 |
+ av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRIu64"\n", i, histdb[i]); |
|
124 |
+ sum += histdb[i]; |
|
125 |
+ } |
|
126 |
+} |
|
127 |
+/* Request a frame from the first input; when that request returns AVERROR_EOF, print the collected statistics before returning the same code to the caller. */ |
|
128 |
+static int request_frame(AVFilterLink *outlink) |
|
129 |
+{ |
|
130 |
+ AVFilterContext *ctx = outlink->src; |
|
131 |
+ int ret = ff_request_frame(ctx->inputs[0]); |
|
132 |
+ if (ret == AVERROR_EOF) |
|
133 |
+ print_stats(ctx); |
|
134 |
+ return ret; |
|
135 |
+} |
|
136 |
+/* Filter definition: one audio input pad handled by filter_samples() (read permission only) and one audio output pad driven by request_frame(). */ |
|
137 |
+AVFilter avfilter_af_volumedetect = { |
|
138 |
+ .name = "volumedetect", |
|
139 |
+ .description = NULL_IF_CONFIG_SMALL("Detect audio volume."), |
|
140 |
+ |
|
141 |
+ .priv_size = sizeof(VolDetectContext), |
|
142 |
+ .query_formats = query_formats, |
|
143 |
+ |
|
144 |
+ .inputs = (const AVFilterPad[]) { |
|
145 |
+ { .name = "default", |
|
146 |
+ .type = AVMEDIA_TYPE_AUDIO, |
|
147 |
+ .get_audio_buffer = ff_null_get_audio_buffer, |
|
148 |
+ .filter_samples = filter_samples, |
|
149 |
+ .min_perms = AV_PERM_READ, }, |
|
150 |
+ { .name = NULL } |
|
151 |
+ }, |
|
152 |
+ .outputs = (const AVFilterPad[]) { |
|
153 |
+ { .name = "default", |
|
154 |
+ .type = AVMEDIA_TYPE_AUDIO, |
|
155 |
+ .request_frame = request_frame, }, |
|
156 |
+ { .name = NULL } |
|
157 |
+ }, |
|
158 |
+}; |
... | ... |
@@ -57,6 +57,7 @@ void avfilter_register_all(void) |
57 | 57 |
REGISTER_FILTER (PAN, pan, af); |
58 | 58 |
REGISTER_FILTER (SILENCEDETECT, silencedetect, af); |
59 | 59 |
REGISTER_FILTER (VOLUME, volume, af); |
60 |
+ REGISTER_FILTER (VOLUMEDETECT,volumedetect,af); |
|
60 | 61 |
REGISTER_FILTER (RESAMPLE, resample, af); |
61 | 62 |
|
62 | 63 |
REGISTER_FILTER (AEVALSRC, aevalsrc, asrc); |