Browse code

lavfi: add volumedetect filter.

Nicolas George authored on 2012/08/18 20:49:47
Showing 5 changed files
... ...
@@ -50,6 +50,7 @@ version next:
50 50
 - edge detection filter
51 51
 - framestep filter
52 52
 - ffmpeg -shortest option is now per-output file
53
+- volume measurement filter
53 54
 
54 55
 
55 56
 version 0.11:
... ...
@@ -690,6 +690,46 @@ volume=-12dB
690 690
 @end example
691 691
 @end itemize
692 692
 
693
+@section volumedetect
694
+
695
+Detect the volume of the input audio.
696
+
697
+The filter has no parameters. The input is not modified. Statistics about
698
+the volume will be printed in the log when the input stream end is reached.
699
+
700
+In particular it will show the mean volume (root mean square), maximum
701
+volume (on a per-sample basis), and the beginning of a histogram of the
702
+registered volume values (from the maximum value to a cumulated 1/1000 of
703
+the samples).
704
+
705
+All volumes are in decibels relative to the maximum PCM value.
706
+
707
+Here is an excerpt of the output:
708
+@example
709
+[Parsed_volumedetect_0 @ 0xa23120] mean_volume: -27 dB
710
+[Parsed_volumedetect_0 @ 0xa23120] max_volume: -4 dB
711
+[Parsed_volumedetect_0 @ 0xa23120] histogram_4db: 6
712
+[Parsed_volumedetect_0 @ 0xa23120] histogram_5db: 62
713
+[Parsed_volumedetect_0 @ 0xa23120] histogram_6db: 286
714
+[Parsed_volumedetect_0 @ 0xa23120] histogram_7db: 1042
715
+[Parsed_volumedetect_0 @ 0xa23120] histogram_8db: 2551
716
+[Parsed_volumedetect_0 @ 0xa23120] histogram_9db: 4609
717
+[Parsed_volumedetect_0 @ 0xa23120] histogram_10db: 8409
718
+@end example
719
+
720
+It means that:
721
+@itemize
722
+@item
723
+The mean square energy is approximately -27 dB, or 10^-2.7.
724
+@item
725
+The largest sample is at -4 dB, or more precisely between -4 dB and -5 dB.
726
+@item
727
+There are 6 samples at -4 dB, 62 at -5 dB, 286 at -6 dB, etc.
728
+@end itemize
729
+
730
+In other words, raising the volume by +4 dB does not cause any clipping,
731
+raising it by +5 dB causes clipping for 6 samples, etc.
732
+
693 733
 @section asyncts
694 734
 Synchronize audio data with timestamps by squeezing/stretching it and/or
695 735
 dropping samples/adding silence when needed.
... ...
@@ -67,6 +67,7 @@ OBJS-$(CONFIG_PAN_FILTER)                    += af_pan.o
67 67
 OBJS-$(CONFIG_RESAMPLE_FILTER)               += af_resample.o
68 68
 OBJS-$(CONFIG_SILENCEDETECT_FILTER)          += af_silencedetect.o
69 69
 OBJS-$(CONFIG_VOLUME_FILTER)                 += af_volume.o
70
+OBJS-$(CONFIG_VOLUMEDETECT_FILTER)           += af_volumedetect.o
70 71
 
71 72
 OBJS-$(CONFIG_AEVALSRC_FILTER)               += asrc_aevalsrc.o
72 73
 OBJS-$(CONFIG_ANULLSRC_FILTER)               += asrc_anullsrc.o
73 74
new file mode 100644
... ...
@@ -0,0 +1,159 @@
0
+/*
1
+ * Copyright (c) 2012 Nicolas George
2
+ *
3
+ * This file is part of FFmpeg.
4
+ *
5
+ * FFmpeg is free software; you can redistribute it and/or
6
+ * modify it under the terms of the GNU Lesser General Public License
7
+ * as published by the Free Software Foundation; either
8
+ * version 2.1 of the License, or (at your option) any later version.
9
+ *
10
+ * FFmpeg is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
+ * GNU Lesser General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Lesser General Public License
16
+ * along with FFmpeg; if not, write to the Free Software Foundation, Inc.,
17
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ */
19
+
20
+#include "libavutil/audioconvert.h"
21
+#include "libavutil/avassert.h"
22
+#include "audio.h"
23
+#include "avfilter.h"
24
+#include "internal.h"
25
+
26
/**
 * Private state of the volumedetect filter.
 */
typedef struct {
    /**
     * Per-value sample counters: the slot at index 0x8000 + v counts the
     * samples whose PCM value is v.
     * One slot beyond the 0x10000 needed is kept so that the table is
     * symmetric around index 0x8000.
     */
    uint64_t histogram[0x10000 + 1];
} VolDetectContext;
34
+
35
+static int query_formats(AVFilterContext *ctx)
36
+{
37
+    enum AVSampleFormat sample_fmts[] = {
38
+        AV_SAMPLE_FMT_S16,
39
+        AV_SAMPLE_FMT_S16P,
40
+        AV_SAMPLE_FMT_NONE
41
+    };
42
+    AVFilterFormats *formats;
43
+
44
+    if (!(formats = ff_make_format_list(sample_fmts)))
45
+        return AVERROR(ENOMEM);
46
+    ff_set_common_formats(ctx, formats);
47
+
48
+    return 0;
49
+}
50
+
51
+static int filter_samples(AVFilterLink *inlink, AVFilterBufferRef *samples)
52
+{
53
+    AVFilterContext *ctx = inlink->dst;
54
+    VolDetectContext *vd = ctx->priv;
55
+    int64_t layout  = samples->audio->channel_layout;
56
+    int nb_samples  = samples->audio->nb_samples;
57
+    int nb_channels = av_get_channel_layout_nb_channels(layout);
58
+    int nb_planes   = nb_planes;
59
+    int plane, i;
60
+    int16_t *pcm;
61
+
62
+    if (!av_sample_fmt_is_planar(samples->format)) {
63
+        nb_samples *= nb_channels;
64
+        nb_planes = 1;
65
+    }
66
+    for (plane = 0; plane < nb_planes; plane++) {
67
+        pcm = (int16_t *)samples->extended_data[plane];
68
+        for (i = 0; i < nb_samples; i++)
69
+            vd->histogram[pcm[i] + 0x8000]++;
70
+    }
71
+
72
+    return ff_filter_samples(inlink->dst->outputs[0], samples);
73
+}
74
+
75
+#define MAX_DB 91
76
+
77
+static inline double logdb(uint64_t v)
78
+{
79
+    double d = v / (double)(0x8000 * 0x8000);
80
+    if (!v)
81
+        return MAX_DB;
82
+    return log(d) * -4.3429448190325182765112891891660508229; /* -10/log(10) */
83
+}
84
+
85
+static void print_stats(AVFilterContext *ctx)
86
+{
87
+    VolDetectContext *vd = ctx->priv;
88
+    int i, max_volume, shift;
89
+    uint64_t nb_samples = 0, power = 0, nb_samples_shift = 0, sum = 0;
90
+    uint64_t histdb[MAX_DB + 1] = { 0 };
91
+
92
+    for (i = 0; i < 0x10000; i++)
93
+        nb_samples += vd->histogram[i];
94
+    av_log(ctx, AV_LOG_INFO, "n_samples: %"PRId64"\n", nb_samples);
95
+    if (!nb_samples)
96
+        return;
97
+
98
+    /* If nb_samples > 1<<34, there is a risk of overflow in the
99
+       multiplication or the sum: shift all histogram values to avoid that.
100
+       The total number of samples must be recomputed to avoid rounding
101
+       errors. */
102
+    shift = av_log2(nb_samples >> 33);
103
+    for (i = 0; i < 0x10000; i++) {
104
+        nb_samples_shift += vd->histogram[i] >> shift;
105
+        power += (i - 0x8000) * (i - 0x8000) * (vd->histogram[i] >> shift);
106
+    }
107
+    if (!nb_samples_shift)
108
+        return;
109
+    power = (power + nb_samples_shift / 2) / nb_samples_shift;
110
+    av_assert0(power <= 0x8000 * 0x8000);
111
+    av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb(power));
112
+
113
+    max_volume = 0x8000;
114
+    while (max_volume > 0 && !vd->histogram[0x8000 + max_volume] &&
115
+                             !vd->histogram[0x8000 - max_volume])
116
+        max_volume--;
117
+    av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -logdb(max_volume * max_volume));
118
+
119
+    for (i = 0; i < 0x10000; i++)
120
+        histdb[(int)logdb((i - 0x8000) * (i - 0x8000))] += vd->histogram[i];
121
+    for (i = 0; i <= MAX_DB && !histdb[i]; i++);
122
+    for (; i <= MAX_DB && sum < nb_samples / 1000; i++) {
123
+        av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRId64"\n", i, histdb[i]);
124
+        sum += histdb[i];
125
+    }
126
+}
127
+
128
+static int request_frame(AVFilterLink *outlink)
129
+{
130
+    AVFilterContext *ctx = outlink->src;
131
+    int ret = ff_request_frame(ctx->inputs[0]);
132
+    if (ret == AVERROR_EOF)
133
+        print_stats(ctx);
134
+    return ret;
135
+}
136
+
137
+AVFilter avfilter_af_volumedetect = {
138
+    .name          = "volumedetect",
139
+    .description   = NULL_IF_CONFIG_SMALL("Detect audio volume."),
140
+
141
+    .priv_size     = sizeof(VolDetectContext),
142
+    .query_formats = query_formats,
143
+
144
+    .inputs    = (const AVFilterPad[]) {
145
+        { .name             = "default",
146
+          .type             = AVMEDIA_TYPE_AUDIO,
147
+          .get_audio_buffer = ff_null_get_audio_buffer,
148
+          .filter_samples   = filter_samples,
149
+          .min_perms        = AV_PERM_READ, },
150
+        { .name = NULL }
151
+    },
152
+    .outputs   = (const AVFilterPad[]) {
153
+        { .name = "default",
154
+          .type = AVMEDIA_TYPE_AUDIO,
155
+          .request_frame = request_frame, },
156
+        { .name = NULL }
157
+    },
158
+};
... ...
@@ -57,6 +57,7 @@ void avfilter_register_all(void)
57 57
     REGISTER_FILTER (PAN,         pan,         af);
58 58
     REGISTER_FILTER (SILENCEDETECT, silencedetect, af);
59 59
     REGISTER_FILTER (VOLUME,      volume,      af);
60
+    REGISTER_FILTER (VOLUMEDETECT,volumedetect,af);
60 61
     REGISTER_FILTER (RESAMPLE,    resample,    af);
61 62
 
62 63
     REGISTER_FILTER (AEVALSRC,    aevalsrc,    asrc);