GitList

Browse code

lavfi: add volumedetect filter.

Nicolas George authored on 2012/08/18 20:49:47
Showing 5 changed files

Changelog index cd73c6d..14e01f3 100644
doc/filters.texi index 5793100..8847990 100644
libavfilter/Makefile index 916e54a..af4fde6 100644
libavfilter/af_volumedetect.c index 0000000..caf8559
libavfilter/allfilters.c index a9344c2..6defed4 100644

@@ -50,6 +50,7 @@ version next:
                      - edge detection filter
                      - framestep filter
                      - ffmpeg -shortest option is now per-output file
                     +- volume measurement filter
                      version 0.11:

doc/filters.texi

History View file @ 5980e57

@@ -690,6 +690,46 @@ volume=-12dB
                      @end example
                      @end itemize
                     +@section volumedetect
+                    +
                     +Detect the volume of the input audio.
+                    +
                     +The filter has no parameters. The input is not modified. Statistics about
                     +the volume are printed in the log when the end of the input stream is
                     +reached.
+                    +
                     +In particular it will show the mean volume (root mean square), maximum
                     +volume (on a per-sample basis), and the beginning of a histogram of the
                     +registered volume values (from the maximum value to a cumulative 1/1000 of
                     +the samples).
+                    +
                     +All volumes are in decibels relative to the maximum PCM value.
+                    +
                     +Here is an excerpt of the output:
                     +@example
                     +[Parsed_volumedetect_0 @ 0xa23120] mean_volume: -27 dB
                     +[Parsed_volumedetect_0 @ 0xa23120] max_volume: -4 dB
                     +[Parsed_volumedetect_0 @ 0xa23120] histogram_4db: 6
                     +[Parsed_volumedetect_0 @ 0xa23120] histogram_5db: 62
                     +[Parsed_volumedetect_0 @ 0xa23120] histogram_6db: 286
                     +[Parsed_volumedetect_0 @ 0xa23120] histogram_7db: 1042
                     +[Parsed_volumedetect_0 @ 0xa23120] histogram_8db: 2551
                     +[Parsed_volumedetect_0 @ 0xa23120] histogram_9db: 4609
                     +[Parsed_volumedetect_0 @ 0xa23120] histogram_10db: 8409
                     +@end example
+                    +
                     +It means that:
                     +@itemize
                     +@item
                     +The mean square energy is approximately -27 dB, or 10^-2.7.
                     +@item
                     +The largest sample is at -4 dB, or more precisely between -4 dB and -5 dB.
                     +@item
                     +There are 6 samples at -4 dB, 62 at -5 dB, 286 at -6 dB, etc.
                     +@end itemize
+                    +
                     +In other words, raising the volume by +4 dB does not cause any clipping,
                     +raising it by +5 dB causes clipping for 6 samples, etc.
+                    +
                      @section asyncts
                      Synchronize audio data with timestamps by squeezing/stretching it and/or
                      dropping samples/adding silence when needed.

libavfilter/Makefile

History View file @ 5980e57

@@ -67,6 +67,7 @@ OBJS-$(CONFIG_PAN_FILTER)                    += af_pan.o
                      OBJS-$(CONFIG_RESAMPLE_FILTER)               += af_resample.o
                      OBJS-$(CONFIG_SILENCEDETECT_FILTER)          += af_silencedetect.o
                      OBJS-$(CONFIG_VOLUME_FILTER)                 += af_volume.o
                     +OBJS-$(CONFIG_VOLUMEDETECT_FILTER)           += af_volumedetect.o
                      OBJS-$(CONFIG_AEVALSRC_FILTER)               += asrc_aevalsrc.o
                      OBJS-$(CONFIG_ANULLSRC_FILTER)               += asrc_anullsrc.o

libavfilter/af_volumedetect.c

History View file @ 5980e57

                     new file mode 100644
@@ -0,0 +1,159 @@
                     +/*
                     + * Copyright (c) 2012 Nicolas George
                     + *
                     + * This file is part of FFmpeg.
                     + *
                     + * FFmpeg is free software; you can redistribute it and/or
                     + * modify it under the terms of the GNU Lesser General Public License
                     + * as published by the Free Software Foundation; either
                     + * version 2.1 of the License, or (at your option) any later version.
                     + *
                     + * FFmpeg is distributed in the hope that it will be useful,
                     + * but WITHOUT ANY WARRANTY; without even the implied warranty of
                     + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
                     + * GNU Lesser General Public License for more details.
                     + *
                     + * You should have received a copy of the GNU Lesser General Public License
                     + * along with FFmpeg; if not, write to the Free Software Foundation, Inc.,
                     + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
                     + */
+                    +
                     +#include "libavutil/audioconvert.h"
                     +#include "libavutil/avassert.h"
                     +#include "audio.h"
                     +#include "avfilter.h"
                     +#include "internal.h"
+                    +
                     +/**
                     + * Private state of the volumedetect filter: a histogram of every
                     + * 16-bit sample value seen so far.
                     + */
                     +typedef struct {
                     +    /**
                     +     * Number of samples at each PCM value.
                     +     * histogram[0x8000 + i] is the number of samples at value i.
                     +     * The extra element is there for symmetry: it makes index
                     +     * 0x8000 + 0x8000 valid when the peak search probes +/-0x8000,
                     +     * even though no int16_t sample can ever be counted there.
                     +     */
                     +    uint64_t histogram[0x10001];
                     +} VolDetectContext;
+                    +
                     +/**
                     + * Declare the sample formats accepted by the filter: AV_SAMPLE_FMT_S16
                     + * and AV_SAMPLE_FMT_S16P.
                     + *
                     + * @param ctx filter context whose common formats are set
                     + * @return 0 on success, AVERROR(ENOMEM) if the format list could not
                     + *         be created
                     + */
                     +static int query_formats(AVFilterContext *ctx)
                     +{
                     +    enum AVSampleFormat supported[] = {
                     +        AV_SAMPLE_FMT_S16,
                     +        AV_SAMPLE_FMT_S16P,
                     +        AV_SAMPLE_FMT_NONE
                     +    };
                     +    AVFilterFormats *list = ff_make_format_list(supported);
+                    +
                     +    if (!list)
                     +        return AVERROR(ENOMEM);
+                    +
                     +    ff_set_common_formats(ctx, list);
                     +    return 0;
                     +}
+                    +
                     +/**
                     + * Count every sample of an incoming buffer in the histogram, then
                     + * forward the buffer unchanged to the output link.
                     + *
                     + * Samples are read as int16_t, matching the formats declared in
                     + * query_formats().
                     + *
                     + * @param inlink  link the buffer arrived on
                     + * @param samples audio buffer to account for and pass on
                     + * @return the value returned by ff_filter_samples() on the output link
                     + */
                     +static int filter_samples(AVFilterLink *inlink, AVFilterBufferRef *samples)
                     +{
                     +    AVFilterContext *ctx = inlink->dst;
                     +    VolDetectContext *vd = ctx->priv;
                     +    int64_t layout  = samples->audio->channel_layout;
                     +    int nb_samples  = samples->audio->nb_samples;
                     +    int nb_channels = av_get_channel_layout_nb_channels(layout);
                     +    /* Planar layout: one plane per channel, nb_samples values in each. */
                     +    int nb_planes   = nb_channels;
                     +    int plane, i;
                     +    int16_t *pcm;
+                    +
                     +    if (!av_sample_fmt_is_planar(samples->format)) {
                     +        /* Packed layout: one plane holding the samples of all channels. */
                     +        nb_samples *= nb_channels;
                     +        nb_planes = 1;
                     +    }
                     +    for (plane = 0; plane < nb_planes; plane++) {
                     +        pcm = (int16_t *)samples->extended_data[plane];
                     +        for (i = 0; i < nb_samples; i++)
                     +            /* Bias by 0x8000 so that -32768..32767 maps to 0..0xFFFF. */
                     +            vd->histogram[pcm[i] + 0x8000]++;
                     +    }
+                    +
                     +    return ff_filter_samples(inlink->dst->outputs[0], samples);
                     +}
+                    +
                     +/* Value returned by logdb() for a zero input; also the highest index of
                     +   the per-decibel histogram built in print_stats(). */
                     +#define MAX_DB 91
+                    +
                     +/**
                     + * Convert a squared sample value into a positive attenuation in dB
                     + * relative to 0x8000 * 0x8000.
                     + *
                     + * @param v squared amplitude (or mean of squared amplitudes)
                     + * @return -10 * log10(v / (0x8000 * 0x8000)), or MAX_DB when v is 0
                     + */
                     +static inline double logdb(uint64_t v)
                     +{
                     +    /* -10/log(10): turns a natural logarithm into minus decibels. */
                     +    const double neg_ten_over_ln10 = -4.3429448190325182765112891891660508229;
                     +    double ratio;
+                    +
                     +    if (v == 0)
                     +        return MAX_DB;
+                    +
                     +    ratio = v / (double)(0x8000 * 0x8000);
                     +    return neg_ten_over_ln10 * log(ratio);
                     +}
+                    +
                     +/**
                     + * Log the statistics gathered in the histogram: total sample count,
                     + * mean volume, maximum volume, and the loudest entries of a per-decibel
                     + * histogram. Only the sample count is logged when no sample was seen.
                     + *
                     + * @param ctx filter context; its priv field holds the VolDetectContext
                     + */
                     +static void print_stats(AVFilterContext *ctx)
                     +{
                     +    VolDetectContext *vd = ctx->priv;
                     +    int i, max_volume, shift;
                     +    uint64_t nb_samples = 0, power = 0, nb_samples_shift = 0, sum = 0;
                     +    uint64_t histdb[MAX_DB + 1] = { 0 };
+                    +
                     +    /* Total number of samples over the 0x10000 indices that an int16_t
                     +       value can map to. */
                     +    for (i = 0; i < 0x10000; i++)
                     +        nb_samples += vd->histogram[i];
                     +    av_log(ctx, AV_LOG_INFO, "n_samples: %"PRId64"\n", nb_samples);
                     +    if (!nb_samples)
                     +        return;
+                    +
                     +    /* If nb_samples > 1<<34, there is a risk of overflow in the
                     +       multiplication or the sum: shift all histogram values to avoid that.
                     +       The total number of samples must be recomputed to avoid rounding
                     +       errors. */
                     +    shift = av_log2(nb_samples >> 33);
                     +    for (i = 0; i < 0x10000; i++) {
                     +        nb_samples_shift += vd->histogram[i] >> shift;
                     +        /* (i - 0x8000) is the sample value; its square is weighted by the
                     +           (shifted) number of occurrences. */
                     +        power += (i - 0x8000) * (i - 0x8000) * (vd->histogram[i] >> shift);
                     +    }
                     +    if (!nb_samples_shift)
                     +        return;
                     +    /* Mean of the squared samples, rounded to nearest. */
                     +    power = (power + nb_samples_shift / 2) / nb_samples_shift;
                     +    av_assert0(power <= 0x8000 * 0x8000);
                     +    av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb(power));
+                    +
                     +    /* Largest absolute sample value present: walk down from 0x8000 until
                     +       either the positive or the negative entry is non-empty. */
                     +    max_volume = 0x8000;
                     +    while (max_volume > 0 && !vd->histogram[0x8000 + max_volume] &&
                     +                             !vd->histogram[0x8000 - max_volume])
                     +        max_volume--;
                     +    av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -logdb(max_volume * max_volume));
+                    +
                     +    /* Fold the per-value histogram into per-decibel buckets; the cast
                     +       truncates logdb() to an integer bucket index. */
                     +    for (i = 0; i < 0x10000; i++)
                     +        histdb[(int)logdb((i - 0x8000) * (i - 0x8000))] += vd->histogram[i];
                     +    /* Skip the empty buckets louder than the loudest sample. */
                     +    for (i = 0; i <= MAX_DB && !histdb[i]; i++);
                     +    /* Print buckets until the printed ones add up to nb_samples / 1000. */
                     +    for (; i <= MAX_DB && sum < nb_samples / 1000; i++) {
                     +        av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRId64"\n", i, histdb[i]);
                     +        sum += histdb[i];
                     +    }
                     +}
+                    +
                     +/**
                     + * Request a frame from the input link and print the statistics once
                     + * the input reports AVERROR_EOF.
                     + *
                     + * @param outlink output link the request came from
                     + * @return the value returned by ff_request_frame() on the input link
                     + */
                     +static int request_frame(AVFilterLink *outlink)
                     +{
                     +    AVFilterContext *filter = outlink->src;
                     +    int status = ff_request_frame(filter->inputs[0]);
+                    +
                     +    if (status != AVERROR_EOF)
                     +        return status;
+                    +
                     +    print_stats(filter);
                     +    return status;
                     +}
+                    +
                     +/* Filter definition: one audio input whose buffers go through
                     +   filter_samples(), and one audio output whose frame requests go
                     +   through request_frame(). */
                     +AVFilter avfilter_af_volumedetect = {
                     +    .name          = "volumedetect",
                     +    .description   = NULL_IF_CONFIG_SMALL("Detect audio volume."),
+                    +
                     +    .priv_size     = sizeof(VolDetectContext),
                     +    .query_formats = query_formats,
+                    +
                     +    .inputs    = (const AVFilterPad[]) {
                     +        { .name             = "default",
                     +          .type             = AVMEDIA_TYPE_AUDIO,
                     +          .get_audio_buffer = ff_null_get_audio_buffer,
                     +          .filter_samples   = filter_samples,
                     +          .min_perms        = AV_PERM_READ, },
                     +        { .name = NULL }
                     +    },
                     +    .outputs   = (const AVFilterPad[]) {
                     +        { .name = "default",
                     +          .type = AVMEDIA_TYPE_AUDIO,
                     +          .request_frame = request_frame, },
                     +        { .name = NULL }
                     +    },
                     +};

libavfilter/allfilters.c

History View file @ 5980e57

@@ -57,6 +57,7 @@ void avfilter_register_all(void)
                          REGISTER_FILTER (PAN,         pan,         af);
                          REGISTER_FILTER (SILENCEDETECT, silencedetect, af);
                          REGISTER_FILTER (VOLUME,      volume,      af);
                     +    REGISTER_FILTER (VOLUMEDETECT,volumedetect,af);
                          REGISTER_FILTER (RESAMPLE,    resample,    af);
                          REGISTER_FILTER (AEVALSRC,    aevalsrc,    asrc);