Browse code

lavfi/ebur128: add sample and true peak metering.

Peaks are reported through metadata injection and logging; they are not yet presented visually.

Signed-off-by: Jean First <jeanfirst@gmail.com>

Clément Bœsch authored on 2014/01/26 07:19:05
Showing 3 changed files
... ...
@@ -9322,6 +9322,27 @@ verbose logging level
9322 9322
 
9323 9323
 By default, the logging level is set to @var{info}. If the @option{video} or
9324 9324
 the @option{metadata} options are set, it switches to @var{verbose}.
9325
+
9326
+@item peak
9327
+Set peak mode(s).
9328
+
9329
+Available modes can be combined (the option is a @code{flag} type). Possible
9330
+values are:
9331
+@table @samp
9332
+@item none
9333
+Disable any peak mode (default).
9334
+@item sample
9335
+Enable sample-peak mode.
9336
+
9337
+Simple peak mode looking for the highest sample value.
9338
+@item true
9339
+Enable true-peak mode.
9340
+
9341
+If enabled, the peak lookup is done on an over-sampled version of the input
9342
+stream for better peak accuracy. This mode requires a build with
9343
+@code{libswresample}.
9344
+@end table
9345
+
9325 9346
 @end table
9326 9347
 
9327 9348
 @subsection Examples
... ...
@@ -22,6 +22,8 @@ FFLIBS-$(CONFIG_SCALE_FILTER)                += swscale
22 22
 FFLIBS-$(CONFIG_SHOWSPECTRUM_FILTER)         += avcodec
23 23
 FFLIBS-$(CONFIG_SMARTBLUR_FILTER)            += swscale
24 24
 FFLIBS-$(CONFIG_SUBTITLES_FILTER)            += avformat avcodec
25
+EBUR128LIBS-$(CONFIG_SWRESAMPLE)              = swresample
26
+FFLIBS-$(CONFIG_EBUR128_FILTER)              += $(EBUR128LIBS-yes)
25 27
 
26 28
 HEADERS = asrc_abuffer.h                                                \
27 29
           avcodec.h                                                     \
... ...
@@ -37,6 +37,7 @@
37 37
 #include "libavutil/xga_font_data.h"
38 38
 #include "libavutil/opt.h"
39 39
 #include "libavutil/timestamp.h"
40
+#include "libswresample/swresample.h"
40 41
 #include "audio.h"
41 42
 #include "avfilter.h"
42 43
 #include "formats.h"
... ...
@@ -92,6 +93,16 @@ struct rect { int x, y, w, h; };
92 92
 typedef struct {
93 93
     const AVClass *class;           ///< AVClass context for log and options purpose
94 94
 
95
+    /* peak metering */
96
+    int peak_mode;                  ///< enabled peak modes
97
+    double *true_peaks;             ///< true peaks per channel
98
+    double *sample_peaks;           ///< sample peaks per channel
99
+#if CONFIG_SWRESAMPLE
100
+    SwrContext *swr_ctx;            ///< over-sampling context for true peak metering
101
+    double *swr_buf;                ///< resampled audio data for true peak metering
102
+    int swr_linesize;
103
+#endif
104
+
95 105
     /* video  */
96 106
     int do_video;                   ///< 1 if video output enabled, 0 otherwise
97 107
     int w, h;                       ///< size of the video output
... ...
@@ -130,6 +141,12 @@ typedef struct {
130 130
     int metadata;                   ///< whether or not to inject loudness results in frames
131 131
 } EBUR128Context;
132 132
 
133
+enum { /* bit flags stored in EBUR128Context.peak_mode (the "peak" option is of flags type) */
134
+    PEAK_MODE_NONE          = 0,    /* no flag set: default value of the "peak" option */
135
+    PEAK_MODE_SAMPLES_PEAKS = 1<<1, /* selected by the "sample" option constant; gates the sample_peaks[] allocation and update */
136
+    PEAK_MODE_TRUE_PEAKS    = 1<<2, /* selected by the "true" option constant; init() rejects it when CONFIG_SWRESAMPLE is 0 */
137
+};
138
+
133 139
 #define OFFSET(x) offsetof(EBUR128Context, x)
134 140
 #define A AV_OPT_FLAG_AUDIO_PARAM
135 141
 #define V AV_OPT_FLAG_VIDEO_PARAM
... ...
@@ -142,7 +159,11 @@ static const AVOption ebur128_options[] = {
142 142
         { "info",    "information logging level", 0, AV_OPT_TYPE_CONST, {.i64 = AV_LOG_INFO},    INT_MIN, INT_MAX, A|V|F, "level" },
143 143
         { "verbose", "verbose logging level",     0, AV_OPT_TYPE_CONST, {.i64 = AV_LOG_VERBOSE}, INT_MIN, INT_MAX, A|V|F, "level" },
144 144
     { "metadata", "inject metadata in the filtergraph", OFFSET(metadata), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, A|V|F },
145
-    { NULL }
145
+    { "peak", "set peak mode", OFFSET(peak_mode), AV_OPT_TYPE_FLAGS, {.i64 = PEAK_MODE_NONE}, 0, INT_MAX, A|F, "mode" },
146
+        { "none",   "disable any peak mode",   0, AV_OPT_TYPE_CONST, {.i64 = PEAK_MODE_NONE},          INT_MIN, INT_MAX, A|F, "mode" },
147
+        { "sample", "enable peak-sample mode", 0, AV_OPT_TYPE_CONST, {.i64 = PEAK_MODE_SAMPLES_PEAKS}, INT_MIN, INT_MAX, A|F, "mode" },
148
+        { "true",   "enable true-peak mode",   0, AV_OPT_TYPE_CONST, {.i64 = PEAK_MODE_TRUE_PEAKS},    INT_MIN, INT_MAX, A|F, "mode" },
149
+    { NULL },
146 150
 };
147 151
 
148 152
 AVFILTER_DEFINE_CLASS(ebur128);
... ...
@@ -326,9 +347,13 @@ static int config_audio_input(AVFilterLink *inlink)
326 326
     AVFilterContext *ctx = inlink->dst;
327 327
     EBUR128Context *ebur128 = ctx->priv;
328 328
 
329
-    /* force 100ms framing in case of metadata injection: the frames must have
330
-     * a granularity of the window overlap to be accurately exploited */
331
-    if (ebur128->metadata)
329
+    /* Force 100ms framing in case of metadata injection: the frames must have
330
+     * a granularity of the window overlap to be accurately exploited.
331
+     * As for the true peaks mode, it just simplifies the resampling buffer
332
+     * allocation and the lookup in it (since sample buffers differ in size, it
333
+     * can be more complex to integrate in the one-sample loop of
334
+     * filter_frame()). */
335
+    if (ebur128->metadata || (ebur128->peak_mode & PEAK_MODE_TRUE_PEAKS))
332 336
         inlink->min_samples =
333 337
         inlink->max_samples =
334 338
         inlink->partial_buf_size = inlink->sample_rate / 10;
... ...
@@ -383,6 +408,36 @@ static int config_audio_output(AVFilterLink *outlink)
383 383
 
384 384
     outlink->flags |= FF_LINK_FLAG_REQUEST_LOOP;
385 385
 
386
+#if CONFIG_SWRESAMPLE
387
+    if (ebur128->peak_mode & PEAK_MODE_TRUE_PEAKS) {
388
+        int ret;
389
+
390
+        ebur128->swr_buf    = av_malloc(19200 * nb_channels * sizeof(double));
391
+        ebur128->true_peaks = av_calloc(nb_channels, sizeof(*ebur128->true_peaks));
392
+        ebur128->swr_ctx    = swr_alloc();
393
+        if (!ebur128->swr_buf || !ebur128->true_peaks || !ebur128->swr_ctx)
394
+            return AVERROR(ENOMEM);
395
+
396
+        av_opt_set_int(ebur128->swr_ctx, "in_channel_layout",    outlink->channel_layout, 0);
397
+        av_opt_set_int(ebur128->swr_ctx, "in_sample_rate",       outlink->sample_rate, 0);
398
+        av_opt_set_sample_fmt(ebur128->swr_ctx, "in_sample_fmt", outlink->format, 0);
399
+
400
+        av_opt_set_int(ebur128->swr_ctx, "out_channel_layout",    outlink->channel_layout, 0);
401
+        av_opt_set_int(ebur128->swr_ctx, "out_sample_rate",       192000, 0);
402
+        av_opt_set_sample_fmt(ebur128->swr_ctx, "out_sample_fmt", outlink->format, 0);
403
+
404
+        ret = swr_init(ebur128->swr_ctx);
405
+        if (ret < 0)
406
+            return ret;
407
+    }
408
+#endif
409
+
410
+    if (ebur128->peak_mode & PEAK_MODE_SAMPLES_PEAKS) {
411
+        ebur128->sample_peaks = av_calloc(nb_channels, sizeof(*ebur128->sample_peaks));
412
+        if (!ebur128->sample_peaks)
413
+            return AVERROR(ENOMEM);
414
+    }
415
+
386 416
     return 0;
387 417
 }
388 418
 
... ...
@@ -416,6 +471,12 @@ static av_cold int init(AVFilterContext *ctx)
416 416
             ebur128->loglevel = AV_LOG_INFO;
417 417
     }
418 418
 
419
+    if (!CONFIG_SWRESAMPLE && (ebur128->peak_mode & PEAK_MODE_TRUE_PEAKS)) {
420
+        av_log(ctx, AV_LOG_ERROR,
421
+               "True-peak mode requires libswresample to be performed\n");
422
+        return AVERROR(EINVAL);
423
+    }
424
+
419 425
     // if meter is  +9 scale, scale range is from -18 LU to  +9 LU (or 3*9)
420 426
     // if meter is +18 scale, scale range is from -36 LU to +18 LU (or 3*18)
421 427
     ebur128->scale_range = 3 * ebur128->meter;
... ...
@@ -491,6 +552,22 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
491 491
     const double *samples = (double *)insamples->data[0];
492 492
     AVFrame *pic = ebur128->outpicref;
493 493
 
494
+#if CONFIG_SWRESAMPLE
495
+    if (ebur128->peak_mode & PEAK_MODE_TRUE_PEAKS) {
496
+        const double *swr_samples = ebur128->swr_buf;
497
+        int ret = swr_convert(ebur128->swr_ctx, (uint8_t**)&ebur128->swr_buf, 19200,
498
+                              (const uint8_t **)insamples->data, nb_samples);
499
+        if (ret < 0)
500
+            return ret;
501
+        for (idx_insample = 0; idx_insample < ret; idx_insample++) {
502
+            for (ch = 0; ch < nb_channels; ch++) {
503
+                ebur128->true_peaks[ch] = FFMAX(ebur128->true_peaks[ch], FFABS(*swr_samples));
504
+                swr_samples++;
505
+            }
506
+        }
507
+    }
508
+#endif
509
+
494 510
     for (idx_insample = 0; idx_insample < nb_samples; idx_insample++) {
495 511
         const int bin_id_400  = ebur128->i400.cache_pos;
496 512
         const int bin_id_3000 = ebur128->i3000.cache_pos;
... ...
@@ -509,6 +586,9 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
509 509
         for (ch = 0; ch < nb_channels; ch++) {
510 510
             double bin;
511 511
 
512
+            if (ebur128->peak_mode & PEAK_MODE_SAMPLES_PEAKS)
513
+                ebur128->sample_peaks[ch] = FFMAX(ebur128->sample_peaks[ch], FFABS(*samples));
514
+
512 515
             ebur128->x[ch * 3] = *samples++; // set X[i]
513 516
 
514 517
             if (!ebur128->ch_weighting[ch])
... ...
@@ -677,22 +757,52 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
677 677
 
678 678
             if (ebur128->metadata) { /* happens only once per filter_frame call */
679 679
                 char metabuf[128];
680
+#define META_PREFIX "lavfi.r128."
681
+
680 682
 #define SET_META(name, var) do {                                            \
681 683
     snprintf(metabuf, sizeof(metabuf), "%.3f", var);                        \
682
-    av_dict_set(&insamples->metadata, "lavfi.r128." name, metabuf, 0);      \
684
+    av_dict_set(&insamples->metadata, name, metabuf, 0);                    \
683 685
 } while (0)
684
-                SET_META("M",        loudness_400);
685
-                SET_META("S",        loudness_3000);
686
-                SET_META("I",        ebur128->integrated_loudness);
687
-                SET_META("LRA",      ebur128->loudness_range);
688
-                SET_META("LRA.low",  ebur128->lra_low);
689
-                SET_META("LRA.high", ebur128->lra_high);
686
+
687
+#define SET_META_PEAK(name, ptype) do {                                     \
688
+    if (ebur128->peak_mode & PEAK_MODE_ ## ptype ## _PEAKS) {               \
689
+        char key[64];                                                       \
690
+        for (ch = 0; ch < nb_channels; ch++) {                              \
691
+            snprintf(key, sizeof(key),                                      \
692
+                     META_PREFIX AV_STRINGIFY(name) "_peaks_ch%d", ch);     \
693
+            SET_META(key, ebur128->name##_peaks[ch]);                       \
694
+        }                                                                   \
695
+    }                                                                       \
696
+} while (0)
697
+
698
+                SET_META(META_PREFIX "M",        loudness_400);
699
+                SET_META(META_PREFIX "S",        loudness_3000);
700
+                SET_META(META_PREFIX "I",        ebur128->integrated_loudness);
701
+                SET_META(META_PREFIX "LRA",      ebur128->loudness_range);
702
+                SET_META(META_PREFIX "LRA.low",  ebur128->lra_low);
703
+                SET_META(META_PREFIX "LRA.high", ebur128->lra_high);
704
+
705
+                SET_META_PEAK(sample, SAMPLES);
706
+                SET_META_PEAK(true,   TRUE);
690 707
             }
691 708
 
692
-            av_log(ctx, ebur128->loglevel, "t: %-10s " LOG_FMT "\n",
709
+            av_log(ctx, ebur128->loglevel, "t: %-10s " LOG_FMT,
693 710
                    av_ts2timestr(pts, &outlink->time_base),
694 711
                    loudness_400, loudness_3000,
695 712
                    ebur128->integrated_loudness, ebur128->loudness_range);
713
+
714
+#define PRINT_PEAKS(str, sp, ptype) do {                        \
715
+    if (ebur128->peak_mode & PEAK_MODE_ ## ptype ## _PEAKS) {   \
716
+        av_log(ctx, ebur128->loglevel, " [" str ":");           \
717
+        for (ch = 0; ch < nb_channels; ch++)                    \
718
+            av_log(ctx, ebur128->loglevel, " %.5f", sp[ch]);    \
719
+        av_log(ctx, ebur128->loglevel, "]");                    \
720
+    }                                                           \
721
+} while (0)
722
+
723
+            PRINT_PEAKS("SPK", ebur128->sample_peaks, SAMPLES);
724
+            PRINT_PEAKS("TPK", ebur128->true_peaks,   TRUE);
725
+            av_log(ctx, ebur128->loglevel, "\n");
696 726
         }
697 727
     }
698 728
 
... ...
@@ -764,6 +874,8 @@ static av_cold void uninit(AVFilterContext *ctx)
764 764
 
765 765
     av_freep(&ebur128->y_line_ref);
766 766
     av_freep(&ebur128->ch_weighting);
767
+    av_freep(&ebur128->true_peaks);
768
+    av_freep(&ebur128->sample_peaks);
767 769
     av_freep(&ebur128->i400.histogram);
768 770
     av_freep(&ebur128->i3000.histogram);
769 771
     for (i = 0; i < ebur128->nb_channels; i++) {
... ...
@@ -773,6 +885,10 @@ static av_cold void uninit(AVFilterContext *ctx)
773 773
     for (i = 0; i < ctx->nb_outputs; i++)
774 774
         av_freep(&ctx->output_pads[i].name);
775 775
     av_frame_free(&ebur128->outpicref);
776
+#if CONFIG_SWRESAMPLE
777
+    av_freep(&ebur128->swr_buf);
778
+    swr_free(&ebur128->swr_ctx);
779
+#endif
776 780
 }
777 781
 
778 782
 static const AVFilterPad ebur128_inputs[] = {