Browse code

avfilter/af_silenceremove: add peak detector

Signed-off-by: Paul B Mahol <onemda@gmail.com>

Paul B Mahol authored on 2015/12/28 04:45:46
Showing 2 changed files
... ...
@@ -2969,6 +2969,10 @@ at the beginning of each period of silence.
2969 2969
 For example, if you want to remove long pauses between words but do not want
2970 2970
 to remove the pauses completely. Default value is @code{0}.
2971 2971
 
2972
+@item detection
2973
+Set how silence is detected. Can be @code{rms} or @code{peak}. The latter is faster
2974
+and works better with digital silence, which is exactly 0.
2975
+Default value is @code{rms}.
2972 2976
 @end table
2973 2977
 
2974 2978
 @subsection Examples
... ...
@@ -65,11 +65,15 @@ typedef struct SilenceRemoveContext {
65 65
     double *window_current;
66 66
     double *window_end;
67 67
     int window_size;
68
-    double rms_sum;
68
+    double sum;
69 69
 
70 70
     int leave_silence;
71 71
     int restart;
72 72
     int64_t next_pts;
73
+
74
+    int detection;
75
+    void (*update)(struct SilenceRemoveContext *s, double sample);
76
+    double(*compute)(struct SilenceRemoveContext *s, double sample);
73 77
 } SilenceRemoveContext;
74 78
 
75 79
 #define OFFSET(x) offsetof(SilenceRemoveContext, x)
... ...
@@ -82,11 +86,58 @@ static const AVOption silenceremove_options[] = {
82 82
     { "stop_duration",   NULL, OFFSET(stop_duration),   AV_OPT_TYPE_DURATION, {.i64=0},     0,    9000, FLAGS },
83 83
     { "stop_threshold",  NULL, OFFSET(stop_threshold),  AV_OPT_TYPE_DOUBLE,   {.dbl=0},     0, DBL_MAX, FLAGS },
84 84
     { "leave_silence",   NULL, OFFSET(leave_silence),   AV_OPT_TYPE_BOOL,     {.i64=0},     0,       1, FLAGS },
85
+    { "detection",       NULL, OFFSET(detection),       AV_OPT_TYPE_INT,      {.i64=1},     0,       1, FLAGS, "detection" },
86
+    {   "peak",          0,    0,                       AV_OPT_TYPE_CONST,    {.i64=0},     0,       0, FLAGS, "detection" },
87
+    {   "rms",           0,    0,                       AV_OPT_TYPE_CONST,    {.i64=1},     0,       0, FLAGS, "detection" },
85 88
     { NULL }
86 89
 };
87 90
 
88 91
 AVFILTER_DEFINE_CLASS(silenceremove);
89 92
 
93
+static double compute_peak(SilenceRemoveContext *s, double sample) /* Preview the window level if `sample` replaced the current slot; does not modify *s. */
94
+{
95
+    double new_sum;
96
+
97
+    new_sum  = s->sum; /* start from the running total of the window */
98
+    new_sum -= *s->window_current; /* drop the value stored in the slot that would be overwritten */
99
+    new_sum += fabs(sample); /* add the magnitude of the candidate sample */
100
+
101
+    return new_sum / s->window_size; /* note: this is the mean of absolute values over the window, not a maximum */
102
+}
103
+
104
+static void update_peak(SilenceRemoveContext *s, double sample) /* Store |sample| in the current window slot, keep s->sum in step, and advance the slot. */
105
+{
106
+    s->sum -= *s->window_current; /* remove the value being overwritten from the running total */
107
+    *s->window_current = fabs(sample); /* the window holds magnitudes, not raw samples */
108
+    s->sum += *s->window_current;
109
+
110
+    s->window_current++;
111
+    if (s->window_current >= s->window_end) /* wrap around: the window is used as a circular buffer */
112
+        s->window_current = s->window;
113
+}
114
+
115
+static double compute_rms(SilenceRemoveContext *s, double sample) /* Preview the window RMS if `sample` replaced the current slot; does not modify *s. */
116
+{
117
+    double new_sum;
118
+
119
+    new_sum  = s->sum; /* start from the running total of the window */
120
+    new_sum -= *s->window_current; /* drop the value stored in the slot that would be overwritten */
121
+    new_sum += sample * sample; /* add the square of the candidate sample */
122
+
123
+    return sqrt(new_sum / s->window_size); /* root of the mean of the squared values */
124
+}
125
+
126
+static void update_rms(SilenceRemoveContext *s, double sample) /* Store sample^2 in the current window slot, keep s->sum in step, and advance the slot. */
127
+{
128
+    s->sum -= *s->window_current; /* remove the value being overwritten from the running total */
129
+    *s->window_current = sample * sample; /* the window holds squared samples */
130
+    s->sum += *s->window_current;
131
+
132
+    s->window_current++;
133
+    if (s->window_current >= s->window_end) /* wrap around: the window is used as a circular buffer */
134
+        s->window_current = s->window;
135
+}
136
+
90 137
 static av_cold int init(AVFilterContext *ctx)
91 138
 {
92 139
     SilenceRemoveContext *s = ctx->priv;
... ...
@@ -96,16 +147,27 @@ static av_cold int init(AVFilterContext *ctx)
96 96
         s->restart = 1;
97 97
     }
98 98
 
99
+    switch (s->detection) {
100
+    case 0:
101
+        s->update = update_peak;
102
+        s->compute = compute_peak;
103
+        break;
104
+    case 1:
105
+        s->update = update_rms;
106
+        s->compute = compute_rms;
107
+        break;
108
+    };
109
+
99 110
     return 0;
100 111
 }
101 112
 
102
-static void clear_rms(SilenceRemoveContext *s)
113
+static void clear_window(SilenceRemoveContext *s)
103 114
 {
104 115
     memset(s->window, 0, s->window_size * sizeof(*s->window));
105 116
 
106 117
     s->window_current = s->window;
107 118
     s->window_end = s->window + s->window_size;
108
-    s->rms_sum = 0;
119
+    s->sum = 0;
109 120
 }
110 121
 
111 122
 static int config_input(AVFilterLink *inlink)
... ...
@@ -118,7 +180,7 @@ static int config_input(AVFilterLink *inlink)
118 118
     if (!s->window)
119 119
         return AVERROR(ENOMEM);
120 120
 
121
-    clear_rms(s);
121
+    clear_window(s);
122 122
 
123 123
     s->start_duration = av_rescale(s->start_duration, inlink->sample_rate,
124 124
                                    AV_TIME_BASE);
... ...
@@ -153,28 +215,6 @@ static int config_input(AVFilterLink *inlink)
153 153
     return 0;
154 154
 }
155 155
 
156
-static double compute_rms(SilenceRemoveContext *s, double sample)
157
-{
158
-    double new_sum;
159
-
160
-    new_sum  = s->rms_sum;
161
-    new_sum -= *s->window_current;
162
-    new_sum += sample * sample;
163
-
164
-    return sqrt(new_sum / s->window_size);
165
-}
166
-
167
-static void update_rms(SilenceRemoveContext *s, double sample)
168
-{
169
-    s->rms_sum -= *s->window_current;
170
-    *s->window_current = sample * sample;
171
-    s->rms_sum += *s->window_current;
172
-
173
-    s->window_current++;
174
-    if (s->window_current >= s->window_end)
175
-        s->window_current = s->window;
176
-}
177
-
178 156
 static void flush(AVFrame *out, AVFilterLink *outlink,
179 157
                   int *nb_samples_written, int *ret)
180 158
 {
... ...
@@ -209,12 +249,12 @@ silence_trim:
209 209
         for (i = 0; i < nbs; i++) {
210 210
             threshold = 0;
211 211
             for (j = 0; j < inlink->channels; j++) {
212
-                threshold |= compute_rms(s, ibuf[j]) > s->start_threshold;
212
+                threshold |= s->compute(s, ibuf[j]) > s->start_threshold;
213 213
             }
214 214
 
215 215
             if (threshold) {
216 216
                 for (j = 0; j < inlink->channels; j++) {
217
-                    update_rms(s, *ibuf);
217
+                    s->update(s, *ibuf);
218 218
                     s->start_holdoff[s->start_holdoff_end++] = *ibuf++;
219 219
                     nb_samples_read++;
220 220
                 }
... ...
@@ -232,7 +272,7 @@ silence_trim:
232 232
                 s->start_holdoff_end = 0;
233 233
 
234 234
                 for (j = 0; j < inlink->channels; j++)
235
-                    update_rms(s, ibuf[j]);
235
+                    s->update(s, ibuf[j]);
236 236
 
237 237
                 ibuf += inlink->channels;
238 238
                 nb_samples_read += inlink->channels;
... ...
@@ -284,7 +324,7 @@ silence_copy:
284 284
             for (i = 0; i < nbs; i++) {
285 285
                 threshold = 1;
286 286
                 for (j = 0; j < inlink->channels; j++)
287
-                    threshold &= compute_rms(s, ibuf[j]) > s->stop_threshold;
287
+                    threshold &= s->compute(s, ibuf[j]) > s->stop_threshold;
288 288
 
289 289
                 if (threshold && s->stop_holdoff_end && !s->leave_silence) {
290 290
                     s->mode = SILENCE_COPY_FLUSH;
... ...
@@ -292,14 +332,14 @@ silence_copy:
292 292
                     goto silence_copy_flush;
293 293
                 } else if (threshold) {
294 294
                     for (j = 0; j < inlink->channels; j++) {
295
-                        update_rms(s, *ibuf);
295
+                        s->update(s, *ibuf);
296 296
                         *obuf++ = *ibuf++;
297 297
                         nb_samples_read++;
298 298
                         nb_samples_written++;
299 299
                     }
300 300
                 } else if (!threshold) {
301 301
                     for (j = 0; j < inlink->channels; j++) {
302
-                        update_rms(s, *ibuf);
302
+                        s->update(s, *ibuf);
303 303
                         if (s->leave_silence) {
304 304
                             *obuf++ = *ibuf;
305 305
                             nb_samples_written++;
... ...
@@ -323,7 +363,7 @@ silence_copy:
323 323
                                 s->start_found_periods = 0;
324 324
                                 s->start_holdoff_offset = 0;
325 325
                                 s->start_holdoff_end = 0;
326
-                                clear_rms(s);
326
+                                clear_window(s);
327 327
                                 s->mode = SILENCE_TRIM;
328 328
                                 flush(out, outlink, &nb_samples_written, &ret);
329 329
                                 goto silence_trim;