Signed-off-by: Paul B Mahol <onemda@gmail.com>
Paul B Mahol authored on 2015/12/28 04:45:46... | ... |
@@ -2969,6 +2969,10 @@ at the beginning of each period of silence. |
2969 | 2969 |
For example, if you want to remove long pauses between words but do not want |
2970 | 2970 |
to remove the pauses completely. Default value is @code{0}. |
2971 | 2971 |
|
2972 |
+@item detection |
|
2973 |
+Set how silence is detected. Can be @code{rms} or @code{peak}. The latter is faster |
|
2974 |
+and works better with digital silence, which is exactly 0. |
|
2975 |
+Default value is @code{rms}. |
|
2972 | 2976 |
@end table |
2973 | 2977 |
|
2974 | 2978 |
@subsection Examples |
... | ... |
@@ -65,11 +65,15 @@ typedef struct SilenceRemoveContext { |
65 | 65 |
double *window_current; |
66 | 66 |
double *window_end; |
67 | 67 |
int window_size; |
68 |
- double rms_sum; |
|
68 |
+ double sum; |
|
69 | 69 |
|
70 | 70 |
int leave_silence; |
71 | 71 |
int restart; |
72 | 72 |
int64_t next_pts; |
73 |
+ |
|
74 |
+ int detection; |
|
75 |
+ void (*update)(struct SilenceRemoveContext *s, double sample); |
|
76 |
+ double(*compute)(struct SilenceRemoveContext *s, double sample); |
|
73 | 77 |
} SilenceRemoveContext; |
74 | 78 |
|
75 | 79 |
#define OFFSET(x) offsetof(SilenceRemoveContext, x) |
... | ... |
@@ -82,11 +86,58 @@ static const AVOption silenceremove_options[] = { |
82 | 82 |
{ "stop_duration", NULL, OFFSET(stop_duration), AV_OPT_TYPE_DURATION, {.i64=0}, 0, 9000, FLAGS }, |
83 | 83 |
{ "stop_threshold", NULL, OFFSET(stop_threshold), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0, DBL_MAX, FLAGS }, |
84 | 84 |
{ "leave_silence", NULL, OFFSET(leave_silence), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS }, |
85 |
+ { "detection", NULL, OFFSET(detection), AV_OPT_TYPE_INT, {.i64=1}, 0, 1, FLAGS, "detection" }, |
|
86 |
+ { "peak", 0, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "detection" }, |
|
87 |
+ { "rms", 0, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "detection" }, |
|
85 | 88 |
{ NULL } |
86 | 89 |
}; |
87 | 90 |
|
88 | 91 |
AVFILTER_DEFINE_CLASS(silenceremove); |
89 | 92 |
|
93 |
/*
 * compute_peak (added by this change): preview the detection level that would
 * result from feeding `sample` into the window, without modifying any state.
 *
 * Starts from the running total s->sum, subtracts the value stored in the slot
 * at s->window_current, adds fabs(sample), and returns that total divided by
 * s->window_size.
 *
 * NOTE(review): despite the name, the returned value is the mean of the
 * absolute values in the window, not a maximum -- confirm this is the intended
 * "peak" metric.
 */
+static double compute_peak(SilenceRemoveContext *s, double sample) |
|
94 |
+{ |
|
95 |
+ double new_sum; |
|
96 |
+ |
|
97 |
+ new_sum = s->sum; |
|
98 |
+ new_sum -= *s->window_current; |
|
99 |
+ new_sum += fabs(sample); |
|
100 |
+ |
|
101 |
+ return new_sum / s->window_size; |
|
102 |
+} |
|
103 |
+ |
|
104 |
/*
 * update_peak (added by this change): commit `sample` into the sliding window.
 *
 * Subtracts the value in the slot at s->window_current from s->sum, overwrites
 * that slot with fabs(sample), and adds the new slot value back into s->sum.
 * The cursor is then advanced by one element and reset to s->window once it
 * reaches s->window_end, so the buffer is reused circularly.
 */
+static void update_peak(SilenceRemoveContext *s, double sample) |
|
105 |
+{ |
|
106 |
+ s->sum -= *s->window_current; |
|
107 |
+ *s->window_current = fabs(sample); |
|
108 |
+ s->sum += *s->window_current; |
|
109 |
+ |
|
110 |
+ s->window_current++; |
|
111 |
+ if (s->window_current >= s->window_end) |
|
112 |
+ s->window_current = s->window; |
|
113 |
+} |
|
114 |
+ |
|
115 |
/*
 * compute_rms (re-added by this change, now reading s->sum): preview the RMS
 * level that would result from feeding `sample` into the window, without
 * modifying any state.
 *
 * Starts from the running total s->sum, subtracts the value stored in the slot
 * at s->window_current, adds sample * sample, and returns the square root of
 * that total divided by s->window_size. (update_rms stores squared samples in
 * the window slots, so the total is a sum of squares.)
 */
+static double compute_rms(SilenceRemoveContext *s, double sample) |
|
116 |
+{ |
|
117 |
+ double new_sum; |
|
118 |
+ |
|
119 |
+ new_sum = s->sum; |
|
120 |
+ new_sum -= *s->window_current; |
|
121 |
+ new_sum += sample * sample; |
|
122 |
+ |
|
123 |
+ return sqrt(new_sum / s->window_size); |
|
124 |
+} |
|
125 |
+ |
|
126 |
/*
 * update_rms (re-added by this change, now writing s->sum): commit `sample`
 * into the sliding window.
 *
 * Subtracts the value in the slot at s->window_current from s->sum, overwrites
 * that slot with sample * sample, and adds the new slot value back into
 * s->sum. The cursor is then advanced by one element and reset to s->window
 * once it reaches s->window_end, so the buffer is reused circularly.
 */
+static void update_rms(SilenceRemoveContext *s, double sample) |
|
127 |
+{ |
|
128 |
+ s->sum -= *s->window_current; |
|
129 |
+ *s->window_current = sample * sample; |
|
130 |
+ s->sum += *s->window_current; |
|
131 |
+ |
|
132 |
+ s->window_current++; |
|
133 |
+ if (s->window_current >= s->window_end) |
|
134 |
+ s->window_current = s->window; |
|
135 |
+} |
|
136 |
+ |
|
90 | 137 |
static av_cold int init(AVFilterContext *ctx) |
91 | 138 |
{ |
92 | 139 |
SilenceRemoveContext *s = ctx->priv; |
... | ... |
@@ -96,16 +147,27 @@ static av_cold int init(AVFilterContext *ctx) |
96 | 96 |
s->restart = 1; |
97 | 97 |
} |
98 | 98 |
|
99 |
+ switch (s->detection) { |
|
100 |
+ case 0: |
|
101 |
+ s->update = update_peak; |
|
102 |
+ s->compute = compute_peak; |
|
103 |
+ break; |
|
104 |
+ case 1: |
|
105 |
+ s->update = update_rms; |
|
106 |
+ s->compute = compute_rms; |
|
107 |
+ break; |
|
108 |
+ }; |
|
109 |
+ |
|
99 | 110 |
return 0; |
100 | 111 |
} |
101 | 112 |
|
102 |
/*
 * Rename hunk: clear_rms becomes clear_window and the field s->rms_sum becomes
 * s->sum; the remaining statements are unchanged context.
 *
 * The function resets the sliding window to its empty state: it zero-fills
 * s->window_size elements of s->window, points s->window_current at the first
 * element, sets s->window_end to one past the last element, and zeroes the
 * running total.
 */
-static void clear_rms(SilenceRemoveContext *s) |
|
113 |
+static void clear_window(SilenceRemoveContext *s) |
|
103 | 114 |
{ |
104 | 115 |
memset(s->window, 0, s->window_size * sizeof(*s->window)); |
105 | 116 |
|
106 | 117 |
s->window_current = s->window; |
107 | 118 |
s->window_end = s->window + s->window_size; |
108 |
- s->rms_sum = 0; |
|
119 |
+ s->sum = 0; |
|
109 | 120 |
} |
110 | 121 |
|
111 | 122 |
static int config_input(AVFilterLink *inlink) |
... | ... |
@@ -118,7 +180,7 @@ static int config_input(AVFilterLink *inlink) |
118 | 118 |
if (!s->window) |
119 | 119 |
return AVERROR(ENOMEM); |
120 | 120 |
|
121 |
- clear_rms(s); |
|
121 |
+ clear_window(s); |
|
122 | 122 |
|
123 | 123 |
s->start_duration = av_rescale(s->start_duration, inlink->sample_rate, |
124 | 124 |
AV_TIME_BASE); |
... | ... |
@@ -153,28 +215,6 @@ static int config_input(AVFilterLink *inlink) |
153 | 153 |
return 0; |
154 | 154 |
} |
155 | 155 |
|
156 |
-static double compute_rms(SilenceRemoveContext *s, double sample) |
|
157 |
-{ |
|
158 |
- double new_sum; |
|
159 |
- |
|
160 |
- new_sum = s->rms_sum; |
|
161 |
- new_sum -= *s->window_current; |
|
162 |
- new_sum += sample * sample; |
|
163 |
- |
|
164 |
- return sqrt(new_sum / s->window_size); |
|
165 |
-} |
|
166 |
- |
|
167 |
-static void update_rms(SilenceRemoveContext *s, double sample) |
|
168 |
-{ |
|
169 |
- s->rms_sum -= *s->window_current; |
|
170 |
- *s->window_current = sample * sample; |
|
171 |
- s->rms_sum += *s->window_current; |
|
172 |
- |
|
173 |
- s->window_current++; |
|
174 |
- if (s->window_current >= s->window_end) |
|
175 |
- s->window_current = s->window; |
|
176 |
-} |
|
177 |
- |
|
178 | 156 |
static void flush(AVFrame *out, AVFilterLink *outlink, |
179 | 157 |
int *nb_samples_written, int *ret) |
180 | 158 |
{ |
... | ... |
@@ -209,12 +249,12 @@ silence_trim: |
209 | 209 |
for (i = 0; i < nbs; i++) { |
210 | 210 |
threshold = 0; |
211 | 211 |
for (j = 0; j < inlink->channels; j++) { |
212 |
- threshold |= compute_rms(s, ibuf[j]) > s->start_threshold; |
|
212 |
+ threshold |= s->compute(s, ibuf[j]) > s->start_threshold; |
|
213 | 213 |
} |
214 | 214 |
|
215 | 215 |
if (threshold) { |
216 | 216 |
for (j = 0; j < inlink->channels; j++) { |
217 |
- update_rms(s, *ibuf); |
|
217 |
+ s->update(s, *ibuf); |
|
218 | 218 |
s->start_holdoff[s->start_holdoff_end++] = *ibuf++; |
219 | 219 |
nb_samples_read++; |
220 | 220 |
} |
... | ... |
@@ -232,7 +272,7 @@ silence_trim: |
232 | 232 |
s->start_holdoff_end = 0; |
233 | 233 |
|
234 | 234 |
for (j = 0; j < inlink->channels; j++) |
235 |
- update_rms(s, ibuf[j]); |
|
235 |
+ s->update(s, ibuf[j]); |
|
236 | 236 |
|
237 | 237 |
ibuf += inlink->channels; |
238 | 238 |
nb_samples_read += inlink->channels; |
... | ... |
@@ -284,7 +324,7 @@ silence_copy: |
284 | 284 |
for (i = 0; i < nbs; i++) { |
285 | 285 |
threshold = 1; |
286 | 286 |
for (j = 0; j < inlink->channels; j++) |
287 |
- threshold &= compute_rms(s, ibuf[j]) > s->stop_threshold; |
|
287 |
+ threshold &= s->compute(s, ibuf[j]) > s->stop_threshold; |
|
288 | 288 |
|
289 | 289 |
if (threshold && s->stop_holdoff_end && !s->leave_silence) { |
290 | 290 |
s->mode = SILENCE_COPY_FLUSH; |
... | ... |
@@ -292,14 +332,14 @@ silence_copy: |
292 | 292 |
goto silence_copy_flush; |
293 | 293 |
} else if (threshold) { |
294 | 294 |
for (j = 0; j < inlink->channels; j++) { |
295 |
- update_rms(s, *ibuf); |
|
295 |
+ s->update(s, *ibuf); |
|
296 | 296 |
*obuf++ = *ibuf++; |
297 | 297 |
nb_samples_read++; |
298 | 298 |
nb_samples_written++; |
299 | 299 |
} |
300 | 300 |
} else if (!threshold) { |
301 | 301 |
for (j = 0; j < inlink->channels; j++) { |
302 |
- update_rms(s, *ibuf); |
|
302 |
+ s->update(s, *ibuf); |
|
303 | 303 |
if (s->leave_silence) { |
304 | 304 |
*obuf++ = *ibuf; |
305 | 305 |
nb_samples_written++; |
... | ... |
@@ -323,7 +363,7 @@ silence_copy: |
323 | 323 |
s->start_found_periods = 0; |
324 | 324 |
s->start_holdoff_offset = 0; |
325 | 325 |
s->start_holdoff_end = 0; |
326 |
- clear_rms(s); |
|
326 |
+ clear_window(s); |
|
327 | 327 |
s->mode = SILENCE_TRIM; |
328 | 328 |
flush(out, outlink, &nb_samples_written, &ret); |
329 | 329 |
goto silence_trim; |