GitList

@@ -65,11 +65,15 @@ typedef struct SilenceRemoveContext {
                          double *window_current;
                          double *window_end;
                          int window_size;
                     -    double rms_sum;
                     +    double sum;
                          int leave_silence;
                          int restart;
                          int64_t next_pts;
+                    +
                     +    int detection;
                     +    void (*update)(struct SilenceRemoveContext *s, double sample);
                     +    double(*compute)(struct SilenceRemoveContext *s, double sample);
                      } SilenceRemoveContext;
                      #define OFFSET(x) offsetof(SilenceRemoveContext, x)
@@ -82,11 +86,58 @@ static const AVOption silenceremove_options[] = {
                          { "stop_duration",   NULL, OFFSET(stop_duration),   AV_OPT_TYPE_DURATION, {.i64=0},     0,    9000, FLAGS },
                          { "stop_threshold",  NULL, OFFSET(stop_threshold),  AV_OPT_TYPE_DOUBLE,   {.dbl=0},     0, DBL_MAX, FLAGS },
                          { "leave_silence",   NULL, OFFSET(leave_silence),   AV_OPT_TYPE_BOOL,     {.i64=0},     0,       1, FLAGS },
                     +    { "detection",       NULL, OFFSET(detection),       AV_OPT_TYPE_INT,      {.i64=1},     0,       1, FLAGS, "detection" },
                     +    {   "peak",          0,    0,                       AV_OPT_TYPE_CONST,    {.i64=0},     0,       0, FLAGS, "detection" },
                     +    {   "rms",           0,    0,                       AV_OPT_TYPE_CONST,    {.i64=1},     0,       0, FLAGS, "detection" },
                          { NULL }
                      };
                      AVFILTER_DEFINE_CLASS(silenceremove);
                     +static double compute_peak(SilenceRemoveContext *s, double sample) /* Preview the window level as if `sample` were stored next; *s is not modified. */
                     +{
                     +    double new_sum;
+                    +
                     +    new_sum  = s->sum;
                     +    new_sum -= *s->window_current; /* drop the value in the slot that would be overwritten */
                     +    new_sum += fabs(sample); /* add the magnitude of the candidate sample */
+                    +
                     +    return new_sum / s->window_size; /* running sum divided by window size: an average of magnitudes, not a maximum */
                     +}
+                    +
                     +static void update_peak(SilenceRemoveContext *s, double sample) /* Store |sample| in the current window slot, keep s->sum in step, and advance the cursor. */
                     +{
                     +    s->sum -= *s->window_current; /* remove the value about to be overwritten from the running sum */
                     +    *s->window_current = fabs(sample);
                     +    s->sum += *s->window_current;
+                    +
                     +    s->window_current++;
                     +    if (s->window_current >= s->window_end) /* past the last slot: wrap back to the first one */
                     +        s->window_current = s->window;
                     +}
+                    +
                     +static double compute_rms(SilenceRemoveContext *s, double sample) /* Preview the window RMS as if `sample` were stored next; *s is not modified. */
                     +{
                     +    double new_sum;
+                    +
                     +    new_sum  = s->sum;
                     +    new_sum -= *s->window_current; /* drop the value in the slot that would be overwritten */
                     +    new_sum += sample * sample; /* add the square of the candidate sample */
+                    +
                     +    return sqrt(new_sum / s->window_size); /* square root of the running sum divided by window size */
                     +}
+                    +
                     +static void update_rms(SilenceRemoveContext *s, double sample) /* Store sample squared in the current window slot, keep s->sum in step, and advance the cursor. */
                     +{
                     +    s->sum -= *s->window_current; /* remove the value about to be overwritten from the running sum */
                     +    *s->window_current = sample * sample;
                     +    s->sum += *s->window_current;
+                    +
                     +    s->window_current++;
                     +    if (s->window_current >= s->window_end) /* past the last slot: wrap back to the first one */
                     +        s->window_current = s->window;
                     +}
+                    +
                      static av_cold int init(AVFilterContext *ctx)
+                     {
                          SilenceRemoveContext *s = ctx->priv;
@@ -96,16 +147,27 @@ static av_cold int init(AVFilterContext *ctx)
                              s->restart = 1;
+                         }
                     +    switch (s->detection) {
                     +    case 0:
                     +        s->update = update_peak;
                     +        s->compute = compute_peak;
                     +        break;
                     +    case 1:
                     +        s->update = update_rms;
                     +        s->compute = compute_rms;
                     +        break;
                     +    };
+                    +
                          return 0;
+                     }
                     -static void clear_rms(SilenceRemoveContext *s)
                     +static void clear_window(SilenceRemoveContext *s) /* Reset the detection window: zero every slot, rewind the cursor, clear the running sum. */
+                     {
                          memset(s->window, 0, s->window_size * sizeof(*s->window)); /* zero all window_size slots */
                          s->window_current = s->window; /* next write goes to the first slot */
                          s->window_end = s->window + s->window_size; /* one past the last slot; used as the wrap limit */
                     -    s->rms_sum = 0;
                     +    s->sum = 0;
+                     }
                      static int config_input(AVFilterLink *inlink)
@@ -118,7 +180,7 @@ static int config_input(AVFilterLink *inlink)
                          if (!s->window)
                              return AVERROR(ENOMEM);
                     -    clear_rms(s);
                     +    clear_window(s);
                          s->start_duration = av_rescale(s->start_duration, inlink->sample_rate,
                                                         AV_TIME_BASE);
@@ -153,28 +215,6 @@ static int config_input(AVFilterLink *inlink)
                          return 0;
+                     }
                     -static double compute_rms(SilenceRemoveContext *s, double sample)
                     -{
                     -    double new_sum;
+                    -
                     -    new_sum  = s->rms_sum;
                     -    new_sum -= *s->window_current;
                     -    new_sum += sample * sample;
+                    -
                     -    return sqrt(new_sum / s->window_size);
                     -}
+                    -
                     -static void update_rms(SilenceRemoveContext *s, double sample)
                     -{
                     -    s->rms_sum -= *s->window_current;
                     -    *s->window_current = sample * sample;
                     -    s->rms_sum += *s->window_current;
+                    -
                     -    s->window_current++;
                     -    if (s->window_current >= s->window_end)
                     -        s->window_current = s->window;
                     -}
+                    -
                      static void flush(AVFrame *out, AVFilterLink *outlink,
                                        int *nb_samples_written, int *ret)
+                     {
@@ -209,12 +249,12 @@ silence_trim:
                              for (i = 0; i < nbs; i++) {
                                  threshold = 0;
                                  for (j = 0; j < inlink->channels; j++) {
                     -                threshold |= compute_rms(s, ibuf[j]) > s->start_threshold;
                     +                threshold |= s->compute(s, ibuf[j]) > s->start_threshold;
+                                 }
                                  if (threshold) {
                                      for (j = 0; j < inlink->channels; j++) {
                     -                    update_rms(s, *ibuf);
                     +                    s->update(s, *ibuf);
                                          s->start_holdoff[s->start_holdoff_end++] = *ibuf++;
                                          nb_samples_read++;
+                                     }
@@ -232,7 +272,7 @@ silence_trim:
                                      s->start_holdoff_end = 0;
                                      for (j = 0; j < inlink->channels; j++)
                     -                    update_rms(s, ibuf[j]);
                     +                    s->update(s, ibuf[j]);
                                      ibuf += inlink->channels;
                                      nb_samples_read += inlink->channels;
@@ -284,7 +324,7 @@ silence_copy:
                                  for (i = 0; i < nbs; i++) {
                                      threshold = 1;
                                      for (j = 0; j < inlink->channels; j++)
                     -                    threshold &= compute_rms(s, ibuf[j]) > s->stop_threshold;
                     +                    threshold &= s->compute(s, ibuf[j]) > s->stop_threshold;
                                      if (threshold && s->stop_holdoff_end && !s->leave_silence) {
                                          s->mode = SILENCE_COPY_FLUSH;
@@ -292,14 +332,14 @@ silence_copy:
                                          goto silence_copy_flush;
                                      } else if (threshold) {
                                          for (j = 0; j < inlink->channels; j++) {
                     -                        update_rms(s, *ibuf);
                     +                        s->update(s, *ibuf);
                                              *obuf++ = *ibuf++;
                                              nb_samples_read++;
                                              nb_samples_written++;
+                                         }
                                      } else if (!threshold) {
                                          for (j = 0; j < inlink->channels; j++) {
                     -                        update_rms(s, *ibuf);
                     +                        s->update(s, *ibuf);
                                              if (s->leave_silence) {
                                                  *obuf++ = *ibuf;
                                                  nb_samples_written++;
@@ -323,7 +363,7 @@ silence_copy:
                                                      s->start_found_periods = 0;
                                                      s->start_holdoff_offset = 0;
                                                      s->start_holdoff_end = 0;
                     -                                clear_rms(s);
                     +                                clear_window(s);
                                                      s->mode = SILENCE_TRIM;
                                                      flush(out, outlink, &nb_samples_written, &ret);
                                                      goto silence_trim;

@@ -2969,6 +2969,10 @@ at the beginning of each period of silence.
                      For example, if you want to remove long pauses between words but do not want
                      to remove the pauses completely. Default value is @code{0}.
                     +@item detection
                     +Set how silence is detected. Can be @code{rms} or @code{peak}. The latter is faster
                     +and works better with digital silence, which is exactly 0.
                     +Default value is @code{rms}.
                      @end table
                      @subsection Examples

avfilter/af_silenceremove: add peak detector