Browse code

lavfi/select: add scene detection.

Based on the shotdetect algorithm (http://shotdetect.nonutc.fr/ by Johan
MATHE johan.mathe tremplin-utc.net).

Clément Bœsch authored on 2012/05/26 19:38:59
Showing 4 changed files
... ...
@@ -3,6 +3,7 @@ releases are sorted from youngest to oldest.
3 3
 
4 4
 version next:
5 5
 - INI output in ffprobe
6
+- Scene detection in libavfilter
6 7
 
7 8
 
8 9
 version 0.11:
... ...
@@ -1708,6 +1708,7 @@ ocv_filter_deps="libopencv"
1708 1708
 pan_filter_deps="swresample"
1709 1709
 removelogo_filter_deps="avcodec avformat swscale"
1710 1710
 scale_filter_deps="swscale"
1711
+select_filter_deps="avcodec"
1711 1712
 super2xsai_filter_deps="gpl"
1712 1713
 tinterlace_filter_deps="gpl"
1713 1714
 yadif_filter_deps="gpl"
... ...
@@ -2603,6 +2603,12 @@ the frame is bottom-field-first
2603 2603
 @item pos
2604 2604
 the position in the file of the filtered frame, -1 if the information
2605 2605
 is not available (e.g. for synthetic video)
2606
+
2607
+@item scene
2608
+value between 0 and 1 to indicate a new scene; a low value reflects a low
2609
+probability for the current frame to introduce a new scene, while a higher
2610
+value means the current frame is more likely to introduce one (see the example below)
2611
+
2606 2612
 @end table
2607 2613
 
2608 2614
 The default value of the select expression is "1".
... ...
@@ -2635,6 +2641,15 @@ select='gte(t\,10)*lte(t\,20)*eq(pict_type\,I)'
2635 2635
 select='isnan(prev_selected_t)+gte(t-prev_selected_t\,10)'
2636 2636
 @end example
2637 2637
 
2638
+Complete example to create a mosaic of the first scenes:
2639
+
2640
+@example
2641
+ffmpeg -i video.avi -vf select='gt(scene\,0.4)',scale=160:120,tile -frames:v 1 preview.png
2642
+@end example
2643
+
2644
+Comparing @var{scene} against a value between 0.3 and 0.5 is generally a sane
2645
+choice.
2646
+
2638 2647
 @section setdar, setsar
2639 2648
 
2640 2649
 The @code{setdar} filter sets the Display Aspect Ratio for the filter
... ...
@@ -25,7 +25,9 @@
25 25
 
26 26
 #include "libavutil/eval.h"
27 27
 #include "libavutil/fifo.h"
28
+#include "libavcodec/dsputil.h"
28 29
 #include "avfilter.h"
30
+#include "formats.h"
29 31
 #include "video.h"
30 32
 
31 33
 static const char *const var_names[] = {
... ...
@@ -62,6 +64,8 @@ static const char *const var_names[] = {
62 62
     "key",               ///< tell if the frame is a key frame
63 63
     "pos",               ///< original position in the file of the frame
64 64
 
65
+    "scene",
66
+
65 67
     NULL
66 68
 };
67 69
 
... ...
@@ -99,6 +103,8 @@ enum var_name {
99 99
     VAR_KEY,
100 100
     VAR_POS,
101 101
 
102
+    VAR_SCENE,
103
+
102 104
     VAR_VARS_NB
103 105
 };
104 106
 
... ...
@@ -107,6 +113,11 @@ enum var_name {
107 107
 typedef struct {
108 108
     AVExpr *expr;
109 109
     double var_values[VAR_VARS_NB];
110
+    int do_scene_detect;            ///< 1 if the expression requires scene detection variables, 0 otherwise
111
+    AVCodecContext *avctx;          ///< codec context required for the DSPContext (scene detect only)
112
+    DSPContext c;                   ///< context providing optimized SAD methods   (scene detect only)
113
+    double prev_mafd;               ///< previous MAFD                             (scene detect only)
114
+    AVFilterBufferRef *prev_picref; ///< previous frame                            (scene detect only)
110 115
     double select;
111 116
     int cache_frames;
112 117
     AVFifoBuffer *pending_frames; ///< FIFO buffer of video frames
... ...
@@ -128,6 +139,8 @@ static av_cold int init(AVFilterContext *ctx, const char *args, void *opaque)
128 128
         av_log(ctx, AV_LOG_ERROR, "Failed to allocate pending frames buffer.\n");
129 129
         return AVERROR(ENOMEM);
130 130
     }
131
+
132
+    select->do_scene_detect = args && strstr(args, "scene");
131 133
     return 0;
132 134
 }
133 135
 
... ...
@@ -160,9 +173,49 @@ static int config_input(AVFilterLink *inlink)
160 160
     select->var_values[VAR_INTERLACE_TYPE_T] = INTERLACE_TYPE_T;
161 161
     select->var_values[VAR_INTERLACE_TYPE_B] = INTERLACE_TYPE_B;
162 162
 
163
+    if (select->do_scene_detect) {
164
+        select->avctx = avcodec_alloc_context3(NULL);
165
+        if (!select->avctx)
166
+            return AVERROR(ENOMEM);
167
+        ff_dsputil_init(&select->c, select->avctx);
168
+    }
163 169
     return 0;
164 170
 }
165 171
 
172
+static double get_scene_score(AVFilterContext *ctx, AVFilterBufferRef *picref)
173
+{
174
+    double ret = 0;
175
+    SelectContext *select = ctx->priv;
176
+    AVFilterBufferRef *prev_picref = select->prev_picref;
177
+
178
+    if (prev_picref &&
179
+        picref->video->h    == prev_picref->video->h &&
180
+        picref->video->w    == prev_picref->video->w &&
181
+        picref->linesize[0] == prev_picref->linesize[0]) {
182
+        int x, y;
183
+        int64_t sad;
184
+        double mafd, diff;
185
+        uint8_t *p1 =      picref->data[0];
186
+        uint8_t *p2 = prev_picref->data[0];
187
+        const int linesize = picref->linesize[0];
188
+
189
+        for (sad = y = 0; y < picref->video->h; y += 8)
190
+            for (x = 0; x < linesize; x += 8)
191
+                sad += select->c.sad[1](select,
192
+                                        p1 + y * linesize + x,
193
+                                        p2 + y * linesize + x,
194
+                                        linesize, 8);
195
+        emms_c();
196
+        mafd = sad / (picref->video->h * picref->video->w * 3);
197
+        diff = llabs(mafd - select->prev_mafd);
198
+        ret  = av_clipf(FFMIN(mafd, diff) / 100., 0, 1);
199
+        select->prev_mafd = mafd;
200
+        avfilter_unref_buffer(prev_picref);
201
+    }
202
+    select->prev_picref = avfilter_ref_buffer(picref, ~0);
203
+    return ret;
204
+}
205
+
166 206
 #define D2TS(d)  (isnan(d) ? AV_NOPTS_VALUE : (int64_t)(d))
167 207
 #define TS2D(ts) ((ts) == AV_NOPTS_VALUE ? NAN : (double)(ts))
168 208
 
... ...
@@ -172,6 +225,8 @@ static int select_frame(AVFilterContext *ctx, AVFilterBufferRef *picref)
172 172
     AVFilterLink *inlink = ctx->inputs[0];
173 173
     double res;
174 174
 
175
+    if (select->do_scene_detect)
176
+        select->var_values[VAR_SCENE] = get_scene_score(ctx, picref);
175 177
     if (isnan(select->var_values[VAR_START_PTS]))
176 178
         select->var_values[VAR_START_PTS] = TS2D(picref->pts);
177 179
     if (isnan(select->var_values[VAR_START_T]))
... ...
@@ -315,6 +370,28 @@ static av_cold void uninit(AVFilterContext *ctx)
315 315
         avfilter_unref_buffer(picref);
316 316
     av_fifo_free(select->pending_frames);
317 317
     select->pending_frames = NULL;
318
+
319
+    if (select->do_scene_detect) {
320
+        avfilter_unref_bufferp(&select->prev_picref);
321
+        avcodec_close(select->avctx);
322
+        av_freep(&select->avctx);
323
+    }
324
+}
325
+
326
+static int query_formats(AVFilterContext *ctx)
327
+{
328
+    SelectContext *select = ctx->priv;
329
+
330
+    if (!select->do_scene_detect) {
331
+        return ff_default_query_formats(ctx);
332
+    } else {
333
+        static const enum PixelFormat pix_fmts[] = {
334
+            PIX_FMT_RGB24, PIX_FMT_BGR24,
335
+            PIX_FMT_NONE
336
+        };
337
+        avfilter_set_common_pixel_formats(ctx, avfilter_make_format_list(pix_fmts));
338
+    }
339
+    return 0;
318 340
 }
319 341
 
320 342
 AVFilter avfilter_vf_select = {
... ...
@@ -322,6 +399,7 @@ AVFilter avfilter_vf_select = {
322 322
     .description = NULL_IF_CONFIG_SMALL("Select frames to pass in output."),
323 323
     .init      = init,
324 324
     .uninit    = uninit,
325
+    .query_formats = query_formats,
325 326
 
326 327
     .priv_size = sizeof(SelectContext),
327 328