Based on the shotdetect algorithm (http://shotdetect.nonutc.fr/ by Johan
MATHE johan.mathe tremplin-utc.net).
... | ... |
@@ -1708,6 +1708,7 @@ ocv_filter_deps="libopencv" |
1708 | 1708 |
pan_filter_deps="swresample" |
1709 | 1709 |
removelogo_filter_deps="avcodec avformat swscale" |
1710 | 1710 |
scale_filter_deps="swscale" |
1711 |
+select_filter_deps="avcodec" |
|
1711 | 1712 |
super2xsai_filter_deps="gpl" |
1712 | 1713 |
tinterlace_filter_deps="gpl" |
1713 | 1714 |
yadif_filter_deps="gpl" |
... | ... |
@@ -2603,6 +2603,12 @@ the frame is bottom-field-first |
2603 | 2603 |
@item pos |
2604 | 2604 |
the position in the file of the filtered frame, -1 if the information |
2605 | 2605 |
is not available (e.g. for synthetic video) |
@item scene
value between 0 and 1 to indicate a new scene; a low value reflects a low
probability for the current frame to introduce a new scene, while a higher
value means the current frame is more likely to be one (see the example below)

2606 | 2612 |
@end table |
2607 | 2613 |
|
2608 | 2614 |
The default value of the select expression is "1". |
... | ... |
@@ -2635,6 +2641,15 @@ select='gte(t\,10)*lte(t\,20)*eq(pict_type\,I)' |
2635 | 2635 |
select='isnan(prev_selected_t)+gte(t-prev_selected_t\,10)' |
2636 | 2636 |
@end example |
2637 | 2637 |
|
Complete example to create a mosaic of the first scenes:

@example
ffmpeg -i video.avi -vf select='gt(scene\,0.4)',scale=160:120,tile -frames:v 1 preview.png
@end example

Comparing @var{scene} against a value between 0.3 and 0.5 is generally a sane
choice.

2638 | 2647 |
@section setdar, setsar |
2639 | 2648 |
|
2640 | 2649 |
The @code{setdar} filter sets the Display Aspect Ratio for the filter |
... | ... |
@@ -25,7 +25,9 @@ |
25 | 25 |
|
26 | 26 |
#include "libavutil/eval.h" |
27 | 27 |
#include "libavutil/fifo.h" |
28 |
+#include "libavcodec/dsputil.h" |
|
28 | 29 |
#include "avfilter.h" |
30 |
+#include "formats.h" |
|
29 | 31 |
#include "video.h" |
30 | 32 |
|
31 | 33 |
static const char *const var_names[] = { |
... | ... |
@@ -62,6 +64,8 @@ static const char *const var_names[] = { |
62 | 62 |
"key", ///< tell if the frame is a key frame |
63 | 63 |
"pos", ///< original position in the file of the frame |
64 | 64 |
|
65 |
+ "scene", |
|
66 |
+ |
|
65 | 67 |
NULL |
66 | 68 |
}; |
67 | 69 |
|
... | ... |
@@ -99,6 +103,8 @@ enum var_name { |
99 | 99 |
VAR_KEY, |
100 | 100 |
VAR_POS, |
101 | 101 |
|
102 |
+ VAR_SCENE, |
|
103 |
+ |
|
102 | 104 |
VAR_VARS_NB |
103 | 105 |
}; |
104 | 106 |
|
... | ... |
@@ -107,6 +113,11 @@ enum var_name { |
107 | 107 |
typedef struct { |
108 | 108 |
AVExpr *expr; |
109 | 109 |
double var_values[VAR_VARS_NB]; |
110 |
+ int do_scene_detect; ///< 1 if the expression requires scene detection variables, 0 otherwise |
|
111 |
+ AVCodecContext *avctx; ///< codec context required for the DSPContext (scene detect only) |
|
112 |
+ DSPContext c; ///< context providing optimized SAD methods (scene detect only) |
|
113 |
+ double prev_mafd; ///< previous MAFD (scene detect only) |
|
114 |
+ AVFilterBufferRef *prev_picref; ///< previous frame (scene detect only) |
|
110 | 115 |
double select; |
111 | 116 |
int cache_frames; |
112 | 117 |
AVFifoBuffer *pending_frames; ///< FIFO buffer of video frames |
... | ... |
@@ -128,6 +139,8 @@ static av_cold int init(AVFilterContext *ctx, const char *args, void *opaque) |
128 | 128 |
av_log(ctx, AV_LOG_ERROR, "Failed to allocate pending frames buffer.\n"); |
129 | 129 |
return AVERROR(ENOMEM); |
130 | 130 |
} |
131 |
+ |
|
132 |
+ select->do_scene_detect = args && strstr(args, "scene"); |
|
131 | 133 |
return 0; |
132 | 134 |
} |
133 | 135 |
|
... | ... |
@@ -160,9 +173,49 @@ static int config_input(AVFilterLink *inlink) |
160 | 160 |
select->var_values[VAR_INTERLACE_TYPE_T] = INTERLACE_TYPE_T; |
161 | 161 |
select->var_values[VAR_INTERLACE_TYPE_B] = INTERLACE_TYPE_B; |
162 | 162 |
|
163 |
+ if (select->do_scene_detect) { |
|
164 |
+ select->avctx = avcodec_alloc_context3(NULL); |
|
165 |
+ if (!select->avctx) |
|
166 |
+ return AVERROR(ENOMEM); |
|
167 |
+ ff_dsputil_init(&select->c, select->avctx); |
|
168 |
+ } |
|
163 | 169 |
return 0; |
164 | 170 |
} |
165 | 171 |
|
172 |
+static double get_scene_score(AVFilterContext *ctx, AVFilterBufferRef *picref) |
|
173 |
+{ |
|
174 |
+ double ret = 0; |
|
175 |
+ SelectContext *select = ctx->priv; |
|
176 |
+ AVFilterBufferRef *prev_picref = select->prev_picref; |
|
177 |
+ |
|
178 |
+ if (prev_picref && |
|
179 |
+ picref->video->h == prev_picref->video->h && |
|
180 |
+ picref->video->w == prev_picref->video->w && |
|
181 |
+ picref->linesize[0] == prev_picref->linesize[0]) { |
|
182 |
+ int x, y; |
|
183 |
+ int64_t sad; |
|
184 |
+ double mafd, diff; |
|
185 |
+ uint8_t *p1 = picref->data[0]; |
|
186 |
+ uint8_t *p2 = prev_picref->data[0]; |
|
187 |
+ const int linesize = picref->linesize[0]; |
|
188 |
+ |
|
189 |
+ for (sad = y = 0; y < picref->video->h; y += 8) |
|
190 |
+ for (x = 0; x < linesize; x += 8) |
|
191 |
+ sad += select->c.sad[1](select, |
|
192 |
+ p1 + y * linesize + x, |
|
193 |
+ p2 + y * linesize + x, |
|
194 |
+ linesize, 8); |
|
195 |
+ emms_c(); |
|
196 |
+ mafd = sad / (picref->video->h * picref->video->w * 3); |
|
197 |
+ diff = llabs(mafd - select->prev_mafd); |
|
198 |
+ ret = av_clipf(FFMIN(mafd, diff) / 100., 0, 1); |
|
199 |
+ select->prev_mafd = mafd; |
|
200 |
+ avfilter_unref_buffer(prev_picref); |
|
201 |
+ } |
|
202 |
+ select->prev_picref = avfilter_ref_buffer(picref, ~0); |
|
203 |
+ return ret; |
|
204 |
+} |
|
205 |
+ |
|
166 | 206 |
#define D2TS(d) (isnan(d) ? AV_NOPTS_VALUE : (int64_t)(d)) |
167 | 207 |
#define TS2D(ts) ((ts) == AV_NOPTS_VALUE ? NAN : (double)(ts)) |
168 | 208 |
|
... | ... |
@@ -172,6 +225,8 @@ static int select_frame(AVFilterContext *ctx, AVFilterBufferRef *picref) |
172 | 172 |
AVFilterLink *inlink = ctx->inputs[0]; |
173 | 173 |
double res; |
174 | 174 |
|
175 |
+ if (select->do_scene_detect) |
|
176 |
+ select->var_values[VAR_SCENE] = get_scene_score(ctx, picref); |
|
175 | 177 |
if (isnan(select->var_values[VAR_START_PTS])) |
176 | 178 |
select->var_values[VAR_START_PTS] = TS2D(picref->pts); |
177 | 179 |
if (isnan(select->var_values[VAR_START_T])) |
... | ... |
@@ -315,6 +370,28 @@ static av_cold void uninit(AVFilterContext *ctx) |
315 | 315 |
avfilter_unref_buffer(picref); |
316 | 316 |
av_fifo_free(select->pending_frames); |
317 | 317 |
select->pending_frames = NULL; |
318 |
+ |
|
319 |
+ if (select->do_scene_detect) { |
|
320 |
+ avfilter_unref_bufferp(&select->prev_picref); |
|
321 |
+ avcodec_close(select->avctx); |
|
322 |
+ av_freep(&select->avctx); |
|
323 |
+ } |
|
324 |
+} |
|
325 |
+ |
|
326 |
+static int query_formats(AVFilterContext *ctx) |
|
327 |
+{ |
|
328 |
+ SelectContext *select = ctx->priv; |
|
329 |
+ |
|
330 |
+ if (!select->do_scene_detect) { |
|
331 |
+ return ff_default_query_formats(ctx); |
|
332 |
+ } else { |
|
333 |
+ static const enum PixelFormat pix_fmts[] = { |
|
334 |
+ PIX_FMT_RGB24, PIX_FMT_BGR24, |
|
335 |
+ PIX_FMT_NONE |
|
336 |
+ }; |
|
337 |
+ avfilter_set_common_pixel_formats(ctx, avfilter_make_format_list(pix_fmts)); |
|
338 |
+ } |
|
339 |
+ return 0; |
|
318 | 340 |
} |
319 | 341 |
|
320 | 342 |
AVFilter avfilter_vf_select = { |
... | ... |
@@ -322,6 +399,7 @@ AVFilter avfilter_vf_select = { |
322 | 322 |
.description = NULL_IF_CONFIG_SMALL("Select frames to pass in output."), |
323 | 323 |
.init = init, |
324 | 324 |
.uninit = uninit, |
325 |
+ .query_formats = query_formats, |
|
325 | 326 |
|
326 | 327 |
.priv_size = sizeof(SelectContext), |
327 | 328 |
|