Browse code

lavfi/overlay: add dynamic expression evaluation support

Add support for dynamic x, y expressions evaluation.

Also add support for an evaluation mode which allows to disable per-frame
evaluation, so that there is no speed loss in case the expression does not
depend on frame variables.

Stefano Sabatini authored on 2013/02/20 04:10:02
Showing 3 changed files
... ...
@@ -4260,26 +4260,31 @@ arguments are interpreted according to the syntax @var{x}:@var{y}.
4260 4260
 A description of the accepted options follows.
4261 4261
 
4262 4262
 @table @option
4263
-@item x, y
4263
+@item x
4264
+@item y
4264 4265
 Set the expression for the x and y coordinates of the overlayed video
4265
-on the main video. Default value is 0.
4266
-
4267
-The @var{x} and @var{y} expressions can contain the following
4268
-parameters:
4269
-@table @option
4270
-@item main_w, main_h
4271
-main input width and height
4266
+on the main video. Default value is "0" for both expressions. In case
4267
+the expression is invalid, it is set to a huge value (meaning that the
4268
+overlay will not be displayed within the output visible area).
4272 4269
 
4273
-@item W, H
4274
-same as @var{main_w} and @var{main_h}
4270
+@item eval
4271
+Set when the expressions for @option{x} and @option{y} are evaluated.
4275 4272
 
4276
-@item overlay_w, overlay_h
4277
-overlay input width and height
4273
+It accepts the following values:
4274
+@table @samp
4275
+@item init
4276
+only evaluate expressions once during the filter initialization
4278 4277
 
4279
-@item w, h
4280
-same as @var{overlay_w} and @var{overlay_h}
4278
+@item frame
4279
+evaluate expressions for each incoming frame
4281 4280
 @end table
4282 4281
 
4282
+Default value is @samp{frame}.
4283
+
4284
+@item shortest
4285
+If set to 1, force the output to terminate when the shortest input
4286
+terminates. Default value is 0.
4287
+
4283 4288
 @item format
4284 4289
 Set the format for the output video.
4285 4290
 
... ...
@@ -4301,12 +4306,45 @@ Default value is @samp{yuv420}.
4301 4301
 If set to 1, force the filter to accept inputs in the RGB
4302 4302
 color space. Default value is 0. This option is deprecated, use
4303 4303
 @option{format} instead.
4304
+@end table
4304 4305
 
4305
-@item shortest
4306
-If set to 1, force the output to terminate when the shortest input
4307
-terminates. Default value is 0.
4306
 +The @option{x} and @option{y} expressions can contain the following
4307
+parameters.
4308
+
4309
+@table @option
4310
+@item main_w, W
4311
+@item main_h, H
4312
+main input width and height
4313
+
4314
+@item overlay_w, w
4315
+@item overlay_h, h
4316
+overlay input width and height
4317
+
4318
+@item x
4319
+@item y
4320
+the computed values for @var{x} and @var{y}. They are evaluated for
4321
+each new frame.
4322
+
4323
+@item hsub
4324
+@item vsub
4325
+horizontal and vertical chroma subsample values of the output
4326
+format. For example for the pixel format "yuv422p" @var{hsub} is 2 and
4327
+@var{vsub} is 1.
4328
+
4329
+@item n
4330
 +the number of the input frame, starting from 0
4331
+
4332
+@item pos
4333
+the position in the file of the input frame, NAN if unknown
4334
+
4335
+@item t
4336
+timestamp expressed in seconds, NAN if the input timestamp is unknown
4308 4337
 @end table
4309 4338
 
4339
+Note that the @var{n}, @var{pos}, @var{t} variables are available only
4340
+when evaluation is done @emph{per frame}, and will evaluate to NAN
4341
+when @option{eval} is set to @samp{init}.
4342
+
4310 4343
 Be aware that frames are taken from each input video in timestamp
4311 4344
 order, hence, if their initial timestamps differ, it is a good idea
4312 4345
 to pass the two inputs through a @var{setpts=PTS-STARTPTS} filter to
... ...
@@ -4365,6 +4403,13 @@ ffplay input.avi -vf 'split[b], pad=iw*2[src], [b]deshake, [src]overlay=w'
4365 4365
 @end example
4366 4366
 
4367 4367
 @item
4368
+Make a sliding overlay appearing from the left to the right top part of the
4369
 +screen starting at time 2:
4370
+@example
4371
+overlay=x='if(gte(t,2), -w+(t-2)*20, NAN)':y=0
4372
+@end example
4373
+
4374
+@item
4368 4375
 Compose output by putting two input videos side to side:
4369 4376
 @example
4370 4377
 ffmpeg -i left.avi -i right.avi -filter_complex "
... ...
@@ -30,7 +30,7 @@
30 30
 
31 31
 #define LIBAVFILTER_VERSION_MAJOR  3
32 32
 #define LIBAVFILTER_VERSION_MINOR  50
33
-#define LIBAVFILTER_VERSION_MICRO 100
33
+#define LIBAVFILTER_VERSION_MICRO 101
34 34
 
35 35
 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
36 36
                                                LIBAVFILTER_VERSION_MINOR, \
... ...
@@ -47,6 +47,13 @@ static const char *const var_names[] = {
47 47
     "main_h",    "H", ///< height of the main    video
48 48
     "overlay_w", "w", ///< width  of the overlay video
49 49
     "overlay_h", "h", ///< height of the overlay video
50
+    "hsub",
51
+    "vsub",
52
+    "x",
53
+    "y",
54
+    "n",            ///< number of frame
55
+    "pos",          ///< position in the file
56
+    "t",            ///< timestamp expressed in seconds
50 57
     NULL
51 58
 };
52 59
 
... ...
@@ -55,6 +62,13 @@ enum var_name {
55 55
     VAR_MAIN_H,    VAR_MH,
56 56
     VAR_OVERLAY_W, VAR_OW,
57 57
     VAR_OVERLAY_H, VAR_OH,
58
+    VAR_HSUB,
59
+    VAR_VSUB,
60
+    VAR_X,
61
+    VAR_Y,
62
+    VAR_N,
63
+    VAR_POS,
64
+    VAR_T,
58 65
     VAR_VARS_NB
59 66
 };
60 67
 
... ...
@@ -84,6 +98,7 @@ typedef struct {
84 84
     uint8_t overlay_rgba_map[4];
85 85
     uint8_t overlay_has_alpha;
86 86
     enum OverlayFormat { OVERLAY_FORMAT_YUV420, OVERLAY_FORMAT_YUV444, OVERLAY_FORMAT_RGB, OVERLAY_FORMAT_NB} format;
87
+    enum EvalMode { EVAL_MODE_INIT, EVAL_MODE_FRAME, EVAL_MODE_NB } eval_mode;
87 88
 
88 89
     AVFrame *overpicref;
89 90
     struct FFBufQueue queue_main;
... ...
@@ -94,7 +109,9 @@ typedef struct {
94 94
     int hsub, vsub;             ///< chroma subsampling values
95 95
     int shortest;               ///< terminate stream when the shortest input terminates
96 96
 
97
+    double var_values[VAR_VARS_NB];
97 98
     char *x_expr, *y_expr;
99
+    AVExpr *x_pexpr, *y_pexpr;
98 100
 } OverlayContext;
99 101
 
100 102
 #define OFFSET(x) offsetof(OverlayContext, x)
... ...
@@ -103,6 +120,11 @@ typedef struct {
103 103
 static const AVOption overlay_options[] = {
104 104
     { "x", "set the x expression", OFFSET(x_expr), AV_OPT_TYPE_STRING, {.str = "0"}, CHAR_MIN, CHAR_MAX, FLAGS },
105 105
     { "y", "set the y expression", OFFSET(y_expr), AV_OPT_TYPE_STRING, {.str = "0"}, CHAR_MIN, CHAR_MAX, FLAGS },
106
+
107
+    { "eval", "specify when to evaluate expressions", OFFSET(eval_mode), AV_OPT_TYPE_INT, {.i64 = EVAL_MODE_FRAME}, 0, EVAL_MODE_NB-1, FLAGS, "eval" },
108
+    { "init",  "eval expressions once during initialization", 0, AV_OPT_TYPE_CONST, {.i64=EVAL_MODE_INIT}, .flags = FLAGS, .unit = "eval" },
109
+    { "frame", "eval expressions per-frame",   0, AV_OPT_TYPE_CONST, {.i64=EVAL_MODE_FRAME}, .flags = FLAGS, .unit = "eval" },
110
+
106 111
     { "rgb", "force packed RGB in input and output (deprecated)", OFFSET(allow_packed_rgb), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS },
107 112
     { "shortest", "force termination when the shortest input terminates", OFFSET(shortest), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, FLAGS },
108 113
 
... ...
@@ -135,6 +157,8 @@ static av_cold void uninit(AVFilterContext *ctx)
135 135
     av_frame_free(&over->overpicref);
136 136
     ff_bufqueue_discard_all(&over->queue_main);
137 137
     ff_bufqueue_discard_all(&over->queue_over);
138
+    av_expr_free(over->x_pexpr); over->x_pexpr = NULL;
139
+    av_expr_free(over->y_pexpr); over->y_pexpr = NULL;
138 140
 }
139 141
 
140 142
 static int query_formats(AVFilterContext *ctx)
... ...
@@ -217,12 +241,29 @@ static int config_input_main(AVFilterLink *inlink)
217 217
     return 0;
218 218
 }
219 219
 
220
+static inline int normalize_xy(double d, int chroma_sub)
221
+{
222
+    if (isnan(d))
223
+        return INT_MAX;
224
+    return (int)d & ~((1 << chroma_sub) - 1);
225
+}
226
+
227
+static void eval_expr(AVFilterContext *ctx)
228
+{
229
+    OverlayContext  *over = ctx->priv;
230
+
231
+    over->var_values[VAR_X] = av_expr_eval(over->x_pexpr, over->var_values, NULL);
232
+    over->var_values[VAR_Y] = av_expr_eval(over->y_pexpr, over->var_values, NULL);
233
+    over->var_values[VAR_X] = av_expr_eval(over->x_pexpr, over->var_values, NULL);
234
+    over->x = normalize_xy(over->var_values[VAR_X], over->hsub);
235
+    over->y = normalize_xy(over->var_values[VAR_Y], over->vsub);
236
+}
237
+
220 238
 static int config_input_overlay(AVFilterLink *inlink)
221 239
 {
222 240
     AVFilterContext *ctx  = inlink->dst;
223 241
     OverlayContext  *over = inlink->dst->priv;
224 242
     char *expr;
225
-    double var_values[VAR_VARS_NB], res;
226 243
     int ret;
227 244
     const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(inlink->format);
228 245
 
... ...
@@ -230,53 +271,49 @@ static int config_input_overlay(AVFilterLink *inlink)
230 230
 
231 231
     /* Finish the configuration by evaluating the expressions
232 232
        now when both inputs are configured. */
233
-    var_values[VAR_MAIN_W   ] = var_values[VAR_MW] = ctx->inputs[MAIN   ]->w;
234
-    var_values[VAR_MAIN_H   ] = var_values[VAR_MH] = ctx->inputs[MAIN   ]->h;
235
-    var_values[VAR_OVERLAY_W] = var_values[VAR_OW] = ctx->inputs[OVERLAY]->w;
236
-    var_values[VAR_OVERLAY_H] = var_values[VAR_OH] = ctx->inputs[OVERLAY]->h;
237
-
238
-    if ((ret = av_expr_parse_and_eval(&res, (expr = over->x_expr), var_names, var_values,
239
-                                      NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0)
240
-        goto fail;
241
-    over->x = res;
242
-    if ((ret = av_expr_parse_and_eval(&res, (expr = over->y_expr), var_names, var_values,
243
-                                      NULL, NULL, NULL, NULL, NULL, 0, ctx)))
233
+    over->var_values[VAR_MAIN_W   ] = over->var_values[VAR_MW] = ctx->inputs[MAIN   ]->w;
234
+    over->var_values[VAR_MAIN_H   ] = over->var_values[VAR_MH] = ctx->inputs[MAIN   ]->h;
235
+    over->var_values[VAR_OVERLAY_W] = over->var_values[VAR_OW] = ctx->inputs[OVERLAY]->w;
236
+    over->var_values[VAR_OVERLAY_H] = over->var_values[VAR_OH] = ctx->inputs[OVERLAY]->h;
237
+    over->var_values[VAR_HSUB]  = 1<<pix_desc->log2_chroma_w;
238
+    over->var_values[VAR_VSUB]  = 1<<pix_desc->log2_chroma_h;
239
+    over->var_values[VAR_X]     = NAN;
240
+    over->var_values[VAR_Y]     = NAN;
241
+    over->var_values[VAR_N]     = 0;
242
+    over->var_values[VAR_T]     = NAN;
243
+    over->var_values[VAR_POS]   = NAN;
244
+
245
+    expr = over->x_expr;
246
+    if ((ret = av_expr_parse(&over->x_pexpr, expr, var_names,
247
+                             NULL, NULL, NULL, NULL, 0, ctx)) < 0)
244 248
         goto fail;
245
-    over->y = res;
246
-    /* x may depend on y */
247
-    if ((ret = av_expr_parse_and_eval(&res, (expr = over->x_expr), var_names, var_values,
248
-                                      NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0)
249
+    expr = over->y_expr;
250
+    if ((ret = av_expr_parse(&over->y_pexpr, expr, var_names,
251
+                             NULL, NULL, NULL, NULL, 0, ctx)) < 0)
249 252
         goto fail;
250
-    over->x = res;
251 253
 
252 254
     over->overlay_is_packed_rgb =
253 255
         ff_fill_rgba_map(over->overlay_rgba_map, inlink->format) >= 0;
254 256
     over->overlay_has_alpha = ff_fmt_is_in(inlink->format, alpha_pix_fmts);
255 257
 
258
+    if (over->eval_mode == EVAL_MODE_INIT) {
259
+        eval_expr(ctx);
260
+        av_log(ctx, AV_LOG_VERBOSE, "x:%f xi:%d y:%f yi:%d\n",
261
+               over->var_values[VAR_X], over->x,
262
+               over->var_values[VAR_Y], over->y);
263
+    }
264
+
256 265
     av_log(ctx, AV_LOG_VERBOSE,
257
-           "main w:%d h:%d fmt:%s overlay x:%d y:%d w:%d h:%d fmt:%s\n",
266
+           "main w:%d h:%d fmt:%s overlay w:%d h:%d fmt:%s\n",
258 267
            ctx->inputs[MAIN]->w, ctx->inputs[MAIN]->h,
259 268
            av_get_pix_fmt_name(ctx->inputs[MAIN]->format),
260
-           over->x, over->y,
261 269
            ctx->inputs[OVERLAY]->w, ctx->inputs[OVERLAY]->h,
262 270
            av_get_pix_fmt_name(ctx->inputs[OVERLAY]->format));
263
-
264
-    if (over->x < 0 || over->y < 0 ||
265
-        over->x + var_values[VAR_OVERLAY_W] > var_values[VAR_MAIN_W] ||
266
-        over->y + var_values[VAR_OVERLAY_H] > var_values[VAR_MAIN_H]) {
267
-        av_log(ctx, AV_LOG_WARNING,
268
-               "Overlay area with coordinates x1:%d y1:%d x2:%d y2:%d "
269
-               "is not completely contained within the output with size %dx%d\n",
270
-               over->x, over->y,
271
-               (int)(over->x + var_values[VAR_OVERLAY_W]),
272
-               (int)(over->y + var_values[VAR_OVERLAY_H]),
273
-               (int)var_values[VAR_MAIN_W], (int)var_values[VAR_MAIN_H]);
274
-    }
275 271
     return 0;
276 272
 
277 273
 fail:
278 274
     av_log(NULL, AV_LOG_ERROR,
279
-           "Error when evaluating the expression '%s'\n", expr);
275
+           "Error when parsing the expression '%s'\n", expr);
280 276
     return ret;
281 277
 }
282 278
 
... ...
@@ -495,6 +532,7 @@ static void blend_image(AVFilterContext *ctx,
495 495
 static int try_filter_frame(AVFilterContext *ctx, AVFrame *mainpic)
496 496
 {
497 497
     OverlayContext *over = ctx->priv;
498
+    AVFilterLink *inlink = ctx->inputs[0];
498 499
     AVFrame *next_overpic;
499 500
     int ret;
500 501
 
... ...
@@ -526,8 +564,24 @@ static int try_filter_frame(AVFilterContext *ctx, AVFrame *mainpic)
526 526
                 av_ts2str(over->overpicref->pts), av_ts2timestr(over->overpicref->pts, &ctx->inputs[OVERLAY]->time_base));
527 527
     av_dlog(ctx, "\n");
528 528
 
529
-    if (over->overpicref)
529
+    if (over->overpicref) {
530
+        if (over->eval_mode == EVAL_MODE_FRAME) {
531
+            int64_t pos = av_frame_get_pkt_pos(mainpic);
532
+
533
+            over->var_values[VAR_T] = mainpic->pts == AV_NOPTS_VALUE ?
534
+                NAN : mainpic->pts * av_q2d(inlink->time_base);
535
+            over->var_values[VAR_POS] = pos == -1 ? NAN : pos;
536
+
537
+            eval_expr(ctx);
538
+            av_log(ctx, AV_LOG_DEBUG, "n:%f t:%f pos:%f x:%f xi:%d y:%f yi:%d\n",
539
+                   over->var_values[VAR_N], over->var_values[VAR_T], over->var_values[VAR_POS],
540
+                   over->var_values[VAR_X], over->x,
541
+                   over->var_values[VAR_Y], over->y);
542
+        }
530 543
         blend_image(ctx, mainpic, over->overpicref, over->x, over->y);
544
+
545
+        over->var_values[VAR_N] += 1.0;
546
+    }
531 547
     ret = ff_filter_frame(ctx->outputs[0], mainpic);
532 548
     av_assert1(ret != AVERROR(EAGAIN));
533 549
     over->frame_requested = 0;