
avfilter: add vmafmotion filter

Signed-off-by: Ashish Singh <ashk43712@gmail.com>
Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>

Ashish Singh authored on 2017/09/16 05:47:58
Showing 6 changed files
Changelog
@@ -51,6 +51,7 @@ version <next>:
 - CUDA thumbnail filter
 - V4L2 mem2mem HW assisted codecs
 - Rockchip MPP hardware decoding
+- vmafmotion video filter
 
 
 version 3.3:
doc/filters.texi
@@ -15570,6 +15570,20 @@ vignette='PI/4+random(1)*PI/50':eval=frame
 
 @end itemize
 
+@section vmafmotion
+
+Obtain the average VMAF motion score of a video.
+It is one of the component filters of VMAF.
+
+The obtained average motion score is printed through the logging system.
+
+In the example below, the input file @file{ref.mpg} is processed and its
+score is computed.
+
+@example
+ffmpeg -i ref.mpg -lavfi vmafmotion -f null -
+@end example
+
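The filter also has a @option{stats_file} option (see vmafmotion_options in
vf_vmafmotion.c below), which writes the per-frame motion scores to a file,
or to standard output when set to @code{-}. A possible invocation, using the
hypothetical output path @file{motion.log}:

@example
ffmpeg -i ref.mpg -lavfi vmafmotion=stats_file=motion.log -f null -
@end example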
 @section vstack
 Stack input videos vertically.
 
libavfilter/Makefile
@@ -331,6 +331,7 @@ OBJS-$(CONFIG_VFLIP_FILTER)                  += vf_vflip.o
 OBJS-$(CONFIG_VIDSTABDETECT_FILTER)          += vidstabutils.o vf_vidstabdetect.o
 OBJS-$(CONFIG_VIDSTABTRANSFORM_FILTER)       += vidstabutils.o vf_vidstabtransform.o
 OBJS-$(CONFIG_VIGNETTE_FILTER)               += vf_vignette.o
+OBJS-$(CONFIG_VMAFMOTION_FILTER)             += vf_vmafmotion.o framesync.o
 OBJS-$(CONFIG_VSTACK_FILTER)                 += vf_stack.o framesync.o
 OBJS-$(CONFIG_W3FDIF_FILTER)                 += vf_w3fdif.o
 OBJS-$(CONFIG_WAVEFORM_FILTER)               += vf_waveform.o
libavfilter/allfilters.c
@@ -342,6 +342,7 @@ static void register_all(void)
     REGISTER_FILTER(VIDSTABDETECT,  vidstabdetect,  vf);
     REGISTER_FILTER(VIDSTABTRANSFORM, vidstabtransform, vf);
     REGISTER_FILTER(VIGNETTE,       vignette,       vf);
+    REGISTER_FILTER(VMAFMOTION,     vmafmotion,     vf);
     REGISTER_FILTER(VSTACK,         vstack,         vf);
     REGISTER_FILTER(W3FDIF,         w3fdif,         vf);
     REGISTER_FILTER(WAVEFORM,       waveform,       vf);
libavfilter/vf_vmafmotion.c
new file mode 100644
@@ -0,0 +1,365 @@
+/*
+ * Copyright (c) 2017 Ronald S. Bultje <rsbultje@gmail.com>
+ * Copyright (c) 2017 Ashish Pratap Singh <ashk43712@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Calculate VMAF Motion score.
+ */
+
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+#include "avfilter.h"
+#include "drawutils.h"
+#include "formats.h"
+#include "internal.h"
+#include "vmaf_motion.h"
+
+#define BIT_SHIFT 15
+
+static const float FILTER_5[5] = {
+    0.054488685,
+    0.244201342,
+    0.402619947,
+    0.244201342,
+    0.054488685
+};
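/* The taps above form a normalized 5-tap smoothing kernel (they sum to 1).
 * ff_vmafmotion_init() converts them to Q15 fixed point, e.g.
 * lrint(0.402619947 * (1 << BIT_SHIFT)) = 13193, so the convolutions can run
 * in integer arithmetic and be undone with a right shift. */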
+
+typedef struct VMAFMotionContext {
+    const AVClass *class;
+    VMAFMotionData data;
+    FILE *stats_file;
+    char *stats_file_str;
+} VMAFMotionContext;
+
+#define OFFSET(x) offsetof(VMAFMotionContext, x)
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
+
+static const AVOption vmafmotion_options[] = {
+    {"stats_file", "Set file where to store per-frame difference information", OFFSET(stats_file_str), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(vmafmotion);
+
+static uint64_t image_sad(const uint16_t *img1, const uint16_t *img2, int w,
+                          int h, ptrdiff_t _img1_stride, ptrdiff_t _img2_stride)
+{
+    ptrdiff_t img1_stride = _img1_stride / sizeof(*img1);
+    ptrdiff_t img2_stride = _img2_stride / sizeof(*img2);
+    uint64_t sum = 0;
+    int i, j;
+
+    for (i = 0; i < h; i++) {
+        for (j = 0; j < w; j++) {
+            sum += abs(img1[j] - img2[j]);
+        }
+        img1 += img1_stride;
+        img2 += img2_stride;
+    }
+
+    return sum;
+}
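/* All of the DSP helpers take their strides in bytes (as stored in
 * VMAFMotionData.stride and AVFrame.linesize) and convert them to element
 * counts internally, hence the division by sizeof(*img1) above. */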
+
+static void convolution_x(const uint16_t *filter, int filt_w, const uint16_t *src,
+                          uint16_t *dst, int w, int h, ptrdiff_t _src_stride,
+                          ptrdiff_t _dst_stride)
+{
+    ptrdiff_t src_stride = _src_stride / sizeof(*src);
+    ptrdiff_t dst_stride = _dst_stride / sizeof(*dst);
+    int radius = filt_w / 2;
+    int borders_left = radius;
+    int borders_right = w - (filt_w - radius);
+    int i, j, k;
+    int sum = 0;
+
+    for (i = 0; i < h; i++) {
+        for (j = 0; j < borders_left; j++) {
+            sum = 0;
+            for (k = 0; k < filt_w; k++) {
+                int j_tap = FFABS(j - radius + k);
+                if (j_tap >= w) {
+                    j_tap = w - (j_tap - w + 1);
+                }
+                sum += filter[k] * src[i * src_stride + j_tap];
+            }
+            dst[i * dst_stride + j] = sum >> BIT_SHIFT;
+        }
+
+        for (j = borders_left; j < borders_right; j++) {
+            int sum = 0;
+            for (k = 0; k < filt_w; k++) {
+                sum += filter[k] * src[i * src_stride + j - radius + k];
+            }
+            dst[i * dst_stride + j] = sum >> BIT_SHIFT;
+        }
+
+        for (j = borders_right; j < w; j++) {
+            sum = 0;
+            for (k = 0; k < filt_w; k++) {
+                int j_tap = FFABS(j - radius + k);
+                if (j_tap >= w) {
+                    j_tap = w - (j_tap - w + 1);
+                }
+                sum += filter[k] * src[i * src_stride + j_tap];
+            }
+            dst[i * dst_stride + j] = sum >> BIT_SHIFT;
+        }
+    }
+}
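/* convolution_x() runs the 5-tap kernel along each row; taps that would fall
 * outside the row are mirrored back into it (FFABS handles the left edge,
 * w - (j_tap - w + 1) the right edge).  The convolution_y_*() functions below
 * apply the same kernel and the same reflection rule along columns. */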
+
+#define conv_y_fn(type, bits) \
+static void convolution_y_##bits##bit(const uint16_t *filter, int filt_w, \
+                                      const uint8_t *_src, uint16_t *dst, \
+                                      int w, int h, ptrdiff_t _src_stride, \
+                                      ptrdiff_t _dst_stride) \
+{ \
+    const type *src = (const type *) _src; \
+    ptrdiff_t src_stride = _src_stride / sizeof(*src); \
+    ptrdiff_t dst_stride = _dst_stride / sizeof(*dst); \
+    int radius = filt_w / 2; \
+    int borders_top = radius; \
+    int borders_bottom = h - (filt_w - radius); \
+    int i, j, k; \
+    int sum = 0; \
+    \
+    for (i = 0; i < borders_top; i++) { \
+        for (j = 0; j < w; j++) { \
+            sum = 0; \
+            for (k = 0; k < filt_w; k++) { \
+                int i_tap = FFABS(i - radius + k); \
+                if (i_tap >= h) { \
+                    i_tap = h - (i_tap - h + 1); \
+                } \
+                sum += filter[k] * src[i_tap * src_stride + j]; \
+            } \
+            dst[i * dst_stride + j] = sum >> bits; \
+        } \
+    } \
+    for (i = borders_top; i < borders_bottom; i++) { \
+        for (j = 0; j < w; j++) { \
+            sum = 0; \
+            for (k = 0; k < filt_w; k++) { \
+                sum += filter[k] * src[(i - radius + k) * src_stride + j]; \
+            } \
+            dst[i * dst_stride + j] = sum >> bits; \
+        } \
+    } \
+    for (i = borders_bottom; i < h; i++) { \
+        for (j = 0; j < w; j++) { \
+            sum = 0; \
+            for (k = 0; k < filt_w; k++) { \
+                int i_tap = FFABS(i - radius + k); \
+                if (i_tap >= h) { \
+                    i_tap = h - (i_tap - h + 1); \
+                } \
+                sum += filter[k] * src[i_tap * src_stride + j]; \
+            } \
+            dst[i * dst_stride + j] = sum >> bits; \
+        } \
+    } \
+}
+
+conv_y_fn(uint8_t, 8);
+conv_y_fn(uint16_t, 10);
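/* The vertical pass shifts by the source bit depth (8 or 10) rather than by
 * BIT_SHIFT, which puts 8- and 10-bit inputs on the same intermediate scale,
 * roughly the 8-bit-equivalent sample value << (BIT_SHIFT - 8).  The
 * horizontal pass then shifts by BIT_SHIFT, so the SAD normalization in
 * ff_vmafmotion_process() yields a score on the 8-bit scale for both depths. */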
+
+static void vmafmotiondsp_init(VMAFMotionDSPContext *dsp, int bpp) {
+    dsp->convolution_x = convolution_x;
+    dsp->convolution_y = bpp == 10 ? convolution_y_10bit : convolution_y_8bit;
+    dsp->sad = image_sad;
+}
+
+double ff_vmafmotion_process(VMAFMotionData *s, AVFrame *ref)
+{
+    double score;
+
+    s->vmafdsp.convolution_y(s->filter, 5, ref->data[0], s->temp_data,
+                             s->width, s->height, ref->linesize[0], s->stride);
+    s->vmafdsp.convolution_x(s->filter, 5, s->temp_data, s->blur_data[0],
+                             s->width, s->height, s->stride, s->stride);
+
+    if (!s->nb_frames) {
+        score = 0.0;
+    } else {
+        uint64_t sad = s->vmafdsp.sad(s->blur_data[1], s->blur_data[0],
+                                      s->width, s->height, s->stride, s->stride);
+        // the output score is always normalized to 8 bits
+        score = (double) (sad * 1.0 / (s->width * s->height << (BIT_SHIFT - 8)));
+    }
+
+    FFSWAP(uint16_t *, s->blur_data[0], s->blur_data[1]);
+    s->nb_frames++;
+    s->motion_sum += score;
+
+    return score;
+}
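/* The per-frame score is therefore the mean absolute difference per pixel,
 * on the 8-bit scale, between the blurred luma plane of the current frame and
 * that of the previous one (kept via the FFSWAP above).  The first frame has
 * no predecessor and always scores 0. */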
212
+
213
+static void set_meta(AVDictionary **metadata, const char *key, float d)
214
+{
215
+    char value[128];
216
+    snprintf(value, sizeof(value), "%0.2f", d);
217
+    av_dict_set(metadata, key, value, 0);
218
+}
219
+
220
+static void do_vmafmotion(AVFilterContext *ctx, AVFrame *ref)
221
+{
222
+    VMAFMotionContext *s = ctx->priv;
223
+    double score;
224
+
225
+    score = ff_vmafmotion_process(&s->data, ref);
226
+    set_meta(&ref->metadata, "lavfi.vmafmotion.score", score);
227
+    if (s->stats_file) {
228
+        fprintf(s->stats_file,
229
+                "n:%"PRId64" motion:%0.2lf\n", s->data.nb_frames, score);
230
+    }
231
+}
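/* Besides the optional stats file, the per-frame score is attached to the
 * frame as the "lavfi.vmafmotion.score" metadata entry, so it can also be
 * inspected with, for example, the metadata filter (illustrative command):
 *     ffmpeg -i ref.mpg -lavfi vmafmotion,metadata=print -f null - */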
232
+
233
+
234
+int ff_vmafmotion_init(VMAFMotionData *s,
235
+                       int w, int h, enum AVPixelFormat fmt)
236
+{
237
+    size_t data_sz;
238
+    int i;
239
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);
240
+
241
+    s->width = w;
242
+    s->height = h;
243
+    s->stride = FFALIGN(w * sizeof(uint16_t), 32);
244
+
245
+    data_sz = (size_t) s->stride * h;
246
+    if (!(s->blur_data[0] = av_malloc(data_sz)) ||
247
+        !(s->blur_data[1] = av_malloc(data_sz)) ||
248
+        !(s->temp_data    = av_malloc(data_sz))) {
249
+        return AVERROR(ENOMEM);
250
+    }
251
+
252
+    for (i = 0; i < 5; i++) {
253
+        s->filter[i] = lrint(FILTER_5[i] * (1 << BIT_SHIFT));
254
+    }
255
+
256
+    vmafmotiondsp_init(&s->vmafdsp, desc->comp[0].depth);
257
+
258
+    return 0;
259
+}
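/* The intermediate buffers hold uint16_t rows padded to a 32-byte aligned
 * stride, presumably so that a SIMD override installed through the
 * ff_vmafmotion_init_x86() hook declared in vmaf_motion.h can assume
 * aligned rows. */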
+
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat pix_fmts[] = {
+        AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV420P,
+        AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV420P10,
+        AV_PIX_FMT_NONE
+    };
+
+    AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
+    if (!fmts_list)
+        return AVERROR(ENOMEM);
+    return ff_set_common_formats(ctx, fmts_list);
+}
+
+static int config_input_ref(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx  = inlink->dst;
+    VMAFMotionContext *s = ctx->priv;
+
+    return ff_vmafmotion_init(&s->data, ctx->inputs[0]->w,
+                              ctx->inputs[0]->h, ctx->inputs[0]->format);
+}
+
+double ff_vmafmotion_uninit(VMAFMotionData *s)
+{
+    av_free(s->blur_data[0]);
+    av_free(s->blur_data[1]);
+    av_free(s->temp_data);
+
+    return s->nb_frames > 0 ? s->motion_sum / s->nb_frames : 0.0;
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *ref)
+{
+    AVFilterContext *ctx = inlink->dst;
+    do_vmafmotion(ctx, ref);
+    return ff_filter_frame(ctx->outputs[0], ref);
+}
+
+static av_cold int init(AVFilterContext *ctx)
+{
+    VMAFMotionContext *s = ctx->priv;
+
+    if (s->stats_file_str) {
+        if (!strcmp(s->stats_file_str, "-")) {
+            s->stats_file = stdout;
+        } else {
+            s->stats_file = fopen(s->stats_file_str, "w");
+            if (!s->stats_file) {
+                int err = AVERROR(errno);
+                char buf[128];
+                av_strerror(err, buf, sizeof(buf));
+                av_log(ctx, AV_LOG_ERROR, "Could not open stats file %s: %s\n",
+                       s->stats_file_str, buf);
+                return err;
+            }
+        }
+    }
+
+    return 0;
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    VMAFMotionContext *s = ctx->priv;
+    double avg_motion = ff_vmafmotion_uninit(&s->data);
+
+    if (s->data.nb_frames > 0) {
+        av_log(ctx, AV_LOG_INFO, "VMAF Motion avg: %.3f\n", avg_motion);
+    }
+
+    if (s->stats_file && s->stats_file != stdout)
+        fclose(s->stats_file);
+}
+
+static const AVFilterPad vmafmotion_inputs[] = {
+    {
+        .name         = "reference",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame,
+        .config_props = config_input_ref,
+    },
+    { NULL }
+};
+
+static const AVFilterPad vmafmotion_outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_vmafmotion = {
+    .name          = "vmafmotion",
+    .description   = NULL_IF_CONFIG_SMALL("Calculate the VMAF Motion score."),
+    .init          = init,
+    .uninit        = uninit,
+    .query_formats = query_formats,
+    .priv_size     = sizeof(VMAFMotionContext),
+    .priv_class    = &vmafmotion_class,
+    .inputs        = vmafmotion_inputs,
+    .outputs       = vmafmotion_outputs,
+};
libavfilter/vmaf_motion.h
new file mode 100644
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2017 Ronald S. Bultje <rsbultje@gmail.com>
+ * Copyright (c) 2017 Ashish Pratap Singh <ashk43712@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_VMAFMOTION_H
+#define AVFILTER_VMAFMOTION_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include "video.h"
+
+typedef struct VMAFMotionDSPContext {
+    uint64_t (*sad)(const uint16_t *img1, const uint16_t *img2, int w, int h,
+                    ptrdiff_t img1_stride, ptrdiff_t img2_stride);
+    void (*convolution_x)(const uint16_t *filter, int filt_w, const uint16_t *src,
+                          uint16_t *dst, int w, int h, ptrdiff_t src_stride,
+                          ptrdiff_t dst_stride);
+    void (*convolution_y)(const uint16_t *filter, int filt_w, const uint8_t *src,
+                          uint16_t *dst, int w, int h, ptrdiff_t src_stride,
+                          ptrdiff_t dst_stride);
+} VMAFMotionDSPContext;
+
+void ff_vmafmotion_init_x86(VMAFMotionDSPContext *dsp);
+
+typedef struct VMAFMotionData {
+    uint16_t filter[5];
+    int width;
+    int height;
+    ptrdiff_t stride;
+    uint16_t *blur_data[2 /* cur, prev */];
+    uint16_t *temp_data;
+    double motion_sum;
+    uint64_t nb_frames;
+    VMAFMotionDSPContext vmafdsp;
+} VMAFMotionData;
+
+int ff_vmafmotion_init(VMAFMotionData *data, int w, int h, enum AVPixelFormat fmt);
+double ff_vmafmotion_process(VMAFMotionData *data, AVFrame *frame);
+double ff_vmafmotion_uninit(VMAFMotionData *data);
+
+#endif /* AVFILTER_VMAFMOTION_H */
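For orientation, a minimal sketch of how another libavfilter component (for
example a future combined VMAF filter) might drive the API declared above.
score_sequence() is a hypothetical helper, it assumes the file lives inside
libavfilter so the internal headers resolve, and error handling is abbreviated:

#include "libavutil/frame.h"
#include "libavutil/log.h"
#include "libavutil/pixfmt.h"
#include "vmaf_motion.h"

/* Hypothetical caller: feeds a sequence of already-decoded YUV420P frames
 * through the motion metric and returns the average score. */
static double score_sequence(AVFrame **frames, int nb_frames, int w, int h)
{
    VMAFMotionData d = { 0 };
    int i;

    if (ff_vmafmotion_init(&d, w, h, AV_PIX_FMT_YUV420P) < 0)
        return -1.0;

    for (i = 0; i < nb_frames; i++) {
        /* per-frame motion score; the first frame always yields 0.0 */
        double score = ff_vmafmotion_process(&d, frames[i]);
        av_log(NULL, AV_LOG_DEBUG, "frame %d: motion %.3f\n", i, score);
    }

    /* frees the blur buffers and returns the average over all frames */
    return ff_vmafmotion_uninit(&d);
}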