
avfilter: add vmafmotion filter

Signed-off-by: Ashish Singh <ashk43712@gmail.com>
Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>

Ashish Singh authored on 2017/09/16 05:47:58
Showing 6 changed files
Changelog
@@ -51,6 +51,7 @@ version <next>:
 - CUDA thumbnail filter
 - V4L2 mem2mem HW assisted codecs
 - Rockchip MPP hardware decoding
+- vmafmotion video filter
 
 
 version 3.3:
doc/filters.texi
@@ -15570,6 +15570,20 @@ vignette='PI/4+random(1)*PI/50':eval=frame
 
 @end itemize
 
+@section vmafmotion
+
+Obtain the average VMAF motion score of a video.
+It is one of the component filters of VMAF.
+
+The obtained average motion score is printed through the logging system.
+
+In the example below, the input file @file{ref.mpg} is processed and its
+score is computed.
+
+@example
+ffmpeg -i ref.mpg -lavfi vmafmotion -f null -
+@end example
+
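The filter also has a @option{stats_file} option (see vmafmotion_options in
vf_vmafmotion.c below), which writes the per-frame motion scores to a file,
or to standard output when set to @code{-}. A possible invocation, using the
hypothetical output path @file{motion.log}:

@example
ffmpeg -i ref.mpg -lavfi vmafmotion=stats_file=motion.log -f null -
@end example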
 @section vstack
 Stack input videos vertically.
 
libavfilter/Makefile
@@ -331,6 +331,7 @@ OBJS-$(CONFIG_VFLIP_FILTER)                  += vf_vflip.o
 OBJS-$(CONFIG_VIDSTABDETECT_FILTER)          += vidstabutils.o vf_vidstabdetect.o
 OBJS-$(CONFIG_VIDSTABTRANSFORM_FILTER)       += vidstabutils.o vf_vidstabtransform.o
 OBJS-$(CONFIG_VIGNETTE_FILTER)               += vf_vignette.o
+OBJS-$(CONFIG_VMAFMOTION_FILTER)             += vf_vmafmotion.o framesync.o
 OBJS-$(CONFIG_VSTACK_FILTER)                 += vf_stack.o framesync.o
 OBJS-$(CONFIG_W3FDIF_FILTER)                 += vf_w3fdif.o
 OBJS-$(CONFIG_WAVEFORM_FILTER)               += vf_waveform.o
libavfilter/allfilters.c
@@ -342,6 +342,7 @@ static void register_all(void)
     REGISTER_FILTER(VIDSTABDETECT,  vidstabdetect,  vf);
     REGISTER_FILTER(VIDSTABTRANSFORM, vidstabtransform, vf);
     REGISTER_FILTER(VIGNETTE,       vignette,       vf);
+    REGISTER_FILTER(VMAFMOTION,     vmafmotion,     vf);
     REGISTER_FILTER(VSTACK,         vstack,         vf);
     REGISTER_FILTER(W3FDIF,         w3fdif,         vf);
     REGISTER_FILTER(WAVEFORM,       waveform,       vf);
libavfilter/vf_vmafmotion.c
new file mode 100644
@@ -0,0 +1,365 @@
+/*
+ * Copyright (c) 2017 Ronald S. Bultje <rsbultje@gmail.com>
+ * Copyright (c) 2017 Ashish Pratap Singh <ashk43712@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Calculate VMAF Motion score.
+ */
+
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+#include "avfilter.h"
+#include "drawutils.h"
+#include "formats.h"
+#include "internal.h"
+#include "vmaf_motion.h"
+
+#define BIT_SHIFT 15
+
+static const float FILTER_5[5] = {
+    0.054488685,
+    0.244201342,
+    0.402619947,
+    0.244201342,
+    0.054488685
+};
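/* The taps above form a normalized 5-tap smoothing kernel (they sum to 1).
 * ff_vmafmotion_init() converts them to Q15 fixed point, e.g.
 * lrint(0.402619947 * (1 << BIT_SHIFT)) = 13193, so the convolutions can run
 * in integer arithmetic and be undone with a right shift. */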
+
+typedef struct VMAFMotionContext {
+    const AVClass *class;
+    VMAFMotionData data;
+    FILE *stats_file;
+    char *stats_file_str;
+} VMAFMotionContext;
+
+#define OFFSET(x) offsetof(VMAFMotionContext, x)
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
+
+static const AVOption vmafmotion_options[] = {
+    {"stats_file", "Set file where to store per-frame difference information", OFFSET(stats_file_str), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(vmafmotion);
+
+static uint64_t image_sad(const uint16_t *img1, const uint16_t *img2, int w,
+                          int h, ptrdiff_t _img1_stride, ptrdiff_t _img2_stride)
+{
+    ptrdiff_t img1_stride = _img1_stride / sizeof(*img1);
+    ptrdiff_t img2_stride = _img2_stride / sizeof(*img2);
+    uint64_t sum = 0;
+    int i, j;
+
+    for (i = 0; i < h; i++) {
+        for (j = 0; j < w; j++) {
+            sum += abs(img1[j] - img2[j]);
+        }
+        img1 += img1_stride;
+        img2 += img2_stride;
+    }
+
+    return sum;
+}
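/* All of the DSP helpers take their strides in bytes (as stored in
 * VMAFMotionData.stride and AVFrame.linesize) and convert them to element
 * counts internally, hence the division by sizeof(*img1) above. */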
+
+static void convolution_x(const uint16_t *filter, int filt_w, const uint16_t *src,
+                          uint16_t *dst, int w, int h, ptrdiff_t _src_stride,
+                          ptrdiff_t _dst_stride)
+{
+    ptrdiff_t src_stride = _src_stride / sizeof(*src);
+    ptrdiff_t dst_stride = _dst_stride / sizeof(*dst);
+    int radius = filt_w / 2;
+    int borders_left = radius;
+    int borders_right = w - (filt_w - radius);
+    int i, j, k;
+    int sum = 0;
+
+    for (i = 0; i < h; i++) {
+        for (j = 0; j < borders_left; j++) {
+            sum = 0;
+            for (k = 0; k < filt_w; k++) {
+                int j_tap = FFABS(j - radius + k);
+                if (j_tap >= w) {
+                    j_tap = w - (j_tap - w + 1);
+                }
+                sum += filter[k] * src[i * src_stride + j_tap];
+            }
+            dst[i * dst_stride + j] = sum >> BIT_SHIFT;
+        }
+
+        for (j = borders_left; j < borders_right; j++) {
+            int sum = 0;
+            for (k = 0; k < filt_w; k++) {
+                sum += filter[k] * src[i * src_stride + j - radius + k];
+            }
+            dst[i * dst_stride + j] = sum >> BIT_SHIFT;
+        }
+
+        for (j = borders_right; j < w; j++) {
+            sum = 0;
+            for (k = 0; k < filt_w; k++) {
+                int j_tap = FFABS(j - radius + k);
+                if (j_tap >= w) {
+                    j_tap = w - (j_tap - w + 1);
+                }
+                sum += filter[k] * src[i * src_stride + j_tap];
+            }
+            dst[i * dst_stride + j] = sum >> BIT_SHIFT;
+        }
+    }
+}
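/* convolution_x() runs the 5-tap kernel along each row; taps that would fall
 * outside the row are mirrored back into it (FFABS handles the left edge,
 * w - (j_tap - w + 1) the right edge).  The convolution_y_*() functions below
 * apply the same kernel and the same reflection rule along columns. */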
+
+#define conv_y_fn(type, bits) \
+static void convolution_y_##bits##bit(const uint16_t *filter, int filt_w, \
+                                      const uint8_t *_src, uint16_t *dst, \
+                                      int w, int h, ptrdiff_t _src_stride, \
+                                      ptrdiff_t _dst_stride) \
+{ \
+    const type *src = (const type *) _src; \
+    ptrdiff_t src_stride = _src_stride / sizeof(*src); \
+    ptrdiff_t dst_stride = _dst_stride / sizeof(*dst); \
+    int radius = filt_w / 2; \
+    int borders_top = radius; \
+    int borders_bottom = h - (filt_w - radius); \
+    int i, j, k; \
+    int sum = 0; \
+    \
+    for (i = 0; i < borders_top; i++) { \
+        for (j = 0; j < w; j++) { \
+            sum = 0; \
+            for (k = 0; k < filt_w; k++) { \
+                int i_tap = FFABS(i - radius + k); \
+                if (i_tap >= h) { \
+                    i_tap = h - (i_tap - h + 1); \
+                } \
+                sum += filter[k] * src[i_tap * src_stride + j]; \
+            } \
+            dst[i * dst_stride + j] = sum >> bits; \
+        } \
+    } \
+    for (i = borders_top; i < borders_bottom; i++) { \
+        for (j = 0; j < w; j++) { \
+            sum = 0; \
+            for (k = 0; k < filt_w; k++) { \
+                sum += filter[k] * src[(i - radius + k) * src_stride + j]; \
+            } \
+            dst[i * dst_stride + j] = sum >> bits; \
+        } \
+    } \
+    for (i = borders_bottom; i < h; i++) { \
+        for (j = 0; j < w; j++) { \
+            sum = 0; \
+            for (k = 0; k < filt_w; k++) { \
+                int i_tap = FFABS(i - radius + k); \
+                if (i_tap >= h) { \
+                    i_tap = h - (i_tap - h + 1); \
+                } \
+                sum += filter[k] * src[i_tap * src_stride + j]; \
+            } \
+            dst[i * dst_stride + j] = sum >> bits; \
+        } \
+    } \
+}
+
+conv_y_fn(uint8_t, 8);
+conv_y_fn(uint16_t, 10);
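/* The vertical pass shifts by the source bit depth (8 or 10) rather than by
 * BIT_SHIFT, which puts 8- and 10-bit inputs on the same intermediate scale,
 * roughly the 8-bit-equivalent sample value << (BIT_SHIFT - 8).  The
 * horizontal pass then shifts by BIT_SHIFT, so the SAD normalization in
 * ff_vmafmotion_process() yields a score on the 8-bit scale for both depths. */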
+
+static void vmafmotiondsp_init(VMAFMotionDSPContext *dsp, int bpp) {
+    dsp->convolution_x = convolution_x;
+    dsp->convolution_y = bpp == 10 ? convolution_y_10bit : convolution_y_8bit;
+    dsp->sad = image_sad;
+}
+
+double ff_vmafmotion_process(VMAFMotionData *s, AVFrame *ref)
+{
+    double score;
+
+    s->vmafdsp.convolution_y(s->filter, 5, ref->data[0], s->temp_data,
+                             s->width, s->height, ref->linesize[0], s->stride);
+    s->vmafdsp.convolution_x(s->filter, 5, s->temp_data, s->blur_data[0],
+                             s->width, s->height, s->stride, s->stride);
+
+    if (!s->nb_frames) {
+        score = 0.0;
+    } else {
+        uint64_t sad = s->vmafdsp.sad(s->blur_data[1], s->blur_data[0],
+                                      s->width, s->height, s->stride, s->stride);
+        // the output score is always normalized to 8 bits
+        score = (double) (sad * 1.0 / (s->width * s->height << (BIT_SHIFT - 8)));
+    }
+
+    FFSWAP(uint16_t *, s->blur_data[0], s->blur_data[1]);
+    s->nb_frames++;
+    s->motion_sum += score;
+
+    return score;
+}
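/* The per-frame score is therefore the mean absolute difference per pixel,
 * on the 8-bit scale, between the blurred luma plane of the current frame and
 * that of the previous one (kept via the FFSWAP above).  The first frame has
 * no predecessor and always scores 0. */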
212
+
213
+static void set_meta(AVDictionary **metadata, const char *key, float d)
214
+{
215
+    char value[128];
216
+    snprintf(value, sizeof(value), "%0.2f", d);
217
+    av_dict_set(metadata, key, value, 0);
218
+}
219
+
220
+static void do_vmafmotion(AVFilterContext *ctx, AVFrame *ref)
221
+{
222
+    VMAFMotionContext *s = ctx->priv;
223
+    double score;
224
+
225
+    score = ff_vmafmotion_process(&s->data, ref);
226
+    set_meta(&ref->metadata, "lavfi.vmafmotion.score", score);
227
+    if (s->stats_file) {
228
+        fprintf(s->stats_file,
229
+                "n:%"PRId64" motion:%0.2lf\n", s->data.nb_frames, score);
230
+    }
231
+}
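/* Besides the optional stats file, the per-frame score is attached to the
 * frame as the "lavfi.vmafmotion.score" metadata entry, so it can also be
 * inspected with, for example, the metadata filter (illustrative command):
 *     ffmpeg -i ref.mpg -lavfi vmafmotion,metadata=print -f null - */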
232
+
233
+
234
+int ff_vmafmotion_init(VMAFMotionData *s,
235
+                       int w, int h, enum AVPixelFormat fmt)
236
+{
237
+    size_t data_sz;
238
+    int i;
239
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);
240
+
241
+    s->width = w;
242
+    s->height = h;
243
+    s->stride = FFALIGN(w * sizeof(uint16_t), 32);
244
+
245
+    data_sz = (size_t) s->stride * h;
246
+    if (!(s->blur_data[0] = av_malloc(data_sz)) ||
247
+        !(s->blur_data[1] = av_malloc(data_sz)) ||
248
+        !(s->temp_data    = av_malloc(data_sz))) {
249
+        return AVERROR(ENOMEM);
250
+    }
251
+
252
+    for (i = 0; i < 5; i++) {
253
+        s->filter[i] = lrint(FILTER_5[i] * (1 << BIT_SHIFT));
254
+    }
255
+
256
+    vmafmotiondsp_init(&s->vmafdsp, desc->comp[0].depth);
257
+
258
+    return 0;
259
+}
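/* The intermediate buffers hold uint16_t rows padded to a 32-byte aligned
 * stride, presumably so that a SIMD override installed through the
 * ff_vmafmotion_init_x86() hook declared in vmaf_motion.h can assume
 * aligned rows. */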
+
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat pix_fmts[] = {
+        AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV420P,
+        AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV420P10,
+        AV_PIX_FMT_NONE
+    };
+
+    AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
+    if (!fmts_list)
+        return AVERROR(ENOMEM);
+    return ff_set_common_formats(ctx, fmts_list);
+}
+
+static int config_input_ref(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx  = inlink->dst;
+    VMAFMotionContext *s = ctx->priv;
+
+    return ff_vmafmotion_init(&s->data, ctx->inputs[0]->w,
+                              ctx->inputs[0]->h, ctx->inputs[0]->format);
+}
+
+double ff_vmafmotion_uninit(VMAFMotionData *s)
+{
+    av_free(s->blur_data[0]);
+    av_free(s->blur_data[1]);
+    av_free(s->temp_data);
+
+    return s->nb_frames > 0 ? s->motion_sum / s->nb_frames : 0.0;
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *ref)
+{
+    AVFilterContext *ctx = inlink->dst;
+    do_vmafmotion(ctx, ref);
+    return ff_filter_frame(ctx->outputs[0], ref);
+}
+
+static av_cold int init(AVFilterContext *ctx)
+{
+    VMAFMotionContext *s = ctx->priv;
+
+    if (s->stats_file_str) {
+        if (!strcmp(s->stats_file_str, "-")) {
+            s->stats_file = stdout;
+        } else {
+            s->stats_file = fopen(s->stats_file_str, "w");
+            if (!s->stats_file) {
+                int err = AVERROR(errno);
+                char buf[128];
+                av_strerror(err, buf, sizeof(buf));
+                av_log(ctx, AV_LOG_ERROR, "Could not open stats file %s: %s\n",
+                       s->stats_file_str, buf);
+                return err;
+            }
+        }
+    }
+
+    return 0;
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    VMAFMotionContext *s = ctx->priv;
+    double avg_motion = ff_vmafmotion_uninit(&s->data);
+
+    if (s->data.nb_frames > 0) {
+        av_log(ctx, AV_LOG_INFO, "VMAF Motion avg: %.3f\n", avg_motion);
+    }
+
+    if (s->stats_file && s->stats_file != stdout)
+        fclose(s->stats_file);
+}
+
+static const AVFilterPad vmafmotion_inputs[] = {
+    {
+        .name         = "reference",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame,
+        .config_props = config_input_ref,
+    },
+    { NULL }
+};
+
+static const AVFilterPad vmafmotion_outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_vmafmotion = {
+    .name          = "vmafmotion",
+    .description   = NULL_IF_CONFIG_SMALL("Calculate the VMAF Motion score."),
+    .init          = init,
+    .uninit        = uninit,
+    .query_formats = query_formats,
+    .priv_size     = sizeof(VMAFMotionContext),
+    .priv_class    = &vmafmotion_class,
+    .inputs        = vmafmotion_inputs,
+    .outputs       = vmafmotion_outputs,
+};
libavfilter/vmaf_motion.h
new file mode 100644
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2017 Ronald S. Bultje <rsbultje@gmail.com>
+ * Copyright (c) 2017 Ashish Pratap Singh <ashk43712@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_VMAFMOTION_H
+#define AVFILTER_VMAFMOTION_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include "video.h"
+
+typedef struct VMAFMotionDSPContext {
+    uint64_t (*sad)(const uint16_t *img1, const uint16_t *img2, int w, int h,
+                    ptrdiff_t img1_stride, ptrdiff_t img2_stride);
+    void (*convolution_x)(const uint16_t *filter, int filt_w, const uint16_t *src,
+                          uint16_t *dst, int w, int h, ptrdiff_t src_stride,
+                          ptrdiff_t dst_stride);
+    void (*convolution_y)(const uint16_t *filter, int filt_w, const uint8_t *src,
+                          uint16_t *dst, int w, int h, ptrdiff_t src_stride,
+                          ptrdiff_t dst_stride);
+} VMAFMotionDSPContext;
+
+void ff_vmafmotion_init_x86(VMAFMotionDSPContext *dsp);
+
+typedef struct VMAFMotionData {
+    uint16_t filter[5];
+    int width;
+    int height;
+    ptrdiff_t stride;
+    uint16_t *blur_data[2 /* cur, prev */];
+    uint16_t *temp_data;
+    double motion_sum;
+    uint64_t nb_frames;
+    VMAFMotionDSPContext vmafdsp;
+} VMAFMotionData;
+
+int ff_vmafmotion_init(VMAFMotionData *data, int w, int h, enum AVPixelFormat fmt);
+double ff_vmafmotion_process(VMAFMotionData *data, AVFrame *frame);
+double ff_vmafmotion_uninit(VMAFMotionData *data);
+
+#endif /* AVFILTER_VMAFMOTION_H */
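For orientation, a minimal sketch of how another libavfilter component (for
example a future combined VMAF filter) might drive the API declared above.
score_sequence() is a hypothetical helper, it assumes the file lives inside
libavfilter so the internal headers resolve, and error handling is abbreviated:

#include "libavutil/frame.h"
#include "libavutil/log.h"
#include "libavutil/pixfmt.h"
#include "vmaf_motion.h"

/* Hypothetical caller: feeds a sequence of already-decoded YUV420P frames
 * through the motion metric and returns the average score. */
static double score_sequence(AVFrame **frames, int nb_frames, int w, int h)
{
    VMAFMotionData d = { 0 };
    int i;

    if (ff_vmafmotion_init(&d, w, h, AV_PIX_FMT_YUV420P) < 0)
        return -1.0;

    for (i = 0; i < nb_frames; i++) {
        /* per-frame motion score; the first frame always yields 0.0 */
        double score = ff_vmafmotion_process(&d, frames[i]);
        av_log(NULL, AV_LOG_DEBUG, "frame %d: motion %.3f\n", i, score);
    }

    /* frees the blur buffers and returns the average over all frames */
    return ff_vmafmotion_uninit(&d);
}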