Browse code

avfilter: add ssim filter

Signed-off-by: Paul B Mahol <onemda@gmail.com>

Paul B Mahol authored on 2015/06/21 20:27:55
Showing 6 changed files
... ...
@@ -7,6 +7,7 @@ version <next>:
7 7
 - little-endian ADPCM_THP decoder
8 8
 - Hap decoder and encoder
9 9
 - DirectDraw Surface image/texture decoder
10
+- ssim filter
10 11
 
11 12
 
12 13
 version 2.7:
... ...
@@ -8834,6 +8834,64 @@ in [-30,0] will filter edges. Default value is 0.
8834 8834
 If a chroma option is not explicitly set, the corresponding luma value
8835 8835
 is set.
8836 8836
 
8837
+@section ssim
8838
+
8839
+Obtain the SSIM (Structural SImilarity Metric) between two input videos.
8840
+
8841
+This filter takes two input videos; the first input is
8842
+considered the "main" source and is passed unchanged to the
8843
+output. The second input is used as a "reference" video for computing
8844
+the SSIM.
8845
+
8846
+Both video inputs must have the same resolution and pixel format for
8847
+this filter to work correctly. Also it assumes that both inputs
8848
+have the same number of frames, which are compared one by one.
8849
+
8850
+The filter stores the calculated SSIM of each frame.
8851
+
8852
+The description of the accepted parameters follows.
8853
+
8854
+@table @option
8855
+@item stats_file, f
8856
+If specified the filter will use the named file to save the SSIM of
8857
+each individual frame.
8858
+@end table
8859
+
8860
+The file printed if @var{stats_file} is selected, contains a sequence of
8861
+key/value pairs of the form @var{key}:@var{value} for each compared
8862
+couple of frames.
8863
+
8864
+A description of each shown parameter follows:
8865
+
8866
+@table @option
8867
+@item n
8868
+sequential number of the input frame, starting from 1
8869
+
8870
+@item Y, U, V, R, G, B
8871
+SSIM of the compared frames for the component specified by the suffix.
8872
+
8873
+@item All
8874
+SSIM of the compared frames for the whole frame.
8875
+
8876
+@item dB
8877
+Same as above but in dB representation.
8878
+@end table
8879
+
8880
+For example:
8881
+@example
8882
+movie=ref_movie.mpg, setpts=PTS-STARTPTS [main];
8883
+[main][ref] ssim="stats_file=stats.log" [out]
8884
+@end example
8885
+
8886
+In this example the input file being processed is compared with the
8887
+reference file @file{ref_movie.mpg}. The SSIM of each individual frame
8888
+is stored in @file{stats.log}.
8889
+
8890
+Another example with both psnr and ssim at the same time:
8891
+@example
8892
+ffmpeg -i main.mpg -i ref.mpg -lavfi  "ssim;[0:v][1:v]psnr" -f null -
8893
+@end example
8894
+
8837 8895
 @section stereo3d
8838 8896
 
8839 8897
 Convert between different stereoscopic image formats.
... ...
@@ -197,6 +197,7 @@ OBJS-$(CONFIG_SIGNALSTATS_FILTER)            += vf_signalstats.o
197 197
 OBJS-$(CONFIG_SMARTBLUR_FILTER)              += vf_smartblur.o
198 198
 OBJS-$(CONFIG_SPLIT_FILTER)                  += split.o
199 199
 OBJS-$(CONFIG_SPP_FILTER)                    += vf_spp.o
200
+OBJS-$(CONFIG_SSIM_FILTER)                   += vf_ssim.o dualinput.o framesync.o
200 201
 OBJS-$(CONFIG_STEREO3D_FILTER)               += vf_stereo3d.o
201 202
 OBJS-$(CONFIG_SUBTITLES_FILTER)              += vf_subtitles.o
202 203
 OBJS-$(CONFIG_SUPER2XSAI_FILTER)             += vf_super2xsai.o
... ...
@@ -212,6 +212,7 @@ void avfilter_register_all(void)
212 212
     REGISTER_FILTER(SMARTBLUR,      smartblur,      vf);
213 213
     REGISTER_FILTER(SPLIT,          split,          vf);
214 214
     REGISTER_FILTER(SPP,            spp,            vf);
215
+    REGISTER_FILTER(SSIM,           ssim,           vf);
215 216
     REGISTER_FILTER(STEREO3D,       stereo3d,       vf);
216 217
     REGISTER_FILTER(SUBTITLES,      subtitles,      vf);
217 218
     REGISTER_FILTER(SUPER2XSAI,     super2xsai,     vf);
... ...
@@ -30,7 +30,7 @@
30 30
 #include "libavutil/version.h"
31 31
 
32 32
 #define LIBAVFILTER_VERSION_MAJOR  5
33
-#define LIBAVFILTER_VERSION_MINOR  17
33
+#define LIBAVFILTER_VERSION_MINOR  18
34 34
 #define LIBAVFILTER_VERSION_MICRO 100
35 35
 
36 36
 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
37 37
new file mode 100644
... ...
@@ -0,0 +1,399 @@
0
+/*
1
+ * Copyright (c) 2003-2013 Loren Merritt
2
+ * Copyright (c) 2015 Paul B Mahol
3
+ *
4
+ * This file is part of FFmpeg.
5
+ *
6
+ * FFmpeg is free software; you can redistribute it and/or
7
+ * modify it under the terms of the GNU Lesser General Public
8
+ * License as published by the Free Software Foundation; either
9
+ * version 2.1 of the License, or (at your option) any later version.
10
+ *
11
+ * FFmpeg is distributed in the hope that it will be useful,
12
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
+ * Lesser General Public License for more details.
15
+ *
16
+ * You should have received a copy of the GNU Lesser General Public
17
+ * License along with FFmpeg; if not, write to the Free Software
18
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
+ */
20
+
21
+/* Computes the Structural Similarity Metric between two video streams.
22
+ * original algorithm:
23
+ * Z. Wang, A. C. Bovik, H. R. Sheikh and E. P. Simoncelli,
24
+ *   "Image quality assessment: From error visibility to structural similarity,"
25
+ *   IEEE Transactions on Image Processing, vol. 13, no. 4, pp. 600-612, Apr. 2004.
26
+ *
27
+ * To improve speed, this implementation uses the standard approximation of
28
+ * overlapped 8x8 block sums, rather than the original gaussian weights.
29
+ */
30
+
31
+/*
32
+ * @file
33
+ * Calculate the SSIM between two input videos.
34
+ */
35
+
36
+#include "libavutil/opt.h"
37
+#include "libavutil/pixdesc.h"
38
+#include "avfilter.h"
39
+#include "dualinput.h"
40
+#include "drawutils.h"
41
+#include "formats.h"
42
+#include "internal.h"
43
+#include "video.h"
44
+
45
/** Private state of one ssim filter instance. */
typedef struct SSIMContext {
    const AVClass *class;
    FFDualInputContext dinput;  /* dual-input helper; init() sets its process callback to do_ssim() */
    FILE *stats_file;           /* opened in init() when stats_file_str is set */
    char *stats_file_str;       /* value of the "stats_file" / "f" option */
    int nb_components;          /* component count taken from the pixel format descriptor */
    uint64_t nb_frames;         /* number of frame pairs processed by do_ssim() */
    double ssim[4];             /* per-component SSIM accumulated over all frames */
    char comps[4];              /* component letters used in metadata keys and log output */
    int *coefs;                 /* points at rgb_coefs, yuv_coefs or gray_coefs */
    uint8_t rgba_map[4];        /* filled by ff_fill_rgba_map() in config_input_ref() */
    int planewidth[4];          /* per-plane width, set in config_input_ref() */
    int planeheight[4];         /* per-plane height, set in config_input_ref() */
    int *temp;                  /* scratch buffer handed to ssim_plane() */
} SSIMContext;
60
+
61
#define OFFSET(x) offsetof(SSIMContext, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM

/* "stats_file" and its short alias "f" both store into stats_file_str. */
static const AVOption ssim_options[] = {
    {"stats_file", "Set file where to store per-frame difference information", OFFSET(stats_file_str), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
    {"f",          "Set file where to store per-frame difference information", OFFSET(stats_file_str), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
    { NULL }
};

AVFILTER_DEFINE_CLASS(ssim);
71
+
72
/*
 * Weights for the combined "All" score: entries 0-2 multiply the
 * per-component SSIM values and entry 3 is the divisor (see do_ssim()).
 */
static int rgb_coefs[4]  = { 1, 1, 1, 3};
static int yuv_coefs[4]  = { 4, 1, 1, 6};
static int gray_coefs[4] = { 1, 0, 0, 1};
75
+
76
+static void set_meta(AVDictionary **metadata, const char *key, char comp, float d)
77
+{
78
+    char value[128];
79
+    snprintf(value, sizeof(value), "%0.2f", d);
80
+    if (comp) {
81
+        char key2[128];
82
+        snprintf(key2, sizeof(key2), "%s%c", key, comp);
83
+        av_dict_set(metadata, key2, value, 0);
84
+    } else {
85
+        av_dict_set(metadata, key, value, 0);
86
+    }
87
+}
88
+
89
/**
 * Gather the pixel statistics of two horizontally adjacent 4x4 blocks.
 *
 * For each block z (0 = left, 1 = right) sums[z] receives, in order:
 * the sum of the main pixels, the sum of the ref pixels, the sum of the
 * squares of both, and the sum of the main*ref products.
 */
static void ssim_4x4x2_core(const uint8_t *main, int main_stride,
                            const uint8_t *ref, int ref_stride,
                            int sums[2][4])
{
    int blk;

    for (blk = 0; blk < 2; blk++) {
        const uint8_t *m = main + 4 * blk;
        const uint8_t *r = ref  + 4 * blk;
        uint32_t sum_m = 0, sum_r = 0, sum_sq = 0, sum_mr = 0;
        int row, col;

        for (row = 0; row < 4; row++) {
            const uint8_t *m_row = m + row * main_stride;
            const uint8_t *r_row = r + row * ref_stride;

            for (col = 0; col < 4; col++) {
                const int pm = m_row[col];
                const int pr = r_row[col];

                sum_m  += pm;
                sum_r  += pr;
                sum_sq += pm * pm + pr * pr;
                sum_mr += pm * pr;
            }
        }

        sums[blk][0] = sum_m;
        sums[blk][1] = sum_r;
        sums[blk][2] = sum_sq;
        sums[blk][3] = sum_mr;
    }
}
119
+
120
/**
 * Turn the statistics of one 8x8 window (64 pixels) into an SSIM value.
 *
 * @param s1  sum of the main pixels
 * @param s2  sum of the ref pixels
 * @param ss  sum of the squares of both main and ref pixels
 * @param s12 sum of the main*ref products
 */
static float ssim_end1(int s1, int s2, int ss, int s12)
{
    /* Stabilising constants, pre-scaled to match the un-normalised sums. */
    static const int ssim_c1 = (int)(.01*.01*255*255*64 + .5);
    static const int ssim_c2 = (int)(.03*.03*255*255*64*63 + .5);

    const int sq_means = s1 * s1 + s2 * s2;
    const int vars     = ss * 64 - sq_means;
    const int covar    = s12 * 64 - s1 * s2;

    return (float)(2 * s1 * s2 + ssim_c1) * (float)(2 * covar + ssim_c2)
         / ((float)(sq_means + ssim_c1) * (float)(vars + ssim_c2));
}
135
+
136
/**
 * Sum the SSIM of up to four horizontally consecutive windows.
 *
 * Window i combines the block sums at columns i and i + 1 of the two
 * rows sum0 and sum1.
 *
 * @param width number of windows to evaluate
 * @return the sum (not the mean) of the window SSIM values
 */
static float ssim_end4(int sum0[5][4], int sum1[5][4], int width)
{
    float total = 0.0;
    int col;

    for (col = 0; col < width; col++) {
        int win[4];
        int k;

        for (k = 0; k < 4; k++)
            win[k] = sum0[col][k] + sum0[col + 1][k] + sum1[col][k] + sum1[col + 1][k];

        total += ssim_end1(win[0], win[1], win[2], win[3]);
    }

    return total;
}
148
+
149
/**
 * Compute the mean SSIM of one plane from overlapping windows built out
 * of 4x4 block sums.
 *
 * @param temp scratch memory, split into two rows of block sums; the
 *             second row starts (width >> 2) + 3 entries after the first
 * @return the accumulated window SSIM divided by (height - 1) * (width - 1),
 *         with width and height expressed in 4x4 blocks
 */
static float ssim_plane(uint8_t *main, int main_stride,
                        uint8_t *ref, int ref_stride,
                        int width, int height, void *temp)
{
    /* z is the next block row whose sums still have to be computed; it
     * persists across iterations of the y loop. */
    int z = 0;
    int x, y;
    float ssim = 0.0;
    int (*sum0)[4] = temp;
    int (*sum1)[4] = sum0 + (width >> 2) + 3;

    /* From here on width and height count 4x4 blocks, not pixels. */
    width >>= 2;
    height >>= 2;

    for (y = 1; y < height; y++) {
        /* Compute block rows up to y: two rows on the first pass, one on
         * each later pass. After the swap sum0 is the row being written
         * and sum1 the previously written one. */
        for (; z <= y; z++) {
            FFSWAP(void*, sum0, sum1);
            /* Each call fills two adjacent entries, hence the step of 2. */
            for (x = 0; x < width; x+=2)
                ssim_4x4x2_core(&main[4 * (x + z * main_stride)], main_stride,
                                &ref[4 * (x + z * ref_stride)], ref_stride,
                                &sum0[x]);
        }

        /* Evaluate the width - 1 windows of this row pair, four at a time. */
        for (x = 0; x < width - 1; x += 4)
            ssim += ssim_end4(sum0 + x, sum1 + x, FFMIN(4, width - x - 1));
    }

    /* NOTE(review): the divisor is zero when the plane is narrower or
     * shorter than 8 pixels (width or height of 1 block). */
    return ssim / ((height - 1) * (width - 1));
}
177
+
178
+static double ssim_db(double ssim, double weight)
179
+{
180
+    return 10 * (log(weight) / log(10) - log(weight - ssim) / log(10));
181
+}
182
+
183
+static AVFrame *do_ssim(AVFilterContext *ctx, AVFrame *main,
184
+                        const AVFrame *ref)
185
+{
186
+    AVDictionary **metadata = avpriv_frame_get_metadatap(main);
187
+    SSIMContext *s = ctx->priv;
188
+    float c[4], ssimv;
189
+    int i;
190
+
191
+    s->nb_frames++;
192
+
193
+    for (i = 0; i < s->nb_components; i++)
194
+        c[i] = ssim_plane(main->data[i], main->linesize[i],
195
+                          ref->data[i], ref->linesize[i],
196
+                          s->planewidth[i], s->planeheight[i], s->temp);
197
+
198
+    ssimv = (c[0] * s->coefs[0] + c[1] * s->coefs[1] + c[2] * s->coefs[2]) / s->coefs[3];
199
+
200
+    for (i = 0; i < s->nb_components; i++)
201
+        set_meta(metadata, "lavfi.ssim.", s->comps[i], c[i]);
202
+
203
+    set_meta(metadata, "lavfi.ssim.All", 0, ssimv);
204
+    set_meta(metadata, "lavfi.ssim.dB", 0, ssim_db(c[0] * s->coefs[0] + c[1] * s->coefs[1] + c[2] * s->coefs[2], s->coefs[3]));
205
+
206
+    if (s->stats_file) {
207
+        fprintf(s->stats_file, "n:%"PRId64" ", s->nb_frames);
208
+
209
+        for (i = 0; i < s->nb_components; i++)
210
+            fprintf(s->stats_file, "%c:%f ", s->comps[i], c[i]);
211
+
212
+        fprintf(s->stats_file, "All:%f (%f)\n", ssimv, ssim_db(c[0] * s->coefs[0] + c[1] * s->coefs[1] + c[2] * s->coefs[2], s->coefs[3]));
213
+    }
214
+
215
+    s->ssim[0] += c[0];
216
+    s->ssim[1] += c[1];
217
+    s->ssim[2] += c[2];
218
+
219
+    return main;
220
+}
221
+
222
+static av_cold int init(AVFilterContext *ctx)
223
+{
224
+    SSIMContext *s = ctx->priv;
225
+
226
+    if (s->stats_file_str) {
227
+        s->stats_file = fopen(s->stats_file_str, "w");
228
+        if (!s->stats_file) {
229
+            int err = AVERROR(errno);
230
+            char buf[128];
231
+            av_strerror(err, buf, sizeof(buf));
232
+            av_log(ctx, AV_LOG_ERROR, "Could not open stats file %s: %s\n",
233
+                   s->stats_file_str, buf);
234
+            return err;
235
+        }
236
+    }
237
+
238
+    s->dinput.process = do_ssim;
239
+    s->dinput.shortest = 1;
240
+    s->dinput.repeatlast = 0;
241
+    return 0;
242
+}
243
+
244
+static int query_formats(AVFilterContext *ctx)
245
+{
246
+    static const enum AVPixelFormat pix_fmts[] = {
247
+        AV_PIX_FMT_GRAY8,
248
+        AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P,
249
+        AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV410P,
250
+        AV_PIX_FMT_YUVJ411P, AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
251
+        AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_YUVJ444P,
252
+        AV_PIX_FMT_GBRP,
253
+        AV_PIX_FMT_NONE
254
+    };
255
+
256
+    AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
257
+    if (!fmts_list)
258
+        return AVERROR(ENOMEM);
259
+    return ff_set_common_formats(ctx, fmts_list);
260
+}
261
+
262
+static int config_input_ref(AVFilterLink *inlink)
263
+{
264
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
265
+    AVFilterContext *ctx  = inlink->dst;
266
+    SSIMContext *s = ctx->priv;
267
+    int is_rgb;
268
+
269
+    s->nb_components = desc->nb_components;
270
+
271
+    if (ctx->inputs[0]->w != ctx->inputs[1]->w ||
272
+        ctx->inputs[0]->h != ctx->inputs[1]->h) {
273
+        av_log(ctx, AV_LOG_ERROR, "Width and height of input videos must be same.\n");
274
+        return AVERROR(EINVAL);
275
+    }
276
+    if (ctx->inputs[0]->format != ctx->inputs[1]->format) {
277
+        av_log(ctx, AV_LOG_ERROR, "Inputs must be of same pixel format.\n");
278
+        return AVERROR(EINVAL);
279
+    }
280
+
281
+    is_rgb = ff_fill_rgba_map(s->rgba_map, inlink->format) >= 0;
282
+    s->comps[0] = is_rgb ? 'R' : 'Y';
283
+    s->comps[1] = is_rgb ? 'G' : 'U';
284
+    s->comps[2] = is_rgb ? 'B' : 'V';
285
+    s->comps[3] = 'A';
286
+
287
+    if (is_rgb) {
288
+        s->coefs = rgb_coefs;
289
+    } else if (s->nb_components == 1) {
290
+        s->coefs = gray_coefs;
291
+    } else {
292
+        s->coefs = yuv_coefs;
293
+    }
294
+
295
+    s->planeheight[1] = s->planeheight[2] = FF_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
296
+    s->planeheight[0] = s->planeheight[3] = inlink->h;
297
+    s->planewidth[1]  = s->planewidth[2]  = FF_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
298
+    s->planewidth[0]  = s->planewidth[3]  = inlink->w;
299
+
300
+    s->temp = av_malloc((2 * inlink->w + 12) * sizeof(*s->temp));
301
+    if (!s->temp)
302
+        return AVERROR(ENOMEM);
303
+
304
+    return 0;
305
+}
306
+
307
+static int config_output(AVFilterLink *outlink)
308
+{
309
+    AVFilterContext *ctx = outlink->src;
310
+    SSIMContext *s = ctx->priv;
311
+    AVFilterLink *mainlink = ctx->inputs[0];
312
+    int ret;
313
+
314
+    outlink->w = mainlink->w;
315
+    outlink->h = mainlink->h;
316
+    outlink->time_base = mainlink->time_base;
317
+    outlink->sample_aspect_ratio = mainlink->sample_aspect_ratio;
318
+    outlink->frame_rate = mainlink->frame_rate;
319
+
320
+    if ((ret = ff_dualinput_init(ctx, &s->dinput)) < 0)
321
+        return ret;
322
+
323
+    return 0;
324
+}
325
+
326
+static int filter_frame(AVFilterLink *inlink, AVFrame *buf)
327
+{
328
+    SSIMContext *s = inlink->dst->priv;
329
+    return ff_dualinput_filter_frame(&s->dinput, inlink, buf);
330
+}
331
+
332
+static int request_frame(AVFilterLink *outlink)
333
+{
334
+    SSIMContext *s = outlink->src->priv;
335
+    return ff_dualinput_request_frame(&s->dinput, outlink);
336
+}
337
+
338
+static av_cold void uninit(AVFilterContext *ctx)
339
+{
340
+    SSIMContext *s = ctx->priv;
341
+
342
+    if (s->nb_frames > 0) {
343
+        if (s->nb_components == 3) {
344
+            av_log(ctx, AV_LOG_INFO, "SSIM %c:%f %c:%f %c:%f All:%f (%f)\n",
345
+               s->comps[0], s->ssim[0] / s->nb_frames,
346
+               s->comps[1], s->ssim[1] / s->nb_frames,
347
+               s->comps[2], s->ssim[2] / s->nb_frames,
348
+               (s->ssim[0] * 4 + s->ssim[1] + s->ssim[2]) / (s->nb_frames * 6),
349
+               ssim_db(s->ssim[0] * 4 + s->ssim[1] + s->ssim[2], s->nb_frames * 6));
350
+        } else if (s->nb_components == 1) {
351
+            av_log(ctx, AV_LOG_INFO, "SSIM All:%f (%f)\n",
352
+               s->ssim[0] / s->nb_frames, ssim_db(s->ssim[0], s->nb_frames));
353
+        }
354
+    }
355
+
356
+    ff_dualinput_uninit(&s->dinput);
357
+
358
+    if (s->stats_file)
359
+        fclose(s->stats_file);
360
+
361
+    av_freep(&s->temp);
362
+}
363
+
364
/* Input pads: both feed filter_frame(); the filter is configured from the
 * reference pad via config_input_ref(). */
static const AVFilterPad ssim_inputs[] = {
    {
        .name         = "main",
        .type         = AVMEDIA_TYPE_VIDEO,
        .filter_frame = filter_frame,
    },{
        .name         = "reference",
        .type         = AVMEDIA_TYPE_VIDEO,
        .filter_frame = filter_frame,
        .config_props = config_input_ref,
    },
    { NULL }
};
377
+
378
/* Single output pad; config_output() copies its properties from the main input. */
static const AVFilterPad ssim_outputs[] = {
    {
        .name          = "default",
        .type          = AVMEDIA_TYPE_VIDEO,
        .config_props  = config_output,
        .request_frame = request_frame,
    },
    { NULL }
};
387
+
388
/* Filter definition tying together the callbacks, options class and pads above. */
AVFilter ff_vf_ssim = {
    .name          = "ssim",
    .description   = NULL_IF_CONFIG_SMALL("Calculate the SSIM between two video streams."),
    .init          = init,
    .uninit        = uninit,
    .query_formats = query_formats,
    .priv_size     = sizeof(SSIMContext),
    .priv_class    = &ssim_class,
    .inputs        = ssim_inputs,
    .outputs       = ssim_outputs,
};