Browse code

lavfi: add video stabilization plugins using vid.stab library

vidstabdetect and vidstabtransform common functions for interfacing
vid.stab are in libavfilter/vidstabutils.c

Signed-off-by: Georg Martius <martius@mis.mpg.de>

Georg Martius authored on 2013/04/19 08:49:27
Showing 10 changed files
... ...
@@ -31,6 +31,8 @@ version <next>:
31 31
 - asetrate filter
32 32
 - interleave filter
33 33
 - timeline editing with filters
34
+- vidstabdetect and vidstabtransform filters for video stabilization using
35
+  the vid.stab library
34 36
 
35 37
 
36 38
 version 1.2:
... ...
@@ -72,6 +72,7 @@ compatible libraries
72 72
 The following libraries are under GPL:
73 73
     - libcdio
74 74
     - libutvideo
75
+    - libvidstab
75 76
     - libx264
76 77
     - libxavs
77 78
     - libxvid
... ...
@@ -226,6 +226,7 @@ External library support:
226 226
   --enable-libtwolame      enable MP2 encoding via libtwolame [no]
227 227
   --enable-libutvideo      enable Ut Video encoding and decoding via libutvideo [no]
228 228
   --enable-libv4l2         enable libv4l2/v4l-utils [no]
229
+  --enable-libvidstab      enable video stabilization using vid.stab [no]
229 230
   --enable-libvo-aacenc    enable AAC encoding via libvo-aacenc [no]
230 231
   --enable-libvo-amrwbenc  enable AMR-WB encoding via libvo-amrwbenc [no]
231 232
   --enable-libvorbis       enable Vorbis en/decoding via libvorbis,
... ...
@@ -1181,6 +1182,7 @@ EXTERNAL_LIBRARY_LIST="
1181 1181
     libtwolame
1182 1182
     libutvideo
1183 1183
     libv4l2
1184
+    libvidstab
1184 1185
     libvo_aacenc
1185 1186
     libvo_amrwbenc
1186 1187
     libvorbis
... ...
@@ -2152,6 +2154,8 @@ stereo3d_filter_deps="gpl"
2152 2152
 subtitles_filter_deps="avformat avcodec libass"
2153 2153
 super2xsai_filter_deps="gpl"
2154 2154
 tinterlace_filter_deps="gpl"
2155
+vidstabdetect_filter_deps="libvidstab"
2156
+vidstabtransform_filter_deps="libvidstab"
2155 2157
 yadif_filter_deps="gpl"
2156 2158
 pixfmts_super2xsai_test_deps="super2xsai_filter"
2157 2159
 tinterlace_merge_test_deps="tinterlace_filter"
... ...
@@ -3573,6 +3577,7 @@ die_license_disabled_gpl() {
3573 3573
 
3574 3574
 die_license_disabled gpl libcdio
3575 3575
 die_license_disabled gpl libutvideo
3576
+die_license_disabled gpl libvidstab
3576 3577
 die_license_disabled gpl libx264
3577 3578
 die_license_disabled gpl libxavs
3578 3579
 die_license_disabled gpl libxvid
... ...
@@ -4015,6 +4020,7 @@ enabled libtwolame && require  libtwolame twolame.h twolame_init -ltwolame &&
4015 4015
                         die "ERROR: libtwolame must be installed and version must be >= 0.3.10"; }
4016 4016
 enabled libutvideo    && require_cpp utvideo "stdint.h stdlib.h utvideo/utvideo.h utvideo/Codec.h" 'CCodec*' -lutvideo -lstdc++
4017 4017
 enabled libv4l2    && require_pkg_config libv4l2 libv4l2.h v4l2_ioctl
4018
+enabled libvidstab && require_pkg_config vidstab vid.stab/libvidstab.h vsMotionDetectInit
4018 4019
 enabled libvo_aacenc && require libvo_aacenc vo-aacenc/voAAC.h voGetAACEncAPI -lvo-aacenc
4019 4020
 enabled libvo_amrwbenc && require libvo_amrwbenc vo-amrwbenc/enc_if.h E_IF_init -lvo-amrwbenc
4020 4021
 enabled libvorbis  && require  libvorbis vorbis/vorbisenc.h vorbis_info_init -lvorbisenc -lvorbis -logg
... ...
@@ -5308,6 +5308,141 @@ much, but it will increase the amount of blurring needed to cover over
5308 5308
 the image and will destroy more information than necessary, and extra
5309 5309
 pixels will slow things down on a large logo.
5310 5310
 
5311
+@anchor{vidstabtransform}
5312
+@section vidstabtransform
5313
+
5314
+Video stabilization/deshaking: pass 2 of 2,
5315
+see @ref{vidstabdetect} for pass 1.
5316
+
5317
+Read a file with transform information for each frame and
5318
+apply/compensate them. Together with the @ref{vidstabdetect}
5319
+filter this can be used to deshake videos. See also
5320
+@url{http://public.hronopik.de/vid.stab}. It is important to also use
5321
+the unsharp filter, see below.
5322
+
5323
+To enable compilation of this filter you need to configure FFmpeg with
5324
+@code{--enable-libvidstab}.
5325
+
5326
+This filter accepts the following named options, expressed as a
5327
+sequence of @var{key}=@var{value} pairs, separated by ":".
5328
+
5329
+@table @option
5330
+
5331
+@item input
5332
+path to the file used to read the transforms (default: @file{transforms.trf})
5333
+
5334
+@item smoothing
5335
+number of frames (value*2 + 1) used for lowpass filtering the camera movements
5336
+(default: 10). For example a number of 10 means that 21 frames are used
5337
+(10 in the past and 10 in the future) to smoothen the motion in the
5338
+video. A larger value leads to a smoother video, but limits the
5339
+acceleration of the camera (pan/tilt movements).
5340
+
5341
+@item maxshift
5342
+maximal number of pixels to translate frames (default: -1 no limit)
5343
+
5344
+@item maxangle
5345
+maximal angle in radians (degree*PI/180) to rotate frames (default: -1
5346
+no limit)
5347
+
5348
+@item crop
5349
+How to deal with borders that may be visible due to movement
5350
+compensation. Available values are:
5351
+
5352
+@table @samp
5353
+@item keep
5354
+keep image information from previous frame (default)
5355
+@item black
5356
+fill the border black
5357
+@end table
5358
+
5359
+@item invert
5360
+@table @samp
5361
+@item 0
5362
+ keep transforms normal (default)
5363
+@item 1
5364
+ invert transforms
5365
+@end table
5366
+
5367
+
5368
+@item relative
5369
+consider transforms as
5370
+@table @samp
5371
+@item 0
5372
+ absolute
5373
+@item 1
5374
+ relative to previous frame (default)
5375
+@end table
5376
+
5377
+
5378
+@item zoom
5379
+percentage to zoom (default: 0)
5380
+@table @samp
5381
+@item >0
5382
+  zoom in
5383
+@item <0
5384
+  zoom out
5385
+@end table
5386
+
5387
+@item optzoom
5388
+if 1 then optimal zoom value is determined (default).
5389
+Optimal zoom means no (or only little) border should be visible.
5390
+Note that the value given at zoom is added to the one calculated
5391
+here.
5392
+
5393
+@item interpol
5394
+type of interpolation
5395
+
5396
+Available values are:
5397
+@table @samp
5398
+@item no
5399
+no interpolation
5400
+@item linear
5401
+linear only horizontal
5402
+@item bilinear
5403
+linear in both directions (default)
5404
+@item bicubic
5405
+cubic in both directions (slow)
5406
+@end table
5407
+
5408
+@item tripod
5409
+virtual tripod mode means that the video is stabilized such that the
5410
+camera stays stationary. Use also @code{tripod} option of
5411
+@ref{vidstabdetect}.
5412
+@table @samp
5413
+@item 0
5414
+off (default)
5415
+@item 1
5416
+virtual tripod mode: equivalent to @code{relative=0:smoothing=0}
5417
+@end table
5418
+
5419
+@end table
5420
+
5421
+@subsection Examples
5422
+
5423
+@itemize
5424
+@item
5425
+typical call with default values:
5426
+ (note the unsharp filter which is always recommended)
5427
+@example
5428
+ffmpeg -i inp.mpeg -vf vidstabtransform,unsharp=5:5:0.8:3:3:0.4 inp_stabilized.mpeg
5429
+@end example
5430
+
5431
+@item
5432
+zoom in a bit more and load transform data from a given file
5433
+@example
5434
+vidstabtransform=zoom=5:input="mytransforms.trf"
5435
+@end example
5436
+
5437
+@item
5438
+smoothen the video even more
5439
+@example
5440
+vidstabtransform=smoothing=30
5441
+@end example
5442
+
5443
+@end itemize
5444
+
5445
+
5311 5446
 @section scale
5312 5447
 
5313 5448
 Scale (resize) the input video, using the libswscale library.
... ...
@@ -5706,6 +5841,93 @@ in [-30,0] will filter edges. Default value is 0.
5706 5706
 If a chroma option is not explicitly set, the corresponding luma value
5707 5707
 is set.
5708 5708
 
5709
+@anchor{vidstabdetect}
5710
+@section vidstabdetect
5711
+
5712
+Video stabilization/deshaking: pass 1 of 2, see @ref{vidstabtransform}
5713
+for pass 2.
5714
+
5715
+Generate a file with relative transform information (translation and
5716
+rotation) between subsequent frames.
5717
+
5718
+To enable compilation of this filter you need to configure FFmpeg with
5719
+@code{--enable-libvidstab}.
5720
+
5721
+This filter accepts the following named options, expressed as a
5722
+sequence of @var{key}=@var{value} pairs, separated by ":".
5723
+
5724
+@table @option
5725
+@item result
5726
+path to the file used to write the transforms (default:@file{transforms.trf})
5727
+
5728
+@item shakiness
5729
+how shaky is the video and how quick is the camera? (default: 5)
5730
+@table @samp
5731
+@item 1
5732
+ little (fast)
5733
+@item ...
5734
+@item 10
5735
+ very strong/quick (slow)
5736
+@end table
5737
+
5738
+@item accuracy
5739
+accuracy of detection process (>=shakiness) (default: 9)
5740
+@table @samp
5741
+@item 1
5742
+ low (fast)
5743
+@item 15
5744
+ high (slow)
5745
+@end table
5746
+
5747
+@item stepsize
5748
+stepsize of search process, region around minimum is scanned with 1 pixel
5749
+resolution (default: 6)
5750
+
5751
+@item mincontrast
5752
+below this contrast a local measurement field is discarded (0-1) (default: 0.25)
5753
+
5754
+@item tripod
5755
+virtual tripod mode: @code{tripod=framenum} if framenum>0 otherwise disabled.
5756
+The motion of the frames is compared to a reference frame (framenum).
5757
+The idea is to compensate all movements in a more-or-less static scene
5758
+ and keep the camera view absolutely still.
5759
+(default: 0 (disabled))
5760
+
5761
+@item show
5762
+draw nothing (default); 1,2: show fields and transforms in the resulting frames
5763
+
5764
+@end table
5765
+
5766
+@subsection Examples
5767
+
5768
+@itemize
5769
+@item
5770
+use default values:
5771
+@example
5772
+vidstabdetect
5773
+@end example
5774
+
5775
+@item
5776
+strongly shaky movie and put the results in @code{mytransforms.trf}
5777
+@example
5778
+vidstabdetect=shakiness=10:accuracy=15:result="mytransforms.trf"
5779
+@end example
5780
+
5781
+@item
5782
+visualize some internals in the resulting video
5783
+@example
5784
+vidstabdetect=show=1
5785
+@end example
5786
+
5787
+
5788
+@item
5789
+Typical call with visualization
5790
+@example
5791
+ffmpeg -i input -vf vidstabdetect=shakiness=5:show=1 dummy.avi
5792
+@end example
5793
+@end itemize
5794
+
5795
+
5709 5796
 @section stereo3d
5710 5797
 
5711 5798
 Convert between different stereoscopic image formats.
... ...
@@ -179,6 +179,8 @@ OBJS-$(CONFIG_TINTERLACE_FILTER)             += vf_tinterlace.o
179 179
 OBJS-$(CONFIG_TRANSPOSE_FILTER)              += vf_transpose.o
180 180
 OBJS-$(CONFIG_UNSHARP_FILTER)                += vf_unsharp.o
181 181
 OBJS-$(CONFIG_VFLIP_FILTER)                  += vf_vflip.o
182
+OBJS-$(CONFIG_VIDSTABDETECT_FILTER)          += vidstabutils.o vf_vidstabdetect.o
183
+OBJS-$(CONFIG_VIDSTABTRANSFORM_FILTER)       += vidstabutils.o vf_vidstabtransform.o
182 184
 OBJS-$(CONFIG_YADIF_FILTER)                  += vf_yadif.o
183 185
 
184 186
 OBJS-$(CONFIG_CELLAUTO_FILTER)               += vsrc_cellauto.o
... ...
@@ -176,6 +176,8 @@ void avfilter_register_all(void)
176 176
     REGISTER_FILTER(TRANSPOSE,      transpose,      vf);
177 177
     REGISTER_FILTER(UNSHARP,        unsharp,        vf);
178 178
     REGISTER_FILTER(VFLIP,          vflip,          vf);
179
+    REGISTER_FILTER(VIDSTABDETECT,  vidstabdetect,  vf);
180
+    REGISTER_FILTER(VIDSTABTRANSFORM, vidstabtransform, vf);
179 181
     REGISTER_FILTER(YADIF,          yadif,          vf);
180 182
 
181 183
     REGISTER_FILTER(CELLAUTO,       cellauto,       vsrc);
182 184
new file mode 100644
... ...
@@ -0,0 +1,239 @@
0
+/*
1
+ * Copyright (c) 2013 Georg Martius <georg dot martius at web dot de>
2
+ *
3
+ * This file is part of FFmpeg.
4
+ *
5
+ * FFmpeg is free software; you can redistribute it and/or
6
+ * modify it under the terms of the GNU Lesser General Public
7
+ * License as published by the Free Software Foundation; either
8
+ * version 2.1 of the License, or (at your option) any later version.
9
+ *
10
+ * FFmpeg is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
+ * Lesser General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Lesser General Public
16
+ * License along with FFmpeg; if not, write to the Free Software
17
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ */
19
+
20
+#define DEFAULT_RESULT_NAME     "transforms.trf"
21
+
22
+#include <vid.stab/libvidstab.h>
23
+
24
+#include "libavutil/common.h"
25
+#include "libavutil/opt.h"
26
+#include "libavutil/imgutils.h"
27
+#include "avfilter.h"
28
+#include "internal.h"
29
+
30
+#include "vidstabutils.h"
31
+
32
+typedef struct {
33
+    const AVClass* class;
34
+
35
+    VSMotionDetect md;
36
+    VSMotionDetectConfig conf;
37
+
38
+    char* result;
39
+    FILE* f;
40
+} StabData;
41
+
42
+
43
+#define OFFSET(x) offsetof(StabData, x)
44
+#define OFFSETC(x) (offsetof(StabData, conf)+offsetof(VSMotionDetectConfig, x))
45
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
46
+
47
+static const AVOption vidstabdetect_options[]= {
48
+    {"result",      "path to the file used to write the transforms (def:transforms.trf)", OFFSET(result),              AV_OPT_TYPE_STRING, {.str = DEFAULT_RESULT_NAME}},
49
+    {"shakiness",   "how shaky is the video and how quick is the camera?"
50
+                    " 1: little (fast) 10: very strong/quick (slow) (def: 5)",            OFFSETC(shakiness),         AV_OPT_TYPE_INT,    {.i64 = 5},  1, 10,       FLAGS},
51
+    {"accuracy",    "(>=shakiness) 1: low 15: high (slow) (def: 9)",                      OFFSETC(accuracy),          AV_OPT_TYPE_INT,    {.i64 = 9 }, 1, 15,       FLAGS},
52
+    {"stepsize",    "region around minimum is scanned with 1 pixel resolution (def: 6)",  OFFSETC(stepSize),          AV_OPT_TYPE_INT,    {.i64 = 6},  1, 32,       FLAGS},
53
+    {"mincontrast", "below this contrast a field is discarded (0-1) (def: 0.3)",          OFFSETC(contrastThreshold), AV_OPT_TYPE_DOUBLE, {.dbl =  0.25}, 0.0, 1.0, FLAGS},
54
+    {"show",        "0: draw nothing (def); 1,2: show fields and transforms",             OFFSETC(show),              AV_OPT_TYPE_INT,    {.i64 =  0}, 0, 2,        FLAGS},
55
+    {"tripod",      "virtual tripod mode (if >0): motion is compared to a reference"
56
+                    " reference frame (frame # is the value) (def: 0)",                   OFFSETC(virtualTripod),     AV_OPT_TYPE_INT,    {.i64 = 0},  0, INT_MAX,  FLAGS},
57
+    {NULL},
58
+};
59
+
60
+AVFILTER_DEFINE_CLASS(vidstabdetect);
61
+
62
+static av_cold int init(AVFilterContext *ctx)
63
+{
64
+    StabData* sd = ctx->priv;
65
+    vs_set_mem_and_log_functions();
66
+    sd->class = &vidstabdetect_class;
67
+    av_log(ctx, AV_LOG_VERBOSE, "vidstabdetect filter: init %s\n", LIBVIDSTAB_VERSION);
68
+    return 0;
69
+}
70
+
71
+static av_cold void uninit(AVFilterContext *ctx)
72
+{
73
+    StabData *sd = ctx->priv;
74
+    VSMotionDetect* md = &(sd->md);
75
+
76
+    if (sd->f) {
77
+        fclose(sd->f);
78
+        sd->f = NULL;
79
+    }
80
+
81
+    vsMotionDetectionCleanup(md);
82
+
83
+}
84
+
85
+static int query_formats(AVFilterContext *ctx)
86
+{
87
+    // If you add something here also add it in vidstabutils.c
88
+    static const enum AVPixelFormat pix_fmts[] = {
89
+        AV_PIX_FMT_YUV444P,  AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV420P,
90
+        AV_PIX_FMT_YUV411P,  AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUVA420P,
91
+        AV_PIX_FMT_YUV440P,  AV_PIX_FMT_GRAY8,
92
+        AV_PIX_FMT_RGB24, AV_PIX_FMT_BGR24, AV_PIX_FMT_RGBA,
93
+        AV_PIX_FMT_NONE
94
+    };
95
+
96
+    ff_set_common_formats(ctx, ff_make_format_list(pix_fmts));
97
+    return 0;
98
+}
99
+
100
+
101
+static int config_input(AVFilterLink *inlink)
102
+{
103
+    AVFilterContext *ctx = inlink->dst;
104
+    StabData *sd = ctx->priv;
105
+
106
+    VSMotionDetect* md = &(sd->md);
107
+    VSFrameInfo fi;
108
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
109
+
110
+    vsFrameInfoInit(&fi,inlink->w, inlink->h, av_2_vs_pixel_format(ctx, inlink->format));
111
+    if(fi.bytesPerPixel != av_get_bits_per_pixel(desc)/8){
112
+        av_log(ctx, AV_LOG_ERROR, "pixel-format error: wrong bits/per/pixel, please report a BUG");
113
+        return AVERROR(EINVAL);
114
+    }
115
+    if(fi.log2ChromaW != desc->log2_chroma_w){
116
+        av_log(ctx, AV_LOG_ERROR, "pixel-format error: log2_chroma_w, please report a BUG");
117
+        return AVERROR(EINVAL);
118
+    }
119
+
120
+    if(fi.log2ChromaH != desc->log2_chroma_h){
121
+        av_log(ctx, AV_LOG_ERROR, "pixel-format error: log2_chroma_h, please report a BUG");
122
+        return AVERROR(EINVAL);
123
+    }
124
+
125
+    // set values that are not initializes by the options
126
+    sd->conf.algo     = 1;
127
+    sd->conf.modName  = "vidstabdetect";
128
+    if(vsMotionDetectInit(md, &sd->conf, &fi) != VS_OK){
129
+        av_log(ctx, AV_LOG_ERROR, "initialization of Motion Detection failed, please report a BUG");
130
+        return AVERROR(EINVAL);
131
+    }
132
+
133
+    vsMotionDetectGetConfig(&sd->conf, md);
134
+    av_log(ctx, AV_LOG_INFO, "Video stabilization settings (pass 1/2):\n");
135
+    av_log(ctx, AV_LOG_INFO, "     shakiness = %d\n", sd->conf.shakiness);
136
+    av_log(ctx, AV_LOG_INFO, "      accuracy = %d\n", sd->conf.accuracy);
137
+    av_log(ctx, AV_LOG_INFO, "      stepsize = %d\n", sd->conf.stepSize);
138
+    av_log(ctx, AV_LOG_INFO, "   mincontrast = %f\n", sd->conf.contrastThreshold);
139
+    av_log(ctx, AV_LOG_INFO, "          show = %d\n", sd->conf.show);
140
+    av_log(ctx, AV_LOG_INFO, "        result = %s\n", sd->result);
141
+
142
+    sd->f = fopen(sd->result, "w");
143
+    if (sd->f == NULL) {
144
+        av_log(ctx, AV_LOG_ERROR, "cannot open transform file %s\n", sd->result);
145
+        return AVERROR(EINVAL);
146
+    }else{
147
+        if(vsPrepareFile(md, sd->f) != VS_OK){
148
+            av_log(ctx, AV_LOG_ERROR, "cannot write to transform file %s\n", sd->result);
149
+            return AVERROR(EINVAL);
150
+        }
151
+    }
152
+    return 0;
153
+}
154
+
155
+
156
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
157
+{
158
+    AVFilterContext *ctx = inlink->dst;
159
+    StabData *sd = ctx->priv;
160
+    VSMotionDetect* md = &(sd->md);
161
+    LocalMotions localmotions;
162
+
163
+    AVFilterLink *outlink = inlink->dst->outputs[0];
164
+    int direct = 0;
165
+    AVFrame *out;
166
+    VSFrame frame;
167
+    int plane;
168
+
169
+    if (av_frame_is_writable(in)) {
170
+        direct = 1;
171
+        out = in;
172
+    } else {
173
+        out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
174
+        if (!out) {
175
+            av_frame_free(&in);
176
+            return AVERROR(ENOMEM);
177
+        }
178
+        av_frame_copy_props(out, in);
179
+    }
180
+
181
+    for(plane=0; plane < md->fi.planes; plane++){
182
+        frame.data[plane] = in->data[plane];
183
+        frame.linesize[plane] = in->linesize[plane];
184
+    }
185
+    if(vsMotionDetection(md, &localmotions, &frame) !=  VS_OK){
186
+        av_log(ctx, AV_LOG_ERROR, "motion detection failed");
187
+        return AVERROR(AVERROR_EXTERNAL);
188
+    } else {
189
+        if(vsWriteToFile(md, sd->f, &localmotions) != VS_OK){
190
+            av_log(ctx, AV_LOG_ERROR, "cannot write to transform file");
191
+            return AVERROR(errno);
192
+        }
193
+        vs_vector_del(&localmotions);
194
+    }
195
+    if(sd->conf.show>0 && !direct){
196
+        av_image_copy(out->data, out->linesize,
197
+                      (void*)in->data, in->linesize,
198
+                      in->format, in->width, in->height);
199
+    }
200
+
201
+    if (!direct)
202
+        av_frame_free(&in);
203
+
204
+    return ff_filter_frame(outlink, out);
205
+}
206
+
207
+static const AVFilterPad avfilter_vf_vidstabdetect_inputs[] = {
208
+    {
209
+        .name             = "default",
210
+        .type             = AVMEDIA_TYPE_VIDEO,
211
+        .filter_frame     = filter_frame,
212
+        .config_props     = config_input,
213
+    },
214
+    { NULL }
215
+};
216
+
217
+static const AVFilterPad avfilter_vf_vidstabdetect_outputs[] = {
218
+    {
219
+        .name             = "default",
220
+        .type             = AVMEDIA_TYPE_VIDEO,
221
+    },
222
+    { NULL }
223
+};
224
+
225
+AVFilter avfilter_vf_vidstabdetect = {
226
+    .name          = "vidstabdetect",
227
+    .description   = NULL_IF_CONFIG_SMALL("pass 1 of 2 for stabilization"
228
+                                          "extracts relative transformations"
229
+                                          "(pass 2 see vidstabtransform)"),
230
+    .priv_size     = sizeof(StabData),
231
+    .init          = init,
232
+    .uninit        = uninit,
233
+    .query_formats = query_formats,
234
+
235
+    .inputs        = avfilter_vf_vidstabdetect_inputs,
236
+    .outputs       = avfilter_vf_vidstabdetect_outputs,
237
+    .priv_class    = &vidstabdetect_class,
238
+};
0 239
new file mode 100644
... ...
@@ -0,0 +1,294 @@
0
+/*
1
+ * Copyright (c) 2013 Georg Martius <georg dot martius at web dot de>
2
+ *
3
+ * This file is part of FFmpeg.
4
+ *
5
+ * FFmpeg is free software; you can redistribute it and/or
6
+ * modify it under the terms of the GNU Lesser General Public
7
+ * License as published by the Free Software Foundation; either
8
+ * version 2.1 of the License, or (at your option) any later version.
9
+ *
10
+ * FFmpeg is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
+ * Lesser General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Lesser General Public
16
+ * License along with FFmpeg; if not, write to the Free Software
17
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ */
19
+
20
+#define DEFAULT_INPUT_NAME     "transforms.trf"
21
+
22
+#include <vid.stab/libvidstab.h>
23
+
24
+#include "libavutil/common.h"
25
+#include "libavutil/opt.h"
26
+#include "libavutil/imgutils.h"
27
+#include "avfilter.h"
28
+#include "internal.h"
29
+
30
+#include "vidstabutils.h"
31
+
32
+typedef struct {
33
+    const AVClass* class;
34
+
35
+    VSTransformData td;
36
+    VSTransformConfig conf;
37
+
38
+    VSTransformations trans; // transformations
39
+    char* input;           // name of transform file
40
+    int tripod;
41
+} TransformContext;
42
+
43
+#define OFFSET(x) offsetof(TransformContext, x)
44
+#define OFFSETC(x) (offsetof(TransformContext, conf)+offsetof(VSTransformConfig, x))
45
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
46
+
47
+static const AVOption vidstabtransform_options[]= {
48
+    {"input",     "path to the file storing the transforms (def:transforms.trf)",   OFFSET(input),
49
+                   AV_OPT_TYPE_STRING, {.str = DEFAULT_INPUT_NAME} },
50
+    {"smoothing", "number of frames*2 + 1 used for lowpass filtering (def: 10)",    OFFSETC(smoothing),
51
+                   AV_OPT_TYPE_INT,    {.i64 = 10},       1, 1000, FLAGS},
52
+    {"maxshift",  "maximal number of pixels to translate image (def: -1 no limit)", OFFSETC(maxShift),
53
+                   AV_OPT_TYPE_INT,    {.i64 = -1},      -1, 500,  FLAGS},
54
+    {"maxangle",  "maximal angle in rad to rotate image (def: -1 no limit)",        OFFSETC(maxAngle),
55
+                   AV_OPT_TYPE_DOUBLE, {.dbl = -1.0},  -1.0, 3.14, FLAGS},
56
+    {"crop",      "keep: (def), black",                                             OFFSETC(crop),
57
+                   AV_OPT_TYPE_INT,    {.i64 = 0},        0, 1,    FLAGS, "crop"},
58
+    {  "keep",    "keep border",                                                    0,
59
+                   AV_OPT_TYPE_CONST,  {.i64 = VSKeepBorder }, 0, 0, FLAGS, "crop"},
60
+    {  "black",   "black border",                                                   0,
61
+                   AV_OPT_TYPE_CONST,  {.i64 = VSCropBorder }, 0, 0, FLAGS, "crop"},
62
+    {"invert",    "1: invert transforms (def: 0)",                                  OFFSETC(invert),
63
+                   AV_OPT_TYPE_INT,    {.i64 = 0},        0, 1,    FLAGS},
64
+    {"relative",  "consider transforms as 0: abslute, 1: relative (def)",          OFFSETC(relative),
65
+                   AV_OPT_TYPE_INT,    {.i64 = 1},        0, 1,    FLAGS},
66
+    {"zoom",      "percentage to zoom >0: zoom in, <0 zoom out (def: 0)",           OFFSETC(zoom),
67
+                   AV_OPT_TYPE_DOUBLE, {.dbl = 0},        0, 100,  FLAGS},
68
+    {"optzoom",   "0: nothing, 1: determine optimal zoom (def) (added to 'zoom')",  OFFSETC(optZoom),
69
+                   AV_OPT_TYPE_INT,    {.i64 = 1},        0, 1,    FLAGS},
70
+    {"interpol",  "type of interpolation, no, linear, bilinear (def) , bicubic",    OFFSETC(interpolType),
71
+                   AV_OPT_TYPE_INT,    {.i64 = 2},        0, 3,    FLAGS, "interpol"},
72
+    {  "no",      "no interpolation",                                               0,
73
+                   AV_OPT_TYPE_CONST,  {.i64 = VS_Zero  },  0, 0,  FLAGS, "interpol"},
74
+    {  "linear",  "linear (horizontal)",                                            0,
75
+                   AV_OPT_TYPE_CONST,  {.i64 = VS_Linear }, 0, 0,  FLAGS, "interpol"},
76
+    {  "bilinear","bi-linear",                                                      0,
77
+                   AV_OPT_TYPE_CONST,  {.i64 = VS_BiLinear},0, 0,  FLAGS, "interpol"},
78
+    {  "bicubic", "bi-cubic",                                                       0,
79
+                   AV_OPT_TYPE_CONST,  {.i64 = VS_BiCubic },0, 0,  FLAGS, "interpol"},
80
+    {"tripod",    "if 1: virtual tripod mode (equiv. to relative=0:smoothing=0)",   OFFSET(tripod),
81
+                   AV_OPT_TYPE_INT,    {.i64 = 0},        0, 1,    FLAGS},
82
+    {NULL},
83
+};
84
+
85
+AVFILTER_DEFINE_CLASS(vidstabtransform);
86
+
87
+static av_cold int init(AVFilterContext *ctx)
88
+{
89
+    TransformContext* tc = ctx->priv;
90
+    vs_set_mem_and_log_functions();
91
+    tc->class = &vidstabtransform_class;
92
+    av_log(ctx, AV_LOG_VERBOSE, "vidstabtransform filter: init %s\n", LIBVIDSTAB_VERSION);
93
+    return 0;
94
+}
95
+
96
+static av_cold void uninit(AVFilterContext *ctx)
97
+{
98
+    TransformContext *tc = ctx->priv;
99
+
100
+    vsTransformDataCleanup(&tc->td);
101
+    vsTransformationsCleanup(&tc->trans);
102
+}
103
+
104
+static int query_formats(AVFilterContext *ctx)
105
+{
106
+    // If you add something here also add it in vidstabutils.c
107
+    static const enum AVPixelFormat pix_fmts[] = {
108
+        AV_PIX_FMT_YUV444P,  AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV420P,
109
+        AV_PIX_FMT_YUV411P,  AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUVA420P,
110
+        AV_PIX_FMT_YUV440P,  AV_PIX_FMT_GRAY8,
111
+        AV_PIX_FMT_RGB24, AV_PIX_FMT_BGR24, AV_PIX_FMT_RGBA,
112
+        AV_PIX_FMT_NONE
113
+    };
114
+
115
+    ff_set_common_formats(ctx, ff_make_format_list(pix_fmts));
116
+    return 0;
117
+}
118
+
119
+
120
+static int config_input(AVFilterLink *inlink)
121
+{
122
+    AVFilterContext *ctx = inlink->dst;
123
+    TransformContext *tc = ctx->priv;
124
+    FILE* f;
125
+
126
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
127
+
128
+    VSTransformData* td = &(tc->td);
129
+
130
+    VSFrameInfo fi_src;
131
+    VSFrameInfo fi_dest;
132
+
133
+    if(!vsFrameInfoInit(&fi_src, inlink->w, inlink->h,
134
+                      av_2_vs_pixel_format(ctx,inlink->format)) ||
135
+       !vsFrameInfoInit(&fi_dest, inlink->w, inlink->h,
136
+                      av_2_vs_pixel_format(ctx, inlink->format))){
137
+        av_log(ctx, AV_LOG_ERROR, "unknown pixel format: %i (%s)",
138
+               inlink->format, desc->name);
139
+        return AVERROR(EINVAL);
140
+    }
141
+
142
+    if(fi_src.bytesPerPixel != av_get_bits_per_pixel(desc)/8 ||
143
+       fi_src.log2ChromaW != desc->log2_chroma_w ||
144
+       fi_src.log2ChromaH != desc->log2_chroma_h){
145
+        av_log(ctx, AV_LOG_ERROR, "pixel-format error: bpp %i<>%i  ",
146
+               fi_src.bytesPerPixel, av_get_bits_per_pixel(desc)/8);
147
+        av_log(ctx, AV_LOG_ERROR, "chroma_subsampl: w: %i<>%i  h: %i<>%i\n",
148
+               fi_src.log2ChromaW, desc->log2_chroma_w,
149
+               fi_src.log2ChromaH, desc->log2_chroma_h);
150
+        return AVERROR(EINVAL);
151
+    }
152
+
153
+    // set values that are not initializes by the options
154
+    tc->conf.modName = "vidstabtransform";
155
+    tc->conf.verbose =1;
156
+    if(tc->tripod){
157
+        av_log(ctx, AV_LOG_INFO, "Virtual tripod mode: relative=0, smoothing=0");
158
+        tc->conf.relative=0;
159
+        tc->conf.smoothing=0;
160
+    }
161
+
162
+    if(vsTransformDataInit(td, &tc->conf, &fi_src, &fi_dest) != VS_OK){
163
+        av_log(ctx, AV_LOG_ERROR, "initialization of vid.stab transform failed, please report a BUG\n");
164
+        return AVERROR(EINVAL);
165
+    }
166
+
167
+    vsTransformGetConfig(&tc->conf,td);
168
+    av_log(ctx, AV_LOG_INFO, "Video transformation/stabilization settings (pass 2/2):\n");
169
+    av_log(ctx, AV_LOG_INFO, "    input     = %s\n", tc->input);
170
+    av_log(ctx, AV_LOG_INFO, "    smoothing = %d\n", tc->conf.smoothing);
171
+    av_log(ctx, AV_LOG_INFO, "    maxshift  = %d\n", tc->conf.maxShift);
172
+    av_log(ctx, AV_LOG_INFO, "    maxangle  = %f\n", tc->conf.maxAngle);
173
+    av_log(ctx, AV_LOG_INFO, "    crop      = %s\n", tc->conf.crop ? "Black" : "Keep");
174
+    av_log(ctx, AV_LOG_INFO, "    relative  = %s\n", tc->conf.relative ? "True": "False");
175
+    av_log(ctx, AV_LOG_INFO, "    invert    = %s\n", tc->conf.invert ? "True" : "False");
176
+    av_log(ctx, AV_LOG_INFO, "    zoom      = %f\n", tc->conf.zoom);
177
+    av_log(ctx, AV_LOG_INFO, "    optzoom   = %s\n", tc->conf.optZoom ? "On" : "Off");
178
+    av_log(ctx, AV_LOG_INFO, "    interpol  = %s\n", getInterpolationTypeName(tc->conf.interpolType));
179
+
180
+    f = fopen(tc->input, "r");
181
+    if (f == NULL) {
182
+        av_log(ctx, AV_LOG_ERROR, "cannot open input file %s\n", tc->input);
183
+        return AVERROR(errno);
184
+    } else {
185
+        VSManyLocalMotions mlms;
186
+        if(vsReadLocalMotionsFile(f,&mlms)==VS_OK){
187
+            // calculate the actual transforms from the localmotions
188
+            if(vsLocalmotions2TransformsSimple(td, &mlms,&tc->trans)!=VS_OK){
189
+                av_log(ctx, AV_LOG_ERROR, "calculating transformations failed\n");
190
+                return AVERROR(EINVAL);
191
+            }
192
+        }else{ // try to read old format
193
+            if (!vsReadOldTransforms(td, f, &tc->trans)) { /* read input file */
194
+                av_log(ctx, AV_LOG_ERROR, "error parsing input file %s\n", tc->input);
195
+                return AVERROR(EINVAL);
196
+            }
197
+        }
198
+    }
199
+    fclose(f);
200
+
201
+    if (vsPreprocessTransforms(td, &tc->trans)!= VS_OK ) {
202
+        av_log(ctx, AV_LOG_ERROR, "error while preprocessing transforms\n");
203
+        return AVERROR(EINVAL);
204
+    }
205
+
206
+    // TODO: add sharpening, so far the user needs to call the unsharp filter manually
207
+    return 0;
208
+}
209
+
210
+
211
+static int filter_frame(AVFilterLink *inlink,  AVFrame *in)
212
+{
213
+    AVFilterContext *ctx = inlink->dst;
214
+    TransformContext *tc = ctx->priv;
215
+    VSTransformData* td = &(tc->td);
216
+
217
+    AVFilterLink *outlink = inlink->dst->outputs[0];
218
+    int direct = 0;
219
+    AVFrame *out;
220
+    VSFrame inframe;
221
+    int plane;
222
+
223
+    if (av_frame_is_writable(in)) {
224
+        direct = 1;
225
+        out = in;
226
+    } else {
227
+        out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
228
+        if (!out) {
229
+            av_frame_free(&in);
230
+            return AVERROR(ENOMEM);
231
+        }
232
+        av_frame_copy_props(out, in);
233
+    }
234
+
235
+    for(plane=0; plane < vsTransformGetSrcFrameInfo(td)->planes; plane++){
236
+        inframe.data[plane] = in->data[plane];
237
+        inframe.linesize[plane] = in->linesize[plane];
238
+    }
239
+    if(out == in){ // inplace
240
+        vsTransformPrepare(td, &inframe, &inframe);
241
+    }else{ // seperate frames
242
+        VSFrame outframe;
243
+        for(plane=0; plane < vsTransformGetDestFrameInfo(td)->planes; plane++){
244
+            outframe.data[plane] = out->data[plane];
245
+            outframe.linesize[plane] = out->linesize[plane];
246
+        }
247
+        vsTransformPrepare(td, &inframe, &outframe);
248
+    }
249
+
250
+    vsDoTransform(td, vsGetNextTransform(td, &tc->trans));
251
+
252
+    vsTransformFinish(td);
253
+
254
+    if (!direct)
255
+        av_frame_free(&in);
256
+
257
+    return ff_filter_frame(outlink, out);
258
+}
259
+
260
+static const AVFilterPad avfilter_vf_vidstabtransform_inputs[] = {
261
+    {
262
+        .name             = "default",
263
+        .type             = AVMEDIA_TYPE_VIDEO,
264
+        .filter_frame     = filter_frame,
265
+        .config_props     = config_input,
266
+    },
267
+    { NULL }
268
+};
269
+
270
+static const AVFilterPad avfilter_vf_vidstabtransform_outputs[] = {
271
+    {
272
+        .name             = "default",
273
+        .type             = AVMEDIA_TYPE_VIDEO,
274
+    },
275
+    { NULL }
276
+};
277
+
278
+AVFilter avfilter_vf_vidstabtransform = {
279
+    .name          = "vidstabtransform",
280
+    .description   = NULL_IF_CONFIG_SMALL("pass 2 of stabilization"
281
+                                          "transforms the frames"
282
+                                          "(see vidstabdetect for pass 1)"),
283
+    .priv_size     = sizeof(TransformContext),
284
+    .init          = init,
285
+    .uninit        = uninit,
286
+    .query_formats = query_formats,
287
+
288
+    .inputs        = avfilter_vf_vidstabtransform_inputs,
289
+    .outputs       = avfilter_vf_vidstabtransform_outputs,
290
+    .priv_class    = &vidstabtransform_class,
291
+
292
+};
293
+
0 294
new file mode 100644
... ...
@@ -0,0 +1,84 @@
0
+/*
1
+ * Copyright (c) 2013 Georg Martius <georg dot martius at web dot de>
2
+ *
3
+ * This file is part of FFmpeg.
4
+ *
5
+ * FFmpeg is free software; you can redistribute it and/or
6
+ * modify it under the terms of the GNU Lesser General Public
7
+ * License as published by the Free Software Foundation; either
8
+ * version 2.1 of the License, or (at your option) any later version.
9
+ *
10
+ * FFmpeg is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
+ * Lesser General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Lesser General Public
16
+ * License along with FFmpeg; if not, write to the Free Software
17
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ */
19
+
20
+#include "vidstabutils.h"
21
+
22
+
23
+/** convert AV's pixelformat to vid.stab pixelformat */
24
+VSPixelFormat av_2_vs_pixel_format(AVFilterContext *ctx, enum AVPixelFormat pf){
25
+    switch(pf){
26
+    case AV_PIX_FMT_YUV420P:  return PF_YUV420P;
27
+    case AV_PIX_FMT_YUV422P:  return PF_YUV422P;
28
+    case AV_PIX_FMT_YUV444P:  return PF_YUV444P;
29
+    case AV_PIX_FMT_YUV410P:  return PF_YUV410P;
30
+    case AV_PIX_FMT_YUV411P:  return PF_YUV411P;
31
+    case AV_PIX_FMT_YUV440P:  return PF_YUV440P;
32
+    case AV_PIX_FMT_YUVA420P: return PF_YUVA420P;
33
+    case AV_PIX_FMT_GRAY8:    return PF_GRAY8;
34
+    case AV_PIX_FMT_RGB24:    return PF_RGB24;
35
+    case AV_PIX_FMT_BGR24:    return PF_BGR24;
36
+    case AV_PIX_FMT_RGBA:     return PF_RGBA;
37
+    default:
38
+        av_log(ctx, AV_LOG_ERROR, "cannot deal with pixel format %i\n", pf);
39
+        return PF_NONE;
40
+    }
41
+}
42
+
43
+
44
+/** struct to hold a valid context for logging from within vid.stab lib */
45
+typedef struct {
46
+    const AVClass* class;
47
+} VS2AVLogCtx;
48
+
49
+/** wrapper to log vs_log into av_log */
50
+static int vs_2_av_log_wrapper(int type, const char* tag, const char* format, ...){
51
+    va_list ap;
52
+    VS2AVLogCtx ctx;
53
+    AVClass class = {
54
+      .class_name = tag,
55
+      .item_name  = av_default_item_name,
56
+      .option     = 0,
57
+      .version    = LIBAVUTIL_VERSION_INT,
58
+      .category   = AV_CLASS_CATEGORY_FILTER,
59
+    };
60
+    ctx.class = &class;
61
+    va_start (ap, format);
62
+    av_vlog(&ctx, type, format, ap);
63
+    va_end (ap);
64
+    return VS_OK;
65
+}
66
+
67
+/** sets the memory allocation function and logging constants to av versions */
68
+void vs_set_mem_and_log_functions(void){
69
+    vs_malloc  = av_malloc;
70
+    vs_zalloc  = av_mallocz;
71
+    vs_realloc = av_realloc;
72
+    vs_free    = av_free;
73
+
74
+    VS_ERROR_TYPE = AV_LOG_ERROR;
75
+    VS_WARN_TYPE  = AV_LOG_WARNING;
76
+    VS_INFO_TYPE  = AV_LOG_INFO;
77
+    VS_MSG_TYPE   = AV_LOG_VERBOSE;
78
+
79
+    vs_log   = vs_2_av_log_wrapper;
80
+
81
+    VS_ERROR = 0;
82
+    VS_OK    = 1;
83
+}
0 84
new file mode 100644
... ...
@@ -0,0 +1,36 @@
0
+/*
1
+ * Copyright (c) 2013 Georg Martius <georg dot martius at web dot de>
2
+ *
3
+ * This file is part of FFmpeg.
4
+ *
5
+ * FFmpeg is free software; you can redistribute it and/or
6
+ * modify it under the terms of the GNU Lesser General Public
7
+ * License as published by the Free Software Foundation; either
8
+ * version 2.1 of the License, or (at your option) any later version.
9
+ *
10
+ * FFmpeg is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
+ * Lesser General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Lesser General Public
16
+ * License along with FFmpeg; if not, write to the Free Software
17
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ */
19
+
20
+#ifndef AVFILTER_VIDSTABUTILS_H
21
+#define AVFILTER_VIDSTABUTILS_H
22
+
23
+#include <vid.stab/libvidstab.h>
24
+
25
+#include "avfilter.h"
26
+
27
+/* ** some conversions from avlib to vid.stab constants and functions *** */
28
+
29
+/** converts the pixelformat of avlib into the one of the vid.stab library */
30
+VSPixelFormat av_2_vs_pixel_format(AVFilterContext *ctx, enum AVPixelFormat pf);
31
+
32
+/** sets the memory allocation function and logging constants to av versions */
33
+void vs_set_mem_and_log_functions(void);
34
+
35
+#endif