Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Michael Niedermayer authored on 2015/05/03 04:00:01... | ... |
@@ -2644,6 +2644,7 @@ eq_filter_deps="gpl" |
2644 | 2644 |
fftfilt_filter_deps="avcodec" |
2645 | 2645 |
fftfilt_filter_select="rdft" |
2646 | 2646 |
flite_filter_deps="libflite" |
2647 |
+find_rect_filter_deps="gpl" |
|
2647 | 2648 |
frei0r_filter_deps="frei0r dlopen" |
2648 | 2649 |
frei0r_src_filter_deps="frei0r dlopen" |
2649 | 2650 |
fspp_filter_deps="gpl" |
... | ... |
@@ -5163,6 +5163,36 @@ framework. |
5163 | 5163 |
|
5164 | 5164 |
It does not take parameters. |
5165 | 5165 |
|
5166 |
+@section find_rect |
|
5167 |
+ |
|
5168 |
+Find a rectangular object. |
|
5169 |
+ |
|
5170 |
+It accepts the following options: |
|
5171 |
+ |
|
5172 |
+@table @option |
|
5173 |
+@item object |
|
5174 |
+Filepath of the object image, needs to be in gray8. |
|
5175 |
+ |
|
5176 |
+@item threshold |
|
5177 |
+Detection threshold, default is 0.5. |
|
5178 |
+ |
|
5179 |
+@item mipmaps |
|
5180 |
+Number of mipmaps, default is 3. |
|
5181 |
+ |
|
5182 |
+@item xmin, ymin, xmax, ymax |
|
5183 |
+Specifies the rectangle in which to search. |
|
5184 |
+@end table |
|
5185 |
+ |
|
5186 |
+@subsection Examples |
|
5187 |
+ |
|
5188 |
+@itemize |
|
5189 |
+@item |
|
5190 |
+Cover a rectangular object by the supplied image of a given video using @command{ffmpeg}: |
|
5191 |
+@example |
|
5192 |
+ffmpeg -i file.ts -vf find_rect=newref.pgm,cover_rect=cover.jpg:mode=cover new.mkv |
|
5193 |
+@end example |
|
5194 |
+@end itemize |
|
5195 |
+ |
|
5166 | 5196 |
@anchor{format} |
5167 | 5197 |
@section format |
5168 | 5198 |
|
... | ... |
@@ -126,6 +126,7 @@ OBJS-$(CONFIG_FFTFILT_FILTER) += vf_fftfilt.o |
126 | 126 |
OBJS-$(CONFIG_FIELD_FILTER) += vf_field.o |
127 | 127 |
OBJS-$(CONFIG_FIELDMATCH_FILTER) += vf_fieldmatch.o |
128 | 128 |
OBJS-$(CONFIG_FIELDORDER_FILTER) += vf_fieldorder.o |
129 |
+OBJS-$(CONFIG_FIND_RECT_FILTER) += vf_find_rect.o |
|
129 | 130 |
OBJS-$(CONFIG_FORMAT_FILTER) += vf_format.o |
130 | 131 |
OBJS-$(CONFIG_FRAMESTEP_FILTER) += vf_framestep.o |
131 | 132 |
OBJS-$(CONFIG_FPS_FILTER) += vf_fps.o |
... | ... |
@@ -142,6 +142,7 @@ void avfilter_register_all(void) |
142 | 142 |
REGISTER_FILTER(FIELD, field, vf); |
143 | 143 |
REGISTER_FILTER(FIELDMATCH, fieldmatch, vf); |
144 | 144 |
REGISTER_FILTER(FIELDORDER, fieldorder, vf); |
145 |
+ REGISTER_FILTER(FIND_RECT, find_rect, vf); |
|
145 | 146 |
REGISTER_FILTER(FORMAT, format, vf); |
146 | 147 |
REGISTER_FILTER(FPS, fps, vf); |
147 | 148 |
REGISTER_FILTER(FRAMEPACK, framepack, vf); |
... | ... |
@@ -30,7 +30,7 @@ |
30 | 30 |
#include "libavutil/version.h" |
31 | 31 |
|
32 | 32 |
#define LIBAVFILTER_VERSION_MAJOR 5 |
33 |
-#define LIBAVFILTER_VERSION_MINOR 14 |
|
33 |
+#define LIBAVFILTER_VERSION_MINOR 15 |
|
34 | 34 |
#define LIBAVFILTER_VERSION_MICRO 100 |
35 | 35 |
|
36 | 36 |
#define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \ |
37 | 37 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,311 @@ |
0 |
+/* |
|
1 |
+ * Copyright (c) 2014-2015 Michael Niedermayer <michaelni@gmx.at> |
|
2 |
+ * |
|
3 |
+ * This file is part of FFmpeg. |
|
4 |
+ * |
|
5 |
+ * FFmpeg is free software; you can redistribute it and/or modify |
|
6 |
+ * it under the terms of the GNU General Public License as published by |
|
7 |
+ * the Free Software Foundation; either version 2 of the License, or |
|
8 |
+ * (at your option) any later version. |
|
9 |
+ * |
|
10 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
11 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
12 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
13 |
+ * GNU General Public License for more details. |
|
14 |
+ * |
|
15 |
+ * You should have received a copy of the GNU General Public License along |
|
16 |
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc., |
|
17 |
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
|
18 |
+ */ |
|
19 |
+ |
|
20 |
+/** |
|
21 |
+ * @todo switch to dualinput |
|
22 |
+ */ |
|
23 |
+ |
|
24 |
+#include "libavutil/avassert.h" |
|
25 |
+#include "libavutil/imgutils.h" |
|
26 |
+#include "libavutil/opt.h" |
|
27 |
+#include "internal.h" |
|
28 |
+ |
|
29 |
+#include "lavfutils.h" |
|
30 |
+ |
|
31 |
+#define MAX_MIPMAPS 5 |
|
32 |
+ |
|
33 |
+typedef struct FOCContext { |
|
34 |
+ AVClass *class; |
|
35 |
+ float threshold; |
|
36 |
+ int mipmaps; |
|
37 |
+ int xmin, ymin, xmax, ymax; |
|
38 |
+ char *obj_filename; |
|
39 |
+ int last_x, last_y; |
|
40 |
+ AVFrame *obj_frame; |
|
41 |
+ AVFrame *needle_frame[MAX_MIPMAPS]; |
|
42 |
+ AVFrame *haystack_frame[MAX_MIPMAPS]; |
|
43 |
+} FOCContext; |
|
44 |
+ |
|
45 |
+#define OFFSET(x) offsetof(FOCContext, x) |
|
46 |
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM |
|
47 |
+static const AVOption foc_options[] = { |
|
48 |
+ { "object", "object bitmap filename", OFFSET(obj_filename), AV_OPT_TYPE_STRING, {.str = NULL}, .flags = FLAGS }, |
|
49 |
+ { "threshold", "set threshold", OFFSET(threshold), AV_OPT_TYPE_FLOAT, {.dbl = 0.5}, 0, 1.0, FLAGS }, |
|
50 |
+ { "mipmaps", "set mipmaps", OFFSET(mipmaps), AV_OPT_TYPE_INT, {.i64 = 3}, 1, MAX_MIPMAPS, FLAGS }, |
|
51 |
+ { "xmin", "", OFFSET(xmin), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, FLAGS }, |
|
52 |
+ { "ymin", "", OFFSET(ymin), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, FLAGS }, |
|
53 |
+ { "xmax", "", OFFSET(xmax), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, FLAGS }, |
|
54 |
+ { "ymax", "", OFFSET(ymax), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, FLAGS }, |
|
55 |
+ { NULL } |
|
56 |
+}; |
|
57 |
+ |
|
58 |
+static const AVClass foc_class = { |
|
59 |
+ .class_name = "find_rect", |
|
60 |
+ .item_name = av_default_item_name, |
|
61 |
+ .option = foc_options, |
|
62 |
+ .version = LIBAVUTIL_VERSION_INT, |
|
63 |
+ .category = AV_CLASS_CATEGORY_FILTER, |
|
64 |
+}; |
|
65 |
+ |
|
66 |
+static int query_formats(AVFilterContext *ctx) |
|
67 |
+{ |
|
68 |
+ static const enum AVPixelFormat pix_fmts[] = { |
|
69 |
+ AV_PIX_FMT_YUV420P, |
|
70 |
+ AV_PIX_FMT_YUVJ420P, |
|
71 |
+ AV_PIX_FMT_NONE |
|
72 |
+ }; |
|
73 |
+ |
|
74 |
+ return ff_set_common_formats(ctx, ff_make_format_list(pix_fmts)); |
|
75 |
+} |
|
76 |
+ |
|
77 |
+static AVFrame *downscale(AVFrame *in) |
|
78 |
+{ |
|
79 |
+ int x, y; |
|
80 |
+ AVFrame *frame = av_frame_alloc(); |
|
81 |
+ uint8_t *src, *dst; |
|
82 |
+ if (!frame) |
|
83 |
+ return NULL; |
|
84 |
+ |
|
85 |
+ frame->format = in->format; |
|
86 |
+ frame->width = (in->width + 1) / 2; |
|
87 |
+ frame->height = (in->height+ 1) / 2; |
|
88 |
+ |
|
89 |
+ if (av_frame_get_buffer(frame, 32) < 0) { |
|
90 |
+ av_frame_free(&frame); |
|
91 |
+ return NULL; |
|
92 |
+ } |
|
93 |
+ src = in ->data[0]; |
|
94 |
+ dst = frame->data[0]; |
|
95 |
+ |
|
96 |
+ for(y = 0; y < frame->height; y++) { |
|
97 |
+ for(x = 0; x < frame->width; x++) { |
|
98 |
+ dst[x] = ( src[2*x+0] |
|
99 |
+ + src[2*x+1] |
|
100 |
+ + src[2*x+0 + in->linesize[0]] |
|
101 |
+ + src[2*x+1 + in->linesize[0]] |
|
102 |
+ + 2) >> 2; |
|
103 |
+ } |
|
104 |
+ src += 2*in->linesize[0]; |
|
105 |
+ dst += frame->linesize[0]; |
|
106 |
+ } |
|
107 |
+ return frame; |
|
108 |
+} |
|
109 |
+ |
|
110 |
+static float compare(const AVFrame *haystack, const AVFrame *obj, int offx, int offy) |
|
111 |
+{ |
|
112 |
+ int x,y; |
|
113 |
+ int o_sum_v = 0; |
|
114 |
+ int h_sum_v = 0; |
|
115 |
+ int64_t oo_sum_v = 0; |
|
116 |
+ int64_t hh_sum_v = 0; |
|
117 |
+ int64_t oh_sum_v = 0; |
|
118 |
+ float c; |
|
119 |
+ int n = obj->height * obj->width; |
|
120 |
+ const uint8_t *odat = obj ->data[0]; |
|
121 |
+ const uint8_t *hdat = haystack->data[0] + offx + offy * haystack->linesize[0]; |
|
122 |
+ int64_t o_sigma, h_sigma; |
|
123 |
+ |
|
124 |
+ for(y = 0; y < obj->height; y++) { |
|
125 |
+ for(x = 0; x < obj->width; x++) { |
|
126 |
+ int o_v = odat[x]; |
|
127 |
+ int h_v = hdat[x]; |
|
128 |
+ o_sum_v += o_v; |
|
129 |
+ h_sum_v += h_v; |
|
130 |
+ oo_sum_v += o_v * o_v; |
|
131 |
+ hh_sum_v += h_v * h_v; |
|
132 |
+ oh_sum_v += o_v * h_v; |
|
133 |
+ } |
|
134 |
+ odat += obj->linesize[0]; |
|
135 |
+ hdat += haystack->linesize[0]; |
|
136 |
+ } |
|
137 |
+ o_sigma = n*oo_sum_v - o_sum_v*(int64_t)o_sum_v; |
|
138 |
+ h_sigma = n*hh_sum_v - h_sum_v*(int64_t)h_sum_v; |
|
139 |
+ |
|
140 |
+ if (o_sigma == 0 || h_sigma == 0) |
|
141 |
+ return 1.0; |
|
142 |
+ |
|
143 |
+ c = (n*oh_sum_v - o_sum_v*(int64_t)h_sum_v) / (sqrt(o_sigma)*sqrt(h_sigma)); |
|
144 |
+ |
|
145 |
+ return 1 - fabs(c); |
|
146 |
+} |
|
147 |
+ |
|
148 |
+static int config_input(AVFilterLink *inlink) |
|
149 |
+{ |
|
150 |
+ AVFilterContext *ctx = inlink->dst; |
|
151 |
+ FOCContext *foc = ctx->priv; |
|
152 |
+ |
|
153 |
+ if (foc->xmax <= 0) |
|
154 |
+ foc->xmax = inlink->w - foc->obj_frame->width; |
|
155 |
+ if (foc->ymax <= 0) |
|
156 |
+ foc->ymax = inlink->h - foc->obj_frame->height; |
|
157 |
+ |
|
158 |
+ return 0; |
|
159 |
+} |
|
160 |
+ |
|
161 |
+static float search(FOCContext *foc, int pass, int maxpass, int xmin, int xmax, int ymin, int ymax, int *best_x, int *best_y, float best_score) |
|
162 |
+{ |
|
163 |
+ int x, y; |
|
164 |
+ |
|
165 |
+ if (pass + 1 <= maxpass) { |
|
166 |
+ int sub_x, sub_y; |
|
167 |
+ search(foc, pass+1, maxpass, xmin>>1, (xmax+1)>>1, ymin>>1, (ymax+1)>>1, &sub_x, &sub_y, 1.0); |
|
168 |
+ xmin = FFMAX(xmin, 2*sub_x - 4); |
|
169 |
+ xmax = FFMIN(xmax, 2*sub_x + 4); |
|
170 |
+ ymin = FFMAX(ymin, 2*sub_y - 4); |
|
171 |
+ ymax = FFMIN(ymax, 2*sub_y + 4); |
|
172 |
+ } |
|
173 |
+ |
|
174 |
+ for (y = ymin; y <= ymax; y++) { |
|
175 |
+ for (x = xmin; x <= xmax; x++) { |
|
176 |
+ float score = compare(foc->haystack_frame[pass], foc->needle_frame[pass], x, y); |
|
177 |
+ av_assert0(score != 0); |
|
178 |
+ if (score < best_score) { |
|
179 |
+ best_score = score; |
|
180 |
+ *best_x = x; |
|
181 |
+ *best_y = y; |
|
182 |
+ } |
|
183 |
+ } |
|
184 |
+ } |
|
185 |
+ return best_score; |
|
186 |
+} |
|
187 |
+ |
|
188 |
+static int filter_frame(AVFilterLink *inlink, AVFrame *in) |
|
189 |
+{ |
|
190 |
+ AVFilterContext *ctx = inlink->dst; |
|
191 |
+ FOCContext *foc = ctx->priv; |
|
192 |
+ float best_score; |
|
193 |
+ int best_x, best_y; |
|
194 |
+ int i; |
|
195 |
+ |
|
196 |
+ foc->haystack_frame[0] = av_frame_clone(in); |
|
197 |
+ for (i=1; i<foc->mipmaps; i++) { |
|
198 |
+ foc->haystack_frame[i] = downscale(foc->haystack_frame[i-1]); |
|
199 |
+ } |
|
200 |
+ |
|
201 |
+ best_score = search(foc, 0, 0, |
|
202 |
+ FFMAX(foc->xmin, foc->last_x - 8), |
|
203 |
+ FFMIN(foc->xmax, foc->last_x + 8), |
|
204 |
+ FFMAX(foc->ymin, foc->last_y - 8), |
|
205 |
+ FFMIN(foc->ymax, foc->last_y + 8), |
|
206 |
+ &best_x, &best_y, 1.0); |
|
207 |
+ |
|
208 |
+ best_score = search(foc, 0, foc->mipmaps - 1, foc->xmin, foc->xmax, foc->ymin, foc->ymax, |
|
209 |
+ &best_x, &best_y, best_score); |
|
210 |
+ |
|
211 |
+ for (i=0; i<MAX_MIPMAPS; i++) { |
|
212 |
+ av_frame_free(&foc->haystack_frame[i]); |
|
213 |
+ } |
|
214 |
+ |
|
215 |
+ if (best_score > foc->threshold) { |
|
216 |
+ return ff_filter_frame(ctx->outputs[0], in); |
|
217 |
+ } |
|
218 |
+ |
|
219 |
+ av_log(ctx, AV_LOG_DEBUG, "Found at %d %d score %f\n", best_x, best_y, best_score); |
|
220 |
+ foc->last_x = best_x; |
|
221 |
+ foc->last_y = best_y; |
|
222 |
+ |
|
223 |
+ av_frame_make_writable(in); |
|
224 |
+ |
|
225 |
+ av_dict_set_int(&in->metadata, "lavfi.rect.w", foc->obj_frame->width, 0); |
|
226 |
+ av_dict_set_int(&in->metadata, "lavfi.rect.h", foc->obj_frame->height, 0); |
|
227 |
+ av_dict_set_int(&in->metadata, "lavfi.rect.x", best_x, 0); |
|
228 |
+ av_dict_set_int(&in->metadata, "lavfi.rect.y", best_y, 0); |
|
229 |
+ |
|
230 |
+ return ff_filter_frame(ctx->outputs[0], in); |
|
231 |
+} |
|
232 |
+ |
|
233 |
+static av_cold void uninit(AVFilterContext *ctx) |
|
234 |
+{ |
|
235 |
+ FOCContext *foc = ctx->priv; |
|
236 |
+ int i; |
|
237 |
+ |
|
238 |
+ for (i = 0; i < MAX_MIPMAPS; i++) { |
|
239 |
+ av_frame_free(&foc->needle_frame[i]); |
|
240 |
+ av_frame_free(&foc->haystack_frame[i]); |
|
241 |
+ } |
|
242 |
+ |
|
243 |
+ if (foc->obj_frame) |
|
244 |
+ av_freep(&foc->obj_frame->data[0]); |
|
245 |
+ av_frame_free(&foc->obj_frame); |
|
246 |
+} |
|
247 |
+ |
|
248 |
+static av_cold int init(AVFilterContext *ctx) |
|
249 |
+{ |
|
250 |
+ FOCContext *foc = ctx->priv; |
|
251 |
+ int ret, i; |
|
252 |
+ |
|
253 |
+ if (!foc->obj_filename) { |
|
254 |
+ av_log(ctx, AV_LOG_ERROR, "object filename not set\n"); |
|
255 |
+ return AVERROR(EINVAL); |
|
256 |
+ } |
|
257 |
+ |
|
258 |
+ foc->obj_frame = av_frame_alloc(); |
|
259 |
+ if (!foc->obj_frame) |
|
260 |
+ return AVERROR(ENOMEM); |
|
261 |
+ |
|
262 |
+ if ((ret = ff_load_image(foc->obj_frame->data, foc->obj_frame->linesize, |
|
263 |
+ &foc->obj_frame->width, &foc->obj_frame->height, |
|
264 |
+ &foc->obj_frame->format, foc->obj_filename, ctx)) < 0) |
|
265 |
+ return ret; |
|
266 |
+ |
|
267 |
+ if (foc->obj_frame->format != AV_PIX_FMT_GRAY8) { |
|
268 |
+ av_log(ctx, AV_LOG_ERROR, "object image is not a grayscale image\n"); |
|
269 |
+ return AVERROR(EINVAL); |
|
270 |
+ } |
|
271 |
+ |
|
272 |
+ foc->needle_frame[0] = av_frame_clone(foc->obj_frame); |
|
273 |
+ for (i = 1; i < foc->mipmaps; i++) { |
|
274 |
+ foc->needle_frame[i] = downscale(foc->needle_frame[i-1]); |
|
275 |
+ if (!foc->needle_frame[i]) |
|
276 |
+ return AVERROR(ENOMEM); |
|
277 |
+ } |
|
278 |
+ |
|
279 |
+ return 0; |
|
280 |
+} |
|
281 |
+ |
|
282 |
+static const AVFilterPad foc_inputs[] = { |
|
283 |
+ { |
|
284 |
+ .name = "default", |
|
285 |
+ .type = AVMEDIA_TYPE_VIDEO, |
|
286 |
+ .config_props = config_input, |
|
287 |
+ .filter_frame = filter_frame, |
|
288 |
+ }, |
|
289 |
+ { NULL } |
|
290 |
+}; |
|
291 |
+ |
|
292 |
+static const AVFilterPad foc_outputs[] = { |
|
293 |
+ { |
|
294 |
+ .name = "default", |
|
295 |
+ .type = AVMEDIA_TYPE_VIDEO, |
|
296 |
+ }, |
|
297 |
+ { NULL } |
|
298 |
+}; |
|
299 |
+ |
|
300 |
+AVFilter ff_vf_find_rect = { |
|
301 |
+ .name = "find_rect", |
|
302 |
+ .description = NULL_IF_CONFIG_SMALL("Find a user specified object"), |
|
303 |
+ .priv_size = sizeof(FOCContext), |
|
304 |
+ .init = init, |
|
305 |
+ .uninit = uninit, |
|
306 |
+ .query_formats = query_formats, |
|
307 |
+ .inputs = foc_inputs, |
|
308 |
+ .outputs = foc_outputs, |
|
309 |
+ .priv_class = &foc_class, |
|
310 |
+}; |