Patch by Nolan L nol888 <=> gmail >=< com.
See thread:
Subject: [FFmpeg-devel] [PATCH] Port gradfun to libavfilter (GCI)
Date: Mon, 29 Nov 2010 07:18:14 -0500
Originally committed as revision 25942 to svn://svn.ffmpeg.org/ffmpeg/trunk
... | ... |
@@ -425,6 +425,35 @@ frei0r=perspective:0.2/0.2:0.8/0.2 |
425 | 425 |
For more information see: |
426 | 426 |
@url{http://piksel.org/frei0r} |
427 | 427 |
|
428 |
+@section gradfun |
|
429 |
+ |
|
430 |
+Fix the banding artifacts that are sometimes introduced into nearly flat |
|
431 |
+regions by truncation to 8-bit color depth. |
|
432 |
+Interpolate the gradients that should go where the bands are, and |
|
433 |
+dither them. |
|
434 |
+ |
|
435 |
+The filter takes two optional parameters, separated by ':': |
|
436 |
+@var{strength}:@var{radius} |
|
437 |
+ |
|
438 |
+@var{strength} is the maximum amount by which the filter will change |
|
439 |
+any one pixel. It is also the threshold for detecting nearly flat |
|
440 |
+regions. Acceptable values range from .51 to 255, default value is |
|
441 |
+1.2, out-of-range values will be clipped to the valid range. |
|
442 |
+ |
|
443 |
+@var{radius} is the neighborhood to fit the gradient to. A larger |
|
444 |
+radius makes for smoother gradients, but also prevents the filter from |
|
445 |
+modifying the pixels near detailed regions. Acceptable values are |
|
446 |
+4-32, default value is 16, out-of-range values will be clipped to the |
|
447 |
+valid range. |
|
448 |
+ |
|
449 |
+@example |
|
450 |
+# default parameters |
|
451 |
+gradfun=1.2:16 |
|
452 |
+ |
|
453 |
+# omitting radius |
|
454 |
+gradfun=1.2 |
|
455 |
+@end example |
|
456 |
+ |
|
428 | 457 |
@section hflip |
429 | 458 |
|
430 | 459 |
Flip the input video horizontally. |
... | ... |
@@ -26,6 +26,7 @@ OBJS-$(CONFIG_DRAWBOX_FILTER) += vf_drawbox.o |
26 | 26 |
OBJS-$(CONFIG_FIFO_FILTER) += vf_fifo.o |
27 | 27 |
OBJS-$(CONFIG_FORMAT_FILTER) += vf_format.o |
28 | 28 |
OBJS-$(CONFIG_FREI0R_FILTER) += vf_frei0r.o |
29 |
+OBJS-$(CONFIG_GRADFUN_FILTER) += vf_gradfun.o |
|
29 | 30 |
OBJS-$(CONFIG_HFLIP_FILTER) += vf_hflip.o |
30 | 31 |
OBJS-$(CONFIG_HQDN3D_FILTER) += vf_hqdn3d.o |
31 | 32 |
OBJS-$(CONFIG_NOFORMAT_FILTER) += vf_format.o |
... | ... |
@@ -47,6 +47,7 @@ void avfilter_register_all(void) |
47 | 47 |
REGISTER_FILTER (FIFO, fifo, vf); |
48 | 48 |
REGISTER_FILTER (FORMAT, format, vf); |
49 | 49 |
REGISTER_FILTER (FREI0R, frei0r, vf); |
50 |
+ REGISTER_FILTER (GRADFUN, gradfun, vf); |
|
50 | 51 |
REGISTER_FILTER (HFLIP, hflip, vf); |
51 | 52 |
REGISTER_FILTER (HQDN3D, hqdn3d, vf); |
52 | 53 |
REGISTER_FILTER (NOFORMAT, noformat, vf); |
... | ... |
@@ -27,8 +27,8 @@ |
27 | 27 |
#include "libavcore/samplefmt.h" |
28 | 28 |
|
29 | 29 |
#define LIBAVFILTER_VERSION_MAJOR 1 |
30 |
-#define LIBAVFILTER_VERSION_MINOR 68 |
|
31 |
-#define LIBAVFILTER_VERSION_MICRO 1 |
|
30 |
+#define LIBAVFILTER_VERSION_MINOR 69 |
|
31 |
+#define LIBAVFILTER_VERSION_MICRO 0 |
|
32 | 32 |
|
33 | 33 |
#define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \ |
34 | 34 |
LIBAVFILTER_VERSION_MINOR, \ |
35 | 35 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,48 @@ |
0 |
+/* |
|
1 |
+ * Copyright (c) 2010 Nolan Lum <nol888@gmail.com> |
|
2 |
+ * Copyright (c) 2009 Loren Merritt <lorenm@u.washington.edu> |
|
3 |
+ * |
|
4 |
+ * This file is part of FFmpeg. |
|
5 |
+ * |
|
6 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
7 |
+ * modify it under the terms of the GNU Lesser General Public |
|
8 |
+ * License as published by the Free Software Foundation; either |
|
9 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
10 |
+ * |
|
11 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
12 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
13 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
14 |
+ * Lesser General Public License for more details. |
|
15 |
+ * |
|
16 |
+ * You should have received a copy of the GNU Lesser General Public |
|
17 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
18 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
19 |
+ */ |
|
20 |
+ |
|
21 |
+#ifndef AVFILTER_GRADFUN_H |
|
22 |
+#define AVFILTER_GRADFUN_H |
|
23 |
+ |
|
24 |
+#include "avfilter.h" |
|
25 |
+ |
|
26 |
+/// Holds instance-specific information for gradfun. |
|
27 |
+typedef struct { |
|
28 |
+ int thresh; ///< threshold for gradient algorithm |
|
29 |
+ int radius; ///< blur radius |
|
30 |
+ int chroma_w; ///< width of the chroma planes |
|
31 |
+ int chroma_h; ///< height of the chroma planes |
|
32 |
+ int chroma_r; ///< blur radius for the chroma planes |
|
33 |
+ uint16_t *buf; ///< holds image data for blur algorithm passed into filter. |
|
34 |
+ /// DSP functions. |
|
35 |
+ void (*filter_line) (uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers); |
|
36 |
+ void (*blur_line) (uint16_t *dc, uint16_t *buf, uint16_t *buf1, uint8_t *src, int src_linesize, int width); |
|
37 |
+} GradFunContext; |
|
38 |
+ |
|
39 |
+void ff_gradfun_filter_line_c(uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers); |
|
40 |
+void ff_gradfun_blur_line_c(uint16_t *dc, uint16_t *buf, uint16_t *buf1, uint8_t *src, int src_linesize, int width); |
|
41 |
+ |
|
42 |
+void ff_gradfun_filter_line_mmx2(uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers); |
|
43 |
+void ff_gradfun_filter_line_ssse3(uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers); |
|
44 |
+ |
|
45 |
+void ff_gradfun_blur_line_sse2(uint16_t *dc, uint16_t *buf, uint16_t *buf1, uint8_t *src, int src_linesize, int width); |
|
46 |
+ |
|
47 |
+#endif /* AVFILTER_GRADFUN_H */ |
0 | 48 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,253 @@ |
0 |
+/* |
|
1 |
+ * Copyright (c) 2010 Nolan Lum <nol888@gmail.com> |
|
2 |
+ * Copyright (c) 2009 Loren Merritt <lorenm@u.washington.edu> |
|
3 |
+ * |
|
4 |
+ * This file is part of FFmpeg. |
|
5 |
+ * |
|
6 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
7 |
+ * modify it under the terms of the GNU Lesser General Public |
|
8 |
+ * License as published by the Free Software Foundation; either |
|
9 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
10 |
+ * |
|
11 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
12 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
13 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
14 |
+ * Lesser General Public License for more details. |
|
15 |
+ * |
|
16 |
+ * You should have received a copy of the GNU Lesser General Public |
|
17 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
18 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
19 |
+ */ |
|
20 |
+ |
|
21 |
+/** |
|
22 |
+ * @file |
|
23 |
+ * gradfun debanding filter, ported from MPlayer |
|
24 |
+ * libmpcodecs/vf_gradfun.c |
|
25 |
+ * |
|
26 |
+ * Apply a boxblur debanding algorithm (based on the gradfun2db |
|
27 |
+ * Avisynth filter by prunedtree). |
|
28 |
+ * For each pixel, if it's within the threshold of the blurred value, move it closer. |
|
29 |
+ * So now we have a smoothed and higher bitdepth version of all the shallow |
|
30 |
+ * gradients, while leaving detailed areas untouched. |
|
31 |
+ * Dither it back to 8bit. |
|
32 |
+ */ |
|
33 |
+ |
|
34 |
+#include "libavcore/imgutils.h" |
|
35 |
+#include "libavutil/cpu.h" |
|
36 |
+#include "libavutil/pixdesc.h" |
|
37 |
+#include "avfilter.h" |
|
38 |
+#include "gradfun.h" |
|
39 |
+ |
|
40 |
+DECLARE_ALIGNED(16, static const uint16_t, dither)[8][8] = { |
|
41 |
+ {0x00,0x60,0x18,0x78,0x06,0x66,0x1E,0x7E}, |
|
42 |
+ {0x40,0x20,0x58,0x38,0x46,0x26,0x5E,0x3E}, |
|
43 |
+ {0x10,0x70,0x08,0x68,0x16,0x76,0x0E,0x6E}, |
|
44 |
+ {0x50,0x30,0x48,0x28,0x56,0x36,0x4E,0x2E}, |
|
45 |
+ {0x04,0x64,0x1C,0x7C,0x02,0x62,0x1A,0x7A}, |
|
46 |
+ {0x44,0x24,0x5C,0x3C,0x42,0x22,0x5A,0x3A}, |
|
47 |
+ {0x14,0x74,0x0C,0x6C,0x12,0x72,0x0A,0x6A}, |
|
48 |
+ {0x54,0x34,0x4C,0x2C,0x52,0x32,0x4A,0x2A}, |
|
49 |
+}; |
|
50 |
+ |
|
51 |
+void ff_gradfun_filter_line_c(uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers) |
|
52 |
+{ |
|
53 |
+ int x; |
|
54 |
+ for (x = 0; x < width; x++, dc += x & 1) { |
|
55 |
+ int pix = src[x] << 7; |
|
56 |
+ int delta = dc[0] - pix; |
|
57 |
+ int m = abs(delta) * thresh >> 16; |
|
58 |
+ m = FFMAX(0, 127 - m); |
|
59 |
+ m = m * m * delta >> 14; |
|
60 |
+ pix += m + dithers[x & 7]; |
|
61 |
+ dst[x] = av_clip_uint8(pix >> 7); |
|
62 |
+ } |
|
63 |
+} |
|
64 |
+ |
|
65 |
+void ff_gradfun_blur_line_c(uint16_t *dc, uint16_t *buf, uint16_t *buf1, uint8_t *src, int src_linesize, int width) |
|
66 |
+{ |
|
67 |
+ int x, v, old; |
|
68 |
+ for (x = 0; x < width; x++) { |
|
69 |
+ v = buf1[x] + src[2 * x] + src[2 * x + 1] + src[2 * x + src_linesize] + src[2 * x + 1 + src_linesize]; |
|
70 |
+ old = buf[x]; |
|
71 |
+ buf[x] = v; |
|
72 |
+ dc[x] = v - old; |
|
73 |
+ } |
|
74 |
+} |
|
75 |
+ |
|
76 |
+static void filter(GradFunContext *ctx, uint8_t *dst, uint8_t *src, int width, int height, int dst_linesize, int src_linesize, int r) |
|
77 |
+{ |
|
78 |
+ int bstride = FFALIGN(width, 16) / 2; |
|
79 |
+ int y; |
|
80 |
+ uint32_t dc_factor = (1 << 21) / (r * r); |
|
81 |
+ uint16_t *dc = ctx->buf + 16; |
|
82 |
+ uint16_t *buf = ctx->buf + bstride + 32; |
|
83 |
+ int thresh = ctx->thresh; |
|
84 |
+ |
|
85 |
+ memset(dc, 0, (bstride + 16) * sizeof(*buf)); |
|
86 |
+ for (y = 0; y < r; y++) |
|
87 |
+ ctx->blur_line(dc, buf + y * bstride, buf + (y - 1) * bstride, src + 2 * y * src_linesize, src_linesize, width / 2); |
|
88 |
+ for (;;) { |
|
89 |
+ if (y < height - r) { |
|
90 |
+ int mod = ((y + r) / 2) % r; |
|
91 |
+ uint16_t *buf0 = buf + mod * bstride; |
|
92 |
+ uint16_t *buf1 = buf + (mod ? mod - 1 : r - 1) * bstride; |
|
93 |
+ int x, v; |
|
94 |
+ ctx->blur_line(dc, buf0, buf1, src + (y + r) * src_linesize, src_linesize, width / 2); |
|
95 |
+ for (x = v = 0; x < r; x++) |
|
96 |
+ v += dc[x]; |
|
97 |
+ for (; x < width / 2; x++) { |
|
98 |
+ v += dc[x] - dc[x-r]; |
|
99 |
+ dc[x-r] = v * dc_factor >> 16; |
|
100 |
+ } |
|
101 |
+ for (; x < (width + r + 1) / 2; x++) |
|
102 |
+ dc[x-r] = v * dc_factor >> 16; |
|
103 |
+ for (x = -r / 2; x < 0; x++) |
|
104 |
+ dc[x] = dc[0]; |
|
105 |
+ } |
|
106 |
+ if (y == r) { |
|
107 |
+ for (y = 0; y < r; y++) |
|
108 |
+ ctx->filter_line(dst + y * dst_linesize, src + y * src_linesize, dc - r / 2, width, thresh, dither[y & 7]); |
|
109 |
+ } |
|
110 |
+ ctx->filter_line(dst + y * dst_linesize, src + y * src_linesize, dc - r / 2, width, thresh, dither[y & 7]); |
|
111 |
+ if (++y >= height) break; |
|
112 |
+ ctx->filter_line(dst + y * dst_linesize, src + y * src_linesize, dc - r / 2, width, thresh, dither[y & 7]); |
|
113 |
+ if (++y >= height) break; |
|
114 |
+ } |
|
115 |
+} |
|
116 |
+ |
|
117 |
+static av_cold int init(AVFilterContext *ctx, const char *args, void *opaque) |
|
118 |
+{ |
|
119 |
+ GradFunContext *gf = ctx->priv; |
|
120 |
+ float thresh = 1.2; |
|
121 |
+ int radius = 16; |
|
122 |
+ av_unused int cpu_flags = av_get_cpu_flags(); |
|
123 |
+ |
|
124 |
+ if (args) |
|
125 |
+ sscanf(args, "%f:%d", &thresh, &radius); |
|
126 |
+ |
|
127 |
+ thresh = av_clipf(thresh, 0.51, 255); |
|
128 |
+ gf->thresh = (1 << 15) / thresh; |
|
129 |
+ gf->radius = av_clip((radius + 1) & ~1, 4, 32); |
|
130 |
+ |
|
131 |
+ gf->blur_line = ff_gradfun_blur_line_c; |
|
132 |
+ gf->filter_line = ff_gradfun_filter_line_c; |
|
133 |
+ |
|
134 |
+ if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX2) |
|
135 |
+ gf->filter_line = ff_gradfun_filter_line_mmx2; |
|
136 |
+ if (HAVE_SSSE3 && cpu_flags & AV_CPU_FLAG_SSSE3) |
|
137 |
+ gf->filter_line = ff_gradfun_filter_line_ssse3; |
|
138 |
+ if (HAVE_SSE && cpu_flags & AV_CPU_FLAG_SSE2) |
|
139 |
+ gf->blur_line = ff_gradfun_blur_line_sse2; |
|
140 |
+ |
|
141 |
+ av_log(ctx, AV_LOG_INFO, "threshold:%.2f radius:%d\n", thresh, gf->radius); |
|
142 |
+ |
|
143 |
+ return 0; |
|
144 |
+} |
|
145 |
+ |
|
146 |
+static av_cold void uninit(AVFilterContext *ctx) |
|
147 |
+{ |
|
148 |
+ GradFunContext *gf = ctx->priv; |
|
149 |
+ av_freep(&gf->buf); |
|
150 |
+} |
|
151 |
+ |
|
152 |
+static int query_formats(AVFilterContext *ctx) |
|
153 |
+{ |
|
154 |
+ static const enum PixelFormat pix_fmts[] = { |
|
155 |
+ PIX_FMT_YUV410P, PIX_FMT_YUV420P, |
|
156 |
+ PIX_FMT_GRAY8, PIX_FMT_NV12, |
|
157 |
+ PIX_FMT_NV21, PIX_FMT_YUV444P, |
|
158 |
+ PIX_FMT_YUV422P, PIX_FMT_YUV411P, |
|
159 |
+ PIX_FMT_NONE |
|
160 |
+ }; |
|
161 |
+ |
|
162 |
+ avfilter_set_common_formats(ctx, avfilter_make_format_list(pix_fmts)); |
|
163 |
+ |
|
164 |
+ return 0; |
|
165 |
+} |
|
166 |
+ |
|
167 |
+static int config_input(AVFilterLink *inlink) |
|
168 |
+{ |
|
169 |
+ GradFunContext *gf = inlink->dst->priv; |
|
170 |
+ int hsub = av_pix_fmt_descriptors[inlink->format].log2_chroma_w; |
|
171 |
+ int vsub = av_pix_fmt_descriptors[inlink->format].log2_chroma_h; |
|
172 |
+ |
|
173 |
+ gf->buf = av_mallocz((FFALIGN(inlink->w, 16) * (gf->radius + 1) / 2 + 32) * sizeof(uint16_t)); |
|
174 |
+ if (!gf->buf) |
|
175 |
+ return AVERROR(ENOMEM); |
|
176 |
+ |
|
177 |
+ gf->chroma_w = -((-inlink->w) >> hsub); |
|
178 |
+ gf->chroma_h = -((-inlink->h) >> vsub); |
|
179 |
+ gf->chroma_r = av_clip(((((gf->radius >> hsub) + (gf->radius >> vsub)) / 2 ) + 1) & ~1, 4, 32); |
|
180 |
+ |
|
181 |
+ return 0; |
|
182 |
+} |
|
183 |
+ |
|
184 |
+static void start_frame(AVFilterLink *inlink, AVFilterBufferRef *inpicref) |
|
185 |
+{ |
|
186 |
+ AVFilterLink *outlink = inlink->dst->outputs[0]; |
|
187 |
+ AVFilterBufferRef *outpicref; |
|
188 |
+ |
|
189 |
+ if (inpicref->perms & AV_PERM_PRESERVE) { |
|
190 |
+ outpicref = avfilter_get_video_buffer(outlink, AV_PERM_WRITE, outlink->w, outlink->h); |
|
191 |
+ avfilter_copy_buffer_ref_props(outpicref, inpicref); |
|
192 |
+ outpicref->video->w = outlink->w; |
|
193 |
+ outpicref->video->h = outlink->h; |
|
194 |
+ } else |
|
195 |
+ outpicref = inpicref; |
|
196 |
+ |
|
197 |
+ outlink->out_buf = outpicref; |
|
198 |
+ avfilter_start_frame(outlink, avfilter_ref_buffer(outpicref, ~0)); |
|
199 |
+} |
|
200 |
+ |
|
201 |
+static void null_draw_slice(AVFilterLink *link, int y, int h, int slice_dir) { } |
|
202 |
+ |
|
203 |
+static void end_frame(AVFilterLink *inlink) |
|
204 |
+{ |
|
205 |
+ GradFunContext *gf = inlink->dst->priv; |
|
206 |
+ AVFilterBufferRef *inpic = inlink->cur_buf; |
|
207 |
+ AVFilterLink *outlink = inlink->dst->outputs[0]; |
|
208 |
+ AVFilterBufferRef *outpic = outlink->out_buf; |
|
209 |
+ int p; |
|
210 |
+ |
|
211 |
+ for (p = 0; p < 4 && inpic->data[p]; p++) { |
|
212 |
+ int w = inlink->w; |
|
213 |
+ int h = inlink->h; |
|
214 |
+ int r = gf->radius; |
|
215 |
+ if (p) { |
|
216 |
+ w = gf->chroma_w; |
|
217 |
+ h = gf->chroma_h; |
|
218 |
+ r = gf->chroma_r; |
|
219 |
+ } |
|
220 |
+ |
|
221 |
+ if (FFMIN(w, h) > 2 * r) |
|
222 |
+ filter(gf, outpic->data[p], inpic->data[p], w, h, outpic->linesize[p], inpic->linesize[p], r); |
|
223 |
+ else if (outpic->data[p] != inpic->data[p]) |
|
224 |
+ av_image_copy_plane(outpic->data[p], outpic->linesize[p], inpic->data[p], inpic->linesize[p], w, h); |
|
225 |
+ } |
|
226 |
+ |
|
227 |
+ avfilter_draw_slice(outlink, 0, inlink->h, 1); |
|
228 |
+ avfilter_end_frame(outlink); |
|
229 |
+ avfilter_unref_buffer(inpic); |
|
230 |
+ avfilter_unref_buffer(outpic); |
|
231 |
+} |
|
232 |
+ |
|
233 |
+AVFilter avfilter_vf_gradfun = { |
|
234 |
+ .name = "gradfun", |
|
235 |
+ .description = NULL_IF_CONFIG_SMALL("Debands video quickly using gradients."), |
|
236 |
+ .priv_size = sizeof(GradFunContext), |
|
237 |
+ .init = init, |
|
238 |
+ .uninit = uninit, |
|
239 |
+ .query_formats = query_formats, |
|
240 |
+ |
|
241 |
+ .inputs = (AVFilterPad[]) {{ .name = "default", |
|
242 |
+ .type = AVMEDIA_TYPE_VIDEO, |
|
243 |
+ .config_props = config_input, |
|
244 |
+ .start_frame = start_frame, |
|
245 |
+ .draw_slice = null_draw_slice, |
|
246 |
+ .end_frame = end_frame, |
|
247 |
+ .min_perms = AV_PERM_READ, }, |
|
248 |
+ { .name = NULL}}, |
|
249 |
+ .outputs = (AVFilterPad[]) {{ .name = "default", |
|
250 |
+ .type = AVMEDIA_TYPE_VIDEO, }, |
|
251 |
+ { .name = NULL}}, |
|
252 |
+}; |
2 | 3 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,162 @@ |
0 |
+/* |
|
1 |
+ * This file is part of FFmpeg. |
|
2 |
+ * |
|
3 |
+ * FFmpeg is free software; you can redistribute it and/or modify |
|
4 |
+ * it under the terms of the GNU General Public License as published by |
|
5 |
+ * the Free Software Foundation; either version 2 of the License, or |
|
6 |
+ * (at your option) any later version. |
|
7 |
+ * |
|
8 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
9 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
10 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
11 |
+ * GNU General Public License for more details. |
|
12 |
+ * |
|
13 |
+ * You should have received a copy of the GNU General Public License along |
|
14 |
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc., |
|
15 |
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
|
16 |
+ */ |
|
17 |
+ |
|
18 |
+#include "libavutil/cpu.h" |
|
19 |
+#include "libavutil/x86_cpu.h" |
|
20 |
+#include "libavfilter/gradfun.h" |
|
21 |
+ |
|
22 |
+DECLARE_ALIGNED(16, static const uint16_t, pw_7f)[8] = {0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F}; |
|
23 |
+DECLARE_ALIGNED(16, static const uint16_t, pw_ff)[8] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF}; |
|
24 |
+ |
|
25 |
+void ff_gradfun_filter_line_mmx2(uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers) |
|
26 |
+{ |
|
27 |
+#if HAVE_MMX |
|
28 |
+ intptr_t x; |
|
29 |
+ if (width & 3) { |
|
30 |
+ x = width & ~3; |
|
31 |
+ ff_gradfun_filter_line_c(dst + x, src + x, dc + x / 2, width - x, thresh, dithers); |
|
32 |
+ width = x; |
|
33 |
+ } |
|
34 |
+ x = -width; |
|
35 |
+ __asm__ volatile( |
|
36 |
+ "movd %4, %%mm5 \n" |
|
37 |
+ "pxor %%mm7, %%mm7 \n" |
|
38 |
+ "pshufw $0, %%mm5, %%mm5 \n" |
|
39 |
+ "movq %6, %%mm6 \n" |
|
40 |
+ "movq %5, %%mm4 \n" |
|
41 |
+ "1: \n" |
|
42 |
+ "movd (%2,%0), %%mm0 \n" |
|
43 |
+ "movd (%3,%0), %%mm1 \n" |
|
44 |
+ "punpcklbw %%mm7, %%mm0 \n" |
|
45 |
+ "punpcklwd %%mm1, %%mm1 \n" |
|
46 |
+ "psllw $7, %%mm0 \n" |
|
47 |
+ "pxor %%mm2, %%mm2 \n" |
|
48 |
+ "psubw %%mm0, %%mm1 \n" // delta = dc - pix |
|
49 |
+ "psubw %%mm1, %%mm2 \n" |
|
50 |
+ "pmaxsw %%mm1, %%mm2 \n" |
|
51 |
+ "pmulhuw %%mm5, %%mm2 \n" // m = abs(delta) * thresh >> 16 |
|
52 |
+ "psubw %%mm6, %%mm2 \n" |
|
53 |
+ "pminsw %%mm7, %%mm2 \n" // m = -max(0, 127-m) |
|
54 |
+ "pmullw %%mm2, %%mm2 \n" |
|
55 |
+ "paddw %%mm4, %%mm0 \n" // pix += dither |
|
56 |
+ "pmulhw %%mm2, %%mm1 \n" |
|
57 |
+ "psllw $2, %%mm1 \n" // m = m*m*delta >> 14 |
|
58 |
+ "paddw %%mm1, %%mm0 \n" // pix += m |
|
59 |
+ "psraw $7, %%mm0 \n" |
|
60 |
+ "packuswb %%mm0, %%mm0 \n" |
|
61 |
+ "movd %%mm0, (%1,%0) \n" // dst = clip(pix>>7) |
|
62 |
+ "add $4, %0 \n" |
|
63 |
+ "jl 1b \n" |
|
64 |
+ "emms \n" |
|
65 |
+ :"+r"(x) |
|
66 |
+ :"r"(dst+width), "r"(src+width), "r"(dc+width/2), |
|
67 |
+ "rm"(thresh), "m"(*dithers), "m"(*pw_7f) |
|
68 |
+ :"memory" |
|
69 |
+ ); |
|
70 |
+#endif |
|
71 |
+} |
|
72 |
+ |
|
73 |
+void ff_gradfun_filter_line_ssse3(uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers) |
|
74 |
+{ |
|
75 |
+#if HAVE_SSSE3 |
|
76 |
+ intptr_t x; |
|
77 |
+ if (width & 7) { |
|
78 |
+ // could be 10% faster if I somehow eliminated this |
|
79 |
+ x = width & ~7; |
|
80 |
+ ff_gradfun_filter_line_c(dst + x, src + x, dc + x / 2, width - x, thresh, dithers); |
|
81 |
+ width = x; |
|
82 |
+ } |
|
83 |
+ x = -width; |
|
84 |
+ __asm__ volatile( |
|
85 |
+ "movd %4, %%xmm5 \n" |
|
86 |
+ "pxor %%xmm7, %%xmm7 \n" |
|
87 |
+ "pshuflw $0,%%xmm5, %%xmm5 \n" |
|
88 |
+ "movdqa %6, %%xmm6 \n" |
|
89 |
+ "punpcklqdq %%xmm5, %%xmm5 \n" |
|
90 |
+ "movdqa %5, %%xmm4 \n" |
|
91 |
+ "1: \n" |
|
92 |
+ "movq (%2,%0), %%xmm0 \n" |
|
93 |
+ "movq (%3,%0), %%xmm1 \n" |
|
94 |
+ "punpcklbw %%xmm7, %%xmm0 \n" |
|
95 |
+ "punpcklwd %%xmm1, %%xmm1 \n" |
|
96 |
+ "psllw $7, %%xmm0 \n" |
|
97 |
+ "psubw %%xmm0, %%xmm1 \n" // delta = dc - pix |
|
98 |
+ "pabsw %%xmm1, %%xmm2 \n" |
|
99 |
+ "pmulhuw %%xmm5, %%xmm2 \n" // m = abs(delta) * thresh >> 16 |
|
100 |
+ "psubw %%xmm6, %%xmm2 \n" |
|
101 |
+ "pminsw %%xmm7, %%xmm2 \n" // m = -max(0, 127-m) |
|
102 |
+ "pmullw %%xmm2, %%xmm2 \n" |
|
103 |
+ "psllw $1, %%xmm2 \n" |
|
104 |
+ "paddw %%xmm4, %%xmm0 \n" // pix += dither |
|
105 |
+ "pmulhrsw %%xmm2, %%xmm1 \n" // m = m*m*delta >> 14 |
|
106 |
+ "paddw %%xmm1, %%xmm0 \n" // pix += m |
|
107 |
+ "psraw $7, %%xmm0 \n" |
|
108 |
+ "packuswb %%xmm0, %%xmm0 \n" |
|
109 |
+ "movq %%xmm0, (%1,%0) \n" // dst = clip(pix>>7) |
|
110 |
+ "add $8, %0 \n" |
|
111 |
+ "jl 1b \n" |
|
112 |
+ :"+&r"(x) |
|
113 |
+ :"r"(dst+width), "r"(src+width), "r"(dc+width/2), |
|
114 |
+ "rm"(thresh), "m"(*dithers), "m"(*pw_7f) |
|
115 |
+ :"memory" |
|
116 |
+ ); |
|
117 |
+#endif // HAVE_SSSE3 |
|
118 |
+} |
|
119 |
+ |
|
120 |
+void ff_gradfun_blur_line_sse2(uint16_t *dc, uint16_t *buf, uint16_t *buf1, uint8_t *src, int src_linesize, int width) |
|
121 |
+{ |
|
122 |
+#if HAVE_SSE |
|
123 |
+#define BLURV(load)\ |
|
124 |
+ intptr_t x = -2*width;\ |
|
125 |
+ __asm__ volatile(\ |
|
126 |
+ "movdqa %6, %%xmm7 \n"\ |
|
127 |
+ "1: \n"\ |
|
128 |
+ load" (%4,%0), %%xmm0 \n"\ |
|
129 |
+ load" (%5,%0), %%xmm1 \n"\ |
|
130 |
+ "movdqa %%xmm0, %%xmm2 \n"\ |
|
131 |
+ "movdqa %%xmm1, %%xmm3 \n"\ |
|
132 |
+ "psrlw $8, %%xmm0 \n"\ |
|
133 |
+ "psrlw $8, %%xmm1 \n"\ |
|
134 |
+ "pand %%xmm7, %%xmm2 \n"\ |
|
135 |
+ "pand %%xmm7, %%xmm3 \n"\ |
|
136 |
+ "paddw %%xmm1, %%xmm0 \n"\ |
|
137 |
+ "paddw %%xmm3, %%xmm2 \n"\ |
|
138 |
+ "paddw %%xmm2, %%xmm0 \n"\ |
|
139 |
+ "paddw (%2,%0), %%xmm0 \n"\ |
|
140 |
+ "movdqa (%1,%0), %%xmm1 \n"\ |
|
141 |
+ "movdqa %%xmm0, (%1,%0) \n"\ |
|
142 |
+ "psubw %%xmm1, %%xmm0 \n"\ |
|
143 |
+ "movdqa %%xmm0, (%3,%0) \n"\ |
|
144 |
+ "add $16, %0 \n"\ |
|
145 |
+ "jl 1b \n"\ |
|
146 |
+ :"+&r"(x)\ |
|
147 |
+ :"r"(buf+width),\ |
|
148 |
+ "r"(buf1+width),\ |
|
149 |
+ "r"(dc+width),\ |
|
150 |
+ "r"(src+width*2),\ |
|
151 |
+ "r"(src+width*2+src_linesize),\ |
|
152 |
+ "m"(*pw_ff)\ |
|
153 |
+ :"memory"\ |
|
154 |
+ ); |
|
155 |
+ if (((intptr_t) src | src_linesize) & 15) { |
|
156 |
+ BLURV("movdqu"); |
|
157 |
+ } else { |
|
158 |
+ BLURV("movdqa"); |
|
159 |
+ } |
|
160 |
+#endif // HAVE_SSE |
|
161 |
+} |