... | ... |
@@ -2175,6 +2175,7 @@ sab_filter_deps="gpl swscale" |
2175 | 2175 |
scale_filter_deps="swscale" |
2176 | 2176 |
smartblur_filter_deps="gpl swscale" |
2177 | 2177 |
showspectrum_filter_deps="avcodec rdft" |
2178 |
+spp_filter_deps="gpl avcodec fft" |
|
2178 | 2179 |
stereo3d_filter_deps="gpl" |
2179 | 2180 |
subtitles_filter_deps="avformat avcodec libass" |
2180 | 2181 |
super2xsai_filter_deps="gpl" |
... | ... |
@@ -6476,6 +6476,42 @@ stereo3d=abl:sbsr |
6476 | 6476 |
@end example |
6477 | 6477 |
@end itemize |
6478 | 6478 |
|
6479 |
+@section spp |
|
6480 |
+ |
|
6481 |
+Apply a simple postprocessing filter that compresses and decompresses the image |
|
6482 |
+at several (or - in the case of @option{quality} level @code{6} - all) shifts |
|
6483 |
+and averages the results. |
|
6484 |
+ |
|
6485 |
+The filter accepts the following options: |
|
6486 |
+ |
|
6487 |
+@table @option |
|
6488 |
+@item quality |
|
6489 |
+Set quality. This option defines the number of levels for averaging. It accepts |
|
6490 |
+an integer in the range 0-6. If set to @code{0}, the filter will have no |
|
6491 |
+effect. A value of @code{6} means the highest quality. For each increment of |
|
6492 |
+that value the speed drops by a factor of approximately 2. Default value is |
|
6493 |
+@code{3}. |
|
6494 |
+ |
|
6495 |
+@item qp |
|
6496 |
+Force a constant quantization parameter. If not set, the filter will use the QP |
|
6497 |
+from the video stream (if available). |
|
6498 |
+ |
|
6499 |
+@item mode |
|
6500 |
+Set thresholding mode. Available modes are: |
|
6501 |
+ |
|
6502 |
+@table @samp |
|
6503 |
+@item hard |
|
6504 |
+Set hard thresholding (default). |
|
6505 |
+@item soft |
|
6506 |
+Set soft thresholding (better de-ringing effect, but likely blurrier). |
|
6507 |
+@end table |
|
6508 |
+ |
|
6509 |
+@item use_bframe_qp |
|
6510 |
+Enable the use of the QP from the B-Frames if set to @code{1}. Using this |
|
6511 |
+option may cause flicker since the B-Frames often have a larger QP. Default is |
|
6512 |
+@code{0} (not enabled). |
|
6513 |
+@end table |
|
6514 |
+ |
|
6479 | 6515 |
@anchor{subtitles} |
6480 | 6516 |
@section subtitles |
6481 | 6517 |
|
... | ... |
@@ -182,6 +182,7 @@ OBJS-$(CONFIG_SETTB_FILTER) += f_settb.o |
182 | 182 |
OBJS-$(CONFIG_SHOWINFO_FILTER) += vf_showinfo.o |
183 | 183 |
OBJS-$(CONFIG_SMARTBLUR_FILTER) += vf_smartblur.o |
184 | 184 |
OBJS-$(CONFIG_SPLIT_FILTER) += split.o |
185 |
+OBJS-$(CONFIG_SPP_FILTER) += vf_spp.o |
|
185 | 186 |
OBJS-$(CONFIG_STEREO3D_FILTER) += vf_stereo3d.o |
186 | 187 |
OBJS-$(CONFIG_SUBTITLES_FILTER) += vf_subtitles.o |
187 | 188 |
OBJS-$(CONFIG_SUPER2XSAI_FILTER) += vf_super2xsai.o |
... | ... |
@@ -177,6 +177,7 @@ void avfilter_register_all(void) |
177 | 177 |
REGISTER_FILTER(SHOWINFO, showinfo, vf); |
178 | 178 |
REGISTER_FILTER(SMARTBLUR, smartblur, vf); |
179 | 179 |
REGISTER_FILTER(SPLIT, split, vf); |
180 |
+ REGISTER_FILTER(SPP, spp, vf); |
|
180 | 181 |
REGISTER_FILTER(STEREO3D, stereo3d, vf); |
181 | 182 |
REGISTER_FILTER(SUBTITLES, subtitles, vf); |
182 | 183 |
REGISTER_FILTER(SUPER2XSAI, super2xsai, vf); |
... | ... |
@@ -30,8 +30,8 @@ |
30 | 30 |
#include "libavutil/avutil.h" |
31 | 31 |
|
32 | 32 |
#define LIBAVFILTER_VERSION_MAJOR 3 |
33 |
-#define LIBAVFILTER_VERSION_MINOR 76 |
|
34 |
-#define LIBAVFILTER_VERSION_MICRO 101 |
|
33 |
+#define LIBAVFILTER_VERSION_MINOR 77 |
|
34 |
+#define LIBAVFILTER_VERSION_MICRO 100 |
|
35 | 35 |
|
36 | 36 |
#define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \ |
37 | 37 |
LIBAVFILTER_VERSION_MINOR, \ |
38 | 38 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,437 @@ |
0 |
+/* |
|
1 |
+ * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> |
|
2 |
+ * Copyright (c) 2013 Clément Bœsch <ubitux@gmail.com> |
|
3 |
+ * |
|
4 |
+ * This file is part of FFmpeg. |
|
5 |
+ * |
|
6 |
+ * FFmpeg is free software; you can redistribute it and/or modify |
|
7 |
+ * it under the terms of the GNU General Public License as published by |
|
8 |
+ * the Free Software Foundation; either version 2 of the License, or |
|
9 |
+ * (at your option) any later version. |
|
10 |
+ * |
|
11 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
12 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
13 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
14 |
+ * GNU General Public License for more details. |
|
15 |
+ * |
|
16 |
+ * You should have received a copy of the GNU General Public License along |
|
17 |
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc., |
|
18 |
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
|
19 |
+ */ |
|
20 |
+ |
|
21 |
+/** |
|
22 |
+ * @file |
|
23 |
+ * Simple post processing filter |
|
24 |
+ * |
|
25 |
+ * This implementation is based on an algorithm described in |
|
26 |
+ * "Aria Nosratinia Embedded Post-Processing for |
|
27 |
+ * Enhancement of Compressed Images (1999)" |
|
28 |
+ * |
|
29 |
+ * Originally written by Michael Niedermayer for the MPlayer project, and |
|
30 |
+ * ported by Clément Bœsch for FFmpeg. |
|
31 |
+ */ |
|
32 |
+ |
|
33 |
+#include "libavcodec/dsputil.h" |
|
34 |
+#include "libavutil/avassert.h" |
|
35 |
+#include "libavutil/imgutils.h" |
|
36 |
+#include "libavutil/opt.h" |
|
37 |
+#include "libavutil/pixdesc.h" |
|
38 |
+#include "internal.h" |
|
39 |
+#include "vf_spp.h" |
|
40 |
+ |
|
41 |
/* Thresholding variants selectable through the "mode" option. The numeric
 * values are spelled out because they are what the option stores in
 * SPPContext.mode. */
enum mode {
    MODE_HARD = 0,  /* zero coefficients inside the threshold, keep the others */
    MODE_SOFT = 1,  /* as above, and shrink the surviving coefficients */
    NB_MODES        /* number of modes; NB_MODES - 1 is the option's maximum */
};
|
46 |
+ |
|
47 |
+#define OFFSET(x) offsetof(SPPContext, x) |
|
48 |
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM |
|
49 |
+static const AVOption spp_options[] = { |
|
50 |
+ { "quality", "set quality", OFFSET(log2_count), AV_OPT_TYPE_INT, {.i64 = 3}, 0, MAX_LEVEL, FLAGS }, |
|
51 |
+ { "qp", "force a constant quantizer parameter", OFFSET(qp), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 63, FLAGS }, |
|
52 |
+ { "mode", "set thresholding mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64 = MODE_HARD}, 0, NB_MODES - 1, FLAGS, "mode" }, |
|
53 |
+ { "hard", "hard thresholding", 0, AV_OPT_TYPE_CONST, {.i64 = MODE_HARD}, INT_MIN, INT_MAX, FLAGS, "mode" }, |
|
54 |
+ { "soft", "soft thresholding", 0, AV_OPT_TYPE_CONST, {.i64 = MODE_SOFT}, INT_MIN, INT_MAX, FLAGS, "mode" }, |
|
55 |
+ { "use_bframe_qp", "use B-frames' QP", OFFSET(use_bframe_qp), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, FLAGS }, |
|
56 |
+ { NULL } |
|
57 |
+}; |
|
58 |
+ |
|
59 |
+AVFILTER_DEFINE_CLASS(spp); |
|
60 |
+ |
|
61 |
+// XXX: share between filters? |
|
62 |
+DECLARE_ALIGNED(8, static const uint8_t, ldither)[8][8] = { |
|
63 |
+ { 0, 48, 12, 60, 3, 51, 15, 63 }, |
|
64 |
+ { 32, 16, 44, 28, 35, 19, 47, 31 }, |
|
65 |
+ { 8, 56, 4, 52, 11, 59, 7, 55 }, |
|
66 |
+ { 40, 24, 36, 20, 43, 27, 39, 23 }, |
|
67 |
+ { 2, 50, 14, 62, 1, 49, 13, 61 }, |
|
68 |
+ { 34, 18, 46, 30, 33, 17, 45, 29 }, |
|
69 |
+ { 10, 58, 6, 54, 9, 57, 5, 53 }, |
|
70 |
+ { 42, 26, 38, 22, 41, 25, 37, 21 }, |
|
71 |
+}; |
|
72 |
+ |
|
73 |
/* Block shifts {x, y} applied at each quality level. The entries for
 * quality q are the (1 << q) pairs starting at index (1 << q) - 1, which is
 * how filter() reads them: offset[i + count - 1] with count = 1 << quality. */
static const uint8_t offset[127][2] = {
    /* quality = 0 (1 shift) */
    {0,0},

    /* quality = 1 (2 shifts) */
    {0,0}, {4,4},

    /* quality = 2 (4 shifts) */
    {0,0}, {2,2}, {6,4}, {4,6},

    /* quality = 3 (8 shifts) */
    {0,0}, {5,1}, {2,2}, {7,3}, {4,4}, {1,5}, {6,6}, {3,7},

    /* quality = 4 (16 shifts) */
    {0,0}, {4,0}, {1,1}, {5,1}, {3,2}, {7,2}, {2,3}, {6,3},
    {0,4}, {4,4}, {1,5}, {5,5}, {3,6}, {7,6}, {2,7}, {6,7},

    /* quality = 5 (32 shifts) */
    {0,0}, {0,2}, {0,4}, {0,6}, {1,1}, {1,3}, {1,5}, {1,7},
    {2,0}, {2,2}, {2,4}, {2,6}, {3,1}, {3,3}, {3,5}, {3,7},
    {4,0}, {4,2}, {4,4}, {4,6}, {5,1}, {5,3}, {5,5}, {5,7},
    {6,0}, {6,2}, {6,4}, {6,6}, {7,1}, {7,3}, {7,5}, {7,7},

    /* quality = 6 (all 64 shifts) */
    {0,0}, {4,4}, {0,4}, {4,0}, {2,2}, {6,6}, {2,6}, {6,2},
    {0,2}, {4,6}, {0,6}, {4,2}, {2,0}, {6,4}, {2,4}, {6,0},
    {1,1}, {5,5}, {1,5}, {5,1}, {3,3}, {7,7}, {3,7}, {7,3},
    {1,3}, {5,7}, {1,7}, {5,3}, {3,1}, {7,5}, {3,5}, {7,1},
    {0,1}, {4,5}, {0,5}, {4,1}, {2,3}, {6,7}, {2,7}, {6,3},
    {0,3}, {4,7}, {0,7}, {4,3}, {2,1}, {6,5}, {2,5}, {6,1},
    {1,0}, {5,4}, {1,4}, {5,0}, {3,2}, {7,6}, {3,6}, {7,2},
    {1,2}, {5,6}, {1,6}, {5,2}, {3,0}, {7,4}, {3,4}, {7,0},
};
|
96 |
+ |
|
97 |
/**
 * Hard-threshold one block of 64 coefficients.
 *
 * dst is cleared, the first coefficient is always kept, and every other
 * coefficient whose magnitude exceeds qp * 16 - 1 is written to
 * dst[permutation[i]]. Kept values are rounded and divided by 8.
 *
 * @param dst         output block (64 entries)
 * @param src         input coefficients (64 entries)
 * @param qp          quantizer used to derive the threshold
 * @param permutation maps each input index to its output position
 */
static void hardthresh_c(int16_t dst[64], const int16_t src[64],
                         int qp, const uint8_t *permutation)
{
    const int bias = 0; // FIXME
    const unsigned thresh = qp * ((1 << 4) - bias) - 1;
    const unsigned span   = thresh << 1;
    int k;

    memset(dst, 0, 64 * sizeof(*dst));
    dst[0] = (src[0] + 4) >> 3;

    for (k = 1; k < 64; k++) {
        const int coef = src[k];

        /* one unsigned compare tests -thresh <= coef <= thresh */
        if ((unsigned)(coef + thresh) <= span)
            continue;
        dst[permutation[k]] = (coef + 4) >> 3;
    }
}
|
117 |
+ |
|
118 |
/**
 * Soft-threshold one block of 64 coefficients.
 *
 * Works like the hard variant (same threshold qp * 16 - 1, first coefficient
 * always kept), but each surviving coefficient is moved towards zero by the
 * threshold before being rounded and divided by 8.
 *
 * @param dst         output block (64 entries)
 * @param src         input coefficients (64 entries)
 * @param qp          quantizer used to derive the threshold
 * @param permutation maps each input index to its output position
 */
static void softthresh_c(int16_t dst[64], const int16_t src[64],
                         int qp, const uint8_t *permutation)
{
    const int bias = 0; //FIXME
    const unsigned thresh = qp * ((1 << 4) - bias) - 1;
    const unsigned span   = thresh << 1;
    int k;

    memset(dst, 0, 64 * sizeof(*dst));
    dst[0] = (src[0] + 4) >> 3;

    for (k = 1; k < 64; k++) {
        const int coef = src[k];
        unsigned shrunk;

        if ((unsigned)(coef + thresh) <= span)
            continue;

        /* deliberately evaluated in unsigned arithmetic: only the low 16 bits
         * of the shifted value are stored */
        shrunk = coef > 0 ? coef - thresh + 4
                          : coef + thresh + 4;
        dst[permutation[k]] = shrunk >> 3;
    }
}
|
139 |
+ |
|
140 |
/**
 * Convert a slice of accumulated 16-bit samples to 8-bit pixels.
 *
 * Each sample is scaled by 2^log2_scale, the dither value of its column is
 * added, the sum is shifted right by 6 and the result is clamped to 0..255.
 * Pixels are written in groups of 8, so a width that is not a multiple of 8
 * is rounded up and the buffers must be large enough for that.
 *
 * @param dst          output pixels
 * @param src          accumulated samples
 * @param dst_linesize distance between output rows, in bytes
 * @param src_linesize distance between input rows, in samples
 * @param width        number of pixels per row
 * @param height       number of rows, at most 8 (one dither row per line)
 * @param log2_scale   left shift applied to the samples before dithering
 * @param dither       8x8 dither matrix, indexed [row][column within group]
 */
static void store_slice_c(uint8_t *dst, const int16_t *src,
                          int dst_linesize, int src_linesize,
                          int width, int height, int log2_scale,
                          const uint8_t dither[8][8])
{
    /* Multiply instead of shifting: the accumulator can hold negative
     * values, and left-shifting a negative int is undefined behaviour. */
    const int scale = 1 << log2_scale;
    int y, x, k;

    for (y = 0; y < height; y++) {
        const uint8_t *d   = dither[y];
        const int16_t *in  = src + y * src_linesize;
        uint8_t       *out = dst + y * dst_linesize;

        for (x = 0; x < width; x += 8) {
            for (k = 0; k < 8; k++) {
                int v = (in[x + k] * scale + d[k]) >> 6;

                /* bit 8 set means the value left 0..255: negative values
                 * become 0, too large ones 255 */
                if (v & 0x100)
                    v = ~(v >> 31);
                out[x + k] = v;
            }
        }
    }
}
|
169 |
+ |
|
170 |
/**
 * Add an 8x8 block of 16-bit values to an accumulation buffer.
 *
 * The additions are done per element. Adding two samples at a time through
 * uint32_t pointers breaks strict aliasing, can be misaligned when the
 * destination column is odd, and lets a carry out of the low sample spill
 * into its neighbour.
 *
 * @param dst      top-left sample of the destination area
 * @param linesize distance between destination rows, in samples
 * @param block    8x8 block to add, stored row by row
 */
static inline void add_block(int16_t *dst, int linesize, const int16_t block[64])
{
    int y, x;

    for (y = 0; y < 8; y++) {
        int16_t       *acc = dst   + y * linesize;
        const int16_t *add = block + y * 8;

        for (x = 0; x < 8; x++)
            acc[x] = (int16_t)(uint16_t)(acc[x] + add[x]);
    }
}
|
181 |
+ |
|
182 |
+// XXX: export the function? |
|
183 |
+static inline int norm_qscale(int qscale, int type) |
|
184 |
+{ |
|
185 |
+ switch (type) { |
|
186 |
+ case FF_QSCALE_TYPE_MPEG1: return qscale; |
|
187 |
+ case FF_QSCALE_TYPE_MPEG2: return qscale >> 1; |
|
188 |
+ case FF_QSCALE_TYPE_H264: return qscale >> 2; |
|
189 |
+ case FF_QSCALE_TYPE_VP56: return (63 - qscale + 2) >> 2; |
|
190 |
+ } |
|
191 |
+ return qscale; |
|
192 |
+} |
|
193 |
+ |
|
194 |
+static void filter(SPPContext *p, uint8_t *dst, uint8_t *src, |
|
195 |
+ int dst_linesize, int src_linesize, int width, int height, |
|
196 |
+ const uint8_t *qp_table, int qp_stride, int is_luma) |
|
197 |
+{ |
|
198 |
+ int x, y, i; |
|
199 |
+ const int count = 1 << p->log2_count; |
|
200 |
+ const int linesize = is_luma ? p->temp_linesize : FFALIGN(width+16, 16); |
|
201 |
+ DECLARE_ALIGNED(16, uint64_t, block_align)[32]; |
|
202 |
+ int16_t *block = (int16_t *)block_align; |
|
203 |
+ int16_t *block2 = (int16_t *)(block_align + 16); |
|
204 |
+ |
|
205 |
+ for (y = 0; y < height; y++) { |
|
206 |
+ int index = 8 + 8*linesize + y*linesize; |
|
207 |
+ memcpy(p->src + index, src + y*src_linesize, width); |
|
208 |
+ for (x = 0; x < 8; x++) { |
|
209 |
+ p->src[index - x - 1] = p->src[index + x ]; |
|
210 |
+ p->src[index + width + x ] = p->src[index + width - x - 1]; |
|
211 |
+ } |
|
212 |
+ } |
|
213 |
+ for (y = 0; y < 8; y++) { |
|
214 |
+ memcpy(p->src + ( 7-y)*linesize, p->src + ( y+8)*linesize, linesize); |
|
215 |
+ memcpy(p->src + (height+8+y)*linesize, p->src + (height-y+7)*linesize, linesize); |
|
216 |
+ } |
|
217 |
+ |
|
218 |
+ for (y = 0; y < height + 8; y += 8) { |
|
219 |
+ memset(p->temp + (8 + y) * linesize, 0, 8 * linesize * sizeof(*p->temp)); |
|
220 |
+ for (x = 0; x < width + 8; x += 8) { |
|
221 |
+ int qp; |
|
222 |
+ |
|
223 |
+ if (p->qp) { |
|
224 |
+ qp = p->qp; |
|
225 |
+ } else{ |
|
226 |
+ const int qps = 3 + is_luma; |
|
227 |
+ qp = qp_table[(FFMIN(x, width - 1) >> qps) + (FFMIN(y, height - 1) >> qps) * qp_stride]; |
|
228 |
+ qp = FFMAX(1, norm_qscale(qp, p->qscale_type)); |
|
229 |
+ } |
|
230 |
+ for (i = 0; i < count; i++) { |
|
231 |
+ const int x1 = x + offset[i + count - 1][0]; |
|
232 |
+ const int y1 = y + offset[i + count - 1][1]; |
|
233 |
+ const int index = x1 + y1*linesize; |
|
234 |
+ p->dsp.get_pixels(block, p->src + index, linesize); |
|
235 |
+ p->dsp.fdct(block); |
|
236 |
+ p->requantize(block2, block, qp, p->dsp.idct_permutation); |
|
237 |
+ p->dsp.idct(block2); |
|
238 |
+ add_block(p->temp + index, linesize, block2); |
|
239 |
+ } |
|
240 |
+ } |
|
241 |
+ if (y) |
|
242 |
+ p->store_slice(dst + (y - 8) * dst_linesize, p->temp + 8 + y*linesize, |
|
243 |
+ dst_linesize, linesize, width, |
|
244 |
+ FFMIN(8, height + 8 - y), MAX_LEVEL - p->log2_count, |
|
245 |
+ ldither); |
|
246 |
+ } |
|
247 |
+} |
|
248 |
+ |
|
249 |
+static int query_formats(AVFilterContext *ctx) |
|
250 |
+{ |
|
251 |
+ static const enum PixelFormat pix_fmts[] = { |
|
252 |
+ AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P, |
|
253 |
+ AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV411P, |
|
254 |
+ AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV440P, |
|
255 |
+ AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ422P, |
|
256 |
+ AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ440P, |
|
257 |
+ AV_PIX_FMT_NONE |
|
258 |
+ }; |
|
259 |
+ ff_set_common_formats(ctx, ff_make_format_list(pix_fmts)); |
|
260 |
+ return 0; |
|
261 |
+} |
|
262 |
+ |
|
263 |
+static int config_input(AVFilterLink *inlink) |
|
264 |
+{ |
|
265 |
+ SPPContext *spp = inlink->dst->priv; |
|
266 |
+ const int h = FFALIGN(inlink->h + 16, 16); |
|
267 |
+ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format); |
|
268 |
+ |
|
269 |
+ spp->hsub = desc->log2_chroma_w; |
|
270 |
+ spp->vsub = desc->log2_chroma_h; |
|
271 |
+ spp->temp_linesize = FFALIGN(inlink->w + 16, 16); |
|
272 |
+ spp->temp = av_malloc(spp->temp_linesize * h * sizeof(*spp->temp)); |
|
273 |
+ spp->src = av_malloc(spp->temp_linesize * h * sizeof(*spp->src)); |
|
274 |
+ if (!spp->use_bframe_qp) { |
|
275 |
+ /* we are assuming here the qp blocks will not be smaller that 16x16 */ |
|
276 |
+ spp->non_b_qp_alloc_size = FF_CEIL_RSHIFT(inlink->w, 4) * FF_CEIL_RSHIFT(inlink->h, 4); |
|
277 |
+ spp->non_b_qp_table = av_calloc(spp->non_b_qp_alloc_size, sizeof(*spp->non_b_qp_table)); |
|
278 |
+ if (!spp->non_b_qp_table) |
|
279 |
+ return AVERROR(ENOMEM); |
|
280 |
+ } |
|
281 |
+ if (!spp->temp || !spp->src) |
|
282 |
+ return AVERROR(ENOMEM); |
|
283 |
+ return 0; |
|
284 |
+} |
|
285 |
+ |
|
286 |
+static int filter_frame(AVFilterLink *inlink, AVFrame *in) |
|
287 |
+{ |
|
288 |
+ AVFilterContext *ctx = inlink->dst; |
|
289 |
+ SPPContext *spp = ctx->priv; |
|
290 |
+ AVFilterLink *outlink = ctx->outputs[0]; |
|
291 |
+ AVFrame *out = in; |
|
292 |
+ int qp_stride = 0; |
|
293 |
+ const int8_t *qp_table = NULL; |
|
294 |
+ |
|
295 |
+ /* if we are not in a constant user quantizer mode and we don't want to use |
|
296 |
+ * the quantizers from the B-frames (B-frames often have a higher QP), we |
|
297 |
+ * need to save the qp table from the last non B-frame; this is what the |
|
298 |
+ * following code block does */ |
|
299 |
+ if (!spp->qp) { |
|
300 |
+ qp_table = av_frame_get_qp_table(in, &qp_stride, &spp->qscale_type); |
|
301 |
+ |
|
302 |
+ if (qp_table && !spp->use_bframe_qp && in->pict_type != AV_PICTURE_TYPE_B) { |
|
303 |
+ int w, h; |
|
304 |
+ |
|
305 |
+ /* if the qp stride is not set, it means the QP are only defined on |
|
306 |
+ * a line basis */ |
|
307 |
+ if (!qp_stride) { |
|
308 |
+ w = FF_CEIL_RSHIFT(inlink->w, 4); |
|
309 |
+ h = 1; |
|
310 |
+ } else { |
|
311 |
+ w = FF_CEIL_RSHIFT(qp_stride, 4); |
|
312 |
+ h = FF_CEIL_RSHIFT(inlink->h, 4); |
|
313 |
+ } |
|
314 |
+ av_assert0(w * h <= spp->non_b_qp_alloc_size); |
|
315 |
+ memcpy(spp->non_b_qp_table, qp_table, w * h); |
|
316 |
+ } |
|
317 |
+ } |
|
318 |
+ |
|
319 |
+ if (spp->log2_count && !ctx->is_disabled) { |
|
320 |
+ if (!spp->use_bframe_qp && spp->non_b_qp_table) |
|
321 |
+ qp_table = spp->non_b_qp_table; |
|
322 |
+ |
|
323 |
+ if (qp_table || spp->qp) { |
|
324 |
+ const int cw = FF_CEIL_RSHIFT(inlink->w, spp->hsub); |
|
325 |
+ const int ch = FF_CEIL_RSHIFT(inlink->h, spp->vsub); |
|
326 |
+ |
|
327 |
+ /* get a new frame if in-place is not possible or if the dimensions |
|
328 |
+ * are not multiple of 8 */ |
|
329 |
+ if (!av_frame_is_writable(in) || (inlink->w & 7) || (inlink->h & 7)) { |
|
330 |
+ const int aligned_w = FFALIGN(inlink->w, 8); |
|
331 |
+ const int aligned_h = FFALIGN(inlink->h, 8); |
|
332 |
+ |
|
333 |
+ out = ff_get_video_buffer(outlink, aligned_w, aligned_h); |
|
334 |
+ if (!out) { |
|
335 |
+ av_frame_free(&in); |
|
336 |
+ return AVERROR(ENOMEM); |
|
337 |
+ } |
|
338 |
+ av_frame_copy_props(out, in); |
|
339 |
+ out->width = in->width; |
|
340 |
+ out->height = in->height; |
|
341 |
+ } |
|
342 |
+ |
|
343 |
+ filter(spp, out->data[0], in->data[0], out->linesize[0], in->linesize[0], inlink->w, inlink->h, qp_table, qp_stride, 1); |
|
344 |
+ filter(spp, out->data[1], in->data[1], out->linesize[1], in->linesize[1], cw, ch, qp_table, qp_stride, 0); |
|
345 |
+ filter(spp, out->data[2], in->data[2], out->linesize[2], in->linesize[2], cw, ch, qp_table, qp_stride, 0); |
|
346 |
+ emms_c(); |
|
347 |
+ } |
|
348 |
+ } |
|
349 |
+ |
|
350 |
+ if (in != out) { |
|
351 |
+ if (in->data[3]) |
|
352 |
+ av_image_copy_plane(out->data[3], out->linesize[3], |
|
353 |
+ in ->data[3], in ->linesize[3], |
|
354 |
+ inlink->w, inlink->h); |
|
355 |
+ av_frame_free(&in); |
|
356 |
+ } |
|
357 |
+ return ff_filter_frame(outlink, out); |
|
358 |
+} |
|
359 |
+ |
|
360 |
+static int process_command(AVFilterContext *ctx, const char *cmd, const char *args, |
|
361 |
+ char *res, int res_len, int flags) |
|
362 |
+{ |
|
363 |
+ SPPContext *spp = ctx->priv; |
|
364 |
+ |
|
365 |
+ if (!strcmp(cmd, "level")) { |
|
366 |
+ if (!strcmp(args, "max")) |
|
367 |
+ spp->log2_count = MAX_LEVEL; |
|
368 |
+ else |
|
369 |
+ spp->log2_count = av_clip(strtol(args, NULL, 10), 0, MAX_LEVEL); |
|
370 |
+ return 0; |
|
371 |
+ } |
|
372 |
+ return AVERROR(ENOSYS); |
|
373 |
+} |
|
374 |
+ |
|
375 |
+static av_cold int init(AVFilterContext *ctx) |
|
376 |
+{ |
|
377 |
+ SPPContext *spp = ctx->priv; |
|
378 |
+ |
|
379 |
+ spp->avctx = avcodec_alloc_context3(NULL); |
|
380 |
+ if (!spp->avctx) |
|
381 |
+ return AVERROR(ENOMEM); |
|
382 |
+ avpriv_dsputil_init(&spp->dsp, spp->avctx); |
|
383 |
+ spp->store_slice = store_slice_c; |
|
384 |
+ switch (spp->mode) { |
|
385 |
+ case MODE_HARD: spp->requantize = hardthresh_c; break; |
|
386 |
+ case MODE_SOFT: spp->requantize = softthresh_c; break; |
|
387 |
+ } |
|
388 |
+ if (ARCH_X86) |
|
389 |
+ ff_spp_init_x86(spp); |
|
390 |
+ return 0; |
|
391 |
+} |
|
392 |
+ |
|
393 |
+static av_cold void uninit(AVFilterContext *ctx) |
|
394 |
+{ |
|
395 |
+ SPPContext *spp = ctx->priv; |
|
396 |
+ |
|
397 |
+ av_freep(&spp->temp); |
|
398 |
+ av_freep(&spp->src); |
|
399 |
+ if (spp->avctx) { |
|
400 |
+ avcodec_close(spp->avctx); |
|
401 |
+ av_freep(&spp->avctx); |
|
402 |
+ } |
|
403 |
+ av_freep(&spp->non_b_qp_table); |
|
404 |
+} |
|
405 |
+ |
|
406 |
+static const AVFilterPad spp_inputs[] = { |
|
407 |
+ { |
|
408 |
+ .name = "default", |
|
409 |
+ .type = AVMEDIA_TYPE_VIDEO, |
|
410 |
+ .config_props = config_input, |
|
411 |
+ .filter_frame = filter_frame, |
|
412 |
+ }, |
|
413 |
+ { NULL } |
|
414 |
+}; |
|
415 |
+ |
|
416 |
+static const AVFilterPad spp_outputs[] = { |
|
417 |
+ { |
|
418 |
+ .name = "default", |
|
419 |
+ .type = AVMEDIA_TYPE_VIDEO, |
|
420 |
+ }, |
|
421 |
+ { NULL } |
|
422 |
+}; |
|
423 |
+ |
|
424 |
+AVFilter avfilter_vf_spp = { |
|
425 |
+ .name = "spp", |
|
426 |
+ .description = NULL_IF_CONFIG_SMALL("XXX"), |
|
427 |
+ .priv_size = sizeof(SPPContext), |
|
428 |
+ .init = init, |
|
429 |
+ .uninit = uninit, |
|
430 |
+ .query_formats = query_formats, |
|
431 |
+ .inputs = spp_inputs, |
|
432 |
+ .outputs = spp_outputs, |
|
433 |
+ .process_command = process_command, |
|
434 |
+ .priv_class = &spp_class, |
|
435 |
+ .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL, |
|
436 |
+}; |
0 | 437 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,59 @@ |
0 |
+/* |
|
1 |
+ * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> |
|
2 |
+ * Copyright (c) 2013 Clément Bœsch |
|
3 |
+ * |
|
4 |
+ * This file is part of FFmpeg. |
|
5 |
+ * |
|
6 |
+ * FFmpeg is free software; you can redistribute it and/or modify |
|
7 |
+ * it under the terms of the GNU General Public License as published by |
|
8 |
+ * the Free Software Foundation; either version 2 of the License, or |
|
9 |
+ * (at your option) any later version. |
|
10 |
+ * |
|
11 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
12 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
13 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
14 |
+ * GNU General Public License for more details. |
|
15 |
+ * |
|
16 |
+ * You should have received a copy of the GNU General Public License along |
|
17 |
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc., |
|
18 |
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
|
19 |
+ */ |
|
20 |
+ |
|
21 |
+#ifndef AVFILTER_SPP_H |
|
22 |
+#define AVFILTER_SPP_H |
|
23 |
+ |
|
24 |
+#include "libavcodec/avcodec.h" |
|
25 |
+#include "libavcodec/dsputil.h" |
|
26 |
+#include "avfilter.h" |
|
27 |
+ |
|
28 |
+#define MAX_LEVEL 6 /* quality levels */ |
|
29 |
+ |
|
30 |
+typedef struct { |
|
31 |
+ const AVClass *av_class; |
|
32 |
+ |
|
33 |
+ int log2_count; |
|
34 |
+ int qp; |
|
35 |
+ int mode; |
|
36 |
+ int qscale_type; |
|
37 |
+ int temp_linesize; |
|
38 |
+ uint8_t *src; |
|
39 |
+ int16_t *temp; |
|
40 |
+ AVCodecContext *avctx; |
|
41 |
+ DSPContext dsp; |
|
42 |
+ int8_t *non_b_qp_table; |
|
43 |
+ int non_b_qp_alloc_size; |
|
44 |
+ int use_bframe_qp; |
|
45 |
+ int hsub, vsub; |
|
46 |
+ |
|
47 |
+ void (*store_slice)(uint8_t *dst, const int16_t *src, |
|
48 |
+ int dst_stride, int src_stride, |
|
49 |
+ int width, int height, int log2_scale, |
|
50 |
+ const uint8_t dither[8][8]); |
|
51 |
+ |
|
52 |
+ void (*requantize)(int16_t dst[64], const int16_t src[64], |
|
53 |
+ int qp, const uint8_t *permutation); |
|
54 |
+} SPPContext; |
|
55 |
+ |
|
56 |
+void ff_spp_init_x86(SPPContext *s); |
|
57 |
+ |
|
58 |
+#endif /* AVFILTER_SPP_H */ |
6 | 7 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,233 @@ |
0 |
+/* |
|
1 |
+ * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> |
|
2 |
+ * |
|
3 |
+ * This file is part of FFmpeg. |
|
4 |
+ * |
|
5 |
+ * FFmpeg is free software; you can redistribute it and/or modify |
|
6 |
+ * it under the terms of the GNU General Public License as published by |
|
7 |
+ * the Free Software Foundation; either version 2 of the License, or |
|
8 |
+ * (at your option) any later version. |
|
9 |
+ * |
|
10 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
11 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
12 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
13 |
+ * GNU General Public License for more details. |
|
14 |
+ * |
|
15 |
+ * You should have received a copy of the GNU General Public License along |
|
16 |
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc., |
|
17 |
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
|
18 |
+ */ |
|
19 |
+ |
|
20 |
+ |
|
21 |
+#include "libavutil/attributes.h" |
|
22 |
+#include "libavutil/cpu.h" |
|
23 |
+#include "libavutil/mem.h" |
|
24 |
+#include "libavutil/x86/asm.h" |
|
25 |
+#include "libavfilter/vf_spp.h" |
|
26 |
+ |
|
27 |
+#if HAVE_MMX_INLINE |
|
28 |
/**
 * MMX version of the hard thresholding of one 64-coefficient block.
 *
 * All 64 entries of dst are written by the asm; dst[0] is then replaced by
 * the rounded first coefficient. The permutation argument is not read: the
 * output order is fixed by the load/store offsets and the word interleave in
 * REQUANT_CORE.
 * NOTE(review): presumably that order matches the IDCT permutation in use
 * whenever this function is selected - confirm.
 *
 * @param dst         output block (64 entries)
 * @param src         input coefficients (64 entries)
 * @param qp          quantizer used to derive the threshold
 * @param permutation unused
 */
static void hardthresh_mmx(int16_t dst[64], const int16_t src[64],
                           int qp, const uint8_t *permutation)
{
    int bias = 0; //FIXME
    unsigned int threshold1;

    threshold1 = qp * ((1<<4) - bias) - 1;

#define REQUANT_CORE(dst0, dst1, dst2, dst3, src0, src1, src2, src3)    \
    "movq " #src0 ", %%mm0      \n"                                     \
    "movq " #src1 ", %%mm1      \n"                                     \
    "movq " #src2 ", %%mm2      \n"                                     \
    "movq " #src3 ", %%mm3      \n"                                     \
    "psubw %%mm4, %%mm0         \n"                                     \
    "psubw %%mm4, %%mm1         \n"                                     \
    "psubw %%mm4, %%mm2         \n"                                     \
    "psubw %%mm4, %%mm3         \n"                                     \
    "paddusw %%mm5, %%mm0       \n"                                     \
    "paddusw %%mm5, %%mm1       \n"                                     \
    "paddusw %%mm5, %%mm2       \n"                                     \
    "paddusw %%mm5, %%mm3       \n"                                     \
    "paddw %%mm6, %%mm0         \n"                                     \
    "paddw %%mm6, %%mm1         \n"                                     \
    "paddw %%mm6, %%mm2         \n"                                     \
    "paddw %%mm6, %%mm3         \n"                                     \
    "psubusw %%mm6, %%mm0       \n"                                     \
    "psubusw %%mm6, %%mm1       \n"                                     \
    "psubusw %%mm6, %%mm2       \n"                                     \
    "psubusw %%mm6, %%mm3       \n"                                     \
    "psraw $3, %%mm0            \n"                                     \
    "psraw $3, %%mm1            \n"                                     \
    "psraw $3, %%mm2            \n"                                     \
    "psraw $3, %%mm3            \n"                                     \
                                                                        \
    "movq %%mm0, %%mm7          \n"                                     \
    "punpcklwd %%mm2, %%mm0     \n" /*A*/                               \
    "punpckhwd %%mm2, %%mm7     \n" /*C*/                               \
    "movq %%mm1, %%mm2          \n"                                     \
    "punpcklwd %%mm3, %%mm1     \n" /*B*/                               \
    "punpckhwd %%mm3, %%mm2     \n" /*D*/                               \
    "movq %%mm0, %%mm3          \n"                                     \
    "punpcklwd %%mm1, %%mm0     \n" /*A*/                               \
    "punpckhwd %%mm7, %%mm3     \n" /*C*/                               \
    "punpcklwd %%mm2, %%mm7     \n" /*B*/                               \
    "punpckhwd %%mm2, %%mm1     \n" /*D*/                               \
                                                                        \
    "movq %%mm0, " #dst0 "      \n"                                     \
    "movq %%mm7, " #dst1 "      \n"                                     \
    "movq %%mm3, " #dst2 "      \n"                                     \
    "movq %%mm1, " #dst3 "      \n"

    /* The asm reads src and writes dst through the pointer operands only, so
     * a "memory" clobber is needed to keep the compiler from reordering the
     * dst[0] store below (or earlier writes to src) across it. */
    __asm__ volatile(
        "movd %2, %%mm4             \n"
        "movd %3, %%mm5             \n"
        "movd %4, %%mm6             \n"
        "packssdw %%mm4, %%mm4      \n"
        "packssdw %%mm5, %%mm5      \n"
        "packssdw %%mm6, %%mm6      \n"
        "packssdw %%mm4, %%mm4      \n"
        "packssdw %%mm5, %%mm5      \n"
        "packssdw %%mm6, %%mm6      \n"
        REQUANT_CORE(  (%1),  8(%1), 16(%1), 24(%1),  (%0), 8(%0), 64(%0), 72(%0))
        REQUANT_CORE(32(%1), 40(%1), 48(%1), 56(%1),16(%0),24(%0), 48(%0), 56(%0))
        REQUANT_CORE(64(%1), 72(%1), 80(%1), 88(%1),32(%0),40(%0), 96(%0),104(%0))
        REQUANT_CORE(96(%1),104(%1),112(%1),120(%1),80(%0),88(%0),112(%0),120(%0))
        : : "r" (src), "r" (dst), "g" (threshold1+1), "g" (threshold1+5), "g" (threshold1-4) //FIXME maybe more accurate than needed?
        : "memory"
    );
    dst[0] = (src[0] + 4) >> 3;
}
|
97 |
+ |
|
98 |
/**
 * MMX version of the soft thresholding of one 64-coefficient block.
 *
 * Each coefficient has its magnitude reduced by the threshold with unsigned
 * saturation (so small values become 0), keeps its sign, and is then rounded
 * and divided by 8. All 64 entries of dst are written by the asm; dst[0] is
 * then replaced by the rounded first coefficient. The permutation argument is
 * not read: the output order is fixed by the load/store offsets and the word
 * interleave in REQUANT_CORE.
 * NOTE(review): presumably that order matches the IDCT permutation in use
 * whenever this function is selected - confirm.
 *
 * @param dst         output block (64 entries)
 * @param src         input coefficients (64 entries)
 * @param qp          quantizer used to derive the threshold
 * @param permutation unused
 */
static void softthresh_mmx(int16_t dst[64], const int16_t src[64],
                           int qp, const uint8_t *permutation)
{
    int bias = 0; //FIXME
    unsigned int threshold1;

    threshold1 = qp*((1<<4) - bias) - 1;

#undef REQUANT_CORE
#define REQUANT_CORE(dst0, dst1, dst2, dst3, src0, src1, src2, src3)    \
    "movq " #src0 ", %%mm0      \n"                                     \
    "movq " #src1 ", %%mm1      \n"                                     \
    "pxor %%mm6, %%mm6          \n"                                     \
    "pxor %%mm7, %%mm7          \n"                                     \
    "pcmpgtw %%mm0, %%mm6       \n"                                     \
    "pcmpgtw %%mm1, %%mm7       \n"                                     \
    "pxor %%mm6, %%mm0          \n"                                     \
    "pxor %%mm7, %%mm1          \n"                                     \
    "psubusw %%mm4, %%mm0       \n"                                     \
    "psubusw %%mm4, %%mm1       \n"                                     \
    "pxor %%mm6, %%mm0          \n"                                     \
    "pxor %%mm7, %%mm1          \n"                                     \
    "movq " #src2 ", %%mm2      \n"                                     \
    "movq " #src3 ", %%mm3      \n"                                     \
    "pxor %%mm6, %%mm6          \n"                                     \
    "pxor %%mm7, %%mm7          \n"                                     \
    "pcmpgtw %%mm2, %%mm6       \n"                                     \
    "pcmpgtw %%mm3, %%mm7       \n"                                     \
    "pxor %%mm6, %%mm2          \n"                                     \
    "pxor %%mm7, %%mm3          \n"                                     \
    "psubusw %%mm4, %%mm2       \n"                                     \
    "psubusw %%mm4, %%mm3       \n"                                     \
    "pxor %%mm6, %%mm2          \n"                                     \
    "pxor %%mm7, %%mm3          \n"                                     \
                                                                        \
    "paddsw %%mm5, %%mm0        \n"                                     \
    "paddsw %%mm5, %%mm1        \n"                                     \
    "paddsw %%mm5, %%mm2        \n"                                     \
    "paddsw %%mm5, %%mm3        \n"                                     \
    "psraw $3, %%mm0            \n"                                     \
    "psraw $3, %%mm1            \n"                                     \
    "psraw $3, %%mm2            \n"                                     \
    "psraw $3, %%mm3            \n"                                     \
                                                                        \
    "movq %%mm0, %%mm7          \n"                                     \
    "punpcklwd %%mm2, %%mm0     \n" /*A*/                               \
    "punpckhwd %%mm2, %%mm7     \n" /*C*/                               \
    "movq %%mm1, %%mm2          \n"                                     \
    "punpcklwd %%mm3, %%mm1     \n" /*B*/                               \
    "punpckhwd %%mm3, %%mm2     \n" /*D*/                               \
    "movq %%mm0, %%mm3          \n"                                     \
    "punpcklwd %%mm1, %%mm0     \n" /*A*/                               \
    "punpckhwd %%mm7, %%mm3     \n" /*C*/                               \
    "punpcklwd %%mm2, %%mm7     \n" /*B*/                               \
    "punpckhwd %%mm2, %%mm1     \n" /*D*/                               \
                                                                        \
    "movq %%mm0, " #dst0 "      \n"                                     \
    "movq %%mm7, " #dst1 "      \n"                                     \
    "movq %%mm3, " #dst2 "      \n"                                     \
    "movq %%mm1, " #dst3 "      \n"

    /* The asm reads src and writes dst through the pointer operands only, so
     * a "memory" clobber is needed to keep the compiler from reordering the
     * dst[0] store below (or earlier writes to src) across it. */
    __asm__ volatile(
        "movd %2, %%mm4             \n"
        "movd %3, %%mm5             \n"
        "packssdw %%mm4, %%mm4      \n"
        "packssdw %%mm5, %%mm5      \n"
        "packssdw %%mm4, %%mm4      \n"
        "packssdw %%mm5, %%mm5      \n"
        REQUANT_CORE(  (%1),  8(%1), 16(%1), 24(%1),  (%0), 8(%0), 64(%0), 72(%0))
        REQUANT_CORE(32(%1), 40(%1), 48(%1), 56(%1),16(%0),24(%0), 48(%0), 56(%0))
        REQUANT_CORE(64(%1), 72(%1), 80(%1), 88(%1),32(%0),40(%0), 96(%0),104(%0))
        REQUANT_CORE(96(%1),104(%1),112(%1),120(%1),80(%0),88(%0),112(%0),120(%0))
        : : "r" (src), "r" (dst), "g" (threshold1), "rm" (4) //FIXME maybe more accurate than needed?
        : "memory"
    );

    dst[0] = (src[0] + 4) >> 3;
}
|
175 |
+ |
|
176 |
+static void store_slice_mmx(uint8_t *dst, const int16_t *src, |
|
177 |
+ int dst_stride, int src_stride, |
|
178 |
+ int width, int height, int log2_scale, |
|
179 |
+ const uint8_t dither[8][8]) |
|
180 |
+{ |
|
181 |
+ int y; |
|
182 |
+ |
|
183 |
+ for (y = 0; y < height; y++) { |
|
184 |
+ uint8_t *dst1 = dst; |
|
185 |
+ const int16_t *src1 = src; |
|
186 |
+ __asm__ volatile( |
|
187 |
+ "movq (%3), %%mm3 \n" |
|
188 |
+ "movq (%3), %%mm4 \n" |
|
189 |
+ "movd %4, %%mm2 \n" |
|
190 |
+ "pxor %%mm0, %%mm0 \n" |
|
191 |
+ "punpcklbw %%mm0, %%mm3 \n" |
|
192 |
+ "punpckhbw %%mm0, %%mm4 \n" |
|
193 |
+ "psraw %%mm2, %%mm3 \n" |
|
194 |
+ "psraw %%mm2, %%mm4 \n" |
|
195 |
+ "movd %5, %%mm2 \n" |
|
196 |
+ "1: \n" |
|
197 |
+ "movq (%0), %%mm0 \n" |
|
198 |
+ "movq 8(%0), %%mm1 \n" |
|
199 |
+ "paddw %%mm3, %%mm0 \n" |
|
200 |
+ "paddw %%mm4, %%mm1 \n" |
|
201 |
+ "psraw %%mm2, %%mm0 \n" |
|
202 |
+ "psraw %%mm2, %%mm1 \n" |
|
203 |
+ "packuswb %%mm1, %%mm0 \n" |
|
204 |
+ "movq %%mm0, (%1) \n" |
|
205 |
+ "add $16, %0 \n" |
|
206 |
+ "add $8, %1 \n" |
|
207 |
+ "cmp %2, %1 \n" |
|
208 |
+ " jb 1b \n" |
|
209 |
+ : "+r" (src1), "+r"(dst1) |
|
210 |
+ : "r"(dst + width), "r"(dither[y]), "g"(log2_scale), "g"(MAX_LEVEL - log2_scale) |
|
211 |
+ ); |
|
212 |
+ src += src_stride; |
|
213 |
+ dst += dst_stride; |
|
214 |
+ } |
|
215 |
+} |
|
216 |
+ |
|
217 |
+#endif /* HAVE_MMX_INLINE */ |
|
218 |
+ |
|
219 |
+av_cold void ff_spp_init_x86(SPPContext *s) |
|
220 |
+{ |
|
221 |
+#if HAVE_MMX_INLINE |
|
222 |
+ int cpu_flags = av_get_cpu_flags(); |
|
223 |
+ |
|
224 |
+ if (cpu_flags & AV_CPU_FLAG_MMX) { |
|
225 |
+ s->store_slice = store_slice_mmx; |
|
226 |
+ switch (s->mode) { |
|
227 |
+ case 0: s->requantize = hardthresh_mmx; break; |
|
228 |
+ case 1: s->requantize = softthresh_mmx; break; |
|
229 |
+ } |
|
230 |
+ } |
|
231 |
+#endif |
|
232 |
+} |