Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Michael Niedermayer authored on 2014/12/13 00:22:27... | ... |
@@ -186,6 +186,36 @@ static void store_slice_c(uint8_t *dst, const uint16_t *src, |
186 | 186 |
} |
187 | 187 |
} |
188 | 188 |
|
189 |
/**
 * Scale, dither and store one slice of accumulated samples as 10-bit output.
 *
 * Every source sample is shifted left by log2_scale, half of the matching
 * dither value is added, and the sum is shifted right by 5. The result is
 * clipped to the 10-bit range 0..1023 before it is written to dst.
 *
 * @param dst           destination plane
 * @param src           accumulated samples
 * @param dst_linesize  destination stride, in uint16_t elements
 * @param src_linesize  source stride, in uint16_t elements
 * @param width         samples per row; rows are processed in groups of 8,
 *                      so a width that is not a multiple of 8 is rounded up
 * @param height        number of rows to store; row y reads dither[y], so
 *                      height must not exceed 8
 * @param log2_scale    left shift applied to every source sample
 * @param dither        8x8 dither matrix, row y is used for output row y
 */
static void store_slice16_c(uint16_t *dst, const uint16_t *src,
                            int dst_linesize, int src_linesize,
                            int width, int height, int log2_scale,
                            const uint8_t dither[8][8])
{
    const int max_value = 0x3FF; /* largest valid 10-bit sample */
    int y, x, pos;

    for (y = 0; y < height; y++) {
        const uint8_t *d = dither[y];
        const uint16_t *src_row = src + y * src_linesize;
        uint16_t *dst_row = dst + y * dst_linesize;

        for (x = 0; x < width; x += 8) {
            for (pos = 0; pos < 8; pos++) {
                int temp = ((src_row[x + pos] << log2_scale) + (d[pos] >> 1)) >> 5;

                /* Any bit above bit 9 means the value left the 10-bit range.
                 * Testing only bit 10 would let values such as 0x800 through,
                 * and saturating to -1 would store 0xFFFF in a 16-bit sample. */
                if (temp & ~max_value)
                    temp = temp < 0 ? 0 : max_value;

                dst_row[x + pos] = temp;
            }
        }
    }
}
|
218 |
+ |
|
189 | 219 |
static inline void add_block(uint16_t *dst, int linesize, const int16_t block[64]) |
190 | 220 |
{ |
191 | 221 |
int y; |
... | ... |
@@ -212,7 +242,7 @@ static inline int norm_qscale(int qscale, int type) |
212 | 212 |
|
213 | 213 |
static void filter(SPPContext *p, uint8_t *dst, uint8_t *src, |
214 | 214 |
int dst_linesize, int src_linesize, int width, int height, |
215 |
- const uint8_t *qp_table, int qp_stride, int is_luma) |
|
215 |
+ const uint8_t *qp_table, int qp_stride, int is_luma, int sample_bytes) |
|
216 | 216 |
{ |
217 | 217 |
int x, y, i; |
218 | 218 |
const int count = 1 << p->log2_count; |
... | ... |
@@ -220,18 +250,26 @@ static void filter(SPPContext *p, uint8_t *dst, uint8_t *src, |
220 | 220 |
DECLARE_ALIGNED(16, uint64_t, block_align)[32]; |
221 | 221 |
int16_t *block = (int16_t *)block_align; |
222 | 222 |
int16_t *block2 = (int16_t *)(block_align + 16); |
223 |
+ uint16_t *psrc16 = (uint16_t*)p->src; |
|
223 | 224 |
|
224 | 225 |
for (y = 0; y < height; y++) { |
225 | 226 |
int index = 8 + 8*linesize + y*linesize; |
226 |
- memcpy(p->src + index, src + y*src_linesize, width); |
|
227 |
- for (x = 0; x < 8; x++) { |
|
228 |
- p->src[index - x - 1] = p->src[index + x ]; |
|
229 |
- p->src[index + width + x ] = p->src[index + width - x - 1]; |
|
227 |
+ memcpy(p->src + index*sample_bytes, src + y*src_linesize, width*sample_bytes); |
|
228 |
+ if (sample_bytes == 1) { |
|
229 |
+ for (x = 0; x < 8; x++) { |
|
230 |
+ p->src[index - x - 1] = p->src[index + x ]; |
|
231 |
+ p->src[index + width + x ] = p->src[index + width - x - 1]; |
|
232 |
+ } |
|
233 |
+ } else { |
|
234 |
+ for (x = 0; x < 8; x++) { |
|
235 |
+ psrc16[index - x - 1] = psrc16[index + x ]; |
|
236 |
+ psrc16[index + width + x ] = psrc16[index + width - x - 1]; |
|
237 |
+ } |
|
230 | 238 |
} |
231 | 239 |
} |
232 | 240 |
for (y = 0; y < 8; y++) { |
233 |
- memcpy(p->src + ( 7-y)*linesize, p->src + ( y+8)*linesize, linesize); |
|
234 |
- memcpy(p->src + (height+8+y)*linesize, p->src + (height-y+7)*linesize, linesize); |
|
241 |
+ memcpy(p->src + ( 7-y)*linesize * sample_bytes, p->src + ( y+8)*linesize * sample_bytes, linesize * sample_bytes); |
|
242 |
+ memcpy(p->src + (height+8+y)*linesize * sample_bytes, p->src + (height-y+7)*linesize * sample_bytes, linesize * sample_bytes); |
|
235 | 243 |
} |
236 | 244 |
|
237 | 245 |
for (y = 0; y < height + 8; y += 8) { |
... | ... |
@@ -250,18 +288,26 @@ static void filter(SPPContext *p, uint8_t *dst, uint8_t *src, |
250 | 250 |
const int x1 = x + offset[i + count - 1][0]; |
251 | 251 |
const int y1 = y + offset[i + count - 1][1]; |
252 | 252 |
const int index = x1 + y1*linesize; |
253 |
- p->dct->get_pixels(block, p->src + index, linesize); |
|
253 |
+ p->dct->get_pixels(block, p->src + sample_bytes*index, sample_bytes*linesize); |
|
254 | 254 |
p->dct->fdct(block); |
255 | 255 |
p->requantize(block2, block, qp, p->dct->idct_permutation); |
256 | 256 |
p->dct->idct(block2); |
257 | 257 |
add_block(p->temp + index, linesize, block2); |
258 | 258 |
} |
259 | 259 |
} |
260 |
- if (y) |
|
261 |
- p->store_slice(dst + (y - 8) * dst_linesize, p->temp + 8 + y*linesize, |
|
262 |
- dst_linesize, linesize, width, |
|
263 |
- FFMIN(8, height + 8 - y), MAX_LEVEL - p->log2_count, |
|
264 |
- ldither); |
|
260 |
+ if (y) { |
|
261 |
+ if (sample_bytes == 1) { |
|
262 |
+ p->store_slice(dst + (y - 8) * dst_linesize, p->temp + 8 + y*linesize, |
|
263 |
+ dst_linesize, linesize, width, |
|
264 |
+ FFMIN(8, height + 8 - y), MAX_LEVEL - p->log2_count, |
|
265 |
+ ldither); |
|
266 |
+ } else { |
|
267 |
+ store_slice16_c(dst + (y - 8) * dst_linesize, p->temp + 8 + y*linesize, |
|
268 |
+ dst_linesize/2, linesize, width, |
|
269 |
+ FFMIN(8, height + 8 - y), MAX_LEVEL - p->log2_count, |
|
270 |
+ ldither); |
|
271 |
+ } |
|
272 |
+ } |
|
265 | 273 |
} |
266 | 274 |
} |
267 | 275 |
|
... | ... |
@@ -273,6 +319,8 @@ static int query_formats(AVFilterContext *ctx) |
273 | 273 |
AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV440P, |
274 | 274 |
AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ422P, |
275 | 275 |
AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ440P, |
276 |
+ AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10, |
|
277 |
+ AV_PIX_FMT_YUV420P10, |
|
276 | 278 |
AV_PIX_FMT_NONE |
277 | 279 |
}; |
278 | 280 |
ff_set_common_formats(ctx, ff_make_format_list(pix_fmts)); |
... | ... |
@@ -284,12 +332,19 @@ static int config_input(AVFilterLink *inlink) |
284 | 284 |
SPPContext *spp = inlink->dst->priv; |
285 | 285 |
const int h = FFALIGN(inlink->h + 16, 16); |
286 | 286 |
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format); |
287 |
+ const int bps = desc->comp[0].depth_minus1 + 1; |
|
288 |
+ |
|
289 |
+ av_opt_set_int(spp->dct, "bits_per_sample", bps, 0); |
|
290 |
+ avcodec_dct_init(spp->dct); |
|
291 |
+ |
|
292 |
+ if (ARCH_X86) |
|
293 |
+ ff_spp_init_x86(spp); |
|
287 | 294 |
|
288 | 295 |
spp->hsub = desc->log2_chroma_w; |
289 | 296 |
spp->vsub = desc->log2_chroma_h; |
290 | 297 |
spp->temp_linesize = FFALIGN(inlink->w + 16, 16); |
291 | 298 |
spp->temp = av_malloc_array(spp->temp_linesize, h * sizeof(*spp->temp)); |
292 |
- spp->src = av_malloc_array(spp->temp_linesize, h * sizeof(*spp->src)); |
|
299 |
+ spp->src = av_malloc_array(spp->temp_linesize, h * sizeof(*spp->src) * 2); |
|
293 | 300 |
|
294 | 301 |
if (!spp->temp || !spp->src) |
295 | 302 |
return AVERROR(ENOMEM); |
... | ... |
@@ -304,6 +359,8 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) |
304 | 304 |
AVFrame *out = in; |
305 | 305 |
int qp_stride = 0; |
306 | 306 |
const int8_t *qp_table = NULL; |
307 |
+ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format); |
|
308 |
+ const int sample_bytes = desc->comp[0].depth_minus1 < 8 ? 1 : 2; |
|
307 | 309 |
|
308 | 310 |
/* if we are not in a constant user quantizer mode and we don't want to use |
309 | 311 |
* the quantizers from the B-frames (B-frames often have a higher QP), we |
... | ... |
@@ -363,9 +420,9 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) |
363 | 363 |
out->height = in->height; |
364 | 364 |
} |
365 | 365 |
|
366 |
- filter(spp, out->data[0], in->data[0], out->linesize[0], in->linesize[0], inlink->w, inlink->h, qp_table, qp_stride, 1); |
|
367 |
- filter(spp, out->data[1], in->data[1], out->linesize[1], in->linesize[1], cw, ch, qp_table, qp_stride, 0); |
|
368 |
- filter(spp, out->data[2], in->data[2], out->linesize[2], in->linesize[2], cw, ch, qp_table, qp_stride, 0); |
|
366 |
+ filter(spp, out->data[0], in->data[0], out->linesize[0], in->linesize[0], inlink->w, inlink->h, qp_table, qp_stride, 1, sample_bytes); |
|
367 |
+ filter(spp, out->data[1], in->data[1], out->linesize[1], in->linesize[1], cw, ch, qp_table, qp_stride, 0, sample_bytes); |
|
368 |
+ filter(spp, out->data[2], in->data[2], out->linesize[2], in->linesize[2], cw, ch, qp_table, qp_stride, 0, sample_bytes); |
|
369 | 369 |
emms_c(); |
370 | 370 |
} |
371 | 371 |
} |
... | ... |
@@ -415,14 +472,11 @@ static av_cold int init_dict(AVFilterContext *ctx, AVDictionary **opts) |
415 | 415 |
av_dict_free(opts); |
416 | 416 |
} |
417 | 417 |
|
418 |
- avcodec_dct_init(spp->dct); |
|
419 | 418 |
spp->store_slice = store_slice_c; |
420 | 419 |
switch (spp->mode) { |
421 | 420 |
case MODE_HARD: spp->requantize = hardthresh_c; break; |
422 | 421 |
case MODE_SOFT: spp->requantize = softthresh_c; break; |
423 | 422 |
} |
424 |
- if (ARCH_X86) |
|
425 |
- ff_spp_init_x86(spp); |
|
426 | 423 |
return 0; |
427 | 424 |
} |
428 | 425 |
|
... | ... |
@@ -224,9 +224,11 @@ av_cold void ff_spp_init_x86(SPPContext *s) |
224 | 224 |
|
225 | 225 |
if (cpu_flags & AV_CPU_FLAG_MMX) { |
226 | 226 |
s->store_slice = store_slice_mmx; |
227 |
- switch (s->mode) { |
|
228 |
- case 0: s->requantize = hardthresh_mmx; break; |
|
229 |
- case 1: s->requantize = softthresh_mmx; break; |
|
227 |
+ if (av_get_int(s->dct, "bits_per_sample", NULL) <= 8) { |
|
228 |
+ switch (s->mode) { |
|
229 |
+ case 0: s->requantize = hardthresh_mmx; break; |
|
230 |
+ case 1: s->requantize = softthresh_mmx; break; |
|
231 |
+ } |
|
230 | 232 |
} |
231 | 233 |
} |
232 | 234 |
#endif |