... | ... |
@@ -29,6 +29,7 @@ |
29 | 29 |
# undef pixel2 |
30 | 30 |
# undef pixel4 |
31 | 31 |
# undef dctcoef |
32 |
+# undef idctin |
|
32 | 33 |
# undef INIT_CLIP |
33 | 34 |
# undef no_rnd_avg_pixel4 |
34 | 35 |
# undef rnd_avg_pixel4 |
... | ... |
@@ -53,6 +54,16 @@ |
53 | 53 |
# define pixel4 uint64_t |
54 | 54 |
# define dctcoef int32_t |
55 | 55 |
|
56 |
+#ifdef IN_IDCT_DEPTH /* idctin: element type of the coefficient block handed to the IDCT */ |
|
57 |
+#if IN_IDCT_DEPTH == 32 |
|
58 |
+# define idctin int32_t |
|
59 |
+#else /* any IN_IDCT_DEPTH value other than 32 */ |
|
60 |
+# define idctin int16_t |
|
61 |
+#endif /* IN_IDCT_DEPTH == 32 */ |
|
62 |
+#else /* IN_IDCT_DEPTH not defined by the including file */ |
|
63 |
+# define idctin int16_t |
|
64 |
+#endif /* IN_IDCT_DEPTH */ |
|
65 |
+ |
|
56 | 66 |
# define INIT_CLIP |
57 | 67 |
# define no_rnd_avg_pixel4 no_rnd_avg64 |
58 | 68 |
# define rnd_avg_pixel4 rnd_avg64 |
... | ... |
@@ -71,6 +82,7 @@ |
71 | 71 |
# define pixel2 uint16_t |
72 | 72 |
# define pixel4 uint32_t |
73 | 73 |
# define dctcoef int16_t |
74 |
+# define idctin int16_t |
|
74 | 75 |
|
75 | 76 |
# define INIT_CLIP |
76 | 77 |
# define no_rnd_avg_pixel4 no_rnd_avg32 |
... | ... |
@@ -87,7 +99,10 @@ |
87 | 87 |
# define CLIP(a) av_clip_uint8(a) |
88 | 88 |
#endif |
89 | 89 |
|
90 |
-#define FUNC3(a, b, c) a ## _ ## b ## c |
|
90 |
+#define FUNC3(a, b, c) a ## _ ## b ## c |
|
91 | 91 |
#define FUNC2(a, b, c) FUNC3(a, b, c) |
92 | 92 |
#define FUNC(a) FUNC2(a, BIT_DEPTH,) |
93 | 93 |
#define FUNCC(a) FUNC2(a, BIT_DEPTH, _c) |
94 |
+#define FUNC4(a, b, c) a ## _int ## b ## _ ## c ## bit /* raw token paste: a_int<b>_<c>bit */ |
|
95 |
+#define FUNC5(a, b, c) FUNC4(a, b, c) /* extra level so b and c are macro-expanded before ## is applied */ |
|
96 |
+#define FUNC6(a) FUNC5(a, IN_IDCT_DEPTH, BIT_DEPTH) /* e.g. FUNC6(ff_simple_idct_put) -> ff_simple_idct_put_int16_10bit */ |
... | ... |
@@ -256,14 +256,14 @@ av_cold void ff_idctdsp_init(IDCTDSPContext *c, AVCodecContext *avctx) |
256 | 256 |
c->perm_type = FF_IDCT_PERM_NONE; |
257 | 257 |
} else { |
258 | 258 |
if (avctx->bits_per_raw_sample == 10 || avctx->bits_per_raw_sample == 9) { |
259 |
- c->idct_put = ff_simple_idct_put_10; |
|
260 |
- c->idct_add = ff_simple_idct_add_10; |
|
261 |
- c->idct = ff_simple_idct_10; |
|
259 |
+ c->idct_put = ff_simple_idct_put_int16_10bit; |
|
260 |
+ c->idct_add = ff_simple_idct_add_int16_10bit; |
|
261 |
+ c->idct = ff_simple_idct_int16_10bit; |
|
262 | 262 |
c->perm_type = FF_IDCT_PERM_NONE; |
263 | 263 |
} else if (avctx->bits_per_raw_sample == 12) { |
264 |
- c->idct_put = ff_simple_idct_put_12; |
|
265 |
- c->idct_add = ff_simple_idct_add_12; |
|
266 |
- c->idct = ff_simple_idct_12; |
|
264 |
+ c->idct_put = ff_simple_idct_put_int16_12bit; |
|
265 |
+ c->idct_add = ff_simple_idct_add_int16_12bit; |
|
266 |
+ c->idct = ff_simple_idct_int16_12bit; |
|
267 | 267 |
c->perm_type = FF_IDCT_PERM_NONE; |
268 | 268 |
} else { |
269 | 269 |
if (avctx->idct_algo == FF_IDCT_INT) { |
... | ... |
@@ -280,9 +280,9 @@ av_cold void ff_idctdsp_init(IDCTDSPContext *c, AVCodecContext *avctx) |
280 | 280 |
#endif /* CONFIG_FAANIDCT */ |
281 | 281 |
} else { // accurate/default |
282 | 282 |
/* Be sure FF_IDCT_NONE will select this one, since it uses FF_IDCT_PERM_NONE */ |
283 |
- c->idct_put = ff_simple_idct_put_8; |
|
284 |
- c->idct_add = ff_simple_idct_add_8; |
|
285 |
- c->idct = ff_simple_idct_8; |
|
283 |
+ c->idct_put = ff_simple_idct_put_int16_8bit; |
|
284 |
+ c->idct_add = ff_simple_idct_add_int16_8bit; |
|
285 |
+ c->idct = ff_simple_idct_int16_8bit; |
|
286 | 286 |
c->perm_type = FF_IDCT_PERM_NONE; |
287 | 287 |
} |
288 | 288 |
} |
... | ... |
@@ -755,7 +755,7 @@ static int quant_psnr8x8_c(MpegEncContext *s, uint8_t *src1, |
755 | 755 |
s->block_last_index[0 /* FIXME */] = |
756 | 756 |
s->fast_dct_quantize(s, temp, 0 /* FIXME */, s->qscale, &i); |
757 | 757 |
s->dct_unquantize_inter(s, temp, 0, s->qscale); |
758 |
- ff_simple_idct_8(temp); // FIXME |
|
758 |
+ ff_simple_idct_int16_8bit(temp); // FIXME |
|
759 | 759 |
|
760 | 760 |
for (i = 0; i < 64; i++) |
761 | 761 |
sum += (temp[i] - bak[i]) * (temp[i] - bak[i]); |
... | ... |
@@ -30,6 +30,8 @@ |
30 | 30 |
#include "mathops.h" |
31 | 31 |
#include "simple_idct.h" |
32 | 32 |
|
33 |
+#define IN_IDCT_DEPTH 16 |
|
34 |
+ |
|
33 | 35 |
#define BIT_DEPTH 8 |
34 | 36 |
#include "simple_idct_template.c" |
35 | 37 |
#undef BIT_DEPTH |
... | ... |
@@ -46,6 +48,13 @@ |
46 | 46 |
#define BIT_DEPTH 12 |
47 | 47 |
#include "simple_idct_template.c" |
48 | 48 |
#undef BIT_DEPTH |
49 |
+#undef IN_IDCT_DEPTH |
|
50 |
+ |
|
51 |
+#define IN_IDCT_DEPTH 32 |
|
52 |
+#define BIT_DEPTH 10 |
|
53 |
+#include "simple_idct_template.c" |
|
54 |
+#undef BIT_DEPTH |
|
55 |
+#undef IN_IDCT_DEPTH |
|
49 | 56 |
|
50 | 57 |
/* 2x4x8 idct */ |
51 | 58 |
|
... | ... |
@@ -115,7 +124,7 @@ void ff_simple_idct248_put(uint8_t *dest, ptrdiff_t line_size, int16_t *block) |
115 | 115 |
|
116 | 116 |
/* IDCT8 on each line */ |
117 | 117 |
for(i=0; i<8; i++) { |
118 |
- idctRowCondDC_8(block + i*8, 0); |
|
118 |
+ idctRowCondDC_int16_8bit(block + i*8, 0); |
|
119 | 119 |
} |
120 | 120 |
|
121 | 121 |
/* IDCT4 and store */ |
... | ... |
@@ -188,7 +197,7 @@ void ff_simple_idct84_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block) |
188 | 188 |
|
189 | 189 |
/* IDCT8 on each line */ |
190 | 190 |
for(i=0; i<4; i++) { |
191 |
- idctRowCondDC_8(block + i*8, 0); |
|
191 |
+ idctRowCondDC_int16_8bit(block + i*8, 0); |
|
192 | 192 |
} |
193 | 193 |
|
194 | 194 |
/* IDCT4 and store */ |
... | ... |
@@ -208,7 +217,7 @@ void ff_simple_idct48_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block) |
208 | 208 |
|
209 | 209 |
/* IDCT8 and store */ |
210 | 210 |
for(i=0; i<4; i++){ |
211 |
- idctSparseColAdd_8(dest + i, line_size, block + i); |
|
211 |
+ idctSparseColAdd_int16_8bit(dest + i, line_size, block + i); |
|
212 | 212 |
} |
213 | 213 |
} |
214 | 214 |
|
... | ... |
@@ -31,20 +31,24 @@ |
31 | 31 |
#include <stddef.h> |
32 | 32 |
#include <stdint.h> |
33 | 33 |
|
34 |
-void ff_simple_idct_put_8(uint8_t *dest, ptrdiff_t line_size, int16_t *block); |
|
35 |
-void ff_simple_idct_add_8(uint8_t *dest, ptrdiff_t line_size, int16_t *block); |
|
36 |
-void ff_simple_idct_8(int16_t *block); |
|
34 |
+void ff_simple_idct_put_int16_8bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block); |
|
35 |
+void ff_simple_idct_add_int16_8bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block); |
|
36 |
+void ff_simple_idct_int16_8bit(int16_t *block); |
|
37 | 37 |
|
38 |
-void ff_simple_idct_put_10(uint8_t *dest, ptrdiff_t line_size, int16_t *block); |
|
39 |
-void ff_simple_idct_add_10(uint8_t *dest, ptrdiff_t line_size, int16_t *block); |
|
40 |
-void ff_simple_idct_10(int16_t *block); |
|
38 |
+void ff_simple_idct_put_int16_10bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block); |
|
39 |
+void ff_simple_idct_add_int16_10bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block); |
|
40 |
+void ff_simple_idct_int16_10bit(int16_t *block); |
|
41 | 41 |
|
42 |
-void ff_simple_idct_put_12(uint8_t *dest, ptrdiff_t line_size, int16_t *block); |
|
43 |
-void ff_simple_idct_add_12(uint8_t *dest, ptrdiff_t line_size, int16_t *block); |
|
44 |
-void ff_simple_idct_12(int16_t *block); |
|
42 |
+void ff_simple_idct_put_int32_10bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block); |
|
43 |
+/* No add/in-place prototypes for int32 input: the template builds those */ |
|
44 |
+/* entry points only when IN_IDCT_DEPTH == 16, so only the put variant exists. */ |
|
45 |
+ |
|
46 |
+void ff_simple_idct_put_int16_12bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block); |
|
47 |
+void ff_simple_idct_add_int16_12bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block); |
|
48 |
+void ff_simple_idct_int16_12bit(int16_t *block); |
|
45 | 49 |
|
46 | 50 |
/** |
47 |
- * Special version of ff_simple_idct_10() which does dequantization |
|
51 |
+ * Special version of ff_simple_idct_int16_10bit() which does dequantization |
|
48 | 52 |
* and scales by a factor of 2 more between the two IDCTs to account |
49 | 53 |
* for larger scale of input coefficients. |
50 | 54 |
*/ |
... | ... |
@@ -77,6 +77,10 @@ |
77 | 77 |
#define ROW_SHIFT 13 |
78 | 78 |
#define COL_SHIFT 18 |
79 | 79 |
#define DC_SHIFT 1 |
80 |
+# elif IN_IDCT_DEPTH == 32 |
|
81 |
+#define ROW_SHIFT 13 |
|
82 |
+#define COL_SHIFT 21 |
|
83 |
+#define DC_SHIFT 2 |
|
80 | 84 |
# else |
81 | 85 |
#define ROW_SHIFT 12 |
82 | 86 |
#define COL_SHIFT 19 |
... | ... |
@@ -109,11 +113,13 @@ |
109 | 109 |
#ifdef EXTRA_SHIFT |
110 | 110 |
static inline void FUNC(idctRowCondDC_extrashift)(int16_t *row, int extra_shift) |
111 | 111 |
#else |
112 |
-static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift) |
|
112 |
+static inline void FUNC6(idctRowCondDC)(idctin *row, int extra_shift) |
|
113 | 113 |
#endif |
114 | 114 |
{ |
115 | 115 |
SUINT a0, a1, a2, a3, b0, b1, b2, b3; |
116 | 116 |
|
117 |
+// TODO: Add DC-only support for int32_t input |
|
118 |
+#if IN_IDCT_DEPTH == 16 |
|
117 | 119 |
#if HAVE_FAST_64BIT |
118 | 120 |
#define ROW0_MASK (0xffffLL << 48 * HAVE_BIGENDIAN) |
119 | 121 |
if (((AV_RN64A(row) & ~ROW0_MASK) | AV_RN64A(row+4)) == 0) { |
... | ... |
@@ -148,6 +154,7 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift) |
148 | 148 |
return; |
149 | 149 |
} |
150 | 150 |
#endif |
151 |
+#endif |
|
151 | 152 |
|
152 | 153 |
a0 = (W4 * row[0]) + (1 << (ROW_SHIFT + extra_shift - 1)); |
153 | 154 |
a1 = a0; |
... | ... |
@@ -168,7 +175,11 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift) |
168 | 168 |
b3 = MUL(W7, row[1]); |
169 | 169 |
MAC(b3, -W5, row[3]); |
170 | 170 |
|
171 |
+#if IN_IDCT_DEPTH == 32 |
|
172 |
+ if (AV_RN64A(row + 4) | AV_RN64A(row + 6)) { |
|
173 |
+#else |
|
171 | 174 |
if (AV_RN64A(row + 4)) { |
175 |
+#endif |
|
172 | 176 |
a0 += W4*row[4] + W6*row[6]; |
173 | 177 |
a1 += - W4*row[4] - W2*row[6]; |
174 | 178 |
a2 += - W4*row[4] + W2*row[6]; |
... | ... |
@@ -250,8 +261,8 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift) |
250 | 250 |
#ifdef EXTRA_SHIFT |
251 | 251 |
static inline void FUNC(idctSparseCol_extrashift)(int16_t *col) |
252 | 252 |
#else |
253 |
-static inline void FUNC(idctSparseColPut)(pixel *dest, ptrdiff_t line_size, |
|
254 |
- int16_t *col) |
|
253 |
+static inline void FUNC6(idctSparseColPut)(pixel *dest, ptrdiff_t line_size, |
|
254 |
+ idctin *col) |
|
255 | 255 |
{ |
256 | 256 |
SUINT a0, a1, a2, a3, b0, b1, b2, b3; |
257 | 257 |
|
... | ... |
@@ -274,8 +285,8 @@ static inline void FUNC(idctSparseColPut)(pixel *dest, ptrdiff_t line_size, |
274 | 274 |
dest[0] = av_clip_pixel((int)(a0 - b0) >> COL_SHIFT); |
275 | 275 |
} |
276 | 276 |
|
277 |
-static inline void FUNC(idctSparseColAdd)(pixel *dest, ptrdiff_t line_size, |
|
278 |
- int16_t *col) |
|
277 |
+static inline void FUNC6(idctSparseColAdd)(pixel *dest, ptrdiff_t line_size, |
|
278 |
+ idctin *col) |
|
279 | 279 |
{ |
280 | 280 |
int a0, a1, a2, a3, b0, b1, b2, b3; |
281 | 281 |
|
... | ... |
@@ -298,7 +309,7 @@ static inline void FUNC(idctSparseColAdd)(pixel *dest, ptrdiff_t line_size, |
298 | 298 |
dest[0] = av_clip_pixel(dest[0] + ((a0 - b0) >> COL_SHIFT)); |
299 | 299 |
} |
300 | 300 |
|
301 |
-static inline void FUNC(idctSparseCol)(int16_t *col) |
|
301 |
+static inline void FUNC6(idctSparseCol)(idctin *col) |
|
302 | 302 |
#endif |
303 | 303 |
{ |
304 | 304 |
int a0, a1, a2, a3, b0, b1, b2, b3; |
... | ... |
@@ -316,21 +327,23 @@ static inline void FUNC(idctSparseCol)(int16_t *col) |
316 | 316 |
} |
317 | 317 |
|
318 | 318 |
#ifndef EXTRA_SHIFT |
319 |
-void FUNC(ff_simple_idct_put)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block) |
|
319 |
+void FUNC6(ff_simple_idct_put)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block_) /* block_ is reinterpreted as idctin elements below */ |
|
320 | 320 |
{ |
321 |
+ idctin *block = (idctin *)block_; /* view the coefficients at the template's input width (int16_t or int32_t) */ |
|
321 | 322 |
pixel *dest = (pixel *)dest_; |
322 | 323 |
int i; |
323 | 324 |
|
324 | 325 |
line_size /= sizeof(pixel); |
325 | 326 |
|
326 | 327 |
for (i = 0; i < 8; i++) |
327 |
- FUNC(idctRowCondDC)(block + i*8, 0); |
|
328 |
+ FUNC6(idctRowCondDC)(block + i*8, 0); /* row pass: row i starts at block + i*8, no extra shift */ |
|
328 | 329 |
|
329 | 330 |
for (i = 0; i < 8; i++) |
330 |
- FUNC(idctSparseColPut)(dest + i, line_size, block + i); |
|
331 |
+ FUNC6(idctSparseColPut)(dest + i, line_size, block + i); /* column pass on column i; writes clipped pixels to dest */ |
|
331 | 332 |
} |
332 | 333 |
|
333 |
-void FUNC(ff_simple_idct_add)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block) |
|
334 |
+#if IN_IDCT_DEPTH == 16 |
|
335 |
+void FUNC6(ff_simple_idct_add)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block) |
|
334 | 336 |
{ |
335 | 337 |
pixel *dest = (pixel *)dest_; |
336 | 338 |
int i; |
... | ... |
@@ -338,20 +351,21 @@ void FUNC(ff_simple_idct_add)(uint8_t *dest_, ptrdiff_t line_size, int16_t *bloc |
338 | 338 |
line_size /= sizeof(pixel); |
339 | 339 |
|
340 | 340 |
for (i = 0; i < 8; i++) |
341 |
- FUNC(idctRowCondDC)(block + i*8, 0); |
|
341 |
+ FUNC6(idctRowCondDC)(block + i*8, 0); |
|
342 | 342 |
|
343 | 343 |
for (i = 0; i < 8; i++) |
344 |
- FUNC(idctSparseColAdd)(dest + i, line_size, block + i); |
|
344 |
+ FUNC6(idctSparseColAdd)(dest + i, line_size, block + i); |
|
345 | 345 |
} |
346 | 346 |
|
347 |
-void FUNC(ff_simple_idct)(int16_t *block) |
|
347 |
+void FUNC6(ff_simple_idct)(int16_t *block) /* compiled only inside the enclosing #if IN_IDCT_DEPTH == 16 */ |
|
348 | 348 |
{ |
349 | 349 |
int i; |
350 | 350 |
|
351 | 351 |
for (i = 0; i < 8; i++) |
352 |
- FUNC(idctRowCondDC)(block + i*8, 0); |
|
352 |
+ FUNC6(idctRowCondDC)(block + i*8, 0); /* row pass: row i starts at block + i*8, no extra shift */ |
|
353 | 353 |
|
354 | 354 |
for (i = 0; i < 8; i++) |
355 |
- FUNC(idctSparseCol)(block + i); |
|
355 |
+ FUNC6(idctSparseCol)(block + i); /* column pass on column i; no dest pointer is passed, only the block */ |
|
356 | 356 |
} |
357 | 357 |
#endif |
358 |
+#endif |
... | ... |
@@ -82,9 +82,9 @@ static void ff_prores_idct_wrap(int16_t *dst){ |
82 | 82 |
static const struct algo idct_tab[] = { |
83 | 83 |
{ "REF-DBL", ff_ref_idct, FF_IDCT_PERM_NONE }, |
84 | 84 |
{ "INT", ff_j_rev_dct, FF_IDCT_PERM_LIBMPEG2 }, |
85 |
- { "SIMPLE-C", ff_simple_idct_8, FF_IDCT_PERM_NONE }, |
|
86 |
- { "SIMPLE-C10", ff_simple_idct_10, FF_IDCT_PERM_NONE }, |
|
87 |
- { "SIMPLE-C12", ff_simple_idct_12, FF_IDCT_PERM_NONE, 0, 1 }, |
|
85 |
+ { "SIMPLE-C", ff_simple_idct_int16_8bit, FF_IDCT_PERM_NONE }, |
|
86 |
+ { "SIMPLE-C10", ff_simple_idct_int16_10bit, FF_IDCT_PERM_NONE }, |
|
87 |
+ { "SIMPLE-C12", ff_simple_idct_int16_12bit, FF_IDCT_PERM_NONE, 0, 1 }, |
|
88 | 88 |
{ "PR-C", ff_prores_idct_wrap, FF_IDCT_PERM_NONE, 0, 1 }, |
89 | 89 |
#if CONFIG_FAANIDCT |
90 | 90 |
{ "FAANI", ff_faanidct, FF_IDCT_PERM_NONE }, |
... | ... |
@@ -314,11 +314,11 @@ int ff_vc1_decode_sequence_header(AVCodecContext *avctx, VC1Context *v, GetBitCo |
314 | 314 |
v->multires = get_bits1(gb); |
315 | 315 |
v->res_fasttx = get_bits1(gb); |
316 | 316 |
if (!v->res_fasttx) { |
317 |
- v->vc1dsp.vc1_inv_trans_8x8 = ff_simple_idct_8; |
|
317 |
+ v->vc1dsp.vc1_inv_trans_8x8 = ff_simple_idct_int16_8bit; |
|
318 | 318 |
v->vc1dsp.vc1_inv_trans_8x4 = ff_simple_idct84_add; |
319 | 319 |
v->vc1dsp.vc1_inv_trans_4x8 = ff_simple_idct48_add; |
320 | 320 |
v->vc1dsp.vc1_inv_trans_4x4 = ff_simple_idct44_add; |
321 |
- v->vc1dsp.vc1_inv_trans_8x8_dc = ff_simple_idct_add_8; |
|
321 |
+ v->vc1dsp.vc1_inv_trans_8x8_dc = ff_simple_idct_add_int16_8bit; |
|
322 | 322 |
v->vc1dsp.vc1_inv_trans_8x4_dc = ff_simple_idct84_add; |
323 | 323 |
v->vc1dsp.vc1_inv_trans_4x8_dc = ff_simple_idct48_add; |
324 | 324 |
v->vc1dsp.vc1_inv_trans_4x4_dc = ff_simple_idct44_add; |