Browse code

simple_idct: Template functions to support an input bitdepth parameter

Kieran Kunhya authored on 2017/12/27 10:08:39
Showing 8 changed files
... ...
@@ -29,6 +29,7 @@
29 29
 #   undef pixel2
30 30
 #   undef pixel4
31 31
 #   undef dctcoef
32
+#   undef idctin
32 33
 #   undef INIT_CLIP
33 34
 #   undef no_rnd_avg_pixel4
34 35
 #   undef rnd_avg_pixel4
... ...
@@ -53,6 +54,16 @@
53 53
 #   define pixel4 uint64_t
54 54
 #   define dctcoef int32_t
55 55
 
56
+#ifdef IN_IDCT_DEPTH
57
+#if IN_IDCT_DEPTH == 32
58
+#   define idctin int32_t
59
+#else
60
+#   define idctin int16_t
61
+#endif
62
+#else
63
+#   define idctin int16_t
64
+#endif
65
+
56 66
 #   define INIT_CLIP
57 67
 #   define no_rnd_avg_pixel4 no_rnd_avg64
58 68
 #   define    rnd_avg_pixel4    rnd_avg64
... ...
@@ -71,6 +82,7 @@
71 71
 #   define pixel2 uint16_t
72 72
 #   define pixel4 uint32_t
73 73
 #   define dctcoef int16_t
74
+#   define idctin  int16_t
74 75
 
75 76
 #   define INIT_CLIP
76 77
 #   define no_rnd_avg_pixel4 no_rnd_avg32
... ...
@@ -87,7 +99,10 @@
87 87
 #   define CLIP(a) av_clip_uint8(a)
88 88
 #endif
89 89
 
90
-#define FUNC3(a, b, c)  a ## _ ## b ## c
90
+#define FUNC3(a, b, c)  a ## _ ## b ##  c
91 91
 #define FUNC2(a, b, c)  FUNC3(a, b, c)
92 92
 #define FUNC(a)  FUNC2(a, BIT_DEPTH,)
93 93
 #define FUNCC(a) FUNC2(a, BIT_DEPTH, _c)
94
+#define FUNC4(a, b, c)  a ## _int ## b ## _ ## c ## bit
95
+#define FUNC5(a, b, c)  FUNC4(a, b, c)
96
+#define FUNC6(a)  FUNC5(a, IN_IDCT_DEPTH, BIT_DEPTH)
... ...
@@ -256,14 +256,14 @@ av_cold void ff_idctdsp_init(IDCTDSPContext *c, AVCodecContext *avctx)
256 256
         c->perm_type = FF_IDCT_PERM_NONE;
257 257
     } else {
258 258
         if (avctx->bits_per_raw_sample == 10 || avctx->bits_per_raw_sample == 9) {
259
-            c->idct_put              = ff_simple_idct_put_10;
260
-            c->idct_add              = ff_simple_idct_add_10;
261
-            c->idct                  = ff_simple_idct_10;
259
+            c->idct_put              = ff_simple_idct_put_int16_10bit;
260
+            c->idct_add              = ff_simple_idct_add_int16_10bit;
261
+            c->idct                  = ff_simple_idct_int16_10bit;
262 262
             c->perm_type             = FF_IDCT_PERM_NONE;
263 263
         } else if (avctx->bits_per_raw_sample == 12) {
264
-            c->idct_put              = ff_simple_idct_put_12;
265
-            c->idct_add              = ff_simple_idct_add_12;
266
-            c->idct                  = ff_simple_idct_12;
264
+            c->idct_put              = ff_simple_idct_put_int16_12bit;
265
+            c->idct_add              = ff_simple_idct_add_int16_12bit;
266
+            c->idct                  = ff_simple_idct_int16_12bit;
267 267
             c->perm_type             = FF_IDCT_PERM_NONE;
268 268
         } else {
269 269
             if (avctx->idct_algo == FF_IDCT_INT) {
... ...
@@ -280,9 +280,9 @@ av_cold void ff_idctdsp_init(IDCTDSPContext *c, AVCodecContext *avctx)
280 280
 #endif /* CONFIG_FAANIDCT */
281 281
             } else { // accurate/default
282 282
                 /* Be sure FF_IDCT_NONE will select this one, since it uses FF_IDCT_PERM_NONE */
283
-                c->idct_put  = ff_simple_idct_put_8;
284
-                c->idct_add  = ff_simple_idct_add_8;
285
-                c->idct      = ff_simple_idct_8;
283
+                c->idct_put  = ff_simple_idct_put_int16_8bit;
284
+                c->idct_add  = ff_simple_idct_add_int16_8bit;
285
+                c->idct      = ff_simple_idct_int16_8bit;
286 286
                 c->perm_type = FF_IDCT_PERM_NONE;
287 287
             }
288 288
         }
... ...
@@ -755,7 +755,7 @@ static int quant_psnr8x8_c(MpegEncContext *s, uint8_t *src1,
755 755
     s->block_last_index[0 /* FIXME */] =
756 756
         s->fast_dct_quantize(s, temp, 0 /* FIXME */, s->qscale, &i);
757 757
     s->dct_unquantize_inter(s, temp, 0, s->qscale);
758
-    ff_simple_idct_8(temp); // FIXME
758
+    ff_simple_idct_int16_8bit(temp); // FIXME
759 759
 
760 760
     for (i = 0; i < 64; i++)
761 761
         sum += (temp[i] - bak[i]) * (temp[i] - bak[i]);
... ...
@@ -30,6 +30,8 @@
30 30
 #include "mathops.h"
31 31
 #include "simple_idct.h"
32 32
 
33
+#define IN_IDCT_DEPTH 16
34
+
33 35
 #define BIT_DEPTH 8
34 36
 #include "simple_idct_template.c"
35 37
 #undef BIT_DEPTH
... ...
@@ -46,6 +48,13 @@
46 46
 #define BIT_DEPTH 12
47 47
 #include "simple_idct_template.c"
48 48
 #undef BIT_DEPTH
49
+#undef IN_IDCT_DEPTH
50
+
51
+#define IN_IDCT_DEPTH 32
52
+#define BIT_DEPTH 10
53
+#include "simple_idct_template.c"
54
+#undef BIT_DEPTH
55
+#undef IN_IDCT_DEPTH
49 56
 
50 57
 /* 2x4x8 idct */
51 58
 
... ...
@@ -115,7 +124,7 @@ void ff_simple_idct248_put(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
115 115
 
116 116
     /* IDCT8 on each line */
117 117
     for(i=0; i<8; i++) {
118
-        idctRowCondDC_8(block + i*8, 0);
118
+        idctRowCondDC_int16_8bit(block + i*8, 0);
119 119
     }
120 120
 
121 121
     /* IDCT4 and store */
... ...
@@ -188,7 +197,7 @@ void ff_simple_idct84_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
188 188
 
189 189
     /* IDCT8 on each line */
190 190
     for(i=0; i<4; i++) {
191
-        idctRowCondDC_8(block + i*8, 0);
191
+        idctRowCondDC_int16_8bit(block + i*8, 0);
192 192
     }
193 193
 
194 194
     /* IDCT4 and store */
... ...
@@ -208,7 +217,7 @@ void ff_simple_idct48_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
208 208
 
209 209
     /* IDCT8 and store */
210 210
     for(i=0; i<4; i++){
211
-        idctSparseColAdd_8(dest + i, line_size, block + i);
211
+        idctSparseColAdd_int16_8bit(dest + i, line_size, block + i);
212 212
     }
213 213
 }
214 214
 
... ...
@@ -31,20 +31,24 @@
31 31
 #include <stddef.h>
32 32
 #include <stdint.h>
33 33
 
34
-void ff_simple_idct_put_8(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
35
-void ff_simple_idct_add_8(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
36
-void ff_simple_idct_8(int16_t *block);
34
+void ff_simple_idct_put_int16_8bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
35
+void ff_simple_idct_add_int16_8bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
36
+void ff_simple_idct_int16_8bit(int16_t *block);
37 37
 
38
-void ff_simple_idct_put_10(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
39
-void ff_simple_idct_add_10(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
40
-void ff_simple_idct_10(int16_t *block);
38
+void ff_simple_idct_put_int16_10bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
39
+void ff_simple_idct_add_int16_10bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
40
+void ff_simple_idct_int16_10bit(int16_t *block);
41 41
 
42
-void ff_simple_idct_put_12(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
43
-void ff_simple_idct_add_12(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
44
-void ff_simple_idct_12(int16_t *block);
42
+void ff_simple_idct_put_int32_10bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
43
+void ff_simple_idct_add_int32_10bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
44
+void ff_simple_idct_int32_10bit(int16_t *block);
45
+
46
+void ff_simple_idct_put_int16_12bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
47
+void ff_simple_idct_add_int16_12bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
48
+void ff_simple_idct_int16_12bit(int16_t *block);
45 49
 
46 50
 /**
47
- * Special version of ff_simple_idct_10() which does dequantization
51
+ * Special version of ff_simple_idct_int16_10bit() which does dequantization
48 52
  * and scales by a factor of 2 more between the two IDCTs to account
49 53
  * for larger scale of input coefficients.
50 54
  */
... ...
@@ -77,6 +77,10 @@
77 77
 #define ROW_SHIFT 13
78 78
 #define COL_SHIFT 18
79 79
 #define DC_SHIFT  1
80
+#   elif IN_IDCT_DEPTH == 32
81
+#define ROW_SHIFT 13
82
+#define COL_SHIFT 21
83
+#define DC_SHIFT  2
80 84
 #   else
81 85
 #define ROW_SHIFT 12
82 86
 #define COL_SHIFT 19
... ...
@@ -109,11 +113,13 @@
109 109
 #ifdef EXTRA_SHIFT
110 110
 static inline void FUNC(idctRowCondDC_extrashift)(int16_t *row, int extra_shift)
111 111
 #else
112
-static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift)
112
+static inline void FUNC6(idctRowCondDC)(idctin *row, int extra_shift)
113 113
 #endif
114 114
 {
115 115
     SUINT a0, a1, a2, a3, b0, b1, b2, b3;
116 116
 
117
+// TODO: Add DC-only support for int32_t input
118
+#if IN_IDCT_DEPTH == 16
117 119
 #if HAVE_FAST_64BIT
118 120
 #define ROW0_MASK (0xffffLL << 48 * HAVE_BIGENDIAN)
119 121
     if (((AV_RN64A(row) & ~ROW0_MASK) | AV_RN64A(row+4)) == 0) {
... ...
@@ -148,6 +154,7 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift)
148 148
         return;
149 149
     }
150 150
 #endif
151
+#endif
151 152
 
152 153
     a0 = (W4 * row[0]) + (1 << (ROW_SHIFT + extra_shift - 1));
153 154
     a1 = a0;
... ...
@@ -168,7 +175,11 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift)
168 168
     b3 = MUL(W7, row[1]);
169 169
     MAC(b3, -W5, row[3]);
170 170
 
171
+#if IN_IDCT_DEPTH == 32
172
+    if (AV_RN64A(row + 4) | AV_RN64A(row + 6)) {
173
+#else
171 174
     if (AV_RN64A(row + 4)) {
175
+#endif
172 176
         a0 +=   W4*row[4] + W6*row[6];
173 177
         a1 += - W4*row[4] - W2*row[6];
174 178
         a2 += - W4*row[4] + W2*row[6];
... ...
@@ -250,8 +261,8 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift)
250 250
 #ifdef EXTRA_SHIFT
251 251
 static inline void FUNC(idctSparseCol_extrashift)(int16_t *col)
252 252
 #else
253
-static inline void FUNC(idctSparseColPut)(pixel *dest, ptrdiff_t line_size,
254
-                                          int16_t *col)
253
+static inline void FUNC6(idctSparseColPut)(pixel *dest, ptrdiff_t line_size,
254
+                                          idctin *col)
255 255
 {
256 256
     SUINT a0, a1, a2, a3, b0, b1, b2, b3;
257 257
 
... ...
@@ -274,8 +285,8 @@ static inline void FUNC(idctSparseColPut)(pixel *dest, ptrdiff_t line_size,
274 274
     dest[0] = av_clip_pixel((int)(a0 - b0) >> COL_SHIFT);
275 275
 }
276 276
 
277
-static inline void FUNC(idctSparseColAdd)(pixel *dest, ptrdiff_t line_size,
278
-                                          int16_t *col)
277
+static inline void FUNC6(idctSparseColAdd)(pixel *dest, ptrdiff_t line_size,
278
+                                          idctin *col)
279 279
 {
280 280
     int a0, a1, a2, a3, b0, b1, b2, b3;
281 281
 
... ...
@@ -298,7 +309,7 @@ static inline void FUNC(idctSparseColAdd)(pixel *dest, ptrdiff_t line_size,
298 298
     dest[0] = av_clip_pixel(dest[0] + ((a0 - b0) >> COL_SHIFT));
299 299
 }
300 300
 
301
-static inline void FUNC(idctSparseCol)(int16_t *col)
301
+static inline void FUNC6(idctSparseCol)(idctin *col)
302 302
 #endif
303 303
 {
304 304
     int a0, a1, a2, a3, b0, b1, b2, b3;
... ...
@@ -316,21 +327,23 @@ static inline void FUNC(idctSparseCol)(int16_t *col)
316 316
 }
317 317
 
318 318
 #ifndef EXTRA_SHIFT
319
-void FUNC(ff_simple_idct_put)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block)
319
+void FUNC6(ff_simple_idct_put)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block_)
320 320
 {
321
+    idctin *block = (idctin *)block_;
321 322
     pixel *dest = (pixel *)dest_;
322 323
     int i;
323 324
 
324 325
     line_size /= sizeof(pixel);
325 326
 
326 327
     for (i = 0; i < 8; i++)
327
-        FUNC(idctRowCondDC)(block + i*8, 0);
328
+        FUNC6(idctRowCondDC)(block + i*8, 0);
328 329
 
329 330
     for (i = 0; i < 8; i++)
330
-        FUNC(idctSparseColPut)(dest + i, line_size, block + i);
331
+        FUNC6(idctSparseColPut)(dest + i, line_size, block + i);
331 332
 }
332 333
 
333
-void FUNC(ff_simple_idct_add)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block)
334
+#if IN_IDCT_DEPTH == 16
335
+void FUNC6(ff_simple_idct_add)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block)
334 336
 {
335 337
     pixel *dest = (pixel *)dest_;
336 338
     int i;
... ...
@@ -338,20 +351,21 @@ void FUNC(ff_simple_idct_add)(uint8_t *dest_, ptrdiff_t line_size, int16_t *bloc
338 338
     line_size /= sizeof(pixel);
339 339
 
340 340
     for (i = 0; i < 8; i++)
341
-        FUNC(idctRowCondDC)(block + i*8, 0);
341
+        FUNC6(idctRowCondDC)(block + i*8, 0);
342 342
 
343 343
     for (i = 0; i < 8; i++)
344
-        FUNC(idctSparseColAdd)(dest + i, line_size, block + i);
344
+        FUNC6(idctSparseColAdd)(dest + i, line_size, block + i);
345 345
 }
346 346
 
347
-void FUNC(ff_simple_idct)(int16_t *block)
347
+void FUNC6(ff_simple_idct)(int16_t *block)
348 348
 {
349 349
     int i;
350 350
 
351 351
     for (i = 0; i < 8; i++)
352
-        FUNC(idctRowCondDC)(block + i*8, 0);
352
+        FUNC6(idctRowCondDC)(block + i*8, 0);
353 353
 
354 354
     for (i = 0; i < 8; i++)
355
-        FUNC(idctSparseCol)(block + i);
355
+        FUNC6(idctSparseCol)(block + i);
356 356
 }
357 357
 #endif
358
+#endif
... ...
@@ -82,9 +82,9 @@ static void ff_prores_idct_wrap(int16_t *dst){
82 82
 static const struct algo idct_tab[] = {
83 83
     { "REF-DBL",     ff_ref_idct,          FF_IDCT_PERM_NONE },
84 84
     { "INT",         ff_j_rev_dct,         FF_IDCT_PERM_LIBMPEG2 },
85
-    { "SIMPLE-C",    ff_simple_idct_8,     FF_IDCT_PERM_NONE },
86
-    { "SIMPLE-C10",  ff_simple_idct_10,    FF_IDCT_PERM_NONE },
87
-    { "SIMPLE-C12",  ff_simple_idct_12,    FF_IDCT_PERM_NONE, 0, 1 },
85
+    { "SIMPLE-C",    ff_simple_idct_int16_8bit,     FF_IDCT_PERM_NONE },
86
+    { "SIMPLE-C10",  ff_simple_idct_int16_10bit,    FF_IDCT_PERM_NONE },
87
+    { "SIMPLE-C12",  ff_simple_idct_int16_12bit,    FF_IDCT_PERM_NONE, 0, 1 },
88 88
     { "PR-C",        ff_prores_idct_wrap,  FF_IDCT_PERM_NONE, 0, 1 },
89 89
 #if CONFIG_FAANIDCT
90 90
     { "FAANI",       ff_faanidct,          FF_IDCT_PERM_NONE },
... ...
@@ -314,11 +314,11 @@ int ff_vc1_decode_sequence_header(AVCodecContext *avctx, VC1Context *v, GetBitCo
314 314
     v->multires        = get_bits1(gb);
315 315
     v->res_fasttx      = get_bits1(gb);
316 316
     if (!v->res_fasttx) {
317
-        v->vc1dsp.vc1_inv_trans_8x8    = ff_simple_idct_8;
317
+        v->vc1dsp.vc1_inv_trans_8x8    = ff_simple_idct_int16_8bit;
318 318
         v->vc1dsp.vc1_inv_trans_8x4    = ff_simple_idct84_add;
319 319
         v->vc1dsp.vc1_inv_trans_4x8    = ff_simple_idct48_add;
320 320
         v->vc1dsp.vc1_inv_trans_4x4    = ff_simple_idct44_add;
321
-        v->vc1dsp.vc1_inv_trans_8x8_dc = ff_simple_idct_add_8;
321
+        v->vc1dsp.vc1_inv_trans_8x8_dc = ff_simple_idct_add_int16_8bit;
322 322
         v->vc1dsp.vc1_inv_trans_8x4_dc = ff_simple_idct84_add;
323 323
         v->vc1dsp.vc1_inv_trans_4x8_dc = ff_simple_idct48_add;
324 324
         v->vc1dsp.vc1_inv_trans_4x4_dc = ff_simple_idct44_add;