Browse code

prores: extract idct into its own dspcontext and merge with put_pixels.

Ronald S. Bultje authored on 2011/10/07 00:03:38
Showing 8 changed files
... ...
@@ -295,7 +295,7 @@ OBJS-$(CONFIG_PNG_DECODER)             += png.o pngdec.o
295 295
 OBJS-$(CONFIG_PNG_ENCODER)             += png.o pngenc.o
296 296
 OBJS-$(CONFIG_PPM_DECODER)             += pnmdec.o pnm.o
297 297
 OBJS-$(CONFIG_PPM_ENCODER)             += pnmenc.o pnm.o
298
-OBJS-$(CONFIG_PRORES_DECODER)          += proresdec.o
298
+OBJS-$(CONFIG_PRORES_DECODER)          += proresdec.o proresdsp.o
299 299
 OBJS-$(CONFIG_PTX_DECODER)             += ptx.o
300 300
 OBJS-$(CONFIG_QCELP_DECODER)           += qcelpdec.o celp_math.o         \
301 301
                                           celp_filters.o acelp_vectors.o \
... ...
@@ -145,6 +145,41 @@ void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_s
145 145
     }
146 146
 }
147 147
 
148
+void ff_init_scantable_permutation(uint8_t *idct_permutation,
149
+                                   int idct_permutation_type)
150
+{
151
+    int i;
152
+
153
+    switch(idct_permutation_type){
154
+    case FF_NO_IDCT_PERM:
155
+        for(i=0; i<64; i++)
156
+            idct_permutation[i]= i;
157
+        break;
158
+    case FF_LIBMPEG2_IDCT_PERM:
159
+        for(i=0; i<64; i++)
160
+            idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
161
+        break;
162
+    case FF_SIMPLE_IDCT_PERM:
163
+        for(i=0; i<64; i++)
164
+            idct_permutation[i]= simple_mmx_permutation[i];
165
+        break;
166
+    case FF_TRANSPOSE_IDCT_PERM:
167
+        for(i=0; i<64; i++)
168
+            idct_permutation[i]= ((i&7)<<3) | (i>>3);
169
+        break;
170
+    case FF_PARTTRANS_IDCT_PERM:
171
+        for(i=0; i<64; i++)
172
+            idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
173
+        break;
174
+    case FF_SSE2_IDCT_PERM:
175
+        for(i=0; i<64; i++)
176
+            idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
177
+        break;
178
+    default:
179
+        av_log(NULL, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
180
+    }
181
+}
182
+
148 183
 static int pix_sum_c(uint8_t * pix, int line_size)
149 184
 {
150 185
     int s, i, j;
... ...
@@ -3123,32 +3158,6 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
3123 3123
             c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
3124 3124
     }
3125 3125
 
3126
-    switch(c->idct_permutation_type){
3127
-    case FF_NO_IDCT_PERM:
3128
-        for(i=0; i<64; i++)
3129
-            c->idct_permutation[i]= i;
3130
-        break;
3131
-    case FF_LIBMPEG2_IDCT_PERM:
3132
-        for(i=0; i<64; i++)
3133
-            c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
3134
-        break;
3135
-    case FF_SIMPLE_IDCT_PERM:
3136
-        for(i=0; i<64; i++)
3137
-            c->idct_permutation[i]= simple_mmx_permutation[i];
3138
-        break;
3139
-    case FF_TRANSPOSE_IDCT_PERM:
3140
-        for(i=0; i<64; i++)
3141
-            c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
3142
-        break;
3143
-    case FF_PARTTRANS_IDCT_PERM:
3144
-        for(i=0; i<64; i++)
3145
-            c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
3146
-        break;
3147
-    case FF_SSE2_IDCT_PERM:
3148
-        for(i=0; i<64; i++)
3149
-            c->idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
3150
-        break;
3151
-    default:
3152
-        av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
3153
-    }
3126
+    ff_init_scantable_permutation(c->idct_permutation,
3127
+                                  c->idct_permutation_type);
3154 3128
 }
... ...
@@ -202,6 +202,8 @@ typedef struct ScanTable{
202 202
 } ScanTable;
203 203
 
204 204
 void ff_init_scantable(uint8_t *, ScanTable *st, const uint8_t *src_scantable);
205
+void ff_init_scantable_permutation(uint8_t *idct_permutation,
206
+                                   int idct_permutation_type);
205 207
 
206 208
 #define EMULATED_EDGE(depth) \
207 209
 void ff_emulated_edge_mc_ ## depth (uint8_t *buf, const uint8_t *src, int linesize,\
... ...
@@ -34,17 +34,11 @@
34 34
 
35 35
 #include "libavutil/intmath.h"
36 36
 #include "avcodec.h"
37
-#include "dsputil.h"
37
+#include "proresdsp.h"
38 38
 #include "get_bits.h"
39 39
 
40
-#define BITS_PER_SAMPLE 10                              ///< output precision of that decoder
41
-#define BIAS     (1 << (BITS_PER_SAMPLE - 1))           ///< bias value for converting signed pixels into unsigned ones
42
-#define CLIP_MIN (1 << (BITS_PER_SAMPLE - 8))           ///< minimum value for clipping resulting pixels
43
-#define CLIP_MAX (1 << BITS_PER_SAMPLE) - CLIP_MIN - 1  ///< maximum value for clipping resulting pixels
44
-
45
-
46 40
 typedef struct {
47
-    DSPContext dsp;
41
+    ProresDSPContext dsp;
48 42
     AVFrame    picture;
49 43
     ScanTable  scantable;
50 44
     int        scantable_type;           ///< -1 = uninitialized, 0 = progressive, 1/2 = interlaced
... ...
@@ -104,8 +98,8 @@ static av_cold int decode_init(AVCodecContext *avctx)
104 104
 
105 105
     avctx->pix_fmt = PIX_FMT_YUV422P10; // set default pixel format
106 106
 
107
-    avctx->bits_per_raw_sample = BITS_PER_SAMPLE;
108
-    dsputil_init(&ctx->dsp, avctx);
107
+    avctx->bits_per_raw_sample = PRORES_BITS_PER_SAMPLE;
108
+    ff_proresdsp_init(&ctx->dsp);
109 109
 
110 110
     avctx->coded_frame = &ctx->picture;
111 111
     avcodec_get_frame_defaults(&ctx->picture);
... ...
@@ -449,48 +443,6 @@ static inline void decode_ac_coeffs(GetBitContext *gb, DCTELEM *out,
449 449
 }
450 450
 
451 451
 
452
-#define CLIP_AND_BIAS(x) (av_clip((x) + BIAS, CLIP_MIN, CLIP_MAX))
453
-
454
-/**
455
- * Add bias value, clamp and output pixels of a slice
456
- */
457
-static void put_pixels(const DCTELEM *in, uint16_t *out, int stride,
458
-                       int mbs_per_slice, int blocks_per_mb)
459
-{
460
-    int mb, x, y, src_offset, dst_offset;
461
-    const DCTELEM *src1, *src2;
462
-    uint16_t *dst1, *dst2;
463
-
464
-    src1 = in;
465
-    src2 = in + (blocks_per_mb << 5);
466
-    dst1 = out;
467
-    dst2 = out + (stride << 3);
468
-
469
-    for (mb = 0; mb < mbs_per_slice; mb++) {
470
-        for (y = 0, dst_offset = 0; y < 8; y++, dst_offset += stride) {
471
-            for (x = 0; x < 8; x++) {
472
-                src_offset = (y << 3) + x;
473
-
474
-                dst1[dst_offset + x] = CLIP_AND_BIAS(src1[src_offset]);
475
-                dst2[dst_offset + x] = CLIP_AND_BIAS(src2[src_offset]);
476
-
477
-                if (blocks_per_mb > 2) {
478
-                    dst1[dst_offset + x + 8] =
479
-                        CLIP_AND_BIAS(src1[src_offset + 64]);
480
-                    dst2[dst_offset + x + 8] =
481
-                        CLIP_AND_BIAS(src2[src_offset + 64]);
482
-                }
483
-            }
484
-        }
485
-
486
-        src1 += blocks_per_mb << 6;
487
-        src2 += blocks_per_mb << 6;
488
-        dst1 += blocks_per_mb << 2;
489
-        dst2 += blocks_per_mb << 2;
490
-    }
491
-}
492
-
493
-
494 452
 /**
495 453
  * Decode a slice plane (luma or chroma).
496 454
  */
... ...
@@ -502,7 +454,7 @@ static void decode_slice_plane(ProresContext *ctx, const uint8_t *buf,
502 502
 {
503 503
     GetBitContext gb;
504 504
     DCTELEM *block_ptr;
505
-    int i, blk_num, blocks_per_slice;
505
+    int mb_num, blocks_per_slice;
506 506
 
507 507
     blocks_per_slice = mbs_per_slice * blocks_per_mb;
508 508
 
... ...
@@ -518,20 +470,20 @@ static void decode_slice_plane(ProresContext *ctx, const uint8_t *buf,
518 518
     /* inverse quantization, inverse transform and output */
519 519
     block_ptr = ctx->blocks;
520 520
 
521
-    for (blk_num = 0; blk_num < blocks_per_slice; blk_num++, block_ptr += 64) {
522
-        /* TODO: the correct solution shoud be (block_ptr[i] * qmat[i]) >> 1
523
-         * and the input of the inverse transform should be scaled by 2
524
-         * in order to avoid rounding errors.
525
-         * Due to the fact the existing Libav transforms are incompatible with
526
-         * that input I temporally introduced the coarse solution below... */
527
-        for (i = 0; i < 64; i++)
528
-            block_ptr[i] = (block_ptr[i] * qmat[i]) >> 2;
529
-
530
-        ctx->dsp.idct(block_ptr);
521
+    for (mb_num = 0; mb_num < mbs_per_slice; mb_num++, out_ptr += blocks_per_mb * 4) {
522
+        ctx->dsp.idct_put(out_ptr,                    linesize, block_ptr, qmat);
523
+        block_ptr += 64;
524
+        if (blocks_per_mb > 2) {
525
+            ctx->dsp.idct_put(out_ptr + 8,            linesize, block_ptr, qmat);
526
+            block_ptr += 64;
527
+        }
528
+        ctx->dsp.idct_put(out_ptr + linesize * 4,     linesize, block_ptr, qmat);
529
+        block_ptr += 64;
530
+        if (blocks_per_mb > 2) {
531
+            ctx->dsp.idct_put(out_ptr + linesize * 4 + 8, linesize, block_ptr, qmat);
532
+            block_ptr += 64;
533
+        }
531 534
     }
532
-
533
-    put_pixels(ctx->blocks, out_ptr, linesize >> 1, mbs_per_slice,
534
-               blocks_per_mb);
535 535
 }
536 536
 
537 537
 
538 538
new file mode 100644
... ...
@@ -0,0 +1,61 @@
0
+/*
1
+ * Apple ProRes compatible decoder
2
+ *
3
+ * Copyright (c) 2010-2011 Maxim Poliakovski
4
+ *
5
+ * This file is part of Libav.
6
+ *
7
+ * Libav is free software; you can redistribute it and/or
8
+ * modify it under the terms of the GNU Lesser General Public
9
+ * License as published by the Free Software Foundation; either
10
+ * version 2.1 of the License, or (at your option) any later version.
11
+ *
12
+ * Libav is distributed in the hope that it will be useful,
13
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
+ * Lesser General Public License for more details.
16
+ *
17
+ * You should have received a copy of the GNU Lesser General Public
18
+ * License along with Libav; if not, write to the Free Software
19
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ */
21
+
22
+#include "proresdsp.h"
23
+#include "simple_idct.h"
24
+
25
+#define BIAS     (1 << (PRORES_BITS_PER_SAMPLE - 1))           ///< bias value for converting signed pixels into unsigned ones
26
+#define CLIP_MIN (1 << (PRORES_BITS_PER_SAMPLE - 8))           ///< minimum value for clipping resulting pixels
27
+#define CLIP_MAX ((1 << PRORES_BITS_PER_SAMPLE) - CLIP_MIN - 1)  ///< maximum value for clipping resulting pixels (fully parenthesized so it expands safely inside any expression)
28
+
29
+#define CLIP_AND_BIAS(x) (av_clip((x) + BIAS, CLIP_MIN, CLIP_MAX))
30
+
31
+/**
32
+ * Add bias value, clamp and output the pixels of one 8x8 block
33
+ */
34
+static void put_pixels(uint16_t *dst, int stride, const DCTELEM *in)
35
+{
36
+    int x, y, src_offset, dst_offset;
37
+
38
+    for (y = 0, dst_offset = 0; y < 8; y++, dst_offset += stride) {
39
+        for (x = 0; x < 8; x++) {
40
+            src_offset = (y << 3) + x;
41
+
42
+            dst[dst_offset + x] = CLIP_AND_BIAS(in[src_offset]);
43
+        }
44
+    }
45
+}
46
+
47
+static void prores_idct_put_c(uint16_t *out, int linesize, DCTELEM *block, const int16_t *qmat)
48
+{
49
+    ff_prores_idct(block, qmat);
50
+    put_pixels(out, linesize >> 1, block);
51
+}
52
+
53
+void ff_proresdsp_init(ProresDSPContext *dsp)
54
+{
55
+    dsp->idct_put = prores_idct_put_c;
56
+    dsp->idct_permutation_type = FF_NO_IDCT_PERM;
57
+
58
+    ff_init_scantable_permutation(dsp->idct_permutation,
59
+                                  dsp->idct_permutation_type);
60
+}
0 61
new file mode 100644
... ...
@@ -0,0 +1,38 @@
0
+/*
1
+ * Apple ProRes compatible decoder
2
+ *
3
+ * Copyright (c) 2010-2011 Maxim Poliakovski
4
+ *
5
+ * This file is part of Libav.
6
+ *
7
+ * Libav is free software; you can redistribute it and/or
8
+ * modify it under the terms of the GNU Lesser General Public
9
+ * License as published by the Free Software Foundation; either
10
+ * version 2.1 of the License, or (at your option) any later version.
11
+ *
12
+ * Libav is distributed in the hope that it will be useful,
13
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
+ * Lesser General Public License for more details.
16
+ *
17
+ * You should have received a copy of the GNU Lesser General Public
18
+ * License along with Libav; if not, write to the Free Software
19
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ */
21
+
22
+#ifndef AVCODEC_PRORESDSP_H
23
+#define AVCODEC_PRORESDSP_H
24
+
25
+#include "dsputil.h"
26
+
27
+#define PRORES_BITS_PER_SAMPLE 10 ///< output precision of prores decoder
28
+
29
+typedef struct {
30
+    int idct_permutation_type;
31
+    uint8_t idct_permutation[64];
32
+    void (* idct_put) (uint16_t *out, int linesize, DCTELEM *block, const int16_t *qmat);
33
+} ProresDSPContext;
34
+
35
+void ff_proresdsp_init(ProresDSPContext *dsp);
36
+
37
+#endif /* AVCODEC_PRORESDSP_H */
... ...
@@ -221,3 +221,20 @@ void ff_simple_idct44_add(uint8_t *dest, int line_size, DCTELEM *block)
221 221
         idct4col_add(dest + i, line_size, block + i);
222 222
     }
223 223
 }
224
+
225
+void ff_prores_idct(DCTELEM *block, const int16_t *qmat)
226
+{
227
+    int i;
228
+
229
+    for (i = 0; i < 64; i++)
230
+        block[i] *= qmat[i];
231
+
232
+    for (i = 0; i < 8; i++)
233
+        idctRowCondDC_10(block + i*8);
234
+
235
+    for (i = 0; i < 64; i++)
236
+        block[i] >>= 2;
237
+
238
+    for (i = 0; i < 8; i++)
239
+        idctSparseCol_10(block + i);
240
+}
... ...
@@ -38,6 +38,12 @@ void ff_simple_idct_8(DCTELEM *block);
38 38
 void ff_simple_idct_put_10(uint8_t *dest, int line_size, DCTELEM *block);
39 39
 void ff_simple_idct_add_10(uint8_t *dest, int line_size, DCTELEM *block);
40 40
 void ff_simple_idct_10(DCTELEM *block);
41
+/**
42
+ * Special version of ff_simple_idct_10() which dequantizes the block in
43
+ * place with qmat and shifts the intermediate values right by 2 bits between
44
+ * the row and column passes to account for larger scale of input coefficients.
45
+ */
46
+void ff_prores_idct(DCTELEM *block, const int16_t *qmat);
41 47
 
42 48
 void ff_simple_idct_mmx(int16_t *block);
43 49
 void ff_simple_idct_add_mmx(uint8_t *dest, int line_size, int16_t *block);