Browse code

Merge commit '88bd7fdc821aaa0cbcf44cf075c62aaa42121e3f'

* commit '88bd7fdc821aaa0cbcf44cf075c62aaa42121e3f':
Drop DCTELEM typedef

Conflicts:
libavcodec/alpha/dsputil_alpha.h
libavcodec/alpha/motion_est_alpha.c
libavcodec/arm/dsputil_init_armv6.c
libavcodec/bfin/dsputil_bfin.h
libavcodec/bfin/pixels_bfin.S
libavcodec/cavs.c
libavcodec/cavsdec.c
libavcodec/dct-test.c
libavcodec/dnxhdenc.c
libavcodec/dsputil.c
libavcodec/dsputil.h
libavcodec/dsputil_template.c
libavcodec/eamad.c
libavcodec/h264_cavlc.c
libavcodec/h264idct_template.c
libavcodec/mpeg12.c
libavcodec/mpegvideo.c
libavcodec/mpegvideo.h
libavcodec/mpegvideo_enc.c
libavcodec/ppc/dsputil_altivec.c
libavcodec/proresdsp.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>

Michael Niedermayer authored on 2013/01/24 01:44:56
Showing 169 changed files
... ...
@@ -148,7 +148,7 @@ Alignment:
148 148
 Some instructions on some architectures have strict alignment restrictions,
149 149
 for example most SSE/SSE2 instructions on x86.
150 150
 The minimum guaranteed alignment is written in the .h files, for example:
151
-    void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size);
151
+    void (*put_pixels_clamped)(const int16_t *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size);
152 152
 
153 153
 
154 154
 General Tips:
... ...
@@ -138,7 +138,7 @@ typedef struct FourXContext {
138 138
     int mv[256];
139 139
     VLC pre_vlc;
140 140
     int last_dc;
141
-    DECLARE_ALIGNED(16, DCTELEM, block)[6][64];
141
+    DECLARE_ALIGNED(16, int16_t, block)[6][64];
142 142
     void *bitstream_buffer;
143 143
     unsigned int bitstream_buffer_size;
144 144
     int version;
... ...
@@ -153,7 +153,7 @@ typedef struct FourXContext {
153 153
 
154 154
 #define MULTIPLY(var, const) (((var) * (const)) >> 16)
155 155
 
156
-static void idct(DCTELEM block[64])
156
+static void idct(int16_t block[64])
157 157
 {
158 158
     int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
159 159
     int tmp10, tmp11, tmp12, tmp13;
... ...
@@ -471,7 +471,7 @@ static int decode_p_frame(FourXContext *f, const uint8_t *buf, int length)
471 471
  * decode block and dequantize.
472 472
  * Note this is almost identical to MJPEG.
473 473
  */
474
-static int decode_i_block(FourXContext *f, DCTELEM *block)
474
+static int decode_i_block(FourXContext *f, int16_t *block)
475 475
 {
476 476
     int code, i, j, level, val;
477 477
 
... ...
@@ -521,7 +521,7 @@ static int decode_i_block(FourXContext *f, DCTELEM *block)
521 521
 
522 522
 static inline void idct_put(FourXContext *f, int x, int y)
523 523
 {
524
-    DCTELEM (*block)[64] = f->block;
524
+    int16_t (*block)[64] = f->block;
525 525
     int stride           = f->current_picture->linesize[0] >> 1;
526 526
     int i;
527 527
     uint16_t *dst = ((uint16_t*)f->current_picture->data[0]) + y * stride + x;
... ...
@@ -542,7 +542,7 @@ static inline void idct_put(FourXContext *f, int x, int y)
542 542
      * cr = (-1b - 4g + 5r) / 14 */
543 543
     for (y = 0; y < 8; y++) {
544 544
         for (x = 0; x < 8; x++) {
545
-            DCTELEM *temp = block[(x >> 2) + 2 * (y >> 2)] +
545
+            int16_t *temp = block[(x >> 2) + 2 * (y >> 2)] +
546 546
                             2 * (x & 3) + 2 * 8 * (y & 3); // FIXME optimize
547 547
             int cb = block[4][x + 8 * y];
548 548
             int cr = block[5][x + 8 * y];
... ...
@@ -23,15 +23,15 @@
23 23
 #include "dsputil_alpha.h"
24 24
 #include "asm.h"
25 25
 
26
-void (*put_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels,
26
+void (*put_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels,
27 27
                                  int line_size);
28
-void (*add_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels,
28
+void (*add_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels,
29 29
                                  int line_size);
30 30
 
31 31
 #if 0
32 32
 /* These functions were the base for the optimized assembler routines,
33 33
    and remain here for documentation purposes.  */
34
-static void put_pixels_clamped_mvi(const DCTELEM *block, uint8_t *pixels,
34
+static void put_pixels_clamped_mvi(const int16_t *block, uint8_t *pixels,
35 35
                                    int line_size)
36 36
 {
37 37
     int i = 8;
... ...
@@ -55,7 +55,7 @@ static void put_pixels_clamped_mvi(const DCTELEM *block, uint8_t *pixels,
55 55
     } while (--i);
56 56
 }
57 57
 
58
-void add_pixels_clamped_mvi(const DCTELEM *block, uint8_t *pixels,
58
+void add_pixels_clamped_mvi(const int16_t *block, uint8_t *pixels,
59 59
                             int line_size)
60 60
 {
61 61
     int h = 8;
... ...
@@ -100,9 +100,9 @@ void add_pixels_clamped_mvi(const DCTELEM *block, uint8_t *pixels,
100 100
 }
101 101
 #endif
102 102
 
103
-static void clear_blocks_axp(DCTELEM *blocks) {
103
+static void clear_blocks_axp(int16_t *blocks) {
104 104
     uint64_t *p = (uint64_t *) blocks;
105
-    int n = sizeof(DCTELEM) * 6 * 64;
105
+    int n = sizeof(int16_t) * 6 * 64;
106 106
 
107 107
     do {
108 108
         p[0] = 0;
... ...
@@ -19,26 +19,26 @@
19 19
 #ifndef AVCODEC_ALPHA_DSPUTIL_ALPHA_H
20 20
 #define AVCODEC_ALPHA_DSPUTIL_ALPHA_H
21 21
 
22
-#include "libavcodec/dsputil.h"
22
+#include <stdint.h>
23 23
 
24
-void ff_simple_idct_axp(DCTELEM *block);
25
-void ff_simple_idct_put_axp(uint8_t *dest, int line_size, DCTELEM *block);
26
-void ff_simple_idct_add_axp(uint8_t *dest, int line_size, DCTELEM *block);
24
+void ff_simple_idct_axp(int16_t *block);
25
+void ff_simple_idct_put_axp(uint8_t *dest, int line_size, int16_t *block);
26
+void ff_simple_idct_add_axp(uint8_t *dest, int line_size, int16_t *block);
27 27
 
28 28
 void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels,
29 29
                         int line_size, int h);
30
-void put_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels,
30
+void put_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels,
31 31
                                 int line_size);
32
-void add_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels,
32
+void add_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels,
33 33
                                 int line_size);
34
-extern void (*put_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels,
34
+extern void (*put_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels,
35 35
                                         int line_size);
36
-extern void (*add_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels,
36
+extern void (*add_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels,
37 37
                                         int line_size);
38 38
 
39
-void get_pixels_mvi(DCTELEM *av_restrict block,
40
-                    const uint8_t *av_restrict pixels, int line_size);
41
-void diff_pixels_mvi(DCTELEM *block, const uint8_t *s1, const uint8_t *s2,
39
+void get_pixels_mvi(int16_t *restrict block,
40
+                    const uint8_t *restrict pixels, int line_size);
41
+void diff_pixels_mvi(int16_t *block, const uint8_t *s1, const uint8_t *s2,
42 42
                      int stride);
43 43
 int pix_abs8x8_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
44 44
 int pix_abs16x16_mvi_asm(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
... ...
@@ -135,7 +135,7 @@ $aligned:
135 135
         .end put_pixels_axp_asm
136 136
 
137 137
 /************************************************************************
138
- * void put_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels,
138
+ * void put_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels,
139 139
  *                                 int line_size)
140 140
  */
141 141
         .align 6
... ...
@@ -185,7 +185,7 @@ put_pixels_clamped_mvi_asm:
185 185
         .end put_pixels_clamped_mvi_asm
186 186
 
187 187
 /************************************************************************
188
- * void add_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels,
188
+ * void add_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels,
189 189
  *                                 int line_size)
190 190
  */
191 191
         .align 6
... ...
@@ -23,8 +23,8 @@
23 23
 #include "dsputil_alpha.h"
24 24
 #include "asm.h"
25 25
 
26
-void get_pixels_mvi(DCTELEM *av_restrict block,
27
-                    const uint8_t *av_restrict pixels, int line_size)
26
+void get_pixels_mvi(int16_t *restrict block,
27
+                    const uint8_t *restrict pixels, int line_size)
28 28
 {
29 29
     int h = 8;
30 30
 
... ...
@@ -40,7 +40,7 @@ void get_pixels_mvi(DCTELEM *av_restrict block,
40 40
     } while (--h);
41 41
 }
42 42
 
43
-void diff_pixels_mvi(DCTELEM *block, const uint8_t *s1, const uint8_t *s2,
43
+void diff_pixels_mvi(int16_t *block, const uint8_t *s1, const uint8_t *s2,
44 44
                      int stride) {
45 45
     int h = 8;
46 46
     uint64_t mask = 0x4040;
... ...
@@ -23,7 +23,7 @@
23 23
 #include "libavcodec/mpegvideo.h"
24 24
 #include "asm.h"
25 25
 
26
-static void dct_unquantize_h263_axp(DCTELEM *block, int n_coeffs,
26
+static void dct_unquantize_h263_axp(int16_t *block, int n_coeffs,
27 27
                                     uint64_t qscale, uint64_t qadd)
28 28
 {
29 29
     uint64_t qmul = qscale << 1;
... ...
@@ -69,12 +69,12 @@ static void dct_unquantize_h263_axp(DCTELEM *block, int n_coeffs,
69 69
     }
70 70
 }
71 71
 
72
-static void dct_unquantize_h263_intra_axp(MpegEncContext *s, DCTELEM *block,
72
+static void dct_unquantize_h263_intra_axp(MpegEncContext *s, int16_t *block,
73 73
                                     int n, int qscale)
74 74
 {
75 75
     int n_coeffs;
76 76
     uint64_t qadd;
77
-    DCTELEM block0 = block[0];
77
+    int16_t block0 = block[0];
78 78
 
79 79
     if (!s->h263_aic) {
80 80
         if (n < 4)
... ...
@@ -96,7 +96,7 @@ static void dct_unquantize_h263_intra_axp(MpegEncContext *s, DCTELEM *block,
96 96
     block[0] = block0;
97 97
 }
98 98
 
99
-static void dct_unquantize_h263_inter_axp(MpegEncContext *s, DCTELEM *block,
99
+static void dct_unquantize_h263_inter_axp(MpegEncContext *s, int16_t *block,
100 100
                                     int n, int qscale)
101 101
 {
102 102
     int n_coeffs = s->inter_scantable.raster_end[s->block_last_index[n]];
... ...
@@ -44,7 +44,7 @@
44 44
 #define COL_SHIFT 20
45 45
 
46 46
 /* 0: all entries 0, 1: only first entry nonzero, 2: otherwise  */
47
-static inline int idct_row(DCTELEM *row)
47
+static inline int idct_row(int16_t *row)
48 48
 {
49 49
     int a0, a1, a2, a3, b0, b1, b2, b3, t;
50 50
     uint64_t l, r, t2;
... ...
@@ -152,7 +152,7 @@ static inline int idct_row(DCTELEM *row)
152 152
     return 2;
153 153
 }
154 154
 
155
-static inline void idct_col(DCTELEM *col)
155
+static inline void idct_col(int16_t *col)
156 156
 {
157 157
     int a0, a1, a2, a3, b0, b1, b2, b3;
158 158
 
... ...
@@ -229,7 +229,7 @@ static inline void idct_col(DCTELEM *col)
229 229
 
230 230
 /* If all rows but the first one are zero after row transformation,
231 231
    all rows will be identical after column transformation.  */
232
-static inline void idct_col2(DCTELEM *col)
232
+static inline void idct_col2(int16_t *col)
233 233
 {
234 234
     int i;
235 235
     uint64_t l, r;
... ...
@@ -251,7 +251,7 @@ static inline void idct_col2(DCTELEM *col)
251 251
     stq(l, col + 14 * 4); stq(r, col + 15 * 4);
252 252
 }
253 253
 
254
-void ff_simple_idct_axp(DCTELEM *block)
254
+void ff_simple_idct_axp(int16_t *block)
255 255
 {
256 256
 
257 257
     int i;
... ...
@@ -291,13 +291,13 @@ void ff_simple_idct_axp(DCTELEM *block)
291 291
     }
292 292
 }
293 293
 
294
-void ff_simple_idct_put_axp(uint8_t *dest, int line_size, DCTELEM *block)
294
+void ff_simple_idct_put_axp(uint8_t *dest, int line_size, int16_t *block)
295 295
 {
296 296
     ff_simple_idct_axp(block);
297 297
     put_pixels_clamped_axp_p(block, dest, line_size);
298 298
 }
299 299
 
300
-void ff_simple_idct_add_axp(uint8_t *dest, int line_size, DCTELEM *block)
300
+void ff_simple_idct_add_axp(uint8_t *dest, int line_size, int16_t *block)
301 301
 {
302 302
     ff_simple_idct_axp(block);
303 303
     add_pixels_clamped_axp_p(block, dest, line_size);
... ...
@@ -23,12 +23,12 @@
23 23
 #include "libavcodec/dsputil.h"
24 24
 #include "dsputil_arm.h"
25 25
 
26
-void ff_j_rev_dct_arm(DCTELEM *data);
27
-void ff_simple_idct_arm(DCTELEM *data);
26
+void ff_j_rev_dct_arm(int16_t *data);
27
+void ff_simple_idct_arm(int16_t *data);
28 28
 
29 29
 /* XXX: local hack */
30
-static void (*ff_put_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size);
31
-static void (*ff_add_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size);
30
+static void (*ff_put_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size);
31
+static void (*ff_add_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size);
32 32
 
33 33
 void ff_put_pixels8_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
34 34
 void ff_put_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
... ...
@@ -48,27 +48,27 @@ CALL_2X_PIXELS(ff_put_no_rnd_pixels16_x2_arm,  ff_put_no_rnd_pixels8_x2_arm, 8)
48 48
 CALL_2X_PIXELS(ff_put_no_rnd_pixels16_y2_arm,  ff_put_no_rnd_pixels8_y2_arm, 8)
49 49
 CALL_2X_PIXELS(ff_put_no_rnd_pixels16_xy2_arm, ff_put_no_rnd_pixels8_xy2_arm,8)
50 50
 
51
-void ff_add_pixels_clamped_arm(const DCTELEM *block, uint8_t *dest,
51
+void ff_add_pixels_clamped_arm(const int16_t *block, uint8_t *dest,
52 52
                                int line_size);
53 53
 
54 54
 /* XXX: those functions should be suppressed ASAP when all IDCTs are
55 55
    converted */
56
-static void j_rev_dct_arm_put(uint8_t *dest, int line_size, DCTELEM *block)
56
+static void j_rev_dct_arm_put(uint8_t *dest, int line_size, int16_t *block)
57 57
 {
58 58
     ff_j_rev_dct_arm (block);
59 59
     ff_put_pixels_clamped(block, dest, line_size);
60 60
 }
61
-static void j_rev_dct_arm_add(uint8_t *dest, int line_size, DCTELEM *block)
61
+static void j_rev_dct_arm_add(uint8_t *dest, int line_size, int16_t *block)
62 62
 {
63 63
     ff_j_rev_dct_arm (block);
64 64
     ff_add_pixels_clamped(block, dest, line_size);
65 65
 }
66
-static void simple_idct_arm_put(uint8_t *dest, int line_size, DCTELEM *block)
66
+static void simple_idct_arm_put(uint8_t *dest, int line_size, int16_t *block)
67 67
 {
68 68
     ff_simple_idct_arm (block);
69 69
     ff_put_pixels_clamped(block, dest, line_size);
70 70
 }
71
-static void simple_idct_arm_add(uint8_t *dest, int line_size, DCTELEM *block)
71
+static void simple_idct_arm_add(uint8_t *dest, int line_size, int16_t *block)
72 72
 {
73 73
     ff_simple_idct_arm (block);
74 74
     ff_add_pixels_clamped(block, dest, line_size);
... ...
@@ -21,9 +21,9 @@
21 21
 #include "libavcodec/dsputil.h"
22 22
 #include "dsputil_arm.h"
23 23
 
24
-void ff_simple_idct_armv5te(DCTELEM *data);
25
-void ff_simple_idct_put_armv5te(uint8_t *dest, int line_size, DCTELEM *data);
26
-void ff_simple_idct_add_armv5te(uint8_t *dest, int line_size, DCTELEM *data);
24
+void ff_simple_idct_armv5te(int16_t *data);
25
+void ff_simple_idct_put_armv5te(uint8_t *dest, int line_size, int16_t *data);
26
+void ff_simple_idct_add_armv5te(uint8_t *dest, int line_size, int16_t *data);
27 27
 
28 28
 av_cold void ff_dsputil_init_armv5te(DSPContext *c, AVCodecContext *avctx)
29 29
 {
... ...
@@ -24,9 +24,9 @@
24 24
 #include "libavcodec/dsputil.h"
25 25
 #include "dsputil_arm.h"
26 26
 
27
-void ff_simple_idct_armv6(DCTELEM *data);
28
-void ff_simple_idct_put_armv6(uint8_t *dest, int line_size, DCTELEM *data);
29
-void ff_simple_idct_add_armv6(uint8_t *dest, int line_size, DCTELEM *data);
27
+void ff_simple_idct_armv6(int16_t *data);
28
+void ff_simple_idct_put_armv6(uint8_t *dest, int line_size, int16_t *data);
29
+void ff_simple_idct_add_armv6(uint8_t *dest, int line_size, int16_t *data);
30 30
 
31 31
 void ff_put_pixels16_armv6(uint8_t *, const uint8_t *, int, int);
32 32
 void ff_put_pixels16_x2_armv6(uint8_t *, const uint8_t *, int, int);
... ...
@@ -46,12 +46,12 @@ void ff_put_pixels8_y2_no_rnd_armv6(uint8_t *, const uint8_t *, int, int);
46 46
 
47 47
 void ff_avg_pixels8_armv6(uint8_t *, const uint8_t *, int, int);
48 48
 
49
-void ff_add_pixels_clamped_armv6(const DCTELEM *block,
50
-                                 uint8_t *av_restrict pixels,
49
+void ff_add_pixels_clamped_armv6(const int16_t *block,
50
+                                 uint8_t *restrict pixels,
51 51
                                  int line_size);
52 52
 
53
-void ff_get_pixels_armv6(DCTELEM *block, const uint8_t *pixels, int stride);
54
-void ff_diff_pixels_armv6(DCTELEM *block, const uint8_t *s1,
53
+void ff_get_pixels_armv6(int16_t *block, const uint8_t *pixels, int stride);
54
+void ff_diff_pixels_armv6(int16_t *block, const uint8_t *s1,
55 55
                           const uint8_t *s2, int stride);
56 56
 
57 57
 int ff_pix_abs16_armv6(void *s, uint8_t *blk1, uint8_t *blk2,
... ...
@@ -25,12 +25,12 @@
25 25
 #include "libavcodec/dsputil.h"
26 26
 #include "dsputil_arm.h"
27 27
 
28
-void ff_simple_idct_neon(DCTELEM *data);
29
-void ff_simple_idct_put_neon(uint8_t *dest, int line_size, DCTELEM *data);
30
-void ff_simple_idct_add_neon(uint8_t *dest, int line_size, DCTELEM *data);
28
+void ff_simple_idct_neon(int16_t *data);
29
+void ff_simple_idct_put_neon(uint8_t *dest, int line_size, int16_t *data);
30
+void ff_simple_idct_add_neon(uint8_t *dest, int line_size, int16_t *data);
31 31
 
32
-void ff_clear_block_neon(DCTELEM *block);
33
-void ff_clear_blocks_neon(DCTELEM *blocks);
32
+void ff_clear_block_neon(int16_t *block);
33
+void ff_clear_blocks_neon(int16_t *blocks);
34 34
 
35 35
 void ff_put_pixels16_neon(uint8_t *, const uint8_t *, int, int);
36 36
 void ff_put_pixels16_x2_neon(uint8_t *, const uint8_t *, int, int);
... ...
@@ -62,9 +62,9 @@ void ff_avg_pixels8_x2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
62 62
 void ff_avg_pixels8_y2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
63 63
 void ff_avg_pixels8_xy2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
64 64
 
65
-void ff_add_pixels_clamped_neon(const DCTELEM *, uint8_t *, int);
66
-void ff_put_pixels_clamped_neon(const DCTELEM *, uint8_t *, int);
67
-void ff_put_signed_pixels_clamped_neon(const DCTELEM *, uint8_t *, int);
65
+void ff_add_pixels_clamped_neon(const int16_t *, uint8_t *, int);
66
+void ff_put_pixels_clamped_neon(const int16_t *, uint8_t *, int);
67
+void ff_put_signed_pixels_clamped_neon(const int16_t *, uint8_t *, int);
68 68
 
69 69
 void ff_put_h264_qpel16_mc00_neon(uint8_t *, uint8_t *, int);
70 70
 void ff_put_h264_qpel16_mc10_neon(uint8_t *, uint8_t *, int);
... ...
@@ -50,22 +50,22 @@ void ff_biweight_h264_pixels_4_neon(uint8_t *dst, uint8_t *src, int stride,
50 50
                                     int height, int log2_den, int weightd,
51 51
                                     int weights, int offset);
52 52
 
53
-void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride);
54
-void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride);
53
+void ff_h264_idct_add_neon(uint8_t *dst, int16_t *block, int stride);
54
+void ff_h264_idct_dc_add_neon(uint8_t *dst, int16_t *block, int stride);
55 55
 void ff_h264_idct_add16_neon(uint8_t *dst, const int *block_offset,
56
-                             DCTELEM *block, int stride,
56
+                             int16_t *block, int stride,
57 57
                              const uint8_t nnzc[6*8]);
58 58
 void ff_h264_idct_add16intra_neon(uint8_t *dst, const int *block_offset,
59
-                                  DCTELEM *block, int stride,
59
+                                  int16_t *block, int stride,
60 60
                                   const uint8_t nnzc[6*8]);
61 61
 void ff_h264_idct_add8_neon(uint8_t **dest, const int *block_offset,
62
-                            DCTELEM *block, int stride,
62
+                            int16_t *block, int stride,
63 63
                             const uint8_t nnzc[6*8]);
64 64
 
65
-void ff_h264_idct8_add_neon(uint8_t *dst, DCTELEM *block, int stride);
66
-void ff_h264_idct8_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride);
65
+void ff_h264_idct8_add_neon(uint8_t *dst, int16_t *block, int stride);
66
+void ff_h264_idct8_dc_add_neon(uint8_t *dst, int16_t *block, int stride);
67 67
 void ff_h264_idct8_add4_neon(uint8_t *dst, const int *block_offset,
68
-                             DCTELEM *block, int stride,
68
+                             int16_t *block, int stride,
69 69
                              const uint8_t nnzc[6*8]);
70 70
 
71 71
 static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth, const int chroma_format_idc)
... ...
@@ -66,7 +66,7 @@ row_loop:
66 66
         ldrsh r2, [lr, # 2]             @ r2 = 'd2'
67 67
 
68 68
         @ Optimization for rows that have all items except the first set to 0
69
-        @ (this works as the DCTELEMS are always 4-byte aligned)
69
+        @ (this works as the int16_t coefficients are always 4-byte aligned)
70 70
         ldr r5, [lr, # 0]
71 71
         ldr r6, [lr, # 4]
72 72
         ldr r3, [lr, # 8]
... ...
@@ -34,9 +34,9 @@ CHK_OFFS(MpegEncContext, inter_scantable.raster_end, INTER_SCANTAB_RASTER_END);
34 34
 CHK_OFFS(MpegEncContext, h263_aic,         H263_AIC);
35 35
 #endif
36 36
 
37
-void ff_dct_unquantize_h263_inter_neon(MpegEncContext *s, DCTELEM *block,
37
+void ff_dct_unquantize_h263_inter_neon(MpegEncContext *s, int16_t *block,
38 38
                                        int n, int qscale);
39
-void ff_dct_unquantize_h263_intra_neon(MpegEncContext *s, DCTELEM *block,
39
+void ff_dct_unquantize_h263_intra_neon(MpegEncContext *s, int16_t *block,
40 40
                                        int n, int qscale);
41 41
 
42 42
 void ff_MPV_common_init_arm(MpegEncContext *s)
... ...
@@ -25,7 +25,7 @@
25 25
 #include "libavcodec/mpegvideo.h"
26 26
 #include "mpegvideo_arm.h"
27 27
 
28
-void ff_dct_unquantize_h263_armv5te(DCTELEM *block, int qmul, int qadd, int count);
28
+void ff_dct_unquantize_h263_armv5te(int16_t *block, int qmul, int qadd, int count);
29 29
 
30 30
 #ifdef ENABLE_ARM_TESTS
31 31
 /**
... ...
@@ -33,7 +33,7 @@ void ff_dct_unquantize_h263_armv5te(DCTELEM *block, int qmul, int qadd, int coun
33 33
  * have optimized implementations for each architecture. Is also used as a reference
34 34
  * implementation in regression tests
35 35
  */
36
-static inline void dct_unquantize_h263_helper_c(DCTELEM *block, int qmul, int qadd, int count)
36
+static inline void dct_unquantize_h263_helper_c(int16_t *block, int qmul, int qadd, int count)
37 37
 {
38 38
     int i, level;
39 39
     for (i = 0; i < count; i++) {
... ...
@@ -51,7 +51,7 @@ static inline void dct_unquantize_h263_helper_c(DCTELEM *block, int qmul, int qa
51 51
 #endif
52 52
 
53 53
 static void dct_unquantize_h263_intra_armv5te(MpegEncContext *s,
54
-                                  DCTELEM *block, int n, int qscale)
54
+                                  int16_t *block, int n, int qscale)
55 55
 {
56 56
     int level, qmul, qadd;
57 57
     int nCoeffs;
... ...
@@ -80,7 +80,7 @@ static void dct_unquantize_h263_intra_armv5te(MpegEncContext *s,
80 80
 }
81 81
 
82 82
 static void dct_unquantize_h263_inter_armv5te(MpegEncContext *s,
83
-                                  DCTELEM *block, int n, int qscale)
83
+                                  int16_t *block, int n, int qscale)
84 84
 {
85 85
     int qmul, qadd;
86 86
     int nCoeffs;
... ...
@@ -24,11 +24,11 @@
24 24
 #include "libavcodec/rv34dsp.h"
25 25
 #include "libavutil/arm/cpu.h"
26 26
 
27
-void ff_rv34_inv_transform_noround_neon(DCTELEM *block);
27
+void ff_rv34_inv_transform_noround_neon(int16_t *block);
28 28
 
29
-void ff_rv34_inv_transform_noround_dc_neon(DCTELEM *block);
29
+void ff_rv34_inv_transform_noround_dc_neon(int16_t *block);
30 30
 
31
-void ff_rv34_idct_add_neon(uint8_t *dst, ptrdiff_t stride, DCTELEM *block);
31
+void ff_rv34_idct_add_neon(uint8_t *dst, ptrdiff_t stride, int16_t *block);
32 32
 void ff_rv34_idct_dc_add_neon(uint8_t *dst, ptrdiff_t stride, int dc);
33 33
 
34 34
 void ff_rv34dsp_init_arm(RV34DSPContext *c, DSPContext* dsp)
... ...
@@ -67,7 +67,7 @@
67 67
         vsub.s32        q15, q14, q9    @ z0 - z3
68 68
 .endm
69 69
 
70
-/* void rv34_idct_add_c(uint8_t *dst, int stride, DCTELEM *block) */
70
+/* void rv34_idct_add_c(uint8_t *dst, int stride, int16_t *block) */
71 71
 function ff_rv34_idct_add_neon, export=1
72 72
         mov             r3,  r0
73 73
         rv34_inv_transform   r2
... ...
@@ -97,7 +97,7 @@ function ff_rv34_idct_add_neon, export=1
97 97
         bx              lr
98 98
 endfunc
99 99
 
100
-/* void rv34_inv_transform_noround_neon(DCTELEM *block); */
100
+/* void rv34_inv_transform_noround_neon(int16_t *block); */
101 101
 function ff_rv34_inv_transform_noround_neon, export=1
102 102
         rv34_inv_transform   r0
103 103
         vshl.s32        q11, q2,  #1
... ...
@@ -142,7 +142,7 @@ function ff_rv34_idct_dc_add_neon, export=1
142 142
         bx              lr
143 143
 endfunc
144 144
 
145
-/* void rv34_inv_transform_dc_noround_c(DCTELEM *block) */
145
+/* void rv34_inv_transform_dc_noround_c(int16_t *block) */
146 146
 function ff_rv34_inv_transform_noround_dc_neon, export=1
147 147
         vld1.16         {d28[]}, [r0,:16]       @ block[0]
148 148
         vmov.i16        d4,  #251
... ...
@@ -375,7 +375,7 @@ endfunc
375 375
         sub    r0, r0, #(16*7)
376 376
         .endm
377 377
 
378
-/* void ff_simple_idct_armv6(DCTELEM *data); */
378
+/* void ff_simple_idct_armv6(int16_t *data); */
379 379
 function ff_simple_idct_armv6, export=1
380 380
         push   {r4-r11, lr}
381 381
         sub    sp, sp, #128
... ...
@@ -390,7 +390,7 @@ function ff_simple_idct_armv6, export=1
390 390
         pop    {r4-r11, pc}
391 391
 endfunc
392 392
 
393
-/* ff_simple_idct_add_armv6(uint8_t *dest, int line_size, DCTELEM *data); */
393
+/* ff_simple_idct_add_armv6(uint8_t *dest, int line_size, int16_t *data); */
394 394
 function ff_simple_idct_add_armv6, export=1
395 395
         push   {r0, r1, r4-r11, lr}
396 396
         sub    sp, sp, #128
... ...
@@ -407,7 +407,7 @@ function ff_simple_idct_add_armv6, export=1
407 407
         pop    {r4-r11, pc}
408 408
 endfunc
409 409
 
410
-/* ff_simple_idct_put_armv6(uint8_t *dest, int line_size, DCTELEM *data); */
410
+/* ff_simple_idct_put_armv6(uint8_t *dest, int line_size, int16_t *data); */
411 411
 function ff_simple_idct_put_armv6, export=1
412 412
         push   {r0, r1, r4-r11, lr}
413 413
         sub    sp, sp, #128
... ...
@@ -261,7 +261,7 @@ endconst
261 261
         pop             {r4-r7, pc}
262 262
         .endm
263 263
 
264
-/* void ff_simple_idct_put_neon(uint8_t *dst, int line_size, DCTELEM *data); */
264
+/* void ff_simple_idct_put_neon(uint8_t *dst, int line_size, int16_t *data); */
265 265
 function ff_simple_idct_put_neon, export=1
266 266
         idct_start      r2
267 267
 
... ...
@@ -316,7 +316,7 @@ function idct_col4_add8_neon
316 316
         bx              lr
317 317
 endfunc
318 318
 
319
-/* void ff_simple_idct_add_neon(uint8_t *dst, int line_size, DCTELEM *data); */
319
+/* void ff_simple_idct_add_neon(uint8_t *dst, int line_size, int16_t *data); */
320 320
 function ff_simple_idct_add_neon, export=1
321 321
         idct_start      r2
322 322
 
... ...
@@ -355,7 +355,7 @@ function idct_col4_st16_neon
355 355
         bx              lr
356 356
 endfunc
357 357
 
358
-/* void ff_simple_idct_neon(DCTELEM *data); */
358
+/* void ff_simple_idct_neon(int16_t *data); */
359 359
 function ff_simple_idct_neon, export=1
360 360
         idct_start      r0
361 361
 
... ...
@@ -24,9 +24,9 @@
24 24
 #include "libavcodec/dsputil.h"
25 25
 #include "libavcodec/vp3dsp.h"
26 26
 
27
-void ff_vp3_idct_put_neon(uint8_t *dest, int line_size, DCTELEM *data);
28
-void ff_vp3_idct_add_neon(uint8_t *dest, int line_size, DCTELEM *data);
29
-void ff_vp3_idct_dc_add_neon(uint8_t *dest, int line_size, const DCTELEM *data);
27
+void ff_vp3_idct_put_neon(uint8_t *dest, int line_size, int16_t *data);
28
+void ff_vp3_idct_add_neon(uint8_t *dest, int line_size, int16_t *data);
29
+void ff_vp3_idct_dc_add_neon(uint8_t *dest, int line_size, const int16_t *data);
30 30
 
31 31
 void ff_vp3_v_loop_filter_neon(uint8_t *, int, int *);
32 32
 void ff_vp3_h_loop_filter_neon(uint8_t *, int, int *);
... ...
@@ -23,7 +23,7 @@
23 23
 
24 24
 #if HAVE_ARMV6_EXTERNAL
25 25
 #define decode_block_coeffs_internal ff_decode_block_coeffs_armv6
26
-int ff_decode_block_coeffs_armv6(VP56RangeCoder *rc, DCTELEM block[16],
26
+int ff_decode_block_coeffs_armv6(VP56RangeCoder *rc, int16_t block[16],
27 27
                                  uint8_t probs[8][3][NUM_DCT_TOKENS-1],
28 28
                                  int i, uint8_t *token_prob, int16_t qmul[2]);
29 29
 #endif
... ...
@@ -56,7 +56,7 @@
56 56
 
57 57
 @ idct
58 58
 
59
-@ void vp8_luma_dc_wht(DCTELEM block[4][4][16], DCTELEM dc[16])
59
+@ void vp8_luma_dc_wht(int16_t block[4][4][16], int16_t dc[16])
60 60
 function ff_vp8_luma_dc_wht_armv6, export=1
61 61
         push            {r4-r10, lr}
62 62
 
... ...
@@ -179,7 +179,7 @@ function ff_vp8_luma_dc_wht_armv6, export=1
179 179
         pop             {r4-r10, pc}
180 180
 endfunc
181 181
 
182
-@ void vp8_luma_dc_wht_dc(DCTELEM block[4][4][16], DCTELEM dc[16])
182
+@ void vp8_luma_dc_wht_dc(int16_t block[4][4][16], int16_t dc[16])
183 183
 function ff_vp8_luma_dc_wht_dc_armv6, export=1
184 184
         ldrsh           r2,  [r1]
185 185
         mov             r3,  #0
... ...
@@ -192,7 +192,7 @@ function ff_vp8_luma_dc_wht_dc_armv6, export=1
192 192
         bx              lr
193 193
 endfunc
194 194
 
195
-@ void vp8_idct_add(uint8_t *dst, DCTELEM block[16], int stride)
195
+@ void vp8_idct_add(uint8_t *dst, int16_t block[16], int stride)
196 196
 function ff_vp8_idct_add_armv6, export=1
197 197
         push            {r4-r12, lr}
198 198
         sub             sp,  sp,  #32
... ...
@@ -314,7 +314,7 @@ function ff_vp8_idct_add_armv6, export=1
314 314
         pop             {r4-r12, pc}
315 315
 endfunc
316 316
 
317
-@ void vp8_idct_dc_add(uint8_t *dst, DCTELEM block[16], int stride)
317
+@ void vp8_idct_dc_add(uint8_t *dst, int16_t block[16], int stride)
318 318
 function ff_vp8_idct_dc_add_armv6, export=1
319 319
         push            {r4-r6, lr}
320 320
         add             r6,  r0,  r2,  lsl #1
... ...
@@ -355,7 +355,7 @@ function ff_vp8_idct_dc_add_armv6, export=1
355 355
         pop             {r4-r6, pc}
356 356
 endfunc
357 357
 
358
-@ void vp8_idct_dc_add4uv(uint8_t *dst, DCTELEM block[4][16], int stride)
358
+@ void vp8_idct_dc_add4uv(uint8_t *dst, int16_t block[4][16], int stride)
359 359
 function ff_vp8_idct_dc_add4uv_armv6, export=1
360 360
         push            {r4, lr}
361 361
 
... ...
@@ -371,7 +371,7 @@ function ff_vp8_idct_dc_add4uv_armv6, export=1
371 371
         pop             {r4, pc}
372 372
 endfunc
373 373
 
374
-@ void vp8_idct_dc_add4y(uint8_t *dst, DCTELEM block[4][16], int stride)
374
+@ void vp8_idct_dc_add4y(uint8_t *dst, int16_t block[4][16], int stride)
375 375
 function ff_vp8_idct_dc_add4y_armv6, export=1
376 376
         push            {r4, lr}
377 377
 
... ...
@@ -22,13 +22,13 @@
22 22
 #include "libavcodec/vp8dsp.h"
23 23
 #include "vp8dsp.h"
24 24
 
25
-void ff_vp8_luma_dc_wht_armv6(DCTELEM block[4][4][16], DCTELEM dc[16]);
26
-void ff_vp8_luma_dc_wht_dc_armv6(DCTELEM block[4][4][16], DCTELEM dc[16]);
25
+void ff_vp8_luma_dc_wht_armv6(int16_t block[4][4][16], int16_t dc[16]);
26
+void ff_vp8_luma_dc_wht_dc_armv6(int16_t block[4][4][16], int16_t dc[16]);
27 27
 
28
-void ff_vp8_idct_add_armv6(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride);
29
-void ff_vp8_idct_dc_add_armv6(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride);
30
-void ff_vp8_idct_dc_add4y_armv6(uint8_t *dst, DCTELEM block[4][16], ptrdiff_t stride);
31
-void ff_vp8_idct_dc_add4uv_armv6(uint8_t *dst, DCTELEM block[4][16], ptrdiff_t stride);
28
+void ff_vp8_idct_add_armv6(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
29
+void ff_vp8_idct_dc_add_armv6(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
30
+void ff_vp8_idct_dc_add4y_armv6(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride);
31
+void ff_vp8_idct_dc_add4uv_armv6(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride);
32 32
 
33 33
 VP8_LF(armv6);
34 34
 
... ...
@@ -22,12 +22,12 @@
22 22
 #include "libavcodec/vp8dsp.h"
23 23
 #include "vp8dsp.h"
24 24
 
25
-void ff_vp8_luma_dc_wht_neon(DCTELEM block[4][4][16], DCTELEM dc[16]);
25
+void ff_vp8_luma_dc_wht_neon(int16_t block[4][4][16], int16_t dc[16]);
26 26
 
27
-void ff_vp8_idct_add_neon(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride);
28
-void ff_vp8_idct_dc_add_neon(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride);
29
-void ff_vp8_idct_dc_add4y_neon(uint8_t *dst, DCTELEM block[4][16], ptrdiff_t stride);
30
-void ff_vp8_idct_dc_add4uv_neon(uint8_t *dst, DCTELEM block[4][16], ptrdiff_t stride);
27
+void ff_vp8_idct_add_neon(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
28
+void ff_vp8_idct_dc_add_neon(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
29
+void ff_vp8_idct_dc_add4y_neon(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride);
30
+void ff_vp8_idct_dc_add4uv_neon(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride);
31 31
 
32 32
 VP8_LF(neon);
33 33
 
... ...
@@ -48,7 +48,7 @@ typedef struct ASV1Context{
48 48
     int mb_height;
49 49
     int mb_width2;
50 50
     int mb_height2;
51
-    DECLARE_ALIGNED(16, DCTELEM, block)[6][64];
51
+    DECLARE_ALIGNED(16, int16_t, block)[6][64];
52 52
     uint16_t intra_matrix[64];
53 53
     int q_intra_matrix[64];
54 54
     uint8_t *bitstream_buffer;
... ...
@@ -94,7 +94,7 @@ static inline int asv2_get_level(GetBitContext *gb)
94 94
         return code - 31;
95 95
 }
96 96
 
97
-static inline int asv1_decode_block(ASV1Context *a, DCTELEM block[64])
97
+static inline int asv1_decode_block(ASV1Context *a, int16_t block[64])
98 98
 {
99 99
     int i;
100 100
 
... ...
@@ -125,7 +125,7 @@ static inline int asv1_decode_block(ASV1Context *a, DCTELEM block[64])
125 125
     return 0;
126 126
 }
127 127
 
128
-static inline int asv2_decode_block(ASV1Context *a, DCTELEM block[64])
128
+static inline int asv2_decode_block(ASV1Context *a, int16_t block[64])
129 129
 {
130 130
     int i, count, ccp;
131 131
 
... ...
@@ -161,7 +161,7 @@ static inline int asv2_decode_block(ASV1Context *a, DCTELEM block[64])
161 161
     return 0;
162 162
 }
163 163
 
164
-static inline int decode_mb(ASV1Context *a, DCTELEM block[6][64])
164
+static inline int decode_mb(ASV1Context *a, int16_t block[6][64])
165 165
 {
166 166
     int i;
167 167
 
... ...
@@ -183,7 +183,7 @@ static inline int decode_mb(ASV1Context *a, DCTELEM block[6][64])
183 183
 
184 184
 static inline void idct_put(ASV1Context *a, int mb_x, int mb_y)
185 185
 {
186
-    DCTELEM (*block)[64] = a->block;
186
+    int16_t (*block)[64] = a->block;
187 187
     int linesize         = a->picture.linesize[0];
188 188
 
189 189
     uint8_t *dest_y  = a->picture.data[0] + (mb_y * 16* linesize              ) + mb_x * 16;
... ...
@@ -56,7 +56,7 @@ static inline void asv2_put_level(PutBitContext *pb, int level){
56 56
     }
57 57
 }
58 58
 
59
-static inline void asv1_encode_block(ASV1Context *a, DCTELEM block[64]){
59
+static inline void asv1_encode_block(ASV1Context *a, int16_t block[64]){
60 60
     int i;
61 61
     int nc_count=0;
62 62
 
... ...
@@ -89,7 +89,7 @@ static inline void asv1_encode_block(ASV1Context *a, DCTELEM block[64]){
89 89
     put_bits(&a->pb, ff_asv_ccp_tab[16][1], ff_asv_ccp_tab[16][0]);
90 90
 }
91 91
 
92
-static inline void asv2_encode_block(ASV1Context *a, DCTELEM block[64]){
92
+static inline void asv2_encode_block(ASV1Context *a, int16_t block[64]){
93 93
     int i;
94 94
     int count=0;
95 95
 
... ...
@@ -130,7 +130,7 @@ static inline void asv2_encode_block(ASV1Context *a, DCTELEM block[64]){
130 130
 
131 131
 #define MAX_MB_SIZE (30*16*16*3/2/8)
132 132
 
133
-static inline int encode_mb(ASV1Context *a, DCTELEM block[6][64]){
133
+static inline int encode_mb(ASV1Context *a, int16_t block[6][64]){
134 134
     int i;
135 135
 
136 136
     if (a->pb.buf_end - a->pb.buf - (put_bits_count(&a->pb)>>3) < MAX_MB_SIZE) {
... ...
@@ -149,7 +149,7 @@ static inline int encode_mb(ASV1Context *a, DCTELEM block[6][64]){
149 149
 }
150 150
 
151 151
 static inline void dct_get(ASV1Context *a, int mb_x, int mb_y){
152
-    DCTELEM (*block)[64]= a->block;
152
+    int16_t (*block)[64]= a->block;
153 153
     int linesize= a->picture.linesize[0];
154 154
     int i;
155 155
 
... ...
@@ -21,7 +21,7 @@
21 21
    low level assembler interface wrapper
22 22
 
23 23
 DEFUN(put_pixels_clamped,mL1,
24
-        (DCTELEM *block, uint8_t *dest, int line_size)):
24
+        (int16_t *block, uint8_t *dest, int line_size)):
25 25
 
26 26
       body
27 27
 
... ...
@@ -27,20 +27,20 @@
27 27
 
28 28
 int off;
29 29
 
30
-static void bfin_idct_add (uint8_t *dest, int line_size, DCTELEM *block)
30
+static void bfin_idct_add (uint8_t *dest, int line_size, int16_t *block)
31 31
 {
32 32
     ff_bfin_idct (block);
33 33
     ff_bfin_add_pixels_clamped (block, dest, line_size);
34 34
 }
35 35
 
36
-static void bfin_idct_put (uint8_t *dest, int line_size, DCTELEM *block)
36
+static void bfin_idct_put (uint8_t *dest, int line_size, int16_t *block)
37 37
 {
38 38
     ff_bfin_idct (block);
39 39
     ff_bfin_put_pixels_clamped (block, dest, line_size);
40 40
 }
41 41
 
42 42
 
43
-static void bfin_clear_blocks (DCTELEM *blocks)
43
+static void bfin_clear_blocks (int16_t *blocks)
44 44
 {
45 45
     // This is just a simple memset.
46 46
     //
... ...
@@ -24,8 +24,9 @@
24 24
 #ifndef AVCODEC_BFIN_DSPUTIL_BFIN_H
25 25
 #define AVCODEC_BFIN_DSPUTIL_BFIN_H
26 26
 
27
+#include <stdint.h>
28
+
27 29
 #include "config.h"
28
-#include "libavcodec/dsputil.h"
29 30
 
30 31
 #if defined(__FDPIC__) && CONFIG_SRAM
31 32
 #define attribute_l1_text  __attribute__ ((l1_text))
... ...
@@ -35,15 +36,15 @@
35 35
 #define attribute_l1_data_b
36 36
 #endif
37 37
 
38
-void ff_bfin_idct (DCTELEM *block) attribute_l1_text;
39
-void ff_bfin_fdct (DCTELEM *block) attribute_l1_text;
40
-void ff_bfin_vp3_idct (DCTELEM *block);
41
-void ff_bfin_vp3_idct_put (uint8_t *dest, int line_size, DCTELEM *block);
42
-void ff_bfin_vp3_idct_add (uint8_t *dest, int line_size, DCTELEM *block);
43
-void ff_bfin_add_pixels_clamped (const DCTELEM *block, uint8_t *dest, int line_size) attribute_l1_text;
44
-void ff_bfin_put_pixels_clamped (const DCTELEM *block, uint8_t *dest, int line_size) attribute_l1_text;
45
-void ff_bfin_diff_pixels (DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride)  attribute_l1_text;
46
-void ff_bfin_get_pixels  (DCTELEM *av_restrict block, const uint8_t *pixels, int line_size) attribute_l1_text;
38
+void ff_bfin_idct (int16_t *block) attribute_l1_text;
39
+void ff_bfin_fdct (int16_t *block) attribute_l1_text;
40
+void ff_bfin_vp3_idct (int16_t *block);
41
+void ff_bfin_vp3_idct_put (uint8_t *dest, int line_size, int16_t *block);
42
+void ff_bfin_vp3_idct_add (uint8_t *dest, int line_size, int16_t *block);
43
+void ff_bfin_add_pixels_clamped (const int16_t *block, uint8_t *dest, int line_size) attribute_l1_text;
44
+void ff_bfin_put_pixels_clamped (const int16_t *block, uint8_t *dest, int line_size) attribute_l1_text;
45
+void ff_bfin_diff_pixels (int16_t *block, const uint8_t *s1, const uint8_t *s2, int stride)  attribute_l1_text;
46
+void ff_bfin_get_pixels  (int16_t *restrict block, const uint8_t *pixels, int line_size) attribute_l1_text;
47 47
 int  ff_bfin_pix_norm1  (uint8_t * pix, int line_size) attribute_l1_text;
48 48
 int  ff_bfin_z_sad8x8   (uint8_t *blk1, uint8_t *blk2, int dsz, int line_size, int h) attribute_l1_text;
49 49
 int  ff_bfin_z_sad16x16 (uint8_t *blk1, uint8_t *blk2, int dsz, int line_size, int h) attribute_l1_text;
... ...
@@ -20,7 +20,7 @@
20 20
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 21
  */
22 22
 /*
23
-  void ff_bfin_fdct (DCTELEM *buf);
23
+  void ff_bfin_fdct (int16_t *buf);
24 24
 
25 25
   This implementation works only for 8x8 input. The range of input
26 26
   must be -256 to 255 i.e. 8bit input represented in a 16bit data
... ...
@@ -61,9 +61,9 @@ Notation
61 61
   Other registers used:
62 62
         I0, I1, I2, I3, B0, B2, B3, M0, M1, L3 registers and LC0.
63 63
 
64
-  Input - r0 - pointer to start of DCTELEM *block
64
+  Input - r0 - pointer to start of int16_t *block
65 65
 
66
-  Output - The DCT output coefficients in the DCTELEM *block
66
+  Output - The DCT output coefficients in the int16_t *block
67 67
 
68 68
   Register constraint:
69 69
                This code is called from jpeg_encode.
... ...
@@ -147,7 +147,7 @@ vtmp:   .space 128
147 147
 
148 148
 .text
149 149
 DEFUN(fdct,mL1,
150
-        (DCTELEM *block)):
150
+        (int16_t *block)):
151 151
     [--SP] = (R7:4, P5:3);          // Push the registers onto the stack.
152 152
 
153 153
     b0 = r0;
... ...
@@ -22,7 +22,7 @@
22 22
 /*
23 23
    This blackfin DSP code implements an 8x8 inverse type II DCT.
24 24
 
25
-Prototype       : void ff_bfin_idct(DCTELEM *in)
25
+Prototype       : void ff_bfin_idct(int16_t *in)
26 26
 
27 27
 Registers Used  : A0, A1, R0-R7, I0-I3, B0, B2, B3, M0-M2, L0-L3, P0-P5, LC0.
28 28
 
... ...
@@ -90,7 +90,7 @@ vtmp: .space 256
90 90
 
91 91
 .text
92 92
 DEFUN(idct,mL1,
93
-        (DCTELEM *block)):
93
+        (int16_t *block)):
94 94
 
95 95
 /********************** Function Prologue *********************************/
96 96
     link 16;
... ...
@@ -26,7 +26,7 @@
26 26
 #include "dsputil_bfin.h"
27 27
 
28 28
 static int dct_quantize_bfin (MpegEncContext *s,
29
-                              DCTELEM *block, int n,
29
+                              int16_t *block, int n,
30 30
                               int qscale, int *overflow)
31 31
 {
32 32
     int last_non_zero, q, start_i;
... ...
@@ -21,7 +21,7 @@
21 21
 #include "config_bfin.h"
22 22
 
23 23
 DEFUN(put_pixels_clamped,mL1,
24
-        (DCTELEM *block, uint8_t *dest, int line_size)):
24
+        (int16_t *block, uint8_t *dest, int line_size)):
25 25
     [--SP] = (R7:4);
26 26
     R4 = 0;
27 27
     R5.l = 0x00ff;
... ...
@@ -51,7 +51,7 @@ ppc$1: R2 = Max(R0, R4) (V)      || [I1++M1] = R6;
51 51
 DEFUN_END(put_pixels_clamped)
52 52
 
53 53
 DEFUN(add_pixels_clamped,mL1,
54
-        (DCTELEM *block, uint8_t *dest, int line_size)):
54
+        (int16_t *block, uint8_t *dest, int line_size)):
55 55
     [-- SP] = (R7:4);
56 56
     R4 = 0;
57 57
     I0 = 0;
... ...
@@ -442,7 +442,7 @@ LE$8OT: DISALGNEXCPT                       || R2  =[I1++]   || [I3++M2] = R5;
442 442
         rts;
443 443
 
444 444
 DEFUN(diff_pixels,mL1,
445
-       (DCTELEM *block, uint8_t *s1, uint8_t *s2, int stride)):
445
+       (int16_t *block, uint8_t *s1, uint8_t *s2, int stride)):
446 446
         link 0;
447 447
         [--sp] = (r7:4);
448 448
         p0=8;
... ...
@@ -518,7 +518,7 @@ DEFUN_END(pix_sum)
518 518
 
519 519
 
520 520
 DEFUN(get_pixels,mL1,
521
-        (DCTELEM *av_restrict block, const uint8_t *pixels, int line_size)):
521
+        (int16_t *av_restrict block, const uint8_t *pixels, int line_size)):
522 522
         [--sp] = (r7:4);
523 523
         i3=r0;        // dest
524 524
         i0=r1;        // src0
... ...
@@ -23,7 +23,7 @@
23 23
 #include "dsputil_bfin.h"
24 24
 
25 25
 /* Intra iDCT offset 128 */
26
-void ff_bfin_vp3_idct_put (uint8_t *dest, int line_size, DCTELEM *block)
26
+void ff_bfin_vp3_idct_put (uint8_t *dest, int line_size, int16_t *block)
27 27
 {
28 28
     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP + 128;
29 29
     int i,j;
... ...
@@ -36,7 +36,7 @@ void ff_bfin_vp3_idct_put (uint8_t *dest, int line_size, DCTELEM *block)
36 36
 }
37 37
 
38 38
 /* Inter iDCT */
39
-void ff_bfin_vp3_idct_add (uint8_t *dest, int line_size, DCTELEM *block)
39
+void ff_bfin_vp3_idct_add (uint8_t *dest, int line_size, int16_t *block)
40 40
 {
41 41
     ff_bfin_vp3_idct (block);
42 42
     ff_bfin_add_pixels_clamped (block, dest, line_size);
... ...
@@ -22,7 +22,7 @@
22 22
 /*
23 23
    This blackfin DSP code implements an 8x8 inverse type II DCT.
24 24
 
25
-Prototype       : void ff_bfin_vp3_idct(DCTELEM *in)
25
+Prototype       : void ff_bfin_vp3_idct(int16_t *in)
26 26
 
27 27
 Registers Used  : A0, A1, R0-R7, I0-I3, B0, B2, B3, M0-M2, L0-L3, P0-P5, LC0.
28 28
 
... ...
@@ -63,7 +63,7 @@ vtmp: .space 256
63 63
 
64 64
 .text
65 65
 DEFUN(vp3_idct,mL1,
66
-        (DCTELEM *block)):
66
+        (int16_t *block)):
67 67
 
68 68
 /********************** Function Prologue *********************************/
69 69
     link 16;
... ...
@@ -700,7 +700,7 @@ static int read_dct_coeffs(GetBitContext *gb, int32_t block[64], const uint8_t *
700 700
  * @param masks_count number of masks to decode
701 701
  * @return 0 on success, negative value in other cases
702 702
  */
703
-static int read_residue(GetBitContext *gb, DCTELEM block[64], int masks_count)
703
+static int read_residue(GetBitContext *gb, int16_t block[64], int masks_count)
704 704
 {
705 705
     int coef_list[128];
706 706
     int mode_list[128];
... ...
@@ -804,7 +804,7 @@ static int binkb_decode_plane(BinkContext *c, GetBitContext *gb, int plane_idx,
804 804
     int v, col[2];
805 805
     const uint8_t *scan;
806 806
     int xoff, yoff;
807
-    LOCAL_ALIGNED_16(DCTELEM, block, [64]);
807
+    LOCAL_ALIGNED_16(int16_t, block, [64]);
808 808
     LOCAL_ALIGNED_16(int32_t, dctblock, [64]);
809 809
     int coordmap[64];
810 810
     int ybias = is_key ? -15 : 0;
... ...
@@ -950,7 +950,7 @@ static int bink_decode_plane(BinkContext *c, GetBitContext *gb, int plane_idx,
950 950
     int v, col[2];
951 951
     const uint8_t *scan;
952 952
     int xoff, yoff;
953
-    LOCAL_ALIGNED_16(DCTELEM, block, [64]);
953
+    LOCAL_ALIGNED_16(int16_t, block, [64]);
954 954
     LOCAL_ALIGNED_16(uint8_t, ublock, [64]);
955 955
     LOCAL_ALIGNED_16(int32_t, dctblock, [64]);
956 956
     int coordmap[64];
... ...
@@ -715,7 +715,7 @@ void ff_cavs_init_top_lines(AVSContext *h) {
715 715
     /* alloc space for co-located MVs and types */
716 716
     h->col_mv       = av_mallocz( h->mb_width*h->mb_height*4*sizeof(cavs_vector));
717 717
     h->col_type_base = av_mallocz(h->mb_width*h->mb_height);
718
-    h->block        = av_mallocz(64*sizeof(DCTELEM));
718
+    h->block        = av_mallocz(64*sizeof(int16_t));
719 719
 }
720 720
 
721 721
 av_cold int ff_cavs_init(AVCodecContext *avctx) {
... ...
@@ -234,7 +234,7 @@ typedef struct AVSContext {
234 234
     uint8_t *edge_emu_buffer;
235 235
 
236 236
     int got_keyframe;
237
-    DCTELEM *block;
237
+    int16_t *block;
238 238
 } AVSContext;
239 239
 
240 240
 extern const uint8_t     ff_cavs_partition_flags[30];
... ...
@@ -517,8 +517,8 @@ static inline int get_ue_code(GetBitContext *gb, int order)
517 517
     return get_ue_golomb(gb);
518 518
 }
519 519
 
520
-static inline int dequant(AVSContext *h, DCTELEM *level_buf, uint8_t *run_buf,
521
-                          DCTELEM *dst, int mul, int shift, int coeff_num)
520
+static inline int dequant(AVSContext *h, int16_t *level_buf, uint8_t *run_buf,
521
+                          int16_t *dst, int mul, int shift, int coeff_num)
522 522
 {
523 523
     int round = 1 << (shift - 1);
524 524
     int pos = -1;
... ...
@@ -553,9 +553,9 @@ static int decode_residual_block(AVSContext *h, GetBitContext *gb,
553 553
 {
554 554
     int i, esc_code, level, mask;
555 555
     unsigned int level_code, run;
556
-    DCTELEM level_buf[65];
556
+    int16_t level_buf[65];
557 557
     uint8_t run_buf[65];
558
-    DCTELEM *block = h->block;
558
+    int16_t *block = h->block;
559 559
 
560 560
     for (i = 0; i < 65; i++) {
561 561
         level_code = get_ue_code(gb, r->golomb_order);
... ...
@@ -183,9 +183,9 @@ static void cavs_filter_ch_c(uint8_t *d, int stride, int alpha, int beta, int tc
183 183
  *
184 184
  ****************************************************************************/
185 185
 
186
-static void cavs_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride) {
186
+static void cavs_idct8_add_c(uint8_t *dst, int16_t *block, int stride) {
187 187
     int i;
188
-    DCTELEM (*src)[8] = (DCTELEM(*)[8])block;
188
+    int16_t (*src)[8] = (int16_t(*)[8])block;
189 189
     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
190 190
 
191 191
     src[0][0] += 8;
... ...
@@ -32,7 +32,7 @@ typedef struct CAVSDSPContext {
32 32
     void (*cavs_filter_lh)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2);
33 33
     void (*cavs_filter_cv)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2);
34 34
     void (*cavs_filter_ch)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2);
35
-    void (*cavs_idct8_add)(uint8_t *dst, DCTELEM *block, int stride);
35
+    void (*cavs_idct8_add)(uint8_t *dst, int16_t *block, int stride);
36 36
     int idct_perm;
37 37
 } CAVSDSPContext;
38 38
 
... ...
@@ -39,6 +39,7 @@
39 39
 #include "libavutil/lfg.h"
40 40
 #include "libavutil/time.h"
41 41
 
42
+#include "dsputil.h"
42 43
 #include "simple_idct.h"
43 44
 #include "aandcttab.h"
44 45
 #include "faandct.h"
... ...
@@ -48,28 +49,28 @@
48 48
 
49 49
 #undef printf
50 50
 
51
-void ff_mmx_idct(DCTELEM *data);
52
-void ff_mmxext_idct(DCTELEM *data);
51
+void ff_mmx_idct(int16_t *data);
52
+void ff_mmxext_idct(int16_t *data);
53 53
 
54 54
 // BFIN
55
-void ff_bfin_idct(DCTELEM *block);
56
-void ff_bfin_fdct(DCTELEM *block);
55
+void ff_bfin_idct(int16_t *block);
56
+void ff_bfin_fdct(int16_t *block);
57 57
 
58 58
 // ALTIVEC
59
-void ff_fdct_altivec(DCTELEM *block);
59
+void ff_fdct_altivec(int16_t *block);
60 60
 
61 61
 // ARM
62
-void ff_j_rev_dct_arm(DCTELEM *data);
63
-void ff_simple_idct_arm(DCTELEM *data);
64
-void ff_simple_idct_armv5te(DCTELEM *data);
65
-void ff_simple_idct_armv6(DCTELEM *data);
66
-void ff_simple_idct_neon(DCTELEM *data);
62
+void ff_j_rev_dct_arm(int16_t *data);
63
+void ff_simple_idct_arm(int16_t *data);
64
+void ff_simple_idct_armv5te(int16_t *data);
65
+void ff_simple_idct_armv6(int16_t *data);
66
+void ff_simple_idct_neon(int16_t *data);
67 67
 
68
-void ff_simple_idct_axp(DCTELEM *data);
68
+void ff_simple_idct_axp(int16_t *data);
69 69
 
70 70
 struct algo {
71 71
     const char *name;
72
-    void (*func)(DCTELEM *block);
72
+    void (*func)(int16_t *block);
73 73
     enum formattag { NO_PERM, MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM,
74 74
                      SSE2_PERM, PARTTRANS_PERM, TRANSPOSE_PERM } format;
75 75
     int mm_support;
... ...
@@ -103,9 +104,9 @@ static const struct algo fdct_tab[] = {
103 103
 
104 104
 #if ARCH_X86_64 && HAVE_MMX && HAVE_YASM
105 105
 void ff_prores_idct_put_10_sse2(uint16_t *dst, int linesize,
106
-                                DCTELEM *block, int16_t *qmat);
106
+                                int16_t *block, int16_t *qmat);
107 107
 
108
-static void ff_prores_idct_put_10_sse2_wrap(DCTELEM *dst){
108
+static void ff_prores_idct_put_10_sse2_wrap(int16_t *dst){
109 109
     DECLARE_ALIGNED(16, static int16_t, qmat)[64];
110 110
     DECLARE_ALIGNED(16, static int16_t, tmp)[64];
111 111
     int i;
... ...
@@ -193,10 +194,10 @@ static void idct_mmx_init(void)
193 193
     }
194 194
 }
195 195
 
196
-DECLARE_ALIGNED(16, static DCTELEM, block)[64];
197
-DECLARE_ALIGNED(8,  static DCTELEM, block1)[64];
196
+DECLARE_ALIGNED(16, static int16_t, block)[64];
197
+DECLARE_ALIGNED(8,  static int16_t, block1)[64];
198 198
 
199
-static void init_block(DCTELEM block[64], int test, int is_idct, AVLFG *prng, int vals)
199
+static void init_block(int16_t block[64], int test, int is_idct, AVLFG *prng, int vals)
200 200
 {
201 201
     int i, j;
202 202
 
... ...
@@ -226,7 +227,7 @@ static void init_block(DCTELEM block[64], int test, int is_idct, AVLFG *prng, in
226 226
     }
227 227
 }
228 228
 
229
-static void permute(DCTELEM dst[64], const DCTELEM src[64], int perm)
229
+static void permute(int16_t dst[64], const int16_t src[64], int perm)
230 230
 {
231 231
     int i;
232 232
 
... ...
@@ -253,7 +254,7 @@ static void permute(DCTELEM dst[64], const DCTELEM src[64], int perm)
253 253
 
254 254
 static int dct_error(const struct algo *dct, int test, int is_idct, int speed, const int bits)
255 255
 {
256
-    void (*ref)(DCTELEM *block) = is_idct ? ff_ref_idct : ff_ref_fdct;
256
+    void (*ref)(int16_t *block) = is_idct ? ff_ref_idct : ff_ref_fdct;
257 257
     int it, i, scale;
258 258
     int err_inf, v;
259 259
     int64_t err2, ti, ti1, it1, err_sum = 0;
... ...
@@ -45,11 +45,11 @@ typedef struct DNXHDContext {
45 45
     VLC ac_vlc, dc_vlc, run_vlc;
46 46
     int last_dc[3];
47 47
     DSPContext dsp;
48
-    DECLARE_ALIGNED(16, DCTELEM, blocks)[8][64];
48
+    DECLARE_ALIGNED(16, int16_t, blocks)[8][64];
49 49
     ScanTable scantable;
50 50
     const CIDEntry *cid_table;
51 51
     int bit_depth; // 8, 10 or 0 if not initialized at all.
52
-    void (*decode_dct_block)(struct DNXHDContext *ctx, DCTELEM *block,
52
+    void (*decode_dct_block)(struct DNXHDContext *ctx, int16_t *block,
53 53
                              int n, int qscale);
54 54
     int last_qscale;
55 55
     int luma_scale[64];
... ...
@@ -59,8 +59,8 @@ typedef struct DNXHDContext {
59 59
 #define DNXHD_VLC_BITS 9
60 60
 #define DNXHD_DC_VLC_BITS 7
61 61
 
62
-static void dnxhd_decode_dct_block_8(DNXHDContext *ctx, DCTELEM *block, int n, int qscale);
63
-static void dnxhd_decode_dct_block_10(DNXHDContext *ctx, DCTELEM *block, int n, int qscale);
62
+static void dnxhd_decode_dct_block_8(DNXHDContext *ctx, int16_t *block, int n, int qscale);
63
+static void dnxhd_decode_dct_block_10(DNXHDContext *ctx, int16_t *block, int n, int qscale);
64 64
 
65 65
 static av_cold int dnxhd_decode_init(AVCodecContext *avctx)
66 66
 {
... ...
@@ -190,7 +190,7 @@ static int dnxhd_decode_header(DNXHDContext *ctx, const uint8_t *buf, int buf_si
190 190
 }
191 191
 
192 192
 static av_always_inline void dnxhd_decode_dct_block(DNXHDContext *ctx,
193
-                                                    DCTELEM *block, int n,
193
+                                                    int16_t *block, int n,
194 194
                                                     int qscale,
195 195
                                                     int index_bits,
196 196
                                                     int level_bias,
... ...
@@ -272,13 +272,13 @@ static av_always_inline void dnxhd_decode_dct_block(DNXHDContext *ctx,
272 272
     CLOSE_READER(bs, &ctx->gb);
273 273
 }
274 274
 
275
-static void dnxhd_decode_dct_block_8(DNXHDContext *ctx, DCTELEM *block,
275
+static void dnxhd_decode_dct_block_8(DNXHDContext *ctx, int16_t *block,
276 276
                                      int n, int qscale)
277 277
 {
278 278
     dnxhd_decode_dct_block(ctx, block, n, qscale, 4, 32, 6);
279 279
 }
280 280
 
281
-static void dnxhd_decode_dct_block_10(DNXHDContext *ctx, DCTELEM *block,
281
+static void dnxhd_decode_dct_block_10(DNXHDContext *ctx, int16_t *block,
282 282
                                       int n, int qscale)
283 283
 {
284 284
     dnxhd_decode_dct_block(ctx, block, n, qscale, 6, 8, 4);
... ...
@@ -51,7 +51,7 @@ static const AVClass class = {
51 51
 
52 52
 #define LAMBDA_FRAC_BITS 10
53 53
 
54
-static void dnxhd_8bit_get_pixels_8x4_sym(DCTELEM *av_restrict block, const uint8_t *pixels, int line_size)
54
+static void dnxhd_8bit_get_pixels_8x4_sym(int16_t *av_restrict block, const uint8_t *pixels, int line_size)
55 55
 {
56 56
     int i;
57 57
     for (i = 0; i < 4; i++) {
... ...
@@ -68,7 +68,7 @@ static void dnxhd_8bit_get_pixels_8x4_sym(DCTELEM *av_restrict block, const uint
68 68
     memcpy(block + 24, block - 32, sizeof(*block) * 8);
69 69
 }
70 70
 
71
-static av_always_inline void dnxhd_10bit_get_pixels_8x4_sym(DCTELEM *av_restrict block, const uint8_t *pixels, int line_size)
71
+static av_always_inline void dnxhd_10bit_get_pixels_8x4_sym(int16_t *av_restrict block, const uint8_t *pixels, int line_size)
72 72
 {
73 73
     int i;
74 74
     const uint16_t* pixels16 = (const uint16_t*)pixels;
... ...
@@ -88,7 +88,7 @@ static av_always_inline void dnxhd_10bit_get_pixels_8x4_sym(DCTELEM *av_restrict
88 88
     memcpy(block + 24, block - 32, sizeof(*block) * 8);
89 89
 }
90 90
 
91
-static int dnxhd_10bit_dct_quantize(MpegEncContext *ctx, DCTELEM *block,
91
+static int dnxhd_10bit_dct_quantize(MpegEncContext *ctx, int16_t *block,
92 92
                                     int n, int qscale, int *overflow)
93 93
 {
94 94
     const uint8_t *scantable= ctx->intra_scantable.scantable;
... ...
@@ -392,7 +392,7 @@ static av_always_inline void dnxhd_encode_dc(DNXHDEncContext *ctx, int diff)
392 392
              (ctx->cid_table->dc_codes[nbits]<<nbits) + (diff & ((1 << nbits) - 1)));
393 393
 }
394 394
 
395
-static av_always_inline void dnxhd_encode_block(DNXHDEncContext *ctx, DCTELEM *block, int last_index, int n)
395
+static av_always_inline void dnxhd_encode_block(DNXHDEncContext *ctx, int16_t *block, int last_index, int n)
396 396
 {
397 397
     int last_non_zero = 0;
398 398
     int slevel, i, j;
... ...
@@ -415,7 +415,7 @@ static av_always_inline void dnxhd_encode_block(DNXHDEncContext *ctx, DCTELEM *b
415 415
     put_bits(&ctx->m.pb, ctx->vlc_bits[0], ctx->vlc_codes[0]); // EOB
416 416
 }
417 417
 
418
-static av_always_inline void dnxhd_unquantize_c(DNXHDEncContext *ctx, DCTELEM *block, int n, int qscale, int last_index)
418
+static av_always_inline void dnxhd_unquantize_c(DNXHDEncContext *ctx, int16_t *block, int n, int qscale, int last_index)
419 419
 {
420 420
     const uint8_t *weight_matrix;
421 421
     int level;
... ...
@@ -456,7 +456,7 @@ static av_always_inline void dnxhd_unquantize_c(DNXHDEncContext *ctx, DCTELEM *b
456 456
     }
457 457
 }
458 458
 
459
-static av_always_inline int dnxhd_ssd_block(DCTELEM *qblock, DCTELEM *block)
459
+static av_always_inline int dnxhd_ssd_block(int16_t *qblock, int16_t *block)
460 460
 {
461 461
     int score = 0;
462 462
     int i;
... ...
@@ -465,7 +465,7 @@ static av_always_inline int dnxhd_ssd_block(DCTELEM *qblock, DCTELEM *block)
465 465
     return score;
466 466
 }
467 467
 
468
-static av_always_inline int dnxhd_calc_ac_bits(DNXHDEncContext *ctx, DCTELEM *block, int last_index)
468
+static av_always_inline int dnxhd_calc_ac_bits(DNXHDEncContext *ctx, int16_t *block, int last_index)
469 469
 {
470 470
     int last_non_zero = 0;
471 471
     int bits = 0;
... ...
@@ -527,7 +527,7 @@ static int dnxhd_calc_bits_thread(AVCodecContext *avctx, void *arg, int jobnr, i
527 527
     DNXHDEncContext *ctx = avctx->priv_data;
528 528
     int mb_y = jobnr, mb_x;
529 529
     int qscale = ctx->qscale;
530
-    LOCAL_ALIGNED_16(DCTELEM, block, [64]);
530
+    LOCAL_ALIGNED_16(int16_t, block, [64]);
531 531
     ctx = ctx->thread[threadnr];
532 532
 
533 533
     ctx->m.last_dc[0] =
... ...
@@ -544,7 +544,7 @@ static int dnxhd_calc_bits_thread(AVCodecContext *avctx, void *arg, int jobnr, i
544 544
         dnxhd_get_blocks(ctx, mb_x, mb_y);
545 545
 
546 546
         for (i = 0; i < 8; i++) {
547
-            DCTELEM *src_block = ctx->blocks[i];
547
+            int16_t *src_block = ctx->blocks[i];
548 548
             int overflow, nbits, diff, last_index;
549 549
             int n = dnxhd_switch_matrix(ctx, i);
550 550
 
... ...
@@ -593,7 +593,7 @@ static int dnxhd_encode_thread(AVCodecContext *avctx, void *arg, int jobnr, int
593 593
         dnxhd_get_blocks(ctx, mb_x, mb_y);
594 594
 
595 595
         for (i = 0; i < 8; i++) {
596
-            DCTELEM *block = ctx->blocks[i];
596
+            int16_t *block = ctx->blocks[i];
597 597
             int overflow, n = dnxhd_switch_matrix(ctx, i);
598 598
             int last_index = ctx->m.dct_quantize(&ctx->m, block, 4&(2*i), qscale, &overflow);
599 599
             //START_TIMER;
... ...
@@ -64,7 +64,7 @@ typedef struct DNXHDEncContext {
64 64
     int nitris_compat;
65 65
     unsigned min_padding;
66 66
 
67
-    DECLARE_ALIGNED(16, DCTELEM, blocks)[8][64];
67
+    DECLARE_ALIGNED(16, int16_t, blocks)[8][64];
68 68
 
69 69
     int      (*qmatrix_c)     [64];
70 70
     int      (*qmatrix_l)     [64];
... ...
@@ -90,7 +90,7 @@ typedef struct DNXHDEncContext {
90 90
     RCCMPEntry *mb_cmp;
91 91
     RCEntry   (*mb_rc)[8160];
92 92
 
93
-    void (*get_pixels_8x4_sym)(DCTELEM * /*align 16*/, const uint8_t *, int);
93
+    void (*get_pixels_8x4_sym)(int16_t * /*align 16*/, const uint8_t *, int);
94 94
 } DNXHDEncContext;
95 95
 
96 96
 void ff_dnxhdenc_init_x86(DNXHDEncContext *ctx);
... ...
@@ -351,7 +351,7 @@ static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
351 351
     return s;
352 352
 }
353 353
 
354
-static void diff_pixels_c(DCTELEM *av_restrict block, const uint8_t *s1,
354
+static void diff_pixels_c(int16_t *av_restrict block, const uint8_t *s1,
355 355
                           const uint8_t *s2, int stride){
356 356
     int i;
357 357
 
... ...
@@ -371,8 +371,7 @@ static void diff_pixels_c(DCTELEM *av_restrict block, const uint8_t *s1,
371 371
     }
372 372
 }
373 373
 
374
-
375
-static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *av_restrict pixels,
374
+static void put_pixels_clamped_c(const int16_t *block, uint8_t *av_restrict pixels,
376 375
                                  int line_size)
377 376
 {
378 377
     int i;
... ...
@@ -393,7 +392,7 @@ static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *av_restrict pixe
393 393
     }
394 394
 }
395 395
 
396
-static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *av_restrict pixels,
396
+static void put_pixels_clamped4_c(const int16_t *block, uint8_t *av_restrict pixels,
397 397
                                  int line_size)
398 398
 {
399 399
     int i;
... ...
@@ -410,7 +409,7 @@ static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *av_restrict pix
410 410
     }
411 411
 }
412 412
 
413
-static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *av_restrict pixels,
413
+static void put_pixels_clamped2_c(const int16_t *block, uint8_t *av_restrict pixels,
414 414
                                  int line_size)
415 415
 {
416 416
     int i;
... ...
@@ -425,7 +424,7 @@ static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *av_restrict pix
425 425
     }
426 426
 }
427 427
 
428
-static void put_signed_pixels_clamped_c(const DCTELEM *block,
428
+static void put_signed_pixels_clamped_c(const int16_t *block,
429 429
                                         uint8_t *av_restrict pixels,
430 430
                                         int line_size)
431 431
 {
... ...
@@ -446,7 +445,7 @@ static void put_signed_pixels_clamped_c(const DCTELEM *block,
446 446
     }
447 447
 }
448 448
 
449
-static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *av_restrict pixels,
449
+static void add_pixels_clamped_c(const int16_t *block, uint8_t *av_restrict pixels,
450 450
                                  int line_size)
451 451
 {
452 452
     int i;
... ...
@@ -466,7 +465,7 @@ static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *av_restrict pixe
466 466
     }
467 467
 }
468 468
 
469
-static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *av_restrict pixels,
469
+static void add_pixels_clamped4_c(const int16_t *block, uint8_t *av_restrict pixels,
470 470
                           int line_size)
471 471
 {
472 472
     int i;
... ...
@@ -482,7 +481,7 @@ static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *av_restrict pix
482 482
     }
483 483
 }
484 484
 
485
-static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *av_restrict pixels,
485
+static void add_pixels_clamped2_c(const int16_t *block, uint8_t *av_restrict pixels,
486 486
                           int line_size)
487 487
 {
488 488
     int i;
... ...
@@ -496,7 +495,7 @@ static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *av_restrict pix
496 496
     }
497 497
 }
498 498
 
499
-static int sum_abs_dctelem_c(DCTELEM *block)
499
+static int sum_abs_dctelem_c(int16_t *block)
500 500
 {
501 501
     int sum=0, i;
502 502
     for(i=0; i<64; i++)
... ...
@@ -1834,10 +1833,10 @@ static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
1834 1834
  * @param scantable the used scantable, this is only used to speed the permutation up, the block is not
1835 1835
  *                  (inverse) permutated to scantable order!
1836 1836
  */
1837
-void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
1837
+void ff_block_permute(int16_t *block, uint8_t *permutation, const uint8_t *scantable, int last)
1838 1838
 {
1839 1839
     int i;
1840
-    DCTELEM temp[64];
1840
+    int16_t temp[64];
1841 1841
 
1842 1842
     if(last<=0) return;
1843 1843
     //if(permutation[1]==1) return; //FIXME it is ok but not clean and might fail for some permutations
... ...
@@ -2160,7 +2159,7 @@ static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_
2160 2160
 
2161 2161
 static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2162 2162
     MpegEncContext * const s= (MpegEncContext *)c;
2163
-    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2163
+    LOCAL_ALIGNED_16(int16_t, temp, [64]);
2164 2164
 
2165 2165
     av_assert2(h==8);
2166 2166
 
... ...
@@ -2199,7 +2198,7 @@ static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2
2199 2199
 
2200 2200
 static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2201 2201
     MpegEncContext * const s= (MpegEncContext *)c;
2202
-    DCTELEM dct[8][8];
2202
+    int16_t dct[8][8];
2203 2203
     int i;
2204 2204
     int sum=0;
2205 2205
 
... ...
@@ -2224,7 +2223,7 @@ static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *s
2224 2224
 
2225 2225
 static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2226 2226
     MpegEncContext * const s= (MpegEncContext *)c;
2227
-    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2227
+    LOCAL_ALIGNED_16(int16_t, temp, [64]);
2228 2228
     int sum=0, i;
2229 2229
 
2230 2230
     av_assert2(h==8);
... ...
@@ -2240,8 +2239,8 @@ static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2
2240 2240
 
2241 2241
 static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2242 2242
     MpegEncContext * const s= (MpegEncContext *)c;
2243
-    LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]);
2244
-    DCTELEM * const bak = temp+64;
2243
+    LOCAL_ALIGNED_16(int16_t, temp, [64*2]);
2244
+    int16_t * const bak = temp+64;
2245 2245
     int sum=0, i;
2246 2246
 
2247 2247
     av_assert2(h==8);
... ...
@@ -2249,7 +2248,7 @@ static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *s
2249 2249
 
2250 2250
     s->dsp.diff_pixels(temp, src1, src2, stride);
2251 2251
 
2252
-    memcpy(bak, temp, 64*sizeof(DCTELEM));
2252
+    memcpy(bak, temp, 64*sizeof(int16_t));
2253 2253
 
2254 2254
     s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2255 2255
     s->dct_unquantize_inter(s, temp, 0, s->qscale);
... ...
@@ -2264,7 +2263,7 @@ static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *s
2264 2264
 static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2265 2265
     MpegEncContext * const s= (MpegEncContext *)c;
2266 2266
     const uint8_t *scantable= s->intra_scantable.permutated;
2267
-    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2267
+    LOCAL_ALIGNED_16(int16_t, temp, [64]);
2268 2268
     LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
2269 2269
     LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
2270 2270
     int i, last, run, bits, level, distortion, start_i;
... ...
@@ -2340,7 +2339,7 @@ static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int
2340 2340
 static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2341 2341
     MpegEncContext * const s= (MpegEncContext *)c;
2342 2342
     const uint8_t *scantable= s->intra_scantable.permutated;
2343
-    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2343
+    LOCAL_ALIGNED_16(int16_t, temp, [64]);
2344 2344
     int i, last, run, bits, level, start_i;
2345 2345
     const int esc_length= s->ac_esc_length;
2346 2346
     uint8_t * length;
... ...
@@ -2577,44 +2576,44 @@ static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
2577 2577
     } while (len > 0);
2578 2578
 }
2579 2579
 
2580
-static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
2580
+static void ff_jref_idct_put(uint8_t *dest, int line_size, int16_t *block)
2581 2581
 {
2582 2582
     ff_j_rev_dct (block);
2583 2583
     put_pixels_clamped_c(block, dest, line_size);
2584 2584
 }
2585
-static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
2585
+static void ff_jref_idct_add(uint8_t *dest, int line_size, int16_t *block)
2586 2586
 {
2587 2587
     ff_j_rev_dct (block);
2588 2588
     add_pixels_clamped_c(block, dest, line_size);
2589 2589
 }
2590 2590
 
2591
-static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
2591
+static void ff_jref_idct4_put(uint8_t *dest, int line_size, int16_t *block)
2592 2592
 {
2593 2593
     ff_j_rev_dct4 (block);
2594 2594
     put_pixels_clamped4_c(block, dest, line_size);
2595 2595
 }
2596
-static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
2596
+static void ff_jref_idct4_add(uint8_t *dest, int line_size, int16_t *block)
2597 2597
 {
2598 2598
     ff_j_rev_dct4 (block);
2599 2599
     add_pixels_clamped4_c(block, dest, line_size);
2600 2600
 }
2601 2601
 
2602
-static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
2602
+static void ff_jref_idct2_put(uint8_t *dest, int line_size, int16_t *block)
2603 2603
 {
2604 2604
     ff_j_rev_dct2 (block);
2605 2605
     put_pixels_clamped2_c(block, dest, line_size);
2606 2606
 }
2607
-static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
2607
+static void ff_jref_idct2_add(uint8_t *dest, int line_size, int16_t *block)
2608 2608
 {
2609 2609
     ff_j_rev_dct2 (block);
2610 2610
     add_pixels_clamped2_c(block, dest, line_size);
2611 2611
 }
2612 2612
 
2613
-static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
2613
+static void ff_jref_idct1_put(uint8_t *dest, int line_size, int16_t *block)
2614 2614
 {
2615 2615
     dest[0] = av_clip_uint8((block[0] + 4)>>3);
2616 2616
 }
2617
-static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
2617
+static void ff_jref_idct1_add(uint8_t *dest, int line_size, int16_t *block)
2618 2618
 {
2619 2619
     dest[0] = av_clip_uint8(dest[0] + ((block[0] + 4)>>3));
2620 2620
 }
... ...
@@ -36,37 +36,36 @@
36 36
 
37 37
 //#define DEBUG
38 38
 /* dct code */
39
-typedef short DCTELEM;
40 39
 
41
-void ff_fdct_ifast (DCTELEM *data);
42
-void ff_fdct_ifast248 (DCTELEM *data);
43
-void ff_jpeg_fdct_islow_8(DCTELEM *data);
44
-void ff_jpeg_fdct_islow_10(DCTELEM *data);
45
-void ff_fdct248_islow_8(DCTELEM *data);
46
-void ff_fdct248_islow_10(DCTELEM *data);
40
+void ff_fdct_ifast(int16_t *data);
41
+void ff_fdct_ifast248(int16_t *data);
42
+void ff_jpeg_fdct_islow_8(int16_t *data);
43
+void ff_jpeg_fdct_islow_10(int16_t *data);
44
+void ff_fdct248_islow_8(int16_t *data);
45
+void ff_fdct248_islow_10(int16_t *data);
47 46
 
48
-void ff_j_rev_dct (DCTELEM *data);
49
-void ff_j_rev_dct4 (DCTELEM *data);
50
-void ff_j_rev_dct2 (DCTELEM *data);
51
-void ff_j_rev_dct1 (DCTELEM *data);
47
+void ff_j_rev_dct(int16_t *data);
48
+void ff_j_rev_dct4(int16_t *data);
49
+void ff_j_rev_dct2(int16_t *data);
50
+void ff_j_rev_dct1(int16_t *data);
52 51
 
53
-void ff_fdct_mmx(DCTELEM *block);
54
-void ff_fdct_mmxext(DCTELEM *block);
55
-void ff_fdct_sse2(DCTELEM *block);
52
+void ff_fdct_mmx(int16_t *block);
53
+void ff_fdct_mmxext(int16_t *block);
54
+void ff_fdct_sse2(int16_t *block);
56 55
 
57 56
 #define H264_IDCT(depth) \
58
-void ff_h264_idct8_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int stride);\
59
-void ff_h264_idct_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int stride);\
60
-void ff_h264_idct8_dc_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int stride);\
61
-void ff_h264_idct_dc_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int stride);\
62
-void ff_h264_idct_add16_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
63
-void ff_h264_idct_add16intra_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
64
-void ff_h264_idct8_add4_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
65
-void ff_h264_idct_add8_422_ ## depth ## _c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
66
-void ff_h264_idct_add8_ ## depth ## _c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
67
-void ff_h264_luma_dc_dequant_idct_ ## depth ## _c(DCTELEM *output, DCTELEM *input, int qmul);\
68
-void ff_h264_chroma422_dc_dequant_idct_ ## depth ## _c(DCTELEM *block, int qmul);\
69
-void ff_h264_chroma_dc_dequant_idct_ ## depth ## _c(DCTELEM *block, int qmul);
57
+void ff_h264_idct8_add_ ## depth ## _c(uint8_t *dst, int16_t *block, int stride);\
58
+void ff_h264_idct_add_ ## depth ## _c(uint8_t *dst, int16_t *block, int stride);\
59
+void ff_h264_idct8_dc_add_ ## depth ## _c(uint8_t *dst, int16_t *block, int stride);\
60
+void ff_h264_idct_dc_add_ ## depth ## _c(uint8_t *dst, int16_t *block, int stride);\
61
+void ff_h264_idct_add16_ ## depth ## _c(uint8_t *dst, const int *blockoffset, int16_t *block, int stride, const uint8_t nnzc[6*8]);\
62
+void ff_h264_idct_add16intra_ ## depth ## _c(uint8_t *dst, const int *blockoffset, int16_t *block, int stride, const uint8_t nnzc[6*8]);\
63
+void ff_h264_idct8_add4_ ## depth ## _c(uint8_t *dst, const int *blockoffset, int16_t *block, int stride, const uint8_t nnzc[6*8]);\
64
+void ff_h264_idct_add8_422_ ## depth ## _c(uint8_t **dest, const int *blockoffset, int16_t *block, int stride, const uint8_t nnzc[6*8]);\
65
+void ff_h264_idct_add8_ ## depth ## _c(uint8_t **dest, const int *blockoffset, int16_t *block, int stride, const uint8_t nnzc[6*8]);\
66
+void ff_h264_luma_dc_dequant_idct_ ## depth ## _c(int16_t *output, int16_t *input, int qmul);\
67
+void ff_h264_chroma422_dc_dequant_idct_ ## depth ## _c(int16_t *block, int qmul);\
68
+void ff_h264_chroma_dc_dequant_idct_ ## depth ## _c(int16_t *block, int qmul);
70 69
 
71 70
 H264_IDCT( 8)
72 71
 H264_IDCT( 9)
... ...
@@ -74,8 +73,8 @@ H264_IDCT(10)
74 74
 H264_IDCT(12)
75 75
 H264_IDCT(14)
76 76
 
77
-void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qp);
78
-void ff_svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
77
+void ff_svq3_luma_dc_dequant_idct_c(int16_t *output, int16_t *input, int qp);
78
+void ff_svq3_add_idct_c(uint8_t *dst, int16_t *block, int stride, int qp, int dc);
79 79
 
80 80
 /* encoding scans */
81 81
 extern const uint8_t ff_alternate_horizontal_scan[64];
... ...
@@ -135,11 +134,11 @@ could be reached easily ...
135 135
 */
136 136
 
137 137
 /*
138
-void get_pixels_c(DCTELEM *block, const uint8_t *pixels, int line_size);
139
-void diff_pixels_c(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride);
140
-void put_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size);
141
-void add_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size);
142
-void clear_blocks_c(DCTELEM *blocks);
138
+void get_pixels_c(int16_t *block, const uint8_t *pixels, int line_size);
139
+void diff_pixels_c(int16_t *block, const uint8_t *s1, const uint8_t *s2, int stride);
140
+void put_pixels_clamped_c(const int16_t *block, uint8_t *pixels, int line_size);
141
+void add_pixels_clamped_c(const int16_t *block, uint8_t *pixels, int line_size);
142
+void clear_blocks_c(int16_t *blocks);
143 143
 */
144 144
 
145 145
 /* add and put pixel (decoding) */
... ...
@@ -212,14 +211,14 @@ typedef struct DSPContext {
212 212
     int dct_bits;
213 213
 
214 214
     /* pixel ops : interface with DCT */
215
-    void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size);
216
-    void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride);
217
-    void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
218
-    void (*put_signed_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
219
-    void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
220
-    void (*add_pixels8)(uint8_t *pixels, DCTELEM *block, int line_size);
221
-    void (*add_pixels4)(uint8_t *pixels, DCTELEM *block, int line_size);
222
-    int (*sum_abs_dctelem)(DCTELEM *block/*align 16*/);
215
+    void (*get_pixels)(int16_t *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size);
216
+    void (*diff_pixels)(int16_t *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride);
217
+    void (*put_pixels_clamped)(const int16_t *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
218
+    void (*put_signed_pixels_clamped)(const int16_t *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
219
+    void (*add_pixels_clamped)(const int16_t *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
220
+    void (*add_pixels8)(uint8_t *pixels, int16_t *block, int line_size);
221
+    void (*add_pixels4)(uint8_t *pixels, int16_t *block, int line_size);
222
+    int (*sum_abs_dctelem)(int16_t *block/*align 16*/);
223 223
     /**
224 224
      * translational global motion compensation.
225 225
      */
... ...
@@ -229,8 +228,8 @@ typedef struct DSPContext {
229 229
      */
230 230
     void (*gmc )(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int ox, int oy,
231 231
                     int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
232
-    void (*clear_block)(DCTELEM *block/*align 16*/);
233
-    void (*clear_blocks)(DCTELEM *blocks/*align 16*/);
232
+    void (*clear_block)(int16_t *block/*align 16*/);
233
+    void (*clear_blocks)(int16_t *blocks/*align 16*/);
234 234
     int (*pix_sum)(uint8_t * pix, int line_size);
235 235
     int (*pix_norm1)(uint8_t * pix, int line_size);
236 236
 // 16x16 8x8 4x4 2x2 16x8 8x4 4x2 8x16 4x8 2x4
... ...
@@ -362,24 +361,24 @@ typedef struct DSPContext {
362 362
     void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, float min, float max, int len /* align 16 */);
363 363
 
364 364
     /* (I)DCT */
365
-    void (*fdct)(DCTELEM *block/* align 16*/);
366
-    void (*fdct248)(DCTELEM *block/* align 16*/);
365
+    void (*fdct)(int16_t *block/* align 16*/);
366
+    void (*fdct248)(int16_t *block/* align 16*/);
367 367
 
368 368
     /* IDCT really*/
369
-    void (*idct)(DCTELEM *block/* align 16*/);
369
+    void (*idct)(int16_t *block/* align 16*/);
370 370
 
371 371
     /**
372 372
      * block -> idct -> clip to unsigned 8 bit -> dest.
373 373
      * (-1392, 0, 0, ...) -> idct -> (-174, -174, ...) -> put -> (0, 0, ...)
374 374
      * @param line_size size in bytes of a horizontal line of dest
375 375
      */
376
-    void (*idct_put)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
376
+    void (*idct_put)(uint8_t *dest/*align 8*/, int line_size, int16_t *block/*align 16*/);
377 377
 
378 378
     /**
379 379
      * block -> idct -> add dest -> clip to unsigned 8 bit -> dest.
380 380
      * @param line_size size in bytes of a horizontal line of dest
381 381
      */
382
-    void (*idct_add)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
382
+    void (*idct_add)(uint8_t *dest/*align 8*/, int line_size, int16_t *block/*align 16*/);
383 383
 
384 384
     /**
385 385
      * idct input permutation.
... ...
@@ -470,7 +469,7 @@ int ff_check_alignment(void);
470 470
  * permute block according to permuatation.
471 471
  * @param last last non zero element in scantable order
472 472
  */
473
-void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last);
473
+void ff_block_permute(int16_t *block, uint8_t *permutation, const uint8_t *scantable, int last);
474 474
 
475 475
 void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type);
476 476
 
... ...
@@ -114,7 +114,7 @@ static void FUNCC(draw_edges)(uint8_t *p_buf, int p_wrap, int width, int height,
114 114
 }
115 115
 
116 116
 #define DCTELEM_FUNCS(dctcoef, suffix)                                  \
117
-static void FUNCC(get_pixels ## suffix)(DCTELEM *av_restrict _block,    \
117
+static void FUNCC(get_pixels ## suffix)(int16_t *av_restrict _block,    \
118 118
                                         const uint8_t *_pixels,         \
119 119
                                         int line_size)                  \
120 120
 {                                                                       \
... ...
@@ -138,7 +138,7 @@ static void FUNCC(get_pixels ## suffix)(DCTELEM *av_restrict _block,    \
138 138
 }                                                                       \
139 139
                                                                         \
140 140
 static void FUNCC(add_pixels8 ## suffix)(uint8_t *av_restrict _pixels,  \
141
-                                         DCTELEM *_block,               \
141
+                                         int16_t *_block,               \
142 142
                                          int line_size)                 \
143 143
 {                                                                       \
144 144
     int i;                                                              \
... ...
@@ -161,7 +161,7 @@ static void FUNCC(add_pixels8 ## suffix)(uint8_t *av_restrict _pixels,  \
161 161
 }                                                                       \
162 162
                                                                         \
163 163
 static void FUNCC(add_pixels4 ## suffix)(uint8_t *av_restrict _pixels,  \
164
-                                         DCTELEM *_block,               \
164
+                                         int16_t *_block,               \
165 165
                                          int line_size)                 \
166 166
 {                                                                       \
167 167
     int i;                                                              \
... ...
@@ -179,20 +179,20 @@ static void FUNCC(add_pixels4 ## suffix)(uint8_t *av_restrict _pixels,  \
179 179
     }                                                                   \
180 180
 }                                                                       \
181 181
                                                                         \
182
-static void FUNCC(clear_block ## suffix)(DCTELEM *block)                \
182
+static void FUNCC(clear_block ## suffix)(int16_t *block)                \
183 183
 {                                                                       \
184 184
     memset(block, 0, sizeof(dctcoef)*64);                               \
185 185
 }                                                                       \
186 186
                                                                         \
187 187
 /**                                                                     \
188
- * memset(blocks, 0, sizeof(DCTELEM)*6*64)                              \
188
+ * memset(blocks, 0, sizeof(int16_t)*6*64)                              \
189 189
  */                                                                     \
190
-static void FUNCC(clear_blocks ## suffix)(DCTELEM *blocks)              \
190
+static void FUNCC(clear_blocks ## suffix)(int16_t *blocks)              \
191 191
 {                                                                       \
192 192
     memset(blocks, 0, sizeof(dctcoef)*6*64);                            \
193 193
 }
194 194
 
195
-DCTELEM_FUNCS(DCTELEM, _16)
195
+DCTELEM_FUNCS(int16_t, _16)
196 196
 #if BIT_DEPTH > 8
197 197
 DCTELEM_FUNCS(dctcoef, _32)
198 198
 #endif
... ...
@@ -417,7 +417,7 @@ typedef struct EncBlockInfo {
417 417
     int      cur_ac;
418 418
     int      cno;
419 419
     int      dct_mode;
420
-    DCTELEM  mb[64];
420
+    int16_t  mb[64];
421 421
     uint8_t  next[64];
422 422
     uint8_t  sign[64];
423 423
     uint8_t  partial_bit_count;
... ...
@@ -506,7 +506,7 @@ static av_always_inline int dv_init_enc_block(EncBlockInfo* bi, uint8_t *data, i
506 506
 {
507 507
     const int *weight;
508 508
     const uint8_t* zigzag_scan;
509
-    LOCAL_ALIGNED_16(DCTELEM, blk, [64]);
509
+    LOCAL_ALIGNED_16(int16_t, blk, [64]);
510 510
     int i, area;
511 511
     /* We offer two different methods for class number assignment: the
512 512
        method suggested in SMPTE 314M Table 22, and an improved
... ...
@@ -40,9 +40,9 @@ typedef struct DVVideoContext {
40 40
 
41 41
     uint8_t  dv_zigzag[2][64];
42 42
 
43
-    void (*get_pixels)(DCTELEM *block, const uint8_t *pixels, int line_size);
44
-    void (*fdct[2])(DCTELEM *block);
45
-    void (*idct_put[2])(uint8_t *dest, int line_size, DCTELEM *block);
43
+    void (*get_pixels)(int16_t *block, const uint8_t *pixels, int line_size);
44
+    void (*fdct[2])(int16_t *block);
45
+    void (*idct_put[2])(uint8_t *dest, int line_size, int16_t *block);
46 46
     me_cmp_func ildct_cmp;
47 47
 } DVVideoContext;
48 48
 
... ...
@@ -49,7 +49,7 @@ typedef struct BlockInfo {
49 49
     const uint32_t *factor_table;
50 50
     const uint8_t *scan_table;
51 51
     uint8_t pos; /* position in block */
52
-    void (*idct_put)(uint8_t *dest, int line_size, DCTELEM *block);
52
+    void (*idct_put)(uint8_t *dest, int line_size, int16_t *block);
53 53
     uint8_t partial_bit_count;
54 54
     uint32_t partial_bit_buffer;
55 55
     int shift_offset;
... ...
@@ -58,7 +58,7 @@ typedef struct BlockInfo {
58 58
 static const int dv_iweight_bits = 14;
59 59
 
60 60
 /* decode AC coefficients */
61
-static void dv_decode_ac(GetBitContext *gb, BlockInfo *mb, DCTELEM *block)
61
+static void dv_decode_ac(GetBitContext *gb, BlockInfo *mb, int16_t *block)
62 62
 {
63 63
     int last_index = gb->size_in_bits;
64 64
     const uint8_t  *scan_table   = mb->scan_table;
... ...
@@ -136,14 +136,14 @@ static int dv_decode_video_segment(AVCodecContext *avctx, void *arg)
136 136
     int quant, dc, dct_mode, class1, j;
137 137
     int mb_index, mb_x, mb_y, last_index;
138 138
     int y_stride, linesize;
139
-    DCTELEM *block, *block1;
139
+    int16_t *block, *block1;
140 140
     int c_offset;
141 141
     uint8_t *y_ptr;
142 142
     const uint8_t *buf_ptr;
143 143
     PutBitContext pb, vs_pb;
144 144
     GetBitContext gb;
145 145
     BlockInfo mb_data[5 * DV_MAX_BPM], *mb, *mb1;
146
-    LOCAL_ALIGNED_16(DCTELEM, sblock, [5*DV_MAX_BPM], [64]);
146
+    LOCAL_ALIGNED_16(int16_t, sblock, [5*DV_MAX_BPM], [64]);
147 147
     LOCAL_ALIGNED_16(uint8_t, mb_bit_buffer, [  80 + FF_INPUT_BUFFER_PADDING_SIZE]); /* allow some slack */
148 148
     LOCAL_ALIGNED_16(uint8_t, vs_bit_buffer, [5*80 + FF_INPUT_BUFFER_PADDING_SIZE]); /* allow some slack */
149 149
     const int log2_blocksize = 3-s->avctx->lowres;
... ...
@@ -64,7 +64,7 @@
64 64
 #define MUNGE_8BIT(x) av_clip_uint8((x)>>4)
65 65
 #define IDCT_ROW(dest,src) IDCT_TRANSFORM(dest,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,MUNGE_8BIT,src)
66 66
 
67
-static inline void ea_idct_col(DCTELEM *dest, const DCTELEM *src) {
67
+static inline void ea_idct_col(int16_t *dest, const int16_t *src) {
68 68
     if ((src[8]|src[16]|src[24]|src[32]|src[40]|src[48]|src[56])==0) {
69 69
         dest[0]  =
70 70
         dest[8]  =
... ...
@@ -78,9 +78,9 @@ static inline void ea_idct_col(DCTELEM *dest, const DCTELEM *src) {
78 78
         IDCT_COL(dest, src);
79 79
 }
80 80
 
81
-void ff_ea_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block) {
81
+void ff_ea_idct_put_c(uint8_t *dest, int linesize, int16_t *block) {
82 82
     int i;
83
-    DCTELEM temp[64];
83
+    int16_t temp[64];
84 84
     block[0] += 4;
85 85
     for (i=0; i<8; i++)
86 86
         ea_idct_col(&temp[i], &block[i]);
... ...
@@ -20,8 +20,7 @@
20 20
 #define AVCODEC_EAIDCT_H
21 21
 
22 22
 #include <stdint.h>
23
-#include "dsputil.h"
24 23
 
25
-void ff_ea_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block);
24
+void ff_ea_idct_put_c(uint8_t *dest, int linesize, int16_t *block);
26 25
 
27 26
 #endif /* AVCODEC_EAIDCT_H */
... ...
@@ -51,7 +51,7 @@ typedef struct MadContext {
51 51
     GetBitContext gb;
52 52
     void *bitstream_buf;
53 53
     unsigned int bitstream_buf_size;
54
-    DECLARE_ALIGNED(16, DCTELEM, block)[64];
54
+    DECLARE_ALIGNED(16, int16_t, block)[64];
55 55
     ScanTable scantable;
56 56
     uint16_t quant_matrix[64];
57 57
     int mb_x;
... ...
@@ -102,7 +102,7 @@ static inline void comp_block(MadContext *t, int mb_x, int mb_y,
102 102
     }
103 103
 }
104 104
 
105
-static inline void idct_put(MadContext *t, DCTELEM *block, int mb_x, int mb_y, int j)
105
+static inline void idct_put(MadContext *t, int16_t *block, int mb_x, int mb_y, int j)
106 106
 {
107 107
     if (j < 4) {
108 108
         ff_ea_idct_put_c(
... ...
@@ -116,7 +116,7 @@ static inline void idct_put(MadContext *t, DCTELEM *block, int mb_x, int mb_y, i
116 116
     }
117 117
 }
118 118
 
119
-static inline int decode_block_intra(MadContext *s, DCTELEM * block)
119
+static inline int decode_block_intra(MadContext *s, int16_t * block)
120 120
 {
121 121
     int level, i, j, run;
122 122
     RLTable *rl = &ff_rl_mpeg1;
... ...
@@ -43,7 +43,7 @@ typedef struct TgqContext {
43 43
     int width,height;
44 44
     ScanTable scantable;
45 45
     int qtable[64];
46
-    DECLARE_ALIGNED(16, DCTELEM, block)[6][64];
46
+    DECLARE_ALIGNED(16, int16_t, block)[6][64];
47 47
     GetByteContext gb;
48 48
 } TgqContext;
49 49
 
... ...
@@ -58,7 +58,7 @@ static av_cold int tgq_decode_init(AVCodecContext *avctx){
58 58
     return 0;
59 59
 }
60 60
 
61
-static void tgq_decode_block(TgqContext *s, DCTELEM block[64], GetBitContext *gb){
61
+static void tgq_decode_block(TgqContext *s, int16_t block[64], GetBitContext *gb){
62 62
     uint8_t *perm = s->scantable.permutated;
63 63
     int i,j,value;
64 64
     block[0] = get_sbits(gb,8) * s->qtable[0];
... ...
@@ -103,7 +103,7 @@ static void tgq_decode_block(TgqContext *s, DCTELEM block[64], GetBitContext *gb
103 103
     block[0] += 128<<4;
104 104
 }
105 105
 
106
-static void tgq_idct_put_mb(TgqContext *s, DCTELEM (*block)[64], int mb_x, int mb_y){
106
+static void tgq_idct_put_mb(TgqContext *s, int16_t (*block)[64], int mb_x, int mb_y){
107 107
     int linesize= s->frame.linesize[0];
108 108
     uint8_t *dest_y  = s->frame.data[0] + (mb_y * 16* linesize            ) + mb_x * 16;
109 109
     uint8_t *dest_cb = s->frame.data[1] + (mb_y * 8 * s->frame.linesize[1]) + mb_x * 8;
... ...
@@ -40,7 +40,7 @@ typedef struct TqiContext {
40 40
     AVFrame frame;
41 41
     void *bitstream_buf;
42 42
     unsigned int bitstream_buf_size;
43
-    DECLARE_ALIGNED(16, DCTELEM, block)[6][64];
43
+    DECLARE_ALIGNED(16, int16_t, block)[6][64];
44 44
 } TqiContext;
45 45
 
46 46
 static av_cold int tqi_decode_init(AVCodecContext *avctx)
... ...
@@ -58,7 +58,7 @@ static av_cold int tqi_decode_init(AVCodecContext *avctx)
58 58
     return 0;
59 59
 }
60 60
 
61
-static int tqi_decode_mb(MpegEncContext *s, DCTELEM (*block)[64])
61
+static int tqi_decode_mb(MpegEncContext *s, int16_t (*block)[64])
62 62
 {
63 63
     int n;
64 64
     s->dsp.clear_blocks(block[0]);
... ...
@@ -69,7 +69,7 @@ static int tqi_decode_mb(MpegEncContext *s, DCTELEM (*block)[64])
69 69
     return 0;
70 70
 }
71 71
 
72
-static inline void tqi_idct_put(TqiContext *t, DCTELEM (*block)[64])
72
+static inline void tqi_idct_put(TqiContext *t, int16_t (*block)[64])
73 73
 {
74 74
     MpegEncContext *s = &t->s;
75 75
     int linesize= t->frame.linesize[0];
... ...
@@ -64,7 +64,7 @@ B6*B0, B6*B1, B6*B2, B6*B3, B6*B4, B6*B5, B6*B6, B6*B7,
64 64
 B7*B0, B7*B1, B7*B2, B7*B3, B7*B4, B7*B5, B7*B6, B7*B7,
65 65
 };
66 66
 
67
-static av_always_inline void row_fdct(FLOAT temp[64], DCTELEM * data)
67
+static av_always_inline void row_fdct(FLOAT temp[64], int16_t *data)
68 68
 {
69 69
     FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
70 70
     FLOAT tmp10, tmp11, tmp12, tmp13;
... ...
@@ -119,7 +119,7 @@ static av_always_inline void row_fdct(FLOAT temp[64], DCTELEM * data)
119 119
     }
120 120
 }
121 121
 
122
-void ff_faandct(DCTELEM * data)
122
+void ff_faandct(int16_t *data)
123 123
 {
124 124
     FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
125 125
     FLOAT tmp10, tmp11, tmp12, tmp13;
... ...
@@ -179,7 +179,7 @@ void ff_faandct(DCTELEM * data)
179 179
     }
180 180
 }
181 181
 
182
-void ff_faandct248(DCTELEM * data)
182
+void ff_faandct248(int16_t *data)
183 183
 {
184 184
     FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
185 185
     FLOAT tmp10, tmp11, tmp12, tmp13;
... ...
@@ -29,9 +29,9 @@
29 29
 #ifndef AVCODEC_FAANDCT_H
30 30
 #define AVCODEC_FAANDCT_H
31 31
 
32
-#include "dsputil.h"
32
+#include <stdint.h>
33 33
 
34
-void ff_faandct(DCTELEM * data);
35
-void ff_faandct248(DCTELEM * data);
34
+void ff_faandct(int16_t *data);
35
+void ff_faandct248(int16_t *data);
36 36
 
37 37
 #endif /* AVCODEC_FAANDCT_H */
... ...
@@ -47,7 +47,7 @@ B6*B0/8, B6*B1/8, B6*B2/8, B6*B3/8, B6*B4/8, B6*B5/8, B6*B6/8, B6*B7/8,
47 47
 B7*B0/8, B7*B1/8, B7*B2/8, B7*B3/8, B7*B4/8, B7*B5/8, B7*B6/8, B7*B7/8,
48 48
 };
49 49
 
50
-static inline void p8idct(DCTELEM data[64], FLOAT temp[64], uint8_t *dest, int stride, int x, int y, int type){
50
+static inline void p8idct(int16_t data[64], FLOAT temp[64], uint8_t *dest, int stride, int x, int y, int type){
51 51
     int i;
52 52
     FLOAT av_unused tmp0;
53 53
     FLOAT s04, d04, s17, d17, s26, d26, s53, d53;
... ...
@@ -129,7 +129,7 @@ static inline void p8idct(DCTELEM data[64], FLOAT temp[64], uint8_t *dest, int s
129 129
     }
130 130
 }
131 131
 
132
-void ff_faanidct(DCTELEM block[64]){
132
+void ff_faanidct(int16_t block[64]){
133 133
     FLOAT temp[64];
134 134
     int i;
135 135
 
... ...
@@ -142,7 +142,7 @@ void ff_faanidct(DCTELEM block[64]){
142 142
     p8idct(block, temp, NULL, 0, 8, 1, 1);
143 143
 }
144 144
 
145
-void ff_faanidct_add(uint8_t *dest, int line_size, DCTELEM block[64]){
145
+void ff_faanidct_add(uint8_t *dest, int line_size, int16_t block[64]){
146 146
     FLOAT temp[64];
147 147
     int i;
148 148
 
... ...
@@ -155,7 +155,7 @@ void ff_faanidct_add(uint8_t *dest, int line_size, DCTELEM block[64]){
155 155
     p8idct(NULL , temp, dest, line_size, 8, 1, 2);
156 156
 }
157 157
 
158
-void ff_faanidct_put(uint8_t *dest, int line_size, DCTELEM block[64]){
158
+void ff_faanidct_put(uint8_t *dest, int line_size, int16_t block[64]){
159 159
     FLOAT temp[64];
160 160
     int i;
161 161
 
... ...
@@ -23,10 +23,9 @@
23 23
 #define AVCODEC_FAANIDCT_H
24 24
 
25 25
 #include <stdint.h>
26
-#include "dsputil.h"
27 26
 
28
-void ff_faanidct(DCTELEM block[64]);
29
-void ff_faanidct_add(uint8_t *dest, int line_size, DCTELEM block[64]);
30
-void ff_faanidct_put(uint8_t *dest, int line_size, DCTELEM block[64]);
27
+void ff_faanidct(int16_t block[64]);
28
+void ff_faanidct_add(uint8_t *dest, int line_size, int16_t block[64]);
29
+void ff_faanidct_put(uint8_t *dest, int line_size, int16_t block[64]);
31 30
 
32 31
 #endif /* AVCODEC_FAANIDCT_H */
... ...
@@ -48,7 +48,7 @@ static VLC h261_mtype_vlc;
48 48
 static VLC h261_mv_vlc;
49 49
 static VLC h261_cbp_vlc;
50 50
 
51
-static int h261_decode_block(H261Context * h, DCTELEM * block, int n, int coded);
51
+static int h261_decode_block(H261Context * h, int16_t * block, int n, int coded);
52 52
 
53 53
 static av_cold void h261_decode_init_vlc(H261Context *h){
54 54
     static int done = 0;
... ...
@@ -366,7 +366,7 @@ intra:
366 366
  * Decode a macroblock.
367 367
  * @return <0 if an error occurred
368 368
  */
369
-static int h261_decode_block(H261Context * h, DCTELEM * block,
369
+static int h261_decode_block(H261Context * h, int16_t * block,
370 370
                              int n, int coded)
371 371
 {
372 372
     MpegEncContext * const s = &h->s;
... ...
@@ -35,7 +35,7 @@
35 35
 
36 36
 extern uint8_t ff_h261_rl_table_store[2][2*MAX_RUN + MAX_LEVEL + 3];
37 37
 
38
-static void h261_encode_block(H261Context * h, DCTELEM * block,
38
+static void h261_encode_block(H261Context * h, int16_t * block,
39 39
                               int n);
40 40
 
41 41
 int ff_h261_get_picture_format(int width, int height){
... ...
@@ -144,7 +144,7 @@ static void h261_encode_motion(H261Context * h, int val){
144 144
 }
145 145
 
146 146
 static inline int get_cbp(MpegEncContext * s,
147
-                      DCTELEM block[6][64])
147
+                      int16_t block[6][64])
148 148
 {
149 149
     int i, cbp;
150 150
     cbp= 0;
... ...
@@ -155,7 +155,7 @@ static inline int get_cbp(MpegEncContext * s,
155 155
     return cbp;
156 156
 }
157 157
 void ff_h261_encode_mb(MpegEncContext * s,
158
-         DCTELEM block[6][64],
158
+         int16_t block[6][64],
159 159
          int motion_x, int motion_y)
160 160
 {
161 161
     H261Context * h = (H261Context *)s;
... ...
@@ -256,7 +256,7 @@ void ff_h261_encode_init(MpegEncContext *s){
256 256
  * @param block the 8x8 block
257 257
  * @param n block index (0-3 are luma, 4-5 are chroma)
258 258
  */
259
-static void h261_encode_block(H261Context * h, DCTELEM * block, int n){
259
+static void h261_encode_block(H261Context * h, int16_t * block, int n){
260 260
     MpegEncContext * const s = &h->s;
261 261
     int level, run, i, j, last_index, last_non_zero, sign, slevel, code;
262 262
     RLTable *rl;
... ...
@@ -226,7 +226,7 @@ void ff_h263_loop_filter(MpegEncContext * s){
226 226
     }
227 227
 }
228 228
 
229
-void ff_h263_pred_acdc(MpegEncContext * s, DCTELEM *block, int n)
229
+void ff_h263_pred_acdc(MpegEncContext * s, int16_t *block, int n)
230 230
 {
231 231
     int x, y, wrap, a, c, pred_dc, scale, i;
232 232
     int16_t *dc_val, *ac_val, *ac_val1;
... ...
@@ -73,7 +73,7 @@ int ff_h263_decode_frame(AVCodecContext *avctx,
73 73
                              AVPacket *avpkt);
74 74
 int ff_h263_decode_end(AVCodecContext *avctx);
75 75
 void ff_h263_encode_mb(MpegEncContext *s,
76
-                       DCTELEM block[6][64],
76
+                       int16_t block[6][64],
77 77
                        int motion_x, int motion_y);
78 78
 void ff_h263_encode_picture_header(MpegEncContext *s, int picture_number);
79 79
 void ff_h263_encode_gob_header(MpegEncContext * s, int mb_line);
... ...
@@ -89,7 +89,7 @@ int ff_h263_decode_mba(MpegEncContext *s);
89 89
 void ff_h263_encode_mba(MpegEncContext *s);
90 90
 void ff_init_qscale_tab(MpegEncContext *s);
91 91
 int ff_h263_pred_dc(MpegEncContext * s, int n, int16_t **dc_val_ptr);
92
-void ff_h263_pred_acdc(MpegEncContext * s, DCTELEM *block, int n);
92
+void ff_h263_pred_acdc(MpegEncContext * s, int16_t *block, int n);
93 93
 
94 94
 
95 95
 /**
... ...
@@ -99,7 +99,7 @@ void ff_h263_show_pict_info(MpegEncContext *s);
99 99
 
100 100
 int ff_intel_h263_decode_picture_header(MpegEncContext *s);
101 101
 int ff_h263_decode_mb(MpegEncContext *s,
102
-                      DCTELEM block[6][64]);
102
+                      int16_t block[6][64]);
103 103
 
104 104
 /**
105 105
  * Return the value of the 3bit "source format" syntax element.
... ...
@@ -144,7 +144,7 @@ static inline void ff_h263_encode_motion_vector(MpegEncContext * s, int x, int y
144 144
 }
145 145
 
146 146
 static inline int get_p_cbp(MpegEncContext * s,
147
-                      DCTELEM block[6][64],
147
+                      int16_t block[6][64],
148 148
                       int motion_x, int motion_y){
149 149
     int cbp, i;
150 150
 
... ...
@@ -1769,7 +1769,7 @@ static av_always_inline void xchg_mb_border(H264Context *h, uint8_t *src_y,
1769 1769
     }
1770 1770
 }
1771 1771
 
1772
-static av_always_inline int dctcoef_get(DCTELEM *mb, int high_bit_depth,
1772
+static av_always_inline int dctcoef_get(int16_t *mb, int high_bit_depth,
1773 1773
                                         int index)
1774 1774
 {
1775 1775
     if (high_bit_depth) {
... ...
@@ -1778,7 +1778,7 @@ static av_always_inline int dctcoef_get(DCTELEM *mb, int high_bit_depth,
1778 1778
         return AV_RN16A(mb + index);
1779 1779
 }
1780 1780
 
1781
-static av_always_inline void dctcoef_set(DCTELEM *mb, int high_bit_depth,
1781
+static av_always_inline void dctcoef_set(int16_t *mb, int high_bit_depth,
1782 1782
                                          int index, int value)
1783 1783
 {
1784 1784
     if (high_bit_depth) {
... ...
@@ -1797,8 +1797,8 @@ static av_always_inline void hl_decode_mb_predict_luma(H264Context *h,
1797 1797
                                                        uint8_t *dest_y, int p)
1798 1798
 {
1799 1799
     MpegEncContext *const s = &h->s;
1800
-    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
1801
-    void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
1800
+    void (*idct_add)(uint8_t *dst, int16_t *block, int stride);
1801
+    void (*idct_dc_add)(uint8_t *dst, int16_t *block, int stride);
1802 1802
     int i;
1803 1803
     int qscale = p == 0 ? s->qscale : h->chroma_qp[p - 1];
1804 1804
     block_offset += 16 * p;
... ...
@@ -1914,7 +1914,7 @@ static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type,
1914 1914
                                                     uint8_t *dest_y, int p)
1915 1915
 {
1916 1916
     MpegEncContext *const s = &h->s;
1917
-    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
1917
+    void (*idct_add)(uint8_t *dst, int16_t *block, int stride);
1918 1918
     int i;
1919 1919
     block_offset += 16 * p;
1920 1920
     if (!IS_INTRA4x4(mb_type)) {
... ...
@@ -29,7 +29,6 @@
29 29
 #define AVCODEC_H264_H
30 30
 
31 31
 #include "libavutil/intreadwrite.h"
32
-#include "dsputil.h"
33 32
 #include "cabac.h"
34 33
 #include "mpegvideo.h"
35 34
 #include "h264dsp.h"
... ...
@@ -390,9 +389,9 @@ typedef struct H264Context {
390 390
     GetBitContext *intra_gb_ptr;
391 391
     GetBitContext *inter_gb_ptr;
392 392
 
393
-    DECLARE_ALIGNED(16, DCTELEM, mb)[16 * 48 * 2]; ///< as a dct coeffecient is int32_t in high depth, we need to reserve twice the space.
394
-    DECLARE_ALIGNED(16, DCTELEM, mb_luma_dc)[3][16 * 2];
395
-    DCTELEM mb_padding[256 * 2];        ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb
393
+    DECLARE_ALIGNED(16, int16_t, mb)[16 * 48 * 2]; ///< as a dct coefficient is int32_t in high depth, we need to reserve twice the space.
394
+    DECLARE_ALIGNED(16, int16_t, mb_luma_dc)[3][16 * 2];
395
+    int16_t mb_padding[256 * 2];        ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb
396 396
 
397 397
     /**
398 398
      * Cabac
... ...
@@ -1560,7 +1560,7 @@ static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx,
1560 1560
 }
1561 1561
 
1562 1562
 static av_always_inline void
1563
-decode_cabac_residual_internal(H264Context *h, DCTELEM *block,
1563
+decode_cabac_residual_internal(H264Context *h, int16_t *block,
1564 1564
                                int cat, int n, const uint8_t *scantable,
1565 1565
                                const uint32_t *qmul, int max_coeff,
1566 1566
                                int is_dc, int chroma422)
... ...
@@ -1744,18 +1744,27 @@ decode_cabac_residual_internal(H264Context *h, DCTELEM *block,
1744 1744
 
1745 1745
 }
1746 1746
 
1747
-static void decode_cabac_residual_dc_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, int max_coeff ) {
1747
+static void decode_cabac_residual_dc_internal(H264Context *h, int16_t *block,
1748
+                                              int cat, int n,
1749
+                                              const uint8_t *scantable,
1750
+                                              int max_coeff)
1751
+{
1748 1752
     decode_cabac_residual_internal(h, block, cat, n, scantable, NULL, max_coeff, 1, 0);
1749 1753
 }
1750 1754
 
1751
-static void decode_cabac_residual_dc_internal_422(H264Context *h, DCTELEM *block,
1755
+static void decode_cabac_residual_dc_internal_422(H264Context *h, int16_t *block,
1752 1756
                                                   int cat, int n, const uint8_t *scantable,
1753 1757
                                                   int max_coeff)
1754 1758
 {
1755 1759
     decode_cabac_residual_internal(h, block, cat, n, scantable, NULL, max_coeff, 1, 1);
1756 1760
 }
1757 1761
 
1758
-static void decode_cabac_residual_nondc_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
1762
+static void decode_cabac_residual_nondc_internal(H264Context *h, int16_t *block,
1763
+                                                 int cat, int n,
1764
+                                                 const uint8_t *scantable,
1765
+                                                 const uint32_t *qmul,
1766
+                                                 int max_coeff)
1767
+{
1759 1768
     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0, 0);
1760 1769
 }
1761 1770
 
... ...
@@ -1771,7 +1780,12 @@ static void decode_cabac_residual_nondc_internal( H264Context *h, DCTELEM *block
1771 1771
  * because it allows improved constant propagation into get_cabac_cbf_ctx,
1772 1772
  * as well as because most blocks have zero CBFs. */
1773 1773
 
1774
-static av_always_inline void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, int max_coeff ) {
1774
+static av_always_inline void decode_cabac_residual_dc(H264Context *h,
1775
+                                                      int16_t *block,
1776
+                                                      int cat, int n,
1777
+                                                      const uint8_t *scantable,
1778
+                                                      int max_coeff)
1779
+{
1775 1780
     /* read coded block flag */
1776 1781
     if( get_cabac( &h->cabac, &h->cabac_state[get_cabac_cbf_ctx( h, cat, n, max_coeff, 1 ) ] ) == 0 ) {
1777 1782
         h->non_zero_count_cache[scan8[n]] = 0;
... ...
@@ -1781,7 +1795,7 @@ static av_always_inline void decode_cabac_residual_dc( H264Context *h, DCTELEM *
1781 1781
 }
1782 1782
 
1783 1783
 static av_always_inline void
1784
-decode_cabac_residual_dc_422(H264Context *h, DCTELEM *block,
1784
+decode_cabac_residual_dc_422(H264Context *h, int16_t *block,
1785 1785
                              int cat, int n, const uint8_t *scantable,
1786 1786
                              int max_coeff)
1787 1787
 {
... ...
@@ -1793,7 +1807,13 @@ decode_cabac_residual_dc_422(H264Context *h, DCTELEM *block,
1793 1793
     decode_cabac_residual_dc_internal_422(h, block, cat, n, scantable, max_coeff);
1794 1794
 }
1795 1795
 
1796
-static av_always_inline void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
1796
+static av_always_inline void decode_cabac_residual_nondc(H264Context *h,
1797
+                                                         int16_t *block,
1798
+                                                         int cat, int n,
1799
+                                                         const uint8_t *scantable,
1800
+                                                         const uint32_t *qmul,
1801
+                                                         int max_coeff)
1802
+{
1797 1803
     /* read coded block flag */
1798 1804
     if( (cat != 5 || CHROMA444) && get_cabac( &h->cabac, &h->cabac_state[get_cabac_cbf_ctx( h, cat, n, max_coeff, 0 ) ] ) == 0 ) {
1799 1805
         if( max_coeff == 64 ) {
... ...
@@ -2361,7 +2381,7 @@ decode_intra_mb:
2361 2361
             if( cbp&0x20 ) {
2362 2362
                 int c, i, i8x8;
2363 2363
                 for( c = 0; c < 2; c++ ) {
2364
-                    DCTELEM *mb = h->mb + (16*(16 + 16*c) << pixel_shift);
2364
+                    int16_t *mb = h->mb + (16*(16 + 16*c) << pixel_shift);
2365 2365
                     qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
2366 2366
                     for (i8x8 = 0; i8x8 < 2; i8x8++) {
2367 2367
                         for (i = 0; i < 4; i++) {
... ...
@@ -442,7 +442,7 @@ static inline int get_level_prefix(GetBitContext *gb){
442 442
  * @param max_coeff number of coefficients in the block
443 443
  * @return <0 if an error occurred
444 444
  */
445
-static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
445
+static int decode_residual(H264Context *h, GetBitContext *gb, int16_t *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
446 446
     MpegEncContext * const s = &h->s;
447 447
     static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
448 448
     int level[16];
... ...
@@ -662,7 +662,7 @@ static av_always_inline int decode_luma_residual(H264Context *h, GetBitContext *
662 662
         for(i8x8=0; i8x8<4; i8x8++){
663 663
             if(cbp & (1<<i8x8)){
664 664
                 if(IS_8x8DCT(mb_type)){
665
-                    DCTELEM *buf = &h->mb[64*i8x8+256*p << pixel_shift];
665
+                    int16_t *buf = &h->mb[64*i8x8+256*p << pixel_shift];
666 666
                     uint8_t *nnz;
667 667
                     for(i4x4=0; i4x4<4; i4x4++){
668 668
                         const int index= i4x4 + 4*i8x8 + p*16;
... ...
@@ -1143,7 +1143,7 @@ decode_intra_mb:
1143 1143
             if(cbp&0x20){
1144 1144
                 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1145 1145
                     const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1146
-                    DCTELEM *mb = h->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
1146
+                    int16_t *mb = h->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
1147 1147
                     for (i8x8=0; i8x8<num_c8x8; i8x8++) {
1148 1148
                         for (i4x4=0; i4x4<4; i4x4++) {
1149 1149
                             const int index= 16 + 16*chroma_idx + 8*i8x8 + i4x4;
... ...
@@ -52,7 +52,7 @@ static av_noinline void FUNC(hl_decode_mb)(H264Context *h)
52 52
     const int transform_bypass = !SIMPLE && (s->qscale == 0 && h->sps.transform_bypass);
53 53
     /* is_h264 should always be true if SVQ3 is disabled. */
54 54
     const int is_h264 = !CONFIG_SVQ3_DECODER || SIMPLE || s->codec_id == AV_CODEC_ID_H264;
55
-    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
55
+    void (*idct_add)(uint8_t *dst, int16_t *block, int stride);
56 56
     const int block_h   = 16 >> s->chroma_y_shift;
57 57
     const int chroma422 = CHROMA422;
58 58
 
... ...
@@ -29,8 +29,6 @@
29 29
 
30 30
 #include <stdint.h>
31 31
 
32
-#include "dsputil.h"
33
-
34 32
 typedef void (*h264_weight_func)(uint8_t *block, int stride, int height,
35 33
                                  int log2_denom, int weight, int offset);
36 34
 typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src,
... ...
@@ -80,29 +78,29 @@ typedef struct H264DSPContext {
80 80
 
81 81
     /* IDCT */
82 82
     void (*h264_idct_add)(uint8_t *dst /*align 4*/,
83
-                          DCTELEM *block /*align 16*/, int stride);
83
+                          int16_t *block /*align 16*/, int stride);
84 84
     void (*h264_idct8_add)(uint8_t *dst /*align 8*/,
85
-                           DCTELEM *block /*align 16*/, int stride);
85
+                           int16_t *block /*align 16*/, int stride);
86 86
     void (*h264_idct_dc_add)(uint8_t *dst /*align 4*/,
87
-                             DCTELEM *block /*align 16*/, int stride);
87
+                             int16_t *block /*align 16*/, int stride);
88 88
     void (*h264_idct8_dc_add)(uint8_t *dst /*align 8*/,
89
-                              DCTELEM *block /*align 16*/, int stride);
89
+                              int16_t *block /*align 16*/, int stride);
90 90
 
91 91
     void (*h264_idct_add16)(uint8_t *dst /*align 16*/, const int *blockoffset,
92
-                            DCTELEM *block /*align 16*/, int stride,
92
+                            int16_t *block /*align 16*/, int stride,
93 93
                             const uint8_t nnzc[15 * 8]);
94 94
     void (*h264_idct8_add4)(uint8_t *dst /*align 16*/, const int *blockoffset,
95
-                            DCTELEM *block /*align 16*/, int stride,
95
+                            int16_t *block /*align 16*/, int stride,
96 96
                             const uint8_t nnzc[15 * 8]);
97 97
     void (*h264_idct_add8)(uint8_t **dst /*align 16*/, const int *blockoffset,
98
-                           DCTELEM *block /*align 16*/, int stride,
98
+                           int16_t *block /*align 16*/, int stride,
99 99
                            const uint8_t nnzc[15 * 8]);
100 100
     void (*h264_idct_add16intra)(uint8_t *dst /*align 16*/, const int *blockoffset,
101
-                                 DCTELEM *block /*align 16*/,
101
+                                 int16_t *block /*align 16*/,
102 102
                                  int stride, const uint8_t nnzc[15 * 8]);
103
-    void (*h264_luma_dc_dequant_idct)(DCTELEM *output,
104
-                                      DCTELEM *input /*align 16*/, int qmul);
105
-    void (*h264_chroma_dc_dequant_idct)(DCTELEM *block, int qmul);
103
+    void (*h264_luma_dc_dequant_idct)(int16_t *output,
104
+                                      int16_t *input /*align 16*/, int qmul);
105
+    void (*h264_chroma_dc_dequant_idct)(int16_t *block, int qmul);
106 106
 } H264DSPContext;
107 107
 
108 108
 void ff_h264dsp_init(H264DSPContext *c, const int bit_depth,
... ...
@@ -47,7 +47,7 @@ static const uint8_t scan8[16*3]={
47 47
 };
48 48
 #endif
49 49
 
50
-void FUNCC(ff_h264_idct_add)(uint8_t *_dst, DCTELEM *_block, int stride)
50
+void FUNCC(ff_h264_idct_add)(uint8_t *_dst, int16_t *_block, int stride)
51 51
 {
52 52
     int i;
53 53
     pixel *dst = (pixel*)_dst;
... ...
@@ -81,7 +81,7 @@ void FUNCC(ff_h264_idct_add)(uint8_t *_dst, DCTELEM *_block, int stride)
81 81
     }
82 82
 }
83 83
 
84
-void FUNCC(ff_h264_idct8_add)(uint8_t *_dst, DCTELEM *_block, int stride){
84
+void FUNCC(ff_h264_idct8_add)(uint8_t *_dst, int16_t *_block, int stride){
85 85
     int i;
86 86
     pixel *dst = (pixel*)_dst;
87 87
     dctcoef *block = (dctcoef*)_block;
... ...
@@ -154,10 +154,10 @@ void FUNCC(ff_h264_idct8_add)(uint8_t *_dst, DCTELEM *_block, int stride){
154 154
 }
155 155
 
156 156
 // assumes all AC coefs are 0
157
-void FUNCC(ff_h264_idct_dc_add)(uint8_t *p_dst, DCTELEM *block, int stride){
157
+void FUNCC(ff_h264_idct_dc_add)(uint8_t *_dst, int16_t *block, int stride){
158 158
     int i, j;
159 159
     int dc = (((dctcoef*)block)[0] + 32) >> 6;
160
-    pixel *dst = (pixel*)p_dst;
160
+    pixel *dst = (pixel*)_dst;
161 161
     stride >>= sizeof(pixel)-1;
162 162
     for( j = 0; j < 4; j++ )
163 163
     {
... ...
@@ -167,10 +167,10 @@ void FUNCC(ff_h264_idct_dc_add)(uint8_t *p_dst, DCTELEM *block, int stride){
167 167
     }
168 168
 }
169 169
 
170
-void FUNCC(ff_h264_idct8_dc_add)(uint8_t *p_dst, DCTELEM *block, int stride){
170
+void FUNCC(ff_h264_idct8_dc_add)(uint8_t *_dst, int16_t *block, int stride){
171 171
     int i, j;
172 172
     int dc = (((dctcoef*)block)[0] + 32) >> 6;
173
-    pixel *dst = (pixel*)p_dst;
173
+    pixel *dst = (pixel*)_dst;
174 174
     stride >>= sizeof(pixel)-1;
175 175
     for( j = 0; j < 8; j++ )
176 176
     {
... ...
@@ -180,7 +180,7 @@ void FUNCC(ff_h264_idct8_dc_add)(uint8_t *p_dst, DCTELEM *block, int stride){
180 180
     }
181 181
 }
182 182
 
183
-void FUNCC(ff_h264_idct_add16)(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){
183
+void FUNCC(ff_h264_idct_add16)(uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]){
184 184
     int i;
185 185
     for(i=0; i<16; i++){
186 186
         int nnz = nnzc[ scan8[i] ];
... ...
@@ -191,7 +191,7 @@ void FUNCC(ff_h264_idct_add16)(uint8_t *dst, const int *block_offset, DCTELEM *b
191 191
     }
192 192
 }
193 193
 
194
-void FUNCC(ff_h264_idct_add16intra)(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){
194
+void FUNCC(ff_h264_idct_add16intra)(uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]){
195 195
     int i;
196 196
     for(i=0; i<16; i++){
197 197
         if(nnzc[ scan8[i] ])             FUNCC(ff_h264_idct_add   )(dst + block_offset[i], block + i*16*sizeof(pixel), stride);
... ...
@@ -199,7 +199,7 @@ void FUNCC(ff_h264_idct_add16intra)(uint8_t *dst, const int *block_offset, DCTEL
199 199
     }
200 200
 }
201 201
 
202
-void FUNCC(ff_h264_idct8_add4)(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){
202
+void FUNCC(ff_h264_idct8_add4)(uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]){
203 203
     int i;
204 204
     for(i=0; i<16; i+=4){
205 205
         int nnz = nnzc[ scan8[i] ];
... ...
@@ -210,7 +210,7 @@ void FUNCC(ff_h264_idct8_add4)(uint8_t *dst, const int *block_offset, DCTELEM *b
210 210
     }
211 211
 }
212 212
 
213
-void FUNCC(ff_h264_idct_add8)(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){
213
+void FUNCC(ff_h264_idct_add8)(uint8_t **dest, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]){
214 214
     int i, j;
215 215
     for(j=1; j<3; j++){
216 216
         for(i=j*16; i<j*16+4; i++){
... ...
@@ -222,7 +222,7 @@ void FUNCC(ff_h264_idct_add8)(uint8_t **dest, const int *block_offset, DCTELEM *
222 222
     }
223 223
 }
224 224
 
225
-void FUNCC(ff_h264_idct_add8_422)(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){
225
+void FUNCC(ff_h264_idct_add8_422)(uint8_t **dest, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]){
226 226
     int i, j;
227 227
 
228 228
     for(j=1; j<3; j++){
... ...
@@ -248,13 +248,13 @@ void FUNCC(ff_h264_idct_add8_422)(uint8_t **dest, const int *block_offset, DCTEL
248 248
  * IDCT transforms the 16 dc values and dequantizes them.
249 249
  * @param qmul quantization parameter
250 250
  */
251
-void FUNCC(ff_h264_luma_dc_dequant_idct)(DCTELEM *p_output, DCTELEM *p_input, int qmul){
251
+void FUNCC(ff_h264_luma_dc_dequant_idct)(int16_t *_output, int16_t *_input, int qmul){
252 252
 #define stride 16
253 253
     int i;
254 254
     int temp[16];
255 255
     static const uint8_t x_offset[4]={0, 2*stride, 8*stride, 10*stride};
256
-    dctcoef *input = (dctcoef*)p_input;
257
-    dctcoef *output = (dctcoef*)p_output;
256
+    dctcoef *input = (dctcoef*)_input;
257
+    dctcoef *output = (dctcoef*)_output;
258 258
 
259 259
     for(i=0; i<4; i++){
260 260
         const int z0= input[4*i+0] + input[4*i+1];
... ...
@@ -283,7 +283,7 @@ void FUNCC(ff_h264_luma_dc_dequant_idct)(DCTELEM *p_output, DCTELEM *p_input, in
283 283
 #undef stride
284 284
 }
285 285
 
286
-void FUNCC(ff_h264_chroma422_dc_dequant_idct)(DCTELEM *_block, int qmul){
286
+void FUNCC(ff_h264_chroma422_dc_dequant_idct)(int16_t *_block, int qmul){
287 287
     const int stride= 16*2;
288 288
     const int xStride= 16;
289 289
     int i;
... ...
@@ -310,7 +310,7 @@ void FUNCC(ff_h264_chroma422_dc_dequant_idct)(DCTELEM *_block, int qmul){
310 310
     }
311 311
 }
312 312
 
313
-void FUNCC(ff_h264_chroma_dc_dequant_idct)(DCTELEM *_block, int qmul){
313
+void FUNCC(ff_h264_chroma_dc_dequant_idct)(int16_t *_block, int qmul){
314 314
     const int stride= 16*2;
315 315
     const int xStride= 16;
316 316
     int a,b,c,d,e;
... ...
@@ -28,8 +28,8 @@
28 28
 #ifndef AVCODEC_H264PRED_H
29 29
 #define AVCODEC_H264PRED_H
30 30
 
31
-#include "libavutil/common.h"
32
-#include "dsputil.h"
31
+#include <stddef.h>
32
+#include <stdint.h>
33 33
 
34 34
 /**
35 35
  * Prediction types
... ...
@@ -98,15 +98,15 @@ typedef struct H264PredContext {
98 98
     void(*pred16x16[4 + 3 + 2])(uint8_t *src, ptrdiff_t stride);
99 99
 
100 100
     void(*pred4x4_add[2])(uint8_t *pix /*align  4*/,
101
-                          const DCTELEM *block /*align 16*/, ptrdiff_t stride);
101
+                          const int16_t *block /*align 16*/, ptrdiff_t stride);
102 102
     void(*pred8x8l_add[2])(uint8_t *pix /*align  8*/,
103
-                           const DCTELEM *block /*align 16*/, ptrdiff_t stride);
103
+                           const int16_t *block /*align 16*/, ptrdiff_t stride);
104 104
     void(*pred8x8_add[3])(uint8_t *pix /*align  8*/,
105 105
                           const int *block_offset,
106
-                          const DCTELEM *block /*align 16*/, ptrdiff_t stride);
106
+                          const int16_t *block /*align 16*/, ptrdiff_t stride);
107 107
     void(*pred16x16_add[3])(uint8_t *pix /*align 16*/,
108 108
                             const int *block_offset,
109
-                            const DCTELEM *block /*align 16*/, ptrdiff_t stride);
109
+                            const int16_t *block /*align 16*/, ptrdiff_t stride);
110 110
 } H264PredContext;
111 111
 
112 112
 void ff_h264_pred_init(H264PredContext *h, int codec_id,
... ...
@@ -1132,7 +1132,7 @@ static void FUNCC(pred8x8l_horizontal_up)(uint8_t *_src, int has_topleft,
1132 1132
 #undef PL
1133 1133
 #undef SRC
1134 1134
 
1135
-static void FUNCC(pred4x4_vertical_add)(uint8_t *_pix, const DCTELEM *_block,
1135
+static void FUNCC(pred4x4_vertical_add)(uint8_t *_pix, const int16_t *_block,
1136 1136
                                         ptrdiff_t stride)
1137 1137
 {
1138 1138
     int i;
... ...
@@ -1151,7 +1151,7 @@ static void FUNCC(pred4x4_vertical_add)(uint8_t *_pix, const DCTELEM *_block,
1151 1151
     }
1152 1152
 }
1153 1153
 
1154
-static void FUNCC(pred4x4_horizontal_add)(uint8_t *_pix, const DCTELEM *_block,
1154
+static void FUNCC(pred4x4_horizontal_add)(uint8_t *_pix, const int16_t *_block,
1155 1155
                                           ptrdiff_t stride)
1156 1156
 {
1157 1157
     int i;
... ...
@@ -1169,7 +1169,7 @@ static void FUNCC(pred4x4_horizontal_add)(uint8_t *_pix, const DCTELEM *_block,
1169 1169
     }
1170 1170
 }
1171 1171
 
1172
-static void FUNCC(pred8x8l_vertical_add)(uint8_t *_pix, const DCTELEM *_block,
1172
+static void FUNCC(pred8x8l_vertical_add)(uint8_t *_pix, const int16_t *_block,
1173 1173
                                          ptrdiff_t stride)
1174 1174
 {
1175 1175
     int i;
... ...
@@ -1192,7 +1192,7 @@ static void FUNCC(pred8x8l_vertical_add)(uint8_t *_pix, const DCTELEM *_block,
1192 1192
     }
1193 1193
 }
1194 1194
 
1195
-static void FUNCC(pred8x8l_horizontal_add)(uint8_t *_pix, const DCTELEM *_block,
1195
+static void FUNCC(pred8x8l_horizontal_add)(uint8_t *_pix, const int16_t *_block,
1196 1196
                                            ptrdiff_t stride)
1197 1197
 {
1198 1198
     int i;
... ...
@@ -1215,7 +1215,7 @@ static void FUNCC(pred8x8l_horizontal_add)(uint8_t *_pix, const DCTELEM *_block,
1215 1215
 }
1216 1216
 
1217 1217
 static void FUNCC(pred16x16_vertical_add)(uint8_t *pix, const int *block_offset,
1218
-                                          const DCTELEM *block,
1218
+                                          const int16_t *block,
1219 1219
                                           ptrdiff_t stride)
1220 1220
 {
1221 1221
     int i;
... ...
@@ -1225,7 +1225,7 @@ static void FUNCC(pred16x16_vertical_add)(uint8_t *pix, const int *block_offset,
1225 1225
 
1226 1226
 static void FUNCC(pred16x16_horizontal_add)(uint8_t *pix,
1227 1227
                                             const int *block_offset,
1228
-                                            const DCTELEM *block,
1228
+                                            const int16_t *block,
1229 1229
                                             ptrdiff_t stride)
1230 1230
 {
1231 1231
     int i;
... ...
@@ -1234,7 +1234,7 @@ static void FUNCC(pred16x16_horizontal_add)(uint8_t *pix,
1234 1234
 }
1235 1235
 
1236 1236
 static void FUNCC(pred8x8_vertical_add)(uint8_t *pix, const int *block_offset,
1237
-                                        const DCTELEM *block, ptrdiff_t stride)
1237
+                                        const int16_t *block, ptrdiff_t stride)
1238 1238
 {
1239 1239
     int i;
1240 1240
     for(i=0; i<4; i++)
... ...
@@ -1242,7 +1242,7 @@ static void FUNCC(pred8x8_vertical_add)(uint8_t *pix, const int *block_offset,
1242 1242
 }
1243 1243
 
1244 1244
 static void FUNCC(pred8x16_vertical_add)(uint8_t *pix, const int *block_offset,
1245
-                                         const DCTELEM *block, ptrdiff_t stride)
1245
+                                         const int16_t *block, ptrdiff_t stride)
1246 1246
 {
1247 1247
     int i;
1248 1248
     for(i=0; i<4; i++)
... ...
@@ -1252,7 +1252,7 @@ static void FUNCC(pred8x16_vertical_add)(uint8_t *pix, const int *block_offset,
1252 1252
 }
1253 1253
 
1254 1254
 static void FUNCC(pred8x8_horizontal_add)(uint8_t *pix, const int *block_offset,
1255
-                                          const DCTELEM *block,
1255
+                                          const int16_t *block,
1256 1256
                                           ptrdiff_t stride)
1257 1257
 {
1258 1258
     int i;
... ...
@@ -1262,7 +1262,7 @@ static void FUNCC(pred8x8_horizontal_add)(uint8_t *pix, const int *block_offset,
1262 1262
 
1263 1263
 static void FUNCC(pred8x16_horizontal_add)(uint8_t *pix,
1264 1264
                                            const int *block_offset,
1265
-                                           const DCTELEM *block, ptrdiff_t stride)
1265
+                                           const int16_t *block, ptrdiff_t stride)
1266 1266
 {
1267 1267
     int i;
1268 1268
     for(i=0; i<4; i++)
... ...
@@ -439,7 +439,7 @@ static void h263_decode_dquant(MpegEncContext *s){
439 439
     ff_set_qscale(s, s->qscale);
440 440
 }
441 441
 
442
-static int h263_decode_block(MpegEncContext * s, DCTELEM * block,
442
+static int h263_decode_block(MpegEncContext * s, int16_t * block,
443 443
                              int n, int coded)
444 444
 {
445 445
     int code, level, i, j, last, run;
... ...
@@ -564,7 +564,7 @@ not_coded:
564 564
 
565 565
 static int h263_skip_b_part(MpegEncContext *s, int cbp)
566 566
 {
567
-    LOCAL_ALIGNED_16(DCTELEM, dblock, [64]);
567
+    LOCAL_ALIGNED_16(int16_t, dblock, [64]);
568 568
     int i, mbi;
569 569
     int bli[6];
570 570
 
... ...
@@ -603,7 +603,7 @@ static int h263_get_modb(GetBitContext *gb, int pb_frame, int *cbpb)
603 603
 }
604 604
 
605 605
 int ff_h263_decode_mb(MpegEncContext *s,
606
-                      DCTELEM block[6][64])
606
+                      int16_t block[6][64])
607 607
 {
608 608
     int cbpc, cbpy, i, cbp, pred_x, pred_y, mx, my, dquant;
609 609
     int16_t *mot_val;
... ...
@@ -303,7 +303,7 @@ static const int dquant_code[5]= {1,0,9,2,3};
303 303
  * @param block the 8x8 block
304 304
  * @param n block index (0-3 are luma, 4-5 are chroma)
305 305
  */
306
-static void h263_encode_block(MpegEncContext * s, DCTELEM * block, int n)
306
+static void h263_encode_block(MpegEncContext * s, int16_t * block, int n)
307 307
 {
308 308
     int level, run, last, i, j, last_index, last_non_zero, sign, slevel, code;
309 309
     RLTable *rl;
... ...
@@ -452,7 +452,7 @@ static void h263p_encode_umotion(MpegEncContext * s, int val)
452 452
 }
453 453
 
454 454
 void ff_h263_encode_mb(MpegEncContext * s,
455
-                       DCTELEM block[6][64],
455
+                       int16_t block[6][64],
456 456
                        int motion_x, int motion_y)
457 457
 {
458 458
     int cbpc, cbpy, i, cbp, pred_x, pred_y;
... ...
@@ -136,17 +136,17 @@
136 136
 #endif
137 137
 
138 138
 
139
-/* Multiply a DCTELEM variable by an int32_t constant, and immediately
140
- * descale to yield a DCTELEM result.
139
+/* Multiply an int16_t variable by an int32_t constant, and immediately
140
+ * descale to yield an int16_t result.
141 141
  */
142 142
 
143
-#define MULTIPLY(var,const)  ((DCTELEM) DESCALE((var) * (const), CONST_BITS))
143
+#define MULTIPLY(var,const)  ((int16_t) DESCALE((var) * (const), CONST_BITS))
144 144
 
145
-static av_always_inline void row_fdct(DCTELEM * data){
145
+static av_always_inline void row_fdct(int16_t * data){
146 146
   int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
147 147
   int tmp10, tmp11, tmp12, tmp13;
148 148
   int z1, z2, z3, z4, z5, z11, z13;
149
-  DCTELEM *dataptr;
149
+  int16_t *dataptr;
150 150
   int ctr;
151 151
 
152 152
   /* Pass 1: process rows. */
... ...
@@ -205,12 +205,12 @@ static av_always_inline void row_fdct(DCTELEM * data){
205 205
  */
206 206
 
207 207
 GLOBAL(void)
208
-ff_fdct_ifast (DCTELEM * data)
208
+ff_fdct_ifast (int16_t * data)
209 209
 {
210 210
   int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
211 211
   int tmp10, tmp11, tmp12, tmp13;
212 212
   int z1, z2, z3, z4, z5, z11, z13;
213
-  DCTELEM *dataptr;
213
+  int16_t *dataptr;
214 214
   int ctr;
215 215
 
216 216
   row_fdct(data);
... ...
@@ -271,12 +271,12 @@ ff_fdct_ifast (DCTELEM * data)
271 271
  */
272 272
 
273 273
 GLOBAL(void)
274
-ff_fdct_ifast248 (DCTELEM * data)
274
+ff_fdct_ifast248 (int16_t * data)
275 275
 {
276 276
   int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
277 277
   int tmp10, tmp11, tmp12, tmp13;
278 278
   int z1;
279
-  DCTELEM *dataptr;
279
+  int16_t *dataptr;
280 280
   int ctr;
281 281
 
282 282
   row_fdct(data);
... ...
@@ -184,12 +184,12 @@
184 184
 #endif
185 185
 
186 186
 
187
-static av_always_inline void FUNC(row_fdct)(DCTELEM *data)
187
+static av_always_inline void FUNC(row_fdct)(int16_t *data)
188 188
 {
189 189
   int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
190 190
   int tmp10, tmp11, tmp12, tmp13;
191 191
   int z1, z2, z3, z4, z5;
192
-  DCTELEM *dataptr;
192
+  int16_t *dataptr;
193 193
   int ctr;
194 194
 
195 195
   /* Pass 1: process rows. */
... ...
@@ -216,13 +216,13 @@ static av_always_inline void FUNC(row_fdct)(DCTELEM *data)
216 216
     tmp11 = tmp1 + tmp2;
217 217
     tmp12 = tmp1 - tmp2;
218 218
 
219
-    dataptr[0] = (DCTELEM) ((tmp10 + tmp11) << PASS1_BITS);
220
-    dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
219
+    dataptr[0] = (int16_t) ((tmp10 + tmp11) << PASS1_BITS);
220
+    dataptr[4] = (int16_t) ((tmp10 - tmp11) << PASS1_BITS);
221 221
 
222 222
     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
223
-    dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
223
+    dataptr[2] = (int16_t) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
224 224
                                    CONST_BITS-PASS1_BITS);
225
-    dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
225
+    dataptr[6] = (int16_t) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
226 226
                                    CONST_BITS-PASS1_BITS);
227 227
 
228 228
     /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
... ...
@@ -248,10 +248,10 @@ static av_always_inline void FUNC(row_fdct)(DCTELEM *data)
248 248
     z3 += z5;
249 249
     z4 += z5;
250 250
 
251
-    dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS);
252
-    dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS);
253
-    dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS);
254
-    dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS);
251
+    dataptr[7] = (int16_t) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS);
252
+    dataptr[5] = (int16_t) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS);
253
+    dataptr[3] = (int16_t) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS);
254
+    dataptr[1] = (int16_t) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS);
255 255
 
256 256
     dataptr += DCTSIZE;         /* advance pointer to next row */
257 257
   }
... ...
@@ -262,12 +262,12 @@ static av_always_inline void FUNC(row_fdct)(DCTELEM *data)
262 262
  */
263 263
 
264 264
 GLOBAL(void)
265
-FUNC(ff_jpeg_fdct_islow)(DCTELEM *data)
265
+FUNC(ff_jpeg_fdct_islow)(int16_t *data)
266 266
 {
267 267
   int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
268 268
   int tmp10, tmp11, tmp12, tmp13;
269 269
   int z1, z2, z3, z4, z5;
270
-  DCTELEM *dataptr;
270
+  int16_t *dataptr;
271 271
   int ctr;
272 272
 
273 273
   FUNC(row_fdct)(data);
... ...
@@ -344,12 +344,12 @@ FUNC(ff_jpeg_fdct_islow)(DCTELEM *data)
344 344
  * you do even part two times.
345 345
  */
346 346
 GLOBAL(void)
347
-FUNC(ff_fdct248_islow)(DCTELEM *data)
347
+FUNC(ff_fdct248_islow)(int16_t *data)
348 348
 {
349 349
   int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
350 350
   int tmp10, tmp11, tmp12, tmp13;
351 351
   int z1;
352
-  DCTELEM *dataptr;
352
+  int16_t *dataptr;
353 353
   int ctr;
354 354
 
355 355
   FUNC(row_fdct)(data);
... ...
@@ -74,7 +74,7 @@
74 74
 
75 75
 #define RIGHT_SHIFT(x, n) ((x) >> (n))
76 76
 
77
-typedef DCTELEM DCTBLOCK[DCTSIZE2];
77
+typedef int16_t DCTBLOCK[DCTSIZE2];
78 78
 
79 79
 #define CONST_BITS 13
80 80
 
... ...
@@ -213,7 +213,7 @@ void ff_j_rev_dct(DCTBLOCK data)
213 213
   int32_t tmp10, tmp11, tmp12, tmp13;
214 214
   int32_t z1, z2, z3, z4, z5;
215 215
   int32_t d0, d1, d2, d3, d4, d5, d6, d7;
216
-  register DCTELEM *dataptr;
216
+  register int16_t *dataptr;
217 217
   int rowctr;
218 218
 
219 219
   /* Pass 1: process rows. */
... ...
@@ -249,7 +249,7 @@ void ff_j_rev_dct(DCTBLOCK data)
249 249
       /* AC terms all zero */
250 250
       if (d0) {
251 251
           /* Compute a 32 bit value to assign. */
252
-          DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS);
252
+          int16_t dcval = (int16_t) (d0 << PASS1_BITS);
253 253
           register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000);
254 254
 
255 255
           idataptr[0] = v;
... ...
@@ -574,14 +574,14 @@ void ff_j_rev_dct(DCTBLOCK data)
574 574
 }
575 575
     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
576 576
 
577
-    dataptr[0] = (DCTELEM) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
578
-    dataptr[7] = (DCTELEM) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
579
-    dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
580
-    dataptr[6] = (DCTELEM) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
581
-    dataptr[2] = (DCTELEM) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
582
-    dataptr[5] = (DCTELEM) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
583
-    dataptr[3] = (DCTELEM) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
584
-    dataptr[4] = (DCTELEM) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
577
+    dataptr[0] = (int16_t) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
578
+    dataptr[7] = (int16_t) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
579
+    dataptr[1] = (int16_t) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
580
+    dataptr[6] = (int16_t) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
581
+    dataptr[2] = (int16_t) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
582
+    dataptr[5] = (int16_t) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
583
+    dataptr[3] = (int16_t) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
584
+    dataptr[4] = (int16_t) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
585 585
 
586 586
     dataptr += DCTSIZE;         /* advance pointer to next row */
587 587
   }
... ...
@@ -920,21 +920,21 @@ void ff_j_rev_dct(DCTBLOCK data)
920 920
 
921 921
     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
922 922
 
923
-    dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp3,
923
+    dataptr[DCTSIZE*0] = (int16_t) DESCALE(tmp10 + tmp3,
924 924
                                            CONST_BITS+PASS1_BITS+3);
925
-    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp10 - tmp3,
925
+    dataptr[DCTSIZE*7] = (int16_t) DESCALE(tmp10 - tmp3,
926 926
                                            CONST_BITS+PASS1_BITS+3);
927
-    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp11 + tmp2,
927
+    dataptr[DCTSIZE*1] = (int16_t) DESCALE(tmp11 + tmp2,
928 928
                                            CONST_BITS+PASS1_BITS+3);
929
-    dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(tmp11 - tmp2,
929
+    dataptr[DCTSIZE*6] = (int16_t) DESCALE(tmp11 - tmp2,
930 930
                                            CONST_BITS+PASS1_BITS+3);
931
-    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp12 + tmp1,
931
+    dataptr[DCTSIZE*2] = (int16_t) DESCALE(tmp12 + tmp1,
932 932
                                            CONST_BITS+PASS1_BITS+3);
933
-    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12 - tmp1,
933
+    dataptr[DCTSIZE*5] = (int16_t) DESCALE(tmp12 - tmp1,
934 934
                                            CONST_BITS+PASS1_BITS+3);
935
-    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp13 + tmp0,
935
+    dataptr[DCTSIZE*3] = (int16_t) DESCALE(tmp13 + tmp0,
936 936
                                            CONST_BITS+PASS1_BITS+3);
937
-    dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp13 - tmp0,
937
+    dataptr[DCTSIZE*4] = (int16_t) DESCALE(tmp13 - tmp0,
938 938
                                            CONST_BITS+PASS1_BITS+3);
939 939
 
940 940
     dataptr++;                  /* advance pointer to next column */
... ...
@@ -951,7 +951,7 @@ void ff_j_rev_dct4(DCTBLOCK data)
951 951
   int32_t tmp10, tmp11, tmp12, tmp13;
952 952
   int32_t z1;
953 953
   int32_t d0, d2, d4, d6;
954
-  register DCTELEM *dataptr;
954
+  register int16_t *dataptr;
955 955
   int rowctr;
956 956
 
957 957
   /* Pass 1: process rows. */
... ...
@@ -983,7 +983,7 @@ void ff_j_rev_dct4(DCTBLOCK data)
983 983
       /* AC terms all zero */
984 984
       if (d0) {
985 985
           /* Compute a 32 bit value to assign. */
986
-          DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS);
986
+          int16_t dcval = (int16_t) (d0 << PASS1_BITS);
987 987
           register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000);
988 988
 
989 989
           idataptr[0] = v;
... ...
@@ -1045,10 +1045,10 @@ void ff_j_rev_dct4(DCTBLOCK data)
1045 1045
 
1046 1046
     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
1047 1047
 
1048
-    dataptr[0] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
1049
-    dataptr[1] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
1050
-    dataptr[2] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
1051
-    dataptr[3] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
1048
+    dataptr[0] = (int16_t) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
1049
+    dataptr[1] = (int16_t) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
1050
+    dataptr[2] = (int16_t) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
1051
+    dataptr[3] = (int16_t) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
1052 1052
 
1053 1053
     dataptr += DCTSTRIDE;       /* advance pointer to next row */
1054 1054
   }
... ...
@@ -45,14 +45,14 @@ typedef struct MDECContext{
45 45
     int mb_width;
46 46
     int mb_height;
47 47
     int mb_x, mb_y;
48
-    DECLARE_ALIGNED(16, DCTELEM, block)[6][64];
48
+    DECLARE_ALIGNED(16, int16_t, block)[6][64];
49 49
     uint8_t *bitstream_buffer;
50 50
     unsigned int bitstream_buffer_size;
51 51
     int block_last_index[6];
52 52
 } MDECContext;
53 53
 
54 54
 //very similar to MPEG-1
55
-static inline int mdec_decode_block_intra(MDECContext *a, DCTELEM *block, int n)
55
+static inline int mdec_decode_block_intra(MDECContext *a, int16_t *block, int n)
56 56
 {
57 57
     int level, diff, i, j, run;
58 58
     int component;
... ...
@@ -119,7 +119,7 @@ static inline int mdec_decode_block_intra(MDECContext *a, DCTELEM *block, int n)
119 119
     return 0;
120 120
 }
121 121
 
122
-static inline int decode_mb(MDECContext *a, DCTELEM block[6][64]){
122
+static inline int decode_mb(MDECContext *a, int16_t block[6][64]){
123 123
     int i;
124 124
     const int block_index[6]= {5,4,0,1,2,3};
125 125
 
... ...
@@ -134,7 +134,7 @@ static inline int decode_mb(MDECContext *a, DCTELEM block[6][64]){
134 134
 }
135 135
 
136 136
 static inline void idct_put(MDECContext *a, int mb_x, int mb_y){
137
-    DCTELEM (*block)[64]= a->block;
137
+    int16_t (*block)[64]= a->block;
138 138
     int linesize= a->picture.linesize[0];
139 139
 
140 140
     uint8_t *dest_y  = a->picture.data[0] + (mb_y * 16* linesize              ) + mb_x * 16;
... ...
@@ -47,7 +47,7 @@ typedef struct {
47 47
     AVFrame         buf_ptrs    [16];
48 48
     AVPicture       flipped_ptrs[16];
49 49
 
50
-    DECLARE_ALIGNED(16, DCTELEM, dct_block)[64];
50
+    DECLARE_ALIGNED(16, int16_t, dct_block)[64];
51 51
 
52 52
     GetBitContext   gb;
53 53
     ScanTable       scantable;
... ...
@@ -183,7 +183,7 @@ static const int8_t vlcdec_lookup[9][64] = {
183 183
 
184 184
 static int vlc_decode_block(MimicContext *ctx, int num_coeffs, int qscale)
185 185
 {
186
-    DCTELEM *block = ctx->dct_block;
186
+    int16_t *block = ctx->dct_block;
187 187
     unsigned int pos;
188 188
 
189 189
     ctx->dsp.clear_block(block);
... ...
@@ -498,7 +498,7 @@ static inline int mjpeg_decode_dc(MJpegDecodeContext *s, int dc_index)
498 498
 }
499 499
 
500 500
 /* decode block and dequantize */
501
-static int decode_block(MJpegDecodeContext *s, DCTELEM *block, int component,
501
+static int decode_block(MJpegDecodeContext *s, int16_t *block, int component,
502 502
                         int dc_index, int ac_index, int16_t *quant_matrix)
503 503
 {
504 504
     int code, i, j, level, val;
... ...
@@ -546,7 +546,7 @@ static int decode_block(MJpegDecodeContext *s, DCTELEM *block, int component,
546 546
     return 0;
547 547
 }
548 548
 
549
-static int decode_dc_progressive(MJpegDecodeContext *s, DCTELEM *block,
549
+static int decode_dc_progressive(MJpegDecodeContext *s, int16_t *block,
550 550
                                  int component, int dc_index,
551 551
                                  int16_t *quant_matrix, int Al)
552 552
 {
... ...
@@ -564,7 +564,7 @@ static int decode_dc_progressive(MJpegDecodeContext *s, DCTELEM *block,
564 564
 }
565 565
 
566 566
 /* decode block and dequantize - progressive JPEG version */
567
-static int decode_block_progressive(MJpegDecodeContext *s, DCTELEM *block,
567
+static int decode_block_progressive(MJpegDecodeContext *s, int16_t *block,
568 568
                                     uint8_t *last_nnz, int ac_index,
569 569
                                     int16_t *quant_matrix,
570 570
                                     int ss, int se, int Al, int *EOBRUN)
... ...
@@ -662,7 +662,7 @@ for (; ; i++) {                                                     \
662 662
 }
663 663
 
664 664
 /* decode block and dequantize - progressive JPEG refinement pass */
665
-static int decode_block_refinement(MJpegDecodeContext *s, DCTELEM *block,
665
+static int decode_block_refinement(MJpegDecodeContext *s, int16_t *block,
666 666
                                    uint8_t *last_nnz,
667 667
                                    int ac_index, int16_t *quant_matrix,
668 668
                                    int ss, int se, int Al, int *EOBRUN)
... ...
@@ -1081,7 +1081,7 @@ static int mjpeg_decode_scan(MJpegDecodeContext *s, int nb_components, int Ah,
1081 1081
                     } else {
1082 1082
                         int block_idx  = s->block_stride[c] * (v * mb_y + y) +
1083 1083
                                          (h * mb_x + x);
1084
-                        DCTELEM *block = s->blocks[c][block_idx];
1084
+                        int16_t *block = s->blocks[c][block_idx];
1085 1085
                         if (Ah)
1086 1086
                             block[0] += get_bits1(&s->gb) *
1087 1087
                                         s->quant_matrixes[s->quant_index[c]][0] << Al;
... ...
@@ -1139,7 +1139,7 @@ static int mjpeg_decode_scan_progressive_ac(MJpegDecodeContext *s, int ss,
1139 1139
     for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
1140 1140
         uint8_t *ptr     = data + (mb_y * linesize * 8 >> s->avctx->lowres);
1141 1141
         int block_idx    = mb_y * s->block_stride[c];
1142
-        DCTELEM (*block)[64] = &s->blocks[c][block_idx];
1142
+        int16_t (*block)[64] = &s->blocks[c][block_idx];
1143 1143
         uint8_t *last_nnz    = &s->last_nnz[c][block_idx];
1144 1144
         for (mb_x = 0; mb_x < s->mb_width; mb_x++, block++, last_nnz++) {
1145 1145
                 int ret;
... ...
@@ -91,8 +91,8 @@ typedef struct MJpegDecodeContext {
91 91
     int got_picture;                                ///< we found a SOF and picture is valid, too.
92 92
     int linesize[MAX_COMPONENTS];                   ///< linesize << interlaced
93 93
     int8_t *qscale_table;
94
-    DECLARE_ALIGNED(16, DCTELEM, block)[64];
95
-    DCTELEM (*blocks[MAX_COMPONENTS])[64]; ///< intermediate sums (progressive mode)
94
+    DECLARE_ALIGNED(16, int16_t, block)[64];
95
+    int16_t (*blocks[MAX_COMPONENTS])[64]; ///< intermediate sums (progressive mode)
96 96
     uint8_t *last_nnz[MAX_COMPONENTS];
97 97
     uint64_t coefs_finished[MAX_COMPONENTS]; ///< bitmask of which coefs have been completely decoded (progressive mode)
98 98
     ScanTable scantable;
... ...
@@ -397,7 +397,7 @@ void ff_mjpeg_encode_dc(MpegEncContext *s, int val,
397 397
     }
398 398
 }
399 399
 
400
-static void encode_block(MpegEncContext *s, DCTELEM *block, int n)
400
+static void encode_block(MpegEncContext *s, int16_t *block, int n)
401 401
 {
402 402
     int mant, nbits, code, i, j;
403 403
     int component, dc, run, last_index, val;
... ...
@@ -455,7 +455,7 @@ static void encode_block(MpegEncContext *s, DCTELEM *block, int n)
455 455
         put_bits(&s->pb, huff_size_ac[0], huff_code_ac[0]);
456 456
 }
457 457
 
458
-void ff_mjpeg_encode_mb(MpegEncContext *s, DCTELEM block[6][64])
458
+void ff_mjpeg_encode_mb(MpegEncContext *s, int16_t block[6][64])
459 459
 {
460 460
     int i;
461 461
     if (s->chroma_format == CHROMA_444) {
... ...
@@ -33,7 +33,8 @@
33 33
 #ifndef AVCODEC_MJPEGENC_H
34 34
 #define AVCODEC_MJPEGENC_H
35 35
 
36
-#include "dsputil.h"
36
+#include <stdint.h>
37
+
37 38
 #include "mpegvideo.h"
38 39
 
39 40
 typedef struct MJpegContext {
... ...
@@ -55,6 +56,6 @@ void ff_mjpeg_encode_picture_trailer(MpegEncContext *s);
55 55
 void ff_mjpeg_encode_stuffing(MpegEncContext *s);
56 56
 void ff_mjpeg_encode_dc(MpegEncContext *s, int val,
57 57
                         uint8_t *huff_size, uint16_t *huff_code);
58
-void ff_mjpeg_encode_mb(MpegEncContext *s, DCTELEM block[6][64]);
58
+void ff_mjpeg_encode_mb(MpegEncContext *s, int16_t block[6][64]);
59 59
 
60 60
 #endif /* AVCODEC_MJPEGENC_H */
... ...
@@ -81,7 +81,7 @@ static int mpeg_decode_motion(MpegEncContext *s, int fcode, int pred)
81 81
     return sign_extend(val, 5 + shift);
82 82
 }
83 83
 
84
-static inline int mpeg1_decode_block_intra(MpegEncContext *s, DCTELEM *block, int n)
84
+static inline int mpeg1_decode_block_intra(MpegEncContext *s, int16_t *block, int n)
85 85
 {
86 86
     int level, dc, diff, i, j, run;
87 87
     int component;
... ...
@@ -152,12 +152,12 @@ static inline int mpeg1_decode_block_intra(MpegEncContext *s, DCTELEM *block, in
152 152
    return 0;
153 153
 }
154 154
 
155
-int ff_mpeg1_decode_block_intra(MpegEncContext *s, DCTELEM *block, int n)
155
+int ff_mpeg1_decode_block_intra(MpegEncContext *s, int16_t *block, int n)
156 156
 {
157 157
     return mpeg1_decode_block_intra(s, block, n);
158 158
 }
159 159
 
160
-static inline int mpeg1_decode_block_inter(MpegEncContext *s, DCTELEM *block, int n)
160
+static inline int mpeg1_decode_block_inter(MpegEncContext *s, int16_t *block, int n)
161 161
 {
162 162
     int level, i, j, run;
163 163
     RLTable *rl = &ff_rl_mpeg1;
... ...
@@ -237,7 +237,7 @@ end:
237 237
  * Changing this would eat up any speed benefits it has.
238 238
  * Do not use "fast" flag if you need the code to be robust.
239 239
  */
240
-static inline int mpeg1_fast_decode_block_inter(MpegEncContext *s, DCTELEM *block, int n)
240
+static inline int mpeg1_fast_decode_block_inter(MpegEncContext *s, int16_t *block, int n)
241 241
 {
242 242
     int level, i, j, run;
243 243
     RLTable *rl = &ff_rl_mpeg1;
... ...
@@ -309,7 +309,7 @@ end:
309 309
 }
310 310
 
311 311
 
312
-static inline int mpeg2_decode_block_non_intra(MpegEncContext *s, DCTELEM *block, int n)
312
+static inline int mpeg2_decode_block_non_intra(MpegEncContext *s, int16_t *block, int n)
313 313
 {
314 314
     int level, i, j, run;
315 315
     RLTable *rl = &ff_rl_mpeg1;
... ...
@@ -394,7 +394,7 @@ end:
394 394
  * Do not use "fast" flag if you need the code to be robust.
395 395
  */
396 396
 static inline int mpeg2_fast_decode_block_non_intra(MpegEncContext *s,
397
-                                                    DCTELEM *block, int n)
397
+                                                    int16_t *block, int n)
398 398
 {
399 399
     int level, i, j, run;
400 400
     RLTable *rl = &ff_rl_mpeg1;
... ...
@@ -455,7 +455,7 @@ end:
455 455
 }
456 456
 
457 457
 
458
-static inline int mpeg2_decode_block_intra(MpegEncContext *s, DCTELEM *block, int n)
458
+static inline int mpeg2_decode_block_intra(MpegEncContext *s, int16_t *block, int n)
459 459
 {
460 460
     int level, dc, diff, i, j, run;
461 461
     int component;
... ...
@@ -538,7 +538,7 @@ static inline int mpeg2_decode_block_intra(MpegEncContext *s, DCTELEM *block, in
538 538
  * Changing this would eat up any speed benefits it has.
539 539
  * Do not use "fast" flag if you need the code to be robust.
540 540
  */
541
-static inline int mpeg2_fast_decode_block_intra(MpegEncContext *s, DCTELEM *block, int n)
541
+static inline int mpeg2_fast_decode_block_intra(MpegEncContext *s, int16_t *block, int n)
542 542
 {
543 543
     int level, dc, diff, j, run;
544 544
     int component;
... ...
@@ -737,7 +737,7 @@ static inline int get_qscale(MpegEncContext *s)
737 737
 
738 738
 static void exchange_uv(MpegEncContext *s)
739 739
 {
740
-    DCTELEM (*tmp)[64];
740
+    int16_t (*tmp)[64];
741 741
 
742 742
     tmp           = s->pblocks[4];
743 743
     s->pblocks[4] = s->pblocks[5];
... ...
@@ -750,7 +750,7 @@ static void exchange_uv(MpegEncContext *s)
750 750
 #define MT_16X8  2
751 751
 #define MT_DMV   3
752 752
 
753
-static int mpeg_decode_mb(MpegEncContext *s, DCTELEM block[12][64])
753
+static int mpeg_decode_mb(MpegEncContext *s, int16_t block[12][64])
754 754
 {
755 755
     int i, j, k, cbp, val, mb_type, motion_type;
756 756
     const int mb_block_count = 4 + (1 << s->chroma_format);
... ...
@@ -71,6 +71,6 @@ static inline int decode_dc(GetBitContext *gb, int component)
71 71
     return diff;
72 72
 }
73 73
 
74
-extern int ff_mpeg1_decode_block_intra(MpegEncContext *s, DCTELEM *block, int n);
74
+extern int ff_mpeg1_decode_block_intra(MpegEncContext *s, int16_t *block, int n);
75 75
 
76 76
 #endif /* AVCODEC_MPEG12_H */
... ...
@@ -52,7 +52,7 @@ static const uint8_t svcd_scan_offset_placeholder[14] = {
52 52
 };
53 53
 
54 54
 static void mpeg1_encode_block(MpegEncContext *s,
55
-                         DCTELEM *block,
55
+                         int16_t *block,
56 56
                          int component);
57 57
 static void mpeg1_encode_motion(MpegEncContext *s, int val, int f_or_b_code);    // RAL: f_code parameter added
58 58
 
... ...
@@ -471,7 +471,7 @@ static inline void put_mb_modes(MpegEncContext *s, int n, int bits,
471 471
 }
472 472
 
473 473
 static av_always_inline void mpeg1_encode_mb_internal(MpegEncContext *s,
474
-                                                   DCTELEM block[6][64],
474
+                                                   int16_t block[6][64],
475 475
                                                    int motion_x, int motion_y,
476 476
                                                    int mb_block_count)
477 477
 {
... ...
@@ -695,7 +695,7 @@ static av_always_inline void mpeg1_encode_mb_internal(MpegEncContext *s,
695 695
     }
696 696
 }
697 697
 
698
-void ff_mpeg1_encode_mb(MpegEncContext *s, DCTELEM block[6][64], int motion_x, int motion_y)
698
+void ff_mpeg1_encode_mb(MpegEncContext *s, int16_t block[6][64], int motion_x, int motion_y)
699 699
 {
700 700
     if (s->chroma_format == CHROMA_420) mpeg1_encode_mb_internal(s, block, motion_x, motion_y, 6);
701 701
     else                                mpeg1_encode_mb_internal(s, block, motion_x, motion_y, 8);
... ...
@@ -878,7 +878,7 @@ static inline void encode_dc(MpegEncContext *s, int diff, int component)
878 878
 }
879 879
 
880 880
 static void mpeg1_encode_block(MpegEncContext *s,
881
-                               DCTELEM *block,
881
+                               int16_t *block,
882 882
                                int n)
883 883
 {
884 884
     int alevel, level, last_non_zero, dc, diff, i, j, run, last_index, sign;
... ...
@@ -83,9 +83,9 @@ extern const uint16_t ff_mpeg4_resync_prefix[8];
83 83
 extern const uint8_t ff_mpeg4_dc_threshold[8];
84 84
 
85 85
 void ff_mpeg4_encode_mb(MpegEncContext *s,
86
-                        DCTELEM block[6][64],
86
+                        int16_t block[6][64],
87 87
                         int motion_x, int motion_y);
88
-void ff_mpeg4_pred_ac(MpegEncContext * s, DCTELEM *block, int n,
88
+void ff_mpeg4_pred_ac(MpegEncContext * s, int16_t *block, int n,
89 89
                       int dir);
90 90
 void ff_set_mpeg4_time(MpegEncContext * s);
91 91
 void ff_mpeg4_encode_picture_header(MpegEncContext *s, int picture_number);
... ...
@@ -53,7 +53,7 @@ static const int mb_type_b_map[4]= {
53 53
  * @param n block index (0-3 are luma, 4-5 are chroma)
54 54
  * @param dir the ac prediction direction
55 55
  */
56
-void ff_mpeg4_pred_ac(MpegEncContext * s, DCTELEM *block, int n,
56
+void ff_mpeg4_pred_ac(MpegEncContext * s, int16_t *block, int n,
57 57
                       int dir)
58 58
 {
59 59
     int i;
... ...
@@ -843,7 +843,7 @@ int ff_mpeg4_decode_partitions(MpegEncContext *s)
843 843
  * Decode a block.
844 844
  * @return <0 if an error occurred
845 845
  */
846
-static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
846
+static inline int mpeg4_decode_block(MpegEncContext * s, int16_t * block,
847 847
                               int n, int coded, int intra, int rvlc)
848 848
 {
849 849
     int level, i, last, run;
... ...
@@ -1089,7 +1089,7 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
1089 1089
  * decode partition C of one MB.
1090 1090
  * @return <0 if an error occurred
1091 1091
  */
1092
-static int mpeg4_decode_partitioned_mb(MpegEncContext *s, DCTELEM block[6][64])
1092
+static int mpeg4_decode_partitioned_mb(MpegEncContext *s, int16_t block[6][64])
1093 1093
 {
1094 1094
     int cbp, mb_type;
1095 1095
     const int xy= s->mb_x + s->mb_y*s->mb_stride;
... ...
@@ -1172,7 +1172,7 @@ static int mpeg4_decode_partitioned_mb(MpegEncContext *s, DCTELEM block[6][64])
1172 1172
 }
1173 1173
 
1174 1174
 static int mpeg4_decode_mb(MpegEncContext *s,
1175
-                      DCTELEM block[6][64])
1175
+                      int16_t block[6][64])
1176 1176
 {
1177 1177
     int cbpc, cbpy, i, cbp, pred_x, pred_y, mx, my, dquant;
1178 1178
     int16_t *mot_val;
... ...
@@ -59,7 +59,7 @@ max run: 29/41
59 59
  * Return the number of bits that encoding the 8x8 block in block would need.
60 60
  * @param[in]  block_last_index last index in scantable order that refers to a non zero element in block.
61 61
  */
62
-static inline int get_block_rate(MpegEncContext * s, DCTELEM block[64], int block_last_index, uint8_t scantable[64]){
62
+static inline int get_block_rate(MpegEncContext * s, int16_t block[64], int block_last_index, uint8_t scantable[64]){
63 63
     int last=0;
64 64
     int j;
65 65
     int rate=0;
... ...
@@ -91,7 +91,7 @@ static inline int get_block_rate(MpegEncContext * s, DCTELEM block[64], int bloc
91 91
  * @param[out] st scantable for each 8x8 block
92 92
  * @param[in] zigzag_last_index index referring to the last non zero coefficient in zigzag order
93 93
  */
94
-static inline void restore_ac_coeffs(MpegEncContext * s, DCTELEM block[6][64], const int dir[6], uint8_t *st[6], const int zigzag_last_index[6])
94
+static inline void restore_ac_coeffs(MpegEncContext * s, int16_t block[6][64], const int dir[6], uint8_t *st[6], const int zigzag_last_index[6])
95 95
 {
96 96
     int i, n;
97 97
     memcpy(s->block_last_index, zigzag_last_index, sizeof(int)*6);
... ...
@@ -122,7 +122,7 @@ static inline void restore_ac_coeffs(MpegEncContext * s, DCTELEM block[6][64], c
122 122
  * @param[out] st scantable for each 8x8 block
123 123
  * @param[out] zigzag_last_index index referring to the last non zero coefficient in zigzag order
124 124
  */
125
-static inline int decide_ac_pred(MpegEncContext * s, DCTELEM block[6][64], const int dir[6], uint8_t *st[6], int zigzag_last_index[6])
125
+static inline int decide_ac_pred(MpegEncContext * s, int16_t block[6][64], const int dir[6], uint8_t *st[6], int zigzag_last_index[6])
126 126
 {
127 127
     int score= 0;
128 128
     int i, n;
... ...
@@ -294,7 +294,7 @@ static inline int mpeg4_get_dc_length(int level, int n){
294 294
  * Encode an 8x8 block.
295 295
  * @param n block index (0-3 are luma, 4-5 are chroma)
296 296
  */
297
-static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n, int intra_dc,
297
+static inline void mpeg4_encode_block(MpegEncContext * s, int16_t * block, int n, int intra_dc,
298 298
                                uint8_t *scan_table, PutBitContext *dc_pb, PutBitContext *ac_pb)
299 299
 {
300 300
     int i, last_non_zero;
... ...
@@ -345,7 +345,7 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n
345 345
     }
346 346
 }
347 347
 
348
-static int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, int n, int intra_dc,
348
+static int mpeg4_get_block_length(MpegEncContext * s, int16_t * block, int n, int intra_dc,
349 349
                                uint8_t *scan_table)
350 350
 {
351 351
     int i, last_non_zero;
... ...
@@ -396,7 +396,7 @@ static int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, int n, in
396 396
     return len;
397 397
 }
398 398
 
399
-static inline void mpeg4_encode_blocks(MpegEncContext * s, DCTELEM block[6][64], int intra_dc[6],
399
+static inline void mpeg4_encode_blocks(MpegEncContext * s, int16_t block[6][64], int intra_dc[6],
400 400
                                uint8_t **scan_table, PutBitContext *dc_pb, PutBitContext *ac_pb){
401 401
     int i;
402 402
 
... ...
@@ -425,7 +425,7 @@ static inline void mpeg4_encode_blocks(MpegEncContext * s, DCTELEM block[6][64],
425 425
     }
426 426
 }
427 427
 
428
-static inline int get_b_cbp(MpegEncContext * s, DCTELEM block[6][64],
428
+static inline int get_b_cbp(MpegEncContext * s, int16_t block[6][64],
429 429
                             int motion_x, int motion_y, int mb_type)
430 430
 {
431 431
     int cbp = 0, i;
... ...
@@ -469,7 +469,7 @@ static inline int get_b_cbp(MpegEncContext * s, DCTELEM block[6][64],
469 469
 static const int dquant_code[5]= {1,0,9,2,3};
470 470
 
471 471
 void ff_mpeg4_encode_mb(MpegEncContext * s,
472
-                        DCTELEM block[6][64],
472
+                        int16_t block[6][64],
473 473
                         int motion_x, int motion_y)
474 474
 {
475 475
     int cbpc, cbpy, pred_x, pred_y;
... ...
@@ -43,19 +43,19 @@
43 43
 //#include <assert.h>
44 44
 
45 45
 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
46
-                                   DCTELEM *block, int n, int qscale);
46
+                                   int16_t *block, int n, int qscale);
47 47
 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
48
-                                   DCTELEM *block, int n, int qscale);
48
+                                   int16_t *block, int n, int qscale);
49 49
 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
50
-                                   DCTELEM *block, int n, int qscale);
50
+                                   int16_t *block, int n, int qscale);
51 51
 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
52
-                                   DCTELEM *block, int n, int qscale);
52
+                                   int16_t *block, int n, int qscale);
53 53
 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
54
-                                   DCTELEM *block, int n, int qscale);
54
+                                   int16_t *block, int n, int qscale);
55 55
 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
56
-                                  DCTELEM *block, int n, int qscale);
56
+                                  int16_t *block, int n, int qscale);
57 57
 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
58
-                                  DCTELEM *block, int n, int qscale);
58
+                                  int16_t *block, int n, int qscale);
59 59
 
60 60
 
61 61
 //#define DEBUG
... ...
@@ -392,7 +392,7 @@ int ff_alloc_picture(MpegEncContext *s, Picture *pic, int shared)
392 392
         }
393 393
         if (s->avctx->debug&FF_DEBUG_DCT_COEFF) {
394 394
             FF_ALLOCZ_OR_GOTO(s->avctx, pic->f.dct_coeff,
395
-                              64 * mb_array_size * sizeof(DCTELEM) * 6, fail)
395
+                              64 * mb_array_size * sizeof(int16_t) * 6, fail)
396 396
         }
397 397
         pic->f.qstride = s->mb_stride;
398 398
         FF_ALLOCZ_OR_GOTO(s->avctx, pic->f.pan_scan,
... ...
@@ -469,7 +469,7 @@ static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base)
469 469
                               2 * 64 * sizeof(int), fail)
470 470
         }
471 471
     }
472
-    FF_ALLOCZ_OR_GOTO(s->avctx, s->blocks, 64 * 12 * 2 * sizeof(DCTELEM), fail)
472
+    FF_ALLOCZ_OR_GOTO(s->avctx, s->blocks, 64 * 12 * 2 * sizeof(int16_t), fail)
473 473
     s->block = s->blocks[0];
474 474
 
475 475
     for (i = 0; i < 12; i++) {
... ...
@@ -2434,7 +2434,7 @@ unhandled:
2434 2434
 
2435 2435
 /* put block[] to dest[] */
2436 2436
 static inline void put_dct(MpegEncContext *s,
2437
-                           DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
2437
+                           int16_t *block, int i, uint8_t *dest, int line_size, int qscale)
2438 2438
 {
2439 2439
     s->dct_unquantize_intra(s, block, i, qscale);
2440 2440
     s->dsp.idct_put (dest, line_size, block);
... ...
@@ -2442,7 +2442,7 @@ static inline void put_dct(MpegEncContext *s,
2442 2442
 
2443 2443
 /* add block[] to dest[] */
2444 2444
 static inline void add_dct(MpegEncContext *s,
2445
-                           DCTELEM *block, int i, uint8_t *dest, int line_size)
2445
+                           int16_t *block, int i, uint8_t *dest, int line_size)
2446 2446
 {
2447 2447
     if (s->block_last_index[i] >= 0) {
2448 2448
         s->dsp.idct_add (dest, line_size, block);
... ...
@@ -2450,7 +2450,7 @@ static inline void add_dct(MpegEncContext *s,
2450 2450
 }
2451 2451
 
2452 2452
 static inline void add_dequant_dct(MpegEncContext *s,
2453
-                           DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
2453
+                           int16_t *block, int i, uint8_t *dest, int line_size, int qscale)
2454 2454
 {
2455 2455
     if (s->block_last_index[i] >= 0) {
2456 2456
         s->dct_unquantize_inter(s, block, i, qscale);
... ...
@@ -2503,7 +2503,7 @@ void ff_clean_intra_table_entries(MpegEncContext *s)
2503 2503
    s->interlaced_dct : true if interlaced dct used (mpeg2)
2504 2504
  */
2505 2505
 static av_always_inline
2506
-void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64],
2506
+void MPV_decode_mb_internal(MpegEncContext *s, int16_t block[12][64],
2507 2507
                             int lowres_flag, int is_mpeg12)
2508 2508
 {
2509 2509
     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
... ...
@@ -2515,7 +2515,7 @@ void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64],
2515 2515
     if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
2516 2516
        /* save DCT coefficients */
2517 2517
        int i,j;
2518
-       DCTELEM *dct = &s->current_picture.f.dct_coeff[mb_xy * 64 * 6];
2518
+       int16_t *dct = &s->current_picture.f.dct_coeff[mb_xy * 64 * 6];
2519 2519
        av_log(s->avctx, AV_LOG_DEBUG, "DCT coeffs of MB at %dx%d:\n", s->mb_x, s->mb_y);
2520 2520
        for(i=0; i<6; i++){
2521 2521
            for(j=0; j<64; j++){
... ...
@@ -2746,7 +2746,7 @@ skip_idct:
2746 2746
     }
2747 2747
 }
2748 2748
 
2749
-void ff_MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
2749
+void ff_MPV_decode_mb(MpegEncContext *s, int16_t block[12][64]){
2750 2750
 #if !CONFIG_SMALL
2751 2751
     if(s->out_format == FMT_MPEG1) {
2752 2752
         if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1, 1);
... ...
@@ -2888,7 +2888,7 @@ void ff_mpeg_flush(AVCodecContext *avctx){
2888 2888
 }
2889 2889
 
2890 2890
 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
2891
-                                   DCTELEM *block, int n, int qscale)
2891
+                                   int16_t *block, int n, int qscale)
2892 2892
 {
2893 2893
     int i, level, nCoeffs;
2894 2894
     const uint16_t *quant_matrix;
... ...
@@ -2917,7 +2917,7 @@ static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
2917 2917
 }
2918 2918
 
2919 2919
 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
2920
-                                   DCTELEM *block, int n, int qscale)
2920
+                                   int16_t *block, int n, int qscale)
2921 2921
 {
2922 2922
     int i, level, nCoeffs;
2923 2923
     const uint16_t *quant_matrix;
... ...
@@ -2946,7 +2946,7 @@ static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
2946 2946
 }
2947 2947
 
2948 2948
 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
2949
-                                   DCTELEM *block, int n, int qscale)
2949
+                                   int16_t *block, int n, int qscale)
2950 2950
 {
2951 2951
     int i, level, nCoeffs;
2952 2952
     const uint16_t *quant_matrix;
... ...
@@ -2973,7 +2973,7 @@ static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
2973 2973
 }
2974 2974
 
2975 2975
 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
2976
-                                   DCTELEM *block, int n, int qscale)
2976
+                                   int16_t *block, int n, int qscale)
2977 2977
 {
2978 2978
     int i, level, nCoeffs;
2979 2979
     const uint16_t *quant_matrix;
... ...
@@ -3004,7 +3004,7 @@ static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
3004 3004
 }
3005 3005
 
3006 3006
 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
3007
-                                   DCTELEM *block, int n, int qscale)
3007
+                                   int16_t *block, int n, int qscale)
3008 3008
 {
3009 3009
     int i, level, nCoeffs;
3010 3010
     const uint16_t *quant_matrix;
... ...
@@ -3035,7 +3035,7 @@ static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
3035 3035
 }
3036 3036
 
3037 3037
 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
3038
-                                  DCTELEM *block, int n, int qscale)
3038
+                                  int16_t *block, int n, int qscale)
3039 3039
 {
3040 3040
     int i, level, qmul, qadd;
3041 3041
     int nCoeffs;
... ...
@@ -3069,7 +3069,7 @@ static void dct_unquantize_h263_intra_c(MpegEncContext *s,
3069 3069
 }
3070 3070
 
3071 3071
 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
3072
-                                  DCTELEM *block, int n, int qscale)
3072
+                                  int16_t *block, int n, int qscale)
3073 3073
 {
3074 3074
     int i, level, qmul, qadd;
3075 3075
     int nCoeffs;
... ...
@@ -681,39 +681,39 @@ typedef struct MpegEncContext {
681 681
 
682 682
     uint8_t *ptr_lastgob;
683 683
     int swap_uv;             //vcr2 codec is an MPEG-2 variant with U and V swapped
684
-    DCTELEM (*pblocks[12])[64];
684
+    int16_t (*pblocks[12])[64];
685 685
 
686
-    DCTELEM (*block)[64]; ///< points to one of the following blocks
687
-    DCTELEM (*blocks)[12][64]; // for HQ mode we need to keep the best block
688
-    int (*decode_mb)(struct MpegEncContext *s, DCTELEM block[6][64]); // used by some codecs to avoid a switch()
686
+    int16_t (*block)[64]; ///< points to one of the following blocks
687
+    int16_t (*blocks)[12][64]; // for HQ mode we need to keep the best block
688
+    int (*decode_mb)(struct MpegEncContext *s, int16_t block[6][64]); // used by some codecs to avoid a switch()
689 689
 #define SLICE_OK         0
690 690
 #define SLICE_ERROR     -1
691 691
 #define SLICE_END       -2 ///<end marker found
692 692
 #define SLICE_NOEND     -3 ///<no end marker or error found but mb count exceeded
693 693
 
694 694
     void (*dct_unquantize_mpeg1_intra)(struct MpegEncContext *s,
695
-                           DCTELEM *block/*align 16*/, int n, int qscale);
695
+                           int16_t *block/*align 16*/, int n, int qscale);
696 696
     void (*dct_unquantize_mpeg1_inter)(struct MpegEncContext *s,
697
-                           DCTELEM *block/*align 16*/, int n, int qscale);
697
+                           int16_t *block/*align 16*/, int n, int qscale);
698 698
     void (*dct_unquantize_mpeg2_intra)(struct MpegEncContext *s,
699
-                           DCTELEM *block/*align 16*/, int n, int qscale);
699
+                           int16_t *block/*align 16*/, int n, int qscale);
700 700
     void (*dct_unquantize_mpeg2_inter)(struct MpegEncContext *s,
701
-                           DCTELEM *block/*align 16*/, int n, int qscale);
701
+                           int16_t *block/*align 16*/, int n, int qscale);
702 702
     void (*dct_unquantize_h263_intra)(struct MpegEncContext *s,
703
-                           DCTELEM *block/*align 16*/, int n, int qscale);
703
+                           int16_t *block/*align 16*/, int n, int qscale);
704 704
     void (*dct_unquantize_h263_inter)(struct MpegEncContext *s,
705
-                           DCTELEM *block/*align 16*/, int n, int qscale);
705
+                           int16_t *block/*align 16*/, int n, int qscale);
706 706
     void (*dct_unquantize_h261_intra)(struct MpegEncContext *s,
707
-                           DCTELEM *block/*align 16*/, int n, int qscale);
707
+                           int16_t *block/*align 16*/, int n, int qscale);
708 708
     void (*dct_unquantize_h261_inter)(struct MpegEncContext *s,
709
-                           DCTELEM *block/*align 16*/, int n, int qscale);
709
+                           int16_t *block/*align 16*/, int n, int qscale);
710 710
     void (*dct_unquantize_intra)(struct MpegEncContext *s, // unquantizer to use (mpeg4 can use both)
711
-                           DCTELEM *block/*align 16*/, int n, int qscale);
711
+                           int16_t *block/*align 16*/, int n, int qscale);
712 712
     void (*dct_unquantize_inter)(struct MpegEncContext *s, // unquantizer to use (mpeg4 can use both)
713
-                           DCTELEM *block/*align 16*/, int n, int qscale);
714
-    int (*dct_quantize)(struct MpegEncContext *s, DCTELEM *block/*align 16*/, int n, int qscale, int *overflow);
715
-    int (*fast_dct_quantize)(struct MpegEncContext *s, DCTELEM *block/*align 16*/, int n, int qscale, int *overflow);
716
-    void (*denoise_dct)(struct MpegEncContext *s, DCTELEM *block);
713
+                           int16_t *block/*align 16*/, int n, int qscale);
714
+    int (*dct_quantize)(struct MpegEncContext *s, int16_t *block/*align 16*/, int n, int qscale, int *overflow);
715
+    int (*fast_dct_quantize)(struct MpegEncContext *s, int16_t *block/*align 16*/, int n, int qscale, int *overflow);
716
+    void (*denoise_dct)(struct MpegEncContext *s, int16_t *block);
717 717
 
718 718
     int mpv_flags;      ///< flags set by private options
719 719
     int quantizer_noise_shaping;
... ...
@@ -776,7 +776,7 @@ int ff_MPV_common_init(MpegEncContext *s);
776 776
 int ff_mpv_frame_size_alloc(MpegEncContext *s, int linesize);
777 777
 int ff_MPV_common_frame_size_change(MpegEncContext *s);
778 778
 void ff_MPV_common_end(MpegEncContext *s);
779
-void ff_MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]);
779
+void ff_MPV_decode_mb(MpegEncContext *s, int16_t block[12][64]);
780 780
 int ff_MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx);
781 781
 void ff_MPV_frame_end(MpegEncContext *s);
782 782
 int ff_MPV_encode_init(AVCodecContext *avctx);
... ...
@@ -796,7 +796,7 @@ void ff_print_debug_info(MpegEncContext *s, AVFrame *pict);
796 796
 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix);
797 797
 void ff_release_unused_pictures(MpegEncContext *s, int remove_current);
798 798
 int ff_find_unused_picture(MpegEncContext *s, int shared);
799
-void ff_denoise_dct(MpegEncContext *s, DCTELEM *block);
799
+void ff_denoise_dct(MpegEncContext *s, int16_t *block);
800 800
 int ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src);
801 801
 int ff_MPV_lowest_referenced_row(MpegEncContext *s, int dir);
802 802
 void ff_MPV_report_decode_progress(MpegEncContext *s);
... ...
@@ -812,7 +812,7 @@ int ff_dct_common_init(MpegEncContext *s);
812 812
 int ff_dct_encode_init(MpegEncContext *s);
813 813
 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][64],
814 814
                        const uint16_t *quant_matrix, int bias, int qmin, int qmax, int intra);
815
-int ff_dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
815
+int ff_dct_quantize_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
816 816
 
817 817
 void ff_init_block_index(MpegEncContext *s);
818 818
 void ff_copy_picture(Picture *dst, Picture *src);
... ...
@@ -887,7 +887,7 @@ extern const uint8_t * const ff_mpeg2_dc_scale_table[4];
887 887
 
888 888
 void ff_mpeg1_encode_picture_header(MpegEncContext *s, int picture_number);
889 889
 void ff_mpeg1_encode_mb(MpegEncContext *s,
890
-                        DCTELEM block[6][64],
890
+                        int16_t block[6][64],
891 891
                         int motion_x, int motion_y);
892 892
 void ff_mpeg1_encode_init(MpegEncContext *s);
893 893
 void ff_mpeg1_encode_slice_header(MpegEncContext *s);
... ...
@@ -902,7 +902,7 @@ extern const uint8_t ff_h263_loop_filter_strength[32];
902 902
 void ff_h261_loop_filter(MpegEncContext *s);
903 903
 void ff_h261_reorder_mb_index(MpegEncContext* s);
904 904
 void ff_h261_encode_mb(MpegEncContext *s,
905
-                    DCTELEM block[6][64],
905
+                    int16_t block[6][64],
906 906
                     int motion_x, int motion_y);
907 907
 void ff_h261_encode_picture_header(MpegEncContext * s, int picture_number);
908 908
 void ff_h261_encode_init(MpegEncContext *s);
... ...
@@ -919,7 +919,7 @@ void ff_rv20_encode_picture_header(MpegEncContext *s, int picture_number);
919 919
 void ff_msmpeg4_encode_picture_header(MpegEncContext * s, int picture_number);
920 920
 void ff_msmpeg4_encode_ext_header(MpegEncContext * s);
921 921
 void ff_msmpeg4_encode_mb(MpegEncContext * s,
922
-                          DCTELEM block[6][64],
922
+                          int16_t block[6][64],
923 923
                           int motion_x, int motion_y);
924 924
 int ff_msmpeg4_decode_picture_header(MpegEncContext * s);
925 925
 int ff_msmpeg4_decode_ext_header(MpegEncContext * s, int buf_size);
... ...
@@ -927,14 +927,14 @@ int ff_msmpeg4_decode_init(AVCodecContext *avctx);
927 927
 void ff_msmpeg4_encode_init(MpegEncContext *s);
928 928
 int ff_wmv2_decode_picture_header(MpegEncContext * s);
929 929
 int ff_wmv2_decode_secondary_picture_header(MpegEncContext * s);
930
-void ff_wmv2_add_mb(MpegEncContext *s, DCTELEM block[6][64], uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr);
930
+void ff_wmv2_add_mb(MpegEncContext *s, int16_t block[6][64], uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr);
931 931
 void ff_mspel_motion(MpegEncContext *s,
932 932
                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
933 933
                                uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
934 934
                                int motion_x, int motion_y, int h);
935 935
 int ff_wmv2_encode_picture_header(MpegEncContext * s, int picture_number);
936 936
 void ff_wmv2_encode_mb(MpegEncContext * s,
937
-                       DCTELEM block[6][64],
937
+                       int16_t block[6][64],
938 938
                        int motion_x, int motion_y);
939 939
 
940 940
 #endif /* AVCODEC_MPEGVIDEO_H */
... ...
@@ -52,10 +52,10 @@
52 52
 //#include <assert.h>
53 53
 
54 54
 static int encode_picture(MpegEncContext *s, int picture_number);
55
-static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
55
+static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
56 56
 static int sse_mb(MpegEncContext *s);
57
-static void denoise_dct_c(MpegEncContext *s, DCTELEM *block);
58
-static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
57
+static void denoise_dct_c(MpegEncContext *s, int16_t *block);
58
+static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
59 59
 
60 60
 //#define DEBUG
61 61
 
... ...
@@ -1727,7 +1727,7 @@ static inline void dct_single_coeff_elimination(MpegEncContext *s,
1727 1727
     int score = 0;
1728 1728
     int run = 0;
1729 1729
     int i;
1730
-    DCTELEM *block = s->block[n];
1730
+    int16_t *block = s->block[n];
1731 1731
     const int last_index = s->block_last_index[n];
1732 1732
     int skip_dc;
1733 1733
 
... ...
@@ -1767,7 +1767,7 @@ static inline void dct_single_coeff_elimination(MpegEncContext *s,
1767 1767
         s->block_last_index[n] = -1;
1768 1768
 }
1769 1769
 
1770
-static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block,
1770
+static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1771 1771
                                int last_index)
1772 1772
 {
1773 1773
     int i;
... ...
@@ -1832,7 +1832,7 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s,
1832 1832
                                                 int mb_block_count)
1833 1833
 {
1834 1834
     int16_t weight[12][64];
1835
-    DCTELEM orig[12][64];
1835
+    int16_t orig[12][64];
1836 1836
     const int mb_x = s->mb_x;
1837 1837
     const int mb_y = s->mb_y;
1838 1838
     int i;
... ...
@@ -2093,7 +2093,7 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s,
2093 2093
                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2094 2094
                                   wrap_c);
2095 2095
         }
2096
-        memcpy(orig[0], s->block[0], sizeof(DCTELEM) * 64 * mb_block_count);
2096
+        memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2097 2097
     }
2098 2098
 
2099 2099
     /* DCT & quantize */
... ...
@@ -2839,7 +2839,7 @@ static int encode_thread(AVCodecContext *c, void *arg){
2839 2839
                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2840 2840
                         const int last_qp= backup_s.qscale;
2841 2841
                         int qpi, qp, dc[6];
2842
-                        DCTELEM ac[6][16];
2842
+                        int16_t ac[6][16];
2843 2843
                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2844 2844
                         static const int dquant_tab[4]={-1,1,-2,2};
2845 2845
 
... ...
@@ -2864,7 +2864,7 @@ static int encode_thread(AVCodecContext *c, void *arg){
2864 2864
                             if(s->mb_intra && s->dc_val[0]){
2865 2865
                                 for(i=0; i<6; i++){
2866 2866
                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2867
-                                    memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
2867
+                                    memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2868 2868
                                 }
2869 2869
                             }
2870 2870
 
... ...
@@ -2874,7 +2874,7 @@ static int encode_thread(AVCodecContext *c, void *arg){
2874 2874
                                 if(s->mb_intra && s->dc_val[0]){
2875 2875
                                     for(i=0; i<6; i++){
2876 2876
                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2877
-                                        memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
2877
+                                        memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2878 2878
                                     }
2879 2879
                                 }
2880 2880
                             }
... ...
@@ -3479,7 +3479,7 @@ static int encode_picture(MpegEncContext *s, int picture_number)
3479 3479
     return 0;
3480 3480
 }
3481 3481
 
3482
-static void denoise_dct_c(MpegEncContext *s, DCTELEM *block){
3482
+static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3483 3483
     const int intra= s->mb_intra;
3484 3484
     int i;
3485 3485
 
... ...
@@ -3504,7 +3504,7 @@ static void denoise_dct_c(MpegEncContext *s, DCTELEM *block){
3504 3504
 }
3505 3505
 
3506 3506
 static int dct_quantize_trellis_c(MpegEncContext *s,
3507
-                                  DCTELEM *block, int n,
3507
+                                  int16_t *block, int n,
3508 3508
                                   int qscale, int *overflow){
3509 3509
     const int *qmat;
3510 3510
     const uint8_t *scantable= s->intra_scantable.scantable;
... ...
@@ -3611,7 +3611,7 @@ static int dct_quantize_trellis_c(MpegEncContext *s,
3611 3611
     *overflow= s->max_qcoeff < max; //overflow might have happened
3612 3612
 
3613 3613
     if(last_non_zero < start_i){
3614
-        memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3614
+        memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3615 3615
         return last_non_zero;
3616 3616
     }
3617 3617
 
... ...
@@ -3743,7 +3743,7 @@ static int dct_quantize_trellis_c(MpegEncContext *s,
3743 3743
 
3744 3744
     dc= FFABS(block[0]);
3745 3745
     last_non_zero= last_i - 1;
3746
-    memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3746
+    memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3747 3747
 
3748 3748
     if(last_non_zero < start_i)
3749 3749
         return last_non_zero;
... ...
@@ -3818,10 +3818,10 @@ static void build_basis(uint8_t *perm){
3818 3818
 }
3819 3819
 
3820 3820
 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3821
-                        DCTELEM *block, int16_t *weight, DCTELEM *orig,
3821
+                        int16_t *block, int16_t *weight, int16_t *orig,
3822 3822
                         int n, int qscale){
3823 3823
     int16_t rem[64];
3824
-    LOCAL_ALIGNED_16(DCTELEM, d1, [64]);
3824
+    LOCAL_ALIGNED_16(int16_t, d1, [64]);
3825 3825
     const uint8_t *scantable= s->intra_scantable.scantable;
3826 3826
     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3827 3827
 //    unsigned int threshold1, threshold2;
... ...
@@ -4191,7 +4191,7 @@ STOP_TIMER("iterative search")
4191 4191
 }
4192 4192
 
4193 4193
 int ff_dct_quantize_c(MpegEncContext *s,
4194
-                        DCTELEM *block, int n,
4194
+                        int16_t *block, int n,
4195 4195
                         int qscale, int *overflow)
4196 4196
 {
4197 4197
     int i, j, level, last_non_zero, q, start_i;
... ...
@@ -44,7 +44,7 @@ void ff_xvmc_init_block(MpegEncContext *s)
44 44
     struct xvmc_pix_fmt *render = (struct xvmc_pix_fmt*)s->current_picture.f.data[2];
45 45
     assert(render && render->xvmc_id == AV_XVMC_ID);
46 46
 
47
-    s->block = (DCTELEM (*)[64])(render->data_blocks + render->next_free_data_block_num * 64);
47
+    s->block = (int16_t (*)[64])(render->data_blocks + render->next_free_data_block_num * 64);
48 48
 }
49 49
 
50 50
 /**
... ...
@@ -404,7 +404,7 @@ static int msmpeg4v2_decode_motion(MpegEncContext * s, int pred, int f_code)
404 404
     return val;
405 405
 }
406 406
 
407
-static int msmpeg4v12_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
407
+static int msmpeg4v12_decode_mb(MpegEncContext *s, int16_t block[6][64])
408 408
 {
409 409
     int cbp, code, i;
410 410
     uint32_t * const mb_type_ptr = &s->current_picture.f.mb_type[s->mb_x + s->mb_y*s->mb_stride];
... ...
@@ -494,7 +494,7 @@ static int msmpeg4v12_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
494 494
     return 0;
495 495
 }
496 496
 
497
-static int msmpeg4v34_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
497
+static int msmpeg4v34_decode_mb(MpegEncContext *s, int16_t block[6][64])
498 498
 {
499 499
     int cbp, code, i;
500 500
     uint8_t *coded_val;
... ...
@@ -938,7 +938,7 @@ static int msmpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr)
938 938
 }
939 939
 
940 940
 //#define ERROR_DETAILS
941
-int ff_msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
941
+int ff_msmpeg4_decode_block(MpegEncContext * s, int16_t * block,
942 942
                               int n, int coded, const uint8_t *scan_table)
943 943
 {
944 944
     int level, i, last, run, run_diff;
... ...
@@ -26,7 +26,6 @@
26 26
 
27 27
 #include "config.h"
28 28
 #include "avcodec.h"
29
-#include "dsputil.h"
30 29
 #include "mpegvideo.h"
31 30
 #include "msmpeg4data.h"
32 31
 #include "put_bits.h"
... ...
@@ -45,17 +44,17 @@ extern VLC ff_inter_intra_vlc;
45 45
 
46 46
 void ff_msmpeg4_code012(PutBitContext *pb, int n);
47 47
 void ff_msmpeg4_common_init(MpegEncContext *s);
48
-void ff_msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n);
48
+void ff_msmpeg4_encode_block(MpegEncContext * s, int16_t * block, int n);
49 49
 void ff_msmpeg4_handle_slices(MpegEncContext *s);
50 50
 void ff_msmpeg4_encode_motion(MpegEncContext * s, int mx, int my);
51 51
 int ff_msmpeg4_coded_block_pred(MpegEncContext * s, int n,
52 52
                                 uint8_t **coded_block_ptr);
53 53
 int ff_msmpeg4_decode_motion(MpegEncContext * s, int *mx_ptr, int *my_ptr);
54
-int ff_msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
54
+int ff_msmpeg4_decode_block(MpegEncContext * s, int16_t * block,
55 55
                             int n, int coded, const uint8_t *scan_table);
56 56
 int ff_msmpeg4_pred_dc(MpegEncContext *s, int n,
57 57
                        int16_t **dc_val_ptr, int *dir_ptr);
58
-int ff_wmv2_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
58
+int ff_wmv2_decode_mb(MpegEncContext *s, int16_t block[6][64]);
59 59
 
60 60
 #define CONFIG_MSMPEG4_DECODER (CONFIG_MSMPEG4V1_DECODER || \
61 61
                                 CONFIG_MSMPEG4V2_DECODER || \
... ...
@@ -369,7 +369,7 @@ static void msmpeg4v2_encode_motion(MpegEncContext * s, int val)
369 369
 }
370 370
 
371 371
 void ff_msmpeg4_encode_mb(MpegEncContext * s,
372
-                          DCTELEM block[6][64],
372
+                          int16_t block[6][64],
373 373
                           int motion_x, int motion_y)
374 374
 {
375 375
     int cbp, coded_cbp, i;
... ...
@@ -570,7 +570,7 @@ static void msmpeg4_encode_dc(MpegEncContext * s, int level, int n, int *dir_ptr
570 570
 /* Encoding of a block. Very similar to MPEG4 except for a different
571 571
    escape coding (same as H263) and more vlc tables.
572 572
  */
573
-void ff_msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n)
573
+void ff_msmpeg4_encode_block(MpegEncContext * s, int16_t * block, int n)
574 574
 {
575 575
     int level, run, last, i, j, last_index;
576 576
     int last_non_zero, sign, slevel;
... ...
@@ -476,7 +476,7 @@ static int pix_sum_altivec(uint8_t * pix, int line_size)
476 476
     return s;
477 477
 }
478 478
 
479
-static void get_pixels_altivec(DCTELEM *av_restrict block, const uint8_t *pixels, int line_size)
479
+static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels, int line_size)
480 480
 {
481 481
     int i;
482 482
     vector unsigned char perm = vec_lvsl(0, pixels);
... ...
@@ -502,7 +502,7 @@ static void get_pixels_altivec(DCTELEM *av_restrict block, const uint8_t *pixels
502 502
     }
503 503
 }
504 504
 
505
-static void diff_pixels_altivec(DCTELEM *av_restrict block, const uint8_t *s1,
505
+static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
506 506
         const uint8_t *s2, int stride)
507 507
 {
508 508
     int i;
... ...
@@ -576,7 +576,7 @@ static void diff_pixels_altivec(DCTELEM *av_restrict block, const uint8_t *s1,
576 576
 }
577 577
 
578 578
 
579
-static void clear_block_altivec(DCTELEM *block) {
579
+static void clear_block_altivec(int16_t *block) {
580 580
     LOAD_ZERO;
581 581
     vec_st(zero_s16v,   0, block);
582 582
     vec_st(zero_s16v,  16, block);
... ...
@@ -47,7 +47,7 @@ distinguish, and use dcbz (32 bytes) or dcbzl (one cache line) as required.
47 47
 see <http://developer.apple.com/technotes/tn/tn2087.html>
48 48
 and <http://developer.apple.com/technotes/tn/tn2086.html>
49 49
 */
50
-static void clear_blocks_dcbz32_ppc(DCTELEM *blocks)
50
+static void clear_blocks_dcbz32_ppc(int16_t *blocks)
51 51
 {
52 52
     register int misal = ((unsigned long)blocks & 0x00000010);
53 53
     register int i = 0;
... ...
@@ -58,7 +58,7 @@ static void clear_blocks_dcbz32_ppc(DCTELEM *blocks)
58 58
         ((unsigned long*)blocks)[3] = 0L;
59 59
         i += 16;
60 60
     }
61
-    for ( ; i < sizeof(DCTELEM)*6*64-31 ; i += 32) {
61
+    for ( ; i < sizeof(int16_t)*6*64-31 ; i += 32) {
62 62
         __asm__ volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory");
63 63
     }
64 64
     if (misal) {
... ...
@@ -73,7 +73,7 @@ static void clear_blocks_dcbz32_ppc(DCTELEM *blocks)
73 73
 /* same as above, when dcbzl clear a whole 128B cache line
74 74
    i.e. the PPC970 aka G5 */
75 75
 #if HAVE_DCBZL
76
-static void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
76
+static void clear_blocks_dcbz128_ppc(int16_t *blocks)
77 77
 {
78 78
     register int misal = ((unsigned long)blocks & 0x0000007f);
79 79
     register int i = 0;
... ...
@@ -81,17 +81,17 @@ static void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
81 81
         // we could probably also optimize this case,
82 82
         // but there's not much point as the machines
83 83
         // aren't available yet (2003-06-26)
84
-        memset(blocks, 0, sizeof(DCTELEM)*6*64);
84
+        memset(blocks, 0, sizeof(int16_t)*6*64);
85 85
     }
86 86
     else
87
-        for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) {
87
+        for ( ; i < sizeof(int16_t)*6*64 ; i += 128) {
88 88
             __asm__ volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory");
89 89
         }
90 90
 }
91 91
 #else
92
-static void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
92
+static void clear_blocks_dcbz128_ppc(int16_t *blocks)
93 93
 {
94
-    memset(blocks, 0, sizeof(DCTELEM)*6*64);
94
+    memset(blocks, 0, sizeof(int16_t)*6*64);
95 95
 }
96 96
 #endif
97 97
 
... ...
@@ -315,7 +315,7 @@ H264_MC(avg_, 16, altivec)
315 315
     va_u32 = vec_splat((vec_u32)va_u8, 0);                  \
316 316
     vec_ste(va_u32, element, (uint32_t*)dst);
317 317
 
318
-static void ff_h264_idct_add_altivec(uint8_t *dst, DCTELEM *block, int stride)
318
+static void ff_h264_idct_add_altivec(uint8_t *dst, int16_t *block, int stride)
319 319
 {
320 320
     vec_s16 va0, va1, va2, va3;
321 321
     vec_s16 vz0, vz1, vz2, vz3;
... ...
@@ -429,7 +429,7 @@ static void ff_h264_idct_add_altivec(uint8_t *dst, DCTELEM *block, int stride)
429 429
     vec_st( hv, 0, dest );                                     \
430 430
  }
431 431
 
432
-static void ff_h264_idct8_add_altivec( uint8_t *dst, DCTELEM *dct, int stride ) {
432
+static void ff_h264_idct8_add_altivec( uint8_t *dst, int16_t *dct, int stride ) {
433 433
     vec_s16 s0, s1, s2, s3, s4, s5, s6, s7;
434 434
     vec_s16 d0, d1, d2, d3, d4, d5, d6, d7;
435 435
     vec_s16 idct0, idct1, idct2, idct3, idct4, idct5, idct6, idct7;
... ...
@@ -473,7 +473,7 @@ static void ff_h264_idct8_add_altivec( uint8_t *dst, DCTELEM *dct, int stride )
473 473
     ALTIVEC_STORE_SUM_CLIP(&dst[7*stride], idct7, perm_ldv, perm_stv, sel);
474 474
 }
475 475
 
476
-static av_always_inline void h264_idct_dc_add_internal(uint8_t *dst, DCTELEM *block, int stride, int size)
476
+static av_always_inline void h264_idct_dc_add_internal(uint8_t *dst, int16_t *block, int stride, int size)
477 477
 {
478 478
     vec_s16 dc16;
479 479
     vec_u8 dcplus, dcminus, v0, v1, v2, v3, aligner;
... ...
@@ -518,17 +518,17 @@ static av_always_inline void h264_idct_dc_add_internal(uint8_t *dst, DCTELEM *bl
518 518
     }
519 519
 }
520 520
 
521
-static void h264_idct_dc_add_altivec(uint8_t *dst, DCTELEM *block, int stride)
521
+static void h264_idct_dc_add_altivec(uint8_t *dst, int16_t *block, int stride)
522 522
 {
523 523
     h264_idct_dc_add_internal(dst, block, stride, 4);
524 524
 }
525 525
 
526
-static void ff_h264_idct8_dc_add_altivec(uint8_t *dst, DCTELEM *block, int stride)
526
+static void ff_h264_idct8_dc_add_altivec(uint8_t *dst, int16_t *block, int stride)
527 527
 {
528 528
     h264_idct_dc_add_internal(dst, block, stride, 8);
529 529
 }
530 530
 
531
-static void ff_h264_idct_add16_altivec(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){
531
+static void ff_h264_idct_add16_altivec(uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]){
532 532
     int i;
533 533
     for(i=0; i<16; i++){
534 534
         int nnz = nnzc[ scan8[i] ];
... ...
@@ -539,7 +539,7 @@ static void ff_h264_idct_add16_altivec(uint8_t *dst, const int *block_offset, DC
539 539
     }
540 540
 }
541 541
 
542
-static void ff_h264_idct_add16intra_altivec(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){
542
+static void ff_h264_idct_add16intra_altivec(uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]){
543 543
     int i;
544 544
     for(i=0; i<16; i++){
545 545
         if(nnzc[ scan8[i] ]) ff_h264_idct_add_altivec(dst + block_offset[i], block + i*16, stride);
... ...
@@ -547,7 +547,7 @@ static void ff_h264_idct_add16intra_altivec(uint8_t *dst, const int *block_offse
547 547
     }
548 548
 }
549 549
 
550
-static void ff_h264_idct8_add4_altivec(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){
550
+static void ff_h264_idct8_add4_altivec(uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]){
551 551
     int i;
552 552
     for(i=0; i<16; i+=4){
553 553
         int nnz = nnzc[ scan8[i] ];
... ...
@@ -558,7 +558,7 @@ static void ff_h264_idct8_add4_altivec(uint8_t *dst, const int *block_offset, DC
558 558
     }
559 559
 }
560 560
 
561
-static void ff_h264_idct_add8_altivec(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){
561
+static void ff_h264_idct_add8_altivec(uint8_t **dest, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]){
562 562
     int i, j;
563 563
     for (j = 1; j < 3; j++) {
564 564
         for(i = j * 16; i < j * 16 + 4; i++){
... ...
@@ -35,7 +35,7 @@
35 35
 /* AltiVec version of dct_unquantize_h263
36 36
    this code assumes `block' is 16 bytes-aligned */
37 37
 static void dct_unquantize_h263_altivec(MpegEncContext *s,
38
-                                 DCTELEM *block, int n, int qscale)
38
+                                 int16_t *block, int n, int qscale)
39 39
 {
40 40
     int i, level, qmul, qadd;
41 41
     int nCoeffs;
... ...
@@ -129,7 +129,7 @@ do { \
129 129
 
130 130
 /** Do inverse transform on 8x8 block
131 131
 */
132
-static void vc1_inv_trans_8x8_altivec(DCTELEM block[64])
132
+static void vc1_inv_trans_8x8_altivec(int16_t block[64])
133 133
 {
134 134
     vector signed short src0, src1, src2, src3, src4, src5, src6, src7;
135 135
     vector signed int s0, s1, s2, s3, s4, s5, s6, s7;
... ...
@@ -224,7 +224,7 @@ static void vc1_inv_trans_8x8_altivec(DCTELEM block[64])
224 224
 
225 225
 /** Do inverse transform on 8x4 part of block
226 226
 */
227
-static void vc1_inv_trans_8x4_altivec(uint8_t *dest, int stride, DCTELEM *block)
227
+static void vc1_inv_trans_8x4_altivec(uint8_t *dest, int stride, int16_t *block)
228 228
 {
229 229
     vector signed short src0, src1, src2, src3, src4, src5, src6, src7;
230 230
     vector signed int s0, s1, s2, s3, s4, s5, s6, s7;
... ...
@@ -116,7 +116,7 @@ static inline vec_s16 M16(vec_s16 a, vec_s16 C)
116 116
 #define ADD8(a) vec_add(a, eight)
117 117
 #define SHIFT4(a) vec_sra(a, four)
118 118
 
119
-static void vp3_idct_put_altivec(uint8_t *dst, int stride, DCTELEM block[64])
119
+static void vp3_idct_put_altivec(uint8_t *dst, int stride, int16_t block[64])
120 120
 {
121 121
     vec_u8 t;
122 122
     IDCT_START
... ...
@@ -145,7 +145,7 @@ static void vp3_idct_put_altivec(uint8_t *dst, int stride, DCTELEM block[64])
145 145
     memset(block, 0, sizeof(*block) * 64);
146 146
 }
147 147
 
148
-static void vp3_idct_add_altivec(uint8_t *dst, int stride, DCTELEM block[64])
148
+static void vp3_idct_add_altivec(uint8_t *dst, int stride, int16_t block[64])
149 149
 {
150 150
     LOAD_ZERO;
151 151
     vec_u8 t, vdst;
... ...
@@ -294,10 +294,10 @@ static int decode_picture_header(AVCodecContext *avctx, const uint8_t *buf, cons
294 294
 
295 295
 static const uint8_t dc_codebook[7] = { 0x04, 0x28, 0x28, 0x4D, 0x4D, 0x70, 0x70};
296 296
 
297
-static av_always_inline void decode_dc_coeffs(GetBitContext *gb, DCTELEM *out,
297
+static av_always_inline void decode_dc_coeffs(GetBitContext *gb, int16_t *out,
298 298
                                               int blocks_per_slice)
299 299
 {
300
-    DCTELEM prev_dc;
300
+    int16_t prev_dc;
301 301
     int code, i, sign;
302 302
 
303 303
     OPEN_READER(re, gb);
... ...
@@ -325,7 +325,7 @@ static const uint8_t run_to_cb[16] = { 0x06, 0x06, 0x05, 0x05, 0x04, 0x29, 0x29,
325 325
 static const uint8_t lev_to_cb[10] = { 0x04, 0x0A, 0x05, 0x06, 0x04, 0x28, 0x28, 0x28, 0x28, 0x4C };
326 326
 
327 327
 static av_always_inline void decode_ac_coeffs(AVCodecContext *avctx, GetBitContext *gb,
328
-                                              DCTELEM *out, int blocks_per_slice)
328
+                                              int16_t *out, int blocks_per_slice)
329 329
 {
330 330
     ProresContext *ctx = avctx->priv_data;
331 331
     int block_mask, sign;
... ...
@@ -372,8 +372,8 @@ static void decode_slice_luma(AVCodecContext *avctx, SliceContext *slice,
372 372
                               const int16_t *qmat)
373 373
 {
374 374
     ProresContext *ctx = avctx->priv_data;
375
-    LOCAL_ALIGNED_16(DCTELEM, blocks, [8*4*64]);
376
-    DCTELEM *block;
375
+    LOCAL_ALIGNED_16(int16_t, blocks, [8*4*64]);
376
+    int16_t *block;
377 377
     GetBitContext gb;
378 378
     int i, blocks_per_slice = slice->mb_count<<2;
379 379
 
... ...
@@ -402,8 +402,8 @@ static void decode_slice_chroma(AVCodecContext *avctx, SliceContext *slice,
402 402
                                 const int16_t *qmat, int log2_blocks_per_mb)
403 403
 {
404 404
     ProresContext *ctx = avctx->priv_data;
405
-    LOCAL_ALIGNED_16(DCTELEM, blocks, [8*4*64]);
406
-    DCTELEM *block;
405
+    LOCAL_ALIGNED_16(int16_t, blocks, [8*4*64]);
406
+    int16_t *block;
407 407
     GetBitContext gb;
408 408
     int i, j, blocks_per_slice = slice->mb_count << log2_blocks_per_mb;
409 409
 
... ...
@@ -34,6 +34,7 @@
34 34
 
35 35
 #include "libavutil/intmath.h"
36 36
 #include "avcodec.h"
37
+#include "dsputil.h"
37 38
 #include "internal.h"
38 39
 #include "proresdata.h"
39 40
 #include "proresdsp.h"
... ...
@@ -45,7 +46,7 @@ typedef struct {
45 45
     int x_pos, y_pos;
46 46
     int slice_width;
47 47
     int prev_slice_sf;               ///< scalefactor of the previous decoded slice
48
-    DECLARE_ALIGNED(16, DCTELEM, blocks)[8 * 4 * 64];
48
+    DECLARE_ALIGNED(16, int16_t, blocks)[8 * 4 * 64];
49 49
     DECLARE_ALIGNED(16, int16_t, qmat_luma_scaled)[64];
50 50
     DECLARE_ALIGNED(16, int16_t, qmat_chroma_scaled)[64];
51 51
 } ProresThreadData;
... ...
@@ -340,10 +341,10 @@ static inline int decode_vlc_codeword(GetBitContext *gb, unsigned codebook)
340 340
 /**
341 341
  * Decode DC coefficients for all blocks in a slice.
342 342
  */
343
-static inline void decode_dc_coeffs(GetBitContext *gb, DCTELEM *out,
343
+static inline void decode_dc_coeffs(GetBitContext *gb, int16_t *out,
344 344
                                     int nblocks)
345 345
 {
346
-    DCTELEM prev_dc;
346
+    int16_t prev_dc;
347 347
     int     i, sign;
348 348
     int16_t delta;
349 349
     unsigned int code;
... ...
@@ -368,7 +369,7 @@ static inline void decode_dc_coeffs(GetBitContext *gb, DCTELEM *out,
368 368
 /**
369 369
  * Decode AC coefficients for all blocks in a slice.
370 370
  */
371
-static inline void decode_ac_coeffs(GetBitContext *gb, DCTELEM *out,
371
+static inline void decode_ac_coeffs(GetBitContext *gb, int16_t *out,
372 372
                                     int blocks_per_slice,
373 373
                                     int plane_size_factor,
374 374
                                     const uint8_t *scan)
... ...
@@ -421,7 +422,7 @@ static void decode_slice_plane(ProresContext *ctx, ProresThreadData *td,
421 421
                                const int16_t *qmat, int is_chroma)
422 422
 {
423 423
     GetBitContext gb;
424
-    DCTELEM *block_ptr;
424
+    int16_t *block_ptr;
425 425
     int mb_num, blocks_per_slice;
426 426
 
427 427
     blocks_per_slice = mbs_per_slice * blocks_per_mb;
... ...
@@ -20,6 +20,7 @@
20 20
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 21
  */
22 22
 
23
+#include "dsputil.h"
23 24
 #include "proresdsp.h"
24 25
 #include "simple_idct.h"
25 26
 #include "libavutil/common.h"
... ...
@@ -34,7 +35,7 @@
34 34
 /**
35 35
  * Add bias value, clamp and output pixels of a slice
36 36
  */
37
-static void put_pixels(uint16_t *dst, int stride, const DCTELEM *in)
37
+static void put_pixels(uint16_t *dst, int stride, const int16_t *in)
38 38
 {
39 39
     int x, y, src_offset, dst_offset;
40 40
 
... ...
@@ -47,7 +48,7 @@ static void put_pixels(uint16_t *dst, int stride, const DCTELEM *in)
47 47
     }
48 48
 }
49 49
 
50
-static void prores_idct_put_c(uint16_t *out, int linesize, DCTELEM *block, const int16_t *qmat)
50
+static void prores_idct_put_c(uint16_t *out, int linesize, int16_t *block, const int16_t *qmat)
51 51
 {
52 52
     ff_prores_idct(block, qmat);
53 53
     put_pixels(out, linesize >> 1, block);
... ...
@@ -55,7 +56,7 @@ static void prores_idct_put_c(uint16_t *out, int linesize, DCTELEM *block, const
55 55
 #endif
56 56
 
57 57
 #if CONFIG_PRORES_KOSTYA_ENCODER
58
-static void prores_fdct_c(const uint16_t *src, int linesize, DCTELEM *block)
58
+static void prores_fdct_c(const uint16_t *src, int linesize, int16_t *block)
59 59
 {
60 60
     int x, y;
61 61
     const uint16_t *tsrc = src;
... ...
@@ -23,7 +23,7 @@
23 23
 #ifndef AVCODEC_PRORESDSP_H
24 24
 #define AVCODEC_PRORESDSP_H
25 25
 
26
-#include "dsputil.h"
26
+#include <stdint.h>
27 27
 
28 28
 #define PRORES_BITS_PER_SAMPLE 10 ///< output precision of prores decoder
29 29
 
... ...
@@ -32,8 +32,8 @@ typedef struct ProresDSPContext {
32 32
     uint8_t idct_permutation[64];
33 33
     int dct_permutation_type;
34 34
     uint8_t dct_permutation[64];
35
-    void (* idct_put) (uint16_t *out, int linesize, DCTELEM *block, const int16_t *qmat);
36
-    void (* fdct) (const uint16_t *src, int linesize, DCTELEM *block);
35
+    void (* idct_put) (uint16_t *out, int linesize, int16_t *block, const int16_t *qmat);
36
+    void (* fdct) (const uint16_t *src, int linesize, int16_t *block);
37 37
 } ProresDSPContext;
38 38
 
39 39
 void ff_proresdsp_init(ProresDSPContext *dsp, AVCodecContext *avctx);
... ...
@@ -198,7 +198,7 @@ static av_always_inline int get_level(int val)
198 198
 
199 199
 static const uint8_t dc_codebook[7] = { 0x04, 0x28, 0x28, 0x4D, 0x4D, 0x70, 0x70};
200 200
 
201
-static void encode_dc_coeffs(PutBitContext *pb, DCTELEM *in,
201
+static void encode_dc_coeffs(PutBitContext *pb, int16_t *in,
202 202
         int blocks_per_slice, int *qmat)
203 203
 {
204 204
     int prev_dc, code;
... ...
@@ -230,7 +230,7 @@ static const uint8_t lev_to_cb[10] = { 0x04, 0x0A, 0x05, 0x06, 0x04, 0x28,
230 230
         0x28, 0x28, 0x28, 0x4C };
231 231
 
232 232
 static void encode_ac_coeffs(AVCodecContext *avctx, PutBitContext *pb,
233
-        DCTELEM *in, int blocks_per_slice, int *qmat)
233
+        int16_t *in, int blocks_per_slice, int *qmat)
234 234
 {
235 235
     int prev_run = 4;
236 236
     int prev_level = 2;
... ...
@@ -260,7 +260,7 @@ static void encode_ac_coeffs(AVCodecContext *avctx, PutBitContext *pb,
260 260
     }
261 261
 }
262 262
 
263
-static void get(uint8_t *pixels, int stride, DCTELEM* block)
263
+static void get(uint8_t *pixels, int stride, int16_t* block)
264 264
 {
265 265
     int16_t *p = (int16_t*)pixels;
266 266
     int i, j;
... ...
@@ -275,7 +275,7 @@ static void get(uint8_t *pixels, int stride, DCTELEM* block)
275 275
     }
276 276
 }
277 277
 
278
-static void fdct_get(uint8_t *pixels, int stride, DCTELEM* block)
278
+static void fdct_get(uint8_t *pixels, int stride, int16_t* block)
279 279
 {
280 280
     get(pixels, stride, block);
281 281
     ff_jpeg_fdct_islow_10(block);
... ...
@@ -285,7 +285,7 @@ static int encode_slice_plane(AVCodecContext *avctx, int mb_count,
285 285
         uint8_t *src, int src_stride, uint8_t *buf, unsigned buf_size,
286 286
         int *qmat, int chroma)
287 287
 {
288
-    DECLARE_ALIGNED(16, DCTELEM, blocks)[DEFAULT_SLICE_MB_WIDTH << 8], *block;
288
+    DECLARE_ALIGNED(16, int16_t, blocks)[DEFAULT_SLICE_MB_WIDTH << 8], *block;
289 289
     int i, blocks_per_slice;
290 290
     PutBitContext pb;
291 291
 
... ...
@@ -25,6 +25,7 @@
25 25
 
26 26
 #include "libavutil/opt.h"
27 27
 #include "avcodec.h"
28
+#include "dsputil.h"
28 29
 #include "put_bits.h"
29 30
 #include "bytestream.h"
30 31
 #include "internal.h"
... ...
@@ -170,7 +171,7 @@ struct TrellisNode {
170 170
 #define MAX_STORED_Q 16
171 171
 
172 172
 typedef struct ProresThreadData {
173
-    DECLARE_ALIGNED(16, DCTELEM, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
173
+    DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
174 174
     DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
175 175
     int16_t custom_q[64];
176 176
     struct TrellisNode *nodes;
... ...
@@ -178,7 +179,7 @@ typedef struct ProresThreadData {
178 178
 
179 179
 typedef struct ProresContext {
180 180
     AVClass *class;
181
-    DECLARE_ALIGNED(16, DCTELEM, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
181
+    DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
182 182
     DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
183 183
     int16_t quants[MAX_STORED_Q][64];
184 184
     int16_t custom_q[64];
... ...
@@ -213,7 +214,7 @@ typedef struct ProresContext {
213 213
 
214 214
 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
215 215
                            int linesize, int x, int y, int w, int h,
216
-                           DCTELEM *blocks, uint16_t *emu_buf,
216
+                           int16_t *blocks, uint16_t *emu_buf,
217 217
                            int mbs_per_slice, int blocks_per_mb, int is_chroma)
218 218
 {
219 219
     const uint16_t *esrc;
... ...
@@ -317,7 +318,7 @@ static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int
317 317
 #define GET_SIGN(x)  ((x) >> 31)
318 318
 #define MAKE_CODE(x) (((x) << 1) ^ GET_SIGN(x))
319 319
 
320
-static void encode_dcs(PutBitContext *pb, DCTELEM *blocks,
320
+static void encode_dcs(PutBitContext *pb, int16_t *blocks,
321 321
                        int blocks_per_slice, int scale)
322 322
 {
323 323
     int i;
... ...
@@ -343,7 +344,7 @@ static void encode_dcs(PutBitContext *pb, DCTELEM *blocks,
343 343
     }
344 344
 }
345 345
 
346
-static void encode_acs(PutBitContext *pb, DCTELEM *blocks,
346
+static void encode_acs(PutBitContext *pb, int16_t *blocks,
347 347
                        int blocks_per_slice,
348 348
                        int plane_size_factor,
349 349
                        const uint8_t *scan, const int16_t *qmat)
... ...
@@ -379,7 +380,7 @@ static void encode_acs(PutBitContext *pb, DCTELEM *blocks,
379 379
 
380 380
 static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
381 381
                               const uint16_t *src, int linesize,
382
-                              int mbs_per_slice, DCTELEM *blocks,
382
+                              int mbs_per_slice, int16_t *blocks,
383 383
                               int blocks_per_mb, int plane_size_factor,
384 384
                               const int16_t *qmat)
385 385
 {
... ...
@@ -481,7 +482,7 @@ static inline int estimate_vlc(unsigned codebook, int val)
481 481
     }
482 482
 }
483 483
 
484
-static int estimate_dcs(int *error, DCTELEM *blocks, int blocks_per_slice,
484
+static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
485 485
                         int scale)
486 486
 {
487 487
     int i;
... ...
@@ -512,7 +513,7 @@ static int estimate_dcs(int *error, DCTELEM *blocks, int blocks_per_slice,
512 512
     return bits;
513 513
 }
514 514
 
515
-static int estimate_acs(int *error, DCTELEM *blocks, int blocks_per_slice,
515
+static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
516 516
                         int plane_size_factor,
517 517
                         const uint8_t *scan, const int16_t *qmat)
518 518
 {
... ...
@@ -44,7 +44,7 @@
44 44
  * aligned this could be done faster in a different way, e.g. as it is done
45 45
  * in MPlayer libmpcodecs/native/rtjpegn.c.
46 46
  */
47
-static inline int get_block(GetBitContext *gb, DCTELEM *block, const uint8_t *scan,
47
+static inline int get_block(GetBitContext *gb, int16_t *block, const uint8_t *scan,
48 48
                             const uint32_t *quant) {
49 49
     int coeff, i, n;
50 50
     int8_t ac;
... ...
@@ -61,7 +61,7 @@ static inline int get_block(GetBitContext *gb, DCTELEM *block, const uint8_t *sc
61 61
 
62 62
     // normally we would only need to clear the (63 - coeff) last values,
63 63
     // but since we do not know where they are we just clear the whole block
64
-    memset(block, 0, 64 * sizeof(DCTELEM));
64
+    memset(block, 0, 64 * sizeof(int16_t));
65 65
 
66 66
     // 2 bits per coefficient
67 67
     while (coeff) {
... ...
@@ -121,7 +121,7 @@ int ff_rtjpeg_decode_frame_yuv420(RTJpegContext *c, AVFrame *f,
121 121
     if (res > 0) \
122 122
         c->dsp->idct_put(dst, stride, block); \
123 123
 } while (0)
124
-            DCTELEM *block = c->block;
124
+            int16_t *block = c->block;
125 125
             BLOCK(c->lquant, y1, f->linesize[0]);
126 126
             y1 += 8;
127 127
             BLOCK(c->lquant, y1, f->linesize[0]);
... ...
@@ -35,7 +35,7 @@ typedef struct RTJpegContext {
35 35
     uint8_t scan[64];
36 36
     uint32_t lquant[64];
37 37
     uint32_t cquant[64];
38
-    DECLARE_ALIGNED(16, DCTELEM, block)[64];
38
+    DECLARE_ALIGNED(16, int16_t, block)[64];
39 39
 } RTJpegContext;
40 40
 
41 41
 void ff_rtjpeg_decode_init(RTJpegContext *c, DSPContext *dsp,
... ...
@@ -217,7 +217,7 @@ static int rv34_decode_cbp(GetBitContext *gb, RV34VLC *vlc, int table)
217 217
 /**
218 218
  * Get one coefficient value from the bitstream and store it.
219 219
  */
220
-static inline void decode_coeff(DCTELEM *dst, int coef, int esc, GetBitContext *gb, VLC* vlc, int q)
220
+static inline void decode_coeff(int16_t *dst, int coef, int esc, GetBitContext *gb, VLC* vlc, int q)
221 221
 {
222 222
     if(coef){
223 223
         if(coef == esc){
... ...
@@ -237,7 +237,7 @@ static inline void decode_coeff(DCTELEM *dst, int coef, int esc, GetBitContext *
237 237
 /**
238 238
  * Decode 2x2 subblock of coefficients.
239 239
  */
240
-static inline void decode_subblock(DCTELEM *dst, int code, const int is_block2, GetBitContext *gb, VLC *vlc, int q)
240
+static inline void decode_subblock(int16_t *dst, int code, const int is_block2, GetBitContext *gb, VLC *vlc, int q)
241 241
 {
242 242
     int flags = modulo_three_table[code];
243 243
 
... ...
@@ -255,13 +255,13 @@ static inline void decode_subblock(DCTELEM *dst, int code, const int is_block2,
255 255
 /**
256 256
  * Decode a single coefficient.
257 257
  */
258
-static inline void decode_subblock1(DCTELEM *dst, int code, GetBitContext *gb, VLC *vlc, int q)
258
+static inline void decode_subblock1(int16_t *dst, int code, GetBitContext *gb, VLC *vlc, int q)
259 259
 {
260 260
     int coeff = modulo_three_table[code] >> 6;
261 261
     decode_coeff(dst, coeff, 3, gb, vlc, q);
262 262
 }
263 263
 
264
-static inline void decode_subblock3(DCTELEM *dst, int code, GetBitContext *gb, VLC *vlc,
264
+static inline void decode_subblock3(int16_t *dst, int code, GetBitContext *gb, VLC *vlc,
265 265
                                     int q_dc, int q_ac1, int q_ac2)
266 266
 {
267 267
     int flags = modulo_three_table[code];
... ...
@@ -283,7 +283,7 @@ static inline void decode_subblock3(DCTELEM *dst, int code, GetBitContext *gb, V
283 283
  *  o--o
284 284
  */
285 285
 
286
-static int rv34_decode_block(DCTELEM *dst, GetBitContext *gb, RV34VLC *rvlc, int fc, int sc, int q_dc, int q_ac1, int q_ac2)
286
+static int rv34_decode_block(int16_t *dst, GetBitContext *gb, RV34VLC *rvlc, int fc, int sc, int q_dc, int q_ac1, int q_ac2)
287 287
 {
288 288
     int code, pattern, has_ac = 1;
289 289
 
... ...
@@ -995,7 +995,7 @@ static inline void rv34_process_block(RV34DecContext *r,
995 995
                                       int fc, int sc, int q_dc, int q_ac)
996 996
 {
997 997
     MpegEncContext *s = &r->s;
998
-    DCTELEM *ptr = s->block[0];
998
+    int16_t *ptr = s->block[0];
999 999
     int has_ac = rv34_decode_block(ptr, &s->gb, r->cur_vlcs,
1000 1000
                                    fc, sc, q_dc, q_ac, q_ac);
1001 1001
     if(has_ac){
... ...
@@ -1008,13 +1008,13 @@ static inline void rv34_process_block(RV34DecContext *r,
1008 1008
 
1009 1009
 static void rv34_output_i16x16(RV34DecContext *r, int8_t *intra_types, int cbp)
1010 1010
 {
1011
-    LOCAL_ALIGNED_16(DCTELEM, block16, [16]);
1011
+    LOCAL_ALIGNED_16(int16_t, block16, [16]);
1012 1012
     MpegEncContext *s    = &r->s;
1013 1013
     GetBitContext  *gb   = &s->gb;
1014 1014
     int             q_dc = rv34_qscale_tab[ r->luma_dc_quant_i[s->qscale] ],
1015 1015
                     q_ac = rv34_qscale_tab[s->qscale];
1016 1016
     uint8_t        *dst  = s->dest[0];
1017
-    DCTELEM        *ptr  = s->block[0];
1017
+    int16_t        *ptr  = s->block[0];
1018 1018
     int i, j, itype, has_ac;
1019 1019
 
1020 1020
     memset(block16, 0, 16 * sizeof(*block16));
... ...
@@ -1180,7 +1180,7 @@ static int rv34_decode_inter_macroblock(RV34DecContext *r, int8_t *intra_types)
1180 1180
     MpegEncContext *s   = &r->s;
1181 1181
     GetBitContext  *gb  = &s->gb;
1182 1182
     uint8_t        *dst = s->dest[0];
1183
-    DCTELEM        *ptr = s->block[0];
1183
+    int16_t        *ptr = s->block[0];
1184 1184
     int          mb_pos = s->mb_x + s->mb_y * s->mb_stride;
1185 1185
     int cbp, cbp2;
1186 1186
     int q_dc, q_ac, has_ac;
... ...
@@ -1220,7 +1220,7 @@ static int rv34_decode_inter_macroblock(RV34DecContext *r, int8_t *intra_types)
1220 1220
 
1221 1221
     if(r->is16){
1222 1222
         // Only for RV34_MB_P_MIX16x16
1223
-        LOCAL_ALIGNED_16(DCTELEM, block16, [16]);
1223
+        LOCAL_ALIGNED_16(int16_t, block16, [16]);
1224 1224
         memset(block16, 0, 16 * sizeof(*block16));
1225 1225
         q_dc = rv34_qscale_tab[ r->luma_dc_quant_p[s->qscale] ];
1226 1226
         q_ac = rv34_qscale_tab[s->qscale];
... ...
@@ -33,7 +33,7 @@
33 33
  * @{
34 34
  */
35 35
 
36
-static av_always_inline void rv34_row_transform(int temp[16], DCTELEM *block)
36
+static av_always_inline void rv34_row_transform(int temp[16], int16_t *block)
37 37
 {
38 38
     int i;
39 39
 
... ...
@@ -54,12 +54,12 @@ static av_always_inline void rv34_row_transform(int temp[16], DCTELEM *block)
54 54
  * Real Video 3.0/4.0 inverse transform + sample reconstruction
55 55
  * Code is almost the same as in SVQ3, only scaling is different.
56 56
  */
57
-static void rv34_idct_add_c(uint8_t *dst, ptrdiff_t stride, DCTELEM *block){
57
+static void rv34_idct_add_c(uint8_t *dst, ptrdiff_t stride, int16_t *block){
58 58
     int      temp[16];
59 59
     int      i;
60 60
 
61 61
     rv34_row_transform(temp, block);
62
-    memset(block, 0, 16*sizeof(DCTELEM));
62
+    memset(block, 0, 16*sizeof(int16_t));
63 63
 
64 64
     for(i = 0; i < 4; i++){
65 65
         const int z0 = 13*(temp[4*0+i] +    temp[4*2+i]) + 0x200;
... ...
@@ -82,7 +82,7 @@ static void rv34_idct_add_c(uint8_t *dst, ptrdiff_t stride, DCTELEM *block){
82 82
  * Code is almost the same as rv34_inv_transform()
83 83
  * but final coefficients are multiplied by 1.5 and have no rounding.
84 84
  */
85
-static void rv34_inv_transform_noround_c(DCTELEM *block){
85
+static void rv34_inv_transform_noround_c(int16_t *block){
86 86
     int temp[16];
87 87
     int i;
88 88
 
... ...
@@ -115,9 +115,9 @@ static void rv34_idct_dc_add_c(uint8_t *dst, ptrdiff_t stride, int dc)
115 115
     }
116 116
 }
117 117
 
118
-static void rv34_inv_transform_dc_noround_c(DCTELEM *block)
118
+static void rv34_inv_transform_dc_noround_c(int16_t *block)
119 119
 {
120
-    DCTELEM dc = (13 * 13 * 3 * block[0]) >> 11;
120
+    int16_t dc = (13 * 13 * 3 * block[0]) >> 11;
121 121
     int i, j;
122 122
 
123 123
     for (i = 0; i < 4; i++, block += 4)
... ...
@@ -34,9 +34,9 @@ typedef void (*rv40_weight_func)(uint8_t *dst/*align width (8 or 16)*/,
34 34
                                  uint8_t *src2/*align width (8 or 16)*/,
35 35
                                  int w1, int w2, ptrdiff_t stride);
36 36
 
37
-typedef void (*rv34_inv_transform_func)(DCTELEM *block);
37
+typedef void (*rv34_inv_transform_func)(int16_t *block);
38 38
 
39
-typedef void (*rv34_idct_add_func)(uint8_t *dst, ptrdiff_t stride, DCTELEM *block);
39
+typedef void (*rv34_idct_add_func)(uint8_t *dst, ptrdiff_t stride, int16_t *block);
40 40
 typedef void (*rv34_idct_dc_add_func)(uint8_t *dst, ptrdiff_t stride,
41 41
                                       int   dc);
42 42
 
... ...
@@ -47,12 +47,12 @@ static void memzero_align8(void *dst,size_t size)
47 47
         fp_single_leave(fpscr);
48 48
 }
49 49
 
50
-static void clear_blocks_sh4(DCTELEM *blocks)
50
+static void clear_blocks_sh4(int16_t *blocks)
51 51
 {
52
-        memzero_align8(blocks,sizeof(DCTELEM)*6*64);
52
+        memzero_align8(blocks,sizeof(int16_t)*6*64);
53 53
 }
54 54
 
55
-static void idct_put(uint8_t *dest, int line_size, DCTELEM *block)
55
+static void idct_put(uint8_t *dest, int line_size, int16_t *block)
56 56
 {
57 57
         int i;
58 58
         uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
... ...
@@ -70,7 +70,7 @@ static void idct_put(uint8_t *dest, int line_size, DCTELEM *block)
70 70
                 block+=8;
71 71
         }
72 72
 }
73
-static void idct_add(uint8_t *dest, int line_size, DCTELEM *block)
73
+static void idct_add(uint8_t *dest, int line_size, int16_t *block)
74 74
 {
75 75
         int i;
76 76
         uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
... ...
@@ -22,7 +22,7 @@
22 22
 #include "libavcodec/avcodec.h"
23 23
 #include "libavcodec/dsputil.h"
24 24
 
25
-void ff_idct_sh4(DCTELEM *block);
25
+void ff_idct_sh4(int16_t *block);
26 26
 void ff_dsputil_init_align(DSPContext* c, AVCodecContext *avctx);
27 27
 
28 28
 #endif /* AVCODEC_SH4_DSPUTIL_SH4_H */
... ...
@@ -89,7 +89,7 @@ static const float odd_table[] __attribute__ ((aligned(8))) = {
89 89
 
90 90
 //optimized
91 91
 
92
-void ff_idct_sh4(DCTELEM *block)
92
+void ff_idct_sh4(int16_t *block)
93 93
 {
94 94
         DEFREG;
95 95
 
... ...
@@ -50,7 +50,7 @@
50 50
    and the butterfly must be multiplied by 0.5 * sqrt(2.0) */
51 51
 #define C_SHIFT (4+1+12)
52 52
 
53
-static inline void idct4col_put(uint8_t *dest, int line_size, const DCTELEM *col)
53
+static inline void idct4col_put(uint8_t *dest, int line_size, const int16_t *col)
54 54
 {
55 55
     int c0, c1, c2, c3, a0, a1, a2, a3;
56 56
 
... ...
@@ -86,10 +86,10 @@ static inline void idct4col_put(uint8_t *dest, int line_size, const DCTELEM *col
86 86
 /* XXX: I think a 1.0/sqrt(2) normalization should be needed to
87 87
    compensate the extra butterfly stage - I don't have the full DV
88 88
    specification */
89
-void ff_simple_idct248_put(uint8_t *dest, int line_size, DCTELEM *block)
89
+void ff_simple_idct248_put(uint8_t *dest, int line_size, int16_t *block)
90 90
 {
91 91
     int i;
92
-    DCTELEM *ptr;
92
+    int16_t *ptr;
93 93
 
94 94
     /* butterfly */
95 95
     ptr = block;
... ...
@@ -129,7 +129,7 @@ void ff_simple_idct248_put(uint8_t *dest, int line_size, DCTELEM *block)
129 129
 #define C2 C_FIX(0.2705980501)
130 130
 #define C3 C_FIX(0.5)
131 131
 #define C_SHIFT (4+1+12)
132
-static inline void idct4col_add(uint8_t *dest, int line_size, const DCTELEM *col)
132
+static inline void idct4col_add(uint8_t *dest, int line_size, const int16_t *col)
133 133
 {
134 134
     int c0, c1, c2, c3, a0, a1, a2, a3;
135 135
 
... ...
@@ -156,7 +156,7 @@ static inline void idct4col_add(uint8_t *dest, int line_size, const DCTELEM *col
156 156
 #define R2 R_FIX(0.2705980501)
157 157
 #define R3 R_FIX(0.5)
158 158
 #define R_SHIFT 11
159
-static inline void idct4row(DCTELEM *row)
159
+static inline void idct4row(int16_t *row)
160 160
 {
161 161
     int c0, c1, c2, c3, a0, a1, a2, a3;
162 162
 
... ...
@@ -174,7 +174,7 @@ static inline void idct4row(DCTELEM *row)
174 174
     row[3]= (c0 - c1) >> R_SHIFT;
175 175
 }
176 176
 
177
-void ff_simple_idct84_add(uint8_t *dest, int line_size, DCTELEM *block)
177
+void ff_simple_idct84_add(uint8_t *dest, int line_size, int16_t *block)
178 178
 {
179 179
     int i;
180 180
 
... ...
@@ -189,7 +189,7 @@ void ff_simple_idct84_add(uint8_t *dest, int line_size, DCTELEM *block)
189 189
     }
190 190
 }
191 191
 
192
-void ff_simple_idct48_add(uint8_t *dest, int line_size, DCTELEM *block)
192
+void ff_simple_idct48_add(uint8_t *dest, int line_size, int16_t *block)
193 193
 {
194 194
     int i;
195 195
 
... ...
@@ -204,7 +204,7 @@ void ff_simple_idct48_add(uint8_t *dest, int line_size, DCTELEM *block)
204 204
     }
205 205
 }
206 206
 
207
-void ff_simple_idct44_add(uint8_t *dest, int line_size, DCTELEM *block)
207
+void ff_simple_idct44_add(uint8_t *dest, int line_size, int16_t *block)
208 208
 {
209 209
     int i;
210 210
 
... ...
@@ -219,7 +219,7 @@ void ff_simple_idct44_add(uint8_t *dest, int line_size, DCTELEM *block)
219 219
     }
220 220
 }
221 221
 
222
-void ff_prores_idct(DCTELEM *block, const int16_t *qmat)
222
+void ff_prores_idct(int16_t *block, const int16_t *qmat)
223 223
 {
224 224
     int i;
225 225
 
... ...
@@ -29,30 +29,29 @@
29 29
 #define AVCODEC_SIMPLE_IDCT_H
30 30
 
31 31
 #include <stdint.h>
32
-#include "dsputil.h"
33 32
 
34
-void ff_simple_idct_put_8(uint8_t *dest, int line_size, DCTELEM *block);
35
-void ff_simple_idct_add_8(uint8_t *dest, int line_size, DCTELEM *block);
36
-void ff_simple_idct_8(DCTELEM *block);
33
+void ff_simple_idct_put_8(uint8_t *dest, int line_size, int16_t *block);
34
+void ff_simple_idct_add_8(uint8_t *dest, int line_size, int16_t *block);
35
+void ff_simple_idct_8(int16_t *block);
37 36
 
38
-void ff_simple_idct_put_10(uint8_t *dest, int line_size, DCTELEM *block);
39
-void ff_simple_idct_add_10(uint8_t *dest, int line_size, DCTELEM *block);
40
-void ff_simple_idct_10(DCTELEM *block);
37
+void ff_simple_idct_put_10(uint8_t *dest, int line_size, int16_t *block);
38
+void ff_simple_idct_add_10(uint8_t *dest, int line_size, int16_t *block);
39
+void ff_simple_idct_10(int16_t *block);
41 40
 /**
42 41
  * Special version of ff_simple_idct_10() which does dequantization
43 42
  * and scales by a factor of 2 more between the two IDCTs to account
44 43
  * for larger scale of input coefficients.
45 44
  */
46
-void ff_prores_idct(DCTELEM *block, const int16_t *qmat);
45
+void ff_prores_idct(int16_t *block, const int16_t *qmat);
47 46
 
48 47
 void ff_simple_idct_mmx(int16_t *block);
49 48
 void ff_simple_idct_add_mmx(uint8_t *dest, int line_size, int16_t *block);
50 49
 void ff_simple_idct_put_mmx(uint8_t *dest, int line_size, int16_t *block);
51 50
 
52
-void ff_simple_idct248_put(uint8_t *dest, int line_size, DCTELEM *block);
51
+void ff_simple_idct248_put(uint8_t *dest, int line_size, int16_t *block);
53 52
 
54
-void ff_simple_idct84_add(uint8_t *dest, int line_size, DCTELEM *block);
55
-void ff_simple_idct48_add(uint8_t *dest, int line_size, DCTELEM *block);
56
-void ff_simple_idct44_add(uint8_t *dest, int line_size, DCTELEM *block);
53
+void ff_simple_idct84_add(uint8_t *dest, int line_size, int16_t *block);
54
+void ff_simple_idct48_add(uint8_t *dest, int line_size, int16_t *block);
55
+void ff_simple_idct44_add(uint8_t *dest, int line_size, int16_t *block);
57 56
 
58 57
 #endif /* AVCODEC_SIMPLE_IDCT_H */
... ...
@@ -85,7 +85,7 @@
85 85
 
86 86
 #endif
87 87
 
88
-static inline void FUNC(idctRowCondDC)(DCTELEM *row, int extra_shift)
88
+static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift)
89 89
 {
90 90
     int a0, a1, a2, a3, b0, b1, b2, b3;
91 91
 
... ...
@@ -221,7 +221,7 @@ static inline void FUNC(idctRowCondDC)(DCTELEM *row, int extra_shift)
221 221
     } while (0)
222 222
 
223 223
 static inline void FUNC(idctSparseColPut)(pixel *dest, int line_size,
224
-                                          DCTELEM *col)
224
+                                          int16_t *col)
225 225
 {
226 226
     int a0, a1, a2, a3, b0, b1, b2, b3;
227 227
 
... ...
@@ -245,7 +245,7 @@ static inline void FUNC(idctSparseColPut)(pixel *dest, int line_size,
245 245
 }
246 246
 
247 247
 static inline void FUNC(idctSparseColAdd)(pixel *dest, int line_size,
248
-                                          DCTELEM *col)
248
+                                          int16_t *col)
249 249
 {
250 250
     int a0, a1, a2, a3, b0, b1, b2, b3;
251 251
 
... ...
@@ -268,7 +268,7 @@ static inline void FUNC(idctSparseColAdd)(pixel *dest, int line_size,
268 268
     dest[0] = av_clip_pixel(dest[0] + ((a0 - b0) >> COL_SHIFT));
269 269
 }
270 270
 
271
-static inline void FUNC(idctSparseCol)(DCTELEM *col)
271
+static inline void FUNC(idctSparseCol)(int16_t *col)
272 272
 {
273 273
     int a0, a1, a2, a3, b0, b1, b2, b3;
274 274
 
... ...
@@ -284,7 +284,7 @@ static inline void FUNC(idctSparseCol)(DCTELEM *col)
284 284
     col[56] = ((a0 - b0) >> COL_SHIFT);
285 285
 }
286 286
 
287
-void FUNC(ff_simple_idct_put)(uint8_t *dest_, int line_size, DCTELEM *block)
287
+void FUNC(ff_simple_idct_put)(uint8_t *dest_, int line_size, int16_t *block)
288 288
 {
289 289
     pixel *dest = (pixel *)dest_;
290 290
     int i;
... ...
@@ -298,7 +298,7 @@ void FUNC(ff_simple_idct_put)(uint8_t *dest_, int line_size, DCTELEM *block)
298 298
         FUNC(idctSparseColPut)(dest + i, line_size, block + i);
299 299
 }
300 300
 
301
-void FUNC(ff_simple_idct_add)(uint8_t *dest_, int line_size, DCTELEM *block)
301
+void FUNC(ff_simple_idct_add)(uint8_t *dest_, int line_size, int16_t *block)
302 302
 {
303 303
     pixel *dest = (pixel *)dest_;
304 304
     int i;
... ...
@@ -312,7 +312,7 @@ void FUNC(ff_simple_idct_add)(uint8_t *dest_, int line_size, DCTELEM *block)
312 312
         FUNC(idctSparseColAdd)(dest + i, line_size, block + i);
313 313
 }
314 314
 
315
-void FUNC(ff_simple_idct)(DCTELEM *block)
315
+void FUNC(ff_simple_idct)(int16_t *block)
316 316
 {
317 317
     int i;
318 318
 
... ...
@@ -20,10 +20,9 @@
20 20
 #define AVCODEC_SPARC_DSPUTIL_VIS_H
21 21
 
22 22
 #include <stdint.h>
23
-#include "libavcodec/dsputil.h"
24 23
 
25
-void ff_simple_idct_put_vis(uint8_t *dest, int line_size, DCTELEM *data);
26
-void ff_simple_idct_add_vis(uint8_t *dest, int line_size, DCTELEM *data);
27
-void ff_simple_idct_vis(DCTELEM *data);
24
+void ff_simple_idct_put_vis(uint8_t *dest, int line_size, int16_t *data);
25
+void ff_simple_idct_add_vis(uint8_t *dest, int line_size, int16_t *data);
26
+void ff_simple_idct_vis(int16_t *data);
28 27
 
29 28
 #endif /* AVCODEC_SPARC_DSPUTIL_VIS_H */
... ...
@@ -388,7 +388,7 @@ static const DECLARE_ALIGNED(8, uint16_t, expand)[4] = {
388 388
         "st %%f14, [%12+" dest "] \n\t"\
389 389
 
390 390
 
391
-void ff_simple_idct_vis(DCTELEM *data) {
391
+void ff_simple_idct_vis(int16_t *data) {
392 392
     int out1, out2, out3, out4;
393 393
     DECLARE_ALIGNED(8, int16_t, temp)[8*8];
394 394
 
... ...
@@ -428,7 +428,7 @@ void ff_simple_idct_vis(DCTELEM *data) {
428 428
     );
429 429
 }
430 430
 
431
-void ff_simple_idct_put_vis(uint8_t *dest, int line_size, DCTELEM *data) {
431
+void ff_simple_idct_put_vis(uint8_t *dest, int line_size, int16_t *data) {
432 432
     int out1, out2, out3, out4, out5;
433 433
     int r1, r2, r3, r4, r5, r6, r7;
434 434
 
... ...
@@ -478,7 +478,7 @@ void ff_simple_idct_put_vis(uint8_t *dest, int line_size, DCTELEM *data) {
478 478
     );
479 479
 }
480 480
 
481
-void ff_simple_idct_add_vis(uint8_t *dest, int line_size, DCTELEM *data) {
481
+void ff_simple_idct_add_vis(uint8_t *dest, int line_size, int16_t *data) {
482 482
     int out1, out2, out3, out4, out5, out6;
483 483
     int r1, r2, r3, r4, r5, r6, r7;
484 484
 
... ...
@@ -139,7 +139,7 @@ static const uint32_t svq3_dequant_coeff[32] = {
139 139
     61694, 68745, 77615, 89113, 100253, 109366, 126635, 141533
140 140
 };
141 141
 
142
-void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qp)
142
+void ff_svq3_luma_dc_dequant_idct_c(int16_t *output, int16_t *input, int qp)
143 143
 {
144 144
     const int qmul = svq3_dequant_coeff[qp];
145 145
 #define stride 16
... ...
@@ -174,7 +174,7 @@ void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qp)
174 174
 }
175 175
 #undef stride
176 176
 
177
-void ff_svq3_add_idct_c(uint8_t *dst, DCTELEM *block,
177
+void ff_svq3_add_idct_c(uint8_t *dst, int16_t *block,
178 178
                         int stride, int qp, int dc)
179 179
 {
180 180
     const int qmul = svq3_dequant_coeff[qp];
... ...
@@ -212,7 +212,7 @@ void ff_svq3_add_idct_c(uint8_t *dst, DCTELEM *block,
212 212
     }
213 213
 }
214 214
 
215
-static inline int svq3_decode_block(GetBitContext *gb, DCTELEM *block,
215
+static inline int svq3_decode_block(GetBitContext *gb, int16_t *block,
216 216
                                     int index, const int type)
217 217
 {
218 218
     static const uint8_t *const scan_patterns[4] =
... ...
@@ -384,7 +384,7 @@ typedef struct VC1Context{
384 384
     int bi_type;
385 385
     int x8_type;
386 386
 
387
-    DCTELEM (*block)[6][64];
387
+    int16_t (*block)[6][64];
388 388
     int n_allocated_blks, cur_blk_idx, left_blk_idx, topleft_blk_idx, top_blk_idx;
389 389
     uint32_t *cbp_base, *cbp;
390 390
     uint8_t *is_intra_base, *is_intra;
... ...
@@ -2556,7 +2556,7 @@ static void vc1_decode_ac_coeff(VC1Context *v, int *last, int *skip,
2556 2556
  * @param coded are AC coeffs present or not
2557 2557
  * @param codingset set of VLC to decode data
2558 2558
  */
2559
-static int vc1_decode_i_block(VC1Context *v, DCTELEM block[64], int n,
2559
+static int vc1_decode_i_block(VC1Context *v, int16_t block[64], int n,
2560 2560
                               int coded, int codingset)
2561 2561
 {
2562 2562
     GetBitContext *gb = &v->s.gb;
... ...
@@ -2719,7 +2719,7 @@ not_coded:
2719 2719
  * @param codingset set of VLC to decode data
2720 2720
  * @param mquant quantizer value for this macroblock
2721 2721
  */
2722
-static int vc1_decode_i_block_adv(VC1Context *v, DCTELEM block[64], int n,
2722
+static int vc1_decode_i_block_adv(VC1Context *v, int16_t block[64], int n,
2723 2723
                                   int coded, int codingset, int mquant)
2724 2724
 {
2725 2725
     GetBitContext *gb = &v->s.gb;
... ...
@@ -2931,7 +2931,7 @@ static int vc1_decode_i_block_adv(VC1Context *v, DCTELEM block[64], int n,
2931 2931
  * @param mquant block quantizer
2932 2932
  * @param codingset set of VLC to decode data
2933 2933
  */
2934
-static int vc1_decode_intra_block(VC1Context *v, DCTELEM block[64], int n,
2934
+static int vc1_decode_intra_block(VC1Context *v, int16_t block[64], int n,
2935 2935
                                   int coded, int mquant, int codingset)
2936 2936
 {
2937 2937
     GetBitContext *gb = &v->s.gb;
... ...
@@ -3141,7 +3141,7 @@ static int vc1_decode_intra_block(VC1Context *v, DCTELEM block[64], int n,
3141 3141
 
3142 3142
 /** Decode P block
3143 3143
  */
3144
-static int vc1_decode_p_block(VC1Context *v, DCTELEM block[64], int n,
3144
+static int vc1_decode_p_block(VC1Context *v, int16_t block[64], int n,
3145 3145
                               int mquant, int ttmb, int first_block,
3146 3146
                               uint8_t *dst, int linesize, int skip_block,
3147 3147
                               int *ttmb_out)
... ...
@@ -4523,7 +4523,7 @@ static void vc1_decode_i_blocks_adv(VC1Context *v)
4523 4523
         s->mb_x = 0;
4524 4524
         ff_init_block_index(s);
4525 4525
         for (;s->mb_x < s->mb_width; s->mb_x++) {
4526
-            DCTELEM (*block)[64] = v->block[v->cur_blk_idx];
4526
+            int16_t (*block)[64] = v->block[v->cur_blk_idx];
4527 4527
             ff_update_block_index(s);
4528 4528
             s->dsp.clear_blocks(block[0]);
4529 4529
             mb_pos = s->mb_x + s->mb_y * s->mb_stride;
... ...
@@ -80,7 +80,7 @@ static void vc1_h_overlap_c(uint8_t* src, int stride)
80 80
     }
81 81
 }
82 82
 
83
-static void vc1_v_s_overlap_c(DCTELEM *top,  DCTELEM *bottom)
83
+static void vc1_v_s_overlap_c(int16_t *top,  int16_t *bottom)
84 84
 {
85 85
     int i;
86 86
     int a, b, c, d;
... ...
@@ -106,7 +106,7 @@ static void vc1_v_s_overlap_c(DCTELEM *top,  DCTELEM *bottom)
106 106
     }
107 107
 }
108 108
 
109
-static void vc1_h_s_overlap_c(DCTELEM *left, DCTELEM *right)
109
+static void vc1_h_s_overlap_c(int16_t *left, int16_t *right)
110 110
 {
111 111
     int i;
112 112
     int a, b, c, d;
... ...
@@ -230,7 +230,7 @@ static void vc1_h_loop_filter16_c(uint8_t *src, int stride, int pq)
230 230
 
231 231
 /** Do inverse transform on 8x8 block
232 232
 */
233
-static void vc1_inv_trans_8x8_dc_c(uint8_t *dest, int linesize, DCTELEM *block)
233
+static void vc1_inv_trans_8x8_dc_c(uint8_t *dest, int linesize, int16_t *block)
234 234
 {
235 235
     int i;
236 236
     int dc = block[0];
... ...
@@ -249,11 +249,11 @@ static void vc1_inv_trans_8x8_dc_c(uint8_t *dest, int linesize, DCTELEM *block)
249 249
     }
250 250
 }
251 251
 
252
-static void vc1_inv_trans_8x8_c(DCTELEM block[64])
252
+static void vc1_inv_trans_8x8_c(int16_t block[64])
253 253
 {
254 254
     int i;
255 255
     register int t1,t2,t3,t4,t5,t6,t7,t8;
256
-    DCTELEM *src, *dst, temp[64];
256
+    int16_t *src, *dst, temp[64];
257 257
 
258 258
     src = block;
259 259
     dst = temp;
... ...
@@ -320,7 +320,7 @@ static void vc1_inv_trans_8x8_c(DCTELEM block[64])
320 320
 
321 321
 /** Do inverse transform on 8x4 part of block
322 322
 */
323
-static void vc1_inv_trans_8x4_dc_c(uint8_t *dest, int linesize, DCTELEM *block)
323
+static void vc1_inv_trans_8x4_dc_c(uint8_t *dest, int linesize, int16_t *block)
324 324
 {
325 325
     int i;
326 326
     int dc = block[0];
... ...
@@ -339,11 +339,11 @@ static void vc1_inv_trans_8x4_dc_c(uint8_t *dest, int linesize, DCTELEM *block)
339 339
     }
340 340
 }
341 341
 
342
-static void vc1_inv_trans_8x4_c(uint8_t *dest, int linesize, DCTELEM *block)
342
+static void vc1_inv_trans_8x4_c(uint8_t *dest, int linesize, int16_t *block)
343 343
 {
344 344
     int i;
345 345
     register int t1,t2,t3,t4,t5,t6,t7,t8;
346
-    DCTELEM *src, *dst;
346
+    int16_t *src, *dst;
347 347
 
348 348
     src = block;
349 349
     dst = block;
... ...
@@ -395,7 +395,7 @@ static void vc1_inv_trans_8x4_c(uint8_t *dest, int linesize, DCTELEM *block)
395 395
 
396 396
 /** Do inverse transform on 4x8 parts of block
397 397
 */
398
-static void vc1_inv_trans_4x8_dc_c(uint8_t *dest, int linesize, DCTELEM *block)
398
+static void vc1_inv_trans_4x8_dc_c(uint8_t *dest, int linesize, int16_t *block)
399 399
 {
400 400
     int i;
401 401
     int dc = block[0];
... ...
@@ -410,11 +410,11 @@ static void vc1_inv_trans_4x8_dc_c(uint8_t *dest, int linesize, DCTELEM *block)
410 410
     }
411 411
 }
412 412
 
413
-static void vc1_inv_trans_4x8_c(uint8_t *dest, int linesize, DCTELEM *block)
413
+static void vc1_inv_trans_4x8_c(uint8_t *dest, int linesize, int16_t *block)
414 414
 {
415 415
     int i;
416 416
     register int t1,t2,t3,t4,t5,t6,t7,t8;
417
-    DCTELEM *src, *dst;
417
+    int16_t *src, *dst;
418 418
 
419 419
     src = block;
420 420
     dst = block;
... ...
@@ -466,7 +466,7 @@ static void vc1_inv_trans_4x8_c(uint8_t *dest, int linesize, DCTELEM *block)
466 466
 
467 467
 /** Do inverse transform on 4x4 part of block
468 468
 */
469
-static void vc1_inv_trans_4x4_dc_c(uint8_t *dest, int linesize, DCTELEM *block)
469
+static void vc1_inv_trans_4x4_dc_c(uint8_t *dest, int linesize, int16_t *block)
470 470
 {
471 471
     int i;
472 472
     int dc = block[0];
... ...
@@ -481,11 +481,11 @@ static void vc1_inv_trans_4x4_dc_c(uint8_t *dest, int linesize, DCTELEM *block)
481 481
     }
482 482
 }
483 483
 
484
-static void vc1_inv_trans_4x4_c(uint8_t *dest, int linesize, DCTELEM *block)
484
+static void vc1_inv_trans_4x4_c(uint8_t *dest, int linesize, int16_t *block)
485 485
 {
486 486
     int i;
487 487
     register int t1,t2,t3,t4;
488
-    DCTELEM *src, *dst;
488
+    int16_t *src, *dst;
489 489
 
490 490
     src = block;
491 491
     dst = block;
... ...
@@ -32,18 +32,18 @@
32 32
 
33 33
 typedef struct VC1DSPContext {
34 34
     /* vc1 functions */
35
-    void (*vc1_inv_trans_8x8)(DCTELEM *b);
36
-    void (*vc1_inv_trans_8x4)(uint8_t *dest, int line_size, DCTELEM *block);
37
-    void (*vc1_inv_trans_4x8)(uint8_t *dest, int line_size, DCTELEM *block);
38
-    void (*vc1_inv_trans_4x4)(uint8_t *dest, int line_size, DCTELEM *block);
39
-    void (*vc1_inv_trans_8x8_dc)(uint8_t *dest, int line_size, DCTELEM *block);
40
-    void (*vc1_inv_trans_8x4_dc)(uint8_t *dest, int line_size, DCTELEM *block);
41
-    void (*vc1_inv_trans_4x8_dc)(uint8_t *dest, int line_size, DCTELEM *block);
42
-    void (*vc1_inv_trans_4x4_dc)(uint8_t *dest, int line_size, DCTELEM *block);
35
+    void (*vc1_inv_trans_8x8)(int16_t *b);
36
+    void (*vc1_inv_trans_8x4)(uint8_t *dest, int line_size, int16_t *block);
37
+    void (*vc1_inv_trans_4x8)(uint8_t *dest, int line_size, int16_t *block);
38
+    void (*vc1_inv_trans_4x4)(uint8_t *dest, int line_size, int16_t *block);
39
+    void (*vc1_inv_trans_8x8_dc)(uint8_t *dest, int line_size, int16_t *block);
40
+    void (*vc1_inv_trans_8x4_dc)(uint8_t *dest, int line_size, int16_t *block);
41
+    void (*vc1_inv_trans_4x8_dc)(uint8_t *dest, int line_size, int16_t *block);
42
+    void (*vc1_inv_trans_4x4_dc)(uint8_t *dest, int line_size, int16_t *block);
43 43
     void (*vc1_v_overlap)(uint8_t *src, int stride);
44 44
     void (*vc1_h_overlap)(uint8_t *src, int stride);
45
-    void (*vc1_v_s_overlap)(DCTELEM *top,  DCTELEM *bottom);
46
-    void (*vc1_h_s_overlap)(DCTELEM *left, DCTELEM *right);
45
+    void (*vc1_v_s_overlap)(int16_t *top,  int16_t *bottom);
46
+    void (*vc1_h_s_overlap)(int16_t *left, int16_t *right);
47 47
     void (*vc1_v_loop_filter4)(uint8_t *src, int stride, int pq);
48 48
     void (*vc1_h_loop_filter4)(uint8_t *src, int stride, int pq);
49 49
     void (*vc1_v_loop_filter8)(uint8_t *src, int stride, int pq);
... ...
@@ -142,7 +142,7 @@ typedef struct Vp3DecodeContext {
142 142
     DSPContext dsp;
143 143
     VideoDSPContext vdsp;
144 144
     VP3DSPContext vp3dsp;
145
-    DECLARE_ALIGNED(16, DCTELEM, block)[64];
145
+    DECLARE_ALIGNED(16, int16_t, block)[64];
146 146
     int flipped_image;
147 147
     int last_slice_end;
148 148
     int skip_loop_filter;
... ...
@@ -926,7 +926,7 @@ static int unpack_vlcs(Vp3DecodeContext *s, GetBitContext *gb,
926 926
     int i, j = 0;
927 927
     int token;
928 928
     int zero_run = 0;
929
-    DCTELEM coeff = 0;
929
+    int16_t coeff = 0;
930 930
     int bits_to_get;
931 931
     int blocks_ended;
932 932
     int coeff_i = 0;
... ...
@@ -1356,7 +1356,7 @@ static void apply_loop_filter(Vp3DecodeContext *s, int plane, int ystart, int ye
1356 1356
  * for the next block in coding order
1357 1357
  */
1358 1358
 static inline int vp3_dequant(Vp3DecodeContext *s, Vp3Fragment *frag,
1359
-                              int plane, int inter, DCTELEM block[64])
1359
+                              int plane, int inter, int16_t block[64])
1360 1360
 {
1361 1361
     int16_t *dequantizer = s->qmat[frag->qpi][inter][plane];
1362 1362
     uint8_t *perm = s->scantable.permutated;
... ...
@@ -1465,7 +1465,7 @@ static void await_reference_row(Vp3DecodeContext *s, Vp3Fragment *fragment, int
1465 1465
 static void render_slice(Vp3DecodeContext *s, int slice)
1466 1466
 {
1467 1467
     int x, y, i, j, fragment;
1468
-    DCTELEM *block = s->block;
1468
+    int16_t *block = s->block;
1469 1469
     int motion_x = 0xdeadbeef, motion_y = 0xdeadbeef;
1470 1470
     int motion_halfpel_index;
1471 1471
     uint8_t *motion_source;
... ...
@@ -213,18 +213,23 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int
213 213
     }
214 214
 }
215 215
 
216
-static void vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){
216
+static void vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size,
217
+                           int16_t *block/*align 16*/)
218
+{
217 219
     idct(dest, line_size, block, 1);
218 220
     memset(block, 0, sizeof(*block) * 64);
219 221
 }
220 222
 
221
-static void vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){
223
+static void vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size,
224
+                           int16_t *block/*align 16*/)
225
+{
222 226
     idct(dest, line_size, block, 2);
223 227
     memset(block, 0, sizeof(*block) * 64);
224 228
 }
225 229
 
226 230
 static void vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size,
227
-                              DCTELEM *block/*align 16*/){
231
+                              int16_t *block/*align 16*/)
232
+{
228 233
     int i, dc = (block[0] + 15) >> 5;
229 234
 
230 235
     for(i = 0; i < 8; i++){
... ...
@@ -21,7 +21,6 @@
21 21
 
22 22
 #include <stddef.h>
23 23
 #include <stdint.h>
24
-#include "dsputil.h"
25 24
 
26 25
 typedef struct VP3DSPContext {
27 26
     /**
... ...
@@ -39,9 +38,9 @@ typedef struct VP3DSPContext {
39 39
                                  const uint8_t *b,
40 40
                                  ptrdiff_t stride, int h);
41 41
 
42
-    void (*idct_put)(uint8_t *dest, int line_size, DCTELEM *block);
43
-    void (*idct_add)(uint8_t *dest, int line_size, DCTELEM *block);
44
-    void (*idct_dc_add)(uint8_t *dest, int line_size, DCTELEM *block);
42
+    void (*idct_put)(uint8_t *dest, int line_size, int16_t *block);
43
+    void (*idct_add)(uint8_t *dest, int line_size, int16_t *block);
44
+    void (*idct_dc_add)(uint8_t *dest, int line_size, int16_t *block);
45 45
     void (*v_loop_filter)(uint8_t *src, int stride, int *bounding_values);
46 46
     void (*h_loop_filter)(uint8_t *src, int stride, int *bounding_values);
47 47
 
... ...
@@ -67,7 +67,7 @@ typedef struct VP56RangeCoder {
67 67
 typedef struct VP56RefDc {
68 68
     uint8_t not_null_dc;
69 69
     VP56Frame ref_frame;
70
-    DCTELEM dc_coeff;
70
+    int16_t dc_coeff;
71 71
 } VP56RefDc;
72 72
 
73 73
 typedef struct VP56Macroblock {
... ...
@@ -125,12 +125,12 @@ struct vp56_context {
125 125
     VP56RefDc *above_blocks;
126 126
     VP56RefDc left_block[4];
127 127
     int above_block_idx[6];
128
-    DCTELEM prev_dc[3][3];    /* [plan][ref_frame] */
128
+    int16_t prev_dc[3][3];    /* [plan][ref_frame] */
129 129
 
130 130
     /* blocks / macroblock */
131 131
     VP56mb mb_type;
132 132
     VP56Macroblock *macroblocks;
133
-    DECLARE_ALIGNED(16, DCTELEM, block_coeff)[6][64];
133
+    DECLARE_ALIGNED(16, int16_t, block_coeff)[6][64];
134 134
 
135 135
     /* motion vectors */
136 136
     VP56mv mv[6];  /* vectors for each block in MB */
... ...
@@ -761,7 +761,7 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
761 761
  * @return 0 if no coeffs were decoded
762 762
  *         otherwise, the index of the last coeff decoded plus one
763 763
  */
764
-static int decode_block_coeffs_internal(VP56RangeCoder *r, DCTELEM block[16],
764
+static int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
765 765
                                         uint8_t probs[16][3][NUM_DCT_TOKENS-1],
766 766
                                         int i, uint8_t *token_prob, int16_t qmul[2])
767 767
 {
... ...
@@ -829,7 +829,7 @@ skip_eob:
829 829
  *         otherwise, the index of the last coeff decoded plus one
830 830
  */
831 831
 static av_always_inline
832
-int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
832
+int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
833 833
                         uint8_t probs[16][3][NUM_DCT_TOKENS-1],
834 834
                         int i, int zero_nhood, int16_t qmul[2])
835 835
 {
... ...
@@ -96,8 +96,8 @@ typedef struct VP8Macroblock {
96 96
 } VP8Macroblock;
97 97
 
98 98
 typedef struct VP8ThreadData {
99
-    DECLARE_ALIGNED(16, DCTELEM, block)[6][4][16];
100
-    DECLARE_ALIGNED(16, DCTELEM, block_dc)[16];
99
+    DECLARE_ALIGNED(16, int16_t, block)[6][4][16];
100
+    DECLARE_ALIGNED(16, int16_t, block_dc)[16];
101 101
     /**
102 102
      * This is the index plus one of the last non-zero coeff
103 103
      * for each of the blocks in the current macroblock.
... ...
@@ -29,7 +29,7 @@
29 29
 #include "libavutil/common.h"
30 30
 
31 31
 // TODO: Maybe add dequant
32
-static void vp8_luma_dc_wht_c(DCTELEM block[4][4][16], DCTELEM dc[16])
32
+static void vp8_luma_dc_wht_c(int16_t block[4][4][16], int16_t dc[16])
33 33
 {
34 34
     int i, t0, t1, t2, t3;
35 35
 
... ...
@@ -62,7 +62,7 @@ static void vp8_luma_dc_wht_c(DCTELEM block[4][4][16], DCTELEM dc[16])
62 62
     }
63 63
 }
64 64
 
65
-static void vp8_luma_dc_wht_dc_c(DCTELEM block[4][4][16], DCTELEM dc[16])
65
+static void vp8_luma_dc_wht_dc_c(int16_t block[4][4][16], int16_t dc[16])
66 66
 {
67 67
     int i, val = (dc[0] + 3) >> 3;
68 68
     dc[0] = 0;
... ...
@@ -78,10 +78,10 @@ static void vp8_luma_dc_wht_dc_c(DCTELEM block[4][4][16], DCTELEM dc[16])
78 78
 #define MUL_20091(a) ((((a)*20091) >> 16) + (a))
79 79
 #define MUL_35468(a)  (((a)*35468) >> 16)
80 80
 
81
-static void vp8_idct_add_c(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride)
81
+static void vp8_idct_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
82 82
 {
83 83
     int i, t0, t1, t2, t3;
84
-    DCTELEM tmp[16];
84
+    int16_t tmp[16];
85 85
 
86 86
     for (i = 0; i < 4; i++) {
87 87
         t0 = block[0*4+i] + block[2*4+i];
... ...
@@ -113,7 +113,7 @@ static void vp8_idct_add_c(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride)
113 113
     }
114 114
 }
115 115
 
116
-static void vp8_idct_dc_add_c(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride)
116
+static void vp8_idct_dc_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
117 117
 {
118 118
     int i, dc = (block[0] + 4) >> 3;
119 119
     block[0] = 0;
... ...
@@ -127,7 +127,7 @@ static void vp8_idct_dc_add_c(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride)
127 127
     }
128 128
 }
129 129
 
130
-static void vp8_idct_dc_add4uv_c(uint8_t *dst, DCTELEM block[4][16], ptrdiff_t stride)
130
+static void vp8_idct_dc_add4uv_c(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride)
131 131
 {
132 132
     vp8_idct_dc_add_c(dst+stride*0+0, block[0], stride);
133 133
     vp8_idct_dc_add_c(dst+stride*0+4, block[1], stride);
... ...
@@ -135,7 +135,7 @@ static void vp8_idct_dc_add4uv_c(uint8_t *dst, DCTELEM block[4][16], ptrdiff_t s
135 135
     vp8_idct_dc_add_c(dst+stride*4+4, block[3], stride);
136 136
 }
137 137
 
138
-static void vp8_idct_dc_add4y_c(uint8_t *dst, DCTELEM block[4][16], ptrdiff_t stride)
138
+static void vp8_idct_dc_add4y_c(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride)
139 139
 {
140 140
     vp8_idct_dc_add_c(dst+ 0, block[0], stride);
141 141
     vp8_idct_dc_add_c(dst+ 4, block[1], stride);
... ...
@@ -27,20 +27,21 @@
27 27
 #ifndef AVCODEC_VP8DSP_H
28 28
 #define AVCODEC_VP8DSP_H
29 29
 
30
-#include "dsputil.h"
30
+#include <stddef.h>
31
+#include <stdint.h>
31 32
 
32 33
 typedef void (*vp8_mc_func)(uint8_t *dst/*align 8*/, ptrdiff_t dstStride,
33 34
                             uint8_t *src/*align 1*/, ptrdiff_t srcStride,
34 35
                             int h, int x, int y);
35 36
 
36 37
 typedef struct VP8DSPContext {
37
-    void (*vp8_luma_dc_wht)(DCTELEM block[4][4][16], DCTELEM dc[16]);
38
-    void (*vp8_luma_dc_wht_dc)(DCTELEM block[4][4][16], DCTELEM dc[16]);
39
-    void (*vp8_idct_add)(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride);
40
-    void (*vp8_idct_dc_add)(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride);
41
-    void (*vp8_idct_dc_add4y)(uint8_t *dst, DCTELEM block[4][16],
38
+    void (*vp8_luma_dc_wht)(int16_t block[4][4][16], int16_t dc[16]);
39
+    void (*vp8_luma_dc_wht_dc)(int16_t block[4][4][16], int16_t dc[16]);
40
+    void (*vp8_idct_add)(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
41
+    void (*vp8_idct_dc_add)(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
42
+    void (*vp8_idct_dc_add4y)(uint8_t *dst, int16_t block[4][16],
42 43
                               ptrdiff_t stride);
43
-    void (*vp8_idct_dc_add4uv)(uint8_t *dst, DCTELEM block[4][16],
44
+    void (*vp8_idct_dc_add4uv)(uint8_t *dst, int16_t block[4][16],
44 45
                                ptrdiff_t stride);
45 46
 
46 47
     // loop filter applied to edges between macroblocks
... ...
@@ -48,7 +48,7 @@ av_cold void ff_wmv2_common_init(Wmv2Context * w){
48 48
     s->dsp.idct     = NULL;
49 49
 }
50 50
 
51
-static void wmv2_add_block(Wmv2Context *w, DCTELEM *block1, uint8_t *dst, int stride, int n){
51
+static void wmv2_add_block(Wmv2Context *w, int16_t *block1, uint8_t *dst, int stride, int n){
52 52
     MpegEncContext * const s= &w->s;
53 53
 
54 54
   if (s->block_last_index[n] >= 0) {
... ...
@@ -72,7 +72,7 @@ static void wmv2_add_block(Wmv2Context *w, DCTELEM *block1, uint8_t *dst, int st
72 72
   }
73 73
 }
74 74
 
75
-void ff_wmv2_add_mb(MpegEncContext *s, DCTELEM block1[6][64], uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr){
75
+void ff_wmv2_add_mb(MpegEncContext *s, int16_t block1[6][64], uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr){
76 76
     Wmv2Context * const w= (Wmv2Context*)s;
77 77
 
78 78
     wmv2_add_block(w, block1[0], dest_y                    , s->linesize, 0);
... ...
@@ -52,7 +52,7 @@ typedef struct Wmv2Context{
52 52
     int hshift;
53 53
 
54 54
     ScanTable abt_scantable[2];
55
-    DECLARE_ALIGNED(16, DCTELEM, abt_block2)[6][64];
55
+    DECLARE_ALIGNED(16, int16_t, abt_block2)[6][64];
56 56
 }Wmv2Context;
57 57
 
58 58
 void ff_wmv2_common_init(Wmv2Context * w);
... ...
@@ -291,7 +291,7 @@ static int16_t *wmv2_pred_motion(Wmv2Context *w, int *px, int *py){
291 291
     return mot_val;
292 292
 }
293 293
 
294
-static inline int wmv2_decode_inter_block(Wmv2Context *w, DCTELEM *block, int n, int cbp){
294
+static inline int wmv2_decode_inter_block(Wmv2Context *w, int16_t *block, int n, int cbp){
295 295
     MpegEncContext * const s= &w->s;
296 296
     static const int sub_cbp_table[3]= {2,3,1};
297 297
     int sub_cbp;
... ...
@@ -331,7 +331,7 @@ static inline int wmv2_decode_inter_block(Wmv2Context *w, DCTELEM *block, int n,
331 331
 }
332 332
 
333 333
 
334
-int ff_wmv2_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
334
+int ff_wmv2_decode_mb(MpegEncContext *s, int16_t block[6][64])
335 335
 {
336 336
     Wmv2Context * const w= (Wmv2Context*)s;
337 337
     int cbp, code, i;
... ...
@@ -19,6 +19,7 @@
19 19
 #include "libavutil/attributes.h"
20 20
 #include "libavutil/common.h"
21 21
 #include "avcodec.h"
22
+#include "dsputil.h"
22 23
 #include "wmv2dsp.h"
23 24
 
24 25
 #define W0 2048
... ...
@@ -91,7 +92,7 @@ static void wmv2_idct_col(short * b)
91 91
     b[8 * 7] = (a0 + a2 - a1 - a5 + (1 << 13)) >> 14;
92 92
 }
93 93
 
94
-static void wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
94
+static void wmv2_idct_add_c(uint8_t *dest, int line_size, int16_t *block)
95 95
 {
96 96
     int i;
97 97
 
... ...
@@ -114,7 +115,7 @@ static void wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
114 114
     }
115 115
 }
116 116
 
117
-static void wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
117
+static void wmv2_idct_put_c(uint8_t *dest, int line_size, int16_t *block)
118 118
 {
119 119
     int i;
120 120
 
... ...
@@ -20,11 +20,10 @@
20 20
 #define AVCODEC_WMV2DSP_H
21 21
 
22 22
 #include <stdint.h>
23
-#include "dsputil.h"
24 23
 
25 24
 typedef struct WMV2DSPContext {
26
-    void (*idct_add)(uint8_t *dest, int line_size, DCTELEM *block);
27
-    void (*idct_put)(uint8_t *dest, int line_size, DCTELEM *block);
25
+    void (*idct_add)(uint8_t *dest, int line_size, int16_t *block);
26
+    void (*idct_put)(uint8_t *dest, int line_size, int16_t *block);
28 27
 
29 28
     int idct_perm;
30 29
 } WMV2DSPContext;
... ...
@@ -152,7 +152,7 @@ int ff_wmv2_encode_picture_header(MpegEncContext * s, int picture_number)
152 152
  * useless M$ crap features. It is duplicated here in case someone wants
153 153
  * to add support for these crap features. */
154 154
 void ff_wmv2_encode_mb(MpegEncContext * s,
155
-                       DCTELEM block[6][64],
155
+                       int16_t block[6][64],
156 156
                        int motion_x, int motion_y)
157 157
 {
158 158
     Wmv2Context * const w= (Wmv2Context*)s;
... ...
@@ -26,7 +26,7 @@
26 26
 
27 27
 #if HAVE_SSE2_INLINE
28 28
 
29
-static void get_pixels_8x4_sym_sse2(DCTELEM *block, const uint8_t *pixels, int line_size)
29
+static void get_pixels_8x4_sym_sse2(int16_t *block, const uint8_t *pixels, int line_size)
30 30
 {
31 31
     __asm__ volatile(
32 32
         "pxor %%xmm5,      %%xmm5       \n\t"
... ...
@@ -226,10 +226,10 @@ DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 };
226 226
 /***********************************/
227 227
 /* standard MMX */
228 228
 
229
-void ff_put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels,
229
+void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
230 230
                                int line_size)
231 231
 {
232
-    const DCTELEM *p;
232
+    const int16_t *p;
233 233
     uint8_t *pix;
234 234
 
235 235
     /* read the pixels */
... ...
@@ -301,7 +301,7 @@ void ff_put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels,
301 301
     "movq               %%mm3, (%0, %3, 2)  \n\t"           \
302 302
     "movq               %%mm4, (%0, %1)     \n\t"
303 303
 
304
-void ff_put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels,
304
+void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
305 305
                                       int line_size)
306 306
 {
307 307
     x86_reg line_skip = line_size;
... ...
@@ -318,10 +318,10 @@ void ff_put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels,
318 318
         : "memory");
319 319
 }
320 320
 
321
-void ff_add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels,
321
+void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
322 322
                                int line_size)
323 323
 {
324
-    const DCTELEM *p;
324
+    const int16_t *p;
325 325
     uint8_t *pix;
326 326
     int i;
327 327
 
... ...
@@ -423,7 +423,7 @@ static void put_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
423 423
 }
424 424
 
425 425
 #define CLEAR_BLOCKS(name, n)                           \
426
-static void name(DCTELEM *blocks)                       \
426
+static void name(int16_t *blocks)                       \
427 427
 {                                                       \
428 428
     __asm__ volatile (                                  \
429 429
         "pxor %%mm7, %%mm7              \n\t"           \
... ...
@@ -443,7 +443,7 @@ static void name(DCTELEM *blocks)                       \
443 443
 CLEAR_BLOCKS(clear_blocks_mmx, 6)
444 444
 CLEAR_BLOCKS(clear_block_mmx, 1)
445 445
 
446
-static void clear_block_sse(DCTELEM *block)
446
+static void clear_block_sse(int16_t *block)
447 447
 {
448 448
     __asm__ volatile (
449 449
         "xorps  %%xmm0, %%xmm0          \n"
... ...
@@ -460,7 +460,7 @@ static void clear_block_sse(DCTELEM *block)
460 460
     );
461 461
 }
462 462
 
463
-static void clear_blocks_sse(DCTELEM *blocks)
463
+static void clear_blocks_sse(int16_t *blocks)
464 464
 {
465 465
     __asm__ volatile (
466 466
         "xorps  %%xmm0, %%xmm0              \n"
... ...
@@ -1882,28 +1882,28 @@ void ff_avg_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride,
1882 1882
  * converted. */
1883 1883
 #if CONFIG_GPL
1884 1884
 static void ff_libmpeg2mmx_idct_put(uint8_t *dest, int line_size,
1885
-                                    DCTELEM *block)
1885
+                                    int16_t *block)
1886 1886
 {
1887 1887
     ff_mmx_idct(block);
1888 1888
     ff_put_pixels_clamped_mmx(block, dest, line_size);
1889 1889
 }
1890 1890
 
1891 1891
 static void ff_libmpeg2mmx_idct_add(uint8_t *dest, int line_size,
1892
-                                    DCTELEM *block)
1892
+                                    int16_t *block)
1893 1893
 {
1894 1894
     ff_mmx_idct(block);
1895 1895
     ff_add_pixels_clamped_mmx(block, dest, line_size);
1896 1896
 }
1897 1897
 
1898 1898
 static void ff_libmpeg2mmx2_idct_put(uint8_t *dest, int line_size,
1899
-                                     DCTELEM *block)
1899
+                                     int16_t *block)
1900 1900
 {
1901 1901
     ff_mmxext_idct(block);
1902 1902
     ff_put_pixels_clamped_mmx(block, dest, line_size);
1903 1903
 }
1904 1904
 
1905 1905
 static void ff_libmpeg2mmx2_idct_add(uint8_t *dest, int line_size,
1906
-                                     DCTELEM *block)
1906
+                                     int16_t *block)
1907 1907
 {
1908 1908
     ff_mmxext_idct(block);
1909 1909
     ff_add_pixels_clamped_mmx(block, dest, line_size);
... ...
@@ -83,9 +83,9 @@ extern const double ff_pd_2[2];
83 83
 void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx);
84 84
 void ff_dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx);
85 85
 
86
-void ff_add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
87
-void ff_put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
88
-void ff_put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
86
+void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size);
87
+void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size);
88
+void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size);
89 89
 
90 90
 void ff_put_cavs_qpel8_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride);
91 91
 void ff_avg_cavs_qpel8_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride);
... ...
@@ -100,8 +100,8 @@ void ff_put_rv40_qpel16_mc33_mmx(uint8_t *block, uint8_t *pixels, int line_size)
100 100
 void ff_avg_rv40_qpel8_mc33_mmx(uint8_t *block, uint8_t *pixels, int line_size);
101 101
 void ff_avg_rv40_qpel16_mc33_mmx(uint8_t *block, uint8_t *pixels, int line_size);
102 102
 
103
-void ff_mmx_idct(DCTELEM *block);
104
-void ff_mmxext_idct(DCTELEM *block);
103
+void ff_mmx_idct(int16_t *block);
104
+void ff_mmxext_idct(int16_t *block);
105 105
 
106 106
 
107 107
 void ff_deinterlace_line_mmx(uint8_t *dst,
... ...
@@ -335,7 +335,7 @@ cglobal sse16, 5, 5, 8
335 335
     RET
336 336
 
337 337
 INIT_MMX mmx
338
-; get_pixels_mmx(DCTELEM *block, const uint8_t *pixels, int line_size)
338
+; get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size)
339 339
 cglobal get_pixels, 3,4
340 340
     movsxdifnidn r2, r2d
341 341
     add          r0, 128
... ...
@@ -392,7 +392,7 @@ cglobal get_pixels, 3, 4
392 392
     RET
393 393
 
394 394
 INIT_MMX mmx
395
-; diff_pixels_mmx(DCTELEM *block, const uint8_t *s1, const unint8_t *s2, stride)
395
+; diff_pixels_mmx(int16_t *block, const uint8_t *s1, const unint8_t *s2, stride)
396 396
 cglobal diff_pixels, 4,5
397 397
     movsxdifnidn r3, r3d
398 398
     pxor         m7, m7
... ...
@@ -30,9 +30,9 @@
30 30
 #include "libavcodec/mathops.h"
31 31
 #include "dsputil_mmx.h"
32 32
 
33
-void ff_get_pixels_mmx(DCTELEM *block, const uint8_t *pixels, int line_size);
34
-void ff_get_pixels_sse2(DCTELEM *block, const uint8_t *pixels, int line_size);
35
-void ff_diff_pixels_mmx(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride);
33
+void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size);
34
+void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, int line_size);
35
+void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2, int stride);
36 36
 int ff_pix_sum16_mmx(uint8_t * pix, int line_size);
37 37
 int ff_pix_norm1_mmx(uint8_t *pix, int line_size);
38 38
 
... ...
@@ -798,7 +798,7 @@ static void sub_hfyu_median_prediction_mmxext(uint8_t *dst, const uint8_t *src1,
798 798
     HSUM(%%xmm0, %%xmm1, %0)
799 799
 
800 800
 #define DCT_SAD_FUNC(cpu) \
801
-static int sum_abs_dctelem_##cpu(DCTELEM *block){\
801
+static int sum_abs_dctelem_##cpu(int16_t *block){\
802 802
     int sum;\
803 803
     __asm__ volatile(\
804 804
         DCT_SAD\
... ...
@@ -302,7 +302,7 @@ cglobal h264_idct8_dc_add_8, 3, 3, 0
302 302
 
303 303
 INIT_MMX mmx
304 304
 ; ff_h264_idct_add16_mmx(uint8_t *dst, const int *block_offset,
305
-;             DCTELEM *block, int stride, const uint8_t nnzc[6*8])
305
+;             int16_t *block, int stride, const uint8_t nnzc[6*8])
306 306
 cglobal h264_idct_add16_8, 5, 7 + npicregs, 0, dst, block_offset, block, stride, nnzc, cntr, coeff, picreg
307 307
     xor          r5, r5
308 308
 %ifdef PIC
... ...
@@ -324,7 +324,7 @@ cglobal h264_idct_add16_8, 5, 7 + npicregs, 0, dst, block_offset, block, stride,
324 324
     REP_RET
325 325
 
326 326
 ; ff_h264_idct8_add4_mmx(uint8_t *dst, const int *block_offset,
327
-;                        DCTELEM *block, int stride, const uint8_t nnzc[6*8])
327
+;                        int16_t *block, int stride, const uint8_t nnzc[6*8])
328 328
 cglobal h264_idct8_add4_8, 5, 7 + npicregs, 0, dst, block_offset, block, stride, nnzc, cntr, coeff, picreg
329 329
     %assign pad 128+4-(stack_offset&7)
330 330
     SUB         rsp, pad
... ...
@@ -357,7 +357,7 @@ cglobal h264_idct8_add4_8, 5, 7 + npicregs, 0, dst, block_offset, block, stride,
357 357
 
358 358
 INIT_MMX mmxext
359 359
 ; ff_h264_idct_add16_mmxext(uint8_t *dst, const int *block_offset,
360
-;                           DCTELEM *block, int stride, const uint8_t nnzc[6*8])
360
+;                           int16_t *block, int stride, const uint8_t nnzc[6*8])
361 361
 cglobal h264_idct_add16_8, 5, 8 + npicregs, 0, dst1, block_offset, block, stride, nnzc, cntr, coeff, dst2, picreg
362 362
     xor          r5, r5
363 363
 %ifdef PIC
... ...
@@ -402,7 +402,7 @@ cglobal h264_idct_add16_8, 5, 8 + npicregs, 0, dst1, block_offset, block, stride
402 402
 
403 403
 INIT_MMX mmx
404 404
 ; ff_h264_idct_add16intra_mmx(uint8_t *dst, const int *block_offset,
405
-;                             DCTELEM *block, int stride, const uint8_t nnzc[6*8])
405
+;                             int16_t *block, int stride, const uint8_t nnzc[6*8])
406 406
 cglobal h264_idct_add16intra_8, 5, 7 + npicregs, 0, dst, block_offset, block, stride, nnzc, cntr, coeff, picreg
407 407
     xor          r5, r5
408 408
 %ifdef PIC
... ...
@@ -426,7 +426,7 @@ cglobal h264_idct_add16intra_8, 5, 7 + npicregs, 0, dst, block_offset, block, st
426 426
 
427 427
 INIT_MMX mmxext
428 428
 ; ff_h264_idct_add16intra_mmxext(uint8_t *dst, const int *block_offset,
429
-;                                DCTELEM *block, int stride,
429
+;                                int16_t *block, int stride,
430 430
 ;                                const uint8_t nnzc[6*8])
431 431
 cglobal h264_idct_add16intra_8, 5, 8 + npicregs, 0, dst1, block_offset, block, stride, nnzc, cntr, coeff, dst2, picreg
432 432
     xor          r5, r5
... ...
@@ -469,7 +469,7 @@ cglobal h264_idct_add16intra_8, 5, 8 + npicregs, 0, dst1, block_offset, block, s
469 469
     REP_RET
470 470
 
471 471
 ; ff_h264_idct8_add4_mmxext(uint8_t *dst, const int *block_offset,
472
-;                           DCTELEM *block, int stride,
472
+;                           int16_t *block, int stride,
473 473
 ;                           const uint8_t nnzc[6*8])
474 474
 cglobal h264_idct8_add4_8, 5, 8 + npicregs, 0, dst1, block_offset, block, stride, nnzc, cntr, coeff, dst2, picreg
475 475
     %assign pad 128+4-(stack_offset&7)
... ...
@@ -530,7 +530,7 @@ cglobal h264_idct8_add4_8, 5, 8 + npicregs, 0, dst1, block_offset, block, stride
530 530
 
531 531
 INIT_XMM sse2
532 532
 ; ff_h264_idct8_add4_sse2(uint8_t *dst, const int *block_offset,
533
-;                         DCTELEM *block, int stride, const uint8_t nnzc[6*8])
533
+;                         int16_t *block, int stride, const uint8_t nnzc[6*8])
534 534
 cglobal h264_idct8_add4_8, 5, 8 + npicregs, 10, dst1, block_offset, block, stride, nnzc, cntr, coeff, dst2, picreg
535 535
     xor          r5, r5
536 536
 %ifdef PIC
... ...
@@ -605,7 +605,7 @@ h264_idct_add8_mmx_plane:
605 605
     rep ret
606 606
 
607 607
 ; ff_h264_idct_add8_mmx(uint8_t **dest, const int *block_offset,
608
-;                       DCTELEM *block, int stride, const uint8_t nnzc[6*8])
608
+;                       int16_t *block, int stride, const uint8_t nnzc[6*8])
609 609
 cglobal h264_idct_add8_8, 5, 8 + npicregs, 0, dst1, block_offset, block, stride, nnzc, cntr, coeff, dst2, picreg
610 610
     mov          r5, 16
611 611
     add          r2, 512
... ...
@@ -669,7 +669,7 @@ h264_idct_add8_mmxext_plane:
669 669
 
670 670
 INIT_MMX mmxext
671 671
 ; ff_h264_idct_add8_mmxext(uint8_t **dest, const int *block_offset,
672
-;                          DCTELEM *block, int stride, const uint8_t nnzc[6*8])
672
+;                          int16_t *block, int stride, const uint8_t nnzc[6*8])
673 673
 cglobal h264_idct_add8_8, 5, 8 + npicregs, 0, dst1, block_offset, block, stride, nnzc, cntr, coeff, dst2, picreg
674 674
     mov          r5, 16
675 675
     add          r2, 512
... ...
@@ -746,7 +746,7 @@ h264_add8x4_idct_sse2:
746 746
 %endmacro
747 747
 
748 748
 ; ff_h264_idct_add16_sse2(uint8_t *dst, const int *block_offset,
749
-;                         DCTELEM *block, int stride, const uint8_t nnzc[6*8])
749
+;                         int16_t *block, int stride, const uint8_t nnzc[6*8])
750 750
 cglobal h264_idct_add16_8, 5, 5 + ARCH_X86_64, 8
751 751
 %if ARCH_X86_64
752 752
     mov         r5, r0
... ...
@@ -793,7 +793,7 @@ cglobal h264_idct_add16_8, 5, 5 + ARCH_X86_64, 8
793 793
 %endmacro
794 794
 
795 795
 ; ff_h264_idct_add16intra_sse2(uint8_t *dst, const int *block_offset,
796
-;                              DCTELEM *block, int stride, const uint8_t nnzc[6*8])
796
+;                              int16_t *block, int stride, const uint8_t nnzc[6*8])
797 797
 cglobal h264_idct_add16intra_8, 5, 7 + ARCH_X86_64, 8
798 798
 %if ARCH_X86_64
799 799
     mov         r7, r0
... ...
@@ -844,7 +844,7 @@ cglobal h264_idct_add16intra_8, 5, 7 + ARCH_X86_64, 8
844 844
 %endmacro
845 845
 
846 846
 ; ff_h264_idct_add8_sse2(uint8_t **dest, const int *block_offset,
847
-;                        DCTELEM *block, int stride, const uint8_t nnzc[6*8])
847
+;                        int16_t *block, int stride, const uint8_t nnzc[6*8])
848 848
 cglobal h264_idct_add8_8, 5, 7 + ARCH_X86_64, 8
849 849
     add          r2, 512
850 850
 %if ARCH_X86_64
... ...
@@ -861,7 +861,7 @@ cglobal h264_idct_add8_8, 5, 7 + ARCH_X86_64, 8
861 861
     add8_sse2_cycle 3, 0x64
862 862
     RET
863 863
 
864
-;void ff_h264_luma_dc_dequant_idct_mmx(DCTELEM *output, DCTELEM *input, int qmul)
864
+;void ff_h264_luma_dc_dequant_idct_mmx(int16_t *output, int16_t *input, int qmul)
865 865
 
866 866
 %macro WALSH4_1D 5
867 867
     SUMSUB_BADC w, %4, %3, %2, %1, %5
... ...
@@ -20,6 +20,7 @@
20 20
 
21 21
 #include "libavutil/cpu.h"
22 22
 #include "libavutil/x86/cpu.h"
23
+#include "libavcodec/avcodec.h"
23 24
 #include "libavcodec/h264pred.h"
24 25
 
25 26
 #define PRED4x4(TYPE, DEPTH, OPT) \
... ...
@@ -48,7 +48,7 @@ IDCT_ADD_FUNC(8, 10, avx)
48 48
 #define IDCT_ADD_REP_FUNC(NUM, REP, DEPTH, OPT)                         \
49 49
 void ff_h264_idct ## NUM ## _add ## REP ## _ ## DEPTH ## _ ## OPT       \
50 50
     (uint8_t *dst, const int *block_offset,                             \
51
-     DCTELEM *block, int stride, const uint8_t nnzc[6 * 8]);
51
+     int16_t *block, int stride, const uint8_t nnzc[6 * 8]);
52 52
 
53 53
 IDCT_ADD_REP_FUNC(8, 4, 8, mmx)
54 54
 IDCT_ADD_REP_FUNC(8, 4, 8, mmxext)
... ...
@@ -70,7 +70,7 @@ IDCT_ADD_REP_FUNC(, 16intra, 10, avx)
70 70
 #define IDCT_ADD_REP_FUNC2(NUM, REP, DEPTH, OPT)                      \
71 71
 void ff_h264_idct ## NUM ## _add ## REP ## _ ## DEPTH ## _ ## OPT     \
72 72
     (uint8_t **dst, const int *block_offset,                          \
73
-     DCTELEM *block, int stride, const uint8_t nnzc[6 * 8]);
73
+     int16_t *block, int stride, const uint8_t nnzc[6 * 8]);
74 74
 
75 75
 IDCT_ADD_REP_FUNC2(, 8, 8, mmx)
76 76
 IDCT_ADD_REP_FUNC2(, 8, 8, mmxext)
... ...
@@ -78,8 +78,8 @@ IDCT_ADD_REP_FUNC2(, 8, 8, sse2)
78 78
 IDCT_ADD_REP_FUNC2(, 8, 10, sse2)
79 79
 IDCT_ADD_REP_FUNC2(, 8, 10, avx)
80 80
 
81
-void ff_h264_luma_dc_dequant_idct_mmx(DCTELEM *output, DCTELEM *input, int qmul);
82
-void ff_h264_luma_dc_dequant_idct_sse2(DCTELEM *output, DCTELEM *input, int qmul);
81
+void ff_h264_luma_dc_dequant_idct_mmx(int16_t *output, int16_t *input, int qmul);
82
+void ff_h264_luma_dc_dequant_idct_sse2(int16_t *output, int16_t *input, int qmul);
83 83
 
84 84
 /***********************************/
85 85
 /* deblocking */
... ...
@@ -531,25 +531,25 @@ __asm__ volatile(
531 531
     :: "r"(block), "r"(rounder_0), "r"(tab_i_04_xmm), "r"(tg_1_16));
532 532
 }
533 533
 
534
-void ff_idct_xvid_mmx_put(uint8_t *dest, int line_size, DCTELEM *block)
534
+void ff_idct_xvid_mmx_put(uint8_t *dest, int line_size, int16_t *block)
535 535
 {
536 536
     ff_idct_xvid_mmx(block);
537 537
     ff_put_pixels_clamped_mmx(block, dest, line_size);
538 538
 }
539 539
 
540
-void ff_idct_xvid_mmx_add(uint8_t *dest, int line_size, DCTELEM *block)
540
+void ff_idct_xvid_mmx_add(uint8_t *dest, int line_size, int16_t *block)
541 541
 {
542 542
     ff_idct_xvid_mmx(block);
543 543
     ff_add_pixels_clamped_mmx(block, dest, line_size);
544 544
 }
545 545
 
546
-void ff_idct_xvid_mmxext_put(uint8_t *dest, int line_size, DCTELEM *block)
546
+void ff_idct_xvid_mmxext_put(uint8_t *dest, int line_size, int16_t *block)
547 547
 {
548 548
     ff_idct_xvid_mmxext(block);
549 549
     ff_put_pixels_clamped_mmx(block, dest, line_size);
550 550
 }
551 551
 
552
-void ff_idct_xvid_mmxext_add(uint8_t *dest, int line_size, DCTELEM *block)
552
+void ff_idct_xvid_mmxext_add(uint8_t *dest, int line_size, int16_t *block)
553 553
 {
554 554
     ff_idct_xvid_mmxext(block);
555 555
     ff_add_pixels_clamped_mmx(block, dest, line_size);
... ...
@@ -28,15 +28,13 @@
28 28
 
29 29
 #include <stdint.h>
30 30
 
31
-#include "libavcodec/dsputil.h"
32
-
33 31
 void ff_idct_xvid_mmx(short *block);
34
-void ff_idct_xvid_mmx_put(uint8_t *dest, int line_size, DCTELEM *block);
35
-void ff_idct_xvid_mmx_add(uint8_t *dest, int line_size, DCTELEM *block);
32
+void ff_idct_xvid_mmx_put(uint8_t *dest, int line_size, int16_t *block);
33
+void ff_idct_xvid_mmx_add(uint8_t *dest, int line_size, int16_t *block);
36 34
 
37 35
 void ff_idct_xvid_mmxext(short *block);
38
-void ff_idct_xvid_mmxext_put(uint8_t *dest, int line_size, DCTELEM *block);
39
-void ff_idct_xvid_mmxext_add(uint8_t *dest, int line_size, DCTELEM *block);
36
+void ff_idct_xvid_mmxext_put(uint8_t *dest, int line_size, int16_t *block);
37
+void ff_idct_xvid_mmxext_add(uint8_t *dest, int line_size, int16_t *block);
40 38
 
41 39
 void ff_idct_xvid_sse2(short *block);
42 40
 void ff_idct_xvid_sse2_put(uint8_t *dest, int line_size, short *block);
... ...
@@ -29,7 +29,7 @@
29 29
 #if HAVE_INLINE_ASM
30 30
 
31 31
 static void dct_unquantize_h263_intra_mmx(MpegEncContext *s,
32
-                                  DCTELEM *block, int n, int qscale)
32
+                                  int16_t *block, int n, int qscale)
33 33
 {
34 34
     x86_reg level, qmul, qadd, nCoeffs;
35 35
 
... ...
@@ -104,7 +104,7 @@ __asm__ volatile(
104 104
 
105 105
 
106 106
 static void dct_unquantize_h263_inter_mmx(MpegEncContext *s,
107
-                                  DCTELEM *block, int n, int qscale)
107
+                                  int16_t *block, int n, int qscale)
108 108
 {
109 109
     x86_reg qmul, qadd, nCoeffs;
110 110
 
... ...
@@ -187,7 +187,7 @@ __asm__ volatile(
187 187
  high3 += tlow1
188 188
 */
189 189
 static void dct_unquantize_mpeg1_intra_mmx(MpegEncContext *s,
190
-                                     DCTELEM *block, int n, int qscale)
190
+                                     int16_t *block, int n, int qscale)
191 191
 {
192 192
     x86_reg nCoeffs;
193 193
     const uint16_t *quant_matrix;
... ...
@@ -256,7 +256,7 @@ __asm__ volatile(
256 256
 }
257 257
 
258 258
 static void dct_unquantize_mpeg1_inter_mmx(MpegEncContext *s,
259
-                                     DCTELEM *block, int n, int qscale)
259
+                                     int16_t *block, int n, int qscale)
260 260
 {
261 261
     x86_reg nCoeffs;
262 262
     const uint16_t *quant_matrix;
... ...
@@ -322,7 +322,7 @@ __asm__ volatile(
322 322
 }
323 323
 
324 324
 static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s,
325
-                                     DCTELEM *block, int n, int qscale)
325
+                                     int16_t *block, int n, int qscale)
326 326
 {
327 327
     x86_reg nCoeffs;
328 328
     const uint16_t *quant_matrix;
... ...
@@ -388,7 +388,7 @@ __asm__ volatile(
388 388
 }
389 389
 
390 390
 static void dct_unquantize_mpeg2_inter_mmx(MpegEncContext *s,
391
-                                     DCTELEM *block, int n, int qscale)
391
+                                     int16_t *block, int n, int qscale)
392 392
 {
393 393
     x86_reg nCoeffs;
394 394
     const uint16_t *quant_matrix;
... ...
@@ -464,7 +464,7 @@ __asm__ volatile(
464 464
         );
465 465
 }
466 466
 
467
-static void  denoise_dct_mmx(MpegEncContext *s, DCTELEM *block){
467
+static void  denoise_dct_mmx(MpegEncContext *s, int16_t *block){
468 468
     const int intra= s->mb_intra;
469 469
     int *sum= s->dct_error_sum[intra];
470 470
     uint16_t *offset= s->dct_offset[intra];
... ...
@@ -518,7 +518,7 @@ static void  denoise_dct_mmx(MpegEncContext *s, DCTELEM *block){
518 518
     );
519 519
 }
520 520
 
521
-static void  denoise_dct_sse2(MpegEncContext *s, DCTELEM *block){
521
+static void  denoise_dct_sse2(MpegEncContext *s, int16_t *block){
522 522
     const int intra= s->mb_intra;
523 523
     int *sum= s->dct_error_sum[intra];
524 524
     uint16_t *offset= s->dct_offset[intra];
... ...
@@ -92,7 +92,7 @@
92 92
 #endif
93 93
 
94 94
 static int RENAME(dct_quantize)(MpegEncContext *s,
95
-                            DCTELEM *block, int n,
95
+                            int16_t *block, int n,
96 96
                             int qscale, int *overflow)
97 97
 {
98 98
     x86_reg last_non_zero_p1;
... ...
@@ -232,7 +232,7 @@ section .text align=16
232 232
 %endmacro
233 233
 
234 234
 ; void prores_idct_put_10_<opt>(uint8_t *pixels, int stride,
235
-;                               DCTELEM *block, const int16_t *qmat);
235
+;                               int16_t *block, const int16_t *qmat);
236 236
 %macro idct_put_fn 1
237 237
 cglobal prores_idct_put_10, 4, 4, %1
238 238
     movsxd      r1,  r1d
... ...
@@ -21,14 +21,15 @@
21 21
  */
22 22
 
23 23
 #include "libavutil/x86/cpu.h"
24
+#include "libavcodec/dsputil.h"
24 25
 #include "libavcodec/proresdsp.h"
25 26
 
26 27
 void ff_prores_idct_put_10_sse2(uint16_t *dst, int linesize,
27
-                                DCTELEM *block, const int16_t *qmat);
28
+                                int16_t *block, const int16_t *qmat);
28 29
 void ff_prores_idct_put_10_sse4(uint16_t *dst, int linesize,
29
-                                DCTELEM *block, const int16_t *qmat);
30
+                                int16_t *block, const int16_t *qmat);
30 31
 void ff_prores_idct_put_10_avx (uint16_t *dst, int linesize,
31
-                                DCTELEM *block, const int16_t *qmat);
32
+                                int16_t *block, const int16_t *qmat);
32 33
 
33 34
 void ff_proresdsp_x86_init(ProresDSPContext *dsp, AVCodecContext *avctx)
34 35
 {
... ...
@@ -133,7 +133,7 @@ cglobal rv34_idct_dc_add, 3, 3
133 133
     mova        mm5, [pd_512]           ; 0x200
134 134
 %endmacro
135 135
 
136
-; ff_rv34_idct_add_mmxext(uint8_t *dst, ptrdiff_t stride, DCTELEM *block);
136
+; ff_rv34_idct_add_mmxext(uint8_t *dst, ptrdiff_t stride, int16_t *block);
137 137
 %macro COL_TRANSFORM  4
138 138
     pshufw      mm3, %2, 0xDD        ; col. 1,3,1,3
139 139
     pshufw       %2, %2, 0x88        ; col. 0,2,0,2
... ...
@@ -25,11 +25,11 @@
25 25
 #include "libavcodec/dsputil.h"
26 26
 #include "libavcodec/rv34dsp.h"
27 27
 
28
-void ff_rv34_idct_dc_mmxext(DCTELEM *block);
29
-void ff_rv34_idct_dc_noround_mmxext(DCTELEM *block);
28
+void ff_rv34_idct_dc_mmxext(int16_t *block);
29
+void ff_rv34_idct_dc_noround_mmxext(int16_t *block);
30 30
 void ff_rv34_idct_dc_add_mmx(uint8_t *dst, ptrdiff_t stride, int dc);
31 31
 void ff_rv34_idct_dc_add_sse4(uint8_t *dst, ptrdiff_t stride, int dc);
32
-void ff_rv34_idct_add_mmxext(uint8_t *dst, ptrdiff_t stride, DCTELEM *block);
32
+void ff_rv34_idct_add_mmxext(uint8_t *dst, ptrdiff_t stride, int16_t *block);
33 33
 
34 34
 av_cold void ff_rv34dsp_init_x86(RV34DSPContext* c, DSPContext *dsp)
35 35
 {
... ...
@@ -1154,12 +1154,12 @@ void ff_simple_idct_mmx(int16_t *block)
1154 1154
 
1155 1155
 //FIXME merge add/put into the idct
1156 1156
 
1157
-void ff_simple_idct_put_mmx(uint8_t *dest, int line_size, DCTELEM *block)
1157
+void ff_simple_idct_put_mmx(uint8_t *dest, int line_size, int16_t *block)
1158 1158
 {
1159 1159
     idct(block);
1160 1160
     ff_put_pixels_clamped_mmx(block, dest, line_size);
1161 1161
 }
1162
-void ff_simple_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block)
1162
+void ff_simple_idct_add_mmx(uint8_t *dest, int line_size, int16_t *block)
1163 1163
 {
1164 1164
     idct(block);
1165 1165
     ff_add_pixels_clamped_mmx(block, dest, line_size);
... ...
@@ -493,7 +493,7 @@ DECLARE_FUNCTION(3, 2)
493 493
 DECLARE_FUNCTION(3, 3)
494 494
 
495 495
 static void vc1_inv_trans_4x4_dc_mmxext(uint8_t *dest, int linesize,
496
-                                        DCTELEM *block)
496
+                                        int16_t *block)
497 497
 {
498 498
     int dc = block[0];
499 499
     dc = (17 * dc +  4) >> 3;
... ...
@@ -532,7 +532,7 @@ static void vc1_inv_trans_4x4_dc_mmxext(uint8_t *dest, int linesize,
532 532
 }
533 533
 
534 534
 static void vc1_inv_trans_4x8_dc_mmxext(uint8_t *dest, int linesize,
535
-                                        DCTELEM *block)
535
+                                        int16_t *block)
536 536
 {
537 537
     int dc = block[0];
538 538
     dc = (17 * dc +  4) >> 3;
... ...
@@ -594,7 +594,7 @@ static void vc1_inv_trans_4x8_dc_mmxext(uint8_t *dest, int linesize,
594 594
 }
595 595
 
596 596
 static void vc1_inv_trans_8x4_dc_mmxext(uint8_t *dest, int linesize,
597
-                                        DCTELEM *block)
597
+                                        int16_t *block)
598 598
 {
599 599
     int dc = block[0];
600 600
     dc = ( 3 * dc +  1) >> 1;
... ...
@@ -633,7 +633,7 @@ static void vc1_inv_trans_8x4_dc_mmxext(uint8_t *dest, int linesize,
633 633
 }
634 634
 
635 635
 static void vc1_inv_trans_8x8_dc_mmxext(uint8_t *dest, int linesize,
636
-                                        DCTELEM *block)
636
+                                        int16_t *block)
637 637
 {
638 638
     int dc = block[0];
639 639
     dc = (3 * dc +  1) >> 1;
... ...
@@ -25,17 +25,18 @@
25 25
 #include "libavutil/x86/cpu.h"
26 26
 #include "libavutil/x86/asm.h"
27 27
 #include "libavcodec/avcodec.h"
28
+#include "libavcodec/dsputil.h"
28 29
 #include "libavcodec/vp3dsp.h"
29 30
 #include "config.h"
30 31
 
31
-void ff_vp3_idct_put_mmx(uint8_t *dest, int line_size, DCTELEM *block);
32
-void ff_vp3_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block);
32
+void ff_vp3_idct_put_mmx(uint8_t *dest, int line_size, int16_t *block);
33
+void ff_vp3_idct_add_mmx(uint8_t *dest, int line_size, int16_t *block);
33 34
 
34
-void ff_vp3_idct_put_sse2(uint8_t *dest, int line_size, DCTELEM *block);
35
-void ff_vp3_idct_add_sse2(uint8_t *dest, int line_size, DCTELEM *block);
35
+void ff_vp3_idct_put_sse2(uint8_t *dest, int line_size, int16_t *block);
36
+void ff_vp3_idct_add_sse2(uint8_t *dest, int line_size, int16_t *block);
36 37
 
37 38
 void ff_vp3_idct_dc_add_mmxext(uint8_t *dest, int line_size,
38
-                               DCTELEM *block);
39
+                               int16_t *block);
39 40
 
40 41
 void ff_vp3_v_loop_filter_mmxext(uint8_t *src, int stride,
41 42
                                  int *bounding_values);
... ...
@@ -899,7 +899,7 @@ cglobal put_vp8_pixels16, 5, 5, 2, dst, dststride, src, srcstride, height
899 899
     REP_RET
900 900
 
901 901
 ;-----------------------------------------------------------------------------
902
-; void vp8_idct_dc_add_<opt>(uint8_t *dst, DCTELEM block[16], int stride);
902
+; void vp8_idct_dc_add_<opt>(uint8_t *dst, int16_t block[16], int stride);
903 903
 ;-----------------------------------------------------------------------------
904 904
 
905 905
 %macro ADD_DC 4
... ...
@@ -977,7 +977,7 @@ cglobal vp8_idct_dc_add, 3, 3, 6, dst, block, stride
977 977
     RET
978 978
 
979 979
 ;-----------------------------------------------------------------------------
980
-; void vp8_idct_dc_add4y_<opt>(uint8_t *dst, DCTELEM block[4][16], int stride);
980
+; void vp8_idct_dc_add4y_<opt>(uint8_t *dst, int16_t block[4][16], int stride);
981 981
 ;-----------------------------------------------------------------------------
982 982
 
983 983
 %if ARCH_X86_32
... ...
@@ -1050,7 +1050,7 @@ cglobal vp8_idct_dc_add4y, 3, 3, 6, dst, block, stride
1050 1050
     RET
1051 1051
 
1052 1052
 ;-----------------------------------------------------------------------------
1053
-; void vp8_idct_dc_add4uv_<opt>(uint8_t *dst, DCTELEM block[4][16], int stride);
1053
+; void vp8_idct_dc_add4uv_<opt>(uint8_t *dst, int16_t block[4][16], int stride);
1054 1054
 ;-----------------------------------------------------------------------------
1055 1055
 
1056 1056
 INIT_MMX mmx
... ...
@@ -1092,7 +1092,7 @@ cglobal vp8_idct_dc_add4uv, 3, 3, 0, dst, block, stride
1092 1092
     RET
1093 1093
 
1094 1094
 ;-----------------------------------------------------------------------------
1095
-; void vp8_idct_add_<opt>(uint8_t *dst, DCTELEM block[16], int stride);
1095
+; void vp8_idct_add_<opt>(uint8_t *dst, int16_t block[16], int stride);
1096 1096
 ;-----------------------------------------------------------------------------
1097 1097
 
1098 1098
 ; calculate %1=mul_35468(%1)-mul_20091(%2); %2=mul_20091(%1)+mul_35468(%2)
... ...
@@ -1172,7 +1172,7 @@ INIT_MMX sse
1172 1172
 VP8_IDCT_ADD
1173 1173
 
1174 1174
 ;-----------------------------------------------------------------------------
1175
-; void vp8_luma_dc_wht_mmxext(DCTELEM block[4][4][16], DCTELEM dc[16])
1175
+; void vp8_luma_dc_wht_mmxext(int16_t block[4][4][16], int16_t dc[16])
1176 1176
 ;-----------------------------------------------------------------------------
1177 1177
 
1178 1178
 %macro SCATTER_WHT 3
... ...
@@ -230,21 +230,21 @@ HVBILIN(ssse3, 8,  4,  8)
230 230
 HVBILIN(ssse3, 8,  8, 16)
231 231
 HVBILIN(ssse3, 8, 16, 16)
232 232
 
233
-extern void ff_vp8_idct_dc_add_mmx(uint8_t *dst, DCTELEM block[16],
233
+extern void ff_vp8_idct_dc_add_mmx(uint8_t *dst, int16_t block[16],
234 234
                                    ptrdiff_t stride);
235
-extern void ff_vp8_idct_dc_add_sse4(uint8_t *dst, DCTELEM block[16],
235
+extern void ff_vp8_idct_dc_add_sse4(uint8_t *dst, int16_t block[16],
236 236
                                     ptrdiff_t stride);
237
-extern void ff_vp8_idct_dc_add4y_mmx(uint8_t *dst, DCTELEM block[4][16],
237
+extern void ff_vp8_idct_dc_add4y_mmx(uint8_t *dst, int16_t block[4][16],
238 238
                                       ptrdiff_t stride);
239
-extern void ff_vp8_idct_dc_add4y_sse2(uint8_t *dst, DCTELEM block[4][16],
239
+extern void ff_vp8_idct_dc_add4y_sse2(uint8_t *dst, int16_t block[4][16],
240 240
                                       ptrdiff_t stride);
241
-extern void ff_vp8_idct_dc_add4uv_mmx(uint8_t *dst, DCTELEM block[2][16],
241
+extern void ff_vp8_idct_dc_add4uv_mmx(uint8_t *dst, int16_t block[2][16],
242 242
                                       ptrdiff_t stride);
243
-extern void ff_vp8_luma_dc_wht_mmx(DCTELEM block[4][4][16], DCTELEM dc[16]);
244
-extern void ff_vp8_luma_dc_wht_sse(DCTELEM block[4][4][16], DCTELEM dc[16]);
245
-extern void ff_vp8_idct_add_mmx(uint8_t *dst, DCTELEM block[16],
243
+extern void ff_vp8_luma_dc_wht_mmx(int16_t block[4][4][16], int16_t dc[16]);
244
+extern void ff_vp8_luma_dc_wht_sse(int16_t block[4][4][16], int16_t dc[16]);
245
+extern void ff_vp8_idct_add_mmx(uint8_t *dst, int16_t block[16],
246 246
                                 ptrdiff_t stride);
247
-extern void ff_vp8_idct_add_sse(uint8_t *dst, DCTELEM block[16],
247
+extern void ff_vp8_idct_add_sse(uint8_t *dst, int16_t block[16],
248 248
                                 ptrdiff_t stride);
249 249
 
250 250
 #define DECLARE_LOOP_FILTER(NAME)\
... ...
@@ -164,10 +164,10 @@ static void mul_thrmat_c(struct vf_priv_s *p,int q)
164 164
         ((short*)p->threshold_mtx)[a]=q * ((short*)p->threshold_mtx_noq)[a];//ints faster in C
165 165
 }
166 166
 
167
-static void column_fidct_c(int16_t* thr_adr, DCTELEM *data, DCTELEM *output, int cnt);
168
-static void row_idct_c(DCTELEM* workspace,
167
+static void column_fidct_c(int16_t* thr_adr, int16_t *data, int16_t *output, int cnt);
168
+static void row_idct_c(int16_t* workspace,
169 169
                        int16_t* output_adr, int output_stride, int cnt);
170
-static void row_fdct_c(DCTELEM *data, const uint8_t *pixels, int line_size, int cnt);
170
+static void row_fdct_c(int16_t *data, const uint8_t *pixels, int line_size, int cnt);
171 171
 
172 172
 //this is rather ugly, but there is no need for function pointers
173 173
 #define store_slice_s store_slice_c
... ...
@@ -393,10 +393,10 @@ static void mul_thrmat_mmx(struct vf_priv_s *p, int q)
393 393
         );
394 394
 }
395 395
 
396
-static void column_fidct_mmx(int16_t* thr_adr,  DCTELEM *data,  DCTELEM *output,  int cnt);
397
-static void row_idct_mmx(DCTELEM* workspace,
396
+static void column_fidct_mmx(int16_t* thr_adr,  int16_t *data,  int16_t *output,  int cnt);
397
+static void row_idct_mmx(int16_t* workspace,
398 398
                          int16_t* output_adr,  int output_stride,  int cnt);
399
-static void row_fdct_mmx(DCTELEM *data,  const uint8_t *pixels,  int line_size,  int cnt);
399
+static void row_fdct_mmx(int16_t *data,  const uint8_t *pixels,  int line_size,  int cnt);
400 400
 
401 401
 #define store_slice_s store_slice_mmx
402 402
 #define store_slice2_s store_slice2_mmx
... ...
@@ -416,8 +416,8 @@ static void filter(struct vf_priv_s *p, uint8_t *dst, uint8_t *src,
416 416
     const int step=6-p->log2_count;
417 417
     const int qps= 3 + is_luma;
418 418
     int32_t __attribute__((aligned(32))) block_align[4*8*BLOCKSZ+ 4*8*BLOCKSZ];
419
-    DCTELEM *block= (DCTELEM *)block_align;
420
-    DCTELEM *block3=(DCTELEM *)(block_align+4*8*BLOCKSZ);
419
+    int16_t *block= (int16_t *)block_align;
420
+    int16_t *block3=(int16_t *)(block_align+4*8*BLOCKSZ);
421 421
 
422 422
     memset(block3, 0, 4*8*BLOCKSZ);
423 423
 
... ...
@@ -460,8 +460,8 @@ static void filter(struct vf_priv_s *p, uint8_t *dst, uint8_t *src,
460 460
                     column_fidct_s((int16_t*)(&p->threshold_mtx[0]), block+x*8, block3+x*8, 8); //yes, this is a HOTSPOT
461 461
                 }
462 462
             row_idct_s(block3+0*8, p->temp + (y&15)*stride+x0+2-(y&1), stride, 2*(BLOCKSZ-1));
463
-            memmove(block, block+(BLOCKSZ-1)*64, 8*8*sizeof(DCTELEM)); //cycling
464
-            memmove(block3, block3+(BLOCKSZ-1)*64, 6*8*sizeof(DCTELEM));
463
+            memmove(block, block+(BLOCKSZ-1)*64, 8*8*sizeof(int16_t)); //cycling
464
+            memmove(block3, block3+(BLOCKSZ-1)*64, 6*8*sizeof(int16_t));
465 465
         }
466 466
         //
467 467
         es=width+8-x0; //  8, ...
... ...
@@ -694,7 +694,7 @@ const vf_info_t ff_vf_info_fspp = {
694 694
 
695 695
 //#define MANGLE(a) #a
696 696
 
697
-//typedef int16_t DCTELEM; //! only int16_t
697
+//typedef int16_t int16_t; //! only int16_t
698 698
 
699 699
 #define DCTSIZE 8
700 700
 #define DCTSIZE_S "8"
... ...
@@ -745,15 +745,15 @@ static const int16_t FIX_1_082392200=FIX(1.082392200, 13);
745 745
 
746 746
 #if !HAVE_MMX
747 747
 
748
-static void column_fidct_c(int16_t* thr_adr, DCTELEM *data, DCTELEM *output, int cnt)
748
+static void column_fidct_c(int16_t* thr_adr, int16_t *data, int16_t *output, int cnt)
749 749
 {
750 750
     int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
751 751
     int_simd16_t tmp10, tmp11, tmp12, tmp13;
752 752
     int_simd16_t z1,z2,z3,z4,z5, z10, z11, z12, z13;
753 753
     int_simd16_t d0, d1, d2, d3, d4, d5, d6, d7;
754 754
 
755
-    DCTELEM* dataptr;
756
-    DCTELEM* wsptr;
755
+    int16_t* dataptr;
756
+    int16_t* wsptr;
757 757
     int16_t *threshold;
758 758
     int ctr;
759 759
 
... ...
@@ -870,7 +870,7 @@ static void column_fidct_c(int16_t* thr_adr, DCTELEM *data, DCTELEM *output, int
870 870
 
871 871
 #else /* HAVE_MMX */
872 872
 
873
-static void column_fidct_mmx(int16_t* thr_adr,  DCTELEM *data,  DCTELEM *output,  int cnt)
873
+static void column_fidct_mmx(int16_t* thr_adr,  int16_t *data,  int16_t *output,  int cnt)
874 874
 {
875 875
     uint64_t __attribute__((aligned(8))) temps[4];
876 876
     __asm__ volatile(
... ...
@@ -1605,14 +1605,14 @@ static void column_fidct_mmx(int16_t* thr_adr,  DCTELEM *data,  DCTELEM *output,
1605 1605
 
1606 1606
 #if !HAVE_MMX
1607 1607
 
1608
-static void row_idct_c(DCTELEM* workspace,
1608
+static void row_idct_c(int16_t* workspace,
1609 1609
                        int16_t* output_adr, int output_stride, int cnt)
1610 1610
 {
1611 1611
     int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
1612 1612
     int_simd16_t tmp10, tmp11, tmp12, tmp13;
1613 1613
     int_simd16_t z5, z10, z11, z12, z13;
1614 1614
     int16_t* outptr;
1615
-    DCTELEM* wsptr;
1615
+    int16_t* wsptr;
1616 1616
 
1617 1617
     cnt*=4;
1618 1618
     wsptr = workspace;
... ...
@@ -1670,7 +1670,7 @@ static void row_idct_c(DCTELEM* workspace,
1670 1670
 
1671 1671
 #else /* HAVE_MMX */
1672 1672
 
1673
-static void row_idct_mmx (DCTELEM* workspace,
1673
+static void row_idct_mmx (int16_t* workspace,
1674 1674
                           int16_t* output_adr,  int output_stride,  int cnt)
1675 1675
 {
1676 1676
     uint64_t __attribute__((aligned(8))) temps[4];
... ...
@@ -1874,12 +1874,12 @@ static void row_idct_mmx (DCTELEM* workspace,
1874 1874
 
1875 1875
 #if !HAVE_MMX
1876 1876
 
1877
-static void row_fdct_c(DCTELEM *data, const uint8_t *pixels, int line_size, int cnt)
1877
+static void row_fdct_c(int16_t *data, const uint8_t *pixels, int line_size, int cnt)
1878 1878
 {
1879 1879
     int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
1880 1880
     int_simd16_t tmp10, tmp11, tmp12, tmp13;
1881 1881
     int_simd16_t z1, z2, z3, z4, z5, z11, z13;
1882
-    DCTELEM *dataptr;
1882
+    int16_t *dataptr;
1883 1883
 
1884 1884
     cnt*=4;
1885 1885
     // Pass 1: process rows.
... ...
@@ -1937,7 +1937,7 @@ static void row_fdct_c(DCTELEM *data, const uint8_t *pixels, int line_size, int
1937 1937
 
1938 1938
 #else /* HAVE_MMX */
1939 1939
 
1940
-static void row_fdct_mmx(DCTELEM *data,  const uint8_t *pixels,  int line_size,  int cnt)
1940
+static void row_fdct_mmx(int16_t *data,  const uint8_t *pixels,  int line_size,  int cnt)
1941 1941
 {
1942 1942
     uint64_t __attribute__((aligned(8))) temps[4];
1943 1943
     __asm__ volatile(
... ...
@@ -44,7 +44,7 @@
44 44
 #define XMIN(a,b) ((a) < (b) ? (a) : (b))
45 45
 #define XMAX(a,b) ((a) > (b) ? (a) : (b))
46 46
 
47
-typedef short DCTELEM;
47
+typedef short int16_t;
48 48
 
49 49
 //===========================================================================//
50 50
 static const uint8_t  __attribute__((aligned(8))) dither[8][8]={
... ...
@@ -66,7 +66,7 @@ struct vf_priv_s {
66 66
     uint8_t *src;
67 67
 };
68 68
 #if 0
69
-static inline void dct7_c(DCTELEM *dst, int s0, int s1, int s2, int s3, int step){
69
+static inline void dct7_c(int16_t *dst, int s0, int s1, int s2, int s3, int step){
70 70
     int s, d;
71 71
     int dst2[64];
72 72
 //#define S0 (1024/0.37796447300922719759)
... ...
@@ -113,7 +113,7 @@ static inline void dct7_c(DCTELEM *dst, int s0, int s1, int s2, int s3, int step
113 113
 }
114 114
 #endif
115 115
 
116
-static inline void dctA_c(DCTELEM *dst, uint8_t *src, int stride){
116
+static inline void dctA_c(int16_t *dst, uint8_t *src, int stride){
117 117
     int i;
118 118
 
119 119
     for(i=0; i<4; i++){
... ...
@@ -135,7 +135,7 @@ static inline void dctA_c(DCTELEM *dst, uint8_t *src, int stride){
135 135
     }
136 136
 }
137 137
 
138
-static void dctB_c(DCTELEM *dst, DCTELEM *src){
138
+static void dctB_c(int16_t *dst, int16_t *src){
139 139
     int i;
140 140
 
141 141
     for(i=0; i<4; i++){
... ...
@@ -158,7 +158,7 @@ static void dctB_c(DCTELEM *dst, DCTELEM *src){
158 158
 }
159 159
 
160 160
 #if HAVE_MMX
161
-static void dctB_mmx(DCTELEM *dst, DCTELEM *src){
161
+static void dctB_mmx(int16_t *dst, int16_t *src){
162 162
     __asm__ volatile (
163 163
         "movq  (%0), %%mm0      \n\t"
164 164
         "movq  1*4*2(%0), %%mm1 \n\t"
... ...
@@ -191,7 +191,7 @@ static void dctB_mmx(DCTELEM *dst, DCTELEM *src){
191 191
 }
192 192
 #endif
193 193
 
194
-static void (*dctB)(DCTELEM *dst, DCTELEM *src)= dctB_c;
194
+static void (*dctB)(int16_t *dst, int16_t *src)= dctB_c;
195 195
 
196 196
 #define N0 4
197 197
 #define N1 5
... ...
@@ -228,7 +228,7 @@ static void init_thres2(void){
228 228
     }
229 229
 }
230 230
 
231
-static int hardthresh_c(DCTELEM *src, int qp){
231
+static int hardthresh_c(int16_t *src, int qp){
232 232
     int i;
233 233
     int a;
234 234
 
... ...
@@ -244,7 +244,7 @@ static int hardthresh_c(DCTELEM *src, int qp){
244 244
     return (a + (1<<11))>>12;
245 245
 }
246 246
 
247
-static int mediumthresh_c(DCTELEM *src, int qp){
247
+static int mediumthresh_c(int16_t *src, int qp){
248 248
     int i;
249 249
     int a;
250 250
 
... ...
@@ -265,7 +265,7 @@ static int mediumthresh_c(DCTELEM *src, int qp){
265 265
     return (a + (1<<11))>>12;
266 266
 }
267 267
 
268
-static int softthresh_c(DCTELEM *src, int qp){
268
+static int softthresh_c(int16_t *src, int qp){
269 269
     int i;
270 270
     int a;
271 271
 
... ...
@@ -282,14 +282,14 @@ static int softthresh_c(DCTELEM *src, int qp){
282 282
     return (a + (1<<11))>>12;
283 283
 }
284 284
 
285
-static int (*requantize)(DCTELEM *src, int qp)= hardthresh_c;
285
+static int (*requantize)(int16_t *src, int qp)= hardthresh_c;
286 286
 
287 287
 static void filter(struct vf_priv_s *p, uint8_t *dst, uint8_t *src, int dst_stride, int src_stride, int width, int height, uint8_t *qp_store, int qp_stride, int is_luma){
288 288
     int x, y;
289 289
     const int stride= is_luma ? p->temp_stride : ((width+16+15)&(~15));
290 290
     uint8_t  *p_src= p->src + 8*stride;
291
-    DCTELEM *block= (DCTELEM *)p->src;
292
-    DCTELEM *temp= (DCTELEM *)(p->src + 32);
291
+    int16_t *block= (int16_t *)p->src;
292
+    int16_t *temp= (int16_t *)(p->src + 32);
293 293
 
294 294
     if (!src || !dst) return; // HACK avoid crash for Y8 colourspace
295 295
     for(y=0; y<height; y++){
... ...
@@ -310,7 +310,7 @@ static void filter(struct vf_priv_s *p, uint8_t *dst, uint8_t *src, int dst_stri
310 310
         for(x=-8; x<0; x+=4){
311 311
             const int index= x + y*stride + (8-3)*(1+stride) + 8; //FIXME silly offset
312 312
             uint8_t *src  = p_src + index;
313
-            DCTELEM *tp= temp+4*x;
313
+            int16_t *tp= temp+4*x;
314 314
 
315 315
             dctA_c(tp+4*8, src, stride);
316 316
         }
... ...
@@ -328,7 +328,7 @@ static void filter(struct vf_priv_s *p, uint8_t *dst, uint8_t *src, int dst_stri
328 328
             for(; x<end; x++){
329 329
                 const int index= x + y*stride + (8-3)*(1+stride) + 8; //FIXME silly offset
330 330
                 uint8_t *src  = p_src + index;
331
-                DCTELEM *tp= temp+4*x;
331
+                int16_t *tp= temp+4*x;
332 332
                 int v;
333 333
 
334 334
                 if((x&3)==0)
... ...
@@ -105,7 +105,7 @@ struct vf_priv_s {
105 105
 
106 106
 #define SHIFT 22
107 107
 
108
-static void hardthresh_c(DCTELEM dst[64], DCTELEM src[64], int qp, uint8_t *permutation){
108
+static void hardthresh_c(int16_t dst[64], int16_t src[64], int qp, uint8_t *permutation){
109 109
         int i;
110 110
         int bias= 0; //FIXME
111 111
         unsigned int threshold1, threshold2;
... ...
@@ -113,7 +113,7 @@ static void hardthresh_c(DCTELEM dst[64], DCTELEM src[64], int qp, uint8_t *perm
113 113
         threshold1= qp*((1<<4) - bias) - 1;
114 114
         threshold2= (threshold1<<1);
115 115
 
116
-        memset(dst, 0, 64*sizeof(DCTELEM));
116
+        memset(dst, 0, 64*sizeof(int16_t));
117 117
         dst[0]= (src[0] + 4)>>3;
118 118
 
119 119
         for(i=1; i<64; i++){
... ...
@@ -125,7 +125,7 @@ static void hardthresh_c(DCTELEM dst[64], DCTELEM src[64], int qp, uint8_t *perm
125 125
         }
126 126
 }
127 127
 
128
-static void softthresh_c(DCTELEM dst[64], DCTELEM src[64], int qp, uint8_t *permutation){
128
+static void softthresh_c(int16_t dst[64], int16_t src[64], int qp, uint8_t *permutation){
129 129
         int i;
130 130
         int bias= 0; //FIXME
131 131
         unsigned int threshold1, threshold2;
... ...
@@ -133,7 +133,7 @@ static void softthresh_c(DCTELEM dst[64], DCTELEM src[64], int qp, uint8_t *perm
133 133
         threshold1= qp*((1<<4) - bias) - 1;
134 134
         threshold2= (threshold1<<1);
135 135
 
136
-        memset(dst, 0, 64*sizeof(DCTELEM));
136
+        memset(dst, 0, 64*sizeof(int16_t));
137 137
         dst[0]= (src[0] + 4)>>3;
138 138
 
139 139
         for(i=1; i<64; i++){
... ...
@@ -149,7 +149,7 @@ static void softthresh_c(DCTELEM dst[64], DCTELEM src[64], int qp, uint8_t *perm
149 149
 }
150 150
 
151 151
 #if HAVE_MMX
152
-static void hardthresh_mmx(DCTELEM dst[64], DCTELEM src[64], int qp, uint8_t *permutation){
152
+static void hardthresh_mmx(int16_t dst[64], int16_t src[64], int qp, uint8_t *permutation){
153 153
         int bias= 0; //FIXME
154 154
         unsigned int threshold1;
155 155
 
... ...
@@ -217,7 +217,7 @@ static void hardthresh_mmx(DCTELEM dst[64], DCTELEM src[64], int qp, uint8_t *pe
217 217
         dst[0]= (src[0] + 4)>>3;
218 218
 }
219 219
 
220
-static void softthresh_mmx(DCTELEM dst[64], DCTELEM src[64], int qp, uint8_t *permutation){
220
+static void softthresh_mmx(int16_t dst[64], int16_t src[64], int qp, uint8_t *permutation){
221 221
         int bias= 0; //FIXME
222 222
         unsigned int threshold1;
223 223
 
... ...
@@ -294,7 +294,7 @@ static void softthresh_mmx(DCTELEM dst[64], DCTELEM src[64], int qp, uint8_t *pe
294 294
 }
295 295
 #endif
296 296
 
297
-static inline void add_block(int16_t *dst, int stride, DCTELEM block[64]){
297
+static inline void add_block(int16_t *dst, int stride, int16_t block[64]){
298 298
         int y;
299 299
 
300 300
         for(y=0; y<8; y++){
... ...
@@ -372,15 +372,15 @@ static void store_slice_mmx(uint8_t *dst, int16_t *src, int dst_stride, int src_
372 372
 
373 373
 static void (*store_slice)(uint8_t *dst, int16_t *src, int dst_stride, int src_stride, int width, int height, int log2_scale)= store_slice_c;
374 374
 
375
-static void (*requantize)(DCTELEM dst[64], DCTELEM src[64], int qp, uint8_t *permutation)= hardthresh_c;
375
+static void (*requantize)(int16_t dst[64], int16_t src[64], int qp, uint8_t *permutation)= hardthresh_c;
376 376
 
377 377
 static void filter(struct vf_priv_s *p, uint8_t *dst, uint8_t *src, int dst_stride, int src_stride, int width, int height, uint8_t *qp_store, int qp_stride, int is_luma){
378 378
         int x, y, i;
379 379
         const int count= 1<<p->log2_count;
380 380
         const int stride= is_luma ? p->temp_stride : ((width+16+15)&(~15));
381 381
         uint64_t __attribute__((aligned(16))) block_align[32];
382
-        DCTELEM *block = (DCTELEM *)block_align;
383
-        DCTELEM *block2= (DCTELEM *)(block_align+16);
382
+        int16_t *block = (int16_t *)block_align;
383
+        int16_t *block2= (int16_t *)(block_align+16);
384 384
 
385 385
         if (!src || !dst) return; // HACK avoid crash for Y8 colourspace
386 386
         for(y=0; y<height; y++){
... ...
@@ -63,7 +63,7 @@ static int diff_planes(AVFilterContext *ctx,
63 63
     int x, y;
64 64
     int d, c = 0;
65 65
     int t = (w/16)*(h/16)*decimate->frac;
66
-    DCTELEM block[8*8];
66
+    int16_t block[8*8];
67 67
 
68 68
     /* compute difference for blocks of 8x8 bytes */
69 69
     for (y = 0; y < h-7; y += 4) {
... ...
@@ -173,7 +173,7 @@ vertical align =
173 173
 /* and * align
174 174
 arrays fitting in smaller types
175 175
 variables written to twice with no interspaced read
176
-memset(block, 0, 6*64*sizeof(DCTELEM)); -> clear_blocks
176
+memset(block, 0, 6*64*sizeof(int16_t)); -> clear_blocks
177 177
 check existence of long_name in AVCodec
178 178
 check that the patch does not touch codec & (de)muxer layer at the same time ->split
179 179