
Template dsputil_template.c with respect to pixel size, etc.

Preparatory patch for high bit depth h264 decoding support.

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>

Oskar Arvidsson authored on 2011/03/30 00:48:55
Showing 1 changed file
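Before the diff itself, a note on the mechanism being introduced. The patch adds a small set of template macros (BIT_DEPTH, pixel/pixel2/pixel4, dctcoef, the FUNC()/FUNCC() name manglers, INIT_CLIP/CLIP, AV_RN2P/AV_RN4P, PIXEL_MAX) so that the same C source can later be compiled once per sample size: buffer sizes and byte offsets are scaled by sizeof(pixel), strides passed in bytes are divided back down to pixel units inside each function, and byte-only shortcuts such as memset() are replaced by explicit loops when BIT_DEPTH > 8. The sketch below is a minimal, self-contained illustration of that pattern; the names and the 10-bit instantiation are hypothetical, not part of the patch. In this patch the template is still included exactly once with BIT_DEPTH fixed to 8, and FUNCC(name) expands to name ## _c, so the compiled output is the same set of 8-bit functions as before; instantiating it per bit depth (presumably by re-#including the file with a different BIT_DEPTH) is left to the follow-up patches this change prepares for.

/* templating_sketch.c -- illustrative only, not part of the patch */
#include <stdint.h>
#include <stdio.h>

/* What would live in the template: one edge-padding routine written against an
 * abstract "pixel" type.  The stride is passed in bytes, as in dsputil, and is
 * converted back to pixel units inside the function. */
#define DEF_DRAW_LEFT_EDGE(depth, pixel)                                    \
static void draw_left_edge_ ## depth ## _c(uint8_t *_buf, int stride,      \
                                           int width, int height, int w)   \
{                                                                           \
    pixel *buf = (pixel *)_buf;                                             \
    stride /= sizeof(pixel);                                                \
    for (int y = 0; y < height; y++) {                                      \
        for (int x = 0; x < w; x++)     /* memset() only splats bytes,   */ \
            buf[x - w] = buf[0];        /* so use a loop for wide pixels */ \
        buf += stride;                                                      \
    }                                                                       \
}

/* "Instantiate" the template once per supported bit depth. */
DEF_DRAW_LEFT_EDGE(8,  uint8_t)
DEF_DRAW_LEFT_EDGE(10, uint16_t)

int main(void)
{
    uint8_t  line8 [4 + 8] = { 0 };   /* 4 pixels of left padding + 8 visible */
    uint16_t line10[4 + 8] = { 0 };
    line8[4]  = 42;
    line10[4] = 900;                  /* a value that needs more than 8 bits  */
    draw_left_edge_8_c (line8 + 4,               sizeof(line8),  8, 1, 4);
    draw_left_edge_10_c((uint8_t *)(line10 + 4), sizeof(line10), 8, 1, 4);
    printf("%d %d\n", line8[0], line10[0]);   /* prints: 42 900 */
    return 0;
}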
@@ -29,18 +29,46 @@
29 29
 
30 30
 #include "dsputil.h"
31 31
 
32
+#define BIT_DEPTH 8
33
+
34
+#define pixel  uint8_t
35
+#define pixel2 uint16_t
36
+#define pixel4 uint32_t
37
+#define dctcoef int16_t
38
+
39
+#define FUNC(a)  a
40
+#define FUNCC(a) a ## _c
41
+#define INIT_CLIP uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
42
+#define CLIP(a) cm[a]
43
+#define AV_RN2P AV_RN16
44
+#define AV_RN4P AV_RN32
45
+#define PIXEL_MAX ((1<<BIT_DEPTH)-1)
46
+
47
+#define no_rnd_avg_pixel4 no_rnd_avg32
48
+#define    rnd_avg_pixel4    rnd_avg32
49
+
32 50
 /* draw the edges of width 'w' of an image of size width, height */
33 51
 //FIXME check that this is ok for mpeg4 interlaced
34
-static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w, int sides)
52
+static void FUNCC(draw_edges)(uint8_t *_buf, int _wrap, int width, int height, int w, int sides)
35 53
 {
36
-    uint8_t *ptr, *last_line;
54
+    pixel *buf = (pixel*)_buf;
55
+    int wrap = _wrap / sizeof(pixel);
56
+    pixel *ptr, *last_line;
37 57
     int i;
38 58
 
39 59
     /* left and right */
40 60
     ptr = buf;
41 61
     for(i=0;i<height;i++) {
62
+#if BIT_DEPTH > 8
63
+        int j;
64
+        for (j = 0; j < w; j++) {
65
+            ptr[j-w] = ptr[0];
66
+            ptr[j+width] = ptr[width-1];
67
+        }
68
+#else
42 69
         memset(ptr - w, ptr[0], w);
43 70
         memset(ptr + width, ptr[width-1], w);
71
+#endif
44 72
         ptr += wrap;
45 73
     }
46 74
 
@@ -49,10 +77,10 @@ static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w, i
49 49
     last_line = buf + (height - 1) * wrap;
50 50
     if (sides & EDGE_TOP)
51 51
         for(i = 0; i < w; i++)
52
-            memcpy(buf - (i + 1) * wrap, buf, width + w + w); // top
52
+            memcpy(buf - (i + 1) * wrap, buf, (width + w + w) * sizeof(pixel)); // top
53 53
     if (sides & EDGE_BOTTOM)
54 54
         for (i = 0; i < w; i++)
55
-            memcpy(last_line + (i + 1) * wrap, last_line, width + w + w); // bottom
55
+            memcpy(last_line + (i + 1) * wrap, last_line, (width + w + w) * sizeof(pixel)); // bottom
56 56
 }
57 57
 
58 58
 /**
@@ -67,7 +95,7 @@ static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w, i
67 67
  * @param w width of the source buffer
68 68
  * @param h height of the source buffer
69 69
  */
70
-void ff_emulated_edge_mc(uint8_t *buf, const uint8_t *src, int linesize, int block_w, int block_h,
70
+void FUNC(ff_emulated_edge_mc)(uint8_t *buf, const uint8_t *src, int linesize, int block_w, int block_h,
71 71
                                     int src_x, int src_y, int w, int h){
72 72
     int x, y;
73 73
     int start_y, start_x, end_y, end_x;
@@ -80,10 +108,10 @@ void ff_emulated_edge_mc(uint8_t *buf, const uint8_t *src, int linesize, int blo
80 80
         src_y=1-block_h;
81 81
     }
82 82
     if(src_x>= w){
83
-        src+= (w-1-src_x);
83
+        src+= (w-1-src_x)*sizeof(pixel);
84 84
         src_x=w-1;
85 85
     }else if(src_x<=-block_w){
86
-        src+= (1-block_w-src_x);
86
+        src+= (1-block_w-src_x)*sizeof(pixel);
87 87
         src_x=1-block_w;
88 88
     }
89 89
 
@@ -95,18 +123,18 @@ void ff_emulated_edge_mc(uint8_t *buf, const uint8_t *src, int linesize, int blo
95 95
     assert(start_x < end_x && block_w);
96 96
 
97 97
     w    = end_x - start_x;
98
-    src += start_y*linesize + start_x;
99
-    buf += start_x;
98
+    src += start_y*linesize + start_x*sizeof(pixel);
99
+    buf += start_x*sizeof(pixel);
100 100
 
101 101
     //top
102 102
     for(y=0; y<start_y; y++){
103
-        memcpy(buf, src, w);
103
+        memcpy(buf, src, w*sizeof(pixel));
104 104
         buf += linesize;
105 105
     }
106 106
 
107 107
     // copy existing part
108 108
     for(; y<end_y; y++){
109
-        memcpy(buf, src, w);
109
+        memcpy(buf, src, w*sizeof(pixel));
110 110
         src += linesize;
111 111
         buf += linesize;
112 112
     }
@@ -114,28 +142,33 @@ void ff_emulated_edge_mc(uint8_t *buf, const uint8_t *src, int linesize, int blo
114 114
     //bottom
115 115
     src -= linesize;
116 116
     for(; y<block_h; y++){
117
-        memcpy(buf, src, w);
117
+        memcpy(buf, src, w*sizeof(pixel));
118 118
         buf += linesize;
119 119
     }
120 120
 
121
-    buf -= block_h * linesize + start_x;
121
+    buf -= block_h * linesize + start_x*sizeof(pixel);
122 122
     while (block_h--){
123
+        pixel *bufp = (pixel*)buf;
123 124
        //left
124 125
         for(x=0; x<start_x; x++){
125
-            buf[x] = buf[start_x];
126
+            bufp[x] = bufp[start_x];
126 127
         }
127 128
 
128 129
        //right
129 130
         for(x=end_x; x<block_w; x++){
130
-            buf[x] = buf[end_x - 1];
131
+            bufp[x] = bufp[end_x - 1];
131 132
         }
132 133
         buf += linesize;
133 134
     }
134 135
 }
135 136
 
136
-static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
137
+static void FUNCC(add_pixels8)(uint8_t *restrict _pixels, DCTELEM *_block, int line_size)
137 138
 {
138 139
     int i;
140
+    pixel *restrict pixels = (pixel *restrict)_pixels;
141
+    dctcoef *block = (dctcoef*)_block;
142
+    line_size /= sizeof(pixel);
143
+
139 144
     for(i=0;i<8;i++) {
140 145
         pixels[0] += block[0];
141 146
         pixels[1] += block[1];
@@ -150,9 +183,13 @@ static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_siz
150 150
     }
151 151
 }
152 152
 
153
-static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
153
+static void FUNCC(add_pixels4)(uint8_t *restrict _pixels, DCTELEM *_block, int line_size)
154 154
 {
155 155
     int i;
156
+    pixel *restrict pixels = (pixel *restrict)_pixels;
157
+    dctcoef *block = (dctcoef*)_block;
158
+    line_size /= sizeof(pixel);
159
+
156 160
     for(i=0;i<4;i++) {
157 161
         pixels[0] += block[0];
158 162
         pixels[1] += block[1];
@@ -296,127 +333,128 @@ static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels
296 296
         }\
297 297
 }\
298 298
 \
299
-CALL_2X_PIXELS(OPNAME ## _pixels16_c    , OPNAME ## _pixels_c    , 8)\
300
-CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
301
-CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
302
-CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
303
-CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
304
-CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
305
-CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)
299
+CALL_2X_PIXELS(OPNAME ## _pixels16_c    , OPNAME ## _pixels_c    , 8*sizeof(pixel))\
300
+CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8*sizeof(pixel))\
301
+CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8*sizeof(pixel))\
302
+CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8*sizeof(pixel))\
303
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8*sizeof(pixel))\
304
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8*sizeof(pixel))\
305
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8*sizeof(pixel))
306 306
 
307 307
 #define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
308 308
 #else // 64 bit variant
309 309
 
310 310
 #define PIXOP2(OPNAME, OP) \
311
-static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
311
+static void FUNCC(OPNAME ## _pixels2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
312 312
     int i;\
313 313
     for(i=0; i<h; i++){\
314
-        OP(*((uint16_t*)(block  )), AV_RN16(pixels  ));\
314
+        OP(*((pixel2*)(block  )), AV_RN2P(pixels  ));\
315 315
         pixels+=line_size;\
316 316
         block +=line_size;\
317 317
     }\
318 318
 }\
319
-static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
319
+static void FUNCC(OPNAME ## _pixels4)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
320 320
     int i;\
321 321
     for(i=0; i<h; i++){\
322
-        OP(*((uint32_t*)(block  )), AV_RN32(pixels  ));\
322
+        OP(*((pixel4*)(block  )), AV_RN4P(pixels  ));\
323 323
         pixels+=line_size;\
324 324
         block +=line_size;\
325 325
     }\
326 326
 }\
327
-static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
327
+static void FUNCC(OPNAME ## _pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
328 328
     int i;\
329 329
     for(i=0; i<h; i++){\
330
-        OP(*((uint32_t*)(block  )), AV_RN32(pixels  ));\
331
-        OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\
330
+        OP(*((pixel4*)(block                )), AV_RN4P(pixels                ));\
331
+        OP(*((pixel4*)(block+4*sizeof(pixel))), AV_RN4P(pixels+4*sizeof(pixel)));\
332 332
         pixels+=line_size;\
333 333
         block +=line_size;\
334 334
     }\
335 335
 }\
336
-static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
337
-    OPNAME ## _pixels8_c(block, pixels, line_size, h);\
336
+static inline void FUNCC(OPNAME ## _no_rnd_pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
337
+    FUNCC(OPNAME ## _pixels8)(block, pixels, line_size, h);\
338 338
 }\
339 339
 \
340
-static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
340
+static inline void FUNC(OPNAME ## _no_rnd_pixels8_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
341 341
                                                 int src_stride1, int src_stride2, int h){\
342 342
     int i;\
343 343
     for(i=0; i<h; i++){\
344
-        uint32_t a,b;\
345
-        a= AV_RN32(&src1[i*src_stride1  ]);\
346
-        b= AV_RN32(&src2[i*src_stride2  ]);\
347
-        OP(*((uint32_t*)&dst[i*dst_stride  ]), no_rnd_avg32(a, b));\
348
-        a= AV_RN32(&src1[i*src_stride1+4]);\
349
-        b= AV_RN32(&src2[i*src_stride2+4]);\
350
-        OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
344
+        pixel4 a,b;\
345
+        a= AV_RN4P(&src1[i*src_stride1  ]);\
346
+        b= AV_RN4P(&src2[i*src_stride2  ]);\
347
+        OP(*((pixel4*)&dst[i*dst_stride  ]), no_rnd_avg_pixel4(a, b));\
348
+        a= AV_RN4P(&src1[i*src_stride1+4*sizeof(pixel)]);\
349
+        b= AV_RN4P(&src2[i*src_stride2+4*sizeof(pixel)]);\
350
+        OP(*((pixel4*)&dst[i*dst_stride+4*sizeof(pixel)]), no_rnd_avg_pixel4(a, b));\
351 351
     }\
352 352
 }\
353 353
 \
354
-static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
354
+static inline void FUNC(OPNAME ## _pixels8_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
355 355
                                                 int src_stride1, int src_stride2, int h){\
356 356
     int i;\
357 357
     for(i=0; i<h; i++){\
358
-        uint32_t a,b;\
359
-        a= AV_RN32(&src1[i*src_stride1  ]);\
360
-        b= AV_RN32(&src2[i*src_stride2  ]);\
361
-        OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
362
-        a= AV_RN32(&src1[i*src_stride1+4]);\
363
-        b= AV_RN32(&src2[i*src_stride2+4]);\
364
-        OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
358
+        pixel4 a,b;\
359
+        a= AV_RN4P(&src1[i*src_stride1  ]);\
360
+        b= AV_RN4P(&src2[i*src_stride2  ]);\
361
+        OP(*((pixel4*)&dst[i*dst_stride  ]), rnd_avg_pixel4(a, b));\
362
+        a= AV_RN4P(&src1[i*src_stride1+4*sizeof(pixel)]);\
363
+        b= AV_RN4P(&src2[i*src_stride2+4*sizeof(pixel)]);\
364
+        OP(*((pixel4*)&dst[i*dst_stride+4*sizeof(pixel)]), rnd_avg_pixel4(a, b));\
365 365
     }\
366 366
 }\
367 367
 \
368
-static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
368
+static inline void FUNC(OPNAME ## _pixels4_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
369 369
                                                 int src_stride1, int src_stride2, int h){\
370 370
     int i;\
371 371
     for(i=0; i<h; i++){\
372
-        uint32_t a,b;\
373
-        a= AV_RN32(&src1[i*src_stride1  ]);\
374
-        b= AV_RN32(&src2[i*src_stride2  ]);\
375
-        OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
372
+        pixel4 a,b;\
373
+        a= AV_RN4P(&src1[i*src_stride1  ]);\
374
+        b= AV_RN4P(&src2[i*src_stride2  ]);\
375
+        OP(*((pixel4*)&dst[i*dst_stride  ]), rnd_avg_pixel4(a, b));\
376 376
     }\
377 377
 }\
378 378
 \
379
-static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
379
+static inline void FUNC(OPNAME ## _pixels2_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
380 380
                                                 int src_stride1, int src_stride2, int h){\
381 381
     int i;\
382 382
     for(i=0; i<h; i++){\
383
-        uint32_t a,b;\
384
-        a= AV_RN16(&src1[i*src_stride1  ]);\
385
-        b= AV_RN16(&src2[i*src_stride2  ]);\
386
-        OP(*((uint16_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
383
+        pixel4 a,b;\
384
+        a= AV_RN2P(&src1[i*src_stride1  ]);\
385
+        b= AV_RN2P(&src2[i*src_stride2  ]);\
386
+        OP(*((pixel2*)&dst[i*dst_stride  ]), rnd_avg_pixel4(a, b));\
387 387
     }\
388 388
 }\
389 389
 \
390
-static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
390
+static inline void FUNC(OPNAME ## _pixels16_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
391 391
                                                 int src_stride1, int src_stride2, int h){\
392
-    OPNAME ## _pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
393
-    OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
392
+    FUNC(OPNAME ## _pixels8_l2)(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
393
+    FUNC(OPNAME ## _pixels8_l2)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, h);\
394 394
 }\
395 395
 \
396
-static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
396
+static inline void FUNC(OPNAME ## _no_rnd_pixels16_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
397 397
                                                 int src_stride1, int src_stride2, int h){\
398
-    OPNAME ## _no_rnd_pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
399
-    OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
398
+    FUNC(OPNAME ## _no_rnd_pixels8_l2)(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
399
+    FUNC(OPNAME ## _no_rnd_pixels8_l2)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, h);\
400 400
 }\
401 401
 \
402
-static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
403
-    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
402
+static inline void FUNCC(OPNAME ## _no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
403
+    FUNC(OPNAME ## _no_rnd_pixels8_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
404 404
 }\
405 405
 \
406
-static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
407
-    OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
406
+static inline void FUNCC(OPNAME ## _pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
407
+    FUNC(OPNAME ## _pixels8_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
408 408
 }\
409 409
 \
410
-static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
411
-    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
410
+static inline void FUNCC(OPNAME ## _no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
411
+    FUNC(OPNAME ## _no_rnd_pixels8_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
412 412
 }\
413 413
 \
414
-static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
415
-    OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
414
+static inline void FUNCC(OPNAME ## _pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
415
+    FUNC(OPNAME ## _pixels8_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
416 416
 }\
417 417
 \
418
-static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
418
+static inline void FUNC(OPNAME ## _pixels8_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
419 419
                  int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
420
+    /* FIXME HIGH BIT DEPTH */\
420 421
     int i;\
421 422
     for(i=0; i<h; i++){\
422 423
         uint32_t a, b, c, d, l0, l1, h0, h1;\
@@ -451,24 +489,25 @@ static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, cons
451 451
     }\
452 452
 }\
453 453
 \
454
-static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
455
-    OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
454
+static inline void FUNCC(OPNAME ## _pixels4_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
455
+    FUNC(OPNAME ## _pixels4_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
456 456
 }\
457 457
 \
458
-static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
459
-    OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
458
+static inline void FUNCC(OPNAME ## _pixels4_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
459
+    FUNC(OPNAME ## _pixels4_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
460 460
 }\
461 461
 \
462
-static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
463
-    OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
462
+static inline void FUNCC(OPNAME ## _pixels2_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
463
+    FUNC(OPNAME ## _pixels2_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
464 464
 }\
465 465
 \
466
-static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
467
-    OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
466
+static inline void FUNCC(OPNAME ## _pixels2_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
467
+    FUNC(OPNAME ## _pixels2_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
468 468
 }\
469 469
 \
470
-static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
470
+static inline void FUNC(OPNAME ## _no_rnd_pixels8_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
471 471
                  int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
472
+    /* FIXME HIGH BIT DEPTH*/\
472 473
     int i;\
473 474
     for(i=0; i<h; i++){\
474 475
         uint32_t a, b, c, d, l0, l1, h0, h1;\
@@ -502,20 +541,23 @@ static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src
502 502
         OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
503 503
     }\
504 504
 }\
505
-static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
505
+static inline void FUNC(OPNAME ## _pixels16_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
506 506
                  int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
507
-    OPNAME ## _pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
508
-    OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
507
+    FUNC(OPNAME ## _pixels8_l4)(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
508
+    FUNC(OPNAME ## _pixels8_l4)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), src3+8*sizeof(pixel), src4+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
509 509
 }\
510
-static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
510
+static inline void FUNC(OPNAME ## _no_rnd_pixels16_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
511 511
                  int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
512
-    OPNAME ## _no_rnd_pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
513
-    OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
512
+    FUNC(OPNAME ## _no_rnd_pixels8_l4)(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
513
+    FUNC(OPNAME ## _no_rnd_pixels8_l4)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), src3+8*sizeof(pixel), src4+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
514 514
 }\
515 515
 \
516
-static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
516
+static inline void FUNCC(OPNAME ## _pixels2_xy2)(uint8_t *_block, const uint8_t *_pixels, int line_size, int h)\
517 517
 {\
518 518
         int i, a0, b0, a1, b1;\
519
+        pixel *block = (pixel*)_block;\
520
+        const pixel *pixels = (const pixel*)_pixels;\
521
+        line_size /= sizeof(pixel);\
519 522
         a0= pixels[0];\
520 523
         b0= pixels[1] + 2;\
521 524
         a0 += b0;\
@@ -546,8 +588,9 @@ static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixel
546 546
         }\
547 547
 }\
548 548
 \
549
-static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
549
+static inline void FUNCC(OPNAME ## _pixels4_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
550 550
 {\
551
+        /* FIXME HIGH BIT DEPTH */\
551 552
         int i;\
552 553
         const uint32_t a= AV_RN32(pixels  );\
553 554
         const uint32_t b= AV_RN32(pixels+1);\
@@ -582,8 +625,9 @@ static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixel
582 582
         }\
583 583
 }\
584 584
 \
585
-static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
585
+static inline void FUNCC(OPNAME ## _pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
586 586
 {\
587
+    /* FIXME HIGH BIT DEPTH */\
587 588
     int j;\
588 589
     for(j=0; j<2; j++){\
589 590
         int i;\
@@ -623,8 +667,9 @@ static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixel
623 623
     }\
624 624
 }\
625 625
 \
626
-static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
626
+static inline void FUNCC(OPNAME ## _no_rnd_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
627 627
 {\
628
+    /* FIXME HIGH BIT DEPTH */\
628 629
     int j;\
629 630
     for(j=0; j<2; j++){\
630 631
         int i;\
@@ -664,18 +709,17 @@ static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t
664 664
     }\
665 665
 }\
666 666
 \
667
-CALL_2X_PIXELS(OPNAME ## _pixels16_c  , OPNAME ## _pixels8_c  , 8)\
668
-CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
669
-CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
670
-CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
671
-av_unused CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c  , OPNAME ## _pixels8_c         , 8)\
672
-CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
673
-CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
674
-CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\
667
+CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16)    , FUNCC(OPNAME ## _pixels8)    , 8*sizeof(pixel))\
668
+CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_x2) , FUNCC(OPNAME ## _pixels8_x2) , 8*sizeof(pixel))\
669
+CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_y2) , FUNCC(OPNAME ## _pixels8_y2) , 8*sizeof(pixel))\
670
+CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_xy2), FUNCC(OPNAME ## _pixels8_xy2), 8*sizeof(pixel))\
671
+av_unused CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16)    , FUNCC(OPNAME ## _pixels8) , 8*sizeof(pixel))\
672
+CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_x2) , FUNCC(OPNAME ## _no_rnd_pixels8_x2) , 8*sizeof(pixel))\
673
+CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_y2) , FUNCC(OPNAME ## _no_rnd_pixels8_y2) , 8*sizeof(pixel))\
674
+CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_xy2), FUNCC(OPNAME ## _no_rnd_pixels8_xy2), 8*sizeof(pixel))\
675 675
 
676
-#define op_avg(a, b) a = rnd_avg32(a, b)
676
+#define op_avg(a, b) a = rnd_avg_pixel4(a, b)
677 677
 #endif
678
-
679 678
 #define op_put(a, b) a = b
680 679
 
681 680
 PIXOP2(avg, op_avg)
@@ -686,21 +730,24 @@ PIXOP2(put, op_put)
686 686
 #define put_no_rnd_pixels8_c  put_pixels8_c
687 687
 #define put_no_rnd_pixels16_c put_pixels16_c
688 688
 
689
-static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
690
-    put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, h);
689
+static void FUNCC(put_no_rnd_pixels16_l2)(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
690
+    FUNC(put_no_rnd_pixels16_l2)(dst, a, b, stride, stride, stride, h);
691 691
 }
692 692
 
693
-static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
694
-    put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h);
693
+static void FUNCC(put_no_rnd_pixels8_l2)(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
694
+    FUNC(put_no_rnd_pixels8_l2)(dst, a, b, stride, stride, stride, h);
695 695
 }
696 696
 
697 697
 #define H264_CHROMA_MC(OPNAME, OP)\
698
-static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
698
+static void FUNCC(OPNAME ## h264_chroma_mc2)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\
699
+    pixel *dst = (pixel*)_dst;\
700
+    pixel *src = (pixel*)_src;\
699 701
     const int A=(8-x)*(8-y);\
700 702
     const int B=(  x)*(8-y);\
701 703
     const int C=(8-x)*(  y);\
702 704
     const int D=(  x)*(  y);\
703 705
     int i;\
706
+    stride /= sizeof(pixel);\
704 707
     \
705 708
     assert(x<8 && y<8 && x>=0 && y>=0);\
706 709
 \
@@ -723,12 +770,15 @@ static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*a
723 723
     }\
724 724
 }\
725 725
 \
726
-static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
726
+static void FUNCC(OPNAME ## h264_chroma_mc4)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\
727
+    pixel *dst = (pixel*)_dst;\
728
+    pixel *src = (pixel*)_src;\
727 729
     const int A=(8-x)*(8-y);\
728 730
     const int B=(  x)*(8-y);\
729 731
     const int C=(8-x)*(  y);\
730 732
     const int D=(  x)*(  y);\
731 733
     int i;\
734
+    stride /= sizeof(pixel);\
732 735
     \
733 736
     assert(x<8 && y<8 && x>=0 && y>=0);\
734 737
 \
@@ -755,12 +805,15 @@ static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*a
755 755
     }\
756 756
 }\
757 757
 \
758
-static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
758
+static void FUNCC(OPNAME ## h264_chroma_mc8)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\
759
+    pixel *dst = (pixel*)_dst;\
760
+    pixel *src = (pixel*)_src;\
759 761
     const int A=(8-x)*(8-y);\
760 762
     const int B=(  x)*(8-y);\
761 763
     const int C=(8-x)*(  y);\
762 764
     const int D=(  x)*(  y);\
763 765
     int i;\
766
+    stride /= sizeof(pixel);\
764 767
     \
765 768
     assert(x<8 && y<8 && x>=0 && y>=0);\
766 769
 \
@@ -804,10 +857,14 @@ H264_CHROMA_MC(avg_       , op_avg)
804 804
 #undef op_put
805 805
 
806 806
 #define H264_LOWPASS(OPNAME, OP, OP2) \
807
-static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
807
+static av_unused void FUNC(OPNAME ## h264_qpel2_h_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\
808 808
     const int h=2;\
809
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
809
+    INIT_CLIP\
810 810
     int i;\
811
+    pixel *dst = (pixel*)_dst;\
812
+    pixel *src = (pixel*)_src;\
813
+    dstStride /= sizeof(pixel);\
814
+    srcStride /= sizeof(pixel);\
811 815
     for(i=0; i<h; i++)\
812 816
     {\
813 817
         OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
@@ -817,10 +874,14 @@ static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src,
817 817
     }\
818 818
 }\
819 819
 \
820
-static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
820
+static av_unused void FUNC(OPNAME ## h264_qpel2_v_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\
821 821
     const int w=2;\
822
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
822
+    INIT_CLIP\
823 823
     int i;\
824
+    pixel *dst = (pixel*)_dst;\
825
+    pixel *src = (pixel*)_src;\
826
+    dstStride /= sizeof(pixel);\
827
+    srcStride /= sizeof(pixel);\
824 828
     for(i=0; i<w; i++)\
825 829
     {\
826 830
         const int srcB= src[-2*srcStride];\
@@ -837,39 +898,48 @@ static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src,
837 837
     }\
838 838
 }\
839 839
 \
840
-static av_unused void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
840
+static av_unused void FUNC(OPNAME ## h264_qpel2_hv_lowpass)(uint8_t *_dst, int16_t *tmp, uint8_t *_src, int dstStride, int tmpStride, int srcStride){\
841 841
     const int h=2;\
842 842
     const int w=2;\
843
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
843
+    const int pad = (BIT_DEPTH > 9) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0;\
844
+    INIT_CLIP\
844 845
     int i;\
846
+    pixel *dst = (pixel*)_dst;\
847
+    pixel *src = (pixel*)_src;\
848
+    dstStride /= sizeof(pixel);\
849
+    srcStride /= sizeof(pixel);\
845 850
     src -= 2*srcStride;\
846 851
     for(i=0; i<h+5; i++)\
847 852
     {\
848
-        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
849
-        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
853
+        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]) + pad;\
854
+        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]) + pad;\
850 855
         tmp+=tmpStride;\
851 856
         src+=srcStride;\
852 857
     }\
853 858
     tmp -= tmpStride*(h+5-2);\
854 859
     for(i=0; i<w; i++)\
855 860
     {\
856
-        const int tmpB= tmp[-2*tmpStride];\
857
-        const int tmpA= tmp[-1*tmpStride];\
858
-        const int tmp0= tmp[0 *tmpStride];\
859
-        const int tmp1= tmp[1 *tmpStride];\
860
-        const int tmp2= tmp[2 *tmpStride];\
861
-        const int tmp3= tmp[3 *tmpStride];\
862
-        const int tmp4= tmp[4 *tmpStride];\
861
+        const int tmpB= tmp[-2*tmpStride] - pad;\
862
+        const int tmpA= tmp[-1*tmpStride] - pad;\
863
+        const int tmp0= tmp[0 *tmpStride] - pad;\
864
+        const int tmp1= tmp[1 *tmpStride] - pad;\
865
+        const int tmp2= tmp[2 *tmpStride] - pad;\
866
+        const int tmp3= tmp[3 *tmpStride] - pad;\
867
+        const int tmp4= tmp[4 *tmpStride] - pad;\
863 868
         OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
864 869
         OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
865 870
         dst++;\
866 871
         tmp++;\
867 872
     }\
868 873
 }\
869
-static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
874
+static void FUNC(OPNAME ## h264_qpel4_h_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\
870 875
     const int h=4;\
871
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
876
+    INIT_CLIP\
872 877
     int i;\
878
+    pixel *dst = (pixel*)_dst;\
879
+    pixel *src = (pixel*)_src;\
880
+    dstStride /= sizeof(pixel);\
881
+    srcStride /= sizeof(pixel);\
873 882
     for(i=0; i<h; i++)\
874 883
     {\
875 884
         OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
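A note on the pad constant introduced in the qpel2_hv_lowpass hunk above (and repeated in the qpel4/qpel8 variants below): it is added when the horizontal pass stores its intermediate sums into the int16_t tmp[] buffer and subtracted again when the vertical pass reads them back. The point appears to be keeping those intermediates inside the int16_t range once samples are wider than 9 bits: the 6-tap expression (a+b)*20 - (c+d)*5 + (e+f) can reach 42*PIXEL_MAX, which overflows int16_t at 10 bits but not at 8 or 9, and biasing by -10*PIXEL_MAX recenters the range so it fits. A short, illustrative check of that arithmetic (not part of the patch):

/* pad_range_check.c -- illustrative only; mirrors the tmp[] expressions in the
 * hv_lowpass functions and checks that the biased values fit in int16_t. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    for (int bit_depth = 8; bit_depth <= 10; bit_depth++) {
        const int max = (1 << bit_depth) - 1;             /* PIXEL_MAX       */
        const int pad = (bit_depth > 9) ? -10 * max : 0;  /* as in the patch */
        /* extremes of (a+b)*20 - (c+d)*5 + (e+f) for samples in [0, max] */
        const int lo = -10 * max + pad;
        const int hi =  42 * max + pad;
        printf("BIT_DEPTH %2d: tmp range [%6d, %6d] -> %s\n",
               bit_depth, lo, hi,
               (lo >= INT16_MIN && hi <= INT16_MAX) ? "fits int16_t"
                                                    : "overflows int16_t");
    }
    return 0;
}

Without the bias the 10-bit maximum would be 42*1023 = 42966, which no longer fits; with it the range becomes [-20460, 32736], while at 8 and 9 bits no bias is needed, matching the BIT_DEPTH > 9 test in the diff.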
@@ -881,10 +951,14 @@ static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstSt
881 881
     }\
882 882
 }\
883 883
 \
884
-static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
884
+static void FUNC(OPNAME ## h264_qpel4_v_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\
885 885
     const int w=4;\
886
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
886
+    INIT_CLIP\
887 887
     int i;\
888
+    pixel *dst = (pixel*)_dst;\
889
+    pixel *src = (pixel*)_src;\
890
+    dstStride /= sizeof(pixel);\
891
+    srcStride /= sizeof(pixel);\
888 892
     for(i=0; i<w; i++)\
889 893
     {\
890 894
         const int srcB= src[-2*srcStride];\
@@ -905,33 +979,38 @@ static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstSt
905 905
     }\
906 906
 }\
907 907
 \
908
-static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
908
+static void FUNC(OPNAME ## h264_qpel4_hv_lowpass)(uint8_t *_dst, int16_t *tmp, uint8_t *_src, int dstStride, int tmpStride, int srcStride){\
909 909
     const int h=4;\
910 910
     const int w=4;\
911
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
911
+    const int pad = (BIT_DEPTH > 9) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0;\
912
+    INIT_CLIP\
912 913
     int i;\
914
+    pixel *dst = (pixel*)_dst;\
915
+    pixel *src = (pixel*)_src;\
916
+    dstStride /= sizeof(pixel);\
917
+    srcStride /= sizeof(pixel);\
913 918
     src -= 2*srcStride;\
914 919
     for(i=0; i<h+5; i++)\
915 920
     {\
916
-        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
917
-        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
918
-        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\
919
-        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\
921
+        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]) + pad;\
922
+        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]) + pad;\
923
+        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]) + pad;\
924
+        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]) + pad;\
920 925
         tmp+=tmpStride;\
921 926
         src+=srcStride;\
922 927
     }\
923 928
     tmp -= tmpStride*(h+5-2);\
924 929
     for(i=0; i<w; i++)\
925 930
     {\
926
-        const int tmpB= tmp[-2*tmpStride];\
927
-        const int tmpA= tmp[-1*tmpStride];\
928
-        const int tmp0= tmp[0 *tmpStride];\
929
-        const int tmp1= tmp[1 *tmpStride];\
930
-        const int tmp2= tmp[2 *tmpStride];\
931
-        const int tmp3= tmp[3 *tmpStride];\
932
-        const int tmp4= tmp[4 *tmpStride];\
933
-        const int tmp5= tmp[5 *tmpStride];\
934
-        const int tmp6= tmp[6 *tmpStride];\
931
+        const int tmpB= tmp[-2*tmpStride] - pad;\
932
+        const int tmpA= tmp[-1*tmpStride] - pad;\
933
+        const int tmp0= tmp[0 *tmpStride] - pad;\
934
+        const int tmp1= tmp[1 *tmpStride] - pad;\
935
+        const int tmp2= tmp[2 *tmpStride] - pad;\
936
+        const int tmp3= tmp[3 *tmpStride] - pad;\
937
+        const int tmp4= tmp[4 *tmpStride] - pad;\
938
+        const int tmp5= tmp[5 *tmpStride] - pad;\
939
+        const int tmp6= tmp[6 *tmpStride] - pad;\
935 940
         OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
936 941
         OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
937 942
         OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
@@ -941,10 +1020,14 @@ static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t
941 941
     }\
942 942
 }\
943 943
 \
944
-static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
944
+static void FUNC(OPNAME ## h264_qpel8_h_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\
945 945
     const int h=8;\
946
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
946
+    INIT_CLIP\
947 947
     int i;\
948
+    pixel *dst = (pixel*)_dst;\
949
+    pixel *src = (pixel*)_src;\
950
+    dstStride /= sizeof(pixel);\
951
+    srcStride /= sizeof(pixel);\
948 952
     for(i=0; i<h; i++)\
949 953
     {\
950 954
         OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
@@ -960,10 +1043,14 @@ static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstSt
960 960
     }\
961 961
 }\
962 962
 \
963
-static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
963
+static void FUNC(OPNAME ## h264_qpel8_v_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\
964 964
     const int w=8;\
965
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
965
+    INIT_CLIP\
966 966
     int i;\
967
+    pixel *dst = (pixel*)_dst;\
968
+    pixel *src = (pixel*)_src;\
969
+    dstStride /= sizeof(pixel);\
970
+    srcStride /= sizeof(pixel);\
967 971
     for(i=0; i<w; i++)\
968 972
     {\
969 973
         const int srcB= src[-2*srcStride];\
@@ -992,41 +1079,46 @@ static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstSt
992 992
     }\
993 993
 }\
994 994
 \
995
-static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
995
+static void FUNC(OPNAME ## h264_qpel8_hv_lowpass)(uint8_t *_dst, int16_t *tmp, uint8_t *_src, int dstStride, int tmpStride, int srcStride){\
996 996
     const int h=8;\
997 997
     const int w=8;\
998
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
998
+    const int pad = (BIT_DEPTH > 9) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0;\
999
+    INIT_CLIP\
999 1000
     int i;\
1001
+    pixel *dst = (pixel*)_dst;\
1002
+    pixel *src = (pixel*)_src;\
1003
+    dstStride /= sizeof(pixel);\
1004
+    srcStride /= sizeof(pixel);\
1000 1005
     src -= 2*srcStride;\
1001 1006
     for(i=0; i<h+5; i++)\
1002 1007
     {\
1003
-        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\
1004
-        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\
1005
-        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\
1006
-        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\
1007
-        tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\
1008
-        tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\
1009
-        tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\
1010
-        tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\
1008
+        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]) + pad;\
1009
+        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]) + pad;\
1010
+        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]) + pad;\
1011
+        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]) + pad;\
1012
+        tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]) + pad;\
1013
+        tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]) + pad;\
1014
+        tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]) + pad;\
1015
+        tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]) + pad;\
1011 1016
         tmp+=tmpStride;\
1012 1017
         src+=srcStride;\
1013 1018
     }\
1014 1019
     tmp -= tmpStride*(h+5-2);\
1015 1020
     for(i=0; i<w; i++)\
1016 1021
     {\
1017
-        const int tmpB= tmp[-2*tmpStride];\
1018
-        const int tmpA= tmp[-1*tmpStride];\
1019
-        const int tmp0= tmp[0 *tmpStride];\
1020
-        const int tmp1= tmp[1 *tmpStride];\
1021
-        const int tmp2= tmp[2 *tmpStride];\
1022
-        const int tmp3= tmp[3 *tmpStride];\
1023
-        const int tmp4= tmp[4 *tmpStride];\
1024
-        const int tmp5= tmp[5 *tmpStride];\
1025
-        const int tmp6= tmp[6 *tmpStride];\
1026
-        const int tmp7= tmp[7 *tmpStride];\
1027
-        const int tmp8= tmp[8 *tmpStride];\
1028
-        const int tmp9= tmp[9 *tmpStride];\
1029
-        const int tmp10=tmp[10*tmpStride];\
1022
+        const int tmpB= tmp[-2*tmpStride] - pad;\
1023
+        const int tmpA= tmp[-1*tmpStride] - pad;\
1024
+        const int tmp0= tmp[0 *tmpStride] - pad;\
1025
+        const int tmp1= tmp[1 *tmpStride] - pad;\
1026
+        const int tmp2= tmp[2 *tmpStride] - pad;\
1027
+        const int tmp3= tmp[3 *tmpStride] - pad;\
1028
+        const int tmp4= tmp[4 *tmpStride] - pad;\
1029
+        const int tmp5= tmp[5 *tmpStride] - pad;\
1030
+        const int tmp6= tmp[6 *tmpStride] - pad;\
1031
+        const int tmp7= tmp[7 *tmpStride] - pad;\
1032
+        const int tmp8= tmp[8 *tmpStride] - pad;\
1033
+        const int tmp9= tmp[9 *tmpStride] - pad;\
1034
+        const int tmp10=tmp[10*tmpStride] - pad;\
1030 1035
         OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
1031 1036
         OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
1032 1037
         OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
@@ -1040,175 +1132,175 @@ static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t
1040 1040
     }\
1041 1041
 }\
1042 1042
 \
1043
-static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1044
-    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
1045
-    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
1043
+static void FUNC(OPNAME ## h264_qpel16_v_lowpass)(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1044
+    FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst                , src                , dstStride, srcStride);\
1045
+    FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
1046 1046
     src += 8*srcStride;\
1047 1047
     dst += 8*dstStride;\
1048
-    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
1049
-    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
1048
+    FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst                , src                , dstStride, srcStride);\
1049
+    FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
1050 1050
 }\
1051 1051
 \
1052
-static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1053
-    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
1054
-    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
1052
+static void FUNC(OPNAME ## h264_qpel16_h_lowpass)(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1053
+    FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst                , src                , dstStride, srcStride);\
1054
+    FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
1055 1055
     src += 8*srcStride;\
1056 1056
     dst += 8*dstStride;\
1057
-    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
1058
-    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
1057
+    FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst                , src                , dstStride, srcStride);\
1058
+    FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
1059 1059
 }\
1060 1060
 \
1061
-static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
1062
-    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
1063
-    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
1061
+static void FUNC(OPNAME ## h264_qpel16_hv_lowpass)(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
1062
+    FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst                , tmp  , src                , dstStride, tmpStride, srcStride);\
1063
+    FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst+8*sizeof(pixel), tmp+8, src+8*sizeof(pixel), dstStride, tmpStride, srcStride);\
1064 1064
     src += 8*srcStride;\
1065 1065
     dst += 8*dstStride;\
1066
-    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
1067
-    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
1066
+    FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst                , tmp  , src                , dstStride, tmpStride, srcStride);\
1067
+    FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst+8*sizeof(pixel), tmp+8, src+8*sizeof(pixel), dstStride, tmpStride, srcStride);\
1068 1068
 }\
1069 1069
 
1070 1070
 #define H264_MC(OPNAME, SIZE) \
1071
-static av_unused void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
1072
-    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
1071
+static av_unused void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc00)(uint8_t *dst, uint8_t *src, int stride){\
1072
+    FUNCC(OPNAME ## pixels ## SIZE)(dst, src, stride, SIZE);\
1073 1073
 }\
1074 1074
 \
1075
-static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1076
-    uint8_t half[SIZE*SIZE];\
1077
-    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
1078
-    OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\
1075
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc10)(uint8_t *dst, uint8_t *src, int stride){\
1076
+    uint8_t half[SIZE*SIZE*sizeof(pixel)];\
1077
+    FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(half, src, SIZE*sizeof(pixel), stride);\
1078
+    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, src, half, stride, stride, SIZE*sizeof(pixel), SIZE);\
1079 1079
 }\
1080 1080
 \
1081
-static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
1082
-    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
1081
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc20)(uint8_t *dst, uint8_t *src, int stride){\
1082
+    FUNC(OPNAME ## h264_qpel ## SIZE ## _h_lowpass)(dst, src, stride, stride);\
1083 1083
 }\
1084 1084
 \
1085
-static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1086
-    uint8_t half[SIZE*SIZE];\
1087
-    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
1088
-    OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\
1085
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc30)(uint8_t *dst, uint8_t *src, int stride){\
1086
+    uint8_t half[SIZE*SIZE*sizeof(pixel)];\
1087
+    FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(half, src, SIZE*sizeof(pixel), stride);\
1088
+    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, src+sizeof(pixel), half, stride, stride, SIZE*sizeof(pixel), SIZE);\
1089 1089
 }\
1090 1090
 \
1091
-static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1092
-    uint8_t full[SIZE*(SIZE+5)];\
1093
-    uint8_t * const full_mid= full + SIZE*2;\
1094
-    uint8_t half[SIZE*SIZE];\
1095
-    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
1096
-    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
1097
-    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
1091
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc01)(uint8_t *dst, uint8_t *src, int stride){\
1092
+    uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
1093
+    uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
1094
+    uint8_t half[SIZE*SIZE*sizeof(pixel)];\
1095
+    FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel),  stride, SIZE + 5);\
1096
+    FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(half, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
1097
+    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, full_mid, half, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
1098 1098
 }\
1099 1099
 \
1100
-static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1101
-    uint8_t full[SIZE*(SIZE+5)];\
1102
-    uint8_t * const full_mid= full + SIZE*2;\
1103
-    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
1104
-    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
1100
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc02)(uint8_t *dst, uint8_t *src, int stride){\
1101
+    uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
1102
+    uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
1103
+    FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel),  stride, SIZE + 5);\
1104
+    FUNC(OPNAME ## h264_qpel ## SIZE ## _v_lowpass)(dst, full_mid, stride, SIZE*sizeof(pixel));\
1105 1105
 }\
1106 1106
 \
1107
-static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1108
-    uint8_t full[SIZE*(SIZE+5)];\
1109
-    uint8_t * const full_mid= full + SIZE*2;\
1110
-    uint8_t half[SIZE*SIZE];\
1111
-    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
1112
-    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
1113
-    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
1107
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc03)(uint8_t *dst, uint8_t *src, int stride){\
1108
+    uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
1109
+    uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
1110
+    uint8_t half[SIZE*SIZE*sizeof(pixel)];\
1111
+    FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel),  stride, SIZE + 5);\
1112
+    FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(half, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
1113
+    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, full_mid+SIZE*sizeof(pixel), half, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
1114 1114
 }\
1115 1115
 \
1116
-static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1117
-    uint8_t full[SIZE*(SIZE+5)];\
1118
-    uint8_t * const full_mid= full + SIZE*2;\
1119
-    uint8_t halfH[SIZE*SIZE];\
-    uint8_t halfV[SIZE*SIZE];\
-    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
-    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
-    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
-    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc11)(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
+    uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
+    uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
+    uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
+    FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src, SIZE*sizeof(pixel), stride);\
+    FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel),  stride, SIZE + 5);\
+    FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
+    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
 }\
 \
-static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
-    uint8_t full[SIZE*(SIZE+5)];\
-    uint8_t * const full_mid= full + SIZE*2;\
-    uint8_t halfH[SIZE*SIZE];\
-    uint8_t halfV[SIZE*SIZE];\
-    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
-    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
-    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
-    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc31)(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
+    uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
+    uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
+    uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
+    FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src, SIZE*sizeof(pixel), stride);\
+    FUNC(copy_block ## SIZE )(full, src - stride*2 + sizeof(pixel), SIZE*sizeof(pixel),  stride, SIZE + 5);\
+    FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
+    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
 }\
 \
-static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
-    uint8_t full[SIZE*(SIZE+5)];\
-    uint8_t * const full_mid= full + SIZE*2;\
-    uint8_t halfH[SIZE*SIZE];\
-    uint8_t halfV[SIZE*SIZE];\
-    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
-    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
-    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
-    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc13)(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
+    uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
+    uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
+    uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
+    FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src + stride, SIZE*sizeof(pixel), stride);\
+    FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel),  stride, SIZE + 5);\
+    FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
+    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
 }\
 \
-static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
-    uint8_t full[SIZE*(SIZE+5)];\
-    uint8_t * const full_mid= full + SIZE*2;\
-    uint8_t halfH[SIZE*SIZE];\
-    uint8_t halfV[SIZE*SIZE];\
-    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
-    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
-    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
-    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc33)(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
+    uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
+    uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
+    uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
+    FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src + stride, SIZE*sizeof(pixel), stride);\
+    FUNC(copy_block ## SIZE )(full, src - stride*2 + sizeof(pixel), SIZE*sizeof(pixel),  stride, SIZE + 5);\
+    FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
+    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
 }\
 \
-static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
-    int16_t tmp[SIZE*(SIZE+5)];\
-    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc22)(uint8_t *dst, uint8_t *src, int stride){\
+    int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
+    FUNC(OPNAME ## h264_qpel ## SIZE ## _hv_lowpass)(dst, tmp, src, stride, SIZE*sizeof(pixel), stride);\
 }\
 \
-static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
-    int16_t tmp[SIZE*(SIZE+5)];\
-    uint8_t halfH[SIZE*SIZE];\
-    uint8_t halfHV[SIZE*SIZE];\
-    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
-    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
-    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc21)(uint8_t *dst, uint8_t *src, int stride){\
+    int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
+    uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
+    uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
+    FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src, SIZE*sizeof(pixel), stride);\
+    FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
+    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
 }\
 \
-static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
-    int16_t tmp[SIZE*(SIZE+5)];\
-    uint8_t halfH[SIZE*SIZE];\
-    uint8_t halfHV[SIZE*SIZE];\
-    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
-    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
-    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc23)(uint8_t *dst, uint8_t *src, int stride){\
+    int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
+    uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
+    uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
+    FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src + stride, SIZE*sizeof(pixel), stride);\
+    FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
+    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
 }\
 \
-static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
-    uint8_t full[SIZE*(SIZE+5)];\
-    uint8_t * const full_mid= full + SIZE*2;\
-    int16_t tmp[SIZE*(SIZE+5)];\
-    uint8_t halfV[SIZE*SIZE];\
-    uint8_t halfHV[SIZE*SIZE];\
-    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
-    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
-    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
-    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc12)(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
+    uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
+    int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
+    uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
+    uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
+    FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel),  stride, SIZE + 5);\
+    FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
+    FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
+    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfV, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
 }\
 \
-static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
-    uint8_t full[SIZE*(SIZE+5)];\
-    uint8_t * const full_mid= full + SIZE*2;\
-    int16_t tmp[SIZE*(SIZE+5)];\
-    uint8_t halfV[SIZE*SIZE];\
-    uint8_t halfHV[SIZE*SIZE];\
-    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
-    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
-    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
-    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc32)(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
+    uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
+    int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
+    uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
+    uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
+    FUNC(copy_block ## SIZE )(full, src - stride*2 + sizeof(pixel), SIZE*sizeof(pixel),  stride, SIZE + 5);\
+    FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
+    FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
+    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfV, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
 }\
 
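The templating keeps every interface byte-based: buffers are still passed as uint8_t * and strides are still byte counts, so buffer sizes, strides and horizontal offsets are all scaled by sizeof(pixel), and the former "+ 1" column offsets become "+ sizeof(pixel)". A minimal sketch of that convention, assuming a hypothetical build where pixel is uint16_t (illustrative only, not the FFmpeg code itself):

#include <stdint.h>
#include <string.h>

#define pixel uint16_t   /* assumed >8-bit build; an 8-bit build would use uint8_t */

/* Copy a w x h block of samples between byte-addressed buffers.
 * Strides are byte counts, and one row of w samples occupies
 * w*sizeof(pixel) bytes -- the same scaling applied throughout
 * the hunk above. */
static void copy_block_w(uint8_t *dst, const uint8_t *src,
                         int dst_stride, int src_stride, int w, int h)
{
    int i;
    for (i = 0; i < h; i++) {
        memcpy(dst, src, w * sizeof(pixel));
        dst += dst_stride;
        src += src_stride;
    }
}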
-#define op_avg(a, b)  a = (((a)+cm[((b) + 16)>>5]+1)>>1)
+#define op_avg(a, b)  a = (((a)+CLIP(((b) + 16)>>5)+1)>>1)
 //#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
-#define op_put(a, b)  a = cm[((b) + 16)>>5]
-#define op2_avg(a, b)  a = (((a)+cm[((b) + 512)>>10]+1)>>1)
-#define op2_put(a, b)  a = cm[((b) + 512)>>10]
+#define op_put(a, b)  a = CLIP(((b) + 16)>>5)
+#define op2_avg(a, b)  a = (((a)+CLIP(((b) + 512)>>10)+1)>>1)
+#define op2_put(a, b)  a = CLIP(((b) + 512)>>10)
 
 H264_LOWPASS(put_       , op_put, op2_put)
 H264_LOWPASS(avg_       , op_avg, op2_avg)
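Replacing the direct cm[] lookups with CLIP() is what lets these rounding macros survive the templating: the 8-bit instance can keep clamping through the crop table, while a deeper bit depth, whose sample range no longer fits a 256-entry table, needs an arithmetic clamp against PIXEL_MAX. One way such a definition could look (an assumption for illustration; the exact high-bit-depth macros may differ):

/* Hypothetical CLIP() for a BIT_DEPTH > 8 instantiation; the 8-bit
 * instantiation keeps the cm[] crop-table version. */
#define BIT_DEPTH  10
#define PIXEL_MAX  ((1 << BIT_DEPTH) - 1)
#define INIT_CLIP                      /* no table to set up */
#define CLIP(a)    ((a) < 0 ? 0 : (a) > PIXEL_MAX ? PIXEL_MAX : (a))

/* The rounding macros then work unchanged, e.g. op_put(a, b) still
 * computes a = CLIP(((b) + 16) >> 5): round the 6-tap filter sum
 * (filter gain 32) and clamp it to the valid pixel range. */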
... ...
@@ -1230,28 +1322,28 @@ H264_MC(avg_, 16)
 #define put_h264_qpel16_mc00_c ff_put_pixels16x16_c
 #define avg_h264_qpel16_mc00_c ff_avg_pixels16x16_c
 
-void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride) {
-    put_pixels8_c(dst, src, stride, 8);
+void FUNCC(ff_put_pixels8x8)(uint8_t *dst, uint8_t *src, int stride) {
+    FUNCC(put_pixels8)(dst, src, stride, 8);
 }
-void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride) {
-    avg_pixels8_c(dst, src, stride, 8);
+void FUNCC(ff_avg_pixels8x8)(uint8_t *dst, uint8_t *src, int stride) {
+    FUNCC(avg_pixels8)(dst, src, stride, 8);
 }
-void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride) {
-    put_pixels16_c(dst, src, stride, 16);
+void FUNCC(ff_put_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
+    FUNCC(put_pixels16)(dst, src, stride, 16);
 }
-void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride) {
-    avg_pixels16_c(dst, src, stride, 16);
+void FUNCC(ff_avg_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
+    FUNCC(avg_pixels16)(dst, src, stride, 16);
 }
 
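The fixed-size wrappers now take their names from FUNCC(), so one source file can emit a separate set of symbols per bit depth while the 8-bit instantiation keeps producing the *_c names that the qpel16_mc00 defines above still refer to. A sketch of how a second instantiation might mangle the names (the "_10" suffix scheme is an assumption, not taken from the FFmpeg tree):

/* Hypothetical name-pasting macros for a 10-bit instantiation of the
 * template; only meant to illustrate the token-pasting technique. */
#define PASTE2(a, b) a ## b
#define PASTE(a, b)  PASTE2(a, b)

#define FUNC(a)      PASTE(a, _10)     /* internal helpers      */
#define FUNCC(a)     PASTE(a, _10_c)   /* public C entry points */

/* With these definitions FUNCC(ff_put_pixels8x8) expands to
 * ff_put_pixels8x8_10_c, so 8-bit and 10-bit functions can coexist
 * in one binary and be selected at run time. */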
-static void clear_block_c(DCTELEM *block)
+static void FUNCC(clear_block)(DCTELEM *block)
 {
-    memset(block, 0, sizeof(DCTELEM)*64);
+    memset(block, 0, sizeof(dctcoef)*64);
 }
 
 /**
  * memset(blocks, 0, sizeof(DCTELEM)*6*64)
  */
-static void clear_blocks_c(DCTELEM *blocks)
+static void FUNCC(clear_blocks)(DCTELEM *blocks)
 {
-    memset(blocks, 0, sizeof(DCTELEM)*6*64);
+    memset(blocks, 0, sizeof(dctcoef)*6*64);
 }
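clear_block() and clear_blocks() now measure the block in sizeof(dctcoef) rather than sizeof(DCTELEM), so a higher-bit-depth instance is free to use a wider coefficient type. With nothing in the file hard-coding uint8_t, int16_t or the bit depth any more, the whole translation unit can be instantiated several times; a plausible way the follow-up high-bit-depth patches could use it (an assumption about later work, not something this commit does) is to let the including file choose BIT_DEPTH:

/* Sketch: include the template once per supported bit depth.
 * The file name and the list of depths are assumptions. */
#define BIT_DEPTH 8
#include "dsputil_template.c"
#undef  BIT_DEPTH

#define BIT_DEPTH 10
#include "dsputil_template.c"
#undef  BIT_DEPTH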