Browse code

Adds 8-, 9- and 10-bit versions of some of the functions used by the h264 decoder.

This patch lets e.g. dsputil_init choose dsp functions with respect to
the bit depth to decode. The naming scheme of bit depth dependent
functions is <base name>_<bit depth>[_<suffix>] (e.g. the old
clear_blocks_c is now named clear_blocks_8_c).

Note: Some of the functions for high bit depth are not dependent on the
bit depth, but only on the pixel size. This leaves some room for
optimizing binary size.

Preparatory patch for high bit depth h264 decoding support.

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>

Oskar Arvidsson authored on 2011/03/30 00:48:59
Showing 34 changed files
... ...
@@ -270,6 +270,9 @@ static void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels,
270 270
 
271 271
 void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx)
272 272
 {
273
+    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
274
+
275
+    if (!high_bit_depth) {
273 276
     c->put_pixels_tab[0][0] = put_pixels16_axp_asm;
274 277
     c->put_pixels_tab[0][1] = put_pixels16_x2_axp;
275 278
     c->put_pixels_tab[0][2] = put_pixels16_y2_axp;
... ...
@@ -311,6 +314,7 @@ void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx)
311 311
     c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels_xy2_axp;
312 312
 
313 313
     c->clear_blocks = clear_blocks_axp;
314
+    }
314 315
 
315 316
     /* amask clears all bits that correspond to present features.  */
316 317
     if (amask(AMASK_MVI) == 0) {
... ...
@@ -75,6 +75,8 @@ static void simple_idct_arm_add(uint8_t *dest, int line_size, DCTELEM *block)
75 75
 
76 76
 void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx)
77 77
 {
78
+    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
79
+
78 80
     ff_put_pixels_clamped = c->put_pixels_clamped;
79 81
     ff_add_pixels_clamped = c->add_pixels_clamped;
80 82
 
... ...
@@ -95,6 +97,7 @@ void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx)
95 95
 
96 96
     c->add_pixels_clamped = ff_add_pixels_clamped_arm;
97 97
 
98
+    if (!high_bit_depth) {
98 99
     c->put_pixels_tab[0][0] = ff_put_pixels16_arm;
99 100
     c->put_pixels_tab[0][1] = ff_put_pixels16_x2_arm;
100 101
     c->put_pixels_tab[0][2] = ff_put_pixels16_y2_arm;
... ...
@@ -112,6 +115,7 @@ void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx)
112 112
     c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_arm;
113 113
     c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_arm;
114 114
     c->put_no_rnd_pixels_tab[1][3] = ff_put_no_rnd_pixels8_xy2_arm;
115
+    }
115 116
 
116 117
     if (HAVE_ARMV5TE) ff_dsputil_init_armv5te(c, avctx);
117 118
     if (HAVE_ARMV6)   ff_dsputil_init_armv6(c, avctx);
... ...
@@ -72,6 +72,8 @@ int ff_pix_sum_armv6(uint8_t *pix, int line_size);
72 72
 
73 73
 void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx)
74 74
 {
75
+    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
76
+
75 77
     if (!avctx->lowres && (avctx->idct_algo == FF_IDCT_AUTO ||
76 78
                            avctx->idct_algo == FF_IDCT_SIMPLEARMV6)) {
77 79
         c->idct_put              = ff_simple_idct_put_armv6;
... ...
@@ -80,6 +82,7 @@ void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx)
80 80
         c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM;
81 81
     }
82 82
 
83
+    if (!high_bit_depth) {
83 84
     c->put_pixels_tab[0][0] = ff_put_pixels16_armv6;
84 85
     c->put_pixels_tab[0][1] = ff_put_pixels16_x2_armv6;
85 86
     c->put_pixels_tab[0][2] = ff_put_pixels16_y2_armv6;
... ...
@@ -100,6 +103,7 @@ void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx)
100 100
 
101 101
     c->avg_pixels_tab[0][0] = ff_avg_pixels16_armv6;
102 102
     c->avg_pixels_tab[1][0] = ff_avg_pixels8_armv6;
103
+    }
103 104
 
104 105
     c->add_pixels_clamped = ff_add_pixels_clamped_armv6;
105 106
     c->get_pixels = ff_get_pixels_armv6;
... ...
@@ -173,6 +173,8 @@ void ff_apply_window_int16_neon(int16_t *dst, const int16_t *src,
173 173
 
174 174
 void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
175 175
 {
176
+    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
177
+
176 178
     if (!avctx->lowres) {
177 179
         if (avctx->idct_algo == FF_IDCT_AUTO ||
178 180
             avctx->idct_algo == FF_IDCT_SIMPLENEON) {
... ...
@@ -190,6 +192,7 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
190 190
         }
191 191
     }
192 192
 
193
+    if (!high_bit_depth) {
193 194
     c->clear_block  = ff_clear_block_neon;
194 195
     c->clear_blocks = ff_clear_blocks_neon;
195 196
 
... ...
@@ -213,12 +216,14 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
213 213
 
214 214
     c->avg_pixels_tab[0][0] = ff_avg_pixels16_neon;
215 215
     c->avg_pixels_tab[1][0] = ff_avg_pixels8_neon;
216
+    }
216 217
 
217 218
     c->add_pixels_clamped = ff_add_pixels_clamped_neon;
218 219
     c->put_pixels_clamped = ff_put_pixels_clamped_neon;
219 220
     c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_neon;
220 221
 
221 222
     if (CONFIG_H264_DECODER) {
223
+        if (!high_bit_depth) {
222 224
         c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_neon;
223 225
         c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_neon;
224 226
         c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_neon;
... ...
@@ -294,6 +299,7 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
294 294
         c->avg_h264_qpel_pixels_tab[1][13] = ff_avg_h264_qpel8_mc13_neon;
295 295
         c->avg_h264_qpel_pixels_tab[1][14] = ff_avg_h264_qpel8_mc23_neon;
296 296
         c->avg_h264_qpel_pixels_tab[1][15] = ff_avg_h264_qpel8_mc33_neon;
297
+        }
297 298
     }
298 299
 
299 300
     if (CONFIG_VP3_DECODER) {
... ...
@@ -155,6 +155,7 @@ static void nop(uint8_t *block, const uint8_t *pixels, int line_size, int h)
155 155
 void ff_dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx)
156 156
 {
157 157
     int mm_flags = AV_CPU_FLAG_IWMMXT; /* multimedia extension flags */
158
+    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
158 159
 
159 160
     if (avctx->dsp_mask) {
160 161
         if (avctx->dsp_mask & AV_CPU_FLAG_FORCE)
... ...
@@ -167,6 +168,7 @@ void ff_dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx)
167 167
 
168 168
     c->add_pixels_clamped = add_pixels_clamped_iwmmxt;
169 169
 
170
+    if (!high_bit_depth) {
170 171
     c->clear_blocks = clear_blocks_iwmmxt;
171 172
 
172 173
     c->put_pixels_tab[0][0] = put_pixels16_iwmmxt;
... ...
@@ -204,4 +206,5 @@ void ff_dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx)
204 204
     c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_iwmmxt;
205 205
     c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_iwmmxt;
206 206
     c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_iwmmxt;
207
+    }
207 208
 }
... ...
@@ -92,8 +92,9 @@ void ff_h264_idct8_add4_neon(uint8_t *dst, const int *block_offset,
92 92
                              DCTELEM *block, int stride,
93 93
                              const uint8_t nnzc[6*8]);
94 94
 
95
-static void ff_h264dsp_init_neon(H264DSPContext *c)
95
+static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth)
96 96
 {
97
+    if (bit_depth == 8) {
97 98
     c->h264_v_loop_filter_luma   = ff_h264_v_loop_filter_luma_neon;
98 99
     c->h264_h_loop_filter_luma   = ff_h264_h_loop_filter_luma_neon;
99 100
     c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon;
... ...
@@ -125,9 +126,10 @@ static void ff_h264dsp_init_neon(H264DSPContext *c)
125 125
     c->h264_idct8_add       = ff_h264_idct8_add_neon;
126 126
     c->h264_idct8_dc_add    = ff_h264_idct8_dc_add_neon;
127 127
     c->h264_idct8_add4      = ff_h264_idct8_add4_neon;
128
+    }
128 129
 }
129 130
 
130
-void ff_h264dsp_init_arm(H264DSPContext *c)
131
+void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth)
131 132
 {
132
-    if (HAVE_NEON) ff_h264dsp_init_neon(c);
133
+    if (HAVE_NEON) ff_h264dsp_init_neon(c, bit_depth);
133 134
 }
... ...
@@ -42,8 +42,13 @@ void ff_pred8x8_0lt_dc_neon(uint8_t *src, int stride);
42 42
 void ff_pred8x8_l00_dc_neon(uint8_t *src, int stride);
43 43
 void ff_pred8x8_0l0_dc_neon(uint8_t *src, int stride);
44 44
 
45
-static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id)
45
+static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id, const int bit_depth)
46 46
 {
47
+    const int high_depth = bit_depth > 8;
48
+
49
+    if (high_depth)
50
+        return;
51
+
47 52
     h->pred8x8[VERT_PRED8x8     ] = ff_pred8x8_vert_neon;
48 53
     h->pred8x8[HOR_PRED8x8      ] = ff_pred8x8_hor_neon;
49 54
     if (codec_id != CODEC_ID_VP8)
... ...
@@ -69,7 +74,7 @@ static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id)
69 69
         h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_neon;
70 70
 }
71 71
 
72
-void ff_h264_pred_init_arm(H264PredContext *h, int codec_id)
72
+void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, const int bit_depth)
73 73
 {
74
-    if (HAVE_NEON)    ff_h264_pred_init_neon(h, codec_id);
74
+    if (HAVE_NEON)    ff_h264_pred_init_neon(h, codec_id, bit_depth);
75 75
 }
... ...
@@ -197,11 +197,14 @@ static int bfin_pix_abs8_xy2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_si
197 197
 
198 198
 void dsputil_init_bfin( DSPContext* c, AVCodecContext *avctx )
199 199
 {
200
+    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
201
+
200 202
     c->get_pixels         = ff_bfin_get_pixels;
201 203
     c->diff_pixels        = ff_bfin_diff_pixels;
202 204
     c->put_pixels_clamped = ff_bfin_put_pixels_clamped;
203 205
     c->add_pixels_clamped = ff_bfin_add_pixels_clamped;
204 206
 
207
+    if (!high_bit_depth)
205 208
     c->clear_blocks       = bfin_clear_blocks;
206 209
     c->pix_sum            = ff_bfin_pix_sum;
207 210
     c->pix_norm1          = ff_bfin_pix_norm1;
... ...
@@ -228,6 +231,7 @@ void dsputil_init_bfin( DSPContext* c, AVCodecContext *avctx )
228 228
     c->sse[1] = ff_bfin_sse8;
229 229
     c->sse[2] = ff_bfin_sse4;
230 230
 
231
+    if (!high_bit_depth) {
231 232
     c->put_pixels_tab[0][0] = bfin_put_pixels16;
232 233
     c->put_pixels_tab[0][1] = bfin_put_pixels16_x2;
233 234
     c->put_pixels_tab[0][2] = bfin_put_pixels16_y2;
... ...
@@ -247,6 +251,7 @@ void dsputil_init_bfin( DSPContext* c, AVCodecContext *avctx )
247 247
     c->put_no_rnd_pixels_tab[0][1] = bfin_put_pixels16_x2_nornd;
248 248
     c->put_no_rnd_pixels_tab[0][2] = bfin_put_pixels16_y2_nornd;
249 249
 /*     c->put_no_rnd_pixels_tab[0][3] = ff_bfin_put_pixels16_xy2_nornd; */
250
+    }
250 251
 
251 252
     if (avctx->dct_algo == FF_DCT_AUTO)
252 253
         c->fdct               = ff_bfin_fdct;
... ...
@@ -43,6 +43,15 @@
43 43
 uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
44 44
 uint32_t ff_squareTbl[512] = {0, };
45 45
 
46
+#define BIT_DEPTH 9
47
+#include "dsputil_template.c"
48
+#undef BIT_DEPTH
49
+
50
+#define BIT_DEPTH 10
51
+#include "dsputil_template.c"
52
+#undef BIT_DEPTH
53
+
54
+#define BIT_DEPTH 8
46 55
 #include "dsputil_template.c"
47 56
 
48 57
 // 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
... ...
@@ -619,10 +628,10 @@ void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
619 619
 
620 620
 static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
621 621
     switch(width){
622
-    case 2: put_pixels2_c (dst, src, stride, height); break;
623
-    case 4: put_pixels4_c (dst, src, stride, height); break;
624
-    case 8: put_pixels8_c (dst, src, stride, height); break;
625
-    case 16:put_pixels16_c(dst, src, stride, height); break;
622
+    case 2: put_pixels2_8_c (dst, src, stride, height); break;
623
+    case 4: put_pixels4_8_c (dst, src, stride, height); break;
624
+    case 8: put_pixels8_8_c (dst, src, stride, height); break;
625
+    case 16:put_pixels16_8_c(dst, src, stride, height); break;
626 626
     }
627 627
 }
628 628
 
... ...
@@ -716,10 +725,10 @@ static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int
716 716
 
717 717
 static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
718 718
     switch(width){
719
-    case 2: avg_pixels2_c (dst, src, stride, height); break;
720
-    case 4: avg_pixels4_c (dst, src, stride, height); break;
721
-    case 8: avg_pixels8_c (dst, src, stride, height); break;
722
-    case 16:avg_pixels16_c(dst, src, stride, height); break;
719
+    case 2: avg_pixels2_8_c (dst, src, stride, height); break;
720
+    case 4: avg_pixels4_8_c (dst, src, stride, height); break;
721
+    case 8: avg_pixels8_8_c (dst, src, stride, height); break;
722
+    case 16:avg_pixels16_8_c(dst, src, stride, height); break;
723 723
     }
724 724
 }
725 725
 
... ...
@@ -953,7 +962,7 @@ static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dst
953 953
 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
954 954
     uint8_t half[64];\
955 955
     put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
956
-    OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
956
+    OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
957 957
 }\
958 958
 \
959 959
 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
... ...
@@ -963,7 +972,7 @@ static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
963 963
 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
964 964
     uint8_t half[64];\
965 965
     put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
966
-    OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
966
+    OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
967 967
 }\
968 968
 \
969 969
 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
... ...
@@ -971,7 +980,7 @@ static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
971 971
     uint8_t half[64];\
972 972
     copy_block9(full, src, 16, stride, 9);\
973 973
     put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
974
-    OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
974
+    OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
975 975
 }\
976 976
 \
977 977
 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
... ...
@@ -985,7 +994,7 @@ static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
985 985
     uint8_t half[64];\
986 986
     copy_block9(full, src, 16, stride, 9);\
987 987
     put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
988
-    OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
988
+    OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
989 989
 }\
990 990
 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
991 991
     uint8_t full[16*9];\
... ...
@@ -996,7 +1005,7 @@ void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
996 996
     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
997 997
     put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
998 998
     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
999
-    OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
999
+    OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1000 1000
 }\
1001 1001
 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1002 1002
     uint8_t full[16*9];\
... ...
@@ -1004,9 +1013,9 @@ static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1004 1004
     uint8_t halfHV[64];\
1005 1005
     copy_block9(full, src, 16, stride, 9);\
1006 1006
     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1007
-    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1007
+    put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
1008 1008
     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1009
-    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1009
+    OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
1010 1010
 }\
1011 1011
 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1012 1012
     uint8_t full[16*9];\
... ...
@@ -1017,7 +1026,7 @@ void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1017 1017
     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1018 1018
     put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1019 1019
     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1020
-    OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1020
+    OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1021 1021
 }\
1022 1022
 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1023 1023
     uint8_t full[16*9];\
... ...
@@ -1025,9 +1034,9 @@ static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1025 1025
     uint8_t halfHV[64];\
1026 1026
     copy_block9(full, src, 16, stride, 9);\
1027 1027
     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1028
-    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1028
+    put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1029 1029
     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1030
-    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1030
+    OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
1031 1031
 }\
1032 1032
 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1033 1033
     uint8_t full[16*9];\
... ...
@@ -1038,7 +1047,7 @@ void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1038 1038
     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1039 1039
     put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1040 1040
     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1041
-    OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1041
+    OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1042 1042
 }\
1043 1043
 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1044 1044
     uint8_t full[16*9];\
... ...
@@ -1046,9 +1055,9 @@ static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1046 1046
     uint8_t halfHV[64];\
1047 1047
     copy_block9(full, src, 16, stride, 9);\
1048 1048
     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1049
-    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1049
+    put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
1050 1050
     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1051
-    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1051
+    OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1052 1052
 }\
1053 1053
 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1054 1054
     uint8_t full[16*9];\
... ...
@@ -1059,7 +1068,7 @@ void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1059 1059
     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full  , 8, 16, 9);\
1060 1060
     put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1061 1061
     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1062
-    OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1062
+    OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1063 1063
 }\
1064 1064
 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1065 1065
     uint8_t full[16*9];\
... ...
@@ -1067,23 +1076,23 @@ static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1067 1067
     uint8_t halfHV[64];\
1068 1068
     copy_block9(full, src, 16, stride, 9);\
1069 1069
     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1070
-    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1070
+    put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1071 1071
     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1072
-    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1072
+    OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1073 1073
 }\
1074 1074
 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1075 1075
     uint8_t halfH[72];\
1076 1076
     uint8_t halfHV[64];\
1077 1077
     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1078 1078
     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1079
-    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1079
+    OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
1080 1080
 }\
1081 1081
 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1082 1082
     uint8_t halfH[72];\
1083 1083
     uint8_t halfHV[64];\
1084 1084
     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1085 1085
     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1086
-    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1086
+    OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1087 1087
 }\
1088 1088
 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1089 1089
     uint8_t full[16*9];\
... ...
@@ -1094,14 +1103,14 @@ void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1094 1094
     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1095 1095
     put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1096 1096
     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1097
-    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
1097
+    OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
1098 1098
 }\
1099 1099
 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1100 1100
     uint8_t full[16*9];\
1101 1101
     uint8_t halfH[72];\
1102 1102
     copy_block9(full, src, 16, stride, 9);\
1103 1103
     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1104
-    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1104
+    put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
1105 1105
     OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1106 1106
 }\
1107 1107
 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
... ...
@@ -1113,14 +1122,14 @@ void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1113 1113
     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1114 1114
     put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1115 1115
     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1116
-    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
1116
+    OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
1117 1117
 }\
1118 1118
 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1119 1119
     uint8_t full[16*9];\
1120 1120
     uint8_t halfH[72];\
1121 1121
     copy_block9(full, src, 16, stride, 9);\
1122 1122
     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1123
-    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1123
+    put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1124 1124
     OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1125 1125
 }\
1126 1126
 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
... ...
@@ -1132,7 +1141,7 @@ static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1132 1132
 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1133 1133
     uint8_t half[256];\
1134 1134
     put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1135
-    OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
1135
+    OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
1136 1136
 }\
1137 1137
 \
1138 1138
 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
... ...
@@ -1142,7 +1151,7 @@ static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
1142 1142
 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1143 1143
     uint8_t half[256];\
1144 1144
     put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1145
-    OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
1145
+    OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
1146 1146
 }\
1147 1147
 \
1148 1148
 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
... ...
@@ -1150,7 +1159,7 @@ static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1150 1150
     uint8_t half[256];\
1151 1151
     copy_block17(full, src, 24, stride, 17);\
1152 1152
     put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1153
-    OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
1153
+    OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
1154 1154
 }\
1155 1155
 \
1156 1156
 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
... ...
@@ -1164,7 +1173,7 @@ static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1164 1164
     uint8_t half[256];\
1165 1165
     copy_block17(full, src, 24, stride, 17);\
1166 1166
     put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1167
-    OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
1167
+    OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
1168 1168
 }\
1169 1169
 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1170 1170
     uint8_t full[24*17];\
... ...
@@ -1175,7 +1184,7 @@ void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){
1175 1175
     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1176 1176
     put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1177 1177
     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1178
-    OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1178
+    OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1179 1179
 }\
1180 1180
 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1181 1181
     uint8_t full[24*17];\
... ...
@@ -1183,9 +1192,9 @@ static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1183 1183
     uint8_t halfHV[256];\
1184 1184
     copy_block17(full, src, 24, stride, 17);\
1185 1185
     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1186
-    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
1186
+    put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1187 1187
     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1188
-    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
1188
+    OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1189 1189
 }\
1190 1190
 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1191 1191
     uint8_t full[24*17];\
... ...
@@ -1196,7 +1205,7 @@ void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){
1196 1196
     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1197 1197
     put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1198 1198
     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1199
-    OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1199
+    OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1200 1200
 }\
1201 1201
 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1202 1202
     uint8_t full[24*17];\
... ...
@@ -1204,9 +1213,9 @@ static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1204 1204
     uint8_t halfHV[256];\
1205 1205
     copy_block17(full, src, 24, stride, 17);\
1206 1206
     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1207
-    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
1207
+    put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1208 1208
     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1209
-    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
1209
+    OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1210 1210
 }\
1211 1211
 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1212 1212
     uint8_t full[24*17];\
... ...
@@ -1217,7 +1226,7 @@ void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){
1217 1217
     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1218 1218
     put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1219 1219
     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1220
-    OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1220
+    OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1221 1221
 }\
1222 1222
 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1223 1223
     uint8_t full[24*17];\
... ...
@@ -1225,9 +1234,9 @@ static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1225 1225
     uint8_t halfHV[256];\
1226 1226
     copy_block17(full, src, 24, stride, 17);\
1227 1227
     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1228
-    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
1228
+    put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1229 1229
     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1230
-    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1230
+    OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1231 1231
 }\
1232 1232
 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1233 1233
     uint8_t full[24*17];\
... ...
@@ -1238,7 +1247,7 @@ void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){
1238 1238
     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full  , 16, 24, 17);\
1239 1239
     put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1240 1240
     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1241
-    OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1241
+    OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1242 1242
 }\
1243 1243
 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1244 1244
     uint8_t full[24*17];\
... ...
@@ -1246,23 +1255,23 @@ static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1246 1246
     uint8_t halfHV[256];\
1247 1247
     copy_block17(full, src, 24, stride, 17);\
1248 1248
     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1249
-    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
1249
+    put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1250 1250
     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1251
-    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1251
+    OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1252 1252
 }\
1253 1253
 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1254 1254
     uint8_t halfH[272];\
1255 1255
     uint8_t halfHV[256];\
1256 1256
     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1257 1257
     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1258
-    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
1258
+    OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1259 1259
 }\
1260 1260
 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1261 1261
     uint8_t halfH[272];\
1262 1262
     uint8_t halfHV[256];\
1263 1263
     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1264 1264
     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1265
-    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1265
+    OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1266 1266
 }\
1267 1267
 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1268 1268
     uint8_t full[24*17];\
... ...
@@ -1273,14 +1282,14 @@ void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){
1273 1273
     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1274 1274
     put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1275 1275
     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1276
-    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
1276
+    OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1277 1277
 }\
1278 1278
 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1279 1279
     uint8_t full[24*17];\
1280 1280
     uint8_t halfH[272];\
1281 1281
     copy_block17(full, src, 24, stride, 17);\
1282 1282
     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1283
-    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
1283
+    put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1284 1284
     OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1285 1285
 }\
1286 1286
 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
... ...
@@ -1292,14 +1301,14 @@ void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){
1292 1292
     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1293 1293
     put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1294 1294
     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1295
-    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
1295
+    OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1296 1296
 }\
1297 1297
 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1298 1298
     uint8_t full[24*17];\
1299 1299
     uint8_t halfH[272];\
1300 1300
     copy_block17(full, src, 24, stride, 17);\
1301 1301
     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1302
-    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
1302
+    put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1303 1303
     OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1304 1304
 }\
1305 1305
 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
... ...
@@ -1327,7 +1336,7 @@ QPEL_MC(0, avg_       , _       , op_avg)
1327 1327
 #define put_qpel16_mc00_c ff_put_pixels16x16_c
1328 1328
 #define avg_qpel16_mc00_c ff_avg_pixels16x16_c
1329 1329
 #define put_no_rnd_qpel8_mc00_c  ff_put_pixels8x8_c
1330
-#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
1330
+#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_8_c
1331 1331
 
1332 1332
 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
1333 1333
     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
... ...
@@ -1349,16 +1358,16 @@ static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int
1349 1349
 
1350 1350
 #if CONFIG_RV40_DECODER
1351 1351
 static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){ /* RV40 16-wide "mc33" put entry: a thin wrapper that forwards to the xy2 pixel routine */
1352
-    put_pixels16_xy2_c(dst, src, stride, 16);
1352
+    put_pixels16_xy2_8_c(dst, src, stride, 16); /* same arguments as the removed call; only the callee name gains the 8-bit (_8) suffix */
1353 1353
 }
1354 1354
 static void avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){ /* RV40 16-wide "mc33" avg entry: forwards to the xy2 averaging pixel routine */
1355
-    avg_pixels16_xy2_c(dst, src, stride, 16);
1355
+    avg_pixels16_xy2_8_c(dst, src, stride, 16); /* same arguments as the removed call; only the callee name gains the 8-bit (_8) suffix */
1356 1356
 }
1357 1357
 static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){ /* RV40 8-wide "mc33" put entry: forwards to the xy2 pixel routine */
1358
-    put_pixels8_xy2_c(dst, src, stride, 8);
1358
+    put_pixels8_xy2_8_c(dst, src, stride, 8); /* same arguments as the removed call; only the callee name gains the 8-bit (_8) suffix */
1359 1359
 }
1360 1360
 static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){ /* RV40 8-wide "mc33" avg entry: forwards to the xy2 averaging pixel routine */
1361
-    avg_pixels8_xy2_c(dst, src, stride, 8);
1361
+    avg_pixels8_xy2_8_c(dst, src, stride, 8); /* same arguments as the removed call; only the callee name gains the 8-bit (_8) suffix */
1362 1362
 }
1363 1363
 #endif /* CONFIG_RV40_DECODER */
1364 1364
 
... ...
@@ -1394,7 +1403,7 @@ static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int
1394 1394
 static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){ /* WMV2 mspel 8x8 "mc10" put: combines src with its horizontally low-passed copy */
1395 1395
     uint8_t half[64]; /* 8x8 scratch block, written with a destination stride of 8 */
1396 1396
     wmv2_mspel8_h_lowpass(half, src, 8, stride, 8); /* args are (dst, src, dstStride, srcStride, h): filter 8 rows of src into half */
1397
-    put_pixels8_l2(dst, src, half, stride, stride, 8, 8);
1397
+    put_pixels8_l2_8(dst, src, half, stride, stride, 8, 8); /* two-source put of src and half into dst (presumably an average -- confirm against pixels8_l2); the diff only adds the _8 suffix */
1398 1398
 }
1399 1399
 
1400 1400
 static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
... ...
@@ -1404,7 +1413,7 @@ static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
1404 1404
 static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){ /* WMV2 mspel 8x8 "mc30" put: like mc10 but pairs the filtered block with src shifted one byte right */
1405 1405
     uint8_t half[64]; /* 8x8 scratch block, written with a destination stride of 8 */
1406 1406
     wmv2_mspel8_h_lowpass(half, src, 8, stride, 8); /* args are (dst, src, dstStride, srcStride, h): filter 8 rows of src into half */
1407
-    put_pixels8_l2(dst, src+1, half, stride, stride, 8, 8);
1407
+    put_pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8); /* two-source put of src+1 and half into dst (presumably an average -- confirm against pixels8_l2); the diff only adds the _8 suffix */
1408 1408
 }
1409 1409
 
1410 1410
 static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
... ...
@@ -1418,7 +1427,7 @@ static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
1418 1418
     wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
1419 1419
     wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
1420 1420
     wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
1421
-    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
1421
+    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
1422 1422
 }
1423 1423
 static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
1424 1424
     uint8_t halfH[88];
... ...
@@ -1427,7 +1436,7 @@ static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
1427 1427
     wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
1428 1428
     wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
1429 1429
     wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
1430
-    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
1430
+    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
1431 1431
 }
1432 1432
 static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
1433 1433
     uint8_t halfH[88];
... ...
@@ -2863,8 +2872,24 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
2863 2863
             c->idct_put= ff_jref_idct4_put;
2864 2864
             c->idct_add= ff_jref_idct4_add;
2865 2865
         }else{
2866
-            c->idct_put= ff_h264_lowres_idct_put_c;
2867
-            c->idct_add= ff_h264_lowres_idct_add_c;
2866
+            if (avctx->codec_id != CODEC_ID_H264) {
2867
+                c->idct_put= ff_h264_lowres_idct_put_8_c;
2868
+                c->idct_add= ff_h264_lowres_idct_add_8_c;
2869
+            } else {
2870
+                switch (avctx->bits_per_raw_sample) {
2871
+                    case 9:
2872
+                        c->idct_put= ff_h264_lowres_idct_put_9_c;
2873
+                        c->idct_add= ff_h264_lowres_idct_add_9_c;
2874
+                        break;
2875
+                    case 10:
2876
+                        c->idct_put= ff_h264_lowres_idct_put_10_c;
2877
+                        c->idct_add= ff_h264_lowres_idct_add_10_c;
2878
+                        break;
2879
+                    default:
2880
+                        c->idct_put= ff_h264_lowres_idct_put_8_c;
2881
+                        c->idct_add= ff_h264_lowres_idct_add_8_c;
2882
+                }
2883
+            }
2868 2884
         }
2869 2885
         c->idct    = j_rev_dct4;
2870 2886
         c->idct_permutation_type= FF_NO_IDCT_PERM;
... ...
@@ -2922,14 +2947,9 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
2922 2922
     c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_c;
2923 2923
     c->put_pixels_nonclamped = put_pixels_nonclamped_c;
2924 2924
     c->add_pixels_clamped = ff_add_pixels_clamped_c;
2925
-    c->add_pixels8 = add_pixels8_c;
2926
-    c->add_pixels4 = add_pixels4_c;
2927 2925
     c->sum_abs_dctelem = sum_abs_dctelem_c;
2928
-    c->emulated_edge_mc = ff_emulated_edge_mc;
2929 2926
     c->gmc1 = gmc1_c;
2930 2927
     c->gmc = ff_gmc_c;
2931
-    c->clear_block = clear_block_c;
2932
-    c->clear_blocks = clear_blocks_c;
2933 2928
     c->pix_sum = pix_sum_c;
2934 2929
     c->pix_norm1 = pix_norm1_c;
2935 2930
 
... ...
@@ -2947,30 +2967,6 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
2947 2947
     c->pix_abs[1][2] = pix_abs8_y2_c;
2948 2948
     c->pix_abs[1][3] = pix_abs8_xy2_c;
2949 2949
 
2950
-#define dspfunc(PFX, IDX, NUM) \
2951
-    c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c;     \
2952
-    c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c;  \
2953
-    c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c;  \
2954
-    c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c
2955
-
2956
-    dspfunc(put, 0, 16);
2957
-    dspfunc(put_no_rnd, 0, 16);
2958
-    dspfunc(put, 1, 8);
2959
-    dspfunc(put_no_rnd, 1, 8);
2960
-    dspfunc(put, 2, 4);
2961
-    dspfunc(put, 3, 2);
2962
-
2963
-    dspfunc(avg, 0, 16);
2964
-    dspfunc(avg_no_rnd, 0, 16);
2965
-    dspfunc(avg, 1, 8);
2966
-    dspfunc(avg_no_rnd, 1, 8);
2967
-    dspfunc(avg, 2, 4);
2968
-    dspfunc(avg, 3, 2);
2969
-#undef dspfunc
2970
-
2971
-    c->put_no_rnd_pixels_l2[0]= put_no_rnd_pixels16_l2_c;
2972
-    c->put_no_rnd_pixels_l2[1]= put_no_rnd_pixels8_l2_c;
2973
-
2974 2950
     c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
2975 2951
     c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
2976 2952
     c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
... ...
@@ -3021,23 +3017,7 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
3021 3021
     dspfunc(avg_qpel, 1, 8);
3022 3022
     /* dspfunc(avg_no_rnd_qpel, 1, 8); */
3023 3023
 
3024
-    dspfunc(put_h264_qpel, 0, 16);
3025
-    dspfunc(put_h264_qpel, 1, 8);
3026
-    dspfunc(put_h264_qpel, 2, 4);
3027
-    dspfunc(put_h264_qpel, 3, 2);
3028
-    dspfunc(avg_h264_qpel, 0, 16);
3029
-    dspfunc(avg_h264_qpel, 1, 8);
3030
-    dspfunc(avg_h264_qpel, 2, 4);
3031
-
3032 3024
 #undef dspfunc
3033
-    c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c;
3034
-    c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c;
3035
-    c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c;
3036
-    c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;
3037
-    c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
3038
-    c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;
3039
-
3040
-    c->draw_edges = draw_edges_c;
3041 3025
 
3042 3026
 #if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER
3043 3027
     ff_mlp_init(c, avctx);
... ...
@@ -3162,6 +3142,92 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
3162 3162
     memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
3163 3163
     memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
3164 3164
 
3165
+#undef FUNC
3166
+#undef FUNCC
3167
+#define FUNC(f, depth) f ## _ ## depth
3168
+#define FUNCC(f, depth) f ## _ ## depth ## _c
3169
+
3170
+#define dspfunc1(PFX, IDX, NUM, depth)\
3171
+    c->PFX ## _pixels_tab[IDX][0] = FUNCC(PFX ## _pixels ## NUM        , depth);\
3172
+    c->PFX ## _pixels_tab[IDX][1] = FUNCC(PFX ## _pixels ## NUM ## _x2 , depth);\
3173
+    c->PFX ## _pixels_tab[IDX][2] = FUNCC(PFX ## _pixels ## NUM ## _y2 , depth);\
3174
+    c->PFX ## _pixels_tab[IDX][3] = FUNCC(PFX ## _pixels ## NUM ## _xy2, depth)
3175
+
3176
+#define dspfunc2(PFX, IDX, NUM, depth)\
3177
+    c->PFX ## _pixels_tab[IDX][ 0] = FUNCC(PFX ## NUM ## _mc00, depth);\
3178
+    c->PFX ## _pixels_tab[IDX][ 1] = FUNCC(PFX ## NUM ## _mc10, depth);\
3179
+    c->PFX ## _pixels_tab[IDX][ 2] = FUNCC(PFX ## NUM ## _mc20, depth);\
3180
+    c->PFX ## _pixels_tab[IDX][ 3] = FUNCC(PFX ## NUM ## _mc30, depth);\
3181
+    c->PFX ## _pixels_tab[IDX][ 4] = FUNCC(PFX ## NUM ## _mc01, depth);\
3182
+    c->PFX ## _pixels_tab[IDX][ 5] = FUNCC(PFX ## NUM ## _mc11, depth);\
3183
+    c->PFX ## _pixels_tab[IDX][ 6] = FUNCC(PFX ## NUM ## _mc21, depth);\
3184
+    c->PFX ## _pixels_tab[IDX][ 7] = FUNCC(PFX ## NUM ## _mc31, depth);\
3185
+    c->PFX ## _pixels_tab[IDX][ 8] = FUNCC(PFX ## NUM ## _mc02, depth);\
3186
+    c->PFX ## _pixels_tab[IDX][ 9] = FUNCC(PFX ## NUM ## _mc12, depth);\
3187
+    c->PFX ## _pixels_tab[IDX][10] = FUNCC(PFX ## NUM ## _mc22, depth);\
3188
+    c->PFX ## _pixels_tab[IDX][11] = FUNCC(PFX ## NUM ## _mc32, depth);\
3189
+    c->PFX ## _pixels_tab[IDX][12] = FUNCC(PFX ## NUM ## _mc03, depth);\
3190
+    c->PFX ## _pixels_tab[IDX][13] = FUNCC(PFX ## NUM ## _mc13, depth);\
3191
+    c->PFX ## _pixels_tab[IDX][14] = FUNCC(PFX ## NUM ## _mc23, depth);\
3192
+    c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth)
3193
+
3194
+
3195
+#define BIT_DEPTH_FUNCS(depth)\
3196
+    c->draw_edges                    = FUNCC(draw_edges            , depth);\
3197
+    c->emulated_edge_mc              = FUNC (ff_emulated_edge_mc   , depth);\
3198
+    c->clear_block                   = FUNCC(clear_block           , depth);\
3199
+    c->clear_blocks                  = FUNCC(clear_blocks          , depth);\
3200
+    c->add_pixels8                   = FUNCC(add_pixels8           , depth);\
3201
+    c->add_pixels4                   = FUNCC(add_pixels4           , depth);\
3202
+    c->put_no_rnd_pixels_l2[0]       = FUNCC(put_no_rnd_pixels16_l2, depth);\
3203
+    c->put_no_rnd_pixels_l2[1]       = FUNCC(put_no_rnd_pixels8_l2 , depth);\
3204
+\
3205
+    c->put_h264_chroma_pixels_tab[0] = FUNCC(put_h264_chroma_mc8   , depth);\
3206
+    c->put_h264_chroma_pixels_tab[1] = FUNCC(put_h264_chroma_mc4   , depth);\
3207
+    c->put_h264_chroma_pixels_tab[2] = FUNCC(put_h264_chroma_mc2   , depth);\
3208
+    c->avg_h264_chroma_pixels_tab[0] = FUNCC(avg_h264_chroma_mc8   , depth);\
3209
+    c->avg_h264_chroma_pixels_tab[1] = FUNCC(avg_h264_chroma_mc4   , depth);\
3210
+    c->avg_h264_chroma_pixels_tab[2] = FUNCC(avg_h264_chroma_mc2   , depth);\
3211
+\
3212
+    dspfunc1(put       , 0, 16, depth);\
3213
+    dspfunc1(put       , 1,  8, depth);\
3214
+    dspfunc1(put       , 2,  4, depth);\
3215
+    dspfunc1(put       , 3,  2, depth);\
3216
+    dspfunc1(put_no_rnd, 0, 16, depth);\
3217
+    dspfunc1(put_no_rnd, 1,  8, depth);\
3218
+    dspfunc1(avg       , 0, 16, depth);\
3219
+    dspfunc1(avg       , 1,  8, depth);\
3220
+    dspfunc1(avg       , 2,  4, depth);\
3221
+    dspfunc1(avg       , 3,  2, depth);\
3222
+    dspfunc1(avg_no_rnd, 0, 16, depth);\
3223
+    dspfunc1(avg_no_rnd, 1,  8, depth);\
3224
+\
3225
+    dspfunc2(put_h264_qpel, 0, 16, depth);\
3226
+    dspfunc2(put_h264_qpel, 1,  8, depth);\
3227
+    dspfunc2(put_h264_qpel, 2,  4, depth);\
3228
+    dspfunc2(put_h264_qpel, 3,  2, depth);\
3229
+    dspfunc2(avg_h264_qpel, 0, 16, depth);\
3230
+    dspfunc2(avg_h264_qpel, 1,  8, depth);\
3231
+    dspfunc2(avg_h264_qpel, 2,  4, depth);
3232
+
3233
+    if (avctx->codec_id != CODEC_ID_H264 || avctx->bits_per_raw_sample == 8) {
3234
+        BIT_DEPTH_FUNCS(8)
3235
+    } else {
3236
+        switch (avctx->bits_per_raw_sample) {
3237
+            case 9:
3238
+                BIT_DEPTH_FUNCS(9)
3239
+                break;
3240
+            case 10:
3241
+                BIT_DEPTH_FUNCS(10)
3242
+                break;
3243
+            default:
3244
+                av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", avctx->bits_per_raw_sample);
3245
+                BIT_DEPTH_FUNCS(8)
3246
+                break;
3247
+        }
3248
+    }
3249
+
3250
+
3165 3251
     if (HAVE_MMX)        dsputil_init_mmx   (c, avctx);
3166 3252
     if (ARCH_ARM)        dsputil_init_arm   (c, avctx);
3167 3253
     if (CONFIG_MLIB)     dsputil_init_mlib  (c, avctx);
... ...
@@ -53,19 +53,24 @@ void ff_fdct_mmx(DCTELEM *block);
53 53
 void ff_fdct_mmx2(DCTELEM *block);
54 54
 void ff_fdct_sse2(DCTELEM *block);
55 55
 
56
-void ff_h264_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride);
57
-void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride);
58
-void ff_h264_idct8_dc_add_c(uint8_t *dst, DCTELEM *block, int stride);
59
-void ff_h264_idct_dc_add_c(uint8_t *dst, DCTELEM *block, int stride);
60
-void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block);
61
-void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block);
62
-void ff_h264_idct_add16_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
63
-void ff_h264_idct_add16intra_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
64
-void ff_h264_idct8_add4_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
65
-void ff_h264_idct_add8_c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
66
-
67
-void ff_h264_chroma_dc_dequant_idct_c(DCTELEM *block, int qmul);
68
-void ff_h264_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qmul);
56
+#define H264_IDCT(depth) /* declares the full set of C H.264 IDCT/dequant prototypes for one bit depth; each name is built as <base>_<depth>_c */\
57
+void ff_h264_idct8_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int stride);\
58
+void ff_h264_idct_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int stride);\
59
+void ff_h264_idct8_dc_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int stride);\
60
+void ff_h264_idct_dc_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int stride);\
61
+void ff_h264_lowres_idct_add_ ## depth ## _c(uint8_t *dst, int stride, DCTELEM *block);/* note the (dst, stride, block) argument order, unlike the idct_add prototypes above */\
62
+void ff_h264_lowres_idct_put_ ## depth ## _c(uint8_t *dst, int stride, DCTELEM *block);\
63
+void ff_h264_idct_add16_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
64
+void ff_h264_idct_add16intra_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
65
+void ff_h264_idct8_add4_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
66
+void ff_h264_idct_add8_ ## depth ## _c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);/* takes uint8_t **dest, not a single dst pointer */\
67
+void ff_h264_luma_dc_dequant_idct_ ## depth ## _c(DCTELEM *output, DCTELEM *input, int qmul);\
68
+void ff_h264_chroma_dc_dequant_idct_ ## depth ## _c(DCTELEM *block, int qmul);
69
+
70
+H264_IDCT( 8)
71
+H264_IDCT( 9)
72
+H264_IDCT(10)
73
+
69 74
 void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qp);
70 75
 void ff_svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
71 76
 
... ...
@@ -82,10 +87,20 @@ extern const uint8_t ff_zigzag248_direct[64];
82 82
 extern uint32_t ff_squareTbl[512];
83 83
 extern uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP];
84 84
 
85
-void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride);
86
-void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride);
87
-void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride);
88
-void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride);
85
+#define PUTAVG_PIXELS(depth)/* declares the 8x8 and 16x16 put/avg pixel prototypes for one bit depth; names are ff_<op>_pixelsNxN_<depth>_c */\
86
+void ff_put_pixels8x8_ ## depth ## _c(uint8_t *dst, uint8_t *src, int stride);\
87
+void ff_avg_pixels8x8_ ## depth ## _c(uint8_t *dst, uint8_t *src, int stride);\
88
+void ff_put_pixels16x16_ ## depth ## _c(uint8_t *dst, uint8_t *src, int stride);\
89
+void ff_avg_pixels16x16_ ## depth ## _c(uint8_t *dst, uint8_t *src, int stride);
90
+
91
+PUTAVG_PIXELS( 8)
92
+PUTAVG_PIXELS( 9)
93
+PUTAVG_PIXELS(10)
94
+
95
+#define ff_put_pixels8x8_c ff_put_pixels8x8_8_c
96
+#define ff_avg_pixels8x8_c ff_avg_pixels8x8_8_c
97
+#define ff_put_pixels16x16_c ff_put_pixels16x16_8_c
98
+#define ff_avg_pixels16x16_c ff_avg_pixels16x16_8_c
89 99
 
90 100
 /* VP3 DSP functions */
91 101
 void ff_vp3_idct_c(DCTELEM *block/* align 16*/);
... ...
@@ -187,10 +202,17 @@ typedef struct ScanTable{
187 187
 
188 188
 void ff_init_scantable(uint8_t *, ScanTable *st, const uint8_t *src_scantable);
189 189
 
190
-void ff_emulated_edge_mc(uint8_t *buf, const uint8_t *src, int linesize,
191
-                         int block_w, int block_h,
190
+#define EMULATED_EDGE(depth) /* declares ff_emulated_edge_mc_<depth> with the parameter list of the former ff_emulated_edge_mc prototype */\
191
+void ff_emulated_edge_mc_ ## depth (uint8_t *buf, const uint8_t *src, int linesize,\
192
+                         int block_w, int block_h,\
192 193
                          int src_x, int src_y, int w, int h); /* last line of the macro body: no continuation backslash here */
193 194
 
195
+EMULATED_EDGE(8)
196
+EMULATED_EDGE(9)
197
+EMULATED_EDGE(10)
198
+
199
+#define ff_emulated_edge_mc ff_emulated_edge_mc_8
200
+
194 201
 void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *dest, int linesize);
195 202
 void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *dest, int linesize);
196 203
 void ff_put_signed_pixels_clamped_c(const DCTELEM *block, uint8_t *dest, int linesize);
... ...
@@ -562,6 +584,7 @@ void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scant
562 562
 void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type);
563 563
 
564 564
 #define         BYTE_VEC32(c)   ((c)*0x01010101UL)
565
+#define         BYTE_VEC64(c)   ((c)*0x0001000100010001UL) /* multiplies c by a constant with a 1 in each 16-bit lane, i.e. replicates c (when it fits in 16 bits) into all four lanes of a 64-bit word. NOTE(review): the constant uses a UL suffix; where unsigned long is 32 bits this relies on C99 giving the constant a wider type -- confirm for all supported compilers */
565 566
 
566 567
 static inline uint32_t rnd_avg32(uint32_t a, uint32_t b)
567 568
 {
... ...
@@ -573,6 +596,16 @@ static inline uint32_t no_rnd_avg32(uint32_t a, uint32_t b)
573 573
     return (a & b) + (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1);
574 574
 }
575 575
 
576
+static inline uint64_t rnd_avg64(uint64_t a, uint64_t b) /* average of a and b rounded up, computed independently in each 16-bit lane of the 64-bit words */
577
+{
578
+    return (a | b) - (((a ^ b) & ~BYTE_VEC64(0x01)) >> 1); /* (a|b) - ((a^b)>>1) equals ceil((a+b)/2); clearing bit 0 of every 16-bit lane before the shift keeps it from sliding into the lane below */
579
+}
580
+
581
+static inline uint64_t no_rnd_avg64(uint64_t a, uint64_t b) /* average of a and b rounded down, computed independently in each 16-bit lane of the 64-bit words */
582
+{
583
+    return (a & b) + (((a ^ b) & ~BYTE_VEC64(0x01)) >> 1); /* (a&b) + ((a^b)>>1) equals floor((a+b)/2); clearing bit 0 of every 16-bit lane before the shift keeps it from sliding into the lane below */
584
+}
585
+
576 586
 static inline int get_penalty_factor(int lambda, int lambda2, int type){
577 587
     switch(type&0xFF){
578 588
     default:
... ...
@@ -27,25 +27,55 @@
27 27
  * DSP utils
28 28
  */
29 29
 
30
-#include "dsputil.h"
30
+#include "high_bit_depth.h"
31 31
 
32
-#define BIT_DEPTH 8
32
+static inline void FUNC(copy_block2)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h) /* copies h rows of 2 pixels from src to dst; both strides are byte offsets because the pointers are uint8_t* */
33
+{
34
+    int i;
35
+    for(i=0; i<h; i++)
36
+    {
37
+        AV_WN2P(dst   , AV_RN2P(src   )); /* one 2-pixel load/store per row; the removed 8-bit defines mapped AV_RN2P to AV_RN16, the per-depth versions presumably come from high_bit_depth.h -- confirm */
38
+        dst+=dstStride;
39
+        src+=srcStride;
40
+    }
41
+}
33 42
 
34
-#define pixel  uint8_t
35
-#define pixel2 uint16_t
36
-#define pixel4 uint32_t
37
-#define dctcoef int16_t
43
+static inline void FUNC(copy_block4)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h) /* copies h rows of 4 pixels from src to dst; both strides are byte offsets because the pointers are uint8_t* */
44
+{
45
+    int i;
46
+    for(i=0; i<h; i++)
47
+    {
48
+        AV_WN4P(dst   , AV_RN4P(src   )); /* one 4-pixel load/store per row; the removed 8-bit defines mapped AV_RN4P to AV_RN32, the per-depth versions presumably come from high_bit_depth.h -- confirm */
49
+        dst+=dstStride;
50
+        src+=srcStride;
51
+    }
52
+}
38 53
 
39
-#define FUNC(a)  a
40
-#define FUNCC(a) a ## _c
41
-#define INIT_CLIP uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
42
-#define CLIP(a) cm[a]
43
-#define AV_RN2P AV_RN16
44
-#define AV_RN4P AV_RN32
45
-#define PIXEL_MAX ((1<<BIT_DEPTH)-1)
54
+static inline void FUNC(copy_block8)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h) /* copies h rows of 8 pixels from src to dst as two 4-pixel transfers per row; strides are byte offsets */
55
+{
56
+    int i;
57
+    for(i=0; i<h; i++)
58
+    {
59
+        AV_WN4P(dst                , AV_RN4P(src                )); /* pixels 0..3 of the row */
60
+        AV_WN4P(dst+4*sizeof(pixel), AV_RN4P(src+4*sizeof(pixel))); /* pixels 4..7: byte offset scales with sizeof(pixel), so it tracks the pixel size of the current bit depth */
61
+        dst+=dstStride;
62
+        src+=srcStride;
63
+    }
64
+}
46 65
 
47
-#define no_rnd_avg_pixel4 no_rnd_avg32
48
-#define    rnd_avg_pixel4    rnd_avg32
66
+static inline void FUNC(copy_block16)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h) /* copies h rows of 16 pixels from src to dst as four 4-pixel transfers per row; strides are byte offsets */
67
+{
68
+    int i;
69
+    for(i=0; i<h; i++)
70
+    {
71
+        AV_WN4P(dst                 , AV_RN4P(src                 )); /* pixels 0..3 of the row */
72
+        AV_WN4P(dst+ 4*sizeof(pixel), AV_RN4P(src+ 4*sizeof(pixel))); /* pixels 4..7; byte offsets scale with sizeof(pixel) */
73
+        AV_WN4P(dst+ 8*sizeof(pixel), AV_RN4P(src+ 8*sizeof(pixel))); /* pixels 8..11 */
74
+        AV_WN4P(dst+12*sizeof(pixel), AV_RN4P(src+12*sizeof(pixel))); /* pixels 12..15 */
75
+        dst+=dstStride;
76
+        src+=srcStride;
77
+    }
78
+}
49 79
 
50 80
 /* draw the edges of width 'w' of an image of size width, height */
51 81
 //FIXME check that this is ok for mpeg4 interlaced
... ...
@@ -1317,10 +1347,22 @@ H264_MC(avg_, 16)
1317 1317
 #undef op2_avg
1318 1318
 #undef op2_put
1319 1319
 
1320
-#define put_h264_qpel8_mc00_c  ff_put_pixels8x8_c
1321
-#define avg_h264_qpel8_mc00_c  ff_avg_pixels8x8_c
1322
-#define put_h264_qpel16_mc00_c ff_put_pixels16x16_c
1323
-#define avg_h264_qpel16_mc00_c ff_avg_pixels16x16_c
1320
+#if BIT_DEPTH == 8 /* per-depth aliases: each mc00 qpel name of the current BIT_DEPTH maps to the ff_{put,avg}_pixelsNxN routine of the same depth */
1321
+#   define put_h264_qpel8_mc00_8_c  ff_put_pixels8x8_8_c
1322
+#   define avg_h264_qpel8_mc00_8_c  ff_avg_pixels8x8_8_c
1323
+#   define put_h264_qpel16_mc00_8_c ff_put_pixels16x16_8_c
1324
+#   define avg_h264_qpel16_mc00_8_c ff_avg_pixels16x16_8_c
1325
+#elif BIT_DEPTH == 9 /* same four aliases with the _9 suffix */
1326
+#   define put_h264_qpel8_mc00_9_c  ff_put_pixels8x8_9_c
1327
+#   define avg_h264_qpel8_mc00_9_c  ff_avg_pixels8x8_9_c
1328
+#   define put_h264_qpel16_mc00_9_c ff_put_pixels16x16_9_c
1329
+#   define avg_h264_qpel16_mc00_9_c ff_avg_pixels16x16_9_c
1330
+#elif BIT_DEPTH == 10 /* same four aliases with the _10 suffix */
1331
+#   define put_h264_qpel8_mc00_10_c  ff_put_pixels8x8_10_c
1332
+#   define avg_h264_qpel8_mc00_10_c  ff_avg_pixels8x8_10_c
1333
+#   define put_h264_qpel16_mc00_10_c ff_put_pixels16x16_10_c
1334
+#   define avg_h264_qpel16_mc00_10_c ff_avg_pixels16x16_10_c
1335
+#endif /* only the 8x8 and 16x16 sizes are aliased here; any other BIT_DEPTH value defines none of them */
1324 1336
 
1325 1337
 void FUNCC(ff_put_pixels8x8)(uint8_t *dst, uint8_t *src, int stride) {
1326 1338
     FUNCC(put_pixels8)(dst, src, stride, 8);
... ...
@@ -783,7 +783,7 @@ static void clone_tables(H264Context *dst, H264Context *src, int i){
783 783
     dst->list_counts              = src->list_counts;
784 784
 
785 785
     dst->s.obmc_scratchpad = NULL;
786
-    ff_h264_pred_init(&dst->hpc, src->s.codec_id);
786
+    ff_h264_pred_init(&dst->hpc, src->s.codec_id, src->sps.bit_depth_luma);
787 787
 }
788 788
 
789 789
 /**
... ...
@@ -811,8 +811,8 @@ static av_cold void common_init(H264Context *h){
811 811
     s->height = s->avctx->height;
812 812
     s->codec_id= s->avctx->codec->id;
813 813
 
814
-    ff_h264dsp_init(&h->h264dsp);
815
-    ff_h264_pred_init(&h->hpc, s->codec_id);
814
+    ff_h264dsp_init(&h->h264dsp, 8);
815
+    ff_h264_pred_init(&h->hpc, s->codec_id, 8);
816 816
 
817 817
     h->dequant_coeff_pps= -1;
818 818
     s->unrestricted_mv=1;
... ...
@@ -895,7 +895,7 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx){
895 895
     ff_h264_decode_init_vlc();
896 896
 
897 897
     h->pixel_shift = 0;
898
-    h->sps.bit_depth_luma = 8;
898
+    h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8;
899 899
 
900 900
     h->thread_context[0] = h;
901 901
     h->outputed_poc = INT_MIN;
... ...
@@ -2998,6 +2998,20 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
2998 2998
 
2999 2999
             if(avctx->has_b_frames < 2)
3000 3000
                 avctx->has_b_frames= !s->low_delay;
3001
+
3002
+            if (avctx->bits_per_raw_sample != h->sps.bit_depth_luma) {
3003
+                if (h->sps.bit_depth_luma >= 8 && h->sps.bit_depth_luma <= 10) {
3004
+                    avctx->bits_per_raw_sample = h->sps.bit_depth_luma;
3005
+                    h->pixel_shift = h->sps.bit_depth_luma > 8;
3006
+
3007
+                    ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma);
3008
+                    ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma);
3009
+                    dsputil_init(&s->dsp, s->avctx);
3010
+                } else {
3011
+                    av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", h->sps.bit_depth_luma);
3012
+                    return -1;
3013
+                }
3014
+            }
3001 3015
             break;
3002 3016
         case NAL_PPS:
3003 3017
             init_get_bits(&s->gb, ptr, bit_length);
... ...
@@ -29,57 +29,83 @@
29 29
 #include "avcodec.h"
30 30
 #include "h264dsp.h"
31 31
 
32
+#define BIT_DEPTH 8
32 33
 #include "h264dsp_template.c"
34
+#undef BIT_DEPTH
33 35
 
34
-void ff_h264dsp_init(H264DSPContext *c)
35
-{
36
-    c->h264_idct_add= ff_h264_idct_add_c;
37
-    c->h264_idct8_add= ff_h264_idct8_add_c;
38
-    c->h264_idct_dc_add= ff_h264_idct_dc_add_c;
39
-    c->h264_idct8_dc_add= ff_h264_idct8_dc_add_c;
40
-    c->h264_idct_add16     = ff_h264_idct_add16_c;
41
-    c->h264_idct8_add4     = ff_h264_idct8_add4_c;
42
-    c->h264_idct_add8      = ff_h264_idct_add8_c;
43
-    c->h264_idct_add16intra= ff_h264_idct_add16intra_c;
44
-    c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_c;
45
-    c->h264_chroma_dc_dequant_idct= ff_h264_chroma_dc_dequant_idct_c;
36
+#define BIT_DEPTH 9
37
+#include "h264dsp_template.c"
38
+#undef BIT_DEPTH
39
+
40
+#define BIT_DEPTH 10
41
+#include "h264dsp_template.c"
42
+#undef BIT_DEPTH
46 43
 
47
-    c->weight_h264_pixels_tab[0]= weight_h264_pixels16x16_c;
48
-    c->weight_h264_pixels_tab[1]= weight_h264_pixels16x8_c;
49
-    c->weight_h264_pixels_tab[2]= weight_h264_pixels8x16_c;
50
-    c->weight_h264_pixels_tab[3]= weight_h264_pixels8x8_c;
51
-    c->weight_h264_pixels_tab[4]= weight_h264_pixels8x4_c;
52
-    c->weight_h264_pixels_tab[5]= weight_h264_pixels4x8_c;
53
-    c->weight_h264_pixels_tab[6]= weight_h264_pixels4x4_c;
54
-    c->weight_h264_pixels_tab[7]= weight_h264_pixels4x2_c;
55
-    c->weight_h264_pixels_tab[8]= weight_h264_pixels2x4_c;
56
-    c->weight_h264_pixels_tab[9]= weight_h264_pixels2x2_c;
57
-    c->biweight_h264_pixels_tab[0]= biweight_h264_pixels16x16_c;
58
-    c->biweight_h264_pixels_tab[1]= biweight_h264_pixels16x8_c;
59
-    c->biweight_h264_pixels_tab[2]= biweight_h264_pixels8x16_c;
60
-    c->biweight_h264_pixels_tab[3]= biweight_h264_pixels8x8_c;
61
-    c->biweight_h264_pixels_tab[4]= biweight_h264_pixels8x4_c;
62
-    c->biweight_h264_pixels_tab[5]= biweight_h264_pixels4x8_c;
63
-    c->biweight_h264_pixels_tab[6]= biweight_h264_pixels4x4_c;
64
-    c->biweight_h264_pixels_tab[7]= biweight_h264_pixels4x2_c;
65
-    c->biweight_h264_pixels_tab[8]= biweight_h264_pixels2x4_c;
66
-    c->biweight_h264_pixels_tab[9]= biweight_h264_pixels2x2_c;
44
+void ff_h264dsp_init(H264DSPContext *c, const int bit_depth)
45
+{
46
+#undef FUNC
47
+#define FUNC(a, depth) a ## _ ## depth ## _c
67 48
 
68
-    c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_c;
69
-    c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c;
70
-    c->h264_h_loop_filter_luma_mbaff= h264_h_loop_filter_luma_mbaff_c;
71
-    c->h264_v_loop_filter_luma_intra= h264_v_loop_filter_luma_intra_c;
72
-    c->h264_h_loop_filter_luma_intra= h264_h_loop_filter_luma_intra_c;
73
-    c->h264_h_loop_filter_luma_mbaff_intra= h264_h_loop_filter_luma_mbaff_intra_c;
74
-    c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_c;
75
-    c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_c;
76
-    c->h264_h_loop_filter_chroma_mbaff= h264_h_loop_filter_chroma_mbaff_c;
77
-    c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_c;
78
-    c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_c;
79
-    c->h264_h_loop_filter_chroma_mbaff_intra= h264_h_loop_filter_chroma_mbaff_intra_c;
49
+#define H264_DSP(depth) \
50
+    c->h264_idct_add= FUNC(ff_h264_idct_add, depth);\
51
+    c->h264_idct8_add= FUNC(ff_h264_idct8_add, depth);\
52
+    c->h264_idct_dc_add= FUNC(ff_h264_idct_dc_add, depth);\
53
+    c->h264_idct8_dc_add= FUNC(ff_h264_idct8_dc_add, depth);\
54
+    c->h264_idct_add16     = FUNC(ff_h264_idct_add16, depth);\
55
+    c->h264_idct8_add4     = FUNC(ff_h264_idct8_add4, depth);\
56
+    c->h264_idct_add8      = FUNC(ff_h264_idct_add8, depth);\
57
+    c->h264_idct_add16intra= FUNC(ff_h264_idct_add16intra, depth);\
58
+    c->h264_luma_dc_dequant_idct= FUNC(ff_h264_luma_dc_dequant_idct, depth);\
59
+    c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma_dc_dequant_idct, depth);\
60
+\
61
+    c->weight_h264_pixels_tab[0]= FUNC(weight_h264_pixels16x16, depth);\
62
+    c->weight_h264_pixels_tab[1]= FUNC(weight_h264_pixels16x8, depth);\
63
+    c->weight_h264_pixels_tab[2]= FUNC(weight_h264_pixels8x16, depth);\
64
+    c->weight_h264_pixels_tab[3]= FUNC(weight_h264_pixels8x8, depth);\
65
+    c->weight_h264_pixels_tab[4]= FUNC(weight_h264_pixels8x4, depth);\
66
+    c->weight_h264_pixels_tab[5]= FUNC(weight_h264_pixels4x8, depth);\
67
+    c->weight_h264_pixels_tab[6]= FUNC(weight_h264_pixels4x4, depth);\
68
+    c->weight_h264_pixels_tab[7]= FUNC(weight_h264_pixels4x2, depth);\
69
+    c->weight_h264_pixels_tab[8]= FUNC(weight_h264_pixels2x4, depth);\
70
+    c->weight_h264_pixels_tab[9]= FUNC(weight_h264_pixels2x2, depth);\
71
+    c->biweight_h264_pixels_tab[0]= FUNC(biweight_h264_pixels16x16, depth);\
72
+    c->biweight_h264_pixels_tab[1]= FUNC(biweight_h264_pixels16x8, depth);\
73
+    c->biweight_h264_pixels_tab[2]= FUNC(biweight_h264_pixels8x16, depth);\
74
+    c->biweight_h264_pixels_tab[3]= FUNC(biweight_h264_pixels8x8, depth);\
75
+    c->biweight_h264_pixels_tab[4]= FUNC(biweight_h264_pixels8x4, depth);\
76
+    c->biweight_h264_pixels_tab[5]= FUNC(biweight_h264_pixels4x8, depth);\
77
+    c->biweight_h264_pixels_tab[6]= FUNC(biweight_h264_pixels4x4, depth);\
78
+    c->biweight_h264_pixels_tab[7]= FUNC(biweight_h264_pixels4x2, depth);\
79
+    c->biweight_h264_pixels_tab[8]= FUNC(biweight_h264_pixels2x4, depth);\
80
+    c->biweight_h264_pixels_tab[9]= FUNC(biweight_h264_pixels2x2, depth);\
81
+\
82
+    c->h264_v_loop_filter_luma= FUNC(h264_v_loop_filter_luma, depth);\
83
+    c->h264_h_loop_filter_luma= FUNC(h264_h_loop_filter_luma, depth);\
84
+    c->h264_h_loop_filter_luma_mbaff= FUNC(h264_h_loop_filter_luma_mbaff, depth);\
85
+    c->h264_v_loop_filter_luma_intra= FUNC(h264_v_loop_filter_luma_intra, depth);\
86
+    c->h264_h_loop_filter_luma_intra= FUNC(h264_h_loop_filter_luma_intra, depth);\
87
+    c->h264_h_loop_filter_luma_mbaff_intra= FUNC(h264_h_loop_filter_luma_mbaff_intra, depth);\
88
+    c->h264_v_loop_filter_chroma= FUNC(h264_v_loop_filter_chroma, depth);\
89
+    c->h264_h_loop_filter_chroma= FUNC(h264_h_loop_filter_chroma, depth);\
90
+    c->h264_h_loop_filter_chroma_mbaff= FUNC(h264_h_loop_filter_chroma_mbaff, depth);\
91
+    c->h264_v_loop_filter_chroma_intra= FUNC(h264_v_loop_filter_chroma_intra, depth);\
92
+    c->h264_h_loop_filter_chroma_intra= FUNC(h264_h_loop_filter_chroma_intra, depth);\
93
+    c->h264_h_loop_filter_chroma_mbaff_intra= FUNC(h264_h_loop_filter_chroma_mbaff_intra, depth);\
80 94
     c->h264_loop_filter_strength= NULL;
81 95
 
82
-    if (ARCH_ARM) ff_h264dsp_init_arm(c);
83
-    if (HAVE_ALTIVEC) ff_h264dsp_init_ppc(c);
84
-    if (HAVE_MMX) ff_h264dsp_init_x86(c);
96
+    switch (bit_depth) {
97
+    case 9:
98
+        H264_DSP(9);
99
+        break;
100
+    case 10:
101
+        H264_DSP(10);
102
+        break;
103
+    default:
104
+        H264_DSP(8);
105
+        break;
106
+    }
107
+
108
+    if (ARCH_ARM) ff_h264dsp_init_arm(c, bit_depth);
109
+    if (HAVE_ALTIVEC) ff_h264dsp_init_ppc(c, bit_depth);
110
+    if (HAVE_MMX) ff_h264dsp_init_x86(c, bit_depth);
85 111
 }
... ...
@@ -75,9 +75,9 @@ typedef struct H264DSPContext{
75 75
     void (*h264_chroma_dc_dequant_idct)(DCTELEM *block, int qmul);
76 76
 }H264DSPContext;
77 77
 
78
-void ff_h264dsp_init(H264DSPContext *c);
79
-void ff_h264dsp_init_arm(H264DSPContext *c);
80
-void ff_h264dsp_init_ppc(H264DSPContext *c);
81
-void ff_h264dsp_init_x86(H264DSPContext *c);
78
+void ff_h264dsp_init(H264DSPContext *c, const int bit_depth);
79
+void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth);
80
+void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth);
81
+void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth);
82 82
 
83 83
 #endif /* AVCODEC_H264DSP_H */
... ...
@@ -25,10 +25,7 @@
25 25
  * @author Michael Niedermayer <michaelni@gmx.at>
26 26
  */
27 27
 
28
-#define BIT_DEPTH 8
29
-#define pixel uint8_t
30
-#define av_clip_pixel av_clip_uint8
31
-#define FUNCC(a) a ## _c
28
+#include "high_bit_depth.h"
32 29
 
33 30
 #define op_scale1(x)  block[x] = av_clip_pixel( (block[x]*weight + offset) >> log2_denom )
34 31
 #define op_scale2(x)  dst[x] = av_clip_pixel( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))
... ...
@@ -25,4 +25,14 @@
25 25
  * @author Michael Niedermayer <michaelni@gmx.at>
26 26
  */
27 27
 
28
+#define BIT_DEPTH 8
28 29
 #include "h264idct_template.c"
30
+#undef BIT_DEPTH
31
+
32
+#define BIT_DEPTH 9
33
+#include "h264idct_template.c"
34
+#undef BIT_DEPTH
35
+
36
+#define BIT_DEPTH 10
37
+#include "h264idct_template.c"
38
+#undef BIT_DEPTH
... ...
@@ -25,7 +25,7 @@
25 25
  * @author Michael Niedermayer <michaelni@gmx.at>
26 26
  */
27 27
 
28
-#include "dsputil.h"
28
+#include "high_bit_depth.h"
29 29
 
30 30
 #ifndef AVCODEC_H264IDCT_INTERNAL_H
31 31
 #define AVCODEC_H264IDCT_INTERNAL_H
... ...
@@ -42,12 +42,6 @@ static const uint8_t scan8[16 + 2*4]={
42 42
 };
43 43
 #endif
44 44
 
45
-#define pixel  uint8_t
46
-#define dctcoef DCTELEM
47
-#define INIT_CLIP uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
48
-#define CLIP(a) cm[a]
49
-#define FUNCC(a) a ## _c
50
-
51 45
 static av_always_inline void FUNCC(idct_internal)(uint8_t *_dst, DCTELEM *_block, int stride, int block_stride, int shift, int add){
52 46
     int i;
53 47
     INIT_CLIP
... ...
@@ -26,7 +26,18 @@
26 26
  */
27 27
 
28 28
 #include "h264pred.h"
29
+
30
+#define BIT_DEPTH 8
31
+#include "h264pred_template.c"
32
+#undef BIT_DEPTH
33
+
34
+#define BIT_DEPTH 9
29 35
 #include "h264pred_template.c"
36
+#undef BIT_DEPTH
37
+
38
+#define BIT_DEPTH 10
39
+#include "h264pred_template.c"
40
+#undef BIT_DEPTH
30 41
 
31 42
 static void pred4x4_vertical_vp8_c(uint8_t *src, const uint8_t *topright, int stride){
32 43
     const int lt= src[-1-1*stride];
... ...
@@ -245,11 +256,11 @@ static void pred4x4_tm_vp8_c(uint8_t *src, const uint8_t *topright, int stride){
245 245
 }
246 246
 
247 247
 static void pred16x16_plane_svq3_c(uint8_t *src, int stride){
248
-    pred16x16_plane_compat_c(src, stride, 1, 0);
248
+    pred16x16_plane_compat_8_c(src, stride, 1, 0);
249 249
 }
250 250
 
251 251
 static void pred16x16_plane_rv40_c(uint8_t *src, int stride){
252
-    pred16x16_plane_compat_c(src, stride, 0, 1);
252
+    pred16x16_plane_compat_8_c(src, stride, 0, 1);
253 253
 }
254 254
 
255 255
 static void pred16x16_tm_vp8_c(uint8_t *src, int stride){
... ...
@@ -352,130 +363,149 @@ static void pred8x8_tm_vp8_c(uint8_t *src, int stride){
352 352
 /**
353 353
  * Set the intra prediction function pointers.
354 354
  */
355
-void ff_h264_pred_init(H264PredContext *h, int codec_id){
355
+void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){
356 356
 //    MpegEncContext * const s = &h->s;
357 357
 
358
-    if(codec_id != CODEC_ID_RV40){
359
-        if(codec_id == CODEC_ID_VP8) {
360
-            h->pred4x4[VERT_PRED       ]= pred4x4_vertical_vp8_c;
361
-            h->pred4x4[HOR_PRED        ]= pred4x4_horizontal_vp8_c;
362
-        } else {
363
-            h->pred4x4[VERT_PRED       ]= pred4x4_vertical_c;
364
-            h->pred4x4[HOR_PRED        ]= pred4x4_horizontal_c;
365
-        }
366
-        h->pred4x4[DC_PRED             ]= pred4x4_dc_c;
367
-        if(codec_id == CODEC_ID_SVQ3)
368
-            h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_svq3_c;
369
-        else
370
-            h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
371
-        h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
372
-        h->pred4x4[VERT_RIGHT_PRED     ]= pred4x4_vertical_right_c;
373
-        h->pred4x4[HOR_DOWN_PRED       ]= pred4x4_horizontal_down_c;
374
-        if (codec_id == CODEC_ID_VP8) {
375
-            h->pred4x4[VERT_LEFT_PRED  ]= pred4x4_vertical_left_vp8_c;
376
-        } else
377
-            h->pred4x4[VERT_LEFT_PRED  ]= pred4x4_vertical_left_c;
378
-        h->pred4x4[HOR_UP_PRED         ]= pred4x4_horizontal_up_c;
379
-        if(codec_id != CODEC_ID_VP8) {
380
-            h->pred4x4[LEFT_DC_PRED    ]= pred4x4_left_dc_c;
381
-            h->pred4x4[TOP_DC_PRED     ]= pred4x4_top_dc_c;
382
-            h->pred4x4[DC_128_PRED     ]= pred4x4_128_dc_c;
383
-        } else {
384
-            h->pred4x4[TM_VP8_PRED     ]= pred4x4_tm_vp8_c;
385
-            h->pred4x4[DC_127_PRED     ]= pred4x4_127_dc_c;
386
-            h->pred4x4[DC_129_PRED     ]= pred4x4_129_dc_c;
387
-            h->pred4x4[VERT_VP8_PRED   ]= pred4x4_vertical_c;
388
-            h->pred4x4[HOR_VP8_PRED    ]= pred4x4_horizontal_c;
389
-        }
390
-    }else{
391
-        h->pred4x4[VERT_PRED           ]= pred4x4_vertical_c;
392
-        h->pred4x4[HOR_PRED            ]= pred4x4_horizontal_c;
393
-        h->pred4x4[DC_PRED             ]= pred4x4_dc_c;
394
-        h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_rv40_c;
395
-        h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
396
-        h->pred4x4[VERT_RIGHT_PRED     ]= pred4x4_vertical_right_c;
397
-        h->pred4x4[HOR_DOWN_PRED       ]= pred4x4_horizontal_down_c;
398
-        h->pred4x4[VERT_LEFT_PRED      ]= pred4x4_vertical_left_rv40_c;
399
-        h->pred4x4[HOR_UP_PRED         ]= pred4x4_horizontal_up_rv40_c;
400
-        h->pred4x4[LEFT_DC_PRED        ]= pred4x4_left_dc_c;
401
-        h->pred4x4[TOP_DC_PRED         ]= pred4x4_top_dc_c;
402
-        h->pred4x4[DC_128_PRED         ]= pred4x4_128_dc_c;
403
-        h->pred4x4[DIAG_DOWN_LEFT_PRED_RV40_NODOWN]= pred4x4_down_left_rv40_nodown_c;
404
-        h->pred4x4[HOR_UP_PRED_RV40_NODOWN]= pred4x4_horizontal_up_rv40_nodown_c;
405
-        h->pred4x4[VERT_LEFT_PRED_RV40_NODOWN]= pred4x4_vertical_left_rv40_nodown_c;
358
+#undef FUNC
359
+#undef FUNCC
360
+#define FUNC(a, depth) a ## _ ## depth
361
+#define FUNCC(a, depth) a ## _ ## depth ## _c
362
+#define FUNCD(a) a ## _c
363
+
364
+#define H264_PRED(depth) \
365
+    if(codec_id != CODEC_ID_RV40){\
366
+        if(codec_id == CODEC_ID_VP8) {\
367
+            h->pred4x4[VERT_PRED       ]= FUNCD(pred4x4_vertical_vp8);\
368
+            h->pred4x4[HOR_PRED        ]= FUNCD(pred4x4_horizontal_vp8);\
369
+        } else {\
370
+            h->pred4x4[VERT_PRED       ]= FUNCC(pred4x4_vertical          , depth);\
371
+            h->pred4x4[HOR_PRED        ]= FUNCC(pred4x4_horizontal        , depth);\
372
+        }\
373
+        h->pred4x4[DC_PRED             ]= FUNCC(pred4x4_dc                , depth);\
374
+        if(codec_id == CODEC_ID_SVQ3)\
375
+            h->pred4x4[DIAG_DOWN_LEFT_PRED ]= FUNCD(pred4x4_down_left_svq3);\
376
+        else\
377
+            h->pred4x4[DIAG_DOWN_LEFT_PRED ]= FUNCC(pred4x4_down_left     , depth);\
378
+        h->pred4x4[DIAG_DOWN_RIGHT_PRED]= FUNCC(pred4x4_down_right        , depth);\
379
+        h->pred4x4[VERT_RIGHT_PRED     ]= FUNCC(pred4x4_vertical_right    , depth);\
380
+        h->pred4x4[HOR_DOWN_PRED       ]= FUNCC(pred4x4_horizontal_down   , depth);\
381
+        if (codec_id == CODEC_ID_VP8) {\
382
+            h->pred4x4[VERT_LEFT_PRED  ]= FUNCD(pred4x4_vertical_left_vp8);\
383
+        } else\
384
+            h->pred4x4[VERT_LEFT_PRED  ]= FUNCC(pred4x4_vertical_left     , depth);\
385
+        h->pred4x4[HOR_UP_PRED         ]= FUNCC(pred4x4_horizontal_up     , depth);\
386
+        if(codec_id != CODEC_ID_VP8) {\
387
+            h->pred4x4[LEFT_DC_PRED    ]= FUNCC(pred4x4_left_dc           , depth);\
388
+            h->pred4x4[TOP_DC_PRED     ]= FUNCC(pred4x4_top_dc            , depth);\
389
+            h->pred4x4[DC_128_PRED     ]= FUNCC(pred4x4_128_dc            , depth);\
390
+        } else {\
391
+            h->pred4x4[TM_VP8_PRED     ]= FUNCD(pred4x4_tm_vp8);\
392
+            h->pred4x4[DC_127_PRED     ]= FUNCC(pred4x4_127_dc            , depth);\
393
+            h->pred4x4[DC_129_PRED     ]= FUNCC(pred4x4_129_dc            , depth);\
394
+            h->pred4x4[VERT_VP8_PRED   ]= FUNCC(pred4x4_vertical          , depth);\
395
+            h->pred4x4[HOR_VP8_PRED    ]= FUNCC(pred4x4_horizontal        , depth);\
396
+        }\
397
+    }else{\
398
+        h->pred4x4[VERT_PRED           ]= FUNCC(pred4x4_vertical          , depth);\
399
+        h->pred4x4[HOR_PRED            ]= FUNCC(pred4x4_horizontal        , depth);\
400
+        h->pred4x4[DC_PRED             ]= FUNCC(pred4x4_dc                , depth);\
401
+        h->pred4x4[DIAG_DOWN_LEFT_PRED ]= FUNCD(pred4x4_down_left_rv40);\
402
+        h->pred4x4[DIAG_DOWN_RIGHT_PRED]= FUNCC(pred4x4_down_right        , depth);\
403
+        h->pred4x4[VERT_RIGHT_PRED     ]= FUNCC(pred4x4_vertical_right    , depth);\
404
+        h->pred4x4[HOR_DOWN_PRED       ]= FUNCC(pred4x4_horizontal_down   , depth);\
405
+        h->pred4x4[VERT_LEFT_PRED      ]= FUNCD(pred4x4_vertical_left_rv40);\
406
+        h->pred4x4[HOR_UP_PRED         ]= FUNCD(pred4x4_horizontal_up_rv40);\
407
+        h->pred4x4[LEFT_DC_PRED        ]= FUNCC(pred4x4_left_dc           , depth);\
408
+        h->pred4x4[TOP_DC_PRED         ]= FUNCC(pred4x4_top_dc            , depth);\
409
+        h->pred4x4[DC_128_PRED         ]= FUNCC(pred4x4_128_dc            , depth);\
410
+        h->pred4x4[DIAG_DOWN_LEFT_PRED_RV40_NODOWN]= FUNCD(pred4x4_down_left_rv40_nodown);\
411
+        h->pred4x4[HOR_UP_PRED_RV40_NODOWN]= FUNCD(pred4x4_horizontal_up_rv40_nodown);\
412
+        h->pred4x4[VERT_LEFT_PRED_RV40_NODOWN]= FUNCD(pred4x4_vertical_left_rv40_nodown);\
413
+    }\
414
+\
415
+    h->pred8x8l[VERT_PRED           ]= FUNCC(pred8x8l_vertical            , depth);\
416
+    h->pred8x8l[HOR_PRED            ]= FUNCC(pred8x8l_horizontal          , depth);\
417
+    h->pred8x8l[DC_PRED             ]= FUNCC(pred8x8l_dc                  , depth);\
418
+    h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= FUNCC(pred8x8l_down_left           , depth);\
419
+    h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= FUNCC(pred8x8l_down_right          , depth);\
420
+    h->pred8x8l[VERT_RIGHT_PRED     ]= FUNCC(pred8x8l_vertical_right      , depth);\
421
+    h->pred8x8l[HOR_DOWN_PRED       ]= FUNCC(pred8x8l_horizontal_down     , depth);\
422
+    h->pred8x8l[VERT_LEFT_PRED      ]= FUNCC(pred8x8l_vertical_left       , depth);\
423
+    h->pred8x8l[HOR_UP_PRED         ]= FUNCC(pred8x8l_horizontal_up       , depth);\
424
+    h->pred8x8l[LEFT_DC_PRED        ]= FUNCC(pred8x8l_left_dc             , depth);\
425
+    h->pred8x8l[TOP_DC_PRED         ]= FUNCC(pred8x8l_top_dc              , depth);\
426
+    h->pred8x8l[DC_128_PRED         ]= FUNCC(pred8x8l_128_dc              , depth);\
427
+\
428
+    h->pred8x8[VERT_PRED8x8   ]= FUNCC(pred8x8_vertical                   , depth);\
429
+    h->pred8x8[HOR_PRED8x8    ]= FUNCC(pred8x8_horizontal                 , depth);\
430
+    if (codec_id != CODEC_ID_VP8) {\
431
+        h->pred8x8[PLANE_PRED8x8]= FUNCC(pred8x8_plane                    , depth);\
432
+    } else\
433
+        h->pred8x8[PLANE_PRED8x8]= FUNCD(pred8x8_tm_vp8);\
434
+    if(codec_id != CODEC_ID_RV40 && codec_id != CODEC_ID_VP8){\
435
+        h->pred8x8[DC_PRED8x8     ]= FUNCC(pred8x8_dc                     , depth);\
436
+        h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x8_left_dc                , depth);\
437
+        h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x8_top_dc                 , depth);\
438
+        h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l0t, depth);\
439
+        h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0lt, depth);\
440
+        h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l00, depth);\
441
+        h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0l0, depth);\
442
+    }else{\
443
+        h->pred8x8[DC_PRED8x8     ]= FUNCD(pred8x8_dc_rv40);\
444
+        h->pred8x8[LEFT_DC_PRED8x8]= FUNCD(pred8x8_left_dc_rv40);\
445
+        h->pred8x8[TOP_DC_PRED8x8 ]= FUNCD(pred8x8_top_dc_rv40);\
446
+        if (codec_id == CODEC_ID_VP8) {\
447
+            h->pred8x8[DC_127_PRED8x8]= FUNCC(pred8x8_127_dc              , depth);\
448
+            h->pred8x8[DC_129_PRED8x8]= FUNCC(pred8x8_129_dc              , depth);\
449
+        }\
450
+    }\
451
+    h->pred8x8[DC_128_PRED8x8 ]= FUNCC(pred8x8_128_dc                     , depth);\
452
+\
453
+    h->pred16x16[DC_PRED8x8     ]= FUNCC(pred16x16_dc                     , depth);\
454
+    h->pred16x16[VERT_PRED8x8   ]= FUNCC(pred16x16_vertical               , depth);\
455
+    h->pred16x16[HOR_PRED8x8    ]= FUNCC(pred16x16_horizontal             , depth);\
456
+    switch(codec_id){\
457
+    case CODEC_ID_SVQ3:\
458
+       h->pred16x16[PLANE_PRED8x8  ]= FUNCD(pred16x16_plane_svq3);\
459
+       break;\
460
+    case CODEC_ID_RV40:\
461
+       h->pred16x16[PLANE_PRED8x8  ]= FUNCD(pred16x16_plane_rv40);\
462
+       break;\
463
+    case CODEC_ID_VP8:\
464
+       h->pred16x16[PLANE_PRED8x8  ]= FUNCD(pred16x16_tm_vp8);\
465
+       h->pred16x16[DC_127_PRED8x8]= FUNCC(pred16x16_127_dc               , depth);\
466
+       h->pred16x16[DC_129_PRED8x8]= FUNCC(pred16x16_129_dc               , depth);\
467
+       break;\
468
+    default:\
469
+       h->pred16x16[PLANE_PRED8x8  ]= FUNCC(pred16x16_plane               , depth);\
470
+       break;\
471
+    }\
472
+    h->pred16x16[LEFT_DC_PRED8x8]= FUNCC(pred16x16_left_dc                , depth);\
473
+    h->pred16x16[TOP_DC_PRED8x8 ]= FUNCC(pred16x16_top_dc                 , depth);\
474
+    h->pred16x16[DC_128_PRED8x8 ]= FUNCC(pred16x16_128_dc                 , depth);\
475
+\
476
+    /* special lossless h/v prediction for h264 */ \
477
+    h->pred4x4_add  [VERT_PRED   ]= FUNCC(pred4x4_vertical_add            , depth);\
478
+    h->pred4x4_add  [ HOR_PRED   ]= FUNCC(pred4x4_horizontal_add          , depth);\
479
+    h->pred8x8l_add [VERT_PRED   ]= FUNCC(pred8x8l_vertical_add           , depth);\
480
+    h->pred8x8l_add [ HOR_PRED   ]= FUNCC(pred8x8l_horizontal_add         , depth);\
481
+    h->pred8x8_add  [VERT_PRED8x8]= FUNCC(pred8x8_vertical_add            , depth);\
482
+    h->pred8x8_add  [ HOR_PRED8x8]= FUNCC(pred8x8_horizontal_add          , depth);\
483
+    h->pred16x16_add[VERT_PRED8x8]= FUNCC(pred16x16_vertical_add          , depth);\
484
+    h->pred16x16_add[ HOR_PRED8x8]= FUNCC(pred16x16_horizontal_add        , depth);\
485
+
486
+    switch (bit_depth) {
487
+        case 9:
488
+            H264_PRED(9)
489
+            break;
490
+        case 10:
491
+            H264_PRED(10)
492
+            break;
493
+        default:
494
+            H264_PRED(8)
495
+            break;
406 496
     }
407 497
 
408
-    h->pred8x8l[VERT_PRED           ]= pred8x8l_vertical_c;
409
-    h->pred8x8l[HOR_PRED            ]= pred8x8l_horizontal_c;
410
-    h->pred8x8l[DC_PRED             ]= pred8x8l_dc_c;
411
-    h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c;
412
-    h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c;
413
-    h->pred8x8l[VERT_RIGHT_PRED     ]= pred8x8l_vertical_right_c;
414
-    h->pred8x8l[HOR_DOWN_PRED       ]= pred8x8l_horizontal_down_c;
415
-    h->pred8x8l[VERT_LEFT_PRED      ]= pred8x8l_vertical_left_c;
416
-    h->pred8x8l[HOR_UP_PRED         ]= pred8x8l_horizontal_up_c;
417
-    h->pred8x8l[LEFT_DC_PRED        ]= pred8x8l_left_dc_c;
418
-    h->pred8x8l[TOP_DC_PRED         ]= pred8x8l_top_dc_c;
419
-    h->pred8x8l[DC_128_PRED         ]= pred8x8l_128_dc_c;
420
-
421
-    h->pred8x8[VERT_PRED8x8   ]= pred8x8_vertical_c;
422
-    h->pred8x8[HOR_PRED8x8    ]= pred8x8_horizontal_c;
423
-    if (codec_id != CODEC_ID_VP8) {
424
-        h->pred8x8[PLANE_PRED8x8]= pred8x8_plane_c;
425
-    } else
426
-        h->pred8x8[PLANE_PRED8x8]= pred8x8_tm_vp8_c;
427
-    if(codec_id != CODEC_ID_RV40 && codec_id != CODEC_ID_VP8){
428
-        h->pred8x8[DC_PRED8x8     ]= pred8x8_dc_c;
429
-        h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
430
-        h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
431
-        h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= pred8x8_mad_cow_dc_l0t;
432
-        h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= pred8x8_mad_cow_dc_0lt;
433
-        h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= pred8x8_mad_cow_dc_l00;
434
-        h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= pred8x8_mad_cow_dc_0l0;
435
-    }else{
436
-        h->pred8x8[DC_PRED8x8     ]= pred8x8_dc_rv40_c;
437
-        h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_rv40_c;
438
-        h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_rv40_c;
439
-        if (codec_id == CODEC_ID_VP8) {
440
-            h->pred8x8[DC_127_PRED8x8]= pred8x8_127_dc_c;
441
-            h->pred8x8[DC_129_PRED8x8]= pred8x8_129_dc_c;
442
-        }
443
-    }
444
-    h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c;
445
-
446
-    h->pred16x16[DC_PRED8x8     ]= pred16x16_dc_c;
447
-    h->pred16x16[VERT_PRED8x8   ]= pred16x16_vertical_c;
448
-    h->pred16x16[HOR_PRED8x8    ]= pred16x16_horizontal_c;
449
-    switch(codec_id){
450
-    case CODEC_ID_SVQ3:
451
-       h->pred16x16[PLANE_PRED8x8  ]= pred16x16_plane_svq3_c;
452
-       break;
453
-    case CODEC_ID_RV40:
454
-       h->pred16x16[PLANE_PRED8x8  ]= pred16x16_plane_rv40_c;
455
-       break;
456
-    case CODEC_ID_VP8:
457
-       h->pred16x16[PLANE_PRED8x8  ]= pred16x16_tm_vp8_c;
458
-       h->pred16x16[DC_127_PRED8x8]= pred16x16_127_dc_c;
459
-       h->pred16x16[DC_129_PRED8x8]= pred16x16_129_dc_c;
460
-       break;
461
-    default:
462
-       h->pred16x16[PLANE_PRED8x8  ]= pred16x16_plane_c;
463
-       break;
464
-    }
465
-    h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
466
-    h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
467
-    h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c;
468
-
469
-    //special lossless h/v prediction for h264
470
-    h->pred4x4_add  [VERT_PRED   ]= pred4x4_vertical_add_c;
471
-    h->pred4x4_add  [ HOR_PRED   ]= pred4x4_horizontal_add_c;
472
-    h->pred8x8l_add [VERT_PRED   ]= pred8x8l_vertical_add_c;
473
-    h->pred8x8l_add [ HOR_PRED   ]= pred8x8l_horizontal_add_c;
474
-    h->pred8x8_add  [VERT_PRED8x8]= pred8x8_vertical_add_c;
475
-    h->pred8x8_add  [ HOR_PRED8x8]= pred8x8_horizontal_add_c;
476
-    h->pred16x16_add[VERT_PRED8x8]= pred16x16_vertical_add_c;
477
-    h->pred16x16_add[ HOR_PRED8x8]= pred16x16_horizontal_add_c;
478
-
479
-    if (ARCH_ARM) ff_h264_pred_init_arm(h, codec_id);
480
-    if (HAVE_MMX) ff_h264_pred_init_x86(h, codec_id);
498
+    if (ARCH_ARM) ff_h264_pred_init_arm(h, codec_id, bit_depth);
499
+    if (HAVE_MMX) ff_h264_pred_init_x86(h, codec_id, bit_depth);
481 500
 }
... ...
@@ -101,8 +101,8 @@ typedef struct H264PredContext{
101 101
     void (*pred16x16_add[3])(uint8_t *pix/*align 16*/, const int *block_offset, const DCTELEM *block/*align 16*/, int stride);
102 102
 }H264PredContext;
103 103
 
104
-void ff_h264_pred_init(H264PredContext *h, int codec_id);
105
-void ff_h264_pred_init_arm(H264PredContext *h, int codec_id);
106
-void ff_h264_pred_init_x86(H264PredContext *h, int codec_id);
104
+void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth);
105
+void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, const int bit_depth);
106
+void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth);
107 107
 
108 108
 #endif /* AVCODEC_H264PRED_H */
... ...
@@ -26,21 +26,7 @@
26 26
  */
27 27
 
28 28
 #include "mathops.h"
29
-#include "dsputil.h"
30
-
31
-#define BIT_DEPTH 8
32
-
33
-#define pixel uint8_t
34
-#define pixel4 uint32_t
35
-#define dctcoef DCTELEM
36
-
37
-#define INIT_CLIP uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
38
-#define CLIP(a) cm[a]
39
-#define FUNC(a) a
40
-#define FUNCC(a) a ## _c
41
-#define PIXEL_SPLAT_X4(x) ((x)*0x01010101U)
42
-#define AV_WN4P  AV_WN32
43
-#define AV_WN4PA AV_WN32A
29
+#include "high_bit_depth.h"
44 30
 
45 31
 static void FUNCC(pred4x4_vertical)(uint8_t *_src, const uint8_t *topright, int _stride){
46 32
     pixel *src = (pixel*)_src;
47 33
new file mode 100644
... ...
@@ -0,0 +1,85 @@
0
+#include "dsputil.h"
1
+
2
+#ifndef BIT_DEPTH
3
+#define BIT_DEPTH 8
4
+#endif
5
+
6
+#ifdef AVCODEC_H264_HIGH_DEPTH_H
7
+#   undef pixel
8
+#   undef pixel2
9
+#   undef pixel4
10
+#   undef dctcoef
11
+#   undef INIT_CLIP
12
+#   undef no_rnd_avg_pixel4
13
+#   undef rnd_avg_pixel4
14
+#   undef AV_RN2P
15
+#   undef AV_RN4P
16
+#   undef AV_WN2P
17
+#   undef AV_WN4P
18
+#   undef AV_WN4PA
19
+#   undef CLIP
20
+#   undef FUNC
21
+#   undef FUNCC
22
+#   undef av_clip_pixel
23
+#   undef PIXEL_SPLAT_X4
24
+#else
25
+#   define AVCODEC_H264_HIGH_DEPTH_H
26
+#   define CLIP_PIXEL(depth)\
27
+    static inline uint16_t av_clip_pixel_ ## depth (int p)\
28
+    {\
29
+        const int pixel_max = (1 << depth)-1;\
30
+        return (p & ~pixel_max) ? (-p)>>31 & pixel_max : p;\
31
+    }
32
+
33
+CLIP_PIXEL( 9)
34
+CLIP_PIXEL(10)
35
+#endif
36
+
37
+#if BIT_DEPTH > 8
38
+#   define pixel  uint16_t
39
+#   define pixel2 uint32_t
40
+#   define pixel4 uint64_t
41
+#   define dctcoef int32_t
42
+
43
+#   define INIT_CLIP
44
+#   define no_rnd_avg_pixel4 no_rnd_avg64
45
+#   define    rnd_avg_pixel4    rnd_avg64
46
+#   define AV_RN2P  AV_RN32
47
+#   define AV_RN4P  AV_RN64
48
+#   define AV_WN2P  AV_WN32
49
+#   define AV_WN4P  AV_WN64
50
+#   define AV_WN4PA AV_WN64A
51
+#   define PIXEL_SPLAT_X4(x) ((x)*0x0001000100010001ULL)
52
+#else
53
+#   define pixel  uint8_t
54
+#   define pixel2 uint16_t
55
+#   define pixel4 uint32_t
56
+#   define dctcoef int16_t
57
+
58
+#   define INIT_CLIP uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
59
+#   define no_rnd_avg_pixel4 no_rnd_avg32
60
+#   define    rnd_avg_pixel4    rnd_avg32
61
+#   define AV_RN2P  AV_RN16
62
+#   define AV_RN4P  AV_RN32
63
+#   define AV_WN2P  AV_WN16
64
+#   define AV_WN4P  AV_WN32
65
+#   define AV_WN4PA AV_WN32A
66
+#   define PIXEL_SPLAT_X4(x) ((x)*0x01010101U)
67
+#endif
68
+
69
+#if BIT_DEPTH == 8
70
+#   define av_clip_pixel(a) av_clip_uint8(a)
71
+#   define CLIP(a) cm[a]
72
+#   define FUNC(a)  a ## _8
73
+#   define FUNCC(a) a ## _8_c
74
+#elif BIT_DEPTH == 9
75
+#   define av_clip_pixel(a) av_clip_pixel_9(a)
76
+#   define CLIP(a)          av_clip_pixel_9(a)
77
+#   define FUNC(a)  a ## _9
78
+#   define FUNCC(a) a ## _9_c
79
+#elif BIT_DEPTH == 10
80
+#   define av_clip_pixel(a) av_clip_pixel_10(a)
81
+#   define CLIP(a)          av_clip_pixel_10(a)
82
+#   define FUNC(a)  a ## _10
83
+#   define FUNCC(a) a ## _10_c
84
+#endif
... ...
@@ -421,10 +421,13 @@ static void ff_fdct_mlib(DCTELEM *data)
421 421
 
422 422
 void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx)
423 423
 {
424
+    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
425
+
424 426
     c->get_pixels  = get_pixels_mlib;
425 427
     c->diff_pixels = diff_pixels_mlib;
426 428
     c->add_pixels_clamped = add_pixels_clamped_mlib;
427 429
 
430
+    if (!high_bit_depth) {
428 431
     c->put_pixels_tab[0][0] = put_pixels16_mlib;
429 432
     c->put_pixels_tab[0][1] = put_pixels16_x2_mlib;
430 433
     c->put_pixels_tab[0][2] = put_pixels16_y2_mlib;
... ...
@@ -445,6 +448,7 @@ void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx)
445 445
 
446 446
     c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mlib;
447 447
     c->put_no_rnd_pixels_tab[1][0] = put_pixels8_mlib;
448
+    }
448 449
 
449 450
     c->bswap_buf = bswap_buf_mlib;
450 451
 }
... ...
@@ -1384,6 +1384,8 @@ static void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int l
1384 1384
 
1385 1385
 void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx)
1386 1386
 {
1387
+    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
1388
+
1387 1389
     c->pix_abs[0][1] = sad16_x2_altivec;
1388 1390
     c->pix_abs[0][2] = sad16_y2_altivec;
1389 1391
     c->pix_abs[0][3] = sad16_xy2_altivec;
... ...
@@ -1397,8 +1399,10 @@ void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx)
1397 1397
     c->pix_sum = pix_sum_altivec;
1398 1398
     c->diff_pixels = diff_pixels_altivec;
1399 1399
     c->get_pixels = get_pixels_altivec;
1400
+    if (!high_bit_depth)
1400 1401
     c->clear_block = clear_block_altivec;
1401 1402
     c->add_bytes= add_bytes_altivec;
1403
+    if (!high_bit_depth) {
1402 1404
     c->put_pixels_tab[0][0] = put_pixels16_altivec;
1403 1405
     /* the two functions do the same thing, so use the same code */
1404 1406
     c->put_no_rnd_pixels_tab[0][0] = put_pixels16_altivec;
... ...
@@ -1409,6 +1413,7 @@ void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx)
1409 1409
     c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_altivec;
1410 1410
     c->put_pixels_tab[0][3] = put_pixels16_xy2_altivec;
1411 1411
     c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_altivec;
1412
+    }
1412 1413
 
1413 1414
     c->hadamard8_diff[0] = hadamard8_diff16_altivec;
1414 1415
     c->hadamard8_diff[1] = hadamard8_diff8x8_altivec;
... ...
@@ -153,8 +153,11 @@ static void prefetch_ppc(void *mem, int stride, int h)
153 153
 
154 154
 void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
155 155
 {
156
+    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
157
+
156 158
     // Common optimizations whether AltiVec is available or not
157 159
     c->prefetch = prefetch_ppc;
160
+    if (!high_bit_depth) {
158 161
     switch (check_dcbzl_effect()) {
159 162
         case 32:
160 163
             c->clear_blocks = clear_blocks_dcbz32_ppc;
... ...
@@ -165,6 +168,7 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
165 165
         default:
166 166
             break;
167 167
     }
168
+    }
168 169
 
169 170
 #if HAVE_ALTIVEC
170 171
     if(CONFIG_H264_DECODER) dsputil_h264_init_ppc(c, avctx);
... ...
@@ -965,8 +965,10 @@ H264_WEIGHT( 8, 8)
965 965
 H264_WEIGHT( 8, 4)
966 966
 
967 967
 void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {
968
+    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
968 969
 
969 970
     if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) {
971
+    if (!high_bit_depth) {
970 972
         c->put_h264_chroma_pixels_tab[0] = put_h264_chroma_mc8_altivec;
971 973
         c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_altivec;
972 974
 
... ...
@@ -992,11 +994,13 @@ void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {
992 992
         dspfunc(avg_h264_qpel, 0, 16);
993 993
 #undef dspfunc
994 994
     }
995
+    }
995 996
 }
996 997
 
997
-void ff_h264dsp_init_ppc(H264DSPContext *c)
998
+void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth)
998 999
 {
999 1000
     if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) {
1001
+    if (bit_depth == 8) {
1000 1002
         c->h264_idct_add = ff_h264_idct_add_altivec;
1001 1003
         c->h264_idct_add8 = ff_h264_idct_add8_altivec;
1002 1004
         c->h264_idct_add16 = ff_h264_idct_add16_altivec;
... ...
@@ -1019,4 +1023,5 @@ void ff_h264dsp_init_ppc(H264DSPContext *c)
1019 1019
         c->biweight_h264_pixels_tab[3] = ff_biweight_h264_pixels8x8_altivec;
1020 1020
         c->biweight_h264_pixels_tab[4] = ff_biweight_h264_pixels8x4_altivec;
1021 1021
     }
1022
+    }
1022 1023
 }
... ...
@@ -142,7 +142,9 @@ static void put_pixels16_mmi(uint8_t *block, const uint8_t *pixels, int line_siz
142 142
 void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx)
143 143
 {
144 144
     const int idct_algo= avctx->idct_algo;
145
+    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
145 146
 
147
+    if (!high_bit_depth) {
146 148
     c->clear_blocks = clear_blocks_mmi;
147 149
 
148 150
     c->put_pixels_tab[1][0] = put_pixels8_mmi;
... ...
@@ -150,6 +152,7 @@ void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx)
150 150
 
151 151
     c->put_pixels_tab[0][0] = put_pixels16_mmi;
152 152
     c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mmi;
153
+    }
153 154
 
154 155
     c->get_pixels = get_pixels_mmi;
155 156
 
... ...
@@ -1384,7 +1384,7 @@ av_cold int ff_rv34_decode_init(AVCodecContext *avctx)
1384 1384
     if (MPV_common_init(s) < 0)
1385 1385
         return -1;
1386 1386
 
1387
-    ff_h264_pred_init(&r->h, CODEC_ID_RV40);
1387
+    ff_h264_pred_init(&r->h, CODEC_ID_RV40, 8);
1388 1388
 
1389 1389
     r->intra_types_stride = 4*s->mb_stride + 4;
1390 1390
     r->intra_types_hist = av_malloc(r->intra_types_stride * 4 * 2 * sizeof(*r->intra_types_hist));
... ...
@@ -333,6 +333,9 @@ DEFFUNC(avg,no_rnd,xy,16,OP_XY,PACK)
333 333
 
334 334
 void dsputil_init_align(DSPContext* c, AVCodecContext *avctx)
335 335
 {
336
+        const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
337
+
338
+        if (!high_bit_depth) {
336 339
         c->put_pixels_tab[0][0] = put_rnd_pixels16_o;
337 340
         c->put_pixels_tab[0][1] = put_rnd_pixels16_x;
338 341
         c->put_pixels_tab[0][2] = put_rnd_pixels16_y;
... ...
@@ -368,6 +371,7 @@ void dsputil_init_align(DSPContext* c, AVCodecContext *avctx)
368 368
         c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x;
369 369
         c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y;
370 370
         c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy;
371
+        }
371 372
 
372 373
 #ifdef QPEL
373 374
 
... ...
@@ -401,20 +405,24 @@ void dsputil_init_align(DSPContext* c, AVCodecContext *avctx)
401 401
     dspfunc(avg_qpel, 1, 8);
402 402
     /* dspfunc(avg_no_rnd_qpel, 1, 8); */
403 403
 
404
+    if (!high_bit_depth) {
404 405
     dspfunc(put_h264_qpel, 0, 16);
405 406
     dspfunc(put_h264_qpel, 1, 8);
406 407
     dspfunc(put_h264_qpel, 2, 4);
407 408
     dspfunc(avg_h264_qpel, 0, 16);
408 409
     dspfunc(avg_h264_qpel, 1, 8);
409 410
     dspfunc(avg_h264_qpel, 2, 4);
411
+    }
410 412
 
411 413
 #undef dspfunc
414
+    if (!high_bit_depth) {
412 415
     c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_sh4;
413 416
     c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_sh4;
414 417
     c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_sh4;
415 418
     c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_sh4;
416 419
     c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_sh4;
417 420
     c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_sh4;
421
+    }
418 422
 
419 423
     c->put_mspel_pixels_tab[0]= put_mspel8_mc00_sh4;
420 424
     c->put_mspel_pixels_tab[1]= put_mspel8_mc10_sh4;
... ...
@@ -92,8 +92,10 @@ static void idct_add(uint8_t *dest, int line_size, DCTELEM *block)
92 92
 void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx)
93 93
 {
94 94
         const int idct_algo= avctx->idct_algo;
95
+        const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
95 96
         dsputil_init_align(c,avctx);
96 97
 
98
+        if (!high_bit_depth)
97 99
         c->clear_blocks = clear_blocks_sh4;
98 100
         if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SH4){
99 101
                 c->idct_put = idct_put;
... ...
@@ -3953,6 +3953,7 @@ void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx)
3953 3953
 {
3954 3954
   /* VIS-specific optimizations */
3955 3955
   int accel = vis_level ();
3956
+  const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
3956 3957
 
3957 3958
   if (accel & ACCEL_SPARC_VIS) {
3958 3959
       if(avctx->idct_algo==FF_IDCT_SIMPLEVIS){
... ...
@@ -3962,6 +3963,7 @@ void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx)
3962 3962
           c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
3963 3963
       }
3964 3964
 
3965
+      if (!high_bit_depth) {
3965 3966
       c->put_pixels_tab[0][0] = MC_put_o_16_vis;
3966 3967
       c->put_pixels_tab[0][1] = MC_put_x_16_vis;
3967 3968
       c->put_pixels_tab[0][2] = MC_put_y_16_vis;
... ...
@@ -4001,5 +4003,6 @@ void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx)
4001 4001
       c->avg_no_rnd_pixels_tab[1][1] = MC_avg_no_round_x_8_vis;
4002 4002
       c->avg_no_rnd_pixels_tab[1][2] = MC_avg_no_round_y_8_vis;
4003 4003
       c->avg_no_rnd_pixels_tab[1][3] = MC_avg_no_round_xy_8_vis;
4004
+      }
4004 4005
   }
4005 4006
 }
... ...
@@ -1698,7 +1698,7 @@ static av_cold int vp8_decode_init(AVCodecContext *avctx)
1698 1698
     avctx->pix_fmt = PIX_FMT_YUV420P;
1699 1699
 
1700 1700
     dsputil_init(&s->dsp, avctx);
1701
-    ff_h264_pred_init(&s->hpc, CODEC_ID_VP8);
1701
+    ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8);
1702 1702
     ff_vp8dsp_init(&s->vp8dsp);
1703 1703
 
1704 1704
     return 0;
... ...
@@ -2418,6 +2418,7 @@ float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
2418 2418
 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2419 2419
 {
2420 2420
     int mm_flags = av_get_cpu_flags();
2421
+    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
2421 2422
 
2422 2423
     if (avctx->dsp_mask) {
2423 2424
         if (avctx->dsp_mask & AV_CPU_FLAG_FORCE)
... ...
@@ -2499,6 +2500,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2499 2499
         c->put_pixels_clamped = ff_put_pixels_clamped_mmx;
2500 2500
         c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
2501 2501
         c->add_pixels_clamped = ff_add_pixels_clamped_mmx;
2502
+        if (!high_bit_depth) {
2502 2503
         c->clear_block  = clear_block_mmx;
2503 2504
         c->clear_blocks = clear_blocks_mmx;
2504 2505
         if ((mm_flags & AV_CPU_FLAG_SSE) &&
... ...
@@ -2507,6 +2509,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2507 2507
             c->clear_block  = clear_block_sse;
2508 2508
             c->clear_blocks = clear_blocks_sse;
2509 2509
         }
2510
+        }
2510 2511
 
2511 2512
 #define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \
2512 2513
         c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## SIZE ## _ ## CPU; \
... ...
@@ -2514,6 +2517,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2514 2514
         c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## SIZE ## _y2_ ## CPU; \
2515 2515
         c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## SIZE ## _xy2_ ## CPU
2516 2516
 
2517
+        if (!high_bit_depth) {
2517 2518
         SET_HPEL_FUNCS(put, 0, 16, mmx);
2518 2519
         SET_HPEL_FUNCS(put_no_rnd, 0, 16, mmx);
2519 2520
         SET_HPEL_FUNCS(avg, 0, 16, mmx);
... ...
@@ -2522,17 +2526,20 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2522 2522
         SET_HPEL_FUNCS(put_no_rnd, 1, 8, mmx);
2523 2523
         SET_HPEL_FUNCS(avg, 1, 8, mmx);
2524 2524
         SET_HPEL_FUNCS(avg_no_rnd, 1, 8, mmx);
2525
+        }
2525 2526
 
2526 2527
 #if ARCH_X86_32 || !HAVE_YASM
2527 2528
         c->gmc= gmc_mmx;
2528 2529
 #endif
2529 2530
 #if ARCH_X86_32 && HAVE_YASM
2531
+        if (!high_bit_depth)
2530 2532
         c->emulated_edge_mc = emulated_edge_mc_mmx;
2531 2533
 #endif
2532 2534
 
2533 2535
         c->add_bytes= add_bytes_mmx;
2534 2536
         c->add_bytes_l2= add_bytes_l2_mmx;
2535 2537
 
2538
+        if (!high_bit_depth)
2536 2539
         c->draw_edges = draw_edges_mmx;
2537 2540
 
2538 2541
         if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
... ...
@@ -2541,8 +2548,10 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2541 2541
         }
2542 2542
 
2543 2543
 #if HAVE_YASM
2544
+        if (!high_bit_depth) {
2544 2545
         c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_mmx_rnd;
2545 2546
         c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_mmx;
2547
+        }
2546 2548
 
2547 2549
         c->put_rv40_chroma_pixels_tab[0]= ff_put_rv40_chroma_mc8_mmx;
2548 2550
         c->put_rv40_chroma_pixels_tab[1]= ff_put_rv40_chroma_mc4_mmx;
... ...
@@ -2551,6 +2560,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2551 2551
         if (mm_flags & AV_CPU_FLAG_MMX2) {
2552 2552
             c->prefetch = prefetch_mmx2;
2553 2553
 
2554
+            if (!high_bit_depth) {
2554 2555
             c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
2555 2556
             c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
2556 2557
 
... ...
@@ -2564,14 +2574,17 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2564 2564
             c->avg_pixels_tab[1][0] = avg_pixels8_mmx2;
2565 2565
             c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2;
2566 2566
             c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2;
2567
+            }
2567 2568
 
2568 2569
             if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
2570
+                if (!high_bit_depth) {
2569 2571
                 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2;
2570 2572
                 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2;
2571 2573
                 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2;
2572 2574
                 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2;
2573 2575
                 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2;
2574 2576
                 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2;
2577
+                }
2575 2578
 
2576 2579
                 if (CONFIG_VP3_DECODER && HAVE_YASM) {
2577 2580
                     c->vp3_v_loop_filter= ff_vp3_v_loop_filter_mmx2;
... ...
@@ -2613,12 +2626,14 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2613 2613
             SET_QPEL_FUNCS(avg_qpel, 0, 16, mmx2);
2614 2614
             SET_QPEL_FUNCS(avg_qpel, 1, 8, mmx2);
2615 2615
 
2616
+            if (!high_bit_depth) {
2616 2617
             SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmx2);
2617 2618
             SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmx2);
2618 2619
             SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmx2);
2619 2620
             SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmx2);
2620 2621
             SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmx2);
2621 2622
             SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmx2);
2623
+            }
2622 2624
 
2623 2625
             SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2);
2624 2626
             SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2);
... ...
@@ -2629,10 +2644,12 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2629 2629
             c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_mmx2;
2630 2630
             c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_mmx2;
2631 2631
 
2632
+            if (!high_bit_depth) {
2632 2633
             c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_mmx2_rnd;
2633 2634
             c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_mmx2;
2634 2635
             c->avg_h264_chroma_pixels_tab[2]= ff_avg_h264_chroma_mc2_mmx2;
2635 2636
             c->put_h264_chroma_pixels_tab[2]= ff_put_h264_chroma_mc2_mmx2;
2637
+            }
2636 2638
 
2637 2639
             c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmx2;
2638 2640
 #endif
... ...
@@ -2645,6 +2662,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2645 2645
         } else if (mm_flags & AV_CPU_FLAG_3DNOW) {
2646 2646
             c->prefetch = prefetch_3dnow;
2647 2647
 
2648
+            if (!high_bit_depth) {
2648 2649
             c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow;
2649 2650
             c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow;
2650 2651
 
... ...
@@ -2667,6 +2685,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2667 2667
                 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow;
2668 2668
                 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow;
2669 2669
             }
2670
+            }
2670 2671
 
2671 2672
             if (CONFIG_VP3_DECODER
2672 2673
                 && (avctx->codec_id == CODEC_ID_VP3 || avctx->codec_id == CODEC_ID_THEORA)) {
... ...
@@ -2681,12 +2700,14 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2681 2681
             SET_QPEL_FUNCS(avg_qpel, 0, 16, 3dnow);
2682 2682
             SET_QPEL_FUNCS(avg_qpel, 1, 8, 3dnow);
2683 2683
 
2684
+            if (!high_bit_depth) {
2684 2685
             SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 3dnow);
2685 2686
             SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 3dnow);
2686 2687
             SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 3dnow);
2687 2688
             SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 3dnow);
2688 2689
             SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 3dnow);
2689 2690
             SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 3dnow);
2691
+            }
2690 2692
 
2691 2693
             SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, 3dnow);
2692 2694
             SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow);
... ...
@@ -2694,8 +2715,10 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2694 2694
             SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow);
2695 2695
 
2696 2696
 #if HAVE_YASM
2697
+            if (!high_bit_depth) {
2697 2698
             c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_3dnow_rnd;
2698 2699
             c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_3dnow;
2700
+            }
2699 2701
 
2700 2702
             c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_3dnow;
2701 2703
             c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_3dnow;
... ...
@@ -2710,12 +2733,15 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2710 2710
             c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU;
2711 2711
         if((mm_flags & AV_CPU_FLAG_SSE2) && !(mm_flags & AV_CPU_FLAG_3DNOW)){
2712 2712
             // these functions are slower than mmx on AMD, but faster on Intel
2713
+            if (!high_bit_depth) {
2713 2714
             c->put_pixels_tab[0][0] = put_pixels16_sse2;
2714 2715
             c->put_no_rnd_pixels_tab[0][0] = put_pixels16_sse2;
2715 2716
             c->avg_pixels_tab[0][0] = avg_pixels16_sse2;
2716 2717
             H264_QPEL_FUNCS(0, 0, sse2);
2718
+            }
2717 2719
         }
2718 2720
         if(mm_flags & AV_CPU_FLAG_SSE2){
2721
+            if (!high_bit_depth) {
2719 2722
             H264_QPEL_FUNCS(0, 1, sse2);
2720 2723
             H264_QPEL_FUNCS(0, 2, sse2);
2721 2724
             H264_QPEL_FUNCS(0, 3, sse2);
... ...
@@ -2728,9 +2754,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2728 2728
             H264_QPEL_FUNCS(3, 1, sse2);
2729 2729
             H264_QPEL_FUNCS(3, 2, sse2);
2730 2730
             H264_QPEL_FUNCS(3, 3, sse2);
2731
+            }
2731 2732
         }
2732 2733
 #if HAVE_SSSE3
2733 2734
         if(mm_flags & AV_CPU_FLAG_SSSE3){
2735
+            if (!high_bit_depth) {
2734 2736
             H264_QPEL_FUNCS(1, 0, ssse3);
2735 2737
             H264_QPEL_FUNCS(1, 1, ssse3);
2736 2738
             H264_QPEL_FUNCS(1, 2, ssse3);
... ...
@@ -2743,12 +2771,15 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2743 2743
             H264_QPEL_FUNCS(3, 1, ssse3);
2744 2744
             H264_QPEL_FUNCS(3, 2, ssse3);
2745 2745
             H264_QPEL_FUNCS(3, 3, ssse3);
2746
+            }
2746 2747
             c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3;
2747 2748
 #if HAVE_YASM
2749
+            if (!high_bit_depth) {
2748 2750
             c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_ssse3_rnd;
2749 2751
             c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_ssse3_rnd;
2750 2752
             c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_ssse3;
2751 2753
             c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_ssse3;
2754
+            }
2752 2755
             c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3;
2753 2756
             if (mm_flags & AV_CPU_FLAG_SSE4) // not really sse4, just slow on Conroe
2754 2757
                 c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_sse4;
... ...
@@ -2805,6 +2836,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2805 2805
                 }
2806 2806
             }
2807 2807
 
2808
+            if (!high_bit_depth)
2808 2809
             c->emulated_edge_mc = emulated_edge_mc_sse;
2809 2810
             c->gmc= gmc_sse;
2810 2811
 #endif
... ...
@@ -95,9 +95,13 @@ void ff_pred4x4_tm_vp8_mmxext      (uint8_t *src, const uint8_t *topright, int s
95 95
 void ff_pred4x4_tm_vp8_ssse3       (uint8_t *src, const uint8_t *topright, int stride);
96 96
 void ff_pred4x4_vertical_vp8_mmxext(uint8_t *src, const uint8_t *topright, int stride);
97 97
 
98
-void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
98
+void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth)
99 99
 {
100 100
     int mm_flags = av_get_cpu_flags();
101
+    const int high_depth = bit_depth > 8;
102
+
103
+    if (high_depth)
104
+        return;
101 105
 
102 106
 #if HAVE_YASM
103 107
     if (mm_flags & AV_CPU_FLAG_MMX) {
... ...
@@ -285,10 +285,11 @@ H264_BIWEIGHT_MMX    ( 4,  8)
285 285
 H264_BIWEIGHT_MMX    ( 4,  4)
286 286
 H264_BIWEIGHT_MMX    ( 4,  2)
287 287
 
288
-void ff_h264dsp_init_x86(H264DSPContext *c)
288
+void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
289 289
 {
290 290
     int mm_flags = av_get_cpu_flags();
291 291
 
292
+    if (bit_depth == 8) {
292 293
     if (mm_flags & AV_CPU_FLAG_MMX2) {
293 294
         c->h264_loop_filter_strength= h264_loop_filter_strength_mmx2;
294 295
     }
... ...
@@ -378,5 +379,6 @@ void ff_h264dsp_init_x86(H264DSPContext *c)
378 378
             }
379 379
         }
380 380
     }
381
+    }
381 382
 #endif
382 383
 }