Browse code

h264dec: h264: 4:2:2 intra decoding

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>

Baptiste Coudurier authored on 2011/08/15 07:39:55
Showing 23 changed files
... ...
@@ -92,7 +92,7 @@ void ff_h264_idct8_add4_neon(uint8_t *dst, const int *block_offset,
92 92
                              DCTELEM *block, int stride,
93 93
                              const uint8_t nnzc[6*8]);
94 94
 
95
-static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth)
95
+static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth, const int chroma_format_idc)
96 96
 {
97 97
     if (bit_depth == 8) {
98 98
     c->h264_v_loop_filter_luma   = ff_h264_v_loop_filter_luma_neon;
... ...
@@ -122,14 +122,15 @@ static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth)
122 122
     c->h264_idct_dc_add     = ff_h264_idct_dc_add_neon;
123 123
     c->h264_idct_add16      = ff_h264_idct_add16_neon;
124 124
     c->h264_idct_add16intra = ff_h264_idct_add16intra_neon;
125
-    c->h264_idct_add8       = ff_h264_idct_add8_neon;
125
+    if (chroma_format_idc == 1)
126
+        c->h264_idct_add8   = ff_h264_idct_add8_neon;
126 127
     c->h264_idct8_add       = ff_h264_idct8_add_neon;
127 128
     c->h264_idct8_dc_add    = ff_h264_idct8_dc_add_neon;
128 129
     c->h264_idct8_add4      = ff_h264_idct8_add4_neon;
129 130
     }
130 131
 }
131 132
 
132
-void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth)
133
+void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth, const int chroma_format_idc)
133 134
 {
134
-    if (HAVE_NEON) ff_h264dsp_init_neon(c, bit_depth);
135
+    if (HAVE_NEON) ff_h264dsp_init_neon(c, bit_depth, chroma_format_idc);
135 136
 }
... ...
@@ -42,7 +42,7 @@ void ff_pred8x8_0lt_dc_neon(uint8_t *src, int stride);
42 42
 void ff_pred8x8_l00_dc_neon(uint8_t *src, int stride);
43 43
 void ff_pred8x8_0l0_dc_neon(uint8_t *src, int stride);
44 44
 
45
-static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id, const int bit_depth)
45
+static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc)
46 46
 {
47 47
     const int high_depth = bit_depth > 8;
48 48
 
... ...
@@ -63,8 +63,10 @@ void ff_h264_idct_dc_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int strid
63 63
 void ff_h264_idct_add16_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
64 64
 void ff_h264_idct_add16intra_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
65 65
 void ff_h264_idct8_add4_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
66
+void ff_h264_idct_add8_422_ ## depth ## _c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
66 67
 void ff_h264_idct_add8_ ## depth ## _c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
67 68
 void ff_h264_luma_dc_dequant_idct_ ## depth ## _c(DCTELEM *output, DCTELEM *input, int qmul);\
69
+void ff_h264_chroma422_dc_dequant_idct_ ## depth ## _c(DCTELEM *block, int qmul);\
68 70
 void ff_h264_chroma_dc_dequant_idct_ ## depth ## _c(DCTELEM *block, int qmul);
69 71
 
70 72
 H264_IDCT( 8)
... ...
@@ -942,7 +942,7 @@ static void clone_tables(H264Context *dst, H264Context *src, int i){
942 942
     dst->list_counts              = src->list_counts;
943 943
 
944 944
     dst->s.obmc_scratchpad = NULL;
945
-    ff_h264_pred_init(&dst->hpc, src->s.codec_id, src->sps.bit_depth_luma);
945
+    ff_h264_pred_init(&dst->hpc, src->s.codec_id, src->sps.bit_depth_luma, src->sps.chroma_format_idc);
946 946
 }
947 947
 
948 948
 /**
... ...
@@ -970,8 +970,8 @@ static av_cold void common_init(H264Context *h){
970 970
     s->height = s->avctx->height;
971 971
     s->codec_id= s->avctx->codec->id;
972 972
 
973
-    ff_h264dsp_init(&h->h264dsp, 8);
974
-    ff_h264_pred_init(&h->hpc, s->codec_id, 8);
973
+    ff_h264dsp_init(&h->h264dsp, 8, 1);
974
+    ff_h264_pred_init(&h->hpc, s->codec_id, 8, 1);
975 975
 
976 976
     h->dequant_coeff_pps= -1;
977 977
     s->unrestricted_mv=1;
... ...
@@ -1428,11 +1428,13 @@ static void decode_postinit(H264Context *h, int setup_finished){
1428 1428
         ff_thread_finish_setup(s->avctx);
1429 1429
 }
1430 1430
 
1431
-static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int chroma444, int simple){
1431
+static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
1432 1432
     MpegEncContext * const s = &h->s;
1433 1433
     uint8_t *top_border;
1434 1434
     int top_idx = 1;
1435 1435
     const int pixel_shift = h->pixel_shift;
1436
+    int chroma444 = CHROMA444;
1437
+    int chroma422 = CHROMA422;
1436 1438
 
1437 1439
     src_y  -=   linesize;
1438 1440
     src_cb -= uvlinesize;
... ...
@@ -1456,6 +1458,14 @@ static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y, ui
1456 1456
                             AV_COPY128(top_border+16, src_cb + 15*uvlinesize);
1457 1457
                             AV_COPY128(top_border+32, src_cr + 15*uvlinesize);
1458 1458
                         }
1459
+                    } else if(chroma422){
1460
+                        if (pixel_shift) {
1461
+                            AV_COPY128(top_border+32, src_cb + 15*uvlinesize);
1462
+                            AV_COPY128(top_border+48, src_cr + 15*uvlinesize);
1463
+                        } else {
1464
+                            AV_COPY64(top_border+16, src_cb +  15*uvlinesize);
1465
+                            AV_COPY64(top_border+24, src_cr +  15*uvlinesize);
1466
+                        }
1459 1467
                     } else {
1460 1468
                         if (pixel_shift) {
1461 1469
                             AV_COPY128(top_border+32, src_cb+7*uvlinesize);
... ...
@@ -1491,6 +1501,14 @@ static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y, ui
1491 1491
                 AV_COPY128(top_border+16, src_cb + 16*linesize);
1492 1492
                 AV_COPY128(top_border+32, src_cr + 16*linesize);
1493 1493
             }
1494
+        } else if(chroma422) {
1495
+            if (pixel_shift) {
1496
+                AV_COPY128(top_border+32, src_cb+16*uvlinesize);
1497
+                AV_COPY128(top_border+48, src_cr+16*uvlinesize);
1498
+            } else {
1499
+                AV_COPY64(top_border+16, src_cb+16*uvlinesize);
1500
+                AV_COPY64(top_border+24, src_cr+16*uvlinesize);
1501
+            }
1494 1502
         } else {
1495 1503
             if (pixel_shift) {
1496 1504
                 AV_COPY128(top_border+32, src_cb+8*uvlinesize);
... ...
@@ -1769,10 +1787,11 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
1769 1769
     /* is_h264 should always be true if SVQ3 is disabled. */
1770 1770
     const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
1771 1771
     void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
1772
+    const int block_h = 16>>s->chroma_y_shift;
1772 1773
 
1773 1774
     dest_y  = s->current_picture.f.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize  ) * 16;
1774
-    dest_cb = s->current_picture.f.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) *  8;
1775
-    dest_cr = s->current_picture.f.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) *  8;
1775
+    dest_cb = s->current_picture.f.data[1] + (mb_x << pixel_shift)*8 + mb_y * s->uvlinesize * block_h;
1776
+    dest_cr = s->current_picture.f.data[2] + (mb_x << pixel_shift)*8 + mb_y * s->uvlinesize * block_h;
1776 1777
 
1777 1778
     s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4);
1778 1779
     s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + (64 << pixel_shift), dest_cr - dest_cb, 2);
... ...
@@ -1785,8 +1804,8 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
1785 1785
         block_offset = &h->block_offset[48];
1786 1786
         if(mb_y&1){ //FIXME move out of this function?
1787 1787
             dest_y -= s->linesize*15;
1788
-            dest_cb-= s->uvlinesize*7;
1789
-            dest_cr-= s->uvlinesize*7;
1788
+            dest_cb-= s->uvlinesize*(block_h-1);
1789
+            dest_cr-= s->uvlinesize*(block_h-1);
1790 1790
         }
1791 1791
         if(FRAME_MBAFF) {
1792 1792
             int list;
... ...
@@ -1833,12 +1852,12 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
1833 1833
                         }
1834 1834
                     }
1835 1835
                 } else {
1836
-                for (i = 0; i < 8; i++) {
1836
+                for (i = 0; i < block_h; i++) {
1837 1837
                     uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize);
1838 1838
                     for (j = 0; j < 8; j++)
1839 1839
                         tmp_cb[j] = get_bits(&gb, bit_depth);
1840 1840
                 }
1841
-                for (i = 0; i < 8; i++) {
1841
+                for (i = 0; i < block_h; i++) {
1842 1842
                     uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize);
1843 1843
                     for (j = 0; j < 8; j++)
1844 1844
                         tmp_cr[j] = get_bits(&gb, bit_depth);
... ...
@@ -1856,7 +1875,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
1856 1856
                         memset(dest_cr+ i*uvlinesize, 1 << (bit_depth - 1), 8);
1857 1857
                     }
1858 1858
                 } else {
1859
-                for (i=0; i<8; i++) {
1859
+                for (i=0; i<block_h; i++) {
1860 1860
                     memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4,  8);
1861 1861
                     memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4,  8);
1862 1862
                 }
... ...
@@ -1904,10 +1923,18 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
1904 1904
                 }
1905 1905
             }else{
1906 1906
                 if(is_h264){
1907
+                    int qp[2];
1908
+                    if (CHROMA422) {
1909
+                        qp[0] = h->chroma_qp[0]+3;
1910
+                        qp[1] = h->chroma_qp[1]+3;
1911
+                    } else {
1912
+                        qp[0] = h->chroma_qp[0];
1913
+                        qp[1] = h->chroma_qp[1];
1914
+                    }
1907 1915
                     if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ])
1908
-                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*1 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
1916
+                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*1 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][qp[0]][0]);
1909 1917
                     if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ])
1910
-                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*2 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
1918
+                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*2 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][qp[1]][0]);
1911 1919
                     h->h264dsp.h264_idct_add8(dest, block_offset,
1912 1920
                                               h->mb, uvlinesize,
1913 1921
                                               h->non_zero_count_cache);
... ...
@@ -2545,11 +2572,13 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
2545 2545
 
2546 2546
     h->b_stride=  s->mb_width*4;
2547 2547
 
2548
+    s->chroma_y_shift = h->sps.chroma_format_idc <= 1; // 400 uses yuv420p
2549
+
2548 2550
     s->width = 16*s->mb_width - (2>>CHROMA444)*FFMIN(h->sps.crop_right, (8<<CHROMA444)-1);
2549 2551
     if(h->sps.frame_mbs_only_flag)
2550
-        s->height= 16*s->mb_height - (2>>CHROMA444)*FFMIN(h->sps.crop_bottom, (8<<CHROMA444)-1);
2552
+        s->height= 16*s->mb_height - (1<<s->chroma_y_shift)*FFMIN(h->sps.crop_bottom, (16>>s->chroma_y_shift)-1);
2551 2553
     else
2552
-        s->height= 16*s->mb_height - (4>>CHROMA444)*FFMIN(h->sps.crop_bottom, (8<<CHROMA444)-1);
2554
+        s->height= 16*s->mb_height - (2<<s->chroma_y_shift)*FFMIN(h->sps.crop_bottom, (16>>s->chroma_y_shift)-1);
2553 2555
 
2554 2556
     if (s->context_initialized
2555 2557
         && (   s->width != s->avctx->width || s->height != s->avctx->height
... ...
@@ -2594,14 +2623,24 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
2594 2594
 
2595 2595
         switch (h->sps.bit_depth_luma) {
2596 2596
             case 9 :
2597
-                s->avctx->pix_fmt = CHROMA444 ? PIX_FMT_YUV444P9 : PIX_FMT_YUV420P9;
2597
+                if (CHROMA444)
2598
+                    s->avctx->pix_fmt = PIX_FMT_YUV444P9;
2599
+                else
2600
+                    s->avctx->pix_fmt = PIX_FMT_YUV420P9;
2598 2601
                 break;
2599 2602
             case 10 :
2600
-                s->avctx->pix_fmt = CHROMA444 ? PIX_FMT_YUV444P10 : PIX_FMT_YUV420P10;
2603
+                if (CHROMA444)
2604
+                    s->avctx->pix_fmt = PIX_FMT_YUV444P10;
2605
+                else if (CHROMA422)
2606
+                    s->avctx->pix_fmt = PIX_FMT_YUV422P10;
2607
+                else
2608
+                    s->avctx->pix_fmt = PIX_FMT_YUV420P10;
2601 2609
                 break;
2602 2610
             default:
2603 2611
                 if (CHROMA444){
2604 2612
                     s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? PIX_FMT_YUVJ444P : PIX_FMT_YUV444P;
2613
+                }else if (CHROMA422) {
2614
+                    s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? PIX_FMT_YUVJ422P : PIX_FMT_YUV422P;
2605 2615
                 }else{
2606 2616
                     s->avctx->pix_fmt = s->avctx->get_format(s->avctx,
2607 2617
                                                              s->avctx->codec->pix_fmts ?
... ...
@@ -3263,6 +3302,7 @@ static void loop_filter(H264Context *h, int start_x, int end_x){
3263 3263
     const int end_mb_y= s->mb_y + FRAME_MBAFF;
3264 3264
     const int old_slice_type= h->slice_type;
3265 3265
     const int pixel_shift = h->pixel_shift;
3266
+    const int block_h = 16>>s->chroma_y_shift;
3266 3267
 
3267 3268
     if(h->deblocking_filter) {
3268 3269
         for(mb_x= start_x; mb_x<end_x; mb_x++){
... ...
@@ -3279,8 +3319,8 @@ static void loop_filter(H264Context *h, int start_x, int end_x){
3279 3279
                 s->mb_x= mb_x;
3280 3280
                 s->mb_y= mb_y;
3281 3281
                 dest_y  = s->current_picture.f.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize  ) * 16;
3282
-                dest_cb = s->current_picture.f.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * (8 << CHROMA444);
3283
-                dest_cr = s->current_picture.f.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * (8 << CHROMA444);
3282
+                dest_cb = s->current_picture.f.data[1] + (mb_x << pixel_shift)*(8<<CHROMA444) + mb_y * s->uvlinesize * block_h;
3283
+                dest_cr = s->current_picture.f.data[2] + (mb_x << pixel_shift)*(8<<CHROMA444) + mb_y * s->uvlinesize * block_h;
3284 3284
                     //FIXME simplify above
3285 3285
 
3286 3286
                 if (MB_FIELD) {
... ...
@@ -3288,14 +3328,14 @@ static void loop_filter(H264Context *h, int start_x, int end_x){
3288 3288
                     uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
3289 3289
                     if(mb_y&1){ //FIXME move out of this function?
3290 3290
                         dest_y -= s->linesize*15;
3291
-                        dest_cb-= s->uvlinesize*((8 << CHROMA444)-1);
3292
-                        dest_cr-= s->uvlinesize*((8 << CHROMA444)-1);
3291
+                        dest_cb-= s->uvlinesize*(block_h-1);
3292
+                        dest_cr-= s->uvlinesize*(block_h-1);
3293 3293
                     }
3294 3294
                 } else {
3295 3295
                     linesize   = h->mb_linesize   = s->linesize;
3296 3296
                     uvlinesize = h->mb_uvlinesize = s->uvlinesize;
3297 3297
                 }
3298
-                backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, CHROMA444, 0);
3298
+                backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
3299 3299
                 if(fill_filter_caches(h, mb_type))
3300 3300
                     continue;
3301 3301
                 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.f.qscale_table[mb_xy]);
... ...
@@ -3731,13 +3771,15 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
3731 3731
             if(avctx->has_b_frames < 2)
3732 3732
                 avctx->has_b_frames= !s->low_delay;
3733 3733
 
3734
-            if (avctx->bits_per_raw_sample != h->sps.bit_depth_luma) {
3734
+            if (avctx->bits_per_raw_sample != h->sps.bit_depth_luma ||
3735
+                h->cur_chroma_format_idc != h->sps.chroma_format_idc) {
3735 3736
                 if (h->sps.bit_depth_luma >= 8 && h->sps.bit_depth_luma <= 10) {
3736 3737
                     avctx->bits_per_raw_sample = h->sps.bit_depth_luma;
3738
+                    h->cur_chroma_format_idc = h->sps.chroma_format_idc;
3737 3739
                     h->pixel_shift = h->sps.bit_depth_luma > 8;
3738 3740
 
3739
-                    ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma);
3740
-                    ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma);
3741
+                    ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma, h->sps.chroma_format_idc);
3742
+                    ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma, h->sps.chroma_format_idc);
3741 3743
                     s->dsp.dct_bits = h->sps.bit_depth_luma > 8 ? 32 : 16;
3742 3744
                     dsputil_init(&s->dsp, s->avctx);
3743 3745
                 } else {
... ...
@@ -39,13 +39,6 @@
39 39
 #define interlaced_dct interlaced_dct_is_a_bad_name
40 40
 #define mb_intra mb_intra_is_not_initialized_see_mb_type
41 41
 
42
-#define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
43
-#define COEFF_TOKEN_VLC_BITS           8
44
-#define TOTAL_ZEROS_VLC_BITS           9
45
-#define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
46
-#define RUN_VLC_BITS                   3
47
-#define RUN7_VLC_BITS                  6
48
-
49 42
 #define MAX_SPS_COUNT 32
50 43
 #define MAX_PPS_COUNT 256
51 44
 
... ...
@@ -92,6 +85,7 @@
92 92
 #define CABAC h->pps.cabac
93 93
 #endif
94 94
 
95
+#define CHROMA422 (h->sps.chroma_format_idc == 2)
95 96
 #define CHROMA444 (h->sps.chroma_format_idc == 3)
96 97
 
97 98
 #define EXTENDED_SAR          255
... ...
@@ -582,6 +576,8 @@ typedef struct H264Context{
582 582
     // Timestamp stuff
583 583
     int sei_buffering_period_present;  ///< Buffering period SEI flag
584 584
     int initial_cpb_removal_delay[32]; ///< Initial timestamps for CPBs
585
+
586
+    int cur_chroma_format_idc;
585 587
 }H264Context;
586 588
 
587 589
 
... ...
@@ -809,7 +805,7 @@ static av_always_inline void write_back_non_zero_count(H264Context *h){
809 809
     AV_COPY32(&nnz[32], &nnz_cache[4+8*11]);
810 810
     AV_COPY32(&nnz[36], &nnz_cache[4+8*12]);
811 811
 
812
-    if(CHROMA444){
812
+    if(!h->s.chroma_y_shift){
813 813
         AV_COPY32(&nnz[24], &nnz_cache[4+8* 8]);
814 814
         AV_COPY32(&nnz[28], &nnz_cache[4+8* 9]);
815 815
         AV_COPY32(&nnz[40], &nnz_cache[4+8*13]);
... ...
@@ -1587,6 +1587,7 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
1587 1587
         9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
1588 1588
         9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
1589 1589
     };
1590
+    static const uint8_t sig_coeff_offset_dc[7] = { 0, 0, 1, 1, 2, 2, 2 };
1590 1591
     /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
1591 1592
      * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
1592 1593
      * map node ctx => cabac ctx for level=1 */
... ...
@@ -1651,12 +1652,20 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
1651 1651
         coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index,
1652 1652
                                                  last_coeff_ctx_base, sig_off);
1653 1653
     } else {
1654
-        coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index,
1655
-                                             last_coeff_ctx_base-significant_coeff_ctx_base);
1654
+        if (is_dc && max_coeff == 8) { // dc 422
1655
+            DECODE_SIGNIFICANCE(7, sig_coeff_offset_dc[last], sig_coeff_offset_dc[last]);
1656
+        } else {
1657
+            coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index,
1658
+                                                 last_coeff_ctx_base-significant_coeff_ctx_base);
1659
+        }
1656 1660
 #else
1657 1661
         DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
1658 1662
     } else {
1659
-        DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
1663
+        if (is_dc && max_coeff == 8) { // dc 422
1664
+            DECODE_SIGNIFICANCE(7, sig_coeff_offset_dc[last], sig_coeff_offset_dc[last]);
1665
+        } else {
1666
+            DECODE_SIGNIFICANCE(max_coeff - 1, last, last);
1667
+        }
1660 1668
 #endif
1661 1669
     }
1662 1670
     assert(coeff_count > 0);
... ...
@@ -1692,6 +1701,8 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
1692 1692
             } \
1693 1693
         } else { \
1694 1694
             int coeff_abs = 2; \
1695
+            if (is_dc && max_coeff == 8) \
1696
+                node_ctx = FFMIN(node_ctx, 6); \
1695 1697
             ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base; \
1696 1698
             node_ctx = coeff_abs_level_transition[1][node_ctx]; \
1697 1699
 \
... ...
@@ -2315,22 +2326,31 @@ decode_intra_mb:
2315 2315
             decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 1);
2316 2316
             decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 2);
2317 2317
         } else {
2318
+            const int num_c8x8 = h->sps.chroma_format_idc;
2319
+
2318 2320
             if( cbp&0x30 ){
2319 2321
                 int c;
2320 2322
                 for( c = 0; c < 2; c++ ) {
2321 2323
                     //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
2322
-                    decode_cabac_residual_dc(h, h->mb + ((256 + 16*16*c) << pixel_shift), 3, CHROMA_DC_BLOCK_INDEX+c, chroma_dc_scan, 4);
2324
+                    decode_cabac_residual_dc(h, h->mb + ((256 + 16*16*c) << pixel_shift), 3,
2325
+                                             CHROMA_DC_BLOCK_INDEX+c,
2326
+                                             CHROMA422 ? chroma422_dc_scan : chroma_dc_scan,
2327
+                                             4*num_c8x8);
2323 2328
                 }
2324 2329
             }
2325 2330
 
2326 2331
             if( cbp&0x20 ) {
2327
-                int c, i;
2332
+                int c, i, i8x8;
2328 2333
                 for( c = 0; c < 2; c++ ) {
2334
+                    DCTELEM *mb = h->mb + (16*(16 + 16*c) << pixel_shift);
2329 2335
                     qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
2330
-                    for( i = 0; i < 4; i++ ) {
2331
-                        const int index = 16 + 16 * c + i;
2332
-                        //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
2333
-                        decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), 4, index, scan + 1, qmul, 15);
2336
+                    for (i8x8 = 0; i8x8 < num_c8x8; i8x8++) {
2337
+                        for (i = 0; i < 4; i++) {
2338
+                            const int index = 16 + 16 * c + 8*i8x8 + i;
2339
+                            //av_log(s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16);
2340
+                            decode_cabac_residual_nondc(h, mb, 4, index, scan + 1, qmul, 15);
2341
+                            mb += 16<<pixel_shift;
2342
+                        }
2334 2343
                     }
2335 2344
                 }
2336 2345
             } else {
... ...
@@ -62,6 +62,30 @@ static const uint8_t chroma_dc_coeff_token_bits[4*5]={
62 62
  2, 3, 2, 0,
63 63
 };
64 64
 
65
+static const uint8_t chroma422_dc_coeff_token_len[4*9]={
66
+  1,  0,  0,  0,
67
+  7,  2,  0,  0,
68
+  7,  7,  3,  0,
69
+  9,  7,  7,  5,
70
+  9,  9,  7,  6,
71
+ 10, 10,  9,  7,
72
+ 11, 11, 10,  7,
73
+ 12, 12, 11, 10,
74
+ 13, 12, 12, 11,
75
+};
76
+
77
+static const uint8_t chroma422_dc_coeff_token_bits[4*9]={
78
+  1,   0,  0, 0,
79
+ 15,   1,  0, 0,
80
+ 14,  13,  1, 0,
81
+  7,  12, 11, 1,
82
+  6,   5, 10, 1,
83
+  7,   6,  4, 9,
84
+  7,   6,  5, 8,
85
+  7,   6,  5, 4,
86
+  7,   5,  4, 4,
87
+};
88
+
65 89
 static const uint8_t coeff_token_len[4][4*17]={
66 90
 {
67 91
      1, 0, 0, 0,
... ...
@@ -172,6 +196,26 @@ static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
172 172
     { 1, 0, 0, 0,},
173 173
 };
174 174
 
175
+static const uint8_t chroma422_dc_total_zeros_len[7][8]= {
176
+    { 1, 3, 3, 4, 4, 4, 5, 5 },
177
+    { 3, 2, 3, 3, 3, 3, 3 },
178
+    { 3, 3, 2, 2, 3, 3 },
179
+    { 3, 2, 2, 2, 3 },
180
+    { 2, 2, 2, 2 },
181
+    { 2, 2, 1 },
182
+    { 1, 1 },
183
+};
184
+
185
+static const uint8_t chroma422_dc_total_zeros_bits[7][8]= {
186
+    { 1, 2, 3, 2, 3, 1, 1, 0 },
187
+    { 0, 1, 1, 4, 5, 6, 7 },
188
+    { 0, 1, 1, 2, 6, 7 },
189
+    { 6, 0, 1, 2, 7 },
190
+    { 0, 1, 2, 3 },
191
+    { 0, 1, 1 },
192
+    { 0, 1 },
193
+};
194
+
175 195
 static const uint8_t run_len[7][16]={
176 196
     {1,1},
177 197
     {1,2,2},
... ...
@@ -200,6 +244,10 @@ static VLC chroma_dc_coeff_token_vlc;
200 200
 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
201 201
 static const int chroma_dc_coeff_token_vlc_table_size = 256;
202 202
 
203
+static VLC chroma422_dc_coeff_token_vlc;
204
+static VLC_TYPE chroma422_dc_coeff_token_vlc_table[8192][2];
205
+static const int chroma422_dc_coeff_token_vlc_table_size = 8192;
206
+
203 207
 static VLC total_zeros_vlc[15];
204 208
 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
205 209
 static const int total_zeros_vlc_tables_size = 512;
... ...
@@ -208,6 +256,10 @@ static VLC chroma_dc_total_zeros_vlc[3];
208 208
 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
209 209
 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
210 210
 
211
+static VLC chroma422_dc_total_zeros_vlc[7];
212
+static VLC_TYPE chroma422_dc_total_zeros_vlc_tables[7][32][2];
213
+static const int chroma422_dc_total_zeros_vlc_tables_size = 32;
214
+
211 215
 static VLC run_vlc[6];
212 216
 static VLC_TYPE run_vlc_tables[6][8][2];
213 217
 static const int run_vlc_tables_size = 8;
... ...
@@ -219,6 +271,14 @@ static const int run7_vlc_table_size = 96;
219 219
 #define LEVEL_TAB_BITS 8
220 220
 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
221 221
 
222
+#define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
223
+#define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13
224
+#define COEFF_TOKEN_VLC_BITS           8
225
+#define TOTAL_ZEROS_VLC_BITS           9
226
+#define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
227
+#define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5
228
+#define RUN_VLC_BITS                   3
229
+#define RUN7_VLC_BITS                  6
222 230
 
223 231
 /**
224 232
  * gets the predicted number of non-zero coefficients.
... ...
@@ -277,6 +337,13 @@ av_cold void ff_h264_decode_init_vlc(void){
277 277
                  &chroma_dc_coeff_token_bits[0], 1, 1,
278 278
                  INIT_VLC_USE_NEW_STATIC);
279 279
 
280
+        chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table;
281
+        chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size;
282
+        init_vlc(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9,
283
+                 &chroma422_dc_coeff_token_len [0], 1, 1,
284
+                 &chroma422_dc_coeff_token_bits[0], 1, 1,
285
+                 INIT_VLC_USE_NEW_STATIC);
286
+
280 287
         offset = 0;
281 288
         for(i=0; i<4; i++){
282 289
             coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
... ...
@@ -303,6 +370,17 @@ av_cold void ff_h264_decode_init_vlc(void){
303 303
                      &chroma_dc_total_zeros_bits[i][0], 1, 1,
304 304
                      INIT_VLC_USE_NEW_STATIC);
305 305
         }
306
+
307
+        for(i=0; i<7; i++){
308
+            chroma422_dc_total_zeros_vlc[i].table = chroma422_dc_total_zeros_vlc_tables[i];
309
+            chroma422_dc_total_zeros_vlc[i].table_allocated = chroma422_dc_total_zeros_vlc_tables_size;
310
+            init_vlc(&chroma422_dc_total_zeros_vlc[i],
311
+                     CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8,
312
+                     &chroma422_dc_total_zeros_len [i][0], 1, 1,
313
+                     &chroma422_dc_total_zeros_bits[i][0], 1, 1,
314
+                     INIT_VLC_USE_NEW_STATIC);
315
+        }
316
+
306 317
         for(i=0; i<15; i++){
307 318
             total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
308 319
             total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
... ...
@@ -372,7 +450,10 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
372 372
     //FIXME put trailing_onex into the context
373 373
 
374 374
     if(max_coeff <= 8){
375
-        coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
375
+        if (max_coeff == 4)
376
+            coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
377
+        else
378
+            coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc.table, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1);
376 379
         total_coeff= coeff_token>>2;
377 380
     }else{
378 381
         if(n >= LUMA_DC_BLOCK_INDEX){
... ...
@@ -482,11 +563,16 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
482 482
     if(total_coeff == max_coeff)
483 483
         zeros_left=0;
484 484
     else{
485
-        /* FIXME: we don't actually support 4:2:2 yet. */
486
-        if(max_coeff <= 8)
487
-            zeros_left= get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[ total_coeff ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
488
-        else
485
+        if (max_coeff <= 8) {
486
+            if (max_coeff == 4)
487
+                zeros_left = get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[total_coeff].table,
488
+                                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
489
+            else
490
+                zeros_left = get_vlc2(gb, (chroma422_dc_total_zeros_vlc-1)[total_coeff].table,
491
+                                      CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1);
492
+        } else {
489 493
             zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
494
+        }
490 495
     }
491 496
 
492 497
 #define STORE_BLOCK(type) \
... ...
@@ -993,7 +1079,7 @@ decode_intra_mb:
993 993
     s->current_picture.f.mb_type[mb_xy] = mb_type;
994 994
 
995 995
     if(cbp || IS_INTRA16x16(mb_type)){
996
-        int i4x4, chroma_idx;
996
+        int i4x4, i8x8, chroma_idx;
997 997
         int dquant;
998 998
         int ret;
999 999
         GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
... ...
@@ -1036,9 +1122,14 @@ decode_intra_mb:
1036 1036
                 return -1;
1037 1037
             }
1038 1038
         } else {
1039
+            const int num_c8x8 = h->sps.chroma_format_idc;
1040
+
1039 1041
             if(cbp&0x30){
1040 1042
                 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1041
-                    if( decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
1043
+                    if (decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift),
1044
+                                        CHROMA_DC_BLOCK_INDEX+chroma_idx,
1045
+                                        CHROMA422 ? chroma422_dc_scan : chroma_dc_scan,
1046
+                                        NULL, 4*num_c8x8) < 0) {
1042 1047
                         return -1;
1043 1048
                     }
1044 1049
             }
... ...
@@ -1046,10 +1137,13 @@ decode_intra_mb:
1046 1046
             if(cbp&0x20){
1047 1047
                 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1048 1048
                     const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1049
-                    for(i4x4=0; i4x4<4; i4x4++){
1050
-                        const int index= 16 + 16*chroma_idx + i4x4;
1051
-                        if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, scan + 1, qmul, 15) < 0){
1052
-                            return -1;
1049
+                    DCTELEM *mb = h->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
1050
+                    for (i8x8=0; i8x8<num_c8x8; i8x8++) {
1051
+                        for (i4x4=0; i4x4<4; i4x4++) {
1052
+                            const int index= 16 + 16*chroma_idx + 8*i8x8 + i4x4;
1053
+                            if (decode_residual(h, gb, mb, index, scan + 1, qmul, 15) < 0)
1054
+                                return -1;
1055
+                            mb += 16<<pixel_shift;
1053 1056
                         }
1054 1057
                     }
1055 1058
                 }
... ...
@@ -212,6 +212,7 @@ static void av_always_inline h264_filter_mb_fast_internal( H264Context *h, int m
212 212
     MpegEncContext * const s = &h->s;
213 213
     int chroma = !(CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
214 214
     int chroma444 = CHROMA444;
215
+    int chroma422 = CHROMA422;
215 216
 
216 217
     int mb_xy = h->mb_xy;
217 218
     int left_type= h->left_type[LTOP];
... ...
@@ -289,6 +290,23 @@ static void av_always_inline h264_filter_mb_fast_internal( H264Context *h, int m
289 289
                     filter_mb_edgeh( &img_cb[4*3*linesize], linesize, bS3, qpc, a, b, h, 0);
290 290
                     filter_mb_edgeh( &img_cr[4*3*linesize], linesize, bS3, qpc, a, b, h, 0);
291 291
                 }
292
+            }else if(chroma422){
293
+                if(left_type){
294
+                    filter_mb_edgecv(&img_cb[2*0<<pixel_shift], uvlinesize, bS4, qpc0, a, b, h, 1);
295
+                    filter_mb_edgecv(&img_cr[2*0<<pixel_shift], uvlinesize, bS4, qpc0, a, b, h, 1);
296
+                }
297
+                filter_mb_edgecv(&img_cb[2*2<<pixel_shift], uvlinesize, bS3, qpc, a, b, h, 0);
298
+                filter_mb_edgecv(&img_cr[2*2<<pixel_shift], uvlinesize, bS3, qpc, a, b, h, 0);
299
+                if(top_type){
300
+                    filter_mb_edgech(&img_cb[4*0*uvlinesize], uvlinesize, bSH, qpc1, a, b, h, 1);
301
+                    filter_mb_edgech(&img_cr[4*0*uvlinesize], uvlinesize, bSH, qpc1, a, b, h, 1);
302
+                }
303
+                filter_mb_edgech(&img_cb[4*1*uvlinesize], uvlinesize, bS3, qpc, a, b, h, 0);
304
+                filter_mb_edgech(&img_cr[4*1*uvlinesize], uvlinesize, bS3, qpc, a, b, h, 0);
305
+                filter_mb_edgech(&img_cb[4*2*uvlinesize], uvlinesize, bS3, qpc, a, b, h, 0);
306
+                filter_mb_edgech(&img_cr[4*2*uvlinesize], uvlinesize, bS3, qpc, a, b, h, 0);
307
+                filter_mb_edgech(&img_cb[4*3*uvlinesize], uvlinesize, bS3, qpc, a, b, h, 0);
308
+                filter_mb_edgech(&img_cr[4*3*uvlinesize], uvlinesize, bS3, qpc, a, b, h, 0);
292 309
             }else{
293 310
                 if(left_type){
294 311
                     filter_mb_edgecv( &img_cb[2*0<<pixel_shift], uvlinesize, bS4, qpc0, a, b, h, 1);
... ...
@@ -411,10 +429,12 @@ static int check_mv(H264Context *h, long b_idx, long bn_idx, int mvy_limit){
411 411
     return v;
412 412
 }
413 413
 
414
-static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int a, int b, int chroma, int chroma444, int dir) {
414
+static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int a, int b, int chroma, int dir) {
415 415
     MpegEncContext * const s = &h->s;
416 416
     int edge;
417 417
     int chroma_qp_avg[2];
418
+    int chroma444 = CHROMA444;
419
+    int chroma422 = CHROMA422;
418 420
     const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
419 421
     const int mbm_type = dir == 0 ? h->left_type[LTOP] : h->top_type;
420 422
 
... ...
@@ -564,8 +584,9 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
564 564
     for( edge = 1; edge < edges; edge++ ) {
565 565
         DECLARE_ALIGNED(8, int16_t, bS)[4];
566 566
         int qp;
567
+        const int deblock_edge = !IS_8x8DCT(mb_type & (edge<<24)); // (edge&1) && IS_8x8DCT(mb_type)
567 568
 
568
-        if( IS_8x8DCT(mb_type & (edge<<24)) ) // (edge&1) && IS_8x8DCT(mb_type)
569
+        if (!deblock_edge && (!chroma422 || dir == 0))
569 570
             continue;
570 571
 
571 572
         if( IS_INTRA(mb_type)) {
... ...
@@ -627,14 +648,23 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
627 627
                 }
628 628
             }
629 629
         } else {
630
-            filter_mb_edgeh( &img_y[4*edge*linesize], linesize, bS, qp, a, b, h, 0 );
631
-            if (chroma) {
632
-                if (chroma444) {
633
-                    filter_mb_edgeh ( &img_cb[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], a, b, h, 0);
634
-                    filter_mb_edgeh ( &img_cr[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], a, b, h, 0);
635
-                } else if( (edge&1) == 0 ) {
636
-                    filter_mb_edgech( &img_cb[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], a, b, h, 0);
637
-                    filter_mb_edgech( &img_cr[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], a, b, h, 0);
630
+            if (chroma422) {
631
+                if (deblock_edge)
632
+                    filter_mb_edgeh(&img_y[4*edge*linesize], linesize, bS, qp, a, b, h, 0);
633
+                if (chroma) {
634
+                    filter_mb_edgech(&img_cb[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], a, b, h, 0);
635
+                    filter_mb_edgech(&img_cr[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], a, b, h, 0);
636
+                }
637
+            } else {
638
+                filter_mb_edgeh(&img_y[4*edge*linesize], linesize, bS, qp, a, b, h, 0);
639
+                if (chroma) {
640
+                    if (chroma444) {
641
+                        filter_mb_edgeh (&img_cb[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], a, b, h, 0);
642
+                        filter_mb_edgeh (&img_cr[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], a, b, h, 0);
643
+                    } else if ((edge&1) == 0) {
644
+                        filter_mb_edgech(&img_cb[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], a, b, h, 0);
645
+                        filter_mb_edgech(&img_cr[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], a, b, h, 0);
646
+                    }
638 647
                 }
639 648
             }
640 649
         }
... ...
@@ -726,6 +756,11 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint
726 726
                     filter_mb_mbaff_edgev ( h, img_cb + 8*uvlinesize, uvlinesize, bS+4, 1, bqp[1], a, b, 1 );
727 727
                     filter_mb_mbaff_edgev ( h, img_cr,                uvlinesize, bS  , 1, rqp[0], a, b, 1 );
728 728
                     filter_mb_mbaff_edgev ( h, img_cr + 8*uvlinesize, uvlinesize, bS+4, 1, rqp[1], a, b, 1 );
729
+                } else if (CHROMA422) {
730
+                    filter_mb_mbaff_edgecv(h, img_cb,                uvlinesize, bS  , 1, bqp[0], a, b, 1);
731
+                    filter_mb_mbaff_edgecv(h, img_cb + 8*uvlinesize, uvlinesize, bS+4, 1, bqp[1], a, b, 1);
732
+                    filter_mb_mbaff_edgecv(h, img_cr,                uvlinesize, bS  , 1, rqp[0], a, b, 1);
733
+                    filter_mb_mbaff_edgecv(h, img_cr + 8*uvlinesize, uvlinesize, bS+4, 1, rqp[1], a, b, 1);
729 734
                 }else{
730 735
                     filter_mb_mbaff_edgecv( h, img_cb,                uvlinesize, bS  , 1, bqp[0], a, b, 1 );
731 736
                     filter_mb_mbaff_edgecv( h, img_cb + 4*uvlinesize, uvlinesize, bS+4, 1, bqp[1], a, b, 1 );
... ...
@@ -754,9 +789,9 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint
754 754
 
755 755
 #if CONFIG_SMALL
756 756
     for( dir = 0; dir < 2; dir++ )
757
-        filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, a, b, chroma, CHROMA444, dir);
757
+        filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, a, b, chroma, dir);
758 758
 #else
759
-    filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, a, b, chroma, CHROMA444, 0);
760
-    filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0,                        a, b, chroma, CHROMA444, 1);
759
+    filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, a, b, chroma, 0);
760
+    filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0,                        a, b, chroma, 1);
761 761
 #endif
762 762
 }
... ...
@@ -510,7 +510,7 @@ static void fill_decode_caches(H264Context *h, int mb_type){
510 510
     if(top_type){
511 511
         nnz = h->non_zero_count[top_xy];
512 512
         AV_COPY32(&nnz_cache[4+8* 0], &nnz[4*3]);
513
-        if(CHROMA444){
513
+        if(!s->chroma_y_shift){
514 514
             AV_COPY32(&nnz_cache[4+8* 5], &nnz[4* 7]);
515 515
             AV_COPY32(&nnz_cache[4+8*10], &nnz[4*11]);
516 516
         }else{
... ...
@@ -534,6 +534,11 @@ static void fill_decode_caches(H264Context *h, int mb_type){
534 534
                 nnz_cache[3+8* 7 + 2*8*i]= nnz[left_block[8+1+2*i]+4*4];
535 535
                 nnz_cache[3+8*11 + 2*8*i]= nnz[left_block[8+0+2*i]+8*4];
536 536
                 nnz_cache[3+8*12 + 2*8*i]= nnz[left_block[8+1+2*i]+8*4];
537
+            }else if(CHROMA422) {
538
+                nnz_cache[3+8* 6 + 2*8*i]= nnz[left_block[8+0+2*i]-2+4*4];
539
+                nnz_cache[3+8* 7 + 2*8*i]= nnz[left_block[8+1+2*i]-2+4*4];
540
+                nnz_cache[3+8*11 + 2*8*i]= nnz[left_block[8+0+2*i]-2+8*4];
541
+                nnz_cache[3+8*12 + 2*8*i]= nnz[left_block[8+1+2*i]-2+8*4];
537 542
             }else{
538 543
                 nnz_cache[3+8* 6 +   8*i]= nnz[left_block[8+4+2*i]];
539 544
                 nnz_cache[3+8*11 +   8*i]= nnz[left_block[8+5+2*i]];
... ...
@@ -396,7 +396,8 @@ int ff_h264_decode_seq_parameter_set(H264Context *h){
396 396
 #endif
397 397
     sps->crop= get_bits1(&s->gb);
398 398
     if(sps->crop){
399
-        int crop_limit = sps->chroma_format_idc == 3 ? 16 : 8;
399
+        int crop_vertical_limit = sps->chroma_format_idc & 2 ? 16 : 8;
400
+        int crop_horizontal_limit = sps->chroma_format_idc == 3 ? 16 : 8;
400 401
         sps->crop_left  = get_ue_golomb(&s->gb);
401 402
         sps->crop_right = get_ue_golomb(&s->gb);
402 403
         sps->crop_top   = get_ue_golomb(&s->gb);
... ...
@@ -404,7 +405,7 @@ int ff_h264_decode_seq_parameter_set(H264Context *h){
404 404
         if(sps->crop_left || sps->crop_top){
405 405
             av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
406 406
         }
407
-        if(sps->crop_right >= crop_limit || sps->crop_bottom >= crop_limit){
407
+        if(sps->crop_right >= crop_horizontal_limit || sps->crop_bottom >= crop_vertical_limit){
408 408
             av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
409 409
         }
410 410
     }else{
... ...
@@ -80,7 +80,14 @@ static const uint8_t luma_dc_field_scan[16]={
80 80
 
81 81
 /* Chroma DC scan tables: each entry is a coefficient offset of the form
  * (x + y*2)*16. In this patch the CAVLC chroma DC decode call picks
  * chroma422_dc_scan when CHROMA422 is set and chroma_dc_scan otherwise. */
 /* 4-entry table: x in 0..1, y in 0..1. */
 static const uint8_t chroma_dc_scan[4]={
82 82
  (0+0*2)*16, (1+0*2)*16,
83
- (0+1*2)*16, (1+1*2)*16,  //FIXME
83
+ (0+1*2)*16, (1+1*2)*16,
84
+};
85
+
86
 /* 8-entry table: x in 0..1, y in 0..3. The resulting offsets are the same
  * eight positions that ff_h264_chroma422_dc_dequant_idct reads and writes
  * (32*row + {0,16}). The order of the entries is not row-major.
  * NOTE(review): presumably this is the 4:2:2 chroma DC scan order from the
  * H.264 specification -- confirm against the spec. */
+static const uint8_t chroma422_dc_scan[8]={
87
+ (0+0*2)*16, (0+1*2)*16,
88
+ (1+0*2)*16, (0+2*2)*16,
89
+ (0+3*2)*16, (1+1*2)*16,
90
+ (1+2*2)*16, (1+3*2)*16,
91
 };
85 92
 
86 93
 // zigzag_scan8x8_cavlc[i] = zigzag_scan8x8[(i/4) + 16*(i%4)]
... ...
@@ -41,7 +41,7 @@
41 41
 #include "h264dsp_template.c"
42 42
 #undef BIT_DEPTH
43 43
 
44
-void ff_h264dsp_init(H264DSPContext *c, const int bit_depth)
44
+void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_format_idc)
45 45
 {
46 46
 #undef FUNC
47 47
 #define FUNC(a, depth) a ## _ ## depth ## _c
... ...
@@ -53,10 +53,16 @@ void ff_h264dsp_init(H264DSPContext *c, const int bit_depth)
53 53
     c->h264_idct8_dc_add= FUNC(ff_h264_idct8_dc_add, depth);\
54 54
     c->h264_idct_add16     = FUNC(ff_h264_idct_add16, depth);\
55 55
     c->h264_idct8_add4     = FUNC(ff_h264_idct8_add4, depth);\
56
-    c->h264_idct_add8      = FUNC(ff_h264_idct_add8, depth);\
56
+    if (chroma_format_idc == 1)\
57
+        c->h264_idct_add8  = FUNC(ff_h264_idct_add8, depth);\
58
+    else\
59
+        c->h264_idct_add8  = FUNC(ff_h264_idct_add8_422, depth);\
57 60
     c->h264_idct_add16intra= FUNC(ff_h264_idct_add16intra, depth);\
58 61
     c->h264_luma_dc_dequant_idct= FUNC(ff_h264_luma_dc_dequant_idct, depth);\
59
-    c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma_dc_dequant_idct, depth);\
62
+    if (chroma_format_idc == 1)\
63
+        c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma_dc_dequant_idct, depth);\
64
+    else\
65
+        c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma422_dc_dequant_idct, depth);\
60 66
 \
61 67
     c->weight_h264_pixels_tab[0]= FUNC(weight_h264_pixels16x16, depth);\
62 68
     c->weight_h264_pixels_tab[1]= FUNC(weight_h264_pixels16x8, depth);\
... ...
@@ -86,11 +92,23 @@ void ff_h264dsp_init(H264DSPContext *c, const int bit_depth)
86 86
     c->h264_h_loop_filter_luma_intra= FUNC(h264_h_loop_filter_luma_intra, depth);\
87 87
     c->h264_h_loop_filter_luma_mbaff_intra= FUNC(h264_h_loop_filter_luma_mbaff_intra, depth);\
88 88
     c->h264_v_loop_filter_chroma= FUNC(h264_v_loop_filter_chroma, depth);\
89
-    c->h264_h_loop_filter_chroma= FUNC(h264_h_loop_filter_chroma, depth);\
90
-    c->h264_h_loop_filter_chroma_mbaff= FUNC(h264_h_loop_filter_chroma_mbaff, depth);\
89
+    if (chroma_format_idc == 1)\
90
+        c->h264_h_loop_filter_chroma= FUNC(h264_h_loop_filter_chroma, depth);\
91
+    else\
92
+        c->h264_h_loop_filter_chroma= FUNC(h264_h_loop_filter_chroma422, depth);\
93
+    if (chroma_format_idc == 1)\
94
+        c->h264_h_loop_filter_chroma_mbaff= FUNC(h264_h_loop_filter_chroma_mbaff, depth);\
95
+    else\
96
+        c->h264_h_loop_filter_chroma_mbaff= FUNC(h264_h_loop_filter_chroma422_mbaff, depth);\
91 97
     c->h264_v_loop_filter_chroma_intra= FUNC(h264_v_loop_filter_chroma_intra, depth);\
92
-    c->h264_h_loop_filter_chroma_intra= FUNC(h264_h_loop_filter_chroma_intra, depth);\
93
-    c->h264_h_loop_filter_chroma_mbaff_intra= FUNC(h264_h_loop_filter_chroma_mbaff_intra, depth);\
98
+    if (chroma_format_idc == 1)\
99
+        c->h264_h_loop_filter_chroma_intra= FUNC(h264_h_loop_filter_chroma_intra, depth);\
100
+    else\
101
+        c->h264_h_loop_filter_chroma_intra= FUNC(h264_h_loop_filter_chroma422_intra, depth);\
102
+    if (chroma_format_idc == 1)\
103
+        c->h264_h_loop_filter_chroma_mbaff_intra= FUNC(h264_h_loop_filter_chroma_mbaff_intra, depth);\
104
+    else\
105
+        c->h264_h_loop_filter_chroma_mbaff_intra= FUNC(h264_h_loop_filter_chroma422_mbaff_intra, depth);\
94 106
     c->h264_loop_filter_strength= NULL;
95 107
 
96 108
     switch (bit_depth) {
... ...
@@ -105,7 +123,7 @@ void ff_h264dsp_init(H264DSPContext *c, const int bit_depth)
105 105
         break;
106 106
     }
107 107
 
108
-    if (ARCH_ARM) ff_h264dsp_init_arm(c, bit_depth);
109
-    if (HAVE_ALTIVEC) ff_h264dsp_init_ppc(c, bit_depth);
110
-    if (HAVE_MMX) ff_h264dsp_init_x86(c, bit_depth);
108
+    if (ARCH_ARM) ff_h264dsp_init_arm(c, bit_depth, chroma_format_idc);
109
+    if (HAVE_ALTIVEC) ff_h264dsp_init_ppc(c, bit_depth, chroma_format_idc);
110
+    if (HAVE_MMX) ff_h264dsp_init_x86(c, bit_depth, chroma_format_idc);
111 111
 }
... ...
@@ -74,9 +74,9 @@ typedef struct H264DSPContext{
74 74
     void (*h264_chroma_dc_dequant_idct)(DCTELEM *block, int qmul);
75 75
 }H264DSPContext;
76 76
 
77
-void ff_h264dsp_init(H264DSPContext *c, const int bit_depth);
78
-void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth);
79
-void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth);
80
-void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth);
77
/**
 * Fill in the function pointers of an H264DSPContext.
 *
 * ff_h264dsp_init() selects implementations by bit_depth and, for several
 * chroma entries (h264_idct_add8, h264_chroma_dc_dequant_idct and the
 * horizontal chroma loop filters), by chroma_format_idc: the value 1 keeps
 * the previous functions, any other value installs the *422 variants.
 * It then calls the _arm, _ppc and _x86 initialisers (guarded by ARCH_ARM,
 * HAVE_ALTIVEC and HAVE_MMX respectively), forwarding the same arguments.
 *
 * @param c                 context whose function pointers are set
 * @param bit_depth         sample bit depth used to pick the template instance
 * @param chroma_format_idc chroma format selector (1 vs. anything else)
 */
+void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_format_idc);
78
+void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth, const int chroma_format_idc);
79
+void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth, const int chroma_format_idc);
80
+void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chroma_format_idc);
81 81
 
82 82
 #endif /* AVCODEC_H264DSP_H */
... ...
@@ -275,6 +275,14 @@ static void FUNCC(h264_h_loop_filter_chroma_mbaff)(uint8_t *pix, int stride, int
275 275
 {
276 276
     FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 1, alpha, beta, tc0);
277 277
 }
278
/* 4:2:2 variant of the horizontal chroma loop filter wrapper: forwards to
 * h264_loop_filter_chroma with sizeof(pixel) and stride as the two step
 * arguments and 4 as the fourth argument (the non-422 mbaff wrapper just
 * above passes 1 there).
 * NOTE(review): the fourth argument is presumably the inner iteration count,
 * as in h264_loop_filter_chroma_intra -- confirm against the helper. */
+static void FUNCC(h264_h_loop_filter_chroma422)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
279
+{
280
+    FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 4, alpha, beta, tc0);
281
+}
282
/* MBAFF flavour of the 4:2:2 horizontal chroma loop filter wrapper: same call
 * as h264_h_loop_filter_chroma422 but with 2 as the fourth argument (the
 * non-422 mbaff wrapper passes 1).
 * NOTE(review): presumably halves the per-edge row count for field
 * macroblocks -- confirm against h264_loop_filter_chroma. */
+static void FUNCC(h264_h_loop_filter_chroma422_mbaff)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
283
+{
284
+    FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0);
285
+}
278 286
 
279 287
 static av_always_inline av_flatten void FUNCC(h264_loop_filter_chroma_intra)(uint8_t *p_pix, int xstride, int ystride, int inner_iters, int alpha, int beta)
280 288
 {
... ...
@@ -312,3 +320,11 @@ static void FUNCC(h264_h_loop_filter_chroma_mbaff_intra)(uint8_t *pix, int strid
312 312
 {
313 313
     FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 1, alpha, beta);
314 314
 }
315
/* 4:2:2 variant of the horizontal intra chroma loop filter wrapper: calls
 * h264_loop_filter_chroma_intra with xstride = sizeof(pixel),
 * ystride = stride and inner_iters = 4 (the non-422 mbaff intra wrapper
 * above passes inner_iters = 1). */
+static void FUNCC(h264_h_loop_filter_chroma422_intra)(uint8_t *pix, int stride, int alpha, int beta)
316
+{
317
+    FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 4, alpha, beta);
318
+}
319
/* MBAFF flavour of the 4:2:2 horizontal intra chroma loop filter wrapper:
 * calls h264_loop_filter_chroma_intra with xstride = sizeof(pixel),
 * ystride = stride and inner_iters = 2. */
+static void FUNCC(h264_h_loop_filter_chroma422_mbaff_intra)(uint8_t *pix, int stride, int alpha, int beta)
320
+{
321
+    FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta);
322
+}
... ...
@@ -224,6 +224,39 @@ void FUNCC(ff_h264_idct_add8)(uint8_t **dest, const int *block_offset, DCTELEM *
224 224
         }
225 225
     }
226 226
 }
227
+
228
/**
 * Add the inverse-transformed chroma residual of a 4:2:2 macroblock to the
 * two chroma planes.
 *
 * For each plane j (1 and 2, written to dest[j-1]) eight 4x4 blocks are
 * processed in two passes of four. A block with a non-zero nnzc entry gets
 * ff_h264_idct_add; otherwise, if only its first coefficient is non-zero, it
 * gets ff_h264_idct_dc_add; otherwise nothing is added.
 *
 * @param dest         array of destination plane pointers, indexed 0 and 1
 * @param block_offset per-block offsets added to the destination pointer
 * @param block        coefficient storage, 16 coefficients per 4x4 block
 * @param stride       passed through unchanged to the idct_add helpers
 * @param nnzc         non-zero-count cache, indexed through scan8[]
 */
+void FUNCC(ff_h264_idct_add8_422)(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){
229
+    int i, j;
230
+
231
+#if 0 /* disabled debug dump of the coefficients starting at index 256 */
232
+    av_log(NULL, AV_LOG_INFO, "idct\n");
233
+    int32_t *b = block;
234
+    for (int i = 0; i < 256; i++) {
235
+        av_log(NULL, AV_LOG_INFO, "%5d ", b[i+256]);
236
+        if (!((i+1) % 16))
237
+            av_log(NULL, AV_LOG_INFO, "\n");
238
+    }
239
+#endif
240
+
241
+    for(j=1; j<3; j++){ /* first pass: blocks j*16 .. j*16+3 of each plane */
242
+        for(i=j*16; i<j*16+4; i++){
243
+            if(nnzc[ scan8[i] ])
244
+                FUNCC(ff_h264_idct_add   )(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride);
245
+            else if(((dctcoef*)block)[i*16]) /* only the first coefficient is set */
246
+                FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride);
247
+        }
248
+    }
249
+
 /* Second pass: coefficients are read at block index i (j*16+4 .. j*16+7),
  * but nnzc/scan8 and block_offset are indexed at i+4. This agrees with the
  * CAVLC decoder in the same patch, which advances the coefficient pointer
  * contiguously while numbering the second 8x8 group's blocks from +8. */
250
+    for(j=1; j<3; j++){
251
+        for(i=j*16+4; i<j*16+8; i++){
252
+            if(nnzc[ scan8[i+4] ])
253
+                FUNCC(ff_h264_idct_add   )(dest[j-1] + block_offset[i+4], block + i*16*sizeof(pixel), stride);
254
+            else if(((dctcoef*)block)[i*16])
255
+                FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i+4], block + i*16*sizeof(pixel), stride);
256
+        }
257
+    }
258
+}
259
+
227 260
 /**
228 261
  * IDCT transforms the 16 dc values and dequantizes them.
229 262
  * @param qmul quantization parameter
... ...
@@ -263,6 +296,42 @@ void FUNCC(ff_h264_luma_dc_dequant_idct)(DCTELEM *p_output, DCTELEM *p_input, in
263 263
 #undef stride
264 264
 }
265 265
 
266
/**
 * In-place transform and dequantisation of the eight chroma DC coefficients
 * of a 4:2:2 chroma plane.
 *
 * The coefficients live at block[32*row + 16*col] for row 0..3 and col 0..1.
 * Stage one forms the sum and difference of the two columns in every row;
 * stage two runs a 4-point butterfly down each column, multiplies by qmul,
 * adds 128 and shifts right by 8 before storing back to the same positions.
 *
 * @param p_block coefficient buffer, accessed as dctcoef
 * @param qmul    dequantisation multiplier
 */
+void FUNCC(ff_h264_chroma422_dc_dequant_idct)(DCTELEM *p_block, int qmul){
267
+    const int stride= 16*2; /* distance between rows of DC coefficients */
268
+    const int xStride= 16; /* distance between the two columns */
269
+    int i;
270
+    int temp[8];
271
+    static const uint8_t x_offset[2]={0, 16};
272
+    dctcoef *block = (dctcoef*)p_block;
273
+
274
+    for(i=0; i<4; i++){ /* stage one: per-row sum and difference */
275
+        temp[2*i+0] = block[stride*i + xStride*0] + block[stride*i + xStride*1];
276
+        temp[2*i+1] = block[stride*i + xStride*0] - block[stride*i + xStride*1];
277
+    }
278
+
279
+    for(i=0; i<2; i++){ /* stage two: 4-point butterfly per column, then scale */
280
+        const int offset= x_offset[i];
281
+        const int z0= temp[2*0+i] + temp[2*2+i];
282
+        const int z1= temp[2*0+i] - temp[2*2+i];
283
+        const int z2= temp[2*1+i] - temp[2*3+i];
284
+        const int z3= temp[2*1+i] + temp[2*3+i];
285
+
286
+        block[stride*0+offset]= ((z0 + z3)*qmul + 128) >> 8;
287
+        block[stride*1+offset]= ((z1 + z2)*qmul + 128) >> 8;
288
+        block[stride*2+offset]= ((z1 - z2)*qmul + 128) >> 8;
289
+        block[stride*3+offset]= ((z0 - z3)*qmul + 128) >> 8;
290
+    }
291
+
292
+#if 0 /* disabled debug dump of the first 256 coefficients */
293
+    av_log(NULL, AV_LOG_INFO, "after chroma dc\n");
294
+    for (int i = 0; i < 256; i++) {
295
+        av_log(NULL, AV_LOG_INFO, "%5d ", block[i]);
296
+        if (!((i+1) % 16))
297
+            av_log(NULL, AV_LOG_INFO, "\n");
298
+    }
299
+#endif
300
+}
301
+
266 302
 void FUNCC(ff_h264_chroma_dc_dequant_idct)(DCTELEM *p_block, int qmul){
267 303
     const int stride= 16*2;
268 304
     const int xStride= 16;
... ...
@@ -363,7 +363,7 @@ static void pred8x8_tm_vp8_c(uint8_t *src, int stride){
363 363
 /**
364 364
  * Set the intra prediction function pointers.
365 365
  */
366
-void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){
366
+void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc){
367 367
 //    MpegEncContext * const s = &h->s;
368 368
 
369 369
 #undef FUNC
... ...
@@ -436,20 +436,39 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){
436 436
     h->pred8x8l[TOP_DC_PRED         ]= FUNCC(pred8x8l_top_dc              , depth);\
437 437
     h->pred8x8l[DC_128_PRED         ]= FUNCC(pred8x8l_128_dc              , depth);\
438 438
 \
439
-    h->pred8x8[VERT_PRED8x8   ]= FUNCC(pred8x8_vertical                   , depth);\
440
-    h->pred8x8[HOR_PRED8x8    ]= FUNCC(pred8x8_horizontal                 , depth);\
439
+    if (chroma_format_idc == 1) {\
440
+        h->pred8x8[VERT_PRED8x8   ]= FUNCC(pred8x8_vertical               , depth);\
441
+        h->pred8x8[HOR_PRED8x8    ]= FUNCC(pred8x8_horizontal             , depth);\
442
+    } else {\
443
+        h->pred8x8[VERT_PRED8x8   ]= FUNCC(pred8x16_vertical              , depth);\
444
+        h->pred8x8[HOR_PRED8x8    ]= FUNCC(pred8x16_horizontal            , depth);\
445
+    }\
441 446
     if (codec_id != CODEC_ID_VP8) {\
442
-        h->pred8x8[PLANE_PRED8x8]= FUNCC(pred8x8_plane                    , depth);\
447
+        if (chroma_format_idc == 1) {\
448
+            h->pred8x8[PLANE_PRED8x8]= FUNCC(pred8x8_plane                , depth);\
449
+        } else {\
450
+            h->pred8x8[PLANE_PRED8x8]= FUNCC(pred8x16_plane               , depth);\
451
+        }\
443 452
     } else\
444 453
         h->pred8x8[PLANE_PRED8x8]= FUNCD(pred8x8_tm_vp8);\
445 454
     if(codec_id != CODEC_ID_RV40 && codec_id != CODEC_ID_VP8){\
446
-        h->pred8x8[DC_PRED8x8     ]= FUNCC(pred8x8_dc                     , depth);\
447
-        h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x8_left_dc                , depth);\
448
-        h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x8_top_dc                 , depth);\
449
-        h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l0t, depth);\
450
-        h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0lt, depth);\
451
-        h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l00, depth);\
452
-        h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0l0, depth);\
455
+        if (chroma_format_idc == 1) {\
456
+            h->pred8x8[DC_PRED8x8     ]= FUNCC(pred8x8_dc                     , depth);\
457
+            h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x8_left_dc                , depth);\
458
+            h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x8_top_dc                 , depth);\
459
+            h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l0t, depth);\
460
+            h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0lt, depth);\
461
+            h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l00, depth);\
462
+            h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0l0, depth);\
463
+        } else {\
464
+            h->pred8x8[DC_PRED8x8     ]= FUNCC(pred8x16_dc                    , depth);\
465
+            h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x16_left_dc               , depth);\
466
+            h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x16_top_dc                , depth);\
467
+            h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l0t, depth);\
468
+            h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0lt, depth);\
469
+            h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l00, depth);\
470
+            h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0l0, depth);\
471
+        }\
453 472
     }else{\
454 473
         h->pred8x8[DC_PRED8x8     ]= FUNCD(pred8x8_dc_rv40);\
455 474
         h->pred8x8[LEFT_DC_PRED8x8]= FUNCD(pred8x8_left_dc_rv40);\
... ...
@@ -459,7 +478,11 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){
459 459
             h->pred8x8[DC_129_PRED8x8]= FUNCC(pred8x8_129_dc              , depth);\
460 460
         }\
461 461
     }\
462
-    h->pred8x8[DC_128_PRED8x8 ]= FUNCC(pred8x8_128_dc                     , depth);\
462
+    if (chroma_format_idc == 1) {\
463
+        h->pred8x8[DC_128_PRED8x8 ]= FUNCC(pred8x8_128_dc                 , depth);\
464
+    } else {\
465
+        h->pred8x8[DC_128_PRED8x8 ]= FUNCC(pred8x16_128_dc                , depth);\
466
+    }\
463 467
 \
464 468
     h->pred16x16[DC_PRED8x8     ]= FUNCC(pred16x16_dc                     , depth);\
465 469
     h->pred16x16[VERT_PRED8x8   ]= FUNCC(pred16x16_vertical               , depth);\
... ...
@@ -506,6 +529,6 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){
506 506
             break;
507 507
     }
508 508
 
509
-    if (ARCH_ARM) ff_h264_pred_init_arm(h, codec_id, bit_depth);
510
-    if (HAVE_MMX) ff_h264_pred_init_x86(h, codec_id, bit_depth);
509
+    if (ARCH_ARM) ff_h264_pred_init_arm(h, codec_id, bit_depth, chroma_format_idc);
510
+    if (HAVE_MMX) ff_h264_pred_init_x86(h, codec_id, bit_depth, chroma_format_idc);
511 511
 }
... ...
@@ -101,8 +101,8 @@ typedef struct H264PredContext{
101 101
     void (*pred16x16_add[3])(uint8_t *pix/*align 16*/, const int *block_offset, const DCTELEM *block/*align 16*/, int stride);
102 102
 }H264PredContext;
103 103
 
104
-void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth);
105
-void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, const int bit_depth);
106
-void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth);
104
/**
 * Set the intra prediction function pointers of an H264PredContext.
 *
 * ff_h264_pred_init() takes chroma_format_idc in addition to codec_id and
 * bit_depth: for the pred8x8[] vertical, horizontal, plane, DC, left-DC,
 * top-DC and DC-128 entries the value 1 installs the pred8x8_* functions,
 * any other value the pred8x16_* ones. It then calls the _arm and _x86
 * initialisers (guarded by ARCH_ARM and HAVE_MMX), forwarding all arguments.
 *
 * @param h                 context whose function pointers are set
 * @param codec_id          codec selector (RV40 and VP8 get their own entries)
 * @param bit_depth         sample bit depth used to pick the template instance
 * @param chroma_format_idc chroma format selector (1 vs. anything else)
 */
+void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc);
105
+void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc);
106
+void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc);
107 107
 
108 108
 #endif /* AVCODEC_H264PRED_H */
... ...
@@ -454,6 +454,19 @@ static void FUNCC(pred8x8_vertical)(uint8_t *_src, int _stride){
454 454
     }
455 455
 }
456 456
 
457
/* Vertical prediction for an 8-wide, 16-row block: the eight pixels of the
 * row directly above the block are copied into each of the 16 rows.
 * _src is reinterpreted as pixel*, and _stride is shifted right by
 * sizeof(pixel)-1 so that it can index that pixel view. */
+static void FUNCC(pred8x16_vertical)(uint8_t *_src, int _stride){
458
+    int i;
459
+    pixel *src = (pixel*)_src;
460
+    int stride = _stride>>(sizeof(pixel)-1);
461
+    const pixel4 a= AV_RN4PA(((pixel4*)(src-stride))+0); /* left four pixels of the row above */
462
+    const pixel4 b= AV_RN4PA(((pixel4*)(src-stride))+1); /* right four pixels of the row above */
463
+
464
+    for(i=0; i<16; i++){
465
+        AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
466
+        AV_WN4PA(((pixel4*)(src+i*stride))+1, b);
467
+    }
468
+}
469
+
457 470
 static void FUNCC(pred8x8_horizontal)(uint8_t *_src, int stride){
458 471
     int i;
459 472
     pixel *src = (pixel*)_src;
... ...
@@ -466,6 +479,17 @@ static void FUNCC(pred8x8_horizontal)(uint8_t *_src, int stride){
466 466
     }
467 467
 }
468 468
 
469
/* Horizontal prediction for an 8-wide, 16-row block: in every row the pixel
 * immediately to the left of the block (src[-1 + i*stride]) is replicated
 * across all eight positions. stride is shifted right by sizeof(pixel)-1 so
 * that it can index the pixel* view of _src. */
+static void FUNCC(pred8x16_horizontal)(uint8_t *_src, int stride){
470
+    int i;
471
+    pixel *src = (pixel*)_src;
472
+    stride >>= sizeof(pixel)-1;
473
+    for(i=0; i<16; i++){
474
+        const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]); /* left neighbour, four copies */
475
+        AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
476
+        AV_WN4PA(((pixel4*)(src+i*stride))+1, a);
477
+    }
478
+}
479
+
469 480
 #define PRED8x8_X(n, v)\
470 481
 static void FUNCC(pred8x8_##n##_dc)(uint8_t *_src, int stride){\
471 482
     int i;\
... ...
@@ -482,6 +506,11 @@ PRED8x8_X(127, (1<<(BIT_DEPTH-1))-1);
482 482
 PRED8x8_X(128, (1<<(BIT_DEPTH-1))+0);
483 483
 PRED8x8_X(129, (1<<(BIT_DEPTH-1))+1);
484 484
 
485
/* Constant-DC prediction for an 8x16 block, built from two calls to
 * pred8x8_128_dc: one at _src and one at _src + 8*stride. The offset is
 * applied to the uint8_t pointer with the stride exactly as passed in. */
+static void FUNCC(pred8x16_128_dc)(uint8_t *_src, int stride){
486
+    FUNCC(pred8x8_128_dc)(_src, stride);
487
+    FUNCC(pred8x8_128_dc)(_src+8*stride, stride);
488
+}
489
+
485 490
 static void FUNCC(pred8x8_left_dc)(uint8_t *_src, int stride){
486 491
     int i;
487 492
     int dc0, dc2;
... ...
@@ -507,6 +536,11 @@ static void FUNCC(pred8x8_left_dc)(uint8_t *_src, int stride){
507 507
     }
508 508
 }
509 509
 
510
/* Left-DC prediction for an 8x16 block, built from two calls to
 * pred8x8_left_dc: one at _src and one at _src + 8*stride, so each 8-row
 * half is predicted independently by the 8x8 routine. */
+static void FUNCC(pred8x16_left_dc)(uint8_t *_src, int stride){
511
+    FUNCC(pred8x8_left_dc)(_src, stride);
512
+    FUNCC(pred8x8_left_dc)(_src+8*stride, stride);
513
+}
514
+
510 515
 static void FUNCC(pred8x8_top_dc)(uint8_t *_src, int stride){
511 516
     int i;
512 517
     int dc0, dc1;
... ...
@@ -532,6 +566,27 @@ static void FUNCC(pred8x8_top_dc)(uint8_t *_src, int stride){
532 532
     }
533 533
 }
534 534
 
535
+static void FUNCC(pred8x16_top_dc)(uint8_t *_src, int stride){
536
+    int i;
537
+    int dc0, dc1;
538
+    pixel4 dc0splat, dc1splat;
539
+    pixel *src = (pixel*)_src;
540
+    stride >>= sizeof(pixel)-1;
541
+
542
+    dc0=dc1=0;
543
+    for(i=0;i<4; i++){
544
+        dc0+= src[i-stride];
545
+        dc1+= src[4+i-stride];
546
+    }
547
+    dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
548
+    dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
549
+
550
+    for(i=0; i<16; i++){
551
+        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
552
+        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
553
+    }
554
+}
555
+
535 556
 static void FUNCC(pred8x8_dc)(uint8_t *_src, int stride){
536 557
     int i;
537 558
     int dc0, dc1, dc2;
... ...
@@ -560,6 +615,48 @@ static void FUNCC(pred8x8_dc)(uint8_t *_src, int stride){
560 560
     }
561 561
 }
562 562
 
563
+static void FUNCC(pred8x16_dc)(uint8_t *_src, int stride){
564
+    int i;
565
+    int dc0, dc1, dc2, dc3, dc4;
566
+    pixel4 dc0splat, dc1splat, dc2splat, dc3splat, dc4splat, dc5splat, dc6splat, dc7splat;
567
+    pixel *src = (pixel*)_src;
568
+    stride >>= sizeof(pixel)-1;
569
+
570
+    dc0=dc1=dc2=dc3=dc4=0;
571
+    for(i=0;i<4; i++){
572
+        dc0+= src[-1+i*stride] + src[i-stride];
573
+        dc1+= src[4+i-stride];
574
+        dc2+= src[-1+(i+4)*stride];
575
+        dc3+= src[-1+(i+8)*stride];
576
+        dc4+= src[-1+(i+12)*stride];
577
+    }
578
+    dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3);
579
+    dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
580
+    dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
581
+    dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3);
582
+    dc4splat = PIXEL_SPLAT_X4((dc3 + 2)>>2);
583
+    dc5splat = PIXEL_SPLAT_X4((dc1 + dc3 + 4)>>3);
584
+    dc6splat = PIXEL_SPLAT_X4((dc4 + 2)>>2);
585
+    dc7splat = PIXEL_SPLAT_X4((dc1 + dc4 + 4)>>3);
586
+
587
+    for(i=0; i<4; i++){
588
+        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
589
+        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
590
+    }
591
+    for(i=4; i<8; i++){
592
+        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);
593
+        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc3splat);
594
+    }
595
+    for(i=8; i<12; i++){
596
+        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc4splat);
597
+        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc5splat);
598
+    }
599
+    for(i=12; i<16; i++){
600
+        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc6splat);
601
+        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc7splat);
602
+    }
603
+}
604
+
563 605
 //the following 4 functions should not be optimized!
564 606
 static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, int stride){
565 607
     FUNCC(pred8x8_top_dc)(src, stride);
... ...
@@ -618,6 +715,47 @@ static void FUNCC(pred8x8_plane)(uint8_t *_src, int _stride){
618 618
   }
619 619
 }
620 620
 
621
+static void FUNCC(pred8x16_plane)(uint8_t *_src, int _stride){
622
+  int j, k;
623
+  int a;
624
+  INIT_CLIP
625
+  pixel *src = (pixel*)_src;
626
+  int stride = _stride>>(sizeof(pixel)-1);
627
+  const pixel * const src0 = src +3-stride;
628
+  const pixel *       src1 = src +8*stride-1;
629
+  const pixel *       src2 = src1-2*stride;    // == src+6*stride-1;
630
+  int H = src0[1] - src0[-1];
631
+  int V = src1[0] - src2[ 0];
632
+
633
+  for (k = 2; k <= 4; ++k) {
634
+      src1 += stride; src2 -= stride;
635
+      H += k*(src0[k] - src0[-k]);
636
+      V += k*(src1[0] - src2[ 0]);
637
+  }
638
+  for (; k <= 8; ++k) {
639
+      src1 += stride; src2 -= stride;
640
+      V += k*(src1[0] - src2[0]);
641
+  }
642
+
643
+  H = (17*H+16) >> 5;
644
+  V = (5*V+32) >> 6;
645
+
646
+  a = 16*(src1[0] + src2[8] + 1) - 7*V - 3*H;
647
+  for(j=16; j>0; --j) {
648
+    int b = a;
649
+    a += V;
650
+    src[0] = CLIP((b    ) >> 5);
651
+    src[1] = CLIP((b+  H) >> 5);
652
+    src[2] = CLIP((b+2*H) >> 5);
653
+    src[3] = CLIP((b+3*H) >> 5);
654
+    src[4] = CLIP((b+4*H) >> 5);
655
+    src[5] = CLIP((b+5*H) >> 5);
656
+    src[6] = CLIP((b+6*H) >> 5);
657
+    src[7] = CLIP((b+7*H) >> 5);
658
+    src += stride;
659
+  }
660
+}
661
+
621 662
 #define SRC(x,y) src[(x)+(y)*stride]
622 663
 #define PL(y) \
623 664
     const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
... ...
@@ -999,12 +999,13 @@ void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {
999 999
     }
1000 1000
 }
1001 1001
 
1002
-void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth)
1002
+void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth, const int chroma_format_idc)
1003 1003
 {
1004 1004
     if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) {
1005 1005
     if (bit_depth == 8) {
1006 1006
         c->h264_idct_add = ff_h264_idct_add_altivec;
1007
-        c->h264_idct_add8 = ff_h264_idct_add8_altivec;
1007
+        if (chroma_format_idc == 1)
1008
+            c->h264_idct_add8 = ff_h264_idct_add8_altivec;
1008 1009
         c->h264_idct_add16 = ff_h264_idct_add16_altivec;
1009 1010
         c->h264_idct_add16intra = ff_h264_idct_add16intra_altivec;
1010 1011
         c->h264_idct_dc_add= h264_idct_dc_add_altivec;
... ...
@@ -1412,7 +1412,7 @@ av_cold int ff_rv34_decode_init(AVCodecContext *avctx)
1412 1412
     if (MPV_common_init(s) < 0)
1413 1413
         return -1;
1414 1414
 
1415
-    ff_h264_pred_init(&r->h, CODEC_ID_RV40, 8);
1415
+    ff_h264_pred_init(&r->h, CODEC_ID_RV40, 8, 1);
1416 1416
 
1417 1417
 #if CONFIG_RV30_DECODER
1418 1418
     if (avctx->codec_id == CODEC_ID_RV30)
... ...
@@ -1721,7 +1721,7 @@ static av_cold int vp8_decode_init(AVCodecContext *avctx)
1721 1721
     avctx->pix_fmt = PIX_FMT_YUV420P;
1722 1722
 
1723 1723
     dsputil_init(&s->dsp, avctx);
1724
-    ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8);
1724
+    ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8, 1);
1725 1725
     ff_vp8dsp_init(&s->vp8dsp);
1726 1726
 
1727 1727
     return 0;
... ...
@@ -167,7 +167,7 @@ void ff_pred4x4_tm_vp8_mmxext      (uint8_t *src, const uint8_t *topright, int s
167 167
 void ff_pred4x4_tm_vp8_ssse3       (uint8_t *src, const uint8_t *topright, int stride);
168 168
 void ff_pred4x4_vertical_vp8_mmxext(uint8_t *src, const uint8_t *topright, int stride);
169 169
 
170
-void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth)
170
+void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc)
171 171
 {
172 172
     int mm_flags = av_get_cpu_flags();
173 173
 
... ...
@@ -176,14 +176,17 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
176 176
         if (mm_flags & AV_CPU_FLAG_MMX) {
177 177
             h->pred16x16[VERT_PRED8x8         ] = ff_pred16x16_vertical_mmx;
178 178
             h->pred16x16[HOR_PRED8x8          ] = ff_pred16x16_horizontal_mmx;
179
-            h->pred8x8  [VERT_PRED8x8         ] = ff_pred8x8_vertical_mmx;
180
-            h->pred8x8  [HOR_PRED8x8          ] = ff_pred8x8_horizontal_mmx;
179
+            if (chroma_format_idc == 1) {
180
+                h->pred8x8  [VERT_PRED8x8     ] = ff_pred8x8_vertical_mmx;
181
+                h->pred8x8  [HOR_PRED8x8      ] = ff_pred8x8_horizontal_mmx;
182
+            }
181 183
             if (codec_id == CODEC_ID_VP8) {
182 184
                 h->pred16x16[PLANE_PRED8x8    ] = ff_pred16x16_tm_vp8_mmx;
183 185
                 h->pred8x8  [PLANE_PRED8x8    ] = ff_pred8x8_tm_vp8_mmx;
184 186
                 h->pred4x4  [TM_VP8_PRED      ] = ff_pred4x4_tm_vp8_mmx;
185 187
             } else {
186
-                h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_mmx;
188
+                if (chroma_format_idc == 1)
189
+                    h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_mmx;
187 190
                 if (codec_id == CODEC_ID_SVQ3) {
188 191
                     h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_mmx;
189 192
                 } else if (codec_id == CODEC_ID_RV40) {
... ...
@@ -197,7 +200,8 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
197 197
         if (mm_flags & AV_CPU_FLAG_MMX2) {
198 198
             h->pred16x16[HOR_PRED8x8            ] = ff_pred16x16_horizontal_mmxext;
199 199
             h->pred16x16[DC_PRED8x8             ] = ff_pred16x16_dc_mmxext;
200
-            h->pred8x8  [HOR_PRED8x8            ] = ff_pred8x8_horizontal_mmxext;
200
+            if (chroma_format_idc == 1)
201
+                h->pred8x8[HOR_PRED8x8          ] = ff_pred8x8_horizontal_mmxext;
201 202
             h->pred8x8l [TOP_DC_PRED            ] = ff_pred8x8l_top_dc_mmxext;
202 203
             h->pred8x8l [DC_PRED                ] = ff_pred8x8l_dc_mmxext;
203 204
             h->pred8x8l [HOR_PRED               ] = ff_pred8x8l_horizontal_mmxext;
... ...
@@ -221,8 +225,10 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
221 221
                 h->pred4x4  [HOR_UP_PRED        ] = ff_pred4x4_horizontal_up_mmxext;
222 222
             }
223 223
             if (codec_id == CODEC_ID_SVQ3 || codec_id == CODEC_ID_H264) {
224
-                h->pred8x8  [TOP_DC_PRED8x8     ] = ff_pred8x8_top_dc_mmxext;
225
-                h->pred8x8  [DC_PRED8x8         ] = ff_pred8x8_dc_mmxext;
224
+                if (chroma_format_idc == 1) {
225
+                    h->pred8x8[TOP_DC_PRED8x8   ] = ff_pred8x8_top_dc_mmxext;
226
+                    h->pred8x8[DC_PRED8x8       ] = ff_pred8x8_dc_mmxext;
227
+                }
226 228
             }
227 229
             if (codec_id == CODEC_ID_VP8) {
228 230
                 h->pred16x16[PLANE_PRED8x8      ] = ff_pred16x16_tm_vp8_mmxext;
... ...
@@ -231,7 +237,8 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
231 231
                 h->pred4x4  [TM_VP8_PRED        ] = ff_pred4x4_tm_vp8_mmxext;
232 232
                 h->pred4x4  [VERT_PRED          ] = ff_pred4x4_vertical_vp8_mmxext;
233 233
             } else {
234
-                h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_mmx2;
234
+                if (chroma_format_idc == 1)
235
+                    h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_mmx2;
235 236
                 if (codec_id == CODEC_ID_SVQ3) {
236 237
                     h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_svq3_mmx2;
237 238
                 } else if (codec_id == CODEC_ID_RV40) {
... ...
@@ -257,7 +264,8 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
257 257
                 h->pred16x16[PLANE_PRED8x8    ] = ff_pred16x16_tm_vp8_sse2;
258 258
                 h->pred8x8  [PLANE_PRED8x8    ] = ff_pred8x8_tm_vp8_sse2;
259 259
             } else {
260
-                h->pred8x8  [PLANE_PRED8x8    ] = ff_pred8x8_plane_sse2;
260
+                if (chroma_format_idc == 1)
261
+                    h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_sse2;
261 262
                 if (codec_id == CODEC_ID_SVQ3) {
262 263
                     h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_sse2;
263 264
                 } else if (codec_id == CODEC_ID_RV40) {
... ...
@@ -271,7 +279,8 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
271 271
         if (mm_flags & AV_CPU_FLAG_SSSE3) {
272 272
             h->pred16x16[HOR_PRED8x8          ] = ff_pred16x16_horizontal_ssse3;
273 273
             h->pred16x16[DC_PRED8x8           ] = ff_pred16x16_dc_ssse3;
274
-            h->pred8x8  [HOR_PRED8x8          ] = ff_pred8x8_horizontal_ssse3;
274
+            if (chroma_format_idc == 1)
275
+                h->pred8x8  [HOR_PRED8x8      ] = ff_pred8x8_horizontal_ssse3;
275 276
             h->pred8x8l [TOP_DC_PRED          ] = ff_pred8x8l_top_dc_ssse3;
276 277
             h->pred8x8l [DC_PRED              ] = ff_pred8x8l_dc_ssse3;
277 278
             h->pred8x8l [HOR_PRED             ] = ff_pred8x8l_horizontal_ssse3;
... ...
@@ -286,7 +295,8 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
286 286
                 h->pred8x8  [PLANE_PRED8x8    ] = ff_pred8x8_tm_vp8_ssse3;
287 287
                 h->pred4x4  [TM_VP8_PRED      ] = ff_pred4x4_tm_vp8_ssse3;
288 288
             } else {
289
-                h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_ssse3;
289
+                if (chroma_format_idc == 1)
290
+                    h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_ssse3;
290 291
                 if (codec_id == CODEC_ID_SVQ3) {
291 292
                     h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_ssse3;
292 293
                 } else if (codec_id == CODEC_ID_RV40) {
... ...
@@ -301,7 +311,8 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
301 301
             h->pred4x4[DC_PRED             ] = ff_pred4x4_dc_10_mmxext;
302 302
             h->pred4x4[HOR_UP_PRED         ] = ff_pred4x4_horizontal_up_10_mmxext;
303 303
 
304
-            h->pred8x8[DC_PRED8x8          ] = ff_pred8x8_dc_10_mmxext;
304
+            if (chroma_format_idc == 1)
305
+                h->pred8x8[DC_PRED8x8      ] = ff_pred8x8_dc_10_mmxext;
305 306
 
306 307
             h->pred8x8l[DC_128_PRED        ] = ff_pred8x8l_128_dc_10_mmxext;
307 308
 
... ...
@@ -319,11 +330,13 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
319 319
             h->pred4x4[VERT_RIGHT_PRED     ] = ff_pred4x4_vertical_right_10_sse2;
320 320
             h->pred4x4[HOR_DOWN_PRED       ] = ff_pred4x4_horizontal_down_10_sse2;
321 321
 
322
-            h->pred8x8[DC_PRED8x8          ] = ff_pred8x8_dc_10_sse2;
323
-            h->pred8x8[TOP_DC_PRED8x8      ] = ff_pred8x8_top_dc_10_sse2;
324
-            h->pred8x8[PLANE_PRED8x8       ] = ff_pred8x8_plane_10_sse2;
325
-            h->pred8x8[VERT_PRED8x8        ] = ff_pred8x8_vertical_10_sse2;
326
-            h->pred8x8[HOR_PRED8x8         ] = ff_pred8x8_horizontal_10_sse2;
322
+            if (chroma_format_idc == 1) {
323
+                h->pred8x8[DC_PRED8x8      ] = ff_pred8x8_dc_10_sse2;
324
+                h->pred8x8[TOP_DC_PRED8x8  ] = ff_pred8x8_top_dc_10_sse2;
325
+                h->pred8x8[PLANE_PRED8x8   ] = ff_pred8x8_plane_10_sse2;
326
+                h->pred8x8[VERT_PRED8x8    ] = ff_pred8x8_vertical_10_sse2;
327
+                h->pred8x8[HOR_PRED8x8     ] = ff_pred8x8_horizontal_10_sse2;
328
+            }
327 329
 
328 330
             h->pred8x8l[VERT_PRED           ] = ff_pred8x8l_vertical_10_sse2;
329 331
             h->pred8x8l[HOR_PRED            ] = ff_pred8x8l_horizontal_10_sse2;
... ...
@@ -350,7 +350,7 @@ H264_BIWEIGHT_10_SSE( 4,  8, 10)
350 350
 H264_BIWEIGHT_10_SSE( 4,  4, 10)
351 351
 H264_BIWEIGHT_10_SSE( 4,  2, 10)
352 352
 
353
-void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
353
+void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chroma_format_idc)
354 354
 {
355 355
     int mm_flags = av_get_cpu_flags();
356 356
 
... ...
@@ -368,7 +368,8 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
368 368
 
369 369
         c->h264_idct_add16          = ff_h264_idct_add16_8_mmx;
370 370
         c->h264_idct8_add4          = ff_h264_idct8_add4_8_mmx;
371
-        c->h264_idct_add8           = ff_h264_idct_add8_8_mmx;
371
+        if (chroma_format_idc == 1)
372
+            c->h264_idct_add8       = ff_h264_idct_add8_8_mmx;
372 373
         c->h264_idct_add16intra     = ff_h264_idct_add16intra_8_mmx;
373 374
         c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_mmx;
374 375
 
... ...
@@ -377,13 +378,16 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
377 377
             c->h264_idct8_dc_add   = ff_h264_idct8_dc_add_8_mmx2;
378 378
             c->h264_idct_add16     = ff_h264_idct_add16_8_mmx2;
379 379
             c->h264_idct8_add4     = ff_h264_idct8_add4_8_mmx2;
380
-            c->h264_idct_add8      = ff_h264_idct_add8_8_mmx2;
380
+            if (chroma_format_idc == 1)
381
+                c->h264_idct_add8  = ff_h264_idct_add8_8_mmx2;
381 382
             c->h264_idct_add16intra= ff_h264_idct_add16intra_8_mmx2;
382 383
 
383 384
             c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_8_mmxext;
384
-            c->h264_h_loop_filter_chroma= ff_deblock_h_chroma_8_mmxext;
385 385
             c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_8_mmxext;
386
-            c->h264_h_loop_filter_chroma_intra= ff_deblock_h_chroma_intra_8_mmxext;
386
+            if (chroma_format_idc == 1) {
387
+                c->h264_h_loop_filter_chroma= ff_deblock_h_chroma_8_mmxext;
388
+                c->h264_h_loop_filter_chroma_intra= ff_deblock_h_chroma_intra_8_mmxext;
389
+            }
387 390
 #if ARCH_X86_32
388 391
             c->h264_v_loop_filter_luma= ff_deblock_v_luma_8_mmxext;
389 392
             c->h264_h_loop_filter_luma= ff_deblock_h_luma_8_mmxext;
... ...
@@ -413,7 +417,8 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
413 413
 
414 414
                 c->h264_idct_add16          = ff_h264_idct_add16_8_sse2;
415 415
                 c->h264_idct8_add4          = ff_h264_idct8_add4_8_sse2;
416
-                c->h264_idct_add8           = ff_h264_idct_add8_8_sse2;
416
+                if (chroma_format_idc == 1)
417
+                    c->h264_idct_add8       = ff_h264_idct_add8_8_sse2;
417 418
                 c->h264_idct_add16intra     = ff_h264_idct_add16intra_8_sse2;
418 419
                 c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_sse2;
419 420
 
... ...
@@ -472,7 +477,8 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
472 472
                 c->h264_idct8_dc_add   = ff_h264_idct8_dc_add_10_sse2;
473 473
 
474 474
                 c->h264_idct_add16     = ff_h264_idct_add16_10_sse2;
475
-                c->h264_idct_add8      = ff_h264_idct_add8_10_sse2;
475
+                if (chroma_format_idc == 1)
476
+                    c->h264_idct_add8  = ff_h264_idct_add8_10_sse2;
476 477
                 c->h264_idct_add16intra= ff_h264_idct_add16intra_10_sse2;
477 478
 #if HAVE_ALIGNED_STACK
478 479
                 c->h264_idct8_add      = ff_h264_idct8_add_10_sse2;
... ...
@@ -532,7 +538,8 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
532 532
                 c->h264_idct8_dc_add   = ff_h264_idct8_dc_add_10_avx;
533 533
 
534 534
                 c->h264_idct_add16     = ff_h264_idct_add16_10_avx;
535
-                c->h264_idct_add8      = ff_h264_idct_add8_10_avx;
535
+                if (chroma_format_idc == 1)
536
+                    c->h264_idct_add8  = ff_h264_idct_add8_10_avx;
536 537
                 c->h264_idct_add16intra= ff_h264_idct_add16intra_10_avx;
537 538
 #if HAVE_ALIGNED_STACK
538 539
                 c->h264_idct8_add      = ff_h264_idct8_add_10_avx;