GitList

Browse code

h264: 4:2:2 intra decoding support

Signed-off-by: Diego Biurrun <diego@biurrun.de>
Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>

Baptiste Coudurier authored on 2011/08/17 00:05:44
Showing 24 changed files

Changelog index 3041632..491f93b 100644
libavcodec/arm/h264dsp_init_arm.c index c2399e5..c1ca217 100644
libavcodec/arm/h264pred_init_arm.c index e96f339..5fc07bc 100644
libavcodec/dsputil.h index bef2cdd..acb2041 100644
libavcodec/h264.c index 1faaaa6..f61f524 100644
libavcodec/h264.h index 122a54a..bd2b5d8 100644
libavcodec/h264_cabac.c index 065b6e8..0325ea4 100644
libavcodec/h264_cavlc.c index ca7b939..b94b51b 100644
libavcodec/h264_loopfilter.c index 377968f..64b07e9 100644
libavcodec/h264_mvpred.h index 7c7086d..4b6a083 100644
libavcodec/h264_ps.c index 677ca80..76bf116 100644
libavcodec/h264data.h index 1851169..2cfa548 100644
libavcodec/h264dsp.c index 64f4856..19ad2db 100644
libavcodec/h264dsp.h index 6972725..7337f17 100644
libavcodec/h264dsp_template.c index d11eff0..ee4bbe5 100644
libavcodec/h264idct_template.c index ba55715..eba850a 100644
libavcodec/h264pred.c index e73d82c..17199d0 100644
libavcodec/h264pred.h index 34b1e90..b880446 100644
libavcodec/h264pred_template.c index 750e82c..d4f654e 100644
libavcodec/ppc/h264_altivec.c index 8dd4ea3..a915378 100644
libavcodec/rv34.c index b771a7f..091d49f 100644
libavcodec/vp8.c index d5cdaba..95755e3 100644
libavcodec/x86/h264_intrapred_init.c index 414d5e6..41e611e 100644
libavcodec/x86/h264dsp_mmx.c index 35ec267..910ad84 100644

@@ -54,6 +54,7 @@ easier to use. The changes are:
                      - boxblur filter
                      - Ut Video decoder
                      - Speex encoding via libspeex
                     +- 4:2:2 H.264 decoding support
                      version 0.7:

libavcodec/arm/h264dsp_init_arm.c

History View file @ 76741b0

@@ -92,7 +92,7 @@ void ff_h264_idct8_add4_neon(uint8_t *dst, const int *block_offset,
                                                   DCTELEM *block, int stride,
                                                   const uint8_t nnzc[6*8]);
                     -static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth)
                     +static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth, const int chroma_format_idc)
                      {
                          if (bit_depth == 8) {
                          c->h264_v_loop_filter_luma   = ff_h264_v_loop_filter_luma_neon;
@@ -122,14 +122,15 @@ static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth)
                          c->h264_idct_dc_add     = ff_h264_idct_dc_add_neon;
                          c->h264_idct_add16      = ff_h264_idct_add16_neon;
                          c->h264_idct_add16intra = ff_h264_idct_add16intra_neon;
                     -    c->h264_idct_add8       = ff_h264_idct_add8_neon;
                     +    if (chroma_format_idc == 1)
                     +        c->h264_idct_add8   = ff_h264_idct_add8_neon;
                          c->h264_idct8_add       = ff_h264_idct8_add_neon;
                          c->h264_idct8_dc_add    = ff_h264_idct8_dc_add_neon;
                          c->h264_idct8_add4      = ff_h264_idct8_add4_neon;
                          }
                      }
                     -void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth)
                     +void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth, const int chroma_format_idc)
                      {
                     -    if (HAVE_NEON) ff_h264dsp_init_neon(c, bit_depth);
                     +    if (HAVE_NEON) ff_h264dsp_init_neon(c, bit_depth, chroma_format_idc);
                      }

libavcodec/arm/h264pred_init_arm.c

History View file @ 76741b0

@@ -42,7 +42,7 @@ void ff_pred8x8_0lt_dc_neon(uint8_t *src, int stride);
 void ff_pred8x8_l00_dc_neon(uint8_t *src, int stride);
 void ff_pred8x8_0l0_dc_neon(uint8_t *src, int stride);
 
-static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id, const int bit_depth)
+static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc)
 {
     const int high_depth = bit_depth > 8;
 
@@ -74,7 +74,7 @@ static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id, const int b
         h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_neon;
 }
 
-void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, int bit_depth)
+void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, int bit_depth, const int chroma_format_idc)
 {
-    if (HAVE_NEON)    ff_h264_pred_init_neon(h, codec_id, bit_depth);
+    if (HAVE_NEON)    ff_h264_pred_init_neon(h, codec_id, bit_depth, chroma_format_idc);
 }

libavcodec/dsputil.h

History View file @ 76741b0

@@ -63,8 +63,10 @@ void ff_h264_idct_dc_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int strid
                      void ff_h264_idct_add16_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
                      void ff_h264_idct_add16intra_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
                      void ff_h264_idct8_add4_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
                     +void ff_h264_idct_add8_422_ ## depth ## _c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
                      void ff_h264_idct_add8_ ## depth ## _c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
                      void ff_h264_luma_dc_dequant_idct_ ## depth ## _c(DCTELEM *output, DCTELEM *input, int qmul);\
                     +void ff_h264_chroma422_dc_dequant_idct_ ## depth ## _c(DCTELEM *block, int qmul);\
                      void ff_h264_chroma_dc_dequant_idct_ ## depth ## _c(DCTELEM *block, int qmul);
                      H264_IDCT( 8)

libavcodec/h264.c

History View file @ 76741b0

@@ -942,7 +942,7 @@ static void clone_tables(H264Context *dst, H264Context *src, int i){
                          dst->list_counts              = src->list_counts;
                          dst->s.obmc_scratchpad = NULL;
                     -    ff_h264_pred_init(&dst->hpc, src->s.codec_id, src->sps.bit_depth_luma);
                     +    ff_h264_pred_init(&dst->hpc, src->s.codec_id, src->sps.bit_depth_luma, src->sps.chroma_format_idc);
                      }
                      /**
@@ -970,8 +970,8 @@ static av_cold void common_init(H264Context *h){
                          s->height = s->avctx->height;
                          s->codec_id= s->avctx->codec->id;
                     -    ff_h264dsp_init(&h->h264dsp, 8);
                     -    ff_h264_pred_init(&h->hpc, s->codec_id, 8);
                     +    ff_h264dsp_init(&h->h264dsp, 8, 1);
                     +    ff_h264_pred_init(&h->hpc, s->codec_id, 8, 1);
                          h->dequant_coeff_pps= -1;
                          s->unrestricted_mv=1;
@@ -1432,11 +1432,16 @@ static void decode_postinit(H264Context *h, int setup_finished){
                              ff_thread_finish_setup(s->avctx);
                      }
                     -static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int chroma444, int simple){
                     +static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y,
                     +                                              uint8_t *src_cb, uint8_t *src_cr,
                     +                                              int linesize, int uvlinesize, int simple)
                     +{
                          MpegEncContext * const s = &h->s;
                          uint8_t *top_border;
                          int top_idx = 1;
                          const int pixel_shift = h->pixel_shift;
                     +    int chroma444 = CHROMA444;
                     +    int chroma422 = CHROMA422;
                          src_y  -=   linesize;
                          src_cb -= uvlinesize;
@@ -1460,6 +1465,14 @@ static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y, ui
                                                  AV_COPY128(top_border+16, src_cb + 15*uvlinesize);
                                                  AV_COPY128(top_border+32, src_cr + 15*uvlinesize);
                                              }
                     +                    } else if(chroma422) {
                     +                        if (pixel_shift) {
                     +                            AV_COPY128(top_border+32, src_cb + 15*uvlinesize);
                     +                            AV_COPY128(top_border+48, src_cr + 15*uvlinesize);
                     +                        } else {
                     +                            AV_COPY64(top_border+16, src_cb +  15*uvlinesize);
                     +                            AV_COPY64(top_border+24, src_cr +  15*uvlinesize);
                     +                        }
                                          } else {
                                              if (pixel_shift) {
                                                  AV_COPY128(top_border+32, src_cb+7*uvlinesize);
@@ -1495,6 +1508,14 @@ static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y, ui
                                      AV_COPY128(top_border+16, src_cb + 16*linesize);
                                      AV_COPY128(top_border+32, src_cr + 16*linesize);
                                  }
                     +        } else if(chroma422) {
                     +            if (pixel_shift) {
                     +                AV_COPY128(top_border+32, src_cb+16*uvlinesize);
                     +                AV_COPY128(top_border+48, src_cr+16*uvlinesize);
                     +            } else {
                     +                AV_COPY64(top_border+16, src_cb+16*uvlinesize);
                     +                AV_COPY64(top_border+24, src_cr+16*uvlinesize);
                     +            }
                              } else {
                                  if (pixel_shift) {
                                      AV_COPY128(top_border+32, src_cb+8*uvlinesize);
@@ -1773,10 +1794,11 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
                          /* is_h264 should always be true if SVQ3 is disabled. */
                          const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
                          void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
                     +    const int block_h = 16 >> s->chroma_y_shift;
                          dest_y  = s->current_picture.f.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize  ) * 16;
                     -    dest_cb = s->current_picture.f.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) *  8;
                     -    dest_cr = s->current_picture.f.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) *  8;
                     +    dest_cb = s->current_picture.f.data[1] + (mb_x << pixel_shift)*8 + mb_y * s->uvlinesize * block_h;
                     +    dest_cr = s->current_picture.f.data[2] + (mb_x << pixel_shift)*8 + mb_y * s->uvlinesize * block_h;
                          s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4);
                          s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + (64 << pixel_shift), dest_cr - dest_cb, 2);
@@ -1789,8 +1811,8 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
                              block_offset = &h->block_offset[48];
                              if(mb_y&1){ //FIXME move out of this function?
                                  dest_y -= s->linesize*15;
                     -            dest_cb-= s->uvlinesize*7;
                     -            dest_cr-= s->uvlinesize*7;
                     +            dest_cb-= s->uvlinesize * (block_h - 1);
                     +            dest_cr-= s->uvlinesize * (block_h - 1);
                              }
                              if(FRAME_MBAFF) {
                                  int list;
@@ -1842,12 +1864,12 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
                                              }
                                          }
                                      } else {
                     -                    for (i = 0; i < 8; i++) {
                     +                    for (i = 0; i < block_h; i++) {
                                              uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize);
                                              for (j = 0; j < 8; j++)
                                                  tmp_cb[j] = get_bits(&gb, bit_depth);
                                          }
                     -                    for (i = 0; i < 8; i++) {
                     +                    for (i = 0; i < block_h; i++) {
                                              uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize);
                                              for (j = 0; j < 8; j++)
                                                  tmp_cr[j] = get_bits(&gb, bit_depth);
@@ -1865,7 +1887,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
                                              memset(dest_cr + i*uvlinesize, 128, 8);
                                          }
                                      } else {
                     -                    for (i = 0; i < 8; i++) {
                     +                    for (i = 0; i < block_h; i++) {
                                              memcpy(dest_cb + i*uvlinesize, h->mb + 128 + i*4,  8);
                                              memcpy(dest_cr + i*uvlinesize, h->mb + 160 + i*4,  8);
                                          }
@@ -1913,10 +1935,18 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
                                      }
                                  }else{
                                      if(is_h264){
                     +                    int qp[2];
                     +                    if (CHROMA422) {
                     +                        qp[0] = h->chroma_qp[0] + 3;
                     +                        qp[1] = h->chroma_qp[1] + 3;
                     +                    } else {
                     +                        qp[0] = h->chroma_qp[0];
                     +                        qp[1] = h->chroma_qp[1];
                     +                    }
                                          if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ])
                     -                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*1 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
                     +                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*1 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][qp[0]][0]);
                                          if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ])
                     -                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*2 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
                     +                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*2 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][qp[1]][0]);
                                          h->h264dsp.h264_idct_add8(dest, block_offset,
                                                                    h->mb, uvlinesize,
                                                                    h->non_zero_count_cache);
@@ -2555,11 +2585,13 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
                          h->b_stride=  s->mb_width*4;
                     +    s->chroma_y_shift = h->sps.chroma_format_idc <= 1; // 400 uses yuv420p
                     +
                          s->width = 16*s->mb_width - (2>>CHROMA444)*FFMIN(h->sps.crop_right, (8<<CHROMA444)-1);
                          if(h->sps.frame_mbs_only_flag)
                     -        s->height= 16*s->mb_height - (2>>CHROMA444)*FFMIN(h->sps.crop_bottom, (8<<CHROMA444)-1);
                     +        s->height= 16*s->mb_height - (1<<s->chroma_y_shift)*FFMIN(h->sps.crop_bottom, (16>>s->chroma_y_shift)-1);
                          else
                     -        s->height= 16*s->mb_height - (4>>CHROMA444)*FFMIN(h->sps.crop_bottom, (8<<CHROMA444)-1);
                     +        s->height= 16*s->mb_height - (2<<s->chroma_y_shift)*FFMIN(h->sps.crop_bottom, (16>>s->chroma_y_shift)-1);
                          if (s->context_initialized
                              && (   s->width != s->avctx->width || s->height != s->avctx->height
@@ -2601,14 +2633,26 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
                              switch (h->sps.bit_depth_luma) {
                                  case 9 :
                     -                s->avctx->pix_fmt = CHROMA444 ? PIX_FMT_YUV444P9 : PIX_FMT_YUV420P9;
                     +                if (CHROMA444)
                     +                    s->avctx->pix_fmt = PIX_FMT_YUV444P9;
                     +                else if (CHROMA422)
                     +                    s->avctx->pix_fmt = PIX_FMT_YUV422P9;
                     +                else
                     +                    s->avctx->pix_fmt = PIX_FMT_YUV420P9;
                                      break;
                                  case 10 :
                     -                s->avctx->pix_fmt = CHROMA444 ? PIX_FMT_YUV444P10 : PIX_FMT_YUV420P10;
                     +                if (CHROMA444)
                     +                    s->avctx->pix_fmt = PIX_FMT_YUV444P10;
                     +                else if (CHROMA422)
                     +                    s->avctx->pix_fmt = PIX_FMT_YUV422P10;
                     +                else
                     +                    s->avctx->pix_fmt = PIX_FMT_YUV420P10;
                                      break;
                                  default:
                                      if (CHROMA444){
                                          s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? PIX_FMT_YUVJ444P : PIX_FMT_YUV444P;
                     +                } else if (CHROMA422) {
                     +                    s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? PIX_FMT_YUVJ422P : PIX_FMT_YUV422P;
                                      }else{
                                          s->avctx->pix_fmt = s->avctx->get_format(s->avctx,
                                                                                   s->avctx->codec->pix_fmts ?
@@ -3272,6 +3316,7 @@ static void loop_filter(H264Context *h, int start_x, int end_x){
                          const int end_mb_y= s->mb_y + FRAME_MBAFF;
                          const int old_slice_type= h->slice_type;
                          const int pixel_shift = h->pixel_shift;
                     +    const int block_h = 16 >> s->chroma_y_shift;
                          if(h->deblocking_filter) {
                              for(mb_x= start_x; mb_x<end_x; mb_x++){
@@ -3288,8 +3333,8 @@ static void loop_filter(H264Context *h, int start_x, int end_x){
                                      s->mb_x= mb_x;
                                      s->mb_y= mb_y;
                                      dest_y  = s->current_picture.f.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize  ) * 16;
                     -                dest_cb = s->current_picture.f.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * (8 << CHROMA444);
                     -                dest_cr = s->current_picture.f.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * (8 << CHROMA444);
                     +                dest_cb = s->current_picture.f.data[1] + (mb_x << pixel_shift) * (8 << CHROMA444) + mb_y * s->uvlinesize * block_h;
                     +                dest_cr = s->current_picture.f.data[2] + (mb_x << pixel_shift) * (8 << CHROMA444) + mb_y * s->uvlinesize * block_h;
                                          //FIXME simplify above
                                      if (MB_FIELD) {
@@ -3297,14 +3342,14 @@ static void loop_filter(H264Context *h, int start_x, int end_x){
                                          uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
                                          if(mb_y&1){ //FIXME move out of this function?
                                              dest_y -= s->linesize*15;
                     -                        dest_cb-= s->uvlinesize*((8 << CHROMA444)-1);
                     -                        dest_cr-= s->uvlinesize*((8 << CHROMA444)-1);
                     +                        dest_cb-= s->uvlinesize * (block_h - 1);
                     +                        dest_cr-= s->uvlinesize * (block_h - 1);
                                          }
                                      } else {
                                          linesize   = h->mb_linesize   = s->linesize;
                                          uvlinesize = h->mb_uvlinesize = s->uvlinesize;
                                      }
                     -                backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, CHROMA444, 0);
                     +                backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
                                      if(fill_filter_caches(h, mb_type))
                                          continue;
                                      h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.f.qscale_table[mb_xy]);
@@ -3742,13 +3787,15 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
                                  if(avctx->has_b_frames < 2)
                                      avctx->has_b_frames= !s->low_delay;
                     -            if (avctx->bits_per_raw_sample != h->sps.bit_depth_luma) {
                     +            if (avctx->bits_per_raw_sample != h->sps.bit_depth_luma ||
                     +                h->cur_chroma_format_idc != h->sps.chroma_format_idc) {
                                      if (h->sps.bit_depth_luma >= 8 && h->sps.bit_depth_luma <= 10) {
                                          avctx->bits_per_raw_sample = h->sps.bit_depth_luma;
                     +                    h->cur_chroma_format_idc = h->sps.chroma_format_idc;
                                          h->pixel_shift = h->sps.bit_depth_luma > 8;
                     -                    ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma);
                     -                    ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma);
                     +                    ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma, h->sps.chroma_format_idc);
                     +                    ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma, h->sps.chroma_format_idc);
                                          s->dsp.dct_bits = h->sps.bit_depth_luma > 8 ? 32 : 16;
                                          dsputil_init(&s->dsp, s->avctx);
                                      } else {

libavcodec/h264.h

History View file @ 76741b0

@@ -39,13 +39,6 @@
                      #define interlaced_dct interlaced_dct_is_a_bad_name
                      #define mb_intra mb_intra_is_not_initialized_see_mb_type
                     -#define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
                     -#define COEFF_TOKEN_VLC_BITS           8
                     -#define TOTAL_ZEROS_VLC_BITS           9
                     -#define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
                     -#define RUN_VLC_BITS                   3
                     -#define RUN7_VLC_BITS                  6
                     -
                      #define MAX_SPS_COUNT 32
                      #define MAX_PPS_COUNT 256
@@ -92,6 +85,7 @@
                      #define CABAC h->pps.cabac
                      #endif
                     +#define CHROMA422 (h->sps.chroma_format_idc == 2)
                      #define CHROMA444 (h->sps.chroma_format_idc == 3)
                      #define EXTENDED_SAR          255
@@ -582,6 +576,8 @@ typedef struct H264Context{
                          // Timestamp stuff
                          int sei_buffering_period_present;  ///< Buffering period SEI flag
                          int initial_cpb_removal_delay[32]; ///< Initial timestamps for CPBs
                     +
                     +    int cur_chroma_format_idc;
                      }H264Context;
@@ -809,7 +805,7 @@ static av_always_inline void write_back_non_zero_count(H264Context *h){
                          AV_COPY32(&nnz[32], &nnz_cache[4+8*11]);
                          AV_COPY32(&nnz[36], &nnz_cache[4+8*12]);
                     -    if(CHROMA444){
                     +    if(!h->s.chroma_y_shift){
                              AV_COPY32(&nnz[24], &nnz_cache[4+8* 8]);
                              AV_COPY32(&nnz[28], &nnz_cache[4+8* 9]);
                              AV_COPY32(&nnz[40], &nnz_cache[4+8*13]);

libavcodec/h264_cabac.c

History View file @ 76741b0

@@ -1565,7 +1565,12 @@ DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8)[63] = {
                       5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
                      };
                     -static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
                     +static av_always_inline void
                     +decode_cabac_residual_internal(H264Context *h, DCTELEM *block,
                     +                               int cat, int n, const uint8_t *scantable,
                     +                               const uint32_t *qmul, int max_coeff,
                     +                               int is_dc, int chroma422)
                     +{
                          static const int significant_coeff_flag_offset[2][14] = {
                            { 105+0, 105+15, 105+29, 105+44, 105+47, 402, 484+0, 484+15, 484+29, 660, 528+0, 528+15, 528+29, 718 },
                            { 277+0, 277+15, 277+29, 277+44, 277+47, 436, 776+0, 776+15, 776+29, 675, 820+0, 820+15, 820+29, 733 }
@@ -1587,12 +1592,16 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
                              9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
                              9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
                          };
                     +    static const uint8_t sig_coeff_offset_dc[7] = { 0, 0, 1, 1, 2, 2, 2 };
                          /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
                           * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
                           * map node ctx => cabac ctx for level=1 */
                          static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
                          /* map node ctx => cabac ctx for level>1 */
                     -    static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
                     +    static const uint8_t coeff_abs_levelgt1_ctx[2][8] = {
                     +        { 5, 5, 5, 5, 6, 7, 8, 9 },
                     +        { 5, 5, 5, 5, 6, 7, 8, 8 }, // 422/dc case
                     +    };
                          static const uint8_t coeff_abs_level_transition[2][8] = {
                          /* update node ctx after decoding a level=1 */
                              { 1, 2, 3, 3, 4, 5, 6, 7 },
@@ -1651,12 +1660,20 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
                              coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index,
                                                                       last_coeff_ctx_base, sig_off);
                          } else {
                     -        coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index,
                     -                                             last_coeff_ctx_base-significant_coeff_ctx_base);
                     +        if (is_dc && chroma422) { // dc 422
                     +            DECODE_SIGNIFICANCE(7, sig_coeff_offset_dc[last], sig_coeff_offset_dc[last]);
                     +        } else {
                     +            coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index,
                     +                                                 last_coeff_ctx_base-significant_coeff_ctx_base);
                     +        }
                      #else
                              DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
                          } else {
                     -        DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
                     +        if (is_dc && chroma422) { // dc 422
                     +            DECODE_SIGNIFICANCE(7, sig_coeff_offset_dc[last], sig_coeff_offset_dc[last]);
                     +        } else {
                     +            DECODE_SIGNIFICANCE(max_coeff - 1, last, last);
                     +        }
                      #endif
                          }
                          assert(coeff_count > 0);
@@ -1691,7 +1708,7 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
                                  } \
                              } else { \
                                  int coeff_abs = 2; \
                     -            ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base; \
                     +            ctx = coeff_abs_levelgt1_ctx[is_dc && chroma422][node_ctx] + abs_level_m1_ctx_base; \
                                  node_ctx = coeff_abs_level_transition[1][node_ctx]; \
+                     \
                                  while( coeff_abs < 15 && get_cabac( CC, ctx ) ) { \
@@ -1733,11 +1750,18 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
                      }
                      static void decode_cabac_residual_dc_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, int max_coeff ) {
                     -    decode_cabac_residual_internal(h, block, cat, n, scantable, NULL, max_coeff, 1);
                     +    decode_cabac_residual_internal(h, block, cat, n, scantable, NULL, max_coeff, 1, 0);
                     +}
                     +
                     +static void decode_cabac_residual_dc_internal_422(H264Context *h, DCTELEM *block,
                     +                                                  int cat, int n, const uint8_t *scantable,
                     +                                                  int max_coeff)
                     +{
                     +    decode_cabac_residual_internal(h, block, cat, n, scantable, NULL, max_coeff, 1, 1);
                      }
                      static void decode_cabac_residual_nondc_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
                     -    decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
                     +    decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0, 0);
                      }
                      /* cat: 0-> DC 16x16  n = 0
@@ -1761,6 +1785,19 @@ static av_always_inline void decode_cabac_residual_dc( H264Context *h, DCTELEM *
                          decode_cabac_residual_dc_internal( h, block, cat, n, scantable, max_coeff );
+                     }
                     +static av_always_inline void /* CABAC chroma DC residual entry point; called from the CHROMA422 branch of the macroblock decoder with cat 3 and max_coeff 8. */
                     +decode_cabac_residual_dc_422(H264Context *h, DCTELEM *block,
                     +                             int cat, int n, const uint8_t *scantable,
                     +                             int max_coeff)
                     +{
                     +    /* read coded block flag */
                     +    if (get_cabac(&h->cabac, &h->cabac_state[get_cabac_cbf_ctx(h, cat, n, max_coeff, 1)]) == 0) { /* context index comes from get_cabac_cbf_ctx() with last argument 1; the non-DC variant passes 0 there */
                     +        h->non_zero_count_cache[scan8[n]] = 0; /* flag is 0: store 0 in the non-zero-count cache entry for block n */
                     +        return; /* nothing further is read from the bitstream for this block */
                     +    }
                     +    decode_cabac_residual_dc_internal_422(h, block, cat, n, scantable, max_coeff); /* flag is 1: parse the coefficients through the 4:2:2 DC wrapper */
                     +}
+                    +
                      static av_always_inline void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
                          /* read coded block flag */
                          if( (cat != 5 || CHROMA444) && get_cabac( &h->cabac, &h->cabac_state[get_cabac_cbf_ctx( h, cat, n, max_coeff, 0 ) ] ) == 0 ) {
@@ -2313,7 +2350,36 @@ decode_intra_mb:
                              if(CHROMA444){
                                  decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 1);
                                  decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 2);
                     -        } else {
                     +        } else if (CHROMA422) {
                     +            if( cbp&0x30 ){
                     +                int c;
                     +                for( c = 0; c < 2; c++ ) {
                     +                    //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
                     +                    decode_cabac_residual_dc_422(h, h->mb + ((256 + 16*16*c) << pixel_shift), 3,
                     +                                                 CHROMA_DC_BLOCK_INDEX + c,
                     +                                                 chroma422_dc_scan, 8);
                     +                }
                     +            }
+                    +
                     +            if( cbp&0x20 ) {
                     +                int c, i, i8x8;
                     +                for( c = 0; c < 2; c++ ) {
                     +                    DCTELEM *mb = h->mb + (16*(16 + 16*c) << pixel_shift);
                     +                    qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
                     +                    for (i8x8 = 0; i8x8 < 2; i8x8++) {
                     +                        for (i = 0; i < 4; i++) {
                     +                            const int index = 16 + 16 * c + 8*i8x8 + i;
                     +                            //av_log(s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16);
                     +                            decode_cabac_residual_nondc(h, mb, 4, index, scan + 1, qmul, 15);
                     +                            mb += 16<<pixel_shift;
                     +                        }
                     +                    }
                     +                }
                     +            } else {
                     +                fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
                     +                fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
                     +            }
                     +        } else /* yuv420 */ {
                                  if( cbp&0x30 ){
                                      int c;
                                      for( c = 0; c < 2; c++ ) {

libavcodec/h264_cavlc.c

History View file @ 76741b0

@@ -62,6 +62,30 @@ static const uint8_t chroma_dc_coeff_token_bits[4*5]={
 , 3, 2, 0,
                      };
                     +static const uint8_t chroma422_dc_coeff_token_len[4*9]={ /* Code lengths (in bits) for the 4:2:2 chroma DC coeff_token VLC; handed to init_vlc() as 4*9 codes together with chroma422_dc_coeff_token_bits. */
                     +  1,  0,  0,  0, /* decode_residual() derives total_coeff as coeff_token>>2, so each row of 4 is one total_coeff value (0..8); the column is presumably the trailing-ones count -- TODO confirm */
                     +  7,  2,  0,  0, /* length 0 presumably marks a combination that has no code -- TODO confirm how init_vlc() treats it */
                     +  7,  7,  3,  0,
                     +  9,  7,  7,  5,
                     +  9,  9,  7,  6,
                     + 10, 10,  9,  7,
                     + 11, 11, 10,  7,
                     + 12, 12, 11, 10,
                     + 13, 12, 12, 11, /* 13 is the longest code, matching CHROMA422_DC_COEFF_TOKEN_VLC_BITS */
                     +};
+                    +
                     +static const uint8_t chroma422_dc_coeff_token_bits[4*9]={ /* Code values for the 4:2:2 chroma DC coeff_token VLC; entry i pairs with chroma422_dc_coeff_token_len[i] when both arrays are passed to init_vlc(). */
                     +  1,   0,  0, 0, /* same 9 rows x 4 columns layout as the length table */
                     + 15,   1,  0, 0,
                     + 14,  13,  1, 0,
                     +  7,  12, 11, 1,
                     +  6,   5, 10, 1,
                     +  7,   6,  4, 9,
                     +  7,   6,  5, 8,
                     +  7,   6,  5, 4,
                     +  7,   5,  4, 4,
                     +};
+                    +
                      static const uint8_t coeff_token_len[4][4*17]={
+                     {
 , 0, 0, 0,
@@ -172,6 +196,26 @@ static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
                          { 1, 0, 0, 0,},
                      };
                     +static const uint8_t chroma422_dc_total_zeros_len[7][8]= { /* Code lengths for the 4:2:2 chroma DC total_zeros VLCs: 7 tables, each passed to init_vlc() as 8 codes. */
                     +    { 1, 3, 3, 4, 4, 4, 5, 5 }, /* decode_residual() indexes (chroma422_dc_total_zeros_vlc-1)[total_coeff], so row 0 serves total_coeff == 1 */
                     +    { 3, 2, 3, 3, 3, 3, 3 }, /* rows with fewer than 8 initializers are zero-filled by C; zero lengths presumably mean "no code" -- TODO confirm */
                     +    { 3, 3, 2, 2, 3, 3 },
                     +    { 3, 2, 2, 2, 3 },
                     +    { 2, 2, 2, 2 },
                     +    { 2, 2, 1 },
                     +    { 1, 1 }, /* last row serves total_coeff == 7; total_coeff == max_coeff skips the lookup and uses zeros_left = 0 */
                     +};
+                    +
                     +static const uint8_t chroma422_dc_total_zeros_bits[7][8]= { /* Code values for the 4:2:2 chroma DC total_zeros VLCs; entry [i][j] pairs with chroma422_dc_total_zeros_len[i][j] in init_vlc(). */
                     +    { 1, 2, 3, 2, 3, 1, 1, 0 }, /* row i is the table selected for total_coeff == i+1 */
                     +    { 0, 1, 1, 4, 5, 6, 7 },
                     +    { 0, 1, 1, 2, 6, 7 },
                     +    { 6, 0, 1, 2, 7 },
                     +    { 0, 1, 2, 3 },
                     +    { 0, 1, 1 },
                     +    { 0, 1 },
                     +};
+                    +
                      static const uint8_t run_len[7][16]={
                          {1,1},
                          {1,2,2},
@@ -200,6 +244,10 @@ static VLC chroma_dc_coeff_token_vlc;
                      static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
                      static const int chroma_dc_coeff_token_vlc_table_size = 256;
                     +static VLC chroma422_dc_coeff_token_vlc; /* VLC reader for coeff_token of 4:2:2 chroma DC blocks; set up in ff_h264_decode_init_vlc() */
                     +static VLC_TYPE chroma422_dc_coeff_token_vlc_table[8192][2]; /* static backing storage assigned to .table; 8192 == 1 << CHROMA422_DC_COEFF_TOKEN_VLC_BITS (13) */
                     +static const int chroma422_dc_coeff_token_vlc_table_size = 8192; /* assigned to .table_allocated before init_vlc() is called with INIT_VLC_USE_NEW_STATIC */
+                    +
                      static VLC total_zeros_vlc[15];
                      static VLC_TYPE total_zeros_vlc_tables[15][512][2];
                      static const int total_zeros_vlc_tables_size = 512;
@@ -208,6 +256,10 @@ static VLC chroma_dc_total_zeros_vlc[3];
                      static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
                      static const int chroma_dc_total_zeros_vlc_tables_size = 8;
                     +static VLC chroma422_dc_total_zeros_vlc[7]; /* one total_zeros VLC per total_coeff value 1..7 for 4:2:2 chroma DC blocks */
                     +static VLC_TYPE chroma422_dc_total_zeros_vlc_tables[7][32][2]; /* static backing storage, one [32][2] slab per VLC; 32 == 1 << CHROMA422_DC_TOTAL_ZEROS_VLC_BITS (5) */
                     +static const int chroma422_dc_total_zeros_vlc_tables_size = 32; /* assigned to each .table_allocated before init_vlc() */
+                    +
                      static VLC run_vlc[6];
                      static VLC_TYPE run_vlc_tables[6][8][2];
                      static const int run_vlc_tables_size = 8;
@@ -219,6 +271,14 @@ static const int run7_vlc_table_size = 96;
                      #define LEVEL_TAB_BITS 8
                      static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
                     +#define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8 /* bits argument given to get_vlc2() for the 4-coefficient chroma DC coeff_token table */
                     +#define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13 /* bits argument for the 4:2:2 chroma DC coeff_token table; equals the longest code length (13) and 1<<13 is the 8192-entry table size */
                     +#define COEFF_TOKEN_VLC_BITS           8
                     +#define TOTAL_ZEROS_VLC_BITS           9 /* bits argument for the total_zeros_vlc lookups */
                     +#define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3 /* bits argument for chroma DC total_zeros when max_coeff == 4 */
                     +#define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5 /* bits argument for 4:2:2 chroma DC total_zeros; equals the longest code length (5) and 1<<5 is the 32-entry table size */
                     +#define RUN_VLC_BITS                   3
                     +#define RUN7_VLC_BITS                  6
                      /**
                       * gets the predicted number of non-zero coefficients.
@@ -278,6 +338,13 @@ av_cold void ff_h264_decode_init_vlc(void){
                                       &chroma_dc_coeff_token_bits[0], 1, 1,
                                       INIT_VLC_USE_NEW_STATIC);
                     +        chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table;
                     +        chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size;
                     +        init_vlc(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9,
                     +                 &chroma422_dc_coeff_token_len [0], 1, 1,
                     +                 &chroma422_dc_coeff_token_bits[0], 1, 1,
                     +                 INIT_VLC_USE_NEW_STATIC);
+                    +
                              offset = 0;
                              for(i=0; i<4; i++){
                                  coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
@@ -304,6 +371,17 @@ av_cold void ff_h264_decode_init_vlc(void){
                                           &chroma_dc_total_zeros_bits[i][0], 1, 1,
                                           INIT_VLC_USE_NEW_STATIC);
+                             }
+                    +
                     +        for(i=0; i<7; i++){
                     +            chroma422_dc_total_zeros_vlc[i].table = chroma422_dc_total_zeros_vlc_tables[i];
                     +            chroma422_dc_total_zeros_vlc[i].table_allocated = chroma422_dc_total_zeros_vlc_tables_size;
                     +            init_vlc(&chroma422_dc_total_zeros_vlc[i],
                     +                     CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8,
                     +                     &chroma422_dc_total_zeros_len [i][0], 1, 1,
                     +                     &chroma422_dc_total_zeros_bits[i][0], 1, 1,
                     +                     INIT_VLC_USE_NEW_STATIC);
                     +        }
+                    +
                              for(i=0; i<15; i++){
                                  total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
                                  total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
@@ -373,7 +451,10 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
                          //FIXME put trailing_onex into the context
                          if(max_coeff <= 8){
                     -        coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
                     +        if (max_coeff == 4)
                     +            coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
                     +        else
                     +            coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc.table, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1);
                              total_coeff= coeff_token>>2;
                          }else{
                              if(n >= LUMA_DC_BLOCK_INDEX){
@@ -483,11 +564,16 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
                          if(total_coeff == max_coeff)
                              zeros_left=0;
                          else{
                     -        /* FIXME: we don't actually support 4:2:2 yet. */
                     -        if(max_coeff <= 8)
                     -            zeros_left= get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[ total_coeff ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
                     -        else
                     +        if (max_coeff <= 8) {
                     +            if (max_coeff == 4)
                     +                zeros_left = get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[total_coeff].table,
                     +                                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
                     +            else
                     +                zeros_left = get_vlc2(gb, (chroma422_dc_total_zeros_vlc-1)[total_coeff].table,
                     +                                      CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1);
                     +        } else {
                                  zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
                     +        }
+                         }
                      #define STORE_BLOCK(type) \
@@ -994,7 +1080,7 @@ decode_intra_mb:
                          s->current_picture.f.mb_type[mb_xy] = mb_type;
                          if(cbp || IS_INTRA16x16(mb_type)){
                     -        int i4x4, chroma_idx;
                     +        int i4x4, i8x8, chroma_idx;
                              int dquant;
                              int ret;
                              GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
@@ -1036,7 +1122,34 @@ decode_intra_mb:
                                  if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ){
                                      return -1;
+                                 }
                     -        } else {
                     +        } else if (CHROMA422) {
                     +            if(cbp&0x30){
                     +                for(chroma_idx=0; chroma_idx<2; chroma_idx++)
                     +                    if (decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift),
                     +                                        CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma422_dc_scan,
                     +                                        NULL, 8) < 0) {
                     +                        return -1;
                     +                    }
                     +            }
+                    +
                     +            if(cbp&0x20){
                     +                for(chroma_idx=0; chroma_idx<2; chroma_idx++){
                     +                    const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
                     +                    DCTELEM *mb = h->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
                     +                    for (i8x8 = 0; i8x8 < 2; i8x8++) {
                     +                        for (i4x4 = 0; i4x4 < 4; i4x4++) {
                     +                            const int index = 16 + 16*chroma_idx + 8*i8x8 + i4x4;
                     +                            if (decode_residual(h, gb, mb, index, scan + 1, qmul, 15) < 0)
                     +                                return -1;
                     +                            mb += 16 << pixel_shift;
                     +                        }
                     +                    }
                     +                }
                     +            }else{
                     +                fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
                     +                fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
                     +            }
                     +        } else /* yuv420 */ {
                                  if(cbp&0x30){
                                      for(chroma_idx=0; chroma_idx<2; chroma_idx++)
                                          if( decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){

libavcodec/h264_loopfilter.c

History View file @ 76741b0

@@ -212,6 +212,7 @@ static void av_always_inline h264_filter_mb_fast_internal( H264Context *h, int m
                          MpegEncContext * const s = &h->s;
                          int chroma = !(CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
                          int chroma444 = CHROMA444;
                     +    int chroma422 = CHROMA422;
                          int mb_xy = h->mb_xy;
                          int left_type= h->left_type[LTOP];
@@ -289,6 +290,23 @@ static void av_always_inline h264_filter_mb_fast_internal( H264Context *h, int m
                                          filter_mb_edgeh( &img_cb[4*3*linesize], linesize, bS3, qpc, a, b, h, 0);
                                          filter_mb_edgeh( &img_cr[4*3*linesize], linesize, bS3, qpc, a, b, h, 0);
+                                     }
                     +            }else if(chroma422){
                     +                if(left_type){
                     +                    filter_mb_edgecv(&img_cb[2*0<<pixel_shift], uvlinesize, bS4, qpc0, a, b, h, 1);
                     +                    filter_mb_edgecv(&img_cr[2*0<<pixel_shift], uvlinesize, bS4, qpc0, a, b, h, 1);
                     +                }
                     +                filter_mb_edgecv(&img_cb[2*2<<pixel_shift], uvlinesize, bS3, qpc, a, b, h, 0);
                     +                filter_mb_edgecv(&img_cr[2*2<<pixel_shift], uvlinesize, bS3, qpc, a, b, h, 0);
                     +                if(top_type){
                     +                    filter_mb_edgech(&img_cb[4*0*uvlinesize], uvlinesize, bSH, qpc1, a, b, h, 1);
                     +                    filter_mb_edgech(&img_cr[4*0*uvlinesize], uvlinesize, bSH, qpc1, a, b, h, 1);
                     +                }
                     +                filter_mb_edgech(&img_cb[4*1*uvlinesize], uvlinesize, bS3, qpc, a, b, h, 0);
                     +                filter_mb_edgech(&img_cr[4*1*uvlinesize], uvlinesize, bS3, qpc, a, b, h, 0);
                     +                filter_mb_edgech(&img_cb[4*2*uvlinesize], uvlinesize, bS3, qpc, a, b, h, 0);
                     +                filter_mb_edgech(&img_cr[4*2*uvlinesize], uvlinesize, bS3, qpc, a, b, h, 0);
                     +                filter_mb_edgech(&img_cb[4*3*uvlinesize], uvlinesize, bS3, qpc, a, b, h, 0);
                     +                filter_mb_edgech(&img_cr[4*3*uvlinesize], uvlinesize, bS3, qpc, a, b, h, 0);
                                  }else{
                                      if(left_type){
                                          filter_mb_edgecv( &img_cb[2*0<<pixel_shift], uvlinesize, bS4, qpc0, a, b, h, 1);
@@ -411,10 +429,12 @@ static int check_mv(H264Context *h, long b_idx, long bn_idx, int mvy_limit){
                          return v;
+                     }
                     -static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int a, int b, int chroma, int chroma444, int dir) {
                     +static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int a, int b, int chroma, int dir) {
                          MpegEncContext * const s = &h->s;
                          int edge;
                          int chroma_qp_avg[2];
                     +    int chroma444 = CHROMA444;
                     +    int chroma422 = CHROMA422;
                          const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
                          const int mbm_type = dir == 0 ? h->left_type[LTOP] : h->top_type;
@@ -564,8 +584,9 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
                          for( edge = 1; edge < edges; edge++ ) {
                              DECLARE_ALIGNED(8, int16_t, bS)[4];
                              int qp;
                     +        const int deblock_edge = !IS_8x8DCT(mb_type & (edge<<24)); // (edge&1) && IS_8x8DCT(mb_type)
                     -        if( IS_8x8DCT(mb_type & (edge<<24)) ) // (edge&1) && IS_8x8DCT(mb_type)
                     +        if (!deblock_edge && (!chroma422 || dir == 0))
                                  continue;
                              if( IS_INTRA(mb_type)) {
@@ -627,14 +648,23 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
+                                     }
+                                 }
                              } else {
                     -            filter_mb_edgeh( &img_y[4*edge*linesize], linesize, bS, qp, a, b, h, 0 );
                     -            if (chroma) {
                     -                if (chroma444) {
                     -                    filter_mb_edgeh ( &img_cb[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], a, b, h, 0);
                     -                    filter_mb_edgeh ( &img_cr[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], a, b, h, 0);
                     -                } else if( (edge&1) == 0 ) {
                     -                    filter_mb_edgech( &img_cb[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], a, b, h, 0);
                     -                    filter_mb_edgech( &img_cr[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], a, b, h, 0);
                     +            if (chroma422) {
                     +                if (deblock_edge)
                     +                    filter_mb_edgeh(&img_y[4*edge*linesize], linesize, bS, qp, a, b, h, 0);
                     +                if (chroma) {
                     +                    filter_mb_edgech(&img_cb[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], a, b, h, 0);
                     +                    filter_mb_edgech(&img_cr[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], a, b, h, 0);
                     +                }
                     +            } else {
                     +                filter_mb_edgeh(&img_y[4*edge*linesize], linesize, bS, qp, a, b, h, 0);
                     +                if (chroma) {
                     +                    if (chroma444) {
                     +                        filter_mb_edgeh (&img_cb[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], a, b, h, 0);
                     +                        filter_mb_edgeh (&img_cr[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], a, b, h, 0);
                     +                    } else if ((edge&1) == 0) {
                     +                        filter_mb_edgech(&img_cb[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], a, b, h, 0);
                     +                        filter_mb_edgech(&img_cr[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], a, b, h, 0);
                     +                    }
+                                     }
+                                 }
+                             }
@@ -726,6 +756,11 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint
                                          filter_mb_mbaff_edgev ( h, img_cb + 8*uvlinesize, uvlinesize, bS+4, 1, bqp[1], a, b, 1 );
                                          filter_mb_mbaff_edgev ( h, img_cr,                uvlinesize, bS  , 1, rqp[0], a, b, 1 );
                                          filter_mb_mbaff_edgev ( h, img_cr + 8*uvlinesize, uvlinesize, bS+4, 1, rqp[1], a, b, 1 );
                     +                } else if (CHROMA422) {
                     +                    filter_mb_mbaff_edgecv(h, img_cb,                uvlinesize, bS  , 1, bqp[0], a, b, 1);
                     +                    filter_mb_mbaff_edgecv(h, img_cb + 8*uvlinesize, uvlinesize, bS+4, 1, bqp[1], a, b, 1);
                     +                    filter_mb_mbaff_edgecv(h, img_cr,                uvlinesize, bS  , 1, rqp[0], a, b, 1);
                     +                    filter_mb_mbaff_edgecv(h, img_cr + 8*uvlinesize, uvlinesize, bS+4, 1, rqp[1], a, b, 1);
                                      }else{
                                          filter_mb_mbaff_edgecv( h, img_cb,                uvlinesize, bS  , 1, bqp[0], a, b, 1 );
                                          filter_mb_mbaff_edgecv( h, img_cb + 4*uvlinesize, uvlinesize, bS+4, 1, bqp[1], a, b, 1 );
@@ -754,9 +789,9 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint
                      #if CONFIG_SMALL
                          for( dir = 0; dir < 2; dir++ )
                     -        filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, a, b, chroma, CHROMA444, dir);
                     +        filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, a, b, chroma, dir);
                      #else
                     -    filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, a, b, chroma, CHROMA444, 0);
                     -    filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0,                        a, b, chroma, CHROMA444, 1);
                     +    filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, a, b, chroma, 0);
                     +    filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0,                        a, b, chroma, 1);
                      #endif
+                     }

libavcodec/h264_mvpred.h

History View file @ 76741b0

@@ -510,7 +510,7 @@ static void fill_decode_caches(H264Context *h, int mb_type){
                          if(top_type){
                              nnz = h->non_zero_count[top_xy];
                              AV_COPY32(&nnz_cache[4+8* 0], &nnz[4*3]);
                     -        if(CHROMA444){
                     +        if(!s->chroma_y_shift){
                                  AV_COPY32(&nnz_cache[4+8* 5], &nnz[4* 7]);
                                  AV_COPY32(&nnz_cache[4+8*10], &nnz[4*11]);
                              }else{
@@ -534,6 +534,11 @@ static void fill_decode_caches(H264Context *h, int mb_type){
                                      nnz_cache[3+8* 7 + 2*8*i]= nnz[left_block[8+1+2*i]+4*4];
                                      nnz_cache[3+8*11 + 2*8*i]= nnz[left_block[8+0+2*i]+8*4];
                                      nnz_cache[3+8*12 + 2*8*i]= nnz[left_block[8+1+2*i]+8*4];
                     +            }else if(CHROMA422) {
                     +                nnz_cache[3+8* 6 + 2*8*i]= nnz[left_block[8+0+2*i]-2+4*4];
                     +                nnz_cache[3+8* 7 + 2*8*i]= nnz[left_block[8+1+2*i]-2+4*4];
                     +                nnz_cache[3+8*11 + 2*8*i]= nnz[left_block[8+0+2*i]-2+8*4];
                     +                nnz_cache[3+8*12 + 2*8*i]= nnz[left_block[8+1+2*i]-2+8*4];
                                  }else{
                                      nnz_cache[3+8* 6 +   8*i]= nnz[left_block[8+4+2*i]];
                                      nnz_cache[3+8*11 +   8*i]= nnz[left_block[8+5+2*i]];

libavcodec/h264_ps.c

History View file @ 76741b0

@@ -396,7 +396,8 @@ int ff_h264_decode_seq_parameter_set(H264Context *h){
                      #endif
                          sps->crop= get_bits1(&s->gb);
                          if(sps->crop){
                     -        int crop_limit = sps->chroma_format_idc == 3 ? 16 : 8;
                     +        int crop_vertical_limit   = sps->chroma_format_idc  & 2 ? 16 : 8;
                     +        int crop_horizontal_limit = sps->chroma_format_idc == 3 ? 16 : 8;
                              sps->crop_left  = get_ue_golomb(&s->gb);
                              sps->crop_right = get_ue_golomb(&s->gb);
                              sps->crop_top   = get_ue_golomb(&s->gb);
@@ -404,7 +405,7 @@ int ff_h264_decode_seq_parameter_set(H264Context *h){
                              if(sps->crop_left || sps->crop_top){
                                  av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
+                             }
                     -        if(sps->crop_right >= crop_limit || sps->crop_bottom >= crop_limit){
                     +        if(sps->crop_right >= crop_horizontal_limit || sps->crop_bottom >= crop_vertical_limit){
                                  av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
+                             }
                          }else{

libavcodec/h264data.h

History View file @ 76741b0

@@ -80,7 +80,14 @@ static const uint8_t luma_dc_field_scan[16]={
                      static const uint8_t chroma_dc_scan[4]={
                       (0+0*2)*16, (1+0*2)*16,
                     - (0+1*2)*16, (1+1*2)*16,  //FIXME
                     + (0+1*2)*16, (1+1*2)*16,
                     +};
+                    +
                     +static const uint8_t chroma422_dc_scan[8]={ /* Scan table for the 8 chroma DC coefficients in 4:2:2; passed as the scantable (max_coeff 8) by both the CABAC and CAVLC CHROMA422 paths. */
                     + (0+0*2)*16, (0+1*2)*16, /* every entry has the form (x + y*2)*16 with x in 0..1 and y in 0..3 */
                     + (1+0*2)*16, (0+2*2)*16, /* the *16 factor presumably steps to the first coefficient of each 16-coefficient block -- TODO confirm */
                     + (0+3*2)*16, (1+1*2)*16,
                     + (1+2*2)*16, (1+3*2)*16,
                      };
                      // zigzag_scan8x8_cavlc[i] = zigzag_scan8x8[(i/4) + 16*(i%4)]

libavcodec/h264dsp.c

History View file @ 76741b0

@@ -41,7 +41,7 @@
                      #include "h264dsp_template.c"
                      #undef BIT_DEPTH
                     -void ff_h264dsp_init(H264DSPContext *c, const int bit_depth)
                     +void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_format_idc)
+                     {
                      #undef FUNC
                      #define FUNC(a, depth) a ## _ ## depth ## _c
@@ -53,10 +53,16 @@ void ff_h264dsp_init(H264DSPContext *c, const int bit_depth)
                          c->h264_idct8_dc_add= FUNC(ff_h264_idct8_dc_add, depth);\
                          c->h264_idct_add16     = FUNC(ff_h264_idct_add16, depth);\
                          c->h264_idct8_add4     = FUNC(ff_h264_idct8_add4, depth);\
                     -    c->h264_idct_add8      = FUNC(ff_h264_idct_add8, depth);\
                     +    if (chroma_format_idc == 1)\
                     +        c->h264_idct_add8  = FUNC(ff_h264_idct_add8, depth);\
                     +    else\
                     +        c->h264_idct_add8  = FUNC(ff_h264_idct_add8_422, depth);\
                          c->h264_idct_add16intra= FUNC(ff_h264_idct_add16intra, depth);\
                          c->h264_luma_dc_dequant_idct= FUNC(ff_h264_luma_dc_dequant_idct, depth);\
                     -    c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma_dc_dequant_idct, depth);\
                     +    if (chroma_format_idc == 1)\
                     +        c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma_dc_dequant_idct, depth);\
                     +    else\
                     +        c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma422_dc_dequant_idct, depth);\
+                     \
                          c->weight_h264_pixels_tab[0]= FUNC(weight_h264_pixels16x16, depth);\
                          c->weight_h264_pixels_tab[1]= FUNC(weight_h264_pixels16x8, depth);\
@@ -86,11 +92,23 @@ void ff_h264dsp_init(H264DSPContext *c, const int bit_depth)
                          c->h264_h_loop_filter_luma_intra= FUNC(h264_h_loop_filter_luma_intra, depth);\
                          c->h264_h_loop_filter_luma_mbaff_intra= FUNC(h264_h_loop_filter_luma_mbaff_intra, depth);\
                          c->h264_v_loop_filter_chroma= FUNC(h264_v_loop_filter_chroma, depth);\
                     -    c->h264_h_loop_filter_chroma= FUNC(h264_h_loop_filter_chroma, depth);\
                     -    c->h264_h_loop_filter_chroma_mbaff= FUNC(h264_h_loop_filter_chroma_mbaff, depth);\
                     +    if (chroma_format_idc == 1)\
                     +        c->h264_h_loop_filter_chroma= FUNC(h264_h_loop_filter_chroma, depth);\
                     +    else\
                     +        c->h264_h_loop_filter_chroma= FUNC(h264_h_loop_filter_chroma422, depth);\
                     +    if (chroma_format_idc == 1)\
                     +        c->h264_h_loop_filter_chroma_mbaff= FUNC(h264_h_loop_filter_chroma_mbaff, depth);\
                     +    else\
                     +        c->h264_h_loop_filter_chroma_mbaff= FUNC(h264_h_loop_filter_chroma422_mbaff, depth);\
                          c->h264_v_loop_filter_chroma_intra= FUNC(h264_v_loop_filter_chroma_intra, depth);\
                     -    c->h264_h_loop_filter_chroma_intra= FUNC(h264_h_loop_filter_chroma_intra, depth);\
                     -    c->h264_h_loop_filter_chroma_mbaff_intra= FUNC(h264_h_loop_filter_chroma_mbaff_intra, depth);\
                     +    if (chroma_format_idc == 1)\
                     +        c->h264_h_loop_filter_chroma_intra= FUNC(h264_h_loop_filter_chroma_intra, depth);\
                     +    else\
                     +        c->h264_h_loop_filter_chroma_intra= FUNC(h264_h_loop_filter_chroma422_intra, depth);\
                     +    if (chroma_format_idc == 1)\
                     +        c->h264_h_loop_filter_chroma_mbaff_intra= FUNC(h264_h_loop_filter_chroma_mbaff_intra, depth);\
                     +    else\
                     +        c->h264_h_loop_filter_chroma_mbaff_intra= FUNC(h264_h_loop_filter_chroma422_mbaff_intra, depth);\
                          c->h264_loop_filter_strength= NULL;
                          switch (bit_depth) {
@@ -105,7 +123,7 @@ void ff_h264dsp_init(H264DSPContext *c, const int bit_depth)
                              break;
+                         }
                     -    if (ARCH_ARM) ff_h264dsp_init_arm(c, bit_depth);
                     -    if (HAVE_ALTIVEC) ff_h264dsp_init_ppc(c, bit_depth);
                     -    if (HAVE_MMX) ff_h264dsp_init_x86(c, bit_depth);
                     +    if (ARCH_ARM) ff_h264dsp_init_arm(c, bit_depth, chroma_format_idc);
                     +    if (HAVE_ALTIVEC) ff_h264dsp_init_ppc(c, bit_depth, chroma_format_idc);
                     +    if (HAVE_MMX) ff_h264dsp_init_x86(c, bit_depth, chroma_format_idc);
+                     }

libavcodec/h264dsp.h

History View file @ 76741b0

@@ -74,9 +74,9 @@ typedef struct H264DSPContext{
                          void (*h264_chroma_dc_dequant_idct)(DCTELEM *block, int qmul);
                      }H264DSPContext;
                     -void ff_h264dsp_init(H264DSPContext *c, const int bit_depth);
                     -void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth);
                     -void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth);
                     -void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth);
                     +void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_format_idc);
                     +void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth, const int chroma_format_idc);
                     +void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth, const int chroma_format_idc);
                     +void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chroma_format_idc);
                      #endif /* AVCODEC_H264DSP_H */

libavcodec/h264dsp_template.c

History View file @ 76741b0

@@ -275,6 +275,14 @@ static void FUNCC(h264_h_loop_filter_chroma_mbaff)(uint8_t *pix, int stride, int
+                     {
                          FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 1, alpha, beta, tc0);
+                     }
                     +static void FUNCC(h264_h_loop_filter_chroma422)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
                     +{ // 4:2:2 counterpart of h264_h_loop_filter_chroma; ff_h264dsp_init selects it when chroma_format_idc != 1
                     +    FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 4, alpha, beta, tc0); // thin wrapper: 4th argument is 4 here (presumably the iteration count, as in the _intra helper's inner_iters)
                     +}
                     +static void FUNCC(h264_h_loop_filter_chroma422_mbaff)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
                     +{ // 4:2:2 counterpart of h264_h_loop_filter_chroma_mbaff; ff_h264dsp_init selects it when chroma_format_idc != 1
                     +    FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0); // 4th argument is 2 here; the 4:2:0 mbaff wrapper passes 1 and the non-mbaff 4:2:2 wrapper passes 4
                     +}
                      static av_always_inline av_flatten void FUNCC(h264_loop_filter_chroma_intra)(uint8_t *_pix, int xstride, int ystride, int inner_iters, int alpha, int beta)
+                     {
@@ -312,3 +320,11 @@ static void FUNCC(h264_h_loop_filter_chroma_mbaff_intra)(uint8_t *pix, int strid
+                     {
                          FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 1, alpha, beta);
+                     }
                     +static void FUNCC(h264_h_loop_filter_chroma422_intra)(uint8_t *pix, int stride, int alpha, int beta)
                     +{ // 4:2:2 counterpart of h264_h_loop_filter_chroma_intra; ff_h264dsp_init selects it when chroma_format_idc != 1
                     +    FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 4, alpha, beta); // xstride = sizeof(pixel), ystride = stride, inner_iters = 4
                     +}
                     +static void FUNCC(h264_h_loop_filter_chroma422_mbaff_intra)(uint8_t *pix, int stride, int alpha, int beta)
                     +{ // 4:2:2 counterpart of h264_h_loop_filter_chroma_mbaff_intra; ff_h264dsp_init selects it when chroma_format_idc != 1
                     +    FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta); // xstride = sizeof(pixel), ystride = stride, inner_iters = 2 (the 4:2:0 mbaff wrapper passes 1)
                     +}

libavcodec/h264idct_template.c

History View file @ 76741b0

@@ -224,6 +224,29 @@ void FUNCC(ff_h264_idct_add8)(uint8_t **dest, const int *block_offset, DCTELEM *
+                             }
+                         }
+                     }
+                    +
                     +void FUNCC(ff_h264_idct_add8_422)(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){ // 4:2:2 chroma residual: dispatches each 16-coefficient block of both planes (dest[0], dest[1]) to ff_h264_idct_add or ff_h264_idct_dc_add
                     +    int i, j;
+                    +
                     +    for(j=1; j<3; j++){ // first pass: blocks j*16 .. j*16+3, written to plane dest[j-1]
                     +        for(i=j*16; i<j*16+4; i++){
                     +            if(nnzc[ scan8[i] ]) // non-zero entry in nnzc: run the full transform-and-add
                     +                FUNCC(ff_h264_idct_add   )(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride);
                     +            else if(((dctcoef*)block)[i*16]) // otherwise only the block's first coefficient is tested; if set, use the DC-only add
                     +                FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride);
                     +        }
                     +    }
+                    +
                     +    for(j=1; j<3; j++){ // second pass: coefficients are read at i = j*16+4 .. j*16+7, but nnzc/block_offset are looked up at i+4
                     +        for(i=j*16+4; i<j*16+8; i++){
                     +            if(nnzc[ scan8[i+4] ]) // NOTE(review): the +4 presumably skips a gap in the scan8/block_offset tables for 4:2:2 - confirm against their layout
                     +                FUNCC(ff_h264_idct_add   )(dest[j-1] + block_offset[i+4], block + i*16*sizeof(pixel), stride);
                     +            else if(((dctcoef*)block)[i*16])
                     +                FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i+4], block + i*16*sizeof(pixel), stride);
                     +        }
                     +    }
                     +}
+                    +
                      /**
                       * IDCT transforms the 16 dc values and dequantizes them.
                       * @param qmul quantization parameter
@@ -263,6 +286,33 @@ void FUNCC(ff_h264_luma_dc_dequant_idct)(DCTELEM *_output, DCTELEM *_input, int
                      #undef stride
+                     }
                     +void FUNCC(ff_h264_chroma422_dc_dequant_idct)(DCTELEM *_block, int qmul){ // in-place transform + scaling of eight DC coefficients laid out as 4 rows x 2 columns inside _block
                     +    const int stride= 16*2; // distance between consecutive rows of DC coefficients, in dctcoef units
                     +    const int xStride= 16; // distance between the two columns, in dctcoef units
                     +    int i;
                     +    int temp[8]; // intermediate results, indexed as temp[2*row + column]
                     +    static const uint8_t x_offset[2]={0, 16}; // same values as xStride*0 and xStride*1
                     +    dctcoef *block = (dctcoef*)_block;
+                    +
                     +    for(i=0; i<4; i++){ // per row: sum and difference of its two coefficients
                     +        temp[2*i+0] = block[stride*i + xStride*0] + block[stride*i + xStride*1];
                     +        temp[2*i+1] = block[stride*i + xStride*0] - block[stride*i + xStride*1];
                     +    }
+                    +
                     +    for(i=0; i<2; i++){ // per column: 4-point butterfly down the four rows
                     +        const int offset= x_offset[i];
                     +        const int z0= temp[2*0+i] + temp[2*2+i];
                     +        const int z1= temp[2*0+i] - temp[2*2+i];
                     +        const int z2= temp[2*1+i] - temp[2*3+i];
                     +        const int z3= temp[2*1+i] + temp[2*3+i];
+                    +
                     +        block[stride*0+offset]= ((z0 + z3)*qmul + 128) >> 8; // scale by qmul, add 128 and shift right by 8 (round to nearest), overwriting the input
                     +        block[stride*1+offset]= ((z1 + z2)*qmul + 128) >> 8;
                     +        block[stride*2+offset]= ((z1 - z2)*qmul + 128) >> 8;
                     +        block[stride*3+offset]= ((z0 - z3)*qmul + 128) >> 8;
                     +    }
                     +}
+                    +
                      void FUNCC(ff_h264_chroma_dc_dequant_idct)(DCTELEM *_block, int qmul){
                          const int stride= 16*2;
                          const int xStride= 16;

libavcodec/h264pred.c

History View file @ 76741b0

@@ -361,7 +361,7 @@ static void pred8x8_tm_vp8_c(uint8_t *src, int stride){
                      /**
                       * Set the intra prediction function pointers.
                       */
                     -void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){
                     +void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc){
                      //    MpegEncContext * const s = &h->s;
                      #undef FUNC
@@ -434,20 +434,39 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){
                          h->pred8x8l[TOP_DC_PRED         ]= FUNCC(pred8x8l_top_dc              , depth);\
                          h->pred8x8l[DC_128_PRED         ]= FUNCC(pred8x8l_128_dc              , depth);\
+                     \
                     -    h->pred8x8[VERT_PRED8x8   ]= FUNCC(pred8x8_vertical                   , depth);\
                     -    h->pred8x8[HOR_PRED8x8    ]= FUNCC(pred8x8_horizontal                 , depth);\
                     +    if (chroma_format_idc == 1) {\
                     +        h->pred8x8[VERT_PRED8x8   ]= FUNCC(pred8x8_vertical               , depth);\
                     +        h->pred8x8[HOR_PRED8x8    ]= FUNCC(pred8x8_horizontal             , depth);\
                     +    } else {\
                     +        h->pred8x8[VERT_PRED8x8   ]= FUNCC(pred8x16_vertical              , depth);\
                     +        h->pred8x8[HOR_PRED8x8    ]= FUNCC(pred8x16_horizontal            , depth);\
                     +    }\
                          if (codec_id != CODEC_ID_VP8) {\
                     -        h->pred8x8[PLANE_PRED8x8]= FUNCC(pred8x8_plane                    , depth);\
                     +        if (chroma_format_idc == 1) {\
                     +            h->pred8x8[PLANE_PRED8x8]= FUNCC(pred8x8_plane                , depth);\
                     +        } else {\
                     +            h->pred8x8[PLANE_PRED8x8]= FUNCC(pred8x16_plane               , depth);\
                     +        }\
                          } else\
                              h->pred8x8[PLANE_PRED8x8]= FUNCD(pred8x8_tm_vp8);\
                          if(codec_id != CODEC_ID_RV40 && codec_id != CODEC_ID_VP8){\
                     -        h->pred8x8[DC_PRED8x8     ]= FUNCC(pred8x8_dc                     , depth);\
                     -        h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x8_left_dc                , depth);\
                     -        h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x8_top_dc                 , depth);\
                     -        h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l0t, depth);\
                     -        h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0lt, depth);\
                     -        h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l00, depth);\
                     -        h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0l0, depth);\
                     +        if (chroma_format_idc == 1) {\
                     +            h->pred8x8[DC_PRED8x8     ]= FUNCC(pred8x8_dc                     , depth);\
                     +            h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x8_left_dc                , depth);\
                     +            h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x8_top_dc                 , depth);\
                     +            h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l0t, depth);\
                     +            h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0lt, depth);\
                     +            h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l00, depth);\
                     +            h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0l0, depth);\
                     +        } else {\
                     +            h->pred8x8[DC_PRED8x8     ]= FUNCC(pred8x16_dc                    , depth);\
                     +            h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x16_left_dc               , depth);\
                     +            h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x16_top_dc                , depth);\
                     +            h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l0t, depth);\
                     +            h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0lt, depth);\
                     +            h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l00, depth);\
                     +            h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0l0, depth);\
                     +        }\
                          }else{\
                              h->pred8x8[DC_PRED8x8     ]= FUNCD(pred8x8_dc_rv40);\
                              h->pred8x8[LEFT_DC_PRED8x8]= FUNCD(pred8x8_left_dc_rv40);\
@@ -457,7 +476,11 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){
                                  h->pred8x8[DC_129_PRED8x8]= FUNCC(pred8x8_129_dc              , depth);\
                              }\
                          }\
                     -    h->pred8x8[DC_128_PRED8x8 ]= FUNCC(pred8x8_128_dc                     , depth);\
                     +    if (chroma_format_idc == 1) {\
                     +        h->pred8x8[DC_128_PRED8x8 ]= FUNCC(pred8x8_128_dc                 , depth);\
                     +    } else {\
                     +        h->pred8x8[DC_128_PRED8x8 ]= FUNCC(pred8x16_128_dc                , depth);\
                     +    }\
+                     \
                          h->pred16x16[DC_PRED8x8     ]= FUNCC(pred16x16_dc                     , depth);\
                          h->pred16x16[VERT_PRED8x8   ]= FUNCC(pred16x16_vertical               , depth);\
@@ -504,6 +527,6 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){
                                  break;
+                         }
                     -    if (ARCH_ARM) ff_h264_pred_init_arm(h, codec_id, bit_depth);
                     -    if (HAVE_MMX) ff_h264_pred_init_x86(h, codec_id, bit_depth);
                     +    if (ARCH_ARM) ff_h264_pred_init_arm(h, codec_id, bit_depth, chroma_format_idc);
                     +    if (HAVE_MMX) ff_h264_pred_init_x86(h, codec_id, bit_depth, chroma_format_idc);
+                     }

libavcodec/h264pred.h

History View file @ 76741b0

@@ -101,8 +101,8 @@ typedef struct H264PredContext{
                          void (*pred16x16_add[3])(uint8_t *pix/*align 16*/, const int *block_offset, const DCTELEM *block/*align 16*/, int stride);
                      }H264PredContext;
                     -void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth);
                     -void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, const int bit_depth);
                     -void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth);
                     +void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc);
                     +void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc);
                     +void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc);
                      #endif /* AVCODEC_H264PRED_H */

libavcodec/h264pred_template.c

History View file @ 76741b0

@@ -454,6 +454,19 @@ static void FUNCC(pred8x8_vertical)(uint8_t *_src, int _stride){
+                         }
+                     }
                     +static void FUNCC(pred8x16_vertical)(uint8_t *_src, int _stride){ // vertical prediction: replicates the row above the block into 16 rows
                     +    int i;
                     +    pixel *src = (pixel*)_src;
                     +    int stride = _stride>>(sizeof(pixel)-1); // shift is 0 for 1-byte pixels and 1 for 2-byte pixels; presumably converts a byte stride to pixel units
                     +    const pixel4 a= AV_RN4PA(((pixel4*)(src-stride))+0); // first pixel4 word of the row above
                     +    const pixel4 b= AV_RN4PA(((pixel4*)(src-stride))+1); // second pixel4 word of the row above
+                    +
                     +    for(i=0; i<16; i++){ // write the same two words to every one of the 16 rows
                     +        AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
                     +        AV_WN4PA(((pixel4*)(src+i*stride))+1, b);
                     +    }
                     +}
+                    +
                      static void FUNCC(pred8x8_horizontal)(uint8_t *_src, int stride){
                          int i;
                          pixel *src = (pixel*)_src;
@@ -466,6 +479,17 @@ static void FUNCC(pred8x8_horizontal)(uint8_t *_src, int stride){
+                         }
+                     }
                     +static void FUNCC(pred8x16_horizontal)(uint8_t *_src, int stride){ // horizontal prediction: fills each of 16 rows with that row's left neighbour
                     +    int i;
                     +    pixel *src = (pixel*)_src;
                     +    stride >>= sizeof(pixel)-1; // shift is 0 for 1-byte pixels and 1 for 2-byte pixels; presumably converts a byte stride to pixel units
                     +    for(i=0; i<16; i++){
                     +        const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]); // pixel immediately left of row i, splatted into a pixel4 word
                     +        AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
                     +        AV_WN4PA(((pixel4*)(src+i*stride))+1, a);
                     +    }
                     +}
+                    +
                      #define PRED8x8_X(n, v)\
                      static void FUNCC(pred8x8_##n##_dc)(uint8_t *_src, int stride){\
                          int i;\
@@ -482,6 +506,11 @@ PRED8x8_X(127, (1<<(BIT_DEPTH-1))-1);
                      PRED8x8_X(128, (1<<(BIT_DEPTH-1))+0);
                      PRED8x8_X(129, (1<<(BIT_DEPTH-1))+1);
                     +static void FUNCC(pred8x16_128_dc)(uint8_t *_src, int stride){ // 16-row variant built from two calls to the 8x8 version
                     +    FUNCC(pred8x8_128_dc)(_src, stride); // rows 0-7
                     +    FUNCC(pred8x8_128_dc)(_src+8*stride, stride); // rows 8-15; stride is applied to the uint8_t pointer, so it is used as a byte offset here
                     +}
+                    +
                      static void FUNCC(pred8x8_left_dc)(uint8_t *_src, int stride){
                          int i;
                          int dc0, dc2;
@@ -507,6 +536,11 @@ static void FUNCC(pred8x8_left_dc)(uint8_t *_src, int stride){
+                         }
+                     }
                     +static void FUNCC(pred8x16_left_dc)(uint8_t *_src, int stride){ // 16-row variant built from two calls to the 8x8 version
                     +    FUNCC(pred8x8_left_dc)(_src, stride); // rows 0-7
                     +    FUNCC(pred8x8_left_dc)(_src+8*stride, stride); // rows 8-15; stride is applied to the uint8_t pointer, so it is used as a byte offset here
                     +}
+                    +
                      static void FUNCC(pred8x8_top_dc)(uint8_t *_src, int stride){
                          int i;
                          int dc0, dc1;
@@ -532,6 +566,27 @@ static void FUNCC(pred8x8_top_dc)(uint8_t *_src, int stride){
+                         }
+                     }
                     +static void FUNCC(pred8x16_top_dc)(uint8_t *_src, int stride){ // DC prediction from the row above only, applied to 16 rows
                     +    int i;
                     +    int dc0, dc1;
                     +    pixel4 dc0splat, dc1splat;
                     +    pixel *src = (pixel*)_src;
                     +    stride >>= sizeof(pixel)-1; // shift is 0 for 1-byte pixels and 1 for 2-byte pixels; presumably converts a byte stride to pixel units
+                    +
                     +    dc0=dc1=0;
                     +    for(i=0;i<4; i++){
                     +        dc0+= src[i-stride]; // sum of the pixels above columns 0-3
                     +        dc1+= src[4+i-stride]; // sum of the pixels above columns 4-7
                     +    }
                     +    dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2); // rounded average of 4 samples
                     +    dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
+                    +
                     +    for(i=0; i<16; i++){ // left word gets dc0, right word gets dc1, in all 16 rows
                     +        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
                     +        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
                     +    }
                     +}
+                    +
                      static void FUNCC(pred8x8_dc)(uint8_t *_src, int stride){
                          int i;
                          int dc0, dc1, dc2;
@@ -560,6 +615,48 @@ static void FUNCC(pred8x8_dc)(uint8_t *_src, int stride){
+                         }
+                     }
                     +static void FUNCC(pred8x16_dc)(uint8_t *_src, int stride){ // DC prediction for a 16-row block, with a separate DC value for each group of 4 rows and each half-row word
                     +    int i;
                     +    int dc0, dc1, dc2, dc3, dc4;
                     +    pixel4 dc0splat, dc1splat, dc2splat, dc3splat, dc4splat, dc5splat, dc6splat, dc7splat;
                     +    pixel *src = (pixel*)_src;
                     +    stride >>= sizeof(pixel)-1; // shift is 0 for 1-byte pixels and 1 for 2-byte pixels; presumably converts a byte stride to pixel units
+                    +
                     +    dc0=dc1=dc2=dc3=dc4=0;
                     +    for(i=0;i<4; i++){
                     +        dc0+= src[-1+i*stride] + src[i-stride]; // left neighbours of rows 0-3 plus pixels above columns 0-3 (8 samples)
                     +        dc1+= src[4+i-stride]; // pixels above columns 4-7
                     +        dc2+= src[-1+(i+4)*stride]; // left neighbours of rows 4-7
                     +        dc3+= src[-1+(i+8)*stride]; // left neighbours of rows 8-11
                     +        dc4+= src[-1+(i+12)*stride]; // left neighbours of rows 12-15
                     +    }
                     +    dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3); // rows 0-3, left word: rounded average of 8 samples
                     +    dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2); // rows 0-3, right word: top samples only
                     +    dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2); // rows 4-7, left word: left samples only
                     +    dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3); // rows 4-7, right word: top-right sum combined with this group's left sum
                     +    dc4splat = PIXEL_SPLAT_X4((dc3 + 2)>>2); // rows 8-11, left word
                     +    dc5splat = PIXEL_SPLAT_X4((dc1 + dc3 + 4)>>3); // rows 8-11, right word
                     +    dc6splat = PIXEL_SPLAT_X4((dc4 + 2)>>2); // rows 12-15, left word
                     +    dc7splat = PIXEL_SPLAT_X4((dc1 + dc4 + 4)>>3); // rows 12-15, right word
+                    +
                     +    for(i=0; i<4; i++){
                     +        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
                     +        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
                     +    }
                     +    for(i=4; i<8; i++){
                     +        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);
                     +        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc3splat);
                     +    }
                     +    for(i=8; i<12; i++){
                     +        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc4splat);
                     +        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc5splat);
                     +    }
                     +    for(i=12; i<16; i++){
                     +        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc6splat);
                     +        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc7splat);
                     +    }
                     +}
+                    +
                      //the following 4 function should not be optimized!
                      static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, int stride){
                          FUNCC(pred8x8_top_dc)(src, stride);
@@ -618,6 +715,47 @@ static void FUNCC(pred8x8_plane)(uint8_t *_src, int _stride){
+                       }
+                     }
                     +static void FUNCC(pred8x16_plane)(uint8_t *_src, int _stride){ // plane prediction for an 8-column, 16-row block: fits a linear gradient to the top row and left column
                     +  int j, k;
                     +  int a;
                     +  INIT_CLIP
                     +  pixel *src = (pixel*)_src;
                     +  int stride = _stride>>(sizeof(pixel)-1); // shift is 0 for 1-byte pixels and 1 for 2-byte pixels; presumably converts a byte stride to pixel units
                     +  const pixel * const src0 = src +3-stride; // row above the block, at column 3
                     +  const pixel *       src1 = src +8*stride-1; // left neighbour column, at row 8
                     +  const pixel *       src2 = src1-2*stride;    // == src+6*stride-1;
                     +  int H = src0[1] - src0[-1]; // k = 1 term of the horizontal gradient
                     +  int V = src1[0] - src2[ 0]; // k = 1 term of the vertical gradient
+                    +
                     +  for (k = 2; k <= 4; ++k) { // k = 2..4: both gradients gain a weighted difference; src1 steps down and src2 steps up one row each time
                     +      src1 += stride; src2 -= stride;
                     +      H += k*(src0[k] - src0[-k]);
                     +      V += k*(src1[0] - src2[ 0]);
                     +  }
                     +  for (; k <= 8; ++k) { // k = 5..8: only the vertical gradient has further terms
                     +      src1 += stride; src2 -= stride;
                     +      V += k*(src1[0] - src2[0]);
                     +  }
+                    +
                     +  H = (17*H+16) >> 5; // scale and round the horizontal gradient
                     +  V = (5*V+32) >> 6; // scale and round the vertical gradient
+                    +
                     +  a = 16*(src1[0] + src2[8] + 1) - 7*V - 3*H; // after the loops src1 is the left neighbour of row 15 and src2 is src-stride-1, so src2[8] is the pixel above column 7
                     +  for(j=16; j>0; --j) { // one iteration per output row; a advances by V each row
                     +    int b = a;
                     +    a += V;
                     +    src[0] = CLIP((b    ) >> 5); // column x gets CLIP((b + x*H) >> 5)
                     +    src[1] = CLIP((b+  H) >> 5);
                     +    src[2] = CLIP((b+2*H) >> 5);
                     +    src[3] = CLIP((b+3*H) >> 5);
                     +    src[4] = CLIP((b+4*H) >> 5);
                     +    src[5] = CLIP((b+5*H) >> 5);
                     +    src[6] = CLIP((b+6*H) >> 5);
                     +    src[7] = CLIP((b+7*H) >> 5);
                     +    src += stride;
                     +  }
                     +}
+                    +
                      #define SRC(x,y) src[(x)+(y)*stride]
                      #define PL(y) \
                          const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;

libavcodec/ppc/h264_altivec.c

History View file @ 76741b0

@@ -999,12 +999,13 @@ void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {
+                         }
+                     }
                     -void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth)
                     +void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth, const int chroma_format_idc)
+                     {
                          if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) {
                          if (bit_depth == 8) {
                              c->h264_idct_add = ff_h264_idct_add_altivec;
                     -        c->h264_idct_add8 = ff_h264_idct_add8_altivec;
                     +        if (chroma_format_idc == 1)
                     +            c->h264_idct_add8 = ff_h264_idct_add8_altivec;
                              c->h264_idct_add16 = ff_h264_idct_add16_altivec;
                              c->h264_idct_add16intra = ff_h264_idct_add16intra_altivec;
                              c->h264_idct_dc_add= h264_idct_dc_add_altivec;

libavcodec/rv34.c

History View file @ 76741b0

@@ -1343,7 +1343,7 @@ av_cold int ff_rv34_decode_init(AVCodecContext *avctx)
                          if (MPV_common_init(s) < 0)
                              return -1;
                     -    ff_h264_pred_init(&r->h, CODEC_ID_RV40, 8);
                     +    ff_h264_pred_init(&r->h, CODEC_ID_RV40, 8, 1);
                      #if CONFIG_RV30_DECODER
                          if (avctx->codec_id == CODEC_ID_RV30)

libavcodec/vp8.c

History View file @ 76741b0

@@ -1769,7 +1769,7 @@ static av_cold int vp8_decode_init(AVCodecContext *avctx)
                          avctx->pix_fmt = PIX_FMT_YUV420P;
                          dsputil_init(&s->dsp, avctx);
                     -    ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8);
                     +    ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8, 1);
                          ff_vp8dsp_init(&s->vp8dsp);
                          return 0;

libavcodec/x86/h264_intrapred_init.c

History View file @ 76741b0

@@ -167,7 +167,7 @@ void ff_pred4x4_tm_vp8_mmxext      (uint8_t *src, const uint8_t *topright, int s
                      void ff_pred4x4_tm_vp8_ssse3       (uint8_t *src, const uint8_t *topright, int stride);
                      void ff_pred4x4_vertical_vp8_mmxext(uint8_t *src, const uint8_t *topright, int stride);
                     -void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth)
                     +void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc)
+                     {
                      #if HAVE_YASM
                          int mm_flags = av_get_cpu_flags();
@@ -176,14 +176,17 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
                              if (mm_flags & AV_CPU_FLAG_MMX) {
                                  h->pred16x16[VERT_PRED8x8         ] = ff_pred16x16_vertical_mmx;
                                  h->pred16x16[HOR_PRED8x8          ] = ff_pred16x16_horizontal_mmx;
                     -            h->pred8x8  [VERT_PRED8x8         ] = ff_pred8x8_vertical_mmx;
                     -            h->pred8x8  [HOR_PRED8x8          ] = ff_pred8x8_horizontal_mmx;
                     +            if (chroma_format_idc == 1) {
                     +                h->pred8x8  [VERT_PRED8x8     ] = ff_pred8x8_vertical_mmx;
                     +                h->pred8x8  [HOR_PRED8x8      ] = ff_pred8x8_horizontal_mmx;
                     +            }
                                  if (codec_id == CODEC_ID_VP8) {
                                      h->pred16x16[PLANE_PRED8x8    ] = ff_pred16x16_tm_vp8_mmx;
                                      h->pred8x8  [PLANE_PRED8x8    ] = ff_pred8x8_tm_vp8_mmx;
                                      h->pred4x4  [TM_VP8_PRED      ] = ff_pred4x4_tm_vp8_mmx;
                                  } else {
                     -                h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_mmx;
                     +                if (chroma_format_idc == 1)
                     +                    h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_mmx;
                                      if (codec_id == CODEC_ID_SVQ3) {
                                          h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_mmx;
                                      } else if (codec_id == CODEC_ID_RV40) {
@@ -197,7 +200,8 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
                              if (mm_flags & AV_CPU_FLAG_MMX2) {
                                  h->pred16x16[HOR_PRED8x8            ] = ff_pred16x16_horizontal_mmxext;
                                  h->pred16x16[DC_PRED8x8             ] = ff_pred16x16_dc_mmxext;
                     -            h->pred8x8  [HOR_PRED8x8            ] = ff_pred8x8_horizontal_mmxext;
                     +            if (chroma_format_idc == 1)
                     +                h->pred8x8[HOR_PRED8x8          ] = ff_pred8x8_horizontal_mmxext;
                                  h->pred8x8l [TOP_DC_PRED            ] = ff_pred8x8l_top_dc_mmxext;
                                  h->pred8x8l [DC_PRED                ] = ff_pred8x8l_dc_mmxext;
                                  h->pred8x8l [HOR_PRED               ] = ff_pred8x8l_horizontal_mmxext;
@@ -221,8 +225,10 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
                                      h->pred4x4  [HOR_UP_PRED        ] = ff_pred4x4_horizontal_up_mmxext;
                                  }
                                  if (codec_id == CODEC_ID_SVQ3 || codec_id == CODEC_ID_H264) {
                     -                h->pred8x8  [TOP_DC_PRED8x8     ] = ff_pred8x8_top_dc_mmxext;
                     -                h->pred8x8  [DC_PRED8x8         ] = ff_pred8x8_dc_mmxext;
                     +                if (chroma_format_idc == 1) {
                     +                    h->pred8x8[TOP_DC_PRED8x8   ] = ff_pred8x8_top_dc_mmxext;
                     +                    h->pred8x8[DC_PRED8x8       ] = ff_pred8x8_dc_mmxext;
                     +                }
                                  }
                                  if (codec_id == CODEC_ID_VP8) {
                                      h->pred16x16[PLANE_PRED8x8      ] = ff_pred16x16_tm_vp8_mmxext;
@@ -231,7 +237,8 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
                                      h->pred4x4  [TM_VP8_PRED        ] = ff_pred4x4_tm_vp8_mmxext;
                                      h->pred4x4  [VERT_PRED          ] = ff_pred4x4_vertical_vp8_mmxext;
                                  } else {
                     -                h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_mmx2;
                     +                if (chroma_format_idc == 1)
                     +                    h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_mmx2;
                                      if (codec_id == CODEC_ID_SVQ3) {
                                          h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_svq3_mmx2;
                                      } else if (codec_id == CODEC_ID_RV40) {
@@ -257,7 +264,8 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
                                      h->pred16x16[PLANE_PRED8x8    ] = ff_pred16x16_tm_vp8_sse2;
                                      h->pred8x8  [PLANE_PRED8x8    ] = ff_pred8x8_tm_vp8_sse2;
                                  } else {
                     -                h->pred8x8  [PLANE_PRED8x8    ] = ff_pred8x8_plane_sse2;
                     +                if (chroma_format_idc == 1)
                     +                    h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_sse2;
                                      if (codec_id == CODEC_ID_SVQ3) {
                                          h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_sse2;
                                      } else if (codec_id == CODEC_ID_RV40) {
@@ -271,7 +279,8 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
                              if (mm_flags & AV_CPU_FLAG_SSSE3) {
                                  h->pred16x16[HOR_PRED8x8          ] = ff_pred16x16_horizontal_ssse3;
                                  h->pred16x16[DC_PRED8x8           ] = ff_pred16x16_dc_ssse3;
                     -            h->pred8x8  [HOR_PRED8x8          ] = ff_pred8x8_horizontal_ssse3;
                     +            if (chroma_format_idc == 1)
                     +                h->pred8x8  [HOR_PRED8x8      ] = ff_pred8x8_horizontal_ssse3;
                                  h->pred8x8l [TOP_DC_PRED          ] = ff_pred8x8l_top_dc_ssse3;
                                  h->pred8x8l [DC_PRED              ] = ff_pred8x8l_dc_ssse3;
                                  h->pred8x8l [HOR_PRED             ] = ff_pred8x8l_horizontal_ssse3;
@@ -286,7 +295,8 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
                                      h->pred8x8  [PLANE_PRED8x8    ] = ff_pred8x8_tm_vp8_ssse3;
                                      h->pred4x4  [TM_VP8_PRED      ] = ff_pred4x4_tm_vp8_ssse3;
                                  } else {
                     -                h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_ssse3;
                     +                if (chroma_format_idc == 1)
                     +                    h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_ssse3;
                                      if (codec_id == CODEC_ID_SVQ3) {
                                          h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_ssse3;
                                      } else if (codec_id == CODEC_ID_RV40) {
@@ -301,7 +311,8 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
                                  h->pred4x4[DC_PRED             ] = ff_pred4x4_dc_10_mmxext;
                                  h->pred4x4[HOR_UP_PRED         ] = ff_pred4x4_horizontal_up_10_mmxext;
                     -            h->pred8x8[DC_PRED8x8          ] = ff_pred8x8_dc_10_mmxext;
                     +            if (chroma_format_idc == 1)
                     +                h->pred8x8[DC_PRED8x8      ] = ff_pred8x8_dc_10_mmxext;
                                  h->pred8x8l[DC_128_PRED        ] = ff_pred8x8l_128_dc_10_mmxext;
@@ -319,11 +330,13 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
                                  h->pred4x4[VERT_RIGHT_PRED     ] = ff_pred4x4_vertical_right_10_sse2;
                                  h->pred4x4[HOR_DOWN_PRED       ] = ff_pred4x4_horizontal_down_10_sse2;
                     -            h->pred8x8[DC_PRED8x8          ] = ff_pred8x8_dc_10_sse2;
                     -            h->pred8x8[TOP_DC_PRED8x8      ] = ff_pred8x8_top_dc_10_sse2;
                     -            h->pred8x8[PLANE_PRED8x8       ] = ff_pred8x8_plane_10_sse2;
                     -            h->pred8x8[VERT_PRED8x8        ] = ff_pred8x8_vertical_10_sse2;
                     -            h->pred8x8[HOR_PRED8x8         ] = ff_pred8x8_horizontal_10_sse2;
                     +            if (chroma_format_idc == 1) {
                     +                h->pred8x8[DC_PRED8x8      ] = ff_pred8x8_dc_10_sse2;
                     +                h->pred8x8[TOP_DC_PRED8x8  ] = ff_pred8x8_top_dc_10_sse2;
                     +                h->pred8x8[PLANE_PRED8x8   ] = ff_pred8x8_plane_10_sse2;
                     +                h->pred8x8[VERT_PRED8x8    ] = ff_pred8x8_vertical_10_sse2;
                     +                h->pred8x8[HOR_PRED8x8     ] = ff_pred8x8_horizontal_10_sse2;
                     +            }
                                  h->pred8x8l[VERT_PRED           ] = ff_pred8x8l_vertical_10_sse2;
                                  h->pred8x8l[HOR_PRED            ] = ff_pred8x8l_horizontal_10_sse2;

libavcodec/x86/h264dsp_mmx.c

History View file @ 76741b0

@@ -350,7 +350,7 @@ H264_BIWEIGHT_10_SSE( 4,  8, 10)
                      H264_BIWEIGHT_10_SSE( 4,  4, 10)
                      H264_BIWEIGHT_10_SSE( 4,  2, 10)
                     -void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
                     +void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chroma_format_idc)
                      {
                          int mm_flags = av_get_cpu_flags();
@@ -368,7 +368,8 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
                              c->h264_idct_add16          = ff_h264_idct_add16_8_mmx;
                              c->h264_idct8_add4          = ff_h264_idct8_add4_8_mmx;
                     -        c->h264_idct_add8           = ff_h264_idct_add8_8_mmx;
                     +        if (chroma_format_idc == 1)
                     +            c->h264_idct_add8       = ff_h264_idct_add8_8_mmx;
                              c->h264_idct_add16intra     = ff_h264_idct_add16intra_8_mmx;
                              c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_mmx;
@@ -377,13 +378,16 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
                                  c->h264_idct8_dc_add   = ff_h264_idct8_dc_add_8_mmx2;
                                  c->h264_idct_add16     = ff_h264_idct_add16_8_mmx2;
                                  c->h264_idct8_add4     = ff_h264_idct8_add4_8_mmx2;
                     -            c->h264_idct_add8      = ff_h264_idct_add8_8_mmx2;
                     +            if (chroma_format_idc == 1)
                     +                c->h264_idct_add8  = ff_h264_idct_add8_8_mmx2;
                                  c->h264_idct_add16intra= ff_h264_idct_add16intra_8_mmx2;
                                  c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_8_mmxext;
                     -            c->h264_h_loop_filter_chroma= ff_deblock_h_chroma_8_mmxext;
                                  c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_8_mmxext;
                     -            c->h264_h_loop_filter_chroma_intra= ff_deblock_h_chroma_intra_8_mmxext;
                     +            if (chroma_format_idc == 1) {
                     +                c->h264_h_loop_filter_chroma= ff_deblock_h_chroma_8_mmxext;
                     +                c->h264_h_loop_filter_chroma_intra= ff_deblock_h_chroma_intra_8_mmxext;
                     +            }
                      #if ARCH_X86_32
                                  c->h264_v_loop_filter_luma= ff_deblock_v_luma_8_mmxext;
                                  c->h264_h_loop_filter_luma= ff_deblock_h_luma_8_mmxext;
@@ -413,7 +417,8 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
                                      c->h264_idct_add16          = ff_h264_idct_add16_8_sse2;
                                      c->h264_idct8_add4          = ff_h264_idct8_add4_8_sse2;
                     -                c->h264_idct_add8           = ff_h264_idct_add8_8_sse2;
                     +                if (chroma_format_idc == 1)
                     +                    c->h264_idct_add8       = ff_h264_idct_add8_8_sse2;
                                      c->h264_idct_add16intra     = ff_h264_idct_add16intra_8_sse2;
                                      c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_sse2;
@@ -472,7 +477,8 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
                                      c->h264_idct8_dc_add   = ff_h264_idct8_dc_add_10_sse2;
                                      c->h264_idct_add16     = ff_h264_idct_add16_10_sse2;
                     -                c->h264_idct_add8      = ff_h264_idct_add8_10_sse2;
                     +                if (chroma_format_idc == 1)
                     +                    c->h264_idct_add8  = ff_h264_idct_add8_10_sse2;
                                      c->h264_idct_add16intra= ff_h264_idct_add16intra_10_sse2;
                      #if HAVE_ALIGNED_STACK
                                      c->h264_idct8_add      = ff_h264_idct8_add_10_sse2;
@@ -532,7 +538,8 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
                                      c->h264_idct8_dc_add   = ff_h264_idct8_dc_add_10_avx;
                                      c->h264_idct_add16     = ff_h264_idct_add16_10_avx;
                     -                c->h264_idct_add8      = ff_h264_idct_add8_10_avx;
                     +                if (chroma_format_idc == 1)
                     +                    c->h264_idct_add8  = ff_h264_idct_add8_10_avx;
                                      c->h264_idct_add16intra= ff_h264_idct_add16intra_10_avx;
                      #if HAVE_ALIGNED_STACK
                                      c->h264_idct8_add      = ff_h264_idct8_add_10_avx;

...	...	@@ -42,7 +42,7 @@ void ff_pred8x8_0lt_dc_neon(uint8_t *src, int stride);
42	42	void ff_pred8x8_l00_dc_neon(uint8_t *src, int stride);
43	43	void ff_pred8x8_0l0_dc_neon(uint8_t *src, int stride);
44	44
45		-static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id, const int bit_depth)
	45	+static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc)
46	46	{
47	47	const int high_depth = bit_depth > 8;
48	48
...	...	@@ -74,7 +74,7 @@ static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id, const int b
74	74	h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_plane_neon;
75	75	}
76	76
77		-void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, int bit_depth)
	77	+void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, int bit_depth, const int chroma_format_idc)
78	78	{
79		- if (HAVE_NEON) ff_h264_pred_init_neon(h, codec_id, bit_depth);
	79	+ if (HAVE_NEON) ff_h264_pred_init_neon(h, codec_id, bit_depth, chroma_format_idc);
80	80	}