libavcodec/cavs.c
0abc2e73
 /*
  * Chinese AVS video (AVS1-P2, JiZhun profile) decoder.
  * Copyright (c) 2006  Stefan Gehrer <stefan.gehrer@gmx.de>
  *
b78e7197
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
0abc2e73
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
b78e7197
  * version 2.1 of the License, or (at your option) any later version.
0abc2e73
  *
b78e7197
  * FFmpeg is distributed in the hope that it will be useful,
0abc2e73
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
b78e7197
  * License along with FFmpeg; if not, write to the Free Software
e5a389a1
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0abc2e73
  */
 
f503b195
 /**
ba87f080
  * @file
f503b195
  * Chinese AVS video (AVS1-P2, JiZhun profile) decoder
  * @author Stefan Gehrer <stefan.gehrer@gmx.de>
  */
 
0abc2e73
 #include "avcodec.h"
9106a698
 #include "get_bits.h"
0abc2e73
 #include "golomb.h"
79dad2a9
 #include "h264chroma.h"
e3fcb143
 #include "idctdsp.h"
85dc006b
 #include "internal.h"
199436b9
 #include "mathops.h"
368f5035
 #include "qpeldsp.h"
b88e2114
 #include "cavs.h"
0abc2e73
 
ef07ac1e
 /*
  * Deblocking threshold tables. SET_PARAMS looks each of them up with a
  * QP-derived index that has been passed through av_clip_uintp2(..., 6),
  * which is why every table has 64 entries.
  */
 static const uint8_t alpha_tab[64] = {
3b4fa548
      0,  0,  0,  0,  0,  0,  1,  1,  1,  1,  1,  2,  2,  2,  3,  3,
      4,  4,  5,  5,  6,  7,  8,  9, 10, 11, 12, 13, 15, 16, 18, 20,
     22, 24, 26, 28, 30, 33, 33, 35, 35, 36, 37, 37, 39, 39, 42, 44,
     46, 48, 50, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64
ef07ac1e
 };
 
 static const uint8_t beta_tab[64] = {
3b4fa548
      0,  0,  0,  0,  0,  0,  1,  1,  1,  1,  1,  1,  1,  2,  2,  2,
      2,  2,  3,  3,  3,  3,  4,  4,  4,  4,  5,  5,  5,  5,  6,  6,
      6,  7,  7,  7,  8,  8,  8,  9,  9, 10, 10, 11, 11, 12, 13, 14,
     15, 16, 17, 18, 19, 20, 21, 22, 23, 23, 24, 24, 25, 25, 26, 27
ef07ac1e
 };
 
 static const uint8_t tc_tab[64] = {
3b4fa548
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
     2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4,
     5, 5, 5, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 9, 9, 9
ef07ac1e
 };
 
 /** mark block as unavailable, i.e. out of picture
3b4fa548
  *  or not yet decoded */
ef07ac1e
 static const cavs_vector un_mv = { 0, 0, 1, NOT_AVAIL };
 
 /*
  * Replacement intra prediction modes, indexed by the original mode.
  * ff_cavs_modify_mb_i() applies the left_* tables when the left neighbour
  * (A) is unavailable and the top_* tables when the top neighbour (B) is
  * unavailable; the _l tables are used for pred_mode_Y entries and the _c
  * tables for pred_mode_uv. A negative entry is reported by modify_pred()
  * as an illegal mode.
  */
 static const int8_t left_modifier_l[8] = {  0, -1,  6, -1, -1, 7, 6, 7 };
 static const int8_t top_modifier_l[8]  = { -1,  1,  5, -1, -1, 5, 7, 7 };
 static const int8_t left_modifier_c[7] = {  5, -1,  2, -1,  6, 5, 6 };
 static const int8_t top_modifier_c[7]  = {  4,  1, -1, -1,  4, 6, 6 };
0abc2e73
 
 /*****************************************************************************
  *
  * in-loop deblocking filter
  *
  ****************************************************************************/
 
64bb3fa6
 /**
  * Derive the deblocking boundary strength for the edge between two blocks.
  *
  * @param mvP forward motion vector of the block on one side of the edge
  * @param mvQ forward motion vector of the block on the other side
  * @param b   non-zero to additionally compare the vectors located
  *            MV_BWD_OFFS entries after mvP and mvQ
  * @return 2 if either block is intra coded, 1 if a compared vector pair
  *         differs by 4 or more in x or y or uses different references,
  *         0 otherwise
  */
 static inline int get_bs(cavs_vector *mvP, cavs_vector *mvQ, int b)
 {
     const int passes = b ? 2 : 1;
     int pass;

     if (mvP->ref == REF_INTRA || mvQ->ref == REF_INTRA)
         return 2;

     /* pass 0 compares the vectors given, pass 1 their backward partners */
     for (pass = 0; pass < passes; pass++) {
         const cavs_vector *p = mvP + pass * MV_BWD_OFFS;
         const cavs_vector *q = mvQ + pass * MV_BWD_OFFS;

         if (abs(p->x - q->x) >= 4 ||
             abs(p->y - q->y) >= 4 ||
             p->ref != q->ref)
             return 1;
     }
     return 0;
 }
 
64bb3fa6
 /*
  * Load the deblocking parameters for the current qp_avg.
  * Expands to three statements and expects h, qp_avg, alpha, beta and tc to
  * be in scope at the point of use. alpha and tc are both looked up with the
  * alpha_offset-adjusted index, beta with the beta_offset-adjusted one.
  */
 #define SET_PARAMS                                                \
9abc80f1
     alpha = alpha_tab[av_clip_uintp2(qp_avg + h->alpha_offset, 6)];  \
     beta  =  beta_tab[av_clip_uintp2(qp_avg + h->beta_offset,  6)];  \
     tc    =    tc_tab[av_clip_uintp2(qp_avg + h->alpha_offset, 6)];
f503b195
 
 /**
  * in-loop deblocking filter for a single macroblock
  *
  * boundary strength (bs) mapping:
0abc2e73
  *
  * --4---5--
  * 0   2   |
  * | 6 | 7 |
  * 1   3   |
  * ---------
  */
64bb3fa6
 void ff_cavs_filter(AVSContext *h, enum cavs_mb mb_type)
 {
026b9d3c
     uint8_t bs[8];
0abc2e73
     int qp_avg, alpha, beta, tc;
     int i;
 
     /* save un-deblocked lines */
64bb3fa6
     h->topleft_border_y = h->top_border_y[h->mbx * 16 + 15];
     h->topleft_border_u = h->top_border_u[h->mbx * 10 + 8];
     h->topleft_border_v = h->top_border_v[h->mbx * 10 + 8];
     memcpy(&h->top_border_y[h->mbx * 16],     h->cy + 15 * h->l_stride, 16);
     memcpy(&h->top_border_u[h->mbx * 10 + 1], h->cu +  7 * h->c_stride, 8);
     memcpy(&h->top_border_v[h->mbx * 10 + 1], h->cv +  7 * h->c_stride, 8);
     for (i = 0; i < 8; i++) {
         h->left_border_y[i * 2 + 1] = *(h->cy + 15 + (i * 2 + 0) * h->l_stride);
         h->left_border_y[i * 2 + 2] = *(h->cy + 15 + (i * 2 + 1) * h->l_stride);
         h->left_border_u[i + 1]     = *(h->cu + 7  +  i          * h->c_stride);
         h->left_border_v[i + 1]     = *(h->cv + 7  +  i          * h->c_stride);
0abc2e73
     }
64bb3fa6
     if (!h->loop_filter_disable) {
0abc2e73
         /* determine bs */
64bb3fa6
         if (mb_type == I_8X8)
             memset(bs, 2, 8);
3b4fa548
         else {
64bb3fa6
             memset(bs, 0, 8);
             if (ff_cavs_partition_flags[mb_type] & SPLITV) {
69f34e5b
                 bs[2] = get_bs(&h->mv[MV_FWD_X0], &h->mv[MV_FWD_X1], mb_type > P_8X8);
                 bs[3] = get_bs(&h->mv[MV_FWD_X2], &h->mv[MV_FWD_X3], mb_type > P_8X8);
0abc2e73
             }
64bb3fa6
             if (ff_cavs_partition_flags[mb_type] & SPLITH) {
69f34e5b
                 bs[6] = get_bs(&h->mv[MV_FWD_X0], &h->mv[MV_FWD_X2], mb_type > P_8X8);
                 bs[7] = get_bs(&h->mv[MV_FWD_X1], &h->mv[MV_FWD_X3], mb_type > P_8X8);
             }
             bs[0] = get_bs(&h->mv[MV_FWD_A1], &h->mv[MV_FWD_X0], mb_type > P_8X8);
             bs[1] = get_bs(&h->mv[MV_FWD_A3], &h->mv[MV_FWD_X2], mb_type > P_8X8);
             bs[4] = get_bs(&h->mv[MV_FWD_B2], &h->mv[MV_FWD_X0], mb_type > P_8X8);
             bs[5] = get_bs(&h->mv[MV_FWD_B3], &h->mv[MV_FWD_X1], mb_type > P_8X8);
0abc2e73
         }
64bb3fa6
         if (AV_RN64(bs)) {
             if (h->flags & A_AVAIL) {
0abc2e73
                 qp_avg = (h->qp + h->left_qp + 1) >> 1;
                 SET_PARAMS;
64bb3fa6
                 h->cdsp.cavs_filter_lv(h->cy, h->l_stride, alpha, beta, tc, bs[0], bs[1]);
54649166
                 qp_avg = (ff_cavs_chroma_qp[h->qp] + ff_cavs_chroma_qp[h->left_qp] + 1) >> 1;
                 SET_PARAMS;
64bb3fa6
                 h->cdsp.cavs_filter_cv(h->cu, h->c_stride, alpha, beta, tc, bs[0], bs[1]);
                 h->cdsp.cavs_filter_cv(h->cv, h->c_stride, alpha, beta, tc, bs[0], bs[1]);
0abc2e73
             }
             qp_avg = h->qp;
             SET_PARAMS;
64bb3fa6
             h->cdsp.cavs_filter_lv(h->cy + 8,               h->l_stride, alpha, beta, tc, bs[2], bs[3]);
             h->cdsp.cavs_filter_lh(h->cy + 8 * h->l_stride, h->l_stride, alpha, beta, tc, bs[6], bs[7]);
0abc2e73
 
64bb3fa6
             if (h->flags & B_AVAIL) {
0abc2e73
                 qp_avg = (h->qp + h->top_qp[h->mbx] + 1) >> 1;
                 SET_PARAMS;
64bb3fa6
                 h->cdsp.cavs_filter_lh(h->cy, h->l_stride, alpha, beta, tc, bs[4], bs[5]);
54649166
                 qp_avg = (ff_cavs_chroma_qp[h->qp] + ff_cavs_chroma_qp[h->top_qp[h->mbx]] + 1) >> 1;
                 SET_PARAMS;
64bb3fa6
                 h->cdsp.cavs_filter_ch(h->cu, h->c_stride, alpha, beta, tc, bs[4], bs[5]);
                 h->cdsp.cavs_filter_ch(h->cv, h->c_stride, alpha, beta, tc, bs[4], bs[5]);
0abc2e73
             }
         }
     }
64bb3fa6
     h->left_qp        = h->qp;
0abc2e73
     h->top_qp[h->mbx] = h->qp;
 }
 
 #undef SET_PARAMS
 
 /*****************************************************************************
  *
  * spatial intra prediction
  *
  ****************************************************************************/
 
5d2b15b8
 /**
  * Gather the neighbouring samples needed to intra-predict one 8x8 luma
  * block of the current macroblock.
  *
  * @param h     decoder context; its border buffers are read and partly
  *              rewritten
  * @param top   output buffer for the top neighbours; indices 0..17 are
  *              written
  * @param left  receives a pointer into one of the context's border buffers
  *              holding the left neighbours
  * @param block block index 0..3; judging by the samples each case reads,
  *              the order is top-left, top-right, bottom-left, bottom-right
  */
 void ff_cavs_load_intra_pred_luma(AVSContext *h, uint8_t *top,
64bb3fa6
                                   uint8_t **left, int block)
 {
5d2b15b8
     int i;
 
64bb3fa6
     switch (block) {
5d2b15b8
     case 0:
         /* left: saved left border, top: saved top border of this column */
64bb3fa6
         *left               = h->left_border_y;
5d2b15b8
         h->left_border_y[0] = h->left_border_y[1];
64bb3fa6
         /* replicate the last left sample into the 9 entries after it */
         memset(&h->left_border_y[17], h->left_border_y[16], 9);
         memcpy(&top[1], &h->top_border_y[h->mbx * 16], 16);
5d2b15b8
         top[17] = top[16];
64bb3fa6
         top[0]  = top[1];
         /* a real corner sample exists only with both A and B available */
         if ((h->flags & A_AVAIL) && (h->flags & B_AVAIL))
5d2b15b8
             h->left_border_y[0] = top[0] = h->topleft_border_y;
         break;
     case 1:
         /* left: column 7 of rows 0..7 of the current macroblock */
         *left = h->intern_border_y;
64bb3fa6
         for (i = 0; i < 8; i++)
             h->intern_border_y[i + 1] = *(h->cy + 7 + i * h->l_stride);
         memset(&h->intern_border_y[9], h->intern_border_y[8], 9);
5d2b15b8
         h->intern_border_y[0] = h->intern_border_y[1];
64bb3fa6
         memcpy(&top[1], &h->top_border_y[h->mbx * 16 + 8], 8);
         /* top[9..16]: next macroblock's top border if C is available,
          * otherwise top[8] replicated */
         if (h->flags & C_AVAIL)
             memcpy(&top[9], &h->top_border_y[(h->mbx + 1) * 16], 8);
5d2b15b8
         else
64bb3fa6
             memset(&top[9], top[8], 9);
5d2b15b8
         top[17] = top[16];
64bb3fa6
         top[0]  = top[1];
         if (h->flags & B_AVAIL)
             h->intern_border_y[0] = top[0] = h->top_border_y[h->mbx * 16 + 7];
5d2b15b8
         break;
     case 2:
         /* left: second half of the saved left border,
          * top: row 7 of the current macroblock */
         *left = &h->left_border_y[8];
64bb3fa6
         memcpy(&top[1], h->cy + 7 * h->l_stride, 16);
5d2b15b8
         top[17] = top[16];
64bb3fa6
         top[0]  = top[1];
         if (h->flags & A_AVAIL)
5d2b15b8
             top[0] = h->left_border_y[8];
         break;
     case 3:
         /* left: column 7 of rows 8..15, top: row 7 starting at column 7,
          * both taken from the current macroblock */
         *left = &h->intern_border_y[8];
64bb3fa6
         for (i = 0; i < 8; i++)
             h->intern_border_y[i + 9] = *(h->cy + 7 + (i + 8) * h->l_stride);
         memset(&h->intern_border_y[17], h->intern_border_y[16], 9);
         memcpy(&top[0], h->cy + 7 + 7 * h->l_stride, 9);
         /* no samples to the right are read here: replicate top[8] */
         memset(&top[9], top[8], 9);
5d2b15b8
         break;
     }
 }
 
64bb3fa6
 /**
  * Complete the chroma border buffers for intra prediction of the current
  * macroblock by filling the entries just before and just after the eight
  * stored border samples.
  *
  * @param h decoder context; left_border_u/v and top_border_u/v are updated
  */
 void ff_cavs_load_intra_pred_chroma(AVSContext *h)
 {
     uint8_t *const top_u    = &h->top_border_u[h->mbx * 10];
     uint8_t *const top_v    = &h->top_border_v[h->mbx * 10];
     const int have_topright = !!(h->flags & C_AVAIL);
     const int have_corner   = (h->flags & A_AVAIL) && (h->flags & B_AVAIL);

     /* one extra sample below the left border */
     h->left_border_u[9] = h->left_border_u[8];
     h->left_border_v[9] = h->left_border_v[8];

     /* one extra sample right of the top border: first sample of the next
      * macroblock's border if C is available, otherwise a replica */
     if (have_topright) {
         top_u[9] = top_u[11];
         top_v[9] = top_v[11];
     } else {
         top_u[9] = top_u[8];
         top_v[9] = top_v[8];
     }

     /* corner sample: the saved top-left sample, or replicas of the first
      * border samples when it is not available */
     if (have_corner) {
         top_u[0] = h->left_border_u[0] = h->topleft_border_u;
         top_v[0] = h->left_border_v[0] = h->topleft_border_v;
     } else {
         h->left_border_u[0] = h->left_border_u[1];
         h->left_border_v[0] = h->left_border_v[1];
         top_u[0]            = top_u[1];
         top_v[0]            = top_v[1];
     }
 }
 
f81be06c
 /**
  * Vertical intra prediction of an 8x8 block: every output row is a copy of
  * the eight samples top[1]..top[8].
  *
  * Rows are written with memcpy() instead of through a casted uint64_t
  * pointer, so the stores are valid for any alignment of d and do not
  * type-pun the uint8_t destination.
  *
  * @param d      destination block, 8 rows of 8 samples
  * @param top    top neighbour samples; top[1..8] are read
  * @param left   left neighbour samples (not used by this mode)
  * @param stride distance in bytes between two destination rows
  */
 static void intra_pred_vert(uint8_t *d, uint8_t *top, uint8_t *left, ptrdiff_t stride)
 {
     uint8_t row[8];
     int y;

     /* read the source once, like the single 64-bit load this replaces */
     memcpy(row, &top[1], sizeof(row));
     for (y = 0; y < 8; y++)
         memcpy(d + y * stride, row, sizeof(row));
 }
 
f81be06c
 /**
  * Horizontal intra prediction of an 8x8 block: output row y is filled with
  * the left neighbour sample left[y + 1].
  *
  * Rows are filled with memset() instead of storing a replicated 64-bit
  * value through a casted uint64_t pointer, so the writes are valid for any
  * alignment of d and do not type-pun the uint8_t destination.
  *
  * @param d      destination block, 8 rows of 8 samples
  * @param top    top neighbour samples (not used by this mode)
  * @param left   left neighbour samples; left[1..8] are read
  * @param stride distance in bytes between two destination rows
  */
 static void intra_pred_horiz(uint8_t *d, uint8_t *top, uint8_t *left, ptrdiff_t stride)
 {
     int y;

     for (y = 0; y < 8; y++)
         memset(d + y * stride, left[y + 1], 8);
 }
 
f81be06c
 /**
  * DC intra prediction without neighbours: the whole 8x8 block is set to the
  * mid-grey value 128.
  *
  * Rows are filled with memset() instead of storing a 64-bit constant
  * through a casted uint64_t pointer, so the writes are valid for any
  * alignment of d and do not type-pun the uint8_t destination.
  *
  * @param d      destination block, 8 rows of 8 samples
  * @param top    top neighbour samples (not used by this mode)
  * @param left   left neighbour samples (not used by this mode)
  * @param stride distance in bytes between two destination rows
  */
 static void intra_pred_dc_128(uint8_t *d, uint8_t *top, uint8_t *left, ptrdiff_t stride)
 {
     int y;

     for (y = 0; y < 8; y++)
         memset(d + y * stride, 0x80, 8);
 }
 
f81be06c
 static void intra_pred_plane(uint8_t *d, uint8_t *top, uint8_t *left, ptrdiff_t stride)
64bb3fa6
 {
     int x, y, ia;
0abc2e73
     int ih = 0;
     int iv = 0;
05563cca
     const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;
0abc2e73
 
64bb3fa6
     for (x = 0; x < 4; x++) {
3b4fa548
         ih += (x + 1) *  (top[5 + x] -  top[3 - x]);
64bb3fa6
         iv += (x + 1) * (left[5 + x] - left[3 - x]);
0abc2e73
     }
64bb3fa6
     ia = (top[8] + left[8]) << 4;
     ih = (17 * ih + 16) >> 5;
     iv = (17 * iv + 16) >> 5;
     for (y = 0; y < 8; y++)
         for (x = 0; x < 8; x++)
             d[y * stride + x] = cm[(ia + (x - 3) * ih + (y - 3) * iv + 16) >> 5];
0abc2e73
 }
 
3b4fa548
 /*
  * (1, 2, 1) / 4 low-pass filter with rounding, centred on ARRAY[INDEX].
  * Reads ARRAY[INDEX - 1] and ARRAY[INDEX + 1] as well, so both neighbours
  * must be valid. INDEX is evaluated three times.
  */
 #define LOWPASS(ARRAY, INDEX)                                           \
64bb3fa6
     ((ARRAY[(INDEX) - 1] + 2 * ARRAY[(INDEX)] + ARRAY[(INDEX) + 1] + 2) >> 2)
0abc2e73
 
f81be06c
 /**
  * Low-pass intra prediction of an 8x8 block: each sample is the average of
  * the (1, 2, 1) / 4 filtered top neighbour above its column and the
  * equally filtered left neighbour beside its row.
  *
  * @param d      destination block, 8 rows of 8 samples
  * @param top    top neighbour samples; top[0..9] are read
  * @param left   left neighbour samples; left[0..9] are read
  * @param stride distance in bytes between two destination rows
  */
 static void intra_pred_lp(uint8_t *d, uint8_t *top, uint8_t *left, ptrdiff_t stride)
 {
     int row, col;

     for (row = 0; row < 8; row++) {
         uint8_t *out = d + row * stride;

         for (col = 0; col < 8; col++) {
             const int from_top  = (top[col]  + 2 * top[col + 1]  + top[col + 2]  + 2) >> 2;
             const int from_left = (left[row] + 2 * left[row + 1] + left[row + 2] + 2) >> 2;

             out[col] = (from_top + from_left) >> 1;
         }
     }
 }
 
f81be06c
 /**
  * Down-left diagonal intra prediction of an 8x8 block: the sample at
  * (x, y) is the average of the (1, 2, 1) / 4 filtered top and left
  * neighbours at index x + y + 2.
  *
  * @param d      destination block, 8 rows of 8 samples
  * @param top    top neighbour samples; top[1..17] are read
  * @param left   left neighbour samples; left[1..17] are read
  * @param stride distance in bytes between two destination rows
  */
 static void intra_pred_down_left(uint8_t *d, uint8_t *top, uint8_t *left, ptrdiff_t stride)
 {
     int row, col;

     for (row = 0; row < 8; row++) {
         uint8_t *out = d + row * stride;

         for (col = 0; col < 8; col++) {
             const int k         = col + row + 2;
             const int from_top  = (top[k - 1]  + 2 * top[k]  + top[k + 1]  + 2) >> 2;
             const int from_left = (left[k - 1] + 2 * left[k] + left[k + 1] + 2) >> 2;

             out[col] = (from_top + from_left) >> 1;
         }
     }
 }
 
f81be06c
 /**
  * Down-right diagonal intra prediction of an 8x8 block.
  *
  * Samples on the main diagonal are filtered from left[1], top[0] and
  * top[1]; samples above it use the (1, 2, 1) / 4 filtered top neighbours,
  * samples below it the equally filtered left neighbours.
  *
  * @param d      destination block, 8 rows of 8 samples
  * @param top    top neighbour samples; top[0..8] are read
  * @param left   left neighbour samples; left[0..8] are read
  * @param stride distance in bytes between two destination rows
  */
 static void intra_pred_down_right(uint8_t *d, uint8_t *top, uint8_t *left, ptrdiff_t stride)
 {
     int row, col;

     for (row = 0; row < 8; row++) {
         uint8_t *out = d + row * stride;

         for (col = 0; col < 8; col++) {
             const int off = col - row;

             if (off == 0) {
                 out[col] = (left[1] + 2 * top[0] + top[1] + 2) >> 2;
             } else if (off > 0) {
                 out[col] = (top[off - 1] + 2 * top[off] + top[off + 1] + 2) >> 2;
             } else {
                 const int k = -off;

                 out[col] = (left[k - 1] + 2 * left[k] + left[k + 1] + 2) >> 2;
             }
         }
     }
 }
 
f81be06c
 /**
  * Low-pass intra prediction from the left only: every sample of output
  * row y is the (1, 2, 1) / 4 filtered left neighbour centred on
  * left[y + 1].
  *
  * @param d      destination block, 8 rows of 8 samples
  * @param top    top neighbour samples (not used by this mode)
  * @param left   left neighbour samples; left[0..9] are read
  * @param stride distance in bytes between two destination rows
  */
 static void intra_pred_lp_left(uint8_t *d, uint8_t *top, uint8_t *left, ptrdiff_t stride)
 {
     int row, col;

     for (row = 0; row < 8; row++) {
         uint8_t *out = d + row * stride;

         for (col = 0; col < 8; col++)
             out[col] = (left[row] + 2 * left[row + 1] + left[row + 2] + 2) >> 2;
     }
 }
 
f81be06c
 /**
  * Low-pass intra prediction from the top only: every sample of output
  * column x is the (1, 2, 1) / 4 filtered top neighbour centred on
  * top[x + 1].
  *
  * @param d      destination block, 8 rows of 8 samples
  * @param top    top neighbour samples; top[0..9] are read
  * @param left   left neighbour samples (not used by this mode)
  * @param stride distance in bytes between two destination rows
  */
 static void intra_pred_lp_top(uint8_t *d, uint8_t *top, uint8_t *left, ptrdiff_t stride)
 {
     int row, col;

     for (row = 0; row < 8; row++) {
         uint8_t *out = d + row * stride;

         for (col = 0; col < 8; col++)
             out[col] = (top[col] + 2 * top[col + 1] + top[col + 2] + 2) >> 2;
     }
 }
 
 #undef LOWPASS
 
a6d9f9e6
 static inline void modify_pred(const int8_t *mod_table, int *mode)
 {
     *mode = mod_table[*mode];
64bb3fa6
     if (*mode < 0) {
a6d9f9e6
         av_log(NULL, AV_LOG_ERROR, "Illegal intra prediction mode\n");
         *mode = 0;
     }
 }
 
64bb3fa6
 /**
  * Store the luma prediction modes of an intra macroblock as predictors for
  * later macroblocks, then adapt the modes that depend on unavailable
  * neighbours.
  *
  * @param h            decoder context
  * @param pred_mode_uv chroma prediction mode, adapted in place
  */
 void ff_cavs_modify_mb_i(AVSContext *h, int *pred_mode_uv)
 {
     int *const modes    = h->pred_mode_Y;
     const int have_left = !!(h->flags & A_AVAIL);
     const int have_top  = !!(h->flags & B_AVAIL);

     /* the modes must be saved before any of them is modified */
     modes[3]                      = modes[5];
     modes[6]                      = modes[8];
     h->top_pred_Y[h->mbx * 2 + 0] = modes[7];
     h->top_pred_Y[h->mbx * 2 + 1] = modes[8];

     /* left neighbour missing: adapt the two left blocks and chroma */
     if (!have_left) {
         modify_pred(left_modifier_l, &modes[4]);
         modify_pred(left_modifier_l, &modes[7]);
         modify_pred(left_modifier_c, pred_mode_uv);
     }

     /* top neighbour missing: adapt the two top blocks and chroma */
     if (!have_top) {
         modify_pred(top_modifier_l, &modes[4]);
         modify_pred(top_modifier_l, &modes[5]);
         modify_pred(top_modifier_c, pred_mode_uv);
     }
 }
 
0abc2e73
 /*****************************************************************************
  *
  * motion compensation
  *
  ****************************************************************************/
 
14ddbb47
 /**
  * Motion-compensate one partition from a single reference picture.
  *
  * The luma block is produced by qpix_op[] and both chroma blocks by
  * chroma_op. When the referenced area is not safely inside the picture,
  * the source samples are first copied into h->edge_emu_buffer through
  * h->vdsp.emulated_edge_mc().
  *
  * @param h             decoder context
  * @param pic           reference picture; nothing is done if its data[0]
  *                      is NULL
  * @param chroma_height height passed on to chroma_op
  * @param delta         not used in this function
  * @param list          not used in this function
  * @param dest_y        luma destination
  * @param dest_cb       Cb destination
  * @param dest_cr       Cr destination
  * @param src_x_offset  horizontal position of the partition; multiplied by
  *                      8 before being added to mv->x
  * @param src_y_offset  vertical position of the partition; multiplied by 8
  *                      before being added to mv->y
  * @param qpix_op       luma interpolation functions, indexed by the
  *                      fractional vector part
  * @param chroma_op     chroma interpolation function
  * @param mv            motion vector of the partition
  */
 static inline void mc_dir_part(AVSContext *h, AVFrame *pic, int chroma_height,
                                int delta, int list, uint8_t *dest_y,
                                uint8_t *dest_cb, uint8_t *dest_cr,
                                int src_x_offset, int src_y_offset,
                                qpel_mc_func *qpix_op,
                                h264_chroma_mc_func chroma_op, cavs_vector *mv)
c265b77b
 {
3b4fa548
     const int mx         = mv->x + src_x_offset * 8;
     const int my         = mv->y + src_y_offset * 8;
14ddbb47
     /* low two bits of mx and my select the luma interpolation function */
     const int luma_xy    = (mx & 3) + ((my & 3) << 2);
     /* NOTE(review): the source pointers are formed before pic->data[0] is
      * checked for NULL below */
     uint8_t *src_y       = pic->data[0] + (mx >> 2) + (my >> 2) * h->l_stride;
     uint8_t *src_cb      = pic->data[1] + (mx >> 3) + (my >> 3) * h->c_stride;
     uint8_t *src_cr      = pic->data[2] + (mx >> 3) + (my >> 3) * h->c_stride;
     int extra_width      = 0;
     int extra_height     = extra_width;
     const int full_mx    = mx >> 2;
     const int full_my    = my >> 2;
     const int pic_width  = 16 * h->mb_width;
     const int pic_height = 16 * h->mb_height;
     int emu = 0;
0abc2e73
 
7d848264
     if (!pic->data[0])
0abc2e73
         return;
14ddbb47
     /* tighten the in-picture test by 3 samples in each direction whose
      * vector component is not a multiple of 8 */
     if (mx & 7)
         extra_width  -= 3;
     if (my & 7)
         extra_height -= 3;
 
     /* the block reaches outside the allowed area: build a padded
      * (16 + 5) x (16 + 5) copy of the luma source */
     if (full_mx < 0 - extra_width ||
         full_my < 0 - extra_height ||
         full_mx + 16 /* FIXME */ > pic_width + extra_width ||
         full_my + 16 /* FIXME */ > pic_height + extra_height) {
         h->vdsp.emulated_edge_mc(h->edge_emu_buffer,
458446ac
                                  src_y - 2 - 2 * h->l_stride,
                                  h->l_stride, h->l_stride,
14ddbb47
                                  16 + 5, 16 + 5 /* FIXME */,
                                  full_mx - 2, full_my - 2,
                                  pic_width, pic_height);
         src_y = h->edge_emu_buffer + 2 + 2 * h->l_stride;
3b4fa548
         emu   = 1;
0abc2e73
     }
 
14ddbb47
     // FIXME try variable height perhaps?
     qpix_op[luma_xy](dest_y, src_y, h->l_stride);
0abc2e73
 
     /* the same buffer is reused for each chroma plane, so every plane is
      * emulated right before it is consumed */
14ddbb47
     if (emu) {
         h->vdsp.emulated_edge_mc(h->edge_emu_buffer, src_cb,
458446ac
                                  h->c_stride, h->c_stride,
14ddbb47
                                  9, 9 /* FIXME */,
                                  mx >> 3, my >> 3,
                                  pic_width >> 1, pic_height >> 1);
         src_cb = h->edge_emu_buffer;
0abc2e73
     }
14ddbb47
     chroma_op(dest_cb, src_cb, h->c_stride, chroma_height, mx & 7, my & 7);
 
     if (emu) {
         h->vdsp.emulated_edge_mc(h->edge_emu_buffer, src_cr,
458446ac
                                  h->c_stride, h->c_stride,
14ddbb47
                                  9, 9 /* FIXME */,
                                  mx >> 3, my >> 3,
                                  pic_width >> 1, pic_height >> 1);
         src_cr = h->edge_emu_buffer;
0abc2e73
     }
14ddbb47
     chroma_op(dest_cr, src_cr, h->c_stride, chroma_height, mx & 7, my & 7);
0abc2e73
 }
 
14ddbb47
 /**
  * Motion-compensate one partition from its forward and/or backward
  * reference.
  *
  * The first prediction that is performed uses the "put" functions; if a
  * forward prediction was done, the backward one uses the "avg" functions
  * so that both are combined in the destination.
  *
  * @param h             decoder context
  * @param chroma_height height passed on to the chroma functions
  * @param delta         passed through to mc_dir_part()
  * @param dest_y        luma destination of the macroblock
  * @param dest_cb       Cb destination of the macroblock
  * @param dest_cr       Cr destination of the macroblock
  * @param x_offset      horizontal partition offset inside the macroblock;
  *                      applied once to chroma and doubled for luma
  * @param y_offset      vertical partition offset inside the macroblock;
  *                      applied once to chroma and doubled for luma
  * @param qpix_put      luma functions that overwrite the destination
  * @param chroma_put    chroma function that overwrites the destination
  * @param qpix_avg      luma functions that average into the destination
  * @param chroma_avg    chroma function that averages into the destination
  * @param mv            forward vector; the backward vector is read at
  *                      mv + MV_BWD_OFFS
  */
 static inline void mc_part_std(AVSContext *h, int chroma_height, int delta,
                                uint8_t *dest_y,
                                uint8_t *dest_cb,
                                uint8_t *dest_cr,
                                int x_offset, int y_offset,
                                qpel_mc_func *qpix_put,
                                h264_chroma_mc_func chroma_put,
                                qpel_mc_func *qpix_avg,
                                h264_chroma_mc_func chroma_avg,
                                cavs_vector *mv)
 {
     cavs_vector *const bwd_mv = mv + MV_BWD_OFFS;
     int have_fwd              = 0;

     /* step to the partition inside the macroblock */
     dest_y  += x_offset * 2 + y_offset * h->l_stride * 2;
     dest_cb += x_offset     + y_offset * h->c_stride;
     dest_cr += x_offset     + y_offset * h->c_stride;

     /* from here on the offsets are relative to the picture */
     x_offset += 8 * h->mbx;
     y_offset += 8 * h->mby;

     if (mv->ref >= 0) {
         mc_dir_part(h, h->DPB[mv->ref].f, chroma_height, delta, 0,
                     dest_y, dest_cb, dest_cr, x_offset, y_offset,
                     qpix_put, chroma_put, mv);
         have_fwd = 1;
     }

     if (bwd_mv->ref >= 0) {
         mc_dir_part(h, h->DPB[0].f, chroma_height, delta, 1,
                     dest_y, dest_cb, dest_cr, x_offset, y_offset,
                     have_fwd ? qpix_avg   : qpix_put,
                     have_fwd ? chroma_avg : chroma_put,
                     bwd_mv);
     }
 }
 
14ddbb47
 /**
  * Inter prediction of the current macroblock.
  *
  * A macroblock whose partition flags are zero is predicted as a single
  * block using the first entry of the function tables; any other type is
  * predicted as four partitions using the second entry.
  *
  * @param h       decoder context
  * @param mb_type type of the current macroblock
  */
 void ff_cavs_inter(AVSContext *h, enum cavs_mb mb_type)
 {
     /* offsets and motion vector slots of the four partitions */
     static const int part_x[4]  = { 0, 4, 0, 4 };
     static const int part_y[4]  = { 0, 0, 4, 4 };
     static const int part_mv[4] = { MV_FWD_X0, MV_FWD_X1, MV_FWD_X2, MV_FWD_X3 };
     int part;

     if (ff_cavs_partition_flags[mb_type] == 0) { // 16x16
         mc_part_std(h, 8, 0, h->cy, h->cu, h->cv, 0, 0,
                     h->cdsp.put_cavs_qpel_pixels_tab[0],
                     h->h264chroma.put_h264_chroma_pixels_tab[0],
                     h->cdsp.avg_cavs_qpel_pixels_tab[0],
                     h->h264chroma.avg_h264_chroma_pixels_tab[0],
                     &h->mv[MV_FWD_X0]);
         return;
     }

     for (part = 0; part < 4; part++)
         mc_part_std(h, 4, 0, h->cy, h->cu, h->cv,
                     part_x[part], part_y[part],
                     h->cdsp.put_cavs_qpel_pixels_tab[1],
                     h->h264chroma.put_h264_chroma_pixels_tab[1],
                     h->cdsp.avg_cavs_qpel_pixels_tab[1],
                     h->h264chroma.avg_h264_chroma_pixels_tab[1],
                     &h->mv[part_mv[part]]);
 }
 
 /*****************************************************************************
  *
  * motion vector prediction
  *
  ****************************************************************************/
 
14ddbb47
 /**
  * Scale a candidate motion vector to another temporal distance.
  *
  * Each component is multiplied by distp and by the scale_den entry of the
  * candidate's reference, then rounded and shifted right by 9 bits.
  *
  * @param h     decoder context providing scale_den[]
  * @param d_x   receives the scaled x component
  * @param d_y   receives the scaled y component
  * @param src   candidate vector; a negative ref uses scale_den[0]
  * @param distp distance to scale to
  */
 static inline void scale_mv(AVSContext *h, int *d_x, int *d_y,
                             cavs_vector *src, int distp)
 {
     const int64_t den = h->scale_den[FFMAX(src->ref, 0)];
     int64_t scaled;

     scaled = src->x * distp * den;
     *d_x   = (scaled + 256 + FF_SIGNBIT(src->x)) >> 9;

     scaled = src->y * distp * den;
     *d_y   = (scaled + 256 + FF_SIGNBIT(src->y)) >> 9;
 }
 
14ddbb47
 /**
  * Median motion vector prediction from three candidates.
  *
  * The candidates are scaled to the distance of mvP, the three pairwise
  * distances (sum of absolute component differences) are computed, and the
  * candidate that is not part of the pair with the median distance is
  * selected.
  *
  * @param h   decoder context
  * @param mvP vector to predict; dist is read, x and y are written
  * @param mvA first candidate
  * @param mvB second candidate
  * @param mvC third candidate
  */
 static inline void mv_pred_median(AVSContext *h,
                                   cavs_vector *mvP,
                                   cavs_vector *mvA,
                                   cavs_vector *mvB,
                                   cavs_vector *mvC)
 {
     enum { CAND_A, CAND_B, CAND_C };
     int sx[3], sy[3];
     int dist_ab, dist_bc, dist_ca, dist_mid;
     int pick;

     /* scale candidates according to their temporal span */
     scale_mv(h, &sx[CAND_A], &sy[CAND_A], mvA, mvP->dist);
     scale_mv(h, &sx[CAND_B], &sy[CAND_B], mvB, mvP->dist);
     scale_mv(h, &sx[CAND_C], &sy[CAND_C], mvC, mvP->dist);

     /* pairwise distances between the scaled candidates */
     dist_ab  = abs(sx[CAND_A] - sx[CAND_B]) + abs(sy[CAND_A] - sy[CAND_B]);
     dist_bc  = abs(sx[CAND_B] - sx[CAND_C]) + abs(sy[CAND_B] - sy[CAND_C]);
     dist_ca  = abs(sx[CAND_C] - sx[CAND_A]) + abs(sy[CAND_C] - sy[CAND_A]);
     dist_mid = mid_pred(dist_ab, dist_bc, dist_ca);

     /* take the candidate opposite to the pair with the median distance */
     if (dist_mid == dist_ab)
         pick = CAND_C;
     else if (dist_mid == dist_bc)
         pick = CAND_A;
     else
         pick = CAND_B;

     mvP->x = sx[pick];
     mvP->y = sy[pick];
 }
 
bef01ca0
 /**
  * Predict the motion vector at cache position nP and, for the modes below
  * MV_PRED_PSKIP, add the vector difference read from the bitstream.
  *
  * @param h    decoder context
  * @param nP   cache position of the vector to compute
  * @param nC   cache position of the top-right candidate
  * @param mode prediction mode
  * @param size block size handed to set_mvs() for the result
  * @param ref  reference index stored in the vector
  */
 void ff_cavs_mv(AVSContext *h, enum cavs_mv_loc nP, enum cavs_mv_loc nC,
                 enum cavs_mv_pred mode, enum cavs_block size, int ref)
 {
     cavs_vector *const cur    = &h->mv[nP];
     cavs_vector *const left   = &h->mv[nP - 1];
     cavs_vector *const top    = &h->mv[nP - 4];
     cavs_vector *diag         = &h->mv[nC];
     const cavs_vector *chosen = NULL;
     int left_ok, top_ok, diag_ok;

     cur->ref  = ref;
     cur->dist = h->dist[cur->ref];

     /* use the top-left neighbour (mvD) when the top-right one cannot be
      * used */
     if (diag->ref == NOT_AVAIL || (nP == MV_FWD_X3) || (nP == MV_BWD_X3))
         diag = &h->mv[nP - 5];

     left_ok = left->ref >= 0;
     top_ok  = top->ref  >= 0;
     diag_ok = diag->ref >= 0;

     if (mode == MV_PRED_PSKIP &&
         (left->ref == NOT_AVAIL ||
          top->ref  == NOT_AVAIL ||
          (left->x | left->y | left->ref) == 0 ||
          (top->x  | top->y  | top->ref)  == 0)) {
         chosen = &un_mv;
     } else if (left_ok + top_ok + diag_ok == 1) {
         /* if there is only one suitable candidate, take it */
         chosen = left_ok ? left : top_ok ? top : diag;
     } else if (mode == MV_PRED_LEFT     && left->ref == ref) {
         chosen = left;
     } else if (mode == MV_PRED_TOP      && top->ref  == ref) {
         chosen = top;
     } else if (mode == MV_PRED_TOPRIGHT && diag->ref == ref) {
         chosen = diag;
     }

     if (chosen) {
         cur->x = chosen->x;
         cur->y = chosen->y;
     } else {
         mv_pred_median(h, cur, left, top, diag);
     }

     if (mode < MV_PRED_PSKIP) {
         /* unsigned addition keeps the sum free of signed overflow */
         int mx = get_se_golomb(&h->gb) + (unsigned)cur->x;
         int my = get_se_golomb(&h->gb) + (unsigned)cur->y;

         if (mx != (int16_t)mx || my != (int16_t)my) {
             av_log(h->avctx, AV_LOG_ERROR, "MV %d %d out of supported range\n", mx, my);
         } else {
             cur->x = mx;
             cur->y = my;
         }
     }
     set_mvs(cur, size);
 }
 
 /*****************************************************************************
  *
5d2b15b8
  * macroblock level
  *
  ****************************************************************************/
 
 /**
  * initialise predictors for motion vectors and intra prediction
  */
14ddbb47
 void ff_cavs_init_mb(AVSContext *h)
 {
5d2b15b8
     int i;
 
     /* copy predictors from top line (MB B and C) into cache */
14ddbb47
     for (i = 0; i < 3; i++) {
         h->mv[MV_FWD_B2 + i] = h->top_mv[0][h->mbx * 2 + i];
         h->mv[MV_BWD_B2 + i] = h->top_mv[1][h->mbx * 2 + i];
5d2b15b8
     }
14ddbb47
     h->pred_mode_Y[1] = h->top_pred_Y[h->mbx * 2 + 0];
     h->pred_mode_Y[2] = h->top_pred_Y[h->mbx * 2 + 1];
5d2b15b8
     /* clear top predictors if MB B is not available */
14ddbb47
     if (!(h->flags & B_AVAIL)) {
         h->mv[MV_FWD_B2]  = un_mv;
         h->mv[MV_FWD_B3]  = un_mv;
         h->mv[MV_BWD_B2]  = un_mv;
         h->mv[MV_BWD_B3]  = un_mv;
5d2b15b8
         h->pred_mode_Y[1] = h->pred_mode_Y[2] = NOT_AVAIL;
14ddbb47
         h->flags         &= ~(C_AVAIL | D_AVAIL);
     } else if (h->mbx) {
5d2b15b8
         h->flags |= D_AVAIL;
     }
14ddbb47
     if (h->mbx == h->mb_width - 1) // MB C not available
5d2b15b8
         h->flags &= ~C_AVAIL;
     /* clear top-right predictors if MB C is not available */
14ddbb47
     if (!(h->flags & C_AVAIL)) {
ef07ac1e
         h->mv[MV_FWD_C2] = un_mv;
         h->mv[MV_BWD_C2] = un_mv;
5d2b15b8
     }
     /* clear top-left predictors if MB D is not available */
14ddbb47
     if (!(h->flags & D_AVAIL)) {
ef07ac1e
         h->mv[MV_FWD_D3] = un_mv;
         h->mv[MV_BWD_D3] = un_mv;
5d2b15b8
     }
 }
 
 /**
  * save predictors for later macroblocks and increase
  * macroblock address
32e543f8
  * @return 0 if end of frame is reached, 1 otherwise
5d2b15b8
  */
14ddbb47
 int ff_cavs_next_mb(AVSContext *h)
 {
5d2b15b8
     int i;
 
     h->flags |= A_AVAIL;
14ddbb47
     h->cy    += 16;
     h->cu    += 8;
     h->cv    += 8;
5d2b15b8
     /* copy mvs as predictors to the left */
14ddbb47
     for (i = 0; i <= 20; i += 4)
         h->mv[i] = h->mv[i + 2];
5d2b15b8
     /* copy bottom mvs from cache to top line */
14ddbb47
     h->top_mv[0][h->mbx * 2 + 0] = h->mv[MV_FWD_X2];
     h->top_mv[0][h->mbx * 2 + 1] = h->mv[MV_FWD_X3];
     h->top_mv[1][h->mbx * 2 + 0] = h->mv[MV_BWD_X2];
     h->top_mv[1][h->mbx * 2 + 1] = h->mv[MV_BWD_X3];
5d2b15b8
     /* next MB address */
6847ab43
     h->mbidx++;
5d2b15b8
     h->mbx++;
14ddbb47
     if (h->mbx == h->mb_width) { // New mb line
         h->flags = B_AVAIL | C_AVAIL;
5d2b15b8
         /* clear left pred_modes */
         h->pred_mode_Y[3] = h->pred_mode_Y[6] = NOT_AVAIL;
         /* clear left mv predictors */
14ddbb47
         for (i = 0; i <= 20; i += 4)
ef07ac1e
             h->mv[i] = un_mv;
5d2b15b8
         h->mbx = 0;
         h->mby++;
         /* re-calculate sample pointers */
7d848264
         h->cy = h->cur.f->data[0] + h->mby * 16 * h->l_stride;
14ddbb47
         h->cu = h->cur.f->data[1] + h->mby * 8 * h->c_stride;
         h->cv = h->cur.f->data[2] + h->mby * 8 * h->c_stride;
         if (h->mby == h->mb_height) { // Frame end
5d2b15b8
             return 0;
         }
     }
     return 1;
 }
 
 /*****************************************************************************
  *
0abc2e73
  * frame level
  *
  ****************************************************************************/
 
fb8a10db
 /**
  * Reset the per-picture decoding state: predictors, sample pointers,
  * strides, macroblock position and availability flags.
  *
  * @param h decoder context; h->cur.f must already hold the picture buffers
  * @return always 0
  */
 int ff_cavs_init_pic(AVSContext *h)
 {
     AVFrame *const frame = h->cur.f;
     int row;

     /* clear some predictors */
     for (row = 0; row < 6; row++)
         h->mv[4 * row] = un_mv;
     h->mv[MV_BWD_X0] = ff_cavs_dir_mv;
     set_mvs(&h->mv[MV_BWD_X0], BLK_16X16);
     h->mv[MV_FWD_X0] = ff_cavs_dir_mv;
     set_mvs(&h->mv[MV_FWD_X0], BLK_16X16);
     h->pred_mode_Y[3] = NOT_AVAIL;
     h->pred_mode_Y[6] = NOT_AVAIL;

     /* sample pointers start at the first macroblock of the picture */
     h->cy = frame->data[0];
     h->cu = frame->data[1];
     h->cv = frame->data[2];

     h->l_stride = frame->linesize[0];
     h->c_stride = frame->linesize[1];

     /* offsets of the two lower 8x8 luma blocks depend on the stride */
     h->luma_scan[2] = 8 * h->l_stride;
     h->luma_scan[3] = 8 * h->l_stride + 8;

     h->mbidx = 0;
     h->mby   = 0;
     h->mbx   = 0;
     h->flags = 0;

     return 0;
 }
 
 /*****************************************************************************
  *
  * headers and interface
  *
  ****************************************************************************/
 
f503b195
 /**
  * some predictions require data from the top-neighbouring macroblock.
  * this data has to be stored for one complete row of macroblocks
  * and this storage space is allocated here
  */
35e559ae
 int ff_cavs_init_top_lines(AVSContext *h)
14ddbb47
 {
0abc2e73
     /* alloc top line of predictors */
14ddbb47
     h->top_qp       = av_mallocz(h->mb_width);
0674da99
     h->top_mv[0]    = av_mallocz_array(h->mb_width * 2 + 1,  sizeof(cavs_vector));
     h->top_mv[1]    = av_mallocz_array(h->mb_width * 2 + 1,  sizeof(cavs_vector));
     h->top_pred_Y   = av_mallocz_array(h->mb_width * 2,  sizeof(*h->top_pred_Y));
     h->top_border_y = av_mallocz_array(h->mb_width + 1,  16);
     h->top_border_u = av_mallocz_array(h->mb_width,  10);
     h->top_border_v = av_mallocz_array(h->mb_width,  10);
0abc2e73
 
     /* alloc space for co-located MVs and types */
0674da99
     h->col_mv        = av_mallocz_array(h->mb_width * h->mb_height,
                                         4 * sizeof(cavs_vector));
14ddbb47
     h->col_type_base = av_mallocz(h->mb_width * h->mb_height);
     h->block         = av_mallocz(64 * sizeof(int16_t));
35e559ae
 
     if (!h->top_qp || !h->top_mv[0] || !h->top_mv[1] || !h->top_pred_Y ||
         !h->top_border_y || !h->top_border_u || !h->top_border_v ||
         !h->col_mv || !h->col_type_base || !h->block) {
         av_freep(&h->top_qp);
         av_freep(&h->top_mv[0]);
         av_freep(&h->top_mv[1]);
         av_freep(&h->top_pred_Y);
         av_freep(&h->top_border_y);
         av_freep(&h->top_border_u);
         av_freep(&h->top_border_v);
         av_freep(&h->col_mv);
         av_freep(&h->col_type_base);
         av_freep(&h->block);
         return AVERROR(ENOMEM);
     }
     return 0;
0abc2e73
 }
 
14ddbb47
 /**
  * Initialise the decoder context: DSP function tables, scan table, frame
  * holders and the intra prediction function tables.
  *
  * @param avctx codec context whose priv_data is the AVSContext
  * @return 0 on success, AVERROR(ENOMEM) if a frame could not be allocated
  *         (the context is cleaned up with ff_cavs_end() in that case)
  */
 av_cold int ff_cavs_init(AVCodecContext *avctx)
 {
     AVSContext *h = avctx->priv_data;

     /* DSP contexts first; the scan table depends on the IDCT permutation */
     ff_blockdsp_init(&h->bdsp, avctx);
     ff_h264chroma_init(&h->h264chroma, 8);
     ff_idctdsp_init(&h->idsp, avctx);
     ff_videodsp_init(&h->vdsp, 8);
     ff_cavsdsp_init(&h->cdsp, avctx);
     ff_init_scantable_permutation(h->idsp.idct_permutation,
                                   h->cdsp.idct_perm);
     ff_init_scantable(h->idsp.idct_permutation, &h->scantable, ff_zigzag_direct);

     h->avctx       = avctx;
     avctx->pix_fmt = AV_PIX_FMT_YUV420P;

     h->cur.f    = av_frame_alloc();
     h->DPB[0].f = av_frame_alloc();
     h->DPB[1].f = av_frame_alloc();
     if (!(h->cur.f && h->DPB[0].f && h->DPB[1].f)) {
         ff_cavs_end(avctx);
         return AVERROR(ENOMEM);
     }

     /* offsets of the two upper 8x8 luma blocks; the lower two depend on
      * the stride and are set per picture */
     h->luma_scan[0] = 0;
     h->luma_scan[1] = 8;

     /* luma intra prediction functions */
     h->intra_pred_l[INTRA_L_VERT]       = intra_pred_vert;
     h->intra_pred_l[INTRA_L_HORIZ]      = intra_pred_horiz;
     h->intra_pred_l[INTRA_L_LP]         = intra_pred_lp;
     h->intra_pred_l[INTRA_L_DOWN_LEFT]  = intra_pred_down_left;
     h->intra_pred_l[INTRA_L_DOWN_RIGHT] = intra_pred_down_right;
     h->intra_pred_l[INTRA_L_LP_LEFT]    = intra_pred_lp_left;
     h->intra_pred_l[INTRA_L_LP_TOP]     = intra_pred_lp_top;
     h->intra_pred_l[INTRA_L_DC_128]     = intra_pred_dc_128;

     /* chroma intra prediction functions */
     h->intra_pred_c[INTRA_C_LP]      = intra_pred_lp;
     h->intra_pred_c[INTRA_C_HORIZ]   = intra_pred_horiz;
     h->intra_pred_c[INTRA_C_VERT]    = intra_pred_vert;
     h->intra_pred_c[INTRA_C_PLANE]   = intra_pred_plane;
     h->intra_pred_c[INTRA_C_LP_LEFT] = intra_pred_lp_left;
     h->intra_pred_c[INTRA_C_LP_TOP]  = intra_pred_lp_top;
     h->intra_pred_c[INTRA_C_DC_128]  = intra_pred_dc_128;

     /* mark mv cache entries 7 and 19 as unavailable */
     h->mv[7]  = un_mv;
     h->mv[19] = un_mv;
     return 0;
 }
 
14ddbb47
 /**
  * Release everything owned by the decoder context: the frame holders, the
  * per-row and per-picture buffers and the edge emulation buffer.
  *
  * @param avctx codec context whose priv_data is the AVSContext
  * @return always 0
  */
 av_cold int ff_cavs_end(AVCodecContext *avctx)
 {
     AVSContext *h = avctx->priv_data;
     int i;

     /* frame holders */
     av_frame_free(&h->cur.f);
     for (i = 0; i < 2; i++)
         av_frame_free(&h->DPB[i].f);

     /* buffers allocated by ff_cavs_init_top_lines() */
     av_freep(&h->top_qp);
     for (i = 0; i < 2; i++)
         av_freep(&h->top_mv[i]);
     av_freep(&h->top_pred_Y);
     av_freep(&h->top_border_y);
     av_freep(&h->top_border_u);
     av_freep(&h->top_border_v);
     av_freep(&h->col_mv);
     av_freep(&h->col_type_base);
     av_freep(&h->block);

     av_freep(&h->edge_emu_buffer);
     return 0;
 }