libavcodec/h264.c
0da71265
 /*
d5ec00ba
  * H.26L/H.264/AVC/JVT/14496-10/... decoder
0da71265
  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
  *
b78e7197
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
0da71265
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
b78e7197
  * version 2.1 of the License, or (at your option) any later version.
0da71265
  *
b78e7197
  * FFmpeg is distributed in the hope that it will be useful,
0da71265
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
b78e7197
  * License along with FFmpeg; if not, write to the Free Software
5509bffa
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0da71265
  */
115329f1
 
0da71265
 /**
ba87f080
  * @file
0da71265
  * H.264 / AVC / MPEG4 part10 codec.
  * @author Michael Niedermayer <michaelni@gmx.at>
  */
 
7ffe76e5
 #include "libavutil/imgutils.h"
40e5d31b
 #include "internal.h"
0da71265
 #include "dsputil.h"
 #include "avcodec.h"
 #include "mpegvideo.h"
26b4fe82
 #include "h264.h"
0da71265
 #include "h264data.h"
188d3c51
 #include "h264_mvpred.h"
0da71265
 #include "golomb.h"
199436b9
 #include "mathops.h"
626464fb
 #include "rectangle.h"
d375c104
 #include "thread.h"
369122dd
 #include "vdpau_internal.h"
cfa5a81e
 #include "libavutil/avassert.h"
0da71265
 
e5017ab8
 #include "cabac.h"
 
2848ce84
 //#undef NDEBUG
0da71265
 #include <assert.h>
 
fcc0224e
 static const uint8_t rem6[QP_MAX_NUM+1]={
d268bed2
 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
acd8d10f
 };
 
fcc0224e
 static const uint8_t div6[QP_MAX_NUM+1]={
d268bed2
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9,10,10,10,10,
acd8d10f
 };
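 /* rem6[] and div6[] decompose a QP into qp%6 and qp/6 without divisions.
  * A rough sketch of how the dequant init code further below consumes them
  * (names shortened; see init_dequant4/8_coeff_table()):
  *     shift = div6[qp];   // qp/6 selects the extra left shift
  *     idx   = rem6[qp];   // qp%6 selects the row of normative constants
  *     coeff = (init[idx][x] * scaling_matrix[x]) << shift;
  */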
 
0435fb16
 static const enum PixelFormat hwaccel_pixfmt_list_h264_jpeg_420[] = {
     PIX_FMT_DXVA2_VLD,
     PIX_FMT_VAAPI_VLD,
     PIX_FMT_YUVJ420P,
     PIX_FMT_NONE
 };
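 /* Pixel formats offered when negotiating the output format for 8-bit 4:2:0
  * streams: hardware acceleration surfaces first, software YUV as fallback. */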
 
903d58f6
 void ff_h264_write_back_intra_pred_mode(H264Context *h){
5b0fb524
     int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy];
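     /* The cache uses the 8-wide scan8 layout; only the bottom row of 4x4
      * prediction modes and the three rightmost entries above it are written
      * back, as that is all the neighbouring macroblocks will read. */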
0da71265
 
662a5b23
     AV_COPY32(mode, h->intra4x4_pred_mode_cache + 4 + 8*4);
     mode[4]= h->intra4x4_pred_mode_cache[7+8*3];
     mode[5]= h->intra4x4_pred_mode_cache[7+8*2];
     mode[6]= h->intra4x4_pred_mode_cache[7+8*1];
0da71265
 }
 
 /**
  * Check whether the top and left blocks are available if needed and change
  * the dc mode so it only uses the available blocks.
  */
2bedc0e8
 int ff_h264_check_intra4x4_pred_mode(H264Context *h){
     MpegEncContext * const s = &h->s;
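     /* Per-mode remap tables for missing neighbours: a negative entry means the
      * mode requires the missing edge (error), 0 leaves the mode unchanged and a
      * positive entry substitutes a DC variant that ignores that edge. */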
     static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
     static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
     int i;
 
     if(!(h->top_samples_available&0x8000)){
         for(i=0; i<4; i++){
             int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
             if(status<0){
                 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
                 return -1;
             } else if(status){
                 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
             }
         }
     }
 
     if((h->left_samples_available&0x8888)!=0x8888){
         static const int mask[4]={0x8000,0x2000,0x80,0x20};
         for(i=0; i<4; i++){
             if(!(h->left_samples_available&mask[i])){
                 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
                 if(status<0){
                     av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
                     return -1;
                 } else if(status){
                     h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
                 }
             }
         }
     }
 
     return 0;
b89a0c9d
 } //FIXME cleanup like check_intra_pred_mode
2bedc0e8
 
b89a0c9d
 static int check_intra_pred_mode(H264Context *h, int mode, int is_chroma){
0da71265
     MpegEncContext * const s = &h->s;
     static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
     static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
115329f1
 
43ff0714
     if(mode > 6U) {
5175b937
         av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
7440fe83
         return -1;
5175b937
     }
115329f1
 
0da71265
     if(!(h->top_samples_available&0x8000)){
         mode= top[ mode ];
         if(mode<0){
9b879566
             av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
0da71265
             return -1;
         }
     }
115329f1
 
d1d10e91
     if((h->left_samples_available&0x8080) != 0x8080){
0da71265
         mode= left[ mode ];
b89a0c9d
         if(is_chroma && (h->left_samples_available&0x8080)){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
d1d10e91
             mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
         }
0da71265
         if(mode<0){
9b879566
             av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
0da71265
             return -1;
115329f1
         }
0da71265
     }
 
     return mode;
 }
 
b89a0c9d
 /**
  * Check whether the top and left blocks are available if needed and change
  * the dc mode so it only uses the available blocks, for intra 16x16 prediction.
  */
 int ff_h264_check_intra16x16_pred_mode(H264Context *h, int mode)
 {
     return check_intra_pred_mode(h, mode, 0);
 }
 
 /**
  * Check whether the top and left blocks are available if needed and change
  * the dc mode so it only uses the available blocks, for intra chroma prediction.
  */
 int ff_h264_check_intra_chroma_pred_mode(H264Context *h, int mode)
 {
     return check_intra_pred_mode(h, mode, 1);
 }
 
 
1790a5e9
 const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
0da71265
     int i, si, di;
     uint8_t *dst;
24456882
     int bufidx;
0da71265
 
bb270c08
 //    src[0]&0x80;                //forbidden bit
0da71265
     h->nal_ref_idc= src[0]>>5;
     h->nal_unit_type= src[0]&0x1F;
 
     src++; length--;
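     /* Scan for the next 00 00 pair (start code or emulation prevention). On
      * fast-unaligned targets the word-wise subtract/mask trick below cheaply
      * skips chunks that cannot contain such a pair, so the exact byte-wise
      * test only runs on candidates. */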
e08715d3
 
b250f9c6
 #if HAVE_FAST_UNALIGNED
 # if HAVE_FAST_64BIT
e08715d3
 #   define RS 7
     for(i=0; i+1<length; i+=9){
19769ece
         if(!((~AV_RN64A(src+i) & (AV_RN64A(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
e08715d3
 # else
 #   define RS 3
     for(i=0; i+1<length; i+=5){
19769ece
         if(!((~AV_RN32A(src+i) & (AV_RN32A(src+i) - 0x01000101U)) & 0x80008080U))
e08715d3
 # endif
             continue;
         if(i>0 && !src[i]) i--;
         while(src[i]) i++;
 #else
 #   define RS 0
0da71265
     for(i=0; i+1<length; i+=2){
         if(src[i]) continue;
         if(i>0 && src[i-1]==0) i--;
e08715d3
 #endif
0da71265
         if(i+2<length && src[i+1]==0 && src[i+2]<=3){
             if(src[i+2]!=3){
                 /* startcode, so we must be past the end */
                 length=i;
             }
             break;
         }
abb27cfb
         i-= RS;
0da71265
     }
 
24456882
     bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
97437dad
     si=h->rbsp_buffer_size[bufidx];
     av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE+MAX_MBPAIR_SIZE);
24456882
     dst= h->rbsp_buffer[bufidx];
97437dad
     if (dst == NULL){ // check av_fast_malloc() failure before touching the buffer
         return NULL;
     }

     if(si != h->rbsp_buffer_size[bufidx])
         memset(dst + length, 0, FF_INPUT_BUFFER_PADDING_SIZE+MAX_MBPAIR_SIZE);
0da71265
 
 
97437dad
     if(i>=length-1){ //no escaped 0
         *dst_length= length;
         *consumed= length+1; //+1 for the header
fa3f7391
         if(h->s.avctx->flags2 & CODEC_FLAG2_FAST){
             return src;
         }else{
             memcpy(dst, src, length);
             return dst;
         }
97437dad
     }
 
3b66c4c5
 //printf("decoding esc\n");
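     /* Undo emulation prevention: the encoder inserts 0x03 after any 00 00 so
      * that start codes never appear inside a NAL unit; each 00 00 03 below is
      * turned back into 00 00, e.g. 00 00 03 01 -> 00 00 01. */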
593af7cd
     memcpy(dst, src, i);
     si=di=i;
     while(si+2<length){
0da71265
         //remove escapes (very rare 1:2^22)
593af7cd
         if(src[si+2]>3){
             dst[di++]= src[si++];
             dst[di++]= src[si++];
         }else if(src[si]==0 && src[si+1]==0){
0da71265
             if(src[si+2]==3){ //escape
                 dst[di++]= 0;
                 dst[di++]= 0;
                 si+=3;
c8470cc1
                 continue;
0da71265
             }else //next start code
593af7cd
                 goto nsc;
0da71265
         }
 
         dst[di++]= src[si++];
     }
593af7cd
     while(si<length)
         dst[di++]= src[si++];
 nsc:
0da71265
 
d4369630
     memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
 
0da71265
     *dst_length= di;
     *consumed= si + 1;//+1 for the header
90b5b51e
 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
0da71265
     return dst;
 }
 
12324121
 /**
  * Identify the exact end of the bitstream
  * @return the length of the trailing bits, or 0 if damaged
  */
 static int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
0da71265
     int v= *src;
     int r;
 
a9c9a240
     tprintf(h->s.avctx, "rbsp trailing %X\n", v);
0da71265
 
     for(r=1; r<9; r++){
         if(v&1) return r;
         v>>=1;
     }
     return 0;
 }
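 /* Helpers for frame multithreading: for each reference picture they record the
  * lowest luma row that motion compensation will read (plus a margin for the
  * sub-pel interpolation filter), so await_references() can block until just
  * enough of that reference frame has been decoded. */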
 
6a9c8594
 static inline int get_lowest_part_list_y(H264Context *h, Picture *pic, int n, int height,
                                  int y_offset, int list){
     int raw_my= h->mv_cache[list][ scan8[n] ][1];
     int filter_height= (raw_my&3) ? 2 : 0;
     int full_my= (raw_my>>2) + y_offset;
     int top = full_my - filter_height, bottom = full_my + height + filter_height;
 
     return FFMAX(abs(top), bottom);
 }
 
 static inline void get_lowest_part_y(H264Context *h, int refs[2][48], int n, int height,
                                int y_offset, int list0, int list1, int *nrefs){
     MpegEncContext * const s = &h->s;
     int my;
 
     y_offset += 16*(s->mb_y >> MB_FIELD);
 
     if(list0){
         int ref_n = h->ref_cache[0][ scan8[n] ];
         Picture *ref= &h->ref_list[0][ref_n];
 
         // Error resilience puts the current picture in the ref list.
         // Don't try to wait on these as it will cause a deadlock.
         // Fields can wait on each other, though.
         if(ref->thread_opaque != s->current_picture.thread_opaque ||
            (ref->reference&3) != s->picture_structure) {
             my = get_lowest_part_list_y(h, ref, n, height, y_offset, 0);
             if (refs[0][ref_n] < 0) nrefs[0] += 1;
             refs[0][ref_n] = FFMAX(refs[0][ref_n], my);
         }
     }
 
     if(list1){
         int ref_n = h->ref_cache[1][ scan8[n] ];
         Picture *ref= &h->ref_list[1][ref_n];
 
         if(ref->thread_opaque != s->current_picture.thread_opaque ||
            (ref->reference&3) != s->picture_structure) {
             my = get_lowest_part_list_y(h, ref, n, height, y_offset, 1);
             if (refs[1][ref_n] < 0) nrefs[1] += 1;
             refs[1][ref_n] = FFMAX(refs[1][ref_n], my);
         }
     }
 }
 
 /**
  * Wait until all reference frames are available for MC operations.
  *
  * @param h the H264 context
  */
 static void await_references(H264Context *h){
     MpegEncContext * const s = &h->s;
     const int mb_xy= h->mb_xy;
     const int mb_type= s->current_picture.mb_type[mb_xy];
     int refs[2][48];
     int nrefs[2] = {0};
     int ref, list;
 
     memset(refs, -1, sizeof(refs));
 
     if(IS_16X16(mb_type)){
         get_lowest_part_y(h, refs, 0, 16, 0,
                   IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
     }else if(IS_16X8(mb_type)){
         get_lowest_part_y(h, refs, 0, 8, 0,
                   IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
         get_lowest_part_y(h, refs, 8, 8, 8,
                   IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
     }else if(IS_8X16(mb_type)){
         get_lowest_part_y(h, refs, 0, 16, 0,
                   IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
         get_lowest_part_y(h, refs, 4, 16, 0,
                   IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
     }else{
         int i;
 
         assert(IS_8X8(mb_type));
 
         for(i=0; i<4; i++){
             const int sub_mb_type= h->sub_mb_type[i];
             const int n= 4*i;
             int y_offset= (i&2)<<2;
 
             if(IS_SUB_8X8(sub_mb_type)){
                 get_lowest_part_y(h, refs, n  , 8, y_offset,
                           IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
             }else if(IS_SUB_8X4(sub_mb_type)){
                 get_lowest_part_y(h, refs, n  , 4, y_offset,
                           IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
                 get_lowest_part_y(h, refs, n+2, 4, y_offset+4,
                           IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
             }else if(IS_SUB_4X8(sub_mb_type)){
                 get_lowest_part_y(h, refs, n  , 8, y_offset,
                           IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
                 get_lowest_part_y(h, refs, n+1, 8, y_offset,
                           IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
             }else{
                 int j;
                 assert(IS_SUB_4X4(sub_mb_type));
                 for(j=0; j<4; j++){
                     int sub_y_offset= y_offset + 2*(j&2);
                     get_lowest_part_y(h, refs, n+j, 4, sub_y_offset,
                               IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
                 }
             }
         }
     }
 
     for(list=h->list_count-1; list>=0; list--){
         for(ref=0; ref<48 && nrefs[list]; ref++){
             int row = refs[list][ref];
             if(row >= 0){
                 Picture *ref_pic = &h->ref_list[list][ref];
                 int ref_field = ref_pic->reference - 1;
                 int ref_field_picture = ref_pic->field_picture;
                 int pic_height = 16*s->mb_height >> ref_field_picture;
 
                 row <<= MB_MBAFF;
                 nrefs[list]--;
 
                 if(!FIELD_PICTURE && ref_field_picture){ // frame referencing two fields
                     ff_thread_await_progress((AVFrame*)ref_pic, FFMIN((row >> 1) - !(row&1), pic_height-1), 1);
                     ff_thread_await_progress((AVFrame*)ref_pic, FFMIN((row >> 1)           , pic_height-1), 0);
                 }else if(FIELD_PICTURE && !ref_field_picture){ // field referencing one field of a frame
                     ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row*2 + ref_field    , pic_height-1), 0);
                 }else if(FIELD_PICTURE){
                     ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row, pic_height-1), ref_field);
                 }else{
                     ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row, pic_height-1), 0);
                 }
             }
         }
     }
 }
 
772225c0
 #if 0
 /**
  * DCT transforms the 16 dc values.
  * @param qp quantization parameter ??? FIXME
  */
 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
 //    const int qmul= dequant_coeff[qp][0];
     int i;
     int temp[16]; //FIXME check if this is a good idea
     static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
     static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
 
     for(i=0; i<4; i++){
         const int offset= y_offset[i];
         const int z0= block[offset+stride*0] + block[offset+stride*4];
         const int z1= block[offset+stride*0] - block[offset+stride*4];
         const int z2= block[offset+stride*1] - block[offset+stride*5];
         const int z3= block[offset+stride*1] + block[offset+stride*5];
 
         temp[4*i+0]= z0+z3;
         temp[4*i+1]= z1+z2;
         temp[4*i+2]= z1-z2;
         temp[4*i+3]= z0-z3;
     }
 
     for(i=0; i<4; i++){
         const int offset= x_offset[i];
         const int z0= temp[4*0+i] + temp[4*2+i];
         const int z1= temp[4*0+i] - temp[4*2+i];
         const int z2= temp[4*1+i] - temp[4*3+i];
         const int z3= temp[4*1+i] + temp[4*3+i];
 
         block[stride*0 +offset]= (z0 + z3)>>1;
         block[stride*2 +offset]= (z1 + z2)>>1;
         block[stride*8 +offset]= (z1 - z2)>>1;
         block[stride*10+offset]= (z0 - z3)>>1;
     }
 }
 #endif
 
 #undef xStride
 #undef stride
 
 #if 0
 static void chroma_dc_dct_c(DCTELEM *block){
     const int stride= 16*2;
     const int xStride= 16;
     int a,b,c,d,e;
 
     a= block[stride*0 + xStride*0];
     b= block[stride*0 + xStride*1];
     c= block[stride*1 + xStride*0];
     d= block[stride*1 + xStride*1];
 
     e= a-b;
     a= a+b;
     b= c-d;
     c= c+d;
 
     block[stride*0 + xStride*0]= (a+c);
     block[stride*0 + xStride*1]= (e+b);
     block[stride*1 + xStride*0]= (a-c);
     block[stride*1 + xStride*1]= (e-b);
 }
 #endif
 
f0e9ee8d
 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                            int src_x_offset, int src_y_offset,
21d5de93
                            qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op,
c90b9442
                            int pixel_shift, int chroma444){
f0e9ee8d
     MpegEncContext * const s = &h->s;
     const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
     int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
     const int luma_xy= (mx&3) + ((my&3)<<2);
c90b9442
     int offset = ((mx>>2) << pixel_shift) + (my>>2)*h->mb_linesize;
     uint8_t * src_y = pic->data[0] + offset;
f0e9ee8d
     uint8_t * src_cb, * src_cr;
     int extra_width= h->emu_edge_width;
     int extra_height= h->emu_edge_height;
     int emu=0;
     const int full_mx= mx>>2;
     const int full_my= my>>2;
     const int pic_width  = 16*s->mb_width;
     const int pic_height = 16*s->mb_height >> MB_FIELD;
 
     if(mx&7) extra_width -= 3;
     if(my&7) extra_height -= 3;
 
     if(   full_mx < 0-extra_width
        || full_my < 0-extra_height
        || full_mx + 16/*FIXME*/ > pic_width + extra_width
        || full_my + 16/*FIXME*/ > pic_height + extra_height){
21d5de93
         s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
             src_y= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
f0e9ee8d
         emu=1;
     }
 
     qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
     if(!square){
         qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
     }
 
     if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
 
c90b9442
     if(chroma444){
         src_cb = pic->data[1] + offset;
         if(emu){
             s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize,
                                     16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
             src_cb= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
         }
         qpix_op[luma_xy](dest_cb, src_cb, h->mb_linesize); //FIXME try variable height perhaps?
         if(!square){
             qpix_op[luma_xy](dest_cb + delta, src_cb + delta, h->mb_linesize);
         }
 
         src_cr = pic->data[2] + offset;
         if(emu){
             s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize,
                                     16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
             src_cr= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
         }
         qpix_op[luma_xy](dest_cr, src_cr, h->mb_linesize); //FIXME try variable height perhaps?
         if(!square){
             qpix_op[luma_xy](dest_cr + delta, src_cr + delta, h->mb_linesize);
         }
         return;
     }
 
f0e9ee8d
     if(MB_FIELD){
         // chroma offset when predicting from a field of opposite parity
         my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
         emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
     }
21d5de93
     src_cb= pic->data[1] + ((mx>>3) << pixel_shift) + (my>>3)*h->mb_uvlinesize;
     src_cr= pic->data[2] + ((mx>>3) << pixel_shift) + (my>>3)*h->mb_uvlinesize;
f0e9ee8d
 
     if(emu){
         s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
             src_cb= s->edge_emu_buffer;
     }
     chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
 
     if(emu){
         s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
             src_cr= s->edge_emu_buffer;
     }
     chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
 }
 
 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                            int x_offset, int y_offset,
                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
c90b9442
                            int list0, int list1, int pixel_shift, int chroma444){
f0e9ee8d
     MpegEncContext * const s = &h->s;
     qpel_mc_func *qpix_op=  qpix_put;
     h264_chroma_mc_func chroma_op= chroma_put;
 
c90b9442
     dest_y  += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
     if(chroma444){
         dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
         dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
     }else{
         dest_cb += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
         dest_cr += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
     }
f0e9ee8d
     x_offset += 8*s->mb_x;
     y_offset += 8*(s->mb_y >> MB_FIELD);
 
     if(list0){
         Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
         mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
                            dest_y, dest_cb, dest_cr, x_offset, y_offset,
c90b9442
                            qpix_op, chroma_op, pixel_shift, chroma444);
f0e9ee8d
 
         qpix_op=  qpix_avg;
         chroma_op= chroma_avg;
     }
 
     if(list1){
         Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
         mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
                            dest_y, dest_cb, dest_cr, x_offset, y_offset,
c90b9442
                            qpix_op, chroma_op, pixel_shift, chroma444);
f0e9ee8d
     }
 }
 
 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                            int x_offset, int y_offset,
                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                            h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
                            h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
c90b9442
                            int list0, int list1, int pixel_shift, int chroma444){
f0e9ee8d
     MpegEncContext * const s = &h->s;
 
c90b9442
     dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
     if(chroma444){
         chroma_weight_avg = luma_weight_avg;
         chroma_weight_op = luma_weight_op;
         dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
         dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
     }else{
         dest_cb += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
         dest_cr += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
     }
f0e9ee8d
     x_offset += 8*s->mb_x;
     y_offset += 8*(s->mb_y >> MB_FIELD);
 
     if(list0 && list1){
         /* don't optimize for luma-only case, since B-frames usually
          * use implicit weights => chroma too. */
         uint8_t *tmp_cb = s->obmc_scratchpad;
c90b9442
         uint8_t *tmp_cr = s->obmc_scratchpad + (16 << pixel_shift);
         uint8_t *tmp_y  = s->obmc_scratchpad + 16*h->mb_uvlinesize;
f0e9ee8d
         int refn0 = h->ref_cache[0][ scan8[n] ];
         int refn1 = h->ref_cache[1][ scan8[n] ];
 
         mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
                     dest_y, dest_cb, dest_cr,
c90b9442
                     x_offset, y_offset, qpix_put, chroma_put, pixel_shift, chroma444);
f0e9ee8d
         mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
                     tmp_y, tmp_cb, tmp_cr,
c90b9442
                     x_offset, y_offset, qpix_put, chroma_put, pixel_shift, chroma444);
f0e9ee8d
 
         if(h->use_weight == 2){
             int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1];
             int weight1 = 64 - weight0;
             luma_weight_avg(  dest_y,  tmp_y,  h->  mb_linesize, 5, weight0, weight1, 0);
             chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
             chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
         }else{
             luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
                             h->luma_weight[refn0][0][0] , h->luma_weight[refn1][1][0],
                             h->luma_weight[refn0][0][1] + h->luma_weight[refn1][1][1]);
             chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                             h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0],
                             h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]);
             chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                             h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0],
                             h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]);
         }
     }else{
         int list = list1 ? 1 : 0;
         int refn = h->ref_cache[list][ scan8[n] ];
         Picture *ref= &h->ref_list[list][refn];
         mc_dir_part(h, ref, n, square, chroma_height, delta, list,
                     dest_y, dest_cb, dest_cr, x_offset, y_offset,
c90b9442
                     qpix_put, chroma_put, pixel_shift, chroma444);
f0e9ee8d
 
         luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
                        h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]);
         if(h->use_weight_chroma){
             chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                              h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]);
             chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                              h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]);
         }
     }
 }
 
 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                            int x_offset, int y_offset,
                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
                            h264_weight_func *weight_op, h264_biweight_func *weight_avg,
c90b9442
                            int list0, int list1, int pixel_shift, int chroma444){
f0e9ee8d
     if((h->use_weight==2 && list0 && list1
         && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32))
        || h->use_weight==1)
         mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
                          x_offset, y_offset, qpix_put, chroma_put,
21d5de93
                          weight_op[0], weight_op[3], weight_avg[0],
c90b9442
                          weight_avg[3], list0, list1, pixel_shift, chroma444);
f0e9ee8d
     else
         mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
21d5de93
                     x_offset, y_offset, qpix_put, chroma_put, qpix_avg,
c90b9442
                     chroma_avg, list0, list1, pixel_shift, chroma444);
f0e9ee8d
 }
 
c90b9442
 static inline void prefetch_motion(H264Context *h, int list, int pixel_shift, int chroma444){
f0e9ee8d
     /* fetch pixels for estimated mv 4 macroblocks ahead
      * optimized for 64-byte cache lines */
     MpegEncContext * const s = &h->s;
     const int refn = h->ref_cache[list][scan8[0]];
     if(refn >= 0){
         const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
         const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
         uint8_t **src= h->ref_list[list][refn].data;
         int off= ((mx+64)<<h->pixel_shift) + (my + (s->mb_x&3)*4)*h->mb_linesize;
         s->dsp.prefetch(src[0]+off, s->linesize, 4);
c90b9442
         if(chroma444){
             s->dsp.prefetch(src[1]+off, s->linesize, 4);
             s->dsp.prefetch(src[2]+off, s->linesize, 4);
         }else{
c137fdd7
             off= (((mx>>1)+64)<<pixel_shift) + ((my>>1) + (s->mb_x&7))*s->uvlinesize;
c90b9442
             s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
         }
f0e9ee8d
     }
 }
 
21d5de93
 static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
f0e9ee8d
                       qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
                       qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
21d5de93
                       h264_weight_func *weight_op, h264_biweight_func *weight_avg,
c90b9442
                       int pixel_shift, int chroma444){
f0e9ee8d
     MpegEncContext * const s = &h->s;
     const int mb_xy= h->mb_xy;
     const int mb_type= s->current_picture.mb_type[mb_xy];
 
     assert(IS_INTER(mb_type));
 
21d5de93
     if(HAVE_PTHREADS && (s->avctx->active_thread_type & FF_THREAD_FRAME))
f0e9ee8d
         await_references(h);
c90b9442
     prefetch_motion(h, 0, pixel_shift, chroma444);
f0e9ee8d
 
     if(IS_16X16(mb_type)){
         mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
                 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
                 weight_op, weight_avg,
21d5de93
                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
c90b9442
                 pixel_shift, chroma444);
f0e9ee8d
     }else if(IS_16X8(mb_type)){
21d5de93
         mc_part(h, 0, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 0,
f0e9ee8d
                 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                 &weight_op[1], &weight_avg[1],
21d5de93
                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
c90b9442
                 pixel_shift, chroma444);
21d5de93
         mc_part(h, 8, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 4,
f0e9ee8d
                 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                 &weight_op[1], &weight_avg[1],
21d5de93
                 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
c90b9442
                 pixel_shift, chroma444);
f0e9ee8d
     }else if(IS_8X16(mb_type)){
         mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
                 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                 &weight_op[2], &weight_avg[2],
21d5de93
                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
c90b9442
                 pixel_shift, chroma444);
f0e9ee8d
         mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
                 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                 &weight_op[2], &weight_avg[2],
21d5de93
                 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
c90b9442
                 pixel_shift, chroma444);
f0e9ee8d
     }else{
         int i;
 
         assert(IS_8X8(mb_type));
 
         for(i=0; i<4; i++){
             const int sub_mb_type= h->sub_mb_type[i];
             const int n= 4*i;
             int x_offset= (i&1)<<2;
             int y_offset= (i&2)<<1;
 
             if(IS_SUB_8X8(sub_mb_type)){
                 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                     qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                     &weight_op[3], &weight_avg[3],
21d5de93
                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
c90b9442
                     pixel_shift, chroma444);
f0e9ee8d
             }else if(IS_SUB_8X4(sub_mb_type)){
21d5de93
                 mc_part(h, n  , 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset,
f0e9ee8d
                     qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                     &weight_op[4], &weight_avg[4],
21d5de93
                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
c90b9442
                     pixel_shift, chroma444);
21d5de93
                 mc_part(h, n+2, 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
f0e9ee8d
                     qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                     &weight_op[4], &weight_avg[4],
21d5de93
                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
c90b9442
                     pixel_shift, chroma444);
f0e9ee8d
             }else if(IS_SUB_4X8(sub_mb_type)){
                 mc_part(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                     qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                     &weight_op[5], &weight_avg[5],
21d5de93
                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
c90b9442
                     pixel_shift, chroma444);
f0e9ee8d
                 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
                     qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                     &weight_op[5], &weight_avg[5],
21d5de93
                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
c90b9442
                     pixel_shift, chroma444);
f0e9ee8d
             }else{
                 int j;
                 assert(IS_SUB_4X4(sub_mb_type));
                 for(j=0; j<4; j++){
                     int sub_x_offset= x_offset + 2*(j&1);
                     int sub_y_offset= y_offset +   (j&2);
                     mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
                         qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                         &weight_op[6], &weight_avg[6],
21d5de93
                         IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
c90b9442
                         pixel_shift, chroma444);
f0e9ee8d
                 }
             }
         }
     }
 
c90b9442
     prefetch_motion(h, 1, pixel_shift, chroma444);
f0e9ee8d
 }
 
21d5de93
 #define hl_motion_fn(sh, bits) \
 static av_always_inline void hl_motion_ ## bits(H264Context *h, \
                                        uint8_t *dest_y, \
                                        uint8_t *dest_cb, uint8_t *dest_cr, \
                                        qpel_mc_func (*qpix_put)[16], \
                                        h264_chroma_mc_func (*chroma_put), \
                                        qpel_mc_func (*qpix_avg)[16], \
                                        h264_chroma_mc_func (*chroma_avg), \
                                        h264_weight_func *weight_op, \
c90b9442
                                        h264_biweight_func *weight_avg, \
                                        int chroma444) \
21d5de93
 { \
     hl_motion(h, dest_y, dest_cb, dest_cr, qpix_put, chroma_put, \
c90b9442
               qpix_avg, chroma_avg, weight_op, weight_avg, sh, chroma444); \
21d5de93
 }
 hl_motion_fn(0, 8);
 hl_motion_fn(1, 16);
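 /* The sh argument of the macro above is the pixel shift: 0 for 8-bit decoding,
  * 1 when samples are stored as 16 bits for high bit depth, so hl_motion() is
  * specialized at compile time for each case. */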
f0e9ee8d
 
2ed0f766
 static void free_tables(H264Context *h, int free_rbsp){
7978debd
     int i;
afebe2f7
     H264Context *hx;
6a9c8594
 
0da71265
     av_freep(&h->intra4x4_pred_mode);
e5017ab8
     av_freep(&h->chroma_pred_mode_table);
     av_freep(&h->cbp_table);
9e528114
     av_freep(&h->mvd_table[0]);
     av_freep(&h->mvd_table[1]);
5ad984c9
     av_freep(&h->direct_table);
0da71265
     av_freep(&h->non_zero_count);
     av_freep(&h->slice_table_base);
     h->slice_table= NULL;
c988f975
     av_freep(&h->list_counts);
e5017ab8
 
0da71265
     av_freep(&h->mb2b_xy);
d43c1922
     av_freep(&h->mb2br_xy);
9f2d1b4f
 
6752dd5a
     for(i = 0; i < MAX_THREADS; i++) {
afebe2f7
         hx = h->thread_context[i];
         if(!hx) continue;
         av_freep(&hx->top_borders[1]);
         av_freep(&hx->top_borders[0]);
         av_freep(&hx->s.obmc_scratchpad);
2ed0f766
         if (free_rbsp){
fb397b1a
             av_freep(&hx->rbsp_buffer[1]);
             av_freep(&hx->rbsp_buffer[0]);
             hx->rbsp_buffer_size[0] = 0;
             hx->rbsp_buffer_size[1] = 0;
2ed0f766
         }
d2d5e067
         if (i) av_freep(&h->thread_context[i]);
afebe2f7
     }
0da71265
 }
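 /* The dequant tables below hold, per scaling matrix and QP, the normative
  * constant for qp%6 multiplied by the scaling-matrix entry and pre-shifted by
  * qp/6; matrices with identical scaling lists share one buffer (the memcmp). */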
 
239ea04c
 static void init_dequant8_coeff_table(H264Context *h){
c90b9442
     int i,j,q,x;
d268bed2
     const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
239ea04c
 
c90b9442
     for(i=0; i<6; i++ ){
         h->dequant8_coeff[i] = h->dequant8_buffer[i];
         for(j=0; j<i; j++){
             if(!memcmp(h->pps.scaling_matrix8[j], h->pps.scaling_matrix8[i], 64*sizeof(uint8_t))){
                 h->dequant8_coeff[i] = h->dequant8_buffer[j];
                 break;
             }
239ea04c
         }
c90b9442
         if(j<i)
             continue;
239ea04c
 
d268bed2
         for(q=0; q<max_qp+1; q++){
d9ec210b
             int shift = div6[q];
             int idx = rem6[q];
239ea04c
             for(x=0; x<64; x++)
ca32f7f2
                 h->dequant8_coeff[i][q][(x>>3)|((x&7)<<3)] =
548a1c8a
                     ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
                     h->pps.scaling_matrix8[i][x]) << shift;
239ea04c
         }
     }
 }
 
 static void init_dequant4_coeff_table(H264Context *h){
     int i,j,q,x;
d268bed2
     const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
239ea04c
     for(i=0; i<6; i++ ){
         h->dequant4_coeff[i] = h->dequant4_buffer[i];
         for(j=0; j<i; j++){
             if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
                 h->dequant4_coeff[i] = h->dequant4_buffer[j];
                 break;
             }
         }
         if(j<i)
             continue;
 
d268bed2
         for(q=0; q<max_qp+1; q++){
d9ec210b
             int shift = div6[q] + 2;
             int idx = rem6[q];
239ea04c
             for(x=0; x<16; x++)
ca32f7f2
                 h->dequant4_coeff[i][q][(x>>2)|((x<<2)&0xF)] =
ab2e3e2c
                     ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
239ea04c
                     h->pps.scaling_matrix4[i][x]) << shift;
         }
     }
 }
 
 static void init_dequant_tables(H264Context *h){
     int i,x;
     init_dequant4_coeff_table(h);
     if(h->pps.transform_8x8_mode)
         init_dequant8_coeff_table(h);
     if(h->sps.transform_bypass){
         for(i=0; i<6; i++)
             for(x=0; x<16; x++)
                 h->dequant4_coeff[i][0][x] = 1<<6;
         if(h->pps.transform_8x8_mode)
c90b9442
             for(i=0; i<6; i++)
239ea04c
                 for(x=0; x<64; x++)
                     h->dequant8_coeff[i][0][x] = 1<<6;
     }
 }
 
 
903d58f6
 int ff_h264_alloc_tables(H264Context *h){
0da71265
     MpegEncContext * const s = &h->s;
7bc9090a
     const int big_mb_num= s->mb_stride * (s->mb_height+1);
145061a1
     const int row_mb_num= 2*s->mb_stride*s->avctx->thread_count;
239ea04c
     int x,y;
0da71265
 
145061a1
     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->intra4x4_pred_mode, row_mb_num * 8  * sizeof(uint8_t), fail)
e5017ab8
 
c90b9442
     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count    , big_mb_num * 48 * sizeof(uint8_t), fail)
d31dbec3
     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base), fail)
     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->cbp_table, big_mb_num * sizeof(uint16_t), fail)
0da71265
 
d31dbec3
     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t), fail)
145061a1
     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[0], 16*row_mb_num * sizeof(uint8_t), fail);
     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[1], 16*row_mb_num * sizeof(uint8_t), fail);
36b54927
     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->direct_table, 4*big_mb_num * sizeof(uint8_t) , fail);
c988f975
     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->list_counts, big_mb_num * sizeof(uint8_t), fail)
e5017ab8
 
b735aeea
     memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride)  * sizeof(*h->slice_table_base));
5d18eaad
     h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
0da71265
 
d31dbec3
     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b_xy  , big_mb_num * sizeof(uint32_t), fail);
d43c1922
     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2br_xy , big_mb_num * sizeof(uint32_t), fail);
0da71265
     for(y=0; y<s->mb_height; y++){
         for(x=0; x<s->mb_width; x++){
7bc9090a
             const int mb_xy= x + y*s->mb_stride;
0da71265
             const int b_xy = 4*x + 4*y*h->b_stride;
115329f1
 
0da71265
             h->mb2b_xy [mb_xy]= b_xy;
e1c88a21
             h->mb2br_xy[mb_xy]= 8*(FMO ? mb_xy : (mb_xy % (2*s->mb_stride)));
0da71265
         }
     }
9f2d1b4f
 
9c6221ae
     s->obmc_scratchpad = NULL;
 
56edbd81
     if(!h->dequant4_coeff[0])
         init_dequant_tables(h);
 
0da71265
     return 0;
 fail:
2ed0f766
     free_tables(h, 1);
0da71265
     return -1;
 }
 
afebe2f7
 /**
  * Mimic alloc_tables(), but for every context thread.
  */
145061a1
 static void clone_tables(H264Context *dst, H264Context *src, int i){
     MpegEncContext * const s = &src->s;
     dst->intra4x4_pred_mode       = src->intra4x4_pred_mode + i*8*2*s->mb_stride;
afebe2f7
     dst->non_zero_count           = src->non_zero_count;
     dst->slice_table              = src->slice_table;
     dst->cbp_table                = src->cbp_table;
     dst->mb2b_xy                  = src->mb2b_xy;
d43c1922
     dst->mb2br_xy                 = src->mb2br_xy;
afebe2f7
     dst->chroma_pred_mode_table   = src->chroma_pred_mode_table;
145061a1
     dst->mvd_table[0]             = src->mvd_table[0] + i*8*2*s->mb_stride;
     dst->mvd_table[1]             = src->mvd_table[1] + i*8*2*s->mb_stride;
afebe2f7
     dst->direct_table             = src->direct_table;
fb823b77
     dst->list_counts              = src->list_counts;
afebe2f7
 
     dst->s.obmc_scratchpad = NULL;
8dbe5856
     ff_h264_pred_init(&dst->hpc, src->s.codec_id, src->sps.bit_depth_luma);
afebe2f7
 }
 
 /**
  * Initialize the context.
  * Allocate buffers which are not shared amongst multiple threads.
  */
 static int context_init(H264Context *h){
c90b9442
     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * 16*3 * sizeof(uint8_t)*2, fail)
     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * 16*3 * sizeof(uint8_t)*2, fail)
afebe2f7
 
145061a1
     h->ref_cache[0][scan8[5 ]+1] = h->ref_cache[0][scan8[7 ]+1] = h->ref_cache[0][scan8[13]+1] =
     h->ref_cache[1][scan8[5 ]+1] = h->ref_cache[1][scan8[7 ]+1] = h->ref_cache[1][scan8[13]+1] = PART_NOT_AVAILABLE;
 
afebe2f7
     return 0;
 fail:
     return -1; // free_tables will clean up for us
 }
 
9855b2e3
 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size);
 
98a6fff9
 static av_cold void common_init(H264Context *h){
0da71265
     MpegEncContext * const s = &h->s;
 
     s->width = s->avctx->width;
     s->height = s->avctx->height;
     s->codec_id= s->avctx->codec->id;
115329f1
 
8dbe5856
     ff_h264dsp_init(&h->h264dsp, 8);
     ff_h264_pred_init(&h->hpc, s->codec_id, 8);
0da71265
 
239ea04c
     h->dequant_coeff_pps= -1;
9a41c2c7
     s->unrestricted_mv=1;
0da71265
     s->decode=1; //FIXME
56edbd81
 
a5805aa9
     dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
 
56edbd81
     memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
     memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
0da71265
 }
 
f71c761a
 int ff_h264_decode_extradata(H264Context *h, const uint8_t *buf, int size)
05e95319
 {
     AVCodecContext *avctx = h->s.avctx;
9855b2e3
 
7aa24b15
     if(!buf || size <= 0)
         return -1;
 
f71c761a
     if(buf[0] == 1){
9855b2e3
         int i, cnt, nalsize;
f71c761a
         const unsigned char *p = buf;
9855b2e3
 
         h->is_avc = 1;
 
f71c761a
         if(size < 7) {
9855b2e3
             av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
             return -1;
         }
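         /* avcC (AVCDecoderConfigurationRecord) layout as parsed below: byte 0
          * configurationVersion, bytes 1-3 profile/compat/level, byte 4 low two
          * bits lengthSizeMinusOne, byte 5 low five bits the SPS count, then
          * length-prefixed SPS NALs, a PPS count byte and the PPS NALs. */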
         /* sps and pps in the avcC always have length coded with 2 bytes,
            so put a fake nal_length_size = 2 while parsing them */
         h->nal_length_size = 2;
         // Decode sps from avcC
         cnt = *(p+5) & 0x1f; // Number of sps
         p += 6;
         for (i = 0; i < cnt; i++) {
             nalsize = AV_RB16(p) + 2;
101e38e0
             if(nalsize > size - (p-buf))
                 return -1;
9855b2e3
             if(decode_nal_units(h, p, nalsize) < 0) {
                 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
                 return -1;
             }
             p += nalsize;
         }
         // Decode pps from avcC
         cnt = *(p++); // Number of pps
         for (i = 0; i < cnt; i++) {
             nalsize = AV_RB16(p) + 2;
101e38e0
             if(nalsize > size - (p-buf))
                 return -1;
108f318d
             if (decode_nal_units(h, p, nalsize) < 0) {
9855b2e3
                 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
                 return -1;
             }
             p += nalsize;
         }
         // Now store the correct nal_length_size, which will be used to parse all other NALs
f71c761a
         h->nal_length_size = (buf[4] & 0x03) + 1;
9855b2e3
     } else {
         h->is_avc = 0;
f71c761a
         if(decode_nal_units(h, buf, size) < 0)
9855b2e3
             return -1;
     }
05e95319
     return 0;
 }
 
 av_cold int ff_h264_decode_init(AVCodecContext *avctx){
     H264Context *h= avctx->priv_data;
     MpegEncContext * const s = &h->s;
 
     MPV_decode_defaults(s);
 
     s->avctx = avctx;
     common_init(h);
 
     s->out_format = FMT_H264;
     s->workaround_bugs= avctx->workaround_bugs;
 
     // set defaults
 //    s->decode_mb= ff_h263_decode_mb;
     s->quarter_sample = 1;
     if(!avctx->has_b_frames)
     s->low_delay= 1;
 
     avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;
 
     ff_h264_decode_init_vlc();
 
e7077f5e
     h->pixel_shift = 0;
19a0729b
     h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8;
dc172ecc
 
05e95319
     h->thread_context[0] = h;
d375c104
     h->outputed_poc = h->next_outputed_poc = INT_MIN;
05e95319
     h->prev_poc_msb= 1<<16;
     h->x264_build = -1;
     ff_h264_reset_sei(h);
     if(avctx->codec_id == CODEC_ID_H264){
         if(avctx->ticks_per_frame == 1){
             s->avctx->time_base.den *=2;
         }
         avctx->ticks_per_frame = 2;
     }
 
     if(avctx->extradata_size > 0 && avctx->extradata &&
f71c761a
         ff_h264_decode_extradata(h, avctx->extradata, avctx->extradata_size))
05e95319
         return -1;
 
db8cb47d
     if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames < h->sps.num_reorder_frames){
         s->avctx->has_b_frames = h->sps.num_reorder_frames;
         s->low_delay = 0;
     }
9855b2e3
 
0da71265
     return 0;
 }
 
0424e052
 #define IN_RANGE(a, b, size) (((a) >= (b)) && ((a) < ((b)+(size))))
d375c104
 static void copy_picture_range(Picture **to, Picture **from, int count, MpegEncContext *new_base, MpegEncContext *old_base)
 {
     int i;
 
     for (i=0; i<count; i++){
0424e052
         assert((IN_RANGE(from[i], old_base, sizeof(*old_base)) ||
                 IN_RANGE(from[i], old_base->picture, sizeof(Picture) * old_base->picture_count) ||
                 !from[i]));
d375c104
         to[i] = REBASE_PICTURE(from[i], new_base, old_base);
     }
 }
 
 static void copy_parameter_set(void **to, void **from, int count, int size)
 {
     int i;
 
     for (i=0; i<count; i++){
         if (to[i] && !from[i]) av_freep(&to[i]);
         else if (from[i] && !to[i]) to[i] = av_malloc(size);
 
         if (from[i]) memcpy(to[i], from[i], size);
     }
 }
 
 static int decode_init_thread_copy(AVCodecContext *avctx){
     H264Context *h= avctx->priv_data;
 
     if (!avctx->is_copy) return 0;
     memset(h->sps_buffers, 0, sizeof(h->sps_buffers));
     memset(h->pps_buffers, 0, sizeof(h->pps_buffers));
 
     return 0;
 }
 
 #define copy_fields(to, from, start_field, end_field) memcpy(&to->start_field, &from->start_field, (char*)&to->end_field - (char*)&to->start_field)
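 /* copy_fields() copies the contiguous range of H264Context members from
  * start_field up to (but not including) end_field; the named fields therefore
  * have to stay adjacent and in declaration order in the struct. */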
 static int decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src){
     H264Context *h= dst->priv_data, *h1= src->priv_data;
     MpegEncContext * const s = &h->s, * const s1 = &h1->s;
     int inited = s->context_initialized, err;
     int i;
 
     if(dst == src || !s1->context_initialized) return 0;
 
     err = ff_mpeg_update_thread_context(dst, src);
     if(err) return err;
 
     //FIXME handle width/height changing
     if(!inited){
         for(i = 0; i < MAX_SPS_COUNT; i++)
             av_freep(h->sps_buffers + i);
 
         for(i = 0; i < MAX_PPS_COUNT; i++)
             av_freep(h->pps_buffers + i);
 
         memcpy(&h->s + 1, &h1->s + 1, sizeof(H264Context) - sizeof(MpegEncContext)); //copy all fields after MpegEnc
         memset(h->sps_buffers, 0, sizeof(h->sps_buffers));
         memset(h->pps_buffers, 0, sizeof(h->pps_buffers));
cc471819
         if (ff_h264_alloc_tables(h) < 0) {
             av_log(dst, AV_LOG_ERROR, "Could not allocate memory for h264\n");
             return AVERROR(ENOMEM);
         }
d375c104
         context_init(h);
 
         for(i=0; i<2; i++){
             h->rbsp_buffer[i] = NULL;
             h->rbsp_buffer_size[i] = 0;
         }
 
         h->thread_context[0] = h;
 
         // frame_start may not be called for the next thread (if it's decoding a bottom field)
         // so this has to be allocated here
c90b9442
         h->s.obmc_scratchpad = av_malloc(16*6*s->linesize);
d375c104
 
         s->dsp.clear_blocks(h->mb);
c90b9442
         s->dsp.clear_blocks(h->mb+(24*16<<h->pixel_shift));
d375c104
     }
 
     //extradata/NAL handling
     h->is_avc          = h1->is_avc;
 
     //SPS/PPS
     copy_parameter_set((void**)h->sps_buffers, (void**)h1->sps_buffers, MAX_SPS_COUNT, sizeof(SPS));
     h->sps             = h1->sps;
     copy_parameter_set((void**)h->pps_buffers, (void**)h1->pps_buffers, MAX_PPS_COUNT, sizeof(PPS));
     h->pps             = h1->pps;
 
     //Dequantization matrices
     //FIXME these are big - can they be only copied when PPS changes?
     copy_fields(h, h1, dequant4_buffer, dequant4_coeff);
 
     for(i=0; i<6; i++)
         h->dequant4_coeff[i] = h->dequant4_buffer[0] + (h1->dequant4_coeff[i] - h1->dequant4_buffer[0]);
 
c90b9442
     for(i=0; i<6; i++)
d375c104
         h->dequant8_coeff[i] = h->dequant8_buffer[0] + (h1->dequant8_coeff[i] - h1->dequant8_buffer[0]);
 
     h->dequant_coeff_pps = h1->dequant_coeff_pps;
 
     //POC timing
     copy_fields(h, h1, poc_lsb, redundant_pic_count);
 
     //reference lists
6a9c8594
     copy_fields(h, h1, ref_count, list_count);
     copy_fields(h, h1, ref_list,  intra_gb);
d375c104
     copy_fields(h, h1, short_ref, cabac_init_idc);
 
     copy_picture_range(h->short_ref,   h1->short_ref,   32, s, s1);
7e2eb4ba
     copy_picture_range(h->long_ref,    h1->long_ref,    32, s, s1);
d375c104
     copy_picture_range(h->delayed_pic, h1->delayed_pic, MAX_DELAYED_PIC_COUNT+2, s, s1);
 
     h->last_slice_type = h1->last_slice_type;
 
     if(!s->current_picture_ptr) return 0;
 
     if(!s->dropable) {
         ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
         h->prev_poc_msb     = h->poc_msb;
         h->prev_poc_lsb     = h->poc_lsb;
     }
     h->prev_frame_num_offset= h->frame_num_offset;
     h->prev_frame_num       = h->frame_num;
     h->outputed_poc         = h->next_outputed_poc;
 
     return 0;
 }
 
903d58f6
 int ff_h264_frame_start(H264Context *h){
0da71265
     MpegEncContext * const s = &h->s;
     int i;
6e3ef511
     const int pixel_shift = h->pixel_shift;
6a9c8594
     int thread_count = (s->avctx->active_thread_type & FF_THREAD_SLICE) ? s->avctx->thread_count : 1;
0da71265
 
af8aa846
     if(MPV_frame_start(s, s->avctx) < 0)
         return -1;
0da71265
     ff_er_frame_start(s);
3a22d7fa
     /*
      * MPV_frame_start uses pict_type to derive key_frame.
      * This is incorrect for H.264; IDR markings must be used.
1412060e
      * Zero here; IDR markings per slice in frame or fields are ORed in later.
3a22d7fa
      * See decode_nal_units().
      */
     s->current_picture_ptr->key_frame= 0;
c173a088
     s->current_picture_ptr->mmco_reset= 0;
0da71265
 
     assert(s->linesize && s->uvlinesize);
 
     for(i=0; i<16; i++){
6e3ef511
         h->block_offset[i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
c90b9442
         h->block_offset[48+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
0da71265
     }
c90b9442
     for(i=0; i<16; i++){
0da71265
         h->block_offset[16+i]=
c90b9442
         h->block_offset[32+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
         h->block_offset[48+16+i]=
         h->block_offset[48+32+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
0da71265
     }
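     /* block_offset[] maps a scan8 block index to a byte offset inside the
      * picture planes; the second half (+48) holds the same offsets with a
      * doubled row stride and is used when a macroblock is decoded as a field
      * macroblock (MBAFF). */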
 
934b0821
     /* can't be in alloc_tables because linesize isn't known there.
      * FIXME: redo bipred weight to not require extra buffer? */
6a9c8594
     for(i = 0; i < thread_count; i++)
0f016023
         if(h->thread_context[i] && !h->thread_context[i]->s.obmc_scratchpad)
c90b9442
             h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*6*s->linesize);
5d18eaad
 
2ce1c2e0
     /* Some macroblocks can be accessed before they are available, in case of lost slices, MBAFF or threading. */
5820b90d
     memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
934b0821
 
0da71265
 //    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
28bb9eb2
 
1412060e
     // We mark the current picture as non-reference after allocating it, so
28bb9eb2
     // that if we break out due to an error it can be released automatically
     // in the next MPV_frame_start().
     // SVQ3, like most other codecs, has only last/next/current pictures and
     // thus gets released even with the reference flag set; besides, SVQ3 and
     // the others do not mark frames as reference later "naturally".
     if(s->codec_id != CODEC_ID_SVQ3)
         s->current_picture_ptr->reference= 0;
357282c6
 
     s->current_picture_ptr->field_poc[0]=
     s->current_picture_ptr->field_poc[1]= INT_MAX;
d375c104
 
     h->next_output_pic = NULL;
 
5118c6c7
     assert(s->current_picture_ptr->long_ref==0);
357282c6
 
af8aa846
     return 0;
0da71265
 }
 
d375c104
 /**
   * Run setup operations that must be performed after slice header decoding.
   * This includes finding the next displayed frame.
   *
   * @param h h264 master context
0424e052
   * @param setup_finished enough NALs have been read that we can call
   * ff_thread_finish_setup()
d375c104
   */
0424e052
 static void decode_postinit(H264Context *h, int setup_finished){
d375c104
     MpegEncContext * const s = &h->s;
     Picture *out = s->current_picture_ptr;
     Picture *cur = s->current_picture_ptr;
     int i, pics, out_of_order, out_idx;
 
     s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
     s->current_picture_ptr->pict_type= s->pict_type;
 
     if (h->next_output_pic) return;
 
     if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
0424e052
         //FIXME: if we have two PAFF fields in one packet, we can't start the next thread here.
         //If we have one field per packet, we can. The check in decode_nal_units() is not good enough
         //to find this yet, so we assume the worst for now.
         //if (setup_finished)
         //    ff_thread_finish_setup(s->avctx);
d375c104
         return;
     }
 
     cur->interlaced_frame = 0;
     cur->repeat_pict = 0;
 
     /* Signal interlacing information externally. */
     /* Prefer picture timing SEI information, if present, over the values derived from the decoding process. */
 
     if(h->sps.pic_struct_present_flag){
         switch (h->sei_pic_struct)
         {
         case SEI_PIC_STRUCT_FRAME:
             break;
         case SEI_PIC_STRUCT_TOP_FIELD:
         case SEI_PIC_STRUCT_BOTTOM_FIELD:
             cur->interlaced_frame = 1;
             break;
         case SEI_PIC_STRUCT_TOP_BOTTOM:
         case SEI_PIC_STRUCT_BOTTOM_TOP:
             if (FIELD_OR_MBAFF_PICTURE)
                 cur->interlaced_frame = 1;
             else
                 // try to flag soft telecine progressive
                 cur->interlaced_frame = h->prev_interlaced_frame;
             break;
         case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
         case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
             // Signal the possibility of telecined film externally (pic_struct 5,6).
             // From these hints, the application can decide whether to apply deinterlacing.
             cur->repeat_pict = 1;
             break;
         case SEI_PIC_STRUCT_FRAME_DOUBLING:
             // Force progressive here, as doubling an interlaced frame is a bad idea.
             cur->repeat_pict = 2;
             break;
         case SEI_PIC_STRUCT_FRAME_TRIPLING:
             cur->repeat_pict = 4;
             break;
         }
 
         if ((h->sei_ct_type & 3) && h->sei_pic_struct <= SEI_PIC_STRUCT_BOTTOM_TOP)
             cur->interlaced_frame = (h->sei_ct_type & (1<<1)) != 0;
     }else{
         /* Derive interlacing flag from used decoding process. */
         cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
     }
     h->prev_interlaced_frame = cur->interlaced_frame;
 
     if (cur->field_poc[0] != cur->field_poc[1]){
         /* Derive top_field_first from field pocs. */
         cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
     }else{
         if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
             /* Use picture timing SEI information. Even if it belongs to a previous frame, it is better than nothing. */
             if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
               || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
                 cur->top_field_first = 1;
             else
                 cur->top_field_first = 0;
         }else{
             /* Most likely progressive */
             cur->top_field_first = 0;
         }
     }
 
     //FIXME do something with unavailable reference frames
 
     /* Sort B-frames into display order */
 
     if(h->sps.bitstream_restriction_flag
        && s->avctx->has_b_frames < h->sps.num_reorder_frames){
         s->avctx->has_b_frames = h->sps.num_reorder_frames;
         s->low_delay = 0;
     }
 
     if(   s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
        && !h->sps.bitstream_restriction_flag){
         s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
         s->low_delay= 0;
     }
 
     pics = 0;
     while(h->delayed_pic[pics]) pics++;
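     /* delayed_pic[] acts as the reordering buffer: the candidate for output
      * is the buffered picture with the smallest POC, and it is only output
      * once more than avctx->has_b_frames pictures are buffered. */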
 
1979a9b4
     av_assert0(pics <= MAX_DELAYED_PIC_COUNT);
d375c104
 
     h->delayed_pic[pics++] = cur;
     if(cur->reference == 0)
         cur->reference = DELAYED_PIC_REF;
 
     out = h->delayed_pic[0];
     out_idx = 0;
     for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)
         if(h->delayed_pic[i]->poc < out->poc){
             out = h->delayed_pic[i];
             out_idx = i;
         }
     if(s->avctx->has_b_frames == 0 && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset))
         h->next_outputed_poc= INT_MIN;
     out_of_order = out->poc < h->next_outputed_poc;
 
     if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
         { }
     else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
        || (s->low_delay &&
         ((h->next_outputed_poc != INT_MIN && out->poc > h->next_outputed_poc + 2)
ce5e49b0
          || cur->pict_type == AV_PICTURE_TYPE_B)))
d375c104
     {
         s->low_delay = 0;
         s->avctx->has_b_frames++;
     }
 
     if(out_of_order || pics > s->avctx->has_b_frames){
         out->reference &= ~DELAYED_PIC_REF;
7e2eb4ba
         out->owner2 = s; // for frame threading, the owner must be the second field's thread
                          // or else the first thread can release the picture and reuse it unsafely
d375c104
         for(i=out_idx; h->delayed_pic[i]; i++)
             h->delayed_pic[i] = h->delayed_pic[i+1];
     }
     if(!out_of_order && pics > s->avctx->has_b_frames){
         h->next_output_pic = out;
         if(out_idx==0 && h->delayed_pic[0] && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset)) {
             h->next_outputed_poc = INT_MIN;
         } else
             h->next_outputed_poc = out->poc;
     }else{
         av_log(s->avctx, AV_LOG_DEBUG, "no picture\n");
     }
 
0424e052
     if (setup_finished)
         ff_thread_finish_setup(s->avctx);
d375c104
 }
 
c90b9442
 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int chroma444, int simple){
53c05b1e
     MpegEncContext * const s = &h->s;
0b69d625
     uint8_t *top_border;
5f7f9719
     int top_idx = 1;
6e3ef511
     const int pixel_shift = h->pixel_shift;
115329f1
 
53c05b1e
     src_y  -=   linesize;
     src_cb -= uvlinesize;
     src_cr -= uvlinesize;
 
5f7f9719
     if(!simple && FRAME_MBAFF){
         if(s->mb_y&1){
             if(!MB_MBAFF){
0b69d625
                 top_border = h->top_borders[0][s->mb_x];
                 AV_COPY128(top_border, src_y + 15*linesize);
6e3ef511
                 if (pixel_shift)
dc172ecc
                     AV_COPY128(top_border+16, src_y+15*linesize+16);
49fb20cb
                 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
c90b9442
                     if(chroma444){
                         if (pixel_shift){
                             AV_COPY128(top_border+32, src_cb + 15*uvlinesize);
                             AV_COPY128(top_border+48, src_cb + 15*uvlinesize+16);
                             AV_COPY128(top_border+64, src_cr + 15*uvlinesize);
                             AV_COPY128(top_border+80, src_cr + 15*uvlinesize+16);
                         } else {
                             AV_COPY128(top_border+16, src_cb + 15*uvlinesize);
                             AV_COPY128(top_border+32, src_cr + 15*uvlinesize);
                         }
dc172ecc
                     } else {
c90b9442
                         if (pixel_shift) {
                             AV_COPY128(top_border+32, src_cb+7*uvlinesize);
                             AV_COPY128(top_border+48, src_cr+7*uvlinesize);
                         } else {
                             AV_COPY64(top_border+16, src_cb+7*uvlinesize);
                             AV_COPY64(top_border+24, src_cr+7*uvlinesize);
                         }
dc172ecc
                     }
5f7f9719
                 }
             }
c988f975
         }else if(MB_MBAFF){
             top_idx = 0;
         }else
             return;
5f7f9719
     }
 
0b69d625
     top_border = h->top_borders[top_idx][s->mb_x];
3b66c4c5
     // There are two lines saved: the line above the top macroblock of a pair,
6867a90b
     // and the line above the bottom macroblock
0b69d625
     AV_COPY128(top_border, src_y + 16*linesize);
6e3ef511
     if (pixel_shift)
dc172ecc
         AV_COPY128(top_border+16, src_y+16*linesize+16);
53c05b1e
 
49fb20cb
     if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
c90b9442
         if(chroma444){
             if (pixel_shift){
                 AV_COPY128(top_border+32, src_cb + 16*linesize);
                 AV_COPY128(top_border+48, src_cb + 16*linesize+16);
                 AV_COPY128(top_border+64, src_cr + 16*linesize);
                 AV_COPY128(top_border+80, src_cr + 16*linesize+16);
             } else {
                 AV_COPY128(top_border+16, src_cb + 16*linesize);
                 AV_COPY128(top_border+32, src_cr + 16*linesize);
             }
dc172ecc
         } else {
c90b9442
             if (pixel_shift) {
                 AV_COPY128(top_border+32, src_cb+8*uvlinesize);
                 AV_COPY128(top_border+48, src_cr+8*uvlinesize);
             } else {
                 AV_COPY64(top_border+16, src_cb+8*uvlinesize);
                 AV_COPY64(top_border+24, src_cr+8*uvlinesize);
             }
dc172ecc
         }
53c05b1e
     }
 }
 
6e3ef511
 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y,
                                   uint8_t *src_cb, uint8_t *src_cr,
                                   int linesize, int uvlinesize,
c90b9442
                                   int xchg, int chroma444,
                                   int simple, int pixel_shift){
53c05b1e
     MpegEncContext * const s = &h->s;
4e987f82
     int deblock_topleft;
b69378e2
     int deblock_top;
5f7f9719
     int top_idx = 1;
1e4f1c56
     uint8_t *top_border_m1;
     uint8_t *top_border;
5f7f9719
 
     if(!simple && FRAME_MBAFF){
         if(s->mb_y&1){
c988f975
             if(!MB_MBAFF)
                 return;
5f7f9719
         }else{
             top_idx = MB_MBAFF ? 0 : 1;
         }
     }
b69378e2
 
     if(h->deblocking_filter == 2) {
4e987f82
         deblock_topleft = h->slice_table[h->mb_xy - 1 - s->mb_stride] == h->slice_num;
         deblock_top     = h->top_type;
b69378e2
     } else {
4e987f82
         deblock_topleft = (s->mb_x > 0);
         deblock_top     = (s->mb_y > !!MB_FIELD);
b69378e2
     }
53c05b1e
 
df2d5b16
     src_y  -=   linesize + 1 + pixel_shift;
     src_cb -= uvlinesize + 1 + pixel_shift;
     src_cr -= uvlinesize + 1 + pixel_shift;
53c05b1e
 
1e4f1c56
     top_border_m1 = h->top_borders[top_idx][s->mb_x-1];
     top_border    = h->top_borders[top_idx][s->mb_x];
 
0b69d625
 #define XCHG(a,b,xchg)\
df2d5b16
     if (pixel_shift) {\
dc172ecc
         if (xchg) {\
             AV_SWAP64(b+0,a+0);\
             AV_SWAP64(b+8,a+8);\
         } else {\
             AV_COPY128(b,a); \
         }\
     } else \
0b69d625
 if (xchg) AV_SWAP64(b,a);\
 else      AV_COPY64(b,a);
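 /* XCHG either swaps (xchg != 0) or copies a into b (xchg == 0), 8 bytes at a
  * time, or 16 bytes when samples are 16 bits wide (pixel_shift). It is used
  * below to exchange rows between the picture and the saved top border so that
  * intra prediction sees the unfiltered neighbouring samples. */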
d89dc06a
 
     if(deblock_top){
4e987f82
         if(deblock_topleft){
6e3ef511
             XCHG(top_border_m1 + (8 << pixel_shift), src_y - (7 << pixel_shift), 1);
c988f975
         }
6e3ef511
         XCHG(top_border + (0 << pixel_shift), src_y + (1 << pixel_shift), xchg);
         XCHG(top_border + (8 << pixel_shift), src_y + (9 << pixel_shift), 1);
cad4368a
         if(s->mb_x+1 < s->mb_width){
6e3ef511
             XCHG(h->top_borders[top_idx][s->mb_x+1], src_y + (17 << pixel_shift), 1);
43efd19a
         }
53c05b1e
     }
49fb20cb
     if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
c90b9442
         if(chroma444){
4e987f82
             if(deblock_topleft){
c90b9442
                 XCHG(top_border_m1 + (24 << pixel_shift), src_cb - (7 << pixel_shift), 1);
                 XCHG(top_border_m1 + (40 << pixel_shift), src_cr - (7 << pixel_shift), 1);
             }
             XCHG(top_border + (16 << pixel_shift), src_cb + (1 << pixel_shift), xchg);
             XCHG(top_border + (24 << pixel_shift), src_cb + (9 << pixel_shift), 1);
             XCHG(top_border + (32 << pixel_shift), src_cr + (1 << pixel_shift), xchg);
             XCHG(top_border + (40 << pixel_shift), src_cr + (9 << pixel_shift), 1);
             if(s->mb_x+1 < s->mb_width){
                 XCHG(h->top_borders[top_idx][s->mb_x+1] + (16 << pixel_shift), src_cb + (17 << pixel_shift), 1);
                 XCHG(h->top_borders[top_idx][s->mb_x+1] + (32 << pixel_shift), src_cr + (17 << pixel_shift), 1);
             }
         } else {
             if(deblock_top){
                 if(deblock_topleft){
                     XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1);
                     XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1);
                 }
                 XCHG(top_border + (16 << pixel_shift), src_cb+1+pixel_shift, 1);
                 XCHG(top_border + (24 << pixel_shift), src_cr+1+pixel_shift, 1);
c988f975
             }
53c05b1e
         }
     }
 }
 
6e3ef511
 static av_always_inline int dctcoef_get(DCTELEM *mb, int high_bit_depth, int index) {
     if (high_bit_depth) {
         return AV_RN32A(((int32_t*)mb) + index);
     } else
         return AV_RN16A(mb + index);
dc172ecc
 }
 
6e3ef511
 static av_always_inline void dctcoef_set(DCTELEM *mb, int high_bit_depth, int index, int value) {
     if (high_bit_depth) {
         AV_WN32A(((int32_t*)mb) + index, value);
     } else
         AV_WN16A(mb + index, value);
dc172ecc
 }
 
c90b9442
 static av_always_inline void hl_decode_mb_predict_luma(H264Context *h, int mb_type, int is_h264, int simple, int transform_bypass,
                                                        int pixel_shift, int *block_offset, int linesize, uint8_t *dest_y, int p)
 {
     MpegEncContext * const s = &h->s;
     void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
     void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
     int i;
     int qscale = p == 0 ? s->qscale : h->chroma_qp[p-1];
     block_offset += 16*p;
     if(IS_INTRA4x4(mb_type)){
         if(simple || !s->encoding){
             if(IS_8x8DCT(mb_type)){
                 if(transform_bypass){
                     idct_dc_add =
                     idct_add    = s->dsp.add_pixels8;
                 }else{
                     idct_dc_add = h->h264dsp.h264_idct8_dc_add;
                     idct_add    = h->h264dsp.h264_idct8_add;
                 }
                 for(i=0; i<16; i+=4){
                     uint8_t * const ptr= dest_y + block_offset[i];
                     const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
                     if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
                         h->hpc.pred8x8l_add[dir](ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                     }else{
                         const int nnz = h->non_zero_count_cache[ scan8[i+p*16] ];
                         h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
                                                     (h->topright_samples_available<<i)&0x4000, linesize);
                         if(nnz){
                             if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16+p*256))
                                 idct_dc_add(ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                             else
                                 idct_add   (ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                         }
                     }
                 }
             }else{
                 if(transform_bypass){
                     idct_dc_add =
                     idct_add    = s->dsp.add_pixels4;
                 }else{
                     idct_dc_add = h->h264dsp.h264_idct_dc_add;
                     idct_add    = h->h264dsp.h264_idct_add;
                 }
                 for(i=0; i<16; i++){
                     uint8_t * const ptr= dest_y + block_offset[i];
                     const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
 
                     if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
                         h->hpc.pred4x4_add[dir](ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                     }else{
                         uint8_t *topright;
                         int nnz, tr;
                         uint64_t tr_high;
                         if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
                             const int topright_avail= (h->topright_samples_available<<i)&0x8000;
403eee16
                             assert(s->mb_y || linesize <= block_offset[i]);
c90b9442
                             if(!topright_avail){
                                 if (pixel_shift) {
                                     tr_high= ((uint16_t*)ptr)[3 - linesize/2]*0x0001000100010001ULL;
                                     topright= (uint8_t*) &tr_high;
                                 } else {
                                     tr= ptr[3 - linesize]*0x01010101;
                                     topright= (uint8_t*) &tr;
                                 }
                             }else
                                 topright= ptr + (4 << pixel_shift) - linesize;
                         }else
                             topright= NULL;
 
                         h->hpc.pred4x4[ dir ](ptr, topright, linesize);
                         nnz = h->non_zero_count_cache[ scan8[i+p*16] ];
                         if(nnz){
                             if(is_h264){
                                 if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16+p*256))
                                     idct_dc_add(ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                                 else
                                     idct_add   (ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                             }else
                                 ff_svq3_add_idct_c(ptr, h->mb + i*16+p*256, linesize, qscale, 0);
                         }
                     }
                 }
             }
         }
     }else{
         h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
         if(is_h264){
             if(h->non_zero_count_cache[ scan8[LUMA_DC_BLOCK_INDEX+p] ]){
                 if(!transform_bypass)
                     h->h264dsp.h264_luma_dc_dequant_idct(h->mb+(p*256 << pixel_shift), h->mb_luma_dc[p], h->dequant4_coeff[p][qscale][0]);
                 else{
                     static const uint8_t dc_mapping[16] = { 0*16, 1*16, 4*16, 5*16, 2*16, 3*16, 6*16, 7*16,
                                                             8*16, 9*16,12*16,13*16,10*16,11*16,14*16,15*16};
                     for(i = 0; i < 16; i++)
                         dctcoef_set(h->mb+p*256, pixel_shift, dc_mapping[i], dctcoef_get(h->mb_luma_dc[p], pixel_shift, i));
                 }
             }
         }else
             ff_svq3_luma_dc_dequant_idct_c(h->mb+p*256, h->mb_luma_dc[p], qscale);
     }
 }
 
 static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type, int is_h264, int simple, int transform_bypass,
                                                     int pixel_shift, int *block_offset, int linesize, uint8_t *dest_y, int p)
 {
     MpegEncContext * const s = &h->s;
     void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
     int i;
     block_offset += 16*p;
     if(!IS_INTRA4x4(mb_type)){
         if(is_h264){
             if(IS_INTRA16x16(mb_type)){
                 if(transform_bypass){
                     if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
                         h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize);
                     }else{
                         for(i=0; i<16; i++){
da0dadb9
                             if(h->non_zero_count_cache[ scan8[i+p*16] ] || dctcoef_get(h->mb, pixel_shift, i*16+p*256))
c90b9442
                                 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + (i*16+p*256 << pixel_shift), linesize);
                         }
                     }
                 }else{
                     h->h264dsp.h264_idct_add16intra(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
                 }
             }else if(h->cbp&15){
                 if(transform_bypass){
                     const int di = IS_8x8DCT(mb_type) ? 4 : 1;
                     idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
                     for(i=0; i<16; i+=di){
                         if(h->non_zero_count_cache[ scan8[i+p*16] ]){
                             idct_add(dest_y + block_offset[i], h->mb + (i*16+p*256 << pixel_shift), linesize);
                         }
                     }
                 }else{
                     if(IS_8x8DCT(mb_type)){
                         h->h264dsp.h264_idct8_add4(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
                     }else{
                         h->h264dsp.h264_idct_add16(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
                     }
                 }
             }
         }else{
             for(i=0; i<16; i++){
                 if(h->non_zero_count_cache[ scan8[i+p*16] ] || h->mb[i*16+p*256]){ //FIXME benchmark weird rule, & below
                     uint8_t * const ptr= dest_y + block_offset[i];
                     ff_svq3_add_idct_c(ptr, h->mb + i*16 + p*256, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
                 }
             }
         }
     }
 }
 
df2d5b16
 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, int pixel_shift){
0da71265
     MpegEncContext * const s = &h->s;
     const int mb_x= s->mb_x;
     const int mb_y= s->mb_y;
64514ee8
     const int mb_xy= h->mb_xy;
0da71265
     const int mb_type= s->current_picture.mb_type[mb_xy];
     uint8_t  *dest_y, *dest_cb, *dest_cr;
     int linesize, uvlinesize /*dct_offset*/;
c90b9442
     int i, j;
6867a90b
     int *block_offset = &h->block_offset[0];
41e4055b
     const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
8b6871ed
     /* is_h264 should always be true if SVQ3 is disabled. */
49fb20cb
     const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
36940eca
     void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
0da71265
 
6e3ef511
     dest_y  = s->current_picture.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize  ) * 16;
     dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;
     dest_cr = s->current_picture.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;
0da71265
 
6e3ef511
     s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4);
     s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + (64 << pixel_shift), dest_cr - dest_cb, 2);
a957c27b
 
c988f975
     h->list_counts[mb_xy]= h->list_count;
 
bd91fee3
     if (!simple && MB_FIELD) {
5d18eaad
         linesize   = h->mb_linesize   = s->linesize * 2;
         uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
c90b9442
         block_offset = &h->block_offset[48];
1412060e
         if(mb_y&1){ //FIXME move out of this function?
0da71265
             dest_y -= s->linesize*15;
6867a90b
             dest_cb-= s->uvlinesize*7;
             dest_cr-= s->uvlinesize*7;
0da71265
         }
5d18eaad
         if(FRAME_MBAFF) {
             int list;
3425501d
             for(list=0; list<h->list_count; list++){
5d18eaad
                 if(!USES_LIST(mb_type, list))
                     continue;
                 if(IS_16X16(mb_type)){
                     int8_t *ref = &h->ref_cache[list][scan8[0]];
1710856c
                     fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
5d18eaad
                 }else{
                     for(i=0; i<16; i+=4){
                         int ref = h->ref_cache[list][scan8[i]];
                         if(ref >= 0)
1710856c
                             fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
5d18eaad
                     }
                 }
             }
         }
0da71265
     } else {
5d18eaad
         linesize   = h->mb_linesize   = s->linesize;
         uvlinesize = h->mb_uvlinesize = s->uvlinesize;
0da71265
 //        dct_offset = s->linesize * 16;
     }
115329f1
 
bd91fee3
     if (!simple && IS_INTRA_PCM(mb_type)) {
df2d5b16
         if (pixel_shift) {
dc172ecc
             const int bit_depth = h->sps.bit_depth_luma;
             int j;
             GetBitContext gb;
             init_get_bits(&gb, (uint8_t*)h->mb, 384*bit_depth);
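             /* An I_PCM macroblock carries raw samples: 256 luma values plus
              * 2*64 chroma values for 4:2:0, i.e. 384 samples of bit_depth
              * bits each; monochrome streams get mid-grey chroma instead. */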
 
             for (i = 0; i < 16; i++) {
                 uint16_t *tmp_y  = (uint16_t*)(dest_y  + i*linesize);
                 for (j = 0; j < 16; j++)
                     tmp_y[j] = get_bits(&gb, bit_depth);
             }
7b442ad9
             if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
566d2692
                 if (!h->sps.chroma_format_idc) {
                     for (i = 0; i < 8; i++) {
                         uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize);
                         for (j = 0; j < 8; j++) {
                             tmp_cb[j] = 1 << (bit_depth - 1);
                         }
                     }
                     for (i = 0; i < 8; i++) {
                         uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize);
                         for (j = 0; j < 8; j++) {
                             tmp_cr[j] = 1 << (bit_depth - 1);
                         }
                     }
                 } else {
                     for (i = 0; i < 8; i++) {
                         uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize);
                         for (j = 0; j < 8; j++)
                             tmp_cb[j] = get_bits(&gb, bit_depth);
                     }
                     for (i = 0; i < 8; i++) {
                         uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize);
                         for (j = 0; j < 8; j++)
                             tmp_cr[j] = get_bits(&gb, bit_depth);
                     }
7b442ad9
                 }
dc172ecc
             }
         } else {
7b442ad9
             for (i=0; i<16; i++) {
                 memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
             }
             if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
566d2692
                 if (!h->sps.chroma_format_idc) {
                     for (i = 0; i < 8; i++) {
                         memset(dest_cb + i*uvlinesize, 128, 8);
                         memset(dest_cr + i*uvlinesize, 128, 8);
                     }
                 } else {
                     for (i = 0; i < 8; i++) {
                         memcpy(dest_cb + i*uvlinesize, h->mb + 128 + i*4,  8);
                         memcpy(dest_cr + i*uvlinesize, h->mb + 160 + i*4,  8);
                     }
7b442ad9
                 }
             }
dc172ecc
         }
e7e09b49
     } else {
         if(IS_INTRA(mb_type)){
5f7f9719
             if(h->deblocking_filter)
c90b9442
                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, 0, simple, pixel_shift);
53c05b1e
 
49fb20cb
             if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
c92a30bb
                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
e7e09b49
             }
0da71265
 
c90b9442
             hl_decode_mb_predict_luma(h, mb_type, is_h264, simple, transform_bypass, pixel_shift, block_offset, linesize, dest_y, 0);
 
5f7f9719
             if(h->deblocking_filter)
c90b9442
                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, 0, simple, pixel_shift);
bd91fee3
         }else if(is_h264){
21d5de93
             if (pixel_shift) {
                 hl_motion_16(h, dest_y, dest_cb, dest_cr,
                              s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                              s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                              h->h264dsp.weight_h264_pixels_tab,
c90b9442
                              h->h264dsp.biweight_h264_pixels_tab, 0);
21d5de93
             } else
                 hl_motion_8(h, dest_y, dest_cb, dest_cr,
                             s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                             s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                             h->h264dsp.weight_h264_pixels_tab,
c90b9442
                             h->h264dsp.biweight_h264_pixels_tab, 0);
0da71265
         }
e7e09b49
 
c90b9442
         hl_decode_mb_idct_luma(h, mb_type, is_h264, simple, transform_bypass, pixel_shift, block_offset, linesize, dest_y, 0);
0da71265
 
49fb20cb
         if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
ef9d1d15
             uint8_t *dest[2] = {dest_cb, dest_cr};
             if(transform_bypass){
96465b90
                 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
c90b9442
                     h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + (16*16*1 << pixel_shift), uvlinesize);
                     h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 32, h->mb + (16*16*2 << pixel_shift), uvlinesize);
96465b90
                 }else{
c25ac15a
                     idct_add = s->dsp.add_pixels4;
c90b9442
                     for(j=1; j<3; j++){
                         for(i=j*16; i<j*16+4; i++){
                             if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16))
                                 idct_add   (dest[j-1] + block_offset[i], h->mb + (i*16 << pixel_shift), uvlinesize);
                         }
96465b90
                     }
                 }
ef9d1d15
             }else{
aebb5d6d
                 if(is_h264){
2e186601
                     if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ])
c90b9442
                         h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*1 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2e186601
                     if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ])
c90b9442
                         h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*2 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
1d16a1cf
                     h->h264dsp.h264_idct_add8(dest, block_offset,
                                               h->mb, uvlinesize,
                                               h->non_zero_count_cache);
007b1f04
                 }
 #if CONFIG_SVQ3_DECODER
                 else{
c90b9442
                     h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16*1, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
                     h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16*2, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
                     for(j=1; j<3; j++){
                         for(i=j*16; i<j*16+4; i++){
                             if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
                                 uint8_t * const ptr= dest[j-1] + block_offset[i];
                                 ff_svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, ff_h264_chroma_qp[0][s->qscale + 12] - 12, 2);
                             }
aebb5d6d
                         }
e7e09b49
                     }
4704097a
                 }
007b1f04
 #endif
0da71265
             }
         }
     }
c212fb0c
     if(h->cbp || IS_INTRA(mb_type))
c90b9442
     {
c9c49387
         s->dsp.clear_blocks(h->mb);
c90b9442
         s->dsp.clear_blocks(h->mb+(24*16<<pixel_shift));
     }
 }
 
 static av_always_inline void hl_decode_mb_444_internal(H264Context *h, int simple, int pixel_shift){
     MpegEncContext * const s = &h->s;
     const int mb_x= s->mb_x;
     const int mb_y= s->mb_y;
     const int mb_xy= h->mb_xy;
     const int mb_type= s->current_picture.mb_type[mb_xy];
     uint8_t  *dest[3];
     int linesize;
     int i, j, p;
     int *block_offset = &h->block_offset[0];
     const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
7b442ad9
     const int plane_count = (simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) ? 3 : 1;
c90b9442
 
7b442ad9
     for (p = 0; p < plane_count; p++)
c90b9442
     {
         dest[p] = s->current_picture.data[p] + ((mb_x << pixel_shift) + mb_y * s->linesize) * 16;
         s->dsp.prefetch(dest[p] + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4);
     }
 
     h->list_counts[mb_xy]= h->list_count;
 
     if (!simple && MB_FIELD) {
         linesize   = h->mb_linesize = h->mb_uvlinesize = s->linesize * 2;
         block_offset = &h->block_offset[48];
         if(mb_y&1) //FIXME move out of this function?
             for (p = 0; p < 3; p++)
                 dest[p] -= s->linesize*15;
         if(FRAME_MBAFF) {
             int list;
             for(list=0; list<h->list_count; list++){
                 if(!USES_LIST(mb_type, list))
                     continue;
                 if(IS_16X16(mb_type)){
                     int8_t *ref = &h->ref_cache[list][scan8[0]];
                     fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
                 }else{
                     for(i=0; i<16; i+=4){
                         int ref = h->ref_cache[list][scan8[i]];
                         if(ref >= 0)
                             fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
                     }
                 }
             }
         }
     } else {
         linesize   = h->mb_linesize = h->mb_uvlinesize = s->linesize;
     }
 
     if (!simple && IS_INTRA_PCM(mb_type)) {
         if (pixel_shift) {
             const int bit_depth = h->sps.bit_depth_luma;
             GetBitContext gb;
             init_get_bits(&gb, (uint8_t*)h->mb, 768*bit_depth);
 
7b442ad9
             for (p = 0; p < plane_count; p++) {
c90b9442
                 for (i = 0; i < 16; i++) {
                     uint16_t *tmp = (uint16_t*)(dest[p] + i*linesize);
                     for (j = 0; j < 16; j++)
                         tmp[j] = get_bits(&gb, bit_depth);
                 }
             }
         } else {
7b442ad9
             for (p = 0; p < plane_count; p++) {
c90b9442
                 for (i = 0; i < 16; i++) {
                     memcpy(dest[p] + i*linesize, h->mb + p*128 + i*8, 16);
                 }
             }
         }
     } else {
         if(IS_INTRA(mb_type)){
             if(h->deblocking_filter)
                 xchg_mb_border(h, dest[0], dest[1], dest[2], linesize, linesize, 1, 1, simple, pixel_shift);
 
7b442ad9
             for (p = 0; p < plane_count; p++)
c90b9442
                 hl_decode_mb_predict_luma(h, mb_type, 1, simple, transform_bypass, pixel_shift, block_offset, linesize, dest[p], p);
 
             if(h->deblocking_filter)
                 xchg_mb_border(h, dest[0], dest[1], dest[2], linesize, linesize, 0, 1, simple, pixel_shift);
         }else{
             if (pixel_shift) {
                 hl_motion_16(h, dest[0], dest[1], dest[2],
                              s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                              s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                              h->h264dsp.weight_h264_pixels_tab,
                              h->h264dsp.biweight_h264_pixels_tab, 1);
             } else
                 hl_motion_8(h, dest[0], dest[1], dest[2],
                             s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                             s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                             h->h264dsp.weight_h264_pixels_tab,
                             h->h264dsp.biweight_h264_pixels_tab, 1);
         }
 
7b442ad9
         for (p = 0; p < plane_count; p++)
c90b9442
             hl_decode_mb_idct_luma(h, mb_type, 1, simple, transform_bypass, pixel_shift, block_offset, linesize, dest[p], p);
     }
     if(h->cbp || IS_INTRA(mb_type))
     {
c212fb0c
         s->dsp.clear_blocks(h->mb);
c90b9442
         s->dsp.clear_blocks(h->mb+(24*16<<pixel_shift));
     }
0da71265
 }
 
 /**
bd91fee3
  * Process a macroblock; this variant avoids the checks needed for expensive, uncommon cases.
  */
6e3ef511
 #define hl_decode_mb_simple(sh, bits) \
 static void hl_decode_mb_simple_ ## bits(H264Context *h){ \
     hl_decode_mb_internal(h, 1, sh); \
bd91fee3
 }
6e3ef511
 hl_decode_mb_simple(0, 8);
 hl_decode_mb_simple(1, 16);
bd91fee3
 
 /**
  * Process a macroblock; this handles edge cases, such as interlacing.
  */
 static void av_noinline hl_decode_mb_complex(H264Context *h){
df2d5b16
     hl_decode_mb_internal(h, 0, h->pixel_shift);
bd91fee3
 }
 
c90b9442
 static void av_noinline hl_decode_mb_444_complex(H264Context *h){
     hl_decode_mb_444_internal(h, 0, h->pixel_shift);
 }
 
 static void av_noinline hl_decode_mb_444_simple(H264Context *h){
     hl_decode_mb_444_internal(h, 1, 0);
 }
 
903d58f6
 void ff_h264_hl_decode_mb(H264Context *h){
bd91fee3
     MpegEncContext * const s = &h->s;
64514ee8
     const int mb_xy= h->mb_xy;
bd91fee3
     const int mb_type= s->current_picture.mb_type[mb_xy];
49fb20cb
     int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
bd91fee3
 
c90b9442
     if (CHROMA444) {
         if(is_complex || h->pixel_shift)
             hl_decode_mb_444_complex(h);
         else
             hl_decode_mb_444_simple(h);
     } else if (is_complex) {
bd91fee3
         hl_decode_mb_complex(h);
6e3ef511
     } else if (h->pixel_shift) {
         hl_decode_mb_simple_16(h);
     } else
         hl_decode_mb_simple_8(h);
bd91fee3
 }
 
0da71265
 static int pred_weight_table(H264Context *h){
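     /* Parse the explicit weighted-prediction table from the slice header:
      * log2 weight denominators for luma and chroma, followed by an optional
      * (weight, offset) pair per reference picture in each list. */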
     MpegEncContext * const s = &h->s;
     int list, i;
9f2d1b4f
     int luma_def, chroma_def;
115329f1
 
9f2d1b4f
     h->use_weight= 0;
     h->use_weight_chroma= 0;
0da71265
     h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
c90b9442
     if(h->sps.chroma_format_idc)
09fffe9b
         h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
9f2d1b4f
     luma_def = 1<<h->luma_log2_weight_denom;
     chroma_def = 1<<h->chroma_log2_weight_denom;
0da71265
 
     for(list=0; list<2; list++){
cb99c652
         h->luma_weight_flag[list]   = 0;
         h->chroma_weight_flag[list] = 0;
0da71265
         for(i=0; i<h->ref_count[list]; i++){
             int luma_weight_flag, chroma_weight_flag;
115329f1
 
0da71265
             luma_weight_flag= get_bits1(&s->gb);
             if(luma_weight_flag){
3d9137c8
                 h->luma_weight[i][list][0]= get_se_golomb(&s->gb);
                 h->luma_weight[i][list][1]= get_se_golomb(&s->gb);
                 if(   h->luma_weight[i][list][0] != luma_def
                    || h->luma_weight[i][list][1] != 0) {
9f2d1b4f
                     h->use_weight= 1;
cb99c652
                     h->luma_weight_flag[list]= 1;
                 }
9f2d1b4f
             }else{
3d9137c8
                 h->luma_weight[i][list][0]= luma_def;
                 h->luma_weight[i][list][1]= 0;
0da71265
             }
 
c90b9442
             if(h->sps.chroma_format_idc){
fef744d4
                 chroma_weight_flag= get_bits1(&s->gb);
                 if(chroma_weight_flag){
                     int j;
                     for(j=0; j<2; j++){
3d9137c8
                         h->chroma_weight[i][list][j][0]= get_se_golomb(&s->gb);
                         h->chroma_weight[i][list][j][1]= get_se_golomb(&s->gb);
                         if(   h->chroma_weight[i][list][j][0] != chroma_def
                            || h->chroma_weight[i][list][j][1] != 0) {
fef744d4
                             h->use_weight_chroma= 1;
cb99c652
                             h->chroma_weight_flag[list]= 1;
                         }
fef744d4
                     }
                 }else{
                     int j;
                     for(j=0; j<2; j++){
3d9137c8
                         h->chroma_weight[i][list][j][0]= chroma_def;
                         h->chroma_weight[i][list][j][1]= 0;
fef744d4
                     }
0da71265
                 }
             }
         }
ce5e49b0
         if(h->slice_type_nos != AV_PICTURE_TYPE_B) break;
0da71265
     }
9f2d1b4f
     h->use_weight= h->use_weight || h->use_weight_chroma;
0da71265
     return 0;
 }
 
1052b76f
 /**
  * Initialize implicit_weight table.
6da88bd3
  * @param field  0/1: initialize the weights for the given field of an interlaced MBAFF pair;
1052b76f
  *               -1: initialize the rest
  */
 static void implicit_weight_table(H264Context *h, int field){
9f2d1b4f
     MpegEncContext * const s = &h->s;
1052b76f
     int ref0, ref1, i, cur_poc, ref_start, ref_count0, ref_count1;
9f2d1b4f
 
ce09f927
     for (i = 0; i < 2; i++) {
         h->luma_weight_flag[i]   = 0;
         h->chroma_weight_flag[i] = 0;
     }
 
1052b76f
     if(field < 0){
         cur_poc = s->current_picture_ptr->poc;
         if(   h->ref_count[0] == 1 && h->ref_count[1] == 1 && !FRAME_MBAFF
9f2d1b4f
            && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
             h->use_weight= 0;
             h->use_weight_chroma= 0;
             return;
         }
1052b76f
         ref_start= 0;
         ref_count0= h->ref_count[0];
         ref_count1= h->ref_count[1];
     }else{
         cur_poc = s->current_picture_ptr->field_poc[field];
         ref_start= 16;
         ref_count0= 16+2*h->ref_count[0];
         ref_count1= 16+2*h->ref_count[1];
     }
9f2d1b4f
 
     h->use_weight= 2;
     h->use_weight_chroma= 2;
     h->luma_log2_weight_denom= 5;
     h->chroma_log2_weight_denom= 5;
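     /* Implicit B-frame weights are derived from the POC distances between the
      * current picture and its two references: the temporally closer reference
      * gets the larger weight, the pair always sums to 64, and equal 32/32
      * weighting is used as a fall-back (long-term references, zero distance,
      * or an out-of-range scale factor). */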
 
1052b76f
     for(ref0=ref_start; ref0 < ref_count0; ref0++){
9f2d1b4f
         int poc0 = h->ref_list[0][ref0].poc;
1052b76f
         for(ref1=ref_start; ref1 < ref_count1; ref1++){
767efcb4
             int w = 32;
             if (!h->ref_list[0][ref0].long_ref && !h->ref_list[1][ref1].long_ref) {
                 int poc1 = h->ref_list[1][ref1].poc;
                 int td = av_clip(poc1 - poc0, -128, 127);
                 if(td){
                     int tb = av_clip(cur_poc - poc0, -128, 127);
                     int tx = (16384 + (FFABS(td) >> 1)) / td;
                     int dist_scale_factor = (tb*tx + 32) >> 8;
                     if(dist_scale_factor >= -64 && dist_scale_factor <= 128)
                         w = 64 - dist_scale_factor;
                 }
1052b76f
             }
             if(field<0){
                 h->implicit_weight[ref0][ref1][0]=
                 h->implicit_weight[ref0][ref1][1]= w;
             }else{
                 h->implicit_weight[ref0][ref1][field]=w;
72f86ec0
             }
9f2d1b4f
         }
     }
 }
 
8fd57a66
 /**
5175b937
  * instantaneous decoder refresh.
0da71265
  */
 static void idr(H264Context *h){
ea6f00c4
     ff_h264_remove_all_refs(h);
a149c1a5
     h->prev_frame_num= 0;
80f8e035
     h->prev_frame_num_offset= 0;
     h->prev_poc_msb=
     h->prev_poc_lsb= 0;
0da71265
 }
 
7c33ad19
 /* forget old pics after a seek */
 static void flush_dpb(AVCodecContext *avctx){
     H264Context *h= avctx->priv_data;
     int i;
ca5dfd15
     for(i=0; i<=MAX_DELAYED_PIC_COUNT; i++) {
285b570f
         if(h->delayed_pic[i])
             h->delayed_pic[i]->reference= 0;
7c33ad19
         h->delayed_pic[i]= NULL;
285b570f
     }
d375c104
     h->outputed_poc=h->next_outputed_poc= INT_MIN;
b19d493f
     h->prev_interlaced_frame = 1;
7c33ad19
     idr(h);
ca159196
     if(h->s.current_picture_ptr)
         h->s.current_picture_ptr->reference= 0;
12d96de3
     h->s.first_field= 0;
9c095463
     ff_h264_reset_sei(h);
e240f898
     ff_mpeg_flush(avctx);
7c33ad19
 }
 
0da71265
 static int init_poc(H264Context *h){
     MpegEncContext * const s = &h->s;
     const int max_frame_num= 1<<h->sps.log2_max_frame_num;
     int field_poc[2];
357282c6
     Picture *cur = s->current_picture_ptr;
0da71265
 
b78a6baa
     h->frame_num_offset= h->prev_frame_num_offset;
5710b371
     if(h->frame_num < h->prev_frame_num)
b78a6baa
         h->frame_num_offset += max_frame_num;
0da71265
 
     if(h->sps.poc_type==0){
         const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
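         /* Track the POC MSB across pictures: when poc_lsb wraps around
          * relative to the previous picture, step poc_msb by max_poc_lsb in the
          * corresponding direction (the PicOrderCntMsb derivation of the spec). */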
 
         if     (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
             h->poc_msb = h->prev_poc_msb + max_poc_lsb;
         else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
             h->poc_msb = h->prev_poc_msb - max_poc_lsb;
         else
             h->poc_msb = h->prev_poc_msb;
 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
115329f1
         field_poc[0] =
0da71265
         field_poc[1] = h->poc_msb + h->poc_lsb;
115329f1
         if(s->picture_structure == PICT_FRAME)
0da71265
             field_poc[1] += h->delta_poc_bottom;
     }else if(h->sps.poc_type==1){
         int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
         int i;
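         /* poc_type 1: the POC follows a periodic pattern of offsets
          * (offset_for_ref_frame[]) signalled in the SPS; expectedpoc
          * accumulates that pattern up to the current frame number, and the
          * slice-level delta_poc values are added on top. */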
 
         if(h->sps.poc_cycle_length != 0)
             abs_frame_num = h->frame_num_offset + h->frame_num;
         else
             abs_frame_num = 0;
 
         if(h->nal_ref_idc==0 && abs_frame_num > 0)
             abs_frame_num--;
115329f1
 
0da71265
         expected_delta_per_poc_cycle = 0;
         for(i=0; i < h->sps.poc_cycle_length; i++)
             expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
 
         if(abs_frame_num > 0){
             int poc_cycle_cnt          = (abs_frame_num - 1) / h->sps.poc_cycle_length;
             int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
 
             expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
             for(i = 0; i <= frame_num_in_poc_cycle; i++)
                 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
         } else
             expectedpoc = 0;
 
115329f1
         if(h->nal_ref_idc == 0)
0da71265
             expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
115329f1
 
0da71265
         field_poc[0] = expectedpoc + h->delta_poc[0];
         field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
 
         if(s->picture_structure == PICT_FRAME)
             field_poc[1] += h->delta_poc[1];
     }else{
b78a6baa
         int poc= 2*(h->frame_num_offset + h->frame_num);
5710b371
 
b78a6baa
         if(!h->nal_ref_idc)
             poc--;
5710b371
 
0da71265
         field_poc[0]= poc;
         field_poc[1]= poc;
     }
115329f1
 
357282c6
     if(s->picture_structure != PICT_BOTTOM_FIELD)
0da71265
         s->current_picture_ptr->field_poc[0]= field_poc[0];
357282c6
     if(s->picture_structure != PICT_TOP_FIELD)
0da71265
         s->current_picture_ptr->field_poc[1]= field_poc[1];
357282c6
     cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
0da71265
 
     return 0;
 }
 
b41c1db3
 
 /**
  * initialize scan tables
  */
 static void init_scan_tables(H264Context *h){
     int i;
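     /* T() swaps the row and column bit-fields of each scan entry (2-bit
      * fields for the 4x4 scans, 3-bit fields for the 8x8 scans), i.e. the
      * tables are stored with their coordinates transposed. */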
ca32f7f2
     for(i=0; i<16; i++){
b41c1db3
 #define T(x) (x>>2) | ((x<<2) & 0xF)
ca32f7f2
         h->zigzag_scan[i] = T(zigzag_scan[i]);
         h-> field_scan[i] = T( field_scan[i]);
b41c1db3
 #undef T
     }
ca32f7f2
     for(i=0; i<64; i++){
b41c1db3
 #define T(x) (x>>3) | ((x&7)<<3)
ca32f7f2
         h->zigzag_scan8x8[i]       = T(ff_zigzag_direct[i]);
         h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
         h->field_scan8x8[i]        = T(field_scan8x8[i]);
         h->field_scan8x8_cavlc[i]  = T(field_scan8x8_cavlc[i]);
b41c1db3
 #undef T
     }
     if(h->sps.transform_bypass){ //FIXME same ugly
         h->zigzag_scan_q0          = zigzag_scan;
45beb850
         h->zigzag_scan8x8_q0       = ff_zigzag_direct;
b41c1db3
         h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
         h->field_scan_q0           = field_scan;
         h->field_scan8x8_q0        = field_scan8x8;
         h->field_scan8x8_cavlc_q0  = field_scan8x8_cavlc;
     }else{
         h->zigzag_scan_q0          = h->zigzag_scan;
         h->zigzag_scan8x8_q0       = h->zigzag_scan8x8;
         h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
         h->field_scan_q0           = h->field_scan;
         h->field_scan8x8_q0        = h->field_scan8x8;
         h->field_scan8x8_cavlc_q0  = h->field_scan8x8_cavlc;
     }
 }
afebe2f7
 
d375c104
 static void field_end(H264Context *h, int in_setup){
256299d3
     MpegEncContext * const s = &h->s;
     AVCodecContext * const avctx= s->avctx;
     s->mb_y= 0;
 
d375c104
     if (!in_setup && !s->dropable)
         ff_thread_report_progress((AVFrame*)s->current_picture_ptr, (16*s->mb_height >> FIELD_PICTURE) - 1,
                                  s->picture_structure==PICT_BOTTOM_FIELD);
256299d3
 
     if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
         ff_vdpau_h264_set_reference_frames(s);
 
d375c104
     if(in_setup || !(avctx->active_thread_type&FF_THREAD_FRAME)){
         if(!s->dropable) {
             ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
             h->prev_poc_msb= h->poc_msb;
             h->prev_poc_lsb= h->poc_lsb;
         }
         h->prev_frame_num_offset= h->frame_num_offset;
         h->prev_frame_num= h->frame_num;
         h->outputed_poc = h->next_outputed_poc;
256299d3
     }
 
     if (avctx->hwaccel) {
         if (avctx->hwaccel->end_frame(avctx) < 0)
             av_log(avctx, AV_LOG_ERROR, "hardware accelerator failed to decode picture\n");
     }
 
     if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
         ff_vdpau_h264_picture_complete(s);
 
     /*
      * FIXME: The error handling code does not seem to support interlaced
      * content when slices span multiple rows.
      * The ff_er_add_slice() calls do not work right for bottom
      * fields; they cause massive erroneous error concealment.
      * Error marking covers both fields (top and bottom).
      * This causes a mismatched s->error_count
      * and a bad error table. Furthermore, the error count goes to
      * INT_MAX when called for the bottom field, because mb_y is
      * one past the end (the caller's fault) and resync_mb_y != 0
      * causes problems for the first MB line, too.
      */
     if (!FIELD_PICTURE)
         ff_er_frame_end(s);
 
     MPV_frame_end(s);
d225a1e2
 
     h->current_slice=0;
256299d3
 }
 
afebe2f7
 /**
49bd8e4b
  * Replicate H264 "master" context to thread contexts.
afebe2f7
  */
 static void clone_slice(H264Context *dst, H264Context *src)
 {
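     /* Copy the per-slice state a worker thread needs to start decoding:
      * current picture pointers, strides, first_field flag, POC prediction
      * state, reference lists and the dequantization tables. */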
     memcpy(dst->block_offset,     src->block_offset, sizeof(dst->block_offset));
     dst->s.current_picture_ptr  = src->s.current_picture_ptr;
     dst->s.current_picture      = src->s.current_picture;
     dst->s.linesize             = src->s.linesize;
     dst->s.uvlinesize           = src->s.uvlinesize;
12d96de3
     dst->s.first_field          = src->s.first_field;
afebe2f7
 
     dst->prev_poc_msb           = src->prev_poc_msb;
     dst->prev_poc_lsb           = src->prev_poc_lsb;
     dst->prev_frame_num_offset  = src->prev_frame_num_offset;
     dst->prev_frame_num         = src->prev_frame_num;
     dst->short_ref_count        = src->short_ref_count;
 
     memcpy(dst->short_ref,        src->short_ref,        sizeof(dst->short_ref));
     memcpy(dst->long_ref,         src->long_ref,         sizeof(dst->long_ref));
     memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
     memcpy(dst->ref_list,         src->ref_list,         sizeof(dst->ref_list));
50c21814
 
     memcpy(dst->dequant4_coeff,   src->dequant4_coeff,   sizeof(src->dequant4_coeff));
     memcpy(dst->dequant8_coeff,   src->dequant8_coeff,   sizeof(src->dequant8_coeff));
afebe2f7
 }
 
0da71265
 /**
94e3e83f
  * computes profile from profile_idc and constraint_set?_flags
  *
  * @param sps SPS
  *
  * @return profile as defined by FF_PROFILE_H264_*
  */
 int ff_h264_get_profile(SPS *sps)
 {
     int profile = sps->profile_idc;
 
     switch(sps->profile_idc) {
     case FF_PROFILE_H264_BASELINE:
         // constraint_set1_flag set to 1
         profile |= (sps->constraint_set_flags & 1<<1) ? FF_PROFILE_H264_CONSTRAINED : 0;
         break;
     case FF_PROFILE_H264_HIGH_10:
     case FF_PROFILE_H264_HIGH_422:
     case FF_PROFILE_H264_HIGH_444_PREDICTIVE:
         // constraint_set3_flag set to 1
         profile |= (sps->constraint_set_flags & 1<<3) ? FF_PROFILE_H264_INTRA : 0;
         break;
     }
 
     return profile;
 }
 
 /**
0da71265
  * decodes a slice header.
9c852bcf
  * This will also call MPV_common_init() and frame_start() as needed.
afebe2f7
  *
  * @param h H264Context
  * @param h0 h264 master context (differs from 'h' when doing slice-based parallel decoding)
  *
d9526386
  * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
0da71265
  */
afebe2f7
 static int decode_slice_header(H264Context *h, H264Context *h0){
0da71265
     MpegEncContext * const s = &h->s;
12d96de3
     MpegEncContext * const s0 = &h0->s;
88e7a4d1
     unsigned int first_mb_in_slice;
ac658be5
     unsigned int pps_id;
0da71265
     int num_ref_idx_active_override_flag;
41f5c62f
     unsigned int slice_type, tmp, i, j;
0bf79634
     int default_ref_list_done = 0;
12d96de3
     int last_pic_structure;
0da71265
 
2f944356
     s->dropable= h->nal_ref_idc == 0;
0da71265
 
e54fd338
     /* FIXME: 2tap qpel isn't implemented for high bit depth. */
     if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc && !h->pixel_shift){
cf653d08
         s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
         s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
     }else{
         s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
         s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
     }
 
0da71265
     first_mb_in_slice= get_ue_golomb(&s->gb);
 
d225a1e2
     if(first_mb_in_slice == 0){ //FIXME better field boundary detection
         if(h0->current_slice && FIELD_PICTURE){
d375c104
             field_end(h, 1);
d225a1e2
         }
 
afebe2f7
         h0->current_slice = 0;
12d96de3
         if (!s0->first_field)
f6e3c460
             s->current_picture_ptr= NULL;
66a4b2c1
     }
 
9963b332
     slice_type= get_ue_golomb_31(&s->gb);
0bf79634
     if(slice_type > 9){
9b879566
         av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
5175b937
         return -1;
0da71265
     }
0bf79634
     if(slice_type > 4){
         slice_type -= 5;
0da71265
         h->slice_type_fixed=1;
     }else
         h->slice_type_fixed=0;
115329f1
 
ee2a957f
     slice_type= golomb_to_pict_type[ slice_type ];
ce5e49b0
     if (slice_type == AV_PICTURE_TYPE_I
afebe2f7
         || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
0bf79634
         default_ref_list_done = 1;
     }
     h->slice_type= slice_type;
e3e6f18f
     h->slice_type_nos= slice_type & 3;
0bf79634
 
1412060e
     s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
115329f1
 
0da71265
     pps_id= get_ue_golomb(&s->gb);
ac658be5
     if(pps_id>=MAX_PPS_COUNT){
9b879566
         av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
0da71265
         return -1;
     }
afebe2f7
     if(!h0->pps_buffers[pps_id]) {
a0f80050
         av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS %u referenced\n", pps_id);
8b92b792
         return -1;
     }
afebe2f7
     h->pps= *h0->pps_buffers[pps_id];
8b92b792
 
afebe2f7
     if(!h0->sps_buffers[h->pps.sps_id]) {
a0f80050
         av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %u referenced\n", h->pps.sps_id);
8b92b792
         return -1;
     }
afebe2f7
     h->sps = *h0->sps_buffers[h->pps.sps_id];
239ea04c
 
94e3e83f
     s->avctx->profile = ff_h264_get_profile(&h->sps);
fa37cf0d
     s->avctx->level   = h->sps.level_idc;
6752a3cc
     s->avctx->refs    = h->sps.ref_frame_count;
b08e38e8
 
50c21814
     if(h == h0 && h->dequant_coeff_pps != pps_id){
50eaa857
         h->dequant_coeff_pps = pps_id;
239ea04c
         init_dequant_tables(h);
     }
115329f1
 
0da71265
     s->mb_width= h->sps.mb_width;
6867a90b
     s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
115329f1
 
bf4665ee
     h->b_stride=  s->mb_width*4;
0da71265
 
c90b9442
     s->width = 16*s->mb_width - (2>>CHROMA444)*FFMIN(h->sps.crop_right, (8<<CHROMA444)-1);
0da71265
     if(h->sps.frame_mbs_only_flag)
c90b9442
         s->height= 16*s->mb_height - (2>>CHROMA444)*FFMIN(h->sps.crop_bottom, (8<<CHROMA444)-1);
0da71265
     else
c90b9442
         s->height= 16*s->mb_height - (4>>CHROMA444)*FFMIN(h->sps.crop_bottom, (8<<CHROMA444)-1);
115329f1
 
     if (s->context_initialized
5388f0b4
         && (   s->width != s->avctx->width || s->height != s->avctx->height
             || av_cmp_q(h->sps.sar, s->avctx->sample_aspect_ratio))) {
d375c104
         if(h != h0) {
             av_log_missing_feature(s->avctx, "Width/height changing with threads is", 0);
afebe2f7
             return -1;   // width / height changed during parallelized decoding
d375c104
         }
2ed0f766
         free_tables(h, 0);
ff7f75e1
         flush_dpb(s->avctx);
0da71265
         MPV_common_end(s);
     }
     if (!s->context_initialized) {
33aec3f4
         if (h != h0) {
             av_log(h->s.avctx, AV_LOG_ERROR, "Cannot (re-)initialize context during parallel decoding.\n");
0d5e6843
             return -1;
         }
f3bdc3da
 
         avcodec_set_dimensions(s->avctx, s->width, s->height);
         s->avctx->sample_aspect_ratio= h->sps.sar;
cfa5a81e
         av_assert0(s->avctx->sample_aspect_ratio.den);
f3bdc3da
 
b47904d1
         h->s.avctx->coded_width = 16*s->mb_width;
         h->s.avctx->coded_height = 16*s->mb_height;
 
c4dffe7e
         if(h->sps.video_signal_type_present_flag){
             s->avctx->color_range = h->sps.full_range ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
             if(h->sps.colour_description_present_flag){
                 s->avctx->color_primaries = h->sps.color_primaries;
                 s->avctx->color_trc       = h->sps.color_trc;
                 s->avctx->colorspace      = h->sps.colorspace;
             }
         }
 
f3bdc3da
         if(h->sps.timing_info_present_flag){
3102d180
             int64_t den= h->sps.time_scale;
055a6aa7
             if(h->x264_build < 44U)
3102d180
                 den *= 2;
f3bdc3da
             av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3102d180
                       h->sps.num_units_in_tick, den, 1<<30);
f3bdc3da
         }
5e85298c
 
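         /* Pick the output pixel format from the coded bit depth and chroma format.
          * For 8-bit 4:2:0 the get_format() callback chooses, which is where a hwaccel
          * format or a full-range YUVJ format can be selected. */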
         switch (h->sps.bit_depth_luma) {
             case 9 :
c90b9442
                 s->avctx->pix_fmt = CHROMA444 ? PIX_FMT_YUV444P9 : PIX_FMT_YUV420P9;
5e85298c
                 break;
             case 10 :
c90b9442
                 s->avctx->pix_fmt = CHROMA444 ? PIX_FMT_YUV444P10 : PIX_FMT_YUV420P10;
5e85298c
                 break;
             default:
c90b9442
                 if (CHROMA444){
                     s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? PIX_FMT_YUVJ444P : PIX_FMT_YUV444P;
                 }else{
                     s->avctx->pix_fmt = s->avctx->get_format(s->avctx,
                                                              s->avctx->codec->pix_fmts ?
                                                              s->avctx->codec->pix_fmts :
                                                              s->avctx->color_range == AVCOL_RANGE_JPEG ?
                                                              hwaccel_pixfmt_list_h264_jpeg_420 :
                                                              ff_hwaccel_pixfmt_list_420);
                 }
5e85298c
         }
 
f3bdc3da
         s->avctx->hwaccel = ff_find_hwaccel(s->avctx->codec->id, s->avctx->pix_fmt);
 
33aec3f4
         if (MPV_common_init(s) < 0) {
             av_log(h->s.avctx, AV_LOG_ERROR, "MPV_common_init() failed.\n");
0da71265
             return -1;
0d5e6843
         }
12d96de3
         s->first_field = 0;
b19d493f
         h->prev_interlaced_frame = 1;
115329f1
 
b41c1db3
         init_scan_tables(h);
cc471819
         if (ff_h264_alloc_tables(h) < 0) {
             av_log(h->s.avctx, AV_LOG_ERROR, "Could not allocate memory for h264\n");
             return AVERROR(ENOMEM);
         }
0da71265
 
d375c104
         if (!HAVE_THREADS || !(s->avctx->active_thread_type&FF_THREAD_SLICE)) {
33aec3f4
             if (context_init(h) < 0) {
                 av_log(h->s.avctx, AV_LOG_ERROR, "context_init() failed.\n");
afebe2f7
                 return -1;
0d5e6843
             }
d375c104
         } else {
             for(i = 1; i < s->avctx->thread_count; i++) {
                 H264Context *c;
                 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
                 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
                 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
                 c->h264dsp = h->h264dsp;
                 c->sps = h->sps;
                 c->pps = h->pps;
75a37b57
                 c->pixel_shift = h->pixel_shift;
d375c104
                 init_scan_tables(c);
                 clone_tables(c, h, i);
             }
 
             for(i = 0; i < s->avctx->thread_count; i++)
33aec3f4
                 if (context_init(h->thread_context[i]) < 0) {
                     av_log(h->s.avctx, AV_LOG_ERROR, "context_init() failed.\n");
d375c104
                     return -1;
0d5e6843
                 }
d375c104
         }
0da71265
     }
 
     h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
 
5d18eaad
     h->mb_mbaff = 0;
6ba71fc4
     h->mb_aff_frame = 0;
12d96de3
     last_pic_structure = s0->picture_structure;
0da71265
     if(h->sps.frame_mbs_only_flag){
         s->picture_structure= PICT_FRAME;
     }else{
6ba71fc4
         if(get_bits1(&s->gb)) { //field_pic_flag
0da71265
             s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
6ba71fc4
         } else {
0da71265
             s->picture_structure= PICT_FRAME;
6ba71fc4
             h->mb_aff_frame = h->sps.mb_aff;
6867a90b
         }
0da71265
     }
44e9dcf1
     h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
2ddcf84b
 
     if(h0->current_slice == 0){
33eac92a
         // Shorten frame num gaps so we don't have to allocate reference frames just to throw them away
         if(h->frame_num != h->prev_frame_num) {
             int unwrap_prev_frame_num = h->prev_frame_num, max_frame_num = 1<<h->sps.log2_max_frame_num;
 
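             /* frame_num wraps at max_frame_num; unwrap prev_frame_num into the same
              * window as frame_num and clamp the gap to ref_frame_count so the loop
              * below does not create more dummy references than can be kept. */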
             if (unwrap_prev_frame_num > h->frame_num) unwrap_prev_frame_num -= max_frame_num;
 
             if ((h->frame_num - unwrap_prev_frame_num) > h->sps.ref_frame_count) {
                 unwrap_prev_frame_num = (h->frame_num - h->sps.ref_frame_count) - 1;
                 if (unwrap_prev_frame_num < 0)
                     unwrap_prev_frame_num += max_frame_num;
 
                 h->prev_frame_num = unwrap_prev_frame_num;
             }
         }
d375c104
 
26b86e47
         while(h->frame_num !=  h->prev_frame_num &&
               h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
4dece8c7
             Picture *prev = h->short_ref_count ? h->short_ref[0] : NULL;
bb943bb8
             av_log(h->s.avctx, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
903d58f6
             if (ff_h264_frame_start(h) < 0)
66e6038c
                 return -1;
26b86e47
             h->prev_frame_num++;
             h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
             s->current_picture_ptr->frame_num= h->prev_frame_num;
d375c104
             ff_thread_report_progress((AVFrame*)s->current_picture_ptr, INT_MAX, 0);
             ff_thread_report_progress((AVFrame*)s->current_picture_ptr, INT_MAX, 1);
3d542120
             ff_generate_sliding_window_mmcos(h);
             ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
e2983d6e
             /* Error concealment: if a ref is missing, copy the previous ref in its place.
              * FIXME: avoiding a memcpy would be nice, but ref handling makes many assumptions
              * about there being no actual duplicates.
              * FIXME: this doesn't copy padding for out-of-frame motion vectors.  Given we're
              * concealing a lost frame, this probably isn't noticeable by comparison, but it should
              * be fixed. */
4dece8c7
             if (h->short_ref_count) {
                 if (prev) {
                     av_image_copy(h->short_ref[0]->data, h->short_ref[0]->linesize,
                                   (const uint8_t**)prev->data, prev->linesize,
5dd7f994
                                   s->avctx->pix_fmt, s->mb_width*16, s->mb_height*16);
4dece8c7
                     h->short_ref[0]->poc = prev->poc+2;
                 }
                 h->short_ref[0]->frame_num = h->prev_frame_num;
             }
26b86e47
         }
 
12d96de3
         /* See if we have a decoded first field looking for a pair... */
         if (s0->first_field) {
             assert(s0->current_picture_ptr);
             assert(s0->current_picture_ptr->data[0]);
             assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
 
             /* figure out if we have a complementary field pair */
             if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
                 /*
                  * Previous field is unmatched. Don't display it, but let it
                  * remain for reference if marked as such.
                  */
                 s0->current_picture_ptr = NULL;
                 s0->first_field = FIELD_PICTURE;
 
             } else {
                 if (h->nal_ref_idc &&
                         s0->current_picture_ptr->reference &&
                         s0->current_picture_ptr->frame_num != h->frame_num) {
                     /*
                      * This and previous field were reference, but had
                      * different frame_nums. Consider this field first in
                      * pair. Throw away previous field except for reference
                      * purposes.
                      */
                     s0->first_field = 1;
                     s0->current_picture_ptr = NULL;
 
                 } else {
                     /* Second field in complementary pair */
                     s0->first_field = 0;
                 }
             }
 
         } else {
             /* Frame or first field in a potentially complementary pair */
             assert(!s0->current_picture_ptr);
             s0->first_field = FIELD_PICTURE;
         }
 
7e2eb4ba
         if(!FIELD_PICTURE || s0->first_field) {
             if (ff_h264_frame_start(h) < 0) {
                 s0->first_field = 0;
                 return -1;
             }
         } else {
             ff_release_unused_pictures(s, 0);
12d96de3
         }
2ddcf84b
     }
     if(h != h0)
         clone_slice(h, h0);
 
     s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
 
88e7a4d1
     assert(s->mb_num == s->mb_width * s->mb_height);
f3e53d9f
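     /* first_mb_in_slice addresses MB pairs in MBAFF frames and field MBs in field
      * pictures, while s->mb_y counts frame MB rows, hence the shift below and the
      * +1 for the bottom field. */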
     if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
88e7a4d1
        first_mb_in_slice                    >= s->mb_num){
         av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
6b53b87e
         return -1;
     }
88e7a4d1
     s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
f3e53d9f
     s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
     if (s->picture_structure == PICT_BOTTOM_FIELD)
         s->resync_mb_y = s->mb_y = s->mb_y + 1;
88e7a4d1
     assert(s->mb_y < s->mb_height);
115329f1
 
0da71265
     if(s->picture_structure==PICT_FRAME){
         h->curr_pic_num=   h->frame_num;
         h->max_pic_num= 1<< h->sps.log2_max_frame_num;
     }else{
f57e2af6
         h->curr_pic_num= 2*h->frame_num + 1;
0da71265
         h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
     }
115329f1
 
0da71265
     if(h->nal_unit_type == NAL_IDR_SLICE){
1df1df0b
         get_ue_golomb(&s->gb); /* idr_pic_id */
0da71265
     }
115329f1
 
0da71265
     if(h->sps.poc_type==0){
         h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
115329f1
 
0da71265
         if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
             h->delta_poc_bottom= get_se_golomb(&s->gb);
         }
     }
115329f1
 
0da71265
     if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
         h->delta_poc[0]= get_se_golomb(&s->gb);
115329f1
 
0da71265
         if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
             h->delta_poc[1]= get_se_golomb(&s->gb);
     }
115329f1
 
0da71265
     init_poc(h);
115329f1
 
0da71265
     if(h->pps.redundant_pic_cnt_present){
         h->redundant_pic_count= get_ue_golomb(&s->gb);
     }
 
1412060e
     //set defaults, might be overridden a few lines later
0da71265
     h->ref_count[0]= h->pps.ref_count[0];
     h->ref_count[1]= h->pps.ref_count[1];
 
ce5e49b0
     if(h->slice_type_nos != AV_PICTURE_TYPE_I){
be9183de
         unsigned max= (16<<(s->picture_structure != PICT_FRAME))-1;
ce5e49b0
         if(h->slice_type_nos == AV_PICTURE_TYPE_B){
0da71265
             h->direct_spatial_mv_pred= get_bits1(&s->gb);
         }
         num_ref_idx_active_override_flag= get_bits1(&s->gb);
115329f1
 
0da71265
         if(num_ref_idx_active_override_flag){
             h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
ce5e49b0
             if(h->slice_type_nos==AV_PICTURE_TYPE_B)
0da71265
                 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
 
be9183de
         }
         if(h->ref_count[0]-1 > max || h->ref_count[1]-1 > max){
             av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
             h->ref_count[0]= h->ref_count[1]= 1;
             return -1;
0da71265
         }
ce5e49b0
         if(h->slice_type_nos == AV_PICTURE_TYPE_B)
187696fa
             h->list_count= 2;
         else
             h->list_count= 1;
     }else
3e0dbb8a
         h->ref_count[1]= h->ref_count[0]= h->list_count= 0;
0da71265
 
0bf79634
     if(!default_ref_list_done){
ea6f00c4
         ff_h264_fill_default_ref_list(h);
0da71265
     }
 
cf005293
     if(h->slice_type_nos!=AV_PICTURE_TYPE_I && ff_h264_decode_ref_pic_list_reordering(h) < 0) {
         h->ref_count[1]= h->ref_count[0]= 0;
806bb93f
         return -1;
cf005293
     }
0da71265
 
ce5e49b0
     if(h->slice_type_nos!=AV_PICTURE_TYPE_I){
07dff5c7
         s->last_picture_ptr= &h->ref_list[0][0];
8d2fc163
         ff_copy_picture(&s->last_picture, s->last_picture_ptr);
07dff5c7
     }
ce5e49b0
     if(h->slice_type_nos==AV_PICTURE_TYPE_B){
07dff5c7
         s->next_picture_ptr= &h->ref_list[1][0];
8d2fc163
         ff_copy_picture(&s->next_picture, s->next_picture_ptr);
07dff5c7
     }
 
ce5e49b0
     if(   (h->pps.weighted_pred          && h->slice_type_nos == AV_PICTURE_TYPE_P )
        ||  (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== AV_PICTURE_TYPE_B ) )
0da71265
         pred_weight_table(h);
ce5e49b0
     else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== AV_PICTURE_TYPE_B){
1052b76f
         implicit_weight_table(h, -1);
1a29c6a0
     }else {
9f2d1b4f
         h->use_weight = 0;
cb99c652
         for (i = 0; i < 2; i++) {
             h->luma_weight_flag[i]   = 0;
             h->chroma_weight_flag[i] = 0;
         }
     }
115329f1
 
2ddcf84b
     if(h->nal_ref_idc)
ea6f00c4
         ff_h264_decode_ref_pic_marking(h0, &s->gb);
0da71265
 
1052b76f
     if(FRAME_MBAFF){
ea6f00c4
         ff_h264_fill_mbaff_ref_list(h);
5d18eaad
 
ce5e49b0
         if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== AV_PICTURE_TYPE_B){
1052b76f
             implicit_weight_table(h, 0);
             implicit_weight_table(h, 1);
         }
     }
 
ce5e49b0
     if(h->slice_type_nos==AV_PICTURE_TYPE_B && !h->direct_spatial_mv_pred)
943f69a6
         ff_h264_direct_dist_scale_factor(h);
     ff_h264_direct_ref_list_init(h);
8f56e219
 
ce5e49b0
     if( h->slice_type_nos != AV_PICTURE_TYPE_I && h->pps.cabac ){
9963b332
         tmp = get_ue_golomb_31(&s->gb);
88e7a4d1
         if(tmp > 2){
             av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
             return -1;
         }
         h->cabac_init_idc= tmp;
     }
e5017ab8
 
     h->last_qscale_diff = 0;
88e7a4d1
     tmp = h->pps.init_qp + get_se_golomb(&s->gb);
d268bed2
     if(tmp>51+6*(h->sps.bit_depth_luma-8)){
88e7a4d1
         av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3ebc7e04
         return -1;
     }
88e7a4d1
     s->qscale= tmp;
4691a77d
     h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
     h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
0da71265
     //FIXME qscale / qp ... stuff
ce5e49b0
     if(h->slice_type == AV_PICTURE_TYPE_SP){
1df1df0b
         get_bits1(&s->gb); /* sp_for_switch_flag */
0da71265
     }
ce5e49b0
     if(h->slice_type==AV_PICTURE_TYPE_SP || h->slice_type == AV_PICTURE_TYPE_SI){
1df1df0b
         get_se_golomb(&s->gb); /* slice_qs_delta */
0da71265
     }
 
53c05b1e
     h->deblocking_filter = 1;
0c32e19d
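     /* The alpha/beta offsets are kept with a +52 bias (52 + 2*offset_div2 from the
      * bitstream); qp_thresh and the FF_DEBUG_PICT_INFO output below undo this bias. */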
     h->slice_alpha_c0_offset = 52;
     h->slice_beta_offset = 52;
0da71265
     if( h->pps.deblocking_filter_parameters_present ) {
9963b332
         tmp= get_ue_golomb_31(&s->gb);
88e7a4d1
         if(tmp > 2){
             av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
             return -1;
         }
         h->deblocking_filter= tmp;
115329f1
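         /* In the bitstream 0 means "filter enabled" and 1 means "filter disabled";
          * swap them so that h->deblocking_filter is non-zero when the filter runs
          * (2 = enabled, but not across slice boundaries). */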
         if(h->deblocking_filter < 2)
53c05b1e
             h->deblocking_filter^= 1; // 1<->0
 
         if( h->deblocking_filter ) {
0c32e19d
             h->slice_alpha_c0_offset += get_se_golomb(&s->gb) << 1;
             h->slice_beta_offset     += get_se_golomb(&s->gb) << 1;
             if(   h->slice_alpha_c0_offset > 104U
                || h->slice_beta_offset     > 104U){
                 av_log(s->avctx, AV_LOG_ERROR, "deblocking filter parameters %d %d out of range\n", h->slice_alpha_c0_offset, h->slice_beta_offset);
                 return -1;
             }
0da71265
         }
980a82b7
     }
afebe2f7
 
61858a76
     if(   s->avctx->skip_loop_filter >= AVDISCARD_ALL
ce5e49b0
        ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != AV_PICTURE_TYPE_I)
        ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR  && h->slice_type_nos == AV_PICTURE_TYPE_B)
61858a76
        ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
         h->deblocking_filter= 0;
 
afebe2f7
     if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
ec970c21
         if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
             /* Cheat slightly for speed:
5d81d641
                Do not bother to deblock across slices. */
ec970c21
             h->deblocking_filter = 2;
         } else {
7ae94d52
             h0->max_contexts = 1;
             if(!h0->single_decode_warning) {
                 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
                 h0->single_decode_warning = 1;
             }
33aec3f4
             if (h != h0) {
                 av_log(h->s.avctx, AV_LOG_ERROR, "Deblocking switched inside frame.\n");
0d5e6843
                 return 1;
             }
ec970c21
         }
afebe2f7
     }
0c32e19d
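     /* For QPs at or below this threshold the loop filter has no effect, so
      * fill_filter_caches() can skip such macroblocks entirely; the 52 compensates
      * for the offset bias above. */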
     h->qp_thresh= 15 + 52 - FFMIN(h->slice_alpha_c0_offset, h->slice_beta_offset) - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
afebe2f7
 
0da71265
 #if 0 //FMO
     if( h->pps.num_slice_groups > 1  && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
         slice_group_change_cycle= get_bits(&s->gb, ?);
 #endif
 
afebe2f7
     h0->last_slice_type = slice_type;
     h->slice_num = ++h0->current_slice;
b735aeea
     if(h->slice_num >= MAX_SLICES){
a4233d1f
         av_log(s->avctx, AV_LOG_ERROR, "Too many slices (%d >= %d), increase MAX_SLICES and recompile\n", h->slice_num, MAX_SLICES);
b735aeea
     }
5175b937
 
c32867b5
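     /* Build the per-slice ref2frm tables: each reference list entry is mapped to the
      * position of its frame in the short/long term reference arrays (60 if not found),
      * times 4, plus its reference field flags, so references can be compared when the
      * caches for the loop filter are filled; entries 0/1 (and 18/19 for the field half)
      * stay -1 for "unused". */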
     for(j=0; j<2; j++){
6d7e6b26
         int id_list[16];
b735aeea
         int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
6d7e6b26
         for(i=0; i<16; i++){
             id_list[i]= 60;
             if(h->ref_list[j][i].data[0]){
                 int k;
                 uint8_t *base= h->ref_list[j][i].base[0];
                 for(k=0; k<h->short_ref_count; k++)
                     if(h->short_ref[k]->base[0] == base){
                         id_list[i]= k;
                         break;
                     }
                 for(k=0; k<h->long_ref_count; k++)
                     if(h->long_ref[k] && h->long_ref[k]->base[0] == base){
                         id_list[i]= h->short_ref_count + k;
                         break;
                     }
             }
         }
 
c32867b5
         ref2frm[0]=
         ref2frm[1]= -1;
d50cdd82
         for(i=0; i<16; i++)
6d7e6b26
             ref2frm[i+2]= 4*id_list[i]
c32867b5
                           +(h->ref_list[j][i].reference&3);
d50cdd82
         ref2frm[18+0]=
         ref2frm[18+1]= -1;
         for(i=16; i<48; i++)
6d7e6b26
             ref2frm[i+4]= 4*id_list[(i-16)>>1]
d50cdd82
                           +(h->ref_list[j][i].reference&3);
c32867b5
     }
 
d375c104
     //FIXME: fix draw_edges+PAFF+frame threads
0424e052
     h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE || (!h->sps.frame_mbs_only_flag && s->avctx->active_thread_type)) ? 0 : 16;
8a11a969
     h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
5d18eaad
 
0da71265
     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
49573a87
         av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
6867a90b
                h->slice_num,
                (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
115329f1
                first_mb_in_slice,
301183d9
                av_get_picture_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
0da71265
                pps_id, h->frame_num,
                s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
                h->ref_count[0], h->ref_count[1],
                s->qscale,
0c32e19d
                h->deblocking_filter, h->slice_alpha_c0_offset/2-26, h->slice_beta_offset/2-26,
9f2d1b4f
                h->use_weight,
4806b922
                h->use_weight==1 && h->use_weight_chroma ? "c" : "",
ce5e49b0
                h->slice_type == AV_PICTURE_TYPE_B ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
0da71265
                );
     }
 
     return 0;
 }
 
0dc343d4
 int ff_h264_get_slice_type(const H264Context *h)
75dd6938
 {
     switch (h->slice_type) {
ce5e49b0
     case AV_PICTURE_TYPE_P:  return 0;
     case AV_PICTURE_TYPE_B:  return 1;
     case AV_PICTURE_TYPE_I:  return 2;
     case AV_PICTURE_TYPE_SP: return 3;
     case AV_PICTURE_TYPE_SI: return 4;
75dd6938
     default:         return -1;
     }
 }
 
d02bb3ec
 /**
  * Fill the caches needed by the loop filter for the current macroblock.
  * @return non-zero if the loop filter can be skipped for this macroblock
  */
 static int fill_filter_caches(H264Context *h, int mb_type){
     MpegEncContext * const s = &h->s;
     const int mb_xy= h->mb_xy;
     int top_xy, left_xy[2];
     int top_type, left_type[2];
 
     top_xy     = mb_xy  - (s->mb_stride << MB_FIELD);
 
     //FIXME deblocking could skip the intra and nnz parts.
 
     /* Wow, what a mess, why didn't they simplify the interlacing & intra
      * stuff, I can't imagine that these complex rules are worth it. */
 
     left_xy[1] = left_xy[0] = mb_xy-1;
     if(FRAME_MBAFF){
         const int left_mb_field_flag     = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]);
         const int curr_mb_field_flag     = IS_INTERLACED(mb_type);
         if(s->mb_y&1){
             if (left_mb_field_flag != curr_mb_field_flag) {
                 left_xy[0] -= s->mb_stride;
             }
         }else{
             if(curr_mb_field_flag){
                 top_xy      += s->mb_stride & (((s->current_picture.mb_type[top_xy    ]>>7)&1)-1);
             }
             if (left_mb_field_flag != curr_mb_field_flag) {
                 left_xy[1] += s->mb_stride;
             }
         }
     }
 
     h->top_mb_xy = top_xy;
     h->left_mb_xy[0] = left_xy[0];
     h->left_mb_xy[1] = left_xy[1];
     {
         //for sufficiently low qp, filtering wouldn't do anything
         //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
         int qp_thresh = h->qp_thresh; //FIXME strictly we should store qp_thresh for each mb of a slice
         int qp = s->current_picture.qscale_table[mb_xy];
         if(qp <= qp_thresh
            && (left_xy[0]<0 || ((qp + s->current_picture.qscale_table[left_xy[0]] + 1)>>1) <= qp_thresh)
            && (top_xy   < 0 || ((qp + s->current_picture.qscale_table[top_xy    ] + 1)>>1) <= qp_thresh)){
             if(!FRAME_MBAFF)
                 return 1;
             if(   (left_xy[0]< 0            || ((qp + s->current_picture.qscale_table[left_xy[1]             ] + 1)>>1) <= qp_thresh)
                && (top_xy    < s->mb_stride || ((qp + s->current_picture.qscale_table[top_xy    -s->mb_stride] + 1)>>1) <= qp_thresh))
                 return 1;
         }
     }
 
     top_type     = s->current_picture.mb_type[top_xy]    ;
     left_type[0] = s->current_picture.mb_type[left_xy[0]];
     left_type[1] = s->current_picture.mb_type[left_xy[1]];
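     /* Filter type 2 does not deblock across slice boundaries, so neighbours from a
      * different slice are treated as unavailable; otherwise only macroblocks that do
      * not exist at all (slice_table == 0xFFFF) are. */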
     if(h->deblocking_filter == 2){
         if(h->slice_table[top_xy     ] != h->slice_num) top_type= 0;
         if(h->slice_table[left_xy[0] ] != h->slice_num) left_type[0]= left_type[1]= 0;
     }else{
         if(h->slice_table[top_xy     ] == 0xFFFF) top_type= 0;
         if(h->slice_table[left_xy[0] ] == 0xFFFF) left_type[0]= left_type[1] =0;
     }
     h->top_type    = top_type    ;
     h->left_type[0]= left_type[0];
     h->left_type[1]= left_type[1];
 
     if(IS_INTRA(mb_type))
         return 0;
 
c90b9442
     AV_COPY32(&h->non_zero_count_cache[4+8* 1], &h->non_zero_count[mb_xy][ 0]);
     AV_COPY32(&h->non_zero_count_cache[4+8* 2], &h->non_zero_count[mb_xy][ 4]);
     AV_COPY32(&h->non_zero_count_cache[4+8* 3], &h->non_zero_count[mb_xy][ 8]);
     AV_COPY32(&h->non_zero_count_cache[4+8* 4], &h->non_zero_count[mb_xy][12]);
d02bb3ec
 
     h->cbp= h->cbp_table[mb_xy];
 
     {
         int list;
         for(list=0; list<h->list_count; list++){
             int8_t *ref;
             int y, b_stride;
             int16_t (*mv_dst)[2];
             int16_t (*mv_src)[2];
 
             if(!USES_LIST(mb_type, list)){
                 fill_rectangle(  h->mv_cache[list][scan8[0]], 4, 4, 8, pack16to32(0,0), 4);
                 AV_WN32A(&h->ref_cache[list][scan8[ 0]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
                 AV_WN32A(&h->ref_cache[list][scan8[ 2]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
                 AV_WN32A(&h->ref_cache[list][scan8[ 8]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
                 AV_WN32A(&h->ref_cache[list][scan8[10]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
                 continue;
             }
 
             ref = &s->current_picture.ref_index[list][4*mb_xy];
             {
                 int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
                 AV_WN32A(&h->ref_cache[list][scan8[ 0]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
                 AV_WN32A(&h->ref_cache[list][scan8[ 2]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
                 ref += 2;
                 AV_WN32A(&h->ref_cache[list][scan8[ 8]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
                 AV_WN32A(&h->ref_cache[list][scan8[10]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
             }
 
             b_stride = h->b_stride;
             mv_dst   = &h->mv_cache[list][scan8[0]];
             mv_src   = &s->current_picture.motion_val[list][4*s->mb_x + 4*s->mb_y*b_stride];
             for(y=0; y<4; y++){
                 AV_COPY128(mv_dst + 8*y, mv_src + y*b_stride);
             }
 
         }
     }
 
 
 /*
 0 . T T. T T T T
 1 L . .L . . . .
 2 L . .L . . . .
 3 . T TL . . . .
 4 L . .L . . . .
 5 L . .. . . . .
 */
 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
     if(top_type){
c90b9442
         AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][3*4]);
d02bb3ec
     }
 
     if(left_type[0]){
c90b9442
         h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][3+0*4];
         h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][3+1*4];
         h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[0]][3+2*4];
         h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[0]][3+3*4];
d02bb3ec
     }
 
     // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
     if(!CABAC && h->pps.transform_8x8_mode){
         if(IS_8x8DCT(top_type)){
             h->non_zero_count_cache[4+8*0]=
c90b9442
             h->non_zero_count_cache[5+8*0]= (h->cbp_table[top_xy] & 0x4000) >> 12;
d02bb3ec
             h->non_zero_count_cache[6+8*0]=
c90b9442
             h->non_zero_count_cache[7+8*0]= (h->cbp_table[top_xy] & 0x8000) >> 12;
d02bb3ec
         }
         if(IS_8x8DCT(left_type[0])){
             h->non_zero_count_cache[3+8*1]=
c90b9442
             h->non_zero_count_cache[3+8*2]= (h->cbp_table[left_xy[0]]&0x2000) >> 12; //FIXME check MBAFF
d02bb3ec
         }
         if(IS_8x8DCT(left_type[1])){
             h->non_zero_count_cache[3+8*3]=
c90b9442
             h->non_zero_count_cache[3+8*4]= (h->cbp_table[left_xy[1]]&0x8000) >> 12; //FIXME check MBAFF
d02bb3ec
         }
 
         if(IS_8x8DCT(mb_type)){
             h->non_zero_count_cache[scan8[0   ]]= h->non_zero_count_cache[scan8[1   ]]=
c90b9442
             h->non_zero_count_cache[scan8[2   ]]= h->non_zero_count_cache[scan8[3   ]]= (h->cbp & 0x1000) >> 12;
d02bb3ec
 
             h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
c90b9442
             h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= (h->cbp & 0x2000) >> 12;
d02bb3ec
 
             h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
c90b9442
             h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= (h->cbp & 0x4000) >> 12;
d02bb3ec
 
             h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
c90b9442
             h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= (h->cbp & 0x8000) >> 12;
d02bb3ec
         }
     }
 
     if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
         int list;
         for(list=0; list<h->list_count; list++){
             if(USES_LIST(top_type, list)){
                 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                 const int b8_xy= 4*top_xy + 2;
                 int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
                 AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]);
                 h->ref_cache[list][scan8[0] + 0 - 1*8]=
                 h->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 0]];
                 h->ref_cache[list][scan8[0] + 2 - 1*8]=
                 h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]];
             }else{
                 AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]);
                 AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
             }
 
             if(!IS_INTERLACED(mb_type^left_type[0])){
                 if(USES_LIST(left_type[0], list)){
                     const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
                     const int b8_xy= 4*left_xy[0] + 1;
                     int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
                     AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 0 ], s->current_picture.motion_val[list][b_xy + h->b_stride*0]);
                     AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 8 ], s->current_picture.motion_val[list][b_xy + h->b_stride*1]);
                     AV_COPY32(h->mv_cache[list][scan8[0] - 1 +16 ], s->current_picture.motion_val[list][b_xy + h->b_stride*2]);
                     AV_COPY32(h->mv_cache[list][scan8[0] - 1 +24 ], s->current_picture.motion_val[list][b_xy + h->b_stride*3]);
                     h->ref_cache[list][scan8[0] - 1 + 0 ]=
                     h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*0]];
                     h->ref_cache[list][scan8[0] - 1 +16 ]=
                     h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*1]];
                 }else{
                     AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 0 ]);
                     AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 8 ]);
                     AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +16 ]);
                     AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +24 ]);
                     h->ref_cache[list][scan8[0] - 1 + 0  ]=
                     h->ref_cache[list][scan8[0] - 1 + 8  ]=
                     h->ref_cache[list][scan8[0] - 1 + 16 ]=
                     h->ref_cache[list][scan8[0] - 1 + 24 ]= LIST_NOT_USED;
                 }
             }
         }
     }
 
     return 0;
 }
 
0ffc8415
 static void loop_filter(H264Context *h, int start_x, int end_x){
c988f975
     MpegEncContext * const s = &h->s;
     uint8_t  *dest_y, *dest_cb, *dest_cr;
     int linesize, uvlinesize, mb_x, mb_y;
     const int end_mb_y= s->mb_y + FRAME_MBAFF;
     const int old_slice_type= h->slice_type;
6e3ef511
     const int pixel_shift = h->pixel_shift;
c988f975
 
     if(h->deblocking_filter) {
0ffc8415
         for(mb_x= start_x; mb_x<end_x; mb_x++){
c988f975
             for(mb_y=end_mb_y - FRAME_MBAFF; mb_y<= end_mb_y; mb_y++){
78998bf2
                 int mb_xy, mb_type;
c988f975
                 mb_xy = h->mb_xy = mb_x + mb_y*s->mb_stride;
                 h->slice_num= h->slice_table[mb_xy];
                 mb_type= s->current_picture.mb_type[mb_xy];
                 h->list_count= h->list_counts[mb_xy];
 
                 if(FRAME_MBAFF)
                     h->mb_mbaff = h->mb_field_decoding_flag = !!IS_INTERLACED(mb_type);
 
                 s->mb_x= mb_x;
                 s->mb_y= mb_y;
6e3ef511
                 dest_y  = s->current_picture.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize  ) * 16;
c90b9442
                 dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * (8 << CHROMA444);
                 dest_cr = s->current_picture.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * (8 << CHROMA444);
c988f975
                     //FIXME simplify above
 
                 if (MB_FIELD) {
                     linesize   = h->mb_linesize   = s->linesize * 2;
                     uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
                     if(mb_y&1){ //FIXME move out of this function?
                         dest_y -= s->linesize*15;
a3589cce
                         dest_cb-= s->uvlinesize*((8 << CHROMA444)-1);
                         dest_cr-= s->uvlinesize*((8 << CHROMA444)-1);
c988f975
                     }
                 } else {
                     linesize   = h->mb_linesize   = s->linesize;
                     uvlinesize = h->mb_uvlinesize = s->uvlinesize;
                 }
c90b9442
                 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, CHROMA444, 0);
aaa995d7
                 if(fill_filter_caches(h, mb_type))
44a5e7b6
                     continue;
c988f975
                 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
                 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
 
77d40dce
                 if (FRAME_MBAFF) {
c988f975
                     ff_h264_filter_mb     (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
                 } else {
                     ff_h264_filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
                 }
             }
         }
     }
     h->slice_type= old_slice_type;
4e987f82
     s->mb_x= end_x;
c988f975
     s->mb_y= end_mb_y - FRAME_MBAFF;
f4b8b825
     h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
     h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
c988f975
 }
 
69a28f3e
 static void predict_field_decoding_flag(H264Context *h){
     MpegEncContext * const s = &h->s;
     const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
     int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
                 ? s->current_picture.mb_type[mb_xy-1]
                 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
                 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
                 : 0;
     h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
 }
 
d375c104
 /**
  * Draw edges and report progress for the last MB row.
  */
 static void decode_finish_row(H264Context *h){
     MpegEncContext * const s = &h->s;
     int top = 16*(s->mb_y >> FIELD_PICTURE);
     int height = 16 << FRAME_MBAFF;
     int deblock_border = (16 + 4) << FRAME_MBAFF;
     int pic_height = 16*s->mb_height >> FIELD_PICTURE;
 
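     /* With the in-loop filter enabled, deblocking of later MB rows may still modify
      * up to deblock_border pixels above them, so drawing and progress reporting are
      * delayed accordingly. */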
     if (h->deblocking_filter) {
         if((top + height) >= pic_height)
             height += deblock_border;
 
         top -= deblock_border;
     }
 
     if (top >= pic_height || (top + height) < h->emu_edge_height)
         return;
 
     height = FFMIN(height, pic_height - top);
     if (top < h->emu_edge_height) {
         height = top+height;
         top = 0;
     }
 
     ff_draw_horiz_band(s, top, height);
 
     if (s->dropable) return;
 
     ff_thread_report_progress((AVFrame*)s->current_picture_ptr, top + height - 1,
                              s->picture_structure==PICT_BOTTOM_FIELD);
 }
 
3a84713a
 static int decode_slice(struct AVCodecContext *avctx, void *arg){
     H264Context *h = *(void**)arg;
0da71265
     MpegEncContext * const s = &h->s;
     const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
0ffc8415
     int lf_x_start = s->mb_x;
0da71265
 
     s->mb_skip_run= -1;
 
89db0bae
     h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
5317c95b
                     (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
89db0bae
 
e5017ab8
     if( h->pps.cabac ) {
         /* realign */
         align_get_bits( &s->gb );
 
         /* init cabac */
d61c4e73
         ff_init_cabac_states( &h->cabac);
e5017ab8
         ff_init_cabac_decoder( &h->cabac,
                                s->gb.buffer + get_bits_count(&s->gb)/8,
6e44ba15
                                (get_bits_left(&s->gb) + 7)/8);
cc51b282
 
         ff_h264_init_cabac_states(h);
95c26348
 
e5017ab8
         for(;;){
851ded89
 //START_TIMER
cc51b282
             int ret = ff_h264_decode_mb_cabac(h);
6867a90b
             int eos;
851ded89
 //STOP_TIMER("decode_mb_cabac")
0da71265
 
903d58f6
             if(ret>=0) ff_h264_hl_decode_mb(h);
0da71265
 
5d18eaad
             if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
e5017ab8
                 s->mb_y++;
 
cc51b282
                 ret = ff_h264_decode_mb_cabac(h);
e5017ab8
 
903d58f6
                 if(ret>=0) ff_h264_hl_decode_mb(h);
e5017ab8
                 s->mb_y--;
             }
6867a90b
             eos = get_cabac_terminate( &h->cabac );
e5017ab8
 
3566042a
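             /* For streams detected as truncated (see decode_nal_units()), running
              * past the end of the CABAC bytestream marks the end of the slice rather
              * than being treated as an error. */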
             if((s->workaround_bugs & FF_BUG_TRUNCATED) && h->cabac.bytestream > h->cabac.bytestream_end + 2){
                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
0ffc8415
                 if (s->mb_x >= lf_x_start) loop_filter(h, lf_x_start, s->mb_x + 1);
3566042a
                 return 0;
             }
5659b509
             if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
706da4af
                 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
e5017ab8
                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
                 return -1;
             }
 
             if( ++s->mb_x >= s->mb_width ) {
0ffc8415
                 loop_filter(h, lf_x_start, s->mb_x);
                 s->mb_x = lf_x_start = 0;
d375c104
                 decode_finish_row(h);
5175b937
                 ++s->mb_y;
f3e53d9f
                 if(FIELD_OR_MBAFF_PICTURE) {
6867a90b
                     ++s->mb_y;
69cc3183
                     if(FRAME_MBAFF && s->mb_y < s->mb_height)
                         predict_field_decoding_flag(h);
6867a90b
                 }
0da71265
             }
 
e5017ab8
             if( eos || s->mb_y >= s->mb_height ) {
a9c9a240
                 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
e5017ab8
                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
0ffc8415
                 if (s->mb_x > lf_x_start) loop_filter(h, lf_x_start, s->mb_x);
0da71265
                 return 0;
e5017ab8
             }
         }
 
     } else {
         for(;;){
e1e94902
             int ret = ff_h264_decode_mb_cavlc(h);
e5017ab8
 
903d58f6
             if(ret>=0) ff_h264_hl_decode_mb(h);
e5017ab8
 
5d18eaad
             if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
e5017ab8
                 s->mb_y++;
e1e94902
                 ret = ff_h264_decode_mb_cavlc(h);
e5017ab8
 
903d58f6
                 if(ret>=0) ff_h264_hl_decode_mb(h);
e5017ab8
                 s->mb_y--;
             }
 
             if(ret<0){
                 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
0da71265
                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
                 return -1;
             }
e5017ab8
 
             if(++s->mb_x >= s->mb_width){
0ffc8415
                 loop_filter(h, lf_x_start, s->mb_x);
                 s->mb_x = lf_x_start = 0;
d375c104
                 decode_finish_row(h);
6867a90b
                 ++s->mb_y;
f3e53d9f
                 if(FIELD_OR_MBAFF_PICTURE) {
6867a90b
                     ++s->mb_y;
69cc3183
                     if(FRAME_MBAFF && s->mb_y < s->mb_height)
                         predict_field_decoding_flag(h);
6867a90b
                 }
                 if(s->mb_y >= s->mb_height){
a9c9a240
                     tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
e5017ab8
 
4ce776d6
                     if(   get_bits_count(&s->gb) == s->gb.size_in_bits
                        || get_bits_count(&s->gb) <  s->gb.size_in_bits && s->avctx->error_recognition < FF_ER_AGGRESSIVE) {
e5017ab8
                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
 
                         return 0;
                     }else{
                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
 
                         return -1;
                     }
                 }
             }
 
             if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
a9c9a240
                 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
e5017ab8
                 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
0ffc8415
                     if (s->mb_x > lf_x_start) loop_filter(h, lf_x_start, s->mb_x);
e5017ab8
 
                     return 0;
                 }else{
                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
 
                     return -1;
                 }
             }
0da71265
         }
     }
e5017ab8
 
0da71265
 #if 0
     for(;s->mb_y < s->mb_height; s->mb_y++){
         for(;s->mb_x < s->mb_width; s->mb_x++){
             int ret= decode_mb(h);
115329f1
 
903d58f6
             ff_h264_hl_decode_mb(h);
0da71265
 
             if(ret<0){
267f7edc
                 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
0da71265
                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
 
                 return -1;
             }
115329f1
 
0da71265
             if(++s->mb_x >= s->mb_width){
                 s->mb_x=0;
                 if(++s->mb_y >= s->mb_height){
                     if(get_bits_count(s->gb) == s->gb.size_in_bits){
                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
 
                         return 0;
                     }else{
                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
 
                         return -1;
                     }
                 }
             }
115329f1
 
0da71265
             if(get_bits_count(s->gb) >= s->gb.size_in_bits){
                 if(get_bits_count(s->gb) == s->gb.size_in_bits){
                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
 
                     return 0;
                 }else{
                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
 
                     return -1;
                 }
             }
         }
         s->mb_x=0;
         ff_draw_horiz_band(s, 16*s->mb_y, 16);
     }
 #endif
     return -1; //not reached
 }
 
afebe2f7
 /**
  * Call decode_slice() for each context.
  *
  * @param h h264 master context
  * @param context_count number of contexts to execute
  */
 static void execute_decode_slices(H264Context *h, int context_count){
     MpegEncContext * const s = &h->s;
     AVCodecContext * const avctx= s->avctx;
     H264Context *hx;
     int i;
 
40e5d31b
     if (s->avctx->hwaccel)
         return;
0d3d172f
     if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
369122dd
         return;
afebe2f7
     if(context_count == 1) {
74e8b78b
         decode_slice(avctx, &h);
afebe2f7
     } else {
         for(i = 1; i < context_count; i++) {
             hx = h->thread_context[i];
047599a4
             hx->s.error_recognition = avctx->error_recognition;
afebe2f7
             hx->s.error_count = 0;
488efb33
             hx->x264_build= h->x264_build;
afebe2f7
         }
 
         avctx->execute(avctx, (void *)decode_slice,
01418506
                        h->thread_context, NULL, context_count, sizeof(void*));
afebe2f7
 
         /* pull back stuff from slices to master context */
         hx = h->thread_context[context_count - 1];
         s->mb_x = hx->s.mb_x;
         s->mb_y = hx->s.mb_y;
12d96de3
         s->dropable = hx->s.dropable;
         s->picture_structure = hx->s.picture_structure;
afebe2f7
         for(i = 1; i < context_count; i++)
             h->s.error_count += h->thread_context[i]->s.error_count;
     }
 }
 
 
30317501
 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
0da71265
     MpegEncContext * const s = &h->s;
     AVCodecContext * const avctx= s->avctx;
afebe2f7
     H264Context *hx; ///< thread context
0424e052
     int buf_index;
     int context_count;
     int next_avc;
     int pass = !(avctx->active_thread_type & FF_THREAD_FRAME);
     int nals_needed=0; ///< number of NALs that need decoding before the next frame thread starts
     int nal_index;
afebe2f7
 
d375c104
     h->max_contexts = (HAVE_THREADS && (s->avctx->active_thread_type&FF_THREAD_SLICE)) ? avctx->thread_count : 1;
66a4b2c1
     if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
afebe2f7
         h->current_slice = 0;
12d96de3
         if (!s->first_field)
f6e3c460
             s->current_picture_ptr= NULL;
9c095463
         ff_h264_reset_sei(h);
66a4b2c1
     }
 
0424e052
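     /* With frame threading, pass 0 only scans the NAL units to find how many of them
      * must be decoded before the next frame thread may start (nals_needed); the actual
      * decoding is done in pass 1. */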
     for(;pass <= 1;pass++){
         buf_index = 0;
         context_count = 0;
         next_avc = h->is_avc ? 0 : buf_size;
         nal_index = 0;
0da71265
     for(;;){
         int consumed;
         int dst_length;
         int bit_length;
30317501
         const uint8_t *ptr;
4770b1b4
         int i, nalsize = 0;
afebe2f7
         int err;
115329f1
 
74b14aac
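         /* In AVC (length-prefixed) mode each NAL unit is preceded by its size in
          * h->nal_length_size bytes; otherwise search for the next 00 00 01 start code. */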
         if(buf_index >= next_avc) {
1c48415b
             if(buf_index >= buf_size) break;
             nalsize = 0;
             for(i = 0; i < h->nal_length_size; i++)
                 nalsize = (nalsize << 8) | buf[buf_index++];
9d252137
             if(nalsize <= 0 || nalsize > buf_size - buf_index){
                 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
                 break;
1c48415b
             }
74b14aac
             next_avc= buf_index + nalsize;
1c48415b
         } else {
             // start code prefix search
52255d17
             for(; buf_index + 3 < next_avc; buf_index++){
1c48415b
                 // This should always succeed in the first iteration.
                 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
                     break;
8b031359
             }
115329f1
 
1c48415b
             if(buf_index+3 >= buf_size) break;
115329f1
 
1c48415b
             buf_index+=3;
52255d17
             if(buf_index >= next_avc) continue;
1c48415b
         }
115329f1
 
afebe2f7
         hx = h->thread_context[context_count];
 
74b14aac
         ptr= ff_h264_decode_nal(hx, buf + buf_index, &dst_length, &consumed, next_avc - buf_index);
ff82e429
         if (ptr==NULL || dst_length < 0){
ac658be5
             return -1;
         }
3566042a
         i= buf_index + consumed;
         if((s->workaround_bugs & FF_BUG_AUTODETECT) && i+3<next_avc &&
            buf[i]==0x00 && buf[i+1]==0x00 && buf[i+2]==0x01 && buf[i+3]==0xE0)
             s->workaround_bugs |= FF_BUG_TRUNCATED;
 
         if(!(s->workaround_bugs & FF_BUG_TRUNCATED)){
54544100
         while(dst_length > 0 && ptr[dst_length - 1] == 0)
c4da83fb
             dst_length--;
3566042a
         }
1790a5e9
         bit_length= !dst_length ? 0 : (8*dst_length - ff_h264_decode_rbsp_trailing(h, ptr + dst_length - 1));
0da71265
 
         if(s->avctx->debug&FF_DEBUG_STARTCODE){
0e5758d1
             av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d/%d at %d/%d length %d\n", hx->nal_unit_type, hx->nal_ref_idc, buf_index, buf_size, dst_length);
0da71265
         }
115329f1
 
74b14aac
         if (h->is_avc && (nalsize != consumed) && nalsize){
e262365d
             av_log(h->s.avctx, AV_LOG_DEBUG, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
9d2cc8c1
         }
4770b1b4
 
0da71265
         buf_index += consumed;
0424e052
         nal_index++;
 
         if(pass == 0) {
             // packets can sometimes contain multiple PPS/SPS
             // e.g. two PAFF field pictures in one packet, or a demuxer which splits NALs strangely
             // if so, when frame threading is used we can't start the next thread until we've read all of them
             switch (hx->nal_unit_type) {
                 case NAL_SPS:
                 case NAL_PPS:
ea6331f8
                 case NAL_IDR_SLICE:
                 case NAL_SLICE:
0424e052
                     nals_needed = nal_index;
             }
             continue;
         }
0da71265
 
8ed2ae09
         //FIXME do not discard SEI id
e7021c0e
         if(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc  == 0)
0da71265
             continue;
115329f1
 
afebe2f7
       again:
         err = 0;
         switch(hx->nal_unit_type){
0da71265
         case NAL_IDR_SLICE:
afebe2f7
             if (h->nal_unit_type != NAL_IDR_SLICE) {
                 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
                 return -1;
             }
3b66c4c5
             idr(h); //FIXME ensure we don't lose some frames if there is reordering
0da71265
         case NAL_SLICE:
afebe2f7
             init_get_bits(&hx->s.gb, ptr, bit_length);
             hx->intra_gb_ptr=
             hx->inter_gb_ptr= &hx->s.gb;
             hx->s.data_partitioning = 0;
 
             if((err = decode_slice_header(hx, h)))
                break;
 
d375c104
             s->current_picture_ptr->key_frame |=
                     (hx->nal_unit_type == NAL_IDR_SLICE) ||
                     (h->sei_recovery_frame_cnt >= 0);
 
3bccd93a
             if (h->current_slice == 1) {
d375c104
                 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)) {
0424e052
                     decode_postinit(h, nal_index >= nals_needed);
d375c104
                 }
 
3bccd93a
                 if (s->avctx->hwaccel && s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0)
6026a096
                     return -1;
3bccd93a
                 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
                     ff_vdpau_h264_picture_start(s);
6026a096
             }
 
8ed2ae09
             if(hx->redundant_pic_count==0
afebe2f7
                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
ce5e49b0
                && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=AV_PICTURE_TYPE_B)
                && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==AV_PICTURE_TYPE_I)
369122dd
                && avctx->skip_frame < AVDISCARD_ALL){
d404b3ed
                 if(avctx->hwaccel) {
                     if (avctx->hwaccel->decode_slice(avctx, &buf[buf_index - consumed], consumed) < 0)
                         return -1;
                 }else
0d3d172f
                 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){
369122dd
                     static const uint8_t start_code[] = {0x00, 0x00, 0x01};
c639fc72
                     ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code));
                     ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed );
369122dd
                 }else
f2c214a1
                     context_count++;
369122dd
             }
0da71265
             break;
         case NAL_DPA:
afebe2f7
             init_get_bits(&hx->s.gb, ptr, bit_length);
             hx->intra_gb_ptr=
             hx->inter_gb_ptr= NULL;
0410ee8f
 
             if ((err = decode_slice_header(hx, h)) < 0)
                 break;
 
afebe2f7
             hx->s.data_partitioning = 1;
115329f1
 
0da71265
             break;
         case NAL_DPB:
afebe2f7
             init_get_bits(&hx->intra_gb, ptr, bit_length);
             hx->intra_gb_ptr= &hx->intra_gb;
0da71265
             break;
         case NAL_DPC:
afebe2f7
             init_get_bits(&hx->inter_gb, ptr, bit_length);
             hx->inter_gb_ptr= &hx->inter_gb;
8b92b792
 
afebe2f7
             if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
b18e5c03
                && s->context_initialized
afebe2f7
                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
ce5e49b0
                && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=AV_PICTURE_TYPE_B)
                && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==AV_PICTURE_TYPE_I)
e0111b32
                && avctx->skip_frame < AVDISCARD_ALL)
afebe2f7
                 context_count++;
0da71265
             break;
         case NAL_SEI:
cdd10689
             init_get_bits(&s->gb, ptr, bit_length);
1790a5e9
             ff_h264_decode_sei(h);
0da71265
             break;
         case NAL_SPS:
             init_get_bits(&s->gb, ptr, bit_length);
1790a5e9
             ff_h264_decode_seq_parameter_set(h);
115329f1
 
aa15e687
             if (s->flags& CODEC_FLAG_LOW_DELAY ||
                 (h->sps.bitstream_restriction_flag && !h->sps.num_reorder_frames))
0da71265
                 s->low_delay=1;
115329f1
 
a18030bb
             if(avctx->has_b_frames < 2)
                 avctx->has_b_frames= !s->low_delay;
8dbe5856
 
             if (avctx->bits_per_raw_sample != h->sps.bit_depth_luma) {
                 if (h->sps.bit_depth_luma >= 8 && h->sps.bit_depth_luma <= 10) {
                     avctx->bits_per_raw_sample = h->sps.bit_depth_luma;
19a0729b
                     h->pixel_shift = h->sps.bit_depth_luma > 8;
8dbe5856
 
                     ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma);
                     ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma);
                     dsputil_init(&s->dsp, s->avctx);
                 } else {
                     av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", h->sps.bit_depth_luma);
                     return -1;
                 }
             }
0da71265
             break;
         case NAL_PPS:
             init_get_bits(&s->gb, ptr, bit_length);
115329f1
 
1790a5e9
             ff_h264_decode_picture_parameter_set(h, bit_length);
0da71265
 
             break;
ab470fa7
         case NAL_AUD:
         case NAL_END_SEQUENCE:
         case NAL_END_STREAM:
         case NAL_FILLER_DATA:
         case NAL_SPS_EXT:
         case NAL_AUXILIARY_SLICE:
0da71265
             break;
bb270c08
         default:
4ad04da2
             av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", hx->nal_unit_type, bit_length);
115329f1
         }
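         /* Once max_contexts slice contexts have been queued, decode them
          * (in parallel when slice threading is enabled) before parsing any
          * further NAL units. */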
 
afebe2f7
         if(context_count == h->max_contexts) {
             execute_decode_slices(h, context_count);
             context_count = 0;
         }
 
         if (err < 0)
             av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
         else if(err == 1) {
             /* Slice could not be decoded in parallel mode, copy down
              * NAL unit stuff to context 0 and restart. Note that
1412060e
              * rbsp_buffer is not transferred, but since we no longer
afebe2f7
              * run in parallel mode this should not be an issue. */
             h->nal_unit_type = hx->nal_unit_type;
             h->nal_ref_idc   = hx->nal_ref_idc;
             hx = h;
             goto again;
         }
     }
0424e052
     }
afebe2f7
     if(context_count)
         execute_decode_slices(h, context_count);
0da71265
     return buf_index;
 }
 
 /**
3b66c4c5
  * Return the number of bytes consumed for building the current frame.
0da71265
  */
 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
755bfeab
         if(pos==0) pos=1; // avoid infinite loops (I doubt this is needed, but just in case)
0da71265
         if(pos+10>buf_size) pos=buf_size; // if we are within a few bytes of the end, report the whole buffer as consumed
 
         return pos;
 }
 
115329f1
 static int decode_frame(AVCodecContext *avctx,
0da71265
                              void *data, int *data_size,
7a00bbad
                              AVPacket *avpkt)
0da71265
 {
7a00bbad
     const uint8_t *buf = avpkt->data;
     int buf_size = avpkt->size;
0da71265
     H264Context *h = avctx->priv_data;
     MpegEncContext *s = &h->s;
115329f1
     AVFrame *pict = data;
0da71265
     int buf_index;
115329f1
 
0da71265
     s->flags= avctx->flags;
303e50e6
     s->flags2= avctx->flags2;
0da71265
 
1412060e
    /* end of stream, output what is still in the buffers */
9d252137
  out:
0da71265
     if (buf_size == 0) {
97bbb885
         Picture *out;
         int i, out_idx;
 
d375c104
         s->current_picture_ptr = NULL;
 
97bbb885
 //FIXME factorize this with the output code below
         out = h->delayed_pic[0];
         out_idx = 0;
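         /* Pick the delayed picture with the smallest POC, but do not look
          * past the first keyframe or MMCO reset. */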
c173a088
         for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)
97bbb885
             if(h->delayed_pic[i]->poc < out->poc){
                 out = h->delayed_pic[i];
                 out_idx = i;
             }
 
         for(i=out_idx; h->delayed_pic[i]; i++)
             h->delayed_pic[i] = h->delayed_pic[i+1];
 
         if(out){
             *data_size = sizeof(AVFrame);
             *pict= *(AVFrame*)out;
         }
 
0da71265
         return 0;
     }
115329f1
 
0da71265
     buf_index=decode_nal_units(h, buf, buf_size);
115329f1
     if(buf_index < 0)
0da71265
         return -1;
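     /* An end-of-sequence NAL unit with no picture in progress is handled
      * like an empty packet: jump back and flush the delayed pictures. */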
 
9d252137
     if (!s->current_picture_ptr && h->nal_unit_type == NAL_END_SEQUENCE) {
         buf_size = 0;
         goto out;
     }
 
56c70e1d
     if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
e7021c0e
         if (avctx->skip_frame >= AVDISCARD_NONREF)
8ed2ae09
             return 0;
56c70e1d
         av_log(avctx, AV_LOG_ERROR, "no frame!\n");
         return -1;
     }
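     /* A complete frame (or the last chunk of one) has been decoded: close
      * the current field and, if a picture is ready in output order, return
      * it; otherwise wait for the second field. */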
 
66a4b2c1
     if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
115329f1
 
0424e052
         if(s->flags2 & CODEC_FLAG2_CHUNKS) decode_postinit(h, 1);
66a4b2c1
 
d375c104
         field_end(h, 0);
 
         if (!h->next_output_pic) {
12d96de3
             /* Wait for second field. */
             *data_size = 0;
 
         } else {
d375c104
             *data_size = sizeof(AVFrame);
             *pict = *(AVFrame*)h->next_output_pic;
12d96de3
         }
a4dae92b
     }
 
3165e258
     assert(pict->data[0] || !*data_size);
4e4d983e
     ff_print_debug_info(s, pict);
0da71265
 //printf("out %d\n", (int)pict->data[0]);
 
     return get_consumed_bytes(s, buf_index, buf_size);
 }
 #if 0
 static inline void fill_mb_avail(H264Context *h){
     MpegEncContext * const s = &h->s;
7bc9090a
     const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
0da71265
 
     if(s->mb_y){
7bc9090a
         h->mb_avail[0]= s->mb_x                 && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
         h->mb_avail[1]=                            h->slice_table[mb_xy - s->mb_stride    ] == h->slice_num;
         h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
0da71265
     }else{
         h->mb_avail[0]=
         h->mb_avail[1]=
         h->mb_avail[2]= 0;
     }
     h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
     h->mb_avail[4]= 1; //FIXME move out
     h->mb_avail[5]= 0; //FIXME move out
 }
 #endif
 
07e4e3ea
 #ifdef TEST
6bf398a0
 #undef printf
d04d5bcd
 #undef random
0da71265
 #define COUNT 8000
 #define SIZE (COUNT*40)
f8a80fd6
 int main(void){
0da71265
     int i;
     uint8_t temp[SIZE];
     PutBitContext pb;
     GetBitContext gb;
 //    int int_temp[10000];
     DSPContext dsp;
     AVCodecContext avctx;
115329f1
 
0da71265
     dsputil_init(&dsp, &avctx);
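     /* Round-trip self-test: write COUNT Exp-Golomb codes into a buffer,
      * read them back, verify each value and time both directions. */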
 
ed7debda
     init_put_bits(&pb, temp, SIZE);
0da71265
     printf("testing unsigned exp golomb\n");
     for(i=0; i<COUNT; i++){
         START_TIMER
         set_ue_golomb(&pb, i);
         STOP_TIMER("set_ue_golomb");
     }
     flush_put_bits(&pb);
115329f1
 
0da71265
     init_get_bits(&gb, temp, 8*SIZE);
     for(i=0; i<COUNT; i++){
         int j, s;
115329f1
 
0da71265
         s= show_bits(&gb, 24);
115329f1
 
0da71265
         START_TIMER
         j= get_ue_golomb(&gb);
         if(j != i){
755bfeab
             printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
0da71265
 //            return -1;
         }
         STOP_TIMER("get_ue_golomb");
     }
115329f1
 
 
c58222c5
     init_put_bits(&pb, temp, SIZE);
0da71265
     printf("testing signed exp golomb\n");
     for(i=0; i<COUNT; i++){
         START_TIMER
         set_se_golomb(&pb, i - COUNT/2);
         STOP_TIMER("set_se_golomb");
     }
     flush_put_bits(&pb);
115329f1
 
0da71265
     init_get_bits(&gb, temp, 8*SIZE);
     for(i=0; i<COUNT; i++){
         int j, s;
115329f1
 
0da71265
         s= show_bits(&gb, 24);
115329f1
 
0da71265
         START_TIMER
         j= get_se_golomb(&gb);
         if(j != i - COUNT/2){
755bfeab
             printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
0da71265
 //            return -1;
         }
         STOP_TIMER("get_se_golomb");
     }
 
6bf398a0
 #if 0
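     /* NOTE: the block below is disabled; the DCT test dereferences an
      * H264Context pointer that is not declared at that point and the
      * quantizer test uses undeclared qp/src1_block/src2_block. */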
0da71265
     printf("testing 4x4 (I)DCT\n");
115329f1
 
0da71265
     DCTELEM block[16];
     uint8_t src[16], ref[16];
     uint64_t error= 0, max_error=0;
 
     for(i=0; i<COUNT; i++){
         int j;
 //        printf("%d %d %d\n", r1, r2, (r2-r1)*16);
         for(j=0; j<16; j++){
             ref[j]= random()%255;
             src[j]= random()%255;
         }
 
         h264_diff_dct_c(block, src, ref, 4);
115329f1
 
0da71265
         //normalize
         for(j=0; j<16; j++){
 //            printf("%d ", block[j]);
             block[j]= block[j]*4;
             if(j&1) block[j]= (block[j]*4 + 2)/5;
             if(j&4) block[j]= (block[j]*4 + 2)/5;
         }
 //        printf("\n");
115329f1
 
4693b031
         h->h264dsp.h264_idct_add(ref, block, 4);
0da71265
 /*        for(j=0; j<16; j++){
             printf("%d ", ref[j]);
         }
         printf("\n");*/
115329f1
 
0da71265
         for(j=0; j<16; j++){
c26abfa5
             int diff= FFABS(src[j] - ref[j]);
115329f1
 
0da71265
             error+= diff*diff;
             max_error= FFMAX(max_error, diff);
         }
     }
     printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
     printf("testing quantizer\n");
     for(qp=0; qp<52; qp++){
         for(i=0; i<16; i++)
             src1_block[i]= src2_block[i]= random()%255;
115329f1
 
0da71265
     }
     printf("Testing NAL layer\n");
115329f1
 
0da71265
     uint8_t bitstream[COUNT];
     uint8_t nal[COUNT*2];
     H264Context h;
     memset(&h, 0, sizeof(H264Context));
115329f1
 
0da71265
     for(i=0; i<COUNT; i++){
         int zeros= i;
         int nal_length;
         int consumed;
         int out_length;
         uint8_t *out;
         int j;
115329f1
 
0da71265
         for(j=0; j<COUNT; j++){
             bitstream[j]= (random() % 255) + 1;
         }
115329f1
 
0da71265
         for(j=0; j<zeros; j++){
             int pos= random() % COUNT;
             while(bitstream[pos] == 0){
                 pos++;
                 pos %= COUNT;
             }
             bitstream[pos]=0;
         }
115329f1
 
0da71265
         START_TIMER
115329f1
 
0da71265
         nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
         if(nal_length<0){
             printf("encoding failed\n");
             return -1;
         }
115329f1
 
1790a5e9
         out= ff_h264_decode_nal(&h, nal, &out_length, &consumed, nal_length);
0da71265
 
         STOP_TIMER("NAL")
115329f1
 
0da71265
         if(out_length != COUNT){
             printf("incorrect length %d %d\n", out_length, COUNT);
             return -1;
         }
115329f1
 
0da71265
         if(consumed != nal_length){
             printf("incorrect consumed length %d %d\n", nal_length, consumed);
             return -1;
         }
115329f1
 
0da71265
         if(memcmp(bitstream, out, COUNT)){
755bfeab
             printf("mismatch\n");
0da71265
             return -1;
         }
     }
6bf398a0
 #endif
115329f1
 
0da71265
     printf("Testing RBSP\n");
115329f1
 
 
0da71265
     return 0;
 }
07e4e3ea
 #endif /* TEST */
0da71265
 
 
cbf1eae9
 av_cold void ff_h264_free_context(H264Context *h)
0da71265
 {
5f129a05
     int i;
115329f1
 
2ed0f766
     free_tables(h, 1); //FIXME cleanup init stuff perhaps
5f129a05
 
     for(i = 0; i < MAX_SPS_COUNT; i++)
         av_freep(h->sps_buffers + i);
 
     for(i = 0; i < MAX_PPS_COUNT; i++)
         av_freep(h->pps_buffers + i);
15861962
 }
 
903d58f6
 av_cold int ff_h264_decode_end(AVCodecContext *avctx)
15861962
 {
     H264Context *h = avctx->priv_data;
     MpegEncContext *s = &h->s;
 
     ff_h264_free_context(h);
5f129a05
 
0da71265
     MPV_common_end(s);
 
 //    memset(h, 0, sizeof(H264Context));
115329f1
 
0da71265
     return 0;
 }
 
45eaec30
 static const AVProfile profiles[] = {
     { FF_PROFILE_H264_BASELINE,             "Baseline"              },
     { FF_PROFILE_H264_CONSTRAINED_BASELINE, "Constrained Baseline"  },
     { FF_PROFILE_H264_MAIN,                 "Main"                  },
     { FF_PROFILE_H264_EXTENDED,             "Extended"              },
     { FF_PROFILE_H264_HIGH,                 "High"                  },
     { FF_PROFILE_H264_HIGH_10,              "High 10"               },
     { FF_PROFILE_H264_HIGH_10_INTRA,        "High 10 Intra"         },
     { FF_PROFILE_H264_HIGH_422,             "High 4:2:2"            },
     { FF_PROFILE_H264_HIGH_422_INTRA,       "High 4:2:2 Intra"      },
154f7bb0
     { FF_PROFILE_H264_HIGH_444,             "High 4:4:4"            },
45eaec30
     { FF_PROFILE_H264_HIGH_444_PREDICTIVE,  "High 4:4:4 Predictive" },
     { FF_PROFILE_H264_HIGH_444_INTRA,       "High 4:4:4 Intra"      },
     { FF_PROFILE_H264_CAVLC_444,            "CAVLC 4:4:4"           },
     { FF_PROFILE_UNKNOWN },
 };
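 /* The leading AVCodec fields below are initialized positionally:
  * name, type, id, priv_data_size, init, encode, close, decode and
  * capabilities; the remaining fields use designated initializers. */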
0da71265
 
e7e2df27
 AVCodec ff_h264_decoder = {
0da71265
     "h264",
72415b2a
     AVMEDIA_TYPE_VIDEO,
0da71265
     CODEC_ID_H264,
     sizeof(H264Context),
903d58f6
     ff_h264_decode_init,
0da71265
     NULL,
903d58f6
     ff_h264_decode_end,
0da71265
     decode_frame,
94f7451a
     /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY |
6a9c8594
         CODEC_CAP_SLICE_THREADS | CODEC_CAP_FRAME_THREADS,
7c33ad19
     .flush= flush_dpb,
fe4bf374
     .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
d375c104
     .init_thread_copy      = ONLY_IF_THREADS_ENABLED(decode_init_thread_copy),
     .update_thread_context = ONLY_IF_THREADS_ENABLED(decode_update_thread_context),
45eaec30
     .profiles = NULL_IF_CONFIG_SMALL(profiles),
0da71265
 };
 
b250f9c6
 #if CONFIG_H264_VDPAU_DECODER
e7e2df27
 AVCodec ff_h264_vdpau_decoder = {
369122dd
     "h264_vdpau",
72415b2a
     AVMEDIA_TYPE_VIDEO,
0d3d172f
     CODEC_ID_H264,
369122dd
     sizeof(H264Context),
903d58f6
     ff_h264_decode_init,
369122dd
     NULL,
903d58f6
     ff_h264_decode_end,
369122dd
     decode_frame,
     CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
     .flush= flush_dpb,
     .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
602dd2d3
     .pix_fmts = (const enum PixelFormat[]){PIX_FMT_VDPAU_H264, PIX_FMT_NONE},
45eaec30
     .profiles = NULL_IF_CONFIG_SMALL(profiles),
369122dd
 };
 #endif