libavcodec/motion_est_template.c
1457ab52
 /*
115329f1
  * Motion estimation
8f2ab833
  * Copyright (c) 2002-2004 Michael Niedermayer
1457ab52
  *
b78e7197
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
1457ab52
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
b78e7197
  * version 2.1 of the License, or (at your option) any later version.
1457ab52
  *
b78e7197
  * FFmpeg is distributed in the hope that it will be useful,
1457ab52
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
b78e7197
  * License along with FFmpeg; if not, write to the Free Software
5509bffa
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
1457ab52
  */
115329f1
 
983e3246
 /**
ba87f080
  * @file
983e3246
  * Motion estimation template.
  */
2750b827
 
ca74c0a1
 //Let us hope gcc will remove the unused vars ...(gcc 3.2.2 seems to do it ...)
bb198e19
 /*
  * Declares local shortcuts for fields of the MotionEstContext that a variable
  * named `c` (which must already be in scope) points to: the score map, the
  * search range limits xmin/ymin/xmax/ymax, the current MV penalty table and
  * the predicted vector pred_x/pred_y.
  * score_map and the range limits are tagged av_unused, so a user of the macro
  * that does not reference them compiles without unused-variable warnings.
  */
 #define LOAD_COMMON\
154e30f6
     uint32_t av_unused * const score_map= c->score_map;\
     const int av_unused xmin= c->xmin;\
     const int av_unused ymin= c->ymin;\
     const int av_unused xmax= c->xmax;\
     const int av_unused ymax= c->ymax;\
af4091f1
     uint8_t *mv_penalty= c->current_mv_penalty;\
     const int pred_x= c->pred_x;\
     const int pred_y= c->pred_y;\
1457ab52
 
 /*
  * Scores one half-pel candidate. (x, y) is a full-pel position and (dx, dy)
  * the offset added to it in half-pel units; hx/hy are the resulting
  * coordinates in half-pel units. The value returned by cmp_hpel() plus the MV
  * penalty is stored in `d` and handed to COPY3_IF_LT together with
  * dmin/bx/by (presumably replacing them when d is smaller -- COPY3_IF_LT is
  * defined elsewhere).
  * Names taken from the expansion site: s, d, dmin, bx, by, size, h,
  * ref_index, src_index, cmp_sub, chroma_cmp_sub, flags, mv_penalty, pred_x,
  * pred_y, penalty_factor.
  */
 #define CHECK_HALF_MV(dx, dy, x, y)\
 {\
     const int hx= 2*(x)+(dx);\
     const int hy= 2*(y)+(dy);\
919e7497
     d= cmp_hpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);\
1457ab52
     d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
     COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
 }
 
2750b827
 /**
  * Refine a full-pel motion vector to half-pel precision.
  *
  * @param s         encoder context; the motion estimation state is s->me
  * @param mx_ptr    in: x of the best full-pel vector; out: x in half-pel
  *                  units (2*mx or a neighbouring half-pel position), or 0
  *                  when c->skip is set
  * @param my_ptr    same as mx_ptr, for the y component
  * @param dmin      score of the incoming vector
  * @param src_index passed through to cmp() / cmp_hpel()
  * @param ref_index passed through to cmp() / cmp_hpel()
  * @param size      selects the comparison functions (s->dsp.me_sub_cmp[size]
  *                  and [size+1]) and is passed through to cmp()
  * @param h         passed through to cmp() / cmp_hpel()
  * @return the best score found; the incoming dmin when c->skip is set
  */
 static int hpel_motion_search(MpegEncContext * s,
bb270c08
                                   int *mx_ptr, int *my_ptr, int dmin,
2750b827
                                   int src_index, int ref_index,
                                   int size, int h)
1457ab52
 {
af4091f1
     MotionEstContext * const c= &s->me;
1457ab52
     const int mx = *mx_ptr;
115329f1
     const int my = *my_ptr;
af4091f1
     const int penalty_factor= c->sub_penalty_factor;
1457ab52
     me_cmp_func cmp_sub, chroma_cmp_sub;
67725183
     /* best position so far, in half-pel units; starts at the full-pel vector */
     int bx=2*mx, by=2*my;
1457ab52
 
bb198e19
     LOAD_COMMON
af4091f1
     int flags= c->sub_flags;
115329f1
 
1457ab52
  //FIXME factorize
 
     cmp_sub= s->dsp.me_sub_cmp[size];
     chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
 
af4091f1
     if(c->skip){ //FIXME move out of hpel?
1457ab52
         *mx_ptr = 0;
         *my_ptr = 0;
         return dmin;
     }
115329f1
 
1f202b0d
     /* When the sub-pel comparison differs from the full-pel one, recompute the
      * score of the starting vector with the sub-pel function so that it can be
      * compared with the half-pel candidates below. */
     if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
2750b827
         dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
1d0eab1d
         if(mx || my || size>0)
1457ab52
             dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
     }
115329f1
 
     /* Refinement is only attempted when the vector lies strictly inside the
      * search range, so that all four full-pel neighbours are in range too. */
     if (mx > xmin && mx < xmax &&
1457ab52
         my > ymin && my < ymax) {
         int d= dmin;
         const int index= (my<<ME_MAP_SHIFT) + mx;
115329f1
         /* t/l/r/b: stored full-pel scores of the top/left/right/bottom
          * neighbours plus their MV penalty (using c->penalty_factor, not the
          * sub-pel penalty_factor local). */
         const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
af4091f1
                      + (mv_penalty[bx   - pred_x] + mv_penalty[by-2 - pred_y])*c->penalty_factor;
1457ab52
         const int l= score_map[(index- 1               )&(ME_MAP_SIZE-1)]
af4091f1
                      + (mv_penalty[bx-2 - pred_x] + mv_penalty[by   - pred_y])*c->penalty_factor;
1457ab52
         const int r= score_map[(index+ 1               )&(ME_MAP_SIZE-1)]
af4091f1
                      + (mv_penalty[bx+2 - pred_x] + mv_penalty[by   - pred_y])*c->penalty_factor;
1457ab52
         const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
af4091f1
                      + (mv_penalty[bx   - pred_x] + mv_penalty[by+2 - pred_y])*c->penalty_factor;
115329f1
 
7b9e9817
 #if 1
cb668476
         /* Sanity check (active only when assert() is): the map entries of the
          * four neighbours must carry the key of the current generation, i.e.
          * the score_map values read above belong to this search. */
         unsigned key;
         unsigned map_generation= c->map_generation;
802f454e
 #ifndef NDEBUG
af4091f1
         uint32_t *map= c->map;
802f454e
 #endif
b07a5980
         key= ((my-1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
         assert(map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
         key= ((my+1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
         assert(map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
         key= ((my)<<ME_MAP_MV_BITS) + (mx+1) + map_generation;
         assert(map[(index+1)&(ME_MAP_SIZE-1)] == key);
         key= ((my)<<ME_MAP_MV_BITS) + (mx-1) + map_generation;
         assert(map[(index-1)&(ME_MAP_SIZE-1)] == key);
7b9e9817
 #endif
1457ab52
         /* The relative order of t/b and l/r selects which half-pel positions
          * around (mx, my) are evaluated; every CHECK_HALF_MV may update
          * dmin/bx/by. */
         if(t<=b){
             CHECK_HALF_MV(0, 1, mx  ,my-1)
             if(l<=r){
                 CHECK_HALF_MV(1, 1, mx-1, my-1)
                 if(t+r<=b+l){
                     CHECK_HALF_MV(1, 1, mx  , my-1)
                 }else{
                     CHECK_HALF_MV(1, 1, mx-1, my  )
                 }
                 CHECK_HALF_MV(1, 0, mx-1, my  )
             }else{
                 CHECK_HALF_MV(1, 1, mx  , my-1)
                 if(t+l<=b+r){
                     CHECK_HALF_MV(1, 1, mx-1, my-1)
                 }else{
                     CHECK_HALF_MV(1, 1, mx  , my  )
                 }
                 CHECK_HALF_MV(1, 0, mx  , my  )
             }
         }else{
             if(l<=r){
                 if(t+l<=b+r){
                     CHECK_HALF_MV(1, 1, mx-1, my-1)
                 }else{
                     CHECK_HALF_MV(1, 1, mx  , my  )
                 }
                 CHECK_HALF_MV(1, 0, mx-1, my)
                 CHECK_HALF_MV(1, 1, mx-1, my)
             }else{
                 if(t+r<=b+l){
                     CHECK_HALF_MV(1, 1, mx  , my-1)
                 }else{
                     CHECK_HALF_MV(1, 1, mx-1, my)
                 }
                 CHECK_HALF_MV(1, 0, mx  , my)
                 CHECK_HALF_MV(1, 1, mx  , my)
             }
             CHECK_HALF_MV(0, 1, mx  , my)
         }
         assert(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2);
     }
 
67725183
     /* report the result in half-pel units */
     *mx_ptr = bx;
     *my_ptr = by;
115329f1
 
1457ab52
     return dmin;
 }
 
1c3990db
 /**
  * Sub-pel refinement stub that performs no search.
  *
  * It only doubles both vector components, so the result is expressed in the
  * same 2x units that hpel_motion_search() writes back, and returns the
  * incoming score unchanged.
  *
  * @param s         unused
  * @param mx_ptr    in: full-pel x; out: 2 * x
  * @param my_ptr    in: full-pel y; out: 2 * y
  * @param dmin      score of the incoming vector, returned as is
  * @param src_index unused
  * @param ref_index unused
  * @param size      unused
  * @param h         unused
  * @return dmin
  */
 static int no_sub_motion_search(MpegEncContext * s,
                                 int *mx_ptr, int *my_ptr, int dmin,
                                 int src_index, int ref_index,
                                 int size, int h)
 {
     /* Multiply rather than shift: vector components can be negative and
      * left-shifting a negative signed value is undefined behaviour in C. */
     *mx_ptr *= 2;
     *my_ptr *= 2;
     return dmin;
 }
 
621d9294
 /**
  * Score a motion vector with the macroblock comparison functions
  * (s->dsp.mb_cmp[size] for luma, s->dsp.mb_cmp[size+1] for chroma).
  *
  * The vector is split into an integer part (mx >> (qpel+1)) and a fractional
  * part (mx & mask), where qpel is the FLAG_QPEL bit of c->mb_flags.
  *
  * @param s         encoder context; the motion estimation state is s->me
  * @param mx        vector x component, in the sub-pel units implied by mb_flags
  * @param my        vector y component, same units as mx
  * @param src_index passed through to cmp()
  * @param ref_index passed through to cmp()
  * @param size      selects the comparison functions and is passed to cmp()
  * @param h         passed through to cmp()
  * @param add_rate  when non-zero, the MV penalty is added to the score unless
  *                  the vector is (0,0) and size is not positive
  * @return the comparison score, optionally including the MV penalty
  */
 inline int ff_get_mb_score(MpegEncContext * s, int mx, int my, int src_index,
                            int ref_index, int size, int h, int add_rate)
 {
     MotionEstContext * const c = &s->me;
     const int rate_weight = c->mb_penalty_factor;
     const int mb_flags    = c->mb_flags;
     const int is_qpel     = mb_flags & FLAG_QPEL;
     const int pel_shift   = is_qpel + 1;
     const int frac_mask   = 1 + 2*is_qpel;
     LOAD_COMMON
     me_cmp_func luma_fn   = s->dsp.mb_cmp[size];
     me_cmp_func chroma_fn = s->dsp.mb_cmp[size + 1];
     int score;
 
     score = cmp(s, mx >> pel_shift, my >> pel_shift, mx & frac_mask, my & frac_mask,
                 size, h, ref_index, src_index, luma_fn, chroma_fn, mb_flags);
 
     if (!add_rate)
         return score;
 
     //FIXME check cbp before adding penalty for (0,0) vector
     if (!mx && !my && size <= 0)
         return score;
 
     return score + (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y]) * rate_weight;
 }
 
1457ab52
 /*
  * Scores one quarter-pel candidate. (x, y) is a full-pel position and
  * (dx, dy) the offset added to it in quarter-pel units; hx/hy are the
  * resulting coordinates in quarter-pel units. The value returned by
  * cmp_qpel() plus the MV penalty is stored in `d` and handed to COPY3_IF_LT
  * together with dmin/bx/by (presumably replacing them when d is smaller --
  * COPY3_IF_LT is defined elsewhere).
  * Names taken from the expansion site: s, d, dmin, bx, by, size, h,
  * ref_index, src_index, cmpf, chroma_cmpf, flags, mv_penalty, pred_x, pred_y,
  * penalty_factor.
  */
 #define CHECK_QUARTER_MV(dx, dy, x, y)\
 {\
     const int hx= 4*(x)+(dx);\
     const int hy= 4*(y)+(dy);\
919e7497
     d= cmp_qpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
1457ab52
     d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
     COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
 }
 
2750b827
 /**
  * Refine a full-pel motion vector to quarter-pel precision.
  *
  * The full-pel scores around the vector are used to predict a score for each
  * of the 48 sub-pel positions with offsets in [-3, 3] (the centre excluded).
  * The 8 positions with the lowest predicted score are remembered and the
  * first me_subpel_quality of them are then evaluated for real with
  * CHECK_QUARTER_MV.
  *
  * @param s         encoder context; the motion estimation state is s->me
  * @param mx_ptr    in: x of the best full-pel vector; out: x in quarter-pel
  *                  units, or 0 when c->skip is set
  * @param my_ptr    same as mx_ptr, for the y component
  * @param dmin      score of the incoming vector
  * @param src_index passed through to cmp() / cmp_qpel()
  * @param ref_index passed through to cmp() / cmp_qpel()
  * @param size      selects the comparison functions and is passed to cmp()
  * @param h         passed through to cmp() / cmp_qpel()
  * @return the best score found; the incoming dmin when c->skip is set
  */
 static int qpel_motion_search(MpegEncContext * s,
bb270c08
                                   int *mx_ptr, int *my_ptr, int dmin,
115329f1
                                   int src_index, int ref_index,
2750b827
                                   int size, int h)
1457ab52
 {
af4091f1
     MotionEstContext * const c= &s->me;
1457ab52
     const int mx = *mx_ptr;
115329f1
     const int my = *my_ptr;
af4091f1
     const int penalty_factor= c->sub_penalty_factor;
cb668476
     const unsigned map_generation = c->map_generation;
1f202b0d
     const int subpel_quality= c->avctx->me_subpel_quality;
af4091f1
     uint32_t *map= c->map;
2750b827
     me_cmp_func cmpf, chroma_cmpf;
1457ab52
     me_cmp_func cmp_sub, chroma_cmp_sub;
 
bb198e19
     LOAD_COMMON
af4091f1
     int flags= c->sub_flags;
115329f1
 
2750b827
     cmpf= s->dsp.me_cmp[size];
     chroma_cmpf= s->dsp.me_cmp[size+1]; //factorize FIXME
1457ab52
  //FIXME factorize
 
     cmp_sub= s->dsp.me_sub_cmp[size];
     chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
 
af4091f1
     if(c->skip){ //FIXME somehow move up (benchmark)
1457ab52
         *mx_ptr = 0;
         *my_ptr = 0;
         return dmin;
     }
115329f1
 
1f202b0d
     /* When the sub-pel comparison differs from the full-pel one, recompute the
      * score of the starting vector with the sub-pel function. */
     if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
2750b827
         dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
1d0eab1d
         if(mx || my || size>0)
1457ab52
             dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor;
     }
115329f1
 
     /* Refinement is only attempted when the vector lies strictly inside the
      * search range; otherwise the vector is just rescaled to quarter-pel. */
     if (mx > xmin && mx < xmax &&
1457ab52
         my > ymin && my < ymax) {
         int bx=4*mx, by=4*my;
         int d= dmin;
         int i, nx, ny;
         const int index= (my<<ME_MAP_SHIFT) + mx;
         /* stored full-pel scores: top, left, right, bottom and centre */
         const int t= score_map[(index-(1<<ME_MAP_SHIFT)  )&(ME_MAP_SIZE-1)];
         const int l= score_map[(index- 1                 )&(ME_MAP_SIZE-1)];
         const int r= score_map[(index+ 1                 )&(ME_MAP_SIZE-1)];
         const int b= score_map[(index+(1<<ME_MAP_SHIFT)  )&(ME_MAP_SIZE-1)];
         /* NOTE: this `c` (centre score) shadows the MotionEstContext pointer
          * `c` until the end of this block, which is why the context is
          * reached through s->me below. */
         const int c= score_map[(index                    )&(ME_MAP_SIZE-1)];
         /* best[]: the 8 lowest predicted scores in ascending order;
          * best_pos[]: the matching positions in quarter-pel units */
         int best[8];
         int best_pos[8][2];
115329f1
 
1457ab52
         /* every byte is set to 64, so each entry starts as a large positive
          * value (0x40404040 with a 32-bit int) */
         memset(best, 64, sizeof(int)*8);
115329f1
         if(s->me.dia_size>=2){
1457ab52
             /* the four diagonal neighbours are read from score_map as well */
             const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
             const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
             const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
             const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
 
             for(ny= -3; ny <= 3; ny++){
                 for(nx= -3; nx <= 3; nx++){
0da6315a
                     //FIXME this could overflow (unlikely though)
                     /* interpolate along x in the top, centre and bottom rows,
                      * then along y across the three row values */
                     const int64_t t2= nx*nx*(tr + tl - 2*t) + 4*nx*(tr-tl) + 32*t;
                     const int64_t c2= nx*nx*( r +  l - 2*c) + 4*nx*( r- l) + 32*c;
                     const int64_t b2= nx*nx*(br + bl - 2*b) + 4*nx*(br-bl) + 32*b;
                     int score= (ny*ny*(b2 + t2 - 2*c2) + 4*ny*(b2 - t2) + 32*c2 + 512)>>10;
1457ab52
                     int i;
115329f1
 
1457ab52
                     /* within [-3, 3] this only skips the centre (0, 0) */
                     if((nx&3)==0 && (ny&3)==0) continue;
115329f1
 
0da6315a
                     score += (mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
115329f1
 
af4091f1
 //                    if(nx&1) score-=1024*c->penalty_factor;
 //                    if(ny&1) score-=1024*c->penalty_factor;
115329f1
 
1457ab52
                     /* sorted insert; the worst of the 8 entries drops out */
                     for(i=0; i<8; i++){
                         if(score < best[i]){
                             memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
                             memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
                             best[i]= score;
                             best_pos[i][0]= nx + 4*mx;
                             best_pos[i][1]= ny + 4*my;
                             break;
                         }
                     }
                 }
             }
         }else{
             int tl;
0da6315a
             //FIXME this could overflow (unlikely though)
1457ab52
             /* coefficients of the polynomial used below to predict scores;
              * the asserts after cxy state which full-pel scores it reproduces */
             const int cx = 4*(r - l);
115329f1
             const int cx2= r + l - 2*c;
1457ab52
             const int cy = 4*(b - t);
             const int cy2= b + t - 2*c;
             int cxy;
115329f1
 
1457ab52
             /* the trailing "&& 0" disables the map lookup, so the top-left
              * score is always recomputed with cmp() */
             if(map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)] == (my<<ME_MAP_MV_BITS) + mx + map_generation && 0){ //FIXME
                 tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
             }else{
2750b827
                 tl= cmp(s, mx-1, my-1, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);//FIXME wrong if chroma me is different
1457ab52
             }
115329f1
 
             cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*c;
 
1457ab52
             assert(16*cx2 + 4*cx + 32*c == 32*r);
             assert(16*cx2 - 4*cx + 32*c == 32*l);
             assert(16*cy2 + 4*cy + 32*c == 32*b);
             assert(16*cy2 - 4*cy + 32*c == 32*t);
             assert(16*cxy + 16*cy2 + 16*cx2 - 4*cy - 4*cx + 32*c == 32*tl);
115329f1
 
1457ab52
             for(ny= -3; ny <= 3; ny++){
                 for(nx= -3; nx <= 3; nx++){
0da6315a
                     //FIXME this could overflow (unlikely though)
1457ab52
                     int score= ny*nx*cxy + nx*nx*cx2 + ny*ny*cy2 + nx*cx + ny*cy + 32*c; //FIXME factor
                     int i;
115329f1
 
1457ab52
                     /* within [-3, 3] this only skips the centre (0, 0) */
                     if((nx&3)==0 && (ny&3)==0) continue;
115329f1
 
1457ab52
                     /* the predicted score is scaled by 32 here, hence the
                      * factor 32 on the penalty */
                     score += 32*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
af4091f1
 //                    if(nx&1) score-=32*c->penalty_factor;
   //                  if(ny&1) score-=32*c->penalty_factor;
115329f1
 
1457ab52
                     /* sorted insert; the worst of the 8 entries drops out */
                     for(i=0; i<8; i++){
                         if(score < best[i]){
                             memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
                             memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
                             best[i]= score;
                             best_pos[i][0]= nx + 4*mx;
                             best_pos[i][1]= ny + 4*my;
                             break;
                         }
                     }
                 }
115329f1
             }
1457ab52
         }
826f429a
         /* Evaluate the most promising predicted positions for real.
          * NOTE(review): best_pos has 8 rows, so this assumes
          * me_subpel_quality <= 8 -- confirm that callers enforce it. */
         for(i=0; i<subpel_quality; i++){
1457ab52
             nx= best_pos[i][0];
             ny= best_pos[i][1];
             CHECK_QUARTER_MV(nx&3, ny&3, nx>>2, ny>>2)
         }
826f429a
 
1457ab52
         assert(bx >= xmin*4 && bx <= xmax*4 && by >= ymin*4 && by <= ymax*4);
 
         *mx_ptr = bx;
         *my_ptr = by;
     }else{
         *mx_ptr =4*mx;
         *my_ptr =4*my;
     }
 
     return dmin;
 }
 
 
 /*
  * Evaluates the full-pel candidate (x, y) unless the map already holds an
  * entry for it in the current generation. On evaluation the raw cmp() score
  * is stored in score_map, the MV penalty is added, and the result in `d` is
  * handed to COPY3_IF_LT together with dmin/best[0]/best[1] (presumably
  * replacing them when d is smaller -- COPY3_IF_LT is defined elsewhere).
  *
  * The key and index are built with unsigned shifts and the penalty index with
  * a multiplication, because x and y may be negative and left-shifting a
  * negative signed value is undefined behaviour. The resulting values are the
  * same as with the signed shifts on two's complement targets.
  *
  * Names taken from the expansion site: s, d, dmin, best, map, score_map,
  * map_generation, xmin, xmax, ymin, ymax, size, h, ref_index, src_index,
  * cmpf, chroma_cmpf, flags, shift, mv_penalty, pred_x, pred_y,
  * penalty_factor.
  */
 #define CHECK_MV(x,y)\
 {\
     const unsigned key = ((unsigned)(y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
     const int index= (((unsigned)(y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
     assert((x) >= xmin);\
     assert((x) <= xmax);\
     assert((y) >= ymin);\
     assert((y) <= ymax);\
     if(map[index]!=key){\
         d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
         map[index]= key;\
         score_map[index]= d;\
         d += (mv_penalty[(x)*(1<<shift)-pred_x] + mv_penalty[(y)*(1<<shift)-pred_y])*penalty_factor;\
         COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
     }\
 }
 
c6c36725
 /*
  * Clamps the candidate (ax, ay) into [xmin, xmax] x [ymin, ymax] and evaluates
  * the clamped position with CHECK_MV. Each argument is expanded exactly once
  * (into Lx / Ly), so an expression argument is not re-evaluated.
  */
 #define CHECK_CLIPPED_MV(ax,ay)\
b07a5980
 {\
ac003d24
     const int Lx= ax;\
     const int Ly= ay;\
     const int Lx2= FFMAX(xmin, FFMIN(Lx, xmax));\
     const int Ly2= FFMAX(ymin, FFMIN(Ly, ymax));\
     CHECK_MV(Lx2, Ly2)\
b07a5980
 }
 
1457ab52
 /*
  * Like CHECK_MV, but without the range asserts and with direction tracking:
  * when the candidate (x, y) is evaluated and beats dmin, best[] and dmin are
  * updated and next_dir is set to new_dir.
  *
  * The key and index are built with unsigned shifts and the penalty index with
  * a multiplication, because x and y may be negative and left-shifting a
  * negative signed value is undefined behaviour. The resulting values are the
  * same as with the signed shifts on two's complement targets.
  *
  * Names taken from the expansion site: s, d, dmin, best, next_dir, map,
  * score_map, map_generation, size, h, ref_index, src_index, cmpf,
  * chroma_cmpf, flags, shift, mv_penalty, pred_x, pred_y, penalty_factor.
  */
 #define CHECK_MV_DIR(x,y,new_dir)\
 {\
     const unsigned key = ((unsigned)(y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
     const int index= (((unsigned)(y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
     if(map[index]!=key){\
         d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
         map[index]= key;\
         score_map[index]= d;\
         d += (mv_penalty[(x)*(1<<shift)-pred_x] + mv_penalty[(y)*(1<<shift)-pred_y])*penalty_factor;\
         if(d<dmin){\
             best[0]=x;\
             best[1]=y;\
             dmin=d;\
             next_dir= new_dir;\
         }\
     }\
 }
 
 /*
  * Debug helper: prints a diagnostic with printf() for every bound of the
  * search range, scaled by 1<<S, that (x, y) violates. `v` is stringified and
  * appended to the format string as a tag identifying the call site.
  * In the part of the file visible here it is only referenced from
  * commented-out lines.
  */
 #define check(x,y,S,v)\
 if( (x)<(xmin<<(S)) ) printf("%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\
 if( (x)>(xmax<<(S)) ) printf("%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\
 if( (y)<(ymin<<(S)) ) printf("%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\
 if( (y)>(ymax<<(S)) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);\
 
2750b827
 /*
  * Companion to LOAD_COMMON: declares `map` (from c->map), `qpel` (the
  * FLAG_QPEL bit of `flags`) and `shift` (1 + qpel). The CHECK_MV family
  * scales full-pel coordinates by 1<<shift before indexing mv_penalty.
  * Requires `c` and `flags` to be in scope.
  */
 #define LOAD_COMMON2\
af4091f1
     uint32_t *map= c->map;\
2750b827
     const int qpel= flags&FLAG_QPEL;\
     const int shift= 1+qpel;\
1457ab52
 
849f1035
 /**
  * Iterative small-diamond search.
  *
  * Starting from best[], the four direct neighbours of the current best
  * position are tested with CHECK_MV_DIR; whenever one of them improves the
  * score the search continues from there, and it stops once no neighbour is
  * better.
  *
  * @param s              encoder context; the estimation state is s->me
  * @param best           in/out: best[0] = x, best[1] = y of the best vector
  * @param dmin           score of the incoming best vector
  * @param src_index      passed through to cmp()
  * @param ref_index      passed through to cmp()
  * @param penalty_factor weight applied to the MV penalty
  * @param size           selects s->dsp.me_cmp[size] / [size+1]
  * @param h              passed through to cmp()
  * @param flags          passed through to cmp(); FLAG_QPEL selects `shift`
  * @return the score of the final best[] position
  */
 static av_always_inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
2750b827
                                        int src_index, int ref_index, int const penalty_factor,
                                        int size, int h, int flags)
1457ab52
 {
af4091f1
     MotionEstContext * const c= &s->me;
2750b827
     me_cmp_func cmpf, chroma_cmpf;
1457ab52
     /* direction of the last successful step, -1 when there was none */
     int next_dir=-1;
bb198e19
     LOAD_COMMON
2750b827
     LOAD_COMMON2
cb668476
     unsigned map_generation = c->map_generation;
115329f1
 
2750b827
     cmpf= s->dsp.me_cmp[size];
     chroma_cmpf= s->dsp.me_cmp[size+1];
1457ab52
 
b07a5980
     { /* ensure that the best point is in the MAP as h/qpel refinement needs it */
cb668476
         const unsigned key = (best[1]<<ME_MAP_MV_BITS) + best[0] + map_generation;
b07a5980
         const int index= ((best[1]<<ME_MAP_SHIFT) + best[0])&(ME_MAP_SIZE-1);
         if(map[index]!=key){ //this will be executed only very rarely
2750b827
             score_map[index]= cmp(s, best[0], best[1], 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
b07a5980
             map[index]= key;
         }
     }
 
1457ab52
     for(;;){
         int d;
         const int dir= next_dir;
         const int x= best[0];
         const int y= best[1];
         next_dir=-1;
 
 //printf("%d", dir);
         /* Directions: 0 = left, 1 = up, 2 = right, 3 = down. The neighbour
          * opposite to the last step is the position we just came from, so it
          * is skipped; neighbours outside the search range are skipped too. */
         if(dir!=2 && x>xmin) CHECK_MV_DIR(x-1, y  , 0)
         if(dir!=3 && y>ymin) CHECK_MV_DIR(x  , y-1, 1)
         if(dir!=0 && x<xmax) CHECK_MV_DIR(x+1, y  , 2)
         if(dir!=1 && y<ymax) CHECK_MV_DIR(x  , y+1, 3)
 
         /* no neighbour improved the score: done */
         if(next_dir==-1){
             return dmin;
         }
     }
 }
 
2750b827
 /**
  * Diamond search over diamonds of size 1, 2 and 4.
  *
  * For each size, points on the diamond around the current best position are
  * tested with CHECK_MV. Whenever the best position changes, the size is reset
  * so that the search restarts with the smallest diamond.
  *
  * @param s              encoder context; the estimation state is s->me
  * @param best           in/out: best[0] = x, best[1] = y of the best vector
  * @param dmin           score of the incoming best vector
  * @param src_index      passed through to cmp()
  * @param ref_index      passed through to cmp()
  * @param penalty_factor weight applied to the MV penalty
  * @param size           selects s->dsp.me_cmp[size] / [size+1]
  * @param h              passed through to cmp()
  * @param flags          passed through to cmp(); FLAG_QPEL selects `shift`
  * @return the score of the final best[] position
  */
 static int funny_diamond_search(MpegEncContext * s, int *best, int dmin,
                                        int src_index, int ref_index, int const penalty_factor,
                                        int size, int h, int flags)
b07a5980
 {
af4091f1
     MotionEstContext * const c= &s->me;
2750b827
     me_cmp_func cmpf, chroma_cmpf;
b07a5980
     int dia_size;
bb198e19
     LOAD_COMMON
2750b827
     LOAD_COMMON2
cb668476
     unsigned map_generation = c->map_generation;
115329f1
 
2750b827
     cmpf= s->dsp.me_cmp[size];
     chroma_cmpf= s->dsp.me_cmp[size+1];
b07a5980
 
     for(dia_size=1; dia_size<=4; dia_size++){
         int dir;
         const int x= best[0];
         const int y= best[1];
115329f1
 
b07a5980
         /* only powers of two are used: 1, 2 and 4 (3 is skipped) */
         if(dia_size&(dia_size-1)) continue;
 
         /* skip diamonds that do not fit completely into the search range */
         if(   x + dia_size > xmax
            || x - dia_size < xmin
            || y + dia_size > ymax
            || y - dia_size < ymin)
            continue;
115329f1
 
b07a5980
         /* dir advances in steps of 2, so only every second point on each of
          * the four diamond edges is tested */
         for(dir= 0; dir<dia_size; dir+=2){
             int d;
 
             CHECK_MV(x + dir           , y + dia_size - dir);
             CHECK_MV(x + dia_size - dir, y - dir           );
             CHECK_MV(x - dir           , y - dia_size + dir);
             CHECK_MV(x - dia_size + dir, y + dir           );
         }
 
         /* best moved: the loop increment turns 0 into 1, restarting the
          * search with the smallest diamond */
         if(x!=best[0] || y!=best[1])
             dia_size=0;
     }
115329f1
     return dmin;
b07a5980
 }
 
376edfcc
 /**
  * Hexagon-pattern search with a shrinking pattern size.
  *
  * For each size, up to six points around the current best position are tested
  * (clamped to the search range) until the best position stops moving; then
  * the size is reduced. `dec` is computed once from the initial dia_size: when
  * that is not a power of two the size is decremented by 1 per round,
  * otherwise it is halved.
  *
  * @param s              encoder context; the estimation state is s->me
  * @param best           in/out: best[0] = x, best[1] = y of the best vector
  * @param dmin           score of the incoming best vector
  * @param src_index      passed through to cmp()
  * @param ref_index      passed through to cmp()
  * @param penalty_factor weight applied to the MV penalty
  * @param size           selects s->dsp.me_cmp[size] / [size+1]
  * @param h              passed through to cmp()
  * @param flags          passed through to cmp(); FLAG_QPEL selects `shift`
  * @param dia_size       initial pattern size; 0 performs no search
  * @return the score of the final best[] position
  */
 static int hex_search(MpegEncContext * s, int *best, int dmin,
                                        int src_index, int ref_index, int const penalty_factor,
                                        int size, int h, int flags, int dia_size)
 {
     MotionEstContext * const c= &s->me;
     me_cmp_func cmpf, chroma_cmpf;
     LOAD_COMMON
     LOAD_COMMON2
cb668476
     unsigned map_generation = c->map_generation;
f852ff32
     int x,y,d;
     const int dec= dia_size & (dia_size-1);
376edfcc
 
     cmpf= s->dsp.me_cmp[size];
     chroma_cmpf= s->dsp.me_cmp[size+1];
 
f852ff32
     for(;dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
376edfcc
         do{
             x= best[0];
             y= best[1];
f852ff32
 
             CHECK_CLIPPED_MV(x  -dia_size    , y);
             CHECK_CLIPPED_MV(x+  dia_size    , y);
             CHECK_CLIPPED_MV(x+( dia_size>>1), y+dia_size);
             CHECK_CLIPPED_MV(x+( dia_size>>1), y-dia_size);
             /* for dia_size == 1 only the four points above are tested */
             if(dia_size>1){
                 CHECK_CLIPPED_MV(x+(-dia_size>>1), y+dia_size);
                 CHECK_CLIPPED_MV(x+(-dia_size>>1), y-dia_size);
376edfcc
             }
         }while(best[0] != x || best[1] != y);
     }
 
     return dmin;
 }
 
 /**
  * Large-to-small diamond search.
  *
  * An 8-point pattern (the hex[] table below) scaled by the current size is
  * tested around the best position until it stops moving; then the size is
  * reduced (decremented when the initial size is not a power of two, halved
  * otherwise). Finally the four direct neighbours of the result are tested.
  * The initial size is the low 8 bits of c->dia_size.
  *
  * @param s              encoder context; the estimation state is s->me
  * @param best           in/out: best[0] = x, best[1] = y of the best vector
  * @param dmin           score of the incoming best vector
  * @param src_index      passed through to cmp()
  * @param ref_index      passed through to cmp()
  * @param penalty_factor weight applied to the MV penalty
  * @param size           selects s->dsp.me_cmp[size] / [size+1]
  * @param h              passed through to cmp()
  * @param flags          passed through to cmp(); FLAG_QPEL selects `shift`
  * @return the score of the final best[] position
  */
 static int l2s_dia_search(MpegEncContext * s, int *best, int dmin,
                                        int src_index, int ref_index, int const penalty_factor,
                                        int size, int h, int flags)
 {
     MotionEstContext * const c= &s->me;
     me_cmp_func cmpf, chroma_cmpf;
     LOAD_COMMON
     LOAD_COMMON2
cb668476
     unsigned map_generation = c->map_generation;
f852ff32
     int x,y,i,d;
     int dia_size= c->dia_size&0xFF;
     const int dec= dia_size & (dia_size-1);
376edfcc
     /* offsets of the 8 pattern points for size 1 */
     static const int hex[8][2]={{-2, 0}, {-1,-1}, { 0,-2}, { 1,-1},
                                 { 2, 0}, { 1, 1}, { 0, 2}, {-1, 1}};
 
     cmpf= s->dsp.me_cmp[size];
     chroma_cmpf= s->dsp.me_cmp[size+1];
 
f852ff32
     for(; dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
376edfcc
         do{
             x= best[0];
             y= best[1];
             for(i=0; i<8; i++){
c6c36725
                 CHECK_CLIPPED_MV(x+hex[i][0]*dia_size, y+hex[i][1]*dia_size);
376edfcc
             }
         }while(best[0] != x || best[1] != y);
     }
 
     /* final pass over the four direct neighbours of the result */
     x= best[0];
     y= best[1];
c6c36725
     CHECK_CLIPPED_MV(x+1, y);
     CHECK_CLIPPED_MV(x, y+1);
     CHECK_CLIPPED_MV(x-1, y);
     CHECK_CLIPPED_MV(x, y-1);
376edfcc
 
     return dmin;
 }
 
c7675718
 /**
  * Multi-stage search: cross scan, 5x5 window, scaled 16-point pattern, and a
  * final hex_search() with size 2.
  *
  * The stage extents derive from c->dia_size & 0xFE.
  *
  * @param s              encoder context; the estimation state is s->me
  * @param best           in/out: best[0] = x, best[1] = y of the best vector
  * @param dmin           score of the incoming best vector
  * @param src_index      passed through to cmp()
  * @param ref_index      passed through to cmp()
  * @param penalty_factor weight applied to the MV penalty
  * @param size           selects s->dsp.me_cmp[size] / [size+1]
  * @param h              passed through to cmp()
  * @param flags          passed through to cmp(); FLAG_QPEL selects `shift`
  * @return the value returned by the final hex_search()
  */
 static int umh_search(MpegEncContext * s, int *best, int dmin,
                                        int src_index, int ref_index, int const penalty_factor,
                                        int size, int h, int flags)
 {
     MotionEstContext * const c= &s->me;
     me_cmp_func cmpf, chroma_cmpf;
     LOAD_COMMON
     LOAD_COMMON2
cb668476
     unsigned map_generation = c->map_generation;
c7675718
     int x,y,x2,y2, i, j, d;
1994e7c6
     const int dia_size= c->dia_size&0xFE;
c7675718
     /* offsets of the 16 pattern points for scale 1 */
     static const int hex[16][2]={{-4,-2}, {-4,-1}, {-4, 0}, {-4, 1}, {-4, 2},
                                  { 4,-2}, { 4,-1}, { 4, 0}, { 4, 1}, { 4, 2},
                                  {-2, 3}, { 0, 4}, { 2, 3},
c9cac6e3
                                  {-2,-3}, { 0,-4}, { 2,-3},};
c7675718
 
     cmpf= s->dsp.me_cmp[size];
     chroma_cmpf= s->dsp.me_cmp[size+1];
 
     /* stage 1: cross through the starting point, every second position;
      * the vertical arm covers half the extent of the horizontal one */
     x= best[0];
     y= best[1];
1994e7c6
     for(x2=FFMAX(x-dia_size+1, xmin); x2<=FFMIN(x+dia_size-1,xmax); x2+=2){
c7675718
         CHECK_MV(x2, y);
     }
1994e7c6
     for(y2=FFMAX(y-dia_size/2+1, ymin); y2<=FFMIN(y+dia_size/2-1,ymax); y2+=2){
c7675718
         CHECK_MV(x, y2);
     }
 
     /* stage 2: every position of the 5x5 window around the current best */
     x= best[0];
     y= best[1];
     for(y2=FFMAX(y-2, ymin); y2<=FFMIN(y+2,ymax); y2++){
         for(x2=FFMAX(x-2, xmin); x2<=FFMIN(x+2,xmax); x2++){
             CHECK_MV(x2, y2);
         }
     }
 
 //FIXME prevent the CLIP stuff
 
1994e7c6
     /* stage 3: the 16-point pattern at scales 1..dia_size/4; x and y are not
      * refreshed here, so it stays centred on the best position found by
      * stage 1 */
     for(j=1; j<=dia_size/4; j++){
c7675718
         for(i=0; i<16; i++){
c6c36725
             CHECK_CLIPPED_MV(x+hex[i][0]*j, y+hex[i][1]*j);
c7675718
         }
     }
 
2421a01b
     /* stage 4: hexagon refinement starting at size 2 */
     return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, 2);
c7675718
 }
 
81d4ee3e
 /**
  * Exhaustive search of all full-pel vectors (x, y) with |x|, |y| <= dia_size
  * that lie inside the search range, where dia_size is the low 8 bits of
  * c->dia_size. The window is centred on (0, 0), not on the incoming best[].
  *
  * Afterwards the best position and its four direct neighbours (clamped to the
  * search range) are passed through CHECK_CLIPPED_MV, which evaluates any of
  * them that is not yet in the map, and best[] is then reset to the position
  * that was best before those extra checks.
  *
  * NOTE(review): `d` starts as dmin but is overwritten by every
  * CHECK_CLIPPED_MV below that actually evaluates a position. The returned
  * value may therefore be the score of a neighbour rather than of best[] --
  * confirm whether callers rely on the return value matching best[].
  *
  * @param s              encoder context; the estimation state is s->me
  * @param best           in/out: best[0] = x, best[1] = y of the best vector
  * @param dmin           score of the incoming best vector
  * @param src_index      passed through to cmp()
  * @param ref_index      passed through to cmp()
  * @param penalty_factor weight applied to the MV penalty
  * @param size           selects s->dsp.me_cmp[size] / [size+1]
  * @param h              passed through to cmp()
  * @param flags          passed through to cmp(); FLAG_QPEL selects `shift`
  * @return `d`, see the note above
  */
 static int full_search(MpegEncContext * s, int *best, int dmin,
                                        int src_index, int ref_index, int const penalty_factor,
                                        int size, int h, int flags)
 {
     MotionEstContext * const c= &s->me;
     me_cmp_func cmpf, chroma_cmpf;
     LOAD_COMMON
     LOAD_COMMON2
cb668476
     unsigned map_generation = c->map_generation;
81d4ee3e
     int x,y, d;
     const int dia_size= c->dia_size&0xFF;
 
     cmpf= s->dsp.me_cmp[size];
     chroma_cmpf= s->dsp.me_cmp[size+1];
 
     for(y=FFMAX(-dia_size, ymin); y<=FFMIN(dia_size,ymax); y++){
         for(x=FFMAX(-dia_size, xmin); x<=FFMIN(dia_size,xmax); x++){
             CHECK_MV(x, y);
         }
     }
 
     x= best[0];
     y= best[1];
     d= dmin;
     CHECK_CLIPPED_MV(x  , y);
     CHECK_CLIPPED_MV(x+1, y);
     CHECK_CLIPPED_MV(x, y+1);
     CHECK_CLIPPED_MV(x-1, y);
     CHECK_CLIPPED_MV(x, y-1);
     /* undo any move of best[] caused by the neighbour checks above */
     best[0]= x;
     best[1]= y;
 
     return d;
 }
 
b07a5980
 /*
  * Evaluates the candidate (ax, ay) unless the map already holds an entry for
  * it in the current generation. If its score (including the MV penalty) is
  * lower than that of the last element of the sorted minima[] array, the
  * candidate is inserted at its sorted position (the last element drops out),
  * marked unchecked, and the enclosing loop is restarted through "i=-1;
  * continue;". The macro must therefore be expanded directly inside a loop
  * over `i`.
  *
  * The key and index are built with unsigned shifts and the penalty index with
  * a multiplication, because ax and ay may be negative and left-shifting a
  * negative signed value is undefined behaviour. The resulting values are the
  * same as with the signed shifts on two's complement targets.
  *
  * Names taken from the expansion site: s, d, i, minima, minima_count, map,
  * score_map, map_generation, size, h, ref_index, src_index, cmpf,
  * chroma_cmpf, flags, shift, mv_penalty, pred_x, pred_y, penalty_factor.
  */
 #define SAB_CHECK_MV(ax,ay)\
 {\
     const unsigned key = ((unsigned)(ay)<<ME_MAP_MV_BITS) + (ax) + map_generation;\
     const int index= (((unsigned)(ay)<<ME_MAP_SHIFT) + (ax))&(ME_MAP_SIZE-1);\
     if(map[index]!=key){\
         d= cmp(s, ax, ay, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
         map[index]= key;\
         score_map[index]= d;\
         d += (mv_penalty[(ax)*(1<<shift)-pred_x] + mv_penalty[(ay)*(1<<shift)-pred_y])*penalty_factor;\
         if(d < minima[minima_count-1].height){\
             int j=0;\
             \
             /* terminates: d is below the height of the last element */\
             while(d >= minima[j].height) j++;\
 \
             memmove(&minima [j+1], &minima [j], (minima_count - j - 1)*sizeof(Minima));\
 \
             minima[j].checked= 0;\
             minima[j].height= d;\
             minima[j].x= ax;\
             minima[j].y= ay;\
             \
             i=-1;\
             continue;\
         }\
     }\
 }
 
4994af2f
 /* Capacity of the minima[] array used by sab_diamond_search(). */
 #define MAX_SAB_SIZE ME_MAP_SIZE
2750b827
 /**
  * Search that keeps a sorted list of the FFABS(c->dia_size) best positions.
  *
  * The list is seeded from the map entries of the current generation. Then the
  * four neighbours of every not yet checked list entry are evaluated with
  * SAB_CHECK_MV; a neighbour that is good enough is inserted into the list and
  * the scan restarts from the top. The best list entry becomes the result.
  *
  * NOTE(review): minima[] has MAX_SAB_SIZE elements while the padding loop
  * writes up to index minima_count-1; this assumes FFABS(c->dia_size) <=
  * MAX_SAB_SIZE -- confirm where dia_size is validated.
  *
  * @param s              encoder context; the estimation state is s->me
  * @param best           out: best[0] = x, best[1] = y of the best vector
  * @param dmin           overwritten with the score of the best list entry
  *                       before it is first read
  * @param src_index      passed through to cmp()
  * @param ref_index      passed through to cmp()
  * @param penalty_factor weight applied to the MV penalty
  * @param size           selects s->dsp.me_cmp[size] / [size+1]
  * @param h              passed through to cmp()
  * @param flags          passed through to cmp(); FLAG_QPEL selects `shift`
  * @return the best score found
  */
 static int sab_diamond_search(MpegEncContext * s, int *best, int dmin,
                                        int src_index, int ref_index, int const penalty_factor,
                                        int size, int h, int flags)
b07a5980
 {
af4091f1
     MotionEstContext * const c= &s->me;
2750b827
     me_cmp_func cmpf, chroma_cmpf;
b07a5980
     Minima minima[MAX_SAB_SIZE];
c26abfa5
     const int minima_count= FFABS(c->dia_size);
b07a5980
     int i, j;
bb198e19
     LOAD_COMMON
2750b827
     LOAD_COMMON2
cb668476
     unsigned map_generation = c->map_generation;
115329f1
 
2750b827
     cmpf= s->dsp.me_cmp[size];
     chroma_cmpf= s->dsp.me_cmp[size+1];
115329f1
 
2b0cdd9e
     /*Note j<MAX_SAB_SIZE is needed if MAX_SAB_SIZE < ME_MAP_SIZE as j can
       become larger due to MVs overflowing their ME_MAP_MV_BITS bits space in map
      */
     for(j=i=0; i<ME_MAP_SIZE && j<MAX_SAB_SIZE; i++){
b07a5980
         uint32_t key= map[i];
 
         /* bias both coordinate fields by half their range so that signed x
          * and y values become non-negative bit fields */
         key += (1<<(ME_MAP_MV_BITS-1)) + (1<<(2*ME_MAP_MV_BITS-1));
115329f1
 
b07a5980
         /* skip entries whose upper bits do not match the current generation.
          * NOTE(review): (-1)<<n left-shifts a negative value, which is
          * formally undefined in C; an unsigned mask would avoid that. */
         if((key&((-1)<<(2*ME_MAP_MV_BITS))) != map_generation) continue;
115329f1
 
b07a5980
         /* unpack x and y from the key and remove the bias again */
         minima[j].height= score_map[i];
         minima[j].x= key & ((1<<ME_MAP_MV_BITS)-1); key>>=ME_MAP_MV_BITS;
         minima[j].y= key & ((1<<ME_MAP_MV_BITS)-1);
         minima[j].x-= (1<<(ME_MAP_MV_BITS-1));
         minima[j].y-= (1<<(ME_MAP_MV_BITS-1));
2b0cdd9e
 
         // all entries in map should be in range except if the mv overflows their ME_MAP_MV_BITS bits space
         if(   minima[j].x > xmax || minima[j].x < xmin
            || minima[j].y > ymax || minima[j].y < ymin)
             continue;
 
b07a5980
         minima[j].checked=0;
         /* score_map holds raw scores; add the MV penalty except for (0,0) */
         if(minima[j].x || minima[j].y)
             minima[j].height+= (mv_penalty[((minima[j].x)<<shift)-pred_x] + mv_penalty[((minima[j].y)<<shift)-pred_y])*penalty_factor;
115329f1
 
b07a5980
         j++;
     }
115329f1
 
b07a5980
     qsort(minima, j, sizeof(Minima), minima_cmp);
115329f1
 
b07a5980
     /* pad the list up to minima_count with very high placeholder entries */
     for(; j<minima_count; j++){
         minima[j].height=256*256*256*64;
         minima[j].checked=0;
         minima[j].x= minima[j].y=0;
     }
115329f1
 
b07a5980
     for(i=0; i<minima_count; i++){
         const int x= minima[i].x;
         const int y= minima[i].y;
         int d;
115329f1
 
b07a5980
         if(minima[i].checked) continue;
115329f1
 
b07a5980
         /* entries on the border of the search range are not expanded */
         if(   x >= xmax || x <= xmin
            || y >= ymax || y <= ymin)
            continue;
 
         /* each of these may insert into minima[] and restart the loop */
         SAB_CHECK_MV(x-1, y)
         SAB_CHECK_MV(x+1, y)
         SAB_CHECK_MV(x  , y-1)
         SAB_CHECK_MV(x  , y+1)
115329f1
 
b07a5980
         minima[i].checked= 1;
     }
115329f1
 
b07a5980
     best[0]= minima[0].x;
     best[1]= minima[0].y;
     dmin= minima[0].height;
115329f1
 
b07a5980
     if(   best[0] < xmax && best[0] > xmin
        && best[1] < ymax && best[1] > ymin){
         int d;
         //ensure that the reference samples for hpel refinement are in the map
         CHECK_MV(best[0]-1, best[1])
         CHECK_MV(best[0]+1, best[1])
         CHECK_MV(best[0], best[1]-1)
         CHECK_MV(best[0], best[1]+1)
     }
115329f1
     return dmin;
b07a5980
 }
 
2750b827
 /**
  * Diamond search with growing diamond size.
  *
  * For dia_size = 1 .. c->dia_size, the points on the four edges of the
  * diamond around the current best position are tested, each edge restricted
  * to the part that lies inside the search range. Whenever the best position
  * changes, the size is reset so that the search restarts with size 1.
  *
  * @param s              encoder context; the estimation state is s->me
  * @param best           in/out: best[0] = x, best[1] = y of the best vector
  * @param dmin           score of the incoming best vector
  * @param src_index      passed through to cmp()
  * @param ref_index      passed through to cmp()
  * @param penalty_factor weight applied to the MV penalty
  * @param size           selects s->dsp.me_cmp[size] / [size+1]
  * @param h              passed through to cmp()
  * @param flags          passed through to cmp(); FLAG_QPEL selects `shift`
  * @return the score of the final best[] position
  */
 static int var_diamond_search(MpegEncContext * s, int *best, int dmin,
                                        int src_index, int ref_index, int const penalty_factor,
                                        int size, int h, int flags)
1457ab52
 {
af4091f1
     MotionEstContext * const c= &s->me;
2750b827
     me_cmp_func cmpf, chroma_cmpf;
b07a5980
     int dia_size;
bb198e19
     LOAD_COMMON
2750b827
     LOAD_COMMON2
cb668476
     unsigned map_generation = c->map_generation;
115329f1
 
2750b827
     cmpf= s->dsp.me_cmp[size];
     chroma_cmpf= s->dsp.me_cmp[size+1];
1457ab52
 
af4091f1
     for(dia_size=1; dia_size<=c->dia_size; dia_size++){
1457ab52
         int dir, start, end;
         const int x= best[0];
         const int y= best[1];
 
         /* In each of the four loops, start/end limit dir so that the tested
          * points stay inside the search range. */
 
         /* edge from (x, y+dia_size) towards (x+dia_size, y) */
         start= FFMAX(0, y + dia_size - ymax);
b07a5980
         end  = FFMIN(dia_size, xmax - x + 1);
1457ab52
         for(dir= start; dir<end; dir++){
             int d;
 
 //check(x + dir,y + dia_size - dir,0, a0)
             CHECK_MV(x + dir           , y + dia_size - dir);
         }
 
         /* edge from (x+dia_size, y) towards (x, y-dia_size) */
         start= FFMAX(0, x + dia_size - xmax);
b07a5980
         end  = FFMIN(dia_size, y - ymin + 1);
1457ab52
         for(dir= start; dir<end; dir++){
             int d;
 
 //check(x + dia_size - dir, y - dir,0, a1)
             CHECK_MV(x + dia_size - dir, y - dir           );
         }
 
         /* edge from (x, y-dia_size) towards (x-dia_size, y) */
         start= FFMAX(0, -y + dia_size + ymin );
b07a5980
         end  = FFMIN(dia_size, x - xmin + 1);
1457ab52
         for(dir= start; dir<end; dir++){
             int d;
 
 //check(x - dir,y - dia_size + dir,0, a2)
             CHECK_MV(x - dir           , y - dia_size + dir);
         }
 
         /* edge from (x-dia_size, y) towards (x, y+dia_size) */
         start= FFMAX(0, -x + dia_size + xmin );
b07a5980
         end  = FFMIN(dia_size, ymax - y + 1);
1457ab52
         for(dir= start; dir<end; dir++){
             int d;
 
 //check(x - dia_size + dir, y + dir,0, a3)
             CHECK_MV(x - dia_size + dir, y + dir           );
         }
 
         /* best moved: the loop increment turns 0 into 1, restarting the
          * search with the smallest diamond */
         if(x!=best[0] || y!=best[1])
             dia_size=0;
     }
115329f1
     return dmin;
1457ab52
 }
 
849f1035
 /**
  * Dispatch to the search pattern selected by the dia_size field of the
  * motion estimation context. The checks are evaluated in this order:
  *
  *   dia_size == -1   -> funny_diamond_search
  *   dia_size <  -1   -> sab_diamond_search
  *   dia_size <   2   -> small_diamond_search
  *   dia_size > 1024  -> full_search
  *   dia_size >  768  -> umh_search
  *   dia_size >  512  -> hex_search (gets dia_size&0xFF as extra argument)
  *   dia_size >  256  -> l2s_dia_search
  *   otherwise        -> var_diamond_search
  *
  * All arguments are forwarded unchanged and the callee's return value is
  * returned as is.
  */
 static av_always_inline int diamond_search(MpegEncContext * s, int *best, int dmin,
                                        int src_index, int ref_index, int const penalty_factor,
                                        int size, int h, int flags)
 {
     MotionEstContext * const me = &s->me;
 
     if (me->dia_size == -1)
         return funny_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
     if (me->dia_size < -1)
         return sab_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
     if (me->dia_size < 2)
         return small_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
 
     /* from here on dia_size >= 2; larger ranges are tested first */
     if (me->dia_size > 1024)
         return full_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
     if (me->dia_size > 768)
         return umh_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
     if (me->dia_size > 512)
         return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, me->dia_size&0xFF);
     if (me->dia_size > 256)
         return l2s_dia_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
 
     return var_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
 }
 
adbfc605
 /**
    Predictor-seeded motion search: score a set of candidate vectors, then
    refine the best of them with diamond_search().

ff993cd7
    @param P a list of candidate mvs to check before starting the
bb21f176
    iterative search. If one of the candidates is close to the optimal mv, then
    it takes fewer iterations. And it increases the chance that we find the
    optimal mv.
    @param mx_ptr,my_ptr receive the x and y component of the selected vector
    @param last_mv motion vector table indexed by macroblock position; each
           component is multiplied by ref_mv_scale and shifted right by 16
           with rounding before it is used as a candidate.
           NOTE(review): presumably the vectors of a previously coded
           picture - confirm against the callers.
    @param size index into the dsp compare-function tables
    @param h passed on to the compare function; also scales the thresholds
           used for the early exit and for the extra candidates
    @return the score (dmin) of the selected vector
  */
849f1035
 static av_always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr,
115329f1
                              int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
26efc54e
                              int ref_mv_scale, int flags, int size, int h)
1457ab52
 {
af4091f1
     MotionEstContext * const c= &s->me;
adbfc605
     int best[2]={0, 0};      /**< x and y coordinates of the best motion vector.
bb21f176
                                i.e. the difference between the position of the
1cfe393a
                                block currently being encoded and the position of
bb21f176
                                the block chosen to predict it from. */
     int d;                   ///< the score (cmp + penalty) of any given mv
adbfc605
     int dmin;                /**< the best value of d, i.e. the score
bb21f176
                                corresponding to the mv stored in best[]. */
cb668476
     unsigned map_generation;
c08a01a6
     int penalty_factor;
bb198e19
     const int ref_mv_stride= s->mb_stride; //pass as arg  FIXME
     const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride; //add to last_mv before passing FIXME
2750b827
     me_cmp_func cmpf, chroma_cmpf;
115329f1
 
bb198e19
     LOAD_COMMON
2750b827
     LOAD_COMMON2
115329f1
 
c08a01a6
     // the pre pass uses its own penalty factor and compare functions
     if(c->pre_pass){
         penalty_factor= c->pre_penalty_factor;
         cmpf= s->dsp.me_pre_cmp[size];
         chroma_cmpf= s->dsp.me_pre_cmp[size+1];
     }else{
         penalty_factor= c->penalty_factor;
         cmpf= s->dsp.me_cmp[size];
         chroma_cmpf= s->dsp.me_cmp[size+1];
     }
115329f1
 
af4091f1
     map_generation= update_map_generation(c);
1457ab52
 
26efc54e
     assert(cmpf);
2750b827
     // score of the zero vector; it is the initial best (best[] is {0,0})
     dmin= cmp(s, 0, 0, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
1457ab52
     map[0]= map_generation;
     score_map[0]= dmin;
e4d0e2ed
 
     //FIXME precalc first term below?
ce5e49b0
     // add the vector cost of (0,0) for B pictures without FLAG_DIRECT, or
     // whenever CODEC_FLAG_MV0 is set
     if((s->pict_type == AV_PICTURE_TYPE_B && !(c->flags & FLAG_DIRECT)) || s->flags&CODEC_FLAG_MV0)
ff8dc81b
         dmin += (mv_penalty[pred_x] + mv_penalty[pred_y])*penalty_factor;
1457ab52
 
     /* first line */
9c3d33d6
     if (s->first_slice_line) {
1457ab52
         // only the left predictor and the co-located last_mv entry are tried
         CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
c6c36725
         CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
b07a5980
                         (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1457ab52
     }else{
1dd509b1
         // early exit: the zero vector is already below the threshold and
         // the left, top and top-right predictors are all zero
         if(dmin<((h*h*s->avctx->mv0_threshold)>>8)
                     && ( P_LEFT[0]    |P_LEFT[1]
1457ab52
                         |P_TOP[0]     |P_TOP[1]
b07a5980
                         |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0){
1457ab52
             *mx_ptr= 0;
             *my_ptr= 0;
af4091f1
             c->skip=1;
1457ab52
             return dmin;
         }
f7f8af46
         // median predictor, its four direct neighbours, the co-located
         // last_mv entry and the three spatial predictors
         CHECK_MV(    P_MEDIAN[0] >>shift ,    P_MEDIAN[1] >>shift)
c6c36725
         CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)  , (P_MEDIAN[1]>>shift)-1)
         CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)  , (P_MEDIAN[1]>>shift)+1)
         CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)-1, (P_MEDIAN[1]>>shift)  )
         CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)+1, (P_MEDIAN[1]>>shift)  )
         CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
516ea8d2
                         (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
         CHECK_MV(P_LEFT[0]    >>shift, P_LEFT[1]    >>shift)
         CHECK_MV(P_TOP[0]     >>shift, P_TOP[1]     >>shift)
         CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
1457ab52
     }
26efc54e
     // still a poor match: also try last_mv of neighbouring macroblocks
     // (previous/upper entries in the pre pass, next/lower ones otherwise)
     if(dmin>h*h*4){
af4091f1
         if(c->pre_pass){
c6c36725
             CHECK_CLIPPED_MV((last_mv[ref_mv_xy-1][0]*ref_mv_scale + (1<<15))>>16,
f931ff7b
                             (last_mv[ref_mv_xy-1][1]*ref_mv_scale + (1<<15))>>16)
9c3d33d6
             if(!s->first_slice_line)
c6c36725
                 CHECK_CLIPPED_MV((last_mv[ref_mv_xy-ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
9c3d33d6
                                 (last_mv[ref_mv_xy-ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
f931ff7b
         }else{
c6c36725
             CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
f931ff7b
                             (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
80ee9fc0
             if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
c6c36725
                 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
9c3d33d6
                                 (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
f931ff7b
         }
1457ab52
     }
b07a5980
 
1f202b0d
     // optional: try last_mv of every macroblock within last_predictor_count
     // macroblocks of the current one (window clipped to the picture)
     if(c->avctx->last_predictor_count){
         const int count= c->avctx->last_predictor_count;
b07a5980
         const int xstart= FFMAX(0, s->mb_x - count);
         const int ystart= FFMAX(0, s->mb_y - count);
         const int xend= FFMIN(s->mb_width , s->mb_x + count + 1);
         const int yend= FFMIN(s->mb_height, s->mb_y + count + 1);
         int mb_y;
 
         for(mb_y=ystart; mb_y<yend; mb_y++){
             int mb_x;
             for(mb_x=xstart; mb_x<xend; mb_x++){
                 // NOTE(review): this index adds 1 to both mb_x and mb_y,
                 // unlike ref_mv_xy above - confirm which layout last_mv uses
                 const int xy= mb_x + 1 + (mb_y + 1)*ref_mv_stride;
                 int mx= (last_mv[xy][0]*ref_mv_scale + (1<<15))>>16;
                 int my= (last_mv[xy][1]*ref_mv_scale + (1<<15))>>16;
 
                 // candidates outside the allowed range are skipped, not clipped
                 if(mx>xmax || mx<xmin || my>ymax || my<ymin) continue;
                 CHECK_MV(mx,my)
1457ab52
             }
         }
     }
b07a5980
 
1457ab52
 //check(best[0],best[1],0, b0)
2750b827
     // iterative refinement around the best candidate found so far
     dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
1457ab52
 
 //check(best[0],best[1],0, b1)
     *mx_ptr= best[0];
115329f1
     *my_ptr= best[1];
1457ab52
 
 //    printf("%d %d %d \n", best[0], best[1], dmin);
     return dmin;
 }
 
2750b827
 //this function is dedicated to the braindamaged gcc
26efc54e
 inline int ff_epzs_motion_search(MpegEncContext * s, int *mx_ptr, int *my_ptr,
115329f1
                              int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
26efc54e
                              int ref_mv_scale, int size, int h)
2750b827
 {
af4091f1
     MotionEstContext * const c= &s->me;
2750b827
 //FIXME convert other functions in the same way if faster
26efc54e
     if(c->flags==0 && h==16 && size==0){
         return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, 0, 0, 16);
2750b827
 //    case FLAG_QPEL:
 //        return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, FLAG_QPEL);
26efc54e
     }else{
         return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, c->flags, size, h);
2750b827
     }
 }
 
 /**
  * Predictor-seeded motion search with fixed size=1 and h=8.
  *
  * Unlike epzs_motion_search_internal() the zero vector is not scored
  * first: dmin starts at 1000000 and the candidates are P_MV1, the spatial
  * predictors and scaled last_mv entries. The best candidate is then
  * refined with diamond_search().
  *
  * @param mx_ptr,my_ptr receive the x and y component of the selected vector
  * @param P             candidate vectors, accessed through the P_* macros
  * @param last_mv       motion vector table indexed by macroblock position;
  *                      components are multiplied by ref_mv_scale and
  *                      shifted right by 16 with rounding before use
  * @return the score (dmin) of the selected vector
  */
 static int epzs_motion_search4(MpegEncContext * s,
                              int *mx_ptr, int *my_ptr, int P[10][2],
115329f1
                              int src_index, int ref_index, int16_t (*last_mv)[2],
2750b827
                              int ref_mv_scale)
1457ab52
 {
af4091f1
     MotionEstContext * const c= &s->me;
1457ab52
     int best[2]={0, 0};
115329f1
     int d, dmin;
cb668476
     unsigned map_generation;
af4091f1
     const int penalty_factor= c->penalty_factor;
1457ab52
     const int size=1;
bb198e19
     const int h=8;
7bc9090a
     const int ref_mv_stride= s->mb_stride;
     const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
2750b827
     me_cmp_func cmpf, chroma_cmpf;
bb198e19
     LOAD_COMMON
af4091f1
     int flags= c->flags;
2750b827
     LOAD_COMMON2
115329f1
 
2750b827
     cmpf= s->dsp.me_cmp[size];
     chroma_cmpf= s->dsp.me_cmp[size+1];
1457ab52
 
af4091f1
     map_generation= update_map_generation(c);
1457ab52
 
     // large initial score instead of evaluating the zero vector
     dmin = 1000000;
115329f1
 //printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
1457ab52
     /* first line */
9c3d33d6
     if (s->first_slice_line) {
bb270c08
         CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
c6c36725
         CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
b07a5980
                         (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1457ab52
         CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
     }else{
         CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
         //FIXME try some early stop
516ea8d2
         CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
         CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
         CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
         CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
c6c36725
         CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
516ea8d2
                         (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1457ab52
     }
     // still a poor match (64*4 equals h*h*4 for h=8): also try last_mv of
     // the next macroblock and of the one in the next row
     if(dmin>64*4){
c6c36725
         CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
b07a5980
                         (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
80ee9fc0
         if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
c6c36725
             CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
9c3d33d6
                             (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
1457ab52
     }
 
2750b827
     // iterative refinement around the best candidate found so far
     dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
bb198e19
 
     *mx_ptr= best[0];
115329f1
     *my_ptr= best[1];
bb198e19
 
 //    printf("%d %d %d \n", best[0], best[1], dmin);
     return dmin;
 }
 
 //try to merge with above FIXME (needs PSNR test)
 /**
  * Predictor-seeded motion search with fixed size=0 and h=8.
  *
  * The body matches epzs_motion_search4() except for the value of size,
  * which selects a different pair of entries in s->dsp.me_cmp[]. dmin
  * starts at 1000000, the candidates are P_MV1, the spatial predictors and
  * scaled last_mv entries, and the best one is refined with
  * diamond_search().
  *
  * @param mx_ptr,my_ptr receive the x and y component of the selected vector
  * @param P             candidate vectors, accessed through the P_* macros
  * @param last_mv       motion vector table indexed by macroblock position;
  *                      components are multiplied by ref_mv_scale and
  *                      shifted right by 16 with rounding before use
  * @return the score (dmin) of the selected vector
  */
2750b827
 static int epzs_motion_search2(MpegEncContext * s,
                              int *mx_ptr, int *my_ptr, int P[10][2],
115329f1
                              int src_index, int ref_index, int16_t (*last_mv)[2],
2750b827
                              int ref_mv_scale)
bb198e19
 {
af4091f1
     MotionEstContext * const c= &s->me;
bb198e19
     int best[2]={0, 0};
115329f1
     int d, dmin;
cb668476
     unsigned map_generation;
af4091f1
     const int penalty_factor= c->penalty_factor;
bb198e19
     const int size=0; //FIXME pass as arg
     const int h=8;
     const int ref_mv_stride= s->mb_stride;
     const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
2750b827
     me_cmp_func cmpf, chroma_cmpf;
bb198e19
     LOAD_COMMON
af4091f1
     int flags= c->flags;
2750b827
     LOAD_COMMON2
115329f1
 
2750b827
     cmpf= s->dsp.me_cmp[size];
     chroma_cmpf= s->dsp.me_cmp[size+1];
bb198e19
 
af4091f1
     map_generation= update_map_generation(c);
bb198e19
 
     // large initial score instead of evaluating the zero vector
     dmin = 1000000;
115329f1
 //printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
bb198e19
     /* first line */
9c3d33d6
     if (s->first_slice_line) {
bb270c08
         CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
c6c36725
         CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
bb198e19
                         (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
         CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
     }else{
         CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
         //FIXME try some early stop
516ea8d2
         CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
         CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
         CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
         CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
c6c36725
         CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
516ea8d2
                         (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
bb198e19
     }
     // still a poor match (64*4 equals h*h*4 for h=8): also try last_mv of
     // the next macroblock and of the one in the next row
     if(dmin>64*4){
c6c36725
         CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
bb198e19
                         (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
80ee9fc0
         if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
c6c36725
             CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
9c3d33d6
                             (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
bb198e19
     }
 
2750b827
     // iterative refinement around the best candidate found so far
     dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
b07a5980
 
1457ab52
     *mx_ptr= best[0];
115329f1
     *my_ptr= best[1];
1457ab52
 
 //    printf("%d %d %d \n", best[0], best[1], dmin);
     return dmin;
 }