Browse code

interlaced motion estimation interlaced mpeg2 encoding P & B frames rate distorted interlaced mb decision alternate scantable support 4mv encoding fixes (that's also why the regression tests change) passing height to most dsp functions interlaced mpeg4 encoding (no direct mode MBs yet) various related cleanups disabled old motion estimation algorithms (log, full, ...) they will either be fixed or removed

Originally committed as revision 2638 to svn://svn.ffmpeg.org/ffmpeg/trunk

Michael Niedermayer authored on 2003/12/31 01:07:57
Showing 23 changed files
... ...
@@ -119,8 +119,10 @@ static int use_obmc = 0;
119 119
 static int use_aic = 0;
120 120
 static int use_aiv = 0;
121 121
 static int use_umv = 0;
122
+static int use_alt_scan = 0;
122 123
 static int do_deinterlace = 0;
123
-static int do_interlace = 0;
124
+static int do_interlace_dct = 0;
125
+static int do_interlace_me = 0;
124 126
 static int workaround_bugs = FF_BUG_AUTODETECT;
125 127
 static int error_resilience = 2;
126 128
 static int error_concealment = 3;
... ...
@@ -130,6 +132,8 @@ static int use_part = 0;
130 130
 static int packet_size = 0;
131 131
 static int error_rate = 0;
132 132
 static int strict = 0;
133
+static int top_field_first = -1;
134
+static int noise_reduction = 0;
133 135
 static int debug = 0;
134 136
 static int debug_mv = 0;
135 137
 extern int loop_input; /* currently a hack */
... ...
@@ -635,7 +639,12 @@ static void do_video_out(AVFormatContext *s,
635 635
             /* better than nothing: use input picture interlaced
636 636
                settings */
637 637
             big_picture.interlaced_frame = in_picture->interlaced_frame;
638
-            big_picture.top_field_first = in_picture->top_field_first;
638
+            if(do_interlace_me || do_interlace_dct){
639
+                if(top_field_first == -1)
640
+                    big_picture.top_field_first = in_picture->top_field_first;
641
+                else
642
+                    big_picture.top_field_first = 1;
643
+            }
639 644
 
640 645
             /* handles sameq here. This is not correct because it may
641 646
                not be a global option */
... ...
@@ -1946,6 +1955,16 @@ static void opt_strict(const char *arg)
1946 1946
     strict= atoi(arg);
1947 1947
 }
1948 1948
 
1949
+static void opt_top_field_first(const char *arg)
1950
+{
1951
+    top_field_first= atoi(arg);
1952
+}
1953
+
1954
+static void opt_noise_reduction(const char *arg)
1955
+{
1956
+    noise_reduction= atoi(arg);
1957
+}
1958
+
1949 1959
 static void opt_audio_bitrate(const char *arg)
1950 1960
 {
1951 1961
     audio_bit_rate = atoi(arg) * 1000;
... ...
@@ -2373,14 +2392,20 @@ static void opt_output_file(const char *filename)
2373 2373
                 if(use_part) {
2374 2374
                     video_enc->flags |= CODEC_FLAG_PART;
2375 2375
                 }
2376
+           	if (use_alt_scan) {
2377
+                    video_enc->flags |= CODEC_FLAG_ALT_SCAN;
2378
+                }
2376 2379
                 if (b_frames) {
2377 2380
                     video_enc->max_b_frames = b_frames;
2378 2381
                     video_enc->b_frame_strategy = 0;
2379 2382
                     video_enc->b_quant_factor = 2.0;
2380 2383
                 }
2381
-                if (do_interlace) {
2384
+                if (do_interlace_dct) {
2382 2385
                     video_enc->flags |= CODEC_FLAG_INTERLACED_DCT;
2383 2386
                 }
2387
+                if (do_interlace_me) {
2388
+                    video_enc->flags |= CODEC_FLAG_INTERLACED_ME;
2389
+                }
2384 2390
                 video_enc->qmin = video_qmin;
2385 2391
                 video_enc->qmax = video_qmax;
2386 2392
                 video_enc->mb_qmin = video_mb_qmin;
... ...
@@ -2430,6 +2455,7 @@ static void opt_output_file(const char *filename)
2430 2430
                 video_enc->idct_algo = idct_algo;
2431 2431
                 video_enc->strict_std_compliance = strict;
2432 2432
                 video_enc->error_rate = error_rate;
2433
+                video_enc->noise_reduction= noise_reduction;
2433 2434
                 if(packet_size){
2434 2435
                     video_enc->rtp_mode= 1;
2435 2436
                     video_enc->rtp_payload_size= packet_size;
... ...
@@ -2992,16 +3018,21 @@ const OptionDef options[] = {
2992 2992
     { "passlogfile", HAS_ARG | OPT_STRING | OPT_VIDEO, {(void*)&pass_logfilename}, "select two pass log file name", "file" },
2993 2993
     { "deinterlace", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&do_deinterlace}, 
2994 2994
       "deinterlace pictures" },
2995
-    { "interlace", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&do_interlace}, 
2996
-      "force interlacing support in encoder (MPEG2/MPEG4)" },
2995
+    { "ildct", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&do_interlace_dct}, 
2996
+      "force interlaced dct support in encoder (MPEG2/MPEG4)" },
2997
+    { "ilme", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&do_interlace_me}, 
2998
+      "force interlacied me support in encoder MPEG2" },
2997 2999
     { "psnr", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&do_psnr}, "calculate PSNR of compressed frames" },
2998 3000
     { "vstats", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&do_vstats}, "dump video coding statistics to file" }, 
2999 3001
     { "vhook", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)add_frame_hooker}, "insert video processing module", "module" },
3000 3002
     { "aic", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&use_aic}, "enable Advanced intra coding (h263+)" },
3001 3003
     { "aiv", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&use_aiv}, "enable Alternative inter vlc (h263+)" },
3002 3004
     { "umv", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&use_umv}, "enable Unlimited Motion Vector (h263+)" },
3005
+    { "alt", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&use_alt_scan}, "enable alternate scantable (mpeg2)" },
3003 3006
     { "intra_matrix", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_intra_matrix}, "specify intra matrix coeffs", "matrix" },
3004 3007
     { "inter_matrix", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_inter_matrix}, "specify inter matrix coeffs", "matrix" },
3008
+    { "top", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_top_field_first}, "top=1/bottom=0/auto=-1 field first", "" },
3009
+    { "nr", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_noise_reduction}, "noise reduction", "" },
3005 3010
 
3006 3011
     /* audio options */
3007 3012
     { "ab", HAS_ARG | OPT_AUDIO, {(void*)opt_audio_bitrate}, "set audio bitrate (in kbit/s)", "bitrate", },
... ...
@@ -39,11 +39,11 @@ void get_pixels_mvi(DCTELEM *restrict block,
39 39
                     const uint8_t *restrict pixels, int line_size);
40 40
 void diff_pixels_mvi(DCTELEM *block, const uint8_t *s1, const uint8_t *s2,
41 41
                      int stride);
42
-int pix_abs8x8_mvi(uint8_t *pix1, uint8_t *pix2, int line_size);
42
+int pix_abs8x8_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
43 43
 int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size);
44
-int pix_abs16x16_x2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size);
45
-int pix_abs16x16_y2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size);
46
-int pix_abs16x16_xy2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size);
44
+int pix_abs16x16_x2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
45
+int pix_abs16x16_y2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
46
+int pix_abs16x16_xy2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
47 47
 
48 48
 #if 0
49 49
 /* These functions were the base for the optimized assembler routines,
... ...
@@ -290,11 +290,6 @@ static int sad16x16_mvi(void *s, uint8_t *a, uint8_t *b, int stride)
290 290
     return pix_abs16x16_mvi_asm(a, b, stride);
291 291
 }
292 292
 
293
-static int sad8x8_mvi(void *s, uint8_t *a, uint8_t *b, int stride)
294
-{
295
-    return pix_abs8x8_mvi(a, b, stride);
296
-}
297
-
298 293
 void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx)
299 294
 {
300 295
     c->put_pixels_tab[0][0] = put_pixels16_axp_asm;
... ...
@@ -347,12 +342,13 @@ void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx)
347 347
         c->get_pixels       = get_pixels_mvi;
348 348
         c->diff_pixels      = diff_pixels_mvi;
349 349
         c->sad[0]           = sad16x16_mvi;
350
-        c->sad[1]           = sad8x8_mvi;
351
-        c->pix_abs8x8       = pix_abs8x8_mvi;
352
-        c->pix_abs16x16     = pix_abs16x16_mvi_asm;
353
-        c->pix_abs16x16_x2  = pix_abs16x16_x2_mvi;
354
-        c->pix_abs16x16_y2  = pix_abs16x16_y2_mvi;
355
-        c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mvi;
350
+        c->sad[1]           = pix_abs8x8_mvi;
351
+//        c->pix_abs[0][0]    = pix_abs16x16_mvi_asm; //FIXME function arguments for the asm must be fixed
352
+        c->pix_abs[0][0]    = sad16x16_mvi;
353
+        c->pix_abs[1][0]    = pix_abs8x8_mvi;
354
+        c->pix_abs[0][1]    = pix_abs16x16_x2_mvi;
355
+        c->pix_abs[0][2]    = pix_abs16x16_y2_mvi;
356
+        c->pix_abs[0][3]    = pix_abs16x16_xy2_mvi;
356 357
     }
357 358
 
358 359
     put_pixels_clamped_axp_p = c->put_pixels_clamped;
... ...
@@ -84,10 +84,9 @@ static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
84 84
     return r1 + r2;
85 85
 }
86 86
 
87
-int pix_abs8x8_mvi(uint8_t *pix1, uint8_t *pix2, int line_size)
87
+int pix_abs8x8_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
88 88
 {
89 89
     int result = 0;
90
-    int h = 8;
91 90
 
92 91
     if ((size_t) pix2 & 0x7) {
93 92
         /* works only when pix2 is actually unaligned */
... ...
@@ -160,10 +159,9 @@ int pix_abs16x16_mvi(uint8_t *pix1, uint8_t *pix2, int line_size)
160 160
 }
161 161
 #endif
162 162
 
163
-int pix_abs16x16_x2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size)
163
+int pix_abs16x16_x2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
164 164
 {
165 165
     int result = 0;
166
-    int h = 16;
167 166
     uint64_t disalign = (size_t) pix2 & 0x7;
168 167
 
169 168
     switch (disalign) {
... ...
@@ -234,10 +232,9 @@ int pix_abs16x16_x2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size)
234 234
     return result;
235 235
 }
236 236
 
237
-int pix_abs16x16_y2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size)
237
+int pix_abs16x16_y2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
238 238
 {
239 239
     int result = 0;
240
-    int h = 16;
241 240
 
242 241
     if ((size_t) pix2 & 0x7) {
243 242
         uint64_t t, p2_l, p2_r;
... ...
@@ -288,10 +285,9 @@ int pix_abs16x16_y2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size)
288 288
     return result;
289 289
 }
290 290
 
291
-int pix_abs16x16_xy2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size)
291
+int pix_abs16x16_xy2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
292 292
 {
293 293
     int result = 0;
294
-    int h = 16;
295 294
     
296 295
     uint64_t p1_l, p1_r;
297 296
     uint64_t p2_l, p2_r, p2_x;
... ...
@@ -17,7 +17,7 @@ extern "C" {
17 17
 
18 18
 #define FFMPEG_VERSION_INT     0x000408
19 19
 #define FFMPEG_VERSION         "0.4.8"
20
-#define LIBAVCODEC_BUILD       4697
20
+#define LIBAVCODEC_BUILD       4698
21 21
 
22 22
 #define LIBAVCODEC_VERSION_INT FFMPEG_VERSION_INT
23 23
 #define LIBAVCODEC_VERSION     FFMPEG_VERSION
... ...
@@ -263,7 +263,8 @@ static const __attribute__((unused)) int Motion_Est_QTab[] =
263 263
 #define CODEC_FLAG_H263P_AIV      0x00000008 ///< H263 Alternative inter vlc
264 264
 #define CODEC_FLAG_OBMC           0x00000001 ///< OBMC
265 265
 #define CODEC_FLAG_LOOP_FILTER    0x00000800 ///< loop filter
266
-#define CODEC_FLAG_H263P_SLICE_STRUCT 0x10000000 
266
+#define CODEC_FLAG_H263P_SLICE_STRUCT 0x10000000
267
+#define CODEC_FLAG_INTERLACED_ME  0x20000000 ///< interlaced motion estimation
267 268
 /* Unsupported options :
268 269
  * 		Syntax Arithmetic coding (SAC)
269 270
  * 		Reference Picture Selection
... ...
@@ -218,13 +218,13 @@ static void bswap_buf(uint32_t *dst, uint32_t *src, int w){
218 218
     }
219 219
 }
220 220
 
221
-static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size)
221
+static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
222 222
 {
223 223
     int s, i;
224 224
     uint32_t *sq = squareTbl + 256;
225 225
 
226 226
     s = 0;
227
-    for (i = 0; i < 8; i++) {
227
+    for (i = 0; i < h; i++) {
228 228
         s += sq[pix1[0] - pix2[0]];
229 229
         s += sq[pix1[1] - pix2[1]];
230 230
         s += sq[pix1[2] - pix2[2]];
... ...
@@ -239,13 +239,13 @@ static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size)
239 239
     return s;
240 240
 }
241 241
 
242
-static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size)
242
+static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
243 243
 {
244 244
     int s, i;
245 245
     uint32_t *sq = squareTbl + 256;
246 246
 
247 247
     s = 0;
248
-    for (i = 0; i < 16; i++) {
248
+    for (i = 0; i < h; i++) {
249 249
         s += sq[pix1[ 0] - pix2[ 0]];
250 250
         s += sq[pix1[ 1] - pix2[ 1]];
251 251
         s += sq[pix1[ 2] - pix2[ 2]];
... ...
@@ -2331,12 +2331,12 @@ static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
2331 2331
     }
2332 2332
 }
2333 2333
 
2334
-static inline int pix_abs16x16_c(uint8_t *pix1, uint8_t *pix2, int line_size)
2334
+static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
2335 2335
 {
2336 2336
     int s, i;
2337 2337
 
2338 2338
     s = 0;
2339
-    for(i=0;i<16;i++) {
2339
+    for(i=0;i<h;i++) {
2340 2340
         s += abs(pix1[0] - pix2[0]);
2341 2341
         s += abs(pix1[1] - pix2[1]);
2342 2342
         s += abs(pix1[2] - pix2[2]);
... ...
@@ -2359,12 +2359,12 @@ static inline int pix_abs16x16_c(uint8_t *pix1, uint8_t *pix2, int line_size)
2359 2359
     return s;
2360 2360
 }
2361 2361
 
2362
-static int pix_abs16x16_x2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
2362
+static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
2363 2363
 {
2364 2364
     int s, i;
2365 2365
 
2366 2366
     s = 0;
2367
-    for(i=0;i<16;i++) {
2367
+    for(i=0;i<h;i++) {
2368 2368
         s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
2369 2369
         s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
2370 2370
         s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
... ...
@@ -2387,13 +2387,13 @@ static int pix_abs16x16_x2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
2387 2387
     return s;
2388 2388
 }
2389 2389
 
2390
-static int pix_abs16x16_y2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
2390
+static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
2391 2391
 {
2392 2392
     int s, i;
2393 2393
     uint8_t *pix3 = pix2 + line_size;
2394 2394
 
2395 2395
     s = 0;
2396
-    for(i=0;i<16;i++) {
2396
+    for(i=0;i<h;i++) {
2397 2397
         s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
2398 2398
         s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
2399 2399
         s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
... ...
@@ -2417,13 +2417,13 @@ static int pix_abs16x16_y2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
2417 2417
     return s;
2418 2418
 }
2419 2419
 
2420
-static int pix_abs16x16_xy2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
2420
+static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
2421 2421
 {
2422 2422
     int s, i;
2423 2423
     uint8_t *pix3 = pix2 + line_size;
2424 2424
 
2425 2425
     s = 0;
2426
-    for(i=0;i<16;i++) {
2426
+    for(i=0;i<h;i++) {
2427 2427
         s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
2428 2428
         s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
2429 2429
         s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
... ...
@@ -2447,12 +2447,12 @@ static int pix_abs16x16_xy2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
2447 2447
     return s;
2448 2448
 }
2449 2449
 
2450
-static inline int pix_abs8x8_c(uint8_t *pix1, uint8_t *pix2, int line_size)
2450
+static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
2451 2451
 {
2452 2452
     int s, i;
2453 2453
 
2454 2454
     s = 0;
2455
-    for(i=0;i<8;i++) {
2455
+    for(i=0;i<h;i++) {
2456 2456
         s += abs(pix1[0] - pix2[0]);
2457 2457
         s += abs(pix1[1] - pix2[1]);
2458 2458
         s += abs(pix1[2] - pix2[2]);
... ...
@@ -2467,12 +2467,12 @@ static inline int pix_abs8x8_c(uint8_t *pix1, uint8_t *pix2, int line_size)
2467 2467
     return s;
2468 2468
 }
2469 2469
 
2470
-static int pix_abs8x8_x2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
2470
+static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
2471 2471
 {
2472 2472
     int s, i;
2473 2473
 
2474 2474
     s = 0;
2475
-    for(i=0;i<8;i++) {
2475
+    for(i=0;i<h;i++) {
2476 2476
         s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
2477 2477
         s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
2478 2478
         s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
... ...
@@ -2487,13 +2487,13 @@ static int pix_abs8x8_x2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
2487 2487
     return s;
2488 2488
 }
2489 2489
 
2490
-static int pix_abs8x8_y2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
2490
+static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
2491 2491
 {
2492 2492
     int s, i;
2493 2493
     uint8_t *pix3 = pix2 + line_size;
2494 2494
 
2495 2495
     s = 0;
2496
-    for(i=0;i<8;i++) {
2496
+    for(i=0;i<h;i++) {
2497 2497
         s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
2498 2498
         s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
2499 2499
         s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
... ...
@@ -2509,13 +2509,13 @@ static int pix_abs8x8_y2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
2509 2509
     return s;
2510 2510
 }
2511 2511
 
2512
-static int pix_abs8x8_xy2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
2512
+static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
2513 2513
 {
2514 2514
     int s, i;
2515 2515
     uint8_t *pix3 = pix2 + line_size;
2516 2516
 
2517 2517
     s = 0;
2518
-    for(i=0;i<8;i++) {
2518
+    for(i=0;i<h;i++) {
2519 2519
         s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
2520 2520
         s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
2521 2521
         s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
... ...
@@ -2531,14 +2531,6 @@ static int pix_abs8x8_xy2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
2531 2531
     return s;
2532 2532
 }
2533 2533
 
2534
-static int sad16x16_c(void *s, uint8_t *a, uint8_t *b, int stride){
2535
-    return pix_abs16x16_c(a,b,stride);
2536
-}
2537
-
2538
-static int sad8x8_c(void *s, uint8_t *a, uint8_t *b, int stride){
2539
-    return pix_abs8x8_c(a,b,stride);
2540
-}
2541
-
2542 2534
 /**
2543 2535
  * permutes an 8x8 block.
2544 2536
  * @param block the block which will be permuted according to the given permutation vector
... ...
@@ -2641,10 +2633,12 @@ o2= (i1)-(i2);
2641 2641
 
2642 2642
 #define BUTTERFLYA(x,y) (ABS((x)+(y)) + ABS((x)-(y)))
2643 2643
 
2644
-static int hadamard8_diff_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride){
2644
+static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
2645 2645
     int i;
2646 2646
     int temp[64];
2647 2647
     int sum=0;
2648
+    
2649
+    assert(h==8);
2648 2650
 
2649 2651
     for(i=0; i<8; i++){
2650 2652
         //FIXME try pointer walks
... ...
@@ -2735,11 +2729,13 @@ static int hadamard8_abs_c(uint8_t *src, int stride, int mean){
2735 2735
     return sum;
2736 2736
 }
2737 2737
 
2738
-static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){
2738
+static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2739 2739
     MpegEncContext * const s= (MpegEncContext *)c;
2740 2740
     uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
2741 2741
     DCTELEM * const temp= (DCTELEM*)aligned_temp;
2742 2742
     int sum=0, i;
2743
+    
2744
+    assert(h==8);
2743 2745
 
2744 2746
     s->dsp.diff_pixels(temp, src1, src2, stride);
2745 2747
     s->dsp.fdct(temp);
... ...
@@ -2752,13 +2748,14 @@ static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2
2752 2752
 
2753 2753
 void simple_idct(DCTELEM *block); //FIXME
2754 2754
 
2755
-static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){
2755
+static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2756 2756
     MpegEncContext * const s= (MpegEncContext *)c;
2757 2757
     uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64*2/8];
2758 2758
     DCTELEM * const temp= (DCTELEM*)aligned_temp;
2759 2759
     DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64;
2760 2760
     int sum=0, i;
2761 2761
 
2762
+    assert(h==8);
2762 2763
     s->mb_intra=0;
2763 2764
     
2764 2765
     s->dsp.diff_pixels(temp, src1, src2, stride);
... ...
@@ -2775,7 +2772,7 @@ static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *s
2775 2775
     return sum;
2776 2776
 }
2777 2777
 
2778
-static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){
2778
+static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2779 2779
     MpegEncContext * const s= (MpegEncContext *)c;
2780 2780
     const uint8_t *scantable= s->intra_scantable.permutated;
2781 2781
     uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
... ...
@@ -2787,6 +2784,8 @@ static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int
2787 2787
     uint8_t * length;
2788 2788
     uint8_t * last_length;
2789 2789
     
2790
+    assert(h==8);
2791
+
2790 2792
     for(i=0; i<8; i++){
2791 2793
         ((uint32_t*)(bak + i*stride))[0]= ((uint32_t*)(src2 + i*stride))[0];
2792 2794
         ((uint32_t*)(bak + i*stride))[1]= ((uint32_t*)(src2 + i*stride))[1];
... ...
@@ -2847,12 +2846,12 @@ static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int
2847 2847
     
2848 2848
     s->dsp.idct_add(bak, stride, temp);
2849 2849
     
2850
-    distoration= s->dsp.sse[1](NULL, bak, src1, stride);
2850
+    distoration= s->dsp.sse[1](NULL, bak, src1, stride, 8);
2851 2851
 
2852 2852
     return distoration + ((bits*s->qscale*s->qscale*109 + 64)>>7);
2853 2853
 }
2854 2854
 
2855
-static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){
2855
+static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2856 2856
     MpegEncContext * const s= (MpegEncContext *)c;
2857 2857
     const uint8_t *scantable= s->intra_scantable.permutated;
2858 2858
     uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
... ...
@@ -2861,6 +2860,8 @@ static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, in
2861 2861
     const int esc_length= s->ac_esc_length;
2862 2862
     uint8_t * length;
2863 2863
     uint8_t * last_length;
2864
+
2865
+    assert(h==8);
2864 2866
     
2865 2867
     s->dsp.diff_pixels(temp, src1, src2, stride);
2866 2868
 
... ...
@@ -2910,12 +2911,11 @@ static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, in
2910 2910
     return bits;
2911 2911
 }
2912 2912
 
2913
-
2914
-WARPER88_1616(hadamard8_diff_c, hadamard8_diff16_c)
2915
-WARPER88_1616(dct_sad8x8_c, dct_sad16x16_c)
2916
-WARPER88_1616(quant_psnr8x8_c, quant_psnr16x16_c)
2917
-WARPER88_1616(rd8x8_c, rd16x16_c)
2918
-WARPER88_1616(bit8x8_c, bit16x16_c)
2913
+WARPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
2914
+WARPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
2915
+WARPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
2916
+WARPER8_16_SQ(rd8x8_c, rd16_c)
2917
+WARPER8_16_SQ(bit8x8_c, bit16_c)
2919 2918
 
2920 2919
 /* XXX: those functions should be suppressed ASAP when all IDCTs are
2921 2920
  converted */
... ...
@@ -2989,18 +2989,16 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
2989 2989
     c->clear_blocks = clear_blocks_c;
2990 2990
     c->pix_sum = pix_sum_c;
2991 2991
     c->pix_norm1 = pix_norm1_c;
2992
-    c->sse[0]= sse16_c;
2993
-    c->sse[1]= sse8_c;
2994 2992
 
2995 2993
     /* TODO [0] 16  [1] 8 */
2996
-    c->pix_abs16x16     = pix_abs16x16_c;
2997
-    c->pix_abs16x16_x2  = pix_abs16x16_x2_c;
2998
-    c->pix_abs16x16_y2  = pix_abs16x16_y2_c;
2999
-    c->pix_abs16x16_xy2 = pix_abs16x16_xy2_c;
3000
-    c->pix_abs8x8     = pix_abs8x8_c;
3001
-    c->pix_abs8x8_x2  = pix_abs8x8_x2_c;
3002
-    c->pix_abs8x8_y2  = pix_abs8x8_y2_c;
3003
-    c->pix_abs8x8_xy2 = pix_abs8x8_xy2_c;
2994
+    c->pix_abs[0][0] = pix_abs16_c;
2995
+    c->pix_abs[0][1] = pix_abs16_x2_c;
2996
+    c->pix_abs[0][2] = pix_abs16_y2_c;
2997
+    c->pix_abs[0][3] = pix_abs16_xy2_c;
2998
+    c->pix_abs[1][0] = pix_abs8_c;
2999
+    c->pix_abs[1][1] = pix_abs8_x2_c;
3000
+    c->pix_abs[1][2] = pix_abs8_y2_c;
3001
+    c->pix_abs[1][3] = pix_abs8_xy2_c;
3004 3002
 
3005 3003
 #define dspfunc(PFX, IDX, NUM) \
3006 3004
     c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c;     \
... ...
@@ -3097,24 +3095,21 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
3097 3097
     c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
3098 3098
     c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
3099 3099
         
3100
-    c->hadamard8_diff[0]= hadamard8_diff16_c;
3101
-    c->hadamard8_diff[1]= hadamard8_diff_c;
3102 3100
     c->hadamard8_abs = hadamard8_abs_c;
3103
-    
3104
-    c->dct_sad[0]= dct_sad16x16_c;
3105
-    c->dct_sad[1]= dct_sad8x8_c;
3106
-    
3107
-    c->sad[0]= sad16x16_c;
3108
-    c->sad[1]= sad8x8_c;
3109
-    
3110
-    c->quant_psnr[0]= quant_psnr16x16_c;
3111
-    c->quant_psnr[1]= quant_psnr8x8_c;
3112
-
3113
-    c->rd[0]= rd16x16_c;
3114
-    c->rd[1]= rd8x8_c;
3115 3101
 
3116
-    c->bit[0]= bit16x16_c;
3117
-    c->bit[1]= bit8x8_c;
3102
+#define SET_CMP_FUNC(name) \
3103
+    c->name[0]= name ## 16_c;\
3104
+    c->name[1]= name ## 8x8_c;
3105
+    
3106
+    SET_CMP_FUNC(hadamard8_diff)
3107
+    SET_CMP_FUNC(dct_sad)
3108
+    c->sad[0]= pix_abs16_c;
3109
+    c->sad[1]= pix_abs8_c;
3110
+    c->sse[0]= sse16_c;
3111
+    c->sse[1]= sse8_c;
3112
+    SET_CMP_FUNC(quant_psnr)
3113
+    SET_CMP_FUNC(rd)
3114
+    SET_CMP_FUNC(bit)
3118 3115
         
3119 3116
     c->add_bytes= add_bytes_c;
3120 3117
     c->diff_bytes= diff_bytes_c;
... ...
@@ -110,9 +110,7 @@ static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
110 110
 
111 111
 /* motion estimation */
112 112
 
113
-typedef int (*op_pixels_abs_func)(uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size)/* __attribute__ ((const))*/;
114
-
115
-typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size)/* __attribute__ ((const))*/;
113
+typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size, int h)/* __attribute__ ((const))*/;
116 114
 
117 115
 
118 116
 /**
... ...
@@ -136,19 +134,21 @@ typedef struct DSPContext {
136 136
     void (*clear_blocks)(DCTELEM *blocks/*align 16*/);
137 137
     int (*pix_sum)(uint8_t * pix, int line_size);
138 138
     int (*pix_norm1)(uint8_t * pix, int line_size);
139
-    me_cmp_func sad[2]; /* identical to pix_absAxA except additional void * */
140
-    me_cmp_func sse[2];
141
-    me_cmp_func hadamard8_diff[2];
142
-    me_cmp_func dct_sad[2];
143
-    me_cmp_func quant_psnr[2];
144
-    me_cmp_func bit[2];
145
-    me_cmp_func rd[2];
139
+// 16x16 8x8 4x4 2x2 16x8 8x4 4x2 8x16 4x8 2x4
140
+    
141
+    me_cmp_func sad[4]; /* identical to pix_absAxA except additional void * */
142
+    me_cmp_func sse[4];
143
+    me_cmp_func hadamard8_diff[4];
144
+    me_cmp_func dct_sad[4];
145
+    me_cmp_func quant_psnr[4];
146
+    me_cmp_func bit[4];
147
+    me_cmp_func rd[4];
146 148
     int (*hadamard8_abs )(uint8_t *src, int stride, int mean);
147 149
 
148
-    me_cmp_func me_pre_cmp[11];
149
-    me_cmp_func me_cmp[11];
150
-    me_cmp_func me_sub_cmp[11];
151
-    me_cmp_func mb_cmp[11];
150
+    me_cmp_func me_pre_cmp[5];
151
+    me_cmp_func me_cmp[5];
152
+    me_cmp_func me_sub_cmp[5];
153
+    me_cmp_func mb_cmp[5];
152 154
 
153 155
     /* maybe create an array for 16/8/4/2 functions */
154 156
     /**
... ...
@@ -226,14 +226,7 @@ typedef struct DSPContext {
226 226
     qpel_mc_func put_h264_qpel_pixels_tab[3][16];
227 227
     qpel_mc_func avg_h264_qpel_pixels_tab[3][16];
228 228
     
229
-    op_pixels_abs_func pix_abs16x16;
230
-    op_pixels_abs_func pix_abs16x16_x2;
231
-    op_pixels_abs_func pix_abs16x16_y2;
232
-    op_pixels_abs_func pix_abs16x16_xy2;
233
-    op_pixels_abs_func pix_abs8x8;
234
-    op_pixels_abs_func pix_abs8x8_x2;
235
-    op_pixels_abs_func pix_abs8x8_y2;
236
-    op_pixels_abs_func pix_abs8x8_xy2;
229
+    me_cmp_func pix_abs[2][4];
237 230
     
238 231
     /* huffyuv specific */
239 232
     void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w);
... ...
@@ -484,12 +477,24 @@ void ff_mdct_calc(MDCTContext *s, FFTSample *out,
484 484
                const FFTSample *input, FFTSample *tmp);
485 485
 void ff_mdct_end(MDCTContext *s);
486 486
 
487
-#define WARPER88_1616(name8, name16)\
488
-static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride){\
489
-    return name8(s, dst           , src           , stride)\
490
-          +name8(s, dst+8         , src+8         , stride)\
491
-          +name8(s, dst  +8*stride, src  +8*stride, stride)\
492
-          +name8(s, dst+8+8*stride, src+8+8*stride, stride);\
487
+#define WARPER8_16(name8, name16)\
488
+static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
489
+    return name8(s, dst           , src           , stride, h)\
490
+          +name8(s, dst+8         , src+8         , stride, h);\
491
+}
492
+
493
+#define WARPER8_16_SQ(name8, name16)\
494
+static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
495
+    int score=0;\
496
+    score +=name8(s, dst           , src           , stride, 8);\
497
+    score +=name8(s, dst+8         , src+8         , stride, 8);\
498
+    if(h==16){\
499
+        dst += 8*stride;\
500
+        src += 8*stride;\
501
+        score +=name8(s, dst           , src           , stride, 8);\
502
+        score +=name8(s, dst+8         , src+8         , stride, 8);\
503
+    }\
504
+    return score;\
493 505
 }
494 506
 
495 507
 #ifndef HAVE_LRINTF
... ...
@@ -582,8 +582,8 @@ static int is_intra_more_likely(MpegEncContext *s){
582 582
                 uint8_t *mb_ptr     = s->current_picture.data[0] + mb_x*16 + mb_y*16*s->linesize;
583 583
                 uint8_t *last_mb_ptr= s->last_picture.data   [0] + mb_x*16 + mb_y*16*s->linesize;
584 584
     
585
-		is_intra_likely += s->dsp.pix_abs16x16(last_mb_ptr, mb_ptr                    , s->linesize);
586
-                is_intra_likely -= s->dsp.pix_abs16x16(last_mb_ptr, last_mb_ptr+s->linesize*16, s->linesize);
585
+		is_intra_likely += s->dsp.sad[0](NULL, last_mb_ptr, mb_ptr                    , s->linesize, 16);
586
+                is_intra_likely -= s->dsp.sad[0](NULL, last_mb_ptr, last_mb_ptr+s->linesize*16, s->linesize, 16);
587 587
             }else{
588 588
                 if(IS_INTRA(s->current_picture.mb_type[mb_xy]))
589 589
                    is_intra_likely++;
... ...
@@ -479,9 +479,9 @@ void ff_clean_mpeg4_qscales(MpegEncContext *s){
479 479
     for(i=1; i<s->mb_num; i++){
480 480
         int mb_xy= s->mb_index2xy[i];
481 481
     
482
-        if(qscale_table[mb_xy] != qscale_table[s->mb_index2xy[i-1]] && (s->mb_type[mb_xy]&MB_TYPE_INTER4V)){
483
-            s->mb_type[mb_xy]&= ~MB_TYPE_INTER4V;
484
-            s->mb_type[mb_xy]|= MB_TYPE_INTER;
482
+        if(qscale_table[mb_xy] != qscale_table[s->mb_index2xy[i-1]] && (s->mb_type[mb_xy]&CANDIDATE_MB_TYPE_INTER4V)){
483
+            s->mb_type[mb_xy]&= ~CANDIDATE_MB_TYPE_INTER4V;
484
+            s->mb_type[mb_xy]|= CANDIDATE_MB_TYPE_INTER;
485 485
         }
486 486
     }
487 487
 
... ...
@@ -508,9 +508,9 @@ void ff_clean_mpeg4_qscales(MpegEncContext *s){
508 508
     
509 509
         for(i=1; i<s->mb_num; i++){
510 510
             int mb_xy= s->mb_index2xy[i];
511
-            if(qscale_table[mb_xy] != qscale_table[s->mb_index2xy[i-1]] && (s->mb_type[mb_xy]&MB_TYPE_DIRECT)){
512
-                s->mb_type[mb_xy]&= ~MB_TYPE_DIRECT;
513
-                s->mb_type[mb_xy]|= MB_TYPE_BIDIR;
511
+            if(qscale_table[mb_xy] != qscale_table[s->mb_index2xy[i-1]] && (s->mb_type[mb_xy]&CANDIDATE_MB_TYPE_DIRECT)){
512
+                s->mb_type[mb_xy]&= ~CANDIDATE_MB_TYPE_DIRECT;
513
+                s->mb_type[mb_xy]|= CANDIDATE_MB_TYPE_BIDIR;
514 514
             }
515 515
         }
516 516
     }
... ...
@@ -523,7 +523,7 @@ void ff_clean_mpeg4_qscales(MpegEncContext *s){
523 523
  */
524 524
 int ff_mpeg4_set_direct_mv(MpegEncContext *s, int mx, int my){
525 525
     const int mb_index= s->mb_x + s->mb_y*s->mb_stride;
526
-    const int colocated_mb_type= s->next_picture.mb_type[mb_index]; //FIXME or next?
526
+    const int colocated_mb_type= s->next_picture.mb_type[mb_index];
527 527
     int xy= s->block_index[0];
528 528
     uint16_t time_pp= s->pp_time;
529 529
     uint16_t time_pb= s->pb_time;
... ...
@@ -547,18 +547,18 @@ int ff_mpeg4_set_direct_mv(MpegEncContext *s, int mx, int my){
547 547
         s->mv_type = MV_TYPE_FIELD;
548 548
         for(i=0; i<2; i++){
549 549
             if(s->top_field_first){
550
-                time_pp= s->pp_field_time - s->field_select_table[mb_index][i] + i;
551
-                time_pb= s->pb_field_time - s->field_select_table[mb_index][i] + i;
550
+                time_pp= s->pp_field_time - s->p_field_select_table[i][mb_index] + i;
551
+                time_pb= s->pb_field_time - s->p_field_select_table[i][mb_index] + i;
552 552
             }else{
553
-                time_pp= s->pp_field_time + s->field_select_table[mb_index][i] - i;
554
-                time_pb= s->pb_field_time + s->field_select_table[mb_index][i] - i;
553
+                time_pp= s->pp_field_time + s->p_field_select_table[i][mb_index] - i;
554
+                time_pb= s->pb_field_time + s->p_field_select_table[i][mb_index] - i;
555 555
             }
556
-            s->mv[0][i][0] = s->field_mv_table[mb_index][i][0]*time_pb/time_pp + mx;
557
-            s->mv[0][i][1] = s->field_mv_table[mb_index][i][1]*time_pb/time_pp + my;
558
-            s->mv[1][i][0] = mx ? s->mv[0][i][0] - s->field_mv_table[mb_index][i][0]
559
-                                : s->field_mv_table[mb_index][i][0]*(time_pb - time_pp)/time_pp;
560
-            s->mv[1][i][1] = my ? s->mv[0][i][1] - s->field_mv_table[mb_index][i][1] 
561
-                                : s->field_mv_table[mb_index][i][1]*(time_pb - time_pp)/time_pp;
556
+            s->mv[0][i][0] = s->p_field_mv_table[i][0][mb_index][0]*time_pb/time_pp + mx;
557
+            s->mv[0][i][1] = s->p_field_mv_table[i][0][mb_index][1]*time_pb/time_pp + my;
558
+            s->mv[1][i][0] = mx ? s->mv[0][i][0] - s->p_field_mv_table[i][0][mb_index][0]
559
+                                : s->p_field_mv_table[i][0][mb_index][0]*(time_pb - time_pp)/time_pp;
560
+            s->mv[1][i][1] = my ? s->mv[0][i][1] - s->p_field_mv_table[i][0][mb_index][1] 
561
+                                : s->p_field_mv_table[i][0][mb_index][1]*(time_pb - time_pp)/time_pp;
562 562
         }
563 563
         return MB_TYPE_DIRECT2 | MB_TYPE_16x8 | MB_TYPE_L0L1 | MB_TYPE_INTERLACED;
564 564
     }else{
... ...
@@ -598,9 +598,9 @@ void ff_h263_update_motion_val(MpegEncContext * s){
598 598
             motion_y = s->mv[0][0][1] + s->mv[0][1][1];
599 599
             motion_x = (motion_x>>1) | (motion_x&1);
600 600
             for(i=0; i<2; i++){
601
-                s->field_mv_table[mb_xy][i][0]= s->mv[0][i][0];
602
-                s->field_mv_table[mb_xy][i][1]= s->mv[0][i][1];
603
-                s->field_select_table[mb_xy][i]= s->field_select[0][i];
601
+                s->p_field_mv_table[i][0][mb_xy][0]= s->mv[0][i][0];
602
+                s->p_field_mv_table[i][0][mb_xy][1]= s->mv[0][i][1];
603
+                s->p_field_select_table[i][mb_xy]= s->field_select[0][i];
604 604
             }
605 605
         }
606 606
         
... ...
@@ -744,12 +744,14 @@ void mpeg4_encode_mb(MpegEncContext * s,
744 744
         if(s->pict_type==B_TYPE){
745 745
             static const int mb_type_table[8]= {-1, 2, 3, 1,-1,-1,-1, 0}; /* convert from mv_dir to type */
746 746
             int mb_type=  mb_type_table[s->mv_dir];
747
-            
747
+
748 748
             if(s->mb_x==0){
749
-                s->last_mv[0][0][0]= 
750
-                s->last_mv[0][0][1]= 
751
-                s->last_mv[1][0][0]= 
752
-                s->last_mv[1][0][1]= 0;
749
+                for(i=0; i<2; i++){
750
+                    s->last_mv[i][0][0]= 
751
+                    s->last_mv[i][0][1]= 
752
+                    s->last_mv[i][1][0]= 
753
+                    s->last_mv[i][1][1]= 0;
754
+                }
753 755
             }
754 756
             
755 757
             assert(s->dquant>=-2 && s->dquant<=2);
... ...
@@ -803,50 +805,64 @@ void mpeg4_encode_mb(MpegEncContext * s,
803 803
                 if(cbp)
804 804
                     put_bits(&s->pb, 1, s->interlaced_dct);
805 805
                 if(mb_type) // not diect mode
806
-                    put_bits(&s->pb, 1, 0); // no interlaced ME yet
806
+                    put_bits(&s->pb, 1, s->mv_type == MV_TYPE_FIELD);
807 807
             }
808 808
 
809 809
             if(interleaved_stats){
810 810
                 s->misc_bits+= get_bits_diff(s);
811 811
             }
812 812
 
813
-            switch(mb_type)
814
-            {
815
-            case 0: /* direct */
813
+            if(mb_type == 0){
814
+                assert(s->mv_dir & MV_DIRECT);
816 815
                 h263_encode_motion(s, motion_x, 1);
817 816
                 h263_encode_motion(s, motion_y, 1);                
818 817
                 s->b_count++;
819 818
                 s->f_count++;
820
-                break;
821
-            case 1: /* bidir */
822
-                h263_encode_motion(s, s->mv[0][0][0] - s->last_mv[0][0][0], s->f_code);
823
-                h263_encode_motion(s, s->mv[0][0][1] - s->last_mv[0][0][1], s->f_code);
824
-                h263_encode_motion(s, s->mv[1][0][0] - s->last_mv[1][0][0], s->b_code);
825
-                h263_encode_motion(s, s->mv[1][0][1] - s->last_mv[1][0][1], s->b_code);
826
-                s->last_mv[0][0][0]= s->mv[0][0][0];
827
-                s->last_mv[0][0][1]= s->mv[0][0][1];
828
-                s->last_mv[1][0][0]= s->mv[1][0][0];
829
-                s->last_mv[1][0][1]= s->mv[1][0][1];
830
-                s->b_count++;
831
-                s->f_count++;
832
-                break;
833
-            case 2: /* backward */
834
-                h263_encode_motion(s, motion_x - s->last_mv[1][0][0], s->b_code);
835
-                h263_encode_motion(s, motion_y - s->last_mv[1][0][1], s->b_code);
836
-                s->last_mv[1][0][0]= motion_x;
837
-                s->last_mv[1][0][1]= motion_y;
838
-                s->b_count++;
839
-                break;
840
-            case 3: /* forward */
841
-                h263_encode_motion(s, motion_x - s->last_mv[0][0][0], s->f_code);
842
-                h263_encode_motion(s, motion_y - s->last_mv[0][0][1], s->f_code);
843
-                s->last_mv[0][0][0]= motion_x;
844
-                s->last_mv[0][0][1]= motion_y;
845
-                s->f_count++;
846
-                break;
847
-            default:
848
-                av_log(s->avctx, AV_LOG_ERROR, "unknown mb type\n");
849
-                return;
819
+            }else{
820
+                assert(mb_type > 0 && mb_type < 4);
821
+                if(s->mv_type != MV_TYPE_FIELD){
822
+                    if(s->mv_dir & MV_DIR_FORWARD){
823
+                        h263_encode_motion(s, s->mv[0][0][0] - s->last_mv[0][0][0], s->f_code);
824
+                        h263_encode_motion(s, s->mv[0][0][1] - s->last_mv[0][0][1], s->f_code);
825
+                        s->last_mv[0][0][0]= s->last_mv[0][1][0]= s->mv[0][0][0];
826
+                        s->last_mv[0][0][1]= s->last_mv[0][1][1]= s->mv[0][0][1];
827
+                        s->f_count++;
828
+                    }
829
+                    if(s->mv_dir & MV_DIR_BACKWARD){
830
+                        h263_encode_motion(s, s->mv[1][0][0] - s->last_mv[1][0][0], s->b_code);
831
+                        h263_encode_motion(s, s->mv[1][0][1] - s->last_mv[1][0][1], s->b_code);
832
+                        s->last_mv[1][0][0]= s->last_mv[1][1][0]= s->mv[1][0][0];
833
+                        s->last_mv[1][0][1]= s->last_mv[1][1][1]= s->mv[1][0][1];
834
+                        s->b_count++;
835
+                    }
836
+                }else{
837
+                    if(s->mv_dir & MV_DIR_FORWARD){
838
+                        put_bits(&s->pb, 1, s->field_select[0][0]);
839
+                        put_bits(&s->pb, 1, s->field_select[0][1]);
840
+                    }
841
+                    if(s->mv_dir & MV_DIR_BACKWARD){
842
+                        put_bits(&s->pb, 1, s->field_select[1][0]);
843
+                        put_bits(&s->pb, 1, s->field_select[1][1]);
844
+                    }
845
+                    if(s->mv_dir & MV_DIR_FORWARD){
846
+                        for(i=0; i<2; i++){
847
+                            h263_encode_motion(s, s->mv[0][i][0] - s->last_mv[0][i][0]  , s->f_code);
848
+                            h263_encode_motion(s, s->mv[0][i][1] - s->last_mv[0][i][1]/2, s->f_code);
849
+                            s->last_mv[0][i][0]= s->mv[0][i][0];
850
+                            s->last_mv[0][i][1]= s->mv[0][i][1]*2;
851
+                        }
852
+                        s->f_count++;
853
+                    }
854
+                    if(s->mv_dir & MV_DIR_BACKWARD){
855
+                        for(i=0; i<2; i++){
856
+                            h263_encode_motion(s, s->mv[1][i][0] - s->last_mv[1][i][0]  , s->b_code);
857
+                            h263_encode_motion(s, s->mv[1][i][1] - s->last_mv[1][i][1]/2, s->b_code);
858
+                            s->last_mv[1][i][0]= s->mv[1][i][0];
859
+                            s->last_mv[1][i][1]= s->mv[1][i][1]*2;
860
+                        }
861
+                        s->b_count++;
862
+                    }
863
+                }
850 864
             }
851 865
 
852 866
             if(interleaved_stats){
... ...
@@ -861,6 +877,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
861 861
             if(interleaved_stats){
862 862
                 s->p_tex_bits+= get_bits_diff(s);
863 863
             }
864
+
864 865
         }else{ /* s->pict_type==B_TYPE */
865 866
             cbp= get_p_cbp(s, block, motion_x, motion_y);
866 867
         
... ...
@@ -889,7 +906,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
889 889
                         if(pic==NULL || pic->pict_type!=B_TYPE) break;
890 890
 
891 891
                         b_pic= pic->data[0] + offset + 16; //FIXME +16
892
-			diff= s->dsp.pix_abs16x16(p_pic, b_pic, s->linesize);
892
+			diff= s->dsp.sad[0](NULL, p_pic, b_pic, s->linesize, 16);
893 893
                         if(diff>s->qscale*70){ //FIXME check that 70 is optimal
894 894
                             s->mb_skiped=0;
895 895
                             break;
... ...
@@ -929,7 +946,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
929 929
                 if(!s->progressive_sequence){
930 930
                     if(cbp)
931 931
                         put_bits(pb2, 1, s->interlaced_dct);
932
-                    put_bits(pb2, 1, 0); // no interlaced ME yet
932
+                    put_bits(pb2, 1, 0);
933 933
                 }
934 934
                     
935 935
                 if(interleaved_stats){
... ...
@@ -941,7 +958,38 @@ void mpeg4_encode_mb(MpegEncContext * s,
941 941
             
942 942
                 h263_encode_motion(s, motion_x - pred_x, s->f_code);
943 943
                 h263_encode_motion(s, motion_y - pred_y, s->f_code);
944
+            }else if(s->mv_type==MV_TYPE_FIELD){
945
+                if(s->dquant) cbpc+= 8;
946
+                put_bits(&s->pb,
947
+                        inter_MCBPC_bits[cbpc],
948
+                        inter_MCBPC_code[cbpc]);
949
+
950
+                put_bits(pb2, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
951
+                if(s->dquant)
952
+                    put_bits(pb2, 2, dquant_code[s->dquant+2]);
953
+
954
+                assert(!s->progressive_sequence);
955
+                if(cbp)
956
+                    put_bits(pb2, 1, s->interlaced_dct);
957
+                put_bits(pb2, 1, 1);
958
+                    
959
+                if(interleaved_stats){
960
+                    s->misc_bits+= get_bits_diff(s);
961
+                }
962
+
963
+                /* motion vectors: 16x8 interlaced mode */
964
+                h263_pred_motion(s, 0, &pred_x, &pred_y);
965
+                pred_y /=2;
966
+                
967
+                put_bits(&s->pb, 1, s->field_select[0][0]);
968
+                put_bits(&s->pb, 1, s->field_select[0][1]);
969
+            
970
+                h263_encode_motion(s, s->mv[0][0][0] - pred_x, s->f_code);
971
+                h263_encode_motion(s, s->mv[0][0][1] - pred_y, s->f_code);
972
+                h263_encode_motion(s, s->mv[0][1][0] - pred_x, s->f_code);
973
+                h263_encode_motion(s, s->mv[0][1][1] - pred_y, s->f_code);
944 974
             }else{
975
+                assert(s->mv_type==MV_TYPE_8X8);
945 976
                 put_bits(&s->pb,
946 977
                         inter_MCBPC_bits[cbpc+16],
947 978
                         inter_MCBPC_code[cbpc+16]);
... ...
@@ -61,8 +61,8 @@ static const int h263_mb_type_b_map[15]= {
61 61
                       MB_TYPE_L0L1 | MB_TYPE_CBP                 | MB_TYPE_16x16,
62 62
                       MB_TYPE_L0L1 | MB_TYPE_CBP | MB_TYPE_QUANT | MB_TYPE_16x16,
63 63
     0, //stuffing
64
-    MB_TYPE_INTRA                  | MB_TYPE_CBP,
65
-    MB_TYPE_INTRA                  | MB_TYPE_CBP | MB_TYPE_QUANT,
64
+    MB_TYPE_INTRA4x4                | MB_TYPE_CBP,
65
+    MB_TYPE_INTRA4x4                | MB_TYPE_CBP | MB_TYPE_QUANT,
66 66
 };
67 67
 
68 68
 const uint8_t cbpc_b_tab[4][2] = {
... ...
@@ -687,10 +687,10 @@ static int pix_norm1_mmx(uint8_t *pix, int line_size) {
687 687
     return tmp;
688 688
 }
689 689
 
690
-static int sse16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size) {
690
+static int sse16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) {
691 691
     int tmp;
692 692
   asm volatile (
693
-      "movl $16,%%ecx\n"
693
+      "movl %4,%%ecx\n"
694 694
       "pxor %%mm0,%%mm0\n"	/* mm0 = 0 */
695 695
       "pxor %%mm7,%%mm7\n"	/* mm7 holds the sum */
696 696
       "1:\n"
... ...
@@ -741,7 +741,9 @@ static int sse16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size) {
741 741
       "psrlq $32, %%mm7\n"	/* shift hi dword to lo */
742 742
       "paddd %%mm7,%%mm1\n"
743 743
       "movd %%mm1,%2\n"
744
-      : "+r" (pix1), "+r" (pix2), "=r"(tmp) : "r" (line_size) : "%ecx");
744
+      : "+r" (pix1), "+r" (pix2), "=r"(tmp) 
745
+      : "r" (line_size) , "m" (h)
746
+      : "%ecx");
745 747
     return tmp;
746 748
 }
747 749
 
... ...
@@ -866,9 +868,11 @@ static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t
866 866
         "movq "#c", "#o"+32(%1)		\n\t"\
867 867
         "movq "#d", "#o"+48(%1)		\n\t"\
868 868
 
869
-static int hadamard8_diff_mmx(void *s, uint8_t *src1, uint8_t *src2, int stride){
869
+static int hadamard8_diff_mmx(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){
870 870
     uint64_t temp[16] __align8;
871 871
     int sum=0;
872
+    
873
+    assert(h==8);
872 874
 
873 875
     diff_pixels_mmx((DCTELEM*)temp, src1, src2, stride);
874 876
 
... ...
@@ -951,9 +955,11 @@ static int hadamard8_diff_mmx(void *s, uint8_t *src1, uint8_t *src2, int stride)
951 951
     return sum&0xFFFF;
952 952
 }
953 953
 
954
-static int hadamard8_diff_mmx2(void *s, uint8_t *src1, uint8_t *src2, int stride){
954
+static int hadamard8_diff_mmx2(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){
955 955
     uint64_t temp[16] __align8;
956 956
     int sum=0;
957
+    
958
+    assert(h==8);
957 959
 
958 960
     diff_pixels_mmx((DCTELEM*)temp, src1, src2, stride);
959 961
 
... ...
@@ -1037,8 +1043,8 @@ static int hadamard8_diff_mmx2(void *s, uint8_t *src1, uint8_t *src2, int stride
1037 1037
 }
1038 1038
 
1039 1039
 
1040
-WARPER88_1616(hadamard8_diff_mmx, hadamard8_diff16_mmx)
1041
-WARPER88_1616(hadamard8_diff_mmx2, hadamard8_diff16_mmx2)
1040
+WARPER8_16_SQ(hadamard8_diff_mmx, hadamard8_diff16_mmx)
1041
+WARPER8_16_SQ(hadamard8_diff_mmx2, hadamard8_diff16_mmx2)
1042 1042
 #endif //CONFIG_ENCODERS
1043 1043
 
1044 1044
 #define put_no_rnd_pixels8_mmx(a,b,c,d) put_pixels8_mmx(a,b,c,d)
... ...
@@ -28,9 +28,9 @@ static const __attribute__ ((aligned(8))) uint64_t round_tab[3]={
28 28
 
29 29
 static __attribute__ ((aligned(8), unused)) uint64_t bone= 0x0101010101010101LL;
30 30
 
31
-static inline void sad8_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
31
+static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
32 32
 {
33
-    int len= -(stride<<h);
33
+    int len= -(stride*h);
34 34
     asm volatile(
35 35
         ".balign 16			\n\t"
36 36
         "1:				\n\t"
... ...
@@ -64,9 +64,9 @@ static inline void sad8_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
64 64
     );
65 65
 }
66 66
 
67
-static inline void sad8_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
67
+static inline void sad8_1_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
68 68
 {
69
-    int len= -(stride<<h);
69
+    int len= -(stride*h);
70 70
     asm volatile(
71 71
         ".balign 16			\n\t"
72 72
         "1:				\n\t"
... ...
@@ -88,7 +88,7 @@ static inline void sad8_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
88 88
 
89 89
 static inline void sad8_2_mmx2(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int stride, int h)
90 90
 {
91
-    int len= -(stride<<h);
91
+    int len= -(stride*h);
92 92
     asm volatile(
93 93
         ".balign 16			\n\t"
94 94
         "1:				\n\t"
... ...
@@ -114,7 +114,7 @@ static inline void sad8_2_mmx2(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, in
114 114
 
115 115
 static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
116 116
 { //FIXME reuse src
117
-    int len= -(stride<<h);
117
+    int len= -(stride*h);
118 118
     asm volatile(
119 119
         ".balign 16			\n\t"
120 120
         "movq "MANGLE(bone)", %%mm5	\n\t"
... ...
@@ -151,7 +151,7 @@ static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
151 151
 
152 152
 static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int stride, int h)
153 153
 {
154
-    int len= -(stride<<h);
154
+    int len= -(stride*h);
155 155
     asm volatile(
156 156
         ".balign 16			\n\t"
157 157
         "1:				\n\t"
... ...
@@ -189,7 +189,7 @@ static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int
189 189
 
190 190
 static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
191 191
 {
192
-    int len= -(stride<<h);
192
+    int len= -(stride*h);
193 193
     asm volatile(
194 194
         ".balign 16			\n\t"
195 195
         "1:				\n\t"
... ...
@@ -265,85 +265,69 @@ static inline int sum_mmx2(void)
265 265
 
266 266
 
267 267
 #define PIX_SAD(suf)\
268
-static int pix_abs8x8_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
268
+static int sad8_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
269 269
 {\
270
+    assert(h==8);\
270 271
     asm volatile("pxor %%mm7, %%mm7		\n\t"\
271 272
                  "pxor %%mm6, %%mm6		\n\t":);\
272 273
 \
273
-    sad8_ ## suf(blk1, blk2, stride, 3);\
274
+    sad8_1_ ## suf(blk1, blk2, stride, 8);\
274 275
 \
275 276
     return sum_ ## suf();\
276 277
 }\
277
-static int sad8x8_ ## suf(void *s, uint8_t *blk2, uint8_t *blk1, int stride)\
278
-{\
279
-    asm volatile("pxor %%mm7, %%mm7		\n\t"\
280
-                 "pxor %%mm6, %%mm6		\n\t":);\
281
-\
282
-    sad8_ ## suf(blk1, blk2, stride, 3);\
283
-\
284
-    return sum_ ## suf();\
285
-}\
286
-\
287
-static int pix_abs8x8_x2_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
278
+static int sad8_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
288 279
 {\
280
+    assert(h==8);\
289 281
     asm volatile("pxor %%mm7, %%mm7		\n\t"\
290 282
                  "pxor %%mm6, %%mm6		\n\t"\
291 283
                  "movq %0, %%mm5		\n\t"\
292 284
                  :: "m"(round_tab[1]) \
293 285
                  );\
294 286
 \
295
-    sad8_2_ ## suf(blk1, blk1+1, blk2, stride, 3);\
287
+    sad8_2_ ## suf(blk1, blk1+1, blk2, stride, 8);\
296 288
 \
297 289
     return sum_ ## suf();\
298 290
 }\
299 291
 \
300
-static int pix_abs8x8_y2_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
292
+static int sad8_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
301 293
 {\
294
+    assert(h==8);\
302 295
     asm volatile("pxor %%mm7, %%mm7		\n\t"\
303 296
                  "pxor %%mm6, %%mm6		\n\t"\
304 297
                  "movq %0, %%mm5		\n\t"\
305 298
                  :: "m"(round_tab[1]) \
306 299
                  );\
307 300
 \
308
-    sad8_2_ ## suf(blk1, blk1+stride, blk2, stride, 3);\
301
+    sad8_2_ ## suf(blk1, blk1+stride, blk2, stride, 8);\
309 302
 \
310 303
     return sum_ ## suf();\
311 304
 }\
312 305
 \
313
-static int pix_abs8x8_xy2_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
306
+static int sad8_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
314 307
 {\
308
+    assert(h==8);\
315 309
     asm volatile("pxor %%mm7, %%mm7		\n\t"\
316 310
                  "pxor %%mm6, %%mm6		\n\t"\
317 311
                  "movq %0, %%mm5		\n\t"\
318 312
                  :: "m"(round_tab[2]) \
319 313
                  );\
320 314
 \
321
-    sad8_4_ ## suf(blk1, blk2, stride, 3);\
315
+    sad8_4_ ## suf(blk1, blk2, stride, 8);\
322 316
 \
323 317
     return sum_ ## suf();\
324 318
 }\
325 319
 \
326
-static int pix_abs16x16_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
327
-{\
328
-    asm volatile("pxor %%mm7, %%mm7		\n\t"\
329
-                 "pxor %%mm6, %%mm6		\n\t":);\
330
-\
331
-    sad8_ ## suf(blk1  , blk2  , stride, 4);\
332
-    sad8_ ## suf(blk1+8, blk2+8, stride, 4);\
333
-\
334
-    return sum_ ## suf();\
335
-}\
336
-static int sad16x16_ ## suf(void *s, uint8_t *blk2, uint8_t *blk1, int stride)\
320
+static int sad16_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
337 321
 {\
338 322
     asm volatile("pxor %%mm7, %%mm7		\n\t"\
339 323
                  "pxor %%mm6, %%mm6		\n\t":);\
340 324
 \
341
-    sad8_ ## suf(blk1  , blk2  , stride, 4);\
342
-    sad8_ ## suf(blk1+8, blk2+8, stride, 4);\
325
+    sad8_1_ ## suf(blk1  , blk2  , stride, h);\
326
+    sad8_1_ ## suf(blk1+8, blk2+8, stride, h);\
343 327
 \
344 328
     return sum_ ## suf();\
345 329
 }\
346
-static int pix_abs16x16_x2_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
330
+static int sad16_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
347 331
 {\
348 332
     asm volatile("pxor %%mm7, %%mm7		\n\t"\
349 333
                  "pxor %%mm6, %%mm6		\n\t"\
... ...
@@ -351,12 +335,12 @@ static int pix_abs16x16_x2_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
351 351
                  :: "m"(round_tab[1]) \
352 352
                  );\
353 353
 \
354
-    sad8_2_ ## suf(blk1  , blk1+1, blk2  , stride, 4);\
355
-    sad8_2_ ## suf(blk1+8, blk1+9, blk2+8, stride, 4);\
354
+    sad8_2_ ## suf(blk1  , blk1+1, blk2  , stride, h);\
355
+    sad8_2_ ## suf(blk1+8, blk1+9, blk2+8, stride, h);\
356 356
 \
357 357
     return sum_ ## suf();\
358 358
 }\
359
-static int pix_abs16x16_y2_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
359
+static int sad16_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
360 360
 {\
361 361
     asm volatile("pxor %%mm7, %%mm7		\n\t"\
362 362
                  "pxor %%mm6, %%mm6		\n\t"\
... ...
@@ -364,12 +348,12 @@ static int pix_abs16x16_y2_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
364 364
                  :: "m"(round_tab[1]) \
365 365
                  );\
366 366
 \
367
-    sad8_2_ ## suf(blk1  , blk1+stride,  blk2  , stride, 4);\
368
-    sad8_2_ ## suf(blk1+8, blk1+stride+8,blk2+8, stride, 4);\
367
+    sad8_2_ ## suf(blk1  , blk1+stride,  blk2  , stride, h);\
368
+    sad8_2_ ## suf(blk1+8, blk1+stride+8,blk2+8, stride, h);\
369 369
 \
370 370
     return sum_ ## suf();\
371 371
 }\
372
-static int pix_abs16x16_xy2_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
372
+static int sad16_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
373 373
 {\
374 374
     asm volatile("pxor %%mm7, %%mm7		\n\t"\
375 375
                  "pxor %%mm6, %%mm6		\n\t"\
... ...
@@ -377,8 +361,8 @@ static int pix_abs16x16_xy2_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
377 377
                  :: "m"(round_tab[2]) \
378 378
                  );\
379 379
 \
380
-    sad8_4_ ## suf(blk1  , blk2  , stride, 4);\
381
-    sad8_4_ ## suf(blk1+8, blk2+8, stride, 4);\
380
+    sad8_4_ ## suf(blk1  , blk2  , stride, h);\
381
+    sad8_4_ ## suf(blk1+8, blk2+8, stride, h);\
382 382
 \
383 383
     return sum_ ## suf();\
384 384
 }\
... ...
@@ -389,32 +373,32 @@ PIX_SAD(mmx2)
389 389
 void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx)
390 390
 {
391 391
     if (mm_flags & MM_MMX) {
392
-        c->pix_abs16x16     = pix_abs16x16_mmx;
393
-        c->pix_abs16x16_x2  = pix_abs16x16_x2_mmx;
394
-        c->pix_abs16x16_y2  = pix_abs16x16_y2_mmx;
395
-        c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx;
396
-        c->pix_abs8x8     = pix_abs8x8_mmx;
397
-        c->pix_abs8x8_x2  = pix_abs8x8_x2_mmx;
398
-        c->pix_abs8x8_y2  = pix_abs8x8_y2_mmx;
399
-        c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx;
392
+        c->pix_abs[0][0] = sad16_mmx;
393
+        c->pix_abs[0][1] = sad16_x2_mmx;
394
+        c->pix_abs[0][2] = sad16_y2_mmx;
395
+        c->pix_abs[0][3] = sad16_xy2_mmx;
396
+        c->pix_abs[1][0] = sad8_mmx;
397
+        c->pix_abs[1][1] = sad8_x2_mmx;
398
+        c->pix_abs[1][2] = sad8_y2_mmx;
399
+        c->pix_abs[1][3] = sad8_xy2_mmx;
400 400
 
401
-	c->sad[0]= sad16x16_mmx;
402
-        c->sad[1]= sad8x8_mmx;
401
+	c->sad[0]= sad16_mmx;
402
+        c->sad[1]= sad8_mmx;
403 403
     }
404 404
     if (mm_flags & MM_MMXEXT) {
405
-	c->pix_abs16x16     = pix_abs16x16_mmx2;
406
-	c->pix_abs8x8     = pix_abs8x8_mmx2;
405
+	c->pix_abs[0][0] = sad16_mmx2;
406
+	c->pix_abs[1][0] = sad8_mmx2;
407 407
 
408
-	c->sad[0]= sad16x16_mmx2;
409
-	c->sad[1]= sad8x8_mmx2;
408
+	c->sad[0]= sad16_mmx2;
409
+	c->sad[1]= sad8_mmx2;
410 410
         
411 411
         if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
412
-            c->pix_abs16x16_x2  = pix_abs16x16_x2_mmx2;
413
-            c->pix_abs16x16_y2  = pix_abs16x16_y2_mmx2;
414
-            c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx2;
415
-            c->pix_abs8x8_x2  = pix_abs8x8_x2_mmx2;
416
-            c->pix_abs8x8_y2  = pix_abs8x8_y2_mmx2;
417
-            c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx2;
412
+            c->pix_abs[0][1] = sad16_x2_mmx2;
413
+            c->pix_abs[0][2] = sad16_y2_mmx2;
414
+            c->pix_abs[0][3] = sad16_xy2_mmx2;
415
+            c->pix_abs[1][1] = sad8_x2_mmx2;
416
+            c->pix_abs[1][2] = sad8_y2_mmx2;
417
+            c->pix_abs[1][3] = sad8_xy2_mmx2;
418 418
         }
419 419
     }
420 420
 }
... ...
@@ -46,9 +46,9 @@
46 46
 
47 47
 static inline int sad_hpel_motion_search(MpegEncContext * s,
48 48
 				  int *mx_ptr, int *my_ptr, int dmin,
49
-				  int xmin, int ymin, int xmax, int ymax,
50
-                                  int pred_x, int pred_y, Picture *picture,
51
-                                  int n, int size, uint8_t * const mv_penalty);
49
+                                  int pred_x, int pred_y, uint8_t *src_data[3],
50
+                                  uint8_t *ref_data[6], int stride, int uvstride,
51
+                                  int size, int h, uint8_t * const mv_penalty);
52 52
 
53 53
 static inline int update_map_generation(MpegEncContext * s)
54 54
 {
... ...
@@ -78,20 +78,21 @@ static int minima_cmp(const void *a, const void *b){
78 78
 #define RENAME(a) simple_ ## a
79 79
 
80 80
 #define CMP(d, x, y, size)\
81
-d = cmp(s, src_y, (ref_y) + (x) + (y)*(stride), stride);
81
+d = cmp(s, src_y, (ref_y) + (x) + (y)*(stride), stride, h);
82 82
 
83 83
 #define CMP_HPEL(d, dx, dy, x, y, size)\
84 84
 {\
85 85
     const int dxy= (dx) + 2*(dy);\
86
-    hpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride, (16>>size));\
87
-    d = cmp_sub(s, s->me.scratchpad, src_y, stride);\
86
+    hpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride, h);\
87
+    d = cmp_sub(s, s->me.scratchpad, src_y, stride, h);\
88 88
 }
89 89
 
90
+
90 91
 #define CMP_QPEL(d, dx, dy, x, y, size)\
91 92
 {\
92 93
     const int dxy= (dx) + 4*(dy);\
93 94
     qpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride);\
94
-    d = cmp_sub(s, s->me.scratchpad, src_y, stride);\
95
+    d = cmp_sub(s, s->me.scratchpad, src_y, stride, h);\
95 96
 }
96 97
 
97 98
 #include "motion_est_template.c"
... ...
@@ -105,29 +106,29 @@ d = cmp(s, src_y, (ref_y) + (x) + (y)*(stride), stride);
105 105
 #define RENAME(a) simple_chroma_ ## a
106 106
 
107 107
 #define CMP(d, x, y, size)\
108
-d = cmp(s, src_y, (ref_y) + (x) + (y)*(stride), stride);\
108
+d = cmp(s, src_y, (ref_y) + (x) + (y)*(stride), stride, h);\
109 109
 if(chroma_cmp){\
110 110
     int dxy= ((x)&1) + 2*((y)&1);\
111 111
     int c= ((x)>>1) + ((y)>>1)*uvstride;\
112 112
 \
113
-    chroma_hpel_put[0][dxy](s->me.scratchpad, ref_u + c, uvstride, 8);\
114
-    d += chroma_cmp(s, s->me.scratchpad, src_u, uvstride);\
115
-    chroma_hpel_put[0][dxy](s->me.scratchpad, ref_v + c, uvstride, 8);\
116
-    d += chroma_cmp(s, s->me.scratchpad, src_v, uvstride);\
113
+    chroma_hpel_put[0][dxy](s->me.scratchpad, ref_u + c, uvstride, h>>1);\
114
+    d += chroma_cmp(s, s->me.scratchpad, src_u, uvstride, h>>1);\
115
+    chroma_hpel_put[0][dxy](s->me.scratchpad, ref_v + c, uvstride, h>>1);\
116
+    d += chroma_cmp(s, s->me.scratchpad, src_v, uvstride, h>>1);\
117 117
 }
118 118
 
119 119
 #define CMP_HPEL(d, dx, dy, x, y, size)\
120 120
 {\
121 121
     const int dxy= (dx) + 2*(dy);\
122
-    hpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride, (16>>size));\
123
-    d = cmp_sub(s, s->me.scratchpad, src_y, stride);\
122
+    hpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride, h);\
123
+    d = cmp_sub(s, s->me.scratchpad, src_y, stride, h);\
124 124
     if(chroma_cmp_sub){\
125 125
         int cxy= (dxy) | ((x)&1) | (2*((y)&1));\
126 126
         int c= ((x)>>1) + ((y)>>1)*uvstride;\
127
-        chroma_hpel_put[0][cxy](s->me.scratchpad, ref_u + c, uvstride, 8);\
128
-        d += chroma_cmp_sub(s, s->me.scratchpad, src_u, uvstride);\
129
-        chroma_hpel_put[0][cxy](s->me.scratchpad, ref_v + c, uvstride, 8);\
130
-        d += chroma_cmp_sub(s, s->me.scratchpad, src_v, uvstride);\
127
+        chroma_hpel_put[0][cxy](s->me.scratchpad, ref_u + c, uvstride, h>>1);\
128
+        d += chroma_cmp_sub(s, s->me.scratchpad, src_u, uvstride, h>>1);\
129
+        chroma_hpel_put[0][cxy](s->me.scratchpad, ref_v + c, uvstride, h>>1);\
130
+        d += chroma_cmp_sub(s, s->me.scratchpad, src_v, uvstride, h>>1);\
131 131
     }\
132 132
 }
133 133
 
... ...
@@ -135,7 +136,7 @@ if(chroma_cmp){\
135 135
 {\
136 136
     const int dxy= (dx) + 4*(dy);\
137 137
     qpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride);\
138
-    d = cmp_sub(s, s->me.scratchpad, src_y, stride);\
138
+    d = cmp_sub(s, s->me.scratchpad, src_y, stride, h);\
139 139
     if(chroma_cmp_sub){\
140 140
         int cxy, c;\
141 141
         int cx= (4*(x) + (dx))/2;\
... ...
@@ -144,10 +145,10 @@ if(chroma_cmp){\
144 144
         cy= (cy>>1)|(cy&1);\
145 145
         cxy= (cx&1) + 2*(cy&1);\
146 146
         c= ((cx)>>1) + ((cy)>>1)*uvstride;\
147
-        chroma_hpel_put[0][cxy](s->me.scratchpad, ref_u + c, uvstride, 8);\
148
-        d += chroma_cmp_sub(s, s->me.scratchpad, src_u, uvstride);\
149
-        chroma_hpel_put[0][cxy](s->me.scratchpad, ref_v + c, uvstride, 8);\
150
-        d += chroma_cmp_sub(s, s->me.scratchpad, src_v, uvstride);\
147
+        chroma_hpel_put[0][cxy](s->me.scratchpad, ref_u + c, uvstride, h>>1);\
148
+        d += chroma_cmp_sub(s, s->me.scratchpad, src_u, uvstride, h>>1);\
149
+        chroma_hpel_put[0][cxy](s->me.scratchpad, ref_v + c, uvstride, h>>1);\
150
+        d += chroma_cmp_sub(s, s->me.scratchpad, src_v, uvstride, h>>1);\
151 151
     }\
152 152
 }
153 153
 
... ...
@@ -178,7 +179,7 @@ if((x) >= xmin && 2*(x) + (dx) <= 2*xmax && (y) >= ymin && 2*(y) + (dy) <= 2*yma
178 178
 \
179 179
             uint8_t *dst= s->me.scratchpad + 8*(i&1) + 8*stride*(i>>1);\
180 180
             hpel_put[1][fxy](dst, (ref_y ) + (fx>>1) + (fy>>1)*(stride), stride, 8);\
181
-            hpel_avg[1][bxy](dst, (ref2_y) + (bx>>1) + (by>>1)*(stride), stride, 8);\
181
+            hpel_avg[1][bxy](dst, (ref_data[3]) + (bx>>1) + (by>>1)*(stride), stride, 8);\
182 182
         }\
183 183
     }else{\
184 184
         int fx = s->me.direct_basis_mv[0][0] + hx;\
... ...
@@ -198,9 +199,9 @@ if((x) >= xmin && 2*(x) + (dx) <= 2*xmax && (y) >= ymin && 2*(y) + (dy) <= 2*yma
198 198
         assert((by>>1) + 16*s->mb_y <= s->height);\
199 199
 \
200 200
         hpel_put[0][fxy](s->me.scratchpad, (ref_y ) + (fx>>1) + (fy>>1)*(stride), stride, 16);\
201
-        hpel_avg[0][bxy](s->me.scratchpad, (ref2_y) + (bx>>1) + (by>>1)*(stride), stride, 16);\
201
+        hpel_avg[0][bxy](s->me.scratchpad, (ref_data[3]) + (bx>>1) + (by>>1)*(stride), stride, 16);\
202 202
     }\
203
-    d = cmp_func(s, s->me.scratchpad, src_y, stride);\
203
+    d = cmp_func(s, s->me.scratchpad, src_y, stride, 16);\
204 204
 }else\
205 205
     d= 256*256*256*32;
206 206
 
... ...
@@ -238,7 +239,7 @@ if((x) >= xmin && 4*(x) + (dx) <= 4*xmax && (y) >= ymin && 4*(y) + (dy) <= 4*yma
238 238
 \
239 239
             uint8_t *dst= s->me.scratchpad + 8*(i&1) + 8*stride*(i>>1);\
240 240
             qpel_put[1][fxy](dst, (ref_y ) + (fx>>2) + (fy>>2)*(stride), stride);\
241
-            qpel_avg[1][bxy](dst, (ref2_y) + (bx>>2) + (by>>2)*(stride), stride);\
241
+            qpel_avg[1][bxy](dst, (ref_data[3]) + (bx>>2) + (by>>2)*(stride), stride);\
242 242
         }\
243 243
     }else{\
244 244
         int fx = s->me.direct_basis_mv[0][0] + qx;\
... ...
@@ -252,12 +253,12 @@ if((x) >= xmin && 4*(x) + (dx) <= 4*xmax && (y) >= ymin && 4*(y) + (dy) <= 4*yma
252 252
         qpel_put[1][fxy](s->me.scratchpad + 8           , (ref_y ) + (fx>>2) + (fy>>2)*(stride) + 8           , stride);\
253 253
         qpel_put[1][fxy](s->me.scratchpad     + 8*stride, (ref_y ) + (fx>>2) + (fy>>2)*(stride)     + 8*stride, stride);\
254 254
         qpel_put[1][fxy](s->me.scratchpad + 8 + 8*stride, (ref_y ) + (fx>>2) + (fy>>2)*(stride) + 8 + 8*stride, stride);\
255
-        qpel_avg[1][bxy](s->me.scratchpad               , (ref2_y) + (bx>>2) + (by>>2)*(stride)               , stride);\
256
-        qpel_avg[1][bxy](s->me.scratchpad + 8           , (ref2_y) + (bx>>2) + (by>>2)*(stride) + 8           , stride);\
257
-        qpel_avg[1][bxy](s->me.scratchpad     + 8*stride, (ref2_y) + (bx>>2) + (by>>2)*(stride)     + 8*stride, stride);\
258
-        qpel_avg[1][bxy](s->me.scratchpad + 8 + 8*stride, (ref2_y) + (bx>>2) + (by>>2)*(stride) + 8 + 8*stride, stride);\
255
+        qpel_avg[1][bxy](s->me.scratchpad               , (ref_data[3]) + (bx>>2) + (by>>2)*(stride)               , stride);\
256
+        qpel_avg[1][bxy](s->me.scratchpad + 8           , (ref_data[3]) + (bx>>2) + (by>>2)*(stride) + 8           , stride);\
257
+        qpel_avg[1][bxy](s->me.scratchpad     + 8*stride, (ref_data[3]) + (bx>>2) + (by>>2)*(stride)     + 8*stride, stride);\
258
+        qpel_avg[1][bxy](s->me.scratchpad + 8 + 8*stride, (ref_data[3]) + (bx>>2) + (by>>2)*(stride) + 8 + 8*stride, stride);\
259 259
     }\
260
-    d = cmp_func(s, s->me.scratchpad, src_y, stride);\
260
+    d = cmp_func(s, s->me.scratchpad, src_y, stride, 16);\
261 261
 }else\
262 262
     d= 256*256*256*32;
263 263
 
... ...
@@ -277,7 +278,7 @@ if((x) >= xmin && 4*(x) + (dx) <= 4*xmax && (y) >= ymin && 4*(y) + (dy) <= 4*yma
277 277
 #undef CMP__DIRECT
278 278
 
279 279
 
280
-static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride){
280
+static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){ // no-op compare function: every argument, including the new height h, is ignored
281 281
     return 0; // constant score; set_cmp() installs this function for FF_CMP_ZERO
282 282
 }
283 283
 
... ...
@@ -285,44 +286,37 @@ static void set_cmp(MpegEncContext *s, me_cmp_func *cmp, int type){
285 285
     DSPContext* c= &s->dsp;
286 286
     int i;
287 287
     
288
-    memset(cmp, 0, sizeof(void*)*11);
289
-
290
-    switch(type&0xFF){
291
-    case FF_CMP_SAD:
292
-        cmp[0]= c->sad[0];
293
-        cmp[1]= c->sad[1];
294
-        break;
295
-    case FF_CMP_SATD:
296
-        cmp[0]= c->hadamard8_diff[0];
297
-        cmp[1]= c->hadamard8_diff[1];
298
-        break;
299
-    case FF_CMP_SSE:
300
-        cmp[0]= c->sse[0];
301
-        cmp[1]= c->sse[1];
302
-        break;
303
-    case FF_CMP_DCT:
304
-        cmp[0]= c->dct_sad[0];
305
-        cmp[1]= c->dct_sad[1];
306
-        break;
307
-    case FF_CMP_PSNR:
308
-        cmp[0]= c->quant_psnr[0];
309
-        cmp[1]= c->quant_psnr[1];
310
-        break;
311
-    case FF_CMP_BIT:
312
-        cmp[0]= c->bit[0];
313
-        cmp[1]= c->bit[1];
314
-        break;
315
-    case FF_CMP_RD:
316
-        cmp[0]= c->rd[0];
317
-        cmp[1]= c->rd[1];
318
-        break;
319
-    case FF_CMP_ZERO:
320
-        for(i=0; i<7; i++){
288
+    memset(cmp, 0, sizeof(void*)*5);
289
+        
290
+    for(i=0; i<4; i++){
291
+        switch(type&0xFF){
292
+        case FF_CMP_SAD:
293
+            cmp[i]= c->sad[i];
294
+            break;
295
+        case FF_CMP_SATD:
296
+            cmp[i]= c->hadamard8_diff[i];
297
+            break;
298
+        case FF_CMP_SSE:
299
+            cmp[i]= c->sse[i];
300
+            break;
301
+        case FF_CMP_DCT:
302
+            cmp[i]= c->dct_sad[i];
303
+            break;
304
+        case FF_CMP_PSNR:
305
+            cmp[i]= c->quant_psnr[i];
306
+            break;
307
+        case FF_CMP_BIT:
308
+            cmp[i]= c->bit[i];
309
+            break;
310
+        case FF_CMP_RD:
311
+            cmp[i]= c->rd[i];
312
+            break;
313
+        case FF_CMP_ZERO:
321 314
             cmp[i]= zero_cmp;
315
+            break;
316
+        default:
317
+            av_log(s->avctx, AV_LOG_ERROR,"internal error in cmp function selection\n");
322 318
         }
323
-        break;
324
-    default:
325
-        av_log(s->avctx, AV_LOG_ERROR,"internal error in cmp function selection\n");
326 319
     }
327 320
 }
328 321
 
... ...
@@ -362,7 +356,7 @@ void ff_init_me(MpegEncContext *s){
362 362
         else if(   s->avctx->me_sub_cmp == FF_CMP_SAD 
363 363
                 && s->avctx->    me_cmp == FF_CMP_SAD 
364 364
                 && s->avctx->    mb_cmp == FF_CMP_SAD)
365
-            s->me.sub_motion_search= sad_hpel_motion_search;
365
+            s->me.sub_motion_search= sad_hpel_motion_search; // 2050 vs. 2450 cycles
366 366
         else
367 367
             s->me.sub_motion_search= simple_hpel_motion_search;
368 368
     }
... ...
@@ -370,9 +364,11 @@ void ff_init_me(MpegEncContext *s){
370 370
     if(s->avctx->me_cmp&FF_CMP_CHROMA){
371 371
         s->me.motion_search[0]= simple_chroma_epzs_motion_search;
372 372
         s->me.motion_search[1]= simple_chroma_epzs_motion_search4;
373
+        s->me.motion_search[4]= simple_chroma_epzs_motion_search2;
373 374
     }else{
374 375
         s->me.motion_search[0]= simple_epzs_motion_search;
375 376
         s->me.motion_search[1]= simple_epzs_motion_search4;
377
+        s->me.motion_search[4]= simple_epzs_motion_search2;
376 378
     }
377 379
     
378 380
     if(s->avctx->me_pre_cmp&FF_CMP_CHROMA){
... ...
@@ -453,8 +449,8 @@ static int full_motion_search(MpegEncContext * s,
453 453
     my = 0;
454 454
     for (y = y1; y <= y2; y++) {
455 455
 	for (x = x1; x <= x2; x++) {
456
-	    d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x,
457
-			     s->linesize);
456
+	    d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x,
457
+			     s->linesize, 16);
458 458
 	    if (d < dmin ||
459 459
 		(d == dmin &&
460 460
 		 (abs(x - xx) + abs(y - yy)) <
... ...
@@ -518,7 +514,7 @@ static int log_motion_search(MpegEncContext * s,
518 518
     do {
519 519
 	for (y = y1; y <= y2; y += range) {
520 520
 	    for (x = x1; x <= x2; x += range) {
521
-		d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize);
521
+		d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
522 522
 		if (d < dmin || (d == dmin && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
523 523
 		    dmin = d;
524 524
 		    mx = x;
... ...
@@ -598,7 +594,7 @@ static int phods_motion_search(MpegEncContext * s,
598 598
 
599 599
 	lastx = x;
600 600
 	for (x = x1; x <= x2; x += range) {
601
-	    d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize);
601
+	    d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
602 602
 	    if (d < dminx || (d == dminx && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
603 603
 		dminx = d;
604 604
 		mx = x;
... ...
@@ -607,7 +603,7 @@ static int phods_motion_search(MpegEncContext * s,
607 607
 
608 608
 	x = lastx;
609 609
 	for (y = y1; y <= y2; y += range) {
610
-	    d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize);
610
+	    d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
611 611
 	    if (d < dminy || (d == dminy && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
612 612
 		dminy = d;
613 613
 		my = y;
... ...
@@ -651,35 +647,25 @@ static int phods_motion_search(MpegEncContext * s,
651 651
 
652 652
 #define CHECK_SAD_HALF_MV(suffix, x, y) /* scores the half-pel offset (x,y) and keeps it in dminh/dx/dy if it is better; pix, ptr, stride, size, h, pen_x, pen_y, mv_penalty and penalty_factor come from the expansion site */ \
653 653
 {\
654
-    d= pix_abs_ ## suffix(pix, ptr+((x)>>1), s->linesize);\
654
+    d= s->dsp.pix_abs[size][(x?1:0)+(y?2:0)](NULL, pix, ptr+((x)>>1), stride, h); /* second index: bit 0 set when x is nonzero, bit 1 when y is nonzero; suffix is no longer used to pick the function. NOTE(review): x and y are unparenthesized here, which is fine for the literal 0/-1/+1 arguments used in this file */\
655 655
     d += (mv_penalty[pen_x + x] + mv_penalty[pen_y + y])*penalty_factor; /* add the penalty_factor-weighted mv_penalty entries for both components */\
656 656
     COPY3_IF_LT(dminh, d, dx, x, dy, y)\
657 657
 }
658 658
 
659 659
 static inline int sad_hpel_motion_search(MpegEncContext * s,
660 660
 				  int *mx_ptr, int *my_ptr, int dmin,
661
-				  int xmin, int ymin, int xmax, int ymax,
662
-                                  int pred_x, int pred_y, Picture *picture,
663
-                                  int n, int size, uint8_t * const mv_penalty)
661
+                                  int pred_x, int pred_y, uint8_t *src_data[3],
662
+                                  uint8_t *ref_data[6], int stride, int uvstride,                                  
663
+                                  int size, int h, uint8_t * const mv_penalty)
664 664
 {
665
-    uint8_t *ref_picture= picture->data[0];
666 665
     uint32_t *score_map= s->me.score_map;
667 666
     const int penalty_factor= s->me.sub_penalty_factor;
668
-    int mx, my, xx, yy, dminh;
667
+    int mx, my, dminh;
669 668
     uint8_t *pix, *ptr;
670
-    op_pixels_abs_func pix_abs_x2;
671
-    op_pixels_abs_func pix_abs_y2;
672
-    op_pixels_abs_func pix_abs_xy2;
673
-    
674
-    if(size==0){
675
-        pix_abs_x2 = s->dsp.pix_abs16x16_x2;
676
-        pix_abs_y2 = s->dsp.pix_abs16x16_y2;
677
-        pix_abs_xy2= s->dsp.pix_abs16x16_xy2;
678
-    }else{
679
-        pix_abs_x2 = s->dsp.pix_abs8x8_x2;
680
-        pix_abs_y2 = s->dsp.pix_abs8x8_y2;
681
-        pix_abs_xy2= s->dsp.pix_abs8x8_xy2;
682
-    }
669
+    const int xmin= s->me.xmin;
670
+    const int ymin= s->me.ymin;
671
+    const int xmax= s->me.xmax;
672
+    const int ymax= s->me.ymax;
683 673
 
684 674
     if(s->me.skip){
685 675
 //    printf("S");
... ...
@@ -689,13 +675,11 @@ static inline int sad_hpel_motion_search(MpegEncContext * s,
689 689
     }
690 690
 //    printf("N");
691 691
         
692
-    xx = 16 * s->mb_x + 8*(n&1);
693
-    yy = 16 * s->mb_y + 8*(n>>1);
694
-    pix =  s->new_picture.data[0] + (yy * s->linesize) + xx;
692
+    pix = src_data[0];
695 693
 
696 694
     mx = *mx_ptr;
697 695
     my = *my_ptr;
698
-    ptr = ref_picture + ((yy + my) * s->linesize) + (xx + mx);
696
+    ptr = ref_data[0] + (my * stride) + mx;
699 697
     
700 698
     dminh = dmin;
701 699
 
... ...
@@ -715,16 +699,16 @@ static inline int sad_hpel_motion_search(MpegEncContext * s,
715 715
         pen_x= pred_x + mx;
716 716
         pen_y= pred_y + my;
717 717
 
718
-        ptr-= s->linesize;
718
+        ptr-= stride;
719 719
         if(t<=b){
720 720
             CHECK_SAD_HALF_MV(y2 , 0, -1)
721 721
             if(l<=r){
722 722
                 CHECK_SAD_HALF_MV(xy2, -1, -1)
723 723
                 if(t+r<=b+l){
724 724
                     CHECK_SAD_HALF_MV(xy2, +1, -1)
725
-                    ptr+= s->linesize;
725
+                    ptr+= stride;
726 726
                 }else{
727
-                    ptr+= s->linesize;
727
+                    ptr+= stride;
728 728
                     CHECK_SAD_HALF_MV(xy2, -1, +1)
729 729
                 }
730 730
                 CHECK_SAD_HALF_MV(x2 , -1,  0)
... ...
@@ -732,9 +716,9 @@ static inline int sad_hpel_motion_search(MpegEncContext * s,
732 732
                 CHECK_SAD_HALF_MV(xy2, +1, -1)
733 733
                 if(t+l<=b+r){
734 734
                     CHECK_SAD_HALF_MV(xy2, -1, -1)
735
-                    ptr+= s->linesize;
735
+                    ptr+= stride;
736 736
                 }else{
737
-                    ptr+= s->linesize;
737
+                    ptr+= stride;
738 738
                     CHECK_SAD_HALF_MV(xy2, +1, +1)
739 739
                 }
740 740
                 CHECK_SAD_HALF_MV(x2 , +1,  0)
... ...
@@ -743,9 +727,9 @@ static inline int sad_hpel_motion_search(MpegEncContext * s,
743 743
             if(l<=r){
744 744
                 if(t+l<=b+r){
745 745
                     CHECK_SAD_HALF_MV(xy2, -1, -1)
746
-                    ptr+= s->linesize;
746
+                    ptr+= stride;
747 747
                 }else{
748
-                    ptr+= s->linesize;
748
+                    ptr+= stride;
749 749
                     CHECK_SAD_HALF_MV(xy2, +1, +1)
750 750
                 }
751 751
                 CHECK_SAD_HALF_MV(x2 , -1,  0)
... ...
@@ -753,9 +737,9 @@ static inline int sad_hpel_motion_search(MpegEncContext * s,
753 753
             }else{
754 754
                 if(t+r<=b+l){
755 755
                     CHECK_SAD_HALF_MV(xy2, +1, -1)
756
-                    ptr+= s->linesize;
756
+                    ptr+= stride;
757 757
                 }else{
758
-                    ptr+= s->linesize;
758
+                    ptr+= stride;
759 759
                     CHECK_SAD_HALF_MV(xy2, -1, +1)
760 760
                 }
761 761
                 CHECK_SAD_HALF_MV(x2 , +1,  0)
... ...
@@ -802,35 +786,41 @@ static inline void set_p_mv_tables(MpegEncContext * s, int mx, int my, int mv4)
802 802
 
803 803
 /**
804 804
  * get fullpel ME search limits.
805
- * @param range the approximate search range for the old ME code, unused for EPZS and newer
806 805
  */
807
-static inline void get_limits(MpegEncContext *s, int *range, int *xmin, int *ymin, int *xmax, int *ymax)
806
+static inline void get_limits(MpegEncContext *s, int x, int y) // x, y: pixel position the limits are expressed relative to (ff_estimate_p_frame_motion passes 16*mb_x, 16*mb_y); results go to s->me.xmin/ymin/xmax/ymax instead of output pointers
808 807
 {
809
-    if(s->avctx->me_range) *range= s->avctx->me_range >> 1;
810
-    else                   *range= 16;
811
-
808
+/*
809
+    if(s->avctx->me_range) s->me.range= s->avctx->me_range >> 1;
810
+    else                   s->me.range= 16;
811
+*/
812 812
     if (s->unrestricted_mv) { // block origin may lie up to 16 pixels outside the mb_width*16 x mb_height*16 area
813
-        *xmin = -16;
814
-        *ymin = -16;
815
-        *xmax = s->mb_width*16;
816
-        *ymax = s->mb_height*16;
813
+        s->me.xmin = - x - 16;
814
+        s->me.ymin = - y - 16;
815
+        s->me.xmax = - x + s->mb_width *16;
816
+        s->me.ymax = - y + s->mb_height*16;
817 817
     } else { // block origin is limited to 0 .. dimension-16 in absolute coordinates
818
-        *xmin = 0;
819
-        *ymin = 0;
820
-        *xmax = s->mb_width*16 - 16;
821
-        *ymax = s->mb_height*16 - 16;
818
+        s->me.xmin = - x;
819
+        s->me.ymin = - y;
820
+        s->me.xmax = - x + s->mb_width *16 - 16;
821
+        s->me.ymax = - y + s->mb_height*16 - 16;
822 822
     }
823
-    
824
-    //FIXME try to limit x/y min/max if me_range is set
825 823
 }
826 824
 
827
-static inline int h263_mv4_search(MpegEncContext *s, int xmin, int ymin, int xmax, int ymax, int mx, int my, int shift)
825
+static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift)
828 826
 {
827
+    const int size= 1;
828
+    const int h=8;
829 829
     int block;
830 830
     int P[10][2];
831 831
     int dmin_sum=0, mx4_sum=0, my4_sum=0;
832 832
     uint8_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV;
833 833
     int same=1;
834
+    const int stride= s->linesize;
835
+    const int uvstride= s->uvlinesize;
836
+    const int xmin= s->me.xmin;
837
+    const int ymin= s->me.ymin;
838
+    const int xmax= s->me.xmax;
839
+    const int ymax= s->me.ymax;
834 840
 
835 841
     for(block=0; block<4; block++){
836 842
         int mx4, my4;
... ...
@@ -839,23 +829,23 @@ static inline int h263_mv4_search(MpegEncContext *s, int xmin, int ymin, int xma
839 839
         static const int off[4]= {2, 1, 1, -1};
840 840
         const int mot_stride = s->block_wrap[0];
841 841
         const int mot_xy = s->block_index[block];
842
-//        const int block_x= (block&1);
843
-//        const int block_y= (block>>1);
844
-#if 1 // this saves us a bit of cliping work and shouldnt affect compression in a negative way
845
-        const int rel_xmin4= xmin;
846
-        const int rel_xmax4= xmax;
847
-        const int rel_ymin4= ymin;
848
-        const int rel_ymax4= ymax;
849
-#else
850
-        const int rel_xmin4= xmin - block_x*8;
851
-        const int rel_xmax4= xmax - block_x*8 + 8;
852
-        const int rel_ymin4= ymin - block_y*8;
853
-        const int rel_ymax4= ymax - block_y*8 + 8;
854
-#endif
842
+        const int block_x= (block&1);
843
+        const int block_y= (block>>1);
844
+        uint8_t *src_data[3]= {
845
+            s->new_picture.data[0] + 8*(2*s->mb_x + block_x) + stride  *8*(2*s->mb_y + block_y), //FIXME chroma?
846
+            s->new_picture.data[1] + 4*(2*s->mb_x + block_x) + uvstride*4*(2*s->mb_y + block_y),
847
+            s->new_picture.data[2] + 4*(2*s->mb_x + block_x) + uvstride*4*(2*s->mb_y + block_y)
848
+        };
849
+        uint8_t *ref_data[3]= {
850
+            s->last_picture.data[0] + 8*(2*s->mb_x + block_x) + stride  *8*(2*s->mb_y + block_y), //FIXME chroma?
851
+            s->last_picture.data[1] + 4*(2*s->mb_x + block_x) + uvstride*4*(2*s->mb_y + block_y),
852
+            s->last_picture.data[2] + 4*(2*s->mb_x + block_x) + uvstride*4*(2*s->mb_y + block_y)
853
+        };
854
+
855 855
         P_LEFT[0] = s->current_picture.motion_val[0][mot_xy - 1][0];
856 856
         P_LEFT[1] = s->current_picture.motion_val[0][mot_xy - 1][1];
857 857
 
858
-        if(P_LEFT[0]       > (rel_xmax4<<shift)) P_LEFT[0]       = (rel_xmax4<<shift);
858
+        if(P_LEFT[0]       > (s->me.xmax<<shift)) P_LEFT[0]       = (s->me.xmax<<shift);
859 859
 
860 860
         /* special case for first line */
861 861
         if (s->mb_y == 0 && block<2) {
... ...
@@ -866,10 +856,10 @@ static inline int h263_mv4_search(MpegEncContext *s, int xmin, int ymin, int xma
866 866
             P_TOP[1]      = s->current_picture.motion_val[0][mot_xy - mot_stride             ][1];
867 867
             P_TOPRIGHT[0] = s->current_picture.motion_val[0][mot_xy - mot_stride + off[block]][0];
868 868
             P_TOPRIGHT[1] = s->current_picture.motion_val[0][mot_xy - mot_stride + off[block]][1];
869
-            if(P_TOP[1]      > (rel_ymax4<<shift)) P_TOP[1]     = (rel_ymax4<<shift);
870
-            if(P_TOPRIGHT[0] < (rel_xmin4<<shift)) P_TOPRIGHT[0]= (rel_xmin4<<shift);
871
-            if(P_TOPRIGHT[0] > (rel_xmax4<<shift)) P_TOPRIGHT[0]= (rel_xmax4<<shift);
872
-            if(P_TOPRIGHT[1] > (rel_ymax4<<shift)) P_TOPRIGHT[1]= (rel_ymax4<<shift);
869
+            if(P_TOP[1]      > (s->me.ymax<<shift)) P_TOP[1]     = (s->me.ymax<<shift);
870
+            if(P_TOPRIGHT[0] < (s->me.xmin<<shift)) P_TOPRIGHT[0]= (s->me.xmin<<shift);
871
+            if(P_TOPRIGHT[0] > (s->me.xmax<<shift)) P_TOPRIGHT[0]= (s->me.xmax<<shift);
872
+            if(P_TOPRIGHT[1] > (s->me.ymax<<shift)) P_TOPRIGHT[1]= (s->me.ymax<<shift);
873 873
     
874 874
             P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
875 875
             P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
... ...
@@ -887,33 +877,33 @@ static inline int h263_mv4_search(MpegEncContext *s, int xmin, int ymin, int xma
887 887
         P_MV1[0]= mx;
888 888
         P_MV1[1]= my;
889 889
 
890
-        dmin4 = s->me.motion_search[1](s, block, &mx4, &my4, P, pred_x4, pred_y4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, 
891
-                                       &s->last_picture, s->p_mv_table, (1<<16)>>shift, mv_penalty);
890
+        dmin4 = s->me.motion_search[1](s, &mx4, &my4, P, pred_x4, pred_y4, 
891
+                                       src_data, ref_data, stride, uvstride, s->p_mv_table, (1<<16)>>shift, mv_penalty);
892 892
 
893
-        dmin4= s->me.sub_motion_search(s, &mx4, &my4, dmin4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, 
894
-					  pred_x4, pred_y4, &s->last_picture, block, 1, mv_penalty);
893
+        dmin4= s->me.sub_motion_search(s, &mx4, &my4, dmin4, 
894
+					  pred_x4, pred_y4, src_data, ref_data, stride, uvstride, size, h, mv_penalty);
895 895
         
896 896
         if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){
897 897
             int dxy;
898
-            const int offset= ((block&1) + (block>>1)*s->linesize)*8;
898
+            const int offset= ((block&1) + (block>>1)*stride)*8;
899 899
             uint8_t *dest_y = s->me.scratchpad + offset;
900 900
 
901 901
             if(s->quarter_sample){
902
-                uint8_t *ref= s->last_picture.data[0] + (s->mb_x*16 + (mx4>>2)) + (s->mb_y*16 + (my4>>2))*s->linesize + offset;
902
+                uint8_t *ref= ref_data[0] + (mx4>>2) + (my4>>2)*stride + offset;
903 903
                 dxy = ((my4 & 3) << 2) | (mx4 & 3);
904 904
 
905 905
                 if(s->no_rounding)
906 906
                     s->dsp.put_no_rnd_qpel_pixels_tab[1][dxy](dest_y   , ref    , s->linesize);
907 907
                 else
908
-                    s->dsp.put_qpel_pixels_tab       [1][dxy](dest_y   , ref    , s->linesize);
908
+                    s->dsp.put_qpel_pixels_tab       [1][dxy](dest_y   , ref    , stride);
909 909
             }else{
910
-                uint8_t *ref= s->last_picture.data[0] + (s->mb_x*16 + (mx4>>1)) + (s->mb_y*16 + (my4>>1))*s->linesize + offset;
910
+                uint8_t *ref= ref_data[0] + (mx4>>1) + (my4>>1)*stride + offset;
911 911
                 dxy = ((my4 & 1) << 1) | (mx4 & 1);
912 912
 
913 913
                 if(s->no_rounding)
914
-                    s->dsp.put_no_rnd_pixels_tab[1][dxy](dest_y    , ref    , s->linesize, 8);
914
+                    s->dsp.put_no_rnd_pixels_tab[1][dxy](dest_y    , ref    , stride, h);
915 915
                 else
916
-                    s->dsp.put_pixels_tab       [1][dxy](dest_y    , ref    , s->linesize, 8);
916
+                    s->dsp.put_pixels_tab       [1][dxy](dest_y    , ref    , stride, h);
917 917
             }
918 918
             dmin_sum+= (mv_penalty[mx4-pred_x4] + mv_penalty[my4-pred_y4])*s->me.mb_penalty_factor;
919 919
         }else
... ...
@@ -937,7 +927,7 @@ static inline int h263_mv4_search(MpegEncContext *s, int xmin, int ymin, int xma
937 937
         return INT_MAX;
938 938
     
939 939
     if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){
940
-        dmin_sum += s->dsp.mb_cmp[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*16*s->linesize, s->me.scratchpad, s->linesize);
940
+        dmin_sum += s->dsp.mb_cmp[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*16*stride, s->me.scratchpad, stride, 16);
941 941
     }
942 942
     
943 943
     if(s->avctx->mb_cmp&FF_CMP_CHROMA){
... ...
@@ -959,8 +949,8 @@ static inline int h263_mv4_search(MpegEncContext *s, int xmin, int ymin, int xma
959 959
             s->dsp.put_pixels_tab       [1][dxy](s->me.scratchpad+8  , s->last_picture.data[2] + offset, s->uvlinesize, 8);
960 960
         }
961 961
 
962
-        dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[1] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, s->me.scratchpad  , s->uvlinesize);
963
-        dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[2] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, s->me.scratchpad+8, s->uvlinesize);
962
+        dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[1] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, s->me.scratchpad  , s->uvlinesize, 8);
963
+        dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[2] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, s->me.scratchpad+8, s->uvlinesize, 8);
964 964
     }
965 965
 
966 966
     switch(s->avctx->mb_cmp&0xFF){
... ...
@@ -973,13 +963,134 @@ static inline int h263_mv4_search(MpegEncContext *s, int xmin, int ymin, int xma
973 973
     }
974 974
 }
975 975
 
976
+static int interlaced_search(MpegEncContext *s, uint8_t *frame_src_data[3], uint8_t *frame_ref_data[3], // field-based search for the current macroblock: returns the summed score of the best vector per source field, or INT_MAX when the result just repeats the frame vector
977
+                             int16_t (*mv_tables[2][2])[2], uint8_t *field_select_tables[2], int f_code, int mx, int my) // mv_tables[block][field_select] and field_select_tables[block] receive the results at this MB's index; (mx,my) is the frame vector, fed in as predictor P_MV1
978
+{
979
+    const int size=0; // index into the per-size DSP tables (pix_abs[0] replaces pix_abs16x16 elsewhere in this change, so presumably the 16-wide variant)
980
+    const int h=8; // number of rows compared for each field block
981
+    int block;
982
+    int P[10][2];
983
+    uint8_t * const mv_penalty= s->me.mv_penalty[f_code] + MAX_MV;
984
+    int same=1; // cleared as soon as a field result differs from the frame vector
985
+    const int stride= 2*s->linesize; // doubled strides step over every other line, so accesses stay on one line parity
986
+    const int uvstride= 2*s->uvlinesize;
987
+    int dmin_sum= 0;
988
+    const int mot_stride= s->mb_stride;
989
+    const int xy= s->mb_x + s->mb_y*mot_stride;
990
+    
991
+    s->me.ymin>>=1; // vertical limits are halved for the search and shifted back before returning. NOTE(review): if set by get_limits() ymin is <= 0, so this relies on arithmetic shifts of negative values
992
+    s->me.ymax>>=1;
993
+    
994
+    for(block=0; block<2; block++){ // block selects the source line parity (start offset of 0 or 1 lines below)
995
+        int field_select;
996
+        int best_dmin= INT_MAX;
997
+        int best_field= -1;
998
+
999
+        uint8_t *src_data[3]= {
1000
+            frame_src_data[0] + s->  linesize*block,
1001
+            frame_src_data[1] + s->uvlinesize*block,
1002
+            frame_src_data[2] + s->uvlinesize*block
1003
+        };
1004
+
1005
+        for(field_select=0; field_select<2; field_select++){ // try both line parities of the reference
1006
+            int dmin, mx_i, my_i, pred_x, pred_y;
1007
+            uint8_t *ref_data[3]= {
1008
+                frame_ref_data[0] + s->  linesize*field_select,
1009
+                frame_ref_data[1] + s->uvlinesize*field_select,
1010
+                frame_ref_data[2] + s->uvlinesize*field_select
1011
+            };
1012
+            int16_t (*mv_table)[2]= mv_tables[block][field_select];
1013
+            
1014
+            P_LEFT[0] = mv_table[xy - 1][0]; // left neighbour's vector from the same block/field_select table
1015
+            P_LEFT[1] = mv_table[xy - 1][1];
1016
+            if(P_LEFT[0]       > (s->me.xmax<<1)) P_LEFT[0]       = (s->me.xmax<<1);
1017
+            
1018
+            pred_x= P_LEFT[0]; // the left vector is used as the prediction for the mv_penalty cost
1019
+            pred_y= P_LEFT[1];
1020
+            
1021
+            if(s->mb_y){ // top and top-right neighbours are only read below the first MB row
1022
+                P_TOP[0]      = mv_table[xy - mot_stride][0];
1023
+                P_TOP[1]      = mv_table[xy - mot_stride][1];
1024
+                P_TOPRIGHT[0] = mv_table[xy - mot_stride + 1][0];
1025
+                P_TOPRIGHT[1] = mv_table[xy - mot_stride + 1][1];
1026
+                if(P_TOP[1]      > (s->me.ymax<<1)) P_TOP[1]     = (s->me.ymax<<1);
1027
+                if(P_TOPRIGHT[0] < (s->me.xmin<<1)) P_TOPRIGHT[0]= (s->me.xmin<<1);
1028
+                if(P_TOPRIGHT[0] > (s->me.xmax<<1)) P_TOPRIGHT[0]= (s->me.xmax<<1);
1029
+                if(P_TOPRIGHT[1] > (s->me.ymax<<1)) P_TOPRIGHT[1]= (s->me.ymax<<1);
1030
+    
1031
+                P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1032
+                P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1033
+            }
1034
+            P_MV1[0]= mx; //FIXME not correct if block != field_select
1035
+            P_MV1[1]= my / 2; // frame vector with the vertical component halved
1036
+            
1037
+            dmin = s->me.motion_search[4](s, &mx_i, &my_i, P, pred_x, pred_y, // slot 4 is set to the *_epzs_motion_search2 variants in ff_init_me()
1038
+                                           src_data, ref_data, stride, uvstride, mv_table, (1<<16)>>1, mv_penalty);
1039
+
1040
+            dmin= s->me.sub_motion_search(s, &mx_i, &my_i, dmin, // refine the result with the configured sub_motion_search function
1041
+                                           pred_x, pred_y, src_data, ref_data, stride, uvstride, size, h, mv_penalty);
1042
+            
1043
+            mv_table[xy][0]= mx_i; // stored for every block/field_select combination, not only the winning one
1044
+            mv_table[xy][1]= my_i;
1045
+            
1046
+            if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){ // rescore with mb_cmp when it differs from the compare function used for refinement
1047
+                int dxy;
1048
+
1049
+                //FIXME chroma ME
1050
+                uint8_t *ref= ref_data[0] + (mx_i>>1) + (my_i>>1)*stride;
1051
+                dxy = ((my_i & 1) << 1) | (mx_i & 1); // half-pel phase: bit 0 horizontal, bit 1 vertical
1052
+
1053
+                if(s->no_rounding){
1054
+                    s->dsp.put_no_rnd_pixels_tab[size][dxy](s->me.scratchpad, ref    , stride, h);
1055
+                }else{
1056
+                    s->dsp.put_pixels_tab       [size][dxy](s->me.scratchpad, ref    , stride, h);
1057
+                }
1058
+                dmin= s->dsp.mb_cmp[size](s, src_data[0], s->me.scratchpad, stride, h);
1059
+                dmin+= (mv_penalty[mx_i-pred_x] + mv_penalty[my_i-pred_y] + 1)*s->me.mb_penalty_factor; // the extra +1 presumably accounts for the field_select bit, as in the else branch
1060
+            }else
1061
+                dmin+= s->me.mb_penalty_factor; //field_select bits
1062
+                
1063
+            dmin += field_select != block; //slightly prefer same field
1064
+            
1065
+            if(dmin < best_dmin){
1066
+                best_dmin= dmin;
1067
+                best_field= field_select;
1068
+            }
1069
+        }
1070
+        {
1071
+            int16_t (*mv_table)[2]= mv_tables[block][best_field];
1072
+
1073
+            if(mv_table[xy][0] != mx) same=0; //FIXME check if these checks work and are any good at all
1074
+            if(mv_table[xy][1]&1) same=0;
1075
+            if(mv_table[xy][1]*2 != my) same=0; 
1076
+            if(best_field != block) same=0;
1077
+        }
1078
+
1079
+        field_select_tables[block][xy]= best_field; // remember which reference line parity won for this source block
1080
+        dmin_sum += best_dmin;
1081
+    }
1082
+    
1083
+    s->me.ymin<<=1; // undo the halving done before the loop
1084
+    s->me.ymax<<=1;
1085
+
1086
+    if(same) // both blocks chose their own parity with a vector equal to (mx,my): reported as INT_MAX
1087
+        return INT_MAX;
1088
+    
1089
+    switch(s->avctx->mb_cmp&0xFF){
1090
+    /*case FF_CMP_SSE:
1091
+        return dmin_sum+ 32*s->qscale*s->qscale;*/
1092
+    case FF_CMP_RD:
1093
+        return dmin_sum;
1094
+    default:
1095
+        return dmin_sum+ 11*s->me.mb_penalty_factor; // fixed extra cost of 11*mb_penalty_factor for every compare function except RD
1096
+    }
1097
+}
1097
+}
1098
+
976 1099
 void ff_estimate_p_frame_motion(MpegEncContext * s,
977 1100
                                 int mb_x, int mb_y)
978 1101
 {
979 1102
     uint8_t *pix, *ppix;
980
-    int sum, varc, vard, mx, my, range, dmin, xx, yy;
981
-    int xmin, ymin, xmax, ymax;
982
-    int rel_xmin, rel_ymin, rel_xmax, rel_ymax;
1103
+    int sum, varc, vard, mx, my, dmin, xx, yy;
983 1104
     int pred_x=0, pred_y=0;
984 1105
     int P[10][2];
985 1106
     const int shift= 1+s->quarter_sample;
... ...
@@ -987,18 +1098,26 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
987 987
     uint8_t *ref_picture= s->last_picture.data[0];
988 988
     Picture * const pic= &s->current_picture;
989 989
     uint8_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV;
990
-    
990
+    const int stride= s->linesize;
991
+    const int uvstride= s->uvlinesize;
992
+    uint8_t *src_data[3]= {
993
+        s->new_picture.data[0] + 16*(mb_x + stride*mb_y),
994
+        s->new_picture.data[1] + 8*(mb_x + uvstride*mb_y),
995
+        s->new_picture.data[2] + 8*(mb_x + uvstride*mb_y)
996
+    };
997
+    uint8_t *ref_data[3]= {
998
+        s->last_picture.data[0] + 16*(mb_x + stride*mb_y),
999
+        s->last_picture.data[1] + 8*(mb_x + uvstride*mb_y),
1000
+        s->last_picture.data[2] + 8*(mb_x + uvstride*mb_y)
1001
+    };
1002
+
991 1003
     assert(s->quarter_sample==0 || s->quarter_sample==1);
992 1004
 
993 1005
     s->me.penalty_factor    = get_penalty_factor(s, s->avctx->me_cmp);
994 1006
     s->me.sub_penalty_factor= get_penalty_factor(s, s->avctx->me_sub_cmp);
995 1007
     s->me.mb_penalty_factor = get_penalty_factor(s, s->avctx->mb_cmp);
996 1008
 
997
-    get_limits(s, &range, &xmin, &ymin, &xmax, &ymax);
998
-    rel_xmin= xmin - mb_x*16;
999
-    rel_xmax= xmax - mb_x*16;
1000
-    rel_ymin= ymin - mb_y*16;
1001
-    rel_ymax= ymax - mb_y*16;
1009
+    get_limits(s, 16*mb_x, 16*mb_y);
1002 1010
     s->me.skip=0;
1003 1011
 
1004 1012
     switch(s->me_method) {
... ...
@@ -1009,21 +1128,23 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
1009 1009
         my-= mb_y*16;
1010 1010
         dmin = 0;
1011 1011
         break;
1012
+#if 0
1012 1013
     case ME_FULL:
1013
-	dmin = full_motion_search(s, &mx, &my, range, xmin, ymin, xmax, ymax, ref_picture);
1014
+	dmin = full_motion_search(s, &mx, &my, range, ref_picture);
1014 1015
         mx-= mb_x*16;
1015 1016
         my-= mb_y*16;
1016 1017
         break;
1017 1018
     case ME_LOG:
1018
-	dmin = log_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture);
1019
+	dmin = log_motion_search(s, &mx, &my, range / 2, ref_picture);
1019 1020
         mx-= mb_x*16;
1020 1021
         my-= mb_y*16;
1021 1022
         break;
1022 1023
     case ME_PHODS:
1023
-	dmin = phods_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture);
1024
+	dmin = phods_motion_search(s, &mx, &my, range / 2, ref_picture);
1024 1025
         mx-= mb_x*16;
1025 1026
         my-= mb_y*16;
1026 1027
         break;
1028
+#endif
1027 1029
     case ME_X1:
1028 1030
     case ME_EPZS:
1029 1031
        {
... ...
@@ -1033,16 +1154,16 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
1033 1033
             P_LEFT[0]       = s->current_picture.motion_val[0][mot_xy - 1][0];
1034 1034
             P_LEFT[1]       = s->current_picture.motion_val[0][mot_xy - 1][1];
1035 1035
 
1036
-            if(P_LEFT[0]       > (rel_xmax<<shift)) P_LEFT[0]       = (rel_xmax<<shift);
1036
+            if(P_LEFT[0]       > (s->me.xmax<<shift)) P_LEFT[0]       = (s->me.xmax<<shift);
1037 1037
 
1038 1038
             if(mb_y) {
1039 1039
                 P_TOP[0]      = s->current_picture.motion_val[0][mot_xy - mot_stride    ][0];
1040 1040
                 P_TOP[1]      = s->current_picture.motion_val[0][mot_xy - mot_stride    ][1];
1041 1041
                 P_TOPRIGHT[0] = s->current_picture.motion_val[0][mot_xy - mot_stride + 2][0];
1042 1042
                 P_TOPRIGHT[1] = s->current_picture.motion_val[0][mot_xy - mot_stride + 2][1];
1043
-                if(P_TOP[1]      > (rel_ymax<<shift)) P_TOP[1]     = (rel_ymax<<shift);
1044
-                if(P_TOPRIGHT[0] < (rel_xmin<<shift)) P_TOPRIGHT[0]= (rel_xmin<<shift);
1045
-                if(P_TOPRIGHT[1] > (rel_ymax<<shift)) P_TOPRIGHT[1]= (rel_ymax<<shift);
1043
+                if(P_TOP[1]      > (s->me.ymax<<shift)) P_TOP[1]     = (s->me.ymax<<shift);
1044
+                if(P_TOPRIGHT[0] < (s->me.xmin<<shift)) P_TOPRIGHT[0]= (s->me.xmin<<shift);
1045
+                if(P_TOPRIGHT[1] > (s->me.ymax<<shift)) P_TOPRIGHT[1]= (s->me.ymax<<shift);
1046 1046
         
1047 1047
                 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1048 1048
                 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
... ...
@@ -1060,8 +1181,8 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
1060 1060
             }
1061 1061
 
1062 1062
         }
1063
-        dmin = s->me.motion_search[0](s, 0, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax, 
1064
-                                      &s->last_picture, s->p_mv_table, (1<<16)>>shift, mv_penalty);
1063
+        dmin = s->me.motion_search[0](s, &mx, &my, P, pred_x, pred_y, 
1064
+                                      src_data, ref_data, stride, uvstride, s->p_mv_table, (1<<16)>>shift, mv_penalty);
1065 1065
  
1066 1066
         break;
1067 1067
     }
... ...
@@ -1070,14 +1191,14 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
1070 1070
     xx = mb_x * 16;
1071 1071
     yy = mb_y * 16;
1072 1072
 
1073
-    pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
1073
+    pix = src_data[0];
1074 1074
     /* At this point (mx,my) are full-pell and the relative displacement */
1075
-    ppix = ref_picture + ((yy+my) * s->linesize) + (xx+mx);
1075
+    ppix = ref_data[0] + (my * s->linesize) + mx;
1076 1076
     
1077 1077
     sum = s->dsp.pix_sum(pix, s->linesize);
1078 1078
     
1079 1079
     varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
1080
-    vard = (s->dsp.sse[0](NULL, pix, ppix, s->linesize)+128)>>8;
1080
+    vard = (s->dsp.sse[0](NULL, pix, ppix, s->linesize, 16)+128)>>8;
1081 1081
 
1082 1082
 //printf("%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout);
1083 1083
     pic->mb_var   [s->mb_stride * mb_y + mb_x] = varc;
... ...
@@ -1099,47 +1220,59 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
1099 1099
             s->scene_change_score+= s->qscale;
1100 1100
 
1101 1101
         if (vard*2 + 200 > varc)
1102
-            mb_type|= MB_TYPE_INTRA;
1102
+            mb_type|= CANDIDATE_MB_TYPE_INTRA;
1103 1103
         if (varc*2 + 200 > vard){
1104
-            mb_type|= MB_TYPE_INTER;
1105
-            s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
1106
-				   pred_x, pred_y, &s->last_picture, 0, 0, mv_penalty);
1104
+            mb_type|= CANDIDATE_MB_TYPE_INTER;
1105
+            s->me.sub_motion_search(s, &mx, &my, dmin,
1106
+				   pred_x, pred_y, src_data, ref_data, stride, uvstride, 0, 16, mv_penalty);
1107 1107
             if(s->flags&CODEC_FLAG_MV0)
1108 1108
                 if(mx || my)
1109
-                    mb_type |= MB_TYPE_SKIPED; //FIXME check difference
1109
+                    mb_type |= CANDIDATE_MB_TYPE_SKIPED; //FIXME check difference
1110 1110
         }else{
1111 1111
             mx <<=shift;
1112 1112
             my <<=shift;
1113 1113
         }
1114 1114
         if((s->flags&CODEC_FLAG_4MV)
1115 1115
            && !s->me.skip && varc>50 && vard>10){
1116
-            if(h263_mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift) < INT_MAX)
1117
-                mb_type|=MB_TYPE_INTER4V;
1116
+            if(h263_mv4_search(s, mx, my, shift) < INT_MAX)
1117
+                mb_type|=CANDIDATE_MB_TYPE_INTER4V;
1118 1118
 
1119 1119
             set_p_mv_tables(s, mx, my, 0);
1120 1120
         }else
1121 1121
             set_p_mv_tables(s, mx, my, 1);
1122
+        if((s->flags&CODEC_FLAG_INTERLACED_ME)
1123
+           && !s->me.skip){ //FIXME varc/d checks
1124
+            if(interlaced_search(s, src_data, ref_data, s->p_field_mv_table, s->p_field_select_table, s->f_code, mx, my) < INT_MAX)
1125
+                mb_type |= CANDIDATE_MB_TYPE_INTER_I;
1126
+        }
1122 1127
     }else{
1123 1128
         int intra_score, i;
1124
-        mb_type= MB_TYPE_INTER;
1129
+        mb_type= CANDIDATE_MB_TYPE_INTER;
1125 1130
 
1126
-        dmin= s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
1127
-                                    pred_x, pred_y, &s->last_picture, 0, 0, mv_penalty);
1128
-        
1131
+        dmin= s->me.sub_motion_search(s, &mx, &my, dmin,
1132
+                                    pred_x, pred_y, src_data, ref_data, stride, uvstride, 0, 16, mv_penalty);
1129 1133
         if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip)
1130
-            dmin= s->me.get_mb_score(s, mx, my, pred_x, pred_y, &s->last_picture, mv_penalty);
1134
+            dmin= s->me.get_mb_score(s, mx, my, pred_x, pred_y, src_data, ref_data, stride, uvstride, mv_penalty);
1131 1135
 
1132 1136
         if((s->flags&CODEC_FLAG_4MV)
1133 1137
            && !s->me.skip && varc>50 && vard>10){
1134
-            int dmin4= h263_mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift);
1138
+            int dmin4= h263_mv4_search(s, mx, my, shift);
1135 1139
             if(dmin4 < dmin){
1136
-                mb_type= MB_TYPE_INTER4V;
1140
+                mb_type= CANDIDATE_MB_TYPE_INTER4V;
1137 1141
                 dmin=dmin4;
1138 1142
             }
1139 1143
         }
1144
+        if((s->flags&CODEC_FLAG_INTERLACED_ME)
1145
+           && !s->me.skip){ //FIXME varc/d checks
1146
+            int dmin_i= interlaced_search(s, src_data, ref_data, s->p_field_mv_table, s->p_field_select_table, s->f_code, mx, my);
1147
+            if(dmin_i < dmin){
1148
+                mb_type = CANDIDATE_MB_TYPE_INTER_I;
1149
+                dmin= dmin_i;
1150
+            }
1151
+        }
1140 1152
                 
1141 1153
 //        pic->mb_cmp_score[s->mb_stride * mb_y + mb_x] = dmin; 
1142
-        set_p_mv_tables(s, mx, my, mb_type!=MB_TYPE_INTER4V);
1154
+        set_p_mv_tables(s, mx, my, mb_type!=CANDIDATE_MB_TYPE_INTER4V);
1143 1155
 
1144 1156
         /* get intra luma score */
1145 1157
         if((s->avctx->mb_cmp&0xFF)==FF_CMP_SSE){
... ...
@@ -1155,7 +1288,7 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
1155 1155
                 *(uint32_t*)(&s->me.scratchpad[i*s->linesize+12]) = mean;
1156 1156
             }
1157 1157
 
1158
-            intra_score= s->dsp.mb_cmp[0](s, s->me.scratchpad, pix, s->linesize);
1158
+            intra_score= s->dsp.mb_cmp[0](s, s->me.scratchpad, pix, s->linesize, 16);
1159 1159
         }
1160 1160
 #if 0 //FIXME
1161 1161
         /* get chroma score */
... ...
@@ -1184,8 +1317,8 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
1184 1184
         intra_score += s->me.mb_penalty_factor*16;
1185 1185
         
1186 1186
         if(intra_score < dmin){
1187
-            mb_type= MB_TYPE_INTRA;
1188
-            s->current_picture.mb_type[mb_y*s->mb_stride + mb_x]= MB_TYPE_INTRA; //FIXME cleanup
1187
+            mb_type= CANDIDATE_MB_TYPE_INTRA;
1188
+            s->current_picture.mb_type[mb_y*s->mb_stride + mb_x]= CANDIDATE_MB_TYPE_INTRA; //FIXME cleanup
1189 1189
         }else
1190 1190
             s->current_picture.mb_type[mb_y*s->mb_stride + mb_x]= 0;
1191 1191
         
... ...
@@ -1202,30 +1335,36 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
1202 1202
 int ff_pre_estimate_p_frame_motion(MpegEncContext * s,
1203 1203
                                     int mb_x, int mb_y)
1204 1204
 {
1205
-    int mx, my, range, dmin;
1206
-    int xmin, ymin, xmax, ymax;
1207
-    int rel_xmin, rel_ymin, rel_xmax, rel_ymax;
1205
+    int mx, my, dmin;
1208 1206
     int pred_x=0, pred_y=0;
1209 1207
     int P[10][2];
1210 1208
     const int shift= 1+s->quarter_sample;
1211 1209
     uint8_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV;
1212 1210
     const int xy= mb_x + mb_y*s->mb_stride;
1211
+    const int stride= s->linesize;
1212
+    const int uvstride= s->uvlinesize;
1213
+    uint8_t *src_data[3]= {
1214
+        s->new_picture.data[0] + 16*(mb_x + stride*mb_y),
1215
+        s->new_picture.data[1] + 8*(mb_x + uvstride*mb_y),
1216
+        s->new_picture.data[2] + 8*(mb_x + uvstride*mb_y)
1217
+    };
1218
+    uint8_t *ref_data[3]= {
1219
+        s->last_picture.data[0] + 16*(mb_x + stride*mb_y),
1220
+        s->last_picture.data[1] + 8*(mb_x + uvstride*mb_y),
1221
+        s->last_picture.data[2] + 8*(mb_x + uvstride*mb_y)
1222
+    };
1213 1223
     
1214 1224
     assert(s->quarter_sample==0 || s->quarter_sample==1);
1215 1225
 
1216 1226
     s->me.pre_penalty_factor    = get_penalty_factor(s, s->avctx->me_pre_cmp);
1217 1227
 
1218
-    get_limits(s, &range, &xmin, &ymin, &xmax, &ymax);
1219
-    rel_xmin= xmin - mb_x*16;
1220
-    rel_xmax= xmax - mb_x*16;
1221
-    rel_ymin= ymin - mb_y*16;
1222
-    rel_ymax= ymax - mb_y*16;
1228
+    get_limits(s, 16*mb_x, 16*mb_y);
1223 1229
     s->me.skip=0;
1224 1230
 
1225 1231
     P_LEFT[0]       = s->p_mv_table[xy + 1][0];
1226 1232
     P_LEFT[1]       = s->p_mv_table[xy + 1][1];
1227 1233
 
1228
-    if(P_LEFT[0]       < (rel_xmin<<shift)) P_LEFT[0]       = (rel_xmin<<shift);
1234
+    if(P_LEFT[0]       < (s->me.xmin<<shift)) P_LEFT[0]       = (s->me.xmin<<shift);
1229 1235
 
1230 1236
     /* special case for first line */
1231 1237
     if (mb_y == s->mb_height-1) {
... ...
@@ -1238,9 +1377,9 @@ int ff_pre_estimate_p_frame_motion(MpegEncContext * s,
1238 1238
         P_TOP[1]      = s->p_mv_table[xy + s->mb_stride    ][1];
1239 1239
         P_TOPRIGHT[0] = s->p_mv_table[xy + s->mb_stride - 1][0];
1240 1240
         P_TOPRIGHT[1] = s->p_mv_table[xy + s->mb_stride - 1][1];
1241
-        if(P_TOP[1]      < (rel_ymin<<shift)) P_TOP[1]     = (rel_ymin<<shift);
1242
-        if(P_TOPRIGHT[0] > (rel_xmax<<shift)) P_TOPRIGHT[0]= (rel_xmax<<shift);
1243
-        if(P_TOPRIGHT[1] < (rel_ymin<<shift)) P_TOPRIGHT[1]= (rel_ymin<<shift);
1241
+        if(P_TOP[1]      < (s->me.ymin<<shift)) P_TOP[1]     = (s->me.ymin<<shift);
1242
+        if(P_TOPRIGHT[0] > (s->me.xmax<<shift)) P_TOPRIGHT[0]= (s->me.xmax<<shift);
1243
+        if(P_TOPRIGHT[1] < (s->me.ymin<<shift)) P_TOPRIGHT[1]= (s->me.ymin<<shift);
1244 1244
     
1245 1245
         P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1246 1246
         P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
... ...
@@ -1248,8 +1387,8 @@ int ff_pre_estimate_p_frame_motion(MpegEncContext * s,
1248 1248
         pred_x = P_MEDIAN[0];
1249 1249
         pred_y = P_MEDIAN[1];
1250 1250
     }
1251
-    dmin = s->me.pre_motion_search(s, 0, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax, 
1252
-                                   &s->last_picture, s->p_mv_table, (1<<16)>>shift, mv_penalty);
1251
+    dmin = s->me.pre_motion_search(s, &mx, &my, P, pred_x, pred_y, 
1252
+                                   src_data, ref_data, stride, uvstride, s->p_mv_table, (1<<16)>>shift, mv_penalty);
1253 1253
 
1254 1254
     s->p_mv_table[xy][0] = mx<<shift;
1255 1255
     s->p_mv_table[xy][1] = my<<shift;
... ...
@@ -1258,17 +1397,16 @@ int ff_pre_estimate_p_frame_motion(MpegEncContext * s,
1258 1258
 }
1259 1259
 
1260 1260
 static int ff_estimate_motion_b(MpegEncContext * s,
1261
-                       int mb_x, int mb_y, int16_t (*mv_table)[2], Picture *picture, int f_code)
1261
+                       int mb_x, int mb_y, int16_t (*mv_table)[2], uint8_t *src_data[3],
1262
+                       uint8_t *ref_data[3], int stride, int uvstride, int f_code)
1262 1263
 {
1263
-    int mx, my, range, dmin;
1264
-    int xmin, ymin, xmax, ymax;
1265
-    int rel_xmin, rel_ymin, rel_xmax, rel_ymax;
1264
+    int mx, my, dmin;
1266 1265
     int pred_x=0, pred_y=0;
1267 1266
     int P[10][2];
1268 1267
     const int shift= 1+s->quarter_sample;
1269 1268
     const int mot_stride = s->mb_stride;
1270 1269
     const int mot_xy = mb_y*mot_stride + mb_x;
1271
-    uint8_t * const ref_picture= picture->data[0];
1270
+    uint8_t * const ref_picture= ref_data[0] - 16*s->mb_x - 16*s->mb_y*s->linesize; //FIXME ugly
1272 1271
     uint8_t * const mv_penalty= s->me.mv_penalty[f_code] + MAX_MV;
1273 1272
     int mv_scale;
1274 1273
         
... ...
@@ -1276,11 +1414,7 @@ static int ff_estimate_motion_b(MpegEncContext * s,
1276 1276
     s->me.sub_penalty_factor= get_penalty_factor(s, s->avctx->me_sub_cmp);
1277 1277
     s->me.mb_penalty_factor = get_penalty_factor(s, s->avctx->mb_cmp);
1278 1278
 
1279
-    get_limits(s, &range, &xmin, &ymin, &xmax, &ymax);
1280
-    rel_xmin= xmin - mb_x*16;
1281
-    rel_xmax= xmax - mb_x*16;
1282
-    rel_ymin= ymin - mb_y*16;
1283
-    rel_ymax= ymax - mb_y*16;
1279
+    get_limits(s, 16*mb_x, 16*mb_y);
1284 1280
 
1285 1281
     switch(s->me_method) {
1286 1282
     case ME_ZERO:
... ...
@@ -1290,28 +1424,30 @@ static int ff_estimate_motion_b(MpegEncContext * s,
1290 1290
         mx-= mb_x*16;
1291 1291
         my-= mb_y*16;
1292 1292
         break;
1293
+#if 0
1293 1294
     case ME_FULL:
1294
-	dmin = full_motion_search(s, &mx, &my, range, xmin, ymin, xmax, ymax, ref_picture);
1295
+	dmin = full_motion_search(s, &mx, &my, range, ref_picture);
1295 1296
         mx-= mb_x*16;
1296 1297
         my-= mb_y*16;
1297 1298
         break;
1298 1299
     case ME_LOG:
1299
-	dmin = log_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture);
1300
+	dmin = log_motion_search(s, &mx, &my, range / 2, ref_picture);
1300 1301
         mx-= mb_x*16;
1301 1302
         my-= mb_y*16;
1302 1303
         break;
1303 1304
     case ME_PHODS:
1304
-	dmin = phods_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture);
1305
+	dmin = phods_motion_search(s, &mx, &my, range / 2, ref_picture);
1305 1306
         mx-= mb_x*16;
1306 1307
         my-= mb_y*16;
1307 1308
         break;
1309
+#endif
1308 1310
     case ME_X1:
1309 1311
     case ME_EPZS:
1310 1312
        {
1311 1313
             P_LEFT[0]        = mv_table[mot_xy - 1][0];
1312 1314
             P_LEFT[1]        = mv_table[mot_xy - 1][1];
1313 1315
 
1314
-            if(P_LEFT[0]       > (rel_xmax<<shift)) P_LEFT[0]       = (rel_xmax<<shift);
1316
+            if(P_LEFT[0]       > (s->me.xmax<<shift)) P_LEFT[0]       = (s->me.xmax<<shift);
1315 1317
 
1316 1318
             /* special case for first line */
1317 1319
             if (mb_y) {
... ...
@@ -1319,9 +1455,9 @@ static int ff_estimate_motion_b(MpegEncContext * s,
1319 1319
                 P_TOP[1] = mv_table[mot_xy - mot_stride             ][1];
1320 1320
                 P_TOPRIGHT[0] = mv_table[mot_xy - mot_stride + 1         ][0];
1321 1321
                 P_TOPRIGHT[1] = mv_table[mot_xy - mot_stride + 1         ][1];
1322
-                if(P_TOP[1] > (rel_ymax<<shift)) P_TOP[1]= (rel_ymax<<shift);
1323
-                if(P_TOPRIGHT[0] < (rel_xmin<<shift)) P_TOPRIGHT[0]= (rel_xmin<<shift);
1324
-                if(P_TOPRIGHT[1] > (rel_ymax<<shift)) P_TOPRIGHT[1]= (rel_ymax<<shift);
1322
+                if(P_TOP[1] > (s->me.ymax<<shift)) P_TOP[1]= (s->me.ymax<<shift);
1323
+                if(P_TOPRIGHT[0] < (s->me.xmin<<shift)) P_TOPRIGHT[0]= (s->me.xmin<<shift);
1324
+                if(P_TOPRIGHT[1] > (s->me.ymax<<shift)) P_TOPRIGHT[1]= (s->me.ymax<<shift);
1325 1325
         
1326 1326
                 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1327 1327
                 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
... ...
@@ -1336,17 +1472,17 @@ static int ff_estimate_motion_b(MpegEncContext * s,
1336 1336
             mv_scale= ((s->pb_time - s->pp_time)<<16) / (s->pp_time<<shift);
1337 1337
         }
1338 1338
         
1339
-        dmin = s->me.motion_search[0](s, 0, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax, 
1340
-                                      picture, s->p_mv_table, mv_scale, mv_penalty);
1339
+        dmin = s->me.motion_search[0](s, &mx, &my, P, pred_x, pred_y, 
1340
+                                      src_data, ref_data, stride, uvstride, s->p_mv_table, mv_scale, mv_penalty);
1341 1341
  
1342 1342
         break;
1343 1343
     }
1344 1344
     
1345
-    dmin= s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
1346
-				   pred_x, pred_y, picture, 0, 0, mv_penalty);
1345
+    dmin= s->me.sub_motion_search(s, &mx, &my, dmin,
1346
+				   pred_x, pred_y, src_data, ref_data, stride, uvstride, 0, 16, mv_penalty);
1347 1347
                                    
1348 1348
     if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip)
1349
-        dmin= s->me.get_mb_score(s, mx, my, pred_x, pred_y, picture, mv_penalty);
1349
+        dmin= s->me.get_mb_score(s, mx, my, pred_x, pred_y, src_data, ref_data, stride, uvstride, mv_penalty);
1350 1350
 
1351 1351
 //printf("%d %d %d %d//", s->mb_x, s->mb_y, mx, my);
1352 1352
 //    s->mb_type[mb_y*s->mb_width + mb_x]= mb_type;
... ...
@@ -1356,16 +1492,18 @@ static int ff_estimate_motion_b(MpegEncContext * s,
1356 1356
     return dmin;
1357 1357
 }
1358 1358
 
1359
-static inline int check_bidir_mv(MpegEncContext * s,
1360
-                   int mb_x, int mb_y,
1359
+static inline int check_bidir_mv(MpegEncContext * s, uint8_t *src_data[3], uint8_t *ref_data[6],
1360
+                   int stride, int uvstride,
1361 1361
                    int motion_fx, int motion_fy,
1362 1362
                    int motion_bx, int motion_by,
1363 1363
                    int pred_fx, int pred_fy,
1364
-                   int pred_bx, int pred_by)
1364
+                   int pred_bx, int pred_by,
1365
+                   int size, int h)
1365 1366
 {
1366 1367
     //FIXME optimize?
1367 1368
     //FIXME move into template?
1368 1369
     //FIXME better f_code prediction (max mv & distance)
1370
+    //FIXME pointers
1369 1371
     uint8_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
1370 1372
     uint8_t *dest_y = s->me.scratchpad;
1371 1373
     uint8_t *ptr;
... ...
@@ -1375,45 +1513,37 @@ static inline int check_bidir_mv(MpegEncContext * s,
1375 1375
 
1376 1376
     if(s->quarter_sample){
1377 1377
         dxy = ((motion_fy & 3) << 2) | (motion_fx & 3);
1378
-        src_x = mb_x * 16 + (motion_fx >> 2);
1379
-        src_y = mb_y * 16 + (motion_fy >> 2);
1380
-        assert(src_x >=-16 && src_x<=s->h_edge_pos);
1381
-        assert(src_y >=-16 && src_y<=s->v_edge_pos);
1378
+        src_x = motion_fx >> 2;
1379
+        src_y = motion_fy >> 2;
1382 1380
 
1383
-        ptr = s->last_picture.data[0] + (src_y * s->linesize) + src_x;
1384
-        s->dsp.put_qpel_pixels_tab[0][dxy](dest_y    , ptr    , s->linesize);
1381
+        ptr = ref_data[0] + (src_y * stride) + src_x;
1382
+        s->dsp.put_qpel_pixels_tab[0][dxy](dest_y    , ptr    , stride);
1385 1383
 
1386 1384
         dxy = ((motion_by & 3) << 2) | (motion_bx & 3);
1387
-        src_x = mb_x * 16 + (motion_bx >> 2);
1388
-        src_y = mb_y * 16 + (motion_by >> 2);
1389
-        assert(src_x >=-16 && src_x<=s->h_edge_pos);
1390
-        assert(src_y >=-16 && src_y<=s->v_edge_pos);
1385
+        src_x = motion_bx >> 2;
1386
+        src_y = motion_by >> 2;
1391 1387
     
1392
-        ptr = s->next_picture.data[0] + (src_y * s->linesize) + src_x;
1393
-        s->dsp.avg_qpel_pixels_tab[0][dxy](dest_y    , ptr    , s->linesize);
1388
+        ptr = ref_data[3] + (src_y * stride) + src_x;
1389
+        s->dsp.avg_qpel_pixels_tab[size][dxy](dest_y    , ptr    , stride);
1394 1390
     }else{
1395 1391
         dxy = ((motion_fy & 1) << 1) | (motion_fx & 1);
1396
-        src_x = mb_x * 16 + (motion_fx >> 1);
1397
-        src_y = mb_y * 16 + (motion_fy >> 1);
1398
-        assert(src_x >=-16 && src_x<=s->h_edge_pos);
1399
-        assert(src_y >=-16 && src_y<=s->v_edge_pos);
1392
+        src_x = motion_fx >> 1;
1393
+        src_y = motion_fy >> 1;
1400 1394
 
1401
-        ptr = s->last_picture.data[0] + (src_y * s->linesize) + src_x;
1402
-        s->dsp.put_pixels_tab[0][dxy](dest_y    , ptr    , s->linesize, 16);
1395
+        ptr = ref_data[0] + (src_y * stride) + src_x;
1396
+        s->dsp.put_pixels_tab[size][dxy](dest_y    , ptr    , stride, h);
1403 1397
 
1404 1398
         dxy = ((motion_by & 1) << 1) | (motion_bx & 1);
1405
-        src_x = mb_x * 16 + (motion_bx >> 1);
1406
-        src_y = mb_y * 16 + (motion_by >> 1);
1407
-        assert(src_x >=-16 && src_x<=s->h_edge_pos);
1408
-        assert(src_y >=-16 && src_y<=s->v_edge_pos);
1399
+        src_x = motion_bx >> 1;
1400
+        src_y = motion_by >> 1;
1409 1401
     
1410
-        ptr = s->next_picture.data[0] + (src_y * s->linesize) + src_x;
1411
-        s->dsp.avg_pixels_tab[0][dxy](dest_y    , ptr    , s->linesize, 16);
1402
+        ptr = ref_data[3] + (src_y * stride) + src_x;
1403
+        s->dsp.avg_pixels_tab[size][dxy](dest_y    , ptr    , stride, h);
1412 1404
     }
1413 1405
 
1414 1406
     fbmin = (mv_penalty[motion_fx-pred_fx] + mv_penalty[motion_fy-pred_fy])*s->me.mb_penalty_factor
1415 1407
            +(mv_penalty[motion_bx-pred_bx] + mv_penalty[motion_by-pred_by])*s->me.mb_penalty_factor
1416
-           + s->dsp.mb_cmp[0](s, s->new_picture.data[0] + mb_x*16 + mb_y*16*s->linesize, dest_y, s->linesize);
1408
+           + s->dsp.mb_cmp[size](s, src_data[0], dest_y, stride, h); //FIXME new_pic
1417 1409
            
1418 1410
     if(s->avctx->mb_cmp&FF_CMP_CHROMA){
1419 1411
     }
... ...
@@ -1423,7 +1553,8 @@ static inline int check_bidir_mv(MpegEncContext * s,
1423 1423
 }
1424 1424
 
1425 1425
 /* refine the bidir vectors in hq mode and return the score in both lq & hq mode*/
1426
-static inline int bidir_refine(MpegEncContext * s,
1426
+static inline int bidir_refine(MpegEncContext * s, uint8_t *src_data[3], uint8_t *ref_data[6],
1427
+                                  int stride, int uvstride,
1427 1428
                                   int mb_x, int mb_y)
1428 1429
 {
1429 1430
     const int mot_stride = s->mb_stride;
... ...
@@ -1440,16 +1571,18 @@ static inline int bidir_refine(MpegEncContext * s,
1440 1440
 
1441 1441
     //FIXME do refinement and add flag
1442 1442
     
1443
-    fbmin= check_bidir_mv(s, mb_x, mb_y, 
1443
+    fbmin= check_bidir_mv(s, src_data, ref_data, stride, uvstride,
1444 1444
                           motion_fx, motion_fy,
1445 1445
                           motion_bx, motion_by,
1446 1446
                           pred_fx, pred_fy,
1447
-                          pred_bx, pred_by);
1447
+                          pred_bx, pred_by,
1448
+                          0, 16);
1448 1449
 
1449 1450
    return fbmin;
1450 1451
 }
1451 1452
 
1452
-static inline int direct_search(MpegEncContext * s,
1453
+static inline int direct_search(MpegEncContext * s, uint8_t *src_data[3], uint8_t *ref_data[6],
1454
+                                int stride, int uvstride,
1453 1455
                                 int mb_x, int mb_y)
1454 1456
 {
1455 1457
     int P[10][2];
... ...
@@ -1508,6 +1641,11 @@ static inline int direct_search(MpegEncContext * s,
1508 1508
 
1509 1509
         return 256*256*256*64;
1510 1510
     }
1511
+    
1512
+    s->me.xmin= xmin;
1513
+    s->me.ymin= ymin;
1514
+    s->me.xmax= xmax;
1515
+    s->me.ymax= ymax;
1511 1516
 
1512 1517
     P_LEFT[0]        = clip(mv_table[mot_xy - 1][0], xmin<<shift, xmax<<shift);
1513 1518
     P_LEFT[1]        = clip(mv_table[mot_xy - 1][1], ymin<<shift, ymax<<shift);
... ...
@@ -1525,22 +1663,24 @@ static inline int direct_search(MpegEncContext * s,
1525 1525
  
1526 1526
     //FIXME direct_search  ptr in context!!! (needed for chroma anyway or this will get messy)   
1527 1527
     if(s->flags&CODEC_FLAG_QPEL){
1528
-        dmin = simple_direct_qpel_epzs_motion_search(s, 0, &mx, &my, P, 0, 0, xmin, ymin, xmax, ymax, 
1529
-                                                     &s->last_picture, mv_table, 1<<14, mv_penalty);
1530
-        dmin = simple_direct_qpel_qpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax,
1531
-                                                0, 0, &s->last_picture, 0, 0, mv_penalty);
1528
+        dmin = simple_direct_qpel_epzs_motion_search(s, &mx, &my, P, 0, 0, 
1529
+                                                     src_data, ref_data, stride, uvstride, mv_table, 1<<14, mv_penalty);
1530
+        dmin = simple_direct_qpel_qpel_motion_search(s, &mx, &my, dmin,
1531
+                                                0, 0, src_data, ref_data, stride, uvstride, 0, 16, mv_penalty);
1532 1532
         
1533 1533
         if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip)
1534
-            dmin= simple_direct_qpel_qpel_get_mb_score(s, mx, my, 0, 0, &s->last_picture, mv_penalty);
1534
+            dmin= simple_direct_qpel_qpel_get_mb_score(s, mx, my, 0, 0, src_data, ref_data, stride, uvstride, mv_penalty);
1535 1535
     }else{
1536
-        dmin = simple_direct_hpel_epzs_motion_search(s, 0, &mx, &my, P, 0, 0, xmin, ymin, xmax, ymax, 
1537
-                                                     &s->last_picture, mv_table, 1<<15, mv_penalty);
1538
-        dmin = simple_direct_hpel_hpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax,
1539
-                                                0, 0, &s->last_picture, 0, 0, mv_penalty);
1536
+        dmin = simple_direct_hpel_epzs_motion_search(s, &mx, &my, P, 0, 0, 
1537
+                                                     src_data, ref_data, stride, uvstride, mv_table, 1<<15, mv_penalty);
1538
+        dmin = simple_direct_hpel_hpel_motion_search(s, &mx, &my, dmin,
1539
+                                                0, 0, src_data, ref_data, stride, uvstride, 0, 16, mv_penalty);
1540 1540
 
1541 1541
         if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip)
1542
-            dmin= simple_direct_hpel_hpel_get_mb_score(s, mx, my, 0, 0, &s->last_picture, mv_penalty);
1542
+            dmin= simple_direct_hpel_hpel_get_mb_score(s, mx, my, 0, 0, src_data, ref_data, stride, uvstride, mv_penalty);
1543 1543
     }
1544
+    
1545
+    get_limits(s, 16*mb_x, 16*mb_y); //restore s->me.?min/max, maybe not needed
1544 1546
 
1545 1547
     s->b_direct_mv_table[mot_xy][0]= mx;
1546 1548
     s->b_direct_mv_table[mot_xy][1]= my;
... ...
@@ -1551,40 +1691,80 @@ void ff_estimate_b_frame_motion(MpegEncContext * s,
1551 1551
                              int mb_x, int mb_y)
1552 1552
 {
1553 1553
     const int penalty_factor= s->me.mb_penalty_factor;
1554
-    int fmin, bmin, dmin, fbmin;
1554
+    int fmin, bmin, dmin, fbmin, bimin, fimin;
1555 1555
     int type=0;
1556
+    const int stride= s->linesize;
1557
+    const int uvstride= s->uvlinesize;
1558
+    uint8_t *src_data[3]= {
1559
+        s->new_picture.data[0] + 16*(s->mb_x + stride*s->mb_y),
1560
+        s->new_picture.data[1] + 8*(s->mb_x + uvstride*s->mb_y),
1561
+        s->new_picture.data[2] + 8*(s->mb_x + uvstride*s->mb_y)
1562
+    };
1563
+    uint8_t *ref_data[6]= {
1564
+        s->last_picture.data[0] + 16*(s->mb_x + stride*s->mb_y),
1565
+        s->last_picture.data[1] + 8*(s->mb_x + uvstride*s->mb_y),
1566
+        s->last_picture.data[2] + 8*(s->mb_x + uvstride*s->mb_y),
1567
+        s->next_picture.data[0] + 16*(s->mb_x + stride*s->mb_y),
1568
+        s->next_picture.data[1] + 8*(s->mb_x + uvstride*s->mb_y),
1569
+        s->next_picture.data[2] + 8*(s->mb_x + uvstride*s->mb_y)
1570
+    };
1556 1571
     
1557 1572
     s->me.skip=0;
1558 1573
     if (s->codec_id == CODEC_ID_MPEG4)
1559
-        dmin= direct_search(s, mb_x, mb_y);
1574
+        dmin= direct_search(s, src_data, ref_data, stride, uvstride, mb_x, mb_y);
1560 1575
     else
1561 1576
         dmin= INT_MAX;
1562
-
1577
+//FIXME penalty stuff for non mpeg4
1563 1578
     s->me.skip=0;
1564
-    fmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, &s->last_picture, s->f_code) + 3*penalty_factor;
1579
+    fmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, src_data, 
1580
+                               ref_data, stride, uvstride, s->f_code) + 3*penalty_factor;
1565 1581
     
1566 1582
     s->me.skip=0;
1567
-    bmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, &s->next_picture, s->b_code) + 2*penalty_factor;
1583
+    bmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, src_data, 
1584
+                               ref_data+3, stride, uvstride, s->b_code) + 2*penalty_factor;
1568 1585
 //printf(" %d %d ", s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1]);
1569 1586
 
1570 1587
     s->me.skip=0;
1571
-    fbmin= bidir_refine(s, mb_x, mb_y) + penalty_factor;
1588
+    fbmin= bidir_refine(s, src_data, ref_data, stride, uvstride, mb_x, mb_y) + penalty_factor;
1572 1589
 //printf("%d %d %d %d\n", dmin, fmin, bmin, fbmin);
1590
+    
1591
+    if(s->flags & CODEC_FLAG_INTERLACED_ME){
1592
+        const int xy = mb_y*s->mb_stride + mb_x;
1593
+
1594
+//FIXME mb type penalty
1595
+        s->me.skip=0;
1596
+        fimin= interlaced_search(s, src_data, ref_data  , 
1597
+                                 s->b_field_mv_table[0], s->b_field_select_table[0], s->f_code,
1598
+                                 s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1]);
1599
+        bimin= interlaced_search(s, src_data, ref_data+3, 
1600
+                                 s->b_field_mv_table[1], s->b_field_select_table[1], s->b_code,
1601
+                                 s->b_back_mv_table[xy][0], s->b_back_mv_table[xy][1]);
1602
+    }else
1603
+        fimin= bimin= INT_MAX;
1604
+
1573 1605
     {
1574 1606
         int score= fmin;
1575
-        type = MB_TYPE_FORWARD;
1607
+        type = CANDIDATE_MB_TYPE_FORWARD;
1576 1608
         
1577 1609
         if (dmin <= score){
1578 1610
             score = dmin;
1579
-            type = MB_TYPE_DIRECT;
1611
+            type = CANDIDATE_MB_TYPE_DIRECT;
1580 1612
         }
1581 1613
         if(bmin<score){
1582 1614
             score=bmin;
1583
-            type= MB_TYPE_BACKWARD; 
1615
+            type= CANDIDATE_MB_TYPE_BACKWARD; 
1584 1616
         }
1585 1617
         if(fbmin<score){
1586 1618
             score=fbmin;
1587
-            type= MB_TYPE_BIDIR;
1619
+            type= CANDIDATE_MB_TYPE_BIDIR;
1620
+        }
1621
+        if(fimin<score){
1622
+            score=fimin;
1623
+            type= CANDIDATE_MB_TYPE_FORWARD_I;
1624
+        }
1625
+        if(bimin<score){
1626
+            score=bimin;
1627
+            type= CANDIDATE_MB_TYPE_BACKWARD_I;
1588 1628
         }
1589 1629
         
1590 1630
         score= ((unsigned)(score*score + 128*256))>>16;
... ...
@@ -1593,8 +1773,16 @@ void ff_estimate_b_frame_motion(MpegEncContext * s,
1593 1593
     }
1594 1594
 
1595 1595
     if(s->avctx->mb_decision > FF_MB_DECISION_SIMPLE){
1596
-        type= MB_TYPE_FORWARD | MB_TYPE_BACKWARD | MB_TYPE_BIDIR | MB_TYPE_DIRECT; //FIXME something smarter
1597
-        if(dmin>256*256*16) type&= ~MB_TYPE_DIRECT; //dont try direct mode if its invalid for this MB
1596
+        type= CANDIDATE_MB_TYPE_FORWARD | CANDIDATE_MB_TYPE_BACKWARD | CANDIDATE_MB_TYPE_BIDIR | CANDIDATE_MB_TYPE_DIRECT;
1597
+        if(fimin < INT_MAX)
1598
+            type |= CANDIDATE_MB_TYPE_FORWARD_I;
1599
+        if(bimin < INT_MAX)
1600
+            type |= CANDIDATE_MB_TYPE_BACKWARD_I;
1601
+        if(fimin < INT_MAX && bimin < INT_MAX){
1602
+            type |= CANDIDATE_MB_TYPE_BIDIR_I;
1603
+        }
1604
+         //FIXME something smarter
1605
+        if(dmin>256*256*16) type&= ~CANDIDATE_MB_TYPE_DIRECT; //don't try direct mode if it's invalid for this MB
1598 1606
     }
1599 1607
 
1600 1608
     s->mb_type[mb_y*s->mb_stride + mb_x]= type;
... ...
@@ -1661,24 +1849,6 @@ void ff_fix_long_p_mvs(MpegEncContext * s)
1661 1661
     
1662 1662
     if(s->avctx->me_range && range > s->avctx->me_range) range= s->avctx->me_range;
1663 1663
     
1664
-    /* clip / convert to intra 16x16 type MVs */
1665
-    for(y=0; y<s->mb_height; y++){
1666
-        int x;
1667
-        int xy= y*s->mb_stride;
1668
-        for(x=0; x<s->mb_width; x++){
1669
-            if(s->mb_type[xy]&MB_TYPE_INTER){
1670
-                if(   s->p_mv_table[xy][0] >=range || s->p_mv_table[xy][0] <-range
1671
-                   || s->p_mv_table[xy][1] >=range || s->p_mv_table[xy][1] <-range){
1672
-                    s->mb_type[xy] &= ~MB_TYPE_INTER;
1673
-                    s->mb_type[xy] |= MB_TYPE_INTRA;
1674
-                    s->current_picture.mb_type[xy]= MB_TYPE_INTRA;
1675
-                    s->p_mv_table[xy][0] = 0;
1676
-                    s->p_mv_table[xy][1] = 0;
1677
-                }
1678
-            }
1679
-            xy++;
1680
-        }
1681
-    }
1682 1664
 //printf("%d no:%d %d//\n", clip, noclip, f_code);
1683 1665
     if(s->flags&CODEC_FLAG_4MV){
1684 1666
         const int wrap= 2+ s->mb_width*2;
... ...
@@ -1690,7 +1860,7 @@ void ff_fix_long_p_mvs(MpegEncContext * s)
1690 1690
             int x;
1691 1691
 
1692 1692
             for(x=0; x<s->mb_width; x++){
1693
-                if(s->mb_type[i]&MB_TYPE_INTER4V){
1693
+                if(s->mb_type[i]&CANDIDATE_MB_TYPE_INTER4V){
1694 1694
                     int block;
1695 1695
                     for(block=0; block<4; block++){
1696 1696
                         int off= (block& 1) + (block>>1)*wrap;
... ...
@@ -1699,9 +1869,9 @@ void ff_fix_long_p_mvs(MpegEncContext * s)
1699 1699
 
1700 1700
                         if(   mx >=range || mx <-range
1701 1701
                            || my >=range || my <-range){
1702
-                            s->mb_type[i] &= ~MB_TYPE_INTER4V;
1703
-                            s->mb_type[i] |= MB_TYPE_INTRA;
1704
-                            s->current_picture.mb_type[i]= MB_TYPE_INTRA;
1702
+                            s->mb_type[i] &= ~CANDIDATE_MB_TYPE_INTER4V;
1703
+                            s->mb_type[i] |= CANDIDATE_MB_TYPE_INTRA;
1704
+                            s->current_picture.mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
1705 1705
                         }
1706 1706
                     }
1707 1707
                 }
... ...
@@ -1712,30 +1882,45 @@ void ff_fix_long_p_mvs(MpegEncContext * s)
1712 1712
     }
1713 1713
 }
1714 1714
 
1715
-void ff_fix_long_b_mvs(MpegEncContext * s, int16_t (*mv_table)[2], int f_code, int type)
1715
+/**
1716
+ *
1717
+ * Clips out-of-range motion vectors in mv_table, or marks their macroblocks as intra.
1718
+ */
1719
+void ff_fix_long_mvs(MpegEncContext * s, uint8_t *field_select_table, int field_select, 
1720
+                     int16_t (*mv_table)[2], int f_code, int type, int truncate)
1716 1721
 {
1717
-    int y;
1722
+    int y, h_range, v_range;
1718 1723
 
1719 1724
     // RAL: 8 in MPEG-1, 16 in MPEG-4
1720 1725
     int range = (((s->out_format == FMT_MPEG1) ? 8 : 16) << f_code);
1721
-    
1726
+
1727
+    if(s->msmpeg4_version) range= 16;
1722 1728
     if(s->avctx->me_range && range > s->avctx->me_range) range= s->avctx->me_range;
1723 1729
 
1730
+    h_range= range;
1731
+    v_range= field_select_table ? range>>1 : range;
1732
+
1724 1733
     /* clip / convert to intra 16x16 type MVs */
1725 1734
     for(y=0; y<s->mb_height; y++){
1726 1735
         int x;
1727 1736
         int xy= y*s->mb_stride;
1728 1737
         for(x=0; x<s->mb_width; x++){
1729 1738
             if (s->mb_type[xy] & type){    // RAL: "type" test added...
1730
-                if(   mv_table[xy][0] >=range || mv_table[xy][0] <-range
1731
-                   || mv_table[xy][1] >=range || mv_table[xy][1] <-range){
1732
-
1733
-                    if(s->codec_id == CODEC_ID_MPEG1VIDEO && 0){
1734
-                    }else{
1735
-                        if     (mv_table[xy][0] > range-1) mv_table[xy][0]=  range-1;
1736
-                        else if(mv_table[xy][0] < -range ) mv_table[xy][0]= -range;
1737
-                        if     (mv_table[xy][1] > range-1) mv_table[xy][1]=  range-1;
1738
-                        else if(mv_table[xy][1] < -range ) mv_table[xy][1]= -range;
1739
+                if(field_select_table==NULL || field_select_table[xy] == field_select){
1740
+                    if(   mv_table[xy][0] >=h_range || mv_table[xy][0] <-h_range
1741
+                       || mv_table[xy][1] >=v_range || mv_table[xy][1] <-v_range){
1742
+
1743
+                        if(truncate){
1744
+                            if     (mv_table[xy][0] > h_range-1) mv_table[xy][0]=  h_range-1;
1745
+                            else if(mv_table[xy][0] < -h_range ) mv_table[xy][0]= -h_range;
1746
+                            if     (mv_table[xy][1] > v_range-1) mv_table[xy][1]=  v_range-1;
1747
+                            else if(mv_table[xy][1] < -v_range ) mv_table[xy][1]= -v_range;
1748
+                        }else{
1749
+                            s->mb_type[xy] &= ~type;
1750
+                            s->mb_type[xy] |= CANDIDATE_MB_TYPE_INTRA;
1751
+                            mv_table[xy][0]=
1752
+                            mv_table[xy][1]= 0;
1753
+                        }
1739 1754
                     }
1740 1755
                 }
1741 1756
             }
... ...
@@ -22,29 +22,31 @@
22 22
  * @file motion_est_template.c
23 23
  * Motion estimation template.
24 24
  */
25
-
25
+//FIXME ref2_y next_pic?
26 26
 //lets hope gcc will remove the unused vars ...(gcc 3.2.2 seems to do it ...)
27 27
 //Note, the last line is there to kill these ugly unused var warnings
28
-#define LOAD_COMMON(x, y)\
28
+#define LOAD_COMMON\
29 29
     uint32_t * const score_map= s->me.score_map;\
30
-    const int stride= s->linesize;\
31
-    const int uvstride= s->uvlinesize;\
32 30
     const int time_pp= s->pp_time;\
33 31
     const int time_pb= s->pb_time;\
34
-    uint8_t * const src_y= s->new_picture.data[0] + ((y) * stride) + (x);\
35
-    uint8_t * const src_u= s->new_picture.data[1] + (((y)>>1) * uvstride) + ((x)>>1);\
36
-    uint8_t * const src_v= s->new_picture.data[2] + (((y)>>1) * uvstride) + ((x)>>1);\
37
-    uint8_t * const ref_y= ref_picture->data[0] + ((y) * stride) + (x);\
38
-    uint8_t * const ref_u= ref_picture->data[1] + (((y)>>1) * uvstride) + ((x)>>1);\
39
-    uint8_t * const ref_v= ref_picture->data[2] + (((y)>>1) * uvstride) + ((x)>>1);\
40
-    uint8_t * const ref2_y= s->next_picture.data[0] + ((y) * stride) + (x);\
32
+    const int xmin= s->me.xmin;\
33
+    const int ymin= s->me.ymin;\
34
+    const int xmax= s->me.xmax;\
35
+    const int ymax= s->me.ymax;\
36
+    uint8_t * const src_y= src_data[0];\
37
+    uint8_t * const src_u= src_data[1];\
38
+    uint8_t * const src_v= src_data[2];\
39
+    uint8_t * const ref_y= ref_data[0];\
40
+    uint8_t * const ref_u= ref_data[1];\
41
+    uint8_t * const ref_v= ref_data[2];\
41 42
     op_pixels_func (*hpel_put)[4];\
42 43
     op_pixels_func (*hpel_avg)[4]= &s->dsp.avg_pixels_tab[size];\
43 44
     op_pixels_func (*chroma_hpel_put)[4];\
44 45
     qpel_mc_func (*qpel_put)[16];\
45 46
     qpel_mc_func (*qpel_avg)[16]= &s->dsp.avg_qpel_pixels_tab[size];\
46 47
     const __attribute__((unused)) int unu= time_pp + time_pb + (size_t)src_u + (size_t)src_v + (size_t)ref_u + (size_t)ref_v\
47
-                                           + (size_t)ref2_y + (size_t)hpel_avg + (size_t)qpel_avg + (size_t)score_map;\
48
+                                           + (size_t)hpel_avg + (size_t)qpel_avg + (size_t)score_map\
49
+                                           + xmin + xmax + ymin + ymax;\
48 50
     if(s->no_rounding /*FIXME b_type*/){\
49 51
         hpel_put= &s->dsp.put_no_rnd_pixels_tab[size];\
50 52
         chroma_hpel_put= &s->dsp.put_no_rnd_pixels_tab[size+1];\
... ...
@@ -70,9 +72,8 @@
70 70
 #if 0
71 71
 static int RENAME(hpel_motion_search)(MpegEncContext * s,
72 72
 				  int *mx_ptr, int *my_ptr, int dmin,
73
-				  int xmin, int ymin, int xmax, int ymax,
74
-                                  int pred_x, int pred_y, Picture *ref_picture, 
75
-                                  int n, int size, uint8_t * const mv_penalty)
73
+                                  int pred_x, int pred_y, uint8_t *ref_data[3], 
74
+                                  int size, uint8_t * const mv_penalty)
76 75
 {
77 76
     const int xx = 16 * s->mb_x + 8*(n&1);
78 77
     const int yy = 16 * s->mb_y + 8*(n>>1);
... ...
@@ -80,7 +81,7 @@ static int RENAME(hpel_motion_search)(MpegEncContext * s,
80 80
     const int my = *my_ptr;
81 81
     const int penalty_factor= s->me.sub_penalty_factor;
82 82
     
83
-    LOAD_COMMON(xx, yy);
83
+    LOAD_COMMON
84 84
     
85 85
  //   INIT;
86 86
  //FIXME factorize
... ...
@@ -139,19 +140,17 @@ static int RENAME(hpel_motion_search)(MpegEncContext * s,
139 139
 #else
140 140
 static int RENAME(hpel_motion_search)(MpegEncContext * s,
141 141
 				  int *mx_ptr, int *my_ptr, int dmin,
142
-				  int xmin, int ymin, int xmax, int ymax,
143
-                                  int pred_x, int pred_y, Picture *ref_picture, 
144
-                                  int n, int size, uint8_t * const mv_penalty)
142
+                                  int pred_x, int pred_y, uint8_t *src_data[3], 
143
+                                  uint8_t *ref_data[3], int stride, int uvstride,
144
+                                  int size, int h, uint8_t * const mv_penalty)
145 145
 {
146
-    const int xx = 16 * s->mb_x + 8*(n&1);
147
-    const int yy = 16 * s->mb_y + 8*(n>>1);
148 146
     const int mx = *mx_ptr;
149 147
     const int my = *my_ptr;   
150 148
     const int penalty_factor= s->me.sub_penalty_factor;
151 149
     me_cmp_func cmp_sub, chroma_cmp_sub;
152 150
     int bx=2*mx, by=2*my;
153 151
 
154
-    LOAD_COMMON(xx, yy);
152
+    LOAD_COMMON
155 153
     
156 154
  //FIXME factorize
157 155
 
... ...
@@ -247,20 +246,18 @@ static int RENAME(hpel_motion_search)(MpegEncContext * s,
247 247
 }
248 248
 #endif
249 249
 
250
-static int RENAME(hpel_get_mb_score)(MpegEncContext * s, int mx, int my, int pred_x, int pred_y, Picture *ref_picture, 
250
+static int RENAME(hpel_get_mb_score)(MpegEncContext * s, int mx, int my, int pred_x, int pred_y, uint8_t *src_data[3], 
251
+                                  uint8_t *ref_data[3], int stride, int uvstride,
251 252
                                   uint8_t * const mv_penalty)
252 253
 {
253 254
 //    const int check_luma= s->dsp.me_sub_cmp != s->dsp.mb_cmp;
254 255
     const int size= 0;
255
-    const int xx = 16 * s->mb_x;
256
-    const int yy = 16 * s->mb_y;
256
+    const int h= 16;
257 257
     const int penalty_factor= s->me.mb_penalty_factor;
258
-    const int xmin= -256*256, ymin= -256*256, xmax= 256*256, ymax= 256*256; //assume that the caller checked these
259
-    const __attribute__((unused)) int unu2= xmin + xmax +ymin + ymax; //no unused warning shit
260 258
     me_cmp_func cmp_sub, chroma_cmp_sub;
261 259
     int d;
262 260
 
263
-    LOAD_COMMON(xx, yy);
261
+    LOAD_COMMON
264 262
     
265 263
  //FIXME factorize
266 264
 
... ...
@@ -295,12 +292,10 @@ static int RENAME(hpel_get_mb_score)(MpegEncContext * s, int mx, int my, int pre
295 295
 
296 296
 static int RENAME(qpel_motion_search)(MpegEncContext * s,
297 297
 				  int *mx_ptr, int *my_ptr, int dmin,
298
-				  int xmin, int ymin, int xmax, int ymax,
299
-                                  int pred_x, int pred_y, Picture *ref_picture, 
300
-                                  int n, int size, uint8_t * const mv_penalty)
298
+                                  int pred_x, int pred_y, uint8_t *src_data[3], 
299
+                                  uint8_t *ref_data[3], int stride, int uvstride,                                  
300
+                                  int size, int h, uint8_t * const mv_penalty)
301 301
 {
302
-    const int xx = 16 * s->mb_x + 8*(n&1);
303
-    const int yy = 16 * s->mb_y + 8*(n>>1);
304 302
     const int mx = *mx_ptr;
305 303
     const int my = *my_ptr;   
306 304
     const int penalty_factor= s->me.sub_penalty_factor;
... ...
@@ -310,7 +305,7 @@ static int RENAME(qpel_motion_search)(MpegEncContext * s,
310 310
     me_cmp_func cmp, chroma_cmp;
311 311
     me_cmp_func cmp_sub, chroma_cmp_sub;
312 312
 
313
-    LOAD_COMMON(xx, yy);
313
+    LOAD_COMMON
314 314
     
315 315
     cmp= s->dsp.me_cmp[size];
316 316
     chroma_cmp= s->dsp.me_cmp[size+1]; //factorize FIXME
... ...
@@ -514,19 +509,17 @@ static int RENAME(qpel_motion_search)(MpegEncContext * s,
514 514
     return dmin;
515 515
 }
516 516
 
517
-static int RENAME(qpel_get_mb_score)(MpegEncContext * s, int mx, int my, int pred_x, int pred_y, Picture *ref_picture, 
517
+static int RENAME(qpel_get_mb_score)(MpegEncContext * s, int mx, int my, int pred_x, int pred_y, uint8_t *src_data[3], 
518
+                                  uint8_t *ref_data[3], int stride, int uvstride,
518 519
                                   uint8_t * const mv_penalty)
519 520
 {
520 521
     const int size= 0;
521
-    const int xx = 16 * s->mb_x;
522
-    const int yy = 16 * s->mb_y;
522
+    const int h= 16;
523 523
     const int penalty_factor= s->me.mb_penalty_factor;
524
-    const int xmin= -256*256, ymin= -256*256, xmax= 256*256, ymax= 256*256; //assume that the caller checked these
525
-    const __attribute__((unused)) int unu2= xmin + xmax +ymin + ymax; //no unused warning shit
526 524
     me_cmp_func cmp_sub, chroma_cmp_sub;
527 525
     int d;
528 526
 
529
-    LOAD_COMMON(xx, yy);
527
+    LOAD_COMMON
530 528
     
531 529
  //FIXME factorize
532 530
 
... ...
@@ -597,15 +590,16 @@ if( (y)>(ymax<<(S)) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x,
597 597
 
598 598
 
599 599
 static inline int RENAME(small_diamond_search)(MpegEncContext * s, int *best, int dmin,
600
-                                       Picture *ref_picture,
600
+                                       uint8_t *src_data[3],
601
+                                       uint8_t *ref_data[3], int stride, int uvstride,
601 602
                                        int const pred_x, int const pred_y, int const penalty_factor,
602
-                                       int const xmin, int const ymin, int const xmax, int const ymax, int const shift,
603
-                                       uint32_t *map, int map_generation, int size, uint8_t * const mv_penalty
603
+                                       int const shift,
604
+                                       uint32_t *map, int map_generation, int size, int h, uint8_t * const mv_penalty
604 605
                                        )
605 606
 {
606 607
     me_cmp_func cmp, chroma_cmp;
607 608
     int next_dir=-1;
608
-    LOAD_COMMON(s->mb_x*16, s->mb_y*16);
609
+    LOAD_COMMON
609 610
     
610 611
     cmp= s->dsp.me_cmp[size];
611 612
     chroma_cmp= s->dsp.me_cmp[size+1];
... ...
@@ -639,15 +633,16 @@ static inline int RENAME(small_diamond_search)(MpegEncContext * s, int *best, in
639 639
 }
640 640
 
641 641
 static inline int RENAME(funny_diamond_search)(MpegEncContext * s, int *best, int dmin,
642
-                                       Picture *ref_picture,
642
+                                       uint8_t *src_data[3],
643
+                                       uint8_t *ref_data[3], int stride, int uvstride,
643 644
                                        int const pred_x, int const pred_y, int const penalty_factor,
644
-                                       int const xmin, int const ymin, int const xmax, int const ymax, int const shift,
645
-                                       uint32_t *map, int map_generation, int size, uint8_t * const mv_penalty
645
+                                       int const shift,
646
+                                       uint32_t *map, int map_generation, int size, int h, uint8_t * const mv_penalty
646 647
                                        )
647 648
 {
648 649
     me_cmp_func cmp, chroma_cmp;
649 650
     int dia_size;
650
-    LOAD_COMMON(s->mb_x*16, s->mb_y*16);
651
+    LOAD_COMMON
651 652
     
652 653
     cmp= s->dsp.me_cmp[size];
653 654
     chroma_cmp= s->dsp.me_cmp[size+1];
... ...
@@ -730,17 +725,18 @@ if(256*256*256*64 % (stats[0]+1)==0){
730 730
 
731 731
 #define MAX_SAB_SIZE 16
732 732
 static inline int RENAME(sab_diamond_search)(MpegEncContext * s, int *best, int dmin,
733
-                                       Picture *ref_picture,
733
+                                       uint8_t *src_data[3],
734
+                                       uint8_t *ref_data[3], int stride, int uvstride,
734 735
                                        int const pred_x, int const pred_y, int const penalty_factor,
735
-                                       int const xmin, int const ymin, int const xmax, int const ymax, int const shift,
736
-                                       uint32_t *map, int map_generation, int size, uint8_t * const mv_penalty
736
+                                       int const shift,
737
+                                       uint32_t *map, int map_generation, int size, int h, uint8_t * const mv_penalty
737 738
                                        )
738 739
 {
739 740
     me_cmp_func cmp, chroma_cmp;
740 741
     Minima minima[MAX_SAB_SIZE];
741 742
     const int minima_count= ABS(s->me.dia_size);
742 743
     int i, j;
743
-    LOAD_COMMON(s->mb_x*16, s->mb_y*16);
744
+    LOAD_COMMON
744 745
     
745 746
     cmp= s->dsp.me_cmp[size];
746 747
     chroma_cmp= s->dsp.me_cmp[size+1];
... ...
@@ -810,15 +806,16 @@ static inline int RENAME(sab_diamond_search)(MpegEncContext * s, int *best, int
810 810
 }
811 811
 
812 812
 static inline int RENAME(var_diamond_search)(MpegEncContext * s, int *best, int dmin,
813
-                                       Picture *ref_picture,
813
+                                       uint8_t *src_data[3],
814
+                                       uint8_t *ref_data[3], int stride, int uvstride,
814 815
                                        int const pred_x, int const pred_y, int const penalty_factor,
815
-                                       int const xmin, int const ymin, int const xmax, int const ymax, int const shift,
816
-                                       uint32_t *map, int map_generation, int size, uint8_t * const mv_penalty
816
+                                       int const shift,
817
+                                       uint32_t *map, int map_generation, int size, int h, uint8_t * const mv_penalty
817 818
                                        )
818 819
 {
819 820
     me_cmp_func cmp, chroma_cmp;
820 821
     int dia_size;
821
-    LOAD_COMMON(s->mb_x*16, s->mb_y*16);
822
+    LOAD_COMMON
822 823
     
823 824
     cmp= s->dsp.me_cmp[size];
824 825
     chroma_cmp= s->dsp.me_cmp[size+1];
... ...
@@ -886,10 +883,10 @@ if(256*256*256*64 % (stats[0]+1)==0){
886 886
     return dmin;    
887 887
 }
888 888
 
889
-static int RENAME(epzs_motion_search)(MpegEncContext * s, int block,
889
+static int RENAME(epzs_motion_search)(MpegEncContext * s,
890 890
                              int *mx_ptr, int *my_ptr,
891
-                             int P[10][2], int pred_x, int pred_y,
892
-                             int xmin, int ymin, int xmax, int ymax, Picture *ref_picture, int16_t (*last_mv)[2], 
891
+                             int P[10][2], int pred_x, int pred_y, uint8_t *src_data[3], 
892
+                             uint8_t *ref_data[3], int stride, int uvstride, int16_t (*last_mv)[2], 
893 893
                              int ref_mv_scale, uint8_t * const mv_penalty)
894 894
 {
895 895
     int best[2]={0, 0};
... ...
@@ -899,10 +896,11 @@ static int RENAME(epzs_motion_search)(MpegEncContext * s, int block,
899 899
     int map_generation;
900 900
     const int penalty_factor= s->me.penalty_factor;
901 901
     const int size=0;
902
-    const int ref_mv_stride= s->mb_stride;
903
-    const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride;
902
+    const int h=16;
903
+    const int ref_mv_stride= s->mb_stride; //pass as arg  FIXME
904
+    const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride; //add to last_mv before passing FIXME
904 905
     me_cmp_func cmp, chroma_cmp;
905
-    LOAD_COMMON(s->mb_x*16, s->mb_y*16);
906
+    LOAD_COMMON
906 907
     
907 908
     cmp= s->dsp.me_cmp[size];
908 909
     chroma_cmp= s->dsp.me_cmp[size+1];
... ...
@@ -973,21 +971,21 @@ static int RENAME(epzs_motion_search)(MpegEncContext * s, int block,
973 973
 
974 974
 //check(best[0],best[1],0, b0)
975 975
     if(s->me.dia_size==-1)
976
-        dmin= RENAME(funny_diamond_search)(s, best, dmin, ref_picture,
977
-                                   pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax, 
978
-				   shift, map, map_generation, size, mv_penalty);
976
+        dmin= RENAME(funny_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
977
+                                   pred_x, pred_y, penalty_factor, 
978
+				   shift, map, map_generation, size, h, mv_penalty);
979 979
     else if(s->me.dia_size<-1)
980
-        dmin= RENAME(sab_diamond_search)(s, best, dmin, ref_picture,
981
-                                   pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax, 
982
-				   shift, map, map_generation, size, mv_penalty);
980
+        dmin= RENAME(sab_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
981
+                                   pred_x, pred_y, penalty_factor, 
982
+				   shift, map, map_generation, size, h, mv_penalty);
983 983
     else if(s->me.dia_size<2)
984
-        dmin= RENAME(small_diamond_search)(s, best, dmin, ref_picture,
985
-                                   pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax, 
986
-				   shift, map, map_generation, size, mv_penalty);
984
+        dmin= RENAME(small_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
985
+                                   pred_x, pred_y, penalty_factor, 
986
+				   shift, map, map_generation, size, h, mv_penalty);
987 987
     else
988
-        dmin= RENAME(var_diamond_search)(s, best, dmin, ref_picture,
989
-                                   pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax, 
990
-				   shift, map, map_generation, size, mv_penalty);
988
+        dmin= RENAME(var_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
989
+                                   pred_x, pred_y, penalty_factor, 
990
+				   shift, map, map_generation, size, h, mv_penalty);
991 991
 
992 992
 //check(best[0],best[1],0, b1)
993 993
     *mx_ptr= best[0];
... ...
@@ -998,10 +996,11 @@ static int RENAME(epzs_motion_search)(MpegEncContext * s, int block,
998 998
 }
999 999
 
1000 1000
 #ifndef CMP_DIRECT /* no 4mv search needed in direct mode */
1001
-static int RENAME(epzs_motion_search4)(MpegEncContext * s, int block,
1001
+static int RENAME(epzs_motion_search4)(MpegEncContext * s,
1002 1002
                              int *mx_ptr, int *my_ptr,
1003 1003
                              int P[10][2], int pred_x, int pred_y,
1004
-                             int xmin, int ymin, int xmax, int ymax, Picture *ref_picture, int16_t (*last_mv)[2], 
1004
+                             uint8_t *src_data[3], 
1005
+                             uint8_t *ref_data[3], int stride, int uvstride, int16_t (*last_mv)[2], 
1005 1006
                              int ref_mv_scale, uint8_t * const mv_penalty)
1006 1007
 {
1007 1008
     int best[2]={0, 0};
... ...
@@ -1011,10 +1010,11 @@ static int RENAME(epzs_motion_search4)(MpegEncContext * s, int block,
1011 1011
     int map_generation;
1012 1012
     const int penalty_factor= s->me.penalty_factor;
1013 1013
     const int size=1;
1014
+    const int h=8;
1014 1015
     const int ref_mv_stride= s->mb_stride;
1015 1016
     const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
1016 1017
     me_cmp_func cmp, chroma_cmp;
1017
-    LOAD_COMMON((s->mb_x*2 + (block&1))*8, (s->mb_y*2 + (block>>1))*8);
1018
+    LOAD_COMMON
1018 1019
     
1019 1020
     cmp= s->dsp.me_cmp[size];
1020 1021
     chroma_cmp= s->dsp.me_cmp[size+1];
... ...
@@ -1024,7 +1024,7 @@ static int RENAME(epzs_motion_search4)(MpegEncContext * s, int block,
1024 1024
     dmin = 1000000;
1025 1025
 //printf("%d %d %d %d //",xmin, ymin, xmax, ymax); 
1026 1026
     /* first line */
1027
-    if (s->mb_y == 0 && block<2) {
1027
+    if (s->mb_y == 0/* && block<2*/) {
1028 1028
 	CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1029 1029
         CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, 
1030 1030
                         (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
... ...
@@ -1049,21 +1049,100 @@ static int RENAME(epzs_motion_search4)(MpegEncContext * s, int block,
1049 1049
     }
1050 1050
 
1051 1051
     if(s->me.dia_size==-1)
1052
-        dmin= RENAME(funny_diamond_search)(s, best, dmin, ref_picture,
1053
-                                   pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax, 
1054
-				   shift, map, map_generation, size, mv_penalty);
1052
+        dmin= RENAME(funny_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
1053
+                                   pred_x, pred_y, penalty_factor, 
1054
+				   shift, map, map_generation, size, h, mv_penalty);
1055 1055
     else if(s->me.dia_size<-1)
1056
-        dmin= RENAME(sab_diamond_search)(s, best, dmin, ref_picture,
1057
-                                   pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax, 
1058
-				   shift, map, map_generation, size, mv_penalty);
1056
+        dmin= RENAME(sab_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
1057
+                                   pred_x, pred_y, penalty_factor, 
1058
+				   shift, map, map_generation, size, h, mv_penalty);
1059 1059
     else if(s->me.dia_size<2)
1060
-        dmin= RENAME(small_diamond_search)(s, best, dmin, ref_picture,
1061
-                                   pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax, 
1062
-				   shift, map, map_generation, size, mv_penalty);
1060
+        dmin= RENAME(small_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
1061
+                                   pred_x, pred_y, penalty_factor, 
1062
+				   shift, map, map_generation, size, h, mv_penalty);
1063 1063
     else
1064
-        dmin= RENAME(var_diamond_search)(s, best, dmin, ref_picture,
1065
-                                   pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax, 
1066
-				   shift, map, map_generation, size, mv_penalty);
1064
+        dmin= RENAME(var_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
1065
+                                   pred_x, pred_y, penalty_factor, 
1066
+				   shift, map, map_generation, size, h, mv_penalty);
1067
+
1068
+
1069
+    *mx_ptr= best[0];
1070
+    *my_ptr= best[1];    
1071
+
1072
+//    printf("%d %d %d \n", best[0], best[1], dmin);
1073
+    return dmin;
1074
+}
1075
+
1076
+//try to merge with above FIXME (needs PSNR test)
1077
+static int RENAME(epzs_motion_search2)(MpegEncContext * s,
1078
+                             int *mx_ptr, int *my_ptr,
1079
+                             int P[10][2], int pred_x, int pred_y,
1080
+                             uint8_t *src_data[3], 
1081
+                             uint8_t *ref_data[3], int stride, int uvstride, int16_t (*last_mv)[2], 
1082
+                             int ref_mv_scale, uint8_t * const mv_penalty)
1083
+{
1084
+    int best[2]={0, 0};
1085
+    int d, dmin; 
1086
+    const int shift= 1+s->quarter_sample;
1087
+    uint32_t *map= s->me.map;
1088
+    int map_generation;
1089
+    const int penalty_factor= s->me.penalty_factor;
1090
+    const int size=0; //FIXME pass as arg
1091
+    const int h=8;
1092
+    const int ref_mv_stride= s->mb_stride;
1093
+    const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
1094
+    me_cmp_func cmp, chroma_cmp;
1095
+    LOAD_COMMON
1096
+    
1097
+    cmp= s->dsp.me_cmp[size];
1098
+    chroma_cmp= s->dsp.me_cmp[size+1];
1099
+
1100
+    map_generation= update_map_generation(s);
1101
+
1102
+    dmin = 1000000;
1103
+//printf("%d %d %d %d //",xmin, ymin, xmax, ymax); 
1104
+    /* first line */
1105
+    if (s->mb_y == 0) {
1106
+	CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1107
+        CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, 
1108
+                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1109
+        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1110
+    }else{
1111
+        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1112
+        //FIXME try some early stop
1113
+        if(dmin>64*2){
1114
+            CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
1115
+            CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1116
+            CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
1117
+            CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
1118
+            CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, 
1119
+                            (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1120
+        }
1121
+    }
1122
+    if(dmin>64*4){
1123
+        CHECK_CLIPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16, 
1124
+                        (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
1125
+        CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16, 
1126
+                        (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
1127
+    }
1128
+
1129
+    if(s->me.dia_size==-1)
1130
+        dmin= RENAME(funny_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
1131
+                                   pred_x, pred_y, penalty_factor, 
1132
+				   shift, map, map_generation, size, h, mv_penalty);
1133
+    else if(s->me.dia_size<-1)
1134
+        dmin= RENAME(sab_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
1135
+                                   pred_x, pred_y, penalty_factor, 
1136
+				   shift, map, map_generation, size, h, mv_penalty);
1137
+    else if(s->me.dia_size<2)
1138
+        dmin= RENAME(small_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
1139
+                                   pred_x, pred_y, penalty_factor, 
1140
+				   shift, map, map_generation, size, h, mv_penalty);
1141
+    else
1142
+        dmin= RENAME(var_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
1143
+                                   pred_x, pred_y, penalty_factor, 
1144
+				   shift, map, map_generation, size, h, mv_penalty);
1145
+
1067 1146
 
1068 1147
     *mx_ptr= best[0];
1069 1148
     *my_ptr= best[1];    
... ...
@@ -29,6 +29,9 @@
29 29
 
30 30
 #include "mpeg12data.h"
31 31
 
32
+//#undef NDEBUG
33
+//#include <assert.h>
34
+
32 35
 
33 36
 /* Start codes. */
34 37
 #define SEQ_END_CODE		0x000001b7
... ...
@@ -476,12 +479,12 @@ void mpeg1_encode_picture_header(MpegEncContext *s, int picture_number)
476 476
 }
477 477
 
478 478
 static inline void put_mb_modes(MpegEncContext *s, int n, int bits, 
479
-                                int has_mv)
479
+                                int has_mv, int field_motion)
480 480
 {
481 481
     put_bits(&s->pb, n, bits);
482 482
     if (!s->frame_pred_frame_dct) {
483 483
         if (has_mv) 
484
-            put_bits(&s->pb, 2, 2); /* motion_type: frame */
484
+            put_bits(&s->pb, 2, 2 - field_motion); /* motion_type: frame/field */
485 485
         put_bits(&s->pb, 1, s->interlaced_dct);
486 486
     }
487 487
 }
... ...
@@ -501,9 +504,9 @@ void mpeg1_encode_mb(MpegEncContext *s,
501 501
         if (s->block_last_index[i] >= 0)
502 502
             cbp |= 1 << (5 - i);
503 503
     }
504
-
504
+    
505 505
     if (cbp == 0 && !first_mb && (mb_x != s->mb_width - 1 || (mb_y != s->mb_height - 1 && s->codec_id == CODEC_ID_MPEG1VIDEO)) && 
506
-        ((s->pict_type == P_TYPE && (motion_x | motion_y) == 0) ||
506
+        ((s->pict_type == P_TYPE && s->mv_type == MV_TYPE_16X16 && (motion_x | motion_y) == 0) ||
507 507
         (s->pict_type == B_TYPE && s->mv_dir == s->last_mv_dir && (((s->mv_dir & MV_DIR_FORWARD) ? ((s->mv[0][0][0] - s->last_mv[0][0][0])|(s->mv[0][0][1] - s->last_mv[0][0][1])) : 0) |
508 508
         ((s->mv_dir & MV_DIR_BACKWARD) ? ((s->mv[1][0][0] - s->last_mv[1][0][0])|(s->mv[1][0][1] - s->last_mv[1][0][1])) : 0)) == 0))) {
509 509
         s->mb_skip_run++;
... ...
@@ -511,6 +514,10 @@ void mpeg1_encode_mb(MpegEncContext *s,
511 511
         s->skip_count++;
512 512
         s->misc_bits++;
513 513
         s->last_bits++;
514
+        if(s->pict_type == P_TYPE){
515
+            s->last_mv[0][1][0]= s->last_mv[0][0][0]= 
516
+            s->last_mv[0][1][1]= s->last_mv[0][0][1]= 0;
517
+        }
514 518
     } else {
515 519
         if(first_mb){
516 520
             assert(s->mb_skip_run == 0);
... ...
@@ -521,150 +528,167 @@ void mpeg1_encode_mb(MpegEncContext *s,
521 521
         
522 522
         if (s->pict_type == I_TYPE) {
523 523
             if(s->dquant && cbp){
524
-                put_mb_modes(s, 2, 1, 0); /* macroblock_type : macroblock_quant = 1 */
524
+                put_mb_modes(s, 2, 1, 0, 0); /* macroblock_type : macroblock_quant = 1 */
525 525
                 put_bits(&s->pb, 5, s->qscale);
526 526
             }else{
527
-                put_mb_modes(s, 1, 1, 0); /* macroblock_type : macroblock_quant = 0 */
527
+                put_mb_modes(s, 1, 1, 0, 0); /* macroblock_type : macroblock_quant = 0 */
528 528
                 s->qscale -= s->dquant;
529 529
             }
530 530
             s->misc_bits+= get_bits_diff(s);
531 531
             s->i_count++;
532 532
         } else if (s->mb_intra) {
533 533
             if(s->dquant && cbp){
534
-                put_mb_modes(s, 6, 0x01, 0);
534
+                put_mb_modes(s, 6, 0x01, 0, 0);
535 535
                 put_bits(&s->pb, 5, s->qscale);
536 536
             }else{
537
-                put_mb_modes(s, 5, 0x03, 0);
537
+                put_mb_modes(s, 5, 0x03, 0, 0);
538 538
                 s->qscale -= s->dquant;
539 539
             }
540 540
             s->misc_bits+= get_bits_diff(s);
541 541
             s->i_count++;
542
-            s->last_mv[0][0][0] = 
543
-            s->last_mv[0][0][1] = 0;
542
+            memset(s->last_mv, 0, sizeof(s->last_mv));
544 543
         } else if (s->pict_type == P_TYPE) { 
544
+            if(s->mv_type == MV_TYPE_16X16){
545 545
                 if (cbp != 0) {
546
-                    if (motion_x == 0 && motion_y == 0) {
546
+                    if ((motion_x|motion_y) == 0) {
547 547
                         if(s->dquant){
548
-                            put_mb_modes(s, 5, 1, 0); /* macroblock_pattern & quant */
548
+                            put_mb_modes(s, 5, 1, 0, 0); /* macroblock_pattern & quant */
549 549
                             put_bits(&s->pb, 5, s->qscale);
550 550
                         }else{
551
-                            put_mb_modes(s, 2, 1, 0); /* macroblock_pattern only */
551
+                            put_mb_modes(s, 2, 1, 0, 0); /* macroblock_pattern only */
552 552
                         }
553 553
                         s->misc_bits+= get_bits_diff(s);
554
-                        put_bits(&s->pb, mbPatTable[cbp - 1][1], mbPatTable[cbp - 1][0]);
555 554
                     } else {
556 555
                         if(s->dquant){
557
-                            put_mb_modes(s, 5, 2, 1); /* motion + cbp */
556
+                            put_mb_modes(s, 5, 2, 1, 0); /* motion + cbp */
558 557
                             put_bits(&s->pb, 5, s->qscale);
559 558
                         }else{
560
-                            put_mb_modes(s, 1, 1, 1); /* motion + cbp */
559
+                            put_mb_modes(s, 1, 1, 1, 0); /* motion + cbp */
561 560
                         }
562 561
                         s->misc_bits+= get_bits_diff(s);
563 562
                         mpeg1_encode_motion(s, motion_x - s->last_mv[0][0][0], s->f_code);    // RAL: f_code parameter added
564 563
                         mpeg1_encode_motion(s, motion_y - s->last_mv[0][0][1], s->f_code);    // RAL: f_code parameter added
565 564
                         s->mv_bits+= get_bits_diff(s);
566
-                        put_bits(&s->pb, mbPatTable[cbp - 1][1], mbPatTable[cbp - 1][0]);
567 565
                     }
568 566
                 } else {
569 567
                     put_bits(&s->pb, 3, 1); /* motion only */
570 568
                     if (!s->frame_pred_frame_dct)
571 569
                         put_bits(&s->pb, 2, 2); /* motion_type: frame */
570
+                    s->misc_bits+= get_bits_diff(s);
572 571
                     mpeg1_encode_motion(s, motion_x - s->last_mv[0][0][0], s->f_code);    // RAL: f_code parameter added
573 572
                     mpeg1_encode_motion(s, motion_y - s->last_mv[0][0][1], s->f_code);    // RAL: f_code parameter added
574 573
                     s->qscale -= s->dquant;
575 574
                     s->mv_bits+= get_bits_diff(s);
576 575
                 }
577
-                s->f_count++;
578
-        } else
579
-            {    // RAL: All the following bloc added for B frames:
580
-                if (cbp != 0)
581
-                    {    // With coded bloc pattern
582
-                    if (s->mv_dir == (MV_DIR_FORWARD | MV_DIR_BACKWARD))
583
-                        {    // Bi-directional motion
584
-                        if (s->dquant) {
585
-                            put_mb_modes(s, 5, 2, 1);
586
-                            put_bits(&s->pb, 5, s->qscale);
587
-                        } else {
588
-                            put_mb_modes(s, 2, 3, 1);
589
-                        }
590
-                        s->misc_bits += get_bits_diff(s);
591
-                        mpeg1_encode_motion(s, s->mv[0][0][0] - s->last_mv[0][0][0], s->f_code);
592
-                        mpeg1_encode_motion(s, s->mv[0][0][1] - s->last_mv[0][0][1], s->f_code);
593
-                        mpeg1_encode_motion(s, s->mv[1][0][0] - s->last_mv[1][0][0], s->b_code);
594
-                        mpeg1_encode_motion(s, s->mv[1][0][1] - s->last_mv[1][0][1], s->b_code);
595
-                        s->b_count++;
596
-                        s->f_count++;
597
-                        s->mv_bits += get_bits_diff(s);
598
-                        put_bits(&s->pb, mbPatTable[cbp - 1][1], mbPatTable[cbp - 1][0]);
599
-                        }
600
-                    else if (s->mv_dir == MV_DIR_BACKWARD)
601
-                        {    // Backward motion
602
-                        if (s->dquant) {
603
-                            put_mb_modes(s, 6, 2, 1);
604
-                            put_bits(&s->pb, 5, s->qscale);
605
-                        } else {
606
-                            put_mb_modes(s, 3, 3, 1);
607
-                        }
608
-                        s->misc_bits += get_bits_diff(s);
609
-                        mpeg1_encode_motion(s, motion_x - s->last_mv[1][0][0], s->b_code); 
610
-                        mpeg1_encode_motion(s, motion_y - s->last_mv[1][0][1], s->b_code); 
611
-                        s->b_count++;
612
-                        s->mv_bits += get_bits_diff(s);
613
-                        put_bits(&s->pb, mbPatTable[cbp - 1][1], mbPatTable[cbp - 1][0]);
614
-                        }
615
-                    else if (s->mv_dir == MV_DIR_FORWARD)
616
-                        {    // Forward motion
617
-                        if (s->dquant) {
618
-                            put_mb_modes(s, 6, 3, 1);
619
-                            put_bits(&s->pb, 5, s->qscale);
620
-                        } else {
621
-                            put_mb_modes(s, 4, 3, 1);
622
-                        }
623
-                        s->misc_bits += get_bits_diff(s);
624
-                        mpeg1_encode_motion(s, motion_x - s->last_mv[0][0][0], s->f_code); 
625
-                        mpeg1_encode_motion(s, motion_y - s->last_mv[0][0][1], s->f_code); 
626
-                        s->f_count++;
627
-                        s->mv_bits += get_bits_diff(s);
628
-                        put_bits(&s->pb, mbPatTable[cbp - 1][1], mbPatTable[cbp - 1][0]);
629
-                        }
576
+                s->last_mv[0][1][0]= s->last_mv[0][0][0]= motion_x;
577
+                s->last_mv[0][1][1]= s->last_mv[0][0][1]= motion_y;
578
+            }else{
579
+                assert(!s->frame_pred_frame_dct && s->mv_type == MV_TYPE_FIELD);
580
+
581
+                if (cbp) {
582
+                    if(s->dquant){
583
+                        put_mb_modes(s, 5, 2, 1, 1); /* motion + cbp */
584
+                        put_bits(&s->pb, 5, s->qscale);
585
+                    }else{
586
+                        put_mb_modes(s, 1, 1, 1, 1); /* motion + cbp */
630 587
                     }
631
-                else
632
-                    {    // No coded bloc pattern
633
-                    if (s->mv_dir == (MV_DIR_FORWARD | MV_DIR_BACKWARD))
634
-                        {    // Bi-directional motion 
635
-                        put_bits(&s->pb, 2, 2); /* backward & forward motion */
636
-                        if (!s->frame_pred_frame_dct)
637
-                            put_bits(&s->pb, 2, 2); /* motion_type: frame */
638
-                        mpeg1_encode_motion(s, s->mv[0][0][0] - s->last_mv[0][0][0], s->f_code);
639
-                        mpeg1_encode_motion(s, s->mv[0][0][1] - s->last_mv[0][0][1], s->f_code);
640
-                        mpeg1_encode_motion(s, s->mv[1][0][0] - s->last_mv[1][0][0], s->b_code);
641
-                        mpeg1_encode_motion(s, s->mv[1][0][1] - s->last_mv[1][0][1], s->b_code);
642
-                        s->b_count++;
643
-                        s->f_count++;
644
-                        }
645
-                    else if (s->mv_dir == MV_DIR_BACKWARD)
646
-                        {    // Backward motion
647
-                        put_bits(&s->pb, 3, 2); /* backward motion only */
648
-                        if (!s->frame_pred_frame_dct)
649
-                            put_bits(&s->pb, 2, 2); /* motion_type: frame */
650
-                        mpeg1_encode_motion(s, motion_x - s->last_mv[1][0][0], s->b_code); 
651
-                        mpeg1_encode_motion(s, motion_y - s->last_mv[1][0][1], s->b_code); 
652
-                        s->b_count++;
653
-                        }
654
-                    else if (s->mv_dir == MV_DIR_FORWARD)
655
-                        {    // Forward motion
656
-                        put_bits(&s->pb, 4, 2); /* forward motion only */
657
-                        if (!s->frame_pred_frame_dct)
658
-                            put_bits(&s->pb, 2, 2); /* motion_type: frame */
659
-                        mpeg1_encode_motion(s, motion_x - s->last_mv[0][0][0], s->f_code); 
660
-                        mpeg1_encode_motion(s, motion_y - s->last_mv[0][0][1], s->f_code); 
661
-                        s->f_count++;
662
-                        }
588
+                } else {
589
+                    put_bits(&s->pb, 3, 1); /* motion only */
590
+                    put_bits(&s->pb, 2, 1); /* motion_type: field */
663 591
                     s->qscale -= s->dquant;
664
-                    s->mv_bits += get_bits_diff(s);
592
+                }
593
+                s->misc_bits+= get_bits_diff(s);
594
+                for(i=0; i<2; i++){
595
+                    put_bits(&s->pb, 1, s->field_select[0][i]);
596
+                    mpeg1_encode_motion(s, s->mv[0][i][0] -  s->last_mv[0][i][0]    , s->f_code);
597
+                    mpeg1_encode_motion(s, s->mv[0][i][1] - (s->last_mv[0][i][1]>>1), s->f_code);
598
+                    s->last_mv[0][i][0]=   s->mv[0][i][0];
599
+                    s->last_mv[0][i][1]= 2*s->mv[0][i][1];
600
+                }
601
+                s->mv_bits+= get_bits_diff(s);
602
+            }
603
+            if(cbp)
604
+                put_bits(&s->pb, mbPatTable[cbp - 1][1], mbPatTable[cbp - 1][0]);
605
+            s->f_count++;
606
+        } else{  
607
+            static const int mb_type_len[4]={0,3,4,2}; //bak,for,bi
608
+
609
+            if(s->mv_type == MV_TYPE_16X16){
610
+                if (cbp){    // With coded block pattern
611
+                    if (s->dquant) {
612
+                        if(s->mv_dir == MV_DIR_FORWARD)
613
+                            put_mb_modes(s, 6, 3, 1, 0);
614
+                        else
615
+                            put_mb_modes(s, mb_type_len[s->mv_dir]+3, 2, 1, 0);
616
+                        put_bits(&s->pb, 5, s->qscale);
617
+                    } else {
618
+                        put_mb_modes(s, mb_type_len[s->mv_dir], 3, 1, 0);
619
+                    }
620
+                }else{    // No coded block pattern
621
+                    put_bits(&s->pb, mb_type_len[s->mv_dir], 2);
622
+                    if (!s->frame_pred_frame_dct)
623
+                        put_bits(&s->pb, 2, 2); /* motion_type: frame */
624
+                    s->qscale -= s->dquant;
625
+                }
626
+                s->misc_bits += get_bits_diff(s);
627
+                if (s->mv_dir&MV_DIR_FORWARD){
628
+                    mpeg1_encode_motion(s, s->mv[0][0][0] - s->last_mv[0][0][0], s->f_code); 
629
+                    mpeg1_encode_motion(s, s->mv[0][0][1] - s->last_mv[0][0][1], s->f_code); 
630
+                    s->last_mv[0][0][0]=s->last_mv[0][1][0]= s->mv[0][0][0];
631
+                    s->last_mv[0][0][1]=s->last_mv[0][1][1]= s->mv[0][0][1];
632
+                    s->f_count++;
633
+                }
634
+                if (s->mv_dir&MV_DIR_BACKWARD){
635
+                    mpeg1_encode_motion(s, s->mv[1][0][0] - s->last_mv[1][0][0], s->b_code); 
636
+                    mpeg1_encode_motion(s, s->mv[1][0][1] - s->last_mv[1][0][1], s->b_code); 
637
+                    s->last_mv[1][0][0]=s->last_mv[1][1][0]= s->mv[1][0][0];
638
+                    s->last_mv[1][0][1]=s->last_mv[1][1][1]= s->mv[1][0][1];
639
+                    s->b_count++;
640
+                }
641
+            }else{
642
+                assert(s->mv_type == MV_TYPE_FIELD);
643
+                assert(!s->frame_pred_frame_dct);
644
+                if (cbp){    // With coded block pattern
645
+                    if (s->dquant) {
646
+                        if(s->mv_dir == MV_DIR_FORWARD)
647
+                            put_mb_modes(s, 6, 3, 1, 1);
648
+                        else
649
+                            put_mb_modes(s, mb_type_len[s->mv_dir]+3, 2, 1, 1);
650
+                        put_bits(&s->pb, 5, s->qscale);
651
+                    } else {
652
+                        put_mb_modes(s, mb_type_len[s->mv_dir], 3, 1, 1);
665 653
                     }
666
-            // End of bloc from RAL
654
+                }else{    // No coded block pattern
655
+                    put_bits(&s->pb, mb_type_len[s->mv_dir], 2);
656
+                    put_bits(&s->pb, 2, 1); /* motion_type: field */
657
+                    s->qscale -= s->dquant;
658
+                }
659
+                s->misc_bits += get_bits_diff(s);
660
+                if (s->mv_dir&MV_DIR_FORWARD){
661
+                    for(i=0; i<2; i++){
662
+                        put_bits(&s->pb, 1, s->field_select[0][i]);
663
+                        mpeg1_encode_motion(s, s->mv[0][i][0] -  s->last_mv[0][i][0]    , s->f_code);
664
+                        mpeg1_encode_motion(s, s->mv[0][i][1] - (s->last_mv[0][i][1]>>1), s->f_code);
665
+                        s->last_mv[0][i][0]=   s->mv[0][i][0];
666
+                        s->last_mv[0][i][1]= 2*s->mv[0][i][1];
667
+                    }
668
+                    s->f_count++;
669
+                }
670
+                if (s->mv_dir&MV_DIR_BACKWARD){
671
+                    for(i=0; i<2; i++){
672
+                        put_bits(&s->pb, 1, s->field_select[1][i]);
673
+                        mpeg1_encode_motion(s, s->mv[1][i][0] -  s->last_mv[1][i][0]    , s->b_code);
674
+                        mpeg1_encode_motion(s, s->mv[1][i][1] - (s->last_mv[1][i][1]>>1), s->b_code);
675
+                        s->last_mv[1][i][0]=   s->mv[1][i][0];
676
+                        s->last_mv[1][i][1]= 2*s->mv[1][i][1];
677
+                    }
678
+                    s->b_count++;
679
+                }
667 680
             }
681
+            s->mv_bits += get_bits_diff(s);
682
+            if(cbp)
683
+                put_bits(&s->pb, mbPatTable[cbp - 1][1], mbPatTable[cbp - 1][0]);
684
+        }
668 685
         for(i=0;i<6;i++) {
669 686
             if (cbp & (1 << (5 - i))) {
670 687
                 mpeg1_encode_block(s, block[i], i);
... ...
@@ -676,18 +700,6 @@ void mpeg1_encode_mb(MpegEncContext *s,
676 676
         else
677 677
             s->p_tex_bits+= get_bits_diff(s);
678 678
     }
679
-
680
-    // RAL: By this:
681
-    if (s->mv_dir & MV_DIR_FORWARD)
682
-        {
683
-        s->last_mv[0][0][0]= s->mv[0][0][0];
684
-        s->last_mv[0][0][1]= s->mv[0][0][1];
685
-        }
686
-    if (s->mv_dir & MV_DIR_BACKWARD)
687
-        {
688
-        s->last_mv[1][0][0]= s->mv[1][0][0];
689
-        s->last_mv[1][0][1]= s->mv[1][0][1];
690
-        }
691 679
 }
692 680
 
693 681
 // RAL: Parameter added: f_or_b_code
... ...
@@ -1952,7 +1964,7 @@ static void mpeg_decode_picture_coding_extension(MpegEncContext *s)
1952 1952
     s->repeat_first_field = get_bits1(&s->gb);
1953 1953
     s->chroma_420_type = get_bits1(&s->gb);
1954 1954
     s->progressive_frame = get_bits1(&s->gb);
1955
-    
1955
+
1956 1956
     if(s->picture_structure == PICT_FRAME)
1957 1957
         s->first_field=0;
1958 1958
     else{
... ...
@@ -1963,13 +1975,9 @@ static void mpeg_decode_picture_coding_extension(MpegEncContext *s)
1963 1963
     if(s->alternate_scan){
1964 1964
         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
1965 1965
         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
1966
-        ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_vertical_scan);
1967
-        ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
1968 1966
     }else{
1969 1967
         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
1970 1968
         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
1971
-        ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
1972
-        ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
1973 1969
     }
1974 1970
     
1975 1971
     /* composite display not parsed */
... ...
@@ -2103,10 +2111,10 @@ static int mpeg_decode_slice(AVCodecContext *avctx,
2103 2103
     s->qscale = get_qscale(s);
2104 2104
     if (s->first_slice && (s->first_field || s->picture_structure==PICT_FRAME)) {
2105 2105
         if(s->avctx->debug&FF_DEBUG_PICT_INFO){
2106
-             av_log(s->avctx, AV_LOG_DEBUG, "qp:%d fc:%2d%2d%2d%2d %s %s %s %s dc:%d pstruct:%d fdct:%d cmv:%d qtype:%d ivlc:%d rff:%d %s\n", 
2106
+             av_log(s->avctx, AV_LOG_DEBUG, "qp:%d fc:%2d%2d%2d%2d %s %s %s %s %s dc:%d pstruct:%d fdct:%d cmv:%d qtype:%d ivlc:%d rff:%d %s\n", 
2107 2107
                  s->qscale, s->mpeg_f_code[0][0],s->mpeg_f_code[0][1],s->mpeg_f_code[1][0],s->mpeg_f_code[1][1],
2108 2108
                  s->pict_type == I_TYPE ? "I" : (s->pict_type == P_TYPE ? "P" : (s->pict_type == B_TYPE ? "B" : "S")), 
2109
-                 s->progressive_sequence ? "pro" :"", s->alternate_scan ? "alt" :"", s->top_field_first ? "top" :"", 
2109
+                 s->progressive_sequence ? "ps" :"", s->progressive_frame ? "pf" : "", s->alternate_scan ? "alt" :"", s->top_field_first ? "top" :"", 
2110 2110
                  s->intra_dc_precision, s->picture_structure, s->frame_pred_frame_dct, s->concealment_motion_vectors,
2111 2111
                  s->q_scale_type, s->intra_vlc_format, s->repeat_first_field, s->chroma_420_type ? "420" :"");
2112 2112
         }
... ...
@@ -252,8 +252,13 @@ int DCT_common_init(MpegEncContext *s)
252 252
     /* load & permutate scantables
253 253
        note: only wmv uses differnt ones 
254 254
     */
255
-    ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
256
-    ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
255
+    if(s->alternate_scan){
256
+        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
257
+        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
258
+    }else{
259
+        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
260
+        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
261
+    }
257 262
     ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
258 263
     ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
259 264
 
... ...
@@ -394,7 +399,7 @@ static void free_picture(MpegEncContext *s, Picture *pic){
394 394
 /* init common structure for both encoder and decoder */
395 395
 int MPV_common_init(MpegEncContext *s)
396 396
 {
397
-    int y_size, c_size, yc_size, i, mb_array_size, x, y;
397
+    int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y;
398 398
 
399 399
     dsputil_init(&s->dsp, s->avctx);
400 400
     DCT_common_init(s);
... ...
@@ -407,6 +412,7 @@ int MPV_common_init(MpegEncContext *s)
407 407
     s->b8_stride = s->mb_width*2 + 1;
408 408
     s->b4_stride = s->mb_width*4 + 1;
409 409
     mb_array_size= s->mb_height * s->mb_stride;
410
+    mv_table_size= (s->mb_height+2) * s->mb_stride + 1;
410 411
 
411 412
     /* set default edge pos, will be overridden in decode_header if needed */
412 413
     s->h_edge_pos= s->mb_width*16;
... ...
@@ -458,8 +464,6 @@ int MPV_common_init(MpegEncContext *s)
458 458
     s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
459 459
     
460 460
     if (s->encoding) {
461
-        int mv_table_size= s->mb_stride * (s->mb_height+2) + 1;
462
-
463 461
         /* Allocate MV tables */
464 462
         CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
465 463
         CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
... ...
@@ -491,7 +495,7 @@ int MPV_common_init(MpegEncContext *s)
491 491
         CHECKED_ALLOCZ(s->avctx->stats_out, 256);
492 492
 
493 493
         /* Allocate MB type table */
494
-        CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint8_t)) //needed for encoding
494
+        CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint16_t)) //needed for encoding
495 495
         
496 496
         CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))
497 497
         
... ...
@@ -513,10 +517,21 @@ int MPV_common_init(MpegEncContext *s)
513 513
 
514 514
     CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))
515 515
     
516
-    if(s->codec_id==CODEC_ID_MPEG4){
516
+    if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
517 517
         /* interlaced direct mode decoding tables */
518
-        CHECKED_ALLOCZ(s->field_mv_table, mb_array_size*2*2 * sizeof(int16_t))
519
-        CHECKED_ALLOCZ(s->field_select_table, mb_array_size*2* sizeof(int8_t))
518
+            for(i=0; i<2; i++){
519
+                int j, k;
520
+                for(j=0; j<2; j++){
521
+                    for(k=0; k<2; k++){
522
+                        CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k]     , mv_table_size * 2 * sizeof(int16_t))
523
+                        s->b_field_mv_table[i][j][k]    = s->b_field_mv_table_base[i][j][k]     + s->mb_stride + 1;
524
+                    }
525
+                    CHECKED_ALLOCZ(s->b_field_select_table[i][j]     , mb_array_size * 2 * sizeof(uint8_t))
526
+                    CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j]     , mv_table_size * 2 * sizeof(int16_t))
527
+                    s->p_field_mv_table[i][j]    = s->p_field_mv_table_base[i][j]     + s->mb_stride + 1;
528
+                }
529
+                CHECKED_ALLOCZ(s->p_field_select_table[i]      , mb_array_size * 2 * sizeof(uint8_t))
530
+            }
520 531
     }
521 532
     if (s->out_format == FMT_H263) {
522 533
         /* ac values */
... ...
@@ -583,7 +598,7 @@ int MPV_common_init(MpegEncContext *s)
583 583
 /* init common structure for both encoder and decoder */
584 584
 void MPV_common_end(MpegEncContext *s)
585 585
 {
586
-    int i;
586
+    int i, j, k;
587 587
 
588 588
     av_freep(&s->parse_context.buffer);
589 589
     s->parse_context.buffer_size=0;
... ...
@@ -601,6 +616,18 @@ void MPV_common_end(MpegEncContext *s)
601 601
     s->b_bidir_forw_mv_table= NULL;
602 602
     s->b_bidir_back_mv_table= NULL;
603 603
     s->b_direct_mv_table= NULL;
604
+    for(i=0; i<2; i++){
605
+        for(j=0; j<2; j++){
606
+            for(k=0; k<2; k++){
607
+                av_freep(&s->b_field_mv_table_base[i][j][k]);
608
+                s->b_field_mv_table[i][j][k]=NULL;
609
+            }
610
+            av_freep(&s->b_field_select_table[i][j]);
611
+            av_freep(&s->p_field_mv_table_base[i][j]);
612
+            s->p_field_mv_table[i][j]=NULL;
613
+        }
614
+        av_freep(&s->p_field_select_table[i]);
615
+    }
604 616
     
605 617
     av_freep(&s->dc_val[0]);
606 618
     av_freep(&s->ac_val[0]);
... ...
@@ -618,8 +645,6 @@ void MPV_common_end(MpegEncContext *s)
618 618
     av_freep(&s->tex_pb_buffer);
619 619
     av_freep(&s->pb2_buffer);
620 620
     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
621
-    av_freep(&s->field_mv_table);
622
-    av_freep(&s->field_select_table);
623 621
     av_freep(&s->avctx->stats_out);
624 622
     av_freep(&s->ac_stats);
625 623
     av_freep(&s->error_status_table);
... ...
@@ -692,7 +717,7 @@ int MPV_encode_init(AVCodecContext *avctx)
692 692
     s->me_method = avctx->me_method;
693 693
 
694 694
     /* Fixed QSCALE */
695
-    s->fixed_qscale = (avctx->flags & CODEC_FLAG_QSCALE);
695
+    s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
696 696
     
697 697
     s->adaptive_quant= (   s->avctx->lumi_masking
698 698
                         || s->avctx->dark_masking
... ...
@@ -702,8 +727,9 @@ int MPV_encode_init(AVCodecContext *avctx)
702 702
                         || (s->flags&CODEC_FLAG_QP_RD))
703 703
                        && !s->fixed_qscale;
704 704
     
705
-    s->obmc= (s->flags & CODEC_FLAG_OBMC);
706
-    s->loop_filter= (s->flags & CODEC_FLAG_LOOP_FILTER);
705
+    s->obmc= !!(s->flags & CODEC_FLAG_OBMC);
706
+    s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER);
707
+    s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN);
707 708
 
708 709
     if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4 
709 710
        && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
... ...
@@ -934,7 +960,7 @@ int MPV_encode_init(AVCodecContext *avctx)
934 934
     if(s->modified_quant)
935 935
         s->chroma_qscale_table= ff_h263_chroma_qscale_table;
936 936
     s->progressive_frame= 
937
-    s->progressive_sequence= !(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
937
+    s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME));
938 938
     
939 939
     ff_init_me(s);
940 940
 
... ...
@@ -1610,7 +1636,7 @@ static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int st
1610 1610
     for(y=0; y<h; y+=16){
1611 1611
         for(x=0; x<w; x+=16){
1612 1612
             int offset= x + y*stride;
1613
-            int sad = s->dsp.pix_abs16x16(src + offset, ref + offset, stride);
1613
+            int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride, 16);
1614 1614
             int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
1615 1615
             int sae = get_sae(src + offset, mean, stride);
1616 1616
             
... ...
@@ -1906,7 +1932,7 @@ int MPV_encode_picture(AVCodecContext *avctx,
1906 1906
     if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate){
1907 1907
         int vbv_delay;
1908 1908
 
1909
-        assert(s->repeat_first_field==0 && s->avctx->repeat_pic==0);
1909
+        assert(s->repeat_first_field==0);
1910 1910
         
1911 1911
         vbv_delay= lrintf(90000 * s->rc_context.buffer_index / s->avctx->rc_max_rate);
1912 1912
         assert(vbv_delay < 0xFFFF);
... ...
@@ -3300,7 +3326,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
3300 3300
         
3301 3301
         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
3302 3302
             int progressive_score, interlaced_score;
3303
-            
3303
+
3304 3304
             progressive_score= pix_vcmp16x8(ptr, wrap_y  ) + pix_vcmp16x8(ptr + wrap_y*8, wrap_y );
3305 3305
             interlaced_score = pix_vcmp16x8(ptr, wrap_y*2) + pix_vcmp16x8(ptr + wrap_y  , wrap_y*2);
3306 3306
             
... ...
@@ -3417,12 +3443,12 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
3417 3417
         /* pre quantization */         
3418 3418
         if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
3419 3419
             //FIXME optimize
3420
-	    if(s->dsp.pix_abs8x8(ptr_y               , dest_y               , wrap_y) < 20*s->qscale) skip_dct[0]= 1;
3421
-            if(s->dsp.pix_abs8x8(ptr_y            + 8, dest_y            + 8, wrap_y) < 20*s->qscale) skip_dct[1]= 1;
3422
-            if(s->dsp.pix_abs8x8(ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y) < 20*s->qscale) skip_dct[2]= 1;
3423
-            if(s->dsp.pix_abs8x8(ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y) < 20*s->qscale) skip_dct[3]= 1;
3424
-            if(s->dsp.pix_abs8x8(ptr_cb              , dest_cb              , wrap_c) < 20*s->qscale) skip_dct[4]= 1;
3425
-            if(s->dsp.pix_abs8x8(ptr_cr              , dest_cr              , wrap_c) < 20*s->qscale) skip_dct[5]= 1;
3420
+	    if(s->dsp.sad[1](NULL, ptr_y               , dest_y               , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
3421
+            if(s->dsp.sad[1](NULL, ptr_y            + 8, dest_y            + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
3422
+            if(s->dsp.sad[1](NULL, ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
3423
+            if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
3424
+            if(s->dsp.sad[1](NULL, ptr_cb              , dest_cb              , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1;
3425
+            if(s->dsp.sad[1](NULL, ptr_cr              , dest_cr              , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1;
3426 3426
 #if 0
3427 3427
 {
3428 3428
  static int stat[7];
... ...
@@ -3484,6 +3510,19 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
3484 3484
         s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
3485 3485
     }
3486 3486
 
3487
+    //non c quantize code returns incorrect block_last_index FIXME
3488
+    if(s->alternate_scan && s->dct_quantize != dct_quantize_c){
3489
+        for(i=0; i<6; i++){
3490
+            int j;
3491
+            if(s->block_last_index[i]>0){
3492
+                for(j=63; j>0; j--){
3493
+                    if(s->block[i][ s->intra_scantable.permutated[j] ]) break;
3494
+                }
3495
+                s->block_last_index[i]= j;
3496
+            }
3497
+        }
3498
+    }
3499
+
3487 3500
     /* huffman encode */
3488 3501
     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
3489 3502
     case CODEC_ID_MPEG1VIDEO:
... ...
@@ -3724,9 +3763,9 @@ static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, in
3724 3724
     int x,y;
3725 3725
     
3726 3726
     if(w==16 && h==16) 
3727
-        return s->dsp.sse[0](NULL, src1, src2, stride);
3727
+        return s->dsp.sse[0](NULL, src1, src2, stride, 16);
3728 3728
     else if(w==8 && h==8)
3729
-        return s->dsp.sse[1](NULL, src1, src2, stride);
3729
+        return s->dsp.sse[1](NULL, src1, src2, stride, 8);
3730 3730
     
3731 3731
     for(y=0; y<h; y++){
3732 3732
         for(x=0; x<w; x++){
... ...
@@ -3747,9 +3786,9 @@ static int sse_mb(MpegEncContext *s){
3747 3747
     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3748 3748
 
3749 3749
     if(w==16 && h==16)
3750
-        return  s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize)
3751
-               +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize)
3752
-               +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize);
3750
+        return  s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
3751
+               +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
3752
+               +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
3753 3753
     else
3754 3754
         return  sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
3755 3755
                +sse(s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
... ...
@@ -3759,7 +3798,7 @@ static int sse_mb(MpegEncContext *s){
3759 3759
 static void encode_picture(MpegEncContext *s, int picture_number)
3760 3760
 {
3761 3761
     int mb_x, mb_y, pdif = 0;
3762
-    int i;
3762
+    int i, j;
3763 3763
     int bits;
3764 3764
     MpegEncContext best_s, backup_s;
3765 3765
     uint8_t bit_buf[2][3000];
... ...
@@ -3843,7 +3882,8 @@ static void encode_picture(MpegEncContext *s, int picture_number)
3843 3843
         //FIXME do we need to zero them?
3844 3844
         memset(s->current_picture.motion_val[0][0], 0, sizeof(int16_t)*(s->mb_width*2 + 2)*(s->mb_height*2 + 2)*2);
3845 3845
         memset(s->p_mv_table   , 0, sizeof(int16_t)*(s->mb_stride)*s->mb_height*2);
3846
-        memset(s->mb_type      , MB_TYPE_INTRA, sizeof(uint8_t)*s->mb_stride*s->mb_height);
3846
+        for(i=0; i<s->mb_stride*s->mb_height; i++)
3847
+            s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3847 3848
         
3848 3849
         if(!s->fixed_qscale){
3849 3850
             /* finding spatial complexity for I-frame rate control */
... ...
@@ -3868,32 +3908,61 @@ static void encode_picture(MpegEncContext *s, int picture_number)
3868 3868
 
3869 3869
     if(s->scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){
3870 3870
         s->pict_type= I_TYPE;
3871
-        memset(s->mb_type   , MB_TYPE_INTRA, sizeof(uint8_t)*s->mb_stride*s->mb_height);
3871
+        for(i=0; i<s->mb_stride*s->mb_height; i++)
3872
+            s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3872 3873
 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3873 3874
     }
3874 3875
 
3875 3876
     if(!s->umvplus){
3876 3877
         if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
3877
-            s->f_code= ff_get_best_fcode(s, s->p_mv_table, MB_TYPE_INTER);
3878
-        
3878
+            s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3879
+
3880
+            if(s->flags & CODEC_FLAG_INTERLACED_ME){
3881
+                int a,b;
3882
+                a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3883
+                b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3884
+                s->f_code= FFMAX(s->f_code, FFMAX(a,b));
3885
+            }
3886
+                    
3879 3887
             ff_fix_long_p_mvs(s);
3888
+            ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3889
+            if(s->flags & CODEC_FLAG_INTERLACED_ME){
3890
+                for(i=0; i<2; i++){
3891
+                    for(j=0; j<2; j++)
3892
+                        ff_fix_long_mvs(s, s->p_field_select_table[i], j, 
3893
+                                        s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3894
+                }
3895
+            }
3880 3896
         }
3881 3897
 
3882 3898
         if(s->pict_type==B_TYPE){
3883 3899
             int a, b;
3884 3900
 
3885
-            a = ff_get_best_fcode(s, s->b_forw_mv_table, MB_TYPE_FORWARD);
3886
-            b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, MB_TYPE_BIDIR);
3901
+            a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3902
+            b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3887 3903
             s->f_code = FFMAX(a, b);
3888 3904
 
3889
-            a = ff_get_best_fcode(s, s->b_back_mv_table, MB_TYPE_BACKWARD);
3890
-            b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, MB_TYPE_BIDIR);
3905
+            a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3906
+            b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3891 3907
             s->b_code = FFMAX(a, b);
3892 3908
 
3893
-            ff_fix_long_b_mvs(s, s->b_forw_mv_table, s->f_code, MB_TYPE_FORWARD);
3894
-            ff_fix_long_b_mvs(s, s->b_back_mv_table, s->b_code, MB_TYPE_BACKWARD);
3895
-            ff_fix_long_b_mvs(s, s->b_bidir_forw_mv_table, s->f_code, MB_TYPE_BIDIR);
3896
-            ff_fix_long_b_mvs(s, s->b_bidir_back_mv_table, s->b_code, MB_TYPE_BIDIR);
3909
+            ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3910
+            ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3911
+            ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3912
+            ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3913
+            if(s->flags & CODEC_FLAG_INTERLACED_ME){
3914
+                int dir;
3915
+                for(dir=0; dir<2; dir++){
3916
+                    for(i=0; i<2; i++){
3917
+                        for(j=0; j<2; j++){
3918
+                            int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I) 
3919
+                                          : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3920
+                            ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j, 
3921
+                                            s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3922
+                        }
3923
+                    }
3924
+                }
3925
+            }
3897 3926
         }
3898 3927
     }
3899 3928
     
... ...
@@ -3990,10 +4059,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
3990 3990
         s->current_picture_ptr->error[i] = 0;
3991 3991
     }
3992 3992
     s->mb_skip_run = 0;
3993
-    s->last_mv[0][0][0] = 0;
3994
-    s->last_mv[0][0][1] = 0;
3995
-    s->last_mv[1][0][0] = 0;
3996
-    s->last_mv[1][0][1] = 0;
3993
+    memset(s->last_mv, 0, sizeof(s->last_mv));
3997 3994
      
3998 3995
     s->last_mv_dir = 0;
3999 3996
 
... ...
@@ -4027,6 +4093,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
4027 4027
             int mb_type= s->mb_type[xy];
4028 4028
 //            int d;
4029 4029
             int dmin= INT_MAX;
4030
+            int dir;
4030 4031
 
4031 4032
             s->mb_x = mb_x;
4032 4033
             ff_update_block_index(s);
... ...
@@ -4134,25 +4201,37 @@ static void encode_picture(MpegEncContext *s, int picture_number)
4134 4134
                     backup_s.tex_pb= s->tex_pb;
4135 4135
                 }
4136 4136
 
4137
-                if(mb_type&MB_TYPE_INTER){
4137
+                if(mb_type&CANDIDATE_MB_TYPE_INTER){
4138 4138
                     s->mv_dir = MV_DIR_FORWARD;
4139 4139
                     s->mv_type = MV_TYPE_16X16;
4140 4140
                     s->mb_intra= 0;
4141 4141
                     s->mv[0][0][0] = s->p_mv_table[xy][0];
4142 4142
                     s->mv[0][0][1] = s->p_mv_table[xy][1];
4143
-                    encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER, pb, pb2, tex_pb, 
4143
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb, 
4144 4144
                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
4145 4145
                 }
4146
-                if(mb_type&MB_TYPE_SKIPED){
4146
+                if(mb_type&CANDIDATE_MB_TYPE_INTER_I){ 
4147
+                    s->mv_dir = MV_DIR_FORWARD;
4148
+                    s->mv_type = MV_TYPE_FIELD;
4149
+                    s->mb_intra= 0;
4150
+                    for(i=0; i<2; i++){
4151
+                        j= s->field_select[0][i] = s->p_field_select_table[i][xy];
4152
+                        s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
4153
+                        s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
4154
+                    }
4155
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb, 
4156
+                                 &dmin, &next_block, 0, 0);
4157
+                }
4158
+                if(mb_type&CANDIDATE_MB_TYPE_SKIPED){
4147 4159
                     s->mv_dir = MV_DIR_FORWARD;
4148 4160
                     s->mv_type = MV_TYPE_16X16;
4149 4161
                     s->mb_intra= 0;
4150 4162
                     s->mv[0][0][0] = 0;
4151 4163
                     s->mv[0][0][1] = 0;
4152
-                    encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_SKIPED, pb, pb2, tex_pb, 
4164
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPED, pb, pb2, tex_pb, 
4153 4165
                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
4154 4166
                 }
4155
-                if(mb_type&MB_TYPE_INTER4V){                 
4167
+                if(mb_type&CANDIDATE_MB_TYPE_INTER4V){                 
4156 4168
                     s->mv_dir = MV_DIR_FORWARD;
4157 4169
                     s->mv_type = MV_TYPE_8X8;
4158 4170
                     s->mb_intra= 0;
... ...
@@ -4160,28 +4239,28 @@ static void encode_picture(MpegEncContext *s, int picture_number)
4160 4160
                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
4161 4161
                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
4162 4162
                     }
4163
-                    encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER4V, pb, pb2, tex_pb, 
4163
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb, 
4164 4164
                                  &dmin, &next_block, 0, 0);
4165 4165
                 }
4166
-                if(mb_type&MB_TYPE_FORWARD){
4166
+                if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
4167 4167
                     s->mv_dir = MV_DIR_FORWARD;
4168 4168
                     s->mv_type = MV_TYPE_16X16;
4169 4169
                     s->mb_intra= 0;
4170 4170
                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
4171 4171
                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
4172
-                    encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_FORWARD, pb, pb2, tex_pb, 
4172
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb, 
4173 4173
                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
4174 4174
                 }
4175
-                if(mb_type&MB_TYPE_BACKWARD){
4175
+                if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
4176 4176
                     s->mv_dir = MV_DIR_BACKWARD;
4177 4177
                     s->mv_type = MV_TYPE_16X16;
4178 4178
                     s->mb_intra= 0;
4179 4179
                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
4180 4180
                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
4181
-                    encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_BACKWARD, pb, pb2, tex_pb, 
4181
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb, 
4182 4182
                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
4183 4183
                 }
4184
-                if(mb_type&MB_TYPE_BIDIR){
4184
+                if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
4185 4185
                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
4186 4186
                     s->mv_type = MV_TYPE_16X16;
4187 4187
                     s->mb_intra= 0;
... ...
@@ -4189,10 +4268,10 @@ static void encode_picture(MpegEncContext *s, int picture_number)
4189 4189
                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
4190 4190
                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
4191 4191
                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
4192
-                    encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_BIDIR, pb, pb2, tex_pb, 
4192
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb, 
4193 4193
                                  &dmin, &next_block, 0, 0);
4194 4194
                 }
4195
-                if(mb_type&MB_TYPE_DIRECT){
4195
+                if(mb_type&CANDIDATE_MB_TYPE_DIRECT){
4196 4196
                     int mx= s->b_direct_mv_table[xy][0];
4197 4197
                     int my= s->b_direct_mv_table[xy][1];
4198 4198
                     
... ...
@@ -4201,16 +4280,54 @@ static void encode_picture(MpegEncContext *s, int picture_number)
4201 4201
 #ifdef CONFIG_RISKY
4202 4202
                     ff_mpeg4_set_direct_mv(s, mx, my);
4203 4203
 #endif
4204
-                    encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_DIRECT, pb, pb2, tex_pb, 
4204
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb, 
4205 4205
                                  &dmin, &next_block, mx, my);
4206 4206
                 }
4207
-                if(mb_type&MB_TYPE_INTRA){
4207
+                if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){ 
4208
+                    s->mv_dir = MV_DIR_FORWARD;
4209
+                    s->mv_type = MV_TYPE_FIELD;
4210
+                    s->mb_intra= 0;
4211
+                    for(i=0; i<2; i++){
4212
+                        j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
4213
+                        s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
4214
+                        s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
4215
+                    }
4216
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb, 
4217
+                                 &dmin, &next_block, 0, 0);
4218
+                }
4219
+                if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){ 
4220
+                    s->mv_dir = MV_DIR_BACKWARD;
4221
+                    s->mv_type = MV_TYPE_FIELD;
4222
+                    s->mb_intra= 0;
4223
+                    for(i=0; i<2; i++){
4224
+                        j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
4225
+                        s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
4226
+                        s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
4227
+                    }
4228
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb, 
4229
+                                 &dmin, &next_block, 0, 0);
4230
+                }
4231
+                if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){ 
4232
+                    s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
4233
+                    s->mv_type = MV_TYPE_FIELD;
4234
+                    s->mb_intra= 0;
4235
+                    for(dir=0; dir<2; dir++){
4236
+                        for(i=0; i<2; i++){
4237
+                            j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
4238
+                            s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
4239
+                            s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
4240
+                        }
4241
+                    }
4242
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb, 
4243
+                                 &dmin, &next_block, 0, 0);
4244
+                }
4245
+                if(mb_type&CANDIDATE_MB_TYPE_INTRA){
4208 4246
                     s->mv_dir = 0;
4209 4247
                     s->mv_type = MV_TYPE_16X16;
4210 4248
                     s->mb_intra= 1;
4211 4249
                     s->mv[0][0][0] = 0;
4212 4250
                     s->mv[0][0][1] = 0;
4213
-                    encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTRA, pb, pb2, tex_pb, 
4251
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb, 
4214 4252
                                  &dmin, &next_block, 0, 0);
4215 4253
                     if(s->h263_pred || s->h263_aic){
4216 4254
                         if(best_s.mb_intra)
... ...
@@ -4252,7 +4369,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
4252 4252
                                 }
4253 4253
                             }
4254 4254
 
4255
-                            encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb, 
4255
+                            encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb, 
4256 4256
                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
4257 4257
                             if(best_s.qscale != qp){
4258 4258
                                 if(s->mb_intra){
... ...
@@ -4312,19 +4429,30 @@ static void encode_picture(MpegEncContext *s, int picture_number)
4312 4312
                 // only one MB-Type possible
4313 4313
                 
4314 4314
                 switch(mb_type){
4315
-                case MB_TYPE_INTRA:
4315
+                case CANDIDATE_MB_TYPE_INTRA:
4316 4316
                     s->mv_dir = 0;
4317 4317
                     s->mb_intra= 1;
4318 4318
                     motion_x= s->mv[0][0][0] = 0;
4319 4319
                     motion_y= s->mv[0][0][1] = 0;
4320 4320
                     break;
4321
-                case MB_TYPE_INTER:
4321
+                case CANDIDATE_MB_TYPE_INTER:
4322 4322
                     s->mv_dir = MV_DIR_FORWARD;
4323 4323
                     s->mb_intra= 0;
4324 4324
                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
4325 4325
                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
4326 4326
                     break;
4327
-                case MB_TYPE_INTER4V:
4327
+                case CANDIDATE_MB_TYPE_INTER_I:
4328
+                    s->mv_dir = MV_DIR_FORWARD;
4329
+                    s->mv_type = MV_TYPE_FIELD;
4330
+                    s->mb_intra= 0;
4331
+                    for(i=0; i<2; i++){
4332
+                        j= s->field_select[0][i] = s->p_field_select_table[i][xy];
4333
+                        s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
4334
+                        s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
4335
+                    }
4336
+                    motion_x = motion_y = 0;
4337
+                    break;
4338
+                case CANDIDATE_MB_TYPE_INTER4V:
4328 4339
                     s->mv_dir = MV_DIR_FORWARD;
4329 4340
                     s->mv_type = MV_TYPE_8X8;
4330 4341
                     s->mb_intra= 0;
... ...
@@ -4334,7 +4462,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
4334 4334
                     }
4335 4335
                     motion_x= motion_y= 0;
4336 4336
                     break;
4337
-                case MB_TYPE_DIRECT:
4337
+                case CANDIDATE_MB_TYPE_DIRECT:
4338 4338
                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
4339 4339
                     s->mb_intra= 0;
4340 4340
                     motion_x=s->b_direct_mv_table[xy][0];
... ...
@@ -4343,7 +4471,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
4343 4343
                     ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
4344 4344
 #endif
4345 4345
                     break;
4346
-                case MB_TYPE_BIDIR:
4346
+                case CANDIDATE_MB_TYPE_BIDIR:
4347 4347
                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
4348 4348
                     s->mb_intra= 0;
4349 4349
                     motion_x=0;
... ...
@@ -4353,19 +4481,54 @@ static void encode_picture(MpegEncContext *s, int picture_number)
4353 4353
                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
4354 4354
                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
4355 4355
                     break;
4356
-                case MB_TYPE_BACKWARD:
4356
+                case CANDIDATE_MB_TYPE_BACKWARD:
4357 4357
                     s->mv_dir = MV_DIR_BACKWARD;
4358 4358
                     s->mb_intra= 0;
4359 4359
                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
4360 4360
                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
4361 4361
                     break;
4362
-                case MB_TYPE_FORWARD:
4362
+                case CANDIDATE_MB_TYPE_FORWARD:
4363 4363
                     s->mv_dir = MV_DIR_FORWARD;
4364 4364
                     s->mb_intra= 0;
4365 4365
                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
4366 4366
                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
4367 4367
 //                    printf(" %d %d ", motion_x, motion_y);
4368 4368
                     break;
4369
+                case CANDIDATE_MB_TYPE_FORWARD_I:
4370
+                    s->mv_dir = MV_DIR_FORWARD;
4371
+                    s->mv_type = MV_TYPE_FIELD;
4372
+                    s->mb_intra= 0;
4373
+                    for(i=0; i<2; i++){
4374
+                        j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
4375
+                        s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
4376
+                        s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
4377
+                    }
4378
+                    motion_x=motion_y=0;
4379
+                    break;
4380
+                case CANDIDATE_MB_TYPE_BACKWARD_I:
4381
+                    s->mv_dir = MV_DIR_BACKWARD;
4382
+                    s->mv_type = MV_TYPE_FIELD;
4383
+                    s->mb_intra= 0;
4384
+                    for(i=0; i<2; i++){
4385
+                        j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
4386
+                        s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
4387
+                        s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
4388
+                    }
4389
+                    motion_x=motion_y=0;
4390
+                    break;
4391
+                case CANDIDATE_MB_TYPE_BIDIR_I:
4392
+                    s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
4393
+                    s->mv_type = MV_TYPE_FIELD;
4394
+                    s->mb_intra= 0;
4395
+                    for(dir=0; dir<2; dir++){
4396
+                        for(i=0; i<2; i++){
4397
+                            j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
4398
+                            s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
4399
+                            s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
4400
+                        }
4401
+                    }
4402
+                    motion_x=motion_y=0;
4403
+                    break;
4369 4404
                 default:
4370 4405
                     motion_x=motion_y=0; //gcc warning fix
4371 4406
                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
... ...
@@ -137,6 +137,7 @@ typedef struct Picture{
137 137
     int16_t (*motion_val_base[2])[2];
138 138
     int8_t *ref_index[2];
139 139
     uint32_t *mb_type_base;
140
+#define MB_TYPE_INTRA MB_TYPE_INTRA4x4 //default mb_type if theres just one type
140 141
 #define IS_INTRA4x4(a)   ((a)&MB_TYPE_INTRA4x4)
141 142
 #define IS_INTRA16x16(a) ((a)&MB_TYPE_INTRA16x16)
142 143
 #define IS_PCM(a)        ((a)&MB_TYPE_INTRA_PCM)
... ...
@@ -206,23 +207,28 @@ typedef struct MotionEstContext{
206 206
     int mb_penalty_factor;
207 207
     int pre_pass;                      ///< = 1 for the pre pass 
208 208
     int dia_size;
209
+    int xmin;
210
+    int xmax;
211
+    int ymin;
212
+    int ymax;
209 213
     uint8_t (*mv_penalty)[MAX_MV*2+1];  ///< amount of bits needed to encode a MV 
210 214
     int (*sub_motion_search)(struct MpegEncContext * s,
211 215
 				  int *mx_ptr, int *my_ptr, int dmin,
212
-				  int xmin, int ymin, int xmax, int ymax,
213
-                                  int pred_x, int pred_y, Picture *ref_picture, 
214
-                                  int n, int size, uint8_t * const mv_penalty);
215
-    int (*motion_search[7])(struct MpegEncContext * s, int block,
216
+                                  int pred_x, int pred_y, uint8_t *src_data[3],
217
+                                  uint8_t *ref_data[6], int stride, int uvstride,
218
+                                  int size, int h, uint8_t * const mv_penalty);
219
+    int (*motion_search[7])(struct MpegEncContext * s,
216 220
                              int *mx_ptr, int *my_ptr,
217
-                             int P[10][2], int pred_x, int pred_y,
218
-                             int xmin, int ymin, int xmax, int ymax, Picture *ref_picture, int16_t (*last_mv)[2], 
221
+                             int P[10][2], int pred_x, int pred_y, uint8_t *src_data[3],
222
+                             uint8_t *ref_data[6], int stride, int uvstride, int16_t (*last_mv)[2], 
219 223
                              int ref_mv_scale, uint8_t * const mv_penalty);
220
-    int (*pre_motion_search)(struct MpegEncContext * s, int block,
224
+    int (*pre_motion_search)(struct MpegEncContext * s,
221 225
                              int *mx_ptr, int *my_ptr,
222
-                             int P[10][2], int pred_x, int pred_y,
223
-                             int xmin, int ymin, int xmax, int ymax, Picture *ref_picture, int16_t (*last_mv)[2], 
226
+                             int P[10][2], int pred_x, int pred_y, uint8_t *src_data[3], 
227
+                             uint8_t *ref_data[6], int stride, int uvstride, int16_t (*last_mv)[2], 
224 228
                              int ref_mv_scale, uint8_t * const mv_penalty);
225
-    int (*get_mb_score)(struct MpegEncContext * s, int mx, int my, int pred_x, int pred_y, Picture *ref_picture, 
229
+    int (*get_mb_score)(struct MpegEncContext * s, int mx, int my, int pred_x, int pred_y, uint8_t *src_data[3],
230
+                                  uint8_t *ref_data[6], int stride, int uvstride,    
226 231
                                   uint8_t * const mv_penalty);
227 232
 }MotionEstContext;
228 233
 
... ...
@@ -351,12 +357,18 @@ typedef struct MpegEncContext {
351 351
     int16_t (*b_bidir_forw_mv_table_base)[2]; 
352 352
     int16_t (*b_bidir_back_mv_table_base)[2]; 
353 353
     int16_t (*b_direct_mv_table_base)[2];
354
+    int16_t (*p_field_mv_table_base[2][2])[2];
355
+    int16_t (*b_field_mv_table_base[2][2][2])[2];
354 356
     int16_t (*p_mv_table)[2];            ///< MV table (1MV per MB) p-frame encoding 
355 357
     int16_t (*b_forw_mv_table)[2];       ///< MV table (1MV per MB) forward mode b-frame encoding 
356 358
     int16_t (*b_back_mv_table)[2];       ///< MV table (1MV per MB) backward mode b-frame encoding 
357 359
     int16_t (*b_bidir_forw_mv_table)[2]; ///< MV table (1MV per MB) bidir mode b-frame encoding 
358 360
     int16_t (*b_bidir_back_mv_table)[2]; ///< MV table (1MV per MB) bidir mode b-frame encoding 
359 361
     int16_t (*b_direct_mv_table)[2];     ///< MV table (1MV per MB) direct mode b-frame encoding 
362
+    int16_t (*p_field_mv_table[2][2])[2];   ///< MV table (2MV per MB) interlaced p-frame encoding
363
+    int16_t (*b_field_mv_table[2][2][2])[2];///< MV table (4MV per MB) interlaced b-frame encoding
364
+    uint8_t (*p_field_select_table[2]);
365
+    uint8_t (*b_field_select_table[2][2]);
360 366
     int me_method;                       ///< ME algorithm 
361 367
     int scene_change_score;
362 368
     int mv_dir;
... ...
@@ -391,17 +403,22 @@ typedef struct MpegEncContext {
391 391
     int mb_x, mb_y;
392 392
     int mb_skip_run;
393 393
     int mb_intra;
394
-    uint8_t *mb_type;       ///< Table for MB type FIXME remove and use picture->mb_type
395
-#define MB_TYPE_INTRA    0x01
396
-#define MB_TYPE_INTER    0x02
397
-#define MB_TYPE_INTER4V  0x04
398
-#define MB_TYPE_SKIPED   0x08
394
+    uint16_t *mb_type;           ///< Table for candidate MB types for encoding
395
+#define CANDIDATE_MB_TYPE_INTRA    0x01
396
+#define CANDIDATE_MB_TYPE_INTER    0x02
397
+#define CANDIDATE_MB_TYPE_INTER4V  0x04
398
+#define CANDIDATE_MB_TYPE_SKIPED   0x08
399 399
 //#define MB_TYPE_GMC      0x10
400 400
 
401
-#define MB_TYPE_DIRECT   0x10
402
-#define MB_TYPE_FORWARD  0x20
403
-#define MB_TYPE_BACKWARD 0x40
404
-#define MB_TYPE_BIDIR    0x80
401
+#define CANDIDATE_MB_TYPE_DIRECT   0x10
402
+#define CANDIDATE_MB_TYPE_FORWARD  0x20
403
+#define CANDIDATE_MB_TYPE_BACKWARD 0x40
404
+#define CANDIDATE_MB_TYPE_BIDIR    0x80
405
+
406
+#define CANDIDATE_MB_TYPE_INTER_I    0x100
407
+#define CANDIDATE_MB_TYPE_FORWARD_I  0x200
408
+#define CANDIDATE_MB_TYPE_BACKWARD_I 0x400
409
+#define CANDIDATE_MB_TYPE_BIDIR_I    0x800
405 410
 
406 411
     int block_index[6]; ///< index to current MB in block based arrays with edges
407 412
     int block_wrap[6];
... ...
@@ -551,8 +568,6 @@ typedef struct MpegEncContext {
551 551
     uint8_t *tex_pb_buffer;          
552 552
     uint8_t *pb2_buffer;
553 553
     int mpeg_quant;
554
-    int16_t (*field_mv_table)[2][2];   ///< used for interlaced b frame decoding 
555
-    int8_t (*field_select_table)[2];   ///< wtf, no really another table for interlaced b frames 
556 554
     int t_frame;                       ///< time distance of first I -> B, used for interlaced b frames 
557 555
     int padding_bug_score;             ///< used to detect the VERY common padding bug in MPEG4 
558 556
 
... ...
@@ -748,7 +763,8 @@ void ff_estimate_b_frame_motion(MpegEncContext * s,
748 748
                              int mb_x, int mb_y);
749 749
 int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type);
750 750
 void ff_fix_long_p_mvs(MpegEncContext * s);
751
-void ff_fix_long_b_mvs(MpegEncContext * s, int16_t (*mv_table)[2], int f_code, int type);
751
+void ff_fix_long_mvs(MpegEncContext * s, uint8_t *field_select_table, int field_select,
752
+                     int16_t (*mv_table)[2], int f_code, int type, int truncate);
752 753
 void ff_init_me(MpegEncContext *s);
753 754
 int ff_pre_estimate_p_frame_motion(MpegEncContext * s, int mb_x, int mb_y);
754 755
 
... ...
@@ -45,7 +45,7 @@ static void sigill_handler (int sig)
45 45
 }
46 46
 #endif /* CONFIG_DARWIN */
47 47
 
48
-int pix_abs16x16_x2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
48
+int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
49 49
 {
50 50
     int i;
51 51
     int s __attribute__((aligned(16)));
... ...
@@ -57,7 +57,7 @@ int pix_abs16x16_x2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
57 57
 
58 58
     s = 0;
59 59
     sad = (vector unsigned int)vec_splat_u32(0);
60
-    for(i=0;i<16;i++) {
60
+    for(i=0;i<h;i++) {
61 61
         /*
62 62
            Read unaligned pixels into our vectors. The vectors are as follows:
63 63
            pix1v: pix1[0]-pix1[15]
... ...
@@ -92,7 +92,7 @@ int pix_abs16x16_x2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
92 92
     return s;
93 93
 }
94 94
 
95
-int pix_abs16x16_y2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
95
+int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
96 96
 {
97 97
     int i;
98 98
     int s __attribute__((aligned(16)));
... ...
@@ -118,7 +118,7 @@ int pix_abs16x16_y2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
118 118
     tv = (vector unsigned char *) &pix2[0];
119 119
     pix2v = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix2[0]));
120 120
     
121
-    for(i=0;i<16;i++) {
121
+    for(i=0;i<h;i++) {
122 122
         /*
123 123
            Read unaligned pixels into our vectors. The vectors are as follows:
124 124
            pix1v: pix1[0]-pix1[15]
... ...
@@ -152,7 +152,7 @@ int pix_abs16x16_y2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
152 152
     return s;    
153 153
 }
154 154
 
155
-int pix_abs16x16_xy2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
155
+int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
156 156
 {
157 157
     int i;
158 158
     int s __attribute__((aligned(16)));
... ...
@@ -194,7 +194,7 @@ int pix_abs16x16_xy2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
194 194
     t1 = vec_add(pix2hv, pix2ihv);
195 195
     t2 = vec_add(pix2lv, pix2ilv);
196 196
     
197
-    for(i=0;i<16;i++) {
197
+    for(i=0;i<h;i++) {
198 198
         /*
199 199
            Read unaligned pixels into our vectors. The vectors are as follows:
200 200
            pix1v: pix1[0]-pix1[15]
... ...
@@ -253,7 +253,7 @@ int pix_abs16x16_xy2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
253 253
     return s;
254 254
 }
255 255
 
256
-int pix_abs16x16_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
256
+int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
257 257
 {
258 258
     int i;
259 259
     int s __attribute__((aligned(16)));
... ...
@@ -266,7 +266,7 @@ int pix_abs16x16_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
266 266
     sad = (vector unsigned int)vec_splat_u32(0);
267 267
 
268 268
 
269
-    for(i=0;i<16;i++) {
269
+    for(i=0;i<h;i++) {
270 270
 	/* Read potentially unaligned pixels into t1 and t2 */
271 271
         perm1 = vec_lvsl(0, pix1);
272 272
         pix1v = (vector unsigned char *) pix1;
... ...
@@ -295,7 +295,7 @@ int pix_abs16x16_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
295 295
     return s;
296 296
 }
297 297
 
298
-int pix_abs8x8_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
298
+int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
299 299
 {
300 300
     int i;
301 301
     int s __attribute__((aligned(16)));
... ...
@@ -309,7 +309,7 @@ int pix_abs8x8_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
309 309
 
310 310
     permclear = (vector unsigned char)AVV(255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0);
311 311
 
312
-    for(i=0;i<8;i++) {
312
+    for(i=0;i<h;i++) {
313 313
 	/* Read potentially unaligned pixels into t1 and t2
314 314
 	   Since we're reading 16 pixels, and actually only want 8,
315 315
 	   mask out the last 8 pixels. The 0s don't change the sum. */
... ...
@@ -374,9 +374,9 @@ int pix_norm1_altivec(uint8_t *pix, int line_size)
374 374
 /**
375 375
  * Sum of Squared Errors for a 8x8 block.
376 376
  * AltiVec-enhanced.
377
- * It's the pix_abs8x8_altivec code above w/ squaring added.
377
+ * It's the sad8_altivec code above w/ squaring added.
378 378
  */
379
-int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size)
379
+int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
380 380
 {
381 381
     int i;
382 382
     int s __attribute__((aligned(16)));
... ...
@@ -391,7 +391,7 @@ int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size)
391 391
     permclear = (vector unsigned char)AVV(255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0);
392 392
 
393 393
     
394
-    for(i=0;i<8;i++) {
394
+    for(i=0;i<h;i++) {
395 395
 	/* Read potentially unaligned pixels into t1 and t2
396 396
 	   Since we're reading 16 pixels, and actually only want 8,
397 397
 	   mask out the last 8 pixels. The 0s don't change the sum. */
... ...
@@ -430,9 +430,9 @@ int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size)
430 430
 /**
431 431
  * Sum of Squared Errors for a 16x16 block.
432 432
  * AltiVec-enhanced.
433
- * It's the pix_abs16x16_altivec code above w/ squaring added.
433
+ * It's the sad16_altivec code above w/ squaring added.
434 434
  */
435
-int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size)
435
+int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
436 436
 {
437 437
     int i;
438 438
     int s __attribute__((aligned(16)));
... ...
@@ -444,7 +444,7 @@ int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size)
444 444
     
445 445
     sum = (vector unsigned int)vec_splat_u32(0);
446 446
     
447
-    for(i=0;i<16;i++) {
447
+    for(i=0;i<h;i++) {
448 448
 	/* Read potentially unaligned pixels into t1 and t2 */
449 449
         perm1 = vec_lvsl(0, pix1);
450 450
         pix1v = (vector unsigned char *) pix1;
... ...
@@ -609,14 +609,6 @@ void diff_pixels_altivec(DCTELEM *restrict block, const uint8_t *s1,
609 609
     }
610 610
 }
611 611
 
612
-int sad16x16_altivec(void *s, uint8_t *a, uint8_t *b, int stride) {
613
-  return pix_abs16x16_altivec(a,b,stride);
614
-}
615
-
616
-int sad8x8_altivec(void *s, uint8_t *a, uint8_t *b, int stride) {
617
-  return pix_abs8x8_altivec(a,b,stride);
618
-}
619
-
620 612
 void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w) {
621 613
 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
622 614
     int i;
... ...
@@ -24,16 +24,14 @@
24 24
 
25 25
 #ifdef HAVE_ALTIVEC
26 26
 
27
-extern int pix_abs16x16_x2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size);
28
-extern int pix_abs16x16_y2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size);
29
-extern int pix_abs16x16_xy2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size);
30
-extern int pix_abs16x16_altivec(uint8_t *pix1, uint8_t *pix2, int line_size);
31
-extern int pix_abs8x8_altivec(uint8_t *pix1, uint8_t *pix2, int line_size);
32
-extern int sad16x16_altivec(void *s, uint8_t *a, uint8_t *b, int stride);
33
-extern int sad8x8_altivec(void *s, uint8_t *a, uint8_t *b, int stride);
27
+extern int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
28
+extern int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
29
+extern int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
30
+extern int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
31
+extern int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
34 32
 extern int pix_norm1_altivec(uint8_t *pix, int line_size);
35
-extern int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size);
36
-extern int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size);
33
+extern int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
34
+extern int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
37 35
 extern int pix_sum_altivec(uint8_t * pix, int line_size);
38 36
 extern void diff_pixels_altivec(DCTELEM* block, const uint8_t* s1, const uint8_t* s2, int stride);
39 37
 extern void get_pixels_altivec(DCTELEM* block, const uint8_t * pixels, int line_size);
... ...
@@ -240,13 +240,13 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
240 240
         mm_flags |= MM_ALTIVEC;
241 241
         
242 242
         // Altivec specific optimisations
243
-        c->pix_abs16x16_x2 = pix_abs16x16_x2_altivec;
244
-        c->pix_abs16x16_y2 = pix_abs16x16_y2_altivec;
245
-        c->pix_abs16x16_xy2 = pix_abs16x16_xy2_altivec;
246
-        c->pix_abs16x16 = pix_abs16x16_altivec;
247
-        c->pix_abs8x8 = pix_abs8x8_altivec;
248
-        c->sad[0]= sad16x16_altivec;
249
-        c->sad[1]= sad8x8_altivec;
243
+        c->pix_abs[0][1] = sad16_x2_altivec;
244
+        c->pix_abs[0][2] = sad16_y2_altivec;
245
+        c->pix_abs[0][3] = sad16_xy2_altivec;
246
+        c->pix_abs[0][0] = sad16_altivec;
247
+        c->pix_abs[1][0] = sad8_altivec;
248
+        c->sad[0]= sad16_altivec;
249
+        c->sad[1]= sad8_altivec;
250 250
         c->pix_norm1 = pix_norm1_altivec;
251 251
         c->sse[1]= sse8_altivec;
252 252
         c->sse[0]= sse16_altivec;
... ...
@@ -520,7 +520,7 @@ static void adaptive_quantization(MpegEncContext *s, double q){
520 520
         if(spat_cplx < 4) spat_cplx= 4; //FIXME finetune
521 521
         if(temp_cplx < 4) temp_cplx= 4; //FIXME finetune
522 522
 
523
-        if((s->mb_type[mb_xy]&MB_TYPE_INTRA)){//FIXME hq mode 
523
+        if((s->mb_type[mb_xy]&CANDIDATE_MB_TYPE_INTRA)){//FIXME hq mode 
524 524
             cplx= spat_cplx;
525 525
             factor= 1.0 + p_masking;
526 526
         }else{
... ...
@@ -26,21 +26,21 @@ stddev:  8.18 bytes:7602176
26 26
 920a0a8a0063655d1f34dcaad7857f98 *./data/a-h263p.avi
27 27
 0eb167c9dfcbeeecbf3debed8af8f811 *./data/out.yuv
28 28
 stddev:  2.08 bytes:7602176
29
-a8cc41cd5016bbb821e7c2691f5090ea *./data/a-odivx.mp4
30
-e48114a50ef4cfb4fe2016fa5b34ae4c *./data/out.yuv
31
-stddev:  8.02 bytes:7602176
29
+66f8b4b5b4f0655cff7bdbc44969cab3 *./data/a-odivx.mp4
30
+5bd332c77ef45e58b7017e06a0467dd3 *./data/out.yuv
31
+stddev:  7.94 bytes:7602176
32 32
 5704a082cc5c5970620123ae20566286 *./data/a-huffyuv.avi
33 33
 799d3db687f6cdd7a837ec156efc171f *./data/out.yuv
34 34
 stddev:  0.00 bytes:7602176
35 35
 e9f63126859b97cd23cd1413038f8f7b *./data/a-mpeg4-rc.avi
36 36
 90a159074b1b109569914ee63f387860 *./data/out.yuv
37 37
 stddev: 10.18 bytes:7145472
38
-b3f1425e266569d5d726b88eadc13dd4 *./data/a-mpeg4-adv.avi
39
-fb61365b22c947adbaeab74478579020 *./data/out.yuv
40
-stddev:  7.31 bytes:7602176
41
-25ec5ab399fd4db0c8aaea78cb692611 *./data/a-error-mpeg4-adv.avi
42
-bd441fc1e2fb9a3c0bdc9c5f1ed25ef0 *./data/out.yuv
43
-stddev: 13.57 bytes:7602176
38
+d7d295f97a1e07b633f973d2325880ce *./data/a-mpeg4-adv.avi
39
+612f79510c8098f1421aa154047e2bf2 *./data/out.yuv
40
+stddev:  7.25 bytes:7602176
41
+f863f4198521bd76930ea33991b47273 *./data/a-error-mpeg4-adv.avi
42
+ba7fcd126c7c9fead5a5de71aaaf0624 *./data/out.yuv
43
+stddev: 16.80 bytes:7602176
44 44
 328ebd044362116e274739e23c482ee7 *./data/a-mpeg1b.mpg
45 45
 788a9d500dc8986231a18076fc80fd73 *./data/out.yuv
46 46
 stddev: 10.07 bytes:7145472
... ...
@@ -138,7 +138,7 @@ do_ffmpeg $raw_dst -y -i $file -f rawvideo $raw_dst
138 138
 
139 139
 # mpeg2 encoding interlaced
140 140
 file=${outfile}mpeg2i.mpg
141
-do_ffmpeg $file -y -qscale 10 -f pgmyuv -i $raw_src -vcodec mpeg2video -f mpeg1video -interlace $file 
141
+do_ffmpeg $file -y -qscale 10 -f pgmyuv -i $raw_src -vcodec mpeg2video -f mpeg1video -ildct $file 
142 142
 
143 143
 # mpeg2 decoding
144 144
 do_ffmpeg $raw_dst -y -i $file -f rawvideo $raw_dst
... ...
@@ -26,21 +26,21 @@ stddev:  5.41 bytes:7602176
26 26
 f7828488c31ccb6787367ef4e4a2ad42 *./data/a-h263p.avi
27 27
 7d39d1f272205a6a231d0e0baf32ff9d *./data/out.yuv
28 28
 stddev:  1.91 bytes:7602176
29
-f17dc7346f5d1d4307ecf4507f10fcc6 *./data/a-odivx.mp4
30
-ff7ddb57d9038b94f08c43bae7e1329f *./data/out.yuv
31
-stddev:  5.28 bytes:7602176
29
+a831828595e5764e6ee30c2d9e548385 *./data/a-odivx.mp4
30
+ad75d173bd30d642147f00da21df0012 *./data/out.yuv
31
+stddev:  5.27 bytes:7602176
32 32
 242a7a18c2793e115007bc163861ef4e *./data/a-huffyuv.avi
33 33
 dde5895817ad9d219f79a52d0bdfb001 *./data/out.yuv
34 34
 stddev:  0.00 bytes:7602176
35 35
 6a469f42ce6946dd4c708f9e51e3da6a *./data/a-mpeg4-rc.avi
36 36
 df9de7134d961119705b4e0cabca1f12 *./data/out.yuv
37 37
 stddev:  4.20 bytes:7145472
38
-742ffadf3c309d2c4ac888a6a0905bf9 *./data/a-mpeg4-adv.avi
39
-b02f71e91e9368ce94814ab3d74f91ba *./data/out.yuv
40
-stddev:  4.97 bytes:7602176
41
-f2888ab759ac28aba85a16d3d54b80d0 *./data/a-error-mpeg4-adv.avi
42
-93ab926aad2e658a5bb00c25b7cefdab *./data/out.yuv
43
-stddev:  5.22 bytes:7602176
38
+483504d060b0bd8ac1acfa3a823c2ad7 *./data/a-mpeg4-adv.avi
39
+08d24bdd7da80cffaf8abaa3e71b1843 *./data/out.yuv
40
+stddev:  4.96 bytes:7602176
41
+03ff35856faefb4882eaf4d86d95bea7 *./data/a-error-mpeg4-adv.avi
42
+8550acff0851ee915bd5800f1e20f37c *./data/out.yuv
43
+stddev:  9.66 bytes:7602176
44 44
 671802a2c5078e69f7f422765ea87f2a *./data/a-mpeg1b.mpg
45 45
 d3d5876cef34b728602d5a22eee9249f *./data/out.yuv
46 46
 stddev:  5.93 bytes:7145472