Browse code

qpel encoding; 4mv+b frames encoding finally fixed; chroma ME; 5 comparison functions for ME; b frame encoding speedup; wmv2 codec (unfinished); user specified diamond size for EPZS

Originally committed as revision 1365 to svn://svn.ffmpeg.org/ffmpeg/trunk

Michael Niedermayer authored on 2002/12/28 08:51:46
Showing 18 changed files
... ...
@@ -53,7 +53,7 @@ void avcodec_register_all(void)
53 53
     register_avcodec(&msmpeg4v2_encoder);
54 54
     register_avcodec(&msmpeg4v3_encoder);
55 55
     register_avcodec(&wmv1_encoder);
56
-//    register_avcodec(&wmv2_encoder);
56
+    register_avcodec(&wmv2_encoder);
57 57
     register_avcodec(&huffyuv_encoder);
58 58
 #endif /* CONFIG_ENCODERS */
59 59
     register_avcodec(&rawvideo_codec);
... ...
@@ -66,7 +66,7 @@ void avcodec_register_all(void)
66 66
     register_avcodec(&msmpeg4v2_decoder);
67 67
     register_avcodec(&msmpeg4v3_decoder);
68 68
     register_avcodec(&wmv1_decoder);
69
-//    register_avcodec(&wmv2_decoder);
69
+    register_avcodec(&wmv2_decoder);
70 70
     register_avcodec(&mpeg_decoder);
71 71
     register_avcodec(&h263i_decoder);
72 72
     register_avcodec(&rv10_decoder);
... ...
@@ -5,8 +5,8 @@
5 5
 
6 6
 #define LIBAVCODEC_VERSION_INT 0x000406
7 7
 #define LIBAVCODEC_VERSION     "0.4.6"
8
-#define LIBAVCODEC_BUILD       4646
9
-#define LIBAVCODEC_BUILD_STR   "4646"
8
+#define LIBAVCODEC_BUILD       4647
9
+#define LIBAVCODEC_BUILD_STR   "4647"
10 10
 
11 11
 enum CodecID {
12 12
     CODEC_ID_NONE, 
... ...
@@ -850,6 +850,41 @@ typedef struct AVCodecContext {
850 850
      * decoding: unused
851 851
      */
852 852
     int mb_qmax;
853
+    
854
+    /**
855
+     * motion estimation compare function
856
+     * encoding: set by user.
857
+     * decoding: unused
858
+     */
859
+    int me_cmp;
860
+    /**
861
+     * subpixel motion estimation compare function
862
+     * encoding: set by user.
863
+     * decoding: unused
864
+     */
865
+    int me_sub_cmp;
866
+    /**
867
+     * macroblock compare function (not supported yet)
868
+     * encoding: set by user.
869
+     * decoding: unused
870
+     */
871
+    int mb_cmp;
872
+#define FF_CMP_SAD  0
873
+#define FF_CMP_SSE  1
874
+#define FF_CMP_SATD 2
875
+#define FF_CMP_DCT  3
876
+#define FF_CMP_PSNR 4
877
+#define FF_CMP_BIT  5
878
+#define FF_CMP_RD   6
879
+#define FF_CMP_ZERO 7
880
+#define FF_CMP_CHROMA 256
881
+    
882
+    /**
883
+     * ME diamond size
884
+     * encoding: set by user.
885
+     * decoding: unused
886
+     */
887
+    int dia_size;
853 888
 } AVCodecContext;
854 889
 
855 890
 typedef struct AVCodec {
... ...
@@ -20,6 +20,7 @@
20 20
  */
21 21
 #include "avcodec.h"
22 22
 #include "dsputil.h"
23
+#include "mpegvideo.h"
23 24
 
24 25
 int ff_bit_exact=0;
25 26
 
... ...
@@ -144,7 +145,28 @@ static int pix_norm1_c(UINT8 * pix, int line_size)
144 144
 }
145 145
 
146 146
 
147
-static int pix_norm_c(UINT8 * pix1, UINT8 * pix2, int line_size)
147
+static int sse8_c(void *v, UINT8 * pix1, UINT8 * pix2, int line_size)
148
+{
149
+    int s, i;
150
+    UINT32 *sq = squareTbl + 256;
151
+
152
+    s = 0;
153
+    for (i = 0; i < 8; i++) {
154
+        s += sq[pix1[0] - pix2[0]];
155
+        s += sq[pix1[1] - pix2[1]];
156
+        s += sq[pix1[2] - pix2[2]];
157
+        s += sq[pix1[3] - pix2[3]];
158
+        s += sq[pix1[4] - pix2[4]];
159
+        s += sq[pix1[5] - pix2[5]];
160
+        s += sq[pix1[6] - pix2[6]];
161
+        s += sq[pix1[7] - pix2[7]];
162
+        pix1 += line_size;
163
+        pix2 += line_size;
164
+    }
165
+    return s;
166
+}
167
+
168
+static int sse16_c(void *v, UINT8 * pix1, UINT8 * pix2, int line_size)
148 169
 {
149 170
     int s, i, j;
150 171
     UINT32 *sq = squareTbl + 256;
... ...
@@ -1141,7 +1163,103 @@ QPEL_MC(0, avg_       , _       , op_avg)
1141 1141
 #undef op_put
1142 1142
 #undef op_put_no_rnd
1143 1143
 
1144
-static int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size)
1144
+static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
1145
+    uint8_t *cm = cropTbl + MAX_NEG_CROP;
1146
+    int i;
1147
+
1148
+    for(i=0; i<h; i++){
1149
+        dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
1150
+        dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
1151
+        dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
1152
+        dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
1153
+        dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
1154
+        dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
1155
+        dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
1156
+        dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
1157
+        dst+=dstStride;
1158
+        src+=srcStride;        
1159
+    }
1160
+}
1161
+
1162
+static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
1163
+    uint8_t *cm = cropTbl + MAX_NEG_CROP;
1164
+    int i;
1165
+
1166
+    for(i=0; i<w; i++){
1167
+        const int src_1= src[ -srcStride];
1168
+        const int src0 = src[0          ];
1169
+        const int src1 = src[  srcStride];
1170
+        const int src2 = src[2*srcStride];
1171
+        const int src3 = src[3*srcStride];
1172
+        const int src4 = src[4*srcStride];
1173
+        const int src5 = src[5*srcStride];
1174
+        const int src6 = src[6*srcStride];
1175
+        const int src7 = src[7*srcStride];
1176
+        const int src8 = src[8*srcStride];
1177
+        const int src9 = src[9*srcStride];
1178
+        dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
1179
+        dst[1*dstStride]= cm[(9*(src1 + src2) - (src0  + src3) + 8)>>4];
1180
+        dst[2*dstStride]= cm[(9*(src2 + src3) - (src1  + src4) + 8)>>4];
1181
+        dst[3*dstStride]= cm[(9*(src3 + src4) - (src2  + src5) + 8)>>4];
1182
+        dst[4*dstStride]= cm[(9*(src4 + src5) - (src3  + src6) + 8)>>4];
1183
+        dst[5*dstStride]= cm[(9*(src5 + src6) - (src4  + src7) + 8)>>4];
1184
+        dst[6*dstStride]= cm[(9*(src6 + src7) - (src5  + src8) + 8)>>4];
1185
+        dst[7*dstStride]= cm[(9*(src7 + src8) - (src6  + src9) + 8)>>4];
1186
+        src++;
1187
+        dst++;
1188
+    }
1189
+}
1190
+
1191
+static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){
1192
+    put_pixels8_c(dst, src, stride, 8);
1193
+}
1194
+
1195
+static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
1196
+    uint8_t half[64];
1197
+    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
1198
+    put_pixels8_l2(dst, src, half, stride, stride, 8, 8);
1199
+}
1200
+
1201
+static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
1202
+    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
1203
+}
1204
+
1205
+static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
1206
+    uint8_t half[64];
1207
+    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
1208
+    put_pixels8_l2(dst, src+1, half, stride, stride, 8, 8);
1209
+}
1210
+
1211
+static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
1212
+    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
1213
+}
1214
+
1215
+static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
1216
+    uint8_t halfH[88];
1217
+    uint8_t halfV[64];
1218
+    uint8_t halfHV[64];
1219
+    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
1220
+    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
1221
+    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
1222
+    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
1223
+}
1224
+static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
1225
+    uint8_t halfH[88];
1226
+    uint8_t halfV[64];
1227
+    uint8_t halfHV[64];
1228
+    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
1229
+    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
1230
+    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
1231
+    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
1232
+}
1233
+static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
1234
+    uint8_t halfH[88];
1235
+    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
1236
+    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
1237
+}
1238
+
1239
+
1240
+static inline int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size)
1145 1241
 {
1146 1242
     int s, i;
1147 1243
 
... ...
@@ -1257,7 +1375,7 @@ static int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
1257 1257
     return s;
1258 1258
 }
1259 1259
 
1260
-static int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size)
1260
+static inline int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size)
1261 1261
 {
1262 1262
     int s, i;
1263 1263
 
... ...
@@ -1341,6 +1459,14 @@ static int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
1341 1341
     return s;
1342 1342
 }
1343 1343
 
1344
+static int sad16x16_c(void *s, uint8_t *a, uint8_t *b, int stride){
1345
+    return pix_abs16x16_c(a,b,stride);
1346
+}
1347
+
1348
+static int sad8x8_c(void *s, uint8_t *a, uint8_t *b, int stride){
1349
+    return pix_abs8x8_c(a,b,stride);
1350
+}
1351
+
1344 1352
 void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, int last)
1345 1353
 {
1346 1354
     int i;
... ...
@@ -1399,6 +1525,156 @@ static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
1399 1399
         dst[i+0] = src1[i+0]-src2[i+0];
1400 1400
 }
1401 1401
 
1402
+#define BUTTERFLY2(o1,o2,i1,i2) \
1403
+o1= (i1)+(i2);\
1404
+o2= (i1)-(i2);
1405
+
1406
+#define BUTTERFLY1(x,y) \
1407
+{\
1408
+    int a,b;\
1409
+    a= x;\
1410
+    b= y;\
1411
+    x= a+b;\
1412
+    y= a-b;\
1413
+}
1414
+
1415
+#define BUTTERFLYA(x,y) (ABS((x)+(y)) + ABS((x)-(y)))
1416
+
1417
+static int hadamard8_diff_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride){
1418
+    int i;
1419
+    int temp[64];
1420
+    int sum=0;
1421
+
1422
+    for(i=0; i<8; i++){
1423
+        //FIXME try pointer walks
1424
+        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
1425
+        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
1426
+        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
1427
+        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);
1428
+        
1429
+        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
1430
+        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
1431
+        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
1432
+        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
1433
+        
1434
+        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
1435
+        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
1436
+        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
1437
+        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
1438
+    }
1439
+
1440
+    for(i=0; i<8; i++){
1441
+        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
1442
+        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
1443
+        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
1444
+        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
1445
+        
1446
+        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
1447
+        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
1448
+        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
1449
+        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
1450
+
1451
+        sum += 
1452
+             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
1453
+            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
1454
+            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
1455
+            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
1456
+    }
1457
+#if 0
1458
+static int maxi=0;
1459
+if(sum>maxi){
1460
+    maxi=sum;
1461
+    printf("MAX:%d\n", maxi);
1462
+}
1463
+#endif
1464
+    return sum;
1465
+}
1466
+
1467
+static int hadamard8_abs_c(uint8_t *src, int stride, int mean){
1468
+    int i;
1469
+    int temp[64];
1470
+    int sum=0;
1471
+//FIXME OOOPS ignore 0 term instead of mean mess
1472
+    for(i=0; i<8; i++){
1473
+        //FIXME try pointer walks
1474
+        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-mean,src[stride*i+1]-mean);
1475
+        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-mean,src[stride*i+3]-mean);
1476
+        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-mean,src[stride*i+5]-mean);
1477
+        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-mean,src[stride*i+7]-mean);
1478
+        
1479
+        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
1480
+        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
1481
+        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
1482
+        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
1483
+        
1484
+        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
1485
+        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
1486
+        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
1487
+        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
1488
+    }
1489
+
1490
+    for(i=0; i<8; i++){
1491
+        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
1492
+        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
1493
+        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
1494
+        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
1495
+        
1496
+        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
1497
+        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
1498
+        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
1499
+        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
1500
+    
1501
+        sum += 
1502
+             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
1503
+            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
1504
+            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
1505
+            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
1506
+    }
1507
+    
1508
+    return sum;
1509
+}
1510
+
1511
+static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){
1512
+    MpegEncContext * const s= (MpegEncContext *)c;
1513
+    DCTELEM temp[64];
1514
+    int sum=0, i;
1515
+
1516
+    s->dsp.diff_pixels(temp, src1, src2, stride);
1517
+    s->fdct(temp);
1518
+
1519
+    for(i=0; i<64; i++)
1520
+        sum+= ABS(temp[i]);
1521
+        
1522
+    return sum;
1523
+}
1524
+
1525
+void simple_idct(INT16 *block); //FIXME
1526
+
1527
+static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){
1528
+    MpegEncContext * const s= (MpegEncContext *)c;
1529
+    DCTELEM temp[64], bak[64];
1530
+    int sum=0, i;
1531
+
1532
+    s->mb_intra=0;
1533
+    
1534
+    s->dsp.diff_pixels(temp, src1, src2, stride);
1535
+    
1536
+    memcpy(bak, temp, 64*sizeof(DCTELEM));
1537
+    
1538
+    s->dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
1539
+    s->dct_unquantize(s, temp, 0, s->qscale);
1540
+    simple_idct(temp); //FIXME 
1541
+    
1542
+    for(i=0; i<64; i++)
1543
+        sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
1544
+        
1545
+    return sum;
1546
+}
1547
+
1548
+WARPER88_1616(hadamard8_diff_c, hadamard8_diff16_c)
1549
+WARPER88_1616(dct_sad8x8_c, dct_sad16x16_c)
1550
+WARPER88_1616(quant_psnr8x8_c, quant_psnr16x16_c)
1551
+
1402 1552
 void dsputil_init(DSPContext* c, unsigned mask)
1403 1553
 {
1404 1554
     static int init_done = 0;
... ...
@@ -1429,7 +1705,8 @@ void dsputil_init(DSPContext* c, unsigned mask)
1429 1429
     c->clear_blocks = clear_blocks_c;
1430 1430
     c->pix_sum = pix_sum_c;
1431 1431
     c->pix_norm1 = pix_norm1_c;
1432
-    c->pix_norm = pix_norm_c;
1432
+    c->sse[0]= sse16_c;
1433
+    c->sse[1]= sse8_c;
1433 1434
 
1434 1435
     /* TODO [0] 16  [1] 8 */
1435 1436
     c->pix_abs16x16     = pix_abs16x16_c;
... ...
@@ -1489,6 +1766,28 @@ void dsputil_init(DSPContext* c, unsigned mask)
1489 1489
     /* dspfunc(avg_no_rnd_qpel, 1, 8); */
1490 1490
 #undef dspfunc
1491 1491
 
1492
+    c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
1493
+    c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
1494
+    c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
1495
+    c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
1496
+    c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
1497
+    c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
1498
+    c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
1499
+    c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
1500
+    
1501
+    c->hadamard8_diff[0]= hadamard8_diff16_c;
1502
+    c->hadamard8_diff[1]= hadamard8_diff_c;
1503
+    c->hadamard8_abs = hadamard8_abs_c;
1504
+    
1505
+    c->dct_sad[0]= dct_sad16x16_c;
1506
+    c->dct_sad[1]= dct_sad8x8_c;
1507
+    
1508
+    c->sad[0]= sad16x16_c;
1509
+    c->sad[1]= sad8x8_c;
1510
+    
1511
+    c->quant_psnr[0]= quant_psnr16x16_c;
1512
+    c->quant_psnr[1]= quant_psnr8x8_c;
1513
+    
1492 1514
     c->add_bytes= add_bytes_c;
1493 1515
     c->diff_bytes= diff_bytes_c;
1494 1516
 
... ...
@@ -1516,7 +1815,6 @@ void dsputil_init(DSPContext* c, unsigned mask)
1516 1516
 #ifdef HAVE_MMI
1517 1517
     dsputil_init_mmi(c, mask);
1518 1518
 #endif
1519
-
1520 1519
 }
1521 1520
 
1522 1521
 /* remove any non bit exact operation (testing purpose) */
... ...
@@ -79,13 +79,10 @@ static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
79 79
 
80 80
 /* motion estimation */
81 81
 
82
-typedef int (*op_pixels_abs_func)(UINT8 *blk1/*align width (8 or 16)*/, UINT8 *blk2/*align 1*/, int line_size);
83
-/*
84
-int pix_abs16x16_c(UINT8 *blk1, UINT8 *blk2, int lx);
85
-int pix_abs16x16_x2_c(UINT8 *blk1, UINT8 *blk2, int lx);
86
-int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx);
87
-int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx);
88
-*/
82
+typedef int (*op_pixels_abs_func)(UINT8 *blk1/*align width (8 or 16)*/, UINT8 *blk2/*align 1*/, int line_size)/* __attribute__ ((const))*/;
83
+
84
+typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, UINT8 *blk1/*align width (8 or 16)*/, UINT8 *blk2/*align 1*/, int line_size)/* __attribute__ ((const))*/;
85
+
89 86
 typedef struct DSPContext {
90 87
     /* pixel ops : interface with DCT */
91 88
     void (*get_pixels)(DCTELEM *block/*align 16*/, const UINT8 *pixels/*align 8*/, int line_size);
... ...
@@ -98,7 +95,16 @@ typedef struct DSPContext {
98 98
     void (*clear_blocks)(DCTELEM *blocks/*align 16*/);
99 99
     int (*pix_sum)(UINT8 * pix, int line_size);
100 100
     int (*pix_norm1)(UINT8 * pix, int line_size);
101
-    int (*pix_norm)(UINT8 * pix1, UINT8 * pix2, int line_size);
101
+    me_cmp_func sad[2]; /* identical to pix_absAxA except additional void * */
102
+    me_cmp_func sse[2];
103
+    me_cmp_func hadamard8_diff[2];
104
+    me_cmp_func dct_sad[2];
105
+    me_cmp_func quant_psnr[2];
106
+    int (*hadamard8_abs )(uint8_t *src, int stride, int mean);
107
+
108
+    me_cmp_func me_cmp[11];
109
+    me_cmp_func me_sub_cmp[11];
110
+    me_cmp_func mb_cmp[11];
102 111
 
103 112
     /* maybe create an array for 16/8 functions */
104 113
     op_pixels_func put_pixels_tab[2][4];
... ...
@@ -109,6 +115,7 @@ typedef struct DSPContext {
109 109
     qpel_mc_func avg_qpel_pixels_tab[2][16];
110 110
     qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
111 111
     qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16];
112
+    qpel_mc_func put_mspel_pixels_tab[8];
112 113
 
113 114
     op_pixels_abs_func pix_abs16x16;
114 115
     op_pixels_abs_func pix_abs16x16_x2;
... ...
@@ -120,9 +127,8 @@ typedef struct DSPContext {
120 120
     op_pixels_abs_func pix_abs8x8_xy2;
121 121
     
122 122
     /* huffyuv specific */
123
-    //FIXME note: alignment isnt guranteed currently but could be if needed
124 123
     void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w);
125
-    void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 16*/,int w);
124
+    void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 1*/,int w);
126 125
 } DSPContext;
127 126
 
128 127
 void dsputil_init(DSPContext* p, unsigned mask);
... ...
@@ -156,6 +162,7 @@ static inline void emms(void)
156 156
     __asm __volatile ("emms;":::"memory");
157 157
 }
158 158
 
159
+
159 160
 #define emms_c() \
160 161
 {\
161 162
     if (mm_flags & MM_MMX)\
... ...
@@ -281,6 +288,14 @@ void ff_mdct_calc(MDCTContext *s, FFTSample *out,
281 281
                const FFTSample *input, FFTSample *tmp);
282 282
 void ff_mdct_end(MDCTContext *s);
283 283
 
284
+#define WARPER88_1616(name8, name16)\
285
+static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride){\
286
+    return name8(s, dst           , src           , stride)\
287
+          +name8(s, dst+8         , src+8         , stride)\
288
+          +name8(s, dst  +8*stride, src  +8*stride, stride)\
289
+          +name8(s, dst+8+8*stride, src+8+8*stride, stride);\
290
+}
291
+
284 292
 #ifndef HAVE_LRINTF
285 293
 /* XXX: add ISOC specific test to avoid specific BSD testing. */
286 294
 /* better than nothing implementation. */
... ...
@@ -204,10 +204,6 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
204 204
 		
205 205
         put_bits(&s->pb,1,0); /* Reference Picture Resampling: off */
206 206
         put_bits(&s->pb,1,0); /* Reduced-Resolution Update: off */
207
-        if (s->pict_type == I_TYPE)
208
-            s->no_rounding = 0;
209
-        else
210
-            s->no_rounding ^= 1;
211 207
         put_bits(&s->pb,1,s->no_rounding); /* Rounding Type */
212 208
         put_bits(&s->pb,2,0); /* Reserved */
213 209
         put_bits(&s->pb,1,1); /* "1" to prevent start code emulation */
... ...
@@ -392,6 +388,57 @@ void ff_clean_mpeg4_qscales(MpegEncContext *s){
392 392
     }
393 393
 }
394 394
 
395
+void ff_mpeg4_set_direct_mv(MpegEncContext *s, int mx, int my){
396
+    const int mb_index= s->mb_x + s->mb_y*s->mb_width;
397
+    int xy= s->block_index[0];
398
+    uint16_t time_pp= s->pp_time;
399
+    uint16_t time_pb= s->pb_time;
400
+    int i;
401
+        
402
+    //FIXME avoid divides
403
+    switch(s->co_located_type_table[mb_index]){
404
+    case 0:
405
+        s->mv_type= MV_TYPE_16X16;
406
+        s->mv[0][0][0] = s->motion_val[xy][0]*time_pb/time_pp + mx;
407
+        s->mv[0][0][1] = s->motion_val[xy][1]*time_pb/time_pp + my;
408
+        s->mv[1][0][0] = mx ? s->mv[0][0][0] - s->motion_val[xy][0]
409
+                            : s->motion_val[xy][0]*(time_pb - time_pp)/time_pp;
410
+        s->mv[1][0][1] = my ? s->mv[0][0][1] - s->motion_val[xy][1] 
411
+                            : s->motion_val[xy][1]*(time_pb - time_pp)/time_pp;
412
+        break;
413
+    case CO_LOCATED_TYPE_4MV:
414
+        s->mv_type = MV_TYPE_8X8;
415
+        for(i=0; i<4; i++){
416
+            xy= s->block_index[i];
417
+            s->mv[0][i][0] = s->motion_val[xy][0]*time_pb/time_pp + mx;
418
+            s->mv[0][i][1] = s->motion_val[xy][1]*time_pb/time_pp + my;
419
+            s->mv[1][i][0] = mx ? s->mv[0][i][0] - s->motion_val[xy][0]
420
+                                : s->motion_val[xy][0]*(time_pb - time_pp)/time_pp;
421
+            s->mv[1][i][1] = my ? s->mv[0][i][1] - s->motion_val[xy][1] 
422
+                                : s->motion_val[xy][1]*(time_pb - time_pp)/time_pp;
423
+        }
424
+        break;
425
+    case CO_LOCATED_TYPE_FIELDMV:
426
+        s->mv_type = MV_TYPE_FIELD;
427
+        for(i=0; i<2; i++){
428
+            if(s->top_field_first){
429
+                time_pp= s->pp_field_time - s->field_select_table[mb_index][i] + i;
430
+                time_pb= s->pb_field_time - s->field_select_table[mb_index][i] + i;
431
+            }else{
432
+                time_pp= s->pp_field_time + s->field_select_table[mb_index][i] - i;
433
+                time_pb= s->pb_field_time + s->field_select_table[mb_index][i] - i;
434
+            }
435
+            s->mv[0][i][0] = s->field_mv_table[mb_index][i][0]*time_pb/time_pp + mx;
436
+            s->mv[0][i][1] = s->field_mv_table[mb_index][i][1]*time_pb/time_pp + my;
437
+            s->mv[1][i][0] = mx ? s->mv[0][i][0] - s->field_mv_table[mb_index][i][0]
438
+                                : s->field_mv_table[mb_index][i][0]*(time_pb - time_pp)/time_pp;
439
+            s->mv[1][i][1] = my ? s->mv[0][i][1] - s->field_mv_table[mb_index][i][1] 
440
+                                : s->field_mv_table[mb_index][i][1]*(time_pb - time_pp)/time_pp;
441
+        }
442
+        break;
443
+    }
444
+}
445
+
395 446
 #ifdef CONFIG_ENCODERS
396 447
 void mpeg4_encode_mb(MpegEncContext * s,
397 448
 		    DCTELEM block[6][64],
... ...
@@ -442,7 +489,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
442 442
 
443 443
                 return;
444 444
             }
445
-
445
+            
446 446
             if ((cbp | motion_x | motion_y | mb_type) ==0) {
447 447
                 /* direct MB with MV={0,0} */
448 448
                 assert(s->dquant==0);
... ...
@@ -1386,7 +1433,7 @@ void h263_encode_init(MpegEncContext *s)
1386 1386
 
1387 1387
         init_mv_penalty_and_fcode(s);
1388 1388
     }
1389
-    s->mv_penalty= mv_penalty; //FIXME exact table for msmpeg4 & h263p
1389
+    s->me.mv_penalty= mv_penalty; //FIXME exact table for msmpeg4 & h263p
1390 1390
     
1391 1391
     // use fcodes >1 only for mpeg4 & h263 & h263p FIXME
1392 1392
     switch(s->codec_id){
... ...
@@ -1519,7 +1566,7 @@ void ff_set_mpeg4_time(MpegEncContext * s, int picture_number){
1519 1519
 
1520 1520
 static void mpeg4_encode_vol_header(MpegEncContext * s)
1521 1521
 {
1522
-    int vo_ver_id=1; //must be 2 if we want GMC or q-pel
1522
+    int vo_ver_id=2; //must be 2 if we want GMC or q-pel
1523 1523
     char buf[255];
1524 1524
 
1525 1525
     if(s->max_b_frames){
... ...
@@ -1584,7 +1631,7 @@ static void mpeg4_encode_vol_header(MpegEncContext * s)
1584 1584
     if(s->mpeg_quant) put_bits(&s->pb, 2, 0); /* no custom matrixes */
1585 1585
 
1586 1586
     if (vo_ver_id != 1)
1587
-        put_bits(&s->pb, 1, s->quarter_sample=0);
1587
+        put_bits(&s->pb, 1, s->quarter_sample);
1588 1588
     put_bits(&s->pb, 1, 1);		/* complexity estimation disable */
1589 1589
     s->resync_marker= s->rtp_mode;
1590 1590
     put_bits(&s->pb, 1, s->resync_marker ? 0 : 1);/* resync marker disable */
... ...
@@ -1618,7 +1665,6 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
1618 1618
     int time_div, time_mod;
1619 1619
     
1620 1620
     if(s->pict_type==I_TYPE){
1621
-        s->no_rounding=0;
1622 1621
         if(picture_number==0 || !s->strict_std_compliance)
1623 1622
             mpeg4_encode_vol_header(s);
1624 1623
     }
... ...
@@ -1645,7 +1691,6 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
1645 1645
     put_bits(&s->pb, 1, 1);	/* vop coded */
1646 1646
     if (    s->pict_type == P_TYPE 
1647 1647
         || (s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE)) {
1648
-        s->no_rounding ^= 1;
1649 1648
 	put_bits(&s->pb, 1, s->no_rounding);	/* rounding type */
1650 1649
     }
1651 1650
     put_bits(&s->pb, 3, 0);	/* intra dc VLC threshold */
... ...
@@ -1996,6 +2041,61 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n
1996 1996
     }
1997 1997
 #endif
1998 1998
 }
1999
+
2000
+static inline int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, int n, int intra_dc, 
2001
+                               UINT8 *scan_table)
2002
+{
2003
+    int i, last_non_zero;
2004
+    const RLTable *rl;
2005
+    UINT8 *len_tab;
2006
+    const int last_index = s->block_last_index[n];
2007
+    int len=0;
2008
+
2009
+    if (s->mb_intra) { //Note gcc (3.2.1 at least) will optimize this away
2010
+	/* mpeg4 based DC predictor */
2011
+	//mpeg4_encode_dc(dc_pb, intra_dc, n); //FIXME
2012
+        if(last_index<1) return len;
2013
+	i = 1;
2014
+        rl = &rl_intra;
2015
+        len_tab = uni_mpeg4_intra_rl_len;
2016
+    } else {
2017
+        if(last_index<0) return 0;
2018
+	i = 0;
2019
+        rl = &rl_inter;
2020
+        len_tab = uni_mpeg4_inter_rl_len;
2021
+    }
2022
+
2023
+    /* AC coefs */
2024
+    last_non_zero = i - 1;
2025
+    for (; i < last_index; i++) {
2026
+	int level = block[ scan_table[i] ];
2027
+	if (level) {
2028
+	    int run = i - last_non_zero - 1;
2029
+            level+=64;
2030
+            if((level&(~127)) == 0){
2031
+                const int index= UNI_MPEG4_ENC_INDEX(0, run, level);
2032
+                len += len_tab[index];
2033
+            }else{ //ESC3
2034
+                len += 7+2+1+6+1+12+1;
2035
+            }
2036
+	    last_non_zero = i;
2037
+	}
2038
+    }
2039
+    /*if(i<=last_index)*/{
2040
+	int level = block[ scan_table[i] ];
2041
+        int run = i - last_non_zero - 1;
2042
+        level+=64;
2043
+        if((level&(~127)) == 0){
2044
+            const int index= UNI_MPEG4_ENC_INDEX(1, run, level);
2045
+            len += len_tab[index];
2046
+        }else{ //ESC3
2047
+            len += 7+2+1+6+1+12+1;
2048
+        }
2049
+    }
2050
+    
2051
+    return len;
2052
+}
2053
+
1999 2054
 #endif
2000 2055
 
2001 2056
 
... ...
@@ -3050,8 +3150,6 @@ int ff_h263_decode_mb(MpegEncContext *s,
3050 3050
         int modb1; // first bit of modb
3051 3051
         int modb2; // second bit of modb
3052 3052
         int mb_type;
3053
-        uint16_t time_pp;
3054
-        uint16_t time_pb;
3055 3053
         int xy;
3056 3054
 
3057 3055
         s->mb_intra = 0; //B-frames never contain intra blocks
... ...
@@ -3173,9 +3271,6 @@ int ff_h263_decode_mb(MpegEncContext *s,
3173 3173
         }
3174 3174
           
3175 3175
         if(mb_type==4 || mb_type==MB_TYPE_B_DIRECT){
3176
-            int mb_index= s->mb_x + s->mb_y*s->mb_width;
3177
-            int i;
3178
-            
3179 3176
             if(mb_type==4)
3180 3177
                 mx=my=0;
3181 3178
             else{
... ...
@@ -3184,55 +3279,7 @@ int ff_h263_decode_mb(MpegEncContext *s,
3184 3184
             }
3185 3185
  
3186 3186
             s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3187
-            xy= s->block_index[0];
3188
-            time_pp= s->pp_time;
3189
-            time_pb= s->pb_time;
3190
-            
3191
-            //FIXME avoid divides
3192
-            switch(s->co_located_type_table[mb_index]){
3193
-            case 0:
3194
-                s->mv_type= MV_TYPE_16X16;
3195
-                s->mv[0][0][0] = s->motion_val[xy][0]*time_pb/time_pp + mx;
3196
-                s->mv[0][0][1] = s->motion_val[xy][1]*time_pb/time_pp + my;
3197
-                s->mv[1][0][0] = mx ? s->mv[0][0][0] - s->motion_val[xy][0]
3198
-                                    : s->motion_val[xy][0]*(time_pb - time_pp)/time_pp;
3199
-                s->mv[1][0][1] = my ? s->mv[0][0][1] - s->motion_val[xy][1] 
3200
-                                    : s->motion_val[xy][1]*(time_pb - time_pp)/time_pp;
3201
-                PRINT_MB_TYPE(mb_type==4 ? "D" : "S");
3202
-                break;
3203
-            case CO_LOCATED_TYPE_4MV:
3204
-                s->mv_type = MV_TYPE_8X8;
3205
-                for(i=0; i<4; i++){
3206
-                    xy= s->block_index[i];
3207
-                    s->mv[0][i][0] = s->motion_val[xy][0]*time_pb/time_pp + mx;
3208
-                    s->mv[0][i][1] = s->motion_val[xy][1]*time_pb/time_pp + my;
3209
-                    s->mv[1][i][0] = mx ? s->mv[0][i][0] - s->motion_val[xy][0]
3210
-                                        : s->motion_val[xy][0]*(time_pb - time_pp)/time_pp;
3211
-                    s->mv[1][i][1] = my ? s->mv[0][i][1] - s->motion_val[xy][1] 
3212
-                                        : s->motion_val[xy][1]*(time_pb - time_pp)/time_pp;
3213
-                }
3214
-                PRINT_MB_TYPE("4");
3215
-                break;
3216
-            case CO_LOCATED_TYPE_FIELDMV:
3217
-                s->mv_type = MV_TYPE_FIELD;
3218
-                for(i=0; i<2; i++){
3219
-                    if(s->top_field_first){
3220
-                        time_pp= s->pp_field_time - s->field_select_table[mb_index][i] + i;
3221
-                        time_pb= s->pb_field_time - s->field_select_table[mb_index][i] + i;
3222
-                    }else{
3223
-                        time_pp= s->pp_field_time + s->field_select_table[mb_index][i] - i;
3224
-                        time_pb= s->pb_field_time + s->field_select_table[mb_index][i] - i;
3225
-                    }
3226
-                    s->mv[0][i][0] = s->field_mv_table[mb_index][i][0]*time_pb/time_pp + mx;
3227
-                    s->mv[0][i][1] = s->field_mv_table[mb_index][i][1]*time_pb/time_pp + my;
3228
-                    s->mv[1][i][0] = mx ? s->mv[0][i][0] - s->field_mv_table[mb_index][i][0]
3229
-                                        : s->field_mv_table[mb_index][i][0]*(time_pb - time_pp)/time_pp;
3230
-                    s->mv[1][i][1] = my ? s->mv[0][i][1] - s->field_mv_table[mb_index][i][1] 
3231
-                                        : s->field_mv_table[mb_index][i][1]*(time_pb - time_pp)/time_pp;
3232
-                }
3233
-                PRINT_MB_TYPE("=");
3234
-                break;
3235
-            }
3187
+            ff_mpeg4_set_direct_mv(s, mx, my);
3236 3188
         }
3237 3189
         
3238 3190
         if(mb_type<0 || mb_type>4){
... ...
@@ -40,7 +40,7 @@ static inline long long rdtsc()
40 40
 }
41 41
 #endif
42 42
 
43
-static int h263_decode_init(AVCodecContext *avctx)
43
+int ff_h263_decode_init(AVCodecContext *avctx)
44 44
 {
45 45
     MpegEncContext *s = avctx->priv_data;
46 46
 
... ...
@@ -113,7 +113,7 @@ static int h263_decode_init(AVCodecContext *avctx)
113 113
     return 0;
114 114
 }
115 115
 
116
-static int h263_decode_end(AVCodecContext *avctx)
116
+int ff_h263_decode_end(AVCodecContext *avctx)
117 117
 {
118 118
     MpegEncContext *s = avctx->priv_data;
119 119
 
... ...
@@ -343,7 +343,7 @@ static int mpeg4_find_frame_end(MpegEncContext *s, UINT8 *buf, int buf_size){
343 343
     return -1;
344 344
 }
345 345
 
346
-static int h263_decode_frame(AVCodecContext *avctx, 
346
+int ff_h263_decode_frame(AVCodecContext *avctx, 
347 347
                              void *data, int *data_size,
348 348
                              UINT8 *buf, int buf_size)
349 349
 {
... ...
@@ -416,9 +416,11 @@ retry:
416 416
         if (MPV_common_init(s) < 0) //we need the idct permutaton for reading a custom matrix
417 417
             return -1;
418 418
     }
419
-        
419
+      
420 420
     /* let's go :-) */
421
-    if (s->h263_msmpeg4) {
421
+    if (s->msmpeg4_version==5) {
422
+        ret= ff_wmv2_decode_picture_header(s);
423
+    } else if (s->msmpeg4_version) {
422 424
         ret = msmpeg4_decode_picture_header(s);
423 425
     } else if (s->h263_pred) {
424 426
         if(s->avctx->extradata_size && s->picture_number==0){
... ...
@@ -634,7 +636,6 @@ retry:
634 634
         }
635 635
         if(num_end_markers || error){
636 636
             fprintf(stderr, "concealing errors\n");
637
-//printf("type:%d\n", s->pict_type);
638 637
             ff_error_resilience(s);
639 638
         }
640 639
     }
... ...
@@ -713,10 +714,10 @@ AVCodec mpeg4_decoder = {
713 713
     CODEC_TYPE_VIDEO,
714 714
     CODEC_ID_MPEG4,
715 715
     sizeof(MpegEncContext),
716
-    h263_decode_init,
716
+    ff_h263_decode_init,
717 717
     NULL,
718
-    h263_decode_end,
719
-    h263_decode_frame,
718
+    ff_h263_decode_end,
719
+    ff_h263_decode_frame,
720 720
     CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED,
721 721
 };
722 722
 
... ...
@@ -725,10 +726,10 @@ AVCodec h263_decoder = {
725 725
     CODEC_TYPE_VIDEO,
726 726
     CODEC_ID_H263,
727 727
     sizeof(MpegEncContext),
728
-    h263_decode_init,
728
+    ff_h263_decode_init,
729 729
     NULL,
730
-    h263_decode_end,
731
-    h263_decode_frame,
730
+    ff_h263_decode_end,
731
+    ff_h263_decode_frame,
732 732
     CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
733 733
 };
734 734
 
... ...
@@ -737,10 +738,10 @@ AVCodec msmpeg4v1_decoder = {
737 737
     CODEC_TYPE_VIDEO,
738 738
     CODEC_ID_MSMPEG4V1,
739 739
     sizeof(MpegEncContext),
740
-    h263_decode_init,
740
+    ff_h263_decode_init,
741 741
     NULL,
742
-    h263_decode_end,
743
-    h263_decode_frame,
742
+    ff_h263_decode_end,
743
+    ff_h263_decode_frame,
744 744
     CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
745 745
 };
746 746
 
... ...
@@ -749,10 +750,10 @@ AVCodec msmpeg4v2_decoder = {
749 749
     CODEC_TYPE_VIDEO,
750 750
     CODEC_ID_MSMPEG4V2,
751 751
     sizeof(MpegEncContext),
752
-    h263_decode_init,
752
+    ff_h263_decode_init,
753 753
     NULL,
754
-    h263_decode_end,
755
-    h263_decode_frame,
754
+    ff_h263_decode_end,
755
+    ff_h263_decode_frame,
756 756
     CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
757 757
 };
758 758
 
... ...
@@ -761,10 +762,10 @@ AVCodec msmpeg4v3_decoder = {
761 761
     CODEC_TYPE_VIDEO,
762 762
     CODEC_ID_MSMPEG4V3,
763 763
     sizeof(MpegEncContext),
764
-    h263_decode_init,
764
+    ff_h263_decode_init,
765 765
     NULL,
766
-    h263_decode_end,
767
-    h263_decode_frame,
766
+    ff_h263_decode_end,
767
+    ff_h263_decode_frame,
768 768
     CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
769 769
 };
770 770
 
... ...
@@ -773,22 +774,10 @@ AVCodec wmv1_decoder = {
773 773
     CODEC_TYPE_VIDEO,
774 774
     CODEC_ID_WMV1,
775 775
     sizeof(MpegEncContext),
776
-    h263_decode_init,
777
-    NULL,
778
-    h263_decode_end,
779
-    h263_decode_frame,
780
-    CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
781
-};
782
-
783
-AVCodec wmv2_decoder = {
784
-    "wmv2",
785
-    CODEC_TYPE_VIDEO,
786
-    CODEC_ID_WMV2,
787
-    sizeof(MpegEncContext),
788
-    h263_decode_init,
776
+    ff_h263_decode_init,
789 777
     NULL,
790
-    h263_decode_end,
791
-    h263_decode_frame,
778
+    ff_h263_decode_end,
779
+    ff_h263_decode_frame,
792 780
     CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
793 781
 };
794 782
 
... ...
@@ -797,10 +786,10 @@ AVCodec h263i_decoder = {
797 797
     CODEC_TYPE_VIDEO,
798 798
     CODEC_ID_H263I,
799 799
     sizeof(MpegEncContext),
800
-    h263_decode_init,
800
+    ff_h263_decode_init,
801 801
     NULL,
802
-    h263_decode_end,
803
-    h263_decode_frame,
802
+    ff_h263_decode_end,
803
+    ff_h263_decode_frame,
804 804
     CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
805 805
 };
806 806
 
... ...
@@ -43,6 +43,11 @@ int pix_abs8x8_x2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
43 43
 int pix_abs8x8_y2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
44 44
 int pix_abs8x8_xy2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
45 45
 
46
+int sad16x16_mmx(void *s, UINT8 *blk1, UINT8 *blk2, int lx);
47
+int sad8x8_mmx(void *s, UINT8 *blk1, UINT8 *blk2, int lx);
48
+int sad16x16_mmx2(void *s, UINT8 *blk1, UINT8 *blk2, int lx);
49
+int sad8x8_mmx2(void *s, UINT8 *blk1, UINT8 *blk2, int lx);
50
+
46 51
 /* pixel operations */
47 52
 static const uint64_t mm_bone __attribute__ ((aligned(8))) = 0x0101010101010101ULL;
48 53
 static const uint64_t mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL;
... ...
@@ -213,7 +218,7 @@ static void get_pixels_mmx(DCTELEM *block, const UINT8 *pixels, int line_size)
213 213
     );
214 214
 }
215 215
 
216
-static void diff_pixels_mmx(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride)
216
+static inline void diff_pixels_mmx(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride)
217 217
 {
218 218
     asm volatile(
219 219
         "pxor %%mm7, %%mm7	\n\t"
... ...
@@ -496,7 +501,150 @@ static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
496 496
     for(; i<w; i++)
497 497
         dst[i+0] = src1[i+0]-src2[i+0];
498 498
 }
499
+#define LBUTTERFLY(a,b) /* in-place butterfly on packed 16-bit words: a <- a+b, b <- b-a (operands are written source first, destination second) */\
500
+    "paddw " #b ", " #a "		\n\t" /* a = a + b */\
501
+    "paddw " #b ", " #b "		\n\t" /* b = 2*b */\
502
+    "psubw " #a ", " #b "		\n\t" /* b = 2*b - (a + b) = b - a, in terms of the original values */
503
+
504
+#define HADAMARD48\
505
+        LBUTTERFLY(%%mm0, %%mm1)\
506
+        LBUTTERFLY(%%mm2, %%mm3)\
507
+        LBUTTERFLY(%%mm4, %%mm5)\
508
+        LBUTTERFLY(%%mm6, %%mm7)\
509
+        \
510
+        LBUTTERFLY(%%mm0, %%mm2)\
511
+        LBUTTERFLY(%%mm1, %%mm3)\
512
+        LBUTTERFLY(%%mm4, %%mm6)\
513
+        LBUTTERFLY(%%mm5, %%mm7)\
514
+        \
515
+        LBUTTERFLY(%%mm0, %%mm4)\
516
+        LBUTTERFLY(%%mm1, %%mm5)\
517
+        LBUTTERFLY(%%mm2, %%mm6)\
518
+        LBUTTERFLY(%%mm3, %%mm7)
519
+
520
+#define MMABS(a,z)\
521
+    "pxor " #z ", " #z "		\n\t"\
522
+    "pcmpgtw " #a ", " #z "		\n\t"\
523
+    "pxor " #z ", " #a "		\n\t"\
524
+    "psubw " #z ", " #a "		\n\t"
525
+
526
+#define MMABS_SUM(a,z, sum)\
527
+    "pxor " #z ", " #z "		\n\t"\
528
+    "pcmpgtw " #a ", " #z "		\n\t"\
529
+    "pxor " #z ", " #a "		\n\t"\
530
+    "psubw " #z ", " #a "		\n\t"\
531
+    "paddusw " #a ", " #sum "		\n\t"
532
+
533
+    
534
+#define SBUTTERFLY(a,b,t,n)\
535
+    "movq " #a ", " #t "		\n\t" /* abcd */\
536
+    "punpckl" #n " " #b ", " #a "	\n\t" /* aebf */\
537
+    "punpckh" #n " " #b ", " #t "	\n\t" /* cgdh */\
538
+    
539
+#define TRANSPOSE4(a,b,c,d,t)\
540
+    SBUTTERFLY(a,b,t,wd) /* a=aebf t=cgdh */\
541
+    SBUTTERFLY(c,d,b,wd) /* c=imjn b=kolp */\
542
+    SBUTTERFLY(a,c,d,dq) /* a=aeim d=bfjn */\
543
+    SBUTTERFLY(t,b,c,dq) /* t=cgko c=dhlp */
544
+
545
+#define LOAD4(o, a, b, c, d)\
546
+        "movq "#o"(%1), " #a "		\n\t"\
547
+        "movq "#o"+16(%1), " #b "	\n\t"\
548
+        "movq "#o"+32(%1), " #c "	\n\t"\
549
+        "movq "#o"+48(%1), " #d "	\n\t"
550
+
551
+#define STORE4(o, a, b, c, d)\
552
+        "movq "#a", "#o"(%1)		\n\t"\
553
+        "movq "#b", "#o"+16(%1)		\n\t"\
554
+        "movq "#c", "#o"+32(%1)		\n\t"\
555
+        "movq "#d", "#o"+48(%1)		\n\t"\
556
+
557
+static int hadamard8_diff_mmx(void *s, uint8_t *src1, uint8_t *src2, int stride){ // 8x8 metric: two HADAMARD48 passes over diff_pixels_mmx() output (with 4x4-tile transposes in between), then a sum of absolute values; s is unused
558
+    uint64_t temp[16] __align8; // 128-byte scratch: receives the pixel differences and is reused for register spills below
559
+    int sum=0;
560
+
561
+    diff_pixels_mmx((DCTELEM*)temp, src1, src2, stride); // presumably fills temp with src1-src2 as 8 rows of 8 16-bit values (row pitch 16 bytes, matching LOAD4) - confirm
499 562
 
563
+    asm volatile(
564
+        LOAD4(0 , %%mm0, %%mm1, %%mm2, %%mm3)
565
+        LOAD4(64, %%mm4, %%mm5, %%mm6, %%mm7) // first pass: bytes 0..7 of all eight rows
566
+        
567
+        HADAMARD48
568
+        
569
+        "movq %%mm7, 112(%1)		\n\t" // spill mm7: TRANSPOSE4 needs one temporary register
570
+        
571
+        TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm7)
572
+        STORE4(0 , %%mm0, %%mm3, %%mm7, %%mm2)
573
+        
574
+        "movq 112(%1), %%mm7 		\n\t"
575
+        TRANSPOSE4(%%mm4, %%mm5, %%mm6, %%mm7, %%mm0)
576
+        STORE4(64, %%mm4, %%mm7, %%mm0, %%mm6)
577
+
578
+        LOAD4(8 , %%mm0, %%mm1, %%mm2, %%mm3)
579
+        LOAD4(72, %%mm4, %%mm5, %%mm6, %%mm7) // first pass again, now on bytes 8..15 of each row
580
+        
581
+        HADAMARD48
582
+        
583
+        "movq %%mm7, 120(%1)		\n\t"
584
+        
585
+        TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm7)
586
+        STORE4(8 , %%mm0, %%mm3, %%mm7, %%mm2)
587
+        
588
+        "movq 120(%1), %%mm7 		\n\t"
589
+        TRANSPOSE4(%%mm4, %%mm5, %%mm6, %%mm7, %%mm0)
590
+        "movq %%mm7, %%mm5		\n\t"//FIXME remove
591
+        "movq %%mm6, %%mm7		\n\t"
592
+        "movq %%mm0, %%mm6		\n\t" // the three moves above put the transposed tile in mm4..mm7 in place of a STORE4(72)/LOAD4(72) round trip
593
+//        STORE4(72, %%mm4, %%mm7, %%mm0, %%mm6) //FIXME remove
594
+        
595
+        LOAD4(64, %%mm0, %%mm1, %%mm2, %%mm3)
596
+//        LOAD4(72, %%mm4, %%mm5, %%mm6, %%mm7)
597
+        
598
+        HADAMARD48
599
+        "movq %%mm7, 64(%1)		\n\t" // spill mm7 so it can serve as the MMABS scratch register
600
+        MMABS(%%mm0, %%mm7)
601
+        MMABS_SUM(%%mm1, %%mm7, %%mm0)
602
+        MMABS_SUM(%%mm2, %%mm7, %%mm0)
603
+        MMABS_SUM(%%mm3, %%mm7, %%mm0)
604
+        MMABS_SUM(%%mm4, %%mm7, %%mm0)
605
+        MMABS_SUM(%%mm5, %%mm7, %%mm0)
606
+        MMABS_SUM(%%mm6, %%mm7, %%mm0)
607
+        "movq 64(%1), %%mm1		\n\t"
608
+        MMABS_SUM(%%mm1, %%mm7, %%mm0)
609
+        "movq %%mm0, 64(%1)		\n\t" // park the partial sums of this half at offset 64
610
+        
611
+        LOAD4(0 , %%mm0, %%mm1, %%mm2, %%mm3)
612
+        LOAD4(8 , %%mm4, %%mm5, %%mm6, %%mm7)
613
+        
614
+        HADAMARD48
615
+        "movq %%mm7, (%1)		\n\t"
616
+        MMABS(%%mm0, %%mm7)
617
+        MMABS_SUM(%%mm1, %%mm7, %%mm0)
618
+        MMABS_SUM(%%mm2, %%mm7, %%mm0)
619
+        MMABS_SUM(%%mm3, %%mm7, %%mm0)
620
+        MMABS_SUM(%%mm4, %%mm7, %%mm0)
621
+        MMABS_SUM(%%mm5, %%mm7, %%mm0)
622
+        MMABS_SUM(%%mm6, %%mm7, %%mm0)
623
+        "movq (%1), %%mm1		\n\t"
624
+        MMABS_SUM(%%mm1, %%mm7, %%mm0)
625
+        "movq 64(%1), %%mm1		\n\t" // fetch the partial sums parked at offset 64
626
+        MMABS_SUM(%%mm1, %%mm7, %%mm0)
627
+        
628
+        "movq %%mm0, %%mm1		\n\t" // fold the four 16-bit lanes of mm0 into its low word
629
+        "psrlq $32, %%mm0		\n\t"
630
+        "paddusw %%mm1, %%mm0		\n\t"
631
+        "movq %%mm0, %%mm1		\n\t"
632
+        "psrlq $16, %%mm0		\n\t"
633
+        "paddusw %%mm1, %%mm0		\n\t" // paddusw is a saturating unsigned add, so the total is capped at 0xFFFF
634
+        "movd %%mm0, %0			\n\t"
635
+                
636
+        : "=r" (sum)
637
+        : "r"(temp) // NOTE(review): the asm reads and writes temp through %1 but declares no "memory" clobber - confirm this is safe with the compilers in use
638
+    );
639
+    return sum&0xFFFF; // only the low word of the movd result holds the folded sum
640
+}
641
+
642
+WARPER88_1616(hadamard8_diff_mmx, hadamard8_diff16_mmx)
500 643
 
501 644
 #if 0
502 645
 static void just_return() { return; }
... ...
@@ -579,7 +727,13 @@ void dsputil_init_mmx(DSPContext* c, unsigned mask)
579 579
         
580 580
         c->add_bytes= add_bytes_mmx;
581 581
         c->diff_bytes= diff_bytes_mmx;
582
-
582
+        
583
+        c->hadamard8_diff[0]= hadamard8_diff16_mmx;
584
+        c->hadamard8_diff[1]= hadamard8_diff_mmx;
585
+        
586
+        c->sad[0]= sad16x16_mmx;
587
+        c->sad[1]= sad8x8_mmx;
588
+        
583 589
         if (mm_flags & MM_MMXEXT) {
584 590
             c->pix_abs16x16     = pix_abs16x16_mmx2;
585 591
             c->pix_abs16x16_x2  = pix_abs16x16_x2_mmx2;
... ...
@@ -591,6 +745,9 @@ void dsputil_init_mmx(DSPContext* c, unsigned mask)
591 591
             c->pix_abs8x8_y2  = pix_abs8x8_y2_mmx2;
592 592
             c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx2;
593 593
 
594
+            c->sad[0]= sad16x16_mmx2;
595
+            c->sad[1]= sad8x8_mmx2;
596
+            
594 597
             c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
595 598
             c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
596 599
             c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2;
... ...
@@ -274,6 +274,15 @@ int pix_abs8x8_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\
274 274
 \
275 275
     return sum_ ## suf();\
276 276
 }\
277
+int sad8x8_ ## suf(void *s, UINT8 *blk2, UINT8 *blk1, int stride)\
278
+{\
279
+    asm volatile("pxor %%mm7, %%mm7		\n\t"\
280
+                 "pxor %%mm6, %%mm6		\n\t":);\
281
+\
282
+    sad8_ ## suf(blk1, blk2, stride, 3);\
283
+\
284
+    return sum_ ## suf();\
285
+}\
277 286
 \
278 287
 int pix_abs8x8_x2_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\
279 288
 {\
... ...
@@ -324,6 +333,16 @@ int pix_abs16x16_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\
324 324
 \
325 325
     return sum_ ## suf();\
326 326
 }\
327
+int sad16x16_ ## suf(void *s, UINT8 *blk2, UINT8 *blk1, int stride)\
328
+{\
329
+    asm volatile("pxor %%mm7, %%mm7		\n\t"\
330
+                 "pxor %%mm6, %%mm6		\n\t":);\
331
+\
332
+    sad8_ ## suf(blk1  , blk2  , stride, 4);\
333
+    sad8_ ## suf(blk1+8, blk2+8, stride, 4);\
334
+\
335
+    return sum_ ## suf();\
336
+}\
327 337
 int pix_abs16x16_x2_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\
328 338
 {\
329 339
     asm volatile("pxor %%mm7, %%mm7		\n\t"\
... ...
@@ -26,8 +26,10 @@
26 26
 #include "dsputil.h"
27 27
 #include "mpegvideo.h"
28 28
 
29
+//#undef NDEBUG
30
+//#include <assert.h>
31
+
29 32
 #define SQ(a) ((a)*(a))
30
-#define INTER_BIAS	257
31 33
 
32 34
 #define P_LAST P[0]
33 35
 #define P_LEFT P[1]
... ...
@@ -40,7 +42,295 @@
40 40
 #define P_LAST_BOTTOM P[8]
41 41
 #define P_MV1 P[9]
42 42
 
43
+static inline int sad_hpel_motion_search(MpegEncContext * s,
44
+				  int *mx_ptr, int *my_ptr, int dmin,
45
+				  int xmin, int ymin, int xmax, int ymax,
46
+                                  int pred_x, int pred_y, Picture *picture,
47
+                                  int n, int size, uint16_t * const mv_penalty);
48
+
49
+static inline int update_map_generation(MpegEncContext * s) // advance s->me.map_generation by one step and return the new value
50
+{
51
+    s->me.map_generation+= 1<<(ME_MAP_MV_BITS*2); // step size leaves the low 2*ME_MAP_MV_BITS bits untouched (presumably reserved for the packed x/y key - confirm against the template)
52
+    if(s->me.map_generation==0){ // counter wrapped around
53
+        s->me.map_generation= 1<<(ME_MAP_MV_BITS*2); // restart at the first non-zero generation
54
+        memset(s->me.map, 0, sizeof(uint32_t)*ME_MAP_SIZE); // zero all ME_MAP_SIZE entries so keys stored under earlier generations are gone
55
+    }
56
+    return s->me.map_generation;
57
+}
58
+
59
+
60
+                                  
61
+/* SIMPLE */
62
+#define RENAME(a) simple_ ## a
63
+
64
+#define CMP(d, x, y, size)\
65
+d = cmp(s, src_y, (ref_y) + (x) + (y)*(stride), stride);
66
+
67
+#define CMP_HPEL(d, dx, dy, x, y, size)\
68
+{\
69
+    const int dxy= (dx) + 2*(dy);\
70
+    hpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride, (16>>size));\
71
+    d = cmp_sub(s, s->me.scratchpad, src_y, stride);\
72
+}
73
+
74
+#define CMP_QPEL(d, dx, dy, x, y, size)\
75
+{\
76
+    const int dxy= (dx) + 4*(dy);\
77
+    qpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride);\
78
+    d = cmp_sub(s, s->me.scratchpad, src_y, stride);\
79
+}
80
+
81
+#include "motion_est_template.c"
82
+#undef RENAME
83
+#undef CMP
84
+#undef CMP_HPEL
85
+#undef CMP_QPEL
86
+#undef INIT
87
+
88
+/* SIMPLE CHROMA */
89
+#define RENAME(a) simple_chroma_ ## a
90
+
91
+#define CMP(d, x, y, size)\
92
+d = cmp(s, src_y, (ref_y) + (x) + (y)*(stride), stride);\
93
+if(chroma_cmp){\
94
+    int dxy= ((x)&1) + 2*((y)&1);\
95
+    int c= ((x)>>1) + ((y)>>1)*uvstride;\
96
+\
97
+    chroma_hpel_put[0][dxy](s->me.scratchpad, ref_u + c, uvstride, 8);\
98
+    d += chroma_cmp(s, s->me.scratchpad, src_u, uvstride);\
99
+    chroma_hpel_put[0][dxy](s->me.scratchpad, ref_v + c, uvstride, 8);\
100
+    d += chroma_cmp(s, s->me.scratchpad, src_v, uvstride);\
101
+}
102
+
103
+#define CMP_HPEL(d, dx, dy, x, y, size)\
104
+{\
105
+    const int dxy= (dx) + 2*(dy);\
106
+    hpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride, (16>>size));\
107
+    d = cmp_sub(s, s->me.scratchpad, src_y, stride);\
108
+    if(chroma_cmp_sub){\
109
+        int cxy= (dxy) | ((x)&1) | (2*((y)&1));\
110
+        int c= ((x)>>1) + ((y)>>1)*uvstride;\
111
+        chroma_hpel_put[0][cxy](s->me.scratchpad, ref_u + c, uvstride, 8);\
112
+        d += chroma_cmp_sub(s, s->me.scratchpad, src_u, uvstride);\
113
+        chroma_hpel_put[0][cxy](s->me.scratchpad, ref_v + c, uvstride, 8);\
114
+        d += chroma_cmp_sub(s, s->me.scratchpad, src_v, uvstride);\
115
+    }\
116
+}
117
+
118
+#define CMP_QPEL(d, dx, dy, x, y, size)\
119
+{\
120
+    const int dxy= (dx) + 4*(dy);\
121
+    qpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride);\
122
+    d = cmp_sub(s, s->me.scratchpad, src_y, stride);\
123
+    if(chroma_cmp_sub){\
124
+        int cxy, c;\
125
+        int cx= (4*(x) + (dx))/2;\
126
+        int cy= (4*(y) + (dy))/2;\
127
+        cx= (cx>>1)|(cx&1);\
128
+        cy= (cy>>1)|(cy&1);\
129
+        cxy= (cx&1) + 2*(cy&1);\
130
+        c= ((cx)>>1) + ((cy)>>1)*uvstride;\
131
+        chroma_hpel_put[0][cxy](s->me.scratchpad, ref_u + c, uvstride, 8);\
132
+        d += chroma_cmp_sub(s, s->me.scratchpad, src_u, uvstride);\
133
+        chroma_hpel_put[0][cxy](s->me.scratchpad, ref_v + c, uvstride, 8);\
134
+        d += chroma_cmp_sub(s, s->me.scratchpad, src_v, uvstride);\
135
+    }\
136
+}
137
+
138
+#include "motion_est_template.c"
139
+#undef RENAME
140
+#undef CMP
141
+#undef CMP_HPEL
142
+#undef CMP_QPEL
143
+#undef INIT
144
+
145
+/* SIMPLE DIRECT HPEL */
146
+#define RENAME(a) simple_direct_hpel_ ## a
147
+//FIXME precalc divisions stuff
148
+
149
+#define CMP_DIRECT(d, dx, dy, x, y, size, cmp_func)\
150
+if((x) >= xmin && 2*(x) + (dx) <= 2*xmax && (y) >= ymin && 2*(y) + (dy) <= 2*ymax){\
151
+    const int hx= 2*(x) + (dx);\
152
+    const int hy= 2*(y) + (dy);\
153
+    if(s->mv_type==MV_TYPE_8X8){\
154
+        int i;\
155
+        for(i=0; i<4; i++){\
156
+            int fx = s->me.direct_basis_mv[i][0] + hx;\
157
+            int fy = s->me.direct_basis_mv[i][1] + hy;\
158
+            int bx = hx ? fx - s->me.co_located_mv[i][0] : s->me.co_located_mv[i][0]*(time_pb - time_pp)/time_pp + (i &1)*16;\
159
+            int by = hy ? fy - s->me.co_located_mv[i][1] : s->me.co_located_mv[i][1]*(time_pb - time_pp)/time_pp + (i>>1)*16;\
160
+            int fxy= (fx&1) + 2*(fy&1);\
161
+            int bxy= (bx&1) + 2*(by&1);\
162
+\
163
+            uint8_t *dst= s->me.scratchpad + 8*(i&1) + 8*stride*(i>>1);\
164
+            hpel_put[1][fxy](dst, (ref_y ) + (fx>>1) + (fy>>1)*(stride), stride, 8);\
165
+            hpel_avg[1][bxy](dst, (ref2_y) + (bx>>1) + (by>>1)*(stride), stride, 8);\
166
+        }\
167
+    }else{\
168
+        int fx = s->me.direct_basis_mv[0][0] + hx;\
169
+        int fy = s->me.direct_basis_mv[0][1] + hy;\
170
+        int bx = hx ? fx - s->me.co_located_mv[0][0] : s->me.co_located_mv[0][0]*(time_pb - time_pp)/time_pp;\
171
+        int by = hy ? fy - s->me.co_located_mv[0][1] : s->me.co_located_mv[0][1]*(time_pb - time_pp)/time_pp;\
172
+        int fxy= (fx&1) + 2*(fy&1);\
173
+        int bxy= (bx&1) + 2*(by&1);\
174
+\
175
+        hpel_put[0][fxy](s->me.scratchpad, (ref_y ) + (fx>>1) + (fy>>1)*(stride), stride, 16);\
176
+        hpel_avg[0][bxy](s->me.scratchpad, (ref2_y) + (bx>>1) + (by>>1)*(stride), stride, 16);\
177
+    }\
178
+    d = cmp_func(s, s->me.scratchpad, src_y, stride);\
179
+}else\
180
+    d= 256*256*256*32;
181
+
182
+
183
+#define CMP_HPEL(d, dx, dy, x, y, size)\
184
+    CMP_DIRECT(d, dx, dy, x, y, size, cmp_sub)
185
+
186
+#define CMP(d, x, y, size)\
187
+    CMP_DIRECT(d, 0, 0, x, y, size, cmp)
188
+    
189
+#include "motion_est_template.c"
190
+#undef RENAME
191
+#undef CMP
192
+#undef CMP_HPEL
193
+#undef CMP_QPEL
194
+#undef INIT
195
+#undef CMP_DIRECT
196
+
197
+/* SIMPLE DIRECT QPEL */
198
+#define RENAME(a) simple_direct_qpel_ ## a
199
+
200
+#define CMP_DIRECT(d, dx, dy, x, y, size, cmp_func) /* quarter-pel direct-mode compare: builds the put(forward)+avg(backward) prediction for delta (4*x+dx, 4*y+dy) in s->me.scratchpad and scores it against src_y with cmp_func; out-of-range deltas get a huge score */\
201
+if((x) >= xmin && 4*(x) + (dx) <= 4*xmax && (y) >= ymin && 4*(y) + (dy) <= 4*ymax){\
202
+    const int qx= 4*(x) + (dx);\
203
+    const int qy= 4*(y) + (dy);\
204
+    if(s->mv_type==MV_TYPE_8X8){\
205
+        int i;\
206
+        for(i=0; i<4; i++){\
207
+            int fx = s->me.direct_basis_mv[i][0] + qx;\
208
+            int fy = s->me.direct_basis_mv[i][1] + qy;\
209
+            int bx = qx ? fx - s->me.co_located_mv[i][0] : s->me.co_located_mv[i][0]*(time_pb - time_pp)/time_pp + (i &1)*32; /* 8-pixel block offset in quarter-pel units */\
210
+            int by = qy ? fy - s->me.co_located_mv[i][1] : s->me.co_located_mv[i][1]*(time_pb - time_pp)/time_pp + (i>>1)*32; /* 8-pixel block offset in quarter-pel units */\
211
+            int fxy= (fx&3) + 4*(fy&3);\
212
+            int bxy= (bx&3) + 4*(by&3);\
213
+            /* low 2 bits select the qpel filter, the rest (>>2) is the full-pel position; block i lands 8 pixels right/down in the scratchpad */\
214
+            uint8_t *dst= s->me.scratchpad + 8*(i&1) + 8*stride*(i>>1);\
215
+            qpel_put[1][fxy](dst, (ref_y ) + (fx>>2) + (fy>>2)*(stride), stride);\
216
+            qpel_avg[1][bxy](dst, (ref2_y) + (bx>>2) + (by>>2)*(stride), stride);\
217
+        }\
218
+    }else{\
219
+        int fx = s->me.direct_basis_mv[0][0] + qx;\
220
+        int fy = s->me.direct_basis_mv[0][1] + qy;\
221
+        int bx = qx ? fx - s->me.co_located_mv[0][0] : s->me.co_located_mv[0][0]*(time_pb - time_pp)/time_pp;\
222
+        int by = qy ? fy - s->me.co_located_mv[0][1] : s->me.co_located_mv[0][1]*(time_pb - time_pp)/time_pp;\
223
+        int fxy= (fx&3) + 4*(fy&3);\
224
+        int bxy= (bx&3) + 4*(by&3);\
225
+        /* single 16x16 vector: no per-block offset is needed */\
226
+        qpel_put[0][fxy](s->me.scratchpad, (ref_y ) + (fx>>2) + (fy>>2)*(stride), stride);\
227
+        qpel_avg[0][bxy](s->me.scratchpad, (ref2_y) + (bx>>2) + (by>>2)*(stride), stride);\
228
+    }\
229
+    d = cmp_func(s, s->me.scratchpad, src_y, stride);\
230
+}else\
231
+    d= 256*256*256*32;
232
+
233
+
234
+#define CMP_QPEL(d, dx, dy, x, y, size)\
235
+    CMP_DIRECT(d, dx, dy, x, y, size, cmp_sub)
236
+
237
+#define CMP(d, x, y, size)\
238
+    CMP_DIRECT(d, 0, 0, x, y, size, cmp)
239
+
240
+#include "motion_est_template.c"
241
+#undef RENAME
242
+#undef CMP
243
+#undef CMP_HPEL
244
+#undef CMP_QPEL
245
+#undef INIT
246
+#undef CMP_DIRECT /* was misspelled CMP__DIRECT, which left the quarter-pel CMP_DIRECT defined past this section */
247
+
248
+
249
+static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride){ // compare function that ignores all arguments; set_cmp() installs it for FF_CMP_ZERO
250
+    return 0; // every candidate gets a score of 0
251
+}
252
+
253
+static void set_cmp(MpegEncContext *s, me_cmp_func *cmp, int type){
254
+    DSPContext* c= &s->dsp;
255
+    int i;
256
+    /* Fill cmp[] with the DSP compare functions named by the low byte of type; [0]/[1] are the 16x16/8x8 variants as set up in dsputil_init_mmx(). */
257
+    memset(cmp, 0, sizeof(*cmp)*11); /* size by element type; NOTE(review): 11 is presumably the length of the caller's table - confirm */
258
+
259
+    switch(type&0xFF){ /* bits above the low byte are flags (e.g. FF_CMP_CHROMA) and are ignored here */
260
+    case FF_CMP_SAD:
261
+        cmp[0]= c->sad[0];
262
+        cmp[1]= c->sad[1];
263
+        break;
264
+    case FF_CMP_SATD:
265
+        cmp[0]= c->hadamard8_diff[0];
266
+        cmp[1]= c->hadamard8_diff[1];
267
+        break;
268
+    case FF_CMP_SSE:
269
+        cmp[0]= c->sse[0];
270
+        cmp[1]= c->sse[1];
271
+        break;
272
+    case FF_CMP_DCT:
273
+        cmp[0]= c->dct_sad[0];
274
+        cmp[1]= c->dct_sad[1];
275
+        break;
276
+    case FF_CMP_PSNR:
277
+        cmp[0]= c->quant_psnr[0];
278
+        cmp[1]= c->quant_psnr[1];
279
+        break;
280
+    case FF_CMP_ZERO:
281
+        for(i=0; i<7; i++){ /* only the first 7 slots get zero_cmp; the remaining cleared slots stay NULL */
282
+            cmp[i]= zero_cmp;
283
+        }
284
+        break;
285
+    default: /* unknown id: report it and leave every slot NULL */
286
+        fprintf(stderr,"internal error in cmp function selection\n");
287
+    }
288
+}
289
+
290
+static inline int get_penalty_factor(MpegEncContext *s, int type){
291
+    /* Returns s->qscale for SAD (and any unlisted id) and 8*s->qscale for SSE/DCT/SATD; flag bits such as FF_CMP_CHROMA are masked off first, as in set_cmp(). */
292
+    switch(type&0xFF){
293
+    default:
294
+    case FF_CMP_SAD:
295
+        return s->qscale;
296
+    case FF_CMP_SSE:
297
+        /* SSE shares the factor of the cases below */
298
+    case FF_CMP_DCT:
299
+    case FF_CMP_SATD:
300
+        return s->qscale*8;
301
+    }
302
+}
303
+
304
+void ff_init_me(MpegEncContext *s){ // install the compare functions and search routines chosen through s->avctx and s->flags
305
+    set_cmp(s, s->dsp.me_cmp, s->avctx->me_cmp); // compare table for avctx->me_cmp
306
+    set_cmp(s, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp); // compare table for avctx->me_sub_cmp
307
+    set_cmp(s, s->dsp.mb_cmp, s->avctx->mb_cmp); // compare table for avctx->mb_cmp
43 308
 
309
+    if(s->flags&CODEC_FLAG_QPEL){ // sub-pel refinement: quarter-pel variants
310
+        if(s->avctx->me_sub_cmp&FF_CMP_CHROMA)
311
+            s->me.sub_motion_search= simple_chroma_qpel_motion_search;
312
+        else
313
+            s->me.sub_motion_search= simple_qpel_motion_search;
314
+    }else{ // half-pel variants
315
+        if(s->avctx->me_sub_cmp&FF_CMP_CHROMA)
316
+            s->me.sub_motion_search= simple_chroma_hpel_motion_search;
317
+        else if(s->avctx->me_sub_cmp == FF_CMP_SAD && s->avctx->me_cmp == FF_CMP_SAD) // plain SAD for both stages, no flag bits set
318
+            s->me.sub_motion_search= sad_hpel_motion_search; // dedicated SAD half-pel search declared earlier in this file
319
+        else
320
+            s->me.sub_motion_search= simple_hpel_motion_search;
321
+    }
322
+
323
+    if(s->avctx->me_cmp&FF_CMP_CHROMA){ // EPZS search routines; slot [1] takes the *_search4 variant
324
+        s->me.motion_search[0]= simple_chroma_epzs_motion_search;
325
+        s->me.motion_search[1]= simple_chroma_epzs_motion_search4;
326
+    }else{
327
+        s->me.motion_search[0]= simple_epzs_motion_search;
328
+        s->me.motion_search[1]= simple_epzs_motion_search4;
329
+    }
330
+}
331
+      
44 332
 static int pix_dev(UINT8 * pix, int line_size, int mean)
45 333
 {
46 334
     int s, i, j;
... ...
@@ -294,492 +584,39 @@ static int phods_motion_search(MpegEncContext * s,
294 294
 
295 295
 #define Z_THRESHOLD 256
296 296
 
297
-#define CHECK_MV(x,y)\
298
-{\
299
-    const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
300
-    const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
301
-    if(map[index]!=key){\
302
-        d = s->dsp.pix_abs16x16(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
303
-        d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\
304
-        COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
305
-        map[index]= key;\
306
-        score_map[index]= d;\
307
-    }\
308
-}
309
-
310
-#define CHECK_MV_DIR(x,y,new_dir)\
311
-{\
312
-    const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
313
-    const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
314
-    if(map[index]!=key){\
315
-        d = pix_abs(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
316
-        d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\
317
-        if(d<dmin){\
318
-            best[0]=x;\
319
-            best[1]=y;\
320
-            dmin=d;\
321
-            next_dir= new_dir;\
322
-        }\
323
-        map[index]= key;\
324
-        score_map[index]= d;\
325
-    }\
326
-}
327
-
328
-#define CHECK_MV4(x,y)\
329
-{\
330
-    const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
331
-    const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
332
-    if(map[index]!=key){\
333
-        d = s->dsp.pix_abs8x8(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
334
-        d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\
335
-        COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
336
-        map[index]= key;\
337
-        score_map[index]= d;\
338
-    }\
339
-}
340
-
341
-#define check(x,y,S,v)\
342
-if( (x)<(xmin<<(S)) ) printf("%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\
343
-if( (x)>(xmax<<(S)) ) printf("%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\
344
-if( (y)<(ymin<<(S)) ) printf("%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\
345
-if( (y)>(ymax<<(S)) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);\
346
-
347
-
348
-static inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
349
-                                       UINT8 *new_pic, UINT8 *old_pic, int pic_stride,
350
-                                       int pred_x, int pred_y, UINT16 *mv_penalty, int quant,
351
-                                       int xmin, int ymin, int xmax, int ymax, int shift,
352
-                                       uint32_t *map, uint16_t *score_map, int map_generation,
353
-                                       op_pixels_abs_func pix_abs)
354
-{
355
-    int next_dir=-1;
356
-
357
-    for(;;){
358
-        int d;
359
-        const int dir= next_dir;
360
-        const int x= best[0];
361
-        const int y= best[1];
362
-        next_dir=-1;
363
-
364
-//printf("%d", dir);
365
-        if(dir!=2 && x>xmin) CHECK_MV_DIR(x-1, y  , 0)
366
-        if(dir!=3 && y>ymin) CHECK_MV_DIR(x  , y-1, 1)
367
-        if(dir!=0 && x<xmax) CHECK_MV_DIR(x+1, y  , 2)
368
-        if(dir!=1 && y<ymax) CHECK_MV_DIR(x  , y+1, 3)
369
-
370
-        if(next_dir==-1){
371
-            return dmin;
372
-        }
373
-    }
374
-
375
-/*    for(;;){
376
-        int d;
377
-        const int x= best[0];
378
-        const int y= best[1];
379
-        const int last_min=dmin;
380
-        if(x>xmin) CHECK_MV(x-1, y  )
381
-        if(y>xmin) CHECK_MV(x  , y-1)
382
-        if(x<xmax) CHECK_MV(x+1, y  )
383
-        if(y<xmax) CHECK_MV(x  , y+1)
384
-        if(x>xmin && y>ymin) CHECK_MV(x-1, y-1)
385
-        if(x>xmin && y<ymax) CHECK_MV(x-1, y+1)
386
-        if(x<xmax && y>ymin) CHECK_MV(x+1, y-1)
387
-        if(x<xmax && y<ymax) CHECK_MV(x+1, y+1)
388
-        if(x-1>xmin) CHECK_MV(x-2, y  )
389
-        if(y-1>xmin) CHECK_MV(x  , y-2)
390
-        if(x+1<xmax) CHECK_MV(x+2, y  )
391
-        if(y+1<xmax) CHECK_MV(x  , y+2)
392
-        if(x-1>xmin && y-1>ymin) CHECK_MV(x-2, y-2)
393
-        if(x-1>xmin && y+1<ymax) CHECK_MV(x-2, y+2)
394
-        if(x+1<xmax && y-1>ymin) CHECK_MV(x+2, y-2)
395
-        if(x+1<xmax && y+1<ymax) CHECK_MV(x+2, y+2)
396
-        if(dmin==last_min) return dmin;
397
-    }
398
-    */
399
-}
400
-
401
-#if 1
402
-#define SNAKE_1 3
403
-#define SNAKE_2 2
404
-#else
405
-#define SNAKE_1 7
406
-#define SNAKE_2 3
407
-#endif
408
-static inline int snake_search(MpegEncContext * s, int *best, int dmin,
409
-                                       UINT8 *new_pic, UINT8 *old_pic, int pic_stride,
410
-                                       int pred_x, int pred_y, UINT16 *mv_penalty, int quant,
411
-                                       int xmin, int ymin, int xmax, int ymax, int shift,
412
-                                       uint32_t *map, uint16_t *score_map,int map_generation,
413
-                                       op_pixels_abs_func pix_abs)
414
-{
415
-    int dir=0;
416
-    int c=1;
417
-    static int x_dir[8]= {1,1,0,-1,-1,-1, 0, 1};
418
-    static int y_dir[8]= {0,1,1, 1, 0,-1,-1,-1};
419
-    int fails=0;
420
-    int last_d[2]={dmin, dmin};
421
-
422
-/*static int good=0;
423
-static int bad=0;
424
-static int point=0;
425
-
426
-point++;
427
-if(256*256*256*64%point==0)
428
-{
429
-    printf("%d %d %d\n", good, bad, point);
430
-}*/
431
-
432
-    for(;;){
433
-        int x= best[0];
434
-        int y= best[1];
435
-        int d;
436
-        x+=x_dir[dir];
437
-        y+=y_dir[dir];
438
-        if(x>=xmin && x<=xmax && y>=ymin && y<=ymax){
439
-            const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;
440
-            const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);
441
-            if(map[index]!=key){
442
-                d = pix_abs(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);
443
-                d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;
444
-                map[index]=key;
445
-                score_map[index]=d;
446
-            }else
447
-                d= dmin+1;
448
-        }else{
449
-            d = dmin + 10000; //FIXME smarter boundary handling
450
-        }
451
-        if(d<dmin){
452
-            best[0]=x;
453
-            best[1]=y;
454
-            dmin=d;
455
-
456
-            if(last_d[1] - last_d[0] > last_d[0] - d) c= -c;
457
-            dir+=c;
458
-
459
-            fails=0;
460
-//good++;
461
-            last_d[1]=last_d[0];
462
-            last_d[0]=d;
463
-        }else{
464
-//bad++;
465
-            if(fails){
466
-                if(fails>=SNAKE_1+1) return dmin;
467
-            }else{
468
-                if(dir&1) dir-= c*3;
469
-                else      c= -c;
470
-//                c= -c;
471
-            }
472
-            dir+=c*SNAKE_2;
473
-            fails++;
474
-        }
475
-        dir&=7;
476
-    }
477
-}
478
-
479
-static inline int cross_search(MpegEncContext * s, int *best, int dmin,
480
-                                       UINT8 *new_pic, UINT8 *old_pic, int pic_stride,
481
-                                       int pred_x, int pred_y, UINT16 *mv_penalty, int quant,
482
-                                       int xmin, int ymin, int xmax, int ymax, int shift,
483
-                                       uint32_t *map, uint16_t *score_map,int map_generation,
484
-                                       op_pixels_abs_func pix_abs)
485
-{
486
-    static int x_dir[4]= {-1, 0, 1, 0};
487
-    static int y_dir[4]= { 0,-1, 0, 1};
488
-    int improvement[2]={100000, 100000};
489
-    int dirs[2]={2, 3};
490
-    int dir;
491
-    int last_dir= -1;
492
-    
493
-    for(;;){
494
-        dir= dirs[ improvement[0] > improvement[1] ? 0 : 1 ];
495
-        if(improvement[dir&1]==-1) return dmin;
496
-        
497
-        {
498
-            const int x= best[0] + x_dir[dir];
499
-            const int y= best[1] + y_dir[dir];
500
-            const int key= (y<<ME_MAP_MV_BITS) + x + map_generation;
501
-            const int index= ((y<<ME_MAP_SHIFT) + x)&(ME_MAP_SIZE-1);
502
-            int d;
503
-            if(x>=xmin && x<=xmax && y>=ymin && y<=ymax){
504
-                if(map[index]!=key){
505
-                    d = pix_abs(new_pic, old_pic + x + y*pic_stride, pic_stride);
506
-                    d += (mv_penalty[(x<<shift)-pred_x] + mv_penalty[(y<<shift)-pred_y])*quant;
507
-                    map[index]=key;
508
-                    score_map[index]=d;
509
-                    if(d<dmin){
510
-                        improvement[dir&1]= dmin-d;
511
-                        improvement[(dir&1)^1]++;
512
-                        dmin=d;
513
-                        best[0]= x;
514
-                        best[1]= y;
515
-                        last_dir=dir;
516
-                        continue;
517
-                    }
518
-                }else{
519
-                    d= score_map[index];
520
-                }
521
-            }else{
522
-                d= dmin + 1000; //FIXME is this a good idea?
523
-            }
524
-            /* evaluated point was cached or checked and worse */
525
-
526
-            if(last_dir==dir){
527
-                improvement[dir&1]= -1;
528
-            }else{
529
-                improvement[dir&1]= d-dmin;
530
-                last_dir= dirs[dir&1]= dir^2;
531
-            }
532
-        }
533
-    }
534
-}
535
-
536
-static inline int update_map_generation(MpegEncContext * s)
537
-{
538
-    s->me_map_generation+= 1<<(ME_MAP_MV_BITS*2);
539
-    if(s->me_map_generation==0){
540
-        s->me_map_generation= 1<<(ME_MAP_MV_BITS*2);
541
-        memset(s->me_map, 0, sizeof(uint32_t)*ME_MAP_SIZE);
542
-    }
543
-    return s->me_map_generation;
544
-}
545
-
546
-static int epzs_motion_search(MpegEncContext * s,
547
-                             int *mx_ptr, int *my_ptr,
548
-                             int P[10][2], int pred_x, int pred_y,
549
-                             int xmin, int ymin, int xmax, int ymax, uint8_t * ref_picture)
550
-{
551
-    int best[2]={0, 0};
552
-    int d, dmin; 
553
-    UINT8 *new_pic, *old_pic;
554
-    const int pic_stride= s->linesize;
555
-    const int pic_xy= (s->mb_y*pic_stride + s->mb_x)*16;
556
-    UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
557
-    int quant= s->qscale; // qscale of the prev frame
558
-    const int shift= 1+s->quarter_sample;
559
-    uint32_t *map= s->me_map;
560
-    uint16_t *score_map= s->me_score_map;
561
-    int map_generation;
562
-
563
-    new_pic = s->new_picture.data[0] + pic_xy;
564
-    old_pic = ref_picture + pic_xy;
565
-    
566
-    map_generation= update_map_generation(s);
567
-
568
-    dmin = s->dsp.pix_abs16x16(new_pic, old_pic, pic_stride);
569
-    map[0]= map_generation;
570
-    score_map[0]= dmin;
571
-
572
-    /* first line */
573
-    if ((s->mb_y == 0 || s->first_slice_line)) {
574
-        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
575
-        CHECK_MV(P_LAST[0]>>shift, P_LAST[1]>>shift)
576
-    }else{
577
-        if(dmin<256 && ( P_LEFT[0]    |P_LEFT[1]
578
-                        |P_TOP[0]     |P_TOP[1]
579
-                        |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0){
580
-            *mx_ptr= 0;
581
-            *my_ptr= 0;
582
-            s->skip_me=1;
583
-            return dmin;
584
-        }
585
-        CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
586
-        if(dmin>256*2){
587
-            CHECK_MV(P_LAST[0]    >>shift, P_LAST[1]    >>shift)
588
-            CHECK_MV(P_LEFT[0]    >>shift, P_LEFT[1]    >>shift)
589
-            CHECK_MV(P_TOP[0]     >>shift, P_TOP[1]     >>shift)
590
-            CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
591
-        }
592
-    }
593
-    if(dmin>256*4){
594
-        CHECK_MV(P_LAST_RIGHT[0] >>shift, P_LAST_RIGHT[1] >>shift)
595
-        CHECK_MV(P_LAST_BOTTOM[0]>>shift, P_LAST_BOTTOM[1]>>shift)
596
-    }
597
-#if 0 //doest only slow things down
598
-    if(dmin>512*3){
599
-        int step;
600
-        dmin= score_map[0];
601
-        best[0]= best[1]=0;
602
-        for(step=128; step>0; step>>=1){
603
-            const int step2= step;
604
-            int y;
605
-            for(y=-step2+best[1]; y<=step2+best[1]; y+=step){
606
-                int x;
607
-                if(y<ymin || y>ymax) continue;
608
-
609
-                for(x=-step2+best[0]; x<=step2+best[0]; x+=step){
610
-                    if(x<xmin || x>xmax) continue;
611
-                    if(x==best[0] && y==best[1]) continue;
612
-                    CHECK_MV(x,y)
613
-                }
614
-            }
615
-        }
616
-    }
617
-#endif
618
-//check(best[0],best[1],0, b0)
619
-    if(s->me_method==ME_EPZS)
620
-        dmin= small_diamond_search(s, best, dmin, new_pic, old_pic, pic_stride, 
621
-                                   pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, 
622
-				   shift, map, score_map, map_generation, s->dsp.pix_abs16x16);
623
-    else
624
-        dmin=         cross_search(s, best, dmin, new_pic, old_pic, pic_stride, 
625
-                                   pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, 
626
-                                   shift, map, score_map, map_generation, s->dsp.pix_abs16x16);
627
-//check(best[0],best[1],0, b1)
628
-    *mx_ptr= best[0];
629
-    *my_ptr= best[1];    
630
-
631
-//    printf("%d %d %d \n", best[0], best[1], dmin);
632
-    return dmin;
633
-}
634
-
635
-static int epzs_motion_search4(MpegEncContext * s, int block,
636
-                             int *mx_ptr, int *my_ptr,
637
-                             int P[10][2], int pred_x, int pred_y,
638
-                             int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture)
639
-{
640
-    int best[2]={0, 0};
641
-    int d, dmin; 
642
-    UINT8 *new_pic, *old_pic;
643
-    const int pic_stride= s->linesize;
644
-    const int pic_xy= ((s->mb_y*2 + (block>>1))*pic_stride + s->mb_x*2 + (block&1))*8;
645
-    UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
646
-    int quant= s->qscale; // qscale of the prev frame
647
-    const int shift= 1+s->quarter_sample;
648
-    uint32_t *map= s->me_map;
649
-    uint16_t *score_map= s->me_score_map;
650
-    int map_generation;
651
-
652
-    new_pic = s->new_picture.data[0] + pic_xy;
653
-    old_pic = ref_picture + pic_xy;
654
-
655
-    map_generation= update_map_generation(s);
656
-
657
-    dmin = 1000000;
658
-//printf("%d %d %d %d //",xmin, ymin, xmax, ymax); 
659
-    /* first line */
660
-    if ((s->mb_y == 0 || s->first_slice_line) && block<2) {
661
-	CHECK_MV4(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
662
-        CHECK_MV4(P_LAST[0]>>shift, P_LAST[1]>>shift)
663
-        CHECK_MV4(P_MV1[0]>>shift, P_MV1[1]>>shift)
664
-    }else{
665
-        CHECK_MV4(P_MV1[0]>>shift, P_MV1[1]>>shift)
666
-        //FIXME try some early stop
667
-        if(dmin>64*2){
668
-            CHECK_MV4(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
669
-            CHECK_MV4(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
670
-            CHECK_MV4(P_TOP[0]>>shift, P_TOP[1]>>shift)
671
-            CHECK_MV4(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
672
-            CHECK_MV4(P_LAST[0]>>shift, P_LAST[1]>>shift)
673
-        }
674
-    }
675
-    if(dmin>64*4){
676
-        CHECK_MV4(P_LAST_RIGHT[0]>>shift, P_LAST_RIGHT[1]>>shift)
677
-        CHECK_MV4(P_LAST_BOTTOM[0]>>shift, P_LAST_BOTTOM[1]>>shift)
678
-    }
679
-
680
-    if(s->me_method==ME_EPZS)
681
-        dmin= small_diamond_search(s, best, dmin, new_pic, old_pic, pic_stride, 
682
-                                   pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, 
683
-				   shift, map, score_map, map_generation, s->dsp.pix_abs8x8);
684
-    else
685
-        dmin=         cross_search(s, best, dmin, new_pic, old_pic, pic_stride, 
686
-                                   pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, 
687
-                                   shift, map, score_map, map_generation, s->dsp.pix_abs8x8);
688
-
689
-    *mx_ptr= best[0];
690
-    *my_ptr= best[1];    
691
-
692
-//    printf("%d %d %d \n", best[0], best[1], dmin);
693
-    return dmin;
694
-}
695
-
696
-#define CHECK_HALF_MV(suffix, x, y) \
297
+#define CHECK_SAD_HALF_MV(suffix, x, y) \
697 298
 {\
698 299
     d= pix_abs_ ## suffix(pix, ptr+((x)>>1), s->linesize);\
699
-    d += (mv_penalty[pen_x + x] + mv_penalty[pen_y + y])*quant;\
300
+    d += (mv_penalty[pen_x + x] + mv_penalty[pen_y + y])*penalty_factor;\
700 301
     COPY3_IF_LT(dminh, d, dx, x, dy, y)\
701 302
 }
702 303
 
703
-    
704
-/* The idea would be to make half pel ME after Inter/Intra decision to 
705
-   save time. */
706
-static inline int halfpel_motion_search(MpegEncContext * s,
304
+static inline int sad_hpel_motion_search(MpegEncContext * s,
707 305
 				  int *mx_ptr, int *my_ptr, int dmin,
708 306
 				  int xmin, int ymin, int xmax, int ymax,
709
-                                  int pred_x, int pred_y, uint8_t *ref_picture,
710
-                                  op_pixels_abs_func pix_abs_x2, 
711
-                                  op_pixels_abs_func pix_abs_y2, op_pixels_abs_func pix_abs_xy2, int n)
307
+                                  int pred_x, int pred_y, Picture *picture,
308
+                                  int n, int size, uint16_t * const mv_penalty)
712 309
 {
713
-    UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
714
-    const int quant= s->qscale;
310
+    uint8_t *ref_picture= picture->data[0];
311
+    uint32_t *score_map= s->me.score_map;
312
+    const int penalty_factor= s->me.sub_penalty_factor;
715 313
     int mx, my, xx, yy, dminh;
716 314
     UINT8 *pix, *ptr;
717
-
718
-    if(s->skip_me){
719
-        *mx_ptr = 0;
720
-        *my_ptr = 0;
721
-        return dmin;
722
-    }
723
-
724
-    xx = 16 * s->mb_x + 8*(n&1);
725
-    yy = 16 * s->mb_y + 8*(n>>1);
726
-    pix =  s->new_picture.data[0] + (yy * s->linesize) + xx;
727
-
728
-    mx = *mx_ptr;
729
-    my = *my_ptr;
730
-    ptr = ref_picture + ((yy + my) * s->linesize) + (xx + mx);
315
+    op_pixels_abs_func pix_abs_x2;
316
+    op_pixels_abs_func pix_abs_y2;
317
+    op_pixels_abs_func pix_abs_xy2;
731 318
     
732
-    dminh = dmin;
733
-
734
-    if (mx > xmin && mx < xmax && 
735
-        my > ymin && my < ymax) {
736
-        int dx=0, dy=0;
737
-        int d, pen_x, pen_y; 
738
-
739
-        mx<<=1;
740
-        my<<=1;
741
-        
742
-        pen_x= pred_x + mx;
743
-        pen_y= pred_y + my;
744
-
745
-        ptr-= s->linesize;
746
-        CHECK_HALF_MV(xy2, -1, -1)
747
-        CHECK_HALF_MV(y2 ,  0, -1)
748
-        CHECK_HALF_MV(xy2, +1, -1)
749
-        
750
-        ptr+= s->linesize;
751
-        CHECK_HALF_MV(x2 , -1,  0)
752
-        CHECK_HALF_MV(x2 , +1,  0)
753
-        CHECK_HALF_MV(xy2, -1, +1)
754
-        CHECK_HALF_MV(y2 ,  0, +1)
755
-        CHECK_HALF_MV(xy2, +1, +1)
756
-
757
-        mx+=dx;
758
-        my+=dy;
319
+    if(size==0){
320
+        pix_abs_x2 = s->dsp.pix_abs16x16_x2;
321
+        pix_abs_y2 = s->dsp.pix_abs16x16_y2;
322
+        pix_abs_xy2= s->dsp.pix_abs16x16_xy2;
759 323
     }else{
760
-        mx<<=1;
761
-        my<<=1;
324
+        pix_abs_x2 = s->dsp.pix_abs8x8_x2;
325
+        pix_abs_y2 = s->dsp.pix_abs8x8_y2;
326
+        pix_abs_xy2= s->dsp.pix_abs8x8_xy2;
762 327
     }
763 328
 
764
-    *mx_ptr = mx;
765
-    *my_ptr = my;
766
-    return dminh;
767
-}
768
-
769
-static inline int fast_halfpel_motion_search(MpegEncContext * s,
770
-				  int *mx_ptr, int *my_ptr, int dmin,
771
-				  int xmin, int ymin, int xmax, int ymax,
772
-                                  int pred_x, int pred_y, uint8_t *ref_picture,
773
-                                  op_pixels_abs_func pix_abs_x2, 
774
-                                  op_pixels_abs_func pix_abs_y2, op_pixels_abs_func pix_abs_xy2, int n)
775
-{
776
-    UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
777
-    uint16_t *score_map= s->me_score_map;
778
-    const int quant= s->qscale;
779
-    int mx, my, xx, yy, dminh;
780
-    UINT8 *pix, *ptr;
781
-
782
-    if(s->skip_me){
329
+    if(s->me.skip){
783 330
 //    printf("S");
784 331
         *mx_ptr = 0;
785 332
         *my_ptr = 0;
... ...
@@ -815,51 +652,51 @@ static inline int fast_halfpel_motion_search(MpegEncContext * s,
815 815
 
816 816
         ptr-= s->linesize;
817 817
         if(t<=b){
818
-            CHECK_HALF_MV(y2 ,  0, -1)
818
+            CHECK_SAD_HALF_MV(y2 , 0, -1)
819 819
             if(l<=r){
820
-                CHECK_HALF_MV(xy2, -1, -1)
820
+                CHECK_SAD_HALF_MV(xy2, -1, -1)
821 821
                 if(t+r<=b+l){
822
-                    CHECK_HALF_MV(xy2, +1, -1)
822
+                    CHECK_SAD_HALF_MV(xy2, +1, -1)
823 823
                     ptr+= s->linesize;
824 824
                 }else{
825 825
                     ptr+= s->linesize;
826
-                    CHECK_HALF_MV(xy2, -1, +1)
826
+                    CHECK_SAD_HALF_MV(xy2, -1, +1)
827 827
                 }
828
-                CHECK_HALF_MV(x2 , -1,  0)
828
+                CHECK_SAD_HALF_MV(x2 , -1,  0)
829 829
             }else{
830
-                CHECK_HALF_MV(xy2, +1, -1)
830
+                CHECK_SAD_HALF_MV(xy2, +1, -1)
831 831
                 if(t+l<=b+r){
832
-                    CHECK_HALF_MV(xy2, -1, -1)
832
+                    CHECK_SAD_HALF_MV(xy2, -1, -1)
833 833
                     ptr+= s->linesize;
834 834
                 }else{
835 835
                     ptr+= s->linesize;
836
-                    CHECK_HALF_MV(xy2, +1, +1)
836
+                    CHECK_SAD_HALF_MV(xy2, +1, +1)
837 837
                 }
838
-                CHECK_HALF_MV(x2 , +1,  0)
838
+                CHECK_SAD_HALF_MV(x2 , +1,  0)
839 839
             }
840 840
         }else{
841 841
             if(l<=r){
842 842
                 if(t+l<=b+r){
843
-                    CHECK_HALF_MV(xy2, -1, -1)
843
+                    CHECK_SAD_HALF_MV(xy2, -1, -1)
844 844
                     ptr+= s->linesize;
845 845
                 }else{
846 846
                     ptr+= s->linesize;
847
-                    CHECK_HALF_MV(xy2, +1, +1)
847
+                    CHECK_SAD_HALF_MV(xy2, +1, +1)
848 848
                 }
849
-                CHECK_HALF_MV(x2 , -1,  0)
850
-                CHECK_HALF_MV(xy2, -1, +1)
849
+                CHECK_SAD_HALF_MV(x2 , -1,  0)
850
+                CHECK_SAD_HALF_MV(xy2, -1, +1)
851 851
             }else{
852 852
                 if(t+r<=b+l){
853
-                    CHECK_HALF_MV(xy2, +1, -1)
853
+                    CHECK_SAD_HALF_MV(xy2, +1, -1)
854 854
                     ptr+= s->linesize;
855 855
                 }else{
856 856
                     ptr+= s->linesize;
857
-                    CHECK_HALF_MV(xy2, -1, +1)
857
+                    CHECK_SAD_HALF_MV(xy2, -1, +1)
858 858
                 }
859
-                CHECK_HALF_MV(x2 , +1,  0)
860
-                CHECK_HALF_MV(xy2, +1, +1)
859
+                CHECK_SAD_HALF_MV(x2 , +1,  0)
860
+                CHECK_SAD_HALF_MV(xy2, +1, +1)
861 861
             }
862
-            CHECK_HALF_MV(y2 ,  0, +1)
862
+            CHECK_SAD_HALF_MV(y2 ,  0, +1)
863 863
         }
864 864
         mx+=dx;
865 865
         my+=dy;
... ...
@@ -933,6 +770,7 @@ static inline int mv4_search(MpegEncContext *s, int xmin, int ymin, int xmax, in
933 933
     int P[10][2];
934 934
     uint8_t *ref_picture= s->last_picture.data[0];
935 935
     int dmin_sum=0;
936
+    uint16_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV;
936 937
 
937 938
     for(block=0; block<4; block++){
938 939
         int mx4, my4;
... ...
@@ -995,11 +833,11 @@ static inline int mv4_search(MpegEncContext *s, int xmin, int ymin, int xmax, in
995 995
         P_MV1[0]= mx;
996 996
         P_MV1[1]= my;
997 997
 
998
-        dmin4 = epzs_motion_search4(s, block, &mx4, &my4, P, pred_x4, pred_y4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, ref_picture);
998
+        dmin4 = s->me.motion_search[1](s, block, &mx4, &my4, P, pred_x4, pred_y4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, 
999
+                                       &s->last_picture, mv_penalty);
999 1000
 
1000
-        dmin4= fast_halfpel_motion_search(s, &mx4, &my4, dmin4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, 
1001
-					  pred_x4, pred_y4, ref_picture, s->dsp.pix_abs8x8_x2,
1002
-					  s->dsp.pix_abs8x8_y2, s->dsp.pix_abs8x8_xy2, block);
1001
+        dmin4= s->me.sub_motion_search(s, &mx4, &my4, dmin4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, 
1002
+					  pred_x4, pred_y4, &s->last_picture, block, 1, mv_penalty);
1003 1003
  
1004 1004
         s->motion_val[ s->block_index[block] ][0]= mx4;
1005 1005
         s->motion_val[ s->block_index[block] ][1]= my4;
... ...
@@ -1021,13 +859,19 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
1021 1021
     int mb_type=0;
1022 1022
     uint8_t *ref_picture= s->last_picture.data[0];
1023 1023
     Picture * const pic= &s->current_picture;
1024
+    uint16_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV;
1025
+    
1026
+    assert(s->quarter_sample==0 || s->quarter_sample==1);
1027
+
1028
+    s->me.penalty_factor    = get_penalty_factor(s, s->avctx->me_cmp);
1029
+    s->me.sub_penalty_factor= get_penalty_factor(s, s->avctx->me_sub_cmp);
1024 1030
 
1025 1031
     get_limits(s, &range, &xmin, &ymin, &xmax, &ymax, s->f_code);
1026 1032
     rel_xmin= xmin - mb_x*16;
1027 1033
     rel_xmax= xmax - mb_x*16;
1028 1034
     rel_ymin= ymin - mb_y*16;
1029 1035
     rel_ymax= ymax - mb_y*16;
1030
-    s->skip_me=0;
1036
+    s->me.skip=0;
1031 1037
 
1032 1038
     switch(s->me_method) {
1033 1039
     case ME_ZERO:
... ...
@@ -1096,7 +940,8 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
1096 1096
                 }
1097 1097
             }
1098 1098
         }
1099
-        dmin = epzs_motion_search(s, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax, ref_picture);
1099
+        dmin = s->me.motion_search[0](s, 0, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax, 
1100
+                                      &s->last_picture, mv_penalty);
1100 1101
  
1101 1102
         break;
1102 1103
     }
... ...
@@ -1112,8 +957,7 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
1112 1112
     sum = s->dsp.pix_sum(pix, s->linesize);
1113 1113
     
1114 1114
     varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
1115
-    // FIXME: MMX OPTIMIZE
1116
-    vard = (s->dsp.pix_norm(pix, ppix, s->linesize)+128)>>8;
1115
+    vard = (s->dsp.sse[0](NULL, pix, ppix, s->linesize)+128)>>8;
1117 1116
 
1118 1117
 //printf("%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout);
1119 1118
     pic->mb_var   [s->mb_width * mb_y + mb_x] = varc;
... ...
@@ -1137,20 +981,14 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
1137 1137
             mb_type|= MB_TYPE_INTRA;
1138 1138
         if (varc*2 + 200 > vard){
1139 1139
             mb_type|= MB_TYPE_INTER;
1140
-            if(s->me_method >= ME_EPZS)
1141
-                fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
1142
-					   pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2,
1143
-					   s->dsp.pix_abs16x16_y2, s->dsp.pix_abs16x16_xy2, 0);
1144
-            else
1145
-                halfpel_motion_search(     s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
1146
-				           pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2,
1147
-				           s->dsp.pix_abs16x16_y2, s->dsp.pix_abs16x16_xy2, 0);
1140
+            s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
1141
+				   pred_x, pred_y, &s->last_picture, 0, 0, mv_penalty);
1148 1142
         }else{
1149
-            mx <<=1;
1150
-            my <<=1;
1143
+            mx <<=shift;
1144
+            my <<=shift;
1151 1145
         }
1152 1146
         if((s->flags&CODEC_FLAG_4MV)
1153
-           && !s->skip_me && varc>50 && vard>10){
1147
+           && !s->me.skip && varc>50 && vard>10){
1154 1148
             mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift);
1155 1149
             mb_type|=MB_TYPE_INTER4V;
1156 1150
 
... ...
@@ -1159,19 +997,14 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
1159 1159
             set_p_mv_tables(s, mx, my, 1);
1160 1160
     }else{
1161 1161
         if (vard <= 64 || vard < varc) {
1162
+//        if (sadP <= 32 || sadP < sadI + 500) {
1162 1163
             s->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1163 1164
             mb_type|= MB_TYPE_INTER;
1164 1165
             if (s->me_method != ME_ZERO) {
1165
-                if(s->me_method >= ME_EPZS)
1166
-		    dmin= fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
1167
-                                           pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2, s->dsp.pix_abs16x16_y2,
1168
-                                           s->dsp.pix_abs16x16_xy2, 0);
1169
-                else
1170
-                    dmin= halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
1171
-                                           pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2, s->dsp.pix_abs16x16_y2,
1172
-                                           s->dsp.pix_abs16x16_xy2, 0);
1166
+                dmin= s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
1167
+                                            pred_x, pred_y, &s->last_picture, 0, 0, mv_penalty);
1173 1168
                 if((s->flags&CODEC_FLAG_4MV)
1174
-                   && !s->skip_me && varc>50 && vard>10){
1169
+                   && !s->me.skip && varc>50 && vard>10){
1175 1170
                     int dmin4= mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift);
1176 1171
                     if(dmin4 + 128 <dmin)
1177 1172
                         mb_type= MB_TYPE_INTER4V;
... ...
@@ -1179,8 +1012,8 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
1179 1179
                 set_p_mv_tables(s, mx, my, mb_type!=MB_TYPE_INTER4V);
1180 1180
 
1181 1181
             } else {
1182
-                mx <<=1;
1183
-                my <<=1;
1182
+                mx <<=shift;
1183
+                my <<=shift;
1184 1184
             }
1185 1185
 #if 0
1186 1186
             if (vard < 10) {
... ...
@@ -1201,7 +1034,7 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
1201 1201
 }
1202 1202
 
1203 1203
 int ff_estimate_motion_b(MpegEncContext * s,
1204
-                       int mb_x, int mb_y, int16_t (*mv_table)[2], uint8_t *ref_picture, int f_code)
1204
+                       int mb_x, int mb_y, int16_t (*mv_table)[2], Picture *picture, int f_code)
1205 1205
 {
1206 1206
     int mx, my, range, dmin;
1207 1207
     int xmin, ymin, xmax, ymax;
... ...
@@ -1211,7 +1044,12 @@ int ff_estimate_motion_b(MpegEncContext * s,
1211 1211
     const int shift= 1+s->quarter_sample;
1212 1212
     const int mot_stride = s->mb_width + 2;
1213 1213
     const int mot_xy = (mb_y + 1)*mot_stride + mb_x + 1;
1214
-    
1214
+    uint8_t * const ref_picture= picture->data[0];
1215
+    uint16_t * const mv_penalty= s->me.mv_penalty[f_code] + MAX_MV;
1216
+        
1217
+    s->me.penalty_factor    = get_penalty_factor(s, s->avctx->me_cmp);
1218
+    s->me.sub_penalty_factor= get_penalty_factor(s, s->avctx->me_sub_cmp);
1219
+
1215 1220
     get_limits(s, &range, &xmin, &ymin, &xmax, &ymax, f_code);
1216 1221
     rel_xmin= xmin - mb_x*16;
1217 1222
     rel_xmax= xmax - mb_x*16;
... ...
@@ -1275,22 +1113,22 @@ int ff_estimate_motion_b(MpegEncContext * s,
1275 1275
             pred_x= P_LEFT[0];
1276 1276
             pred_y= P_LEFT[1];
1277 1277
         }
1278
-        dmin = epzs_motion_search(s, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax, ref_picture);
1278
+        dmin = s->me.motion_search[0](s, 0, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax, 
1279
+                                      picture, mv_penalty);
1279 1280
  
1280 1281
         break;
1281 1282
     }
1282 1283
     
1283
-    dmin= fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
1284
-                                pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2, s->dsp.pix_abs16x16_y2,
1285
-                                s->dsp.pix_abs16x16_xy2, 0);
1284
+    dmin= s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
1285
+				   pred_x, pred_y, picture, 0, 0, mv_penalty);
1286 1286
 //printf("%d %d %d %d//", s->mb_x, s->mb_y, mx, my);
1287 1287
 //    s->mb_type[mb_y*s->mb_width + mb_x]= mb_type;
1288 1288
     mv_table[mot_xy][0]= mx;
1289 1289
     mv_table[mot_xy][1]= my;
1290
+
1290 1291
     return dmin;
1291 1292
 }
1292 1293
 
1293
-
1294 1294
 static inline int check_bidir_mv(MpegEncContext * s,
1295 1295
                    int mb_x, int mb_y,
1296 1296
                    int motion_fx, int motion_fy,
... ...
@@ -1299,45 +1137,57 @@ static inline int check_bidir_mv(MpegEncContext * s,
1299 1299
                    int pred_bx, int pred_by)
1300 1300
 {
1301 1301
     //FIXME optimize?
1302
-    //FIXME direct mode penalty
1303
-    UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
1304
-    uint8_t *dest_y = s->me_scratchpad;
1302
+    //FIXME move into template?
1303
+    //FIXME better f_code prediction (max mv & distance)
1304
+    UINT16 *mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
1305
+    uint8_t *dest_y = s->me.scratchpad;
1305 1306
     uint8_t *ptr;
1306 1307
     int dxy;
1307 1308
     int src_x, src_y;
1308 1309
     int fbmin;
1309 1310
 
1310
-    fbmin = (mv_penalty[motion_fx-pred_fx] + mv_penalty[motion_fy-pred_fy])*s->qscale;
1311
-
1312
-    dxy = ((motion_fy & 1) << 1) | (motion_fx & 1);
1313
-    src_x = mb_x * 16 + (motion_fx >> 1);
1314
-    src_y = mb_y * 16 + (motion_fy >> 1);
1315
-    src_x = clip(src_x, -16, s->width);
1316
-    if (src_x == s->width)
1317
-        dxy&= 2;
1318
-    src_y = clip(src_y, -16, s->height);
1319
-    if (src_y == s->height)
1320
-        dxy&= 1;
1321
-
1322
-    ptr = s->last_picture.data[0] + (src_y * s->linesize) + src_x;
1323
-    s->dsp.put_pixels_tab[0][dxy](dest_y    , ptr    , s->linesize, 16);
1324
-
1325
-    fbmin += (mv_penalty[motion_bx-pred_bx] + mv_penalty[motion_by-pred_by])*s->qscale;
1326
-
1327
-    dxy = ((motion_by & 1) << 1) | (motion_bx & 1);
1328
-    src_x = mb_x * 16 + (motion_bx >> 1);
1329
-    src_y = mb_y * 16 + (motion_by >> 1);
1330
-    src_x = clip(src_x, -16, s->width);
1331
-    if (src_x == s->width)
1332
-        dxy&= 2;
1333
-    src_y = clip(src_y, -16, s->height);
1334
-    if (src_y == s->height)
1335
-        dxy&= 1;
1336
-
1337
-    ptr = s->next_picture.data[0] + (src_y * s->linesize) + src_x;
1338
-    s->dsp.avg_pixels_tab[0][dxy](dest_y    , ptr    , s->linesize, 16);
1339
-
1340
-    fbmin += s->dsp.pix_abs16x16(s->new_picture.data[0] + mb_x*16 + mb_y*16*s->linesize, dest_y, s->linesize);
1311
+    if(s->quarter_sample){
1312
+        dxy = ((motion_fy & 3) << 2) | (motion_fx & 3);
1313
+        src_x = mb_x * 16 + (motion_fx >> 2);
1314
+        src_y = mb_y * 16 + (motion_fy >> 2);
1315
+        assert(src_x >=-16 && src_x<=s->width);
1316
+        assert(src_y >=-16 && src_y<=s->height);
1317
+
1318
+        ptr = s->last_picture.data[0] + (src_y * s->linesize) + src_x;
1319
+        s->dsp.put_qpel_pixels_tab[0][dxy](dest_y    , ptr    , s->linesize);
1320
+
1321
+        dxy = ((motion_by & 3) << 2) | (motion_bx & 3);
1322
+        src_x = mb_x * 16 + (motion_bx >> 2);
1323
+        src_y = mb_y * 16 + (motion_by >> 2);
1324
+        assert(src_x >=-16 && src_x<=s->width);
1325
+        assert(src_y >=-16 && src_y<=s->height);
1326
+    
1327
+        ptr = s->next_picture.data[0] + (src_y * s->linesize) + src_x;
1328
+        s->dsp.avg_qpel_pixels_tab[0][dxy](dest_y    , ptr    , s->linesize);
1329
+    }else{
1330
+        dxy = ((motion_fy & 1) << 1) | (motion_fx & 1);
1331
+        src_x = mb_x * 16 + (motion_fx >> 1);
1332
+        src_y = mb_y * 16 + (motion_fy >> 1);
1333
+        assert(src_x >=-16 && src_x<=s->width);
1334
+        assert(src_y >=-16 && src_y<=s->height);
1335
+
1336
+        ptr = s->last_picture.data[0] + (src_y * s->linesize) + src_x;
1337
+        s->dsp.put_pixels_tab[0][dxy](dest_y    , ptr    , s->linesize, 16);
1338
+
1339
+        dxy = ((motion_by & 1) << 1) | (motion_bx & 1);
1340
+        src_x = mb_x * 16 + (motion_bx >> 1);
1341
+        src_y = mb_y * 16 + (motion_by >> 1);
1342
+        assert(src_x >=-16 && src_x<=s->width);
1343
+        assert(src_y >=-16 && src_y<=s->height);
1344
+    
1345
+        ptr = s->next_picture.data[0] + (src_y * s->linesize) + src_x;
1346
+        s->dsp.avg_pixels_tab[0][dxy](dest_y    , ptr    , s->linesize, 16);
1347
+    }
1348
+
1349
+    fbmin = (mv_penalty[motion_fx-pred_fx] + mv_penalty[motion_fy-pred_fy])*s->me.sub_penalty_factor // forward MV cost
1350
+           +(mv_penalty[motion_bx-pred_bx] + mv_penalty[motion_by-pred_by])*s->me.sub_penalty_factor // backward MV cost (no ';' here: the sum continues on the next line)
1351
+           + s->dsp.me_sub_cmp[0](s, s->new_picture.data[0] + mb_x*16 + mb_y*16*s->linesize, dest_y, s->linesize); // compare source MB against the put+avg prediction in dest_y
1352
+
1341 1353
     return fbmin;
1342 1354
 }
1343 1355
 
... ...
@@ -1374,66 +1224,14 @@ static inline int direct_search(MpegEncContext * s,
1374 1374
     int P[10][2];
1375 1375
     const int mot_stride = s->mb_width + 2;
1376 1376
     const int mot_xy = (mb_y + 1)*mot_stride + mb_x + 1;
1377
-    int dmin, dmin2;
1378
-    int motion_fx, motion_fy, motion_bx, motion_by, motion_bx0, motion_by0;
1379
-    int motion_dx, motion_dy;
1380
-    const int motion_px= s->p_mv_table[mot_xy][0];
1381
-    const int motion_py= s->p_mv_table[mot_xy][1];
1377
+    const int shift= 1+s->quarter_sample;
1378
+    int dmin, i;
1382 1379
     const int time_pp= s->pp_time;
1383 1380
     const int time_pb= s->pb_time;
1384
-    const int time_bp= time_pp - time_pb;
1385
-    int bx, by;
1386
-    int mx, my, mx2, my2;
1387
-    uint8_t *ref_picture= s->me_scratchpad - (mb_x - 1 + (mb_y - 1)*s->linesize)*16;
1381
+    int mx, my, xmin, xmax, ymin, ymax;
1388 1382
     int16_t (*mv_table)[2]= s->b_direct_mv_table;
1389
-/*    uint16_t *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; */ // f_code of the prev frame
1390
-
1391
-    /* thanks to iso-mpeg the rounding is different for the zero vector, so we need to handle that ... */
1392
-    motion_fx= (motion_px*time_pb)/time_pp;
1393
-    motion_fy= (motion_py*time_pb)/time_pp;
1394
-    motion_bx0= (-motion_px*time_bp)/time_pp;
1395
-    motion_by0= (-motion_py*time_bp)/time_pp;
1396
-    motion_dx= motion_dy=0;
1397
-    dmin2= check_bidir_mv(s, mb_x, mb_y, 
1398
-                          motion_fx, motion_fy,
1399
-                          motion_bx0, motion_by0,
1400
-                          motion_fx, motion_fy,
1401
-                          motion_bx0, motion_by0) - s->qscale;
1402
-
1403
-    motion_bx= motion_fx - motion_px;
1404
-    motion_by= motion_fy - motion_py;
1405
-    for(by=-1; by<2; by++){
1406
-        for(bx=-1; bx<2; bx++){
1407
-            uint8_t *dest_y = s->me_scratchpad + (by+1)*s->linesize*16 + (bx+1)*16;
1408
-            uint8_t *ptr;
1409
-            int dxy;
1410
-            int src_x, src_y;
1411
-            const int width= s->width;
1412
-            const int height= s->height;
1413
-
1414
-            dxy = ((motion_fy & 1) << 1) | (motion_fx & 1);
1415
-            src_x = (mb_x + bx) * 16 + (motion_fx >> 1);
1416
-            src_y = (mb_y + by) * 16 + (motion_fy >> 1);
1417
-            src_x = clip(src_x, -16, width);
1418
-            if (src_x == width) dxy &= ~1;
1419
-            src_y = clip(src_y, -16, height);
1420
-            if (src_y == height) dxy &= ~2;
1421
-
1422
-            ptr = s->last_picture.data[0] + (src_y * s->linesize) + src_x;
1423
-            s->dsp.put_pixels_tab[0][dxy](dest_y    , ptr    , s->linesize, 16);
1424
-
1425
-            dxy = ((motion_by & 1) << 1) | (motion_bx & 1);
1426
-            src_x = (mb_x + bx) * 16 + (motion_bx >> 1);
1427
-            src_y = (mb_y + by) * 16 + (motion_by >> 1);
1428
-            src_x = clip(src_x, -16, width);
1429
-            if (src_x == width) dxy &= ~1;
1430
-            src_y = clip(src_y, -16, height);
1431
-            if (src_y == height) dxy &= ~2;
1432
-
1433
-	    s->dsp.avg_pixels_tab[0][dxy](dest_y    , ptr    , s->linesize, 16);
1434
-        }
1435
-    }
1436
-
1383
+    uint16_t * const mv_penalty= s->me.mv_penalty[1] + MAX_MV;
1384
+    
1437 1385
     P_LAST[0]        = mv_table[mot_xy    ][0];
1438 1386
     P_LAST[1]        = mv_table[mot_xy    ][1];
1439 1387
     P_LEFT[0]        = mv_table[mot_xy - 1][0];
... ...
@@ -1458,62 +1256,81 @@ static inline int direct_search(MpegEncContext * s,
1458 1458
         P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1459 1459
         P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1460 1460
     }
1461
-    dmin = epzs_motion_search(s, &mx, &my, P, 0, 0, -16, -16, 15, 15, ref_picture);
1462
-    if(mx==0 && my==0) dmin=99999999; // not representable, due to rounding stuff
1463
-    if(dmin2<dmin){ 
1464
-        dmin= dmin2;
1465
-        mx=0;
1466
-        my=0;
1461
+
1462
+    ymin= xmin=(-32)>>shift;
1463
+    ymax= xmax=   31>>shift;
1464
+
1465
+    if(s->co_located_type_table[mb_x + mb_y*s->mb_width]==CO_LOCATED_TYPE_4MV){
1466
+        s->mv_type= MV_TYPE_8X8;
1467
+    }else{
1468
+        s->mv_type= MV_TYPE_16X16;
1467 1469
     }
1468
-#if 1
1469
-    mx2= mx= mx*2; 
1470
-    my2= my= my*2;
1471
-    for(by=-1; by<2; by++){
1472
-        if(my2+by < -32) continue;
1473
-        for(bx=-1; bx<2; bx++){
1474
-            if(bx==0 && by==0) continue;
1475
-            if(mx2+bx < -32) continue;
1476
-            dmin2= check_bidir_mv(s, mb_x, mb_y, 
1477
-                          mx2+bx+motion_fx, my2+by+motion_fy,
1478
-                          mx2+bx+motion_bx, my2+by+motion_by,
1479
-                          mx2+bx+motion_fx, my2+by+motion_fy,
1480
-                          motion_bx, motion_by) - s->qscale;
1481
-            
1482
-            if(dmin2<dmin){
1483
-                dmin=dmin2;
1484
-                mx= mx2 + bx;
1485
-                my= my2 + by;
1486
-            }
1487
-        }
1470
+
1471
+    for(i=0; i<4; i++){
1472
+        int index= s->block_index[i];
1473
+        int min, max;
1474
+    
1475
+        s->me.co_located_mv[i][0]= s->motion_val[index][0];
1476
+        s->me.co_located_mv[i][1]= s->motion_val[index][1];
1477
+        s->me.direct_basis_mv[i][0]= s->me.co_located_mv[i][0]*time_pb/time_pp + ((i& 1)<<(shift+3));
1478
+        s->me.direct_basis_mv[i][1]= s->me.co_located_mv[i][1]*time_pb/time_pp + ((i>>1)<<(shift+3));
1479
+//        s->me.direct_basis_mv[1][i][0]= s->me.co_located_mv[i][0]*(time_pb - time_pp)/time_pp + ((i &1)<<(shift+3);
1480
+//        s->me.direct_basis_mv[1][i][1]= s->me.co_located_mv[i][1]*(time_pb - time_pp)/time_pp + ((i>>1)<<(shift+3);
1481
+
1482
+        max= FFMAX(s->me.direct_basis_mv[i][0], s->me.direct_basis_mv[i][0] - s->me.co_located_mv[i][0])>>shift;
1483
+        min= FFMIN(s->me.direct_basis_mv[i][0], s->me.direct_basis_mv[i][0] - s->me.co_located_mv[i][0])>>shift;
1484
+        max+= (2*mb_x + (i& 1))*8 - 1; // +-1 is for the simpler rounding
1485
+        min+= (2*mb_x + (i& 1))*8 + 1;
1486
+        if(max >= s->width) xmax= s->width - max - 1;
1487
+        if(min < -16      ) xmin= - 32 - min;
1488
+
1489
+        max= FFMAX(s->me.direct_basis_mv[i][1], s->me.direct_basis_mv[i][1] - s->me.co_located_mv[i][1])>>shift;
1490
+        min= FFMIN(s->me.direct_basis_mv[i][1], s->me.direct_basis_mv[i][1] - s->me.co_located_mv[i][1])>>shift;
1491
+        max+= (2*mb_y + (i>>1))*8 - 1; // +-1 is for the simpler rounding
1492
+        min+= (2*mb_y + (i>>1))*8 + 1;
1493
+        if(max >= s->height) ymax= s->height - max - 1;
1494
+        if(min < -16       ) ymin= - 32 - min;
1495
+        
1496
+        if(s->mv_type == MV_TYPE_16X16) break;
1488 1497
     }
1489
-#else
1490
-    mx*=2; my*=2;
1491
-#endif
1492
-    if(mx==0 && my==0){
1493
-        motion_bx= motion_bx0;
1494
-        motion_by= motion_by0;
1498
+    
1499
+    assert(xmax <= 15 && ymax <= 15 && xmin >= -16 && ymin >= -16);
1500
+    
1501
+    if(xmax < 0 || xmin >0 || ymax < 0 || ymin > 0){
1502
+        s->b_direct_mv_table[mot_xy][0]= 0;
1503
+        s->b_direct_mv_table[mot_xy][1]= 0;
1504
+
1505
+        return 256*256*256*64;
1506
+    }
1507
+
1508
+    if(s->flags&CODEC_FLAG_QPEL){
1509
+        dmin = simple_direct_qpel_epzs_motion_search(s, 0, &mx, &my, P, 0, 0, xmin, ymin, xmax, ymax, 
1510
+                                                     &s->last_picture, mv_penalty);
1511
+        dmin = simple_direct_qpel_qpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax,
1512
+                                                0, 0, &s->last_picture, 0, 0, mv_penalty);
1513
+    }else{
1514
+        dmin = simple_direct_hpel_epzs_motion_search(s, 0, &mx, &my, P, 0, 0, xmin, ymin, xmax, ymax, 
1515
+                                                     &s->last_picture, mv_penalty);
1516
+        dmin = simple_direct_hpel_hpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax,
1517
+                                                0, 0, &s->last_picture, 0, 0, mv_penalty);
1495 1518
     }
1496 1519
 
1497 1520
     s->b_direct_mv_table[mot_xy][0]= mx;
1498 1521
     s->b_direct_mv_table[mot_xy][1]= my;
1499
-    s->b_direct_forw_mv_table[mot_xy][0]= motion_fx + mx;
1500
-    s->b_direct_forw_mv_table[mot_xy][1]= motion_fy + my;
1501
-    s->b_direct_back_mv_table[mot_xy][0]= motion_bx + mx;
1502
-    s->b_direct_back_mv_table[mot_xy][1]= motion_by + my;
1503 1522
     return dmin;
1504 1523
 }
1505 1524
 
1506 1525
 void ff_estimate_b_frame_motion(MpegEncContext * s,
1507 1526
                              int mb_x, int mb_y)
1508 1527
 {
1509
-    const int quant= s->qscale;
1528
+    const int penalty_factor= s->me.penalty_factor;
1510 1529
     int fmin, bmin, dmin, fbmin;
1511 1530
     int type=0;
1512 1531
     
1513 1532
     dmin= direct_search(s, mb_x, mb_y);
1514 1533
 
1515
-    fmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, s->last_picture.data[0], s->f_code);
1516
-    bmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, s->next_picture.data[0], s->b_code) - quant;
1534
+    fmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, &s->last_picture, s->f_code);
1535
+    bmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, &s->next_picture, s->b_code) - penalty_factor;
1517 1536
 //printf(" %d %d ", s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1]);
1518 1537
 
1519 1538
     fbmin= bidir_refine(s, mb_x, mb_y);
... ...
@@ -1541,22 +1358,10 @@ void ff_estimate_b_frame_motion(MpegEncContext * s,
1541 1541
 
1542 1542
     if(s->flags&CODEC_FLAG_HQ){
1543 1543
         type= MB_TYPE_FORWARD | MB_TYPE_BACKWARD | MB_TYPE_BIDIR | MB_TYPE_DIRECT; //FIXME something smarter
1544
+        if(dmin>256*256*16) type&= ~MB_TYPE_DIRECT; //dont try direct mode if its invalid for this MB
1544 1545
     }
1545 1546
 
1546
-/*
1547
-{
1548
-static int count=0;
1549
-static int sum=0;
1550
-if(type==MB_TYPE_DIRECT){
1551
-  int diff= ABS(s->b_forw_mv_table)
1552
-}
1553
-}*/
1554
-
1555 1547
     s->mb_type[mb_y*s->mb_width + mb_x]= type;
1556
-/*    if(mb_y==0 && mb_x==0) printf("\n");
1557
-    if(mb_x==0) printf("\n");
1558
-    printf("%d", av_log2(type));
1559
-*/
1560 1548
 }
1561 1549
 
1562 1550
 /* find best f_code for ME which do unlimited searches */
... ...
@@ -1569,7 +1374,7 @@ int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type)
1569 1569
         int best_fcode=-1;
1570 1570
         int best_score=-10000000;
1571 1571
 
1572
-        for(i=0; i<8; i++) score[i]= s->mb_num*(8-i); //FIXME *2 and all other too so its the same but nicer
1572
+        for(i=0; i<8; i++) score[i]= s->mb_num*(8-i);
1573 1573
 
1574 1574
         for(y=0; y<s->mb_height; y++){
1575 1575
             int x;
1576 1576
new file mode 100644
... ...
@@ -0,0 +1,737 @@
0
+/*
1
+ * Motion estimation 
2
+ * Copyright (c) 2002 Michael Niedermayer
3
+ *
4
+ * This library is free software; you can redistribute it and/or
5
+ * modify it under the terms of the GNU Lesser General Public
6
+ * License as published by the Free Software Foundation; either
7
+ * version 2 of the License, or (at your option) any later version.
8
+ *
9
+ * This library is distributed in the hope that it will be useful,
10
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
+ * Lesser General Public License for more details.
13
+ *
14
+ * You should have received a copy of the GNU Lesser General Public
15
+ * License along with this library; if not, write to the Free Software
16
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
+ *
18
+ */
19
+
20
+//LOAD_COMMON(x, y): declares the locals shared by the search routines of this template (score_map, stride/uvstride, time_pp/time_pb, the src_*/ref_*/ref2_y pointers offset to pixel (x, y), and the put/avg function tables). It expects s, ref_picture and size to be in scope where it is expanded. Lets hope gcc will remove the unused vars ...(gcc 3.2.2 seems to do it ...)
21
+//Note, the 'unu' declaration is only there to kill the ugly unused var warnings for locals a given caller does not touch. NOTE(review): it casts pointers to int, which would truncate where pointers are wider than int; unu is never read in the code shown here.
22
+#define LOAD_COMMON(x, y)\
23
+    uint32_t * const score_map= s->me.score_map;\
24
+    const int stride= s->linesize;\
25
+    const int uvstride= s->uvlinesize;\
26
+    const int time_pp= s->pp_time;\
27
+    const int time_pb= s->pb_time;\
28
+    uint8_t * const src_y= s->new_picture.data[0] + ((y) * stride) + (x);\
29
+    uint8_t * const src_u= s->new_picture.data[1] + (((y)>>1) * uvstride) + ((x)>>1);\
30
+    uint8_t * const src_v= s->new_picture.data[2] + (((y)>>1) * uvstride) + ((x)>>1);\
31
+    uint8_t * const ref_y= ref_picture->data[0] + ((y) * stride) + (x);\
32
+    uint8_t * const ref_u= ref_picture->data[1] + (((y)>>1) * uvstride) + ((x)>>1);\
33
+    uint8_t * const ref_v= ref_picture->data[2] + (((y)>>1) * uvstride) + ((x)>>1);\
34
+    uint8_t * const ref2_y= s->next_picture.data[0] + ((y) * stride) + (x);\
35
+    op_pixels_func (*hpel_put)[4];\
36
+    op_pixels_func (*hpel_avg)[4]= &s->dsp.avg_pixels_tab[size];\
37
+    op_pixels_func (*chroma_hpel_put)[4];\
38
+    qpel_mc_func (*qpel_put)[16];\
39
+    qpel_mc_func (*qpel_avg)[16]= &s->dsp.avg_qpel_pixels_tab[size];\
40
+    const __attribute__((unused)) int unu= time_pp + time_pb + (int)src_u + (int)src_v + (int)ref_u + (int)ref_v\
41
+                                           + (int)ref2_y + (int)hpel_avg + (int)qpel_avg;\
42
+    if(s->no_rounding /*FIXME b_type*/){ /* the put tables depend on no_rounding; the chroma table is taken at index size+1 */\
43
+        hpel_put= &s->dsp.put_no_rnd_pixels_tab[size];\
44
+        chroma_hpel_put= &s->dsp.put_no_rnd_pixels_tab[size+1];\
45
+        qpel_put= &s->dsp.put_no_rnd_qpel_pixels_tab[size];\
46
+    }else{\
47
+        hpel_put=& s->dsp.put_pixels_tab[size];\
48
+        chroma_hpel_put= &s->dsp.put_pixels_tab[size+1];\
49
+        qpel_put= &s->dsp.put_qpel_pixels_tab[size];\
50
+    }
51
+
52
+
53
+#ifdef CMP_HPEL
54
+    
55
+#define CHECK_HALF_MV(dx, dy, x, y) /* scores the half-pel position (2*x+dx, 2*y+dy); needs d, dmin, bx, by, size, mv_penalty, pred_x, pred_y and penalty_factor in scope */\
56
+{\
57
+    const int hx= 2*(x)+(dx);\
58
+    const int hy= 2*(y)+(dy);\
59
+    CMP_HPEL(d, dx, dy, x, y, size); /* CMP_HPEL is supplied by the file that includes this template; it stores the compare result in d */\
60
+    d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
61
+    COPY3_IF_LT(dmin, d, bx, hx, by, hy) /* presumably dmin=d, bx=hx, by=hy when d<dmin -- macro defined elsewhere, confirm */\
62
+}
63
+
64
+#if 0
65
+static int RENAME(hpel_motion_search)(MpegEncContext * s,
66
+				  int *mx_ptr, int *my_ptr, int dmin,
67
+				  int xmin, int ymin, int xmax, int ymax,
68
+                                  int pred_x, int pred_y, Picture *ref_picture, 
69
+                                  int n, int size)
70
+{ /*
+    * Compiled-out variant (it sits in the #if 0 arm): half-pel refinement that
+    * evaluates all 8 half-pel neighbours of the full-pel vector (*mx_ptr, *my_ptr)
+    * whenever that vector lies strictly inside [xmin,xmax] x [ymin,ymax].
+    *
+    * NOTE(review): this variant uses mv_penalty and penalty_factor, but neither is a
+    * parameter or a local here, unlike the variant in the #else arm. The final assert
+    * is also the negation of the one in the #else arm. Both look like leftovers.
+    */
71
+    UINT8 *ptr;
72
+    
73
+    const int xx = 16 * s->mb_x + 8*(n&1);
74
+    const int yy = 16 * s->mb_y + 8*(n>>1);
75
+    const int mx = *mx_ptr;
76
+    const int my = *my_ptr;
77
+    
78
+    LOAD_COMMON(xx, yy);
79
+    
80
+ //   INIT;
81
+ //FIXME factorize
82
+    me_cmp_func cmp, chroma_cmp, cmp_sub, chroma_cmp_sub;
83
+
84
+    if(s->no_rounding /*FIXME b_type*/){ // repeats the table selection LOAD_COMMON already performed
85
+        hpel_put= &s->dsp.put_no_rnd_pixels_tab[size];
86
+        chroma_hpel_put= &s->dsp.put_no_rnd_pixels_tab[size+1];
87
+    }else{
88
+        hpel_put=& s->dsp.put_pixels_tab[size];
89
+        chroma_hpel_put= &s->dsp.put_pixels_tab[size+1];
90
+    }
91
+    cmp= s->dsp.me_cmp[size];
92
+    chroma_cmp= s->dsp.me_cmp[size+1];
93
+    cmp_sub= s->dsp.me_sub_cmp[size];
94
+    chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
95
+
96
+    if(s->me.skip){ //FIXME somehow move up (benchmark)
97
+        *mx_ptr = 0;
98
+        *my_ptr = 0;
99
+        return dmin;
100
+    }
101
+        
102
+    if(s->avctx->me_cmp != s->avctx->me_sub_cmp){ // the full-pel score came from a different compare function, so redo it with the sub-pel one
103
+        CMP_HPEL(dmin, 0, 0, mx, my, size);
104
+        if(mx || my)
105
+            dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
106
+    }
107
+        
108
+    if (mx > xmin && mx < xmax && 
109
+        my > ymin && my < ymax) {
110
+        int bx=2*mx, by=2*my;
111
+        int d= dmin;
112
+        
113
+        CHECK_HALF_MV(1, 1, mx-1, my-1)
114
+        CHECK_HALF_MV(0, 1, mx  , my-1)        
115
+        CHECK_HALF_MV(1, 1, mx  , my-1)
116
+        CHECK_HALF_MV(1, 0, mx-1, my  )
117
+        CHECK_HALF_MV(1, 0, mx  , my  )
118
+        CHECK_HALF_MV(1, 1, mx-1, my  )
119
+        CHECK_HALF_MV(0, 1, mx  , my  )        
120
+        CHECK_HALF_MV(1, 1, mx  , my  )
121
+
122
+        assert(bx < xmin*2 || bx > xmax*2 || by < ymin*2 || by > ymax*2);
123
+
124
+        *mx_ptr = bx;
125
+        *my_ptr = by;
126
+    }else{
127
+        *mx_ptr =2*mx; // on or outside the border: no refinement, only convert to half-pel units
128
+        *my_ptr =2*my;
129
+    }
130
+
131
+    return dmin;
132
+}
133
+
134
+#else
135
+static int RENAME(hpel_motion_search)(MpegEncContext * s,
136
+				  int *mx_ptr, int *my_ptr, int dmin,
137
+				  int xmin, int ymin, int xmax, int ymax,
138
+                                  int pred_x, int pred_y, Picture *ref_picture, 
139
+                                  int n, int size, uint16_t * const mv_penalty)
140
+{ /*
+    * Half-pel refinement of the full-pel vector (*mx_ptr, *my_ptr).
+    *
+    * n selects the 8x8 quadrant inside the current macroblock: xx/yy add 8*(n&1) and
+    * 8*(n>>1) to the macroblock origin. size indexes the dsp compare tables.
+    * pred_x/pred_y are subtracted from each candidate before the mv_penalty lookup,
+    * and that penalty is weighted by s->me.sub_penalty_factor.
+    *
+    * Only 4 of the 8 half-pel neighbours are evaluated on any path; which ones is
+    * decided from the scores of the four full-pel neighbours read from score_map.
+    * The evaluation order matters because CHECK_HALF_MV updates dmin/bx/by in place.
+    *
+    * On return *mx_ptr / *my_ptr hold the chosen vector in half-pel units (0,0 when
+    * s->me.skip is set) and the value of dmin is returned.
+    */
141
+    const int xx = 16 * s->mb_x + 8*(n&1);
142
+    const int yy = 16 * s->mb_y + 8*(n>>1);
143
+    const int mx = *mx_ptr;
144
+    const int my = *my_ptr;   
145
+    const int penalty_factor= s->me.sub_penalty_factor;
146
+    me_cmp_func cmp_sub, chroma_cmp_sub;
147
+
148
+    LOAD_COMMON(xx, yy);
149
+    
150
+ //FIXME factorize
151
+
152
+    cmp_sub= s->dsp.me_sub_cmp[size];
153
+    chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
154
+
155
+    if(s->me.skip){ //FIXME move out of hpel?
156
+        *mx_ptr = 0;
157
+        *my_ptr = 0;
158
+        return dmin;
159
+    }
160
+        
161
+    if(s->avctx->me_cmp != s->avctx->me_sub_cmp){ // incoming dmin was measured with me_cmp; recompute it with the sub-pel compare so scores are comparable
162
+        CMP_HPEL(dmin, 0, 0, mx, my, size);
163
+        if(mx || my)
164
+            dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
165
+    }
166
+        
167
+    if (mx > xmin && mx < xmax && 
168
+        my > ymin && my < ymax) {
169
+        int bx=2*mx, by=2*my;
170
+        int d= dmin;
171
+        const int index= (my<<ME_MAP_SHIFT) + mx;
172
+        const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] 
173
+                     + (mv_penalty[bx   - pred_x] + mv_penalty[by-2 - pred_y])*penalty_factor; // t/l/r/b: cached score of the top/left/right/bottom full-pel neighbour plus its vector penalty
174
+        const int l= score_map[(index- 1               )&(ME_MAP_SIZE-1)]
175
+                     + (mv_penalty[bx-2 - pred_x] + mv_penalty[by   - pred_y])*penalty_factor; // NOTE(review): score_map is read without checking map[]; presumably the preceding full-pel search filled these entries
176
+        const int r= score_map[(index+ 1               )&(ME_MAP_SIZE-1)]
177
+                     + (mv_penalty[bx+2 - pred_x] + mv_penalty[by   - pred_y])*penalty_factor;
178
+        const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
179
+                     + (mv_penalty[bx   - pred_x] + mv_penalty[by+2 - pred_y])*penalty_factor;
180
+        
181
+        if(t<=b){ // offsets in the comments below are in half-pel units relative to (2*mx, 2*my)
182
+            CHECK_HALF_MV(0, 1, mx  ,my-1) // ( 0,-1)
183
+            if(l<=r){
184
+                CHECK_HALF_MV(1, 1, mx-1, my-1) // (-1,-1)
185
+                if(t+r<=b+l){
186
+                    CHECK_HALF_MV(1, 1, mx  , my-1) // (+1,-1)
187
+                }else{
188
+                    CHECK_HALF_MV(1, 1, mx-1, my  ) // (-1,+1)
189
+                }
190
+                CHECK_HALF_MV(1, 0, mx-1, my  ) // (-1, 0)
191
+            }else{
192
+                CHECK_HALF_MV(1, 1, mx  , my-1) // (+1,-1)
193
+                if(t+l<=b+r){
194
+                    CHECK_HALF_MV(1, 1, mx-1, my-1) // (-1,-1)
195
+                }else{
196
+                    CHECK_HALF_MV(1, 1, mx  , my  ) // (+1,+1)
197
+                }
198
+                CHECK_HALF_MV(1, 0, mx  , my  ) // (+1, 0)
199
+            }
200
+        }else{
201
+            if(l<=r){
202
+                if(t+l<=b+r){
203
+                    CHECK_HALF_MV(1, 1, mx-1, my-1) // (-1,-1)
204
+                }else{
205
+                    CHECK_HALF_MV(1, 1, mx  , my  ) // (+1,+1)
206
+                }
207
+                CHECK_HALF_MV(1, 0, mx-1, my) // (-1, 0)
208
+                CHECK_HALF_MV(1, 1, mx-1, my) // (-1,+1)
209
+            }else{
210
+                if(t+r<=b+l){
211
+                    CHECK_HALF_MV(1, 1, mx  , my-1) // (+1,-1)
212
+                }else{
213
+                    CHECK_HALF_MV(1, 1, mx-1, my) // (-1,+1)
214
+                }
215
+                CHECK_HALF_MV(1, 0, mx  , my) // (+1, 0)
216
+                CHECK_HALF_MV(1, 1, mx  , my) // (+1,+1)
217
+            }
218
+            CHECK_HALF_MV(0, 1, mx  , my) // ( 0,+1)
219
+        }
220
+        assert(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2);
221
+
222
+        *mx_ptr = bx;
223
+        *my_ptr = by;
224
+    }else{
225
+        *mx_ptr =2*mx; // on or outside the border of the search window: no refinement, only convert to half-pel units
226
+        *my_ptr =2*my;
227
+    }
228
+
229
+    return dmin;
230
+}
231
+#endif
232
+
233
+#endif /* CMP_HPEL */
234
+
235
+#ifdef CMP_QPEL
236
+
237
+#define CHECK_QUARTER_MV(dx, dy, x, y) /* scores the quarter-pel position (4*x+dx, 4*y+dy); needs d, dmin, bx, by, size, mv_penalty, pred_x, pred_y and penalty_factor in scope */\
238
+{\
239
+    const int hx= 4*(x)+(dx);\
240
+    const int hy= 4*(y)+(dy);\
241
+    CMP_QPEL(d, dx, dy, x, y, size); /* CMP_QPEL is supplied by the file that includes this template; it stores the compare result in d */\
242
+    d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
243
+    COPY3_IF_LT(dmin, d, bx, hx, by, hy) /* presumably dmin=d, bx=hx, by=hy when d<dmin -- macro defined elsewhere, confirm */\
244
+}
245
+
246
+static int RENAME(qpel_motion_search)(MpegEncContext * s,
247
+				  int *mx_ptr, int *my_ptr, int dmin,
248
+				  int xmin, int ymin, int xmax, int ymax,
249
+                                  int pred_x, int pred_y, Picture *ref_picture, 
250
+                                  int n, int size, uint16_t * const mv_penalty)
251
+{ /*
+    * Quarter-pel refinement of the full-pel vector (*mx_ptr, *my_ptr).
+    *
+    * Active path (#if 1): every quarter-pel offset (nx, ny) in -3..3 around the
+    * full-pel position gets an estimated score interpolated from the cached full-pel
+    * scores in score_map plus a scaled vector penalty. The 8 lowest estimates are
+    * kept sorted in best[] / best_pos[], and only those 8 positions are really
+    * measured with CHECK_QUARTER_MV.
+    *
+    * n selects the 8x8 quadrant inside the current macroblock; size indexes the dsp
+    * compare tables; the penalty weight is s->me.sub_penalty_factor.
+    *
+    * On return *mx_ptr / *my_ptr hold the chosen vector in quarter-pel units (0,0
+    * when s->me.skip is set) and the value of dmin is returned.
+    */
252
+    const int xx = 16 * s->mb_x + 8*(n&1);
253
+    const int yy = 16 * s->mb_y + 8*(n>>1);
254
+    const int mx = *mx_ptr;
255
+    const int my = *my_ptr;   
256
+    const int penalty_factor= s->me.sub_penalty_factor;
257
+    const int map_generation= s->me.map_generation;
258
+    uint32_t *map= s->me.map;
259
+    me_cmp_func cmp, chroma_cmp;
260
+    me_cmp_func cmp_sub, chroma_cmp_sub;
261
+
262
+    LOAD_COMMON(xx, yy);
263
+    
264
+    cmp= s->dsp.me_cmp[size];
265
+    chroma_cmp= s->dsp.me_cmp[size+1]; //factorize FIXME
266
+ //FIXME factorize
267
+
268
+    cmp_sub= s->dsp.me_sub_cmp[size];
269
+    chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
270
+
271
+    if(s->me.skip){ //FIXME somehow move up (benchmark)
272
+        *mx_ptr = 0;
273
+        *my_ptr = 0;
274
+        return dmin;
275
+    }
276
+        
277
+    if(s->avctx->me_cmp != s->avctx->me_sub_cmp){ // incoming dmin was measured with me_cmp; recompute it with the sub-pel compare so scores are comparable
278
+        CMP_QPEL(dmin, 0, 0, mx, my, size);
279
+        if(mx || my)
280
+            dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor;
281
+    }
282
+        
283
+    if (mx > xmin && mx < xmax && 
284
+        my > ymin && my < ymax) {
285
+        int bx=4*mx, by=4*my;
286
+        int d= dmin;
287
+        int i, nx, ny;
288
+        const int index= (my<<ME_MAP_SHIFT) + mx;
289
+        const int t= score_map[(index-(1<<ME_MAP_SHIFT)  )&(ME_MAP_SIZE-1)]; // t/l/r/b/c: cached scores of the top/left/right/bottom neighbours and of the centre
290
+        const int l= score_map[(index- 1                 )&(ME_MAP_SIZE-1)];
291
+        const int r= score_map[(index+ 1                 )&(ME_MAP_SIZE-1)];
292
+        const int b= score_map[(index+(1<<ME_MAP_SHIFT)  )&(ME_MAP_SIZE-1)];
293
+        const int c= score_map[(index                    )&(ME_MAP_SIZE-1)];
294
+        int best[8];
295
+        int best_pos[8][2];
296
+        
297
+        memset(best, 64, sizeof(int)*8); // every byte becomes 64, so each entry starts as a large sentinel (0x40404040 with 4-byte int)
298
+#if 1 // active variant: rank the offsets by an interpolated estimate, then measure the 8 best
299
+        if(s->avctx->dia_size>=2){        
300
+            const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)]; // NOTE(review): diagonal entries are read without checking map[]; presumably a diamond of size >= 2 has filled them -- confirm
301
+            const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
302
+            const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
303
+            const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
304
+
305
+            for(ny= -3; ny <= 3; ny++){
306
+                for(nx= -3; nx <= 3; nx++){
307
+                    const int t2= nx*nx*(tr + tl - 2*t) + 4*nx*(tr-tl) + 32*t; // quadratic in nx through one row: 32*left at nx=-4, 32*middle at nx=0, 32*right at nx=4
308
+                    const int c2= nx*nx*( r +  l - 2*c) + 4*nx*( r- l) + 32*c;
309
+                    const int b2= nx*nx*(br + bl - 2*b) + 4*nx*(br-bl) + 32*b;
310
+                    int score= ny*ny*(b2 + t2 - 2*c2) + 4*ny*(b2 - t2) + 32*c2; // same fit in ny over the three rows, so the estimate is scaled by 32*32 = 1024
311
+                    int i;
312
+                    
313
+                    if((nx&3)==0 && (ny&3)==0) continue; // with nx,ny in -3..3 this skips only the (0,0) offset
314
+                    
315
+                    score += 1024*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor; // penalty scaled by the same 1024
316
+                    
317
+//                    if(nx&1) score-=1024*s->me.penalty_factor;
318
+//                    if(ny&1) score-=1024*s->me.penalty_factor;
319
+                    
320
+                    for(i=0; i<8; i++){ // insert into the ascending best[] list, dropping the last entry
321
+                        if(score < best[i]){
322
+                            memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
323
+                            memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
324
+                            best[i]= score;
325
+                            best_pos[i][0]= nx + 4*mx;
326
+                            best_pos[i][1]= ny + 4*my;
327
+                            break;
328
+                        }
329
+                    }
330
+                }
331
+            }
332
+        }else{
333
+            int tl;
334
+            const int cx = 4*(r - l);
335
+            const int cx2= r + l - 2*c; 
336
+            const int cy = 4*(b - t);
337
+            const int cy2= b + t - 2*c;
338
+            int cxy;
339
+              
340
+            if(map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)] == (my<<ME_MAP_MV_BITS) + mx + map_generation && 0){ //FIXME the trailing "&& 0" makes this always false, so tl is always recomputed below
341
+                tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
342
+            }else{
343
+                CMP(tl, mx-1, my-1, size); //FIXME wrong if chroma me is different
344
+            }
345
+            
346
+            cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*c; 
347
+           
348
+            assert(16*cx2 + 4*cx + 32*c == 32*r); // the asserts check that the fitted polynomial reproduces 32x the measured scores at r, l, b, t and tl
349
+            assert(16*cx2 - 4*cx + 32*c == 32*l);
350
+            assert(16*cy2 + 4*cy + 32*c == 32*b);
351
+            assert(16*cy2 - 4*cy + 32*c == 32*t);
352
+            assert(16*cxy + 16*cy2 + 16*cx2 - 4*cy - 4*cx + 32*c == 32*tl);
353
+            
354
+            for(ny= -3; ny <= 3; ny++){
355
+                for(nx= -3; nx <= 3; nx++){
356
+                    int score= ny*nx*cxy + nx*nx*cx2 + ny*ny*cy2 + nx*cx + ny*cy + 32*c; //FIXME factor
357
+                    int i;
358
+                    
359
+                    if((nx&3)==0 && (ny&3)==0) continue; // with nx,ny in -3..3 this skips only the (0,0) offset
360
+                
361
+                    score += 32*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor; // this estimate is scaled by 32, hence the 32x penalty
362
+//                    if(nx&1) score-=32*s->me.penalty_factor;
363
+  //                  if(ny&1) score-=32*s->me.penalty_factor;
364
+                    
365
+                    for(i=0; i<8; i++){ // insert into the ascending best[] list, dropping the last entry
366
+                        if(score < best[i]){
367
+                            memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
368
+                            memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
369
+                            best[i]= score;
370
+                            best_pos[i][0]= nx + 4*mx;
371
+                            best_pos[i][1]= ny + 4*my;
372
+                            break;
373
+                        }
374
+                    }
375
+                }
376
+            }            
377
+        }
378
+        for(i=0; i<8; i++){ // NOTE(review): best_pos[] is only written by the insertions above, so this relies on at least 8 estimates having beaten the sentinel
379
+            nx= best_pos[i][0];
380
+            ny= best_pos[i][1];
381
+            CHECK_QUARTER_MV(nx&3, ny&3, nx>>2, ny>>2) // splits into fractional and full-pel parts; for negative values this relies on >> being an arithmetic shift
382
+        }
383
+#if 0 // compiled-out debugging statistics
384
+            nx= FFMAX(4*mx - bx, bx - 4*mx);
385
+            ny= FFMAX(4*my - by, by - 4*my);
386
+            
387
+            static int stats[4][4];
388
+            stats[nx][ny]++;
389
+            if(256*256*256*64 % (stats[0][0]+1) ==0){
390
+                for(i=0; i<16; i++){
391
+                    if((i&3)==0) printf("\n");
392
+                    printf("%6d ", stats[0][i]);
393
+                }
394
+                printf("\n");
395
+            }
396
+#endif
397
+#else // compiled-out alternative: measure the 8 half-pel positions, then the 8 quarter-pel neighbours of the best one
398
+
399
+        CHECK_QUARTER_MV(2, 2, mx-1, my-1)
400
+        CHECK_QUARTER_MV(0, 2, mx  , my-1)        
401
+        CHECK_QUARTER_MV(2, 2, mx  , my-1)
402
+        CHECK_QUARTER_MV(2, 0, mx  , my  )
403
+        CHECK_QUARTER_MV(2, 2, mx  , my  )
404
+        CHECK_QUARTER_MV(0, 2, mx  , my  )
405
+        CHECK_QUARTER_MV(2, 2, mx-1, my  )
406
+        CHECK_QUARTER_MV(2, 0, mx-1, my  )
407
+        
408
+        nx= bx;
409
+        ny= by;
410
+        
411
+        for(i=0; i<8; i++){
412
+            int ox[8]= {0, 1, 1, 1, 0,-1,-1,-1};
413
+            int oy[8]= {1, 1, 0,-1,-1,-1, 0, 1};
414
+            CHECK_QUARTER_MV((nx + ox[i])&3, (ny + oy[i])&3, (nx + ox[i])>>2, (ny + oy[i])>>2)
415
+        }
416
+#endif
417
+#if 0 // compiled-out extra candidates
418
+        //outer ring
419
+        CHECK_QUARTER_MV(1, 3, mx-1, my-1)
420
+        CHECK_QUARTER_MV(1, 2, mx-1, my-1)
421
+        CHECK_QUARTER_MV(1, 1, mx-1, my-1)
422
+        CHECK_QUARTER_MV(2, 1, mx-1, my-1)
423
+        CHECK_QUARTER_MV(3, 1, mx-1, my-1)
424
+        CHECK_QUARTER_MV(0, 1, mx  , my-1)
425
+        CHECK_QUARTER_MV(1, 1, mx  , my-1)
426
+        CHECK_QUARTER_MV(2, 1, mx  , my-1)
427
+        CHECK_QUARTER_MV(3, 1, mx  , my-1)
428
+        CHECK_QUARTER_MV(3, 2, mx  , my-1)
429
+        CHECK_QUARTER_MV(3, 3, mx  , my-1)
430
+        CHECK_QUARTER_MV(3, 0, mx  , my  )
431
+        CHECK_QUARTER_MV(3, 1, mx  , my  )
432
+        CHECK_QUARTER_MV(3, 2, mx  , my  )
433
+        CHECK_QUARTER_MV(3, 3, mx  , my  )
434
+        CHECK_QUARTER_MV(2, 3, mx  , my  )
435
+        CHECK_QUARTER_MV(1, 3, mx  , my  )
436
+        CHECK_QUARTER_MV(0, 3, mx  , my  )
437
+        CHECK_QUARTER_MV(3, 3, mx-1, my  )
438
+        CHECK_QUARTER_MV(2, 3, mx-1, my  )
439
+        CHECK_QUARTER_MV(1, 3, mx-1, my  )
440
+        CHECK_QUARTER_MV(1, 2, mx-1, my  )
441
+        CHECK_QUARTER_MV(1, 1, mx-1, my  )
442
+        CHECK_QUARTER_MV(1, 0, mx-1, my  )
443
+#endif
444
+        assert(bx >= xmin*4 && bx <= xmax*4 && by >= ymin*4 && by <= ymax*4);
445
+
446
+        *mx_ptr = bx;
447
+        *my_ptr = by;
448
+    }else{
449
+        *mx_ptr =4*mx; // on or outside the border of the search window: no refinement, only convert to quarter-pel units
450
+        *my_ptr =4*my;
451
+    }
452
+
453
+    return dmin;
454
+}
455
+
456
+#endif /* CMP_QPEL */
457
+
458
+#define CHECK_MV(x,y) /* scores full-pel candidate (x,y) unless map[] says it was already scored in this generation; needs d, dmin, best, map, map_generation, score_map, shift, size, mv_penalty, pred_x, pred_y, penalty_factor in scope */\
459
+{\
460
+    const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
461
+    const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
462
+    if(map[index]!=key){ /* a different key in this slot means (x,y) has not been scored yet */\
463
+        CMP(d, x, y, size);\
464
+        map[index]= key;\
465
+        score_map[index]= d; /* the cached score excludes the vector penalty added on the next line */\
466
+        d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\
467
+        COPY3_IF_LT(dmin, d, best[0], x, best[1], y) /* presumably updates dmin and best[] when d<dmin -- macro defined elsewhere, confirm */\
468
+    }\
469
+}
470
+
471
+#define CHECK_MV_DIR(x,y,new_dir) /* like CHECK_MV, but on improvement also records new_dir in next_dir; additionally needs next_dir in scope */\
472
+{\
473
+    const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
474
+    const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
475
+    if(map[index]!=key){ /* a different key in this slot means (x,y) has not been scored yet */\
476
+        CMP(d, x, y, size);\
477
+        map[index]= key;\
478
+        score_map[index]= d; /* the cached score excludes the vector penalty added on the next line */\
479
+        d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\
480
+        if(d<dmin){\
481
+            best[0]=x;\
482
+            best[1]=y;\
483
+            dmin=d;\
484
+            next_dir= new_dir;\
485
+        }\
486
+    }\
487
+}
488
+
489
+#define check(x,y,S,v) /* debugging aid: prints the violated bound, x, y, s->mb_x, s->mb_y and the tag v when (x,y) lies outside the search window shifted left by S; its only use in the code shown here is commented out */\
490
+if( (x)<(xmin<<(S)) ) printf("%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\
491
+if( (x)>(xmax<<(S)) ) printf("%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\
492
+if( (y)<(ymin<<(S)) ) printf("%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\
493
+if( (y)>(ymax<<(S)) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y); /* the trailing backslash continues the macro onto the empty line that follows */\
494
+
495
+
496
+static inline int RENAME(small_diamond_search)(MpegEncContext * s, int *best, int dmin,
497
+                                       Picture *ref_picture,
498
+                                       int const pred_x, int const pred_y, int const penalty_factor,
499
+                                       int const xmin, int const ymin, int const xmax, int const ymax, int const shift,
500
+                                       uint32_t *map, int map_generation, int size, uint16_t * const mv_penalty
501
+                                       )
502
+{ /*
+    * Iterative 4-neighbour (left/up/right/down) full-pel search starting at best[].
+    *
+    * Each round tries the neighbours of the current best position that stay inside
+    * [xmin,xmax] x [ymin,ymax], via CHECK_MV_DIR, which updates best[], dmin and
+    * next_dir when a neighbour scores lower. The neighbour the search just came from
+    * is not tried again. The loop ends, returning dmin, once a round brings no
+    * improvement; best[] then holds the final position.
+    *
+    * Directions: 0 = x-1, 1 = y-1, 2 = x+1, 3 = y+1.
+    * shift scales candidate coordinates before the mv_penalty lookup (see CHECK_MV_DIR).
+    */
503
+    me_cmp_func cmp, chroma_cmp;
504
+    int next_dir=-1;
505
+    LOAD_COMMON(s->mb_x*16, s->mb_y*16);
506
+    
507
+    cmp= s->dsp.me_cmp[size];
508
+    chroma_cmp= s->dsp.me_cmp[size+1];
509
+
510
+    for(;;){
511
+        int d;
512
+        const int dir= next_dir; // direction of the move that led to the current best, or -1 on the first round
513
+        const int x= best[0];
514
+        const int y= best[1];
515
+        next_dir=-1;
516
+
517
+//printf("%d", dir);
518
+        if(dir!=2 && x>xmin) CHECK_MV_DIR(x-1, y  , 0) // each dir!=N test skips the step straight back to the previous position
519
+        if(dir!=3 && y>ymin) CHECK_MV_DIR(x  , y-1, 1)
520
+        if(dir!=0 && x<xmax) CHECK_MV_DIR(x+1, y  , 2)
521
+        if(dir!=1 && y<ymax) CHECK_MV_DIR(x  , y+1, 3)
522
+
523
+        if(next_dir==-1){ // no neighbour improved on dmin
524
+            return dmin;
525
+        }
526
+    }
527
+}
528
+/**
+ * Diamond search with a growing radius.
+ *
+ * For dia_size = 1 .. s->avctx->dia_size the points with
+ * |dx|+|dy| == dia_size around the position held in best[] at the start of
+ * the pass are scored through CHECK_MV, one diamond edge per inner loop.
+ * start/end clip every edge to [xmin,xmax] x [ymin,ymax]; the limits are
+ * inclusive, hence the "+ 1" in each end so that a candidate lying exactly on
+ * a limit is still probed (small_diamond_search reaches the limits as well).
+ * Whenever a pass moves best[], the radius restarts at 1 around the new
+ * position.
+ *
+ * @param best  in: start position (x, y); out: best position found
+ * @param dmin  score of the start position
+ * @return score of the position left in best[]
+ *
+ * pred_x, pred_y, penalty_factor, shift, map, map_generation and mv_penalty
+ * are consumed by the CHECK_MV expansion; ref_picture is presumably used by
+ * LOAD_COMMON/CMP, which are defined outside this excerpt -- TODO confirm.
+ *
+ * NOTE(review): LOAD_COMMON is always given the 16x16 macroblock origin,
+ * while epzs_motion_search4 (which also calls this function, with size=1)
+ * uses a per-8x8-block origin -- confirm this is intended.
+ */
529
+static inline int RENAME(var_diamond_search)(MpegEncContext * s, int *best, int dmin,
530
+                                       Picture *ref_picture,
531
+                                       int const pred_x, int const pred_y, int const penalty_factor,
532
+                                       int const xmin, int const ymin, int const xmax, int const ymax, int const shift,
533
+                                       uint32_t *map, int map_generation, int size, uint16_t * const mv_penalty
534
+                                       )
535
+{
536
+    me_cmp_func cmp, chroma_cmp;
537
+    int dia_size=1;
538
+    LOAD_COMMON(s->mb_x*16, s->mb_y*16);
539
+    
540
+    cmp= s->dsp.me_cmp[size]; /* presumably picked up by CMP -- TODO confirm */
541
+    chroma_cmp= s->dsp.me_cmp[size+1];
542
+
543
+    for(dia_size=1; dia_size<=s->avctx->dia_size; dia_size++){
544
+        int dir, start, end;
545
+        const int x= best[0]; /* snapshot: the whole diamond is centred here */
546
+        const int y= best[1];
547
+
548
+        /* edge from (x, y+dia_size) towards (x+dia_size, y) */
549
+        start= FFMAX(0, y + dia_size - ymax);
550
+        end  = FFMIN(dia_size, xmax - x + 1);
551
+        for(dir= start; dir<end; dir++){
552
+            int d;
553
+
554
+//check(x + dir,y + dia_size - dir,0, a0)
555
+            CHECK_MV(x + dir           , y + dia_size - dir);
556
+        }
557
+
558
+        /* edge from (x+dia_size, y) towards (x, y-dia_size) */
559
+        start= FFMAX(0, x + dia_size - xmax);
560
+        end  = FFMIN(dia_size, y - ymin + 1);
561
+        for(dir= start; dir<end; dir++){
562
+            int d;
563
+
564
+//check(x + dia_size - dir, y - dir,0, a1)
565
+            CHECK_MV(x + dia_size - dir, y - dir           );
566
+        }
567
+
568
+        /* edge from (x, y-dia_size) towards (x-dia_size, y) */
569
+        start= FFMAX(0, -y + dia_size + ymin );
570
+        end  = FFMIN(dia_size, x - xmin + 1);
571
+        for(dir= start; dir<end; dir++){
572
+            int d;
573
+
574
+//check(x - dir,y - dia_size + dir,0, a2)
575
+            CHECK_MV(x - dir           , y - dia_size + dir);
576
+        }
577
+
578
+        /* edge from (x-dia_size, y) towards (x, y+dia_size) */
579
+        start= FFMAX(0, -x + dia_size + xmin );
580
+        end  = FFMIN(dia_size, ymax - y + 1);
581
+        for(dir= start; dir<end; dir++){
582
+            int d;
583
+
584
+//check(x - dia_size + dir, y + dir,0, a3)
585
+            CHECK_MV(x - dia_size + dir, y + dir           );
586
+        }
587
+
588
+        /* best[] moved: the loop increment turns 0 into 1, restarting the radius */
589
+        if(x!=best[0] || y!=best[1])
590
+            dia_size=0;
591
+    }
592
+    return dmin;    
593
+}
589
+/**
+ * Predictor-driven full-pel motion search for one macroblock (size index 0,
+ * origin mb_x*16, mb_y*16).
+ *
+ * 1. Score the zero vector; its raw CMP result becomes the initial dmin and
+ *    is entered into the map at slot 0.
+ * 2. Probe predictor candidates with CHECK_MV, each component shifted right
+ *    by shift (1 + s->quarter_sample).  On the first line only the left and
+ *    last vectors are tried; elsewhere the median, then -- while dmin stays
+ *    above the thresholds -- last/left/top/top-right and finally
+ *    last-right/last-bottom.
+ * 3. Refine best[] with small_diamond_search (avctx->dia_size < 2) or
+ *    var_diamond_search.
+ *
+ * Early exit: off the first line, when dmin < 256, the left/top/top-right
+ * predictors are all zero and avctx->dia_size == 0, the zero vector is
+ * returned immediately and s->me.skip is set to 1.
+ *
+ * @param mx_ptr,my_ptr  receive the best full-pel vector
+ * @return score of the vector stored through mx_ptr/my_ptr
+ *
+ * The P_* names are macros defined outside this excerpt; presumably they
+ * select rows of P[] -- TODO confirm.  block is not referenced directly in
+ * this function.
+ * NOTE(review): candidates are handed to CHECK_MV without a range check
+ * against xmin..xmax / ymin..ymax here; presumably the caller clips P[] --
+ * TODO confirm.
+ */
590
+static int RENAME(epzs_motion_search)(MpegEncContext * s, int block,
591
+                             int *mx_ptr, int *my_ptr,
592
+                             int P[10][2], int pred_x, int pred_y,
593
+                             int xmin, int ymin, int xmax, int ymax, Picture *ref_picture, uint16_t * const mv_penalty)
594
+{
595
+    int best[2]={0, 0};
596
+    int d, dmin; 
597
+    const int shift= 1+s->quarter_sample;
598
+    uint32_t *map= s->me.map;
599
+    int map_generation;
600
+    const int penalty_factor= s->me.penalty_factor;
601
+    const int size=0;
602
+    me_cmp_func cmp, chroma_cmp;
603
+    LOAD_COMMON(s->mb_x*16, s->mb_y*16);
604
+    
605
+    cmp= s->dsp.me_cmp[size]; /* presumably picked up by CMP -- TODO confirm */
606
+    chroma_cmp= s->dsp.me_cmp[size+1];
607
+    
608
+    map_generation= update_map_generation(s);
609
+
610
+    CMP(dmin, 0, 0, size); /* zero vector; unlike CHECK_MV no mv_penalty term is added here */
611
+    map[0]= map_generation; /* equals the CHECK_MV key for (0,0), so (0,0) is not scored again */
612
+    score_map[0]= dmin;
613
+
614
+    /* first line: only the left and last predictors are tried */
615
+    if ((s->mb_y == 0 || s->first_slice_line)) {
616
+        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
617
+        CHECK_MV(P_LAST[0]>>shift, P_LAST[1]>>shift)
618
+    }else{
619
+        if(dmin<256 && ( P_LEFT[0]    |P_LEFT[1]
620
+                        |P_TOP[0]     |P_TOP[1]
621
+                        |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0 && s->avctx->dia_size==0){ /* good zero-vector match and all-zero neighbour predictors: skip the search */
622
+            *mx_ptr= 0;
623
+            *my_ptr= 0;
624
+            s->me.skip=1;
625
+            return dmin;
626
+        }
627
+        CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
628
+        if(dmin>256*2){ /* median not good enough: try the individual predictors */
629
+            CHECK_MV(P_LAST[0]    >>shift, P_LAST[1]    >>shift)
630
+            CHECK_MV(P_LEFT[0]    >>shift, P_LEFT[1]    >>shift)
631
+            CHECK_MV(P_TOP[0]     >>shift, P_TOP[1]     >>shift)
632
+            CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
633
+        }
634
+    }
635
+    if(dmin>256*4){ /* still poor: also try the last-right / last-bottom predictors */
636
+        CHECK_MV(P_LAST_RIGHT[0] >>shift, P_LAST_RIGHT[1] >>shift)
637
+        CHECK_MV(P_LAST_BOTTOM[0]>>shift, P_LAST_BOTTOM[1]>>shift)
638
+    }
639
+#if 0 //disabled: this only slows things down
640
+    if(dmin>512*3){
641
+        int step;
642
+        dmin= score_map[0];
643
+        best[0]= best[1]=0;
644
+        for(step=128; step>0; step>>=1){
645
+            const int step2= step;
646
+            int y;
647
+            for(y=-step2+best[1]; y<=step2+best[1]; y+=step){
648
+                int x;
649
+                if(y<ymin || y>ymax) continue;
650
+
651
+                for(x=-step2+best[0]; x<=step2+best[0]; x+=step){
652
+                    if(x<xmin || x>xmax) continue;
653
+                    if(x==best[0] && y==best[1]) continue;
654
+                    CHECK_MV(x,y)
655
+                }
656
+            }
657
+        }
658
+    }
659
+#endif
660
+//check(best[0],best[1],0, b0)
661
+    if(s->avctx->dia_size<2) /* refine around the best candidate */
662
+        dmin= RENAME(small_diamond_search)(s, best, dmin, ref_picture,
663
+                                   pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax, 
664
+				   shift, map, map_generation, size, mv_penalty);
665
+    else
666
+        dmin= RENAME(var_diamond_search)(s, best, dmin, ref_picture,
667
+                                   pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax, 
668
+				   shift, map, map_generation, size, mv_penalty);
669
+
670
+//check(best[0],best[1],0, b1)
671
+    *mx_ptr= best[0];
672
+    *my_ptr= best[1];    
673
+
674
+//    printf("%d %d %d \n", best[0], best[1], dmin);
675
+    return dmin;
676
+}
677
+/**
+ * Predictor-driven full-pel motion search for one of the four 8x8 blocks of a
+ * macroblock (size index 1).  block selects the block: bit 0 is the column,
+ * block>>1 the row, as used for the LOAD_COMMON origin below.
+ *
+ * Unlike epzs_motion_search the zero vector is not scored up front: dmin
+ * starts at 1000000 and best[] at (0,0), so the first candidate scoring below
+ * 1000000 becomes the best.  Candidates are tried with CHECK_MV, each
+ * component shifted right by shift (1 + s->quarter_sample); the result is
+ * refined with small_diamond_search (avctx->dia_size < 2) or
+ * var_diamond_search.
+ *
+ * @param mx_ptr,my_ptr  receive the best full-pel vector
+ * @return score of the vector stored through mx_ptr/my_ptr
+ *
+ * The P_* names are macros defined outside this excerpt; presumably they
+ * select rows of P[] -- TODO confirm.
+ * NOTE(review): the diamond searches called at the end pass the 16x16
+ * macroblock origin to LOAD_COMMON, not the per-block origin used here --
+ * confirm this is intended.
+ */
678
+#ifndef CMP_DIRECT /* no 4mv search needed in direct mode */
679
+static int RENAME(epzs_motion_search4)(MpegEncContext * s, int block,
680
+                             int *mx_ptr, int *my_ptr,
681
+                             int P[10][2], int pred_x, int pred_y,
682
+                             int xmin, int ymin, int xmax, int ymax, Picture *ref_picture, uint16_t * const mv_penalty)
683
+{
684
+    int best[2]={0, 0};
685
+    int d, dmin; 
686
+    const int shift= 1+s->quarter_sample;
687
+    uint32_t *map= s->me.map;
688
+    int map_generation;
689
+    const int penalty_factor= s->me.penalty_factor;
690
+    const int size=1;
691
+    me_cmp_func cmp, chroma_cmp;
692
+    LOAD_COMMON((s->mb_x*2 + (block&1))*8, (s->mb_y*2 + (block>>1))*8);
693
+    
694
+    cmp= s->dsp.me_cmp[size]; /* presumably picked up by CMP -- TODO confirm */
695
+    chroma_cmp= s->dsp.me_cmp[size+1];
696
+
697
+    map_generation= update_map_generation(s);
698
+
699
+    dmin = 1000000; /* sentinel: no candidate has been scored yet */
700
+//printf("%d %d %d %d //",xmin, ymin, xmax, ymax); 
701
+    /* first line (upper two blocks only): left, last and MV1 predictors */
702
+    if ((s->mb_y == 0 || s->first_slice_line) && block<2) {
703
+	CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
704
+        CHECK_MV(P_LAST[0]>>shift, P_LAST[1]>>shift)
705
+        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
706
+    }else{
707
+        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
708
+        //FIXME try some early stop
709
+        if(dmin>64*2){ /* MV1 not good enough: try the remaining predictors */
710
+            CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
711
+            CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
712
+            CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
713
+            CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
714
+            CHECK_MV(P_LAST[0]>>shift, P_LAST[1]>>shift)
715
+        }
716
+    }
717
+    if(dmin>64*4){ /* still poor: also try the last-right / last-bottom predictors */
718
+        CHECK_MV(P_LAST_RIGHT[0]>>shift, P_LAST_RIGHT[1]>>shift)
719
+        CHECK_MV(P_LAST_BOTTOM[0]>>shift, P_LAST_BOTTOM[1]>>shift)
720
+    }
721
+
722
+    if(s->avctx->dia_size<2) /* refine around the best candidate */
723
+        dmin= RENAME(small_diamond_search)(s, best, dmin, ref_picture,
724
+                                   pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax, 
725
+				   shift, map, map_generation, size, mv_penalty);
726
+    else
727
+        dmin= RENAME(var_diamond_search)(s, best, dmin, ref_picture,
728
+                                   pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax, 
729
+				   shift, map, map_generation, size, mv_penalty);
730
+    *mx_ptr= best[0];
731
+    *my_ptr= best[1];    
732
+
733
+//    printf("%d %d %d \n", best[0], best[1], dmin);
734
+    return dmin;
735
+}
736
+#endif /* !CMP_DIRECT */
... ...
@@ -526,7 +526,7 @@ void ff_mpeg1_encode_init(MpegEncContext *s)
526 526
             }
527 527
         }
528 528
     }
529
-    s->mv_penalty= mv_penalty;
529
+    s->me.mv_penalty= mv_penalty;
530 530
     s->fcode_tab= fcode_tab;
531 531
     s->min_qcoeff=-255;
532 532
     s->max_qcoeff= 255;
... ...
@@ -43,8 +43,6 @@ static void draw_edges_c(UINT8 *buf, int wrap, int width, int height, int w);
43 43
 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
44 44
 
45 45
 void (*draw_edges)(UINT8 *buf, int wrap, int width, int height, int w)= draw_edges_c;
46
-static void emulated_edge_mc(MpegEncContext *s, UINT8 *src, int linesize, int block_w, int block_h, 
47
-                                    int src_x, int src_y, int w, int h);
48 46
 
49 47
 
50 48
 /* enable all paranoid tests for rounding, overflows, etc... */
... ...
@@ -64,8 +62,8 @@ static const uint16_t aanscales[64] = {
64 64
     19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
65 65
     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
66 66
     12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
67
-    8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
68
-    4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
67
+    8867 , 12299, 11585, 10426,  8867,  6967,  4799,  2446,
68
+    4520 ,  6270,  5906,  5315,  4520,  3552,  2446,  1247
69 69
 };
70 70
 
71 71
 /* Input permutation for the simple_idct_mmx */
... ...
@@ -87,9 +85,6 @@ static const uint8_t h263_chroma_roundtab[16] = {
87 87
 static UINT16 (*default_mv_penalty)[MAX_MV*2+1]=NULL;
88 88
 static UINT8 default_fcode_tab[MAX_MV*2+1];
89 89
 
90
-/* default motion estimation */
91
-int motion_estimation_method = ME_EPZS;
92
-
93 90
 static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16)[64], uint16_t (*qmat16_bias)[64],
94 91
                            const UINT16 *quant_matrix, int bias, int qmin, int qmax)
95 92
 {
... ...
@@ -394,15 +389,13 @@ int MPV_common_init(MpegEncContext *s)
394 394
         CHECKED_ALLOCZ(s->b_back_mv_table       , mv_table_size * 2 * sizeof(INT16))
395 395
         CHECKED_ALLOCZ(s->b_bidir_forw_mv_table , mv_table_size * 2 * sizeof(INT16))
396 396
         CHECKED_ALLOCZ(s->b_bidir_back_mv_table , mv_table_size * 2 * sizeof(INT16))
397
-        CHECKED_ALLOCZ(s->b_direct_forw_mv_table, mv_table_size * 2 * sizeof(INT16))
398
-        CHECKED_ALLOCZ(s->b_direct_back_mv_table, mv_table_size * 2 * sizeof(INT16))
399 397
         CHECKED_ALLOCZ(s->b_direct_mv_table     , mv_table_size * 2 * sizeof(INT16))
400 398
 
401 399
         //FIXME should be linesize instead of s->width*2 but that isnt known before get_buffer()
402
-        CHECKED_ALLOCZ(s->me_scratchpad,  s->width*2*16*3*sizeof(uint8_t)) 
400
+        CHECKED_ALLOCZ(s->me.scratchpad,  s->width*2*16*3*sizeof(uint8_t)) 
403 401
         
404
-        CHECKED_ALLOCZ(s->me_map      , ME_MAP_SIZE*sizeof(uint32_t))
405
-        CHECKED_ALLOCZ(s->me_score_map, ME_MAP_SIZE*sizeof(uint16_t))
402
+        CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
403
+        CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
406 404
 
407 405
         if(s->codec_id==CODEC_ID_MPEG4){
408 406
             CHECKED_ALLOCZ(s->tex_pb_buffer, PB_BUFFER_SIZE);
... ...
@@ -498,8 +491,6 @@ void MPV_common_end(MpegEncContext *s)
498 498
     av_freep(&s->b_back_mv_table);
499 499
     av_freep(&s->b_bidir_forw_mv_table);
500 500
     av_freep(&s->b_bidir_back_mv_table);
501
-    av_freep(&s->b_direct_forw_mv_table);
502
-    av_freep(&s->b_direct_back_mv_table);
503 501
     av_freep(&s->b_direct_mv_table);
504 502
     av_freep(&s->motion_val);
505 503
     av_freep(&s->dc_val[0]);
... ...
@@ -508,9 +499,9 @@ void MPV_common_end(MpegEncContext *s)
508 508
     av_freep(&s->mbintra_table);
509 509
     av_freep(&s->cbp_table);
510 510
     av_freep(&s->pred_dir_table);
511
-    av_freep(&s->me_scratchpad);
512
-    av_freep(&s->me_map);
513
-    av_freep(&s->me_score_map);
511
+    av_freep(&s->me.scratchpad);
512
+    av_freep(&s->me.map);
513
+    av_freep(&s->me.score_map);
514 514
     
515 515
     av_freep(&s->mbskip_table);
516 516
     av_freep(&s->bitstream_buffer);
... ...
@@ -566,6 +557,7 @@ int MPV_encode_init(AVCodecContext *avctx)
566 566
     s->chroma_elim_threshold= avctx->chroma_elim_threshold;
567 567
     s->strict_std_compliance= avctx->strict_std_compliance;
568 568
     s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
569
+    s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
569 570
     s->mpeg_quant= avctx->mpeg_quant;
570 571
 
571 572
     if (s->gop_size <= 1) {
... ...
@@ -575,12 +567,7 @@ int MPV_encode_init(AVCodecContext *avctx)
575 575
         s->intra_only = 0;
576 576
     }
577 577
 
578
-    /* ME algorithm */
579
-    if (avctx->me_method == 0)
580
-        /* For compatibility */
581
-        s->me_method = motion_estimation_method;
582
-    else
583
-        s->me_method = avctx->me_method;
578
+    s->me_method = avctx->me_method;
584 579
 
585 580
     /* Fixed QSCALE */
586 581
     s->fixed_qscale = (avctx->flags & CODEC_FLAG_QSCALE);
... ...
@@ -713,13 +700,14 @@ int MPV_encode_init(AVCodecContext *avctx)
713 713
             }
714 714
         }
715 715
     }
716
-    s->mv_penalty= default_mv_penalty;
716
+    s->me.mv_penalty= default_mv_penalty;
717 717
     s->fcode_tab= default_fcode_tab;
718 718
     s->y_dc_scale_table=
719 719
     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
720 720
  
721 721
     /* dont use mv_penalty table for crap MV as it would be confused */
722
-    if (s->me_method < ME_EPZS) s->mv_penalty = default_mv_penalty;
722
+    //FIXME remove after fixing / removing old ME
723
+    if (s->me_method < ME_EPZS) s->me.mv_penalty = default_mv_penalty;
723 724
 
724 725
     s->encoding = 1;
725 726
 
... ...
@@ -727,6 +715,8 @@ int MPV_encode_init(AVCodecContext *avctx)
727 727
     if (MPV_common_init(s) < 0)
728 728
         return -1;
729 729
     
730
+    ff_init_me(s);
731
+
730 732
 #ifdef CONFIG_ENCODERS
731 733
     if (s->out_format == FMT_H263)
732 734
         h263_encode_init(s);
... ...
@@ -947,6 +937,18 @@ void MPV_frame_end(MpegEncContext *s)
947 947
         if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/)
948 948
             s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
949 949
     }
950
+    if(s->avctx->debug&FF_DEBUG_SKIP){
951
+        int x,y;        
952
+        for(y=0; y<s->mb_height; y++){
953
+            for(x=0; x<s->mb_width; x++){
954
+                int count= s->mbskip_table[x + y*s->mb_width];
955
+                if(count>9) count=9;
956
+                printf(" %1d", count);
957
+            }
958
+            printf("\n");
959
+        }
960
+        printf("pict type: %d\n", s->pict_type);
961
+    }
950 962
 }
951 963
 
952 964
 static int get_sae(uint8_t *src, int ref, int stride){
... ...
@@ -1284,7 +1286,7 @@ static inline void gmc1_motion(MpegEncContext *s,
1284 1284
     if(s->flags&CODEC_FLAG_EMU_EDGE){
1285 1285
         if(src_x<0 || src_y<0 || src_x + (motion_x&15) + 16 > s->h_edge_pos
1286 1286
                               || src_y + (motion_y&15) + 16 > s->v_edge_pos){
1287
-            emulated_edge_mc(s, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
1287
+            ff_emulated_edge_mc(s, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
1288 1288
             ptr= s->edge_emu_buffer;
1289 1289
             emu=1;
1290 1290
         }
... ...
@@ -1322,14 +1324,14 @@ static inline void gmc1_motion(MpegEncContext *s,
1322 1322
     offset = (src_y * uvlinesize) + src_x + (src_offset>>1);
1323 1323
     ptr = ref_picture[1] + offset;
1324 1324
     if(emu){
1325
-        emulated_edge_mc(s, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
1325
+        ff_emulated_edge_mc(s, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
1326 1326
         ptr= s->edge_emu_buffer;
1327 1327
     }
1328 1328
     s->dsp.gmc1(dest_cb + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
1329 1329
     
1330 1330
     ptr = ref_picture[2] + offset;
1331 1331
     if(emu){
1332
-        emulated_edge_mc(s, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
1332
+        ff_emulated_edge_mc(s, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
1333 1333
         ptr= s->edge_emu_buffer;
1334 1334
     }
1335 1335
     s->dsp.gmc1(dest_cr + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
... ...
@@ -1401,7 +1403,7 @@ static inline void gmc_motion(MpegEncContext *s,
1401 1401
 }
1402 1402
 
1403 1403
 
1404
-static void emulated_edge_mc(MpegEncContext *s, UINT8 *src, int linesize, int block_w, int block_h, 
1404
+void ff_emulated_edge_mc(MpegEncContext *s, UINT8 *src, int linesize, int block_w, int block_h, 
1405 1405
                                     int src_x, int src_y, int w, int h){
1406 1406
     int x, y;
1407 1407
     int start_y, start_x, end_y, end_x;
... ...
@@ -1501,7 +1503,7 @@ if(s->quarter_sample)
1501 1501
     if(s->flags&CODEC_FLAG_EMU_EDGE){
1502 1502
         if(src_x<0 || src_y<0 || src_x + (motion_x&1) + 16 > s->h_edge_pos
1503 1503
                               || src_y + (motion_y&1) + h  > v_edge_pos){
1504
-            emulated_edge_mc(s, ptr - src_offset, s->linesize, 17, 17+field_based, 
1504
+            ff_emulated_edge_mc(s, ptr - src_offset, s->linesize, 17, 17+field_based, 
1505 1505
                              src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
1506 1506
             ptr= s->edge_emu_buffer + src_offset;
1507 1507
             emu=1;
... ...
@@ -1538,7 +1540,7 @@ if(s->quarter_sample)
1538 1538
     offset = (src_y * uvlinesize) + src_x + (src_offset >> 1);
1539 1539
     ptr = ref_picture[1] + offset;
1540 1540
     if(emu){
1541
-        emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based, 
1541
+        ff_emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based, 
1542 1542
                          src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
1543 1543
         ptr= s->edge_emu_buffer + (src_offset >> 1);
1544 1544
     }
... ...
@@ -1546,7 +1548,7 @@ if(s->quarter_sample)
1546 1546
 
1547 1547
     ptr = ref_picture[2] + offset;
1548 1548
     if(emu){
1549
-        emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based, 
1549
+        ff_emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based, 
1550 1550
                          src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
1551 1551
         ptr= s->edge_emu_buffer + (src_offset >> 1);
1552 1552
     }
... ...
@@ -1586,7 +1588,7 @@ static inline void qpel_motion(MpegEncContext *s,
1586 1586
     if(s->flags&CODEC_FLAG_EMU_EDGE){
1587 1587
         if(src_x<0 || src_y<0 || src_x + (motion_x&3) + 16 > s->h_edge_pos
1588 1588
                               || src_y + (motion_y&3) + h  > v_edge_pos){
1589
-            emulated_edge_mc(s, ptr - src_offset, s->linesize, 17, 17+field_based, 
1589
+            ff_emulated_edge_mc(s, ptr - src_offset, s->linesize, 17, 17+field_based, 
1590 1590
                              src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
1591 1591
             ptr= s->edge_emu_buffer + src_offset;
1592 1592
             emu=1;
... ...
@@ -1631,7 +1633,7 @@ static inline void qpel_motion(MpegEncContext *s,
1631 1631
     offset = (src_y * uvlinesize) + src_x + (src_offset >> 1);
1632 1632
     ptr = ref_picture[1] + offset;
1633 1633
     if(emu){
1634
-        emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based, 
1634
+        ff_emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based, 
1635 1635
                          src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
1636 1636
         ptr= s->edge_emu_buffer + (src_offset >> 1);
1637 1637
     }
... ...
@@ -1639,7 +1641,7 @@ static inline void qpel_motion(MpegEncContext *s,
1639 1639
     
1640 1640
     ptr = ref_picture[2] + offset;
1641 1641
     if(emu){
1642
-        emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based, 
1642
+        ff_emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based, 
1643 1643
                          src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
1644 1644
         ptr= s->edge_emu_buffer + (src_offset >> 1);
1645 1645
     }
... ...
@@ -1675,6 +1677,10 @@ static inline void MPV_motion(MpegEncContext *s,
1675 1675
                         ref_picture, 0,
1676 1676
                         0, pix_op, qpix_op,
1677 1677
                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
1678
+        }else if(s->mspel){
1679
+            ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
1680
+                        ref_picture, pix_op,
1681
+                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
1678 1682
         }else{
1679 1683
             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
1680 1684
                         ref_picture, 0,
... ...
@@ -1706,7 +1712,7 @@ static inline void MPV_motion(MpegEncContext *s,
1706 1706
                 if(s->flags&CODEC_FLAG_EMU_EDGE){
1707 1707
                     if(src_x<0 || src_y<0 || src_x + (motion_x&3) + 8 > s->h_edge_pos
1708 1708
                                           || src_y + (motion_y&3) + 8 > s->v_edge_pos){
1709
-                        emulated_edge_mc(s, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
1709
+                        ff_emulated_edge_mc(s, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
1710 1710
                         ptr= s->edge_emu_buffer;
1711 1711
                     }
1712 1712
                 }
... ...
@@ -1737,7 +1743,7 @@ static inline void MPV_motion(MpegEncContext *s,
1737 1737
                 if(s->flags&CODEC_FLAG_EMU_EDGE){
1738 1738
                     if(src_x<0 || src_y<0 || src_x + (motion_x&1) + 8 > s->h_edge_pos
1739 1739
                                           || src_y + (motion_y&1) + 8 > s->v_edge_pos){
1740
-                        emulated_edge_mc(s, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
1740
+                        ff_emulated_edge_mc(s, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
1741 1741
                         ptr= s->edge_emu_buffer;
1742 1742
                     }
1743 1743
                 }
... ...
@@ -1784,7 +1790,7 @@ static inline void MPV_motion(MpegEncContext *s,
1784 1784
         if(s->flags&CODEC_FLAG_EMU_EDGE){
1785 1785
                 if(src_x<0 || src_y<0 || src_x + (dxy &1) + 8 > s->h_edge_pos>>1
1786 1786
                                       || src_y + (dxy>>1) + 8 > s->v_edge_pos>>1){
1787
-                    emulated_edge_mc(s, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
1787
+                    ff_emulated_edge_mc(s, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
1788 1788
                     ptr= s->edge_emu_buffer;
1789 1789
                     emu=1;
1790 1790
                 }
... ...
@@ -1793,7 +1799,7 @@ static inline void MPV_motion(MpegEncContext *s,
1793 1793
 
1794 1794
         ptr = ref_picture[2] + offset;
1795 1795
         if(emu){
1796
-            emulated_edge_mc(s, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
1796
+            ff_emulated_edge_mc(s, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
1797 1797
             ptr= s->edge_emu_buffer;
1798 1798
         }
1799 1799
         pix_op[1][dxy](dest_cr, ptr, s->uvlinesize, 8);
... ...
@@ -1928,7 +1934,7 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
1928 1928
 
1929 1929
     /* update motion predictor, not for B-frames as they need the motion_val from the last P/S-Frame */
1930 1930
     if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE) { //FIXME move into h263.c if possible, format specific stuff shouldnt be here
1931
-        
1931
+        //FIXME a lot of that is only needed for !low_delay
1932 1932
         const int wrap = s->block_wrap[0];
1933 1933
         const int xy = s->block_index[0];
1934 1934
         const int mb_index= s->mb_x + s->mb_y*s->mb_width;
... ...
@@ -2064,7 +2070,7 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
2064 2064
                     add_dequant_dct(s, block[4], 4, dest_cb, s->uvlinesize);
2065 2065
                     add_dequant_dct(s, block[5], 5, dest_cr, s->uvlinesize);
2066 2066
                 }
2067
-            } else {
2067
+            } else if(s->codec_id != CODEC_ID_WMV2){
2068 2068
                 add_dct(s, block[0], 0, dest_y, dct_linesize);
2069 2069
                 add_dct(s, block[1], 1, dest_y + 8, dct_linesize);
2070 2070
                 add_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
... ...
@@ -2074,6 +2080,8 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
2074 2074
                     add_dct(s, block[4], 4, dest_cb, s->uvlinesize);
2075 2075
                     add_dct(s, block[5], 5, dest_cr, s->uvlinesize);
2076 2076
                 }
2077
+            } else{
2078
+                ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
2077 2079
             }
2078 2080
         } else {
2079 2081
             /* dct only in intra block */
... ...
@@ -2376,7 +2384,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2376 2376
         ptr = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
2377 2377
 
2378 2378
         if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
2379
-            emulated_edge_mc(s, ptr, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height);
2379
+            ff_emulated_edge_mc(s, ptr, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height);
2380 2380
             ptr= s->edge_emu_buffer;
2381 2381
             emu=1;
2382 2382
         }
... ...
@@ -2408,14 +2416,14 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2408 2408
             int wrap_c = s->uvlinesize;
2409 2409
             ptr = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
2410 2410
             if(emu){
2411
-                emulated_edge_mc(s, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2411
+                ff_emulated_edge_mc(s, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2412 2412
                 ptr= s->edge_emu_buffer;
2413 2413
             }
2414 2414
 	    s->dsp.get_pixels(s->block[4], ptr, wrap_c);
2415 2415
 
2416 2416
             ptr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
2417 2417
             if(emu){
2418
-                emulated_edge_mc(s, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2418
+                ff_emulated_edge_mc(s, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2419 2419
                 ptr= s->edge_emu_buffer;
2420 2420
             }
2421 2421
             s->dsp.get_pixels(s->block[5], ptr, wrap_c);
... ...
@@ -2455,7 +2463,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2455 2455
         }
2456 2456
 
2457 2457
         if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
2458
-            emulated_edge_mc(s, ptr_y, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height);
2458
+            ff_emulated_edge_mc(s, ptr_y, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height);
2459 2459
             ptr_y= s->edge_emu_buffer;
2460 2460
             emu=1;
2461 2461
         }
... ...
@@ -2487,12 +2495,12 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2487 2487
             skip_dct[5]= 1;
2488 2488
         }else{
2489 2489
             if(emu){
2490
-                emulated_edge_mc(s, ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2490
+                ff_emulated_edge_mc(s, ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2491 2491
                 ptr_cb= s->edge_emu_buffer;
2492 2492
             }
2493 2493
             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2494 2494
             if(emu){
2495
-                emulated_edge_mc(s, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2495
+                ff_emulated_edge_mc(s, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2496 2496
                 ptr_cr= s->edge_emu_buffer;
2497 2497
             }
2498 2498
             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
... ...
@@ -2574,21 +2582,25 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2574 2574
 
2575 2575
 #ifdef CONFIG_ENCODERS
2576 2576
     /* huffman encode */
2577
-    switch(s->out_format) {
2578
-    case FMT_MPEG1:
2579
-        mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2580
-        break;
2581
-    case FMT_H263:
2582
-        if (s->h263_msmpeg4)
2583
-            msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2584
-        else if(s->h263_pred)
2585
-            mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2586
-        else
2587
-            h263_encode_mb(s, s->block, motion_x, motion_y);
2588
-        break;
2589
-    case FMT_MJPEG:
2590
-        mjpeg_encode_mb(s, s->block);
2591
-        break;
2577
+    switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2578
+    case CODEC_ID_MPEG1VIDEO:
2579
+        mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
2580
+    case CODEC_ID_MPEG4:
2581
+        mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
2582
+    case CODEC_ID_MSMPEG4V2:
2583
+    case CODEC_ID_MSMPEG4V3:
2584
+    case CODEC_ID_WMV1:
2585
+        msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
2586
+    case CODEC_ID_WMV2:
2587
+         ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
2588
+    case CODEC_ID_MJPEG:
2589
+        mjpeg_encode_mb(s, s->block); break;
2590
+    case CODEC_ID_H263:
2591
+    case CODEC_ID_H263P:
2592
+    case CODEC_ID_RV10:
2593
+        h263_encode_mb(s, s->block, motion_x, motion_y); break;
2594
+    default:
2595
+        assert(0);
2592 2596
     }
2593 2597
 #endif
2594 2598
 }
... ...
@@ -2704,13 +2716,18 @@ static inline int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, in
2704 2704
     int x,y;
2705 2705
     
2706 2706
     if(w==16 && h==16) 
2707
-        return s->dsp.pix_norm(src1, src2, stride);
2707
+        return s->dsp.sse[0](NULL, src1, src2, stride);
2708
+    else if(w==8 && h==8)
2709
+        return s->dsp.sse[1](NULL, src1, src2, stride);
2708 2710
     
2709 2711
     for(y=0; y<h; y++){
2710 2712
         for(x=0; x<w; x++){
2711 2713
             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2712 2714
         } 
2713 2715
     }
2716
+    
2717
+    assert(acc>=0);
2718
+    
2714 2719
     return acc;
2715 2720
 }
2716 2721
 
... ...
@@ -2751,6 +2768,18 @@ static void encode_picture(MpegEncContext *s, int picture_number)
2751 2751
     s->scene_change_score=0;
2752 2752
     
2753 2753
     s->qscale= (int)(s->frame_qscale + 0.5); //FIXME qscale / ... stuff for ME rate distortion
2754
+    
2755
+    if(s->msmpeg4_version){
2756
+        if(s->pict_type==I_TYPE)
2757
+            s->no_rounding=1;
2758
+        else if(s->flipflop_rounding)
2759
+            s->no_rounding ^= 1;          
2760
+    }else{
2761
+        if(s->pict_type==I_TYPE)
2762
+            s->no_rounding=0;
2763
+        else if(s->pict_type!=B_TYPE)
2764
+            s->no_rounding ^= 1;          
2765
+    }
2754 2766
 
2755 2767
     /* Estimate motion for every MB */
2756 2768
     if(s->pict_type != I_TYPE){
... ...
@@ -2772,7 +2801,6 @@ static void encode_picture(MpegEncContext *s, int picture_number)
2772 2772
                     ff_estimate_b_frame_motion(s, mb_x, mb_y);
2773 2773
                 else
2774 2774
                     ff_estimate_p_frame_motion(s, mb_x, mb_y);
2775
-//                s->mb_type[mb_y*s->mb_width + mb_x]=MB_TYPE_INTER;
2776 2775
             }
2777 2776
         }
2778 2777
     }else /* if(s->pict_type == I_TYPE) */{
... ...
@@ -2867,7 +2895,9 @@ static void encode_picture(MpegEncContext *s, int picture_number)
2867 2867
         mjpeg_picture_header(s);
2868 2868
         break;
2869 2869
     case FMT_H263:
2870
-        if (s->h263_msmpeg4) 
2870
+        if (s->codec_id == CODEC_ID_WMV2) 
2871
+            ff_wmv2_encode_picture_header(s, picture_number);
2872
+        else if (s->h263_msmpeg4) 
2871 2873
             msmpeg4_encode_picture_header(s, picture_number);
2872 2874
         else if (s->h263_pred)
2873 2875
             mpeg4_encode_picture_header(s, picture_number);
... ...
@@ -3049,15 +3079,14 @@ static void encode_picture(MpegEncContext *s, int picture_number)
3049 3049
                                  &dmin, &next_block, 0, 0);
3050 3050
                 }
3051 3051
                 if(mb_type&MB_TYPE_DIRECT){
3052
+                    int mx= s->b_direct_mv_table[xy][0];
3053
+                    int my= s->b_direct_mv_table[xy][1];
3054
+                    
3052 3055
                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3053
-                    s->mv_type = MV_TYPE_16X16; //FIXME
3054 3056
                     s->mb_intra= 0;
3055
-                    s->mv[0][0][0] = s->b_direct_forw_mv_table[xy][0];
3056
-                    s->mv[0][0][1] = s->b_direct_forw_mv_table[xy][1];
3057
-                    s->mv[1][0][0] = s->b_direct_back_mv_table[xy][0];
3058
-                    s->mv[1][0][1] = s->b_direct_back_mv_table[xy][1];
3057
+                    ff_mpeg4_set_direct_mv(s, mx, my);
3059 3058
                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_DIRECT, pb, pb2, tex_pb, 
3060
-                                 &dmin, &next_block, s->b_direct_mv_table[xy][0], s->b_direct_mv_table[xy][1]);
3059
+                                 &dmin, &next_block, mx, my);
3061 3060
                 }
3062 3061
                 if(mb_type&MB_TYPE_INTRA){
3063 3062
                     s->mv_dir = MV_DIR_FORWARD;
... ...
@@ -3122,10 +3151,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
3122 3122
                     s->mb_intra= 0;
3123 3123
                     motion_x=s->b_direct_mv_table[xy][0];
3124 3124
                     motion_y=s->b_direct_mv_table[xy][1];
3125
-                    s->mv[0][0][0] = s->b_direct_forw_mv_table[xy][0];
3126
-                    s->mv[0][0][1] = s->b_direct_forw_mv_table[xy][1];
3127
-                    s->mv[1][0][0] = s->b_direct_back_mv_table[xy][0];
3128
-                    s->mv[1][0][1] = s->b_direct_back_mv_table[xy][1];
3125
+                    ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3129 3126
                     break;
3130 3127
                 case MB_TYPE_BIDIR:
3131 3128
                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
... ...
@@ -3170,7 +3196,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
3170 3170
 
3171 3171
                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3172 3172
                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3173
-                
3173
+
3174 3174
                 s->current_picture.error[0] += sse(
3175 3175
                     s,
3176 3176
                     s->new_picture    .data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
... ...
@@ -3471,6 +3497,7 @@ char ff_get_pict_type_char(int pict_type){
3471 3471
     case P_TYPE: return 'P'; 
3472 3472
     case B_TYPE: return 'B'; 
3473 3473
     case S_TYPE: return 'S'; 
3474
+    default:     return '?';
3474 3475
     }
3475 3476
 }
3476 3477
 
... ...
@@ -3574,12 +3601,3 @@ AVCodec wmv1_encoder = {
3574 3574
     MPV_encode_end,
3575 3575
 };
3576 3576
 
3577
-AVCodec wmv2_encoder = {
3578
-    "wmv2",
3579
-    CODEC_TYPE_VIDEO,
3580
-    CODEC_ID_WMV2,
3581
-    sizeof(MpegEncContext),
3582
-    MPV_encode_init,
3583
-    MPV_encode_picture,
3584
-    MPV_encode_end,
3585
-};
... ...
@@ -129,6 +129,31 @@ typedef struct ParseContext{
129 129
     int frame_start_found;
130 130
 } ParseContext;
131 131
 
132
+struct MpegEncContext;
133
+
134
+typedef struct MotionEstContext{
135
+    int skip;                          /* set if ME is skipped for the current MB */
136
+    int co_located_mv[4][2];           /* mv from last p frame for direct mode ME */
137
+    int direct_basis_mv[4][2];
138
+    uint8_t *scratchpad;               /* data area for the me algo, so that the ME doesn't need to malloc/free */
139
+    uint32_t *map;                     /* map to avoid duplicate evaluations */
140
+    uint32_t *score_map;               /* map to store the scores */
141
+    int map_generation;  
142
+    int penalty_factor;
143
+    int sub_penalty_factor;
144
+    UINT16 (*mv_penalty)[MAX_MV*2+1];  /* amount of bits needed to encode a MV */
145
+    int (*sub_motion_search)(struct MpegEncContext * s,
146
+				  int *mx_ptr, int *my_ptr, int dmin,
147
+				  int xmin, int ymin, int xmax, int ymax,
148
+                                  int pred_x, int pred_y, Picture *ref_picture, 
149
+                                  int n, int size, uint16_t * const mv_penalty);
150
+    int (*motion_search[7])(struct MpegEncContext * s, int block,
151
+                             int *mx_ptr, int *my_ptr,
152
+                             int P[10][2], int pred_x, int pred_y,
153
+                             int xmin, int ymin, int xmax, int ymax, Picture *ref_picture,
154
+                             uint16_t * const mv_penalty);
155
+}MotionEstContext;
156
+
132 157
 typedef struct MpegEncContext {
133 158
     struct AVCodecContext *avctx;
134 159
     /* the following parameters must be initialized before encoding */
... ...
@@ -222,15 +247,8 @@ typedef struct MpegEncContext {
222 222
     INT16 (*b_back_mv_table)[2];       /* MV table (1MV per MB) backward mode b-frame encoding */
223 223
     INT16 (*b_bidir_forw_mv_table)[2]; /* MV table (1MV per MB) bidir mode b-frame encoding */
224 224
     INT16 (*b_bidir_back_mv_table)[2]; /* MV table (1MV per MB) bidir mode b-frame encoding */
225
-    INT16 (*b_direct_forw_mv_table)[2];/* MV table (1MV per MB) direct mode b-frame encoding */
226
-    INT16 (*b_direct_back_mv_table)[2];/* MV table (1MV per MB) direct mode b-frame encoding */
227 225
     INT16 (*b_direct_mv_table)[2];     /* MV table (1MV per MB) direct mode b-frame encoding */
228 226
     int me_method;                     /* ME algorithm */
229
-    uint8_t *me_scratchpad;            /* data area for the me algo, so that the ME doesnt need to malloc/free */
230
-    uint32_t *me_map;                  /* map to avoid duplicate evaluations */
231
-    uint16_t *me_score_map;            /* map to store the SADs */
232
-    int me_map_generation;
233
-    int skip_me;                       /* set if ME is skiped for the current MB */
234 227
     int scene_change_score;
235 228
     int mv_dir;
236 229
 #define MV_DIR_BACKWARD  1
... ...
@@ -250,8 +268,9 @@ typedef struct MpegEncContext {
250 250
     int mv[2][4][2];
251 251
     int field_select[2][2];
252 252
     int last_mv[2][2][2];             /* last MV, used for MV prediction in MPEG1 & B-frame MPEG4 */
253
-    UINT16 (*mv_penalty)[MAX_MV*2+1]; /* amount of bits needed to encode a MV, used for ME */
254 253
     UINT8 *fcode_tab; /* smallest fcode needed for each MV */
254
+    
255
+    MotionEstContext me;
255 256
 
256 257
     int no_rounding; /* apply no rounding to motion compensation (MPEG4, msmpeg4, ...) 
257 258
                         for b-frames rounding mode is always 0 */
... ...
@@ -458,6 +477,7 @@ typedef struct MpegEncContext {
458 458
     /* [mb_intra][isChroma][level][run][last] */
459 459
     int (*ac_stats)[2][MAX_LEVEL+1][MAX_RUN+1][2];
460 460
     int inter_intra_pred;
461
+    int mspel;
461 462
 
462 463
     /* decompression specific */
463 464
     GetBitContext gb;
... ...
@@ -519,6 +539,7 @@ typedef struct MpegEncContext {
519 519
     void (*fdct)(DCTELEM *block/* align 16*/);
520 520
     void (*idct_put)(UINT8 *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
521 521
     void (*idct_add)(UINT8 *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
522
+    //FIXME move above funcs into dspContext perhaps
522 523
 } MpegEncContext;
523 524
 
524 525
 
... ...
@@ -528,6 +549,9 @@ void MPV_common_end(MpegEncContext *s);
528 528
 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
529 529
 int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx);
530 530
 void MPV_frame_end(MpegEncContext *s);
531
+int MPV_encode_init(AVCodecContext *avctx);
532
+int MPV_encode_end(AVCodecContext *avctx);
533
+int MPV_encode_picture(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data);
531 534
 #ifdef HAVE_MMX
532 535
 void MPV_common_init_mmx(MpegEncContext *s);
533 536
 #endif
... ...
@@ -553,6 +577,8 @@ void ff_clean_intra_table_entries(MpegEncContext *s);
553 553
 void ff_init_scantable(MpegEncContext *s, ScanTable *st, const UINT8 *src_scantable);
554 554
 void ff_error_resilience(MpegEncContext *s);
555 555
 void ff_draw_horiz_band(MpegEncContext *s);
556
+void ff_emulated_edge_mc(MpegEncContext *s, UINT8 *src, int linesize, int block_w, int block_h, 
557
+                                    int src_x, int src_y, int w, int h);
556 558
 char ff_get_pict_type_char(int pict_type);
557 559
 
558 560
 
... ...
@@ -585,6 +611,7 @@ void ff_estimate_b_frame_motion(MpegEncContext * s,
585 585
 int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type);
586 586
 void ff_fix_long_p_mvs(MpegEncContext * s);
587 587
 void ff_fix_long_b_mvs(MpegEncContext * s, int16_t (*mv_table)[2], int f_code, int type);
588
+void ff_init_me(MpegEncContext *s);
588 589
 
589 590
 
590 591
 /* mpeg12.c */
... ...
@@ -631,6 +658,11 @@ extern UINT8 ff_mpeg4_y_dc_scale_table[32];
631 631
 extern UINT8 ff_mpeg4_c_dc_scale_table[32];
632 632
 extern const INT16 ff_mpeg4_default_intra_matrix[64];
633 633
 extern const INT16 ff_mpeg4_default_non_intra_matrix[64];
634
+int ff_h263_decode_init(AVCodecContext *avctx);
635
+int ff_h263_decode_frame(AVCodecContext *avctx, 
636
+                             void *data, int *data_size,
637
+                             UINT8 *buf, int buf_size);
638
+int ff_h263_decode_end(AVCodecContext *avctx);
634 639
 void h263_encode_mb(MpegEncContext *s, 
635 640
                     DCTELEM block[6][64],
636 641
                     int motion_x, int motion_y);
... ...
@@ -667,6 +699,7 @@ int ff_mpeg4_decode_partitions(MpegEncContext *s);
667 667
 int ff_mpeg4_get_video_packet_prefix_length(MpegEncContext *s);
668 668
 int ff_h263_resync(MpegEncContext *s);
669 669
 int ff_h263_get_gob_height(MpegEncContext *s);
670
+void ff_mpeg4_set_direct_mv(MpegEncContext *s, int mx, int my);
670 671
 
671 672
 
672 673
 /* rv10.c */
... ...
@@ -684,7 +717,16 @@ int msmpeg4_decode_picture_header(MpegEncContext * s);
684 684
 int msmpeg4_decode_ext_header(MpegEncContext * s, int buf_size);
685 685
 int ff_msmpeg4_decode_init(MpegEncContext *s);
686 686
 void ff_msmpeg4_encode_init(MpegEncContext *s);
687
-
687
+int ff_wmv2_decode_picture_header(MpegEncContext * s);
688
+void ff_wmv2_add_mb(MpegEncContext *s, DCTELEM block[6][64], uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr);
689
+void ff_mspel_motion(MpegEncContext *s,
690
+                               UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr,
691
+                               UINT8 **ref_picture, op_pixels_func (*pix_op)[4],
692
+                               int motion_x, int motion_y, int h);
693
+int ff_wmv2_encode_picture_header(MpegEncContext * s, int picture_number);
694
+void ff_wmv2_encode_mb(MpegEncContext * s, 
695
+                       DCTELEM block[6][64],
696
+                       int motion_x, int motion_y);
688 697
 
689 698
 /* mjpegenc.c */
690 699
 int mjpeg_init(MpegEncContext *s);
... ...
@@ -48,12 +48,14 @@
48 48
 #define II_BITRATE 128*1024
49 49
 #define MBAC_BITRATE 50*1024
50 50
 
51
+#define DEFAULT_INTER_INDEX 3
52
+
51 53
 static UINT32 v2_dc_lum_table[512][2];
52 54
 static UINT32 v2_dc_chroma_table[512][2];
53 55
 
54 56
 static inline void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n);
55 57
 static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
56
-                                       int n, int coded);
58
+                                       int n, int coded, const uint8_t *scantable);
57 59
 static int msmpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr);
58 60
 static int msmpeg4_decode_motion(MpegEncContext * s, 
59 61
                                  int *mx_ptr, int *my_ptr);
... ...
@@ -63,6 +65,7 @@ static inline void msmpeg4_memsetw(short *tab, int val, int n);
63 63
 static int get_size_of_code(MpegEncContext * s, RLTable *rl, int last, int run, int level, int intra);
64 64
 static int msmpeg4v12_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
65 65
 static int msmpeg4v34_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
66
+static int wmv2_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
66 67
 
67 68
 extern UINT32 inverse[256];
68 69
 
... ...
@@ -160,13 +163,14 @@ static void common_init(MpegEncContext * s)
160 160
         }
161 161
         break;
162 162
     case 4:
163
+    case 5:
163 164
         s->y_dc_scale_table= wmv1_y_dc_scale_table;
164 165
         s->c_dc_scale_table= wmv1_c_dc_scale_table;
165 166
         break;
166 167
     }
167 168
 
168 169
     
169
-    if(s->msmpeg4_version==4){
170
+    if(s->msmpeg4_version>=4){
170 171
         ff_init_scantable(s, &s->intra_scantable  , wmv1_scantable[1]);
171 172
         ff_init_scantable(s, &s->intra_h_scantable, wmv1_scantable[2]);
172 173
         ff_init_scantable(s, &s->intra_v_scantable, wmv1_scantable[3]);
... ...
@@ -370,9 +374,9 @@ void msmpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
370 370
     s->per_mb_rl_table = 0;
371 371
     if(s->msmpeg4_version==4)
372 372
         s->inter_intra_pred= (s->width*s->height < 320*240 && s->bit_rate<=II_BITRATE && s->pict_type==P_TYPE);
373
+//printf("%d %d %d %d %d\n", s->pict_type, s->bit_rate, s->inter_intra_pred, s->width, s->height);
373 374
 
374 375
     if (s->pict_type == I_TYPE) {
375
-        s->no_rounding = 1;
376 376
         s->slice_height= s->mb_height/1;
377 377
         put_bits(&s->pb, 5, 0x16 + s->mb_height/s->slice_height);
378 378
         
... ...
@@ -404,12 +408,6 @@ void msmpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
404 404
 
405 405
             put_bits(&s->pb, 1, s->mv_table_index);
406 406
         }
407
-
408
-	if(s->flipflop_rounding){
409
-	    s->no_rounding ^= 1;
410
-	}else{
411
-	    s->no_rounding = 0;
412
-	}
413 407
     }
414 408
 
415 409
     s->esc3_level_length= 0;
... ...
@@ -923,7 +921,7 @@ static inline void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int
923 923
     }
924 924
 
925 925
     /* recalculate block_last_index for M$ wmv1 */
926
-    if(s->msmpeg4_version==4 && s->block_last_index[n]>0){
926
+    if(s->msmpeg4_version>=4 && s->block_last_index[n]>0){
927 927
         for(last_index=63; last_index>=0; last_index--){
928 928
             if(block[scantable[last_index]]) break;
929 929
         }
... ...
@@ -975,7 +973,7 @@ else
975 975
                         /* third escape */
976 976
                         put_bits(&s->pb, 1, 0);
977 977
                         put_bits(&s->pb, 1, last);
978
-                        if(s->msmpeg4_version==4){
978
+                        if(s->msmpeg4_version>=4){
979 979
                             if(s->esc3_level_length==0){
980 980
                                 s->esc3_level_length=8;
981 981
                                 s->esc3_run_length= 6;
... ...
@@ -1014,7 +1012,7 @@ else
1014 1014
 /****************************************/
1015 1015
 /* decoding stuff */
1016 1016
 
1017
-static VLC mb_non_intra_vlc;
1017
+static VLC mb_non_intra_vlc[4];
1018 1018
 static VLC mb_intra_vlc;
1019 1019
 static VLC dc_lum_vlc[2];
1020 1020
 static VLC dc_chroma_vlc[2];
... ...
@@ -1139,9 +1137,12 @@ int ff_msmpeg4_decode_init(MpegEncContext *s)
1139 1139
                  &mvtab[0][1], 2, 1,
1140 1140
                  &mvtab[0][0], 2, 1);
1141 1141
 
1142
-        init_vlc(&mb_non_intra_vlc, MB_NON_INTRA_VLC_BITS, 128, 
1143
-                 &table_mb_non_intra[0][1], 8, 4,
1144
-                 &table_mb_non_intra[0][0], 8, 4);
1142
+        for(i=0; i<4; i++){
1143
+            init_vlc(&mb_non_intra_vlc[i], MB_NON_INTRA_VLC_BITS, 128, 
1144
+                     &wmv2_inter_table[i][0][1], 8, 4,
1145
+                     &wmv2_inter_table[i][0][0], 8, 4); //FIXME name?
1146
+        }
1147
+        
1145 1148
         init_vlc(&mb_intra_vlc, MB_INTRA_VLC_BITS, 64, 
1146 1149
                  &table_mb_intra[0][1], 4, 2,
1147 1150
                  &table_mb_intra[0][0], 4, 2);
... ...
@@ -1167,6 +1168,9 @@ int ff_msmpeg4_decode_init(MpegEncContext *s)
1167 1167
     case 4:
1168 1168
         s->decode_mb= msmpeg4v34_decode_mb;
1169 1169
         break;
1170
+    case 5:
1171
+        s->decode_mb= wmv2_decode_mb;
1172
+        break;
1170 1173
     }
1171 1174
     
1172 1175
     s->slice_height= s->mb_height; //to avoid 1/0 if the first frame isnt a keyframe
... ...
@@ -1334,6 +1338,7 @@ return -1;
1334 1334
 	    s->no_rounding = 0;
1335 1335
 	}
1336 1336
     }
1337
+//printf("%d %d %d %d %d\n", s->pict_type, s->bit_rate, s->inter_intra_pred, s->width, s->height);
1337 1338
 
1338 1339
     s->esc3_level_length= 0;
1339 1340
     s->esc3_run_length= 0;
... ...
@@ -1523,7 +1528,7 @@ static int msmpeg4v12_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
1523 1523
     }
1524 1524
 
1525 1525
     for (i = 0; i < 6; i++) {
1526
-        if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1) < 0)
1526
+        if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0)
1527 1527
 	{
1528 1528
              fprintf(stderr,"\nerror while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i);
1529 1529
              return -1;
... ...
@@ -1566,7 +1571,7 @@ printf("S ");
1566 1566
             }
1567 1567
         }
1568 1568
         
1569
-        code = get_vlc2(&s->gb, mb_non_intra_vlc.table, MB_NON_INTRA_VLC_BITS, 3);
1569
+        code = get_vlc2(&s->gb, mb_non_intra_vlc[DEFAULT_INTER_INDEX].table, MB_NON_INTRA_VLC_BITS, 3);
1570 1570
         if (code < 0)
1571 1571
             return -1;
1572 1572
 	//s->mb_intra = (code & 0x40) ? 0 : 1;
... ...
@@ -1628,7 +1633,7 @@ printf("%c", s->ac_pred ? 'A' : 'I');
1628 1628
     }
1629 1629
 
1630 1630
     for (i = 0; i < 6; i++) {
1631
-        if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1) < 0)
1631
+        if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0)
1632 1632
 	{
1633 1633
 	    fprintf(stderr,"\nerror while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i);
1634 1634
 	    return -1;
... ...
@@ -1639,13 +1644,12 @@ printf("%c", s->ac_pred ? 'A' : 'I');
1639 1639
 }
1640 1640
 //#define ERROR_DETAILS
1641 1641
 static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
1642
-                              int n, int coded)
1642
+                              int n, int coded, const uint8_t *scan_table)
1643 1643
 {
1644 1644
     int level, i, last, run, run_diff;
1645 1645
     int dc_pred_dir;
1646 1646
     RLTable *rl;
1647 1647
     RL_VLC_ELEM *rl_vlc;
1648
-    const UINT8 *scan_table;
1649 1648
     int qmul, qadd;
1650 1649
 
1651 1650
     if (s->mb_intra) {
... ...
@@ -1713,7 +1717,8 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
1713 1713
             s->block_last_index[n] = i;
1714 1714
             return 0;
1715 1715
         }
1716
-        scan_table = s->inter_scantable.permutated;
1716
+        if(!scan_table)
1717
+            scan_table = s->inter_scantable.permutated;
1717 1718
         set_stat(ST_INTER_AC);
1718 1719
         rl_vlc= rl->rl_vlc[s->qscale];
1719 1720
     }
... ...
@@ -1889,7 +1894,7 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
1889 1889
             i = 63; /* XXX: not optimal */
1890 1890
         }
1891 1891
     }
1892
-    if(s->msmpeg4_version==4 && i>0) i=63; //FIXME/XXX optimize
1892
+    if(s->msmpeg4_version>=4 && i>0) i=63; //FIXME/XXX optimize
1893 1893
     s->block_last_index[n] = i;
1894 1894
     
1895 1895
     return 0;
... ...
@@ -1990,3 +1995,9 @@ static int msmpeg4_decode_motion(MpegEncContext * s,
1990 1990
     *my_ptr = my;
1991 1991
     return 0;
1992 1992
 }
1993
+
1994
+/* cleanest way to support it
1995
+ * there is too much shared between versions so that we can't have 1 file per version & 1 common
1996
+ * as almost everything would be in the common file 
1997
+ */
1998
+#include "wmv2.c"
... ...
@@ -3,7 +3,7 @@
3 3
  */
4 4
 
5 5
 /* intra picture macro block coded block pattern */
6
-static const UINT16 table_mb_intra[64][2] = {
6
+static const uint16_t table_mb_intra[64][2] = {
7 7
 { 0x1, 1 },{ 0x17, 6 },{ 0x9, 5 },{ 0x5, 5 },
8 8
 { 0x6, 5 },{ 0x47, 9 },{ 0x20, 7 },{ 0x10, 7 },
9 9
 { 0x2, 5 },{ 0x7c, 9 },{ 0x3a, 7 },{ 0x1d, 7 },
... ...
@@ -23,7 +23,7 @@ static const UINT16 table_mb_intra[64][2] = {
23 23
 };
24 24
 
25 25
 /* non intra picture macro block coded block pattern + mb type */
26
-static const UINT32 table_mb_non_intra[128][2] = {
26
+static const uint32_t table_mb_non_intra[128][2] = {
27 27
 { 0x40, 7 },{ 0x13c9, 13 },{ 0x9fd, 12 },{ 0x1fc, 15 },
28 28
 { 0x9fc, 12 },{ 0xa83, 18 },{ 0x12d34, 17 },{ 0x83bc, 16 },
29 29
 { 0x83a, 12 },{ 0x7f8, 17 },{ 0x3fd, 16 },{ 0x3ff, 16 },
... ...
@@ -60,7 +60,7 @@ static const UINT32 table_mb_non_intra[128][2] = {
60 60
 
61 61
 /* dc table 0 */
62 62
 
63
-static const UINT32 table0_dc_lum[120][2] = {
63
+static const uint32_t table0_dc_lum[120][2] = {
64 64
 { 0x1, 1 },{ 0x1, 2 },{ 0x1, 4 },{ 0x1, 5 },
65 65
 { 0x5, 5 },{ 0x7, 5 },{ 0x8, 6 },{ 0xc, 6 },
66 66
 { 0x0, 7 },{ 0x2, 7 },{ 0x12, 7 },{ 0x1a, 7 },
... ...
@@ -93,7 +93,7 @@ static const UINT32 table0_dc_lum[120][2] = {
93 93
 { 0x6078c, 24 },{ 0x6078d, 24 },{ 0x6078e, 24 },{ 0x6078f, 24 },
94 94
 };
95 95
 
96
-static const UINT32 table0_dc_chroma[120][2] = {
96
+static const uint32_t table0_dc_chroma[120][2] = {
97 97
 { 0x0, 2 },{ 0x1, 2 },{ 0x5, 3 },{ 0x9, 4 },
98 98
 { 0xd, 4 },{ 0x11, 5 },{ 0x1d, 5 },{ 0x1f, 5 },
99 99
 { 0x21, 6 },{ 0x31, 6 },{ 0x38, 6 },{ 0x33, 6 },
... ...
@@ -128,7 +128,7 @@ static const UINT32 table0_dc_chroma[120][2] = {
128 128
 
129 129
 /* dc table 1 */
130 130
 
131
-static const UINT32 table1_dc_lum[120][2] = {
131
+static const uint32_t table1_dc_lum[120][2] = {
132 132
 { 0x2, 2 },{ 0x3, 2 },{ 0x3, 3 },{ 0x2, 4 },
133 133
 { 0x5, 4 },{ 0x1, 5 },{ 0x3, 5 },{ 0x8, 5 },
134 134
 { 0x0, 6 },{ 0x5, 6 },{ 0xd, 6 },{ 0xf, 6 },
... ...
@@ -161,7 +161,7 @@ static const UINT32 table1_dc_lum[120][2] = {
161 161
 { 0x1e6964, 26 },{ 0x1e6965, 26 },{ 0x1e6966, 26 },{ 0x1e6967, 26 },
162 162
 };
163 163
 
164
-static const UINT32 table1_dc_chroma[120][2] = {
164
+static const uint32_t table1_dc_chroma[120][2] = {
165 165
 { 0x0, 2 },{ 0x1, 2 },{ 0x4, 3 },{ 0x7, 3 },
166 166
 { 0xb, 4 },{ 0xd, 4 },{ 0x15, 5 },{ 0x28, 6 },
167 167
 { 0x30, 6 },{ 0x32, 6 },{ 0x52, 7 },{ 0x62, 7 },
... ...
@@ -196,7 +196,7 @@ static const UINT32 table1_dc_chroma[120][2] = {
196 196
 
197 197
 /* vlc table 0, for intra luma */
198 198
 
199
-static const UINT16 table0_vlc[133][2] = {
199
+static const uint16_t table0_vlc[133][2] = {
200 200
 { 0x1, 2 },{ 0x6, 3 },{ 0xf, 4 },{ 0x16, 5 },
201 201
 { 0x20, 6 },{ 0x18, 7 },{ 0x8, 8 },{ 0x9a, 8 },
202 202
 { 0x56, 9 },{ 0x13e, 9 },{ 0xf0, 10 },{ 0x3a5, 10 },
... ...
@@ -233,7 +233,7 @@ static const UINT16 table0_vlc[133][2] = {
233 233
 { 0x16, 7 },
234 234
 };
235 235
 
236
-static const INT8 table0_level[132] = {
236
+static const int8_t table0_level[132] = {
237 237
   1,  2,  3,  4,  5,  6,  7,  8,
238 238
   9, 10, 11, 12, 13, 14, 15, 16,
239 239
   1,  2,  3,  4,  5,  6,  7,  8,
... ...
@@ -253,7 +253,7 @@ static const INT8 table0_level[132] = {
253 253
   1,  1,  1,  1,
254 254
 };
255 255
 
256
-static const INT8 table0_run[132] = {
256
+static const int8_t table0_run[132] = {
257 257
   0,  0,  0,  0,  0,  0,  0,  0,
258 258
   0,  0,  0,  0,  0,  0,  0,  0,
259 259
   1,  1,  1,  1,  1,  1,  1,  1,
... ...
@@ -275,7 +275,7 @@ static const INT8 table0_run[132] = {
275 275
 
276 276
 /* vlc table 1, for intra chroma and P macro blocks */
277 277
 
278
-static const UINT16 table1_vlc[149][2] = {
278
+static const uint16_t table1_vlc[149][2] = {
279 279
 { 0x4, 3 },{ 0x14, 5 },{ 0x17, 7 },{ 0x7f, 8 },
280 280
 { 0x154, 9 },{ 0x1f2, 10 },{ 0xbf, 11 },{ 0x65, 12 },
281 281
 { 0xaaa, 12 },{ 0x630, 13 },{ 0x1597, 13 },{ 0x3b7, 14 },
... ...
@@ -316,7 +316,7 @@ static const UINT16 table1_vlc[149][2] = {
316 316
 { 0xd, 9 },
317 317
 };
318 318
 
319
-static const INT8 table1_level[148] = {
319
+static const int8_t table1_level[148] = {
320 320
   1,  2,  3,  4,  5,  6,  7,  8,
321 321
   9, 10, 11, 12, 13, 14,  1,  2,
322 322
   3,  4,  5,  6,  7,  8,  9,  1,
... ...
@@ -338,7 +338,7 @@ static const INT8 table1_level[148] = {
338 338
   1,  1,  1,  1,
339 339
 };
340 340
 
341
-static const INT8 table1_run[148] = {
341
+static const int8_t table1_run[148] = {
342 342
   0,  0,  0,  0,  0,  0,  0,  0,
343 343
   0,  0,  0,  0,  0,  0,  1,  1,
344 344
   1,  1,  1,  1,  1,  1,  1,  2,
... ...
@@ -362,7 +362,7 @@ static const INT8 table1_run[148] = {
362 362
 
363 363
 /* third vlc table */
364 364
 
365
-static const UINT16 table2_vlc[186][2] = {
365
+static const uint16_t table2_vlc[186][2] = {
366 366
 { 0x1, 2 },{ 0x5, 3 },{ 0xd, 4 },{ 0x12, 5 },
367 367
 { 0xe, 6 },{ 0x15, 7 },{ 0x13, 8 },{ 0x3f, 8 },
368 368
 { 0x4b, 9 },{ 0x11f, 9 },{ 0xb8, 10 },{ 0x3e3, 10 },
... ...
@@ -412,7 +412,7 @@ static const UINT16 table2_vlc[186][2] = {
412 412
 { 0x23dc, 14 },{ 0x4a, 9 },
413 413
 };
414 414
 
415
-static const INT8 table2_level[185] = {
415
+static const int8_t table2_level[185] = {
416 416
   1,  2,  3,  4,  5,  6,  7,  8,
417 417
   9, 10, 11, 12, 13, 14, 15, 16,
418 418
  17, 18, 19,  1,  2,  3,  4,  5,
... ...
@@ -439,7 +439,7 @@ static const INT8 table2_level[185] = {
439 439
   1,
440 440
 };
441 441
 
442
-static const INT8 table2_run[185] = {
442
+static const int8_t table2_run[185] = {
443 443
   0,  0,  0,  0,  0,  0,  0,  0,
444 444
   0,  0,  0,  0,  0,  0,  0,  0,
445 445
   0,  0,  0,  1,  1,  1,  1,  1,
... ...
@@ -467,7 +467,7 @@ static const INT8 table2_run[185] = {
467 467
 };
468 468
 
469 469
 /* second non intra vlc table */
470
-static const UINT16 table4_vlc[169][2] = {
470
+static const uint16_t table4_vlc[169][2] = {
471 471
 { 0x0, 3 },{ 0x3, 4 },{ 0xb, 5 },{ 0x14, 6 },
472 472
 { 0x3f, 6 },{ 0x5d, 7 },{ 0xa2, 8 },{ 0xac, 9 },
473 473
 { 0x16e, 9 },{ 0x20a, 10 },{ 0x2e2, 10 },{ 0x432, 11 },
... ...
@@ -513,7 +513,7 @@ static const UINT16 table4_vlc[169][2] = {
513 513
 { 0x169, 9 },
514 514
 };
515 515
 
516
-static const INT8 table4_level[168] = {
516
+static const int8_t table4_level[168] = {
517 517
   1,  2,  3,  4,  5,  6,  7,  8,
518 518
   9, 10, 11, 12, 13, 14, 15, 16,
519 519
  17, 18, 19, 20, 21, 22, 23,  1,
... ...
@@ -537,7 +537,7 @@ static const INT8 table4_level[168] = {
537 537
   1,  1,  1,  1,  1,  1,  1,  1,
538 538
 };
539 539
 
540
-static const INT8 table4_run[168] = {
540
+static const int8_t table4_run[168] = {
541 541
   0,  0,  0,  0,  0,  0,  0,  0,
542 542
   0,  0,  0,  0,  0,  0,  0,  0,
543 543
   0,  0,  0,  0,  0,  0,  0,  1,
... ...
@@ -561,25 +561,25 @@ static const INT8 table4_run[168] = {
561 561
  29, 30, 31, 32, 33, 34, 35, 36,
562 562
 };
563 563
 
564
-extern const UINT16 inter_vlc[103][2];
565
-extern const INT8 inter_level[102];
566
-extern const INT8 inter_run[102];
564
+extern const uint16_t inter_vlc[103][2];
565
+extern const int8_t inter_level[102];
566
+extern const int8_t inter_run[102];
567 567
 
568
-extern const UINT16 intra_vlc[103][2];
569
-extern const INT8 intra_level[102];
570
-extern const INT8 intra_run[102];
568
+extern const uint16_t intra_vlc[103][2];
569
+extern const int8_t intra_level[102];
570
+extern const int8_t intra_run[102];
571 571
 
572
-extern const UINT8 DCtab_lum[13][2];
573
-extern const UINT8 DCtab_chrom[13][2];
572
+extern const uint8_t DCtab_lum[13][2];
573
+extern const uint8_t DCtab_chrom[13][2];
574 574
 
575
-extern const UINT8 cbpy_tab[16][2];
576
-extern const UINT8 mvtab[33][2];
575
+extern const uint8_t cbpy_tab[16][2];
576
+extern const uint8_t mvtab[33][2];
577 577
 
578
-extern const UINT8 intra_MCBPC_code[8];
579
-extern const UINT8 intra_MCBPC_bits[8];
578
+extern const uint8_t intra_MCBPC_code[8];
579
+extern const uint8_t intra_MCBPC_bits[8];
580 580
 
581
-extern const UINT8 inter_MCBPC_code[25];
582
-extern const UINT8 inter_MCBPC_bits[25];
581
+extern const uint8_t inter_MCBPC_code[25];
582
+extern const uint8_t inter_MCBPC_bits[25];
583 583
 
584 584
 #define NB_RL_TABLES  6
585 585
 
... ...
@@ -632,7 +632,7 @@ static RLTable rl_table[NB_RL_TABLES] = {
632 632
 
633 633
 /* motion vector table 0 */
634 634
 
635
-static const UINT16 table0_mv_code[1100] = {
635
+static const uint16_t table0_mv_code[1100] = {
636 636
  0x0001, 0x0003, 0x0005, 0x0007, 0x0003, 0x0008, 0x000c, 0x0001,
637 637
  0x0002, 0x001b, 0x0006, 0x000b, 0x0015, 0x0002, 0x000e, 0x000f,
638 638
  0x0014, 0x0020, 0x0022, 0x0025, 0x0027, 0x0029, 0x002d, 0x004b,
... ...
@@ -773,7 +773,7 @@ static const UINT16 table0_mv_code[1100] = {
773 773
  0x5f0d, 0x5f0e, 0x5f0f, 0x0000,
774 774
 };
775 775
 
776
-static const UINT8 table0_mv_bits[1100] = {
776
+static const uint8_t table0_mv_bits[1100] = {
777 777
   1,  4,  4,  4,  5,  5,  5,  6,
778 778
   6,  6,  7,  7,  7,  8,  8,  8,
779 779
   8,  8,  8,  8,  8,  8,  8,  8,
... ...
@@ -914,7 +914,7 @@ static const UINT8 table0_mv_bits[1100] = {
914 914
  17, 17, 17,  8,
915 915
 };
916 916
 
917
-static const UINT8 table0_mvx[1099] = {
917
+static const uint8_t table0_mvx[1099] = {
918 918
  32, 32, 31, 32, 33, 31, 33, 31,
919 919
  33, 32, 34, 32, 30, 32, 31, 34,
920 920
  35, 32, 34, 33, 29, 33, 30, 30,
... ...
@@ -1055,7 +1055,7 @@ static const UINT8 table0_mvx[1099] = {
1055 1055
  61, 19, 19,
1056 1056
 };
1057 1057
 
1058
-static const UINT8 table0_mvy[1099] = {
1058
+static const uint8_t table0_mvy[1099] = {
1059 1059
  32, 31, 32, 33, 32, 31, 31, 33,
1060 1060
  33, 34, 32, 30, 32, 35, 34, 31,
1061 1061
  32, 29, 33, 30, 32, 34, 33, 31,
... ...
@@ -1197,7 +1197,7 @@ static const UINT8 table0_mvy[1099] = {
1197 1197
 };
1198 1198
 
1199 1199
 /* motion vector table 1 */
1200
-static const UINT16 table1_mv_code[1100] = {
1200
+static const uint16_t table1_mv_code[1100] = {
1201 1201
  0x0000, 0x0007, 0x0009, 0x000f, 0x000a, 0x0011, 0x001a, 0x001c,
1202 1202
  0x0011, 0x0031, 0x0025, 0x002d, 0x002f, 0x006f, 0x0075, 0x0041,
1203 1203
  0x004c, 0x004e, 0x005c, 0x0060, 0x0062, 0x0066, 0x0068, 0x0069,
... ...
@@ -1338,7 +1338,7 @@ static const UINT16 table1_mv_code[1100] = {
1338 1338
  0x2473, 0x26a2, 0x26a3, 0x000b,
1339 1339
 };
1340 1340
 
1341
-static const UINT8 table1_mv_bits[1100] = {
1341
+static const uint8_t table1_mv_bits[1100] = {
1342 1342
   2,  4,  4,  4,  5,  5,  5,  5,
1343 1343
   6,  6,  7,  7,  7,  7,  7,  8,
1344 1344
   8,  8,  8,  8,  8,  8,  8,  8,
... ...
@@ -1479,7 +1479,7 @@ static const UINT8 table1_mv_bits[1100] = {
1479 1479
  15, 15, 15,  4,
1480 1480
 };
1481 1481
 
1482
-static const UINT8 table1_mvx[1099] = {
1482
+static const uint8_t table1_mvx[1099] = {
1483 1483
  32, 31, 32, 31, 33, 32, 33, 33,
1484 1484
  31, 34, 30, 32, 32, 34, 35, 32,
1485 1485
  34, 33, 29, 30, 30, 32, 31, 31,
... ...
@@ -1620,7 +1620,7 @@ static const UINT8 table1_mvx[1099] = {
1620 1620
   0, 12, 27,
1621 1621
 };
1622 1622
 
1623
-static const UINT8 table1_mvy[1099] = {
1623
+static const uint8_t table1_mvy[1099] = {
1624 1624
  32, 32, 31, 31, 32, 33, 31, 33,
1625 1625
  33, 32, 32, 30, 34, 31, 32, 29,
1626 1626
  33, 30, 32, 33, 31, 35, 34, 30,
... ...
@@ -1764,11 +1764,11 @@ static const UINT8 table1_mvy[1099] = {
1764 1764
 /* motion vector table */
1765 1765
 typedef struct MVTable {
1766 1766
     int n;
1767
-    const UINT16 *table_mv_code;
1768
-    const UINT8 *table_mv_bits;
1769
-    const UINT8 *table_mvx;
1770
-    const UINT8 *table_mvy;
1771
-    UINT16 *table_mv_index; /* encoding: convert mv to index in table_mv */
1767
+    const uint16_t *table_mv_code;
1768
+    const uint8_t *table_mv_bits;
1769
+    const uint8_t *table_mvx;
1770
+    const uint8_t *table_mvy;
1771
+    uint16_t *table_mv_index; /* encoding: convert mv to index in table_mv */
1772 1772
     VLC vlc;                /* decoding: vlc */
1773 1773
 } MVTable;
1774 1774
 
... ...
@@ -1789,29 +1789,29 @@ static MVTable mv_tables[2] = {
1789 1789
     }
1790 1790
 };
1791 1791
 
1792
-static const UINT8 v2_mb_type[8][2] = {
1792
+static const uint8_t v2_mb_type[8][2] = {
1793 1793
  {1, 1}, {0   , 2}, {3   , 3}, {9   , 5},
1794 1794
  {5, 4}, {0x21, 7}, {0x20, 7}, {0x11, 6},
1795 1795
 };
1796 1796
 
1797
-static const UINT8 v2_intra_cbpc[4][2] = {
1797
+static const uint8_t v2_intra_cbpc[4][2] = {
1798 1798
  {1, 1}, {0, 3}, {1, 3}, {1, 2},
1799 1799
 };
1800 1800
 
1801
-static UINT8 wmv1_y_dc_scale_table[32]={
1801
+static uint8_t wmv1_y_dc_scale_table[32]={
1802 1802
 //  0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
1803 1803
     0, 8, 8, 8, 8, 8, 9, 9,10,10,11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18,19,19,20,20,21,21
1804 1804
 };
1805
-static UINT8 wmv1_c_dc_scale_table[32]={
1805
+static uint8_t wmv1_c_dc_scale_table[32]={
1806 1806
 //  0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
1807 1807
     0, 8, 8, 8, 8, 9, 9,10,10,11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18,19,19,20,20,21,21,22
1808 1808
 };
1809 1809
 
1810
-static UINT8 old_ff_y_dc_scale_table[32]={
1810
+static uint8_t old_ff_y_dc_scale_table[32]={
1811 1811
 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
1812 1812
     0, 8, 8, 8, 8,10,12,14,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39
1813 1813
 };
1814
-static UINT8 old_ff_c_dc_scale_table[32]={
1814
+static uint8_t old_ff_c_dc_scale_table[32]={
1815 1815
 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
1816 1816
     0, 8, 8, 8, 8, 9, 9,10,10,11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18,19,19,20,20,21,21,22
1817 1817
 };
... ...
@@ -1819,7 +1819,7 @@ static UINT8 old_ff_c_dc_scale_table[32]={
1819 1819
 
1820 1820
 #define WMV1_SCANTABLE_COUNT 4
1821 1821
 
1822
-static const UINT8 wmv1_scantable00[64]= {
1822
+static const uint8_t wmv1_scantable00[64]= {
1823 1823
 0x00, 0x08, 0x01, 0x02, 0x09, 0x10, 0x18, 0x11, 
1824 1824
 0x0A, 0x03, 0x04, 0x0B, 0x12, 0x19, 0x20, 0x28, 
1825 1825
 0x30, 0x38, 0x29, 0x21, 0x1A, 0x13, 0x0C, 0x05, 
... ...
@@ -1829,7 +1829,7 @@ static const UINT8 wmv1_scantable00[64]= {
1829 1829
 0x2C, 0x25, 0x1E, 0x17, 0x1F, 0x26, 0x2D, 0x35, 
1830 1830
 0x3D, 0x3E, 0x36, 0x2E, 0x27, 0x2F, 0x37, 0x3F, 
1831 1831
 };
1832
-static const UINT8 wmv1_scantable01[64]= {
1832
+static const uint8_t wmv1_scantable01[64]= {
1833 1833
 0x00, 0x08, 0x01, 0x02, 0x09, 0x10, 0x18, 0x11, 
1834 1834
 0x0A, 0x03, 0x04, 0x0B, 0x12, 0x19, 0x20, 0x28, 
1835 1835
 0x21, 0x30, 0x1A, 0x13, 0x0C, 0x05, 0x06, 0x0D, 
... ...
@@ -1839,7 +1839,7 @@ static const UINT8 wmv1_scantable01[64]= {
1839 1839
 0x1E, 0x17, 0x1F, 0x26, 0x2D, 0x34, 0x3C, 0x35, 
1840 1840
 0x3D, 0x2E, 0x27, 0x2F, 0x36, 0x3E, 0x37, 0x3F, 
1841 1841
 };
1842
-static const UINT8 wmv1_scantable02[64]= {
1842
+static const uint8_t wmv1_scantable02[64]= {
1843 1843
 0x00, 0x01, 0x08, 0x02, 0x03, 0x09, 0x10, 0x18, 
1844 1844
 0x11, 0x0A, 0x04, 0x05, 0x0B, 0x12, 0x19, 0x20, 
1845 1845
 0x28, 0x30, 0x21, 0x1A, 0x13, 0x0C, 0x06, 0x07, 
... ...
@@ -1849,7 +1849,7 @@ static const UINT8 wmv1_scantable02[64]= {
1849 1849
 0x17, 0x1F, 0x26, 0x2D, 0x34, 0x3B, 0x3C, 0x35, 
1850 1850
 0x2E, 0x27, 0x2F, 0x36, 0x3D, 0x3E, 0x37, 0x3F, 
1851 1851
 };
1852
-static const UINT8 wmv1_scantable03[64]= {
1852
+static const uint8_t wmv1_scantable03[64]= {
1853 1853
 0x00, 0x08, 0x10, 0x01, 0x18, 0x20, 0x28, 0x09, 
1854 1854
 0x02, 0x03, 0x0A, 0x11, 0x19, 0x30, 0x38, 0x29, 
1855 1855
 0x21, 0x1A, 0x12, 0x0B, 0x04, 0x05, 0x0C, 0x13, 
... ...
@@ -1860,14 +1860,141 @@ static const UINT8 wmv1_scantable03[64]= {
1860 1860
 0x2E, 0x27, 0x2F, 0x36, 0x3D, 0x3E, 0x37, 0x3F, 
1861 1861
 };
1862 1862
 
1863
-static const UINT8 *wmv1_scantable[WMV1_SCANTABLE_COUNT+1]={
1863
+static const uint8_t *wmv1_scantable[WMV1_SCANTABLE_COUNT+1]={
1864 1864
     wmv1_scantable00,
1865 1865
     wmv1_scantable01,
1866 1866
     wmv1_scantable02,
1867 1867
     wmv1_scantable03,
1868 1868
 };
1869 1869
 
1870
-static UINT8 table_inter_intra[4][2]={
1870
+static const uint8_t table_inter_intra[4][2]={
1871 1871
     {0,1},{2,2},{6,3},{7,3}
1872 1872
 };
1873 1873
 
1874
+#define WMV2_INTER_CBP_TABLE_COUNT 4
1875
+
1876
+static const uint32_t table_mb_non_intra2[128][2] = {
1877
+{0x0000A7, 14}, {0x01B2B8, 18}, {0x01B28E, 18}, {0x036575, 19}, 
1878
+{0x006CAC, 16}, {0x000A69, 18}, {0x002934, 20}, {0x00526B, 21}, 
1879
+{0x006CA1, 16}, {0x01B2B9, 18}, {0x0029AD, 20}, {0x029353, 24}, 
1880
+{0x006CA7, 16}, {0x006CAB, 16}, {0x01B2BB, 18}, {0x00029B, 16}, 
1881
+{0x00D944, 17}, {0x000A6A, 18}, {0x0149A8, 23}, {0x03651F, 19}, 
1882
+{0x006CAF, 16}, {0x000A4C, 18}, {0x03651E, 19}, {0x000A48, 18}, 
1883
+{0x00299C, 20}, {0x00299F, 20}, {0x029352, 24}, {0x0029AC, 20}, 
1884
+{0x000296, 16}, {0x00D946, 17}, {0x000A68, 18}, {0x000298, 16}, 
1885
+{0x000527, 17}, {0x00D94D, 17}, {0x0014D7, 19}, {0x036574, 19}, 
1886
+{0x000A5C, 18}, {0x01B299, 18}, {0x00299D, 20}, {0x00299E, 20}, 
1887
+{0x000525, 17}, {0x000A66, 18}, {0x00A4D5, 22}, {0x00149B, 19}, 
1888
+{0x000295, 16}, {0x006CAD, 16}, {0x000A49, 18}, {0x000521, 17}, 
1889
+{0x006CAA, 16}, {0x00D945, 17}, {0x01B298, 18}, {0x00052F, 17}, 
1890
+{0x003654, 15}, {0x006CA0, 16}, {0x000532, 17}, {0x000291, 16}, 
1891
+{0x003652, 15}, {0x000520, 17}, {0x000A5D, 18}, {0x000294, 16}, 
1892
+{0x00009B, 11}, {0x0006E2, 12}, {0x000028, 12}, {0x0001B0, 10}, 
1893
+{0x000001,  3}, {0x000010,  8}, {0x00002F,  6}, {0x00004C, 10}, 
1894
+{0x00000D,  4}, {0x000000, 10}, {0x000006,  9}, {0x000134, 12}, 
1895
+{0x00000C,  4}, {0x000007, 10}, {0x000007,  9}, {0x0006E1, 12}, 
1896
+{0x00000E,  5}, {0x0000DA,  9}, {0x000022,  9}, {0x000364, 11}, 
1897
+{0x00000F,  4}, {0x000006, 10}, {0x00000F,  9}, {0x000135, 12}, 
1898
+{0x000014,  5}, {0x0000DD,  9}, {0x000004,  9}, {0x000015, 11}, 
1899
+{0x00001A,  6}, {0x0001B3, 10}, {0x000005, 10}, {0x0006E3, 12}, 
1900
+{0x00000C,  5}, {0x0000B9,  8}, {0x000004,  8}, {0x0000DB,  9}, 
1901
+{0x00000E,  4}, {0x00000B, 10}, {0x000023,  9}, {0x0006CB, 12}, 
1902
+{0x000005,  6}, {0x0001B1, 10}, {0x000001, 10}, {0x0006E0, 12}, 
1903
+{0x000011,  5}, {0x0000DF,  9}, {0x00000E,  9}, {0x000373, 11}, 
1904
+{0x000003,  5}, {0x0000B8,  8}, {0x000006,  8}, {0x000175,  9}, 
1905
+{0x000015,  5}, {0x000174,  9}, {0x000027,  9}, {0x000372, 11}, 
1906
+{0x000010,  5}, {0x0000BB,  8}, {0x000005,  8}, {0x0000DE,  9}, 
1907
+{0x00000F,  5}, {0x000001,  9}, {0x000012,  8}, {0x000004, 10}, 
1908
+{0x000002,  3}, {0x000016,  5}, {0x000009,  4}, {0x000001,  5}, 
1909
+};
1910
+
1911
+static const uint32_t table_mb_non_intra3[128][2] = {
1912
+{0x0002A1, 10}, {0x005740, 15}, {0x01A0BF, 18}, {0x015D19, 17}, 
1913
+{0x001514, 13}, {0x00461E, 15}, {0x015176, 17}, {0x015177, 17}, 
1914
+{0x0011AD, 13}, {0x00682E, 16}, {0x0682F9, 20}, {0x03417D, 19}, 
1915
+{0x001A36, 14}, {0x002A2D, 14}, {0x00D05E, 17}, {0x006824, 16}, 
1916
+{0x001515, 13}, {0x00545C, 15}, {0x0230E9, 18}, {0x011AFA, 17}, 
1917
+{0x0015D7, 13}, {0x005747, 15}, {0x008D79, 16}, {0x006825, 16}, 
1918
+{0x002BA2, 14}, {0x00A8BA, 16}, {0x0235F6, 18}, {0x015D18, 17}, 
1919
+{0x0011AE, 13}, {0x00346F, 15}, {0x008C3B, 16}, {0x00346E, 15}, 
1920
+{0x000D1A, 13}, {0x00461F, 15}, {0x0682F8, 20}, {0x011875, 17}, 
1921
+{0x002BA1, 14}, {0x008D61, 16}, {0x0235F7, 18}, {0x0230E8, 18}, 
1922
+{0x001513, 13}, {0x008D7B, 16}, {0x011AF4, 17}, {0x011AF5, 17}, 
1923
+{0x001185, 13}, {0x0046BF, 15}, {0x008D60, 16}, {0x008D7C, 16}, 
1924
+{0x001512, 13}, {0x00461C, 15}, {0x00AE8D, 16}, {0x008D78, 16}, 
1925
+{0x000D0E, 13}, {0x003413, 15}, {0x0046B1, 15}, {0x003416, 15}, 
1926
+{0x000AEA, 12}, {0x002A2C, 14}, {0x005741, 15}, {0x002A2F, 14}, 
1927
+{0x000158,  9}, {0x0008D2, 12}, {0x00054C, 11}, {0x000686, 12}, 
1928
+{0x000000,  2}, {0x000069,  8}, {0x00006B,  8}, {0x00068C, 12}, 
1929
+{0x000007,  3}, {0x00015E,  9}, {0x0002A3, 10}, {0x000AE9, 12}, 
1930
+{0x000006,  3}, {0x000231, 10}, {0x0002B8, 10}, {0x001A08, 14}, 
1931
+{0x000010,  5}, {0x0001A9, 10}, {0x000342, 11}, {0x000A88, 12}, 
1932
+{0x000004,  4}, {0x0001A2, 10}, {0x0002A4, 10}, {0x001184, 13}, 
1933
+{0x000012,  5}, {0x000232, 10}, {0x0002B2, 10}, {0x000680, 12}, 
1934
+{0x00001B,  6}, {0x00046A, 11}, {0x00068E, 12}, {0x002359, 14}, 
1935
+{0x000016,  5}, {0x00015F,  9}, {0x0002A0, 10}, {0x00054D, 11}, 
1936
+{0x000005,  4}, {0x000233, 10}, {0x0002B9, 10}, {0x0015D6, 13}, 
1937
+{0x000022,  6}, {0x000468, 11}, {0x000683, 12}, {0x001A0A, 14}, 
1938
+{0x000013,  5}, {0x000236, 10}, {0x0002BB, 10}, {0x001186, 13}, 
1939
+{0x000017,  5}, {0x0001AB, 10}, {0x0002A7, 10}, {0x0008D3, 12}, 
1940
+{0x000014,  5}, {0x000237, 10}, {0x000460, 11}, {0x000D0F, 13}, 
1941
+{0x000019,  6}, {0x0001AA, 10}, {0x0002B3, 10}, {0x000681, 12}, 
1942
+{0x000018,  6}, {0x0001A8, 10}, {0x0002A5, 10}, {0x00068F, 12}, 
1943
+{0x000007,  4}, {0x000055,  7}, {0x000047,  7}, {0x0000AD,  8}, 
1944
+};
1945
+
1946
+static const uint32_t table_mb_non_intra4[128][2] = {
1947
+{0x0000D4,  8}, {0x0021C5, 14}, {0x00F18A, 16}, {0x00D5BC, 16}, 
1948
+{0x000879, 12}, {0x00354D, 14}, {0x010E3F, 17}, {0x010F54, 17}, 
1949
+{0x000866, 12}, {0x00356E, 14}, {0x010F55, 17}, {0x010E3E, 17}, 
1950
+{0x0010CE, 13}, {0x003C84, 14}, {0x00D5BD, 16}, {0x00F18B, 16}, 
1951
+{0x000868, 12}, {0x00438C, 15}, {0x0087AB, 16}, {0x00790B, 15}, 
1952
+{0x000F10, 12}, {0x00433D, 15}, {0x006AD3, 15}, {0x00790A, 15}, 
1953
+{0x001AA7, 13}, {0x0043D4, 15}, {0x00871E, 16}, {0x006ADF, 15}, 
1954
+{0x000D7C, 12}, {0x003C94, 14}, {0x00438D, 15}, {0x006AD2, 15}, 
1955
+{0x0006BC, 11}, {0x0021E9, 14}, {0x006ADA, 15}, {0x006A99, 15}, 
1956
+{0x0010F7, 13}, {0x004389, 15}, {0x006ADB, 15}, {0x0078C4, 15}, 
1957
+{0x000D56, 12}, {0x0035F7, 14}, {0x00438E, 15}, {0x006A98, 15}, 
1958
+{0x000D52, 12}, {0x003C95, 14}, {0x004388, 15}, {0x00433C, 15}, 
1959
+{0x000D54, 12}, {0x001E4B, 13}, {0x003C63, 14}, {0x003C83, 14}, 
1960
+{0x000861, 12}, {0x0021EB, 14}, {0x00356C, 14}, {0x0035F6, 14}, 
1961
+{0x000863, 12}, {0x00219F, 14}, {0x003568, 14}, {0x003C82, 14}, 
1962
+{0x0001AE,  9}, {0x0010C0, 13}, {0x000F11, 12}, {0x001AFA, 13}, 
1963
+{0x000000,  1}, {0x0000F0,  8}, {0x0001AD,  9}, {0x0010C1, 13}, 
1964
+{0x00000A,  4}, {0x0003C5, 10}, {0x000789, 11}, {0x001AB5, 13}, 
1965
+{0x000009,  4}, {0x000435, 11}, {0x000793, 11}, {0x001E40, 13}, 
1966
+{0x00001D,  5}, {0x0003CB, 10}, {0x000878, 12}, {0x001AAF, 13}, 
1967
+{0x00000B,  4}, {0x0003C7, 10}, {0x000791, 11}, {0x001AAB, 13}, 
1968
+{0x00001F,  5}, {0x000436, 11}, {0x0006BF, 11}, {0x000F19, 12}, 
1969
+{0x00003D,  6}, {0x000D51, 12}, {0x0010C4, 13}, {0x0021E8, 14}, 
1970
+{0x000036,  6}, {0x000437, 11}, {0x0006AF, 11}, {0x0010C5, 13}, 
1971
+{0x00000C,  4}, {0x000432, 11}, {0x000794, 11}, {0x001E30, 13}, 
1972
+{0x000042,  7}, {0x000870, 12}, {0x000F24, 12}, {0x001E43, 13}, 
1973
+{0x000020,  6}, {0x00043E, 11}, {0x000795, 11}, {0x001AAA, 13}, 
1974
+{0x000037,  6}, {0x0006AC, 11}, {0x0006AE, 11}, {0x0010F6, 13}, 
1975
+{0x000034,  6}, {0x00043A, 11}, {0x000D50, 12}, {0x001AAE, 13}, 
1976
+{0x000039,  6}, {0x00043F, 11}, {0x00078D, 11}, {0x0010D2, 13}, 
1977
+{0x000038,  6}, {0x00043B, 11}, {0x0006BD, 11}, {0x0010D3, 13}, 
1978
+{0x000011,  5}, {0x0001AC,  9}, {0x0000F3,  8}, {0x000439, 11}, 
1979
+};
1980
+
1981
+static const uint32_t (*wmv2_inter_table[WMV2_INTER_CBP_TABLE_COUNT])[2]={ /* Selectable macroblock CBP code tables.
+ * Used as wmv2_inter_table[cbp_table_index][symbol][k]: ff_wmv2_encode_mb()
+ * passes element [1] as the bit count and element [0] as the code word to
+ * put_bits(). It writes symbol cbp + 64 for non-intra macroblocks and plain
+ * cbp for intra macroblocks of non-I pictures. */
1982
+    table_mb_non_intra2,
1983
+    table_mb_non_intra3,
1984
+    table_mb_non_intra4,
1985
+    table_mb_non_intra, /* NOTE(review): the qscale maps in the picture header code only yield indices 0..2 -- confirm whether index 3 is ever selected */
1986
+};
1987
+
1988
+static const uint8_t wmv2_scantableA[64]={ /* Scan order passed to ff_init_scantable() for abt_scantable[0].
+ * Only 32 of the 64 elements are listed; the rest are implicitly zero.
+ * Every listed value lies in 0x00..0x1F, which with raster indexing
+ * (row*8 + col) would be the top four rows of an 8x8 block -- TODO confirm. */
1989
+0x00, 0x01, 0x02, 0x08, 0x03, 0x09, 0x0A, 0x10,
1990
+0x04, 0x0B, 0x11, 0x18, 0x12, 0x0C, 0x05, 0x13,
1991
+0x19, 0x0D, 0x14, 0x1A, 0x1B, 0x06, 0x15, 0x1C,
1992
+0x0E, 0x16, 0x1D, 0x07, 0x1E, 0x0F, 0x17, 0x1F,
1993
+};
1994
+
1995
+static const uint8_t wmv2_scantableB[64]={ /* Scan order passed to ff_init_scantable() for abt_scantable[1].
+ * Only 32 of the 64 elements are listed; the rest are implicitly zero.
+ * Every listed value has its low three bits in 0..3, which with raster
+ * indexing (row*8 + col) would be the left four columns of an 8x8 block
+ * -- TODO confirm. */
1996
+0x00, 0x08, 0x01, 0x10, 0x09, 0x18, 0x11, 0x02,
1997
+0x20, 0x0A, 0x19, 0x28, 0x12, 0x30, 0x21, 0x1A, 
1998
+0x38, 0x29, 0x22, 0x03, 0x31, 0x39, 0x0B, 0x2A, 
1999
+0x13, 0x32, 0x1B, 0x3A, 0x23, 0x2B, 0x33, 0x3B,
2000
+};
... ...
@@ -473,3 +473,93 @@ void simple_idct248_put(UINT8 *dest, int line_size, INT16 *block)
473 473
         idct4col(dest + line_size + i, 2 * line_size, block + 8 + i);
474 474
     }
475 475
 }
476
+
477
+/* 8x4 & 4x8 WMV2 IDCT */
478
+#undef CN_SHIFT
479
+#undef C_SHIFT
480
+#undef C_FIX
481
+#undef C1
482
+#undef C2
483
+#define CN_SHIFT 12
484
+#define C_FIX(x) ((int)((x) * 1.414213562 * (1 << CN_SHIFT) + 0.5))
485
+#define C1 C_FIX(0.6532814824)
486
+#define C2 C_FIX(0.2705980501)
487
+#define C3 C_FIX(0.5)
488
+#define C_SHIFT (4+1+12)
489
+static inline void idct4col_add(UINT8 *dest, int line_size, const INT16 *col)
490
+{ /* Adds a 4-point inverse transform of one coefficient column to the picture.
+   *
+   * dest:      topmost pixel of the output column; four pixels are updated,
+   *            stepping by line_size between them.
+   * line_size: distance between vertically adjacent pixels in dest.
+   * col:       first coefficient of the column; the four inputs are read with
+   *            a stride of 8 (col[0], col[8], col[16], col[24]).
+   *
+   * Each result is shifted right by C_SHIFT, added to the existing pixel and
+   * looked up through cm (presumably a clamp-to-pixel-range table; cropTbl is
+   * defined elsewhere -- confirm). */
491
+    int c0, c1, c2, c3, a0, a1, a2, a3;
492
+    const UINT8 *cm = cropTbl + MAX_NEG_CROP;
493
+
494
+    a0 = col[8*0];
495
+    a1 = col[8*1];
496
+    a2 = col[8*2];
497
+    a3 = col[8*3];
498
+    c0 = (a0 + a2)*C3 + (1 << (C_SHIFT - 1)); /* even part; the added constant rounds the later >> C_SHIFT */
499
+    c2 = (a0 - a2)*C3 + (1 << (C_SHIFT - 1));
500
+    c1 = a1 * C1 + a3 * C2; /* odd part */
501
+    c3 = a1 * C2 - a3 * C1;
502
+    dest[0] = cm[dest[0] + ((c0 + c1) >> C_SHIFT)];
503
+    dest += line_size;
504
+    dest[0] = cm[dest[0] + ((c2 + c3) >> C_SHIFT)];
505
+    dest += line_size;
506
+    dest[0] = cm[dest[0] + ((c2 - c3) >> C_SHIFT)];
507
+    dest += line_size;
508
+    dest[0] = cm[dest[0] + ((c0 - c1) >> C_SHIFT)];
509
+}
510
+
511
+#define RN_SHIFT 15
512
+#define R_FIX(x) ((int)((x) * 1.414213562 * (1 << RN_SHIFT) + 0.5))
513
+#define R1 R_FIX(0.6532814824)
514
+#define R2 R_FIX(0.2705980501)
515
+#define R3 R_FIX(0.5)
516
+#define R_SHIFT 11
517
+/*
+ * In-place 4-point inverse transform of the first four coefficients of a row.
+ *
+ * row: pointer to the row; row[0..3] are read and then overwritten with the
+ *      transformed values, each shifted right by R_SHIFT after a rounding
+ *      term was added to the even part.
+ *
+ * Unlike idct4col_add() nothing is clipped here, so no crop table is needed.
+ */
+static inline void idct4row(INT16 *row)
518
+{
519
+    int c0, c1, c2, c3, a0, a1, a2, a3;
521
+
522
+    a0 = row[0];
523
+    a1 = row[1];
524
+    a2 = row[2];
525
+    a3 = row[3];
526
+    c0 = (a0 + a2)*R3 + (1 << (R_SHIFT - 1)); /* even part plus rounding term */
527
+    c2 = (a0 - a2)*R3 + (1 << (R_SHIFT - 1));
528
+    c1 = a1 * R1 + a3 * R2;                   /* odd part */
529
+    c3 = a1 * R2 - a3 * R1;
530
+    row[0]= (c0 + c1) >> R_SHIFT;
531
+    row[1]= (c2 + c3) >> R_SHIFT;
532
+    row[2]= (c2 - c3) >> R_SHIFT;
533
+    row[3]= (c0 - c1) >> R_SHIFT;
534
+}
535
+
536
+void simple_idct84_add(UINT8 *dest, int line_size, INT16 *block)
537
+{ /* Inverse transform of an 8-wide, 4-high coefficient block, added to dest.
+   *
+   * dest:      top-left pixel of the destination area.
+   * line_size: distance between vertically adjacent pixels in dest.
+   * block:     coefficients with a row stride of 8; the first four rows are
+   *            transformed in place before the column pass. */
538
+    int i;
539
+
540
+    /* 8-point row IDCT on each of the first 4 rows */
541
+    for(i=0; i<4; i++) {
542
+        idctRowCondDC(block + i*8);
543
+    }
544
+
545
+    /* 4-point column IDCT on each of the 8 columns, added to dest */
546
+    for(i=0;i<8;i++) {
547
+        idct4col_add(dest + i, line_size, block + i);
548
+    }
549
+}
550
+
551
+void simple_idct48_add(UINT8 *dest, int line_size, INT16 *block)
552
+{ /* Inverse transform of a 4-wide, 8-high coefficient block, added to dest.
+   *
+   * dest:      top-left pixel of the destination area.
+   * line_size: distance between vertically adjacent pixels in dest.
+   * block:     coefficients with a row stride of 8; the first four entries
+   *            of all eight rows are transformed in place before the column
+   *            pass. */
553
+    int i;
554
+
555
+    /* 4-point row IDCT on each of the 8 rows */
556
+    for(i=0; i<8; i++) {
557
+        idct4row(block + i*8);
558
+    }
559
+
560
+    /* column IDCT on each of the first 4 columns via idctSparseColAdd() */
561
+    for(i=0; i<4; i++){
562
+        idctSparseColAdd(dest + i, line_size, block + i);
563
+    }
564
+}
565
+
... ...
@@ -26,3 +26,6 @@ void ff_simple_idct_put_mmx(UINT8 *dest, int line_size, INT16 *block);
26 26
 void simple_idct(short *block);
27 27
 
28 28
 void simple_idct248_put(UINT8 *dest, int line_size, INT16 *block);
29
+
30
+void simple_idct84_add(UINT8 *dest, int line_size, INT16 *block);
31
+void simple_idct48_add(UINT8 *dest, int line_size, INT16 *block);
29 32
new file mode 100644
... ...
@@ -0,0 +1,850 @@
0
+/*
1
+ * Copyright (c) 2002 The FFmpeg Project.
2
+ *
3
+ * This library is free software; you can redistribute it and/or
4
+ * modify it under the terms of the GNU Lesser General Public
5
+ * License as published by the Free Software Foundation; either
6
+ * version 2 of the License, or (at your option) any later version.
7
+ *
8
+ * This library is distributed in the hope that it will be useful,
9
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11
+ * Lesser General Public License for more details.
12
+ *
13
+ * You should have received a copy of the GNU Lesser General Public
14
+ * License along with this library; if not, write to the Free Software
15
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
+ *
17
+ */
18
+
19
+#include "simple_idct.h"
20
+ 
21
+#define SKIP_TYPE_NONE 0
22
+#define SKIP_TYPE_MPEG 1
23
+#define SKIP_TYPE_ROW  2
24
+#define SKIP_TYPE_COL  3
25
+
26
+
27
+typedef struct Wmv2Context{ /* Codec state for WMV2: the generic MpegEncContext plus WMV2-only fields. */
28
+    MpegEncContext s;       /* must stay the first member: the code casts MpegEncContext* to Wmv2Context* */
29
+    int j_type_bit;         /* extradata bit; when set, I picture headers carry a j_type bit */
30
+    int j_type;             /* per-picture value; the encoder always writes 0 */
31
+    int flag3;              /* extradata bit; only stored and printed in the code visible here */
32
+    int flag63;
33
+    int abt_flag;           /* extradata bit; when set, P picture headers carry per_mb_abt / abt_type */
34
+    int abt_type;           /* picture-level value coded with code012()/decode012() when !per_mb_abt */
35
+    int abt_type_table[6];
36
+    int per_mb_abt;         /* stored inverted in the bitstream (bit ^ 1) */
37
+    int per_block_abt;
38
+    int mspel_bit;          /* extradata bit; when set, P picture headers carry the mspel bit */
39
+    int cbp_table_index;    /* index into wmv2_inter_table[], derived from qscale and the coded cbp index */
40
+    int top_left_mv_flag;   /* extradata bit; the encoder writes 0 */
41
+    int per_mb_rl_bit;      /* extradata bit; when set, picture headers carry the per_mb_rl_table bit */
42
+    int skip_type;          /* one of the SKIP_TYPE_* values, read by parse_mb_skip() */
43
+    int hshift;
44
+    
45
+    ScanTable abt_scantable[2]; /* built from wmv2_scantableA / wmv2_scantableB in wmv2_common_init() */
46
+    DCTELEM abt_block2[6][64] __align8;
47
+}Wmv2Context;
48
+
49
+static void wmv2_common_init(Wmv2Context * w){ /* Builds the two ABT scan tables of the context:
+     * abt_scantable[0] from wmv2_scantableA and abt_scantable[1] from
+     * wmv2_scantableB. Called from wmv2_encode_init() after MPV_encode_init(). */
50
+    MpegEncContext * const s= &w->s;
51
+        
52
+    ff_init_scantable(s, &w->abt_scantable[0], wmv2_scantableA);
53
+    ff_init_scantable(s, &w->abt_scantable[1], wmv2_scantableB);
54
+}
55
+
56
+static int encode_ext_header(Wmv2Context *w){ /* Writes the WMV2 extension header into avctx->extradata.
+     *
+     * The caller must have allocated extradata and set extradata_size; 25 bits
+     * are written here. The flag fields of w are assigned while being written,
+     * so they always match the emitted header. Also sets s->slice_height.
+     * Always returns 0. */
57
+    MpegEncContext * const s= &w->s;
58
+    PutBitContext pb;
59
+    int code;
60
+        
61
+    init_put_bits(&pb, s->avctx->extradata, s->avctx->extradata_size, NULL, NULL);
62
+
63
+    put_bits(&pb, 5, s->frame_rate / FRAME_RATE_BASE); // integer division truncates: 29.97 -> 29; NOTE(review): rates of 32 or more do not fit in 5 bits -- confirm how put_bits() treats that
64
+    put_bits(&pb, 11, FFMIN(s->bit_rate/1024, 2047)); /* bit rate in units of 1024, saturated to the 11-bit field */
65
+    
66
+    put_bits(&pb, 1, w->mspel_bit=1);
67
+    put_bits(&pb, 1, w->flag3=1);
68
+    put_bits(&pb, 1, w->abt_flag=1);
69
+    put_bits(&pb, 1, w->j_type_bit=1);
70
+    put_bits(&pb, 1, w->top_left_mv_flag=0);
71
+    put_bits(&pb, 1, w->per_mb_rl_bit=1);
72
+    put_bits(&pb, 3, code=1); /* divisor used for slice_height below */
73
+    
74
+    flush_put_bits(&pb);
75
+
76
+    s->slice_height = s->mb_height / code; /* code is 1 here, so one slice spans all macroblock rows */
77
+    
78
+    return 0;
79
+}
80
+
81
+/*
+ * Encoder init callback: runs the generic MPV_encode_init(), builds the WMV2
+ * scan tables and creates the 4-byte extension header in avctx->extradata.
+ *
+ * Returns 0 on success and -1 if the generic init or the extradata
+ * allocation fails; in the latter case the generic encoder state is torn
+ * down again and extradata_size is left at 0.
+ */
+static int wmv2_encode_init(AVCodecContext *avctx){
82
+    Wmv2Context * const w= avctx->priv_data;
83
+    
84
+    if(MPV_encode_init(avctx) < 0)
85
+        return -1;
86
+    
87
+    wmv2_common_init(w);
88
+
89
+    avctx->extradata_size= 4;
90
+    avctx->extradata= av_mallocz(avctx->extradata_size + 10);
+    if(!avctx->extradata){
+        /* encode_ext_header() would otherwise write through a NULL buffer */
+        avctx->extradata_size= 0;
+        MPV_encode_end(avctx);
+        return -1;
+    }
91
+    encode_ext_header(w);
92
+    
93
+    return 0;
94
+}
95
+
96
+/*
+ * Encoder close callback: shuts down the generic encoder and releases the
+ * extradata allocated by wmv2_encode_init().
+ *
+ * The extradata is freed even when MPV_encode_end() reports an error, so a
+ * failing shutdown no longer leaks it. Returns 0 on success, -1 if
+ * MPV_encode_end() failed.
+ */
+static int wmv2_encode_end(AVCodecContext *avctx){
+    const int failed= MPV_encode_end(avctx) < 0;
101
+    
102
+    avctx->extradata_size= 0;
103
+    av_freep(&avctx->extradata);
104
+    
+    return failed ? -1 : 0;
106
+}
107
+
108
+int ff_wmv2_encode_picture_header(MpegEncContext * s, int picture_number)
109
+{ /* Writes the WMV2 picture header to s->pb and resets per-picture coding state.
+   *
+   * s:              encoder context; must be the first member of a Wmv2Context,
+   *                 because it is cast to one below.
+   * picture_number: not used by this function.
+   *
+   * Header fields that depend on an extradata flag (j_type_bit, per_mb_rl_bit,
+   * mspel_bit, abt_flag) are written only when that flag is set. Always
+   * returns 0. */
110
+    Wmv2Context * const w= (Wmv2Context*)s;
111
+
112
+    put_bits(&s->pb, 1, s->pict_type - 1);
113
+    if(s->pict_type == I_TYPE){
114
+        put_bits(&s->pb, 7, 0); /* 7-bit field, always written as 0 */
115
+    }
116
+    put_bits(&s->pb, 5, s->qscale);
117
+
    /* fixed encoder choices for every picture */
118
+    s->dc_table_index = 1;
119
+    s->mv_table_index = 1; /* only if P frame */
120
+//    s->use_skip_mb_code = 1; /* only if P frame */
121
+    s->per_mb_rl_table = 0;
122
+    s->mspel= 0;
123
+    w->per_mb_abt=0;
124
+    w->abt_type=0;
125
+    w->j_type=0;
126
+
127
+    if (s->pict_type == I_TYPE) {
128
+        if(w->j_type_bit) put_bits(&s->pb, 1, w->j_type);
129
+        
130
+        if(w->per_mb_rl_bit) put_bits(&s->pb, 1, s->per_mb_rl_table);
131
+        
132
+        if(!s->per_mb_rl_table){
133
+            code012(&s->pb, s->rl_chroma_table_index);
134
+            code012(&s->pb, s->rl_table_index);
135
+        }
136
+
137
+        put_bits(&s->pb, 1, s->dc_table_index);
138
+
139
+        s->inter_intra_pred= 0;
140
+        s->no_rounding = 1;
141
+    }else{
142
+        int cbp_index;
143
+
144
+        put_bits(&s->pb, 2, SKIP_TYPE_NONE); /* no macroblock skip map is transmitted */
145
+        
146
+        code012(&s->pb, cbp_index=0);
        /* same qscale-dependent mapping as in ff_wmv2_decode_picture_header() */
147
+        if(s->qscale <= 10){
148
+            int map[3]= {0,2,1};
149
+            w->cbp_table_index= map[cbp_index];
150
+        }else if(s->qscale <= 20){
151
+            int map[3]= {1,0,2};
152
+            w->cbp_table_index= map[cbp_index];
153
+        }else{
154
+            int map[3]= {2,1,0};
155
+            w->cbp_table_index= map[cbp_index];
156
+        }
157
+
158
+        if(w->mspel_bit) put_bits(&s->pb, 1, s->mspel);
159
+    
160
+        if(w->abt_flag){
161
+            put_bits(&s->pb, 1, w->per_mb_abt^1); /* stored inverted */
162
+            if(!w->per_mb_abt){
163
+                code012(&s->pb, w->abt_type);
164
+            }
165
+        }
166
+
167
+        if(w->per_mb_rl_bit) put_bits(&s->pb, 1, s->per_mb_rl_table);
168
+        
169
+        if(!s->per_mb_rl_table){
170
+            code012(&s->pb, s->rl_table_index);
171
+            s->rl_chroma_table_index = s->rl_table_index;
172
+        }
173
+        put_bits(&s->pb, 1, s->dc_table_index);
174
+        put_bits(&s->pb, 1, s->mv_table_index);
175
+    
176
+        s->inter_intra_pred= (s->width*s->height < 320*240 && s->bit_rate<=II_BITRATE);
177
+        s->no_rounding ^= 1; /* toggles on every non-I picture; I pictures reset it to 1 */
178
+    }
179
+    s->esc3_level_length= 0;
180
+    s->esc3_run_length= 0;
181
+
182
+    return 0;
183
+}
184
+
185
+// nearly identical to wmv1, but that's just because we don't use the useless M$ crap features
186
+// it's duplicated here in case someone wants to add support for these crap features
187
+void ff_wmv2_encode_mb(MpegEncContext * s, 
188
+                       DCTELEM block[6][64],
189
+                       int motion_x, int motion_y)
190
+{ /* Writes one macroblock (header and its six blocks) to s->pb.
+   *
+   * s:                  encoder context; must be the first member of a
+   *                     Wmv2Context, because it is cast to one below.
+   * block:              the six coefficient blocks handed to
+   *                     msmpeg4_encode_block().
+   * motion_x, motion_y: motion vector; only used when !s->mb_intra, where the
+   *                     difference to the h263_pred_motion() prediction is
+   *                     coded.
+   *
+   * In cbp, bit (5 - i) is set when block i is coded. */
191
+    Wmv2Context * const w= (Wmv2Context*)s;
192
+    int cbp, coded_cbp, i;
193
+    int pred_x, pred_y;
194
+    UINT8 *coded_block;
195
+
196
+    handle_slices(s);
197
+    
198
+    if (!s->mb_intra) {
199
+	/* compute cbp */
200
+        set_stat(ST_INTER_MB);
201
+	cbp = 0;
202
+	for (i = 0; i < 6; i++) {
203
+	    if (s->block_last_index[i] >= 0) /* any coefficient at all marks the block as coded */
204
+		cbp |= 1 << (5 - i);
205
+	}
206
+        
        /* non-intra macroblocks use symbols 64..127 of the selected table */
207
+        put_bits(&s->pb, 
208
+                 wmv2_inter_table[w->cbp_table_index][cbp + 64][1], 
209
+                 wmv2_inter_table[w->cbp_table_index][cbp + 64][0]);
210
+
211
+        /* motion vector */
212
+        h263_pred_motion(s, 0, &pred_x, &pred_y);
213
+        msmpeg4_encode_motion(s, motion_x - pred_x, 
214
+                              motion_y - pred_y);
215
+    } else {
216
+	/* compute cbp */
217
+	cbp = 0;
218
+        coded_cbp = 0;
219
+	for (i = 0; i < 6; i++) {
220
+            int val, pred;
221
+            val = (s->block_last_index[i] >= 1); /* index 0 alone does not count as coded here (presumably the DC coefficient, coded separately -- confirm) */
222
+            cbp |= val << (5 - i);
223
+            if (i < 4) {
224
+                /* predict value for close blocks only for luma */
225
+                pred = coded_block_pred(s, i, &coded_block);
226
+                *coded_block = val;
227
+                val = val ^ pred;
228
+            }
229
+            coded_cbp |= val << (5 - i);
230
+	}
231
+#if 0
232
+        if (coded_cbp)
233
+            printf("cbp=%x %x\n", cbp, coded_cbp);
234
+#endif
235
+
236
+        if (s->pict_type == I_TYPE) {
237
+            set_stat(ST_INTRA_MB);
            /* I pictures code the predicted pattern (coded_cbp) */
238
+            put_bits(&s->pb, 
239
+                     table_mb_intra[coded_cbp][1], table_mb_intra[coded_cbp][0]);
240
+        } else {
            /* other pictures code the raw pattern (cbp) with symbols 0..63 of the selected table */
241
+            put_bits(&s->pb, 
242
+                     wmv2_inter_table[w->cbp_table_index][cbp][1], 
243
+                     wmv2_inter_table[w->cbp_table_index][cbp][0]);
244
+        }
245
+        set_stat(ST_INTRA_MB);
246
+        put_bits(&s->pb, 1, 0);	/* no AC prediction yet */
247
+        if(s->inter_intra_pred){
248
+            s->h263_aic_dir=0;
249
+            put_bits(&s->pb, table_inter_intra[s->h263_aic_dir][1], table_inter_intra[s->h263_aic_dir][0]);
250
+        }
251
+    }
252
+
253
+    for (i = 0; i < 6; i++) {
254
+        msmpeg4_encode_block(s, block[i], i);
255
+    }
256
+}
257
+
258
+static void parse_mb_skip(Wmv2Context * w){ /* Reads the macroblock skip map of a picture from s->gb.
+     *
+     * A 2-bit skip_type selects the coding of the map. Afterwards every entry
+     * s->mb_type[mb_y*s->mb_width + mb_x] is either MB_TYPE_SKIPED or 0.
+     * The four case labels cover all values a 2-bit field can take. */
259
+    int mb_x, mb_y;
260
+    MpegEncContext * const s= &w->s;
261
+
262
+    w->skip_type= get_bits(&s->gb, 2);
263
+    switch(w->skip_type){
264
+    case SKIP_TYPE_NONE: /* no bits follow; nothing is skipped */
265
+        for(mb_y=0; mb_y<s->mb_height; mb_y++){
266
+            for(mb_x=0; mb_x<s->mb_width; mb_x++){
267
+                s->mb_type[mb_y*s->mb_width + mb_x]= 0;
268
+            }
269
+        }
270
+        break;
271
+    case SKIP_TYPE_MPEG: /* one bit per macroblock, row by row */
272
+        for(mb_y=0; mb_y<s->mb_height; mb_y++){
273
+            for(mb_x=0; mb_x<s->mb_width; mb_x++){
274
+                s->mb_type[mb_y*s->mb_width + mb_x]= get_bits1(&s->gb) ? MB_TYPE_SKIPED : 0;
275
+            }
276
+        }
277
+        break;
278
+    case SKIP_TYPE_ROW: /* one bit per row: set = whole row skipped, clear = one bit per macroblock follows */
279
+        for(mb_y=0; mb_y<s->mb_height; mb_y++){
280
+            if(get_bits1(&s->gb)){
281
+                for(mb_x=0; mb_x<s->mb_width; mb_x++){
282
+                    s->mb_type[mb_y*s->mb_width + mb_x]=  MB_TYPE_SKIPED;
283
+                }
284
+            }else{
285
+                for(mb_x=0; mb_x<s->mb_width; mb_x++){
286
+                    s->mb_type[mb_y*s->mb_width + mb_x]= get_bits1(&s->gb) ? MB_TYPE_SKIPED : 0;
287
+                }
288
+            }
289
+        }
290
+        break;
291
+    case SKIP_TYPE_COL: /* same scheme as SKIP_TYPE_ROW, but per column */
292
+        for(mb_x=0; mb_x<s->mb_width; mb_x++){
293
+            if(get_bits1(&s->gb)){
294
+                for(mb_y=0; mb_y<s->mb_height; mb_y++){
295
+                    s->mb_type[mb_y*s->mb_width + mb_x]=  MB_TYPE_SKIPED;
296
+                }
297
+            }else{
298
+                for(mb_y=0; mb_y<s->mb_height; mb_y++){
299
+                    s->mb_type[mb_y*s->mb_width + mb_x]= get_bits1(&s->gb) ? MB_TYPE_SKIPED : 0;
300
+                }
301
+            }
302
+        }
303
+        break;
304
+    }
305
+}
306
+
307
+static int decode_ext_header(Wmv2Context *w){ /* Parses the WMV2 extension header from avctx->extradata.
+     *
+     * Fields are read in the order encode_ext_header() writes them. Fills the
+     * flag fields of w, s->bit_rate and s->slice_height.
+     * Returns 0 on success, -1 when fewer than 4 bytes of extradata are
+     * available or the 3-bit code field is 0. */
308
+    MpegEncContext * const s= &w->s;
309
+    GetBitContext gb;
310
+    int fps;
311
+    int code;
312
+
313
+    if(s->avctx->extradata_size<4) return -1;
314
+    
315
+    init_get_bits(&gb, s->avctx->extradata, s->avctx->extradata_size);
316
+
317
+    fps                = get_bits(&gb, 5); /* only used for the debug print below */
318
+    s->bit_rate        = get_bits(&gb, 11)*1024;
319
+    w->mspel_bit       = get_bits1(&gb);
320
+    w->flag3           = get_bits1(&gb);
321
+    w->abt_flag        = get_bits1(&gb);
322
+    w->j_type_bit      = get_bits1(&gb);
323
+    w->top_left_mv_flag= get_bits1(&gb);
324
+    w->per_mb_rl_bit   = get_bits1(&gb);
325
+    code               = get_bits(&gb, 3);
326
+    
327
+    if(code==0) return -1; /* code is the divisor below */
328
+            
329
+    s->slice_height = s->mb_height / code; /* NOTE(review): becomes 0 when code > mb_height -- confirm that users of slice_height tolerate that */
330
+
331
+    if(s->avctx->debug&FF_DEBUG_PICT_INFO){
332
+        printf("fps:%d, br:%d, qpbit:%d, abt_flag:%d, j_type_bit:%d, tl_mv_flag:%d, mbrl_bit:%d, code:%d, flag3:%d\n", 
333
+        fps, s->bit_rate, w->mspel_bit, w->abt_flag, w->j_type_bit, w->top_left_mv_flag, w->per_mb_rl_bit, code, w->flag3); /* the "qpbit" label prints mspel_bit */
334
+    }
335
+    return 0;
336
+}
337
+
338
+/**
+ * Parse a WMV2 picture header from s->gb and store the per-picture coding
+ * parameters in the decoder context.
+ *
+ * On the first picture (s->picture_number == 0) decode_ext_header() is run
+ * first. Then the picture type and qscale are read, followed by the table
+ * selectors of the picture type; P pictures go through parse_mb_skip()
+ * before their selectors. s->picture_number is incremented on every call.
+ *
+ * All diagnostics are printed only when the matching s->avctx->debug flag
+ * is set; the unsupported-picture error goes to stderr.
+ *
+ * @param s decoder context; accessed as a Wmv2Context through a cast
+ * @return 0 on success, -1 for J-type pictures, which are not supported
+ */
+int ff_wmv2_decode_picture_header(MpegEncContext * s)
+{
+    /* cbp table selector remapping, one row per qscale range: <=10, <=20, above */
+    static const int cbp_map[3][3]= {
+        {0,2,1},
+        {1,0,2},
+        {2,1,0},
+    };
+    Wmv2Context * const w= (Wmv2Context*)s;
+    int code, i;
+
+    if(s->picture_number==0)
+        decode_ext_header(w);
+
+    s->pict_type = get_bits(&s->gb, 1) + 1;
+    if(s->pict_type == I_TYPE){
+        /* the 7 bits must always be consumed; they are only shown on request */
+        code = get_bits(&s->gb, 7);
+        if(s->avctx->debug&FF_DEBUG_PICT_INFO)
+            printf("I7:%X/\n", code);
+    }
+    s->qscale = get_bits(&s->gb, 5);
+
+    if (s->pict_type == I_TYPE) {
+        if(w->j_type_bit) w->j_type= get_bits1(&s->gb);
+        else              w->j_type= 0; //FIXME check
+
+        /* the table selectors are only read when the picture is not J-type */
+        if(!w->j_type){
+            if(w->per_mb_rl_bit) s->per_mb_rl_table= get_bits1(&s->gb);
+            else                 s->per_mb_rl_table= 0;
+
+            if(!s->per_mb_rl_table){
+                s->rl_chroma_table_index = decode012(&s->gb);
+                s->rl_table_index = decode012(&s->gb);
+            }
+
+            s->dc_table_index = get_bits1(&s->gb);
+        }
+        s->inter_intra_pred= 0;
+        s->no_rounding = 1;
+        if(s->avctx->debug&FF_DEBUG_PICT_INFO){
+            printf("qscale:%d rlc:%d rl:%d dc:%d mbrl:%d j_type:%d \n",
+                s->qscale,
+                s->rl_chroma_table_index,
+                s->rl_table_index,
+                s->dc_table_index,
+                s->per_mb_rl_table,
+                w->j_type);
+        }
+    }else{
+        int cbp_index;
+        const int *map;
+        w->j_type=0;
+
+        parse_mb_skip(w);
+        cbp_index= decode012(&s->gb);
+        if     (s->qscale <= 10) map= cbp_map[0];
+        else if(s->qscale <= 20) map= cbp_map[1];
+        else                     map= cbp_map[2];
+        w->cbp_table_index= map[cbp_index];
+
+        if(w->mspel_bit) s->mspel= get_bits1(&s->gb);
+        else             s->mspel= 0; //FIXME check
+
+        if(w->abt_flag){
+            /* the stored flag is the inverse of the bit in the stream */
+            w->per_mb_abt= get_bits1(&s->gb)^1;
+            if(!w->per_mb_abt){
+                w->abt_type= decode012(&s->gb);
+            }
+        }
+
+        if(w->per_mb_rl_bit) s->per_mb_rl_table= get_bits1(&s->gb);
+        else                 s->per_mb_rl_table= 0;
+
+        if(!s->per_mb_rl_table){
+            s->rl_table_index = decode012(&s->gb);
+            s->rl_chroma_table_index = s->rl_table_index;
+        }
+
+        s->dc_table_index = get_bits1(&s->gb);
+        s->mv_table_index = get_bits1(&s->gb);
+
+        s->inter_intra_pred= (s->width*s->height < 320*240 && s->bit_rate<=II_BITRATE);
+        s->no_rounding ^= 1;
+
+        if(s->avctx->debug&FF_DEBUG_PICT_INFO){
+            printf("rl:%d rlc:%d dc:%d mv:%d mbrl:%d qp:%d mspel:%d per_mb_abt:%d abt_type:%d cbp:%d ii:%d\n",
+                s->rl_table_index,
+                s->rl_chroma_table_index,
+                s->dc_table_index,
+                s->mv_table_index,
+                s->per_mb_rl_table,
+                s->qscale,
+                s->mspel,
+                w->per_mb_abt,
+                w->abt_type,
+                w->cbp_table_index,
+                s->inter_intra_pred);
+        }
+    }
+    s->esc3_level_length= 0;
+    s->esc3_run_length= 0;
+
+    if(s->avctx->debug&FF_DEBUG_SKIP){
+        /* dump s->mb_type, one text line per macroblock row */
+        for(i=0; i<s->mb_num; i++){
+            if(i%s->mb_width==0) printf("\n");
+            printf("%d", s->mb_type[i]);
+        }
+    }
+    s->picture_number++; //FIXME ?
+
+    /* FIXME: J-type decoding (wmv2_decode_j_picture) is not wired in yet */
+    if(w->j_type){
+        fprintf(stderr, "J-type picture isnt supported\n");
+        return -1;
+    }
+
+    return 0;
+}
467
+
468
+/**
+ * Decoder-side WMV2 setup hook; it currently performs no work.
+ *
+ * @param s decoder context (unused)
+ */
+void ff_wmv2_decode_init(MpegEncContext *s)
+{
+    (void)s;
+}
470
+
471
+/**
+ * Read the motion vector of the current macroblock and, when present, the
+ * hshift bit that follows it.
+ *
+ * The vector itself is read by msmpeg4_decode_motion(). The hshift bit is
+ * only read when at least one of the resulting components is odd and
+ * s->mspel is set; otherwise w->hshift is cleared.
+ *
+ * @param w      WMV2 decoder context
+ * @param mx_ptr x component, passed through to msmpeg4_decode_motion()
+ * @param my_ptr y component, passed through to msmpeg4_decode_motion()
+ * @return 0 on success, -1 if msmpeg4_decode_motion() reported an error
+ */
+static inline int wmv2_decode_motion(Wmv2Context *w, int *mx_ptr, int *my_ptr){
+    MpegEncContext * const s= &w->s;
+    int any_odd;
+
+    if(msmpeg4_decode_motion(s, mx_ptr, my_ptr) < 0)
+        return -1;
+
+    any_odd= ((*mx_ptr) | (*my_ptr)) & 1;
+    if(any_odd && s->mspel) w->hshift= get_bits1(&s->gb);
+    else                    w->hshift= 0;
+
+    return 0;
+}
488
+
489
+/**
+ * Choose the motion vector predictor for the current macroblock.
+ *
+ * Three candidates are taken from s->motion_val relative to
+ * s->block_index[0]: A at index-1, B at index-wrap and C at index+2-wrap,
+ * with wrap = s->block_wrap[0].
+ * NOTE(review): these look like the left, top and top-right neighbours,
+ * assuming wrap is the row stride of motion_val - confirm.
+ *
+ * When mb_x and mb_y are both non-zero, mspel is off, w->top_left_mv_flag
+ * is set and A and B differ by at least 8 in one component, one bit from
+ * the bitstream selects A (0) or B (1). In every other case A is used on
+ * the first slice line and the component-wise mid_pred(A, B, C) elsewhere.
+ *
+ * @param w  WMV2 decoder context
+ * @param px receives the predicted x component
+ * @param py receives the predicted y component
+ * @return the s->motion_val entry of the current block
+ */
+static int16_t *wmv2_pred_motion(Wmv2Context *w, int *px, int *py){
+    MpegEncContext * const s= &w->s;
+    const int stride= s->block_wrap[0];
+    const int pos= s->block_index[0];
+    INT16 * const cur   = s->motion_val[pos];
+    INT16 * const cand_a= s->motion_val[pos - 1];
+    INT16 * const cand_b= s->motion_val[pos - stride];
+    INT16 * const cand_c= s->motion_val[pos + 2 - stride];
+    const int dx= ABS(cand_a[0] - cand_b[0]);
+    const int dy= ABS(cand_a[1] - cand_b[1]);
+    const int spread= FFMAX(dx, dy);
+    int selector= 2;
+
+    //FIXME top/left bit too if y=!0 && first_slice_line?
+    if(s->mb_x && s->mb_y && !s->mspel && w->top_left_mv_flag && spread >= 8)
+        selector= get_bits1(&s->gb);
+
+    switch(selector){
+    case 0:
+        *px= cand_a[0];
+        *py= cand_a[1];
+        break;
+    case 1:
+        *px= cand_b[0];
+        *py= cand_b[1];
+        break;
+    default:
+        /* special case for first (slice) line */
+        if (s->first_slice_line) {
+            *px = cand_a[0];
+            *py = cand_a[1];
+        } else {
+            *px = mid_pred(cand_a[0], cand_b[0], cand_c[0]);
+            *py = mid_pred(cand_a[1], cand_b[1], cand_c[1]);
+        }
+        break;
+    }
+
+    return cur;
+}
530
+
531
+/**
+ * Decode the coefficients of one inter block.
+ *
+ * An uncoded block (cbp == 0) only gets block_last_index[n] = -1. For a
+ * coded block the ABT type is read per block when w->per_block_abt is set
+ * and recorded in w->abt_type_table[n]. With ABT type 0 the block is
+ * decoded as a whole with the permutated inter scantable. Otherwise a
+ * sub-cbp code selects which of the two parts are coded: bit 0 decodes
+ * into block, bit 1 into w->abt_block2[n].
+ *
+ * @param w     WMV2 decoder context
+ * @param block destination coefficients
+ * @param n     block index within the macroblock
+ * @param cbp   non-zero if the block is coded
+ * @return 0 on success (or the result of msmpeg4_decode_block() for ABT
+ *         type 0), -1 if decoding one of the two parts failed
+ */
+static inline int wmv2_decode_inter_block(Wmv2Context *w, DCTELEM *block, int n, int cbp){
+    MpegEncContext * const s= &w->s;
+    static const int sub_cbp_table[3]= {2,3,1};
+    const uint8_t *scan;
+    int parts;
+
+    if(cbp == 0){
+        s->block_last_index[n] = -1;
+        return 0;
+    }
+
+    if(w->per_block_abt)
+        w->abt_type= decode012(&s->gb);
+    w->abt_type_table[n]= w->abt_type;
+
+    if(!w->abt_type)
+        return msmpeg4_decode_block(s, block, n, 1, s->inter_scantable.permutated);
+
+    /* alternatives kept from the original author:
+     *   w->abt_scantable[w->abt_type-1].permutated
+     *   w->abt_type-1 ? w->abt_scantable[1].permutated : w->abt_scantable[0].scantable
+     */
+    scan= w->abt_scantable[w->abt_type-1].scantable;
+    parts= sub_cbp_table[ decode012(&s->gb) ];
+
+    if((parts&1) && msmpeg4_decode_block(s, block, n, 1, scan) < 0)
+        return -1;
+
+    if((parts&2) && msmpeg4_decode_block(s, w->abt_block2[n], n, 1, scan) < 0)
+        return -1;
+
+    s->block_last_index[n] = 63;
+    return 0;
+}
574
+
575
+/**
+ * Inverse-transform block n of the current macroblock and add it to dst.
+ *
+ * The transform is chosen by w->abt_type_table[n]:
+ *   0 - s->idct_add() on block1, skipped when s->block_last_index[n] < 0
+ *   1 - two simple_idct84_add() calls: block1 at dst and w->abt_block2[n]
+ *       four rows below
+ *   2 - two simple_idct48_add() calls: block1 at dst and w->abt_block2[n]
+ *       four bytes to the right
+ * For types 1 and 2 w->abt_block2[n] is cleared afterwards. Any other type
+ * is reported on stderr and nothing is added.
+ *
+ * @param w      WMV2 decoder context
+ * @param block1 coefficients of the block
+ * @param dst    destination pixels
+ * @param stride offset between two rows of dst
+ * @param n      block index within the macroblock
+ */
+static void wmv2_add_block(Wmv2Context *w, DCTELEM *block1, uint8_t *dst, int stride, int n){
+    MpegEncContext * const s= &w->s;
+
+    switch(w->abt_type_table[n]){
+    case 0:
+        if (s->block_last_index[n] >= 0) {
+            s->idct_add (dst, stride, block1);
+        }
+        break;
+    case 1:
+        simple_idct84_add(dst           , stride, block1);
+        simple_idct84_add(dst + 4*stride, stride, w->abt_block2[n]);
+        memset(w->abt_block2[n], 0, 64*sizeof(DCTELEM));
+        break;
+    case 2:
+        simple_idct48_add(dst           , stride, block1);
+        simple_idct48_add(dst + 4       , stride, w->abt_block2[n]);
+        memset(w->abt_block2[n], 0, 64*sizeof(DCTELEM));
+        break;
+    default:
+        fprintf(stderr, "internal error in WMV2 abt\n");
+    }
+}
609
+
610
+/**
+ * Add the decoded residual of one macroblock to the picture.
+ *
+ * Blocks 0..3 go to dest_y in a 2x2 arrangement, 8 bytes apart horizontally
+ * and 8 rows apart vertically; blocks 4 and 5 go to dest_cb and dest_cr.
+ * The two chroma blocks are skipped when CODEC_FLAG_GRAY is set.
+ *
+ * @param s       decoder context; accessed as a Wmv2Context through a cast
+ * @param block1  coefficients of the six blocks
+ * @param dest_y  luma destination
+ * @param dest_cb Cb destination
+ * @param dest_cr Cr destination
+ */
+void ff_wmv2_add_mb(MpegEncContext *s, DCTELEM block1[6][64], uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr){
+    Wmv2Context * const w= (Wmv2Context*)s;
+    int blk;
+
+    for(blk=0; blk<4; blk++){
+        uint8_t *dst= dest_y + 8*(blk&1) + 8*(blk>>1)*s->linesize;
+
+        wmv2_add_block(w, block1[blk], dst, s->linesize, blk);
+    }
+
+    if(s->flags&CODEC_FLAG_GRAY)
+        return;
+
+    wmv2_add_block(w, block1[4], dest_cb, s->uvlinesize, 4);
+    wmv2_add_block(w, block1[5], dest_cr, s->uvlinesize, 5);
+}
623
+/**
+ * Motion-compensate one macroblock in mspel mode.
+ *
+ * Luma is predicted with four calls to s->dsp.put_mspel_pixels_tab[] at the
+ * offsets (0,0), (8,0), (0,8) and (8,8); chroma is predicted with
+ * pix_op[1][]. When CODEC_FLAG_GRAY is set only luma is written.
+ * With CODEC_FLAG_EMU_EDGE, source areas that touch the picture edge are
+ * first copied into s->edge_emu_buffer by ff_emulated_edge_mc().
+ *
+ * @param s           decoder context; accessed as a Wmv2Context for hshift
+ * @param dest_y      luma destination
+ * @param dest_cb     Cb destination
+ * @param dest_cr     Cr destination
+ * @param ref_picture reference planes: [0] is used for luma, [1] and [2]
+ *                    for the two chroma planes
+ * @param pix_op      pixel operation table; only pix_op[1] is used here
+ * @param motion_x    horizontal motion vector component
+ * @param motion_y    vertical motion vector component
+ * @param h           height, used in the luma edge test and, halved, as
+ *                    the height passed to the chroma pixel operation
+ */
624
+void ff_mspel_motion(MpegEncContext *s,
625
+                               UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr,
626
+                               UINT8 **ref_picture, op_pixels_func (*pix_op)[4],
627
+                               int motion_x, int motion_y, int h)
628
+{
629
+    Wmv2Context * const w= (Wmv2Context*)s;
630
+    UINT8 *ptr;
631
+    int dxy, offset, mx, my, src_x, src_y, v_edge_pos, linesize, uvlinesize;
632
+    int emu=0;
633
+    /* table index: bit 0 from motion_x, bit 1 from motion_y, then doubled and offset by w->hshift */
634
+    dxy = ((motion_y & 1) << 1) | (motion_x & 1);
635
+    dxy = 2*dxy + w->hshift;
636
+    src_x = s->mb_x * 16 + (motion_x >> 1);
637
+    src_y = s->mb_y * 16 + (motion_y >> 1);
638
+                
639
+    /* WARNING: do not forget half pels */
640
+    v_edge_pos = s->v_edge_pos;
641
+    src_x = clip(src_x, -16, s->width);
642
+    src_y = clip(src_y, -16, s->height);
643
+    linesize   = s->linesize;
644
+    uvlinesize = s->uvlinesize;
645
+    ptr = ref_picture[0] + (src_y * linesize) + src_x;
646
+
647
+    if(s->flags&CODEC_FLAG_EMU_EDGE){
648
+        if(src_x<1 || src_y<1 || src_x + 17  >= s->h_edge_pos
649
+                              || src_y + h+1 >= v_edge_pos){
650
+            ff_emulated_edge_mc(s, ptr - 1 - s->linesize, s->linesize, 19, 19, 
651
+                             src_x-1, src_y-1, s->h_edge_pos, s->v_edge_pos);
652
+            ptr= s->edge_emu_buffer + 1 + s->linesize; /* step past the extra row and column requested above */
653
+            emu=1; /* remembered so that chroma is fetched through the emulation buffer too */
654
+        }
655
+    }
656
+
657
+    s->dsp.put_mspel_pixels_tab[dxy](dest_y             , ptr             , linesize);
658
+    s->dsp.put_mspel_pixels_tab[dxy](dest_y+8           , ptr+8           , linesize);
659
+    s->dsp.put_mspel_pixels_tab[dxy](dest_y  +8*linesize, ptr  +8*linesize, linesize);
660
+    s->dsp.put_mspel_pixels_tab[dxy](dest_y+8+8*linesize, ptr+8+8*linesize, linesize);
661
+
662
+    if(s->flags&CODEC_FLAG_GRAY) return;
663
+
664
+    if (s->out_format == FMT_H263) { /* chroma vector and dxy are derived differently per output format */
665
+        dxy = 0;
666
+        if ((motion_x & 3) != 0)
667
+            dxy |= 1;
668
+        if ((motion_y & 3) != 0)
669
+            dxy |= 2;
670
+        mx = motion_x >> 2;
671
+        my = motion_y >> 2;
672
+    } else {
673
+        mx = motion_x / 2;
674
+        my = motion_y / 2;
675
+        dxy = ((my & 1) << 1) | (mx & 1);
676
+        mx >>= 1;
677
+        my >>= 1;
678
+    }
679
+    /* chroma source position; a dxy bit is cleared when its coordinate was clipped to the upper limit */
680
+    src_x = s->mb_x * 8 + mx;
681
+    src_y = s->mb_y * 8 + my;
682
+    src_x = clip(src_x, -8, s->width >> 1);
683
+    if (src_x == (s->width >> 1))
684
+        dxy &= ~1;
685
+    src_y = clip(src_y, -8, s->height >> 1);
686
+    if (src_y == (s->height >> 1))
687
+        dxy &= ~2;
688
+    offset = (src_y * uvlinesize) + src_x; /* same offset is used for both chroma planes */
689
+    ptr = ref_picture[1] + offset;
690
+    if(emu){
691
+        ff_emulated_edge_mc(s, ptr, s->uvlinesize, 9, 9, 
692
+                         src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
693
+        ptr= s->edge_emu_buffer;
694
+    }
695
+    pix_op[1][dxy](dest_cb, ptr, uvlinesize, h >> 1);
696
+
697
+    ptr = ref_picture[2] + offset;
698
+    if(emu){ /* edge_emu_buffer is reused here, after Cb has already been written */
699
+        ff_emulated_edge_mc(s, ptr, s->uvlinesize, 9, 9, 
700
+                         src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
701
+        ptr= s->edge_emu_buffer;
702
+    }
703
+    pix_op[1][dxy](dest_cr, ptr, uvlinesize, h >> 1);
704
+}
705
+
706
+/**
+ * Decode one macroblock: its type and coded block pattern, then either the
+ * motion vector and the six inter blocks, or the intra flags and the six
+ * intra blocks.
+ *
+ * Nothing is decoded while w->j_type is set. In P pictures a macroblock
+ * flagged MB_TYPE_SKIPED in s->mb_type is set up as a zero-motion 16x16
+ * forward prediction without reading any bits.
+ *
+ * @param s     decoder context; accessed as a Wmv2Context through a cast
+ * @param block coefficient storage for the six blocks
+ * @return 0 on success, -1 on an invalid code or a block decoding error
+ */
707
+static int wmv2_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
708
+{
709
+    Wmv2Context * const w= (Wmv2Context*)s;
710
+    int cbp, code, i;
711
+    UINT8 *coded_val;
712
+
713
+    if(w->j_type) return 0;
714
+    /* clear the error status entry of this macroblock */
715
+    s->error_status_table[s->mb_x + s->mb_y*s->mb_width]= 0;
716
+    
717
+    if (s->pict_type == P_TYPE) {
718
+        if(s->mb_type[s->mb_y * s->mb_width + s->mb_x]&MB_TYPE_SKIPED){
719
+            /* skip mb */
720
+            s->mb_intra = 0;
721
+            for(i=0;i<6;i++)
722
+                s->block_last_index[i] = -1;
723
+            s->mv_dir = MV_DIR_FORWARD;
724
+            s->mv_type = MV_TYPE_16X16;
725
+            s->mv[0][0][0] = 0;
726
+            s->mv[0][0][1] = 0;
727
+            s->mb_skiped = 1;
728
+            return 0;
729
+        }
730
+
731
+        code = get_vlc2(&s->gb, mb_non_intra_vlc[w->cbp_table_index].table, MB_NON_INTRA_VLC_BITS, 3);
732
+        if (code < 0)
733
+            return -1;
734
+	s->mb_intra = (~code & 0x40) >> 6; /* bit 6 of code clear => intra macroblock */
735
+            
736
+        cbp = code & 0x3f; /* low 6 bits: one coded flag per block, block 0 in bit 5 */
737
+    } else {
738
+        s->mb_intra = 1;
739
+        code = get_vlc2(&s->gb, mb_intra_vlc.table, MB_INTRA_VLC_BITS, 2);
740
+        if (code < 0){
741
+            fprintf(stderr, "II-cbp illegal at %d %d\n", s->mb_x, s->mb_y);
742
+            return -1;
743
+        }
744
+        /* predict coded block pattern */
745
+        cbp = 0;
746
+        for(i=0;i<6;i++) {
747
+            int val = ((code >> (5 - i)) & 1);
748
+            if (i < 4) { /* only blocks 0..3 are predicted; blocks 4 and 5 use the raw bit */
749
+                int pred = coded_block_pred(s, i, &coded_val);
750
+                val = val ^ pred;
751
+                *coded_val = val; /* store the result where coded_block_pred() pointed coded_val */
752
+            }
753
+            cbp |= val << (5 - i);
754
+        }
755
+    }
756
+
757
+    if (!s->mb_intra) {
758
+        int mx, my;
759
+//printf("P at %d %d\n", s->mb_x, s->mb_y);
760
+        wmv2_pred_motion(w, &mx, &my);
761
+        /* the per-macroblock table and ABT selectors are only read when at least one block is coded */
762
+        if(cbp){
763
+            if(s->per_mb_rl_table){
764
+                s->rl_table_index = decode012(&s->gb);
765
+                s->rl_chroma_table_index = s->rl_table_index;
766
+            }
767
+
768
+            if(w->abt_flag && w->per_mb_abt){
769
+                w->per_block_abt= get_bits1(&s->gb);
770
+                if(!w->per_block_abt)
771
+                    w->abt_type= decode012(&s->gb);
772
+            }else
773
+                w->per_block_abt=0;
774
+        }
775
+        /* mx/my enter holding the prediction; NOTE(review): presumably msmpeg4_decode_motion adds the coded difference - confirm */
776
+        if (wmv2_decode_motion(w, &mx, &my) < 0)
777
+            return -1;
778
+
779
+        s->mv_dir = MV_DIR_FORWARD;
780
+        s->mv_type = MV_TYPE_16X16;
781
+        s->mv[0][0][0] = mx;
782
+        s->mv[0][0][1] = my;
783
+
784
+        for (i = 0; i < 6; i++) {
785
+            if (wmv2_decode_inter_block(w, block[i], i, (cbp >> (5 - i)) & 1) < 0)
786
+	    {
787
+	        fprintf(stderr,"\nerror while decoding inter block: %d x %d (%d)\n", s->mb_x, s->mb_y, i);
788
+	        return -1;
789
+	    }
790
+        }    
791
+    } else {
792
+//if(s->pict_type==P_TYPE)
793
+//   printf("%d%d ", s->inter_intra_pred, cbp);
794
+//printf("I at %d %d %d %06X\n", s->mb_x, s->mb_y, ((cbp&3)? 1 : 0) +((cbp&0x3C)? 2 : 0), show_bits(&s->gb, 24));
795
+        s->ac_pred = get_bits1(&s->gb);
796
+        if(s->inter_intra_pred){
797
+            s->h263_aic_dir= get_vlc2(&s->gb, inter_intra_vlc.table, INTER_INTRA_VLC_BITS, 1); /* NOTE(review): result is stored without a check for a negative code */
798
+//            printf("%d%d %d %d/", s->ac_pred, s->h263_aic_dir, s->mb_x, s->mb_y);
799
+        }
800
+        if(s->per_mb_rl_table && cbp){
801
+            s->rl_table_index = decode012(&s->gb);
802
+            s->rl_chroma_table_index = s->rl_table_index;
803
+        }
804
+    
805
+        for (i = 0; i < 6; i++) {
806
+            if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0)
807
+	    {
808
+	        fprintf(stderr,"\nerror while decoding intra block: %d x %d (%d)\n", s->mb_x, s->mb_y, i);
809
+	        return -1;
810
+	    }
811
+        }    
812
+    }
813
+
814
+    return 0;
815
+}
816
+
817
+/**
+ * Codec init callback of the WMV2 decoder.
+ *
+ * Runs ff_h263_decode_init() and, if that succeeds, wmv2_common_init() on
+ * the Wmv2Context stored in avctx->priv_data.
+ *
+ * @param avctx codec context
+ * @return 0 on success, -1 if ff_h263_decode_init() failed
+ */
+static int wmv2_decode_init(AVCodecContext *avctx){
+    Wmv2Context * const w= avctx->priv_data;
+
+    if(ff_h263_decode_init(avctx) >= 0){
+        wmv2_common_init(w);
+        return 0;
+    }
+
+    return -1;
+}
827
+/**
+ * Codec table entry for the WMV2 decoder, written as a positional AVCodec
+ * initializer. NOTE(review): the slot names in the comments below are
+ * inferred from the values - confirm against the AVCodec declaration.
+ */
828
+AVCodec wmv2_decoder = {
829
+    "wmv2",
830
+    CODEC_TYPE_VIDEO,
831
+    CODEC_ID_WMV2,
832
+    sizeof(Wmv2Context), /* the context type wmv2_decode_init() reads from avctx->priv_data */
833
+    wmv2_decode_init,
834
+    NULL, /* presumably the encode slot, unused by a decoder */
835
+    ff_h263_decode_end,
836
+    ff_h263_decode_frame,
837
+    CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
838
+};
839
+/**
+ * Codec table entry for the WMV2 encoder, written as a positional AVCodec
+ * initializer; the entries after MPV_encode_end are left to their default
+ * zero value. NOTE(review): which slots those are depends on the AVCodec
+ * declaration - confirm there.
+ */
840
+AVCodec wmv2_encoder = {
841
+    "wmv2",
842
+    CODEC_TYPE_VIDEO,
843
+    CODEC_ID_WMV2,
844
+    sizeof(Wmv2Context),
845
+    wmv2_encode_init,
846
+    MPV_encode_picture,
847
+    MPV_encode_end,
848
+};
849
+