Browse code

* Use DSPContext so that each codec can use its own local (sub)set of CPU extensions

Originally committed as revision 1194 to svn://svn.ffmpeg.org/ffmpeg/trunk

Zdenek Kabelac authored on 2002/11/11 18:40:17
Showing 13 changed files
... ...
@@ -20,7 +20,7 @@
20 20
  */
21 21
 #include "avcodec.h"
22 22
 #include "dsputil.h"
23
-
23
+/*
24 24
 void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
25 25
 void (*diff_pixels)(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride);
26 26
 void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
... ...
@@ -41,7 +41,7 @@ op_pixels_abs_func pix_abs8x8;
41 41
 op_pixels_abs_func pix_abs8x8_x2;
42 42
 op_pixels_abs_func pix_abs8x8_y2;
43 43
 op_pixels_abs_func pix_abs8x8_xy2;
44
-
44
+*/
45 45
 int ff_bit_exact=0;
46 46
 
47 47
 UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];
... ...
@@ -84,7 +84,7 @@ const UINT8 ff_alternate_vertical_scan[64] = {
84 84
 };
85 85
 
86 86
 /* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */
87
-UINT32 inverse[256]={
87
+const UINT32 inverse[256]={
88 88
          0, 4294967295U,2147483648U,1431655766, 1073741824,  858993460,  715827883,  613566757, 
89 89
  536870912,  477218589,  429496730,  390451573,  357913942,  330382100,  306783379,  286331154, 
90 90
  268435456,  252645136,  238609295,  226050911,  214748365,  204522253,  195225787,  186737709, 
... ...
@@ -119,7 +119,7 @@ UINT32 inverse[256]={
119 119
   17318417,   17248865,   17179870,   17111424,   17043522,   16976156,   16909321,   16843010,
120 120
 };
121 121
 
122
-int pix_sum_c(UINT8 * pix, int line_size)
122
+static int pix_sum_c(UINT8 * pix, int line_size)
123 123
 {
124 124
     int s, i, j;
125 125
 
... ...
@@ -141,7 +141,7 @@ int pix_sum_c(UINT8 * pix, int line_size)
141 141
     return s;
142 142
 }
143 143
 
144
-int pix_norm1_c(UINT8 * pix, int line_size)
144
+static int pix_norm1_c(UINT8 * pix, int line_size)
145 145
 {
146 146
     int s, i, j;
147 147
     UINT32 *sq = squareTbl + 256;
... ...
@@ -165,7 +165,7 @@ int pix_norm1_c(UINT8 * pix, int line_size)
165 165
 }
166 166
 
167 167
 
168
-void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_size)
168
+static void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_size)
169 169
 {
170 170
     int i;
171 171
 
... ...
@@ -184,8 +184,8 @@ void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_size)
184 184
     }
185 185
 }
186 186
 
187
-void diff_pixels_c(DCTELEM *restrict block, const UINT8 *s1, const UINT8 *s2,
188
-		   int stride){
187
+static void diff_pixels_c(DCTELEM *restrict block, const UINT8 *s1,
188
+			  const UINT8 *s2, int stride){
189 189
     int i;
190 190
 
191 191
     /* read the pixels */
... ...
@@ -205,8 +205,8 @@ void diff_pixels_c(DCTELEM *restrict block, const UINT8 *s1, const UINT8 *s2,
205 205
 }
206 206
 
207 207
 
208
-void put_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels,
209
-                          int line_size)
208
+static void put_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels,
209
+				 int line_size)
210 210
 {
211 211
     int i;
212 212
     UINT8 *cm = cropTbl + MAX_NEG_CROP;
... ...
@@ -227,7 +227,7 @@ void put_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels,
227 227
     }
228 228
 }
229 229
 
230
-void add_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels,
230
+static void add_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels,
231 231
                           int line_size)
232 232
 {
233 233
     int i;
... ...
@@ -1353,7 +1353,7 @@ QPEL_MC(0, avg_       , _       , op_avg)
1353 1353
 #undef op_put
1354 1354
 #undef op_put_no_rnd
1355 1355
 
1356
-int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size)
1356
+static int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size)
1357 1357
 {
1358 1358
     int s, i;
1359 1359
 
... ...
@@ -1381,7 +1381,7 @@ int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size)
1381 1381
     return s;
1382 1382
 }
1383 1383
 
1384
-int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
1384
+static int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
1385 1385
 {
1386 1386
     int s, i;
1387 1387
 
... ...
@@ -1409,7 +1409,7 @@ int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
1409 1409
     return s;
1410 1410
 }
1411 1411
 
1412
-int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
1412
+static int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
1413 1413
 {
1414 1414
     int s, i;
1415 1415
     UINT8 *pix3 = pix2 + line_size;
... ...
@@ -1439,7 +1439,7 @@ int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
1439 1439
     return s;
1440 1440
 }
1441 1441
 
1442
-int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
1442
+static int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
1443 1443
 {
1444 1444
     int s, i;
1445 1445
     UINT8 *pix3 = pix2 + line_size;
... ...
@@ -1469,7 +1469,7 @@ int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
1469 1469
     return s;
1470 1470
 }
1471 1471
 
1472
-int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size)
1472
+static int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size)
1473 1473
 {
1474 1474
     int s, i;
1475 1475
 
... ...
@@ -1489,7 +1489,7 @@ int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size)
1489 1489
     return s;
1490 1490
 }
1491 1491
 
1492
-int pix_abs8x8_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
1492
+static int pix_abs8x8_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
1493 1493
 {
1494 1494
     int s, i;
1495 1495
 
... ...
@@ -1509,7 +1509,7 @@ int pix_abs8x8_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
1509 1509
     return s;
1510 1510
 }
1511 1511
 
1512
-int pix_abs8x8_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
1512
+static int pix_abs8x8_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
1513 1513
 {
1514 1514
     int s, i;
1515 1515
     UINT8 *pix3 = pix2 + line_size;
... ...
@@ -1531,7 +1531,7 @@ int pix_abs8x8_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
1531 1531
     return s;
1532 1532
 }
1533 1533
 
1534
-int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
1534
+static int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
1535 1535
 {
1536 1536
     int s, i;
1537 1537
     UINT8 *pix3 = pix2 + line_size;
... ...
@@ -1574,12 +1574,12 @@ void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable,
1574 1574
     }
1575 1575
 }
1576 1576
 
1577
-void clear_blocks_c(DCTELEM *blocks)
1577
+static void clear_blocks_c(DCTELEM *blocks)
1578 1578
 {
1579 1579
     memset(blocks, 0, sizeof(DCTELEM)*6*64);
1580 1580
 }
1581 1581
 
1582
-void dsputil_init(void)
1582
+void dsputil_init(DSPContext* c, unsigned mask)
1583 1583
 {
1584 1584
     int i;
1585 1585
 
... ...
@@ -1593,42 +1593,82 @@ void dsputil_init(void)
1593 1593
         squareTbl[i] = (i - 256) * (i - 256);
1594 1594
     }
1595 1595
 
1596
-    get_pixels = get_pixels_c;
1597
-    diff_pixels = diff_pixels_c;
1598
-    put_pixels_clamped = put_pixels_clamped_c;
1599
-    add_pixels_clamped = add_pixels_clamped_c;
1600
-    ff_gmc1= gmc1_c;
1601
-    ff_gmc= gmc_c;
1602
-    clear_blocks= clear_blocks_c;
1603
-    pix_sum= pix_sum_c;
1604
-    pix_norm1= pix_norm1_c;
1605
-
1606
-    pix_abs16x16     = pix_abs16x16_c;
1607
-    pix_abs16x16_x2  = pix_abs16x16_x2_c;
1608
-    pix_abs16x16_y2  = pix_abs16x16_y2_c;
1609
-    pix_abs16x16_xy2 = pix_abs16x16_xy2_c;
1610
-    pix_abs8x8     = pix_abs8x8_c;
1611
-    pix_abs8x8_x2  = pix_abs8x8_x2_c;
1612
-    pix_abs8x8_y2  = pix_abs8x8_y2_c;
1613
-    pix_abs8x8_xy2 = pix_abs8x8_xy2_c;
1596
+    c->get_pixels = get_pixels_c;
1597
+    c->diff_pixels = diff_pixels_c;
1598
+    c->put_pixels_clamped = put_pixels_clamped_c;
1599
+    c->add_pixels_clamped = add_pixels_clamped_c;
1600
+    c->gmc1 = gmc1_c;
1601
+    c->gmc = gmc_c;
1602
+    c->clear_blocks = clear_blocks_c;
1603
+    c->pix_sum = pix_sum_c;
1604
+    c->pix_norm1 = pix_norm1_c;
1605
+
1606
+    c->pix_abs16x16     = pix_abs16x16_c;
1607
+    c->pix_abs16x16_x2  = pix_abs16x16_x2_c;
1608
+    c->pix_abs16x16_y2  = pix_abs16x16_y2_c;
1609
+    c->pix_abs16x16_xy2 = pix_abs16x16_xy2_c;
1610
+    c->pix_abs8x8     = pix_abs8x8_c;
1611
+    c->pix_abs8x8_x2  = pix_abs8x8_x2_c;
1612
+    c->pix_abs8x8_y2  = pix_abs8x8_y2_c;
1613
+    c->pix_abs8x8_xy2 = pix_abs8x8_xy2_c;
1614
+
1615
+    c->put_pixels_tab[0][0] = put_pixels16;
1616
+    c->put_pixels_tab[0][1] = put_pixels16_x2;
1617
+    c->put_pixels_tab[0][2] = put_pixels16_y2;
1618
+    c->put_pixels_tab[0][3] = put_pixels16_xy2;
1619
+
1620
+    c->put_no_rnd_pixels_tab[0][0] = put_pixels16;
1621
+    c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2;
1622
+    c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2;
1623
+    c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2;
1624
+
1625
+    c->avg_pixels_tab[0][0] = avg_pixels16;
1626
+    c->avg_pixels_tab[0][1] = avg_pixels16_x2;
1627
+    c->avg_pixels_tab[0][2] = avg_pixels16_y2;
1628
+    c->avg_pixels_tab[0][3] = avg_pixels16_xy2;
1629
+
1630
+    c->avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16;
1631
+    c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2;
1632
+    c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2;
1633
+    c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2;
1634
+
1635
+    c->put_pixels_tab[1][0] = put_pixels8;
1636
+    c->put_pixels_tab[1][1] = put_pixels8_x2;
1637
+    c->put_pixels_tab[1][2] = put_pixels8_y2;
1638
+    c->put_pixels_tab[1][3] = put_pixels8_xy2;
1639
+
1640
+    c->put_no_rnd_pixels_tab[1][0] = put_pixels8;
1641
+    c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2;
1642
+    c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2;
1643
+    c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2;
1644
+
1645
+    c->avg_pixels_tab[1][0] = avg_pixels8;
1646
+    c->avg_pixels_tab[1][1] = avg_pixels8_x2;
1647
+    c->avg_pixels_tab[1][2] = avg_pixels8_y2;
1648
+    c->avg_pixels_tab[1][3] = avg_pixels8_xy2;
1649
+
1650
+    c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8;
1651
+    c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2;
1652
+    c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2;
1653
+    c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2;
1614 1654
 
1615 1655
 #ifdef HAVE_MMX
1616
-    dsputil_init_mmx();
1656
+    dsputil_init_mmx(c, mask);
1617 1657
 #endif
1618 1658
 #ifdef ARCH_ARMV4L
1619
-    dsputil_init_armv4l();
1659
+    dsputil_init_armv4l(c, mask);
1620 1660
 #endif
1621 1661
 #ifdef HAVE_MLIB
1622
-    dsputil_init_mlib();
1662
+    dsputil_init_mlib(c, mask);
1623 1663
 #endif
1624 1664
 #ifdef ARCH_ALPHA
1625
-    dsputil_init_alpha();
1665
+    dsputil_init_alpha(c, mask);
1626 1666
 #endif
1627 1667
 #ifdef ARCH_POWERPC
1628
-    dsputil_init_ppc();
1668
+    dsputil_init_ppc(c, mask);
1629 1669
 #endif
1630 1670
 #ifdef HAVE_MMI
1631
-    dsputil_init_mmi();
1671
+    dsputil_init_mmi(c, mask);
1632 1672
 #endif
1633 1673
 
1634 1674
     for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
... ...
@@ -1639,7 +1679,8 @@ void avcodec_set_bit_exact(void)
1639 1639
 {
1640 1640
     ff_bit_exact=1;
1641 1641
 #ifdef HAVE_MMX
1642
-    dsputil_set_bit_exact_mmx();
1642
+#warning FIXME - set_bit_exact
1643
+//    dsputil_set_bit_exact_mmx();
1643 1644
 #endif
1644 1645
 }
1645 1646
 
... ...
@@ -45,10 +45,9 @@ extern const UINT8 ff_zigzag_direct[64];
45 45
 extern UINT32 squareTbl[512];
46 46
 extern UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];
47 47
 
48
-void dsputil_init(void);
49 48
 
50 49
 /* minimum alignment rules ;)
51
-if u notice errors in the align stuff, need more alignment for some asm code for some cpu 
50
+if u notice errors in the align stuff, need more alignment for some asm code for some cpu
52 51
 or need to use a function with less aligned data then send a mail to the ffmpeg-dev list, ...
53 52
 
54 53
 !warning these alignments might not match reallity, (missing attribute((align)) stuff somewhere possible)
... ...
@@ -57,39 +56,20 @@ i (michael) didnt check them, these are just the alignents which i think could b
57 57
 !future video codecs might need functions with less strict alignment
58 58
 */
59 59
 
60
-/* pixel ops : interface with DCT */
61
-extern void (*get_pixels)(DCTELEM *block/*align 16*/, const UINT8 *pixels/*align 8*/, int line_size);
62
-extern void (*diff_pixels)(DCTELEM *block/*align 16*/, const UINT8 *s1/*align 8*/, const UINT8 *s2/*align 8*/, int stride);
63
-extern void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size);
64
-extern void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size);
65
-extern void (*ff_gmc1)(UINT8 *dst/*align 8*/, UINT8 *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder);
66
-extern void (*ff_gmc )(UINT8 *dst/*align 8*/, UINT8 *src/*align 1*/, int stride, int h, int ox, int oy, 
67
-                  int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
68
-extern void (*clear_blocks)(DCTELEM *blocks/*align 16*/);
69
-extern int (*pix_sum)(UINT8 * pix, int line_size);
70
-extern int (*pix_norm1)(UINT8 * pix, int line_size);
71
-
72
-
73
-
60
+/*
74 61
 void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size);
75 62
 void diff_pixels_c(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride);
76 63
 void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size);
77 64
 void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size);
78 65
 void clear_blocks_c(DCTELEM *blocks);
66
+*/
79 67
 
80 68
 /* add and put pixel (decoding) */
81 69
 // blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16
82 70
 typedef void (*op_pixels_func)(UINT8 *block/*align width (8 or 16)*/, const UINT8 *pixels/*align 1*/, int line_size, int h);
83 71
 typedef void (*qpel_mc_func)(UINT8 *dst/*align width (8 or 16)*/, UINT8 *src/*align 1*/, int stride);
84 72
 
85
-extern op_pixels_func put_pixels_tab[2][4];
86
-extern op_pixels_func avg_pixels_tab[2][4];
87
-extern op_pixels_func put_no_rnd_pixels_tab[2][4];
88
-extern op_pixels_func avg_no_rnd_pixels_tab[2][4];
89
-extern qpel_mc_func put_qpel_pixels_tab[2][16];
90
-extern qpel_mc_func avg_qpel_pixels_tab[2][16];
91
-extern qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
92
-extern qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16];
73
+
93 74
 
94 75
 #define CALL_2X_PIXELS(a, b, n)\
95 76
 static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
... ...
@@ -100,20 +80,46 @@ static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
100 100
 /* motion estimation */
101 101
 
102 102
 typedef int (*op_pixels_abs_func)(UINT8 *blk1/*align width (8 or 16)*/, UINT8 *blk2/*align 1*/, int line_size);
103
-
104
-extern op_pixels_abs_func pix_abs16x16;
105
-extern op_pixels_abs_func pix_abs16x16_x2;
106
-extern op_pixels_abs_func pix_abs16x16_y2;
107
-extern op_pixels_abs_func pix_abs16x16_xy2;
108
-extern op_pixels_abs_func pix_abs8x8;
109
-extern op_pixels_abs_func pix_abs8x8_x2;
110
-extern op_pixels_abs_func pix_abs8x8_y2;
111
-extern op_pixels_abs_func pix_abs8x8_xy2;
112
-
103
+/*
113 104
 int pix_abs16x16_c(UINT8 *blk1, UINT8 *blk2, int lx);
114 105
 int pix_abs16x16_x2_c(UINT8 *blk1, UINT8 *blk2, int lx);
115 106
 int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx);
116 107
 int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx);
108
+*/
109
+typedef struct DSPContext {
110
+    /* pixel ops : interface with DCT */
111
+    void (*get_pixels)(DCTELEM *block/*align 16*/, const UINT8 *pixels/*align 8*/, int line_size);
112
+    void (*diff_pixels)(DCTELEM *block/*align 16*/, const UINT8 *s1/*align 8*/, const UINT8 *s2/*align 8*/, int stride);
113
+    void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size);
114
+    void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size);
115
+    void (*gmc1)(UINT8 *dst/*align 8*/, UINT8 *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder);
116
+    void (*gmc )(UINT8 *dst/*align 8*/, UINT8 *src/*align 1*/, int stride, int h, int ox, int oy,
117
+		    int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
118
+    void (*clear_blocks)(DCTELEM *blocks/*align 16*/);
119
+    int (*pix_sum)(UINT8 * pix, int line_size);
120
+    int (*pix_norm1)(UINT8 * pix, int line_size);
121
+
122
+    /* maybe create an array for 16/8 functions */
123
+    op_pixels_func put_pixels_tab[2][4];
124
+    op_pixels_func avg_pixels_tab[2][4];
125
+    op_pixels_func put_no_rnd_pixels_tab[2][4];
126
+    op_pixels_func avg_no_rnd_pixels_tab[2][4];
127
+    qpel_mc_func put_qpel_pixels_tab[2][16];
128
+    qpel_mc_func avg_qpel_pixels_tab[2][16];
129
+    qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
130
+    qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16];
131
+
132
+    op_pixels_abs_func pix_abs16x16;
133
+    op_pixels_abs_func pix_abs16x16_x2;
134
+    op_pixels_abs_func pix_abs16x16_y2;
135
+    op_pixels_abs_func pix_abs16x16_xy2;
136
+    op_pixels_abs_func pix_abs8x8;
137
+    op_pixels_abs_func pix_abs8x8_x2;
138
+    op_pixels_abs_func pix_abs8x8_y2;
139
+    op_pixels_abs_func pix_abs8x8_xy2;
140
+} DSPContext;
141
+
142
+void dsputil_init(DSPContext* p, unsigned mask);
117 143
 
118 144
 /**
119 145
  * permute block according to permuatation.
... ...
@@ -121,8 +127,12 @@ int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx);
121 121
  */
122 122
 void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, int last);
123 123
 
124
+#define emms_c()
125
+
124 126
 #if defined(HAVE_MMX)
125 127
 
128
+#undef emms_c()
129
+
126 130
 #define MM_MMX    0x0001 /* standard MMX */
127 131
 #define MM_3DNOW  0x0004 /* AMD 3DNOW */
128 132
 #define MM_MMXEXT 0x0002 /* SSE integer functions or AMD MMX ext */
... ...
@@ -132,6 +142,8 @@ void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable,
132 132
 extern int mm_flags;
133 133
 
134 134
 int mm_support(void);
135
+void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size);
136
+void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size);
135 137
 
136 138
 static inline void emms(void)
137 139
 {
... ...
@@ -146,54 +158,44 @@ static inline void emms(void)
146 146
 
147 147
 #define __align8 __attribute__ ((aligned (8)))
148 148
 
149
-void dsputil_init_mmx(void);
150
-void dsputil_set_bit_exact_mmx(void);
149
+void dsputil_init_mmx(DSPContext* c, unsigned mask);
150
+void dsputil_set_bit_exact_mmx(DSPContext* c, unsigned mask);
151 151
 
152 152
 #elif defined(ARCH_ARMV4L)
153 153
 
154
-#define emms_c()
155
-
156 154
 /* This is to use 4 bytes read to the IDCT pointers for some 'zero'
157 155
    line ptimizations */
158 156
 #define __align8 __attribute__ ((aligned (4)))
159 157
 
160
-void dsputil_init_armv4l(void);   
158
+void dsputil_init_armv4l(DSPContext* c, unsigned mask);
161 159
 
162 160
 #elif defined(HAVE_MLIB)
163
- 
164
-#define emms_c()
165 161
 
166 162
 /* SPARC/VIS IDCT needs 8-byte aligned DCT blocks */
167 163
 #define __align8 __attribute__ ((aligned (8)))
168 164
 
169
-void dsputil_init_mlib(void);   
165
+void dsputil_init_mlib(DSPContext* c, unsigned mask);
170 166
 
171 167
 #elif defined(ARCH_ALPHA)
172 168
 
173
-#define emms_c()
174 169
 #define __align8 __attribute__ ((aligned (8)))
175 170
 
176
-void dsputil_init_alpha(void);
171
+void dsputil_init_alpha(DSPContext* c, unsigned mask);
177 172
 
178 173
 #elif defined(ARCH_POWERPC)
179 174
 
180
-#define emms_c()
181 175
 #define __align8 __attribute__ ((aligned (16)))
182 176
 
183
-void dsputil_init_ppc(void);
177
+void dsputil_init_ppc(DSPContext* c, unsigned mask);
184 178
 
185 179
 #elif defined(HAVE_MMI)
186 180
 
187
-#define emms_c()
188
-
189 181
 #define __align8 __attribute__ ((aligned (16)))
190 182
 
191
-void dsputil_init_mmi(void);   
183
+void dsputil_init_mmi(DSPContext* c, unsigned mask);
192 184
 
193 185
 #else
194 186
 
195
-#define emms_c()
196
-
197 187
 #define __align8
198 188
 
199 189
 #endif
... ...
@@ -263,9 +265,9 @@ typedef struct MDCTContext {
263 263
 } MDCTContext;
264 264
 
265 265
 int ff_mdct_init(MDCTContext *s, int nbits, int inverse);
266
-void ff_imdct_calc(MDCTContext *s, FFTSample *output, 
266
+void ff_imdct_calc(MDCTContext *s, FFTSample *output,
267 267
                 const FFTSample *input, FFTSample *tmp);
268
-void ff_mdct_calc(MDCTContext *s, FFTSample *out, 
268
+void ff_mdct_calc(MDCTContext *s, FFTSample *out,
269 269
                const FFTSample *input, FFTSample *tmp);
270 270
 void ff_mdct_end(MDCTContext *s);
271 271
 
... ...
@@ -114,6 +114,7 @@ static int dvvideo_decode_init(AVCodecContext *avctx)
114 114
     /* XXX: fix it */
115 115
     memset(&s2, 0, sizeof(MpegEncContext));
116 116
     s2.avctx = avctx;
117
+    dsputil_init(&s2.dsp, avctx->dsp_mask);
117 118
     if (DCT_common_init(&s2) < 0)
118 119
        return -1;
119 120
 
... ...
@@ -331,7 +331,7 @@ static void guess_mv(MpegEncContext *s){
331 331
                 s->mv_type = MV_TYPE_16X16;
332 332
                 s->mb_skiped=0;
333 333
 
334
-                clear_blocks(s->block[0]);
334
+		s->dsp.clear_blocks(s->block[0]);
335 335
 
336 336
                 s->mb_x= mb_x;
337 337
                 s->mb_y= mb_y;
... ...
@@ -458,7 +458,7 @@ int score_sum=0;
458 458
                     s->mv_type = MV_TYPE_16X16;
459 459
                     s->mb_skiped=0;
460 460
 
461
-                    clear_blocks(s->block[0]);
461
+		    s->dsp.clear_blocks(s->block[0]);
462 462
 
463 463
                     s->mb_x= mb_x;
464 464
                     s->mb_y= mb_y;
... ...
@@ -559,8 +559,8 @@ static int is_intra_more_likely(MpegEncContext *s){
559 559
                 UINT8 *mb_ptr     = s->current_picture[0] + mb_x*16 + mb_y*16*s->linesize;
560 560
                 UINT8 *last_mb_ptr= s->last_picture   [0] + mb_x*16 + mb_y*16*s->linesize;
561 561
     
562
-                is_intra_likely += pix_abs16x16(last_mb_ptr, mb_ptr                    , s->linesize);
563
-                is_intra_likely -= pix_abs16x16(last_mb_ptr, last_mb_ptr+s->linesize*16, s->linesize);
562
+		is_intra_likely += s->dsp.pix_abs16x16(last_mb_ptr, mb_ptr                    , s->linesize);
563
+                is_intra_likely -= s->dsp.pix_abs16x16(last_mb_ptr, last_mb_ptr+s->linesize*16, s->linesize);
564 564
             }else{
565 565
                 if(s->mbintra_table[i]) //HACK (this is allways inited but we should use mb_type[])
566 566
                    is_intra_likely++;
... ...
@@ -738,7 +738,7 @@ void ff_error_resilience(MpegEncContext *s){
738 738
                 s->mv[0][0][1] = s->motion_val[ mb_x*2+1 + (mb_y*2+1)*s->block_wrap[0] ][1];
739 739
             }
740 740
         
741
-            clear_blocks(s->block[0]);
741
+	    s->dsp.clear_blocks(s->block[0]);
742 742
 
743 743
             s->mb_x= mb_x;
744 744
             s->mb_y= mb_y;
... ...
@@ -778,8 +778,8 @@ void ff_error_resilience(MpegEncContext *s){
778 778
                     s->mv[1][0][0]= 0;
779 779
                     s->mv[1][0][1]= 0;
780 780
                 }
781
-                                
782
-                clear_blocks(s->block[0]);
781
+
782
+                s->dsp.clear_blocks(s->block[0]);
783 783
                 s->mb_x= mb_x;
784 784
                 s->mb_y= mb_y;
785 785
                 MPV_decode_mb(s, s->block);
... ...
@@ -538,7 +538,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
538 538
                         if(s->coded_order[i+1].pict_type!=B_TYPE) break;
539 539
 
540 540
                         b_pic= s->coded_order[i+1].picture[0] + offset;
541
-                        diff= pix_abs16x16(p_pic, b_pic, s->linesize);
541
+			diff= s->dsp.pix_abs16x16(p_pic, b_pic, s->linesize);
542 542
                         if(diff>s->qscale*70){ //FIXME check that 70 is optimal
543 543
                             s->mb_skiped=0;
544 544
                             break;
... ...
@@ -195,7 +195,7 @@ static int decode_slice(MpegEncContext *s){
195 195
             }
196 196
 
197 197
             /* DCT & quantize */
198
-            clear_blocks(s->block[0]);
198
+	    s->dsp.clear_blocks(s->block[0]);
199 199
             
200 200
             s->mv_dir = MV_DIR_FORWARD;
201 201
             s->mv_type = MV_TYPE_16X16;
... ...
@@ -22,7 +22,7 @@
22 22
 #include "../dsputil.h"
23 23
 
24 24
 int mm_flags; /* multimedia extension flags */
25
-
25
+/* FIXME use them in static form */
26 26
 int pix_abs16x16_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
27 27
 int pix_abs16x16_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
28 28
 int pix_abs16x16_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
... ...
@@ -242,7 +242,7 @@ static void diff_pixels_mmx(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, in
242 242
     );
243 243
 }
244 244
 
245
-static void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size)
245
+void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size)
246 246
 {
247 247
     const DCTELEM *p;
248 248
     UINT8 *pix;
... ...
@@ -297,7 +297,7 @@ static void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line
297 297
 	    :"memory");
298 298
 }
299 299
 
300
-static void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size)
300
+void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size)
301 301
 {
302 302
     const DCTELEM *p;
303 303
     UINT8 *pix;
... ...
@@ -457,7 +457,7 @@ static int pix_sum16_mmx(UINT8 * pix, int line_size){
457 457
 static void just_return() { return; }
458 458
 #endif
459 459
 
460
-void dsputil_init_mmx(void)
460
+void dsputil_init_mmx(DSPContext* c, unsigned mask)
461 461
 {
462 462
     mm_flags = mm_support();
463 463
 #if 0
... ...
@@ -476,112 +476,112 @@ void dsputil_init_mmx(void)
476 476
 #endif
477 477
 
478 478
     if (mm_flags & MM_MMX) {
479
-        get_pixels = get_pixels_mmx;
480
-        diff_pixels = diff_pixels_mmx;
481
-        put_pixels_clamped = put_pixels_clamped_mmx;
482
-        add_pixels_clamped = add_pixels_clamped_mmx;
483
-        clear_blocks= clear_blocks_mmx;
484
-        pix_sum= pix_sum16_mmx;
485
-
486
-        pix_abs16x16     = pix_abs16x16_mmx;
487
-        pix_abs16x16_x2  = pix_abs16x16_x2_mmx;
488
-        pix_abs16x16_y2  = pix_abs16x16_y2_mmx;
489
-        pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx;
490
-        pix_abs8x8    = pix_abs8x8_mmx;
491
-        pix_abs8x8_x2 = pix_abs8x8_x2_mmx;
492
-        pix_abs8x8_y2 = pix_abs8x8_y2_mmx;
493
-        pix_abs8x8_xy2= pix_abs8x8_xy2_mmx;
494
-
495
-        put_pixels_tab[0][0] = put_pixels16_mmx;
496
-        put_pixels_tab[0][1] = put_pixels16_x2_mmx;
497
-        put_pixels_tab[0][2] = put_pixels16_y2_mmx;
498
-        put_pixels_tab[0][3] = put_pixels16_xy2_mmx;
499
-
500
-        put_no_rnd_pixels_tab[0][0] = put_pixels16_mmx;
501
-        put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx;
502
-        put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx;
503
-        put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_mmx;
504
-
505
-        avg_pixels_tab[0][0] = avg_pixels16_mmx;
506
-        avg_pixels_tab[0][1] = avg_pixels16_x2_mmx;
507
-        avg_pixels_tab[0][2] = avg_pixels16_y2_mmx;
508
-        avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx;
509
-
510
-        avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16_mmx;
511
-        avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_mmx;
512
-        avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_mmx;
513
-        avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_mmx;
514
-        
515
-        put_pixels_tab[1][0] = put_pixels8_mmx;
516
-        put_pixels_tab[1][1] = put_pixels8_x2_mmx;
517
-        put_pixels_tab[1][2] = put_pixels8_y2_mmx;
518
-        put_pixels_tab[1][3] = put_pixels8_xy2_mmx;
519
-
520
-        put_no_rnd_pixels_tab[1][0] = put_pixels8_mmx;
521
-        put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx;
522
-        put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx;
523
-        put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_mmx;
524
-
525
-        avg_pixels_tab[1][0] = avg_pixels8_mmx;
526
-        avg_pixels_tab[1][1] = avg_pixels8_x2_mmx;
527
-        avg_pixels_tab[1][2] = avg_pixels8_y2_mmx;
528
-        avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx;
529
-
530
-        avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_mmx;
531
-        avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_mmx;
532
-        avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_mmx;
533
-        avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_mmx;
479
+        c->get_pixels = get_pixels_mmx;
480
+        c->diff_pixels = diff_pixels_mmx;
481
+        c->put_pixels_clamped = put_pixels_clamped_mmx;
482
+        c->add_pixels_clamped = add_pixels_clamped_mmx;
483
+        c->clear_blocks = clear_blocks_mmx;
484
+        c->pix_sum = pix_sum16_mmx;
485
+
486
+        c->pix_abs16x16     = pix_abs16x16_mmx;
487
+        c->pix_abs16x16_x2  = pix_abs16x16_x2_mmx;
488
+        c->pix_abs16x16_y2  = pix_abs16x16_y2_mmx;
489
+        c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx;
490
+        c->pix_abs8x8     = pix_abs8x8_mmx;
491
+        c->pix_abs8x8_x2  = pix_abs8x8_x2_mmx;
492
+        c->pix_abs8x8_y2  = pix_abs8x8_y2_mmx;
493
+        c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx;
494
+
495
+        c->put_pixels_tab[0][0] = put_pixels16_mmx;
496
+        c->put_pixels_tab[0][1] = put_pixels16_x2_mmx;
497
+        c->put_pixels_tab[0][2] = put_pixels16_y2_mmx;
498
+        c->put_pixels_tab[0][3] = put_pixels16_xy2_mmx;
499
+
500
+        c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mmx;
501
+        c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx;
502
+        c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx;
503
+        c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_mmx;
504
+
505
+        c->avg_pixels_tab[0][0] = avg_pixels16_mmx;
506
+        c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx;
507
+        c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx;
508
+        c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx;
509
+
510
+        c->avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16_mmx;
511
+        c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_mmx;
512
+        c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_mmx;
513
+        c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_mmx;
514
+
515
+        c->put_pixels_tab[1][0] = put_pixels8_mmx;
516
+        c->put_pixels_tab[1][1] = put_pixels8_x2_mmx;
517
+        c->put_pixels_tab[1][2] = put_pixels8_y2_mmx;
518
+        c->put_pixels_tab[1][3] = put_pixels8_xy2_mmx;
519
+
520
+        c->put_no_rnd_pixels_tab[1][0] = put_pixels8_mmx;
521
+        c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx;
522
+        c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx;
523
+        c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_mmx;
524
+
525
+        c->avg_pixels_tab[1][0] = avg_pixels8_mmx;
526
+        c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx;
527
+        c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx;
528
+        c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx;
529
+
530
+        c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_mmx;
531
+        c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_mmx;
532
+        c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_mmx;
533
+        c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_mmx;
534 534
 
535 535
         if (mm_flags & MM_MMXEXT) {
536
-            pix_abs16x16    = pix_abs16x16_mmx2;
537
-            pix_abs16x16_x2 = pix_abs16x16_x2_mmx2;
538
-            pix_abs16x16_y2 = pix_abs16x16_y2_mmx2;
539
-            pix_abs16x16_xy2= pix_abs16x16_xy2_mmx2;
540
-
541
-            pix_abs8x8    = pix_abs8x8_mmx2;
542
-            pix_abs8x8_x2 = pix_abs8x8_x2_mmx2;
543
-            pix_abs8x8_y2 = pix_abs8x8_y2_mmx2;
544
-            pix_abs8x8_xy2= pix_abs8x8_xy2_mmx2;
545
-
546
-            put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
547
-            put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
548
-            put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2;
549
-            put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2;
550
-
551
-            avg_pixels_tab[0][0] = avg_pixels16_mmx2;
552
-            avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2;
553
-            avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2;
554
-            avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2;
555
-
556
-            put_pixels_tab[1][1] = put_pixels8_x2_mmx2;
557
-            put_pixels_tab[1][2] = put_pixels8_y2_mmx2;
558
-            put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2;
559
-            put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2;
560
-
561
-            avg_pixels_tab[1][0] = avg_pixels8_mmx2;
562
-            avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2;
563
-            avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2;
564
-            avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2;
536
+            c->pix_abs16x16     = pix_abs16x16_mmx2;
537
+            c->pix_abs16x16_x2  = pix_abs16x16_x2_mmx2;
538
+            c->pix_abs16x16_y2  = pix_abs16x16_y2_mmx2;
539
+            c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx2;
540
+
541
+            c->pix_abs8x8     = pix_abs8x8_mmx2;
542
+            c->pix_abs8x8_x2  = pix_abs8x8_x2_mmx2;
543
+            c->pix_abs8x8_y2  = pix_abs8x8_y2_mmx2;
544
+            c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx2;
545
+
546
+            c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
547
+            c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
548
+            c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2;
549
+            c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2;
550
+
551
+            c->avg_pixels_tab[0][0] = avg_pixels16_mmx2;
552
+            c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2;
553
+            c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2;
554
+            c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2;
555
+
556
+            c->put_pixels_tab[1][1] = put_pixels8_x2_mmx2;
557
+            c->put_pixels_tab[1][2] = put_pixels8_y2_mmx2;
558
+            c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2;
559
+            c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2;
560
+
561
+            c->avg_pixels_tab[1][0] = avg_pixels8_mmx2;
562
+            c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2;
563
+            c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2;
564
+            c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2;
565 565
         } else if (mm_flags & MM_3DNOW) {
566
-            put_pixels_tab[0][1] = put_pixels16_x2_3dnow;
567
-            put_pixels_tab[0][2] = put_pixels16_y2_3dnow;
568
-            put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow;
569
-            put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_3dnow;
570
-
571
-            avg_pixels_tab[0][0] = avg_pixels16_3dnow;
572
-            avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow;
573
-            avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow;
574
-            avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow;
575
-            
576
-            put_pixels_tab[1][1] = put_pixels8_x2_3dnow;
577
-            put_pixels_tab[1][2] = put_pixels8_y2_3dnow;
578
-            put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_3dnow;
579
-            put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_3dnow;
580
-
581
-            avg_pixels_tab[1][0] = avg_pixels8_3dnow;
582
-            avg_pixels_tab[1][1] = avg_pixels8_x2_3dnow;
583
-            avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow;
584
-            avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow;
566
+            c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow;
567
+            c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow;
568
+            c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow;
569
+            c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_3dnow;
570
+
571
+            c->avg_pixels_tab[0][0] = avg_pixels16_3dnow;
572
+            c->avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow;
573
+            c->avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow;
574
+            c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow;
575
+
576
+            c->put_pixels_tab[1][1] = put_pixels8_x2_3dnow;
577
+            c->put_pixels_tab[1][2] = put_pixels8_y2_3dnow;
578
+            c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_3dnow;
579
+            c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_3dnow;
580
+
581
+            c->avg_pixels_tab[1][0] = avg_pixels8_3dnow;
582
+            c->avg_pixels_tab[1][1] = avg_pixels8_x2_3dnow;
583
+            c->avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow;
584
+            c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow;
585 585
         }
586 586
     }
587 587
 
... ...
@@ -624,25 +624,24 @@ void dsputil_init_mmx(void)
624 624
 /* remove any non bit exact operation (testing purpose). NOTE that
625 625
    this function should be kept as small as possible because it is
626 626
    always difficult to test automatically non bit exact cases. */
627
-void dsputil_set_bit_exact_mmx(void)
627
+void dsputil_set_bit_exact_mmx(DSPContext* c, unsigned mask)
628 628
 {
629 629
     if (mm_flags & MM_MMX) {
630
-    
631 630
         /* MMX2 & 3DNOW */
632
-        put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx;
633
-        put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx;
634
-        avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx;
635
-        put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx;
636
-        put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx;
637
-        avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx;
631
+        c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx;
632
+        c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx;
633
+        c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx;
634
+        c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx;
635
+        c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx;
636
+        c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx;
638 637
 
639 638
         if (mm_flags & MM_MMXEXT) {
640
-            pix_abs16x16_x2  = pix_abs16x16_x2_mmx;
641
-            pix_abs16x16_y2  = pix_abs16x16_y2_mmx;
642
-            pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx;
643
-            pix_abs8x8_x2 = pix_abs8x8_x2_mmx;
644
-            pix_abs8x8_y2 = pix_abs8x8_y2_mmx;
645
-            pix_abs8x8_xy2= pix_abs8x8_xy2_mmx;
639
+            c->pix_abs16x16_x2  = pix_abs16x16_x2_mmx;
640
+            c->pix_abs16x16_y2  = pix_abs16x16_y2_mmx;
641
+            c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx;
642
+            c->pix_abs8x8_x2 = pix_abs8x8_x2_mmx;
643
+            c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx;
644
+            c->pix_abs8x8_xy2= pix_abs8x8_xy2_mmx;
646 645
         }
647 646
     }
648 647
 }
... ...
@@ -88,8 +88,8 @@ static int pix_norm(UINT8 * pix1, UINT8 * pix2, int line_size)
88 88
     return s;
89 89
 }
90 90
 
91
-static void no_motion_search(MpegEncContext * s,
92
-			     int *mx_ptr, int *my_ptr)
91
+static inline void no_motion_search(MpegEncContext * s,
92
+				    int *mx_ptr, int *my_ptr)
93 93
 {
94 94
     *mx_ptr = 16 * s->mb_x;
95 95
     *my_ptr = 16 * s->mb_y;
... ...
@@ -123,7 +123,7 @@ static int full_motion_search(MpegEncContext * s,
123 123
     my = 0;
124 124
     for (y = y1; y <= y2; y++) {
125 125
 	for (x = x1; x <= x2; x++) {
126
-	    d = pix_abs16x16(pix, ref_picture + (y * s->linesize) + x,
126
+	    d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x,
127 127
 			     s->linesize);
128 128
 	    if (d < dmin ||
129 129
 		(d == dmin &&
... ...
@@ -188,7 +188,7 @@ static int log_motion_search(MpegEncContext * s,
188 188
     do {
189 189
 	for (y = y1; y <= y2; y += range) {
190 190
 	    for (x = x1; x <= x2; x += range) {
191
-		d = pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize);
191
+		d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize);
192 192
 		if (d < dmin || (d == dmin && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
193 193
 		    dmin = d;
194 194
 		    mx = x;
... ...
@@ -268,7 +268,7 @@ static int phods_motion_search(MpegEncContext * s,
268 268
 
269 269
 	lastx = x;
270 270
 	for (x = x1; x <= x2; x += range) {
271
-	    d = pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize);
271
+	    d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize);
272 272
 	    if (d < dminx || (d == dminx && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
273 273
 		dminx = d;
274 274
 		mx = x;
... ...
@@ -277,7 +277,7 @@ static int phods_motion_search(MpegEncContext * s,
277 277
 
278 278
 	x = lastx;
279 279
 	for (y = y1; y <= y2; y += range) {
280
-	    d = pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize);
280
+	    d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize);
281 281
 	    if (d < dminy || (d == dminy && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
282 282
 		dminy = d;
283 283
 		my = y;
... ...
@@ -324,7 +324,7 @@ static int phods_motion_search(MpegEncContext * s,
324 324
     const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
325 325
     const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
326 326
     if(map[index]!=key){\
327
-        d = pix_abs16x16(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
327
+        d = s->dsp.pix_abs16x16(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
328 328
         d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\
329 329
         COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
330 330
         map[index]= key;\
... ...
@@ -355,7 +355,7 @@ static int phods_motion_search(MpegEncContext * s,
355 355
     const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
356 356
     const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
357 357
     if(map[index]!=key){\
358
-        d = pix_abs8x8(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
358
+        d = s->dsp.pix_abs8x8(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
359 359
         d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\
360 360
         COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
361 361
         map[index]= key;\
... ...
@@ -590,7 +590,7 @@ static int epzs_motion_search(MpegEncContext * s,
590 590
     
591 591
     map_generation= update_map_generation(s);
592 592
 
593
-    dmin = pix_abs16x16(new_pic, old_pic, pic_stride);
593
+    dmin = s->dsp.pix_abs16x16(new_pic, old_pic, pic_stride);
594 594
     map[0]= map_generation;
595 595
     score_map[0]= dmin;
596 596
 
... ...
@@ -644,11 +644,11 @@ static int epzs_motion_search(MpegEncContext * s,
644 644
     if(s->me_method==ME_EPZS)
645 645
         dmin= small_diamond_search(s, best, dmin, new_pic, old_pic, pic_stride, 
646 646
                                    pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, 
647
-                                   shift, map, score_map, map_generation, pix_abs16x16);
647
+				   shift, map, score_map, map_generation, s->dsp.pix_abs16x16);
648 648
     else
649 649
         dmin=         cross_search(s, best, dmin, new_pic, old_pic, pic_stride, 
650 650
                                    pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, 
651
-                                   shift, map, score_map, map_generation, pix_abs16x16);
651
+                                   shift, map, score_map, map_generation, s->dsp.pix_abs16x16);
652 652
 //check(best[0],best[1],0, b1)
653 653
     *mx_ptr= best[0];
654 654
     *my_ptr= best[1];    
... ...
@@ -683,7 +683,7 @@ static int epzs_motion_search4(MpegEncContext * s, int block,
683 683
 //printf("%d %d %d %d //",xmin, ymin, xmax, ymax); 
684 684
     /* first line */
685 685
     if ((s->mb_y == 0 || s->first_slice_line) && block<2) {
686
-        CHECK_MV4(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
686
+	CHECK_MV4(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
687 687
         CHECK_MV4(P_LAST[0]>>shift, P_LAST[1]>>shift)
688 688
         CHECK_MV4(P_MV1[0]>>shift, P_MV1[1]>>shift)
689 689
     }else{
... ...
@@ -705,11 +705,11 @@ static int epzs_motion_search4(MpegEncContext * s, int block,
705 705
     if(s->me_method==ME_EPZS)
706 706
         dmin= small_diamond_search(s, best, dmin, new_pic, old_pic, pic_stride, 
707 707
                                    pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, 
708
-                                   shift, map, score_map, map_generation, pix_abs8x8);
708
+				   shift, map, score_map, map_generation, s->dsp.pix_abs8x8);
709 709
     else
710 710
         dmin=         cross_search(s, best, dmin, new_pic, old_pic, pic_stride, 
711 711
                                    pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, 
712
-                                   shift, map, score_map, map_generation, pix_abs8x8);
712
+                                   shift, map, score_map, map_generation, s->dsp.pix_abs8x8);
713 713
 
714 714
     *mx_ptr= best[0];
715 715
     *my_ptr= best[1];    
... ...
@@ -1023,8 +1023,8 @@ static inline int mv4_search(MpegEncContext *s, int xmin, int ymin, int xmax, in
1023 1023
         dmin4 = epzs_motion_search4(s, block, &mx4, &my4, P, pred_x4, pred_y4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, ref_picture);
1024 1024
 
1025 1025
         dmin4= fast_halfpel_motion_search(s, &mx4, &my4, dmin4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, 
1026
-                                   pred_x4, pred_y4, ref_picture, pix_abs8x8_x2, 
1027
-                                   pix_abs8x8_y2, pix_abs8x8_xy2, block);
1026
+					  pred_x4, pred_y4, ref_picture, s->dsp.pix_abs8x8_x2,
1027
+					  s->dsp.pix_abs8x8_y2, s->dsp.pix_abs8x8_xy2, block);
1028 1028
  
1029 1029
         s->motion_val[ s->block_index[block] ][0]= mx4;
1030 1030
         s->motion_val[ s->block_index[block] ][1]= my4;
... ...
@@ -1133,9 +1133,10 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
1133 1133
     /* At this point (mx,my) are full-pell and the relative displacement */
1134 1134
     ppix = ref_picture + ((yy+my) * s->linesize) + (xx+mx);
1135 1135
     
1136
-    sum = pix_sum(pix, s->linesize);
1136
+    sum = s->dsp.pix_sum(pix, s->linesize);
1137 1137
     
1138
-    varc = (pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
1138
+    varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
1139
+    // FIXME: MMX OPTIMIZE
1139 1140
     vard = (pix_norm(pix, ppix, s->linesize)+128)>>8;
1140 1141
 
1141 1142
 //printf("%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout);
... ...
@@ -1161,13 +1162,13 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
1161 1161
         if (varc*2 + 200 > vard){
1162 1162
             mb_type|= MB_TYPE_INTER;
1163 1163
             if(s->me_method >= ME_EPZS)
1164
-                fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, 
1165
-                                           pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2, 
1166
-                                           pix_abs16x16_xy2, 0);
1164
+                fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
1165
+					   pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2,
1166
+					   s->dsp.pix_abs16x16_y2, s->dsp.pix_abs16x16_xy2, 0);
1167 1167
             else
1168
-                halfpel_motion_search(     s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, 
1169
-                                           pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2, 
1170
-                                           pix_abs16x16_xy2, 0);                                           
1168
+                halfpel_motion_search(     s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
1169
+				           pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2,
1170
+				           s->dsp.pix_abs16x16_y2, s->dsp.pix_abs16x16_xy2, 0);
1171 1171
         }else{
1172 1172
             mx <<=1;
1173 1173
             my <<=1;
... ...
@@ -1186,13 +1187,13 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
1186 1186
             mb_type|= MB_TYPE_INTER;
1187 1187
             if (s->me_method != ME_ZERO) {
1188 1188
                 if(s->me_method >= ME_EPZS)
1189
-                    dmin= fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, 
1190
-                                           pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2, 
1191
-                                           pix_abs16x16_xy2, 0);
1189
+		    dmin= fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
1190
+                                           pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2, s->dsp.pix_abs16x16_y2,
1191
+                                           s->dsp.pix_abs16x16_xy2, 0);
1192 1192
                 else
1193
-                    dmin= halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, 
1194
-                                           pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2, 
1195
-                                           pix_abs16x16_xy2, 0);
1193
+                    dmin= halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
1194
+                                           pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2, s->dsp.pix_abs16x16_y2,
1195
+                                           s->dsp.pix_abs16x16_xy2, 0);
1196 1196
                 if((s->flags&CODEC_FLAG_4MV)
1197 1197
                    && !s->skip_me && varc>50 && vard>10){
1198 1198
                     int dmin4= mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift);
... ...
@@ -1303,9 +1304,9 @@ int ff_estimate_motion_b(MpegEncContext * s,
1303 1303
         break;
1304 1304
     }
1305 1305
     
1306
-    dmin= fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, 
1307
-                                pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2, 
1308
-                                pix_abs16x16_xy2, 0);
1306
+    dmin= fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
1307
+                                pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2, s->dsp.pix_abs16x16_y2,
1308
+                                s->dsp.pix_abs16x16_xy2, 0);
1309 1309
 //printf("%d %d %d %d//", s->mb_x, s->mb_y, mx, my);
1310 1310
 //    s->mb_type[mb_y*s->mb_width + mb_x]= mb_type;
1311 1311
     mv_table[mot_xy][0]= mx;
... ...
@@ -1343,8 +1344,8 @@ static inline int check_bidir_mv(MpegEncContext * s,
1343 1343
         dxy&= 1;
1344 1344
 
1345 1345
     ptr = s->last_picture[0] + (src_y * s->linesize) + src_x;
1346
-    put_pixels_tab[0][dxy](dest_y    , ptr    , s->linesize, 16);
1347
-    
1346
+    s->dsp.put_pixels_tab[0][dxy](dest_y    , ptr    , s->linesize, 16);
1347
+
1348 1348
     fbmin += (mv_penalty[motion_bx-pred_bx] + mv_penalty[motion_by-pred_by])*s->qscale;
1349 1349
 
1350 1350
     dxy = ((motion_by & 1) << 1) | (motion_bx & 1);
... ...
@@ -1356,11 +1357,11 @@ static inline int check_bidir_mv(MpegEncContext * s,
1356 1356
     src_y = clip(src_y, -16, s->height);
1357 1357
     if (src_y == s->height)
1358 1358
         dxy&= 1;
1359
-            
1359
+
1360 1360
     ptr = s->next_picture[0] + (src_y * s->linesize) + src_x;
1361
-    avg_pixels_tab[0][dxy](dest_y    , ptr    , s->linesize, 16);
1362
-    
1363
-    fbmin += pix_abs16x16(s->new_picture[0] + mb_x*16 + mb_y*16*s->linesize, dest_y, s->linesize);
1361
+    s->dsp.avg_pixels_tab[0][dxy](dest_y    , ptr    , s->linesize, 16);
1362
+
1363
+    fbmin += s->dsp.pix_abs16x16(s->new_picture[0] + mb_x*16 + mb_y*16*s->linesize, dest_y, s->linesize);
1364 1364
     return fbmin;
1365 1365
 }
1366 1366
 
... ...
@@ -1443,7 +1444,7 @@ static inline int direct_search(MpegEncContext * s,
1443 1443
             if (src_y == height) dxy &= ~2;
1444 1444
 
1445 1445
             ptr = s->last_picture[0] + (src_y * s->linesize) + src_x;
1446
-            put_pixels_tab[0][dxy](dest_y    , ptr    , s->linesize, 16);
1446
+            s->dsp.put_pixels_tab[0][dxy](dest_y    , ptr    , s->linesize, 16);
1447 1447
 
1448 1448
             dxy = ((motion_by & 1) << 1) | (motion_bx & 1);
1449 1449
             src_x = (mb_x + bx) * 16 + (motion_bx >> 1);
... ...
@@ -1453,7 +1454,7 @@ static inline int direct_search(MpegEncContext * s,
1453 1453
             src_y = clip(src_y, -16, height);
1454 1454
             if (src_y == height) dxy &= ~2;
1455 1455
 
1456
-            avg_pixels_tab[0][dxy](dest_y    , ptr    , s->linesize, 16);
1456
+	    s->dsp.avg_pixels_tab[0][dxy](dest_y    , ptr    , s->linesize, 16);
1457 1457
         }
1458 1458
     }
1459 1459
 
... ...
@@ -1623,7 +1623,7 @@ static int mpeg_decode_slice(AVCodecContext *avctx,
1623 1623
     s->mb_incr= 1;
1624 1624
 
1625 1625
     for(;;) {
1626
-        clear_blocks(s->block[0]);
1626
+	s->dsp.clear_blocks(s->block[0]);
1627 1627
         
1628 1628
         ret = mpeg_decode_mb(s, s->block);
1629 1629
         dprintf("ret=%d\n", ret);
... ...
@@ -57,7 +57,7 @@ static void emulated_edge_mc(MpegEncContext *s, UINT8 *src, int linesize, int bl
57 57
 /* for jpeg fast DCT */
58 58
 #define CONST_BITS 14
59 59
 
60
-static const unsigned short aanscales[64] = {
60
+static const uint16_t aanscales[64] = {
61 61
     /* precomputed values scaled up by 14 bits */
62 62
     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
63 63
     22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
... ...
@@ -70,7 +70,7 @@ static const unsigned short aanscales[64] = {
70 70
 };
71 71
 
72 72
 /* Input permutation for the simple_idct_mmx */
73
-static const UINT8 simple_mmx_permutation[64]={
73
+static const uint8_t simple_mmx_permutation[64]={
74 74
 	0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, 
75 75
 	0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, 
76 76
 	0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, 
... ...
@@ -81,7 +81,7 @@ static const UINT8 simple_mmx_permutation[64]={
81 81
 	0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
82 82
 };
83 83
 
84
-static UINT8 h263_chroma_roundtab[16] = {
84
+static const uint8_t h263_chroma_roundtab[16] = {
85 85
     0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
86 86
 };
87 87
 
... ...
@@ -172,16 +172,19 @@ void ff_init_scantable(MpegEncContext *s, ScanTable *st, const UINT8 *src_scanta
172 172
 }
173 173
 
174 174
 /* XXX: those functions should be suppressed ASAP when all IDCTs are
175
-   converted */
175
+ converted */
176
+// *FIXME* this is ugly hack using local static
177
+static void (*ff_put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
178
+static void (*ff_add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
176 179
 static void ff_jref_idct_put(UINT8 *dest, int line_size, DCTELEM *block)
177 180
 {
178 181
     j_rev_dct (block);
179
-    put_pixels_clamped(block, dest, line_size);
182
+    ff_put_pixels_clamped(block, dest, line_size);
180 183
 }
181 184
 static void ff_jref_idct_add(UINT8 *dest, int line_size, DCTELEM *block)
182 185
 {
183 186
     j_rev_dct (block);
184
-    add_pixels_clamped(block, dest, line_size);
187
+    ff_add_pixels_clamped(block, dest, line_size);
185 188
 }
186 189
 
187 190
 /* init common dct for both encoder and decoder */
... ...
@@ -189,6 +192,9 @@ int DCT_common_init(MpegEncContext *s)
189 189
 {
190 190
     int i;
191 191
 
192
+    ff_put_pixels_clamped = s->dsp.put_pixels_clamped;
193
+    ff_add_pixels_clamped = s->dsp.add_pixels_clamped;
194
+
192 195
     s->dct_unquantize_h263 = dct_unquantize_h263_c;
193 196
     s->dct_unquantize_mpeg1 = dct_unquantize_mpeg1_c;
194 197
     s->dct_unquantize_mpeg2 = dct_unquantize_mpeg2_c;
... ...
@@ -268,29 +274,30 @@ int MPV_common_init(MpegEncContext *s)
268 268
     UINT8 *pict;
269 269
     int y_size, c_size, yc_size, i;
270 270
 
271
+    dsputil_init(&s->dsp, s->avctx->dsp_mask);
271 272
     DCT_common_init(s);
272
-    
273
+
273 274
     s->flags= s->avctx->flags;
274 275
 
275 276
     s->mb_width = (s->width + 15) / 16;
276 277
     s->mb_height = (s->height + 15) / 16;
277
-    
278
-    y_size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2);
279
-    c_size = (s->mb_width + 2) * (s->mb_height + 2);
280
-    yc_size = y_size + 2 * c_size;
281
-    
278
+
282 279
     /* set default edge pos, will be overriden in decode_header if needed */
283 280
     s->h_edge_pos= s->mb_width*16;
284 281
     s->v_edge_pos= s->mb_height*16;
285
-    
282
+
283
+    s->mb_num = s->mb_width * s->mb_height;
284
+
285
+    y_size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2);
286
+    c_size = (s->mb_width + 2) * (s->mb_height + 2);
287
+    yc_size = y_size + 2 * c_size;
288
+
286 289
     /* convert fourcc to upper case */
287 290
     s->avctx->fourcc=   toupper( s->avctx->fourcc     &0xFF)          
288 291
                      + (toupper((s->avctx->fourcc>>8 )&0xFF)<<8 )
289 292
                      + (toupper((s->avctx->fourcc>>16)&0xFF)<<16) 
290 293
                      + (toupper((s->avctx->fourcc>>24)&0xFF)<<24);
291 294
 
292
-    s->mb_num = s->mb_width * s->mb_height;
293
-    
294 295
     if(!(s->flags&CODEC_FLAG_DR1)){
295 296
       s->linesize   = s->mb_width * 16 + 2 * EDGE_WIDTH;
296 297
       s->uvlinesize = s->mb_width * 8  +     EDGE_WIDTH;
... ...
@@ -1133,17 +1140,17 @@ static inline void gmc1_motion(MpegEncContext *s,
1133 1133
     }
1134 1134
     
1135 1135
     if((motion_x|motion_y)&7){
1136
-        ff_gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
1137
-        ff_gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
1136
+        s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
1137
+        s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
1138 1138
     }else{
1139 1139
         int dxy;
1140 1140
         
1141 1141
         dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
1142 1142
         if (s->no_rounding){
1143
-            put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
1143
+	    s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
1144 1144
         }else{
1145
-            put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
1146
-        }        
1145
+            s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
1146
+        }
1147 1147
     }
1148 1148
     
1149 1149
     if(s->flags&CODEC_FLAG_GRAY) return;
... ...
@@ -1167,14 +1174,14 @@ static inline void gmc1_motion(MpegEncContext *s,
1167 1167
         emulated_edge_mc(s, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
1168 1168
         ptr= s->edge_emu_buffer;
1169 1169
     }
1170
-    ff_gmc1(dest_cb + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
1170
+    s->dsp.gmc1(dest_cb + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
1171 1171
     
1172 1172
     ptr = ref_picture[2] + offset;
1173 1173
     if(emu){
1174 1174
         emulated_edge_mc(s, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
1175 1175
         ptr= s->edge_emu_buffer;
1176 1176
     }
1177
-    ff_gmc1(dest_cr + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
1177
+    s->dsp.gmc1(dest_cr + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
1178 1178
     
1179 1179
     return;
1180 1180
 }
... ...
@@ -1199,14 +1206,14 @@ static inline void gmc_motion(MpegEncContext *s,
1199 1199
     ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
1200 1200
     oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;
1201 1201
 
1202
-    ff_gmc(dest_y, ptr, linesize, 16, 
1202
+    s->dsp.gmc(dest_y, ptr, linesize, 16,
1203 1203
            ox, 
1204 1204
            oy, 
1205 1205
            s->sprite_delta[0][0], s->sprite_delta[0][1],
1206 1206
            s->sprite_delta[1][0], s->sprite_delta[1][1], 
1207 1207
            a+1, (1<<(2*a+1)) - s->no_rounding,
1208 1208
            s->h_edge_pos, s->v_edge_pos);
1209
-    ff_gmc(dest_y+8, ptr, linesize, 16, 
1209
+    s->dsp.gmc(dest_y+8, ptr, linesize, 16,
1210 1210
            ox + s->sprite_delta[0][0]*8, 
1211 1211
            oy + s->sprite_delta[1][0]*8, 
1212 1212
            s->sprite_delta[0][0], s->sprite_delta[0][1],
... ...
@@ -1224,7 +1231,7 @@ static inline void gmc_motion(MpegEncContext *s,
1224 1224
     oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;
1225 1225
 
1226 1226
     ptr = ref_picture[1] + (src_offset>>1);
1227
-    ff_gmc(dest_cb, ptr, uvlinesize, 8, 
1227
+    s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
1228 1228
            ox, 
1229 1229
            oy, 
1230 1230
            s->sprite_delta[0][0], s->sprite_delta[0][1],
... ...
@@ -1233,7 +1240,7 @@ static inline void gmc_motion(MpegEncContext *s,
1233 1233
            s->h_edge_pos>>1, s->v_edge_pos>>1);
1234 1234
     
1235 1235
     ptr = ref_picture[2] + (src_offset>>1);
1236
-    ff_gmc(dest_cr, ptr, uvlinesize, 8, 
1236
+    s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
1237 1237
            ox, 
1238 1238
            oy, 
1239 1239
            s->sprite_delta[0][0], s->sprite_delta[0][1],
... ...
@@ -1248,7 +1255,7 @@ static void emulated_edge_mc(MpegEncContext *s, UINT8 *src, int linesize, int bl
1248 1248
     int x, y;
1249 1249
     int start_y, start_x, end_y, end_x;
1250 1250
     UINT8 *buf= s->edge_emu_buffer;
1251
-    
1251
+
1252 1252
     if(src_y>= h){
1253 1253
         src+= (h-1-src_y)*linesize;
1254 1254
         src_y=h-1;
... ...
@@ -1860,17 +1867,17 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
1860 1860
             /* decoding or more than one mb_type (MC was allready done otherwise) */
1861 1861
             if((!s->encoding) || (s->mb_type[mb_xy]&(s->mb_type[mb_xy]-1))){
1862 1862
                 if ((!s->no_rounding) || s->pict_type==B_TYPE){                
1863
-                    op_pix = put_pixels_tab;
1864
-                    op_qpix= put_qpel_pixels_tab;
1863
+		    op_pix = s->dsp.put_pixels_tab;
1864
+                    op_qpix= s->dsp.put_qpel_pixels_tab;
1865 1865
                 }else{
1866
-                    op_pix = put_no_rnd_pixels_tab;
1867
-                    op_qpix= put_no_rnd_qpel_pixels_tab;
1866
+                    op_pix = s->dsp.put_no_rnd_pixels_tab;
1867
+                    op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
1868 1868
                 }
1869 1869
 
1870 1870
                 if (s->mv_dir & MV_DIR_FORWARD) {
1871 1871
                     MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture, op_pix, op_qpix);
1872
-                    op_pix = avg_pixels_tab;
1873
-                    op_qpix= avg_qpel_pixels_tab;
1872
+		    op_pix = s->dsp.avg_pixels_tab;
1873
+                    op_qpix= s->dsp.avg_qpel_pixels_tab;
1874 1874
                 }
1875 1875
                 if (s->mv_dir & MV_DIR_BACKWARD) {
1876 1876
                     MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture, op_pix, op_qpix);
... ...
@@ -2224,10 +2231,10 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2224 2224
                 s->interlaced_dct=0;
2225 2225
         }
2226 2226
         
2227
-        get_pixels(s->block[0], ptr                 , wrap_y);
2228
-        get_pixels(s->block[1], ptr              + 8, wrap_y);
2229
-        get_pixels(s->block[2], ptr + dct_offset    , wrap_y);
2230
-        get_pixels(s->block[3], ptr + dct_offset + 8, wrap_y);
2227
+	s->dsp.get_pixels(s->block[0], ptr                 , wrap_y);
2228
+        s->dsp.get_pixels(s->block[1], ptr              + 8, wrap_y);
2229
+        s->dsp.get_pixels(s->block[2], ptr + dct_offset    , wrap_y);
2230
+        s->dsp.get_pixels(s->block[3], ptr + dct_offset + 8, wrap_y);
2231 2231
 
2232 2232
         if(s->flags&CODEC_FLAG_GRAY){
2233 2233
             skip_dct[4]= 1;
... ...
@@ -2239,14 +2246,14 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2239 2239
                 emulated_edge_mc(s, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2240 2240
                 ptr= s->edge_emu_buffer;
2241 2241
             }
2242
-            get_pixels(s->block[4], ptr, wrap_c);
2242
+	    s->dsp.get_pixels(s->block[4], ptr, wrap_c);
2243 2243
 
2244 2244
             ptr = s->new_picture[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
2245 2245
             if(emu){
2246 2246
                 emulated_edge_mc(s, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2247 2247
                 ptr= s->edge_emu_buffer;
2248 2248
             }
2249
-            get_pixels(s->block[5], ptr, wrap_c);
2249
+            s->dsp.get_pixels(s->block[5], ptr, wrap_c);
2250 2250
         }
2251 2251
     }else{
2252 2252
         op_pixels_func (*op_pix)[4];
... ...
@@ -2266,17 +2273,17 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2266 2266
         ptr_cr = s->new_picture[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
2267 2267
 
2268 2268
         if ((!s->no_rounding) || s->pict_type==B_TYPE){
2269
-            op_pix = put_pixels_tab;
2270
-            op_qpix= put_qpel_pixels_tab;
2269
+	    op_pix = s->dsp.put_pixels_tab;
2270
+            op_qpix= s->dsp.put_qpel_pixels_tab;
2271 2271
         }else{
2272
-            op_pix = put_no_rnd_pixels_tab;
2273
-            op_qpix= put_no_rnd_qpel_pixels_tab;
2272
+            op_pix = s->dsp.put_no_rnd_pixels_tab;
2273
+            op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
2274 2274
         }
2275 2275
 
2276 2276
         if (s->mv_dir & MV_DIR_FORWARD) {
2277 2277
             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture, op_pix, op_qpix);
2278
-            op_pix = avg_pixels_tab;
2279
-            op_qpix= avg_qpel_pixels_tab;
2278
+            op_pix = s->dsp.avg_pixels_tab;
2279
+            op_qpix= s->dsp.avg_qpel_pixels_tab;
2280 2280
         }
2281 2281
         if (s->mv_dir & MV_DIR_BACKWARD) {
2282 2282
             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture, op_pix, op_qpix);
... ...
@@ -2305,10 +2312,10 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2305 2305
                 s->interlaced_dct=0;
2306 2306
         }
2307 2307
         
2308
-        diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
2309
-        diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
2310
-        diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
2311
-        diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
2308
+	s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
2309
+        s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
2310
+        s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
2311
+        s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
2312 2312
         
2313 2313
         if(s->flags&CODEC_FLAG_GRAY){
2314 2314
             skip_dct[4]= 1;
... ...
@@ -2318,23 +2325,23 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2318 2318
                 emulated_edge_mc(s, ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2319 2319
                 ptr_cb= s->edge_emu_buffer;
2320 2320
             }
2321
-            diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2321
+            s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2322 2322
             if(emu){
2323 2323
                 emulated_edge_mc(s, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2324 2324
                 ptr_cr= s->edge_emu_buffer;
2325 2325
             }
2326
-            diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2326
+            s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2327 2327
         }
2328 2328
 
2329 2329
         /* pre quantization */         
2330 2330
         if(s->mc_mb_var[s->mb_width*mb_y+ mb_x]<2*s->qscale*s->qscale){
2331 2331
             //FIXME optimize
2332
-            if(pix_abs8x8(ptr_y               , dest_y               , wrap_y) < 20*s->qscale) skip_dct[0]= 1;
2333
-            if(pix_abs8x8(ptr_y            + 8, dest_y            + 8, wrap_y) < 20*s->qscale) skip_dct[1]= 1;
2334
-            if(pix_abs8x8(ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y) < 20*s->qscale) skip_dct[2]= 1;
2335
-            if(pix_abs8x8(ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y) < 20*s->qscale) skip_dct[3]= 1;
2336
-            if(pix_abs8x8(ptr_cb              , dest_cb              , wrap_y) < 20*s->qscale) skip_dct[4]= 1;
2337
-            if(pix_abs8x8(ptr_cr              , dest_cr              , wrap_y) < 20*s->qscale) skip_dct[5]= 1;
2332
+	    if(s->dsp.pix_abs8x8(ptr_y               , dest_y               , wrap_y) < 20*s->qscale) skip_dct[0]= 1;
2333
+            if(s->dsp.pix_abs8x8(ptr_y            + 8, dest_y            + 8, wrap_y) < 20*s->qscale) skip_dct[1]= 1;
2334
+            if(s->dsp.pix_abs8x8(ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y) < 20*s->qscale) skip_dct[2]= 1;
2335
+            if(s->dsp.pix_abs8x8(ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y) < 20*s->qscale) skip_dct[3]= 1;
2336
+            if(s->dsp.pix_abs8x8(ptr_cb              , dest_cb              , wrap_y) < 20*s->qscale) skip_dct[4]= 1;
2337
+            if(s->dsp.pix_abs8x8(ptr_cr              , dest_cr              , wrap_y) < 20*s->qscale) skip_dct[5]= 1;
2338 2338
 #if 0
2339 2339
 {
2340 2340
  static int stat[7];
... ...
@@ -2601,9 +2608,9 @@ static void encode_picture(MpegEncContext *s, int picture_number)
2601 2601
                     int yy = mb_y * 16;
2602 2602
                     uint8_t *pix = s->new_picture[0] + (yy * s->linesize) + xx;
2603 2603
                     int varc;
2604
-                    int sum = pix_sum(pix, s->linesize);
2604
+		    int sum = s->dsp.pix_sum(pix, s->linesize);
2605 2605
     
2606
-                    varc = (pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
2606
+		    varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
2607 2607
 
2608 2608
                     s->mb_var [s->mb_width * mb_y + mb_x] = varc;
2609 2609
                     s->mb_mean[s->mb_width * mb_y + mb_x] = (sum+128)>>8;
... ...
@@ -221,6 +221,7 @@ typedef struct MpegEncContext {
221 221
     int unrestricted_mv;
222 222
     int h263_long_vectors; /* use horrible h263v1 long vector mode */
223 223
 
224
+    DSPContext dsp;             /* pointers for accelerated dsp functions */
224 225
     int f_code; /* forward MV resolution */
225 226
     int b_code; /* backward MV resolution for B Frames (mpeg4) */
226 227
     INT16 (*motion_val)[2];            /* used for MV prediction (4MV per MB) */
... ...
@@ -447,7 +447,7 @@ static int rv10_decode_packet(AVCodecContext *avctx,
447 447
         printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y);
448 448
 #endif
449 449
         
450
-        clear_blocks(s->block[0]);
450
+	s->dsp.clear_blocks(s->block[0]);
451 451
         s->mv_dir = MV_DIR_FORWARD;
452 452
         s->mv_type = MV_TYPE_16X16; 
453 453
         if (ff_h263_decode_mb(s, s->block) == SLICE_ERROR) {
... ...
@@ -804,7 +804,7 @@ static void svq1_skip_block (uint8_t *current, uint8_t *previous, int pitch, int
804 804
   }
805 805
 }
806 806
 
807
-static int svq1_motion_inter_block (bit_buffer_t *bitbuf,
807
+static int svq1_motion_inter_block (MpegEncContext *s, bit_buffer_t *bitbuf,
808 808
 			       uint8_t *current, uint8_t *previous, int pitch,
809 809
 			       svq1_pmv_t *motion, int x, int y) {
810 810
   uint8_t    *src;
... ...
@@ -839,12 +839,12 @@ static int svq1_motion_inter_block (bit_buffer_t *bitbuf,
839 839
   src = &previous[(x + (mv.x >> 1)) + (y + (mv.y >> 1))*pitch];
840 840
   dst = current;
841 841
 
842
-  put_pixels_tab[0][((mv.y & 1) << 1) | (mv.x & 1)](dst,src,pitch,16);
842
+  s->dsp.put_pixels_tab[0][((mv.y & 1) << 1) | (mv.x & 1)](dst,src,pitch,16);
843 843
 
844 844
   return 0;
845 845
 }
846 846
 
847
-static int svq1_motion_inter_4v_block (bit_buffer_t *bitbuf,
847
+static int svq1_motion_inter_4v_block (MpegEncContext *s, bit_buffer_t *bitbuf,
848 848
 				  uint8_t *current, uint8_t *previous, int pitch,
849 849
 				  svq1_pmv_t *motion,int x, int y) {
850 850
   uint8_t    *src;
... ...
@@ -906,7 +906,7 @@ static int svq1_motion_inter_4v_block (bit_buffer_t *bitbuf,
906 906
     src = &previous[(x + (pmv[i]->x >> 1)) + (y + (pmv[i]->y >> 1))*pitch];
907 907
     dst = current;
908 908
 
909
-    put_pixels_tab[1][((pmv[i]->y & 1) << 1) | (pmv[i]->x & 1)](dst,src,pitch,8);
909
+    s->dsp.put_pixels_tab[1][((pmv[i]->y & 1) << 1) | (pmv[i]->x & 1)](dst,src,pitch,8);
910 910
 
911 911
     /* select next block */
912 912
     if (i & 1) {
... ...
@@ -921,7 +921,7 @@ static int svq1_motion_inter_4v_block (bit_buffer_t *bitbuf,
921 921
   return 0;
922 922
 }
923 923
 
924
-static int svq1_decode_delta_block (bit_buffer_t *bitbuf,
924
+static int svq1_decode_delta_block (MpegEncContext *s, bit_buffer_t *bitbuf,
925 925
 			uint8_t *current, uint8_t *previous, int pitch,
926 926
 			svq1_pmv_t *motion, int x, int y) {
927 927
   uint32_t bit_cache;
... ...
@@ -951,7 +951,7 @@ static int svq1_decode_delta_block (bit_buffer_t *bitbuf,
951 951
     break;
952 952
 
953 953
   case SVQ1_BLOCK_INTER:
954
-    result = svq1_motion_inter_block (bitbuf, current, previous, pitch, motion, x, y);
954
+    result = svq1_motion_inter_block (s, bitbuf, current, previous, pitch, motion, x, y);
955 955
 
956 956
     if (result != 0)
957 957
     {
... ...
@@ -964,7 +964,7 @@ static int svq1_decode_delta_block (bit_buffer_t *bitbuf,
964 964
     break;
965 965
 
966 966
   case SVQ1_BLOCK_INTER_4V:
967
-    result = svq1_motion_inter_4v_block (bitbuf, current, previous, pitch, motion, x, y);
967
+    result = svq1_motion_inter_4v_block (s, bitbuf, current, previous, pitch, motion, x, y);
968 968
 
969 969
     if (result != 0)
970 970
     {
... ...
@@ -1142,8 +1142,8 @@ static int svq1_decode_frame(AVCodecContext *avctx,
1142 1142
 
1143 1143
       for (y=0; y < height; y+=16) {
1144 1144
 	for (x=0; x < width; x+=16) {
1145
-	  result = svq1_decode_delta_block (&s->gb, &current[x], previous,
1146
-				       linesize, pmv, x, y);
1145
+	  result = svq1_decode_delta_block (s, &s->gb, &current[x], previous,
1146
+					    linesize, pmv, x, y);
1147 1147
 	  if (result != 0)
1148 1148
 	  {
1149 1149
 #ifdef DEBUG_SVQ1