Originally committed as revision 1194 to svn://svn.ffmpeg.org/ffmpeg/trunk
Zdenek Kabelac authored on 2002/11/11 18:40:17... | ... |
@@ -20,7 +20,7 @@ |
20 | 20 |
*/ |
21 | 21 |
#include "avcodec.h" |
22 | 22 |
#include "dsputil.h" |
23 |
- |
|
23 |
+/* |
|
24 | 24 |
void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size); |
25 | 25 |
void (*diff_pixels)(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride); |
26 | 26 |
void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); |
... | ... |
@@ -41,7 +41,7 @@ op_pixels_abs_func pix_abs8x8; |
41 | 41 |
op_pixels_abs_func pix_abs8x8_x2; |
42 | 42 |
op_pixels_abs_func pix_abs8x8_y2; |
43 | 43 |
op_pixels_abs_func pix_abs8x8_xy2; |
44 |
- |
|
44 |
+*/ |
|
45 | 45 |
int ff_bit_exact=0; |
46 | 46 |
|
47 | 47 |
UINT8 cropTbl[256 + 2 * MAX_NEG_CROP]; |
... | ... |
@@ -84,7 +84,7 @@ const UINT8 ff_alternate_vertical_scan[64] = { |
84 | 84 |
}; |
85 | 85 |
|
86 | 86 |
/* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */ |
87 |
-UINT32 inverse[256]={ |
|
87 |
+const UINT32 inverse[256]={ |
|
88 | 88 |
0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757, |
89 | 89 |
536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154, |
90 | 90 |
268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709, |
... | ... |
@@ -119,7 +119,7 @@ UINT32 inverse[256]={ |
119 | 119 |
17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010, |
120 | 120 |
}; |
121 | 121 |
|
122 |
-int pix_sum_c(UINT8 * pix, int line_size) |
|
122 |
+static int pix_sum_c(UINT8 * pix, int line_size) |
|
123 | 123 |
{ |
124 | 124 |
int s, i, j; |
125 | 125 |
|
... | ... |
@@ -141,7 +141,7 @@ int pix_sum_c(UINT8 * pix, int line_size) |
141 | 141 |
return s; |
142 | 142 |
} |
143 | 143 |
|
144 |
-int pix_norm1_c(UINT8 * pix, int line_size) |
|
144 |
+static int pix_norm1_c(UINT8 * pix, int line_size) |
|
145 | 145 |
{ |
146 | 146 |
int s, i, j; |
147 | 147 |
UINT32 *sq = squareTbl + 256; |
... | ... |
@@ -165,7 +165,7 @@ int pix_norm1_c(UINT8 * pix, int line_size) |
165 | 165 |
} |
166 | 166 |
|
167 | 167 |
|
168 |
-void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_size) |
|
168 |
+static void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_size) |
|
169 | 169 |
{ |
170 | 170 |
int i; |
171 | 171 |
|
... | ... |
@@ -184,8 +184,8 @@ void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_size) |
184 | 184 |
} |
185 | 185 |
} |
186 | 186 |
|
187 |
-void diff_pixels_c(DCTELEM *restrict block, const UINT8 *s1, const UINT8 *s2, |
|
188 |
- int stride){ |
|
187 |
+static void diff_pixels_c(DCTELEM *restrict block, const UINT8 *s1, |
|
188 |
+ const UINT8 *s2, int stride){ |
|
189 | 189 |
int i; |
190 | 190 |
|
191 | 191 |
/* read the pixels */ |
... | ... |
@@ -205,8 +205,8 @@ void diff_pixels_c(DCTELEM *restrict block, const UINT8 *s1, const UINT8 *s2, |
205 | 205 |
} |
206 | 206 |
|
207 | 207 |
|
208 |
-void put_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels, |
|
209 |
- int line_size) |
|
208 |
+static void put_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels, |
|
209 |
+ int line_size) |
|
210 | 210 |
{ |
211 | 211 |
int i; |
212 | 212 |
UINT8 *cm = cropTbl + MAX_NEG_CROP; |
... | ... |
@@ -227,7 +227,7 @@ void put_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels, |
227 | 227 |
} |
228 | 228 |
} |
229 | 229 |
|
230 |
-void add_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels, |
|
230 |
+static void add_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels, |
|
231 | 231 |
int line_size) |
232 | 232 |
{ |
233 | 233 |
int i; |
... | ... |
@@ -1353,7 +1353,7 @@ QPEL_MC(0, avg_ , _ , op_avg) |
1353 | 1353 |
#undef op_put |
1354 | 1354 |
#undef op_put_no_rnd |
1355 | 1355 |
|
1356 |
-int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size) |
|
1356 |
+static int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size) |
|
1357 | 1357 |
{ |
1358 | 1358 |
int s, i; |
1359 | 1359 |
|
... | ... |
@@ -1381,7 +1381,7 @@ int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size) |
1381 | 1381 |
return s; |
1382 | 1382 |
} |
1383 | 1383 |
|
1384 |
-int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size) |
|
1384 |
+static int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size) |
|
1385 | 1385 |
{ |
1386 | 1386 |
int s, i; |
1387 | 1387 |
|
... | ... |
@@ -1409,7 +1409,7 @@ int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size) |
1409 | 1409 |
return s; |
1410 | 1410 |
} |
1411 | 1411 |
|
1412 |
-int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size) |
|
1412 |
+static int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size) |
|
1413 | 1413 |
{ |
1414 | 1414 |
int s, i; |
1415 | 1415 |
UINT8 *pix3 = pix2 + line_size; |
... | ... |
@@ -1439,7 +1439,7 @@ int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size) |
1439 | 1439 |
return s; |
1440 | 1440 |
} |
1441 | 1441 |
|
1442 |
-int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size) |
|
1442 |
+static int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size) |
|
1443 | 1443 |
{ |
1444 | 1444 |
int s, i; |
1445 | 1445 |
UINT8 *pix3 = pix2 + line_size; |
... | ... |
@@ -1469,7 +1469,7 @@ int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size) |
1469 | 1469 |
return s; |
1470 | 1470 |
} |
1471 | 1471 |
|
1472 |
-int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size) |
|
1472 |
+static int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size) |
|
1473 | 1473 |
{ |
1474 | 1474 |
int s, i; |
1475 | 1475 |
|
... | ... |
@@ -1489,7 +1489,7 @@ int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size) |
1489 | 1489 |
return s; |
1490 | 1490 |
} |
1491 | 1491 |
|
1492 |
-int pix_abs8x8_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size) |
|
1492 |
+static int pix_abs8x8_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size) |
|
1493 | 1493 |
{ |
1494 | 1494 |
int s, i; |
1495 | 1495 |
|
... | ... |
@@ -1509,7 +1509,7 @@ int pix_abs8x8_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size) |
1509 | 1509 |
return s; |
1510 | 1510 |
} |
1511 | 1511 |
|
1512 |
-int pix_abs8x8_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size) |
|
1512 |
+static int pix_abs8x8_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size) |
|
1513 | 1513 |
{ |
1514 | 1514 |
int s, i; |
1515 | 1515 |
UINT8 *pix3 = pix2 + line_size; |
... | ... |
@@ -1531,7 +1531,7 @@ int pix_abs8x8_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size) |
1531 | 1531 |
return s; |
1532 | 1532 |
} |
1533 | 1533 |
|
1534 |
-int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size) |
|
1534 |
+static int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size) |
|
1535 | 1535 |
{ |
1536 | 1536 |
int s, i; |
1537 | 1537 |
UINT8 *pix3 = pix2 + line_size; |
... | ... |
@@ -1574,12 +1574,12 @@ void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, |
1574 | 1574 |
} |
1575 | 1575 |
} |
1576 | 1576 |
|
1577 |
-void clear_blocks_c(DCTELEM *blocks) |
|
1577 |
+static void clear_blocks_c(DCTELEM *blocks) |
|
1578 | 1578 |
{ |
1579 | 1579 |
memset(blocks, 0, sizeof(DCTELEM)*6*64); |
1580 | 1580 |
} |
1581 | 1581 |
|
1582 |
-void dsputil_init(void) |
|
1582 |
+void dsputil_init(DSPContext* c, unsigned mask) |
|
1583 | 1583 |
{ |
1584 | 1584 |
int i; |
1585 | 1585 |
|
... | ... |
@@ -1593,42 +1593,82 @@ void dsputil_init(void) |
1593 | 1593 |
squareTbl[i] = (i - 256) * (i - 256); |
1594 | 1594 |
} |
1595 | 1595 |
|
1596 |
- get_pixels = get_pixels_c; |
|
1597 |
- diff_pixels = diff_pixels_c; |
|
1598 |
- put_pixels_clamped = put_pixels_clamped_c; |
|
1599 |
- add_pixels_clamped = add_pixels_clamped_c; |
|
1600 |
- ff_gmc1= gmc1_c; |
|
1601 |
- ff_gmc= gmc_c; |
|
1602 |
- clear_blocks= clear_blocks_c; |
|
1603 |
- pix_sum= pix_sum_c; |
|
1604 |
- pix_norm1= pix_norm1_c; |
|
1605 |
- |
|
1606 |
- pix_abs16x16 = pix_abs16x16_c; |
|
1607 |
- pix_abs16x16_x2 = pix_abs16x16_x2_c; |
|
1608 |
- pix_abs16x16_y2 = pix_abs16x16_y2_c; |
|
1609 |
- pix_abs16x16_xy2 = pix_abs16x16_xy2_c; |
|
1610 |
- pix_abs8x8 = pix_abs8x8_c; |
|
1611 |
- pix_abs8x8_x2 = pix_abs8x8_x2_c; |
|
1612 |
- pix_abs8x8_y2 = pix_abs8x8_y2_c; |
|
1613 |
- pix_abs8x8_xy2 = pix_abs8x8_xy2_c; |
|
1596 |
+ c->get_pixels = get_pixels_c; |
|
1597 |
+ c->diff_pixels = diff_pixels_c; |
|
1598 |
+ c->put_pixels_clamped = put_pixels_clamped_c; |
|
1599 |
+ c->add_pixels_clamped = add_pixels_clamped_c; |
|
1600 |
+ c->gmc1 = gmc1_c; |
|
1601 |
+ c->gmc = gmc_c; |
|
1602 |
+ c->clear_blocks = clear_blocks_c; |
|
1603 |
+ c->pix_sum = pix_sum_c; |
|
1604 |
+ c->pix_norm1 = pix_norm1_c; |
|
1605 |
+ |
|
1606 |
+ c->pix_abs16x16 = pix_abs16x16_c; |
|
1607 |
+ c->pix_abs16x16_x2 = pix_abs16x16_x2_c; |
|
1608 |
+ c->pix_abs16x16_y2 = pix_abs16x16_y2_c; |
|
1609 |
+ c->pix_abs16x16_xy2 = pix_abs16x16_xy2_c; |
|
1610 |
+ c->pix_abs8x8 = pix_abs8x8_c; |
|
1611 |
+ c->pix_abs8x8_x2 = pix_abs8x8_x2_c; |
|
1612 |
+ c->pix_abs8x8_y2 = pix_abs8x8_y2_c; |
|
1613 |
+ c->pix_abs8x8_xy2 = pix_abs8x8_xy2_c; |
|
1614 |
+ |
|
1615 |
+ c->put_pixels_tab[0][0] = put_pixels16; |
|
1616 |
+ c->put_pixels_tab[0][1] = put_pixels16_x2; |
|
1617 |
+ c->put_pixels_tab[0][2] = put_pixels16_y2; |
|
1618 |
+ c->put_pixels_tab[0][3] = put_pixels16_xy2; |
|
1619 |
+ |
|
1620 |
+ c->put_no_rnd_pixels_tab[0][0] = put_pixels16; |
|
1621 |
+ c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2; |
|
1622 |
+ c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2; |
|
1623 |
+ c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2; |
|
1624 |
+ |
|
1625 |
+ c->avg_pixels_tab[0][0] = avg_pixels16; |
|
1626 |
+ c->avg_pixels_tab[0][1] = avg_pixels16_x2; |
|
1627 |
+ c->avg_pixels_tab[0][2] = avg_pixels16_y2; |
|
1628 |
+ c->avg_pixels_tab[0][3] = avg_pixels16_xy2; |
|
1629 |
+ |
|
1630 |
+ c->avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16; |
|
1631 |
+ c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2; |
|
1632 |
+ c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2; |
|
1633 |
+ c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2; |
|
1634 |
+ |
|
1635 |
+ c->put_pixels_tab[1][0] = put_pixels8; |
|
1636 |
+ c->put_pixels_tab[1][1] = put_pixels8_x2; |
|
1637 |
+ c->put_pixels_tab[1][2] = put_pixels8_y2; |
|
1638 |
+ c->put_pixels_tab[1][3] = put_pixels8_xy2; |
|
1639 |
+ |
|
1640 |
+ c->put_no_rnd_pixels_tab[1][0] = put_pixels8; |
|
1641 |
+ c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2; |
|
1642 |
+ c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2; |
|
1643 |
+ c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2; |
|
1644 |
+ |
|
1645 |
+ c->avg_pixels_tab[1][0] = avg_pixels8; |
|
1646 |
+ c->avg_pixels_tab[1][1] = avg_pixels8_x2; |
|
1647 |
+ c->avg_pixels_tab[1][2] = avg_pixels8_y2; |
|
1648 |
+ c->avg_pixels_tab[1][3] = avg_pixels8_xy2; |
|
1649 |
+ |
|
1650 |
+ c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8; |
|
1651 |
+ c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2; |
|
1652 |
+ c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2; |
|
1653 |
+ c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2; |
|
1614 | 1654 |
|
1615 | 1655 |
#ifdef HAVE_MMX |
1616 |
- dsputil_init_mmx(); |
|
1656 |
+ dsputil_init_mmx(c, mask); |
|
1617 | 1657 |
#endif |
1618 | 1658 |
#ifdef ARCH_ARMV4L |
1619 |
- dsputil_init_armv4l(); |
|
1659 |
+ dsputil_init_armv4l(c, mask); |
|
1620 | 1660 |
#endif |
1621 | 1661 |
#ifdef HAVE_MLIB |
1622 |
- dsputil_init_mlib(); |
|
1662 |
+ dsputil_init_mlib(c, mask); |
|
1623 | 1663 |
#endif |
1624 | 1664 |
#ifdef ARCH_ALPHA |
1625 |
- dsputil_init_alpha(); |
|
1665 |
+ dsputil_init_alpha(c, mask); |
|
1626 | 1666 |
#endif |
1627 | 1667 |
#ifdef ARCH_POWERPC |
1628 |
- dsputil_init_ppc(); |
|
1668 |
+ dsputil_init_ppc(c, mask); |
|
1629 | 1669 |
#endif |
1630 | 1670 |
#ifdef HAVE_MMI |
1631 |
- dsputil_init_mmi(); |
|
1671 |
+ dsputil_init_mmi(c, mask); |
|
1632 | 1672 |
#endif |
1633 | 1673 |
|
1634 | 1674 |
for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1; |
... | ... |
@@ -1639,7 +1679,8 @@ void avcodec_set_bit_exact(void) |
1639 | 1639 |
{ |
1640 | 1640 |
ff_bit_exact=1; |
1641 | 1641 |
#ifdef HAVE_MMX |
1642 |
- dsputil_set_bit_exact_mmx(); |
|
1642 |
+#warning FIXME - set_bit_exact |
|
1643 |
+// dsputil_set_bit_exact_mmx(); |
|
1643 | 1644 |
#endif |
1644 | 1645 |
} |
1645 | 1646 |
|
... | ... |
@@ -45,10 +45,9 @@ extern const UINT8 ff_zigzag_direct[64]; |
45 | 45 |
extern UINT32 squareTbl[512]; |
46 | 46 |
extern UINT8 cropTbl[256 + 2 * MAX_NEG_CROP]; |
47 | 47 |
|
48 |
-void dsputil_init(void); |
|
49 | 48 |
|
50 | 49 |
/* minimum alignment rules ;) |
51 |
-if u notice errors in the align stuff, need more alignment for some asm code for some cpu |
|
50 |
+if u notice errors in the align stuff, need more alignment for some asm code for some cpu |
|
52 | 51 |
or need to use a function with less aligned data then send a mail to the ffmpeg-dev list, ... |
53 | 52 |
|
54 | 53 |
!warning these alignments might not match reality, (missing attribute((align)) stuff somewhere possible) |
... | ... |
@@ -57,39 +56,20 @@ i (michael) didnt check them, these are just the alignents which i think could b |
57 | 57 |
!future video codecs might need functions with less strict alignment |
58 | 58 |
*/ |
59 | 59 |
|
60 |
-/* pixel ops : interface with DCT */ |
|
61 |
-extern void (*get_pixels)(DCTELEM *block/*align 16*/, const UINT8 *pixels/*align 8*/, int line_size); |
|
62 |
-extern void (*diff_pixels)(DCTELEM *block/*align 16*/, const UINT8 *s1/*align 8*/, const UINT8 *s2/*align 8*/, int stride); |
|
63 |
-extern void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size); |
|
64 |
-extern void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size); |
|
65 |
-extern void (*ff_gmc1)(UINT8 *dst/*align 8*/, UINT8 *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder); |
|
66 |
-extern void (*ff_gmc )(UINT8 *dst/*align 8*/, UINT8 *src/*align 1*/, int stride, int h, int ox, int oy, |
|
67 |
- int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height); |
|
68 |
-extern void (*clear_blocks)(DCTELEM *blocks/*align 16*/); |
|
69 |
-extern int (*pix_sum)(UINT8 * pix, int line_size); |
|
70 |
-extern int (*pix_norm1)(UINT8 * pix, int line_size); |
|
71 |
- |
|
72 |
- |
|
73 |
- |
|
60 |
+/* |
|
74 | 61 |
void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size); |
75 | 62 |
void diff_pixels_c(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride); |
76 | 63 |
void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size); |
77 | 64 |
void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size); |
78 | 65 |
void clear_blocks_c(DCTELEM *blocks); |
66 |
+*/ |
|
79 | 67 |
|
80 | 68 |
/* add and put pixel (decoding) */ |
81 | 69 |
// blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16 |
82 | 70 |
typedef void (*op_pixels_func)(UINT8 *block/*align width (8 or 16)*/, const UINT8 *pixels/*align 1*/, int line_size, int h); |
83 | 71 |
typedef void (*qpel_mc_func)(UINT8 *dst/*align width (8 or 16)*/, UINT8 *src/*align 1*/, int stride); |
84 | 72 |
|
85 |
-extern op_pixels_func put_pixels_tab[2][4]; |
|
86 |
-extern op_pixels_func avg_pixels_tab[2][4]; |
|
87 |
-extern op_pixels_func put_no_rnd_pixels_tab[2][4]; |
|
88 |
-extern op_pixels_func avg_no_rnd_pixels_tab[2][4]; |
|
89 |
-extern qpel_mc_func put_qpel_pixels_tab[2][16]; |
|
90 |
-extern qpel_mc_func avg_qpel_pixels_tab[2][16]; |
|
91 |
-extern qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16]; |
|
92 |
-extern qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16]; |
|
73 |
+ |
|
93 | 74 |
|
94 | 75 |
#define CALL_2X_PIXELS(a, b, n)\ |
95 | 76 |
static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
... | ... |
@@ -100,20 +80,46 @@ static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
100 | 100 |
/* motion estimation */ |
101 | 101 |
|
102 | 102 |
typedef int (*op_pixels_abs_func)(UINT8 *blk1/*align width (8 or 16)*/, UINT8 *blk2/*align 1*/, int line_size); |
103 |
- |
|
104 |
-extern op_pixels_abs_func pix_abs16x16; |
|
105 |
-extern op_pixels_abs_func pix_abs16x16_x2; |
|
106 |
-extern op_pixels_abs_func pix_abs16x16_y2; |
|
107 |
-extern op_pixels_abs_func pix_abs16x16_xy2; |
|
108 |
-extern op_pixels_abs_func pix_abs8x8; |
|
109 |
-extern op_pixels_abs_func pix_abs8x8_x2; |
|
110 |
-extern op_pixels_abs_func pix_abs8x8_y2; |
|
111 |
-extern op_pixels_abs_func pix_abs8x8_xy2; |
|
112 |
- |
|
103 |
+/* |
|
113 | 104 |
int pix_abs16x16_c(UINT8 *blk1, UINT8 *blk2, int lx); |
114 | 105 |
int pix_abs16x16_x2_c(UINT8 *blk1, UINT8 *blk2, int lx); |
115 | 106 |
int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx); |
116 | 107 |
int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx); |
108 |
+*/ |
|
109 |
+typedef struct DSPContext { |
|
110 |
+ /* pixel ops : interface with DCT */ |
|
111 |
+ void (*get_pixels)(DCTELEM *block/*align 16*/, const UINT8 *pixels/*align 8*/, int line_size); |
|
112 |
+ void (*diff_pixels)(DCTELEM *block/*align 16*/, const UINT8 *s1/*align 8*/, const UINT8 *s2/*align 8*/, int stride); |
|
113 |
+ void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size); |
|
114 |
+ void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size); |
|
115 |
+ void (*gmc1)(UINT8 *dst/*align 8*/, UINT8 *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder); |
|
116 |
+ void (*gmc )(UINT8 *dst/*align 8*/, UINT8 *src/*align 1*/, int stride, int h, int ox, int oy, |
|
117 |
+ int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height); |
|
118 |
+ void (*clear_blocks)(DCTELEM *blocks/*align 16*/); |
|
119 |
+ int (*pix_sum)(UINT8 * pix, int line_size); |
|
120 |
+ int (*pix_norm1)(UINT8 * pix, int line_size); |
|
121 |
+ |
|
122 |
+ /* maybe create an array for 16/8 functions */ |
|
123 |
+ op_pixels_func put_pixels_tab[2][4]; |
|
124 |
+ op_pixels_func avg_pixels_tab[2][4]; |
|
125 |
+ op_pixels_func put_no_rnd_pixels_tab[2][4]; |
|
126 |
+ op_pixels_func avg_no_rnd_pixels_tab[2][4]; |
|
127 |
+ qpel_mc_func put_qpel_pixels_tab[2][16]; |
|
128 |
+ qpel_mc_func avg_qpel_pixels_tab[2][16]; |
|
129 |
+ qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16]; |
|
130 |
+ qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16]; |
|
131 |
+ |
|
132 |
+ op_pixels_abs_func pix_abs16x16; |
|
133 |
+ op_pixels_abs_func pix_abs16x16_x2; |
|
134 |
+ op_pixels_abs_func pix_abs16x16_y2; |
|
135 |
+ op_pixels_abs_func pix_abs16x16_xy2; |
|
136 |
+ op_pixels_abs_func pix_abs8x8; |
|
137 |
+ op_pixels_abs_func pix_abs8x8_x2; |
|
138 |
+ op_pixels_abs_func pix_abs8x8_y2; |
|
139 |
+ op_pixels_abs_func pix_abs8x8_xy2; |
|
140 |
+} DSPContext; |
|
141 |
+ |
|
142 |
+void dsputil_init(DSPContext* p, unsigned mask); |
|
117 | 143 |
|
118 | 144 |
/** |
119 | 145 |
* permute block according to permutation. |
... | ... |
@@ -121,8 +127,12 @@ int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx); |
121 | 121 |
*/ |
122 | 122 |
void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, int last); |
123 | 123 |
|
124 |
+#define emms_c() |
|
125 |
+ |
|
124 | 126 |
#if defined(HAVE_MMX) |
125 | 127 |
|
128 |
+#undef emms_c |
|
129 |
+ |
|
126 | 130 |
#define MM_MMX 0x0001 /* standard MMX */ |
127 | 131 |
#define MM_3DNOW 0x0004 /* AMD 3DNOW */ |
128 | 132 |
#define MM_MMXEXT 0x0002 /* SSE integer functions or AMD MMX ext */ |
... | ... |
@@ -132,6 +142,8 @@ void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, |
132 | 132 |
extern int mm_flags; |
133 | 133 |
|
134 | 134 |
int mm_support(void); |
135 |
+void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size); |
|
136 |
+void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size); |
|
135 | 137 |
|
136 | 138 |
static inline void emms(void) |
137 | 139 |
{ |
... | ... |
@@ -146,54 +158,44 @@ static inline void emms(void) |
146 | 146 |
|
147 | 147 |
#define __align8 __attribute__ ((aligned (8))) |
148 | 148 |
|
149 |
-void dsputil_init_mmx(void); |
|
150 |
-void dsputil_set_bit_exact_mmx(void); |
|
149 |
+void dsputil_init_mmx(DSPContext* c, unsigned mask); |
|
150 |
+void dsputil_set_bit_exact_mmx(DSPContext* c, unsigned mask); |
|
151 | 151 |
|
152 | 152 |
#elif defined(ARCH_ARMV4L) |
153 | 153 |
|
154 |
-#define emms_c() |
|
155 |
- |
|
156 | 154 |
/* This is to use 4 bytes read to the IDCT pointers for some 'zero' |
157 | 155 |
line optimizations */ |
158 | 156 |
#define __align8 __attribute__ ((aligned (4))) |
159 | 157 |
|
160 |
-void dsputil_init_armv4l(void); |
|
158 |
+void dsputil_init_armv4l(DSPContext* c, unsigned mask); |
|
161 | 159 |
|
162 | 160 |
#elif defined(HAVE_MLIB) |
163 |
- |
|
164 |
-#define emms_c() |
|
165 | 161 |
|
166 | 162 |
/* SPARC/VIS IDCT needs 8-byte aligned DCT blocks */ |
167 | 163 |
#define __align8 __attribute__ ((aligned (8))) |
168 | 164 |
|
169 |
-void dsputil_init_mlib(void); |
|
165 |
+void dsputil_init_mlib(DSPContext* c, unsigned mask); |
|
170 | 166 |
|
171 | 167 |
#elif defined(ARCH_ALPHA) |
172 | 168 |
|
173 |
-#define emms_c() |
|
174 | 169 |
#define __align8 __attribute__ ((aligned (8))) |
175 | 170 |
|
176 |
-void dsputil_init_alpha(void); |
|
171 |
+void dsputil_init_alpha(DSPContext* c, unsigned mask); |
|
177 | 172 |
|
178 | 173 |
#elif defined(ARCH_POWERPC) |
179 | 174 |
|
180 |
-#define emms_c() |
|
181 | 175 |
#define __align8 __attribute__ ((aligned (16))) |
182 | 176 |
|
183 |
-void dsputil_init_ppc(void); |
|
177 |
+void dsputil_init_ppc(DSPContext* c, unsigned mask); |
|
184 | 178 |
|
185 | 179 |
#elif defined(HAVE_MMI) |
186 | 180 |
|
187 |
-#define emms_c() |
|
188 |
- |
|
189 | 181 |
#define __align8 __attribute__ ((aligned (16))) |
190 | 182 |
|
191 |
-void dsputil_init_mmi(void); |
|
183 |
+void dsputil_init_mmi(DSPContext* c, unsigned mask); |
|
192 | 184 |
|
193 | 185 |
#else |
194 | 186 |
|
195 |
-#define emms_c() |
|
196 |
- |
|
197 | 187 |
#define __align8 |
198 | 188 |
|
199 | 189 |
#endif |
... | ... |
@@ -263,9 +265,9 @@ typedef struct MDCTContext { |
263 | 263 |
} MDCTContext; |
264 | 264 |
|
265 | 265 |
int ff_mdct_init(MDCTContext *s, int nbits, int inverse); |
266 |
-void ff_imdct_calc(MDCTContext *s, FFTSample *output, |
|
266 |
+void ff_imdct_calc(MDCTContext *s, FFTSample *output, |
|
267 | 267 |
const FFTSample *input, FFTSample *tmp); |
268 |
-void ff_mdct_calc(MDCTContext *s, FFTSample *out, |
|
268 |
+void ff_mdct_calc(MDCTContext *s, FFTSample *out, |
|
269 | 269 |
const FFTSample *input, FFTSample *tmp); |
270 | 270 |
void ff_mdct_end(MDCTContext *s); |
271 | 271 |
|
... | ... |
@@ -331,7 +331,7 @@ static void guess_mv(MpegEncContext *s){ |
331 | 331 |
s->mv_type = MV_TYPE_16X16; |
332 | 332 |
s->mb_skiped=0; |
333 | 333 |
|
334 |
- clear_blocks(s->block[0]); |
|
334 |
+ s->dsp.clear_blocks(s->block[0]); |
|
335 | 335 |
|
336 | 336 |
s->mb_x= mb_x; |
337 | 337 |
s->mb_y= mb_y; |
... | ... |
@@ -458,7 +458,7 @@ int score_sum=0; |
458 | 458 |
s->mv_type = MV_TYPE_16X16; |
459 | 459 |
s->mb_skiped=0; |
460 | 460 |
|
461 |
- clear_blocks(s->block[0]); |
|
461 |
+ s->dsp.clear_blocks(s->block[0]); |
|
462 | 462 |
|
463 | 463 |
s->mb_x= mb_x; |
464 | 464 |
s->mb_y= mb_y; |
... | ... |
@@ -559,8 +559,8 @@ static int is_intra_more_likely(MpegEncContext *s){ |
559 | 559 |
UINT8 *mb_ptr = s->current_picture[0] + mb_x*16 + mb_y*16*s->linesize; |
560 | 560 |
UINT8 *last_mb_ptr= s->last_picture [0] + mb_x*16 + mb_y*16*s->linesize; |
561 | 561 |
|
562 |
- is_intra_likely += pix_abs16x16(last_mb_ptr, mb_ptr , s->linesize); |
|
563 |
- is_intra_likely -= pix_abs16x16(last_mb_ptr, last_mb_ptr+s->linesize*16, s->linesize); |
|
562 |
+ is_intra_likely += s->dsp.pix_abs16x16(last_mb_ptr, mb_ptr , s->linesize); |
|
563 |
+ is_intra_likely -= s->dsp.pix_abs16x16(last_mb_ptr, last_mb_ptr+s->linesize*16, s->linesize); |
|
564 | 564 |
}else{ |
565 | 565 |
if(s->mbintra_table[i]) //HACK (this is allways inited but we should use mb_type[]) |
566 | 566 |
is_intra_likely++; |
... | ... |
@@ -738,7 +738,7 @@ void ff_error_resilience(MpegEncContext *s){ |
738 | 738 |
s->mv[0][0][1] = s->motion_val[ mb_x*2+1 + (mb_y*2+1)*s->block_wrap[0] ][1]; |
739 | 739 |
} |
740 | 740 |
|
741 |
- clear_blocks(s->block[0]); |
|
741 |
+ s->dsp.clear_blocks(s->block[0]); |
|
742 | 742 |
|
743 | 743 |
s->mb_x= mb_x; |
744 | 744 |
s->mb_y= mb_y; |
... | ... |
@@ -778,8 +778,8 @@ void ff_error_resilience(MpegEncContext *s){ |
778 | 778 |
s->mv[1][0][0]= 0; |
779 | 779 |
s->mv[1][0][1]= 0; |
780 | 780 |
} |
781 |
- |
|
782 |
- clear_blocks(s->block[0]); |
|
781 |
+ |
|
782 |
+ s->dsp.clear_blocks(s->block[0]); |
|
783 | 783 |
s->mb_x= mb_x; |
784 | 784 |
s->mb_y= mb_y; |
785 | 785 |
MPV_decode_mb(s, s->block); |
... | ... |
@@ -538,7 +538,7 @@ void mpeg4_encode_mb(MpegEncContext * s, |
538 | 538 |
if(s->coded_order[i+1].pict_type!=B_TYPE) break; |
539 | 539 |
|
540 | 540 |
b_pic= s->coded_order[i+1].picture[0] + offset; |
541 |
- diff= pix_abs16x16(p_pic, b_pic, s->linesize); |
|
541 |
+ diff= s->dsp.pix_abs16x16(p_pic, b_pic, s->linesize); |
|
542 | 542 |
if(diff>s->qscale*70){ //FIXME check that 70 is optimal |
543 | 543 |
s->mb_skiped=0; |
544 | 544 |
break; |
... | ... |
@@ -22,7 +22,7 @@ |
22 | 22 |
#include "../dsputil.h" |
23 | 23 |
|
24 | 24 |
int mm_flags; /* multimedia extension flags */ |
25 |
- |
|
25 |
+/* FIXME use them in static form */ |
|
26 | 26 |
int pix_abs16x16_mmx(UINT8 *blk1, UINT8 *blk2, int lx); |
27 | 27 |
int pix_abs16x16_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); |
28 | 28 |
int pix_abs16x16_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); |
... | ... |
@@ -242,7 +242,7 @@ static void diff_pixels_mmx(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, in |
242 | 242 |
); |
243 | 243 |
} |
244 | 244 |
|
245 |
-static void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size) |
|
245 |
+void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size) |
|
246 | 246 |
{ |
247 | 247 |
const DCTELEM *p; |
248 | 248 |
UINT8 *pix; |
... | ... |
@@ -297,7 +297,7 @@ static void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line |
297 | 297 |
:"memory"); |
298 | 298 |
} |
299 | 299 |
|
300 |
-static void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size) |
|
300 |
+void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size) |
|
301 | 301 |
{ |
302 | 302 |
const DCTELEM *p; |
303 | 303 |
UINT8 *pix; |
... | ... |
@@ -457,7 +457,7 @@ static int pix_sum16_mmx(UINT8 * pix, int line_size){ |
457 | 457 |
static void just_return() { return; } |
458 | 458 |
#endif |
459 | 459 |
|
460 |
-void dsputil_init_mmx(void) |
|
460 |
+void dsputil_init_mmx(DSPContext* c, unsigned mask) |
|
461 | 461 |
{ |
462 | 462 |
mm_flags = mm_support(); |
463 | 463 |
#if 0 |
... | ... |
@@ -476,112 +476,112 @@ void dsputil_init_mmx(void) |
476 | 476 |
#endif |
477 | 477 |
|
478 | 478 |
if (mm_flags & MM_MMX) { |
479 |
- get_pixels = get_pixels_mmx; |
|
480 |
- diff_pixels = diff_pixels_mmx; |
|
481 |
- put_pixels_clamped = put_pixels_clamped_mmx; |
|
482 |
- add_pixels_clamped = add_pixels_clamped_mmx; |
|
483 |
- clear_blocks= clear_blocks_mmx; |
|
484 |
- pix_sum= pix_sum16_mmx; |
|
485 |
- |
|
486 |
- pix_abs16x16 = pix_abs16x16_mmx; |
|
487 |
- pix_abs16x16_x2 = pix_abs16x16_x2_mmx; |
|
488 |
- pix_abs16x16_y2 = pix_abs16x16_y2_mmx; |
|
489 |
- pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx; |
|
490 |
- pix_abs8x8 = pix_abs8x8_mmx; |
|
491 |
- pix_abs8x8_x2 = pix_abs8x8_x2_mmx; |
|
492 |
- pix_abs8x8_y2 = pix_abs8x8_y2_mmx; |
|
493 |
- pix_abs8x8_xy2= pix_abs8x8_xy2_mmx; |
|
494 |
- |
|
495 |
- put_pixels_tab[0][0] = put_pixels16_mmx; |
|
496 |
- put_pixels_tab[0][1] = put_pixels16_x2_mmx; |
|
497 |
- put_pixels_tab[0][2] = put_pixels16_y2_mmx; |
|
498 |
- put_pixels_tab[0][3] = put_pixels16_xy2_mmx; |
|
499 |
- |
|
500 |
- put_no_rnd_pixels_tab[0][0] = put_pixels16_mmx; |
|
501 |
- put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx; |
|
502 |
- put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx; |
|
503 |
- put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_mmx; |
|
504 |
- |
|
505 |
- avg_pixels_tab[0][0] = avg_pixels16_mmx; |
|
506 |
- avg_pixels_tab[0][1] = avg_pixels16_x2_mmx; |
|
507 |
- avg_pixels_tab[0][2] = avg_pixels16_y2_mmx; |
|
508 |
- avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx; |
|
509 |
- |
|
510 |
- avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16_mmx; |
|
511 |
- avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_mmx; |
|
512 |
- avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_mmx; |
|
513 |
- avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_mmx; |
|
514 |
- |
|
515 |
- put_pixels_tab[1][0] = put_pixels8_mmx; |
|
516 |
- put_pixels_tab[1][1] = put_pixels8_x2_mmx; |
|
517 |
- put_pixels_tab[1][2] = put_pixels8_y2_mmx; |
|
518 |
- put_pixels_tab[1][3] = put_pixels8_xy2_mmx; |
|
519 |
- |
|
520 |
- put_no_rnd_pixels_tab[1][0] = put_pixels8_mmx; |
|
521 |
- put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx; |
|
522 |
- put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx; |
|
523 |
- put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_mmx; |
|
524 |
- |
|
525 |
- avg_pixels_tab[1][0] = avg_pixels8_mmx; |
|
526 |
- avg_pixels_tab[1][1] = avg_pixels8_x2_mmx; |
|
527 |
- avg_pixels_tab[1][2] = avg_pixels8_y2_mmx; |
|
528 |
- avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx; |
|
529 |
- |
|
530 |
- avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_mmx; |
|
531 |
- avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_mmx; |
|
532 |
- avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_mmx; |
|
533 |
- avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_mmx; |
|
479 |
+ c->get_pixels = get_pixels_mmx; |
|
480 |
+ c->diff_pixels = diff_pixels_mmx; |
|
481 |
+ c->put_pixels_clamped = put_pixels_clamped_mmx; |
|
482 |
+ c->add_pixels_clamped = add_pixels_clamped_mmx; |
|
483 |
+ c->clear_blocks = clear_blocks_mmx; |
|
484 |
+ c->pix_sum = pix_sum16_mmx; |
|
485 |
+ |
|
486 |
+ c->pix_abs16x16 = pix_abs16x16_mmx; |
|
487 |
+ c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx; |
|
488 |
+ c->pix_abs16x16_y2 = pix_abs16x16_y2_mmx; |
|
489 |
+ c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx; |
|
490 |
+ c->pix_abs8x8 = pix_abs8x8_mmx; |
|
491 |
+ c->pix_abs8x8_x2 = pix_abs8x8_x2_mmx; |
|
492 |
+ c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx; |
|
493 |
+ c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx; |
|
494 |
+ |
|
495 |
+ c->put_pixels_tab[0][0] = put_pixels16_mmx; |
|
496 |
+ c->put_pixels_tab[0][1] = put_pixels16_x2_mmx; |
|
497 |
+ c->put_pixels_tab[0][2] = put_pixels16_y2_mmx; |
|
498 |
+ c->put_pixels_tab[0][3] = put_pixels16_xy2_mmx; |
|
499 |
+ |
|
500 |
+ c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mmx; |
|
501 |
+ c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx; |
|
502 |
+ c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx; |
|
503 |
+ c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_mmx; |
|
504 |
+ |
|
505 |
+ c->avg_pixels_tab[0][0] = avg_pixels16_mmx; |
|
506 |
+ c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx; |
|
507 |
+ c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx; |
|
508 |
+ c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx; |
|
509 |
+ |
|
510 |
+ c->avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16_mmx; |
|
511 |
+ c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_mmx; |
|
512 |
+ c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_mmx; |
|
513 |
+ c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_mmx; |
|
514 |
+ |
|
515 |
+ c->put_pixels_tab[1][0] = put_pixels8_mmx; |
|
516 |
+ c->put_pixels_tab[1][1] = put_pixels8_x2_mmx; |
|
517 |
+ c->put_pixels_tab[1][2] = put_pixels8_y2_mmx; |
|
518 |
+ c->put_pixels_tab[1][3] = put_pixels8_xy2_mmx; |
|
519 |
+ |
|
520 |
+ c->put_no_rnd_pixels_tab[1][0] = put_pixels8_mmx; |
|
521 |
+ c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx; |
|
522 |
+ c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx; |
|
523 |
+ c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_mmx; |
|
524 |
+ |
|
525 |
+ c->avg_pixels_tab[1][0] = avg_pixels8_mmx; |
|
526 |
+ c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx; |
|
527 |
+ c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx; |
|
528 |
+ c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx; |
|
529 |
+ |
|
530 |
+ c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_mmx; |
|
531 |
+ c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_mmx; |
|
532 |
+ c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_mmx; |
|
533 |
+ c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_mmx; |
|
534 | 534 |
|
535 | 535 |
if (mm_flags & MM_MMXEXT) { |
536 |
- pix_abs16x16 = pix_abs16x16_mmx2; |
|
537 |
- pix_abs16x16_x2 = pix_abs16x16_x2_mmx2; |
|
538 |
- pix_abs16x16_y2 = pix_abs16x16_y2_mmx2; |
|
539 |
- pix_abs16x16_xy2= pix_abs16x16_xy2_mmx2; |
|
540 |
- |
|
541 |
- pix_abs8x8 = pix_abs8x8_mmx2; |
|
542 |
- pix_abs8x8_x2 = pix_abs8x8_x2_mmx2; |
|
543 |
- pix_abs8x8_y2 = pix_abs8x8_y2_mmx2; |
|
544 |
- pix_abs8x8_xy2= pix_abs8x8_xy2_mmx2; |
|
545 |
- |
|
546 |
- put_pixels_tab[0][1] = put_pixels16_x2_mmx2; |
|
547 |
- put_pixels_tab[0][2] = put_pixels16_y2_mmx2; |
|
548 |
- put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2; |
|
549 |
- put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2; |
|
550 |
- |
|
551 |
- avg_pixels_tab[0][0] = avg_pixels16_mmx2; |
|
552 |
- avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2; |
|
553 |
- avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2; |
|
554 |
- avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2; |
|
555 |
- |
|
556 |
- put_pixels_tab[1][1] = put_pixels8_x2_mmx2; |
|
557 |
- put_pixels_tab[1][2] = put_pixels8_y2_mmx2; |
|
558 |
- put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2; |
|
559 |
- put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2; |
|
560 |
- |
|
561 |
- avg_pixels_tab[1][0] = avg_pixels8_mmx2; |
|
562 |
- avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2; |
|
563 |
- avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2; |
|
564 |
- avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2; |
|
536 |
+ c->pix_abs16x16 = pix_abs16x16_mmx2; |
|
537 |
+ c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx2; |
|
538 |
+ c->pix_abs16x16_y2 = pix_abs16x16_y2_mmx2; |
|
539 |
+ c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx2; |
|
540 |
+ |
|
541 |
+ c->pix_abs8x8 = pix_abs8x8_mmx2; |
|
542 |
+ c->pix_abs8x8_x2 = pix_abs8x8_x2_mmx2; |
|
543 |
+ c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx2; |
|
544 |
+ c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx2; |
|
545 |
+ |
|
546 |
+ c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; |
|
547 |
+ c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2; |
|
548 |
+ c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2; |
|
549 |
+ c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2; |
|
550 |
+ |
|
551 |
+ c->avg_pixels_tab[0][0] = avg_pixels16_mmx2; |
|
552 |
+ c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2; |
|
553 |
+ c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2; |
|
554 |
+ c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2; |
|
555 |
+ |
|
556 |
+ c->put_pixels_tab[1][1] = put_pixels8_x2_mmx2; |
|
557 |
+ c->put_pixels_tab[1][2] = put_pixels8_y2_mmx2; |
|
558 |
+ c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2; |
|
559 |
+ c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2; |
|
560 |
+ |
|
561 |
+ c->avg_pixels_tab[1][0] = avg_pixels8_mmx2; |
|
562 |
+ c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2; |
|
563 |
+ c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2; |
|
564 |
+ c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2; |
|
565 | 565 |
} else if (mm_flags & MM_3DNOW) { |
566 |
- put_pixels_tab[0][1] = put_pixels16_x2_3dnow; |
|
567 |
- put_pixels_tab[0][2] = put_pixels16_y2_3dnow; |
|
568 |
- put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow; |
|
569 |
- put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_3dnow; |
|
570 |
- |
|
571 |
- avg_pixels_tab[0][0] = avg_pixels16_3dnow; |
|
572 |
- avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow; |
|
573 |
- avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow; |
|
574 |
- avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow; |
|
575 |
- |
|
576 |
- put_pixels_tab[1][1] = put_pixels8_x2_3dnow; |
|
577 |
- put_pixels_tab[1][2] = put_pixels8_y2_3dnow; |
|
578 |
- put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_3dnow; |
|
579 |
- put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_3dnow; |
|
580 |
- |
|
581 |
- avg_pixels_tab[1][0] = avg_pixels8_3dnow; |
|
582 |
- avg_pixels_tab[1][1] = avg_pixels8_x2_3dnow; |
|
583 |
- avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow; |
|
584 |
- avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow; |
|
566 |
+ c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow; |
|
567 |
+ c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow; |
|
568 |
+ c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow; |
|
569 |
+ c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_3dnow; |
|
570 |
+ |
|
571 |
+ c->avg_pixels_tab[0][0] = avg_pixels16_3dnow; |
|
572 |
+ c->avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow; |
|
573 |
+ c->avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow; |
|
574 |
+ c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow; |
|
575 |
+ |
|
576 |
+ c->put_pixels_tab[1][1] = put_pixels8_x2_3dnow; |
|
577 |
+ c->put_pixels_tab[1][2] = put_pixels8_y2_3dnow; |
|
578 |
+ c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_3dnow; |
|
579 |
+ c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_3dnow; |
|
580 |
+ |
|
581 |
+ c->avg_pixels_tab[1][0] = avg_pixels8_3dnow; |
|
582 |
+ c->avg_pixels_tab[1][1] = avg_pixels8_x2_3dnow; |
|
583 |
+ c->avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow; |
|
584 |
+ c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow; |
|
585 | 585 |
} |
586 | 586 |
} |
587 | 587 |
|
... | ... |
@@ -624,25 +624,24 @@ void dsputil_init_mmx(void) |
624 | 624 |
/* remove any non-bit-exact operation (for testing purposes). NOTE that |
625 | 625 |
this function should be kept as small as possible because it is |
626 | 626 |
always difficult to automatically test non-bit-exact cases. */ |
627 |
-void dsputil_set_bit_exact_mmx(void) |
|
627 |
+void dsputil_set_bit_exact_mmx(DSPContext* c, unsigned mask) |
|
628 | 628 |
{ |
629 | 629 |
if (mm_flags & MM_MMX) { |
630 |
- |
|
631 | 630 |
/* MMX2 & 3DNOW */ |
632 |
- put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx; |
|
633 |
- put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx; |
|
634 |
- avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx; |
|
635 |
- put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx; |
|
636 |
- put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx; |
|
637 |
- avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx; |
|
631 |
+ c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx; |
|
632 |
+ c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx; |
|
633 |
+ c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx; |
|
634 |
+ c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx; |
|
635 |
+ c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx; |
|
636 |
+ c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx; |
|
638 | 637 |
|
639 | 638 |
if (mm_flags & MM_MMXEXT) { |
640 |
- pix_abs16x16_x2 = pix_abs16x16_x2_mmx; |
|
641 |
- pix_abs16x16_y2 = pix_abs16x16_y2_mmx; |
|
642 |
- pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx; |
|
643 |
- pix_abs8x8_x2 = pix_abs8x8_x2_mmx; |
|
644 |
- pix_abs8x8_y2 = pix_abs8x8_y2_mmx; |
|
645 |
- pix_abs8x8_xy2= pix_abs8x8_xy2_mmx; |
|
639 |
+ c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx; |
|
640 |
+ c->pix_abs16x16_y2 = pix_abs16x16_y2_mmx; |
|
641 |
+ c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx; |
|
642 |
+ c->pix_abs8x8_x2 = pix_abs8x8_x2_mmx; |
|
643 |
+ c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx; |
|
644 |
+ c->pix_abs8x8_xy2= pix_abs8x8_xy2_mmx; |
|
646 | 645 |
} |
647 | 646 |
} |
648 | 647 |
} |
... | ... |
@@ -88,8 +88,8 @@ static int pix_norm(UINT8 * pix1, UINT8 * pix2, int line_size) |
88 | 88 |
return s; |
89 | 89 |
} |
90 | 90 |
|
91 |
-static void no_motion_search(MpegEncContext * s, |
|
92 |
- int *mx_ptr, int *my_ptr) |
|
91 |
+static inline void no_motion_search(MpegEncContext * s, |
|
92 |
+ int *mx_ptr, int *my_ptr) |
|
93 | 93 |
{ |
94 | 94 |
*mx_ptr = 16 * s->mb_x; |
95 | 95 |
*my_ptr = 16 * s->mb_y; |
... | ... |
@@ -123,7 +123,7 @@ static int full_motion_search(MpegEncContext * s, |
123 | 123 |
my = 0; |
124 | 124 |
for (y = y1; y <= y2; y++) { |
125 | 125 |
for (x = x1; x <= x2; x++) { |
126 |
- d = pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, |
|
126 |
+ d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, |
|
127 | 127 |
s->linesize); |
128 | 128 |
if (d < dmin || |
129 | 129 |
(d == dmin && |
... | ... |
@@ -188,7 +188,7 @@ static int log_motion_search(MpegEncContext * s, |
188 | 188 |
do { |
189 | 189 |
for (y = y1; y <= y2; y += range) { |
190 | 190 |
for (x = x1; x <= x2; x += range) { |
191 |
- d = pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize); |
|
191 |
+ d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize); |
|
192 | 192 |
if (d < dmin || (d == dmin && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) { |
193 | 193 |
dmin = d; |
194 | 194 |
mx = x; |
... | ... |
@@ -268,7 +268,7 @@ static int phods_motion_search(MpegEncContext * s, |
268 | 268 |
|
269 | 269 |
lastx = x; |
270 | 270 |
for (x = x1; x <= x2; x += range) { |
271 |
- d = pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize); |
|
271 |
+ d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize); |
|
272 | 272 |
if (d < dminx || (d == dminx && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) { |
273 | 273 |
dminx = d; |
274 | 274 |
mx = x; |
... | ... |
@@ -277,7 +277,7 @@ static int phods_motion_search(MpegEncContext * s, |
277 | 277 |
|
278 | 278 |
x = lastx; |
279 | 279 |
for (y = y1; y <= y2; y += range) { |
280 |
- d = pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize); |
|
280 |
+ d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize); |
|
281 | 281 |
if (d < dminy || (d == dminy && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) { |
282 | 282 |
dminy = d; |
283 | 283 |
my = y; |
... | ... |
@@ -324,7 +324,7 @@ static int phods_motion_search(MpegEncContext * s, |
324 | 324 |
const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\ |
325 | 325 |
const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\ |
326 | 326 |
if(map[index]!=key){\ |
327 |
- d = pix_abs16x16(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\ |
|
327 |
+ d = s->dsp.pix_abs16x16(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\ |
|
328 | 328 |
d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\ |
329 | 329 |
COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\ |
330 | 330 |
map[index]= key;\ |
... | ... |
@@ -355,7 +355,7 @@ static int phods_motion_search(MpegEncContext * s, |
355 | 355 |
const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\ |
356 | 356 |
const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\ |
357 | 357 |
if(map[index]!=key){\ |
358 |
- d = pix_abs8x8(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\ |
|
358 |
+ d = s->dsp.pix_abs8x8(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\ |
|
359 | 359 |
d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\ |
360 | 360 |
COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\ |
361 | 361 |
map[index]= key;\ |
... | ... |
@@ -590,7 +590,7 @@ static int epzs_motion_search(MpegEncContext * s, |
590 | 590 |
|
591 | 591 |
map_generation= update_map_generation(s); |
592 | 592 |
|
593 |
- dmin = pix_abs16x16(new_pic, old_pic, pic_stride); |
|
593 |
+ dmin = s->dsp.pix_abs16x16(new_pic, old_pic, pic_stride); |
|
594 | 594 |
map[0]= map_generation; |
595 | 595 |
score_map[0]= dmin; |
596 | 596 |
|
... | ... |
@@ -644,11 +644,11 @@ static int epzs_motion_search(MpegEncContext * s, |
644 | 644 |
if(s->me_method==ME_EPZS) |
645 | 645 |
dmin= small_diamond_search(s, best, dmin, new_pic, old_pic, pic_stride, |
646 | 646 |
pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, |
647 |
- shift, map, score_map, map_generation, pix_abs16x16); |
|
647 |
+ shift, map, score_map, map_generation, s->dsp.pix_abs16x16); |
|
648 | 648 |
else |
649 | 649 |
dmin= cross_search(s, best, dmin, new_pic, old_pic, pic_stride, |
650 | 650 |
pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, |
651 |
- shift, map, score_map, map_generation, pix_abs16x16); |
|
651 |
+ shift, map, score_map, map_generation, s->dsp.pix_abs16x16); |
|
652 | 652 |
//check(best[0],best[1],0, b1) |
653 | 653 |
*mx_ptr= best[0]; |
654 | 654 |
*my_ptr= best[1]; |
... | ... |
@@ -683,7 +683,7 @@ static int epzs_motion_search4(MpegEncContext * s, int block, |
683 | 683 |
//printf("%d %d %d %d //",xmin, ymin, xmax, ymax); |
684 | 684 |
/* first line */ |
685 | 685 |
if ((s->mb_y == 0 || s->first_slice_line) && block<2) { |
686 |
- CHECK_MV4(P_LEFT[0]>>shift, P_LEFT[1]>>shift) |
|
686 |
+ CHECK_MV4(P_LEFT[0]>>shift, P_LEFT[1]>>shift) |
|
687 | 687 |
CHECK_MV4(P_LAST[0]>>shift, P_LAST[1]>>shift) |
688 | 688 |
CHECK_MV4(P_MV1[0]>>shift, P_MV1[1]>>shift) |
689 | 689 |
}else{ |
... | ... |
@@ -705,11 +705,11 @@ static int epzs_motion_search4(MpegEncContext * s, int block, |
705 | 705 |
if(s->me_method==ME_EPZS) |
706 | 706 |
dmin= small_diamond_search(s, best, dmin, new_pic, old_pic, pic_stride, |
707 | 707 |
pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, |
708 |
- shift, map, score_map, map_generation, pix_abs8x8); |
|
708 |
+ shift, map, score_map, map_generation, s->dsp.pix_abs8x8); |
|
709 | 709 |
else |
710 | 710 |
dmin= cross_search(s, best, dmin, new_pic, old_pic, pic_stride, |
711 | 711 |
pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, |
712 |
- shift, map, score_map, map_generation, pix_abs8x8); |
|
712 |
+ shift, map, score_map, map_generation, s->dsp.pix_abs8x8); |
|
713 | 713 |
|
714 | 714 |
*mx_ptr= best[0]; |
715 | 715 |
*my_ptr= best[1]; |
... | ... |
@@ -1023,8 +1023,8 @@ static inline int mv4_search(MpegEncContext *s, int xmin, int ymin, int xmax, in |
1023 | 1023 |
dmin4 = epzs_motion_search4(s, block, &mx4, &my4, P, pred_x4, pred_y4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, ref_picture); |
1024 | 1024 |
|
1025 | 1025 |
dmin4= fast_halfpel_motion_search(s, &mx4, &my4, dmin4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, |
1026 |
- pred_x4, pred_y4, ref_picture, pix_abs8x8_x2, |
|
1027 |
- pix_abs8x8_y2, pix_abs8x8_xy2, block); |
|
1026 |
+ pred_x4, pred_y4, ref_picture, s->dsp.pix_abs8x8_x2, |
|
1027 |
+ s->dsp.pix_abs8x8_y2, s->dsp.pix_abs8x8_xy2, block); |
|
1028 | 1028 |
|
1029 | 1029 |
s->motion_val[ s->block_index[block] ][0]= mx4; |
1030 | 1030 |
s->motion_val[ s->block_index[block] ][1]= my4; |
... | ... |
@@ -1133,9 +1133,10 @@ void ff_estimate_p_frame_motion(MpegEncContext * s, |
1133 | 1133 |
/* At this point (mx,my) are full-pel and the relative displacement */ |
1134 | 1134 |
ppix = ref_picture + ((yy+my) * s->linesize) + (xx+mx); |
1135 | 1135 |
|
1136 |
- sum = pix_sum(pix, s->linesize); |
|
1136 |
+ sum = s->dsp.pix_sum(pix, s->linesize); |
|
1137 | 1137 |
|
1138 |
- varc = (pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8; |
|
1138 |
+ varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8; |
|
1139 |
+ // FIXME: MMX OPTIMIZE |
|
1139 | 1140 |
vard = (pix_norm(pix, ppix, s->linesize)+128)>>8; |
1140 | 1141 |
|
1141 | 1142 |
//printf("%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout); |
... | ... |
@@ -1161,13 +1162,13 @@ void ff_estimate_p_frame_motion(MpegEncContext * s, |
1161 | 1161 |
if (varc*2 + 200 > vard){ |
1162 | 1162 |
mb_type|= MB_TYPE_INTER; |
1163 | 1163 |
if(s->me_method >= ME_EPZS) |
1164 |
- fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, |
|
1165 |
- pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2, |
|
1166 |
- pix_abs16x16_xy2, 0); |
|
1164 |
+ fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, |
|
1165 |
+ pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2, |
|
1166 |
+ s->dsp.pix_abs16x16_y2, s->dsp.pix_abs16x16_xy2, 0); |
|
1167 | 1167 |
else |
1168 |
- halfpel_motion_search( s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, |
|
1169 |
- pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2, |
|
1170 |
- pix_abs16x16_xy2, 0); |
|
1168 |
+ halfpel_motion_search( s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, |
|
1169 |
+ pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2, |
|
1170 |
+ s->dsp.pix_abs16x16_y2, s->dsp.pix_abs16x16_xy2, 0); |
|
1171 | 1171 |
}else{ |
1172 | 1172 |
mx <<=1; |
1173 | 1173 |
my <<=1; |
... | ... |
@@ -1186,13 +1187,13 @@ void ff_estimate_p_frame_motion(MpegEncContext * s, |
1186 | 1186 |
mb_type|= MB_TYPE_INTER; |
1187 | 1187 |
if (s->me_method != ME_ZERO) { |
1188 | 1188 |
if(s->me_method >= ME_EPZS) |
1189 |
- dmin= fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, |
|
1190 |
- pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2, |
|
1191 |
- pix_abs16x16_xy2, 0); |
|
1189 |
+ dmin= fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, |
|
1190 |
+ pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2, s->dsp.pix_abs16x16_y2, |
|
1191 |
+ s->dsp.pix_abs16x16_xy2, 0); |
|
1192 | 1192 |
else |
1193 |
- dmin= halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, |
|
1194 |
- pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2, |
|
1195 |
- pix_abs16x16_xy2, 0); |
|
1193 |
+ dmin= halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, |
|
1194 |
+ pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2, s->dsp.pix_abs16x16_y2, |
|
1195 |
+ s->dsp.pix_abs16x16_xy2, 0); |
|
1196 | 1196 |
if((s->flags&CODEC_FLAG_4MV) |
1197 | 1197 |
&& !s->skip_me && varc>50 && vard>10){ |
1198 | 1198 |
int dmin4= mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift); |
... | ... |
@@ -1303,9 +1304,9 @@ int ff_estimate_motion_b(MpegEncContext * s, |
1303 | 1303 |
break; |
1304 | 1304 |
} |
1305 | 1305 |
|
1306 |
- dmin= fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, |
|
1307 |
- pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2, |
|
1308 |
- pix_abs16x16_xy2, 0); |
|
1306 |
+ dmin= fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, |
|
1307 |
+ pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2, s->dsp.pix_abs16x16_y2, |
|
1308 |
+ s->dsp.pix_abs16x16_xy2, 0); |
|
1309 | 1309 |
//printf("%d %d %d %d//", s->mb_x, s->mb_y, mx, my); |
1310 | 1310 |
// s->mb_type[mb_y*s->mb_width + mb_x]= mb_type; |
1311 | 1311 |
mv_table[mot_xy][0]= mx; |
... | ... |
@@ -1343,8 +1344,8 @@ static inline int check_bidir_mv(MpegEncContext * s, |
1343 | 1343 |
dxy&= 1; |
1344 | 1344 |
|
1345 | 1345 |
ptr = s->last_picture[0] + (src_y * s->linesize) + src_x; |
1346 |
- put_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16); |
|
1347 |
- |
|
1346 |
+ s->dsp.put_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16); |
|
1347 |
+ |
|
1348 | 1348 |
fbmin += (mv_penalty[motion_bx-pred_bx] + mv_penalty[motion_by-pred_by])*s->qscale; |
1349 | 1349 |
|
1350 | 1350 |
dxy = ((motion_by & 1) << 1) | (motion_bx & 1); |
... | ... |
@@ -1356,11 +1357,11 @@ static inline int check_bidir_mv(MpegEncContext * s, |
1356 | 1356 |
src_y = clip(src_y, -16, s->height); |
1357 | 1357 |
if (src_y == s->height) |
1358 | 1358 |
dxy&= 1; |
1359 |
- |
|
1359 |
+ |
|
1360 | 1360 |
ptr = s->next_picture[0] + (src_y * s->linesize) + src_x; |
1361 |
- avg_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16); |
|
1362 |
- |
|
1363 |
- fbmin += pix_abs16x16(s->new_picture[0] + mb_x*16 + mb_y*16*s->linesize, dest_y, s->linesize); |
|
1361 |
+ s->dsp.avg_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16); |
|
1362 |
+ |
|
1363 |
+ fbmin += s->dsp.pix_abs16x16(s->new_picture[0] + mb_x*16 + mb_y*16*s->linesize, dest_y, s->linesize); |
|
1364 | 1364 |
return fbmin; |
1365 | 1365 |
} |
1366 | 1366 |
|
... | ... |
@@ -1443,7 +1444,7 @@ static inline int direct_search(MpegEncContext * s, |
1443 | 1443 |
if (src_y == height) dxy &= ~2; |
1444 | 1444 |
|
1445 | 1445 |
ptr = s->last_picture[0] + (src_y * s->linesize) + src_x; |
1446 |
- put_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16); |
|
1446 |
+ s->dsp.put_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16); |
|
1447 | 1447 |
|
1448 | 1448 |
dxy = ((motion_by & 1) << 1) | (motion_bx & 1); |
1449 | 1449 |
src_x = (mb_x + bx) * 16 + (motion_bx >> 1); |
... | ... |
@@ -1453,7 +1454,7 @@ static inline int direct_search(MpegEncContext * s, |
1453 | 1453 |
src_y = clip(src_y, -16, height); |
1454 | 1454 |
if (src_y == height) dxy &= ~2; |
1455 | 1455 |
|
1456 |
- avg_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16); |
|
1456 |
+ s->dsp.avg_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16); |
|
1457 | 1457 |
} |
1458 | 1458 |
} |
1459 | 1459 |
|
... | ... |
@@ -1623,7 +1623,7 @@ static int mpeg_decode_slice(AVCodecContext *avctx, |
1623 | 1623 |
s->mb_incr= 1; |
1624 | 1624 |
|
1625 | 1625 |
for(;;) { |
1626 |
- clear_blocks(s->block[0]); |
|
1626 |
+ s->dsp.clear_blocks(s->block[0]); |
|
1627 | 1627 |
|
1628 | 1628 |
ret = mpeg_decode_mb(s, s->block); |
1629 | 1629 |
dprintf("ret=%d\n", ret); |
... | ... |
@@ -57,7 +57,7 @@ static void emulated_edge_mc(MpegEncContext *s, UINT8 *src, int linesize, int bl |
57 | 57 |
/* for jpeg fast DCT */ |
58 | 58 |
#define CONST_BITS 14 |
59 | 59 |
|
60 |
-static const unsigned short aanscales[64] = { |
|
60 |
+static const uint16_t aanscales[64] = { |
|
61 | 61 |
/* precomputed values scaled up by 14 bits */ |
62 | 62 |
16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, |
63 | 63 |
22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270, |
... | ... |
@@ -70,7 +70,7 @@ static const unsigned short aanscales[64] = { |
70 | 70 |
}; |
71 | 71 |
|
72 | 72 |
/* Input permutation for the simple_idct_mmx */ |
73 |
-static const UINT8 simple_mmx_permutation[64]={ |
|
73 |
+static const uint8_t simple_mmx_permutation[64]={ |
|
74 | 74 |
0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, |
75 | 75 |
0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, |
76 | 76 |
0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, |
... | ... |
@@ -81,7 +81,7 @@ static const UINT8 simple_mmx_permutation[64]={ |
81 | 81 |
0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, |
82 | 82 |
}; |
83 | 83 |
|
84 |
-static UINT8 h263_chroma_roundtab[16] = { |
|
84 |
+static const uint8_t h263_chroma_roundtab[16] = { |
|
85 | 85 |
0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, |
86 | 86 |
}; |
87 | 87 |
|
... | ... |
@@ -172,16 +172,19 @@ void ff_init_scantable(MpegEncContext *s, ScanTable *st, const UINT8 *src_scanta |
172 | 172 |
} |
173 | 173 |
|
174 | 174 |
/* XXX: these functions should be removed ASAP once all IDCTs are |
175 |
- converted */ |
|
175 |
+ converted */ |
|
176 |
+// *FIXME* this is an ugly hack using local statics |
|
177 |
+static void (*ff_put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); |
|
178 |
+static void (*ff_add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); |
|
176 | 179 |
static void ff_jref_idct_put(UINT8 *dest, int line_size, DCTELEM *block) |
177 | 180 |
{ |
178 | 181 |
j_rev_dct (block); |
179 |
- put_pixels_clamped(block, dest, line_size); |
|
182 |
+ ff_put_pixels_clamped(block, dest, line_size); |
|
180 | 183 |
} |
181 | 184 |
static void ff_jref_idct_add(UINT8 *dest, int line_size, DCTELEM *block) |
182 | 185 |
{ |
183 | 186 |
j_rev_dct (block); |
184 |
- add_pixels_clamped(block, dest, line_size); |
|
187 |
+ ff_add_pixels_clamped(block, dest, line_size); |
|
185 | 188 |
} |
186 | 189 |
|
187 | 190 |
/* init common dct for both encoder and decoder */ |
... | ... |
@@ -189,6 +192,9 @@ int DCT_common_init(MpegEncContext *s) |
189 | 189 |
{ |
190 | 190 |
int i; |
191 | 191 |
|
192 |
+ ff_put_pixels_clamped = s->dsp.put_pixels_clamped; |
|
193 |
+ ff_add_pixels_clamped = s->dsp.add_pixels_clamped; |
|
194 |
+ |
|
192 | 195 |
s->dct_unquantize_h263 = dct_unquantize_h263_c; |
193 | 196 |
s->dct_unquantize_mpeg1 = dct_unquantize_mpeg1_c; |
194 | 197 |
s->dct_unquantize_mpeg2 = dct_unquantize_mpeg2_c; |
... | ... |
@@ -268,29 +274,30 @@ int MPV_common_init(MpegEncContext *s) |
268 | 268 |
UINT8 *pict; |
269 | 269 |
int y_size, c_size, yc_size, i; |
270 | 270 |
|
271 |
+ dsputil_init(&s->dsp, s->avctx->dsp_mask); |
|
271 | 272 |
DCT_common_init(s); |
272 |
- |
|
273 |
+ |
|
273 | 274 |
s->flags= s->avctx->flags; |
274 | 275 |
|
275 | 276 |
s->mb_width = (s->width + 15) / 16; |
276 | 277 |
s->mb_height = (s->height + 15) / 16; |
277 |
- |
|
278 |
- y_size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2); |
|
279 |
- c_size = (s->mb_width + 2) * (s->mb_height + 2); |
|
280 |
- yc_size = y_size + 2 * c_size; |
|
281 |
- |
|
278 |
+ |
|
282 | 279 |
/* set default edge pos, will be overridden in decode_header if needed */ |
283 | 280 |
s->h_edge_pos= s->mb_width*16; |
284 | 281 |
s->v_edge_pos= s->mb_height*16; |
285 |
- |
|
282 |
+ |
|
283 |
+ s->mb_num = s->mb_width * s->mb_height; |
|
284 |
+ |
|
285 |
+ y_size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2); |
|
286 |
+ c_size = (s->mb_width + 2) * (s->mb_height + 2); |
|
287 |
+ yc_size = y_size + 2 * c_size; |
|
288 |
+ |
|
286 | 289 |
/* convert fourcc to upper case */ |
287 | 290 |
s->avctx->fourcc= toupper( s->avctx->fourcc &0xFF) |
288 | 291 |
+ (toupper((s->avctx->fourcc>>8 )&0xFF)<<8 ) |
289 | 292 |
+ (toupper((s->avctx->fourcc>>16)&0xFF)<<16) |
290 | 293 |
+ (toupper((s->avctx->fourcc>>24)&0xFF)<<24); |
291 | 294 |
|
292 |
- s->mb_num = s->mb_width * s->mb_height; |
|
293 |
- |
|
294 | 295 |
if(!(s->flags&CODEC_FLAG_DR1)){ |
295 | 296 |
s->linesize = s->mb_width * 16 + 2 * EDGE_WIDTH; |
296 | 297 |
s->uvlinesize = s->mb_width * 8 + EDGE_WIDTH; |
... | ... |
@@ -1133,17 +1140,17 @@ static inline void gmc1_motion(MpegEncContext *s, |
1133 | 1133 |
} |
1134 | 1134 |
|
1135 | 1135 |
if((motion_x|motion_y)&7){ |
1136 |
- ff_gmc1(dest_y , ptr , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding); |
|
1137 |
- ff_gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding); |
|
1136 |
+ s->dsp.gmc1(dest_y , ptr , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding); |
|
1137 |
+ s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding); |
|
1138 | 1138 |
}else{ |
1139 | 1139 |
int dxy; |
1140 | 1140 |
|
1141 | 1141 |
dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2); |
1142 | 1142 |
if (s->no_rounding){ |
1143 |
- put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16); |
|
1143 |
+ s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16); |
|
1144 | 1144 |
}else{ |
1145 |
- put_pixels_tab [0][dxy](dest_y, ptr, linesize, 16); |
|
1146 |
- } |
|
1145 |
+ s->dsp.put_pixels_tab [0][dxy](dest_y, ptr, linesize, 16); |
|
1146 |
+ } |
|
1147 | 1147 |
} |
1148 | 1148 |
|
1149 | 1149 |
if(s->flags&CODEC_FLAG_GRAY) return; |
... | ... |
@@ -1167,14 +1174,14 @@ static inline void gmc1_motion(MpegEncContext *s, |
1167 | 1167 |
emulated_edge_mc(s, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1); |
1168 | 1168 |
ptr= s->edge_emu_buffer; |
1169 | 1169 |
} |
1170 |
- ff_gmc1(dest_cb + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding); |
|
1170 |
+ s->dsp.gmc1(dest_cb + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding); |
|
1171 | 1171 |
|
1172 | 1172 |
ptr = ref_picture[2] + offset; |
1173 | 1173 |
if(emu){ |
1174 | 1174 |
emulated_edge_mc(s, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1); |
1175 | 1175 |
ptr= s->edge_emu_buffer; |
1176 | 1176 |
} |
1177 |
- ff_gmc1(dest_cr + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding); |
|
1177 |
+ s->dsp.gmc1(dest_cr + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding); |
|
1178 | 1178 |
|
1179 | 1179 |
return; |
1180 | 1180 |
} |
... | ... |
@@ -1199,14 +1206,14 @@ static inline void gmc_motion(MpegEncContext *s, |
1199 | 1199 |
ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16; |
1200 | 1200 |
oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16; |
1201 | 1201 |
|
1202 |
- ff_gmc(dest_y, ptr, linesize, 16, |
|
1202 |
+ s->dsp.gmc(dest_y, ptr, linesize, 16, |
|
1203 | 1203 |
ox, |
1204 | 1204 |
oy, |
1205 | 1205 |
s->sprite_delta[0][0], s->sprite_delta[0][1], |
1206 | 1206 |
s->sprite_delta[1][0], s->sprite_delta[1][1], |
1207 | 1207 |
a+1, (1<<(2*a+1)) - s->no_rounding, |
1208 | 1208 |
s->h_edge_pos, s->v_edge_pos); |
1209 |
- ff_gmc(dest_y+8, ptr, linesize, 16, |
|
1209 |
+ s->dsp.gmc(dest_y+8, ptr, linesize, 16, |
|
1210 | 1210 |
ox + s->sprite_delta[0][0]*8, |
1211 | 1211 |
oy + s->sprite_delta[1][0]*8, |
1212 | 1212 |
s->sprite_delta[0][0], s->sprite_delta[0][1], |
... | ... |
@@ -1224,7 +1231,7 @@ static inline void gmc_motion(MpegEncContext *s, |
1224 | 1224 |
oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8; |
1225 | 1225 |
|
1226 | 1226 |
ptr = ref_picture[1] + (src_offset>>1); |
1227 |
- ff_gmc(dest_cb, ptr, uvlinesize, 8, |
|
1227 |
+ s->dsp.gmc(dest_cb, ptr, uvlinesize, 8, |
|
1228 | 1228 |
ox, |
1229 | 1229 |
oy, |
1230 | 1230 |
s->sprite_delta[0][0], s->sprite_delta[0][1], |
... | ... |
@@ -1233,7 +1240,7 @@ static inline void gmc_motion(MpegEncContext *s, |
1233 | 1233 |
s->h_edge_pos>>1, s->v_edge_pos>>1); |
1234 | 1234 |
|
1235 | 1235 |
ptr = ref_picture[2] + (src_offset>>1); |
1236 |
- ff_gmc(dest_cr, ptr, uvlinesize, 8, |
|
1236 |
+ s->dsp.gmc(dest_cr, ptr, uvlinesize, 8, |
|
1237 | 1237 |
ox, |
1238 | 1238 |
oy, |
1239 | 1239 |
s->sprite_delta[0][0], s->sprite_delta[0][1], |
... | ... |
@@ -1248,7 +1255,7 @@ static void emulated_edge_mc(MpegEncContext *s, UINT8 *src, int linesize, int bl |
1248 | 1248 |
int x, y; |
1249 | 1249 |
int start_y, start_x, end_y, end_x; |
1250 | 1250 |
UINT8 *buf= s->edge_emu_buffer; |
1251 |
- |
|
1251 |
+ |
|
1252 | 1252 |
if(src_y>= h){ |
1253 | 1253 |
src+= (h-1-src_y)*linesize; |
1254 | 1254 |
src_y=h-1; |
... | ... |
@@ -1860,17 +1867,17 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) |
1860 | 1860 |
/* decoding or more than one mb_type (MC was already done otherwise) */ |
1861 | 1861 |
if((!s->encoding) || (s->mb_type[mb_xy]&(s->mb_type[mb_xy]-1))){ |
1862 | 1862 |
if ((!s->no_rounding) || s->pict_type==B_TYPE){ |
1863 |
- op_pix = put_pixels_tab; |
|
1864 |
- op_qpix= put_qpel_pixels_tab; |
|
1863 |
+ op_pix = s->dsp.put_pixels_tab; |
|
1864 |
+ op_qpix= s->dsp.put_qpel_pixels_tab; |
|
1865 | 1865 |
}else{ |
1866 |
- op_pix = put_no_rnd_pixels_tab; |
|
1867 |
- op_qpix= put_no_rnd_qpel_pixels_tab; |
|
1866 |
+ op_pix = s->dsp.put_no_rnd_pixels_tab; |
|
1867 |
+ op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab; |
|
1868 | 1868 |
} |
1869 | 1869 |
|
1870 | 1870 |
if (s->mv_dir & MV_DIR_FORWARD) { |
1871 | 1871 |
MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture, op_pix, op_qpix); |
1872 |
- op_pix = avg_pixels_tab; |
|
1873 |
- op_qpix= avg_qpel_pixels_tab; |
|
1872 |
+ op_pix = s->dsp.avg_pixels_tab; |
|
1873 |
+ op_qpix= s->dsp.avg_qpel_pixels_tab; |
|
1874 | 1874 |
} |
1875 | 1875 |
if (s->mv_dir & MV_DIR_BACKWARD) { |
1876 | 1876 |
MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture, op_pix, op_qpix); |
... | ... |
@@ -2224,10 +2231,10 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) |
2224 | 2224 |
s->interlaced_dct=0; |
2225 | 2225 |
} |
2226 | 2226 |
|
2227 |
- get_pixels(s->block[0], ptr , wrap_y); |
|
2228 |
- get_pixels(s->block[1], ptr + 8, wrap_y); |
|
2229 |
- get_pixels(s->block[2], ptr + dct_offset , wrap_y); |
|
2230 |
- get_pixels(s->block[3], ptr + dct_offset + 8, wrap_y); |
|
2227 |
+ s->dsp.get_pixels(s->block[0], ptr , wrap_y); |
|
2228 |
+ s->dsp.get_pixels(s->block[1], ptr + 8, wrap_y); |
|
2229 |
+ s->dsp.get_pixels(s->block[2], ptr + dct_offset , wrap_y); |
|
2230 |
+ s->dsp.get_pixels(s->block[3], ptr + dct_offset + 8, wrap_y); |
|
2231 | 2231 |
|
2232 | 2232 |
if(s->flags&CODEC_FLAG_GRAY){ |
2233 | 2233 |
skip_dct[4]= 1; |
... | ... |
@@ -2239,14 +2246,14 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) |
2239 | 2239 |
emulated_edge_mc(s, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1); |
2240 | 2240 |
ptr= s->edge_emu_buffer; |
2241 | 2241 |
} |
2242 |
- get_pixels(s->block[4], ptr, wrap_c); |
|
2242 |
+ s->dsp.get_pixels(s->block[4], ptr, wrap_c); |
|
2243 | 2243 |
|
2244 | 2244 |
ptr = s->new_picture[2] + (mb_y * 8 * wrap_c) + mb_x * 8; |
2245 | 2245 |
if(emu){ |
2246 | 2246 |
emulated_edge_mc(s, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1); |
2247 | 2247 |
ptr= s->edge_emu_buffer; |
2248 | 2248 |
} |
2249 |
- get_pixels(s->block[5], ptr, wrap_c); |
|
2249 |
+ s->dsp.get_pixels(s->block[5], ptr, wrap_c); |
|
2250 | 2250 |
} |
2251 | 2251 |
}else{ |
2252 | 2252 |
op_pixels_func (*op_pix)[4]; |
... | ... |
@@ -2266,17 +2273,17 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) |
2266 | 2266 |
ptr_cr = s->new_picture[2] + (mb_y * 8 * wrap_c) + mb_x * 8; |
2267 | 2267 |
|
2268 | 2268 |
if ((!s->no_rounding) || s->pict_type==B_TYPE){ |
2269 |
- op_pix = put_pixels_tab; |
|
2270 |
- op_qpix= put_qpel_pixels_tab; |
|
2269 |
+ op_pix = s->dsp.put_pixels_tab; |
|
2270 |
+ op_qpix= s->dsp.put_qpel_pixels_tab; |
|
2271 | 2271 |
}else{ |
2272 |
- op_pix = put_no_rnd_pixels_tab; |
|
2273 |
- op_qpix= put_no_rnd_qpel_pixels_tab; |
|
2272 |
+ op_pix = s->dsp.put_no_rnd_pixels_tab; |
|
2273 |
+ op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab; |
|
2274 | 2274 |
} |
2275 | 2275 |
|
2276 | 2276 |
if (s->mv_dir & MV_DIR_FORWARD) { |
2277 | 2277 |
MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture, op_pix, op_qpix); |
2278 |
- op_pix = avg_pixels_tab; |
|
2279 |
- op_qpix= avg_qpel_pixels_tab; |
|
2278 |
+ op_pix = s->dsp.avg_pixels_tab; |
|
2279 |
+ op_qpix= s->dsp.avg_qpel_pixels_tab; |
|
2280 | 2280 |
} |
2281 | 2281 |
if (s->mv_dir & MV_DIR_BACKWARD) { |
2282 | 2282 |
MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture, op_pix, op_qpix); |
... | ... |
@@ -2305,10 +2312,10 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) |
2305 | 2305 |
s->interlaced_dct=0; |
2306 | 2306 |
} |
2307 | 2307 |
|
2308 |
- diff_pixels(s->block[0], ptr_y , dest_y , wrap_y); |
|
2309 |
- diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y); |
|
2310 |
- diff_pixels(s->block[2], ptr_y + dct_offset , dest_y + dct_offset , wrap_y); |
|
2311 |
- diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y); |
|
2308 |
+ s->dsp.diff_pixels(s->block[0], ptr_y , dest_y , wrap_y); |
|
2309 |
+ s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y); |
|
2310 |
+ s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset , dest_y + dct_offset , wrap_y); |
|
2311 |
+ s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y); |
|
2312 | 2312 |
|
2313 | 2313 |
if(s->flags&CODEC_FLAG_GRAY){ |
2314 | 2314 |
skip_dct[4]= 1; |
... | ... |
@@ -2318,23 +2325,23 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) |
2318 | 2318 |
emulated_edge_mc(s, ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1); |
2319 | 2319 |
ptr_cb= s->edge_emu_buffer; |
2320 | 2320 |
} |
2321 |
- diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c); |
|
2321 |
+ s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c); |
|
2322 | 2322 |
if(emu){ |
2323 | 2323 |
emulated_edge_mc(s, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1); |
2324 | 2324 |
ptr_cr= s->edge_emu_buffer; |
2325 | 2325 |
} |
2326 |
- diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c); |
|
2326 |
+ s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c); |
|
2327 | 2327 |
} |
2328 | 2328 |
|
2329 | 2329 |
/* pre quantization */ |
2330 | 2330 |
if(s->mc_mb_var[s->mb_width*mb_y+ mb_x]<2*s->qscale*s->qscale){ |
2331 | 2331 |
//FIXME optimize |
2332 |
- if(pix_abs8x8(ptr_y , dest_y , wrap_y) < 20*s->qscale) skip_dct[0]= 1; |
|
2333 |
- if(pix_abs8x8(ptr_y + 8, dest_y + 8, wrap_y) < 20*s->qscale) skip_dct[1]= 1; |
|
2334 |
- if(pix_abs8x8(ptr_y +dct_offset , dest_y +dct_offset , wrap_y) < 20*s->qscale) skip_dct[2]= 1; |
|
2335 |
- if(pix_abs8x8(ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y) < 20*s->qscale) skip_dct[3]= 1; |
|
2336 |
- if(pix_abs8x8(ptr_cb , dest_cb , wrap_y) < 20*s->qscale) skip_dct[4]= 1; |
|
2337 |
- if(pix_abs8x8(ptr_cr , dest_cr , wrap_y) < 20*s->qscale) skip_dct[5]= 1; |
|
2332 |
+ if(s->dsp.pix_abs8x8(ptr_y , dest_y , wrap_y) < 20*s->qscale) skip_dct[0]= 1; |
|
2333 |
+ if(s->dsp.pix_abs8x8(ptr_y + 8, dest_y + 8, wrap_y) < 20*s->qscale) skip_dct[1]= 1; |
|
2334 |
+ if(s->dsp.pix_abs8x8(ptr_y +dct_offset , dest_y +dct_offset , wrap_y) < 20*s->qscale) skip_dct[2]= 1; |
|
2335 |
+ if(s->dsp.pix_abs8x8(ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y) < 20*s->qscale) skip_dct[3]= 1; |
|
2336 |
+ if(s->dsp.pix_abs8x8(ptr_cb , dest_cb , wrap_y) < 20*s->qscale) skip_dct[4]= 1; |
|
2337 |
+ if(s->dsp.pix_abs8x8(ptr_cr , dest_cr , wrap_y) < 20*s->qscale) skip_dct[5]= 1; |
|
2338 | 2338 |
#if 0 |
2339 | 2339 |
{ |
2340 | 2340 |
static int stat[7]; |
... | ... |
@@ -2601,9 +2608,9 @@ static void encode_picture(MpegEncContext *s, int picture_number) |
2601 | 2601 |
int yy = mb_y * 16; |
2602 | 2602 |
uint8_t *pix = s->new_picture[0] + (yy * s->linesize) + xx; |
2603 | 2603 |
int varc; |
2604 |
- int sum = pix_sum(pix, s->linesize); |
|
2604 |
+ int sum = s->dsp.pix_sum(pix, s->linesize); |
|
2605 | 2605 |
|
2606 |
- varc = (pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8; |
|
2606 |
+ varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8; |
|
2607 | 2607 |
|
2608 | 2608 |
s->mb_var [s->mb_width * mb_y + mb_x] = varc; |
2609 | 2609 |
s->mb_mean[s->mb_width * mb_y + mb_x] = (sum+128)>>8; |
... | ... |
@@ -221,6 +221,7 @@ typedef struct MpegEncContext { |
221 | 221 |
int unrestricted_mv; |
222 | 222 |
int h263_long_vectors; /* use horrible h263v1 long vector mode */ |
223 | 223 |
|
224 |
+ DSPContext dsp; /* pointers for accelerated dsp functions */ |
|
224 | 225 |
int f_code; /* forward MV resolution */ |
225 | 226 |
int b_code; /* backward MV resolution for B Frames (mpeg4) */ |
226 | 227 |
INT16 (*motion_val)[2]; /* used for MV prediction (4MV per MB) */ |
... | ... |
@@ -447,7 +447,7 @@ static int rv10_decode_packet(AVCodecContext *avctx, |
447 | 447 |
printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y); |
448 | 448 |
#endif |
449 | 449 |
|
450 |
- clear_blocks(s->block[0]); |
|
450 |
+ s->dsp.clear_blocks(s->block[0]); |
|
451 | 451 |
s->mv_dir = MV_DIR_FORWARD; |
452 | 452 |
s->mv_type = MV_TYPE_16X16; |
453 | 453 |
if (ff_h263_decode_mb(s, s->block) == SLICE_ERROR) { |
... | ... |
@@ -804,7 +804,7 @@ static void svq1_skip_block (uint8_t *current, uint8_t *previous, int pitch, int |
804 | 804 |
} |
805 | 805 |
} |
806 | 806 |
|
807 |
-static int svq1_motion_inter_block (bit_buffer_t *bitbuf, |
|
807 |
+static int svq1_motion_inter_block (MpegEncContext *s, bit_buffer_t *bitbuf, |
|
808 | 808 |
uint8_t *current, uint8_t *previous, int pitch, |
809 | 809 |
svq1_pmv_t *motion, int x, int y) { |
810 | 810 |
uint8_t *src; |
... | ... |
@@ -839,12 +839,12 @@ static int svq1_motion_inter_block (bit_buffer_t *bitbuf, |
839 | 839 |
src = &previous[(x + (mv.x >> 1)) + (y + (mv.y >> 1))*pitch]; |
840 | 840 |
dst = current; |
841 | 841 |
|
842 |
- put_pixels_tab[0][((mv.y & 1) << 1) | (mv.x & 1)](dst,src,pitch,16); |
|
842 |
+ s->dsp.put_pixels_tab[0][((mv.y & 1) << 1) | (mv.x & 1)](dst,src,pitch,16); |
|
843 | 843 |
|
844 | 844 |
return 0; |
845 | 845 |
} |
846 | 846 |
|
847 |
-static int svq1_motion_inter_4v_block (bit_buffer_t *bitbuf, |
|
847 |
+static int svq1_motion_inter_4v_block (MpegEncContext *s, bit_buffer_t *bitbuf, |
|
848 | 848 |
uint8_t *current, uint8_t *previous, int pitch, |
849 | 849 |
svq1_pmv_t *motion,int x, int y) { |
850 | 850 |
uint8_t *src; |
... | ... |
@@ -906,7 +906,7 @@ static int svq1_motion_inter_4v_block (bit_buffer_t *bitbuf, |
906 | 906 |
src = &previous[(x + (pmv[i]->x >> 1)) + (y + (pmv[i]->y >> 1))*pitch]; |
907 | 907 |
dst = current; |
908 | 908 |
|
909 |
- put_pixels_tab[1][((pmv[i]->y & 1) << 1) | (pmv[i]->x & 1)](dst,src,pitch,8); |
|
909 |
+ s->dsp.put_pixels_tab[1][((pmv[i]->y & 1) << 1) | (pmv[i]->x & 1)](dst,src,pitch,8); |
|
910 | 910 |
|
911 | 911 |
/* select next block */ |
912 | 912 |
if (i & 1) { |
... | ... |
@@ -921,7 +921,7 @@ static int svq1_motion_inter_4v_block (bit_buffer_t *bitbuf, |
921 | 921 |
return 0; |
922 | 922 |
} |
923 | 923 |
|
924 |
-static int svq1_decode_delta_block (bit_buffer_t *bitbuf, |
|
924 |
+static int svq1_decode_delta_block (MpegEncContext *s, bit_buffer_t *bitbuf, |
|
925 | 925 |
uint8_t *current, uint8_t *previous, int pitch, |
926 | 926 |
svq1_pmv_t *motion, int x, int y) { |
927 | 927 |
uint32_t bit_cache; |
... | ... |
@@ -951,7 +951,7 @@ static int svq1_decode_delta_block (bit_buffer_t *bitbuf, |
951 | 951 |
break; |
952 | 952 |
|
953 | 953 |
case SVQ1_BLOCK_INTER: |
954 |
- result = svq1_motion_inter_block (bitbuf, current, previous, pitch, motion, x, y); |
|
954 |
+ result = svq1_motion_inter_block (s, bitbuf, current, previous, pitch, motion, x, y); |
|
955 | 955 |
|
956 | 956 |
if (result != 0) |
957 | 957 |
{ |
... | ... |
@@ -964,7 +964,7 @@ static int svq1_decode_delta_block (bit_buffer_t *bitbuf, |
964 | 964 |
break; |
965 | 965 |
|
966 | 966 |
case SVQ1_BLOCK_INTER_4V: |
967 |
- result = svq1_motion_inter_4v_block (bitbuf, current, previous, pitch, motion, x, y); |
|
967 |
+ result = svq1_motion_inter_4v_block (s, bitbuf, current, previous, pitch, motion, x, y); |
|
968 | 968 |
|
969 | 969 |
if (result != 0) |
970 | 970 |
{ |
... | ... |
@@ -1142,8 +1142,8 @@ static int svq1_decode_frame(AVCodecContext *avctx, |
1142 | 1142 |
|
1143 | 1143 |
for (y=0; y < height; y+=16) { |
1144 | 1144 |
for (x=0; x < width; x+=16) { |
1145 |
- result = svq1_decode_delta_block (&s->gb, ¤t[x], previous, |
|
1146 |
- linesize, pmv, x, y); |
|
1145 |
+ result = svq1_decode_delta_block (s, &s->gb, ¤t[x], previous, |
|
1146 |
+ linesize, pmv, x, y); |
|
1147 | 1147 |
if (result != 0) |
1148 | 1148 |
{ |
1149 | 1149 |
#ifdef DEBUG_SVQ1 |