Originally committed as revision 2204 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc
Michael Niedermayer authored on 2001/10/15 12:01:08... | ... |
@@ -17,19 +17,22 @@ |
17 | 17 |
*/ |
18 | 18 |
|
19 | 19 |
/* |
20 |
- C MMX MMX2 3DNow* |
|
20 |
+ C MMX MMX2 3DNow |
|
21 | 21 |
isVertDC Ec Ec |
22 | 22 |
isVertMinMaxOk Ec Ec |
23 |
-doVertLowPass E e e* |
|
23 |
+doVertLowPass E e e |
|
24 | 24 |
doVertDefFilter Ec Ec Ec |
25 | 25 |
isHorizDC Ec Ec |
26 | 26 |
isHorizMinMaxOk a |
27 |
-doHorizLowPass E a a* |
|
27 |
+doHorizLowPass E a a |
|
28 | 28 |
doHorizDefFilter E ac ac |
29 | 29 |
deRing |
30 |
-Vertical RKAlgo1 E a a* |
|
31 |
-Vertical X1 a E E* |
|
32 |
-Horizontal X1 a E E* |
|
30 |
+Vertical RKAlgo1 E a a |
|
31 |
+Vertical X1 a E E |
|
32 |
+Horizontal X1 a E E |
|
33 |
+LinIpolDeinterlace a E E* |
|
34 |
+LinBlendDeinterlace a E E* |
|
35 |
+MedianDeinterlace a E |
|
33 | 36 |
|
34 | 37 |
|
35 | 38 |
* i dont have a 3dnow CPU -> its untested |
... | ... |
@@ -55,6 +58,7 @@ make the mainloop more flexible (variable number of blocks at once |
55 | 55 |
compare the quality & speed of all filters |
56 | 56 |
implement a few simple deinterlacing filters |
57 | 57 |
split this huge file |
58 |
+fix warnings (unused vars, ...) |
|
58 | 59 |
... |
59 | 60 |
|
60 | 61 |
Notes: |
... | ... |
@@ -63,6 +67,9 @@ Notes: |
63 | 63 |
|
64 | 64 |
/* |
65 | 65 |
Changelog: use the CVS log |
66 |
+rewrote the horizontal lowpass filter to fix a bug which caused a blocky look |
|
67 |
+added deinterlace filters (linear interpolate, linear blend, median) |
|
68 |
+minor cleanups (removed some outcommented stuff) |
|
66 | 69 |
0.1.3 |
67 | 70 |
bugfixes: last 3 lines not brightness/contrast corrected |
68 | 71 |
brightness statistics messed up with initial black pic |
... | ... |
@@ -194,13 +201,11 @@ static inline void prefetcht2(void *p) |
194 | 194 |
* Check if the middle 8x8 Block in the given 8x10 block is flat |
195 | 195 |
*/ |
196 | 196 |
static inline int isVertDC(uint8_t src[], int stride){ |
197 |
-// return true; |
|
198 | 197 |
int numEq= 0; |
199 | 198 |
int y; |
200 | 199 |
src+= stride; // src points to begin of the 8x8 Block |
201 | 200 |
#ifdef HAVE_MMX |
202 | 201 |
asm volatile( |
203 |
-// "int $3 \n\t" |
|
204 | 202 |
"pushl %1\n\t" |
205 | 203 |
"movq b7E, %%mm7 \n\t" // mm7 = 0x7F |
206 | 204 |
"movq b7C, %%mm6 \n\t" // mm6 = 0x7D |
... | ... |
@@ -1577,9 +1582,9 @@ static inline void doHorizDefFilterAndCopyBack(uint8_t dst[], int stride, int QP |
1577 | 1577 |
} |
1578 | 1578 |
|
1579 | 1579 |
/** |
1580 |
- * Do a horizontal low pass filter on the 8x8 block |
|
1580 |
+ * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block) |
|
1581 | 1581 |
* useing the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version) |
1582 |
- * useing approximately the 7-Tap Filter (1,2,3,4,3,2,1)/16 (MMX2/3DNOW version) |
|
1582 |
+ * useing the 7-Tap Filter (2,2,2,4,2,2,2)/16 (MMX2/3DNOW version) |
|
1583 | 1583 |
*/ |
1584 | 1584 |
static inline void doHorizLowPassAndCopyBack(uint8_t dst[], int stride, int QP) |
1585 | 1585 |
{ |
... | ... |
@@ -1635,14 +1640,6 @@ static inline void doHorizLowPassAndCopyBack(uint8_t dst[], int stride, int QP) |
1635 | 1635 |
*/ |
1636 | 1636 |
// approximately a 7-Tap Filter with Vector (1,2,3,4,3,2,1)/16 |
1637 | 1637 |
/* |
1638 |
- 31 |
|
1639 |
- 121 |
|
1640 |
- 121 |
|
1641 |
- 121 |
|
1642 |
- 121 |
|
1643 |
- 121 |
|
1644 |
- 121 |
|
1645 |
- 13 |
|
1646 | 1638 |
Implemented Exact 7-Tap |
1647 | 1639 |
9421 A321 |
1648 | 1640 |
36421 64321 |
... | ... |
@@ -1654,6 +1651,7 @@ Implemented Exact 7-Tap |
1654 | 1654 |
1249 123A |
1655 | 1655 |
|
1656 | 1656 |
*/ |
1657 |
+ |
|
1657 | 1658 |
#ifdef HAVE_MMX2 |
1658 | 1659 |
#define HLP3(i) "movq " #i "(%%eax), %%mm0 \n\t"\ |
1659 | 1660 |
"movq %%mm0, %%mm1 \n\t"\ |
... | ... |
@@ -1680,12 +1678,12 @@ Implemented Exact 7-Tap |
1680 | 1680 |
#define HLP3(i) "movq " #i "(%%eax), %%mm0 \n\t"\ |
1681 | 1681 |
"movq %%mm0, %%mm1 \n\t"\ |
1682 | 1682 |
"movq %%mm0, %%mm2 \n\t"\ |
1683 |
- "movq %%mm0, %%mm3 \n\t"\ |
|
1684 |
- "movq %%mm0, %%mm4 \n\t"\ |
|
1683 |
+ "movd -4(%0), %%mm3 \n\t" /*0001000*/\ |
|
1684 |
+ "movd 8(%0), %%mm4 \n\t" /*0001000*/\ |
|
1685 | 1685 |
"psllq $8, %%mm1 \n\t"\ |
1686 | 1686 |
"psrlq $8, %%mm2 \n\t"\ |
1687 |
- "pand bm00000001, %%mm3 \n\t"\ |
|
1688 |
- "pand bm10000000, %%mm4 \n\t"\ |
|
1687 |
+ "psrlq $24, %%mm3 \n\t"\ |
|
1688 |
+ "psllq $56, %%mm4 \n\t"\ |
|
1689 | 1689 |
"por %%mm3, %%mm1 \n\t"\ |
1690 | 1690 |
"por %%mm4, %%mm2 \n\t"\ |
1691 | 1691 |
PAVGB(%%mm2, %%mm1)\ |
... | ... |
@@ -1708,7 +1706,80 @@ Implemented Exact 7-Tap |
1708 | 1708 |
"movd %%mm0, 4(%0) \n\t" |
1709 | 1709 |
#endif |
1710 | 1710 |
|
1711 |
-#define HLP(i) HLP3(i) |
|
1711 |
+/* uses the 7-Tap Filter: 1112111 */ |
|
1712 |
+#define NEW_HLP(i)\ |
|
1713 |
+ "movq " #i "(%%eax), %%mm0 \n\t"\ |
|
1714 |
+ "movq %%mm0, %%mm1 \n\t"\ |
|
1715 |
+ "movq %%mm0, %%mm2 \n\t"\ |
|
1716 |
+ "movd -4(%0), %%mm3 \n\t" /*0001000*/\ |
|
1717 |
+ "movd 8(%0), %%mm4 \n\t" /*0001000*/\ |
|
1718 |
+ "psllq $8, %%mm1 \n\t"\ |
|
1719 |
+ "psrlq $8, %%mm2 \n\t"\ |
|
1720 |
+ "psrlq $24, %%mm3 \n\t"\ |
|
1721 |
+ "psllq $56, %%mm4 \n\t"\ |
|
1722 |
+ "por %%mm3, %%mm1 \n\t"\ |
|
1723 |
+ "por %%mm4, %%mm2 \n\t"\ |
|
1724 |
+ "movq %%mm1, %%mm5 \n\t"\ |
|
1725 |
+ PAVGB(%%mm2, %%mm1)\ |
|
1726 |
+ PAVGB(%%mm1, %%mm0)\ |
|
1727 |
+ "psllq $8, %%mm5 \n\t"\ |
|
1728 |
+ "psrlq $8, %%mm2 \n\t"\ |
|
1729 |
+ "por %%mm3, %%mm5 \n\t"\ |
|
1730 |
+ "por %%mm4, %%mm2 \n\t"\ |
|
1731 |
+ "movq %%mm5, %%mm1 \n\t"\ |
|
1732 |
+ PAVGB(%%mm2, %%mm5)\ |
|
1733 |
+ "psllq $8, %%mm1 \n\t"\ |
|
1734 |
+ "psrlq $8, %%mm2 \n\t"\ |
|
1735 |
+ "por %%mm3, %%mm1 \n\t"\ |
|
1736 |
+ "por %%mm4, %%mm2 \n\t"\ |
|
1737 |
+ PAVGB(%%mm2, %%mm1)\ |
|
1738 |
+ PAVGB(%%mm1, %%mm5)\ |
|
1739 |
+ PAVGB(%%mm5, %%mm0)\ |
|
1740 |
+ "movd %%mm0, (%0) \n\t"\ |
|
1741 |
+ "psrlq $32, %%mm0 \n\t"\ |
|
1742 |
+ "movd %%mm0, 4(%0) \n\t" |
|
1743 |
+ |
|
1744 |
+/* uses the 9-Tap Filter: 112242211 */ |
|
1745 |
+#define NEW_HLP2(i)\ |
|
1746 |
+ "movq " #i "(%%eax), %%mm0 \n\t" /*0001000*/\ |
|
1747 |
+ "movq %%mm0, %%mm1 \n\t" /*0001000*/\ |
|
1748 |
+ "movq %%mm0, %%mm2 \n\t" /*0001000*/\ |
|
1749 |
+ "movd -4(%0), %%mm3 \n\t" /*0001000*/\ |
|
1750 |
+ "movd 8(%0), %%mm4 \n\t" /*0001000*/\ |
|
1751 |
+ "psllq $8, %%mm1 \n\t"\ |
|
1752 |
+ "psrlq $8, %%mm2 \n\t"\ |
|
1753 |
+ "psrlq $24, %%mm3 \n\t"\ |
|
1754 |
+ "psllq $56, %%mm4 \n\t"\ |
|
1755 |
+ "por %%mm3, %%mm1 \n\t" /*0010000*/\ |
|
1756 |
+ "por %%mm4, %%mm2 \n\t" /*0000100*/\ |
|
1757 |
+ "movq %%mm1, %%mm5 \n\t" /*0010000*/\ |
|
1758 |
+ PAVGB(%%mm2, %%mm1) /*0010100*/\ |
|
1759 |
+ PAVGB(%%mm1, %%mm0) /*0012100*/\ |
|
1760 |
+ "psllq $8, %%mm5 \n\t"\ |
|
1761 |
+ "psrlq $8, %%mm2 \n\t"\ |
|
1762 |
+ "por %%mm3, %%mm5 \n\t" /*0100000*/\ |
|
1763 |
+ "por %%mm4, %%mm2 \n\t" /*0000010*/\ |
|
1764 |
+ "movq %%mm5, %%mm1 \n\t" /*0100000*/\ |
|
1765 |
+ PAVGB(%%mm2, %%mm5) /*0100010*/\ |
|
1766 |
+ "psllq $8, %%mm1 \n\t"\ |
|
1767 |
+ "psrlq $8, %%mm2 \n\t"\ |
|
1768 |
+ "por %%mm3, %%mm1 \n\t" /*1000000*/\ |
|
1769 |
+ "por %%mm4, %%mm2 \n\t" /*0000001*/\ |
|
1770 |
+ "movq %%mm1, %%mm6 \n\t" /*1000000*/\ |
|
1771 |
+ PAVGB(%%mm2, %%mm1) /*1000001*/\ |
|
1772 |
+ "psllq $8, %%mm6 \n\t"\ |
|
1773 |
+ "psrlq $8, %%mm2 \n\t"\ |
|
1774 |
+ "por %%mm3, %%mm6 \n\t"/*100000000*/\ |
|
1775 |
+ "por %%mm4, %%mm2 \n\t"/*000000001*/\ |
|
1776 |
+ PAVGB(%%mm2, %%mm6) /*100000001*/\ |
|
1777 |
+ PAVGB(%%mm6, %%mm1) /*110000011*/\ |
|
1778 |
+ PAVGB(%%mm1, %%mm5) /*112000211*/\ |
|
1779 |
+ PAVGB(%%mm5, %%mm0) /*112242211*/\ |
|
1780 |
+ "movd %%mm0, (%0) \n\t"\ |
|
1781 |
+ "psrlq $32, %%mm0 \n\t"\ |
|
1782 |
+ "movd %%mm0, 4(%0) \n\t" |
|
1783 |
+ |
|
1784 |
+#define HLP(i) NEW_HLP(i) |
|
1712 | 1785 |
|
1713 | 1786 |
HLP(0) |
1714 | 1787 |
"addl %1, %0 \n\t" |
... | ... |
@@ -1828,6 +1899,363 @@ FIND_MIN_MAX(%%ebx, %1, 2) |
1828 | 1828 |
#endif |
1829 | 1829 |
} |
1830 | 1830 |
|
1831 |
+/** |
|
1832 |
+ * Deinterlaces the given block |
|
1833 |
+ * will be called for every 8x8 block, except the last row, and can read & write into an 8x16 block |
|
1834 |
+ */ |
|
1835 |
+static inline void deInterlaceInterpolateLinear(uint8_t src[], int stride) |
|
1836 |
+{ |
|
1837 |
+#if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
|
1838 |
+ asm volatile( |
|
1839 |
+ "leal (%0, %1), %%eax \n\t" |
|
1840 |
+ "leal (%%eax, %1, 4), %%ebx \n\t" |
|
1841 |
+// 0 1 2 3 4 5 6 7 8 9 |
|
1842 |
+// %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1 |
|
1843 |
+ |
|
1844 |
+ "movq (%0), %%mm0 \n\t" |
|
1845 |
+ "movq (%%eax, %1), %%mm1 \n\t" |
|
1846 |
+ PAVGB(%%mm1, %%mm0)\ |
|
1847 |
+ "movq %%mm0, (%%eax) \n\t" |
|
1848 |
+ "movq (%0, %1, 4), %%mm0 \n\t" |
|
1849 |
+ PAVGB(%%mm0, %%mm1)\ |
|
1850 |
+ "movq %%mm1, (%%eax, %1, 2) \n\t" |
|
1851 |
+ "movq (%%ebx, %1), %%mm1 \n\t" |
|
1852 |
+ PAVGB(%%mm1, %%mm0)\ |
|
1853 |
+ "movq %%mm0, (%%ebx) \n\t" |
|
1854 |
+ "movq (%0, %1, 8), %%mm0 \n\t" |
|
1855 |
+ PAVGB(%%mm0, %%mm1)\ |
|
1856 |
+ "movq %%mm1, (%%ebx, %1, 2) \n\t" |
|
1857 |
+ |
|
1858 |
+ : : "r" (src), "r" (stride) |
|
1859 |
+ : "%eax", "%ebx" |
|
1860 |
+ ); |
|
1861 |
+#else |
|
1862 |
+ int x; |
|
1863 |
+ for(x=0; x<8; x++) |
|
1864 |
+ { |
|
1865 |
+ src[stride] = (src[0] + src[stride*2])>>1; |
|
1866 |
+ src[stride*3] = (src[stride*2] + src[stride*4])>>1; |
|
1867 |
+ src[stride*5] = (src[stride*4] + src[stride*6])>>1; |
|
1868 |
+ src[stride*7] = (src[stride*6] + src[stride*8])>>1; |
|
1869 |
+ src++; |
|
1870 |
+ } |
|
1871 |
+#endif |
|
1872 |
+} |
|
1873 |
+ |
|
1874 |
+/** |
|
1875 |
+ * Deinterlaces the given block |
|
1876 |
+ * will be called for every 8x8 block, in the last row, and can read & write into an 8x8 block |
|
1877 |
+ */ |
|
1878 |
+static inline void deInterlaceInterpolateLinearLastRow(uint8_t src[], int stride) |
|
1879 |
+{ |
|
1880 |
+#if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
|
1881 |
+ asm volatile( |
|
1882 |
+ "leal (%0, %1), %%eax \n\t" |
|
1883 |
+ "leal (%%eax, %1, 4), %%ebx \n\t" |
|
1884 |
+// 0 1 2 3 4 5 6 7 8 9 |
|
1885 |
+// %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1 |
|
1886 |
+ |
|
1887 |
+ "movq (%0), %%mm0 \n\t" |
|
1888 |
+ "movq (%%eax, %1), %%mm1 \n\t" |
|
1889 |
+ PAVGB(%%mm1, %%mm0)\ |
|
1890 |
+ "movq %%mm0, (%%eax) \n\t" |
|
1891 |
+ "movq (%0, %1, 4), %%mm0 \n\t" |
|
1892 |
+ PAVGB(%%mm0, %%mm1)\ |
|
1893 |
+ "movq %%mm1, (%%eax, %1, 2) \n\t" |
|
1894 |
+ "movq (%%ebx, %1), %%mm1 \n\t" |
|
1895 |
+ PAVGB(%%mm1, %%mm0)\ |
|
1896 |
+ "movq %%mm0, (%%ebx) \n\t" |
|
1897 |
+ "movq %%mm1, (%%ebx, %1, 2) \n\t" |
|
1898 |
+ |
|
1899 |
+ |
|
1900 |
+ : : "r" (src), "r" (stride) |
|
1901 |
+ : "%eax", "%ebx" |
|
1902 |
+ ); |
|
1903 |
+#else |
|
1904 |
+ int x; |
|
1905 |
+ for(x=0; x<8; x++) |
|
1906 |
+ { |
|
1907 |
+ src[stride] = (src[0] + src[stride*2])>>1; |
|
1908 |
+ src[stride*3] = (src[stride*2] + src[stride*4])>>1; |
|
1909 |
+ src[stride*5] = (src[stride*4] + src[stride*6])>>1; |
|
1910 |
+ src[stride*7] = src[stride*6]; |
|
1911 |
+ src++; |
|
1912 |
+ } |
|
1913 |
+#endif |
|
1914 |
+} |
|
1915 |
+ |
|
1916 |
+/** |
|
1917 |
+ * Deinterlaces the given block |
|
1918 |
+ * will be called for every 8x8 block, except the last row, and can read & write into an 8x16 block |
|
1919 |
+ * will shift the image up by 1 line (FIXME if this is a problem) |
|
1920 |
+ */ |
|
1921 |
+static inline void deInterlaceBlendLinear(uint8_t src[], int stride) |
|
1922 |
+{ |
|
1923 |
+#if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
|
1924 |
+ asm volatile( |
|
1925 |
+ "leal (%0, %1), %%eax \n\t" |
|
1926 |
+ "leal (%%eax, %1, 4), %%ebx \n\t" |
|
1927 |
+// 0 1 2 3 4 5 6 7 8 9 |
|
1928 |
+// %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1 |
|
1929 |
+ |
|
1930 |
+ "movq (%0), %%mm0 \n\t" // L0 |
|
1931 |
+ "movq (%%eax, %1), %%mm1 \n\t" // L2 |
|
1932 |
+ PAVGB(%%mm1, %%mm0) // L0+L2 |
|
1933 |
+ "movq (%%eax), %%mm2 \n\t" // L1 |
|
1934 |
+ PAVGB(%%mm2, %%mm0) |
|
1935 |
+ "movq %%mm0, (%0) \n\t" |
|
1936 |
+ "movq (%%eax, %1, 2), %%mm0 \n\t" // L3 |
|
1937 |
+ PAVGB(%%mm0, %%mm2) // L1+L3 |
|
1938 |
+ PAVGB(%%mm1, %%mm2) // 2L2 + L1 + L3 |
|
1939 |
+ "movq %%mm2, (%%eax) \n\t" |
|
1940 |
+ "movq (%0, %1, 4), %%mm2 \n\t" // L4 |
|
1941 |
+ PAVGB(%%mm2, %%mm1) // L2+L4 |
|
1942 |
+ PAVGB(%%mm0, %%mm1) // 2L3 + L2 + L4 |
|
1943 |
+ "movq %%mm1, (%%eax, %1) \n\t" |
|
1944 |
+ "movq (%%ebx), %%mm1 \n\t" // L5 |
|
1945 |
+ PAVGB(%%mm1, %%mm0) // L3+L5 |
|
1946 |
+ PAVGB(%%mm2, %%mm0) // 2L4 + L3 + L5 |
|
1947 |
+ "movq %%mm0, (%%eax, %1, 2) \n\t" |
|
1948 |
+ "movq (%%ebx, %1), %%mm0 \n\t" // L6 |
|
1949 |
+ PAVGB(%%mm0, %%mm2) // L4+L6 |
|
1950 |
+ PAVGB(%%mm1, %%mm2) // 2L5 + L4 + L6 |
|
1951 |
+ "movq %%mm2, (%0, %1, 4) \n\t" |
|
1952 |
+ "movq (%%ebx, %1, 2), %%mm2 \n\t" // L7 |
|
1953 |
+ PAVGB(%%mm2, %%mm1) // L5+L7 |
|
1954 |
+ PAVGB(%%mm0, %%mm1) // 2L6 + L5 + L7 |
|
1955 |
+ "movq %%mm1, (%%ebx) \n\t" |
|
1956 |
+ "movq (%0, %1, 8), %%mm1 \n\t" // L8 |
|
1957 |
+ PAVGB(%%mm1, %%mm0) // L6+L8 |
|
1958 |
+ PAVGB(%%mm2, %%mm0) // 2L7 + L6 + L8 |
|
1959 |
+ "movq %%mm0, (%%ebx, %1) \n\t" |
|
1960 |
+ "movq (%%ebx, %1, 4), %%mm0 \n\t" // L9 |
|
1961 |
+ PAVGB(%%mm0, %%mm2) // L7+L9 |
|
1962 |
+ PAVGB(%%mm1, %%mm2) // 2L8 + L7 + L9 |
|
1963 |
+ "movq %%mm2, (%%ebx, %1, 2) \n\t" |
|
1964 |
+ |
|
1965 |
+ |
|
1966 |
+ : : "r" (src), "r" (stride) |
|
1967 |
+ : "%eax", "%ebx" |
|
1968 |
+ ); |
|
1969 |
+#else |
|
1970 |
+ int x; |
|
1971 |
+ for(x=0; x<8; x++) |
|
1972 |
+ { |
|
1973 |
+ src[0 ] = (src[0 ] + 2*src[stride ] + src[stride*2])>>2; |
|
1974 |
+ src[stride ] = (src[stride ] + 2*src[stride*2] + src[stride*3])>>2; |
|
1975 |
+ src[stride*2] = (src[stride*2] + 2*src[stride*3] + src[stride*4])>>2; |
|
1976 |
+ src[stride*3] = (src[stride*3] + 2*src[stride*4] + src[stride*5])>>2; |
|
1977 |
+ src[stride*4] = (src[stride*4] + 2*src[stride*5] + src[stride*6])>>2; |
|
1978 |
+ src[stride*5] = (src[stride*5] + 2*src[stride*6] + src[stride*7])>>2; |
|
1979 |
+ src[stride*6] = (src[stride*6] + 2*src[stride*7] + src[stride*8])>>2; |
|
1980 |
+ src[stride*7] = (src[stride*7] + 2*src[stride*8] + src[stride*9])>>2; |
|
1981 |
+ src++; |
|
1982 |
+ } |
|
1983 |
+#endif |
|
1984 |
+} |
|
1985 |
+ |
|
1986 |
+/** |
|
1987 |
+ * Deinterlaces the given block |
|
1988 |
+ * will be called for every 8x8 block, in the last row, and can read & write into an 8x8 block |
|
1989 |
+ * will shift the image up by 1 line (FIXME if this is a problem) |
|
1990 |
+ */ |
|
1991 |
+static inline void deInterlaceBlendLinearLastRow(uint8_t src[], int stride) |
|
1992 |
+{ |
|
1993 |
+#if defined (HAVE_MMSX2) || defined (HAVE_3DNOW) |
|
1994 |
+ asm volatile( |
|
1995 |
+ "leal (%0, %1), %%eax \n\t" |
|
1996 |
+ "leal (%%eax, %1, 4), %%ebx \n\t" |
|
1997 |
+// 0 1 2 3 4 5 6 7 8 9 |
|
1998 |
+// %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1 |
|
1999 |
+ |
|
2000 |
+ "movq (%0), %%mm0 \n\t" // L0 |
|
2001 |
+ "movq (%%eax, %1), %%mm1 \n\t" // L2 |
|
2002 |
+ PAVGB(%%mm1, %%mm0) // L0+L2 |
|
2003 |
+ "movq (%%eax), %%mm2 \n\t" // L1 |
|
2004 |
+ PAVGB(%%mm2, %%mm0) |
|
2005 |
+ "movq %%mm0, (%0) \n\t" |
|
2006 |
+ "movq (%%eax, %1, 2), %%mm0 \n\t" // L3 |
|
2007 |
+ PAVGB(%%mm0, %%mm2) // L1+L3 |
|
2008 |
+ PAVGB(%%mm1, %%mm2) // 2L2 + L1 + L3 |
|
2009 |
+ "movq %%mm2, (%%eax) \n\t" |
|
2010 |
+ "movq (%0, %1, 4), %%mm2 \n\t" // L4 |
|
2011 |
+ PAVGB(%%mm2, %%mm1) // L2+L4 |
|
2012 |
+ PAVGB(%%mm0, %%mm1) // 2L3 + L2 + L4 |
|
2013 |
+ "movq %%mm1, (%%eax, %1) \n\t" |
|
2014 |
+ "movq (%%ebx), %%mm1 \n\t" // L5 |
|
2015 |
+ PAVGB(%%mm1, %%mm0) // L3+L5 |
|
2016 |
+ PAVGB(%%mm2, %%mm0) // 2L4 + L3 + L5 |
|
2017 |
+ "movq %%mm0, (%%eax, %1, 2) \n\t" |
|
2018 |
+ "movq (%%ebx, %1), %%mm0 \n\t" // L6 |
|
2019 |
+ PAVGB(%%mm0, %%mm2) // L4+L6 |
|
2020 |
+ PAVGB(%%mm1, %%mm2) // 2L5 + L4 + L6 |
|
2021 |
+ "movq %%mm2, (%0, %1, 4) \n\t" |
|
2022 |
+ "movq (%%ebx, %1, 2), %%mm2 \n\t" // L7 |
|
2023 |
+ PAVGB(%%mm2, %%mm1) // L5+L7 |
|
2024 |
+ PAVGB(%%mm0, %%mm1) // 2L6 + L5 + L7 |
|
2025 |
+ "movq %%mm1, (%%ebx) \n\t" |
|
2026 |
+ PAVGB(%%mm2, %%mm0) // L7 + L8 |
|
2027 |
+ "movq %%mm0, (%%ebx, %1) \n\t" |
|
2028 |
+ "movq %%mm0, (%%ebx, %1, 2) \n\t" |
|
2029 |
+ |
|
2030 |
+ : : "r" (src), "r" (stride) |
|
2031 |
+ : "%eax", "%ebx" |
|
2032 |
+ ); |
|
2033 |
+#else |
|
2034 |
+ int x; |
|
2035 |
+ for(x=0; x<8; x++) |
|
2036 |
+ { |
|
2037 |
+ src[0 ] = (src[0 ] + 2*src[stride ] + src[stride*2])>>2; |
|
2038 |
+ src[stride ] = (src[stride ] + 2*src[stride*2] + src[stride*3])>>2; |
|
2039 |
+ src[stride*2] = (src[stride*2] + 2*src[stride*3] + src[stride*4])>>2; |
|
2040 |
+ src[stride*3] = (src[stride*3] + 2*src[stride*4] + src[stride*5])>>2; |
|
2041 |
+ src[stride*4] = (src[stride*4] + 2*src[stride*5] + src[stride*6])>>2; |
|
2042 |
+ src[stride*5] = (src[stride*5] + 2*src[stride*6] + src[stride*7])>>2; |
|
2043 |
+ src[stride*6] = (src[stride*6] + src[stride*7])>>1; |
|
2044 |
+ src[stride*7] = src[stride*6]; |
|
2045 |
+ src++; |
|
2046 |
+ } |
|
2047 |
+#endif |
|
2048 |
+} |
|
2049 |
+ |
|
2050 |
+/** |
|
2051 |
+ * Deinterlaces the given block |
|
2052 |
+ * will be called for every 8x8 block, except the last row, and can read & write into an 8x16 block |
|
2053 |
+ */ |
|
2054 |
+static inline void deInterlaceMedian(uint8_t src[], int stride) |
|
2055 |
+{ |
|
2056 |
+#if defined (HAVE_MMX2) |
|
2057 |
+ asm volatile( |
|
2058 |
+ "leal (%0, %1), %%eax \n\t" |
|
2059 |
+ "leal (%%eax, %1, 4), %%ebx \n\t" |
|
2060 |
+// 0 1 2 3 4 5 6 7 8 9 |
|
2061 |
+// %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1 |
|
2062 |
+ |
|
2063 |
+ "movq (%0), %%mm0 \n\t" // |
|
2064 |
+ "movq (%%eax, %1), %%mm2 \n\t" // |
|
2065 |
+ "movq (%%eax), %%mm1 \n\t" // |
|
2066 |
+ "movq %%mm0, %%mm3 \n\t" |
|
2067 |
+ "pmaxub %%mm1, %%mm0 \n\t" // |
|
2068 |
+ "pminub %%mm3, %%mm1 \n\t" // |
|
2069 |
+ "pmaxub %%mm2, %%mm1 \n\t" // |
|
2070 |
+ "pminub %%mm1, %%mm0 \n\t" |
|
2071 |
+ "movq %%mm0, (%%eax) \n\t" |
|
2072 |
+ |
|
2073 |
+ "movq (%0, %1, 4), %%mm0 \n\t" // |
|
2074 |
+ "movq (%%eax, %1, 2), %%mm1 \n\t" // |
|
2075 |
+ "movq %%mm2, %%mm3 \n\t" |
|
2076 |
+ "pmaxub %%mm1, %%mm2 \n\t" // |
|
2077 |
+ "pminub %%mm3, %%mm1 \n\t" // |
|
2078 |
+ "pmaxub %%mm0, %%mm1 \n\t" // |
|
2079 |
+ "pminub %%mm1, %%mm2 \n\t" |
|
2080 |
+ "movq %%mm2, (%%eax, %1, 2) \n\t" |
|
2081 |
+ |
|
2082 |
+ "movq (%%ebx), %%mm2 \n\t" // |
|
2083 |
+ "movq (%%ebx, %1), %%mm1 \n\t" // |
|
2084 |
+ "movq %%mm2, %%mm3 \n\t" |
|
2085 |
+ "pmaxub %%mm0, %%mm2 \n\t" // |
|
2086 |
+ "pminub %%mm3, %%mm0 \n\t" // |
|
2087 |
+ "pmaxub %%mm1, %%mm0 \n\t" // |
|
2088 |
+ "pminub %%mm0, %%mm2 \n\t" |
|
2089 |
+ "movq %%mm2, (%%ebx) \n\t" |
|
2090 |
+ |
|
2091 |
+ "movq (%%ebx, %1, 2), %%mm2 \n\t" // |
|
2092 |
+ "movq (%0, %1, 8), %%mm0 \n\t" // |
|
2093 |
+ "movq %%mm2, %%mm3 \n\t" |
|
2094 |
+ "pmaxub %%mm0, %%mm2 \n\t" // |
|
2095 |
+ "pminub %%mm3, %%mm0 \n\t" // |
|
2096 |
+ "pmaxub %%mm1, %%mm0 \n\t" // |
|
2097 |
+ "pminub %%mm0, %%mm2 \n\t" |
|
2098 |
+ "movq %%mm2, (%%ebx, %1, 2) \n\t" |
|
2099 |
+ |
|
2100 |
+ |
|
2101 |
+ : : "r" (src), "r" (stride) |
|
2102 |
+ : "%eax", "%ebx" |
|
2103 |
+ ); |
|
2104 |
+#else |
|
2105 |
+ //FIXME |
|
2106 |
+ int x; |
|
2107 |
+ for(x=0; x<8; x++) |
|
2108 |
+ { |
|
2109 |
+ src[0 ] = (src[0 ] + 2*src[stride ] + src[stride*2])>>2; |
|
2110 |
+ src[stride ] = (src[stride ] + 2*src[stride*2] + src[stride*3])>>2; |
|
2111 |
+ src[stride*2] = (src[stride*2] + 2*src[stride*3] + src[stride*4])>>2; |
|
2112 |
+ src[stride*3] = (src[stride*3] + 2*src[stride*4] + src[stride*5])>>2; |
|
2113 |
+ src[stride*4] = (src[stride*4] + 2*src[stride*5] + src[stride*6])>>2; |
|
2114 |
+ src[stride*5] = (src[stride*5] + 2*src[stride*6] + src[stride*7])>>2; |
|
2115 |
+ src[stride*6] = (src[stride*6] + 2*src[stride*7] + src[stride*8])>>2; |
|
2116 |
+ src[stride*7] = (src[stride*7] + 2*src[stride*8] + src[stride*9])>>2; |
|
2117 |
+ src++; |
|
2118 |
+ } |
|
2119 |
+#endif |
|
2120 |
+} |
|
2121 |
+ |
|
2122 |
+/** |
|
2123 |
+ * Deinterlaces the given block |
|
2124 |
+ * will be called for every 8x8 block, in the last row, and can read & write into an 8x8 block |
|
2125 |
+ * will shift the image up by 1 line (FIXME if this is a problem) |
|
2126 |
+ */ |
|
2127 |
+static inline void deInterlaceMedianLastRow(uint8_t src[], int stride) |
|
2128 |
+{ |
|
2129 |
+#if defined (HAVE_MMX2) |
|
2130 |
+ asm volatile( |
|
2131 |
+ "leal (%0, %1), %%eax \n\t" |
|
2132 |
+ "leal (%%eax, %1, 4), %%ebx \n\t" |
|
2133 |
+// 0 1 2 3 4 5 6 7 8 9 |
|
2134 |
+// %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1 |
|
2135 |
+ |
|
2136 |
+ "movq (%0), %%mm0 \n\t" // |
|
2137 |
+ "movq (%%eax, %1), %%mm2 \n\t" // |
|
2138 |
+ "movq (%%eax), %%mm1 \n\t" // |
|
2139 |
+ "movq %%mm0, %%mm3 \n\t" |
|
2140 |
+ "pmaxub %%mm1, %%mm0 \n\t" // |
|
2141 |
+ "pminub %%mm3, %%mm1 \n\t" // |
|
2142 |
+ "pmaxub %%mm2, %%mm1 \n\t" // |
|
2143 |
+ "pminub %%mm1, %%mm0 \n\t" |
|
2144 |
+ "movq %%mm0, (%%eax) \n\t" |
|
2145 |
+ |
|
2146 |
+ "movq (%0, %1, 4), %%mm0 \n\t" // |
|
2147 |
+ "movq (%%eax, %1, 2), %%mm1 \n\t" // |
|
2148 |
+ "movq %%mm2, %%mm3 \n\t" |
|
2149 |
+ "pmaxub %%mm1, %%mm2 \n\t" // |
|
2150 |
+ "pminub %%mm3, %%mm1 \n\t" // |
|
2151 |
+ "pmaxub %%mm0, %%mm1 \n\t" // |
|
2152 |
+ "pminub %%mm1, %%mm2 \n\t" |
|
2153 |
+ "movq %%mm2, (%%eax, %1, 2) \n\t" |
|
2154 |
+ |
|
2155 |
+ "movq (%%ebx), %%mm2 \n\t" // |
|
2156 |
+ "movq (%%ebx, %1), %%mm1 \n\t" // |
|
2157 |
+ "movq %%mm2, %%mm3 \n\t" |
|
2158 |
+ "pmaxub %%mm0, %%mm2 \n\t" // |
|
2159 |
+ "pminub %%mm3, %%mm0 \n\t" // |
|
2160 |
+ "pmaxub %%mm1, %%mm0 \n\t" // |
|
2161 |
+ "pminub %%mm0, %%mm2 \n\t" |
|
2162 |
+ "movq %%mm2, (%%ebx) \n\t" |
|
2163 |
+ |
|
2164 |
+ "movq %%mm1, (%%ebx, %1, 2) \n\t" |
|
2165 |
+ |
|
2166 |
+ : : "r" (src), "r" (stride) |
|
2167 |
+ : "%eax", "%ebx" |
|
2168 |
+ ); |
|
2169 |
+#else |
|
2170 |
+ //FIXME |
|
2171 |
+ int x; |
|
2172 |
+ for(x=0; x<8; x++) |
|
2173 |
+ { |
|
2174 |
+ src[0 ] = (src[0 ] + 2*src[stride ] + src[stride*2])>>2; |
|
2175 |
+ src[stride ] = (src[stride ] + 2*src[stride*2] + src[stride*3])>>2; |
|
2176 |
+ src[stride*2] = (src[stride*2] + 2*src[stride*3] + src[stride*4])>>2; |
|
2177 |
+ src[stride*3] = (src[stride*3] + 2*src[stride*4] + src[stride*5])>>2; |
|
2178 |
+ src[stride*4] = (src[stride*4] + 2*src[stride*5] + src[stride*6])>>2; |
|
2179 |
+ src[stride*5] = (src[stride*5] + 2*src[stride*6] + src[stride*7])>>2; |
|
2180 |
+ src[stride*6] = (src[stride*6] + src[stride*7])>>1; |
|
2181 |
+ src[stride*7] = src[stride*6]; |
|
2182 |
+ src++; |
|
2183 |
+ } |
|
2184 |
+#endif |
|
2185 |
+} |
|
2186 |
+ |
|
2187 |
+ |
|
1831 | 2188 |
#ifdef HAVE_ODIVX_POSTPROCESS |
1832 | 2189 |
#include "../opendivx/postprocess.h" |
1833 | 2190 |
int use_old_pp=0; |
... | ... |
@@ -1841,7 +2269,6 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri |
1841 | 1841 |
* the mode value is interpreted as a quality value if its negative, its range is then (-1 ... -63) |
1842 | 1842 |
* -63 is best quality -1 is worst |
1843 | 1843 |
*/ |
1844 |
-//extern "C"{ |
|
1845 | 1844 |
void postprocess(unsigned char * src[], int src_stride, |
1846 | 1845 |
unsigned char * dst[], int dst_stride, |
1847 | 1846 |
int horizontal_size, int vertical_size, |
... | ... |
@@ -2196,6 +2623,17 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri |
2196 | 2196 |
blockCopy(vertBlock + dstStride*2, dstStride, |
2197 | 2197 |
vertSrcBlock + srcStride*2, srcStride, 8, mode & LEVEL_FIX); |
2198 | 2198 |
|
2199 |
+ if(mode & LINEAR_IPOL_DEINT_FILTER) |
|
2200 |
+ deInterlaceInterpolateLinear(dstBlock, dstStride); |
|
2201 |
+ else if(mode & LINEAR_BLEND_DEINT_FILTER) |
|
2202 |
+ deInterlaceBlendLinear(dstBlock, dstStride); |
|
2203 |
+ else if(mode & MEDIAN_DEINT_FILTER) |
|
2204 |
+ deInterlaceMedian(dstBlock, dstStride); |
|
2205 |
+/* else if(mode & CUBIC_IPOL_DEINT_FILTER) |
|
2206 |
+ deInterlaceInterpolateCubic(dstBlock, dstStride); |
|
2207 |
+ else if(mode & CUBIC_BLEND_DEINT_FILTER) |
|
2208 |
+ deInterlaceBlendCubic(dstBlock, dstStride); |
|
2209 |
+*/ |
|
2199 | 2210 |
|
2200 | 2211 |
#ifdef MORE_TIMEING |
2201 | 2212 |
T1= rdtsc(); |
... | ... |
@@ -2226,9 +2664,22 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri |
2226 | 2226 |
#endif |
2227 | 2227 |
} |
2228 | 2228 |
else |
2229 |
+ { |
|
2229 | 2230 |
blockCopy(vertBlock + dstStride*1, dstStride, |
2230 | 2231 |
vertSrcBlock + srcStride*1, srcStride, 4, mode & LEVEL_FIX); |
2231 | 2232 |
|
2233 |
+ if(mode & LINEAR_IPOL_DEINT_FILTER) |
|
2234 |
+ deInterlaceInterpolateLinearLastRow(dstBlock, dstStride); |
|
2235 |
+ else if(mode & LINEAR_BLEND_DEINT_FILTER) |
|
2236 |
+ deInterlaceBlendLinearLastRow(dstBlock, dstStride); |
|
2237 |
+ else if(mode & MEDIAN_DEINT_FILTER) |
|
2238 |
+ deInterlaceMedianLastRow(dstBlock, dstStride); |
|
2239 |
+/* else if(mode & CUBIC_IPOL_DEINT_FILTER) |
|
2240 |
+ deInterlaceInterpolateCubicLastRow(dstBlock, dstStride); |
|
2241 |
+ else if(mode & CUBIC_BLEND_DEINT_FILTER) |
|
2242 |
+ deInterlaceBlendCubicLastRow(dstBlock, dstStride); |
|
2243 |
+*/ |
|
2244 |
+ } |
|
2232 | 2245 |
|
2233 | 2246 |
if(x - 8 >= 0 && x<width) |
2234 | 2247 |
{ |
... | ... |
@@ -22,6 +22,7 @@ |
22 | 22 |
|
23 | 23 |
#define BLOCK_SIZE 8 |
24 | 24 |
#define TEMP_STRIDE 8 |
25 |
+//#define NUM_BLOCKS_AT_ONCE 16 //not used yet |
|
25 | 26 |
|
26 | 27 |
#define V_DEBLOCK 0x01 |
27 | 28 |
#define H_DEBLOCK 0x02 |
... | ... |
@@ -32,20 +33,28 @@ |
32 | 32 |
#define LUM_H_DEBLOCK H_DEBLOCK // 2 |
33 | 33 |
#define CHROM_V_DEBLOCK (V_DEBLOCK<<4) // 16 |
34 | 34 |
#define CHROM_H_DEBLOCK (H_DEBLOCK<<4) // 32 |
35 |
-#define LUM_DERING DERING // 4 |
|
36 |
-#define CHROM_DERING (DERING<<4) // 64 |
|
35 |
+#define LUM_DERING DERING // 4 (not implemented yet) |
|
36 |
+#define CHROM_DERING (DERING<<4) // 64 (not implemented yet) |
|
37 | 37 |
#define LUM_LEVEL_FIX LEVEL_FIX // 8 |
38 |
-//not supported currently |
|
39 |
-#define CHROM_LEVEL_FIX (LEVEL_FIX<<4) // 128 |
|
38 |
+#define CHROM_LEVEL_FIX (LEVEL_FIX<<4) // 128 (not implemented yet) |
|
40 | 39 |
|
41 | 40 |
// Experimental vertical filters |
42 | 41 |
#define V_RK1_FILTER 0x0100 // 256 |
43 | 42 |
#define V_X1_FILTER 0x0200 // 512 |
44 | 43 |
|
45 | 44 |
// Experimental horizontal filters |
46 |
-#define H_RK1_FILTER 0x1000 // 4096 |
|
45 |
+#define H_RK1_FILTER 0x1000 // 4096 (not implemented yet) |
|
47 | 46 |
#define H_X1_FILTER 0x2000 // 8192 |
48 | 47 |
|
48 |
+//Deinterlacing Filters |
|
49 |
+#define DEINTERLACE_FILTER_MASK 0xF0000 |
|
50 |
+#define LINEAR_IPOL_DEINT_FILTER 0x10000 // 65536 |
|
51 |
+#define LINEAR_BLEND_DEINT_FILTER 0x20000 // 131072 |
|
52 |
+#define CUBIC_BLEND_DEINT_FILTER 0x30000 // 196608 (not implemented yet) |
|
53 |
+#define CUBIC_IPOL_DEINT_FILTER 0x40000 // 262144 (not implemented yet) |
|
54 |
+#define MEDIAN_DEINT_FILTER 0x80000 // 524288 |
|
55 |
+ |
|
56 |
+ |
|
49 | 57 |
#define GET_PP_QUALITY_MAX 6 |
50 | 58 |
|
51 | 59 |
//#define TIMEING |
... | ... |
@@ -53,18 +62,6 @@ |
53 | 53 |
|
54 | 54 |
#define QP_STORE_T int |
55 | 55 |
|
56 |
-//#ifdef __cplusplus |
|
57 |
-//#include <inttypes.h> |
|
58 |
- |
|
59 |
-//void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, |
|
60 |
-// QP_STORE_T QPs[], int QPStride, int isColor, int mode); |
|
61 |
-//#endif |
|
62 |
- |
|
63 |
-//#ifdef __cplusplus |
|
64 |
-//extern "C" |
|
65 |
-//{ |
|
66 |
-//#endif |
|
67 |
- |
|
68 | 56 |
void postprocess(unsigned char * src[], int src_stride, |
69 | 57 |
unsigned char * dst[], int dst_stride, |
70 | 58 |
int horizontal_size, int vertical_size, |
... | ... |
@@ -72,8 +69,4 @@ void postprocess(unsigned char * src[], int src_stride, |
72 | 72 |
|
73 | 73 |
int getPpModeForQuality(int quality); |
74 | 74 |
|
75 |
-//#ifdef __cplusplus |
|
76 |
-//} |
|
77 |
-//#endif |
|
78 |
- |
|
79 | 75 |
#endif |
... | ... |
@@ -17,19 +17,22 @@ |
17 | 17 |
*/ |
18 | 18 |
|
19 | 19 |
/* |
20 |
- C MMX MMX2 3DNow* |
|
20 |
+ C MMX MMX2 3DNow |
|
21 | 21 |
isVertDC Ec Ec |
22 | 22 |
isVertMinMaxOk Ec Ec |
23 |
-doVertLowPass E e e* |
|
23 |
+doVertLowPass E e e |
|
24 | 24 |
doVertDefFilter Ec Ec Ec |
25 | 25 |
isHorizDC Ec Ec |
26 | 26 |
isHorizMinMaxOk a |
27 |
-doHorizLowPass E a a* |
|
27 |
+doHorizLowPass E a a |
|
28 | 28 |
doHorizDefFilter E ac ac |
29 | 29 |
deRing |
30 |
-Vertical RKAlgo1 E a a* |
|
31 |
-Vertical X1 a E E* |
|
32 |
-Horizontal X1 a E E* |
|
30 |
+Vertical RKAlgo1 E a a |
|
31 |
+Vertical X1 a E E |
|
32 |
+Horizontal X1 a E E |
|
33 |
+LinIpolDeinterlace a E E* |
|
34 |
+LinBlendDeinterlace a E E* |
|
35 |
+MedianDeinterlace a E |
|
33 | 36 |
|
34 | 37 |
|
35 | 38 |
* i dont have a 3dnow CPU -> its untested |
... | ... |
@@ -55,6 +58,7 @@ make the mainloop more flexible (variable number of blocks at once |
55 | 55 |
compare the quality & speed of all filters |
56 | 56 |
implement a few simple deinterlacing filters |
57 | 57 |
split this huge file |
58 |
+fix warnings (unused vars, ...) |
|
58 | 59 |
... |
59 | 60 |
|
60 | 61 |
Notes: |
... | ... |
@@ -63,6 +67,9 @@ Notes: |
63 | 63 |
|
64 | 64 |
/* |
65 | 65 |
Changelog: use the CVS log |
66 |
+rewrote the horizontal lowpass filter to fix a bug which caused a blocky look |
|
67 |
+added deinterlace filters (linear interpolate, linear blend, median) |
|
68 |
+minor cleanups (removed some outcommented stuff) |
|
66 | 69 |
0.1.3 |
67 | 70 |
bugfixes: last 3 lines not brightness/contrast corrected |
68 | 71 |
brightness statistics messed up with initial black pic |
... | ... |
@@ -194,13 +201,11 @@ static inline void prefetcht2(void *p) |
194 | 194 |
* Check if the middle 8x8 Block in the given 8x10 block is flat |
195 | 195 |
*/ |
196 | 196 |
static inline int isVertDC(uint8_t src[], int stride){ |
197 |
-// return true; |
|
198 | 197 |
int numEq= 0; |
199 | 198 |
int y; |
200 | 199 |
src+= stride; // src points to begin of the 8x8 Block |
201 | 200 |
#ifdef HAVE_MMX |
202 | 201 |
asm volatile( |
203 |
-// "int $3 \n\t" |
|
204 | 202 |
"pushl %1\n\t" |
205 | 203 |
"movq b7E, %%mm7 \n\t" // mm7 = 0x7F |
206 | 204 |
"movq b7C, %%mm6 \n\t" // mm6 = 0x7D |
... | ... |
@@ -1577,9 +1582,9 @@ static inline void doHorizDefFilterAndCopyBack(uint8_t dst[], int stride, int QP |
1577 | 1577 |
} |
1578 | 1578 |
|
1579 | 1579 |
/** |
1580 |
- * Do a horizontal low pass filter on the 8x8 block |
|
1580 |
+ * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block) |
|
1581 | 1581 |
* useing the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version) |
1582 |
- * useing approximately the 7-Tap Filter (1,2,3,4,3,2,1)/16 (MMX2/3DNOW version) |
|
1582 |
+ * useing the 7-Tap Filter (2,2,2,4,2,2,2)/16 (MMX2/3DNOW version) |
|
1583 | 1583 |
*/ |
1584 | 1584 |
static inline void doHorizLowPassAndCopyBack(uint8_t dst[], int stride, int QP) |
1585 | 1585 |
{ |
... | ... |
@@ -1635,14 +1640,6 @@ static inline void doHorizLowPassAndCopyBack(uint8_t dst[], int stride, int QP) |
1635 | 1635 |
*/ |
1636 | 1636 |
// approximately a 7-Tap Filter with Vector (1,2,3,4,3,2,1)/16 |
1637 | 1637 |
/* |
1638 |
- 31 |
|
1639 |
- 121 |
|
1640 |
- 121 |
|
1641 |
- 121 |
|
1642 |
- 121 |
|
1643 |
- 121 |
|
1644 |
- 121 |
|
1645 |
- 13 |
|
1646 | 1638 |
Implemented Exact 7-Tap |
1647 | 1639 |
9421 A321 |
1648 | 1640 |
36421 64321 |
... | ... |
@@ -1654,6 +1651,7 @@ Implemented Exact 7-Tap |
1654 | 1654 |
1249 123A |
1655 | 1655 |
|
1656 | 1656 |
*/ |
1657 |
+ |
|
1657 | 1658 |
#ifdef HAVE_MMX2 |
1658 | 1659 |
#define HLP3(i) "movq " #i "(%%eax), %%mm0 \n\t"\ |
1659 | 1660 |
"movq %%mm0, %%mm1 \n\t"\ |
... | ... |
@@ -1680,12 +1678,12 @@ Implemented Exact 7-Tap |
1680 | 1680 |
#define HLP3(i) "movq " #i "(%%eax), %%mm0 \n\t"\ |
1681 | 1681 |
"movq %%mm0, %%mm1 \n\t"\ |
1682 | 1682 |
"movq %%mm0, %%mm2 \n\t"\ |
1683 |
- "movq %%mm0, %%mm3 \n\t"\ |
|
1684 |
- "movq %%mm0, %%mm4 \n\t"\ |
|
1683 |
+ "movd -4(%0), %%mm3 \n\t" /*0001000*/\ |
|
1684 |
+ "movd 8(%0), %%mm4 \n\t" /*0001000*/\ |
|
1685 | 1685 |
"psllq $8, %%mm1 \n\t"\ |
1686 | 1686 |
"psrlq $8, %%mm2 \n\t"\ |
1687 |
- "pand bm00000001, %%mm3 \n\t"\ |
|
1688 |
- "pand bm10000000, %%mm4 \n\t"\ |
|
1687 |
+ "psrlq $24, %%mm3 \n\t"\ |
|
1688 |
+ "psllq $56, %%mm4 \n\t"\ |
|
1689 | 1689 |
"por %%mm3, %%mm1 \n\t"\ |
1690 | 1690 |
"por %%mm4, %%mm2 \n\t"\ |
1691 | 1691 |
PAVGB(%%mm2, %%mm1)\ |
... | ... |
@@ -1708,7 +1706,80 @@ Implemented Exact 7-Tap |
1708 | 1708 |
"movd %%mm0, 4(%0) \n\t" |
1709 | 1709 |
#endif |
1710 | 1710 |
|
1711 |
-#define HLP(i) HLP3(i) |
|
1711 |
+/* uses the 7-Tap Filter: 1112111 */ |
|
1712 |
+#define NEW_HLP(i)\ |
|
1713 |
+ "movq " #i "(%%eax), %%mm0 \n\t"\ |
|
1714 |
+ "movq %%mm0, %%mm1 \n\t"\ |
|
1715 |
+ "movq %%mm0, %%mm2 \n\t"\ |
|
1716 |
+ "movd -4(%0), %%mm3 \n\t" /*0001000*/\ |
|
1717 |
+ "movd 8(%0), %%mm4 \n\t" /*0001000*/\ |
|
1718 |
+ "psllq $8, %%mm1 \n\t"\ |
|
1719 |
+ "psrlq $8, %%mm2 \n\t"\ |
|
1720 |
+ "psrlq $24, %%mm3 \n\t"\ |
|
1721 |
+ "psllq $56, %%mm4 \n\t"\ |
|
1722 |
+ "por %%mm3, %%mm1 \n\t"\ |
|
1723 |
+ "por %%mm4, %%mm2 \n\t"\ |
|
1724 |
+ "movq %%mm1, %%mm5 \n\t"\ |
|
1725 |
+ PAVGB(%%mm2, %%mm1)\ |
|
1726 |
+ PAVGB(%%mm1, %%mm0)\ |
|
1727 |
+ "psllq $8, %%mm5 \n\t"\ |
|
1728 |
+ "psrlq $8, %%mm2 \n\t"\ |
|
1729 |
+ "por %%mm3, %%mm5 \n\t"\ |
|
1730 |
+ "por %%mm4, %%mm2 \n\t"\ |
|
1731 |
+ "movq %%mm5, %%mm1 \n\t"\ |
|
1732 |
+ PAVGB(%%mm2, %%mm5)\ |
|
1733 |
+ "psllq $8, %%mm1 \n\t"\ |
|
1734 |
+ "psrlq $8, %%mm2 \n\t"\ |
|
1735 |
+ "por %%mm3, %%mm1 \n\t"\ |
|
1736 |
+ "por %%mm4, %%mm2 \n\t"\ |
|
1737 |
+ PAVGB(%%mm2, %%mm1)\ |
|
1738 |
+ PAVGB(%%mm1, %%mm5)\ |
|
1739 |
+ PAVGB(%%mm5, %%mm0)\ |
|
1740 |
+ "movd %%mm0, (%0) \n\t"\ |
|
1741 |
+ "psrlq $32, %%mm0 \n\t"\ |
|
1742 |
+ "movd %%mm0, 4(%0) \n\t" |
|
1743 |
+ |
|
1744 |
+/* uses the 9-Tap Filter: 112242211 */ |
|
1745 |
+#define NEW_HLP2(i)\ |
|
1746 |
+ "movq " #i "(%%eax), %%mm0 \n\t" /*0001000*/\ |
|
1747 |
+ "movq %%mm0, %%mm1 \n\t" /*0001000*/\ |
|
1748 |
+ "movq %%mm0, %%mm2 \n\t" /*0001000*/\ |
|
1749 |
+ "movd -4(%0), %%mm3 \n\t" /*0001000*/\ |
|
1750 |
+ "movd 8(%0), %%mm4 \n\t" /*0001000*/\ |
|
1751 |
+ "psllq $8, %%mm1 \n\t"\ |
|
1752 |
+ "psrlq $8, %%mm2 \n\t"\ |
|
1753 |
+ "psrlq $24, %%mm3 \n\t"\ |
|
1754 |
+ "psllq $56, %%mm4 \n\t"\ |
|
1755 |
+ "por %%mm3, %%mm1 \n\t" /*0010000*/\ |
|
1756 |
+ "por %%mm4, %%mm2 \n\t" /*0000100*/\ |
|
1757 |
+ "movq %%mm1, %%mm5 \n\t" /*0010000*/\ |
|
1758 |
+ PAVGB(%%mm2, %%mm1) /*0010100*/\ |
|
1759 |
+ PAVGB(%%mm1, %%mm0) /*0012100*/\ |
|
1760 |
+ "psllq $8, %%mm5 \n\t"\ |
|
1761 |
+ "psrlq $8, %%mm2 \n\t"\ |
|
1762 |
+ "por %%mm3, %%mm5 \n\t" /*0100000*/\ |
|
1763 |
+ "por %%mm4, %%mm2 \n\t" /*0000010*/\ |
|
1764 |
+ "movq %%mm5, %%mm1 \n\t" /*0100000*/\ |
|
1765 |
+ PAVGB(%%mm2, %%mm5) /*0100010*/\ |
|
1766 |
+ "psllq $8, %%mm1 \n\t"\ |
|
1767 |
+ "psrlq $8, %%mm2 \n\t"\ |
|
1768 |
+ "por %%mm3, %%mm1 \n\t" /*1000000*/\ |
|
1769 |
+ "por %%mm4, %%mm2 \n\t" /*0000001*/\ |
|
1770 |
+ "movq %%mm1, %%mm6 \n\t" /*1000000*/\ |
|
1771 |
+ PAVGB(%%mm2, %%mm1) /*1000001*/\ |
|
1772 |
+ "psllq $8, %%mm6 \n\t"\ |
|
1773 |
+ "psrlq $8, %%mm2 \n\t"\ |
|
1774 |
+ "por %%mm3, %%mm6 \n\t"/*100000000*/\ |
|
1775 |
+ "por %%mm4, %%mm2 \n\t"/*000000001*/\ |
|
1776 |
+ PAVGB(%%mm2, %%mm6) /*100000001*/\ |
|
1777 |
+ PAVGB(%%mm6, %%mm1) /*110000011*/\ |
|
1778 |
+ PAVGB(%%mm1, %%mm5) /*112000211*/\ |
|
1779 |
+ PAVGB(%%mm5, %%mm0) /*112242211*/\ |
|
1780 |
+ "movd %%mm0, (%0) \n\t"\ |
|
1781 |
+ "psrlq $32, %%mm0 \n\t"\ |
|
1782 |
+ "movd %%mm0, 4(%0) \n\t" |
|
1783 |
+ |
|
1784 |
+#define HLP(i) NEW_HLP(i) |
|
1712 | 1785 |
|
1713 | 1786 |
HLP(0) |
1714 | 1787 |
"addl %1, %0 \n\t" |
... | ... |
@@ -1828,6 +1899,363 @@ FIND_MIN_MAX(%%ebx, %1, 2) |
1828 | 1828 |
#endif |
1829 | 1829 |
} |
1830 | 1830 |
|
1831 |
+/** |
|
1832 |
+ * Deinterlaces the given block |
|
1833 |
+ * will be called for every 8x8 block, except the last row, and can read & write into an 8x16 block |
|
1834 |
+ */ |
|
1835 |
+static inline void deInterlaceInterpolateLinear(uint8_t src[], int stride) |
|
1836 |
+{ |
|
1837 |
+#if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
|
1838 |
+ asm volatile( |
|
1839 |
+ "leal (%0, %1), %%eax \n\t" |
|
1840 |
+ "leal (%%eax, %1, 4), %%ebx \n\t" |
|
1841 |
+// 0 1 2 3 4 5 6 7 8 9 |
|
1842 |
+// %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1 |
|
1843 |
+ |
|
1844 |
+ "movq (%0), %%mm0 \n\t" |
|
1845 |
+ "movq (%%eax, %1), %%mm1 \n\t" |
|
1846 |
+ PAVGB(%%mm1, %%mm0)\ |
|
1847 |
+ "movq %%mm0, (%%eax) \n\t" |
|
1848 |
+ "movq (%0, %1, 4), %%mm0 \n\t" |
|
1849 |
+ PAVGB(%%mm0, %%mm1)\ |
|
1850 |
+ "movq %%mm1, (%%eax, %1, 2) \n\t" |
|
1851 |
+ "movq (%%ebx, %1), %%mm1 \n\t" |
|
1852 |
+ PAVGB(%%mm1, %%mm0)\ |
|
1853 |
+ "movq %%mm0, (%%ebx) \n\t" |
|
1854 |
+ "movq (%0, %1, 8), %%mm0 \n\t" |
|
1855 |
+ PAVGB(%%mm0, %%mm1)\ |
|
1856 |
+ "movq %%mm1, (%%ebx, %1, 2) \n\t" |
|
1857 |
+ |
|
1858 |
+ : : "r" (src), "r" (stride) |
|
1859 |
+ : "%eax", "%ebx" |
|
1860 |
+ ); |
|
1861 |
+#else |
|
1862 |
+ int x; |
|
1863 |
+ for(x=0; x<8; x++) |
|
1864 |
+ { |
|
1865 |
+ src[stride] = (src[0] + src[stride*2])>>1; |
|
1866 |
+ src[stride*3] = (src[stride*2] + src[stride*4])>>1; |
|
1867 |
+ src[stride*5] = (src[stride*4] + src[stride*6])>>1; |
|
1868 |
+ src[stride*7] = (src[stride*6] + src[stride*8])>>1; |
|
1869 |
+ src++; |
|
1870 |
+ } |
|
1871 |
+#endif |
|
1872 |
+} |
|
1873 |
+ |
|
1874 |
+/** |
|
1875 |
+ * Deinterlaces the given block |
|
1876 |
+ * will be called for every 8x8 block, in the last row, and can read & write into an 8x8 block |
|
1877 |
+ */ |
|
1878 |
+static inline void deInterlaceInterpolateLinearLastRow(uint8_t src[], int stride) |
|
1879 |
+{ |
|
1880 |
+#if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
|
1881 |
+ asm volatile( |
|
1882 |
+ "leal (%0, %1), %%eax \n\t" |
|
1883 |
+ "leal (%%eax, %1, 4), %%ebx \n\t" |
|
1884 |
+// 0 1 2 3 4 5 6 7 8 9 |
|
1885 |
+// %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1 |
|
1886 |
+ |
|
1887 |
+ "movq (%0), %%mm0 \n\t" |
|
1888 |
+ "movq (%%eax, %1), %%mm1 \n\t" |
|
1889 |
+ PAVGB(%%mm1, %%mm0)\ |
|
1890 |
+ "movq %%mm0, (%%eax) \n\t" |
|
1891 |
+ "movq (%0, %1, 4), %%mm0 \n\t" |
|
1892 |
+ PAVGB(%%mm0, %%mm1)\ |
|
1893 |
+ "movq %%mm1, (%%eax, %1, 2) \n\t" |
|
1894 |
+ "movq (%%ebx, %1), %%mm1 \n\t" |
|
1895 |
+ PAVGB(%%mm1, %%mm0)\ |
|
1896 |
+ "movq %%mm0, (%%ebx) \n\t" |
|
1897 |
+ "movq %%mm1, (%%ebx, %1, 2) \n\t" |
|
1898 |
+ |
|
1899 |
+ |
|
1900 |
+ : : "r" (src), "r" (stride) |
|
1901 |
+ : "%eax", "%ebx" |
|
1902 |
+ ); |
|
1903 |
+#else |
|
1904 |
+ int x; |
|
1905 |
+ for(x=0; x<8; x++) |
|
1906 |
+ { |
|
1907 |
+ src[stride] = (src[0] + src[stride*2])>>1; |
|
1908 |
+ src[stride*3] = (src[stride*2] + src[stride*4])>>1; |
|
1909 |
+ src[stride*5] = (src[stride*4] + src[stride*6])>>1; |
|
1910 |
+ src[stride*7] = src[stride*6]; |
|
1911 |
+ src++; |
|
1912 |
+ } |
|
1913 |
+#endif |
|
1914 |
+} |
|
1915 |
+ |
|
1916 |
+/** |
|
1917 |
+ * Deinterlaces the given block |
|
1918 |
+ * will be called for every 8x8 block, except the last row, and can read & write into an 8x16 block |
|
1919 |
+ * will shift the image up by 1 line (FIXME if this is a problem) |
|
1920 |
+ */ |
|
1921 |
+static inline void deInterlaceBlendLinear(uint8_t src[], int stride) |
|
1922 |
+{ |
|
1923 |
+#if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
|
1924 |
+ asm volatile( |
|
1925 |
+ "leal (%0, %1), %%eax \n\t" |
|
1926 |
+ "leal (%%eax, %1, 4), %%ebx \n\t" |
|
1927 |
+// 0 1 2 3 4 5 6 7 8 9 |
|
1928 |
+// %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1 |
|
1929 |
+ |
|
1930 |
+ "movq (%0), %%mm0 \n\t" // L0 |
|
1931 |
+ "movq (%%eax, %1), %%mm1 \n\t" // L2 |
|
1932 |
+ PAVGB(%%mm1, %%mm0) // L0+L2 |
|
1933 |
+ "movq (%%eax), %%mm2 \n\t" // L1 |
|
1934 |
+ PAVGB(%%mm2, %%mm0) |
|
1935 |
+ "movq %%mm0, (%0) \n\t" |
|
1936 |
+ "movq (%%eax, %1, 2), %%mm0 \n\t" // L3 |
|
1937 |
+ PAVGB(%%mm0, %%mm2) // L1+L3 |
|
1938 |
+ PAVGB(%%mm1, %%mm2) // 2L2 + L1 + L3 |
|
1939 |
+ "movq %%mm2, (%%eax) \n\t" |
|
1940 |
+ "movq (%0, %1, 4), %%mm2 \n\t" // L4 |
|
1941 |
+ PAVGB(%%mm2, %%mm1) // L2+L4 |
|
1942 |
+ PAVGB(%%mm0, %%mm1) // 2L3 + L2 + L4 |
|
1943 |
+ "movq %%mm1, (%%eax, %1) \n\t" |
|
1944 |
+ "movq (%%ebx), %%mm1 \n\t" // L5 |
|
1945 |
+ PAVGB(%%mm1, %%mm0) // L3+L5 |
|
1946 |
+ PAVGB(%%mm2, %%mm0) // 2L4 + L3 + L5 |
|
1947 |
+ "movq %%mm0, (%%eax, %1, 2) \n\t" |
|
1948 |
+ "movq (%%ebx, %1), %%mm0 \n\t" // L6 |
|
1949 |
+ PAVGB(%%mm0, %%mm2) // L4+L6 |
|
1950 |
+ PAVGB(%%mm1, %%mm2) // 2L5 + L4 + L6 |
|
1951 |
+ "movq %%mm2, (%0, %1, 4) \n\t" |
|
1952 |
+ "movq (%%ebx, %1, 2), %%mm2 \n\t" // L7 |
|
1953 |
+ PAVGB(%%mm2, %%mm1) // L5+L7 |
|
1954 |
+ PAVGB(%%mm0, %%mm1) // 2L6 + L5 + L7 |
|
1955 |
+ "movq %%mm1, (%%ebx) \n\t" |
|
1956 |
+ "movq (%0, %1, 8), %%mm1 \n\t" // L8 |
|
1957 |
+ PAVGB(%%mm1, %%mm0) // L6+L8 |
|
1958 |
+ PAVGB(%%mm2, %%mm0) // 2L7 + L6 + L8 |
|
1959 |
+ "movq %%mm0, (%%ebx, %1) \n\t" |
|
1960 |
+ "movq (%%ebx, %1, 4), %%mm0 \n\t" // L9 |
|
1961 |
+ PAVGB(%%mm0, %%mm2) // L7+L9 |
|
1962 |
+ PAVGB(%%mm1, %%mm2) // 2L8 + L7 + L9 |
|
1963 |
+ "movq %%mm2, (%%ebx, %1, 2) \n\t" |
|
1964 |
+ |
|
1965 |
+ |
|
1966 |
+ : : "r" (src), "r" (stride) |
|
1967 |
+ : "%eax", "%ebx" |
|
1968 |
+ ); |
|
1969 |
+#else |
|
1970 |
+ int x; |
|
1971 |
+ for(x=0; x<8; x++) |
|
1972 |
+ { |
|
1973 |
+ src[0 ] = (src[0 ] + 2*src[stride ] + src[stride*2])>>2; |
|
1974 |
+ src[stride ] = (src[stride ] + 2*src[stride*2] + src[stride*3])>>2; |
|
1975 |
+ src[stride*2] = (src[stride*2] + 2*src[stride*3] + src[stride*4])>>2; |
|
1976 |
+ src[stride*3] = (src[stride*3] + 2*src[stride*4] + src[stride*5])>>2; |
|
1977 |
+ src[stride*4] = (src[stride*4] + 2*src[stride*5] + src[stride*6])>>2; |
|
1978 |
+ src[stride*5] = (src[stride*5] + 2*src[stride*6] + src[stride*7])>>2; |
|
1979 |
+ src[stride*6] = (src[stride*6] + 2*src[stride*7] + src[stride*8])>>2; |
|
1980 |
+ src[stride*7] = (src[stride*7] + 2*src[stride*8] + src[stride*9])>>2; |
|
1981 |
+ src++; |
|
1982 |
+ } |
|
1983 |
+#endif |
|
1984 |
+} |
|
1985 |
+ |
|
1986 |
+/** |
|
1987 |
+ * Deinterlaces the given block |
|
1988 |
+ * will be called for every 8x8 block, in the last row, and can read & write into an 8x8 block |
|
1989 |
+ * will shift the image up by 1 line (FIXME if this is a problem) |
|
1990 |
+ */ |
|
1991 |
+static inline void deInterlaceBlendLinearLastRow(uint8_t src[], int stride) |
|
1992 |
+{ |
|
1993 |
+#if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
|
1994 |
+ asm volatile( |
|
1995 |
+ "leal (%0, %1), %%eax \n\t" |
|
1996 |
+ "leal (%%eax, %1, 4), %%ebx \n\t" |
|
1997 |
+// 0 1 2 3 4 5 6 7 8 9 |
|
1998 |
+// %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1 |
|
1999 |
+ |
|
2000 |
+ "movq (%0), %%mm0 \n\t" // L0 |
|
2001 |
+ "movq (%%eax, %1), %%mm1 \n\t" // L2 |
|
2002 |
+ PAVGB(%%mm1, %%mm0) // L0+L2 |
|
2003 |
+ "movq (%%eax), %%mm2 \n\t" // L1 |
|
2004 |
+ PAVGB(%%mm2, %%mm0) |
|
2005 |
+ "movq %%mm0, (%0) \n\t" |
|
2006 |
+ "movq (%%eax, %1, 2), %%mm0 \n\t" // L3 |
|
2007 |
+ PAVGB(%%mm0, %%mm2) // L1+L3 |
|
2008 |
+ PAVGB(%%mm1, %%mm2) // 2L2 + L1 + L3 |
|
2009 |
+ "movq %%mm2, (%%eax) \n\t" |
|
2010 |
+ "movq (%0, %1, 4), %%mm2 \n\t" // L4 |
|
2011 |
+ PAVGB(%%mm2, %%mm1) // L2+L4 |
|
2012 |
+ PAVGB(%%mm0, %%mm1) // 2L3 + L2 + L4 |
|
2013 |
+ "movq %%mm1, (%%eax, %1) \n\t" |
|
2014 |
+ "movq (%%ebx), %%mm1 \n\t" // L5 |
|
2015 |
+ PAVGB(%%mm1, %%mm0) // L3+L5 |
|
2016 |
+ PAVGB(%%mm2, %%mm0) // 2L4 + L3 + L5 |
|
2017 |
+ "movq %%mm0, (%%eax, %1, 2) \n\t" |
|
2018 |
+ "movq (%%ebx, %1), %%mm0 \n\t" // L6 |
|
2019 |
+ PAVGB(%%mm0, %%mm2) // L4+L6 |
|
2020 |
+ PAVGB(%%mm1, %%mm2) // 2L5 + L4 + L6 |
|
2021 |
+ "movq %%mm2, (%0, %1, 4) \n\t" |
|
2022 |
+ "movq (%%ebx, %1, 2), %%mm2 \n\t" // L7 |
|
2023 |
+ PAVGB(%%mm2, %%mm1) // L5+L7 |
|
2024 |
+ PAVGB(%%mm0, %%mm1) // 2L6 + L5 + L7 |
|
2025 |
+ "movq %%mm1, (%%ebx) \n\t" |
|
2026 |
+ PAVGB(%%mm2, %%mm0) // L7 + L8 |
|
2027 |
+ "movq %%mm0, (%%ebx, %1) \n\t" |
|
2028 |
+ "movq %%mm0, (%%ebx, %1, 2) \n\t" |
|
2029 |
+ |
|
2030 |
+ : : "r" (src), "r" (stride) |
|
2031 |
+ : "%eax", "%ebx" |
|
2032 |
+ ); |
|
2033 |
+#else |
|
2034 |
+ int x; |
|
2035 |
+ for(x=0; x<8; x++) |
|
2036 |
+ { |
|
2037 |
+ src[0 ] = (src[0 ] + 2*src[stride ] + src[stride*2])>>2; |
|
2038 |
+ src[stride ] = (src[stride ] + 2*src[stride*2] + src[stride*3])>>2; |
|
2039 |
+ src[stride*2] = (src[stride*2] + 2*src[stride*3] + src[stride*4])>>2; |
|
2040 |
+ src[stride*3] = (src[stride*3] + 2*src[stride*4] + src[stride*5])>>2; |
|
2041 |
+ src[stride*4] = (src[stride*4] + 2*src[stride*5] + src[stride*6])>>2; |
|
2042 |
+ src[stride*5] = (src[stride*5] + 2*src[stride*6] + src[stride*7])>>2; |
|
2043 |
+ src[stride*6] = (src[stride*6] + src[stride*7])>>1; |
|
2044 |
+ src[stride*7] = src[stride*6]; |
|
2045 |
+ src++; |
|
2046 |
+ } |
|
2047 |
+#endif |
|
2048 |
+} |
|
2049 |
+ |
|
2050 |
+/** |
|
2051 |
+ * Deinterlaces the given block |
|
2052 |
+ * will be called for every 8x8 block, except the last row, and can read & write into an 8x16 block |
|
2053 |
+ */ |
|
2054 |
+static inline void deInterlaceMedian(uint8_t src[], int stride) |
|
2055 |
+{ |
|
2056 |
+#if defined (HAVE_MMX2) |
|
2057 |
+ asm volatile( |
|
2058 |
+ "leal (%0, %1), %%eax \n\t" |
|
2059 |
+ "leal (%%eax, %1, 4), %%ebx \n\t" |
|
2060 |
+// 0 1 2 3 4 5 6 7 8 9 |
|
2061 |
+// %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1 |
|
2062 |
+ |
|
2063 |
+ "movq (%0), %%mm0 \n\t" // |
|
2064 |
+ "movq (%%eax, %1), %%mm2 \n\t" // |
|
2065 |
+ "movq (%%eax), %%mm1 \n\t" // |
|
2066 |
+ "movq %%mm0, %%mm3 \n\t" |
|
2067 |
+ "pmaxub %%mm1, %%mm0 \n\t" // |
|
2068 |
+ "pminub %%mm3, %%mm1 \n\t" // |
|
2069 |
+ "pmaxub %%mm2, %%mm1 \n\t" // |
|
2070 |
+ "pminub %%mm1, %%mm0 \n\t" |
|
2071 |
+ "movq %%mm0, (%%eax) \n\t" |
|
2072 |
+ |
|
2073 |
+ "movq (%0, %1, 4), %%mm0 \n\t" // |
|
2074 |
+ "movq (%%eax, %1, 2), %%mm1 \n\t" // |
|
2075 |
+ "movq %%mm2, %%mm3 \n\t" |
|
2076 |
+ "pmaxub %%mm1, %%mm2 \n\t" // |
|
2077 |
+ "pminub %%mm3, %%mm1 \n\t" // |
|
2078 |
+ "pmaxub %%mm0, %%mm1 \n\t" // |
|
2079 |
+ "pminub %%mm1, %%mm2 \n\t" |
|
2080 |
+ "movq %%mm2, (%%eax, %1, 2) \n\t" |
|
2081 |
+ |
|
2082 |
+ "movq (%%ebx), %%mm2 \n\t" // |
|
2083 |
+ "movq (%%ebx, %1), %%mm1 \n\t" // |
|
2084 |
+ "movq %%mm2, %%mm3 \n\t" |
|
2085 |
+ "pmaxub %%mm0, %%mm2 \n\t" // |
|
2086 |
+ "pminub %%mm3, %%mm0 \n\t" // |
|
2087 |
+ "pmaxub %%mm1, %%mm0 \n\t" // |
|
2088 |
+ "pminub %%mm0, %%mm2 \n\t" |
|
2089 |
+ "movq %%mm2, (%%ebx) \n\t" |
|
2090 |
+ |
|
2091 |
+ "movq (%%ebx, %1, 2), %%mm2 \n\t" // |
|
2092 |
+ "movq (%0, %1, 8), %%mm0 \n\t" // |
|
2093 |
+ "movq %%mm2, %%mm3 \n\t" |
|
2094 |
+ "pmaxub %%mm0, %%mm2 \n\t" // |
|
2095 |
+ "pminub %%mm3, %%mm0 \n\t" // |
|
2096 |
+ "pmaxub %%mm1, %%mm0 \n\t" // |
|
2097 |
+ "pminub %%mm0, %%mm2 \n\t" |
|
2098 |
+ "movq %%mm2, (%%ebx, %1, 2) \n\t" |
|
2099 |
+ |
|
2100 |
+ |
|
2101 |
+ : : "r" (src), "r" (stride) |
|
2102 |
+ : "%eax", "%ebx" |
|
2103 |
+ ); |
|
2104 |
+#else |
|
2105 |
+ //FIXME |
|
2106 |
+ int x; |
|
2107 |
+ for(x=0; x<8; x++) |
|
2108 |
+ { |
|
2109 |
+ src[0 ] = (src[0 ] + 2*src[stride ] + src[stride*2])>>2; |
|
2110 |
+ src[stride ] = (src[stride ] + 2*src[stride*2] + src[stride*3])>>2; |
|
2111 |
+ src[stride*2] = (src[stride*2] + 2*src[stride*3] + src[stride*4])>>2; |
|
2112 |
+ src[stride*3] = (src[stride*3] + 2*src[stride*4] + src[stride*5])>>2; |
|
2113 |
+ src[stride*4] = (src[stride*4] + 2*src[stride*5] + src[stride*6])>>2; |
|
2114 |
+ src[stride*5] = (src[stride*5] + 2*src[stride*6] + src[stride*7])>>2; |
|
2115 |
+ src[stride*6] = (src[stride*6] + 2*src[stride*7] + src[stride*8])>>2; |
|
2116 |
+ src[stride*7] = (src[stride*7] + 2*src[stride*8] + src[stride*9])>>2; |
|
2117 |
+ src++; |
|
2118 |
+ } |
|
2119 |
+#endif |
|
2120 |
+} |
|
2121 |
+ |
|
2122 |
+/** |
|
2123 |
+ * Deinterlaces the given block |
|
2124 |
+ * will be called for every 8x8 block, in the last row, and can read & write into an 8x8 block |
|
2125 |
+ * will shift the image up by 1 line (FIXME if this is a problem) |
|
2126 |
+ */ |
|
2127 |
+static inline void deInterlaceMedianLastRow(uint8_t src[], int stride) |
|
2128 |
+{ |
|
2129 |
+#if defined (HAVE_MMX2) |
|
2130 |
+ asm volatile( |
|
2131 |
+ "leal (%0, %1), %%eax \n\t" |
|
2132 |
+ "leal (%%eax, %1, 4), %%ebx \n\t" |
|
2133 |
+// 0 1 2 3 4 5 6 7 8 9 |
|
2134 |
+// %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1 |
|
2135 |
+ |
|
2136 |
+ "movq (%0), %%mm0 \n\t" // |
|
2137 |
+ "movq (%%eax, %1), %%mm2 \n\t" // |
|
2138 |
+ "movq (%%eax), %%mm1 \n\t" // |
|
2139 |
+ "movq %%mm0, %%mm3 \n\t" |
|
2140 |
+ "pmaxub %%mm1, %%mm0 \n\t" // |
|
2141 |
+ "pminub %%mm3, %%mm1 \n\t" // |
|
2142 |
+ "pmaxub %%mm2, %%mm1 \n\t" // |
|
2143 |
+ "pminub %%mm1, %%mm0 \n\t" |
|
2144 |
+ "movq %%mm0, (%%eax) \n\t" |
|
2145 |
+ |
|
2146 |
+ "movq (%0, %1, 4), %%mm0 \n\t" // |
|
2147 |
+ "movq (%%eax, %1, 2), %%mm1 \n\t" // |
|
2148 |
+ "movq %%mm2, %%mm3 \n\t" |
|
2149 |
+ "pmaxub %%mm1, %%mm2 \n\t" // |
|
2150 |
+ "pminub %%mm3, %%mm1 \n\t" // |
|
2151 |
+ "pmaxub %%mm0, %%mm1 \n\t" // |
|
2152 |
+ "pminub %%mm1, %%mm2 \n\t" |
|
2153 |
+ "movq %%mm2, (%%eax, %1, 2) \n\t" |
|
2154 |
+ |
|
2155 |
+ "movq (%%ebx), %%mm2 \n\t" // |
|
2156 |
+ "movq (%%ebx, %1), %%mm1 \n\t" // |
|
2157 |
+ "movq %%mm2, %%mm3 \n\t" |
|
2158 |
+ "pmaxub %%mm0, %%mm2 \n\t" // |
|
2159 |
+ "pminub %%mm3, %%mm0 \n\t" // |
|
2160 |
+ "pmaxub %%mm1, %%mm0 \n\t" // |
|
2161 |
+ "pminub %%mm0, %%mm2 \n\t" |
|
2162 |
+ "movq %%mm2, (%%ebx) \n\t" |
|
2163 |
+ |
|
2164 |
+ "movq %%mm1, (%%ebx, %1, 2) \n\t" |
|
2165 |
+ |
|
2166 |
+ : : "r" (src), "r" (stride) |
|
2167 |
+ : "%eax", "%ebx" |
|
2168 |
+ ); |
|
2169 |
+#else |
|
2170 |
+ //FIXME |
|
2171 |
+ int x; |
|
2172 |
+ for(x=0; x<8; x++) |
|
2173 |
+ { |
|
2174 |
+ src[0 ] = (src[0 ] + 2*src[stride ] + src[stride*2])>>2; |
|
2175 |
+ src[stride ] = (src[stride ] + 2*src[stride*2] + src[stride*3])>>2; |
|
2176 |
+ src[stride*2] = (src[stride*2] + 2*src[stride*3] + src[stride*4])>>2; |
|
2177 |
+ src[stride*3] = (src[stride*3] + 2*src[stride*4] + src[stride*5])>>2; |
|
2178 |
+ src[stride*4] = (src[stride*4] + 2*src[stride*5] + src[stride*6])>>2; |
|
2179 |
+ src[stride*5] = (src[stride*5] + 2*src[stride*6] + src[stride*7])>>2; |
|
2180 |
+ src[stride*6] = (src[stride*6] + src[stride*7])>>1; |
|
2181 |
+ src[stride*7] = src[stride*6]; |
|
2182 |
+ src++; |
|
2183 |
+ } |
|
2184 |
+#endif |
|
2185 |
+} |
|
2186 |
+ |
|
2187 |
+ |
|
1831 | 2188 |
#ifdef HAVE_ODIVX_POSTPROCESS |
1832 | 2189 |
#include "../opendivx/postprocess.h" |
1833 | 2190 |
int use_old_pp=0; |
... | ... |
@@ -1841,7 +2269,6 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri |
1841 | 1841 |
* the mode value is interpreted as a quality value if its negative, its range is then (-1 ... -63) |
1842 | 1842 |
* -63 is best quality -1 is worst |
1843 | 1843 |
*/ |
1844 |
-//extern "C"{ |
|
1845 | 1844 |
void postprocess(unsigned char * src[], int src_stride, |
1846 | 1845 |
unsigned char * dst[], int dst_stride, |
1847 | 1846 |
int horizontal_size, int vertical_size, |
... | ... |
@@ -2196,6 +2623,17 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri |
2196 | 2196 |
blockCopy(vertBlock + dstStride*2, dstStride, |
2197 | 2197 |
vertSrcBlock + srcStride*2, srcStride, 8, mode & LEVEL_FIX); |
2198 | 2198 |
|
2199 |
+ if(mode & LINEAR_IPOL_DEINT_FILTER) |
|
2200 |
+ deInterlaceInterpolateLinear(dstBlock, dstStride); |
|
2201 |
+ else if(mode & LINEAR_BLEND_DEINT_FILTER) |
|
2202 |
+ deInterlaceBlendLinear(dstBlock, dstStride); |
|
2203 |
+ else if(mode & MEDIAN_DEINT_FILTER) |
|
2204 |
+ deInterlaceMedian(dstBlock, dstStride); |
|
2205 |
+/* else if(mode & CUBIC_IPOL_DEINT_FILTER) |
|
2206 |
+ deInterlaceInterpolateCubic(dstBlock, dstStride); |
|
2207 |
+ else if(mode & CUBIC_BLEND_DEINT_FILTER) |
|
2208 |
+ deInterlaceBlendCubic(dstBlock, dstStride); |
|
2209 |
+*/ |
|
2199 | 2210 |
|
2200 | 2211 |
#ifdef MORE_TIMEING |
2201 | 2212 |
T1= rdtsc(); |
... | ... |
@@ -2226,9 +2664,22 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri |
2226 | 2226 |
#endif |
2227 | 2227 |
} |
2228 | 2228 |
else |
2229 |
+ { |
|
2229 | 2230 |
blockCopy(vertBlock + dstStride*1, dstStride, |
2230 | 2231 |
vertSrcBlock + srcStride*1, srcStride, 4, mode & LEVEL_FIX); |
2231 | 2232 |
|
2233 |
+ if(mode & LINEAR_IPOL_DEINT_FILTER) |
|
2234 |
+ deInterlaceInterpolateLinearLastRow(dstBlock, dstStride); |
|
2235 |
+ else if(mode & LINEAR_BLEND_DEINT_FILTER) |
|
2236 |
+ deInterlaceBlendLinearLastRow(dstBlock, dstStride); |
|
2237 |
+ else if(mode & MEDIAN_DEINT_FILTER) |
|
2238 |
+ deInterlaceMedianLastRow(dstBlock, dstStride); |
|
2239 |
+/* else if(mode & CUBIC_IPOL_DEINT_FILTER) |
|
2240 |
+ deInterlaceInterpolateCubicLastRow(dstBlock, dstStride); |
|
2241 |
+ else if(mode & CUBIC_BLEND_DEINT_FILTER) |
|
2242 |
+ deInterlaceBlendCubicLastRow(dstBlock, dstStride); |
|
2243 |
+*/ |
|
2244 |
+ } |
|
2232 | 2245 |
|
2233 | 2246 |
if(x - 8 >= 0 && x<width) |
2234 | 2247 |
{ |