
rewrote the horizontal lowpass filter to fix a bug which caused a blocky look
added deinterlace filters (linear interpolate, linear blend, median)
minor cleanups (removed some outcommented stuff)

Originally committed as revision 2204 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc

Michael Niedermayer authored on 2001/10/15 12:01:08
Showing 3 changed files
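The patch adds three deinterlacers (linear interpolate, linear blend, median). As a minimal plain-C sketch of what the new code computes, one 8-pixel column of a block at a time (the helper names below are illustrative and not part of the patch, and the rounding of the pavgb-based MMX2/3DNow paths is ignored):

#include <stdint.h>

/* median of three bytes: min(max(a,b), max(min(a,b), c)) -- the same value
 * the pmaxub/pminub sequence in deInterlaceMedian produces for every byte */
static inline uint8_t median3(uint8_t a, uint8_t b, uint8_t c)
{
	uint8_t mx= a > b ? a : b;
	uint8_t mn= a > b ? b : a;
	uint8_t t = mn > c ? mn : c;
	return mx < t ? mx : t;
}

/* linear interpolate: odd lines are rebuilt from their even neighbours */
static void deintLinearIpolColumn(uint8_t *src, int stride)
{
	int y;
	for(y=1; y<8; y+=2)
		src[stride*y]= (src[stride*(y-1)] + src[stride*(y+1)])>>1;
}

/* linear blend: line y becomes (y + 2*(y+1) + (y+2))/4, hence the one-line
 * upward shift mentioned in the doc comments of the patch */
static void deintLinearBlendColumn(uint8_t *src, int stride)
{
	int y;
	for(y=0; y<8; y++)
		src[stride*y]= (src[stride*y] + 2*src[stride*(y+1)] + src[stride*(y+2)])>>2;
}

/* median: odd lines become the median of themselves and their two neighbours */
static void deintMedianColumn(uint8_t *src, int stride)
{
	int y;
	for(y=1; y<8; y+=2)
		src[stride*y]= median3(src[stride*(y-1)], src[stride*y], src[stride*(y+1)]);
}

The MMX2/3DNow code in the diff does the same work eight bytes at a time, the *LastRow variants differ only in how they treat the bottom lines of the image, and the patch's plain-C fallback for the median filter is still marked FIXME (it reuses the blend formula).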
... ...
@@ -17,19 +17,22 @@
17 17
 */
18 18
 
19 19
 /*
20
-			C	MMX	MMX2	3DNow*
20
+			C	MMX	MMX2	3DNow
21 21
 isVertDC		Ec	Ec
22 22
 isVertMinMaxOk		Ec	Ec
23
-doVertLowPass		E		e	e*
23
+doVertLowPass		E		e	e
24 24
 doVertDefFilter		Ec	Ec	Ec
25 25
 isHorizDC		Ec	Ec
26 26
 isHorizMinMaxOk		a
27
-doHorizLowPass		E		a	a*
27
+doHorizLowPass		E		a	a
28 28
 doHorizDefFilter	E	ac	ac
29 29
 deRing
30
-Vertical RKAlgo1	E		a	a*
31
-Vertical X1		a		E	E*
32
-Horizontal X1		a		E	E*
30
+Vertical RKAlgo1	E		a	a
31
+Vertical X1		a		E	E
32
+Horizontal X1		a		E	E
33
+LinIpolDeinterlace	a		E	E*
34
+LinBlendDeinterlace	a		E	E*
35
+MedianDeinterlace	a		E
33 36
 
34 37
 
35 38
 * I don't have a 3DNow CPU -> it's untested
... ...
@@ -55,6 +58,7 @@ make the mainloop more flexible (variable number of blocks at once
55 55
 compare the quality & speed of all filters
56 56
 implement a few simple deinterlacing filters
57 57
 split this huge file
58
+fix warnings (unused vars, ...)
58 59
 ...
59 60
 
60 61
 Notes:
... ...
@@ -63,6 +67,9 @@ Notes:
63 63
 
64 64
 /*
65 65
 Changelog: use the CVS log
66
+rewrote the horizontal lowpass filter to fix a bug which caused a blocky look
67
+added deinterlace filters (linear interpolate, linear blend, median)
68
+minor cleanups (removed some outcommented stuff)
66 69
 0.1.3
67 70
 	bugfixes: last 3 lines not brightness/contrast corrected
68 71
 		brightness statistics messed up with initial black pic
... ...
@@ -194,13 +201,11 @@ static inline void prefetcht2(void *p)
194 194
  * Check if the middle 8x8 Block in the given 8x10 block is flat
195 195
  */
196 196
 static inline int isVertDC(uint8_t src[], int stride){
197
-//	return true;
198 197
 	int numEq= 0;
199 198
 	int y;
200 199
 	src+= stride; // src points to begin of the 8x8 Block
201 200
 #ifdef HAVE_MMX
202 201
 	asm volatile(
203
-//		"int $3 \n\t"
204 202
 		"pushl %1\n\t"
205 203
 		"movq b7E, %%mm7					\n\t" // mm7 = 0x7F
206 204
 		"movq b7C, %%mm6					\n\t" // mm6 = 0x7D
... ...
@@ -1577,9 +1582,9 @@ static inline void doHorizDefFilterAndCopyBack(uint8_t dst[], int stride, int QP
1577 1577
 }
1578 1578
 
1579 1579
 /**
1580
- * Do a horizontal low pass filter on the 8x8 block
1580
+ * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
1581 1581
 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
1582
- * useing approximately the 7-Tap Filter (1,2,3,4,3,2,1)/16 (MMX2/3DNOW version)
1582
+ * using the 7-Tap Filter (2,2,2,4,2,2,2)/16 (MMX2/3DNOW version)
1583 1583
  */
1584 1584
 static inline void doHorizLowPassAndCopyBack(uint8_t dst[], int stride, int QP)
1585 1585
 {
... ...
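For reference, the two kernels named in the doc comment above, written out for one output pixel well inside the row (edge handling and the rounding of the chained pavgb averages in the MMX2/3DNOW path are ignored; the helper names are illustrative only):

#include <stdint.h>

/* (1,1,2,2,4,2,2,1,1)/16 -- the C path */
static inline int hLowpass9(const uint8_t *p)
{
	return (p[-4] + p[-3] + 2*p[-2] + 2*p[-1] + 4*p[0]
	      + 2*p[1] + 2*p[2] + p[3] + p[4]) / 16;
}

/* (2,2,2,4,2,2,2)/16 -- the MMX2/3DNOW path; note this is the same kernel
 * as the (1,1,1,2,1,1,1)/8 mentioned next to the NEW_HLP macro further down */
static inline int hLowpass7(const uint8_t *p)
{
	return (2*p[-3] + 2*p[-2] + 2*p[-1] + 4*p[0]
	      + 2*p[1] + 2*p[2] + 2*p[3]) / 16;
}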
@@ -1635,14 +1640,6 @@ static inline void doHorizLowPassAndCopyBack(uint8_t dst[], int stride, int QP)
1635 1635
 */
1636 1636
 // approximately a 7-Tap Filter with Vector (1,2,3,4,3,2,1)/16
1637 1637
 /*
1638
- 31
1639
- 121
1640
-  121
1641
-   121
1642
-    121
1643
-     121
1644
-      121
1645
-       13
1646 1638
 Implemented	Exact 7-Tap
1647 1639
  9421		A321
1648 1640
  36421		64321
... ...
@@ -1654,6 +1651,7 @@ Implemented	Exact 7-Tap
1654 1654
      1249	   123A
1655 1655
 
1656 1656
 */
1657
+
1657 1658
 #ifdef HAVE_MMX2
1658 1659
 #define HLP3(i)	"movq " #i "(%%eax), %%mm0				\n\t"\
1659 1660
 		"movq %%mm0, %%mm1					\n\t"\
... ...
@@ -1680,12 +1678,12 @@ Implemented	Exact 7-Tap
1680 1680
 #define HLP3(i)	"movq " #i "(%%eax), %%mm0				\n\t"\
1681 1681
 		"movq %%mm0, %%mm1					\n\t"\
1682 1682
 		"movq %%mm0, %%mm2					\n\t"\
1683
-		"movq %%mm0, %%mm3					\n\t"\
1684
-		"movq %%mm0, %%mm4					\n\t"\
1683
+		"movd -4(%0), %%mm3					\n\t" /*0001000*/\
1684
+		"movd 8(%0), %%mm4					\n\t" /*0001000*/\
1685 1685
 		"psllq $8, %%mm1					\n\t"\
1686 1686
 		"psrlq $8, %%mm2					\n\t"\
1687
-		"pand bm00000001, %%mm3					\n\t"\
1688
-		"pand bm10000000, %%mm4					\n\t"\
1687
+		"psrlq $24, %%mm3					\n\t"\
1688
+		"psllq $56, %%mm4					\n\t"\
1689 1689
 		"por %%mm3, %%mm1					\n\t"\
1690 1690
 		"por %%mm4, %%mm2					\n\t"\
1691 1691
 		PAVGB(%%mm2, %%mm1)\
... ...
@@ -1708,7 +1706,80 @@ Implemented	Exact 7-Tap
1708 1708
 		"movd %%mm0, 4(%0)					\n\t"
1709 1709
 #endif
1710 1710
 
1711
-#define HLP(i) HLP3(i)
1711
+/* uses the 7-Tap Filter: 1112111 */
1712
+#define NEW_HLP(i)\
1713
+		"movq " #i "(%%eax), %%mm0				\n\t"\
1714
+		"movq %%mm0, %%mm1					\n\t"\
1715
+		"movq %%mm0, %%mm2					\n\t"\
1716
+		"movd -4(%0), %%mm3					\n\t" /*0001000*/\
1717
+		"movd 8(%0), %%mm4					\n\t" /*0001000*/\
1718
+		"psllq $8, %%mm1					\n\t"\
1719
+		"psrlq $8, %%mm2					\n\t"\
1720
+		"psrlq $24, %%mm3					\n\t"\
1721
+		"psllq $56, %%mm4					\n\t"\
1722
+		"por %%mm3, %%mm1					\n\t"\
1723
+		"por %%mm4, %%mm2					\n\t"\
1724
+		"movq %%mm1, %%mm5					\n\t"\
1725
+		PAVGB(%%mm2, %%mm1)\
1726
+		PAVGB(%%mm1, %%mm0)\
1727
+		"psllq $8, %%mm5					\n\t"\
1728
+		"psrlq $8, %%mm2					\n\t"\
1729
+		"por %%mm3, %%mm5					\n\t"\
1730
+		"por %%mm4, %%mm2					\n\t"\
1731
+		"movq %%mm5, %%mm1					\n\t"\
1732
+		PAVGB(%%mm2, %%mm5)\
1733
+		"psllq $8, %%mm1					\n\t"\
1734
+		"psrlq $8, %%mm2					\n\t"\
1735
+		"por %%mm3, %%mm1					\n\t"\
1736
+		"por %%mm4, %%mm2					\n\t"\
1737
+		PAVGB(%%mm2, %%mm1)\
1738
+		PAVGB(%%mm1, %%mm5)\
1739
+		PAVGB(%%mm5, %%mm0)\
1740
+		"movd %%mm0, (%0)					\n\t"\
1741
+		"psrlq $32, %%mm0					\n\t"\
1742
+		"movd %%mm0, 4(%0)					\n\t"
1743
+
1744
+/* uses the 9-Tap Filter: 112242211 */
1745
+#define NEW_HLP2(i)\
1746
+		"movq " #i "(%%eax), %%mm0				\n\t" /*0001000*/\
1747
+		"movq %%mm0, %%mm1					\n\t" /*0001000*/\
1748
+		"movq %%mm0, %%mm2					\n\t" /*0001000*/\
1749
+		"movd -4(%0), %%mm3					\n\t" /*0001000*/\
1750
+		"movd 8(%0), %%mm4					\n\t" /*0001000*/\
1751
+		"psllq $8, %%mm1					\n\t"\
1752
+		"psrlq $8, %%mm2					\n\t"\
1753
+		"psrlq $24, %%mm3					\n\t"\
1754
+		"psllq $56, %%mm4					\n\t"\
1755
+		"por %%mm3, %%mm1					\n\t" /*0010000*/\
1756
+		"por %%mm4, %%mm2					\n\t" /*0000100*/\
1757
+		"movq %%mm1, %%mm5					\n\t" /*0010000*/\
1758
+		PAVGB(%%mm2, %%mm1)					      /*0010100*/\
1759
+		PAVGB(%%mm1, %%mm0)					      /*0012100*/\
1760
+		"psllq $8, %%mm5					\n\t"\
1761
+		"psrlq $8, %%mm2					\n\t"\
1762
+		"por %%mm3, %%mm5					\n\t" /*0100000*/\
1763
+		"por %%mm4, %%mm2					\n\t" /*0000010*/\
1764
+		"movq %%mm5, %%mm1					\n\t" /*0100000*/\
1765
+		PAVGB(%%mm2, %%mm5)					      /*0100010*/\
1766
+		"psllq $8, %%mm1					\n\t"\
1767
+		"psrlq $8, %%mm2					\n\t"\
1768
+		"por %%mm3, %%mm1					\n\t" /*1000000*/\
1769
+		"por %%mm4, %%mm2					\n\t" /*0000001*/\
1770
+		"movq %%mm1, %%mm6					\n\t" /*1000000*/\
1771
+		PAVGB(%%mm2, %%mm1)					      /*1000001*/\
1772
+		"psllq $8, %%mm6					\n\t"\
1773
+		"psrlq $8, %%mm2					\n\t"\
1774
+		"por %%mm3, %%mm6					\n\t"/*100000000*/\
1775
+		"por %%mm4, %%mm2					\n\t"/*000000001*/\
1776
+		PAVGB(%%mm2, %%mm6)					     /*100000001*/\
1777
+		PAVGB(%%mm6, %%mm1)					     /*110000011*/\
1778
+		PAVGB(%%mm1, %%mm5)					     /*112000211*/\
1779
+		PAVGB(%%mm5, %%mm0)					     /*112242211*/\
1780
+		"movd %%mm0, (%0)					\n\t"\
1781
+		"psrlq $32, %%mm0					\n\t"\
1782
+		"movd %%mm0, 4(%0)					\n\t"
1783
+
1784
+#define HLP(i) NEW_HLP(i)
1712 1785
 
1713 1786
 		HLP(0)
1714 1787
 		"addl %1, %0						\n\t"
... ...
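The MMX2/3DNOW variant above never multiplies: NEW_HLP builds its 1112111/8 kernel entirely out of chained byte averages (pavgb on MMX2, pavgusb on 3DNow), which keeps the data in packed bytes instead of unpacking to words. Roughly, for a pixel well inside the row and ignoring the accumulated rounding of the averages (avg() and the function name below are illustrative, not from the patch):

#include <stdint.h>

/* avg() stands in for pavgb, which rounds: (x+y+1)>>1 */
static inline int avg(int x, int y){ return (x + y + 1) >> 1; }

static inline int hLowpass7viaAvg(const uint8_t *a) /* a points at the centre pixel */
{
	int inner= avg(a[0], avg(a[-1], a[1]));             /* ~ (a[-1] + 2*a[0] + a[1])/4 */
	int outer= avg(avg(a[-2], a[2]), avg(a[-3], a[3])); /* ~ (a[-3]+a[-2]+a[2]+a[3])/4 */
	return avg(inner, outer);                           /* ~ (1,1,1,2,1,1,1)/8         */
}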
@@ -1828,6 +1899,363 @@ FIND_MIN_MAX(%%ebx, %1, 2)
1828 1828
 #endif
1829 1829
 }
1830 1830
 
1831
+/**
1832
+ * Deinterlaces the given block
1833
+ * will be called for every 8x8 block, except the last row, and can read & write into an 8x16 block
1834
+ */
1835
+static inline void deInterlaceInterpolateLinear(uint8_t src[], int stride)
1836
+{
1837
+#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1838
+	asm volatile(
1839
+		"leal (%0, %1), %%eax				\n\t"
1840
+		"leal (%%eax, %1, 4), %%ebx			\n\t"
1841
+//	0	1	2	3	4	5	6	7	8	9
1842
+//	%0	eax	eax+%1	eax+2%1	%0+4%1	ebx	ebx+%1	ebx+2%1	%0+8%1	ebx+4%1
1843
+
1844
+		"movq (%0), %%mm0				\n\t"
1845
+		"movq (%%eax, %1), %%mm1			\n\t"
1846
+		PAVGB(%%mm1, %%mm0)\
1847
+		"movq %%mm0, (%%eax)				\n\t"
1848
+		"movq (%0, %1, 4), %%mm0			\n\t"
1849
+		PAVGB(%%mm0, %%mm1)\
1850
+		"movq %%mm1, (%%eax, %1, 2)			\n\t"
1851
+		"movq (%%ebx, %1), %%mm1			\n\t"
1852
+		PAVGB(%%mm1, %%mm0)\
1853
+		"movq %%mm0, (%%ebx)				\n\t"
1854
+		"movq (%0, %1, 8), %%mm0			\n\t"
1855
+		PAVGB(%%mm0, %%mm1)\
1856
+		"movq %%mm1, (%%ebx, %1, 2)			\n\t"
1857
+
1858
+		: : "r" (src), "r" (stride)
1859
+		: "%eax", "%ebx"
1860
+	);
1861
+#else
1862
+	int x;
1863
+	for(x=0; x<8; x++)
1864
+	{
1865
+		src[stride]   = (src[0]        + src[stride*2])>>1;
1866
+		src[stride*3] = (src[stride*2] + src[stride*4])>>1;
1867
+		src[stride*5] = (src[stride*4] + src[stride*6])>>1;
1868
+		src[stride*7] = (src[stride*6] + src[stride*8])>>1;
1869
+		src++;
1870
+	}
1871
+#endif
1872
+}
1873
+
1874
+/**
1875
+ * Deinterlaces the given block
1876
+ * will be called for every 8x8 block, in the last row, and can read & write into an 8x8 block
1877
+ */
1878
+static inline void deInterlaceInterpolateLinearLastRow(uint8_t src[], int stride)
1879
+{
1880
+#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1881
+	asm volatile(
1882
+		"leal (%0, %1), %%eax				\n\t"
1883
+		"leal (%%eax, %1, 4), %%ebx			\n\t"
1884
+//	0	1	2	3	4	5	6	7	8	9
1885
+//	%0	eax	eax+%1	eax+2%1	%0+4%1	ebx	ebx+%1	ebx+2%1	%0+8%1	ebx+4%1
1886
+
1887
+		"movq (%0), %%mm0				\n\t"
1888
+		"movq (%%eax, %1), %%mm1			\n\t"
1889
+		PAVGB(%%mm1, %%mm0)\
1890
+		"movq %%mm0, (%%eax)				\n\t"
1891
+		"movq (%0, %1, 4), %%mm0			\n\t"
1892
+		PAVGB(%%mm0, %%mm1)\
1893
+		"movq %%mm1, (%%eax, %1, 2)			\n\t"
1894
+		"movq (%%ebx, %1), %%mm1			\n\t"
1895
+		PAVGB(%%mm1, %%mm0)\
1896
+		"movq %%mm0, (%%ebx)				\n\t"
1897
+		"movq %%mm1, (%%ebx, %1, 2)			\n\t"
1898
+
1899
+
1900
+		: : "r" (src), "r" (stride)
1901
+		: "%eax", "%ebx"
1902
+	);
1903
+#else
1904
+	int x;
1905
+	for(x=0; x<8; x++)
1906
+	{
1907
+		src[stride]   = (src[0]        + src[stride*2])>>1;
1908
+		src[stride*3] = (src[stride*2] + src[stride*4])>>1;
1909
+		src[stride*5] = (src[stride*4] + src[stride*6])>>1;
1910
+		src[stride*7] = src[stride*6];
1911
+		src++;
1912
+	}
1913
+#endif
1914
+}
1915
+
1916
+/**
1917
+ * Deinterlaces the given block
1918
+ * will be called for every 8x8 block, except the last row, and can read & write into an 8x16 block
1919
+ * will shift the image up by 1 line (FIXME if this is a problem)
1920
+ */
1921
+static inline void deInterlaceBlendLinear(uint8_t src[], int stride)
1922
+{
1923
+#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1924
+	asm volatile(
1925
+		"leal (%0, %1), %%eax				\n\t"
1926
+		"leal (%%eax, %1, 4), %%ebx			\n\t"
1927
+//	0	1	2	3	4	5	6	7	8	9
1928
+//	%0	eax	eax+%1	eax+2%1	%0+4%1	ebx	ebx+%1	ebx+2%1	%0+8%1	ebx+4%1
1929
+
1930
+		"movq (%0), %%mm0				\n\t" // L0
1931
+		"movq (%%eax, %1), %%mm1			\n\t" // L2
1932
+		PAVGB(%%mm1, %%mm0)				      // L0+L2
1933
+		"movq (%%eax), %%mm2				\n\t" // L1
1934
+		PAVGB(%%mm2, %%mm0)
1935
+		"movq %%mm0, (%0)				\n\t"
1936
+		"movq (%%eax, %1, 2), %%mm0			\n\t" // L3
1937
+		PAVGB(%%mm0, %%mm2)				      // L1+L3
1938
+		PAVGB(%%mm1, %%mm2)				      // 2L2 + L1 + L3
1939
+		"movq %%mm2, (%%eax)				\n\t"
1940
+		"movq (%0, %1, 4), %%mm2			\n\t" // L4
1941
+		PAVGB(%%mm2, %%mm1)				      // L2+L4
1942
+		PAVGB(%%mm0, %%mm1)				      // 2L3 + L2 + L4
1943
+		"movq %%mm1, (%%eax, %1)			\n\t"
1944
+		"movq (%%ebx), %%mm1				\n\t" // L5
1945
+		PAVGB(%%mm1, %%mm0)				      // L3+L5
1946
+		PAVGB(%%mm2, %%mm0)				      // 2L4 + L3 + L5
1947
+		"movq %%mm0, (%%eax, %1, 2)			\n\t"
1948
+		"movq (%%ebx, %1), %%mm0			\n\t" // L6
1949
+		PAVGB(%%mm0, %%mm2)				      // L4+L6
1950
+		PAVGB(%%mm1, %%mm2)				      // 2L5 + L4 + L6
1951
+		"movq %%mm2, (%0, %1, 4)			\n\t"
1952
+		"movq (%%ebx, %1, 2), %%mm2			\n\t" // L7
1953
+		PAVGB(%%mm2, %%mm1)				      // L5+L7
1954
+		PAVGB(%%mm0, %%mm1)				      // 2L6 + L5 + L7
1955
+		"movq %%mm1, (%%ebx)				\n\t"
1956
+		"movq (%0, %1, 8), %%mm1			\n\t" // L8
1957
+		PAVGB(%%mm1, %%mm0)				      // L6+L8
1958
+		PAVGB(%%mm2, %%mm0)				      // 2L7 + L6 + L8
1959
+		"movq %%mm0, (%%ebx, %1)			\n\t"
1960
+		"movq (%%ebx, %1, 4), %%mm0			\n\t" // L9
1961
+		PAVGB(%%mm0, %%mm2)				      // L7+L9
1962
+		PAVGB(%%mm1, %%mm2)				      // 2L8 + L7 + L9
1963
+		"movq %%mm2, (%%ebx, %1, 2)			\n\t"
1964
+
1965
+
1966
+		: : "r" (src), "r" (stride)
1967
+		: "%eax", "%ebx"
1968
+	);
1969
+#else
1970
+	int x;
1971
+	for(x=0; x<8; x++)
1972
+	{
1973
+		src[0       ] = (src[0       ] + 2*src[stride  ] + src[stride*2])>>2;
1974
+		src[stride  ] = (src[stride  ] + 2*src[stride*2] + src[stride*3])>>2;
1975
+		src[stride*2] = (src[stride*2] + 2*src[stride*3] + src[stride*4])>>2;
1976
+		src[stride*3] = (src[stride*3] + 2*src[stride*4] + src[stride*5])>>2;
1977
+		src[stride*4] = (src[stride*4] + 2*src[stride*5] + src[stride*6])>>2;
1978
+		src[stride*5] = (src[stride*5] + 2*src[stride*6] + src[stride*7])>>2;
1979
+		src[stride*6] = (src[stride*6] + 2*src[stride*7] + src[stride*8])>>2;
1980
+		src[stride*7] = (src[stride*7] + 2*src[stride*8] + src[stride*9])>>2;
1981
+		src++;
1982
+	}
1983
+#endif
1984
+}
1985
+
1986
+/**
1987
+ * Deinterlaces the given block
1988
+ * will be called for every 8x8 block, in the last row, and can read & write into an 8x8 block
1989
+ * will shift the image up by 1 line (FIXME if this is a problem)
1990
+ */
1991
+static inline void deInterlaceBlendLinearLastRow(uint8_t src[], int stride)
1992
+{
1993
+#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1994
+	asm volatile(
1995
+		"leal (%0, %1), %%eax				\n\t"
1996
+		"leal (%%eax, %1, 4), %%ebx			\n\t"
1997
+//	0	1	2	3	4	5	6	7	8	9
1998
+//	%0	eax	eax+%1	eax+2%1	%0+4%1	ebx	ebx+%1	ebx+2%1	%0+8%1	ebx+4%1
1999
+
2000
+		"movq (%0), %%mm0				\n\t" // L0
2001
+		"movq (%%eax, %1), %%mm1			\n\t" // L2
2002
+		PAVGB(%%mm1, %%mm0)				      // L0+L2
2003
+		"movq (%%eax), %%mm2				\n\t" // L1
2004
+		PAVGB(%%mm2, %%mm0)
2005
+		"movq %%mm0, (%0)				\n\t"
2006
+		"movq (%%eax, %1, 2), %%mm0			\n\t" // L3
2007
+		PAVGB(%%mm0, %%mm2)				      // L1+L3
2008
+		PAVGB(%%mm1, %%mm2)				      // 2L2 + L1 + L3
2009
+		"movq %%mm2, (%%eax)				\n\t"
2010
+		"movq (%0, %1, 4), %%mm2			\n\t" // L4
2011
+		PAVGB(%%mm2, %%mm1)				      // L2+L4
2012
+		PAVGB(%%mm0, %%mm1)				      // 2L3 + L2 + L4
2013
+		"movq %%mm1, (%%eax, %1)			\n\t"
2014
+		"movq (%%ebx), %%mm1				\n\t" // L5
2015
+		PAVGB(%%mm1, %%mm0)				      // L3+L5
2016
+		PAVGB(%%mm2, %%mm0)				      // 2L4 + L3 + L5
2017
+		"movq %%mm0, (%%eax, %1, 2)			\n\t"
2018
+		"movq (%%ebx, %1), %%mm0			\n\t" // L6
2019
+		PAVGB(%%mm0, %%mm2)				      // L4+L6
2020
+		PAVGB(%%mm1, %%mm2)				      // 2L5 + L4 + L6
2021
+		"movq %%mm2, (%0, %1, 4)			\n\t"
2022
+		"movq (%%ebx, %1, 2), %%mm2			\n\t" // L7
2023
+		PAVGB(%%mm2, %%mm1)				      // L5+L7
2024
+		PAVGB(%%mm0, %%mm1)				      // 2L6 + L5 + L7
2025
+		"movq %%mm1, (%%ebx)				\n\t"
2026
+		PAVGB(%%mm2, %%mm0)				      // L7 + L8
2027
+		"movq %%mm0, (%%ebx, %1)			\n\t"
2028
+		"movq %%mm0, (%%ebx, %1, 2)			\n\t"
2029
+
2030
+		: : "r" (src), "r" (stride)
2031
+		: "%eax", "%ebx"
2032
+	);
2033
+#else
2034
+	int x;
2035
+	for(x=0; x<8; x++)
2036
+	{
2037
+		src[0       ] = (src[0       ] + 2*src[stride  ] + src[stride*2])>>2;
2038
+		src[stride  ] = (src[stride  ] + 2*src[stride*2] + src[stride*3])>>2;
2039
+		src[stride*2] = (src[stride*2] + 2*src[stride*3] + src[stride*4])>>2;
2040
+		src[stride*3] = (src[stride*3] + 2*src[stride*4] + src[stride*5])>>2;
2041
+		src[stride*4] = (src[stride*4] + 2*src[stride*5] + src[stride*6])>>2;
2042
+		src[stride*5] = (src[stride*5] + 2*src[stride*6] + src[stride*7])>>2;
2043
+		src[stride*6] = (src[stride*6] +   src[stride*7])>>1;
2044
+		src[stride*7] = src[stride*6];
2045
+		src++;
2046
+	}
2047
+#endif
2048
+}
2049
+
2050
+/**
2051
+ * Deinterlaces the given block
2052
+ * will be called for every 8x8 block, except the last row, and can read & write into an 8x16 block
2053
+ */
2054
+static inline void deInterlaceMedian(uint8_t src[], int stride)
2055
+{
2056
+#if defined (HAVE_MMX2)
2057
+	asm volatile(
2058
+		"leal (%0, %1), %%eax				\n\t"
2059
+		"leal (%%eax, %1, 4), %%ebx			\n\t"
2060
+//	0	1	2	3	4	5	6	7	8	9
2061
+//	%0	eax	eax+%1	eax+2%1	%0+4%1	ebx	ebx+%1	ebx+2%1	%0+8%1	ebx+4%1
2062
+
2063
+		"movq (%0), %%mm0				\n\t" //
2064
+		"movq (%%eax, %1), %%mm2			\n\t" //
2065
+		"movq (%%eax), %%mm1				\n\t" //
2066
+		"movq %%mm0, %%mm3				\n\t"
2067
+		"pmaxub %%mm1, %%mm0				\n\t" //
2068
+		"pminub %%mm3, %%mm1				\n\t" //
2069
+		"pmaxub %%mm2, %%mm1				\n\t" //
2070
+		"pminub %%mm1, %%mm0				\n\t"
2071
+		"movq %%mm0, (%%eax)				\n\t"
2072
+
2073
+		"movq (%0, %1, 4), %%mm0			\n\t" //
2074
+		"movq (%%eax, %1, 2), %%mm1			\n\t" //
2075
+		"movq %%mm2, %%mm3				\n\t"
2076
+		"pmaxub %%mm1, %%mm2				\n\t" //
2077
+		"pminub %%mm3, %%mm1				\n\t" //
2078
+		"pmaxub %%mm0, %%mm1				\n\t" //
2079
+		"pminub %%mm1, %%mm2				\n\t"
2080
+		"movq %%mm2, (%%eax, %1, 2)			\n\t"
2081
+
2082
+		"movq (%%ebx), %%mm2				\n\t" //
2083
+		"movq (%%ebx, %1), %%mm1			\n\t" //
2084
+		"movq %%mm2, %%mm3				\n\t"
2085
+		"pmaxub %%mm0, %%mm2				\n\t" //
2086
+		"pminub %%mm3, %%mm0				\n\t" //
2087
+		"pmaxub %%mm1, %%mm0				\n\t" //
2088
+		"pminub %%mm0, %%mm2				\n\t"
2089
+		"movq %%mm2, (%%ebx)				\n\t"
2090
+
2091
+		"movq (%%ebx, %1, 2), %%mm2			\n\t" //
2092
+		"movq (%0, %1, 8), %%mm0			\n\t" //
2093
+		"movq %%mm2, %%mm3				\n\t"
2094
+		"pmaxub %%mm0, %%mm2				\n\t" //
2095
+		"pminub %%mm3, %%mm0				\n\t" //
2096
+		"pmaxub %%mm1, %%mm0				\n\t" //
2097
+		"pminub %%mm0, %%mm2				\n\t"
2098
+		"movq %%mm2, (%%ebx, %1, 2)			\n\t"
2099
+
2100
+
2101
+		: : "r" (src), "r" (stride)
2102
+		: "%eax", "%ebx"
2103
+	);
2104
+#else
2105
+	//FIXME
2106
+	int x;
2107
+	for(x=0; x<8; x++)
2108
+	{
2109
+		src[0       ] = (src[0       ] + 2*src[stride  ] + src[stride*2])>>2;
2110
+		src[stride  ] = (src[stride  ] + 2*src[stride*2] + src[stride*3])>>2;
2111
+		src[stride*2] = (src[stride*2] + 2*src[stride*3] + src[stride*4])>>2;
2112
+		src[stride*3] = (src[stride*3] + 2*src[stride*4] + src[stride*5])>>2;
2113
+		src[stride*4] = (src[stride*4] + 2*src[stride*5] + src[stride*6])>>2;
2114
+		src[stride*5] = (src[stride*5] + 2*src[stride*6] + src[stride*7])>>2;
2115
+		src[stride*6] = (src[stride*6] + 2*src[stride*7] + src[stride*8])>>2;
2116
+		src[stride*7] = (src[stride*7] + 2*src[stride*8] + src[stride*9])>>2;
2117
+		src++;
2118
+	}
2119
+#endif
2120
+}
2121
+
2122
+/**
2123
+ * Deinterlaces the given block
2124
+ * will be called for every 8x8 block, in the last row, and can read & write into an 8x8 block
2125
+ * will shift the image up by 1 line (FIXME if this is a problem)
2126
+ */
2127
+static inline void deInterlaceMedianLastRow(uint8_t src[], int stride)
2128
+{
2129
+#if defined (HAVE_MMX2)
2130
+	asm volatile(
2131
+		"leal (%0, %1), %%eax				\n\t"
2132
+		"leal (%%eax, %1, 4), %%ebx			\n\t"
2133
+//	0	1	2	3	4	5	6	7	8	9
2134
+//	%0	eax	eax+%1	eax+2%1	%0+4%1	ebx	ebx+%1	ebx+2%1	%0+8%1	ebx+4%1
2135
+
2136
+		"movq (%0), %%mm0				\n\t" //
2137
+		"movq (%%eax, %1), %%mm2			\n\t" //
2138
+		"movq (%%eax), %%mm1				\n\t" //
2139
+		"movq %%mm0, %%mm3				\n\t"
2140
+		"pmaxub %%mm1, %%mm0				\n\t" //
2141
+		"pminub %%mm3, %%mm1				\n\t" //
2142
+		"pmaxub %%mm2, %%mm1				\n\t" //
2143
+		"pminub %%mm1, %%mm0				\n\t"
2144
+		"movq %%mm0, (%%eax)				\n\t"
2145
+
2146
+		"movq (%0, %1, 4), %%mm0			\n\t" //
2147
+		"movq (%%eax, %1, 2), %%mm1			\n\t" //
2148
+		"movq %%mm2, %%mm3				\n\t"
2149
+		"pmaxub %%mm1, %%mm2				\n\t" //
2150
+		"pminub %%mm3, %%mm1				\n\t" //
2151
+		"pmaxub %%mm0, %%mm1				\n\t" //
2152
+		"pminub %%mm1, %%mm2				\n\t"
2153
+		"movq %%mm2, (%%eax, %1, 2)			\n\t"
2154
+
2155
+		"movq (%%ebx), %%mm2				\n\t" //
2156
+		"movq (%%ebx, %1), %%mm1			\n\t" //
2157
+		"movq %%mm2, %%mm3				\n\t"
2158
+		"pmaxub %%mm0, %%mm2				\n\t" //
2159
+		"pminub %%mm3, %%mm0				\n\t" //
2160
+		"pmaxub %%mm1, %%mm0				\n\t" //
2161
+		"pminub %%mm0, %%mm2				\n\t"
2162
+		"movq %%mm2, (%%ebx)				\n\t"
2163
+
2164
+		"movq %%mm1, (%%ebx, %1, 2)			\n\t"
2165
+
2166
+		: : "r" (src), "r" (stride)
2167
+		: "%eax", "%ebx"
2168
+	);
2169
+#else
2170
+	//FIXME
2171
+	int x;
2172
+	for(x=0; x<8; x++)
2173
+	{
2174
+		src[0       ] = (src[0       ] + 2*src[stride  ] + src[stride*2])>>2;
2175
+		src[stride  ] = (src[stride  ] + 2*src[stride*2] + src[stride*3])>>2;
2176
+		src[stride*2] = (src[stride*2] + 2*src[stride*3] + src[stride*4])>>2;
2177
+		src[stride*3] = (src[stride*3] + 2*src[stride*4] + src[stride*5])>>2;
2178
+		src[stride*4] = (src[stride*4] + 2*src[stride*5] + src[stride*6])>>2;
2179
+		src[stride*5] = (src[stride*5] + 2*src[stride*6] + src[stride*7])>>2;
2180
+		src[stride*6] = (src[stride*6] +   src[stride*7])>>1;
2181
+		src[stride*7] = src[stride*6];
2182
+		src++;
2183
+	}
2184
+#endif
2185
+}
2186
+
2187
+
1831 2188
 #ifdef HAVE_ODIVX_POSTPROCESS
1832 2189
 #include "../opendivx/postprocess.h"
1833 2190
 int use_old_pp=0;
... ...
@@ -1841,7 +2269,6 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
1841 1841
 * the mode value is interpreted as a quality value if it's negative, its range is then (-1 ... -63)
1842 1842
  * -63 is best quality -1 is worst
1843 1843
  */
1844
-//extern "C"{
1845 1844
 void  postprocess(unsigned char * src[], int src_stride,
1846 1845
                  unsigned char * dst[], int dst_stride,
1847 1846
                  int horizontal_size,   int vertical_size,
... ...
@@ -2196,6 +2623,17 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
2196 2196
 				blockCopy(vertBlock + dstStride*2, dstStride,
2197 2197
 					vertSrcBlock + srcStride*2, srcStride, 8, mode & LEVEL_FIX);
2198 2198
 
2199
+				if(mode & LINEAR_IPOL_DEINT_FILTER)
2200
+					deInterlaceInterpolateLinear(dstBlock, dstStride);
2201
+				else if(mode & LINEAR_BLEND_DEINT_FILTER)
2202
+					deInterlaceBlendLinear(dstBlock, dstStride);
2203
+				else if(mode & MEDIAN_DEINT_FILTER)
2204
+					deInterlaceMedian(dstBlock, dstStride);
2205
+/*				else if(mode & CUBIC_IPOL_DEINT_FILTER)
2206
+					deInterlaceInterpolateCubic(dstBlock, dstStride);
2207
+				else if(mode & CUBIC_BLEND_DEINT_FILTER)
2208
+					deInterlaceBlendCubic(dstBlock, dstStride);
2209
+*/
2199 2210
 
2200 2211
 #ifdef MORE_TIMEING
2201 2212
 				T1= rdtsc();
... ...
@@ -2226,9 +2664,22 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
2226 2226
 #endif
2227 2227
 			}
2228 2228
 			else
2229
+			{
2229 2230
 				blockCopy(vertBlock + dstStride*1, dstStride,
2230 2231
 					vertSrcBlock + srcStride*1, srcStride, 4, mode & LEVEL_FIX);
2231 2232
 
2233
+				if(mode & LINEAR_IPOL_DEINT_FILTER)
2234
+					deInterlaceInterpolateLinearLastRow(dstBlock, dstStride);
2235
+				else if(mode & LINEAR_BLEND_DEINT_FILTER)
2236
+					deInterlaceBlendLinearLastRow(dstBlock, dstStride);
2237
+				else if(mode & MEDIAN_DEINT_FILTER)
2238
+					deInterlaceMedianLastRow(dstBlock, dstStride);
2239
+/*				else if(mode & CUBIC_IPOL_DEINT_FILTER)
2240
+					deInterlaceInterpolateCubicLastRow(dstBlock, dstStride);
2241
+				else if(mode & CUBIC_BLEND_DEINT_FILTER)
2242
+					deInterlaceBlendCubicLastRow(dstBlock, dstStride);
2243
+*/
2244
+			}
2232 2245
 
2233 2246
 			if(x - 8 >= 0 && x<width)
2234 2247
 			{
... ...
@@ -22,6 +22,7 @@
22 22
 
23 23
 #define BLOCK_SIZE 8
24 24
 #define TEMP_STRIDE 8
25
+//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
25 26
 
26 27
 #define V_DEBLOCK	0x01
27 28
 #define H_DEBLOCK	0x02
... ...
@@ -32,20 +33,28 @@
32 32
 #define LUM_H_DEBLOCK	H_DEBLOCK		//   2
33 33
 #define CHROM_V_DEBLOCK	(V_DEBLOCK<<4)		//  16
34 34
 #define CHROM_H_DEBLOCK	(H_DEBLOCK<<4)		//  32
35
-#define LUM_DERING	DERING			//   4
36
-#define CHROM_DERING	(DERING<<4)		//  64
35
+#define LUM_DERING	DERING			//   4 (not implemented yet)
36
+#define CHROM_DERING	(DERING<<4)		//  64 (not implemented yet)
37 37
 #define LUM_LEVEL_FIX	LEVEL_FIX		//   8
38
-//not supported currently
39
-#define CHROM_LEVEL_FIX	(LEVEL_FIX<<4)		// 128
38
+#define CHROM_LEVEL_FIX	(LEVEL_FIX<<4)		// 128 (not implemented yet)
40 39
 
41 40
 // Experimental vertical filters
42 41
 #define V_RK1_FILTER	0x0100			// 256
43 42
 #define V_X1_FILTER	0x0200			// 512
44 43
 
45 44
 // Experimental horizontal filters
46
-#define H_RK1_FILTER	0x1000			// 4096
45
+#define H_RK1_FILTER	0x1000			// 4096 (not implemented yet)
47 46
 #define H_X1_FILTER	0x2000			// 8192
48 47
 
48
+//Deinterlacing Filters
49
+#define DEINTERLACE_FILTER_MASK		0xF0000
50
+#define	LINEAR_IPOL_DEINT_FILTER	0x10000	// 65536
51
+#define	LINEAR_BLEND_DEINT_FILTER	0x20000	// 131072
52
+#define	CUBIC_BLEND_DEINT_FILTER	0x30000	// 196608 (not implemented yet)
53
+#define	CUBIC_IPOL_DEINT_FILTER		0x40000	// 262144 (not implemented yet)
54
+#define	MEDIAN_DEINT_FILTER		0x80000	// 524288 
55
+
56
+
49 57
 #define GET_PP_QUALITY_MAX 6
50 58
 
51 59
 //#define TIMEING
... ...
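The new deinterlacing bits are meant to be OR'ed into the same mode word as the existing deblocking flags; postProcess() tests them with mode & ... and its if/else chain applies at most one deinterlacer per block. A hypothetical mode value, for illustration only (the wrapper function is not part of the header):

/* deblock luma and chroma, vertically and horizontally, and linearly blend the fields */
static int exampleMode(void)
{
	return V_DEBLOCK | H_DEBLOCK | CHROM_V_DEBLOCK | CHROM_H_DEBLOCK
	     | LINEAR_BLEND_DEINT_FILTER;
}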
@@ -53,18 +62,6 @@
53 53
 
54 54
 #define QP_STORE_T int
55 55
 
56
-//#ifdef __cplusplus
57
-//#include <inttypes.h>
58
-
59
-//void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
60
-//	QP_STORE_T QPs[], int QPStride, int isColor, int mode);
61
-//#endif
62
-
63
-//#ifdef __cplusplus
64
-//extern "C"
65
-//{
66
-//#endif
67
-
68 56
 void postprocess(unsigned char * src[], int src_stride,
69 57
                  unsigned char * dst[], int dst_stride,
70 58
                  int horizontal_size,   int vertical_size,
... ...
@@ -72,8 +69,4 @@ void postprocess(unsigned char * src[], int src_stride,
72 72
 
73 73
 int getPpModeForQuality(int quality);
74 74
 
75
-//#ifdef __cplusplus
76
-//}
77
-//#endif
78
-
79 75
 #endif
... ...
@@ -17,19 +17,22 @@
17 17
 */
18 18
 
19 19
 /*
20
-			C	MMX	MMX2	3DNow*
20
+			C	MMX	MMX2	3DNow
21 21
 isVertDC		Ec	Ec
22 22
 isVertMinMaxOk		Ec	Ec
23
-doVertLowPass		E		e	e*
23
+doVertLowPass		E		e	e
24 24
 doVertDefFilter		Ec	Ec	Ec
25 25
 isHorizDC		Ec	Ec
26 26
 isHorizMinMaxOk		a
27
-doHorizLowPass		E		a	a*
27
+doHorizLowPass		E		a	a
28 28
 doHorizDefFilter	E	ac	ac
29 29
 deRing
30
-Vertical RKAlgo1	E		a	a*
31
-Vertical X1		a		E	E*
32
-Horizontal X1		a		E	E*
30
+Vertical RKAlgo1	E		a	a
31
+Vertical X1		a		E	E
32
+Horizontal X1		a		E	E
33
+LinIpolDeinterlace	a		E	E*
34
+LinBlendDeinterlace	a		E	E*
35
+MedianDeinterlace	a		E
33 36
 
34 37
 
35 38
 * I don't have a 3DNow CPU -> it's untested
... ...
@@ -55,6 +58,7 @@ make the mainloop more flexible (variable number of blocks at once
55 55
 compare the quality & speed of all filters
56 56
 implement a few simple deinterlacing filters
57 57
 split this huge file
58
+fix warnings (unused vars, ...)
58 59
 ...
59 60
 
60 61
 Notes:
... ...
@@ -63,6 +67,9 @@ Notes:
63 63
 
64 64
 /*
65 65
 Changelog: use the CVS log
66
+rewrote the horizontal lowpass filter to fix a bug which caused a blocky look
67
+added deinterlace filters (linear interpolate, linear blend, median)
68
+minor cleanups (removed some outcommented stuff)
66 69
 0.1.3
67 70
 	bugfixes: last 3 lines not brightness/contrast corrected
68 71
 		brightness statistics messed up with initial black pic
... ...
@@ -194,13 +201,11 @@ static inline void prefetcht2(void *p)
194 194
  * Check if the middle 8x8 Block in the given 8x10 block is flat
195 195
  */
196 196
 static inline int isVertDC(uint8_t src[], int stride){
197
-//	return true;
198 197
 	int numEq= 0;
199 198
 	int y;
200 199
 	src+= stride; // src points to begin of the 8x8 Block
201 200
 #ifdef HAVE_MMX
202 201
 	asm volatile(
203
-//		"int $3 \n\t"
204 202
 		"pushl %1\n\t"
205 203
 		"movq b7E, %%mm7					\n\t" // mm7 = 0x7F
206 204
 		"movq b7C, %%mm6					\n\t" // mm6 = 0x7D
... ...
@@ -1577,9 +1582,9 @@ static inline void doHorizDefFilterAndCopyBack(uint8_t dst[], int stride, int QP
1577 1577
 }
1578 1578
 
1579 1579
 /**
1580
- * Do a horizontal low pass filter on the 8x8 block
1580
+ * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
1581 1581
 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
1582
- * useing approximately the 7-Tap Filter (1,2,3,4,3,2,1)/16 (MMX2/3DNOW version)
1582
+ * using the 7-Tap Filter (2,2,2,4,2,2,2)/16 (MMX2/3DNOW version)
1583 1583
  */
1584 1584
 static inline void doHorizLowPassAndCopyBack(uint8_t dst[], int stride, int QP)
1585 1585
 {
... ...
@@ -1635,14 +1640,6 @@ static inline void doHorizLowPassAndCopyBack(uint8_t dst[], int stride, int QP)
1635 1635
 */
1636 1636
 // approximately a 7-Tap Filter with Vector (1,2,3,4,3,2,1)/16
1637 1637
 /*
1638
- 31
1639
- 121
1640
-  121
1641
-   121
1642
-    121
1643
-     121
1644
-      121
1645
-       13
1646 1638
 Implemented	Exact 7-Tap
1647 1639
  9421		A321
1648 1640
  36421		64321
... ...
@@ -1654,6 +1651,7 @@ Implemented	Exact 7-Tap
1654 1654
      1249	   123A
1655 1655
 
1656 1656
 */
1657
+
1657 1658
 #ifdef HAVE_MMX2
1658 1659
 #define HLP3(i)	"movq " #i "(%%eax), %%mm0				\n\t"\
1659 1660
 		"movq %%mm0, %%mm1					\n\t"\
... ...
@@ -1680,12 +1678,12 @@ Implemented	Exact 7-Tap
1680 1680
 #define HLP3(i)	"movq " #i "(%%eax), %%mm0				\n\t"\
1681 1681
 		"movq %%mm0, %%mm1					\n\t"\
1682 1682
 		"movq %%mm0, %%mm2					\n\t"\
1683
-		"movq %%mm0, %%mm3					\n\t"\
1684
-		"movq %%mm0, %%mm4					\n\t"\
1683
+		"movd -4(%0), %%mm3					\n\t" /*0001000*/\
1684
+		"movd 8(%0), %%mm4					\n\t" /*0001000*/\
1685 1685
 		"psllq $8, %%mm1					\n\t"\
1686 1686
 		"psrlq $8, %%mm2					\n\t"\
1687
-		"pand bm00000001, %%mm3					\n\t"\
1688
-		"pand bm10000000, %%mm4					\n\t"\
1687
+		"psrlq $24, %%mm3					\n\t"\
1688
+		"psllq $56, %%mm4					\n\t"\
1689 1689
 		"por %%mm3, %%mm1					\n\t"\
1690 1690
 		"por %%mm4, %%mm2					\n\t"\
1691 1691
 		PAVGB(%%mm2, %%mm1)\
... ...
@@ -1708,7 +1706,80 @@ Implemented	Exact 7-Tap
1708 1708
 		"movd %%mm0, 4(%0)					\n\t"
1709 1709
 #endif
1710 1710
 
1711
-#define HLP(i) HLP3(i)
1711
+/* uses the 7-Tap Filter: 1112111 */
1712
+#define NEW_HLP(i)\
1713
+		"movq " #i "(%%eax), %%mm0				\n\t"\
1714
+		"movq %%mm0, %%mm1					\n\t"\
1715
+		"movq %%mm0, %%mm2					\n\t"\
1716
+		"movd -4(%0), %%mm3					\n\t" /*0001000*/\
1717
+		"movd 8(%0), %%mm4					\n\t" /*0001000*/\
1718
+		"psllq $8, %%mm1					\n\t"\
1719
+		"psrlq $8, %%mm2					\n\t"\
1720
+		"psrlq $24, %%mm3					\n\t"\
1721
+		"psllq $56, %%mm4					\n\t"\
1722
+		"por %%mm3, %%mm1					\n\t"\
1723
+		"por %%mm4, %%mm2					\n\t"\
1724
+		"movq %%mm1, %%mm5					\n\t"\
1725
+		PAVGB(%%mm2, %%mm1)\
1726
+		PAVGB(%%mm1, %%mm0)\
1727
+		"psllq $8, %%mm5					\n\t"\
1728
+		"psrlq $8, %%mm2					\n\t"\
1729
+		"por %%mm3, %%mm5					\n\t"\
1730
+		"por %%mm4, %%mm2					\n\t"\
1731
+		"movq %%mm5, %%mm1					\n\t"\
1732
+		PAVGB(%%mm2, %%mm5)\
1733
+		"psllq $8, %%mm1					\n\t"\
1734
+		"psrlq $8, %%mm2					\n\t"\
1735
+		"por %%mm3, %%mm1					\n\t"\
1736
+		"por %%mm4, %%mm2					\n\t"\
1737
+		PAVGB(%%mm2, %%mm1)\
1738
+		PAVGB(%%mm1, %%mm5)\
1739
+		PAVGB(%%mm5, %%mm0)\
1740
+		"movd %%mm0, (%0)					\n\t"\
1741
+		"psrlq $32, %%mm0					\n\t"\
1742
+		"movd %%mm0, 4(%0)					\n\t"
1743
+
1744
+/* uses the 9-Tap Filter: 112242211 */
1745
+#define NEW_HLP2(i)\
1746
+		"movq " #i "(%%eax), %%mm0				\n\t" /*0001000*/\
1747
+		"movq %%mm0, %%mm1					\n\t" /*0001000*/\
1748
+		"movq %%mm0, %%mm2					\n\t" /*0001000*/\
1749
+		"movd -4(%0), %%mm3					\n\t" /*0001000*/\
1750
+		"movd 8(%0), %%mm4					\n\t" /*0001000*/\
1751
+		"psllq $8, %%mm1					\n\t"\
1752
+		"psrlq $8, %%mm2					\n\t"\
1753
+		"psrlq $24, %%mm3					\n\t"\
1754
+		"psllq $56, %%mm4					\n\t"\
1755
+		"por %%mm3, %%mm1					\n\t" /*0010000*/\
1756
+		"por %%mm4, %%mm2					\n\t" /*0000100*/\
1757
+		"movq %%mm1, %%mm5					\n\t" /*0010000*/\
1758
+		PAVGB(%%mm2, %%mm1)					      /*0010100*/\
1759
+		PAVGB(%%mm1, %%mm0)					      /*0012100*/\
1760
+		"psllq $8, %%mm5					\n\t"\
1761
+		"psrlq $8, %%mm2					\n\t"\
1762
+		"por %%mm3, %%mm5					\n\t" /*0100000*/\
1763
+		"por %%mm4, %%mm2					\n\t" /*0000010*/\
1764
+		"movq %%mm5, %%mm1					\n\t" /*0100000*/\
1765
+		PAVGB(%%mm2, %%mm5)					      /*0100010*/\
1766
+		"psllq $8, %%mm1					\n\t"\
1767
+		"psrlq $8, %%mm2					\n\t"\
1768
+		"por %%mm3, %%mm1					\n\t" /*1000000*/\
1769
+		"por %%mm4, %%mm2					\n\t" /*0000001*/\
1770
+		"movq %%mm1, %%mm6					\n\t" /*1000000*/\
1771
+		PAVGB(%%mm2, %%mm1)					      /*1000001*/\
1772
+		"psllq $8, %%mm6					\n\t"\
1773
+		"psrlq $8, %%mm2					\n\t"\
1774
+		"por %%mm3, %%mm6					\n\t"/*100000000*/\
1775
+		"por %%mm4, %%mm2					\n\t"/*000000001*/\
1776
+		PAVGB(%%mm2, %%mm6)					     /*100000001*/\
1777
+		PAVGB(%%mm6, %%mm1)					     /*110000011*/\
1778
+		PAVGB(%%mm1, %%mm5)					     /*112000211*/\
1779
+		PAVGB(%%mm5, %%mm0)					     /*112242211*/\
1780
+		"movd %%mm0, (%0)					\n\t"\
1781
+		"psrlq $32, %%mm0					\n\t"\
1782
+		"movd %%mm0, 4(%0)					\n\t"
1783
+
1784
+#define HLP(i) NEW_HLP(i)
1712 1785
 
1713 1786
 		HLP(0)
1714 1787
 		"addl %1, %0						\n\t"
... ...
@@ -1828,6 +1899,363 @@ FIND_MIN_MAX(%%ebx, %1, 2)
1828 1828
 #endif
1829 1829
 }
1830 1830
 
1831
+/**
1832
+ * Deinterlaces the given block
1833
+ * will be called for every 8x8 block, except the last row, and can read & write into an 8x16 block
1834
+ */
1835
+static inline void deInterlaceInterpolateLinear(uint8_t src[], int stride)
1836
+{
1837
+#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1838
+	asm volatile(
1839
+		"leal (%0, %1), %%eax				\n\t"
1840
+		"leal (%%eax, %1, 4), %%ebx			\n\t"
1841
+//	0	1	2	3	4	5	6	7	8	9
1842
+//	%0	eax	eax+%1	eax+2%1	%0+4%1	ebx	ebx+%1	ebx+2%1	%0+8%1	ebx+4%1
1843
+
1844
+		"movq (%0), %%mm0				\n\t"
1845
+		"movq (%%eax, %1), %%mm1			\n\t"
1846
+		PAVGB(%%mm1, %%mm0)\
1847
+		"movq %%mm0, (%%eax)				\n\t"
1848
+		"movq (%0, %1, 4), %%mm0			\n\t"
1849
+		PAVGB(%%mm0, %%mm1)\
1850
+		"movq %%mm1, (%%eax, %1, 2)			\n\t"
1851
+		"movq (%%ebx, %1), %%mm1			\n\t"
1852
+		PAVGB(%%mm1, %%mm0)\
1853
+		"movq %%mm0, (%%ebx)				\n\t"
1854
+		"movq (%0, %1, 8), %%mm0			\n\t"
1855
+		PAVGB(%%mm0, %%mm1)\
1856
+		"movq %%mm1, (%%ebx, %1, 2)			\n\t"
1857
+
1858
+		: : "r" (src), "r" (stride)
1859
+		: "%eax", "%ebx"
1860
+	);
1861
+#else
1862
+	int x;
1863
+	for(x=0; x<8; x++)
1864
+	{
1865
+		src[stride]   = (src[0]        + src[stride*2])>>1;
1866
+		src[stride*3] = (src[stride*2] + src[stride*4])>>1;
1867
+		src[stride*5] = (src[stride*4] + src[stride*6])>>1;
1868
+		src[stride*7] = (src[stride*6] + src[stride*8])>>1;
1869
+		src++;
1870
+	}
1871
+#endif
1872
+}
1873
+
1874
+/**
1875
+ * Deinterlaces the given block
1876
+ * will be called for every 8x8 block, in the last row, and can read & write into an 8x8 block
1877
+ */
1878
+static inline void deInterlaceInterpolateLinearLastRow(uint8_t src[], int stride)
1879
+{
1880
+#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1881
+	asm volatile(
1882
+		"leal (%0, %1), %%eax				\n\t"
1883
+		"leal (%%eax, %1, 4), %%ebx			\n\t"
1884
+//	0	1	2	3	4	5	6	7	8	9
1885
+//	%0	eax	eax+%1	eax+2%1	%0+4%1	ebx	ebx+%1	ebx+2%1	%0+8%1	ebx+4%1
1886
+
1887
+		"movq (%0), %%mm0				\n\t"
1888
+		"movq (%%eax, %1), %%mm1			\n\t"
1889
+		PAVGB(%%mm1, %%mm0)\
1890
+		"movq %%mm0, (%%eax)				\n\t"
1891
+		"movq (%0, %1, 4), %%mm0			\n\t"
1892
+		PAVGB(%%mm0, %%mm1)\
1893
+		"movq %%mm1, (%%eax, %1, 2)			\n\t"
1894
+		"movq (%%ebx, %1), %%mm1			\n\t"
1895
+		PAVGB(%%mm1, %%mm0)\
1896
+		"movq %%mm0, (%%ebx)				\n\t"
1897
+		"movq %%mm1, (%%ebx, %1, 2)			\n\t"
1898
+
1899
+
1900
+		: : "r" (src), "r" (stride)
1901
+		: "%eax", "%ebx"
1902
+	);
1903
+#else
1904
+	int x;
1905
+	for(x=0; x<8; x++)
1906
+	{
1907
+		src[stride]   = (src[0]        + src[stride*2])>>1;
1908
+		src[stride*3] = (src[stride*2] + src[stride*4])>>1;
1909
+		src[stride*5] = (src[stride*4] + src[stride*6])>>1;
1910
+		src[stride*7] = src[stride*6];
1911
+		src++;
1912
+	}
1913
+#endif
1914
+}
1915
+
1916
+/**
1917
+ * Deinterlaces the given block
1918
+ * will be called for every 8x8 block, except the last row, and can read & write into an 8x16 block
1919
+ * will shift the image up by 1 line (FIXME if this is a problem)
1920
+ */
1921
+static inline void deInterlaceBlendLinear(uint8_t src[], int stride)
1922
+{
1923
+#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1924
+	asm volatile(
1925
+		"leal (%0, %1), %%eax				\n\t"
1926
+		"leal (%%eax, %1, 4), %%ebx			\n\t"
1927
+//	0	1	2	3	4	5	6	7	8	9
1928
+//	%0	eax	eax+%1	eax+2%1	%0+4%1	ebx	ebx+%1	ebx+2%1	%0+8%1	ebx+4%1
1929
+
1930
+		"movq (%0), %%mm0				\n\t" // L0
1931
+		"movq (%%eax, %1), %%mm1			\n\t" // L2
1932
+		PAVGB(%%mm1, %%mm0)				      // L0+L2
1933
+		"movq (%%eax), %%mm2				\n\t" // L1
1934
+		PAVGB(%%mm2, %%mm0)
1935
+		"movq %%mm0, (%0)				\n\t"
1936
+		"movq (%%eax, %1, 2), %%mm0			\n\t" // L3
1937
+		PAVGB(%%mm0, %%mm2)				      // L1+L3
1938
+		PAVGB(%%mm1, %%mm2)				      // 2L2 + L1 + L3
1939
+		"movq %%mm2, (%%eax)				\n\t"
1940
+		"movq (%0, %1, 4), %%mm2			\n\t" // L4
1941
+		PAVGB(%%mm2, %%mm1)				      // L2+L4
1942
+		PAVGB(%%mm0, %%mm1)				      // 2L3 + L2 + L4
1943
+		"movq %%mm1, (%%eax, %1)			\n\t"
1944
+		"movq (%%ebx), %%mm1				\n\t" // L5
1945
+		PAVGB(%%mm1, %%mm0)				      // L3+L5
1946
+		PAVGB(%%mm2, %%mm0)				      // 2L4 + L3 + L5
1947
+		"movq %%mm0, (%%eax, %1, 2)			\n\t"
1948
+		"movq (%%ebx, %1), %%mm0			\n\t" // L6
1949
+		PAVGB(%%mm0, %%mm2)				      // L4+L6
1950
+		PAVGB(%%mm1, %%mm2)				      // 2L5 + L4 + L6
1951
+		"movq %%mm2, (%0, %1, 4)			\n\t"
1952
+		"movq (%%ebx, %1, 2), %%mm2			\n\t" // L7
1953
+		PAVGB(%%mm2, %%mm1)				      // L5+L7
1954
+		PAVGB(%%mm0, %%mm1)				      // 2L6 + L5 + L7
1955
+		"movq %%mm1, (%%ebx)				\n\t"
1956
+		"movq (%0, %1, 8), %%mm1			\n\t" // L8
1957
+		PAVGB(%%mm1, %%mm0)				      // L6+L8
1958
+		PAVGB(%%mm2, %%mm0)				      // 2L7 + L6 + L8
1959
+		"movq %%mm0, (%%ebx, %1)			\n\t"
1960
+		"movq (%%ebx, %1, 4), %%mm0			\n\t" // L9
1961
+		PAVGB(%%mm0, %%mm2)				      // L7+L9
1962
+		PAVGB(%%mm1, %%mm2)				      // 2L8 + L7 + L9
1963
+		"movq %%mm2, (%%ebx, %1, 2)			\n\t"
1964
+
1965
+
1966
+		: : "r" (src), "r" (stride)
1967
+		: "%eax", "%ebx"
1968
+	);
1969
+#else
1970
+	int x;
1971
+	for(x=0; x<8; x++)
1972
+	{
1973
+		src[0       ] = (src[0       ] + 2*src[stride  ] + src[stride*2])>>2;
1974
+		src[stride  ] = (src[stride  ] + 2*src[stride*2] + src[stride*3])>>2;
1975
+		src[stride*2] = (src[stride*2] + 2*src[stride*3] + src[stride*4])>>2;
1976
+		src[stride*3] = (src[stride*3] + 2*src[stride*4] + src[stride*5])>>2;
1977
+		src[stride*4] = (src[stride*4] + 2*src[stride*5] + src[stride*6])>>2;
1978
+		src[stride*5] = (src[stride*5] + 2*src[stride*6] + src[stride*7])>>2;
1979
+		src[stride*6] = (src[stride*6] + 2*src[stride*7] + src[stride*8])>>2;
1980
+		src[stride*7] = (src[stride*7] + 2*src[stride*8] + src[stride*9])>>2;
1981
+		src++;
1982
+	}
1983
+#endif
1984
+}
1985
+
1986
+/**
1987
+ * Deinterlaces the given block
1988
+ * will be called for every 8x8 block, in the last row, and can read & write into an 8x8 block
1989
+ * will shift the image up by 1 line (FIXME if this is a problem)
1990
+ */
1991
+static inline void deInterlaceBlendLinearLastRow(uint8_t src[], int stride)
1992
+{
1993
+#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1994
+	asm volatile(
1995
+		"leal (%0, %1), %%eax				\n\t"
1996
+		"leal (%%eax, %1, 4), %%ebx			\n\t"
1997
+//	0	1	2	3	4	5	6	7	8	9
1998
+//	%0	eax	eax+%1	eax+2%1	%0+4%1	ebx	ebx+%1	ebx+2%1	%0+8%1	ebx+4%1
1999
+
2000
+		"movq (%0), %%mm0				\n\t" // L0
2001
+		"movq (%%eax, %1), %%mm1			\n\t" // L2
2002
+		PAVGB(%%mm1, %%mm0)				      // L0+L2
2003
+		"movq (%%eax), %%mm2				\n\t" // L1
2004
+		PAVGB(%%mm2, %%mm0)
2005
+		"movq %%mm0, (%0)				\n\t"
2006
+		"movq (%%eax, %1, 2), %%mm0			\n\t" // L3
2007
+		PAVGB(%%mm0, %%mm2)				      // L1+L3
2008
+		PAVGB(%%mm1, %%mm2)				      // 2L2 + L1 + L3
2009
+		"movq %%mm2, (%%eax)				\n\t"
2010
+		"movq (%0, %1, 4), %%mm2			\n\t" // L4
2011
+		PAVGB(%%mm2, %%mm1)				      // L2+L4
2012
+		PAVGB(%%mm0, %%mm1)				      // 2L3 + L2 + L4
2013
+		"movq %%mm1, (%%eax, %1)			\n\t"
2014
+		"movq (%%ebx), %%mm1				\n\t" // L5
2015
+		PAVGB(%%mm1, %%mm0)				      // L3+L5
2016
+		PAVGB(%%mm2, %%mm0)				      // 2L4 + L3 + L5
2017
+		"movq %%mm0, (%%eax, %1, 2)			\n\t"
2018
+		"movq (%%ebx, %1), %%mm0			\n\t" // L6
2019
+		PAVGB(%%mm0, %%mm2)				      // L4+L6
2020
+		PAVGB(%%mm1, %%mm2)				      // 2L5 + L4 + L6
2021
+		"movq %%mm2, (%0, %1, 4)			\n\t"
2022
+		"movq (%%ebx, %1, 2), %%mm2			\n\t" // L7
2023
+		PAVGB(%%mm2, %%mm1)				      // L5+L7
2024
+		PAVGB(%%mm0, %%mm1)				      // 2L6 + L5 + L7
2025
+		"movq %%mm1, (%%ebx)				\n\t"
2026
+		PAVGB(%%mm2, %%mm0)				      // L7 + L8
2027
+		"movq %%mm0, (%%ebx, %1)			\n\t"
2028
+		"movq %%mm0, (%%ebx, %1, 2)			\n\t"
2029
+
2030
+		: : "r" (src), "r" (stride)
2031
+		: "%eax", "%ebx"
2032
+	);
2033
+#else
2034
+	int x;
2035
+	for(x=0; x<8; x++)
2036
+	{
2037
+		src[0       ] = (src[0       ] + 2*src[stride  ] + src[stride*2])>>2;
2038
+		src[stride  ] = (src[stride  ] + 2*src[stride*2] + src[stride*3])>>2;
2039
+		src[stride*2] = (src[stride*2] + 2*src[stride*3] + src[stride*4])>>2;
2040
+		src[stride*3] = (src[stride*3] + 2*src[stride*4] + src[stride*5])>>2;
2041
+		src[stride*4] = (src[stride*4] + 2*src[stride*5] + src[stride*6])>>2;
2042
+		src[stride*5] = (src[stride*5] + 2*src[stride*6] + src[stride*7])>>2;
2043
+		src[stride*6] = (src[stride*6] +   src[stride*7])>>1;
2044
+		src[stride*7] = src[stride*6];
2045
+		src++;
2046
+	}
2047
+#endif
2048
+}
2049
+
2050
+/**
2051
+ * Deinterlaces the given block
2052
+ * will be called for every 8x8 block, except the last row, and can read & write into an 8x16 block
2053
+ */
2054
+static inline void deInterlaceMedian(uint8_t src[], int stride)
2055
+{
2056
+#if defined (HAVE_MMX2)
2057
+	asm volatile(
2058
+		"leal (%0, %1), %%eax				\n\t"
2059
+		"leal (%%eax, %1, 4), %%ebx			\n\t"
2060
+//	0	1	2	3	4	5	6	7	8	9
2061
+//	%0	eax	eax+%1	eax+2%1	%0+4%1	ebx	ebx+%1	ebx+2%1	%0+8%1	ebx+4%1
2062
+
2063
+		"movq (%0), %%mm0				\n\t" //
2064
+		"movq (%%eax, %1), %%mm2			\n\t" //
2065
+		"movq (%%eax), %%mm1				\n\t" //
2066
+		"movq %%mm0, %%mm3				\n\t"
2067
+		"pmaxub %%mm1, %%mm0				\n\t" //
2068
+		"pminub %%mm3, %%mm1				\n\t" //
2069
+		"pmaxub %%mm2, %%mm1				\n\t" //
2070
+		"pminub %%mm1, %%mm0				\n\t"
2071
+		"movq %%mm0, (%%eax)				\n\t"
2072
+
2073
+		"movq (%0, %1, 4), %%mm0			\n\t" //
2074
+		"movq (%%eax, %1, 2), %%mm1			\n\t" //
2075
+		"movq %%mm2, %%mm3				\n\t"
2076
+		"pmaxub %%mm1, %%mm2				\n\t" //
2077
+		"pminub %%mm3, %%mm1				\n\t" //
2078
+		"pmaxub %%mm0, %%mm1				\n\t" //
2079
+		"pminub %%mm1, %%mm2				\n\t"
2080
+		"movq %%mm2, (%%eax, %1, 2)			\n\t"
2081
+
2082
+		"movq (%%ebx), %%mm2				\n\t" //
2083
+		"movq (%%ebx, %1), %%mm1			\n\t" //
2084
+		"movq %%mm2, %%mm3				\n\t"
2085
+		"pmaxub %%mm0, %%mm2				\n\t" //
2086
+		"pminub %%mm3, %%mm0				\n\t" //
2087
+		"pmaxub %%mm1, %%mm0				\n\t" //
2088
+		"pminub %%mm0, %%mm2				\n\t"
2089
+		"movq %%mm2, (%%ebx)				\n\t"
2090
+
2091
+		"movq (%%ebx, %1, 2), %%mm2			\n\t" //
2092
+		"movq (%0, %1, 8), %%mm0			\n\t" //
2093
+		"movq %%mm2, %%mm3				\n\t"
2094
+		"pmaxub %%mm0, %%mm2				\n\t" //
2095
+		"pminub %%mm3, %%mm0				\n\t" //
2096
+		"pmaxub %%mm1, %%mm0				\n\t" //
2097
+		"pminub %%mm0, %%mm2				\n\t"
2098
+		"movq %%mm2, (%%ebx, %1, 2)			\n\t"
2099
+
2100
+
2101
+		: : "r" (src), "r" (stride)
2102
+		: "%eax", "%ebx"
2103
+	);
2104
+#else
2105
+	//FIXME
2106
+	int x;
2107
+	for(x=0; x<8; x++)
2108
+	{
2109
+		src[0       ] = (src[0       ] + 2*src[stride  ] + src[stride*2])>>2;
2110
+		src[stride  ] = (src[stride  ] + 2*src[stride*2] + src[stride*3])>>2;
2111
+		src[stride*2] = (src[stride*2] + 2*src[stride*3] + src[stride*4])>>2;
2112
+		src[stride*3] = (src[stride*3] + 2*src[stride*4] + src[stride*5])>>2;
2113
+		src[stride*4] = (src[stride*4] + 2*src[stride*5] + src[stride*6])>>2;
2114
+		src[stride*5] = (src[stride*5] + 2*src[stride*6] + src[stride*7])>>2;
2115
+		src[stride*6] = (src[stride*6] + 2*src[stride*7] + src[stride*8])>>2;
2116
+		src[stride*7] = (src[stride*7] + 2*src[stride*8] + src[stride*9])>>2;
2117
+		src++;
2118
+	}
2119
+#endif
2120
+}
2121
+
2122
+/**
2123
+ * Deinterlaces the given block
2124
+ * will be called for every 8x8 block, in the last row, and can read & write into an 8x8 block
2125
+ * will shift the image up by 1 line (FIXME if this is a problem)
2126
+ */
2127
+static inline void deInterlaceMedianLastRow(uint8_t src[], int stride)
2128
+{
2129
+#if defined (HAVE_MMX2)
2130
+	asm volatile(
2131
+		"leal (%0, %1), %%eax				\n\t"
2132
+		"leal (%%eax, %1, 4), %%ebx			\n\t"
2133
+//	0	1	2	3	4	5	6	7	8	9
2134
+//	%0	eax	eax+%1	eax+2%1	%0+4%1	ebx	ebx+%1	ebx+2%1	%0+8%1	ebx+4%1
2135
+
2136
+		"movq (%0), %%mm0				\n\t" //
2137
+		"movq (%%eax, %1), %%mm2			\n\t" //
2138
+		"movq (%%eax), %%mm1				\n\t" //
2139
+		"movq %%mm0, %%mm3				\n\t"
2140
+		"pmaxub %%mm1, %%mm0				\n\t" //
2141
+		"pminub %%mm3, %%mm1				\n\t" //
2142
+		"pmaxub %%mm2, %%mm1				\n\t" //
2143
+		"pminub %%mm1, %%mm0				\n\t"
2144
+		"movq %%mm0, (%%eax)				\n\t"
2145
+
2146
+		"movq (%0, %1, 4), %%mm0			\n\t" //
2147
+		"movq (%%eax, %1, 2), %%mm1			\n\t" //
2148
+		"movq %%mm2, %%mm3				\n\t"
2149
+		"pmaxub %%mm1, %%mm2				\n\t" //
2150
+		"pminub %%mm3, %%mm1				\n\t" //
2151
+		"pmaxub %%mm0, %%mm1				\n\t" //
2152
+		"pminub %%mm1, %%mm2				\n\t"
2153
+		"movq %%mm2, (%%eax, %1, 2)			\n\t"
2154
+
2155
+		"movq (%%ebx), %%mm2				\n\t" //
2156
+		"movq (%%ebx, %1), %%mm1			\n\t" //
2157
+		"movq %%mm2, %%mm3				\n\t"
2158
+		"pmaxub %%mm0, %%mm2				\n\t" //
2159
+		"pminub %%mm3, %%mm0				\n\t" //
2160
+		"pmaxub %%mm1, %%mm0				\n\t" //
2161
+		"pminub %%mm0, %%mm2				\n\t"
2162
+		"movq %%mm2, (%%ebx)				\n\t"
2163
+
2164
+		"movq %%mm1, (%%ebx, %1, 2)			\n\t"
2165
+
2166
+		: : "r" (src), "r" (stride)
2167
+		: "%eax", "%ebx"
2168
+	);
2169
+#else
2170
+	//FIXME
2171
+	int x;
2172
+	for(x=0; x<8; x++)
2173
+	{
2174
+		src[0       ] = (src[0       ] + 2*src[stride  ] + src[stride*2])>>2;
2175
+		src[stride  ] = (src[stride  ] + 2*src[stride*2] + src[stride*3])>>2;
2176
+		src[stride*2] = (src[stride*2] + 2*src[stride*3] + src[stride*4])>>2;
2177
+		src[stride*3] = (src[stride*3] + 2*src[stride*4] + src[stride*5])>>2;
2178
+		src[stride*4] = (src[stride*4] + 2*src[stride*5] + src[stride*6])>>2;
2179
+		src[stride*5] = (src[stride*5] + 2*src[stride*6] + src[stride*7])>>2;
2180
+		src[stride*6] = (src[stride*6] +   src[stride*7])>>1;
2181
+		src[stride*7] = src[stride*6];
2182
+		src++;
2183
+	}
2184
+#endif
2185
+}
2186
+
2187
+
1831 2188
 #ifdef HAVE_ODIVX_POSTPROCESS
1832 2189
 #include "../opendivx/postprocess.h"
1833 2190
 int use_old_pp=0;
... ...
@@ -1841,7 +2269,6 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
1841 1841
 * the mode value is interpreted as a quality value if it's negative, its range is then (-1 ... -63)
1842 1842
  * -63 is best quality -1 is worst
1843 1843
  */
1844
-//extern "C"{
1845 1844
 void  postprocess(unsigned char * src[], int src_stride,
1846 1845
                  unsigned char * dst[], int dst_stride,
1847 1846
                  int horizontal_size,   int vertical_size,
... ...
@@ -2196,6 +2623,17 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
2196 2196
 				blockCopy(vertBlock + dstStride*2, dstStride,
2197 2197
 					vertSrcBlock + srcStride*2, srcStride, 8, mode & LEVEL_FIX);
2198 2198
 
2199
+				if(mode & LINEAR_IPOL_DEINT_FILTER)
2200
+					deInterlaceInterpolateLinear(dstBlock, dstStride);
2201
+				else if(mode & LINEAR_BLEND_DEINT_FILTER)
2202
+					deInterlaceBlendLinear(dstBlock, dstStride);
2203
+				else if(mode & MEDIAN_DEINT_FILTER)
2204
+					deInterlaceMedian(dstBlock, dstStride);
2205
+/*				else if(mode & CUBIC_IPOL_DEINT_FILTER)
2206
+					deInterlaceInterpolateCubic(dstBlock, dstStride);
2207
+				else if(mode & CUBIC_BLEND_DEINT_FILTER)
2208
+					deInterlaceBlendCubic(dstBlock, dstStride);
2209
+*/
2199 2210
 
2200 2211
 #ifdef MORE_TIMEING
2201 2212
 				T1= rdtsc();
... ...
@@ -2226,9 +2664,22 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
2226 2226
 #endif
2227 2227
 			}
2228 2228
 			else
2229
+			{
2229 2230
 				blockCopy(vertBlock + dstStride*1, dstStride,
2230 2231
 					vertSrcBlock + srcStride*1, srcStride, 4, mode & LEVEL_FIX);
2231 2232
 
2233
+				if(mode & LINEAR_IPOL_DEINT_FILTER)
2234
+					deInterlaceInterpolateLinearLastRow(dstBlock, dstStride);
2235
+				else if(mode & LINEAR_BLEND_DEINT_FILTER)
2236
+					deInterlaceBlendLinearLastRow(dstBlock, dstStride);
2237
+				else if(mode & MEDIAN_DEINT_FILTER)
2238
+					deInterlaceMedianLastRow(dstBlock, dstStride);
2239
+/*				else if(mode & CUBIC_IPOL_DEINT_FILTER)
2240
+					deInterlaceInterpolateCubicLastRow(dstBlock, dstStride);
2241
+				else if(mode & CUBIC_BLEND_DEINT_FILTER)
2242
+					deInterlaceBlendCubicLastRow(dstBlock, dstStride);
2243
+*/
2244
+			}
2232 2245
 
2233 2246
 			if(x - 8 >= 0 && x<width)
2234 2247
 			{