Originally committed as revision 1988 to svn://svn.ffmpeg.org/ffmpeg/trunk
Michael Niedermayer authored on 2003/06/26 20:31:34... | ... |
@@ -243,9 +243,77 @@ static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){ |
243 | 243 |
|
244 | 244 |
static inline int isHorizMinMaxOk(uint8_t src[], int stride, int QP) |
245 | 245 |
{ |
246 |
- if(abs(src[0] - src[7]) > 2*QP) return 0; |
|
246 |
+ int i; |
|
247 |
+#if 1 |
|
248 |
+ for(i=0; i<2; i++){ |
|
249 |
+ if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0; |
|
250 |
+ src += stride; |
|
251 |
+ if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0; |
|
252 |
+ src += stride; |
|
253 |
+ if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0; |
|
254 |
+ src += stride; |
|
255 |
+ if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0; |
|
256 |
+ src += stride; |
|
257 |
+ } |
|
258 |
+#else |
|
259 |
+ for(i=0; i<8; i++){ |
|
260 |
+ if((unsigned)(src[0] - src[7] + 2*QP) > 4*QP) return 0; |
|
261 |
+ src += stride; |
|
262 |
+ } |
|
263 |
+#endif |
|
264 |
+ return 1; |
|
265 |
+} |
|
247 | 266 |
|
267 |
+static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP) |
|
268 |
+{ |
|
269 |
+#if 1 |
|
270 |
+#if 1 |
|
271 |
+ int x; |
|
272 |
+ src+= stride*4; |
|
273 |
+ for(x=0; x<BLOCK_SIZE; x+=4) |
|
274 |
+ { |
|
275 |
+ if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0; |
|
276 |
+ if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0; |
|
277 |
+ if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0; |
|
278 |
+ if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0; |
|
279 |
+ } |
|
280 |
+#else |
|
281 |
+ int x; |
|
282 |
+ src+= stride*3; |
|
283 |
+ for(x=0; x<BLOCK_SIZE; x++) |
|
284 |
+ { |
|
285 |
+ if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0; |
|
286 |
+ } |
|
287 |
+#endif |
|
248 | 288 |
return 1; |
289 |
+#else |
|
290 |
+ int x; |
|
291 |
+ src+= stride*4; |
|
292 |
+ for(x=0; x<BLOCK_SIZE; x++) |
|
293 |
+ { |
|
294 |
+ int min=255; |
|
295 |
+ int max=0; |
|
296 |
+ int y; |
|
297 |
+ for(y=0; y<8; y++){ |
|
298 |
+ int v= src[x + y*stride]; |
|
299 |
+ if(v>max) max=v; |
|
300 |
+ if(v<min) min=v; |
|
301 |
+ } |
|
302 |
+ if(max-min > 2*QP) return 0; |
|
303 |
+ } |
|
304 |
+ return 1; |
|
305 |
+#endif |
|
306 |
+} |
|
307 |
+ |
|
308 |
+static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){ |
|
309 |
+ if( isVertDC_C(src, stride, c) ){ |
|
310 |
+ if( isVertMinMaxOk_C(src, stride, c->QP) ) |
|
311 |
+ return 1; |
|
312 |
+ else |
|
313 |
+ return 0; |
|
314 |
+ }else{ |
|
315 |
+ return 2; |
|
316 |
+ } |
|
249 | 317 |
} |
250 | 318 |
|
251 | 319 |
static inline void doHorizDefFilter(uint8_t dst[], int stride, int QP) |
... | ... |
@@ -49,65 +49,81 @@ |
49 | 49 |
"paddb " #a ", " #b " \n\t" |
50 | 50 |
#endif |
51 | 51 |
|
52 |
- |
|
53 | 52 |
//FIXME? |255-0| = 1 (shouldn't be a problem ...) |
54 | 53 |
#ifdef HAVE_MMX |
55 | 54 |
/** |
56 | 55 |
* Check if the middle 8x8 Block in the given 8x16 block is flat |
57 | 56 |
*/ |
58 |
-static inline int RENAME(isVertDC)(uint8_t src[], int stride, PPContext *c){ |
|
59 |
- int numEq= 0; |
|
57 |
+static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){ |
|
58 |
+ int numEq= 0, dcOk; |
|
60 | 59 |
src+= stride*4; // src points to begin of the 8x8 Block |
61 | 60 |
asm volatile( |
62 |
- "leal (%1, %2), %%eax \n\t" |
|
61 |
+ "leal (%2, %3), %%eax \n\t" |
|
63 | 62 |
// 0 1 2 3 4 5 6 7 8 9 |
64 | 63 |
// %1 eax eax+%2 eax+2%2 %1+4%2 ecx ecx+%2 ecx+2%2 %1+8%2 ecx+4%2 |
65 |
- "movq %3, %%mm7 \n\t" |
|
66 |
- "movq %4, %%mm6 \n\t" |
|
64 |
+ "movq %4, %%mm7 \n\t" |
|
65 |
+ "movq %5, %%mm6 \n\t" |
|
67 | 66 |
|
68 |
- "movq (%1), %%mm0 \n\t" |
|
67 |
+ "movq (%2), %%mm0 \n\t" |
|
69 | 68 |
"movq (%%eax), %%mm1 \n\t" |
69 |
+ "movq %%mm0, %%mm3 \n\t" |
|
70 |
+ "movq %%mm0, %%mm4 \n\t" |
|
71 |
+ PMAXUB(%%mm1, %%mm4) |
|
72 |
+ PMINUB(%%mm1, %%mm3, %%mm5) |
|
70 | 73 |
"psubb %%mm1, %%mm0 \n\t" // mm0 = difference |
71 | 74 |
"paddb %%mm7, %%mm0 \n\t" |
72 | 75 |
"pcmpgtb %%mm6, %%mm0 \n\t" |
73 | 76 |
|
74 |
- "movq (%%eax,%2), %%mm2 \n\t" |
|
77 |
+ "movq (%%eax,%3), %%mm2 \n\t" |
|
78 |
+ PMAXUB(%%mm2, %%mm4) |
|
79 |
+ PMINUB(%%mm2, %%mm3, %%mm5) |
|
75 | 80 |
"psubb %%mm2, %%mm1 \n\t" |
76 | 81 |
"paddb %%mm7, %%mm1 \n\t" |
77 | 82 |
"pcmpgtb %%mm6, %%mm1 \n\t" |
78 | 83 |
"paddb %%mm1, %%mm0 \n\t" |
79 | 84 |
|
80 |
- "movq (%%eax, %2, 2), %%mm1 \n\t" |
|
85 |
+ "movq (%%eax, %3, 2), %%mm1 \n\t" |
|
86 |
+ PMAXUB(%%mm1, %%mm4) |
|
87 |
+ PMINUB(%%mm1, %%mm3, %%mm5) |
|
81 | 88 |
"psubb %%mm1, %%mm2 \n\t" |
82 | 89 |
"paddb %%mm7, %%mm2 \n\t" |
83 | 90 |
"pcmpgtb %%mm6, %%mm2 \n\t" |
84 | 91 |
"paddb %%mm2, %%mm0 \n\t" |
85 | 92 |
|
86 |
- "leal (%%eax, %2, 4), %%eax \n\t" |
|
93 |
+ "leal (%%eax, %3, 4), %%eax \n\t" |
|
87 | 94 |
|
88 |
- "movq (%1, %2, 4), %%mm2 \n\t" |
|
95 |
+ "movq (%2, %3, 4), %%mm2 \n\t" |
|
96 |
+ PMAXUB(%%mm2, %%mm4) |
|
97 |
+ PMINUB(%%mm2, %%mm3, %%mm5) |
|
89 | 98 |
"psubb %%mm2, %%mm1 \n\t" |
90 | 99 |
"paddb %%mm7, %%mm1 \n\t" |
91 | 100 |
"pcmpgtb %%mm6, %%mm1 \n\t" |
92 | 101 |
"paddb %%mm1, %%mm0 \n\t" |
93 | 102 |
|
94 | 103 |
"movq (%%eax), %%mm1 \n\t" |
104 |
+ PMAXUB(%%mm1, %%mm4) |
|
105 |
+ PMINUB(%%mm1, %%mm3, %%mm5) |
|
95 | 106 |
"psubb %%mm1, %%mm2 \n\t" |
96 | 107 |
"paddb %%mm7, %%mm2 \n\t" |
97 | 108 |
"pcmpgtb %%mm6, %%mm2 \n\t" |
98 | 109 |
"paddb %%mm2, %%mm0 \n\t" |
99 | 110 |
|
100 |
- "movq (%%eax, %2), %%mm2 \n\t" |
|
111 |
+ "movq (%%eax, %3), %%mm2 \n\t" |
|
112 |
+ PMAXUB(%%mm2, %%mm4) |
|
113 |
+ PMINUB(%%mm2, %%mm3, %%mm5) |
|
101 | 114 |
"psubb %%mm2, %%mm1 \n\t" |
102 | 115 |
"paddb %%mm7, %%mm1 \n\t" |
103 | 116 |
"pcmpgtb %%mm6, %%mm1 \n\t" |
104 | 117 |
"paddb %%mm1, %%mm0 \n\t" |
105 | 118 |
|
106 |
- "movq (%%eax, %2, 2), %%mm1 \n\t" |
|
119 |
+ "movq (%%eax, %3, 2), %%mm1 \n\t" |
|
120 |
+ PMAXUB(%%mm1, %%mm4) |
|
121 |
+ PMINUB(%%mm1, %%mm3, %%mm5) |
|
107 | 122 |
"psubb %%mm1, %%mm2 \n\t" |
108 | 123 |
"paddb %%mm7, %%mm2 \n\t" |
109 | 124 |
"pcmpgtb %%mm6, %%mm2 \n\t" |
110 | 125 |
"paddb %%mm2, %%mm0 \n\t" |
126 |
+ "psubusb %%mm3, %%mm4 \n\t" |
|
111 | 127 |
|
112 | 128 |
" \n\t" |
113 | 129 |
#ifdef HAVE_MMX2 |
... | ... |
@@ -124,70 +140,28 @@ asm volatile( |
124 | 124 |
"psrlq $32, %%mm0 \n\t" |
125 | 125 |
"paddb %%mm1, %%mm0 \n\t" |
126 | 126 |
#endif |
127 |
+ "movq %6, %%mm7 \n\t" // QP,..., QP |
|
128 |
+ "paddusb %%mm7, %%mm7 \n\t" // 2QP ... 2QP |
|
129 |
+ "psubusb %%mm7, %%mm4 \n\t" // Diff <= 2QP -> 0 |
|
130 |
+ "packssdw %%mm4, %%mm4 \n\t" |
|
127 | 131 |
"movd %%mm0, %0 \n\t" |
128 |
- : "=r" (numEq) |
|
129 |
- : "r" (src), "r" (stride), "m" (c->mmxDcOffset[c->nonBQP]), "m" (c->mmxDcThreshold[c->nonBQP]) |
|
132 |
+ "movd %%mm4, %1 \n\t" |
|
133 |
+ |
|
134 |
+ : "=r" (numEq), "=r" (dcOk) |
|
135 |
+ : "r" (src), "r" (stride), "m" (c->mmxDcOffset[c->nonBQP]), "m" (c->mmxDcThreshold[c->nonBQP]), "m" (c->pQPb) |
|
130 | 136 |
: "%eax" |
131 | 137 |
); |
138 |
+ |
|
132 | 139 |
numEq= (-numEq) &0xFF; |
133 |
- return numEq > c->ppMode.flatnessThreshold; |
|
140 |
+ if(numEq > c->ppMode.flatnessThreshold){ |
|
141 |
+ if(dcOk) return 0; |
|
142 |
+ else return 1; |
|
143 |
+ }else{ |
|
144 |
+ return 2; |
|
145 |
+ } |
|
134 | 146 |
} |
135 | 147 |
#endif |
136 | 148 |
|
137 |
-static inline int RENAME(isVertMinMaxOk)(uint8_t src[], int stride, PPContext *c) |
|
138 |
-{ |
|
139 |
-#ifdef HAVE_MMX |
|
140 |
- int isOk; |
|
141 |
- src+= stride*3; |
|
142 |
- asm volatile( |
|
143 |
- "movq (%1, %2), %%mm0 \n\t" |
|
144 |
- "movq (%1, %2, 8), %%mm1 \n\t" |
|
145 |
- "movq %%mm0, %%mm2 \n\t" |
|
146 |
- "psubusb %%mm1, %%mm0 \n\t" |
|
147 |
- "psubusb %%mm2, %%mm1 \n\t" |
|
148 |
- "por %%mm1, %%mm0 \n\t" // ABS Diff |
|
149 |
- |
|
150 |
- "movq %3, %%mm7 \n\t" // QP,..., QP |
|
151 |
- "paddusb %%mm7, %%mm7 \n\t" // 2QP ... 2QP |
|
152 |
- "psubusb %%mm7, %%mm0 \n\t" // Diff <= 2QP -> 0 |
|
153 |
- "packssdw %%mm0, %%mm0 \n\t" |
|
154 |
- "movd %%mm0, %0 \n\t" |
|
155 |
- : "=r" (isOk) |
|
156 |
- : "r" (src), "r" (stride), "m" (c->pQPb) |
|
157 |
- ); |
|
158 |
- return isOk==0; |
|
159 |
-#else |
|
160 |
-#if 1 |
|
161 |
- int x; |
|
162 |
- const int QP= c->QP; |
|
163 |
- src+= stride*3; |
|
164 |
- for(x=0; x<BLOCK_SIZE; x++) |
|
165 |
- { |
|
166 |
- if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0; |
|
167 |
- } |
|
168 |
- |
|
169 |
- return 1; |
|
170 |
-#else |
|
171 |
- int x; |
|
172 |
- const int QP= c->QP; |
|
173 |
- src+= stride*4; |
|
174 |
- for(x=0; x<BLOCK_SIZE; x++) |
|
175 |
- { |
|
176 |
- int min=255; |
|
177 |
- int max=0; |
|
178 |
- int y; |
|
179 |
- for(y=0; y<8; y++){ |
|
180 |
- int v= src[x + y*stride]; |
|
181 |
- if(v>max) max=v; |
|
182 |
- if(v<min) min=v; |
|
183 |
- } |
|
184 |
- if(max-min > 2*QP) return 0; |
|
185 |
- } |
|
186 |
- return 1; |
|
187 |
-#endif |
|
188 |
-#endif |
|
189 |
-} |
|
190 |
- |
|
191 | 149 |
/** |
192 | 150 |
* Do a vertical low pass filter on the 8x16 block (only write to the 8x8 block in the middle) |
193 | 151 |
* using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 |
... | ... |
@@ -3119,12 +3093,11 @@ static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int |
3119 | 3119 |
RENAME(vertX1Filter)(dstBlock, stride, &c); |
3120 | 3120 |
else if(mode & V_DEBLOCK) |
3121 | 3121 |
{ |
3122 |
- if( RENAME(isVertDC)(dstBlock, stride, &c)) |
|
3123 |
- { |
|
3124 |
- if(RENAME(isVertMinMaxOk)(dstBlock, stride, &c)) |
|
3125 |
- RENAME(doVertLowPass)(dstBlock, stride, &c); |
|
3126 |
- } |
|
3127 |
- else |
|
3122 |
+ const int t= RENAME(vertClassify)(dstBlock, stride, &c); |
|
3123 |
+ |
|
3124 |
+ if(t==1) |
|
3125 |
+ RENAME(doVertLowPass)(dstBlock, stride, &c); |
|
3126 |
+ else if(t==2) |
|
3128 | 3127 |
RENAME(doVertDefFilter)(dstBlock, stride, &c); |
3129 | 3128 |
} |
3130 | 3129 |
} |
... | ... |
@@ -3140,12 +3113,12 @@ static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int |
3140 | 3140 |
RENAME(vertX1Filter)(tempBlock1, 16, &c); |
3141 | 3141 |
else if(mode & H_DEBLOCK) |
3142 | 3142 |
{ |
3143 |
- if( RENAME(isVertDC)(tempBlock1, 16, &c)) |
|
3144 |
- { |
|
3145 |
- if(RENAME(isVertMinMaxOk)(tempBlock1, 16, &c)) |
|
3146 |
- RENAME(doVertLowPass)(tempBlock1, 16, &c); |
|
3147 |
- } |
|
3148 |
- else |
|
3143 |
+//START_TIMER |
|
3144 |
+ const int t= RENAME(vertClassify)(tempBlock1, 16, &c); |
|
3145 |
+//STOP_TIMER("dc & minmax") |
|
3146 |
+ if(t==1) |
|
3147 |
+ RENAME(doVertLowPass)(tempBlock1, 16, &c); |
|
3148 |
+ else if(t==2) |
|
3149 | 3149 |
RENAME(doVertDefFilter)(tempBlock1, 16, &c); |
3150 | 3150 |
} |
3151 | 3151 |
|