Originally committed as revision 12565 to svn://svn.ffmpeg.org/ffmpeg/trunk
Luca Barbato authored on 2008/03/24 00:51:02
@@ -103,7 +103,6 @@ static inline int vertClassify_altivec(uint8_t src[], int stride, PPContext *c)
 
     src2 += stride * 4;
 
-
 #define LOAD_LINE(i) \
 { \
     vector unsigned char perm##i = vec_lvsl(j##i, src2); \
@@ -158,23 +157,23 @@ static inline int vertClassify_altivec(uint8_t src[], int stride, PPContext *c)
                       v_dcThreshold); \
     const vector signed short v_part##i = vec_and(mask, v_comp##i);
 
-{
-    ITER(0, 1)
-    ITER(1, 2)
-    ITER(2, 3)
-    ITER(3, 4)
-    ITER(4, 5)
-    ITER(5, 6)
-    ITER(6, 7)
-
-    v_numEq = vec_sum4s(v_part0, v_numEq);
-    v_numEq = vec_sum4s(v_part1, v_numEq);
-    v_numEq = vec_sum4s(v_part2, v_numEq);
-    v_numEq = vec_sum4s(v_part3, v_numEq);
-    v_numEq = vec_sum4s(v_part4, v_numEq);
-    v_numEq = vec_sum4s(v_part5, v_numEq);
-    v_numEq = vec_sum4s(v_part6, v_numEq);
-}
+    {
+        ITER(0, 1)
+        ITER(1, 2)
+        ITER(2, 3)
+        ITER(3, 4)
+        ITER(4, 5)
+        ITER(5, 6)
+        ITER(6, 7)
+
+        v_numEq = vec_sum4s(v_part0, v_numEq);
+        v_numEq = vec_sum4s(v_part1, v_numEq);
+        v_numEq = vec_sum4s(v_part2, v_numEq);
+        v_numEq = vec_sum4s(v_part3, v_numEq);
+        v_numEq = vec_sum4s(v_part4, v_numEq);
+        v_numEq = vec_sum4s(v_part5, v_numEq);
+        v_numEq = vec_sum4s(v_part6, v_numEq);
+    }
 
 #undef ITER
 
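For orientation, the block re-indented above is the flatness test of the vertical deblock classifier: each ITER(i, j) compares two adjacent rows of the 8x8 block against the DC threshold, and the vec_sum4s calls accumulate the per-pixel hit masks into v_numEq. A minimal scalar sketch of the same idea (hypothetical helper name, threshold handling simplified):

#include <stdint.h>

/* Scalar model of the ITER/vec_sum4s chain: for the seven adjacent row
 * pairs of an 8x8 block, count pixels whose vertical difference lies
 * inside the DC window. The unsigned compare mirrors the vector code's
 * vec_cmplt on (diff + dcOffset). Illustration only. */
static int count_flat_neighbours(const uint8_t block[8][8],
                                 int dcOffset, int dcThreshold)
{
    int numEq = 0;                        /* plays the role of v_numEq */
    for (int i = 0; i < 7; i++)           /* ITER(0, 1) ... ITER(6, 7) */
        for (int x = 0; x < 8; x++) {
            int diff = block[i][x] - block[i + 1][x];
            /* |diff| small enough  <=>  (unsigned)(diff + dcOffset) < dcThreshold */
            if ((unsigned)(diff + dcOffset) < (unsigned)dcThreshold)
                numEq++;                  /* vec_sum4s sums the mask lanes */
        }
    return numEq;                         /* high count => block is flat */
}

A count above the flatness threshold sends the block down the low-pass path handled by doVertLowPass_altivec below.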
@@ -286,73 +285,73 @@ static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c)
     LOAD_LINE(7);
     LOAD_LINE(8);
     LOAD_LINE(9);
-    }
+        }
 #undef LOAD_LINE
 #undef LOAD_LINE_ALIGNED
-{
-    const vector unsigned short v_2 = vec_splat_u16(2);
-    const vector unsigned short v_4 = vec_splat_u16(4);
-
-    const vector signed short v_diff01 = vec_sub(vb0, vb1);
-    const vector unsigned short v_cmp01 =
-        (const vector unsigned short) vec_cmplt(vec_abs(v_diff01), vqp);
-    const vector signed short v_first = vec_sel(vb1, vb0, v_cmp01);
-    const vector signed short v_diff89 = vec_sub(vb8, vb9);
-    const vector unsigned short v_cmp89 =
-        (const vector unsigned short) vec_cmplt(vec_abs(v_diff89), vqp);
-    const vector signed short v_last = vec_sel(vb8, vb9, v_cmp89);
-
-    const vector signed short temp01 = vec_mladd(v_first, (vector signed short)v_4, vb1);
-    const vector signed short temp02 = vec_add(vb2, vb3);
-    const vector signed short temp03 = vec_add(temp01, (vector signed short)v_4);
-    const vector signed short v_sumsB0 = vec_add(temp02, temp03);
-
-    const vector signed short temp11 = vec_sub(v_sumsB0, v_first);
-    const vector signed short v_sumsB1 = vec_add(temp11, vb4);
-
-    const vector signed short temp21 = vec_sub(v_sumsB1, v_first);
-    const vector signed short v_sumsB2 = vec_add(temp21, vb5);
-
-    const vector signed short temp31 = vec_sub(v_sumsB2, v_first);
-    const vector signed short v_sumsB3 = vec_add(temp31, vb6);
-
-    const vector signed short temp41 = vec_sub(v_sumsB3, v_first);
-    const vector signed short v_sumsB4 = vec_add(temp41, vb7);
-
-    const vector signed short temp51 = vec_sub(v_sumsB4, vb1);
-    const vector signed short v_sumsB5 = vec_add(temp51, vb8);
-
-    const vector signed short temp61 = vec_sub(v_sumsB5, vb2);
-    const vector signed short v_sumsB6 = vec_add(temp61, v_last);
-
-    const vector signed short temp71 = vec_sub(v_sumsB6, vb3);
-    const vector signed short v_sumsB7 = vec_add(temp71, v_last);
-
-    const vector signed short temp81 = vec_sub(v_sumsB7, vb4);
-    const vector signed short v_sumsB8 = vec_add(temp81, v_last);
-
-    const vector signed short temp91 = vec_sub(v_sumsB8, vb5);
-    const vector signed short v_sumsB9 = vec_add(temp91, v_last);
-
-#define COMPUTE_VR(i, j, k) \
-    const vector signed short temps1##i = \
-        vec_add(v_sumsB##i, v_sumsB##k); \
-    const vector signed short temps2##i = \
-        vec_mladd(vb##j, (vector signed short)v_2, temps1##i); \
-    const vector signed short vr##j = vec_sra(temps2##i, v_4)
-
-    COMPUTE_VR(0, 1, 2);
-    COMPUTE_VR(1, 2, 3);
-    COMPUTE_VR(2, 3, 4);
-    COMPUTE_VR(3, 4, 5);
-    COMPUTE_VR(4, 5, 6);
-    COMPUTE_VR(5, 6, 7);
-    COMPUTE_VR(6, 7, 8);
-    COMPUTE_VR(7, 8, 9);
-
-    const vector signed char neg1 = vec_splat_s8(-1);
-    const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
-                                                                        0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);
+    {
+        const vector unsigned short v_2 = vec_splat_u16(2);
+        const vector unsigned short v_4 = vec_splat_u16(4);
+
+        const vector signed short v_diff01 = vec_sub(vb0, vb1);
+        const vector unsigned short v_cmp01 =
+            (const vector unsigned short) vec_cmplt(vec_abs(v_diff01), vqp);
+        const vector signed short v_first = vec_sel(vb1, vb0, v_cmp01);
+        const vector signed short v_diff89 = vec_sub(vb8, vb9);
+        const vector unsigned short v_cmp89 =
+            (const vector unsigned short) vec_cmplt(vec_abs(v_diff89), vqp);
+        const vector signed short v_last = vec_sel(vb8, vb9, v_cmp89);
+
+        const vector signed short temp01 = vec_mladd(v_first, (vector signed short)v_4, vb1);
+        const vector signed short temp02 = vec_add(vb2, vb3);
+        const vector signed short temp03 = vec_add(temp01, (vector signed short)v_4);
+        const vector signed short v_sumsB0 = vec_add(temp02, temp03);
+
+        const vector signed short temp11 = vec_sub(v_sumsB0, v_first);
+        const vector signed short v_sumsB1 = vec_add(temp11, vb4);
+
+        const vector signed short temp21 = vec_sub(v_sumsB1, v_first);
+        const vector signed short v_sumsB2 = vec_add(temp21, vb5);
+
+        const vector signed short temp31 = vec_sub(v_sumsB2, v_first);
+        const vector signed short v_sumsB3 = vec_add(temp31, vb6);
+
+        const vector signed short temp41 = vec_sub(v_sumsB3, v_first);
+        const vector signed short v_sumsB4 = vec_add(temp41, vb7);
+
+        const vector signed short temp51 = vec_sub(v_sumsB4, vb1);
+        const vector signed short v_sumsB5 = vec_add(temp51, vb8);
+
+        const vector signed short temp61 = vec_sub(v_sumsB5, vb2);
+        const vector signed short v_sumsB6 = vec_add(temp61, v_last);
+
+        const vector signed short temp71 = vec_sub(v_sumsB6, vb3);
+        const vector signed short v_sumsB7 = vec_add(temp71, v_last);
+
+        const vector signed short temp81 = vec_sub(v_sumsB7, vb4);
+        const vector signed short v_sumsB8 = vec_add(temp81, v_last);
+
+        const vector signed short temp91 = vec_sub(v_sumsB8, vb5);
+        const vector signed short v_sumsB9 = vec_add(temp91, v_last);
+
+    #define COMPUTE_VR(i, j, k) \
+        const vector signed short temps1##i = \
+            vec_add(v_sumsB##i, v_sumsB##k); \
+        const vector signed short temps2##i = \
+            vec_mladd(vb##j, (vector signed short)v_2, temps1##i); \
+        const vector signed short vr##j = vec_sra(temps2##i, v_4)
+
+        COMPUTE_VR(0, 1, 2);
+        COMPUTE_VR(1, 2, 3);
+        COMPUTE_VR(2, 3, 4);
+        COMPUTE_VR(3, 4, 5);
+        COMPUTE_VR(4, 5, 6);
+        COMPUTE_VR(5, 6, 7);
+        COMPUTE_VR(6, 7, 8);
+        COMPUTE_VR(7, 8, 9);
+
+        const vector signed char neg1 = vec_splat_s8(-1);
+        const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+                                                                            0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);
 
 #define PACK_AND_STORE(i) \
 { const vector unsigned char perms##i = \
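The block re-indented in this hunk is the core of the vertical low pass: v_sumsB0..v_sumsB9 form a running window sum down one column, with v_first/v_last standing in for pixels beyond the block edge, and COMPUTE_VR turns two neighbouring sums plus twice the centre pixel into a filtered value of total weight 16 (the final vec_sra by v_4). A scalar model of the same arithmetic on one 10-pixel column (hypothetical helper, illustration only):

#include <stdint.h>
#include <stdlib.h>

/* Scalar model of the v_sumsB / COMPUTE_VR chain for a single column. */
static void vert_lowpass_column(int16_t b[10], int qp)
{
    /* v_first / v_last: use the pixel across the block edge only if the
     * step to it is below the quantizer threshold (vec_sel on vec_cmplt). */
    const int first = abs(b[0] - b[1]) < qp ? b[0] : b[1];
    const int last  = abs(b[8] - b[9]) < qp ? b[9] : b[8];

    int sums[10];
    sums[0] = 4 * first + b[1] + b[2] + b[3] + 4;  /* temp01..temp03; +4 rounds */
    sums[1] = sums[0] - first + b[4];
    sums[2] = sums[1] - first + b[5];
    sums[3] = sums[2] - first + b[6];
    sums[4] = sums[3] - first + b[7];
    sums[5] = sums[4] - b[1]  + b[8];
    sums[6] = sums[5] - b[2]  + last;
    sums[7] = sums[6] - b[3]  + last;
    sums[8] = sums[7] - b[4]  + last;
    sums[9] = sums[8] - b[5]  + last;

    /* COMPUTE_VR(i, j, k): vr##j = (sums[i] + sums[k] + 2*b[j]) >> 4 */
    int16_t vr[10];
    for (int j = 1; j <= 8; j++)
        vr[j] = (sums[j - 1] + sums[j + 1] + 2 * b[j]) >> 4;
    for (int j = 1; j <= 8; j++)
        b[j] = vr[j];
}

The first/last selection is what keeps genuine edges intact: a pixel outside the block only enters the window when the step across the boundary is below vqp.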
@@ -379,31 +378,31 @@ static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c)
         vec_perm(vf##i, vbT##i, permHH); \
     vec_st(vg##i, i * stride, src2);}
 
-    /* Special-casing the aligned case is worthwhile, as all calls from
-     * the (transposed) horizontable deblocks will be aligned, in addition
-     * to the naturally aligned vertical deblocks. */
-    if (properStride && srcAlign) {
-        PACK_AND_STORE_ALIGNED(1)
-        PACK_AND_STORE_ALIGNED(2)
-        PACK_AND_STORE_ALIGNED(3)
-        PACK_AND_STORE_ALIGNED(4)
-        PACK_AND_STORE_ALIGNED(5)
-        PACK_AND_STORE_ALIGNED(6)
-        PACK_AND_STORE_ALIGNED(7)
-        PACK_AND_STORE_ALIGNED(8)
-    } else {
-        PACK_AND_STORE(1)
-        PACK_AND_STORE(2)
-        PACK_AND_STORE(3)
-        PACK_AND_STORE(4)
-        PACK_AND_STORE(5)
-        PACK_AND_STORE(6)
-        PACK_AND_STORE(7)
-        PACK_AND_STORE(8)
+        /* Special-casing the aligned case is worthwhile, as all calls from
+         * the (transposed) horizontable deblocks will be aligned, in addition
+         * to the naturally aligned vertical deblocks. */
+        if (properStride && srcAlign) {
+            PACK_AND_STORE_ALIGNED(1)
+            PACK_AND_STORE_ALIGNED(2)
+            PACK_AND_STORE_ALIGNED(3)
+            PACK_AND_STORE_ALIGNED(4)
+            PACK_AND_STORE_ALIGNED(5)
+            PACK_AND_STORE_ALIGNED(6)
+            PACK_AND_STORE_ALIGNED(7)
+            PACK_AND_STORE_ALIGNED(8)
+        } else {
+            PACK_AND_STORE(1)
+            PACK_AND_STORE(2)
+            PACK_AND_STORE(3)
+            PACK_AND_STORE(4)
+            PACK_AND_STORE(5)
+            PACK_AND_STORE(6)
+            PACK_AND_STORE(7)
+            PACK_AND_STORE(8)
+        }
+    #undef PACK_AND_STORE
+    #undef PACK_AND_STORE_ALIGNED
     }
-#undef PACK_AND_STORE
-#undef PACK_AND_STORE_ALIGNED
-}
 }
 
 
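The PACK_AND_STORE / PACK_AND_STORE_ALIGNED split that this last hunk re-indents exists because vec_st can only write to 16-byte-aligned addresses, so an unaligned destination needs a read-merge-write sequence, roughly the following (this sketch follows Apple's published AltiVec idiom, not the exact macro in this file):

#include <altivec.h>

/* Canonical AltiVec unaligned store: load the two aligned quadwords the
 * target spans, splice src into the middle, write both back. The aligned
 * fast path is a single vec_st. Illustration only. */
static void store_unaligned(vector unsigned char src, unsigned char *target)
{
    vector unsigned char MSQ       = vec_ld(0, target);    /* first aligned quadword  */
    vector unsigned char LSQ       = vec_ld(15, target);   /* second aligned quadword */
    vector unsigned char edgeAlign = vec_lvsl(0, target);  /* map to extract the edges */
    vector unsigned char edges     = vec_perm(LSQ, MSQ, edgeAlign);
    vector unsigned char align     = vec_lvsr(0, target);  /* map to misalign the data */
    MSQ = vec_perm(edges, src, align);
    LSQ = vec_perm(src, edges, align);
    vec_st(LSQ, 15, target);   /* LSQ first, so an aligned target still ends up with src */
    vec_st(MSQ, 0, target);
}

With eight stores per call, skipping that sequence whenever properStride && srcAlign holds is a real saving, which is what the "Special-casing the aligned case is worthwhile" comment is pointing at.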