Originally committed as revision 12562 to svn://svn.ffmpeg.org/ffmpeg/trunk
Luca Barbato authored on 2008/03/24 00:35:19
@@ -223,21 +223,31 @@ static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c)
     const vector signed int zero = vec_splat_s32(0);
     const int properStride = (stride % 16);
     const int srcAlign = ((unsigned long)src2 % 16);
-    DECLARE_ALIGNED(16, short, qp[8]);
-    qp[0] = c->QP;
+    DECLARE_ALIGNED(16, short, qp[8]) = {c->QP};
     vector signed short vqp = vec_ld(0, qp);
-    vqp = vec_splat(vqp, 0);
-
-    src2 += stride*3;
-
     vector signed short vb0, vb1, vb2, vb3, vb4, vb5, vb6, vb7, vb8, vb9;
     vector unsigned char vbA0, vbA1, vbA2, vbA3, vbA4, vbA5, vbA6, vbA7, vbA8, vbA9;
     vector unsigned char vbB0, vbB1, vbB2, vbB3, vbB4, vbB5, vbB6, vbB7, vbB8, vbB9;
     vector unsigned char vbT0, vbT1, vbT2, vbT3, vbT4, vbT5, vbT6, vbT7, vbT8, vbT9;
+    vector unsigned char perml0, perml1, perml2, perml3, perml4,
+                         perml5, perml6, perml7, perml8, perml9;
+    register int j0 = 0,
+                 j1 = stride,
+                 j2 = 2 * stride,
+                 j3 = 3 * stride,
+                 j4 = 4 * stride,
+                 j5 = 5 * stride,
+                 j6 = 6 * stride,
+                 j7 = 7 * stride,
+                 j8 = 8 * stride,
+                 j9 = 9 * stride;
+
+    vqp = vec_splat(vqp, 0);
+
+    src2 += stride*3;

 #define LOAD_LINE(i)                                                    \
-    const vector unsigned char perml##i =                               \
-        vec_lvsl(i * stride, src2);                                     \
+    perml##i = vec_lvsl(i * stride, src2);                              \
     vbA##i = vec_ld(i * stride, src2);                                  \
     vbB##i = vec_ld(i * stride + 16, src2);                             \
     vbT##i = vec_perm(vbA##i, vbB##i, perml##i);                        \
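Note on the qp[] change above: C guarantees that aggregate members without an explicit initializer are zero-initialized, so "= {c->QP}" sets qp[0] and clears qp[1]..qp[7]; the vec_splat(vqp, 0) that follows only reads element 0 in any case. A minimal standalone sketch of that semantics (plain C, hypothetical value, not the FFmpeg code):

    #include <stdio.h>

    int main(void)
    {
        short qp[8] = {42};  /* element 0 set explicitly; 1..7 become 0 */
        int i;

        for (i = 0; i < 8; i++)
            printf("qp[%d] = %d\n", i, qp[i]);
        return 0;
    }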
@@ -246,7 +256,6 @@ static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c)
         (vector unsigned char)vbT##i)

 #define LOAD_LINE_ALIGNED(i)                                            \
-    register int j##i = i * stride;                                     \
     vbT##i = vec_ld(j##i, src2);                                        \
     vb##i =                                                             \
         (vector signed short)vec_mergeh((vector signed char)zero,       \
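The hoisting of perml0..perml9, and of the j##i offsets out of LOAD_LINE_ALIGNED, appears to serve C90's declaration-before-statement rule: once LOAD_LINE(0) has executed statements, the declaration that used to open LOAD_LINE(1) would land after them. A reduced sketch of the pattern, with hypothetical macros (not the FFmpeg ones):

    /* Old shape: each expansion opens with a declaration, so back-to-back
     * expansions put a declaration after statements (invalid C90, flagged
     * by gcc's -Wdeclaration-after-statement). */
    #define LOAD_OLD(i) const int j##i = (i) * stride; sum += j##i

    /* New shape: declarations hoisted, the macro is statements only. */
    #define LOAD_NEW(i) sum += j##i

    int demo(int stride)
    {
        int sum = 0;
        int j0 = 0, j1 = stride;  /* hoisted, as in the patch */

        LOAD_NEW(0);
        LOAD_NEW(1);  /* fine; LOAD_OLD(1) here would declare after a statement */
        return sum;
    }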
@@ -255,7 +264,7 @@ static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c)
     /* Special-casing the aligned case is worthwhile, as all calls from
      * the (transposed) horizontable deblocks will be aligned, in addition
      * to the naturally aligned vertical deblocks. */
-    if (properStride && srcAlign) {
+    if (properStride && srcAlign) {
         LOAD_LINE_ALIGNED(0);
         LOAD_LINE_ALIGNED(1);
         LOAD_LINE_ALIGNED(2);
@@ -266,7 +275,7 @@ static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c)
         LOAD_LINE_ALIGNED(7);
         LOAD_LINE_ALIGNED(8);
         LOAD_LINE_ALIGNED(9);
-    } else {
+    } else {
         LOAD_LINE(0);
         LOAD_LINE(1);
         LOAD_LINE(2);
@@ -280,7 +289,7 @@ static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c)
     }
 #undef LOAD_LINE
 #undef LOAD_LINE_ALIGNED
-
+{
     const vector unsigned short v_2 = vec_splat_u16(2);
     const vector unsigned short v_4 = vec_splat_u16(4);

@@ -346,7 +355,7 @@ static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c)
                        0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);

 #define PACK_AND_STORE(i)                                               \
-    const vector unsigned char perms##i =                               \
+{   const vector unsigned char perms##i =                               \
         vec_lvsr(i * stride, src2);                                     \
     const vector unsigned char vf##i =                                  \
         vec_packsu(vr##i, (vector signed short)zero);                   \
@@ -361,40 +370,41 @@ static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c)
     const vector unsigned char svB##i =                                 \
         vec_sel(vg2##i, vbB##i, mask##i);                               \
     vec_st(svA##i, i * stride, src2);                                   \
-    vec_st(svB##i, i * stride + 16, src2)
+    vec_st(svB##i, i * stride + 16, src2);}

 #define PACK_AND_STORE_ALIGNED(i)                                       \
-    const vector unsigned char vf##i =                                  \
+{   const vector unsigned char vf##i =                                  \
        vec_packsu(vr##i, (vector signed short)zero);                    \
     const vector unsigned char vg##i =                                  \
         vec_perm(vf##i, vbT##i, permHH);                                \
-    vec_st(vg##i, i * stride, src2)
+    vec_st(vg##i, i * stride, src2);}

     /* Special-casing the aligned case is worthwhile, as all calls from
      * the (transposed) horizontable deblocks will be aligned, in addition
      * to the naturally aligned vertical deblocks. */
     if (properStride && srcAlign) {
-        PACK_AND_STORE_ALIGNED(1);
-        PACK_AND_STORE_ALIGNED(2);
-        PACK_AND_STORE_ALIGNED(3);
-        PACK_AND_STORE_ALIGNED(4);
-        PACK_AND_STORE_ALIGNED(5);
-        PACK_AND_STORE_ALIGNED(6);
-        PACK_AND_STORE_ALIGNED(7);
-        PACK_AND_STORE_ALIGNED(8);
+        PACK_AND_STORE_ALIGNED(1)
+        PACK_AND_STORE_ALIGNED(2)
+        PACK_AND_STORE_ALIGNED(3)
+        PACK_AND_STORE_ALIGNED(4)
+        PACK_AND_STORE_ALIGNED(5)
+        PACK_AND_STORE_ALIGNED(6)
+        PACK_AND_STORE_ALIGNED(7)
+        PACK_AND_STORE_ALIGNED(8)
     } else {
-        PACK_AND_STORE(1);
-        PACK_AND_STORE(2);
-        PACK_AND_STORE(3);
-        PACK_AND_STORE(4);
-        PACK_AND_STORE(5);
-        PACK_AND_STORE(6);
-        PACK_AND_STORE(7);
-        PACK_AND_STORE(8);
+        PACK_AND_STORE(1)
+        PACK_AND_STORE(2)
+        PACK_AND_STORE(3)
+        PACK_AND_STORE(4)
+        PACK_AND_STORE(5)
+        PACK_AND_STORE(6)
+        PACK_AND_STORE(7)
+        PACK_AND_STORE(8)
     }
 #undef PACK_AND_STORE
 #undef PACK_AND_STORE_ALIGNED
 }
+}
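The added braces serve the same constraint: PACK_AND_STORE and PACK_AND_STORE_ALIGNED declare const vectors in their bodies, so wrapping each body in { ... } gives those declarations their own block, where they legally come first; the bare "{" after the #undef pair earlier, matched by the new "}" at the end, does the same for the v_2/v_4 declarations that would otherwise sit after statements. Since the macro bodies now end in ";}", the invocations drop their trailing semicolons. A reduced sketch of the idiom (hypothetical macro, not the FFmpeg one):

    /* The braces make each expansion a complete compound statement, so the
     * declaration inside sits at the top of its own block: valid C90 even
     * after other statements, and no trailing semicolon is needed. */
    #define STORE(i) { const int v##i = (i) * 2; out[i] = v##i; }

    void demo(int *out)
    {
        out[0] = -1;  /* a statement first... */
        STORE(1)      /* ...then a block-scoped declaration: still valid C90 */
        STORE(2)
    }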