Originally committed as revision 12562 to svn://svn.ffmpeg.org/ffmpeg/trunk
Luca Barbato authored on 2008/03/24 00:35:19
@@ -223,21 +223,31 @@ static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c)
     const vector signed int zero = vec_splat_s32(0);
     const int properStride = (stride % 16);
     const int srcAlign = ((unsigned long)src2 % 16);
-    DECLARE_ALIGNED(16, short, qp[8]);
-    qp[0] = c->QP;
+    DECLARE_ALIGNED(16, short, qp[8]) = {c->QP};
     vector signed short vqp = vec_ld(0, qp);
-    vqp = vec_splat(vqp, 0);
-
-    src2 += stride*3;
-
     vector signed short vb0, vb1, vb2, vb3, vb4, vb5, vb6, vb7, vb8, vb9;
     vector unsigned char vbA0, vbA1, vbA2, vbA3, vbA4, vbA5, vbA6, vbA7, vbA8, vbA9;
     vector unsigned char vbB0, vbB1, vbB2, vbB3, vbB4, vbB5, vbB6, vbB7, vbB8, vbB9;
     vector unsigned char vbT0, vbT1, vbT2, vbT3, vbT4, vbT5, vbT6, vbT7, vbT8, vbT9;
+    vector unsigned char perml0, perml1, perml2, perml3, perml4,
+                         perml5, perml6, perml7, perml8, perml9;
+    register int j0 = 0,
+                 j1 = stride,
+                 j2 = 2 * stride,
+                 j3 = 3 * stride,
+                 j4 = 4 * stride,
+                 j5 = 5 * stride,
+                 j6 = 6 * stride,
+                 j7 = 7 * stride,
+                 j8 = 8 * stride,
+                 j9 = 9 * stride;
+
+    vqp = vec_splat(vqp, 0);
+
+    src2 += stride*3;

 #define LOAD_LINE(i)                                                    \
-    const vector unsigned char perml##i =                               \
-        vec_lvsl(i * stride, src2);                                     \
+    perml##i = vec_lvsl(i * stride, src2);                              \
     vbA##i = vec_ld(i * stride, src2);                                  \
     vbB##i = vec_ld(i * stride + 16, src2);                             \
     vbT##i = vec_perm(vbA##i, vbB##i, perml##i);                        \
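Note on the qp[] change above: C guarantees that aggregate members without an explicit initializer are zero-initialized, so "= {c->QP}" sets qp[0] and clears qp[1]..qp[7]; the vec_splat(vqp, 0) that follows only reads element 0 in any case. A minimal standalone sketch of that semantics (plain C, hypothetical value, not the FFmpeg code):

    #include <stdio.h>

    int main(void)
    {
        short qp[8] = {42};  /* element 0 set explicitly; 1..7 become 0 */
        int i;

        for (i = 0; i < 8; i++)
            printf("qp[%d] = %d\n", i, qp[i]);
        return 0;
    }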
@@ -246,7 +256,6 @@ static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c)
         (vector unsigned char)vbT##i)

 #define LOAD_LINE_ALIGNED(i)                                            \
-    register int j##i = i * stride;                                     \
     vbT##i = vec_ld(j##i, src2);                                        \
     vb##i =                                                             \
         (vector signed short)vec_mergeh((vector signed char)zero,       \
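The hoisting of perml0..perml9, and of the j##i offsets out of LOAD_LINE_ALIGNED, appears to serve C90's declaration-before-statement rule: once LOAD_LINE(0) has executed statements, the declaration that used to open LOAD_LINE(1) would land after them. A reduced sketch of the pattern, with hypothetical macros (not the FFmpeg ones):

    /* Old shape: each expansion opens with a declaration, so back-to-back
     * expansions put a declaration after statements (invalid C90, flagged
     * by gcc's -Wdeclaration-after-statement). */
    #define LOAD_OLD(i) const int j##i = (i) * stride; sum += j##i

    /* New shape: declarations hoisted, the macro is statements only. */
    #define LOAD_NEW(i) sum += j##i

    int demo(int stride)
    {
        int sum = 0;
        int j0 = 0, j1 = stride;  /* hoisted, as in the patch */

        LOAD_NEW(0);
        LOAD_NEW(1);  /* fine; LOAD_OLD(1) here would declare after a statement */
        return sum;
    }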
@@ -255,7 +264,7 @@ static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c)
     /* Special-casing the aligned case is worthwhile, as all calls from
      * the (transposed) horizontable deblocks will be aligned, in addition
      * to the naturally aligned vertical deblocks. */
-    if (properStride && srcAlign) {
+    if (properStride && srcAlign) {
         LOAD_LINE_ALIGNED(0);
         LOAD_LINE_ALIGNED(1);
         LOAD_LINE_ALIGNED(2);
@@ -266,7 +275,7 @@ static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c)
         LOAD_LINE_ALIGNED(7);
         LOAD_LINE_ALIGNED(8);
         LOAD_LINE_ALIGNED(9);
-    } else {
+    } else {
         LOAD_LINE(0);
         LOAD_LINE(1);
         LOAD_LINE(2);
@@ -280,7 +289,7 @@ static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c)
     }
 #undef LOAD_LINE
 #undef LOAD_LINE_ALIGNED
-
+{
     const vector unsigned short v_2 = vec_splat_u16(2);
     const vector unsigned short v_4 = vec_splat_u16(4);

@@ -346,7 +355,7 @@ static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c)
                        0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);

 #define PACK_AND_STORE(i)                                               \
-    const vector unsigned char perms##i =                               \
+{   const vector unsigned char perms##i =                               \
         vec_lvsr(i * stride, src2);                                     \
     const vector unsigned char vf##i =                                  \
         vec_packsu(vr##i, (vector signed short)zero);                   \
@@ -361,40 +370,41 @@ static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c)
     const vector unsigned char svB##i =                                 \
         vec_sel(vg2##i, vbB##i, mask##i);                               \
     vec_st(svA##i, i * stride, src2);                                   \
-    vec_st(svB##i, i * stride + 16, src2)
+    vec_st(svB##i, i * stride + 16, src2);}

 #define PACK_AND_STORE_ALIGNED(i)                                       \
-    const vector unsigned char vf##i =                                  \
+{   const vector unsigned char vf##i =                                  \
        vec_packsu(vr##i, (vector signed short)zero);                    \
     const vector unsigned char vg##i =                                  \
         vec_perm(vf##i, vbT##i, permHH);                                \
-    vec_st(vg##i, i * stride, src2)
+    vec_st(vg##i, i * stride, src2);}

     /* Special-casing the aligned case is worthwhile, as all calls from
      * the (transposed) horizontable deblocks will be aligned, in addition
      * to the naturally aligned vertical deblocks. */
     if (properStride && srcAlign) {
-        PACK_AND_STORE_ALIGNED(1);
-        PACK_AND_STORE_ALIGNED(2);
-        PACK_AND_STORE_ALIGNED(3);
-        PACK_AND_STORE_ALIGNED(4);
-        PACK_AND_STORE_ALIGNED(5);
-        PACK_AND_STORE_ALIGNED(6);
-        PACK_AND_STORE_ALIGNED(7);
-        PACK_AND_STORE_ALIGNED(8);
+        PACK_AND_STORE_ALIGNED(1)
+        PACK_AND_STORE_ALIGNED(2)
+        PACK_AND_STORE_ALIGNED(3)
+        PACK_AND_STORE_ALIGNED(4)
+        PACK_AND_STORE_ALIGNED(5)
+        PACK_AND_STORE_ALIGNED(6)
+        PACK_AND_STORE_ALIGNED(7)
+        PACK_AND_STORE_ALIGNED(8)
     } else {
-        PACK_AND_STORE(1);
-        PACK_AND_STORE(2);
-        PACK_AND_STORE(3);
-        PACK_AND_STORE(4);
-        PACK_AND_STORE(5);
-        PACK_AND_STORE(6);
-        PACK_AND_STORE(7);
-        PACK_AND_STORE(8);
+        PACK_AND_STORE(1)
+        PACK_AND_STORE(2)
+        PACK_AND_STORE(3)
+        PACK_AND_STORE(4)
+        PACK_AND_STORE(5)
+        PACK_AND_STORE(6)
+        PACK_AND_STORE(7)
+        PACK_AND_STORE(8)
     }
 #undef PACK_AND_STORE
 #undef PACK_AND_STORE_ALIGNED
 }
+}
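The added braces serve the same constraint: PACK_AND_STORE and PACK_AND_STORE_ALIGNED declare const vectors in their bodies, so wrapping each body in { ... } gives those declarations their own block, where they legally come first; the bare "{" after the #undef pair earlier, matched by the new "}" at the end, does the same for the v_2/v_4 declarations that would otherwise sit after statements. Since the macro bodies now end in ";}", the invocations drop their trailing semicolons. A reduced sketch of the idiom (hypothetical macro, not the FFmpeg one):

    /* The braces make each expansion a complete compound statement, so the
     * declaration inside sits at the top of its own block: valid C90 even
     * after other statements, and no trailing semicolon is needed. */
    #define STORE(i) { const int v##i = (i) * 2; out[i] = v##i; }

    void demo(int *out)
    {
        out[0] = -1;  /* a statement first... */
        STORE(1)      /* ...then a block-scoped declaration: still valid C90 */
        STORE(2)
    }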