Browse code

Communicate proper aliasing information to GCC (needed for GCC 4.1).

Originally committed as revision 4384 to svn://svn.ffmpeg.org/ffmpeg/trunk

Falk Hüffner authored on 2005/06/20 06:46:14
Showing 2 changed files
... ...
@@ -56,13 +56,33 @@ static inline uint64_t WORD_VEC(uint64_t x)
56 56
     return x;
57 57
 }
58 58
 
59
-#define ldq(p) (*(const uint64_t *) (p))
60
-#define ldl(p) (*(const int32_t *) (p))
61
-#define stl(l, p) do { *(uint32_t *) (p) = (l); } while (0)
62
-#define stq(l, p) do { *(uint64_t *) (p) = (l); } while (0)
63 59
 #define sextw(x) ((int16_t) (x))
64 60
 
65 61
 #ifdef __GNUC__
62
+#define ldq(p)                                                  \
63
+    (((union {                                                  \
64
+        uint64_t __l;                                           \
65
+        __typeof__(*(p)) __s[sizeof (uint64_t) / sizeof *(p)];  \
66
+    } *) (p))->__l)
67
+#define ldl(p)                                                  \
68
+    (((union {                                                  \
69
+        int32_t __l;                                            \
70
+        __typeof__(*(p)) __s[sizeof (int32_t) / sizeof *(p)];   \
71
+    } *) (p))->__l)
72
+#define stq(l, p)                                                       \
73
+    do {                                                                \
74
+        (((union {                                                      \
75
+            uint64_t __l;                                               \
76
+            __typeof__(*(p)) __s[sizeof (uint64_t) / sizeof *(p)];      \
77
+        } *) (p))->__l) = l;                                            \
78
+    } while (0)
79
+#define stl(l, p)                                                       \
80
+    do {                                                                \
81
+        (((union {                                                      \
82
+            int32_t __l;                                                \
83
+            __typeof__(*(p)) __s[sizeof (int32_t) / sizeof *(p)];       \
84
+        } *) (p))->__l) = l;                                            \
85
+    } while (0)
66 86
 struct unaligned_long { uint64_t l; } __attribute__((packed));
67 87
 #define ldq_u(p)     (*(const uint64_t *) (((uint64_t) (p)) & ~7ul))
68 88
 #define uldq(a)	     (((const struct unaligned_long *) (a))->l)
... ...
@@ -132,6 +152,10 @@ struct unaligned_long { uint64_t l; } __attribute__((packed));
132 132
 #elif defined(__DECC)		/* Digital/Compaq/hp "ccc" compiler */
133 133
 
134 134
 #include <c_asm.h>
135
+#define ldq(p) (*(const uint64_t *) (p))
136
+#define ldl(p) (*(const int32_t *)  (p))
137
+#define stq(l, p) do { *(uint64_t *) (p) = (l); } while (0)
138
+#define stl(l, p) do { *(int32_t *)  (p) = (l); } while (0)
135 139
 #define ldq_u(a)     asm ("ldq_u   %v0,0(%a0)", a)
136 140
 #define uldq(a)	     (*(const __unaligned uint64_t *) (a))
137 141
 #define cmpbge(a, b) asm ("cmpbge  %a0,%a1,%v0", a, b)
... ...
@@ -235,25 +235,22 @@ static inline void idct_col2(DCTELEM *col)
235 235
 {
236 236
     int i;
237 237
     uint64_t l, r;
238
-    uint64_t *lcol = (uint64_t *) col;
239 238
 
240 239
     for (i = 0; i < 8; ++i) {
241
-        int_fast32_t a0 = col[0] + (1 << (COL_SHIFT - 1)) / W4;
240
+        int_fast32_t a0 = col[i] + (1 << (COL_SHIFT - 1)) / W4;
242 241
 
243 242
         a0 *= W4;
244
-        col[0] = a0 >> COL_SHIFT;
245
-        ++col;
243
+        col[i] = a0 >> COL_SHIFT;
246 244
     }
247 245
 
248
-    l = lcol[0];
249
-    r = lcol[1];
250
-    lcol[ 2] = l; lcol[ 3] = r;
251
-    lcol[ 4] = l; lcol[ 5] = r;
252
-    lcol[ 6] = l; lcol[ 7] = r;
253
-    lcol[ 8] = l; lcol[ 9] = r;
254
-    lcol[10] = l; lcol[11] = r;
255
-    lcol[12] = l; lcol[13] = r;
256
-    lcol[14] = l; lcol[15] = r;
246
+    l = ldq(col + 0 * 4); r = ldq(col + 1 * 4);
247
+    stq(l, col +  2 * 4); stq(r, col +  3 * 4);
248
+    stq(l, col +  4 * 4); stq(r, col +  5 * 4);
249
+    stq(l, col +  6 * 4); stq(r, col +  7 * 4);
250
+    stq(l, col +  8 * 4); stq(r, col +  9 * 4);
251
+    stq(l, col + 10 * 4); stq(r, col + 11 * 4);
252
+    stq(l, col + 12 * 4); stq(r, col + 13 * 4);
253
+    stq(l, col + 14 * 4); stq(r, col + 15 * 4);
257 254
 }
258 255
 
259 256
 void simple_idct_axp(DCTELEM *block)
... ...
@@ -275,22 +272,20 @@ void simple_idct_axp(DCTELEM *block)
275 275
     if (rowsZero) {
276 276
         idct_col2(block);
277 277
     } else if (rowsConstant) {
278
-        uint64_t *lblock = (uint64_t *) block;
279
-
280 278
         idct_col(block);
281 279
         for (i = 0; i < 8; i += 2) {
282
-            uint64_t v = (uint16_t) block[i * 8];
283
-            uint64_t w = (uint16_t) block[i * 8 + 8];
280
+            uint64_t v = (uint16_t) block[0];
281
+            uint64_t w = (uint16_t) block[8];
284 282
 
285 283
             v |= v << 16;
286 284
             w |= w << 16;
287 285
             v |= v << 32;
288 286
             w |= w << 32;
289
-            lblock[0] = v;
290
-            lblock[1] = v;
291
-            lblock[2] = w;
292
-            lblock[3] = w;
293
-            lblock += 4;
287
+            stq(v, block + 0 * 4);
288
+            stq(v, block + 1 * 4);
289
+            stq(w, block + 2 * 4);
290
+            stq(w, block + 3 * 4);
291
+	    block += 4 * 4;
294 292
         }
295 293
     } else {
296 294
         for (i = 0; i < 8; i++)