
Support for MacIntel, last part: balign directives

Determines whether .align's arg is power-of-two or not, then defines
ASMALIGN appropriately in config.h. Changes all .baligns to ASMALIGNs.

Patch by John Dalgliesh % johnd AH defyne P org %

Original thread:
Date: Aug 11, 2006 8:00 AM
Subject: Re: [Ffmpeg-devel] Mac OS X Intel last part: balign directives

Originally committed as revision 5990 to svn://svn.ffmpeg.org/ffmpeg/trunk

John Dalgliesh authored on 2006/08/13 01:37:31
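For reference, the two ASMALIGN definitions that configure writes into config.h in this patch are reproduced below, together with a small, purely illustrative sketch of a call site; the demo function is not part of the patch and assumes a GCC-style inline-asm toolchain.

/* Power-of-two assembler (e.g. Apple's Mach-O as): .align takes an exponent,
 * so ASMALIGN(3) expands to ".align 3\n\t", i.e. 8-byte alignment. */
#define ASMALIGN(ZEROBITS) ".align " #ZEROBITS "\n\t"

/* Byte-count assembler: the 1<< restores the same meaning, so
 * ASMALIGN(3) expands to ".align 1<<3\n\t", again 8-byte alignment. */
/* #define ASMALIGN(ZEROBITS) ".align 1<<" #ZEROBITS "\n\t" */

/* Illustrative only -- keep whichever definition matches your assembler
 * (configure picks it automatically); the old code hard-wired ".balign 8". */
static void asmalign_demo(void)
{
    __asm__ __volatile__ (
        ASMALIGN(3)                      /* used to be: ".balign 8 \n\t" */
        "nop                            \n\t"
        ::);
}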
Showing 8 changed files
... ...
@@ -468,6 +468,7 @@ pthreads="no"
 swscaler="no"
 gpl="no"
 memalignhack="no"
+asmalign_pot="unknown"
 
 # OS specific
 targetos=`uname -s`
... ...
@@ -1469,6 +1470,12 @@ if test "$gprof" = "yes" ; then
     LDFLAGS="$LDFLAGS -p"
 fi
 
+# find if .align arg is power-of-two or not
+if test $asmalign_pot = "unknown"; then
+    asmalign_pot="no"
+    echo 'asm (".align 3");' | check_cc && asmalign_pot="yes"
+fi
+
 echo "install prefix   $PREFIX"
 echo "source path      $source_path"
 echo "C compiler       $cc"
... ...
@@ -1535,6 +1542,7 @@ echo "network support      $network"
 if test "$network" = "yes" ; then
     echo "IPv6 support         $ipv6"
 fi
+echo ".align is power-of-two" $asmalign_pot
 if test "$gpl" = "no" ; then
     echo "License: LGPL"
 else
... ...
@@ -2096,6 +2104,12 @@ if test "$amr_if2" = "yes" ; then
   echo "AMR_CFLAGS=-DIF2=1" >> config.mak
 fi
 
+if test "$asmalign_pot" = "yes" ; then
+  echo '#define ASMALIGN(ZEROBITS) ".align " #ZEROBITS "\n\t"' >> $TMPH
+else
+  echo '#define ASMALIGN(ZEROBITS) ".align 1<<" #ZEROBITS "\n\t"' >> $TMPH
+fi
+
 
 for codec in $DECODER_LIST $ENCODER_LIST $PARSER_LIST $DEMUXER_LIST $MUXER_LIST; do
     echo "#define CONFIG_`echo $codec | tr a-z A-Z` 1" >> $TMPH
... ...
@@ -56,7 +56,7 @@ static const uint64_t ff_pw_15 attribute_used __attribute__ ((aligned(8))) = 0x0
 static const uint64_t ff_pb_3F attribute_used __attribute__ ((aligned(8))) = 0x3F3F3F3F3F3F3F3FULL;
 static const uint64_t ff_pb_FC attribute_used __attribute__ ((aligned(8))) = 0xFCFCFCFCFCFCFCFCULL;
 
-#define JUMPALIGN() __asm __volatile (".balign 8"::)
+#define JUMPALIGN() __asm __volatile (ASMALIGN(3)::)
 #define MOVQ_ZERO(regd)  __asm __volatile ("pxor %%" #regd ", %%" #regd ::)
 
 #define MOVQ_WONE(regd) \
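Because the macro expands to a string literal that already ends in "\n\t", ASMALIGN(n) drops into these asm templates by ordinary string-literal concatenation, exactly where the old ".balign ...\n\t" fragment sat. A minimal sketch of that mechanism (the loop body is made up for illustration and assumes the power-of-two definition):

#define ASMALIGN(ZEROBITS) ".align " #ZEROBITS "\n\t"

/* Adjacent string literals are merged by the compiler, so the template
 * below becomes one string and the local label 1: starts on a
 * 2^4 = 16-byte boundary, as ".balign 16" did before this patch. */
static void aligned_loop_demo(void)
{
    __asm__ __volatile__ (
        "mov  $16, %%eax                \n\t"
        ASMALIGN(4)                      /* was: ".balign 16 \n\t" */
        "1:                             \n\t"
        "dec  %%eax                     \n\t"
        "jnz  1b                        \n\t"
        ::: "%eax", "cc");
}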
... ...
@@ -204,7 +204,7 @@ static void get_pixels_mmx(DCTELEM *block, const uint8_t *pixels, int line_size)
     asm volatile(
         "mov $-128, %%"REG_a"           \n\t"
         "pxor %%mm7, %%mm7              \n\t"
-        ".balign 16                     \n\t"
+        ASMALIGN(4)
         "1:                             \n\t"
         "movq (%0), %%mm0               \n\t"
         "movq (%0, %2), %%mm2           \n\t"
... ...
@@ -232,7 +232,7 @@ static inline void diff_pixels_mmx(DCTELEM *block, const uint8_t *s1, const uint
     asm volatile(
         "pxor %%mm7, %%mm7              \n\t"
         "mov $-128, %%"REG_a"           \n\t"
-        ".balign 16                     \n\t"
+        ASMALIGN(4)
         "1:                             \n\t"
         "movq (%0), %%mm0               \n\t"
         "movq (%1), %%mm2               \n\t"
... ...
@@ -375,7 +375,7 @@ static void put_pixels4_mmx(uint8_t *block, const uint8_t *pixels, int line_size
 {
     __asm __volatile(
          "lea (%3, %3), %%"REG_a"       \n\t"
-         ".balign 8                     \n\t"
+         ASMALIGN(3)
          "1:                            \n\t"
          "movd (%1), %%mm0              \n\t"
          "movd (%1, %3), %%mm1          \n\t"
... ...
@@ -401,7 +401,7 @@ static void put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, int line_size
 {
     __asm __volatile(
          "lea (%3, %3), %%"REG_a"       \n\t"
-         ".balign 8                     \n\t"
+         ASMALIGN(3)
          "1:                            \n\t"
          "movq (%1), %%mm0              \n\t"
          "movq (%1, %3), %%mm1          \n\t"
... ...
@@ -427,7 +427,7 @@ static void put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, int line_siz
 {
     __asm __volatile(
          "lea (%3, %3), %%"REG_a"       \n\t"
-         ".balign 8                     \n\t"
+         ASMALIGN(3)
          "1:                            \n\t"
          "movq (%1), %%mm0              \n\t"
          "movq 8(%1), %%mm4             \n\t"
... ...
@@ -754,7 +754,7 @@ static void DEF(avg_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line
         "lea (%3, %3), %%"REG_a"        \n\t"
         "movq (%1), %%mm0               \n\t"
         PAVGB" 1(%1), %%mm0             \n\t"
-        ".balign 8                      \n\t"
+         ASMALIGN(3)
         "1:                             \n\t"
         "movq (%1, %%"REG_a"), %%mm2    \n\t"
         "movq (%1, %3), %%mm1           \n\t"
... ...
@@ -28,7 +28,7 @@ static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line
     MOVQ_BFE(mm6);
     __asm __volatile(
         "lea    (%3, %3), %%"REG_a"     \n\t"
-        ".balign 8                      \n\t"
+        ASMALIGN(3)
         "1:                             \n\t"
         "movq   (%1), %%mm0             \n\t"
         "movq   1(%1), %%mm1            \n\t"
... ...
@@ -69,7 +69,7 @@ static void attribute_unused DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, u
         "movq   %%mm4, (%3)             \n\t"
         "add    %5, %3                  \n\t"
         "decl   %0                      \n\t"
-        ".balign 8                      \n\t"
+        ASMALIGN(3)
         "1:                             \n\t"
         "movq   (%1), %%mm0             \n\t"
         "movq   (%2), %%mm1             \n\t"
... ...
@@ -110,7 +110,7 @@ static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int lin
     MOVQ_BFE(mm6);
     __asm __volatile(
         "lea        (%3, %3), %%"REG_a" \n\t"
-        ".balign 8                      \n\t"
+        ASMALIGN(3)
         "1:                             \n\t"
         "movq   (%1), %%mm0             \n\t"
         "movq   1(%1), %%mm1            \n\t"
... ...
@@ -168,7 +168,7 @@ static void attribute_unused DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1,
         "movq   %%mm5, 8(%3)            \n\t"
         "add    %5, %3                  \n\t"
         "decl   %0                      \n\t"
-        ".balign 8                      \n\t"
+        ASMALIGN(3)
         "1:                             \n\t"
         "movq   (%1), %%mm0             \n\t"
         "movq   (%2), %%mm1             \n\t"
... ...
@@ -206,7 +206,7 @@ static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line
     __asm __volatile(
         "lea (%3, %3), %%"REG_a"        \n\t"
         "movq (%1), %%mm0               \n\t"
-        ".balign 8                      \n\t"
+        ASMALIGN(3)
         "1:                             \n\t"
         "movq   (%1, %3), %%mm1         \n\t"
         "movq   (%1, %%"REG_a"),%%mm2   \n\t"
... ...
@@ -246,7 +246,7 @@ static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin
         "paddusw %%mm1, %%mm5           \n\t"
         "xor    %%"REG_a", %%"REG_a"    \n\t"
         "add    %3, %1                  \n\t"
-        ".balign 8                      \n\t"
+        ASMALIGN(3)
         "1:                             \n\t"
         "movq   (%1, %%"REG_a"), %%mm0  \n\t"
         "movq   1(%1, %%"REG_a"), %%mm2 \n\t"
... ...
@@ -458,7 +458,7 @@ static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line
     __asm __volatile(
         "lea    (%3, %3), %%"REG_a"     \n\t"
         "movq   (%1), %%mm0             \n\t"
-        ".balign 8                      \n\t"
+        ASMALIGN(3)
         "1:                             \n\t"
         "movq   (%1, %3), %%mm1         \n\t"
         "movq   (%1, %%"REG_a"), %%mm2  \n\t"
... ...
@@ -509,7 +509,7 @@ static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin
         "paddusw %%mm1, %%mm5           \n\t"
         "xor    %%"REG_a", %%"REG_a"    \n\t"
         "add    %3, %1                  \n\t"
-        ".balign 8                      \n\t"
+        ASMALIGN(3)
         "1:                             \n\t"
         "movq   (%1, %%"REG_a"), %%mm0  \n\t"
         "movq   1(%1, %%"REG_a"), %%mm2 \n\t"
... ...
@@ -34,7 +34,7 @@ static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
 {
     long len= -(stride*h);
     asm volatile(
-        ".balign 16                     \n\t"
+        ASMALIGN(4)
         "1:                             \n\t"
         "movq (%1, %%"REG_a"), %%mm0    \n\t"
         "movq (%2, %%"REG_a"), %%mm2    \n\t"
... ...
@@ -70,7 +70,7 @@ static inline void sad8_1_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
 {
     long len= -(stride*h);
     asm volatile(
-        ".balign 16                     \n\t"
+        ASMALIGN(4)
         "1:                             \n\t"
         "movq (%1, %%"REG_a"), %%mm0    \n\t"
         "movq (%2, %%"REG_a"), %%mm2    \n\t"
... ...
@@ -92,7 +92,7 @@ static inline void sad8_2_mmx2(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, in
 {
     long len= -(stride*h);
     asm volatile(
-        ".balign 16                     \n\t"
+        ASMALIGN(4)
         "1:                             \n\t"
         "movq (%1, %%"REG_a"), %%mm0    \n\t"
         "movq (%2, %%"REG_a"), %%mm2    \n\t"
... ...
@@ -118,7 +118,7 @@ static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
 { //FIXME reuse src
     long len= -(stride*h);
     asm volatile(
-        ".balign 16                     \n\t"
+        ASMALIGN(4)
         "movq "MANGLE(bone)", %%mm5     \n\t"
         "1:                             \n\t"
         "movq (%1, %%"REG_a"), %%mm0    \n\t"
... ...
@@ -155,7 +155,7 @@ static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int
 {
     long len= -(stride*h);
     asm volatile(
-        ".balign 16                     \n\t"
+        ASMALIGN(4)
         "1:                             \n\t"
         "movq (%1, %%"REG_a"), %%mm0    \n\t"
         "movq (%2, %%"REG_a"), %%mm1    \n\t"
... ...
@@ -193,7 +193,7 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
 {
     long len= -(stride*h);
     asm volatile(
-        ".balign 16                     \n\t"
+        ASMALIGN(4)
         "1:                             \n\t"
         "movq (%1, %%"REG_a"), %%mm0    \n\t"
         "movq (%2, %%"REG_a"), %%mm1    \n\t"
... ...
@@ -66,7 +66,7 @@ asm volatile(
                 "packssdw %%mm5, %%mm5          \n\t"
                 "psubw %%mm5, %%mm7             \n\t"
                 "pxor %%mm4, %%mm4              \n\t"
-                ".balign 16                     \n\t"
+                ASMALIGN(4)
                 "1:                             \n\t"
                 "movq (%0, %3), %%mm0           \n\t"
                 "movq 8(%0, %3), %%mm1          \n\t"
... ...
@@ -129,7 +129,7 @@ asm volatile(
                 "packssdw %%mm5, %%mm5          \n\t"
                 "psubw %%mm5, %%mm7             \n\t"
                 "pxor %%mm4, %%mm4              \n\t"
-                ".balign 16                     \n\t"
+                ASMALIGN(4)
                 "1:                             \n\t"
                 "movq (%0, %3), %%mm0           \n\t"
                 "movq 8(%0, %3), %%mm1          \n\t"
... ...
@@ -222,7 +222,7 @@ asm volatile(
                 "packssdw %%mm6, %%mm6          \n\t"
                 "packssdw %%mm6, %%mm6          \n\t"
                 "mov %3, %%"REG_a"              \n\t"
-                ".balign 16                     \n\t"
+                ASMALIGN(4)
                 "1:                             \n\t"
                 "movq (%0, %%"REG_a"), %%mm0    \n\t"
                 "movq 8(%0, %%"REG_a"), %%mm1   \n\t"
... ...
@@ -285,7 +285,7 @@ asm volatile(
                 "packssdw %%mm6, %%mm6          \n\t"
                 "packssdw %%mm6, %%mm6          \n\t"
                 "mov %3, %%"REG_a"              \n\t"
-                ".balign 16                     \n\t"
+                ASMALIGN(4)
                 "1:                             \n\t"
                 "movq (%0, %%"REG_a"), %%mm0    \n\t"
                 "movq 8(%0, %%"REG_a"), %%mm1   \n\t"
... ...
@@ -357,7 +357,7 @@ asm volatile(
                 "packssdw %%mm6, %%mm6          \n\t"
                 "packssdw %%mm6, %%mm6          \n\t"
                 "mov %3, %%"REG_a"              \n\t"
-                ".balign 16                     \n\t"
+                ASMALIGN(4)
                 "1:                             \n\t"
                 "movq (%0, %%"REG_a"), %%mm0    \n\t"
                 "movq 8(%0, %%"REG_a"), %%mm1   \n\t"
... ...
@@ -418,7 +418,7 @@ asm volatile(
                 "packssdw %%mm6, %%mm6          \n\t"
                 "packssdw %%mm6, %%mm6          \n\t"
                 "mov %3, %%"REG_a"              \n\t"
-                ".balign 16                     \n\t"
+                ASMALIGN(4)
                 "1:                             \n\t"
                 "movq (%0, %%"REG_a"), %%mm0    \n\t"
                 "movq 8(%0, %%"REG_a"), %%mm1   \n\t"
... ...
@@ -112,7 +112,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
             "pxor %%mm6, %%mm6                  \n\t"
             "psubw (%3), %%mm6                  \n\t" // -bias[0]
             "mov $-128, %%"REG_a"               \n\t"
-            ".balign 16                         \n\t"
+            ASMALIGN(4)
             "1:                                 \n\t"
             "pxor %%mm1, %%mm1                  \n\t" // 0
             "movq (%1, %%"REG_a"), %%mm0        \n\t" // block[i]
... ...
@@ -156,7 +156,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
             "pxor %%mm7, %%mm7                  \n\t" // 0
             "pxor %%mm4, %%mm4                  \n\t" // 0
             "mov $-128, %%"REG_a"               \n\t"
-            ".balign 16                         \n\t"
+            ASMALIGN(4)
             "1:                                 \n\t"
             "pxor %%mm1, %%mm1                  \n\t" // 0
             "movq (%1, %%"REG_a"), %%mm0        \n\t" // block[i]
... ...
@@ -785,7 +785,7 @@ IDCT(  16(%1), 80(%1), 48(%1), 112(%1),  8(%0), 20)
 IDCT(  24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
         "jmp 9f                         \n\t"
 
-        "#.balign 16                    \n\t"\
+        "#" ASMALIGN(4)                      \
         "4:                             \n\t"
 Z_COND_IDCT(  64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 6f)
 Z_COND_IDCT(  96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 5f)
... ...
@@ -860,7 +860,7 @@ IDCT(  16(%1), 80(%1), 48(%1), 112(%1),  8(%0), 20)
 IDCT(  24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
         "jmp 9f                         \n\t"
 
-        "#.balign 16                    \n\t"\
+        "#" ASMALIGN(4)                      \
         "6:                             \n\t"
 Z_COND_IDCT(  96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 7f)
 
... ...
@@ -926,7 +926,7 @@ IDCT(  16(%1), 80(%1), 48(%1), 112(%1),  8(%0), 20)
 IDCT(  24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
         "jmp 9f                         \n\t"
 
-        "#.balign 16                    \n\t"\
+        "#" ASMALIGN(4)                      \
         "2:                             \n\t"
 Z_COND_IDCT(  96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 3f)
 
... ...
@@ -1003,7 +1003,7 @@ IDCT(  16(%1), 80(%1), 48(%1), 112(%1),  8(%0), 20)
 IDCT(  24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
         "jmp 9f                         \n\t"
 
-        "#.balign 16                    \n\t"\
+        "#" ASMALIGN(4)                      \
         "3:                             \n\t"
 #undef IDCT
 #define IDCT(src0, src4, src1, src5, dst, shift) \
... ...
@@ -1067,7 +1067,7 @@ IDCT(  16(%1), 80(%1), 48(%1), 112(%1),  8(%0), 20)
 IDCT(  24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
         "jmp 9f                         \n\t"
 
-        "#.balign 16                    \n\t"\
+        "#" ASMALIGN(4)                      \
         "5:                             \n\t"
 #undef IDCT
 #define IDCT(src0, src4, src1, src5, dst, shift) \
... ...
@@ -1132,7 +1132,7 @@ IDCT(  16(%1), 80(%1), 48(%1), 112(%1),  8(%0), 20)
         "jmp 9f                         \n\t"
 
 
-        "#.balign 16                    \n\t"\
+        "#" ASMALIGN(4)                      \
         "1:                             \n\t"
 #undef IDCT
 #define IDCT(src0, src4, src1, src5, dst, shift) \
... ...
@@ -1206,7 +1206,7 @@ IDCT(  24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
         "jmp 9f                         \n\t"
 
 
-        "#.balign 16                    \n\t"
+        "#" ASMALIGN(4)
         "7:                             \n\t"
 #undef IDCT
 #define IDCT(src0, src4, src1, src5, dst, shift) \