Browse code

x86: check for AVX support

This adds configure and runtime checks for AVX support on x86 CPUs.

Signed-off-by: Mans Rullgard <mans@mansr.com>
(cherry picked from commit 87f1355f9b4fc11414d0e6a91404203c2745f89f)

Mans Rullgard authored on 2011/02/16 11:39:42
Showing 4 changed files
... ...
@@ -222,6 +222,7 @@ Advanced options (experts only):
222 222
   --disable-mmx2           disable MMX2 optimizations
223 223
   --disable-sse            disable SSE optimizations
224 224
   --disable-ssse3          disable SSSE3 optimizations
225
+  --disable-avx            disable AVX optimizations
225 226
   --disable-armv5te        disable armv5te optimizations
226 227
   --disable-armv6          disable armv6 optimizations
227 228
   --disable-armv6t2        disable armv6t2 optimizations
... ...
@@ -975,6 +976,7 @@ ARCH_EXT_LIST='
975 975
     armv6
976 976
     armv6t2
977 977
     armvfp
978
+    avx
978 979
     iwmmxt
979 980
     mmi
980 981
     mmx
... ...
@@ -1183,6 +1185,7 @@ mmx_deps="x86"
1183 1183
 mmx2_deps="mmx"
1184 1184
 sse_deps="mmx"
1185 1185
 ssse3_deps="sse"
1186
+avx_deps="ssse3"
1186 1187
 
1187 1188
 aligned_stack_if_any="ppc x86"
1188 1189
 fast_64bit_if_any="alpha ia64 mips64 parisc64 ppc64 sparc64 x86_64"
... ...
@@ -2677,6 +2680,7 @@ EOF
2677 2677
 
2678 2678
         check_yasm "pextrd [eax], xmm0, 1" && enable yasm ||
2679 2679
             die "yasm not found, use --disable-yasm for a crippled build"
2680
+        check_yasm "vpaddw xmm0, xmm0, xmm0" || disable avx
2680 2681
     fi
2681 2682
 
2682 2683
     case "$cpu" in
... ...
@@ -44,7 +44,7 @@ int main(void)
44 44
     int cpu_flags = av_get_cpu_flags();
45 45
 
46 46
     printf("cpu_flags = 0x%08X\n", cpu_flags);
47
-    printf("cpu_flags = %s%s%s%s%s%s%s%s%s%s%s%s\n",
47
+    printf("cpu_flags = %s%s%s%s%s%s%s%s%s%s%s%s%s\n",
48 48
 #if   ARCH_ARM
49 49
            cpu_flags & AV_CPU_FLAG_IWMMXT   ? "IWMMXT "     : "",
50 50
 #elif ARCH_PPC
... ...
@@ -60,6 +60,7 @@ int main(void)
60 60
            cpu_flags & AV_CPU_FLAG_SSSE3    ? "SSSE3 "      : "",
61 61
            cpu_flags & AV_CPU_FLAG_SSE4     ? "SSE4.1 "     : "",
62 62
            cpu_flags & AV_CPU_FLAG_SSE42    ? "SSE4.2 "     : "",
63
+           cpu_flags & AV_CPU_FLAG_AVX      ? "AVX "        : "",
63 64
            cpu_flags & AV_CPU_FLAG_3DNOW    ? "3DNow "      : "",
64 65
            cpu_flags & AV_CPU_FLAG_3DNOWEXT ? "3DNowExt "   : "");
65 66
 #endif
... ...
@@ -36,6 +36,7 @@
36 36
 #define AV_CPU_FLAG_SSSE3        0x0080 ///< Conroe SSSE3 functions
37 37
 #define AV_CPU_FLAG_SSE4         0x0100 ///< Penryn SSE4.1 functions
38 38
 #define AV_CPU_FLAG_SSE42        0x0200 ///< Nehalem SSE4.2 functions
39
+#define AV_CPU_FLAG_AVX          0x4000 ///< AVX functions: requires OS support even if YMM registers aren't used
39 40
 #define AV_CPU_FLAG_IWMMXT       0x0100 ///< XScale IWMMXT
40 41
 #define AV_CPU_FLAG_ALTIVEC      0x0001 ///< standard
41 42
 
... ...
@@ -35,6 +35,9 @@
35 35
            "=c" (ecx), "=d" (edx)\
36 36
          : "0" (index));
37 37
 
38
+#define xgetbv(index,eax,edx)                                   \
39
+    __asm__ ("xgetbv" : "=a"(eax), "=d"(edx) : "c" (index))
40
+
38 41
 /* Function to test if multimedia instructions are supported...  */
39 42
 int ff_get_cpu_flags_x86(void)
40 43
 {
... ...
@@ -93,6 +96,15 @@ int ff_get_cpu_flags_x86(void)
93 93
             rval |= AV_CPU_FLAG_SSE4;
94 94
         if (ecx & 0x00100000 )
95 95
             rval |= AV_CPU_FLAG_SSE42;
96
+#if HAVE_AVX
97
+        /* Check OSXSAVE and AVX bits */
98
+        if ((ecx & 0x18000000) == 0x18000000) {
99
+            /* Check for OS support: XCR0 bits 1 and 2 (XMM and YMM state) must both be set */
100
+            xgetbv(0, eax, edx);
101
+            if ((eax & 0x6) == 0x6)
102
+                rval |= AV_CPU_FLAG_AVX;
103
+        }
104
+#endif
96 105
 #endif
97 106
                   ;
98 107
     }