This adds configure and runtime checks for AVX support on x86 CPUs.
Signed-off-by: Mans Rullgard <mans@mansr.com>
(cherry picked from commit 87f1355f9b4fc11414d0e6a91404203c2745f89f)
... | ... |
@@ -222,6 +222,7 @@ Advanced options (experts only): |
222 | 222 |
--disable-mmx2 disable MMX2 optimizations |
223 | 223 |
--disable-sse disable SSE optimizations |
224 | 224 |
--disable-ssse3 disable SSSE3 optimizations |
225 |
+ --disable-avx disable AVX optimizations |
|
225 | 226 |
--disable-armv5te disable armv5te optimizations |
226 | 227 |
--disable-armv6 disable armv6 optimizations |
227 | 228 |
--disable-armv6t2 disable armv6t2 optimizations |
... | ... |
@@ -975,6 +976,7 @@ ARCH_EXT_LIST=' |
975 | 975 |
armv6 |
976 | 976 |
armv6t2 |
977 | 977 |
armvfp |
978 |
+ avx |
|
978 | 979 |
iwmmxt |
979 | 980 |
mmi |
980 | 981 |
mmx |
... | ... |
@@ -1183,6 +1185,7 @@ mmx_deps="x86" |
1183 | 1183 |
mmx2_deps="mmx" |
1184 | 1184 |
sse_deps="mmx" |
1185 | 1185 |
ssse3_deps="sse" |
1186 |
+avx_deps="ssse3" |
|
1186 | 1187 |
|
1187 | 1188 |
aligned_stack_if_any="ppc x86" |
1188 | 1189 |
fast_64bit_if_any="alpha ia64 mips64 parisc64 ppc64 sparc64 x86_64" |
... | ... |
@@ -2677,6 +2680,7 @@ EOF |
2677 | 2677 |
|
2678 | 2678 |
check_yasm "pextrd [eax], xmm0, 1" && enable yasm || |
2679 | 2679 |
die "yasm not found, use --disable-yasm for a crippled build" |
2680 |
+ check_yasm "vpaddw xmm0, xmm0, xmm0" || disable avx |
|
2680 | 2681 |
fi |
2681 | 2682 |
|
2682 | 2683 |
case "$cpu" in |
... | ... |
@@ -44,7 +44,7 @@ int main(void) |
44 | 44 |
int cpu_flags = av_get_cpu_flags(); |
45 | 45 |
|
46 | 46 |
printf("cpu_flags = 0x%08X\n", cpu_flags); |
47 |
- printf("cpu_flags = %s%s%s%s%s%s%s%s%s%s%s%s\n", |
|
47 |
+ printf("cpu_flags = %s%s%s%s%s%s%s%s%s%s%s%s%s\n", |
|
48 | 48 |
#if ARCH_ARM |
49 | 49 |
cpu_flags & AV_CPU_FLAG_IWMMXT ? "IWMMXT " : "", |
50 | 50 |
#elif ARCH_PPC |
... | ... |
@@ -60,6 +60,7 @@ int main(void) |
60 | 60 |
cpu_flags & AV_CPU_FLAG_SSSE3 ? "SSSE3 " : "", |
61 | 61 |
cpu_flags & AV_CPU_FLAG_SSE4 ? "SSE4.1 " : "", |
62 | 62 |
cpu_flags & AV_CPU_FLAG_SSE42 ? "SSE4.2 " : "", |
63 |
+ cpu_flags & AV_CPU_FLAG_AVX ? "AVX " : "", |
|
63 | 64 |
cpu_flags & AV_CPU_FLAG_3DNOW ? "3DNow " : "", |
64 | 65 |
cpu_flags & AV_CPU_FLAG_3DNOWEXT ? "3DNowExt " : ""); |
65 | 66 |
#endif |
... | ... |
@@ -36,6 +36,7 @@ |
36 | 36 |
#define AV_CPU_FLAG_SSSE3 0x0080 ///< Conroe SSSE3 functions |
37 | 37 |
#define AV_CPU_FLAG_SSE4 0x0100 ///< Penryn SSE4.1 functions |
38 | 38 |
#define AV_CPU_FLAG_SSE42 0x0200 ///< Nehalem SSE4.2 functions |
39 |
+#define AV_CPU_FLAG_AVX 0x4000 ///< AVX functions: requires OS support even if YMM registers aren't used |
|
39 | 40 |
#define AV_CPU_FLAG_IWMMXT 0x0100 ///< XScale IWMMXT |
40 | 41 |
#define AV_CPU_FLAG_ALTIVEC 0x0001 ///< standard |
41 | 42 |
|
... | ... |
@@ -35,6 +35,9 @@ |
35 | 35 |
"=c" (ecx), "=d" (edx)\ |
36 | 36 |
: "0" (index)); |
37 | 37 |
|
38 |
+#define xgetbv(index,eax,edx) \ |
|
39 |
+ __asm__ ("xgetbv" : "=a"(eax), "=d"(edx) : "c" (index)) |
|
40 |
+ |
|
38 | 41 |
/* Function to test if multimedia instructions are supported... */ |
39 | 42 |
int ff_get_cpu_flags_x86(void) |
40 | 43 |
{ |
... | ... |
@@ -93,6 +96,15 @@ int ff_get_cpu_flags_x86(void) |
93 | 93 |
rval |= AV_CPU_FLAG_SSE4; |
94 | 94 |
if (ecx & 0x00100000 ) |
95 | 95 |
rval |= AV_CPU_FLAG_SSE42; |
96 |
+#if HAVE_AVX |
|
97 |
+ /* Check OSXSAVE and AVX bits */ |
|
98 |
+ if ((ecx & 0x18000000) == 0x18000000) { |
|
99 |
+ /* Check for OS support: XCR0 must have the XMM and YMM state bits set */ |
|
100 |
+ xgetbv(0, eax, edx); |
|
101 |
+ if ((eax & 0x6) == 0x6) |
|
102 |
+ rval |= AV_CPU_FLAG_AVX; |
|
103 |
+ } |
|
104 |
+#endif |
|
96 | 105 |
#endif |
97 | 106 |
; |
98 | 107 |
} |