Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Luca Barbato <lu_zero@gentoo.org>
... | ... |
@@ -86,6 +86,7 @@ int av_parse_cpu_flags(const char *s) |
86 | 86 |
#define CPUFLAG_SSE4 (AV_CPU_FLAG_SSE4 | CPUFLAG_SSSE3) |
87 | 87 |
#define CPUFLAG_SSE42 (AV_CPU_FLAG_SSE42 | CPUFLAG_SSE4) |
88 | 88 |
#define CPUFLAG_AVX (AV_CPU_FLAG_AVX | CPUFLAG_SSE42) |
89 |
+#define CPUFLAG_AVXSLOW (AV_CPU_FLAG_AVXSLOW | CPUFLAG_AVX) |
|
89 | 90 |
#define CPUFLAG_XOP (AV_CPU_FLAG_XOP | CPUFLAG_AVX) |
90 | 91 |
#define CPUFLAG_FMA3 (AV_CPU_FLAG_FMA3 | CPUFLAG_AVX) |
91 | 92 |
#define CPUFLAG_FMA4 (AV_CPU_FLAG_FMA4 | CPUFLAG_AVX) |
... | ... |
@@ -108,6 +109,7 @@ int av_parse_cpu_flags(const char *s) |
108 | 108 |
{ "sse4.1" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_SSE4 }, .unit = "flags" }, |
109 | 109 |
{ "sse4.2" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_SSE42 }, .unit = "flags" }, |
110 | 110 |
{ "avx" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AVX }, .unit = "flags" }, |
111 |
+ { "avxslow" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AVXSLOW }, .unit = "flags" }, |
|
111 | 112 |
{ "xop" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_XOP }, .unit = "flags" }, |
112 | 113 |
{ "fma3" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_FMA3 }, .unit = "flags" }, |
113 | 114 |
{ "fma4" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_FMA4 }, .unit = "flags" }, |
... | ... |
@@ -219,6 +221,7 @@ static const struct { |
219 | 219 |
{ AV_CPU_FLAG_SSE4, "sse4.1" }, |
220 | 220 |
{ AV_CPU_FLAG_SSE42, "sse4.2" }, |
221 | 221 |
{ AV_CPU_FLAG_AVX, "avx" }, |
222 |
+ { AV_CPU_FLAG_AVXSLOW, "avxslow" }, |
|
222 | 223 |
{ AV_CPU_FLAG_XOP, "xop" }, |
223 | 224 |
{ AV_CPU_FLAG_FMA3, "fma3" }, |
224 | 225 |
{ AV_CPU_FLAG_FMA4, "fma4" }, |
... | ... |
@@ -45,6 +45,7 @@ |
45 | 45 |
#define AV_CPU_FLAG_SSE4 0x0100 ///< Penryn SSE4.1 functions |
46 | 46 |
#define AV_CPU_FLAG_SSE42 0x0200 ///< Nehalem SSE4.2 functions |
47 | 47 |
#define AV_CPU_FLAG_AVX 0x4000 ///< AVX functions: requires OS support even if YMM registers aren't used |
48 |
+#define AV_CPU_FLAG_AVXSLOW 0x8000000 ///< AVX supported, but slow when using YMM registers (e.g. Bulldozer) |
|
48 | 49 |
#define AV_CPU_FLAG_XOP 0x0400 ///< Bulldozer XOP functions |
49 | 50 |
#define AV_CPU_FLAG_FMA4 0x0800 ///< Bulldozer FMA4 functions |
50 | 51 |
#define AV_CPU_FLAG_CMOV 0x1000 ///< i686 cmov |
... | ... |
@@ -54,8 +54,8 @@ |
54 | 54 |
*/ |
55 | 55 |
|
56 | 56 |
#define LIBAVUTIL_VERSION_MAJOR 54 |
57 |
-#define LIBAVUTIL_VERSION_MINOR 13 |
|
58 |
-#define LIBAVUTIL_VERSION_MICRO 1 |
|
57 |
+#define LIBAVUTIL_VERSION_MINOR 14 |
|
58 |
+#define LIBAVUTIL_VERSION_MICRO 0 |
|
59 | 59 |
|
60 | 60 |
#define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ |
61 | 61 |
LIBAVUTIL_VERSION_MINOR, \ |
... | ... |
@@ -167,6 +167,7 @@ int ff_get_cpu_flags_x86(void) |
167 | 167 |
if (ext_caps & (1 << 22)) |
168 | 168 |
rval |= AV_CPU_FLAG_MMXEXT; |
169 | 169 |
|
170 |
+ if (!strncmp(vendor.c, "AuthenticAMD", 12)) { |
|
170 | 171 |
/* Allow for selectively disabling SSE2 functions on AMD processors |
171 | 172 |
with SSE2 support but not SSE4a. This includes Athlon64, some |
172 | 173 |
Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster |
... | ... |
@@ -174,9 +175,19 @@ int ff_get_cpu_flags_x86(void) |
174 | 174 |
AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case |
175 | 175 |
so that SSE2 is used unless explicitly disabled by checking |
176 | 176 |
AV_CPU_FLAG_SSE2SLOW. */ |
177 |
- if (!strncmp(vendor.c, "AuthenticAMD", 12) && |
|
178 |
- rval & AV_CPU_FLAG_SSE2 && !(ecx & 0x00000040)) { |
|
179 |
- rval |= AV_CPU_FLAG_SSE2SLOW; |
|
177 |
+ if (rval & AV_CPU_FLAG_SSE2 && !(ecx & 0x00000040)) |
|
178 |
+ rval |= AV_CPU_FLAG_SSE2SLOW; |
|
179 |
+ |
|
180 |
+ /* Similar to the above but for AVX functions on AMD processors. |
|
181 |
+ This is necessary only for functions using YMM registers on Bulldozer |
|
182 |
+ based CPUs as they lack 256-bit execution units. SSE/AVX functions |
|
183 |
+ using XMM registers are always faster on them. |
|
184 |
+ AV_CPU_FLAG_AVX and AV_CPU_FLAG_AVXSLOW are both set so that AVX is |
|
185 |
+ used unless explicitly disabled by checking AV_CPU_FLAG_AVXSLOW. |
|
186 |
+ TODO: Confirm whether Excavator is affected by this once it's |
|
187 |
+ released, and update the check if necessary. Same for btver2. */ |
|
188 |
+ if (family == 0x15 && (rval & AV_CPU_FLAG_AVX)) |
|
189 |
+ rval |= AV_CPU_FLAG_AVXSLOW; |
|
180 | 190 |
} |
181 | 191 |
|
182 | 192 |
/* XOP and FMA4 use the AVX instruction coding scheme, so they can't be |