diff options
author | L. E. Segovia <amy@centricular.com> | 2024-05-18 18:24:33 +0000 |
---|---|---|
committer | L. E. Segovia <amy@centricular.com> | 2024-05-18 18:28:06 +0000 |
commit | d200b75cb126935a179e62858d0b5e4e5b78cbc3 (patch) | |
tree | 9770f8075bfc7e7e93b75ed9042bf01964d8349d | |
parent | 163f9a1e9706bf4dc9bd04f4878fc3fef40efcec (diff) |
x86: fix AVX detection by implementing the check recommended by Intel
See https://gitlab.freedesktop.org/gstreamer/orc/-/issues/65#note_2407239
Part-of: <https://gitlab.freedesktop.org/gstreamer/orc/-/merge_requests/184>
-rw-r--r-- | orc/orccpu-x86.c | 33 |
1 files changed, 26 insertions, 7 deletions
```diff
diff --git a/orc/orccpu-x86.c b/orc/orccpu-x86.c
index c7be3c1..8ba3620 100644
--- a/orc/orccpu-x86.c
+++ b/orc/orccpu-x86.c
@@ -30,6 +30,7 @@
 #endif
 
 #include <fcntl.h>
+#include <immintrin.h>
 
 #ifdef _MSC_VER
 # include <intrin.h>
@@ -300,6 +301,19 @@ orc_x86_cpuid_get_branding_string (void)
   _orc_cpu_name = orc_x86_processor_string;
 }
 
+// Checks if XMM and YMM state are enabled in XCR0.
+// See 14.3 DETECTION OF INTEL® AVX INSTRUCTIONS on the
+// Intel® 64 and IA-32 Architectures Software Developer’s Manual
+#if !defined(_MSC_VER) || defined(__clang__)
+#define ORC_TARGET_XSAVE __attribute__((target("xsave")))
+#else
+#define ORC_TARGET_XSAVE
+#endif
+static orc_bool ORC_TARGET_XSAVE check_xcr0_ymm()
+{
+  return (_xgetbv(0) & 6U) != 0U;
+}
+
 static void
 orc_x86_cpuid_handle_standard_flags (void)
 {
@@ -329,19 +343,24 @@ orc_x86_cpuid_handle_standard_flags (void)
     orc_x86_sse_flags |= ORC_TARGET_SSE_SSE4_2;
   }
 
-  // Linux mitigation for GDS requires checking for XSAVE too.
   // https://bugzilla.mozilla.org/show_bug.cgi?id=1854795
   // https://gitlab.freedesktop.org/gstreamer/orc/-/issues/65
-  const int xsave_enabled = ecx & (1 << 26);
+  const int osxsave_enabled = ecx & (1 << 27);
+  const int avx_instructions_supported = ecx & (1 << 28);
 
-  if (ecx & (1 << 28) && xsave_enabled) {
-    orc_x86_sse_flags |= ORC_TARGET_AVX_AVX;
-  }
 
   get_cpuid (0x00000007, &eax, &ebx, &ecx, &edx);
 
-  if (ebx & (1 << 5) && xsave_enabled) {
-    orc_x86_sse_flags |= ORC_TARGET_AVX_AVX2;
+  const int avx2_instructions_supported = ebx & (1 << 5);
+
+  if (check_xcr0_ymm() && osxsave_enabled) {
+    if (avx_instructions_supported) {
+      orc_x86_sse_flags |= ORC_TARGET_AVX_AVX;
+    }
+
+    if (avx2_instructions_supported) {
+      orc_x86_sse_flags |= ORC_TARGET_AVX_AVX2;
+    }
   }
 }
 
```