Closed spectecjr closed 1 year ago
Hey :)
Happy to ifdef out these attributes for MSVC but replicating the same behaviour as Clang seems non-trivial: https://stackoverflow.com/a/2390626
Let us know if you have any ideas otherwise (I don't know much about MSVC) but for now I'll just do that.
I think an alternative could be using a globally initialized variable, something like:
// Sketch only: a global whose initializer is an immediately-invoked lambda,
// so the lambda body (elided below with "...") runs when the global is
// initialized. The stored value is always `true`; only the side effects of
// the lambda body matter.
bool g_whatever = [] {
uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
...
...
return true;
}();
might be worth a try.
The other non-controversial fix is to compile the x64 run-time check only when defined(__GNUC__) || defined(__clang__), i.e. to disable it for MSVC builds.
I have a possible fix that enables the same behavior and keeps the intent, and also uses intrinsics instead of inline asm (which isn't easily supported on x64/MSVC without dropping to MASM).
#if PERFETTO_BUILDFLAG(PERFETTO_X64_CPU_OPT) && defined(_MSC_VER)
// Included at global scope: system headers must not be pulled in from inside
// a namespace, otherwise their declarations end up nested in it.
#include <intrin.h>
#endif

namespace {
#if PERFETTO_BUILDFLAG(PERFETTO_X64_CPU_OPT)
// If we are building with -msse4 check that the CPU actually supports it.
// This file must be kept in sync with gn/standalone/BUILD.gn.
#ifdef _MSC_VER
// Two views of the four 32-bit values produced by a CPUID query:
//  - `result`: the int[4] array form that the MSVC __cpuid intrinsic fills in
//    (index 0..3 correspond to EAX, EBX, ECX, EDX);
//  - `r`: the same storage exposed as named unsigned registers.
union cpuid_registers {
  int result[4];
  struct {
    uint32_t eax, ebx, ecx, edx;
  } r;
};
// MSVC flavour of the start-up CPU check: verifies that the host CPU (and OS)
// support every instruction-set extension this binary was compiled to use
// (SSE4.2, POPCNT, AVX2, BMI, BMI2). If any is missing, prints a diagnostic to
// stderr and terminates the process with exit code 126.
//
// Uses the <intrin.h> intrinsics instead of inline asm, which MSVC does not
// support on x64.
void PERFETTO_EXPORT_COMPONENT
CheckCpuOptimizations() {
  cpuid_registers cpuid;

  // Leaf 1: basic feature flags, reported in ECX.
  __cpuid(cpuid.result, 1);

  static constexpr uint64_t xcr0_xmm_mask = 0x2u;
  static constexpr uint64_t xcr0_ymm_mask = 0x4u;
  static constexpr uint64_t xcr0_avx_mask = xcr0_xmm_mask | xcr0_ymm_mask;

  // ECX is a 32-bit register, so every mask must fit in 32 bits. Spell the
  // masks as shifts so the bit index is visible and cannot drift from the
  // comment.
  const bool have_popcnt = (cpuid.r.ecx & (1u << 23)) != 0;
  const bool have_sse4_2 = (cpuid.r.ecx & (1u << 20)) != 0;
  const bool have_avx =
      // Does the OS save/restore XMM and YMM state?
      // The order matters: XGETBV may only be executed once OSXSAVE (bit 27)
      // is known to be set, hence the short-circuit evaluation.
      (cpuid.r.ecx & (1u << 27)) &&  // OS support XGETBV - bit 27
      (cpuid.r.ecx & (1u << 28)) &&  // AVX extensions - bit 28
      // XCR0 is read with XGETBV (index 0). This must NOT be __readcr0():
      // CR0 is an unrelated, privileged control register and reading it
      // faults in user mode.
      ((_xgetbv(0) & xcr0_avx_mask) == xcr0_avx_mask);

  // Get level 7 features (eax = 7 and ecx= 0), to check for AVX2 support.
  // (See Intel 64 and IA-32 Architectures Software Developer's Manual
  // Volume 2A: Instruction Set Reference, A-M CPUID).
  // __cpuidex is required here: plain __cpuid does not let us select the
  // sub-leaf (ECX), and leaf 7 is only defined per sub-leaf.
  __cpuidex(cpuid.result, 7, 0);
  const bool have_avx2 = have_avx && (cpuid.r.ebx & (1u << 5)) != 0;
  const bool have_bmi = (cpuid.r.ebx & (1u << 3)) != 0;
  const bool have_bmi2 = (cpuid.r.ebx & (1u << 8)) != 0;

  if (!have_sse4_2 || !have_popcnt || !have_avx2 || !have_bmi || !have_bmi2) {
    fprintf(
        stderr,
        "This executable requires a x86_64 cpu that supports SSE4.2, BMI2 and "
        "AVX2.\n"
#if PERFETTO_BUILDFLAG(PERFETTO_OS_APPLE)
        "On MacOS, this might be caused by running x86_64 binaries on arm64.\n"
        "See https://github.com/google/perfetto/issues/294 for more.\n"
#endif
        "Rebuild with enable_perfetto_x64_cpu_opt=false.\n");
    _exit(126);
  }
}
// MSVC has no __attribute__((constructor)); emulate it with a file-local
// object whose constructor runs during static initialization and performs
// the CPU feature check.
struct static_init_cpu_check {
  static_init_cpu_check() {
    CheckCpuOptimizations();
  }
};

// NOTE: no parentheses after the name. `static_cpu_check();` would declare a
// function returning static_init_cpu_check (the "most vexing parse"), no
// object would be created and the check would silently never run.
static static_init_cpu_check static_cpu_check;
#else // !_MSC_VER
// Executes the CPUID instruction.
//   a, b, c, d : lvalues that receive EAX, EBX, ECX and EDX respectively.
//   a_inp      : value loaded into EAX before CPUID (the leaf).
//   c_inp      : value loaded into ECX before CPUID; the "2" constraint ties
//                it to the same register as output operand 2 ("=c").
//
// Preserve the %rbx register via %rdi to work around a clang bug
// https://bugs.llvm.org/show_bug.cgi?id=17907 (%rbx in an output constraint
// is not considered a clobbered register).
// Sequence: stash %rbx in %rdi, run cpuid, then swap them back so that %rbx
// is restored and %rdi (the "=D" output, i.e. `b`) holds CPUID's EBX result.
#define PERFETTO_GETCPUID(a, b, c, d, a_inp, c_inp) \
asm("mov %%rbx, %%rdi\n" \
"cpuid\n" \
"xchg %%rdi, %%rbx\n" \
: "=a"(a), "=D"(b), "=c"(c), "=d"(d) \
: "a"(a_inp), "2"(c_inp))
// Executes XGETBV with ECX = 0 (extended control register 0, XCR0) and
// returns the low 32 bits of the result, i.e. the value left in EAX.
// The high half (EDX) is captured only to satisfy the asm constraints and is
// discarded.
uint32_t GetXCR0EAX() {
uint32_t eax = 0, edx = 0;
asm("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0));
return eax;
}
// Start-up guard (registered via the `constructor` attribute) for binaries
// built with -msse4 and related flags: confirms that the CPU we are running
// on really implements SSE4.2, POPCNT, AVX2, BMI and BMI2. On failure it
// prints a diagnostic to stderr and exits with status 126.
// This file must be kept in sync with gn/standalone/BUILD.gn.
void PERFETTO_EXPORT_COMPONENT __attribute__((constructor))
CheckCpuOptimizations() {
  uint32_t reg_a = 0, reg_b = 0, reg_c = 0, reg_d = 0;

  // CPUID leaf 1: the feature bits of interest are all in ECX.
  PERFETTO_GETCPUID(reg_a, reg_b, reg_c, reg_d, 1, 0);
  const bool have_popcnt = (reg_c >> 23) & 0x1;
  const bool have_sse4_2 = (reg_c >> 20) & 0x1;
  const bool os_has_xgetbv = (reg_c >> 27) & 0x1;  // OS support XGETBV.
  const bool cpu_has_avx = (reg_c >> 28) & 0x1;    // AVX supported in hardware

  // XCR0 bit 1 covers XMM state and bit 2 covers YMM state; AVX is usable
  // only if the OS saves/restores both.
  static constexpr uint64_t kXcr0AvxMask = 0x2 | 0x4;
  bool have_avx = false;
  // XCR0 is queried only after both CPUID bits are confirmed, exactly as a
  // short-circuiting && chain would do.
  if (os_has_xgetbv && cpu_has_avx) {
    have_avx = (GetXCR0EAX() & kXcr0AvxMask) == kXcr0AvxMask;
  }

  // Get level 7 features (eax = 7 and ecx= 0), to check for AVX2 support.
  // (See Intel 64 and IA-32 Architectures Software Developer's Manual
  // Volume 2A: Instruction Set Reference, A-M CPUID).
  PERFETTO_GETCPUID(reg_a, reg_b, reg_c, reg_d, 7, 0);
  const bool have_avx2 = have_avx && (reg_b & (1u << 5));
  const bool have_bmi = reg_b & (1u << 3);
  const bool have_bmi2 = reg_b & (1u << 8);

  const bool all_supported =
      have_sse4_2 && have_popcnt && have_avx2 && have_bmi && have_bmi2;
  if (all_supported) {
    return;
  }

  fprintf(
      stderr,
      "This executable requires a x86_64 cpu that supports SSE4.2, BMI2 and "
      "AVX2.\n"
#if PERFETTO_BUILDFLAG(PERFETTO_OS_APPLE)
      "On MacOS, this might be caused by running x86_64 binaries on arm64.\n"
      "See https://github.com/google/perfetto/issues/294 for more.\n"
#endif
      "Rebuild with enable_perfetto_x64_cpu_opt=false.\n");
  _exit(126);
}
#endif
#endif
} // namespace
If we go down that route I'd ask we factor out the divergent code that uses intrinsics and keep the common code together. I don't want to have to maintain 2 near copies of the same logic. Feel free to drop a patch for it (see https://perfetto.dev/docs/contributing/getting-started).
I mean that would be great but we have no power to convince the MSVC team :)
https://github.com/google/perfetto/blob/372573604a3cebf15dd79edcdf27e0a5e1e91fdf/src/base/utils.cc#L86
When you compile Perfetto with MSVC (a supported scenario), the build fails on this line because __attribute__((constructor)) is a GCC/Clang-ism and is not supported by MSVC.
Compiling with MSVC, with args.gn set to:
... gives the following error on Windows:
A workaround would be appreciated.