ziglang / zig

General-purpose programming language and toolchain for maintaining robust, optimal, and reusable software.
https://ziglang.org
MIT License
32.54k stars 2.37k forks source link

0.13 regression: simdutf targeting x86-64 no longer builds #20414

Open mitchellh opened 3 weeks ago

mitchellh commented 3 weeks ago

Zig Version

0.13.0

Steps to Reproduce and Observed Behavior

Using Zig 0.13, build the amalgamation of the latest simdutf (5.7.8 at the time of writing), targeting x86_64 (either macOS or Linux reproduces the problem).

I am cross-compiling from an aarch64 machine, but I don't believe that matters: I'm fairly sure another tester reproduced this while building on an x86_64 machine.

zig clang simdutf.cpp --no-default-config -nostdinc++ -fno-caret-diagnostics -isystem /nix/store/9axcmc0w5kgpxbf3jvm8psar07b1nnzg-zig-0.13.0/lib/libcxx/include -isystem /nix/store/9axcmc0w5kgpxbf3jvm8psar07b1nnzg-zig-0.13.0/lib/libcxxabi/include -D_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS -D_LIBCXXABI_DISABLE_VISIBILITY_ANNOTATIONS -D_LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS -D_LIBCPP_PSTL_CPU_BACKEND_SERIAL -D_LIBCPP_ABI_VERSION=1 -D_LIBCPP_ABI_NAMESPACE=__1 -D_LIBCPP_HARDENING_MODE=_LIBCPP_HARDENING_MODE_DEBUG -target x86_64-unknown-macosx11.7.1-unknown -nostdinc -fno-spell-checking -F /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX14.5.sdk/System/Library/Frameworks -isystem /nix/store/9axcmc0w5kgpxbf3jvm8psar07b1nnzg-zig-0.13.0/lib/include -isystem /nix/store/9axcmc0w5kgpxbf3jvm8psar07b1nnzg-zig-0.13.0/lib/libc/include/any-macos-any -Xclang -target-cpu -Xclang x86-64 -Xclang -target-feature -Xclang -16bit-mode -Xclang -target-feature -Xclang -32bit-mode -Xclang -target-feature -Xclang -3dnow -Xclang -target-feature -Xclang -3dnowa -Xclang -target-feature -Xclang +64bit -Xclang -target-feature -Xclang -adx -Xclang -target-feature -Xclang -aes -Xclang -target-feature -Xclang -allow-light-256-bit -Xclang -target-feature -Xclang -amx-bf16 -Xclang -target-feature -Xclang -amx-complex -Xclang -target-feature -Xclang -amx-fp16 -Xclang -target-feature -Xclang -amx-int8 -Xclang -target-feature -Xclang -amx-tile -Xclang -target-feature -Xclang -avx -Xclang -target-feature -Xclang -avx10.1-256 -Xclang -target-feature -Xclang -avx10.1-512 -Xclang -target-feature -Xclang -avx2 -Xclang -target-feature -Xclang -avx512bf16 -Xclang -target-feature -Xclang -avx512bitalg -Xclang -target-feature -Xclang -avx512bw -Xclang -target-feature -Xclang -avx512cd -Xclang -target-feature -Xclang -avx512dq -Xclang -target-feature -Xclang -avx512er -Xclang -target-feature -Xclang -avx512f -Xclang -target-feature -Xclang -avx512fp16 -Xclang 
-target-feature -Xclang -avx512ifma -Xclang -target-feature -Xclang -avx512pf -Xclang -target-feature -Xclang -avx512vbmi -Xclang -target-feature -Xclang -avx512vbmi2 -Xclang -target-feature -Xclang -avx512vl -Xclang -target-feature -Xclang -avx512vnni -Xclang -target-feature -Xclang -avx512vp2intersect -Xclang -target-feature -Xclang -avx512vpopcntdq -Xclang -target-feature -Xclang -avxifma -Xclang -target-feature -Xclang -avxneconvert -Xclang -target-feature -Xclang -avxvnni -Xclang -target-feature -Xclang -avxvnniint16 -Xclang -target-feature -Xclang -avxvnniint8 -Xclang -target-feature -Xclang -bmi -Xclang -target-feature -Xclang -bmi2 -Xclang -target-feature -Xclang -branchfusion -Xclang -target-feature -Xclang -ccmp -Xclang -target-feature -Xclang -cf -Xclang -target-feature -Xclang -cldemote -Xclang -target-feature -Xclang -clflushopt -Xclang -target-feature -Xclang -clwb -Xclang -target-feature -Xclang -clzero -Xclang -target-feature -Xclang +cmov -Xclang -target-feature -Xclang -cmpccxadd -Xclang -target-feature -Xclang -crc32 -Xclang -target-feature -Xclang -cx16 -Xclang -target-feature -Xclang +cx8 -Xclang -target-feature -Xclang -egpr -Xclang -target-feature -Xclang -enqcmd -Xclang -target-feature -Xclang -ermsb -Xclang -target-feature -Xclang -evex512 -Xclang -target-feature -Xclang -f16c -Xclang -target-feature -Xclang -false-deps-getmant -Xclang -target-feature -Xclang -false-deps-lzcnt-tzcnt -Xclang -target-feature -Xclang -false-deps-mulc -Xclang -target-feature -Xclang -false-deps-mullq -Xclang -target-feature -Xclang -false-deps-perm -Xclang -target-feature -Xclang -false-deps-popcnt -Xclang -target-feature -Xclang -false-deps-range -Xclang -target-feature -Xclang -fast-11bytenop -Xclang -target-feature -Xclang -fast-15bytenop -Xclang -target-feature -Xclang -fast-7bytenop -Xclang -target-feature -Xclang -fast-bextr -Xclang -target-feature -Xclang -fast-gather -Xclang -target-feature -Xclang -fast-hops -Xclang -target-feature -Xclang -fast-lzcnt 
-Xclang -target-feature -Xclang -fast-movbe -Xclang -target-feature -Xclang -fast-scalar-fsqrt -Xclang -target-feature -Xclang -fast-scalar-shift-masks -Xclang -target-feature -Xclang -fast-shld-rotate -Xclang -target-feature -Xclang -fast-variable-crosslane-shuffle -Xclang -target-feature -Xclang -fast-variable-perlane-shuffle -Xclang -target-feature -Xclang -fast-vector-fsqrt -Xclang -target-feature -Xclang -fast-vector-shift-masks -Xclang -target-feature -Xclang -faster-shift-than-shuffle -Xclang -target-feature -Xclang -fma -Xclang -target-feature -Xclang -fma4 -Xclang -target-feature -Xclang -fsgsbase -Xclang -target-feature -Xclang -fsrm -Xclang -target-feature -Xclang +fxsr -Xclang -target-feature -Xclang -gfni -Xclang -target-feature -Xclang -harden-sls-ijmp -Xclang -target-feature -Xclang -harden-sls-ret -Xclang -target-feature -Xclang -hreset -Xclang -target-feature -Xclang -idivl-to-divb -Xclang -target-feature -Xclang +idivq-to-divl -Xclang -target-feature -Xclang -invpcid -Xclang -target-feature -Xclang -kl -Xclang -target-feature -Xclang -lea-sp -Xclang -target-feature -Xclang -lea-uses-ag -Xclang -target-feature -Xclang -lvi-cfi -Xclang -target-feature -Xclang -lvi-load-hardening -Xclang -target-feature -Xclang -lwp -Xclang -target-feature -Xclang -lzcnt -Xclang -target-feature -Xclang +macrofusion -Xclang -target-feature -Xclang +mmx -Xclang -target-feature -Xclang -movbe -Xclang -target-feature -Xclang -movdir64b -Xclang -target-feature -Xclang -movdiri -Xclang -target-feature -Xclang -mwaitx -Xclang -target-feature -Xclang -ndd -Xclang -target-feature -Xclang -no-bypass-delay -Xclang -target-feature -Xclang -no-bypass-delay-blend -Xclang -target-feature -Xclang -no-bypass-delay-mov -Xclang -target-feature -Xclang -no-bypass-delay-shuffle -Xclang -target-feature -Xclang +nopl -Xclang -target-feature -Xclang -pad-short-functions -Xclang -target-feature -Xclang -pclmul -Xclang -target-feature -Xclang -pconfig -Xclang -target-feature -Xclang -pku 
-Xclang -target-feature -Xclang -popcnt -Xclang -target-feature -Xclang -ppx -Xclang -target-feature -Xclang -prefer-128-bit -Xclang -target-feature -Xclang -prefer-256-bit -Xclang -target-feature -Xclang -prefer-mask-registers -Xclang -target-feature -Xclang -prefer-movmsk-over-vtest -Xclang -target-feature -Xclang -prefer-no-gather -Xclang -target-feature -Xclang -prefer-no-scatter -Xclang -target-feature -Xclang -prefetchi -Xclang -target-feature -Xclang -prefetchwt1 -Xclang -target-feature -Xclang -prfchw -Xclang -target-feature -Xclang -ptwrite -Xclang -target-feature -Xclang -push2pop2 -Xclang -target-feature -Xclang -raoint -Xclang -target-feature -Xclang -rdpid -Xclang -target-feature -Xclang -rdpru -Xclang -target-feature -Xclang -rdrnd -Xclang -target-feature -Xclang -rdseed -Xclang -target-feature -Xclang -retpoline -Xclang -target-feature -Xclang -retpoline-external-thunk -Xclang -target-feature -Xclang -retpoline-indirect-branches -Xclang -target-feature -Xclang -retpoline-indirect-calls -Xclang -target-feature -Xclang -rtm -Xclang -target-feature -Xclang -sahf -Xclang -target-feature -Xclang -sbb-dep-breaking -Xclang -target-feature -Xclang -serialize -Xclang -target-feature -Xclang -seses -Xclang -target-feature -Xclang -sgx -Xclang -target-feature -Xclang -sha -Xclang -target-feature -Xclang -sha512 -Xclang -target-feature -Xclang -shstk -Xclang -target-feature -Xclang +slow-3ops-lea -Xclang -target-feature -Xclang +slow-incdec -Xclang -target-feature -Xclang -slow-lea -Xclang -target-feature -Xclang -slow-pmaddwd -Xclang -target-feature -Xclang -slow-pmulld -Xclang -target-feature -Xclang -slow-shld -Xclang -target-feature -Xclang -slow-two-mem-ops -Xclang -target-feature -Xclang -slow-unaligned-mem-16 -Xclang -target-feature -Xclang -slow-unaligned-mem-32 -Xclang -target-feature -Xclang -sm3 -Xclang -target-feature -Xclang -sm4 -Xclang -target-feature -Xclang -soft-float -Xclang -target-feature -Xclang +sse -Xclang -target-feature -Xclang +sse2 
-Xclang -target-feature -Xclang -sse3 -Xclang -target-feature -Xclang -sse4.1 -Xclang -target-feature -Xclang -sse4.2 -Xclang -target-feature -Xclang -sse4a -Xclang -target-feature -Xclang -sse-unaligned-mem -Xclang -target-feature -Xclang -ssse3 -Xclang -target-feature -Xclang -tagged-globals -Xclang -target-feature -Xclang -tbm -Xclang -target-feature -Xclang -tsxldtrk -Xclang -target-feature -Xclang -tuning-fast-imm-vector-shift -Xclang -target-feature -Xclang -uintr -Xclang -target-feature -Xclang -use-glm-div-sqrt-costs -Xclang -target-feature -Xclang -use-slm-arith-costs -Xclang -target-feature -Xclang -usermsr -Xclang -target-feature -Xclang -vaes -Xclang -target-feature -Xclang -vpclmulqdq -Xclang -target-feature -Xclang +vzeroupper -Xclang -target-feature -Xclang -waitpkg -Xclang -target-feature -Xclang -wbnoinvd -Xclang -target-feature -Xclang -widekl -Xclang -target-feature -Xclang +x87 -Xclang -target-feature -Xclang -xop -Xclang -target-feature -Xclang -xsave -Xclang -target-feature -Xclang -xsavec -Xclang -target-feature -Xclang -xsaveopt -Xclang -target-feature -Xclang -xsaves -mmacos-version-min=11.7.1 -Wno-overriding-option -fsanitize=undefined -fsanitize-trap=undefined -fno-sanitize=function -mred-zone -fno-omit-frame-pointer -fstack-protector-strong --param ssp-buffer-size=4 -D_DEBUG -O0 -fPIC -funwind-tables -gdwarf-4 -gdwarf32 -MD -MV -MF simdutf.o.d -c -o simdutf.o --serialize-diagnostics simdutf.o.diag

Result:

/Users/mitchellh/Downloads/singleheader/simdutf.cpp:23209:26: error: always_inline function '_mm512_setzero_si512' requires target feature 'evex512', but would be inlined into function 'detect_encodings' that is compiled without support for 'evex512'
/Users/mitchellh/Downloads/singleheader/simdutf.cpp:23209:26: error: AVX vector return of type '__m512i' (vector of 8 'long long' values) without 'evex512' enabled changes the ABI
/Users/mitchellh/Downloads/singleheader/simdutf.cpp:23211:20: error: always_inline function '_mm512_loadu_si512' requires target feature 'evex512', but would be inlined into function 'detect_encodings' that is compiled without support for 'evex512'
/Users/mitchellh/Downloads/singleheader/simdutf.cpp:23211:20: error: AVX vector return of type '__m512i' (vector of 8 'long long' values) without 'evex512' enabled changes the ABI
/Users/mitchellh/Downloads/singleheader/simdutf.cpp:23212:43: error: always_inline function '_mm512_set1_epi16' requires target feature 'evex512', but would be inlined into function 'detect_encodings' that is compiled without support for 'evex512'
/Users/mitchellh/Downloads/singleheader/simdutf.cpp:23212:43: error: AVX vector return of type '__m512i' (vector of 8 'long long' values) without 'evex512' enabled changes the ABI
/Users/mitchellh/Downloads/singleheader/simdutf.cpp:23212:22: error:
/Users/mitchellh/Downloads/singleheader/simdutf.cpp:23212:22: error: AVX vector argument of type '__m512i' (vector of 8 'long long' values) without 'evex512' enabled changes the ABI
/Users/mitchellh/Downloads/singleheader/simdutf.cpp:23214:11: error: '__builtin_ia32_ucmpw512_mask' needs target feature avx512bw,evex512
/nix/store/9axcmc0w5kgpxbf3jvm8psar07b1nnzg-zig-0.13.0/lib/include/avx512bwintrin.h:357:5: note: expanded from macro '_mm512_cmplt_epu16_mask'
/nix/store/9axcmc0w5kgpxbf3jvm8psar07b1nnzg-zig-0.13.0/lib/include/avx512bwintrin.h:256:15: note: expanded from macro '_mm512_cmp_epu16_mask'
/Users/mitchellh/Downloads/singleheader/simdutf.cpp:23214:41: error: always_inline function '_mm512_set1_epi16' requires target feature 'evex512', but would be inlined into function 'detect_encodings' that is compiled without support for 'evex512'
/Users/mitchellh/Downloads/singleheader/simdutf.cpp:23214:41: error: AVX vector return of type '__m512i' (vector of 8 'long long' values) without 'evex512' enabled changes the ABI
/Users/mitchellh/Downloads/singleheader/simdutf.cpp:23228:38: error: '__builtin_ia32_ucmpw512_mask' needs target feature avx512bw,evex512
/nix/store/9axcmc0w5kgpxbf3jvm8psar07b1nnzg-zig-0.13.0/lib/include/avx512bwintrin.h:357:5: note: expanded from macro '_mm512_cmplt_epu16_mask'
/nix/store/9axcmc0w5kgpxbf3jvm8psar07b1nnzg-zig-0.13.0/lib/include/avx512bwintrin.h:256:15: note: expanded from macro '_mm512_cmp_epu16_mask'
/Users/mitchellh/Downloads/singleheader/simdutf.cpp:23229:21: error:
/Users/mitchellh/Downloads/singleheader/simdutf.cpp:23229:21: error: AVX vector return of type '__m512i' (vector of 8 'long long' values) without 'evex512' enabled changes the ABI
/Users/mitchellh/Downloads/singleheader/simdutf.cpp:23271:20: error: always_inline function '_mm512_max_epu32' requires target feature 'evex512', but would be inlined into function 'detect_encodings' that is compiled without support for 'evex512'
/Users/mitchellh/Downloads/singleheader/simdutf.cpp:23271:20: error: AVX vector argument of type '__m512i' (vector of 8 'long long' values) without 'evex512' enabled changes the ABI
/Users/mitchellh/Downloads/singleheader/simdutf.cpp:23274:15: error: AVX vector argument of type 'const __m512i' (vector of 8 'long long' values) without 'evex512' enabled changes the ABI
/Users/mitchellh/Downloads/singleheader/simdutf.cpp:23284:30: error: always_inline function '_mm512_maskz_loadu_epi8' requires target feature 'evex512', but would be inlined into function 'detect_encodings' that is compiled without support for 'evex512'
/Users/mitchellh/Downloads/singleheader/simdutf.cpp:23284:30: error: AVX vector return of type '__m512i' (vector of 8 'long long' values) without 'evex512' enabled changes the ABI
fatal error: too many errors emitted, stopping now [-ferror-limit=]

Expected Behavior

Builds.

Additional info: https://github.com/llvm/llvm-project/issues/71325

mitchellh commented 3 weeks ago

I realized my repro has some Nix paths hardcoded. Here is a build.zig that also reproduces it; just put simdutf.cpp inside the vendor folder.

Command: zig build -Dtarget=x86_64-linux

const std = @import("std");

/// Build script for a minimal reproduction: compiles the simdutf amalgamation
/// (`vendor/simdutf.cpp`) into a static library named "simdutf" that links libc++,
/// and installs the `.h` headers found in `vendor/`.
///
/// The target and optimization mode come from the standard `-Dtarget` and
/// `-Doptimize` build options (e.g. `zig build -Dtarget=x86_64-linux`).
pub fn build(b: *std.Build) !void {
    const target = b.standardTargetOptions(.{});
    const optimize = b.standardOptimizeOption(.{});

    const lib = b.addStaticLibrary(.{
        .name = "simdutf",
        .target = target,
        .optimize = optimize,
    });
    lib.linkLibCpp();

    // `b.path` yields a LazyPath relative to the build root; it replaces the
    // deprecated `.{ .path = "..." }` literal.
    const vendor_dir = b.path("vendor");
    lib.addIncludePath(vendor_dir);

    // No extra compiler flags are required to reproduce the failure. A constant
    // empty slice avoids allocating a list that would never hold anything; add
    // flags here if needed.
    const flags: []const []const u8 = &.{};

    lib.addCSourceFiles(.{
        .flags = flags,
        .files = &.{
            "vendor/simdutf.cpp",
        },
    });
    lib.installHeadersDirectory(
        vendor_dir,
        "",
        .{ .include_extensions = &.{".h"} },
    );

    b.installArtifact(lib);
}
jcollie commented 3 weeks ago

Testing on a couple of X86 systems:

Xeon W-2133 (has AVX512): zig build had no errors, but zig build -Dtarget=x86_64-linux had the same errors. AMD Ryzen 7 5800X (no AVX512): both commands had errors.

mitchellh commented 3 weeks ago

This also affects Highway builds (https://github.com/google/highway), which use similar conditional target compilation logic. Getting a clean repro of that for you is much more difficult, so I think the simdutf approach is better. I'm just noting this since it's not specific to one project.

der-teufel-programming commented 3 weeks ago

After some digging and experimenting with the -target-feature flags that Zig passes to zig clang, compared with those that clang passes to itself, it looks like passing -Xclang -target-feature -Xclang -evex512 triggers the issue: passing that flag to clang directly also makes it fail to compile.

Rexicon226 commented 3 weeks ago

The issue is that Zig is including the avx512vbmi2intrin.h file even when the target CPU doesn't support AVX-512. simdutf does its feature detection by checking whether that file is included and, if so, tries to use AVX-512 intrinsics.