Closed IceTDrinker closed 9 months ago
Reduces to something like this:
; Reduced reproducer: loads three <8 x double> vectors from %p1, %p2 and %p3,
; combines them with the 512-bit vfmaddsub.pd intrinsic, and stores the
; <8 x double> result to %p4. The function is compiled with attribute group #1.
define void @test(ptr %p1, ptr %p2, ptr %p3, ptr %p4) #1 {
%v1 = load <8 x double>, ptr %p1
%v2 = load <8 x double>, ptr %p2
%v3 = load <8 x double>, ptr %p3
; The trailing i32 4 is an immediate operand (declared `immarg`);
; presumably the rounding-mode selector -- TODO confirm.
%res = call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %v1, <8 x double> %v2, <8 x double> %v3, i32 4)
store <8 x double> %res, ptr %p4
ret void
}
declare <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i32 immarg)
attributes #1 = { inlinehint nonlazybind uwtable "probe-stack"="inline-asm" "target-cpu"="skylake" "target-features"="+prfchw,-cldemote,+avx,+aes,+sahf,+pclmul,-xop,+crc32,+xsaves,-avx512fp16,-usermsr,-sm4,+sse4.1,-avx512ifma,+xsave,-avx512pf,+sse4.2,-tsxldtrk,-ptwrite,-widekl,-sm3,+invpcid,+64bit,+xsavec,-avx10.1-512,-avx512vpopcntdq,+cmov,-avx512vp2intersect,-avx512cd,+movbe,-avxvnniint8,-avx512er,-amx-int8,-kl,-avx10.1-256,-sha512,-avxvnni,-rtm,+adx,+avx2,-hreset,-movdiri,-serialize,-vpclmulqdq,-avx512vl,-uintr,+clflushopt,-raoint,-cmpccxadd,+bmi,-amx-tile,+sse,-gfni,-avxvnniint16,-amx-fp16,+xsaveopt,+rdrnd,-avx512f,-amx-bf16,-avx512bf16,-avx512vnni,+cx8,-avx512bw,+sse3,-pku,+fsgsbase,-clzero,-mwaitx,-lwp,+lzcnt,-sha,-movdir64b,-wbnoinvd,-enqcmd,-prefetchwt1,-avxneconvert,-tbm,-pconfig,-amx-complex,+ssse3,+cx16,+bmi2,+fma,+popcnt,-avxifma,+f16c,-avx512bitalg,-rdpru,-clwb,+mmx,+sse2,+rdseed,-avx512vbmi2,-prefetchi,-rdpid,-fma4,-avx512vbmi,-shstk,-vaes,-waitpkg,-sgx,+fxsr,-avx512dq,-sse4a,+sse,+sse2,+fxsr,+sse3,+ssse3,+sse4.1,+sse4.2,+crc32,+popcnt,+avx,+avx2,+bmi,+bmi2,+fma,+lzcnt,+avx512f,+avx512bw,+avx512cd,+avx512dq,+avx512vl" }
Just wanted to take a moment to thank you warmly, @nikic: it's a pleasure to interact and work with you on these issues (even if it's only as a bug reporter 🙂).
Cheers
This fails:
attributes #1 = { "target-cpu"="skylake" "target-features"="+avx512f" }
While this works:
attributes #1 = { "target-features"="+avx512f" }
This also works:
attributes #1 = { "target-cpu"="skylake" "target-features"="+evex512,+avx512f" }
I think it's related to this change: https://github.com/llvm/llvm-project/pull/65920. It automatically adds `+evex512` only if a generic target-cpu is used.
I can't say that I understand the logic behind that change, but I guess this means we have to explicitly add `+evex512` in rustc when AVX-512 features are used.
Code
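Compiling https://github.com/zama-ai/concrete-fft/tree/am/bug/llvm-error with nightly (2024-02-13) fails with `LLVM ERROR: Do not know how to split the result of this operator!` (full output under "Backtrace" below).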
Nightly 2024-02-08 (with the stdsimd removal and stdarch addition) compiles fine.
It looks like the update to LLVM 18 could be the culprit.
Meta
`rustc --version --verbose`:
Error output
Here is the .ll file produced by `--emit=llvm-ir` (as recommended in another issue of this sort):
concrete_fft-a046b09e3ae302d9.concrete_fft.5a520b89991ad9c4-cgu.0.rcgu.ll.txt
(I changed the extension to .txt so that GitHub is not annoying about it; what a way to validate data blobs!)
Backtrace
```
RUST_BACKTRACE=1 RUSTFLAGS="-C target-cpu=native" cargo +nightly rustc --release --features=nightly -- --emit=llvm-ir
   Compiling concrete-fft v0.3.0 (/home/redacted/Documents/zama/code/concrete-fft)
LLVM ERROR: Do not know how to split the result of this operator!
error: could not compile `concrete-fft` (lib)
```