Closed michaelortmann closed 8 months ago
This is interesting, and I think it's a LLVM backend bug? Can't say for sure.
Zig version: 0.12.0-dev.3142+9d500bda2
This is zig build-obj -OReleaseFast --verbose-llvm-ir
into opt -O3 -S ir.ll > optimized.ll
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable
define dso_local double @foo_strict(double %0) local_unnamed_addr #0 !dbg !137 {
Entry:
call void @llvm.dbg.value(metadata double %0, metadata !140, metadata !DIExpression()), !dbg !141
%1 = fadd double %0, 0x4270000000000000, !dbg !142
%2 = fadd double %1, 0xC270000000000000, !dbg !143
ret double %2, !dbg !144
}
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable
define dso_local double @foo_optimized(double returned %0) local_unnamed_addr #0 !dbg !145 {
Entry:
call void @llvm.dbg.value(metadata double %0, metadata !146, metadata !DIExpression()), !dbg !147
ret double %0, !dbg !148
}
attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable "frame-pointer"="none" "target-cpu"="znver3" "target-features"="-16bit-mode,-32bit-mode,-3dnow,-3dnowa,+64bit,+adx,+aes,+allow-light-256-bit,-amx-bf16,-amx-complex,-amx-fp16,-amx-int8,-amx-tile,+avx,+avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,+bmi,+bmi2,+branchfusion,-cldemote,+clflushopt,+clwb,+clzero,+cmov,-cmpccxadd,+crc32,+cx16,+cx8,-enqcmd,-ermsb,+f16c,-false-deps-getmant,-false-deps-lzcnt-tzcnt,-false-deps-mulc,-false-deps-mullq,-false-deps-perm,-false-deps-popcnt,-false-deps-range,-fast-11bytenop,+fast-15bytenop,-fast-7bytenop,+fast-bextr,-fast-gather,-fast-hops,+fast-lzcnt,+fast-movbe,+fast-scalar-fsqrt,+fast-scalar-shift-masks,-fast-shld-rotate,-fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle,+fast-vector-fsqrt,-fast-vector-shift-masks,-faster-shift-than-shuffle,+fma,-fma4,+fsgsbase,+fsrm,+fxsr,-gfni,-harden-sls-ijmp,-harden-sls-ret,-hreset,-idivl-to-divb,-idivq-to-divl,+invpcid,-kl,-lea-sp,-lea-uses-ag,-lvi-cfi,-lvi-load-hardening,-lwp,+lzcnt,+macrofusion,+mmx,+movbe,-movdir64b,-movdiri,-mwaitx,-no-bypass-delay,-no-bypass-delay-blend,-no-bypass-delay-mov,-no-bypass-delay-shuffle,+nopl,-pad-short-functions,+pclmul,-pconfig,-pku,+popcnt,-prefer-128-bit,-prefer-256-bit,-prefer-mask-registers,-prefer-movmsk-over-vtest,-prefetchi,-prefetchwt1,+prfchw,-ptwrite,-raoint,+rdpid,+rdpru,+rdrnd,+rdseed,-retpoline,-retpoline-external-thunk,-retpoline-indirect-branches,-retpoline-indirect-calls,-rtm,+sahf,+sbb-dep-breaking,-serialize,-seses,-sgx,+sha,-sha512,+shstk,-slow-3ops-lea,-slow-incdec,-slow-lea,-slow-pmaddwd,-slow-pmulld,+slow-shld,-slow-two-mem-ops,-slow-unaligned-mem-16,-slow-unaligned-mem-32,-sm3,-sm4,-soft-float,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+sse4a,-sse-unaligned-mem,+ssse3,-tagged-globals,-tbm,-tsxldtrk,-tuning-fast-imm-vector-shift,-uintr,-use-glm-div-sqrt-costs,-use-slm-arith-costs,+vaes,+vpclmulqdq,+vzeroupper,-waitpkg,-wbnoinvd,-widekl,+x87,-xop,+xsave,+xsavec,+xsaveopt,+xsaves" }
attributes #1 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) }
And here is zig build-obj test.zig -femit-llvm-ir=optimized.ll -OReleaseFast
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable
define dso_local double @foo_optimized(double %0) local_unnamed_addr #1 !dbg !137 {
Entry:
call void @llvm.dbg.value(metadata double %0, metadata !140, metadata !DIExpression()), !dbg !141
%1 = fadd double %0, 0x4270000000000000, !dbg !142
%2 = fadd double %1, 0xC270000000000000, !dbg !143
ret double %2, !dbg !144
}
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable
define dso_local double @foo_strict(double %0) local_unnamed_addr #1 {
%2 = tail call double @foo_optimized(double %0) #1
ret double %2
}
attributes #0 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) }
attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable "frame-pointer"="none" "target-cpu"="znver3" "target-features"="-16bit-mode,-32bit-mode,-3dnow,-3dnowa,+64bit,+adx,+aes,+allow-light-256-bit,-amx-bf16,-amx-complex,-amx-fp16,-amx-int8,-amx-tile,+avx,+avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,+bmi,+bmi2,+branchfusion,-cldemote,+clflushopt,+clwb,+clzero,+cmov,-cmpccxadd,+crc32,+cx16,+cx8,-enqcmd,-ermsb,+f16c,-false-deps-getmant,-false-deps-lzcnt-tzcnt,-false-deps-mulc,-false-deps-mullq,-false-deps-perm,-false-deps-popcnt,-false-deps-range,-fast-11bytenop,+fast-15bytenop,-fast-7bytenop,+fast-bextr,-fast-gather,-fast-hops,+fast-lzcnt,+fast-movbe,+fast-scalar-fsqrt,+fast-scalar-shift-masks,-fast-shld-rotate,-fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle,+fast-vector-fsqrt,-fast-vector-shift-masks,-faster-shift-than-shuffle,+fma,-fma4,+fsgsbase,+fsrm,+fxsr,-gfni,-harden-sls-ijmp,-harden-sls-ret,-hreset,-idivl-to-divb,-idivq-to-divl,+invpcid,-kl,-lea-sp,-lea-uses-ag,-lvi-cfi,-lvi-load-hardening,-lwp,+lzcnt,+macrofusion,+mmx,+movbe,-movdir64b,-movdiri,-mwaitx,-no-bypass-delay,-no-bypass-delay-blend,-no-bypass-delay-mov,-no-bypass-delay-shuffle,+nopl,-pad-short-functions,+pclmul,-pconfig,-pku,+popcnt,-prefer-128-bit,-prefer-256-bit,-prefer-mask-registers,-prefer-movmsk-over-vtest,-prefetchi,-prefetchwt1,+prfchw,-ptwrite,-raoint,+rdpid,+rdpru,+rdrnd,+rdseed,-retpoline,-retpoline-external-thunk,-retpoline-indirect-branches,-retpoline-indirect-calls,-rtm,+sahf,+sbb-dep-breaking,-serialize,-seses,-sgx,+sha,-sha512,+shstk,-slow-3ops-lea,-slow-incdec,-slow-lea,-slow-pmaddwd,-slow-pmulld,+slow-shld,-slow-two-mem-ops,-slow-unaligned-mem-16,-slow-unaligned-mem-32,-sm3,-sm4,-soft-float,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+sse4a,-sse-unaligned-mem,+ssse3,-tagged-globals,-tbm,-tsxldtrk,-tuning-fast-imm-vector-shift,-uintr,-use-glm-div-sqrt-costs,-use-slm-arith-costs,+vaes,+vpclmulqdq,+vzeroupper,-waitpkg,-wbnoinvd,-widekl,+x87,-xop,+xsave,+xsavec,+xsaveopt,+xsaves" }
I'm not familiar enough with the LLVM backend to say for sure, but might be a helpful datapoint :)
Zig Version
0.11.0
Steps to Reproduce and Observed Behavior
Follow the example here: https://ziglang.org/documentation/master/#Floating-Point-Operations
Expected Behavior
zig 0.10.1 produces:
But zig 0.11.0 produces:
Looking at generated code i see that zig 0.11.0 produces the same code for
foo_strict()
andfoo_optimized()
. In case of-O ReleaseFast
one function will even be simplejmp
to the other one.Its not clear to me, if this is a bug in zig or a bug in doc.
What i expect is a difference in result and generation of floating point math depending on
@setFloatMode(.Optimized);
vs..Strict
and an example showing that difference.