High-performance automatic differentiation of LLVM and MLIR.
[Bithack] Handle or of i32 with 32 #797

ZuseZ4 commented 2 years ago
cannot handle unknown binary operator:   %367 = or i32 %366, 32


wsmoses commented 2 years ago

Can you attach a full compiler explorer example? This feels more likely to actually be an integer rather than a weird bithack of a float/double...

ZuseZ4 commented 2 years ago


attributes #0 = { "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cascadelake" "target-features"="+64bit,+adx,+aes,+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+avx512vnni,+bmi,+bmi2,+clflushopt,+clwb,+cmov,+crc32,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+pku,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves,-amx-bf16,-amx-int8,-amx-tile,-avx512bf16,-avx512bitalg,-avx512er,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-cldemote,-clzero,-enqcmd,-fma4,-gfni,-hreset,-kl,-lwp,-movdir64b,-movdiri,-mwaitx,-pconfig,-prefetchwt1,-ptwrite,-rdpid,-rtm,-serialize,-sgx,-sha,-shstk,-sse4a,-tbm,-tsxldtrk,-uintr,-vaes,-vpclmulqdq,-waitpkg,-wbnoinvd,-widekl,-xop" "tune-cpu"="cascadelake" }
attributes #1 = { noreturn "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cascadelake" "target-features"="+64bit,+adx,+aes,+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+avx512vnni,+bmi,+bmi2,+clflushopt,+clwb,+cmov,+crc32,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+pku,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves,-amx-bf16,-amx-int8,-amx-tile,-avx512bf16,-avx512bitalg,-avx512er,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-cldemote,-clzero,-enqcmd,-fma4,-gfni,-hreset,-kl,-lwp,-movdir64b,-movdiri,-mwaitx,-pconfig,-prefetchwt1,-ptwrite,-rdpid,-rtm,-serialize,-sgx,-sha,-shstk,-sse4a,-tbm,-tsxldtrk,-uintr,-vaes,-vpclmulqdq,-waitpkg,-wbnoinvd,-widekl,-xop" "tune-cpu"="cascadelake" }
attributes #2 = { nofree nosync nounwind readnone speculatable willreturn }
attributes #3 = { mustprogress uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cascadelake" "target-features"="+64bit,+adx,+aes,+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+avx512vnni,+bmi,+bmi2,+clflushopt,+clwb,+cmov,+crc32,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+pku,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves,-amx-bf16,-amx-int8,-amx-tile,-avx512bf16,-avx512bitalg,-avx512er,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-cldemote,-clzero,-enqcmd,-fma4,-gfni,-hreset,-kl,-lwp,-movdir64b,-movdiri,-mwaitx,-pconfig,-prefetchwt1,-ptwrite,-rdpid,-rtm,-serialize,-sgx,-sha,-shstk,-sse4a,-tbm,-tsxldtrk,-uintr,-vaes,-vpclmulqdq,-waitpkg,-wbnoinvd,-widekl,-xop" "tune-cpu"="cascadelake" }
attributes #4 = { nounwind }
attributes #5 = { noreturn }

!llvm.ident = !{!0, !0, !0, !0}
!llvm.module.flags = !{!1, !2, !3, !4, !5, !6}

!0 = !{!"clang version 14.0.4 (git@github.com:llvm/llvm-project.git 29f1039a7285a5c3a9c353d054140bf2556d4c4d)"}
!1 = !{i32 1, !"wchar_size", i32 4}
!2 = !{i32 7, !"uwtable", i32 1}
!3 = !{i32 1, !"ThinLTO", i32 0}
!4 = !{i32 1, !"EnableSplitLTOUnit", i32 1}
!5 = !{i32 7, !"frame-pointer", i32 2}
!6 = !{i32 1, !"LTOPostLink", i32 1}
!7 = !{!8, !19, i64 544}
!8 = !{!"_ZTSN3olb4util5TimerIdEE", !9, i64 0, !17, i64 416, !17, i64 424, !17, i64 432, !16, i64 440, !16, i64 448, !16, i64 456, !11, i64 464, !18, i64 472, !18, i64 488, !18, i64 504, !18, i64 520, !17, i64 536, !19, i64 544, !19, i64 548, !16, i64 552, !19, i64 560, !19, i64 564, !19, i64 568, !17, i64 576, !17, i64 584, !17, i64 592, !17, i64 600, !17, i64 608, !17, i64 616, !17, i64 624, !17, i64 632, !17, i64 640}
!9 = !{!"_ZTSN3olb14OstreamManagerE", !10, i64 8}
!10 = !{!"_ZTSN3olb5OMBufE", !11, i64 104, !14, i64 112}
!11 = !{!"any pointer", !12, i64 0}
!12 = !{!"omnipotent char", !13, i64 0}
!13 = !{!"Simple C++ TBAA"}
!14 = !{!"_ZTSNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE", !15, i64 0, !16, i64 8, !12, i64 16}
!15 = !{!"_ZTSNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_Alloc_hiderE", !11, i64 0}
!16 = !{!"long", !12, i64 0}
!17 = !{!"double", !12, i64 0}
!18 = !{!"_ZTS7timeval", !16, i64 0, !16, i64 8}
!19 = !{!"int", !12, i64 0}
!20 = !{!8, !19, i64 548}
!21 = !{!8, !17, i64 624}
!22 = !{!8, !17, i64 632}
!23 = !{!8, !16, i64 552}
!24 = !{!8, !17, i64 536}
!25 = !{i64 0, i64 8, !26, i64 8, i64 8, !26}
!26 = !{!16, !16, i64 0}
!27 = !{i64 0, i64 8, !26}
!28 = !{!29, !29, i64 0}
!29 = !{!"vtable pointer", !13, i64 0}
!30 = !{!31, !11, i64 240}
!31 = !{!"_ZTSSt9basic_iosIcSt11char_traitsIcEE", !11, i64 216, !12, i64 224, !32, i64 225, !11, i64 232, !11, i64 240, !11, i64 248, !11, i64 256}
!32 = !{!"bool", !12, i64 0}
!33 = !{!34, !12, i64 56}
!34 = !{!"_ZTSSt5ctypeIcE", !11, i64 16, !32, i64 24, !11, i64 32, !11, i64 40, !11, i64 48, !12, i64 56, !12, i64 57, !12, i64 313, !12, i64 569}
!35 = !{!12, !12, i64 0}
!36 = !{!37, !16, i64 8}
!37 = !{!"_ZTSSt8ios_base", !16, i64 8, !16, i64 16, !38, i64 24, !39, i64 28, !39, i64 32, !11, i64 40, !40, i64 48, !12, i64 64, !19, i64 192, !11, i64 200, !41, i64 208}
!38 = !{!"_ZTSSt13_Ios_Fmtflags", !12, i64 0}
!39 = !{!"_ZTSSt12_Ios_Iostate", !12, i64 0}
!40 = !{!"_ZTSNSt8ios_base6_WordsE", !11, i64 0, !16, i64 8}
!41 = !{!"_ZTSSt6locale", !11, i64 0}
!42 = !{!37, !38, i64 24}
!43 = !{!38, !38, i64 0}
!44 = !{!8, !17, i64 640}
!45 = !{!37, !16, i64 16}
!46 = !{!31, !32, i64 225}
!47 = !{i8 0, i8 2}
!48 = !{!31, !12, i64 224}
!49 = !{!8, !17, i64 576}
!50 = !{!8, !17, i64 592}
ZuseZ4 commented 2 years ago

Explorer: https://fwd.gymni.ch/kBC8Ut https://fwd.gymni.ch/gSWcwM https://fwd.gymni.ch/YCJsqv

ZuseZ4 commented 2 years ago

Minimized: https://fwd.gymni.ch/CiQ2l0

ZuseZ4 commented 2 years ago

https://fwd.gymni.ch/MWgfXW https://fwd.gymni.ch/CnTz25

ZuseZ4 commented 2 years ago

@wsmoses I'm trying to get rid of the OpenLB types from this example: https://fwd.gymni.ch/nmzV0u by directly passing i8 instead of %"class.olb::util::Timer" and bitcasting it to i8 and i8. However, I'm getting an illegal updateAnalysis error.

Either way, here is a smaller example: https://fwd.gymni.ch/OVBQrT

ZuseZ4 commented 2 years ago


ZuseZ4 commented 2 years ago


wsmoses commented 2 years ago
  %365 = load i32, i32* %364, align 8, !tbaa !42
  %366 = and i32 %365, -177
  %367 = or i32 %366, 32
  store i32 %367, i32* %364, align 8, !tbaa !43

!12 = !{!"omnipotent char", !13, i64 0}
!13 = !{!"Simple C++ TBAA"}

!16 = !{!"long", !12, i64 0}

!37 = !{!"_ZTSSt8ios_base", !16, i64 8, !16, i64 16, !38, i64 24, !39, i64 28, !39, i64 32, !11, i64 40, !40, i64 48, !12, i64 64, !19, i64 192, !11, i64 200, !41, i64 208}
!38 = !{!"_ZTSSt13_Ios_Fmtflags", !12, i64 0}

!42 = !{!37, !38, i64 24}
!43 = !{!38, !38, i64 0}

The relevant section is above. Without TBAA or other information, the minimal cases truly contain nothing from which Enzyme can deduce that the value is an integer (which it appears to be). The TBAA metadata from the earlier, relevant case is posted above. It would be worth checking type analysis here to see which of the types exist and can be deduced as integral.

wsmoses commented 2 years ago

Minimized, but with the above info: https://fwd.gymni.ch/fZtSHY

ZuseZ4 commented 2 years ago

closed by #801