[X86] combinePMULDQ - add op back to worklist if SimplifyDemandedBits succeeds on either operand
Prevents missing other simplifications that may occur deep in the operand chain where CommitTargetLoweringOpt won't add the PMULDQ back to the worklist itself
The patch touches exactly the logic that fails here, so it is most likely the root cause.
Extended Description
llc crashes with an assertion failure when run on the following test:
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
define void @pluto(i8* %arg, float* %arg1) #0 {
bb:
  %tmp = getelementptr inbounds i8, i8* %arg, i64 72
  %tmp2 = bitcast i8* %tmp to float*
  %tmp3 = load float, float* %tmp2, align 4
  %tmp4 = load float, float* %arg1
  %tmp5 = fadd float %tmp4, 0.000000e+00
  %tmp6 = fcmp ogt float %tmp5, 0.000000e+00
  %tmp7 = select i1 %tmp6, i64 9223372036854775807, i64 -9223372036854775808
  %tmp8 = insertelement <4 x i64> undef, i64 %tmp7, i32 0
  %tmp9 = shufflevector <4 x i64> %tmp8, <4 x i64> undef, <4 x i32> zeroinitializer
  br label %bb10
bb10: ; preds = %bb10, %bb
  %tmp11 = phi i32 [ 0, %bb ], [ %tmp30, %bb10 ]
  %tmp12 = phi <4 x i32> [ <i32 9, i32 8, i32 7, i32 6>, %bb ], [ %tmp31, %bb10 ]
  %tmp13 = phi <4 x i64> [ zeroinitializer, %bb ], [ %tmp28, %bb10 ]
  %tmp14 = phi <4 x i64> [ zeroinitializer, %bb ], [ %tmp29, %bb10 ]
  %tmp15 = phi <4 x i32> [ <i32 3, i32 4, i32 5, i32 6>, %bb ], [ %tmp32, %bb10 ]
  %tmp16 = add <4 x i32> %tmp12, <i32 -4, i32 -4, i32 -4, i32 -4>
  %tmp17 = add <4 x i32> %tmp15, <i32 4, i32 4, i32 4, i32 4>
  %tmp18 = zext <4 x i32> %tmp12 to <4 x i64>
  %tmp19 = zext <4 x i32> %tmp16 to <4 x i64>
  %tmp20 = xor <4 x i64> %tmp18, <i64 -1, i64 -1, i64 -1, i64 -1>
  %tmp21 = xor <4 x i64> %tmp19, <i64 -1, i64 -1, i64 -1, i64 -1>
  %tmp22 = icmp ugt <4 x i32> %tmp15, <i32 12, i32 12, i32 12, i32 12>
  %tmp23 = icmp ugt <4 x i32> %tmp17, <i32 12, i32 12, i32 12, i32 12>
  %tmp24 = mul <4 x i64> %tmp9, %tmp20
  %tmp25 = mul <4 x i64> %tmp9, %tmp21
  %tmp26 = select <4 x i1> %tmp22, <4 x i64> zeroinitializer, <4 x i64> %tmp24
  %tmp27 = select <4 x i1> %tmp23, <4 x i64> zeroinitializer, <4 x i64> %tmp25
  %tmp28 = add <4 x i64> %tmp13, %tmp26
  %tmp29 = add <4 x i64> %tmp14, %tmp27
  %tmp30 = add i32 %tmp11, 8
  %tmp31 = add <4 x i32> %tmp12, <i32 -8, i32 -8, i32 -8, i32 -8>
  %tmp32 = add <4 x i32> %tmp15, <i32 8, i32 8, i32 8, i32 8>
  %tmp33 = icmp eq i32 %tmp30, 232
  br i1 %tmp33, label %bb34, label %bb10
bb34: ; preds = %bb10
  %tmp35 = add <4 x i64> %tmp29, %tmp28
  %tmp36 = shufflevector <4 x i64> %tmp35, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %tmp37 = add <4 x i64> %tmp35, %tmp36
  %tmp38 = shufflevector <4 x i64> %tmp37, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %tmp39 = add <4 x i64> %tmp37, %tmp38
  %tmp40 = extractelement <4 x i64> %tmp39, i32 0
  %tmp41 = trunc i64 %tmp40 to i32
  %tmp42 = sitofp i32 %tmp41 to float
  %tmp43 = fsub float %tmp3, %tmp42
  store float %tmp43, float* %arg1, align 4
  ret void
}
The failure looks like this:
llc: /home/mkazantsev/work/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp:178: void {anonymous}::DAGCombiner::AddToWorklist(llvm::SDNode*): Assertion `N->getOpcode() != ISD::DELETED_NODE && "Deleted Node added to Worklist"' failed.
Stack dump:
0 0x00007f560b939d2a llvm::sys::PrintStackTrace(llvm::raw_ostream&) /home/mkazantsev/work/llvm/lib/Support/Unix/Signals.inc:499:0
1 0x00007f560b9382ba llvm::sys::RunSignalHandlers() /home/mkazantsev/work/llvm/lib/Support/Signals.cpp:67:0
2 0x00007f560b9383f5 SignalHandler(int) /home/mkazantsev/work/llvm/lib/Support/Unix/Signals.inc:358:0
3 0x00007f560ac11390 __restore_rt (/lib/x86_64-linux-gnu/libpthread.so.0+0x11390)
4 0x00007f5609fca428 gsignal /build/glibc-Cl5G7W/glibc-2.23/signal/../sysdeps/unix/sysv/linux/raise.c:54:0
5 0x00007f5609fcc02a abort /build/glibc-Cl5G7W/glibc-2.23/stdlib/abort.c:91:0
6 0x00007f5609fc2bd7 __assert_fail_base /build/glibc-Cl5G7W/glibc-2.23/assert/assert.c:92:0
7 0x00007f5609fc2c82 (/lib/x86_64-linux-gnu/libc.so.6+0x2dc82)
8 0x00007f560b6ff43c llvm::SmallVectorTemplateBase<(anonymous namespace)::DAGCombiner::MemOpLink, false>::grow(unsigned long) (.constprop.774) /home/mkazantsev/work/llvm/include/llvm/ADT/SmallVector.h:240:0
9 0x00007f560be46b31 LookupBucketFor<llvm::SDNode*> /home/mkazantsev/work/llvm/include/llvm/ADT/DenseMap.h:592:0
10 0x00007f560be46b31 LookupBucketFor<llvm::SDNode*> /home/mkazantsev/work/llvm/include/llvm/ADT/DenseMap.h:632:0
11 0x00007f560be46b31 try_emplace /home/mkazantsev/work/llvm/include/llvm/ADT/DenseMap.h:213:0
12 0x00007f560be46b31 llvm::DenseMapBase<llvm::DenseMap<llvm::SDNode*, unsigned int, llvm::DenseMapInfo<llvm::SDNode*>, llvm::detail::DenseMapPair<llvm::SDNode*, unsigned int> >, llvm::SDNode*, unsigned int, llvm::DenseMapInfo<llvm::SDNode*>, llvm::detail::DenseMapPair<llvm::SDNode*, unsigned int> >::insert(std::pair<llvm::SDNode*, unsigned int>&&) /home/mkazantsev/work/llvm/include/llvm/ADT/DenseMap.h:204:0
13 0x00007f560be46b31 AddToWorklist /home/mkazantsev/work/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp:185:0
14 0x00007f560be46b31 llvm::TargetLowering::DAGCombinerInfo::AddToWorklist(llvm::SDNode*) /home/mkazantsev/work/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp:639:0
15 0x00007f560d80a9ea combinePMULDQ(llvm::SDNode*, llvm::SelectionDAG&, llvm::TargetLowering::DAGCombinerInfo&) /home/mkazantsev/work/llvm/lib/Target/X86/X86ISelLowering.cpp:40388:0
16 0x00007f560d8cdd38 llvm::X86TargetLowering::PerformDAGCombine(llvm::SDNode*, llvm::TargetLowering::DAGCombinerInfo&) const /home/mkazantsev/work/llvm/lib/Target/X86/X86ISelLowering.cpp:40522:0
17 0x00007f560be7c330 (anonymous namespace)::DAGCombiner::combine(llvm::SDNode*) /home/mkazantsev/work/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp:1624:0
18 0x00007f560be7da29 Run /home/mkazantsev/work/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp:1460:0
19 0x00007f560be7da29 llvm::SelectionDAG::Combine(llvm::CombineLevel, llvm::AAResults*, llvm::CodeGenOpt::Level) /home/mkazantsev/work/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp:19061:0
20 0x00007f560bfe2cdd ~TimeRegion /home/mkazantsev/work/llvm/include/llvm/Support/Timer.h:153:0
21 0x00007f560bfe2cdd ~NamedRegionTimer /home/mkazantsev/work/llvm/include/llvm/Support/Timer.h:161:0
22 0x00007f560bfe2cdd llvm::SelectionDAGISel::CodeGenAndEmitDAG() /home/mkazantsev/work/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp:842:0
23 0x00007f560bfe6ddc llvm::SelectionDAGISel::SelectAllBasicBlocks(llvm::Function const&) /home/mkazantsev/work/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp:1752:0
24 0x00007f560bfe8937 llvm::SelectionDAGISel::runOnMachineFunction(llvm::MachineFunction&) (.part.1046) /home/mkazantsev/work/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp:470:0
25 0x00007f560d801b30 (anonymous namespace)::X86DAGToDAGISel::runOnMachineFunction(llvm::MachineFunction&) /home/mkazantsev/work/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:187:0
26 0x00007f560bc223b2 llvm::MachineFunctionPass::runOnFunction(llvm::Function&) (.part.88) /home/mkazantsev/work/llvm/lib/CodeGen/MachineFunctionPass.cpp:74:0
27 0x00007f560ba4930f llvm::FPPassManager::runOnFunction(llvm::Function&) /home/mkazantsev/work/llvm/lib/IR/LegacyPassManager.cpp:1645:0
28 0x00007f560ba4953c llvm::FPPassManager::runOnModule(llvm::Module&) /home/mkazantsev/work/llvm/include/llvm/ADT/ilist_node.h:67:0
29 0x00007f560ba49c6b runOnModule /home/mkazantsev/work/llvm/lib/IR/LegacyPassManager.cpp:1745:0
30 0x00007f560ba49c6b llvm::legacy::PassManagerImpl::run(llvm::Module&) /home/mkazantsev/work/llvm/lib/IR/LegacyPassManager.cpp:1857:0
31 0x000000000040fef6 compileModule /home/mkazantsev/work/llvm/tools/llc/llc.cpp:600:0
32 0x000000000040fef6 main /home/mkazantsev/work/llvm/tools/llc/llc.cpp:351:0
33 0x00007f5609fb5830 __libc_start_main /build/glibc-Cl5G7W/glibc-2.23/csu/../csu/libc-start.c:325:0
34 0x0000000000410368 _start (/home/mkazantsev/work/llvm/build/buildRA/bin/llc+0x410368)
Aborted (core dumped)
Reverting the following patch makes the failure go away: https://reviews.llvm.org/rL343922
[X86] combinePMULDQ - add op back to worklist if SimplifyDemandedBits succeeds on either operand
Prevents missing other simplifications that may occur deep in the operand chain where CommitTargetLoweringOpt won't add the PMULDQ back to the worklist itself
The patch touches exactly the logic that fails here, so it is most likely the root cause.