llvm / llvm-project

The LLVM Project is a collection of modular and reusable compiler and toolchain technologies.
http://llvm.org
Other
28.65k stars 11.84k forks source link

AMDGPUPromoteAlloca triggered assert when attempting to cast the alloca user from <8 x i8> to ptr #80366

Closed chichunchen closed 8 months ago

chichunchen commented 8 months ago

Backtrace:

0.      Program arguments: /cray/css/users/cchen/llvm-pe31/bin/clang-19 -cc1 -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-linux-gnu -emit-llvm-bc -emit-llvm-uselists -disable-free -clear-ast-before-backend -main-file-name test.cpp
 -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=none -fno-rounding-math -mconstructor-aliases -aux-target-cpu x86-64 -fcuda-is-device -fcuda-allow-variadic-functions -fvisibility=hidden -fapply-global
-visibility-to-externs -mlink-builtin-bitcode /opt/rocm-6.0.0/amdgcn/bitcode/hip.bc -mlink-builtin-bitcode /opt/rocm-6.0.0/amdgcn/bitcode/ocml.bc -mlink-builtin-bitcode /opt/rocm-6.0.0/amdgcn/bitcode/ockl.bc -mlink-builtin-bitcode /opt/ro
cm-6.0.0/amdgcn/bitcode/oclc_daz_opt_off.bc -mlink-builtin-bitcode /opt/rocm-6.0.0/amdgcn/bitcode/oclc_unsafe_math_off.bc -mlink-builtin-bitcode /opt/rocm-6.0.0/amdgcn/bitcode/oclc_finite_only_off.bc -mlink-builtin-bitcode /opt/rocm-6.0.0
/amdgcn/bitcode/oclc_correctly_rounded_sqrt_on.bc -mlink-builtin-bitcode /opt/rocm-6.0.0/amdgcn/bitcode/oclc_wavefrontsize64_on.bc -mlink-builtin-bitcode /opt/rocm-6.0.0/amdgcn/bitcode/oclc_isa_version_908.bc -mlink-builtin-bitcode /opt/r
ocm-6.0.0/amdgcn/bitcode/oclc_abi_version_400.bc -target-cpu gfx908 -debugger-tuning=gdb -fdebug-compilation-dir=/cray/css/users/cchen/PE-52194 -nostdinc++ -nobuiltininc -resource-dir /cray/css/users/cchen/llvm-pe31/lib/clang/19 -internal
-isystem /usr/local/include -internal-isystem /usr/lib64/gcc/x86_64-suse-linux/13/../../../../x86_64-suse-linux/include -internal-externc-isystem /include -internal-externc-isystem /usr/include -internal-isystem /usr/local/include -intern
al-isystem /usr/lib64/gcc/x86_64-suse-linux/13/../../../../x86_64-suse-linux/include -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -Wno-invalid-constexpr -Wno-ambiguous-ellipsis -Wno-inline-namespace-reopen
ed-noninline -Wno-deprecated-builtins -Wno-unused-value -Wno-c++20-extensions -std=c++17 -fdeprecated-macro -fno-autolink -ferror-limit 19 -fhip-new-launch-api -fgpu-rdc -fgnuc-version=4.2.1 -fcxx-exceptions -fexceptions -fcolor-diagnosti
cs -vectorize-loops -vectorize-slp -cuid=8831a35a02a84a35 -fcuda-allow-variadic-functions -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o test-hip-amdgcn-amd-amdhsa-gfx908.bc -x hip test.cpp
1.      <eof> parser at end of file
2.      Optimizer
 #0 0x00000000030a1f18 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) /ptmp2/cchen/llvm-project/llvm/lib/Support/Unix/Signals.inc:723:22
 #1 0x00000000030a234f PrintStackTraceSignalHandler(void*) /ptmp2/cchen/llvm-project/llvm/lib/Support/Unix/Signals.inc:798:1
 #2 0x000000000309fc87 llvm::sys::RunSignalHandlers() /ptmp2/cchen/llvm-project/llvm/lib/Support/Signals.cpp:105:20
 #3 0x00000000030a190e SignalHandler(int) /ptmp2/cchen/llvm-project/llvm/lib/Support/Unix/Signals.inc:413:1
 #4 0x00007f83223e9910 __restore_rt (/lib64/libpthread.so.0+0x16910)
 #5 0x00007f8321c19d2b raise (/lib64/libc.so.6+0x4ad2b)
 #6 0x00007f8321c1b3e5 abort (/lib64/libc.so.6+0x4c3e5)
 #7 0x00007f8321c11c6a __assert_fail_base (/lib64/libc.so.6+0x42c6a)
 #8 0x00007f8321c11cf2 (/lib64/libc.so.6+0x42cf2)
 #9 0x0000000001b78140 (anonymous namespace)::FoldBitCast(llvm::Constant*, llvm::Type*, llvm::DataLayout const&) /ptmp2/cchen/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp:105:3
#10 0x0000000001b7d9f5 llvm::ConstantFoldCastOperand(unsigned int, llvm::Constant*, llvm::Type*, llvm::DataLayout const&) /ptmp2/cchen/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp:1462:37
#11 0x0000000001c0d3c4 simplifyCastInst(unsigned int, llvm::Value*, llvm::Type*, llvm::SimplifyQuery const&, unsigned int) /ptmp2/cchen/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp:5287:56
#12 0x0000000001c0d57e llvm::simplifyCastInst(unsigned int, llvm::Value*, llvm::Type*, llvm::SimplifyQuery const&) /ptmp2/cchen/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp:5321:1
#13 0x0000000001c18301 llvm::InstSimplifyFolder::FoldCast(llvm::Instruction::CastOps, llvm::Value*, llvm::Type*) const /ptmp2/cchen/llvm-project/llvm/include/llvm/Analysis/InstSimplifyFolder.h:118:3
#14 0x0000000000eb0788 llvm::IRBuilderBase::CreateCast(llvm::Instruction::CastOps, llvm::Value*, llvm::Type*, llvm::Twine const&) /ptmp2/cchen/llvm-project/llvm/include/llvm/IR/IRBuilder.h:2148:40
#15 0x0000000000eb0720 llvm::IRBuilderBase::CreateBitCast(llvm::Value*, llvm::Type*, llvm::Twine const&) /ptmp2/cchen/llvm-project/llvm/include/llvm/IR/IRBuilder.h:2113:3
#16 0x000000000189417a promoteAllocaUserToVector(llvm::Instruction*, llvm::DataLayout const&, llvm::FixedVectorType*, unsigned int, unsigned int, llvm::DenseMap<llvm::MemTransferInst*, MemTransferInfo, llvm::DenseMapInfo<llvm::MemTransfer
Inst*, void>, llvm::detail::DenseMapPair<llvm::MemTransferInst*, MemTransferInfo>>&, std::map<llvm::GetElementPtrInst*, llvm::Value*, std::less<llvm::GetElementPtrInst*>, std::allocator<std::pair<llvm::GetElementPtrInst* const, llvm::Valu
e*>>>&, llvm::Value*, llvm::SmallVectorImpl<llvm::LoadInst*>&) /ptmp2/cchen/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp:530:0
#17 0x0000000001894572 (anonymous namespace)::AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(llvm::AllocaInst&)::'lambda2'(llvm::Instruction*)::operator()(llvm::Instruction*) const /ptmp2/cchen/llvm-project/llvm/lib/Target/AMDGPU/AMDGP
UPromoteAlloca.cpp:840:0
#18 0x00000000018998a0 void std::__invoke_impl<void, (anonymous namespace)::AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(llvm::AllocaInst&)::'lambda2'(llvm::Instruction*)&, llvm::Instruction*>(std::__invoke_other, (anonymous namespac
e)::AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(llvm::AllocaInst&)::'lambda2'(llvm::Instruction*)&, llvm::Instruction*&&) /opt/cray/pe/gcc/10.3.0/snos/include/g++/bits/invoke.h:60:67
#19 0x000000000189956b std::enable_if<is_invocable_r_v<void, (anonymous namespace)::AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(llvm::AllocaInst&)::'lambda2'(llvm::Instruction*)&, llvm::Instruction*>, void>::type std::__invoke_r<voi
d, (anonymous namespace)::AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(llvm::AllocaInst&)::'lambda2'(llvm::Instruction*)&, llvm::Instruction*>((anonymous namespace)::AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(llvm::AllocaInst&
)::'lambda2'(llvm::Instruction*)&, llvm::Instruction*&&) /opt/cray/pe/gcc/10.3.0/snos/include/g++/bits/invoke.h:116:5
#20 0x00000000018992aa std::_Function_handler<void (llvm::Instruction*), (anonymous namespace)::AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(llvm::AllocaInst&)::'lambda2'(llvm::Instruction*)>::_M_invoke(std::_Any_data const&, llvm::I
nstruction*&&) /opt/cray/pe/gcc/10.3.0/snos/include/g++/bits/std_function.h:292:44
#21 0x000000000189aedb std::function<void (llvm::Instruction*)>::operator()(llvm::Instruction*) const /opt/cray/pe/gcc/10.3.0/snos/include/g++/bits/std_function.h:622:71
#22 0x0000000001898e3c void forEachWorkListItem<llvm::SmallVector<llvm::Instruction*, 6u>>(llvm::SmallVector<llvm::Instruction*, 6u> const&, std::function<void (llvm::Instruction*)>) /ptmp2/cchen/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU
PromoteAlloca.cpp:601:0
#23 0x0000000001895ba5 (anonymous namespace)::AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(llvm::AllocaInst&) /ptmp2/cchen/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp:836:0
#24 0x0000000001892a65 (anonymous namespace)::AMDGPUPromoteAllocaImpl::run(llvm::Function&, bool) /ptmp2/cchen/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp:253:0
#25 0x0000000001892755 llvm::AMDGPUPromoteAllocaToVectorPass::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) /ptmp2/cchen/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp:211:0
#26 0x000000000144c0e0 llvm::detail::PassModel<llvm::Function, llvm::AMDGPUPromoteAllocaToVectorPass, llvm::PreservedAnalyses, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) /ptmp2/cch
en/llvm-project/llvm/include/llvm/IR/PassManagerInternal.h:90:0
#27 0x00000000028a82f3 llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) /ptmp2/cchen/llvm-project/llvm/include/llvm/IR/PassManager.h:547:20
#28 0x0000000000f0098e llvm::detail::PassModel<llvm::Function, llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>, llvm::PreservedAnalyses, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) /ptmp2/cchen/llvm-project/llvm/include/llvm/IR/PassManagerInternal.h:90:3
#29 0x0000000001b37fa8 llvm::CGSCCToFunctionPassAdaptor::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) /ptmp2/cchen/llvm-project/llvm/lib/Analysis/CGSCCPassManager.cpp:540:23
#30 0x000000000144b910 llvm::detail::PassModel<llvm::LazyCallGraph::SCC, llvm::CGSCCToFunctionPassAdaptor, llvm::PreservedAnalyses, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) /ptmp2/cchen/llvm-project/llvm/include/llvm/IR/PassManagerInternal.h:90:0
#31 0x0000000001b3680e llvm::PassManager<llvm::LazyCallGraph::SCC, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) /ptmp2/cchen/llvm-project/llvm/lib/Analysis/CGSCCPassManager.cpp:90:12
#32 0x0000000004ba4360 llvm::detail::PassModel<llvm::LazyCallGraph::SCC, llvm::PassManager<llvm::LazyCallGraph::SCC, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>, llvm::PreservedAnalyses, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) /ptmp2/cchen/llvm-project/llvm/include/llvm/IR/PassManagerInternal.h:90:3
#33 0x0000000001b37845 llvm::DevirtSCCRepeatedPass::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) /ptmp2/cchen/llvm-project/llvm/lib/Analysis/CGSCCPassManager.cpp:407:17
#34 0x0000000004b9997c llvm::detail::PassModel<llvm::LazyCallGraph::SCC, llvm::DevirtSCCRepeatedPass, llvm::PreservedAnalyses, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) /ptmp2/cchen/llvm-project/llvm/include/llvm/IR/PassManagerInternal.h:90:3
#35 0x0000000001b370e7 llvm::ModuleToPostOrderCGSCCPassAdaptor::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) /ptmp2/cchen/llvm-project/llvm/lib/Analysis/CGSCCPassManager.cpp:278:18
#36 0x0000000004ba42ae llvm::detail::PassModel<llvm::Module, llvm::ModuleToPostOrderCGSCCPassAdaptor, llvm::PreservedAnalyses, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) /ptmp2/cchen/llvm-project/llvm/include/llvm/IR/PassManagerInternal.h:90:3
#37 0x00000000028a7fd5 llvm::PassManager<llvm::Module, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) /ptmp2/cchen/llvm-project/llvm/include/llvm/IR/PassManager.h:547:20
#38 0x0000000004d5057d llvm::ModuleInlinerWrapperPass::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) /ptmp2/cchen/llvm-project/llvm/lib/Transforms/IPO/Inliner.cpp:631:10
#39 0x0000000004ba26a6 llvm::detail::PassModel<llvm::Module, llvm::ModuleInlinerWrapperPass, llvm::PreservedAnalyses, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) /ptmp2/cchen/llvm-project/llvm/include/llvm/IR/PassManagerInternal.h:90:3
#40 0x00000000028a7fd5 llvm::PassManager<llvm::Module, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) /ptmp2/cchen/llvm-project/llvm/include/llvm/IR/PassManager.h:547:20
#41 0x00000000033ff179 (anonymous namespace)::EmitAssemblyHelper::RunOptimizationPipeline(clang::BackendAction, std::unique_ptr<llvm::raw_pwrite_stream, std::default_delete<llvm::raw_pwrite_stream>>&, std::unique_ptr<llvm::ToolOutputFile, std::default_delete<llvm::ToolOutputFile>>&, clang::BackendConsumer*) /ptmp2/cchen/llvm-project/clang/lib/CodeGen/BackendUtil.cpp:1101:12
#42 0x00000000033ff6fc (anonymous namespace)::EmitAssemblyHelper::EmitAssembly(clang::BackendAction, std::unique_ptr<llvm::raw_pwrite_stream, std::default_delete<llvm::raw_pwrite_stream>>, clang::BackendConsumer*) /ptmp2/cchen/llvm-project/clang/lib/CodeGen/BackendUtil.cpp:1167:21
#43 0x00000000034006db clang::EmitBackendOutput(clang::DiagnosticsEngine&, clang::HeaderSearchOptions const&, clang::CodeGenOptions const&, clang::TargetOptions const&, clang::LangOptions const&, llvm::StringRef, llvm::Module*, clang::BackendAction, llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem>, std::unique_ptr<llvm::raw_pwrite_stream, std::default_delete<llvm::raw_pwrite_stream>>, clang::BackendConsumer*) /ptmp2/cchen/llvm-project/clang/lib/CodeGen/BackendUtil.cpp:1328:25
#44 0x0000000003d0b6ed clang::BackendConsumer::HandleTranslationUnit(clang::ASTContext&) /ptmp2/cchen/llvm-project/clang/lib/CodeGen/CodeGenAction.cpp:379:20
#45 0x0000000006bae3e3 clang::ParseAST(clang::Sema&, bool, bool) /ptmp2/cchen/llvm-project/clang/lib/Parse/ParseAST.cpp:183:14
#46 0x0000000003fd5cfa clang::ASTFrontendAction::ExecuteAction() /ptmp2/cchen/llvm-project/clang/lib/Frontend/FrontendAction.cpp:1183:11
#47 0x0000000003d0f405 clang::CodeGenAction::ExecuteAction() /ptmp2/cchen/llvm-project/clang/lib/CodeGen/CodeGenAction.cpp:1154:5
#48 0x0000000003fd5625 clang::FrontendAction::Execute() /ptmp2/cchen/llvm-project/clang/lib/Frontend/FrontendAction.cpp:1073:38
#49 0x0000000003efb7da clang::CompilerInstance::ExecuteAction(clang::FrontendAction&) /ptmp2/cchen/llvm-project/clang/lib/Frontend/CompilerInstance.cpp:1057:42
#50 0x0000000004169a15 clang::ExecuteCompilerInvocation(clang::CompilerInstance*) /ptmp2/cchen/llvm-project/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp:272:38
#51 0x0000000000dff8c3 cc1_main(llvm::ArrayRef<char const*>, char const*, void*) /ptmp2/cchen/llvm-project/clang/tools/driver/cc1_main.cpp:294:40
#52 0x0000000000df0c2f ExecuteCC1Tool(llvm::SmallVectorImpl<char const*>&, llvm::ToolContext const&) /ptmp2/cchen/llvm-project/clang/tools/driver/driver.cpp:365:20
#53 0x0000000000df1105 clang_main(int, char**, llvm::ToolContext const&) /ptmp2/cchen/llvm-project/clang/tools/driver/driver.cpp:405:26
#54 0x0000000000e26b31 main /ptmp2/cchen/llvm-project/build/tools/clang/tools/driver/clang-driver.cpp:17:20
#55 0x00007f8321c0424d __libc_start_main (/lib64/libc.so.6+0x3524d)
#56 0x0000000000def2ba _start /home/abuild/rpmbuild/BUILD/glibc-2.31/csu/../sysdeps/x86_64/start.S:122:0
clang++: error: unable to execute command: Aborted (core dumped)
clang++: error: clang frontend command failed due to signal (use -v to see invocation)

Command for compiling the program: clang++ -std=c++17 -fgpu-rdc -xhip --rocm-path=/opt/rocm-6.0.0 --offload-arch=gfx908 -O3 -nohipwrapperinc -nostdinc++ -nobuiltininc -nogpuinc -Wno-invalid-constexpr --offload-device-only -c test.cpp -Wno-ambiguous-ellipsis -Wno-inline-namespace-reopened-noninline -Wno-deprecated-builtins -Wno-unused-value -Wno-c++20-extensions -c test.cpp

The issue can be avoided by adding -mllvm -amdgpu-promote-alloca-to-vector-limit=128, which avoids the call to promoteAllocaUserToVector.

Reproducer:

// Wraps the int template argument `a` and exposes it as the static constant `c`.
template <int a> struct b { static constexpr int c = a; };
// `d` and `e` instantiate `b` with true and false; the bool arguments convert
// to the int template parameter, so d::c is 1 and e::c is 0.
using d = b<true>;
using e = b<false>;
// Type holder: j<f>::g names f.
template <typename f> struct j { typedef f g; };
// Alias for j<f>::g, i.e. f itself (void when f is omitted). The leading bool
// parameter is unnamed and does not take part in the aliased type.
// The dependent name is written without `typename`.
template <bool, typename f = void> using aa = j<f>::g;
// Forward declarations of the primary templates `ab` (two type parameters) and
// `ac` (two int parameters, both defaulting to 0). Neither is defined on these lines.
template <class, class> class ab;
template <int = 0, int = 0> struct ac;
// Variadic class template whose only member is a public constructor taking
// three ints. The constructor is declared but not defined here.
template <class...> class l {
public:
  l(int, int, int);
};
// Forward declaration of `ad` so that `ag` below can refer to it.
template <class> struct ad;
// Indirection layer: ag<af>::ai is whatever ad<af>::ai names.
template <class af> struct ag {
  using ah = ad<af>;
  using ai = ah::ai;
};
// Forward declaration of the primary template `aj`, parameterized on an int.
template <int> class aj;
// Exposes aj<4> under the member alias `ai`.
struct x {
  using ai = aj<4>;
};
// Definition of `ad`: its template argument is unnamed and unused, so
// ad<T>::ai is x::ai for every T.
template <class> struct ad {
  using h = x;
  using ai = h::ai;
};
// y::i is ag<int>::ai.
struct y {
  using i = ag<int>::ai;
};
// Re-exports y::i publicly as z::ak.
class z {
public:
  using ak = y::i;
};
// Functor type with a device-side call operator that takes no arguments.
// The aliases `k` and `ak` are private (class default access).
class al {
  using k = z;
  using ak = k::ak;

public:
  // Return type aa<d ::c, ak> is the alias `ak`; declared only, no body here.
  __attribute__((device)) aa<d ::c, ak> operator()();
};
// Constructs an ab<n, m> from (ap, ao) -- note the swapped order relative to
// the parameters -- and calls its ar() member.
template <class m, class n> void am(m ao, n ap) {
  ab<n, m> aq(ap, ao);
  aq.ar();
}
// Global long declared with the `constant` attribute; read by the kernel `au`,
// which reinterprets its value as a pointer.
__attribute__((constant)) long as;
// Function template marked with the `global` attribute. Reinterprets the value
// of `as` as a pointer to `at`, copies the pointed-to object into the local
// `av`, then invokes av's call operator with no arguments.
template <typename at> __attribute__((global)) void au() {
  at av = *reinterpret_cast<at *>(as);
  av();
}
// Two-valued tag enum; used below as a non-type template argument.
enum ax { ay, az };
// Compile-time selector: `bb` is `ay` when sizeof(int) is non-zero, otherwise
// `az`; `bc` simply repeats `bb`.
struct ba {
  static constexpr ax bb = sizeof(int) ? ay : az;
  static constexpr ax bc = bb;
};
// be() names the specialization au<at> as a discarded expression; it does not
// call it. NOTE(review): presumably this reference is what pulls the kernel
// instantiation into the translation unit -- confirm.
template <typename at> struct bd {
  static auto be() { au<at>; }
};
// Forward declaration of the primary template `bf`; the third parameter is an `ax` value.
template <typename, typename, ax> struct bf;
// Partial specialization of `bf` for the tag value `ay`. bh() names bd<at>::be
// as a discarded expression without calling it; `ae` is not used in the body.
template <typename at, typename ae> struct bf<at, ae, ay> {
  using bg = bd<at>;
  static void bh() { bg::be; }
};
// Forward declaration of the primary template `o`; its third parameter defaults to ba::bc.
template <typename, typename, ax = ba::bc> struct o;
// Partial specialization of `o` for a second argument of the form ac<bi, bj>.
// The constructor ignores all of its arguments and only names bg::bh as a
// discarded expression (bh is not called).
template <typename at, int bi, int bj, ax bk> struct o<at, ac<bi, bj>, bk> {
  using bg = bf<at, ac<>, bk>;
  o(at, int, int, int, int *, bool) { bg::bh; }
};
// Namespace-scope ints passed by `bo` to the constructor of `o` (`t` by address).
int t, bl, bm;
// Constructs a temporary o<at, ae> from `ao`, the globals bl and bm, the
// literal 0, the address of `t`, and `ap`. The temporary is discarded.
template <typename at, typename ae = ac<>> void bo(at ao, bool ap) {
  o<at, ae>(ao, bl, bm, 0, &t, ap);
}
// Class with a single public device-side constructor taking
// (void *, int, int, void *, int, int, int); declared only, no body here.
class C {
public:
  __attribute__((device)) C(void *, int, int, void *, int, int, int);
};
// Partial specialization of `ab` for a second argument of the form l<aw...>.
// Holds a callable `bs` of type n plus a few scalar fields, and calls `bs`
// from its device-side member functions.
template <typename n, typename... aw> class ab<n, l<aw...>> {
public:
  using bp = l<>;
  using bq = int;
  using br = C;
  using an = bp;
  n bs;
  bq bt;
  int bu;
  int bv;
  void *bw;
  // Return type aa<e ::c> is the alias's default, void. Invokes `bs` with a
  // value-initialized int and `ao`; the template parameter is unnamed and unused.
  template <typename> __attribute__((device)) aa<e ::c> bx(br ao) {
    bs(int(), ao);
  }
  // Loops with no exit condition, building a `br` from the members on each
  // iteration and passing it to bx.
  // NOTE(review): `by` is declared without an initializer and is read below.
  __attribute__((device)) void operator()() {
    for (int by;;)
      bx<an>(br(0, bu, bv, bw, 1, by, bt));
  }
  // Names bo<p> (p being this class) as a discarded expression; bo is not called.
  void ar() {
    using p = ab;
    bo<p>;
  }
  // Constructor taking the callable and an l<>; declared only.
  ab(n, bp);
};
// Class template with one private member function `ca`, declared here only.
template <typename> class bz { void ca(); };
// Carries the constant c == 6 as an unnamed-enum enumerator.
struct cb {
  enum { c = 6 };
};
// Explicit specialization of `aj` for 4: element alias `ai` is double, and the
// device-side subscript operator returns a reference to one (declared only).
template <> class aj<4> {
public:
  using ai = double;
  __attribute__((device)) ai &operator[](int);
};
// `cd` is an alias template that maps any two types to `cb`, so `ce` is cb::c.
struct cc {
  template <typename, typename> using cd = cb;
  static constexpr int ce = cd<double, int>::c;
};
// Functor template holding an `al` (`s`) and an int (`w`). It provides a
// device-side call operator taking (int, C) and a host-side entry point ck().
template <typename cf> struct cg {
  using u = cc;
  using q = double;
  using r = double;
  using ci = al;
  using br = C;
  ci s;
  int w;
  // Declared only; the first parameter has the template parameter type cf.
  cg(cf, int, int, r);
  // `block` is an array of u::ce pointers to double, value-initialized by `{}`.
  // `v` is the result of calling `s`; the loop assigns block[cj][0] to v[cj]
  // and has no exit condition. Both parameters are unnamed and unused.
  // NOTE(review): `cj` is declared without an initializer, and the entries of
  // `block` are never assigned before being dereferenced.
  __attribute__((device)) void operator()(int, br) {
    q *block[u::ce]{};
    auto v = s();
    for (int cj;; ++cj)
        v[cj] = block[cj][0];
  }
  // Builds an `l` from (0, cl, w), leaving its template arguments to be
  // deduced, and passes it together with a copy of *this to am.
  // NOTE(review): `cl` is passed without having been initialized.
  void ck() {
    int cl;
    l bn(0, cl, w);
    am(bn, *this);
  }
};
// Constructs a `cg` from four local variables (template argument deduced from
// the constructor call) and calls its ck(). The template parameter is unnamed
// and unused.
// NOTE(review): all four locals are passed without having been initialized.
template <typename> void cm() {
  double cn;
  int btdm, co, dn;
  cg dm(btdm, co, dn, cn);
  dm.ck();
}
// Out-of-class definition of bz<cf>::ca: names cm<cf> as a discarded expression without calling it.
template <typename cf> void bz<cf>::ca() { cm<cf>; }
// Explicit instantiation of bz<int>; the only explicit instantiation in this reproducer.
template class bz<int>;
llvmbot commented 8 months ago

@llvm/issue-subscribers-backend-amdgpu

Author: Chi-Chun, Chen (chichunchen)

Backtrace: ``` 0. Program arguments: /cray/css/users/cchen/llvm-pe31/bin/clang-19 -cc1 -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-linux-gnu -emit-llvm-bc -emit-llvm-uselists -disable-free -clear-ast-before-backend -main-file-name test.cpp -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=none -fno-rounding-math -mconstructor-aliases -aux-target-cpu x86-64 -fcuda-is-device -fcuda-allow-variadic-functions -fvisibility=hidden -fapply-global -visibility-to-externs -mlink-builtin-bitcode /opt/rocm-6.0.0/amdgcn/bitcode/hip.bc -mlink-builtin-bitcode /opt/rocm-6.0.0/amdgcn/bitcode/ocml.bc -mlink-builtin-bitcode /opt/rocm-6.0.0/amdgcn/bitcode/ockl.bc -mlink-builtin-bitcode /opt/ro cm-6.0.0/amdgcn/bitcode/oclc_daz_opt_off.bc -mlink-builtin-bitcode /opt/rocm-6.0.0/amdgcn/bitcode/oclc_unsafe_math_off.bc -mlink-builtin-bitcode /opt/rocm-6.0.0/amdgcn/bitcode/oclc_finite_only_off.bc -mlink-builtin-bitcode /opt/rocm-6.0.0 /amdgcn/bitcode/oclc_correctly_rounded_sqrt_on.bc -mlink-builtin-bitcode /opt/rocm-6.0.0/amdgcn/bitcode/oclc_wavefrontsize64_on.bc -mlink-builtin-bitcode /opt/rocm-6.0.0/amdgcn/bitcode/oclc_isa_version_908.bc -mlink-builtin-bitcode /opt/r ocm-6.0.0/amdgcn/bitcode/oclc_abi_version_400.bc -target-cpu gfx908 -debugger-tuning=gdb -fdebug-compilation-dir=/cray/css/users/cchen/PE-52194 -nostdinc++ -nobuiltininc -resource-dir /cray/css/users/cchen/llvm-pe31/lib/clang/19 -internal -isystem /usr/local/include -internal-isystem /usr/lib64/gcc/x86_64-suse-linux/13/../../../../x86_64-suse-linux/include -internal-externc-isystem /include -internal-externc-isystem /usr/include -internal-isystem /usr/local/include -intern al-isystem /usr/lib64/gcc/x86_64-suse-linux/13/../../../../x86_64-suse-linux/include -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -Wno-invalid-constexpr -Wno-ambiguous-ellipsis -Wno-inline-namespace-reopen ed-noninline -Wno-deprecated-builtins -Wno-unused-value -Wno-c++20-extensions 
-std=c++17 -fdeprecated-macro -fno-autolink -ferror-limit 19 -fhip-new-launch-api -fgpu-rdc -fgnuc-version=4.2.1 -fcxx-exceptions -fexceptions -fcolor-diagnosti cs -vectorize-loops -vectorize-slp -cuid=8831a35a02a84a35 -fcuda-allow-variadic-functions -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o test-hip-amdgcn-amd-amdhsa-gfx908.bc -x hip test.cpp 1. <eof> parser at end of file 2. Optimizer #0 0x00000000030a1f18 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) /ptmp2/cchen/llvm-project/llvm/lib/Support/Unix/Signals.inc:723:22 #1 0x00000000030a234f PrintStackTraceSignalHandler(void*) /ptmp2/cchen/llvm-project/llvm/lib/Support/Unix/Signals.inc:798:1 #2 0x000000000309fc87 llvm::sys::RunSignalHandlers() /ptmp2/cchen/llvm-project/llvm/lib/Support/Signals.cpp:105:20 #3 0x00000000030a190e SignalHandler(int) /ptmp2/cchen/llvm-project/llvm/lib/Support/Unix/Signals.inc:413:1 #4 0x00007f83223e9910 __restore_rt (/lib64/libpthread.so.0+0x16910) #5 0x00007f8321c19d2b raise (/lib64/libc.so.6+0x4ad2b) #6 0x00007f8321c1b3e5 abort (/lib64/libc.so.6+0x4c3e5) #7 0x00007f8321c11c6a __assert_fail_base (/lib64/libc.so.6+0x42c6a) #8 0x00007f8321c11cf2 (/lib64/libc.so.6+0x42cf2) #9 0x0000000001b78140 (anonymous namespace)::FoldBitCast(llvm::Constant*, llvm::Type*, llvm::DataLayout const&) /ptmp2/cchen/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp:105:3 #10 0x0000000001b7d9f5 llvm::ConstantFoldCastOperand(unsigned int, llvm::Constant*, llvm::Type*, llvm::DataLayout const&) /ptmp2/cchen/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp:1462:37 #11 0x0000000001c0d3c4 simplifyCastInst(unsigned int, llvm::Value*, llvm::Type*, llvm::SimplifyQuery const&, unsigned int) /ptmp2/cchen/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp:5287:56 #12 0x0000000001c0d57e llvm::simplifyCastInst(unsigned int, llvm::Value*, llvm::Type*, llvm::SimplifyQuery const&) /ptmp2/cchen/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp:5321:1 #13 0x0000000001c18301 
llvm::InstSimplifyFolder::FoldCast(llvm::Instruction::CastOps, llvm::Value*, llvm::Type*) const /ptmp2/cchen/llvm-project/llvm/include/llvm/Analysis/InstSimplifyFolder.h:118:3 #14 0x0000000000eb0788 llvm::IRBuilderBase::CreateCast(llvm::Instruction::CastOps, llvm::Value*, llvm::Type*, llvm::Twine const&) /ptmp2/cchen/llvm-project/llvm/include/llvm/IR/IRBuilder.h:2148:40 #15 0x0000000000eb0720 llvm::IRBuilderBase::CreateBitCast(llvm::Value*, llvm::Type*, llvm::Twine const&) /ptmp2/cchen/llvm-project/llvm/include/llvm/IR/IRBuilder.h:2113:3 #16 0x000000000189417a promoteAllocaUserToVector(llvm::Instruction*, llvm::DataLayout const&, llvm::FixedVectorType*, unsigned int, unsigned int, llvm::DenseMap<llvm::MemTransferInst*, MemTransferInfo, llvm::DenseMapInfo<llvm::MemTransfer Inst*, void>, llvm::detail::DenseMapPair<llvm::MemTransferInst*, MemTransferInfo>>&, std::map<llvm::GetElementPtrInst*, llvm::Value*, std::less<llvm::GetElementPtrInst*>, std::allocator<std::pair<llvm::GetElementPtrInst* const, llvm::Valu e*>>>&, llvm::Value*, llvm::SmallVectorImpl<llvm::LoadInst*>&) /ptmp2/cchen/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp:530:0 #17 0x0000000001894572 (anonymous namespace)::AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(llvm::AllocaInst&)::'lambda2'(llvm::Instruction*)::operator()(llvm::Instruction*) const /ptmp2/cchen/llvm-project/llvm/lib/Target/AMDGPU/AMDGP UPromoteAlloca.cpp:840:0 #18 0x00000000018998a0 void std::__invoke_impl<void, (anonymous namespace)::AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(llvm::AllocaInst&)::'lambda2'(llvm::Instruction*)&, llvm::Instruction*>(std::__invoke_other, (anonymous namespac e)::AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(llvm::AllocaInst&)::'lambda2'(llvm::Instruction*)&, llvm::Instruction*&&) /opt/cray/pe/gcc/10.3.0/snos/include/g++/bits/invoke.h:60:67 #19 0x000000000189956b std::enable_if<is_invocable_r_v<void, (anonymous 
namespace)::AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(llvm::AllocaInst&)::'lambda2'(llvm::Instruction*)&, llvm::Instruction*>, void>::type std::__invoke_r<voi d, (anonymous namespace)::AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(llvm::AllocaInst&)::'lambda2'(llvm::Instruction*)&, llvm::Instruction*>((anonymous namespace)::AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(llvm::AllocaInst& )::'lambda2'(llvm::Instruction*)&, llvm::Instruction*&&) /opt/cray/pe/gcc/10.3.0/snos/include/g++/bits/invoke.h:116:5 #20 0x00000000018992aa std::_Function_handler<void (llvm::Instruction*), (anonymous namespace)::AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(llvm::AllocaInst&)::'lambda2'(llvm::Instruction*)>::_M_invoke(std::_Any_data const&, llvm::I nstruction*&&) /opt/cray/pe/gcc/10.3.0/snos/include/g++/bits/std_function.h:292:44 #21 0x000000000189aedb std::function<void (llvm::Instruction*)>::operator()(llvm::Instruction*) const /opt/cray/pe/gcc/10.3.0/snos/include/g++/bits/std_function.h:622:71 #22 0x0000000001898e3c void forEachWorkListItem<llvm::SmallVector<llvm::Instruction*, 6u>>(llvm::SmallVector<llvm::Instruction*, 6u> const&, std::function<void (llvm::Instruction*)>) /ptmp2/cchen/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU PromoteAlloca.cpp:601:0 #23 0x0000000001895ba5 (anonymous namespace)::AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(llvm::AllocaInst&) /ptmp2/cchen/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp:836:0 #24 0x0000000001892a65 (anonymous namespace)::AMDGPUPromoteAllocaImpl::run(llvm::Function&, bool) /ptmp2/cchen/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp:253:0 #25 0x0000000001892755 llvm::AMDGPUPromoteAllocaToVectorPass::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) /ptmp2/cchen/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp:211:0 #26 0x000000000144c0e0 llvm::detail::PassModel<llvm::Function, llvm::AMDGPUPromoteAllocaToVectorPass, llvm::PreservedAnalyses, 
llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) /ptmp2/cch en/llvm-project/llvm/include/llvm/IR/PassManagerInternal.h:90:0 #27 0x00000000028a82f3 llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) /ptmp2/cchen/llvm-project/llvm/include/llvm/IR/PassManager.h:547:20 #28 0x0000000000f0098e llvm::detail::PassModel<llvm::Function, llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>, llvm::PreservedAnalyses, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) /ptmp2/cchen/llvm-project/llvm/include/llvm/IR/PassManagerInternal.h:90:3 #29 0x0000000001b37fa8 llvm::CGSCCToFunctionPassAdaptor::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) /ptmp2/cchen/llvm-project/llvm/lib/Analysis/CGSCCPassManager.cpp:540:23 #30 0x000000000144b910 llvm::detail::PassModel<llvm::LazyCallGraph::SCC, llvm::CGSCCToFunctionPassAdaptor, llvm::PreservedAnalyses, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) /ptmp2/cchen/llvm-project/llvm/include/llvm/IR/PassManagerInternal.h:90:0 #31 0x0000000001b3680e llvm::PassManager<llvm::LazyCallGraph::SCC, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) /ptmp2/cchen/llvm-project/llvm/lib/Analysis/CGSCCPassManager.cpp:90:12 #32 0x0000000004ba4360 llvm::detail::PassModel<llvm::LazyCallGraph::SCC, 
llvm::PassManager<llvm::LazyCallGraph::SCC, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>, llvm::PreservedAnalyses, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) /ptmp2/cchen/llvm-project/llvm/include/llvm/IR/PassManagerInternal.h:90:3 #33 0x0000000001b37845 llvm::DevirtSCCRepeatedPass::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) /ptmp2/cchen/llvm-project/llvm/lib/Analysis/CGSCCPassManager.cpp:407:17 #34 0x0000000004b9997c llvm::detail::PassModel<llvm::LazyCallGraph::SCC, llvm::DevirtSCCRepeatedPass, llvm::PreservedAnalyses, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) /ptmp2/cchen/llvm-project/llvm/include/llvm/IR/PassManagerInternal.h:90:3 #35 0x0000000001b370e7 llvm::ModuleToPostOrderCGSCCPassAdaptor::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) /ptmp2/cchen/llvm-project/llvm/lib/Analysis/CGSCCPassManager.cpp:278:18 #36 0x0000000004ba42ae llvm::detail::PassModel<llvm::Module, llvm::ModuleToPostOrderCGSCCPassAdaptor, llvm::PreservedAnalyses, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) /ptmp2/cchen/llvm-project/llvm/include/llvm/IR/PassManagerInternal.h:90:3 #37 0x00000000028a7fd5 llvm::PassManager<llvm::Module, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) /ptmp2/cchen/llvm-project/llvm/include/llvm/IR/PassManager.h:547:20 #38 
0x0000000004d5057d llvm::ModuleInlinerWrapperPass::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) /ptmp2/cchen/llvm-project/llvm/lib/Transforms/IPO/Inliner.cpp:631:10 #39 0x0000000004ba26a6 llvm::detail::PassModel<llvm::Module, llvm::ModuleInlinerWrapperPass, llvm::PreservedAnalyses, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) /ptmp2/cchen/llvm-project/llvm/include/llvm/IR/PassManagerInternal.h:90:3 #40 0x00000000028a7fd5 llvm::PassManager<llvm::Module, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) /ptmp2/cchen/llvm-project/llvm/include/llvm/IR/PassManager.h:547:20 #41 0x00000000033ff179 (anonymous namespace)::EmitAssemblyHelper::RunOptimizationPipeline(clang::BackendAction, std::unique_ptr<llvm::raw_pwrite_stream, std::default_delete<llvm::raw_pwrite_stream>>&, std::unique_ptr<llvm::ToolOutputFile, std::default_delete<llvm::ToolOutputFile>>&, clang::BackendConsumer*) /ptmp2/cchen/llvm-project/clang/lib/CodeGen/BackendUtil.cpp:1101:12 #42 0x00000000033ff6fc (anonymous namespace)::EmitAssemblyHelper::EmitAssembly(clang::BackendAction, std::unique_ptr<llvm::raw_pwrite_stream, std::default_delete<llvm::raw_pwrite_stream>>, clang::BackendConsumer*) /ptmp2/cchen/llvm-project/clang/lib/CodeGen/BackendUtil.cpp:1167:21 #43 0x00000000034006db clang::EmitBackendOutput(clang::DiagnosticsEngine&, clang::HeaderSearchOptions const&, clang::CodeGenOptions const&, clang::TargetOptions const&, clang::LangOptions const&, llvm::StringRef, llvm::Module*, clang::BackendAction, llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem>, std::unique_ptr<llvm::raw_pwrite_stream, std::default_delete<llvm::raw_pwrite_stream>>, clang::BackendConsumer*) /ptmp2/cchen/llvm-project/clang/lib/CodeGen/BackendUtil.cpp:1328:25 #44 0x0000000003d0b6ed clang::BackendConsumer::HandleTranslationUnit(clang::ASTContext&) /ptmp2/cchen/llvm-project/clang/lib/CodeGen/CodeGenAction.cpp:379:20 #45 
0x0000000006bae3e3 clang::ParseAST(clang::Sema&, bool, bool) /ptmp2/cchen/llvm-project/clang/lib/Parse/ParseAST.cpp:183:14 #46 0x0000000003fd5cfa clang::ASTFrontendAction::ExecuteAction() /ptmp2/cchen/llvm-project/clang/lib/Frontend/FrontendAction.cpp:1183:11 #47 0x0000000003d0f405 clang::CodeGenAction::ExecuteAction() /ptmp2/cchen/llvm-project/clang/lib/CodeGen/CodeGenAction.cpp:1154:5 #48 0x0000000003fd5625 clang::FrontendAction::Execute() /ptmp2/cchen/llvm-project/clang/lib/Frontend/FrontendAction.cpp:1073:38 #49 0x0000000003efb7da clang::CompilerInstance::ExecuteAction(clang::FrontendAction&) /ptmp2/cchen/llvm-project/clang/lib/Frontend/CompilerInstance.cpp:1057:42 #50 0x0000000004169a15 clang::ExecuteCompilerInvocation(clang::CompilerInstance*) /ptmp2/cchen/llvm-project/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp:272:38 #51 0x0000000000dff8c3 cc1_main(llvm::ArrayRef<char const*>, char const*, void*) /ptmp2/cchen/llvm-project/clang/tools/driver/cc1_main.cpp:294:40 #52 0x0000000000df0c2f ExecuteCC1Tool(llvm::SmallVectorImpl<char const*>&, llvm::ToolContext const&) /ptmp2/cchen/llvm-project/clang/tools/driver/driver.cpp:365:20 #53 0x0000000000df1105 clang_main(int, char**, llvm::ToolContext const&) /ptmp2/cchen/llvm-project/clang/tools/driver/driver.cpp:405:26 #54 0x0000000000e26b31 main /ptmp2/cchen/llvm-project/build/tools/clang/tools/driver/clang-driver.cpp:17:20 #55 0x00007f8321c0424d __libc_start_main (/lib64/libc.so.6+0x3524d) #56 0x0000000000def2ba _start /home/abuild/rpmbuild/BUILD/glibc-2.31/csu/../sysdeps/x86_64/start.S:122:0 clang++: error: unable to execute command: Aborted (core dumped) clang++: error: clang frontend command failed due to signal (use -v to see invocation) ``` Command for compiling the program: `clang++ -std=c++17 -fgpu-rdc -xhip --rocm-path=/opt/rocm-6.0.0 --offload-arch=gfx908 -O3 -nohipwrapperinc -nostdinc++ -nobuiltininc -nogpuinc -Wno-invalid-constexpr --offload-device-only -c test.cpp -Wno-ambiguous-ellipsis 
-Wno-inline-namespace-reopened-noninline -Wno-deprecated-builtins -Wno-unused-value -Wno-c++20-extensions -c test.cpp` The issue can be avoided if adding `-mllvm -amdgpu-promote-alloca-to-vector-limit=128`. (avoid the call to `promoteAllocaUserToVector`) Reproducer: ``` template <int a> struct b { static constexpr int c = a; }; using d = b<true>; using e = b<false>; template <typename f> struct j { typedef f g; }; template <bool, typename f = void> using aa = j<f>::g; template <class, class> class ab; template <int = 0, int = 0> struct ac; template <class...> class l { public: l(int, int, int); }; template <class> struct ad; template <class af> struct ag { using ah = ad<af>; using ai = ah::ai; }; template <int> class aj; struct x { using ai = aj<4>; }; template <class> struct ad { using h = x; using ai = h::ai; }; struct y { using i = ag<int>::ai; }; class z { public: using ak = y::i; }; class al { using k = z; using ak = k::ak; public: __attribute__((device)) aa<d ::c, ak> operator()(); }; template <class m, class n> void am(m ao, n ap) { ab<n, m> aq(ap, ao); aq.ar(); } __attribute__((constant)) long as; template <typename at> __attribute__((global)) void au() { at av = *reinterpret_cast<at *>(as); av(); } enum ax { ay, az }; struct ba { static constexpr ax bb = sizeof(int) ? 
ay : az; static constexpr ax bc = bb; }; template <typename at> struct bd { static auto be() { au<at>; } }; template <typename, typename, ax> struct bf; template <typename at, typename ae> struct bf<at, ae, ay> { using bg = bd<at>; static void bh() { bg::be; } }; template <typename, typename, ax = ba::bc> struct o; template <typename at, int bi, int bj, ax bk> struct o<at, ac<bi, bj>, bk> { using bg = bf<at, ac<>, bk>; o(at, int, int, int, int *, bool) { bg::bh; } }; int t, bl, bm; template <typename at, typename ae = ac<>> void bo(at ao, bool ap) { o<at, ae>(ao, bl, bm, 0, &t, ap); } class C { public: __attribute__((device)) C(void *, int, int, void *, int, int, int); }; template <typename n, typename... aw> class ab<n, l<aw...>> { public: using bp = l<>; using bq = int; using br = C; using an = bp; n bs; bq bt; int bu; int bv; void *bw; template <typename> __attribute__((device)) aa<e ::c> bx(br ao) { bs(int(), ao); } __attribute__((device)) void operator()() { for (int by;;) bx<an>(br(0, bu, bv, bw, 1, by, bt)); } void ar() { using p = ab; bo<p>; } ab(n, bp); }; template <typename> class bz { void ca(); }; struct cb { enum { c = 6 }; }; template <> class aj<4> { public: using ai = double; __attribute__((device)) ai &operator[](int); }; struct cc { template <typename, typename> using cd = cb; static constexpr int ce = cd<double, int>::c; }; template <typename cf> struct cg { using u = cc; using q = double; using r = double; using ci = al; using br = C; ci s; int w; cg(cf, int, int, r); __attribute__((device)) void operator()(int, br) { q *block[u::ce]{}; auto v = s(); for (int cj;; ++cj) v[cj] = block[cj][0]; } void ck() { int cl; l bn(0, cl, w); am(bn, *this); } }; template <typename> void cm() { double cn; int btdm, co, dn; cg dm(btdm, co, dn, cn); dm.ck(); } template <typename cf> void bz<cf>::ca() { cm<cf>; } template class bz<int>; ```