llvm / llvm-project

The LLVM Project is a collection of modular and reusable compiler and toolchain technologies.
http://llvm.org
Other
29.31k stars 12.11k forks source link

AMDGPU global-isel unsupported G_PHI s1 #114817

Open tpopp opened 3 weeks ago

tpopp commented 3 weeks ago

It's unclear to me at this time whether instruction selection is actually the problem, or whether the earlier steps are miscompiling.

Command: llc -global-isel reduced.ll

Input:

target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
target triple = "amdgcn-amd-amdhsa"

define amdgpu_kernel void @_dynamic_unpack_simple_dispatch_0_unpack_i32(i64 %0, i1 %min.iters.check) {
.preheader:
  br i1 %min.iters.check, label %vector.ph, label %Flow97

vector.ph:                                        ; preds = %.preheader
  %cmp.n = icmp sgt i64 %0, 0
  br label %Flow97

Flow97:                                           ; preds = %vector.ph, %.preheader
  %1 = phi i1 [ %cmp.n, %vector.ph ], [ false, %.preheader ]
  br i1 %1, label %scalar.ph.preheader, label %Flow98

scalar.ph.preheader:                              ; preds = %Flow97
  ret void

Flow98:                                           ; preds = %Flow97
  ret void
}
# *** IR Dump Before InstructionSelect (instruction-select) ***:                                                                                                                                                        # Machine code for function _dynamic_unpack_simple_dispatch_0_unpack_i32: IsSSA, TracksLiveness, Legalized, RegBankSelected                                                                                                      
Function Live Ins: $sgpr0_sgpr1_sgpr2_sgpr3 in %2, $sgpr4_sgpr5 in %3, $sgpr6_sgpr7 in %4, $sgpr8_sgpr9 in %5, $vgpr0 in %6, $vgpr1 in %7, $vgpr2 in %8, $sgpr10 in %9, $sgpr11 in %10, $sgpr12 in %11, $sgpr13 in %12           

bb.1..preheader:                                                                                                                                                                                                                 
  successors: %bb.2(0x40000000), %bb.3(0x40000000); %bb.2(50.00%), %bb.3(50.00%)                                                                                                                                                 
  liveins: $sgpr6_sgpr7                                                                                                                                                                                                          
  %4:sgpr(p4) = COPY $sgpr6_sgpr7                                                                                                                                                                                                
  %14:sgpr(s64) = G_CONSTANT i64 8                                                                                                                                                                                               
  %15:sgpr(p4) = nuw G_PTR_ADD %4:sgpr, %14:sgpr(s64)                                                                                                                                                                            
  %16:sgpr(s32) = G_LOAD %15:sgpr(p4) :: (dereferenceable invariant load (s32) from %ir.min.iters.check.kernarg.offset.align.down, align 8, addrspace 4)                                                                         
  %17:sgpr(s1) = G_TRUNC %16:sgpr(s32)                                                                                                                                                                                           
  %28:sgpr(s32) = G_CONSTANT i32 1                                                                                                                                                                                               
  %23:sgpr(s1) = G_TRUNC %28:sgpr(s32)                                                                                                                                                                                           
  %29:sgpr(s32) = G_ANYEXT %17:sgpr(s1)                                                                                                                                                                                          
  %30:sgpr(s32) = G_ANYEXT %23:sgpr(s1)                                                                                                                                                                                          
  %31:sgpr(s32) = G_XOR %29:sgpr, %30:sgpr                                                                                                                                                                                       
  %24:sgpr(s1) = G_TRUNC %31:sgpr(s32)                                                                                                                                                                                           
  %32:sgpr(s32) = G_CONSTANT i32 0                                                                                                                                                                                               
  %22:sgpr(s1) = G_TRUNC %32:sgpr(s32)                                                                                                                                                                                           
  %33:sgpr(s32) = G_ZEXT %24:sgpr(s1)                                                                                                                                                                                            
  %36:vcc(s1) = COPY %22:sgpr(s1)                                                                                                                                                                                                
  G_BRCOND %33:sgpr(s32), %bb.3                                                                                                                                                                                                  
  G_BR %bb.2                                                                                                                                                                                                                     

bb.2.vector.ph:                                                                                                                                                                                                                  
; predecessors: %bb.1
  successors: %bb.3(0x80000000); %bb.3(100.00%)

  %18:sgpr(s64) = G_LOAD %4:sgpr(p4) :: (dereferenceable invariant load (s64) from %ir..kernarg.offset1, align 16, addrspace 4)
  %27:sgpr(s64) = G_CONSTANT i64 0
  %34:vgpr(s64) = COPY %18:sgpr(s64)
  %35:vgpr(s64) = COPY %27:sgpr(s64)
  %20:vcc(s1) = G_ICMP intpred(sgt), %34:vgpr(s64), %35:vgpr

bb.3.Flow97:
; predecessors: %bb.1, %bb.2
  successors: %bb.4(0x40000000), %bb.5(0x40000000); %bb.4(50.00%), %bb.5(50.00%)

  %21:vcc(s1) = G_PHI %36:vcc(s1), %bb.1, %20:vcc(s1), %bb.2
  %37:sgpr(s32) = G_CONSTANT i32 1
  %25:sgpr(s1) = G_TRUNC %37:sgpr(s32)
  %38:vcc(s1) = COPY %25:sgpr(s1)
  %26:vcc(s1) = G_XOR %21:vcc, %38:vcc
  G_BRCOND %26:vcc(s1), %bb.5
  G_BR %bb.4

bb.4.scalar.ph.preheader:
; predecessors: %bb.3

  S_ENDPGM 0

bb.5.Flow98:
; predecessors: %bb.3

  S_ENDPGM 0
# End machine code for function _dynamic_unpack_simple_dispatch_0_unpack_i32.

LLVM ERROR: cannot select: %21:sreg_64_xexec(s1) = G_PHI %36:vcc(s1), %bb.1, %20:vcc(s1), %bb.2 (in function: _dynamic_unpack_simple_dispatch_0_unpack_i32)
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.      Program arguments: /home/tpopp/gisel/iree-build/llvm-project/bin/llc -global-isel reduced.ll
1.      Running pass 'CallGraph Pass Manager' on module 'reduced.ll'.
2.      Running pass 'InstructionSelect' on function '@_dynamic_unpack_simple_dispatch_0_unpack_i32'
 #0 0x000065189d510d87 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) /home/tpopp/gisel/iree/third_party/llvm-project/llvm/lib/Support/Unix/Signals.inc:723:13
 #1 0x000065189d50efc0 llvm::sys::RunSignalHandlers() /home/tpopp/gisel/iree/third_party/llvm-project/llvm/lib/Support/Signals.cpp:106:18
 #2 0x000065189d51144a SignalHandler(int) /home/tpopp/gisel/iree/third_party/llvm-project/llvm/lib/Support/Unix/Signals.inc:413:1
 #3 0x0000722d07842520 (/lib/x86_64-linux-gnu/libc.so.6+0x42520)
 #4 0x0000722d078969fc __pthread_kill_implementation ./nptl/./nptl/pthread_kill.c:44:76
 #5 0x0000722d078969fc __pthread_kill_internal ./nptl/./nptl/pthread_kill.c:78:10
 #6 0x0000722d078969fc pthread_kill ./nptl/./nptl/pthread_kill.c:89:10
 #7 0x0000722d07842476 gsignal ./signal/../sysdeps/posix/raise.c:27:6
 #8 0x0000722d078287f3 abort ./stdlib/./stdlib/abort.c:81:7
 #9 0x000065189d4857b3 llvm::report_fatal_error(llvm::Twine const&, bool) /home/tpopp/gisel/iree/third_party/llvm-project/llvm/lib/Support/ErrorHandling.cpp:126:5
#10 0x000065189d9bb6e4 (/home/tpopp/gisel/iree-build/llvm-project/bin/llc+0x604b6e4)
#11 0x000065189d9bb9c8 llvm::DiagnosticInfoOptimizationBase::~DiagnosticInfoOptimizationBase() /home/tpopp/gisel/iree/third_party/llvm-project/llvm/include/llvm/IR/DiagnosticInfo.h:414:7
#12 0x000065189d9bb9c8 llvm::reportGISelFailure(llvm::MachineFunction&, llvm::TargetPassConfig const&, llvm::MachineOptimizationRemarkEmitter&, char const*, llvm::StringRef, llvm::MachineInstr const&) /home/tpopp/gisel/iree/third_party/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp:277:1
#13 0x000065189d94e16b llvm::InstructionSelect::selectMachineFunction(llvm::MachineFunction&) /home/tpopp/gisel/iree/third_party/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp:0:11
#14 0x000065189d94d99b llvm::InstructionSelect::runOnMachineFunction(llvm::MachineFunction&)::$_0::operator()() const /home/tpopp/gisel/iree/third_party/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp:146:59
#15 0x000065189d94d99b llvm::detail::scope_exit<llvm::InstructionSelect::runOnMachineFunction(llvm::MachineFunction&)::$_0>::~scope_exit() /home/tpopp/gisel/iree/third_party/llvm-project/llvm/include/llvm/ADT/ScopeExit.h:46:7
#16 0x000065189d94d99b llvm::InstructionSelect::runOnMachineFunction(llvm::MachineFunction&) /home/tpopp/gisel/iree/third_party/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp:158:1
#17 0x000065189c7464c4 llvm::MachineFunctionPass::runOnFunction(llvm::Function&) /home/tpopp/gisel/iree/third_party/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp:0:13
#18 0x000065189cbc3875 llvm::FPPassManager::runOnFunction(llvm::Function&) /home/tpopp/gisel/iree/third_party/llvm-project/llvm/lib/IR/LegacyPassManager.cpp:0:27
#19 0x000065189c2ba43c (anonymous namespace)::CGPassManager::RunPassOnSCC(llvm::Pass*, llvm::CallGraphSCC&, llvm::CallGraph&, bool&, bool&) /home/tpopp/gisel/iree/third_party/llvm-project/llvm/lib/Analysis/CallGraphSCCPass.cpp:180:25
#20 0x000065189c2ba43c (anonymous namespace)::CGPassManager::RunAllPassesOnSCC(llvm::CallGraphSCC&, llvm::CallGraph&, bool&) /home/tpopp/gisel/iree/third_party/llvm-project/llvm/lib/Analysis/CallGraphSCCPass.cpp:469:9
#21 0x000065189c2ba43c (anonymous namespace)::CGPassManager::runOnModule(llvm::Module&) /home/tpopp/gisel/iree/third_party/llvm-project/llvm/lib/Analysis/CallGraphSCCPass.cpp:534:18
#22 0x000065189cbc4002 llvm::legacy::PassManagerImpl::run(llvm::Module&) /home/tpopp/gisel/iree/third_party/llvm-project/llvm/lib/IR/LegacyPassManager.cpp:0:27
#23 0x000065189b2ab5e3 compileModule(char**, llvm::LLVMContext&) /home/tpopp/gisel/iree/third_party/llvm-project/llvm/tools/llc/llc.cpp:755:17
#24 0x000065189b2ab5e3 main /home/tpopp/gisel/iree/third_party/llvm-project/llvm/tools/llc/llc.cpp:412:22
#25 0x0000722d07829d90 __libc_start_call_main ./csu/../sysdeps/nptl/libc_start_call_main.h:58:16
#26 0x0000722d07829e40 call_init ./csu/../csu/libc-start.c:128:20
#27 0x0000722d07829e40 __libc_start_main ./csu/../csu/libc-start.c:379:5
#28 0x000065189b2a8d25 _start (/home/tpopp/gisel/iree-build/llvm-project/bin/llc+0x3938d25)
llvmbot commented 3 weeks ago

@llvm/issue-subscribers-backend-amdgpu

Author: Tres (tpopp)

It's unclear to me at this time if instruction selection is actually the problem or the previous steps are miscompiling. Command: `llc -global-isel reduced.ll` Input: ``` target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" target triple = "amdgcn-amd-amdhsa" define amdgpu_kernel void @_dynamic_unpack_simple_dispatch_0_unpack_i32(i64 %0, i1 %min.iters.check) { .preheader: br i1 %min.iters.check, label %vector.ph, label %Flow97 vector.ph: ; preds = %.preheader %cmp.n = icmp sgt i64 %0, 0 br label %Flow97 Flow97: ; preds = %vector.ph, %.preheader %1 = phi i1 [ %cmp.n, %vector.ph ], [ false, %.preheader ] br i1 %1, label %scalar.ph.preheader, label %Flow98 scalar.ph.preheader: ; preds = %Flow97 ret void Flow98: ; preds = %Flow97 ret void } ``` ``` # *** IR Dump Before InstructionSelect (instruction-select) ***: # Machine code for function _dynamic_unpack_simple_dispatch_0_unpack_i32: IsSSA, TracksLiveness, Legalized, RegBankSelected Function Live Ins: $sgpr0_sgpr1_sgpr2_sgpr3 in %2, $sgpr4_sgpr5 in %3, $sgpr6_sgpr7 in %4, $sgpr8_sgpr9 in %5, $vgpr0 in %6, $vgpr1 in %7, $vgpr2 in %8, $sgpr10 in %9, $sgpr11 in %10, $sgpr12 in %11, $sgpr13 in %12 bb.1..preheader: successors: %bb.2(0x40000000), %bb.3(0x40000000); %bb.2(50.00%), %bb.3(50.00%) liveins: $sgpr6_sgpr7 %4:sgpr(p4) = COPY $sgpr6_sgpr7 %14:sgpr(s64) = G_CONSTANT i64 8 %15:sgpr(p4) = nuw G_PTR_ADD %4:sgpr, %14:sgpr(s64) %16:sgpr(s32) = G_LOAD %15:sgpr(p4) :: (dereferenceable invariant load (s32) from %ir.min.iters.check.kernarg.offset.align.down, align 8, addrspace 4) %17:sgpr(s1) = G_TRUNC %16:sgpr(s32) %28:sgpr(s32) = G_CONSTANT i32 1 %23:sgpr(s1) = G_TRUNC %28:sgpr(s32) %29:sgpr(s32) = G_ANYEXT %17:sgpr(s1) %30:sgpr(s32) = G_ANYEXT %23:sgpr(s1) %31:sgpr(s32) = G_XOR %29:sgpr, %30:sgpr %24:sgpr(s1) = G_TRUNC 
%31:sgpr(s32) %32:sgpr(s32) = G_CONSTANT i32 0 %22:sgpr(s1) = G_TRUNC %32:sgpr(s32) %33:sgpr(s32) = G_ZEXT %24:sgpr(s1) %36:vcc(s1) = COPY %22:sgpr(s1) G_BRCOND %33:sgpr(s32), %bb.3 G_BR %bb.2 bb.2.vector.ph: ; predecessors: %bb.1 successors: %bb.3(0x80000000); %bb.3(100.00%) %18:sgpr(s64) = G_LOAD %4:sgpr(p4) :: (dereferenceable invariant load (s64) from %ir..kernarg.offset1, align 16, addrspace 4) %27:sgpr(s64) = G_CONSTANT i64 0 %34:vgpr(s64) = COPY %18:sgpr(s64) %35:vgpr(s64) = COPY %27:sgpr(s64) %20:vcc(s1) = G_ICMP intpred(sgt), %34:vgpr(s64), %35:vgpr bb.3.Flow97: ; predecessors: %bb.1, %bb.2 successors: %bb.4(0x40000000), %bb.5(0x40000000); %bb.4(50.00%), %bb.5(50.00%) %21:vcc(s1) = G_PHI %36:vcc(s1), %bb.1, %20:vcc(s1), %bb.2 %37:sgpr(s32) = G_CONSTANT i32 1 %25:sgpr(s1) = G_TRUNC %37:sgpr(s32) %38:vcc(s1) = COPY %25:sgpr(s1) %26:vcc(s1) = G_XOR %21:vcc, %38:vcc G_BRCOND %26:vcc(s1), %bb.5 G_BR %bb.4 bb.4.scalar.ph.preheader: ; predecessors: %bb.3 S_ENDPGM 0 bb.5.Flow98: ; predecessors: %bb.3 S_ENDPGM 0 # End machine code for function _dynamic_unpack_simple_dispatch_0_unpack_i32. LLVM ERROR: cannot select: %21:sreg_64_xexec(s1) = G_PHI %36:vcc(s1), %bb.1, %20:vcc(s1), %bb.2 (in function: _dynamic_unpack_simple_dispatch_0_unpack_i32) PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace. Stack dump: 0. Program arguments: /home/tpopp/gisel/iree-build/llvm-project/bin/llc -global-isel reduced.ll 1. Running pass 'CallGraph Pass Manager' on module 'reduced.ll'. 2. 
Running pass 'InstructionSelect' on function '@_dynamic_unpack_simple_dispatch_0_unpack_i32' #0 0x000065189d510d87 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) /home/tpopp/gisel/iree/third_party/llvm-project/llvm/lib/Support/Unix/Signals.inc:723:13 #1 0x000065189d50efc0 llvm::sys::RunSignalHandlers() /home/tpopp/gisel/iree/third_party/llvm-project/llvm/lib/Support/Signals.cpp:106:18 #2 0x000065189d51144a SignalHandler(int) /home/tpopp/gisel/iree/third_party/llvm-project/llvm/lib/Support/Unix/Signals.inc:413:1 #3 0x0000722d07842520 (/lib/x86_64-linux-gnu/libc.so.6+0x42520) #4 0x0000722d078969fc __pthread_kill_implementation ./nptl/./nptl/pthread_kill.c:44:76 #5 0x0000722d078969fc __pthread_kill_internal ./nptl/./nptl/pthread_kill.c:78:10 #6 0x0000722d078969fc pthread_kill ./nptl/./nptl/pthread_kill.c:89:10 #7 0x0000722d07842476 gsignal ./signal/../sysdeps/posix/raise.c:27:6 #8 0x0000722d078287f3 abort ./stdlib/./stdlib/abort.c:81:7 #9 0x000065189d4857b3 llvm::report_fatal_error(llvm::Twine const&, bool) /home/tpopp/gisel/iree/third_party/llvm-project/llvm/lib/Support/ErrorHandling.cpp:126:5 #10 0x000065189d9bb6e4 (/home/tpopp/gisel/iree-build/llvm-project/bin/llc+0x604b6e4) #11 0x000065189d9bb9c8 llvm::DiagnosticInfoOptimizationBase::~DiagnosticInfoOptimizationBase() /home/tpopp/gisel/iree/third_party/llvm-project/llvm/include/llvm/IR/DiagnosticInfo.h:414:7 #12 0x000065189d9bb9c8 llvm::reportGISelFailure(llvm::MachineFunction&, llvm::TargetPassConfig const&, llvm::MachineOptimizationRemarkEmitter&, char const*, llvm::StringRef, llvm::MachineInstr const&) /home/tpopp/gisel/iree/third_party/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp:277:1 #13 0x000065189d94e16b llvm::InstructionSelect::selectMachineFunction(llvm::MachineFunction&) /home/tpopp/gisel/iree/third_party/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp:0:11 #14 0x000065189d94d99b llvm::InstructionSelect::runOnMachineFunction(llvm::MachineFunction&)::$_0::operator()() const 
/home/tpopp/gisel/iree/third_party/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp:146:59 #15 0x000065189d94d99b llvm::detail::scope_exit<llvm::InstructionSelect::runOnMachineFunction(llvm::MachineFunction&)::$_0>::~scope_exit() /home/tpopp/gisel/iree/third_party/llvm-project/llvm/include/llvm/ADT/ScopeExit.h:46:7 #16 0x000065189d94d99b llvm::InstructionSelect::runOnMachineFunction(llvm::MachineFunction&) /home/tpopp/gisel/iree/third_party/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp:158:1 #17 0x000065189c7464c4 llvm::MachineFunctionPass::runOnFunction(llvm::Function&) /home/tpopp/gisel/iree/third_party/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp:0:13 #18 0x000065189cbc3875 llvm::FPPassManager::runOnFunction(llvm::Function&) /home/tpopp/gisel/iree/third_party/llvm-project/llvm/lib/IR/LegacyPassManager.cpp:0:27 #19 0x000065189c2ba43c (anonymous namespace)::CGPassManager::RunPassOnSCC(llvm::Pass*, llvm::CallGraphSCC&, llvm::CallGraph&, bool&, bool&) /home/tpopp/gisel/iree/third_party/llvm-project/llvm/lib/Analysis/CallGraphSCCPass.cpp:180:25 #20 0x000065189c2ba43c (anonymous namespace)::CGPassManager::RunAllPassesOnSCC(llvm::CallGraphSCC&, llvm::CallGraph&, bool&) /home/tpopp/gisel/iree/third_party/llvm-project/llvm/lib/Analysis/CallGraphSCCPass.cpp:469:9 #21 0x000065189c2ba43c (anonymous namespace)::CGPassManager::runOnModule(llvm::Module&) /home/tpopp/gisel/iree/third_party/llvm-project/llvm/lib/Analysis/CallGraphSCCPass.cpp:534:18 #22 0x000065189cbc4002 llvm::legacy::PassManagerImpl::run(llvm::Module&) /home/tpopp/gisel/iree/third_party/llvm-project/llvm/lib/IR/LegacyPassManager.cpp:0:27 #23 0x000065189b2ab5e3 compileModule(char**, llvm::LLVMContext&) /home/tpopp/gisel/iree/third_party/llvm-project/llvm/tools/llc/llc.cpp:755:17 #24 0x000065189b2ab5e3 main /home/tpopp/gisel/iree/third_party/llvm-project/llvm/tools/llc/llc.cpp:412:22 #25 0x0000722d07829d90 __libc_start_call_main 
./csu/../sysdeps/nptl/libc_start_call_main.h:58:16 #26 0x0000722d07829e40 call_init ./csu/../csu/libc-start.c:128:20 #27 0x0000722d07829e40 __libc_start_main ./csu/../csu/libc-start.c:379:5 #28 0x000065189b2a8d25 _start (/home/tpopp/gisel/iree-build/llvm-project/bin/llc+0x3938d25) ```
jayfoad commented 3 weeks ago

@petar-avramovic has been working on this.

arsenm commented 3 weeks ago

This is more of an intentional failure, since it's known to be wrong at this point.

tpopp commented 3 weeks ago

Is G_PHI known to be wrong in general, or only with s1 values? And is it the lowering or the representation that has the problem?

petar-avramovic commented 3 weeks ago

The error here is in regbankselect, which is not assigning the correct reg banks according to machine uniformity analysis. This will be fixed soon by the new reg bank select passes. The G_PHI in question is uniform, so it should be assigned to the sgpr reg bank and lowered to S32. Banks should be assigned like this:

body:             |
  bb.1..preheader:
    successors: %bb.2(0x40000000), %bb.3(0x40000000)
    liveins: $sgpr6_sgpr7

    %4:sgpr(p4) = COPY $sgpr6_sgpr7
    %14:sgpr(s64) = G_CONSTANT i64 8
    %15:sgpr(p4) = nuw nusw G_PTR_ADD %4, %14(s64)
    %16:sgpr(s32) = G_LOAD %15(p4) :: (dereferenceable invariant load (s32) from %ir.min.iters.check.kernarg.offset.align.down, align 8, addrspace 4)
    %28:sgpr(s32) = G_CONSTANT i32 1
    %29:sgpr(s32) = G_XOR %16, %28
    %32:sgpr(s32) = G_CONSTANT i32 0
    %34:sgpr(s32) = G_AND %29, %28
    G_BRCOND %34(s32), %bb.3
    G_BR %bb.2

  bb.2.vector.ph:
    successors: %bb.3(0x80000000)

    %18:sgpr(s64) = G_LOAD %4(p4) :: (dereferenceable invariant load (s64) from %ir..kernarg.offset1, align 16, addrspace 4)
    %27:sgpr(s64) = G_CONSTANT i64 0
    %37:vgpr(s64) = COPY %18(s64)
    %38:vgpr(s64) = COPY %27(s64)
    %35:vcc(s1) = G_ICMP intpred(sgt), %37(s64), %38
    %36:sgpr(s32) = G_COPY_SCC_VCC %35(s1)

  bb.3.Flow97:
    successors: %bb.4(0x40000000), %bb.5(0x40000000)

    %39:sgpr(s32) = G_PHI %32(s32), %bb.1, %36(s32), %bb.2
    %42:sgpr(s32) = G_CONSTANT i32 1
    %43:sgpr(s32) = G_XOR %39, %42
    %47:sgpr(s32) = G_AND %43, %42
    G_BRCOND %47(s32), %bb.5
    G_BR %bb.4

  bb.4.scalar.ph.preheader:
    S_ENDPGM 0

  bb.5.Flow98:
    S_ENDPGM 0

G_PHI is known wrong. S1 G_PHIs could even be considered correct. Divergent G_PHIs are lowered/selected to PHI in AMDGPUGlobalISelDivergenceLowering, so S1 G_PHIs are known to be uniform and should be extended to S32 in RegBankSelect. The instruction-select failure is correct, despite the fact that it was introduced before AMDGPUGlobalISelDivergenceLowering. S1 G_PHIs are too complicated to be selected in instruction-select.