llvm / llvm-project

The LLVM Project is a collection of modular and reusable compiler and toolchain technologies.
http://llvm.org
Other
26.79k stars 10.98k forks source link

[RISCV] Call frame size on entry does not match assertion with `-verify-machineinstrs` #97304

Open lukel97 opened 5 days ago

lukel97 commented 5 days ago

Compiling the IR below with `llc -verify-machineinstrs` fails in the machine verifier:

target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
target triple = "riscv64-unknown-linux-gnu"

define i32 @_ZNK2cv12LMSolverImpl3runERKNS_17_InputOutputArrayE(i1 %cmp436) {
entry:
  br label %for.cond

for.cond:                                         ; preds = %for.cond, %entry
  %conv = select i1 %cmp436, i32 32, i32 1
  %call479 = call i32 (ptr, ...) null(ptr null, i32 %conv, i32 0, i32 0, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00)
  br label %for.cond
}
# After Instruction Selection
# Machine code for function _ZNK2cv12LMSolverImpl3runERKNS_17_InputOutputArrayE: IsSSA, TracksLiveness
Function Live Ins: $x10 in %0

bb.0.entry:
  successors: %bb.1(0x80000000); %bb.1(100.00%)
  liveins: $x10
  %0:gpr = COPY $x10
  %1:gpr = COPY %0:gpr

bb.1.for.cond:
; predecessors: %bb.0, %bb.3
  successors: %bb.2(0x40000000), %bb.3(0x40000000); %bb.2(50.00%), %bb.3(50.00%)

  %2:gpr = ANDI %1:gpr, 1
  ADJCALLSTACKDOWN 8, 0, implicit-def dead $x2, implicit $x2
  %3:gpr = COPY $x2
  %4:gprjalr = COPY $x0
  SD %4:gprjalr, %3:gpr, 0 :: (store (s64))
  %5:gpr = ADDI $x0, 1
  %6:gpr = ADDI $x0, 32
  BNE %2:gpr, $x0, %bb.3

bb.2.for.cond:
; predecessors: %bb.1
  successors: %bb.3(0x80000000); %bb.3(100.00%)

bb.3.for.cond:
; predecessors: %bb.1, %bb.2
  successors: %bb.1(0x80000000); %bb.1(100.00%)

  %7:gpr = PHI %6:gpr, %bb.1, %5:gpr, %bb.2
  $x10 = COPY %4:gprjalr
  $x11 = COPY %7:gpr
  $x12 = COPY %4:gprjalr
  $x13 = COPY %4:gprjalr
  $x14 = COPY %4:gprjalr
  $x15 = COPY %4:gprjalr
  $x16 = COPY %4:gprjalr
  $x17 = COPY %4:gprjalr
  PseudoCALLIndirect %4:gprjalr, <regmask $vlenb $x0 $x1 $x8 $x9 $x18 $x19 $x20 $x21 $x22 $x23 $x24 $x25 $x26 $x27 $x8_x9 $x18_x19 $x20_x21 $x22_x23 $x24_x25 $x26_x27>, implicit-def dead $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit-def $x2, implicit-def $x10
  ADJCALLSTACKUP 8, 0, implicit-def dead $x2, implicit $x2
  %8:gpr = COPY $x10
  PseudoBR %bb.1

# End machine code for function _ZNK2cv12LMSolverImpl3runERKNS_17_InputOutputArrayE.

*** Bad machine code: Call frame size on entry does not match value computed from predecessor ***
- function:    _ZNK2cv12LMSolverImpl3runERKNS_17_InputOutputArrayE
- basic block: %bb.2 for.cond (0x144084970)
Call frame size on entry 0 does not match value computed from predecessor 8

*** Bad machine code: Call frame size on entry does not match value computed from predecessor ***
- function:    _ZNK2cv12LMSolverImpl3runERKNS_17_InputOutputArrayE
- basic block: %bb.3 for.cond (0x144084870)
Call frame size on entry 0 does not match value computed from predecessor 8
LLVM ERROR: Found 2 machine code errors.

The test case looks strange, but it was reduced from a crash when compiling OpenCV with `-rva22u64v -mllvm -verify-machineinstrs`.

llvmbot commented 5 days ago

@llvm/issue-subscribers-backend-risc-v

Author: Luke Lau (lukel97)

Compiling the below with `llc -verify-machineinstrs` fails during the machine verifier:

```llvm
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
target triple = "riscv64-unknown-linux-gnu"

define i32 @_ZNK2cv12LMSolverImpl3runERKNS_17_InputOutputArrayE(i1 %cmp436) {
entry:
  br label %for.cond

for.cond:                                         ; preds = %for.cond, %entry
  %conv = select i1 %cmp436, i32 32, i32 1
  %call479 = call i32 (ptr, ...) null(ptr null, i32 %conv, i32 0, i32 0, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00)
  br label %for.cond
}
```

```
# After Instruction Selection
# Machine code for function _ZNK2cv12LMSolverImpl3runERKNS_17_InputOutputArrayE: IsSSA, TracksLiveness
Function Live Ins: $x10 in %0

bb.0.entry:
  successors: %bb.1(0x80000000); %bb.1(100.00%)
  liveins: $x10
  %0:gpr = COPY $x10
  %1:gpr = COPY %0:gpr

bb.1.for.cond:
; predecessors: %bb.0, %bb.3
  successors: %bb.2(0x40000000), %bb.3(0x40000000); %bb.2(50.00%), %bb.3(50.00%)

  %2:gpr = ANDI %1:gpr, 1
  ADJCALLSTACKDOWN 8, 0, implicit-def dead $x2, implicit $x2
  %3:gpr = COPY $x2
  %4:gprjalr = COPY $x0
  SD %4:gprjalr, %3:gpr, 0 :: (store (s64))
  %5:gpr = ADDI $x0, 1
  %6:gpr = ADDI $x0, 32
  BNE %2:gpr, $x0, %bb.3

bb.2.for.cond:
; predecessors: %bb.1
  successors: %bb.3(0x80000000); %bb.3(100.00%)

bb.3.for.cond:
; predecessors: %bb.1, %bb.2
  successors: %bb.1(0x80000000); %bb.1(100.00%)

  %7:gpr = PHI %6:gpr, %bb.1, %5:gpr, %bb.2
  $x10 = COPY %4:gprjalr
  $x11 = COPY %7:gpr
  $x12 = COPY %4:gprjalr
  $x13 = COPY %4:gprjalr
  $x14 = COPY %4:gprjalr
  $x15 = COPY %4:gprjalr
  $x16 = COPY %4:gprjalr
  $x17 = COPY %4:gprjalr
  PseudoCALLIndirect %4:gprjalr, <regmask $vlenb $x0 $x1 $x8 $x9 $x18 $x19 $x20 $x21 $x22 $x23 $x24 $x25 $x26 $x27 $x8_x9 $x18_x19 $x20_x21 $x22_x23 $x24_x25 $x26_x27>, implicit-def dead $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit-def $x2, implicit-def $x10
  ADJCALLSTACKUP 8, 0, implicit-def dead $x2, implicit $x2
  %8:gpr = COPY $x10
  PseudoBR %bb.1

# End machine code for function _ZNK2cv12LMSolverImpl3runERKNS_17_InputOutputArrayE.

*** Bad machine code: Call frame size on entry does not match value computed from predecessor ***
- function:    _ZNK2cv12LMSolverImpl3runERKNS_17_InputOutputArrayE
- basic block: %bb.2 for.cond (0x144084970)
Call frame size on entry 0 does not match value computed from predecessor 8

*** Bad machine code: Call frame size on entry does not match value computed from predecessor ***
- function:    _ZNK2cv12LMSolverImpl3runERKNS_17_InputOutputArrayE
- basic block: %bb.3 for.cond (0x144084870)
Call frame size on entry 0 does not match value computed from predecessor 8
LLVM ERROR: Found 2 machine code errors.
```

The test case looks strange, but it was reduced from a crash when compiling OpenCV with `-rva22u64v -mllvm -verify-machineinstrs`.
topperc commented 5 days ago

Please fix the title

topperc commented 5 days ago

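The fundamental issue here seems to be that we scheduled a select that needs expansion between ADJCALLSTACKDOWN and ADJCALLSTACKUP. Expanding that select splits the block, so in the dump above ADJCALLSTACKDOWN stays in %bb.1 while the matching ADJCALLSTACKUP ends up in %bb.3; the verifier then reports a call frame size mismatch on entry to %bb.2 and %bb.3.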

topperc commented 5 days ago

At one point we had (any_extend (select X, C1, C2)), which was replaced by (select X, aext(C1), aext(C2)). The any_extend was part of the call frame. The new select inherited the DebugLoc and IROrder from the any_extend.

The SelectionDAG scheduler uses the IROrder to prioritize nodes. Because the new select has the IROrder from the any_extend, this caused the select to get scheduled as part of the call lowering.