llvm / llvm-project

The LLVM Project is a collection of modular and reusable compiler and toolchain technologies.
http://llvm.org
Other
29.09k stars 12k forks source link

Dead Code Elimination Regression at -O3 (trunk vs. 13.0.0) #53316

Open Quarub opened 2 years ago

Quarub commented 2 years ago

cat case.c #5901 case.txt

// File-scope objects with no initializer, so they start at zero.
short b;
char c, d;
// File-local flag/counter that starts at 1; main branches on it.
static int e = 1;
// Declared but not defined in this test case. The report checks whether
// the calls to it in main are eliminated at -O3.
void foo(void);
// Returns f + g converted back to char. The parentheses around the name
// are redundant: this is an ordinary function named a.
char(a)(char f, char g) { return f + g; }
// Reproducer entry point: the else branch below contains the calls to foo
// that the report expects the optimizer to remove.
int main() {
  // The body never changes c, so this loop runs either zero times or
  // forever; execution continues past it only when c == 0.
  for (; c;)
    e++;
  if (e)
    a(--d, ++c);
  else {
    // Taken only when e == 0. e starts at 1 and is written only by the
    // loop above and by `e = b` inside this branch.
    // 2147483651 == 2^31 + 3 (0x80000003): a nonzero start value for h,
    // not a minimum of any type.
    unsigned h = 2147483651;
    for (;;)
      // h is nonzero on entry and is 2 after each pass of the inner loop,
      // so this assignment is never reached.
      if (h < 1)
        e = b;
      else {
        h = 0;
        // Runs for h == 0 and h == 1 (a(h, 1) yields 1, then 2), i.e. two
        // calls to foo per iteration of the enclosing infinite loop.
        for (; h <= 1; h = a(h, 1))
          foo();
      }
  }
}

clang-f29256a64ac11cf59cea878c8d1ba9537db4f523 (trunk) at -O3 cannot eliminate the call to `foo`, but clang-llvmorg-13.0.0 at -O3 can.

Target: x86_64-unknown-linux-gnu


clang-f29256a64ac11cf59cea878c8d1ba9537db4f523 (trunk) -O3 [-emit-llvm] -S -o /dev/stdout case.c

Emitted IR

```ll ; ModuleID = 'case.c' source_filename = "case.c" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @c = dso_local local_unnamed_addr global i8 0, align 1 @e = internal unnamed_addr global i32 1, align 4 @d = dso_local local_unnamed_addr global i8 0, align 1 @b = dso_local local_unnamed_addr global i16 0, align 2 ; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone uwtable willreturn define dso_local signext i8 @a(i8 noundef signext %0, i8 noundef signext %1) local_unnamed_addr #0 { %3 = add i8 %1, %0 ret i8 %3 } ; Function Attrs: nounwind uwtable define dso_local i32 @main() local_unnamed_addr #1 { %1 = load i8, i8* @c, align 1, !tbaa !3 %2 = icmp eq i8 %1, 0 br i1 %2, label %4, label %3 3: ; preds = %0, %3 br label %3 4: ; preds = %0 %5 = load i32, i32* @e, align 4, !tbaa !6 %6 = icmp eq i32 %5, 0 br i1 %6, label %10, label %7 7: ; preds = %4 %8 = load i8, i8* @d, align 1, !tbaa !3 %9 = add i8 %8, -1 store i8 %9, i8* @d, align 1, !tbaa !3 store i8 1, i8* @c, align 1, !tbaa !3 ret i32 0 10: ; preds = %4, %10 tail call void @foo() #3 tail call void @foo() #3 br label %10 } declare dso_local void @foo() local_unnamed_addr #2 attributes #0 = { mustprogress nofree norecurse nosync nounwind readnone uwtable willreturn "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } attributes #1 = { nounwind uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } attributes #2 = { "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } 
attributes #3 = { nounwind } !llvm.module.flags = !{!0, !1} !llvm.ident = !{!2} !0 = !{i32 1, !"wchar_size", i32 4} !1 = !{i32 7, !"uwtable", i32 1} !2 = !{!"clang version 14.0.0 (https://github.com/llvm/llvm-project.git f29256a64ac11cf59cea878c8d1ba9537db4f523)"} !3 = !{!4, !4, i64 0} !4 = !{!"omnipotent char", !5, i64 0} !5 = !{!"Simple C/C++ TBAA"} !6 = !{!7, !7, i64 0} !7 = !{!"int", !4, i64 0} ```

Reduced assembly

```asm main: # @main .cfi_startproc # %bb.0: pushq %rax .cfi_def_cfa_offset 16 cmpb $0, c(%rip) je .LBB1_2 .p2align 4, 0x90 .LBB1_1: # =>This Inner Loop Header: Depth=1 jmp .LBB1_1 .LBB1_2: cmpl $0, e(%rip) je .LBB1_4 # %bb.3: addb $-1, d(%rip) movb $1, c(%rip) xorl %eax, %eax popq %rcx .cfi_def_cfa_offset 8 retq .p2align 4, 0x90 .LBB1_4: # =>This Inner Loop Header: Depth=1 .cfi_def_cfa_offset 16 callq foo callq foo jmp .LBB1_4 .Lfunc_end1: .size main, .Lfunc_end1-main ```


clang-llvmorg-13.0.0 -O3 [-emit-llvm] -S -o /dev/stdout case.c

Emitted IR

```ll ; ModuleID = 'case.c' source_filename = "case.c" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @c = dso_local local_unnamed_addr global i8 0, align 1 @d = dso_local local_unnamed_addr global i8 0, align 1 @b = dso_local local_unnamed_addr global i16 0, align 2 ; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone uwtable willreturn define dso_local signext i8 @a(i8 signext %0, i8 signext %1) local_unnamed_addr #0 { %3 = add i8 %1, %0 ret i8 %3 } ; Function Attrs: nounwind uwtable define dso_local i32 @main() local_unnamed_addr #1 { %1 = load i8, i8* @c, align 1, !tbaa !3 %2 = icmp eq i8 %1, 0 br i1 %2, label %4, label %3 3: ; preds = %0, %3 br label %3 4: ; preds = %0 %5 = load i8, i8* @d, align 1, !tbaa !3 %6 = add i8 %5, -1 store i8 %6, i8* @d, align 1, !tbaa !3 store i8 1, i8* @c, align 1, !tbaa !3 ret i32 0 } attributes #0 = { mustprogress nofree norecurse nosync nounwind readnone uwtable willreturn "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } attributes #1 = { nounwind uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } !llvm.module.flags = !{!0, !1} !llvm.ident = !{!2} !0 = !{i32 1, !"wchar_size", i32 4} !1 = !{i32 7, !"uwtable", i32 1} !2 = !{!"clang version 13.0.0 (https://github.com/llvm/llvm-project.git d7b669b3a30345cfcdb2fde2af6f48aa4b94845d)"} !3 = !{!4, !4, i64 0} !4 = !{!"omnipotent char", !5, i64 0} !5 = !{!"Simple C/C++ TBAA"} ```

Reduced assembly

```asm main: # @main .cfi_startproc # %bb.0: cmpb $0, c(%rip) je .LBB1_2 .p2align 4, 0x90 .LBB1_1: # =>This Inner Loop Header: Depth=1 jmp .LBB1_1 .LBB1_2: addb $-1, d(%rip) movb $1, c(%rip) xorl %eax, %eax retq .Lfunc_end1: .size main, .Lfunc_end1-main ```


Bisection

Bisected to: ce4fa93db8b2511c389f27fba3e72b5b70c1b7db Committed by: @anton-afanasyev


clang-ce4fa93db8b2511c389f27fba3e72b5b70c1b7db -O3 [-emit-llvm] -S -o /dev/stdout case.c

Emitted IR

```ll ; ModuleID = 'case.c' source_filename = "case.c" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @c = dso_local local_unnamed_addr global i8 0, align 1 @e = internal unnamed_addr global i32 1, align 4 @d = dso_local local_unnamed_addr global i8 0, align 1 @b = dso_local local_unnamed_addr global i16 0, align 2 ; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone uwtable willreturn define dso_local signext i8 @a(i8 noundef signext %0, i8 noundef signext %1) local_unnamed_addr #0 { %3 = add i8 %1, %0 ret i8 %3 } ; Function Attrs: nounwind uwtable define dso_local i32 @main() local_unnamed_addr #1 { %1 = load i8, i8* @c, align 1, !tbaa !3 %2 = icmp eq i8 %1, 0 br i1 %2, label %4, label %3 3: ; preds = %0, %3 br label %3 4: ; preds = %0 %5 = load i32, i32* @e, align 4, !tbaa !6 %6 = icmp eq i32 %5, 0 br i1 %6, label %10, label %7 7: ; preds = %4 %8 = load i8, i8* @d, align 1, !tbaa !3 %9 = add i8 %8, -1 store i8 %9, i8* @d, align 1, !tbaa !3 store i8 1, i8* @c, align 1, !tbaa !3 ret i32 0 10: ; preds = %4, %10 tail call void @foo() #3 tail call void @foo() #3 br label %10 } declare dso_local void @foo() local_unnamed_addr #2 attributes #0 = { mustprogress nofree norecurse nosync nounwind readnone uwtable willreturn "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } attributes #1 = { nounwind uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } attributes #2 = { "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } 
attributes #3 = { nounwind } !llvm.module.flags = !{!0, !1} !llvm.ident = !{!2} !0 = !{i32 1, !"wchar_size", i32 4} !1 = !{i32 7, !"uwtable", i32 1} !2 = !{!"clang version 14.0.0 (https://github.com/llvm/llvm-project.git ce4fa93db8b2511c389f27fba3e72b5b70c1b7db)"} !3 = !{!4, !4, i64 0} !4 = !{!"omnipotent char", !5, i64 0} !5 = !{!"Simple C/C++ TBAA"} !6 = !{!7, !7, i64 0} !7 = !{!"int", !4, i64 0} ```

Reduced assembly

```asm main: # @main .cfi_startproc # %bb.0: pushq %rax .cfi_def_cfa_offset 16 cmpb $0, c(%rip) je .LBB1_2 .p2align 4, 0x90 .LBB1_1: # =>This Inner Loop Header: Depth=1 jmp .LBB1_1 .LBB1_2: cmpl $0, e(%rip) je .LBB1_4 # %bb.3: addb $-1, d(%rip) movb $1, c(%rip) xorl %eax, %eax popq %rcx .cfi_def_cfa_offset 8 retq .p2align 4, 0x90 .LBB1_4: # =>This Inner Loop Header: Depth=1 .cfi_def_cfa_offset 16 callq foo callq foo jmp .LBB1_4 .Lfunc_end1: .size main, .Lfunc_end1-main ```


Previous commit: 28b3cac7cf403bfc824299087a7420783c2d9311

clang-28b3cac7cf403bfc824299087a7420783c2d9311 -O3 [-emit-llvm] -S -o /dev/stdout case.c

Emitted IR

```ll ; ModuleID = 'case.c' source_filename = "case.c" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @c = dso_local local_unnamed_addr global i8 0, align 1 @d = dso_local local_unnamed_addr global i8 0, align 1 @b = dso_local local_unnamed_addr global i16 0, align 2 ; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone uwtable willreturn define dso_local signext i8 @a(i8 noundef signext %0, i8 noundef signext %1) local_unnamed_addr #0 { %3 = add i8 %1, %0 ret i8 %3 } ; Function Attrs: nounwind uwtable define dso_local i32 @main() local_unnamed_addr #1 { %1 = load i8, i8* @c, align 1, !tbaa !3 %2 = icmp eq i8 %1, 0 br i1 %2, label %4, label %3 3: ; preds = %0, %3 br label %3 4: ; preds = %0 %5 = load i8, i8* @d, align 1, !tbaa !3 %6 = add i8 %5, -1 store i8 %6, i8* @d, align 1, !tbaa !3 store i8 1, i8* @c, align 1, !tbaa !3 ret i32 0 } attributes #0 = { mustprogress nofree norecurse nosync nounwind readnone uwtable willreturn "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } attributes #1 = { nounwind uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } !llvm.module.flags = !{!0, !1} !llvm.ident = !{!2} !0 = !{i32 1, !"wchar_size", i32 4} !1 = !{i32 7, !"uwtable", i32 1} !2 = !{!"clang version 14.0.0 (https://github.com/llvm/llvm-project.git 28b3cac7cf403bfc824299087a7420783c2d9311)"} !3 = !{!4, !4, i64 0} !4 = !{!"omnipotent char", !5, i64 0} !5 = !{!"Simple C/C++ TBAA"} ```

Reduced assembly

```asm main: # @main .cfi_startproc # %bb.0: cmpb $0, c(%rip) je .LBB1_2 .p2align 4, 0x90 .LBB1_1: # =>This Inner Loop Header: Depth=1 jmp .LBB1_1 .LBB1_2: addb $-1, d(%rip) movb $1, c(%rip) xorl %eax, %eax retq .Lfunc_end1: .size main, .Lfunc_end1-main ```

Edit: Added IR

anton-afanasyev commented 2 years ago

Thanks, assigned to myself.

fhahn commented 2 years ago

cc @fhahn

fhahn commented 2 years ago

Still reproduces on current main: https://clang.godbolt.org/z/93d8xYzd9

fhahn commented 2 years ago

@anton-afanasyev any progress on the issue?