llvm / llvm-project

The LLVM Project is a collection of modular and reusable compiler and toolchain technologies.
http://llvm.org
Other
29.09k stars 12k forks source link

Dead Code Elimination Regression at -Oz (trunk vs. 10.0.1) #53318

Open Quarub opened 2 years ago

Quarub commented 2 years ago

cat case.c #6398 case.txt

/* Reproducer: the call to foo() below is unreachable and should be removed. */
int a;
void foo(); /* declared only; no definition is part of this test case */
int main() {
  char b = 0;
  /* The outer loop body runs exactly once: b starts at 0 and the step sets
     b = 1, after which b <= 0 is false. */
  for (; b <= 0; b = 1)
    /* Decrements the global a until it becomes negative. */
    for (; a >= 0; a--)
      /* b is a local that is still 0 whenever control gets here, so this
         condition is never true and foo() is dead code. */
      if (b)
        foo();
}

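clang-f29256a64ac11cf59cea878c8d1ba9537db4f523 (trunk) -Oz cannot eliminate the call to `foo`, but clang-llvmorg-10.0.1 -Oz can. The call is dead because the inner loop only executes while `b == 0`, so `if (b)` is never true.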

Target: x86_64-unknown-linux-gnu


clang-f29256a64ac11cf59cea878c8d1ba9537db4f523 (trunk) -Oz [-emit-llvm] -S -o /dev/stdout case.c

Emitted IR

```ll
; ModuleID = 'case.c'
source_filename = "case.c"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@a = dso_local local_unnamed_addr global i32 0, align 4

; Function Attrs: minsize nounwind optsize uwtable
define dso_local i32 @main() local_unnamed_addr #0 {
  br label %1

1:                                                ; preds = %6, %0
  %2 = phi i1 [ false, %0 ], [ true, %6 ]
  %3 = phi i1 [ true, %0 ], [ false, %6 ]
  br i1 %2, label %15, label %4

4:                                                ; preds = %1
  %5 = load i32, i32* @a, align 4, !tbaa !3
  br label %6

6:                                                ; preds = %4, %12
  %7 = phi i32 [ %5, %4 ], [ %14, %12 ]
  %8 = icmp sgt i32 %7, -1
  br i1 %8, label %9, label %1, !llvm.loop !7

9:                                                ; preds = %6
  br i1 %3, label %12, label %10

10:                                               ; preds = %9
  tail call void (...) @foo() #2
  %11 = load i32, i32* @a, align 4, !tbaa !3
  br label %12

12:                                               ; preds = %9, %10
  %13 = phi i32 [ %7, %9 ], [ %11, %10 ]
  %14 = add nsw i32 %13, -1
  store i32 %14, i32* @a, align 4, !tbaa !3
  br label %6, !llvm.loop !9

15:                                               ; preds = %1
  ret i32 0
}

; Function Attrs: minsize optsize
declare dso_local void @foo(...) local_unnamed_addr #1

attributes #0 = { minsize nounwind optsize uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #1 = { minsize optsize "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #2 = { minsize nounwind optsize }

!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"uwtable", i32 1}
!2 = !{!"clang version 14.0.0 (https://github.com/llvm/llvm-project.git f29256a64ac11cf59cea878c8d1ba9537db4f523)"}
!3 = !{!4, !4, i64 0}
!4 = !{!"int", !5, i64 0}
!5 = !{!"omnipotent char", !6, i64 0}
!6 = !{!"Simple C/C++ TBAA"}
!7 = distinct !{!7, !8}
!8 = !{!"llvm.loop.mustprogress"}
!9 = distinct !{!9, !8}
```

Reduced assembly

```asm
main:                                   # @main
        .cfi_startproc
# %bb.0:
        pushq   %rbx
        .cfi_def_cfa_offset 16
        .cfi_offset %rbx, -16
        movb    $1, %bl
        xorl    %eax, %eax
.LBB0_1:                                # =>This Loop Header: Depth=1
                                        #     Child Loop BB0_3 Depth 2
        testb   $1, %al
        jne     .LBB0_8
# %bb.2:                                #   in Loop: Header=BB0_1 Depth=1
        movl    a(%rip), %eax
.LBB0_3:                                #   Parent Loop BB0_1 Depth=1
                                        # =>  This Inner Loop Header: Depth=2
        testl   %eax, %eax
        js      .LBB0_4
# %bb.5:                                #   in Loop: Header=BB0_3 Depth=2
        testb   $1, %bl
        jne     .LBB0_7
# %bb.6:                                #   in Loop: Header=BB0_3 Depth=2
        xorl    %eax, %eax
        callq   foo
        movl    a(%rip), %eax
.LBB0_7:                                #   in Loop: Header=BB0_3 Depth=2
        decl    %eax
        movl    %eax, a(%rip)
        jmp     .LBB0_3
.LBB0_4:                                #   in Loop: Header=BB0_1 Depth=1
        movb    $1, %al
        xorl    %ebx, %ebx
        jmp     .LBB0_1
.LBB0_8:
        xorl    %eax, %eax
        popq    %rbx
        .cfi_def_cfa_offset 8
        retq
.Lfunc_end0:
        .size   main, .Lfunc_end0-main
```


clang-llvmorg-10.0.1 -Oz [-emit-llvm] -S -o /dev/stdout case.c

Emitted IR

```ll
; ModuleID = 'case.c'
source_filename = "case.c"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@a = common dso_local local_unnamed_addr global i32 0, align 4

; Function Attrs: minsize nounwind optsize uwtable
define dso_local i32 @main() local_unnamed_addr #0 {
  %1 = load i32, i32* @a, align 4, !tbaa !2
  br label %2

2:                                                ; preds = %0, %5
  %3 = phi i32 [ %1, %0 ], [ %6, %5 ]
  %4 = icmp sgt i32 %3, -1
  br i1 %4, label %5, label %7

5:                                                ; preds = %2
  %6 = add nsw i32 %3, -1
  store i32 %6, i32* @a, align 4, !tbaa !2
  br label %2

7:                                                ; preds = %2
  ret i32 0
}

attributes #0 = { minsize nounwind optsize uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }

!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"}
!2 = !{!3, !3, i64 0}
!3 = !{!"int", !4, i64 0}
!4 = !{!"omnipotent char", !5, i64 0}
!5 = !{!"Simple C/C++ TBAA"}
```

Reduced assembly

```asm
main:                                   # @main
        .cfi_startproc
# %bb.0:
        movl    a(%rip), %eax
.LBB0_1:                                # =>This Inner Loop Header: Depth=1
        testl   %eax, %eax
        js      .LBB0_3
# %bb.2:                                #   in Loop: Header=BB0_1 Depth=1
        decl    %eax
        movl    %eax, a(%rip)
        jmp     .LBB0_1
.LBB0_3:
        xorl    %eax, %eax
        retq
.Lfunc_end0:
        .size   main, .Lfunc_end0-main
```


Bisection

Bisected to: 23291b9863c8af7ad348c4a7d85d8d784df88eb1 Committed by: @max-quazan


clang-23291b9863c8af7ad348c4a7d85d8d784df88eb1 -Oz [-emit-llvm] -S -o /dev/stdout case.c

Emitted IR

```ll
; ModuleID = 'case.c'
source_filename = "case.c"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@a = dso_local local_unnamed_addr global i32 0, align 4

; Function Attrs: minsize nounwind optsize uwtable
define dso_local i32 @main() local_unnamed_addr #0 {
  br label %1

1:                                                ; preds = %6, %0
  %2 = phi i1 [ true, %0 ], [ false, %6 ]
  %3 = phi i1 [ false, %0 ], [ true, %6 ]
  br i1 %2, label %4, label %15

4:                                                ; preds = %1
  %5 = load i32, i32* @a, align 4, !tbaa !2
  br label %6

6:                                                ; preds = %4, %12
  %7 = phi i32 [ %5, %4 ], [ %14, %12 ]
  %8 = icmp sgt i32 %7, -1
  br i1 %8, label %9, label %1

9:                                                ; preds = %6
  br i1 %3, label %10, label %12

10:                                               ; preds = %9
  tail call void (...) @foo() #2
  %11 = load i32, i32* @a, align 4, !tbaa !2
  br label %12

12:                                               ; preds = %9, %10
  %13 = phi i32 [ %7, %9 ], [ %11, %10 ]
  %14 = add nsw i32 %13, -1
  store i32 %14, i32* @a, align 4, !tbaa !2
  br label %6

15:                                               ; preds = %1
  ret i32 0
}

; Function Attrs: minsize optsize
declare dso_local void @foo(...) local_unnamed_addr #1

attributes #0 = { minsize nounwind optsize uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { minsize optsize "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { minsize nounwind optsize }

!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 11.0.0 (https://github.com/llvm/llvm-project.git 23291b9863c8af7ad348c4a7d85d8d784df88eb1)"}
!2 = !{!3, !3, i64 0}
!3 = !{!"int", !4, i64 0}
!4 = !{!"omnipotent char", !5, i64 0}
!5 = !{!"Simple C/C++ TBAA"}
```

Reduced assembly

```asm
main:                                   # @main
        .cfi_startproc
# %bb.0:
        pushq   %rbx
        .cfi_def_cfa_offset 16
        .cfi_offset %rbx, -16
        movb    $1, %al
        xorl    %ebx, %ebx
.LBB0_1:                                # =>This Loop Header: Depth=1
                                        #     Child Loop BB0_3 Depth 2
        testb   $1, %al
        je      .LBB0_8
# %bb.2:                                #   in Loop: Header=BB0_1 Depth=1
        movl    a(%rip), %eax
.LBB0_3:                                #   Parent Loop BB0_1 Depth=1
                                        # =>  This Inner Loop Header: Depth=2
        testl   %eax, %eax
        js      .LBB0_4
# %bb.5:                                #   in Loop: Header=BB0_3 Depth=2
        testb   $1, %bl
        je      .LBB0_7
# %bb.6:                                #   in Loop: Header=BB0_3 Depth=2
        xorl    %eax, %eax
        callq   foo
        movl    a(%rip), %eax
.LBB0_7:                                #   in Loop: Header=BB0_3 Depth=2
        decl    %eax
        movl    %eax, a(%rip)
        jmp     .LBB0_3
.LBB0_4:                                #   in Loop: Header=BB0_1 Depth=1
        movb    $1, %bl
        xorl    %eax, %eax
        jmp     .LBB0_1
.LBB0_8:
        xorl    %eax, %eax
        popq    %rbx
        .cfi_def_cfa_offset 8
        retq
.Lfunc_end0:
        .size   main, .Lfunc_end0-main
```


Previous commit: 2e5940cf291517b3f7b31441f13812afa02a7f9b

clang-2e5940cf291517b3f7b31441f13812afa02a7f9b -Oz [-emit-llvm] -S -o /dev/stdout case.c

Emitted IR

```ll
; ModuleID = 'case.c'
source_filename = "case.c"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@a = dso_local local_unnamed_addr global i32 0, align 4

; Function Attrs: minsize nounwind optsize uwtable
define dso_local i32 @main() local_unnamed_addr #0 {
  %1 = load i32, i32* @a, align 4, !tbaa !2
  br label %2

2:                                                ; preds = %0, %5
  %3 = phi i32 [ %1, %0 ], [ %6, %5 ]
  %4 = icmp sgt i32 %3, -1
  br i1 %4, label %5, label %7

5:                                                ; preds = %2
  %6 = add nsw i32 %3, -1
  store i32 %6, i32* @a, align 4, !tbaa !2
  br label %2

7:                                                ; preds = %2
  ret i32 0
}

attributes #0 = { minsize nounwind optsize uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }

!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 11.0.0 (https://github.com/llvm/llvm-project.git 2e5940cf291517b3f7b31441f13812afa02a7f9b)"}
!2 = !{!3, !3, i64 0}
!3 = !{!"int", !4, i64 0}
!4 = !{!"omnipotent char", !5, i64 0}
!5 = !{!"Simple C/C++ TBAA"}
```

Reduced assembly

```asm
main:                                   # @main
        .cfi_startproc
# %bb.0:
        movl    a(%rip), %eax
.LBB0_1:                                # =>This Inner Loop Header: Depth=1
        testl   %eax, %eax
        js      .LBB0_3
# %bb.2:                                #   in Loop: Header=BB0_1 Depth=1
        decl    %eax
        movl    %eax, a(%rip)
        jmp     .LBB0_1
.LBB0_3:
        xorl    %eax, %eax
        retq
.Lfunc_end0:
        .size   main, .Lfunc_end0-main
```

Edit: Added IR

fhahn commented 2 years ago

cc @max-quazan: it looks like this may be related to one of your changes.