llvm / llvm-project

The LLVM Project is a collection of modular and reusable compiler and toolchain technologies.
http://llvm.org
Other
27.51k stars 11.3k forks source link

missed optimization: simple for loop #39112

Open andrewrk opened 5 years ago

andrewrk commented 5 years ago
Bugzilla Link 39765
Version trunk
OS All
CC @dwblaikie,@efriedma-quic,@froydnj

Extended Description

===================================== void entry(void) { for (unsigned i = 0; i != 10; i += 1) { if (i == 1) break; } }

expected output:

@​entry with body deleted

actual output:

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu"

define dso_local void @​entry() local_unnamed_addr #​0 !dbg !​7 { call void @​llvm.dbg.value(metadata i32 0, metadata !​11, metadata !DIExpression()), !dbg !​14 br label %1, !dbg !​15

%2 = phi i32 [ 0, %0 ], [ %4, %3 ], !dbg !​16 call void @​llvm.dbg.value(metadata i32 %2, metadata !​11, metadata !DIExpression()), !dbg !​14 switch i32 %2, label %3 [ i32 10, label %5 i32 1, label %5 ], !dbg !​17

%4 = add i32 %2, 1, !dbg !​18 call void @​llvm.dbg.value(metadata i32 %4, metadata !​11, metadata !DIExpression()), !dbg !​14 br label %1, !dbg !​20, !llvm.loop !​21

ret void, !dbg !​23 }

declare void @​llvm.dbg.value(metadata, metadata, metadata) #​1

attributes #​0 = { nounwind readnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #​1 = { nounwind readnone speculatable }

!llvm.dbg.cu = !{#0} !llvm.module.flags = !{#3, !​4, !​5} !llvm.ident = !{#6}

!​0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !​1, producer: "clang version 8.0.0 (trunk 346600)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !​2, nameTableKind: None) !​1 = !DIFile(filename: "/tmp/compiler-explorer-compiler1181023-55-yllx70.sg18/example.c", directory: "/tmp/compiler-explorer-compiler1181023-55-yllx70.sg18") !​2 = !{} !​3 = !{i32 2, !"Dwarf Version", i32 4} !​4 = !{i32 2, !"Debug Info Version", i32 3} !​5 = !{i32 1, !"wchar_size", i32 4} !​6 = !{!"clang version 8.0.0 (trunk 346600)"} !​7 = distinct !DISubprogram(name: "entry", scope: !​1, file: !​1, line: 1, type: !​8, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !​0, retainedNodes: !​10) !​8 = !DISubroutineType(types: !​9) !​9 = !{null} !​10 = !{#11} !​11 = !DILocalVariable(name: "i", scope: !​12, file: !​1, line: 2, type: !​13) !​12 = distinct !DILexicalBlock(scope: !​7, file: !​1, line: 2, column: 5) !​13 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned) !​14 = !DILocation(line: 2, column: 19, scope: !​12) !​15 = !DILocation(line: 2, column: 10, scope: !​12) !​16 = !DILocation(line: 0, scope: !​12) !​17 = !DILocation(line: 2, column: 5, scope: !​12) !​18 = !DILocation(line: 2, column: 37, scope: !​19) !​19 = distinct !DILexicalBlock(scope: !​12, file: !​1, line: 2, column: 5) !​20 = !DILocation(line: 2, column: 5, scope: !​19) !​21 = distinct !{#21, !​17, !​22} !​22 = !DILocation(line: 4, column: 5, scope: !​12) !​23 = !DILocation(line: 5, column: 1, scope: !​7)

hint: if you change it to i < 10 then the optimizer generates the desired output.

efriedma-quic commented 5 years ago

This is a limitation of ScalarEvolution, specifically ScalarEvolution::computeExitLimit. SimplifyCFG turns the comparisons into a switch, and computeExitLimit doesn't handle a switch with multiple values that exit the loop. Because ScalarEvolution can't compute the trip count, LoopDeletion is conservative and refuses to delete the loop. Probably easy to improve.