dart-lang / sdk

The Dart SDK, including the VM, JS and Wasm compilers, analysis, core libraries, and more.
https://dart.dev
BSD 3-Clause "New" or "Revised" License
10.23k stars 1.57k forks source link

VM produces worse code for iterating `Iterable<String>` vs `List<String>` #56840

Closed jensjoha closed 1 week ago

jensjoha commented 1 month ago

At least as of c7f99144977aa10767eed8564808d402ce4540d1 the parser has the following code:

bool isOneOfOrEof(Token token, Iterable<String> values) {
  for (String tokenValue in values) {
    if (optional(tokenValue, token)) {
      return true;
    }
  }
  return token.isEof;
}

which - at least when compiling pkg/front_end/tool/_fasta/compile.dart - produces the following code:

68387   PrologueOffset = 0
   0x0000000000098fa8 <+0>:     push   %rbp
   0x0000000000098fa9 <+1>:     mov    %rsp,%rbp
   0x0000000000098fac <+4>:     sub    $0x38,%rsp

68388   ParallelMove rax <- C
   0x0000000000098fb0 <+8>:     mov    $0xff,%eax

68389   ParallelMove fp[-6] <- rdi, fp[-7] <- rsi
   0x0000000000098fb5 <+13>:    mov    %rdi,-0x30(%rbp)
   0x0000000000098fb9 <+17>:    mov    %rsi,-0x38(%rbp)

68390   CheckStackOverflow:8(stack=0, loop=0)
   0x0000000000098fbd <+21>:    cmp    0x38(%r14),%rsp
   0x0000000000098fc1 <+25>:    jbe    0x990b1 <isOneOfOrEof+265>

68391   v57 <- LoadField(v3 T{_ImmutableList} . :type_arguments {final}) T{TypeArguments}
   0x0000000000098fc7 <+31>:    mov    0x7(%rsi),%rbx

68392   ParallelMove fp[-5] <- rbx
   0x0000000000098fcb <+35>:    mov    %rbx,-0x28(%rbp)

68393   v111 <- LoadField(v3 T{_ImmutableList} . Array.length {final}) [0, 576460752303423487] T{_Smi}
   0x0000000000098fcf <+39>:    mov    0xf(%rsi),%rcx

68394   ParallelMove r8 <- rcx
   0x0000000000098fd3 <+43>:    mov    %rcx,%r8

68395   v119 <- UnboxInt64([non-speculative], v111 T{_Smi}) [v111, v111] int64
   0x0000000000098fd6 <+46>:    sar    %r8

68396   ParallelMove fp[-4] <- r8
   0x0000000000098fd9 <+49>:    mov    %r8,-0x20(%rbp)

68397   v96 <- LoadField:38(v2 T{SimpleToken} . _typeAndOffset@17236933) [-9223372036854775808, 9223372036854775807] int64
   0x0000000000098fdd <+53>:    mov    0x1f(%rdi),%rcx

68398   ParallelMove rcx <- rcx
68399   v161 <- IntConverter(int64->uint32[tr], v96) uint32
   0x0000000000098fe1 <+57>:    mov    %ecx,%ecx

68400   ParallelMove r9 <- rcx
   0x0000000000098fe3 <+59>:    mov    %rcx,%r9

68401   v98 <- BinaryUint32Op:38(& [tr], v161 T{int}, v163 T{_Smi}) [0, 255] uint32
   0x0000000000098fe6 <+62>:    and    %eax,%r9d

68402   ParallelMove fp[-3] <- r9
   0x0000000000098fe9 <+65>:    mov    %r9,-0x18(%rbp)

68403   ParallelMove rax <- C goto:38 B7
   0x0000000000098fed <+69>:    xor    %eax,%eax

68404   B7
68405     Loop 0
68406     Loop Header
68407   CheckStackOverflow:42(stack=0, loop=1)
   0x0000000000098fef <+71>:    cmp    0x38(%r14),%rsp
   0x0000000000098ff3 <+75>:    jbe    0x990bd <isOneOfOrEof+277>

68408   Branch if RelationalOp(>=, v130 T{int}, v119 T{_Smi}) T{bool} goto (4, 3)
   0x0000000000098ff9 <+81>:    cmp    %r8,%rax
   0x0000000000098ffc <+84>:    jge    0x990a3 <isOneOfOrEof+251>

68409   B3
68410     Loop 0
68411   v132 <- LoadIndexed:32([_List] v3, v130 T{int}) T{*?}
   0x0000000000099002 <+90>:    mov    0x17(%rsi,%rax,8),%r10

68412   ParallelMove fp[-2] <- r10
   0x0000000000099007 <+95>:    mov    %r10,-0x10(%rbp)

68413   ParallelMove r12 <- rax
   0x000000000009900b <+99>:    mov    %rax,%r12

68414   v45 <- BinaryInt64Op(+ [tr], v130 T{int}, v133 T{_Smi}) [1, v119] int64
   0x000000000009900e <+102>:   add    $0x1,%r12

68415   ParallelMove fp[-1] <- r12
   0x0000000000099012 <+106>:   mov    %r12,-0x8(%rbp)

68416   Branch if StrictCompare:12(===, v132 T{X0??}, v0 T{Null?}) goto (10, 11)
   0x0000000000099016 <+110>:   cmp    0x70(%r14),%r10
   0x000000000009901a <+114>:   jne    0x99049 <isOneOfOrEof+161>

68417   B10
68418     Loop 0
68419   ParallelMove rax <- r10, rdx <- rbx, rcx <- C
   0x0000000000099020 <+120>:   mov    %r10,%rax
   0x0000000000099023 <+123>:   mov    %rbx,%rdx
   0x0000000000099026 <+126>:   mov    0x70(%r14),%rcx

68420   t0 <- AssertAssignable:18(v132 T{X0??}, v23 T{_TypeParameter}, ' in type cast', instantiator_type_args(v57 T{TypeArguments}), function_type_args(v0 T{Null?})) T{X0?}
68421   AssertAssignable for compile-time type
   0x000000000009902a <+130>:   cmp    0x70(%r14),%rdx
   0x000000000009902e <+134>:   je     0x99049 <isOneOfOrEof+161>
   0x0000000000099034 <+140>:   mov    0x27(%rdx),%rsi
   0x0000000000099038 <+144>:   mov    0x1ff(%r15),%rbx

68422   TTSCall
   0x000000000009903f <+151>:   mov    0x2617(%r15),%r9
   0x0000000000099046 <+158>:   call   *0x7(%rsi)

68423   ParallelMove  goto:24 B12
68424   B11
68425   B12
68426     Loop 0
   0x0000000000099049 <+161>:   mov    -0x10(%rbp),%rdx
   0x000000000009904d <+165>:   mov    0x2607(%r15),%rcx

68427   ParallelMove rsi <- fp[-3]
   0x0000000000099054 <+172>:   mov    -0x18(%rbp),%rsi

68428   v162 <- IntConverter(uint32->int64, v98) int64
   0x0000000000099058 <+176>:   mov    %esi,%esi

68429   ParallelMove rax <- C, rbx <- rsi
   0x000000000009905a <+178>:   mov    %rsi,%rbx
   0x000000000009905d <+181>:   mov    $0x9b,%eax

68430   GenericCheckBound:14(v135 T{_Smi}, v162 T{_Smi}) [0, 155] int64
   0x0000000000099062 <+186>:   cmp    %rax,%rbx
   0x0000000000099065 <+189>:   jae    0x990c9 <isOneOfOrEof+289>

68431   v136 <- LoadIndexed:14([_List] v104, v162 T{_Smi}) T{TokenType}
   0x000000000009906b <+195>:   mov    0x17(%rcx,%rsi,8),%rax

68432   v64 <- LoadField(v136 T{TokenType} . stringValue {final}) T{_OneByteString?}
   0x0000000000099070 <+200>:   mov    0x5f(%rax),%rbx

68433   Branch if StrictCompare:12(===, v132 T{X0}, v64 T{String??}) T{bool} goto (5, 6)
   0x0000000000099074 <+204>:   cmp    %rbx,%rdx
   0x0000000000099077 <+207>:   je     0x9909a <isOneOfOrEof+242>

68434   B6
68435     Loop 0
68436   ParallelMove rax <- fp[-1], rdi <- fp[-6], rsi <- fp[-7], rbx <- fp[-5], r9 <- fp[-3], r8 <- fp[-4] goto:40 B7
   0x000000000009907d <+213>:   mov    -0x8(%rbp),%rax
   0x0000000000099081 <+217>:   mov    -0x30(%rbp),%rdi
   0x0000000000099085 <+221>:   mov    -0x38(%rbp),%rsi
   0x0000000000099089 <+225>:   mov    -0x28(%rbp),%rbx
   0x000000000009908d <+229>:   mov    -0x18(%rbp),%r9
   0x0000000000099091 <+233>:   mov    -0x20(%rbp),%r8
   0x0000000000099095 <+237>:   jmp    0x98fef <isOneOfOrEof+71>

68437   B5
68438   ParallelMove rax <- C
   0x000000000009909a <+242>:   mov    0x78(%r14),%rax

68439   DartReturn:32(v6)
   0x000000000009909e <+246>:   mov    %rbp,%rsp
   0x00000000000990a1 <+249>:   pop    %rbp
   0x00000000000990a2 <+250>:   ret

68440   B4
68441   ParallelMove rdi <- fp[-6]
   0x00000000000990a3 <+251>:   mov    -0x30(%rbp),%rdi

68442   v7 <- StaticCall:44( get:isEof<0> v2, result_type = T{bool}) T{bool}
   0x00000000000990a7 <+255>:   call   0x990d0 <SimpleToken.isEof>

68443   ParallelMove rax <- rax
68444   DartReturn:46(v7)
   0x00000000000990ac <+260>:   mov    %rbp,%rsp
   0x00000000000990af <+263>:   pop    %rbp
   0x00000000000990b0 <+264>:   ret

68445   CheckStackOverflowSlowPath
   0x00000000000990b1 <+265>:   call   *0x230(%r14)
   0x00000000000990b8 <+272>:   jmp    0x98fc7 <isOneOfOrEof+31>

68446   CheckStackOverflowSlowPath
   0x00000000000990bd <+277>:   call   *0x230(%r14)
   0x00000000000990c4 <+284>:   jmp    0x98ff9 <isOneOfOrEof+81>

68447   slow path check bound operation
   0x00000000000990c9 <+289>:   call   0xa52f8 <stub _iso_stub_RangeErrorSharedWithoutFPURegsStub>

which - on top of in general have a lot of moves I that can't figure out where comes from - contains what seems to be null-checks (StrictCompare:12(===, v132 T{X0??}, v0 T{Null?}), AssertAssignable:18(v132 T{X0??}, v23 T{_TypeParameter}, ' in type cast', instantiator_type_args(v57 T{TypeArguments}), function_type_args(v0 T{Null?})) T{X0?}) despite it being non-nullable code.

Changing the type from Iterable to List remedies the problem:

68387   PrologueOffset = 0
   0x0000000000098fa8 <+0>:     push   %rbp
   0x0000000000098fa9 <+1>:     mov    %rsp,%rbp

68388   ParallelMove rax <- C
   0x0000000000098fac <+4>:     mov    $0xff,%eax

68389   CheckStackOverflow:8(stack=0, loop=0)
   0x0000000000098fb1 <+9>:     cmp    0x38(%r14),%rsp
   0x0000000000098fb5 <+13>:    jbe    0x99033 <isOneOfOrEof+139>

68390   v111 <- LoadField(v3 T{_ImmutableList} . Array.length {final}) [0, 576460752303423487] T{_Smi}
   0x0000000000098fbb <+19>:    mov    0xf(%rsi),%rcx

68391   ParallelMove rcx <- rcx
68392   v119 <- UnboxInt64([non-speculative], v111 T{_Smi}) [v111, v111] int64
   0x0000000000098fbf <+23>:    sar    %rcx

68393   v96 <- LoadField:38(v2 T{SimpleToken} . _typeAndOffset@17236933) [-9223372036854775808, 9223372036854775807] int64
   0x0000000000098fc2 <+26>:    mov    0x1f(%rdi),%rdx

68394   ParallelMove rdx <- rdx
68395   v158 <- IntConverter(int64->uint32[tr], v96) uint32
   0x0000000000098fc6 <+30>:    mov    %edx,%edx

68396   ParallelMove rdx <- rdx
68397   v98 <- BinaryUint32Op:38(& [tr], v158 T{int}, v160 T{_Smi}) [0, 255] uint32
   0x0000000000098fc8 <+32>:    and    %eax,%edx

68398   ParallelMove rax <- C, r8 <- C goto:38 B7
   0x0000000000098fca <+34>:    xor    %eax,%eax
   0x0000000000098fcc <+36>:    mov    0x2607(%r15),%r8

68399   B7
68400     Loop 0
68401     Loop Header
68402   CheckStackOverflow:42(stack=0, loop=1)
   0x0000000000098fd3 <+43>:    cmp    0x38(%r14),%rsp
   0x0000000000098fd7 <+47>:    jbe    0x9903f <isOneOfOrEof+151>

68403   Branch if RelationalOp(>=, v130 T{int}, v119 T{_Smi}) T{bool} goto (4, 12)
   0x0000000000098fdd <+53>:    cmp    %rcx,%rax
   0x0000000000098fe0 <+56>:    jge    0x99029 <isOneOfOrEof+129>

68404   B12
68405     Loop 0
68406   v132 <- LoadIndexed:32([_List] v3, v130 T{int}) T{String}
   0x0000000000098fe6 <+62>:    mov    0x17(%rsi,%rax,8),%r9

68407   ParallelMove r10 <- rax
   0x0000000000098feb <+67>:    mov    %rax,%r10

68408   v45 <- BinaryInt64Op(+ [tr], v130 T{int}, v133 T{_Smi}) [1, v119] int64
   0x0000000000098fee <+70>:    add    $0x1,%r10

68409   ParallelMove r12 <- rdx
   0x0000000000098ff2 <+74>:    mov    %rdx,%r12

68410   v159 <- IntConverter(uint32->int64, v98) int64
   0x0000000000098ff5 <+77>:    mov    %r12d,%r12d

68411   ParallelMove rax <- C, rbx <- r12
   0x0000000000098ff8 <+80>:    mov    %r12,%rbx
   0x0000000000098ffb <+83>:    mov    $0x9b,%eax

68412   GenericCheckBound:14(v135 T{_Smi}, v159 T{_Smi}) [0, 155] int64
   0x0000000000099000 <+88>:    cmp    %rax,%rbx
   0x0000000000099003 <+91>:    jae    0x99048 <isOneOfOrEof+160>

68413   v136 <- LoadIndexed:14([_List] v104, v159 T{_Smi}) T{TokenType}
   0x0000000000099009 <+97>:    mov    0x17(%r8,%r12,8),%rax

68414   v64 <- LoadField(v136 T{TokenType} . stringValue {final}) T{_OneByteString?}
   0x000000000009900e <+102>:   mov    0x5f(%rax),%rbx

68415   Branch if StrictCompare:12(===, v132, v64 T{String??}) T{bool} goto (5, 6)
   0x0000000000099012 <+106>:   cmp    %rbx,%r9
   0x0000000000099015 <+109>:   je     0x99020 <isOneOfOrEof+120>

68416   B6
68417     Loop 0
68418   ParallelMove rax <- r10 goto:40 B7
   0x000000000009901b <+115>:   mov    %r10,%rax
   0x000000000009901e <+118>:   jmp    0x98fd3 <isOneOfOrEof+43>

68419   B5
68420   ParallelMove rax <- C
   0x0000000000099020 <+120>:   mov    0x78(%r14),%rax

68421   DartReturn:32(v6)
   0x0000000000099024 <+124>:   mov    %rbp,%rsp
   0x0000000000099027 <+127>:   pop    %rbp
   0x0000000000099028 <+128>:   ret

68422   B4
68423   ParallelMove rdi <- rdi
68424   v7 <- StaticCall:44( get:isEof<0> v2, result_type = T{bool}) T{bool}
   0x0000000000099029 <+129>:   call   0x99050 <SimpleToken.isEof>

68425   ParallelMove rax <- rax
68426   DartReturn:46(v7)
   0x000000000009902e <+134>:   mov    %rbp,%rsp
   0x0000000000099031 <+137>:   pop    %rbp
   0x0000000000099032 <+138>:   ret

68427   CheckStackOverflowSlowPath
   0x0000000000099033 <+139>:   call   *0x230(%r14)
   0x000000000009903a <+146>:   jmp    0x98fbb <isOneOfOrEof+19>

68428   CheckStackOverflowSlowPath
   0x000000000009903f <+151>:   call   *0x230(%r14)
   0x0000000000099046 <+158>:   jmp    0x98fdd <isOneOfOrEof+53>

68429   slow path check bound operation
   0x0000000000099048 <+160>:   call   0xa5278 <stub _iso_stub_RangeErrorSharedWithoutFPURegsStub>

and indeed doing a small benchmark:

diff --git a/pkg/_fe_analyzer_shared/lib/src/scanner/token.dart b/pkg/_fe_analyzer_shared/lib/src/scanner/token.dart
index 54628b0e66e..27046b4347a 100644
--- a/pkg/_fe_analyzer_shared/lib/src/scanner/token.dart
+++ b/pkg/_fe_analyzer_shared/lib/src/scanner/token.dart
@@ -2143,6 +2143,8 @@ const List<TokenType> _tokenTypesByIndex = [
   TokenType.UNUSED,
 ];

+List<TokenType> get tokenTypesByIndex => _tokenTypesByIndex;
+
 extension TokenIsAExtension on Token {
   /// Returns true if this has the token type [value].
   @pragma("vm:prefer-inline")
diff --git a/pkg/front_end/tool/_fasta/entry_points.dart b/pkg/front_end/tool/_fasta/entry_points.dart
index dedea557951..75fc18d0ca8 100644
--- a/pkg/front_end/tool/_fasta/entry_points.dart
+++ b/pkg/front_end/tool/_fasta/entry_points.dart
@@ -4,6 +4,9 @@

 library fasta.tool.entry_points;

+import 'package:_fe_analyzer_shared/src/scanner/token.dart' show Token, TokenType, tokenTypesByIndex;
+import 'package:_fe_analyzer_shared/src/parser/identifier_context.dart';
+
 import 'dart:convert' show JsonEncoder, LineSplitter, jsonDecode, utf8;
 import 'dart:io'
     show File, Platform, ProcessSignal, exit, exitCode, stderr, stdin, stdout;
@@ -50,6 +53,27 @@ const bool summary =
 const int iterations = const int.fromEnvironment("iterations", defaultValue: 1);

 Future<void> compileEntryPoint(List<String> arguments) async {
+  if (1+1==2) {
+  List<Token> tokens = [];
+  for(TokenType tokenType in tokenTypesByIndex) {
+    Token t = new Token(tokenType, 42);
+    tokens.add(t);
+  }
+
+  int yes = 0;
+  int no = 0;
+  for(int i = 0; i < 100000; i++) {
+    for(Token token in tokens) {
+    if (looksLikeStatementStart(token)) {
+      yes++;
+    } else {
+      no++;
+    }
+    }
+  }
+  print("$yes/$no");
+  return;
+  }
   if (Platform.environment["dart_cfe_intel_pt"] == "true") {
     print("Notice: Locating perf for profiling using intel_pt events.");
     linuxAndIntelSpecificPerf(onlyInitialize: true);

says it's quite a bit faster having it typed as a list:

msec task-clock:u: -35.5214% +/- 0.5360% (-247.26 +/- 3.73)
cycles:u: -35.6472% +/- 0.3781% (-1082648536.40 +/- 11483470.50)
instructions:u: -33.0222% +/- 0.0000% (-2572500473.10 +/- 442.66)
branch-misses:u: -93.1588% +/- 0.1126% (-12553791.40 +/- 15168.32)
seconds time elapsed: -35.4946% +/- 0.5343% (-0.25 +/- 0.00)
seconds user: -35.6740% +/- 0.6150% (-0.25 +/- 0.00)

What's going on?

dart-github-bot commented 1 month ago

Summary: The Dart VM produces inefficient code when iterating over an Iterable<String> compared to a List<String>. This results in unnecessary null checks and moves, even though the code is non-nullable. Switching to List<String> resolves the issue.

lrhn commented 1 month ago

The null check and assert looks like an inlined Iterator.moveNext and Iterator.current, where something being null it's a sign the iterator has reached its end, and current is get current => _current as T;