dart-lang / sdk

The Dart SDK, including the VM, JS and Wasm compilers, analysis, core libraries, and more.
https://dart.dev
BSD 3-Clause "New" or "Revised" License
10.09k stars 1.56k forks source link

Investigate lock contention in worst case scenarios when using isolate groups #46252

Open mkustermann opened 3 years ago

mkustermann commented 3 years ago

Example procedure to investigate:

Step 1) Create app into pkg/compiler/lib/src/d2js.dart

import 'dart:async';
import 'dart:io';
import 'dart:isolate';

import 'package:path/path.dart' as path;
import 'package:compiler/src/dart2js.dart' as dart2js;

doit(_) => dart2js.main([
      '-o',
      '/tmp/hello.dart.js',
      '--packages=.dart_tool/package_config.json',
      'package:compiler/src/dart2js.dart',
    ]);

main() async {
  File('/tmp/hello.dart').writeAsStringSync('main() => print("hello world");');

  final ports = <ReceivePort>[];
  for (int i = 0; i < 8; ++i) {
    final port = ReceivePort();
    await Isolate.spawn(doit, [], onExit: port.sendPort);
    ports.add(port);
  }

  for (int i = 0; i < 8; ++i) {
    final si = StreamIterator(ports[i]);
    await si.moveNext();
    si.cancel();
  }
}

Step 2) Patch in simple lock contention measuring logic:

diff --git a/runtime/vm/isolate.cc b/runtime/vm/isolate.cc
index bb3fea04135..cde22ef13e5 100644
--- a/runtime/vm/isolate.cc
+++ b/runtime/vm/isolate.cc
@@ -493,6 +493,8 @@ void IsolateGroup::CreateHeap(bool is_vm_isolate,
 }

 void IsolateGroup::Shutdown() {
+  program_lock()->Dump();
+
   // Ensure to join all threads before waiting for pending GC tasks (the thread
   // pool can trigger idle notification, which can start new GC tasks).
   //
diff --git a/runtime/vm/lockers.cc b/runtime/vm/lockers.cc
index 6cee868fdcd..44e96d18400 100644
--- a/runtime/vm/lockers.cc
+++ b/runtime/vm/lockers.cc
@@ -105,6 +105,8 @@ bool SafepointRwLock::IsCurrentThreadReader() {
 #endif  // defined(DEBUG)

 bool SafepointRwLock::EnterRead() {
+  const uword ns_before = OS::GetCurrentMonotonicTicks();
+
   // No need to safepoint if the current thread is not attached.
   auto thread = Thread::Current();
   // Attempt to acquire a lock while owning a safepoint could lead to a deadlock
@@ -123,6 +125,8 @@ bool SafepointRwLock::EnterRead() {
     RELEASE_ASSERT(ok);
     RELEASE_ASSERT(acquired_read_lock);
   }
+  const uword ns_after = OS::GetCurrentMonotonicTicks();
+  read_sum_ += (ns_after - ns_before);
   return acquired_read_lock;
 }

@@ -169,6 +173,7 @@ void SafepointRwLock::LeaveRead() {
 }

 void SafepointRwLock::EnterWrite() {
+  const uword ns_before = OS::GetCurrentMonotonicTicks();
   // No need to safepoint if the current thread is not attached.
   auto thread = Thread::Current();
   const bool can_block_without_safepoint = thread == nullptr;
@@ -179,6 +184,8 @@ void SafepointRwLock::EnterWrite() {
     const bool ok = TryEnterWrite(/*can_block=*/true);
     RELEASE_ASSERT(ok);
   }
+  const uword ns_after = OS::GetCurrentMonotonicTicks();
+  write_sum_ += (ns_after - ns_before);

 }

 bool SafepointRwLock::TryEnterWrite(bool can_block) {
@@ -210,4 +217,10 @@ void SafepointRwLock::LeaveWrite() {
   ml.NotifyAll();
 }

+void SafepointRwLock::Dump() {
+  OS::PrintErr("RWlock read: %" Pd " ms, write: %" Pd "\n",
+      read_sum_ / 1000 / 1000,
+      write_sum_ / 1000 / 1000);
+}
+
 }  // namespace dart
diff --git a/runtime/vm/lockers.h b/runtime/vm/lockers.h
index b4557ae7c63..2b502bb632f 100644
--- a/runtime/vm/lockers.h
+++ b/runtime/vm/lockers.h
@@ -346,6 +346,7 @@ class SafepointRwLock {
   bool IsCurrentThreadWriter() {
     return writer_id_ == OSThread::GetCurrentThreadId();
   }
+  void Dump();

  private:
   friend class SafepointReadRwLocker;
@@ -377,6 +378,9 @@ class SafepointRwLock {
   // [state_] < 0  : The lock is held by a single writer (possibly nested).
   intptr_t state_ = 0;

+  intptr_t read_sum_ = 0;
+  intptr_t write_sum_ = 0;
+
   DEBUG_ONLY(MallocGrowableArray<ThreadId> readers_ids_);
   ThreadId writer_id_ = OSThread::kInvalidThreadId;
 };

Step 3) Build SDK & app to kernel

% tools/build.py -mrelease -ax64 runtime_kernel dart2js_bot
% out/ReleaseX64/dart --snapshot-kind=kernel --snapshot=pkg/compiler/lib/src/d2js.dart.dill pkg/compiler/lib/src/d2js.dart

Step 4) Run with/without isolate groups:

% out/ReleaseX64/dart --no-enable-isolate-groups --experimental-enable-isolate-groups-jit pkg/compiler/lib/src/d2js.dart.dill       
Hint: When run on the command-line, the compiled output might require a preamble file located in:
  <sdk>/lib/_internal/js_runtime/lib/preambles.
Compiled 21,070,570 characters Dart to 12,576,995 characters JavaScript in 67.9 seconds
Dart file package:compiler/src/dart2js.dart compiled to JavaScript: /tmp/hello.dart.js
RWlock read: 438 ms, write: 183
Hint: When run on the command-line, the compiled output might require a preamble file located in:
  <sdk>/lib/_internal/js_runtime/lib/preambles.
Compiled 21,070,570 characters Dart to 12,576,995 characters JavaScript in 68.5 seconds
Dart file package:compiler/src/dart2js.dart compiled to JavaScript: /tmp/hello.dart.js
RWlock read: 413 ms, write: 176
Hint: When run on the command-line, the compiled output might require a preamble file located in:
  <sdk>/lib/_internal/js_runtime/lib/preambles.
Compiled 21,070,570 characters Dart to 12,576,995 characters JavaScript in 68.7 seconds
Dart file package:compiler/src/dart2js.dart compiled to JavaScript: /tmp/hello.dart.js
RWlock read: 446 ms, write: 185
Hint: When run on the command-line, the compiled output might require a preamble file located in:
  <sdk>/lib/_internal/js_runtime/lib/preambles.
Compiled 21,070,570 characters Dart to 12,576,995 characters JavaScript in 69.0 seconds
Dart file package:compiler/src/dart2js.dart compiled to JavaScript: /tmp/hello.dart.js
RWlock read: 449 ms, write: 182
Hint: When run on the command-line, the compiled output might require a preamble file located in:
  <sdk>/lib/_internal/js_runtime/lib/preambles.
Compiled 21,070,570 characters Dart to 12,576,995 characters JavaScript in 69.1 seconds
Dart file package:compiler/src/dart2js.dart compiled to JavaScript: /tmp/hello.dart.js
RWlock read: 418 ms, write: 183
Hint: When run on the command-line, the compiled output might require a preamble file located in:
  <sdk>/lib/_internal/js_runtime/lib/preambles.
Compiled 21,070,570 characters Dart to 12,576,995 characters JavaScript in 70.8 seconds
Dart file package:compiler/src/dart2js.dart compiled to JavaScript: /tmp/hello.dart.js
RWlock read: 421 ms, write: 159
Hint: When run on the command-line, the compiled output might require a preamble file located in:
  <sdk>/lib/_internal/js_runtime/lib/preambles.
Compiled 21,070,570 characters Dart to 12,576,995 characters JavaScript in 71.3 seconds
Dart file package:compiler/src/dart2js.dart compiled to JavaScript: /tmp/hello.dart.js
RWlock read: 462 ms, write: 181
Hint: When run on the command-line, the compiled output might require a preamble file located in:
  <sdk>/lib/_internal/js_runtime/lib/preambles.
Compiled 21,070,570 characters Dart to 12,576,995 characters JavaScript in 73.1 seconds
Dart file package:compiler/src/dart2js.dart compiled to JavaScript: /tmp/hello.dart.js
RWlock read: 421 ms, write: 183
RWlock read: 0 ms, write: 0
RWlock read: 0 ms, write: 0
RWlock read: 0 ms, write: 0

As can be seen without isolate groups it spends around 0.4 seconds per isolate trying to acquire read lock and 0.2 seconds per isolate trying to acquire write lock - making a total of around 5.6 seconds.

out/ReleaseX64/dart --enable-isolate-groups --experimental-enable-isolate-groups-jit pkg/compiler/lib/src/d2js.dart.dill  
Hint: When run on the command-line, the compiled output might require a preamble file located in:
  <sdk>/lib/_internal/js_runtime/lib/preambles.
Hint: When run on the command-line, the compiled output might require a preamble file located in:
  <sdk>/lib/_internal/js_runtime/lib/preambles.
Compiled 21,070,570 characters Dart to 12,576,995 characters JavaScript in 124 seconds
Compiled 21,070,570 characters Dart to 12,576,995 characters JavaScript in 124 seconds
Dart file package:compiler/src/dart2js.dart compiled to JavaScript: /tmp/hello.dart.js
Dart file package:compiler/src/dart2js.dart compiled to JavaScript: /tmp/hello.dart.js
Hint: When run on the command-line, the compiled output might require a preamble file located in:
  <sdk>/lib/_internal/js_runtime/lib/preambles.
Compiled 21,070,570 characters Dart to 12,576,995 characters JavaScript in 124 seconds
Dart file package:compiler/src/dart2js.dart compiled to JavaScript: /tmp/hello.dart.js
Hint: When run on the command-line, the compiled output might require a preamble file located in:
  <sdk>/lib/_internal/js_runtime/lib/preambles.
Compiled 21,070,570 characters Dart to 12,576,995 characters JavaScript in 125 seconds
Dart file package:compiler/src/dart2js.dart compiled to JavaScript: /tmp/hello.dart.js
Hint: When run on the command-line, the compiled output might require a preamble file located in:
  <sdk>/lib/_internal/js_runtime/lib/preambles.
Compiled 21,070,570 characters Dart to 12,576,995 characters JavaScript in 125 seconds
Dart file package:compiler/src/dart2js.dart compiled to JavaScript: /tmp/hello.dart.js
Hint: When run on the command-line, the compiled output might require a preamble file located in:
  <sdk>/lib/_internal/js_runtime/lib/preambles.
Compiled 21,070,570 characters Dart to 12,576,995 characters JavaScript in 125 seconds
Dart file package:compiler/src/dart2js.dart compiled to JavaScript: /tmp/hello.dart.js
Hint: When run on the command-line, the compiled output might require a preamble file located in:
  <sdk>/lib/_internal/js_runtime/lib/preambles.
Compiled 21,070,570 characters Dart to 12,576,995 characters JavaScript in 125 seconds
Dart file package:compiler/src/dart2js.dart compiled to JavaScript: /tmp/hello.dart.js
Hint: When run on the command-line, the compiled output might require a preamble file located in:
  <sdk>/lib/_internal/js_runtime/lib/preambles.
Compiled 21,070,570 characters Dart to 12,576,995 characters JavaScript in 126 seconds
Dart file package:compiler/src/dart2js.dart compiled to JavaScript: /tmp/hello.dart.js
RWlock read: 12418 ms, write: 18455
RWlock read: 0 ms, write: 0
RWlock read: 0 ms, write: 0

As can be seen the second one spends 12.4 seconds waiting to acquire read lock and 18.4 seconds to acquire write lock - making it a total of 30.8 seconds.

Attaching with gdb to the second one and breaking in random places, suggest we spend a lot of time acquiring the lock for symbol lookup.

=> Would be nice to have an optimistic lookup in symbol table without holding even the read lock - since often the symbol will be there already and only acquire locks if that fails.

/cc @aam

aam commented 3 years ago

I think in your snippet above you meant symbols_lock, not program_lock when you dump stats ?

Removing read lock symbols_lock seems to increase contention around much more expensive write lock and running with stopped mutators in the example where many compilers start and run through the same compilation process together.