mratsim / weave

A state-of-the-art multithreading runtime: message-passing based, fast, scalable, ultra-low overhead
Other
537 stars 22 forks source link

UX: auto-transform captured seq to ptr UncheckedArray behind the scenes #179

Open mratsim opened 2 years ago

mratsim commented 2 years ago

Having to cast[ptr UncheckedArray[T]] any captured seq[T] before use is a lot of boilerplate and quite annoying.

Example from Discord:

import std/[
  random,
  strutils,
  sequtils,
  sugar]
import chroma
import weave
import std/[times, monotimes]

randomize()
proc createColor(): string =
  ## Draws a random integer in `0 .. 0xFFFFFF` and renders it as a
  ## six-digit hexadecimal string (e.g. an RGB colour without the `#`).
  let rgb = rand(0xFFFFFF)
  result = toHex(rgb, 6)

proc main_serial(height, width: int) =
  ## Single-threaded baseline of the benchmark.
  ##
  ## Fills a flat `height * width` buffer with the `distance` between two
  ## colours parsed from a randomly generated hex-string dataset, then
  ## prints the elapsed time in milliseconds.
  ##
  ## Cell `(upper, lower)` lives at index `upper*width + lower`. Cells with
  ## `upper == lower` are skipped and keep their zero initial value.
  var distance = newSeq[float64](height * width)

  # We create a power of 2 dataset so that indexing it is cheap and we measure processing speed.
  const pow2size = 8 # 2⁸ = 256
  # Wrapping mask for the dataset: it must be `length - 1` (255), not
  # `exponent - 1` (7), otherwise only the first 8 entries are ever read.
  const datasetMask = (1 shl pow2size) - 1
  let dataset = collect(for color in 0 ..< (1 shl pow2size): createColor())

  let start = getMonoTime()
  for upper in 0 ..< height:
    for lower in 0 ..< width:
      if upper == lower:
        continue

      let colorUpper = parseHex(dataset[upper and datasetMask]) # dataset[upper mod 2^pow2size]
      let colorLower = parseHex(dataset[lower and datasetMask]) # dataset[lower mod 2^pow2size]
      distance[upper*width + lower] = distance(colorUpper, colorLower)
  let stop = getMonoTime()

  # echo distance.foldl(a+b)/distance.len.float
  echo "Time spent serial (ms): ", inMilliseconds(stop-start)

proc main_parallel(height, width: int) =
  ## Weave-parallelised version of the benchmark.
  ##
  ## Performs the same per-cell work as the serial variant, but with both
  ## loops expressed as nested `parallelFor` sections. Prints the elapsed
  ## time in milliseconds, measured from just before the outer loop is
  ## issued until `syncRoot(Weave)` returns.
  ##
  ## The Weave runtime is initialised and shut down inside this proc.
  var distance = newSeq[float64](height * width)

  # We create a power of 2 dataset so that indexing it is cheap and we measure processing speed.
  const pow2size = 8 # 2⁸ = 256
  # Wrapping mask for the dataset: it must be `length - 1` (255), not
  # `exponent - 1` (7), otherwise only the first 8 entries are ever read.
  const datasetMask = (1 shl pow2size) - 1
  let dataset = collect(for color in 0 ..< (1 shl pow2size): createColor())

  init(Weave)

  # The seqs are not captured directly; raw unchecked views of their buffers
  # are captured instead, to access them without causing GC issues.
  # Both seqs must stay alive (and must not be resized) until syncRoot returns.
  let distanceAddr = cast[ptr UncheckedArray[float64]](distance[0].addr)
  let datasetAddr = cast[ptr UncheckedArray[string]](dataset[0].unsafeAddr)

  let start = getMonoTime()
  parallelFor upper in 0 ..< height:
    captures: {width, distanceAddr, datasetAddr}
    parallelFor lower in 0 ..< width:
      captures: {upper, width, distanceAddr, datasetAddr}
      if upper != lower:
        let colorUpper = parseHex(datasetAddr[upper and datasetMask]) # datasetAddr[upper mod 2^pow2size]
        let colorLower = parseHex(datasetAddr[lower and datasetMask]) # datasetAddr[lower mod 2^pow2size]
        distanceAddr[upper*width + lower] = distance(colorUpper, colorLower)
  syncRoot(Weave)
  let stop = getMonoTime()

  # echo distance.foldl(a+b)/distance.len.float
  echo "Time spent parallel (ms): ", inMilliseconds(stop-start)

  # Pair init(Weave) with exit(Weave) so the runtime is shut down; done after
  # the timing has been taken so the measurement is unchanged.
  exit(Weave)

# Benchmark dimensions: the result buffer holds height * width cells.
let
  height = 9600
  width = 1200

# Run the serial baseline first, then the parallel variant.
main_serial height, width
main_parallel height, width
dumblob commented 2 years ago

To me this doesn't seem as boilerplate at all (actually making it explicit is definitely a good thing!).

It seems to be rather a performance & safety problem (GC).

Vindaar commented 2 years ago

To me this doesn't seem as boilerplate at all (actually making it explicit is definitely a good thing!).

It seems to be rather a performance & safety problem (GC).

I don't think this is really an issue. It's essentially a "bug" that one cannot just pass a regular seq (I put that into quotation marks, as I don't fully understand where the problems come from). Hiding the required transformation from the user seems nice, especially because it is a fully mechanical transformation.

However, as far as I can tell, there is a practical issue with implementing this: parallelFor and friends are currently untyped macros. That means we have no way to tell whether something actually is a seq. So:

mratsim commented 2 years ago

either one turns them into typed macros, but I'm rather certain we don't want to go down that path

We don't want to go down that path, but we can always have the untyped macro use `quote do` to emit a call to a typed macro, like in Arraymancer https://github.com/mratsim/Arraymancer/blob/20e0dc3/src/arraymancer/tensor/accessors_macros_read.nim#L56

The @ would also be nice to indicate the transformation of a var T into a ptr T, though.