Open dsemi opened 5 months ago
If anyone is having trouble reproducing and would like extra debugging information, let me know what I can do to supplement.
I suspect the cause is sharing a `ref` across threads, given that the following version, which passes a `ptr` to the threads instead, works on my end.
import md5
import std/[atomics, cpuinfo, monotimes, strformat, strutils, sugar, terminal, times]
# Number of consecutive candidate values a worker claims from the shared
# counter with each fetchAdd.
const batchSize = 8000
type
  SharedObj = object
    ## Search state shared by all worker threads.
    prefix: string           ## Text prepended to each candidate before hashing.
    done: Atomic[bool]       ## Set once a worker has recorded a match.
    counter: Atomic[uint32]  ## Start of the next unclaimed batch.
    sol: Atomic[uint32]      ## Smallest matching candidate recorded so far.
  Shared = ref SharedObj     ## Heap-allocated handle to the shared state.
proc worker(arg: (ptr SharedObj, int)) {.thread.} =
  ## Thread entry point: repeatedly claims a batch of `batchSize` candidates
  ## from the shared counter and records the smallest candidate whose MD5
  ## digest of `prefix & $candidate` starts with two zero bytes.
  ##
  ## `arg[0]` points at the shared search state; `arg[1]` is not used.
  let (s, _) = arg
  while not s.done.load(moRelaxed):
    # Reserve the next batch of candidates for this thread only.
    let offset = s.counter.fetchAdd(batchSize, moRelaxed)
    for n in 0'u32 ..< batchSize:
      let i = offset + n
      let h = (s.prefix & $i).toMD5
      if h[0] == 0 and h[1] == 0:
        # Lower `sol` to `i` unless a smaller value is already stored; a
        # failed exchange refreshes `val`, so the loop re-checks and retries.
        var val = s.sol.load(moRelaxed)
        while i < val and not s.sol.compareExchangeWeak(val, i, moRelaxed):
          discard
        s.done.store(true, moRelaxed)
        # Candidates only increase within a batch, so nothing later in this
        # batch can be smaller than `i`; stop hashing.
        break
proc solve(input: string): uint32 =
  ## Runs `worker` on several threads over the shared state built from
  ## `input` and returns the smallest candidate the workers recorded.
  ## Returns `uint32.high` if no worker recorded a smaller value.
  const maxThreads = 20
  let s = Shared(prefix: input)
  # Start at the maximum so that any smaller hit replaces it.
  s.sol.store(uint32.high)
  var thr: array[maxThreads, Thread[(ptr SharedObj, int)]]
  # countProcessors() reports 0 when detection fails: always start at least
  # one worker, and never more than the array can hold.
  let nThreads = clamp(countProcessors(), 1, maxThreads)
  for t in thr.toOpenArray(0, nThreads - 1).mitems:
    # Workers get a raw pointer rather than the ref itself; `s` is still
    # referenced by the final load below, after every worker was joined.
    # The second tuple element is not read by `worker`.
    createThread(t, worker, (addr(s[]), 15))
  joinThreads(thr.toOpenArray(0, nThreads - 1))
  s.sol.load
proc part1(input: string): string =
  ## Solves part 1 for `input` and renders the numeric answer as text.
  result = $solve(input)
proc colorizeTime(t: float): string =
  ## Formats `t` with three decimals and wraps it in an ANSI foreground
  ## colour: green below 0.5, yellow below 1.0, red otherwise. The result
  ## ends with the ANSI reset code.
  # Issue note: the reported crash happens in this proc.
  let color =
    if t < 0.5: fgGreen
    elif t < 1.0: fgYellow
    else: fgRed
  ansiForegroundColorCode(color) & fmt"{t:.3f}" & ansiResetCode
proc timeit(f: (string) -> string, inp: string): (string, float) =
  ## Calls `f(inp)` and returns its result together with the elapsed time in
  ## seconds, taken from the monotonic clock at microsecond resolution.
  let startT = getMonoTime()
  let ans = f(inp)
  let elapsed = getMonoTime() - startT
  (ans, float(elapsed.inMicroseconds) / 1_000_000)
proc run() =
  ## Reads `input.txt`, times `part1` on its stripped contents and prints
  ## the right-aligned answer followed by the colourised elapsed time.
  # Needs the strip call to fail
  let contents = strip(readFile("input.txt"))
  let timed = timeit(part1, contents)
  stdout.write fmt"Part 1: "
  let line = "$1 Elapsed time $2 seconds".format(
    align(timed[0], 54), colorizeTime(timed[1]))
  echo line
# Top-level call: runs the reproduction when the module is executed.
run()
Ah yeah that seems to be the problem. Switching to a SharedPtr in threading removes the crash. Perhaps this can be signaled more prominently in the threading docs? An example in https://nim-lang.org/docs/typedthreads.html for something that isn't copied by value would be helpful, I had to search around to find that library, and https://forum.nim-lang.org/t/9378 which I found more helpful than the docs.
Description
I apologize in advance for having a lot of code here, I've been trying to narrow this down to a simpler repro case, but I keep running into situations where removing seemingly unrelated parts of the code stops the bug from triggering.
I'm seeing inconsistent segfaults in system/alloc.nim rawAlloc after running some threaded code. All of the created threads should have been joined by the time the segfault triggers; it looks as though the heap is getting corrupted.
I've been able to reproduce by running the binary 100 times in a row to have higher confidence in whether a change I made affected it:
Repro code:
main.nim
:input.txt
I tried inlining the contents of `input.txt` into the source, but that also stopped the crashes.

Nim Version
Nim Compiler Version 2.0.2 [MacOSX: arm64] Compiled at 2023-12-15 Copyright (c) 2006-2023 by Andreas Rumpf
active boot switches: -d:release -d:nimUseLinenoise
Current Output
Expected Output
Possible Solution
No response
Additional Information
I can also reproduce when compiling with `-d:useMalloc`, which results in the following trace instead:

I can also reproduce when compiling with `--mm:arc`, but cannot reproduce when running with `--mm:refc` (though refc seems to be giving an incorrect answer, that seems like a separate issue).