Nim is a statically typed compiled systems programming language. It combines successful concepts from mature languages like Python, Ada and Modula. Its design focuses on efficiency, expressiveness, and elegance (in that order of priority).
# Compile with at least `-d:ThreadPoolSize=workgroupSize*workgroupSize+1`
import std/[math, strutils], malebolgia
type
  UVec3 = object
    ## Triple of unsigned integers used for every index and extent below.
    x: uint
    y: uint
    z: uint

  GlEnvironment* = object
    ## Index state handed by value to each shader invocation. The field
    ## names mirror the GLSL compute-shader built-in variables.
    gl_GlobalInvocationID*: UVec3 ## work-group id * work-group size + local id
    gl_WorkGroupSize*: UVec3      ## invocations per work group, per axis
    gl_WorkGroupID*: UVec3        ## index of the work group being executed
    gl_NumWorkGroups*: UVec3      ## number of work groups, per axis
    gl_LocalInvocationID*: UVec3  ## index of the invocation inside its group
proc uvec3(x, y, z: uint): UVec3 =
  ## Builds a `UVec3` from its three components.
  UVec3(x: x, y: y, z: z)
proc sgemmShader(env: GlEnvironment; smem: ptr[seq[float32]]) {.gcsafe.} =
  ## Shader stub for the reproduction: derives this invocation's local and
  ## global row/column from `env` and does nothing else. `smem` is neither
  ## read nor written, and the computed indices are not used further.
  let
    local = env.gl_LocalInvocationID
    group = env.gl_WorkGroupID
    dims = env.gl_WorkGroupSize
    # Rows follow the y axis, columns the x axis.
    localRow = local.y.int
    localCol = local.x.int
    globalRow = group.y.int * dims.y.int + localRow
    globalCol = group.x.int * dims.x.int + localCol
proc runComputeOnCpu(numWorkGroups, workGroupSize: UVec3) =
  ## Visits every work group (z outermost, x innermost) and, for each one,
  ## spawns one `sgemmShader` task per local invocation on a fresh
  ## malebolgia master. `awaitAll` brackets the spawns of a work group, so
  ## the group's tasks are joined before the next group is started.
  var env = GlEnvironment(
    gl_NumWorkGroups: numWorkGroups,
    gl_WorkGroupSize: workGroupSize
  )
  for groupZ in 0 ..< numWorkGroups.z:
    for groupY in 0 ..< numWorkGroups.y:
      for groupX in 0 ..< numWorkGroups.x:
        env.gl_WorkGroupID = uvec3(groupX, groupY, groupZ)
        # echo "New workgroup! id ", wgX, ", ", wgY
        # Global index of the first invocation of this work group.
        let
          baseX = groupX * workGroupSize.x
          baseY = groupY * workGroupSize.y
          baseZ = groupZ * workGroupSize.z
        # Per-group scratch buffer; the tasks only receive its address.
        var shared = newSeq[float32](workGroupSize.x)
        var m = createMaster(activeProducer = true)
        m.awaitAll:
          for lz in 0 ..< workGroupSize.z:
            for ly in 0 ..< workGroupSize.y:
              for lx in 0 ..< workGroupSize.x:
                env.gl_LocalInvocationID = uvec3(lx, ly, lz)
                env.gl_GlobalInvocationID =
                  uvec3(baseX + lx, baseY + ly, baseZ + lz)
                # `env` is passed by value, so each task gets its own
                # snapshot of the indices set just above.
                m.spawn sgemmShader(env, addr shared)
# Main
const
  # Problem extents; presumably the matrix dimensions of the SGEMM this
  # repro was reduced from (TODO confirm). `K` is unused in the visible code.
  M = 64 # drives the work-group count along y
  K = 16
  N = 32 # drives the work-group count along x
  localSize = 4 # workgroupSize
proc main =
  ## Derives the dispatch dimensions from the module constants and runs
  ## the CPU compute simulation once.
  # Enough `localSize`-wide groups to cover N along x and M along y.
  let
    groupsX = ceilDiv(N, localSize).uint
    groupsY = ceilDiv(M, localSize).uint
  # Set the number of work groups and the size of each work group
  let numWorkGroups = uvec3(groupsX, groupsY, 1)
  let workGroupSize = uvec3(localSize, localSize, 1)
  # Run the compute shader on CPU with those dimensions.
  runComputeOnCpu(numWorkGroups, workGroupSize)

main()
Compiled with `nim c --cc:clang -d:ThreadPoolSize=18 test1` and later with `nim c --cc:clang -d:ThreadPoolSize=18 -l:"-fsanitize=address,undefined" -d:nosignalhandler -d:release -g test1`.
Run in a loop with the bash one-liner `while true; do ./test1; done`, since it doesn't always crash.
Nim Version
Nim Compiler Version 2.1.1 [Linux: amd64]
Compiled at 2024-06-16
Copyright (c) 2006-2024 by Andreas Rumpf
git hash: ae4b47c5bd48d244ee1f93ec6ba5f6bcf55eb973
active boot switches: -d:release
Current Output
Traceback (most recent call last)
malebolgia/src/malebolgia.nim(119) worker
Nim/lib/std/tasks.nim(78) =destroy
Nim/lib/system/alloc.nim(1068) dealloc
Nim/lib/system/alloc.nim(957) rawDealloc
Nim/lib/system/alloc.nim(769) addToSharedFreeList
SIGSEGV: Illegal storage access. (Attempt to read from nil?)
With Address Sanitizer we get a little more info:
AddressSanitizer:DEADLYSIGNAL
=================================================================
==9652==ERROR: AddressSanitizer: SEGV on unknown address 0x000000000800 (pc 0x55c4dfe9f722 bp 0x7ffcdd5f0ea0 sp 0x7f51a81e9a18 T13)
==9652==The signal is caused by a READ memory access.
==9652==Hint: address points to the zero page.
#0 0x55c4dfe9f722 in system::addToSharedFreeList(ptr<system::SmallChunk>, ptr<system::FreeCell>, int) Nim/lib/system/alloc.nim:769:66
#1 0x55c4dfe9f722 in system::rawDealloc(var<system::MemRegion>, pointer) Nim/lib/system/alloc.nim:957:4
#2 0x55c4dfea4c0b in tasks::eqdestroy_(tasks::Task) Nim/lib/std/tasks.nim:78:3
#3 0x55c4dfea4c0b in tasks::eqsink_(var<tasks::Task>, tasks::Task) malebolgia/src/malebolgia.nim:119:181
#4 0x55c4dfea50a1 in malebolgia::eqsink_(var<malebolgia::PoolTask>, malebolgia::PoolTask) malebolgia/src/malebolgia.nim:119:244
#5 0x55c4dfea50a1 in malebolgia::worker malebolgia/src/malebolgia.nim:119:5
#6 0x55c4dfea3290 in system::threadProcWrapDispatch(ptr<Thread<void>>) Nim/lib/system/threadimpl.nim:66:2
#7 0x55c4dfea3290 in system::threadProcWrapStackFrame(ptr<Thread<void>>) Nim/lib/system/threadimpl.nim:95:2
#8 0x55c4dfe9d9ce in typedthreads::threadProcWrapper(pointer) Nim/lib/system/threadimpl.nim:101:2
#9 0x55c4dfd8b626 in asan_thread_start(void*) (vk-tut/mul_cpu+0x9e626) (BuildId: 942e16e896e6518ca1b07200579a310d3abb5501)
#10 0x7f51adb6bdec (/usr/lib/libc.so.6+0x92dec) (BuildId: 32a656aa5562eece8c59a585f5eacd6cf5e2307b)
#11 0x7f51adbef0db (/usr/lib/libc.so.6+0x1160db) (BuildId: 32a656aa5562eece8c59a585f5eacd6cf5e2307b)
AddressSanitizer can not provide additional info.
SUMMARY: AddressSanitizer: SEGV Nim/lib/system/alloc.nim:769:66 in system::addToSharedFreeList(ptr<system::SmallChunk>, ptr<system::FreeCell>, int)
Thread T13 created by T0 here:
#0 0x55c4dfe4b198 in pthread_create (vk-tut/mul_cpu+0x15e198) (BuildId: 942e16e896e6518ca1b07200579a310d3abb5501)
#1 0x55c4dfe9da7b in typedthreads::createThread(var<Thread<void>>, proc<>) Nim/lib/std/typedthreads.nim:286:106
#2 0x7f51adafed4b in __libc_start_main (/usr/lib/libc.so.6+0x25d4b) (BuildId: 32a656aa5562eece8c59a585f5eacd6cf5e2307b)
#3 0x55c4dfd64d94 in _start (vk-tut/mul_cpu+0x77d94) (BuildId: 942e16e896e6518ca1b07200579a310d3abb5501)
==9652==ABORTING
Description
Minimal reproducible example:
Compiled with
`nim c --cc:clang -d:ThreadPoolSize=18 test1`
and later with `nim c --cc:clang -d:ThreadPoolSize=18 -l:"-fsanitize=address,undefined" -d:nosignalhandler -d:release -g test1`
Run with the bash script `while true; do ./test1; done`
since it doesn't always crash.
Nim Version
Nim Compiler Version 2.1.1 [Linux: amd64]
Compiled at 2024-06-16
Copyright (c) 2006-2024 by Andreas Rumpf
git hash: ae4b47c5bd48d244ee1f93ec6ba5f6bcf55eb973
active boot switches: -d:release
Current Output
Expected Output
No response
Possible Solution
No response
Additional Information
No response