JuliaGPU / Metal.jl

Metal programming in Julia
MIT License
348 stars 37 forks source link

Simple throwing kernel hangs #433

Open tgymnich opened 1 week ago

tgymnich commented 1 week ago

This simple kernel will cause a hang leading to 100% GPU usage and eventually taking down the WindowServer. A similar Metal/C++ kernel runs just fine. The generated IR is also mostly identical.

Another interesting observation is that one can save the WindowServer from its death spiral by running the good C++ kernel. This leads me to belief that the problem is most likely not in the generated IR but in the way we handle kernel launches.

using Metal

function kernel()
    throw(nothing)
    return nothing
end

Metal.@sync @metal threads=8 kernel()
void gpu_report_exception() {
    return;
}

void gpu_signal_exception() {
    return;
}

kernel void add_arrays()
{
    gpu_report_exception();
    gpu_signal_exception();

    __builtin_trap();
    __builtin_unreachable();
}
tgymnich commented 1 week ago

Taking the metallib that worked in the xcode example and running it with Metal.jl also hangs! The same also holds in reverse: Running the metallib generated by Metal.jl with the Xcode example does not hang.

using Metal

file = "/Users/tim/Desktop/default.metallib"
dev = Metal.device()
lib = MTL.MTLLibraryFromFile(dev, file)
fun = MTL.MTLFunction(lib, "add_arrays")
pipeline_state = try
    MTL.MTLComputePipelineState(dev, fun)
catch err
    isa(err, NSError) || rethrow()
    retain(err)
    error("""Compilation to native code failed""")
end

queue = MTL.MTLCommandQueue(dev)
cmdbuf = MTL.MTLCommandBuffer(queue)

groups = MTL.MTLSize(1)
threads = MTL.MTLSize(1)

cce = MTL.MTLComputeCommandEncoder(cmdbuf)
MTL.set_function!(cce, pipeline_state)
MTL.append_current_function!(cce, groups, threads)
close(cce)

MTL.commit!(cmdbuf)
MTL.wait_completed(cmdbuf)
tgymnich commented 1 week ago

Neither metallib/kernel hangs with just ObjectiveC.jl:

Metallibs

default(built using Xcode) fun name: add_arrays args: none julia(built using Metal.jl) fun name: _Z14squarekernel args: none

Working ObjectiveC.jl impl

using ObjectiveC, .Foundation

@objcwrapper MTLDevice <: NSObject
dev = MTLDevice(ccall(:MTLCreateSystemDefaultDevice, id{MTLDevice}, ()))

@objcwrapper MTLLibrary <: NSObject
err = Ref{id{NSError}}(nil)
file = NSString("/Users/tim/Desktop/default.metallib")
libhandle = @objc [dev::id{MTLDevice} newLibraryWithFile:file::id{NSString} error:err::Ptr{id{NSError}}]::id{MTLLibrary}
lib = MTLLibrary(libhandle)
err[] == nil || throw(NSError(err[]))

@objcwrapper MTLFunction <: NSObject
name = NSString("add_arrays")
funhandle = @objc [lib::id{MTLLibrary} newFunctionWithName:name::id{NSString}]::id{MTLFunction}
fun = MTLFunction(funhandle)

@objcwrapper MTLComputePipelineState <: NSObject
err = Ref{id{NSError}}(nil)
piphandle = @objc [dev::id{MTLDevice} newComputePipelineStateWithFunction:fun::id{MTLFunction} error:err::Ptr{id{NSError}}]::id{MTLComputePipelineState}
pip = MTLComputePipelineState(piphandle)
err[] == nil || throw(NSError(err[]))

struct MTLSize
    width::NSUInteger
    height::NSUInteger
    depth::NSUInteger

    MTLSize(w=1, h=1, d=1) = new(w, h, d)
end

gridSize = MTLSize(16777216,1,1)
threadGroupSize = MTLSize(64,1,1)

@objcwrapper MTLCommandQueue <: NSObject
queuehandle = @objc [dev::id{MTLDevice} newCommandQueue]::id{MTLCommandQueue}
queue = MTLCommandQueue(queuehandle)

@objcwrapper MTLCommandBuffer <: NSObject
@objcproperties MTLCommandBuffer begin
    @autoproperty status::NSUInteger
    @autoproperty error::id{NSError}
end
cmdbufhandle = @objc [queue::id{MTLCommandQueue} commandBuffer]::id{MTLCommandBuffer}
cmdbuf = MTLCommandBuffer(cmdbufhandle)

@objcwrapper MTLComputeCommandEncoder <: NSObject
ccehandle = @objc [cmdbuf::id{MTLCommandBuffer} computeCommandEncoder]::id{MTLComputeCommandEncoder}
cce = MTLComputeCommandEncoder(ccehandle)

@objc [cce::id{MTLComputeCommandEncoder} setComputePipelineState:pip::id{MTLComputePipelineState}]::Nothing
@objc [cce::id{MTLComputeCommandEncoder} dispatchThreadgroups:gridSize::MTLSize threadsPerThreadgroup:threadGroupSize::MTLSize]::Nothing
@objc [cce::id{MTLComputeCommandEncoder} endEncoding]::Nothing

@objc [cmdbuf::id{MTLCommandBuffer} enqueue]::Nothing
@show cmdbuf.status
@objc [cmdbuf::id{MTLCommandBuffer} commit]::Nothing
@show cmdbuf.status
@objc [cmdbuf::id{MTLCommandBuffer} waitUntilCompleted]::Nothing
@show cmdbuf.status

ObjectiveC traces:

working (code above):

+ [NSString stringWithUTF8String: (Int8*)0x000000010890dd58]
  (id<NSString>)0x000000015b0ec770
- [(id<MTLDevice>)0x000000015f0e0200 newLibraryWithFile:error: (id<NSString>)0x000000015b0ec770 (id*)0x0000000109b3e110]
  (id<MTLLibrary>)0x000000015a6c27d0
+ [NSString stringWithUTF8String: (Int8*)0x000000010b0b4408]
  (id<NSString>)0x000000015b0d1040
- [(id<MTLLibrary>)0x000000015a6c27d0 newFunctionWithName: (id<NSString>)0x000000015b0d1040]
  (id<MTLFunction>)0x000000013a7528b0
- [(id<MTLDevice>)0x000000015f0e0200 newComputePipelineStateWithFunction:error: (id<MTLFunction>)0x000000013a7528b0 (id*)0x0000000109dcc2d0]
  (id<MTLComputePipelineState>)0x000000015a753bf0
- [(id<MTLDevice>)0x000000015f0e0200 newCommandQueue]
  (id<MTLCommandQueue>)0x000000015f021600
- [(id<MTLCommandQueue>)0x000000015f021600 commandBuffer]
  (id<MTLCommandBuffer>)0x000000015b2bfd70
- [(id<MTLCommandBuffer>)0x000000015b2bfd70 computeCommandEncoder]
  (id<MTLComputeCommandEncoder>)0x000000016833d510
- [(id<MTLComputeCommandEncoder>)0x000000016833d510 setComputePipelineState: (id<MTLComputePipelineState>)0x000000015a753bf0]
- [(id<MTLComputeCommandEncoder>)0x000000016833d510 dispatchThreadgroups:threadsPerThreadgroup: MTLSize(0x0000000001000000, 0x0000000000000001, 0x0000000000000001) MTLSize(0x0000000000000040, 0x0000000000000001, 0x0000000000000001)]
- [(id<MTLComputeCommandEncoder>)0x000000016833d510 endEncoding]
- [(id<MTLCommandBuffer>)0x000000015b2bfd70 enqueue]
- [(id<MTLCommandBuffer>)0x000000015b2bfd70 commit]
- [(id<MTLCommandBuffer>)0x000000015b2bfd70 waitUntilCompleted]

broken (code prev comment):

+ [NSURL fileURLWithPath: + [NSString stringWithUTF8String: (Int8*)0x0000000348495458]
  (id<NSString>)0x0000000168e244c0
(id<NSString>)0x0000000168e244c0]
+ [NSString stringWithUTF8String: (Int8*)0x0000000348495458]
  (id<NSString>)0x0000000168e2d9a0
  (id<NSURL>)0x0000000168e57de0
- [(id<MTLDevice>)0x000000015f0e0200 newLibraryWithURL:error: (id<NSURL>)0x0000000168e57de0 (id*)0x0000000346ab1260]
  (id<MTLLibrary>)0x000000013a62a280
- [(id<MTLLibrary>)0x000000013a62a280 newFunctionWithName: + [NSString stringWithUTF8String: (Int8*)0x000000010a8e1d68]
  (id<NSString>)0x00000001138581b0
(id<NSString>)0x00000001138581b0]
+ [NSString stringWithUTF8String: (Int8*)0x000000010a8e1d68]
  (id<NSString>)0x00000001138f4a20
  (id<MTLFunction>)0x000000013a7528b0
- [(id<MTLDevice>)0x000000015f0e0200 newComputePipelineStateWithFunction:error: (id<MTLFunction>)0x000000013a7528b0 (id*)0x000000034f5b14a0]
  (id<MTLComputePipelineState>)0x000000015f9a0c40
- [(id<MTLDevice>)0x000000015f0e0200 newCommandQueue]
  (id<MTLCommandQueue>)0x000000015f76e200
+ [MTLCommandBufferDescriptor new]
  (id<MTLCommandBufferDescriptor>)0x00000001683105b0
- [(id<MTLCommandQueue>)0x000000015f76e200 commandBufferWithDescriptor: (id<MTLCommandBufferDescriptor>)0x00000001683105b0]
  (id<MTLCommandBuffer>)0x000000030d835400
- [(id<MTLCommandBuffer>)0x000000030d835400 computeCommandEncoder]
  (id<MTLComputeCommandEncoder>)0x0000000168378ce0
- [(id<MTLComputeCommandEncoder>)0x0000000168378ce0 setComputePipelineState: (id<MTLComputePipelineState>)0x000000015f9a0c40]
- [(id<MTLComputeCommandEncoder>)0x0000000168378ce0 dispatchThreadgroups:threadsPerThreadgroup: Metal.MTL.MTLSize(0x0000000000000001, 0x0000000000000001, 0x0000000000000001) Metal.MTL.MTLSize(0x0000000000000001, 0x0000000000000001, 0x0000000000000001)]
- [(id<MTLCommandEncoder>)0x0000000168378ce0 endEncoding]
- [(id<MTLCommandBuffer>)0x000000030d835400 status]
  Metal.MTL.MTLCommandBufferStatusNotEnqueued
- [(id<MTLCommandBuffer>)0x000000030d835400 commit]
- [(id<MTLCommandBuffer>)0x000000030d835400 waitUntilCompleted]