I modified the 2D multiple-body example to run it on the GPU. Compilation fails with an error that appears to be related to the multi-body construct. The full error message is attached below. My modification is minimal: I only changed `mem` to `mem=CuArray`.
ERROR: LoadError: GPU compilation of MethodInstance for (::WaterLily.var"#gpu_##kern_#550#223"{WaterLily.var"#fill!#220"{2, Float32, Int64, Bodies}})(::KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{2, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(64, 1)}, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, Nothing}}, ::CuDeviceArray{Float32, 3, 1}, ::CuDeviceArray{Float32, 4, 1}, ::CuDeviceArray{Float32, 3, 1}, ::CuDeviceMatrix{Float32, 1}, ::CartesianIndex{2}) failed
KernelError: passing and using non-bitstype argument
Argument 1 to your kernel function is of type WaterLily.var"#gpu_##kern_#550#223"{WaterLily.var"#fill!#220"{2, Float32, Int64, Bodies}}, which is not isbits:
.fill! is of type WaterLily.var"#fill!#220"{2, Float32, Int64, Bodies} which is not isbits.
.body is of type Bodies which is not isbits.
.bodies is of type Vector{AutoBody} which is not isbits.
.ref is of type MemoryRef{AutoBody} which is not isbits.
.mem is of type Memory{AutoBody} which is not isbits.
.ops is of type Vector{Function} which is not isbits.
.ref is of type MemoryRef{Function} which is not isbits.
.mem is of type Memory{Function} which is not isbits.
Stacktrace:
[1] check_invocation(job::GPUCompiler.CompilerJob)
@ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/validation.jl:92
[2] macro expansion
@ ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:92 [inlined]
[3] macro expansion
@ ~/.julia/packages/TimerOutputs/NRdsv/src/TimerOutput.jl:253 [inlined]
[4] codegen(output::Symbol, job::GPUCompiler.CompilerJob; toplevel::Bool, libraries::Bool, optimize::Bool, cleanup::Bool, validate::Bool, strip::Bool, only_entry::Bool, parent_job::Nothing)
@ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:90
[5] codegen
@ ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:82 [inlined]
[6] compile(target::Symbol, job::GPUCompiler.CompilerJob; kwargs::@Kwargs{})
@ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:79
[7] compile
@ ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:74 [inlined]
[8] #1145
@ ~/.julia/packages/CUDA/2kjXI/src/compiler/compilation.jl:250 [inlined]
[9] JuliaContext(f::CUDA.var"#1145#1148"{GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}}; kwargs::@Kwargs{})
@ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:34
[10] JuliaContext(f::Function)
@ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:25
[11] compile(job::GPUCompiler.CompilerJob)
@ CUDA ~/.julia/packages/CUDA/2kjXI/src/compiler/compilation.jl:249
[12] actual_compilation(cache::Dict{Any, CuFunction}, src::Core.MethodInstance, world::UInt64, cfg::GPUCompiler.CompilerConfig{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, compiler::typeof(CUDA.compile), linker::typeof(CUDA.link))
@ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/execution.jl:237
[13] cached_compilation(cache::Dict{Any, CuFunction}, src::Core.MethodInstance, cfg::GPUCompiler.CompilerConfig{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, compiler::Function, linker::Function)
@ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/execution.jl:151
[14] macro expansion
@ ~/.julia/packages/CUDA/2kjXI/src/compiler/execution.jl:380 [inlined]
[15] macro expansion
@ ./lock.jl:273 [inlined]
[16] cufunction(f::WaterLily.var"#gpu_##kern_#550#223"{WaterLily.var"#fill!#220"{2, Float32, Int64, Bodies}}, tt::Type{Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{2, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(64, 1)}, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, Nothing}}, CuDeviceArray{Float32, 3, 1}, CuDeviceArray{Float32, 4, 1}, CuDeviceArray{Float32, 3, 1}, CuDeviceMatrix{Float32, 1}, CartesianIndex{2}}}; kwargs::@Kwargs{always_inline::Bool, maxthreads::Int64})
@ CUDA ~/.julia/packages/CUDA/2kjXI/src/compiler/execution.jl:375
[17] macro expansion
@ ~/.julia/packages/CUDA/2kjXI/src/compiler/execution.jl:112 [inlined]
[18] (::KernelAbstractions.Kernel{CUDABackend, KernelAbstractions.NDIteration.StaticSize{(64,)}, KernelAbstractions.NDIteration.DynamicSize, WaterLily.var"#gpu_##kern_#550#223"{WaterLily.var"#fill!#220"{2, Float32, Int64, Bodies}}})(::CuArray{Float32, 3, CUDA.DeviceMemory}, ::Vararg{Any}; ndrange::Tuple{Int64, Int64}, workgroupsize::Nothing)
@ CUDA.CUDAKernels ~/.julia/packages/CUDA/2kjXI/src/CUDAKernels.jl:103
[19] (::WaterLily.var"##kern#549#221"{WaterLily.var"#fill!#220"{2, Float32, Int64, Bodies}, Flow{2, Float32, CuArray{Float32, 2, CUDA.DeviceMemory}, CuArray{Float32, 3, CUDA.DeviceMemory}, CuArray{Float32, 4, CUDA.DeviceMemory}}})(μ₀::CuArray{Float32, 3, CUDA.DeviceMemory}, μ₁::CuArray{Float32, 4, CUDA.DeviceMemory}, V::CuArray{Float32, 3, CUDA.DeviceMemory}, σ::CuArray{Float32, 2, CUDA.DeviceMemory}, ::Val{8})
@ WaterLily ~/.julia/packages/WaterLily/raEyO/src/util.jl:114
[20] macro expansion
@ ~/.julia/packages/WaterLily/raEyO/src/util.jl:116 [inlined]
[21] measure!(a::Flow{2, Float32, CuArray{Float32, 2, CUDA.DeviceMemory}, CuArray{Float32, 3, CUDA.DeviceMemory}, CuArray{Float32, 4, CUDA.DeviceMemory}}, body::Bodies; t::Float32, ϵ::Int64)
@ WaterLily ~/.julia/packages/WaterLily/raEyO/src/Body.jl:45
[22] Simulation(dims::Tuple{Int64, Int64}, u_BC::Tuple{Int64, Int64}, L::Float64; Δt::Float64, ν::Float64, g::Nothing, U::Nothing, ϵ::Int64, perdir::Tuple{}, uλ::Nothing, exitBC::Bool, body::Bodies, T::Type, mem::Type)
@ WaterLily ~/.julia/packages/WaterLily/raEyO/src/WaterLily.jl:75
[23] Simulation
@ ~/.julia/packages/WaterLily/raEyO/src/WaterLily.jl:65 [inlined]
[24] circle(n::Int64, m::Int64; Re::Int64, U::Int64, mem::Type, T::Type)
@ Main ~/play/julia/WaterLily-Examples/examples/TwoD_MultipleBodies.jl:17
[25] circle(n::Int64, m::Int64)
@ Main ~/play/julia/WaterLily-Examples/examples/TwoD_MultipleBodies.jl:7
[26] top-level scope
@ ~/play/julia/WaterLily-Examples/examples/TwoD_MultipleBodies.jl:23
I modified the 2D multiple-body example to run it on the GPU. Compilation fails with an error that appears to be related to the multi-body construct. The full error message is attached below. My modification is minimal: I only changed `mem` to `mem=CuArray`.