WaterLily-jl / WaterLily-Examples

WaterLily tutorial files
GNU General Public License v3.0
15 stars 7 forks source link

Error running multibody problem on GPU #4

Open biao-geng-me opened 6 days ago

biao-geng-me commented 6 days ago

I modified the 2D multiple body problem to run it on the GPU. It looks like there is an error during compilation related to the multibody construct. The full error message is attached below. My modification is minimal: I only changed the mem argument to mem=CuArray.

ERROR: LoadError: GPU compilation of MethodInstance for (::WaterLily.var"#gpu_##kern_#550#223"{WaterLily.var"#fill!#220"{2, Float32, Int64, Bodies}})(::KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{2, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(64, 1)}, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, Nothing}}, ::CuDeviceArray{Float32, 3, 1}, ::CuDeviceArray{Float32, 4, 1}, ::CuDeviceArray{Float32, 3, 1}, ::CuDeviceMatrix{Float32, 1}, ::CartesianIndex{2}) failed
KernelError: passing and using non-bitstype argument

Argument 1 to your kernel function is of type WaterLily.var"#gpu_##kern_#550#223"{WaterLily.var"#fill!#220"{2, Float32, Int64, Bodies}}, which is not isbits:
  .fill! is of type WaterLily.var"#fill!#220"{2, Float32, Int64, Bodies} which is not isbits.
    .body is of type Bodies which is not isbits.
      .bodies is of type Vector{AutoBody} which is not isbits.
        .ref is of type MemoryRef{AutoBody} which is not isbits.
          .mem is of type Memory{AutoBody} which is not isbits.
      .ops is of type Vector{Function} which is not isbits.
        .ref is of type MemoryRef{Function} which is not isbits.
          .mem is of type Memory{Function} which is not isbits.

Stacktrace:
  [1] check_invocation(job::GPUCompiler.CompilerJob)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/validation.jl:92
  [2] macro expansion
    @ ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:92 [inlined]
  [3] macro expansion
    @ ~/.julia/packages/TimerOutputs/NRdsv/src/TimerOutput.jl:253 [inlined]
  [4] codegen(output::Symbol, job::GPUCompiler.CompilerJob; toplevel::Bool, libraries::Bool, optimize::Bool, cleanup::Bool, validate::Bool, strip::Bool, only_entry::Bool, parent_job::Nothing)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:90
  [5] codegen
    @ ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:82 [inlined]
  [6] compile(target::Symbol, job::GPUCompiler.CompilerJob; kwargs::@Kwargs{})
    @ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:79
  [7] compile
    @ ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:74 [inlined]
  [8] #1145
    @ ~/.julia/packages/CUDA/2kjXI/src/compiler/compilation.jl:250 [inlined]
  [9] JuliaContext(f::CUDA.var"#1145#1148"{GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}}; kwargs::@Kwargs{})
    @ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:34
 [10] JuliaContext(f::Function)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:25
 [11] compile(job::GPUCompiler.CompilerJob)
    @ CUDA ~/.julia/packages/CUDA/2kjXI/src/compiler/compilation.jl:249
 [12] actual_compilation(cache::Dict{Any, CuFunction}, src::Core.MethodInstance, world::UInt64, cfg::GPUCompiler.CompilerConfig{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, compiler::typeof(CUDA.compile), linker::typeof(CUDA.link))
    @ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/execution.jl:237
 [13] cached_compilation(cache::Dict{Any, CuFunction}, src::Core.MethodInstance, cfg::GPUCompiler.CompilerConfig{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, compiler::Function, linker::Function)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/execution.jl:151
 [14] macro expansion
    @ ~/.julia/packages/CUDA/2kjXI/src/compiler/execution.jl:380 [inlined]
 [15] macro expansion
    @ ./lock.jl:273 [inlined]
 [16] cufunction(f::WaterLily.var"#gpu_##kern_#550#223"{WaterLily.var"#fill!#220"{2, Float32, Int64, Bodies}}, tt::Type{Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{2, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(64, 1)}, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, Nothing}}, CuDeviceArray{Float32, 3, 1}, CuDeviceArray{Float32, 4, 1}, CuDeviceArray{Float32, 3, 1}, CuDeviceMatrix{Float32, 1}, CartesianIndex{2}}}; kwargs::@Kwargs{always_inline::Bool, maxthreads::Int64})
    @ CUDA ~/.julia/packages/CUDA/2kjXI/src/compiler/execution.jl:375
 [17] macro expansion
    @ ~/.julia/packages/CUDA/2kjXI/src/compiler/execution.jl:112 [inlined]
 [18] (::KernelAbstractions.Kernel{CUDABackend, KernelAbstractions.NDIteration.StaticSize{(64,)}, KernelAbstractions.NDIteration.DynamicSize, WaterLily.var"#gpu_##kern_#550#223"{WaterLily.var"#fill!#220"{2, Float32, Int64, Bodies}}})(::CuArray{Float32, 3, CUDA.DeviceMemory}, ::Vararg{Any}; ndrange::Tuple{Int64, Int64}, workgroupsize::Nothing)
    @ CUDA.CUDAKernels ~/.julia/packages/CUDA/2kjXI/src/CUDAKernels.jl:103
 [19] (::WaterLily.var"##kern#549#221"{WaterLily.var"#fill!#220"{2, Float32, Int64, Bodies}, Flow{2, Float32, CuArray{Float32, 2, CUDA.DeviceMemory}, CuArray{Float32, 3, CUDA.DeviceMemory}, CuArray{Float32, 4, CUDA.DeviceMemory}}})(μ₀::CuArray{Float32, 3, CUDA.DeviceMemory}, μ₁::CuArray{Float32, 4, CUDA.DeviceMemory}, V::CuArray{Float32, 3, CUDA.DeviceMemory}, σ::CuArray{Float32, 2, CUDA.DeviceMemory}, ::Val{8})
    @ WaterLily ~/.julia/packages/WaterLily/raEyO/src/util.jl:114
 [20] macro expansion
    @ ~/.julia/packages/WaterLily/raEyO/src/util.jl:116 [inlined]
 [21] measure!(a::Flow{2, Float32, CuArray{Float32, 2, CUDA.DeviceMemory}, CuArray{Float32, 3, CUDA.DeviceMemory}, CuArray{Float32, 4, CUDA.DeviceMemory}}, body::Bodies; t::Float32, ϵ::Int64)
    @ WaterLily ~/.julia/packages/WaterLily/raEyO/src/Body.jl:45
 [22] Simulation(dims::Tuple{Int64, Int64}, u_BC::Tuple{Int64, Int64}, L::Float64; Δt::Float64, ν::Float64, g::Nothing, U::Nothing, ϵ::Int64, perdir::Tuple{}, uλ::Nothing, exitBC::Bool, body::Bodies, T::Type, mem::Type)
    @ WaterLily ~/.julia/packages/WaterLily/raEyO/src/WaterLily.jl:75
 [23] Simulation
    @ ~/.julia/packages/WaterLily/raEyO/src/WaterLily.jl:65 [inlined]
 [24] circle(n::Int64, m::Int64; Re::Int64, U::Int64, mem::Type, T::Type)
    @ Main ~/play/julia/WaterLily-Examples/examples/TwoD_MultipleBodies.jl:17
 [25] circle(n::Int64, m::Int64)
    @ Main ~/play/julia/WaterLily-Examples/examples/TwoD_MultipleBodies.jl:7
 [26] top-level scope
    @ ~/play/julia/WaterLily-Examples/examples/TwoD_MultipleBodies.jl:23
b-fg commented 3 days ago

Hey, unfortunately Bodies is not yet GPU-ready, so that example will not work on a GPU. The PR https://github.com/WaterLily-jl/WaterLily.jl/pull/158 should be where we finally fix this.