Open marinlauber opened 1 week ago
The issue is linked to the measure
function.
function measure(body::AbstractParametricBody,x,t;fastd²=Inf)
# curve props and velocity in ξ-frame
d,n,dotS = curve_props(body,x,t;fastd²)
d^2 > fastd² && return d,zero(x),zero(x)
1 dξdt = dotS-ForwardDiff.derivative(t->body.map(x,t),t)
# Convert to x-frame with dξ/dx⁻¹ (d has already been scaled)
2 dξdx = ForwardDiff.jacobian(x->body.map(x,t),x)
3 return (d,dξdx\n/body.scale,dξdx\dξdt)
end
First, ForwardDiff
breaks on line 1 because the input is SVector{3}
but the output is SVector{2}
. This throws a _throw_size_mismatch(as...) @ StaticArrays
error from within KernelAbstraction
(in KernelAbstractions.NDIteration.DynamicCheck
it seems), see below. Interestingly, line 2 does not throw an error.
Line 3 also breaks (if line 1 is commented and line 3 is return (d,dξdx\n/body.scale,zero(x))
) this time with a "call through a literal pointer"
error... Explicitly enforcing the size of n as n = SVector{length(x)}(dξdx\n/body.scale)
results in the same error. This seems to be linked to the ldiv
(\
).
If line 3 is changed to return (d,zero(x),zero(x))
the code runs as expected.
The full _throw_size_mismatch
error
1-element ExceptionStack:
LoadError: InvalidIRError: compiling MethodInstance for (::WaterLily.var"#gpu_##kern_#552#227"{WaterLily.var"#fill!#224"{3, Float32, Float32, Int64, ParametricBody{Float32, NurbsLocator{NurbsCurve{2, 2, SMatrix{2, 9, Float32, 18}, SVector{12, Float32}, SVector{9, Float32}}, SVector{2, Float32}}, NurbsCurve{2, 2, SMatrix{2, 9, Float32, 18}, SVector{12, Float32}, SVector{9, Float32}}, ParametricBodies.var"#7#9"{NurbsCurve{2, 2, SMatrix{2, 9, Float32, 18}, SVector{12, Float32}, SVector{9, Float32}}}, var"#map#12"{SVector{3, Float32}}}, Int64}})(::KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{3, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}, Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{3, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(64, 1, 1)}, CartesianIndices{3, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}, Base.OneTo{Int64}}}, Nothing}}, ::CuDeviceArray{Float32, 4, 1}, ::CuDeviceArray{Float32, 5, 1}, ::CuDeviceArray{Float32, 4, 1}, ::CuDeviceArray{Float32, 3, 1}, ::CartesianIndex{3}) resulted in invalid LLVM IR
Reason: unsupported dynamic function invocation (call to _throw_size_mismatch(as...) @ StaticArrays ~/.julia/packages/StaticArrays/MSJcA/src/traits.jl:116)
Stacktrace:
[1] same_size
@ ~/.julia/packages/StaticArrays/MSJcA/src/traits.jl:111
[2] macro expansion
@ ~/.julia/packages/StaticArrays/MSJcA/src/mapreduce.jl:76
[3] _map
@ ~/.julia/packages/StaticArrays/MSJcA/src/mapreduce.jl:42
[4] map
@ ~/.julia/packages/StaticArrays/MSJcA/src/mapreduce.jl:39
[5] -
@ ~/.julia/packages/StaticArrays/MSJcA/src/linalg.jl:16
[6] #measure#1
@ ~/Workspace/ParametricBodies.jl/src/ParametricBodies.jl:17
[7] measure
@ ~/Workspace/ParametricBodies.jl/src/ParametricBodies.jl:13
[8] fill!
@ ~/Workspace/WaterLily/src/Body.jl:37
[9] macro expansion
@ ~/Workspace/WaterLily/src/util.jl:111
[10] gpu_
@ ~/.julia/packages/KernelAbstractions/60cqT/src/macros.jl:95
[11] gpu_
@ ./none:0
Hint: catch this exception as `err` and call `code_typed(err; interactive = true)` to introspect the erronous code with Cthulhu.jl
Stacktrace:
[1] check_ir(job::GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, args::LLVM.Module)
@ GPUCompiler ~/.julia/packages/GPUCompiler/Y4hSX/src/validation.jl:147
[2] macro expansion
@ ~/.julia/packages/GPUCompiler/Y4hSX/src/driver.jl:458 [inlined]
[3] macro expansion
@ ~/.julia/packages/TimerOutputs/Lw5SP/src/TimerOutput.jl:253 [inlined]
[4] macro expansion
@ ~/.julia/packages/GPUCompiler/Y4hSX/src/driver.jl:457 [inlined]
[5] emit_llvm(job::GPUCompiler.CompilerJob; libraries::Bool, toplevel::Bool, optimize::Bool, cleanup::Bool, only_entry::Bool, validate::Bool)
@ GPUCompiler ~/.julia/packages/GPUCompiler/Y4hSX/src/utils.jl:103
[6] emit_llvm
@ ~/.julia/packages/GPUCompiler/Y4hSX/src/utils.jl:97 [inlined]
[7] codegen(output::Symbol, job::GPUCompiler.CompilerJob; libraries::Bool, toplevel::Bool, optimize::Bool, cleanup::Bool, strip::Bool, validate::Bool, only_entry::Bool, parent_job::Nothing)
@ GPUCompiler ~/.julia/packages/GPUCompiler/Y4hSX/src/driver.jl:136
[8] codegen
@ ~/.julia/packages/GPUCompiler/Y4hSX/src/driver.jl:115 [inlined]
[9] compile(target::Symbol, job::GPUCompiler.CompilerJob; libraries::Bool, toplevel::Bool, optimize::Bool, cleanup::Bool, strip::Bool, validate::Bool, only_entry::Bool)
@ GPUCompiler ~/.julia/packages/GPUCompiler/Y4hSX/src/driver.jl:111
[10] compile
@ ~/.julia/packages/GPUCompiler/Y4hSX/src/driver.jl:103 [inlined]
[11] #1145
@ ~/.julia/packages/CUDA/Tl08O/src/compiler/compilation.jl:254 [inlined]
[12] JuliaContext(f::CUDA.var"#1145#1148"{GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}}; kwargs::@Kwargs{})
@ GPUCompiler ~/.julia/packages/GPUCompiler/Y4hSX/src/driver.jl:52
[13] JuliaContext(f::Function)
@ GPUCompiler ~/.julia/packages/GPUCompiler/Y4hSX/src/driver.jl:42
[14] compile(job::GPUCompiler.CompilerJob)
@ CUDA ~/.julia/packages/CUDA/Tl08O/src/compiler/compilation.jl:253
[15] actual_compilation(cache::Dict{Any, CuFunction}, src::Core.MethodInstance, world::UInt64, cfg::GPUCompiler.CompilerConfig{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, compiler::typeof(CUDA.compile), linker::typeof(CUDA.link))
@ GPUCompiler ~/.julia/packages/GPUCompiler/Y4hSX/src/execution.jl:237
[16] cached_compilation(cache::Dict{Any, CuFunction}, src::Core.MethodInstance, cfg::GPUCompiler.CompilerConfig{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, compiler::Function, linker::Function)
@ GPUCompiler ~/.julia/packages/GPUCompiler/Y4hSX/src/execution.jl:151
[17] macro expansion
@ ~/.julia/packages/CUDA/Tl08O/src/compiler/execution.jl:369 [inlined]
[18] macro expansion
@ ./lock.jl:267 [inlined]
[19] cufunction(f::WaterLily.var"#gpu_##kern_#552#227"{WaterLily.var"#fill!#224"{3, Float32, Float32, Int64, ParametricBody{Float32, NurbsLocator{NurbsCurve{2, 2, SMatrix{2, 9, Float32, 18}, SVector{12, Float32}, SVector{9, Float32}}, SVector{2, Float32}}, NurbsCurve{2, 2, SMatrix{2, 9, Float32, 18}, SVector{12, Float32}, SVector{9, Float32}}, ParametricBodies.var"#7#9"{NurbsCurve{2, 2, SMatrix{2, 9, Float32, 18}, SVector{12, Float32}, SVector{9, Float32}}}, var"#map#12"{SVector{3, Float32}}}, Int64}}, tt::Type{Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{3, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}, Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{3, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(64, 1, 1)}, CartesianIndices{3, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}, Base.OneTo{Int64}}}, Nothing}}, CuDeviceArray{Float32, 4, 1}, CuDeviceArray{Float32, 5, 1}, CuDeviceArray{Float32, 4, 1}, CuDeviceArray{Float32, 3, 1}, CartesianIndex{3}}}; kwargs::@Kwargs{always_inline::Bool, maxthreads::Int64})
@ CUDA ~/.julia/packages/CUDA/Tl08O/src/compiler/execution.jl:364
[20] macro expansion
@ ~/.julia/packages/CUDA/Tl08O/src/compiler/execution.jl:112 [inlined]
[21] (::KernelAbstractions.Kernel{CUDABackend, KernelAbstractions.NDIteration.StaticSize{(64,)}, KernelAbstractions.NDIteration.DynamicSize, WaterLily.var"#gpu_##kern_#552#227"{WaterLily.var"#fill!#224"{3, Float32, Float32, Int64, ParametricBody{Float32, NurbsLocator{NurbsCurve{2, 2, SMatrix{2, 9, Float32, 18}, SVector{12, Float32}, SVector{9, Float32}}, SVector{2, Float32}}, NurbsCurve{2, 2, SMatrix{2, 9, Float32, 18}, SVector{12, Float32}, SVector{9, Float32}}, ParametricBodies.var"#7#9"{NurbsCurve{2, 2, SMatrix{2, 9, Float32, 18}, SVector{12, Float32}, SVector{9, Float32}}}, var"#map#12"{SVector{3, Float32}}}, Int64}}})(::CuArray{Float32, 4, CUDA.DeviceMemory}, ::Vararg{Any}; ndrange::Tuple{Int64, Int64, Int64}, workgroupsize::Nothing)
@ CUDA.CUDAKernels ~/.julia/packages/CUDA/Tl08O/src/CUDAKernels.jl:103
[22] (::WaterLily.var"##kern#551#225"{WaterLily.var"#fill!#224"{3, Float32, Float32, Int64, ParametricBody{Float32, NurbsLocator{NurbsCurve{2, 2, SMatrix{2, 9, Float32, 18}, SVector{12, Float32}, SVector{9, Float32}}, SVector{2, Float32}}, NurbsCurve{2, 2, SMatrix{2, 9, Float32, 18}, SVector{12, Float32}, SVector{9, Float32}}, ParametricBodies.var"#7#9"{NurbsCurve{2, 2, SMatrix{2, 9, Float32, 18}, SVector{12, Float32}, SVector{9, Float32}}}, var"#map#12"{SVector{3, Float32}}}, Int64}, Flow{3, Float32, CuArray{Float32, 3, CUDA.DeviceMemory}, CuArray{Float32, 4, CUDA.DeviceMemory}, CuArray{Float32, 5, CUDA.DeviceMemory}}})(μ₀::CuArray{Float32, 4, CUDA.DeviceMemory}, μ₁::CuArray{Float32, 5, CUDA.DeviceMemory}, V::CuArray{Float32, 4, CUDA.DeviceMemory}, σ::CuArray{Float32, 3, CUDA.DeviceMemory}, ::Val{16})
@ WaterLily ~/Workspace/WaterLily/src/util.jl:114
[23] macro expansion
@ ~/Workspace/WaterLily/src/util.jl:116 [inlined]
[24] measure!(a::Flow{3, Float32, CuArray{Float32, 3, CUDA.DeviceMemory}, CuArray{Float32, 4, CUDA.DeviceMemory}, CuArray{Float32, 5, CUDA.DeviceMemory}}, body::ParametricBody{Float32, NurbsLocator{NurbsCurve{2, 2, SMatrix{2, 9, Float32, 18}, SVector{12, Float32}, SVector{9, Float32}}, SVector{2, Float32}}, NurbsCurve{2, 2, SMatrix{2, 9, Float32, 18}, SVector{12, Float32}, SVector{9, Float32}}, ParametricBodies.var"#7#9"{NurbsCurve{2, 2, SMatrix{2, 9, Float32, 18}, SVector{12, Float32}, SVector{9, Float32}}}, var"#map#12"{SVector{3, Float32}}}; t::Float32, ϵ::Int64)
@ WaterLily ~/Workspace/WaterLily/src/Body.jl:50
[25] Simulation(dims::Tuple{Int64, Int64, Int64}, u_BC::Tuple{Int64, Int64, Int64}, L::Int64; Δt::Float64, ν::Float64, g::Nothing, U::Nothing, ϵ::Int64, perdir::Tuple{}, uλ::Nothing, exitBC::Bool, body::ParametricBody{Float32, NurbsLocator{NurbsCurve{2, 2, SMatrix{2, 9, Float32, 18}, SVector{12, Float32}, SVector{9, Float32}}, SVector{2, Float32}}, NurbsCurve{2, 2, SMatrix{2, 9, Float32, 18}, SVector{12, Float32}, SVector{9, Float32}}, ParametricBodies.var"#7#9"{NurbsCurve{2, 2, SMatrix{2, 9, Float32, 18}, SVector{12, Float32}, SVector{9, Float32}}}, var"#map#12"{SVector{3, Float32}}}, T::Type, mem::Type)
@ WaterLily ~/Workspace/WaterLily/src/WaterLily.jl:75
[26] make_sim(L::Int64; Re::Int64, mem::Type, U::Int64, T::Type)
@ Main ~/Workspace/ParametricBodies.jl/example/ThreeD_Sphere.jl:23
[27] top-level scope
@ ~/Workspace/ParametricBodies.jl/example/ThreeD_Sphere.jl:1
[28] eval
@ ./boot.jl:385 [inlined]
[29] include_string(mapexpr::typeof(REPL.softscope), mod::Module, code::String, filename::String)
@ Base ./loading.jl:2070
[30] invokelatest(::Any, ::Any, ::Vararg{Any}; kwargs::@Kwargs{})
@ Base ./essentials.jl:887
[31] invokelatest(::Any, ::Any, ::Vararg{Any})
@ Base ./essentials.jl:884
[32] inlineeval(m::Module, code::String, code_line::Int64, code_column::Int64, file::String; softscope::Bool)
@ VSCodeServer ~/.vscode/extensions/julialang.language-julia-1.124.2/scripts/packages/VSCodeServer/src/eval.jl:271
[33] (::VSCodeServer.var"#69#74"{Bool, Bool, Bool, Module, String, Int64, Int64, String, VSCodeServer.ReplRunCodeRequestParams})()
@ VSCodeServer ~/.vscode/extensions/julialang.language-julia-1.124.2/scripts/packages/VSCodeServer/src/eval.jl:181
[34] withpath(f::VSCodeServer.var"#69#74"{Bool, Bool, Bool, Module, String, Int64, Int64, String, VSCodeServer.ReplRunCodeRequestParams}, path::String)
@ VSCodeServer ~/.vscode/extensions/julialang.language-julia-1.124.2/scripts/packages/VSCodeServer/src/repl.jl:276
[35] (::VSCodeServer.var"#68#73"{Bool, Bool, Bool, Module, String, Int64, Int64, String, VSCodeServer.ReplRunCodeRequestParams})()
@ VSCodeServer ~/.vscode/extensions/julialang.language-julia-1.124.2/scripts/packages/VSCodeServer/src/eval.jl:179
[36] hideprompt(f::VSCodeServer.var"#68#73"{Bool, Bool, Bool, Module, String, Int64, Int64, String, VSCodeServer.ReplRunCodeRequestParams})
@ VSCodeServer ~/.vscode/extensions/julialang.language-julia-1.124.2/scripts/packages/VSCodeServer/src/repl.jl:38
[37] (::VSCodeServer.var"#67#72"{Bool, Bool, Bool, Module, String, Int64, Int64, String, VSCodeServer.ReplRunCodeRequestParams})()
@ VSCodeServer ~/.vscode/extensions/julialang.language-julia-1.124.2/scripts/packages/VSCodeServer/src/eval.jl:150
[38] with_logstate(f::Function, logstate::Any)
@ Base.CoreLogging ./logging.jl:515
[39] with_logger
@ ./logging.jl:627 [inlined]
[40] (::VSCodeServer.var"#66#71"{VSCodeServer.ReplRunCodeRequestParams})()
@ VSCodeServer ~/.vscode/extensions/julialang.language-julia-1.124.2/scripts/packages/VSCodeServer/src/eval.jl:263
[41] #invokelatest#2
@ Base ./essentials.jl:887 [inlined]
[42] invokelatest(::Any)
@ Base ./essentials.jl:884
[43] (::VSCodeServer.var"#64#65")()
@ VSCodeServer ~/.vscode/extensions/julialang.language-julia-1.124.2/scripts/packages/VSCodeServer/src/eval.jl:34
in expression starting at /home/marin/Workspace/ParametricBodies.jl/example/ThreeD_Sphere.jl:1
Using the mapping to extrude/revolve a 2D curve in 3D space via the mapping function breaks on the GPU.
See the script below with an example that runs on the CPU but fails on the GPU