Closed vchuravy closed 1 year ago
I removed the return statement from the kernel and I get this error (I can't modify your PR hence posting here):
julia> @time sol = solve(monteprob, GPUTsit5(), EnsembleGPUKernel(), trajectories = 10_000, adaptive = false, dt = 0.1f0) ERROR: MethodError: no method matching typeof(DiffEqGPU.gpu_tsit5_kernel)(::KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{1, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicSize, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}}}, ::CUDA.CuDeviceVector{ODEProblem{SVector{3, Float32}, Tuple{Float32, Float32}, false, SVector{3, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(lorenz), LinearAlgebra.UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}, 1}, ::CUDA.CuDeviceMatrix{SVector{3, Float32}, 1}, ::CUDA.CuDeviceMatrix{Float32, 1}, ::Float32, ::CallbackSet{Tuple{}, Tuple{}}, ::Nothing, ::Int64, ::Nothing) Stacktrace: [1] macro expansion @ ~/.julia/packages/GPUCompiler/hi5Wg/src/cache.jl:0 [inlined] [2] specialization_id(job::GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams, GPUCompiler.FunctionSpec{typeof(DiffEqGPU.gpu_tsit5_kernel), Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{1, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicSize, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}}}, CUDA.CuDeviceVector{ODEProblem{SVector{3, Float32}, Tuple{Float32, Float32}, false, SVector{3, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(lorenz), LinearAlgebra.UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}, 1}, CUDA.CuDeviceMatrix{SVector{3, Float32}, 1}, CUDA.CuDeviceMatrix{Float32, 1}, Float32, CallbackSet{Tuple{}, Tuple{}}, Nothing, Int64, Nothing}}}) @ GPUCompiler ~/.julia/packages/GPUCompiler/hi5Wg/src/cache.jl:12 [3] cached_compilation(cache::Dict{UInt64, Any}, job::GPUCompiler.CompilerJob, compiler::typeof(CUDA.cufunction_compile), linker::typeof(CUDA.cufunction_link)) @ GPUCompiler ~/.julia/packages/GPUCompiler/hi5Wg/src/cache.jl:71 [4] cufunction(f::typeof(DiffEqGPU.gpu_tsit5_kernel), tt::Type{Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{1, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicSize, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}}}, CUDA.CuDeviceVector{ODEProblem{SVector{3, Float32}, Tuple{Float32, Float32}, false, SVector{3, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(lorenz), LinearAlgebra.UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}, 1}, CUDA.CuDeviceMatrix{SVector{3, Float32}, 1}, CUDA.CuDeviceMatrix{Float32, 1}, Float32, CallbackSet{Tuple{}, Tuple{}}, Nothing, Int64, Nothing}}; name::Nothing, kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}) @ CUDA ~/.julia/packages/CUDA/DfvRa/src/compiler/execution.jl:299 [5] cufunction(f::typeof(DiffEqGPU.gpu_tsit5_kernel), tt::Type{Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{1, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicSize, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}}}, CUDA.CuDeviceVector{ODEProblem{SVector{3, Float32}, Tuple{Float32, Float32}, false, SVector{3, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(lorenz), LinearAlgebra.UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}, 1}, CUDA.CuDeviceMatrix{SVector{3, Float32}, 1}, CUDA.CuDeviceMatrix{Float32, 1}, Float32, CallbackSet{Tuple{}, Tuple{}}, Nothing, Int64, Nothing}}) @ CUDA ~/.julia/packages/CUDA/DfvRa/src/compiler/execution.jl:292 [6] macro expansion @ ~/.julia/packages/CUDA/DfvRa/src/compiler/execution.jl:102 [inlined] [7] (::KernelAbstractions.Kernel{CUDAKernels.CUDADevice, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicSize, typeof(DiffEqGPU.gpu_tsit5_kernel)})(::CUDA.CuArray{ODEProblem{SVector{3, Float32}, Tuple{Float32, Float32}, false, SVector{3, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(lorenz), LinearAlgebra.UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}, 1, CUDA.Mem.DeviceBuffer}, ::Vararg{Any}; ndrange::Int64, dependencies::CUDAKernels.CudaEvent, workgroupsize::Nothing, progress::Function) @ CUDAKernels ~/.julia/packages/CUDAKernels/YWLqR/src/CUDAKernels.jl:273 [8] vectorized_solve(probs::CUDA.CuArray{ODEProblem{SVector{3, Float32}, Tuple{Float32, Float32}, false, SVector{3, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(lorenz), LinearAlgebra.UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}, 1, CUDA.Mem.DeviceBuffer}, prob::ODEProblem{SVector{3, Float32}, Tuple{Float32, Float32}, false, SVector{3, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(lorenz), LinearAlgebra.UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}, alg::GPUTsit5; dt::Float32, saveat::Nothing, save_everystep::Bool, debug::Bool, callback::CallbackSet{Tuple{}, Tuple{}}, tstops::Nothing, kwargs::Base.Pairs{Symbol, DiffEqGPU.var"#15#21", Tuple{Symbol}, NamedTuple{(:unstable_check,), Tuple{DiffEqGPU.var"#15#21"}}}) @ DiffEqGPU ~/.julia/dev/DiffEqGPU/src/solve.jl:85 [9] batch_solve_up_kernel(ensembleprob::EnsembleProblem{ODEProblem{SVector{3, Float32}, Tuple{Float32, Float32}, false, SVector{3, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(lorenz), LinearAlgebra.UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}, var"#1#2", typeof(SciMLBase.DEFAULT_OUTPUT_FUNC), typeof(SciMLBase.DEFAULT_REDUCTION), Nothing}, probs::Vector{ODEProblem{SVector{3, Float32}, Tuple{Float32, Float32}, false, SVector{3, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(lorenz), LinearAlgebra.UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}}, alg::GPUTsit5, ensemblealg::EnsembleGPUKernel, I::UnitRange{Int64}, adaptive::Bool; kwargs::Base.Pairs{Symbol, Any, Tuple{Symbol, Symbol, Symbol}, NamedTuple{(:saveat, :unstable_check, :dt), Tuple{Nothing, DiffEqGPU.var"#15#21", Float32}}}) @ DiffEqGPU ~/.julia/dev/DiffEqGPU/src/DiffEqGPU.jl:694 [10] batch_solve(ensembleprob::EnsembleProblem{ODEProblem{SVector{3, Float32}, Tuple{Float32, Float32}, false, SVector{3, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(lorenz), LinearAlgebra.UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}, var"#1#2", typeof(SciMLBase.DEFAULT_OUTPUT_FUNC), typeof(SciMLBase.DEFAULT_REDUCTION), Nothing}, alg::GPUTsit5, ensemblealg::EnsembleGPUKernel, I::UnitRange{Int64}, adaptive::Bool; kwargs::Base.Pairs{Symbol, Any, Tuple{Symbol, Symbol}, NamedTuple{(:unstable_check, :dt), Tuple{DiffEqGPU.var"#15#21", Float32}}}) @ DiffEqGPU ~/.julia/dev/DiffEqGPU/src/DiffEqGPU.jl:637 [11] macro expansion @ ./timing.jl:382 [inlined] [12] __solve(ensembleprob::EnsembleProblem{ODEProblem{SVector{3, Float32}, Tuple{Float32, Float32}, false, SVector{3, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(lorenz), LinearAlgebra.UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}, var"#1#2", typeof(SciMLBase.DEFAULT_OUTPUT_FUNC), typeof(SciMLBase.DEFAULT_REDUCTION), Nothing}, alg::GPUTsit5, ensemblealg::EnsembleGPUKernel; trajectories::Int64, batch_size::Int64, unstable_check::Function, adaptive::Bool, kwargs::Base.Pairs{Symbol, Float32, Tuple{Symbol}, NamedTuple{(:dt,), Tuple{Float32}}}) @ DiffEqGPU ~/.julia/dev/DiffEqGPU/src/DiffEqGPU.jl:523 [13] #solve#28 @ ~/.julia/packages/DiffEqBase/9H9Sj/src/solve.jl:872 [inlined] [14] top-level scope @ ./timing.jl:262 [inlined] [15] top-level scope @ ./REPL[30]:0 [16] top-level scope @ ~/.julia/packages/CUDA/DfvRa/src/initialization.jl:52
Requires: https://github.com/JuliaGPU/KernelAbstractions.jl/pull/341
I removed the return statement from the kernel and I get this error (I can't modify your PR hence posting here):