SciML / DiffEqGPU.jl

GPU-acceleration routines for DifferentialEquations.jl and the broader SciML scientific machine learning ecosystem
https://docs.sciml.ai/DiffEqGPU/stable/
MIT License
274 stars 28 forks source link

Simple translation to KernelAbstractions #220

Closed vchuravy closed 1 year ago

utkarsh530 commented 1 year ago

I removed the return statement from the kernel and I get this error (I can't modify your PR hence posting here):

julia> @time sol = solve(monteprob, GPUTsit5(), EnsembleGPUKernel(), trajectories = 10_000, adaptive = false, dt = 0.1f0)
ERROR: MethodError: no method matching typeof(DiffEqGPU.gpu_tsit5_kernel)(::KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{1, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicSize, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}}}, ::CUDA.CuDeviceVector{ODEProblem{SVector{3, Float32}, Tuple{Float32, Float32}, false, SVector{3, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(lorenz), LinearAlgebra.UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}, 1}, ::CUDA.CuDeviceMatrix{SVector{3, Float32}, 1}, ::CUDA.CuDeviceMatrix{Float32, 1}, ::Float32, ::CallbackSet{Tuple{}, Tuple{}}, ::Nothing, ::Int64, ::Nothing)
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/GPUCompiler/hi5Wg/src/cache.jl:0 [inlined]
  [2] specialization_id(job::GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams, GPUCompiler.FunctionSpec{typeof(DiffEqGPU.gpu_tsit5_kernel), Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{1, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicSize, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}}}, CUDA.CuDeviceVector{ODEProblem{SVector{3, Float32}, Tuple{Float32, Float32}, false, SVector{3, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(lorenz), LinearAlgebra.UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}, 1}, CUDA.CuDeviceMatrix{SVector{3, Float32}, 1}, CUDA.CuDeviceMatrix{Float32, 1}, Float32, CallbackSet{Tuple{}, Tuple{}}, Nothing, Int64, Nothing}}})
    @ GPUCompiler ~/.julia/packages/GPUCompiler/hi5Wg/src/cache.jl:12
  [3] cached_compilation(cache::Dict{UInt64, Any}, job::GPUCompiler.CompilerJob, compiler::typeof(CUDA.cufunction_compile), linker::typeof(CUDA.cufunction_link))
    @ GPUCompiler ~/.julia/packages/GPUCompiler/hi5Wg/src/cache.jl:71
  [4] cufunction(f::typeof(DiffEqGPU.gpu_tsit5_kernel), tt::Type{Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{1, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicSize, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}}}, CUDA.CuDeviceVector{ODEProblem{SVector{3, Float32}, Tuple{Float32, Float32}, false, SVector{3, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(lorenz), LinearAlgebra.UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}, 1}, CUDA.CuDeviceMatrix{SVector{3, Float32}, 1}, CUDA.CuDeviceMatrix{Float32, 1}, Float32, CallbackSet{Tuple{}, Tuple{}}, Nothing, Int64, Nothing}}; name::Nothing, kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ CUDA ~/.julia/packages/CUDA/DfvRa/src/compiler/execution.jl:299
  [5] cufunction(f::typeof(DiffEqGPU.gpu_tsit5_kernel), tt::Type{Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{1, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicSize, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}}}, CUDA.CuDeviceVector{ODEProblem{SVector{3, Float32}, Tuple{Float32, Float32}, false, SVector{3, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(lorenz), LinearAlgebra.UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}, 1}, CUDA.CuDeviceMatrix{SVector{3, Float32}, 1}, CUDA.CuDeviceMatrix{Float32, 1}, Float32, CallbackSet{Tuple{}, Tuple{}}, Nothing, Int64, Nothing}})
    @ CUDA ~/.julia/packages/CUDA/DfvRa/src/compiler/execution.jl:292
  [6] macro expansion
    @ ~/.julia/packages/CUDA/DfvRa/src/compiler/execution.jl:102 [inlined]
  [7] (::KernelAbstractions.Kernel{CUDAKernels.CUDADevice, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicSize, typeof(DiffEqGPU.gpu_tsit5_kernel)})(::CUDA.CuArray{ODEProblem{SVector{3, Float32}, Tuple{Float32, Float32}, false, SVector{3, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(lorenz), LinearAlgebra.UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}, 1, CUDA.Mem.DeviceBuffer}, ::Vararg{Any}; ndrange::Int64, dependencies::CUDAKernels.CudaEvent, workgroupsize::Nothing, progress::Function)
    @ CUDAKernels ~/.julia/packages/CUDAKernels/YWLqR/src/CUDAKernels.jl:273
  [8] vectorized_solve(probs::CUDA.CuArray{ODEProblem{SVector{3, Float32}, Tuple{Float32, Float32}, false, SVector{3, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(lorenz), LinearAlgebra.UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}, 1, CUDA.Mem.DeviceBuffer}, prob::ODEProblem{SVector{3, Float32}, Tuple{Float32, Float32}, false, SVector{3, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(lorenz), LinearAlgebra.UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}, alg::GPUTsit5; dt::Float32, saveat::Nothing, save_everystep::Bool, debug::Bool, callback::CallbackSet{Tuple{}, Tuple{}}, tstops::Nothing, kwargs::Base.Pairs{Symbol, DiffEqGPU.var"#15#21", Tuple{Symbol}, NamedTuple{(:unstable_check,), Tuple{DiffEqGPU.var"#15#21"}}})
    @ DiffEqGPU ~/.julia/dev/DiffEqGPU/src/solve.jl:85
  [9] batch_solve_up_kernel(ensembleprob::EnsembleProblem{ODEProblem{SVector{3, Float32}, Tuple{Float32, Float32}, false, SVector{3, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(lorenz), LinearAlgebra.UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}, var"#1#2", typeof(SciMLBase.DEFAULT_OUTPUT_FUNC), typeof(SciMLBase.DEFAULT_REDUCTION), Nothing}, probs::Vector{ODEProblem{SVector{3, Float32}, Tuple{Float32, Float32}, false, SVector{3, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(lorenz), LinearAlgebra.UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}}, alg::GPUTsit5, ensemblealg::EnsembleGPUKernel, I::UnitRange{Int64}, adaptive::Bool; kwargs::Base.Pairs{Symbol, Any, Tuple{Symbol, Symbol, Symbol}, NamedTuple{(:saveat, :unstable_check, :dt), Tuple{Nothing, DiffEqGPU.var"#15#21", Float32}}})
    @ DiffEqGPU ~/.julia/dev/DiffEqGPU/src/DiffEqGPU.jl:694
 [10] batch_solve(ensembleprob::EnsembleProblem{ODEProblem{SVector{3, Float32}, Tuple{Float32, Float32}, false, SVector{3, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(lorenz), LinearAlgebra.UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}, var"#1#2", typeof(SciMLBase.DEFAULT_OUTPUT_FUNC), typeof(SciMLBase.DEFAULT_REDUCTION), Nothing}, alg::GPUTsit5, ensemblealg::EnsembleGPUKernel, I::UnitRange{Int64}, adaptive::Bool; kwargs::Base.Pairs{Symbol, Any, Tuple{Symbol, Symbol}, NamedTuple{(:unstable_check, :dt), Tuple{DiffEqGPU.var"#15#21", Float32}}})
    @ DiffEqGPU ~/.julia/dev/DiffEqGPU/src/DiffEqGPU.jl:637
 [11] macro expansion
    @ ./timing.jl:382 [inlined]
 [12] __solve(ensembleprob::EnsembleProblem{ODEProblem{SVector{3, Float32}, Tuple{Float32, Float32}, false, SVector{3, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(lorenz), LinearAlgebra.UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}, var"#1#2", typeof(SciMLBase.DEFAULT_OUTPUT_FUNC), typeof(SciMLBase.DEFAULT_REDUCTION), Nothing}, alg::GPUTsit5, ensemblealg::EnsembleGPUKernel; trajectories::Int64, batch_size::Int64, unstable_check::Function, adaptive::Bool, kwargs::Base.Pairs{Symbol, Float32, Tuple{Symbol}, NamedTuple{(:dt,), Tuple{Float32}}})
    @ DiffEqGPU ~/.julia/dev/DiffEqGPU/src/DiffEqGPU.jl:523
 [13] #solve#28
    @ ~/.julia/packages/DiffEqBase/9H9Sj/src/solve.jl:872 [inlined]
 [14] top-level scope
    @ ./timing.jl:262 [inlined]
 [15] top-level scope
    @ ./REPL[30]:0
 [16] top-level scope
    @ ~/.julia/packages/CUDA/DfvRa/src/initialization.jl:52
utkarsh530 commented 1 year ago

Requires: https://github.com/JuliaGPU/KernelAbstractions.jl/pull/341