Closed ChrisRackauckas closed 3 years ago
using OrdinaryDiffEq, DiffEqGPU, CuArrays, LinearAlgebra
# Make GPU 0 the active device for the benchmark below.
CuArrays.device!(0)
"""
    lorenz(du, u, p, t)

In-place right-hand side of the Lorenz system.

Writes the three derivative components into `du[1:3]`, computed from the state
`u[1:3]` and the parameters `p[1:3]`. The time argument `t` is accepted but not
used. Returns `nothing`.

Bounds checks are disabled on every access, so `du`, `u` and `p` must each hold
at least three elements.
"""
function lorenz(du, u, p, t)
    @inbounds du[1] = p[1] * (u[2] - u[1])
    @inbounds du[2] = u[1] * (p[2] - u[3]) - u[2]
    @inbounds du[3] = u[1] * u[2] - p[3] * u[3]
    return nothing
end
# Initial state, time span and nominal parameters, all in single precision.
u0 = Float32[1.0, 0.0, 0.0]
tspan = (0.0f0, 100.0f0)
p = (10.0f0, 28.0f0, 8 / 3.0f0)

# Base problem that every ensemble member is derived from.
prob = ODEProblem(lorenz, u0, tspan, p)

# Each trajectory scales the nominal parameters by three fresh uniform random
# factors drawn from [0, 1).
prob_func = (prob, i, repeat) -> remake(prob; p = rand(Float32, 3) .* p)
monteprob = EnsembleProblem(prob; prob_func = prob_func)

# Time a 100_000-trajectory ensemble solve with the GPU array backend.
@time sol = solve(
    monteprob, Tsit5(), EnsembleGPUArray();
    trajectories = 100_000, batch_size = 50_000, saveat = 1.0f0
)
takes 2.5 seconds on GPU 0, while
using OrdinaryDiffEq, DiffEqGPU, CuArrays, LinearAlgebra
# Make GPU 1 the active device for the benchmark below.
CuArrays.device!(1)
"""
    lorenz(du, u, p, t)

In-place right-hand side of the Lorenz system.

Writes the three derivative components into `du[1:3]`, computed from the state
`u[1:3]` and the parameters `p[1:3]`. The time argument `t` is accepted but not
used. Returns `nothing`.

Bounds checks are disabled on every access, so `du`, `u` and `p` must each hold
at least three elements.
"""
function lorenz(du, u, p, t)
    @inbounds du[1] = p[1] * (u[2] - u[1])
    @inbounds du[2] = u[1] * (p[2] - u[3]) - u[2]
    @inbounds du[3] = u[1] * u[2] - p[3] * u[3]
    return nothing
end
# Initial state, time span and nominal parameters, all in single precision.
u0 = Float32[1.0, 0.0, 0.0]
tspan = (0.0f0, 100.0f0)
p = (10.0f0, 28.0f0, 8 / 3.0f0)

# Base problem that every ensemble member is derived from.
prob = ODEProblem(lorenz, u0, tspan, p)

# Each trajectory scales the nominal parameters by three fresh uniform random
# factors drawn from [0, 1).
prob_func = (prob, i, repeat) -> remake(prob; p = rand(Float32, 3) .* p)
monteprob = EnsembleProblem(prob; prob_func = prob_func)

# Time a 100_000-trajectory ensemble solve with the GPU array backend.
@time sol = solve(
    monteprob, Tsit5(), EnsembleGPUArray();
    trajectories = 100_000, batch_size = 50_000, saveat = 1.0f0
)
takes 3 seconds on GPU 1, but the multi-GPU run takes 94 seconds on master! While trying to figure out why, we added
https://github.com/JuliaDiffEq/DiffEqGPU.jl/commit/748b7d18a7c91cc8c7bdfbbc8c39f75684262cea
which was sufficient to send the figure down to 1.5 seconds, making multi-GPU the fastest. However, this is clearly not the ideal strategy.
Turns out that only happened when the return was broken...
@vchuravy