SciML / DiffEqGPU.jl

GPU-acceleration routines for DifferentialEquations.jl and the broader SciML scientific machine learning ecosystem
https://docs.sciml.ai/DiffEqGPU/stable/
MIT License
279 stars 29 forks source link

Mysterious CI-only failure of the reduction.jl tests #151

Closed ChrisRackauckas closed 2 years ago

ChrisRackauckas commented 2 years ago

MWE:

# ode checks
using OrdinaryDiffEq, DiffEqGPU, Test

# Seed the global RNG so the random scale factors in `ra` are reproducible run to run.
using Random
seed = 100
Random.seed!(seed)
# 100 uniform samples; `prob_func` indexes into this vector with the trajectory number.
ra = rand(100)

"""
    f!(du, u, p, t)

In-place right-hand side of the scalar linear ODE `du/dt = 1.01u`.

Writes the derivative into `du[1]`; `p` and `t` are accepted but not used.
"""
function f!(du, u, p, t)
    growth_rate = 1.01
    du[1] = growth_rate * u[1]
end

# Base problem shared by every ensemble below: `f!` with initial state [0.5] and the
# tuple (0.0, 1.0) as the remaining argument (presumably the time span — confirm).
prob = ODEProblem(f!,[0.5],(0.0,1.0))

"""
    output_func(sol, i)

Reduce a finished trajectory `sol` to its last entry.

Returns the tuple `(last(sol), false)`; the trajectory index `i` is not used.
The `false` is the flag an ensemble output function returns next to its value
(presumably "do not rerun this trajectory" — confirm against the SciML docs).
"""
function output_func(sol, i)
    final_entry = last(sol)
    return (final_entry, false)
end

"""
    prob_func(prob, i, repeat)

Build the problem for trajectory `i` by scaling the initial condition of `prob`.

The new `u0` is `ra[i] * prob.u0`, where `ra` is the global vector of random factors;
everything else is carried over by `remake`. `repeat` is not used.
"""
function prob_func(prob, i, repeat)
    scaled_u0 = ra[i] * prob.u0
    return remake(prob; u0 = scaled_u0)
end

"""
    reduction(u, batch, I)

Fold one batch of trajectory outputs into the running accumulator `u`.

Returns `(u .+ sum(batch), false)`: a new accumulator (`u` itself is not mutated) and a
`false` flag (presumably "not converged, keep going" — confirm against the SciML docs).
The index argument `I` is not used.
"""
function reduction(u, batch, I)
    batch_total = sum(batch)
    return (u .+ batch_total, false)
end

# no reduction
# Baseline run: no `reduction`/`u_init`, and no ensemble algorithm is passed to `solve`,
# so whatever default `solve` picks is used.
prob1 = EnsembleProblem(prob,prob_func=prob_func,output_func=output_func)
sim1 = @time solve(prob1,Tsit5(),trajectories=100,batch_size=20)

# reduction and EnsembleThreads()
# NOTE(review): EnsembleThreads() is not passed explicitly below; this relies on it being
# the default ensemble algorithm — confirm against the SciML ensemble docs.
# `u_init` is a one-element vector [0.0] with the element type of `prob.u0`; it is the
# starting accumulator handed to `reduction`.
prob2 = EnsembleProblem(prob,prob_func=prob_func,output_func=output_func,
  reduction=reduction,u_init=Vector{eltype(prob.u0)}([0.0])
  )
sim2 = @time solve(prob2,Tsit5(),trajectories=100,batch_size=20)

# EnsembleCPUArray() and EnsembleGPUArray()
# Same reducing problem (`prob2`), now solved with the two array-based ensemble algorithms.
# The CI stack trace in this report passes through an `EnsembleGPUArray` solve.
sim3 = @time solve(prob2,Tsit5(),EnsembleCPUArray(),trajectories=100,batch_size=20)
sim4 = @time solve(prob2,Tsit5(),EnsembleGPUArray(),trajectories=100,batch_size=20)

# Log the first entry of the reduced result so it shows up in the test output.
@info  sim2[1]

# Summing the per-trajectory outputs of the unreduced run must match the reduced result.
@test sum(sim1.u) ≈ sim2.u
# The reduced result must agree across the default, CPU-array and GPU-array runs.
@test sim2.u ≈ sim3.u
@test sim2.u ≈ sim4.u
<html>
<body>
<!--StartFragment-->

Reduction: Error During Test at /root/.cache/julia-buildkite-plugin/depots/26e4f8df-bbdd-40a2-82e4-24a159795e4b/packages/SafeTestsets/A83XK/src/SafeTestsets.jl:25
--
  | Got exception outside of a @test
  | LoadError: KernelException: exception thrown during kernel execution on device NVIDIA A100-PCIE-40GB MIG 1g.5gb
  | Stacktrace:
  | [1] check_exceptions()
  | @ CUDA ~/.cache/julia-buildkite-plugin/depots/26e4f8df-bbdd-40a2-82e4-24a159795e4b/packages/CUDA/GGwVa/src/compiler/exceptions.jl:34
  | [2] nonblocking_synchronize
  | @ ~/.cache/julia-buildkite-plugin/depots/26e4f8df-bbdd-40a2-82e4-24a159795e4b/packages/CUDA/GGwVa/lib/cudadrv/context.jl:331 [inlined]
  | [3] device_synchronize()
  | @ CUDA ~/.cache/julia-buildkite-plugin/depots/26e4f8df-bbdd-40a2-82e4-24a159795e4b/packages/CUDA/GGwVa/lib/cudadrv/context.jl:319
  | [4] CUDA.CuModule(data::Vector{UInt8}, options::Dict{CUDA.CUjit_option_enum, Any})
  | @ CUDA ~/.cache/julia-buildkite-plugin/depots/26e4f8df-bbdd-40a2-82e4-24a159795e4b/packages/CUDA/GGwVa/lib/cudadrv/module.jl:41
  | [5] CuModule
  | @ ~/.cache/julia-buildkite-plugin/depots/26e4f8df-bbdd-40a2-82e4-24a159795e4b/packages/CUDA/GGwVa/lib/cudadrv/module.jl:23 [inlined]
  | [6] macro expansion
  | @ ~/.cache/julia-buildkite-plugin/depots/26e4f8df-bbdd-40a2-82e4-24a159795e4b/packages/TimerOutputs/jgSVI/src/TimerOutput.jl:236 [inlined]
  | [7] macro expansion
  | @ ~/.cache/julia-buildkite-plugin/depots/26e4f8df-bbdd-40a2-82e4-24a159795e4b/packages/CUDA/GGwVa/src/compiler/execution.jl:479 [inlined]
  | [8] cufunction_link(job::GPUCompiler.CompilerJob, compiled::NamedTuple{(:image, :entry, :external_gvars), Tuple{Vector{UInt8}, String, Vector{String}}})
  | @ CUDA ~/.cache/julia-buildkite-plugin/depots/26e4f8df-bbdd-40a2-82e4-24a159795e4b/packages/TimerOutputs/jgSVI/src/TimerOutput.jl:236
  | [9] cached_compilation(cache::Dict{UInt64, Any}, job::GPUCompiler.CompilerJob, compiler::typeof(CUDA.cufunction_compile), linker::typeof(CUDA.cufunction_link))
  | @ GPUCompiler ~/.cache/julia-buildkite-plugin/depots/26e4f8df-bbdd-40a2-82e4-24a159795e4b/packages/GPUCompiler/XyxTy/src/cache.jl:95
  | [10] macro expansion
  | @ ~/.cache/julia-buildkite-plugin/depots/26e4f8df-bbdd-40a2-82e4-24a159795e4b/packages/CUDA/GGwVa/src/compiler/execution.jl:299 [inlined]
  | [11] cufunction(f::typeof(CUDA.partial_mapreduce_grid), tt::Type{Tuple{typeof(identity), typeof(Base.add_sum), Float64, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, Val{true}, CUDA.CuDeviceArray{Float64, 3, 1}, Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{2}, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}, typeof(abs2), Tuple{CUDA.CuDeviceMatrix{Float64, 1}}}}}; name::Nothing, kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
  | @ CUDA ~/.cache/julia-buildkite-plugin/depots/26e4f8df-bbdd-40a2-82e4-24a159795e4b/packages/TimerOutputs/jgSVI/src/TimerOutput.jl:236
  | [12] cufunction
  | @ ~/.cache/julia-buildkite-plugin/depots/26e4f8df-bbdd-40a2-82e4-24a159795e4b/packages/TimerOutputs/jgSVI/src/TimerOutput.jl:229 [inlined]
  | [13] macro expansion
  | @ ~/.cache/julia-buildkite-plugin/depots/26e4f8df-bbdd-40a2-82e4-24a159795e4b/packages/CUDA/GGwVa/src/compiler/execution.jl:102 [inlined]
  | [14] mapreducedim!(f::typeof(identity), op::typeof(Base.add_sum), R::CUDA.CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}, A::Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{2}, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}, typeof(abs2), Tuple{CUDA.CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}}}; init::Float64)
  | @ CUDA ~/.cache/julia-buildkite-plugin/depots/26e4f8df-bbdd-40a2-82e4-24a159795e4b/packages/CUDA/GGwVa/src/mapreduce.jl:234
  | [15] _mapreduce(f::typeof(abs2), op::typeof(Base.add_sum), As::CUDA.CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}; dims::Colon, init::Nothing)
  | @ GPUArrays ~/.cache/julia-buildkite-plugin/depots/26e4f8df-bbdd-40a2-82e4-24a159795e4b/packages/GPUArrays/Zecv7/src/host/mapreduce.jl:69
  | [16] #mapreduce#20
  | @ ~/.cache/julia-buildkite-plugin/depots/26e4f8df-bbdd-40a2-82e4-24a159795e4b/packages/GPUArrays/Zecv7/src/host/mapreduce.jl:31 [inlined]
  | [17] mapreduce
  | @ ~/.cache/julia-buildkite-plugin/depots/26e4f8df-bbdd-40a2-82e4-24a159795e4b/packages/GPUArrays/Zecv7/src/host/mapreduce.jl:31 [inlined]
  | [18] #_sum#741
  | @ ./reducedim.jl:894 [inlined]
  | [19] _sum
  | @ ./reducedim.jl:894 [inlined]
  | [20] #sum#739
  | @ ./reducedim.jl:890 [inlined]
  | [21] sum
  | @ ./reducedim.jl:890 [inlined]
  | [22] diffeqgpunorm
  | @ /var/lib/buildkite-agent/builds/gpuci-13/julialang/diffeqgpu-dot-jl/src/DiffEqGPU.jl:289 [inlined]
  | [23] __init(prob::SciMLBase.ODEProblem{CUDA.CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}, Tuple{Float64, Float64}, true, CUDA.CuArray{SciMLBase.NullParameters, 2, CUDA.Mem.DeviceBuffer}, SciMLBase.ODEFunction{true, DiffEqGPU.var"#59#63"{typeof(Main.##4656.f!), typeof(DiffEqGPU.gpu_kernel)}, LinearAlgebra.UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}, alg::OrdinaryDiffEq.Tsit5{typeof(OrdinaryDiffEq.trivial_limiter!), typeof(OrdinaryDiffEq.trivial_limiter!), Static.False}, timeseries_init::Tuple{}, ts_init::Tuple{}, ks_init::Tuple{}, recompile::Type{Val{true}}; saveat::Tuple{}, tstops::Tuple{}, d_discontinuities::Tuple{}, save_idxs::Nothing, save_everystep::Bool, save_on::Bool, save_start::Bool, save_end::Nothing, callback::Nothing, dense::Bool, calck::Bool, dt::Float64, dtmin::Nothing, dtmax::Float64, force_dtmin::Bool, adaptive::Bool, gamma::Rational{Int64}, abstol::Nothing, reltol::Nothing, qmin::Rational{Int64}, qmax::Int64, qsteady_min::Int64, qsteady_max::Int64, beta1::Nothing, beta2::Nothing, qoldinit::Rational{Int64}, controller::Nothing, fullnormalize::Bool, failfactor::Int64, maxiters::Int64, internalnorm::typeof(DiffEqGPU.diffeqgpunorm), internalopnorm::typeof(LinearAlgebra.opnorm), isoutofdomain::typeof(DiffEqBase.ODE_DEFAULT_ISOUTOFDOMAIN), unstable_check::DiffEqGPU.var"#12#18", verbose::Bool, timeseries_errors::Bool, dense_errors::Bool, advance_to_tstop::Bool, stop_at_next_tstop::Bool, initialize_save::Bool, progress::Bool, progress_steps::Int64, progress_name::String, progress_message::typeof(DiffEqBase.ODE_DEFAULT_PROG_MESSAGE), userdata::Nothing, allow_extrapolation::Bool, initialize_integrator::Bool, alias_u0::Bool, alias_du0::Bool, initializealg::OrdinaryDiffEq.DefaultInit, kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, 
NamedTuple{(), Tuple{}}})
  | @ OrdinaryDiffEq ~/.cache/julia-buildkite-plugin/depots/26e4f8df-bbdd-40a2-82e4-24a159795e4b/packages/OrdinaryDiffEq/irVAX/src/solve.jl:274
  | [24] #__solve#502
  | @ ~/.cache/julia-buildkite-plugin/depots/26e4f8df-bbdd-40a2-82e4-24a159795e4b/packages/OrdinaryDiffEq/irVAX/src/solve.jl:4 [inlined]
  | [25] #solve_call#28
  | @ ~/.cache/julia-buildkite-plugin/depots/26e4f8df-bbdd-40a2-82e4-24a159795e4b/packages/DiffEqBase/kFf4V/src/solve.jl:388 [inlined]
  | [26] solve_up(prob::SciMLBase.ODEProblem{CUDA.CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}, Tuple{Float64, Float64}, true, CUDA.CuArray{SciMLBase.NullParameters, 2, CUDA.Mem.DeviceBuffer}, SciMLBase.ODEFunction{true, DiffEqGPU.var"#59#63"{typeof(Main.##4656.f!), typeof(DiffEqGPU.gpu_kernel)}, LinearAlgebra.UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}, sensealg::Nothing, u0::CUDA.CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}, p::CUDA.CuArray{SciMLBase.NullParameters, 2, CUDA.Mem.DeviceBuffer}, args::OrdinaryDiffEq.Tsit5{typeof(OrdinaryDiffEq.trivial_limiter!), typeof(OrdinaryDiffEq.trivial_limiter!), Static.False}; kwargs::Base.Pairs{Symbol, Any, NTuple{4, Symbol}, NamedTuple{(:unstable_check, :callback, :merge_callbacks, :internalnorm), Tuple{DiffEqGPU.var"#12#18", Nothing, Bool, typeof(DiffEqGPU.diffeqgpunorm)}}})
  | @ DiffEqBase ~/.cache/julia-buildkite-plugin/depots/26e4f8df-bbdd-40a2-82e4-24a159795e4b/packages/DiffEqBase/kFf4V/src/solve.jl:686
  | [27] #solve#29
  | @ ~/.cache/julia-buildkite-plugin/depots/26e4f8df-bbdd-40a2-82e4-24a159795e4b/packages/DiffEqBase/kFf4V/src/solve.jl:670 [inlined]
  | [28] batch_solve_up(ensembleprob::SciMLBase.EnsembleProblem{SciMLBase.ODEProblem{Vector{Float64}, Tuple{Float64, Float64}, true, SciMLBase.NullParameters, SciMLBase.ODEFunction{true, typeof(Main.##4656.f!), LinearAlgebra.UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}, typeof(Main.##4656.prob_func), typeof(Main.##4656.output_func), typeof(Main.##4656.reduction), Vector{Float64}}, probs::Vector{SciMLBase.ODEProblem{Vector{Float64}, Tuple{Float64, Float64}, true, SciMLBase.NullParameters, SciMLBase.ODEFunction{true, typeof(Main.##4656.f!), LinearAlgebra.UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}}, alg::OrdinaryDiffEq.Tsit5{typeof(OrdinaryDiffEq.trivial_limiter!), typeof(OrdinaryDiffEq.trivial_limiter!), Static.False}, ensemblealg::DiffEqGPU.EnsembleGPUArray, I::UnitRange{Int64}, u0::Matrix{Float64}, p::Matrix{SciMLBase.NullParameters}; kwargs::Base.Pairs{Symbol, DiffEqGPU.var"#12#18", Tuple{Symbol}, NamedTuple{(:unstable_check,), Tuple{DiffEqGPU.var"#12#18"}}})
  | @ DiffEqGPU /var/lib/buildkite-agent/builds/gpuci-13/julialang/diffeqgpu-dot-jl/src/DiffEqGPU.jl:362
  | [29] batch_solve(ensembleprob::SciMLBase.EnsembleProblem{SciMLBase.ODEProblem{Vector{Float64}, Tuple{Float64, Float64}, true, SciMLBase.NullParameters, SciMLBase.ODEFunction{true, typeof(Main.##4656.f!), LinearAlgebra.UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}, typeof(Main.##4656.prob_func), typeof(Main.##4656.output_func), typeof(Main.##4656.reduction), Vector{Float64}}, alg::OrdinaryDiffEq.Tsit5{typeof(OrdinaryDiffEq.trivial_limiter!), typeof(OrdinaryDiffEq.trivial_limiter!), Static.False}, ensemblealg::DiffEqGPU.EnsembleGPUArray, I::UnitRange{Int64}, adaptive::Bool; kwargs::Base.Pairs{Symbol, DiffEqGPU.var"#12#18", Tuple{Symbol}, NamedTuple{(:unstable_check,), Tuple{DiffEqGPU.var"#12#18"}}})
  | @ DiffEqGPU /var/lib/buildkite-agent/builds/gpuci-13/julialang/diffeqgpu-dot-jl/src/DiffEqGPU.jl:326
  | [30] (::DiffEqGPU.var"#9#15"{Int64, DiffEqGPU.var"#12#18", Bool, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.EnsembleProblem{SciMLBase.ODEProblem{Vector{Float64}, Tuple{Float64, Float64}, true, SciMLBase.NullParameters, SciMLBase.ODEFunction{true, typeof(Main.##4656.f!), LinearAlgebra.UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}, typeof(Main.##4656.prob_func), typeof(Main.##4656.output_func), typeof(Main.##4656.reduction), Vector{Float64}}, OrdinaryDiffEq.Tsit5{typeof(OrdinaryDiffEq.trivial_limiter!), typeof(OrdinaryDiffEq.trivial_limiter!), Static.False}, DiffEqGPU.EnsembleGPUArray, Int64})(i::Int64)
  | @ DiffEqGPU /var/lib/buildkite-agent/builds/gpuci-13/julialang/diffeqgpu-dot-jl/src/DiffEqGPU.jl:247
  | [31] iterate
  | @ ./generator.jl:47 [inlined]
  | [32] _collect(c::UnitRange{Int64}, itr::Base.Generator{UnitRange{Int64}, DiffEqGPU.var"#9#15"{Int64, DiffEqGPU.var"#12#18", Bool, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.EnsembleProblem{SciMLBase.ODEProblem{Vector{Float64}, Tuple{Float64, Float64}, true, SciMLBase.NullParameters, SciMLBase.ODEFunction{true, typeof(Main.##4656.f!), LinearAlgebra.UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}, typeof(Main.##4656.prob_func), typeof(Main.##4656.output_func), typeof(Main.##4656.reduction), Vector{Float64}}, OrdinaryDiffEq.Tsit5{typeof(OrdinaryDiffEq.trivial_limiter!), typeof(OrdinaryDiffEq.trivial_limiter!), Static.False}, DiffEqGPU.EnsembleGPUArray, Int64}}, #unused#::Base.EltypeUnknown, isz::Base.HasShape{1})
  | @ Base ./array.jl:744
  | [33] collect_similar(cont::UnitRange{Int64}, itr::Base.Generator{UnitRange{Int64}, DiffEqGPU.var"#9#15"{Int64, DiffEqGPU.var"#12#18", Bool, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.EnsembleProblem{SciMLBase.ODEProblem{Vector{Float64}, Tuple{Float64, Float64}, true, SciMLBase.NullParameters, SciMLBase.ODEFunction{true, typeof(Main.##4656.f!), LinearAlgebra.UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}, typeof(Main.##4656.prob_func), typeof(Main.##4656.output_func), typeof(Main.##4656.reduction), Vector{Float64}}, OrdinaryDiffEq.Tsit5{typeof(OrdinaryDiffEq.trivial_limiter!), typeof(OrdinaryDiffEq.trivial_limiter!), Static.False}, DiffEqGPU.EnsembleGPUArray, Int64}})
  | @ Base ./array.jl:653
  | [34] map(f::Function, A::UnitRange{Int64})
  | @ Base ./abstractarray.jl:2867
  | [35] macro expansion
  | @ /var/lib/buildkite-agent/builds/gpuci-13/julialang/diffeqgpu-dot-jl/src/DiffEqGPU.jl:241 [inlined]
  | [36] macro expansion
  | @ ./timing.jl:299 [inlined]
  | [37] __solve(ensembleprob::SciMLBase.EnsembleProblem{SciMLBase.ODEProblem{Vector{Float64}, Tuple{Float64, Float64}, true, SciMLBase.NullParameters, SciMLBase.ODEFunction{true, typeof(Main.##4656.f!), LinearAlgebra.UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}, typeof(Main.##4656.prob_func), typeof(Main.##4656.output_func), typeof(Main.##4656.reduction), Vector{Float64}}, alg::OrdinaryDiffEq.Tsit5{typeof(OrdinaryDiffEq.trivial_limiter!), typeof(OrdinaryDiffEq.trivial_limiter!), Static.False}, ensemblealg::DiffEqGPU.EnsembleGPUArray; trajectories::Int64, batch_size::Int64, unstable_check::Function, adaptive::Bool, kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
  | @ DiffEqGPU /var/lib/buildkite-agent/builds/gpuci-13/julialang/diffeqgpu-dot-jl/src/DiffEqGPU.jl:240
  | [38] #solve#31
  | @ ~/.cache/julia-buildkite-plugin/depots/26e4f8df-bbdd-40a2-82e4-24a159795e4b/packages/DiffEqBase/kFf4V/src/solve.jl:700 [inlined]
  | [39] top-level scope
  | @ ./timing.jl:220
  | [40] include(mod::Module, _path::String)
  | @ Base ./Base.jl:418
  | [41] include(x::String)
  | @ Main.##4656 ~/.cache/julia-buildkite-plugin/depots/26e4f8df-bbdd-40a2-82e4-24a159795e4b/packages/SafeTestsets/A83XK/src/SafeTestsets.jl:23
  | [42] macro expansion
  | @ /var/lib/buildkite-agent/builds/gpuci-13/julialang/diffeqgpu-dot-jl/test/runtests.jl:22 [inlined]
  | [43] macro expansion
  | @ ~/.cache/julia-buildkite-plugin/julia_installs/bin/linux/x64/1.7/julia-1.7-latest-linux-x86_64/share/julia/stdlib/v1.7/Test/src/Test.jl:1283 [inlined]
  | [44] top-level scope
  | @ /var/lib/buildkite-agent/builds/gpuci-13/julialang/diffeqgpu-dot-jl/test/runtests.jl:22
  | [45] eval(m::Module, e::Any)
  | @ Core ./boot.jl:373
  | [46] macro expansion
  | @ ~/.cache/julia-buildkite-plugin/depots/26e4f8df-bbdd-40a2-82e4-24a159795e4b/packages/SafeTestsets/A83XK/src/SafeTestsets.jl:23 [inlined]
  | [47] top-level scope
  | @ timing.jl:220
  | [48] include(fname::String)
  | @ Base.MainInclude ./client.jl:451
  | [49] top-level scope
  | @ none:5
  | [50] eval
  | @ ./boot.jl:373 [inlined]
  | [51] exec_options(opts::Base.JLOptions)
  | @ Base ./client.jl:268
  | in expression starting at /var/lib/buildkite-agent/builds/gpuci-13/julialang/diffeqgpu-dot-jl/test/reduction.jl:39
  | Test Summary: | Error  Total
  | Reduction     |     1      1
  | ERROR: LoadError: Some tests did not pass: 0 passed, 0 failed, 1 errored, 0 broken.
  | in expression starting at /var/lib/buildkite-agent/builds/gpuci-13/julialang/diffeqgpu-dot-jl/test/runtests.jl:22
  | ERROR: LoadError: failed process: Process(`/root/.cache/julia-buildkite-plugin/julia_installs/bin/linux/x64/1.7/julia-1.7-latest-linux-x86_64/bin/julia -Cnative -J/root/.cache/julia-buildkite-plugin/julia_installs/bin/linux/x64/1.7/julia-1.7-latest-linux-x86_64/lib/julia/sys.so --depwarn=yes -g1 --color=yes --startup-file=no --eval 'append!(empty!(Base.DEPOT_PATH), ["/root/.cache/julia-buildkite-plugin/depots/26e4f8df-bbdd-40a2-82e4-24a159795e4b"])
  | append!(empty!(Base.DL_LOAD_PATH), String[])

<!--EndFragment-->
</body>
</html>

@vchuravy @maleadt @utkarsh530 are you able to reproduce this locally? I can't for some reason.

ChrisRackauckas commented 2 years ago

https://buildkite.com/julialang/diffeqgpu-dot-jl/builds/174#018143d0-fe6f-4bfe-ae12-f842cf5eae91

ChrisRackauckas commented 2 years ago

This "CI only" error seems to show up in multiple different cases too: https://github.com/SciML/DiffEqGPU.jl/pull/148#issuecomment-1150311061 . So there's not just one trigger. Maybe the gpuci drivers are old or something?

maleadt commented 2 years ago

> CI only

Maybe because of --check-bounds=true?

ChrisRackauckas commented 2 years ago

It went away. That's good enough I guess 😅