Open ccasert opened 2 years ago

I need to calculate the Laplacian of the densities modelled by a normalizing flow with respect to the inputs. On CPU, I can e.g. use code along the lines sketched below, which works but seems to scale poorly with the number of samples. However, when I attempt to run the same code on GPU, I get an error. Any help would be appreciated!
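A minimal sketch of that CPU computation, using Zygote.diaghessian on a stand-in log-density; logdensity and lapl are illustrative names, and the real code would wrap the FFJORD model shown in the comments below:

using Zygote

# Stand-in for the flow's log-density; the actual code evaluates an FFJORD model.
logdensity(x) = -0.5f0 * sum(abs2, x)

# Laplacian w.r.t. the inputs = sum of the diagonal of the Hessian
lapl(x) = sum(first(Zygote.diaghessian(logdensity, x)))

lapl(randn(Float32, 2))   # ≈ -2.0f0 for this stand-in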
@DhairyaLGandhi any good way around this?
Just curious whether there has been any movement on this, or whether there is an alternative way to get the Laplacian using other functions / AD packages.
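One such alternative, as a sketch rather than a tested drop-in for FFJORD: take the trace of a dense ForwardDiff Hessian on CPU, which avoids Zygote.diaghessian entirely and is cheap when the input dimension is small. The f and laplacian names below are illustrative; in practice f would wrap the loss:

using ForwardDiff, LinearAlgebra

# Stand-in scalar log-density; in practice f would wrap the FFJORD loss on CPU.
f(x) = -0.5f0 * sum(abs2, x)

# Exact Laplacian as the trace of the (dense) Hessian; fine for low-dimensional x
laplacian(f, x) = tr(ForwardDiff.hessian(f, x))

laplacian(f, randn(Float32, 2))   # ≈ -2.0f0 for this stand-in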
Not sure if it's related, but if I simply try to call the loss of this FFJORD code, I get a scalar-indexing error on a GPU array. It seems to point to the [:, :, end] slice on the solve in forward_ffjord.
Code:
using CUDA, DiffEqFlux, DifferentialEquations, Distributions, Flux, Zygote

# Two-layer network defining the FFJORD dynamics
nn = Chain(
    Dense(2, 32, tanh),
    Dense(32, 2),
) |> gpu
tspan = (0.0f0, 1.0f0)
ffjord_mdl = FFJORD(nn, tspan, Tsit5())

# Log-density of the flow at x; e is the Hutchinson trace-estimator noise
function loss(x)
    e = randn(Float32, size(x)) |> gpu
    logpx, λ₁, λ₂ = ffjord_mdl(x, ffjord_mdl.p, e)
    return logpx
end

# Diagonal of the Hessian of the summed log-density w.r.t. the inputs
function lapl(x)
    return Zygote.diaghessian(x -> sum(loss(x)), x)
end

data_dist = Normal(0.0f0, 1.0f0)
train_data = gpu(rand(data_dist, 2))
loss(train_data)
Error:
ERROR: LoadError: Scalar indexing is disallowed.
Invocation of getindex resulted in scalar indexing of a GPU array.
This is typically caused by calling an iterating implementation of a method.
Such implementations *do not* execute on the GPU, but very slowly on the CPU,
and therefore are only permitted from the REPL for prototyping purposes.
If you did intend to index this array, annotate the caller with @allowscalar.
Stacktrace:
[1] error(s::String)
@ Base .\error.jl:33
[2] assertscalar(op::String)
@ GPUArrays C:\Users\domin\.julia\packages\GPUArrays\gkF6S\src\host\indexing.jl:53
[3] getindex(::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, ::Int64, ::Int64)
@ GPUArrays C:\Users\domin\.julia\packages\GPUArrays\gkF6S\src\host\indexing.jl:86
[4] getindex
@ C:\Users\domin\.julia\packages\RecursiveArrayTools\gr5FR\src\vector_of_array.jl:164 [inlined]
[5] macro expansion
@ .\multidimensional.jl:860 [inlined]
[6] macro expansion
@ .\cartesian.jl:64 [inlined]
[7] macro expansion
@ .\multidimensional.jl:855 [inlined]
[8] _unsafe_getindex!
@ .\multidimensional.jl:868 [inlined]
[9] _unsafe_getindex(::IndexCartesian, ::VectorOfArray{Float32, 3, Vector{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}}, ::Base.Slice{Base.OneTo{Int64}}, ::Base.Slice{Base.OneTo{Int64}}, ::Int64)
@ Base .\multidimensional.jl:846
[10] _getindex
@ .\multidimensional.jl:832 [inlined]
[11] getindex
@ .\abstractarray.jl:1170 [inlined]
[12] getindex(::ODESolution{Float32, 3, Vector{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}, Nothing, Nothing, Vector{Float32}, Vector{Vector{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}}, ODEProblem{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, Tuple{Float32, Float32}, false, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, ODEFunction{false, DiffEqFlux.var"#ffjord_#63"{Bool, Bool, FFJORD{Chain{Tuple{Dense{typeof(tanh), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(identity), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Flux.var"#60#62"{Chain{Tuple{Dense{typeof(tanh), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(identity), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}, MvNormal{Float32, PDMats.PDiagMat{Float32, Vector{Float32}}, Vector{Float32}}, Tuple{Float32, Float32}, Tuple{Tsit5{typeof(OrdinaryDiffEq.trivial_limiter!), typeof(OrdinaryDiffEq.trivial_limiter!), Static.False}}, Base.Iterators.Pairs{Union{}, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, LinearAlgebra.UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing}, Base.Iterators.Pairs{Union{}, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}, Tsit5{typeof(OrdinaryDiffEq.trivial_limiter!), typeof(OrdinaryDiffEq.trivial_limiter!), Static.False}, OrdinaryDiffEq.InterpolationData{ODEFunction{false, DiffEqFlux.var"#ffjord_#63"{Bool, Bool, FFJORD{Chain{Tuple{Dense{typeof(tanh), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(identity), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Flux.var"#60#62"{Chain{Tuple{Dense{typeof(tanh), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(identity), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}, MvNormal{Float32, PDMats.PDiagMat{Float32, Vector{Float32}}, Vector{Float32}}, Tuple{Float32, Float32}, Tuple{Tsit5{typeof(OrdinaryDiffEq.trivial_limiter!), typeof(OrdinaryDiffEq.trivial_limiter!), Static.False}}, Base.Iterators.Pairs{Union{}, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, LinearAlgebra.UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing}, Vector{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}, Vector{Float32}, Vector{Vector{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}}, OrdinaryDiffEq.Tsit5ConstantCache{Float32, Float32}}, DiffEqBase.DEStats}, ::Colon, ::Colon, ::Int64)
@ SciMLBase C:\Users\domin\.julia\packages\SciMLBase\jj8Ix\src\solutions\solution_interface.jl:33
[13] forward_ffjord(n::FFJORD{Chain{Tuple{Dense{typeof(tanh), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(identity), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Flux.var"#60#62"{Chain{Tuple{Dense{typeof(tanh), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(identity), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}, MvNormal{Float32, PDMats.PDiagMat{Float32, Vector{Float32}}, Vector{Float32}}, Tuple{Float32, Float32}, Tuple{Tsit5{typeof(OrdinaryDiffEq.trivial_limiter!), typeof(OrdinaryDiffEq.trivial_limiter!), Static.False}}, Base.Iterators.Pairs{Union{}, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}}, x::CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, p::CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, e::CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}; regularize::Bool, monte_carlo::Bool)
@ DiffEqFlux C:\Users\domin\.julia\packages\DiffEqFlux\w4Zm0\src\ffjord.jl:219
[14] forward_ffjord(n::FFJORD{Chain{Tuple{Dense{typeof(tanh), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(identity), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Flux.var"#60#62"{Chain{Tuple{Dense{typeof(tanh), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(identity), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}, MvNormal{Float32, PDMats.PDiagMat{Float32, Vector{Float32}}, Vector{Float32}}, Tuple{Float32, Float32}, Tuple{Tsit5{typeof(OrdinaryDiffEq.trivial_limiter!), typeof(OrdinaryDiffEq.trivial_limiter!), Static.False}}, Base.Iterators.Pairs{Union{}, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}}, x::CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, p::CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, e::CuArray{Float32, 1, CUDA.Mem.DeviceBuffer})
@ DiffEqFlux C:\Users\domin\.julia\packages\DiffEqFlux\w4Zm0\src\ffjord.jl:203
[15] (::FFJORD{Chain{Tuple{Dense{typeof(tanh), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(identity), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Flux.var"#60#62"{Chain{Tuple{Dense{typeof(tanh), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(identity), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}, MvNormal{Float32, PDMats.PDiagMat{Float32, Vector{Float32}}, Vector{Float32}}, Tuple{Float32, Float32}, Tuple{Tsit5{typeof(OrdinaryDiffEq.trivial_limiter!), typeof(OrdinaryDiffEq.trivial_limiter!), Static.False}}, Base.Iterators.Pairs{Union{}, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}})(::CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, ::Vararg{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, N} where N; kwargs::Base.Iterators.Pairs{Union{}, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ DiffEqFlux C:\Users\domin\.julia\packages\DiffEqFlux\w4Zm0\src\ffjord.jl:199
[16] (::FFJORD{Chain{Tuple{Dense{typeof(tanh), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(identity), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Flux.var"#60#62"{Chain{Tuple{Dense{typeof(tanh), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(identity), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}, MvNormal{Float32, PDMats.PDiagMat{Float32, Vector{Float32}}, Vector{Float32}}, Tuple{Float32, Float32}, Tuple{Tsit5{typeof(OrdinaryDiffEq.trivial_limiter!), typeof(OrdinaryDiffEq.trivial_limiter!), Static.False}}, Base.Iterators.Pairs{Union{}, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}})(::CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, ::Vararg{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, N} where N)
@ DiffEqFlux C:\Users\domin\.julia\packages\DiffEqFlux\w4Zm0\src\ffjord.jl:199
[17] loss(x::CuArray{Float32, 1, CUDA.Mem.DeviceBuffer})
@ Main c:\Users\domin\Dropbox\code_practice\julia\diffeqflux\ffjord_test.jl:38
[18] top-level scope
@ c:\Users\domin\Dropbox\code_practice\julia\diffeqflux\ffjord_test.jl:48
in expression starting at c:\Users\domin\Dropbox\code_practice\julia\diffeqflux\ffjord_test.jl:48
Environment status:
[052768ef] CUDA v3.6.4
[aae7a2af] DiffEqFlux v1.44.1
[0c46a032] DifferentialEquations v7.1.0
[31c24e10] Distributions v0.25.38
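For reference, the slice the stack trace points at can be avoided on GPU by pulling the final state out of the solution directly instead of slicing through the VectorOfArray wrapper. A sketch of the kind of change that would be needed inside forward_ffjord, not the actual fix:

# sol[:, :, end] copies element-by-element through VectorOfArray and triggers
# scalar indexing on CuArrays; sol.u[end] returns the stored final state as-is.
last_state = sol.u[end]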
https://github.com/SciML/DiffEqFlux.jl/pull/614 is probably the solution when it's finished.
A small update: https://github.com/FluxML/NNlibCUDA.jl/pull/48 fixes the original bug in this issue. However, there remains another bug (which now looks Zygote-related) in the diaghessian call, and the scalar indexing in the forward call of the loss also remains. A possible first-order workaround is sketched below.
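Until diaghessian works here, one could estimate the Laplacian with a Hutchinson trace estimator built from Hessian-vector products (forward-over-reverse), which sidesteps diaghessian entirely. This is only a sketch: whether it differentiates cleanly through the FFJORD solve is untested, and f, grad_f, hvp, and laplacian_estimate are stand-in names:

using ForwardDiff, LinearAlgebra, Statistics, Zygote

# Stand-in scalar log-density; in practice f would wrap the FFJORD loss.
f(x) = -0.5f0 * sum(abs2, x)

grad_f(x) = first(Zygote.gradient(f, x))

# Hessian-vector product: forward-mode derivative of the reverse-mode gradient
hvp(x, v) = ForwardDiff.derivative(ε -> grad_f(x .+ ε .* v), 0.0f0)

# Hutchinson estimator: E[vᵀ H v] = tr(H) for v with identity covariance
function laplacian_estimate(x; n = 100)
    mean(dot(v, hvp(x, v)) for v in (randn(Float32, length(x)) for _ in 1:n))
end

laplacian_estimate(randn(Float32, 2))   # ≈ -2 for this stand-in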