FluxML / Flux.jl


OneHotArray scalar indexing problem in a gradient context #1703

Open · CarloLucibello opened this issue 3 years ago

CarloLucibello commented 3 years ago

This came up when trying to do semi-supervised learning with GNNs:

julia> using Flux, CUDA

julia> CUDA.allowscalar(false)

julia> gradient(y -> sum(y[:,1:2]), y)
(nothing,)

julia> gradient(y -> sum(y[:,1:2]), y |> gpu)
ERROR: Scalar indexing is disallowed.
Invocation of getindex resulted in scalar indexing of a GPU array.
This is typically caused by calling an iterating implementation of a method.
Such implementations *do not* execute on the GPU, but very slowly on the CPU,
and therefore are only permitted from the REPL for prototyping purposes.
If you did intend to index this array, annotate the caller with @allowscalar.
Stacktrace:
  [1] error(s::String)
    @ Base ./error.jl:33
  [2] assertscalar(op::String)
    @ GPUArrays ~/.julia/packages/GPUArrays/Tebtl/src/host/indexing.jl:53
  [3] getindex
    @ ~/.julia/packages/GPUArrays/Tebtl/src/host/indexing.jl:86 [inlined]
  [4] getindex
    @ ~/.julia/packages/Flux/Zz9RI/src/onehot.jl:45 [inlined]
  [5] getindex
    @ ~/.julia/packages/Flux/Zz9RI/src/onehot.jl:48 [inlined]
  [6] iterate
    @ ./abstractarray.jl:1096 [inlined]
  [7] iterate
    @ ./abstractarray.jl:1094 [inlined]
  [8] _foldl_impl(op::Base.BottomRF{typeof(Base.add_sum)}, init::Base._InitialValue, itr::Flux.OneHotArray{UInt32, 7, 1, 2, CuArray{UInt32, 1, CUDA.Mem.DeviceBuffer}})
    @ Base ./reduce.jl:56
  [9] foldl_impl
    @ ./reduce.jl:48 [inlined]
 [10] mapfoldl_impl
    @ ./reduce.jl:44 [inlined]
 [11] #mapfoldl#214
    @ ./reduce.jl:160 [inlined]
 [12] mapfoldl
    @ ./reduce.jl:160 [inlined]
 [13] _mapreduce
    @ ./reduce.jl:421 [inlined]
 [14] _mapreduce_dim
    @ ./reducedim.jl:318 [inlined]
 [15] #mapreduce#672
    @ ./reducedim.jl:310 [inlined]
 [16] #reduce#674
    @ ./reducedim.jl:359 [inlined]
 [17] #sum#223
    @ ./reduce.jl:529 [inlined]
 [18] #adjoint#624
    @ ~/.julia/packages/Zygote/l3aNG/src/lib/array.jl:289 [inlined]
 [19] adjoint
    @ ./none:0 [inlined]
 [20] _pullback
    @ ~/.julia/packages/ZygoteRules/OjfTt/src/adjoint.jl:57 [inlined]
 [21] _pullback
    @ ./REPL[12]:1 [inlined]
 [22] _pullback(ctx::Zygote.Context, f::var"#7#8", args::Flux.OneHotArray{UInt32, 7, 1, 2, CuArray{UInt32, 1, CUDA.Mem.DeviceBuffer}})
    @ Zygote ~/.julia/packages/Zygote/l3aNG/src/compiler/interface2.jl:0
 [23] _pullback(f::Function, args::Flux.OneHotArray{UInt32, 7, 1, 2, CuArray{UInt32, 1, CUDA.Mem.DeviceBuffer}})
    @ Zygote ~/.julia/packages/Zygote/l3aNG/src/compiler/interface.jl:34
 [24] pullback(f::Function, args::Flux.OneHotArray{UInt32, 7, 1, 2, CuArray{UInt32, 1, CUDA.Mem.DeviceBuffer}})
    @ Zygote ~/.julia/packages/Zygote/l3aNG/src/compiler/interface.jl:40
 [25] gradient(f::Function, args::Flux.OneHotArray{UInt32, 7, 1, 2, CuArray{UInt32, 1, CUDA.Mem.DeviceBuffer}})
    @ Zygote ~/.julia/packages/Zygote/l3aNG/src/compiler/interface.jl:75
 [26] top-level scope
    @ REPL[12]:1
 [27] top-level scope
    @ ~/.julia/dev/CUDA/src/initialization.jl:65
mcabbott commented 3 years ago

Where presumably you define something like:

julia> y = Flux.onehotbatch([1,2,3,1,2], 1:3)
3×5 OneHotMatrix(::Vector{UInt32}) with eltype Bool:
 1  ⋅  ⋅  1  ⋅
 ⋅  1  ⋅  ⋅  1
 ⋅  ⋅  1  ⋅  ⋅

julia> gpu(y)
3×5 OneHotMatrix(::CuArray{UInt32, 1, CUDA.Mem.DeviceBuffer}) with eltype Bool:
 1  ⋅  ⋅  1  ⋅
 ⋅  1  ⋅  ⋅  1
 ⋅  ⋅  1  ⋅  ⋅

The gradient of sum seems not to notice that the eltype of this is Bool?

Sometimes CuArray bypasses that rule, because of this (intended to avoid Fill, I think): https://github.com/FluxML/Zygote.jl/blob/master/src/lib/broadcast.jl#L269

julia> gradient(sum, randn(3).>0)
(nothing,)

julia> gradient(sum, cu(randn(3).>0))
(Bool[1, 1, 1],)

julia> y isa CUDA.AbstractGPUArray  # hence this rule shouldn't apply to y
false

julia> gradient(sum, gpu(y))  # still fails
ERROR: Scalar indexing is disallowed.
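
That suggests the convention at play is that a Bool eltype marks an array as non-differentiable, so the pullback of sum should return nothing for it, and the CuArray branch of the broadcast adjoint skips that check. A minimal sketch of the convention (hedged: sum_pullback_sketch is a made-up name for illustration, not Zygote's actual code):

# Hypothetical helper, not part of Zygote: Bool-eltype arrays get a `nothing`
# cotangent from `sum`, while float arrays get an array filled with the scalar
# cotangent.
sum_pullback_sketch(x::AbstractArray) =
    dy -> eltype(x) <: Bool ? nothing : fill(dy, size(x))

sum_pullback_sketch(randn(3))(1.0)       # 3-element Vector{Float64}: [1.0, 1.0, 1.0]
sum_pullback_sketch(randn(3) .> 0)(1.0)  # nothing, matching the CPU result above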

If we pick a function whose gradient isn't zero on the CPU (because not every operation projects), then I presume that scalar indexing is unavoidable:

julia> gradient(y -> sum(y[:,1:3] .+ y[:, 1:3]'), y)
([2 2 … 0 0; 2 2 … 0 0; 2 2 … 0 0],)

julia> gradient(y -> sum(y[:,1:3] .+ y[:, 1:3]'), gpu(y))
ERROR: Scalar indexing is disallowed.
Invocation of getindex resulted in scalar indexing of a GPU array.
This is typically caused by calling an iterating implementation of a method.
Such implementations *do not* execute on the GPU, but very slowly on the CPU,
and therefore are only permitted from the REPL for prototyping purposes.
If you did intend to index this array, annotate the caller with @allowscalar.
Stacktrace:
  [1] error(s::String)
    @ Base ./error.jl:33
  [2] assertscalar(op::String)
    @ GPUArrays ~/.julia/packages/GPUArrays/UBzTm/src/host/indexing.jl:53
  [3] getindex(::CuArray{Int64, 2, CUDA.Mem.DeviceBuffer}, ::Int64, ::Int64)
    @ GPUArrays ~/.julia/packages/GPUArrays/UBzTm/src/host/indexing.jl:86
  [4] getindex
    @ /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.7/LinearAlgebra/src/adjtrans.jl:179 [inlined]
  [5] _getindex
    @ ./abstractarray.jl:1265 [inlined]
  [6] getindex
    @ ./abstractarray.jl:1221 [inlined]
  [7] _broadcast_getindex
    @ ./broadcast.jl:636 [inlined]
  [8] _getindex
    @ ./broadcast.jl:667 [inlined]
  [9] _getindex
    @ ./broadcast.jl:666 [inlined]
 [10] _broadcast_getindex
    @ ./broadcast.jl:642 [inlined]
 [11] getindex
    @ ./broadcast.jl:597 [inlined]
 [12] macro expansion
    @ ./broadcast.jl:1005 [inlined]
 [13] macro expansion
    @ ./simdloop.jl:77 [inlined]
 [14] copyto!
    @ ./broadcast.jl:1004 [inlined]
 [15] copyto!
    @ ./broadcast.jl:957 [inlined]
 [16] materialize!
    @ ./broadcast.jl:915 [inlined]
 [17] materialize!
    @ ./broadcast.jl:912 [inlined]
 [18] (::Zygote.var"#430#432"{2, Bool, Flux.OneHotArray{UInt32, 3, 1, 2, CuArray{UInt32, 1, CUDA.Mem.DeviceBuffer}}, Tuple{Colon, UnitRange{Int64}}})(dy::LinearAlgebra.Adjoint{Int64, CuArray{Int64, 2, CUDA.Mem.DeviceBuffer}})
    @ Zygote ~/.julia/packages/Zygote/nsu1Y/src/lib/array.jl:39
 [19] #2309#back
    @ ~/.julia/packages/ZygoteRules/OjfTt/src/adjoint.jl:59 [inlined]
 [20] Pullback
    @ ./REPL[28]:1 [inlined]
 [21] (::Zygote.var"#52#53"{typeof(∂(#23))})(Δ::Int64)
    @ Zygote ~/.julia/packages/Zygote/nsu1Y/src/compiler/interface.jl:41
 [22] gradient(f::Function, args::Flux.OneHotArray{UInt32, 3, 1, 2, CuArray{UInt32, 1, CUDA.Mem.DeviceBuffer}})
    @ Zygote ~/.julia/packages/Zygote/nsu1Y/src/compiler/interface.jl:76
DhairyaLGandhi commented 3 years ago

You can then annotate that piece of user code with CUDA.@allowscalar
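
For example, wrapping the failing call from above (a hedged sketch: this only permits the scalar getindex, so the slice pullback runs element-by-element on the CPU; acceptable for prototyping, not a performance fix):

julia> using Flux, CUDA

julia> y = Flux.onehotbatch([1,2,3,1,2], 1:3);

julia> CUDA.@allowscalar gradient(y -> sum(y[:,1:3] .+ y[:, 1:3]'), gpu(y))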