slimgroup / InvertibleNetworks.jl

A Julia framework for invertible neural networks

NNlib error with GPU examples #19

Closed rafaelorozco closed 3 years ago

rafaelorozco commented 3 years ago

If I run some of the examples on the GPU, I get this error:

julia> include("examples/applications/application_conditional_hint_banana_linear.jl")
┌ Warning: Performing scalar operations on GPU arrays: This is very slow, consider disallowing these operations with `allowscalar(false)`
└ @ GPUArrays ~/.julia/packages/GPUArrays/WV76E/src/host/indexing.jl:43
ERROR: LoadError: TaskFailedException:
MethodError: no method matching gemm!(::Val{false}, ::Val{false}, ::Int64, ::Int64, ::Int64, ::Float32, ::CuPtr{Float32}, ::CuPtr{Float32}, ::Float32, ::CuPtr{Float32})
Closest candidates are:
  gemm!(::Val, ::Val, ::Int64, ::Int64, ::Int64, ::Float32, ::Ptr{Float32}, ::Ptr{Float32}, ::Float32, ::Ptr{Float32}) at /home/rorozcom3/.julia/packages/NNlib/Pcmaw/src/gemm.jl:32
  gemm!(::Val, ::Val, ::Int64, ::Int64, ::Int64, ::Float64, ::Ptr{Float64}, ::Ptr{Float64}, ::Float64, ::Ptr{Float64}) at /home/rorozcom3/.julia/packages/NNlib/Pcmaw/src/gemm.jl:32
  gemm!(::Val, ::Val, ::Int64, ::Int64, ::Int64, ::Complex{Float64}, ::Ptr{Complex{Float64}}, ::Ptr{Complex{Float64}}, ::Complex{Float64}, ::Ptr{Complex{Float64}}) at /home/rorozcom3/.julia/packages/NNlib/Pcmaw/src/gemm.jl:32
  ...
Stacktrace:
 [1] macro expansion at /home/rorozcom3/.julia/packages/NNlib/Pcmaw/src/impl/conv_im2col.jl:58 [inlined]
 [2] (::NNlib.var"#445#threadsfor_fun#279"{CuArray{Float32,3},Float32,Float32,CuArray{Float32,5},Base.ReshapedArray{Float32,5,SubArray{Float32,4,CuArray{Float32,4},Tuple{Base.Slice{Base.OneTo{Int64}},Base.Slice{Base.OneTo{Int64}},UnitRange{Int64},Base.Slice{Base.OneTo{Int64}}},false},Tuple{Base.MultiplicativeInverses.SignedMultiplicativeInverse{Int64},Base.MultiplicativeInverses.SignedMultiplicativeInverse{Int64},Base.MultiplicativeInverses.SignedMultiplicativeInverse{Int64}}},CuArray{Float32,5},DenseConvDims{3,(1, 1, 1),1,64,(1, 1, 1),(0, 0, 0, 0, 0, 0),(1, 1, 1),false},Int64,Int64,Int64,UnitRange{Int64}})(::Bool) at ./threadingconstructs.jl:81
 [3] (::NNlib.var"#445#threadsfor_fun#279"{CuArray{Float32,3},Float32,Float32,CuArray{Float32,5},Base.ReshapedArray{Float32,5,SubArray{Float32,4,CuArray{Float32,4},Tuple{Base.Slice{Base.OneTo{Int64}},Base.Slice{Base.OneTo{Int64}},UnitRange{Int64},Base.Slice{Base.OneTo{Int64}}},false},Tuple{Base.MultiplicativeInverses.SignedMultiplicativeInverse{Int64},Base.MultiplicativeInverses.SignedMultiplicativeInverse{Int64},Base.MultiplicativeInverses.SignedMultiplicativeInverse{Int64}}},CuArray{Float32,5},DenseConvDims{3,(1, 1, 1),1,64,(1, 1, 1),(0, 0, 0, 0, 0, 0),(1, 1, 1),false},Int64,Int64,Int64,UnitRange{Int64}})() at ./threadingconstructs.jl:48
Stacktrace:
 [1] wait at ./task.jl:267 [inlined]
 [2] threading_run(::Function) at ./threadingconstructs.jl:34
 [3] macro expansion at ./threadingconstructs.jl:93 [inlined]
 [4] conv_im2col!(::CuArray{Float32,5}, ::Base.ReshapedArray{Float32,5,SubArray{Float32,4,CuArray{Float32,4},Tuple{Base.Slice{Base.OneTo{Int64}},Base.Slice{Base.OneTo{Int64}},UnitRange{Int64},Base.Slice{Base.OneTo{Int64}}},false},Tuple{Base.MultiplicativeInverses.SignedMultiplicativeInverse{Int64},Base.MultiplicativeInverses.SignedMultiplicativeInverse{Int64},Base.MultiplicativeInverses.SignedMultiplicativeInverse{Int64}}}, ::CuArray{Float32,5}, ::DenseConvDims{3,(1, 1, 1),1,64,(1, 1, 1),(0, 0, 0, 0, 0, 0),(1, 1, 1),false}; col::CuArray{Float32,3}, alpha::Float32, beta::Float32) at /home/rorozcom3/.julia/packages/NNlib/Pcmaw/src/impl/conv_im2col.jl:49
 [5] conv_im2col! at /home/rorozcom3/.julia/packages/NNlib/Pcmaw/src/impl/conv_im2col.jl:30 [inlined]
 [6] #conv!#147 at /home/rorozcom3/.julia/packages/NNlib/Pcmaw/src/conv.jl:191 [inlined]
 [7] conv!(::CuArray{Float32,5}, ::Base.ReshapedArray{Float32,5,SubArray{Float32,4,CuArray{Float32,4},Tuple{Base.Slice{Base.OneTo{Int64}},Base.Slice{Base.OneTo{Int64}},UnitRange{Int64},Base.Slice{Base.OneTo{Int64}}},false},Tuple{Base.MultiplicativeInverses.SignedMultiplicativeInverse{Int64},Base.MultiplicativeInverses.SignedMultiplicativeInverse{Int64},Base.MultiplicativeInverses.SignedMultiplicativeInverse{Int64}}}, ::CuArray{Float32,5}, ::DenseConvDims{3,(1, 1, 1),1,64,(1, 1, 1),(0, 0, 0, 0, 0, 0),(1, 1, 1),false}) at /home/rorozcom3/.julia/packages/NNlib/Pcmaw/src/conv.jl:191
 [8] conv!(::CuArray{Float32,4}, ::SubArray{Float32,4,CuArray{Float32,4},Tuple{Base.Slice{Base.OneTo{Int64}},Base.Slice{Base.OneTo{Int64}},UnitRange{Int64},Base.Slice{Base.OneTo{Int64}}},false}, ::CuArray{Float32,4}, ::DenseConvDims{2,(1, 1),1,64,(1, 1),(0, 0, 0, 0),(1, 1),false}; kwargs::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}) at /home/rorozcom3/.julia/packages/NNlib/Pcmaw/src/conv.jl:148
 [9] conv! at /home/rorozcom3/.julia/packages/NNlib/Pcmaw/src/conv.jl:148 [inlined]
 [10] conv(::SubArray{Float32,4,CuArray{Float32,4},Tuple{Base.Slice{Base.OneTo{Int64}},Base.Slice{Base.OneTo{Int64}},UnitRange{Int64},Base.Slice{Base.OneTo{Int64}}},false}, ::CuArray{Float32,4}, ::DenseConvDims{2,(1, 1),1,64,(1, 1),(0, 0, 0, 0),(1, 1),false}; kwargs::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}) at /home/rorozcom3/.julia/packages/NNlib/Pcmaw/src/conv.jl:91
 [11] conv(::SubArray{Float32,4,CuArray{Float32,4},Tuple{Base.Slice{Base.OneTo{Int64}},Base.Slice{Base.OneTo{Int64}},UnitRange{Int64},Base.Slice{Base.OneTo{Int64}}},false}, ::CuArray{Float32,4}, ::DenseConvDims{2,(1, 1),1,64,(1, 1),(0, 0, 0, 0),(1, 1),false}) at /home/rorozcom3/.julia/packages/NNlib/Pcmaw/src/conv.jl:89
 [12] forward(::SubArray{Float32,4,CuArray{Float32,4},Tuple{Base.Slice{Base.OneTo{Int64}},Base.Slice{Base.OneTo{Int64}},UnitRange{Int64},Base.Slice{Base.OneTo{Int64}}},false}, ::ResidualBlock; save::Bool) at /home/rorozcom3/experiments/test_inv/InvertibleNetworks.jl/src/layers/layer_residual_block.jl:183
 [13] forward at /home/rorozcom3/experiments/test_inv/InvertibleNetworks.jl/src/layers/layer_residual_block.jl:181 [inlined]
 [14] #45#46 at /home/rorozcom3/experiments/test_inv/InvertibleNetworks.jl/src/utils/neuralnet.jl:17 [inlined]
 [15] (::InvertibleNetworks.var"#45#63"{InvertibleNetworks.var"#45#46#64"{ResidualBlock}})(::SubArray{Float32,4,CuArray{Float32,4},Tuple{Base.Slice{Base.OneTo{Int64}},Base.Slice{Base.OneTo{Int64}},UnitRange{Int64},Base.Slice{Base.OneTo{Int64}}},false}) at /home/rorozcom3/experiments/test_inv/InvertibleNetworks.jl/src/utils/neuralnet.jl:17
 [16] forward(::SubArray{Float32,4,CuArray{Float32,4},Tuple{Base.Slice{Base.OneTo{Int64}},Base.Slice{Base.OneTo{Int64}},UnitRange{Int64},Base.Slice{Base.OneTo{Int64}}},false}, ::SubArray{Float32,4,CuArray{Float32,4},Tuple{Base.Slice{Base.OneTo{Int64}},Base.Slice{Base.OneTo{Int64}},UnitRange{Int64},Base.Slice{Base.OneTo{Int64}}},false}, ::CouplingLayerBasic; save::Bool, logdet::Nothing) at /home/rorozcom3/experiments/test_inv/InvertibleNetworks.jl/src/layers/invertible_layer_basic.jl:99
 [17] forward(::SubArray{Float32,4,CuArray{Float32,4},Tuple{Base.Slice{Base.OneTo{Int64}},Base.Slice{Base.OneTo{Int64}},UnitRange{Int64},Base.Slice{Base.OneTo{Int64}}},false}, ::SubArray{Float32,4,CuArray{Float32,4},Tuple{Base.Slice{Base.OneTo{Int64}},Base.Slice{Base.OneTo{Int64}},UnitRange{Int64},Base.Slice{Base.OneTo{Int64}}},false}, ::CouplingLayerBasic) at /home/rorozcom3/experiments/test_inv/InvertibleNetworks.jl/src/layers/invertible_layer_basic.jl:96
 [18] (::InvertibleNetworks.var"#45#63"{InvertibleNetworks.var"#45#46#64"{CouplingLayerBasic}})(::SubArray{Float32,4,CuArray{Float32,4},Tuple{Base.Slice{Base.OneTo{Int64}},Base.Slice{Base.OneTo{Int64}},UnitRange{Int64},Base.Slice{Base.OneTo{Int64}}},false}, ::Vararg{SubArray{Float32,4,CuArray{Float32,4},Tuple{Base.Slice{Base.OneTo{Int64}},Base.Slice{Base.OneTo{Int64}},UnitRange{Int64},Base.Slice{Base.OneTo{Int64}}},false},N} where N; kwargs::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}) at /home/rorozcom3/experiments/test_inv/InvertibleNetworks.jl/src/utils/neuralnet.jl:17
 [19] (::InvertibleNetworks.var"#45#63"{InvertibleNetworks.var"#45#46#64"{CouplingLayerBasic}})(::SubArray{Float32,4,CuArray{Float32,4},Tuple{Base.Slice{Base.OneTo{Int64}},Base.Slice{Base.OneTo{Int64}},UnitRange{Int64},Base.Slice{Base.OneTo{Int64}}},false}, ::Vararg{SubArray{Float32,4,CuArray{Float32,4},Tuple{Base.Slice{Base.OneTo{Int64}},Base.Slice{Base.OneTo{Int64}},UnitRange{Int64},Base.Slice{Base.OneTo{Int64}}},false},N} where N) at /home/rorozcom3/experiments/test_inv/InvertibleNetworks.jl/src/utils/neuralnet.jl:17
 [20] forward(::CuArray{Float32,4}, ::CouplingLayerHINT; scale::Int64, permute::Nothing, logdet::Nothing) at /home/rorozcom3/experiments/test_inv/InvertibleNetworks.jl/src/layers/invertible_layer_hint.jl:0
 [21] forward at /home/rorozcom3/experiments/test_inv/InvertibleNetworks.jl/src/layers/invertible_layer_hint.jl:128 [inlined]
 [22] (::InvertibleNetworks.var"#45#63"{InvertibleNetworks.var"#45#46#64"{CouplingLayerHINT}})(::CuArray{Float32,4}; kwargs::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}) at /home/rorozcom3/experiments/test_inv/InvertibleNetworks.jl/src/utils/neuralnet.jl:17
 [23] #45 at /home/rorozcom3/experiments/test_inv/InvertibleNetworks.jl/src/utils/neuralnet.jl:17 [inlined]
 [24] forward(::CuArray{Float32,4}, ::CuArray{Float32,4}, ::ConditionalLayerHINT; logdet::Nothing) at /home/rorozcom3/experiments/test_inv/InvertibleNetworks.jl/src/conditional_layers/conditional_layer_hint.jl:103
 [25] forward(::CuArray{Float32,4}, ::CuArray{Float32,4}, ::ConditionalLayerHINT) at /home/rorozcom3/experiments/test_inv/InvertibleNetworks.jl/src/conditional_layers/conditional_layer_hint.jl:99
 [26] (::InvertibleNetworks.var"#45#63"{InvertibleNetworks.var"#45#46#64"{ConditionalLayerHINT}})(::CuArray{Float32,4}, ::Vararg{CuArray{Float32,4},N} where N; kwargs::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}) at /home/rorozcom3/experiments/test_inv/InvertibleNetworks.jl/src/utils/neuralnet.jl:17
 [27] (::InvertibleNetworks.var"#45#63"{InvertibleNetworks.var"#45#46#64"{ConditionalLayerHINT}})(::CuArray{Float32,4}, ::Vararg{CuArray{Float32,4},N} where N) at /home/rorozcom3/experiments/test_inv/InvertibleNetworks.jl/src/utils/neuralnet.jl:17
 [28] forward(::CuArray{Float32,4}, ::CuArray{Float32,4}, ::NetworkConditionalHINT; logdet::Nothing) at /home/rorozcom3/experiments/test_inv/InvertibleNetworks.jl/src/networks/invertible_network_conditional_hint.jl:85
 [29] forward at /home/rorozcom3/experiments/test_inv/InvertibleNetworks.jl/src/networks/invertible_network_conditional_hint.jl:78 [inlined]
 [30] #45#46 at /home/rorozcom3/experiments/test_inv/InvertibleNetworks.jl/src/utils/neuralnet.jl:17 [inlined]
 [31] #45 at /home/rorozcom3/experiments/test_inv/InvertibleNetworks.jl/src/utils/neuralnet.jl:17 [inlined]
 [32] loss(::NetworkConditionalHINT, ::CuArray{Float32,4}, ::CuArray{Float32,4}) at /home/rorozcom3/experiments/test_inv/InvertibleNetworks.jl/examples/applications/application_conditional_hint_banana_linear.jl:34
 [33] top-level scope at /home/rorozcom3/experiments/test_inv/InvertibleNetworks.jl/examples/applications/application_conditional_hint_banana_linear.jl:59
 [34] include(::String) at ./client.jl:457
 [35] top-level scope at REPL[1]:1
in expression starting at /home/rorozcom3/experiments/test_inv/InvertibleNetworks.jl/examples/applications/application_conditional_hint_banana_linear.jl:49
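Looking at the stacktrace, the failure seems to happen when NNlib's conv is called on a SubArray view of a CuArray (coming out of the HINT layer's split), so dispatch apparently falls back to the CPU conv_im2col! path and then to gemm! with CuPtr arguments. A minimal sketch of what I think the failing call looks like (the sizes and the view are my guess from the trace above, not taken from the example itself):

using CUDA, NNlib

X = CUDA.rand(Float32, 1, 1, 2, 64)    # 4D input on the GPU (nx, ny, n_in, batchsize)
W = CUDA.rand(Float32, 1, 1, 1, 64)    # 1x1 kernel, 1 input channel, 64 output channels

Xv = view(X, :, :, 1:1, :)             # SubArray of a CuArray, similar to what the trace shows
NNlib.conv(Xv, W)                      # presumably hits the generic im2col path instead of CUDNN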

The error above occurs after I changed the example as follows so that it runs on the GPU:

using LinearAlgebra, InvertibleNetworks, PyPlot, Flux, Random, Test
using CUDA

import Flux.Optimise.update!

# Random seed
Random.seed!(99)

####################################################################################################

# Define network
nx = 1
ny = 1
n_in = 2
n_hidden = 64
batchsize = 64
depth = 8

# Construct HINT network
H = NetworkConditionalHINT(nx, ny, n_in, batchsize, n_hidden, depth; k1=1, k2=1, p1=0, p2=0) |> gpu

# Linear forward operator
A = randn(Float32,2,2)
A = A / (2*opnorm(A))

####################################################################################################

# Loss
function loss(H, X, Y)
    Zx, Zy, logdet = H.forward(X, Y)
    f = -log_likelihood(tensor_cat(Zx, Zy)) - logdet
    ΔZ = -∇log_likelihood(tensor_cat(Zx, Zy))
    ΔZx, ΔZy = tensor_split(ΔZ)
    ΔX, ΔY = H.backward(ΔZx, ΔZy, Zx, Zy)[1:2]
    return f, ΔX, ΔY
end

# Training
maxiter = 1000
opt = Flux.ADAM(1f-3)
lr_step = 100
lr_decay_fn = Flux.ExpDecay(1f-3, .9, lr_step, 0.)
fval = zeros(Float32, maxiter)

for j=1:maxiter

    # Evaluate objective and gradients
    X = sample_banana(batchsize)
    Y = reshape(A*reshape(X, :, batchsize), nx, ny, n_in, batchsize)
    Y += .2f0*randn(Float32, nx, ny, n_in, batchsize)

    X = X |> gpu
    Y = Y |> gpu

    fval[j] = loss(H, X, Y)[1]
    mod(j, 10) == 0 && (print("Iteration: ", j, "; f = ", fval[j], "\n"))

    # Update params
    for p in get_params(H)
        update!(opt, p.data, p.grad)
        update!(lr_decay_fn, p.data, p.grad)
    end
    clear_grad!(H)
end

I am using the InvertibleNetworks.jl project environment, so the package versions are those given by the Project.toml.
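
As a side note, the scalar-indexing warning at the top can be turned into a hard error, as the warning itself suggests, which makes it easier to pinpoint where the slow scalar operation happens:

using CUDA
CUDA.allowscalar(false)   # error out on scalar indexing of GPU arrays instead of silently falling back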