slimgroup / InvertibleNetworks.jl

A Julia framework for invertible neural networks
MIT License
148 stars 20 forks source link

Issues with forward evaluation of conv1x1 on GPU with CUDA 4 #73

Closed alisiahkoohi closed 1 year ago

alisiahkoohi commented 1 year ago

I run into errors when evaluating the GLOW network (more precisely, the conv1x1 layer) on GPU (e.g., when running this line).

The error originates in https://github.com/slimgroup/InvertibleNetworks.jl/blob/master/src/utils/compute_utils.jl#L20. Specifically, this line raises ERROR: DivideError: integer division error (can be circumvented by replacing norm(v) with sum(v.^2)) and this line raises CUBLASError: an invalid value was used as an argument (code 7, CUBLAS_STATUS_INVALID_VALUE), which I was unable to fix.

This issue does not exist when using CUDA@3.10.0 (possibly any version before 4? Not sure).

mloubout commented 1 year ago

For the first one, dot(v, v) is in general faster than norm, so we should swap it. I'll have a look at the gemm error.

mloubout commented 1 year ago

I can't seem to be able to replicate either issue with CUDA 4 on my desktop. Which version exactly leads to this error?

alisiahkoohi commented 1 year ago

I also get a slightly different error with a fresh installation:

$ julia --project examples/networks/network_glow.jl 
  Downloaded artifact: CUDA_Driver
  Downloaded artifact: CUDA_Runtime
  Downloaded artifact: CUDNN
ERROR: LoadError: CUBLASError: the requested feature is not supported (code 15, CUBLAS_STATUS_NOT_SUPPORTED)
Stacktrace:
  [1] throw_api_error(res::CUDA.CUBLAS.cublasStatus_t)
    @ CUDA.CUBLAS /data/as356/.julia/packages/CUDA/ZdCxS/lib/cublas/libcublas.jl:11
  [2] macro expansion
    @ /data/as356/.julia/packages/CUDA/ZdCxS/lib/cublas/libcublas.jl:24 [inlined]
  [3] cublasSgemv_v2(handle::Ptr{CUDA.CUBLAS.cublasContext}, trans::Char, m::Int64, n::Int64, alpha::Bool, A::CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, lda::Int64, x::CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, incx::Int64, beta::Bool, y::CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, incy::Int64)
    @ CUDA.CUBLAS /data/as356/.julia/packages/CUDA/ZdCxS/lib/utils/call.jl:26
  [4] gemv!
    @ /data/as356/.julia/packages/CUDA/ZdCxS/lib/cublas/wrappers.jl:331 [inlined]
  [5] gemv_dispatch!(Y::CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, A::CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, B::CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, alpha::Bool, beta::Bool)
    @ CUDA.CUBLAS /data/as356/.julia/packages/CUDA/ZdCxS/lib/cublas/linalg.jl:179
  [6] mul!
    @ /data/as356/.julia/packages/CUDA/ZdCxS/lib/cublas/linalg.jl:188 [inlined]
  [7] mul!
    @ /opt2/software/julia/1.7.1/share/julia/stdlib/v1.7/LinearAlgebra/src/matmul.jl:275 [inlined]
  [8] chain_lr(::Base.ReshapedArray{Float32, 2, SubArray{Float32, 3, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, Tuple{Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Tuple{}}, ::CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, ::CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, ::CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer})
    @ InvertibleNetworks /data/as356/InvertibleNetworks.jl/src/utils/compute_utils.jl:25
  [9] forward(X::CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, C::Conv1x1; logdet::Nothing)
    @ InvertibleNetworks /data/as356/InvertibleNetworks.jl/src/layers/invertible_layer_conv1x1.jl:184
 [10] forward(X::CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, C::Conv1x1)
    @ InvertibleNetworks /data/as356/InvertibleNetworks.jl/src/layers/invertible_layer_conv1x1.jl:174
 [11] _predefined_mode(obj::Conv1x1, sym::Symbol, args::CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}; kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ InvertibleNetworks /data/as356/InvertibleNetworks.jl/src/utils/neuralnet.jl:29
 [12] _predefined_mode
    @ /data/as356/InvertibleNetworks.jl/src/utils/neuralnet.jl:28 [inlined]
 [13] #134#135
    @ /data/as356/InvertibleNetworks.jl/src/utils/neuralnet.jl:40 [inlined]
 [14] #134
    @ /data/as356/InvertibleNetworks.jl/src/utils/neuralnet.jl:40 [inlined]
 [15] forward(X::CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, L::CouplingLayerGlow)
    @ InvertibleNetworks /data/as356/InvertibleNetworks.jl/src/layers/invertible_layer_glow.jl:96
 [16] _predefined_mode(obj::CouplingLayerGlow, sym::Symbol, args::CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}; kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ InvertibleNetworks /data/as356/InvertibleNetworks.jl/src/utils/neuralnet.jl:29
 [17] _predefined_mode
    @ /data/as356/InvertibleNetworks.jl/src/utils/neuralnet.jl:28 [inlined]
 [18] #134#135
    @ /data/as356/InvertibleNetworks.jl/src/utils/neuralnet.jl:40 [inlined]
 [19] (::InvertibleNetworks.var"#134#136"{InvertibleNetworks.var"#134#135#137"{CouplingLayerGlow}})(args::CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer})
    @ InvertibleNetworks /data/as356/InvertibleNetworks.jl/src/utils/neuralnet.jl:40
 [20] forward(X::CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, G::NetworkGlow)
    @ InvertibleNetworks /data/as356/InvertibleNetworks.jl/src/networks/invertible_network_glow.jl:110
 [21] _predefined_mode(obj::NetworkGlow, sym::Symbol, args::CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}; kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ InvertibleNetworks /data/as356/InvertibleNetworks.jl/src/utils/neuralnet.jl:29
 [22] _predefined_mode
    @ /data/as356/InvertibleNetworks.jl/src/utils/neuralnet.jl:28 [inlined]
 [23] #134#135
    @ /data/as356/InvertibleNetworks.jl/src/utils/neuralnet.jl:40 [inlined]
 [24] (::InvertibleNetworks.var"#134#136"{InvertibleNetworks.var"#134#135#137"{NetworkGlow}})(args::CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer})
    @ InvertibleNetworks /data/as356/InvertibleNetworks.jl/src/utils/neuralnet.jl:40
 [25] loss(X::CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer})
    @ Main /data/as356/InvertibleNetworks.jl/examples/networks/network_glow.jl:27
 [26] top-level scope
    @ /data/as356/InvertibleNetworks.jl/examples/networks/network_glow.jl:34
in expression starting at /data/as356/InvertibleNetworks.jl/examples/networks/network_glow.jl:3

Current versions are:

(InvertibleNetworks) pkg> status
     Project InvertibleNetworks v2.2.3
      Status `/data/as356/InvertibleNetworks.jl/Project.toml`
  [052768ef] CUDA v4.0.1
  [d360d2e6] ChainRulesCore v1.15.7
  [587475ba] Flux v0.13.13
  [bb331ad6] JOLI v0.8.4
  [872c559c] NNlib v0.8.19
  [29a6e085] Wavelets v0.9.5
  [37e2e46d] LinearAlgebra
  [9a3f8284] Random
  [10745b16] Statistics
mloubout commented 1 year ago

What's your CUDA version? CUDA.versioninfo()

alisiahkoohi commented 1 year ago
julia> CUDA.versioninfo()
CUDA runtime 11.8, artifact installation
CUDA driver 11.8
NVIDIA driver 470.103.1, originally for CUDA 11.4

Libraries: 
- CUBLAS: 11.8.1
- CURAND: 10.3.0
- CUFFT: 10.9.0
- CUSOLVER: 11.4.1
- CUSPARSE: 11.7.5
- CUPTI: 18.0.0
- NVML: 11.0.0+470.103.1
mloubout commented 1 year ago

Hum really weird

mloubout commented 1 year ago

What's the CUDA version inside InvertibleNetworks? I.e., run julia --project, then status.

alisiahkoohi commented 1 year ago

I posted this information here but below is the outcome for a fresh installation (from master branch):

$ julia --project                                                                                                          
               _                                                                                                                        
   _       _ _(_)_     |  Documentation: https://docs.julialang.org                                                                     
  (_)     | (_) (_)    |                                                                                                                
   _ _   _| |_  __ _   |  Type "?" for help, "]?" for Pkg help.                                                                         
  | | | | | | |/ _` |  |                                                                                                                
  | | |_| | | | (_| |  |  Version 1.7.1 (2021-12-22)                                                                                    
 _/ |\__'_|_|_|\__'_|  |  Official https://julialang.org/ release                                                                       
|__/                   |                                                                                                                

(InvertibleNetworks) pkg> status                                                                                                        
     Project InvertibleNetworks v2.2.4                                                                                                  
      Status `/data/as356/InvertibleNetworks.jl/Project.toml`                                                                           
  [052768ef] CUDA v4.0.1                                                                                                                
  [d360d2e6] ChainRulesCore v1.15.7                                                                                                     
  [587475ba] Flux v0.13.13                                                                                                              
  [bb331ad6] JOLI v0.8.4                                                                                                                
  [872c559c] NNlib v0.8.19                                                                                                              
  [29a6e085] Wavelets v0.9.5                                                                                                            
  [37e2e46d] LinearAlgebra                                                                                                              
  [9a3f8284] Random                                                                                                                     
  [10745b16] Statistics

Same error as above: LoadError: CUBLASError: the requested feature is not supported (code 15, CUBLAS_STATUS_NOT_SUPPORTED) originated at

    [8] chain_lr(::Base.ReshapedArray{Float32, 2, SubArray{Float32, 3, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, Tuple{Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Tuple{}}, ::CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, ::CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, ::CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer})
    @ InvertibleNetworks /data/as356/InvertibleNetworks.jl/src/utils/compute_utils.jl:25