TuringLang / NormalizingFlows.jl

https://turinglang.org/NormalizingFlows.jl/
MIT License

CUDA array support #12

Open · zuhengxu opened this issue 1 year ago

zuhengxu commented 1 year ago
using CUDA
using LinearAlgebra
using Bijectors
using Distributions
using Flux

T = Float32
q0 = MvNormal(ones(T, 2))  # 2-d zero-mean reference distribution, on the CPU

# compose two planar layers into a single transformation
ts = reduce(∘, [f32(Bijectors.PlanarLayer(2)) for _ in 1:2])
flow = transformed(q0, ts)

# gpu
CUDA.functional()              # check that a CUDA device is usable
flow_g = gpu(flow)             # move flow parameters to the device
ts_g = gpu(ts)

xs = rand(flow_g.dist, 10)     # base samples, still on the cpu
ys_g = transform(ts_g, cu(xs)) # good
logpdf(flow_g, ys_g[:, 1])     # good
rand(flow_g)                   # bug

Output:

julia> rand(flow_g) # bug
ERROR: MethodError: no method matching dot(::Int64, ::CuPtr{Float32}, ::Int64, ::Ptr{Float32}, ::Int64)

Closest candidates are:
  dot(::Integer, ::Union{Ptr{Float32}, AbstractArray{Float32}}, ::Integer, ::Union{Ptr{Float32}, AbstractArray{Float32}}, ::Integer)
   @ LinearAlgebra ~/.julia/juliaup/julia-1.9.2+0.x64.linux.gnu/share/julia/stdlib/v1.9/LinearAlgebra/src/blas.jl:344
  dot(::Integer, ::Union{Ptr{Float64}, AbstractArray{Float64}}, ::Integer, ::Union{Ptr{Float64}, AbstractArray{Float64}}, ::Integer)
   @ LinearAlgebra ~/.julia/juliaup/julia-1.9.2+0.x64.linux.gnu/share/julia/stdlib/v1.9/LinearAlgebra/src/blas.jl:344

Stacktrace:
  [1] dot(x::CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, y::Vector{Float32})
    @ LinearAlgebra.BLAS ~/.julia/juliaup/julia-1.9.2+0.x64.linux.gnu/share/julia/stdlib/v1.9/LinearAlgebra/src/blas.jl:395
  [2] dot(x::CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, y::Vector{Float32})
    @ LinearAlgebra ~/.julia/juliaup/julia-1.9.2+0.x64.linux.gnu/share/julia/stdlib/v1.9/LinearAlgebra/src/matmul.jl:14
  [3] aT_b(a::CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, b::Vector{Float32})
    @ Bijectors ~/.julia/packages/Bijectors/SxXKg/src/utils.jl:4
  [4] _transform(flow::PlanarLayer{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, z::Vector{Float32})
    @ Bijectors ~/.julia/packages/Bijectors/SxXKg/src/bijectors/planar_layer.jl:77
  [5] transform(b::PlanarLayer{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, z::Vector{Float32})
    @ Bijectors ~/.julia/packages/Bijectors/SxXKg/src/bijectors/planar_layer.jl:82
  [6] (::PlanarLayer{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}})(x::Vector{Float32})
    @ Bijectors ~/.julia/packages/Bijectors/SxXKg/src/interface.jl:80
  [7] call_composed(fs::Tuple{PlanarLayer{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}, x::Tuple{Vector{Float32}}, kw::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ Base ./operators.jl:1035
  [8] call_composed
    @ ./operators.jl:1034 [inlined]
  [9] (::ComposedFunction{PlanarLayer{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, PlanarLayer{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}})(x::Vector{Float32}; kw::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ Base ./operators.jl:1031
 [10] (::ComposedFunction{PlanarLayer{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, PlanarLayer{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}})(x::Vector{Float32})
    @ Base ./operators.jl:1031
 [11] rand(td::MultivariateTransformed{MvNormal{Float32, PDMats.PDiagMat{Float32, Vector{Float32}}, FillArrays.Zeros{Float32, 1, Tuple{Base.OneTo{Int64}}}}, ComposedFunction{PlanarLayer{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, PlanarLayer{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}})
    @ Bijectors ~/.julia/packages/Bijectors/SxXKg/src/transformed_distribution.jl:159
 [12] top-level scope
    @ REPL[67]:1
 [13] top-level scope
    @ ~/.julia/packages/CUDA/p5OVK/src/initialization.jl:171
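
The trace points at a device mismatch: rand(flow_g) draws a CPU Vector{Float32} from the base MvNormal and hands it straight to the GPU-resident PlanarLayer, so dot is called with one CuArray and one Vector argument and no BLAS method matches. A stop-gap is to do explicitly what the snippet above already does by hand, sample on the CPU, move the draws to the device, then transform; rand_device below is a hypothetical helper sketch, not part of Bijectors' API:

# Hypothetical helper (not Bijectors API): draw from the CPU base
# distribution, move the draws to the device, then apply the GPU layers.
function rand_device(flow, n)
    xs = rand(flow.dist, n)                   # CPU samples from the base
    return transform(flow.transform, cu(xs))  # transform on the device
end

ys = rand_device(flow_g, 10)  # 2×10 CuArray of flow samples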
sunxd3 commented 1 year ago

Another example:

using Bijectors
using CUDA
using Distributions
using Random

q0_gpu = MvNormal(cu(ones(2))) # reference distribution with GPU-resident covariance
ts_gpu = reduce(
    ∘,
    [
        Bijectors.PlanarLayer(
            rand(CURAND.default_rng(), 2),
            rand(CURAND.default_rng(), 2),
            rand(CURAND.default_rng(), 1),
        ) for _ in 1:2
    ],
) # transformation: two planar layers with GPU-resident parameters
flow_gpu = transformed(q0_gpu, ts_gpu)

rand(flow_gpu, 10)

Error:

ERROR: This object is not a GPU array
Stacktrace:
  [1] error(s::String)
    @ Base ./error.jl:35
  [2] backend(#unused#::Type)
    @ GPUArraysCore ~/packages/GPUArraysCore/src/GPUArraysCore.jl:148
  [3] backend(x::Matrix{Float32})
    @ GPUArraysCore ~/packages/GPUArraysCore/src/GPUArraysCore.jl:149
  [4] _copyto!
    @ ~/packages/GPUArrays/src/host/broadcast.jl:65 [inlined]
  [5] materialize!
    @ ~/packages/GPUArrays/src/host/broadcast.jl:41 [inlined]
  [6] materialize!
    @ ./broadcast.jl:881 [inlined]
  [7] unwhiten!(r::Matrix{Float32}, a::PDMats.PDiagMat{Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, x::Matrix{Float32})
    @ PDMats ~/packages/PDMats/src/pdiagmat.jl:107
  [8] unwhiten!
    @ ~/packages/PDMats/src/generics.jl:33 [inlined]
  [9] _rand!(rng::TaskLocalRNG, d::MvNormal{Float32, PDMats.PDiagMat{Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, FillArrays.Zeros{Float32, 1, Tuple{Base.OneTo{Int64}}}}, x::Matrix{Float32})
    @ Distributions ~/packages/Distributions/src/multivariate/mvnormal.jl:277
 [10] rand!
    @ ~/packages/Distributions/src/genericrand.jl:108 [inlined]
 [11] rand
    @ ~/packages/Distributions/src/multivariates.jl:23 [inlined]
 [12] rand(rng::TaskLocalRNG, td::MultivariateTransformed{MvNormal{Float32, PDMats.PDiagMat{Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, FillArrays.Zeros{Float32, 1, Tuple{Base.OneTo{Int64}}}}, ComposedFunction{PlanarLayer{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Vector{Float32}}, PlanarLayer{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Vector{Float32}}}}, num_samples::Int64)
    @ Bijectors ~/packages/Bijectors/src/transformed_distribution.jl:163
 [13] rand(s::MultivariateTransformed{MvNormal{Float32, PDMats.PDiagMat{Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, FillArrays.Zeros{Float32, 1, Tuple{Base.OneTo{Int64}}}}, ComposedFunction{PlanarLayer{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Vector{Float32}}, PlanarLayer{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Vector{Float32}}}}, dims::Int64)
    @ Distributions ~/packages/Distributions/src/genericrand.jl:22
 [14] top-level scope
    @ ~/Workspace/julia_gpu/nf.jl:18
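
Here the failure is the mirror image of the first report: rand(flow_gpu, 10) allocates a plain Matrix{Float32} for the samples, but the covariance is a PDiagMat wrapping a CuArray, so PDMats' unwhiten! attempts a GPU broadcast into a CPU buffer. A minimal device-side sketch, assuming the zero-mean diagonal Gaussian shown in the trace and layer parameters that all live on the device; the field access q0_gpu.Σ.diag is internal, not a supported API:

# Minimal sketch under the assumptions above: sample the standard normal
# directly on the device, unwhiten by the diagonal covariance, then apply
# the planar layers. q0_gpu.Σ.diag is an internal field, not public API.
zs = CUDA.randn(Float32, 2, 10)       # standard-normal draws on the GPU
xs_gpu = sqrt.(q0_gpu.Σ.diag) .* zs   # scale by the per-dimension std dev
ys_gpu = transform(ts_gpu, xs_gpu)    # flow samples, entirely on device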