cossio / RestrictedBoltzmannMachines.jl

Train and sample Restricted Boltzmann machines in Julia
MIT License
15 stars 3 forks source link

CUDA issue #9

Closed cossio closed 1 year ago

cossio commented 2 years ago
using CUDA
using Random: bitrand
import RestrictedBoltzmannMachines as RBMs

N, M, B = 10, 7, 5  # example sizes: visible units, hidden units, batch

rbmgpu = RBMs.BinaryRBM(CUDA.randn(N), CUDA.randn(M), CUDA.randn(N,M))
vgpu = CUDA.CuArray{Float32}(bitrand(N,B))

gives the following error:

julia> RBMs.sample_v_from_v(rbmgpu, vgpu)
ERROR: GPU compilation of kernel broadcast_kernel(CUDA.CuKernelContext, CuDeviceMatrix{Bool, 1}, Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{2}, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}, typeof(RestrictedBoltzmannMachines.binary_rand), Tuple{Base.Broadcast.Extruded{CuDeviceMatrix{Float32, 1}, Tuple{Bool, Bool}, Tuple{Int64, Int64}}, Base.Broadcast.Extruded{Matrix{Float32}, Tuple{Bool, Bool}, Tuple{Int64, Int64}}}}, Int64) failed
KernelError: passing and using non-bitstype argument

Argument 4 to your kernel function is of type Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{2}, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}, typeof(RestrictedBoltzmannMachines.binary_rand), Tuple{Base.Broadcast.Extruded{CuDeviceMatrix{Float32, 1}, Tuple{Bool, Bool}, Tuple{Int64, Int64}}, Base.Broadcast.Extruded{Matrix{Float32}, Tuple{Bool, Bool}, Tuple{Int64, Int64}}}}, which is not isbits:
  .args is of type Tuple{Base.Broadcast.Extruded{CuDeviceMatrix{Float32, 1}, Tuple{Bool, Bool}, Tuple{Int64, Int64}}, Base.Broadcast.Extruded{Matrix{Float32}, Tuple{Bool, Bool}, Tuple{Int64, Int64}}} which is not isbits.
    .2 is of type Base.Broadcast.Extruded{Matrix{Float32}, Tuple{Bool, Bool}, Tuple{Int64, Int64}} which is not isbits.
      .x is of type Matrix{Float32} which is not isbits.

Stacktrace:
  [1] check_invocation(job::GPUCompiler.CompilerJob)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/1FdJy/src/validation.jl:71
  [2] macro expansion
    @ ~/.julia/packages/GPUCompiler/1FdJy/src/driver.jl:385 [inlined]
  [3] macro expansion
    @ ~/.julia/packages/TimerOutputs/8mHel/src/TimerOutput.jl:252 [inlined]
  [4] macro expansion
    @ ~/.julia/packages/GPUCompiler/1FdJy/src/driver.jl:384 [inlined]
  [5] emit_asm(job::GPUCompiler.CompilerJob, ir::LLVM.Module; strip::Bool, validate::Bool, format::LLVM.API.LLVMCodeGenFileType)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/1FdJy/src/utils.jl:64
  [6] cufunction_compile(job::GPUCompiler.CompilerJob, ctx::LLVM.Context)
    @ CUDA ~/.julia/packages/CUDA/Uurn4/src/compiler/execution.jl:332
  [7] #260
    @ ~/.julia/packages/CUDA/Uurn4/src/compiler/execution.jl:325 [inlined]
  [8] JuliaContext(f::CUDA.var"#260#261"{GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams, GPUCompiler.FunctionSpec{GPUArrays.var"#broadcast_kernel#17", Tuple{CUDA.CuKernelContext, CuDeviceMatrix{Bool, 1}, Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{2}, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}, typeof(RestrictedBoltzmannMachines.binary_rand), Tuple{Base.Broadcast.Extruded{CuDeviceMatrix{Float32, 1}, Tuple{Bool, Bool}, Tuple{Int64, Int64}}, Base.Broadcast.Extruded{Matrix{Float32}, Tuple{Bool, Bool}, Tuple{Int64, Int64}}}}, Int64}}}})
    @ GPUCompiler ~/.julia/packages/GPUCompiler/1FdJy/src/driver.jl:74
  [9] cufunction_compile(job::GPUCompiler.CompilerJob)
    @ CUDA ~/.julia/packages/CUDA/Uurn4/src/compiler/execution.jl:324
 [10] cached_compilation(cache::Dict{UInt64, Any}, job::GPUCompiler.CompilerJob, compiler::typeof(CUDA.cufunction_compile), linker::typeof(CUDA.cufunction_link))
    @ GPUCompiler ~/.julia/packages/GPUCompiler/1FdJy/src/cache.jl:90
 [11] cufunction(f::GPUArrays.var"#broadcast_kernel#17", tt::Type{Tuple{CUDA.CuKernelContext, CuDeviceMatrix{Bool, 1}, Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{2}, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}, typeof(RestrictedBoltzmannMachines.binary_rand), Tuple{Base.Broadcast.Extruded{CuDeviceMatrix{Float32, 1}, Tuple{Bool, Bool}, Tuple{Int64, Int64}}, Base.Broadcast.Extruded{Matrix{Float32}, Tuple{Bool, Bool}, Tuple{Int64, Int64}}}}, Int64}}; name::Nothing, kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ CUDA ~/.julia/packages/CUDA/Uurn4/src/compiler/execution.jl:297
 [12] cufunction
    @ ~/.julia/packages/CUDA/Uurn4/src/compiler/execution.jl:291 [inlined]
 [13] macro expansion
    @ ~/.julia/packages/CUDA/Uurn4/src/compiler/execution.jl:102 [inlined]
 [14] #launch_heuristic#284
    @ ~/.julia/packages/CUDA/Uurn4/src/gpuarrays.jl:17 [inlined]
 [15] _copyto!
    @ ~/.julia/packages/GPUArrays/Zecv7/src/host/broadcast.jl:73 [inlined]
 [16] copyto!
    @ ~/.julia/packages/GPUArrays/Zecv7/src/host/broadcast.jl:56 [inlined]
 [17] copy
    @ ~/.julia/packages/GPUArrays/Zecv7/src/host/broadcast.jl:47 [inlined]
 [18] materialize
    @ ./broadcast.jl:860 [inlined]
 [19] transfer_sample(layer::RestrictedBoltzmannMachines.Binary{1, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, inputs::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer})
    @ RestrictedBoltzmannMachines ~/.julia/packages/RestrictedBoltzmannMachines/9myqX/src/layers/binary.jl:30
 [20] sample_h_from_v
    @ ~/.julia/packages/RestrictedBoltzmannMachines/9myqX/src/rbm.jl:102 [inlined]
 [21] sample_v_from_v_once
    @ ~/.julia/packages/RestrictedBoltzmannMachines/9myqX/src/rbm.jl:144 [inlined]
 [22] sample_v_from_v(rbm::RestrictedBoltzmannMachines.RBM{RestrictedBoltzmannMachines.Binary{1, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, RestrictedBoltzmannMachines.Binary{1, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}, v::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}; steps::Int64)
    @ RestrictedBoltzmannMachines ~/.julia/packages/RestrictedBoltzmannMachines/9myqX/src/rbm.jl:124
 [23] sample_v_from_v(rbm::RestrictedBoltzmannMachines.RBM{RestrictedBoltzmannMachines.Binary{1, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, RestrictedBoltzmannMachines.Binary{1, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}, v::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer})
    @ RestrictedBoltzmannMachines ~/.julia/packages/RestrictedBoltzmannMachines/9myqX/src/rbm.jl:122
 [24] top-level scope
    @ REPL[76]:1
 [25] top-level scope
    @ ~/.julia/packages/CUDA/Uurn4/src/initialization.jl:52

There is something about this line that CUDA doesn't like:

https://github.com/cossio/RestrictedBoltzmannMachines.jl/blob/2dd911848d95a92078c9b7baecfd12bd9e25654e/src/layers/binary.jl#L30

The `KernelError` above says argument `.2` of the broadcast is a plain `Matrix{Float32}` (not isbits), so it seems a CPU array is being mixed into the GPU broadcast with the `CuArray`s, which prevents kernel compilation.

cossio commented 1 year ago

Seems to work now. See also https://github.com/cossio/CudaRBMs.jl.