LuxDL / Lux.jl

Elegant & Performant Scientific Machine Learning in Julia
https://lux.csail.mit.edu/
MIT License
446 stars 50 forks source link

Fast activation error #663

Closed vpuri3 closed 1 month ago

vpuri3 commented 1 month ago
device = Lux.gpu_device()
l = Dense(2, 2, tanh; allow_fast_activation = false)
p, st = Lux.setup(rng, l)
x = CUDA.ones(2, 5)
l(x, p |> device, st |> device)[1]
# Float32[-0.16319485 -0.16319485 … -0.16319485 -0.16319485; -0.7446033 -0.7446033 … -0.7446033 -0.7446033]
device = Lux.gpu_device()
l = Dense(2, 2, tanh)
p, st = Lux.setup(rng, l)
x = CUDA.ones(2, 5)
l(x, p |> device, st |> device)[1] # Error
ERROR: ArgumentError: Pass LowerSIMDLoop is not a module pass                                                                                                                                                                                                                            
Stacktrace:                                                                                                                                                                                                                                                                              
  [1] add!(pm::LLVM.NewPMModulePassManager, pb::LLVM.PassBuilder, pass::LLVM.Interop.LowerSIMDLoopPass)                                                                                                                                                                                  
    @ LLVM ~/.julia/packages/LLVM/ShACK/src/newpm/passes.jl:701                                                                                                                                                                                                                          
  [2] add!(pm::LLVM.NewPMModulePassManager, pass::LLVM.Interop.LowerSIMDLoopPass)                                                                                                                                                                                                        
    @ LLVM ~/.julia/packages/LLVM/ShACK/src/newpm/passes.jl:728                                                                                                                                                                                                                          
  [3] buildNewPMPipeline!(mpm::LLVM.NewPMModulePassManager, job::GPUCompiler.CompilerJob, opt_level::Int64)                                 
    @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/optim.jl:37                                                                       
  [4] buildNewPMPipeline!(mpm::LLVM.NewPMModulePassManager, job::GPUCompiler.CompilerJob)                                                                                                                                                                                                
    @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/optim.jl:34                                                                       
  [5] macro expansion                                                                                                                                                                                                                                                                    
    @ ~/.julia/packages/GPUCompiler/kqxyC/src/optim.jl:24 [inlined]                                                                                                                                                                                                                      
  [6] macro expansion                                                                                                                                                                                                                                                                    
    @ ~/.julia/packages/LLVM/ShACK/src/base.jl:98 [inlined]                                                                                 
  [7] macro expansion                                                                                                                                                                                                                                                                    
    @ ~/.julia/packages/GPUCompiler/kqxyC/src/optim.jl:23 [inlined]                                                                         
  [8] macro expansion                                                                                                                                                                                                                                                                    
    @ ~/.julia/packages/LLVM/ShACK/src/base.jl:98 [inlined]                                                                                                                                                                                                                              
  [9] optimize_newpm!(job::GPUCompiler.CompilerJob, mod::LLVM.Module) 
    @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/optim.jl:22                                                                       
 [10] optimize!(job::GPUCompiler.CompilerJob, mod::LLVM.Module)                                                                                                                                                                                                                          
    @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/optim.jl:5                                                                                                                                                                                                                     
 [11] macro expansion                                                                                                                                                                                                                                                                    
    @ ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:344 [inlined]                                                                       
 [12] macro expansion                                                                                                                                                                                                                                                                    
    @ ~/.julia/packages/TimerOutputs/Lw5SP/src/TimerOutput.jl:253 [inlined]                                                                 
 [13] macro expansion                                                                                                                                                                                                                                                                    
    @ ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:343 [inlined]                                                                       
 [14] macro expansion                                                                                                                                                                                                                                                                    
    @ ~/.julia/packages/TimerOutputs/Lw5SP/src/TimerOutput.jl:253 [inlined]                                                                 
 [15] macro expansion                                                                                                                       
    @ ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:316 [inlined]                                                                       
 [16] emit_llvm(job::GPUCompiler.CompilerJob; libraries::Bool, toplevel::Bool, optimize::Bool, cleanup::Bool, only_entry::Bool, validate::Bool)                                                                                                                                          
    @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/utils.jl:92                                                                       
 [17] emit_llvm                                                                                                                                                                                                                                                                          
    @ ~/.julia/packages/GPUCompiler/kqxyC/src/utils.jl:86 [inlined]                                                                         
 [18] codegen(output::Symbol, job::GPUCompiler.CompilerJob; libraries::Bool, toplevel::Bool, optimize::Bool, cleanup::Bool, strip::Bool, validate::Bool, only_entry::Bool, parent_job::Nothing)                                                                                          
    @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:134                                                                                                                                                                                                                  
 [19] codegen                                                                                                                                                                                                                                                                            
    @ ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:115 [inlined]                                                                       
 [20] compile(target::Symbol, job::GPUCompiler.CompilerJob; libraries::Bool, toplevel::Bool, optimize::Bool, cleanup::Bool, strip::Bool, validate::Bool, only_entry::Bool)                                                                                                               
    @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:111                                                                     
 [21] compile                                                         
    @ ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:103 [inlined] 
 [22] #1116                                                                                                                                                                                                                                                                              
    @ ~/.julia/packages/CUDA/XUdwt/src/compiler/compilation.jl:247 [inlined]                                                                
 [23] JuliaContext(f::CUDA.var"#1116#1119"{GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}}; kwargs::@Kwargs{})                                                                                                                                          
    @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:52                                                                      
 [24] JuliaContext(f::Function)                                       
    @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:42                                                                      
 [25] compile(job::GPUCompiler.CompilerJob)                                                                                                                                                                                                                                              
    @ CUDA ~/.julia/packages/CUDA/XUdwt/src/compiler/compilation.jl:246                                                                     
 [26] actual_compilation(cache::Dict{Any, CuFunction}, src::Core.MethodInstance, world::UInt64, cfg::GPUCompiler.CompilerConfig{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, compiler::typeof(CUDA.compile), linker::typeof(CUDA.link))                                      
    @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/execution.jl:128                                                                  
 [27] cached_compilation(cache::Dict{Any, CuFunction}, src::Core.MethodInstance, cfg::GPUCompiler.CompilerConfig{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, compiler::Function, linker::Function)                                                                          
    @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/execution.jl:103                                                                  
 [28] macro expansion                                                                                                                                                                                                                                                                    
    @ ~/.julia/packages/CUDA/XUdwt/src/compiler/execution.jl:367 [inlined]                                                                  
 [29] macro expansion                                                                                                                                                                                                                                                                    
    @ ./lock.jl:267 [inlined]                                                                                                               
 [30] cufunction(f::GPUArrays.var"#35#37", tt::Type{Tuple{CUDA.CuKernelContext, CuDeviceMatrix{Float32, 1}, Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{…}, Tuple{…}, typeof(tanh_fast), Tuple{…}}, Int64}}; kwargs::@Kwargs{})                                                         
    @ CUDA ~/.julia/packages/CUDA/XUdwt/src/compiler/execution.jl:362                                                                       
 [34] launch_heuristic
    @ ~/.julia/packages/CUDA/XUdwt/src/gpuarrays.jl:15 [inlined]
 [35] _copyto!
    @ ~/.julia/packages/GPUArrays/OqrUV/src/host/broadcast.jl:78 [inlined]
 [36] materialize!
    @ ~/.julia/packages/GPUArrays/OqrUV/src/host/broadcast.jl:38 [inlined]
 [37] materialize!
    @ ./broadcast.jl:911 [inlined]
 [38] _cublaslt_matmul_fused!(transy::Bool, y::CuArray{…}, σ::typeof(tanh_fast), transw::Bool, w::CuArray{…}, transx::Bool, x::CuArray{…}, b::CuArray{…}, aux::Nothing)
    @ LuxLibCUDAExt ~/.julia/packages/LuxLib/VRICL/ext/LuxLibCUDAExt/cublaslt.jl:141
 [39] _cublaslt_matmul_fused!
    @ ~/.julia/packages/LuxLib/VRICL/ext/LuxLibCUDAExt/cublaslt.jl:13 [inlined]
 [40] _cublaslt_matmul_fused!
    @ ~/.julia/packages/LuxLib/VRICL/ext/LuxLibCUDAExt/cublaslt.jl:10 [inlined]
 [41] __fused_dense_bias_activation_impl(act::typeof(tanh_fast), weight::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, x::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, b::CuArray{Float32, 1, CUDA.Mem.DeviceBuffer})
    @ LuxLibCUDAExt ~/.julia/packages/LuxLib/VRICL/ext/LuxLibCUDAExt/fused_dense.jl:15
 [42] fused_dense_bias_activation
    @ ~/.julia/packages/LuxLib/VRICL/src/api/dense.jl:46 [inlined]
 [43] fused_dense_bias_activation
    @ ~/.julia/packages/LuxLib/VRICL/src/api/dense.jl:38 [inlined]
 [44] (::Dense{true, typeof(tanh_fast), typeof(glorot_uniform), typeof(zeros32)})(x::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, ps::@NamedTuple{weight::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, bias::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}, st::@NamedTuple{})
    @ Lux ~/.julia/packages/Lux/PsbZF/src/layers/basic.jl:218
 [45] top-level scope
    @ REPL[13]:1
 [46] top-level scope
    @ ~/.julia/packages/CUDA/XUdwt/src/initialization.jl:209
Some type information was truncated. Use `show(err)` to see complete types.
(test) pkg> st
Status `~/.julia/dev/KolmogorovArnold.jl/test/Project.toml`
  [6e4b80f9] BenchmarkTools v1.5.0
  [052768ef] CUDA v5.3.4
  [b0b7db55] ComponentArrays v0.15.13
  [b2108857] Lux v0.5.51
  [d0bbae9a] LuxCUDA v0.3.2
  [34f89e08] LuxDeviceUtils v0.1.20
  [eb30cadb] MLDatasets v0.7.14
  [f1d291b0] MLUtils v0.4.4
  [3bd65402] Optimisers v0.3.3
  [36348300] OptimizationOptimJL v0.3.1
  [91a5bcdd] Plots v1.40.4
  [e88e6eb3] Zygote v0.6.70
  [02a925ec] cuDNN v1.3.1
vpuri3 commented 1 month ago

My apologies, this was caused by a problem with my environment (I was using LLVM 7 instead of LLVM 6).