Closed avik-pal closed 2 weeks ago
I can't reproduce this, so it is likely related to the launch configuration (i.e., only triggering on certain devices where specific launch configurations are used). Could you perhaps `@show` the following variables to show what's happening?
https://github.com/JuliaGPU/CUDA.jl/blob/e1e5be2b6bf17f03a367cebeb18c4645e593f80d/lib/cublas/linalg.jl#L737-L759
julia> size(kron(cu(rand(100,1)), cu(rand(3, 1))))
(m, n, p, q) = (100, 1, 3, 1)
sizes = (100, 1)
config = (blocks = 14, threads = 640)
dim_ratio = 100.0
(max_threads_i, max_threads_j, max_blocks_i, max_blocks_j) = (252, 2, 37, 0)
(threads_i, threads_j, blocks_i, blocks_j) = (100, 1, 1, 0)
Describe the bug
`kron` throws an error for specific matrix sizes.
To reproduce
The Minimal Working Example (MWE) for this bug:
Long Error Message
```julia
ERROR: Grid dimensions should be non-null
Stacktrace:
  [1] error(s::String) @ Base ./error.jl:35
  [2] diagnose_launch_failure(f::CuFunction, err::CuError; blockdim::CuDim3, threaddim::CuDim3, shmem::Int64) @ CUDA ~/.julia/packages/CUDA/75aiI/lib/cudadrv/execution.jl:84
  [3] launch(::CuFunction, ::CUDA.KernelState, ::CuDeviceMatrix{…}, ::CuDeviceMatrix{…}, ::CuDeviceMatrix{…}, ::Int64, ::Int64, ::Int64, ::Int64; blocks::Tuple{…}, threads::Tuple{…}, cooperative::Bool, shmem::Int64, stream::CuStream) @ CUDA ~/.julia/packages/CUDA/75aiI/lib/cudadrv/execution.jl:73
  [4] launch @ ~/.julia/packages/CUDA/75aiI/lib/cudadrv/execution.jl:52 [inlined]
  [5] #972 @ ~/.julia/packages/CUDA/75aiI/lib/cudadrv/execution.jl:189 [inlined]
  [6] macro expansion @ ~/.julia/packages/CUDA/75aiI/lib/cudadrv/execution.jl:149 [inlined]
  [7] macro expansion @ ./none:0 [inlined]
  [8] convert_arguments @ ./none:0 [inlined]
  [9] #cudacall#971 @ ~/.julia/packages/CUDA/75aiI/lib/cudadrv/execution.jl:191 [inlined]
 [10] cudacall @ ~/.julia/packages/CUDA/75aiI/lib/cudadrv/execution.jl:187 [inlined]
 [11] macro expansion @ ~/.julia/packages/CUDA/75aiI/src/compiler/execution.jl:268 [inlined]
 [12] macro expansion @ ./none:0 [inlined]
 [13] call @ ./none:0 [inlined]
 [14] (::CUDA.HostKernel{…})(::CuArray{…}, ::CuArray{…}, ::CuArray{…}, ::Int64, ::Int64, ::Int64, ::Int64; threads::Tuple{…}, blocks::Tuple{…}, kwargs::@Kwargs{}) @ CUDA ~/.julia/packages/CUDA/75aiI/src/compiler/execution.jl:390
 [15] HostKernel @ ~/.julia/packages/CUDA/75aiI/src/compiler/execution.jl:389 [inlined]
 [16] kron!(C::CuArray{Float32, 2, CUDA.DeviceMemory}, A::CuArray{Float32, 2, CUDA.DeviceMemory}, B::CuArray{Float32, 2, CUDA.DeviceMemory}) @ CUDA.CUBLAS ~/.julia/packages/CUDA/75aiI/lib/cublas/linalg.jl:761
 [17] kron(A::CuArray{Float32, 2, CUDA.DeviceMemory}, B::CuArray{Float32, 2, CUDA.DeviceMemory}) @ CUDA.CUBLAS ~/.julia/packages/CUDA/75aiI/lib/cublas/linalg.jl:773
 [18] top-level scope @ REPL[72]:1
 [19] top-level scope @ none:1

caused by: CUDA error: invalid argument (code 1, ERROR_INVALID_VALUE)
Stacktrace:
  [1] throw_api_error(res::CUDA.cudaError_enum) @ CUDA ~/.julia/packages/CUDA/75aiI/lib/cudadrv/libcuda.jl:30
  [2] check @ ~/.julia/packages/CUDA/75aiI/lib/cudadrv/libcuda.jl:37 [inlined]
  [3] cuLaunchKernel @ ~/.julia/packages/CUDA/75aiI/lib/utils/call.jl:34 [inlined]
  [4] (::CUDA.var"#966#967"{Bool, Int64, CuStream, CuFunction, CuDim3, CuDim3})(kernelParams::Vector{Ptr{Nothing}}) @ CUDA ~/.julia/packages/CUDA/75aiI/lib/cudadrv/execution.jl:66
  [5] macro expansion @ ~/.julia/packages/CUDA/75aiI/lib/cudadrv/execution.jl:33 [inlined]
  [6] macro expansion @ ./none:0 [inlined]
  [7] pack_arguments(::CUDA.var"#966#967"{Bool, Int64, CuStream, CuFunction, CuDim3, CuDim3}, ::CUDA.KernelState, ::CuDeviceMatrix{Float32, 1}, ::CuDeviceMatrix{Float32, 1}, ::CuDeviceMatrix{Float32, 1}, ::Int64, ::Int64, ::Int64, ::Int64) @ CUDA ./none:0
  [8] launch(::CuFunction, ::CUDA.KernelState, ::CuDeviceMatrix{…}, ::CuDeviceMatrix{…}, ::CuDeviceMatrix{…}, ::Int64, ::Int64, ::Int64, ::Int64; blocks::Tuple{…}, threads::Tuple{…}, cooperative::Bool, shmem::Int64, stream::CuStream) @ CUDA ~/.julia/packages/CUDA/75aiI/lib/cudadrv/execution.jl:59
  [9] launch @ ~/.julia/packages/CUDA/75aiI/lib/cudadrv/execution.jl:52 [inlined]
 [10] #972 @ ~/.julia/packages/CUDA/75aiI/lib/cudadrv/execution.jl:189 [inlined]
 [11] macro expansion @ ~/.julia/packages/CUDA/75aiI/lib/cudadrv/execution.jl:149 [inlined]
 [12] macro expansion @ ./none:0 [inlined]
 [13] convert_arguments @ ./none:0 [inlined]
 [14] #cudacall#971 @ ~/.julia/packages/CUDA/75aiI/lib/cudadrv/execution.jl:191 [inlined]
 [15] cudacall @ ~/.julia/packages/CUDA/75aiI/lib/cudadrv/execution.jl:187 [inlined]
 [16] macro expansion @ ~/.julia/packages/CUDA/75aiI/src/compiler/execution.jl:268 [inlined]
 [17] macro expansion @ ./none:0 [inlined]
 [18] call @ ./none:0 [inlined]
 [19] (::CUDA.HostKernel{…})(::CuArray{…}, ::CuArray{…}, ::CuArray{…}, ::Int64, ::Int64, ::Int64, ::Int64; threads::Tuple{…}, blocks::Tuple{…}, kwargs::@Kwargs{}) @ CUDA ~/.julia/packages/CUDA/75aiI/src/compiler/execution.jl:390
 [20] HostKernel @ ~/.julia/packages/CUDA/75aiI/src/compiler/execution.jl:389 [inlined]
 [21] kron!(C::CuArray{Float32, 2, CUDA.DeviceMemory}, A::CuArray{Float32, 2, CUDA.DeviceMemory}, B::CuArray{Float32, 2, CUDA.DeviceMemory}) @ CUDA.CUBLAS ~/.julia/packages/CUDA/75aiI/lib/cublas/linalg.jl:761
 [22] kron(A::CuArray{Float32, 2, CUDA.DeviceMemory}, B::CuArray{Float32, 2, CUDA.DeviceMemory}) @ CUDA.CUBLAS ~/.julia/packages/CUDA/75aiI/lib/cublas/linalg.jl:773
 [23] top-level scope @ REPL[72]:1
 [24] top-level scope @ none:1
Some type information was truncated. Use `show(err)` to see complete types.
```
Version info
Details on Julia:
Details on CUDA: