JuliaGPU / CUDAnative.jl

Julia support for native CUDA programming
https://juliagpu.org/cuda/

Missing typevar results in japi3 function #265

Closed · dextorious closed this issue 5 years ago

dextorious commented 5 years ago

When I run the following kernel (taken from a Lattice Boltzmann simulation)

function advance_gpu!(ρ::CuDeviceArray{T,D}, u::CuDeviceArray{SVector{D,T},D},
        δt::T, δx::T, τ::T, e::CuDeviceVector{SVector{D,T}}, w::CuDeviceVector{T}) where {D,Q,T<:AbstractFloat}
    x = (blockIdx().x-1) * blockDim().x + threadIdx().x
    y = (blockIdx().y-1) * blockDim().y + threadIdx().y
    NX, NY = size(u)
    cs = one(T) / sqrt(T(3))
    eq0 = zero(MVector{Q,T})
    # predictor step
    ρ0, u0 = zero(T), zero(SVector{D,T})
    for q ∈ 1 : Q
        i, j = Int(x-δt*e[q][1]), Int(y-δt*e[q][2])
        if     i < 1    i = NX
        elseif i > NX   i = 1   end
        if     j < 1    j = NY
        elseif j > NY   j = 1   end
        uij = u[i,j]
        uu = dot(uij, uij)
        eu = dot(e[q], uij)
        eq0[q] = ρ[i,j] * w[q] * ( one(T) + eu/cs^2 + 0.5*(eu^2 - cs*uu)/cs^4 )
        ρ0 += eq0[q]
        u0 += eq0[q] * e[q]
    end
    u0 = u0 / ρ0

    # corrector step
    ρ[x,y] = ρ0
    sumneq = zero(T)
    uu = dot(u0, u0)
    for q ∈ 1 : Q
        eu = dot(e[q], u0)
        eq1 = ρ0 * w[q] * ( one(T) + eu/cs^2 + 0.5*(eu^2 - cs*uu)/cs^4 )
        sumneq += e[q] * (eq1 - eq0[q])
    end
    u[x,y] = u0 - (one(T) - one(T)/τ) * sumneq / ρ0
    nothing
end

I get the following error:

ERROR: CUDAnative.jl encountered an unexpected internal compiler error.
Please file an issue attaching the following information, including the backtrace,
as well as a reproducible example (if possible).

InternalCompilerError: wrapper != nothing, at C:\Users\admin\.julia\packages\CUDAnative\AGfq2\src\compiler\irgen.jl:159

Compiler invocation:
 - f = advance_gpu!
 - tt = Tuple{CuDeviceArray{Float32,2,CUDAnative.AS.Global},CuDeviceArray{SArray{Tuple{2},Float32,1,2},2,CUDAnative.AS.Global},Float32,Float32,Float32,CuDeviceArray{SArray{Tuple{2},Float32,1,2},1,CUDAnative.AS.Global},CuDeviceArray{Float32,1,CUDAnative.AS.Global}}
 - cap = v"5.2.0"
 - kernel = true
 - alias = nothing
 - minthreads = nothing
 - maxthreads = nothing
 - blocks_per_sm = nothing
 - maxregs = nothing

Installed packages:
 - OpenCL = 0.7.0
 - GR = 0.34.1
 - Revise = 0.7.12
 - Atom = 0.7.8
 - BenchmarkTools = 0.4.1
 - NBodySimulator = 0.0.3+
 - CUDAdrv = 0.8.6
 - NumericalIntegration = 0.2.0
 - Juno = 0.5.3
 - StochasticDiffEq = 5.8.0
 - PyCall = 1.18.4
 - LaTeXStrings = 1.0.3
 - NPZ = 0.3.0
 - MAT = 0.4.0
 - StatsBase = 0.25.0
 - Makie = 0.9.0+
 - CuArrays = 0.8.0
 - AbstractPlotting = 0.9.0+
 - OrdinaryDiffEq = 4.13.0
 - LsqFit = 0.6.0
 - GeometryTypes = 0.6.2
 - Plots = 0.19.3
 - PyPlot = 2.6.3
 - ProgressMeter = 0.6.1
 - CUDAnative = 0.9.1
 - DiffEqCallbacks = 2.1.0
 - FileIO = 1.0.2
 - GPUArrays = 0.4.2
 - DifferentialEquations = 5.2.1
 - GLFW = 2.2.0+
 - RecipesBase = 0.5.0
 - CLArrays = 0.1.3
 - CurveFit = 0.1.1
 - Parameters = 0.10.1
 - RecursiveArrayTools = 0.17.2
 - StaticArrays = 0.8.3
 - DiffEqBase = 4.21.3
 - Reexport = 0.2.0
 - Unitful = 0.12.0

Julia Version 0.7.0
Commit a4cb80f3ed (2018-08-08 06:46 UTC)
Platform Info:
  OS: Windows (x86_64-w64-mingw32)
  CPU: AMD Ryzen 7 2700X Eight-Core Processor
  WORD_SIZE: 64
  LIBM: libopenlibm
  LLVM: libLLVM-6.0.0 (ORCJIT, znver1)
Environment:
  JULIA_EDITOR = "C:\Users\admin\AppData\Local\atom\app-1.31.2\atom.exe" -a
  JULIA_NUM_THREADS = 16

Stacktrace:
 [1] irgen(::CUDAnative.CompilerContext) at C:\Users\admin\.julia\packages\CUDAnative\AGfq2\src\compiler\irgen.jl:159
 [2] #compile_function#78(::Bool, ::Function, ::CUDAnative.CompilerContext) at .\logging.jl:308
 [3] compile_function at C:\Users\admin\.julia\packages\CUDAnative\AGfq2\src\compiler\driver.jl:56 [inlined]
 [4] #cufunction#77(::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::Function, ::CuDevice, ::Any, ::Any) at C:\Users\admin\.julia\packages\CUDAnative\AGfq2\src\compiler\driver.jl:22
 [5] cufunction at C:\Users\admin\.julia\packages\CUDAnative\AGfq2\src\compiler\driver.jl:10 [inlined]
 [6] macro expansion at C:\Users\admin\.julia\packages\CUDAnative\AGfq2\src\execution.jl:180 [inlined]
 [7] _cuda(::typeof(advance_gpu!), ::Tuple{}, ::NamedTuple{(:threads,),Tuple{Tuple{Int64,Int64}}}, ::CuDeviceArray{Float32,2,CUDAnative.AS.Global}, ::CuDeviceArray{SArray{Tuple{2},Float32,1,2},2,CUDAnative.AS.Global}, ::Float32, ::Float32, ::Float32, ::CuDeviceArray{SArray{Tuple{2},Float32,1,2},1,CUDAnative.AS.Global}, ::CuDeviceArray{Float32,1,CUDAnative.AS.Global}) at C:\Users\admin\.julia\packages\CUDAnative\AGfq2\src\execution.jl:139
 [8] advance_gpu!(::Lattice{2,9,Float32}, ::Int64) at D:\Code\Julia\shslbm.jl:121
 [9] advance_gpu!(::Lattice{2,9,Float32}) at D:\Code\Julia\shslbm.jl:116
 [10] top-level scope at util.jl:156

I'm not sure how to proceed with debugging this and was advised to post an issue here. The entire code, including a working CPU fallback, initialization, and a wrapper for the GPU kernel, is available in the following gist: https://gist.github.com/dextorious/e0a1d03a4fdfd29dc6cb5d58d84c53c1

Just running the file should automatically trigger the error.

maleadt commented 5 years ago

MWE:

julia> using CUDAnative

julia> foo() where {Q} = nothing
foo (generic function with 1 method)

julia> code_llvm(foo, Tuple{})

; Function foo
; Location: REPL[1]:1
define nonnull %jl_value_t addrspace(10)* @japi3_foo_35164(%jl_value_t addrspace(10)**, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)**, i32) #0 {
top:
  %4 = alloca %jl_value_t addrspace(10)**, align 8
  store volatile %jl_value_t addrspace(10)** %2, %jl_value_t addrspace(10)*** %4, align 8
  ret %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 140671766708232 to %jl_value_t*) to %jl_value_t addrspace(10)*)
}

julia> foo()

julia> CUDAnative.code_llvm(foo, Tuple{})
ERROR: CUDAnative.jl encountered an unexpected internal compiler error.
Please file an issue attaching the following information, including the backtrace,
as well as a reproducible example (if possible).

InternalCompilerError: wrapper != nothing, at /home/tbesard/Julia/CUDAnative/src/compiler/irgen.jl:159

Still a bug though; the compiler should catch this :slightly_smiling_face:
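
In the meantime, a minimal sketch of a workaround (the `bar` method below is a hypothetical example, not part of the original report): bind every type variable in the `where` clause through the signature, e.g. via a `Val` argument, so the method can be fully specialized and no japi3 entry point is emitted.

using CUDAnative

# Q is not bound by any argument type, so Julia cannot specialize on it and
# falls back to a generic japi3 entry point, which CUDAnative rejects.
foo() where {Q} = nothing

# Hypothetical fix: bind Q through a Val argument so the method is fully
# specialized and device code generation can succeed.
bar(::Val{Q}) where {Q} = nothing

CUDAnative.code_llvm(bar, Tuple{Val{9}})   # Q == 9 is known at compile time
# CUDAnative.code_llvm(foo, Tuple{})       # fails with the InternalCompilerError above

The same pattern should apply to the original kernel: since the lattice in the stack trace is a Lattice{2,9,Float32}, passing Val(9) (or Val(Q) from the host wrapper) as an extra kernel argument binds Q in the signature instead of leaving it dangling in the where clause.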