JuliaGPU / KernelAbstractions.jl

Heterogeneous programming in Julia
MIT License
364 stars 65 forks source link

[EnzymeExt] tape_type error #495

Open michel2323 opened 1 month ago

michel2323 commented 1 month ago

Log attached: printall.log. I am getting this error:

ERROR: BoundsError: attempt to access 0-element Vector{LLVM.LLVMType} at index [1]
Stacktrace:
  [1] getindex
    @ ./essentials.jl:13 [inlined]
  [2] call!(builder::LLVM.IRBuilder, rt::GPUCompiler.Runtime.RuntimeMethodInstance, args::Vector{LLVM.ConstantExpr})
    @ GPUCompiler /disk/mschanen/julia_depot/packages/GPUCompiler/Y4hSX/src/rtlib.jl:39
  [3] emit_exception!(builder::LLVM.IRBuilder, name::String, inst::LLVM.UnreachableInst)
    @ GPUCompiler /disk/mschanen/julia_depot/packages/GPUCompiler/Y4hSX/src/irgen.jl:219
  [4] emit_error(B::LLVM.IRBuilder, orig::LLVM.UnreachableInst, string::String)
    @ Enzyme.Compiler /disk/mschanen/julia_depot/dev/Enzyme/src/compiler.jl:1647
  [5] codegen(output::Symbol, job::GPUCompiler.CompilerJob{…}; libraries::Bool, deferred_codegen::Bool, optimize::Bool, toplevel::Bool, strip::Bool, validate::Bool, only_entry::Bool, parent_job::GPUCompiler.CompilerJob{…})
    @ Enzyme.Compiler /disk/mschanen/julia_depot/dev/Enzyme/src/compiler.jl:5912
  [6] codegen
    @ /disk/mschanen/julia_depot/dev/Enzyme/src/compiler.jl:5143 [inlined]
  [7] (::Enzyme.var"#70#71"{GPUCompiler.CompilerJob{…}, UInt64, GPUCompiler.CompilerJob{…}})(ctx::LLVM.Context)
    @ Enzyme /disk/mschanen/julia_depot/dev/Enzyme/src/Enzyme.jl:761
  [8] JuliaContext(f::Enzyme.var"#70#71"{GPUCompiler.CompilerJob{…}, UInt64, GPUCompiler.CompilerJob{…}}; kwargs::@Kwargs{})
    @ GPUCompiler /disk/mschanen/julia_depot/packages/GPUCompiler/Y4hSX/src/driver.jl:52
  [9] JuliaContext
    @ /disk/mschanen/julia_depot/packages/GPUCompiler/Y4hSX/src/driver.jl:42 [inlined]
 [10] tape_type(::GPUCompiler.CompilerJob{…}, ::EnzymeCore.ReverseModeSplit{…}, ::Type{…}, ::Type{…}, ::Type{…}, ::Type{…}, ::Type{…})
    @ Enzyme /disk/mschanen/julia_depot/dev/Enzyme/src/Enzyme.jl:760
 [11] _create_tape_kernel(::KernelAbstractions.Kernel{…}, ::Val{…}, ::Type, ::Type, ::Tuple{…}, ::KernelAbstractions.NDIteration.NDRange{…}, ::Duplicated{…}, ::Vararg{…})
    @ EnzymeExt /disk/mschanen/julia_depot/dev/KernelAbstractions/ext/EnzymeExt.jl:146

when running

using KernelAbstractions
using CUDA
using Enzyme
using Adapt

# KernelAbstractions kernel used by the reproducer: an element-wise copy.
# Each work-item reads its global linear index `i` and writes `x[i]` into `y[i]`.
# No bounds check is performed here, so the launch range must not exceed the
# length of either array.
@kernel function stencil_kernel!(y, x)
    i = @index(Global, Linear)
    y[i] = x[i]
end

# Launch `stencil_kernel!` on `backend`, copying `x` into `y`.
# The kernel is instantiated with `64` as its second argument (the workgroup
# size in KernelAbstractions' kernel constructor) and run over `ndrange=64`.
# There is no explicit `return`, so the value of the launch expression is
# returned, and no synchronization call is issued in this function.
function stencil!(y, x, backend)
    stencil_kernel!(backend, 64)(y, x, ndrange=64)
end

# Entry point handed to `autodiff` in `adjoint`: runs `stencil!` and discards
# its result so that the function always returns `nothing`.
function driver!(y,x, backend)
    stencil!(y, x, backend)
    return nothing
end

# Run the undifferentiated `driver!` on `backend` as a sanity check.
# Builds a 64-element input of ones and a 64-element output of zeros, both
# passed through `adapt(backend, ...)`, runs the driver, and returns `true`
# when every element of the output equals 1.0.
function primal(backend)
    src = adapt(backend, ones(64))
    dst = adapt(backend, zeros(64))

    driver!(dst, src, backend)

    # Element-wise comparison first, then reduce to a single Bool.
    matches = dst .== 1.0
    return all(matches)
end

# Differentiate `driver!` in reverse mode on `backend` and check the gradient.
# The primal input/output and their shadows are 64-element arrays passed through
# `adapt(backend, ...)`. The output shadow is seeded with ones and the input
# shadow starts at zero. Returns `true` when every element of the input shadow
# equals 1.0 after `autodiff`.
function adjoint(backend)
    x_primal = adapt(backend, ones(64))
    y_primal = adapt(backend, zeros(64))

    x_shadow = adapt(backend, zeros(64))
    y_shadow = adapt(backend, ones(64))

    # Pair each primal array with its shadow; the backend is passed as a constant.
    y_arg = Duplicated(y_primal, y_shadow)
    x_arg = Duplicated(x_primal, x_shadow)
    autodiff(Reverse, driver!, Const, y_arg, x_arg, Const(backend))

    return all(x_shadow .== 1.0)
end

# Sanity check: run the undifferentiated driver on both backends.
primal(CPU())
primal(CUDABackend())

# Reverse-mode run on the CPU backend.
@time adjoint(CPU())
# NOTE(review): the error quoted below presumably belongs to the
# `adjoint(CUDABackend())` call that follows, since the report states that the
# CPU reverse run succeeds — confirm.
# BoundsError: attempt to access 0-element Vector{LLVM.LLVMType} at index [1]
# Enzyme.Compiler /disk/mschanen/julia_depot/dev/Enzyme/src/compiler.jl:5912
# emit_error(b, term, "Enzyme: The original primal code hits this error condition, thus differentiating it does not make sense")
@time adjoint(CUDABackend())

It seems to crash at:

# Enzyme.Compiler /disk/mschanen/julia_depot/dev/Enzyme/src/compiler.jl:5912
emit_error(b, term, "Enzyme: The original primal code hits this error condition, thus differentiating it does not make sense")

However, both the original primal with CUDABackend() and the CPU reverse pass run successfully.

wsmoses commented 1 month ago

Looks like the same one I found in https://github.com/JuliaGPU/GPUCompiler.jl/issues/596

wsmoses commented 1 month ago

The underlying cause on the Enzyme side should now be resolved.

However, the fact that this error was hit also implies that the Enzyme KA rule was never hit, which is bad.

cc @vchuravy

michel2323 commented 1 month ago

What do you mean by "never hit"? It did hit the rules in the tests, or at least the code in EnzymeExt. Enzyme would crash without the rules.

Edit: Confirming it's resolved.