EnzymeAD / Enzyme.jl

Julia bindings for the Enzyme automatic differentiator
https://enzyme.mit.edu
MIT License
455 stars 63 forks source link

On 1.6 CUDA.jl device functions cause compilation issues #581

Closed wsmoses closed 1 year ago

wsmoses commented 1 year ago
# HACK: work around Pkg.jl#2500
# if VERSION < v"1.8-"
# test_project = Base.active_project()
# preferences_file = joinpath(dirname(@__DIR__), "LocalPreferences.toml")
# test_preferences_file = joinpath(dirname(test_project), "LocalPreferences.toml")
# if isfile(preferences_file) && !isfile(test_preferences_file)
#     cp(preferences_file, test_preferences_file)
# end
# end

using Enzyme

# using Enzyme_jll
# @info "Testing against" Enzyme_jll.libEnzyme

# Evaluate `sin(x)` through `Base.invokelatest` rather than a direct call.
# The `::Float64` return annotation converts the result to `Float64`.
function genlatestsin(x)::Float64
    return Base.invokelatest(sin, x)
end

# Forward-mode differentiation of `genlatestsin` with the input wrapped as
# `Duplicated(2.0, 1.0)`; only the first element of the returned result is kept.
# NOTE(review): presumably 2.0 is the primal and 1.0 the tangent seed — confirm
# against the Enzyme.jl `Duplicated` documentation.
Enzyme.autodiff(Forward, genlatestsin, Duplicated(2.0, 1.0))[1]

# Append `y` to the end of `x` (mutating `x` in place), then return the sum of
# the first two elements of the grown collection.
function pusher(x, y)
    push!(x, y)
    head = x[1]
    next = x[2]
    return head + next
end

# Inputs for the reverse-mode `pusher` call below, which is currently disabled:
# `x` and `dx` are passed together as `Duplicated(x, dx)`.
# NOTE(review): presumably `dx` is the shadow (gradient accumulator) for `x` — confirm.
x  = [2.3]
dx = [0.0]
# Enzyme.autodiff(Reverse, pusher, Duplicated(x, dx), Active(2.0))

using CUDA

# Evaluate `sin(x)` through `Base.invokelatest` instead of a direct call.
# No return-type annotation: the result is whatever `sin` returns.
function genericsin(x)
    return Base.invokelatest(sin, x)
end
# Batched forward-mode call on `genericsin`: the input 2.0 is paired with a
# 10-element tuple of Float64 values 1.0, 2.0, ..., 10.0 via `BatchDuplicated`.
# The trace below shows `runtime_generic_fwd` with `width::Val{10}` and
# `f::typeof(sin)` ending in the reported KeyError on Julia 1.6.
res = Enzyme.autodiff(Forward, genericsin, BatchDuplicated(2.0, NTuple{10,Float64}((Float64(i) for i in 1:10))))[1]
wmoses@beast:~/git/Enzyme.jl (fastgen3) $ ./julia-1.6.7/bin/julia --project silly.jl 
ERROR: LoadError: KeyError: key MethodInstance for sin(::Float64) not found
Stacktrace:
  [1] getindex
    @ ./dict.jl:482 [inlined]
  [2] irgen(job::GPUCompiler.CompilerJob, method_instance::Core.MethodInstance; ctx::LLVM.Context)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/qdoh1/src/irgen.jl:7
  [3] macro expansion
    @ ~/.julia/packages/GPUCompiler/qdoh1/src/driver.jl:215 [inlined]
  [4] macro expansion
    @ ~/.julia/packages/TimerOutputs/LHjFw/src/TimerOutput.jl:253 [inlined]
  [5] macro expansion
    @ ~/.julia/packages/GPUCompiler/qdoh1/src/driver.jl:214 [inlined]
  [6] emit_llvm(job::GPUCompiler.CompilerJob, method_instance::Any; libraries::Bool, deferred_codegen::Bool, optimize::Bool, cleanup::Bool, only_entry::Bool, ctx::LLVM.Context)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/qdoh1/src/utils.jl:83
  [7] codegen(output::Symbol, job::GPUCompiler.CompilerJob; libraries::Bool, deferred_codegen::Bool, optimize::Bool, cleanup::Bool, strip::Bool, validate::Bool, only_entry::Bool, parent_job::Nothing, ctx::LLVM.Context)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/qdoh1/src/driver.jl:115
  [8] codegen(output::Symbol, job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams, GPUCompiler.FunctionSpec{typeof(sin), Tuple{Float64}}}; libraries::Bool, deferred_codegen::Bool, optimize::Bool, ctx::LLVM.Context, strip::Bool, validate::Bool, only_entry::Bool, parent_job::Nothing)
    @ Enzyme.Compiler ~/git/Enzyme.jl/src/compiler.jl:6161
  [9] _thunk(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams, GPUCompiler.FunctionSpec{typeof(sin), Tuple{Float64}}}, ctx::Nothing)
    @ Enzyme.Compiler ~/git/Enzyme.jl/src/compiler.jl:6986
 [10] _thunk(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams, GPUCompiler.FunctionSpec{typeof(sin), Tuple{Float64}}})
    @ Enzyme.Compiler ~/git/Enzyme.jl/src/compiler.jl:6980
 [11] cached_compilation(job::GPUCompiler.CompilerJob, key::UInt64, specid::UInt64)
    @ Enzyme.Compiler ~/git/Enzyme.jl/src/compiler.jl:7024
 [12] #s448#146
    @ ~/git/Enzyme.jl/src/compiler.jl:7084 [inlined]
 [13] var"#s448#146"(F::Any, Fn::Any, DF::Any, A::Any, TT::Any, Mode::Any, ModifiedBetween::Any, width::Any, specid::Any, ReturnPrimal::Any, ShadowInit::Any, ::Any, #unused#::Type, f::Any, df::Any, #unused#::Type, tt::Any, #unused#::Type, #unused#::Type, #unused#::Type, #unused#::Type, #unused#::Type, #unused#::Any)
    @ Enzyme.Compiler ./none:0
 [14] (::Core.GeneratedFunctionStub)(::Any, ::Vararg{Any, N} where N)
    @ Core ./boot.jl:571
 [15] thunk
    @ ~/git/Enzyme.jl/src/compiler.jl:7117 [inlined]
 [16] thunk
    @ ~/git/Enzyme.jl/src/compiler.jl:7110 [inlined]
 [17] runtime_generic_fwd(activity::Val{(false, true)}, width::Val{10}, RT::Val{NamedTuple{(Symbol("1"), Symbol("2"), Symbol("3"), Symbol("4"), Symbol("5"), Symbol("6"), Symbol("7"), Symbol("8"), Symbol("9"), Symbol("10"), Symbol("11")), NTuple{11, Any}}}, f::typeof(sin), df::Nothing, df_2::Nothing, df_3::Nothing, df_4::Nothing, df_5::Nothing, df_6::Nothing, df_7::Nothing, df_8::Nothing, df_9::Nothing, df_10::Nothing, primal_1::Float64, shadow_1_1::Float64, shadow_1_2::Float64, shadow_1_3::Float64, shadow_1_4::Float64, shadow_1_5::Float64, shadow_1_6::Float64, shadow_1_7::Float64, shadow_1_8::Float64, shadow_1_9::Float64, shadow_1_10::Float64)
    @ Enzyme.Compiler ~/git/Enzyme.jl/src/compiler.jl:962
in expression starting at /home/wmoses/git/Enzyme.jl/silly.jl:32
wsmoses commented 1 year ago

Fails only on Julia 1.6 (succeeds on CI with Julia 1.7 and 1.8).

vchuravy commented 1 year ago

Haven't had time to look into this. cc: @maleadt in case it tickles a memory.

Note: Reproducing this requires running with the changes from https://github.com/EnzymeAD/Enzyme.jl/pull/537

maleadt commented 1 year ago

Sorry, no. Could you dump the compile dict to see what's in there?

vchuravy commented 1 year ago

Well that is odd:

compiled = Dict{Any, Any}(MethodInstance for genericsin(::Float64) => (ci = Core.CodeInstance(MethodInstance for genericsin(::Float64), #undef, 0x0000000000001c46, 0xffffffffffffffff, Any, #undef, UInt8[0x0c, 0x02, 0x00, 0x00, 0x00, 0x00, 0x08, 0x16, 0x88, 0xc2, 0xe6, 0x1f, 0x20, 0x11, 0x04, 0x2e, 0x03, 0x01, 0x17, 0x11, 0x05, 0x38, 0xc1, 0x37, 0x03, 0x02, 0x16, 0x88, 0xc2, 0xe6, 0x11, 0x06, 0x3e, 0x3e, 0x16, 0x01, 0xc2, 0xf2, 0x00, 0x00, 0x00, 0x3d, 0x16, 0x88, 0xc5, 0x27, 0x00, 0x2f, 0x2f, 0x23, 0x11, 0x02, 0x11, 0x03, 0xc3, 0xbf, 0x2f, 0x21, 0x11, 0x00, 0x56, 0x2b, 0xc2, 0x02, 0xc0, 0x2f, 0x21, 0x11, 0x07, 0x56, 0x2b, 0xc2, 0x02, 0xc1, 0x2f, 0x21, 0x11, 0x07, 0x56, 0x2b, 0xc3, 0x02, 0xc1, 0x2f, 0x21, 0x11, 0x07, 0x56, 0x2b, 0xc4, 0x02, 0xc1, 0x2f, 0x21, 0x11, 0x07, 0x56, 0x2b, 0xc6, 0x02, 0xc1, 0x3d, 0x05, 0x05, 0x01], false, false, Ptr{Nothing} @0x0000000000000000, Ptr{Nothing} @0x0000000000000000), func = "jfptr_genericsin_1978", specfunc = "julia_genericsin_1977"))
ERROR: LoadError: KeyError: key MethodInstance for sin(::Float64) not found
vchuravy commented 1 year ago
using Enzyme

# Stand-alone module that declares a device method table and a
# `@device_override` macro, then registers one override for
# `Base.sin(::Float64)`, so the issue can be reproduced without `using CUDA`.
module Mock
    import GPUCompiler
    # local method table for device functions
    # When `Base.Experimental.@overlay` exists, a real method table is declared;
    # otherwise `method_table` is just `nothing`.
    @static if isdefined(Base.Experimental, Symbol("@overlay"))
    Base.Experimental.@MethodTable(method_table)
    else
    const method_table = nothing
    end

    # list of overrides (only for Julia 1.6)
    # Filled by `@device_override` when `@overlay` is unavailable and drained
    # by `__init__`.
    const overrides = Expr[]

    # Wrap the method definition `ex` in `GPUCompiler.@override` against
    # `method_table`.
    #
    # `ex` is first macro-expanded in the calling module. If `@overlay` is
    # available the wrapped definition is returned (escaped) so it takes effect
    # at the call site; otherwise it is queued in `overrides` and the macro
    # returns `nothing`.
    macro device_override(ex)
        ex = macroexpand(__module__, ex)
        # A bare call expression is not accepted here: it is evaluated, shown,
        # and then the macro aborts.
        # NOTE(review): `error()` carries no message — looks like a debugging
        # guard; confirm intent.
        if Meta.isexpr(ex, :call)
            @show ex = eval(ex)
            error()
        end
        code = quote
            $GPUCompiler.@override($method_table, $ex)
        end
        if isdefined(Base.Experimental, Symbol("@overlay"))
            return esc(code)
        else
            # Defer registration; `__init__` evaluates the queued expressions.
            push!(overrides, code)
            return
        end
    end

    # Override `sin(::Float64)` with a `ccall` to the external symbol
    # `__nv_sin` using the `llvmcall` calling convention.
    @device_override Base.sin(x::Float64) = ccall("extern __nv_sin", llvmcall, Cdouble, (Cdouble,), x)

    # Module initializer: evaluates every queued override as one block and
    # then clears the queue.
    function __init__()
        # register device overrides
        eval(Expr(:block, overrides...))
        empty!(overrides)
    end
end

# Route the call to `sin` through `Base.invokelatest` instead of calling it
# directly; the result is returned unchanged.
function genericsin(x)
    return Base.invokelatest(sin, x)
end
# Batched forward-mode call on `genericsin`: the input 2.0 is paired with a
# 10-element tuple of Float64 values 1.0, 2.0, ..., 10.0 via `BatchDuplicated`;
# the first element of the result is stored in `res`.
res = Enzyme.autodiff(Forward, genericsin, BatchDuplicated(2.0, NTuple{10,Float64}((Float64(i) for i in 1:10))))[1]