EnzymeAD / Enzyme.jl

Julia bindings for the Enzyme automatic differentiator
https://enzyme.mit.edu
MIT License
439 stars 62 forks source link

confusing error message in forward over reverse #1870

Open ExpandingMan opened 2 days ago

ExpandingMan commented 2 days ago
using Enzyme

# Reproduction kernel: scale `x` into the preallocated buffer `tmp`, then
# return the inner product of `tmp` and `x`.
# NOTE(review): `dot` is deliberately unresolved here — the snippet omits
# `using LinearAlgebra` on purpose, to trigger the confusing error message
# this issue reports.
function f_ip(x, tmp)
    tmp .= x ./ 2
    return dot(tmp, x)
end

# Accumulate the reverse-mode gradient of `f_ip` w.r.t. `x` into `dx`.
# Uses `autodiff_deferred` so that this gradient call can itself be
# differentiated (forward-over-reverse) by the caller.
# `tmp` is scratch storage mutated by `f_ip`; its shadow `dtmp` is
# zero-initialized via `make_zero`.
function f_gradient_deferred!(dx, x, tmp)
    dtmp = make_zero(tmp)
    autodiff_deferred(Reverse, Const(f_ip), Active, Duplicated(x, dx), Duplicated(tmp, dtmp))
    return nothing
end

# Hessian-vector product via forward-over-reverse: forward-mode AD applied
# to the reverse-mode gradient routine. Writes the result into `hv`;
# `tmp` is scratch space passed through to `f_ip`.
function f_hvp!(hv, x, v, tmp)
    dx = make_zero(x)       # buffer that receives the primal gradient
    btmp = make_zero(tmp)   # shadow of the scratch buffer
    autodiff(
        Forward,
        f_gradient_deferred!,
        Duplicated(dx, hv),   # tangent of the gradient is the HVP output
        Duplicated(x, v),     # perturb x in direction v
        Duplicated(tmp, btmp),
    )
    return nothing
end

# Driver: 1-element inputs are enough to trigger the reported failure.
x = [1.0]
v = [-1.0]
hv = make_zero(v)   # output buffer for the Hessian-vector product
tmp = similar(x)    # scratch buffer mutated by f_ip

f_hvp!(hv, x, v, tmp)

In this example, all calls to f_ip will fail because dot is not defined (one should add `using LinearAlgebra` to fix this). The error message from calling f_hvp!, however, is quite puzzling:

ERROR: AssertionError: Base.allocatedinline(actualRetType) returns false: actualRetType = Any, rettype = Active{Any}
Stacktrace:
  [1]
    @ Enzyme.Compiler ~/.julia/dev/Enzyme/src/compiler.jl:4170
  [2] enzyme!(job::GPUCompiler.CompilerJob{…}, mod::LLVM.Module, primalf::LLVM.Function, TT::Type, mode::Enzyme.API.CDerivativeMode, width::Int64, parallel::Bool, actualRetType::Type, wrap::Bool, modifiedBetween::Tuple{…}, returnPrimal::Bool, expectedTapeType::Type, loweredArgs::Set{…}, boxedArgs::Set{…})
    @ Enzyme.Compiler ~/.julia/dev/Enzyme/src/compiler.jl:4011
  [3] codegen(output::Symbol, job::GPUCompiler.CompilerJob{…}; libraries::Bool, deferred_codegen::Bool, optimize::Bool, toplevel::Bool, strip::Bool, validate::Bool, only_entry::Bool, parent_job::GPUCompiler.CompilerJob{…})
    @ Enzyme.Compiler ~/.julia/dev/Enzyme/src/compiler.jl:6308
  [4] codegen
    @ ~/.julia/dev/Enzyme/src/compiler.jl:5465 [inlined]
  [5] (::GPUCompiler.var"#187#198"{GPUCompiler.CompilerJob{…}, GPUCompiler.CompilerJob{…}})()
    @ GPUCompiler ~/.julia/packages/GPUCompiler/CaR87/src/driver.jl:224
  [6] get!(default::GPUCompiler.var"#187#198"{…}, h::Dict{…}, key::GPUCompiler.CompilerJob{…})
    @ Base ./dict.jl:479
  [7] macro expansion
    @ ~/.julia/packages/GPUCompiler/CaR87/src/driver.jl:223 [inlined]
  [8] #emit_llvm#186
    @ ~/.julia/packages/GPUCompiler/CaR87/src/utils.jl:108
  [9] emit_llvm
    @ ~/.julia/packages/GPUCompiler/CaR87/src/utils.jl:106 [inlined]
 [10] #codegen#184
    @ ~/.julia/packages/GPUCompiler/CaR87/src/driver.jl:100
 [11] codegen
    @ ~/.julia/packages/GPUCompiler/CaR87/src/driver.jl:82 [inlined]
 [12] codegen(output::Symbol, job::GPUCompiler.CompilerJob{…}; libraries::Bool, deferred_codegen::Bool, optimize::Bool, toplevel::Bool, strip::Bool, validate::Bool, only_entry::Bool, parent_job::Nothing)
    @ Enzyme.Compiler ~/.julia/dev/Enzyme/src/compiler.jl:5497
 [13] codegen
    @ ~/.julia/dev/Enzyme/src/compiler.jl:5465 [inlined]
 [14] _thunk(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}, postopt::Bool)
    @ Enzyme.Compiler ~/.julia/dev/Enzyme/src/compiler.jl:7110
 [15] _thunk
    @ ~/.julia/dev/Enzyme/src/compiler.jl:7110 [inlined]
 [16] cached_compilation
    @ ~/.julia/dev/Enzyme/src/compiler.jl:7151 [inlined]
 [17] thunkbase(ctx::LLVM.Context, mi::Core.MethodInstance, ::Val{…}, ::Type{…}, ::Type{…}, tt::Type{…}, ::Val{…}, ::Val{…}, ::Val{…}, ::Val{…}, ::Val{…}, ::Type{…}, ::Val{…}, ::Val{…})
    @ Enzyme.Compiler ~/.julia/dev/Enzyme/src/compiler.jl:7224
 [18] #s2084#19056
    @ ~/.julia/dev/Enzyme/src/compiler.jl:7266 [inlined]
 [19]
    @ Enzyme.Compiler ./none:0
 [20] (::Core.GeneratedFunctionStub)(::UInt64, ::LineNumberNode, ::Any, ::Vararg{Any})
    @ Core ./boot.jl:602
 [21] autodiff
    @ ~/.julia/dev/Enzyme/src/Enzyme.jl:428 [inlined]
 [22] autodiff
    @ ~/.julia/dev/Enzyme/src/Enzyme.jl:344 [inlined]
 [23] autodiff
    @ ~/.julia/dev/Enzyme/src/Enzyme.jl:325 [inlined]
 [24] f_hvp!(hv::Vector{Float64}, x::Vector{Float64}, v::Vector{Float64}, tmp::Vector{Float64})
    @ Main ~/src/scrap.jl:19

It's probably fair to say that anyone not intimately familiar with Enzyme internals would have a hard time figuring out from this stack trace and error message why the call failed.

Note that if one calls only f_gradient_deferred! one gets

ERROR: Active return values with automatic pullback (differential return value) deduction only supported for floating-like values and not type Any. If mutable memory, please use Duplicated. Otherwise, you can explicitly specify a pullback by using split mode, e.g. autodiff_thunk(ReverseSplitWithPrimal, ...)
Stacktrace:
 [1] error(s::String)
   @ Base ./error.jl:35
 [2] default_adjoint
   @ ~/.julia/dev/Enzyme/src/compiler.jl:6651 [inlined]
 [3] autodiff_deferred
   @ ~/.julia/dev/Enzyme/src/Enzyme.jl:467 [inlined]
 [4] f_gradient_deferred!(dx::Vector{Float64}, x::Vector{Float64}, tmp::Vector{Float64})

This seems a lot more reasonable, because it's pretty clear that Enzyme can't infer the return type of dot and therefore considers the call to autodiff_deferred invalid for this code. Still, if there's any way of showing the user that there is an UndefVarError here, that would surely be useful.

wsmoses commented 2 days ago

Hm, did you start your split tests from your old PR? I ask because this is one of the ABIs that changed, and this error shouldn't happen if you just copy and paste from scratch.

It makes me a bit worried that the new PR to split the tests misses some changes to existing tests, or some new tests (which is why I suggested copying and pasting from scratch rather than rebasing).

ExpandingMan commented 2 days ago

No, I started it entirely from latest main, I wound up not even touching my old branch. This is definitely on latest main.