EnzymeAD / Enzyme.jl

Julia bindings for the Enzyme automatic differentiator
https://enzyme.mit.edu
MIT License
455 stars 63 forks source link

hvp! Failure on Simple Quadratic #1597

Closed RS-Coop closed 4 months ago

RS-Coop commented 4 months ago

Was trying out the newly added Hessian-vector product function hvp! on a simple quadratic and I have run into the following strange error:


julia> using Enzyme: hvp!

julia> n = 10
10

julia> A = randn((n,n))
10×10 Matrix{Float64}:
 -0.490656   1.42018   -1.58321     0.494581   …  -1.77191    -0.0808346   2.08467
  0.449591   0.115893   0.0629379  -1.13509       -0.289725   -0.193639    1.62478
 -0.484999  -1.72228   -0.115046    0.691677       0.413538   -0.771367   -0.288895
 -1.39243   -0.261785  -1.05796    -0.0335503     -0.0408044  -0.712952   -1.42485
  0.04253    2.5166    -0.714064   -0.0372748     -0.310649   -0.285951   -0.578737
  0.579713   0.118267   0.0941903   1.04009    …   0.128445   -1.16517    -0.456552
 -2.0008     1.48337   -0.720863   -0.834935      -2.37018    -0.421089    0.298126
 -1.26819    0.834129  -0.087317   -0.754219      -2.14503    -0.199689   -0.15876
  0.943514   0.271405   0.106145   -0.184987      -1.17377     0.315934    0.172978
  1.01501    0.277746  -1.40313    -0.410305       0.24526    -0.0806078   0.828688

julia> f(x) = x'*A*x
f (generic function with 1 method)

julia> res = zeros(n)
10-element Vector{Float64}:
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0

julia> hvp!(res, f, randn(n), randn(n))
ERROR: AssertionError: Base.allocatedinline(actualRetType) returns false: actualRetType = Any, rettype = EnzymeCore.Active{Any}
Stacktrace:
  [1] create_abi_wrapper(enzymefn::LLVM.Function, TT::Type, rettype::Type, actualRetType::Type, Mode::Enzyme.API.CDerivativeMode, augmented::Nothing, width::Int64, returnPrimal::Bool, shadow_init::Bool, world::UInt64, interp::Enzyme.Compiler.Interpreter.EnzymeInterpreter)
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/baiSZ/src/compiler.jl:3846
  [2] enzyme!(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}, mod::LLVM.Module, primalf::LLVM.Function, TT::Type, mode::Enzyme.API.CDerivativeMode, width::Int64, parallel::Bool, actualRetType::Type, wrap::Bool, modifiedBetween::Tuple{Bool, Bool}, returnPrimal::Bool, expectedTapeType::Type, loweredArgs::Set{Int64}, boxedArgs::Set{Int64})
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/baiSZ/src/compiler.jl:3717
  [3] codegen(output::Symbol, job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}; libraries::Bool, deferred_codegen::Bool, optimize::Bool, toplevel::Bool, strip::Bool, validate::Bool, only_entry::Bool, parent_job::GPUCompiler.CompilerJob{GPUCompiler.NativeCompilerTarget, Enzyme.Compiler.PrimalCompilerParams})
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/baiSZ/src/compiler.jl:5851
  [4] codegen
    @ ~/.julia/packages/Enzyme/baiSZ/src/compiler.jl:5129 [inlined]
  [5] (::GPUCompiler.var"#159#170"{GPUCompiler.CompilerJob{GPUCompiler.NativeCompilerTarget, Enzyme.Compiler.PrimalCompilerParams}, GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}})()
    @ GPUCompiler ~/.julia/packages/GPUCompiler/05oYT/src/driver.jl:261
  [6] get!(default::GPUCompiler.var"#159#170"{GPUCompiler.CompilerJob{GPUCompiler.NativeCompilerTarget, Enzyme.Compiler.PrimalCompilerParams}, GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}}, h::Dict{GPUCompiler.CompilerJob, String}, key::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams})
    @ Base ./dict.jl:468
  [7] macro expansion
    @ ~/.julia/packages/GPUCompiler/05oYT/src/driver.jl:260 [inlined]
  [8] #emit_llvm#158
    @ ~/.julia/packages/GPUCompiler/05oYT/src/utils.jl:103
  [9] emit_llvm
    @ ~/.julia/packages/GPUCompiler/05oYT/src/utils.jl:97 [inlined]
 [10] #codegen#156
    @ ~/.julia/packages/GPUCompiler/05oYT/src/driver.jl:136
 [11] codegen
    @ ~/.julia/packages/GPUCompiler/05oYT/src/driver.jl:115 [inlined]
 [12] codegen(output::Symbol, job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}; libraries::Bool, deferred_codegen::Bool, optimize::Bool, toplevel::Bool, strip::Bool, validate::Bool, only_entry::Bool, parent_job::Nothing)
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/baiSZ/src/compiler.jl:5161
 [13] codegen
    @ ~/.julia/packages/Enzyme/baiSZ/src/compiler.jl:5129 [inlined]
 [14] _thunk(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}, postopt::Bool)
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/baiSZ/src/compiler.jl:6658
 [15] _thunk
    @ ~/.julia/packages/Enzyme/baiSZ/src/compiler.jl:6658 [inlined]
 [16] cached_compilation
    @ ~/.julia/packages/Enzyme/baiSZ/src/compiler.jl:6696 [inlined]
 [17] (::Enzyme.Compiler.var"#28593#28594"{DataType, DataType, Enzyme.API.CDerivativeMode, NTuple{5, Bool}, Int64, Bool, Bool, UInt64, DataType})(ctx::LLVM.Context)
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/baiSZ/src/compiler.jl:6765
 [18] JuliaContext(f::Enzyme.Compiler.var"#28593#28594"{DataType, DataType, Enzyme.API.CDerivativeMode, NTuple{5, Bool}, Int64, Bool, Bool, UInt64, DataType}; kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ GPUCompiler ~/.julia/packages/GPUCompiler/05oYT/src/driver.jl:52
 [19] JuliaContext(f::Function)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/05oYT/src/driver.jl:42
 [20] #s2008#28592
    @ ~/.julia/packages/Enzyme/baiSZ/src/compiler.jl:6716 [inlined]
 [21] var"#s2008#28592"(FA::Any, A::Any, TT::Any, Mode::Any, ModifiedBetween::Any, width::Any, ReturnPrimal::Any, ShadowInit::Any, World::Any, ABI::Any, ::Any, #unused#::Type, #unused#::Type, #unused#::Type, tt::Any, #unused#::Type, #unused#::Type, #unused#::Type, #unused#::Type, #unused#::Type, #unused#::Any)
    @ Enzyme.Compiler ./none:0
 [22] (::Core.GeneratedFunctionStub)(::Any, ::Vararg{Any})
    @ Core ./boot.jl:602
 [23] autodiff
    @ ~/.julia/packages/Enzyme/baiSZ/src/Enzyme.jl:415 [inlined]
 [24] autodiff
    @ ~/.julia/packages/Enzyme/baiSZ/src/Enzyme.jl:333 [inlined]
 [25] autodiff
    @ ~/.julia/packages/Enzyme/baiSZ/src/Enzyme.jl:318 [inlined]
 [26] hvp!(res::Vector{Float64}, f::typeof(f), x::Vector{Float64}, v::Vector{Float64})
    @ Enzyme ~/.julia/packages/Enzyme/baiSZ/src/Enzyme.jl:1338
 [27] top-level scope
    @ REPL[7]:1
wsmoses commented 4 months ago

we should definitely make this work or at least give a better error message, but FYI so you're unblocked: the issue is that your f is type-unstable in its return, due to the use of the global A. If you did something like f(x)::Float64 = x'*A*x, I presume it would be resolved

RS-Coop commented 4 months ago

Thanks for the response @wsmoses. That idea makes sense. Trying it out I get an error regarding no forward mode derivative for cblas_dcopy64_. If I try to use LinearAlgebra.dot() instead of writing out the multiplication explicitly I get a segmentation fault instead.

Originally, I was trying to compare the Enzyme Hvp with one I had written a while ago, which is how I ran into this issue. When I attempt to do this in a standalone file or in the REPL I run into the same issues discussed above with both variants. A MWE of this is included below.

I also have a variant of this test in test suite for a package of mine which can be found here (SFN.jl). If I run this with my variant, all tests pass, but if I include the Enzyme variant it causes the following:

julia: /workspace/srcdir/Enzyme/enzyme/Enzyme/GradientUtils.cpp:8229: void GradientUtils::eraseFictiousPHIs(): Assertion `0' failed.

[61920] signal (6.-6): Aborted

I wasn't sure how to reproduce the successful test outside of the package, so apologies for no MWE.

This raises a question about why this specific test works sometimes within a test environment but not elsewhere, and why the two variants have different results?

using Enzyme: make_zero, make_zero!, autodiff, autodiff_deferred, Forward, Reverse, Active, Const, Duplicated, DuplicatedNoNeed, hvp!

function ehvp!(res::S, f::F, x::S, v::S) where {F, T<:AbstractFloat, S<:AbstractVector{T}}
    # Hessian-vector product via forward-over-reverse: push the tangent `v`
    # through the reverse-mode gradient of `f`, accumulating H_f(x) * v in `res`.
    make_zero!(res)
    shadow = make_zero(x)

    # Inner reverse pass computes the gradient of f; the outer forward pass
    # differentiates that gradient in the direction `v`.
    inner_grad(d) = autodiff_deferred(Reverse, f, Active, d)

    autodiff(
        Forward,
        inner_grad,
        Const,
        DuplicatedNoNeed(Duplicated(x, shadow), Duplicated(v, res)),
    )

    return nothing
end

# Define the quadratic f(x) = x'Ax. The global A is type-annotated and the
# return type of f asserted, so f stays type-stable despite reading a global.
n = 10
A::Matrix{Float64} = randn((n,n))

# f(x::Vector{Float64})::Float64 = LA.dot(x,A,x)
f(x::Vector{Float64})::Float64 = x'*A*x

#hvp problem setup
# For this quadratic the Hessian is A + A', so the exact Hvp is known in
# closed form and both implementations can be checked against it.
x = rand(n)
v = rand(n)
product = (A+A')*v

res = zeros(n)

# Compare the hand-rolled Hvp against Enzyme's builtin; both should ≈ product.
ehvp!(res, f, x, v)
println("Custom Hvp: ", res ≈ product)

hvp!(res, f, x, v)
println("Enzyme Hvp: ", res ≈ product)
wsmoses commented 4 months ago

Should be fixed on main (and just tagged), reopen if it persists.

RS-Coop commented 4 months ago

Are you able to explain why the two variants both work, or whether they are both valid? It also seems the official Enzyme variant is a bit faster.

using Enzyme: make_zero, make_zero!, autodiff, autodiff_deferred, Forward, Reverse, Active, Const, Duplicated, DuplicatedNoNeed, hvp!
using BenchmarkTools

# Forward-over-reverse Hessian-vector product: writes H_f(x) * v into `res`.
# Hand-rolled counterpart to Enzyme's builtin `hvp!`, kept for comparison.
function ehvp!(res::S, f::F, x::S, v::S) where {F, T<:AbstractFloat, S<:AbstractVector{T}}

    # Zero the output buffer and allocate a zeroed shadow for the gradient.
    make_zero!(res)
    grad = make_zero(x)

    # Outer forward-mode pass differentiates the inner (deferred) reverse-mode
    # gradient in direction `v`; the tangent of the gradient lands in `res`.
    # NOTE(review): the anonymous closure likely prevents caching of the
    # compiled derivative across calls — confirm against Enzyme docs.
    autodiff(
        Forward,
        d -> autodiff_deferred(Reverse, f, Active, d),
        Const,
        DuplicatedNoNeed(Duplicated(x, grad), Duplicated(v, res))
    )

    return nothing
end

# Define the quadratic f(x) = x'Ax; A annotated and f's return type asserted
# so f is type-stable despite referencing a global.
n = 10
A::Matrix{Float64} = randn((n,n))

# f(x::Vector{Float64})::Float64 = LA.dot(x,A,x)
f(x::Vector{Float64})::Float64 = x'*A*x

# Hvp problem setup: for this quadratic the Hessian is A + A', so the exact
# Hessian-vector product is known in closed form.
x = rand(n)
v = rand(n)
product = (A+A')*v

res = zeros(n)

# FIX: the original called `ehvp1!`, which is undefined — the helper defined
# above is named `ehvp!`.
ehvp!(res, f, x, v)
println("Custom Hvp: ", res ≈ product)
println(res, '\n')

hvp!(res, f, x, v)
println("Enzyme Hvp: ", res ≈ product)
println(res, '\n')

#benchmark
println("\nBenchmarking...")

# Classic Rosenbrock test function:
#   sum over consecutive pairs of 100*(x[i+1] - x[i]^2)^2 + (1 - x[i])^2.
# Global minimum 0 at x = ones. Returns 0.0 for empty or length-1 input.
function rosenbrock(x)
    res = 0.0
    # Iterate native indices rather than hard-coding 1:size(x,1)-1, so
    # offset-indexed vectors also work; identical for ordinary Vectors.
    for i in firstindex(x):lastindex(x)-1
        res += 100*(x[i+1]-x[i]^2)^2 + (1-x[i])^2
    end
    return res
end

dim = 100

x = zeros(dim)
result = similar(x)

# Fresh v per sample via setup= so timings aren't tied to one direction.
# FIX: the original benchmarked `ehvp1!`, which is undefined — the helper
# defined above is named `ehvp!`.
t = @benchmark ehvp!($result, $rosenbrock, $x, v) setup=(v=randn(dim))
println("Custom")
display(t)
println()

t = @benchmark hvp!($result, $rosenbrock, $x, v) setup=(v=randn(dim))
println("Enzyme")
display(t)
println()
wsmoses commented 4 months ago

Looks reasonable enough to me.

I think the reasons the builtin one may be faster is 1) you use a closure x -> autodiff_deferred.... This forces recompilation of the derivative code every time [since it doesnt get cached] 2) Doing separate arguments rather than Duplicated(Duplicated(x, y), ...) is likely faster

RS-Coop commented 4 months ago

This still incurs a seg fault when using LinearAlgebra.dot(x,A,x) instead of x'*A*x. Should I reopen or create a new issue?

wsmoses commented 4 months ago

Yeah open an issue

On Tue, Jul 9, 2024 at 7:37 PM Cooper Simpson @.***> wrote:

This still incurs a seg fault when using LinearAlgebra.dot(x,A,x) instead of x'Ax. Should I reopen or create a new issue?

— Reply to this email directly, view it on GitHub https://github.com/EnzymeAD/Enzyme.jl/issues/1597#issuecomment-2218998270, or unsubscribe https://github.com/notifications/unsubscribe-auth/AAJTUXA365O62E6QYVCOCVDZLRX45AVCNFSM6AAAAABKGQUA2SVHI2DSMVQWIX3LMV43OSLTON2WKQ3PNVWWK3TUHMZDEMJYHE4TQMRXGA . You are receiving this because you modified the open/close state.Message ID: @.***>