EnzymeAD / Enzyme.jl

Julia bindings for the Enzyme automatic differentiator
https://enzyme.mit.edu
MIT License
454 stars 63 forks source link

StackOverflowError with KernelAbstractions #1664

Closed roflmaostc closed 3 months ago

roflmaostc commented 3 months ago

Hi,

The following code throws a `StackOverflowError` when differentiated with Enzyme.

If I move `kernel! = f_kernel!(get_backend(measurement))` outside of `f` so that `f` captures it, the error does not occur. The failing version is:

julia> using KernelAbstractions, Enzyme, Atomix

julia> params0 = rand(4,);

julia> x = range(-10, 10, 100);

julia> gauss(x::T, σ, μ) where T = 1 / (σ*√(2 * T(π))) * exp(- (x - μ)^2 / (2 * σ))  # NOTE(review): exponent divides by 2σ, not 2σ^2 — confirm intended
gauss (generic function with 1 method)

julia> measurement = gauss.(x, 1.2, 2.1) .+ gauss.(x, 0.6, -1.1);

julia> function create_fg2!(measurement, x;  N=2)
    # Builds and returns `(f, fg!)`: a least-squares objective `f(pp)` and a combined
    # value/gradient callback `fg!(F, G, p)`. Both close over `buffer`, a working copy
    # of `measurement`, and over `x` and `N`.
    buffer = copy(measurement)
    # Each (i, j) work item atomically adds gauss(x[j], pp[2i-1], pp[2i]) to buffer[j],
    # i.e. component `i` reads two consecutive entries of `pp` as its parameters.
    @kernel function f_kernel!(buffer, pp, x)
        i, j = @index(Global, NTuple)
        Atomix.@atomic buffer[j] += gauss(x[j], pp[(i-1) * 2 + 1], pp[(i-1) * 2 + 2])
    end

    # Objective: resets `buffer` to `-measurement`, accumulates the N components into
    # it via the kernel, then returns the sum of squared residuals.
    function f(pp)
        buffer .= .- measurement
        # The kernel object is instantiated on every call, inside the differentiated
        # closure; per the report, hoisting this line out of `f` avoids the error.
        kernel! = f_kernel!(get_backend(measurement))
        # NOTE(review): no explicit synchronize follows the launch — confirm whether
        # one is required before reading `buffer` on asynchronous backends.
        kernel!(buffer, pp, x, ndrange=(N, length(measurement)))
        return sum(abs2, buffer)
    end

    # `let f=f` rebinds `f` to a fresh local before `fg!` captures it.
    fg! = let f=f
        # F / G act as "requested" flags: `nothing` means that output is not wanted.
        # Returns the objective value when F is requested, otherwise `nothing`.
        function fg!(F, G, p)
            if G !== nothing
                dp = make_zero(p)
                pp = copy(p)
                # `make_zero(f)` builds the shadow of the closure; this is the call that
                # recurses without bound in the reported trace (through a Core.Box).
                y = Enzyme.autodiff(Enzyme.ReverseWithPrimal, Duplicated(f, make_zero(f)), Duplicated(pp, dp))
                G .= dp
                if F !== nothing
                    # presumably y[2] is the primal value from ReverseWithPrimal — TODO confirm
                    return y[2]
                end
            end
            # Value-only path; also reached (and skipped) when only G was requested.
            if F !== nothing
                return f(p)
            end
        end
    end

    f, fg!
end

julia> params0 = rand(4,);

julia> f2, fg2! = create_fg2!(measurement, x, N=length(params0) ÷ 2);

julia> @time fg2!(2, copy(params0),  params0)
ERROR: StackOverflowError:
Stacktrace:
     [1] forcefold
       @ ~/.julia/packages/Enzyme/YDcYf/src/compiler.jl:386 [inlined]
     [2] active_reg_inner(::Type{var"#f_kernel!#11"{var"#gpu_f_kernel!#10", var"#cpu_f_kernel!#9"}}, seen::Tuple{}, world::Nothing, ::Val{false}, ::Val{false}, ::Val{false})
       @ Enzyme.Compiler ~/.julia/packages/Enzyme/YDcYf/src/compiler.jl:631
     [3] active_reg_inner (repeats 3 times)
       @ ~/.julia/packages/Enzyme/YDcYf/src/compiler.jl:473 [inlined]
     [4] guaranteed_const_nongen
       @ ~/.julia/packages/Enzyme/YDcYf/src/compiler.jl:663 [inlined]
     [5] make_zero(::Type{var"#f_kernel!#11"{…}}, seen::IdDict{Any, Any}, prev::var"#f_kernel!#11"{var"#gpu_f_kernel!#10", var"#cpu_f_kernel!#9"}, ::Val{false})
       @ Enzyme.Compiler ~/.julia/packages/Enzyme/YDcYf/src/compiler.jl:1434
     [6] make_zero
       @ ~/.julia/packages/Enzyme/YDcYf/src/compiler.jl:1428 [inlined]
     [7] make_zero(::Type{var"#f_kernel!#11"{…}}, seen::IdDict{Any, Any}, prev::var"#f_kernel!#11"{var"#gpu_f_kernel!#10", var"#cpu_f_kernel!#9"}, ::Val{false})
       @ Enzyme.Compiler ~/.julia/packages/Enzyme/YDcYf/src/compiler.jl:1466
--- the last 2 lines are repeated 16337 more times ---
 [32682] make_zero(::Type{Core.Box}, seen::IdDict{Any, Any}, prev::Core.Box, ::Val{false})
       @ Enzyme.Compiler ~/.julia/packages/Enzyme/YDcYf/src/compiler.jl:1428
 [32683] make_zero
       @ ~/.julia/packages/Enzyme/YDcYf/src/compiler.jl:1466 [inlined]
 [32684] make_zero (repeats 2 times)
       @ ~/.julia/packages/EnzymeCore/yDAqg/src/EnzymeCore.jl:277 [inlined]
 [32685] (::var"#fg!#13"{var"#f#12"{Int64, Vector{…}, StepRangeLen{…}, Vector{…}}})(F::Int64, G::Vector{Float64}, p::Vector{Float64})
       @ Main ./REPL[38]:23
 [32686] macro expansion
       @ ./timing.jl:279 [inlined]
Some type information was truncated. Use `show(err)` to see complete types.

Julia 1.10.4

(@AD) pkg> st
Status `~/.julia/environments/AD/Project.toml`
  [a9b6321e] Atomix v0.1.0
  [a0c0ee7d] DifferentiationInterface v0.5.9
  [7da242da] Enzyme v0.12.24
  [63c18a36] KernelAbstractions v0.9.22

Best,

Felix

MilesCranmer commented 3 months ago

You could try with a larger stack size, e.g.,

y = Task(64 * 1024^2) do
    Enzyme.autodiff(Enzyme.ReverseWithPrimal, Duplicated(f, make_zero(f)), Duplicated(pp, dp))              
end |> schedule |> fetch

?

roflmaostc commented 3 months ago

This works

       kernel! = f_kernel!(get_backend(measurement))
    # Workaround variant: `kernel!` is built once outside `f`, so `f` only captures
    # the already-instantiated kernel object instead of constructing it per call.
    function f(pp)
        buffer .= .- measurement

        kernel!(buffer, pp, x, ndrange=(N, length(measurement)))
        return sum(abs2, buffer)
    end

So I suppose the issue is somehow related to constructing the kernel object from KernelAbstractions (KA) inside `f`?

wsmoses commented 3 months ago

@MilesCranmer I think this is probably just an accidental infinite recursion, where the object isn't added to the "seen" set when it ought to be.

wsmoses commented 3 months ago

Fixed by https://github.com/EnzymeAD/Enzyme.jl/pull/1665.

Note that you will then hit a separate issue: a type-unstable MixedDuplicated of a closure is not yet supported.


julia> @time fg2!(2, copy(params0),  params0)
ERROR: MethodError: no method matching Duplicated(::var"#f_kernel!#10"{var"#gpu_f_kernel!#9", var"#cpu_f_kernel!#8"}, ::Base.RefValue{var"#f_kernel!#10"{var"#gpu_f_kernel!#9", var"#cpu_f_kernel!#8"}})

Closest candidates are:
  Duplicated(::T1, ::T1) where T1
   @ EnzymeCore ~/git/Enzyme.jl/lib/EnzymeCore/src/EnzymeCore.jl:66
  Duplicated(::T1, ::T1, ::Bool) where T1
   @ EnzymeCore ~/git/Enzyme.jl/lib/EnzymeCore/src/EnzymeCore.jl:66

Stacktrace:
  [1] runtime_generic_augfwd(activity::Type{Val{…}}, width::Val{1}, ModifiedBetween::Val{(true, true)}, RT::Val{@NamedTuple{…}}, f::var"#f_kernel!#10"{var"#gpu_f_kernel!#9", var"#cpu_f_kernel!#8"}, df::Base.RefValue{var"#f_kernel!#10"{…}}, primal_1::CPU, shadow_1_1::Nothing)
    @ Enzyme.Compiler ~/git/Enzyme.jl/src/rules/jitrules.jl:313
  [2] f
    @ ./REPL[17]:11 [inlined]
  [3] f
    @ ./REPL[17]:0 [inlined]
  [4] diffejulia_f_1728_inner_1wrap
    @ ./REPL[17]:0
  [5] macro expansion
    @ ~/git/Enzyme.jl/src/compiler.jl:6646 [inlined]
  [6] enzyme_call
    @ ~/git/Enzyme.jl/src/compiler.jl:6246 [inlined]
  [7] CombinedAdjointThunk
    @ ~/git/Enzyme.jl/src/compiler.jl:6123 [inlined]
  [8] autodiff
    @ ~/git/Enzyme.jl/src/Enzyme.jl:314 [inlined]
  [9] autodiff
    @ ~/git/Enzyme.jl/src/Enzyme.jl:338 [inlined]
 [10] (::var"#fg!#12"{var"#f#11"{Int64, Vector{Float64}, StepRangeLen{Float64, Base.TwicePrecision{Float64}, Base.TwicePrecision{Float64}, Int64}, Vector{Float64}}})(F::Int64, G::Vector{Float64}, p::Vector{Float64})
    @ Main ./REPL[17]:23
 [11] macro expansion
    @ ./timing.jl:279 [inlined]
 [12] top-level scope
    @ ./REPL[21]:1

Feel free to open a separate issue for the follow-up problem; I'm going to close this one as completed once the Box fix lands.