EnzymeAD / Enzyme.jl

Julia bindings for the Enzyme automatic differentiator
https://enzyme.mit.edu
MIT License
455 stars 63 forks source link

Extremely long compilation time on recursive, branching functions (DynamicExpressions.jl) #1156

Open MilesCranmer opened 11 months ago

MilesCranmer commented 11 months ago

The old issue #1018 from August was getting a bit lengthy so I'm moving to a new issue – feel free to close that one.

This relates to my one-year effort to try to get Enzyme.jl working as one of the AD backends for DynamicExpressions.jl, SymbolicRegression.jl, and PySR. The current status is:

  1. Working first-order gradients, if I disable some of the optimizations
  2. Hanging first-order gradients (extremely long compilation time), if all the optimizations are left on
  3. Hanging second-order gradients (extremely long compilation time), regardless of optimization settings
(expand)

I've boiled down the MWE to the following code which replicates the issues I am seeing:

```julia
using Enzyme

################################################################################
### OperatorEnum.jl
################################################################################

struct OperatorEnum{B,U}
    binops::B
    unaops::U
end

################################################################################

################################################################################
### Equation.jl
################################################################################

mutable struct Node{T}
    degree::UInt8  # 0 for constant/variable, 1 for cos/sin, 2 for +/* etc.
    constant::Bool  # false if variable
    val::Union{T,Nothing}  # If is a constant, this stores the actual value
    # ------------------- (possibly undefined below)
    feature::UInt16  # If is a variable (e.g., x in cos(x)), this stores the feature index.
    op::UInt8  # If operator, this is the index of the operator in operators.binops, or operators.unaops
    l::Node{T}  # Left child node. Only defined for degree=1 or degree=2.
    r::Node{T}  # Right child node. Only defined for degree=2.

    Node(d::Integer, c::Bool, v::_T) where {_T} = new{_T}(UInt8(d), c, v)
    Node(::Type{_T}, d::Integer, c::Bool, v::_T) where {_T} = new{_T}(UInt8(d), c, v)
    Node(::Type{_T}, d::Integer, c::Bool, v::Nothing, f::Integer) where {_T} = new{_T}(UInt8(d), c, v, UInt16(f))
    Node(d::Integer, c::Bool, v::Nothing, f::Integer, o::Integer, l::Node{_T}) where {_T} = new{_T}(UInt8(d), c, v, UInt16(f), UInt8(o), l)
    Node(d::Integer, c::Bool, v::Nothing, f::Integer, o::Integer, l::Node{_T}, r::Node{_T}) where {_T} = new{_T}(UInt8(d), c, v, UInt16(f), UInt8(o), l, r)
end

function Node(::Type{T}; val::T1=nothing, feature::T2=nothing)::Node{T} where {T,T1,T2}
    if T2 <: Nothing
        !(T1 <: T) && (val = convert(T, val))
        return Node(T, 0, true, val)
    else
        return Node(T, 0, false, nothing, feature)
    end
end
Node(op::Integer, l::Node{T}) where {T} = Node(1, false, nothing, 0, op, l)
Node(op::Integer, l::Node{T}, r::Node{T}) where {T} = Node(2, false, nothing, 0, op, l, r)

################################################################################

################################################################################
### Utils.jl
################################################################################

@inline function fill_similar(value, array, args...)
    out_array = similar(array, args...)
    out_array .= value
    return out_array
end

is_bad_array(array) = !(isempty(array) || isfinite(sum(array)))

function is_constant(tree::Node)
    if tree.degree == 0
        return tree.constant
    elseif tree.degree == 1
        return is_constant(tree.l)
    else
        return is_constant(tree.l) && is_constant(tree.r)
    end
end

################################################################################

################################################################################
### EvaluateEquation.jl
################################################################################

struct ResultOk{A<:AbstractArray}
    x::A
    ok::Bool
end

function eval_tree_array(tree::Node{T}, cX::AbstractMatrix{T}, operators::OperatorEnum, fuse_level=Val(2)) where {T<:Number}
    result = _eval_tree_array(tree, cX, operators, fuse_level)
    return (result.x, result.ok && !is_bad_array(result.x))
end

counttuple(::Type{<:NTuple{N,Any}}) where {N} = N
get_nuna(::Type{<:OperatorEnum{B,U}}) where {B,U} = counttuple(U)
get_nbin(::Type{<:OperatorEnum{B}}) where {B} = counttuple(B)

@generated function _eval_tree_array(tree::Node{T}, cX::AbstractMatrix{T}, operators::OperatorEnum, ::Val{fuse_level})::ResultOk where {T<:Number,fuse_level}
    nuna = get_nuna(operators)
    nbin = get_nbin(operators)
    quote
        # First, we see if there are only constants in the tree - meaning
        # we can just return the constant result.
        if tree.degree == 0
            return deg0_eval(tree, cX)
        elseif is_constant(tree)
            # Speed hack for constant trees.
            const_result = _eval_constant_tree(tree, operators)::ResultOk{Vector{T}}
            !const_result.ok && return ResultOk(similar(cX, axes(cX, 2)), false)
            return ResultOk(fill_similar(const_result.x[], cX, axes(cX, 2)), true)
        elseif tree.degree == 1
            op_idx = tree.op
            # This @nif lets us generate an if statement over choice of operator,
            # which means the compiler will be able to completely avoid type inference on operators.
            return Base.Cartesian.@nif(
                $nuna,
                i -> i == op_idx,
                i -> let op = operators.unaops[i]
                    if fuse_level > 1 && tree.l.degree == 2 && tree.l.l.degree == 0 && tree.l.r.degree == 0
                        # op(op2(x, y)), where x, y, z are constants or variables.
                        l_op_idx = tree.l.op
                        Base.Cartesian.@nif(
                            $nbin,
                            j -> j == l_op_idx,
                            j -> let op_l = operators.binops[j]
                                deg1_l2_ll0_lr0_eval(tree, cX, op, op_l)
                            end,
                        )
                    elseif fuse_level > 1 && tree.l.degree == 1 && tree.l.l.degree == 0
                        # op(op2(x)), where x is a constant or variable.
                        l_op_idx = tree.l.op
                        Base.Cartesian.@nif(
                            $nuna,
                            j -> j == l_op_idx,
                            j -> let op_l = operators.unaops[j]
                                deg1_l1_ll0_eval(tree, cX, op, op_l)
                            end,
                        )
                    else
                        # op(x), for any x.
                        result = _eval_tree_array(tree.l, cX, operators, Val(fuse_level))
                        !result.ok && return result
                        deg1_eval(result.x, op)
                    end
                end
            )
        else
            op_idx = tree.op
            return Base.Cartesian.@nif(
                $nbin,
                i -> i == op_idx,
                i -> let op = operators.binops[i]
                    if fuse_level > 1 && tree.l.degree == 0 && tree.r.degree == 0
                        deg2_l0_r0_eval(tree, cX, op)
                    elseif tree.r.degree == 0
                        result_l = _eval_tree_array(tree.l, cX, operators, Val(fuse_level))
                        !result_l.ok && return result_l
                        # op(x, y), where y is a constant or variable but x is not.
                        deg2_r0_eval(tree, result_l.x, cX, op)
                    elseif tree.l.degree == 0
                        result_r = _eval_tree_array(tree.r, cX, operators, Val(fuse_level))
                        !result_r.ok && return result_r
                        # op(x, y), where x is a constant or variable but y is not.
                        deg2_l0_eval(tree, result_r.x, cX, op)
                    else
                        result_l = _eval_tree_array(tree.l, cX, operators, Val(fuse_level))
                        !result_l.ok && return result_l
                        result_r = _eval_tree_array(tree.r, cX, operators, Val(fuse_level))
                        !result_r.ok && return result_r
                        # op(x, y), for any x or y
                        deg2_eval(result_l.x, result_r.x, op)
                    end
                end
            )
        end
    end
end

function deg2_eval(
    cumulator_l::AbstractVector{T}, cumulator_r::AbstractVector{T}, op::F
)::ResultOk where {T<:Number,F}
    @inbounds @simd for j in eachindex(cumulator_l)
        x = op(cumulator_l[j], cumulator_r[j])::T
        cumulator_l[j] = x
    end
    return ResultOk(cumulator_l, true)
end

function deg1_eval(
    cumulator::AbstractVector{T}, op::F
)::ResultOk where {T<:Number,F}
    @inbounds @simd for j in eachindex(cumulator)
        x = op(cumulator[j])::T
        cumulator[j] = x
    end
    return ResultOk(cumulator, true)
end

function deg0_eval(tree::Node{T}, cX::AbstractMatrix{T})::ResultOk where {T<:Number}
    if tree.constant
        return ResultOk(fill_similar(tree.val::T, cX, axes(cX, 2)), true)
    else
        return ResultOk(cX[tree.feature, :], true)
    end
end

function deg1_l2_ll0_lr0_eval(
    tree::Node{T}, cX::AbstractMatrix{T}, op::F, op_l::F2
) where {T<:Number,F,F2}
    if tree.l.l.constant && tree.l.r.constant
        val_ll = tree.l.l.val::T
        val_lr = tree.l.r.val::T
        x_l = op_l(val_ll, val_lr)::T
        x = op(x_l)::T
        return ResultOk(fill_similar(x, cX, axes(cX, 2)), true)
    elseif tree.l.l.constant
        val_ll = tree.l.l.val::T
        feature_lr = tree.l.r.feature
        cumulator = similar(cX, axes(cX, 2))
        @inbounds @simd for j in axes(cX, 2)
            x_l = op_l(val_ll, cX[feature_lr, j])::T
            x = isfinite(x_l) ? op(x_l)::T : T(Inf)
            cumulator[j] = x
        end
        return ResultOk(cumulator, true)
    elseif tree.l.r.constant
        feature_ll = tree.l.l.feature
        val_lr = tree.l.r.val::T
        cumulator = similar(cX, axes(cX, 2))
        @inbounds @simd for j in axes(cX, 2)
            x_l = op_l(cX[feature_ll, j], val_lr)::T
            x = isfinite(x_l) ? op(x_l)::T : T(Inf)
            cumulator[j] = x
        end
        return ResultOk(cumulator, true)
    else
        feature_ll = tree.l.l.feature
        feature_lr = tree.l.r.feature
        cumulator = similar(cX, axes(cX, 2))
        @inbounds @simd for j in axes(cX, 2)
            x_l = op_l(cX[feature_ll, j], cX[feature_lr, j])::T
            x = isfinite(x_l) ? op(x_l)::T : T(Inf)
            cumulator[j] = x
        end
        return ResultOk(cumulator, true)
    end
end

# op(op2(x)) for x variable or constant
function deg1_l1_ll0_eval(
    tree::Node{T}, cX::AbstractMatrix{T}, op::F, op_l::F2
) where {T<:Number,F,F2}
    if tree.l.l.constant
        val_ll = tree.l.l.val::T
        x_l = op_l(val_ll)::T
        x = op(x_l)::T
        return ResultOk(fill_similar(x, cX, axes(cX, 2)), true)
    else
        feature_ll = tree.l.l.feature
        cumulator = similar(cX, axes(cX, 2))
        @inbounds @simd for j in axes(cX, 2)
            x_l = op_l(cX[feature_ll, j])::T
            x = isfinite(x_l) ? op(x_l)::T : T(Inf)
            cumulator[j] = x
        end
        return ResultOk(cumulator, true)
    end
end

# op(x, y) for x and y variable/constant
function deg2_l0_r0_eval(
    tree::Node{T}, cX::AbstractMatrix{T}, op::F
) where {T<:Number,F}
    if tree.l.constant && tree.r.constant
        val_l = tree.l.val::T
        val_r = tree.r.val::T
        x = op(val_l, val_r)::T
        return ResultOk(fill_similar(x, cX, axes(cX, 2)), true)
    elseif tree.l.constant
        cumulator = similar(cX, axes(cX, 2))
        val_l = tree.l.val::T
        feature_r = tree.r.feature
        @inbounds @simd for j in axes(cX, 2)
            x = op(val_l, cX[feature_r, j])::T
            cumulator[j] = x
        end
        return ResultOk(cumulator, true)
    elseif tree.r.constant
        cumulator = similar(cX, axes(cX, 2))
        feature_l = tree.l.feature
        val_r = tree.r.val::T
        @inbounds @simd for j in axes(cX, 2)
            x = op(cX[feature_l, j], val_r)::T
            cumulator[j] = x
        end
        return ResultOk(cumulator, true)
    else
        cumulator = similar(cX, axes(cX, 2))
        feature_l = tree.l.feature
        feature_r = tree.r.feature
        @inbounds @simd for j in axes(cX, 2)
            x = op(cX[feature_l, j], cX[feature_r, j])::T
            cumulator[j] = x
        end
        return ResultOk(cumulator, true)
    end
end

# op(x, y) for x variable/constant, y arbitrary
function deg2_l0_eval(
    tree::Node{T}, cumulator::AbstractVector{T}, cX::AbstractArray{T}, op::F
) where {T<:Number,F}
    if tree.l.constant
        val = tree.l.val::T
        @inbounds @simd for j in eachindex(cumulator)
            x = op(val, cumulator[j])::T
            cumulator[j] = x
        end
        return ResultOk(cumulator, true)
    else
        feature = tree.l.feature
        @inbounds @simd for j in eachindex(cumulator)
            x = op(cX[feature, j], cumulator[j])::T
            cumulator[j] = x
        end
        return ResultOk(cumulator, true)
    end
end

# op(x, y) for x arbitrary, y variable/constant
function deg2_r0_eval(
    tree::Node{T}, cumulator::AbstractVector{T}, cX::AbstractArray{T}, op::F
) where {T<:Number,F}
    if tree.r.constant
        val = tree.r.val::T
        @inbounds @simd for j in eachindex(cumulator)
            x = op(cumulator[j], val)::T
            cumulator[j] = x
        end
        return ResultOk(cumulator, true)
    else
        feature = tree.r.feature
        @inbounds @simd for j in eachindex(cumulator)
            x = op(cumulator[j], cX[feature, j])::T
            cumulator[j] = x
        end
        return ResultOk(cumulator, true)
    end
end

@generated function _eval_constant_tree(tree::Node{T}, operators::OperatorEnum) where {T<:Number}
    nuna = get_nuna(operators)
    nbin = get_nbin(operators)
    quote
        if tree.degree == 0
            return deg0_eval_constant(tree)::ResultOk{Vector{T}}
        elseif tree.degree == 1
            op_idx = tree.op
            return Base.Cartesian.@nif(
                $nuna,
                i -> i == op_idx,
                i -> deg1_eval_constant(
                    tree, operators.unaops[i], operators
                )::ResultOk{Vector{T}}
            )
        else
            op_idx = tree.op
            return Base.Cartesian.@nif(
                $nbin,
                i -> i == op_idx,
                i -> deg2_eval_constant(
                    tree, operators.binops[i], operators
                )::ResultOk{Vector{T}}
            )
        end
    end
end

@inline function deg0_eval_constant(tree::Node{T}) where {T<:Number}
    output = tree.val::T
    return ResultOk([output], true)::ResultOk{Vector{T}}
end

function deg1_eval_constant(tree::Node{T}, op::F, operators::OperatorEnum) where {T<:Number,F}
    result = _eval_constant_tree(tree.l, operators)
    !result.ok && return result
    output = op(result.x[])::T
    return ResultOk([output], isfinite(output))::ResultOk{Vector{T}}
end

function deg2_eval_constant(tree::Node{T}, op::F, operators::OperatorEnum) where {T<:Number,F}
    cumulator = _eval_constant_tree(tree.l, operators)
    !cumulator.ok && return cumulator
    result_r = _eval_constant_tree(tree.r, operators)
    !result_r.ok && return result_r
    output = op(cumulator.x[], result_r.x[])::T
    return ResultOk([output], isfinite(output))::ResultOk{Vector{T}}
end

################################################################################
```

Now, we can see that the forward pass works okay:

# Operators to use. A node's `op` field is a 1-based index into these tuples:
# `binops` for degree-2 nodes, `unaops` for degree-1 nodes.
operators = OperatorEnum((+, -, *, /), (cos, sin, exp, tanh))

# Variables: leaf nodes that read feature rows 1, 2 and 3 of the input matrix.
x1, x2, x3 = (i -> Node(Float64; feature=i)).(1:3)

# Expression: binops[1] (+) applied to x1 and to unaops[1] (cos) applied to x2.
tree = Node(1, x1, Node(1, x2))  # == x1 + cos(x2)

# Input data: 3 features (rows) by 100 samples (columns).
X = randn(3, 100);

# Output: evaluate the tree on every column of X with fuse_level = 2.
# Returns a tuple of the result vector and an `ok` flag.
eval_tree_array(tree, X, operators, Val(2))

This evaluates x1 + cos(x2) over 100 random samples (the columns of X). Both Val(1) and Val(2) (fuse_level=1 and fuse_level=2, respectively) work here and produce the same output. To see which parts of the code the setting activates, search the MWE for fuse_level – it basically just turns on a couple of branches related to "fused" operators (e.g., sin(exp(x)) evaluated over the data inside a single loop).

Now, if I try compiling the reverse-mode gradient with respect to the input data for fuse-level 1:

# Scalar wrapper used as the differentiation target: evaluates the tree with
# fuse_level = 1, writes the sum of the result vector into `output[]`, and
# returns `nothing`, so the value is communicated purely through mutation.
f(tree, X, operators, output) = (output[] = sum(eval_tree_array(tree, X, operators, Val(1))[1]); nothing)
dX = Enzyme.make_zero(X)  # shadow of X, paired with it through `Duplicated` below
output = [0.0]
doutput = [1.0]  # shadow of `output`, paired with it through `Duplicated` below

# Reverse-mode call: `tree` and `operators` are marked `Const`, while `X` and
# `output` are passed together with their shadows.
autodiff(
    Reverse,
    f,
    Const(tree),
    Duplicated(X, dX),
    Const(operators),
    Duplicated(output, doutput)
)

This takes about 1 minute to compile. But, once it's compiled, it's pretty fast.

However, if I switch on some of the optimizations (fuse_level=2):

# Scalar wrapper used as the differentiation target, now with the fused-operator
# branches enabled (fuse_level = 2). It writes the summed evaluation into
# `output[]` and returns `nothing`.
f(tree, X, operators, output) = (output[] = sum(eval_tree_array(tree, X, operators, Val(2))[1]); nothing)

# Start from fresh shadows. Reverse mode adds into the shadow of a `Duplicated`
# argument rather than overwriting it, so a `dX` left over from an earlier
# `autodiff` call would end up holding the sum of both gradients.
dX = Enzyme.make_zero(X)
output = [0.0]
doutput = [1.0]

autodiff(
    Reverse,
    f,
    Const(tree),
    Duplicated(X, dX),
    Const(operators),
    Duplicated(output, doutput)
)

This seems to hang forever. I left it going for about a day and came back and it was still running. I'm assuming it will finish eventually, but it's obviously not a good solution as the existing AD backends with forward-mode auto-diff compile in under a second. And if the user changes data types or operators, it will need to recompile again.

If I force it to quit with Ctrl-\, I see various LLVM calls:

[68568] signal (3): Quit: 3
in expression starting at REPL[44]:1
__psynch_cvwait at /usr/lib/system/libsystem_kernel.dylib (unknown line)
unknown function (ip: 0x0)
__psynch_cvwait at /usr/lib/system/libsystem_kernel.dylib (unknown line)
unknown function (ip: 0x0)
__psynch_cvwait at /usr/lib/system/libsystem_kernel.dylib (unknown line)
unknown function (ip: 0x0)
__psynch_cvwait at /usr/lib/system/libsystem_kernel.dylib (unknown line)
unknown function (ip: 0x0)
__psynch_cvwait at /usr/lib/system/libsystem_kernel.dylib (unknown line)
unknown function (ip: 0x0)
_ZN4llvm22MustBeExecutedIterator7advanceEv at /Users/mcranmer/.julia/juliaup/julia-1.10.0-rc1+0.aarch64.apple.darwin14/lib/julia/libLLVM.dylib (unknown line)
unknown function (ip: 0x0)
Allocations: 37668296 (Pool: 37621782; Big: 46514); GC: 45

Any idea how to get this scaling better? It seems like some step of the compilation is hanging here and it is scaling exponentially with the number of branches.


Edit: Updated code MWE to further reduce it.

MilesCranmer commented 11 months ago

Okay, I have verified the gradients are not broken; the compilation is just extremely slow. For example, if I reduce the number of functions to just 4 in total:

operators = OperatorEnum((+, -), (cos, sin))

then the compilation with Val(2) takes about 30 seconds to compile. But every function I add here, it seems to exponentially scale the compilation time.

@wsmoses I remember you mentioned some cache lock may be hanging here: https://github.com/EnzymeAD/Enzyme.jl/issues/1018#issuecomment-1689434106. Could that be the cause of this?

wsmoses commented 11 months ago

cc @vchuravy

Yeah that last issue seemed to have some sort of deadlock (aka a lock state that would never release)

wsmoses commented 11 months ago

@MilesCranmer would you be able to simplify your hanging MWE.

It doesn't need to compute the same thing as long as it still hangs (e.g. sum -> first, etc.). The plentiful macros, generated functions, etc. are making it difficult to diagnose.

MilesCranmer commented 11 months ago

Unfortunately, all of this stuff is required to reproduce it. The generated functions are necessary for Base.Cartesian.@nif to work, and that itself is necessary for Enzyme to work; otherwise there's a type instability in the operators used. You can see the statements that get turned on with fuse_level > 1. Those are what give the very large compilation time.

vchuravy commented 11 months ago

Can you look at https://github.com/JuliaGPU/GPUCompiler.jl/blob/21ca075c1e91fe0c15f1330ab487b4831013ec1f/src/jlgen.jl#L175 after the compilation?

vchuravy commented 11 months ago

@MilesCranmer in particular I am interested in how much it grows.

See also https://github.com/EnzymeAD/Enzyme.jl/issues/1182

MilesCranmer commented 11 months ago

Oh I see what you mean. Yeah let me check.

MilesCranmer commented 11 months ago

Okay for both fuse levels it is the same length of dictionary – just 3.

Here's the code I'm using to test (referencing the code in the original snippet above)

"""
    gen_f(::Val{fuse_level})

Return a four-argument function `(tree, X, operators, output)` that evaluates
`tree` on `X` via `eval_tree_array` at the given `fuse_level`, stores the sum of
the resulting values in `output[]`, and returns `nothing`.
"""
function gen_f(::Val{fuse_level}) where {fuse_level}
    function loss!(tree, X, operators, output)
        # `eval_tree_array` returns a (values, ok) tuple; only the values are summed.
        evaluated, _ = eval_tree_array(tree, X, operators, Val(fuse_level))
        output[] = sum(evaluated)
        return nothing
    end
    return loss!
end

"""
    run_test(fuse_level::Integer; operators=OperatorEnum((+, -), (cos, sin)))

Compile and run the reverse-mode gradient of `sum(eval_tree_array(...))` with
respect to the input matrix, for the tree `binops[1](x1, unaops[1](x2))`.

# Arguments
- `fuse_level`: forwarded as `Val(fuse_level)` to `gen_f`, selecting which
  fused-operator branches of the evaluator are active.

# Keywords
- `operators`: the operator set to evaluate with. Defaults to the small
  `(+, -)` / `(cos, sin)` set, so existing `run_test(n)` calls behave as before;
  pass a larger `OperatorEnum` to study scaling without redefining this function.

# Returns
The shadow matrix `dX` (same size as the 3×100 random input) after the call to
`autodiff`.
"""
function run_test(fuse_level::Integer; operators=OperatorEnum((+, -), (cos, sin)))
    # Variables: leaf nodes reading feature rows 1 and 2.
    x1 = Node(Float64; feature=1)
    x2 = Node(Float64; feature=2)

    # Expression: binops[1](x1, unaops[1](x2)); with the default operators
    # this is x1 + cos(x2).
    tree = Node(1, x1, Node(1, x2))

    # Input data: 3 features (rows) by 100 samples (columns).
    X = randn(3, 100)

    output = [0.0]
    doutput = [1.0]
    # Zero shadow with the same element type and shape as X.
    dX = zero(X)

    autodiff(
        Reverse,
        gen_f(Val(fuse_level)),
        Const(tree),
        Duplicated(X, dX),
        Const(operators),
        Duplicated(output, doutput)
    )
    return dX
end

This number of operators is still pretty light. If I go to OperatorEnum((+, -, *, /), (cos, sin)) and fuse_level=2, it will hit a stack overflow with no explanation for where it occurred. So it seems like there is some recursive function that is working really really hard for this type of branching?

MilesCranmer commented 11 months ago

If I first do run_test(1), and then in the same session, run_test(2), it says that the cache is 5 in length.

MilesCranmer commented 11 months ago

If maybe you are asking about the contents of the entries of GLOBAL_CI_CACHES (?), then this is the result:

I first run run_test(1).

Then, GLOBAL_CI_CACHES is length 3.

Within that, the contents are:

k = CompilerConfig for Enzyme.Compiler.EnzymeTarget
length((Enzyme.GPUCompiler.GLOBAL_CI_CACHES[k]).dict) = 1006

k = CompilerConfig for GPUCompiler.NativeCompilerTarget
length((Enzyme.GPUCompiler.GLOBAL_CI_CACHES[k]).dict) = 1011

k = CompilerConfig for Enzyme.Compiler.EnzymeTarget
length((Enzyme.GPUCompiler.GLOBAL_CI_CACHES[k]).dict) = 0

I then run run_test(2). The cache has 5 elements in total. The contents are:

k = CompilerConfig for Enzyme.Compiler.EnzymeTarget
length((Enzyme.GPUCompiler.GLOBAL_CI_CACHES[k]).dict) = 1006

k = CompilerConfig for Enzyme.Compiler.EnzymeTarget
length((Enzyme.GPUCompiler.GLOBAL_CI_CACHES[k]).dict) = 1017

k = CompilerConfig for GPUCompiler.NativeCompilerTarget
length((Enzyme.GPUCompiler.GLOBAL_CI_CACHES[k]).dict) = 1025

k = CompilerConfig for Enzyme.Compiler.EnzymeTarget
length((Enzyme.GPUCompiler.GLOBAL_CI_CACHES[k]).dict) = 0

k = CompilerConfig for Enzyme.Compiler.EnzymeTarget
length((Enzyme.GPUCompiler.GLOBAL_CI_CACHES[k]).dict) = 0

If I then change the definition of run_test so that the OperatorEnum has an additional operator, and run run_test(1), I see the NativeCompilerTarget have 1039 entries. The EnzymeTarget for that run seems to be 1010 entries.

With that updated run_test, if I then do run_test(2), I see NativeCompilerTarget grow to 1045 entries, and the latest EnzymeTarget have 1024 entries.

MilesCranmer commented 11 months ago

@wsmoses I just managed to find a way to further reduce the code while still getting the long compilation behavior (updated first comment). It's a bit faster overall so I'm not sure if all the same behavior is there, but the exponential scaling from fuse_level=1 to fuse_level=2, or for more operators, seems to be the same.

(Note that the generated functions cannot be removed as they are required for the Base.Cartesian.@nif and that is required for Enzyme to know the operators at compile time.)

MilesCranmer commented 10 months ago

Hey both, Just wanted to check-in on this and see how things are going and whether you've had a chance to check it out? Thanks for all your work on this great package. Best, Miles

vchuravy commented 10 months ago

To temper expectations, I probably won't have time to look at things until end of January.

wsmoses commented 6 months ago

@MilesCranmer the latest release will have just added substantial improvements to compile time. However I don't think they address the root cause (presumably nested gpucompiler compilation?), so it'll be a constant overhead speedup -- but still not nothing.

@vchuravy do you have time to diagnose this together next week?

wsmoses commented 6 months ago

sorry finally starting to investigate this more closely.

for the first function which you said was a minute compile.


julia> @time autodiff(
           Reverse,
           f,
           Const(tree),
           Duplicated(X, dX),
           Const(operators),
           Duplicated(output, doutput)
       )
 45.757168 seconds (27.99 M allocations: 1.761 GiB, 1.81% gc time, 100.00% compilation time)
((nothing, nothing, nothing, nothing),)

julia> @time autodiff(
           Reverse,
           f,
           Const(tree),
           Duplicated(X, dX),
           Const(operators),
           Duplicated(output, doutput)
       )
  0.000065 seconds (161 allocations: 10.609 KiB)
((nothing, nothing, nothing, nothing),)
wsmoses commented 6 months ago

Time seems to persist on current main.

julia> @time autodiff(
           Reverse,
           f,
           Const(tree),
           Duplicated(X, dX),
           Const(operators),
           Duplicated(output, doutput)
       )
^CERROR: InterruptException:
Stacktrace:
  [1] EnzymeCreatePrimalAndGradient(logic::Enzyme.Logic, todiff::LLVM.Function, retType::Enzyme.API.CDIFFE_TYPE, constant_args::Vector{…}, TA::Enzyme.TypeAnalysis, returnValue::Bool, dretUsed::Bool, mode::Enzyme.API.CDerivativeMode, width::Int64, additionalArg::Ptr{…}, forceAnonymousTape::Bool, typeInfo::Enzyme.FnTypeInfo, uncacheable_args::Vector{…}, augmented::Ptr{…}, atomicAdd::Bool)
    @ Enzyme.API ~/git/Enzyme.jl/src/api.jl:154
  [2] enzyme!(job::GPUCompiler.CompilerJob{…}, mod::LLVM.Module, primalf::LLVM.Function, TT::Type, mode::Enzyme.API.CDerivativeMode, width::Int64, parallel::Bool, actualRetType::Type, wrap::Bool, modifiedBetween::NTuple{…}, returnPrimal::Bool, expectedTapeType::Type, loweredArgs::Set{…}, boxedArgs::Set{…})
    @ Enzyme.Compiler ~/git/Enzyme.jl/src/compiler.jl:3177
  [3] codegen(output::Symbol, job::GPUCompiler.CompilerJob{…}; libraries::Bool, deferred_codegen::Bool, optimize::Bool, toplevel::Bool, strip::Bool, validate::Bool, only_entry::Bool, parent_job::Nothing)
    @ Enzyme.Compiler ~/git/Enzyme.jl/src/compiler.jl:5070
  [4] codegen
    @ ~/git/Enzyme.jl/src/compiler.jl:4477 [inlined]
  [5] _thunk(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}, postopt::Bool)
    @ Enzyme.Compiler ~/git/Enzyme.jl/src/compiler.jl:5755
  [6] _thunk
    @ ~/git/Enzyme.jl/src/compiler.jl:5755 [inlined]
  [7] cached_compilation
    @ ~/git/Enzyme.jl/src/compiler.jl:5793 [inlined]
  [8] (::Enzyme.Compiler.var"#554#555"{DataType, DataType, DataType, Enzyme.API.CDerivativeMode, NTuple{5, Bool}, Int64, Bool, Bool, UInt64, DataType})(ctx::LLVM.Context)
    @ Enzyme.Compiler ~/git/Enzyme.jl/src/compiler.jl:5859
  [9] JuliaContext(f::Enzyme.Compiler.var"#554#555"{DataType, DataType, DataType, Enzyme.API.CDerivativeMode, NTuple{5, Bool}, Int64, Bool, Bool, UInt64, DataType}; kwargs::@Kwargs{})
    @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:52
 [10] JuliaContext(f::Function)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:42
 [11] #s2027#553
    @ ~/git/Enzyme.jl/src/compiler.jl:5811 [inlined]
 [12] var"#s2027#553"(FA::Any, A::Any, TT::Any, Mode::Any, ModifiedBetween::Any, width::Any, ReturnPrimal::Any, ShadowInit::Any, World::Any, ABI::Any, ::Any, ::Type, ::Type, ::Type, tt::Any, ::Type, ::Type, ::Type, ::Type, ::Type, ::Any)
    @ Enzyme.Compiler ./none:0
 [13] (::Core.GeneratedFunctionStub)(::UInt64, ::LineNumberNode, ::Any, ::Vararg{Any})
    @ Core ./boot.jl:602
 [14] autodiff
    @ ~/git/Enzyme.jl/src/Enzyme.jl:286 [inlined]
 [15] autodiff
    @ ~/git/Enzyme.jl/src/Enzyme.jl:315 [inlined]
 [16] autodiff(::ReverseMode{false, FFIABI, false}, ::typeof(f), ::Const{Node{Float64}}, ::Duplicated{Matrix{Float64}}, ::Const{OperatorEnum{Tuple{…}, Tuple{…}}}, ::Duplicated{Vector{Float64}})
    @ Enzyme ~/git/Enzyme.jl/src/Enzyme.jl:300
 [17] macro expansion
    @ ./timing.jl:279 [inlined]
 [18] top-level scope
    @ ./REPL[16]:1
Some type information was truncated. Use `show(err)` to see complete types.

Looks like it's in the autodiff function itself rather than a deadlock (hopefully).

@vchuravy do you mind hitting it with a profile like when we were perf debugging sarah's code?

MilesCranmer commented 4 months ago

I have an attempt v2 of using Enzyme in SymbolicRegression.jl here: https://github.com/MilesCranmer/SymbolicRegression.jl/pull/326. This is the extension file itself: https://github.com/MilesCranmer/SymbolicRegression.jl/blob/98d6329355eae8567304eed3e163eba300d1a7d8/ext/SymbolicRegressionEnzymeExt.jl.

I got some small-scale tests working (takes a while to compile), but when I do a full test and use it as the AD backend for symbolic regression, it just hangs.

Looking at btop:

Screenshot 2024-07-01 at 16 50 48

I can see that Julia isn't even spinning the CPUs.

It's weird because it seems to be worse than before even. Before, even though it took a very long time, it eventually compiled. But now it seems to just freeze. Any guesses what it's from? This is on Julia 1.10.4 on macOS. Let me know if there's any other info I can try to provide.

vchuravy commented 4 months ago

It's weird because it seems to be worse than before even. Before, even though it took a very long time, it eventually compiled. But now it seems to just freeze. Any guesses what it's from? This is on Julia 1.10.4 on macOS. Let me know if there's any other info I can try to provide.

Attach lldb and get a stacktrace on all threads. Are you using multiple threads and autodiff_deferred?

MilesCranmer commented 4 months ago

Attach lldb

Will do

Are you using multiple threads

Yes (auto) I turned off multiple threads within the search – it should run serially (though Julia itself has access to multiple)

and autodiff_deferred?

No

MilesCranmer commented 4 months ago

Ran in lldb, maybe this is it @vchuravy?

(lldb) frame info
frame #0: 0x000000010535467c libjulia-internal.1.10.4.dylib`_jl_mutex_lock [inlined] _jl_mutex_wait(self=0x000000010bf65c30, lock=0x00000001056b1fc8, safepoint=1) at threading.c:847:17 [opt]

And the backtrace:

(lldb) thread backtrace
* thread #1, queue = 'com.apple.main-thread', stop reason = signal SIGSTOP
  * frame #0: 0x000000010535467c libjulia-internal.1.10.4.dylib`_jl_mutex_lock [inlined] _jl_mutex_wait(self=0x000000010bf65c30, lock=0x00000001056b1fc8, safepoint=1) at threading.c:847:17 [opt]
    frame #1: 0x0000000105354638 libjulia-internal.1.10.4.dylib`_jl_mutex_lock(self=0x000000010bf65c30, lock=0x00000001056b1fc8) at threading.c:875:5 [opt]
    frame #2: 0x0000000105af0f28 libjulia-codegen.1.10.4.dylib`::jl_generate_fptr_impl(jl_method_instance_t *, size_t, int *) [inlined] jl_mutex_lock(lock=<unavailable>) at julia_locks.h:65:5 [opt]
    frame #3: 0x0000000105af0f18 libjulia-codegen.1.10.4.dylib`jl_generate_fptr_impl(mi=0x00000001777dfc10, world=31546, did_compile=0x000000015768e484) at jitlayers.cpp:483:5 [opt]
    frame #4: 0x0000000105309e3c libjulia-internal.1.10.4.dylib`jl_compile_method_internal(mi=0x00000001777dfc10, world=31546) at gf.c:2481:16 [opt]
    frame #5: 0x000000010530d104 libjulia-internal.1.10.4.dylib`ijl_apply_generic [inlined] _jl_invoke(F=0x0000000174ff0670, args=0x000000015768e5a8, nargs=2, mfunc=0x00000001777dfc10, world=31546) at gf.c:2887:16 [opt]
    frame #6: 0x000000010530d0d0 libjulia-internal.1.10.4.dylib`ijl_apply_generic(F=0x0000000174ff0670, args=0x000000015768e5a8, nargs=<unavailable>) at gf.c:3077:12 [opt]
    frame #7: 0x0000000171a174d0 X2eIS_auMC0.dylib`julia_value_gradientNOT.NOT._25951 at interface.jl:82
    frame #8: 0x0000000171a68c7c X2eIS_auMC0.dylib`julia_initial_state_25915 at bfgs.jl:94
    frame #9: 0x0000000171300de4
    frame #10: 0x00000001713089cc
    frame #11: 0x000000016751c208
    frame #12: 0x00000001712c841c
    frame #13: 0x000000010532c8d8 libjulia-internal.1.10.4.dylib`start_task [inlined] jl_apply(args=<unavailable>, nargs=1) at julia.h:1982:12 [opt]
    frame #14: 0x000000010532c8cc libjulia-internal.1.10.4.dylib`start_task at task.c:1238:19 [opt]

When I continue and then ctrl-c:

(lldb) c
Process 27992 resuming
Process 27992 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = signal SIGSTOP
    frame #0: 0x000000010535467c libjulia-internal.1.10.4.dylib`_jl_mutex_lock [inlined] _jl_mutex_wait(self=0x000000010bf65c30, lock=0x00000001056b1fc8, safepoint=1) at threading.c:847:17 [opt]
   844              uv_mutex_unlock(&tls_lock);
   845          }
   846          jl_cpu_suspend();
-> 847          owner = jl_atomic_load_relaxed(&lock->owner);
   848      }
   849  }
   850 
MilesCranmer commented 4 months ago

When I run it with --threads=1:

julia --startup-file=no --project=. --threads=1 example.jl
ERROR: LoadError: TaskFailedException

    nested task error: StackOverflowError:
Stacktrace:
  [1] threading_run(fun::SymbolicRegression.SingleIterationModule.var"#296#threadsfor_fun#5"{SymbolicRegression.SingleIterationModule.var"#296#threadsfor_fun#4#6"{Dataset{Float32, Float32, Matrix{Float32}, Vector{Float32}, Nothing, @NamedTuple{}, Nothing, Nothing, Nothing, Nothing}, Options{SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping{Int64, Int64}, DynamicExpressions.OperatorEnumModule.OperatorEnum, Node, Expression, @NamedTuple{}, false, false, nothing, StatsBase.Weights{Float64, Float64, Vector{Float64}}, ADTypes.AutoEnzyme{Nothing}}, BitVector, Vector{Float64}, UnitRange{Int64}}}, static::Bool)
    @ Base.Threads ./threadingconstructs.jl:172
  [2] macro expansion
    @ ./threadingconstructs.jl:220 [inlined]
  [3] macro expansion
    @ ~/PermaDocuments/SymbolicRegressionMonorepo/SymbolicRegression.jl/src/Utils.jl:155 [inlined]
  [4] optimize_and_simplify_population(dataset::Dataset{Float32, Float32, Matrix{Float32}, Vector{Float32}, Nothing, @NamedTuple{}, Nothing, Nothing, Nothing, Nothing}, pop::Population{Float32, Float32, Expression{Float32, Node{Float32}, @NamedTuple{operators::Nothing, variable_names::Nothing}}}, options::Options{SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping{Int64, Int64}, DynamicExpressions.OperatorEnumModule.OperatorEnum, Node, Expression, @NamedTuple{}, false, false, nothing, StatsBase.Weights{Float64, Float64, Vector{Float64}}, ADTypes.AutoEnzyme{Nothing}}, curmaxsize::Int64, record::Dict{String, Any})
    @ SymbolicRegression.SingleIterationModule ~/PermaDocuments/SymbolicRegressionMonorepo/SymbolicRegression.jl/src/SingleIteration.jl:111
  [5] _dispatch_s_r_cycle(in_pop::Population{Float32, Float32, Expression{Float32, Node{Float32}, @NamedTuple{operators::Nothing, variable_names::Nothing}}}, dataset::Dataset{Float32, Float32, Matrix{Float32}, Vector{Float32}, Nothing, @NamedTuple{}, Nothing, Nothing, Nothing, Nothing}, options::Options{SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping{Int64, Int64}, DynamicExpressions.OperatorEnumModule.OperatorEnum, Node, Expression, @NamedTuple{}, false, false, nothing, StatsBase.Weights{Float64, Float64, Vector{Float64}}, ADTypes.AutoEnzyme{Nothing}}; pop::Int64, out::Int64, iteration::Int64, verbosity::Int64, cur_maxsize::Int64, running_search_statistics::SymbolicRegression.AdaptiveParsimonyModule.RunningSearchStatistics)
    @ SymbolicRegression ~/PermaDocuments/SymbolicRegressionMonorepo/SymbolicRegression.jl/src/SymbolicRegression.jl:1150
  [6] macro expansion
    @ ~/PermaDocuments/SymbolicRegressionMonorepo/SymbolicRegression.jl/src/SymbolicRegression.jl:841 [inlined]
  [7] macro expansion
    @ ~/PermaDocuments/SymbolicRegressionMonorepo/SymbolicRegression.jl/src/SearchUtils.jl:118 [inlined]
  [8] _warmup_search!(state::SymbolicRegression.SearchUtilsModule.SearchState{Float32, Float32, Expression{Float32, Node{Float32}, @NamedTuple{operators::Nothing, variable_names::Nothing}}, Tuple{Population{Float32, Float32, Expression{Float32, Node{Float32}, @NamedTuple{operators::Nothing, variable_names::Nothing}}}, HallOfFame{Float32, Float32, Expression{Float32, Node{Float32}, @NamedTuple{operators::Nothing, variable_names::Nothing}}}, Dict{String, Any}, Float64}, Channel}, datasets::Vector{Dataset{Float32, Float32, Matrix{Float32}, Vector{Float32}, Nothing, @NamedTuple{}, Nothing, Nothing, Nothing, Nothing}}, ropt::SymbolicRegression.SearchUtilsModule.RuntimeOptions{:serial, 1, false}, options::Options{SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping{Int64, Int64}, DynamicExpressions.OperatorEnumModule.OperatorEnum, Node, Expression, @NamedTuple{}, false, false, nothing, StatsBase.Weights{Float64, Float64, Vector{Float64}}, ADTypes.AutoEnzyme{Nothing}})
    @ SymbolicRegression ~/PermaDocuments/SymbolicRegressionMonorepo/SymbolicRegression.jl/src/SymbolicRegression.jl:836
  [9] _equation_search(datasets::Vector{Dataset{Float32, Float32, Matrix{Float32}, Vector{Float32}, Nothing, @NamedTuple{}, Nothing, Nothing, Nothing, Nothing}}, ropt::SymbolicRegression.SearchUtilsModule.RuntimeOptions{:serial, 1, false}, options::Options{SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping{Int64, Int64}, DynamicExpressions.OperatorEnumModule.OperatorEnum, Node, Expression, @NamedTuple{}, false, false, nothing, StatsBase.Weights{Float64, Float64, Vector{Float64}}, ADTypes.AutoEnzyme{Nothing}}, saved_state::Nothing)
    @ SymbolicRegression ~/PermaDocuments/SymbolicRegressionMonorepo/SymbolicRegression.jl/src/SymbolicRegression.jl:622
 [10] equation_search(datasets::Vector{Dataset{Float32, Float32, Matrix{Float32}, Vector{Float32}, Nothing, @NamedTuple{}, Nothing, Nothing, Nothing, Nothing}}; niterations::Int64, options::Options{SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping{Int64, Int64}, DynamicExpressions.OperatorEnumModule.OperatorEnum, Node, Expression, @NamedTuple{}, false, false, nothing, StatsBase.Weights{Float64, Float64, Vector{Float64}}, ADTypes.AutoEnzyme{Nothing}}, parallelism::Symbol, numprocs::Nothing, procs::Nothing, addprocs_function::Nothing, heap_size_hint_in_bytes::Nothing, runtests::Bool, saved_state::Nothing, return_state::Nothing, verbosity::Nothing, progress::Nothing, v_dim_out::Val{1})
    @ SymbolicRegression ~/PermaDocuments/SymbolicRegressionMonorepo/SymbolicRegression.jl/src/SymbolicRegression.jl:595
 [11] equation_search
    @ ~/PermaDocuments/SymbolicRegressionMonorepo/SymbolicRegression.jl/src/SymbolicRegression.jl:473 [inlined]
 [12] #equation_search#26
    @ ~/PermaDocuments/SymbolicRegressionMonorepo/SymbolicRegression.jl/src/SymbolicRegression.jl:436 [inlined]
 [13] equation_search
    @ ~/PermaDocuments/SymbolicRegressionMonorepo/SymbolicRegression.jl/src/SymbolicRegression.jl:382 [inlined]
 [14] #equation_search#28
    @ ~/PermaDocuments/SymbolicRegressionMonorepo/SymbolicRegression.jl/src/SymbolicRegression.jl:466 [inlined]
 [15] top-level scope
    @ ~/PermaDocuments/SymbolicRegressionMonorepo/SymbolicRegression.jl/example.jl:14
in expression starting at /Users/mcranmer/PermaDocuments/SymbolicRegressionMonorepo/SymbolicRegression.jl/example.jl:14

And when I remove that @threads call, I get:

> julia --startup-file=no --project=. --threads=1 example.jl
ERROR: LoadError: StackOverflowError:
in expression starting at /Users/mcranmer/PermaDocuments/SymbolicRegressionMonorepo/SymbolicRegression.jl/example.jl:14
MilesCranmer commented 4 months ago

Hahaha I found a hack to make it work. Increase the stack size 😆

with_stack(f, n) = fetch(schedule(Task(f, n)))

hall_of_fame = with_stack(2_000_000_000) do
    equation_search(X, y; niterations=40, options=options, parallelism=:serial)
end

This actually works in the single-threaded version. It's even surprisingly pretty fast in compilation! (Maybe the multiple threads were interfering with each other?)

But... why is the stack exploding?

(Also... this crashes in multi-threaded mode, see below. Maybe spawned tasks don't inherit the parent task's stack size?)

fatal: error thrown and no exception handler available.
ErrorException("attempt to switch to exited task")
ijl_error at /Users/mcranmer/PermaDocuments/julia/src/rtutils.c:41
ijl_switch at /Users/mcranmer/PermaDocuments/julia/src/task.c:634
fatal: error thrown and no exception handler available.
ErrorException("attempt to switch to exited task")
try_yieldto at ./task.jl:921
ijl_error at /Users/mcranmer/PermaDocuments/julia/src/rtutils.c:41
ijl_switch at /Users/mcranmer/PermaDocuments/julia/src/task.c:634
wait at ./task.jl:995
try_yieldto at ./task.jl:921
task_done_hook at ./task.jl:675
wait at ./task.jl:995
jfptr_task_done_hook_75409 at /Users/mcranmer/PermaDocuments/julia/usr/lib/julia/sys.dylib (unknown line)
task_done_hook at ./task.jl:675
_jl_invoke at /Users/mcranmer/PermaDocuments/julia/src/gf.c:0 [inlined]
ijl_apply_generic at /Users/mcranmer/PermaDocuments/julia/src/gf.c:3077
jfptr_task_done_hook_75409 at /Users/mcranmer/PermaDocuments/julia/usr/lib/julia/sys.dylib (unknown line)
jl_apply at /Users/mcranmer/PermaDocuments/julia/src/./julia.h:1982 [inlined]
jl_finish_task at /Users/mcranmer/PermaDocuments/julia/src/task.c:320
_jl_invoke at /Users/mcranmer/PermaDocuments/julia/src/gf.c:0 [inlined]
ijl_apply_generic at /Users/mcranmer/PermaDocuments/julia/src/gf.c:3077
start_task at /Users/mcranmer/PermaDocuments/julia/src/task.c:1249
jl_apply at /Users/mcranmer/PermaDocuments/julia/src/./julia.h:1982 [inlined]
jl_finish_task at /Users/mcranmer/PermaDocuments/julia/src/task.c:320
start_task at /Users/mcranmer/PermaDocuments/julia/src/task.c:1249
MilesCranmer commented 4 months ago

Quick updates:

  1. I can do multi-threaded searches with this too! I just wrap the Enzyme call itself:
with_stacksize(8 * 1024 * 1024) do
    autodiff(
        Reverse,
        evaluator,
        Duplicated(g.f.tree, g.extra.storage_tree),
        Duplicated(g.f.dataset, g.extra.storage_dataset),
        Const(g.f.options),
        Const(g.f.idx),
        Duplicated(output, doutput),
    )
end

and this seems to fix things. It works with a full multi-threaded SymbolicRegression.jl search, being used by Optim.jl for optimizing constants – even with the new ParametricExpression that stores constants in multiple places.

  2. I have no idea if this Task API is safe or not. So I posted on discourse here: https://discourse.julialang.org/t/raising-the-roof-increased-stack-size-shenanigans-in-enzyme/116511?u=milescranmer

Edit: The default Julia stack size is 4MB. I am requesting 8MB in the task.

MilesCranmer commented 4 months ago

The stack size trick (/hack) is magic, Enzyme is rock solid now for me. So this is as good as closed. Thanks for all the help!!


P.S., It works so well that I wonder if you would consider putting in a modified stack size directly into Enzyme? The default Julia stack size of 4 MB (or 2 MB on 32-bit systems) seems tiny for what Enzyme needs: (posted an issue here: https://github.com/JuliaLang/julia/issues/54998). I feel like Enzyme should even request 32 MB to be safe.

It also would give more reliable stack overflow errors if your compilation thread always starts with the same stack size.

I think part of the difficulty in debugging this was because the stack overflows were random:

  1. If the root task is the first to lock the compile cache, it might hit the stack overflow, since it carries over its stack from the underlying process.
  2. If a thread is the first to lock the compile cache, it might work, because that thread has a clean stack and more room!

I know that sometimes languages allocate threads with smaller stack sizes, so maybe that muddied things up as well (maybe a thread started doing the Enzyme compilation, and hit a stack overflow, and basically locked the compile cache forever)

P.P.S., I realised that it gets even trickier because of the caching. If one thread is compiling, hits a stack overflow, but cached some of its work, then theoretically could another thread get deeper in the AD process (since it no longer needs to travel through those particular frames)?

Maybe I'm imagining things but I also feel like the larger stack size made compilation much faster... Maybe it was from some threads dying off from stack overflows, and the larger stack just let the first one run to completion..

vchuravy commented 4 months ago

I do think we need to actually check what is creating such deep stacks here

vchuravy commented 4 months ago

Compilers are not supposed to be recursive and if they are we should transition to a worklist approach

MilesCranmer commented 4 months ago

I honestly don’t know. I think even without recursion you could hit this stack overflow, see my experiments here: https://discourse.julialang.org/t/experiments-with-julia-stack-sizes-and-enzyme/116511/2?u=milescranmer

If the workload is big enough, and Enzyme gets called deep enough in the existing stack, I think it’s doable for it to hit an overflow with Julia’s 4 MB stack size if the stack frames are hefty

For my case — I was calling Optim deep inside SymbolicRegression, and that Optim optimization (which is also quite deep with all the line search stuff) wraps an Enzyme-computed gradient of a loss function which itself calls deep into DynamicExpressions (which uses recursive evaluations).

Even creating a single Task at the start of an enzyme call is probably a good idea, since it will start a fresh stack. Wdyt?

wsmoses commented 4 months ago

For fun, can you set the environment variable ENABLE_GDBLISTENER=1 and get the backtrace of the stack overflow in gdb? That would hopefully let us see what Julia functions cause relevant issues here

MilesCranmer commented 4 months ago

I don't think gdb is available on macOS unfortunately. Is lldb okay?

vchuravy commented 4 months ago

Yeah this works for lldb as well.

You can also now use the Julia profiler during compilation, since it doesn't hang anymore, to get some idea of the stack traces involved

MilesCranmer commented 4 months ago

Just tried... The call stack is so deep that the profiler is refusing to show it 😆 Screenshot 2024-07-03 at 18 36 13

MilesCranmer commented 4 months ago

Okay, ProfileView.jl to the rescue: Screenshot 2024-07-03 at 18 39 13

MilesCranmer commented 4 months ago

Okay, here is a cycle I found from scrolling the stack. You can see it starts at abstractinterpretation.jl:2370, and then ends up there again:

file method
./compiler/abstractinterpretation.jl:2370 Core.Compiler.abstract_eval_call(::Core.Compiler.NativeInterpreter, ::Expr, ::Vector{Core.Compiler.VarState}, ::Core.Compiler.InferenceState)
./compiler/abstractinterpretation.jl:2913 Core.Compiler.abstract_eval_basic_statement(::Core.Compiler.NativeInterpreter, ::Any, ::Vector{Core.Compiler.VarState}, ::Core.Compiler.InferenceState)
./compiler/abstractinterpretation.jl:2624 Core.Compiler.abstract_eval_statement(::Core.Compiler.NativeInterpreter, ::Any, ::Vector{Core.Compiler.VarState}, ::Core.Compiler.InferenceState)
./compiler/abstractinterpretation.jl:2380 Core.Compiler.abstract_eval_statement_expr(::Core.Compiler.NativeInterpreter, ::Expr, ::Vector{Core.Compiler.VarState}, ::Core.Compiler.InferenceState)
./compiler/abstractinterpretation.jl:2370 Core.Compiler.abstract_eval_call(::Core.Compiler.NativeInterpreter, ::Expr, ::Vector{Core.Compiler.VarState}, ::Core.Compiler.InferenceState)
./compiler/abstractinterpretation.jl:2354 Core.Compiler.abstract_call(::Core.Compiler.NativeInterpreter, ::Core.Compiler.ArgInfo, ::Core.Compiler.InferenceState)
./compiler/abstractinterpretation.jl:2162 Core.Compiler.abstract_call(::Core.Compiler.NativeInterpreter, ::Core.Compiler.ArgInfo, ::Core.Compiler.StmtInfo, ::Core.Compiler.InferenceState)
./compiler/abstractinterpretation.jl:2169 Core.Compiler.abstract_call(::Core.Compiler.NativeInterpreter, ::Core.Compiler.ArgInfo, ::Core.Compiler.StmtInfo, ::Core.Compiler.InferenceState, ::Int64)
./compiler/abstractinterpretation.jl:2087 Core.Compiler.abstract_call_known(::Core.Compiler.NativeInterpreter, ::Any, ::Core.Compiler.ArgInfo, ::Core.Compiler.StmtInfo, ::Core.Compiler.InferenceState, ::Int64)
./compiler/abstractinterpretation.jl:95 Core.Compiler.abstract_call_gf_by_type(::Core.Compiler.NativeInterpreter, ::Any, ::Core.Compiler.ArgInfo, ::Core.Compiler.StmtInfo, ::Any, ::Core.Compiler.InferenceState, ::Int64)
./compiler/abstractinterpretation.jl:629 Core.Compiler.abstract_call_method(::Core.Compiler.NativeInterpreter, ::Method, ::Any, ::Core.SimpleVector, ::Bool, ::Core.Compiler.StmtInfo, ::Core.Compiler.InferenceState)
./compiler/typeinfer.jl:930 Core.Compiler.typeinf_edge(::Core.Compiler.NativeInterpreter, ::Method, ::Any, ::Core.SimpleVector, ::Core.Compiler.InferenceState)
./compiler/typeinfer.jl:216 Core.Compiler.typeinf(::Core.Compiler.NativeInterpreter, ::Core.Compiler.InferenceState)
./compiler/typeinfer.jl:247 Core.Compiler._typeinf(::Core.Compiler.NativeInterpreter, ::Core.Compiler.InferenceState)
./compiler/abstractinterpretation.jl:3186 Core.Compiler.typeinf_nocycle(::Core.Compiler.NativeInterpreter, ::Core.Compiler.InferenceState)
./compiler/abstractinterpretation.jl:3098 Core.Compiler.typeinf_local(::Core.Compiler.NativeInterpreter, ::Core.Compiler.InferenceState)
./compiler/abstractinterpretation.jl:2913 Core.Compiler.abstract_eval_basic_statement(::Core.Compiler.NativeInterpreter, ::Any, ::Vector{Core.Compiler.VarState}, ::Core.Compiler.InferenceState)
./compiler/abstractinterpretation.jl:2624 Core.Compiler.abstract_eval_statement(::Core.Compiler.NativeInterpreter, ::Any, ::Vector{Core.Compiler.VarState}, ::Core.Compiler.InferenceState)
./compiler/abstractinterpretation.jl:2380 Core.Compiler.abstract_eval_statement_expr(::Core.Compiler.NativeInterpreter, ::Expr, ::Vector{Core.Compiler.VarState}, ::Core.Compiler.InferenceState)
./compiler/abstractinterpretation.jl:2370 Core.Compiler.abstract_eval_call(::Core.Compiler.NativeInterpreter, ::Expr, ::Vector{Core.Compiler.VarState}, ::Core.Compiler.InferenceState)
MilesCranmer commented 4 months ago

And, here's a flat profile, sorted by count, and with the last 300 rows:

(Toggle) |Count|File|Function| |-|-|-| |94357|`/usr/lib/system/libsystem_kernel.dylib:?`|`__psynch_cvwait`| |82394|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/gf.c:3077`|`ijl_apply_generic`| |82365|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/./julia.h:1982`|`jl_apply`| |82363|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/task.c:1238`|`start_task`| |82108|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/gf.c:2481`|`jl_compile_method_internal`| |82108|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/gf.c:2887`|`_jl_invoke`| |79133|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/threading.c:875`|`_jl_mutex_lock`| |79106|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/./julia_locks.h:65`|`jl_mutex_lock`| |79104|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/jitlayers.cpp:483`|`jl_generate_fptr_impl`| 
|77986|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/threading.c:847`|`_jl_mutex_wait`| |74453|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/gf.c:?`|`_jl_invoke`| |73704|`[any unknown stackframes]:`|``| |56039|`@SymbolicRegression/src/LossFunctions.jl:119`|`#eval_loss#3`| |56039|`@SymbolicRegression/src/LossFunctions.jl:111`|`eval_loss`| |56022|`@SymbolicRegression/ext/SymbolicRegressionEnzymeExt.jl:27`|`evaluator`| |56022|`@SymbolicRegression/ext/SymbolicRegressionEnzymeExt.jl:?`|`evaluator`| |56022|`@SymbolicRegression/ext/SymbolicRegressionEnzymeExt.jl:?`|`diffejulia_evaluator_3806_inner_1wrap`| |56022|`@Enzyme/src/compiler.jl:6606`|`macro expansion`| |56022|`@Enzyme/src/compiler.jl:6207`|`enzyme_call`| |56022|`@Enzyme/src/compiler.jl:6084`|`CombinedAdjointThunk`| |56022|`@Enzyme/src/Enzyme.jl:309`|`autodiff`| |56022|`@Enzyme/src/Enzyme.jl:333`|`autodiff`| |56022|`@Enzyme/src/Enzyme.jl:318`|`autodiff`| |56022|`@SymbolicRegression/ext/SymbolicRegressionEnzymeExt.jl:42`|`(::SymbolicRegressionEnzymeExt.var"#1#2"{SymbolicRegression.ConstantOptimizationModule.GradEvaluator{SymbolicRegression.ConstantOptimizationModule.Evaluator{Expression{Float32, Node{Float32}, @NamedTuple{operators::Nothing, variable_names::Nothing}}, Vector{Ref{Node{Float32}}}, Dataset{Float32, Float32, Matrix{Float32}, Vector{Float32}, Nothing, @NamedTuple{}, Nothing, Nothing, Nothing, Nothing}, Options{SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping{Int64, Int64}, DynamicExpressions.OperatorEnumModule.OperatorEnum, Node, Expression, @NamedTuple{}, false, false, nothing, StatsBase.Weights{Float64, Float64, Vector{Float64}}, ADTypes.AutoEnzyme{Nothing}}, Nothing}, ADTypes.AutoEnzyme{Nothing}, 
@NamedTuple{storage_tree::Expression{Float32, Node{Float32}, @NamedTuple{operators::Nothing, variable_names::Nothing}}, storage_refs::Vector{Ref{Node{Float32}}}, storage_dataset::Dataset{Float32, Float32, Matrix{Float32}, Vector{Float32}, Nothing, @NamedTuple{}, Nothing, Nothing, Nothing, Nothing}}}, Vector{Float32}, Vector{Float32}})()`| |54227|`@SymbolicRegression/src/InterfaceDynamicExpressions.jl:61`|`#eval_tree_array#1`| |54227|`@SymbolicRegression/src/InterfaceDynamicExpressions.jl:54`|`eval_tree_array`| |54227|`@SymbolicRegression/src/LossFunctions.jl:57`|`eval_tree_dispatch`| |54227|`@SymbolicRegression/src/LossFunctions.jl:68`|`_eval_loss`| |35790|`@Enzyme/src/rules/jitrules.jl:307`|`runtime_generic_augfwd(activity::Type{Val{(false, false, false, true, true, false)}}, width::Val{1}, ModifiedBetween::Val{(true, true, true, true, true, true)}, RT::Val{@NamedTuple{1, 2, 3}}, f::typeof(Core.kwcall), df::Nothing, primal_1::@NamedTuple{turbo::Val{false}, bumper::Val{false}}, shadow_1_1::Nothing, primal_2::typeof(eval_tree_array), shadow_2_1::Nothing, primal_3::Expression{Float32, Node{Float32}, @NamedTuple{operators::Nothing, variable_names::Nothing}}, shadow_3_1::Expression{Float32, Node{Float32}, @NamedTuple{operators::Nothing, variable_names::Nothing}}, primal_4::Matrix{Float32}, shadow_4_1::Matrix{Float32}, primal_5::DynamicExpressions.OperatorEnumModule.OperatorEnum{Tuple{typeof(+), typeof(*), typeof(/), typeof(-)}, Tuple{typeof(cos), typeof(exp)}}, shadow_5_1::Nothing)`| |26070|`/usr/lib/system/libsystem_kernel.dylib:?`|`kevent`| |17595|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/gf.c:2902`|`ijl_invoke`| |17574|`@Enzyme/src/compiler.jl:408`|`active_reg_inner`| |17509|`@Enzyme/src/compiler.jl:609`|`guess_activity`| |17509|`@Enzyme/src/rules/jitrules.jl:296`|`runtime_generic_augfwd(activity::Type{Val{(false, 
false, false, true, true, false)}}, width::Val{1}, ModifiedBetween::Val{(true, true, true, true, true, true)}, RT::Val{@NamedTuple{1, 2, 3}}, f::typeof(Core.kwcall), df::Nothing, primal_1::@NamedTuple{turbo::Val{false}, bumper::Val{false}}, shadow_1_1::Nothing, primal_2::typeof(eval_tree_array), shadow_2_1::Nothing, primal_3::Expression{Float32, Node{Float32}, @NamedTuple{operators::Nothing, variable_names::Nothing}}, shadow_3_1::Expression{Float32, Node{Float32}, @NamedTuple{operators::Nothing, variable_names::Nothing}}, primal_4::Matrix{Float32}, shadow_4_1::Matrix{Float32}, primal_5::DynamicExpressions.OperatorEnumModule.OperatorEnum{Tuple{typeof(+), typeof(*), typeof(/), typeof(-)}, Tuple{typeof(cos), typeof(exp)}}, shadow_5_1::Nothing)`| |16138|`@Base/threadingconstructs.jl:154`|`(::Base.Threads.var"#1#2"{SymbolicRegression.SearchUtilsModule.var"#347#threadsfor_fun#39"{SymbolicRegression.SearchUtilsModule.var"#347#threadsfor_fun#37#40"{Vector{PopMember{Float32, Float32, Expression{Float32, Node{Float32}, @NamedTuple{operators::Nothing, variable_names::Nothing}}}}, Dataset{Float32, Float32, Matrix{Float32}, Vector{Float32}, Nothing, @NamedTuple{}, Nothing, Nothing, Nothing, Nothing}, Options{SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping{Int64, Int64}, DynamicExpressions.OperatorEnumModule.OperatorEnum, Node, Expression, @NamedTuple{}, false, false, nothing, StatsBase.Weights{Float64, Float64, Vector{Float64}}, ADTypes.AutoEnzyme{Nothing}}, Vector{String}, Vector{Float32}, Vector{Int64}, UnitRange{Int64}}}, Int64})()`| |16066|`@Base/threadingconstructs.jl:215`|`(::SymbolicRegression.SingleIterationModule.var"#296#threadsfor_fun#5"{SymbolicRegression.SingleIterationModule.var"#296#threadsfor_fun#4#6"{Dataset{Float32, Float32, Matrix{Float32}, Vector{Float32}, Nothing, @NamedTuple{}, Nothing, Nothing, Nothing, Nothing}, Options{SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping{Int64, Int64}, 
DynamicExpressions.OperatorEnumModule.OperatorEnum, Node, Expression, @NamedTuple{}, false, false, nothing, StatsBase.Weights{Float64, Float64, Vector{Float64}}, ADTypes.AutoEnzyme{Nothing}}, BitVector, Vector{Float64}, UnitRange{Int64}}})(tid::Int64; onethread::Bool)`| |16066|`@Base/threadingconstructs.jl:182`|`#296#threadsfor_fun`| |15949|`@SymbolicRegression/src/SingleIteration.jl:120`|`macro expansion`| |15943|`@SymbolicRegression/src/ConstantOptimization.jl:42`|`dispatch_optimize_constants(dataset::Dataset{Float32, Float32, Matrix{Float32}, Vector{Float32}, Nothing, @NamedTuple{}, Nothing, Nothing, Nothing, Nothing}, member::PopMember{Float32, Float32, Expression{Float32, Node{Float32}, @NamedTuple{operators::Nothing, variable_names::Nothing}}}, options::Options{SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping{Int64, Int64}, DynamicExpressions.OperatorEnumModule.OperatorEnum, Node, Expression, @NamedTuple{}, false, false, nothing, StatsBase.Weights{Float64, Float64, Vector{Float64}}, ADTypes.AutoEnzyme{Nothing}}, idx::Nothing)`| |15943|`@SymbolicRegression/src/ConstantOptimization.jl:28`|`optimize_constants(dataset::Dataset{Float32, Float32, Matrix{Float32}, Vector{Float32}, Nothing, @NamedTuple{}, Nothing, Nothing, Nothing, Nothing}, member::PopMember{Float32, Float32, Expression{Float32, Node{Float32}, @NamedTuple{operators::Nothing, variable_names::Nothing}}}, options::Options{SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping{Int64, Int64}, DynamicExpressions.OperatorEnumModule.OperatorEnum, Node, Expression, @NamedTuple{}, false, false, nothing, StatsBase.Weights{Float64, Float64, Vector{Float64}}, ADTypes.AutoEnzyme{Nothing}})`| |7883|`@Base/deepcopy.jl:65`|`deepcopy_internal(x::Any, stackdict::IdDict{Any, Any})`| |7883|`@Base/deepcopy.jl:26`|`deepcopy`| 
|7883|`@SymbolicRegression/ext/SymbolicRegressionEnzymeExt.jl:21`|`SymbolicRegression.ConstantOptimizationModule.GradEvaluator(f::SymbolicRegression.ConstantOptimizationModule.Evaluator{Expression{Float32, Node{Float32}, @NamedTuple{operators::Nothing, variable_names::Nothing}}, Vector{Ref{Node{Float32}}}, Dataset{Float32, Float32, Matrix{Float32}, Vector{Float32}, Nothing, @NamedTuple{}, Nothing, Nothing, Nothing, Nothing}, Options{SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping{Int64, Int64}, DynamicExpressions.OperatorEnumModule.OperatorEnum, Node, Expression, @NamedTuple{}, false, false, nothing, StatsBase.Weights{Float64, Float64, Vector{Float64}}, ADTypes.AutoEnzyme{Nothing}}, Nothing}, backend::ADTypes.AutoEnzyme{Nothing})`| |7883|`@SymbolicRegression/src/ConstantOptimization.jl:63`|`_optimize_constants(dataset::Dataset{Float32, Float32, Matrix{Float32}, Vector{Float32}, Nothing, @NamedTuple{}, Nothing, Nothing, Nothing, Nothing}, member::PopMember{Float32, Float32, Expression{Float32, Node{Float32}, @NamedTuple{operators::Nothing, variable_names::Nothing}}}, options::Options{SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping{Int64, Int64}, DynamicExpressions.OperatorEnumModule.OperatorEnum, Node, Expression, @NamedTuple{}, false, false, nothing, StatsBase.Weights{Float64, Float64, Vector{Float64}}, ADTypes.AutoEnzyme{Nothing}}, algorithm::Optim.BFGS{LineSearches.InitialStatic{Float64}, LineSearches.BackTracking{Float64, Int64}, Nothing, Nothing, Optim.Flat}, optimizer_options::Optim.Options{Float64, Nothing}, idx::Nothing)`| |7866|`@Optim/src/multivariate/optimize/optimize.jl:36`|`optimize`| |7866|`@Optim/src/multivariate/optimize/interface.jl:143`|`optimize(f::NLSolversBase.InplaceObjective{Nothing, SymbolicRegression.ConstantOptimizationModule.GradEvaluator{SymbolicRegression.ConstantOptimizationModule.Evaluator{Expression{Float32, Node{Float32}, @NamedTuple{operators::Nothing, variable_names::Nothing}}, 
Vector{Ref{Node{Float32}}}, Dataset{Float32, Float32, Matrix{Float32}, Vector{Float32}, Nothing, @NamedTuple{}, Nothing, Nothing, Nothing, Nothing}, Options{SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping{Int64, Int64}, DynamicExpressions.OperatorEnumModule.OperatorEnum, Node, Expression, @NamedTuple{}, false, false, nothing, StatsBase.Weights{Float64, Float64, Vector{Float64}}, ADTypes.AutoEnzyme{Nothing}}, Nothing}, ADTypes.AutoEnzyme{Nothing}, @NamedTuple{storage_tree::Expression{Float32, Node{Float32}, @NamedTuple{operators::Nothing, variable_names::Nothing}}, storage_refs::Vector{Ref{Node{Float32}}}, storage_dataset::Dataset{Float32, Float32, Matrix{Float32}, Vector{Float32}, Nothing, @NamedTuple{}, Nothing, Nothing, Nothing, Nothing}}}, Nothing, Nothing, Nothing}, initial_x::Vector{Float32}, method::Optim.BFGS{LineSearches.InitialStatic{Float64}, LineSearches.BackTracking{Float64, Int64}, Nothing, Nothing, Optim.Flat}, options::Optim.Options{Float64, Nothing}; inplace::Bool, autodiff::Symbol)`| |7866|`@Optim/src/multivariate/optimize/interface.jl:139`|`optimize`| |7866|`@SymbolicRegression/src/ConstantOptimization.jl:70`|`_optimize_constants(dataset::Dataset{Float32, Float32, Matrix{Float32}, Vector{Float32}, Nothing, @NamedTuple{}, Nothing, Nothing, Nothing, Nothing}, member::PopMember{Float32, Float32, Expression{Float32, Node{Float32}, @NamedTuple{operators::Nothing, variable_names::Nothing}}}, options::Options{SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping{Int64, Int64}, DynamicExpressions.OperatorEnumModule.OperatorEnum, Node, Expression, @NamedTuple{}, false, false, nothing, StatsBase.Weights{Float64, Float64, Vector{Float64}}, ADTypes.AutoEnzyme{Nothing}}, algorithm::Optim.BFGS{LineSearches.InitialStatic{Float64}, LineSearches.BackTracking{Float64, Int64}, Nothing, Nothing, Optim.Flat}, optimizer_options::Optim.Options{Float64, Nothing}, idx::Nothing)`| 
|7826|`@NLSolversBase/src/interface.jl:82`|`value_gradient!!(obj::NLSolversBase.OnceDifferentiable{Float32, Vector{Float32}, Vector{Float32}}, x::Vector{Float32})`| |7826|`@Optim/src/multivariate/solvers/first_order/bfgs.jl:94`|`initial_state(method::Optim.BFGS{LineSearches.InitialStatic{Float64}, LineSearches.BackTracking{Float64, Int64}, Nothing, Nothing, Optim.Flat}, options::Optim.Options{Float64, Nothing}, d::NLSolversBase.OnceDifferentiable{Float32, Vector{Float32}, Vector{Float32}}, initial_x::Vector{Float32})`| |5317|`/Users/mcranmer/.julia/juliaup/julia-1.10.4+0.aarch64.apple.darwin14/lib/julia/libLLVM.dylib:?`|`_ZN4llvm22MustBeExecutedIterator7advanceEv`| |3046|`/Users/mcranmer/.julia/juliaup/julia-1.10.4+0.aarch64.apple.darwin14/lib/julia/libLLVM.dylib:?`|`_ZN4llvm8DenseMapINS_14PointerIntPairIPKNS_11InstructionELj1ENS_20ExplorationDirectionENS_21PointerLikeTypeTraitsIS4_EENS_18PointerIntPairInfoIS4_Lj1ES7_EEEENS_6detail13DenseSetEmptyENS_12DenseMapInfoISA_vEENSB_12DenseSetPairISA_EEE4growEj`| |2986|`@Base/compiler/typeinfer.jl:1082`|`typeinf_ext_toplevel(interp::Core.Compiler.NativeInterpreter, linfo::Core.MethodInstance)`| |2986|`@Base/compiler/typeinfer.jl:1078`|`typeinf_ext_toplevel(mi::Core.MethodInstance, world::UInt64)`| |2986|`/Users/mcranmer/.julia/juliaup/julia-1.10.4+0.aarch64.apple.darwin14/lib/julia/sys.dylib:?`|`jfptr_typeinf_ext_toplevel_35854.3`| |2986|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/gf.c:394`|`jl_type_infer`| |2986|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/jitlayers.cpp:504`|`jl_generate_fptr_impl`| |2463|`@Base/compiler/abstractinterpretation.jl:2624`|`abstract_eval_statement(interp::Core.Compiler.NativeInterpreter, e::Any, 
vtypes::Vector{Core.Compiler.VarState}, sv::Core.Compiler.InferenceState)`| |2463|`@Base/compiler/abstractinterpretation.jl:3098`|`typeinf_local(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)`| |2463|`@Base/compiler/abstractinterpretation.jl:3186`|`typeinf_nocycle(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)`| |2463|`@Base/compiler/typeinfer.jl:247`|`_typeinf(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)`| |2462|`@Base/compiler/abstractinterpretation.jl:2380`|`abstract_eval_statement_expr(interp::Core.Compiler.NativeInterpreter, e::Expr, vtypes::Vector{Core.Compiler.VarState}, sv::Core.Compiler.InferenceState)`| |2461|`@Base/compiler/abstractinterpretation.jl:2162`|`abstract_call(interp::Core.Compiler.NativeInterpreter, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState)`| |2461|`@Base/compiler/abstractinterpretation.jl:2354`|`abstract_call(interp::Core.Compiler.NativeInterpreter, arginfo::Core.Compiler.ArgInfo, sv::Core.Compiler.InferenceState)`| |2461|`@Base/compiler/abstractinterpretation.jl:2370`|`abstract_eval_call(interp::Core.Compiler.NativeInterpreter, e::Expr, vtypes::Vector{Core.Compiler.VarState}, sv::Core.Compiler.InferenceState)`| |2455|`@Base/compiler/abstractinterpretation.jl:2169`|`abstract_call(interp::Core.Compiler.NativeInterpreter, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Int64)`| |2450|`@Base/compiler/abstractinterpretation.jl:2087`|`abstract_call_known(interp::Core.Compiler.NativeInterpreter, f::Any, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Int64)`| |2444|`@Base/compiler/abstractinterpretation.jl:2913`|`abstract_eval_basic_statement(interp::Core.Compiler.NativeInterpreter, stmt::Any, pc_vartable::Vector{Core.Compiler.VarState}, frame::Core.Compiler.InferenceState)`| 
|2426|`@Base/compiler/abstractinterpretation.jl:629`|`abstract_call_method(interp::Core.Compiler.NativeInterpreter, method::Method, sig::Any, sparams::Core.SimpleVector, hardlimit::Bool, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState)`| |2425|`@Base/compiler/abstractinterpretation.jl:95`|`abstract_call_gf_by_type(interp::Core.Compiler.NativeInterpreter, f::Any, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, atype::Any, sv::Core.Compiler.InferenceState, max_methods::Int64)`| |2398|`@Base/compiler/typeinfer.jl:1051`|`typeinf_ext(interp::Core.Compiler.NativeInterpreter, mi::Core.MethodInstance)`| |2376|`@Base/compiler/typeinfer.jl:930`|`typeinf_edge(interp::Core.Compiler.NativeInterpreter, method::Method, atype::Any, sparams::Core.SimpleVector, caller::Core.Compiler.InferenceState)`| |2323|`@Base/compiler/typeinfer.jl:216`|`typeinf(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)`| |2260|`@Base/compiler/inferencestate.jl:430`|`Core.Compiler.InferenceState(result::Core.Compiler.InferenceResult, cache::Symbol, interp::Core.Compiler.NativeInterpreter)`| |2246|`@Base/compiler/utilities.jl:123`|`get_staged(mi::Core.MethodInstance, world::UInt64)`| |2246|`@Base/compiler/utilities.jl:135`|`retrieve_code_info`| |2238|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/method.c:540`|`jl_call_staged`| |2238|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/method.c:593`|`ijl_code_for_staged`| |2235|`@Base/boot.jl:602`|`(::Core.GeneratedFunctionStub)(::UInt64, ::LineNumberNode, ::Any, ::Vararg{Any})`| |2166|`@Base/compiler/abstractinterpretation.jl:2889`|`abstract_eval_basic_statement(interp::Core.Compiler.NativeInterpreter, stmt::Any, 
pc_vartable::Vector{Core.Compiler.VarState}, frame::Core.Compiler.InferenceState)`| |1812|`@SymbolicRegression/src/LossFunctions.jl:81`|`_eval_loss(tree::Expression{Float32, Node{Float32}, @NamedTuple{operators::Nothing, variable_names::Nothing}}, dataset::Dataset{Float32, Float32, Matrix{Float32}, Vector{Float32}, Nothing, @NamedTuple{}, Nothing, Nothing, Nothing, Nothing}, options::Options{SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping{Int64, Int64}, DynamicExpressions.OperatorEnumModule.OperatorEnum, Node, Expression, @NamedTuple{}, false, false, nothing, StatsBase.Weights{Float64, Float64, Vector{Float64}}, ADTypes.AutoEnzyme{Nothing}}, regularization::Bool, idx::Nothing)`| |1635|`@Base/compiler/abstractinterpretation.jl:2004`|`abstract_call_known(interp::Core.Compiler.NativeInterpreter, f::Any, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Int64)`| |1632|`@Base/compiler/abstractinterpretation.jl:1612`|`abstract_apply(interp::Core.Compiler.NativeInterpreter, argtypes::Vector{Any}, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Int64)`| |1558|`@GPUCompiler/src/driver.jl:42`|`JuliaContext(f::Function)`| |1519|`@Base/compiler/typeinfer.jl:920`|`typeinf_edge(interp::Core.Compiler.NativeInterpreter, method::Method, atype::Any, sparams::Core.SimpleVector, caller::Core.Compiler.InferenceState)`| |1509|`@Enzyme/src/compiler.jl:6716`|`#s2010#28592`| |1509|`none:?`|`var"#s2010#28592"(FA::Any, A::Any, TT::Any, Mode::Any, ModifiedBetween::Any, width::Any, ReturnPrimal::Any, ShadowInit::Any, World::Any, ABI::Any, ::Any, ::Type, ::Type, ::Type, tt::Any, ::Type, ::Type, ::Type, ::Type, ::Type, ::Any)`| |1460|`@GPUCompiler/src/driver.jl:52`|`JuliaContext(f::Enzyme.Compiler.var"#28577#28583"{Bool, DataType, DataType, DataType, Int64, DataType, Int64, Bool, DataType, DataType}; kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, @NamedTuple{}})`| 
|1295|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/builtins.c:768`|`do_apply`| |1247|`@Base/compiler/typeinfer.jl:272`|`_typeinf(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)`| |1240|`@Base/compiler/optimize.jl:489`|`run_passes(ci::Core.CodeInfo, sv::Core.Compiler.OptimizationState{Core.Compiler.NativeInterpreter}, caller::Core.Compiler.InferenceResult, optimize_until::Nothing)`| |1240|`@Base/compiler/optimize.jl:504`|`run_passes`| |1240|`@Base/compiler/optimize.jl:453`|`optimize`| |1239|`@Enzyme/src/compiler.jl:6765`|`(::Enzyme.Compiler.var"#28593#28594"{DataType, DataType, Enzyme.API.CDerivativeMode, NTuple{4, Bool}, Int64, Bool, Bool, UInt64, DataType})(ctx::LLVM.Context)`| |1237|`@Enzyme/src/compiler.jl:6658`|`_thunk`| |1237|`@Enzyme/src/compiler.jl:6696`|`cached_compilation`| |1184|`@Enzyme/src/compiler.jl:5129`|`codegen`| |1184|`@Enzyme/src/compiler.jl:6658`|`_thunk(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}, postopt::Bool)`| |1020|`@Base/compiler/abstractinterpretation.jl:2156`|`abstract_call_unknown(interp::Core.Compiler.NativeInterpreter, ft::Any, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Int64)`| |1020|`@Base/compiler/abstractinterpretation.jl:2166`|`abstract_call(interp::Core.Compiler.NativeInterpreter, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Int64)`| |781|`@Base/compiler/typeinfer.jl:1049`|`typeinf_ext(interp::Core.Compiler.NativeInterpreter, mi::Core.MethodInstance)`| |724|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/interpreter.c:126`|`do_call`| 
|724|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/interpreter.c:489`|`eval_body`| |724|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/interpreter.c:544`|`eval_body`| |724|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/interpreter.c:775`|`jl_interpret_toplevel_thunk`| |724|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/toplevel.c:934`|`jl_toplevel_eval_flex`| |724|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/toplevel.c:877`|`jl_toplevel_eval_flex`| |724|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/toplevel.c:943`|`ijl_toplevel_eval`| |724|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/toplevel.c:985`|`ijl_toplevel_eval_in`| |724|`@Base/boot.jl:385`|`eval`| |724|`@Base/Base.jl:88`|`eval`| |724|`@VSCodeServer/src/repl.jl:229`|`repleval(m::Module, code::Expr, ::String)`| |724|`@VSCodeServer/src/repl.jl:192`|`(::VSCodeServer.var"#112#114"{Module, Expr, REPL.LineEditREPL, REPL.LineEdit.Prompt})()`| |724|`@Base/logging.jl:515`|`with_logstate(f::Function, logstate::Any)`| 
|724|`@Base/logging.jl:627`|`with_logger`| |724|`@VSCodeServer/src/repl.jl:193`|`(::VSCodeServer.var"#111#113"{Module, Expr, REPL.LineEditREPL, REPL.LineEdit.Prompt})()`| |724|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/builtins.c:812`|`jl_f__call_latest`| |724|`@Base/essentials.jl:892`|`#invokelatest#2`| |724|`@Base/essentials.jl:889`|`invokelatest(::Any)`| |724|`@VSCodeServer/src/eval.jl:34`|`(::VSCodeServer.var"#64#65")()`| |694|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/threading.c:837`|`_jl_mutex_wait`| |658|`@Enzyme/src/compiler.jl:5851`|`codegen(output::Symbol, job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}; libraries::Bool, deferred_codegen::Bool, optimize::Bool, toplevel::Bool, strip::Bool, validate::Bool, only_entry::Bool, parent_job::Nothing)`| |631|`/Users/mcranmer/.julia/juliaup/julia-1.10.4+0.aarch64.apple.darwin14/lib/julia/libLLVM.dylib:?`|`_ZN4llvm12DenseMapBaseINS_8DenseMapINS_14PointerIntPairIPKNS_11InstructionELj1ENS_20ExplorationDirectionENS_21PointerLikeTypeTraitsIS5_EENS_18PointerIntPairInfoIS5_Lj1ES8_EEEENS_6detail13DenseSetEmptyENS_12DenseMapInfoISB_vEENSC_12DenseSetPairISB_EEEESB_SD_SF_SH_E20InsertIntoBucketImplISB_EEPSH_RKSB_RKT_SL_`| |592|`/Users/mcranmer/.julia/artifacts/4f5f5b6df77206271fda4b37fa3712579d8183c6/lib/libEnzyme-15.dylib:?`|`_ZN16AdjointGenerator13visitCallInstERN4llvm8CallInstE`| |589|`/Users/mcranmer/.julia/artifacts/4f5f5b6df77206271fda4b37fa3712579d8183c6/lib/libEnzyme-15.dylib:?`|`_ZN16AdjointGenerator28recursivelyHandleSubfunctionERN4llvm8CallInstEPNS0_8FunctionERKNSt3__16vectorIbNS5_9allocatorIbEEEEb10DIFFE_TYPEb`| 
|587|`@Base/compiler/abstractinterpretation.jl:103`|`abstract_call_gf_by_type(interp::Core.Compiler.NativeInterpreter, f::Any, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, atype::Any, sv::Core.Compiler.InferenceState, max_methods::Int64)`| |586|`@Base/compiler/abstractinterpretation.jl:788`|`abstract_call_method_with_const_args(interp::Core.Compiler.NativeInterpreter, result::Core.Compiler.MethodCallResult, f::Any, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, match::Core.MethodMatch, sv::Core.Compiler.InferenceState)`| |473|`@Base/compiler/abstractinterpretation.jl:818`|`abstract_call_method_with_const_args(interp::Core.Compiler.NativeInterpreter, result::Core.Compiler.MethodCallResult, f::Any, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, match::Core.MethodMatch, sv::Core.Compiler.InferenceState, invokecall::Nothing)`| |473|`/usr/lib/system/libsystem_malloc.dylib:?`|`tiny_free_no_lock`| |461|`/usr/lib/system/libsystem_malloc.dylib:?`|`tiny_malloc_from_free_list`| |442|`@Enzyme/src/rules/jitrules.jl:311`|`runtime_generic_augfwd(activity::Type{Val{(false, false, false, true, true, false)}}, width::Val{1}, ModifiedBetween::Val{(true, true, true, true, true, true)}, RT::Val{@NamedTuple{1, 2, 3}}, f::typeof(Core.kwcall), df::Nothing, primal_1::@NamedTuple{turbo::Val{false}, bumper::Val{false}}, shadow_1_1::Nothing, primal_2::typeof(eval_tree_array), shadow_2_1::Nothing, primal_3::Expression{Float32, Node{Float32}, @NamedTuple{operators::Nothing, variable_names::Nothing}}, shadow_3_1::Expression{Float32, Node{Float32}, @NamedTuple{operators::Nothing, variable_names::Nothing}}, primal_4::Matrix{Float32}, shadow_4_1::Matrix{Float32}, primal_5::DynamicExpressions.OperatorEnumModule.OperatorEnum{Tuple{typeof(+), typeof(*), typeof(/), typeof(-)}, Tuple{typeof(cos), typeof(exp)}}, shadow_5_1::Nothing)`| |433|`@Base/compiler/abstractinterpretation.jl:1207`|`const_prop_call(interp::Core.Compiler.NativeInterpreter, 
mi::Core.MethodInstance, result::Core.Compiler.MethodCallResult, arginfo::Core.Compiler.ArgInfo, sv::Core.Compiler.InferenceState, concrete_eval_result::Nothing)`| |408|`/usr/lib/system/libsystem_platform.dylib:?`|`_platform_memmove`| |406|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/threading.c:846`|`_jl_mutex_wait`| |387|`/Users/mcranmer/.julia/artifacts/4f5f5b6df77206271fda4b37fa3712579d8183c6/lib/libEnzyme-15.dylib:?`|`_ZN11EnzymeLogic21CreateAugmentedPrimalE14RequestContextPN4llvm8FunctionE10DIFFE_TYPENS1_8ArrayRefIS4_EER12TypeAnalysisbbRK10FnTypeInfoNSt3__16vectorIbNSC_9allocatorIbEEEEbjbb`| |350|`/usr/lib/system/libsystem_malloc.dylib:?`|`free_tiny`| |338|`/usr/lib/system/libsystem_kernel.dylib:?`|`__psynch_rw_wrlock`| |330|`/Users/mcranmer/.julia/artifacts/4f5f5b6df77206271fda4b37fa3712579d8183c6/lib/libEnzyme-15.dylib:?`|`_ZN11EnzymeLogic23CreatePrimalAndGradientE14RequestContextOK15ReverseCacheKeyR12TypeAnalysisPK15AugmentedReturnb`| |330|`/Users/mcranmer/.julia/artifacts/4f5f5b6df77206271fda4b37fa3712579d8183c6/lib/libEnzyme-15.dylib:?`|`EnzymeCreatePrimalAndGradient`| |330|`@Enzyme/src/api.jl:156`|`EnzymeCreatePrimalAndGradient(logic::Enzyme.Logic, todiff::LLVM.Function, retType::Enzyme.API.CDIFFE_TYPE, constant_args::Vector{Enzyme.API.CDIFFE_TYPE}, TA::Enzyme.TypeAnalysis, returnValue::Bool, dretUsed::Bool, mode::Enzyme.API.CDerivativeMode, width::Int64, additionalArg::Ptr{LLVM.API.LLVMOpaqueType}, forceAnonymousTape::Bool, typeInfo::Enzyme.FnTypeInfo, uncacheable_args::Vector{Bool}, augmented::Ptr{Nothing}, atomicAdd::Bool)`| |330|`@Base/compiler/ssair/inlining.jl:78`|`ssa_inlining_pass!`| |325|`/usr/lib/system/libsystem_malloc.dylib:?`|`tiny_free_list_add_ptr`| |324|`@Base/compiler/typeinfer.jl:219`|`typeinf`| 
|324|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/gc.c:1300`|`jl_gc_pool_alloc_inner`| |324|`@Base/boot.jl:477`|`Array`| |317|`/Users/mcranmer/.julia/juliaup/julia-1.10.4+0.aarch64.apple.darwin14/lib/julia/libLLVM.dylib:?`|`_ZNK4llvm11Instruction24isIdenticalToWhenDefinedEPKS0_`| |315|`@Base/compiler/typeinfer.jl:219`|`typeinf(interp::Enzyme.Compiler.Interpreter.EnzymeInterpreter, frame::Core.Compiler.InferenceState)`| |314|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/array.c:436`|`ijl_alloc_array_1d`| |313|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/gc.c:937`|`maybe_collect`| |310|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/gc.c:3531`|`ijl_gc_collect`| |296|`/usr/lib/system/libsystem_malloc.dylib:?`|`tiny_malloc_should_clear`| |288|`@Enzyme/src/compiler.jl:6095`|`(::Enzyme.Compiler.AugmentedForwardThunk{Ptr{Nothing}, Const{typeof(Core.kwcall)}, Duplicated{Tuple{Vector{Float32}, Bool}}, Tuple{Const{@NamedTuple{turbo::Val{false}, bumper::Val{false}}}, Const{typeof(eval_tree_array)}, Duplicated{Expression{Float32, Node{Float32}, @NamedTuple{operators::Nothing, variable_names::Nothing}}}, Duplicated{Matrix{Float32}}, Const{DynamicExpressions.OperatorEnumModule.OperatorEnum{Tuple{typeof(+), typeof(*), typeof(/), typeof(-)}, Tuple{typeof(cos), typeof(exp)}}}}, 1, true, @NamedTuple{1::@NamedTuple{1, 2, 3, 4, 5, 6, 7, 8::@NamedTuple{1, 2, 3, 4, 5::Bool, 6::UInt64, 7::UInt64, 8::UInt64, 
9::Core.LLVMPtr{UInt64, 0}, 10::Core.LLVMPtr{UInt64, 0}}, 9, 10, 11, 12::@NamedTuple{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12::@NamedTuple{1, 2, 3, 4, 5, 6, 7::Float32}, 13::@NamedTuple{1, 2, 3, 4, 5, 6, 7::Float32}, 14::@NamedTuple{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, 15::@NamedTuple{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12::Float32, 13::Float32}, 16::@NamedTuple{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12::Float32, 13::Float32}, 17::@NamedTuple{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, 18::UInt8, 19::Bool, 20::UInt8}, 13, 14, 15, 16, 17, 18, 19::UInt8, 20, 21::UInt8, 22, 23, 24, 25::Bool, 26::UInt64, 27::UInt64, 28::UInt64, 29::Bool, 30::UInt8, 31::UInt8}, 2::@NamedTuple{1, 2, 3, 4, 5, 6, 7, 8::UInt8, 9, 10, 11}, 3, 4}})(::Const{typeof(Core.kwcall)}, ::Const{@NamedTuple{turbo::Val{false}, bumper::Val{false}}}, ::Vararg{Any})`| |286|`@Base/compiler/ssair/inlining.jl:1698`|`assemble_inline_todo!(ir::Core.Compiler.IRCode, state::Core.Compiler.InliningState{Core.Compiler.NativeInterpreter})`| |273|`/Users/mcranmer/.julia/artifacts/4f5f5b6df77206271fda4b37fa3712579d8183c6/lib/libEnzyme-15.dylib:?`|`EnzymeCreateAugmentedPrimal`| |273|`@Enzyme/src/api.jl:192`|`EnzymeCreateAugmentedPrimal(logic::Enzyme.Logic, todiff::LLVM.Function, retType::Enzyme.API.CDIFFE_TYPE, constant_args::Vector{Enzyme.API.CDIFFE_TYPE}, TA::Enzyme.TypeAnalysis, returnUsed::Bool, shadowReturnUsed::Bool, typeInfo::Enzyme.FnTypeInfo, uncacheable_args::Vector{Bool}, forceAnonymousTape::Bool, width::Int64, atomicAdd::Bool)`| |273|`@Enzyme/src/compiler.jl:3670`|`enzyme!(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}, mod::LLVM.Module, primalf::LLVM.Function, TT::Type, mode::Enzyme.API.CDerivativeMode, width::Int64, parallel::Bool, actualRetType::Type, wrap::Bool, modifiedBetween::NTuple{4, Bool}, returnPrimal::Bool, expectedTapeType::Type, loweredArgs::Set{Int64}, boxedArgs::Set{Int64})`| |263|`@Base/compiler/methodtable.jl:74`|`#findall#281`| 
|263|`@Base/compiler/methodtable.jl:73`|`findall`| |262|`@Base/compiler/ssair/inlining.jl:1450`|`handle_call!`| |261|`/Users/mcranmer/.julia/artifacts/4f5f5b6df77206271fda4b37fa3712579d8183c6/lib/libEnzyme-15.dylib:?`|`_ZN13GradientUtils20forceActiveDetectionEv`| |256|`/Users/mcranmer/.julia/artifacts/4f5f5b6df77206271fda4b37fa3712579d8183c6/lib/libEnzyme-15.dylib:?`|`_ZN16ActivityAnalyzer15isConstantValueERK11TypeResultsPN4llvm5ValueE`| |256|`@Base/reflection.jl:1031`|`_methods_by_ftype(t::Any, mt::Nothing, lim::Int64, world::UInt64, ambig::Bool, min::Core.Compiler.RefValue{UInt64}, max::Core.Compiler.RefValue{UInt64}, has_ambig::Core.Compiler.RefValue{Int32})`| |256|`@Base/compiler/methodtable.jl:105`|`_findall`| |254|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/gf.c:2284`|`ijl_matching_methods`| |248|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/gc.c:1357`|`jl_gc_pool_alloc_noinline`| |248|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/./julia_internal.h:476`|`jl_gc_alloc_`| |247|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/array.c:198`|`_new_array`| |243|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/gc.c:2746`|`gc_mark_and_steal`| 
|238|`/Users/mcranmer/.julia/juliaup/julia-1.10.4+0.aarch64.apple.darwin14/lib/julia/libLLVM.dylib:?`|`_ZN4llvm13StringMapImpl15LookupBucketForENS_9StringRefE`| |235|`@Base/compiler/ssair/inlining.jl:1312`|`handle_any_const_result!`| |231|`@Base/compiler/ssair/inlining.jl:1404`|`compute_inlining_cases(info::Core.Compiler.CallInfo, flag::UInt8, sig::Core.Compiler.Signature, state::Core.Compiler.InliningState{Core.Compiler.NativeInterpreter})`| |225|`@Base/compiler/abstractinterpretation.jl:24`|`abstract_call_gf_by_type(interp::Core.Compiler.NativeInterpreter, f::Any, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, atype::Any, sv::Core.Compiler.InferenceState, max_methods::Int64)`| |223|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/partr.c:142`|`jl_gc_mark_threadfun`| |223|`/usr/lib/system/libsystem_pthread.dylib:?`|`_pthread_start`| |220|`@Base/compiler/methodtable.jl:110`|`findall`| |219|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/jlapi.c:472`|`ijl_gc_safepoint`| |219|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/partr.c:480`|`ijl_task_get_next`| |219|`@Base/task.jl:985`|`poptask(W::Base.IntrusiveLinkedListSynchronized{Task})`| |219|`@Base/task.jl:994`|`wait()`| |219|`@Base/condition.jl:130`|`wait(c::Base.GenericCondition{ReentrantLock}; first::Bool)`| |219|`@Base/condition.jl:125`|`wait`| |219|`@Base/channels.jl:477`|`take_buffered(c::Channel{Any})`| |219|`@Base/channels.jl:471`|`take!`| |219|`@Base/channels.jl:613`|`iterate(c::Channel{Any}, state::Nothing)`| 
|219|`@JSONRPC/src/core.jl:175`|`(::VSCodeServer.JSONRPC.var"#3#5"{VSCodeServer.JSONRPC.JSONRPCEndpoint{Base.PipeEndpoint, Base.PipeEndpoint}})()`| |216|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/gc.c:2893`|`gc_mark_loop_parallel`| |216|`@Base/compiler/abstractinterpretation.jl:288`|`find_matching_methods(𝕃::Core.Compiler.InferenceLattice{Core.Compiler.ConditionalsLattice{Core.Compiler.PartialsLattice{Core.Compiler.ConstsLattice}}}, argtypes::Vector{Any}, atype::Any, method_table::Core.Compiler.CachedMethodTable{Core.Compiler.InternalMethodTable}, max_union_splitting::Int64, max_methods::Int64)`| |212|`@Base/compiler/ssair/inlining.jl:871`|`resolve_todo`| |205|`@Base/compiler/ssair/inlining.jl:81`|`ssa_inlining_pass!`| |204|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/array.c:134`|`_new_array_`| |203|`/Users/mcranmer/.julia/artifacts/4f5f5b6df77206271fda4b37fa3712579d8183c6/lib/libEnzyme-15.dylib:?`|`_Z14isInactiveCallRN4llvm8CallBaseE`| |202|`@Enzyme/src/rules/jitrules.jl:455`|`runtime_generic_rev(activity::Type{Val{(false, false, false, true, true, false)}}, width::Val{1}, ModifiedBetween::Val{(true, true, true, true, true, true)}, tape::Enzyme.Compiler.Tape{@NamedTuple{1::@NamedTuple{1, 2, 3, 4, 5, 6, 7, 8::@NamedTuple{1, 2, 3, 4, 5::Bool, 6::UInt64, 7::UInt64, 8::UInt64, 9::Core.LLVMPtr{UInt64, 0}, 10::Core.LLVMPtr{UInt64, 0}}, 9, 10, 11, 12::@NamedTuple{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12::@NamedTuple{1, 2, 3, 4, 5, 6, 7::Float32}, 13::@NamedTuple{1, 2, 3, 4, 5, 6, 7::Float32}, 14::@NamedTuple{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, 15::@NamedTuple{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12::Float32, 13::Float32}, 16::@NamedTuple{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12::Float32, 
13::Float32}, 17::@NamedTuple{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, 18::UInt8, 19::Bool, 20::UInt8}, 13, 14, 15, 16, 17, 18, 19::UInt8, 20, 21::UInt8, 22, 23, 24, 25::Bool, 26::UInt64, 27::UInt64, 28::UInt64, 29::Bool, 30::UInt8, 31::UInt8}, 2::@NamedTuple{1, 2, 3, 4, 5, 6, 7, 8::UInt8, 9, 10, 11}, 3, 4}, Nothing, Tuple{Vector{Float32}, Bool}}, f::typeof(Core.kwcall), df::Nothing, primal_1::@NamedTuple{turbo::Val{false}, bumper::Val{false}}, shadow_1_1::Nothing, primal_2::typeof(eval_tree_array), shadow_2_1::Nothing, primal_3::Expression{Float32, Node{Float32}, @NamedTuple{operators::Nothing, variable_names::Nothing}}, shadow_3_1::Expression{Float32, Node{Float32}, @NamedTuple{operators::Nothing, variable_names::Nothing}}, primal_4::Matrix{Float32}, shadow_4_1::Matrix{Float32}, primal_5::DynamicExpressions.OperatorEnumModule.OperatorEnum{Tuple{typeof(+), typeof(*), typeof(/), typeof(-)}, Tuple{typeof(cos), typeof(exp)}}, shadow_5_1::Nothing)`| |202|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/gf.c:3296`|`ml_mtable_visitor`| |202|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/gf.c:3669`|`ml_matches`| |200|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/gf.c:3073`|`ijl_apply_generic`| |199|`@Base/compiler/optimize.jl:626`|`slot2reg(ir::Core.Compiler.IRCode, ci::Core.CodeInfo, sv::Core.Compiler.OptimizationState{Core.Compiler.NativeInterpreter})`| |199|`@Base/compiler/ssair/ir.jl:1592`|`iterate`| |197|`/usr/lib/system/libsystem_platform.dylib:?`|`_platform_memset`| 
|194|`/Users/mcranmer/.julia/artifacts/4f5f5b6df77206271fda4b37fa3712579d8183c6/lib/libEnzyme-15.dylib:?`|`_ZZN16ActivityAnalyzer15isConstantValueERK11TypeResultsPN4llvm5ValueEENK3$_1clEPNS3_11InstructionE`| |189|`/Users/mcranmer/.julia/juliaup/julia-1.10.4+0.aarch64.apple.darwin14/lib/julia/libLLVM.dylib:?`|`_ZNK4llvm10BasicBlock19getFirstInsertionPtEv`| |189|`/usr/lib/libc++abi.dylib:?`|`_Znwm`| |188|`@Base/compiler/typeinfer.jl:12`|`typeinf`| |188|`@Base/compiler/typeinfer.jl:1071`|`typeinf_type(interp::Enzyme.Compiler.Interpreter.EnzymeInterpreter, method::Method, atype::Any, sparams::Core.SimpleVector)`| |188|`@Enzyme/src/compiler.jl:6727`|`(::Enzyme.Compiler.var"#28593#28594"{DataType, DataType, Enzyme.API.CDerivativeMode, NTuple{4, Bool}, Int64, Bool, Bool, UInt64, DataType})(ctx::LLVM.Context)`| |186|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/typemap.c:543`|`jl_typemap_intersection_node_visitor`| |183|`@Base/compiler/methodtable.jl:119`|`findall(sig::Type, table::Core.Compiler.CachedMethodTable{Core.Compiler.InternalMethodTable}; limit::Int64)`| |181|`@Base/compiler/ssair/ir.jl:1855`|`compact!`| |179|`@Enzyme/src/compiler.jl:6090`|`(::Enzyme.Compiler.AdjointThunk{Ptr{Nothing}, Const{typeof(Core.kwcall)}, Duplicated{Tuple{Vector{Float32}, Bool}}, Tuple{Const{@NamedTuple{turbo::Val{false}, bumper::Val{false}}}, Const{typeof(eval_tree_array)}, Duplicated{Expression{Float32, Node{Float32}, @NamedTuple{operators::Nothing, variable_names::Nothing}}}, Duplicated{Matrix{Float32}}, Const{DynamicExpressions.OperatorEnumModule.OperatorEnum{Tuple{typeof(+), typeof(*), typeof(/), typeof(-)}, Tuple{typeof(cos), typeof(exp)}}}}, 1, @NamedTuple{1::@NamedTuple{1, 2, 3, 4, 5, 6, 7, 8::@NamedTuple{1, 2, 3, 4, 5::Bool, 6::UInt64, 7::UInt64, 8::UInt64, 9::Core.LLVMPtr{UInt64, 0}, 10::Core.LLVMPtr{UInt64, 0}}, 9, 10, 11, 
12::@NamedTuple{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12::@NamedTuple{1, 2, 3, 4, 5, 6, 7::Float32}, 13::@NamedTuple{1, 2, 3, 4, 5, 6, 7::Float32}, 14::@NamedTuple{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, 15::@NamedTuple{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12::Float32, 13::Float32}, 16::@NamedTuple{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12::Float32, 13::Float32}, 17::@NamedTuple{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, 18::UInt8, 19::Bool, 20::UInt8}, 13, 14, 15, 16, 17, 18, 19::UInt8, 20, 21::UInt8, 22, 23, 24, 25::Bool, 26::UInt64, 27::UInt64, 28::UInt64, 29::Bool, 30::UInt8, 31::UInt8}, 2::@NamedTuple{1, 2, 3, 4, 5, 6, 7, 8::UInt8, 9, 10, 11}, 3, 4}})(::Const{typeof(Core.kwcall)}, ::Const{@NamedTuple{turbo::Val{false}, bumper::Val{false}}}, ::Vararg{Any})`| |176|`@Base/compiler/ssair/inlining.jl:1457`|`handle_match!`| |176|`@Base/compiler/ssair/inlining.jl:1329`|`handle_any_const_result!(cases::Vector{Core.Compiler.InliningCase}, result::Any, match::Core.MethodMatch, argtypes::Vector{Any}, info::Core.Compiler.CallInfo, flag::UInt8, state::Core.Compiler.InliningState{Core.Compiler.NativeInterpreter}; allow_abstract::Bool, allow_typevars::Bool)`| |173|`@Base/compiler/ssair/inlining.jl:950`|`analyze_method!`| |172|`@Base/compiler/ssair/inlining.jl:1467`|`handle_match!(cases::Vector{Core.Compiler.InliningCase}, match::Core.MethodMatch, argtypes::Vector{Any}, info::Core.Compiler.CallInfo, flag::UInt8, state::Core.Compiler.InliningState{Core.Compiler.NativeInterpreter}; allow_abstract::Bool, allow_typevars::Bool)`| |171|`/usr/lib/system/libsystem_platform.dylib:?`|`_platform_memset_pattern16`| |171|`@Base/compiler/ssair/inlining.jl:981`|`analyze_method!(match::Core.MethodMatch, argtypes::Vector{Any}, info::Core.Compiler.CallInfo, flag::UInt8, state::Core.Compiler.InliningState{Core.Compiler.NativeInterpreter}; allow_typevars::Bool, invokesig::Nothing)`| 
|171|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/typemap.c:812`|`jl_typemap_intersection_visitor`| |170|`@Enzyme/src/compiler.jl:3699`|`enzyme!(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}, mod::LLVM.Module, primalf::LLVM.Function, TT::Type, mode::Enzyme.API.CDerivativeMode, width::Int64, parallel::Bool, actualRetType::Type, wrap::Bool, modifiedBetween::NTuple{4, Bool}, returnPrimal::Bool, expectedTapeType::Type, loweredArgs::Set{Int64}, boxedArgs::Set{Int64})`| |164|`/Users/mcranmer/.julia/juliaup/julia-1.10.4+0.aarch64.apple.darwin14/lib/julia/libLLVM.dylib:?`|`_ZN4llvm29MustBeExecutedContextExplorer32getMustBeExecutedNextInstructionERNS_22MustBeExecutedIteratorEPKNS_11InstructionE`| |164|`/Users/mcranmer/.julia/artifacts/4f5f5b6df77206271fda4b37fa3712579d8183c6/lib/libEnzyme-15.dylib:?`|`_Z18isInactiveCallInstRN4llvm8CallBaseERNS_17TargetLibraryInfoE`| |164|`/usr/lib/system/libsystem_malloc.dylib:?`|`free`| |161|`/Users/mcranmer/.julia/artifacts/4f5f5b6df77206271fda4b37fa3712579d8183c6/lib/libEnzyme-15.dylib:?`|`_ZN13GradientUtils15CreateFromCloneER11EnzymeLogicjPN4llvm8FunctionERNS2_17TargetLibraryInfoER12TypeAnalysisR10FnTypeInfo10DIFFE_TYPENS2_8ArrayRefISB_EEbbRNSt3__13mapI15AugmentedStructiNSE_4lessISG_EENSE_9allocatorINSE_4pairIKSG_iEEEEEEb`| |160|`@Enzyme/src/compiler.jl:3710`|`enzyme!(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}, mod::LLVM.Module, primalf::LLVM.Function, TT::Type, mode::Enzyme.API.CDerivativeMode, width::Int64, parallel::Bool, actualRetType::Type, wrap::Bool, modifiedBetween::NTuple{6, Bool}, returnPrimal::Bool, expectedTapeType::Type, loweredArgs::Set{Int64}, boxedArgs::Set{Int64})`| |155|`@Base/compiler/ssair/ir.jl:1670`|`iterate_compact(compact::Core.Compiler.IncrementalCompact)`| 
|154|`/Users/mcranmer/.julia/juliaup/julia-1.10.4+0.aarch64.apple.darwin14/lib/julia/libLLVM.dylib:?`|`_ZN4llvm19SmallPtrSetImplBase14insert_imp_bigEPKv`| |151|`/Users/mcranmer/.julia/juliaup/julia-1.10.4+0.aarch64.apple.darwin14/lib/julia/libLLVM.dylib:?`|`_ZN4llvm16InstCombinerImpl12visitPHINodeERNS_7PHINodeE`| |143|`@GPUCompiler/src/driver.jl:115`|`codegen`| |143|`@Enzyme/src/compiler.jl:5161`|`codegen(output::Symbol, job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}; libraries::Bool, deferred_codegen::Bool, optimize::Bool, toplevel::Bool, strip::Bool, validate::Bool, only_entry::Bool, parent_job::Nothing)`| |142|`@TimerOutputs/src/TimerOutput.jl:253`|`macro expansion`| |142|`@GPUCompiler/src/utils.jl:103`|`emit_llvm(job::GPUCompiler.CompilerJob; libraries::Bool, toplevel::Bool, optimize::Bool, cleanup::Bool, only_entry::Bool, validate::Bool)`| |142|`@GPUCompiler/src/utils.jl:97`|`emit_llvm`| |142|`@GPUCompiler/src/driver.jl:136`|`codegen(output::Symbol, job::GPUCompiler.CompilerJob; libraries::Bool, toplevel::Bool, optimize::Bool, cleanup::Bool, strip::Bool, validate::Bool, only_entry::Bool, parent_job::Nothing)`| |141|`@GPUCompiler/src/irgen.jl:4`|`irgen(job::GPUCompiler.CompilerJob)`| |141|`@GPUCompiler/src/driver.jl:208`|`macro expansion`| |141|`@GPUCompiler/src/driver.jl:207`|`macro expansion`| |139|`@Enzyme/src/compiler.jl:5457`|`codegen(output::Symbol, job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}; libraries::Bool, deferred_codegen::Bool, optimize::Bool, toplevel::Bool, strip::Bool, validate::Bool, only_entry::Bool, parent_job::Nothing)`| |137|`/usr/lib/system/libsystem_malloc.dylib:?`|`_szone_free`| |136|`@GPUCompiler/src/jlgen.jl:523`|`ci_cache_populate(interp::Enzyme.Compiler.Interpreter.EnzymeInterpreter, cache::Core.Compiler.WorldView{GPUCompiler.CodeCache}, mi::Core.MethodInstance, min_world::UInt64, max_world::UInt64)`| 
|136|`@GPUCompiler/src/jlgen.jl:599`|`compile_method_instance(job::GPUCompiler.CompilerJob)`| |136|`@Base/compiler/ssair/inlining.jl:710`|`batch_inline!(ir::Core.Compiler.IRCode, todo::Vector{Pair{Int64, Any}}, propagate_inbounds::Bool, params::Core.Compiler.OptimizationParams)`| |135|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/gc.c:3389`|`_jl_gc_collect`| |134|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/gc.c:1508`|`gc_sweep_pool_page`| |134|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/gc.c:1594`|`gc_sweep_pool`| |132|`@SymbolicRegression/src/SymbolicRegression.jl:595`|`equation_search(datasets::Vector{Dataset{Float32, Float32, Matrix{Float32}, Vector{Float32}, Nothing, @NamedTuple{}, Nothing, Nothing, Nothing, Nothing}}; niterations::Int64, options::Options{SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping{Int64, Int64}, DynamicExpressions.OperatorEnumModule.OperatorEnum, Node, Expression, @NamedTuple{}, false, false, nothing, StatsBase.Weights{Float64, Float64, Vector{Float64}}, ADTypes.AutoEnzyme{Nothing}}, parallelism::Symbol, numprocs::Nothing, procs::Nothing, addprocs_function::Nothing, heap_size_hint_in_bytes::Nothing, runtests::Bool, saved_state::Nothing, return_state::Nothing, verbosity::Nothing, progress::Nothing, v_dim_out::Val{1})`| |132|`@SymbolicRegression/src/SymbolicRegression.jl:473`|`equation_search`| |132|`@SymbolicRegression/src/SymbolicRegression.jl:436`|`#equation_search#26`| |132|`@SymbolicRegression/src/SymbolicRegression.jl:382`|`equation_search`| 
|132|`@SymbolicRegression/src/SymbolicRegression.jl:466`|`#equation_search#28`| |132|`@SymbolicRegression/src/SymbolicRegression.jl:463`|`kwcall(::@NamedTuple{niterations::Int64, options::Options{SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping{Int64, Int64}, DynamicExpressions.OperatorEnumModule.OperatorEnum, Node, Expression, @NamedTuple{}, false, false, nothing, StatsBase.Weights{Float64, Float64, Vector{Float64}}, ADTypes.AutoEnzyme{Nothing}}, parallelism::Symbol}, ::typeof(equation_search), X::Matrix{Float32}, y::Vector{Float32})`| |131|`/Users/mcranmer/.julia/juliaup/julia-1.10.4+0.aarch64.apple.darwin14/lib/julia/libLLVM.dylib:?`|`_ZN4llvm17LiveIntervalUnion5Query23collectInterferingVRegsEj`| |131|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/subtype.c:1682`|`forall_exists_subtype`| |130|`@Enzyme/src/rules/activityrules.jl:61`|`julia_activity_rule(f::LLVM.Function)`| |127|`/usr/lib/system/libsystem_malloc.dylib:?`|`rack_get_thread_index`| |127|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/subtype.c:2132`|`ijl_subtype_env`| |126|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/subtype.c:908`|`subtype_unionall`| |126|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/subtype.c:1637`|`exists_subtype`| 
|126|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/subtype.c:1668`|`_forall_exists_subtype`| |125|`/Users/mcranmer/.julia/juliaup/julia-1.10.4+0.aarch64.apple.darwin14/lib/julia/libLLVM.dylib:?`|`_ZN4llvm12SelectionDAG7CombineENS_12CombineLevelEPNS_9AAResultsENS_10CodeGenOpt5LevelE`| |122|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/subtype.c:4033`|`intersect_all`| |120|`/usr/lib/system/libsystem_malloc.dylib:?`|`_malloc_zone_malloc`| |119|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/gc.c:2904`|`gc_mark_loop`| |119|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/gc.c:3234`|`_jl_gc_collect`| |119|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/gc.c:1348`|`ijl_gc_pool_alloc`| |118|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/gc.c:2885`|`gc_mark_loop_parallel`| |117|`@Base/boot.jl:486`|`Array`| |117|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/subtype.c:915`|`subtype_unionall`| 
|116|`/Users/mcranmer/.julia/juliaup/julia-1.10.4+0.aarch64.apple.darwin14/lib/julia/libLLVM.dylib:?`|`_ZN4llvm13StringMapImpl11RehashTableEj`| |115|`/usr/lib/system/libsystem_malloc.dylib:?`|`tiny_free_list_remove_ptr`| |114|`/Users/mcranmer/.julia/juliaup/julia-1.10.4+0.aarch64.apple.darwin14/lib/julia/libLLVM.dylib:?`|`_ZN12_GLOBAL__N_119followUsesInContextINS_11AAAlignImplEN4llvm15IncIntegerStateIyLy4294967296ELy1EEEEEvRT_RNS2_10AttributorERNS2_29MustBeExecutedContextExplorerEPKNS2_11InstructionERNS2_9SetVectorIPKNS2_3UseENSt3__16vectorISH_NSI_9allocatorISH_EEEENS2_8DenseSetISH_NS2_12DenseMapInfoISH_vEEEEEERT0_`| |114|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/gc.c:?`|`gc_mark_outrefs`| |114|`/usr/lib/system/libsystem_kernel.dylib:?`|`__ulock_wait`| |113|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/ircode.c:902`|`ijl_uncompress_ir`| |111|`@Base/compiler/ssair/inlining.jl:908`|`resolve_todo(mi::Core.MethodInstance, result::Core.MethodMatch, argtypes::Vector{Any}, info::Core.Compiler.CallInfo, flag::UInt8, state::Core.Compiler.InliningState{Core.Compiler.NativeInterpreter}; invokesig::Nothing)`| |110|`/usr/lib/system/libsystem_malloc.dylib:?`|`set_tiny_meta_header_in_use`| |110|`/Users/mcranmer/.julia/juliaup/julia-1.10.4+0.aarch64.apple.darwin14/lib/julia/libLLVM.dylib:?`|`_ZN12_GLOBAL__N_111DAGCombiner13AddToWorklistEPN4llvm6SDNodeE`| |107|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/subtype.c:1452`|`subtype`| |107|`@Base/compiler/typeinfer.jl:283`|`_typeinf(interp::Core.Compiler.NativeInterpreter, 
frame::Core.Compiler.InferenceState)`| |107|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/subtype.c:2168`|`ijl_subtype`| |106|`@SymbolicRegression/src/SingleIteration.jl:114`|`macro expansion`| |104|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/subtype.c:4268`|`jl_type_intersection_env_s`| |104|`@Base/compiler/typeinfer.jl:405`|`cache_result!(interp::Core.Compiler.NativeInterpreter, result::Core.Compiler.InferenceResult)`| |103|`/usr/lib/system/libsystem_malloc.dylib:?`|`szone_malloc_should_clear`| |103|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/jitlayers.cpp:520`|`jl_generate_fptr_impl`| |98|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/ircode.c:501`|`jl_decode_value_array`| |97|`@Base/compiler/ssair/ir.jl:673`|`IncrementalCompact`| |95|`/Users/mcranmer/.julia/artifacts/4f5f5b6df77206271fda4b37fa3712579d8183c6/lib/libEnzyme-15.dylib:?`|`_ZNSt3__13setIjNS_4lessIjEENS_9allocatorIjEEEC2ESt16initializer_listIjERKS2_`| |95|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/subtype.c:2092`|`ijl_obvious_subtype`| |93|`/Users/mcranmer/.julia/compiled/v1.10/Enzyme/G1p5n_DsnnP.dylib:?`|`jlcapi_julia_allocator_9767`| 
|93|`/Users/mcranmer/.julia/artifacts/4f5f5b6df77206271fda4b37fa3712579d8183c6/lib/libEnzyme-15.dylib:?`|`_Z16CreateAllocationRN4llvm9IRBuilderINS_14ConstantFolderENS_24IRBuilderDefaultInserterEEEPNS_4TypeEPNS_5ValueERKNS_5TwineEPPNS_8CallInstEPPNS_11InstructionEb`| |93|`@EnzymeCore/src/rules.jl:124`|`has_rrule_from_sig`| |93|`@Enzyme/src/compiler/interpreter.jl:237`|`inlining_policy(interp::Enzyme.Compiler.Interpreter.EnzymeInterpreter, src::Any, info::Core.Compiler.CallInfo, stmt_flag::UInt8, mi::Core.MethodInstance, argtypes::Vector{Any})`| |92|`@Base/compiler/ssair/ir.jl:1857`|`compact!(code::Core.Compiler.IRCode, allow_cfg_transforms::Bool)`| |89|`@Base/compiler/ssair/inlining.jl:903`|`resolve_todo(mi::Core.MethodInstance, result::Core.MethodMatch, argtypes::Vector{Any}, info::Core.Compiler.CallInfo, flag::UInt8, state::Core.Compiler.InliningState{Enzyme.Compiler.Interpreter.EnzymeInterpreter}; invokesig::Nothing)`| |89|`@Base/compiler/typeinfer.jl:379`|`transform_result_for_cache`| |88|`@Enzyme/src/compiler.jl:2755`|`julia_allocator(B::Ptr{LLVM.API.LLVMOpaqueBuilder}, LLVMType::Ptr{LLVM.API.LLVMOpaqueType}, Count::Ptr{LLVM.API.LLVMOpaqueValue}, AlignedSize::Ptr{LLVM.API.LLVMOpaqueValue}, IsDefault::UInt8, ZI::Ptr{Ptr{LLVM.API.LLVMOpaqueValue}})`| |88|`/Users/mcranmer/.julia/artifacts/4f5f5b6df77206271fda4b37fa3712579d8183c6/lib/libEnzyme-15.dylib:?`|`_ZN12TypeAnalysis15analyzeFunctionERK10FnTypeInfo`| |88|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/gc.c:2641`|`gc_mark_outrefs`| |88|`@Base/compiler/abstractinterpretation.jl:812`|`abstract_call_method_with_const_args(interp::Core.Compiler.NativeInterpreter, result::Core.Compiler.MethodCallResult, f::Any, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, match::Core.MethodMatch, sv::Core.Compiler.InferenceState, invokecall::Nothing)`| 
|88|`@SymbolicRegression/src/InterfaceDynamicExpressions.jl:192`|`string_tree`| |88|`@Base/compiler/typeinfer.jl:360`|`maybe_compress_codeinfo(interp::Core.Compiler.NativeInterpreter, linfo::Core.MethodInstance, ci::Core.CodeInfo)`| |87|`/usr/lib/system/libsystem_pthread.dylib:?`|`pthread_getspecific`| |85|`/Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-XG3Q6T6R70.0/build/default-honeycrisp-XG3Q6T6R70-0/julialang/julia-release-1-dot-10/src/ircode.c:358`|`jl_encode_value_`|
MilesCranmer commented 4 months ago

Also, if I may, can you explain a bit about what sorts of things you are looking for here, and what you can draw from the profile results? I want to learn 🙂 It has taken me a while to collect all of the information in this thread, so I'd really like to come away with a deeper understanding of Julia/LLVM/Enzyme internals!

vchuravy commented 4 months ago

Could you export the data with PProf.jl? Generally I would be looking for Enzyme-related cycles.

The inference cycle you found is annoying, but there is ongoing work to move inference toward a worklist-based approach.

MilesCranmer commented 4 months ago

Here you go: profile.pb.gz

MilesCranmer commented 3 months ago

Friendly ping on this