Open danielwe opened 2 months ago
Can you make a MWE (perhaps copying from quadgk) that reproduces this standalone?
Whew! Here's a standalone reproducer. In this MWE you can't work around the problem with `@{no}inline`
tags, but any of the following changes gets rid of the issue (see also comments in the code pointing out the exact changes):
- Changing `foo` such that it no longer gets fully specialized
- Making `guaranteed_nonactive` trivial, i.e., always returning either `false` or `true`
- Removing `guaranteed_nonactive` altogether from the `recursive_add` call

using Enzyme
# Sum of `f` applied to each element of `segs`.
# The `::F` and `Vararg{Any,N}` type parameters are deliberate: they are part of
# what triggers the reported failure. The unparameterized alternative kept in the
# comment below is noted by the reporter as making the problem disappear.
foo(f::F, segs::Vararg{Any,N}) where {F,N} = sum(f, segs)
# foo(f, segs...) = sum(f, segs) # enable => problem solved
# Custom-rule `augmented_primal` method for `foo`, applying when `f` is annotated
# `Active` and every element of `segs` is annotated `Const`.
# Computes the primal value only if `EnzymeRules.needs_primal(config)` is true and
# passes `nothing` for the second and third `AugmentedReturn` arguments.
function EnzymeRules.augmented_primal(
config, ::Const{typeof(foo)}, ::Type{RT}, f::Active, segs::Const...
) where {RT}
primal = if EnzymeRules.needs_primal(config)
# Strip the annotation wrappers (`.val`) before calling the plain `foo`.
foo(f.val, map(s -> s.val, segs)...)
else
nothing
end
return EnzymeRules.AugmentedReturn(primal, nothing, nothing)
end
# Immutable wrapper holding one value `f` of parametric type `F`.
# The reverse rule for `foo` in this file wraps `f.val` in it so that `sum` can
# combine the wrapped values through the `+` method defined for this type.
struct ClosureVector{F}
f::F
end
# Type predicate handed to `Enzyme.Compiler.recursive_add` as its fourth argument
# by the `+` method for `ClosureVector`.
# Returns `true` when `Enzyme.Compiler.active_reg_inner(T, (), nothing)` yields
# either `AnyState` or `DupState`, and `false` otherwise.
# The two commented-out early returns are reporter notes: replacing the body with
# a constant answer is reported to make the compilation failure go away.
@inline function guaranteed_nonactive(::Type{T}) where T
# return false # enable => problem solved
# return true # enable => problem solved
rt = Enzyme.Compiler.active_reg_inner(T, (), nothing)
return rt == Enzyme.Compiler.AnyState || rt == Enzyme.Compiler.DupState
end
# Addition of two `ClosureVector`s that share the same type `CV`.
# Delegates to `Enzyme.Compiler.recursive_add` with `identity` and the
# `guaranteed_nonactive` predicate, and asserts that the result is again a `CV`.
# The commented-out three-argument call (no predicate) is the reporter's note of
# a variant that avoids the failure.
function Base.:+(a::CV, b::CV) where {CV<:ClosureVector}
# return Enzyme.Compiler.recursive_add(a, b, identity)::CV # enable => problem solved
return Enzyme.Compiler.recursive_add(a, b, identity, guaranteed_nonactive)::CV
end
# Custom-rule `reverse` method for `foo`, applying when `f` is `Active` and every
# element of `segs` is `Const`.
# Returns a tuple with `dfcv.f` in the slot for `f`, followed by one `nothing`
# per element of `segs`.
function EnzymeRules.reverse(
::Any, ::Const{typeof(foo)}, ::Active, ::Any, f::Active, segs::Const...
)
# Reuse `foo` as the accumulator: the closure ignores its argument (so the
# still-wrapped `segs` are passed as-is) and yields `ClosureVector(f.val)` for
# each segment; `sum` then combines them with the `ClosureVector` `+` method.
dfcv = foo(_ -> ClosureVector(f.val), segs...)
return (dfcv.f, map(_ -> nothing, segs)...)
end
# Driver for the reproducer: `constantfoo(a)` applies the constant function
# `_ -> a` at the two points -1.0 and 1.0 and sums the results via `foo`.
constantfoo(a) = foo(_ -> a, -1.0, 1.0)
# Plain evaluation; the recorded output below shows this printing 2.0.
@show constantfoo(1.0)
# Reverse-mode differentiation through the custom rules; per the recorded output
# below, this is the call that fails with "Enzyme compilation failed".
@show autodiff(Reverse, constantfoo, Active, Active(1.0))
Output:
constantfoo(1.0) = 2.0
ERROR: LoadError: Enzyme compilation failed.
Current scope:
define internal fastcc [1 x [1 x double]] @julia___2120([1 x [1 x double]] addrspace(11)* nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) %0, [1 x [1 x double]] addrspace(11)* nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) %1) unnamed_addr #6 !dbg !34 {
top:
%2 = call {}*** @julia.get_pgcstack()
%current_task186 = getelementptr inbounds {}**, {}*** %2, i64 -14
%current_task1 = bitcast {}*** %current_task186 to {}**
%ptls_field87 = getelementptr inbounds {}**, {}*** %2, i64 2
%3 = bitcast {}*** %ptls_field87 to i64***
%ptls_load8889 = load i64**, i64*** %3, align 8, !tbaa !11
%4 = getelementptr inbounds i64*, i64** %ptls_load8889, i64 2
%safepoint = load i64*, i64** %4, align 8, !tbaa !15
fence syncscope("singlethread") seq_cst
call void @julia.safepoint(i64* %safepoint), !dbg !35
fence syncscope("singlethread") seq_cst
%5 = call nonnull {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32, {} addrspace(10)*)*, {} addrspace(10)*, {} addrspace(10)*, ...) @julia.call2({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32, {} addrspace(10)*)* noundef nonnull @ijl_invoke, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 127227952807648 to {}*) to {} addrspace(10)*), {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 127227789406832 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 127226967509088 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 127227788473088 to {}*) to {} addrspace(10)*)), !dbg !36
%6 = call {} addrspace(10)* @ijl_get_nth_field_checked({} addrspace(10)* nonnull %5, i64 noundef 0), !dbg !36
%7 = call nonnull {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)*, {} addrspace(10)*, ...) @julia.call({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)* noundef nonnull @ijl_apply_generic, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 127227815142080 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 127226827000400 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 127226826999904 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 127226827002080 to {}*) to {} addrspace(10)*), {} addrspace(10)* nonnull %6), !dbg !47
%8 = call nonnull {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)*, {} addrspace(10)*, ...) @julia.call({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)* noundef nonnull @jl_f__svec_ref, {} addrspace(10)* noundef null, {} addrspace(10)* nonnull %7, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 127228014903936 to {}*) to {} addrspace(10)*)), !dbg !49
%9 = call {} addrspace(10)* @julia.typeof({} addrspace(10)* nonnull %8) #11, !dbg !49
%10 = addrspacecast {} addrspace(10)* %9 to {} addrspace(11)*, !dbg !49
%11 = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* %10) #11, !dbg !49
%exactly_isa.not = icmp eq {}* %11, inttoptr (i64 127227786679488 to {}*), !dbg !49
br i1 %exactly_isa.not, label %post_box_union, label %L13, !dbg !49
L13: ; preds = %top
%magicptr = ptrtoint {}* %11 to i64, !dbg !49
switch i64 %magicptr, label %L26 [
i64 127227858680800, label %L15
i64 127226787157376, label %is45
], !dbg !49
L15: ; preds = %L13
%12 = call fastcc i8 @julia____2123({} addrspace(10)* nocapture noundef nonnull readonly align 8 dereferenceable(8) %8), !dbg !49, !range !50
br label %pass, !dbg !49
L26: ; preds = %L13, %pass
%13 = call nonnull {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)*, {} addrspace(10)*, ...) @julia.call({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)* noundef nonnull @jl_f__svec_ref, {} addrspace(10)* noundef null, {} addrspace(10)* nonnull %7, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 127228014903936 to {}*) to {} addrspace(10)*)), !dbg !49
%14 = call {} addrspace(10)* @julia.typeof({} addrspace(10)* nonnull %13) #11, !dbg !49
%15 = addrspacecast {} addrspace(10)* %14 to {} addrspace(11)*, !dbg !49
%16 = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* %15) #11, !dbg !49
%exactly_isa22.not = icmp eq {}* %16, inttoptr (i64 127227786679488 to {}*), !dbg !49
br i1 %exactly_isa22.not, label %post_box_union29, label %L31, !dbg !49
L31: ; preds = %L26
%magicptr93 = ptrtoint {}* %16 to i64, !dbg !49
switch i64 %magicptr93, label %L44 [
i64 127227858680800, label %L33
i64 127226787157376, label %is
], !dbg !49
L33: ; preds = %L31
%17 = call fastcc i8 @julia____2123({} addrspace(10)* nocapture noundef nonnull readonly align 8 dereferenceable(8) %13), !dbg !49, !range !50
br label %pass34, !dbg !49
L44: ; preds = %L31, %pass34
%18 = call nonnull {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)*, {} addrspace(10)*, ...) @julia.call({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)* noundef nonnull @jl_f__svec_ref, {} addrspace(10)* noundef null, {} addrspace(10)* nonnull %7, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 127228014903936 to {}*) to {} addrspace(10)*)), !dbg !51
%19 = call nonnull {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)*, {} addrspace(10)*, ...) @julia.call({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)* noundef nonnull @ijl_apply_generic, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 127227787719360 to {}*) to {} addrspace(10)*), {} addrspace(10)* nonnull %18), !dbg !51
%20 = call nonnull {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)*, {} addrspace(10)*, ...) @julia.call({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)* noundef nonnull @ijl_apply_generic, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 127227815142080 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 127226827000864 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 127226826999904 to {}*) to {} addrspace(10)*), {} addrspace(10)* nonnull %19), !dbg !52
%21 = call nonnull {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)*, {} addrspace(10)*, ...) @julia.call({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)* noundef nonnull @jl_f__svec_ref, {} addrspace(10)* noundef null, {} addrspace(10)* nonnull %20, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 127228014903648 to {}*) to {} addrspace(10)*)), !dbg !52
br label %L51, !dbg !51
L51: ; preds = %pass, %pass34, %L44
%value_phi = phi {} addrspace(10)* [ %21, %L44 ], [ addrspacecast ({}* inttoptr (i64 127226765235504 to {}*) to {} addrspace(10)*), %pass34 ], [ addrspacecast ({}* inttoptr (i64 127226765235504 to {}*) to {} addrspace(10)*), %pass ]
%22 = call {} addrspace(10)* @julia.typeof({} addrspace(10)* %value_phi) #11, !dbg !53
%23 = addrspacecast {} addrspace(10)* %22 to {} addrspace(11)*, !dbg !53
%24 = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* %23) #11, !dbg !53
%exactly_isa6.not = icmp eq {}* %24, inttoptr (i64 127226787157376 to {}*), !dbg !53
br i1 %exactly_isa6.not, label %L68, label %L59, !dbg !53
L59: ; preds = %L51
%25 = call nonnull {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)*, {} addrspace(10)*, ...) @julia.call({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)* noundef nonnull @ijl_apply_generic, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 127227786671392 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 127226787157376 to {}*) to {} addrspace(10)*), {} addrspace(10)* %value_phi), !dbg !53
br label %L68, !dbg !53
L68: ; preds = %L51, %L59
%unbox17.in.in = phi {} addrspace(10)* [ %25, %L59 ], [ %value_phi, %L51 ]
%unbox17.in = bitcast {} addrspace(10)* %unbox17.in.in to i32 addrspace(10)*
%unbox17 = load i32, i32 addrspace(10)* %unbox17.in, align 4, !dbg !54, !tbaa !58, !alias.scope !61, !noalias !62
%26 = and i32 %unbox17, -3, !dbg !57
%27 = icmp eq i32 %26, 0, !dbg !57
br i1 %27, label %L81, label %L76, !dbg !44
L76: ; preds = %L68
%memcpy_refined_src = getelementptr inbounds [1 x [1 x double]], [1 x [1 x double]] addrspace(11)* %0, i64 0, i64 0, i64 0, !dbg !63
%28 = load double, double addrspace(11)* %memcpy_refined_src, align 8, !dbg !63, !tbaa !15, !alias.scope !19, !noalias !22
%memcpy_refined_src11 = getelementptr inbounds [1 x [1 x double]], [1 x [1 x double]] addrspace(11)* %1, i64 0, i64 0, i64 0, !dbg !63
%29 = load double, double addrspace(11)* %memcpy_refined_src11, align 8, !dbg !63, !tbaa !15, !alias.scope !19, !noalias !22
%box = call noalias nonnull dereferenceable(16) {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1, i64 noundef 16, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 127226768084880 to {}*) to {} addrspace(10)*)) #12, !dbg !63
%30 = bitcast {} addrspace(10)* %box to i8 addrspace(10)*, !dbg !63
%newstruct.sroa.0.0..sroa_cast = bitcast {} addrspace(10)* %box to double addrspace(10)*, !dbg !63
store double %28, double addrspace(10)* %newstruct.sroa.0.0..sroa_cast, align 8, !dbg !63, !tbaa !64, !alias.scope !65, !noalias !66
%newstruct.sroa.2.0..sroa_idx = getelementptr inbounds i8, i8 addrspace(10)* %30, i64 8, !dbg !63
%newstruct.sroa.2.0..sroa_cast = bitcast i8 addrspace(10)* %newstruct.sroa.2.0..sroa_idx to double addrspace(10)*, !dbg !63
store double %29, double addrspace(10)* %newstruct.sroa.2.0..sroa_cast, align 8, !dbg !63, !tbaa !64, !alias.scope !65, !noalias !66
%31 = call nonnull {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32, {} addrspace(10)*)*, {} addrspace(10)*, {} addrspace(10)*, ...) @julia.call2({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32, {} addrspace(10)*)* noundef nonnull @ijl_invoke, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 127226754453744 to {}*) to {} addrspace(10)*), {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 127227789406832 to {}*) to {} addrspace(10)*), {} addrspace(10)* nonnull %box, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 127227788473088 to {}*) to {} addrspace(10)*)), !dbg !63
%32 = addrspacecast {} addrspace(10)* %31 to [1 x [1 x double]] addrspace(11)*, !dbg !67
%33 = bitcast {} addrspace(10)* %31 to [1 x [1 x double]] addrspace(10)*
br label %L81
L81: ; preds = %L68, %L76
%nodecayed..pn = phi [1 x [1 x double]] addrspace(10)*
%nodecayedoff..pn = phi i64
%.pn = phi [1 x [1 x double]] addrspace(11)* [ %32, %L76 ], [ %0, %L68 ]
%.sroa.081.0.in = getelementptr inbounds [1 x [1 x double]], [1 x [1 x double]] addrspace(11)* %.pn, i64 0, i64 0, i64 0
%.sroa.081.0 = load double, double addrspace(11)* %.sroa.081.0.in, align 8, !tbaa !64, !alias.scope !71, !noalias !72
%unbox10.fca.0.0.insert = insertvalue [1 x [1 x double]] poison, double %.sroa.081.0, 0, 0, !dbg !46
ret [1 x [1 x double]] %unbox10.fca.0.0.insert, !dbg !46
post_box_union: ; preds = %top
call void @ijl_type_error(i8* noundef getelementptr inbounds ([3 x i8], [3 x i8]* @_j_str1, i32 0, i32 0), {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 127227858681216 to {}*) to {} addrspace(10)*), {} addrspace(12)* noundef addrspacecast ({}* inttoptr (i64 127227786679552 to {}*) to {} addrspace(12)*)) #13, !dbg !49
unreachable, !dbg !49
pass: ; preds = %is45, %L15
%unbox4.ph = phi i8 [ %12, %L15 ], [ %phi.cast94, %is45 ]
%.not90 = icmp eq i8 %unbox4.ph, 0, !dbg !49
br i1 %.not90, label %L26, label %L51, !dbg !49
post_box_union29: ; preds = %L26
call void @ijl_type_error(i8* noundef getelementptr inbounds ([3 x i8], [3 x i8]* @_j_str1, i32 0, i32 0), {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 127227858681216 to {}*) to {} addrspace(10)*), {} addrspace(12)* noundef addrspacecast ({}* inttoptr (i64 127227786679552 to {}*) to {} addrspace(12)*)) #13, !dbg !49
unreachable, !dbg !49
pass34: ; preds = %is, %L33
%unbox35.ph = phi i8 [ %phi.cast, %is ], [ %17, %L33 ]
%.not92 = icmp eq i8 %unbox35.ph, 0, !dbg !49
br i1 %.not92, label %L44, label %L51, !dbg !49
is: ; preds = %L31
%34 = bitcast {} addrspace(10)* %13 to i32 addrspace(10)*, !dbg !73
%unbox40 = load i32, i32 addrspace(10)* %34, align 4, !dbg !73, !tbaa !58, !alias.scope !61, !noalias !62
%35 = icmp eq i32 %unbox40, 0, !dbg !73
%phi.cast = zext i1 %35 to i8, !dbg !73
br label %pass34, !dbg !73
is45: ; preds = %L13
%36 = bitcast {} addrspace(10)* %8 to i32 addrspace(10)*, !dbg !73
%unbox47 = load i32, i32 addrspace(10)* %36, align 4, !dbg !73, !tbaa !58, !alias.scope !61, !noalias !62
%37 = icmp eq i32 %unbox47, 0, !dbg !73
%phi.cast94 = zext i1 %37 to i8, !dbg !73
br label %pass, !dbg !73
}
Could not analyze garbage collection behavior of
inst: %.pn = phi [1 x [1 x double]] addrspace(11)* [ %32, %L76 ], [ %0, %L68 ]
v0: [1 x [1 x double]] addrspace(11)* %0
v: [1 x [1 x double]] addrspace(11)* %0
offset: i64 0
hasload: false
Stacktrace:
[1] add_sum
@ ./reduce.jl:24
[2] BottomRF
@ ./reduce.jl:86
[3] MappingRF
@ ./reduce.jl:100
[4] afoldl
@ ./operators.jl:545
[5] _foldl_impl
@ ./reduce.jl:68
[6] foldl_impl
@ ./reduce.jl:48
[7] mapfoldl_impl
@ ./reduce.jl:44
[8] mapfoldl
@ ./reduce.jl:175
[9] mapreduce
@ ./reduce.jl:307
[10] sum
@ ./reduce.jl:535
[11] foo
@ ~/issues/quadgkkwargs.jl:3
[12] reverse
@ ~/issues/quadgkkwargs.jl:36
Stacktrace:
[1] (::Enzyme.Compiler.var"#getparent#18860"{LLVM.Function, LLVM.IntegerType, Int64, Dict{LLVM.PHIInst, LLVM.PHIInst}, Dict{LLVM.PHIInst, LLVM.PHIInst}, LLVM.PHIInst, LLVM.Argument, LLVM.IRBuilder})(v::LLVM.Argument, offset::LLVM.ConstantInt, hasload::Bool)
@ Enzyme.Compiler ~/.julia/packages/Enzyme/uXW2v/src/compiler/optimize.jl:833
[2] nodecayed_phis!(mod::LLVM.Module)
@ Enzyme.Compiler ~/.julia/packages/Enzyme/uXW2v/src/compiler/optimize.jl:836
[3] optimize!
@ ~/.julia/packages/Enzyme/uXW2v/src/compiler/optimize.jl:2143 [inlined]
[4] nested_codegen!(mode::Enzyme.API.CDerivativeMode, mod::LLVM.Module, funcspec::Core.MethodInstance, world::UInt64)
@ Enzyme.Compiler ~/.julia/packages/Enzyme/uXW2v/src/compiler.jl:1889
[5] nested_codegen!(mode::Enzyme.API.CDerivativeMode, mod::LLVM.Module, f::Function, tt::Type, world::UInt64)
@ Enzyme.Compiler ~/.julia/packages/Enzyme/uXW2v/src/compiler.jl:1827
[6] enzyme_custom_common_rev
@ ~/.julia/packages/Enzyme/uXW2v/src/rules/customrules.jl:765 [inlined]
[7] enzyme_custom_rev(B::LLVM.IRBuilder, orig::LLVM.CallInst, gutils::Enzyme.Compiler.GradientUtils, tape::Nothing)
@ Enzyme.Compiler ~/.julia/packages/Enzyme/uXW2v/src/rules/customrules.jl:1138
[8] enzyme_custom_rev_cfunc(B::Ptr{LLVM.API.LLVMOpaqueBuilder}, OrigCI::Ptr{LLVM.API.LLVMOpaqueValue}, gutils::Ptr{Nothing}, tape::Ptr{LLVM.API.LLVMOpaqueValue})
@ Enzyme.Compiler ~/.julia/packages/Enzyme/uXW2v/src/rules/llvmrules.jl:27
[9] EnzymeCreatePrimalAndGradient(logic::Enzyme.Logic, todiff::LLVM.Function, retType::Enzyme.API.CDIFFE_TYPE, constant_args::Vector{Enzyme.API.CDIFFE_TYPE}, TA::Enzyme.TypeAnalysis, returnValue::Bool, dretUsed::Bool, mode::Enzyme.API.CDerivativeMode, runtimeActivity::Bool, width::Int64, additionalArg::Ptr{Nothing}, forceAnonymousTape::Bool, typeInfo::Enzyme.FnTypeInfo, uncacheable_args::Vector{Bool}, augmented::Ptr{Nothing}, atomicAdd::Bool)
@ Enzyme.API ~/.julia/packages/Enzyme/uXW2v/src/api.jl:163
[10] enzyme!(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}, mod::LLVM.Module, primalf::LLVM.Function, TT::Type, mode::Enzyme.API.CDerivativeMode, width::Int64, parallel::Bool, actualRetType::Type, wrap::Bool, modifiedBetween::Tuple{Bool, Bool}, returnPrimal::Bool, expectedTapeType::Type, loweredArgs::Set{Int64}, boxedArgs::Set{Int64})
@ Enzyme.Compiler ~/.julia/packages/Enzyme/uXW2v/src/compiler.jl:4004
[11] codegen(output::Symbol, job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}; libraries::Bool, deferred_codegen::Bool, optimize::Bool, toplevel::Bool, strip::Bool, validate::Bool, only_entry::Bool, parent_job::Nothing)
@ Enzyme.Compiler ~/.julia/packages/Enzyme/uXW2v/src/compiler.jl:6308
[12] codegen
@ ~/.julia/packages/Enzyme/uXW2v/src/compiler.jl:5465 [inlined]
[13] _thunk(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}, postopt::Bool)
@ Enzyme.Compiler ~/.julia/packages/Enzyme/uXW2v/src/compiler.jl:7110
[14] _thunk
@ ~/.julia/packages/Enzyme/uXW2v/src/compiler.jl:7110 [inlined]
[15] cached_compilation
@ ~/.julia/packages/Enzyme/uXW2v/src/compiler.jl:7151 [inlined]
[16] thunkbase(ctx::LLVM.Context, mi::Core.MethodInstance, ::Val{0x0000000000007b05}, ::Type{Const{typeof(constantfoo)}}, ::Type{Active}, tt::Type{Tuple{Active{Float64}}}, ::Val{Enzyme.API.DEM_ReverseModeCombined}, ::Val{1}, ::Val{(false, false)}, ::Val{false}, ::Val{false}, ::Type{FFIABI}, ::Val{true}, ::Val{false})
@ Enzyme.Compiler ~/.julia/packages/Enzyme/uXW2v/src/compiler.jl:7224
[17] #s2084#19056
@ ~/.julia/packages/Enzyme/uXW2v/src/compiler.jl:7266 [inlined]
[18] var"#s2084#19056"(FA::Any, A::Any, TT::Any, Mode::Any, ModifiedBetween::Any, width::Any, ReturnPrimal::Any, ShadowInit::Any, World::Any, ABI::Any, ErrIfFuncWritten::Any, RuntimeActivity::Any, ::Any, ::Type, ::Type, ::Type, tt::Any, ::Type, ::Type, ::Type, ::Type, ::Type, ::Type, ::Type, ::Any)
@ Enzyme.Compiler ./none:0
[19] (::Core.GeneratedFunctionStub)(::UInt64, ::LineNumberNode, ::Any, ::Vararg{Any})
@ Core ./boot.jl:602
[20] autodiff
@ ~/.julia/packages/Enzyme/uXW2v/src/Enzyme.jl:311 [inlined]
[21] autodiff(mode::ReverseMode{false, false, FFIABI, false, false}, f::typeof(constantfoo), ::Type{Active}, args::Active{Float64})
@ Enzyme ~/.julia/packages/Enzyme/uXW2v/src/Enzyme.jl:328
[22] macro expansion
@ show.jl:1181 [inlined]
[23] top-level scope
@ ~/issues/quadgkkwargs.jl:42
in expression starting at /home/daniel/issues/quadgkkwargs.jl:42
Successor to #1845
Enzyme sometimes throws
LoadError: Enzyme compilation failed.
for custom rules, with the ultimate cause seemingly being `Could not analyze garbage collection behavior of ...`. The problem can be worked around by messing around with inlining. I've only been able to reproduce this issue through the custom rules in QuadGK.jl after adding the workaround for #1873 as shown in the reproducer below. The error disappears if I uncomment any of the `@inline`s or `@noinline`s in the monkey patch. My interpretation is that the key difference is `quadgk` not getting inlined into its callers, either because of the explicit `@noinline` or because an internal `@inline` makes its body large enough to avoid automatic inlining. However, what does not help is adding `@noinline` at every call site of `quadgk`, both in the function I'm differentiating and within the custom rules, so I'm not confident in this interpretation.

Output: