EnzymeAD / Enzyme.jl

Julia bindings for the Enzyme automatic differentiator
https://enzyme.mit.edu
MIT License
443 stars 62 forks source link

Custom rules may fail to compile depending on inlining/method specialization #1874

Open danielwe opened 2 weeks ago

danielwe commented 2 weeks ago

Successor to #1845

Enzyme sometimes throws `LoadError: Enzyme compilation failed.` for custom rules, with the ultimate cause seemingly being `Could not analyze garbage collection behavior of ...`. The problem can be worked around by adjusting inlining. I've only been able to reproduce this issue through the custom rules in QuadGK.jl after adding the workaround for #1873, as shown in the reproducer below. The error disappears if I uncomment any of the `@inline`s or `@noinline`s in the monkey patch. My interpretation is that the key difference is `quadgk` not getting inlined into its callers, either because of the explicit `@noinline` or because an internal `@inline` makes its body large enough to avoid automatic inlining. However, adding `@noinline` at every call site of `quadgk`, both in the function I'm differentiating and within the custom rules, does not help, so I'm not confident in this interpretation.

using Enzyme, QuadGK

# Monkey patch QuadGK.quadgk, adding `::F` to work around #1873
# Uncomment any @noinline or @inline to work around the present issue
#
# Reproducer-only override. The integrand `f` is bound to the type parameter
# `F`, and the positional segment arguments are declared as `segs::T...`, i.e.
# they are all matched against the one type parameter `T`.
# The keyword arguments are not interpreted here; they are forwarded
# positionally to `QuadGK.do_quadgk` from inside the `do`-block closure that is
# handed to `QuadGK.handle_infinities`.
# NOTE(review): the keyword defaults below are presumably copied from the
# upstream QuadGK method -- confirm against the installed QuadGK version.

#=@noinline=# function QuadGK.quadgk(
    f::F,
    segs::T...;
    atol=nothing,
    rtol=nothing,
    maxevals=10^7,
    order=7,
    norm=QuadGK.norm,
    segbuf=nothing,
    eval_segbuf=nothing,
) where {F,T}
    # The closure receives three arguments from `handle_infinities`; the third
    # one is ignored (`_`). The `f` and `s` it receives shadow the outer `f`
    # and `segs`.
    #=@inline=# QuadGK.handle_infinities(f, segs) do f, s, _
        #=@inline=# QuadGK.do_quadgk(
            f, s, order, atol, rtol, maxevals, norm, segbuf, eval_segbuf
        )
    end
end

# Function under test: integrates the constant closure `_ -> a` over the
# segment from -1.0 to 1.0 and keeps only the first element of what `quadgk`
# returns.
constantintegral(a) = first(quadgk(_ -> a, -1.0, 1.0))
# Plain evaluation works (prints 2.0 in the output below).
@show constantintegral(1.0)
# Reverse-mode differentiation w.r.t. `a`; this is the call that fails with
# "Enzyme compilation failed" in the output below.
@show autodiff(Reverse, constantintegral, Active, Active(1.0))

Output:

constantintegral(1.0) = 2.0
ERROR: LoadError: Enzyme compilation failed.
Current scope:
define internal fastcc [1 x [1 x double]] @julia___19796(double %0, [1 x [1 x double]] addrspace(11)* nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) %1) unnamed_addr #0 !dbg !113 {
top:
  %2 = call {}*** @julia.get_pgcstack()
  %current_task189 = getelementptr inbounds {}**, {}*** %2, i64 -14
  %current_task1 = bitcast {}*** %current_task189 to {}**
  %ptls_field90 = getelementptr inbounds {}**, {}*** %2, i64 2
  %3 = bitcast {}*** %ptls_field90 to i64***
  %ptls_load9192 = load i64**, i64*** %3, align 8, !tbaa !117
  %4 = getelementptr inbounds i64*, i64** %ptls_load9192, i64 2
  %safepoint = load i64*, i64** %4, align 8, !tbaa !121
  fence syncscope("singlethread") seq_cst
  call void @julia.safepoint(i64* %safepoint), !dbg !123
  fence syncscope("singlethread") seq_cst
  %5 = call nonnull {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32, {} addrspace(10)*)*, {} addrspace(10)*, {} addrspace(10)*, ...) @julia.call2({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32, {} addrspace(10)*)* noundef nonnull @ijl_invoke, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 129562155693040 to {}*) to {} addrspace(10)*), {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 129563567944304 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 129562718579656 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 129563567010560 to {}*) to {} addrspace(10)*)), !dbg !124
  %6 = call {} addrspace(10)* @ijl_get_nth_field_checked({} addrspace(10)* nonnull %5, i64 noundef 0), !dbg !124
  %7 = call nonnull {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)*, {} addrspace(10)*, ...) @julia.call({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)* noundef nonnull @ijl_apply_generic, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 129563593679552 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 129562274187856 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 129562274187360 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 129562274189536 to {}*) to {} addrspace(10)*), {} addrspace(10)* nonnull %6), !dbg !135
  %8 = call nonnull {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)*, {} addrspace(10)*, ...) @julia.call({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)* noundef nonnull @jl_f__svec_ref, {} addrspace(10)* noundef null, {} addrspace(10)* nonnull %7, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 129563793441408 to {}*) to {} addrspace(10)*)), !dbg !137
  %9 = call {} addrspace(10)* @julia.typeof({} addrspace(10)* nonnull %8) #114, !dbg !137
  %10 = addrspacecast {} addrspace(10)* %9 to {} addrspace(11)*, !dbg !137
  %11 = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* %10) #114, !dbg !137
  %exactly_isa.not = icmp eq {}* %11, inttoptr (i64 129563565216960 to {}*), !dbg !137
  br i1 %exactly_isa.not, label %post_box_union, label %L13, !dbg !137

L13:                                              ; preds = %top
  %magicptr = ptrtoint {}* %11 to i64, !dbg !137
  switch i64 %magicptr, label %L26 [
    i64 129563637218272, label %L15
    i64 129562234344832, label %is48
  ], !dbg !137

L15:                                              ; preds = %L13
  %12 = call fastcc i8 @julia____19770({} addrspace(10)* nocapture noundef nonnull readonly align 8 dereferenceable(8) %8), !dbg !137, !range !138
  br label %pass, !dbg !137

L26:                                              ; preds = %L13, %pass
  %13 = call nonnull {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)*, {} addrspace(10)*, ...) @julia.call({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)* noundef nonnull @jl_f__svec_ref, {} addrspace(10)* noundef null, {} addrspace(10)* nonnull %7, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 129563793441408 to {}*) to {} addrspace(10)*)), !dbg !137
  %14 = call {} addrspace(10)* @julia.typeof({} addrspace(10)* nonnull %13) #114, !dbg !137
  %15 = addrspacecast {} addrspace(10)* %14 to {} addrspace(11)*, !dbg !137
  %16 = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* %15) #114, !dbg !137
  %exactly_isa25.not = icmp eq {}* %16, inttoptr (i64 129563565216960 to {}*), !dbg !137
  br i1 %exactly_isa25.not, label %post_box_union32, label %L31, !dbg !137

L31:                                              ; preds = %L26
  %magicptr96 = ptrtoint {}* %16 to i64, !dbg !137
  switch i64 %magicptr96, label %L44 [
    i64 129563637218272, label %L33
    i64 129562234344832, label %is
  ], !dbg !137

L33:                                              ; preds = %L31
  %17 = call fastcc i8 @julia____19770({} addrspace(10)* nocapture noundef nonnull readonly align 8 dereferenceable(8) %13), !dbg !137, !range !138
  br label %pass37, !dbg !137

L44:                                              ; preds = %L31, %pass37
  %18 = call nonnull {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)*, {} addrspace(10)*, ...) @julia.call({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)* noundef nonnull @jl_f__svec_ref, {} addrspace(10)* noundef null, {} addrspace(10)* nonnull %7, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 129563793441408 to {}*) to {} addrspace(10)*)), !dbg !139
  %19 = call nonnull {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)*, {} addrspace(10)*, ...) @julia.call({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)* noundef nonnull @ijl_apply_generic, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 129563566256832 to {}*) to {} addrspace(10)*), {} addrspace(10)* nonnull %18), !dbg !139
  %20 = call nonnull {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)*, {} addrspace(10)*, ...) @julia.call({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)* noundef nonnull @ijl_apply_generic, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 129563593679552 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 129562274188320 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 129562274187360 to {}*) to {} addrspace(10)*), {} addrspace(10)* nonnull %19), !dbg !140
  %21 = call nonnull {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)*, {} addrspace(10)*, ...) @julia.call({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)* noundef nonnull @jl_f__svec_ref, {} addrspace(10)* noundef null, {} addrspace(10)* nonnull %20, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 129563793441120 to {}*) to {} addrspace(10)*)), !dbg !140
  br label %L51, !dbg !139

L51:                                              ; preds = %pass, %pass37, %L44
  %value_phi = phi {} addrspace(10)* [ %21, %L44 ], [ addrspacecast ({}* inttoptr (i64 129562472389296 to {}*) to {} addrspace(10)*), %pass37 ], [ addrspacecast ({}* inttoptr (i64 129562472389296 to {}*) to {} addrspace(10)*), %pass ]
  %22 = call {} addrspace(10)* @julia.typeof({} addrspace(10)* %value_phi) #114, !dbg !141
  %23 = addrspacecast {} addrspace(10)* %22 to {} addrspace(11)*, !dbg !141
  %24 = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* %23) #114, !dbg !141
  %exactly_isa6.not = icmp eq {}* %24, inttoptr (i64 129562234344832 to {}*), !dbg !141
  br i1 %exactly_isa6.not, label %L68, label %L59, !dbg !141

L59:                                              ; preds = %L51
  %25 = call nonnull {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)*, {} addrspace(10)*, ...) @julia.call({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)* noundef nonnull @ijl_apply_generic, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 129563565208864 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 129562234344832 to {}*) to {} addrspace(10)*), {} addrspace(10)* %value_phi), !dbg !141
  br label %L68, !dbg !141

L68:                                              ; preds = %L51, %L59
  %unbox20.in.in = phi {} addrspace(10)* [ %25, %L59 ], [ %value_phi, %L51 ]
  %unbox20.in = bitcast {} addrspace(10)* %unbox20.in.in to i32 addrspace(10)*
  %unbox20 = load i32, i32 addrspace(10)* %unbox20.in, align 4, !dbg !142, !tbaa !146, !alias.scope !149, !noalias !152
  %26 = and i32 %unbox20, -3, !dbg !145
  %27 = icmp eq i32 %26, 0, !dbg !145
  br i1 %27, label %L81, label %L76, !dbg !132

L76:                                              ; preds = %L68
  %memcpy_refined_src = getelementptr inbounds [1 x [1 x double]], [1 x [1 x double]] addrspace(11)* %1, i64 0, i64 0, i64 0, !dbg !157
  %28 = load double, double addrspace(11)* %memcpy_refined_src, align 8, !dbg !157, !tbaa !121, !alias.scope !158, !noalias !159
  %box = call noalias nonnull dereferenceable(24) {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1, i64 noundef 24, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 129562165718800 to {}*) to {} addrspace(10)*)) #115, !dbg !157
  %29 = bitcast {} addrspace(10)* %box to i8 addrspace(10)*, !dbg !157
  %newstruct11.sroa.0.0..sroa_cast = bitcast {} addrspace(10)* %box to double addrspace(10)*, !dbg !157
  store double %28, double addrspace(10)* %newstruct11.sroa.0.0..sroa_cast, align 8, !dbg !157, !tbaa !160, !alias.scope !161, !noalias !162
  %newstruct11.sroa.2.0..sroa_idx = getelementptr inbounds i8, i8 addrspace(10)* %29, i64 8, !dbg !157
  %newstruct11.sroa.2.0..sroa_cast = bitcast i8 addrspace(10)* %newstruct11.sroa.2.0..sroa_idx to double addrspace(10)*, !dbg !157
  store double %28, double addrspace(10)* %newstruct11.sroa.2.0..sroa_cast, align 8, !dbg !157, !tbaa !160, !alias.scope !161, !noalias !162
  %newstruct11.sroa.3.0..sroa_idx = getelementptr inbounds i8, i8 addrspace(10)* %29, i64 16, !dbg !157
  %newstruct11.sroa.3.0..sroa_cast = bitcast i8 addrspace(10)* %newstruct11.sroa.3.0..sroa_idx to double addrspace(10)*, !dbg !157
  store double %0, double addrspace(10)* %newstruct11.sroa.3.0..sroa_cast, align 8, !dbg !157, !tbaa !160, !alias.scope !161, !noalias !162
  %30 = call nonnull {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32, {} addrspace(10)*)*, {} addrspace(10)*, {} addrspace(10)*, ...) @julia.call2({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32, {} addrspace(10)*)* noundef nonnull @ijl_invoke, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 129562162374112 to {}*) to {} addrspace(10)*), {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 129563567944304 to {}*) to {} addrspace(10)*), {} addrspace(10)* nonnull %box, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 129563567010560 to {}*) to {} addrspace(10)*)), !dbg !157
  %31 = addrspacecast {} addrspace(10)* %30 to [1 x [1 x double]] addrspace(11)*, !dbg !165
  %32 = bitcast {} addrspace(10)* %30 to [1 x [1 x double]] addrspace(10)*
  br label %L81

L81:                                              ; preds = %L68, %L76
  %nodecayed..pn = phi [1 x [1 x double]] addrspace(10)*
  %nodecayedoff..pn = phi i64
  %.pn = phi [1 x [1 x double]] addrspace(11)* [ %31, %L76 ], [ %1, %L68 ]
  %.sroa.084.0.in = getelementptr inbounds [1 x [1 x double]], [1 x [1 x double]] addrspace(11)* %.pn, i64 0, i64 0, i64 0
  %.sroa.084.0 = load double, double addrspace(11)* %.sroa.084.0.in, align 8, !tbaa !160, !alias.scope !169, !noalias !170
  %unbox10.fca.0.0.insert = insertvalue [1 x [1 x double]] poison, double %.sroa.084.0, 0, 0, !dbg !134
  ret [1 x [1 x double]] %unbox10.fca.0.0.insert, !dbg !134

post_box_union:                                   ; preds = %top
  call void @ijl_type_error(i8* noundef getelementptr inbounds ([3 x i8], [3 x i8]* @_j_str4, i32 0, i32 0), {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 129563637218688 to {}*) to {} addrspace(10)*), {} addrspace(12)* noundef addrspacecast ({}* inttoptr (i64 129563565217024 to {}*) to {} addrspace(12)*)) #116, !dbg !137
  unreachable, !dbg !137

pass:                                             ; preds = %is48, %L15
  %unbox4.ph = phi i8 [ %12, %L15 ], [ %phi.cast97, %is48 ]
  %.not93 = icmp eq i8 %unbox4.ph, 0, !dbg !137
  br i1 %.not93, label %L26, label %L51, !dbg !137

post_box_union32:                                 ; preds = %L26
  call void @ijl_type_error(i8* noundef getelementptr inbounds ([3 x i8], [3 x i8]* @_j_str4, i32 0, i32 0), {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 129563637218688 to {}*) to {} addrspace(10)*), {} addrspace(12)* noundef addrspacecast ({}* inttoptr (i64 129563565217024 to {}*) to {} addrspace(12)*)) #116, !dbg !137
  unreachable, !dbg !137

pass37:                                           ; preds = %is, %L33
  %unbox38.ph = phi i8 [ %phi.cast, %is ], [ %17, %L33 ]
  %.not95 = icmp eq i8 %unbox38.ph, 0, !dbg !137
  br i1 %.not95, label %L44, label %L51, !dbg !137

is:                                               ; preds = %L31
  %33 = bitcast {} addrspace(10)* %13 to i32 addrspace(10)*, !dbg !171
  %unbox43 = load i32, i32 addrspace(10)* %33, align 4, !dbg !171, !tbaa !146, !alias.scope !149, !noalias !152
  %34 = icmp eq i32 %unbox43, 0, !dbg !171
  %phi.cast = zext i1 %34 to i8, !dbg !171
  br label %pass37, !dbg !171

is48:                                             ; preds = %L13
  %35 = bitcast {} addrspace(10)* %8 to i32 addrspace(10)*, !dbg !171
  %unbox50 = load i32, i32 addrspace(10)* %35, align 4, !dbg !171, !tbaa !146, !alias.scope !149, !noalias !152
  %36 = icmp eq i32 %unbox50, 0, !dbg !171
  %phi.cast97 = zext i1 %36 to i8, !dbg !171
  br label %pass, !dbg !171
}

Could not analyze garbage collection behavior of
 inst:   %.pn = phi [1 x [1 x double]] addrspace(11)* [ %31, %L76 ], [ %1, %L68 ]
 v0: [1 x [1 x double]] addrspace(11)* %1
 v: [1 x [1 x double]] addrspace(11)* %1
 offset: i64 0
 hasload: false

Stacktrace:
 [1] multiple call sites
   @ unknown:0

Stacktrace:
  [1] (::Enzyme.Compiler.var"#getparent#18860"{…})(v::LLVM.Argument, offset::LLVM.ConstantInt, hasload::Bool)
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/uXW2v/src/compiler/optimize.jl:833
  [2] nodecayed_phis!(mod::LLVM.Module)
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/uXW2v/src/compiler/optimize.jl:836
  [3] optimize!
    @ ~/.julia/packages/Enzyme/uXW2v/src/compiler/optimize.jl:2143 [inlined]
  [4] nested_codegen!(mode::Enzyme.API.CDerivativeMode, mod::LLVM.Module, funcspec::Core.MethodInstance, world::UInt64)
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/uXW2v/src/compiler.jl:1889
  [5] nested_codegen!(mode::Enzyme.API.CDerivativeMode, mod::LLVM.Module, f::Function, tt::Type, world::UInt64)
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/uXW2v/src/compiler.jl:1827
  [6] enzyme_custom_common_rev
    @ ~/.julia/packages/Enzyme/uXW2v/src/rules/customrules.jl:765 [inlined]
  [7]
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/uXW2v/src/rules/customrules.jl:1138
  [8] enzyme_custom_rev_cfunc(B::Ptr{…}, OrigCI::Ptr{…}, gutils::Ptr{…}, tape::Ptr{…})
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/uXW2v/src/rules/llvmrules.jl:27
  [9] EnzymeCreatePrimalAndGradient(logic::Enzyme.Logic, todiff::LLVM.Function, retType::Enzyme.API.CDIFFE_TYPE, constant_args::Vector{…}, TA::Enzyme.TypeAnalysis, returnValue::Bool, dretUsed::Bool, mode::Enzyme.API.CDerivativeMode, runtimeActivity::Bool, width::Int64, additionalArg::Ptr{…}, forceAnonymousTape::Bool, typeInfo::Enzyme.FnTypeInfo, uncacheable_args::Vector{…}, augmented::Ptr{…}, atomicAdd::Bool)
    @ Enzyme.API ~/.julia/packages/Enzyme/uXW2v/src/api.jl:163
 [10] enzyme!(job::GPUCompiler.CompilerJob{…}, mod::LLVM.Module, primalf::LLVM.Function, TT::Type, mode::Enzyme.API.CDerivativeMode, width::Int64, parallel::Bool, actualRetType::Type, wrap::Bool, modifiedBetween::Tuple{…}, returnPrimal::Bool, expectedTapeType::Type, loweredArgs::Set{…}, boxedArgs::Set{…})
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/uXW2v/src/compiler.jl:4004
 [11] codegen(output::Symbol, job::GPUCompiler.CompilerJob{…}; libraries::Bool, deferred_codegen::Bool, optimize::Bool, toplevel::Bool, strip::Bool, validate::Bool, only_entry::Bool, parent_job::Nothing)
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/uXW2v/src/compiler.jl:6308
 [12] codegen
    @ ~/.julia/packages/Enzyme/uXW2v/src/compiler.jl:5465 [inlined]
 [13] _thunk(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}, postopt::Bool)
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/uXW2v/src/compiler.jl:7110
 [14] _thunk
    @ ~/.julia/packages/Enzyme/uXW2v/src/compiler.jl:7110 [inlined]
 [15] cached_compilation
    @ ~/.julia/packages/Enzyme/uXW2v/src/compiler.jl:7151 [inlined]
 [16] thunkbase(ctx::LLVM.Context, mi::Core.MethodInstance, ::Val{…}, ::Type{…}, ::Type{…}, tt::Type{…}, ::Val{…}, ::Val{…}, ::Val{…}, ::Val{…}, ::Val{…}, ::Type{…}, ::Val{…}, ::Val{…})
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/uXW2v/src/compiler.jl:7224
 [17] #s2084#19056
    @ ~/.julia/packages/Enzyme/uXW2v/src/compiler.jl:7266 [inlined]
 [18]
    @ Enzyme.Compiler ./none:0
 [19] (::Core.GeneratedFunctionStub)(::UInt64, ::LineNumberNode, ::Any, ::Vararg{Any})
    @ Core ./boot.jl:602
 [20] autodiff
    @ ~/.julia/packages/Enzyme/uXW2v/src/Enzyme.jl:311 [inlined]
 [21] autodiff(mode::ReverseMode{…}, f::typeof(constantintegral), ::Type{…}, args::Active{…})
    @ Enzyme ~/.julia/packages/Enzyme/uXW2v/src/Enzyme.jl:328
 [22] macro expansion
    @ show.jl:1181 [inlined]
 [23] top-level scope
    @ ~/issues/quadgkkwargs.jl:26
 [24] include(fname::String)
    @ Base.MainInclude ./client.jl:489
 [25] top-level scope
    @ REPL[7]:1
in expression starting at /home/daniel/issues/quadgkkwargs.jl:26
Some type information was truncated. Use `show(err)` to see complete types.
wsmoses commented 2 weeks ago

Can you make a MWE (perhaps copying from quadgk) that reproduces this standalone?

danielwe commented 2 weeks ago

Whew! Here's a standalone reproducer. In this MWE you can't work around the problem with `@inline`/`@noinline` tags, but any one of the changes marked `# enable => problem solved` in the code comments below gets rid of the issue:

using Enzyme

# Stand-in for `quadgk`: applies `f` to every element of `segs` and sums the
# results. Both the callable (`F`) and the number of varargs (`N`) are bound to
# type parameters; the commented-out variant below drops both parameters.
foo(f::F, segs::Vararg{Any,N}) where {F,N} = sum(f, segs)
# foo(f, segs...) = sum(f, segs)  # enable => problem solved

# Custom forward (augmented primal) rule for `foo` with an `Active` callable
# and `Const` segment arguments.
# The primal value is computed only when `EnzymeRules.needs_primal(config)` is
# true; otherwise `nothing` is returned in its place.
# NOTE(review): the second and third `AugmentedReturn` arguments are both
# `nothing`; presumably these are the shadow and the tape -- confirm against the
# EnzymeRules documentation.
function EnzymeRules.augmented_primal(
    config, ::Const{typeof(foo)}, ::Type{RT}, f::Active, segs::Const...
) where {RT}
    primal = if EnzymeRules.needs_primal(config)
        # Unwrap the annotated arguments (`.val`) before calling the original
        # function.
        foo(f.val, map(s -> s.val, segs)...)
    else
        nothing
    end
    return EnzymeRules.AugmentedReturn(primal, nothing, nothing)
end

# Thin wrapper around a single value `f` of type `F`. In this reproducer it
# wraps the differentiated closure so that wrapped closures can be added with
# the `+` method defined further down in this script (and hence summed by
# `foo` in the reverse rule).
struct ClosureVector{F}
    f::F
end

# Predicate on a type `T`: returns `true` when
# `Enzyme.Compiler.active_reg_inner(T, (), nothing)` reports either `AnyState`
# or `DupState`, and `false` otherwise.
# NOTE(review): `active_reg_inner` is an Enzyme internal; the meaning of the
# `()` and `nothing` arguments is not visible from this script -- confirm
# against the Enzyme source for the version in use.
# Replacing the body with either constant return below makes the compilation
# failure go away.
@inline function guaranteed_nonactive(::Type{T}) where T
    # return false  # enable => problem solved
    # return true   # enable => problem solved
    rt = Enzyme.Compiler.active_reg_inner(T, (), nothing)
    return rt == Enzyme.Compiler.AnyState || rt == Enzyme.Compiler.DupState
end

# Addition of two `ClosureVector`s of the same type `CV`, delegated to
# `Enzyme.Compiler.recursive_add` with `identity` and the
# `guaranteed_nonactive` predicate defined above; the result is asserted to be
# of type `CV`.
# The commented-out three-argument call (without the predicate) avoids the
# compilation failure.
function Base.:+(a::CV, b::CV) where {CV<:ClosureVector}
    # return Enzyme.Compiler.recursive_add(a, b, identity)::CV  # enable => problem solved
    return Enzyme.Compiler.recursive_add(a, b, identity, guaranteed_nonactive)::CV
end

# Custom reverse rule for `foo`. The config, the return-activity argument
# (`::Active`) and the fourth positional argument are accepted but unused.
# Returns a tuple with one entry per differentiated argument: `dfcv.f` for the
# `Active` callable, followed by `nothing` for each `Const` segment.
function EnzymeRules.reverse(
    ::Any, ::Const{typeof(foo)}, ::Active, ::Any, f::Active, segs::Const...
)
    # Calls `foo` again with a closure that ignores its argument and returns
    # `ClosureVector(f.val)`; `foo` sums these with the `+` method defined
    # above. `segs` is passed through still wrapped in `Const`, which is
    # harmless because the closure never reads its argument.
    dfcv = foo(_ -> ClosureVector(f.val), segs...)
    return (dfcv.f, map(_ -> nothing, segs)...)
end

# Function under test: sums the constant closure `_ -> a` over the two
# arguments -1.0 and 1.0, i.e. evaluates to `2a`.
constantfoo(a) = foo(_ -> a, -1.0, 1.0)
# Plain evaluation works (prints 2.0 in the output below).
@show constantfoo(1.0)
# Reverse-mode differentiation w.r.t. `a`; this is the call that fails with
# "Enzyme compilation failed" in the output below.
@show autodiff(Reverse, constantfoo, Active, Active(1.0))

Output:

constantfoo(1.0) = 2.0
ERROR: LoadError: Enzyme compilation failed.
Current scope:
define internal fastcc [1 x [1 x double]] @julia___2120([1 x [1 x double]] addrspace(11)* nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) %0, [1 x [1 x double]] addrspace(11)* nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) %1) unnamed_addr #6 !dbg !34 {
top:
  %2 = call {}*** @julia.get_pgcstack()
  %current_task186 = getelementptr inbounds {}**, {}*** %2, i64 -14
  %current_task1 = bitcast {}*** %current_task186 to {}**
  %ptls_field87 = getelementptr inbounds {}**, {}*** %2, i64 2
  %3 = bitcast {}*** %ptls_field87 to i64***
  %ptls_load8889 = load i64**, i64*** %3, align 8, !tbaa !11
  %4 = getelementptr inbounds i64*, i64** %ptls_load8889, i64 2
  %safepoint = load i64*, i64** %4, align 8, !tbaa !15
  fence syncscope("singlethread") seq_cst
  call void @julia.safepoint(i64* %safepoint), !dbg !35
  fence syncscope("singlethread") seq_cst
  %5 = call nonnull {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32, {} addrspace(10)*)*, {} addrspace(10)*, {} addrspace(10)*, ...) @julia.call2({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32, {} addrspace(10)*)* noundef nonnull @ijl_invoke, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 127227952807648 to {}*) to {} addrspace(10)*), {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 127227789406832 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 127226967509088 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 127227788473088 to {}*) to {} addrspace(10)*)), !dbg !36
  %6 = call {} addrspace(10)* @ijl_get_nth_field_checked({} addrspace(10)* nonnull %5, i64 noundef 0), !dbg !36
  %7 = call nonnull {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)*, {} addrspace(10)*, ...) @julia.call({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)* noundef nonnull @ijl_apply_generic, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 127227815142080 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 127226827000400 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 127226826999904 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 127226827002080 to {}*) to {} addrspace(10)*), {} addrspace(10)* nonnull %6), !dbg !47
  %8 = call nonnull {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)*, {} addrspace(10)*, ...) @julia.call({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)* noundef nonnull @jl_f__svec_ref, {} addrspace(10)* noundef null, {} addrspace(10)* nonnull %7, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 127228014903936 to {}*) to {} addrspace(10)*)), !dbg !49
  %9 = call {} addrspace(10)* @julia.typeof({} addrspace(10)* nonnull %8) #11, !dbg !49
  %10 = addrspacecast {} addrspace(10)* %9 to {} addrspace(11)*, !dbg !49
  %11 = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* %10) #11, !dbg !49
  %exactly_isa.not = icmp eq {}* %11, inttoptr (i64 127227786679488 to {}*), !dbg !49
  br i1 %exactly_isa.not, label %post_box_union, label %L13, !dbg !49

L13:                                              ; preds = %top
  %magicptr = ptrtoint {}* %11 to i64, !dbg !49
  switch i64 %magicptr, label %L26 [
    i64 127227858680800, label %L15
    i64 127226787157376, label %is45
  ], !dbg !49

L15:                                              ; preds = %L13
  %12 = call fastcc i8 @julia____2123({} addrspace(10)* nocapture noundef nonnull readonly align 8 dereferenceable(8) %8), !dbg !49, !range !50
  br label %pass, !dbg !49

L26:                                              ; preds = %L13, %pass
  %13 = call nonnull {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)*, {} addrspace(10)*, ...) @julia.call({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)* noundef nonnull @jl_f__svec_ref, {} addrspace(10)* noundef null, {} addrspace(10)* nonnull %7, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 127228014903936 to {}*) to {} addrspace(10)*)), !dbg !49
  %14 = call {} addrspace(10)* @julia.typeof({} addrspace(10)* nonnull %13) #11, !dbg !49
  %15 = addrspacecast {} addrspace(10)* %14 to {} addrspace(11)*, !dbg !49
  %16 = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* %15) #11, !dbg !49
  %exactly_isa22.not = icmp eq {}* %16, inttoptr (i64 127227786679488 to {}*), !dbg !49
  br i1 %exactly_isa22.not, label %post_box_union29, label %L31, !dbg !49

L31:                                              ; preds = %L26
  %magicptr93 = ptrtoint {}* %16 to i64, !dbg !49
  switch i64 %magicptr93, label %L44 [
    i64 127227858680800, label %L33
    i64 127226787157376, label %is
  ], !dbg !49

L33:                                              ; preds = %L31
  %17 = call fastcc i8 @julia____2123({} addrspace(10)* nocapture noundef nonnull readonly align 8 dereferenceable(8) %13), !dbg !49, !range !50
  br label %pass34, !dbg !49

L44:                                              ; preds = %L31, %pass34
  %18 = call nonnull {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)*, {} addrspace(10)*, ...) @julia.call({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)* noundef nonnull @jl_f__svec_ref, {} addrspace(10)* noundef null, {} addrspace(10)* nonnull %7, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 127228014903936 to {}*) to {} addrspace(10)*)), !dbg !51
  %19 = call nonnull {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)*, {} addrspace(10)*, ...) @julia.call({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)* noundef nonnull @ijl_apply_generic, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 127227787719360 to {}*) to {} addrspace(10)*), {} addrspace(10)* nonnull %18), !dbg !51
  %20 = call nonnull {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)*, {} addrspace(10)*, ...) @julia.call({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)* noundef nonnull @ijl_apply_generic, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 127227815142080 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 127226827000864 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 127226826999904 to {}*) to {} addrspace(10)*), {} addrspace(10)* nonnull %19), !dbg !52
  %21 = call nonnull {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)*, {} addrspace(10)*, ...) @julia.call({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)* noundef nonnull @jl_f__svec_ref, {} addrspace(10)* noundef null, {} addrspace(10)* nonnull %20, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 127228014903648 to {}*) to {} addrspace(10)*)), !dbg !52
  br label %L51, !dbg !51

L51:                                              ; preds = %pass, %pass34, %L44
  %value_phi = phi {} addrspace(10)* [ %21, %L44 ], [ addrspacecast ({}* inttoptr (i64 127226765235504 to {}*) to {} addrspace(10)*), %pass34 ], [ addrspacecast ({}* inttoptr (i64 127226765235504 to {}*) to {} addrspace(10)*), %pass ]
  %22 = call {} addrspace(10)* @julia.typeof({} addrspace(10)* %value_phi) #11, !dbg !53
  %23 = addrspacecast {} addrspace(10)* %22 to {} addrspace(11)*, !dbg !53
  %24 = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* %23) #11, !dbg !53
  %exactly_isa6.not = icmp eq {}* %24, inttoptr (i64 127226787157376 to {}*), !dbg !53
  br i1 %exactly_isa6.not, label %L68, label %L59, !dbg !53

L59:                                              ; preds = %L51
  %25 = call nonnull {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)*, {} addrspace(10)*, ...) @julia.call({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)* noundef nonnull @ijl_apply_generic, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 127227786671392 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 127226787157376 to {}*) to {} addrspace(10)*), {} addrspace(10)* %value_phi), !dbg !53
  br label %L68, !dbg !53

L68:                                              ; preds = %L51, %L59
  %unbox17.in.in = phi {} addrspace(10)* [ %25, %L59 ], [ %value_phi, %L51 ]
  %unbox17.in = bitcast {} addrspace(10)* %unbox17.in.in to i32 addrspace(10)*
  %unbox17 = load i32, i32 addrspace(10)* %unbox17.in, align 4, !dbg !54, !tbaa !58, !alias.scope !61, !noalias !62
  %26 = and i32 %unbox17, -3, !dbg !57
  %27 = icmp eq i32 %26, 0, !dbg !57
  br i1 %27, label %L81, label %L76, !dbg !44

L76:                                              ; preds = %L68
  %memcpy_refined_src = getelementptr inbounds [1 x [1 x double]], [1 x [1 x double]] addrspace(11)* %0, i64 0, i64 0, i64 0, !dbg !63
  %28 = load double, double addrspace(11)* %memcpy_refined_src, align 8, !dbg !63, !tbaa !15, !alias.scope !19, !noalias !22
  %memcpy_refined_src11 = getelementptr inbounds [1 x [1 x double]], [1 x [1 x double]] addrspace(11)* %1, i64 0, i64 0, i64 0, !dbg !63
  %29 = load double, double addrspace(11)* %memcpy_refined_src11, align 8, !dbg !63, !tbaa !15, !alias.scope !19, !noalias !22
  %box = call noalias nonnull dereferenceable(16) {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1, i64 noundef 16, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 127226768084880 to {}*) to {} addrspace(10)*)) #12, !dbg !63
  %30 = bitcast {} addrspace(10)* %box to i8 addrspace(10)*, !dbg !63
  %newstruct.sroa.0.0..sroa_cast = bitcast {} addrspace(10)* %box to double addrspace(10)*, !dbg !63
  store double %28, double addrspace(10)* %newstruct.sroa.0.0..sroa_cast, align 8, !dbg !63, !tbaa !64, !alias.scope !65, !noalias !66
  %newstruct.sroa.2.0..sroa_idx = getelementptr inbounds i8, i8 addrspace(10)* %30, i64 8, !dbg !63
  %newstruct.sroa.2.0..sroa_cast = bitcast i8 addrspace(10)* %newstruct.sroa.2.0..sroa_idx to double addrspace(10)*, !dbg !63
  store double %29, double addrspace(10)* %newstruct.sroa.2.0..sroa_cast, align 8, !dbg !63, !tbaa !64, !alias.scope !65, !noalias !66
  %31 = call nonnull {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32, {} addrspace(10)*)*, {} addrspace(10)*, {} addrspace(10)*, ...) @julia.call2({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32, {} addrspace(10)*)* noundef nonnull @ijl_invoke, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 127226754453744 to {}*) to {} addrspace(10)*), {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 127227789406832 to {}*) to {} addrspace(10)*), {} addrspace(10)* nonnull %box, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 127227788473088 to {}*) to {} addrspace(10)*)), !dbg !63
  %32 = addrspacecast {} addrspace(10)* %31 to [1 x [1 x double]] addrspace(11)*, !dbg !67
  %33 = bitcast {} addrspace(10)* %31 to [1 x [1 x double]] addrspace(10)*
  br label %L81

L81:                                              ; preds = %L68, %L76
  %nodecayed..pn = phi [1 x [1 x double]] addrspace(10)*
  %nodecayedoff..pn = phi i64
  %.pn = phi [1 x [1 x double]] addrspace(11)* [ %32, %L76 ], [ %0, %L68 ]
  %.sroa.081.0.in = getelementptr inbounds [1 x [1 x double]], [1 x [1 x double]] addrspace(11)* %.pn, i64 0, i64 0, i64 0
  %.sroa.081.0 = load double, double addrspace(11)* %.sroa.081.0.in, align 8, !tbaa !64, !alias.scope !71, !noalias !72
  %unbox10.fca.0.0.insert = insertvalue [1 x [1 x double]] poison, double %.sroa.081.0, 0, 0, !dbg !46
  ret [1 x [1 x double]] %unbox10.fca.0.0.insert, !dbg !46

post_box_union:                                   ; preds = %top
  call void @ijl_type_error(i8* noundef getelementptr inbounds ([3 x i8], [3 x i8]* @_j_str1, i32 0, i32 0), {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 127227858681216 to {}*) to {} addrspace(10)*), {} addrspace(12)* noundef addrspacecast ({}* inttoptr (i64 127227786679552 to {}*) to {} addrspace(12)*)) #13, !dbg !49
  unreachable, !dbg !49

pass:                                             ; preds = %is45, %L15
  %unbox4.ph = phi i8 [ %12, %L15 ], [ %phi.cast94, %is45 ]
  %.not90 = icmp eq i8 %unbox4.ph, 0, !dbg !49
  br i1 %.not90, label %L26, label %L51, !dbg !49

post_box_union29:                                 ; preds = %L26
  call void @ijl_type_error(i8* noundef getelementptr inbounds ([3 x i8], [3 x i8]* @_j_str1, i32 0, i32 0), {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 127227858681216 to {}*) to {} addrspace(10)*), {} addrspace(12)* noundef addrspacecast ({}* inttoptr (i64 127227786679552 to {}*) to {} addrspace(12)*)) #13, !dbg !49
  unreachable, !dbg !49

pass34:                                           ; preds = %is, %L33
  %unbox35.ph = phi i8 [ %phi.cast, %is ], [ %17, %L33 ]
  %.not92 = icmp eq i8 %unbox35.ph, 0, !dbg !49
  br i1 %.not92, label %L44, label %L51, !dbg !49

is:                                               ; preds = %L31
  %34 = bitcast {} addrspace(10)* %13 to i32 addrspace(10)*, !dbg !73
  %unbox40 = load i32, i32 addrspace(10)* %34, align 4, !dbg !73, !tbaa !58, !alias.scope !61, !noalias !62
  %35 = icmp eq i32 %unbox40, 0, !dbg !73
  %phi.cast = zext i1 %35 to i8, !dbg !73
  br label %pass34, !dbg !73

is45:                                             ; preds = %L13
  %36 = bitcast {} addrspace(10)* %8 to i32 addrspace(10)*, !dbg !73
  %unbox47 = load i32, i32 addrspace(10)* %36, align 4, !dbg !73, !tbaa !58, !alias.scope !61, !noalias !62
  %37 = icmp eq i32 %unbox47, 0, !dbg !73
  %phi.cast94 = zext i1 %37 to i8, !dbg !73
  br label %pass, !dbg !73
}

Could not analyze garbage collection behavior of
 inst:   %.pn = phi [1 x [1 x double]] addrspace(11)* [ %32, %L76 ], [ %0, %L68 ]
 v0: [1 x [1 x double]] addrspace(11)* %0
 v: [1 x [1 x double]] addrspace(11)* %0
 offset: i64 0
 hasload: false

Stacktrace:
  [1] add_sum
    @ ./reduce.jl:24
  [2] BottomRF
    @ ./reduce.jl:86
  [3] MappingRF
    @ ./reduce.jl:100
  [4] afoldl
    @ ./operators.jl:545
  [5] _foldl_impl
    @ ./reduce.jl:68
  [6] foldl_impl
    @ ./reduce.jl:48
  [7] mapfoldl_impl
    @ ./reduce.jl:44
  [8] mapfoldl
    @ ./reduce.jl:175
  [9] mapreduce
    @ ./reduce.jl:307
 [10] sum
    @ ./reduce.jl:535
 [11] foo
    @ ~/issues/quadgkkwargs.jl:3
 [12] reverse
    @ ~/issues/quadgkkwargs.jl:36

Stacktrace:
  [1] (::Enzyme.Compiler.var"#getparent#18860"{LLVM.Function, LLVM.IntegerType, Int64, Dict{LLVM.PHIInst, LLVM.PHIInst}, Dict{LLVM.PHIInst, LLVM.PHIInst}, LLVM.PHIInst, LLVM.Argument, LLVM.IRBuilder})(v::LLVM.Argument, offset::LLVM.ConstantInt, hasload::Bool)
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/uXW2v/src/compiler/optimize.jl:833
  [2] nodecayed_phis!(mod::LLVM.Module)
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/uXW2v/src/compiler/optimize.jl:836
  [3] optimize!
    @ ~/.julia/packages/Enzyme/uXW2v/src/compiler/optimize.jl:2143 [inlined]
  [4] nested_codegen!(mode::Enzyme.API.CDerivativeMode, mod::LLVM.Module, funcspec::Core.MethodInstance, world::UInt64)
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/uXW2v/src/compiler.jl:1889
  [5] nested_codegen!(mode::Enzyme.API.CDerivativeMode, mod::LLVM.Module, f::Function, tt::Type, world::UInt64)
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/uXW2v/src/compiler.jl:1827
  [6] enzyme_custom_common_rev
    @ ~/.julia/packages/Enzyme/uXW2v/src/rules/customrules.jl:765 [inlined]
  [7] enzyme_custom_rev(B::LLVM.IRBuilder, orig::LLVM.CallInst, gutils::Enzyme.Compiler.GradientUtils, tape::Nothing)
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/uXW2v/src/rules/customrules.jl:1138
  [8] enzyme_custom_rev_cfunc(B::Ptr{LLVM.API.LLVMOpaqueBuilder}, OrigCI::Ptr{LLVM.API.LLVMOpaqueValue}, gutils::Ptr{Nothing}, tape::Ptr{LLVM.API.LLVMOpaqueValue})
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/uXW2v/src/rules/llvmrules.jl:27
  [9] EnzymeCreatePrimalAndGradient(logic::Enzyme.Logic, todiff::LLVM.Function, retType::Enzyme.API.CDIFFE_TYPE, constant_args::Vector{Enzyme.API.CDIFFE_TYPE}, TA::Enzyme.TypeAnalysis, returnValue::Bool, dretUsed::Bool, mode::Enzyme.API.CDerivativeMode, runtimeActivity::Bool, width::Int64, additionalArg::Ptr{Nothing}, forceAnonymousTape::Bool, typeInfo::Enzyme.FnTypeInfo, uncacheable_args::Vector{Bool}, augmented::Ptr{Nothing}, atomicAdd::Bool)
    @ Enzyme.API ~/.julia/packages/Enzyme/uXW2v/src/api.jl:163
 [10] enzyme!(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}, mod::LLVM.Module, primalf::LLVM.Function, TT::Type, mode::Enzyme.API.CDerivativeMode, width::Int64, parallel::Bool, actualRetType::Type, wrap::Bool, modifiedBetween::Tuple{Bool, Bool}, returnPrimal::Bool, expectedTapeType::Type, loweredArgs::Set{Int64}, boxedArgs::Set{Int64})
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/uXW2v/src/compiler.jl:4004
 [11] codegen(output::Symbol, job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}; libraries::Bool, deferred_codegen::Bool, optimize::Bool, toplevel::Bool, strip::Bool, validate::Bool, only_entry::Bool, parent_job::Nothing)
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/uXW2v/src/compiler.jl:6308
 [12] codegen
    @ ~/.julia/packages/Enzyme/uXW2v/src/compiler.jl:5465 [inlined]
 [13] _thunk(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}, postopt::Bool)
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/uXW2v/src/compiler.jl:7110
 [14] _thunk
    @ ~/.julia/packages/Enzyme/uXW2v/src/compiler.jl:7110 [inlined]
 [15] cached_compilation
    @ ~/.julia/packages/Enzyme/uXW2v/src/compiler.jl:7151 [inlined]
 [16] thunkbase(ctx::LLVM.Context, mi::Core.MethodInstance, ::Val{0x0000000000007b05}, ::Type{Const{typeof(constantfoo)}}, ::Type{Active}, tt::Type{Tuple{Active{Float64}}}, ::Val{Enzyme.API.DEM_ReverseModeCombined}, ::Val{1}, ::Val{(false, false)}, ::Val{false}, ::Val{false}, ::Type{FFIABI}, ::Val{true}, ::Val{false})
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/uXW2v/src/compiler.jl:7224
 [17] #s2084#19056
    @ ~/.julia/packages/Enzyme/uXW2v/src/compiler.jl:7266 [inlined]
 [18] var"#s2084#19056"(FA::Any, A::Any, TT::Any, Mode::Any, ModifiedBetween::Any, width::Any, ReturnPrimal::Any, ShadowInit::Any, World::Any, ABI::Any, ErrIfFuncWritten::Any, RuntimeActivity::Any, ::Any, ::Type, ::Type, ::Type, tt::Any, ::Type, ::Type, ::Type, ::Type, ::Type, ::Type, ::Type, ::Any)
    @ Enzyme.Compiler ./none:0
 [19] (::Core.GeneratedFunctionStub)(::UInt64, ::LineNumberNode, ::Any, ::Vararg{Any})
    @ Core ./boot.jl:602
 [20] autodiff
    @ ~/.julia/packages/Enzyme/uXW2v/src/Enzyme.jl:311 [inlined]
 [21] autodiff(mode::ReverseMode{false, false, FFIABI, false, false}, f::typeof(constantfoo), ::Type{Active}, args::Active{Float64})
    @ Enzyme ~/.julia/packages/Enzyme/uXW2v/src/Enzyme.jl:328
 [22] macro expansion
    @ show.jl:1181 [inlined]
 [23] top-level scope
    @ ~/issues/quadgkkwargs.jl:42
in expression starting at /home/daniel/issues/quadgkkwargs.jl:42