EnzymeAD / Enzyme.jl

Julia bindings for the Enzyme automatic differentiator
https://enzyme.mit.edu
MIT License
455 stars 63 forks source link

Compilation failed for amdgpu #1771

Closed jariji closed 2 months ago

jariji commented 2 months ago

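Enzyme fails to compile the reverse-mode gradient of the function below when its inputs are AMDGPU `ROCArray`s (the full error output follows the script). Can I do something differently here?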

  [21141c5a] AMDGPU v1.0.1
  [7da242da] Enzyme v0.12.34
  [09ab397b] StructArrays v0.6.18

julia> versioninfo()
Julia Version 1.10.3
Commit 0b4590a5507 (2024-04-30 10:59 UTC)
Build Info:
  Official https://julialang.org/ release
Platform Info:
  OS: Linux (x86_64-linux-gnu)
  CPU: 24 × AMD Ryzen 9 3900XT 12-Core Processor
  WORD_SIZE: 64
  LIBM: libopenlibm
  LLVM: libLLVM-15.0.7 (ORCJIT, znver2)
Threads: 24 default, 0 interactive, 12 GC (on 24 virtual cores)
Environment:
  LD_LIBRARY_PATH = :/lib:/nix/store/sy1k5jj33a8frbmf3sx1hd1ksx0schqx-clr-6.0.2/lib:/nix/store/7d8yhjypiybk9j3lqa2wk1chdpn8346m-rocm-runtime-6.0.2/lib:/nix/store/yx40lzzcww59nppw2xwakwlnq23sh7av-rocblas-6.0.2/lib:/nix/store/raj03q8wil10103d050d0ngx4k0qfjyc-mesa-24.0.7-drivers/lib
  JULIA_NUM_THREADS = 24

using StructArrays, AMDGPU, ForwardDiff, Enzyme
"""
    s(θ, t)

Return `exp(-η * t) * exp(γ / λ * (1 - exp(λ * t)))`, where `γ`, `λ` and `η` are read
as properties of `θ` (for example a `NamedTuple` or one element of a `StructArray`).

NOTE(review): presumably a survival function of a Gompertz–Makeham-style model with
hazard `h` — confirm with the author.
"""
function s(θ, t)
    (; γ, λ, η) = θ
    # Factor driven by the constant term η.
    constant_factor = exp(-η * t)
    # Factor driven by the exponentially growing term γ * exp(λ * t).
    growth_factor = exp(γ / λ * (1 - exp(λ * t)))
    return constant_factor * growth_factor
end
"""
    h(θ, t)

Return `η + γ * exp(λ * t)`, where `γ`, `λ` and `η` are read as properties of `θ`.

NOTE(review): presumably the hazard rate that pairs with `s` — confirm with the author.
"""
function h(θ, t)
    (; γ, λ, η) = θ
    time_dependent = γ * exp(λ * t)
    return η + time_dependent
end
"""
    cmf(θ, d, t)

Return `s(θ, t) * h(θ, t)^d`.

In the script below `d` is an element of a Boolean array, so the `h` factor is either
included (`d == true`) or replaced by one (`d == false`).
"""
function cmf(θ, d, t)
    survival = s(θ, t)
    hazard_power = h(θ, t)^d
    return survival * hazard_power
end
"""
    tcmf(θ, l, d, t)

Return `cmf(θ, d, t) / s(θ, l)`, i.e. `cmf` at `t` normalised by `s` evaluated at `l`.

NOTE(review): looks like a left-truncation adjustment with truncation point `l` —
confirm with the author.
"""
function tcmf(θ, l, d, t)
    numerator = cmf(θ, d, t)
    denominator = s(θ, l)
    return numerator / denominator
end
"""
    lln(params, data)

Return the sum of `log(tcmf(θᵢ, lᵢ, dᵢ, tᵢ))` over all rows.

`data` must expose the properties `Z` and `df`. The per-row parameters are the rows of
`exp.(Z * params)`, whose first three columns supply `γ`, `λ` and `η`; `df` must expose
`ls`, `ds` and `ts`.

NOTE(review): the name suggests a log-likelihood — confirm with the author.
"""
function lln(params, data)
    (; Z, df) = data
    Θ = exp.(Z * params)
    # Columns are taken as views (no copies) and bundled so that each element of
    # `rowparams` has the `γ`, `λ`, `η` properties that `s` and `h` read.
    rowparams = @views StructArray(; γ=Θ[:, 1], λ=Θ[:, 2], η=Θ[:, 3])
    logterms = log.(tcmf.(rowparams, df.ls, df.ds, df.ts))
    # The call-site `@noinline` on the reduction is kept exactly as in the original.
    total = @noinline sum(logterms)
    return total
end
# Reproducer: build random inputs on the GPU, check that the objective evaluates to a
# finite number, then request its reverse-mode gradient with Enzyme.
let n = 10, k = 5, p = 3
    # Host arrays are generated and uploaded in the same order as before, so the
    # sequence of `rand` calls is unchanged.
    Z = ROCArray(rand(n, k))
    params = ROCArray(rand(k, p) .- 3)

    ls = ROCArray(30.0 .+ 10 * rand(n))
    ds = ROCArray(rand(n) .< 0.1)
    ts = ROCArray(50 .+ 10 * rand(n))
    df = StructArray(; ls, ds, ts)

    data = (; Z, df)
    # Closure over `data`, so that only `params` is differentiated.
    objective(θ) = lln(θ, data)

    # The plain (primal) evaluation is checked before the gradient is requested.
    @assert isfinite(objective(params))
    Enzyme.gradient(Enzyme.Reverse, Enzyme.Const(objective), params)
end

LoadError: Enzyme compilation failed.
Current scope: 
; Function Attrs: mustprogress willreturn
define internal fastcc void @preprocess_julia__HIPBuffer_6_33953({ { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 }* noalias nocapture nofree noundef nonnull writeonly sret({ { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 }) align 8 dereferenceable(64) "enzyme_inactive" "enzyme_type"="{[-1]:Pointer, [-1,0]:Pointer, [-1,8]:Integer, [-1,9]:Integer, [-1,10]:Integer, [-1,11]:Integer, [-1,16]:Pointer, [-1,24]:Integer, [-1,25]:Integer, [-1,26]:Integer, [-1,27]:Integer, [-1,32]:Pointer, [-1,32,0]:Pointer, [-1,32,8]:Integer, [-1,40]:Pointer, [-1,48]:Integer, [-1,49]:Integer, [-1,50]:Integer, [-1,51]:Integer, [-1,52]:Integer, [-1,53]:Integer, [-1,54]:Integer, [-1,55]:Integer, [-1,56]:Integer}" %0, [2 x {} addrspace(10)*]* noalias nocapture nofree noundef nonnull writeonly align 8 dereferenceable(16) "enzyme_inactive" "enzyme_type"="{[-1]:Pointer}" "enzymejl_returnRoots" %1, {} addrspace(10)* noundef nonnull align 8 dereferenceable(64) "enzyme_inactive" "enzyme_type"="{[-1]:Pointer, [-1,0]:Pointer, [-1,8]:Pointer, [-1,16]:Pointer, [-1,24]:Integer, [-1,25]:Integer, [-1,26]:Integer, [-1,27]:Integer, [-1,32]:Pointer, [-1,40]:Integer, [-1,41]:Integer, [-1,42]:Integer, [-1,43]:Integer, [-1,48]:Pointer, [-1,48,0]:Pointer, [-1,48,8]:Integer, [-1,56]:Integer}" "enzymejl_parmtype"="140640195481280" "enzymejl_parmtype_ref"="2" %2, i64 signext "enzyme_inactive" "enzyme_type"="{[-1]:Integer}" "enzymejl_parmtype"="140645797537536" "enzymejl_parmtype_ref"="0" %3) unnamed_addr #511 !dbg !39545 {
top:
  %4 = call {}*** @julia.get_pgcstack()
  %5 = call {}*** @julia.get_pgcstack()
  %6 = bitcast {}*** %5 to {}**
  %7 = getelementptr inbounds {}*, {}** %6, i64 -14
  %8 = getelementptr inbounds {}*, {}** %7, i64 16
  %9 = bitcast {}** %8 to i8**
  %10 = load i8*, i8** %9, align 8
  %11 = call noalias nonnull dereferenceable(32) dereferenceable_or_null(32) {} addrspace(10)* @julia.gc_alloc_obj({}** %7, i64 32, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140635290939216 to {}*) to {} addrspace(10)*)), !enzyme_fromstack !539
  call void @zeroType.273({} addrspace(10)* %11, i8 0, i64 32), !enzyme_zerostack !0
  %12 = bitcast {} addrspace(10)* %11 to [1 x { i64, i32, {} addrspace(10)*, i32 }] addrspace(10)*, !enzyme_caststack !0
  %13 = bitcast {}*** %4 to {}**
  %14 = getelementptr inbounds {}*, {}** %13, i64 -14
  %15 = getelementptr inbounds {}*, {}** %14, i64 16
  %16 = bitcast {}** %15 to i8**
  %17 = load i8*, i8** %16, align 8
  %18 = call noalias nonnull dereferenceable(32) dereferenceable_or_null(32) {} addrspace(10)* @julia.gc_alloc_obj({}** %14, i64 32, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140635290939216 to {}*) to {} addrspace(10)*)), !enzyme_fromstack !539
  call void @zeroType.274({} addrspace(10)* %18, i8 0, i64 32), !enzyme_zerostack !0
  %19 = bitcast {} addrspace(10)* %18 to [1 x { i64, i32, {} addrspace(10)*, i32 }] addrspace(10)*, !enzyme_caststack !0
  %20 = call {}*** @julia.get_pgcstack() #515
  %current_task141 = getelementptr inbounds {}**, {}*** %20, i64 -14
  %current_task1 = bitcast {}*** %current_task141 to {}**
  %ptls_field42 = getelementptr inbounds {}**, {}*** %20, i64 2
  %21 = bitcast {}*** %ptls_field42 to i64***
  %ptls_load4344 = load i64**, i64*** %21, align 8, !tbaa !516
  %22 = getelementptr inbounds i64*, i64** %ptls_load4344, i64 2
  %safepoint = load i64*, i64** %22, align 8, !tbaa !520
  fence syncscope("singlethread") seq_cst
  call void @julia.safepoint(i64* %safepoint) #515, !dbg !39546
  fence syncscope("singlethread") seq_cst
  %newstruct = call noalias nonnull dereferenceable(8) "enzyme_type"="{[-1]:Pointer, [-1,-1]:Pointer}" {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1, i64 noundef 8, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140645776580368 to {}*) to {} addrspace(10)*)) #516, !dbg !39547
  %23 = addrspacecast {} addrspace(10)* %newstruct to {} addrspace(10)* addrspace(11)*, !dbg !39547
  store {} addrspace(10)* null, {} addrspace(10)* addrspace(11)* %23, align 8, !dbg !39547, !tbaa !527, !alias.scope !531, !noalias !39549
  %24 = addrspacecast {} addrspace(10)* %2 to i8 addrspace(11)*, !dbg !39552, !enzyme_inactive !0
  %25 = getelementptr inbounds i8, i8 addrspace(11)* %24, i64 16, !dbg !39552
  %26 = bitcast i8 addrspace(11)* %25 to { i64, i32, {} addrspace(10)*, i32 } addrspace(11)*, !dbg !39552
  %27 = load { i64, i32, {} addrspace(10)*, i32 }, { i64, i32, {} addrspace(10)*, i32 } addrspace(11)* %26, align 8, !dbg !39552, !tbaa !527, !alias.scope !531, !noalias !534
  %getfield_addr = getelementptr inbounds i8, i8 addrspace(11)* %24, i64 48, !dbg !39552
  %28 = bitcast i8 addrspace(11)* %getfield_addr to {} addrspace(10)* addrspace(11)*, !dbg !39552
  %getfield = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %28 unordered, align 8, !dbg !39552, !tbaa !527, !alias.scope !531, !noalias !534, !nonnull !0, !dereferenceable !679, !align !539
  %.not = icmp eq i64 %3, 0, !dbg !39553
  br i1 %.not, label %L6, label %L9, !dbg !39554

common.ret:                                       ; preds = %L45, %L6
  %storemerge = phi { { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 } [ %33, %L6 ], [ %value_phi, %L45 ], !dbg !39555
  store { { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 } %storemerge, { { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 }* %0, align 8, !dbg !39555, !noalias !39556
  ret void, !dbg !39555

L6:                                               ; preds = %top
  %29 = insertvalue { { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 } zeroinitializer, { i64, i32, {} addrspace(10)*, i32 } %27, 0, !dbg !39557
  %30 = insertvalue { { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 } %29, {} addrspace(10)* %getfield, 1, !dbg !39557
  %31 = insertvalue { { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 } %30, i64 0, 2, !dbg !39557
  %32 = insertvalue { { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 } %31, i64 0, 3, !dbg !39557
  %33 = insertvalue { { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 } %32, i8 1, 4, !dbg !39557
  %34 = extractvalue { i64, i32, {} addrspace(10)*, i32 } %27, 2, !dbg !39554
  %35 = getelementptr inbounds [2 x {} addrspace(10)*], [2 x {} addrspace(10)*]* %1, i64 0, i64 0, !dbg !39554
  store {} addrspace(10)* %34, {} addrspace(10)** %35, align 8, !dbg !39554, !noalias !39556
  %36 = getelementptr inbounds [2 x {} addrspace(10)*], [2 x {} addrspace(10)*]* %1, i64 0, i64 1, !dbg !39554
  store {} addrspace(10)* %getfield, {} addrspace(10)** %36, align 8, !dbg !39554, !noalias !39556
  br label %common.ret

L9:                                               ; preds = %top
  call fastcc void @julia__maybe_collect_31_32580(i8 noundef zeroext 0) #515, !dbg !39558
  %.fca.0.0.extract30 = extractvalue { i64, i32, {} addrspace(10)*, i32 } %27, 0, !dbg !39560
  %.fca.0.0.gep31 = getelementptr [1 x { i64, i32, {} addrspace(10)*, i32 }], [1 x { i64, i32, {} addrspace(10)*, i32 }] addrspace(10)* %12, i64 0, i64 0, i32 0, !dbg !39560
  store i64 %.fca.0.0.extract30, i64 addrspace(10)* %.fca.0.0.gep31, align 8, !dbg !39560, !noalias !39556
  %.fca.0.1.extract32 = extractvalue { i64, i32, {} addrspace(10)*, i32 } %27, 1, !dbg !39560
  %.fca.0.1.gep33 = getelementptr [1 x { i64, i32, {} addrspace(10)*, i32 }], [1 x { i64, i32, {} addrspace(10)*, i32 }] addrspace(10)* %12, i64 0, i64 0, i32 1, !dbg !39560
  store i32 %.fca.0.1.extract32, i32 addrspace(10)* %.fca.0.1.gep33, align 8, !dbg !39560, !noalias !39556
  %.fca.0.2.extract34 = extractvalue { i64, i32, {} addrspace(10)*, i32 } %27, 2, !dbg !39560
  %.fca.0.2.gep35 = getelementptr [1 x { i64, i32, {} addrspace(10)*, i32 }], [1 x { i64, i32, {} addrspace(10)*, i32 }] addrspace(10)* %12, i64 0, i64 0, i32 2, !dbg !39560
  store {} addrspace(10)* %.fca.0.2.extract34, {} addrspace(10)* addrspace(10)* %.fca.0.2.gep35, align 8, !dbg !39560, !noalias !39556
  call void ({} addrspace(10)*, ...) @julia.write_barrier({} addrspace(10)* %11, {} addrspace(10)* %.fca.0.2.extract34), !dbg !39560
  %.fca.0.3.extract36 = extractvalue { i64, i32, {} addrspace(10)*, i32 } %27, 3, !dbg !39560
  %.fca.0.3.gep37 = getelementptr [1 x { i64, i32, {} addrspace(10)*, i32 }], [1 x { i64, i32, {} addrspace(10)*, i32 }] addrspace(10)* %12, i64 0, i64 0, i32 3, !dbg !39560
  store i32 %.fca.0.3.extract36, i32 addrspace(10)* %.fca.0.3.gep37, align 8, !dbg !39560, !noalias !39556
  %37 = addrspacecast [1 x { i64, i32, {} addrspace(10)*, i32 }] addrspace(10)* %12 to [1 x { i64, i32, {} addrspace(10)*, i32 }] addrspace(11)*, !dbg !39560
  %38 = call fastcc nonnull {} addrspace(10)* @julia_lock_32602([1 x { i64, i32, {} addrspace(10)*, i32 }] addrspace(11)* nocapture nofree noundef nonnull readonly align 8 dereferenceable(32) %37) #515, !dbg !39560
  %newstruct4 = call noalias nonnull dereferenceable(8) "enzyme_inactive" "enzyme_type"="{[-1]:Pointer, [-1,-1]:Pointer}" {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1, i64 noundef 8, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140645729098544 to {}*) to {} addrspace(10)*)) #516, !dbg !39562
  %box = call noalias nonnull dereferenceable(8) "enzyme_inactive" "enzyme_type"="{[-1]:Pointer, [-1,-1]:Pointer, [-1,-1,0]:Pointer, [-1,-1,8]:Pointer, [-1,-1,16]:Pointer, [-1,-1,24]:Integer, [-1,-1,25]:Integer, [-1,-1,26]:Integer, [-1,-1,27]:Integer, [-1,-1,32]:Pointer, [-1,-1,40]:Integer, [-1,-1,41]:Integer, [-1,-1,42]:Integer, [-1,-1,43]:Integer, [-1,-1,48]:Pointer, [-1,-1,48,0]:Pointer, [-1,-1,48,8]:Integer, [-1,-1,56]:Integer}" {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1, i64 noundef 8, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140645926743952 to {}*) to {} addrspace(10)*)) #516, !dbg !39565
  %39 = bitcast {} addrspace(10)* %box to [1 x {} addrspace(10)*] addrspace(10)*, !dbg !39565, !enzyme_inactive !0
  %40 = getelementptr [1 x {} addrspace(10)*], [1 x {} addrspace(10)*] addrspace(10)* %39, i64 0, i64 0, !dbg !39565
  store {} addrspace(10)* %2, {} addrspace(10)* addrspace(10)* %40, align 8, !dbg !39565, !tbaa !552, !alias.scope !531, !noalias !39549
  %box7 = call noalias nonnull dereferenceable(32) "enzyme_type"="{[-1]:Pointer, [-1,0]:Pointer, [-1,0,0]:Pointer, [-1,0,8]:Pointer, [-1,0,16]:Pointer, [-1,0,24]:Integer, [-1,0,25]:Integer, [-1,0,26]:Integer, [-1,0,27]:Integer, [-1,0,32]:Pointer, [-1,0,40]:Integer, [-1,0,41]:Integer, [-1,0,42]:Integer, [-1,0,43]:Integer, [-1,0,48]:Pointer, [-1,0,48,0]:Pointer, [-1,0,48,8]:Integer, [-1,0,56]:Integer, [-1,8]:Integer, [-1,9]:Integer, [-1,10]:Integer, [-1,11]:Integer, [-1,12]:Integer, [-1,13]:Integer, [-1,14]:Integer, [-1,15]:Integer, [-1,16]:Pointer, [-1,16,-1]:Pointer, [-1,24]:Pointer, [-1,24,-1]:Pointer}" {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1, i64 noundef 32, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140645925238864 to {}*) to {} addrspace(10)*)) #516, !dbg !39565
  %41 = bitcast {} addrspace(10)* %box7 to { {} addrspace(10)*, i64, {} addrspace(10)*, {} addrspace(10)* } addrspace(10)*, !dbg !39565
  %.repack = getelementptr inbounds { {} addrspace(10)*, i64, {} addrspace(10)*, {} addrspace(10)* }, { {} addrspace(10)*, i64, {} addrspace(10)*, {} addrspace(10)* } addrspace(10)* %41, i64 0, i32 0, !dbg !39565
  store {} addrspace(10)* %2, {} addrspace(10)* addrspace(10)* %.repack, align 8, !dbg !39565, !tbaa !552, !alias.scope !531, !noalias !39549
  %.repack45 = getelementptr inbounds { {} addrspace(10)*, i64, {} addrspace(10)*, {} addrspace(10)* }, { {} addrspace(10)*, i64, {} addrspace(10)*, {} addrspace(10)* } addrspace(10)* %41, i64 0, i32 1, !dbg !39565
  store i64 %3, i64 addrspace(10)* %.repack45, align 8, !dbg !39565, !tbaa !552, !alias.scope !531, !noalias !39549
  %.repack47 = getelementptr inbounds { {} addrspace(10)*, i64, {} addrspace(10)*, {} addrspace(10)* }, { {} addrspace(10)*, i64, {} addrspace(10)*, {} addrspace(10)* } addrspace(10)* %41, i64 0, i32 2, !dbg !39565
  store {} addrspace(10)* %newstruct, {} addrspace(10)* addrspace(10)* %.repack47, align 8, !dbg !39565, !tbaa !552, !alias.scope !531, !noalias !39549
  %.repack49 = getelementptr inbounds { {} addrspace(10)*, i64, {} addrspace(10)*, {} addrspace(10)* }, { {} addrspace(10)*, i64, {} addrspace(10)*, {} addrspace(10)* } addrspace(10)* %41, i64 0, i32 3, !dbg !39565
  store {} addrspace(10)* %newstruct4, {} addrspace(10)* addrspace(10)* %.repack49, align 8, !dbg !39565, !tbaa !552, !alias.scope !531, !noalias !39549
  %42 = call nonnull "enzyme_type"="{[-1]:Pointer}" {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32, {} addrspace(10)*)*, {} addrspace(10)*, {} addrspace(10)*, ...) @julia.call2({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32, {} addrspace(10)*)* noundef nonnull @ijl_invoke, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140645923535648 to {}*) to {} addrspace(10)*), {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140645786484592 to {}*) to {} addrspace(10)*), {} addrspace(10)* nofree nonnull %box, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140640197790256 to {}*) to {} addrspace(10)*), {} addrspace(10)* nofree nonnull %box7, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140645726890992 to {}*) to {} addrspace(10)*)) #517, !dbg !39565
  store atomic {} addrspace(10)* %42, {} addrspace(10)* addrspace(11)* %23 release, align 8, !dbg !39565, !tbaa !527, !alias.scope !531, !noalias !39549
  call void ({} addrspace(10)*, ...) @julia.write_barrier({} addrspace(10)* noundef nonnull %newstruct, {} addrspace(10)* nonnull %42) #518, !dbg !39565
  %43 = call nonnull "enzyme_type"="{[-1]:Pointer}" {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)*, {} addrspace(10)*, ...) @julia.call({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)* noundef nonnull @ijl_apply_generic, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140645730366480 to {}*) to {} addrspace(10)*), {} addrspace(10)* nonnull %42, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140645739823824 to {}*) to {} addrspace(10)*)) #517, !dbg !39566
  %44 = call "enzyme_type"="{[-1]:Pointer}" {} addrspace(10)* @julia.typeof({} addrspace(10)* nonnull %43) #519, !dbg !39566
  %45 = addrspacecast {} addrspace(10)* %44 to {} addrspace(11)*, !dbg !39566
  %46 = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* %45) #519, !dbg !39566
  %exactly_isa = icmp eq {}* %46, inttoptr (i64 192 to {}*), !dbg !39566
  br i1 %exactly_isa, label %pass11, label %fail10, !dbg !39566

L25:                                              ; preds = %pass11
  %box13 = call noalias nonnull dereferenceable(16) "enzyme_inactive" "enzyme_type"="{[-1]:Pointer, [-1,8]:Pointer}" {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1, i64 noundef 16, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140640196978176 to {}*) to {} addrspace(10)*)) #516, !dbg !39566
  %47 = bitcast {} addrspace(10)* %box13 to { i32, {} addrspace(10)* } addrspace(10)*, !dbg !39566, !enzyme_inactive !0
  store { i32, {} addrspace(10)* } { i32 2, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140640204901952 to {}*) to {} addrspace(10)*) }, { i32, {} addrspace(10)* } addrspace(10)* %47, align 8, !dbg !39566, !tbaa !552, !alias.scope !531, !noalias !39549
  %48 = addrspacecast {} addrspace(10)* %box13 to {} addrspace(12)*, !dbg !39566, !enzyme_inactive !0
  call void @ijl_throw({} addrspace(12)* %48) #520, !dbg !39566
  unreachable, !dbg !39566

L29:                                              ; preds = %pass11
  %.fca.0.1.gep = getelementptr [1 x { i64, i32, {} addrspace(10)*, i32 }], [1 x { i64, i32, {} addrspace(10)*, i32 }] addrspace(10)* %19, i64 0, i64 0, i32 1, !dbg !39567
  store i32 %.fca.0.1.extract32, i32 addrspace(10)* %.fca.0.1.gep, align 8, !dbg !39567, !noalias !39556
  %49 = addrspacecast [1 x { i64, i32, {} addrspace(10)*, i32 }] addrspace(10)* %19 to [1 x { i64, i32, {} addrspace(10)*, i32 }] addrspace(11)*, !dbg !39567
  %50 = call fastcc nonnull {} addrspace(10)* @julia_lock_33874([1 x { i64, i32, {} addrspace(10)*, i32 }] addrspace(11)* nocapture nofree noundef nonnull readonly align 8 dereferenceable(32) %49) #515, !dbg !39567
  %51 = addrspacecast {} addrspace(10)* %50 to i8 addrspace(11)*, !dbg !39569
  %52 = getelementptr inbounds i8, i8 addrspace(11)* %51, i64 16, !dbg !39569
  %53 = bitcast i8 addrspace(11)* %52 to i64 addrspace(11)*, !dbg !39569
  %atomic_initial = load atomic i64, i64 addrspace(11)* %53 monotonic, align 8, !dbg !39569, !tbaa !527, !alias.scope !531, !noalias !534
  br label %xchg, !dbg !39569

L40:                                              ; preds = %pass19
  %54 = insertvalue { { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 } zeroinitializer, { i64, i32, {} addrspace(10)*, i32 } %27, 0, !dbg !39571
  %55 = insertvalue { { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 } %54, {} addrspace(10)* %getfield, 1, !dbg !39571
  %56 = bitcast {} addrspace(10)* %70 to i64 addrspace(10)*, !dbg !39571
  %unbox = load i64, i64 addrspace(10)* %56, align 8, !dbg !39571, !tbaa !711, !alias.scope !531, !noalias !534
  %57 = insertvalue { { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 } %55, i64 %unbox, 2, !dbg !39571
  %58 = insertvalue { { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 } %57, i64 %3, 3, !dbg !39571
  %59 = insertvalue { { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 } %58, i8 1, 4, !dbg !39571
  br label %L45, !dbg !39572

L43:                                              ; preds = %pass19
  %box23 = call noalias nonnull dereferenceable(32) "enzyme_inactive" "enzyme_type"="{[-1]:Pointer, [-1,0]:Pointer, [-1,8]:Integer, [-1,9]:Integer, [-1,10]:Integer, [-1,11]:Integer, [-1,16]:Pointer, [-1,24]:Integer, [-1,25]:Integer, [-1,26]:Integer, [-1,27]:Integer}" {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1, i64 noundef 32, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140640195481408 to {}*) to {} addrspace(10)*)) #516, !dbg !39572
  %60 = bitcast {} addrspace(10)* %box23 to { i64, i32, {} addrspace(10)*, i32 } addrspace(10)*, !dbg !39572, !enzyme_inactive !0
  store { i64, i32, {} addrspace(10)*, i32 } %27, { i64, i32, {} addrspace(10)*, i32 } addrspace(10)* %60, align 8, !dbg !39572, !tbaa !552, !alias.scope !531, !noalias !39549
  %61 = call noalias nonnull "enzyme_inactive" "enzyme_type"="{[-1]:Pointer, [-1,-1]:Integer}" {} addrspace(10)* @ijl_box_int64(i64 signext %3) #521, !dbg !39572
  %62 = call nonnull "enzyme_type"="{[-1]:Pointer}" {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)*, {} addrspace(10)*, ...) @julia.call({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)* noundef nonnull @ijl_apply_generic, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140640197755616 to {}*) to {} addrspace(10)*), {} addrspace(10)* nofree nonnull %box23, {} addrspace(10)* nonnull %getfield, {} addrspace(10)* nonnull %70, {} addrspace(10)* nonnull %61, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140645865270416 to {}*) to {} addrspace(10)*)) #517, !dbg !39572
  %63 = bitcast {} addrspace(10)* %62 to { { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 } addrspace(10)*
  %unbox28 = load { { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 }, { { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 } addrspace(10)* %63, align 8, !tbaa !552, !alias.scope !531, !noalias !534
  br label %L45

L45:                                              ; preds = %L43, %L40
  %value_phi = phi { { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 } [ %59, %L40 ], [ %unbox28, %L43 ]
  %64 = extractvalue { { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 } %value_phi, 0, 2, !dbg !39572
  %65 = extractvalue { { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 } %value_phi, 1, !dbg !39572
  %66 = getelementptr inbounds [2 x {} addrspace(10)*], [2 x {} addrspace(10)*]* %1, i64 0, i64 0, !dbg !39572
  store {} addrspace(10)* %64, {} addrspace(10)** %66, align 8, !dbg !39572, !noalias !39556
  %67 = getelementptr inbounds [2 x {} addrspace(10)*], [2 x {} addrspace(10)*]* %1, i64 0, i64 1, !dbg !39572
  store {} addrspace(10)* %65, {} addrspace(10)** %67, align 8, !dbg !39572, !noalias !39556
  br label %common.ret

fail10:                                           ; preds = %L9
  %68 = addrspacecast {} addrspace(10)* %43 to {} addrspace(12)*, !dbg !39566
  call void @ijl_type_error(i8* noundef getelementptr inbounds ([3 x i8], [3 x i8]* @_j_str18, i32 0, i32 0), {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140645797537472 to {}*) to {} addrspace(10)*), {} addrspace(12)* %68) #520, !dbg !39566
  unreachable, !dbg !39566

pass11:                                           ; preds = %L9
  %69 = icmp eq {} addrspace(10)* %43, addrspacecast ({}* inttoptr (i64 140645865270432 to {}*) to {} addrspace(10)*), !dbg !39566
  br i1 %69, label %L29, label %L25, !dbg !39566

done_xchg:                                        ; preds = %xchg
  %70 = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %23 unordered, align 8, !dbg !39572, !tbaa !527, !alias.scope !531, !noalias !534
  %isdefined15.not = icmp eq {} addrspace(10)* %70, null, !dbg !39572
  br i1 %isdefined15.not, label %err, label %pass19, !dbg !39572

xchg:                                             ; preds = %xchg, %L29
  %iv = phi i64 [ %iv.next, %xchg ], [ 0, %L29 ], !dbg !39569
  %71 = phi i64 [ %atomic_initial, %L29 ], [ %74, %xchg ], !dbg !39569
  %iv.next = add nuw nsw i64 %iv, 1, !dbg !39569
  %72 = call fastcc i64 @julia___33959(i64 signext %71, i64 signext %3) #515, !dbg !39569
  %73 = cmpxchg i64 addrspace(11)* %53, i64 %71, i64 %72 seq_cst monotonic, align 8, !dbg !39569, !tbaa !527, !alias.scope !531, !noalias !534
  %74 = extractvalue { i64, i1 } %73, 0, !dbg !39569
  %75 = extractvalue { i64, i1 } %73, 1, !dbg !39569
  br i1 %75, label %done_xchg, label %xchg, !dbg !39569

pass19:                                           ; preds = %done_xchg
  %76 = call "enzyme_type"="{[-1]:Pointer}" {} addrspace(10)* @julia.typeof({} addrspace(10)* nonnull %70) #519, !dbg !39572
  %77 = addrspacecast {} addrspace(10)* %76 to {} addrspace(11)*, !dbg !39572
  %78 = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* %77) #519, !dbg !39572
  %exactly_isa21.not = icmp eq {}* %78, inttoptr (i64 140645797538720 to {}*), !dbg !39572
  br i1 %exactly_isa21.not, label %L40, label %L43, !dbg !39572

err:                                              ; preds = %done_xchg
  call void @ijl_undefined_var_error({} addrspace(12)* noundef addrspacecast ({}* inttoptr (i64 140645954469896 to {}*) to {} addrspace(12)*)) #520, !dbg !39572
  unreachable, !dbg !39572
}

Illegal replace ficticious phi for:   %_replacementA15 = phi { i64, i1 } , !dbg !606 of   %73 = cmpxchg i64 addrspace(11)* %53, i64 %71, i64 %72 seq_cst monotonic, align 8, !dbg !576, !tbaa !528, !alias.scope !532, !noalias !545
; Function Attrs: mustprogress willreturn
define internal fastcc { {} addrspace(10)* } @fakeaugmented_julia__HIPBuffer_6_33953({ { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 }* noalias nocapture nofree noundef nonnull writeonly align 8 dereferenceable(64) "enzyme_inactive" "enzyme_sret" "enzyme_type"="{[-1]:Pointer, [-1,0]:Pointer, [-1,8]:Integer, [-1,9]:Integer, [-1,10]:Integer, [-1,11]:Integer, [-1,16]:Pointer, [-1,24]:Integer, [-1,25]:Integer, [-1,26]:Integer, [-1,27]:Integer, [-1,32]:Pointer, [-1,32,0]:Pointer, [-1,32,8]:Integer, [-1,40]:Pointer, [-1,48]:Integer, [-1,49]:Integer, [-1,50]:Integer, [-1,51]:Integer, [-1,52]:Integer, [-1,53]:Integer, [-1,54]:Integer, [-1,55]:Integer, [-1,56]:Integer}" %0, [2 x {} addrspace(10)*]* noalias nocapture nofree noundef nonnull writeonly align 8 dereferenceable(16) "enzyme_inactive" "enzyme_type"="{[-1]:Pointer}" "enzymejl_returnRoots" %1, {} addrspace(10)* noundef nonnull align 8 dereferenceable(64) "enzyme_inactive" "enzyme_type"="{[-1]:Pointer, [-1,0]:Pointer, [-1,8]:Pointer, [-1,16]:Pointer, [-1,24]:Integer, [-1,25]:Integer, [-1,26]:Integer, [-1,27]:Integer, [-1,32]:Pointer, [-1,40]:Integer, [-1,41]:Integer, [-1,42]:Integer, [-1,43]:Integer, [-1,48]:Pointer, [-1,48,0]:Pointer, [-1,48,8]:Integer, [-1,56]:Integer}" "enzymejl_parmtype"="140640195481280" "enzymejl_parmtype_ref"="2" %2, i64 signext "enzyme_inactive" "enzyme_type"="{[-1]:Integer}" "enzymejl_parmtype"="140645797537536" "enzymejl_parmtype_ref"="0" %3) unnamed_addr #511 !dbg !39573 {
top:
  %"iv'ac" = alloca i64, align 8
  %loopLimit_cache = alloca i64, align 8
  %4 = alloca [1 x { i64, i32, {} addrspace(10)*, i32 }], i64 1, align 8
  %5 = bitcast [1 x { i64, i32, {} addrspace(10)*, i32 }]* %4 to {}*
  %6 = addrspacecast {}* %5 to {} addrspace(10)*, !enzyme_backstack !0
  %7 = alloca [1 x { i64, i32, {} addrspace(10)*, i32 }], i64 1, align 8
  %8 = bitcast [1 x { i64, i32, {} addrspace(10)*, i32 }]* %7 to {}*
  %9 = addrspacecast {}* %8 to {} addrspace(10)*, !enzyme_backstack !0
  %_cache = alloca i64*, align 8
  %_cache16 = alloca i64*, align 8
  %_replacementA15_cache = alloca { i64, i1 }*, align 8
  %10 = call {}*** @julia.get_pgcstack()
  %11 = call {}*** @julia.get_pgcstack()
  %12 = call {}*** @julia.get_pgcstack()
  %13 = bitcast {}*** %12 to {}**
  %14 = getelementptr inbounds {}*, {}** %13, i64 -14
  %_replacementA10 = phi i8** 
  call void @zeroType.273({} addrspace(10)* %9, i8 0, i64 32), !enzyme_zerostack !0
  %15 = bitcast {} addrspace(10)* %9 to [1 x { i64, i32, {} addrspace(10)*, i32 }] addrspace(10)*, !enzyme_caststack !0
  %16 = bitcast {}*** %11 to {}**
  %17 = getelementptr inbounds {}*, {}** %16, i64 -14
  %_replacementA7 = phi i8** 
  %_replacementA = phi i8* 
  call void @zeroType.274({} addrspace(10)* %6, i8 0, i64 32), !enzyme_zerostack !0
  %18 = bitcast {} addrspace(10)* %6 to [1 x { i64, i32, {} addrspace(10)*, i32 }] addrspace(10)*, !enzyme_caststack !0
  %19 = call {}*** @julia.get_pgcstack() #515
  %current_task141 = getelementptr inbounds {}**, {}*** %19, i64 -14
  %current_task1 = bitcast {}*** %current_task141 to {}**
  %ptls_field42 = getelementptr inbounds {}**, {}*** %19, i64 2
  %20 = bitcast {}*** %ptls_field42 to i64***
  %ptls_load4344 = load i64**, i64*** %20, align 8, !tbaa !516, !alias.scope !39574, !noalias !39577
  %21 = getelementptr inbounds i64*, i64** %ptls_load4344, i64 2
  %safepoint = load i64*, i64** %21, align 8, !tbaa !520, !alias.scope !39579, !noalias !39582
  fence syncscope("singlethread") seq_cst
  call void @julia.safepoint(i64* %safepoint) #515, !dbg !39584
  fence syncscope("singlethread") seq_cst
  %"newstruct'mi" = call noalias nonnull dereferenceable(8) "enzyme_type"="{[-1]:Pointer, [-1,-1]:Pointer}" {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1, i64 noundef 8, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140645776580368 to {}*) to {} addrspace(10)*)) #516, !dbg !39585
  %22 = bitcast {} addrspace(10)* %"newstruct'mi" to i8 addrspace(10)*, !dbg !39585
  call void @llvm.memset.p10i8.i64(i8 addrspace(10)* nonnull dereferenceable(8) dereferenceable_or_null(8) %22, i8 0, i64 8, i1 false), !dbg !39585
  %newstruct = call noalias nonnull dereferenceable(8) "enzyme_type"="{[-1]:Pointer, [-1,-1]:Pointer}" {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1, i64 noundef 8, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140645776580368 to {}*) to {} addrspace(10)*)) #516, !dbg !39585
  %"'ipc" = addrspacecast {} addrspace(10)* %"newstruct'mi" to {} addrspace(10)* addrspace(11)*, !dbg !39585
  %23 = addrspacecast {} addrspace(10)* %newstruct to {} addrspace(10)* addrspace(11)*, !dbg !39585
  store {} addrspace(10)* null, {} addrspace(10)* addrspace(11)* %"'ipc", align 8, !dbg !39585, !tbaa !527, !alias.scope !39587, !noalias !39590
  store {} addrspace(10)* null, {} addrspace(10)* addrspace(11)* %23, align 8, !dbg !39585, !tbaa !527, !alias.scope !39592, !noalias !39593
  %24 = addrspacecast {} addrspace(10)* %2 to i8 addrspace(11)*, !dbg !39594, !enzyme_inactive !0
  %25 = getelementptr inbounds i8, i8 addrspace(11)* %24, i64 16, !dbg !39594
  %26 = bitcast i8 addrspace(11)* %25 to { i64, i32, {} addrspace(10)*, i32 } addrspace(11)*, !dbg !39594
  %27 = load { i64, i32, {} addrspace(10)*, i32 }, { i64, i32, {} addrspace(10)*, i32 } addrspace(11)* %26, align 8, !dbg !39594, !tbaa !527, !alias.scope !39595, !noalias !39598
  %getfield_addr = getelementptr inbounds i8, i8 addrspace(11)* %24, i64 48, !dbg !39594
  %28 = bitcast i8 addrspace(11)* %getfield_addr to {} addrspace(10)* addrspace(11)*, !dbg !39594
  %getfield = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %28 unordered, align 8, !dbg !39594, !tbaa !527, !alias.scope !39595, !noalias !39598, !nonnull !0, !dereferenceable !679, !align !539
  %.not = icmp eq i64 %3, 0, !dbg !39600
  br i1 %.not, label %L6, label %L9, !dbg !39601

common.ret:                                       ; preds = %L45, %L6
  %storemerge = phi { { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 } [ %33, %L6 ], [ %value_phi, %L45 ], !dbg !39602
  store { { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 } %storemerge, { { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 }* %0, align 8, !dbg !39602, !noalias !39603
  ret { {} addrspace(10)* } undef, !dbg !39602

L6:                                               ; preds = %top
  %29 = insertvalue { { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 } zeroinitializer, { i64, i32, {} addrspace(10)*, i32 } %27, 0, !dbg !39606
  %30 = insertvalue { { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 } %29, {} addrspace(10)* %getfield, 1, !dbg !39606
  %31 = insertvalue { { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 } %30, i64 0, 2, !dbg !39606
  %32 = insertvalue { { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 } %31, i64 0, 3, !dbg !39606
  %33 = insertvalue { { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 } %32, i8 1, 4, !dbg !39606
  %34 = extractvalue { i64, i32, {} addrspace(10)*, i32 } %27, 2, !dbg !39601
  %35 = getelementptr inbounds [2 x {} addrspace(10)*], [2 x {} addrspace(10)*]* %1, i64 0, i64 0, !dbg !39601
  store {} addrspace(10)* %34, {} addrspace(10)** %35, align 8, !dbg !39601, !noalias !39603
  %36 = getelementptr inbounds [2 x {} addrspace(10)*], [2 x {} addrspace(10)*]* %1, i64 0, i64 1, !dbg !39601
  store {} addrspace(10)* %getfield, {} addrspace(10)** %36, align 8, !dbg !39601, !noalias !39603
  br label %common.ret

L9:                                               ; preds = %top
  call fastcc void @julia__maybe_collect_31_32580(i8 noundef zeroext 0) #515, !dbg !39607
  %.fca.0.0.extract30 = extractvalue { i64, i32, {} addrspace(10)*, i32 } %27, 0, !dbg !39609
  %.fca.0.0.gep31 = getelementptr [1 x { i64, i32, {} addrspace(10)*, i32 }], [1 x { i64, i32, {} addrspace(10)*, i32 }] addrspace(10)* %15, i64 0, i64 0, i32 0, !dbg !39609
  store i64 %.fca.0.0.extract30, i64 addrspace(10)* %.fca.0.0.gep31, align 8, !dbg !39609, !noalias !39603
  %.fca.0.1.extract32 = extractvalue { i64, i32, {} addrspace(10)*, i32 } %27, 1, !dbg !39609
  %.fca.0.1.gep33 = getelementptr [1 x { i64, i32, {} addrspace(10)*, i32 }], [1 x { i64, i32, {} addrspace(10)*, i32 }] addrspace(10)* %15, i64 0, i64 0, i32 1, !dbg !39609
  store i32 %.fca.0.1.extract32, i32 addrspace(10)* %.fca.0.1.gep33, align 8, !dbg !39609, !noalias !39603
  %.fca.0.2.extract34 = extractvalue { i64, i32, {} addrspace(10)*, i32 } %27, 2, !dbg !39609
  %.fca.0.2.gep35 = getelementptr [1 x { i64, i32, {} addrspace(10)*, i32 }], [1 x { i64, i32, {} addrspace(10)*, i32 }] addrspace(10)* %15, i64 0, i64 0, i32 2, !dbg !39609
  store {} addrspace(10)* %.fca.0.2.extract34, {} addrspace(10)* addrspace(10)* %.fca.0.2.gep35, align 8, !dbg !39609, !noalias !39603
  call void ({} addrspace(10)*, ...) @julia.write_barrier({} addrspace(10)* %9, {} addrspace(10)* %.fca.0.2.extract34), !dbg !39609
  %.fca.0.3.extract36 = extractvalue { i64, i32, {} addrspace(10)*, i32 } %27, 3, !dbg !39609
  %.fca.0.3.gep37 = getelementptr [1 x { i64, i32, {} addrspace(10)*, i32 }], [1 x { i64, i32, {} addrspace(10)*, i32 }] addrspace(10)* %15, i64 0, i64 0, i32 3, !dbg !39609
  store i32 %.fca.0.3.extract36, i32 addrspace(10)* %.fca.0.3.gep37, align 8, !dbg !39609, !noalias !39603
  %37 = addrspacecast [1 x { i64, i32, {} addrspace(10)*, i32 }] addrspace(10)* %15 to [1 x { i64, i32, {} addrspace(10)*, i32 }] addrspace(11)*, !dbg !39609
  %38 = call fastcc nonnull {} addrspace(10)* @julia_lock_32602([1 x { i64, i32, {} addrspace(10)*, i32 }] addrspace(11)* nocapture nofree noundef nonnull readonly align 8 dereferenceable(32) %37) #515, !dbg !39609
  %newstruct4 = call noalias nonnull dereferenceable(8) "enzyme_inactive" "enzyme_type"="{[-1]:Pointer, [-1,-1]:Pointer}" {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1, i64 noundef 8, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140645729098544 to {}*) to {} addrspace(10)*)) #516, !dbg !39611
  %box = call noalias nonnull dereferenceable(8) "enzyme_inactive" "enzyme_type"="{[-1]:Pointer, [-1,-1]:Pointer, [-1,-1,0]:Pointer, [-1,-1,8]:Pointer, [-1,-1,16]:Pointer, [-1,-1,24]:Integer, [-1,-1,25]:Integer, [-1,-1,26]:Integer, [-1,-1,27]:Integer, [-1,-1,32]:Pointer, [-1,-1,40]:Integer, [-1,-1,41]:Integer, [-1,-1,42]:Integer, [-1,-1,43]:Integer, [-1,-1,48]:Pointer, [-1,-1,48,0]:Pointer, [-1,-1,48,8]:Integer, [-1,-1,56]:Integer}" {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1, i64 noundef 8, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140645926743952 to {}*) to {} addrspace(10)*)) #516, !dbg !39614
  %39 = bitcast {} addrspace(10)* %box to [1 x {} addrspace(10)*] addrspace(10)*, !dbg !39614, !enzyme_inactive !0
  %40 = getelementptr [1 x {} addrspace(10)*], [1 x {} addrspace(10)*] addrspace(10)* %39, i64 0, i64 0, !dbg !39614
  store {} addrspace(10)* %2, {} addrspace(10)* addrspace(10)* %40, align 8, !dbg !39614, !tbaa !552, !alias.scope !531, !noalias !39615
  %"box7'mi" = call noalias nonnull dereferenceable(32) "enzyme_type"="{[-1]:Pointer, [-1,0]:Pointer, [-1,0,0]:Pointer, [-1,0,8]:Pointer, [-1,0,16]:Pointer, [-1,0,24]:Integer, [-1,0,25]:Integer, [-1,0,26]:Integer, [-1,0,27]:Integer, [-1,0,32]:Pointer, [-1,0,40]:Integer, [-1,0,41]:Integer, [-1,0,42]:Integer, [-1,0,43]:Integer, [-1,0,48]:Pointer, [-1,0,48,0]:Pointer, [-1,0,48,8]:Integer, [-1,0,56]:Integer, [-1,8]:Integer, [-1,9]:Integer, [-1,10]:Integer, [-1,11]:Integer, [-1,12]:Integer, [-1,13]:Integer, [-1,14]:Integer, [-1,15]:Integer, [-1,16]:Pointer, [-1,16,-1]:Pointer, [-1,24]:Pointer, [-1,24,-1]:Pointer}" {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1, i64 noundef 32, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140645925238864 to {}*) to {} addrspace(10)*)) #516, !dbg !39614
  %41 = bitcast {} addrspace(10)* %"box7'mi" to i8 addrspace(10)*, !dbg !39614
  call void @llvm.memset.p10i8.i64(i8 addrspace(10)* nonnull dereferenceable(32) dereferenceable_or_null(32) %41, i8 0, i64 32, i1 false), !dbg !39614
  %box7 = call noalias nonnull dereferenceable(32) "enzyme_type"="{[-1]:Pointer, [-1,0]:Pointer, [-1,0,0]:Pointer, [-1,0,8]:Pointer, [-1,0,16]:Pointer, [-1,0,24]:Integer, [-1,0,25]:Integer, [-1,0,26]:Integer, [-1,0,27]:Integer, [-1,0,32]:Pointer, [-1,0,40]:Integer, [-1,0,41]:Integer, [-1,0,42]:Integer, [-1,0,43]:Integer, [-1,0,48]:Pointer, [-1,0,48,0]:Pointer, [-1,0,48,8]:Integer, [-1,0,56]:Integer, [-1,8]:Integer, [-1,9]:Integer, [-1,10]:Integer, [-1,11]:Integer, [-1,12]:Integer, [-1,13]:Integer, [-1,14]:Integer, [-1,15]:Integer, [-1,16]:Pointer, [-1,16,-1]:Pointer, [-1,24]:Pointer, [-1,24,-1]:Pointer}" {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1, i64 noundef 32, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140645925238864 to {}*) to {} addrspace(10)*)) #516, !dbg !39614
  %"'ipc12" = bitcast {} addrspace(10)* %"box7'mi" to { {} addrspace(10)*, i64, {} addrspace(10)*, {} addrspace(10)* } addrspace(10)*, !dbg !39614
  %42 = bitcast {} addrspace(10)* %box7 to { {} addrspace(10)*, i64, {} addrspace(10)*, {} addrspace(10)* } addrspace(10)*, !dbg !39614
  %".repack'ipg" = getelementptr inbounds { {} addrspace(10)*, i64, {} addrspace(10)*, {} addrspace(10)* }, { {} addrspace(10)*, i64, {} addrspace(10)*, {} addrspace(10)* } addrspace(10)* %"'ipc12", i64 0, i32 0, !dbg !39614
  %.repack = getelementptr inbounds { {} addrspace(10)*, i64, {} addrspace(10)*, {} addrspace(10)* }, { {} addrspace(10)*, i64, {} addrspace(10)*, {} addrspace(10)* } addrspace(10)* %42, i64 0, i32 0, !dbg !39614
  store {} addrspace(10)* %2, {} addrspace(10)* addrspace(10)* %".repack'ipg", align 8, !dbg !39614, !tbaa !552, !alias.scope !39616, !noalias !39619
  store {} addrspace(10)* %2, {} addrspace(10)* addrspace(10)* %.repack, align 8, !dbg !39614, !tbaa !552, !alias.scope !39621, !noalias !39622
  %".repack45'ipg" = getelementptr inbounds { {} addrspace(10)*, i64, {} addrspace(10)*, {} addrspace(10)* }, { {} addrspace(10)*, i64, {} addrspace(10)*, {} addrspace(10)* } addrspace(10)* %"'ipc12", i64 0, i32 1, !dbg !39614
  %.repack45 = getelementptr inbounds { {} addrspace(10)*, i64, {} addrspace(10)*, {} addrspace(10)* }, { {} addrspace(10)*, i64, {} addrspace(10)*, {} addrspace(10)* } addrspace(10)* %42, i64 0, i32 1, !dbg !39614
  store i64 %3, i64 addrspace(10)* %".repack45'ipg", align 8, !dbg !39614, !tbaa !552, !alias.scope !39616, !noalias !39619
  store i64 %3, i64 addrspace(10)* %.repack45, align 8, !dbg !39614, !tbaa !552, !alias.scope !39621, !noalias !39622
  %".repack47'ipg" = getelementptr inbounds { {} addrspace(10)*, i64, {} addrspace(10)*, {} addrspace(10)* }, { {} addrspace(10)*, i64, {} addrspace(10)*, {} addrspace(10)* } addrspace(10)* %"'ipc12", i64 0, i32 2, !dbg !39614
  %.repack47 = getelementptr inbounds { {} addrspace(10)*, i64, {} addrspace(10)*, {} addrspace(10)* }, { {} addrspace(10)*, i64, {} addrspace(10)*, {} addrspace(10)* } addrspace(10)* %42, i64 0, i32 2, !dbg !39614
  store {} addrspace(10)* %"newstruct'mi", {} addrspace(10)* addrspace(10)* %".repack47'ipg", align 8, !dbg !39614, !tbaa !552, !alias.scope !39616, !noalias !39619
  store {} addrspace(10)* %newstruct, {} addrspace(10)* addrspace(10)* %.repack47, align 8, !dbg !39614, !tbaa !552, !alias.scope !39621, !noalias !39622
  %".repack49'ipg" = getelementptr inbounds { {} addrspace(10)*, i64, {} addrspace(10)*, {} addrspace(10)* }, { {} addrspace(10)*, i64, {} addrspace(10)*, {} addrspace(10)* } addrspace(10)* %"'ipc12", i64 0, i32 3, !dbg !39614
  %.repack49 = getelementptr inbounds { {} addrspace(10)*, i64, {} addrspace(10)*, {} addrspace(10)* }, { {} addrspace(10)*, i64, {} addrspace(10)*, {} addrspace(10)* } addrspace(10)* %42, i64 0, i32 3, !dbg !39614
  store {} addrspace(10)* %newstruct4, {} addrspace(10)* addrspace(10)* %".repack49'ipg", align 8, !dbg !39614, !tbaa !552, !alias.scope !39616, !noalias !39619
  store {} addrspace(10)* %newstruct4, {} addrspace(10)* addrspace(10)* %.repack49, align 8, !dbg !39614, !tbaa !552, !alias.scope !39621, !noalias !39622
  %43 = call {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)*, {} addrspace(10)*, ...) @julia.call({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)* @ijl_apply_generic, {} addrspace(10)* @ejl_enz_runtime_generic_augfwd, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140635303005584 to {}*) to {} addrspace(10)*), {} addrspace(10)* @ejl_enz_val_1, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140642465974192 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140642465973096 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140645786484592 to {}*) to {} addrspace(10)*), {} addrspace(10)* @ejl_jl_nothing, {} addrspace(10)* %box, {} addrspace(10)* @ejl_jl_nothing, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140640197790256 to {}*) to {} addrspace(10)*), {} addrspace(10)* @ejl_jl_nothing, {} addrspace(10)* %box7, {} addrspace(10)* %"box7'mi", {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140645726890992 to {}*) to {} addrspace(10)*), {} addrspace(10)* @ejl_jl_nothing), !dbg !39614
  %44 = addrspacecast {} addrspace(10)* %43 to {} addrspace(11)*, !dbg !39614
  %45 = bitcast {} addrspace(11)* %44 to [3 x {} addrspace(10)*] addrspace(11)*, !dbg !39614
  %46 = getelementptr inbounds [3 x {} addrspace(10)*], [3 x {} addrspace(10)*] addrspace(11)* %45, i64 0, i64 1, !dbg !39614
  %47 = load {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %46, align 8, !dbg !39614
  %48 = getelementptr inbounds [3 x {} addrspace(10)*], [3 x {} addrspace(10)*] addrspace(11)* %45, i64 0, i64 2, !dbg !39614
  %49 = load {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %48, align 8, !dbg !39614
  %50 = getelementptr inbounds [3 x {} addrspace(10)*], [3 x {} addrspace(10)*] addrspace(11)* %45, i64 0, i64 0, !dbg !39614
  %51 = load {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %50, align 8, !dbg !39614
  store atomic {} addrspace(10)* %47, {} addrspace(10)* addrspace(11)* %"'ipc" release, align 8, !dbg !39614, !tbaa !527, !alias.scope !39587, !noalias !39590
  store atomic {} addrspace(10)* %51, {} addrspace(10)* addrspace(11)* %23 release, align 8, !dbg !39614, !tbaa !527, !alias.scope !39592, !noalias !39593
  call void ({} addrspace(10)*, ...) @julia.write_barrier({} addrspace(10)* %"newstruct'mi", {} addrspace(10)* %47), !dbg !39614
  call void ({} addrspace(10)*, ...) @julia.write_barrier({} addrspace(10)* noundef nonnull %newstruct, {} addrspace(10)* nonnull %51) #517, !dbg !39614
  %52 = call {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)*, {} addrspace(10)*, ...) @julia.call({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)* @ijl_apply_generic, {} addrspace(10)* @ejl_enz_runtime_generic_augfwd, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140635300857232 to {}*) to {} addrspace(10)*), {} addrspace(10)* @ejl_enz_val_1, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140642465974000 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140642465973096 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140645730366480 to {}*) to {} addrspace(10)*), {} addrspace(10)* @ejl_jl_nothing, {} addrspace(10)* %51, {} addrspace(10)* %47, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140645739823824 to {}*) to {} addrspace(10)*), {} addrspace(10)* @ejl_jl_nothing), !dbg !39623
  %53 = addrspacecast {} addrspace(10)* %52 to {} addrspace(11)*, !dbg !39623
  %54 = bitcast {} addrspace(11)* %53 to [3 x {} addrspace(10)*] addrspace(11)*, !dbg !39623
  %55 = getelementptr inbounds [3 x {} addrspace(10)*], [3 x {} addrspace(10)*] addrspace(11)* %54, i64 0, i64 2, !dbg !39623
  %56 = load {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %55, align 8, !dbg !39623
  %57 = getelementptr inbounds [3 x {} addrspace(10)*], [3 x {} addrspace(10)*] addrspace(11)* %54, i64 0, i64 0, !dbg !39623
  %58 = load {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %57, align 8, !dbg !39623
  %59 = call "enzyme_type"="{[-1]:Pointer}" {} addrspace(10)* @julia.typeof({} addrspace(10)* nonnull %58) #518, !dbg !39623
  %60 = addrspacecast {} addrspace(10)* %59 to {} addrspace(11)*, !dbg !39623
  %61 = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* %60) #518, !dbg !39623
  %exactly_isa = icmp eq {}* %61, inttoptr (i64 192 to {}*), !dbg !39623
  br i1 %exactly_isa, label %pass11, label %fail10, !dbg !39623

L25:                                              ; preds = %pass11
  %box13 = call noalias nonnull dereferenceable(16) "enzyme_inactive" "enzyme_type"="{[-1]:Pointer, [-1,8]:Pointer}" {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1, i64 noundef 16, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140640196978176 to {}*) to {} addrspace(10)*)) #516, !dbg !39623
  %62 = bitcast {} addrspace(10)* %box13 to { i32, {} addrspace(10)* } addrspace(10)*, !dbg !39623, !enzyme_inactive !0
  store { i32, {} addrspace(10)* } { i32 2, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140640204901952 to {}*) to {} addrspace(10)*) }, { i32, {} addrspace(10)* } addrspace(10)* %62, align 8, !dbg !39623, !tbaa !552, !alias.scope !531, !noalias !39615
  %63 = addrspacecast {} addrspace(10)* %box13 to {} addrspace(12)*, !dbg !39623, !enzyme_inactive !0
  call void @ijl_throw({} addrspace(12)* %63) #519, !dbg !39623
  unreachable, !dbg !39623

L29:                                              ; preds = %pass11
  %.fca.0.1.gep = getelementptr [1 x { i64, i32, {} addrspace(10)*, i32 }], [1 x { i64, i32, {} addrspace(10)*, i32 }] addrspace(10)* %18, i64 0, i64 0, i32 1, !dbg !39624
  store i32 %.fca.0.1.extract32, i32 addrspace(10)* %.fca.0.1.gep, align 8, !dbg !39624, !noalias !39603
  %64 = addrspacecast [1 x { i64, i32, {} addrspace(10)*, i32 }] addrspace(10)* %18 to [1 x { i64, i32, {} addrspace(10)*, i32 }] addrspace(11)*, !dbg !39624
  %_augmented = call fastcc { { { i8*, {} addrspace(10)*, {} addrspace(10)*, i1, i1, i1 }, {} addrspace(10)*, {} addrspace(10)*, i1, i1, i1, i1, {} addrspace(10)*, i32 }, {} addrspace(10)*, {} addrspace(10)* } @augmented_julia_lock_33874([1 x { i64, i32, {} addrspace(10)*, i32 }] addrspace(11)* nocapture nofree readonly align 8 %64), !dbg !39624
  %subcache = extractvalue { { { i8*, {} addrspace(10)*, {} addrspace(10)*, i1, i1, i1 }, {} addrspace(10)*, {} addrspace(10)*, i1, i1, i1, i1, {} addrspace(10)*, i32 }, {} addrspace(10)*, {} addrspace(10)* } %_augmented, 0, !dbg !39624
  %65 = extractvalue { { { i8*, {} addrspace(10)*, {} addrspace(10)*, i1, i1, i1 }, {} addrspace(10)*, {} addrspace(10)*, i1, i1, i1, i1, {} addrspace(10)*, i32 }, {} addrspace(10)*, {} addrspace(10)* } %_augmented, 1, !dbg !39624
  %"'ac" = extractvalue { { { i8*, {} addrspace(10)*, {} addrspace(10)*, i1, i1, i1 }, {} addrspace(10)*, {} addrspace(10)*, i1, i1, i1, i1, {} addrspace(10)*, i32 }, {} addrspace(10)*, {} addrspace(10)* } %_augmented, 2, !dbg !39624
  %66 = addrspacecast {} addrspace(10)* %65 to i8 addrspace(11)*, !dbg !39626
  %67 = getelementptr inbounds i8, i8 addrspace(11)* %66, i64 16, !dbg !39626
  %68 = bitcast i8 addrspace(11)* %67 to i64 addrspace(11)*, !dbg !39626
  %atomic_initial = load atomic i64, i64 addrspace(11)* %68 monotonic, align 8, !dbg !39626, !tbaa !527, !alias.scope !39628, !noalias !39631
  store i64* null, i64** %_cache, align 8, !dbg !39626
  store i64* null, i64** %_cache16, align 8, !dbg !39626
  store { i64, i1 }* null, { i64, i1 }** %_replacementA15_cache, align 8, !dbg !39626
  br label %xchg, !dbg !39626

L40:                                              ; preds = %pass19
  %69 = insertvalue { { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 } zeroinitializer, { i64, i32, {} addrspace(10)*, i32 } %27, 0, !dbg !39633
  %70 = insertvalue { { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 } %69, {} addrspace(10)* %getfield, 1, !dbg !39633
  %"'ipc13" = bitcast {} addrspace(10)* %"'ipl" to i64 addrspace(10)*, !dbg !39633
  %71 = bitcast {} addrspace(10)* %94 to i64 addrspace(10)*, !dbg !39633
  %"unbox'ipl" = load i64, i64 addrspace(10)* %"'ipc13", align 8, !dbg !39633, !tbaa !711, !alias.scope !39635, !noalias !39638
  %unbox = load i64, i64 addrspace(10)* %71, align 8, !dbg !39633, !tbaa !711, !alias.scope !39640, !noalias !39641
  %72 = insertvalue { { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 } %70, i64 %unbox, 2, !dbg !39633
  %73 = insertvalue { { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 } %72, i64 %3, 3, !dbg !39633
  %74 = insertvalue { { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 } %73, i8 1, 4, !dbg !39633
  br label %L45, !dbg !39634

L43:                                              ; preds = %pass19
  %box23 = call noalias nonnull dereferenceable(32) "enzyme_inactive" "enzyme_type"="{[-1]:Pointer, [-1,0]:Pointer, [-1,8]:Integer, [-1,9]:Integer, [-1,10]:Integer, [-1,11]:Integer, [-1,16]:Pointer, [-1,24]:Integer, [-1,25]:Integer, [-1,26]:Integer, [-1,27]:Integer}" {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1, i64 noundef 32, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140640195481408 to {}*) to {} addrspace(10)*)) #516, !dbg !39634
  %75 = bitcast {} addrspace(10)* %box23 to { i64, i32, {} addrspace(10)*, i32 } addrspace(10)*, !dbg !39634, !enzyme_inactive !0
  store { i64, i32, {} addrspace(10)*, i32 } %27, { i64, i32, {} addrspace(10)*, i32 } addrspace(10)* %75, align 8, !dbg !39634, !tbaa !552, !alias.scope !531, !noalias !39615
  %76 = call noalias nonnull "enzyme_inactive" "enzyme_type"="{[-1]:Pointer, [-1,-1]:Integer}" {} addrspace(10)* @ijl_box_int64(i64 signext %3) #520, !dbg !39634
  %77 = call {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)*, {} addrspace(10)*, ...) @julia.call({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)* @ijl_apply_generic, {} addrspace(10)* @ejl_enz_runtime_generic_augfwd, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140640411541136 to {}*) to {} addrspace(10)*), {} addrspace(10)* @ejl_enz_val_1, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140642465974512 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140642465973096 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140640197755616 to {}*) to {} addrspace(10)*), {} addrspace(10)* @ejl_jl_nothing, {} addrspace(10)* %box23, {} addrspace(10)* @ejl_jl_nothing, {} addrspace(10)* %getfield, {} addrspace(10)* @ejl_jl_nothing, {} addrspace(10)* %94, {} addrspace(10)* %"'ipl", {} addrspace(10)* %76, {} addrspace(10)* @ejl_jl_nothing, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140645865270416 to {}*) to {} addrspace(10)*), {} addrspace(10)* @ejl_jl_nothing), !dbg !39634
  %78 = addrspacecast {} addrspace(10)* %77 to {} addrspace(11)*, !dbg !39634
  %79 = bitcast {} addrspace(11)* %78 to [3 x {} addrspace(10)*] addrspace(11)*, !dbg !39634
  %80 = getelementptr inbounds [3 x {} addrspace(10)*], [3 x {} addrspace(10)*] addrspace(11)* %79, i64 0, i64 1, !dbg !39634
  %81 = load {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %80, align 8, !dbg !39634
  %82 = getelementptr inbounds [3 x {} addrspace(10)*], [3 x {} addrspace(10)*] addrspace(11)* %79, i64 0, i64 2, !dbg !39634
  %83 = load {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %82, align 8, !dbg !39634
  %84 = getelementptr inbounds [3 x {} addrspace(10)*], [3 x {} addrspace(10)*] addrspace(11)* %79, i64 0, i64 0, !dbg !39634
  %85 = load {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %84, align 8, !dbg !39634
  %"'ipc14" = bitcast {} addrspace(10)* %81 to { { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 } addrspace(10)*
  %86 = bitcast {} addrspace(10)* %85 to { { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 } addrspace(10)*
  %"unbox28'ipl" = load { { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 }, { { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 } addrspace(10)* %"'ipc14", align 8, !tbaa !552, !alias.scope !39642, !noalias !39645
  %unbox28 = load { { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 }, { { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 } addrspace(10)* %86, align 8, !tbaa !552, !alias.scope !39647, !noalias !39648
  br label %L45

L45:                                              ; preds = %L43, %L40
  %value_phi = phi { { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 } [ %74, %L40 ], [ %unbox28, %L43 ]
  %87 = extractvalue { { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 } %value_phi, 0, 2, !dbg !39634
  %88 = extractvalue { { i64, i32, {} addrspace(10)*, i32 }, {} addrspace(10)*, i64, i64, i8 } %value_phi, 1, !dbg !39634
  %89 = getelementptr inbounds [2 x {} addrspace(10)*], [2 x {} addrspace(10)*]* %1, i64 0, i64 0, !dbg !39634
  store {} addrspace(10)* %87, {} addrspace(10)** %89, align 8, !dbg !39634, !noalias !39603
  %90 = getelementptr inbounds [2 x {} addrspace(10)*], [2 x {} addrspace(10)*]* %1, i64 0, i64 1, !dbg !39634
  store {} addrspace(10)* %88, {} addrspace(10)** %90, align 8, !dbg !39634, !noalias !39603
  br label %common.ret

fail10:                                           ; preds = %L9
  %91 = addrspacecast {} addrspace(10)* %58 to {} addrspace(12)*, !dbg !39623
  call void @ijl_type_error(i8* noundef getelementptr inbounds ([3 x i8], [3 x i8]* @_j_str18, i32 0, i32 0), {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140645797537472 to {}*) to {} addrspace(10)*), {} addrspace(12)* %91) #519, !dbg !39623
  unreachable, !dbg !39623

pass11:                                           ; preds = %L9
  %92 = icmp eq {} addrspace(10)* %58, addrspacecast ({}* inttoptr (i64 140645865270432 to {}*) to {} addrspace(10)*), !dbg !39623
  br i1 %92, label %L29, label %L25, !dbg !39623

done_xchg:                                        ; preds = %xchg
  %93 = phi i64 [ %iv, %xchg ], !dbg !39634
  store i64 %93, i64* %loopLimit_cache, align 8, !dbg !39634, !invariant.group !39649
  %"'ipl" = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %"'ipc" unordered, align 8, !dbg !39634, !tbaa !527, !alias.scope !39587, !noalias !39650
  %94 = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %23 unordered, align 8, !dbg !39634, !tbaa !527, !alias.scope !39592, !noalias !39651
  %isdefined15.not = icmp eq {} addrspace(10)* %94, null, !dbg !39634
  br i1 %isdefined15.not, label %err, label %pass19, !dbg !39634

xchg:                                             ; preds = %xchg, %L29
  %iv = phi i64 [ %iv.next, %xchg ], [ 0, %L29 ], !dbg !39626
  %95 = phi i64 [ %atomic_initial, %L29 ], [ %120, %xchg ], !dbg !39626
  %iv.next = add nuw nsw i64 %iv, 1, !dbg !39626
  %96 = load { i64, i1 }*, { i64, i1 }** %_replacementA15_cache, align 8, !dbg !39626
  %97 = bitcast { i64, i1 }* %96 to i8*, !dbg !39626
  %_replacementA15_realloccache = call i8* @__enzyme_exponentialallocationzero(i8* %97, i64 %iv.next, i64 16), !dbg !39626
  %98 = bitcast i8* %_replacementA15_realloccache to { i64, i1 }*, !dbg !39626
  store { i64, i1 }* %98, { i64, i1 }** %_replacementA15_cache, align 8, !dbg !39626
  %99 = load i64*, i64** %_cache16, align 8, !dbg !39626
  %100 = bitcast i64* %99 to i8*, !dbg !39626
  %_realloccache17 = call i8* @__enzyme_exponentialallocationzero(i8* %100, i64 %iv.next, i64 8), !dbg !39626
  %101 = bitcast i8* %_realloccache17 to i64*, !dbg !39626
  store i64* %101, i64** %_cache16, align 8, !dbg !39626
  %102 = load i64*, i64** %_cache16, align 8, !dbg !39626, !dereferenceable !539, !invariant.group !39652
  %103 = getelementptr inbounds i64, i64* %102, i64 %iv, !dbg !39626
  store i64 %95, i64* %103, align 8, !dbg !39626, !invariant.group !39653
  %104 = load i64*, i64** %_cache, align 8, !dbg !39626
  %105 = bitcast i64* %104 to i8*, !dbg !39626
  %_realloccache = call i8* @__enzyme_exponentialallocationzero(i8* %105, i64 %iv.next, i64 8), !dbg !39626
  %106 = bitcast i8* %_realloccache to i64*, !dbg !39626
  store i64* %106, i64** %_cache, align 8, !dbg !39626
  %107 = call fastcc i64 @julia___33959(i64 signext %95, i64 signext %3) #515, !dbg !39626
  %_replacementA15 = phi { i64, i1 } , !dbg !39626
  %108 = bitcast {}*** %10 to {}**, !dbg !39626
  %109 = getelementptr inbounds {}*, {}** %108, i64 -14, !dbg !39626
  %110 = getelementptr inbounds {}*, {}** %109, i64 16, !dbg !39626
  %111 = bitcast {}** %110 to i8**, !dbg !39626
  %112 = load i8*, i8** %111, align 8, !dbg !39626
  %113 = call {} addrspace(10)* @julia.gc_alloc_obj({}** %109, i64 8, {} addrspace(10)* @ejl_enz_no_derivative_exc), !dbg !39626
  %114 = bitcast {} addrspace(10)* %113 to i8* addrspace(10)*, !dbg !39626
  store i8* getelementptr inbounds ([23648 x i8], [23648 x i8]* @7, i32 0, i32 0), i8* addrspace(10)* %114, align 8, !dbg !39626
  %115 = load { i64, i1 }*, { i64, i1 }** %_replacementA15_cache, align 8, !dbg !39626, !dereferenceable !539, !invariant.group !39654
  %116 = getelementptr inbounds { i64, i1 }, { i64, i1 }* %115, i64 %iv, !dbg !39626
  store { i64, i1 } %_replacementA15, { i64, i1 }* %116, align 8, !dbg !39626, !invariant.group !39655
  %117 = load i64*, i64** %_cache, align 8, !dbg !39626, !dereferenceable !539, !invariant.group !39656
  %118 = getelementptr inbounds i64, i64* %117, i64 %iv, !dbg !39626
  store i64 %107, i64* %118, align 8, !dbg !39626, !invariant.group !39657
  %119 = addrspacecast {} addrspace(10)* %113 to {} addrspace(12)*, !dbg !39626
  call void @jl_throw({} addrspace(12)* %119) #521, !dbg !39626
  %120 = extractvalue { i64, i1 } undef, 0, !dbg !39626
  %121 = extractvalue { i64, i1 } undef, 1, !dbg !39626
  br i1 %121, label %done_xchg, label %xchg, !dbg !39626

pass19:                                           ; preds = %done_xchg
  %122 = call "enzyme_type"="{[-1]:Pointer}" {} addrspace(10)* @julia.typeof({} addrspace(10)* nonnull %94) #518, !dbg !39634
  %123 = addrspacecast {} addrspace(10)* %122 to {} addrspace(11)*, !dbg !39634
  %124 = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* %123) #518, !dbg !39634
  %exactly_isa21.not = icmp eq {}* %124, inttoptr (i64 140645797538720 to {}*), !dbg !39634
  br i1 %exactly_isa21.not, label %L40, label %L43, !dbg !39634

err:                                              ; preds = %done_xchg
  call void @ijl_undefined_var_error({} addrspace(12)* noundef addrspacecast ({}* inttoptr (i64 140645954469896 to {}*) to {} addrspace(12)*)) #519, !dbg !39634
  unreachable, !dbg !39634
}

LLVM.AtomicCmpXchgInst(%73 = cmpxchg i64 addrspace(11)* %53, i64 %71, i64 %72 seq_cst monotonic, align 8, !dbg !576, !tbaa !528, !alias.scope !532, !noalias !545)
LLVM.PHIInst(%_replacementA15 = phi { i64, i1 } , !dbg !606)

Stacktrace:
 [1] modifyproperty!
   @ ./Base.jl:74
 [2] account!
   @ ~/.julia/packages/AMDGPU/a1v0k/src/memory.jl:128
 [3] #HIPBuffer#6
   @ ~/.julia/packages/AMDGPU/a1v0k/src/runtime/memory/hip.jl:58

Stacktrace:
  [1] julia_error(cstr::Cstring, val::Ptr{LLVM.API.LLVMOpaqueValue}, errtype::Enzyme.API.ErrorType, data::Ptr{Nothing}, data2::Ptr{LLVM.API.LLVMOpaqueValue}, B::Ptr{LLVM.API.LLVMOpaqueBuilder})
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/02rde/src/compiler.jl:2347
  [2] EnzymeCreatePrimalAndGradient(logic::Enzyme.Logic, todiff::LLVM.Function, retType::Enzyme.API.CDIFFE_TYPE, constant_args::Vector{Enzyme.API.CDIFFE_TYPE}, TA::Enzyme.TypeAnalysis, returnValue::Bool, dretUsed::Bool, mode::Enzyme.API.CDerivativeMode, width::Int64, additionalArg::Ptr{Nothing}, forceAnonymousTape::Bool, typeInfo::Enzyme.FnTypeInfo, uncacheable_args::Vector{Bool}, augmented::Ptr{Nothing}, atomicAdd::Bool)
    @ Enzyme.API ~/.julia/packages/Enzyme/02rde/src/api.jl:163
  [3] enzyme!(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}, mod::LLVM.Module, primalf::LLVM.Function, TT::Type, mode::Enzyme.API.CDerivativeMode, width::Int64, parallel::Bool, actualRetType::Type, wrap::Bool, modifiedBetween::Tuple{Bool, Bool}, returnPrimal::Bool, expectedTapeType::Type, loweredArgs::Set{Int64}, boxedArgs::Set{Int64})
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/02rde/src/compiler.jl:4104
  [4] codegen(output::Symbol, job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}; libraries::Bool, deferred_codegen::Bool, optimize::Bool, toplevel::Bool, strip::Bool, validate::Bool, only_entry::Bool, parent_job::Nothing)
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/02rde/src/compiler.jl:6358
  [5] codegen
    @ ~/.julia/packages/Enzyme/02rde/src/compiler.jl:5545 [inlined]
  [6] _thunk(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}, postopt::Bool)
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/02rde/src/compiler.jl:7159
  [7] _thunk
    @ ~/.julia/packages/Enzyme/02rde/src/compiler.jl:7159 [inlined]
  [8] cached_compilation
    @ ~/.julia/packages/Enzyme/02rde/src/compiler.jl:7200 [inlined]
  [9] thunkbase(ctx::LLVM.Context, mi::Core.MethodInstance, ::Val{0x0000000000007b52}, ::Type{Const{var"#f#17"{@NamedTuple{Z::ROCArray{Float64, 2, AMDGPU.Runtime.Mem.HIPBuffer}, df::StructVector{@NamedTuple{ls::Float64, ds::Bool, ts::Float64}, @NamedTuple{ls::ROCArray{Float64, 1, AMDGPU.Runtime.Mem.HIPBuffer}, ds::ROCArray{Bool, 1, AMDGPU.Runtime.Mem.HIPBuffer}, ts::ROCArray{Float64, 1, AMDGPU.Runtime.Mem.HIPBuffer}}, Int64}}}}}, ::Type{Active}, tt::Type{Tuple{Duplicated{ROCArray{Float64, 2, AMDGPU.Runtime.Mem.HIPBuffer}}}}, ::Val{Enzyme.API.DEM_ReverseModeCombined}, ::Val{1}, ::Val{(false, false)}, ::Val{false}, ::Val{false}, ::Type{FFIABI}, ::Val{false})
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/02rde/src/compiler.jl:7273
 [10] #s2048#18999
    @ ~/.julia/packages/Enzyme/02rde/src/compiler.jl:7325 [inlined]
 [11] var"#s2048#18999"(FA::Any, A::Any, TT::Any, Mode::Any, ModifiedBetween::Any, width::Any, ReturnPrimal::Any, ShadowInit::Any, World::Any, ABI::Any, ErrIfFuncWritten::Any, ::Any, ::Type, ::Type, ::Type, tt::Any, ::Type, ::Type, ::Type, ::Type, ::Type, ::Type, ::Any)
    @ Enzyme.Compiler ./none:0
 [12] (::Core.GeneratedFunctionStub)(::UInt64, ::LineNumberNode, ::Any, ::Vararg{Any})
    @ Core ./boot.jl:602
 [13] autodiff
    @ ~/.julia/packages/Enzyme/02rde/src/Enzyme.jl:315 [inlined]
 [14] gradient(rm::ReverseMode{false, FFIABI, false, false}, f::Const{var"#f#17"{@NamedTuple{Z::ROCArray{Float64, 2, AMDGPU.Runtime.Mem.HIPBuffer}, df::StructVector{@NamedTuple{ls::Float64, ds::Bool, ts::Float64}, @NamedTuple{ls::ROCArray{Float64, 1, AMDGPU.Runtime.Mem.HIPBuffer}, ds::ROCArray{Bool, 1, AMDGPU.Runtime.Mem.HIPBuffer}, ts::ROCArray{Float64, 1, AMDGPU.Runtime.Mem.HIPBuffer}}, Int64}}}}, x::ROCArray{Float64, 2, AMDGPU.Runtime.Mem.HIPBuffer})
    @ Enzyme ~/.julia/packages/Enzyme/02rde/src/Enzyme.jl:1049
 [15] top-level scope
wsmoses commented 2 months ago

I think AMDGPU.jl needs the same kind of Enzyme extension that CUDA.jl already provides (in this case you're hitting an issue while differentiating through the `HIPBuffer` constructor, which is called when a GPU array is allocated).

https://github.com/JuliaGPU/CUDA.jl/blob/master/ext/EnzymeCoreExt.jl

Would you like to try a PR to AMDGPU.jl for this and cc me?

wsmoses commented 2 months ago

moved to https://github.com/JuliaGPU/AMDGPU.jl/issues/667