EnzymeAD / Enzyme.jl

Julia bindings for the Enzyme automatic differentiator
https://enzyme.mit.edu
MIT License
455 stars 64 forks source link

Segfault / Bus Error #238

Closed wsmoses closed 2 years ago

wsmoses commented 2 years ago

I can only reproduce this on an AWS machine, not on my local machine.

Using Julia 1.7.1

using Enzyme

# Enzyme debugging/configuration switches, set once before any differentiation runs.
# NOTE(review): presumably enables the verbose IR/diagnostic printing quoted later in
# this report — confirm against the Enzyme.API documentation.
Enzyme.API.printall!(true)
# NOTE(review): presumably makes the printed IR carry instruction names — confirm.
Enzyme.API.instname!(true)

# NOTE(review): presumably forces inlining before differentiation (the quoted dump shows
# fooSend's body merged into foo) — confirm.
Enzyme.API.inlineall!(true)

# Container for the buffer that fooSend writes into.
mutable struct Data
   # Destination buffer; fooSend stores Float64 values copied from each field here.
   commDataSend::Vector{Float64}
end

# No-op callback registered as a finalizer by Isend.
# Ignores its argument `buf` and always returns `nothing`.
function free(buf)
  return nothing
end

# Minimal mutable object; Isend allocates one and attaches a finalizer to it.
mutable struct Something
   # Single integer payload (Isend sets it to 2).
   x::Int64
end
# Allocate a `Something(2)`, register `free` as its finalizer, and return the object.
# NOTE(review): the name suggests this stands in for a non-blocking send request — the
# code here performs no communication.
function Isend()
    req = Something(2)
    # Attach the no-op `free` so the object carries a finalizer.
    finalizer(free, req)
    return req
end

# Copy strided entries of every array in `fields` into `domain.commDataSend`, then
# create a finalizer-carrying request object via Isend().
#
# Arguments:
#   domain - object with a `commDataSend` vector that is written to
#   fields - iterable of indexable arrays that are read from
#   dx     - number of elements copied per field (loop runs i = 0 .. dx-1)
#
# Returns `nothing`.
function fooSend(domain, fields, dx)
     # Zero-based write position for the current field; starts at 2.
     offset = 2
         for field in fields
            for i in 0:(dx-1)
               # Read every third element of `field` starting at 1-based index 31 and
               # write it to consecutive slots starting at 1-based index offset + 1.
               domain.commDataSend[offset+i + 1] = field[30+3*i + 1]
            end
            # The write position advances by 2 per field, not by dx, so for dx > 2 the
            # next field overwrites part of the range just written.
            offset += 2
         end
         # The returned object is never used; the call is kept for its allocation and
         # finalizer registration.
         req = Isend()
    return nothing
end
# Function passed to Enzyme.autodiff in main: builds a 6-tuple that repeats `domx`
# and forwards it to fooSend together with `domain` and `dx`. Returns `nothing`.
function foo(domain, domx, dx)
      # All six tuple entries refer to the same array object.
      fields = (domx, domx, domx, domx, domx, domx)
      fooSend(domain, fields,
        dx)

    return nothing
end

# Driver for the reproducer: allocates the primal and shadow buffers, then runs `foo`
# either directly (enzyme == false) or through Enzyme.autodiff (enzyme == true).
# Returns the value of whichever branch runs.
function main(enzyme)

    # Primal and shadow output buffers, 44 elements each (2+11+31).
    # NOTE(review): both are allocated with `undef`, so their contents are uninitialized.
    domain = Data(Vector{Float64}(undef, 2+11+31))
        shadowDomain = Data(Vector{Float64}(undef, 2+11+31))

   # Number of elements fooSend copies per field.
   dx = 30 + 1
    # Primal and shadow input arrays, 121 elements each (3*30+31). With dx == 31 the
    # largest index fooSend reads is 30 + 3*30 + 1 == 121.
    # NOTE(review): also `undef`, so fooSend reads uninitialized values from domx, and
    # the shadows are not zeroed — confirm this is intended for the reproducer.
    domx = Vector{Float64}(undef, 3*30+31)
    sdomx = Vector{Float64}(undef, 3*30+31)

    if enzyme
            # Differentiate foo with (domain, shadowDomain) and (domx, sdomx) as
            # Duplicated pairs; dx is passed as a plain argument.
            Enzyme.autodiff(foo, Duplicated(domain, shadowDomain), Duplicated(domx, sdomx), dx)
        else
            # Plain primal run without Enzyme.
            foo(domain, domx, dx)
        end
end

# Run the primal (non-Enzyme) path first.
main(false)
# Marker output, flushed immediately so it is emitted before the differentiated run
# below, which is the step this report is about.
@show "ran primal"
flush(stdout)
# Run the same computation through Enzyme.autodiff.
main(true)

On Enzyme.jl#bude:

commit 1d15ce6995714c22980b8f4154a7fefd46b63c96 (HEAD -> bude, origin/bude)
Author: William S. Moses <gh@wsmoses.com>
Date:   Mon Feb 21 02:42:30 2022 -0500

    flush

On Enzyme proper#remat:

commit 2380810e4c05c869138fa4632231b1a95fc09817 (HEAD, origin/remat)
Author: Ubuntu <ubuntu@ip-172-31-14-11.ec2.internal>
Date:   Mon Feb 21 16:47:12 2022 +0000

    Preserve for reverse bundles
wsmoses commented 2 years ago
"ran primal" = "ran primal"
after simplification :
; Function Attrs: willreturn mustprogress
define void @preprocess_julia_foo_1799.inner.6({} addrspace(10)* nocapture nonnull readonly align 8 dereferenceable(8) %arg, {} addrspace(10)* nonnull align 16 dereferenceable(40) %arg3, i64 signext %arg4, i64 signext %arg5, i64 signext %arg6) local_unnamed_addr #5 {
entry:
  %i = alloca [6 x {} addrspace(10)*], align 8
  %i7 = bitcast [6 x {} addrspace(10)*]* %i to i8*
  call void @llvm.lifetime.start.p0i8(i64 noundef 48, i8* noundef nonnull align 8 dereferenceable(48) %i7) #6
  %i8 = call {}*** @julia.get_pgcstack() #6
  %.fca.0.gep = getelementptr inbounds [6 x {} addrspace(10)*], [6 x {} addrspace(10)*]* %i, i64 0, i64 0, !dbg !104
  store {} addrspace(10)* %arg3, {} addrspace(10)** %.fca.0.gep, align 8, !dbg !104
  %.fca.1.gep = getelementptr inbounds [6 x {} addrspace(10)*], [6 x {} addrspace(10)*]* %i, i64 0, i64 1, !dbg !104
  store {} addrspace(10)* %arg3, {} addrspace(10)** %.fca.1.gep, align 8, !dbg !104
  %.fca.2.gep = getelementptr inbounds [6 x {} addrspace(10)*], [6 x {} addrspace(10)*]* %i, i64 0, i64 2, !dbg !104
  store {} addrspace(10)* %arg3, {} addrspace(10)** %.fca.2.gep, align 8, !dbg !104
  %.fca.3.gep = getelementptr inbounds [6 x {} addrspace(10)*], [6 x {} addrspace(10)*]* %i, i64 0, i64 3, !dbg !104
  store {} addrspace(10)* %arg3, {} addrspace(10)** %.fca.3.gep, align 8, !dbg !104
  %.fca.4.gep = getelementptr inbounds [6 x {} addrspace(10)*], [6 x {} addrspace(10)*]* %i, i64 0, i64 4, !dbg !104
  store {} addrspace(10)* %arg3, {} addrspace(10)** %.fca.4.gep, align 8, !dbg !104
  %.fca.5.gep = getelementptr inbounds [6 x {} addrspace(10)*], [6 x {} addrspace(10)*]* %i, i64 0, i64 5, !dbg !104
  store {} addrspace(10)* %arg3, {} addrspace(10)** %.fca.5.gep, align 8, !dbg !104
  %i9 = addrspacecast [6 x {} addrspace(10)*]* %i to [6 x {} addrspace(10)*] addrspace(11)*, !dbg !104
  %i10 = call {}*** @julia.get_pgcstack() #6
  %i11 = add i64 %arg4, -1
  %.inv.i = icmp sgt i64 %i11, -1
  %i12 = select i1 %.inv.i, i64 %i11, i64 -1
  %i13 = bitcast {} addrspace(10)* %arg to {} addrspace(10)* addrspace(10)*
  %i14 = addrspacecast {} addrspace(10)* addrspace(10)* %i13 to {} addrspace(10)* addrspace(11)*
  br i1 %.inv.i, label %top.split.us.i, label %julia_fooSend_1802.exit, !dbg !108

top.split.us.i:                                   ; preds = %entry
  %i15 = getelementptr inbounds [6 x {} addrspace(10)*], [6 x {} addrspace(10)*] addrspace(11)* %i9, i64 0, i64 0, !dbg !110
  %i16 = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %i15 unordered, align 8, !dbg !110, !tbaa !46, !nonnull !4, !dereferenceable !50, !align !51
  br label %L19.preheader.us.i, !dbg !108

L19.preheader.us.i:                               ; preds = %L54.us.i, %top.split.us.i
  %iv = phi i64 [ %iv.next, %L54.us.i ], [ 0, %top.split.us.i ]
  %value_phi.us.i = phi {} addrspace(10)* [ %i16, %top.split.us.i ], [ %i47, %L54.us.i ]
  %i17 = shl nuw nsw i64 %iv, 1
  %iv.next = add nuw nsw i64 %iv, 1
  %i18 = add i64 %i17, 2
  %i19 = add i64 %iv, 2
  %i20 = bitcast {} addrspace(10)* %value_phi.us.i to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)*
  %i21 = addrspacecast { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)* %i20 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*
  %i22 = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %i21, i64 0, i32 1
  %i23 = load i64, i64 addrspace(11)* %i22, align 8, !tbaa !52, !range !55
  %i24 = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %i14 unordered, align 8
  %i25 = bitcast {} addrspace(10)* %i24 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)*
  %i26 = addrspacecast { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)* %i25 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*
  %i27 = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %i26, i64 0, i32 1
  %i28 = bitcast {} addrspace(10)* %value_phi.us.i to double addrspace(13)* addrspace(10)*
  %i29 = addrspacecast double addrspace(13)* addrspace(10)* %i28 to double addrspace(13)* addrspace(11)*
  %i30 = bitcast {} addrspace(10)* %i24 to double addrspace(13)* addrspace(10)*
  %i31 = addrspacecast double addrspace(13)* addrspace(10)* %i30 to double addrspace(13)* addrspace(11)*
  %i32 = icmp ugt i64 %i23, 30, !dbg !114
  %umax.i = select i1 %i32, i64 %i23, i64 30, !dbg !114
  %i33 = add nsw i64 %umax.i, -28, !dbg !114
  %i34 = udiv i64 %i33, 3, !dbg !114
  br label %L19.us.i, !dbg !114

L19.us.i:                                         ; preds = %idxend9.us.i, %L19.preheader.us.i
  %iv1 = phi i64 [ %iv.next2, %idxend9.us.i ], [ 0, %L19.preheader.us.i ]
  %iv.next2 = add nuw nsw i64 %iv1, 1, !dbg !116
  %i35 = mul nuw nsw i64 %iv1, 3, !dbg !116
  %i36 = add nuw nsw i64 %i35, 30, !dbg !114
  %exitcond24.not.i = icmp eq i64 %iv1, %i34, !dbg !114
  br i1 %exitcond24.not.i, label %oob.i, label %idxend.us.i, !dbg !114

idxend.us.i:                                      ; preds = %L19.us.i
  %i37 = add nuw nsw i64 %iv1, %i18, !dbg !117
  %i38 = load i64, i64 addrspace(11)* %i27, align 8, !dbg !119, !tbaa !52, !range !55
  %i39 = icmp ult i64 %i37, %i38, !dbg !119
  br i1 %i39, label %idxend9.us.i, label %oob8.i, !dbg !119

idxend9.us.i:                                     ; preds = %idxend.us.i
  %i40 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %i29, align 16, !dbg !114, !tbaa !70, !nonnull !4
  %i41 = getelementptr inbounds double, double addrspace(13)* %i40, i64 %i36, !dbg !114
  %i42 = load double, double addrspace(13)* %i41, align 8, !dbg !114, !tbaa !72
  %i43 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %i31, align 8, !dbg !119, !tbaa !70, !nonnull !4
  %i44 = getelementptr inbounds double, double addrspace(13)* %i43, i64 %i37, !dbg !119
  store double %i42, double addrspace(13)* %i44, align 8, !dbg !119, !tbaa !72
  %.not.us.i = icmp eq i64 %iv1, %i12, !dbg !120
  br i1 %.not.us.i, label %L41.us.i, label %L19.us.i, !dbg !122

L41.us.i:                                         ; preds = %idxend9.us.i
  %exitcond25.i = icmp eq i64 %i19, 7, !dbg !123
  br i1 %exitcond25.i, label %julia_fooSend_1802.exit.loopexit, label %L54.us.i, !dbg !123

L54.us.i:                                         ; preds = %L41.us.i
  %value_phi1.off.us.i = add nsw i64 %i19, -1, !dbg !123
  %i45 = add nuw nsw i64 %i18, 2, !dbg !125
  %i46 = getelementptr inbounds [6 x {} addrspace(10)*], [6 x {} addrspace(10)*] addrspace(11)* %i9, i64 0, i64 %value_phi1.off.us.i, !dbg !126
  %i47 = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %i46 unordered, align 8, !dbg !126, !tbaa !46, !nonnull !4, !dereferenceable !50, !align !51
  %i48 = add nuw nsw i64 %i19, 1, !dbg !128
  br label %L19.preheader.us.i, !dbg !129

oob.i:                                            ; preds = %L19.us.i
  call void @llvm.trap() #7, !dbg !114
  unreachable, !dbg !114

oob8.i:                                           ; preds = %idxend.us.i
  call void @llvm.trap() #7, !dbg !119
  unreachable, !dbg !119

julia_fooSend_1802.exit.loopexit:                 ; preds = %L41.us.i
  br label %julia_fooSend_1802.exit, !dbg !130

julia_fooSend_1802.exit:                          ; preds = %julia_fooSend_1802.exit.loopexit, %entry
  %ptls_field19.i = getelementptr inbounds {}**, {}*** %i10, i64 2305843009213693954, !dbg !130
  %i49 = bitcast {}*** %ptls_field19.i to i8**, !dbg !130
  %ptls_load2021.i = load i8*, i8** %i49, align 8, !dbg !130, !tbaa !91
  %i50 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_load2021.i, i64 noundef 8, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140230348265744 to {}*) to {} addrspace(10)*)) #8, !dbg !130
  %i51 = bitcast {} addrspace(10)* %i50 to i64 addrspace(10)*, !dbg !130
  store i64 2, i64 addrspace(10)* %i51, align 8, !dbg !130, !tbaa !93
  %i52 = call i64 @jl_get_ptls_states() #6, !dbg !133
  call void @jl_gc_add_finalizer_th(i64 %i52, {} addrspace(10)* nonnull %i50, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140228970242032 to {}*) to {} addrspace(10)*)) #6, !dbg !136
  call void @llvm.lifetime.end.p0i8(i64 noundef 48, i8* noundef nonnull %i7) #6, !dbg !107
  ret void
}

 Could not promote allocation   %i50 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_load2021.i, i64 noundef 8, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140230348265744 to {}*) to {} addrspace(10)*)) #8, !dbg !94 due to unknown call   call void @jl_gc_add_finalizer_th(i64 %i52, {} addrspace(10)* nonnull %i50, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140228970242032 to {}*) to {} addrspace(10)*)) #6, !dbg !111
unknown origin may need caching   %ptls_load2021.i = load i8*, i8** %i49, align 8, !dbg !94, !tbaa !99
Callsite   %i50 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_load2021.i, i64 noundef 8, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140230348265744 to {}*) to {} addrspace(10)*)) #8, !dbg !94 arg 0   %ptls_load2021.i = load i8*, i8** %i49, align 8, !dbg !94, !tbaa !99 uncacheable from origin   %ptls_load2021.i = load i8*, i8** %i49, align 8, !dbg !94, !tbaa !99
Callsite   %i50 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_load2021.i, i64 noundef 8, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140230348265744 to {}*) to {} addrspace(10)*)) #8, !dbg !94 arg 2 {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140230348265744 to {}*) to {} addrspace(10)*) uncacheable from origin {}* inttoptr (i64 140230348265744 to {}*)
origin call may need caching   %i52 = call i64 @jl_get_ptls_states() #6, !dbg !104
Callsite   call void @jl_gc_add_finalizer_th(i64 %i52, {} addrspace(10)* nonnull %i50, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140228970242032 to {}*) to {} addrspace(10)*)) #6, !dbg !111 arg 0   %i52 = call i64 @jl_get_ptls_states() #6, !dbg !104 uncacheable from origin   %i52 = call i64 @jl_get_ptls_states() #6, !dbg !104
origin call may need caching   %i50 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_load2021.i, i64 noundef 8, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140230348265744 to {}*) to {} addrspace(10)*)) #8, !dbg !94
Callsite   call void @jl_gc_add_finalizer_th(i64 %i52, {} addrspace(10)* nonnull %i50, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140228970242032 to {}*) to {} addrspace(10)*)) #6, !dbg !111 arg 1   %i50 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_load2021.i, i64 noundef 8, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140230348265744 to {}*) to {} addrspace(10)*)) #8, !dbg !94 uncacheable from origin   %i50 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_load2021.i, i64 noundef 8, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140230348265744 to {}*) to {} addrspace(10)*)) #8, !dbg !94
Callsite   call void @jl_gc_add_finalizer_th(i64 %i52, {} addrspace(10)* nonnull %i50, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140228970242032 to {}*) to {} addrspace(10)*)) #6, !dbg !111 arg 2 {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140228970242032 to {}*) to {} addrspace(10)*) uncacheable from origin {}* inttoptr (i64 140228970242032 to {}*)
Load may need caching   %i23 = load i64, i64 addrspace(11)* %i22, align 8, !tbaa !57, !range !60 due to   call void @jl_gc_add_finalizer_th(i64 %i52, {} addrspace(10)* nonnull %i50, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140228970242032 to {}*) to {} addrspace(10)*)) #6, !dbg !111
Load may need caching   %i24 = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %i14 unordered, align 8 due to   store double %i42, double addrspace(13)* %i44, align 8, !dbg !73, !tbaa !77
Load may need caching   %i38 = load i64, i64 addrspace(11)* %i27, align 8, !dbg !73, !tbaa !57, !range !60 due to   call void @jl_gc_add_finalizer_th(i64 %i52, {} addrspace(10)* nonnull %i50, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140228970242032 to {}*) to {} addrspace(10)*)) #6, !dbg !111
Load may need caching   %i42 = load double, double addrspace(13)* %i41, align 8, !dbg !61, !tbaa !77 due to   store double %i42, double addrspace(13)* %i44, align 8, !dbg !73, !tbaa !77
Load may need caching   %ptls_load2021.i = load i8*, i8** %i49, align 8, !dbg !94, !tbaa !99 due to   call void @jl_gc_add_finalizer_th(i64 %i52, {} addrspace(10)* nonnull %i50, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140228970242032 to {}*) to {} addrspace(10)*)) #6, !dbg !111
Load must be recomputed   %i24 = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %i14 unordered, align 8 in reverse_invertidxend9.us.i due to   store double %i42, double addrspace(13)* %i44, align 8, !dbg !73, !tbaa !77
Caching instruction   %"i24'ipl" = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %"i14'ipc" unordered, align 8 legalRecompute: 0 shouldRecompute: 0 tryLegalRecomputeCheck: 1
; Function Attrs: willreturn mustprogress
define internal void @diffejulia_foo_1799.inner.6({} addrspace(10)* nocapture nonnull readonly align 8 dereferenceable(8) %arg, {} addrspace(10)* nocapture %"arg'", {} addrspace(10)* nonnull align 16 dereferenceable(40) %arg3, {} addrspace(10)* %"arg3'", i64 signext %arg4, i64 signext %arg5, i64 signext %arg6) local_unnamed_addr #5 {
entry:
  %"iv'ac" = alloca i64, align 8
  %"iv1'ac" = alloca i64, align 8
  %"i24'ipl_cache" = alloca {} addrspace(10)**, align 8
  %"i42'de" = alloca double, align 8
  store double 0.000000e+00, double* %"i42'de", align 8
  %"i'ipa" = alloca [6 x {} addrspace(10)*], align 8
  store [6 x {} addrspace(10)*] zeroinitializer, [6 x {} addrspace(10)*]* %"i'ipa", align 8
  %i = alloca [6 x {} addrspace(10)*], align 8
  %".fca.0.gep'ipg" = getelementptr inbounds [6 x {} addrspace(10)*], [6 x {} addrspace(10)*]* %"i'ipa", i64 0, i64 0, !dbg !104
  %.fca.0.gep = getelementptr inbounds [6 x {} addrspace(10)*], [6 x {} addrspace(10)*]* %i, i64 0, i64 0, !dbg !104
  store {} addrspace(10)* %"arg3'", {} addrspace(10)** %".fca.0.gep'ipg", align 8, !dbg !104
  store {} addrspace(10)* %arg3, {} addrspace(10)** %.fca.0.gep, align 8, !dbg !104
  %".fca.1.gep'ipg" = getelementptr inbounds [6 x {} addrspace(10)*], [6 x {} addrspace(10)*]* %"i'ipa", i64 0, i64 1, !dbg !104
  %.fca.1.gep = getelementptr inbounds [6 x {} addrspace(10)*], [6 x {} addrspace(10)*]* %i, i64 0, i64 1, !dbg !104
  store {} addrspace(10)* %"arg3'", {} addrspace(10)** %".fca.1.gep'ipg", align 8, !dbg !104
  store {} addrspace(10)* %arg3, {} addrspace(10)** %.fca.1.gep, align 8, !dbg !104
  %".fca.2.gep'ipg" = getelementptr inbounds [6 x {} addrspace(10)*], [6 x {} addrspace(10)*]* %"i'ipa", i64 0, i64 2, !dbg !104
  %.fca.2.gep = getelementptr inbounds [6 x {} addrspace(10)*], [6 x {} addrspace(10)*]* %i, i64 0, i64 2, !dbg !104
  store {} addrspace(10)* %"arg3'", {} addrspace(10)** %".fca.2.gep'ipg", align 8, !dbg !104
  store {} addrspace(10)* %arg3, {} addrspace(10)** %.fca.2.gep, align 8, !dbg !104
  %".fca.3.gep'ipg" = getelementptr inbounds [6 x {} addrspace(10)*], [6 x {} addrspace(10)*]* %"i'ipa", i64 0, i64 3, !dbg !104
  %.fca.3.gep = getelementptr inbounds [6 x {} addrspace(10)*], [6 x {} addrspace(10)*]* %i, i64 0, i64 3, !dbg !104
  store {} addrspace(10)* %"arg3'", {} addrspace(10)** %".fca.3.gep'ipg", align 8, !dbg !104
  store {} addrspace(10)* %arg3, {} addrspace(10)** %.fca.3.gep, align 8, !dbg !104
  %".fca.4.gep'ipg" = getelementptr inbounds [6 x {} addrspace(10)*], [6 x {} addrspace(10)*]* %"i'ipa", i64 0, i64 4, !dbg !104
  %.fca.4.gep = getelementptr inbounds [6 x {} addrspace(10)*], [6 x {} addrspace(10)*]* %i, i64 0, i64 4, !dbg !104
  store {} addrspace(10)* %"arg3'", {} addrspace(10)** %".fca.4.gep'ipg", align 8, !dbg !104
  store {} addrspace(10)* %arg3, {} addrspace(10)** %.fca.4.gep, align 8, !dbg !104
  %".fca.5.gep'ipg" = getelementptr inbounds [6 x {} addrspace(10)*], [6 x {} addrspace(10)*]* %"i'ipa", i64 0, i64 5, !dbg !104
  %.fca.5.gep = getelementptr inbounds [6 x {} addrspace(10)*], [6 x {} addrspace(10)*]* %i, i64 0, i64 5, !dbg !104
  store {} addrspace(10)* %"arg3'", {} addrspace(10)** %".fca.5.gep'ipg", align 8, !dbg !104
  store {} addrspace(10)* %arg3, {} addrspace(10)** %.fca.5.gep, align 8, !dbg !104
  %"i9'ipc" = addrspacecast [6 x {} addrspace(10)*]* %"i'ipa" to [6 x {} addrspace(10)*] addrspace(11)*, !dbg !104
  %i9 = addrspacecast [6 x {} addrspace(10)*]* %i to [6 x {} addrspace(10)*] addrspace(11)*, !dbg !104
  %i10 = call {}*** @julia.get_pgcstack() #7
  %i11 = add i64 %arg4, -1
  %.inv.i = icmp sgt i64 %i11, -1
  %i12 = select i1 %.inv.i, i64 %i11, i64 -1
  %"i13'ipc" = bitcast {} addrspace(10)* %"arg'" to {} addrspace(10)* addrspace(10)*
  %i13 = bitcast {} addrspace(10)* %arg to {} addrspace(10)* addrspace(10)*
  %"i14'ipc" = addrspacecast {} addrspace(10)* addrspace(10)* %"i13'ipc" to {} addrspace(10)* addrspace(11)*
  %i14 = addrspacecast {} addrspace(10)* addrspace(10)* %i13 to {} addrspace(10)* addrspace(11)*
  br i1 %.inv.i, label %top.split.us.i, label %julia_fooSend_1802.exit, !dbg !108

top.split.us.i:                                   ; preds = %entry
  %"i15'ipg" = getelementptr inbounds [6 x {} addrspace(10)*], [6 x {} addrspace(10)*] addrspace(11)* %"i9'ipc", i64 0, i64 0, !dbg !110
  %i15 = getelementptr inbounds [6 x {} addrspace(10)*], [6 x {} addrspace(10)*] addrspace(11)* %i9, i64 0, i64 0, !dbg !110
  %"i16'ipl" = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %"i15'ipg" unordered, align 8, !dbg !110, !tbaa !46, !nonnull !4, !invariant.group !137
  %i16 = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %i15 unordered, align 8, !dbg !110, !tbaa !46, !nonnull !4, !dereferenceable !50, !align !51
  %malloccall = tail call noalias nonnull dereferenceable(48) dereferenceable_or_null(48) i8* @malloc(i64 48)
  %"i24'ipl_malloccache" = bitcast i8* %malloccall to {} addrspace(10)**
  call void @llvm.memset.p0i8.i64(i8* %malloccall, i8 0, i64 48, i1 false), !dbg !108
  store {} addrspace(10)** %"i24'ipl_malloccache", {} addrspace(10)*** %"i24'ipl_cache", align 8, !dbg !108, !invariant.group !138
  br label %L19.preheader.us.i, !dbg !108

L19.preheader.us.i:                               ; preds = %L54.us.i, %top.split.us.i
  %iv = phi i64 [ %iv.next, %L54.us.i ], [ 0, %top.split.us.i ]
  %0 = phi {} addrspace(10)* [ %"i16'ipl", %top.split.us.i ], [ %"i47'ipl", %L54.us.i ]
  %value_phi.us.i = phi {} addrspace(10)* [ %i16, %top.split.us.i ], [ %i47, %L54.us.i ]
  %iv.next = add nuw nsw i64 %iv, 1
  %i17 = shl nuw nsw i64 %iv, 1
  %i18 = add i64 %i17, 2
  %i19 = add i64 %iv, 2
  %i20 = bitcast {} addrspace(10)* %value_phi.us.i to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)*
  %i21 = addrspacecast { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)* %i20 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*
  %i22 = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %i21, i64 0, i32 1
  %i23 = load i64, i64 addrspace(11)* %i22, align 8, !tbaa !52, !range !55
  %"i24'ipl" = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %"i14'ipc" unordered, align 8
  %1 = load {} addrspace(10)**, {} addrspace(10)*** %"i24'ipl_cache", align 8, !dereferenceable !139, !invariant.group !138
  %2 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %1, i64 %iv
  store {} addrspace(10)* %"i24'ipl", {} addrspace(10)** %2, align 8, !invariant.group !140
  %i24 = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %i14 unordered, align 8
  %i25 = bitcast {} addrspace(10)* %i24 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)*
  %i26 = addrspacecast { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)* %i25 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*
  %i27 = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %i26, i64 0, i32 1
  %"i28'ipc" = bitcast {} addrspace(10)* %0 to double addrspace(13)* addrspace(10)*
  %i28 = bitcast {} addrspace(10)* %value_phi.us.i to double addrspace(13)* addrspace(10)*
  %"i29'ipc" = addrspacecast double addrspace(13)* addrspace(10)* %"i28'ipc" to double addrspace(13)* addrspace(11)*
  %i29 = addrspacecast double addrspace(13)* addrspace(10)* %i28 to double addrspace(13)* addrspace(11)*
  %"i30'ipc" = bitcast {} addrspace(10)* %"i24'ipl" to double addrspace(13)* addrspace(10)*
  %i30 = bitcast {} addrspace(10)* %i24 to double addrspace(13)* addrspace(10)*
  %"i31'ipc" = addrspacecast double addrspace(13)* addrspace(10)* %"i30'ipc" to double addrspace(13)* addrspace(11)*
  %i31 = addrspacecast double addrspace(13)* addrspace(10)* %i30 to double addrspace(13)* addrspace(11)*
  %i32 = icmp ugt i64 %i23, 30, !dbg !114
  %umax.i = select i1 %i32, i64 %i23, i64 30, !dbg !114
  %i33 = add nsw i64 %umax.i, -28, !dbg !114
  %i34 = udiv i64 %i33, 3, !dbg !114
  br label %L19.us.i, !dbg !114

L19.us.i:                                         ; preds = %idxend9.us.i, %L19.preheader.us.i
  %iv1 = phi i64 [ %iv.next2, %idxend9.us.i ], [ 0, %L19.preheader.us.i ]
  %iv.next2 = add nuw nsw i64 %iv1, 1, !dbg !116
  %i35 = mul nuw nsw i64 %iv1, 3, !dbg !116
  %i36 = add nuw nsw i64 %i35, 30, !dbg !114
  %exitcond24.not.i = icmp eq i64 %iv1, %i34, !dbg !114
  br i1 %exitcond24.not.i, label %oob.i, label %idxend.us.i, !dbg !114

idxend.us.i:                                      ; preds = %L19.us.i
  %i37 = add nuw nsw i64 %iv1, %i18, !dbg !117
  %i38 = load i64, i64 addrspace(11)* %i27, align 8, !dbg !119, !tbaa !52, !range !55
  %i39 = icmp ult i64 %i37, %i38, !dbg !119
  br i1 %i39, label %idxend9.us.i, label %oob8.i, !dbg !119

idxend9.us.i:                                     ; preds = %idxend.us.i
  %"i40'ipl" = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %"i29'ipc", align 16, !dbg !114, !tbaa !70, !nonnull !4
  %i40 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %i29, align 16, !dbg !114, !tbaa !70, !nonnull !4
  %"i41'ipg" = getelementptr inbounds double, double addrspace(13)* %"i40'ipl", i64 %i36, !dbg !114
  %i41 = getelementptr inbounds double, double addrspace(13)* %i40, i64 %i36, !dbg !114
  %i42 = load double, double addrspace(13)* %i41, align 8, !dbg !114, !tbaa !72
  %"i43'ipl" = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %"i31'ipc", align 8, !dbg !119, !tbaa !70, !nonnull !4
  %i43 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %i31, align 8, !dbg !119, !tbaa !70, !nonnull !4
  %"i44'ipg" = getelementptr inbounds double, double addrspace(13)* %"i43'ipl", i64 %i37, !dbg !119
  %i44 = getelementptr inbounds double, double addrspace(13)* %i43, i64 %i37, !dbg !119
  store double %i42, double addrspace(13)* %i44, align 8, !dbg !119, !tbaa !72
  %.not.us.i = icmp eq i64 %iv1, %i12, !dbg !120
  br i1 %.not.us.i, label %L41.us.i, label %L19.us.i, !dbg !122

L41.us.i:                                         ; preds = %idxend9.us.i
  %exitcond25.i = icmp eq i64 %i19, 7, !dbg !123
  br i1 %exitcond25.i, label %julia_fooSend_1802.exit.loopexit, label %L54.us.i, !dbg !123

L54.us.i:                                         ; preds = %L41.us.i
  %value_phi1.off.us.i = add nsw i64 %i19, -1, !dbg !123
  %"i46'ipg" = getelementptr inbounds [6 x {} addrspace(10)*], [6 x {} addrspace(10)*] addrspace(11)* %"i9'ipc", i64 0, i64 %value_phi1.off.us.i, !dbg !126
  %i46 = getelementptr inbounds [6 x {} addrspace(10)*], [6 x {} addrspace(10)*] addrspace(11)* %i9, i64 0, i64 %value_phi1.off.us.i, !dbg !126
  %"i47'ipl" = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %"i46'ipg" unordered, align 8, !dbg !126, !tbaa !46, !nonnull !4
  %i47 = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %i46 unordered, align 8, !dbg !126, !tbaa !46, !nonnull !4, !dereferenceable !50, !align !51
  br label %L19.preheader.us.i, !dbg !129

oob.i:                                            ; preds = %L19.us.i
  call void @llvm.trap() #8, !dbg !114
  unreachable

oob8.i:                                           ; preds = %idxend.us.i
  call void @llvm.trap() #8, !dbg !119
  unreachable

julia_fooSend_1802.exit.loopexit:                 ; preds = %L41.us.i
  br label %julia_fooSend_1802.exit, !dbg !130

julia_fooSend_1802.exit:                          ; preds = %julia_fooSend_1802.exit.loopexit, %entry
  %ptls_field19.i = getelementptr inbounds {}**, {}*** %i10, i64 2305843009213693954, !dbg !130
  %i49 = bitcast {}*** %ptls_field19.i to i8**, !dbg !130
  %ptls_load2021.i = load i8*, i8** %i49, align 8, !dbg !130, !tbaa !91
  %i50 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_load2021.i, i64 noundef 8, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140230348265744 to {}*) to {} addrspace(10)*)) #9, !dbg !130
  %"i50'mi" = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_load2021.i, i64 noundef 8, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140230348265744 to {}*) to {} addrspace(10)*)) #9, !dbg !130
  call void ({} addrspace(10)*, ...) @julia.write_barrier({} addrspace(10)* %"i50'mi"), !dbg !130
  %3 = bitcast {} addrspace(10)* %"i50'mi" to i8 addrspace(10)*, !dbg !130
  call void @llvm.memset.p10i8.i64(i8 addrspace(10)* nonnull dereferenceable(8) dereferenceable_or_null(8) %3, i8 0, i64 8, i1 false), !dbg !130
  %"i51'ipc" = bitcast {} addrspace(10)* %"i50'mi" to i64 addrspace(10)*, !dbg !130
  %i51 = bitcast {} addrspace(10)* %i50 to i64 addrspace(10)*, !dbg !130
  store i64 2, i64 addrspace(10)* %"i51'ipc", align 8, !dbg !130
  store i64 2, i64 addrspace(10)* %i51, align 8, !dbg !130, !tbaa !93
  %i52 = call i64 @jl_get_ptls_states() #7, !dbg !133
  br label %invertjulia_fooSend_1802.exit

invertentry:                                      ; preds = %invertjulia_fooSend_1802.exit, %inverttop.split.us.i
  ret void

inverttop.split.us.i:                             ; preds = %invertL19.preheader.us.i
  %4 = load i64, i64* %"iv'ac", align 8
  %forfree = load {} addrspace(10)**, {} addrspace(10)*** %"i24'ipl_cache", align 8, !dereferenceable !139, !invariant.group !138
  %5 = bitcast {} addrspace(10)** %forfree to i8*
  tail call void @free(i8* nonnull %5)
  br label %invertentry

invertL19.preheader.us.i:                         ; preds = %invertL19.us.i
  %6 = load i64, i64* %"iv'ac", align 8
  %7 = icmp eq i64 %6, 0
  %8 = xor i1 %7, true
  br i1 %7, label %inverttop.split.us.i, label %incinvertL19.preheader.us.i

incinvertL19.preheader.us.i:                      ; preds = %invertL19.preheader.us.i
  %9 = load i64, i64* %"iv'ac", align 8
  %10 = add nsw i64 %9, -1
  store i64 %10, i64* %"iv'ac", align 8
  br label %invertL54.us.i

invertL19.us.i:                                   ; preds = %invertidxend.us.i
  %11 = load i64, i64* %"iv1'ac", align 8
  %12 = icmp eq i64 %11, 0
  %13 = xor i1 %12, true
  br i1 %12, label %invertL19.preheader.us.i, label %incinvertL19.us.i

incinvertL19.us.i:                                ; preds = %invertL19.us.i
  %14 = load i64, i64* %"iv1'ac", align 8
  %15 = add nsw i64 %14, -1
  store i64 %15, i64* %"iv1'ac", align 8
  br label %invertidxend9.us.i

invertidxend.us.i:                                ; preds = %invertidxend9.us.i_phimerge
  br label %invertL19.us.i

invertidxend9.us.i:                               ; preds = %mergeinvertL19.us.i_L41.us.i, %incinvertL19.us.i
  %16 = load i64, i64* %"iv1'ac", align 8
  %17 = load i64, i64* %"iv'ac", align 8
  %18 = load i64, i64* %"iv1'ac", align 8
  %19 = load i64, i64* %"iv'ac", align 8
  %20 = load {} addrspace(10)**, {} addrspace(10)*** %"i24'ipl_cache", align 8, !dereferenceable !139, !invariant.group !138
  %21 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %20, i64 %19
  %22 = load {} addrspace(10)*, {} addrspace(10)** %21, align 8, !invariant.group !140
  %"i30'ipc_unwrap" = bitcast {} addrspace(10)* %22 to double addrspace(13)* addrspace(10)*
  %"i31'ipc_unwrap" = addrspacecast double addrspace(13)* addrspace(10)* %"i30'ipc_unwrap" to double addrspace(13)* addrspace(11)*
  %"i43'il_phi_unwrap" = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %"i31'ipc_unwrap", align 8, !dbg !119, !tbaa !70
  %i17_unwrap = shl nuw nsw i64 %17, 1
  %i18_unwrap = add i64 %i17_unwrap, 2
  %i37_unwrap = add nuw nsw i64 %16, %i18_unwrap
  %"i44'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i43'il_phi_unwrap", i64 %i37_unwrap
  %23 = load double, double addrspace(13)* %"i44'ipg_unwrap", align 8
  store double 0.000000e+00, double addrspace(13)* %"i44'ipg_unwrap", align 8
  %24 = load double, double* %"i42'de", align 8
  %25 = fadd fast double %24, %23
  store double %25, double* %"i42'de", align 8
  %26 = load double, double* %"i42'de", align 8
  store double 0.000000e+00, double* %"i42'de", align 8
  %27 = load i64, i64* %"iv1'ac", align 8
  %28 = load i64, i64* %"iv'ac", align 8
  %29 = load i64, i64* %"iv1'ac", align 8
  %30 = load i64, i64* %"iv'ac", align 8
  %31 = icmp ne i64 %30, 0
  br i1 %31, label %invertidxend9.us.i_phirc, label %invertidxend9.us.i_phirc1

invertidxend9.us.i_phirc:                         ; preds = %invertidxend9.us.i
  %32 = sub nuw i64 %30, 1
  %33 = load i64, i64* %"iv1'ac", align 8
  %34 = load i64, i64* %"iv'ac", align 8
  %i19_unwrap = add i64 %34, 2
  %value_phi1.off.us.i_unwrap = add nsw i64 %i19_unwrap, -1
  %"i46'ipg_unwrap" = getelementptr inbounds [6 x {} addrspace(10)*], [6 x {} addrspace(10)*] addrspace(11)* %"i9'ipc", i64 0, i64 %value_phi1.off.us.i_unwrap
  %"i47'il_phi_unwrap" = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %"i46'ipg_unwrap" unordered, align 8, !dbg !126, !tbaa !46
  br label %invertidxend9.us.i_phimerge

invertidxend9.us.i_phirc1:                        ; preds = %invertidxend9.us.i
  %"i15'ipg_unwrap" = getelementptr inbounds [6 x {} addrspace(10)*], [6 x {} addrspace(10)*] addrspace(11)* %"i9'ipc", i64 0, i64 0
  %"i16'ipl_unwrap" = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %"i15'ipg_unwrap" unordered, align 8, !dbg !110, !tbaa !46, !nonnull !4, !invariant.group !137
  br label %invertidxend9.us.i_phimerge

invertidxend9.us.i_phimerge:                      ; preds = %invertidxend9.us.i_phirc1, %invertidxend9.us.i_phirc
  %35 = phi {} addrspace(10)* [ %"i47'il_phi_unwrap", %invertidxend9.us.i_phirc ], [ %"i16'ipl_unwrap", %invertidxend9.us.i_phirc1 ]
  %"i28'ipc_unwrap" = bitcast {} addrspace(10)* %35 to double addrspace(13)* addrspace(10)*
  %"i29'ipc_unwrap" = addrspacecast double addrspace(13)* addrspace(10)* %"i28'ipc_unwrap" to double addrspace(13)* addrspace(11)*
  %"i40'il_phi_unwrap" = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %"i29'ipc_unwrap", align 16, !dbg !114, !tbaa !70
  %i35_unwrap = mul nuw nsw i64 %27, 3
  %i36_unwrap = add nuw nsw i64 %i35_unwrap, 30
  %"i41'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i40'il_phi_unwrap", i64 %i36_unwrap
  %36 = load double, double addrspace(13)* %"i41'ipg_unwrap", align 8
  %37 = fadd fast double %36, %26
  store double %37, double addrspace(13)* %"i41'ipg_unwrap", align 8
  br label %invertidxend.us.i

invertL41.us.i:                                   ; preds = %mergeinvertL19.preheader.us.i_julia_fooSend_1802.exit.loopexit, %invertL54.us.i
  br label %mergeinvertL19.us.i_L41.us.i

mergeinvertL19.us.i_L41.us.i:                     ; preds = %invertL41.us.i
  store i64 %i12, i64* %"iv1'ac", align 8
  br label %invertidxend9.us.i

invertL54.us.i:                                   ; preds = %incinvertL19.preheader.us.i
  br label %invertL41.us.i

invertjulia_fooSend_1802.exit.loopexit:           ; preds = %invertjulia_fooSend_1802.exit
  br label %mergeinvertL19.preheader.us.i_julia_fooSend_1802.exit.loopexit

mergeinvertL19.preheader.us.i_julia_fooSend_1802.exit.loopexit: ; preds = %invertjulia_fooSend_1802.exit.loopexit
  store i64 5, i64* %"iv'ac", align 8
  br label %invertL41.us.i

invertjulia_fooSend_1802.exit:                    ; preds = %julia_fooSend_1802.exit
  br i1 %.inv.i, label %invertjulia_fooSend_1802.exit.loopexit, label %invertentry
}

("post_mod", mod) = ("post_mod", ; ModuleID = 'text'
source_filename = "text"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"

; julia_fooSend_1802: primal IR that appears to correspond to `fooSend(domain, fields, dx)`
; in the reproducer (name-based; the copy indices 30+3*i and offset+i match its loop body).
;   %0 - tracked object whose first pointer-sized field is loaded as the destination array
;        (presumably `domain`, with `commDataSend` as that field - confirm against the Julia source)
;   %1 - pointer to an array of 6 tracked references (presumably the `fields` tuple)
;   %2 - signed 64-bit count (presumably `dx`)
; Copies doubles from each of the 6 referenced arrays into the destination array, then
; allocates a 16-byte-pool object and passes it to two runtime calls made through
; hard-coded addresses. Traps (llvm.trap) on either bounds check failing.
define private fastcc void @julia_fooSend_1802({} addrspace(10)* nocapture nonnull readonly align 8 dereferenceable(8) %0, [6 x {} addrspace(10)*] addrspace(11)* nocapture noundef nonnull readonly align 8 dereferenceable(48) %1, i64 signext %2) unnamed_addr #0 !dbg !37 {
top:
  ; Three-slot frame, zero-initialised: slot 0 holds the header word, slot 1 the previous
  ; frame pointer, slot 2 is the single root used in %L60.
  %gcframe28 = alloca [3 x {} addrspace(10)*], align 16
  %gcframe28.sub = getelementptr inbounds [3 x {} addrspace(10)*], [3 x {} addrspace(10)*]* %gcframe28, i64 0, i64 0
  %3 = bitcast [3 x {} addrspace(10)*]* %gcframe28 to i8*
  call void @llvm.memset.p0i8.i32(i8* nonnull align 16 dereferenceable(24) %3, i8 0, i32 24, i1 false), !tbaa !39
  ; %pgcstack is read from the thread pointer (%fs:0) at byte offset -8.
  %thread_ptr = call i8* asm "movq %fs:0, $0", "=r"() #10
  %ppgcstack_i8 = getelementptr i8, i8* %thread_ptr, i64 -8
  %ppgcstack = bitcast i8* %ppgcstack_i8 to {}****
  %pgcstack = load {}***, {}**** %ppgcstack, align 8
  ; Header word 4 (presumably root count 1 encoded as 1 << 2 - TODO confirm encoding).
  %4 = bitcast [3 x {} addrspace(10)*]* %gcframe28 to i64*
  store i64 4, i64* %4, align 16, !tbaa !39
  ; Save the current head of the frame list in slot 1, then make this frame the new head.
  %5 = getelementptr inbounds [3 x {} addrspace(10)*], [3 x {} addrspace(10)*]* %gcframe28, i64 0, i64 1
  %6 = bitcast {} addrspace(10)** %5 to {}***
  %7 = load {}**, {}*** %pgcstack, align 8
  store {}** %7, {}*** %6, align 8, !tbaa !39
  %8 = bitcast {}*** %pgcstack to {} addrspace(10)***
  store {} addrspace(10)** %gcframe28.sub, {} addrspace(10)*** %8, align 8
  ; %.inv is true iff %2 - 1 > -1 (signed); %10 is the last inner-loop index, or -1.
  ; When %.inv is false both loops are skipped and control goes straight to %L60.
  %9 = add i64 %2, -1
  %.inv = icmp sgt i64 %9, -1
  %10 = select i1 %.inv, i64 %9, i64 -1
  %11 = bitcast {} addrspace(10)* %0 to {} addrspace(10)* addrspace(10)*
  %12 = addrspacecast {} addrspace(10)* addrspace(10)* %11 to {} addrspace(10)* addrspace(11)*
  br i1 %.inv, label %top.split.us, label %L60, !dbg !43

top.split.us:                                     ; preds = %top
  ; Load element 0 of the 6-element reference array as the first source array.
  %13 = getelementptr inbounds [6 x {} addrspace(10)*], [6 x {} addrspace(10)*] addrspace(11)* %1, i64 0, i64 0, !dbg !44
  %14 = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %13 unordered, align 8, !dbg !44, !tbaa !50, !nonnull !4, !dereferenceable !52, !align !53
  br label %L19.preheader.us, !dbg !43

L19.preheader.us:                                 ; preds = %top.split.us, %L54.us
  ; Outer loop header. %value_phi.us is the current source array, %value_phi1.us a counter
  ; starting at 2 (loop exits when it reaches 7), %value_phi2.us the destination offset
  ; starting at 2 and advanced by 2 per outer iteration.
  %value_phi.us = phi {} addrspace(10)* [ %14, %top.split.us ], [ %43, %L54.us ]
  %value_phi1.us = phi i64 [ 2, %top.split.us ], [ %44, %L54.us ]
  %value_phi2.us = phi i64 [ 2, %top.split.us ], [ %41, %L54.us ]
  ; %18: i64 at field 1 of the source array header (presumably its length - TODO confirm).
  %15 = bitcast {} addrspace(10)* %value_phi.us to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)*
  %16 = addrspacecast { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)* %15 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*
  %17 = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %16, i64 0, i32 1
  %18 = load i64, i64 addrspace(11)* %17, align 8, !tbaa !54, !range !57
  ; %19: destination array, reloaded from the first field of %0 on every outer iteration.
  %19 = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %12 unordered, align 8
  %20 = bitcast {} addrspace(10)* %19 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)*
  %21 = addrspacecast { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)* %20 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*
  %22 = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %21, i64 0, i32 1
  ; %24 / %26: addresses of field 0 (the data pointer slot) of the source / destination arrays.
  %23 = bitcast {} addrspace(10)* %value_phi.us to double addrspace(13)* addrspace(10)*
  %24 = addrspacecast double addrspace(13)* addrspace(10)* %23 to double addrspace(13)* addrspace(11)*
  %25 = bitcast {} addrspace(10)* %19 to double addrspace(13)* addrspace(10)*
  %26 = addrspacecast double addrspace(13)* addrspace(10)* %25 to double addrspace(13)* addrspace(11)*
  ; %29 = (max(%18, 30) - 28) / 3: the first inner index at which the source read
  ; 30 + 3*i is treated as out of bounds (compared against in %L19.us).
  %27 = icmp ugt i64 %18, 30, !dbg !58
  %umax = select i1 %27, i64 %18, i64 30, !dbg !58
  %28 = add nsw i64 %umax, -28, !dbg !58
  %29 = udiv i64 %28, 3, !dbg !58
  br label %L19.us, !dbg !58

L19.us:                                           ; preds = %idxend9.us, %L19.preheader.us
  ; Inner loop header: %value_phi6.us is the zero-based inner index i; %31 = 30 + 3*i.
  %value_phi6.us = phi i64 [ %40, %idxend9.us ], [ 0, %L19.preheader.us ]
  %30 = mul nuw nsw i64 %value_phi6.us, 3, !dbg !62
  %31 = add nuw nsw i64 %30, 30, !dbg !58
  ; Source bounds check: reaching i == %29 traps.
  %exitcond24.not = icmp eq i64 %value_phi6.us, %29, !dbg !58
  br i1 %exitcond24.not, label %oob, label %idxend.us, !dbg !58

idxend.us:                                        ; preds = %L19.us
  ; Destination bounds check: index %32 = i + offset must be unsigned-less than field 1 of %19.
  %32 = add nuw nsw i64 %value_phi6.us, %value_phi2.us, !dbg !65
  %33 = load i64, i64 addrspace(11)* %22, align 8, !dbg !70, !tbaa !54, !range !57
  %34 = icmp ult i64 %32, %33, !dbg !70
  br i1 %34, label %idxend9.us, label %oob8, !dbg !70

idxend9.us:                                       ; preds = %idxend.us
  ; dest[%32] = src[%31]; both data pointers are reloaded on every iteration.
  %35 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %24, align 16, !dbg !58, !tbaa !72, !nonnull !4
  %36 = getelementptr inbounds double, double addrspace(13)* %35, i64 %31, !dbg !58
  %37 = load double, double addrspace(13)* %36, align 8, !dbg !58, !tbaa !74
  %38 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %26, align 8, !dbg !70, !tbaa !72, !nonnull !4
  %39 = getelementptr inbounds double, double addrspace(13)* %38, i64 %32, !dbg !70
  store double %37, double addrspace(13)* %39, align 8, !dbg !70, !tbaa !74
  ; Leave the inner loop once i equals %10 (the last index computed in %top).
  %.not.us = icmp eq i64 %value_phi6.us, %10, !dbg !77
  %40 = add nuw nsw i64 %value_phi6.us, 1, !dbg !80
  br i1 %.not.us, label %L41.us, label %L19.us, !dbg !61

L41.us:                                           ; preds = %idxend9.us
  ; Outer loop exit test: the counter runs 2..7, i.e. six source arrays in total.
  %exitcond25 = icmp eq i64 %value_phi1.us, 7, !dbg !83
  br i1 %exitcond25, label %L60, label %L54.us, !dbg !83

L54.us:                                           ; preds = %L41.us
  ; Advance: next source is element (counter - 1) of %1, offset += 2, counter += 1.
  %value_phi1.off.us = add nsw i64 %value_phi1.us, -1, !dbg !83
  %41 = add nuw nsw i64 %value_phi2.us, 2, !dbg !85
  %42 = getelementptr inbounds [6 x {} addrspace(10)*], [6 x {} addrspace(10)*] addrspace(11)* %1, i64 0, i64 %value_phi1.off.us, !dbg !86
  %43 = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %42 unordered, align 8, !dbg !86, !tbaa !50, !nonnull !4, !dereferenceable !52, !align !53
  %44 = add nuw nsw i64 %value_phi1.us, 1, !dbg !87
  br label %L19.preheader.us, !dbg !84

L60:                                              ; preds = %L41.us, %top
  ; Index 2305843009213693954 is 2^61 + 2; with 8-byte elements the byte offset is
  ; 16 modulo 2^64. NOTE(review): presumably this reaches the ptls field relative to
  ; %pgcstack - confirm against the Julia runtime layout.
  %ptls_field19 = getelementptr inbounds {}**, {}*** %pgcstack, i64 2305843009213693954, !dbg !88
  %45 = bitcast {}*** %ptls_field19 to i8**, !dbg !88
  %ptls_load2021 = load i8*, i8** %45, align 8, !dbg !88, !tbaa !39
  ; Allocate an object, write a constant into the word just before it (presumably the
  ; type tag) and the value 2 into its first word (matches `Something(2)` in the reproducer).
  %46 = call noalias nonnull {} addrspace(10)* @jl_gc_pool_alloc(i8* %ptls_load2021, i32 1392, i32 16) #9, !dbg !88
  %47 = bitcast {} addrspace(10)* %46 to i64 addrspace(10)*, !dbg !88
  %48 = getelementptr inbounds i64, i64 addrspace(10)* %47, i64 -1, !dbg !88
  store atomic i64 140230348265744, i64 addrspace(10)* %48 unordered, align 8, !dbg !88, !tbaa !93
  store i64 2, i64 addrspace(10)* %47, align 8, !dbg !88, !tbaa !95
  ; Root the new object in frame slot 2 before the calls below.
  %49 = getelementptr inbounds [3 x {} addrspace(10)*], [3 x {} addrspace(10)*]* %gcframe28, i64 0, i64 2
  store {} addrspace(10)* %46, {} addrspace(10)** %49, align 16
  ; Two calls through absolute addresses baked in at compile time: the first returns an
  ; i64 that is passed to the second along with the new object and a constant object.
  ; NOTE(review): presumably the `finalizer(free, req)` registration - confirm.
  %50 = call i64 inttoptr (i64 140230507985449 to i64 ()*)(), !dbg !98
  call void inttoptr (i64 140230507984285 to void (i64, {} addrspace(10)*, {} addrspace(10)*)*)(i64 %50, {} addrspace(10)* nonnull %46, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140228970242032 to {}*) to {} addrspace(10)*)), !dbg !101
  ; Unlink the frame: restore the previous head saved in slot 1.
  %51 = load {} addrspace(10)*, {} addrspace(10)** %5, align 8, !tbaa !39
  %52 = bitcast {}*** %pgcstack to {} addrspace(10)**
  store {} addrspace(10)* %51, {} addrspace(10)** %52, align 8, !tbaa !39
  ret void, !dbg !105

oob:                                              ; preds = %L19.us
  ; Source index out of bounds.
  call void @llvm.trap() #11, !dbg !58
  unreachable, !dbg !58

oob8:                                             ; preds = %idxend.us
  ; Destination index out of bounds.
  call void @llvm.trap() #11, !dbg !70
  unreachable, !dbg !70
}

; Function Attrs: inaccessiblememonly allocsize(1)
declare noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj(i8*, i64, {} addrspace(10)*) local_unnamed_addr #1

; Function Attrs: cold noreturn nounwind
declare void @llvm.trap() #2

; julia_foo_1799.inner.6: primal wrapper that appears to correspond to `foo(domain, domx, dx)`
; in the reproducer (name-based). It stores %1 six times into consecutive frame slots,
; views those slots as a [6 x ref] array, and calls @julia_fooSend_1802(%0, array, %2).
;   %0 - tracked object forwarded unchanged as the callee's first argument
;   %1 - tracked object replicated into all six array elements (presumably `domx`)
;   %2 - forwarded as the callee's i64 count
;   %3, %4 - not referenced anywhere in this body
define private void @julia_foo_1799.inner.6({} addrspace(10)* nocapture nonnull readonly align 8 dereferenceable(8) %0, {} addrspace(10)* nonnull align 16 dereferenceable(40) %1, i64 signext %2, i64 signext %3, i64 signext %4) local_unnamed_addr #0 {
entry:
  ; Eight-slot frame, zero-initialised: slot 0 header, slot 1 previous frame, slots 2..7
  ; double as storage for the six-element argument array.
  %gcframe9 = alloca [8 x {} addrspace(10)*], align 16
  %gcframe9.sub = getelementptr inbounds [8 x {} addrspace(10)*], [8 x {} addrspace(10)*]* %gcframe9, i64 0, i64 0
  %5 = bitcast [8 x {} addrspace(10)*]* %gcframe9 to i8*
  call void @llvm.memset.p0i8.i32(i8* nonnull align 16 dereferenceable(64) %5, i8 0, i32 64, i1 false), !tbaa !39
  ; %7 reinterprets the address of slot 2 as the start of a [6 x ref] array.
  %6 = getelementptr inbounds [8 x {} addrspace(10)*], [8 x {} addrspace(10)*]* %gcframe9, i64 0, i64 2
  %7 = bitcast {} addrspace(10)** %6 to [6 x {} addrspace(10)*]*
  ; %pgcstack is read from the thread pointer (%fs:0) at byte offset -8.
  %thread_ptr = call i8* asm "movq %fs:0, $0", "=r"() #10
  %ppgcstack_i8 = getelementptr i8, i8* %thread_ptr, i64 -8
  %ppgcstack = bitcast i8* %ppgcstack_i8 to {}****
  %pgcstack = load {}***, {}**** %ppgcstack, align 8
  ; Header word 24 (presumably root count 6 encoded as 6 << 2 - TODO confirm encoding).
  %8 = bitcast [8 x {} addrspace(10)*]* %gcframe9 to i64*, !dbg !106
  store i64 24, i64* %8, align 16, !dbg !106, !tbaa !39
  ; Save the current head of the frame list in slot 1, then make this frame the new head.
  %9 = getelementptr inbounds [8 x {} addrspace(10)*], [8 x {} addrspace(10)*]* %gcframe9, i64 0, i64 1, !dbg !106
  %10 = bitcast {} addrspace(10)** %9 to {}***, !dbg !106
  %11 = load {}**, {}*** %pgcstack, align 8, !dbg !106
  store {}** %11, {}*** %10, align 8, !dbg !106, !tbaa !39
  %12 = bitcast {}*** %pgcstack to {} addrspace(10)***, !dbg !106
  store {} addrspace(10)** %gcframe9.sub, {} addrspace(10)*** %12, align 8, !dbg !106
  ; Fill slots 2..7 with six copies of %1
  ; (matches `fields = (domx, domx, domx, domx, domx, domx)` in the reproducer).
  store {} addrspace(10)* %1, {} addrspace(10)** %6, align 16, !dbg !106
  %.fca.1.gep = getelementptr inbounds [8 x {} addrspace(10)*], [8 x {} addrspace(10)*]* %gcframe9, i64 0, i64 3, !dbg !106
  store {} addrspace(10)* %1, {} addrspace(10)** %.fca.1.gep, align 8, !dbg !106
  %.fca.2.gep = getelementptr inbounds [8 x {} addrspace(10)*], [8 x {} addrspace(10)*]* %gcframe9, i64 0, i64 4, !dbg !106
  store {} addrspace(10)* %1, {} addrspace(10)** %.fca.2.gep, align 16, !dbg !106
  %.fca.3.gep = getelementptr inbounds [8 x {} addrspace(10)*], [8 x {} addrspace(10)*]* %gcframe9, i64 0, i64 5, !dbg !106
  store {} addrspace(10)* %1, {} addrspace(10)** %.fca.3.gep, align 8, !dbg !106
  %.fca.4.gep = getelementptr inbounds [8 x {} addrspace(10)*], [8 x {} addrspace(10)*]* %gcframe9, i64 0, i64 6, !dbg !106
  store {} addrspace(10)* %1, {} addrspace(10)** %.fca.4.gep, align 16, !dbg !106
  %.fca.5.gep = getelementptr inbounds [8 x {} addrspace(10)*], [8 x {} addrspace(10)*]* %gcframe9, i64 0, i64 7, !dbg !106
  store {} addrspace(10)* %1, {} addrspace(10)** %.fca.5.gep, align 8, !dbg !106
  ; Pass the in-frame array (cast to addrspace(11)) to the callee.
  %13 = addrspacecast [6 x {} addrspace(10)*]* %7 to [6 x {} addrspace(10)*] addrspace(11)*, !dbg !106
  call fastcc void @julia_fooSend_1802({} addrspace(10)* nocapture nonnull readonly align 8 dereferenceable(8) %0, [6 x {} addrspace(10)*] addrspace(11)* nocapture noundef nonnull readonly align 8 dereferenceable(48) %13, i64 signext %2) #0, !dbg !106
  ; Unlink the frame: restore the previous head saved in slot 1.
  %14 = load {} addrspace(10)*, {} addrspace(10)** %9, align 8, !tbaa !39
  %15 = bitcast {}*** %pgcstack to {} addrspace(10)**
  store {} addrspace(10)* %14, {} addrspace(10)** %15, align 8, !tbaa !39
  ret void
}

; Function Attrs: argmemonly nofree nosync nounwind willreturn
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #3

; Function Attrs: argmemonly nofree nosync nounwind willreturn
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #3

; preprocess_julia_foo_1799.inner.6: a variant of @julia_foo_1799.inner.6 in which the
; call to @julia_fooSend_1802 no longer appears; the block labels carry an `.i` suffix and
; the exit block is named julia_fooSend_1802.exit, so the callee body looks inlined
; (presumably Enzyme's preprocessed copy of the primal - confirm against Enzyme's pipeline).
; Differences visible against the two primal functions in this dump:
;   - both loops use zero-based induction variables (%iv outer, %iv1 inner);
;   - the two bounds checks are expressed as llvm.assume calls instead of branches to a trap;
;   - the frame has 9 slots: header, previous frame, six copies of %arg3, one extra root.
;   %arg  - tracked object whose first field is loaded as the destination array
;   %arg3 - tracked object replicated into frame slots 2..7 and used as every source array
;   %arg4 - signed count controlling the inner loop
;   %arg5, %arg6 - not referenced anywhere in this body
; Function Attrs: willreturn mustprogress
define private void @preprocess_julia_foo_1799.inner.6({} addrspace(10)* nocapture nonnull readonly align 8 dereferenceable(8) %arg, {} addrspace(10)* nonnull align 16 dereferenceable(40) %arg3, i64 signext %arg4, i64 signext %arg5, i64 signext %arg6) local_unnamed_addr #4 {
entry:
  ; Nine-slot frame, zero-initialised; %i points at slot 2, the start of the six-element array.
  %gcframe57 = alloca [9 x {} addrspace(10)*], align 16
  %gcframe57.sub = getelementptr inbounds [9 x {} addrspace(10)*], [9 x {} addrspace(10)*]* %gcframe57, i64 0, i64 0
  %0 = bitcast [9 x {} addrspace(10)*]* %gcframe57 to i8*
  call void @llvm.memset.p0i8.i32(i8* nonnull align 16 dereferenceable(72) %0, i8 0, i32 72, i1 false), !tbaa !39
  %i = getelementptr inbounds [9 x {} addrspace(10)*], [9 x {} addrspace(10)*]* %gcframe57, i64 0, i64 2
  ; %pgcstack is read from the thread pointer (%fs:0) at byte offset -8.
  %thread_ptr = call i8* asm "movq %fs:0, $0", "=r"() #10
  %ppgcstack_i8 = getelementptr i8, i8* %thread_ptr, i64 -8
  %ppgcstack = bitcast i8* %ppgcstack_i8 to {}****
  %pgcstack = load {}***, {}**** %ppgcstack, align 8
  ; Header word 28 (presumably root count 7 encoded as 7 << 2 - TODO confirm encoding).
  %1 = bitcast [9 x {} addrspace(10)*]* %gcframe57 to i64*, !dbg !106
  store i64 28, i64* %1, align 16, !dbg !106, !tbaa !39
  ; Save the current head of the frame list in slot 1, then make this frame the new head.
  %2 = getelementptr inbounds [9 x {} addrspace(10)*], [9 x {} addrspace(10)*]* %gcframe57, i64 0, i64 1, !dbg !106
  %3 = bitcast {} addrspace(10)** %2 to {}***, !dbg !106
  %4 = load {}**, {}*** %pgcstack, align 8, !dbg !106
  store {}** %4, {}*** %3, align 8, !dbg !106, !tbaa !39
  %5 = bitcast {}*** %pgcstack to {} addrspace(10)***, !dbg !106
  store {} addrspace(10)** %gcframe57.sub, {} addrspace(10)*** %5, align 8, !dbg !106
  ; Fill slots 2..7 with six copies of %arg3.
  store {} addrspace(10)* %arg3, {} addrspace(10)** %i, align 16, !dbg !106
  %.fca.1.gep = getelementptr inbounds [9 x {} addrspace(10)*], [9 x {} addrspace(10)*]* %gcframe57, i64 0, i64 3, !dbg !106
  store {} addrspace(10)* %arg3, {} addrspace(10)** %.fca.1.gep, align 8, !dbg !106
  %.fca.2.gep = getelementptr inbounds [9 x {} addrspace(10)*], [9 x {} addrspace(10)*]* %gcframe57, i64 0, i64 4, !dbg !106
  store {} addrspace(10)* %arg3, {} addrspace(10)** %.fca.2.gep, align 16, !dbg !106
  %.fca.3.gep = getelementptr inbounds [9 x {} addrspace(10)*], [9 x {} addrspace(10)*]* %gcframe57, i64 0, i64 5, !dbg !106
  store {} addrspace(10)* %arg3, {} addrspace(10)** %.fca.3.gep, align 8, !dbg !106
  %.fca.4.gep = getelementptr inbounds [9 x {} addrspace(10)*], [9 x {} addrspace(10)*]* %gcframe57, i64 0, i64 6, !dbg !106
  store {} addrspace(10)* %arg3, {} addrspace(10)** %.fca.4.gep, align 16, !dbg !106
  %.fca.5.gep = getelementptr inbounds [9 x {} addrspace(10)*], [9 x {} addrspace(10)*]* %gcframe57, i64 0, i64 7, !dbg !106
  store {} addrspace(10)* %arg3, {} addrspace(10)** %.fca.5.gep, align 8, !dbg !106
  ; %.inv.i is true iff %arg4 - 1 > -1 (signed); %i12 is the last inner-loop index, or -1.
  ; When %.inv.i is false both loops are skipped.
  %i11 = add i64 %arg4, -1
  %.inv.i = icmp sgt i64 %i11, -1
  %i12 = select i1 %.inv.i, i64 %i11, i64 -1
  %i13 = bitcast {} addrspace(10)* %arg to {} addrspace(10)* addrspace(10)*
  %i14 = addrspacecast {} addrspace(10)* addrspace(10)* %i13 to {} addrspace(10)* addrspace(11)*
  br i1 %.inv.i, label %top.split.us.i, label %julia_fooSend_1802.exit, !dbg !109

top.split.us.i:                                   ; preds = %entry
  ; First source array: read back from frame slot 2.
  %i16 = load atomic {} addrspace(10)*, {} addrspace(10)** %i unordered, align 16, !dbg !111, !tbaa !50, !nonnull !4, !dereferenceable !52, !align !53
  br label %L19.preheader.us.i, !dbg !109

L19.preheader.us.i:                               ; preds = %L54.us.i, %top.split.us.i
  ; Outer loop header: %iv counts 0..5; %i18 = 2*%iv + 2 is the destination offset.
  %iv = phi i64 [ %iv.next, %L54.us.i ], [ 0, %top.split.us.i ]
  %value_phi.us.i = phi {} addrspace(10)* [ %i47, %L54.us.i ], [ %i16, %top.split.us.i ]
  %i17 = shl nuw nsw i64 %iv, 1
  %iv.next = add nuw nsw i64 %iv, 1
  %i18 = add nuw nsw i64 %i17, 2
  ; %i23: i64 at field 1 of the source array header (presumably its length - TODO confirm).
  %i20 = bitcast {} addrspace(10)* %value_phi.us.i to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)*
  %i21 = addrspacecast { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)* %i20 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*
  %i22 = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %i21, i64 0, i32 1
  %i23 = load i64, i64 addrspace(11)* %i22, align 8, !tbaa !54, !range !57
  ; %i24: destination array, reloaded from the first field of %arg on every outer iteration.
  %i24 = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %i14 unordered, align 8
  %i25 = bitcast {} addrspace(10)* %i24 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)*
  %i26 = addrspacecast { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)* %i25 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*
  %i27 = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %i26, i64 0, i32 1
  ; %i29 / %i31: addresses of field 0 (the data pointer slot) of the source / destination arrays.
  %i28 = bitcast {} addrspace(10)* %value_phi.us.i to double addrspace(13)* addrspace(10)*
  %i29 = addrspacecast double addrspace(13)* addrspace(10)* %i28 to double addrspace(13)* addrspace(11)*
  %i30 = bitcast {} addrspace(10)* %i24 to double addrspace(13)* addrspace(10)*
  %i31 = addrspacecast double addrspace(13)* addrspace(10)* %i30 to double addrspace(13)* addrspace(11)*
  ; %i34 = (max(%i23, 30) - 28) / 3: the inner index assumed never to be reached.
  %i32 = icmp ugt i64 %i23, 30, !dbg !115
  %umax.i = select i1 %i32, i64 %i23, i64 30, !dbg !115
  %i33 = add nsw i64 %umax.i, -28, !dbg !115
  %i34 = udiv i64 %i33, 3, !dbg !115
  br label %L19.us.i, !dbg !115

L19.us.i:                                         ; preds = %L19.us.i, %L19.preheader.us.i
  ; Inner loop (single block): %iv1 is the zero-based index; %i36 = 30 + 3*%iv1.
  %iv1 = phi i64 [ %iv.next2, %L19.us.i ], [ 0, %L19.preheader.us.i ]
  %iv.next2 = add nuw nsw i64 %iv1, 1, !dbg !117
  %i35 = mul nuw nsw i64 %iv1, 3, !dbg !117
  %i36 = add nuw nsw i64 %i35, 30, !dbg !115
  ; Source bounds check turned into an assumption: %iv1 != %i34.
  %exitcond24.not.i = icmp ne i64 %iv1, %i34, !dbg !115
  call void @llvm.assume(i1 %exitcond24.not.i), !dbg !115
  ; Destination bounds check turned into an assumption: %i37 is unsigned-less than field 1 of %i24.
  %i37 = add nuw nsw i64 %iv1, %i18, !dbg !118
  %i38 = load i64, i64 addrspace(11)* %i27, align 8, !dbg !120, !tbaa !54, !range !57
  %i39 = icmp ult i64 %i37, %i38, !dbg !120
  call void @llvm.assume(i1 %i39), !dbg !120
  ; dest[%i37] = src[%i36]; both data pointers are reloaded on every iteration.
  %i40 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %i29, align 16, !dbg !115, !tbaa !72, !nonnull !4
  %i41 = getelementptr inbounds double, double addrspace(13)* %i40, i64 %i36, !dbg !115
  %i42 = load double, double addrspace(13)* %i41, align 8, !dbg !115, !tbaa !74
  %i43 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %i31, align 8, !dbg !120, !tbaa !72, !nonnull !4
  %i44 = getelementptr inbounds double, double addrspace(13)* %i43, i64 %i37, !dbg !120
  store double %i42, double addrspace(13)* %i44, align 8, !dbg !120, !tbaa !74
  ; Leave the inner loop once %iv1 equals %i12.
  %.not.us.i = icmp eq i64 %iv1, %i12, !dbg !121
  br i1 %.not.us.i, label %L41.us.i, label %L19.us.i, !dbg !123

L41.us.i:                                         ; preds = %L19.us.i
  ; Outer loop exit test: stop after the sixth source array (%iv == 5).
  %exitcond25.i = icmp eq i64 %iv, 5, !dbg !124
  br i1 %exitcond25.i, label %julia_fooSend_1802.exit, label %L54.us.i, !dbg !124

L54.us.i:                                         ; preds = %L41.us.i
  ; Next source array: frame slot 2 + %iv.next, i.e. element %iv.next of the in-frame array.
  %6 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %i, i64 %iv.next, !dbg !126
  %i47 = load atomic {} addrspace(10)*, {} addrspace(10)** %6 unordered, align 8, !dbg !126, !tbaa !50, !nonnull !4, !dereferenceable !52, !align !53
  br label %L19.preheader.us.i, !dbg !128

julia_fooSend_1802.exit:                          ; preds = %L41.us.i, %entry
  ; Index 2305843009213693954 is 2^61 + 2; with 8-byte elements the byte offset is
  ; 16 modulo 2^64. NOTE(review): presumably this reaches the ptls field relative to
  ; %pgcstack - confirm against the Julia runtime layout.
  %ptls_field19.i = getelementptr inbounds {}**, {}*** %pgcstack, i64 2305843009213693954, !dbg !129
  %i49 = bitcast {}*** %ptls_field19.i to i8**, !dbg !129
  %ptls_load2021.i = load i8*, i8** %i49, align 8, !dbg !129, !tbaa !39
  ; Allocate an object, write a constant into the word just before it (presumably the
  ; type tag) and the value 2 into its first word (matches `Something(2)` in the reproducer).
  %i50 = call noalias nonnull {} addrspace(10)* @jl_gc_pool_alloc(i8* %ptls_load2021.i, i32 1392, i32 16) #9, !dbg !129
  %7 = bitcast {} addrspace(10)* %i50 to i64 addrspace(10)*, !dbg !129
  %8 = getelementptr inbounds i64, i64 addrspace(10)* %7, i64 -1, !dbg !129
  store atomic i64 140230348265744, i64 addrspace(10)* %8 unordered, align 8, !dbg !129, !tbaa !93
  store i64 2, i64 addrspace(10)* %7, align 8, !dbg !129, !tbaa !95
  ; Root the new object in frame slot 8 (the seventh root) before the calls below.
  %9 = getelementptr inbounds [9 x {} addrspace(10)*], [9 x {} addrspace(10)*]* %gcframe57, i64 0, i64 8
  store {} addrspace(10)* %i50, {} addrspace(10)** %9, align 16
  ; Two calls through absolute addresses baked in at compile time: the first returns an
  ; i64 that is passed to the second along with the new object and a constant object.
  ; NOTE(review): presumably the `finalizer(free, req)` registration - confirm.
  %i52 = call i64 inttoptr (i64 140230507985449 to i64 ()*)() #12, !dbg !132
  call void inttoptr (i64 140230507984285 to void (i64, {} addrspace(10)*, {} addrspace(10)*)*)(i64 %i52, {} addrspace(10)* nonnull %i50, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140228970242032 to {}*) to {} addrspace(10)*)) #12, !dbg !135
  ; Unlink the frame: restore the previous head saved in slot 1.
  %10 = load {} addrspace(10)*, {} addrspace(10)** %2, align 8, !tbaa !39
  %11 = bitcast {}*** %pgcstack to {} addrspace(10)**
  store {} addrspace(10)* %10, {} addrspace(10)** %11, align 8, !tbaa !39
  ret void
}

declare noalias i8* @malloc(i64)

; Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #5

declare void @free(i8*)

declare void @julia.write_barrier({} addrspace(10)*, ...)

; Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly
declare void @llvm.memset.p10i8.i64(i8 addrspace(10)* nocapture writeonly, i8, i64, i1 immarg) #5

; Function Attrs: alwaysinline
define void @diffejulia_foo_1799.inner.6wrap({} addrspace(10)* %0, {} addrspace(10)* %1, {} addrspace(10)* %2, {} addrspace(10)* %3, i64 %4, i64 %5, i64 %6) #6 {
entry:
  %gcframe233 = alloca [8 x {} addrspace(10)*], align 16
  %gcframe233.sub = getelementptr inbounds [8 x {} addrspace(10)*], [8 x {} addrspace(10)*]* %gcframe233, i64 0, i64 0
  %7 = bitcast [8 x {} addrspace(10)*]* %gcframe233 to i8*
  call void @llvm.memset.p0i8.i32(i8* nonnull align 16 dereferenceable(64) %7, i8 0, i32 64, i1 false), !tbaa !39
  %"i'ipa.i" = getelementptr inbounds [8 x {} addrspace(10)*], [8 x {} addrspace(10)*]* %gcframe233, i64 0, i64 2
  %thread_ptr = call i8* asm "movq %fs:0, $0", "=r"() #10
  %ppgcstack_i8 = getelementptr i8, i8* %thread_ptr, i64 -8
  %ppgcstack = bitcast i8* %ppgcstack_i8 to {}****
  %pgcstack = load {}***, {}**** %ppgcstack, align 8
  %8 = bitcast [8 x {} addrspace(10)*]* %gcframe233 to i64*
  store i64 24, i64* %8, align 16, !tbaa !39
  %9 = getelementptr inbounds [8 x {} addrspace(10)*], [8 x {} addrspace(10)*]* %gcframe233, i64 0, i64 1
  %10 = bitcast {} addrspace(10)** %9 to {}***
  %11 = load {}**, {}*** %pgcstack, align 8
  store {}** %11, {}*** %10, align 8, !tbaa !39
  %12 = bitcast {}*** %pgcstack to {} addrspace(10)***
  store {} addrspace(10)** %gcframe233.sub, {} addrspace(10)*** %12, align 8
  %.fca.1.gep3.i = getelementptr inbounds [8 x {} addrspace(10)*], [8 x {} addrspace(10)*]* %gcframe233, i64 0, i64 3
  %.fca.2.gep4.i = getelementptr inbounds [8 x {} addrspace(10)*], [8 x {} addrspace(10)*]* %gcframe233, i64 0, i64 4
  %.fca.3.gep5.i = getelementptr inbounds [8 x {} addrspace(10)*], [8 x {} addrspace(10)*]* %gcframe233, i64 0, i64 5
  %.fca.4.gep6.i = getelementptr inbounds [8 x {} addrspace(10)*], [8 x {} addrspace(10)*]* %gcframe233, i64 0, i64 6
  %.fca.5.gep7.i = getelementptr inbounds [8 x {} addrspace(10)*], [8 x {} addrspace(10)*]* %gcframe233, i64 0, i64 7
  store {} addrspace(10)* %3, {} addrspace(10)** %"i'ipa.i", align 16, !dbg !106
  store {} addrspace(10)* %3, {} addrspace(10)** %.fca.1.gep3.i, align 8, !dbg !106
  store {} addrspace(10)* %3, {} addrspace(10)** %.fca.2.gep4.i, align 16, !dbg !106
  store {} addrspace(10)* %3, {} addrspace(10)** %.fca.3.gep5.i, align 8, !dbg !106
  store {} addrspace(10)* %3, {} addrspace(10)** %.fca.4.gep6.i, align 16, !dbg !106
  store {} addrspace(10)* %3, {} addrspace(10)** %.fca.5.gep7.i, align 8, !dbg !106
  %i11.i = add i64 %4, -1
  %.inv.i.i = icmp sgt i64 %i11.i, -1
  %i12.i = select i1 %.inv.i.i, i64 %i11.i, i64 -1
  %"i13'ipc.i" = bitcast {} addrspace(10)* %1 to {} addrspace(10)* addrspace(10)*
  %i13.i = bitcast {} addrspace(10)* %0 to {} addrspace(10)* addrspace(10)*
  %"i14'ipc.i" = addrspacecast {} addrspace(10)* addrspace(10)* %"i13'ipc.i" to {} addrspace(10)* addrspace(11)*
  %i14.i = addrspacecast {} addrspace(10)* addrspace(10)* %i13.i to {} addrspace(10)* addrspace(11)*
  br i1 %.inv.i.i, label %top.split.us.i.i, label %julia_fooSend_1802.exit.thread.i, !dbg !109

julia_fooSend_1802.exit.thread.i:                 ; preds = %entry
  %i5219.i = call i64 inttoptr (i64 140230507985449 to i64 ()*)() #12, !dbg !132
  br label %diffejulia_foo_1799.inner.6.exit

top.split.us.i.i:                                 ; preds = %entry
  %13 = icmp ne {} addrspace(10)* %2, null
  call void @llvm.assume(i1 %13)
  %malloccall.i = call noalias nonnull dereferenceable(48) dereferenceable_or_null(48) i8* @malloc(i64 48)
  %"i24'ipl_malloccache.i" = bitcast i8* %malloccall.i to {} addrspace(10)**
  %14 = getelementptr inbounds i8, i8* %malloccall.i, i64 8, !dbg !109
  call void @llvm.memset.p0i8.i64(i8* nonnull align 1 dereferenceable(48) %14, i8 0, i64 40, i1 false), !dbg !109
  %i20.i = bitcast {} addrspace(10)* %2 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)*
  %i21.i = addrspacecast { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)* %i20.i to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*
  %i22.i = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %i21.i, i64 0, i32 1
  %i23.i = load i64, i64 addrspace(11)* %i22.i, align 8, !tbaa !54, !range !57
  %"i24'ipl.i" = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %"i14'ipc.i" unordered, align 8
  store {} addrspace(10)* %"i24'ipl.i", {} addrspace(10)** %"i24'ipl_malloccache.i", align 8, !invariant.group !136
  %i24.i = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %i14.i unordered, align 8
  %i25.i = bitcast {} addrspace(10)* %i24.i to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)*
  %i26.i = addrspacecast { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)* %i25.i to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*
  %i27.i = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %i26.i, i64 0, i32 1
  %i28.i = bitcast {} addrspace(10)* %2 to double addrspace(13)* addrspace(10)*
  %i29.i = addrspacecast double addrspace(13)* addrspace(10)* %i28.i to double addrspace(13)* addrspace(11)*
  %i30.i = bitcast {} addrspace(10)* %i24.i to double addrspace(13)* addrspace(10)*
  %i31.i = addrspacecast double addrspace(13)* addrspace(10)* %i30.i to double addrspace(13)* addrspace(11)*
  %i32.i = icmp ugt i64 %i23.i, 30, !dbg !115
  %umax.i.i = select i1 %i32.i, i64 %i23.i, i64 30, !dbg !115
  %i33.i = add nsw i64 %umax.i.i, -28, !dbg !115
  %i34.i = udiv i64 %i33.i, 3, !dbg !115
  %i38.i = load i64, i64 addrspace(11)* %i27.i, align 8, !tbaa !54, !range !57
  %i40.i = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %i29.i, align 16, !tbaa !72, !nonnull !4
  %i43.i = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %i31.i, align 8, !tbaa !72, !nonnull !4
  %15 = add i64 %i12.i, 1, !dbg !115
  %min.iters.check = icmp ult i64 %15, 5, !dbg !115
  br i1 %min.iters.check, label %scalar.ph, label %vector.memcheck, !dbg !115

vector.memcheck:                                  ; preds = %top.split.us.i.i
  %scevgep = getelementptr double, double addrspace(13)* %i43.i, i64 2, !dbg !115
  %16 = add i64 %i12.i, 3, !dbg !115
  %scevgep8 = getelementptr double, double addrspace(13)* %i43.i, i64 %16, !dbg !115
  %scevgep10 = getelementptr double, double addrspace(13)* %i40.i, i64 30, !dbg !115
  %17 = mul i64 %i12.i, 3, !dbg !115
  %18 = add i64 %17, 31, !dbg !115
  %scevgep12 = getelementptr double, double addrspace(13)* %i40.i, i64 %18, !dbg !115
  %bound0 = icmp ult double addrspace(13)* %scevgep, %scevgep12, !dbg !115
  %bound1 = icmp ult double addrspace(13)* %scevgep10, %scevgep8, !dbg !115
  %found.conflict = and i1 %bound0, %bound1, !dbg !115
  br i1 %found.conflict, label %scalar.ph, label %vector.ph, !dbg !115

vector.ph:                                        ; preds = %vector.memcheck
  %n.mod.vf = and i64 %15, 3, !dbg !115
  %19 = icmp eq i64 %n.mod.vf, 0, !dbg !115
  %20 = select i1 %19, i64 4, i64 %n.mod.vf, !dbg !115
  %n.vec = sub i64 %15, %20, !dbg !115
  %broadcast.splatinsert = insertelement <2 x i64> poison, i64 %i34.i, i32 0, !dbg !115
  %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> poison, <2 x i32> zeroinitializer, !dbg !115
  %broadcast.splatinsert17 = insertelement <2 x i64> poison, i64 %i38.i, i32 0, !dbg !115
  %broadcast.splat18 = shufflevector <2 x i64> %broadcast.splatinsert17, <2 x i64> poison, <2 x i32> zeroinitializer, !dbg !115
  br label %vector.body, !dbg !115

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ], !dbg !117
  %vec.ind = phi <2 x i64> [ <i64 0, i64 1>, %vector.ph ], [ %32, %vector.body ]
  %step.add = add <2 x i64> %vec.ind, <i64 2, i64 2>
  %21 = or i64 %index, 2, !dbg !117
  %22 = mul nuw nsw i64 %index, 3, !dbg !117
  %23 = mul nuw nsw i64 %21, 3, !dbg !117
  %24 = add nuw nsw i64 %22, 30, !dbg !115
  %25 = add nuw nsw i64 %23, 30, !dbg !115
  %26 = icmp ne <2 x i64> %vec.ind, %broadcast.splat, !dbg !115
  %27 = icmp ne <2 x i64> %step.add, %broadcast.splat, !dbg !115
  %28 = extractelement <2 x i1> %26, i32 0, !dbg !115
  call void @llvm.assume(i1 %28), !dbg !115
  %29 = extractelement <2 x i1> %26, i32 1, !dbg !115
  call void @llvm.assume(i1 %29), !dbg !115
  %30 = extractelement <2 x i1> %27, i32 0, !dbg !115
  call void @llvm.assume(i1 %30), !dbg !115
  %31 = extractelement <2 x i1> %27, i32 1, !dbg !115
  call void @llvm.assume(i1 %31), !dbg !115
  %32 = add <2 x i64> %vec.ind, <i64 4, i64 4>, !dbg !118
  %33 = icmp ult <2 x i64> %step.add, %broadcast.splat18, !dbg !120
  %34 = icmp ult <2 x i64> %32, %broadcast.splat18, !dbg !120
  %35 = extractelement <2 x i1> %33, i32 0, !dbg !120
  call void @llvm.assume(i1 %35), !dbg !120
  %36 = extractelement <2 x i1> %33, i32 1, !dbg !120
  call void @llvm.assume(i1 %36), !dbg !120
  %37 = extractelement <2 x i1> %34, i32 0, !dbg !120
  call void @llvm.assume(i1 %37), !dbg !120
  %38 = extractelement <2 x i1> %34, i32 1, !dbg !120
  call void @llvm.assume(i1 %38), !dbg !120
  %39 = getelementptr inbounds double, double addrspace(13)* %i40.i, i64 %24, !dbg !115
  %40 = getelementptr inbounds double, double addrspace(13)* %i40.i, i64 %25, !dbg !115
  %41 = bitcast double addrspace(13)* %39 to <6 x double> addrspace(13)*, !dbg !115
  %42 = bitcast double addrspace(13)* %40 to <6 x double> addrspace(13)*, !dbg !115
  %wide.vec = load <6 x double>, <6 x double> addrspace(13)* %41, align 8, !dbg !115, !tbaa !74
  %wide.vec21 = load <6 x double>, <6 x double> addrspace(13)* %42, align 8, !dbg !115, !tbaa !74
  %strided.vec = shufflevector <6 x double> %wide.vec, <6 x double> poison, <2 x i32> <i32 0, i32 3>, !dbg !115
  %strided.vec22 = shufflevector <6 x double> %wide.vec21, <6 x double> poison, <2 x i32> <i32 0, i32 3>, !dbg !115
  %43 = extractelement <2 x i64> %step.add, i32 0, !dbg !120
  %44 = getelementptr inbounds double, double addrspace(13)* %i43.i, i64 %43, !dbg !120
  %45 = bitcast double addrspace(13)* %44 to <2 x double> addrspace(13)*, !dbg !120
  store <2 x double> %strided.vec, <2 x double> addrspace(13)* %45, align 8, !dbg !120, !tbaa !74, !alias.scope !137, !noalias !140
  %46 = getelementptr inbounds double, double addrspace(13)* %44, i64 2, !dbg !120
  %47 = bitcast double addrspace(13)* %46 to <2 x double> addrspace(13)*, !dbg !120
  store <2 x double> %strided.vec22, <2 x double> addrspace(13)* %47, align 8, !dbg !120, !tbaa !74, !alias.scope !137, !noalias !140
  %index.next = add i64 %index, 4, !dbg !117
  %48 = icmp eq i64 %index.next, %n.vec, !dbg !117
  br i1 %48, label %scalar.ph, label %vector.body, !dbg !117, !llvm.loop !142

scalar.ph:                                        ; preds = %vector.body, %vector.memcheck, %top.split.us.i.i
  %bc.resume.val = phi i64 [ 0, %top.split.us.i.i ], [ 0, %vector.memcheck ], [ %n.vec, %vector.body ]
  br label %L19.us.i.i, !dbg !115

L19.us.i.i:                                       ; preds = %L19.us.i.i, %scalar.ph
  %iv1.i = phi i64 [ %iv.next2.i, %L19.us.i.i ], [ %bc.resume.val, %scalar.ph ]
  %iv.next2.i = add nuw nsw i64 %iv1.i, 1, !dbg !117
  %i35.i = mul nuw nsw i64 %iv1.i, 3, !dbg !117
  %i36.i = add nuw nsw i64 %i35.i, 30, !dbg !115
  %exitcond24.not.i.i = icmp ne i64 %iv1.i, %i34.i, !dbg !115
  call void @llvm.assume(i1 %exitcond24.not.i.i), !dbg !115
  %i37.i = add nuw nsw i64 %iv1.i, 2, !dbg !118
  %i39.i = icmp ult i64 %i37.i, %i38.i, !dbg !120
  call void @llvm.assume(i1 %i39.i), !dbg !120
  %i41.i = getelementptr inbounds double, double addrspace(13)* %i40.i, i64 %i36.i, !dbg !115
  %i42.i = load double, double addrspace(13)* %i41.i, align 8, !dbg !115, !tbaa !74
  %i44.i = getelementptr inbounds double, double addrspace(13)* %i43.i, i64 %i37.i, !dbg !120
  store double %i42.i, double addrspace(13)* %i44.i, align 8, !dbg !120, !tbaa !74
  %.not.us.i.i = icmp eq i64 %iv1.i, %i12.i, !dbg !121
  br i1 %.not.us.i.i, label %L41.us.i.i, label %L19.us.i.i, !dbg !123, !llvm.loop !144

L41.us.i.i:                                       ; preds = %L19.us.i.i
  %"i24'ipl.i.1" = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %"i14'ipc.i" unordered, align 8
  %49 = bitcast i8* %14 to {} addrspace(10)**
  store {} addrspace(10)* %"i24'ipl.i.1", {} addrspace(10)** %49, align 8, !invariant.group !136
  %i24.i.1 = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %i14.i unordered, align 8
  %i25.i.1 = bitcast {} addrspace(10)* %i24.i.1 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)*
  %i26.i.1 = addrspacecast { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)* %i25.i.1 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*
  %i27.i.1 = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %i26.i.1, i64 0, i32 1
  %i30.i.1 = bitcast {} addrspace(10)* %i24.i.1 to double addrspace(13)* addrspace(10)*
  %i31.i.1 = addrspacecast double addrspace(13)* addrspace(10)* %i30.i.1 to double addrspace(13)* addrspace(11)*
  %i38.i.1 = load i64, i64 addrspace(11)* %i27.i.1, align 8, !tbaa !54, !range !57
  %i43.i.1 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %i31.i.1, align 8, !tbaa !72, !nonnull !4
  br i1 %min.iters.check, label %scalar.ph24, label %vector.memcheck28, !dbg !115

vector.memcheck28:                                ; preds = %L41.us.i.i
  %scevgep30 = getelementptr double, double addrspace(13)* %i43.i.1, i64 4, !dbg !115
  %50 = add i64 %i12.i, 5, !dbg !115
  %scevgep32 = getelementptr double, double addrspace(13)* %i43.i.1, i64 %50, !dbg !115
  %scevgep34 = getelementptr double, double addrspace(13)* %i40.i, i64 30, !dbg !115
  %51 = mul i64 %i12.i, 3, !dbg !115
  %52 = add i64 %51, 31, !dbg !115
  %scevgep36 = getelementptr double, double addrspace(13)* %i40.i, i64 %52, !dbg !115
  %bound038 = icmp ult double addrspace(13)* %scevgep30, %scevgep36, !dbg !115
  %bound139 = icmp ult double addrspace(13)* %scevgep34, %scevgep32, !dbg !115
  %found.conflict40 = and i1 %bound038, %bound139, !dbg !115
  br i1 %found.conflict40, label %scalar.ph24, label %vector.ph29, !dbg !115

vector.ph29:                                      ; preds = %vector.memcheck28
  %n.mod.vf42 = and i64 %15, 3, !dbg !115
  %53 = icmp eq i64 %n.mod.vf42, 0, !dbg !115
  %54 = select i1 %53, i64 4, i64 %n.mod.vf42, !dbg !115
  %n.vec43 = sub i64 %15, %54, !dbg !115
  %broadcast.splatinsert52 = insertelement <2 x i64> poison, i64 %i34.i, i32 0, !dbg !115
  %broadcast.splat53 = shufflevector <2 x i64> %broadcast.splatinsert52, <2 x i64> poison, <2 x i32> zeroinitializer, !dbg !115
  %broadcast.splatinsert56 = insertelement <2 x i64> poison, i64 %i38.i.1, i32 0, !dbg !115
  %broadcast.splat57 = shufflevector <2 x i64> %broadcast.splatinsert56, <2 x i64> poison, <2 x i32> zeroinitializer, !dbg !115
  br label %vector.body25, !dbg !115

vector.body25:                                    ; preds = %vector.body25, %vector.ph29
  %index44 = phi i64 [ 0, %vector.ph29 ], [ %index.next45, %vector.body25 ], !dbg !117
  %vec.ind48 = phi <2 x i64> [ <i64 0, i64 1>, %vector.ph29 ], [ %66, %vector.body25 ]
  %step.add49 = add <2 x i64> %vec.ind48, <i64 2, i64 2>
  %55 = or i64 %index44, 2, !dbg !117
  %56 = mul nuw nsw i64 %index44, 3, !dbg !117
  %57 = mul nuw nsw i64 %55, 3, !dbg !117
  %58 = add nuw nsw i64 %56, 30, !dbg !115
  %59 = add nuw nsw i64 %57, 30, !dbg !115
  %60 = icmp ne <2 x i64> %vec.ind48, %broadcast.splat53, !dbg !115
  %61 = icmp ne <2 x i64> %step.add49, %broadcast.splat53, !dbg !115
  %62 = extractelement <2 x i1> %60, i32 0, !dbg !115
  call void @llvm.assume(i1 %62), !dbg !115
  %63 = extractelement <2 x i1> %60, i32 1, !dbg !115
  call void @llvm.assume(i1 %63), !dbg !115
  %64 = extractelement <2 x i1> %61, i32 0, !dbg !115
  call void @llvm.assume(i1 %64), !dbg !115
  %65 = extractelement <2 x i1> %61, i32 1, !dbg !115
  call void @llvm.assume(i1 %65), !dbg !115
  %66 = add <2 x i64> %vec.ind48, <i64 4, i64 4>, !dbg !118
  %67 = add <2 x i64> %vec.ind48, <i64 6, i64 6>, !dbg !118
  %68 = icmp ult <2 x i64> %66, %broadcast.splat57, !dbg !120
  %69 = icmp ult <2 x i64> %67, %broadcast.splat57, !dbg !120
  %70 = extractelement <2 x i1> %68, i32 0, !dbg !120
  call void @llvm.assume(i1 %70), !dbg !120
  %71 = extractelement <2 x i1> %68, i32 1, !dbg !120
  call void @llvm.assume(i1 %71), !dbg !120
  %72 = extractelement <2 x i1> %69, i32 0, !dbg !120
  call void @llvm.assume(i1 %72), !dbg !120
  %73 = extractelement <2 x i1> %69, i32 1, !dbg !120
  call void @llvm.assume(i1 %73), !dbg !120
  %74 = getelementptr inbounds double, double addrspace(13)* %i40.i, i64 %58, !dbg !115
  %75 = getelementptr inbounds double, double addrspace(13)* %i40.i, i64 %59, !dbg !115
  %76 = bitcast double addrspace(13)* %74 to <6 x double> addrspace(13)*, !dbg !115
  %77 = bitcast double addrspace(13)* %75 to <6 x double> addrspace(13)*, !dbg !115
  %wide.vec60 = load <6 x double>, <6 x double> addrspace(13)* %76, align 8, !dbg !115, !tbaa !74
  %wide.vec61 = load <6 x double>, <6 x double> addrspace(13)* %77, align 8, !dbg !115, !tbaa !74
  %strided.vec62 = shufflevector <6 x double> %wide.vec60, <6 x double> poison, <2 x i32> <i32 0, i32 3>, !dbg !115
  %strided.vec63 = shufflevector <6 x double> %wide.vec61, <6 x double> poison, <2 x i32> <i32 0, i32 3>, !dbg !115
  %78 = extractelement <2 x i64> %66, i32 0, !dbg !120
  %79 = getelementptr inbounds double, double addrspace(13)* %i43.i.1, i64 %78, !dbg !120
  %80 = bitcast double addrspace(13)* %79 to <2 x double> addrspace(13)*, !dbg !120
  store <2 x double> %strided.vec62, <2 x double> addrspace(13)* %80, align 8, !dbg !120, !tbaa !74, !alias.scope !145, !noalias !148
  %81 = getelementptr inbounds double, double addrspace(13)* %79, i64 2, !dbg !120
  %82 = bitcast double addrspace(13)* %81 to <2 x double> addrspace(13)*, !dbg !120
  store <2 x double> %strided.vec63, <2 x double> addrspace(13)* %82, align 8, !dbg !120, !tbaa !74, !alias.scope !145, !noalias !148
  %index.next45 = add i64 %index44, 4, !dbg !117
  %83 = icmp eq i64 %index.next45, %n.vec43, !dbg !117
  br i1 %83, label %scalar.ph24, label %vector.body25, !dbg !117, !llvm.loop !150

scalar.ph24:                                      ; preds = %vector.body25, %vector.memcheck28, %L41.us.i.i
  %bc.resume.val46 = phi i64 [ 0, %L41.us.i.i ], [ 0, %vector.memcheck28 ], [ %n.vec43, %vector.body25 ]
  br label %L19.us.i.i.1, !dbg !115

julia_fooSend_1802.exit.i:                        ; preds = %L19.us.i.i.5
  %i52.i = call i64 inttoptr (i64 140230507985449 to i64 ()*)() #12, !dbg !132
  %"i16'ipl_unwrap.i" = load atomic {} addrspace(10)*, {} addrspace(10)** %"i'ipa.i" unordered, align 16
  br label %invertL41.us.i.i

inverttop.split.us.i.i:                           ; preds = %invertL19.preheader.us.i.i
  call void @free(i8* nonnull %malloccall.i)
  br label %diffejulia_foo_1799.inner.6.exit

invertL19.preheader.us.i.i:                       ; preds = %invertidxend9.us.i_phimerge.i
  %84 = add nsw i64 %"iv'ac.0.i", -1
  br i1 %.not.i, label %inverttop.split.us.i.i, label %invertL41.us.i.i

invertidxend9.us.i.i:                             ; preds = %invertL41.us.i.i, %invertidxend9.us.i_phimerge.i
  %"iv1'ac.0.i" = phi i64 [ %i12.i, %invertL41.us.i.i ], [ %91, %invertidxend9.us.i_phimerge.i ]
  %85 = load double addrspace(13)* addrspace(10)*, double addrspace(13)* addrspace(10)** %93, align 8
  %"i31'ipc_unwrap.i" = addrspacecast double addrspace(13)* addrspace(10)* %85 to double addrspace(13)* addrspace(11)*
  %"i43'il_phi_unwrap.i" = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %"i31'ipc_unwrap.i", align 8, !dbg !120, !tbaa !72
  %i37_unwrap.i = add nuw nsw i64 %i18_unwrap.i, %"iv1'ac.0.i"
  %"i44'ipg_unwrap.i" = getelementptr inbounds double, double addrspace(13)* %"i43'il_phi_unwrap.i", i64 %i37_unwrap.i
  %86 = load double, double addrspace(13)* %"i44'ipg_unwrap.i", align 8
  store double 0.000000e+00, double addrspace(13)* %"i44'ipg_unwrap.i", align 8
  br i1 %.not.i, label %invertidxend9.us.i_phimerge.i, label %invertidxend9.us.i_phirc.i

invertidxend9.us.i_phirc.i:                       ; preds = %invertidxend9.us.i.i
  %"i47'il_phi_unwrap.i" = load atomic {} addrspace(10)*, {} addrspace(10)** %94 unordered, align 8, !dbg !126, !tbaa !50
  br label %invertidxend9.us.i_phimerge.i

invertidxend9.us.i_phimerge.i:                    ; preds = %invertidxend9.us.i.i, %invertidxend9.us.i_phirc.i
  %87 = phi {} addrspace(10)* [ %"i47'il_phi_unwrap.i", %invertidxend9.us.i_phirc.i ], [ %"i16'ipl_unwrap.i", %invertidxend9.us.i.i ]
  %"i28'ipc_unwrap.i" = bitcast {} addrspace(10)* %87 to double addrspace(13)* addrspace(10)*
  %"i29'ipc_unwrap.i" = addrspacecast double addrspace(13)* addrspace(10)* %"i28'ipc_unwrap.i" to double addrspace(13)* addrspace(11)*
  %"i40'il_phi_unwrap.i" = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %"i29'ipc_unwrap.i", align 16, !dbg !115, !tbaa !72
  %i35_unwrap.i = mul nuw nsw i64 %"iv1'ac.0.i", 3
  %i36_unwrap.i = add nuw nsw i64 %i35_unwrap.i, 30
  %"i41'ipg_unwrap.i" = getelementptr inbounds double, double addrspace(13)* %"i40'il_phi_unwrap.i", i64 %i36_unwrap.i
  %88 = load double, double addrspace(13)* %"i41'ipg_unwrap.i", align 8
  %89 = fadd fast double %88, %86
  store double %89, double addrspace(13)* %"i41'ipg_unwrap.i", align 8
  %90 = icmp eq i64 %"iv1'ac.0.i", 0
  %91 = add nsw i64 %"iv1'ac.0.i", -1
  br i1 %90, label %invertL19.preheader.us.i.i, label %invertidxend9.us.i.i

invertL41.us.i.i:                                 ; preds = %invertL19.preheader.us.i.i, %julia_fooSend_1802.exit.i
  %"iv'ac.0.i" = phi i64 [ %84, %invertL19.preheader.us.i.i ], [ 5, %julia_fooSend_1802.exit.i ]
  %92 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %"i24'ipl_malloccache.i", i64 %"iv'ac.0.i"
  %93 = bitcast {} addrspace(10)** %92 to double addrspace(13)* addrspace(10)**
  %i17_unwrap.i = shl nuw nsw i64 %"iv'ac.0.i", 1
  %i18_unwrap.i = add nuw nsw i64 %i17_unwrap.i, 2
  %.not.i = icmp eq i64 %"iv'ac.0.i", 0
  %value_phi1.off.us.i_unwrap.i = add nuw nsw i64 %"iv'ac.0.i", 1
  %94 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %"i'ipa.i", i64 %value_phi1.off.us.i_unwrap.i
  br label %invertidxend9.us.i.i

diffejulia_foo_1799.inner.6.exit:                 ; preds = %julia_fooSend_1802.exit.thread.i, %inverttop.split.us.i.i
  %95 = load {} addrspace(10)*, {} addrspace(10)** %9, align 8, !tbaa !39
  %96 = bitcast {}*** %pgcstack to {} addrspace(10)**
  store {} addrspace(10)* %95, {} addrspace(10)** %96, align 8, !tbaa !39
  ret void

L19.us.i.i.1:                                     ; preds = %L19.us.i.i.1, %scalar.ph24
  %iv1.i.1 = phi i64 [ %iv.next2.i.1, %L19.us.i.i.1 ], [ %bc.resume.val46, %scalar.ph24 ]
  %iv.next2.i.1 = add nuw nsw i64 %iv1.i.1, 1, !dbg !117
  %i35.i.1 = mul nuw nsw i64 %iv1.i.1, 3, !dbg !117
  %i36.i.1 = add nuw nsw i64 %i35.i.1, 30, !dbg !115
  %exitcond24.not.i.i.1 = icmp ne i64 %iv1.i.1, %i34.i, !dbg !115
  call void @llvm.assume(i1 %exitcond24.not.i.i.1), !dbg !115
  %i37.i.1 = add nuw nsw i64 %iv1.i.1, 4, !dbg !118
  %i39.i.1 = icmp ult i64 %i37.i.1, %i38.i.1, !dbg !120
  call void @llvm.assume(i1 %i39.i.1), !dbg !120
  %i41.i.1 = getelementptr inbounds double, double addrspace(13)* %i40.i, i64 %i36.i.1, !dbg !115
  %i42.i.1 = load double, double addrspace(13)* %i41.i.1, align 8, !dbg !115, !tbaa !74
  %i44.i.1 = getelementptr inbounds double, double addrspace(13)* %i43.i.1, i64 %i37.i.1, !dbg !120
  store double %i42.i.1, double addrspace(13)* %i44.i.1, align 8, !dbg !120, !tbaa !74
  %.not.us.i.i.1 = icmp eq i64 %iv1.i.1, %i12.i, !dbg !121
  br i1 %.not.us.i.i.1, label %L41.us.i.i.1, label %L19.us.i.i.1, !dbg !123, !llvm.loop !151

L41.us.i.i.1:                                     ; preds = %L19.us.i.i.1
  %"i24'ipl.i.2" = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %"i14'ipc.i" unordered, align 8
  %97 = getelementptr inbounds i8, i8* %malloccall.i, i64 16
  %98 = bitcast i8* %97 to {} addrspace(10)**
  store {} addrspace(10)* %"i24'ipl.i.2", {} addrspace(10)** %98, align 8, !invariant.group !136
  %i24.i.2 = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %i14.i unordered, align 8
  %i25.i.2 = bitcast {} addrspace(10)* %i24.i.2 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)*
  %i26.i.2 = addrspacecast { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)* %i25.i.2 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*
  %i27.i.2 = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %i26.i.2, i64 0, i32 1
  %i30.i.2 = bitcast {} addrspace(10)* %i24.i.2 to double addrspace(13)* addrspace(10)*
  %i31.i.2 = addrspacecast double addrspace(13)* addrspace(10)* %i30.i.2 to double addrspace(13)* addrspace(11)*
  %i38.i.2 = load i64, i64 addrspace(11)* %i27.i.2, align 8, !tbaa !54, !range !57
  %i43.i.2 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %i31.i.2, align 8, !tbaa !72, !nonnull !4
  br i1 %min.iters.check, label %scalar.ph65, label %vector.memcheck69, !dbg !115

vector.memcheck69:                                ; preds = %L41.us.i.i.1
  %scevgep71 = getelementptr double, double addrspace(13)* %i43.i.2, i64 6, !dbg !115
  %99 = add i64 %i12.i, 7, !dbg !115
  %scevgep73 = getelementptr double, double addrspace(13)* %i43.i.2, i64 %99, !dbg !115
  %scevgep75 = getelementptr double, double addrspace(13)* %i40.i, i64 30, !dbg !115
  %100 = mul i64 %i12.i, 3, !dbg !115
  %101 = add i64 %100, 31, !dbg !115
  %scevgep77 = getelementptr double, double addrspace(13)* %i40.i, i64 %101, !dbg !115
  %bound079 = icmp ult double addrspace(13)* %scevgep71, %scevgep77, !dbg !115
  %bound180 = icmp ult double addrspace(13)* %scevgep75, %scevgep73, !dbg !115
  %found.conflict81 = and i1 %bound079, %bound180, !dbg !115
  br i1 %found.conflict81, label %scalar.ph65, label %vector.ph70, !dbg !115

vector.ph70:                                      ; preds = %vector.memcheck69
  %n.mod.vf83 = and i64 %15, 3, !dbg !115
  %102 = icmp eq i64 %n.mod.vf83, 0, !dbg !115
  %103 = select i1 %102, i64 4, i64 %n.mod.vf83, !dbg !115
  %n.vec84 = sub i64 %15, %103, !dbg !115
  %broadcast.splatinsert93 = insertelement <2 x i64> poison, i64 %i34.i, i32 0, !dbg !115
  %broadcast.splat94 = shufflevector <2 x i64> %broadcast.splatinsert93, <2 x i64> poison, <2 x i32> zeroinitializer, !dbg !115
  %broadcast.splatinsert97 = insertelement <2 x i64> poison, i64 %i38.i.2, i32 0, !dbg !115
  %broadcast.splat98 = shufflevector <2 x i64> %broadcast.splatinsert97, <2 x i64> poison, <2 x i32> zeroinitializer, !dbg !115
  br label %vector.body66, !dbg !115

vector.body66:                                    ; preds = %vector.body66, %vector.ph70
  %index85 = phi i64 [ 0, %vector.ph70 ], [ %index.next86, %vector.body66 ], !dbg !117
  %vec.ind89 = phi <2 x i64> [ <i64 0, i64 1>, %vector.ph70 ], [ %vec.ind.next92, %vector.body66 ]
  %step.add90 = add <2 x i64> %vec.ind89, <i64 2, i64 2>
  %104 = or i64 %index85, 2, !dbg !117
  %105 = mul nuw nsw i64 %index85, 3, !dbg !117
  %106 = mul nuw nsw i64 %104, 3, !dbg !117
  %107 = add nuw nsw i64 %105, 30, !dbg !115
  %108 = add nuw nsw i64 %106, 30, !dbg !115
  %109 = icmp ne <2 x i64> %vec.ind89, %broadcast.splat94, !dbg !115
  %110 = icmp ne <2 x i64> %step.add90, %broadcast.splat94, !dbg !115
  %111 = extractelement <2 x i1> %109, i32 0, !dbg !115
  call void @llvm.assume(i1 %111), !dbg !115
  %112 = extractelement <2 x i1> %109, i32 1, !dbg !115
  call void @llvm.assume(i1 %112), !dbg !115
  %113 = extractelement <2 x i1> %110, i32 0, !dbg !115
  call void @llvm.assume(i1 %113), !dbg !115
  %114 = extractelement <2 x i1> %110, i32 1, !dbg !115
  call void @llvm.assume(i1 %114), !dbg !115
  %115 = add nuw nsw <2 x i64> %vec.ind89, <i64 6, i64 6>, !dbg !118
  %116 = add <2 x i64> %vec.ind89, <i64 8, i64 8>, !dbg !118
  %117 = icmp ult <2 x i64> %115, %broadcast.splat98, !dbg !120
  %118 = icmp ult <2 x i64> %116, %broadcast.splat98, !dbg !120
  %119 = extractelement <2 x i1> %117, i32 0, !dbg !120
  call void @llvm.assume(i1 %119), !dbg !120
  %120 = extractelement <2 x i1> %117, i32 1, !dbg !120
  call void @llvm.assume(i1 %120), !dbg !120
  %121 = extractelement <2 x i1> %118, i32 0, !dbg !120
  call void @llvm.assume(i1 %121), !dbg !120
  %122 = extractelement <2 x i1> %118, i32 1, !dbg !120
  call void @llvm.assume(i1 %122), !dbg !120
  %123 = getelementptr inbounds double, double addrspace(13)* %i40.i, i64 %107, !dbg !115
  %124 = getelementptr inbounds double, double addrspace(13)* %i40.i, i64 %108, !dbg !115
  %125 = bitcast double addrspace(13)* %123 to <6 x double> addrspace(13)*, !dbg !115
  %126 = bitcast double addrspace(13)* %124 to <6 x double> addrspace(13)*, !dbg !115
  %wide.vec101 = load <6 x double>, <6 x double> addrspace(13)* %125, align 8, !dbg !115, !tbaa !74
  %wide.vec102 = load <6 x double>, <6 x double> addrspace(13)* %126, align 8, !dbg !115, !tbaa !74
  %strided.vec103 = shufflevector <6 x double> %wide.vec101, <6 x double> poison, <2 x i32> <i32 0, i32 3>, !dbg !115
  %strided.vec104 = shufflevector <6 x double> %wide.vec102, <6 x double> poison, <2 x i32> <i32 0, i32 3>, !dbg !115
  %127 = extractelement <2 x i64> %115, i32 0, !dbg !120
  %128 = getelementptr inbounds double, double addrspace(13)* %i43.i.2, i64 %127, !dbg !120
  %129 = bitcast double addrspace(13)* %128 to <2 x double> addrspace(13)*, !dbg !120
  store <2 x double> %strided.vec103, <2 x double> addrspace(13)* %129, align 8, !dbg !120, !tbaa !74, !alias.scope !152, !noalias !155
  %130 = getelementptr inbounds double, double addrspace(13)* %128, i64 2, !dbg !120
  %131 = bitcast double addrspace(13)* %130 to <2 x double> addrspace(13)*, !dbg !120
  store <2 x double> %strided.vec104, <2 x double> addrspace(13)* %131, align 8, !dbg !120, !tbaa !74, !alias.scope !152, !noalias !155
  %index.next86 = add i64 %index85, 4, !dbg !117
  %vec.ind.next92 = add <2 x i64> %vec.ind89, <i64 4, i64 4>
  %132 = icmp eq i64 %index.next86, %n.vec84, !dbg !117
  br i1 %132, label %scalar.ph65, label %vector.body66, !dbg !117, !llvm.loop !157

scalar.ph65:                                      ; preds = %vector.body66, %vector.memcheck69, %L41.us.i.i.1
  %bc.resume.val87 = phi i64 [ 0, %L41.us.i.i.1 ], [ 0, %vector.memcheck69 ], [ %n.vec84, %vector.body66 ]
  br label %L19.us.i.i.2, !dbg !115

L19.us.i.i.2:                                     ; preds = %L19.us.i.i.2, %scalar.ph65
  %iv1.i.2 = phi i64 [ %iv.next2.i.2, %L19.us.i.i.2 ], [ %bc.resume.val87, %scalar.ph65 ]
  %iv.next2.i.2 = add nuw nsw i64 %iv1.i.2, 1, !dbg !117
  %i35.i.2 = mul nuw nsw i64 %iv1.i.2, 3, !dbg !117
  %i36.i.2 = add nuw nsw i64 %i35.i.2, 30, !dbg !115
  %exitcond24.not.i.i.2 = icmp ne i64 %iv1.i.2, %i34.i, !dbg !115
  call void @llvm.assume(i1 %exitcond24.not.i.i.2), !dbg !115
  %i37.i.2 = add nuw nsw i64 %iv1.i.2, 6, !dbg !118
  %i39.i.2 = icmp ult i64 %i37.i.2, %i38.i.2, !dbg !120
  call void @llvm.assume(i1 %i39.i.2), !dbg !120
  %i41.i.2 = getelementptr inbounds double, double addrspace(13)* %i40.i, i64 %i36.i.2, !dbg !115
  %i42.i.2 = load double, double addrspace(13)* %i41.i.2, align 8, !dbg !115, !tbaa !74
  %i44.i.2 = getelementptr inbounds double, double addrspace(13)* %i43.i.2, i64 %i37.i.2, !dbg !120
  store double %i42.i.2, double addrspace(13)* %i44.i.2, align 8, !dbg !120, !tbaa !74
  %.not.us.i.i.2 = icmp eq i64 %iv1.i.2, %i12.i, !dbg !121
  br i1 %.not.us.i.i.2, label %L41.us.i.i.2, label %L19.us.i.i.2, !dbg !123, !llvm.loop !158

L41.us.i.i.2:                                     ; preds = %L19.us.i.i.2
  %"i24'ipl.i.3" = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %"i14'ipc.i" unordered, align 8
  %133 = getelementptr inbounds i8, i8* %malloccall.i, i64 24
  %134 = bitcast i8* %133 to {} addrspace(10)**
  store {} addrspace(10)* %"i24'ipl.i.3", {} addrspace(10)** %134, align 8, !invariant.group !136
  %i24.i.3 = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %i14.i unordered, align 8
  %i25.i.3 = bitcast {} addrspace(10)* %i24.i.3 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)*
  %i26.i.3 = addrspacecast { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)* %i25.i.3 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*
  %i27.i.3 = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %i26.i.3, i64 0, i32 1
  %i30.i.3 = bitcast {} addrspace(10)* %i24.i.3 to double addrspace(13)* addrspace(10)*
  %i31.i.3 = addrspacecast double addrspace(13)* addrspace(10)* %i30.i.3 to double addrspace(13)* addrspace(11)*
  %i38.i.3 = load i64, i64 addrspace(11)* %i27.i.3, align 8, !tbaa !54, !range !57
  %i43.i.3 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %i31.i.3, align 8, !tbaa !72, !nonnull !4
  br i1 %min.iters.check, label %scalar.ph106, label %vector.memcheck110, !dbg !115

vector.memcheck110:                               ; preds = %L41.us.i.i.2
  %scevgep112 = getelementptr double, double addrspace(13)* %i43.i.3, i64 8, !dbg !115
  %135 = add i64 %i12.i, 9, !dbg !115
  %scevgep114 = getelementptr double, double addrspace(13)* %i43.i.3, i64 %135, !dbg !115
  %scevgep116 = getelementptr double, double addrspace(13)* %i40.i, i64 30, !dbg !115
  %136 = mul i64 %i12.i, 3, !dbg !115
  %137 = add i64 %136, 31, !dbg !115
  %scevgep118 = getelementptr double, double addrspace(13)* %i40.i, i64 %137, !dbg !115
  %bound0120 = icmp ult double addrspace(13)* %scevgep112, %scevgep118, !dbg !115
  %bound1121 = icmp ult double addrspace(13)* %scevgep116, %scevgep114, !dbg !115
  %found.conflict122 = and i1 %bound0120, %bound1121, !dbg !115
  br i1 %found.conflict122, label %scalar.ph106, label %vector.ph111, !dbg !115

vector.ph111:                                     ; preds = %vector.memcheck110
  %n.mod.vf124 = and i64 %15, 3, !dbg !115
  %138 = icmp eq i64 %n.mod.vf124, 0, !dbg !115
  %139 = select i1 %138, i64 4, i64 %n.mod.vf124, !dbg !115
  %n.vec125 = sub i64 %15, %139, !dbg !115
  %broadcast.splatinsert134 = insertelement <2 x i64> poison, i64 %i34.i, i32 0, !dbg !115
  %broadcast.splat135 = shufflevector <2 x i64> %broadcast.splatinsert134, <2 x i64> poison, <2 x i32> zeroinitializer, !dbg !115
  %broadcast.splatinsert138 = insertelement <2 x i64> poison, i64 %i38.i.3, i32 0, !dbg !115
  %broadcast.splat139 = shufflevector <2 x i64> %broadcast.splatinsert138, <2 x i64> poison, <2 x i32> zeroinitializer, !dbg !115
  br label %vector.body107, !dbg !115

vector.body107:                                   ; preds = %vector.body107, %vector.ph111
  %index126 = phi i64 [ 0, %vector.ph111 ], [ %index.next127, %vector.body107 ], !dbg !117
  %vec.ind130 = phi <2 x i64> [ <i64 0, i64 1>, %vector.ph111 ], [ %vec.ind.next133, %vector.body107 ]
  %step.add131 = add <2 x i64> %vec.ind130, <i64 2, i64 2>
  %140 = or i64 %index126, 2, !dbg !117
  %141 = mul nuw nsw i64 %index126, 3, !dbg !117
  %142 = mul nuw nsw i64 %140, 3, !dbg !117
  %143 = add nuw nsw i64 %141, 30, !dbg !115
  %144 = add nuw nsw i64 %142, 30, !dbg !115
  %145 = icmp ne <2 x i64> %vec.ind130, %broadcast.splat135, !dbg !115
  %146 = icmp ne <2 x i64> %step.add131, %broadcast.splat135, !dbg !115
  %147 = extractelement <2 x i1> %145, i32 0, !dbg !115
  call void @llvm.assume(i1 %147), !dbg !115
  %148 = extractelement <2 x i1> %145, i32 1, !dbg !115
  call void @llvm.assume(i1 %148), !dbg !115
  %149 = extractelement <2 x i1> %146, i32 0, !dbg !115
  call void @llvm.assume(i1 %149), !dbg !115
  %150 = extractelement <2 x i1> %146, i32 1, !dbg !115
  call void @llvm.assume(i1 %150), !dbg !115
  %151 = add nuw nsw <2 x i64> %vec.ind130, <i64 8, i64 8>, !dbg !118
  %152 = add <2 x i64> %vec.ind130, <i64 10, i64 10>, !dbg !118
  %153 = icmp ult <2 x i64> %151, %broadcast.splat139, !dbg !120
  %154 = icmp ult <2 x i64> %152, %broadcast.splat139, !dbg !120
  %155 = extractelement <2 x i1> %153, i32 0, !dbg !120
  call void @llvm.assume(i1 %155), !dbg !120
  %156 = extractelement <2 x i1> %153, i32 1, !dbg !120
  call void @llvm.assume(i1 %156), !dbg !120
  %157 = extractelement <2 x i1> %154, i32 0, !dbg !120
  call void @llvm.assume(i1 %157), !dbg !120
  %158 = extractelement <2 x i1> %154, i32 1, !dbg !120
  call void @llvm.assume(i1 %158), !dbg !120
  %159 = getelementptr inbounds double, double addrspace(13)* %i40.i, i64 %143, !dbg !115
  %160 = getelementptr inbounds double, double addrspace(13)* %i40.i, i64 %144, !dbg !115
  %161 = bitcast double addrspace(13)* %159 to <6 x double> addrspace(13)*, !dbg !115
  %162 = bitcast double addrspace(13)* %160 to <6 x double> addrspace(13)*, !dbg !115
  %wide.vec142 = load <6 x double>, <6 x double> addrspace(13)* %161, align 8, !dbg !115, !tbaa !74
  %wide.vec143 = load <6 x double>, <6 x double> addrspace(13)* %162, align 8, !dbg !115, !tbaa !74
  %strided.vec144 = shufflevector <6 x double> %wide.vec142, <6 x double> poison, <2 x i32> <i32 0, i32 3>, !dbg !115
  %strided.vec145 = shufflevector <6 x double> %wide.vec143, <6 x double> poison, <2 x i32> <i32 0, i32 3>, !dbg !115
  %163 = extractelement <2 x i64> %151, i32 0, !dbg !120
  %164 = getelementptr inbounds double, double addrspace(13)* %i43.i.3, i64 %163, !dbg !120
  %165 = bitcast double addrspace(13)* %164 to <2 x double> addrspace(13)*, !dbg !120
  store <2 x double> %strided.vec144, <2 x double> addrspace(13)* %165, align 8, !dbg !120, !tbaa !74, !alias.scope !159, !noalias !162
  %166 = getelementptr inbounds double, double addrspace(13)* %164, i64 2, !dbg !120
  %167 = bitcast double addrspace(13)* %166 to <2 x double> addrspace(13)*, !dbg !120
  store <2 x double> %strided.vec145, <2 x double> addrspace(13)* %167, align 8, !dbg !120, !tbaa !74, !alias.scope !159, !noalias !162
  %index.next127 = add i64 %index126, 4, !dbg !117
  %vec.ind.next133 = add <2 x i64> %vec.ind130, <i64 4, i64 4>
  %168 = icmp eq i64 %index.next127, %n.vec125, !dbg !117
  br i1 %168, label %scalar.ph106, label %vector.body107, !dbg !117, !llvm.loop !164

scalar.ph106:                                     ; preds = %vector.body107, %vector.memcheck110, %L41.us.i.i.2
  %bc.resume.val128 = phi i64 [ 0, %L41.us.i.i.2 ], [ 0, %vector.memcheck110 ], [ %n.vec125, %vector.body107 ]
  br label %L19.us.i.i.3, !dbg !115

L19.us.i.i.3:                                     ; preds = %L19.us.i.i.3, %scalar.ph106
  %iv1.i.3 = phi i64 [ %iv.next2.i.3, %L19.us.i.i.3 ], [ %bc.resume.val128, %scalar.ph106 ]
  %iv.next2.i.3 = add nuw nsw i64 %iv1.i.3, 1, !dbg !117
  %i35.i.3 = mul nuw nsw i64 %iv1.i.3, 3, !dbg !117
  %i36.i.3 = add nuw nsw i64 %i35.i.3, 30, !dbg !115
  %exitcond24.not.i.i.3 = icmp ne i64 %iv1.i.3, %i34.i, !dbg !115
  call void @llvm.assume(i1 %exitcond24.not.i.i.3), !dbg !115
  %i37.i.3 = add nuw nsw i64 %iv1.i.3, 8, !dbg !118
  %i39.i.3 = icmp ult i64 %i37.i.3, %i38.i.3, !dbg !120
  call void @llvm.assume(i1 %i39.i.3), !dbg !120
  %i41.i.3 = getelementptr inbounds double, double addrspace(13)* %i40.i, i64 %i36.i.3, !dbg !115
  %i42.i.3 = load double, double addrspace(13)* %i41.i.3, align 8, !dbg !115, !tbaa !74
  %i44.i.3 = getelementptr inbounds double, double addrspace(13)* %i43.i.3, i64 %i37.i.3, !dbg !120
  store double %i42.i.3, double addrspace(13)* %i44.i.3, align 8, !dbg !120, !tbaa !74
  %.not.us.i.i.3 = icmp eq i64 %iv1.i.3, %i12.i, !dbg !121
  br i1 %.not.us.i.i.3, label %L41.us.i.i.3, label %L19.us.i.i.3, !dbg !123, !llvm.loop !165

L41.us.i.i.3:                                     ; preds = %L19.us.i.i.3
  %"i24'ipl.i.4" = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %"i14'ipc.i" unordered, align 8
  %169 = getelementptr inbounds i8, i8* %malloccall.i, i64 32
  %170 = bitcast i8* %169 to {} addrspace(10)**
  store {} addrspace(10)* %"i24'ipl.i.4", {} addrspace(10)** %170, align 8, !invariant.group !136
  %i24.i.4 = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %i14.i unordered, align 8
  %i25.i.4 = bitcast {} addrspace(10)* %i24.i.4 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)*
  %i26.i.4 = addrspacecast { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)* %i25.i.4 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*
  %i27.i.4 = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %i26.i.4, i64 0, i32 1
  %i30.i.4 = bitcast {} addrspace(10)* %i24.i.4 to double addrspace(13)* addrspace(10)*
  %i31.i.4 = addrspacecast double addrspace(13)* addrspace(10)* %i30.i.4 to double addrspace(13)* addrspace(11)*
  %i38.i.4 = load i64, i64 addrspace(11)* %i27.i.4, align 8, !tbaa !54, !range !57
  %i43.i.4 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %i31.i.4, align 8, !tbaa !72, !nonnull !4
  br i1 %min.iters.check, label %scalar.ph147, label %vector.memcheck151, !dbg !115

vector.memcheck151:                               ; preds = %L41.us.i.i.3
  %scevgep153 = getelementptr double, double addrspace(13)* %i43.i.4, i64 10, !dbg !115
  %171 = add i64 %i12.i, 11, !dbg !115
  %scevgep155 = getelementptr double, double addrspace(13)* %i43.i.4, i64 %171, !dbg !115
  %scevgep157 = getelementptr double, double addrspace(13)* %i40.i, i64 30, !dbg !115
  %172 = mul i64 %i12.i, 3, !dbg !115
  %173 = add i64 %172, 31, !dbg !115
  %scevgep159 = getelementptr double, double addrspace(13)* %i40.i, i64 %173, !dbg !115
  %bound0161 = icmp ult double addrspace(13)* %scevgep153, %scevgep159, !dbg !115
  %bound1162 = icmp ult double addrspace(13)* %scevgep157, %scevgep155, !dbg !115
  %found.conflict163 = and i1 %bound0161, %bound1162, !dbg !115
  br i1 %found.conflict163, label %scalar.ph147, label %vector.ph152, !dbg !115

vector.ph152:                                     ; preds = %vector.memcheck151
  %n.mod.vf165 = and i64 %15, 3, !dbg !115
  %174 = icmp eq i64 %n.mod.vf165, 0, !dbg !115
  %175 = select i1 %174, i64 4, i64 %n.mod.vf165, !dbg !115
  %n.vec166 = sub i64 %15, %175, !dbg !115
  %broadcast.splatinsert175 = insertelement <2 x i64> poison, i64 %i34.i, i32 0, !dbg !115
  %broadcast.splat176 = shufflevector <2 x i64> %broadcast.splatinsert175, <2 x i64> poison, <2 x i32> zeroinitializer, !dbg !115
  %broadcast.splatinsert179 = insertelement <2 x i64> poison, i64 %i38.i.4, i32 0, !dbg !115
  %broadcast.splat180 = shufflevector <2 x i64> %broadcast.splatinsert179, <2 x i64> poison, <2 x i32> zeroinitializer, !dbg !115
  br label %vector.body148, !dbg !115

vector.body148:                                   ; preds = %vector.body148, %vector.ph152
  %index167 = phi i64 [ 0, %vector.ph152 ], [ %index.next168, %vector.body148 ], !dbg !117
  %vec.ind171 = phi <2 x i64> [ <i64 0, i64 1>, %vector.ph152 ], [ %vec.ind.next174, %vector.body148 ]
  %step.add172 = add <2 x i64> %vec.ind171, <i64 2, i64 2>
  %176 = or i64 %index167, 2, !dbg !117
  %177 = mul nuw nsw i64 %index167, 3, !dbg !117
  %178 = mul nuw nsw i64 %176, 3, !dbg !117
  %179 = add nuw nsw i64 %177, 30, !dbg !115
  %180 = add nuw nsw i64 %178, 30, !dbg !115
  %181 = icmp ne <2 x i64> %vec.ind171, %broadcast.splat176, !dbg !115
  %182 = icmp ne <2 x i64> %step.add172, %broadcast.splat176, !dbg !115
  %183 = extractelement <2 x i1> %181, i32 0, !dbg !115
  call void @llvm.assume(i1 %183), !dbg !115
  %184 = extractelement <2 x i1> %181, i32 1, !dbg !115
  call void @llvm.assume(i1 %184), !dbg !115
  %185 = extractelement <2 x i1> %182, i32 0, !dbg !115
  call void @llvm.assume(i1 %185), !dbg !115
  %186 = extractelement <2 x i1> %182, i32 1, !dbg !115
  call void @llvm.assume(i1 %186), !dbg !115
  %187 = add nuw nsw <2 x i64> %vec.ind171, <i64 10, i64 10>, !dbg !118
  %188 = add <2 x i64> %vec.ind171, <i64 12, i64 12>, !dbg !118
  %189 = icmp ult <2 x i64> %187, %broadcast.splat180, !dbg !120
  %190 = icmp ult <2 x i64> %188, %broadcast.splat180, !dbg !120
  %191 = extractelement <2 x i1> %189, i32 0, !dbg !120
  call void @llvm.assume(i1 %191), !dbg !120
  %192 = extractelement <2 x i1> %189, i32 1, !dbg !120
  call void @llvm.assume(i1 %192), !dbg !120
  %193 = extractelement <2 x i1> %190, i32 0, !dbg !120
  call void @llvm.assume(i1 %193), !dbg !120
  %194 = extractelement <2 x i1> %190, i32 1, !dbg !120
  call void @llvm.assume(i1 %194), !dbg !120
  %195 = getelementptr inbounds double, double addrspace(13)* %i40.i, i64 %179, !dbg !115
  %196 = getelementptr inbounds double, double addrspace(13)* %i40.i, i64 %180, !dbg !115
  %197 = bitcast double addrspace(13)* %195 to <6 x double> addrspace(13)*, !dbg !115
  %198 = bitcast double addrspace(13)* %196 to <6 x double> addrspace(13)*, !dbg !115
  %wide.vec183 = load <6 x double>, <6 x double> addrspace(13)* %197, align 8, !dbg !115, !tbaa !74
  %wide.vec184 = load <6 x double>, <6 x double> addrspace(13)* %198, align 8, !dbg !115, !tbaa !74
  %strided.vec185 = shufflevector <6 x double> %wide.vec183, <6 x double> poison, <2 x i32> <i32 0, i32 3>, !dbg !115
  %strided.vec186 = shufflevector <6 x double> %wide.vec184, <6 x double> poison, <2 x i32> <i32 0, i32 3>, !dbg !115
  %199 = extractelement <2 x i64> %187, i32 0, !dbg !120
  %200 = getelementptr inbounds double, double addrspace(13)* %i43.i.4, i64 %199, !dbg !120
  %201 = bitcast double addrspace(13)* %200 to <2 x double> addrspace(13)*, !dbg !120
  store <2 x double> %strided.vec185, <2 x double> addrspace(13)* %201, align 8, !dbg !120, !tbaa !74, !alias.scope !166, !noalias !169
  %202 = getelementptr inbounds double, double addrspace(13)* %200, i64 2, !dbg !120
  %203 = bitcast double addrspace(13)* %202 to <2 x double> addrspace(13)*, !dbg !120
  store <2 x double> %strided.vec186, <2 x double> addrspace(13)* %203, align 8, !dbg !120, !tbaa !74, !alias.scope !166, !noalias !169
  %index.next168 = add i64 %index167, 4, !dbg !117
  %vec.ind.next174 = add <2 x i64> %vec.ind171, <i64 4, i64 4>
  %204 = icmp eq i64 %index.next168, %n.vec166, !dbg !117
  br i1 %204, label %scalar.ph147, label %vector.body148, !dbg !117, !llvm.loop !171

scalar.ph147:                                     ; preds = %vector.body148, %vector.memcheck151, %L41.us.i.i.3
  %bc.resume.val169 = phi i64 [ 0, %L41.us.i.i.3 ], [ 0, %vector.memcheck151 ], [ %n.vec166, %vector.body148 ]
  br label %L19.us.i.i.4, !dbg !115

L19.us.i.i.4:                                     ; preds = %L19.us.i.i.4, %scalar.ph147
  %iv1.i.4 = phi i64 [ %iv.next2.i.4, %L19.us.i.i.4 ], [ %bc.resume.val169, %scalar.ph147 ]
  %iv.next2.i.4 = add nuw nsw i64 %iv1.i.4, 1, !dbg !117
  %i35.i.4 = mul nuw nsw i64 %iv1.i.4, 3, !dbg !117
  %i36.i.4 = add nuw nsw i64 %i35.i.4, 30, !dbg !115
  %exitcond24.not.i.i.4 = icmp ne i64 %iv1.i.4, %i34.i, !dbg !115
  call void @llvm.assume(i1 %exitcond24.not.i.i.4), !dbg !115
  %i37.i.4 = add nuw nsw i64 %iv1.i.4, 10, !dbg !118
  %i39.i.4 = icmp ult i64 %i37.i.4, %i38.i.4, !dbg !120
  call void @llvm.assume(i1 %i39.i.4), !dbg !120
  %i41.i.4 = getelementptr inbounds double, double addrspace(13)* %i40.i, i64 %i36.i.4, !dbg !115
  %i42.i.4 = load double, double addrspace(13)* %i41.i.4, align 8, !dbg !115, !tbaa !74
  %i44.i.4 = getelementptr inbounds double, double addrspace(13)* %i43.i.4, i64 %i37.i.4, !dbg !120
  store double %i42.i.4, double addrspace(13)* %i44.i.4, align 8, !dbg !120, !tbaa !74
  %.not.us.i.i.4 = icmp eq i64 %iv1.i.4, %i12.i, !dbg !121
  br i1 %.not.us.i.i.4, label %L41.us.i.i.4, label %L19.us.i.i.4, !dbg !123, !llvm.loop !172

L41.us.i.i.4:                                     ; preds = %L19.us.i.i.4
  %"i24'ipl.i.5" = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %"i14'ipc.i" unordered, align 8
  %205 = getelementptr inbounds i8, i8* %malloccall.i, i64 40
  %206 = bitcast i8* %205 to {} addrspace(10)**
  store {} addrspace(10)* %"i24'ipl.i.5", {} addrspace(10)** %206, align 8, !invariant.group !136
  %i24.i.5 = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %i14.i unordered, align 8
  %i25.i.5 = bitcast {} addrspace(10)* %i24.i.5 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)*
  %i26.i.5 = addrspacecast { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)* %i25.i.5 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*
  %i27.i.5 = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %i26.i.5, i64 0, i32 1
  %i30.i.5 = bitcast {} addrspace(10)* %i24.i.5 to double addrspace(13)* addrspace(10)*
  %i31.i.5 = addrspacecast double addrspace(13)* addrspace(10)* %i30.i.5 to double addrspace(13)* addrspace(11)*
  %i38.i.5 = load i64, i64 addrspace(11)* %i27.i.5, align 8, !tbaa !54, !range !57
  %i43.i.5 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %i31.i.5, align 8, !tbaa !72, !nonnull !4
  br i1 %min.iters.check, label %scalar.ph188, label %vector.memcheck192, !dbg !115

vector.memcheck192:                               ; preds = %L41.us.i.i.4
  %scevgep194 = getelementptr double, double addrspace(13)* %i43.i.5, i64 12, !dbg !115
  %207 = add i64 %i12.i, 13, !dbg !115
  %scevgep196 = getelementptr double, double addrspace(13)* %i43.i.5, i64 %207, !dbg !115
  %scevgep198 = getelementptr double, double addrspace(13)* %i40.i, i64 30, !dbg !115
  %208 = mul i64 %i12.i, 3, !dbg !115
  %209 = add i64 %208, 31, !dbg !115
  %scevgep200 = getelementptr double, double addrspace(13)* %i40.i, i64 %209, !dbg !115
  %bound0202 = icmp ult double addrspace(13)* %scevgep194, %scevgep200, !dbg !115
  %bound1203 = icmp ult double addrspace(13)* %scevgep198, %scevgep196, !dbg !115
  %found.conflict204 = and i1 %bound0202, %bound1203, !dbg !115
  br i1 %found.conflict204, label %scalar.ph188, label %vector.ph193, !dbg !115

vector.ph193:                                     ; preds = %vector.memcheck192
  %n.mod.vf206 = and i64 %15, 3, !dbg !115
  %210 = icmp eq i64 %n.mod.vf206, 0, !dbg !115
  %211 = select i1 %210, i64 4, i64 %n.mod.vf206, !dbg !115
  %n.vec207 = sub i64 %15, %211, !dbg !115
  %broadcast.splatinsert216 = insertelement <2 x i64> poison, i64 %i34.i, i32 0, !dbg !115
  %broadcast.splat217 = shufflevector <2 x i64> %broadcast.splatinsert216, <2 x i64> poison, <2 x i32> zeroinitializer, !dbg !115
  %broadcast.splatinsert220 = insertelement <2 x i64> poison, i64 %i38.i.5, i32 0, !dbg !115
  %broadcast.splat221 = shufflevector <2 x i64> %broadcast.splatinsert220, <2 x i64> poison, <2 x i32> zeroinitializer, !dbg !115
  br label %vector.body189, !dbg !115

vector.body189:                                   ; preds = %vector.body189, %vector.ph193
  %index208 = phi i64 [ 0, %vector.ph193 ], [ %index.next209, %vector.body189 ], !dbg !117
  %vec.ind212 = phi <2 x i64> [ <i64 0, i64 1>, %vector.ph193 ], [ %vec.ind.next215, %vector.body189 ]
  %step.add213 = add <2 x i64> %vec.ind212, <i64 2, i64 2>
  %212 = or i64 %index208, 2, !dbg !117
  %213 = mul nuw nsw i64 %index208, 3, !dbg !117
  %214 = mul nuw nsw i64 %212, 3, !dbg !117
  %215 = add nuw nsw i64 %213, 30, !dbg !115
  %216 = add nuw nsw i64 %214, 30, !dbg !115
  %217 = icmp ne <2 x i64> %vec.ind212, %broadcast.splat217, !dbg !115
  %218 = icmp ne <2 x i64> %step.add213, %broadcast.splat217, !dbg !115
  %219 = extractelement <2 x i1> %217, i32 0, !dbg !115
  call void @llvm.assume(i1 %219), !dbg !115
  %220 = extractelement <2 x i1> %217, i32 1, !dbg !115
  call void @llvm.assume(i1 %220), !dbg !115
  %221 = extractelement <2 x i1> %218, i32 0, !dbg !115
  call void @llvm.assume(i1 %221), !dbg !115
  %222 = extractelement <2 x i1> %218, i32 1, !dbg !115
  call void @llvm.assume(i1 %222), !dbg !115
  %223 = add nuw nsw <2 x i64> %vec.ind212, <i64 12, i64 12>, !dbg !118
  %224 = add <2 x i64> %vec.ind212, <i64 14, i64 14>, !dbg !118
  %225 = icmp ult <2 x i64> %223, %broadcast.splat221, !dbg !120
  %226 = icmp ult <2 x i64> %224, %broadcast.splat221, !dbg !120
  %227 = extractelement <2 x i1> %225, i32 0, !dbg !120
  call void @llvm.assume(i1 %227), !dbg !120
  %228 = extractelement <2 x i1> %225, i32 1, !dbg !120
  call void @llvm.assume(i1 %228), !dbg !120
  %229 = extractelement <2 x i1> %226, i32 0, !dbg !120
  call void @llvm.assume(i1 %229), !dbg !120
  %230 = extractelement <2 x i1> %226, i32 1, !dbg !120
  call void @llvm.assume(i1 %230), !dbg !120
  %231 = getelementptr inbounds double, double addrspace(13)* %i40.i, i64 %215, !dbg !115
  %232 = getelementptr inbounds double, double addrspace(13)* %i40.i, i64 %216, !dbg !115
  %233 = bitcast double addrspace(13)* %231 to <6 x double> addrspace(13)*, !dbg !115
  %234 = bitcast double addrspace(13)* %232 to <6 x double> addrspace(13)*, !dbg !115
  %wide.vec224 = load <6 x double>, <6 x double> addrspace(13)* %233, align 8, !dbg !115, !tbaa !74
  %wide.vec225 = load <6 x double>, <6 x double> addrspace(13)* %234, align 8, !dbg !115, !tbaa !74
  %strided.vec226 = shufflevector <6 x double> %wide.vec224, <6 x double> poison, <2 x i32> <i32 0, i32 3>, !dbg !115
  %strided.vec227 = shufflevector <6 x double> %wide.vec225, <6 x double> poison, <2 x i32> <i32 0, i32 3>, !dbg !115
  %235 = extractelement <2 x i64> %223, i32 0, !dbg !120
  %236 = getelementptr inbounds double, double addrspace(13)* %i43.i.5, i64 %235, !dbg !120
  %237 = bitcast double addrspace(13)* %236 to <2 x double> addrspace(13)*, !dbg !120
  store <2 x double> %strided.vec226, <2 x double> addrspace(13)* %237, align 8, !dbg !120, !tbaa !74, !alias.scope !173, !noalias !176
  %238 = getelementptr inbounds double, double addrspace(13)* %236, i64 2, !dbg !120
  %239 = bitcast double addrspace(13)* %238 to <2 x double> addrspace(13)*, !dbg !120
  store <2 x double> %strided.vec227, <2 x double> addrspace(13)* %239, align 8, !dbg !120, !tbaa !74, !alias.scope !173, !noalias !176
  %index.next209 = add i64 %index208, 4, !dbg !117
  %vec.ind.next215 = add <2 x i64> %vec.ind212, <i64 4, i64 4>
  %240 = icmp eq i64 %index.next209, %n.vec207, !dbg !117
  br i1 %240, label %scalar.ph188, label %vector.body189, !dbg !117, !llvm.loop !178

scalar.ph188:                                     ; preds = %vector.body189, %vector.memcheck192, %L41.us.i.i.4
  %bc.resume.val210 = phi i64 [ 0, %L41.us.i.i.4 ], [ 0, %vector.memcheck192 ], [ %n.vec207, %vector.body189 ]
  br label %L19.us.i.i.5, !dbg !115

L19.us.i.i.5:                                     ; preds = %L19.us.i.i.5, %scalar.ph188
  %iv1.i.5 = phi i64 [ %iv.next2.i.5, %L19.us.i.i.5 ], [ %bc.resume.val210, %scalar.ph188 ]
  %iv.next2.i.5 = add nuw nsw i64 %iv1.i.5, 1, !dbg !117
  %i35.i.5 = mul nuw nsw i64 %iv1.i.5, 3, !dbg !117
  %i36.i.5 = add nuw nsw i64 %i35.i.5, 30, !dbg !115
  %exitcond24.not.i.i.5 = icmp ne i64 %iv1.i.5, %i34.i, !dbg !115
  call void @llvm.assume(i1 %exitcond24.not.i.i.5), !dbg !115
  %i37.i.5 = add nuw nsw i64 %iv1.i.5, 12, !dbg !118
  %i39.i.5 = icmp ult i64 %i37.i.5, %i38.i.5, !dbg !120
  call void @llvm.assume(i1 %i39.i.5), !dbg !120
  %i41.i.5 = getelementptr inbounds double, double addrspace(13)* %i40.i, i64 %i36.i.5, !dbg !115
  %i42.i.5 = load double, double addrspace(13)* %i41.i.5, align 8, !dbg !115, !tbaa !74
  %i44.i.5 = getelementptr inbounds double, double addrspace(13)* %i43.i.5, i64 %i37.i.5, !dbg !120
  store double %i42.i.5, double addrspace(13)* %i44.i.5, align 8, !dbg !120, !tbaa !74
  %.not.us.i.i.5 = icmp eq i64 %iv1.i.5, %i12.i, !dbg !121
  br i1 %.not.us.i.i.5, label %julia_fooSend_1802.exit.i, label %L19.us.i.i.5, !dbg !123, !llvm.loop !179
}

; Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly
declare void @llvm.memset.p0i8.i64.7(i8* nocapture writeonly, i8, i64, i1 immarg) #5

; Function Attrs: nofree nosync nounwind willreturn
declare void @llvm.assume(i1 noundef) #7

; Function Attrs: inaccessiblemem_or_argmemonly
declare void @jl_gc_queue_root({} addrspace(10)*) #8

; Function Attrs: allocsize(1)
declare noalias nonnull {} addrspace(10)* @jl_gc_pool_alloc(i8*, i32, i32) #9

; Function Attrs: allocsize(1)
declare noalias nonnull {} addrspace(10)* @jl_gc_big_alloc(i8*, i64) #9

declare noalias nonnull {} addrspace(10)** @julia.new_gc_frame(i32)

declare void @julia.push_gc_frame({} addrspace(10)**, i32)

declare {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)**, i32)

declare void @julia.pop_gc_frame({} addrspace(10)**)

; Function Attrs: allocsize(1)
declare noalias nonnull {} addrspace(10)* @julia.gc_alloc_bytes(i8*, i64) #9

; Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly
declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1 immarg) #5

attributes #0 = { "probe-stack"="inline-asm" }
attributes #1 = { inaccessiblememonly allocsize(1) }
attributes #2 = { cold noreturn nounwind }
attributes #3 = { argmemonly nofree nosync nounwind willreturn }
attributes #4 = { willreturn mustprogress "probe-stack"="inline-asm" }
attributes #5 = { argmemonly nofree nosync nounwind willreturn writeonly }
attributes #6 = { alwaysinline "probe-stack"="inline-asm" }
attributes #7 = { nofree nosync nounwind willreturn }
attributes #8 = { inaccessiblemem_or_argmemonly }
attributes #9 = { allocsize(1) }
attributes #10 = { nounwind }
attributes #11 = { noreturn }
attributes #12 = { willreturn mustprogress }

!llvm.module.flags = !{!0, !1}
!llvm.dbg.cu = !{!2, !5, !6, !8, !9, !10, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32, !33, !34, !35, !36}

!0 = !{i32 2, !"Dwarf Version", i32 4}
!1 = !{i32 2, !"Debug Info Version", i32 3}
!2 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !3, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !4, nameTableKind: None)
!3 = !DIFile(filename: "/home/ubuntu/LULESH.jl/examples/benchmark.jl", directory: ".")
!4 = !{}
!5 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !3, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !4, nameTableKind: None)
!6 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !7, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !4, nameTableKind: None)
!7 = !DIFile(filename: "/home/ubuntu/.julia/packages/GPUCompiler/mjc8g/src/runtime.jl", directory: ".")
!8 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !7, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !4, nameTableKind: None)
!9 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !7, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !4, nameTableKind: None)
!10 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !11, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !4, nameTableKind: None)
!11 = !DIFile(filename: "/home/ubuntu/.julia/packages/Enzyme/P2eUh/src/compiler.jl", directory: ".")
!12 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !7, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !4, nameTableKind: None)
!13 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !7, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !4, nameTableKind: None)
!14 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !11, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !4, nameTableKind: None)
!15 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !7, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !4, nameTableKind: None)
!16 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !7, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !4, nameTableKind: None)
!17 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !11, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !4, nameTableKind: None)
!18 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !7, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !4, nameTableKind: None)
!19 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !11, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !4, nameTableKind: None)
!20 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !7, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !4, nameTableKind: None)
!21 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !7, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !4, nameTableKind: None)
!22 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !7, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !4, nameTableKind: None)
!23 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !7, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !4, nameTableKind: None)
!24 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !7, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !4, nameTableKind: None)
!25 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !7, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !4, nameTableKind: None)
!26 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !7, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !4, nameTableKind: None)
!27 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !7, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !4, nameTableKind: None)
!28 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !7, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !4, nameTableKind: None)
!29 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !7, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !4, nameTableKind: None)
!30 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !7, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !4, nameTableKind: None)
!31 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !7, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !4, nameTableKind: None)
!32 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !7, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !4, nameTableKind: None)
!33 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !7, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !4, nameTableKind: None)
!34 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !7, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !4, nameTableKind: None)
!35 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !11, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !4, nameTableKind: None)
!36 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !11, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !4, nameTableKind: None)
!37 = distinct !DISubprogram(name: "fooSend", linkageName: "julia_fooSend_1802", scope: null, file: !3, line: 36, type: !38, scopeLine: 36, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!38 = !DISubroutineType(types: !4)
!39 = !{!40, !40, i64 0}
!40 = !{!"jtbaa_gcframe", !41, i64 0}
!41 = !{!"jtbaa", !42, i64 0}
!42 = !{!"jtbaa"}
!43 = !DILocation(line: 38, scope: !37)
!44 = !DILocation(line: 29, scope: !45, inlinedAt: !47)
!45 = distinct !DISubprogram(name: "getindex;", linkageName: "getindex", scope: !46, file: !46, type: !38, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!46 = !DIFile(filename: "tuple.jl", directory: ".")
!47 = !DILocation(line: 69, scope: !48, inlinedAt: !49)
!48 = distinct !DISubprogram(name: "iterate;", linkageName: "iterate", scope: !46, file: !46, type: !38, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!49 = !DILocation(line: 69, scope: !48, inlinedAt: !43)
!50 = !{!51, !51, i64 0, i64 0}
!51 = !{!"jtbaa_const", !41, i64 0}
!52 = !{i64 40}
!53 = !{i64 16}
!54 = !{!55, !55, i64 0}
!55 = !{!"jtbaa_arraylen", !56, i64 0}
!56 = !{!"jtbaa_array", !41, i64 0}
!57 = !{i64 0, i64 9223372036854775807}
!58 = !DILocation(line: 861, scope: !59, inlinedAt: !61)
!59 = distinct !DISubprogram(name: "getindex;", linkageName: "getindex", scope: !60, file: !60, type: !38, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!60 = !DIFile(filename: "array.jl", directory: ".")
!61 = !DILocation(line: 40, scope: !37)
!62 = !DILocation(line: 88, scope: !63, inlinedAt: !61)
!63 = distinct !DISubprogram(name: "*;", linkageName: "*", scope: !64, file: !64, type: !38, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!64 = !DIFile(filename: "int.jl", directory: ".")
!65 = !DILocation(line: 87, scope: !66, inlinedAt: !67)
!66 = distinct !DISubprogram(name: "+;", linkageName: "+", scope: !64, file: !64, type: !38, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!67 = !DILocation(line: 655, scope: !68, inlinedAt: !61)
!68 = distinct !DISubprogram(name: "+;", linkageName: "+", scope: !69, file: !69, type: !38, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!69 = !DIFile(filename: "operators.jl", directory: ".")
!70 = !DILocation(line: 903, scope: !71, inlinedAt: !61)
!71 = distinct !DISubprogram(name: "setindex!;", linkageName: "setindex!", scope: !60, file: !60, type: !38, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!72 = !{!73, !73, i64 0}
!73 = !{!"jtbaa_arrayptr", !56, i64 0}
!74 = !{!75, !75, i64 0}
!75 = !{!"jtbaa_arraybuf", !76, i64 0}
!76 = !{!"jtbaa_data", !41, i64 0}
!77 = !DILocation(line: 468, scope: !78, inlinedAt: !80)
!78 = distinct !DISubprogram(name: "==;", linkageName: "==", scope: !79, file: !79, type: !38, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!79 = !DIFile(filename: "promotion.jl", directory: ".")
!80 = !DILocation(line: 837, scope: !81, inlinedAt: !61)
!81 = distinct !DISubprogram(name: "iterate;", linkageName: "iterate", scope: !82, file: !82, type: !38, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!82 = !DIFile(filename: "range.jl", directory: ".")
!83 = !DILocation(line: 69, scope: !48, inlinedAt: !84)
!84 = !DILocation(line: 42, scope: !37)
!85 = !DILocation(line: 87, scope: !66, inlinedAt: !84)
!86 = !DILocation(line: 29, scope: !45, inlinedAt: !83)
!87 = !DILocation(line: 87, scope: !66, inlinedAt: !83)
!88 = !DILocation(line: 28, scope: !89, inlinedAt: !90)
!89 = distinct !DISubprogram(name: "Something;", linkageName: "Something", scope: !3, file: !3, type: !38, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!90 = !DILocation(line: 31, scope: !91, inlinedAt: !92)
!91 = distinct !DISubprogram(name: "Isend;", linkageName: "Isend", scope: !3, file: !3, type: !38, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!92 = !DILocation(line: 44, scope: !37)
!93 = !{!94, !94, i64 0}
!94 = !{!"jtbaa_tag", !76, i64 0}
!95 = !{!96, !96, i64 0}
!96 = !{!"jtbaa_mutab", !97, i64 0}
!97 = !{!"jtbaa_value", !76, i64 0}
!98 = !DILocation(line: 369, scope: !99, inlinedAt: !101)
!99 = distinct !DISubprogram(name: "getptls;", linkageName: "getptls", scope: !100, file: !100, type: !38, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!100 = !DIFile(filename: "boot.jl", directory: ".")
!101 = !DILocation(line: 48, scope: !102, inlinedAt: !104)
!102 = distinct !DISubprogram(name: "finalizer;", linkageName: "finalizer", scope: !103, file: !103, type: !38, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!103 = !DIFile(filename: "gcutils.jl", directory: ".")
!104 = !DILocation(line: 32, scope: !91, inlinedAt: !92)
!105 = !DILocation(line: 45, scope: !37)
!106 = !DILocation(line: 49, scope: !107, inlinedAt: !108)
!107 = distinct !DISubprogram(name: "foo", linkageName: "julia_foo_1799", scope: null, file: !3, line: 47, type: !38, scopeLine: 47, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !5, retainedNodes: !4)
!108 = distinct !DILocation(line: 0, scope: !107)
!109 = !DILocation(line: 38, scope: !37, inlinedAt: !110)
!110 = distinct !DILocation(line: 49, scope: !107, inlinedAt: !108)
!111 = !DILocation(line: 29, scope: !45, inlinedAt: !112)
!112 = distinct !DILocation(line: 69, scope: !48, inlinedAt: !113)
!113 = distinct !DILocation(line: 69, scope: !48, inlinedAt: !114)
!114 = distinct !DILocation(line: 38, scope: !37, inlinedAt: !110)
!115 = !DILocation(line: 861, scope: !59, inlinedAt: !116)
!116 = distinct !DILocation(line: 40, scope: !37, inlinedAt: !110)
!117 = !DILocation(line: 88, scope: !63, inlinedAt: !116)
!118 = !DILocation(line: 87, scope: !66, inlinedAt: !119)
!119 = distinct !DILocation(line: 655, scope: !68, inlinedAt: !116)
!120 = !DILocation(line: 903, scope: !71, inlinedAt: !116)
!121 = !DILocation(line: 468, scope: !78, inlinedAt: !122)
!122 = distinct !DILocation(line: 837, scope: !81, inlinedAt: !116)
!123 = !DILocation(line: 40, scope: !37, inlinedAt: !110)
!124 = !DILocation(line: 69, scope: !48, inlinedAt: !125)
!125 = distinct !DILocation(line: 42, scope: !37, inlinedAt: !110)
!126 = !DILocation(line: 29, scope: !45, inlinedAt: !127)
!127 = distinct !DILocation(line: 69, scope: !48, inlinedAt: !125)
!128 = !DILocation(line: 42, scope: !37, inlinedAt: !110)
!129 = !DILocation(line: 28, scope: !89, inlinedAt: !130)
!130 = distinct !DILocation(line: 31, scope: !91, inlinedAt: !131)
!131 = distinct !DILocation(line: 44, scope: !37, inlinedAt: !110)
!132 = !DILocation(line: 369, scope: !99, inlinedAt: !133)
!133 = distinct !DILocation(line: 48, scope: !102, inlinedAt: !134)
!134 = distinct !DILocation(line: 32, scope: !91, inlinedAt: !131)
!135 = !DILocation(line: 48, scope: !102, inlinedAt: !134)
!136 = distinct !{}
!137 = !{!138}
!138 = distinct !{!138, !139}
!139 = distinct !{!139, !"LVerDomain"}
!140 = !{!141}
!141 = distinct !{!141, !139}
!142 = distinct !{!142, !143}
!143 = !{!"llvm.loop.isvectorized", i32 1}
!144 = distinct !{!144, !143}
!145 = !{!146}
!146 = distinct !{!146, !147}
!147 = distinct !{!147, !"LVerDomain"}
!148 = !{!149}
!149 = distinct !{!149, !147}
!150 = distinct !{!150, !143}
!151 = distinct !{!151, !143}
!152 = !{!153}
!153 = distinct !{!153, !154}
!154 = distinct !{!154, !"LVerDomain"}
!155 = !{!156}
!156 = distinct !{!156, !154}
!157 = distinct !{!157, !143}
!158 = distinct !{!158, !143}
!159 = !{!160}
!160 = distinct !{!160, !161}
!161 = distinct !{!161, !"LVerDomain"}
!162 = !{!163}
!163 = distinct !{!163, !161}
!164 = distinct !{!164, !143}
!165 = distinct !{!165, !143}
!166 = !{!167}
!167 = distinct !{!167, !168}
!168 = distinct !{!168, !"LVerDomain"}
!169 = !{!170}
!170 = distinct !{!170, !168}
!171 = distinct !{!171, !143}
!172 = distinct !{!172, !143}
!173 = !{!174}
!174 = distinct !{!174, !175}
!175 = distinct !{!175, !"LVerDomain"}
!176 = !{!177}
!177 = distinct !{!177, !175}
!178 = distinct !{!178, !143}
!179 = distinct !{!179, !143}
)

signal (11): Segmentation fault
in expression starting at /home/ubuntu/LULESH.jl/examples/benchmark.jl:73
main at /home/ubuntu/LULESH.jl/examples/benchmark.jl:67
jl_apply at /buildworker/worker/package_linux64/build/src/julia.h:1788 [inlined]
do_call at /buildworker/worker/package_linux64/build/src/interpreter.c:126
Allocations: 25940506 (Pool: 25927274; Big: 13232); GC: 31