EnzymeAD / Enzyme.jl

Julia bindings for the Enzyme automatic differentiator
https://enzyme.mit.edu
MIT License
437 stars 62 forks source link

Issue with Enzymanigans #1287

Closed jlk9 closed 6 months ago

jlk9 commented 6 months ago
oldFunc: ; Function Attrs: mustprogress willreturn
; @preprocess_julia___sort__25_13514 returns void and takes three arguments:
;   %0 - object pointer, marked "enzyme_inactive"
;   %1 - read-only pointer to a one-field struct holding an object pointer
;   %2 - read-only pointer to an { i64, i64 } pair, marked "enzyme_inactive"
; NOTE(review): judging only by the callee names (partition, copyto_impl, reverse,
; sort) this looks like a partition-based sort of %0 over the range stored in %2,
; using %34 as a scratch array -- confirm against the Julia source.
define internal fastcc void @preprocess_julia___sort__25_13514({} addrspace(10)* noundef nonnull align 16 dereferenceable(40) "enzyme_inactive" "enzymejl_parmtype"="4743859760" "enzymejl_parmtype_ref"="2" %0, { {} addrspace(10)* } addrspace(11)* nocapture noundef nonnull readonly align 8 dereferenceable(8) "enzyme_inactive" "enzymejl_parmtype"="4797345728" "enzymejl_parmtype_ref"="1" %2) unnamed_addr #54 !dbg !2315 {
top:
  ; Three 24-byte objects (%11, %18, %25) are allocated through julia.gc_alloc_obj,
  ; zeroed by a zeroType.N call, and viewed as { object pointer, i64, i64 } (%12, %19, %26).
  ; Each allocation carries !enzyme_fromstack metadata.
  ; The loads %10, %17 and %24 have no users inside this function.
  %3 = call {}*** @julia.get_pgcstack()
  %4 = call {}*** @julia.get_pgcstack()
  %5 = call {}*** @julia.get_pgcstack()
  %6 = bitcast {}*** %4 to {}**
  %7 = getelementptr inbounds {}*, {}** %6, i64 -14
  %8 = getelementptr inbounds {}*, {}** %7, i64 16
  %9 = bitcast {}** %8 to i8**
  %10 = load i8*, i8** %9, align 8
  %11 = call noalias nonnull dereferenceable(24) dereferenceable_or_null(24) {} addrspace(10)* @julia.gc_alloc_obj({}** %7, i64 24, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 11783900496 to {}*) to {} addrspace(10)*)), !enzyme_fromstack !1331
  call void @zeroType.80({} addrspace(10)* %11, i8 0, i64 24), !enzyme_zerostack !32
  %12 = bitcast {} addrspace(10)* %11 to { {} addrspace(10)*, i64, i64 } addrspace(10)*, !enzyme_caststack !32
  %13 = bitcast {}*** %5 to {}**
  %14 = getelementptr inbounds {}*, {}** %13, i64 -14
  %15 = getelementptr inbounds {}*, {}** %14, i64 16
  %16 = bitcast {}** %15 to i8**
  %17 = load i8*, i8** %16, align 8
  %18 = call noalias nonnull dereferenceable(24) dereferenceable_or_null(24) {} addrspace(10)* @julia.gc_alloc_obj({}** %14, i64 24, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 11783900496 to {}*) to {} addrspace(10)*)), !enzyme_fromstack !1331
  call void @zeroType.79({} addrspace(10)* %18, i8 0, i64 24), !enzyme_zerostack !32
  %19 = bitcast {} addrspace(10)* %18 to { {} addrspace(10)*, i64, i64 } addrspace(10)*, !enzyme_caststack !32
  %20 = bitcast {}*** %3 to {}**
  %21 = getelementptr inbounds {}*, {}** %20, i64 -14
  %22 = getelementptr inbounds {}*, {}** %21, i64 16
  %23 = bitcast {}** %22 to i8**
  %24 = load i8*, i8** %23, align 8
  %25 = call noalias nonnull dereferenceable(24) dereferenceable_or_null(24) {} addrspace(10)* @julia.gc_alloc_obj({}** %21, i64 24, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 11783900496 to {}*) to {} addrspace(10)*)), !enzyme_fromstack !1331
  call void @zeroType.81({} addrspace(10)* %25, i8 0, i64 24), !enzyme_zerostack !32
  %26 = bitcast {} addrspace(10)* %25 to { {} addrspace(10)*, i64, i64 } addrspace(10)*, !enzyme_caststack !32
  ; Load the safepoint address through the pgcstack and call julia.safepoint
  ; between two single-thread seq_cst fences.
  %27 = call {}*** @julia.get_pgcstack() #55
  %ptls_field76 = getelementptr inbounds {}**, {}*** %27, i64 2
  %28 = bitcast {}*** %ptls_field76 to i64***
  %ptls_load7778 = load i64**, i64*** %28, align 8, !tbaa !33
  %29 = getelementptr inbounds i64*, i64** %ptls_load7778, i64 2
  %safepoint = load i64*, i64** %29, align 8, !tbaa !37, !invariant.load !32
  fence syncscope("singlethread") seq_cst
  call void @julia.safepoint(i64* %safepoint) #55, !dbg !2316
  fence syncscope("singlethread") seq_cst
  ; Read the pair behind %2: %unbox2 is field 0, %unbox is field 1
  ; (presumably the lower / upper bound of the range -- TODO confirm).
  %30 = getelementptr inbounds { i64, i64 }, { i64, i64 } addrspace(11)* %2, i64 0, i32 0, !dbg !2317
  %31 = getelementptr inbounds { i64, i64 }, { i64, i64 } addrspace(11)* %2, i64 0, i32 1, !dbg !2320
  %unbox = load i64, i64 addrspace(11)* %31, align 8, !dbg !2322
  %unbox2 = load i64, i64 addrspace(11)* %30, align 8, !dbg !2322
  %32 = add i64 %unbox, 1, !dbg !2322
  %33 = sub i64 %32, %unbox2, !dbg !2324
  ; %34 is a new 1-d array of length %33 = %unbox + 1 - %unbox2, marked "enzyme_inactive".
  ; It is stored into all three temporaries and passed to every callee below.
  %34 = call noalias nonnull "enzyme_inactive" {} addrspace(10)* @ijl_alloc_array_1d({} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 4743859760 to {}*) to {} addrspace(10)*), i64 %33) #56, !dbg !2325
  ; Loop-invariant values handed to the callees: %.neg = %unbox2 - 1, %35 = 1 - %unbox2.
  %.neg = add i64 %unbox2, -1, !dbg !2327
  %35 = sub i64 1, %unbox2, !dbg !2327
  ; %guard_res27 is %unbox unless the field pointer %31 is null, in which case it is undef.
  %.not = icmp eq i64 addrspace(11)* %31, null
  %guard_res27 = select i1 %.not, i64 undef, i64 %unbox
  ; Skip the loop when %guard_res27 <= %unbox2 or (%guard_res27 - %unbox2) < 21.
  %.not8087 = icmp sle i64 %guard_res27, %unbox2, !dbg !2329
  %36 = sub i64 %guard_res27, %unbox2
  %37 = icmp slt i64 %36, 21
  %or.cond88 = or i1 %.not8087, %37, !dbg !2330
  br i1 %or.cond88, label %L73, label %L30.lr.ph, !dbg !2330

; Loop preheader: compute the field addresses of the %12 and %19 temporaries and
; their addrspace(11) views (%38, %39) once, outside the loop.
L30.lr.ph:                                        ; preds = %top
  %.fca.0.gep51 = getelementptr { {} addrspace(10)*, i64, i64 }, { {} addrspace(10)*, i64, i64 } addrspace(10)* %12, i64 0, i32 0
  %.fca.1.gep53 = getelementptr { {} addrspace(10)*, i64, i64 }, { {} addrspace(10)*, i64, i64 } addrspace(10)* %12, i64 0, i32 1
  %.fca.2.gep55 = getelementptr { {} addrspace(10)*, i64, i64 }, { {} addrspace(10)*, i64, i64 } addrspace(10)* %12, i64 0, i32 2
  %38 = addrspacecast { {} addrspace(10)*, i64, i64 } addrspace(10)* %12 to { {} addrspace(10)*, i64, i64 } addrspace(11)*
  %.fca.0.gep57 = getelementptr { {} addrspace(10)*, i64, i64 }, { {} addrspace(10)*, i64, i64 } addrspace(10)* %19, i64 0, i32 0
  %.fca.1.gep59 = getelementptr { {} addrspace(10)*, i64, i64 }, { {} addrspace(10)*, i64, i64 } addrspace(10)* %19, i64 0, i32 1
  %.fca.2.gep61 = getelementptr { {} addrspace(10)*, i64, i64 }, { {} addrspace(10)*, i64, i64 } addrspace(10)* %19, i64 0, i32 2
  %39 = addrspacecast { {} addrspace(10)*, i64, i64 } addrspace(10)* %19 to { {} addrspace(10)*, i64, i64 } addrspace(11)*
  br label %L30, !dbg !2330

; Loop header. %iv counts iterations from 0. %value_phi590 and %value_phi489 start as
; %unbox2 and %guard_res27 and are updated through union_move49; %value_phi892 is an
; i8 flag that starts at 0.
L30:                                              ; preds = %union_move49, %L30.lr.ph
  %iv = phi i64 [ %iv.next, %union_move49 ], [ 0, %L30.lr.ph ]
  %value_phi892 = phi i8 [ 0, %L30.lr.ph ], [ %value_phi19, %union_move49 ]
  %value_phi590 = phi i64 [ %unbox2, %L30.lr.ph ], [ %value_phi18, %union_move49 ]
  %value_phi489 = phi i64 [ %guard_res27, %L30.lr.ph ], [ %value_phi17, %union_move49 ]
  ; %41 is the low bit of -%iv, i.e. the parity of %iv: odd iterations branch to L31,
  ; even iterations to L39. %42 is the negation of %41.
  %40 = mul nsw i64 %iv, -1, !dbg !2331
  %iv.next = add nuw nsw i64 %iv, 1, !dbg !2331
  %41 = trunc i64 %40 to i1, !dbg !2331
  %42 = xor i1 %41, true, !dbg !2331
  br i1 %41, label %L31, label %L39, !dbg !2331

; Odd iteration: partition is called with %0 as first and %34 as sixth argument,
; both bounds shifted by %35, and a constant 0 as last argument.
L31:                                              ; preds = %L30
  %43 = add i64 %value_phi590, %35, !dbg !2332
  %44 = add i64 %value_phi489, %35, !dbg !2332
  %45 = call fastcc i64 @julia_partition__13544({} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %0, i64 signext %43, i64 signext %44, i64 signext %35, { {} addrspace(10)* } addrspace(11)* nocapture noundef nonnull readonly align 8 dereferenceable(8) %1, {} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %34, i8 zeroext %value_phi892, {} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %0, i64 noundef signext 0) #55, !dbg !2333
  br label %L45, !dbg !2334

; Even iteration: partition is called with %34 as first and %0 as sixth argument,
; the unshifted bounds, and %.neg as fourth and last argument.
L39:                                              ; preds = %L30
  %46 = call fastcc i64 @julia_partition__13544({} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %34, i64 signext %value_phi590, i64 signext %value_phi489, i64 signext %.neg, { {} addrspace(10)* } addrspace(11)* nocapture noundef nonnull readonly align 8 dereferenceable(8) %1, {} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %0, i8 zeroext %value_phi892, {} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %0, i64 signext %.neg) #55, !dbg !2335
  br label %L45, !dbg !2334

; %value_phi15 is the partition result from whichever branch ran. Compare its distance
; to %value_phi590 (%47) with its distance to %value_phi489 (%48); L52 is taken when
; %47 < %48, L61 otherwise.
L45:                                              ; preds = %L39, %L31
  %value_phi15 = phi i64 [ %45, %L31 ], [ %46, %L39 ]
  %47 = sub i64 %value_phi15, %value_phi590, !dbg !2336
  %48 = sub i64 %value_phi489, %value_phi15, !dbg !2336
  %.not81 = icmp slt i64 %47, %48, !dbg !2338
  br i1 %.not81, label %L52, label %L61, !dbg !2337

; Write { %34, %value_phi590, %value_phi15 - 1 } into the %19 temporary and pass it
; (as %39) to julia___sort__25_13534 together with the current flag %value_phi892.
; For the next iteration: %51 = %value_phi15 + 1 and %53 = inverted low bit of the flag.
L52:                                              ; preds = %L45
  %49 = add i64 %value_phi15, -1, !dbg !2339
  %50 = zext i1 %42 to i8, !dbg !2341
  store {} addrspace(10)* %34, {} addrspace(10)* addrspace(10)* %.fca.0.gep57, align 8, !dbg !2341, !noalias !2342
  call void ({} addrspace(10)*, ...) @julia.write_barrier({} addrspace(10)* %18, {} addrspace(10)* %34), !dbg !2341
  store i64 %value_phi590, i64 addrspace(10)* %.fca.1.gep59, align 8, !dbg !2341, !noalias !2342
  store i64 %49, i64 addrspace(10)* %.fca.2.gep61, align 8, !dbg !2341, !noalias !2342
  call fastcc void @julia___sort__25_13534({} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %34, i64 signext %35, i8 zeroext %50, i8 zeroext %value_phi892, {} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %0, { {} addrspace(10)* } addrspace(11)* nocapture noundef nonnull readonly align 8 dereferenceable(8) %1, { {} addrspace(10)*, i64, i64 } addrspace(11)* nocapture noundef nonnull readonly align 8 dereferenceable(24) %39) #55, !dbg !2341
  %51 = add i64 %value_phi15, 1, !dbg !2345
  %52 = and i8 %value_phi892, 1, !dbg !2347
  %53 = xor i8 %52, 1, !dbg !2334
  br label %union_move49, !dbg !2334

; Write { %34, %value_phi15 + 1, %value_phi489 } into the %12 temporary and pass it
; (as %38) to julia___sort__25_13534 with the inverted low bit of the flag (%57).
; For the next iteration: %58 = %value_phi15 - 1; the flag itself is carried unchanged.
L61:                                              ; preds = %L45
  %54 = add i64 %value_phi15, 1, !dbg !2349
  %55 = and i8 %value_phi892, 1, !dbg !2351
  %56 = zext i1 %42 to i8, !dbg !2352
  %57 = xor i8 %55, 1, !dbg !2352
  store {} addrspace(10)* %34, {} addrspace(10)* addrspace(10)* %.fca.0.gep51, align 8, !dbg !2352, !noalias !2342
  call void ({} addrspace(10)*, ...) @julia.write_barrier({} addrspace(10)* %11, {} addrspace(10)* %34), !dbg !2352
  store i64 %54, i64 addrspace(10)* %.fca.1.gep53, align 8, !dbg !2352, !noalias !2342
  store i64 %value_phi489, i64 addrspace(10)* %.fca.2.gep55, align 8, !dbg !2352, !noalias !2342
  call fastcc void @julia___sort__25_13534({} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %34, i64 signext %35, i8 zeroext %56, i8 zeroext %57, {} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %0, { {} addrspace(10)* } addrspace(11)* nocapture noundef nonnull readonly align 8 dereferenceable(8) %1, { {} addrspace(10)*, i64, i64 } addrspace(11)* nocapture noundef nonnull readonly align 8 dereferenceable(24) %38) #55, !dbg !2352
  %58 = add i64 %value_phi15, -1, !dbg !2353
  br label %union_move49, !dbg !2334

; Dedicated loop-exit block; only forwards to L73.
L73.loopexit:                                     ; preds = %union_move49
  br label %L73, !dbg !2355

; Join of the loop exit and the path that skipped the loop. The *.lcssa phis carry the
; final values of the two bounds, of %42 (false when the loop was skipped) and of the
; flag (0 when the loop was skipped). Return at once if %value_phi4.lcssa < %value_phi5.lcssa.
L73:                                              ; preds = %L73.loopexit, %top
  %value_phi4.lcssa = phi i64 [ %guard_res27, %top ], [ %value_phi17, %L73.loopexit ]
  %value_phi5.lcssa = phi i64 [ %unbox2, %top ], [ %value_phi18, %L73.loopexit ]
  %value_phi7.off0.lcssa = phi i1 [ false, %top ], [ %42, %L73.loopexit ]
  %value_phi8.lcssa = phi i8 [ 0, %top ], [ %value_phi19, %L73.loopexit ]
  %.not84 = icmp slt i64 %value_phi4.lcssa, %value_phi5.lcssa, !dbg !2355
  br i1 %.not84, label %common.ret, label %L77, !dbg !2356

; Single return point of the function.
common.ret:                                       ; preds = %L86, %L73
  ret void, !dbg !2357

; Run L78 only when %value_phi7.off0.lcssa is true, i.e. the loop ran and its last
; iteration was an even one.
L77:                                              ; preds = %L73
  br i1 %value_phi7.off0.lcssa, label %L78, label %L84, !dbg !2358

; Call julia__copyto_impl with %0, %value_phi5.lcssa, %34, %59 = %value_phi5.lcssa + %35
; and %61 = %value_phi4.lcssa + 1 - %value_phi5.lcssa.
; NOTE(review): presumably copies the remaining range from the scratch array %34 back
; into %0 -- confirm against the callee.
L78:                                              ; preds = %L77
  %59 = add i64 %value_phi5.lcssa, %35, !dbg !2359
  %60 = add i64 %value_phi4.lcssa, 1, !dbg !2360
  %61 = sub i64 %60, %value_phi5.lcssa, !dbg !2359
  call fastcc void @julia__copyto_impl__13525({} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %0, i64 signext %value_phi5.lcssa, {} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %34, i64 signext %59, i64 signext %61) #55, !dbg !2361
  br label %L84, !dbg !2361

; Call julia_reverse (L85) only when the low bit of the final flag is set.
L84:                                              ; preds = %L78, %L77
  %62 = and i8 %value_phi8.lcssa, 1, !dbg !2362
  %.not85 = icmp eq i8 %62, 0, !dbg !2362
  br i1 %.not85, label %L86, label %L85, !dbg !2362

; julia_reverse receives %0 and the two final bounds.
L85:                                              ; preds = %L84
  call fastcc void @julia_reverse__13523({} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %0, i64 signext %value_phi5.lcssa, i64 signext %value_phi4.lcssa) #55, !dbg !2362
  br label %L86, !dbg !2362

; Write { %34, %value_phi5.lcssa, %value_phi4.lcssa } into the %26 temporary and hand its
; addrspace(11) view (%63) to julia__sort__13521 together with %0 and %1, then return.
L86:                                              ; preds = %L85, %L84
  %.fca.0.gep = getelementptr { {} addrspace(10)*, i64, i64 }, { {} addrspace(10)*, i64, i64 } addrspace(10)* %26, i64 0, i32 0, !dbg !2363
  store {} addrspace(10)* %34, {} addrspace(10)* addrspace(10)* %.fca.0.gep, align 8, !dbg !2363, !noalias !2342
  call void ({} addrspace(10)*, ...) @julia.write_barrier({} addrspace(10)* %25, {} addrspace(10)* %34), !dbg !2363
  %.fca.1.gep = getelementptr { {} addrspace(10)*, i64, i64 }, { {} addrspace(10)*, i64, i64 } addrspace(10)* %26, i64 0, i32 1, !dbg !2363
  store i64 %value_phi5.lcssa, i64 addrspace(10)* %.fca.1.gep, align 8, !dbg !2363, !noalias !2342
  %.fca.2.gep = getelementptr { {} addrspace(10)*, i64, i64 }, { {} addrspace(10)*, i64, i64 } addrspace(10)* %26, i64 0, i32 2, !dbg !2363
  store i64 %value_phi4.lcssa, i64 addrspace(10)* %.fca.2.gep, align 8, !dbg !2363, !noalias !2342
  %63 = addrspacecast { {} addrspace(10)*, i64, i64 } addrspace(10)* %26 to { {} addrspace(10)*, i64, i64 } addrspace(11)*, !dbg !2363
  call fastcc void @julia__sort__13521({} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %0, { {} addrspace(10)* } addrspace(11)* nocapture noundef nonnull readonly align 8 dereferenceable(8) %1, { {} addrspace(10)*, i64, i64 } addrspace(11)* nocapture noundef nonnull readonly align 8 dereferenceable(24) %63) #55, !dbg !2363
  br label %common.ret

; Loop latch: merge the updated bounds and flag coming from L52 and L61. Leave the loop
; when %value_phi17 <= %value_phi18 or (%value_phi17 - %value_phi18) < 21 -- the same
; test that guards loop entry in `top` -- otherwise start the next iteration.
union_move49:                                     ; preds = %L61, %L52
  %value_phi17 = phi i64 [ %value_phi489, %L52 ], [ %58, %L61 ]
  %value_phi18 = phi i64 [ %51, %L52 ], [ %value_phi590, %L61 ]
  %value_phi19 = phi i8 [ %53, %L52 ], [ %value_phi892, %L61 ]
  %.not80 = icmp sle i64 %value_phi17, %value_phi18, !dbg !2329
  %64 = sub i64 %value_phi17, %value_phi18
  %65 = icmp slt i64 %64, 21
  %or.cond = or i1 %.not80, %65, !dbg !2330
  br i1 %or.cond, label %L73.loopexit, label %L30, !dbg !2330
}

 krc[  %46 = call fastcc i64 @julia_partition__13544({} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %34, i64 signext %value_phi590, i64 signext %value_phi489, i64 signext %.neg, { {} addrspace(10)* } addrspace(11)* nocapture noundef nonnull readonly align 8 dereferenceable(8) %1, {} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %0, i8 zeroext %value_phi892, {} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %0, i64 signext %.neg) #55, !dbg !70] = 1
 krc[  %45 = call fastcc i64 @julia_partition__13544({} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %0, i64 signext %43, i64 signext %44, i64 signext %35, { {} addrspace(10)* } addrspace(11)* nocapture noundef nonnull readonly align 8 dereferenceable(8) %1, {} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %34, i8 zeroext %value_phi892, {} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %0, i64 noundef signext 0) #55, !dbg !68] = 1
 krc[  %guard_res27 = select i1 %.not, i64 undef, i64 %unbox] = 1
 krc[  %25 = call noalias nonnull dereferenceable(24) dereferenceable_or_null(24) {} addrspace(10)* @julia.gc_alloc_obj({}** %21, i64 24, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 11783900496 to {}*) to {} addrspace(10)*)), !enzyme_fromstack !33] = 1
 krc[  %63 = addrspacecast { {} addrspace(10)*, i64, i64 } addrspace(10)* %26 to { {} addrspace(10)*, i64, i64 } addrspace(11)*, !dbg !103] = 1
 krc[  %unbox = load i64, i64 addrspace(11)* %31, align 8, !dbg !50] = 0
 krc[  %unbox2 = load i64, i64 addrspace(11)* %30, align 8, !dbg !50] = 0
 krc[  %26 = bitcast {} addrspace(10)* %25 to { {} addrspace(10)*, i64, i64 } addrspace(10)*, !enzyme_caststack !32] = 1
 krc[  %34 = call noalias nonnull "enzyme_inactive" {} addrspace(10)* @ijl_alloc_array_1d({} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 4743859760 to {}*) to {} addrspace(10)*), i64 %33) #56, !dbg !56] = 1
 krc[  %35 = sub i64 1, %unbox2, !dbg !61] = 1
 krc[  %.not84 = icmp slt i64 %value_phi4.lcssa, %value_phi5.lcssa, !dbg !93] = 1
 krc[  %58 = add i64 %value_phi15, -1, !dbg !91] = 1
 krc[  %47 = sub i64 %value_phi15, %value_phi590, !dbg !71] = 1
 krc[  %51 = add i64 %value_phi15, 1, !dbg !81] = 1
 krc[  %65 = icmp slt i64 %64, 21] = 1
 krc[  %or.cond = or i1 %.not80, %65, !dbg !65] = 1
 krc[  %.not80 = icmp sle i64 %value_phi17, %value_phi18, !dbg !63] = 1
 krc[  %64 = sub i64 %value_phi17, %value_phi18] = 1
 krc[  %48 = sub i64 %value_phi489, %value_phi15, !dbg !71] = 1
 krc[  %or.cond88 = or i1 %.not8087, %37, !dbg !65] = 1
 krc[  %33 = sub i64 %32, %unbox2, !dbg !54] = 1
 krc[  %32 = add i64 %unbox, 1, !dbg !50] = 1
 krc[  %.neg = add i64 %unbox2, -1, !dbg !61] = 1
 krc[  %.not81 = icmp slt i64 %47, %48, !dbg !73] = 1
 krc[  %36 = sub i64 %guard_res27, %unbox2] = 1
 krc[  %.not85 = icmp eq i8 %62, 0, !dbg !102] = 0
 krc[  %.not8087 = icmp sle i64 %guard_res27, %unbox2, !dbg !63] = 1
 krc[  %37 = icmp slt i64 %36, 21] = 1
 krc[  %62 = and i8 %value_phi8.lcssa, 1, !dbg !102] = 1
 krc[  %value_phi15 = phi i64 [ %45, %L31 ], [ %46, %L39 ]] = 0
 krc[  %value_phi489 = phi i64 [ %guard_res27, %L30.lr.ph ], [ %value_phi17, %union_move49 ]] = 0
 krc[  %value_phi590 = phi i64 [ %unbox2, %L30.lr.ph ], [ %value_phi18, %union_move49 ]] = 0
 krc[  %value_phi4.lcssa = phi i64 [ %guard_res27, %top ], [ %value_phi17, %L73.loopexit ]] = 0
 krc[  %value_phi5.lcssa = phi i64 [ %unbox2, %top ], [ %value_phi18, %L73.loopexit ]] = 0
 krc[  %value_phi17 = phi i64 [ %value_phi489, %L52 ], [ %58, %L61 ]] = 1
 krc[  %value_phi8.lcssa = phi i8 [ 0, %top ], [ %value_phi19, %L73.loopexit ]] = 1
 krc[  %value_phi7.off0.lcssa = phi i1 [ false, %top ], [ %42, %L73.loopexit ]] = 0
 krc[  %value_phi18 = phi i64 [ %51, %L52 ], [ %value_phi590, %L61 ]] = 1
 krc[  %value_phi892 = phi i8 [ 0, %L30.lr.ph ], [ %value_phi19, %union_move49 ]] = 0
 cur:   %value_phi15 = phi i64 [ %45, %L31 ], [ %46, %L39 ]
 origInst:   %34 = call noalias nonnull "enzyme_inactive" {} addrspace(10)* @ijl_alloc_array_1d({} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 4743859760 to {}*) to {} addrspace(10)*), i64 %33) #56, !dbg !56

Assertion failed: (false && "caching potentially capturing/offset of allocation"), function needsCacheWholeAllocation, file /workspace/srcdir/Enzyme/enzyme/Enzyme/GradientUtils.cpp, line 9366.

[63084] signal (6): Abort trap: 6
in expression starting at /Users/jkump/Desktop/Enzymanigans.jl/stable_diffusion/stable_diffusion.jl:168
__pthread_kill at /usr/lib/system/libsystem_kernel.dylib (unknown line)
Allocations: 224319021 (Pool: 224062862; Big: 256159); GC: 237
zsh: abort julia --project stable_diffusion/stable_diffusion.jl

@wsmoses

wsmoses commented 6 months ago

Closed by latest jll