EnzymeAD / Enzyme.jl

Julia bindings for the Enzyme automatic differentiator
https://enzyme.mit.edu
MIT License
440 stars 62 forks source link

Illegal type analysis in SVector / SMatrix #1805

Open anhi opened 1 week ago

anhi commented 1 week ago

The following line works in Julia:

@SVector([0.0, 0.0, 0.00]) / @SMatrix([1.0])
3-element MVector{3, Float64} with indices SOneTo(3):
 0.0
 0.0
 0.0

but throws an illegal type analysis error in autodiff:

autodiff(Reverse, /, Active, Active(@SVector([0.0, 0.0, 0.0])), Active(@SMatrix([1.0])))

``` julia> autodiff(Reverse, /, Active, Active(@SVector([0.0, 0.0, 0.0])), Active(@SMatrix([1.0]))) ERROR: Enzyme compilation failed due to illegal type analysis. Current scope: ; Function Attrs: mustprogress willreturn define noalias nonnull dereferenceable(24) "enzyme_type"="{}" "enzymejl_parmtype"="13475424528" "enzymejl_parmtype_ref"="1" {} addrspace(10)* @preprocess_julia___20125_inner.1([1 x [3 x double]] "enzyme_type"="{[-1]:Float@double}" "enzymejl_parmtype"="5462949968" "enzymejl_parmtype_ref"="0" %0, [1 x [1 x double]] "enzyme_type"="{[-1]:Float@double}" "enzymejl_parmtype"="5462833104" "enzymejl_parmtype_ref"="0" %1) local_unnamed_addr #15 !dbg !188 { entry: %2 = call noalias nonnull dereferenceable(24) dereferenceable_or_null(24) i8* @malloc(i64 24), !enzyme_fromstack !130 %newstruct.i = bitcast i8* %2 to [1 x [1 x [3 x double]]]*, !enzyme_caststack !19 %3 = call noalias nonnull dereferenceable(8) dereferenceable_or_null(8) i8* @malloc(i64 8), !enzyme_fromstack !130 %newstruct2.i = bitcast i8* %3 to [1 x [1 x double]]*, !enzyme_caststack !19 %4 = call noalias nonnull dereferenceable(24) dereferenceable_or_null(24) i8* @malloc(i64 24), !enzyme_fromstack !130 %sret_box.i = bitcast i8* %4 to [24 x i8]*, !enzyme_caststack !19 %5 = call {}*** @julia.get_pgcstack() #16, !dbg !189 %.fca.0.0.extract3 = extractvalue [1 x [3 x double]] %0, 0, 0, !dbg !189 %.fca.0.1.extract = extractvalue [1 x [3 x double]] %0, 0, 1, !dbg !189 %.fca.0.2.extract = extractvalue [1 x [3 x double]] %0, 0, 2, !dbg !189 %.fca.0.0.extract = extractvalue [1 x [1 x double]] %1, 0, 0, !dbg !189 %6 = bitcast [1 x [1 x double]]* %newstruct2.i to i8* %7 = bitcast [1 x [1 x [3 x double]]]* %newstruct.i to i8* %8 = getelementptr inbounds [24 x i8], [24 x i8]* %sret_box.i, i64 0, i64 0 call void @llvm.lifetime.start.p0i8(i64 noundef 24, i8* noundef nonnull align 8 dereferenceable(24) %8) #16 %current_task1.i74 = getelementptr inbounds {}**, {}*** %5, i64 -14 %current_task1.i = bitcast {}*** 
%current_task1.i74 to {}** %ptls_field.i75 = getelementptr inbounds {}**, {}*** %5, i64 2 %9 = bitcast {}*** %ptls_field.i75 to i64*** %ptls_load.i7677 = load i64**, i64*** %9, align 8, !tbaa !20, !noalias !190 %10 = getelementptr inbounds i64*, i64** %ptls_load.i7677, i64 2 %safepoint.i = load i64*, i64** %10, align 8, !tbaa !24, !noalias !190 fence syncscope("singlethread") seq_cst call void @julia.safepoint(i64* %safepoint.i) #16, !dbg !193, !noalias !190 fence syncscope("singlethread") seq_cst %memcpy_refined_dst.i = getelementptr inbounds [1 x [1 x double]], [1 x [1 x double]]* %newstruct2.i, i64 0, i64 0, i64 0, !dbg !195 store double %.fca.0.0.extract, double* %memcpy_refined_dst.i, align 8, !dbg !195, !tbaa !113, !alias.scope !115, !noalias !199 %.innerparm.sroa.0.0..sroa_idx = getelementptr inbounds [1 x [1 x [3 x double]]], [1 x [1 x [3 x double]]]* %newstruct.i, i64 0, i64 0, i64 0, i64 0, !dbg !202 store double %.fca.0.0.extract3, double* %.innerparm.sroa.0.0..sroa_idx, align 8, !dbg !202, !alias.scope !122, !noalias !205 %.innerparm.sroa.2.0..sroa_idx4 = getelementptr inbounds [1 x [1 x [3 x double]]], [1 x [1 x [3 x double]]]* %newstruct.i, i64 0, i64 0, i64 0, i64 1, !dbg !202 store double %.fca.0.1.extract, double* %.innerparm.sroa.2.0..sroa_idx4, align 8, !dbg !202, !alias.scope !122, !noalias !205 %.innerparm.sroa.3.0..sroa_idx5 = getelementptr inbounds [1 x [1 x [3 x double]]], [1 x [1 x [3 x double]]]* %newstruct.i, i64 0, i64 0, i64 0, i64 2, !dbg !202 store double %.fca.0.2.extract, double* %.innerparm.sroa.3.0..sroa_idx5, align 8, !dbg !202, !alias.scope !122, !noalias !205 %11 = addrspacecast [1 x [1 x double]]* %newstruct2.i to [1 x [1 x double]] addrspace(11)*, !dbg !206 %12 = addrspacecast [1 x [1 x [3 x double]]]* %newstruct.i to [1 x [1 x [3 x double]]] addrspace(11)*, !dbg !206 %13 = call fastcc { {} addrspace(10)*, i8 } @julia___20131([1 x [1 x double]] addrspace(11)* nocapture nofree noundef nonnull readonly align 8 
dereferenceable(8) %11, [1 x [1 x [3 x double]]] addrspace(11)* nocapture nofree noundef nonnull readonly align 8 dereferenceable(24) %12) #16, !dbg !206, !noalias !190 %14 = extractvalue { {} addrspace(10)*, i8 } %13, 0, !dbg !206 %15 = extractvalue { {} addrspace(10)*, i8 } %13, 1, !dbg !206 %16 = addrspacecast {} addrspace(10)* %14 to {} addrspace(11)*, !dbg !206 %17 = and i8 %15, 127, !dbg !206 %exactly_isa.i.not = icmp eq i8 %17, 1, !dbg !206 br i1 %exactly_isa.i.not, label %box_union, label %L19.i, !dbg !206 L19.i: ; preds = %entry %isboxed.i = icmp eq i8 %15, -128, !dbg !206 br i1 %isboxed.i, label %isa.i, label %L80.i, !dbg !206 L80.i: ; preds = %isa.i, %L19.i call void @ijl_throw({} addrspace(12)* noundef addrspacecast ({}* inttoptr (i64 5290665296 to {}*) to {} addrspace(12)*)) #17, !dbg !206 unreachable, !dbg !206 L91.i: ; preds = %isa.i %18 = addrspacecast {} addrspace(10)* %14 to [1 x {} addrspace(10)*] addrspace(11)*, !dbg !207 %getfield_addr.i = getelementptr inbounds [1 x {} addrspace(10)*], [1 x {} addrspace(10)*] addrspace(11)* %18, i64 0, i64 0, !dbg !207 %getfield.i = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %getfield_addr.i unordered, align 8, !dbg !207, !tbaa !94, !alias.scope !89, !noalias !128, !nonnull !19, !dereferenceable !129, !align !130 %19 = addrspacecast {} addrspace(10)* %getfield.i to i8 addrspace(11)*, !dbg !209 %newstruct15.i = call noalias nonnull dereferenceable(24) "enzyme_type"="{[-1]:Pointer, [-1,-1]:Float@double}" {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1.i, i64 noundef 24, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 4495875472 to {}*) to {} addrspace(10)*)) #18, !dbg !211 %20 = addrspacecast {} addrspace(10)* %newstruct15.i to i8 addrspace(11)*, !dbg !211 call void @llvm.memcpy.p11i8.p11i8.i64(i8 addrspace(11)* noundef align 8 dereferenceable(24) %20, i8 addrspace(11)* noundef align 1 dereferenceable(24) %19, i64 noundef 24, i1 noundef false) #16, !dbg !211 br 
label %L102.i.thread, !dbg !206 L96.i: ; preds = %isa.i %21 = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* %14) #16, !dbg !212 %22 = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* %16) #19, !dbg !214 %coercion.i = bitcast {}* %22 to double*, !dbg !216 %pointerref.i = load double, double* %coercion.i, align 1, !dbg !216, !tbaa !87, !alias.scope !89, !noalias !217 call void @llvm.julia.gc_preserve_end(token %21) #16, !dbg !212, !noalias !190 %23 = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* %14) #16, !dbg !212 %24 = getelementptr inbounds double, double* %coercion.i, i64 1, !dbg !216 %pointerref31.i = load double, double* %24, align 1, !dbg !216, !tbaa !87, !alias.scope !89, !noalias !217 call void @llvm.julia.gc_preserve_end(token %23) #16, !dbg !212, !noalias !190 %25 = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* %14) #16, !dbg !212 %26 = getelementptr inbounds double, double* %coercion.i, i64 2, !dbg !216 %pointerref36.i = load double, double* %26, align 1, !dbg !216, !tbaa !87, !alias.scope !89, !noalias !217 call void @llvm.julia.gc_preserve_end(token %25) #16, !dbg !212, !noalias !190 %newstruct18.i = call noalias nonnull dereferenceable(24) "enzyme_type"="{[-1]:Pointer, [-1,-1]:Float@double}" {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1.i, i64 noundef 24, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 13492663120 to {}*) to {} addrspace(10)*)) #18, !dbg !211 %27 = addrspacecast {} addrspace(10)* %newstruct18.i to i8 addrspace(11)*, !dbg !211 %.sroa.0.0..sroa_cast = addrspacecast {} addrspace(10)* %newstruct18.i to double addrspace(11)*, !dbg !211 store double %pointerref.i, double addrspace(11)* %.sroa.0.0..sroa_cast, align 1, !dbg !211 %.sroa.4.0..sroa_idx = getelementptr inbounds i8, i8 addrspace(11)* %27, i64 8, !dbg !211 %.sroa.4.0..sroa_cast = bitcast i8 addrspace(11)* %.sroa.4.0..sroa_idx to double addrspace(11)*, !dbg !211 store double 
%pointerref31.i, double addrspace(11)* %.sroa.4.0..sroa_cast, align 1, !dbg !211 %.sroa.5.0..sroa_idx = getelementptr inbounds i8, i8 addrspace(11)* %27, i64 16, !dbg !211 %.sroa.5.0..sroa_cast = bitcast i8 addrspace(11)* %.sroa.5.0..sroa_idx to double addrspace(11)*, !dbg !211 store double %pointerref36.i, double addrspace(11)* %.sroa.5.0..sroa_cast, align 1, !dbg !211 br label %L102.i.thread, !dbg !206 L102.i.thread: ; preds = %L96.i, %L91.i %.ph = phi {} addrspace(10)* [ %newstruct15.i, %L91.i ], [ %newstruct18.i, %L96.i ] call void @llvm.lifetime.end.p0i8(i64 noundef 8, i8* noundef nonnull %6) #16, !dbg !206 call void @llvm.lifetime.end.p0i8(i64 noundef 24, i8* noundef nonnull %7) #16, !dbg !206 call void @llvm.lifetime.end.p0i8(i64 noundef 24, i8* noundef nonnull %8) #16, !dbg !206 br label %common.ret, !dbg !189 isa.i: ; preds = %L19.i %28 = call "enzyme_type"="{[-1]:Pointer}" {} addrspace(10)* @julia.typeof({} addrspace(10)* %14) #19, !dbg !206 %29 = addrspacecast {} addrspace(10)* %28 to {} addrspace(11)*, !dbg !206 %30 = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* %29) #19, !dbg !206 %magicptr = ptrtoint {}* %30 to i64, !dbg !206 switch i64 %magicptr, label %L80.i [ i64 4494085008, label %L91.i i64 13501115600, label %L96.i ], !dbg !206 common.ret: ; preds = %box_union, %L102.i.thread %common.ret.op = phi {} addrspace(10)* [ %boxunion, %box_union ], [ %.ph, %L102.i.thread ] ret {} addrspace(10)* %common.ret.op, !dbg !189 box_union: ; preds = %entry %31 = bitcast [24 x i8]* %sret_box.i to {}*, !dbg !206 %32 = addrspacecast {}* %31 to {} addrspace(11)*, !dbg !206 %33 = icmp slt i8 %15, 0, !dbg !206 %34 = select i1 %33, {} addrspace(11)* %16, {} addrspace(11)* %32, !dbg !206 %35 = bitcast {} addrspace(11)* %34 to [3 x double] addrspace(11)*, !dbg !218 %memcpy_refined_src6.i = getelementptr inbounds [3 x double], [3 x double] addrspace(11)* %35, i64 0, i64 1, !dbg !219 %memcpy_refined_src8.i = getelementptr inbounds [3 x double], [3 x double] 
addrspace(11)* %35, i64 0, i64 2, !dbg !219 %36 = bitcast {} addrspace(11)* %34 to double addrspace(11)*, !dbg !195 %37 = load double, double addrspace(11)* %36, align 8, !dbg !195, !tbaa !113, !alias.scope !115, !noalias !153 %38 = load double, double addrspace(11)* %memcpy_refined_src6.i, align 8, !dbg !195, !tbaa !113, !alias.scope !115, !noalias !153 %39 = load double, double addrspace(11)* %memcpy_refined_src8.i, align 8, !dbg !195, !tbaa !113, !alias.scope !115, !noalias !153 call void @llvm.lifetime.end.p0i8(i64 noundef 8, i8* noundef nonnull %6) #16, !dbg !206 call void @llvm.lifetime.end.p0i8(i64 noundef 24, i8* noundef nonnull %7) #16, !dbg !206 call void @llvm.lifetime.end.p0i8(i64 noundef 24, i8* noundef nonnull %8) #16, !dbg !206 %boxunion = call noalias nonnull dereferenceable(24) "enzyme_type"="{[-1]:Pointer, [-1,-1]:Float@double}" {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1.i, i64 noundef 24, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 5462828368 to {}*) to {} addrspace(10)*)) #20, !dbg !189 %40 = bitcast {} addrspace(10)* %boxunion to [1 x [3 x double]] addrspace(10)*, !dbg !189 %.repack = getelementptr inbounds [1 x [3 x double]], [1 x [3 x double]] addrspace(10)* %40, i64 0, i64 0, i64 0, !dbg !189 store double %37, double addrspace(10)* %.repack, align 8, !dbg !189, !noalias !221 %.repack79 = getelementptr inbounds [1 x [3 x double]], [1 x [3 x double]] addrspace(10)* %40, i64 0, i64 0, i64 1, !dbg !189 store double %38, double addrspace(10)* %.repack79, align 8, !dbg !189, !noalias !221 %.repack81 = getelementptr inbounds [1 x [3 x double]], [1 x [3 x double]] addrspace(10)* %40, i64 0, i64 0, i64 2, !dbg !189 store double %39, double addrspace(10)* %.repack81, align 8, !dbg !189, !noalias !221 br label %common.ret } Type analysis state: %.fca.0.0.extract3 = extractvalue [1 x [3 x double]] %0, 0, 0, !dbg !21: {[-1]:Float@double}, intvals: {} %3 = call noalias nonnull dereferenceable(8) 
dereferenceable_or_null(8) i8* @malloc(i64 8), !enzyme_fromstack !20: {[-1]:Pointer, [-1,-1]:Float@double}, intvals: {} %getfield_addr.i = getelementptr inbounds [1 x {} addrspace(10)*], [1 x {} addrspace(10)*] addrspace(11)* %18, i64 0, i64 0, !dbg !62: {[-1]:Pointer, [-1,0]:Pointer, [-1,0,0]:Float@double, [-1,0,8]:Float@double, [-1,0,16]:Float@double}, intvals: {} %.fca.0.0.extract = extractvalue [1 x [1 x double]] %1, 0, 0, !dbg !21: {[-1]:Float@double}, intvals: {} %14 = extractvalue { {} addrspace(10)*, i8 } %13, 0, !dbg !61: {[-1]:Pointer, [-1,0]:Pointer, [-1,0,0]:Float@double, [-1,0,8]:Float@double, [-1,0,16]:Float@double}, intvals: {} %33 = icmp slt i8 %15, 0, !dbg !61: {[-1]:Integer}, intvals: {} %13 = call fastcc { {} addrspace(10)*, i8 } @julia___20131([1 x [1 x double]] addrspace(11)* nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) %11, [1 x [1 x [3 x double]]] addrspace(11)* nocapture nofree noundef nonnull readonly align 8 dereferenceable(24) %12) #16, !dbg !61, !noalias !26: {[0]:Pointer, [8]:Integer}, intvals: {} %8 = getelementptr inbounds [24 x i8], [24 x i8]* %sret_box.i, i64 0, i64 0: {[-1]:Pointer}, intvals: {} %newstruct15.i = call noalias nonnull dereferenceable(24) "enzyme_type"="{[-1]:Pointer, [-1,-1]:Float@double}" {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1.i, i64 noundef 24, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 4495875472 to {}*) to {} addrspace(10)*)) #18, !dbg !79: {[-1]:Pointer, [-1,-1]:Float@double}, intvals: {} %memcpy_refined_dst.i = getelementptr inbounds [1 x [1 x double]], [1 x [1 x double]]* %newstruct2.i, i64 0, i64 0, i64 0, !dbg !33: {[-1]:Pointer, [-1,0]:Float@double}, intvals: {} %6 = bitcast [1 x [1 x double]]* %newstruct2.i to i8*: {[-1]:Pointer, [-1,0]:Float@double}, intvals: {} %7 = bitcast [1 x [1 x [3 x double]]]* %newstruct.i to i8*: {[-1]:Pointer, [-1,0]:Float@double, [-1,8]:Float@double, [-1,16]:Float@double}, intvals: {} %memcpy_refined_src6.i = 
getelementptr inbounds [3 x double], [3 x double] addrspace(11)* %35, i64 0, i64 1, !dbg !96: {[-1]:Pointer, [-1,0]:Float@double}, intvals: {} %memcpy_refined_src8.i = getelementptr inbounds [3 x double], [3 x double] addrspace(11)* %35, i64 0, i64 2, !dbg !96: {[-1]:Pointer, [-1,0]:Float@double}, intvals: {} %.ph = phi {} addrspace(10)* [ %newstruct15.i, %L91.i ], [ %newstruct18.i, %L96.i ]: {[-1]:Pointer, [-1,-1]:Float@double}, intvals: {} %common.ret.op = phi {} addrspace(10)* [ %boxunion, %box_union ], [ %.ph, %L102.i.thread ]: {[-1]:Pointer, [-1,-1]:Float@double}, intvals: {} [1 x [3 x double]] %0: {[-1]:Float@double}, intvals: {} [1 x [1 x double]] %1: {[-1]:Float@double}, intvals: {} %.repack = getelementptr inbounds [1 x [3 x double]], [1 x [3 x double]] addrspace(10)* %40, i64 0, i64 0, i64 0, !dbg !21: {[-1]:Pointer, [-1,-1]:Float@double}, intvals: {} %boxunion = call noalias nonnull dereferenceable(24) "enzyme_type"="{[-1]:Pointer, [-1,-1]:Float@double}" {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1.i, i64 noundef 24, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 5462828368 to {}*) to {} addrspace(10)*)) #20, !dbg !21: {[-1]:Pointer, [-1,-1]:Float@double}, intvals: {} %.innerparm.sroa.2.0..sroa_idx4 = getelementptr inbounds [1 x [1 x [3 x double]]], [1 x [1 x [3 x double]]]* %newstruct.i, i64 0, i64 0, i64 0, i64 1, !dbg !53: {[-1]:Pointer, [-1,0]:Float@double}, intvals: {} %.innerparm.sroa.3.0..sroa_idx5 = getelementptr inbounds [1 x [1 x [3 x double]]], [1 x [1 x [3 x double]]]* %newstruct.i, i64 0, i64 0, i64 0, i64 2, !dbg !53: {[-1]:Pointer, [-1,0]:Float@double}, intvals: {} %.innerparm.sroa.0.0..sroa_idx = getelementptr inbounds [1 x [1 x [3 x double]]], [1 x [1 x [3 x double]]]* %newstruct.i, i64 0, i64 0, i64 0, i64 0, !dbg !53: {[-1]:Pointer, [-1,0]:Float@double}, intvals: {} %.repack81 = getelementptr inbounds [1 x [3 x double]], [1 x [3 x double]] addrspace(10)* %40, i64 0, i64 0, i64 2, !dbg !21: 
{[-1]:Pointer, [-1,-1]:Float@double}, intvals: {} %.repack79 = getelementptr inbounds [1 x [3 x double]], [1 x [3 x double]] addrspace(10)* %40, i64 0, i64 0, i64 1, !dbg !21: {[-1]:Pointer, [-1,-1]:Float@double}, intvals: {} %newstruct18.i = call noalias nonnull dereferenceable(24) "enzyme_type"="{[-1]:Pointer, [-1,-1]:Float@double}" {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1.i, i64 noundef 24, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 13492663120 to {}*) to {} addrspace(10)*)) #18, !dbg !79: {[-1]:Pointer, [-1,-1]:Float@double}, intvals: {} %5 = call {}*** @julia.get_pgcstack() #16, !dbg !21: {[-1]:Pointer, [-1,16]:Pointer}, intvals: {} %39 = load double, double addrspace(11)* %memcpy_refined_src8.i, align 8, !dbg !33, !tbaa !41, !alias.scope !43, !noalias !100: {[-1]:Float@double}, intvals: {} %34 = select i1 %33, {} addrspace(11)* %16, {} addrspace(11)* %32, !dbg !61: {[-1]:Pointer, [-1,0]:Float@double, [-1,8]:Float@double, [-1,16]:Float@double}, intvals: {} %38 = load double, double addrspace(11)* %memcpy_refined_src6.i, align 8, !dbg !33, !tbaa !41, !alias.scope !43, !noalias !100: {[-1]:Float@double}, intvals: {} %.fca.0.2.extract = extractvalue [1 x [3 x double]] %0, 0, 2, !dbg !21: {[-1]:Float@double}, intvals: {} %.fca.0.1.extract = extractvalue [1 x [3 x double]] %0, 0, 1, !dbg !21: {[-1]:Float@double}, intvals: {} %22 = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* %16) #19, !dbg !85: {[-1]:Pointer, [-1,0]:Float@double, [-1,8]:Float@double, [-1,16]:Float@double}, intvals: {} %26 = getelementptr inbounds double, double* %coercion.i, i64 2, !dbg !89: {[-1]:Pointer, [-1,0]:Float@double}, intvals: {} %24 = getelementptr inbounds double, double* %coercion.i, i64 1, !dbg !89: {[-1]:Pointer, [-1,0]:Float@double, [-1,8]:Float@double}, intvals: {} %.sroa.4.0..sroa_idx = getelementptr inbounds i8, i8 addrspace(11)* %27, i64 8, !dbg !79: {[-1]:Pointer, [-1,-1]:Float@double}, intvals: {} %32 = addrspacecast 
{}* %31 to {} addrspace(11)*, !dbg !61: {[-1]:Pointer, [-1,0]:Float@double}, intvals: {} %37 = load double, double addrspace(11)* %36, align 8, !dbg !33, !tbaa !41, !alias.scope !43, !noalias !100: {[-1]:Float@double}, intvals: {} %31 = bitcast [24 x i8]* %sret_box.i to {}*, !dbg !61: {[-1]:Pointer}, intvals: {} %35 = bitcast {} addrspace(11)* %34 to [3 x double] addrspace(11)*, !dbg !93: {[-1]:Pointer, [-1,0]:Float@double, [-1,8]:Float@double, [-1,16]:Float@double}, intvals: {} %36 = bitcast {} addrspace(11)* %34 to double addrspace(11)*, !dbg !33: {[-1]:Pointer, [-1,0]:Float@double}, intvals: {} {}* inttoptr (i64 5462828368 to {}*): {[-1]:Anything}, intvals: {} {} addrspace(10)* addrspacecast ({}* inttoptr (i64 5462828368 to {}*) to {} addrspace(10)*): {[-1]:Anything}, intvals: {} %17 = and i8 %15, 127, !dbg !61: {[-1]:Integer}, intvals: {} %exactly_isa.i.not = icmp eq i8 %17, 1, !dbg !61: {[-1]:Integer}, intvals: {} %isboxed.i = icmp eq i8 %15, -128, !dbg !61: {[-1]:Integer}, intvals: {} %15 = extractvalue { {} addrspace(10)*, i8 } %13, 1, !dbg !61: {[-1]:Integer}, intvals: {} %40 = bitcast {} addrspace(10)* %boxunion to [1 x [3 x double]] addrspace(10)*, !dbg !21: {[-1]:Pointer, [-1,-1]:Float@double}, intvals: {} %.sroa.0.0..sroa_cast = addrspacecast {} addrspace(10)* %newstruct18.i to double addrspace(11)*, !dbg !79: {[-1]:Pointer, [-1,-1]:Float@double}, intvals: {} %20 = addrspacecast {} addrspace(10)* %newstruct15.i to i8 addrspace(11)*, !dbg !79: {[-1]:Pointer, [-1,-1]:Float@double}, intvals: {} %.sroa.5.0..sroa_cast = bitcast i8 addrspace(11)* %.sroa.5.0..sroa_idx to double addrspace(11)*, !dbg !79: {[-1]:Pointer, [-1,-1]:Float@double}, intvals: {} {} addrspace(12)* addrspacecast ({}* inttoptr (i64 5290665296 to {}*) to {} addrspace(12)*): {[-1]:Anything}, intvals: {} %28 = call "enzyme_type"="{[-1]:Pointer}" {} addrspace(10)* @julia.typeof({} addrspace(10)* %14) #19, !dbg !61: {[-1]:Pointer}, intvals: {} %2 = call noalias nonnull dereferenceable(24) 
dereferenceable_or_null(24) i8* @malloc(i64 24), !enzyme_fromstack !20: {[-1]:Pointer, [-1,-1]:Float@double}, intvals: {} %safepoint.i = load i64*, i64** %10, align 8, !tbaa !29, !noalias !26: {}, intvals: {} %coercion.i = bitcast {}* %22 to double*, !dbg !89: {[-1]:Pointer, [-1,0]:Float@double, [-1,8]:Float@double, [-1,16]:Float@double}, intvals: {} %11 = addrspacecast [1 x [1 x double]]* %newstruct2.i to [1 x [1 x double]] addrspace(11)*, !dbg !61: {[-1]:Pointer, [-1,0]:Float@double}, intvals: {} %.sroa.4.0..sroa_cast = bitcast i8 addrspace(11)* %.sroa.4.0..sroa_idx to double addrspace(11)*, !dbg !79: {[-1]:Pointer, [-1,-1]:Float@double}, intvals: {} {}* inttoptr (i64 13492663120 to {}*): {[-1]:Anything}, intvals: {} {} addrspace(10)* addrspacecast ({}* inttoptr (i64 13492663120 to {}*) to {} addrspace(10)*): {[-1]:Anything}, intvals: {} %getfield.i = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %getfield_addr.i unordered, align 8, !dbg !62, !tbaa !66, !alias.scope !70, !noalias !71, !nonnull !19, !dereferenceable !72, !align !20: {[-1]:Pointer, [-1,0]:Float@double, [-1,8]:Float@double, [-1,16]:Float@double}, intvals: {} {}* inttoptr (i64 4495875472 to {}*): {[-1]:Anything}, intvals: {} %ptls_load.i7677 = load i64**, i64*** %9, align 8, !tbaa !22, !noalias !26: {[-1]:Pointer}, intvals: {} {} addrspace(10)* addrspacecast ({}* inttoptr (i64 4495875472 to {}*) to {} addrspace(10)*): {[-1]:Anything}, intvals: {} %29 = addrspacecast {} addrspace(10)* %28 to {} addrspace(11)*, !dbg !61: {[-1]:Pointer}, intvals: {} %18 = addrspacecast {} addrspace(10)* %14 to [1 x {} addrspace(10)*] addrspace(11)*, !dbg !62: {[-1]:Pointer, [-1,0]:Pointer, [-1,0,0]:Float@double, [-1,0,8]:Float@double, [-1,0,16]:Float@double}, intvals: {} %magicptr = ptrtoint {}* %30 to i64, !dbg !61: {[-1]:Pointer}, intvals: {} %pointerref36.i = load double, double* %26, align 1, !dbg !89, !tbaa !91, !alias.scope !70, !noalias !92: {[-1]:Float@double}, intvals: {} %27 = addrspacecast 
{} addrspace(10)* %newstruct18.i to i8 addrspace(11)*, !dbg !79: {[-1]:Pointer, [-1,-1]:Float@double}, intvals: {} {}* inttoptr (i64 5290665296 to {}*): {[-1]:Anything}, intvals: {} %pointerref31.i = load double, double* %24, align 1, !dbg !89, !tbaa !91, !alias.scope !70, !noalias !92: {[-1]:Float@double}, intvals: {} %4 = call noalias nonnull dereferenceable(24) dereferenceable_or_null(24) i8* @malloc(i64 24), !enzyme_fromstack !20: {[-1]:Pointer}, intvals: {} %.sroa.5.0..sroa_idx = getelementptr inbounds i8, i8 addrspace(11)* %27, i64 16, !dbg !79: {[-1]:Pointer, [-1,-1]:Float@double}, intvals: {} %16 = addrspacecast {} addrspace(10)* %14 to {} addrspace(11)*, !dbg !61: {[-1]:Pointer, [-1,0]:Float@double, [-1,8]:Float@double, [-1,16]:Float@double}, intvals: {} %19 = addrspacecast {} addrspace(10)* %getfield.i to i8 addrspace(11)*, !dbg !73: {[-1]:Pointer, [-1,0]:Float@double, [-1,8]:Float@double, [-1,16]:Float@double}, intvals: {} %current_task1.i = bitcast {}*** %current_task1.i74 to {}**: {}, intvals: {} %12 = addrspacecast [1 x [1 x [3 x double]]]* %newstruct.i to [1 x [1 x [3 x double]]] addrspace(11)*, !dbg !61: {[-1]:Pointer, [-1,0]:Float@double, [-1,8]:Float@double, [-1,16]:Float@double}, intvals: {} %pointerref.i = load double, double* %coercion.i, align 1, !dbg !89, !tbaa !91, !alias.scope !70, !noalias !92: {[-1]:Float@double}, intvals: {} %9 = bitcast {}*** %ptls_field.i75 to i64***: {[-1]:Pointer, [-1,0]:Pointer}, intvals: {} %30 = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* %29) #19, !dbg !61: {[-1]:Pointer}, intvals: {} %10 = getelementptr inbounds i64*, i64** %ptls_load.i7677, i64 2: {[-1]:Pointer}, intvals: {} %sret_box.i = bitcast i8* %4 to [24 x i8]*, !enzyme_caststack !19: {[-1]:Pointer}, intvals: {} %current_task1.i74 = getelementptr inbounds {}**, {}*** %5, i64 -14: {[-1]:Pointer}, intvals: {} %ptls_field.i75 = getelementptr inbounds {}**, {}*** %5, i64 2: {[-1]:Pointer, [-1,0]:Pointer}, intvals: {} %newstruct2.i = bitcast 
i8* %3 to [1 x [1 x double]]*, !enzyme_caststack !19: {[-1]:Pointer, [-1,-1]:Float@double}, intvals: {} %newstruct.i = bitcast i8* %2 to [1 x [1 x [3 x double]]]*, !enzyme_caststack !19: {[-1]:Pointer, [-1,-1]:Float@double}, intvals: {} Illegal updateAnalysis prev:{[-1]:Pointer, [-1,0]:Float@double, [-1,8]:Float@double, [-1,16]:Float@double} new: {[-1]:Pointer, [-1,0]:Pointer, [-1,0,0]:Float@double, [-1,0,8]:Float@double, [-1,0,16]:Float@double} val: %16 = addrspacecast {} addrspace(10)* %14 to {} addrspace(11)*, !dbg !61 origin= %16 = addrspacecast {} addrspace(10)* %14 to {} addrspace(11)*, !dbg !61 MethodInstance for /(::SVector{3, Float64}, ::SMatrix{1, 1, Float64, 1}) Caused by: Stacktrace: [1] / @ ~/.julia/juliaup/julia-1.10.4+0.aarch64.apple.darwin14/share/julia/stdlib/v1.10/LinearAlgebra/src/generic.jl:1156 [2] / @ ~/.julia/juliaup/julia-1.10.4+0.aarch64.apple.darwin14/share/julia/stdlib/v1.10/LinearAlgebra/src/generic.jl:0 Stacktrace: [1] julia_error(cstr::Cstring, val::Ptr{LLVM.API.LLVMOpaqueValue}, errtype::Enzyme.API.ErrorType, data::Ptr{Nothing}, data2::Ptr{LLVM.API.LLVMOpaqueValue}, B::Ptr{LLVM.API.LLVMOpaqueBuilder}) @ Enzyme.Compiler ~/.julia/packages/Enzyme/TiboG/src/compiler.jl:2306 [2] EnzymeCreateAugmentedPrimal(logic::Enzyme.Logic, todiff::LLVM.Function, retType::Enzyme.API.CDIFFE_TYPE, constant_args::Vector{…}, TA::Enzyme.TypeAnalysis, returnUsed::Bool, shadowReturnUsed::Bool, typeInfo::Enzyme.FnTypeInfo, uncacheable_args::Vector{…}, forceAnonymousTape::Bool, width::Int64, atomicAdd::Bool) @ Enzyme.API ~/.julia/packages/Enzyme/TiboG/src/api.jl:199 [3] enzyme!(job::GPUCompiler.CompilerJob{…}, mod::LLVM.Module, primalf::LLVM.Function, TT::Type, mode::Enzyme.API.CDerivativeMode, width::Int64, parallel::Bool, actualRetType::Type, wrap::Bool, modifiedBetween::Tuple{…}, returnPrimal::Bool, expectedTapeType::Type, loweredArgs::Set{…}, boxedArgs::Set{…}) @ Enzyme.Compiler ~/.julia/packages/Enzyme/TiboG/src/compiler.jl:4128 [4] codegen(output::Symbol, 
job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}; libraries::Bool, deferred_codegen::Bool, optimize::Bool, toplevel::Bool, strip::Bool, validate::Bool, only_entry::Bool, parent_job::Nothing) @ Enzyme.Compiler ~/.julia/packages/Enzyme/TiboG/src/compiler.jl:6438 [5] codegen @ ~/.julia/packages/Enzyme/TiboG/src/compiler.jl:5614 [inlined] [6] _thunk(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}, postopt::Bool) @ Enzyme.Compiler ~/.julia/packages/Enzyme/TiboG/src/compiler.jl:7241 [7] _thunk @ ~/.julia/packages/Enzyme/TiboG/src/compiler.jl:7241 [inlined] [8] cached_compilation @ ~/.julia/packages/Enzyme/TiboG/src/compiler.jl:7282 [inlined] [9] thunkbase(ctx::LLVM.Context, mi::Core.MethodInstance, ::Val{0x0000000000007d04}, ::Type{Const{…}}, ::Type{Duplicated{…}}, tt::Type{Tuple{…}}, ::Val{Enzyme.API.DEM_ReverseModeGradient}, ::Val{1}, ::Val{(false, false, false)}, ::Val{false}, ::Val{true}, ::Type{FFIABI}, ::Val{true}) @ Enzyme.Compiler ~/.julia/packages/Enzyme/TiboG/src/compiler.jl:7355 [10] #s2080#19052 @ ~/.julia/packages/Enzyme/TiboG/src/compiler.jl:7407 [inlined] [11] var"#s2080#19052"(FA::Any, A::Any, TT::Any, Mode::Any, ModifiedBetween::Any, width::Any, ReturnPrimal::Any, ShadowInit::Any, World::Any, ABI::Any, ErrIfFuncWritten::Any, ::Any, ::Type, ::Type, ::Type, tt::Any, ::Type, ::Type, ::Type, ::Type, ::Type, ::Type, ::Any) @ Enzyme.Compiler ./none:0 [12] (::Core.GeneratedFunctionStub)(::UInt64, ::LineNumberNode, ::Any, ::Vararg{Any}) @ Core ./boot.jl:602 [13] autodiff @ ~/.julia/packages/Enzyme/TiboG/src/Enzyme.jl:263 [inlined] [14] autodiff(::ReverseMode{false, FFIABI, false, false}, ::typeof(/), ::Type{Active}, ::Active{SVector{3, Float64}}, ::Active{SMatrix{1, 1, Float64, 1}}) @ Enzyme ~/.julia/packages/Enzyme/TiboG/src/Enzyme.jl:332 [15] top-level scope @ REPL[39]:1 [16] top-level scope @ none:1 Some type information was truncated. 
Use `show(err)` to see complete types. ```

Versions:

Enzyme v0.12.36
StaticArrays v1.9.7

julia> versioninfo()
Julia Version 1.10.4
Commit 48d4fd48430 (2024-06-04 10:41 UTC)
Build Info:
  Official https://julialang.org/ release
Platform Info:
  OS: macOS (arm64-apple-darwin22.4.0)
  CPU: 8 × Apple M2
  WORD_SIZE: 64
  LIBM: libopenlibm
  LLVM: libLLVM-15.0.7 (ORCJIT, apple-m1)
Threads: 1 default, 0 interactive, 1 GC (on 4 virtual cores)

wsmoses commented 2 days ago

So it looks like the issue here is that StaticArrays is actually type-unstable in this case.


julia> tup = (Active(@SVector([0.0, 0.0, 0.0])), Active(@SMatrix([1.0])))
(Active{SVector{3, Float64}}([0.0, 0.0, 0.0]), Active{SMatrix{1, 1, Float64, 1}}([1.0;;]))

julia> @code_typed tup[1].val / tup[2].val
CodeInfo(
1 ──        nothing::Nothing
│    %2   = StaticArrays.getfield(B, :data)::Tuple{Float64}
│    %3   = Base.getfield(%2, 1, true)::Float64
│    %4   = StaticArrays.tuple(%3)::Tuple{Float64}
│    %5   = %new(SMatrix{1, 1, Float64, 1}, %4)::SMatrix{1, 1, Float64, 1}
│    %6   = %new(LinearAlgebra.Adjoint{Float64, SVector{3, Float64}}, A)::LinearAlgebra.Adjoint{Float64, SVector{3, Float64}}
│    %7   = invoke LinearAlgebra.:\(%5::SMatrix{1, 1, Float64, 1}, %6::LinearAlgebra.Adjoint{Float64, SVector{3, Float64}})::Union{LinearAlgebra.Adjoint{Float64, MVector{3, Float64}}, SMatrix{1, 3, Float64, 3}, MMatrix{1, 3, Float64, 3}}
│    %8   = (isa)(%7, SMatrix{1, 3, Float64, 3})::Bool
└───        goto #3 if not %8
2 ── %10  = π (%7, SMatrix{1, 3, Float64, 3})
│    %11  = StaticArrays.getfield(%10, :data)::Tuple{Float64, Float64, Float64}
│    %12  = Base.getfield(%11, 1, true)::Float64
│    %13  = StaticArrays.getfield(%10, :data)::Tuple{Float64, Float64, Float64}
│    %14  = Base.getfield(%13, 2, true)::Float64
│    %15  = StaticArrays.getfield(%10, :data)::Tuple{Float64, Float64, Float64}
│    %16  = Base.getfield(%15, 3, true)::Float64
│    %17  = StaticArrays.tuple(%12, %14, %16)::Tuple{Float64, Float64, Float64}
└───        goto #28
3 ── %19  = (isa)(%7, LinearAlgebra.Adjoint{Float64, MVector{3, Float64}})::Bool
└───        goto #5 if not %19
4 ── %21  = π (%7, LinearAlgebra.Adjoint{Float64, MVector{3, Float64}})
│    %22  = Base.getfield(%21, :parent)::MVector{3, Float64}
└───        goto #28
5 ── %24  = (isa)(%7, MMatrix{1, 3, Float64, 3})::Bool
└───        goto #27 if not %24
6 ── %26  = π (%7, MMatrix{1, 3, Float64, 3})
└───        goto #11 if not true
7 ── %28  = Core.tuple(1)::Tuple{Int64}
│    %29  = Base.sle_int(1, 1)::Bool
│    %30  = Base.sle_int(1, 3)::Bool
│    %31  = Base.and_int(%29, %30)::Bool
└───        goto #9 if not %31
8 ──        goto #10
9 ──        invoke Base.throw_boundserror(%26::MMatrix{1, 3, Float64, 3}, %28::Tuple{Int64})::Union{}
└───        unreachable
10 ─        nothing::Nothing
11 ┄ %37  = $(Expr(:gc_preserve_begin, :(%26)))
│    %38  = $(Expr(:foreigncall, :(:jl_value_ptr), Ptr{Nothing}, svec(Any), 0, :(:ccall), :(%26)))::Ptr{Nothing}
│    %39  = Base.bitcast(Ptr{Float64}, %38)::Ptr{Float64}
│    %40  = Base.pointerref(%39, 1, 1)::Float64
│           $(Expr(:gc_preserve_end, :(%37)))
└───        goto #12
12 ─        goto #17 if not true
13 ─ %44  = Core.tuple(2)::Tuple{Int64}
│    %45  = Base.sle_int(1, 2)::Bool
│    %46  = Base.sle_int(2, 3)::Bool
│    %47  = Base.and_int(%45, %46)::Bool
└───        goto #15 if not %47
14 ─        goto #16
15 ─        invoke Base.throw_boundserror(%26::MMatrix{1, 3, Float64, 3}, %44::Tuple{Int64})::Union{}
└───        unreachable
16 ─        nothing::Nothing
17 ┄ %53  = $(Expr(:gc_preserve_begin, :(%26)))
│    %54  = $(Expr(:foreigncall, :(:jl_value_ptr), Ptr{Nothing}, svec(Any), 0, :(:ccall), :(%26)))::Ptr{Nothing}
│    %55  = Base.bitcast(Ptr{Float64}, %54)::Ptr{Float64}
│    %56  = Base.pointerref(%55, 2, 1)::Float64
│           $(Expr(:gc_preserve_end, :(%53)))
└───        goto #18
18 ─        goto #23 if not true
19 ─ %60  = Core.tuple(3)::Tuple{Int64}
│    %61  = Base.sle_int(1, 3)::Bool
│    %62  = Base.sle_int(3, 3)::Bool
│    %63  = Base.and_int(%61, %62)::Bool
└───        goto #21 if not %63
20 ─        goto #22
21 ─        invoke Base.throw_boundserror(%26::MMatrix{1, 3, Float64, 3}, %60::Tuple{Int64})::Union{}
└───        unreachable
22 ─        nothing::Nothing
23 ┄ %69  = $(Expr(:gc_preserve_begin, :(%26)))
│    %70  = $(Expr(:foreigncall, :(:jl_value_ptr), Ptr{Nothing}, svec(Any), 0, :(:ccall), :(%26)))::Ptr{Nothing}
│    %71  = Base.bitcast(Ptr{Float64}, %70)::Ptr{Float64}
│    %72  = Base.pointerref(%71, 3, 1)::Float64
│           $(Expr(:gc_preserve_end, :(%69)))
└───        goto #24
24 ─ %75  = StaticArrays.tuple(%40, %56, %72)::Tuple{Float64, Float64, Float64}
│    %76  = %new(MMatrix{3, 1, Float64, 3}, %75)::MMatrix{3, 1, Float64, 3}
└───        goto #25
25 ─        goto #26
26 ─        goto #28
27 ─        Core.throw(ErrorException("fatal error in type inference (type bound)"))::Union{}
└───        unreachable
28 ┄ %82  = φ (#2 => true, #4 => false, #26 => false)::Bool
│    %83  = φ (#2 => %17)::Tuple{Float64, Float64, Float64}
│    %84  = φ (#2 => false, #4 => true, #26 => false)::Bool
│    %85  = φ (#2 => false, #4 => false, #26 => true)::Bool
│    %86  = φ (#4 => %22, #26 => %76)::Union{MVector{3, Float64}, MMatrix{3, 1, Float64, 3}}
└───        goto #30 if not %82
29 ─ %88  = %new(SMatrix{3, 1, Float64, 3}, %83)::SMatrix{3, 1, Float64, 3}
└───        goto #35
30 ─        goto #32 if not %84
31 ─ %91  = π (%86, MVector{3, Float64})
│    %92  = StaticArrays.getfield(%91, :data)::Tuple{Float64, Float64, Float64}
│    %93  = %new(MVector{3, Float64}, %92)::MVector{3, Float64}
└───        goto #35
32 ─        goto #34 if not %85
33 ─ %96  = π (%86, MMatrix{3, 1, Float64, 3})
│    %97  = StaticArrays.getfield(%96, :data)::Tuple{Float64, Float64, Float64}
│    %98  = %new(MMatrix{3, 1, Float64, 3}, %97)::MMatrix{3, 1, Float64, 3}
└───        goto #35
34 ─        Core.throw(ErrorException("fatal error in type inference (type bound)"))::Union{}
└───        unreachable
35 ┄ %102 = φ (#29 => %88, #31 => %93, #33 => %98)::Union{MVector{3, Float64}, SMatrix{3, 1, Float64, 3}, MMatrix{3, 1, Float64, 3}}
└───        return %102
) => Union{MVector{3, Float64}, SMatrix{3, 1, Float64, 3}, MMatrix{3, 1, Float64, 3}}

This produces a union which causes us to yell.

vchuravy commented 10 hours ago

> This produces a union which causes us to yell.

Could we produce a better error in this case? Or does one of the hidden flags help?