Closed ToucheSir closed 6 months ago
Yeah, for the immediate future at least, you should go fix the union in NNlib here (which would also be a general performance benefit, even without AD).
I'm (on 0.11.2) also hitting
ERROR: Enzyme compilation failed due to illegal type analysis.
Current scope:
; Function Attrs: mustprogress willreturn
define internal fastcc i64 @preprocess_julia_partition__9106({} addrspace(10)* noundef nonnull readonly align 16 dereferenceable(40) %0, i64 signext %1, i64 signext %2, i64 signext %3, { {} addrspace(10)*, i8, i8 } addrspace(11)* nocapture noundef nonnull readonly align 8 dereferenceable(16) %4, i8 zeroext %5, { {} addrspace(10)*, i8, i8 } addrspace(11)* nocapture noundef nonnull readonly align 8 dereferenceable(16) %6, i64 signext %7) unnamed_addr #80 !dbg !5321 {
top:
However
Caused by:
Stacktrace:
[1] setindex!
@ ./array.jl:969
[2] partition!
@ ./sort.jl:1004
Is this related, or should I try to reduce it to an MWE?
That is unrelated; a minimal example would be helpful, @toollu.
Closing, as an `ifelse` on a union of different types is considered unsupported at the moment.
I found this when trying to diff through https://github.com/FluxML/NNlib.jl/blob/acf87f5316e7579ac1e7eb16a278f43a9ca435dc/src/softmax.jl#L115.
MWE:
Looking at the generated IR, it appears Julia is union-splitting the result of the `ifelse` and only truncating if it turns out to be `Float64`, which seems a little unnecessary since LLVM ends up promoting it to a double unconditionally anyhow. Strange codegen aside, this specific example should be relatively easy to fix on the NNlib side, but I suspect there are many more examples lurking out there.

Error:
``` ERROR: LoadError: Enzyme compilation failed due to illegal type analysis. Current scope: ; Function Attrs: mustprogress willreturn define float @preprocess_julia_f_1435_inner.1({} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %0, {} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %1) local_unnamed_addr #6 !dbg !68 { entry: %2 = alloca float, align 4 %.0.sroa_cast4 = bitcast float* %2 to i8* call void @llvm.lifetime.start.p0i8(i64 4, i8* %.0.sroa_cast4) %3 = call {}*** @julia.get_pgcstack() #7 %4 = bitcast {} addrspace(10)* %1 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)*, !dbg !69 %5 = addrspacecast { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)* %4 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*, !dbg !69 %6 = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %5, i64 0, i32 1, !dbg !69 %7 = load i64, i64 addrspace(11)* %6, align 8, !dbg !69, !tbaa !12, !range !17, !alias.scope !18, !noalias !21 %.not = icmp eq i64 %7, 0, !dbg !69 br i1 %.not, label %oob.i, label %idxend2.i, !dbg !69 L16.i: ; preds = %idxend2.i %8 = bitcast {} addrspace(10)* %0 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)*, !dbg !72 %9 = addrspacecast { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)* %8 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*, !dbg !72 %10 = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %9, i64 0, i32 1, !dbg !72 %11 = load i64, i64 addrspace(11)* %10, align 8, !dbg !72, !tbaa !12, !range !17, !alias.scope !18, !noalias !21 %.not10 = icmp eq i64 %11, 0, !dbg !72 br i1 %.not10, label %oob3.i, label %idxend4.i, !dbg !72 L21.i: ; preds = %idxend2.i %12 = bitcast {} addrspace(10)* %0 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)*, !dbg !72 %13 = addrspacecast { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)* %12 to 
{ i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*, !dbg !72 %14 = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %13, i64 0, i32 1, !dbg !72 %15 = load i64, i64 addrspace(11)* %14, align 8, !dbg !72, !tbaa !12, !range !17, !alias.scope !18, !noalias !21 %.not13 = icmp eq i64 %15, 0, !dbg !72 br i1 %.not13, label %oob7.i, label %idxend8.i, !dbg !72 oob.i: ; preds = %entry %16 = alloca i64, align 8, !dbg !69 store i64 1, i64* %16, align 8, !dbg !69, !noalias !73 %17 = addrspacecast {} addrspace(10)* %1 to {} addrspace(12)*, !dbg !69 call void @ijl_bounds_error_ints({} addrspace(12)* noundef %17, i64* noundef nonnull align 8 %16, i64 noundef 1) #8, !dbg !69 unreachable, !dbg !69 idxend2.i: ; preds = %entry %18 = bitcast {} addrspace(10)* %1 to float addrspace(13)* addrspace(10)*, !dbg !69 %19 = addrspacecast float addrspace(13)* addrspace(10)* %18 to float addrspace(13)* addrspace(11)*, !dbg !69 %20 = load float addrspace(13)*, float addrspace(13)* addrspace(11)* %19, align 16, !dbg !69, !tbaa !32, !alias.scope !76, !noalias !21, !nonnull !6 %21 = load float, float addrspace(13)* %20, align 4, !dbg !69, !tbaa !35, !alias.scope !38, !noalias !39 %22 = bitcast float %21 to i32, !dbg !77 %23 = icmp slt i32 %22, 0, !dbg !80 %24 = fcmp une float %21, 0x7FF0000000000000, !dbg !81 %25 = or i1 %24, %23, !dbg !83 store float %21, float* %2, align 4, !dbg !83, !noalias !73 %.0.sroa_cast3 = addrspacecast float* %2 to double addrspace(11)*, !dbg !83 %26 = select i1 %25, double addrspace(11)* %.0.sroa_cast3, double addrspace(11)* addrspacecast (double* @_j_const1 to double addrspace(11)*), !dbg !83 br i1 %25, label %L16.i, label %L21.i, !dbg !84 oob3.i: ; preds = %L16.i %27 = alloca i64, align 8, !dbg !72 store i64 1, i64* %27, align 8, !dbg !72, !noalias !73 %28 = addrspacecast {} addrspace(10)* %0 to {} addrspace(12)*, !dbg !72 call void @ijl_bounds_error_ints({} addrspace(12)* noundef %28, i64* 
noundef nonnull align 8 %27, i64 noundef 1) #8, !dbg !72 unreachable, !dbg !72 idxend4.i: ; preds = %L16.i %29 = bitcast {} addrspace(10)* %0 to float addrspace(13)* addrspace(10)*, !dbg !72 %30 = addrspacecast float addrspace(13)* addrspace(10)* %29 to float addrspace(13)* addrspace(11)*, !dbg !72 %31 = load float addrspace(13)*, float addrspace(13)* addrspace(11)* %30, align 16, !dbg !72, !tbaa !32, !alias.scope !76, !noalias !21, !nonnull !6 %32 = bitcast double addrspace(11)* %26 to float addrspace(11)*, !dbg !72 %33 = load float, float addrspace(11)* %32, align 4, !dbg !72, !tbaa !58 store float %33, float addrspace(13)* %31, align 4, !dbg !72, !tbaa !35, !alias.scope !38, !noalias !85 br label %julia_f_1435_inner.exit, !dbg !84 oob7.i: ; preds = %L21.i %34 = alloca i64, align 8, !dbg !72 store i64 1, i64* %34, align 8, !dbg !72, !noalias !73 %35 = addrspacecast {} addrspace(10)* %0 to {} addrspace(12)*, !dbg !72 call void @ijl_bounds_error_ints({} addrspace(12)* noundef %35, i64* noundef nonnull align 8 %34, i64 noundef 1) #8, !dbg !72 unreachable, !dbg !72 idxend8.i: ; preds = %L21.i %36 = load double, double addrspace(11)* %26, align 4, !dbg !86, !tbaa !58 %37 = fptrunc double %36 to float, !dbg !86 %38 = bitcast {} addrspace(10)* %0 to float addrspace(13)* addrspace(10)*, !dbg !72 %39 = addrspacecast float addrspace(13)* addrspace(10)* %38 to float addrspace(13)* addrspace(11)*, !dbg !72 %40 = load float addrspace(13)*, float addrspace(13)* addrspace(11)* %39, align 16, !dbg !72, !tbaa !32, !alias.scope !76, !noalias !21, !nonnull !6 store float %37, float addrspace(13)* %40, align 4, !dbg !72, !tbaa !35, !alias.scope !38, !noalias !85 br label %julia_f_1435_inner.exit, !dbg !84 julia_f_1435_inner.exit: ; preds = %idxend8.i, %idxend4.i %41 = phi float [ %37, %idxend8.i ], [ %33, %idxend4.i ] %.0.sroa_cast5 = bitcast float* %2 to i8*, !dbg !89 call void @llvm.lifetime.end.p0i8(i64 4, i8* %.0.sroa_cast5), !dbg !89 ret float %41, !dbg !90 } Type analysis 
state: