EnzymeAD / Enzyme.jl

Julia bindings for the Enzyme automatic differentiator
https://enzyme.mit.edu
MIT License
439 stars 62 forks source link

LLVM error: function failed verification for reverse rule with complex inputs #758

Closed sethaxen closed 1 year ago

sethaxen commented 1 year ago

For the following reimplementation of `dot`, the custom reverse-mode rule defined below works fine for real inputs, but for complex inputs it raises `ERROR: LLVM error: function failed verification (4)`:

using Enzyme, LinearAlgebra

"""
    _scalar_dot(x, y)

Return `conj(x) * y`, the per-element term that `_dot` sums.
"""
_scalar_dot(x, y) = conj(x) * y

"""
    _dot(X, Y)

Return the sum of `conj(x) * y` over corresponding elements of `X` and `Y`.

Both arguments must be strided arrays sharing the same real or complex element type.

# Throws
- `DimensionMismatch`: if `X` and `Y` have different lengths.
"""
function _dot(X::StridedArray{T}, Y::StridedArray{T}) where {T<:Union{Real,Complex}}
    # `mapreduce` over two vectors of unequal length can silently stop at the shorter
    # one and drop elements; reject mismatched lengths up front instead.
    length(X) == length(Y) || throw(
        DimensionMismatch("X has length $(length(X)) but Y has length $(length(Y))")
    )
    return mapreduce(_scalar_dot, +, X, Y)
end

# Forward ("augmented primal") half of the custom reverse-mode rule for `_dot`.
#
# Evaluates the wrapped function on the primal arrays and packages the outcome as
# requested by `config`:
#   * the primal slot holds the result when `needs_primal(config)`, else `nothing`;
#   * the shadow slot holds a zero of the result when `needs_shadow(config)`, else `nothing`;
#   * the tape holds copies of both primal arrays for use by the reverse pass
#     (presumably so later mutation of the inputs cannot affect it — confirm).
function EnzymeRules.augmented_primal(
    config::EnzymeRules.ConfigWidth{1},
    func::Const{typeof(_dot)},
    RT::Type{<:Union{Const,Active}},
    X::Duplicated{<:StridedArray{T}},
    Y::Duplicated{<:StridedArray{T}},
) where {T<:Union{Real,Complex}}
    result = func.val(X.val, Y.val)
    primal = if EnzymeRules.needs_primal(config)
        result
    else
        nothing
    end
    shadow = if EnzymeRules.needs_shadow(config)
        zero(result)
    else
        nothing
    end
    saved_inputs = (copy(X.val), copy(Y.val))
    return EnzymeRules.AugmentedReturn(primal, shadow, saved_inputs)
end

# Reverse half of the custom rule for `_dot`.
#
# `tape` is destructured into two arrays, the saved copies of the primal inputs.
# When `dret` is a `Const` type (rather than an `Active` value) nothing is
# accumulated. Otherwise each input's shadow is incremented in place by `dret.val`
# times the *other* saved input. Always returns `(nothing, nothing)`.
#
# NOTE(review): `_scalar_dot` conjugates its first argument, but no conjugation is
# applied in the updates below — confirm this is the intended rule for complex `T`.
function EnzymeRules.reverse(
    config::EnzymeRules.ConfigWidth{1},
    ::Const{typeof(_dot)},
    dret::Union{Active,Type{<:Const}},
    tape,
    X::Duplicated{<:StridedArray{T}},
    Y::Duplicated{<:StridedArray{T}},
) where {T<:Union{Real,Complex}}
    # Constant return: leave both shadows untouched.
    dret isa Type{<:Const} && return (nothing, nothing)

    saved_X, saved_Y = tape
    seed = dret.val
    X.dval .+= seed .* saved_Y
    Y.dval .+= seed .* saved_X
    return (nothing, nothing)
end

n = 10

# Real inputs: the custom rule compiles and runs.
x = randn(n);
y = randn(n);
∂x, ∂y = zero(x), zero(y);
_dot(x, y) ≈ LinearAlgebra.dot(x, y)  # returns true
autodiff(Reverse, _dot, Const, Duplicated(x, ∂x), Duplicated(y, ∂y))  # returns ((nothing, nothing),)

# Complex inputs: the same calls, but `autodiff` fails.
x = randn(ComplexF64, n);
y = randn(ComplexF64, n);
∂x, ∂y = zero(x), zero(y);
_dot(x, y) ≈ LinearAlgebra.dot(x, y)  # returns true
autodiff(Reverse, _dot, Const, Duplicated(x, ∂x), Duplicated(y, ∂y))  # prints warnings and errors (see below)
```julia Stored value type does not match pointer operand type! store [2 x {} addrspace(10)*] %32, [2 x double]* %9, align 8 [2 x {} addrspace(10)*]; Function Attrs: mustprogress willreturn define [2 x double] @preprocess_julia__dot_2158mustwrap_inner.1({} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %0, {} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %1) local_unnamed_addr #6 { entry: %2 = alloca [2 x double], align 8 call void @julia__dot_2158([2 x double]* noalias nocapture noundef nonnull writeonly sret([2 x double]) align 8 dereferenceable(16) %2, {} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %0, {} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %1) #9 %.fca.0.gep = getelementptr inbounds [2 x double], [2 x double]* %2, i64 0, i64 0 %.fca.0.load = load double, double* %.fca.0.gep, align 8 %.fca.0.insert = insertvalue [2 x double] poison, double %.fca.0.load, 0 %.fca.1.gep = getelementptr inbounds [2 x double], [2 x double]* %2, i64 0, i64 1 %.fca.1.load = load double, double* %.fca.1.gep, align 8 %.fca.1.insert = insertvalue [2 x double] %.fca.0.insert, double %.fca.1.load, 1 ret [2 x double] %.fca.1.insert } ; Function Attrs: mustprogress willreturn define internal void @diffejulia__dot_2158mustwrap_inner.1({} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %0, {} addrspace(10)* %"'", {} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %1, {} addrspace(10)* %"'1") local_unnamed_addr #6 { entry: %2 = alloca { [2 x double], [2 x {} addrspace(10)*] }, align 8 %3 = alloca [2 x {} addrspace(10)*], align 8 %4 = call {}*** @julia.get_pgcstack() %5 = call {}*** @julia.get_pgcstack() %6 = call {}*** @julia.get_pgcstack() %7 = call {}*** @julia.get_pgcstack() %8 = call {}*** @julia.get_pgcstack() %9 = alloca [2 x double], align 8 %10 = bitcast {}*** %8 to {}** %11 = getelementptr inbounds {}*, {}** %10, i64 -13 %12 = getelementptr inbounds {}*, {}** %11, i64 15 %13 = bitcast {}** %12 to 
i8** %14 = load i8*, i8** %13, align 8 %15 = call {} addrspace(10)* @julia.gc_alloc_obj({}** %11, i64 16, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140502404295248 to {}*) to {} addrspace(10)*)) %16 = bitcast {} addrspace(10)* %15 to [2 x {} addrspace(10)*] addrspace(10)* %17 = addrspacecast [2 x {} addrspace(10)*] addrspace(10)* %16 to [2 x {} addrspace(10)*] addrspace(11)* %18 = getelementptr [2 x {} addrspace(10)*], [2 x {} addrspace(10)*] addrspace(11)* %17, i64 0, i32 0 store {} addrspace(10)* %0, {} addrspace(10)* addrspace(11)* %18, align 8 %19 = getelementptr [2 x {} addrspace(10)*], [2 x {} addrspace(10)*] addrspace(11)* %17, i64 0, i32 1 store {} addrspace(10)* %"'", {} addrspace(10)* addrspace(11)* %19, align 8 call void ({} addrspace(10)*, ...) @julia.write_barrier({} addrspace(10)* %15, {} addrspace(10)* %0, {} addrspace(10)* %"'") %20 = bitcast {}*** %7 to {}** %21 = getelementptr inbounds {}*, {}** %20, i64 -13 %22 = getelementptr inbounds {}*, {}** %21, i64 15 %23 = bitcast {}** %22 to i8** %24 = load i8*, i8** %23, align 8 %25 = call {} addrspace(10)* @julia.gc_alloc_obj({}** %21, i64 16, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140502404295248 to {}*) to {} addrspace(10)*)) %26 = bitcast {} addrspace(10)* %25 to [2 x {} addrspace(10)*] addrspace(10)* %27 = addrspacecast [2 x {} addrspace(10)*] addrspace(10)* %26 to [2 x {} addrspace(10)*] addrspace(11)* %28 = getelementptr [2 x {} addrspace(10)*], [2 x {} addrspace(10)*] addrspace(11)* %27, i64 0, i32 0 store {} addrspace(10)* %1, {} addrspace(10)* addrspace(11)* %28, align 8 %29 = getelementptr [2 x {} addrspace(10)*], [2 x {} addrspace(10)*] addrspace(11)* %27, i64 0, i32 1 store {} addrspace(10)* %"'1", {} addrspace(10)* addrspace(11)* %29, align 8 call void ({} addrspace(10)*, ...) 
@julia.write_barrier({} addrspace(10)* %25, {} addrspace(10)* %1, {} addrspace(10)* %"'1") call void @julia_augmented_primal_2271({ [2 x double], [2 x {} addrspace(10)*] }* sret({ [2 x double], [2 x {} addrspace(10)*] }) %2, [2 x {} addrspace(10)*]* %3, [2 x {} addrspace(10)*] addrspace(11)* %17, [2 x {} addrspace(10)*] addrspace(11)* %27) %30 = load { [2 x double], [2 x {} addrspace(10)*] }, { [2 x double], [2 x {} addrspace(10)*] }* %2, align 8 %31 = extractvalue { [2 x double], [2 x {} addrspace(10)*] } %30, 0 %32 = extractvalue { [2 x double], [2 x {} addrspace(10)*] } %30, 1 store [2 x {} addrspace(10)*] %32, [2 x double]* %9, align 8 %33 = extractvalue { [2 x double], [2 x {} addrspace(10)*] } %30, 1 br label %invertentry invertentry: ; preds = %entry %34 = bitcast {}*** %6 to {}** %35 = getelementptr inbounds {}*, {}** %34, i64 -13 %36 = getelementptr inbounds {}*, {}** %35, i64 15 %37 = bitcast {}** %36 to i8** %38 = load i8*, i8** %37, align 8 %39 = call {} addrspace(10)* @julia.gc_alloc_obj({}** %35, i64 16, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140502404295248 to {}*) to {} addrspace(10)*)) %40 = bitcast {} addrspace(10)* %39 to [2 x {} addrspace(10)*] addrspace(10)* %41 = addrspacecast [2 x {} addrspace(10)*] addrspace(10)* %40 to [2 x {} addrspace(10)*] addrspace(11)* %42 = getelementptr [2 x {} addrspace(10)*], [2 x {} addrspace(10)*] addrspace(11)* %41, i64 0, i32 0 store {} addrspace(10)* %0, {} addrspace(10)* addrspace(11)* %42, align 8 %43 = getelementptr [2 x {} addrspace(10)*], [2 x {} addrspace(10)*] addrspace(11)* %41, i64 0, i32 1 store {} addrspace(10)* %"'", {} addrspace(10)* addrspace(11)* %43, align 8 call void ({} addrspace(10)*, ...) 
@julia.write_barrier({} addrspace(10)* %39, {} addrspace(10)* %0, {} addrspace(10)* %"'") %44 = bitcast {}*** %5 to {}** %45 = getelementptr inbounds {}*, {}** %44, i64 -13 %46 = getelementptr inbounds {}*, {}** %45, i64 15 %47 = bitcast {}** %46 to i8** %48 = load i8*, i8** %47, align 8 %49 = call {} addrspace(10)* @julia.gc_alloc_obj({}** %45, i64 16, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140502404295248 to {}*) to {} addrspace(10)*)) %50 = bitcast {} addrspace(10)* %49 to [2 x {} addrspace(10)*] addrspace(10)* %51 = addrspacecast [2 x {} addrspace(10)*] addrspace(10)* %50 to [2 x {} addrspace(10)*] addrspace(11)* %52 = getelementptr [2 x {} addrspace(10)*], [2 x {} addrspace(10)*] addrspace(11)* %51, i64 0, i32 0 store {} addrspace(10)* %1, {} addrspace(10)* addrspace(11)* %52, align 8 %53 = getelementptr [2 x {} addrspace(10)*], [2 x {} addrspace(10)*] addrspace(11)* %51, i64 0, i32 1 store {} addrspace(10)* %"'1", {} addrspace(10)* addrspace(11)* %53, align 8 call void ({} addrspace(10)*, ...) @julia.write_barrier({} addrspace(10)* %49, {} addrspace(10)* %1, {} addrspace(10)* %"'1") %54 = bitcast {}*** %4 to {}** %55 = getelementptr inbounds {}*, {}** %54, i64 -13 %56 = getelementptr inbounds {}*, {}** %55, i64 15 %57 = bitcast {}** %56 to i8** %58 = load i8*, i8** %57, align 8 %59 = call {} addrspace(10)* @julia.gc_alloc_obj({}** %55, i64 16, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140502179445264 to {}*) to {} addrspace(10)*)) %60 = bitcast {} addrspace(10)* %59 to [2 x {} addrspace(10)*] addrspace(10)* store [2 x {} addrspace(10)*] %33, [2 x {} addrspace(10)*] addrspace(10)* %60, align 8 %61 = extractvalue [2 x {} addrspace(10)*] %33, 0 %62 = extractvalue [2 x {} addrspace(10)*] %33, 1 call void ({} addrspace(10)*, ...) 
@julia.write_barrier({} addrspace(10)* %59, {} addrspace(10)* %61, {} addrspace(10)* %62) %63 = addrspacecast [2 x {} addrspace(10)*] addrspace(10)* %60 to [2 x {} addrspace(10)*] addrspace(11)* call void @julia_reverse_2283([2 x {} addrspace(10)*] addrspace(11)* %63, [2 x {} addrspace(10)*] addrspace(11)* %41, [2 x {} addrspace(10)*] addrspace(11)* %51) ret void } Stored value type does not match pointer operand type! store [2 x {} addrspace(10)*] %32, [2 x double]* %9, align 8 [2 x {} addrspace(10)*]; Function Attrs: mustprogress willreturn define [2 x double] @preprocess_julia__dot_2288mustwrap_inner.1({} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %0, {} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %1) local_unnamed_addr #6 { entry: %2 = alloca [2 x double], align 8 call void @julia__dot_2288([2 x double]* noalias nocapture noundef nonnull writeonly sret([2 x double]) align 8 dereferenceable(16) %2, {} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %0, {} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %1) #9 %.fca.0.gep = getelementptr inbounds [2 x double], [2 x double]* %2, i64 0, i64 0 %.fca.0.load = load double, double* %.fca.0.gep, align 8 %.fca.0.insert = insertvalue [2 x double] poison, double %.fca.0.load, 0 %.fca.1.gep = getelementptr inbounds [2 x double], [2 x double]* %2, i64 0, i64 1 %.fca.1.load = load double, double* %.fca.1.gep, align 8 %.fca.1.insert = insertvalue [2 x double] %.fca.0.insert, double %.fca.1.load, 1 ret [2 x double] %.fca.1.insert } ; Function Attrs: mustprogress willreturn define internal void @diffejulia__dot_2288mustwrap_inner.1({} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %0, {} addrspace(10)* %"'", {} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %1, {} addrspace(10)* %"'1") local_unnamed_addr #6 { entry: %2 = alloca { [2 x double], [2 x {} addrspace(10)*] }, align 8 %3 = alloca [2 x {} addrspace(10)*], align 8 %4 = call {}*** 
@julia.get_pgcstack() %5 = call {}*** @julia.get_pgcstack() %6 = call {}*** @julia.get_pgcstack() %7 = call {}*** @julia.get_pgcstack() %8 = call {}*** @julia.get_pgcstack() %9 = alloca [2 x double], align 8 %10 = bitcast {}*** %8 to {}** %11 = getelementptr inbounds {}*, {}** %10, i64 -13 %12 = getelementptr inbounds {}*, {}** %11, i64 15 %13 = bitcast {}** %12 to i8** %14 = load i8*, i8** %13, align 8 %15 = call {} addrspace(10)* @julia.gc_alloc_obj({}** %11, i64 16, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140502404295248 to {}*) to {} addrspace(10)*)) %16 = bitcast {} addrspace(10)* %15 to [2 x {} addrspace(10)*] addrspace(10)* %17 = addrspacecast [2 x {} addrspace(10)*] addrspace(10)* %16 to [2 x {} addrspace(10)*] addrspace(11)* %18 = getelementptr [2 x {} addrspace(10)*], [2 x {} addrspace(10)*] addrspace(11)* %17, i64 0, i32 0 store {} addrspace(10)* %0, {} addrspace(10)* addrspace(11)* %18, align 8 %19 = getelementptr [2 x {} addrspace(10)*], [2 x {} addrspace(10)*] addrspace(11)* %17, i64 0, i32 1 store {} addrspace(10)* %"'", {} addrspace(10)* addrspace(11)* %19, align 8 call void ({} addrspace(10)*, ...) 
@julia.write_barrier({} addrspace(10)* %15, {} addrspace(10)* %0, {} addrspace(10)* %"'") %20 = bitcast {}*** %7 to {}** %21 = getelementptr inbounds {}*, {}** %20, i64 -13 %22 = getelementptr inbounds {}*, {}** %21, i64 15 %23 = bitcast {}** %22 to i8** %24 = load i8*, i8** %23, align 8 %25 = call {} addrspace(10)* @julia.gc_alloc_obj({}** %21, i64 16, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140502404295248 to {}*) to {} addrspace(10)*)) %26 = bitcast {} addrspace(10)* %25 to [2 x {} addrspace(10)*] addrspace(10)* %27 = addrspacecast [2 x {} addrspace(10)*] addrspace(10)* %26 to [2 x {} addrspace(10)*] addrspace(11)* %28 = getelementptr [2 x {} addrspace(10)*], [2 x {} addrspace(10)*] addrspace(11)* %27, i64 0, i32 0 store {} addrspace(10)* %1, {} addrspace(10)* addrspace(11)* %28, align 8 %29 = getelementptr [2 x {} addrspace(10)*], [2 x {} addrspace(10)*] addrspace(11)* %27, i64 0, i32 1 store {} addrspace(10)* %"'1", {} addrspace(10)* addrspace(11)* %29, align 8 call void ({} addrspace(10)*, ...) 
@julia.write_barrier({} addrspace(10)* %25, {} addrspace(10)* %1, {} addrspace(10)* %"'1") call void @julia_augmented_primal_2290({ [2 x double], [2 x {} addrspace(10)*] }* sret({ [2 x double], [2 x {} addrspace(10)*] }) %2, [2 x {} addrspace(10)*]* %3, [2 x {} addrspace(10)*] addrspace(11)* %17, [2 x {} addrspace(10)*] addrspace(11)* %27) %30 = load { [2 x double], [2 x {} addrspace(10)*] }, { [2 x double], [2 x {} addrspace(10)*] }* %2, align 8 %31 = extractvalue { [2 x double], [2 x {} addrspace(10)*] } %30, 0 %32 = extractvalue { [2 x double], [2 x {} addrspace(10)*] } %30, 1 store [2 x {} addrspace(10)*] %32, [2 x double]* %9, align 8 %33 = extractvalue { [2 x double], [2 x {} addrspace(10)*] } %30, 1 br label %invertentry invertentry: ; preds = %entry %34 = bitcast {}*** %6 to {}** %35 = getelementptr inbounds {}*, {}** %34, i64 -13 %36 = getelementptr inbounds {}*, {}** %35, i64 15 %37 = bitcast {}** %36 to i8** %38 = load i8*, i8** %37, align 8 %39 = call {} addrspace(10)* @julia.gc_alloc_obj({}** %35, i64 16, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140502404295248 to {}*) to {} addrspace(10)*)) %40 = bitcast {} addrspace(10)* %39 to [2 x {} addrspace(10)*] addrspace(10)* %41 = addrspacecast [2 x {} addrspace(10)*] addrspace(10)* %40 to [2 x {} addrspace(10)*] addrspace(11)* %42 = getelementptr [2 x {} addrspace(10)*], [2 x {} addrspace(10)*] addrspace(11)* %41, i64 0, i32 0 store {} addrspace(10)* %0, {} addrspace(10)* addrspace(11)* %42, align 8 %43 = getelementptr [2 x {} addrspace(10)*], [2 x {} addrspace(10)*] addrspace(11)* %41, i64 0, i32 1 store {} addrspace(10)* %"'", {} addrspace(10)* addrspace(11)* %43, align 8 call void ({} addrspace(10)*, ...) 
@julia.write_barrier({} addrspace(10)* %39, {} addrspace(10)* %0, {} addrspace(10)* %"'") %44 = bitcast {}*** %5 to {}** %45 = getelementptr inbounds {}*, {}** %44, i64 -13 %46 = getelementptr inbounds {}*, {}** %45, i64 15 %47 = bitcast {}** %46 to i8** %48 = load i8*, i8** %47, align 8 %49 = call {} addrspace(10)* @julia.gc_alloc_obj({}** %45, i64 16, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140502404295248 to {}*) to {} addrspace(10)*)) %50 = bitcast {} addrspace(10)* %49 to [2 x {} addrspace(10)*] addrspace(10)* %51 = addrspacecast [2 x {} addrspace(10)*] addrspace(10)* %50 to [2 x {} addrspace(10)*] addrspace(11)* %52 = getelementptr [2 x {} addrspace(10)*], [2 x {} addrspace(10)*] addrspace(11)* %51, i64 0, i32 0 store {} addrspace(10)* %1, {} addrspace(10)* addrspace(11)* %52, align 8 %53 = getelementptr [2 x {} addrspace(10)*], [2 x {} addrspace(10)*] addrspace(11)* %51, i64 0, i32 1 store {} addrspace(10)* %"'1", {} addrspace(10)* addrspace(11)* %53, align 8 call void ({} addrspace(10)*, ...) @julia.write_barrier({} addrspace(10)* %49, {} addrspace(10)* %1, {} addrspace(10)* %"'1") %54 = bitcast {}*** %4 to {}** %55 = getelementptr inbounds {}*, {}** %54, i64 -13 %56 = getelementptr inbounds {}*, {}** %55, i64 15 %57 = bitcast {}** %56 to i8** %58 = load i8*, i8** %57, align 8 %59 = call {} addrspace(10)* @julia.gc_alloc_obj({}** %55, i64 16, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140502179445264 to {}*) to {} addrspace(10)*)) %60 = bitcast {} addrspace(10)* %59 to [2 x {} addrspace(10)*] addrspace(10)* store [2 x {} addrspace(10)*] %33, [2 x {} addrspace(10)*] addrspace(10)* %60, align 8 %61 = extractvalue [2 x {} addrspace(10)*] %33, 0 %62 = extractvalue [2 x {} addrspace(10)*] %33, 1 call void ({} addrspace(10)*, ...) 
@julia.write_barrier({} addrspace(10)* %59, {} addrspace(10)* %61, {} addrspace(10)* %62) %63 = addrspacecast [2 x {} addrspace(10)*] addrspace(10)* %60 to [2 x {} addrspace(10)*] addrspace(11)* call void @julia_reverse_2295([2 x {} addrspace(10)*] addrspace(11)* %63, [2 x {} addrspace(10)*] addrspace(11)* %41, [2 x {} addrspace(10)*] addrspace(11)* %51) ret void } Stored value type does not match pointer operand type! store [2 x {} addrspace(10)*] %32, [2 x double]* %9, align 8 [2 x {} addrspace(10)*]; Function Attrs: mustprogress willreturn define [2 x double] @preprocess_julia__dot_2297mustwrap_inner.1({} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %0, {} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %1) local_unnamed_addr #6 { entry: %2 = alloca [2 x double], align 8 call void @julia__dot_2297([2 x double]* noalias nocapture noundef nonnull writeonly sret([2 x double]) align 8 dereferenceable(16) %2, {} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %0, {} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %1) #9 %.fca.0.gep = getelementptr inbounds [2 x double], [2 x double]* %2, i64 0, i64 0 %.fca.0.load = load double, double* %.fca.0.gep, align 8 %.fca.0.insert = insertvalue [2 x double] poison, double %.fca.0.load, 0 %.fca.1.gep = getelementptr inbounds [2 x double], [2 x double]* %2, i64 0, i64 1 %.fca.1.load = load double, double* %.fca.1.gep, align 8 %.fca.1.insert = insertvalue [2 x double] %.fca.0.insert, double %.fca.1.load, 1 ret [2 x double] %.fca.1.insert } ; Function Attrs: mustprogress willreturn define internal void @diffejulia__dot_2297mustwrap_inner.1({} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %0, {} addrspace(10)* %"'", {} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %1, {} addrspace(10)* %"'1") local_unnamed_addr #6 { entry: %2 = alloca { [2 x double], [2 x {} addrspace(10)*] }, align 8 %3 = alloca [2 x {} addrspace(10)*], align 8 %4 = call {}*** 
@julia.get_pgcstack() %5 = call {}*** @julia.get_pgcstack() %6 = call {}*** @julia.get_pgcstack() %7 = call {}*** @julia.get_pgcstack() %8 = call {}*** @julia.get_pgcstack() %9 = alloca [2 x double], align 8 %10 = bitcast {}*** %8 to {}** %11 = getelementptr inbounds {}*, {}** %10, i64 -13 %12 = getelementptr inbounds {}*, {}** %11, i64 15 %13 = bitcast {}** %12 to i8** %14 = load i8*, i8** %13, align 8 %15 = call {} addrspace(10)* @julia.gc_alloc_obj({}** %11, i64 16, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140502404295248 to {}*) to {} addrspace(10)*)) %16 = bitcast {} addrspace(10)* %15 to [2 x {} addrspace(10)*] addrspace(10)* %17 = addrspacecast [2 x {} addrspace(10)*] addrspace(10)* %16 to [2 x {} addrspace(10)*] addrspace(11)* %18 = getelementptr [2 x {} addrspace(10)*], [2 x {} addrspace(10)*] addrspace(11)* %17, i64 0, i32 0 store {} addrspace(10)* %0, {} addrspace(10)* addrspace(11)* %18, align 8 %19 = getelementptr [2 x {} addrspace(10)*], [2 x {} addrspace(10)*] addrspace(11)* %17, i64 0, i32 1 store {} addrspace(10)* %"'", {} addrspace(10)* addrspace(11)* %19, align 8 call void ({} addrspace(10)*, ...) 
@julia.write_barrier({} addrspace(10)* %15, {} addrspace(10)* %0, {} addrspace(10)* %"'") %20 = bitcast {}*** %7 to {}** %21 = getelementptr inbounds {}*, {}** %20, i64 -13 %22 = getelementptr inbounds {}*, {}** %21, i64 15 %23 = bitcast {}** %22 to i8** %24 = load i8*, i8** %23, align 8 %25 = call {} addrspace(10)* @julia.gc_alloc_obj({}** %21, i64 16, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140502404295248 to {}*) to {} addrspace(10)*)) %26 = bitcast {} addrspace(10)* %25 to [2 x {} addrspace(10)*] addrspace(10)* %27 = addrspacecast [2 x {} addrspace(10)*] addrspace(10)* %26 to [2 x {} addrspace(10)*] addrspace(11)* %28 = getelementptr [2 x {} addrspace(10)*], [2 x {} addrspace(10)*] addrspace(11)* %27, i64 0, i32 0 store {} addrspace(10)* %1, {} addrspace(10)* addrspace(11)* %28, align 8 %29 = getelementptr [2 x {} addrspace(10)*], [2 x {} addrspace(10)*] addrspace(11)* %27, i64 0, i32 1 store {} addrspace(10)* %"'1", {} addrspace(10)* addrspace(11)* %29, align 8 call void ({} addrspace(10)*, ...) 
@julia.write_barrier({} addrspace(10)* %25, {} addrspace(10)* %1, {} addrspace(10)* %"'1") call void @julia_augmented_primal_2299({ [2 x double], [2 x {} addrspace(10)*] }* sret({ [2 x double], [2 x {} addrspace(10)*] }) %2, [2 x {} addrspace(10)*]* %3, [2 x {} addrspace(10)*] addrspace(11)* %17, [2 x {} addrspace(10)*] addrspace(11)* %27) %30 = load { [2 x double], [2 x {} addrspace(10)*] }, { [2 x double], [2 x {} addrspace(10)*] }* %2, align 8 %31 = extractvalue { [2 x double], [2 x {} addrspace(10)*] } %30, 0 %32 = extractvalue { [2 x double], [2 x {} addrspace(10)*] } %30, 1 store [2 x {} addrspace(10)*] %32, [2 x double]* %9, align 8 %33 = extractvalue { [2 x double], [2 x {} addrspace(10)*] } %30, 1 br label %invertentry invertentry: ; preds = %entry %34 = bitcast {}*** %6 to {}** %35 = getelementptr inbounds {}*, {}** %34, i64 -13 %36 = getelementptr inbounds {}*, {}** %35, i64 15 %37 = bitcast {}** %36 to i8** %38 = load i8*, i8** %37, align 8 %39 = call {} addrspace(10)* @julia.gc_alloc_obj({}** %35, i64 16, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140502404295248 to {}*) to {} addrspace(10)*)) %40 = bitcast {} addrspace(10)* %39 to [2 x {} addrspace(10)*] addrspace(10)* %41 = addrspacecast [2 x {} addrspace(10)*] addrspace(10)* %40 to [2 x {} addrspace(10)*] addrspace(11)* %42 = getelementptr [2 x {} addrspace(10)*], [2 x {} addrspace(10)*] addrspace(11)* %41, i64 0, i32 0 store {} addrspace(10)* %0, {} addrspace(10)* addrspace(11)* %42, align 8 %43 = getelementptr [2 x {} addrspace(10)*], [2 x {} addrspace(10)*] addrspace(11)* %41, i64 0, i32 1 store {} addrspace(10)* %"'", {} addrspace(10)* addrspace(11)* %43, align 8 call void ({} addrspace(10)*, ...) 
@julia.write_barrier({} addrspace(10)* %39, {} addrspace(10)* %0, {} addrspace(10)* %"'") %44 = bitcast {}*** %5 to {}** %45 = getelementptr inbounds {}*, {}** %44, i64 -13 %46 = getelementptr inbounds {}*, {}** %45, i64 15 %47 = bitcast {}** %46 to i8** %48 = load i8*, i8** %47, align 8 %49 = call {} addrspace(10)* @julia.gc_alloc_obj({}** %45, i64 16, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140502404295248 to {}*) to {} addrspace(10)*)) %50 = bitcast {} addrspace(10)* %49 to [2 x {} addrspace(10)*] addrspace(10)* %51 = addrspacecast [2 x {} addrspace(10)*] addrspace(10)* %50 to [2 x {} addrspace(10)*] addrspace(11)* %52 = getelementptr [2 x {} addrspace(10)*], [2 x {} addrspace(10)*] addrspace(11)* %51, i64 0, i32 0 store {} addrspace(10)* %1, {} addrspace(10)* addrspace(11)* %52, align 8 %53 = getelementptr [2 x {} addrspace(10)*], [2 x {} addrspace(10)*] addrspace(11)* %51, i64 0, i32 1 store {} addrspace(10)* %"'1", {} addrspace(10)* addrspace(11)* %53, align 8 call void ({} addrspace(10)*, ...) @julia.write_barrier({} addrspace(10)* %49, {} addrspace(10)* %1, {} addrspace(10)* %"'1") %54 = bitcast {}*** %4 to {}** %55 = getelementptr inbounds {}*, {}** %54, i64 -13 %56 = getelementptr inbounds {}*, {}** %55, i64 15 %57 = bitcast {}** %56 to i8** %58 = load i8*, i8** %57, align 8 %59 = call {} addrspace(10)* @julia.gc_alloc_obj({}** %55, i64 16, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140502179445264 to {}*) to {} addrspace(10)*)) %60 = bitcast {} addrspace(10)* %59 to [2 x {} addrspace(10)*] addrspace(10)* store [2 x {} addrspace(10)*] %33, [2 x {} addrspace(10)*] addrspace(10)* %60, align 8 %61 = extractvalue [2 x {} addrspace(10)*] %33, 0 %62 = extractvalue [2 x {} addrspace(10)*] %33, 1 call void ({} addrspace(10)*, ...) 
@julia.write_barrier({} addrspace(10)* %59, {} addrspace(10)* %61, {} addrspace(10)* %62) %63 = addrspacecast [2 x {} addrspace(10)*] addrspace(10)* %60 to [2 x {} addrspace(10)*] addrspace(11)* call void @julia_reverse_2304([2 x {} addrspace(10)*] addrspace(11)* %63, [2 x {} addrspace(10)*] addrspace(11)* %41, [2 x {} addrspace(10)*] addrspace(11)* %51) ret void } ERROR: LLVM error: function failed verification (4) Stacktrace: [1] handle_error(reason::Cstring) @ LLVM ~/.julia/packages/LLVM/TLGyi/src/core/context.jl:118 [2] EnzymeCreatePrimalAndGradient(logic::Enzyme.Logic, todiff::LLVM.Function, retType::Enzyme.API.CDIFFE_TYPE, constant_args::Vector{Enzyme.API.CDIFFE_TYPE}, TA::Enzyme.TypeAnalysis, returnValue::Bool, dretUsed::Bool, mode::Enzyme.API.CDerivativeMode, width::Int64, additionalArg::Ptr{Nothing}, forceAnonymousTape::Bool, typeInfo::Enzyme.FnTypeInfo, uncacheable_args::Vector{Bool}, augmented::Ptr{Nothing}, atomicAdd::Bool) @ Enzyme.API ~/.julia/packages/Enzyme/IW0EK/src/api.jl:124 [3] enzyme!(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}, mod::LLVM.Module, primalf::LLVM.Function, TT::Type, mode::Enzyme.API.CDerivativeMode, width::Int64, parallel::Bool, actualRetType::Type, wrap::Bool, modifiedBetween::Tuple{Bool, Bool, Bool}, returnPrimal::Bool, jlrules::Vector{String}, expectedTapeType::Type) @ Enzyme.Compiler ~/.julia/packages/Enzyme/IW0EK/src/compiler.jl:6898 [4] codegen(output::Symbol, job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}; libraries::Bool, deferred_codegen::Bool, optimize::Bool, ctx::LLVM.ThreadSafeContext, strip::Bool, validate::Bool, only_entry::Bool, parent_job::Nothing) @ Enzyme.Compiler ~/.julia/packages/Enzyme/IW0EK/src/compiler.jl:8149 [5] _thunk(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}, ctx::Nothing, postopt::Bool) @ Enzyme.Compiler 
~/.julia/packages/Enzyme/IW0EK/src/compiler.jl:8662 [6] _thunk @ ~/.julia/packages/Enzyme/IW0EK/src/compiler.jl:8659 [inlined] [7] cached_compilation @ ~/.julia/packages/Enzyme/IW0EK/src/compiler.jl:8697 [inlined] [8] #s287#191 @ ~/.julia/packages/Enzyme/IW0EK/src/compiler.jl:8755 [inlined] [9] var"#s287#191"(FA::Any, A::Any, TT::Any, Mode::Any, ModifiedBetween::Any, width::Any, ReturnPrimal::Any, ShadowInit::Any, World::Any, ::Any, ::Any, ::Any, ::Any, tt::Any, ::Any, ::Any, ::Any, ::Any, ::Any) @ Enzyme.Compiler ./none:0 [10] (::Core.GeneratedFunctionStub)(::Any, ::Vararg{Any}) @ Core ./boot.jl:602 [11] thunk(::Val{0x00000000000082bd}, ::Type{Const{typeof(_dot)}}, ::Type{Const}, tt::Type{Tuple{Duplicated{Vector{ComplexF64}}, Duplicated{Vector{ComplexF64}}}}, ::Val{Enzyme.API.DEM_ReverseModeCombined}, ::Val{1}, ::Val{(false, false, false)}, ::Val{false}) @ Enzyme.Compiler ~/.julia/packages/Enzyme/IW0EK/src/compiler.jl:8714 [12] autodiff(::EnzymeCore.ReverseMode{false}, ::Const{typeof(_dot)}, ::Type{Const}, ::Duplicated{Vector{ComplexF64}}, ::Vararg{Duplicated{Vector{ComplexF64}}}) @ Enzyme ~/.julia/packages/Enzyme/IW0EK/src/Enzyme.jl:199 [13] autodiff(::EnzymeCore.ReverseMode{false}, ::typeof(_dot), ::Type, ::Duplicated{Vector{ComplexF64}}, ::Vararg{Duplicated{Vector{ComplexF64}}}) @ Enzyme ~/.julia/packages/Enzyme/IW0EK/src/Enzyme.jl:214 [14] top-level scope @ REPL[16]:1 ```

Environment

julia> using Pkg; Pkg.status()
Status `/tmp/jl_jIfMzJ/Project.toml`
  [7da242da] Enzyme v0.11.0 `https://github.com/EnzymeAD/Enzyme.jl.git#main`

julia> versioninfo()
Julia Version 1.9.0-rc2
Commit 72aec423c2a (2023-04-01 10:41 UTC)
Platform Info:
  OS: Linux (x86_64-linux-gnu)
  CPU: 8 × 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz
  WORD_SIZE: 64
  LIBM: libopenlibm
  LLVM: libLLVM-14.0.6 (ORCJIT, tigerlake)
  Threads: 1 on 8 virtual cores
Environment:
  JULIA_CMDSTAN_HOME = /home/sethaxen/software/cmdstan/2.30.1/
  JULIA_EDITOR = code
wsmoses commented 1 year ago

Fixed by https://github.com/EnzymeAD/Enzyme.jl/pull/759