EnzymeAD / Enzyme.jl

Julia bindings for the Enzyme automatic differentiator
https://enzyme.mit.edu
MIT License
439 stars 62 forks source link

Failed when trying to create a new vector whose length depends on input #366

Closed kirimaru-jp closed 2 years ago

kirimaru-jp commented 2 years ago

I am trying to create a new (Float64) vector whose length depends on an input value. Here is a simplified example:

julia> function f(x)

    # Sum of the whole numbers between two bounds derived from x.
    # Example: f(10.0) spans 0.0 to 160.0 and returns 12880.0.

    # Thresholds that terminate the two scaling loops below.
    L1 = 1.0
    L2 = 100.0

    # Halve x repeatedly until the result is no longer greater than L1.
    n1 = x
    while n1 > L1
        n1 /= 2.0
    end

    # Double x repeatedly until the result is no longer less than L2.
    n2 = x
    while n2 < L2
        n2 *= 2.0
    end

    # Round outward so both range endpoints are whole numbers.
    n1 = floor(n1)
    n2 = ceil(n2)

    # Materialize the range n1:1.0:n2 as a vector; its length depends on x.
    S = [n1:1.0:n2;]
    # S = collect(n1:1.0:n2)
    return sum(S)
end

f (generic function with 1 method)
julia> f(10.0)
12880.0

It failed when I tried to compute the derivative with Enzyme.jl's `autodiff` (version 0.10.1, Julia 1.7.1):

using Enzyme
# Reverse-mode differentiation of f at x = 10.0; the return and the argument are both marked Active.
Enzyme.autodiff( Reverse, f, Active, Active(10.0) )

Here is the log:

ERROR: Enzyme compilation failed.
Current scope: 
; Function Attrs: uwtable willreturn mustprogress
define internal fastcc void @preprocess_julia_floatrange_6115({ [2 x double], [2 x double], i64, i64 }* noalias nocapture noundef nonnull writeonly sret({ [2 x double], [2 x double], i64, i64 }) align 8 dereferenceable(48) %0, i64 noundef signext %1, i64 noundef signext %2, i64 signext %3, i64 signext %4) unnamed_addr #17 !dbg !1635 {
top:
  %malloccall3 = tail call noalias nonnull dereferenceable(16) dereferenceable_or_null(16) i8* @malloc(i64 16), !enzyme_fromstack !1325
  %5 = bitcast i8* %malloccall3 to [2 x double]*
  %malloccall4 = tail call noalias nonnull dereferenceable(16) dereferenceable_or_null(16) i8* @malloc(i64 16), !enzyme_fromstack !1325
  %6 = bitcast i8* %malloccall4 to [2 x i64]*
  %malloccall2 = tail call noalias nonnull dereferenceable(16) dereferenceable_or_null(16) i8* @malloc(i64 16), !enzyme_fromstack !1325
  %7 = bitcast i8* %malloccall2 to [2 x double]*
  %malloccall5 = tail call noalias nonnull dereferenceable(16) dereferenceable_or_null(16) i8* @malloc(i64 16), !enzyme_fromstack !1325
  %8 = bitcast i8* %malloccall5 to [2 x i64]*
  %malloccall1 = tail call noalias nonnull dereferenceable(16) dereferenceable_or_null(16) i8* @malloc(i64 16), !enzyme_fromstack !1325
  %9 = bitcast i8* %malloccall1 to [2 x i64]*
  %malloccall6 = tail call noalias nonnull dereferenceable(16) dereferenceable_or_null(16) i8* @malloc(i64 16), !enzyme_fromstack !1325
  %10 = bitcast i8* %malloccall6 to [2 x double]*
  %malloccall = tail call noalias nonnull dereferenceable(16) dereferenceable_or_null(16) i8* @malloc(i64 16), !enzyme_fromstack !1325
  %11 = bitcast i8* %malloccall to [2 x i64]*
  %malloccall7 = tail call noalias nonnull dereferenceable(16) dereferenceable_or_null(16) i8* @malloc(i64 16), !enzyme_fromstack !1325
  %12 = bitcast i8* %malloccall7 to [2 x double]*
  %13 = call {}*** @julia.get_pgcstack() #16
  %14 = icmp slt i64 %3, 2, !dbg !1636
  %.not = icmp eq i64 %2, 0
  %or.cond = or i1 %.not, %14, !dbg !1637
  br i1 %or.cond, label %L108, label %L8, !dbg !1637

L8:                                               ; preds = %top
  %15 = sub i64 0, %1, !dbg !1638
  %16 = sitofp i64 %15 to double, !dbg !1640
  %17 = sitofp i64 %2 to double, !dbg !1640
  %18 = fdiv double %16, %17, !dbg !1644
  %19 = fadd double %18, 1.000000e+00, !dbg !1645
  %20 = call double @llvm.rint.f64(double %19) #16, !dbg !1647
  %21 = fcmp ult double %20, 0xC3E0000000000000, !dbg !1649
  %22 = fcmp uge double %20, 0x43E0000000000000, !dbg !1650
  %23 = or i1 %21, %22, !dbg !1650
  br i1 %23, label %L23, label %L37, !dbg !1650

L23:                                              ; preds = %L8
  %ptls_field2678 = getelementptr inbounds {}**, {}*** %13, i64 2305843009213693954, !dbg !1651
  %24 = bitcast {}*** %ptls_field2678 to i8**, !dbg !1651
  %ptls_load277980 = load i8*, i8** %24, align 8, !dbg !1651, !tbaa !96
  %25 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_load277980, i64 noundef 8, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 286677216 to {}*) to {} addrspace(10)*)) #20, !dbg !1651
  %26 = bitcast {} addrspace(10)* %25 to double addrspace(10)*, !dbg !1651
  store double %20, double addrspace(10)* %26, align 8, !dbg !1651, !tbaa !98
  %27 = call cc38 nonnull {} addrspace(10)* bitcast ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32, {} addrspace(10)*)* @jl_invoke to {} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)*, {} addrspace(10)*, {} addrspace(10)*, {} addrspace(10)*)*)({} addrspace(10)* addrspacecast ({}* inttoptr (i64 258965920 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 329076272 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 184449464 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 284662976 to {}*) to {} addrspace(10)*), {} addrspace(10)* nonnull %25) #16, !dbg !1651
  %28 = addrspacecast {} addrspace(10)* %27 to {} addrspace(12)*, !dbg !1651
  call void @jl_throw({} addrspace(12)* %28) #21, !dbg !1651
  unreachable, !dbg !1651

L37:                                              ; preds = %L8
  %29 = fptosi double %20 to i64, !dbg !1652
  %30 = freeze i64 %29, !dbg !1652
  %.not62 = icmp sgt i64 %30, %3, !dbg !1654
  %31 = icmp sgt i64 %30, 1, !dbg !1656
  %32 = select i1 %31, i64 %30, i64 1, !dbg !1656
  %33 = select i1 %.not62, i64 %3, i64 %32, !dbg !1656
  %34 = add i64 %33, -1, !dbg !1657
  %35 = sub i64 %3, %33, !dbg !1657
  %.not72 = icmp slt i64 %35, %34, !dbg !1661
  %36 = select i1 %.not72, i64 %34, i64 %35, !dbg !1662
  %37 = sitofp i64 %36 to double, !dbg !1663
  %38 = call fastcc double @julia_log2_6124(double %37) #22, !dbg !1667
  %39 = call double @llvm.ceil.f64(double %38) #16, !dbg !1668
  %40 = fcmp ult double %39, 0xC3E0000000000000, !dbg !1670
  %41 = fcmp uge double %39, 0x43E0000000000000, !dbg !1671
  %42 = or i1 %40, %41, !dbg !1671
  br i1 %42, label %L53, label %L85, !dbg !1671

L53:                                              ; preds = %L37
  %ptls_field2174 = getelementptr inbounds {}**, {}*** %13, i64 2305843009213693954, !dbg !1672
  %43 = bitcast {}*** %ptls_field2174 to i8**, !dbg !1672
  %ptls_load227576 = load i8*, i8** %43, align 8, !dbg !1672, !tbaa !96
  %44 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_load227576, i64 noundef 8, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 286677216 to {}*) to {} addrspace(10)*)) #20, !dbg !1672
  %45 = bitcast {} addrspace(10)* %44 to double addrspace(10)*, !dbg !1672
  store double %39, double addrspace(10)* %45, align 8, !dbg !1672, !tbaa !98
  %46 = call cc38 nonnull {} addrspace(10)* bitcast ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32, {} addrspace(10)*)* @jl_invoke to {} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)*, {} addrspace(10)*, {} addrspace(10)*, {} addrspace(10)*)*)({} addrspace(10)* addrspacecast ({}* inttoptr (i64 258965920 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 329076272 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 184449464 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 284662976 to {}*) to {} addrspace(10)*), {} addrspace(10)* nonnull %44) #16, !dbg !1672
  %47 = addrspacecast {} addrspace(10)* %46 to {} addrspace(12)*, !dbg !1672
  call void @jl_throw({} addrspace(12)* %47) #21, !dbg !1672
  unreachable, !dbg !1672

L85:                                              ; preds = %L37
  %48 = mul i64 %34, %2, !dbg !1673
  %49 = add i64 %48, %1, !dbg !1675
  %50 = fptosi double %39 to i64, !dbg !1676
  %51 = freeze i64 %50, !dbg !1676
  %52 = add i64 %51, 1, !dbg !1678
  %.inv = icmp slt i64 %52, 27, !dbg !1679
  %53 = select i1 %.inv, i64 %52, i64 27, !dbg !1679
  %54 = getelementptr inbounds [2 x i64], [2 x i64]* %6, i64 0, i64 0, !dbg !1680
  store i64 %49, i64* %54, align 8, !dbg !1680, !tbaa !34
  %55 = getelementptr inbounds [2 x i64], [2 x i64]* %6, i64 0, i64 1, !dbg !1680
  store i64 %4, i64* %55, align 8, !dbg !1680, !tbaa !34
  %56 = getelementptr inbounds [2 x i64], [2 x i64]* %8, i64 0, i64 0, !dbg !1680
  store i64 %2, i64* %56, align 8, !dbg !1680, !tbaa !34
  %57 = getelementptr inbounds [2 x i64], [2 x i64]* %8, i64 0, i64 1, !dbg !1680
  store i64 %4, i64* %57, align 8, !dbg !1680, !tbaa !34
  %58 = addrspacecast [2 x i64]* %6 to [2 x i64] addrspace(11)*, !dbg !1681
  call fastcc void @julia_TwicePrecision_6146([2 x double]* noalias nocapture noundef nonnull writeonly sret([2 x double]) align 8 dereferenceable(16) %10, [2 x i64] addrspace(11)* nocapture noundef nonnull readonly align 8 dereferenceable(16) %58) #17, !dbg !1681
  %59 = addrspacecast [2 x i64]* %8 to [2 x i64] addrspace(11)*, !dbg !1682
  call fastcc void @julia_TwicePrecision_6146([2 x double]* noalias nocapture noundef nonnull writeonly sret([2 x double]) align 8 dereferenceable(16) %12, [2 x i64] addrspace(11)* nocapture noundef nonnull readonly align 8 dereferenceable(16) %59) #17, !dbg !1682
  %60 = icmp slt i64 %53, 0, !dbg !1683
  %61 = shl nsw i64 -1, %53, !dbg !1687
  %62 = icmp ugt i64 %53, 63, !dbg !1687
  %63 = select i1 %62, i64 0, i64 %61, !dbg !1687
  %64 = sub i64 0, %53, !dbg !1688
  %65 = lshr i64 -1, %64, !dbg !1689
  %66 = icmp ugt i64 %64, 63, !dbg !1689
  %67 = select i1 %66, i64 0, i64 %65, !dbg !1689
  %68 = select i1 %60, i64 %67, i64 %63, !dbg !1684
  %69 = bitcast [2 x double]* %12 to i64*, !dbg !1690
  %70 = load i64, i64* %69, align 8, !dbg !1690, !tbaa !34
  %71 = and i64 %70, %68, !dbg !1692
  %72 = icmp slt i64 %33, 1, !dbg !1693
  %73 = icmp sgt i64 %33, %3, !dbg !1694
  %value_phi8 = or i1 %72, %73, !dbg !1694
  br i1 %value_phi8, label %L96, label %L94, !dbg !1694

L94:                                              ; preds = %L85
  %74 = getelementptr inbounds [2 x double], [2 x double]* %12, i64 0, i64 1, !dbg !1697
  %75 = load double, double* %74, align 8, !dbg !1699, !tbaa !34
  %76 = bitcast i64 %71 to double, !dbg !1690
  %.cast = bitcast i64 %70 to double, !dbg !1700
  %77 = fsub double %.cast, %76, !dbg !1700
  %78 = fadd double %77, %75, !dbg !1699
  %.sroa.0.sroa.0.0..sroa.0.0..sroa_cast30.sroa_cast = bitcast [2 x double]* %10 to i8*, !dbg !1701
  %79 = bitcast { [2 x double], [2 x double], i64, i64 }* %0 to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture noundef nonnull writeonly align 8 dereferenceable(16) %79, i8* noundef nonnull align 8 dereferenceable(16) %.sroa.0.sroa.0.0..sroa.0.0..sroa_cast30.sroa_cast, i64 noundef 16, i1 noundef false) #16, !dbg !1701
  %.sroa.0.sroa.2.0..sroa.0.0..sroa_cast.sroa_idx37 = getelementptr inbounds { [2 x double], [2 x double], i64, i64 }, { [2 x double], [2 x double], i64, i64 }* %0, i64 0, i32 1, i64 0, !dbg !1680
  %80 = bitcast double* %.sroa.0.sroa.2.0..sroa.0.0..sroa_cast.sroa_idx37 to i64*, !dbg !1680
…
Stacktrace:

julia_error(cstr::Cstring, val::Ptr{LLVM.API.LLVMOpaqueValue}, errtype::Enzyme.API.ErrorType, data::Ptr{Nothing}) at D:.julia\packages\Enzyme\Wanbg\src\compiler.jl

EnzymeCreatePrimalAndGradient(logic::Enzyme.Logic, todiff::LLVM.Function, retType::Enzyme.API.CDIFFE_TYPE, constant_args::Vector{Enzyme.API.CDIFFE_TYPE}, TA::Enzyme.TypeAnalysis, returnValue::Bool, dretUsed::Bool, mode::Enzyme.API.CDerivativeMode, width::Int64, additionalArg::Ptr{Nothing}, typeInfo::Enzyme.FnTypeInfo, uncacheable_args::Vector{Bool}, augmented::Ptr{Nothing}, atomicAdd::Bool) at D:.julia\packages\Enzyme\Wanbg\src\api.jl

enzyme!(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams, GPUCompiler.FunctionSpec{typeof(f), Tuple{Float64}}}, mod::LLVM.Module, primalf::LLVM.Function, adjoint::GPUCompiler.FunctionSpec{typeof(f), Tuple{Active{Float64}}}, mode::Enzyme.API.CDerivativeMode, width::Int64, parallel::Bool, actualRetType::Type, dupClosure::Bool, wrap::Bool, modifiedBetween::Bool, returnPrimal::Bool) at D:.julia\packages\Enzyme\Wanbg\src\compiler.jl

codegen(output::Symbol, job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams, GPUCompiler.FunctionSpec{typeof(f), Tuple{Float64}}}; libraries::Bool, deferred_codegen::Bool, optimize::Bool, ctx::LLVM.Context, strip::Bool, validate::Bool, only_entry::Bool, parent_job::Nothing) at D:.julia\packages\Enzyme\Wanbg\src\compiler.jl

_thunk(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams, GPUCompiler.FunctionSpec{typeof(f), Tuple{Float64}}}) at D:.julia\packages\Enzyme\Wanbg\src\compiler.jl

cached_compilation(job::GPUCompiler.CompilerJob, key::UInt64, specid::UInt64) at D:.julia\packages\Enzyme\Wanbg\src\compiler.jl

#s565#115 at D:.julia\packages\Enzyme\Wanbg\src\compiler.jl

var"#s565#115"(F::Any, Fn::Any, DF::Any, A::Any, TT::Any, Mode::Any, ModifiedBetween::Any, width::Any, specid::Any, ReturnPrimal::Any, ::Any, #unused#::Type, f::Any, df::Any, #unused#::Type, tt::Any, #unused#::Type, #unused#::Type, #unused#::Type, #unused#::Type, #unused#::Any) at .\none

(::Core.GeneratedFunctionStub)(::Any, ::Vararg{Any}) at .\boot.jl

thunk at D:.julia\packages\Enzyme\Wanbg\src\compiler.jl

thunk at D:.julia\packages\Enzyme\Wanbg\src\compiler.jl (repeats 2 times)

autodiff(#unused#::Enzyme.ReverseMode, f::typeof(f), #unused#::Type{Active}, args::Active{Float64}) at D:.julia\packages\Enzyme\Wanbg\src\Enzyme.jl

How can it be fixed?

vchuravy commented 2 years ago

This might be a duplicate of #274, since the compilation failure occurs in `floatrange`.