Can I do something differently here?

  [21141c5a] AMDGPU v1.0.1
  [7da242da] Enzyme v0.12.34
  [09ab397b] StructArrays v0.6.18

julia> versioninfo()
Julia Version 1.10.3
Commit 0b4590a5507 (2024-04-30 10:59 UTC)
Build Info:
  Official https://julialang.org/ release
Platform Info:
  OS: Linux (x86_64-linux-gnu)
  CPU: 24 × AMD Ryzen 9 3900XT 12-Core Processor
  LIBM: libopenlibm
  LLVM: libLLVM-15.0.7 (ORCJIT, znver2)
Threads: 24 default, 0 interactive, 12 GC (on 24 virtual cores)
  LD_LIBRARY_PATH = :/lib:/nix/store/sy1k5jj33a8frbmf3sx1hd1ksx0schqx-clr-6.0.2/lib:/nix/store/7d8yhjypiybk9j3lqa2wk1chdpn8346m-rocm-runtime-6.0.2/lib:/nix/store/yx40lzzcww59nppw2xwakwlnq23sh7av-rocblas-6.0.2/lib:/nix/store/raj03q8wil10103d050d0ngx4k0qfjyc-mesa-24.0.7-drivers/lib

using StructArrays, AMDGPU, ForwardDiff, Enzyme
"""
    s(θ, t)

Evaluate `exp(-η * t) * exp(γ / λ * (1 - exp(λ * t)))`, where `γ`, `λ` and `η` are
properties destructured from `θ`.
"""
function s((; γ, λ, η), t)
    linear_part = exp(-η * t)
    exponential_part = exp(γ / λ * (1 - exp(λ * t)))
    return linear_part * exponential_part
end

"""
    h(θ, t)

Evaluate `η + γ * exp(λ * t)`, where `γ`, `λ` and `η` are properties destructured
from `θ`.
"""
function h((; γ, λ, η), t)
    return η + γ * exp(λ * t)
end

"""
    cmf(θ, d, t)

Return `s(θ, t)` multiplied by `h(θ, t)` raised to the power `d`.
"""
function cmf(θ, d, t)
    s_at_t = s(θ, t)
    h_at_t = h(θ, t)
    return s_at_t * (h_at_t ^ d)
end

"""
    tcmf(θ, l, d, t)

Return `cmf(θ, d, t)` divided by `s(θ, l)`.
"""
function tcmf(θ, l, d, t)
    numerator = cmf(θ, d, t)
    denominator = s(θ, l)
    return numerator / denominator
end
"""
    lln(params, (; Z, df))

Return the sum over all rows of `log(tcmf(θᵢ, lᵢ, dᵢ, tᵢ))`.

The per-row parameters come from `Θ = exp.(Z * params)`: columns 1, 2 and 3 of `Θ`
are used as `γ`, `λ` and `η` respectively. The remaining `tcmf` arguments are the
`ls`, `ds` and `ts` properties of `df`.

# Arguments
- `params`: matrix multiplied on the left by `Z`; the product needs at least 3 columns.
- second argument: any object with properties `Z` and `df` (destructured by name).
"""
function lln(params, (; Z, df))
    # Exponentiate elementwise after the matrix product.
    Θ = exp.(Z * params)
    # `@views` makes the column slices views instead of copies.
    sa = @views StructArray(γ=Θ[:, 1], λ=Θ[:, 2], η=Θ[:, 3])
    lls = log.(tcmf.(sa, df.ls, df.ds, df.ts))
    # The call-site `@noinline` is kept from the original reproducer.
    return @noinline sum(lls)
end
# Reproducer: build random inputs on the GPU and request a reverse-mode gradient.
# n = rows of Z and length of each data column, k = columns of Z / rows of params,
# p = columns of params (lln reads columns 1:3 of Z * params).
let n = 10, k = 5, p = 3
    Z = ROCArray(rand(n, k))
    params = ROCArray(-3 .+ rand(k, p))
    df = StructArray(
        ls=ROCArray(30.0 .+ 10rand(n)),
        ds=ROCArray(rand(n) .< 0.1),
        ts=ROCArray(50 .+ 10rand(n)),
    )
    data = (; Z, df)
    # Close over `data` so that only `params` is differentiated.
    f(params) = lln(params, data)
    # Sanity check: the primal evaluation must succeed before differentiating.
    @assert isfinite(f(params))
    Enzyme.gradient(Enzyme.Reverse, Enzyme.Const(f), params)
end

LoadError: Enzyme compilation failed.
Current scope: 
 [1] modifyproperty!
   @ ./Base.jl:74
 [2] account!
   @ ~/.julia/packages/AMDGPU/a1v0k/src/memory.jl:128
 [3] #HIPBuffer#6
   @ ~/.julia/packages/AMDGPU/a1v0k/src/runtime/memory/hip.jl:58

  [1] julia_error(cstr::Cstring, val::Ptr{LLVM.API.LLVMOpaqueValue}, errtype::Enzyme.API.ErrorType, data::Ptr{Nothing}, data2::Ptr{LLVM.API.LLVMOpaqueValue}, B::Ptr{LLVM.API.LLVMOpaqueBuilder})
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/02rde/src/compiler.jl:2347
  [2] EnzymeCreatePrimalAndGradient(logic::Enzyme.Logic, todiff::LLVM.Function, retType::Enzyme.API.CDIFFE_TYPE, constant_args::Vector{Enzyme.API.CDIFFE_TYPE}, TA::Enzyme.TypeAnalysis, returnValue::Bool, dretUsed::Bool, mode::Enzyme.API.CDerivativeMode, width::Int64, additionalArg::Ptr{Nothing}, forceAnonymousTape::Bool, typeInfo::Enzyme.FnTypeInfo, uncacheable_args::Vector{Bool}, augmented::Ptr{Nothing}, atomicAdd::Bool)
    @ Enzyme.API ~/.julia/packages/Enzyme/02rde/src/api.jl:163
  [3] enzyme!(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}, mod::LLVM.Module, primalf::LLVM.Function, TT::Type, mode::Enzyme.API.CDerivativeMode, width::Int64, parallel::Bool, actualRetType::Type, wrap::Bool, modifiedBetween::Tuple{Bool, Bool}, returnPrimal::Bool, expectedTapeType::Type, loweredArgs::Set{Int64}, boxedArgs::Set{Int64})
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/02rde/src/compiler.jl:4104
  [4] codegen(output::Symbol, job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}; libraries::Bool, deferred_codegen::Bool, optimize::Bool, toplevel::Bool, strip::Bool, validate::Bool, only_entry::Bool, parent_job::Nothing)
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/02rde/src/compiler.jl:6358
  [5] codegen
    @ ~/.julia/packages/Enzyme/02rde/src/compiler.jl:5545 [inlined]
  [6] _thunk(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}, postopt::Bool)
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/02rde/src/compiler.jl:7159
  [7] _thunk
    @ ~/.julia/packages/Enzyme/02rde/src/compiler.jl:7159 [inlined]
  [8] cached_compilation
    @ ~/.julia/packages/Enzyme/02rde/src/compiler.jl:7200 [inlined]
  [9] thunkbase(ctx::LLVM.Context, mi::Core.MethodInstance, ::Val{0x0000000000007b52}, ::Type{Const{var"#f#17"{@NamedTuple{Z::ROCArray{Float64, 2, AMDGPU.Runtime.Mem.HIPBuffer}, df::StructVector{@NamedTuple{ls::Float64, ds::Bool, ts::Float64}, @NamedTuple{ls::ROCArray{Float64, 1, AMDGPU.Runtime.Mem.HIPBuffer}, ds::ROCArray{Bool, 1, AMDGPU.Runtime.Mem.HIPBuffer}, ts::ROCArray{Float64, 1, AMDGPU.Runtime.Mem.HIPBuffer}}, Int64}}}}}, ::Type{Active}, tt::Type{Tuple{Duplicated{ROCArray{Float64, 2, AMDGPU.Runtime.Mem.HIPBuffer}}}}, ::Val{Enzyme.API.DEM_ReverseModeCombined}, ::Val{1}, ::Val{(false, false)}, ::Val{false}, ::Val{false}, ::Type{FFIABI}, ::Val{false})
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/02rde/src/compiler.jl:7273
 [10] #s2048#18999
    @ ~/.julia/packages/Enzyme/02rde/src/compiler.jl:7325 [inlined]
 [11] var"#s2048#18999"(FA::Any, A::Any, TT::Any, Mode::Any, ModifiedBetween::Any, width::Any, ReturnPrimal::Any, ShadowInit::Any, World::Any, ABI::Any, ErrIfFuncWritten::Any, ::Any, ::Type, ::Type, ::Type, tt::Any, ::Type, ::Type, ::Type, ::Type, ::Type, ::Type, ::Any)
    @ Enzyme.Compiler ./none:0
 [12] (::Core.GeneratedFunctionStub)(::UInt64, ::LineNumberNode, ::Any, ::Vararg{Any})
    @ Core ./boot.jl:602
 [13] autodiff
    @ ~/.julia/packages/Enzyme/02rde/src/Enzyme.jl:315 [inlined]
 [14] gradient(rm::ReverseMode{false, FFIABI, false, false}, f::Const{var"#f#17"{@NamedTuple{Z::ROCArray{Float64, 2, AMDGPU.Runtime.Mem.HIPBuffer}, df::StructVector{@NamedTuple{ls::Float64, ds::Bool, ts::Float64}, @NamedTuple{ls::ROCArray{Float64, 1, AMDGPU.Runtime.Mem.HIPBuffer}, ds::ROCArray{Bool, 1, AMDGPU.Runtime.Mem.HIPBuffer}, ts::ROCArray{Float64, 1, AMDGPU.Runtime.Mem.HIPBuffer}}, Int64}}}}, x::ROCArray{Float64, 2, AMDGPU.Runtime.Mem.HIPBuffer})
    @ Enzyme ~/.julia/packages/Enzyme/02rde/src/Enzyme.jl:1049
 [15] top-level scope
wsmoses commented 2 months ago

I think AMDGPU.jl needs an Enzyme extension equivalent to the one CUDA.jl already has (in this case you're hitting an issue in the constructor for a HIPArray).


Would you like to try a PR to AMDGPU.jl for this and cc me?

wsmoses commented 2 months ago

moved to https://github.com/JuliaGPU/AMDGPU.jl/issues/667