EnzymeAD / Enzyme.jl

Julia bindings for the Enzyme automatic differentiator
https://enzyme.mit.edu
MIT License
459 stars 66 forks source link

Tape type mismatch #1793

Open pxl-th opened 2 months ago

pxl-th commented 2 months ago

Minimal working example (AMDGPU.jl PR: https://github.com/JuliaGPU/AMDGPU.jl/pull/668):

using AMDGPU
using EnzymeCore, Enzyme

# Kernel body: squares one element of `x` in place.
# The element index is the `x` component of `workitemIdx()`; presumably each launched
# work-item therefore handles the entry matching its own index (confirm against the
# AMDGPU.jl device API).
function square_kernel!(x)
    i = workitemIdx().x
    # Equivalent to x[i] = x[i] * x[i]: read the entry, square it, write it back.
    x[i] *= x[i]
    return
end

# Launches `square_kernel!` on `x` through `@roc` with `groupsize=length(x)` and
# `gridsize=1`. This is the function handed to `Enzyme.autodiff` in `main`.
function square!(x)
    # The keyword-style launch options are what appear as `Core.kwcall` with
    # `@NamedTuple{groupsize::Int64, gridsize::Int64}` in the reported error.
    @roc groupsize=length(x) gridsize=1 square_kernel!(x)
    return
end

# Reproducer entry point: differentiates `square!` in reverse mode and checks the result.
function main()
    # Primal input: the values 1.0, 2.0, …, 64.0 wrapped in a ROCArray.
    A = ROCArray(collect(1.0:64.0))
    # Shadow of A, initialised to ones and paired with A via `Duplicated` below.
    dA = ROCArray(ones(Float64, 64))
    # The stack traces in this report lead back to this call.
    Enzyme.autodiff(Reverse, square!, Duplicated(A, dA))
    # d(x^2)/dx = 2x, so for inputs 1:64 the expected values in dA are 2, 4, …, 128
    # (assumes the shadow receives the gradient scaled by its initial ones — confirm).
    @assert all(dA .≈ (2:2:128))
    return
end
main()
ERROR: LoadError: AssertionError: Enzyme : mismatch between innerTy LLVM.PointerType([1 x { { i64, {} addrspace(10)*, {} addrspace(10)* } }] addrspace(11)*) and tape type LLVM.PointerType({} addrspace(10)*)
tape_idx=3
true_idx=3
isKWCall=true
kwtup=@NamedTuple{groupsize::Int64, gridsize::Int64}
funcTy=typeof(EnzymeCore.EnzymeRules.reverse)
isghostty(funcTy)=true
miRT=Union{}
sret=nothing
returnRoots=nothing
swiftself=false
RT=Const{Nothing}
rev_RT=Union{}
applicablefn=false
tape=LLVM.UndefValue(0x0000000016c5f450)
llvmf=void (i64, [2 x i64] addrspace(11)*, [1 x { { i64, {} addrspace(10)*, {} addrspace(10)* } }] addrspace(11)*, {} addrspace(10)*, [2 x {} addrspace(10)*] addrspace(11)*)
TapeT=Any
mi=MethodInstance for Core.kwcall(::@NamedTuple{groupsize::Int64, gridsize::Int64}, ::AMDGPU.Runtime.HIPKernel{typeof(square_kernel!), Tuple{AMDGPU.Device.ROCDeviceVector{Float64, 1}}}, ::ROCArray{Float64, 1, AMDGPU.Runtime.Mem.HIPBuffer})
ami=MethodInstance for Core.kwcall(::@NamedTuple{groupsize::Int64, gridsize::Int64}, ::typeof(EnzymeCore.EnzymeRules.augmented_primal), ::EnzymeCore.EnzymeRules.ConfigWidth{1, false, false, (false, false)}, ::Const{AMDGPU.Runtime.HIPKernel{typeof(square_kernel!), Tuple{AMDGPU.Device.ROCDeviceVector{Float64, 1}}}}, ::Type{Const{Nothing}}, ::Duplicated{ROCArray{Float64, 1, AMDGPU.Runtime.Mem.HIPBuffer}})
rev_TT =Tuple{UInt64, typeof(Core.kwcall), @NamedTuple{groupsize::Int64, gridsize::Int64}, typeof(EnzymeCore.EnzymeRules.reverse), EnzymeCore.EnzymeRules.ConfigWidth{1, false, false, (false, false)}, Const{AMDGPU.Runtime.HIPKernel{typeof(square_kernel!), Tuple{AMDGPU.Device.ROCDeviceVector{Float64, 1}}}}, Type{Const{Nothing}}, Any, Duplicated{ROCArray{Float64, 1, AMDGPU.Runtime.Mem.HIPBuffer}}}

Stacktrace:
  [1] enzyme_custom_common_rev
    @ ~/.julia/packages/Enzyme/Tb3Iu/src/rules/customrules.jl:833 [inlined]
  [2] enzyme_custom_rev(B::LLVM.IRBuilder, orig::LLVM.CallInst, gutils::Enzyme.Compiler.GradientUtils, tape::LLVM.UndefValue)
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/Tb3Iu/src/rules/customrules.jl:1130
  [3] enzyme_custom_rev_cfunc(B::Ptr{LLVM.API.LLVMOpaqueBuilder}, OrigCI::Ptr{LLVM.API.LLVMOpaqueValue}, gutils::Ptr{Nothing}, tape::Ptr{LLVM.API.LLVMOpaqueValue})
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/Tb3Iu/src/rules/llvmrules.jl:27
  [4] EnzymeCreatePrimalAndGradient(logic::Enzyme.Logic, todiff::LLVM.Function, retType::Enzyme.API.CDIFFE_TYPE, constant_args::Vector{Enzyme.API.CDIFFE_TYPE}, TA::Enzyme.TypeAnalysis, returnValue::Bool, dretUsed::Bool, mode::Enzyme.API.CDerivativeMode, width::Int64, additionalArg::Ptr{Nothing}, forceAnonymousTape::Bool, typeInfo::Enzyme.FnTypeInfo, uncacheable_args::Vector{Bool}, augmented::Ptr{Nothing}, atomicAdd::Bool)
    @ Enzyme.API ~/.julia/packages/Enzyme/Tb3Iu/src/api.jl:163
  [5] enzyme!(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}, mod::LLVM.Module, primalf::LLVM.Function, TT::Type, mode::Enzyme.API.CDerivativeMode, width::Int64, parallel::Bool, actualRetType::Type, wrap::Bool, modifiedBetween::Tuple{Bool, Bool}, returnPrimal::Bool, expectedTapeType::Type, loweredArgs::Set{Int64}, boxedArgs::Set{Int64})
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/Tb3Iu/src/compiler.jl:4150
  [6] codegen(output::Symbol, job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}; libraries::Bool, deferred_codegen::Bool, optimize::Bool, toplevel::Bool, strip::Bool, validate::Bool, only_entry::Bool, parent_job::Nothing)
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/Tb3Iu/src/compiler.jl:6404
  [7] codegen
    @ ~/.julia/packages/Enzyme/Tb3Iu/src/compiler.jl:5591 [inlined]
  [8] _thunk(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}, postopt::Bool)
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/Tb3Iu/src/compiler.jl:7205
  [9] _thunk
    @ ~/.julia/packages/Enzyme/Tb3Iu/src/compiler.jl:7205 [inlined]
 [10] cached_compilation
    @ ~/.julia/packages/Enzyme/Tb3Iu/src/compiler.jl:7246 [inlined]
 [11] thunkbase(ctx::LLVM.Context, mi::Core.MethodInstance, ::Val{0x0000000000007b20}, ::Type{Const{typeof(square!)}}, ::Type{Const{Nothing}}, tt::Type{Tuple{Duplicated{ROCArray{Float64, 1, AMDGPU.Runtime.Mem.HIPBuffer}}}}, ::Val{Enzyme.API.DEM_ReverseModeCombined}, ::Val{1}, ::Val{(false, false)}, ::Val{false}, ::Val{false}, ::Type{FFIABI}, ::Val{true})
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/Tb3Iu/src/compiler.jl:7319
 [12] #s2055#18999
    @ ~/.julia/packages/Enzyme/Tb3Iu/src/compiler.jl:7371 [inlined]
 [13] var"#s2055#18999"(FA::Any, A::Any, TT::Any, Mode::Any, ModifiedBetween::Any, width::Any, ReturnPrimal::Any, ShadowInit::Any, World::Any, ABI::Any, ErrIfFuncWritten::Any, ::Any, ::Type, ::Type, ::Type, tt::Any, ::Type, ::Type, ::Type, ::Type, ::Type, ::Type, ::Any)
    @ Enzyme.Compiler ./none:0
 [14] (::Core.GeneratedFunctionStub)(::UInt64, ::LineNumberNode, ::Any, ::Vararg{Any})
    @ Core ./boot.jl:602
 [15] autodiff
    @ ~/.julia/packages/Enzyme/Tb3Iu/src/Enzyme.jl:315 [inlined]
 [16] autodiff
    @ ~/.julia/packages/Enzyme/Tb3Iu/src/Enzyme.jl:348 [inlined]
 [17] autodiff
    @ ~/.julia/packages/Enzyme/Tb3Iu/src/Enzyme.jl:329 [inlined]
 [18] main()
    @ Main ~/.julia/dev/AMDGPU/t.jl:18
 [19] top-level scope
    @ ~/.julia/dev/AMDGPU/t.jl:24
wsmoses commented 2 months ago

@pxl-th, can you let me know what happens when you run this with the following PR: https://github.com/EnzymeAD/Enzyme.jl/pull/1796

pxl-th commented 2 months ago

Still the same error:

ERROR: LoadError: AssertionError: Enzyme : mismatch between innerTy LLVM.PointerType([1 x { { i64, {} addrspace(10)*, {} addrspace(10)* } }] addrspace(11)*) and tape type LLVM.PointerType({} addrspace(10)*)
tape_idx=3
true_idx=3
isKWCall=true
kwtup=@NamedTuple{groupsize::Int64, gridsize::Int64}
funcTy=EnzymeCore.EnzymeRules.ConfigWidth{1, false, false, (false, false)}
isghostty(funcTy)=true
miRT=Union{}
sret=nothing
returnRoots=nothing
swiftself=false
RT=Const{Nothing}
rev_RT=Union{}
applicablefn=false
tape=LLVM.UndefValue(0x000000000f9f8710)
llvmf=void (i64, [2 x i64] addrspace(11)*, [1 x { { i64, {} addrspace(10)*, {} addrspace(10)* } }] addrspace(11)*, {} addrspace(10)*, [2 x {} addrspace(10)*] addrspace(11)*)
TapeT=Any
mi=MethodInstance for Core.kwcall(::@NamedTuple{groupsize::Int64, gridsize::Int64}, ::AMDGPU.Runtime.HIPKernel{typeof(square_kernel!), Tuple{AMDGPU.Device.ROCDeviceVector{Float64, 1}}}, ::ROCArray{Float64, 1, AMDGPU.Runtime.Mem.HIPBuffer})
ami=MethodInstance for Core.kwcall(::@NamedTuple{groupsize::Int64, gridsize::Int64}, ::typeof(EnzymeCore.EnzymeRules.augmented_primal), ::EnzymeCore.EnzymeRules.ConfigWidth{1, false, false, (false, false)}, ::Const{AMDGPU.Runtime.HIPKernel{typeof(square_kernel!), Tuple{AMDGPU.Device.ROCDeviceVector{Float64, 1}}}}, ::Type{Const{Nothing}}, ::Duplicated{ROCArray{Float64, 1, AMDGPU.Runtime.Mem.HIPBuffer}})
rev_TT =Tuple{UInt64, typeof(Core.kwcall), @NamedTuple{groupsize::Int64, gridsize::Int64}, typeof(EnzymeCore.EnzymeRules.reverse), EnzymeCore.EnzymeRules.ConfigWidth{1, false, false, (false, false)}, Const{AMDGPU.Runtime.HIPKernel{typeof(square_kernel!), Tuple{AMDGPU.Device.ROCDeviceVector{Float64, 1}}}}, Type{Const{Nothing}}, Any, Duplicated{ROCArray{Float64, 1, AMDGPU.Runtime.Mem.HIPBuffer}}}

Stacktrace:
  [1] enzyme_custom_common_rev
    @ ~/.julia/dev/Enzyme/src/rules/customrules.jl:833 [inlined]
wsmoses commented 2 months ago

Can you retry and paste the updated error? I just pushed another commit.

pxl-th commented 2 months ago

Now segfaulting:

[13222] signal (11.1): Segmentation fault
in expression starting at /home/pxlth/.julia/dev/AMDGPU/t.jl:24
typekeyvalue_hash at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/jltypes.c:1622 [inlined]
lookup_typevalue at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/jltypes.c:1059
jl_inst_arg_tuple_type at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/jltypes.c:2162
jl_f_tuple at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/builtins.c:868 [inlined]
jl_f_tuple at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/builtins.c:863
absint at /home/pxlth/.julia/dev/Enzyme/src/absint.jl:105
abs_typeof at /home/pxlth/.julia/dev/Enzyme/src/absint.jl:423
unknown function (ip: 0x7c94bb300343)
_jl_invoke at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/gf.c:2895 [inlined]
ijl_apply_generic at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/gf.c:3077
abs_typeof at /home/pxlth/.julia/dev/Enzyme/src/absint.jl:159
abs_typeof at /home/pxlth/.julia/dev/Enzyme/src/absint.jl:153
unknown function (ip: 0x7c94bb300409)
_jl_invoke at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/gf.c:2895 [inlined]
ijl_apply_generic at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/gf.c:3077
check_ir! at /home/pxlth/.julia/dev/Enzyme/src/compiler/validation.jl:696
check_ir! at /home/pxlth/.julia/dev/Enzyme/src/compiler/validation.jl:346
check_ir! at /home/pxlth/.julia/dev/Enzyme/src/compiler/validation.jl:316
check_ir at /home/pxlth/.julia/dev/Enzyme/src/compiler/validation.jl:158 [inlined]
#codegen#18949 at /home/pxlth/.julia/dev/Enzyme/src/compiler.jl:5652
codegen at /home/pxlth/.julia/dev/Enzyme/src/compiler.jl:5608 [inlined]
#89 at /home/pxlth/.julia/dev/Enzyme/src/Enzyme.jl:801
#JuliaContext#152 at /home/pxlth/.julia/packages/GPUCompiler/VPC5n/src/driver.jl:34
JuliaContext at /home/pxlth/.julia/packages/GPUCompiler/VPC5n/src/driver.jl:25 [inlined]
tape_type at /home/pxlth/.julia/dev/Enzyme/src/Enzyme.jl:800 [inlined]
#augmented_primal#18 at /home/pxlth/.julia/dev/AMDGPU/ext/EnzymeCoreExt/EnzymeCoreExt.jl:191
augmented_primal at /home/pxlth/.julia/dev/AMDGPU/ext/EnzymeCoreExt/EnzymeCoreExt.jl:179 [inlined]
macro expansion at /home/pxlth/.julia/dev/AMDGPU/src/highlevel.jl:175 [inlined]
square! at /home/pxlth/.julia/dev/AMDGPU/t.jl:11 [inlined]
diffejulia_square__2956wrap at /home/pxlth/.julia/dev/AMDGPU/t.jl:0
macro expansion at /home/pxlth/.julia/dev/Enzyme/src/compiler.jl:7179 [inlined]
enzyme_call at /home/pxlth/.julia/dev/Enzyme/src/compiler.jl:6788 [inlined]
CombinedAdjointThunk at /home/pxlth/.julia/dev/Enzyme/src/compiler.jl:6665 [inlined]
autodiff at /home/pxlth/.julia/dev/Enzyme/src/Enzyme.jl:320 [inlined]
autodiff at /home/pxlth/.julia/dev/Enzyme/src/Enzyme.jl:348 [inlined]
autodiff at /home/pxlth/.julia/dev/Enzyme/src/Enzyme.jl:329 [inlined]
main at /home/pxlth/.julia/dev/AMDGPU/t.jl:18
unknown function (ip: 0x7c94b912d142)
_jl_invoke at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/gf.c:2895 [inlined]
ijl_apply_generic at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/gf.c:3077
jl_apply at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/julia.h:1982 [inlined]
do_call at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/interpreter.c:126
eval_value at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/interpreter.c:223
eval_stmt_value at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/interpreter.c:174 [inlined]
eval_body at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/interpreter.c:617
jl_interpret_toplevel_thunk at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/interpreter.c:775
jl_toplevel_eval_flex at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/toplevel.c:934
jl_toplevel_eval_flex at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/toplevel.c:877
ijl_toplevel_eval_in at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/toplevel.c:985
eval at ./boot.jl:385 [inlined]
include_string at ./loading.jl:2076
_jl_invoke at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/gf.c:2895 [inlined]
ijl_apply_generic at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/gf.c:3077
_include at ./loading.jl:2136
include at ./Base.jl:495
jfptr_include_46394.1 at /home/pxlth/bin/julia-1.10.3/lib/julia/sys.so (unknown line)
_jl_invoke at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/gf.c:2895 [inlined]
ijl_apply_generic at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/gf.c:3077
exec_options at ./client.jl:318
_start at ./client.jl:552
jfptr__start_82726.1 at /home/pxlth/bin/julia-1.10.3/lib/julia/sys.so (unknown line)
_jl_invoke at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/gf.c:2895 [inlined]
ijl_apply_generic at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/gf.c:3077
jl_apply at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/julia.h:1982 [inlined]
true_main at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/jlapi.c:582
jl_repl_entrypoint at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/jlapi.c:731
main at julia (unknown line)
unknown function (ip: 0x7c94f2c29d8f)
__libc_start_main at /lib/x86_64-linux-gnu/libc.so.6 (unknown line)
unknown function (ip: 0x4010b8)
Allocations: 206207229 (Pool: 205944898; Big: 262331); GC: 124
Segmentation fault (core dumped)