Open ffrancesco94 opened 2 weeks ago
In a clean environment, it gives this somewhat similar message:
ERROR: LoadError: GPUCompiler.InvalidIRError(GPUCompiler.CompilerJob{GPUCompiler.GCNCompilerTarget, AMDGPU.Compiler.HIPCompilerParams}(MethodInstance for (::WaterLily.var"#gpu_##kern_#545#223"{WaterLily.var"#fill!#220"{2, Float32, Int64, AutoBody{WaterLily.var"#comp#234"{Bool, var"#10#14"{SVector{3, ComplexF64}, Float64}, WaterLily.var"#231#235"}, WaterLily.var"#231#235"}}})(::KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{2, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(64, 1)}, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, Nothing}}, ::AMDGPU.Device.ROCDeviceArray{Float32, 3, 1}, ::AMDGPU.Device.ROCDeviceArray{Float32, 4, 1}, ::AMDGPU.Device.ROCDeviceArray{Float32, 3, 1}, ::AMDGPU.Device.ROCDeviceMatrix{Float32, 1}, ::CartesianIndex{2}), GPUCompiler.CompilerConfig{GPUCompiler.GCNCompilerTarget, AMDGPU.Compiler.HIPCompilerParams}(GPUCompiler.GCNCompilerTarget("gfx1030", "+wavefrontsize32,-wavefrontsize64"), AMDGPU.Compiler.HIPCompilerParams(false, true), true, nothing, :specfunc, true, 2), 0x0000000000007bc2), Tuple{String, Vector{Base.StackTraces.StackFrame}, Any}[("dynamic function invocation", [_cutdim at multidimensional.jl:863, to_indices at indices.jl:355, to_indices at indices.jl:344, view at subarray.jl:183, maybeview at views.jl:148, dotview at broadcast.jl:1244, fill! at Body.jl:42, macro expansion at util.jl:111, gpu_ at macros.jl:95, gpu_ at none:0], getindex(t::Tuple, i::Int64) @ Base tuple.jl:31)])
Stacktrace:
[1] check_ir(job::GPUCompiler.CompilerJob{GPUCompiler.GCNCompilerTarget, AMDGPU.Compiler.HIPCompilerParams}, args::LLVM.Module)
@ GPUCompiler ~/.julia/packages/GPUCompiler/Y4hSX/src/validation.jl:147
[2] macro expansion
@ ~/.julia/packages/GPUCompiler/Y4hSX/src/driver.jl:458 [inlined]
[3] macro expansion
@ ~/.julia/packages/TimerOutputs/Lw5SP/src/TimerOutput.jl:253 [inlined]
[4] macro expansion
@ ~/.julia/packages/GPUCompiler/Y4hSX/src/driver.jl:457 [inlined]
[5] emit_llvm(job::GPUCompiler.CompilerJob; libraries::Bool, toplevel::Bool, optimize::Bool, cleanup::Bool, only_entry::Bool, validate::Bool)
@ GPUCompiler ~/.julia/packages/GPUCompiler/Y4hSX/src/utils.jl:103
[6] emit_llvm
@ ~/.julia/packages/GPUCompiler/Y4hSX/src/utils.jl:97 [inlined]
[7] codegen(output::Symbol, job::GPUCompiler.CompilerJob; libraries::Bool, toplevel::Bool, optimize::Bool, cleanup::Bool, strip::Bool, validate::Bool, only_entry::Bool, parent_job::Nothing)
@ GPUCompiler ~/.julia/packages/GPUCompiler/Y4hSX/src/driver.jl:136
[8] codegen
@ ~/.julia/packages/GPUCompiler/Y4hSX/src/driver.jl:115 [inlined]
[9] compile(target::Symbol, job::GPUCompiler.CompilerJob; libraries::Bool, toplevel::Bool, optimize::Bool, cleanup::Bool, strip::Bool, validate::Bool, only_entry::Bool)
@ GPUCompiler ~/.julia/packages/GPUCompiler/Y4hSX/src/driver.jl:111
[10] compile
@ ~/.julia/packages/GPUCompiler/Y4hSX/src/driver.jl:103 [inlined]
[11] #40
@ ~/.julia/packages/AMDGPU/a1v0k/src/compiler/codegen.jl:194 [inlined]
[12] JuliaContext(f::AMDGPU.Compiler.var"#40#41"{GPUCompiler.CompilerJob{GPUCompiler.GCNCompilerTarget, AMDGPU.Compiler.HIPCompilerParams}}; kwargs::@Kwargs{})
@ GPUCompiler ~/.julia/packages/GPUCompiler/Y4hSX/src/driver.jl:52
[13] JuliaContext(f::Function)
@ GPUCompiler ~/.julia/packages/GPUCompiler/Y4hSX/src/driver.jl:42
[14] hipcompile(job::GPUCompiler.CompilerJob)
@ AMDGPU.Compiler ~/.julia/packages/AMDGPU/a1v0k/src/compiler/codegen.jl:193
[15] actual_compilation(cache::Dict{Any, AMDGPU.HIP.HIPFunction}, src::Core.MethodInstance, world::UInt64, cfg::GPUCompiler.CompilerConfig{GPUCompiler.GCNCompilerTarget, AMDGPU.Compiler.HIPCompilerParams}, compiler::typeof(AMDGPU.Compiler.hipcompile), linker::typeof(AMDGPU.Compiler.hiplink))
@ GPUCompiler ~/.julia/packages/GPUCompiler/Y4hSX/src/execution.jl:237
[16] cached_compilation(cache::Dict{Any, AMDGPU.HIP.HIPFunction}, src::Core.MethodInstance, cfg::GPUCompiler.CompilerConfig{GPUCompiler.GCNCompilerTarget, AMDGPU.Compiler.HIPCompilerParams}, compiler::Function, linker::Function)
@ GPUCompiler ~/.julia/packages/GPUCompiler/Y4hSX/src/execution.jl:151
[17] macro expansion
@ ~/.julia/packages/AMDGPU/a1v0k/src/compiler/codegen.jl:161 [inlined]
[18] macro expansion
@ ./lock.jl:267 [inlined]
[19] hipfunction(f::WaterLily.var"#gpu_##kern_#545#223"{WaterLily.var"#fill!#220"{2, Float32, Int64, AutoBody{WaterLily.var"#comp#234"{Bool, var"#10#14"{SVector{3, ComplexF64}, Float64}, WaterLily.var"#231#235"}, WaterLily.var"#231#235"}}}, tt::Type{Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{2, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(64, 1)}, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, Nothing}}, AMDGPU.Device.ROCDeviceArray{Float32, 3, 1}, AMDGPU.Device.ROCDeviceArray{Float32, 4, 1}, AMDGPU.Device.ROCDeviceArray{Float32, 3, 1}, AMDGPU.Device.ROCDeviceMatrix{Float32, 1}, CartesianIndex{2}}}; kwargs::@Kwargs{})
@ AMDGPU.Compiler ~/.julia/packages/AMDGPU/a1v0k/src/compiler/codegen.jl:155
[20] hipfunction(f::WaterLily.var"#gpu_##kern_#545#223"{WaterLily.var"#fill!#220"{2, Float32, Int64, AutoBody{WaterLily.var"#comp#234"{Bool, var"#10#14"{SVector{3, ComplexF64}, Float64}, WaterLily.var"#231#235"}, WaterLily.var"#231#235"}}}, tt::Type{Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{2, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(64, 1)}, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, Nothing}}, AMDGPU.Device.ROCDeviceArray{Float32, 3, 1}, AMDGPU.Device.ROCDeviceArray{Float32, 4, 1}, AMDGPU.Device.ROCDeviceArray{Float32, 3, 1}, AMDGPU.Device.ROCDeviceMatrix{Float32, 1}, CartesianIndex{2}}})
@ AMDGPU.Compiler ~/.julia/packages/AMDGPU/a1v0k/src/compiler/codegen.jl:154
[21] macro expansion
@ ~/.julia/packages/AMDGPU/a1v0k/src/highlevel.jl:172 [inlined]
[22] (::KernelAbstractions.Kernel{AMDGPU.ROCKernels.ROCBackend, KernelAbstractions.NDIteration.StaticSize{(64,)}, KernelAbstractions.NDIteration.DynamicSize, WaterLily.var"#gpu_##kern_#545#223"{WaterLily.var"#fill!#220"{2, Float32, Int64, AutoBody{WaterLily.var"#comp#234"{Bool, var"#10#14"{SVector{3, ComplexF64}, Float64}, WaterLily.var"#231#235"}, WaterLily.var"#231#235"}}}})(::AMDGPU.ROCArray{Float32, 3, AMDGPU.Runtime.Mem.HIPBuffer}, ::Vararg{Any}; ndrange::Tuple{Int64, Int64}, workgroupsize::Nothing)
@ AMDGPU.ROCKernels ~/.julia/packages/AMDGPU/a1v0k/src/ROCKernels.jl:86
[23] (::WaterLily.var"##kern#544#221"{WaterLily.var"#fill!#220"{2, Float32, Int64, AutoBody{WaterLily.var"#comp#234"{Bool, var"#10#14"{SVector{3, ComplexF64}, Float64}, WaterLily.var"#231#235"}, WaterLily.var"#231#235"}}, Flow{2, Float32, AMDGPU.ROCArray{Float32, 2, AMDGPU.Runtime.Mem.HIPBuffer}, AMDGPU.ROCArray{Float32, 3, AMDGPU.Runtime.Mem.HIPBuffer}, AMDGPU.ROCArray{Float32, 4, AMDGPU.Runtime.Mem.HIPBuffer}}})(μ₀::AMDGPU.ROCArray{Float32, 3, AMDGPU.Runtime.Mem.HIPBuffer}, μ₁::AMDGPU.ROCArray{Float32, 4, AMDGPU.Runtime.Mem.HIPBuffer}, V::AMDGPU.ROCArray{Float32, 3, AMDGPU.Runtime.Mem.HIPBuffer}, σ::AMDGPU.ROCArray{Float32, 2, AMDGPU.Runtime.Mem.HIPBuffer}, ::Val{12})
@ WaterLily ~/.julia/packages/WaterLily/raEyO/src/util.jl:114
[24] macro expansion
@ ~/.julia/packages/WaterLily/raEyO/src/util.jl:116 [inlined]
[25] measure!(a::Flow{2, Float32, AMDGPU.ROCArray{Float32, 2, AMDGPU.Runtime.Mem.HIPBuffer}, AMDGPU.ROCArray{Float32, 3, AMDGPU.Runtime.Mem.HIPBuffer}, AMDGPU.ROCArray{Float32, 4, AMDGPU.Runtime.Mem.HIPBuffer}}, body::AutoBody{WaterLily.var"#comp#234"{Bool, var"#10#14"{SVector{3, ComplexF64}, Float64}, WaterLily.var"#231#235"}, WaterLily.var"#231#235"}; t::Float32, ϵ::Int64)
@ WaterLily ~/.julia/packages/WaterLily/raEyO/src/Body.jl:45
[26] Simulation(dims::Tuple{Int64, Int64}, u_BC::Tuple{Int64, Int64}, L::Float64; Δt::Float64, ν::Float64, g::Nothing, U::Nothing, ϵ::Int64, perdir::Tuple{}, uλ::Nothing, exitBC::Bool, body::AutoBody{WaterLily.var"#comp#234"{Bool, var"#10#14"{SVector{3, ComplexF64}, Float64}, WaterLily.var"#231#235"}, WaterLily.var"#231#235"}, T::Type, mem::Type)
@ WaterLily ~/.julia/packages/WaterLily/raEyO/src/WaterLily.jl:75
[27] Simulation
@ ~/.julia/packages/WaterLily/raEyO/src/WaterLily.jl:65 [inlined]
[28] run_TwoD_julia(; p::Int64, Re::Int64, stop::Float64)
@ Main ~/MEGA/Julia/WaterLily-examples/examples/TwoD_Julia.jl:24
[29] top-level scope
@ ~/MEGA/Julia/WaterLily-examples/examples/TwoD_Julia.jl:42
[30] include(fname::String)
@ Base.MainInclude ./client.jl:489
[31] run(debug_session::VSCodeDebugger.DebugAdapter.DebugSession, error_handler::VSCodeDebugger.var"#3#4"{String})
@ VSCodeDebugger.DebugAdapter ~/.vscode-oss/extensions/julialang.language-julia-1.120.2-universal/scripts/packages/DebugAdapter/src/packagedef.jl:122
[32] startdebugger()
@ VSCodeDebugger ~/.vscode-oss/extensions/julialang.language-julia-1.120.2-universal/scripts/packages/VSCodeDebugger/src/VSCodeDebugger.jl:45
[33] top-level scope
@ ~/.vscode-oss/extensions/julialang.language-julia-1.120.2-universal/scripts/debugger/run_debugger.jl:12
[34] include(mod::Module, _path::String)
@ Base ./Base.jl:495
[35] exec_options(opts::Base.JLOptions)
@ Base ./client.jl:318
[36] _start()
@ Base ./client.jl:552
in expression starting at /home/fra/MEGA/Julia/WaterLily-examples/examples/TwoD_Julia.jl:42
Hi! I recently fixed a bug when creating a `Body` that appeared on AMD GPUs:
https://github.com/WaterLily-jl/WaterLily.jl/commit/890ceddb0f62f8d97cb3d207ffcea9d31530a4c7#diff-770286d43b6a0ad13ee676bd9ff2c34a3743c734bd2239d3755b2f8146fd7a1a
So, are you using the latest version in master?
And what is your ROCm version? See issue https://github.com/WaterLily-jl/WaterLily.jl/issues/145 for ROCm versions compatibility.
I am on master (I think: from `Pkg>` I did `add WaterLily#master`). AMDGPU version is 1.0.1. This is my `AMDGPU.versioninfo()` output:
[ Info: AMDGPU versioninfo
┌───────────┬──────────────────┬───────────┬────────────────────────────────────────────────────────────────────────────────────┐
│ Available │ Name │ Version │ Path │
├───────────┼──────────────────┼───────────┼────────────────────────────────────────────────────────────────────────────────────┤
│ + │ LLD │ - │ /opt/rocm/llvm/bin/ld.lld │
│ + │ Device Libraries │ - │ /home/fra/.julia/artifacts/5ad5ecb46e3c334821f54c1feecc6c152b7b6a45/amdgcn/bitcode │
│ + │ HIP │ 6.0.32831 │ /opt/rocm/lib/libamdhip64.so │
│ + │ rocBLAS │ 4.0.0 │ /opt/rocm/lib/librocblas.so │
│ + │ rocSOLVER │ 3.24.0 │ /opt/rocm/lib/librocsolver.so │
│ + │ rocALUTION │ - │ /opt/rocm/lib/librocalution.so │
│ + │ rocSPARSE │ - │ /opt/rocm/lib/librocsparse.so │
│ + │ rocRAND │ 2.10.5 │ /opt/rocm/lib/librocrand.so │
│ + │ rocFFT │ 1.0.27 │ /opt/rocm/lib/librocfft.so │
│ + │ MIOpen │ 3.0.0 │ /opt/rocm/lib/libMIOpen.so │
└───────────┴──────────────────┴───────────┴────────────────────────────────────────────────────────────────────────────────────┘
Could it be that ROCm 6 is too recent? If I try to use AMDGPU with their own artifacts (`AMDGPU.use_artifacts!()`), it complains about missing .so files; I am not sure if I'm doing something wrong in that approach.
Hi again. It seems that the error that I posted at first was not the one stemming from `master`. Running it from `master` gives the following stacktrace instead:
ERROR: LoadError: InvalidIRError: compiling MethodInstance for (::WaterLily.var"#gpu_##kern_#545#223"{…})(::KernelAbstractions.CompilerMetadata{…}, ::ROCDeviceArray{…}, ::ROCDeviceArray{…}, ::ROCDeviceArray{…}, ::AMDGPU.Device.ROCDeviceMatrix{…}, ::CartesianIndex{…}) resulted in invalid LLVM IR
Reason: unsupported dynamic function invocation (call to kwcall(::NamedTuple, ::typeof(measure), sdf, map, x, t) @ WaterLily ~/.julia/packages/WaterLily/hWQfE/src/AutoBody.jl:115)
Stacktrace:
[1] measure
@ ~/.julia/packages/WaterLily/hWQfE/src/AutoBody.jl:110
[2] fill!
@ ~/.julia/packages/WaterLily/hWQfE/src/Body.jl:37
[3] macro expansion
@ ~/.julia/packages/WaterLily/hWQfE/src/util.jl:111
[4] gpu_
@ ~/.julia/packages/KernelAbstractions/60cqT/src/macros.jl:95
[5] gpu_
@ ./none:0
Seems awfully close to that `Body` bug you mentioned.
Hi! Arch/Manjaro user with an RX 6950 XT. I was trying to run the `TwoD_Julia` tutorial with an AMD GPU backend. I can successfully do `using AMDGPU` (I should say that some tests fail though) with and without bundled ROCm artifacts. I can follow the basic AMD tutorials (e.g. creating arrays on the GPU and summing them and also offload functions with the `@roc` macro). Running the WaterLily tutorial with `mem=AMDGPU.ROCArray` when building the `Simulation()` struct throws the following error:
Julia version: 1.10.
Any suggestions?