SciML / NeuralPDE.jl

Physics-Informed Neural Networks (PINN) Solvers of (Partial) Differential Equations for Scientific Machine Learning (SciML) accelerated simulation
https://docs.sciml.ai/NeuralPDE/stable/
Other
973 stars 195 forks source link

NeuralPDE with LuxAMDGPU, Optimization.solve throws error #835

Open mre2110 opened 6 months ago

mre2110 commented 6 months ago

Describe the bug 🐞

Solving a simple BVP (boundary value problem) with the AMDGPU backend breaks down in the Optimization.solve step with an error

Expected behavior

Optimizer should run smoothly (as is the case when using the CUDA backend)

It would be great to have the optimizer running on AMD hardware

Minimal Reproducible Example πŸ‘‡

using Lux, ComponentArrays, OptimizationOptimisers
using Test, NeuralPDE
using Optimization

import ModelingToolkit: Interval, infimum, supremum
using Random

# Seed the global RNG so the network initialization below is reproducible.
Random.seed!(100);

using LuxAMDGPU

# Report whether a functional AMD GPU stack was detected (true/false).
println(LuxAMDGPU.functional())

# Forbid scalar indexing of GPU arrays so accidental host-side element access
# raises an error instead of silently running slowly.
AMDGPU.allowscalar(false)
const gpud = gpu_device();

maxit = 2000   # maxiters passed to Optimization.solve
nprint = 100   # print the loss every `nprint` callback invocations

ncallback = 0  # global iteration counter, incremented by the callback below

# Progress callback for Optimization.solve: prints the current loss every
# `nprint` iterations, using the global counter `ncallback`, and always
# returns `false` so the optimizer is never stopped early.
callback = function (p, l)
    global ncallback, nprint
    ncallback % nprint == 0 && println("Iteration: $ncallback, Loss is: $l")
    ncallback += 1
    return false
end;

# Symbolic independent variable x and unknown function u(x) (ModelingToolkit).
@parameters x
@variables u(..)

# Diffusion coefficient: constant, k(x) ≡ 1 for every x.
function k(x)
    return 1
end
# Source term: constant, f(x) ≡ 1 for every x.
function f(x)
    return 1
end

# First-derivative operator with respect to x.
Dx = Differential(x)

# Steady 1-D diffusion equation: -d/dx( k(x) * du/dx ) = f(x).
eq = -Dx( (k(x) * Dx(u(x))) ) ~ f(x)
# Homogeneous Dirichlet boundary conditions at both endpoints.
bcs = [
    u(0) ~ 0,
    u(1) ~ 0]

# Spatial domain: the unit interval [0, 1].
domains = [
    x ∈ Interval(0.0, 1.0)]

# Assemble the symbolic PDE system (independent var x, dependent var u(x)).
@named pdesys = PDESystem(eq, bcs, domains, [x], [u(x)]) 

# Fully connected network: 1 input -> five sigmoid layers of width `inner` -> 1 output.
inner = 30
chain = Lux.Chain(Lux.Dense(1, inner, Lux.σ),
                Lux.Dense(inner, inner, Lux.σ),
                Lux.Dense(inner, inner, Lux.σ),
                Lux.Dense(inner, inner, Lux.σ),
                Lux.Dense(inner, inner, Lux.σ),
                Lux.Dense(inner, 1))

##rng = AMDGPU.rocrand_rng()
rng = Random.default_rng()
# Initialize parameters on the CPU, wrap them as a ComponentArray, move them to
# the AMD GPU, then convert to Float32. NOTE(review): the InvalidIRError below
# is raised while broadcasting over this GPU-resident ComponentVector.
ps = Lux.setup(rng, chain)[1] |> ComponentArray |> gpud |> f32

#strategy = StochasticTraining(100)

# Collocation on a uniform grid with spacing `dx`.
const dx = 0.1
strategy = GridTraining(dx)

# PINN discretization using the Lux chain and the GPU-resident initial params.
discretization = PhysicsInformedNN(chain, strategy; init_params = ps)

# Lower the symbolic PDE system to an OptimizationProblem.
prob = discretize(pdesys, discretization);

ncallback = 0
# Adam optimization — this is the call that throws the InvalidIRError reported below.
res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.01); callback = callback, maxiters = maxit);

Error & Stacktrace ⚠️

InvalidIRError: compiling MethodInstance for (::GPUArrays.var"#broadcast_kernel#38")(::AMDGPU.ROCKernelContext, ::ComponentVector{Float32, AMDGPU.Device.ROCDeviceVector{Float32, 1}, Tuple{Axis{(layer_1 = ViewAxis(1:60, Axis(weight = ViewAxis(1:30, ShapedAxis((30, 1))), bias = ViewAxis(31:60, ShapedAxis((30, 1))))), layer_2 = ViewAxis(61:990, Axis(weight = ViewAxis(1:900, ShapedAxis((30, 30))), bias = ViewAxis(901:930, ShapedAxis((30, 1))))), layer_3 = ViewAxis(991:1920, Axis(weight = ViewAxis(1:900, ShapedAxis((30, 30))), bias = ViewAxis(901:930, ShapedAxis((30, 1))))), layer_4 = ViewAxis(1921:2850, Axis(weight = ViewAxis(1:900, ShapedAxis((30, 30))), bias = ViewAxis(901:930, ShapedAxis((30, 1))))), layer_5 = ViewAxis(2851:3780, Axis(weight = ViewAxis(1:900, ShapedAxis((30, 30))), bias = ViewAxis(901:930, ShapedAxis((30, 1))))), layer_6 = ViewAxis(3781:3811, Axis(weight = ViewAxis(1:30, ShapedAxis((1, 30))), bias = ViewAxis(31:31, ShapedAxis((1, 1))))))}}}, ::Base.Broadcast.Broadcasted{AMDGPU.ROCArrayStyle{1, AMDGPU.Runtime.Mem.HIPBuffer}, Tuple{ComponentArrays.CombinedAxis{Axis{(layer_1 = ViewAxis(1:60, Axis(weight = ViewAxis(1:30, ShapedAxis((30, 1))), bias = ViewAxis(31:60, ShapedAxis((30, 1))))), layer_2 = ViewAxis(61:990, Axis(weight = ViewAxis(1:900, ShapedAxis((30, 30))), bias = ViewAxis(901:930, ShapedAxis((30, 1))))), layer_3 = ViewAxis(991:1920, Axis(weight = ViewAxis(1:900, ShapedAxis((30, 30))), bias = ViewAxis(901:930, ShapedAxis((30, 1))))), layer_4 = ViewAxis(1921:2850, Axis(weight = ViewAxis(1:900, ShapedAxis((30, 30))), bias = ViewAxis(901:930, ShapedAxis((30, 1))))), layer_5 = ViewAxis(2851:3780, Axis(weight = ViewAxis(1:900, ShapedAxis((30, 30))), bias = ViewAxis(901:930, ShapedAxis((30, 1))))), layer_6 = ViewAxis(3781:3811, Axis(weight = ViewAxis(1:30, ShapedAxis((1, 30))), bias = ViewAxis(31:31, ShapedAxis((1, 1))))))}, Base.OneTo{Int64}}}, typeof(identity), Tuple{Base.Broadcast.Extruded{ComponentVector{Float32, 
AMDGPU.Device.ROCDeviceVector{Float32, 1}, Tuple{Axis{(layer_1 = ViewAxis(1:60, Axis(weight = ViewAxis(1:30, ShapedAxis((30, 1))), bias = ViewAxis(31:60, ShapedAxis((30, 1))))), layer_2 = ViewAxis(61:990, Axis(weight = ViewAxis(1:900, ShapedAxis((30, 30))), bias = ViewAxis(901:930, ShapedAxis((30, 1))))), layer_3 = ViewAxis(991:1920, Axis(weight = ViewAxis(1:900, ShapedAxis((30, 30))), bias = ViewAxis(901:930, ShapedAxis((30, 1))))), layer_4 = ViewAxis(1921:2850, Axis(weight = ViewAxis(1:900, ShapedAxis((30, 30))), bias = ViewAxis(901:930, ShapedAxis((30, 1))))), layer_5 = ViewAxis(2851:3780, Axis(weight = ViewAxis(1:900, ShapedAxis((30, 30))), bias = ViewAxis(901:930, ShapedAxis((30, 1))))), layer_6 = ViewAxis(3781:3811, Axis(weight = ViewAxis(1:30, ShapedAxis((1, 30))), bias = ViewAxis(31:31, ShapedAxis((1, 1))))))}}}, Tuple{Bool}, Tuple{Int64}}}}, ::Int64) resulted in invalid LLVM IR

Environment (please complete the following information):

Status `~/.julia/environments/v1.10/Project.toml`
βŒƒ [21141c5a] AMDGPU v0.8.10
  [6e4b80f9] BenchmarkTools v1.5.0
  [b0b7db55] ComponentArrays v0.15.10
  [6748aba7] DeepEquilibriumNetworks v2.0.3
  [0c46a032] DifferentialEquations v7.13.0
  [7073ff75] IJulia v1.24.2
  [033835bb] JLD2 v0.4.46
βŒƒ [b2108857] Lux v0.5.22
  [83120cb1] LuxAMDGPU v0.2.2
  [eb30cadb] MLDatasets v0.7.14
  [f1d291b0] MLUtils v0.4.4
βŒ… [961ee093] ModelingToolkit v8.75.0
βŒƒ [315f7962] NeuralPDE v5.12.0
  [0b1bfda6] OneHotArrays v0.2.5
  [3bd65402] Optimisers v0.3.2
βŒƒ [7f7a1694] Optimization v3.22.0
  [42dfb2eb] OptimizationOptimisers v0.2.1
  [1dea7af3] OrdinaryDiffEq v6.74.0
βŒƒ [91a5bcdd] Plots v1.40.1
  [295af30f] Revise v3.5.14
βŒƒ [1ed8b502] SciMLSensitivity v7.56.0
  [e88e6eb3] Zygote v0.6.69
  [b77e0a4c] InteractiveUtils
  [9a3f8284] Random
  [10745b16] Statistics v1.10.0
  [8dfed614] Test
Julia Version 1.10.1
Commit 7790d6f0641 (2024-02-13 20:41 UTC)
Build Info:
  Official https://julialang.org/ release
Platform Info:
  OS: Linux (x86_64-linux-gnu)
  CPU: 32 Γ— Intel(R) Xeon(R) CPU E5-2650 v2 @ 2.60GHz
  WORD_SIZE: 64
  LIBM: libopenlibm
  LLVM: libLLVM-15.0.7 (ORCJIT, ivybridge)
Threads: 16 default, 0 interactive, 8 GC (on 32 virtual cores)
Environment:
  LD_LIBRARY_PATH = /opt/rocm/lib
  JULIA_NUM_THREADS = 16

Additional context

The suggested change (replacing the broadcast at Optimization.jl/ext/OptimizationZygoteExt.jl, line 95, with a direct `copyto!` instead of broadcasting) led to an error

MethodError: no method matching copyto!