SciML / PSOGPU.jl

GPU-accelerated Particle Swarm Optimization
MIT License

Investigate better initialization strategies for higher dimensional problems #21

Open · utkarsh530 opened this issue 8 months ago

utkarsh530 commented 8 months ago

From the Neural ODE example:

using SimpleChains, StaticArrays, OrdinaryDiffEq, SciMLBase # SciMLBase provides OptimizationProblem

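# Training data: solve a known cubic ODE on tspan and save it at tsteps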
u0 = @SArray [2.0, 0.0]
datasize = 30
tspan = (0.0, 1.5)
tsteps = range(tspan[1], tspan[2], length = datasize)

function trueODE(u, p, t)
    true_A = @SMatrix [-0.1 2.0; -2.0 -0.1]
    ((u .^ 3)'true_A)'
end

prob = ODEProblem(trueODE, u0, tspan)
data = Array(solve(prob, Tsit5(), saveat = tsteps))

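# Small SimpleChains network that will serve as the neural ODE right-hand side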
sc = SimpleChain(static(2),
    Activation(x -> x .* 3),
    TurboDense{true}(tanh, static(2)),
    TurboDense{true}(identity, static(2)))

p_nn = SimpleChains.init_params(sc, Float64)

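# Neural ODE right-hand side; p is a (network, parameters) tuple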
function nn_fn(u::T, p, t)::T where {T}
    nn, ps = p
    return nn(u, ps)
end

nn = (u, p, t) -> sc(u, p) # alternative closure form of the RHS (not used below)

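# Convert the parameters to a static array so the ODE problem's parameters are isbits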
p_static = SArray{Tuple{size(p_nn)...}}(p_nn...)

prob_nn = ODEProblem(nn_fn, u0, tspan, (sc, p_static))

n_particles = 10_000

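# Sum-of-squares loss between the data and the prediction with candidate parameters u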
function loss(u, p)
    odeprob, t = p
    # keep the (network, parameters) tuple so the remade problem matches nn_fn
    prob = remake(odeprob; p = (odeprob.p[1], u))
    pred = Array(solve(prob, Tsit5(), saveat = t))
    sum(abs2, data .- pred)
end

# Box constraints on the network parameters
lb = SVector{length(p_static), eltype(p_static)}(fill(eltype(p_static)(-1),
    length(p_static))...)
ub = SVector{length(p_static), eltype(p_static)}(fill(eltype(p_static)(1),
    length(p_static))...)

optprob = OptimizationProblem(loss, prob_nn.p[2], (prob_nn, tsteps); lb = lb, ub = ub)

using PSOGPU
using CUDA

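# Initialize the swarm from the optimization problem, then move the data and particles to the GPU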
gbest, particles = PSOGPU.init_particles(optprob, n_particles)

gpu_data = cu([SVector{length(prob_nn.u0), eltype(prob_nn.u0)}(@view data[:, i])
               for i in 1:length(tsteps)])

gpu_particles = cu(particles)

CUDA.allowscalar(false)

gg = copy(gpu_particles)

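# Rebuild the ODE problem for each particle, using that particle's position as the network parameters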
function prob_func(prob, gpu_particle)
    return remake(prob, p = (prob.p[1], gpu_particle.position))
end

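# Run the GPU-accelerated PSO parameter estimation over the swarm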
@time gsol = PSOGPU.parameter_estim_ode!(prob_nn,
    gg,
    gbest,
    gpu_data,
    lb,
    ub; saveat = tsteps, dt = 0.1, prob_func = prob_func, maxiters = 1000)

The best solution that I was able to get was:

gbest = PSOGPU.PSOGBest{SVector{12, Float64}, Float64}([0.6874401673017222, -15.593770810494114, 0.46391816314374695, -0.9466586408201431, -0.7460369202292048, 0.05240032443001442, -4.078687202392302, 1.1292140007987868, 1.3178086552221244, 0.3031322864356441, -0.96809309999136, 0.09967222862683514], 75.45484892026886)
gbest.cost #75.45484892026886

Compared to ADAM:

julia> res.objective
7.6828628f0
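For reference, the ADAM run behind this number isn't shown in the thread. A minimal sketch of how such a baseline could be set up, assuming Optimization.jl with OptimizationOptimisers and ForwardDiff over the 12 parameters (the optimizer and its settings below are placeholders, not the run that produced 7.68):

using Optimization, OptimizationOptimisers, ForwardDiff

# Same sum-of-squares objective as `loss`, but over a plain Vector of parameters
adam_loss(θ, _) = sum(abs2,
    data .- Array(solve(remake(prob_nn; p = (sc, θ)), Tsit5(); saveat = tsteps)))

optf = OptimizationFunction(adam_loss, Optimization.AutoForwardDiff())
adam_prob = OptimizationProblem(optf, collect(p_nn))
res = Optimization.solve(adam_prob, Adam(0.05); maxiters = 300) # placeholder hyperparameters (assumption)
res.objective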

There's a lot of scope for improvement 😓

A decent-sized neural network should work (as shown in the docs of the Python PSO library pyswarms); I think we should try to get this working for starters: https://pyswarms.readthedocs.io/en/latest/examples/usecases/train_neural_network.html
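On the initialization question itself, one experiment worth trying is to center and shrink the box constraints around a fresh SimpleChains initialization, so the swarm starts at a sensible parameter scale instead of anywhere in the [-1, 1] box. This is a hypothetical sketch that only reuses calls from the example above; it is not something that was run in this thread:

# Hypothetical initialization tweak (not from the thread): tighten the box around a standard init
p_init = SVector{length(p_nn), Float64}(SimpleChains.init_params(sc, Float64)...)
width = 0.5 # half-width of the search box around p_init (arbitrary choice)
lb_init = p_init .- width
ub_init = p_init .+ width

optprob_init = OptimizationProblem(loss, p_init, (prob_nn, tsteps); lb = lb_init, ub = ub_init)
gbest_init, particles_init = PSOGPU.init_particles(optprob_init, n_particles)

The rest of the pipeline (cu on the particles and the parameter_estim_ode! call) would stay the same.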

cc @ChrisRackauckas

ChrisRackauckas commented 8 months ago

PSO methods generally need many more than 1000 iterations. Check how you do on that NN case first though, but in general I would be surprised if 1000 iterations is good enough.

utkarsh530 commented 8 months ago


Updated the example; it now seems better and faster than ADAM:


julia> @time gsol = PSOGPU.parameter_estim_ode!(prob_nn,
           gpu_particles,
           gbest,
           gpu_data,
           lb,
           ub; saveat = tsteps, dt = 0.1f0, prob_func = prob_func, maxiters = 100)
  1.047137 seconds (26.52 k allocations: 1.267 MiB)
PSOGPU.PSOGBest{SVector{12, Float32}, Float32}(Float32[-3.0210462, 15.476199, -11.9110565, -5.3153186, 10.184191, 13.0345955, 6.292011, -4.4231596, 2.4189205, 5.8844023, -2.1717668, -0.31813943], 1.4513348f0)
#loss: 1.4513348f0