JuliaNLSolvers / Optim.jl

Optimization functions for Julia

Optimization with (L)BFGS gives DimensionMismatch("dimensions must match") #1011

Closed: schlichtanders closed this issue 1 year ago

schlichtanders commented 1 year ago
using DifferentialEquations, Optim, LinearAlgebra

function f(dxdt,x,p,t)
    dxdt[1] = -p[1]*x[1] + p[2]
    dxdt[2] = p[1]*x[1]
end

tspan = (0.,8.)
tdata = range(0.,stop=8.,length=10)
p0 = [1., 1.0]
x0 = [10.,0.]

prob = ODEProblem(f,x0,tspan,p0)

sol = DifferentialEquations.solve(prob; saveat=tdata)

xdata = sol[2,:] + randn(length(sol[2,:])) # in-silico data

function loss_function(x)
    _prob = remake(prob; u0=convert.(eltype(x), prob.u0), p=x)
    sol_new = DifferentialEquations.solve(_prob; saveat=tdata)
    norm(sol_new[2,:] - xdata)^2
end

optimize(loss_function, [2.0, 3.0], NelderMead())  # OK
optimize(loss_function, [2.0, 3.0], LBFGS())  # ERROR; BFGS also errors

errors with

┌ Warning: Instability detected. Aborting
└ @ SciMLBase ~/.julia/packages/SciMLBase/pmM4h/src/integrator_interface.jl:523
ERROR: DimensionMismatch: dimensions must match: a has dims (Base.OneTo(1),), b has dims (Base.OneTo(10),), mismatch at 1
Stacktrace:
[1] promote_shape
@ ./indices.jl:178 [inlined]
[2] promote_shape
@ ./indices.jl:169 [inlined]
[3] -(A::Vector{Float64}, B::Vector{Float64})
@ Base ./arraymath.jl:7
[4] loss_function(x::Vector{Float64})
@ Main ./REPL[10]:4
[5] finite_difference_gradient!(df::Vector{Float64}, f::typeof(loss_function), x::Vector{Float64}, cache::FiniteDiff.GradientCache{Nothing, Nothing, Nothing, Vector{Float64}, Val{:central}(), Float64, Val{true}()}; relstep::Float64, absstep::Float64, dir::Bool)
@ FiniteDiff ~/.julia/packages/FiniteDiff/zdZSa/src/gradients.jl:318
[6] finite_difference_gradient!
@ ~/.julia/packages/FiniteDiff/zdZSa/src/gradients.jl:258 [inlined]
[7] (::NLSolversBase.var"#g!#15"{typeof(loss_function), FiniteDiff.GradientCache{Nothing, Nothing, Nothing, Vector{Float64}, Val{:central}(), Float64, Val{true}()}})(storage::Vector{Float64}, x::Vector{Float64})
@ NLSolversBase ~/.julia/packages/NLSolversBase/cfJrN/src/objective_types/oncedifferentiable.jl:57
[8] (::NLSolversBase.var"#fg!#16"{typeof(loss_function)})(storage::Vector{Float64}, x::Vector{Float64})
@ NLSolversBase ~/.julia/packages/NLSolversBase/cfJrN/src/objective_types/oncedifferentiable.jl:61
[9] value_gradient!!(obj::OnceDifferentiable{Float64, Vector{Float64}, Vector{Float64}}, x::Vector{Float64})
@ NLSolversBase ~/.julia/packages/NLSolversBase/cfJrN/src/interface.jl:82
[10] value_gradient!(obj::OnceDifferentiable{Float64, Vector{Float64}, Vector{Float64}}, x::Vector{Float64})
@ NLSolversBase ~/.julia/packages/NLSolversBase/cfJrN/src/interface.jl:69
[11] value_gradient!(obj::Optim.ManifoldObjective{OnceDifferentiable{Float64, Vector{Float64}, Vector{Float64}}}, x::Vector{Float64})
@ Optim ~/.julia/packages/Optim/Zq1jM/src/Manifolds.jl:50
[12] (::LineSearches.var"#ϕdϕ#6"{Optim.ManifoldObjective{OnceDifferentiable{Float64, Vector{Float64}, Vector{Float64}}}, Vector{Float64}, Vector{Float64}, Vector{Float64}})(α::Float64)
@ LineSearches ~/.julia/packages/LineSearches/G1LRk/src/LineSearches.jl:84
[13] (::LineSearches.HagerZhang{Float64, Base.RefValue{Bool}})(ϕ::Function, ϕdϕ::LineSearches.var"#ϕdϕ#6"{Optim.ManifoldObjective{OnceDifferentiable{Float64, Vector{Float64}, Vector{Float64}}}, Vector{Float64}, Vector{Float64}, Vector{Float64}}, c::Float64, phi_0::Float64, dphi_0::Float64)
@ LineSearches ~/.julia/packages/LineSearches/G1LRk/src/hagerzhang.jl:139
[14] HagerZhang
@ ~/.julia/packages/LineSearches/G1LRk/src/hagerzhang.jl:101 [inlined]
[15] perform_linesearch!(state::Optim.LBFGSState{Vector{Float64}, Vector{Vector{Float64}}, Vector{Vector{Float64}}, Float64, Vector{Float64}}, method::LBFGS{Nothing, LineSearches.InitialStatic{Float64}, LineSearches.HagerZhang{Float64, Base.RefValue{Bool}}, Optim.var"#19#21"}, d::Optim.ManifoldObjective{OnceDifferentiable{Float64, Vector{Float64}, Vector{Float64}}})
@ Optim ~/.julia/packages/Optim/Zq1jM/src/utilities/perform_linesearch.jl:59
[16] update_state!(d::OnceDifferentiable{Float64, Vector{Float64}, Vector{Float64}}, state::Optim.LBFGSState{Vector{Float64}, Vector{Vector{Float64}}, Vector{Vector{Float64}}, Float64, Vector{Float64}}, method::LBFGS{Nothing, LineSearches.InitialStatic{Float64}, LineSearches.HagerZhang{Float64, Base.RefValue{Bool}}, Optim.var"#19#21"})
@ Optim ~/.julia/packages/Optim/Zq1jM/src/multivariate/solvers/first_order/l_bfgs.jl:204
[17] optimize(d::OnceDifferentiable{Float64, Vector{Float64}, Vector{Float64}}, initial_x::Vector{Float64}, method::LBFGS{Nothing, LineSearches.InitialStatic{Float64}, LineSearches.HagerZhang{Float64, Base.RefValue{Bool}}, Optim.var"#19#21"}, options::Optim.Options{Float64, Nothing}, state::Optim.LBFGSState{Vector{Float64}, Vector{Vector{Float64}}, Vector{Vector{Float64}}, Float64, Vector{Float64}})
@ Optim ~/.julia/packages/Optim/Zq1jM/src/multivariate/optimize/optimize.jl:54
[18] optimize
@ ~/.julia/packages/Optim/Zq1jM/src/multivariate/optimize/optimize.jl:36 [inlined]
[19] #optimize#89
@ ~/.julia/packages/Optim/Zq1jM/src/multivariate/optimize/interface.jl:142 [inlined]
[20] optimize(f::Function, initial_x::Vector{Float64}, method::LBFGS{Nothing, LineSearches.InitialStatic{Float64}, LineSearches.HagerZhang{Float64, Base.RefValue{Bool}}, Optim.var"#19#21"}, options::Optim.Options{Float64, Nothing}) (repeats 2 times)
@ Optim ~/.julia/packages/Optim/Zq1jM/src/multivariate/optimize/interface.jl:138
[21] top-level scope
@ REPL[12]:1

This problem has existed for more than three years. It was first reported at https://discourse.julialang.org/t/optimization-with-lbfgs-gives-dimensionmismatch-dimensions-must-match/22167 but apparently never got enough attention to be fixed.

It would be really great if this could be fixed.

julia> versioninfo()
Julia Version 1.8.1
Commit afb6c60d69a (2022-09-06 15:09 UTC)
Platform Info:
OS: Linux (x86_64-linux-gnu)
CPU: 8 × Intel(R) Core(TM) i7-1065G7 CPU @ 1.30GHz
WORD_SIZE: 64
LIBM: libopenlibm
LLVM: libLLVM-13.0.1 (ORCJIT, icelake-client)
Threads: 1 on 8 virtual cores
ChrisRackauckas commented 1 year ago

You're missing a check for sol_new.retcode == :Success. Your ODE solution is diverging at some parameters. https://sensitivity.sciml.ai/dev/training_tips/divergence/
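For illustration, a minimal sketch of the reproducer's loss function with such a guard (one common approach is to return an infinite or very large loss when the solve does not succeed, so the optimizer is steered away from diverging parameters):

function loss_function(x)
    _prob = remake(prob; u0=convert.(eltype(x), prob.u0), p=x)
    sol_new = DifferentialEquations.solve(_prob; saveat=tdata)
    # If the solver aborts early, sol_new has fewer points than xdata,
    # which is what triggers the DimensionMismatch.
    sol_new.retcode == :Success || return Inf
    norm(sol_new[2,:] - xdata)^2
end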

schlichtanders commented 1 year ago

Thank you very much! A beginner's mistake, apparently. Closing the issue then.

schlichtanders commented 1 year ago

An addition: it might be good to adapt the documentation and examples so that they always account for this special case.

Here is one example from the original UDE paper that would need updating: https://github.com/ChrisRackauckas/universal_differential_equations/blob/master/LotkaVolterra/scenario_2.jl#L104-L124

function predict(θ, X = Xₙ[:,1], T = t)
    _prob = remake(prob_nn, u0 = X, tspan = (T[1], T[end]), p = θ)
    Array(solve(_prob, Vern7(), saveat = T,
                abstol=1e-6, reltol=1e-6,
                sensealg = ForwardDiffSensitivity()
                ))
end

# Multiple shooting like loss
function loss(θ)
    # Start with a regularization on the network
    l = convert(eltype(θ), 1e-3)*sum(abs2, θ[2:end]) ./ length(θ[2:end])
    for i in 1:size(XS,1)
        X̂ = predict(θ, [XS[i,1], YS[i,1]], TS[i, :])
        # Full prediction in x
        l += sum(abs2, XS[i,:] .- X̂[1,:])
        # Add the boundary condition in y
        l += abs(YS[i, 2] .- X̂[2, end])
    end
    return l
end

If I understand it correctly, this does not account for retcode == :Success.
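A possible adaptation is sketched below (not the paper's code; it returns the solution object from predict so the return code can be checked before converting to an Array):

function predict(θ, X = Xₙ[:,1], T = t)
    _prob = remake(prob_nn, u0 = X, tspan = (T[1], T[end]), p = θ)
    solve(_prob, Vern7(), saveat = T,
          abstol=1e-6, reltol=1e-6,
          sensealg = ForwardDiffSensitivity())
end

function loss(θ)
    # Start with a regularization on the network
    l = convert(eltype(θ), 1e-3)*sum(abs2, θ[2:end]) ./ length(θ[2:end])
    for i in 1:size(XS,1)
        sol = predict(θ, [XS[i,1], YS[i,1]], TS[i, :])
        # Penalize diverging trajectories instead of indexing a truncated solution
        sol.retcode == :Success || return convert(eltype(θ), Inf)
        X̂ = Array(sol)
        # Full prediction in x
        l += sum(abs2, XS[i,:] .- X̂[1,:])
        # Add the boundary condition in y
        l += abs(YS[i, 2] .- X̂[2, end])
    end
    return l
end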

ChrisRackauckas commented 1 year ago

Indeed, we should always add it to the examples, regardless of whether the defaults tend to diverge or not.