odow / SDDP.jl

A JuMP extension for Stochastic Dual Dynamic Programming
https://sddp.dev
Other
293 stars 62 forks source link

UnicyclicGraph with Integer variables #751

Closed Thuener closed 2 months ago

Thuener commented 2 months ago

Hi Oscar, I'm trying to get back to SDDP problems. This library is getting better each year. Congrats!!

I'm trying to experiment with UnicyclicGraph, but I'm getting some strange convergence issues because of one Integer variable. Here is an example:

using HiGHS
using SDDP

order_size = 100_000
price = 40
cost = 3
T = 12
Ω = [1]
θ = [1.0]

model = SDDP.PolicyGraph(
    SDDP.UnicyclicGraph(0.95; num_nodes = T), # SDDP.LinearGraph(T*8),
sense = :Max,
upper_bound = 10^6,
optimizer = HiGHS.Optimizer,
) do sp, t
    @variable(sp, 0 <= inv, SDDP.State, initial_value = 0) # Inventory
    @variable(sp, 0 <= prod, SDDP.State, initial_value = 0) # Production

    @variable(sp, num_orders >= 0, Int) # Number of orders
    @variable(sp, sales >= 0)           # Sales
    @variable(sp, demand)               # Demand

    @constraint(sp, sales_c,   sales <= inv.in)
    @constraint(sp, demand_c,  sales <= demand)
    @constraint(sp, stock_c,   inv.out == inv.in + prod.out - sales)
    @constraint(sp, orders_c,  prod.out == num_orders*order_size)

    @stageobjective(sp, price*sales -cost*prod.out )

    #Random variables
    SDDP.parameterize(sp, Ω, θ) do ω
        return JuMP.fix(demand, 500_000)
    end
end

The model can get out of the local optimal:

-------------------------------------------------------------------
         SDDP.jl (c) Oscar Dowson and contributors, 2017-23
-------------------------------------------------------------------
problem
  nodes           : 12
  state variables : 2
  scenarios       : Inf
  existing cuts   : false
options
  solver          : serial mode
  risk measure    : SDDP.Expectation()
  sampling scheme : SDDP.InSampleMonteCarlo
subproblem structure
  VariableRef                             : [8, 8]
  AffExpr in MOI.EqualTo{Float64}         : [2, 2]
  AffExpr in MOI.LessThan{Float64}        : [2, 2]
  VariableRef in MOI.GreaterThan{Float64} : [4, 4]
  VariableRef in MOI.Integer              : [1, 1]
  VariableRef in MOI.LessThan{Float64}    : [1, 1]
numerical stability report
  matrix range     [1e+00, 1e+05]
  objective range  [1e+00, 4e+01]
  bounds range     [1e+06, 1e+06]
  rhs range        [0e+00, 0e+00]
-------------------------------------------------------------------
 iteration    simulation      bound        time (s)     solves  pid
-------------------------------------------------------------------
         1   0.000000e+00  1.000000e+06  4.317000e+00       409   1
         6   0.000000e+00  1.000000e+06  5.468000e+00      2982   1
         9   0.000000e+00  1.000000e+06  7.792000e+00      5121   1
        11   0.000000e+00  1.000000e+06  8.794000e+00      5747   1
        12   0.000000e+00  1.000000e+06  1.519200e+01      8532   1
        15   0.000000e+00  1.000000e+06  2.092800e+01     10071   1
        17   0.000000e+00  1.000000e+06  3.035000e+01     12041   1
        18   0.000000e+00  1.000000e+06  3.618100e+01     13170   1
        20   0.000000e+00  1.000000e+06  4.562200e+01     14780   1
        21   0.000000e+00  1.000000e+06  5.157900e+01     15669   1
        22   0.000000e+00  1.000000e+06  7.478100e+01     18358   1
        23   0.000000e+00  1.000000e+06  8.758400e+01     19607   1
        25   0.000000e+00  1.000000e+06  9.449500e+01     20305   1
...

If I change the policy graph to SDDP.LinearGraph(T*8) then we get:

-------------------------------------------------------------------
         SDDP.jl (c) Oscar Dowson and contributors, 2017-23
-------------------------------------------------------------------
problem
  nodes           : 96
  state variables : 2
  scenarios       : 1.00000e+00
  existing cuts   : false
options
  solver          : serial mode
  risk measure    : SDDP.Expectation()
  sampling scheme : SDDP.InSampleMonteCarlo
subproblem structure
  VariableRef                             : [8, 8]
  AffExpr in MOI.EqualTo{Float64}         : [2, 2]
  AffExpr in MOI.LessThan{Float64}        : [2, 2]
  VariableRef in MOI.GreaterThan{Float64} : [4, 5]
  VariableRef in MOI.Integer              : [1, 1]
  VariableRef in MOI.LessThan{Float64}    : [1, 1]
numerical stability report
  matrix range     [1e+00, 1e+05]
  objective range  [1e+00, 4e+01]
  bounds range     [1e+06, 1e+06]
  rhs range        [0e+00, 0e+00]
-------------------------------------------------------------------
 iteration    simulation      bound        time (s)     solves  pid
-------------------------------------------------------------------
         1   0.000000e+00  1.000000e+06  1.169999e-01       192   1
        26   3.700000e+06  1.000000e+06  1.160000e+00      4992   1
        44   3.700000e+06  1.000000e+06  2.176000e+00      8448   1
        59   3.700000e+06  1.000000e+06  3.205000e+00     11328   1
        72   3.700000e+06  1.000000e+06  4.318000e+00     13824   1
        83   3.700000e+06  1.000000e+06  5.429000e+00     15936   1
        93   3.700000e+06  1.000000e+06  6.464000e+00     17856   1
       102   3.700000e+06  1.000000e+06  7.501000e+00     19584   1
       110   3.700000e+06  1.000000e+06  8.618000e+00     21120   1
       118   3.700000e+06  1.000000e+06  9.648000e+00     22656   1
       148   3.700000e+06  1.000000e+06  1.473400e+01     28416   1
...

Any idea on what could be the issue?

odow commented 2 months ago

upper_bound = 10^6,

Your upper bound is too small.

Try rescaling order_size down a bit.

odow commented 2 months ago

Here's what I have:

using SDDP
import HiGHS
T = 12
c_order_size, c_price, c_cost = 10, 40, 3
Ω, P = [45], [1.0]
model = SDDP.PolicyGraph(
    SDDP.UnicyclicGraph(0.95; num_nodes = T);
    sense = :Max,
    upper_bound = 10^7,
    optimizer = HiGHS.Optimizer,
) do sp, t
    @variables(sp, begin
        x_inv >= 0, SDDP.State, (initial_value = 0)
        u_num_orders >= 0, Int
        u_prod >= 0
        u_sales >= 0
    end)
    @constraints(sp, begin
        u_sales <= x_inv.in
        u_prod == c_order_size * u_num_orders
        x_inv.out == x_inv.in + u_prod - u_sales
    end)
    @stageobjective(sp, c_price * u_sales - c_cost * u_prod)
    SDDP.parameterize(sp, Ω, P) do ω
        return set_upper_bound(u_sales, ω)
    end
end
SDDP.train(
    model; 
    iteration_limit = 50,
    duality_handler = SDDP.StrengthenedConicDuality(),
    log_every_iteration = true,
)
Thuener commented 2 months ago

Thanks, Oscar. I owe you another one.

Why use StrengthenedConicDuality?

Do you have any account for donations to the SDDP.jl project?

odow commented 2 months ago

Why use StrengthenedConicDuality?

In my quick tests, it seemed to be a little tighter than the default. (Also known as strengthened benders.)

Do you have any account for donations to the SDDP.jl project?

For you, most definitely not.

Thuener commented 2 months ago

Let me close this one. Thanks for the help! The conclusion is that infinity policies will require a bigger upper bound.

odow commented 2 months ago

The conclusion is that infinity policies will require a bigger upper bound.

Since we compute the discounted objective, the bound is approximately 1 / (1 - p) larger than the finite case of a single cycle. Since you have p =0.95, the bound is 20 times larger than you might "expect".

For your model, a naive bound is to assume you make perfect production decisions, and that you always sell maximum demand:

julia> (500_000 * 40 - 3 * 500_000) * 12 * 1 / (1 - 0.95)
4.439999999999996e9

4e9 is quite large :smile: