MadNLP / MadNLP.jl

A solver for nonlinear programming
MIT License
169 stars 14 forks source link

LapackGPUSolver goes into feasibility restoration when LapackCPUSolver does not #205

Open dlcole3 opened 2 years ago

dlcole3 commented 2 years ago

I get an error in the attached script because the solver is trying to perform feasibility restoration. If I change the tolerance to 1e-6, there is no error. If I run the equivalent problem on the CPU using LapackCPUSolver and MadNLP.InteriorPointSolver instead of LapackGPUSolver and MadNLPGPU.CuInteriorPointSolver, then the problem solves fine and does not try to perform a feasibility restoration.


using MatrixEquations
using MadNLPGPU
using NLPModels, QuadraticModels

"""
    MadNLP.jac_dense!(nlp::DenseLQDynamicModel, x, jac)

Dense Jacobian callback for a `DenseLQDynamicModel` whose three matrix type
parameters are `AbstractMatrix` subtypes.

Increments the `:neval_jac` counter of `nlp` and copies the stored matrix
`nlp.data.A` into `jac`. The evaluation point `x` is not used. Returns the
result of `copyto!`.
"""
function MadNLP.jac_dense!(
    nlp::DenseLQDynamicModel{T, V, M1, M2, M3},
    x,
    jac,
) where {T, V, M1 <: AbstractMatrix, M2 <: AbstractMatrix, M3 <: AbstractMatrix}
    NLPModels.increment!(nlp, :neval_jac)
    return copyto!(jac, nlp.data.A)
end

"""
    MadNLP.hess_dense!(nlp::DenseLQDynamicModel, x, w1l, hess; obj_weight = 1.0)

Dense Hessian callback for a `DenseLQDynamicModel` whose three matrix type
parameters are `AbstractMatrix` subtypes.

Increments the `:neval_hess` counter of `nlp` and copies the stored matrix
`nlp.data.H` into `hess`. The arguments `x`, `w1l` and the keyword
`obj_weight` are accepted but not used, so the copied matrix is never scaled.
Returns the result of `copyto!`.
"""
function MadNLP.hess_dense!(
    nlp::DenseLQDynamicModel{T, V, M1, M2, M3},
    x,
    w1l,
    hess;
    obj_weight = 1.0,
) where {T, V, M1 <: AbstractMatrix, M2 <: AbstractMatrix, M3 <: AbstractMatrix}
    NLPModels.increment!(nlp, :neval_hess)
    return copyto!(hess, nlp.data.H)
end
"""
    build_3D_heating_AB(dx, nx, dt)

Build the dense matrices `A` (`nx^3 × nx^3`) and `B` (`nx^3 × 6`) of a
discrete-time linear system on an `nx × nx × nx` grid of cells.

Cells are numbered `1:nx^3` with the x index varying fastest, then y, then z.

# Arguments
- `dx`: grid spacing; enters the coefficients through `1 / dx^2`.
- `nx`: number of cells per direction; must be at least 2.
- `dt`: time step; enters the coefficients linearly.

# Returns
A tuple `(A, B)`:
- `A[i, i] = 1 - 6c` for every cell and `A[i, j] = c` for each existing
  neighbour `j` of `i` in the x, y and z directions, where
  `c = k * dt / rho / specificHeat / dx^2`.
- Column `m` of `B` holds the input coefficient on one face of the grid:
  1 = first z layer, 2 = last z layer, 3 = cells with `i % nx == 1`,
  4 = cells with `i % nx == 0`, 5 = first y row of each layer,
  6 = last y row of each layer.

# Throws
- `ArgumentError` if `nx < 2` (the neighbour indexing needs two cells per
  direction).
"""
function build_3D_heating_AB(dx, nx, dt)
    nx >= 2 || throw(ArgumentError("nx must be at least 2, got $nx"))

    nlayer = nx^2   # cells in one z layer
    ncell = nx^3    # total number of cells

    A = zeros(ncell, ncell)
    B = zeros(ncell, 6)

    k = 400. # thermal conductivity of copper, W/(m-K)
    k2 = 400 # conductivity used for the input coefficient
    rho = 8960. # density of copper, kg/m^3
    specificHeat = 386. # specific heat of copper, J/(kg-K)

    conduction_constant = k * dt / rho / specificHeat / dx^2
    input_constant = k2 * dt / rho / specificHeat / dx^2

    # Faces normal to z are contiguous index ranges. The last layer is the
    # final nx^2 cells, i.e. it starts at ncell - nlayer + 1.
    B[1:nlayer, 1] .= input_constant
    B[(ncell - nlayer + 1):ncell, 2] .= input_constant

    for i in 1:ncell
        xpos = i % nx       # 1 on the first x cell of a row, 0 on the last
        ypos = i % nlayer   # position inside the current z layer (0 = last cell)

        on_first_x = xpos == 1
        on_last_x = xpos == 0
        on_first_y = ypos in 1:nx
        on_last_y = ypos == 0 || ypos > nlayer - nx
        on_first_z = i <= nlayer
        on_last_z = i > ncell - nlayer

        # The diagonal uses the six-neighbour value for every cell; it is not
        # reduced for boundary cells that have fewer neighbours.
        A[i, i] = 1 - 6 * conduction_constant

        # Links in the x direction
        on_first_x || (A[i, i - 1] = conduction_constant)
        on_last_x || (A[i, i + 1] = conduction_constant)

        # Links in the y direction
        on_first_y || (A[i, i - nx] = conduction_constant)
        on_last_y || (A[i, i + nx] = conduction_constant)

        # Links in the z direction
        on_first_z || (A[i, i - nlayer] = conduction_constant)
        on_last_z || (A[i, i + nlayer] = conduction_constant)

        # Inputs acting on the x and y faces
        on_first_x && (B[i, 3] = input_constant)
        on_last_x && (B[i, 4] = input_constant)
        on_first_y && (B[i, 5] = input_constant)
        on_last_y && (B[i, 6] = input_constant)
    end

    return A, B
end

"""
    set_d!(d, nx, N, Tmax, Tstart)

Overwrite `d` in place with a reference profile.

Every entry of `d` is first set to `Tstart`. Then, for each column
`j in 1:(N + 1)` and each row `i in 1:nx^3`, the entry is set to
`Tstart + (Tmax - Tstart) / 10` plus a sinusoidal term that depends on
integer coordinates derived from `i` and grows linearly with `j / N`.
Entries outside those row/column ranges keep the value `Tstart`.
Returns `nothing`.
"""
function set_d!(d, nx, N, Tmax, Tstart)
    fill!(d, Tstart)

    amplitude = (Tmax - Tstart)/2
    offset = (Tmax - Tstart)/10
    layer = nx^2

    for col in 1:(N + 1)
        # Time-dependent scaling, identical for every row of this column.
        ramp = col / N / 3
        for row in 1:nx^3
            # Integer coordinates taken directly from the 1-based row index.
            xi = row % nx
            yi = div(row % layer, nx)
            zi = div(row, layer)

            wave = 2 * sin(3.14159 * xi/nx) + 2 * sin(3.14159 * yi/nx)
            d[row, col] = Tstart + offset + (1 - zi/nx) * wave * amplitude * ramp
        end
    end
end

"""
    build_3D_PDE(N, nx, dx, dt, Tmax, Tstart; dense::Bool = true, implicit = false)

Build a linear-quadratic dynamic model on an `nx × nx × nx` grid and add the
linear and constant objective terms that come from tracking the reference
profile `d` generated by `set_d!`.

# Arguments
- `N`: passed to the model constructor; `N + 1` reference columns are built.
- `nx`: cells per direction (`nx^3` states, 6 inputs).
- `dx`, `dt`: grid spacing and time step forwarded to `build_3D_heating_AB`.
- `Tmax`, `Tstart`: forwarded to `set_d!`; `Tstart` also fills the initial
  state `s0`.

# Keywords
- `dense`: build a `DenseLQDynamicModel` when `true`, otherwise a
  `SparseLQDynamicModel` from sparse copies of the matrices.
- `implicit`: only forwarded to `DenseLQDynamicModel`, and only when `true`.

# Returns
The constructed model, with `data.c` and `data.c0` updated in place.
"""
function build_3D_PDE(N, nx, dx, dt, Tmax, Tstart; dense::Bool = true, implicit = false)

    ns = nx^3
    nu = 6

    Q  = 10. * Matrix(LinearAlgebra.I, ns, ns)
    Qf = 10. * Matrix(LinearAlgebra.I, ns, ns)./dt
    R  = 1.0 * Matrix(LinearAlgebra.I, nu, nu)./10

    A, B = build_3D_heating_AB(dx, nx, dt)

    s0 = fill(Tstart, ns)
    sl = fill(200., ns)
    su = fill(550., ns)
    ul = fill(300., nu)
    uu = fill(500., nu)

    S = -.001 * Matrix(LinearAlgebra.I, ns, nu)

    if dense
        if implicit
            lqdm = DenseLQDynamicModel(s0, A, B, Q, R, N; Qf = Qf, sl = sl, su = su, ul = ul, uu = uu, S = S, implicit=implicit)
        else
            lqdm = DenseLQDynamicModel(s0, A, B, Q, R, N; Qf = Qf, sl = sl, su = su, ul = ul, uu = uu, S = S)
        end
    else
        lqdm = SparseLQDynamicModel(
            s0, SparseArrays.sparse(A), SparseArrays.sparse(B),
            SparseArrays.sparse(Q), SparseArrays.sparse(R), N;
            Qf = SparseArrays.sparse(Qf), sl = sl, su = su, ul = ul, uu = uu,
            S = SparseArrays.sparse(S),
        )
    end

    d = zeros(nx^3, N + 1)

    set_d!(d, nx, N, Tmax, Tstart)

    # Qdvec stacks Q * d_j for the first N columns and Qf * d_{N+1} for the
    # last one; dQd accumulates the matching d' * Q * d terms.
    Qd    = zeros(size(d, 1))
    Qdvec = zeros(length(d))
    dQd   = zero(eltype(Qd))   # same element type as the accumulated dot products

    for i in 1:N
        d_col = view(d, :, i)
        LinearAlgebra.mul!(Qd, Q, d_col)
        Qdvec[(1 + ns * (i - 1)):ns * i] = Qd

        dQd += LinearAlgebra.dot(Qd, d_col)
    end

    d_last = view(d, :, N + 1)
    LinearAlgebra.mul!(Qd, Qf, d_last)
    Qdvec[(1 + ns * N):end] = Qd

    dQd += LinearAlgebra.dot(Qd, d_last)

    # Add c and c0 that result from (x-d)^T Q (x-d) in the objective function
    if dense
        block_A = lqdm.blocks.A
        block_B = lqdm.blocks.B

        As0 = zeros(size(block_A, 1))
        LinearAlgebra.mul!(As0, block_A, s0)
        dQB = zeros(nu * N)
        dQB_sub_block = zeros(nu)

        # Row block i of block_B multiplies the input applied i steps before
        # state j, i.e. input block (j - i + 1) of the stacked input vector.
        for i in 1:N
            B_sub_block = block_B[(1 + ns * (i - 1)):ns * i, :]
            for j in N:-1:i
                Qd_sub_block = Qdvec[(1 + ns * j):(ns * (j + 1))]
                LinearAlgebra.mul!(dQB_sub_block, B_sub_block', Qd_sub_block)

                dQB[(1 + nu * (j - i)):nu * (j - i + 1)] .+= dQB_sub_block
            end
        end

        lqdm.data.c0 += dQd / 2
        lqdm.data.c0 += -LinearAlgebra.dot(Qdvec, As0)
        lqdm.data.c  += - dQB
    else
        # The inputs carry no reference term, so their part of c stays zero.
        uvec = zeros(nu * N)
        full_Qd = vcat(Qdvec, uvec)

        lqdm.data.c0 += dQd / 2
        lqdm.data.c  += - full_Qd
    end

    return lqdm
end

# Problem data for the reproduction run.
N = 250         # passed as `N` to build_3D_PDE (N + 1 reference columns)
nx = 9          # cells per direction, i.e. nx^3 states
lenx = .02      # passed as the grid spacing `dx`
dt = .5         # time step
Tmax = 350.     # forwarded to set_d! for the reference profile
Tstart = 300.   # initial state value and base of the reference profile

lqdm = build_3D_PDE(N, nx, lenx, dt, Tmax, Tstart; dense = true, implicit = false)

# Solver options: dense condensed KKT system factorized on the GPU.
madnlp_options = Dict{Symbol, Any}(
    :kkt_system=>MadNLP.DENSE_CONDENSED_KKT_SYSTEM,
    :linear_solver=>LapackGPUSolver,
    :jacobian_constant=>true,
    :hessian_constant=>true,
    :lapack_algorithm=>MadNLP.CHOLESKY,
    :nlp_scaling=>false,
    :max_iter=>100
)

ips1 = MadNLPGPU.CuInteriorPointSolver(lqdm, option_dict=madnlp_options)
sol_ref = MadNLP.optimize!(ips1)
sshin23 commented 2 years ago

@dlcole3 could you also share the error message?

dlcole3 commented 2 years ago

Error message:

ERROR: LoadError: GPU compilation of kernel #broadcast_kernel#17(CUDA.CuKernelContext, CUDA.CuDeviceVector{Float64, 1}, Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{1}, Tuple{Base.OneTo{Int64}}, typeof(+), Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(+), Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(/), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}}}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(/), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}}}}}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(*), Tuple{Float64, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(Base.literal_pow), Tuple{CUDA.CuRefValue{typeof(^)}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, CUDA.CuRefValue{Val{2}}}}}}}}, Int64) failed
KernelError: passing and using non-bitstype argument

Argument 4 to your kernel function is of type Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{1}, Tuple{Base.OneTo{Int64}}, typeof(+), Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(+), Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(/), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}}}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(/), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}}}}}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(*), Tuple{Float64, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(Base.literal_pow), Tuple{CUDA.CuRefValue{typeof(^)}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, CUDA.CuRefValue{Val{2}}}}}}}}, which is not isbits:
  .args is of type Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(+), Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(/), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}}}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(/), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}}}}}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(*), Tuple{Float64, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(Base.literal_pow), Tuple{CUDA.CuRefValue{typeof(^)}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, CUDA.CuRefValue{Val{2}}}}}}} which is not isbits.
    .1 is of type Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(+), Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(/), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}}}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(/), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}}}}}} which is not isbits.
      .args is of type Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(/), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}}}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(/), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}}}}} which is not isbits.
        .1 is of type Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(/), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}}}} which is not isbits.
          .args is of type Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}}} which is not isbits.
            .1 is of type Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}} which is not isbits.
              .x is of type Vector{Float64} which is not isbits.
            .2 is of type Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}} which is not isbits.
              .args is of type Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}} which is not isbits.
                .1 is of type Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}} which is not isbits.
                  .x is of type Vector{Float64} which is not isbits.
                .2 is of type Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}} which is not isbits.
                  .x is of type Vector{Float64} which is not isbits.
        .2 is of type Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(/), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}}}} which is not isbits.
          .args is of type Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}}} which is not isbits.
            .1 is of type Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}} which is not isbits.
              .x is of type Vector{Float64} which is not isbits.
            .2 is of type Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}} which is not isbits.
              .args is of type Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}} which is not isbits.
                .1 is of type Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}} which is not isbits.
                  .x is of type Vector{Float64} which is not isbits.
                .2 is of type Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}} which is not isbits.
                  .x is of type Vector{Float64} which is not isbits.
    .2 is of type Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(*), Tuple{Float64, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(Base.literal_pow), Tuple{CUDA.CuRefValue{typeof(^)}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, CUDA.CuRefValue{Val{2}}}}}} which is not isbits.
      .args is of type Tuple{Float64, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(Base.literal_pow), Tuple{CUDA.CuRefValue{typeof(^)}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, CUDA.CuRefValue{Val{2}}}}} which is not isbits.
        .2 is of type Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(Base.literal_pow), Tuple{CUDA.CuRefValue{typeof(^)}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, CUDA.CuRefValue{Val{2}}}} which is not isbits.
          .args is of type Tuple{CUDA.CuRefValue{typeof(^)}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, CUDA.CuRefValue{Val{2}}} which is not isbits.
            .2 is of type Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}} which is not isbits.
              .x is of type Vector{Float64} which is not isbits.

Stacktrace:
  [1] check_invocation(job::GPUCompiler.CompilerJob)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/iaKrd/src/validation.jl:86
  [2] macro expansion
    @ ~/.julia/packages/GPUCompiler/iaKrd/src/driver.jl:413 [inlined]
  [3] macro expansion
    @ ~/.julia/packages/TimerOutputs/jgSVI/src/TimerOutput.jl:252 [inlined]
  [4] macro expansion
    @ ~/.julia/packages/GPUCompiler/iaKrd/src/driver.jl:412 [inlined]
  [5] emit_asm(job::GPUCompiler.CompilerJob, ir::LLVM.Module; strip::Bool, validate::Bool, format::LLVM.API.LLVMCodeGenFileType)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/iaKrd/src/utils.jl:64
  [6] cufunction_compile(job::GPUCompiler.CompilerJob, ctx::LLVM.Context)
    @ CUDA ~/.julia/packages/CUDA/tTK8Y/src/compiler/execution.jl:354
  [7] #224
    @ ~/.julia/packages/CUDA/tTK8Y/src/compiler/execution.jl:347 [inlined]
  [8] JuliaContext(f::CUDA.var"#224#225"{GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams, GPUCompiler.FunctionSpec{GPUArrays.var"#broadcast_kernel#17", Tuple{CUDA.CuKernelContext, CUDA.CuDeviceVector{Float64, 1}, Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{1}, Tuple{Base.OneTo{Int64}}, typeof(+), Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(+), Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(/), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}}}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(/), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}}}}}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(*), Tuple{Float64, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(Base.literal_pow), Tuple{CUDA.CuRefValue{typeof(^)}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, CUDA.CuRefValue{Val{2}}}}}}}}, Int64}}}})
    @ GPUCompiler ~/.julia/packages/GPUCompiler/iaKrd/src/driver.jl:74
  [9] cufunction_compile(job::GPUCompiler.CompilerJob)
    @ CUDA ~/.julia/packages/CUDA/tTK8Y/src/compiler/execution.jl:346
 [10] cached_compilation(cache::Dict{UInt64, Any}, job::GPUCompiler.CompilerJob, compiler::typeof(CUDA.cufunction_compile), linker::typeof(CUDA.cufunction_link))
    @ GPUCompiler ~/.julia/packages/GPUCompiler/iaKrd/src/cache.jl:90
 [11] cufunction(f::GPUArrays.var"#broadcast_kernel#17", tt::Type{Tuple{CUDA.CuKernelContext, CUDA.CuDeviceVector{Float64, 1}, Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{1}, Tuple{Base.OneTo{Int64}}, typeof(+), Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(+), Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(/), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}}}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(/), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}}}}}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(*), Tuple{Float64, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(Base.literal_pow), Tuple{CUDA.CuRefValue{typeof(^)}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, CUDA.CuRefValue{Val{2}}}}}}}}, Int64}}; name::Nothing, kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ CUDA ~/.julia/packages/CUDA/tTK8Y/src/compiler/execution.jl:299
 [12] cufunction(f::GPUArrays.var"#broadcast_kernel#17", tt::Type{Tuple{CUDA.CuKernelContext, CUDA.CuDeviceVector{Float64, 1}, Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{1}, Tuple{Base.OneTo{Int64}}, typeof(+), Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(+), Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(/), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}}}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(/), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}}}}}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(*), Tuple{Float64, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(Base.literal_pow), Tuple{CUDA.CuRefValue{typeof(^)}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, CUDA.CuRefValue{Val{2}}}}}}}}, Int64}})
    @ CUDA ~/.julia/packages/CUDA/tTK8Y/src/compiler/execution.jl:293
 [13] macro expansion
    @ ~/.julia/packages/CUDA/tTK8Y/src/compiler/execution.jl:102 [inlined]
 [14] #launch_heuristic#248
    @ ~/.julia/packages/CUDA/tTK8Y/src/gpuarrays.jl:17 [inlined]
 [15] _copyto!(dest::CUDA.CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, bc::Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{1}, Tuple{Base.OneTo{Int64}}, typeof(+), Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(+), Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(/), Tuple{Vector{Float64}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Vector{Float64}, Vector{Float64}}}}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(/), Tuple{Vector{Float64}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Vector{Float64}, Vector{Float64}}}}}}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(*), Tuple{Float64, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(Base.literal_pow), Tuple{Base.RefValue{typeof(^)}, Vector{Float64}, Base.RefValue{Val{2}}}}}}}})
    @ GPUArrays ~/.julia/packages/GPUArrays/gok9K/src/host/broadcast.jl:73
 [16] materialize!
    @ ~/.julia/packages/GPUArrays/gok9K/src/host/broadcast.jl:51 [inlined]
 [17] materialize!
    @ ./broadcast.jl:868 [inlined]
 [18] set_aug_RR!(kkt::MadNLP.DenseCondensedKKTSystem{Float64, CUDA.CuArray{Float64, 1}, CUDA.CuArray{Float64, 2}}, ips::MadNLP.InteriorPointSolver{Float64, MadNLP.DenseCondensedKKTSystem{Float64, CUDA.CuArray{Float64, 1}, CUDA.CuArray{Float64, 2}}}, RR::MadNLP.RobustRestorer{Float64})
    @ MadNLP ~/git/MadNLP.jl/src/IPM/kernels.jl:19
 [19] robust!(ips::MadNLP.InteriorPointSolver{Float64, MadNLP.DenseCondensedKKTSystem{Float64, CUDA.CuArray{Float64, 1}, CUDA.CuArray{Float64, 2}}})
    @ MadNLP ~/git/MadNLP.jl/src/IPM/solver.jl:409
 [20] optimize!(ips::MadNLP.InteriorPointSolver{Float64, MadNLP.DenseCondensedKKTSystem{Float64, CUDA.CuArray{Float64, 1}, CUDA.CuArray{Float64, 2}}})
    @ MadNLP ~/git/MadNLP.jl/src/IPM/solver.jl:99
 [21] top-level scope
    @ ~/Moonshot/Moonshot_files/updated_timing_files/GPU_error_script.jl:245
 [22] include(fname::String)
    @ Base.MainInclude ./client.jl:451
 [23] top-level scope
    @ REPL[1]:1
 [24] top-level scope
    @ ~/.julia/packages/CUDA/tTK8Y/src/initialization.jl:52
in expression starting at 
sshin23 commented 2 years ago

Thanks for reporting @dlcole3. We'll look into it