tvercaut / LSQR-cpp

This is a c++ port initially performed by Luis Ibanez of the LSQR library of Chris Paige and Michael Saunders. The same methodology was applied to the LSMR library of David Fong and Michael Saunders.
BSD 3-Clause "New" or "Revised" License

Implement bound constraints (e.g. with ADMM or Dog Leg) #3

Open tvercaut opened 8 years ago

tvercaut commented 8 years ago

Implementing bound constraints with ADMM should be relatively easy:
http://stanford.edu/~eryu/nnlsqr.html
https://web.stanford.edu/~boyd/papers/pdf/admm_slides.pdf (Slide 27)
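
For reference, the code below uses the scaled form of ADMM for min ||Ax - b||^2 subject to x >= 0, with the splitting x = z, z >= 0. The three updates it loops over boil down to the following (the x-update is the stacked damped least-squares problem that gets handed to LSMR):

    x_{k+1} = argmin_x ||A x - b||^2 + (rho/2) ||x - z_k + u_k||^2
            = argmin_x || [ A ; sqrt(rho/2) I ] x - [ b ; sqrt(rho/2) (z_k - u_k) ] ||^2
    z_{k+1} = max(0, x_{k+1} + u_k)
    u_{k+1} = u_k + x_{k+1} - z_{k+1}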

Quick and dirty Python implementation:

import numpy as np
import scipy.optimize
import scipy.sparse.linalg

def lsmr_with_init(A,b,x0):
    r0 = b - scipy.sparse.linalg.aslinearoperator(A).matvec(x0)
    deltax_pack = scipy.sparse.linalg.lsmr(A,r0)
    return x0 + deltax_pack[0]

def admm_nnlsqr(A,b):
    # ADMM parameter
    # Optimal choice: product of the maximum and minimum singular values
    # Heuristic choice: mean of the squared singular values, i.e. squared Frobenius norm over dimension
    rho = np.dot(A.ravel(),A.ravel()) / A.shape[1]
    sqrt_half_rho = np.sqrt(rho/2)
    print('sqrt_half_rho =', sqrt_half_rho)

    # initialisation
    zero_init = False
    if zero_init:
        x_pack = (np.zeros(A.shape[1]),0)
        z = np.zeros(A.shape[1])
        u = np.zeros(A.shape[1])
    else:
        # from x=z=u=0 the first iteration is essentially a projection of
        # the unconstrained damped least-squares solution. Let's forget the
        # damping and check whether we need to constrain things at all
        x_pack = scipy.sparse.linalg.lsmr(A,b)

        if np.all(x_pack[0]>=0):
            # no constraint needed
            return x_pack[0]

        z = np.clip(x_pack[0],0,np.inf)
        u = x_pack[0]-z

    # construct the damped least-squares operator [ A ; sqrt(rho/2)*I ]
    # TODO: try to use the built-in damp parameter of scipy.sparse.linalg.lsmr directly
    Adamped = scipy.sparse.linalg.LinearOperator(
        (A.shape[0] + A.shape[1], A.shape[1]),
        matvec = lambda y: np.concatenate([ A.dot(y), sqrt_half_rho*y ]),
        rmatvec = lambda y: y[0:A.shape[0]].dot(A) + sqrt_half_rho*y[A.shape[0]:],
        )

    tol = 1e-5
    max_iter = 5000
    diff = np.inf
    iter = 0
    while ( diff>tol and iter<max_iter ):
        iter += 1
        xold = x_pack[0]
        #x_pack = scipy.sparse.linalg.lsmr( Adamped, np.concatenate([ b, sqrt_half_rho*(z-u) ]) )
        x_pack = (lsmr_with_init( Adamped, np.concatenate([ b, sqrt_half_rho*(z-u) ]), xold ), 0)
        zold = z
        z = np.clip(x_pack[0]+u,0,np.inf)
        #diff = np.linalg.norm(z-zold)
        #diff = np.linalg.norm(x_pack[0]-xold)
        diff = np.linalg.norm(x_pack[0]-z)
        u += x_pack[0]-z

    return z
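
As a quick smoke test (not part of the solver; the problem size is arbitrary and it assumes the imports and admm_nnlsqr above), one can compare against scipy.optimize.nnls on a small dense problem:

# hypothetical smoke test: compare the ADMM result against scipy.optimize.nnls
np.random.seed(0)
A = np.random.randn(50, 10)
b = np.random.randn(50)

x_admm = admm_nnlsqr(A, b)
x_nnls, _ = scipy.optimize.nnls(A, b)
print('max |x_admm - x_nnls| =', np.max(np.abs(x_admm - x_nnls)))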
tvercaut commented 8 years ago

An alternative could be to implement a Dog Leg type optimisation routine:
http://dx.doi.org/10.1002/nla.502 from Bellavia et al.
http://www.cs.uoi.gr/~lagaris/papers/PREPRINTS/dogbox.pdf from Voglis and Lagaris
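
For context, my reading of the per-iteration quantities in the rectangular trust-region dogleg sketch below, in the notation of the code (A_red is A restricted to the passive variables):

    g   = A_red^T (b - A x)                 negative gradient of the reduced problem
    s_N = argmin_s ||A_red s - (b - A x)||  Gauss-Newton step, computed with LSMR
    s_C = (g^T g / ||A_red g||^2) g         Cauchy step, minimiser along the steepest-descent direction
    s   = s_C + alpha (s_N - s_C)           alpha the largest value keeping s inside the box
                                            (falling back to a scaled-back Cauchy step if s_C is infeasible)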

Quick and dirty Python implementation:

def dogbox_nnlsqr(A,b):
    # A Rectangular Trust Region Dogleg Approach for
    # Unconstrained and Bound Constrained Nonlinear Optimization 
    Aop = scipy.sparse.linalg.aslinearoperator(A)

    def isfeasible(xx, xlb, xub):
        return np.all([xx>=xlb, xx<=xub])

    x = np.zeros(A.shape[1])
    #passive_set = np.ones(A.shape[1], dtype=bool)
    delta = np.inf

    tol = 1e-5
    max_iter = 50
    iter = 0
    while (iter<max_iter ):
        iter += 1

        lb = np.fmax(-x,-delta)
        ub = delta*np.ones(A.shape[1])

        b_centered = b-Aop.matvec(x)
        mg = Aop.rmatvec(b_centered)

        print('iter', iter)
        #print('lb', lb)
        #print('ub', ub)
        #print('mg', mg)
        print('x', x)

        passive_set = ~( ( (x<=lb) & (mg<0) ) | ( (x>=ub) & (mg>0) ) )
        print('passive_set', passive_set)

        def xreduce(xx):
            return xx[passive_set]

        def xexpand(xred):
            xe = np.zeros(A.shape[1])
            xe[passive_set] = xred
            return xe

        Ared = scipy.sparse.linalg.LinearOperator(
            (A.shape[0], np.count_nonzero(passive_set)),
            matvec = lambda y: Aop.matvec(xexpand(y)),
            rmatvec = lambda y: xreduce(Aop.rmatvec(y)),
            )

        xrold = xreduce(x)
        br_centered = b-Ared.matvec(xrold)

        xr_pack = scipy.sparse.linalg.lsmr(Ared,br_centered)
        xr_newton = xr_pack[0]

        lbr = xreduce(lb)
        ubr = xreduce(ub)

        if isfeasible(xr_newton, lbr, ubr):
            sr = xr_newton
        else:
            # negative gradient of the reduced residual at the current point
            mgr = Ared.rmatvec(br_centered)
            gTgr = np.dot(mgr,mgr)
            mAgr = Ared.matvec(mgr)
            gTATAgr = np.dot(mAgr,mAgr)

            xr_cauchy = (gTgr/gTATAgr)*mgr

            if isfeasible(xr_cauchy, lbr, ubr):
                NnC = xr_newton-xr_cauchy

                idx = (NnC>0)
                alphau = np.min( (ubr[idx] - xr_cauchy[idx]) / NnC[idx] )
                alphal = np.min( (lbr[~idx] - xr_cauchy[~idx]) / NnC[~idx] )
                print('alpha', alphau, alphal)

                sr =  xr_cauchy + np.fmin(alphau,alphal)*NnC
            else:
                idx = (xr_cauchy>0)
                betau = np.min( ubr[idx] / xr_cauchy[idx] )
                betal = np.min( lbr[~idx] / xr_cauchy[~idx] )
                print('beta', betau, betal)

                PC = np.fmin(betau,betal)*xr_cauchy
                NnPC = xr_newton-PC

                idx = (NnPC>0)
                alphau = np.min( (ubr[idx] - PC[idx]) / NnPC[idx] )
                alphal = np.min( (lbr[~idx] - PC[~idx]) / NnPC[~idx] )
                print('alpha', alphau, alphal)

                sr =  PC + np.fmin(alphau,alphal)*NnPC

        if ( np.dot(sr,sr)<tol ):
            break

        x = x + xexpand(sr)

    return x
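
And the same hypothetical smoke test as for the ADMM version (assuming the imports from the previous snippet):

np.random.seed(0)
A = np.random.randn(50, 10)
b = np.random.randn(50)

x_dog = dogbox_nnlsqr(A, b)
x_nnls, _ = scipy.optimize.nnls(A, b)
print('max |x_dog - x_nnls| =', np.max(np.abs(x_dog - x_nnls)))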

It would also be nice to compare these approaches to the one used in BCLS:
Section 4.2 of https://www.cs.ubc.ca/~mpf/pdfs/2007FriedlanderHatz.pdf
https://www.cs.ubc.ca/~mpf/pubs/computing-nonnegative-tensor-factorizations/
http://www.cs.ubc.ca/~mpf/bcls/