tvercaut / LSQR-cpp

This is a C++ port, initially performed by Luis Ibanez, of the LSQR library of Chris Paige and Michael Saunders. The same methodology was applied to the LSMR library of David Fong and Michael Saunders.
BSD 3-Clause "New" or "Revised" License

Implement bound constraints (e.g. with ADMM or Dog Leg) #3

Open · tvercaut opened 8 years ago

tvercaut commented 8 years ago

Implementing bound constraints with ADMM should be relatively easy:
http://stanford.edu/~eryu/nnlsqr.html
https://web.stanford.edu/~boyd/papers/pdf/admm_slides.pdf (Slide 27)
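
For reference, the scaled-form ADMM iteration for min ||Ax - b||^2 subject to x >= 0 (splitting x = z, as in the slides above) boils down to:

    x_{k+1} = argmin_x ||A x - b||^2 + (rho/2) ||x - z_k + u_k||^2   (a damped least-squares solve)
    z_{k+1} = max(x_{k+1} + u_k, 0)                                  (projection onto x >= 0)
    u_{k+1} = u_k + x_{k+1} - z_{k+1}                                (scaled dual update)

The snippet below implements this with LSMR for the x-update.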

Quick and dirty python implementation:

import numpy as np
import scipy.sparse.linalg

def lsmr_with_init(A,b,x0):
    r0 = b - scipy.sparse.linalg.aslinearoperator(A).matvec(x0)
    deltax_pack = scipy.sparse.linalg.lsmr(A,r0)
    return x0 + deltax_pack[0]

def admm_nnlsqr(A,b):
    # ADMM parameter
    # Optimal choice: product of the maximum and minimum singular values of A
    # Heuristic choice: mean of the squared singular values, i.e. the squared
    # Frobenius norm divided by the number of columns (A assumed dense here)
    rho = np.dot(A.ravel(),A.ravel()) / A.shape[1]
    sqrt_half_rho = np.sqrt(rho/2)
    print('sqrt_half_rho =', sqrt_half_rho)

    # initialisation
    zero_init = False
    if zero_init:
        x_pack = (np.zeros(A.shape[1]),0)
        z = np.zeros(A.shape[1])
        u = np.zeros(A.shape[1])
    else:
        # Starting from x=z=u=0, the first iteration is essentially a projection of
        # the unconstrained (damped) least-squares solution. Skip the damping and
        # check whether any constraint is active at all.
        x_pack = scipy.sparse.linalg.lsmr(A,b)

        if np.all(x_pack[0]>0):
            # no constraint needed
            return x_pack[0]

        z = np.clip(x_pack[0],0,np.inf)
        u = x_pack[0]-z

    # construct the damped least squares operator [ A ; sqrt(rho/2)*I ]
    # TODO: try to use the damped version of lsmr directly (see the sketch below)
    Adamped = scipy.sparse.linalg.LinearOperator(
        (A.shape[0]+A.shape[1], A.shape[1]),
        matvec = lambda y: np.concatenate([ A.dot(y), sqrt_half_rho*y ]),
        rmatvec = lambda y: y[0:A.shape[0]].dot(A) + sqrt_half_rho*y[A.shape[0]:],
        )

    tol = 1e-5
    max_iter = 5000
    diff = np.inf
    iter = 0
    while ( diff>tol and iter<max_iter ):
        iter += 1
        xold = x_pack[0]
        #x_pack = scipy.sparse.linalg.lsmr( Adamped, np.concatenate([ b, sqrt_half_rho*(z-u) ]) )
        x_pack = (lsmr_with_init( Adamped, np.concatenate([ b, sqrt_half_rho*(z-u) ]), xold ), 0)
        zold = z
        z = np.clip(x_pack[0]+u,0,np.inf)
        #diff = np.linalg.norm(z-zold)
        #diff = np.linalg.norm(x_pack[0]-xold)
        diff = np.linalg.norm(x_pack[0]-z)
        u += x_pack[0]-z

    return z
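
Regarding the TODO in the snippet above, the x-update could presumably also use lsmr's built-in damp parameter after the change of variables dx = x - (z - u), since ||A x - b||^2 + (rho/2)||x - (z - u)||^2 = ||A dx - (b - A(z - u))||^2 + (rho/2)||dx||^2. Untested sketch (the helper name is made up):

import numpy as np
import scipy.sparse.linalg

def admm_x_update_damped(A, b, z, u, rho):
    # hypothetical x-update: argmin_x ||A x - b||^2 + (rho/2) ||x - (z - u)||^2
    # solved via lsmr's damp parameter after shifting variables by (z - u)
    Aop = scipy.sparse.linalg.aslinearoperator(A)
    xz = z - u
    r = b - Aop.matvec(xz)
    dx = scipy.sparse.linalg.lsmr(A, r, damp=np.sqrt(rho/2))[0]
    return xz + dx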
tvercaut commented 8 years ago

An alternative could be to implement a Dog Leg type optimisation routine:
http://dx.doi.org/10.1002/nla.502 from Bellavia et al.
http://www.cs.uoi.gr/~lagaris/papers/PREPRINTS/dogbox.pdf from Voglis and Lagaris

Quick and dirty python implementation:

import numpy as np
import scipy.sparse.linalg

def dogbox_nnlsqr(A,b):
    # "A Rectangular Trust Region Dogleg Approach for
    # Unconstrained and Bound Constrained Nonlinear Optimization" (Voglis & Lagaris)
    Aop = scipy.sparse.linalg.aslinearoperator(A)

    def isfeasible(xx, xlb, xub):
        return np.all([xx>=xlb, xx<=xub])

    x = np.zeros(A.shape[1])
    #passive_set = np.ones(A.shape[1], dtype=bool)
    # trust region radius (left infinite in this quick prototype)
    delta = np.inf

    tol = 1e-5
    max_iter = 50
    iter = 0
    while (iter<max_iter ):
        iter += 1

        # bounds on the step s: x + s >= 0 and |s| <= delta (rectangular trust region)
        lb = np.fmax(-x,-delta)
        ub = delta*np.ones(A.shape[1])

        # residual and minus the gradient of 0.5*||A(x+s)-b||^2 at s=0
        b_centered = b-Aop.matvec(x)
        mg = Aop.rmatvec(b_centered)

        print('iter', iter)
        #print('lb', lb)
        #print('ub', ub)
        #print('mg', mg)
        print('x', x)

        # passive set: variables not blocked at a bound by the (negative) gradient
        passive_set = ~( ( (x<=lb) & (mg<0) ) | ( (x>=ub) & (mg>0) ) )
        print('passive_set', passive_set)

        def xreduce(xx):
            return xx[passive_set]

        def xexpand(xred):
            xe = np.zeros(A.shape[1])
            xe[passive_set] = xred
            return xe

        # restriction of A to the passive (free) variables
        Ared = scipy.sparse.linalg.LinearOperator(
            (A.shape[0], np.count_nonzero(passive_set)),
            matvec = lambda y: Aop.matvec(xexpand(y)),
            rmatvec = lambda y: xreduce(Aop.rmatvec(y)),
            )

        # Gauss-Newton step restricted to the passive variables, using the
        # residual at the current iterate as right-hand side
        xr_pack = scipy.sparse.linalg.lsmr(Ared, b_centered)
        xr_newton = xr_pack[0]

        lbr = xreduce(lb)
        ubr = xreduce(ub)

        if isfeasible(xr_newton, lbr, ubr):
            sr = xr_newton
        else:
            # steepest-descent (Cauchy) step in the passive variables
            mgr = xreduce(mg)
            gTgr = np.dot(mgr,mgr)
            mAgr = Ared.matvec(mgr)
            gTATAgr = np.dot(mAgr,mAgr)

            xr_cauchy = (gTgr/gTATAgr)*mgr

            if isfeasible(xr_cauchy, lbr, ubr):
                # dogleg: move from the feasible Cauchy step towards the Newton step,
                # clipping to the rectangular bounds
                NnC = xr_newton-xr_cauchy

                idx = (NnC>0)
                alphau = np.min( (ubr[idx] - xr_cauchy[idx]) / NnC[idx] )
                alphal = np.min( (lbr[~idx] - xr_cauchy[~idx]) / NnC[~idx] )
                print('alpha', alphau, alphal)

                sr =  xr_cauchy + np.fmin(alphau,alphal)*NnC
            else:
                # the Cauchy step itself is infeasible: scale it back into the box first
                idx = (xr_cauchy>0)
                betau = np.min( ubr[idx] / xr_cauchy[idx] )
                betal = np.min( lbr[~idx] / xr_cauchy[~idx] )
                print('beta', betau, betal)

                PC = np.fmin(betau,betal)*xr_cauchy
                NnPC = xr_newton-PC

                idx = (NnPC>0)
                alphau = np.min( (ubr[idx] - PC[idx]) / NnPC[idx] )
                alphal = np.min( (lbr[~idx] - PC[~idx]) / NnPC[~idx] )
                print('alpha', alphau, alphal)

                sr =  PC + np.fmin(alphau,alphal)*NnPC

        if ( np.dot(sr,sr)<tol ):
            break

        x = x + xexpand(sr)

    return x

It would also be nice to compare these to the approach of BCLS (a quick sanity-check harness against scipy's nnls is sketched below):
Section 4.2 of https://www.cs.ubc.ca/~mpf/pdfs/2007FriedlanderHatz.pdf
https://www.cs.ubc.ca/~mpf/pubs/computing-nonnegative-tensor-factorizations/
http://www.cs.ubc.ca/~mpf/bcls/
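
BCLS itself isn't wrapped here, but in the meantime the ADMM prototype above can at least be sanity-checked against scipy's active-set NNLS solver on a small dense problem (untested sketch; admm_nnlsqr is the function defined earlier in this thread, and dogbox_nnlsqr could be dropped in the same way):

import numpy as np
import scipy.optimize

np.random.seed(0)
A = np.random.randn(50, 10)
b = np.random.randn(50)

x_ref, _ = scipy.optimize.nnls(A, b)   # reference active-set NNLS solution
x_admm = admm_nnlsqr(A, b)             # ADMM prototype from the first comment

print('||x_admm - x_ref|| =', np.linalg.norm(x_admm - x_ref))
print('residuals:', np.linalg.norm(A.dot(x_admm) - b), np.linalg.norm(A.dot(x_ref) - b))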