mathurinm / celer

Fast solver for L1-type problems: Lasso, sparse Logisitic regression, Group Lasso, weighted Lasso, Multitask Lasso, etc.
https://mathurinm.github.io/celer/
BSD 3-Clause "New" or "Revised" License
199 stars 33 forks source link

Convergence warning when using warm_start=True #201

Closed PABannier closed 3 years ago

PABannier commented 3 years ago

Hello @mathurinm !

After the patch you made to fix the warm_start issue (#199 ), MultiTaskLasso no longer raises an error.

However, after many trials, when using warm_start=True in MultiTaskLasso, the solver fails to converge and issues a warning: ConvergenceWarning: Objective did not converge: duality gap: 0.0055864603018013215, tolerance: 1.8021110008703545e-06. Increasingtolmay make the solver faster without affecting the results much. Fitting data with very small alpha causes precision issues.

I tried changing the tolerance to 1e-3 but the problem persists. The warning is issued when fitting with alpha_max * 0.8.

Note: when turning warm_start to False, the solver converges normally.

Find hereafter a snippet to reproduce the error:

import time
import numpy as np
from numpy.linalg import norm
from sklearn.model_selection import KFold
from celer import MultiTaskLasso

N_SAMPLES = 10
N_FEATURES = 15
N_TASKS = 5

def compute_alpha_max(X, Y):
    B = X.T @ Y
    b = norm(B, axis=1)
    return np.max(b) / X.shape[0]

def fit_reweighted_lasso(X, Y, alpha, n_iter, warm_start, tol):
    penalty = lambda u: 1 / (
        2 * np.sqrt(np.linalg.norm(u, axis=1)) + np.finfo(float).eps
    )

    regressor = MultiTaskLasso(
        alpha, fit_intercept=False, warm_start=warm_start, tol=tol
    )

    w = np.ones(N_FEATURES)

    for _ in range(n_iter):
        X_w = X / w[np.newaxis, :]

        regressor.fit(X_w, Y)

        coef = (regressor.coef_ / w).T
        w = penalty(coef)

    return coef

def cross_val(X, Y, alpha, warm_start, tol):
    kf = KFold()

    durations = []

    for i, (trn_index, val_index) in enumerate(kf.split(X, Y)):
        X_train, Y_train = X[trn_index, :], Y[trn_index, :]
        X_valid, Y_valid = X[val_index, :], Y[val_index, :]

        start = time.time()
        fit_reweighted_lasso(X_train, Y_train, alpha, 10, warm_start, tol)
        duration = time.time() - start
        durations.append(duration)

    return durations

if __name__ == "__main__":
    rng = np.random.default_rng(42)
    X = rng.random((N_SAMPLES, N_FEATURES))
    Y = rng.random((N_SAMPLES, N_TASKS))

    warm_start = True

    max_alpha = compute_alpha_max(X, Y)
    alphas = np.geomspace(max_alpha, max_alpha / 10, 30)

    for alpha in alphas:
        durations = cross_val(X, Y, alpha, warm_start, 1e-6)
        print(np.mean(durations))
mathurinm commented 3 years ago

Thanks for the reproducing example, it was easy to run.

Just to mention, you can make it even better for me by being more minimal, i.e. getting the value of alpha, and the CV fold for which it breaks, hence removing one function:

import numpy as np
from numpy.linalg import norm
from sklearn.model_selection import KFold
from celer import MultiTaskLasso

N_SAMPLES = 10
N_FEATURES = 15
N_TASKS = 5

def compute_alpha_max(X, Y):
    return np.max(norm(X.T @ Y, axis=1)) / X.shape[0]

def fit_reweighted_lasso(X, Y, alpha, n_iter, warm_start, tol):
    def penalty(u): return 1 / (
        2 * np.sqrt(np.linalg.norm(u, axis=1)) + np.finfo(float).eps
    )

    regressor = MultiTaskLasso(
        alpha, fit_intercept=False, warm_start=warm_start, tol=tol, verbose=1)

    w = np.ones(N_FEATURES)

    for k in range(n_iter):
        print(f"Reweighting number {k}")
        X_w = X / w[np.newaxis, :]

        regressor.fit(X_w, Y)

        coef = (regressor.coef_ / w).T
        w = penalty(coef)

    return coef

if __name__ == "__main__":
    rng = np.random.default_rng(42)
    X = rng.random((N_SAMPLES, N_FEATURES))
    Y = rng.random((N_SAMPLES, N_TASKS))

    max_alpha = compute_alpha_max(X, Y)
    alphas = np.geomspace(max_alpha, max_alpha / 10, 30)

    alpha = alphas[5]
    kf = KFold()
    trn_index, val_index = list(kf.split(X, Y))[3]
    X_train, Y_train = X[trn_index, :], Y[trn_index, :]
    fit_reweighted_lasso(X_train, Y_train, alpha, 3, True, 1e-6)
mathurinm commented 3 years ago

fixed in #202