elephaint closed this pull request 3 weeks ago.
Test for lasso equivalence of the old and new methods (removed from the code to keep it clean):
```python
#| hide
# Lasso test old vs new
import numpy as np
from numba import njit

@njit
def _lasso_old(X: np.ndarray, y: np.ndarray,
               lambda_reg: float, max_iters: int = 1_000,
               tol: float = 1e-4):
    # lasso via cyclic coordinate descent
    n, feats = X.shape
    norms = (X ** 2).sum(axis=0)
    beta = np.zeros(feats, dtype=np.float32)
    beta_changes = np.zeros(feats, dtype=np.float32)
    residuals = y.copy()
    for _ in range(max_iters):
        for i, betai in enumerate(beta):
            # if the feature's squared norm is (close to) zero,
            # the optimum is beta[i] = 0, so skip to the next feature
            if abs(norms[i]) < 1e-8:
                continue
            xi = X[:, i]
            # normalized partial derivative w.r.t. beta[i]
            rho = betai + xi.flatten().dot(residuals) / norms[i]
            # soft-thresholding update
            beta[i] = np.sign(rho) * max(np.abs(rho) - lambda_reg * n / norms[i], 0.)
            beta_changes[i] = np.abs(betai - beta[i])
            if beta[i] != betai:
                residuals += (betai - beta[i]) * xi
        if max(beta_changes) < tol:
            break
    return beta
```
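For reference, the `lambda_reg * n` threshold means this solves `min_beta ||y - X beta||^2 / (2n) + lambda_reg * ||beta||_1`, which is the same objective scikit-learn's `Lasso` uses with `alpha = lambda_reg`. A minimal sanity check, assuming scikit-learn is available (illustrative, not part of the PR):

```python
# Illustrative cross-check (not in the PR): compare the coordinate-descent
# solution against sklearn's Lasso with alpha=lambda_reg and no intercept.
from sklearn.linear_model import Lasso

rng = np.random.default_rng(0)
X_chk = rng.normal(size=(50, 10))
y_chk = X_chk @ rng.normal(size=10) + 0.1 * rng.normal(size=50)
beta_cd = _lasso_old(X_chk, y_chk, lambda_reg=0.1, max_iters=10_000, tol=1e-10)
beta_sk = Lasso(alpha=0.1, fit_intercept=False, tol=1e-10).fit(X_chk, y_chk).coef_
# loose tolerance: the old implementation stores beta in float32
np.testing.assert_allclose(beta_cd, beta_sk, atol=1e-3)
```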
```python
S = np.array([[1., 1., 1., 1.],
              [1., 1., 0., 0.],
              [0., 0., 1., 1.],
              [1., 0., 0., 0.],
              [0., 1., 0., 0.],
              [0., 0., 1., 0.],
              [0., 0., 0., 1.]])
h = 2
_y = np.array([10., 5., 4., 2., 1.])
y_bottom = np.vstack([i * _y for i in range(1, 5)])
y_hat_bottom = np.vstack([i * np.ones(h) for i in range(1, 5)])
idx_bottom = [3, 4, 5, 6]
y_hat = S @ y_hat_bottom
n_hiers, n_bottom = S.shape
for method in ["reg", "reg_bu"]:
    for lambda_reg in [None, 0.1, 10, 1000]:
        for with_nans in [True, False]:
            y_hat_bottom_insample = np.roll(y_bottom, 1)
            if with_nans:
                y_hat_bottom_insample[:, 0] = np.nan
            y_hat_insample = S @ y_hat_bottom_insample
            y_insample = S @ y_bottom
            # drop time steps with missing in-sample predictions
            nan_idx = np.isnan(y_hat_insample).any(axis=0)
            y_insample = y_insample[:, ~nan_idx]
            y_hat_insample = y_hat_insample[:, ~nan_idx]
            h = min(y_hat.shape[1], y_hat_insample.shape[1])
            y_hat_insample = y_hat_insample[:, -h:]  # shape (n_hiers, h)
            y_insample = y_insample[:, -h:]
            X = np.kron(S, y_hat_insample.T)
            if method == 'reg':
                z = y_hat_insample.reshape(-1)
            if method == 'reg_bu':
                Pbu = np.zeros_like(S)
                Pbu[idx_bottom] = S[idx_bottom]
                z = y_hat_insample.reshape(-1) - X @ Pbu.reshape(-1)
            # use a local so the None case doesn't overwrite the loop variable
            lam = np.max(np.abs(X.T.dot(z))) if lambda_reg is None else lambda_reg
            beta_old = _lasso_old(X, z, lam)
            beta_new = _lasso(X, z, lam, max_iters=1000, tol=1e-4)
            np.testing.assert_allclose(beta_new, beta_old, atol=1e-5, rtol=1e-3)
```
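Side note on the `np.kron` construction, since the orientation is easy to get wrong: for `P` of shape `(n_bottom, n_hiers)` and row-major reshapes, `(S @ P @ Y_hat).reshape(-1) == np.kron(S, Y_hat.T) @ P.reshape(-1)`, which is what turns fitting `P` in the ERM objective into a plain lasso. A quick illustrative check (reusing `S`, `n_hiers`, `n_bottom` from above; not part of the PR):

```python
# Illustrative (not in the PR): the vec/Kronecker identity behind
# X = np.kron(S, y_hat_insample.T).
rng = np.random.default_rng(0)
Y_hat = rng.normal(size=(n_hiers, 2))     # in-sample predictions, (n_hiers, h)
P = rng.normal(size=(n_bottom, n_hiers))  # reconciliation weights
lhs = (S @ P @ Y_hat).reshape(-1)         # row-major vec of the reconciled fit
rhs = np.kron(S, Y_hat.T) @ P.reshape(-1)
np.testing.assert_allclose(lhs, rhs)
```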
`double` dtype everywhere; reconcilers are sensitive to low(er) precision, so for now unify to `double`, and maybe in the future turn everything to `single`.
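A minimal sketch of that kind of unification (the helper name is illustrative, not the PR's actual diff):

```python
# Illustrative only: coerce reconciler inputs to float64 up front so the
# coordinate-descent updates don't accumulate float32 rounding error.
def _as_double(*arrays):
    return [np.ascontiguousarray(a, dtype=np.float64) for a in arrays]

X64, z64 = _as_double(X, z)
beta = _lasso(X64, z64, 0.1)
```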
Lasso (ERM-reg / ERM-reg_bu) micro-optimization before and after (the method is still slow as **** ):
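For anyone who wants to reproduce the comparison, a harness along these lines works (illustrative; problem size and lambda are arbitrary):

```python
# Illustrative timing harness (not the PR's benchmark): warm up the numba
# JIT first, then time both implementations on the same problem.
import time

rng = np.random.default_rng(0)
X_b = rng.normal(size=(200, 400))
z_b = rng.normal(size=200)
for name, fn in [("old", _lasso_old), ("new", _lasso)]:
    fn(X_b, z_b, 0.1)  # warm-up / JIT compile
    t0 = time.perf_counter()
    fn(X_b, z_b, 0.1)
    print(f"{name}: {time.perf_counter() - t0:.4f} s")
```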