Closed veni-vidi-vici-dormivi closed 2 months ago
All modified and coverable lines are covered by tests :white_check_mark:
Project coverage is 52.56%. Comparing base (
9b0b76b
) to head (50f3aa2
). Report is 66 commits behind head on main.
:umbrella: View full report in Codecov by Sentry.
:loudspeaker: Have feedback on the report? Share it here.
This part was not yet transferred by Shruti, right? Right.
Is this vectorized over the grid points? Yes.
So I don't 100% what the original Code is doing either (or actually I do understand what it is doing I think but not entirely why). I'll give you an overview over how the Code was originally:
This is the fitting of the AR process:
coeff_0={}
coeff_1={}
data = np.array(train_residue_trans_new).transpose((1, 0, 2)) # transformed residuals from power transformer as input
for model in models:
print('start with AR(1) model for',model)
coeff_0[model]=np.zeros([12,idx_l])
coeff_1[model]=np.zeros([12,idx_l])
for i_grid in tqdm(np.arange(idx_l)):
for i_mon in range(12):
if i_mon==0:
ar1_month = curve_fit(lin_func, data[:-1,-1:,i_grid].reshape(-1), data[1:,i_mon,i_grid].reshape(-1),
bounds=([-1,-np.inf], [1, np.inf]))[0] ## bounded gamma_1 term [-1,1] because otherwise sqrt(1-gamma_1**2) is NAN for calculation of analytic covariance matrix
## lin_func is simply a*x+b
coeff_0[model][i_mon,i_grid]=ar1_month[1]
coeff_1[model][i_mon,i_grid]=ar1_month[0]
else:
ar1_month=curve_fit(lin_func, data[:,i_mon-1,i_grid].reshape(-1), data[:,i_mon,i_grid].reshape(-1),
bounds=([-1,-np.inf], [1, np.inf]))[0]
coeff_0[model][i_mon,i_grid]=ar1_month[1]
coeff_1[model][i_mon,i_grid]=ar1_month[0]
So we are fitting the coefficients of the linear function always with the previous month as predictor for the next month. So far so good.
Afterwards, this is how we go on to work on the covariance matrix:
# we make deterministic predictions with the AR process
AR_process=np.zeros([np.array(train_residue_trans[model]).transpose(1,0,2).reshape(-1,idx_l).shape[0]+120,
idx_l]).reshape(-1,12,idx_l)
for t in np.arange(1,AR_process.shape[0]):
for i_mon in range(12):
if i_mon==0:
AR_process[t,i_mon,:]=coeff_0[model][i_mon,:]+coeff_1[model][i_mon,:]*AR_process[t-1,11,:]
else:
AR_process[t,i_mon,:]=coeff_0[model][i_mon,:]+coeff_1[model][i_mon,:]*AR_process[t,i_mon-1,:]
AR_process= AR_process[10:,:,:]
# then we subtract the predictions from the AR process from the transformed residuals
train_residue_all_spat[model]=np.array(train_residue_trans[model]).transpose(1,0,2).reshape(-1,12,idx_l)-AR_process
# then we train the covariance on this
nr_folds = nr_ts*run_nrs[model]
print('number folds', nr_folds)
fold_out_list = np.arange(nr_folds)
idx_fo_tot={}
j=0
for i in fold_out_list:
idx_fo = np.zeros(nr_folds,dtype=bool)
idx_fo[j:j+1]=True
idx_fo_tot[i]=idx_fo
j+=1
# carry out cross-validation to determine the localisation radius L
# print('start with localisation radius for',model)
df_llh_cv_all[model]={}
df_llh_cv_all[model]=Parallel(n_jobs=12,verbose=10)(delayed(leave_one_out)(L_set,nr_folds,train_residue_all_spat[model][:,i_mon,:],idx_fo_tot,phi) for i_mon in range(12))
We remove the deterministic predictions from the AR process from the transformed residuals and fit the covariance on that.
For emulating, this is what happens:
# adjust empirical covariance matrix
for i_mon in tqdm(range(12)):
L_sel=df_llh_cv_all[model][i_mon]['L_sel']
ecov_res=np.cov(train_residue_trans_emu[:,i_mon,:],rowvar=False)
cov_res=phi[L_sel]*ecov_res
cov_innov[i_mon]=np.zeros_like(cov_res)
for i in np.arange(idx_l):
for j in np.arange(idx_l):
cov_innov[i_mon][i,j]=np.sqrt(1-coeff_1[model][i_mon,i]**2)*np.sqrt(1-coeff_1[model][i_mon,j]**2)*cov_res[i,j]
cov_innov[i_mon][np.isnan(cov_innov[i_mon])]=0
# draw from the multivariate distribution
print('innovations drawn')
emu_res[model]={}
innov_emu={}
for i_mon in tqdm(range(12)):
innov_emu[i_mon] = np.random.multivariate_normal(np.zeros(idx_l),cov_innov[i_mon],size=[nr_emus,nr_ts+buffer],check_valid='warn')
start = dt.datetime.now()
# combine deterministic AR process part with innovations
for k in np.arange(nr_emus):
emu_res[model][k]=np.zeros([nr_ts+buffer,12,idx_l])
for t in np.arange(1,emu_res[model][k].shape[0]):
for i_mon in range(12):
if i_mon==0:
emu_res[model][k][t,i_mon,:]=coeff_0[model][i_mon,:]+coeff_1[model][i_mon,:]*emu_res[model][k][t-1,11,:]+innov_emu[i_mon][k,t] # emu_res does not have a stochastic component
else:
emu_res[model][k][t,i_mon,:]=coeff_0[model][i_mon,:]+coeff_1[model][i_mon,:]*emu_res[model][k][t,i_mon-1,:]+innov_emu[i_mon][k,t]
emu_res[model][k]=emu_res[model][k][buffer:,:,:]
for i_mon in range(12):
emu_res[model][k][:,i_mon,:]=power_inv_transform(power_trans[model][i_mon],emu_res[model][k][:,i_mon,:],y_all[model], n_gridpoints=idx_l)
So here we just predict the AR process deterministically and add the innovations on top afterwards the result is then back transformed . That's also what it says in the paper:
Thanks - that's a handful to unpack - I passed it through black and did a mild refactoring.
coeff_0 = {}
coeff_1 = {}
# transformed residuals from power transformer as input
data = np.array(train_residue_trans_new).transpose((1, 0, 2))
for model in models:
print(f"start with AR(1) model for {model}")
coeff_0[model] = np.zeros([12, idx_l])
coeff_1[model] = np.zeros([12, idx_l])
for i_grid in tqdm(np.arange(idx_l)):
for i_mon in range(12):
# get this and last month's data
if i_mon == 0:
xdata = data[:-1, -1:, i_grid].reshape(-1),
ydata = data[1:, i_mon, i_grid].reshape(-1)
else:
xdata = data[:, i_mon - 1, i_grid].reshape(-1),
ydata = data[:, i_mon, i_grid].reshape(-1)
# bounded gamma_1 term [-1,1] because otherwise sqrt(1 - gamma_1**2)
# is nan for calculation of analytic covariance matrix
# lin_func is a*x+b
bounds = ([-1, -np.inf], [1, np.inf])
ar1_month = curve_fit(lin_func, xdata, ydata, bounds=bounds)[0]
coeff_0[model][i_mon, i_grid] = ar1_month[1]
coeff_1[model][i_mon, i_grid] = ar1_month[0]
train_residue_trans_arr = np.array(train_residue_trans[model]).transpose(1, 0, 2)
shape = (train_residue_trans_arr.reshape(-1, idx_l).shape[0] + 120, idx_l)
# we make deterministic predictions with the AR processtrain_residue_trans_arr
AR_process = np.zeros(shape).reshape(-1, 12, idx_l)
for t in range(1, AR_process.shape[0]):
for i_mon in range(12):
if i_mon == 0:
sel = t - 1, 11, slice(None)
else:
sel = t, i_mon - 1, slice(None)
AR_process[t, i_mon, :] = (
coeff_0[model][i_mon, :] + coeff_1[model][i_mon, :] * AR_process[sel]
)
AR_process = AR_process[10:, :, :]
# then we subtract the predictions from the AR process from the transformed residuals
train_resid_all[model] = train_residue_trans_arr.reshape(-1, 12, idx_l) - AR_process
# then we train the covariance on this
nr_folds = nr_ts * run_nrs[model]
print(f"number folds: {nr_folds}")
idx_fo_tot = {}
for i in range(nr_folds):
idx_fo = np.zeros(nr_folds, dtype=bool)
idx_fo[i : i + 1] = True
idx_fo_tot[i] = idx_fo
# carry out cross-validation to determine the localisation radius L
# print('start with localisation radius for',model)
df_llh_cv_all[model] = Parallel(n_jobs=12, verbose=10)(
delayed(leave_one_out)(
L_set, nr_folds, train_resid_all[model][:, i_mon, :], idx_fo_tot, phi
)
for i_mon in range(12)
)
# adjust empirical covariance matrix
for i_mon in tqdm(range(12)):
L_sel = df_llh_cv_all[model][i_mon]["L_sel"]
ecov_res = np.cov(train_residue_trans_emu[:, i_mon, :], rowvar=False)
cov_res = phi[L_sel] * ecov_res
cov_innov[i_mon] = _adjust_ecov_ar1_np(cov_res, coeff_1[model][i_mon])
# TODO: there should not be any nans?
cov_innov[i_mon][np.isnan(cov_innov[i_mon])] = 0
# draw from the multivariate distribution
print("innovations drawn")
emu_res[model] = {}
innov_emu = {}
for i_mon in tqdm(range(12)):
innov_emu[i_mon] = np.random.multivariate_normal(
np.zeros(idx_l),
cov_innov[i_mon],
size=[nr_emus, nr_ts + buffer],
check_valid="warn",
)
def _draw_ar(coeff_0, coeff_1, innov_emu, nr_ts, buffer, idx_l):
out = np.zeros([nr_ts + buffer, 12, idx_l])
for t in range(1, nr_ts + buffer):
for i_mon in range(12):
if i_mon == 0:
sel = t - 1, 11, slice(None)
else:
sel = t, i_mon - 1, slice(None)
# emu_res does not have a stochastic component
out[t, i_mon, :] = (
coeff_0[model][i_mon, :]
+ coeff_1[model][i_mon, :] * out[sel]
+ innov_emu[i_mon][k, t]
)
out = out[buffer:, :, :]
return out
# combine deterministic AR process part with innovations
for k in range(nr_emus):
arr_ar = _draw_ar(coeff_0[model], coeff_1[model], innov_emu, nr_ts, buffer, idx_l)
emu_res[model][k] = arr_ar
for i_mon in range(12):
emu_res[model][k][:, i_mon, :] = power_inv_transform(
power_trans[model][i_mon],
emu_res[model][k][:, i_mon, :],
y_all[model],
n_gridpoints=idx_l,
)
Thanks - that's a handful to unpack - I passed it through black and did a mild refactoring.
Part 1
coeff_0 = {} coeff_1 = {} # transformed residuals from power transformer as input data = np.array(train_residue_trans_new).transpose((1, 0, 2)) for model in models: print(f"start with AR(1) model for {model}") coeff_0[model] = np.zeros([12, idx_l]) coeff_1[model] = np.zeros([12, idx_l]) for i_grid in tqdm(range(idx_l)): for i_mon in range(12): # get this and last month's data if i_mon == 0: xdata = data[:-1, -1:, i_grid].reshape(-1), ydata = data[1:, i_mon, i_grid].reshape(-1) else: xdata = data[:, i_mon - 1, i_grid].reshape(-1), ydata = data[:, i_mon, i_grid].reshape(-1) # bounded gamma_1 term [-1,1] because otherwise sqrt(1 - gamma_1**2) # is nan for calculation of analytic covariance matrix # lin_func is a*x+b bounds = ([-1, -np.inf], [1, np.inf]) ar1_month = curve_fit(lin_func, xdata, ydata, bounds=bounds)[0] coeff_0[model][i_mon, i_grid] = ar1_month[1] coeff_1[model][i_mon, i_grid] = ar1_month[0]
Part 2
train_residue_trans_arr = np.array(train_residue_trans[model]).transpose(1, 0, 2) shape = (train_residue_trans_arr.reshape(-1, idx_l).shape[0] + 120, idx_l) # we make deterministic predictions with the AR processtrain_residue_trans_arr AR_process = np.zeros(shape).reshape(-1, 12, idx_l) for t in range(1, AR_process.shape[0]): for i_mon in range(12): if i_mon == 0: sel = t - 1, 11, slice(None) else: sel = t, i_mon - 1, slice(None) AR_process[t, i_mon, :] = ( coeff_0[model][i_mon, :] + coeff_1[model][i_mon, :] * AR_process[sel] ) AR_process = AR_process[10:, :, :] # then we subtract the predictions from the AR process from the transformed residuals train_resid_all[model] = train_residue_trans_arr.reshape(-1, 12, idx_l) - AR_process # then we train the covariance on this nr_folds = nr_ts * run_nrs[model] print(f"number folds: {nr_folds}") idx_fo_tot = {} for i in range(nr_folds): idx_fo = np.zeros(nr_folds, dtype=bool) idx_fo[i : i + 1] = True idx_fo_tot[i] = idx_fo # carry out cross-validation to determine the localisation radius L # print('start with localisation radius for',model) df_llh_cv_all[model] = Parallel(n_jobs=12, verbose=10)( delayed(leave_one_out)( L_set, nr_folds, train_resid_all[model][:, i_mon, :], idx_fo_tot, phi ) for i_mon in range(12) )
Part 3
# adjust empirical covariance matrix for i_mon in tqdm(range(12)): L_sel = df_llh_cv_all[model][i_mon]["L_sel"] ecov_res = np.cov(train_residue_trans_emu[:, i_mon, :], rowvar=False) cov_res = phi[L_sel] * ecov_res cov_innov[i_mon] = _adjust_ecov_ar1_np(cov_res, coeff_1[model][i_mon]) # TODO: there should not be any nans? cov_innov[i_mon][np.isnan(cov_innov[i_mon])] = 0 # draw from the multivariate distribution print("innovations drawn") emu_res[model] = {} innov_emu = {} for i_mon in tqdm(range(12)): innov_emu[i_mon] = np.random.multivariate_normal( np.zeros(idx_l), cov_innov[i_mon], size=[nr_emus, nr_ts + buffer], check_valid="warn", ) def _draw_ar(coeff_0, coeff_1, innov_emu, nr_ts, buffer, idx_l): out = np.zeros([nr_ts + buffer, 12, idx_l]) for t in range(1, nr_ts + buffer): for i_mon in range(12): if i_mon == 0: sel = t - 1, 11, slice(None) else: sel = t, i_mon - 1, slice(None) # emu_res does not have a stochastic component out[t, i_mon, :] = ( coeff_0[model][i_mon, :] + coeff_1[model][i_mon, :] * out[sel] + innov_emu[i_mon][k, t] ) out = out[buffer:, :, :] return out # combine deterministic AR process part with innovations for k in range(nr_emus): arr_ar = _draw_ar(coeff_0[model], coeff_1[model], innov_emu, nr_ts, buffer, idx_l) emu_res[model][k] = arr_ar for i_mon in range(12): emu_res[model][k][:, i_mon, :] = power_inv_transform( power_trans[model][i_mon], emu_res[model][k][:, i_mon, :], y_all[model], n_gridpoints=idx_l, )
Okay, I would opt for merging this in this state because this is still rather close to what Shruti did originally and work on #472 in another PR.
Okay, I would opt for merging this in this state because this is still rather close to what Shruti did originally and work on #472 in another PR.
Fair enough! I'll give it another look
I'll give it another look
Thanks!
Actually I think now it would really pay off to extract the drawing of the innovations:
That way we can reuse it for the monthly drawing and not duplicate it.
But if we do that, do we put that function in _auto_regression
or _localized_covariance
?
I can also do the extraction of the drawing of the innovations in another PR if you prefer that @mathause, I just wanted to quickly check here how it would work.
CHANGELOG.rst
Similar to MESMER, we also use an AR1 process to emulate local variability in MESMER-M. However, we cannot recycle the original AR functionalities since MESMER-M fits AR parameters for each month individually (➡️ how does June depend on May, how does July depend on June?). In this PR I add the functionalities to achieve this.