Closed mattiasoldani closed 3 years ago
# function: single event data gaussian smearing for biplots
def evSmear(dfIn, lsVar, lsSigma, nIter):
dfIn = dfIn[lsVar+lsSigma].dropna()
dictOut = {lsVar[0]: np.ndarray(shape=nIter*dfIn.count()[0]),
lsVar[1]: np.ndarray(shape=nIter*dfIn.count()[0]),
"old_"+lsVar[0]: np.ndarray(shape=nIter*dfIn.count()[0]),
"old_"+lsVar[1]: np.ndarray(shape=nIter*dfIn.count()[0]),
"old_index": np.ndarray(shape=nIter*dfIn.count()[0])}
i = 0
for ind, a, b, ea, eb in zip(df.index, dfIn[lsVar[0]].to_numpy(), dfIn[lsVar[1]].to_numpy(), dfIn[lsSigma[0]].to_numpy(), dfIn[lsSigma[1]].to_numpy()):
means = [a, b]
stds = [ea, eb]
corr = 0
if nIter != 1:
covs = [[stds[0]**2 , stds[0]*stds[1]*corr],
[stds[0]*stds[1]*corr, stds[1]**2]]
else:
covs = [[0, 0], [0, 0]]
smeared = np.random.multivariate_normal(means, covs, nIter).T
dictOut[lsVar[0]][i*nIter:(i+1)*nIter] = smeared[0]
dictOut[lsVar[1]][i*nIter:(i+1)*nIter] = smeared[1]
dictOut["old_"+lsVar[0]][i*nIter:(i+1)*nIter] = np.array([a for i in range(nIter)])
dictOut["old_"+lsVar[1]][i*nIter:(i+1)*nIter] = np.array([b for i in range(nIter)])
dictOut["old_index"][i*nIter:(i+1)*nIter] = np.array([ind for i in range(nIter)])
i += 1
return dictOut
Of course, this should be extended to the 1d and Nd (N>2) cases.
Up to now I've written this:
def evSmear(dfIn, lsVar, lsSigma, nIter, bKeepOld):
# check whether the input arguments make sense
if len(lsVar) != len(lsSigma):
print ("list of variables and list of corresponding errors have different length --> operation not performed")
return {}
for [s for s in lsVar+lsSigma]:
if not (s in df.columns):
print("variable %s not in input dataframe --> operation not performed")
return {}
# setting up input dataframe properly
# events in which at least 1 of the variables to be studied are NaN are excluded
dfIn = dfIn[lsVar+lsSigma].dropna()
ind = list(dfIn.index)
var = {}
for iVar in lsVar+lsSigma: # each dfIn variable into a list; lsVar-then-lsSigma; both elements of lsVar & lsSigma are included here
var.update({iVar: list(df[iVar])})
# preparing the output dictionary (original index & values included only if bKeepOld is True)
dictOut = {"old_index": np.ndarray(shape=nIter*dfIn.count()[0])} if bKeepOld else {}
for iVar in lsVar:
dictOut.update({iVar: np.ndarray(shape=nIter*dfIn.count()[0])})
if bKeepOld:
dictOut.update({"old_"+iVar: np.ndarray(shape=nIter*dfIn.count()[0])})
for i in range(len(ind)): # loop over original events
# retrieving the event central value & sigma for each variable
means, stds = [], []
for iVar in lsVar+lsSigma:
if iVar in lsVar:
means.append(var[iVar][i])
elif iVar in lsSigma:
stds.append(var[iVar][i])
# gaussian doping
cov = np.zeroes((len(lsVar), len(lsVar)), float)
if nIter != 1: # note: if nIter == 1, null covariance matrix is used --> output data equal the input ones
np.fill_diagonal(cov, np.array(stds)**2)
outStat = np.random.multivariate_normal(means, covs, nIter).T
# filling the output dictionary
for k, iVar in enumerate(lsVar):
dictOut[iVar][i*nIter:(i+1)*nIter] = smeared[k]
if bKeepOld:
dictOut["old_"+iVar][i*nIter:(i+1)*nIter] = np.array([means[iVar] for j in range(nIter)])
dictOut["old_index"][i*nIter:(i+1)*nIter] = np.array([ind[i] for j in range(nIter)])
return dictOut
I still have to test it.
Implemented! Now I just have to produce some documentation and example plots...
already done in the KLEVER18 analysis -- function evSmear(...) --> check