mattiasoldani / succolib

A set of handy, Python-based tools for the INSULAb detectors data analysis
https://github.com/mattiasoldani/succolib
MIT License
0 stars 0 forks source link

creating 2d histogram by smearing each event with errors on x & y #4

Closed mattiasoldani closed 3 years ago

mattiasoldani commented 4 years ago

already done in the KLEVER18 analysis -- function evSmear(...) --> check

mattiasoldani commented 3 years ago
# function: single event data gaussian smearing for biplots
def evSmear(dfIn, lsVar, lsSigma, nIter):
    dfIn = dfIn[lsVar+lsSigma].dropna()
    dictOut = {lsVar[0]: np.ndarray(shape=nIter*dfIn.count()[0]),
               lsVar[1]: np.ndarray(shape=nIter*dfIn.count()[0]),
               "old_"+lsVar[0]: np.ndarray(shape=nIter*dfIn.count()[0]),
               "old_"+lsVar[1]: np.ndarray(shape=nIter*dfIn.count()[0]),
               "old_index": np.ndarray(shape=nIter*dfIn.count()[0])}
    i = 0
    for ind, a, b, ea, eb in zip(df.index, dfIn[lsVar[0]].to_numpy(), dfIn[lsVar[1]].to_numpy(), dfIn[lsSigma[0]].to_numpy(), dfIn[lsSigma[1]].to_numpy()):
        means = [a, b]  
        stds = [ea, eb]
        corr = 0
        if nIter != 1:
            covs = [[stds[0]**2          , stds[0]*stds[1]*corr], 
                    [stds[0]*stds[1]*corr,           stds[1]**2]] 
        else:
            covs = [[0, 0], [0, 0]]
        smeared = np.random.multivariate_normal(means, covs, nIter).T
        dictOut[lsVar[0]][i*nIter:(i+1)*nIter] = smeared[0]
        dictOut[lsVar[1]][i*nIter:(i+1)*nIter] = smeared[1]
        dictOut["old_"+lsVar[0]][i*nIter:(i+1)*nIter] = np.array([a for i in range(nIter)])
        dictOut["old_"+lsVar[1]][i*nIter:(i+1)*nIter] = np.array([b for i in range(nIter)])
        dictOut["old_index"][i*nIter:(i+1)*nIter] = np.array([ind for i in range(nIter)])
        i += 1
    return dictOut
mattiasoldani commented 3 years ago

Of course, this should be extended to the 1d and Nd (N>2) cases.

mattiasoldani commented 3 years ago

Up to now I've written this:

def evSmear(dfIn, lsVar, lsSigma, nIter, bKeepOld):

    # check whether the input arguments make sense
    if len(lsVar) != len(lsSigma):
        print ("list of variables and list of corresponding errors have different length --> operation not performed")
        return {}

    for [s for s in lsVar+lsSigma]:
        if not (s in df.columns):
            print("variable %s not in input dataframe --> operation not performed")
            return {}

    # setting up input dataframe properly
    # events in which at least 1 of the variables to be studied are NaN are excluded
    dfIn = dfIn[lsVar+lsSigma].dropna()

    ind = list(dfIn.index)
    var = {}
    for iVar in lsVar+lsSigma:  # each dfIn variable into a list; lsVar-then-lsSigma; both elements of lsVar & lsSigma are included here
        var.update({iVar: list(df[iVar])})

    # preparing the output dictionary (original index & values included only if bKeepOld is True)
    dictOut = {"old_index": np.ndarray(shape=nIter*dfIn.count()[0])} if bKeepOld else {}
    for iVar in lsVar:
        dictOut.update({iVar: np.ndarray(shape=nIter*dfIn.count()[0])})
        if bKeepOld:
            dictOut.update({"old_"+iVar: np.ndarray(shape=nIter*dfIn.count()[0])})

    for i in range(len(ind)):  # loop over original events

        # retrieving the event central value & sigma for each variable
        means, stds = [], []     
        for iVar in lsVar+lsSigma:
            if iVar in lsVar:
                means.append(var[iVar][i])
            elif iVar in lsSigma:
                stds.append(var[iVar][i])

        # gaussian doping
        cov = np.zeroes((len(lsVar), len(lsVar)), float)   
        if nIter != 1:  # note: if nIter == 1, null covariance matrix is used --> output data equal the input ones
            np.fill_diagonal(cov, np.array(stds)**2)
        outStat = np.random.multivariate_normal(means, covs, nIter).T

        # filling the output dictionary
        for k, iVar in enumerate(lsVar):
            dictOut[iVar][i*nIter:(i+1)*nIter] = smeared[k]
            if bKeepOld:
                dictOut["old_"+iVar][i*nIter:(i+1)*nIter] = np.array([means[iVar] for j in range(nIter)])
                dictOut["old_index"][i*nIter:(i+1)*nIter] = np.array([ind[i] for j in range(nIter)])

    return dictOut

I still have to test it.

mattiasoldani commented 3 years ago

Implemented! Now I just have to produce some documentation and example plots...