chenlingantelope / MSscRNAseq2019

Analysis for the 2019 submission "Integrated single cell analysis of blood and cerebrospinal fluid leukocytes in multiple sclerosis", Schafflick1, Xu, Hartlehnert1 et al.
MIT License
21 stars 8 forks source link

How to print the CSEA plots? #5

Open zhangdudu0 opened 1 year ago

zhangdudu0 commented 1 year ago
def CSEA(genelist, genenames, norm_X, condition, filt,p=1,interval=50,use_Zscore=False,n_permute=1000):
    """Score cells on a gene set and test enrichment of `condition` by permutation.

    Each cell (row of ``norm_X`` kept by ``filt``) gets a signature score:
    the sum over the selected genes of its per-gene normalised expression,
    rescaled to [0, 1]. Cells are ranked by that score and the ranked
    labels are passed to ``ES_fast``; the same call is repeated on
    ``n_permute`` random shufflings of the labels to build a null.

    Parameters
    ----------
    genelist : iterable of gene names
        Genes making up the signature. Only names that also occur in
        ``genenames`` are used.
    genenames : sequence
        Name of each column of ``norm_X``, in column order.
    norm_X : 2-D array-like indexable as ``norm_X[filt, :]`` and ``norm_X[:, i]``
        Expression matrix, cells x genes.
        NOTE(review): presumably already normalised, and possibly a sparse
        or ``np.matrix`` object given the ``asarray(...).ravel()`` below;
        confirm against the caller.
    condition : array-like indexable by ``filt``
        Per-cell label. NOTE(review): it is stacked with the float score
        via ``np.asarray([condition, score])``, so both are coerced to a
        common dtype; this assumes boolean/numeric labels. Confirm.
    filt : row selector for ``norm_X`` and ``condition``
    p, interval :
        Forwarded unchanged to ``ES_fast``.
    use_Zscore : bool
        If true, z-score each gene across cells; otherwise divide each
        gene by its mean.
    n_permute : int
        Number of label permutations used for the null distribution.

    Returns
    -------
    msES
        Result of ``ES_fast`` on the observed labels.
    permuted : list
        ``ES_fast`` result for each label permutation.
    ranked : pandas.DataFrame
        Columns ``'s'`` (label) and ``'score'``, sorted by descending score.
    pvalue : float
        Fraction of permutations whose maximum is at least the observed
        maximum.

    Raises
    ------
    ValueError
        If no gene in ``genelist`` is present in ``genenames``.
    """
    # Build the lookup once: O(1) membership instead of scanning genelist
    # for every gene name, and a single pass yields indices and names together.
    wanted = set(genelist)
    hits = [(i, name) for i, name in enumerate(genenames) if name in wanted]
    if not hits:
        raise ValueError(
            "CSEA: none of the genes in genelist were found in genenames"
        )

    norm_X = norm_X[filt,:]
    condition = condition[filt]

    gene = [name for _, name in hits]
    # asarray(...).ravel() flattens each selected column to 1-D regardless
    # of the container type returned by the column slice.
    exprs = np.asarray([np.asarray(norm_X[:, i]).ravel() for i, _ in hits])
    exprs = pd.DataFrame(exprs.T, columns=gene)

    if use_Zscore:
        exprs = (exprs-exprs.mean(axis=0))/exprs.std(axis=0)
    else:
        exprs = exprs/exprs.mean(axis=0)

    score = exprs.sum(axis=1)
    lowest = np.min(score)
    score = (score-lowest)/(np.max(score)-lowest) #normalization

    ranked = pd.DataFrame(np.asarray([condition, score]).T,columns=['s','score'])
    # Shuffle before sorting so rows with equal scores are not left in
    # their original input order.
    ranked = ranked.sample(frac=1)
    ranked = ranked.sort_values(by='score',ascending=False)
    s = np.asarray(ranked['s'])
    score = np.asarray(ranked['score'])
    msES = ES_fast(score, s, p=p,interval=interval)

    # Null distribution: keep the ranking fixed and shuffle the labels.
    permuted = []
    for _ in range(n_permute):
        perm = np.random.permutation(len(s))
        permuted.append(ES_fast(score, s[perm], p=p,interval=interval))

    null = np.asarray([np.max(x) for x in permuted])
    observed = np.max(msES)
    pvalue = 1-np.mean(observed>null)
    return msES, permuted, ranked, pvalue