Analysis for the 2019 submission "Integrated single cell analysis of blood and cerebrospinal fluid leukocytes in multiple sclerosis" by Schafflick, Xu, Hartlehnert et al.
def CSEA(genelist, genenames, norm_X, condition, filt, p=1, interval=50,
         use_Zscore=False, n_permute=1000):
    """Score rows for a gene signature and test its enrichment by permutation.

    Every row of ``norm_X`` kept by ``filt`` gets a signature score: the sum,
    over the genes of ``genelist`` found in ``genenames``, of its per-gene
    normalized expression, min-max scaled to [0, 1].  Rows are ranked by
    decreasing score, ``ES_fast`` is evaluated on the ranked labels, and the
    maximum of its result is compared with the maxima obtained after
    shuffling the labels ``n_permute`` times.

    Parameters
    ----------
    genelist : container
        Gene names forming the signature; only membership (``in``) is used.
    genenames : iterable
        Gene names in the column order of ``norm_X``.
    norm_X : 2-D array-like
        Expression matrix indexed as ``norm_X[rows, cols]``.
        NOTE(review): presumably cells x genes, normalized and dense --
        confirm against the caller.
    condition : array-like
        One label per row of ``norm_X``, indexable by ``filt``.
    filt : index or mask
        Selects the rows of ``norm_X`` / entries of ``condition`` to analyse.
    p, interval :
        Forwarded unchanged to ``ES_fast``.
    use_Zscore : bool
        If true, each gene is z-scored across the selected rows; otherwise
        each gene is divided by its mean across the selected rows.
    n_permute : int
        Number of label permutations used to build the null distribution.

    Returns
    -------
    msES :
        Result of ``ES_fast`` on the observed ranking.
    permuted : list
        One ``ES_fast`` result per label permutation.
    ranked : pandas.DataFrame
        Columns ``'s'`` (label) and ``'score'``, sorted by decreasing score.
    pvalue : float
        Fraction of permutations whose maximum is not exceeded by the
        observed maximum.

    Raises
    ------
    ValueError
        If no gene of ``genelist`` is present in ``genenames``.
    """
    norm_X = norm_X[filt, :]
    condition = condition[filt]

    # One pass over the gene names: remember the name and column index of
    # every gene that belongs to the signature.
    matched = [(name, col) for col, name in enumerate(genenames)
               if name in genelist]
    if not matched:
        # Without this guard the failure surfaces later as an opaque
        # shape-mismatch error from pandas/numpy.
        raise ValueError(
            "CSEA: none of the genes in genelist were found in genenames")

    gene = [name for name, _ in matched]
    columns = [np.asarray(norm_X[:, col]).ravel() for _, col in matched]
    exprs = pd.DataFrame(np.asarray(columns).T, columns=gene)

    # Put the genes on a comparable scale before summing them.
    if use_Zscore:
        exprs = (exprs - exprs.mean(axis=0)) / exprs.std(axis=0)
    else:
        exprs = exprs / exprs.mean(axis=0)

    score = exprs.sum(axis=1)
    # Min-max normalization of the signature score to [0, 1].
    score = (score - np.min(score)) / (np.max(score) - np.min(score))

    # Stacking labels and scores in a single array gives both columns one
    # common dtype (e.g. boolean labels become floats); kept as in the
    # original because downstream code receives these values.
    ranked = pd.DataFrame(np.asarray([condition, score]).T,
                          columns=['s', 'score'])
    # Shuffle before sorting so the pre-sort row order is random
    # (presumably to avoid a systematic order among tied scores).
    ranked = ranked.sample(frac=1)
    ranked = ranked.sort_values(by='score', ascending=False)

    s = np.asarray(ranked['s'])
    score = np.asarray(ranked['score'])
    msES = ES_fast(score, s, p=p, interval=interval)

    # Null distribution: shuffle the labels and keep the scores fixed.
    permuted = []
    for _ in range(n_permute):
        perm = np.random.permutation(len(s))
        permuted.append(ES_fast(score, s[perm], p=p, interval=interval))

    null = np.asarray([np.max(x) for x in permuted])
    # Share of permutations whose maximum the observed maximum fails to exceed.
    pvalue = 1 - np.mean(np.max(msES) > null)
    return msES, permuted, ranked, pvalue