For each marker, clip values to the median of its top 20 values.
def mediantop20(subAD, n=20):
    """Clip each marker's expression to the median of its `n` largest values.

    Parameters
    ----------
    subAD : AnnData
        Input (cells x markers); assumes `.X` is a dense array — TODO confirm
        (a sparse `.X` would not support `flatten()`/column assignment this way).
    n : int, optional
        Number of largest values whose median is used as the clipping cap
        (default 20, matching the original behavior).

    Returns
    -------
    AnnData
        A copy of `subAD` with each column of `X` clipped to [0, cap].
    """
    outAD = subAD.copy()
    for ix, marker in enumerate(outAD.var_names):
        # Pull the column once instead of slicing/flattening it twice.
        vals = subAD[:, marker].X.flatten()
        # Cap at the median of the n largest observed values for this marker.
        cap = np.median(np.sort(vals)[-n:])
        outAD.X[:, ix] = np.clip(vals, 0, cap)
    return outAD
remove expression outliers from the data
def removeOutliers(ad):
    """Clip per-marker expression outliers separately for each image.

    Splits `ad` into one subset per unique `obs['ImageID']`, applies
    `mediantop20` to each subset, and concatenates the clipped subsets
    back into a single AnnData.

    Parameters
    ----------
    ad : AnnData
        Input with an `ImageID` column in `.obs`.

    Returns
    -------
    AnnData
        Concatenation of the per-image clipped subsets.
    """
    # Process each image/sample independently so the top-20 caps are per-image.
    per_image = {}
    for sID in ad.obs.ImageID.sort_values().unique():
        print(sID)
        per_image[sID] = ad[ad.obs['ImageID'] == sID]
        print(per_image[sID].X.max(axis=0))  # per-marker maxima before clipping
        per_image[sID] = mediantop20(per_image[sID])
        print(per_image[sID].X.max(axis=0))  # per-marker maxima after clipping
    outAD = sc.concat(per_image.values())
    return outAD
Hi there,
I have been encountering problems with the Numpy ArrayMemoryError when phenotyping the cells. This is the anndata that I feed into SciMap:
AnnData object with n_obs × n_vars = 10008 × 66 obs: 'region', 'slide', 'cell_id', 'ImageID', 'leiden' obsm: 'X_umap', 'spatial'
and this is the error I have been receiving:
numpy.core._exceptions._ArrayMemoryError: Unable to allocate 135. TiB for an array with shape (18588824900024,) and data type int64
This is the code that I have been running:
import numpy as np import anndata as ad import scanpy as sc import scimap as sm import pandas as pd import sys
# Command-line arguments: input .h5ad path, phenotype-workflow CSV, output path.
# (The original paste fused these three assignments onto one line.)
path_adata = str(sys.argv[1])
path_workflow = str(sys.argv[2])
path_output = str(sys.argv[3])
For each marker, clip values to the median of its top 20 values.
def mediantop20(subAD):
    """Return a copy of `subAD` with each marker clipped to the median of its top-20 values.

    Assumes `.X` is a dense (cells x markers) array — TODO confirm; a sparse
    `.X` would not support `flatten()` or direct column assignment.
    (The original paste collapsed this whole definition onto one line.)
    """
    clipped = subAD.copy()
    for col, name in enumerate(clipped.var_names):
        values = subAD[:, name].X.flatten()
        # Median of the 20 largest observed values defines the per-marker cap.
        ceiling = np.median(np.sort(values)[-20:])
        clipped.X[:, col] = np.clip(values, 0, ceiling)
    return clipped
remove expression outliers from the data
def removeOutliers(ad):
rescale and run cell phenotyping
# Load the AnnData, clip per-image expression outliers, then rescale with scimap.
# (The original paste fused two statements onto each of these lines.)
adata = ad.read_h5ad(path_adata)
print(adata)

newAD = removeOutliers(adata)
newAD = sm.pp.rescale(newAD, imageid='ImageID', method='by_image')
phenotype the cells
# Phenotype the cells using the workflow table.
phenoDF = pd.read_csv(path_workflow)
# NOTE(review): `malGenes` is not defined anywhere in this script — as written
# this line raises NameError. Define the list of columns to drop before it runs.
phenoDF.drop(inplace=True, columns=malGenes)

# (The original paste fused the three statements below onto one line.)
sm.tl.phenotype_cells(newAD, phenoDF, label='phenotype')
newAD.obs.groupby('ImageID').phenotype.value_counts()
newAD.write_h5ad(path_output)