When I used the code to run reference cell type signatures:
import sys
import scanpy as sc
import anndata
import pandas as pd
import numpy as np
import os
import gc
# this line forces theano to use the GPU and should go before importing cell2location
# Reload the saved reference AnnData object and the trained regression model
# from the reference-signature results folder.
adata_file = f"{ref_run_name}/sc.h5ad"
adata_ref = sc.read_h5ad(adata_file)
mod = cell2location.models.RegressionModel.load(f"{ref_run_name}", adata_ref)

# export estimated expression in each cluster
# NOTE(review): the per-factor column names were pasted as
# 'means_per_cluster_mufg{i}'; the underscores around 'fg' appear to have been
# eaten by markdown formatting (the container key is 'means_per_cluster_mu_fg').
# Confirm against the columns actually written by `mod.export_posterior`.
factor_names = adata_ref.uns['mod']['factor_names']
signature_cols = [f'means_per_cluster_mu_fg_{i}' for i in factor_names]
if 'means_per_cluster_mu_fg' in adata_ref.varm.keys():
    # signatures stored as a table under .varm
    inf_aver = adata_ref.varm['means_per_cluster_mu_fg'][signature_cols].copy()
else:
    # otherwise fall back to per-gene columns in .var
    inf_aver = adata_ref.var[signature_cols].copy()

# label the columns with the plain factor names
inf_aver.columns = factor_names
inf_aver.iloc[0:5, 0:5]
# When I used the code to run reference cell type signatures:
import sys
import scanpy as sc
import anndata
import pandas as pd
import numpy as np
import os
import gc

# this line forces theano to use the GPU and should go before importing cell2location
os.environ["THEANO_FLAGS"] = 'device=cuda0,floatX=float32,force_device=True'
# if using the CPU uncomment this:
# (kept commented out: left active it would overwrite the GPU setting above)
# os.environ["THEANO_FLAGS"] = 'device=cpu,floatX=float32,openmp=True,force_device=True'

import cell2location

import matplotlib as mpl
from matplotlib import rcParams
import matplotlib.pyplot as plt
import seaborn as sns

# silence scanpy that prints a lot of warnings
import warnings
warnings.filterwarnings('ignore')
# Step 1: Loading data
results_folder = './'

# create paths and names to results folders for reference regression and cell2location models
ref_run_name = f'{results_folder}/reference_signatures'
run_name = f'{results_folder}/cell2location_map'
os.makedirs(ref_run_name, exist_ok=True)
os.makedirs(run_name, exist_ok=True)

# load st data
adata_st = sc.read_h5ad('st.h5ad')
adata_st  # notebook-style display of the object

# find mitochondria-encoded (MT) genes
adata_st.var['MT_gene'] = [gene.startswith('MT-') for gene in adata_st.var.index]

# remove MT genes for spatial mapping (keeping their counts in the object)
# NOTE(review): `.toarray()` assumes adata_st.X is a sparse matrix -- confirm.
adata_st.obsm['MT'] = adata_st[:, adata_st.var['MT_gene'].values].X.toarray()
adata_st = adata_st[:, ~adata_st.var['MT_gene'].values]
adata_st

# load sc-rna data
adata_ref = sc.read_h5ad('sce.h5ad')
adata_ref

# select genes to keep using the cell2location gene filter
from cell2location.utils.filtering import filter_genes
selected = filter_genes(
    adata_ref,
    cell_count_cutoff=5,
    cell_percentage_cutoff2=0.03,
    nonz_mean_cutoff=1.12,
)

# filter the object
adata_ref = adata_ref[:, selected].copy()
# Step 2: NB regression
# prepare anndata for the regression model
cell2location.models.RegressionModel.setup_anndata(
    adata=adata_ref,
    # 10X reaction / sample / batch
    batch_key='orig.ident',
    # cell type, covariate used for constructing signatures
    labels_key='celltype',
    # multiplicative technical effects (platform, 3' vs 5', donor effect)
    categorical_covariate_keys=['Method'],
)

# create the regression model
from cell2location.models import RegressionModel
mod = RegressionModel(adata_ref)

# view anndata_setup as a sanity check
mod.view_anndata_setup()

# training model
mod.train(max_epochs=250, use_gpu=True, train_size=1)
# plot the training history and save it next to the model outputs
mod.plot_history(20)
plt.savefig(f"{ref_run_name}/ref_train_history.png")
plt.clf()

# In this section, we export the estimated cell abundance (summary of the posterior distribution).
adata_ref = mod.export_posterior(
    adata_ref,
    sample_kwargs={'num_samples': 1000, 'batch_size': 2500, 'use_gpu': True},
)

# Save model
mod.save(f"{ref_run_name}", overwrite=True)

# Save anndata object with results
adata_file = f"{ref_run_name}/sc.h5ad"
adata_ref.write(adata_file)
adata_file

# save the QC plots next to the model outputs
mod.plot_QC()
plt.savefig(f"{ref_run_name}/ref_train_QC.png")
plt.clf()
# Reload the saved reference AnnData object and the trained regression model.
adata_file = f"{ref_run_name}/sc.h5ad"
adata_ref = sc.read_h5ad(adata_file)
mod = cell2location.models.RegressionModel.load(f"{ref_run_name}", adata_ref)

# export estimated expression in each cluster
# NOTE(review): the per-factor column names were pasted as
# 'means_per_cluster_mufg{i}'; the underscores around 'fg' appear to have been
# eaten by markdown formatting (the container key is 'means_per_cluster_mu_fg').
# Confirm against the columns actually written by `mod.export_posterior`.
factor_names = adata_ref.uns['mod']['factor_names']
signature_cols = [f'means_per_cluster_mu_fg_{i}' for i in factor_names]
if 'means_per_cluster_mu_fg' in adata_ref.varm.keys():
    # signatures stored as a table under .varm
    inf_aver = adata_ref.varm['means_per_cluster_mu_fg'][signature_cols].copy()
else:
    # otherwise fall back to per-gene columns in .var
    inf_aver = adata_ref.var[signature_cols].copy()

# label the columns with the plain factor names
inf_aver.columns = factor_names
inf_aver.iloc[0:5, 0:5]
However, my ELBO plot looked like this:
Does anyone know what causes this?