# When I used the code to run reference cell type signatures:
import sys
import scanpy as sc
import anndata
import pandas as pd
import numpy as np
import os
import gc

# This line forces theano to use the GPU and should go before importing
# cell2location.
# NOTE(review): the RegressionModel / setup_anndata / use_gpu API used later in
# this script looks like the scvi-tools-based cell2location, where THEANO_FLAGS
# presumably has no effect -- confirm against the installed version.
os.environ["THEANO_FLAGS"] = 'device=cuda0,floatX=float32,force_device=True'

# If using the CPU, uncomment this (left commented out so that it does not
# overwrite the GPU setting above):
# os.environ["THEANO_FLAGS"] = 'device=cpu,floatX=float32,openmp=True,force_device=True'

import cell2location

import matplotlib as mpl
from matplotlib import rcParams
import matplotlib.pyplot as plt
import seaborn as sns

# Silence scanpy, which prints a lot of warnings.
import warnings

warnings.filterwarnings('ignore')

# Step 1: Loading data

results_folder = './'

# Create paths and names of the results folders for the reference regression
# and cell2location models. The folders are created if they do not exist yet.
ref_run_name = f'{results_folder}/reference_signatures'
run_name = f'{results_folder}/cell2location_map'
os.makedirs(ref_run_name, exist_ok=True)
os.makedirs(run_name, exist_ok=True)

# Load the spatial transcriptomics (ST) data.
adata_st = sc.read_h5ad('st.h5ad')
adata_st  # bare expression: shows the object in a notebook, no-op in a script

# Find mitochondria-encoded (MT) genes: flag every gene whose name starts
# with 'MT-'.
adata_st.var['MT_gene'] = [gene.startswith('MT-') for gene in adata_st.var.index]

# Remove MT genes for spatial mapping (keeping their counts in the object,
# under obsm['MT']).
# NOTE(review): .toarray() assumes adata_st.X is a sparse matrix -- confirm.
adata_st.obsm['MT'] = adata_st[:, adata_st.var['MT_gene'].values].X.toarray()
adata_st = adata_st[:, ~adata_st.var['MT_gene'].values]
adata_st

# Load the single-cell RNA-seq reference data.
adata_ref = sc.read_h5ad('sce.h5ad')
adata_ref  # bare expression: shows the object in a notebook, no-op in a script

from cell2location.utils.filtering import filter_genes

# Select the genes to keep using the cut-offs below.
selected = filter_genes(
    adata_ref,
    cell_count_cutoff=5,
    cell_percentage_cutoff2=0.03,
    nonz_mean_cutoff=1.12,
)

# Filter the object down to the selected genes.
adata_ref = adata_ref[:, selected].copy()

# Step 2: NB regression

# Prepare anndata for the regression model.
cell2location.models.RegressionModel.setup_anndata(
    adata=adata_ref,
    # 10X reaction / sample / batch
    batch_key='orig.ident',
    # cell type, covariate used for constructing signatures
    labels_key='celltype',
    # multiplicative technical effects (platform, 3' vs 5', donor effect)
    categorical_covariate_keys=['Method'],
)

# Create the regression model.
from cell2location.models import RegressionModel

mod = RegressionModel(adata_ref)

# View anndata_setup as a sanity check.
mod.view_anndata_setup()

# Train the model.
mod.train(max_epochs=250, use_gpu=True, train_size=1)

# Plot the training history and save the figure to the reference results
# folder.
# NOTE(review): per the cell2location tutorial the argument to plot_history is
# the number of initial epochs left out of the plot -- confirm for the
# installed version when debugging an empty ELBO plot.
mod.plot_history(20)
plt.savefig(f"{ref_run_name}/ref_train_history.png")
plt.clf()

# In this section, we export the estimated cell abundance (summary of the
# posterior distribution).
adata_ref = mod.export_posterior(
    adata_ref,
    sample_kwargs={'num_samples': 1000, 'batch_size': 2500, 'use_gpu': True},
)

# Save the model.
mod.save(f"{ref_run_name}", overwrite=True)

# Save the anndata object with results.
adata_file = f"{ref_run_name}/sc.h5ad"
adata_ref.write(adata_file)
adata_file  # bare expression: shows the path in a notebook, no-op in a script

# Plot the QC figure and save it next to the training history.
mod.plot_QC()
plt.savefig(f"{ref_run_name}/ref_train_QC.png")
plt.clf()

# Reload the saved anndata object and model from disk.
adata_file = f"{ref_run_name}/sc.h5ad"
adata_ref = sc.read_h5ad(adata_file)
mod = cell2location.models.RegressionModel.load(f"{ref_run_name}", adata_ref)

# Export estimated expression in each cluster.
# The per-cluster columns are named 'means_per_cluster_mu_fg_<factor name>';
# the underscores in that name were lost when the snippet was pasted.
factor_names = adata_ref.uns['mod']['factor_names']
signature_columns = [f'means_per_cluster_mu_fg_{i}' for i in factor_names]

# The signatures are read from varm when the key is present there, otherwise
# from var.
if 'means_per_cluster_mu_fg' in adata_ref.varm.keys():
    inf_aver = adata_ref.varm['means_per_cluster_mu_fg'][signature_columns].copy()
else:
    inf_aver = adata_ref.var[signature_columns].copy()

# Relabel the columns with the plain factor names.
inf_aver.columns = factor_names
inf_aver.iloc[0:5, 0:5]  # bare expression: previews the table in a notebook

However, my ELBO plot looked like this: ref_train_history

Does anyone know what causes this?

BayraktarLab / cell2location

ELBO loss plot was empty #361