Closed debbie28 closed 2 years ago
Hi @debbie28!
This is a great question! I will divide it in two parts:
'I have a cisTopic analysis in R, can I still use it for SCENIC+?'
Yes, you can! There is no need to repeat the topic modelling in Python :).
For this, you will need to do the following in R first:
# Export data to python
library(arrow)
path <- OUTDIR
cisTopic_obj <- readRDS(PATH_TO_YOUR_CISTOPICOBJECT)
modelMat <- modelMatSelection(cisTopic_obj, 'cell', 'Probability')
modelMat <- as.data.frame(modelMat)
write_feather(modelMat, sink=paste0(path, 'model_to_pycisTopic/cell_topic.feather'))
modelMat <- modelMatSelection(cisTopic_obj, 'region', 'Probability', all.regions=TRUE)
modelMat <- as.data.frame(modelMat)
write_feather(modelMat, sink=paste0(path, 'model_to_pycisTopic/topic_region.feather'))
# You can also save the count.matrix
ct <- cisTopicObject@count.matrix
ct <- as.data.frame(ct)
write_feather(ct, sink=paste0(path, 'model_to_pycisTopic/count_matrix.feather'))
And then in Python:
import pycisTopic
## 1. Initialize cisTopic object
from pycisTopic.cistopic_class import *
# Load count matrix
matrix_path=PATH_TO_FRAGMENTS_MATRIX
fragment_matrix = pd.read_feather(matrix_path)
cisTopic_obj = create_cistopic_object(fragment_matrix)
# Also add the cell annotation, cell_data should be a pandas df with cells as rows (cell names as index) and variables as columns
cisTopic_obj.add_cell_data(cell_data)
# 2. Add model
from pycisTopic.utils import *
model = load_cisTopic_model(PATH_TO_THE_FOLDER_WHERE_YOU_SAVED_DATA_FROM_R)
cistopic_obj.add_LDA_model(model)
# 3. Get imputed accessibility matrix
from pycisTopic.diff_features import *
imputed_acc_obj = impute_accessibility(cistopic_obj, selected_cells=None, selected_regions=None, scale_factor=10**6)
# 4. DARs
normalized_imputed_acc_obj = normalize_scores(imputed_acc_obj, scale_factor=10**4)
variable_regions = find_highly_variable_features(normalized_imputed_acc_obj,
min_disp = 0.05,
min_mean = 0.0125,
max_mean = 3,
max_disp = np.inf,
n_bins=20,
n_top_features=None,
plot=True)
markers_dict= find_diff_features(cistopic_obj,
imputed_acc_obj,
variable='cell_type', # Here put the column of cell_data that you want to have DARs for
var_features=variable_regions,
contrasts=None,
adjpval_thr=0.05,
log2fc_thr=np.log2(1.5),
n_cpu=5)
# You can continue with pycistarget.
Feel free to change the function if you do not want to work in feather format.
def load_cisTopic_model(path_to_cisTopic_model_matrices):
metrics = None
coherence = None
marg_topic = None
topic_ass = None
cell_topic = pd.read_feather(path_to_cisTopic_model_matrices + "cell_topic.feather")
cell_topic.index = ["Topic" + str(x) for x in range(1, cell_topic.shape[0] + 1)]
topic_region = pd.read_feather(
path_to_cisTopic_model_matrices + "topic_region.feather"
)
topic_region.index = ["Topic" + str(x) for x in range(1, topic_region.shape[0] + 1)]
topic_region = topic_region.T
parameters = None
model = cisTopicLDAModel(
metrics, coherence, marg_topic, topic_ass, cell_topic, topic_region, parameters
)
return model
'I have an analysis with another tool (e.g. Signac/ArchR), can I still use it for SCENIC+?'
Here you have two options:
I will add these responses in a FAQs tab in the readthedocs page as I find them very relevant, thanks for the heads up!
C
Hi @cbravo93!
I am doing what you suggest:
1. In R:
# Export data to python
library(arrow)
path <- here::here()
cisTopic_obj <- cisTopicObject
modelMat <- modelMatSelection(cisTopic_obj, 'cell', 'Probability')
modelMat <- as.data.frame(modelMat)
write_feather(modelMat, sink=paste0(path, '/model_to_pycisTopic/cell_topic.feather'))
modelMat <- modelMatSelection(cisTopic_obj, 'region', 'Probability', all.regions=TRUE)
modelMat <- as.data.frame(modelMat)
write_feather(modelMat, sink=paste0(path, '/model_to_pycisTopic/topic_region.feather'))
# You can also save the count.matrix
ct <- as.matrix(cisTopicObject@count.matrix)
ct <- as.data.frame(ct)
write_feather(ct, sink=paste0(path, '/model_to_pycisTopic/count_matrix.feather'))
cell_data <- cisTopicObject@cell.data # Adjust this to your actual object and slot
write.csv(cell_data, file = paste0(path, '/model_to_pycisTopic/cell_data.csv'), row.names = TRUE)
2. In Python
def load_cisTopic_model(path_to_cisTopic_model_matrices):
metrics = None
coherence = None
marg_topic = None
topic_ass = None
cell_topic = pd.read_feather(path_to_cisTopic_model_matrices + "cell_topic.feather")
cell_topic.index = ["Topic" + str(x) for x in range(1, cell_topic.shape[0] + 1)]
topic_region = pd.read_feather(
path_to_cisTopic_model_matrices + "topic_region.feather"
)
topic_region.index = ["Topic" + str(x) for x in range(1, topic_region.shape[0] + 1)]
topic_region = topic_region.T
parameters = None
model = cisTopicLDAModel(
metrics, coherence, marg_topic, topic_ass, cell_topic, topic_region, parameters
)
return model
import pycisTopic
## 1. Initialize cisTopic object
from pycisTopic.cistopic_class import *
# Load count matrix
matrix_path="/homes/users/gfuentes/scratch/projects/tfs/model_to_pycisTopic/topic_region.feather"
fragment_matrix = pd.read_feather(matrix_path)
cisTopic_obj = create_cistopic_object(fragment_matrix)
# Also add the cell annotation, cell_data should be a pandas df with cells as rows (cell names as index) and variables as columns
import pandas as pd
# Read the cell data CSV file into a pandas DataFrame
cell_data = pd.read_csv('/homes/users/gfuentes/scratch/projects/tfs/model_to_pycisTopic/cell_data.csv', index_col=0)
cisTopic_obj.add_cell_data(cell_data)
# 2. Add model
from pycisTopic.utils import *
path_data_R = "/homes/users/gfuentes/scratch/projects/tfs/model_to_pycisTopic/"
model = load_cisTopic_model(path_data_R)
cistopic_obj.add_LDA_model(model)
# 3. Get imputed accessibility matrix
from pycisTopic.diff_features import *
imputed_acc_obj = impute_accessibility(cistopic_obj, selected_cells=None, selected_regions=None, scale_factor=10**6)
# 4. DARs
normalized_imputed_acc_obj = normalize_scores(imputed_acc_obj, scale_factor=10**4)
variable_regions = find_highly_variable_features(normalized_imputed_acc_obj,
min_disp = 0.05,
min_mean = 0.0125,
max_mean = 3,
max_disp = np.inf,
n_bins=20,
n_top_features=None,
plot=True)
markers_dict= find_diff_features(cistopic_obj,
imputed_acc_obj,
variable='cell_type', # Here put the column of cell_data that you want to have DARs for
var_features=variable_regions,
contrasts=None,
adjpval_thr=0.05,
log2fc_thr=np.log2(1.5),
n_cpu=5)
# You can continue with pycistarget.
But I get the following:
2024-09-16 12:43:17,745 cisTopic INFO Converting fragment matrix to sparse matrix
2024-09-16 12:43:17,823 cisTopic INFO Creating CistopicObject
/homes/aplic/noarch/software/Miniconda3/4.9.2/envs/scenicplus/lib/python3.11/site-packages/pycisTopic/cistopic_class.py:604: RuntimeWarning: divide by zero encountered in log10
np.log10(cisTopic_nr_frag),
/homes/aplic/noarch/software/Miniconda3/4.9.2/envs/scenicplus/lib/python3.11/site-packages/pycisTopic/cistopic_class.py:606: RuntimeWarning: divide by zero encountered in log10
np.log10(cisTopic_nr_acc),
2024-09-16 12:43:20,925 cisTopic INFO Done!
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[48], line 18
16 from pycisTopic.utils import *
17 path_data_R = "/homes/users/gfuentes/scratch/projects/tfs/model_to_pycisTopic/"
---> 18 model = load_cisTopic_model(path_data_R)
19 cistopic_obj.add_LDA_model(model)
21 # 3. Get imputed accessibility matrix
Cell In[47], line 14, in load_cisTopic_model(path_to_cisTopic_model_matrices)
12 topic_region = topic_region.T
13 parameters = None
---> 14 model = cisTopicLDAModel(
15 metrics, coherence, marg_topic, topic_ass, cell_topic, topic_region, parameters
16 )
17 return model
NameError: name 'cisTopicLDAModel' is not defined
And I don't know how to solve it... Do you have any idea on what could be happening?
Thank you so much!
Georgina
Hi @debbie28
You should import the class
from pycisTopic.lda_models import cisTopicLDAModel
Seppe
Hello! Thanks for developing SCENIC+. I really want to use this package to redo some of my multiome analyses. However I have already done a fair bit of work using Seurat/Signac and CisTopic in R, prior to this package coming out. I was wondering if I can use the CisTopic object I generated through R and use that as the input for SCENIC+ . I have done the TOPIC analysis and most of the initial preprocessing in R and it would be great if I could just import that to Python . Please let me know if this is possible.
Thanks Debbie