aertslab / SCopeLoomR

R package (compatible with SCope) to create generic .loom files and extend them with other data e.g.: SCENIC regulons, Seurat clusters and markers, ...
MIT License
40 stars 15 forks source link

add_seurat_clustering is not working #31

Open saeedfc opened 3 years ago

saeedfc commented 3 years ago
exp_mtx1 <- GetAssayData(SO, assay = "RNA", slot = "data")
exp_mtx1 <- as.matrix(exp_mtx1)

umap_mtx <- as.data.frame(Embeddings(SO, reduction = "umap"))

file.name <- "expanded.loom"
build_loom(
  file.name=file.name,
  dgem=exp_mtx1,
  title="Data 2020", # A name for your data
  genome="Human", # Just for user information, not used internally
  default.embedding=umap_mtx,
  default.embedding.name="Seurat_UMAP"
)

loom <- open_loom("expanded.loom", mode = 'r+')

#Load the DEG markers file as a data frame
wilcoxon_test_markers_csv <- read.csv("/mnt/DATA1/Markers_Fib_Jan27.csv", row.names = 1)
#Save it as an RDS file
saveRDS(wilcoxon_test_markers_csv, "/mnt/DATA1/Fibrosis/Full Scale Analysis/FibroBlasts/Markers_Fib_Jan27.rds")
#Make a named list of path to the markers file
#Name should be the resolution mentioned on the column name of corresponding seurat clustering for which you calculated the markers Eg: here 'res.0.25'
wilcoxon_test_markers <- list(res.0.25 = "/mnt/DATA1/Markers_Fib_Jan27.rds")

#Also for the corresponding resolution , if you have already annotated names for each cluster number, you can add it as a #data.frame
seurat.annotation <- data.frame(integrated_snn_res.0.25 = unique(as.numeric(as.character(SO$integrated_snn_res.0.25))), annotation = unique(as.character(SO$subtype)))
head(seurat.annotation)
#  integrated_snn_res.0.25        annotation
#1                      1                 F3A
#2                      2               Act_F
#3                      0               Res_I
#4                      4              Res_II
#5                      3                 SC3
#6                      6               mixed
#7                      5                 TFR

#Add the clustering here
add_seurat_clustering(loom = loom
                      , seurat = SO
                      , seurat.markers.file.path.list = wilcoxon_test_markers
                      ,seurat.clustering.prefix = 'integrated_snn_res.'
                      ,annotation = seurat.annotation #the DF of annotation; if not annotated, use NULL
                      ,annotation.cluster.id.cn = 'integrated_snn_res.0.25' #Column name for seurat cluster numbers of the annotation dataframe
                      ,annotation.cluster.description.cn = 'annotation' #Column name for the cell types of the annotation dataframe
                      , seurat.marker.metric.accessors = c("avg_logFC", "p_val_adj")
                      , seurat.marker.metric.names = c("Avg. logFC", "Adj. p-value")
                      , seurat.marker.metric.description = c("Average log fold change from Wilcox differential test", "Adjusted p-value using Bonferroni correction based on the total number of genes in the dataset"))

I get this error. Can you kindly let me know, what could be going wrong here?

Error in if (nchar(x = d) > 0) { : argument is of length zero
> traceback()
5: FUN(X[[i]], ...)
4: lapply(X = seq_along(along.with = unique.clusters), FUN = function(cluster.idx) {
       cluster.id <- unique.clusters[[cluster.idx]]
       if (is.numeric(x = cluster.id)) {
           description <- paste("NDA - Cluster", cluster.id)
       }
       else if (is.character(x = cluster.id)) {
           cluster.id <- cluster.idx - 1
           description <- cluster.id
       }
       else {
           stop("Cluster labels are required to be of class character or numeric.")
       }
       if (!is.null(x = annotation)) {
           annotation <- annotation[names(x = clusters)]
           d <- as.character(x = unique(x = annotation[clusters == 
               cluster.id]))
           if (length(x = d) > 1) {
               stop("Annotation is not unique: multiple annotation correspond to a cluster ID.")
           }
           if (nchar(x = d) > 0) {
               description <- paste0(d, " (", cluster.id, ")")
           }
       }
       return(list(id = cluster.id, description = description))
   })
3: add_global_md_clustering(loom = loom, id = id, group = group, 
       name = name, clusters = clusters_as_factor_ids, annotation = annotation)
2: add_annotated_clustering(loom = loom, group = "Seurat", name = paste("Seurat,", 
       paste0(seurat.clustering.prefix, res)), clusters = cluster_ids, 
       annotation = cluster_annotation, is.default = is_default_clustering, 
       overwrite.default = default.clustering.overwrite)
1: add_seurat_clustering(loom = loom, seurat = SO, seurat.markers.file.path.list = wilcoxon_test_markers, 
       seurat.clustering.prefix = "integrated_snn_res.", annotation = seurat.annotation, 
       annotation.cluster.id.cn = "integrated_snn_res.0.25", annotation.cluster.description.cn = "annotation", 
       seurat.marker.metric.accessors = c("avg_logFC", "p_val_adj"), 
       seurat.marker.metric.names = c("Avg. logFC", "Adj. p-value"), 
       seurat.marker.metric.description = c("Average log fold change from Wilcox differential test", 
           "Adjusted p-value using Bonferroni correction based on the total number of genes in the dataset"))

Thanks, Saeed

dweemx commented 3 years ago

Hi @saeedfc,

Which version of SCopeLoomR are you using ?

saeedfc commented 3 years ago

Hi @dweemx ,

I use '0.10.1' version. All other functions work well. Only the above part of adding seurat clustering + markers do not work. Adding column attributes is working, so I can visualize annotations. But not the markers. Kind regards, Saeed

dweemx commented 3 years ago

Hey @saeedfc, I couldn't reproduce your error with the latest version. The funcion add_seurat_clustering works for me:

add_seurat_clustering(loom = loom,
      seurat = so,
      seurat.assay = "RNA",
      seurat.clustering.prefix = "SCT_snn_res.",
      default.clustering.resolution = "res.3",
      seurat.markers.file.path.list = list(
         SCT_snn_res.0.5 = "10xtest_seurat_markers.rds.gz",
          SCT_snn_res.3 = "10xtest_seurat_markers2.rds.gz"
      ),
      seurat.marker.metric.accessors = c("avg_logFC", "p_val_adj"),
      seurat.marker.metric.names = c("Avg. logFC", "adjusted P-value"),
      seurat.marker.metric.description = c("Average log fold change", "Adjusted p-value (BF)"),
      annotation = annotation,
      annotation.cluster.id.cn = "cluster_id",
      annotation.cluster.description.cn = "cluster_name")

Could you try to use the latest version of SCopeLoomR (version 0.10.4) please ? It should be fixed in that version

saeedfc commented 3 years ago

Hi,

I just installed the latest version. now the command run without any error. But the markers do not get added.

wilcoxon_test_markers <- list(res.0.2 = paste0(getwd(),"/Markers_0.2.rds")

#Also for the corresponding resolution , if you have already annotated names for each cluster number, you can add it as a data.frame
seurat.annotation <- data.frame(integrated_snn_res.0.2 = unique(as.numeric(as.character(SO$integrated_snn_res.0.2))), annotation = unique(as.character(SO$subtype)))

add_seurat_clustering(loom = loom
                      , seurat = SO
                      , seurat.markers.file.path.list = wilcoxon_test_markers
                      ,seurat.clustering.prefix = 'integrated_snn_res.'
                      ,annotation = seurat.annotation #the DF of annotation; if not annotated, use NULL
                      ,annotation.cluster.id.cn = 'integrated_snn_res.0.2' #Column name for seurat cluster numbers of the annotation dataframe
                      ,annotation.cluster.description.cn = 'annotation' #Column name for the cell types of the annotation dataframe
                      , seurat.marker.metric.accessors = c("avg_logFC", "p_val_adj")
                      , seurat.marker.metric.names = c("Avg. logFC", "Adj. p-value")
                      , seurat.marker.metric.description = c("Average log fold change from Wilcox differential test", "Adjusted p-value using Bonferroni correction based on the total number of genes in the dataset"))

output below

[1] "Seurat, integrated_snn_res.1"
[1] "Adding Seurat clusters..."
[1] "Clusterings created..."
[1] "Clustering ID: 0"
[1] "Adding Seurat markers..."
Seurat markers for clustering resolution 1 have not been computed.[1] "Seurat, integrated_snn_res.2"
[1] "Adding Seurat clusters..."
[1] "Clusterings already exists..."
[1] "Clustering ID: 1"
[1] "Adding Seurat markers..."
Seurat markers for clustering resolution 2 have not been computed.[1] "Seurat, integrated_snn_res.0.6"
[1] "Adding Seurat clusters..."
[1] "Clusterings already exists..."
[1] "Clustering ID: 2"
[1] "Adding Seurat markers..."
Seurat markers for clustering resolution 0.6 have not been computed.[1] "Seurat, integrated_snn_res.0.25"
[1] "Adding Seurat clusters..."
[1] "Clusterings already exists..."
[1] "Clustering ID: 3"
[1] "Adding Seurat markers..."
Seurat markers for clustering resolution 0.25 have not been computed.[1] "Seurat, integrated_snn_res.0.4"
[1] "Adding Seurat clusters..."
[1] "Clusterings already exists..."
[1] "Clustering ID: 4"
[1] "Adding Seurat markers..."
Seurat markers for clustering resolution 0.4 have not been computed.[1] "Seurat, integrated_snn_res.0.2"
[1] "Adding Seurat clusters..."
[1] "Clusterings already exists..."
[1] "Clustering ID: 5"
[1] "Adding Seurat markers..."
Seurat markers for clustering resolution 0.2 have not been computed.[1] "Seurat, integrated_snn_res.0.3"
[1] "Adding Seurat clusters..."
[1] "Clusterings already exists..."
[1] "Clustering ID: 6"
[1] "Adding Seurat markers..."
Seurat markers for clustering resolution 0.3 have not been computed.

It came out as if there was no markers calculated for res 0.2 even though I provided the path in the list.

tropfenameimer commented 3 years ago

hi saeed, can you try again but with slightly different marker names:

wilcoxon_test_markers <- list(integrated_snn_res.0.2 = paste0(getwd(),"/Markers_0.2.rds")
saeedfc commented 3 years ago

Hi @tropfenameimer ,

Thanks! It seems to have worked now. output


[1] "Seurat, integrated_snn_res.0.2"
[1] "Adding Seurat clusters..."
[1] "Clusterings created..."
[1] "Clustering ID: 0"
[1] "Adding Seurat markers..."
[1] "Adding markers for clustering 0..."
[1] "Adding metrics for clustering 0..."

However, upon uploading to scope, I do not understand how I can utilize the markers list? Is this just a way to store the markers list in the loom file? Or is there a tool in Scope where we can see the top markers of a cluster?

Also, you may kindly consider about updating the vignette. The format, for instance the naming (integrated_snn_res.0.2 instead of res.0.2) I used here, and all is difficult to decipher from the vignette. Due to different errors and troubles over time, this is how I now assemble my loom object from Seurat. The features might have got better since I made this script. I leave it here in case if it is useful for someone else.

##Import Seurat Object
SO <- readRDS("Seurat_Object")
##Extract normalized RNA counts for visualization
exp_mtx <- GetAssayData(SO, assay = "RNA", slot = "data")
exp_mtx <- as.matrix(exp_mtx1)
## Extract UMAP embedding
umap_mtx <- as.data.frame(Embeddings(SO, reduction = "umap"))
##Create your basic loom file
file.name <- "Data_Feb18_2021.loom"
build_loom(
  file.name=file.name,
  dgem=exp_mtx,
  title="Data 2020", # A namr for your data
  genome="Human", # Just for user information, not used internally
  default.embedding=umap_mtx,
  default.embedding.name="Seurat_UMAP"
)
#Now you have created the basic loom file. This file is now sufficient to visualize genes on the embeddings.

#Below is how I add the clusterings. 
#open the loom file first
loom <- open_loom("Data_Feb18_2021.loom", mode = 'r+')

#Load the DEG markers file as a data frame
wilcoxon_test_markers_csv <- read.csv(paste0(my_path,"/Full_Markers_Jan27_ONLY_POS.csv"), row.names = 1, stringsAsFactors = F)
#Save it as an RDS file
saveRDS(wilcoxon_test_markers, paste0(my_path,"/Full_Markers_Jan27_ONLY_POS.rds"))
#Make a named list of path to the markers file
#Name should be the resolution mentioned on the column name of corresponding seurat clustering for which you calculated the markers Eg: here 'res.0.2'
wilcoxon_test_markers <- list(integrated_snn_res.0.2 = paste0(my_path,"/Full_Markers_Jan27_ONLY_POS.rds"))

#Also for the corresponding resolution , if you have already annotated names for each cluster number, you can add it as a data.frame
seurat.annotation <- data.frame(integrated_snn_res.0.2 = unique(as.numeric(as.character(SO$integrated_snn_res.0.2))), annotation = unique(as.character(SO$subtype)))

#Add the clustering here
add_seurat_clustering(loom = loom
                      , seurat = SO
                      , seurat.markers.file.path.list = wilcoxon_test_markers
                      ,seurat.clustering.prefix = 'integrated_snn_res.'
                      ,annotation = seurat.annotation #the DF of annotation; if not annotated, use NULL
                      ,annotation.cluster.id.cn = 'integrated_snn_res.0.2' #Column name for seurat cluster numbers of the annotation dataframe
                      ,annotation.cluster.description.cn = 'annotation' #Column name for the cell types of the annotation dataframe
                      , seurat.marker.metric.accessors = c("avg_logFC", "p_val_adj")
                      , seurat.marker.metric.names = c("Avg. logFC", "Adj. p-value")
                      , seurat.marker.metric.description = c("Average log fold change from Wilcox differential test", "Adjusted p-value using Bonferroni correction based on the total number of genes in the dataset"))

add_seurat_clustering(loom = loom
                      , seurat = SO
                      , seurat.markers.file.path.list = wilcoxon_test_markers
                      ,seurat.clustering.prefix = 'integrated_snn_res.'
                      ,annotation = NULL #the DF of annotation; if not annotated, use NULL
                      ,annotation.cluster.id.cn = NULL #Column name for seurat cluster numbers of the annotation dataframe
                      ,annotation.cluster.description.cn = NULL #Column name for the cell types of the annotation dataframe
                      , seurat.marker.metric.accessors = c("avg_logFC", "p_val_adj")
                      , seurat.marker.metric.names = c("Avg. logFC", "Adj. p-value")
                      , seurat.marker.metric.description = c("Average log fold change from Wilcox differential test", "Adjusted p-value using Bonferroni correction based on the total number of genes in the dataset"))

##Add extra clusterings as column attributes
add_col_attr(loom=loom, key = "Resolution_1.0",value= as.character(SO@meta.data$res_1.0), as.annotation=T)
add_col_attr(loom=loom, key = "Resolution_0.6",value= as.character(SO@meta.data$res_0.6), as.annotation=T)
add_col_attr(loom=loom, key = "Resolution_0.4",value= as.character(SO@meta.data$res_0.4), as.annotation=T)
##Addother categorical variables or annotations
add_col_attr(loom=loom, key = "Sample_type", value= as.character(SO@meta.data$Sample_type),
             as.annotation=T)
add_col_attr(loom=loom, key = "Sample", value= as.character(SO@meta.data$Sample), as.annotation=T)
add_col_attr(loom=loom, key = "Patient_Type", value= as.character(SO@meta.data$Patient_Type), as.annotation=T)
add_col_attr(loom=loom, key = "Segment", value= as.character(SO@meta.data$Segment), as.annotation=T)
add_col_attr(loom=loom, key = "Gender", value= as.character(SO@meta.data$Gender), as.annotation=T)
add_col_attr(loom=loom, key = "Cell_Cycle_Phase", value= as.character(SO@meta.data$Phase), as.annotation=T)
##Add continuous variables or metrics
add_col_attr(loom=loom, key = "nCount_RNA", value= as.numeric(SO@meta.data$nCount_RNA), as.metric = T)
add_col_attr(loom=loom, key = "nFeature_RNA", value= as.numeric(SO@meta.data$nFeature_RNA), as.metric = T)
add_col_attr(loom=loom, key = "percent.ribo_protein", value= as.numeric(SO@meta.data$percent.ribo), as.metric = T)
add_col_attr(loom=loom, key = "percent.mt", value= as.numeric(SO@meta.data$percent.mt), as.metric = T)
add_col_attr(loom=loom, key = "S_phase_module_score", value= as.numeric(SO@meta.data$S.Score), as.metric = T)
add_col_attr(loom=loom, key = "G2M_phase_module_score", value= as.numeric(SO@meta.data$G2M.Score), as.metric = T)
add_col_attr(loom=loom, key = "ribo_protein_gene_module_score", value= as.numeric(SO@meta.data$rp_gene_module_score), as.metric = T)

finalize(loom=loom) 
close_loom(loom= loom)

Kind regards, Saeed

dweemx commented 3 years ago

Hi @saeedfc,

I agree with you. Vignette is quite old and it would be nice to have it refurbished. Unfortunately, I haven't taken time to update it yet.

Regarding the markers in SCope, you can see them in the right sidebar when you query a cluster.

saeedfc commented 3 years ago

Hi @dweemx ,

Thanks for the remarks on scope. Could it be that it didn't work for me as then? I tried some of the data you have already deposited as well. There also if I select a cluster, I don't see the markers associated with the cluster, rather I do see which clustering, how many proportion of cells etc..

Kind regards, Saeed

tropfenameimer commented 3 years ago

hi @saeedfc, we are working on improving SCopeLoomR and the documentation.

in the mean time, please try adding annotated clusters along with markers as follows:

cluster_name <- "integrated_snn_res.0.2"
cluster_list <- setNames(SO@meta.data[, cluster_name], rownames(SO@meta.data))
annot_list <- setNames(seurat.annotation[cluster_list, "annotation"], names(cluster_list))

add_annotated_clustering(loom = loom,
        group = "Seurat",
        name = cluster_name,
        clusters = cluster_list,
        annotation = annot_list,
        is.default = T,
        overwrite.default = NULL)

markers <- readRDS(paste0(my_path,"/Full_Markers_Jan27_ONLY_POS.rds"))
add_clustering_markers(loom = loom,
        clustering.id = 0,
        clustering.markers = split(markers, markers$cluster),
        marker.metric.accessors = c("avg_logFC", "p_val_adj"),
        marker.metric.names = c("Avg. logFC", "adjusted P-value"),
        marker.metric.description = c("Average log fold change", "Adjusted p-value (BF)"))
close_loom(loom)

you should then be able to select a cluster in SCope and see the marker list for this cluster in the right-hand pannel.

saeedfc commented 3 years ago

@tropfenameimer

Hi,

I tried this code; However, I am getting an error. Trying to work it out, however, no solution till now.

> add_annotated_clustering(loom = loom,
         group = "Seurat",
         name = cluster_name,
         clusters = cluster_list,
         annotation = annot_list,
         is.default = T,
         overwrite.default = T)
[1] "Clusterings created..."
Error in if (nchar(x = d) > 0) { : missing value where TRUE/FALSE needed
> traceback()
4: FUN(X[[i]], ...)
3: lapply(X = seq_along(along.with = unique.clusters), FUN = function(cluster.idx) {
       cluster.id <- unique.clusters[[cluster.idx]]
       if (is.numeric(x = cluster.id)) {
           description <- paste("NDA - Cluster", cluster.id)
       }
       else if (is.character(x = cluster.id)) {
           cluster.id <- cluster.idx - 1
           description <- cluster.id
       }
       else {
           stop("Cluster labels are required to be of class character or numeric.")
       }
       if (!is.null(x = annotation)) {
           annotation <- annotation[names(x = clusters)]
           d <- as.character(x = unique(x = annotation[clusters == 
               cluster.id]))
           if (length(x = d) > 1) {
               stop("Annotation is not unique: multiple annotation correspond to a cluster ID.")
           }
           if (nchar(x = d) > 0) {
               description <- paste0(d, " (", cluster.id, ")")
           }
       }
       return(list(id = cluster.id, description = description))
   })
2: add_global_md_clustering(loom = loom, id = id, group = group, 
       name = name, clusters = clusters_as_factor_ids, annotation = annotation)
1: add_annotated_clustering(loom = loom, group = "Seurat", name = cluster_name, 
       clusters = cluster_list, annotation = annot_list, is.default = T, 
       overwrite.default = T)
dmsalsgh97 commented 3 years ago

Hi, I'm encountering the same issue...

In my think, this error occurs when the cluster has no cell.

Because I'm dealing with WT/KO conditions, I integrated WT/KO Seurat objects using CCA, and clustered them. So there can be an empty cluster for the specific condition.

I've run the code below for 6 Seurat objects, but the error occurred only when there is an 'empty cluster'.

library(Seurat)
library(SCopeLoomR)

SeuratObj <- readRDS("~/SP.sct.pca15.rds")
SeuratObj[["percent.mt"]] <- PercentageFeatureSet(SeuratObj, pattern = "^mt-")
SeuratObj[["groud_id"]] <- lapply(list(SeuratObj@meta.data[,1]), function(x) substr(x,3,8))

H_Epi <- subset(SeuratObj, groud_id == "H_Epi ")

build_loom(file.name = "~/H.Sp.sct.pca15.loom",
        dgem = H_Epi@assays$RNA@counts,
        title = "H.SQ.sct.pca15",
        default.embedding = H_Epi@reductions$umap@cell.embeddings,
        default.embedding.name = "umap.rna")

loom <- open_loom("/node210data/project/mouse_NK_cell/scenic_out/Conditional/H.Sp.sct.pca15.loom", mode = "r+")

add_embedding(loom = loom, 
              embedding = H_Epi@reductions$pca@cell.embeddings,
              name = "pca")

add_col_attr(loom = loom, key = "batch",
              value = H_Epi@meta.data$orig.ident, as.annotation = TRUE)

kmeans_markers <- FindAllMarkers(H_Epi, only.pos = TRUE, min.pct = 0.5, logfc.threshold = 0.5)

add_seurat_clustering(loom = loom,
        seurat = H_Epi,
        seurat.assay = "RNA",
        seurat.clustering.prefix = "integrated_snn_res.",
        default.clustering.resolution = "res.0.5"
)

add_clustering_markers(loom = loom, 
        clustering.id = 1,
        clustering.markers = split(kmeans_markers, kmeans_markers$cluster),
        marker.metric.accessors = c("avg_logFC", "p_val_adj"),
        marker.metric.names = c("Avg. log2FC", "adjusted P-value"),
        marker.metric.description = c("Average log fold change", "Adjusted p-value (BF)")
)

close_loom(loom)

below is the error message from add_seurat_clustering.

[1] "Seurat, integrated_snn_res.0.8"
[1] "Adding Seurat clusters..."
[1] "Clusterings created..."
Error in if (nchar(x = d) > 0) {: argument is of length zero
Traceback:

1. add_seurat_clustering(loom = loom, seurat = L_Epi, seurat.assay = "RNA", 
 .     seurat.clustering.prefix = "integrated_snn_res.", default.clustering.resolution = "res.0.5")
2. add_annotated_clustering(loom = loom, group = "Seurat", name = paste("Seurat,", 
 .     paste0(seurat.clustering.prefix, res)), clusters = cluster_ids, 
 .     annotation = cluster_annotation, is.default = is_default_clustering, 
 .     overwrite.default = default.clustering.overwrite)
3. add_global_md_clustering(loom = loom, id = id, group = group, 
 .     name = name, clusters = clusters_as_factor_ids, annotation = annotation)
4. lapply(X = seq_along(along.with = unique.clusters), FUN = function(cluster.idx) {
 .     cluster.id <- unique.clusters[[cluster.idx]]
 .     if (is.numeric(x = cluster.id)) {
 .         description <- paste("NDA - Cluster", cluster.id)
 .     }
 .     else if (is.character(x = cluster.id)) {
 .         cluster.id <- cluster.idx - 1
 .         description <- cluster.id
 .     }
 .     else {
 .         stop("Cluster labels are required to be of class character or numeric.")
 .     }
 .     if (!is.null(x = annotation)) {
 .         annotation <- annotation[names(x = clusters)]
 .         d <- as.character(x = unique(x = annotation[clusters == 
 .             cluster.id]))
 .         if (length(x = d) > 1) {
 .             stop("Annotation is not unique: multiple annotation correspond to a cluster ID.")
 .         }
 .         if (nchar(x = d) > 0) {
 .             description <- paste0(d, " (", cluster.id, ")")
 .         }
 .     }
 .     return(list(id = cluster.id, description = description))
 . })
5. FUN(X[[i]], ...)

Thansk,