rnabioco / clustifyr

Infer cell types in scRNA-seq data using bulk RNA-seq or gene sets
https://rnabioco.github.io/clustifyr/
MIT License
112 stars 14 forks source link

use with spatial data #370

Closed raysinensis closed 3 years ago

raysinensis commented 3 years ago

"At ~50um, spots from the visium assay will encompass the expression profiles of multiple cells. For the growing list of systems where scRNA-seq data is available, users may be interested to 'deconvolute' each of the spatial voxels to predict the underlying composition of cell types. In preparing this vignette, we tested a wide variety of decovonlution and integration methods, using a reference scRNA-seq dataset of ~14,000 adult mouse cortical cell taxonomy from the Allen Institute, generated with the SMART-Seq2 protocol. We consistently found superior performance using integration methods (as opposed to deconvolution methods), likely because of substantially different noise models that characterize spatial and single-cell datasets, and integration methods are specifiically designed to be robust to these differences."

Should compare performance of clustifyr vs seuart anchor approach

raysinensis commented 3 years ago

for cortex and reference RDS, see Seurat tutorial

clustify

library(tidyverse)
library(clustifyr)
## build ref
allen_reference <- readRDS("allen_cortex.rds")
allen_ref <- average_clusters(allen_reference@assays$RNA@counts,
                              metadata = allen_reference@meta.data,
                              cluster_col = "subclass",
                              if_log = FALSE)

## find variable genes (better performance with clustifyr than SCT variable genes)
library(M3Drop)
Normalized_data <- M3DropCleanData(cortex@assays$Spatial@counts,
                                   labels = rownames(cortex@assays$Spatial@counts),
                                   is.counts = TRUE,
                                   suppress.plot = TRUE)
fits <- M3DropDropoutModels(Normalized_data$data, suppress.plot = TRUE)
markers_M3Drop <- M3DropFeatureSelection(Normalized_data$data,
                                             mt_method = "fdr",
                                             mt_threshold = 0.01,
                                             suppress.plot = TRUE)

## cell ident assignment, per "cell", using raw counts instead of SCT normalized data
ls2 <- clustify(cortex@assays$Spatial@counts, 
                ref_mat = allen_ref,
                metadata = cortex@meta.data,
                cluster_col = "seurat_clusters",
                query_genes = markers_M3Drop$Gene,
                per_cell = T,
                if_log = FALSE) %>% 
  cor_to_call(metadata = cortex@meta.data,
              cluster_col = "seurat_clusters") %>% 
  call_to_metadata(metadata = cortex@meta.data,
                   cluster_col = "seurat_clusters", 
                   per_cell = T)

## alternatively, use wrapper function
ls3 <- clustify(cortex, 
                ref_mat = allen_ref,
                cluster_col = "seurat_clusters",
                query_genes = markers_M3Drop$Gene,
                per_cell = T,
                if_log = FALSE)
cortex_ref <- seurat_ref(cortex, cluster_col = "seurat_clusters", if_log = FALSE)

## plot
Idents(cortex) <- "type"
SpatialDimPlot(cortex, label = F,
               cells.highlight = CellsByIdentities(object = cortex, 
                                                   idents = c("Astro", 
                                                              "L2/3 IT", 
                                                              "L4", 
                                                              "L5 PT", 
                                                              "L5 IT", 
                                                              "L6 CT", 
                                                              "L6 IT", 
                                                              "L6b", 
                                                              "Oligo")),
               facet.highlight = TRUE)

spatial_clustifyr

clustify_list

library(tidyverse)
library(clustifyr)
## recluster at high resolution, since clustify_lists works best on clusters
cortex <- SCTransform(cortex, assay = "Spatial", verbose = FALSE)
cortex <- RunPCA(cortex, assay = "SCT", verbose = FALSE)
cortex <- FindNeighbors(cortex, reduction = "pca", dims = 1:30)
cortex <- FindClusters(cortex, verbose = FALSE, resolution = 2)

# find predictive markers, use `presto` for much faster marker finding
allen_reference <- readRDS("allen_cortex.rds")
allen_marker <- presto::wilcoxauc(allen_reference, 'subclass') %>% 
  filter(auc > 0.8) %>% 
  filter(group %in% c("Astro", "L2/3 IT", "L4", "L5 PT", "L5 IT", "L6 CT", "L6 IT", "L6b", "Oligo"))

allen_marker_list <- split(allen_marker$feature, allen_marker$group) %>% 
  pos_neg_marker()

## clustify_lists wrapper function
ls3 <- clustify_lists(cortex, 
                      marker = allen_marker_list,
                      marker_inmatrix = TRUE,
                      cluster_col = "seurat_clusters",
                      metric = "posneg",
                      if_log = FALSE)

## plot
Idents(ls3) <- "type"
SpatialDimPlot(ls3, crop = F, pt.size.factor = 1, label = TRUE, label.size = 2) + NoLegend()

clustify_lists_spatial

raysinensis commented 3 years ago

To do: in seurat wrapper, being able to select assay warn about SCTransform?