Closed raysinensis closed 3 years ago
for cortex and reference RDS, see Seurat tutorial
library(tidyverse)
library(clustifyr)
## build ref
allen_reference <- readRDS("allen_cortex.rds")
allen_ref <- average_clusters(allen_reference@assays$RNA@counts,
metadata = allen_reference@meta.data,
cluster_col = "subclass",
if_log = FALSE)
## find variable genes (better performance with clustifyr than SCT variable genes)
library(M3Drop)
Normalized_data <- M3DropCleanData(cortex@assays$Spatial@counts,
labels = rownames(cortex@assays$Spatial@counts),
is.counts = TRUE,
suppress.plot = TRUE)
fits <- M3DropDropoutModels(Normalized_data$data, suppress.plot = TRUE)
markers_M3Drop <- M3DropFeatureSelection(Normalized_data$data,
mt_method = "fdr",
mt_threshold = 0.01,
suppress.plot = TRUE)
## cell ident assignment, per "cell", using raw counts instead of SCT normalized data
ls2 <- clustify(cortex@assays$Spatial@counts,
ref_mat = allen_ref,
metadata = cortex@meta.data,
cluster_col = "seurat_clusters",
query_genes = markers_M3Drop$Gene,
per_cell = T,
if_log = FALSE) %>%
cor_to_call(metadata = cortex@meta.data,
cluster_col = "seurat_clusters") %>%
call_to_metadata(metadata = cortex@meta.data,
cluster_col = "seurat_clusters",
per_cell = T)
## alternatively, use wrapper function
ls3 <- clustify(cortex,
ref_mat = allen_ref,
cluster_col = "seurat_clusters",
query_genes = markers_M3Drop$Gene,
per_cell = T,
if_log = FALSE)
cortex_ref <- seurat_ref(cortex, cluster_col = "seurat_clusters", if_log = FALSE)
## plot
Idents(cortex) <- "type"
SpatialDimPlot(cortex, label = F,
cells.highlight = CellsByIdentities(object = cortex,
idents = c("Astro",
"L2/3 IT",
"L4",
"L5 PT",
"L5 IT",
"L6 CT",
"L6 IT",
"L6b",
"Oligo")),
facet.highlight = TRUE)
library(tidyverse)
library(clustifyr)
## recluster at high resolution, since clustify_lists works best on clusters
cortex <- SCTransform(cortex, assay = "Spatial", verbose = FALSE)
cortex <- RunPCA(cortex, assay = "SCT", verbose = FALSE)
cortex <- FindNeighbors(cortex, reduction = "pca", dims = 1:30)
cortex <- FindClusters(cortex, verbose = FALSE, resolution = 2)
# find predictive markers, use `presto` for much faster marker finding
allen_reference <- readRDS("allen_cortex.rds")
allen_marker <- presto::wilcoxauc(allen_reference, 'subclass') %>%
filter(auc > 0.8) %>%
filter(group %in% c("Astro", "L2/3 IT", "L4", "L5 PT", "L5 IT", "L6 CT", "L6 IT", "L6b", "Oligo"))
allen_marker_list <- split(allen_marker$feature, allen_marker$group) %>%
pos_neg_marker()
## clustify_lists wrapper function
ls3 <- clustify_lists(cortex,
marker = allen_marker_list,
marker_inmatrix = TRUE,
cluster_col = "seurat_clusters",
metric = "posneg",
if_log = FALSE)
## plot
Idents(ls3) <- "type"
SpatialDimPlot(ls3, crop = F, pt.size.factor = 1, label = TRUE, label.size = 2) + NoLegend()
To do: in seurat wrapper, being able to select assay warn about SCTransform?
"At ~50um, spots from the visium assay will encompass the expression profiles of multiple cells. For the growing list of systems where scRNA-seq data is available, users may be interested to 'deconvolute' each of the spatial voxels to predict the underlying composition of cell types. In preparing this vignette, we tested a wide variety of decovonlution and integration methods, using a reference scRNA-seq dataset of ~14,000 adult mouse cortical cell taxonomy from the Allen Institute, generated with the SMART-Seq2 protocol. We consistently found superior performance using integration methods (as opposed to deconvolution methods), likely because of substantially different noise models that characterize spatial and single-cell datasets, and integration methods are specifiically designed to be robust to these differences."
Should compare performance of clustifyr vs seuart anchor approach