Closed mandylr closed 5 years ago
These marker genes are now in pbmc_marker_genes.
library(scrunchy)
# Plot the PBMC marker genes on the UMAP projection (UMAP1 vs. UMAP2).
plot_dims_multi(
  fsce_tidy,
  features = pbmc_marker_genes$gene_name,
  x = UMAP1, y = UMAP2,
  size = 0.5
)
Created on 2019-01-15 by the reprex package (v0.2.1)
I used this SingleR package to classify cell types and then added those cell classifications to the scrunchy objects. From my understanding, SingleR can either identify cell types for each cell independently of clusters (which takes about 2 hours to run) or assign cell types to clusters. It uses reference bulk RNA-seq data from isolated cell types. It works pretty well and may be useful in determining heterogeneity within cell types.
library(tidyverse)
#> Warning: package 'tibble' was built under R version 3.5.2
#> Warning: package 'purrr' was built under R version 3.5.2
library(scrunchy)
library(MultiAssayExperiment)
#> Loading required package: SummarizedExperiment
#> Loading required package: GenomicRanges
#> Loading required package: stats4
#> Loading required package: BiocGenerics
#> Loading required package: parallel
#>
#> Attaching package: 'BiocGenerics'
#> The following objects are masked from 'package:parallel':
#>
#> clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
#> clusterExport, clusterMap, parApply, parCapply, parLapply,
#> parLapplyLB, parRapply, parSapply, parSapplyLB
#> The following object is masked from 'package:scrunchy':
#>
#> normalize
#> The following objects are masked from 'package:dplyr':
#>
#> combine, intersect, setdiff, union
#> The following objects are masked from 'package:stats':
#>
#> IQR, mad, sd, var, xtabs
#> The following objects are masked from 'package:base':
#>
#> anyDuplicated, append, as.data.frame, basename, cbind,
#> colMeans, colnames, colSums, dirname, do.call, duplicated,
#> eval, evalq, Filter, Find, get, grep, grepl, intersect,
#> is.unsorted, lapply, lengths, Map, mapply, match, mget, order,
#> paste, pmax, pmax.int, pmin, pmin.int, Position, rank, rbind,
#> Reduce, rowMeans, rownames, rowSums, sapply, setdiff, sort,
#> table, tapply, union, unique, unsplit, which, which.max,
#> which.min
#> Loading required package: S4Vectors
#>
#> Attaching package: 'S4Vectors'
#> The following objects are masked from 'package:dplyr':
#>
#> first, rename
#> The following object is masked from 'package:tidyr':
#>
#> expand
#> The following object is masked from 'package:base':
#>
#> expand.grid
#> Loading required package: IRanges
#>
#> Attaching package: 'IRanges'
#> The following objects are masked from 'package:dplyr':
#>
#> collapse, desc, slice
#> The following object is masked from 'package:purrr':
#>
#> reduce
#> Loading required package: GenomeInfoDb
#> Loading required package: Biobase
#> Welcome to Bioconductor
#>
#> Vignettes contain introductory material; view with
#> 'browseVignettes()'. To cite Bioconductor, see
#> 'citation("Biobase")', and for packages 'citation("pkgname")'.
#> Loading required package: DelayedArray
#> Loading required package: matrixStats
#>
#> Attaching package: 'matrixStats'
#> The following objects are masked from 'package:Biobase':
#>
#> anyMissing, rowMedians
#> The following object is masked from 'package:dplyr':
#>
#> count
#> Loading required package: BiocParallel
#> Warning: package 'BiocParallel' was built under R version 3.5.2
#>
#> Attaching package: 'DelayedArray'
#> The following objects are masked from 'package:matrixStats':
#>
#> colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
#> The following object is masked from 'package:purrr':
#>
#> simplify
#> The following objects are masked from 'package:base':
#>
#> aperm, apply
library(SingleCellExperiment)
#> Warning: package 'SingleCellExperiment' was built under R version 3.5.2
# Pull in the tidier helpers from a local checkout of scrunchy.
source("/Users/mandyricher/src/scrunchy/R/tidiers.R")

# Analysis directory; the trailing slash matters because paths below are
# assembled with paste0().
dir <- "/Users/mandyricher/hesselberthlab/projects/10x_haircut/20181214/analysis/"

# Load cell type classifications produced by SingleR.
singler_tsv <- paste0(dir, "SingleR_cell_classifications.tsv.gz")
df <- read_tsv(singler_tsv)
#> Parsed with column specification:
#> cols(
#> cell_id = col_character(),
#> SingleR = col_character(),
#> HPCA1 = col_character(),
#> HPCA2 = col_character(),
#> Blueprint_Endcode1 = col_character(),
#> Blueprint_Endcode2 = col_character()
#> )
# Print the SingleR classification table to inspect its columns.
df
#> # A tibble: 3,895 x 6
#> cell_id SingleR HPCA1 HPCA2 Blueprint_Endco… Blueprint_Endco…
#> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 AAACCTGG… CD4 T ce… T_cell:C… T_cell:… CD4+ T-cells CD4+ T-cells
#> 2 AAACCTGG… CD8 T ce… T_cell:C… T_cell:… CD8+ Tem CD8+ Tem
#> 3 AAACCTGT… CD4 T ce… T_cell:C… T_cell:… CD4+ T-cells CD4+ T-cells
#> 4 AAACGGGA… CD14+ Mo… Monocyte… Monocyt… Monocytes Monocytes
#> 5 AAACGGGA… CD4 T ce… T_cell:C… T_cell:… CD4+ T-cells CD4+ T-cells
#> 6 AAACGGGA… CD4 T ce… T_cell:C… T_cell:… CD4+ T-cells CLP
#> 7 AAACGGGC… CD4 T ce… T_cell:C… T_cell:… CD4+ T-cells CD4+ T-cells
#> 8 AAACGGGC… NK cells NK_cell:… NK_cell… NK cells NK cells
#> 9 AAACGGGG… CD4 T ce… T_cell:C… T_cell:… CD4+ Tem CD4+ Tem
#> 10 AAAGATGC… NK cells NK_cell NK_cell… NK cells NK cells
#> # … with 3,885 more rows
# Load scrunchy data; cell types were already added to colData using
# add_labels() before the objects were saved.
fsce_rdata <- paste0(dir, "FSCE_objects_with_cell_types_from_SingleR.RData")
load(fsce_rdata)

# Show the per-cell metadata attached to the rnaseq experiment.
colData(fsce2[["rnaseq"]])
#> DataFrame with 4009 rows and 8 columns
#> cell_id k_cluster cell_type
#> <character> <character> <character>
#> AAACCTGGTACCATCA AAACCTGGTACCATCA 7 CD4 T cells
#> AAACCTGGTATGAATG AAACCTGGTATGAATG 5 CD8 T cells
#> AAACCTGTCAACACGT AAACCTGTCAACACGT 7 CD4 T cells
#> AAACGGGAGAGCAATT AAACGGGAGAGCAATT 8 CD14+ Monocytes
#> AAACGGGAGTACGCGA AAACGGGAGTACGCGA 7 CD4 T cells
#> ... ... ... ...
#> TTTGTCAGTTCAGGCC TTTGTCAGTTCAGGCC 7 CD4 T cells
#> TTTGTCAGTTGTTTGG TTTGTCAGTTGTTTGG 7 CD4 T cells
#> TTTGTCAGTTTGACAC TTTGTCAGTTTGACAC 6 NK cells
#> TTTGTCATCAATACCG TTTGTCATCAATACCG 8 FCGR3A+ Monocytes
#> TTTGTCATCTGATTCT TTTGTCATCTGATTCT 4 CD4 T cells
#> SingleR HPCA1
#> <character> <character>
#> AAACCTGGTACCATCA CD4 T cells T_cell:CD4+_Naive
#> AAACCTGGTATGAATG CD8 T cells T_cell:CD8+_effector_memory
#> AAACCTGTCAACACGT CD4 T cells T_cell:CD4+_Naive
#> AAACGGGAGAGCAATT CD14+ Monocytes Monocyte:CD16-
#> AAACGGGAGTACGCGA CD4 T cells T_cell:CD4+_Naive
#> ... ... ...
#> TTTGTCAGTTCAGGCC CD4 T cells T_cell:CD4+_Naive
#> TTTGTCAGTTGTTTGG CD4 T cells T_cell:CD4+_Naive
#> TTTGTCAGTTTGACAC NK cells NK_cell
#> TTTGTCATCAATACCG FCGR3A+ Monocytes Monocyte:CD16+
#> TTTGTCATCTGATTCT CD4 T cells T_cell:CD4+_central_memory
#> HPCA2 Blueprint_Endcode1
#> <character> <character>
#> AAACCTGGTACCATCA T_cell:CD4+_Naive CD4+ T-cells
#> AAACCTGGTATGAATG T_cell:CD4+_effector_memory CD8+ Tem
#> AAACCTGTCAACACGT T_cell:CD4+_Naive CD4+ T-cells
#> AAACGGGAGAGCAATT Monocyte:CD14+ Monocytes
#> AAACGGGAGTACGCGA T_cell:CD4+_Naive CD4+ T-cells
#> ... ... ...
#> TTTGTCAGTTCAGGCC T_cell:CD4+_Naive CD4+ T-cells
#> TTTGTCAGTTGTTTGG T_cell:CD4+_Naive CD4+ T-cells
#> TTTGTCAGTTTGACAC NK_cell NK cells
#> TTTGTCATCAATACCG Monocyte:CD16+ Monocytes
#> TTTGTCATCTGATTCT T_cell:CD4+_central_memory CD4+ Tcm
#> Blueprint_Endcode2
#> <character>
#> AAACCTGGTACCATCA CD4+ T-cells
#> AAACCTGGTATGAATG CD8+ Tem
#> AAACCTGTCAACACGT CD4+ T-cells
#> AAACGGGAGAGCAATT Monocytes
#> AAACGGGAGTACGCGA CD4+ T-cells
#> ... ...
#> TTTGTCAGTTCAGGCC CD4+ T-cells
#> TTTGTCAGTTGTTTGG CD4+ T-cells
#> TTTGTCAGTTTGACAC NK cells
#> TTTGTCATCAATACCG Monocytes
#> TTTGTCATCTGATTCT CD4+ T-cells
# Keep only cells whose cell_type is one of the labels SingleR assigned.
singler_labels <- unique(df$SingleR)
fsce2_tidy <- tidy_all(fsce2) %>%
  filter(cell_type %in% singler_labels)

# UMAP projection coloured by k_cluster.
plot_dims(fsce2_tidy, x = UMAP1, y = UMAP2, k_cluster)

# UMAP projection coloured by the SingleR label.
plot_dims(fsce2_tidy, x = UMAP1, y = UMAP2, SingleR)
#> Warning: Removed 31 rows containing missing values (geom_point).
# Large discrete palette: the Okabe-Ito colours followed by several
# ColorBrewer palettes, so plots with many categories have enough values.
brewer_colors <- function(name, n) {
  scales::brewer_pal(palette = name)(n)
}

discrete_palette_default <- c(
  palette_OkabeIto_black,
  brewer_colors("Paired", 12),
  brewer_colors("Set1", 9),
  brewer_colors("Set2", 8),
  brewer_colors("Dark2", 8)
)
# Using the simple classifications
plot_activity(fsce2_tidy, Uracil_45, SingleR) +
  xlim(0, 6.2) +
  theme(legend.position = "none")
#> Warning: Removed 31 rows containing missing values (geom_point).
# HPCA classifications
# The manual colour scale deliberately replaces the one plot_activity() sets
# (ggplot2 prints a "Scale for 'colour' is already present" message).
plot_activity(fsce2_tidy, Uracil_45, HPCA1) +
  xlim(0, 6.2) +
  theme(legend.position = "none") +
  scale_color_manual(values = discrete_palette_default)
#> Scale for 'colour' is already present. Adding another scale for
#> 'colour', which will replace the existing scale.
# Blueprint/ENCODE classifications (Blueprint_Endcode1 column)
# The manual colour scale deliberately replaces the one plot_activity() sets
# (ggplot2 prints a "Scale for 'colour' is already present" message).
plot_activity(fsce2_tidy, Uracil_45, Blueprint_Endcode1) +
  xlim(0, 6.2) +
  theme(legend.position = "none") +
  scale_color_manual(values = discrete_palette_default)
#> Scale for 'colour' is already present. Adding another scale for
#> 'colour', which will replace the existing scale.
Created on 2019-02-05 by the reprex package (v0.2.1)
I used this list to make the following cell type classifications.
Created on 2019-01-09 by the reprex package (v0.2.1)