ixxmu / mp_duty

抓取网络文章到github issues保存
https://archives.duty-machine.now.sh/
122 stars 30 forks source link

SingCellaR代码实操(一):PBMCs的标准流程 #2758

Closed ixxmu closed 2 years ago

ixxmu commented 2 years ago

https://mp.weixin.qq.com/s/m5Z6wWjR4zDUlFLfohh9fg

ixxmu commented 2 years ago

SingCellaR代码实操(一):PBMCs的标准流程 by 生信宝库

前言

Immugent在前一篇推文:SingCellaR:一站式研究细胞分化轨迹的功能调节的利器中,基于一项造血干细胞的研究结果初步介绍了SingCellaR的功能框架。从本节推文开始后续会有5篇有关SingCellaR代码实操的教程,分别不同角度分析多种来源的数据,以解决和发现不同的科学问题。

为了方便大家复现本次展示的流程,Immugent使用的是10X官网的5k_pbmc_v3数据,大家可以直接到10x genomics官网下载相应的数据,即可以进行本节代码的复现。



分析流程

首先是将数据导入到SingCellaR中。

library(SingCellaR)

data_matrices_dir<-"../SingCellaR_example_datasets/10x_genomics_5K_PBMCs/filtered_feature_bc_matrix/"
PBMCs<-new("SingCellaR")
PBMCs@dir_path_10x_matrix<-data_matrices_dir
PBMCs@sample_uniq_id<-"PBMCs"

load_matrices_from_cellranger(PBMCs,cellranger.version = 3)

PBMCs
## An object of class SingCellaR with a matrix of : 33538 genes across 5025 samples.


一行代码实现QC结果的展示,顶!

plot_cells_annotation(PBMCs,type="histogram")


还有其它QC结果展示的图,都很美观。

plot_cells_annotation(PBMCs,type="boxplot")
plot_UMIs_vs_Detected_genes(PBMCs)

检测双细胞和过滤双细胞。

filter_cells_and_genes(PBMCs,
                       min_UMIs=1000,
                       max_UMIs=30000,
                       min_detected_genes=300,
                       max_detected_genes=5000,
                       max_percent_mito=15,
                       genes_with_expressing_cells = 10,isRemovedDoublets = FALSE)

DoubletDetection_with_scrublet(PBMCs)
table(PBMCs@meta.data$Scrublet_type)
## 
## Doublets  Singlet 
##       96     4929

filter_cells_and_genes(PBMCs,
                       min_UMIs=1000,
                       max_UMIs=30000,
                       min_detected_genes=300,
                       max_detected_genes=5000,
                       max_percent_mito=15,
                       genes_with_expressing_cells = 10,isRemovedDoublets = TRUE)


以下就是和Seurat类似的单细胞常规处理流程。

normalize_UMIs(PBMCs,use.scaled.factor = T)
remove_unwanted_confounders(PBMCs,residualModelFormulaStr="~UMI_count+percent_mito")
get_variable_genes_by_fitting_GLM_model(PBMCs,mean_expr_cutoff = 0.05,disp_zscore_cutoff = 0.05)

remove_unwanted_genes_from_variable_gene_set(PBMCs,gmt.file = "../SingCellaR_example_datasets//Human_genesets/human.ribosomal-mitocondrial.genes.gmt",
                                             removed_gene_sets=c("Ribosomal_gene","Mitocondrial_gene"))
plot_variable_genes(PBMCs)                                             
runPCA(PBMCs,use.components=30,use.regressout.data = T)
plot_PCA_Elbowplot(PBMCs)
runTSNE(PBMCs,dim_reduction_method = "pca",n.dims.use = 10)
plot_tsne_label_by_qc(PBMCs,point.size = 0.1)

runUMAP(PBMCs,dim_reduction_method = "pca",n.dims.use = 10,n.neighbors = 20,
        uwot.metric = "euclidean")
plot_umap_label_by_a_feature_of_interest(PBMCs,feature = "UMI_count",point.size = 0.1,mark.feature = F)
identifyClusters(PBMCs,n.dims.use = 10,n.neighbors = 100,knn.metric = "euclidean")
plot_tsne_label_by_clusters(PBMCs,show_method = "louvain",point.size = 2)
plot_umap_label_by_clusters(PBMCs,show_method = "louvain",point.size = 0.80)

下面介绍SingCellaR的一项特有功能分析--Force-directed graph analysis,它结合了力定向图来可视化细胞轨迹。

runFA2_ForceDirectedGraph(PBMCs,n.dims.use = 10,
                          n.neighbors = 5,n.seed = 1,fa2_n_iter = 1000)
plot_forceDirectedGraph_label_by_clusters(PBMCs,show_method = "louvain",vertex.size = 1,
                                          background.color = "black")                          
findMarkerGenes(PBMCs,cluster.type = "louvain")
plot_heatmap_for_marker_genes(PBMCs,cluster.type = "louvain",n.TopGenes = 10,rowFont.size = 5)

cl3_marker_genes<-getMarkerGenesInfo(PBMCs,cluster.type = "louvain",cluster_id = "cl3")
cl3_marker_genes<-cl3_marker_genes[order(cl3_marker_genes$log2FC,decreasing = T),]
head(cl3_marker_genes)

plot_tsne_label_by_genes(PBMCs,gene_list = c("CCR7","CD163","GNLY","CD72"))
plot_forceDirectedGraph_label_by_genes(PBMCs,gene_list = c("CCR7","CD163","GNLY","CD72"))
plot_violin_for_marker_genes(PBMCs,gene_list = c("CCR7","CD163","GNLY","CD72"))
plot_bubble_for_genes_per_cluster(PBMCs,cluster.type = "louvain",IsApplyClustering = T,
                            IsClusterByRow = T,IsClusterByColumn = T,
                            gene_list=c("CCR7","CD163",
                                        "GNLY","CD72",
                                        "CD8A","CD8B","IL3RA"
                                        ),show.percent = T,buble.scale = 12,point.color1 = "gray",
                                        point.color2 = "firebrick1")
runKNN_Graph(PBMCs,n.dims.use = 10,n.neighbors = 10)
library(threejs)
plot_3D_knn_graph_label_by_clusters(PBMCs,show_method = "louvain",vertext.size = 0.25)
plot_3D_knn_graph_label_by_gene(PBMCs,show.gene = "GNLY",vertext.size = 0.30)
clustering_KMeansOnKNN_Graph(PBMCs, k=10, iter.max = 100)

plot_tsne_label_by_clusters(PBMCs,show_method = "kmeans",point.size = 1)
plot_umap_label_by_clusters(PBMCs,show_method = "kmeans",point.size = 1)

最后,SingCellaR还内置了GSEA分析流程,并且将结果可视化。

pre_rankedGenes_for_GSEA<-identifyGSEAPrerankedGenes_for_all_clusters(PBMCs,
                                                                    cluster.type = "louvain")
save(pre_rankedGenes_for_GSEA,file="../SingCellaR_example_datasets/10x_genomics_5K_PBMCs/PBMCs_preRankedGenes_for_GSEA.rdata")                 
#Run fGSEA
load("../SingCellaR_example_datasets/10x_genomics_5K_PBMCs/PBMCs_preRankedGenes_for_GSEA.rdata")
fgsea_Results<-Run_fGSEA_for_multiple_comparisons(GSEAPrerankedGenes_list = pre_rankedGenes_for_GSEA, eps = 0,
                    gmt.file = "../SingCellaR_example_datasets/Human_genesets/human.hematopoiesis.signature.genes.gmt")
plot_heatmap_for_fGSEA_all_clusters(fgsea_Results,isApplyCutoff = TRUE,
                                    use_pvalues_for_clustering=T,
                                    show_NES_score = T,fontsize_row = 10,
                                    adjusted_pval = 0.1,
                                    show_only_NES_positive_score = T,format.digits = 3,
                                    clustering_method = "ward.D",
                                    clustering_distance_rows = "euclidean",
                                    clustering_distance_cols = "euclidean",show_text_for_ns = F)
save(PBMCs,file="../SingCellaR_example_datasets/10x_genomics_5K_PBMCs/PBMCs_v0.1.SingCellaR.rdata")                                    

说在最后

从上面的分析流程大家也能感受到SingCellaR的功能还是十分齐全的,相对于Seurat包的分析结果,它在处理单细胞数据标准流程上有以下优点:

  1. QC过程简单粗暴,而且结果很容易理解;
  2. 可以通过一行代码鉴定双细胞并去除;
  3. 能进行Force-directed graph analysis,这种分析在有些数据中表现很好;
  4. 可以进行GSEA分析,内置的免疫细胞gmt文件很方便大家进行细胞注释。

好啦,本期推文到这就结束了,下期Immugent将会展示使用SingCellaR分析一个公共的疾病数据,并结合具体的科学问题进行解读。



关注下方公众号下回更新不迷路

ixxmu commented 2 years ago

SingCellaR代码实操(一):PBMCs的标准流程 by 生信宝库

前言

Immugent在前一篇推文:SingCellaR:一站式研究细胞分化轨迹的功能调节的利器中,基于一项造血干细胞的研究结果初步介绍了SingCellaR的功能框架。从本节推文开始后续会有5篇有关SingCellaR代码实操的教程,分别不同角度分析多种来源的数据,以解决和发现不同的科学问题。

为了方便大家复现本次展示的流程,Immugent使用的是10X官网的5k_pbmc_v3数据,大家可以直接到10x genomics官网下载相应的数据,即可以进行本节代码的复现。



分析流程

首先是将数据导入到SingCellaR中。

library(SingCellaR)

data_matrices_dir<-"../SingCellaR_example_datasets/10x_genomics_5K_PBMCs/filtered_feature_bc_matrix/"
PBMCs<-new("SingCellaR")
PBMCs@dir_path_10x_matrix<-data_matrices_dir
PBMCs@sample_uniq_id<-"PBMCs"

load_matrices_from_cellranger(PBMCs,cellranger.version = 3)

PBMCs
## An object of class SingCellaR with a matrix of : 33538 genes across 5025 samples.


一行代码实现QC结果的展示,顶!

plot_cells_annotation(PBMCs,type="histogram")


还有其它QC结果展示的图,都很美观。

plot_cells_annotation(PBMCs,type="boxplot")
plot_UMIs_vs_Detected_genes(PBMCs)

检测双细胞和过滤双细胞。

filter_cells_and_genes(PBMCs,
                       min_UMIs=1000,
                       max_UMIs=30000,
                       min_detected_genes=300,
                       max_detected_genes=5000,
                       max_percent_mito=15,
                       genes_with_expressing_cells = 10,isRemovedDoublets = FALSE)

DoubletDetection_with_scrublet(PBMCs)
table(PBMCs@meta.data$Scrublet_type)
## 
## Doublets  Singlet 
##       96     4929

filter_cells_and_genes(PBMCs,
                       min_UMIs=1000,
                       max_UMIs=30000,
                       min_detected_genes=300,
                       max_detected_genes=5000,
                       max_percent_mito=15,
                       genes_with_expressing_cells = 10,isRemovedDoublets = TRUE)


以下就是和Seurat类似的单细胞常规处理流程。

normalize_UMIs(PBMCs,use.scaled.factor = T)
remove_unwanted_confounders(PBMCs,residualModelFormulaStr="~UMI_count+percent_mito")
get_variable_genes_by_fitting_GLM_model(PBMCs,mean_expr_cutoff = 0.05,disp_zscore_cutoff = 0.05)

remove_unwanted_genes_from_variable_gene_set(PBMCs,gmt.file = "../SingCellaR_example_datasets//Human_genesets/human.ribosomal-mitocondrial.genes.gmt",
                                             removed_gene_sets=c("Ribosomal_gene","Mitocondrial_gene"))
plot_variable_genes(PBMCs)                                             
runPCA(PBMCs,use.components=30,use.regressout.data = T)
plot_PCA_Elbowplot(PBMCs)
runTSNE(PBMCs,dim_reduction_method = "pca",n.dims.use = 10)
plot_tsne_label_by_qc(PBMCs,point.size = 0.1)

runUMAP(PBMCs,dim_reduction_method = "pca",n.dims.use = 10,n.neighbors = 20,
        uwot.metric = "euclidean")
plot_umap_label_by_a_feature_of_interest(PBMCs,feature = "UMI_count",point.size = 0.1,mark.feature = F)
identifyClusters(PBMCs,n.dims.use = 10,n.neighbors = 100,knn.metric = "euclidean")
plot_tsne_label_by_clusters(PBMCs,show_method = "louvain",point.size = 2)
plot_umap_label_by_clusters(PBMCs,show_method = "louvain",point.size = 0.80)

下面介绍SingCellaR的一项特有功能分析--Force-directed graph analysis,它结合了力定向图来可视化细胞轨迹。

runFA2_ForceDirectedGraph(PBMCs,n.dims.use = 10,
                          n.neighbors = 5,n.seed = 1,fa2_n_iter = 1000)
plot_forceDirectedGraph_label_by_clusters(PBMCs,show_method = "louvain",vertex.size = 1,
                                          background.color = "black")                          
findMarkerGenes(PBMCs,cluster.type = "louvain")
plot_heatmap_for_marker_genes(PBMCs,cluster.type = "louvain",n.TopGenes = 10,rowFont.size = 5)

cl3_marker_genes<-getMarkerGenesInfo(PBMCs,cluster.type = "louvain",cluster_id = "cl3")
cl3_marker_genes<-cl3_marker_genes[order(cl3_marker_genes$log2FC,decreasing = T),]
head(cl3_marker_genes)

plot_tsne_label_by_genes(PBMCs,gene_list = c("CCR7","CD163","GNLY","CD72"))
plot_forceDirectedGraph_label_by_genes(PBMCs,gene_list = c("CCR7","CD163","GNLY","CD72"))
plot_violin_for_marker_genes(PBMCs,gene_list = c("CCR7","CD163","GNLY","CD72"))
plot_bubble_for_genes_per_cluster(PBMCs,cluster.type = "louvain",IsApplyClustering = T,
                            IsClusterByRow = T,IsClusterByColumn = T,
                            gene_list=c("CCR7","CD163",
                                        "GNLY","CD72",
                                        "CD8A","CD8B","IL3RA"
                                        ),show.percent = T,buble.scale = 12,point.color1 = "gray",
                                        point.color2 = "firebrick1")
runKNN_Graph(PBMCs,n.dims.use = 10,n.neighbors = 10)
library(threejs)
plot_3D_knn_graph_label_by_clusters(PBMCs,show_method = "louvain",vertext.size = 0.25)
plot_3D_knn_graph_label_by_gene(PBMCs,show.gene = "GNLY",vertext.size = 0.30)
clustering_KMeansOnKNN_Graph(PBMCs, k=10, iter.max = 100)

plot_tsne_label_by_clusters(PBMCs,show_method = "kmeans",point.size = 1)
plot_umap_label_by_clusters(PBMCs,show_method = "kmeans",point.size = 1)

最后,SingCellaR还内置了GSEA分析流程,并且将结果可视化。

pre_rankedGenes_for_GSEA<-identifyGSEAPrerankedGenes_for_all_clusters(PBMCs,
                                                                    cluster.type = "louvain")
save(pre_rankedGenes_for_GSEA,file="../SingCellaR_example_datasets/10x_genomics_5K_PBMCs/PBMCs_preRankedGenes_for_GSEA.rdata")                 
#Run fGSEA
load("../SingCellaR_example_datasets/10x_genomics_5K_PBMCs/PBMCs_preRankedGenes_for_GSEA.rdata")
fgsea_Results<-Run_fGSEA_for_multiple_comparisons(GSEAPrerankedGenes_list = pre_rankedGenes_for_GSEA, eps = 0,
                    gmt.file = "../SingCellaR_example_datasets/Human_genesets/human.hematopoiesis.signature.genes.gmt")
plot_heatmap_for_fGSEA_all_clusters(fgsea_Results,isApplyCutoff = TRUE,
                                    use_pvalues_for_clustering=T,
                                    show_NES_score = T,fontsize_row = 10,
                                    adjusted_pval = 0.1,
                                    show_only_NES_positive_score = T,format.digits = 3,
                                    clustering_method = "ward.D",
                                    clustering_distance_rows = "euclidean",
                                    clustering_distance_cols = "euclidean",show_text_for_ns = F)
save(PBMCs,file="../SingCellaR_example_datasets/10x_genomics_5K_PBMCs/PBMCs_v0.1.SingCellaR.rdata")                                    

说在最后

从上面的分析流程大家也能感受到SingCellaR的功能还是十分齐全的,相对于Seurat包的分析结果,它在处理单细胞数据标准流程上有以下优点:

  1. QC过程简单粗暴,而且结果很容易理解;
  2. 可以通过一行代码鉴定双细胞并去除;
  3. 能进行Force-directed graph analysis,这种分析在有些数据中表现很好;
  4. 可以进行GSEA分析,内置的免疫细胞gmt文件很方便大家进行细胞注释。

好啦,本期推文到这就结束了,下期Immugent将会展示使用SingCellaR分析一个公共的疾病数据,并结合具体的科学问题进行解读。



关注下方公众号下回更新不迷路