Closed ixxmu closed 5 months ago
PRJNA705464
是肾透明细胞癌的大样本数据集,包含scRNA-seq和TCR数据。组织来源分为Normal、Tumor、PBMC和淋巴结,同时还有药物治疗等,内容是相当丰富。
这么大的数据量咱们怎么用呢?
从上游直接分析?还是找人帮忙分析?
上游分析数据量太大:
找人分析…………一言难尽,还是先自己摸索下吧。
惊奇的发现作者竟然提供了Seurat
对象数据,而且生信技能树大神还分享过该数据集《构建seurat对象之初就应该是把基因名字转换好》,简直完美。下面跟着大神的教程认真学习下:
首先从SRP308561
相关链接下载ccRCC_6pat_Seurat
和ccRCC_6pat_cell_annotations.txt
文件。(数据有点大)
rm(list = ls())
librarian::shelf(Seurat)
# sce <- readRDS("rawdata/ccRCC_6pat_Seurat")
# sce
# sce <- UpdateSeuratObject(sce)
# sce
# save(sce,file = "sce_0raw.Rdata")
# 基因转换
if(F){
load("sce_0raw.Rdata")
DimPlot(sce)
sce # 16323 features across 167283 samples within 1 assay
# 基因转换
library(org.Hs.eg.db)
head(rownames(sce))
ids=AnnotationDbi::select(org.Hs.eg.db,keys = rownames(sce),
columns = c('ENSEMBL','SYMBOL'),
keytype = 'ENSEMBL')
head(ids)
dim(ids) # [1] 16458
ids=na.omit(ids)
dim(ids) # [1] 15527
length(unique(ids$SYMBOL)) # [1] 15516
# 这里的关系超级乱,互相之间都不是一对一
# 凡是混乱的ID一律删除即可
ids=ids[!duplicated(ids$SYMBOL),]
ids=ids[!duplicated(ids$ENSEMBL),]
pos=match(ids$ENSEMBL,rownames(sce) )
sce=sce[pos,]
sce # 15388 features
#rownames(sce) = ids$SYMBOL
head(rownames(sce)); head(ids)
# https://github.com/satijalab/seurat/issues/2617
# RenameGenesSeurat ------------------------------------------------------------------------------------
RenameGenesSeurat <- function(obj ,
newnames ) {
# Replace gene names in different slots of a Seurat object. Run this before integration. Run this before integration.
# It only changes obj@assays$RNA@counts, @data and @scale.data.
print("Run this before integration. It only changes obj@assays$RNA@counts, @data and @scale.data.")
RNA <- obj@assays$RNA
if (nrow(RNA) == length(newnames)) {
if (length(RNA@counts)) RNA@counts@Dimnames[[1]] <- newnames
if (length(RNA@data)) RNA@data@Dimnames[[1]] <- newnames
if (length(RNA@scale.data)) RNA@scale.data@Dimnames[[1]] <- newnames
} else {"Unequal gene sets: nrow(RNA) != nrow(newnames)"}
obj@assays$RNA <- RNA
return(obj)
}
obj=RenameGenesSeurat(obj = sce,
newnames = ids$SYMBOL)
sce <- obj; rm(obj)
save(sce,file = 'first_sce.Rdata')
}
# 细胞注释
if(F){
load("first_sce.Rdata")
library(ggplot2)
genes_to_check = c('PTPRC', 'CD3D', 'CD3E', 'CD4','CD8A','CD19', 'CD79A', 'MS4A1' ,
'IGHG1', 'MZB1', 'SDC1',
'CD68', 'CD163', 'CD14',
'TPSAB1' , 'TPSB2', # mast cells,
'RCVRN','FPR1' , 'ITGAM' ,
'FGF7','MME', 'ACTA2',
'PECAM1', 'VWF',
'EPCAM' , 'KRT19', 'PROM1', 'ALDH1A1' )
library(stringr)
p_all_markers <- DotPlot(sce , features = genes_to_check,
assay='RNA' ) + coord_flip()
p_all_markers
ggsave(plot=p_all_markers, filename="check_all_marker_by_seurat_cluster.pdf",width = 12, height = 10)
# 需要自行看图,定细胞亚群:
celltype=data.frame(ClusterID=0:34,
celltype='unkown')
celltype[celltype$ClusterID %in% c( 0,2:4,6:8,10,15,17,17,19,31,32),2]='Tcells'
celltype[celltype$ClusterID %in% c( 24 ),2]='plasma'
celltype[celltype$ClusterID %in% c( 22 ),2]='naive-Bcells'
celltype[celltype$ClusterID %in% c(21),2]='mast'
celltype[celltype$ClusterID %in% c( 1,5,12:14,18,23,27,33,34),2]='myeloid'
celltype[celltype$ClusterID %in% c( 9,16,26 ),2]='epithelial'
celltype[celltype$ClusterID %in% c( 11),2]='endo'
celltype[celltype$ClusterID %in% c(20),2]='fibo'
celltype[celltype$ClusterID %in% c(25 ),2]='SMC'
head(celltype)
celltype
table(celltype$celltype)
sce$celltype <- plyr::mapvalues(sce$cluster,
from = 0:34,
to=celltype$celltype)
p1 <- DimPlot(sce,group.by = "celltype",label = T)
ggsave(p1,filename="DimPlot_by_seurat_cluster.pdf",width = 12, height = 10)
save(sce, file = "first_sce.Rdata")
}
接下来看看大佬们的神来之笔,首先是PMID34804944Fig5A
。
作者是这样进行分析的:
PRJNA705464, which is a large database containing cells totally, was applied to investigate the role of pyroptosis-related genes in cell–cell interaction in ccRCC TME. Only untreated tumor samples from PRJNA705464 were applied for further analysis.
下面咱们就提取untreated
样本进行后续分析,果然可完美复现Figure5A。
# meatdata
library(tidyverse)
metadata <- data.table::fread("rawdata/ccRCC_6pat_cell_annotations.txt",data.table = F) %>%
column_to_rownames("cell")
metadata <- metadata[sce$cell,]
metadata1 <- metadata[,c("Sample_name","cluster_name")]
sce1 <- AddMetaData(object = sce,
metadata = metadata1)
DimPlot(sce1, group.by = "cluster_name",label = T)+ NoLegend()
rm(sce)
#仅提取 untreated 样本
sce_sub <- subset(sce1, (Sample_name=="Untreated 1"| Sample_name=="Untreated 2") & type == "Tumor")
sce_sub #29799 samples
DimPlot(sce_sub, reduction = "tsne") # PMID: 34804944 Figure5A
DimPlot(sce_sub, reduction = "tsne",group.by = "celltype")
同样作者在PMID35592316
的FiguresS9A-D也对PRJNA705464
进行再次分析,有兴趣的大佬可以下载下来进行复习学习。
参考链接:
Identification and Verification of m7G Modification Patterns and Characterization of Tumor Microenvironment Infiltration via Multi-Omics Analysis in Clear Cell Renal Cell Carcinoma.
Establishment of a prognosis Prediction Model Based on Pyroptosis-Related Signatures Associated With the Immune Microenvironment and Molecular Heterogeneity in Clear Cell Renal Cell Carcinoma.
声明:
本文绝大部分代码均学习整理自生信技能树、单细胞天地等,仅供交流学习,禁止用于商业用途,如有侵权,请联系删除。鉴于本人水平有限,某些内容难免理解不到位,还请各位大佬多多指教。
https://mp.weixin.qq.com/s/GRjOw4TNhLrOiSHdhqX4pQ