Closed ixxmu closed 2 years ago
1.什么是CNV?
PS:对背景知识不理解的小伙伴建议去知乎搜索专栏:焦老师讲遗传系列之12:拷贝数变异CNV
2.文献中怎么用CNV推断?
一篇21年的揭示转移性胰腺导管腺癌瘤内异质性的文章:
3.该如何进行CNV推断?
由于篇幅有限,这里着重介绍下常用的inferCNV和copyKAT。
1.inferCNV
(1)提出:Single-cell RNA-seq highlights intratumoral heterogeneity in primary glioblastoma. Science. 2014 Jun. PMID: 24925914a
(2)改进:Dissectingthe multicellular ecosystem of metastatic melanoma by single-cell RNA-seq.Science. 2016 Apr 8.PMID: 27124452
(3)细化+升华:Single-CellTranscriptomic Analysis of Primary and Metastatic Head and Neck Cancer. Cell.2017 Dec 14.PMID: 29198524
#代码实战:比较简单,只需准备好count矩阵表达文件、
#单个细胞类群的注释文件和基因在染色体上的位置文件即可,
#运行速度看电脑配置和细胞数量,我示例是3317个细胞跑了20分钟。
rm(list = ls())
#安装infercnv包
BiocManager::install("infercnv")
#使用infercnv自带数据-----------------------------------------------------------------------
library(infercnv)
library(tidyverse)
dir.create("inferCNV")
dir.create("inferCNV/test")
##读取示例数据目录
exprMatrix = system.file("extdata", "oligodendroglioma_expression_downsampled.counts.matrix.gz", package="infercnv")
cellAnnota = system.file("extdata", "oligodendroglioma_annotations_downsampled.txt", package="infercnv")
geneLocate = system.file("extdata", "gencode_downsampled.EXAMPLE_ONLY_DONT_REUSE.txt", package="infercnv")
#创建inferCNV对象,直接给相应的文件路径即可
infercnv_obj = CreateInfercnvObject(delim = '\t',
raw_counts_matrix = exprMatrix,
annotations_file = cellAnnota,
gene_order_file = geneLocate,
ref_group_names = c("Microglia/Macrophage","Oligodendrocytes (non-malignant)"))
##分析细胞CNV
#cutoff阈值,官网建议Smart-seq2数据选“1”, 10x Genomics数据选“0.1”
infercnv_obj = infercnv::run(infercnv_obj,
cutoff=1,
out_dir='inferCNV/test',
cluster_by_groups=TRUE,
#analysis_mode="subclusters", #默认是"samples"
denoise=TRUE,
HMM=TRUE)
library(RColorBrewer)
library(ggsci)
infercnv::plot_cnv(infercnv_obj,
plot_chr_scale = T,
output_filename = "inferCNV/test/better_plot",output_format = "pdf",
custom_color_pal = color.palette(c(pal_nejm()(8)[1],"white",pal_nejm()(8)[2]), c(2,2)))
#使用TISCH中的Seurat(LIHC的GSE125449)数据-----------------------------------------------------------------------
rm(list = ls())
library(infercnv)
library(tidyverse)
library(Seurat)
library(tidyverse)
dir.create("inferCNV/valudation")
#获得Count矩阵
data <- Read10X_h5('LIHC_GSE125449_aPDL1aCTLA4_expression.h5')
data <- CreateSeuratObject(data, project = "", min.cells = 3, min.features = 200)
data #3835个细胞,17356个基因
meta <- read.delim("LIHC_GSE125449_aPDL1aCTLA4_CellMetainfo_table.tsv", header=F)
colnames(meta) <- meta[1,]
meta <- meta[-1,]
rownames(data)
colnames(meta)
colnames(meta) <- c("Cell","UMAP_1","UMAP_2","Celltype_malignancy",
"Celltype_major_lineage","Celltype_minor_lineage",
"Cluster","Celltype","Treatment","Patient","Sample",
"Source","Age","Gender","Stage")
data@assays$RNA@meta.features <- as.data.frame(rownames(data))
identical(meta$Cell,colnames(data))
meta <- meta[meta$Cell%in%colnames(data),]
a <- data@meta.data
a <- cbind(a,meta)
data@meta.data <- a
table(data@meta.data$Celltype_malignancy)
#数据质控+去除批次效应 ---------------------------------------------------------
data[["percent.mt"]] <- PercentageFeatureSet(data, pattern = "^MT-") # 注意基因名称变化
VlnPlot(data, group.by = "Patient", features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), pt.size = 0, ncol = 3) +
NoLegend()
##筛选合格细胞
cat("Before filter:",nrow(data@meta.data),"cells\n")
data <- subset(data, subset =
nFeature_RNA > 200 &
nFeature_RNA < 3000 &
nCount_RNA > 500 &
nCount_RNA < 4000 &
percent.mt < 5)
cat("After filter :",nrow(data@meta.data),"cells\n")
data #保留17356个基因,3317个细胞。
save(data,file = "GSE125449_count.rda")
dd <- data[["RNA"]]@counts
dd2 <- data@meta.data
dd <- as.data.frame(dd)
dd2 <- dd2[,c(4,7)]
table(dd2$Celltype_malignancy)
dd2$Celltype_malignancy <- factor(dd2$Celltype_malignancy,levels = c("Immune cells","Stromal cells","Malignant cells"))
load('GRCh38_v39.gtf.rda') #参考基因组,可自行下载
dd3 <- gtf_data[,c(12,1:3)]
dd3 <- distinct(dd3,gene_name,.keep_all = T)
gene <- intersect(rownames(dd),dd3$gene_name)
dd <- dd[rownames(dd)%in%gene,]
dd3 <- dd3[dd3$gene_name%in%gene,]
write.table(as.matrix.data.frame(dd), file ="inferCNV/valudation/counts.matrix",quote = F,sep = "\t",row.names = T,col.names = T)
write.table(as.data.frame(dd3),file = "inferCNV/valudation/gencode.txt", quote = F,sep = "\t",row.names = F,col.names = F)
write.table(as.data.frame(dd2),file = "inferCNV/valudation/annotation.txt", quote = F,sep = "\t",row.names = F,col.names = F)
#运行infercnv-----------------------------------------------------------------------
rm(list = ls())
library(tidyverse)
library(infercnv)
memory.limit(10000000)
#使用inferCNV之前,最好过滤掉低质量的细胞
infercnv_obj = CreateInfercnvObject(raw_counts_matrix="inferCNV/valudation/counts.matrix",
annotations_file="inferCNV/valudation/annotation.txt",
delim="\t",
gene_order_file="inferCNV/valudation/gencode.txt",
ref_group_names=c("Immune cells")
)
#ref_group_names参数根据细胞注释文件填写,在本数据,我们假设用“Immune cells”作为参照,看“Stromal cells”和“Malignant cells”的恶性;
#ref_group_names=NULL,则会选用所有细胞的平均表达作为参照,这种模式下,最好能确保细胞内在有足够的差异
infercnv_obj = infercnv::run(infercnv_obj,
cutoff=1,
out_dir="try2",
cluster_by_groups=TRUE,
#analysis_mode="subclusters", #默认是"samples"
denoise=TRUE,
HMM=TRUE,
num_threads=4
)
#去噪,HMM预测CNV这两项我一般都选上
2.copyKAT
#copyKAT-----------------------------------------------------------------------
rm(list = ls())
#install_github("navinlabcode/copykat")
library(copykat)
load("GSE125449_count.rda")
counts <- as.matrix(data@assays$RNA@counts)
cnv <- copykat(rawmat = counts,ngene.chr = 5,
sam.name = "GSE125449_copykat",n.cores = 1)
save(cnv,file="GSE125449_cnv_copyKAT.rda"
小结
文献下载-医学僧
机器学习(分类)-医学僧
ggplot可视化-医学僧
热图可视化-医学僧
TCGA挖掘-医学僧
GEO挖掘-医学僧
肿瘤预后模型-医学僧
肿瘤分子亚型-医学僧
欢迎小伙伴们进群👏👏👏,每个群收费15元。请联系小编微信yixuesengkyrj或扫描下面二维码:
https://mp.weixin.qq.com/s/O5xL-QQXROSlWfkVeioqRw