ixxmu / mp_duty

抓取网络文章到github issues保存
https://archives.duty-machine.now.sh/
122 stars 30 forks source link

你的富集结果还用气泡图表示?? #2407

Closed ixxmu closed 2 years ago

ixxmu commented 2 years ago

https://mp.weixin.qq.com/s/hVUiNMjHEQdJABQ9zZqJYg

github-actions[bot] commented 2 years ago

你的富集结果还用气泡图表示?? by YuYuFiSH

连CELL都勇升到66.8分了,而我。。。。

还是先学会画图把


据说,多种R包可以绘制雷达图

对ggplot系列函数熟悉的小伙伴可以选择ggradarecharts4r

fmsbradarchart也同样能实现你的需求


🍥下面,从单细胞差异分析入手,直到GO富集分析

1.单细胞差异分析

  • FindAllMarkers
# 6. FindAllMarkers celltype----
sce
# An object of class Seurat 
# 25288 features across 13560 samples within 1 assay 
# Active assay: RNA (25288 features, 2000 variable features)
# 4 dimensional reductions calculated: pca, tsne, umap, harmony
table(sce$celltype)

Idents(sce) <- "celltype"
Idents(sce)
sce.markers <- FindAllMarkers(sce)
save(sce.markers,file = "harmony_celltype.markers.Rdata")
write.csv(sce.markers,file = "harmony_celltype.markers.csv")

2. 挑top100

head(harmony_celltype_markers)
#            p_val    avg_log2FC pct.1 pct.2     p_val_adj         cluster   gene
# FCER1A  0.000000e+00   2.016015 0.734 0.126  0.000000e+00 Inflammatory_DC FCER1A
# CD1C    0.000000e+00   1.647918 0.503 0.064  0.000000e+00 Inflammatory_DC   CD1C
# IL1R2  1.776186e-291   1.655928 0.567 0.099 4.491619e-287 Inflammatory_DC  IL1R2
# CD1E   3.616531e-250   1.093357 0.333 0.036 9.145483e-246 Inflammatory_DC   CD1E
# NAPSB  3.350806e-242   1.358380 0.521 0.097 8.473517e-238 Inflammatory_DC  NAPSB
# NR4A3  5.633118e-232   1.374079 0.625 0.145 1.424503e-227 Inflammatory_DC  NR4A3

sce.markers <- harmony_celltype_markers
DT::datatable(sce.markers)
# 对每一个cluster,根据avg_log2FC,取前100个基因
top100 <- sce.markers %>% group_by(cluster) %>% top_n(100, avg_log2FC)
head(top100)
dim(top100) #1000    7

3. GO富集分析

  • bitr: 转换ID
# GO富集分析
# https://mp.weixin.qq.com/s/z-Kk02I2g8vAESgcGuncUg
library(clusterProfiler)
# 挑选p_val<0.05
de_genes <- subset(top100, p_val<0.05)
length(de_genes$gene) # 1000
head(de_genes)

# 转化ID
entrez_genes <- bitr(de_genes$gene, fromType="SYMBOL"
                                    toType="ENTREZID"
                                    OrgDb="org.Hs.eg.db"# 人
#  5.45% of input gene IDs are fail to map...

head(entrez_genes)
# SYMBOL ENTREZID
# 1 FCER1A     2205
# 2   CD1C      911
# 3  IL1R2     7850
# 4   CD1E      913
# 5  NAPSB   256236
# 6  NR4A3     8013

# 将原来的SYMBOL 和 ENTREZID 对应起来
mix <- merge(de_genes,entrez_genes,by.x="gene",by.y="SYMBOL")
dim(mix) #950   8
head(mix)
#   gene         p_val avg_log2FC pct.1 pct.2     p_val_adj           cluster ENTREZID
# 1   A2M  4.322616e-68  0.4559287 0.402 0.183  1.093103e-63          TREM2_Mf        2
# 2   A2M  2.036888e-05  0.5190426 0.210 0.200  5.150881e-01 CCR2- HLA-DRhi C1        2
# 3 ABCA6  7.529550e-58  0.7018329 0.127 0.052  1.904073e-53 CCR2- HLA-DRhi C1    23460
# 4  ABI3 1.146337e-237  1.3451063 0.489 0.106 2.898856e-233        CD16+ Mono    51225
# 5  ABL2  0.000000e+00  1.3000718 0.501 0.180  0.000000e+00 CCR2+ HLA-DRhi C2       27
# 6 ACAP1 2.340444e-179  0.9188265 0.333 0.047 5.918515e-175         Mast cell     9744
  • merge : 整合
  • split : 拆分
  • compareCluster : 富集分析(多个clusters)
# 整合起来
de_gene_clusters <- data.frame(ENTREZID=mix$ENTREZID,
                               cluster=mix$cluster)
table(de_gene_clusters$cluster)
# 只挑选其中3个cluster作富集
de_gene_clusters <- de_gene_clusters[(de_gene_clusters$cluster == 'CCR2- HLA-DRhi C1'|
                                      de_gene_clusters$cluster=='CCR2+ HLA-DRhi C2'|
                                      de_gene_clusters$cluster== 'CCR2+ HLA-DRhi C3'),]
table(de_gene_clusters$cluster)
# CCR2- HLA-DRhi C1 CCR2+ HLA-DRhi C2 CCR2+ HLA-DRhi C3 
# 96                98                95 

# 拆分成list
list_de_gene_clusters <- split(de_gene_clusters$ENTREZID,de_gene_clusters$cluster)


因为我下面可视化是针对某些通路,

所以这里pvalueCutoff,qvalueCutoff都调的很大,尽可能富集出感兴趣的通路出来

# 多个clusters富集分析
formula_res <- compareCluster(
  ENTREZID~cluster, 
  data=de_gene_clusters, 
  fun="enrichGO"
  OrgDb="org.Hs.eg.db",
  ont = "ALL"# GO富集类型,One of "BP", "MF", and "CC" or "ALL"
  pAdjustMethod = "BH",
  pvalueCutoff  = 1,
  qvalueCutoff  =1,
  minGSSize = 5# 最小的基因富集数量
  maxGSSize = 1000# 最大的基因富集数量

head(formula_res)
f=as.data.frame(formula_res)
dim(f) #9805   12
  • 挑选感兴趣的通路
enrich <- c("response to oxidative stress",
            "response to endoplasmic reticulum stress",
            "interleukin-10 production",
            'antigen processing and presentation',
            'endocytosis',
            'response to tumor necrosis factor',
            'I-kappaB kinase/NF-kappaB signaling',
            'response to interferon-gamma',
            'cytokine production',
            'leukocyte chemotaxis')
            
choose_f <- f[(f$Description %in% enrich),]
table(choose_f$Description)
df=choose_f

4. 气泡图

p=ggplot(df,aes(x = cluster, 
                y = Description, # 按照富集度大小排序
                size = Count,
                colour=p.adjust)) +
  geom_point(shape = 16)+                     # 设置点的形状
  labs(x = "Ratio", y = "Pathway")+           # 设置x,y轴的名称
  scale_colour_continuous(                    # 设置颜色图例
    name="Enrichment",                        # 图例名称
    low="#7fcdbb",                            # 设置颜色范围
    high="#fc9272")+
  scale_radius(                               # 设置点大小图例
    range=c(5,9),                             # 设置点大小的范围
    name="Size")+                             # 图例名称
  guides(
    color = guide_colorbar(order = 1),        # 决定图例的位置顺序
    size = guide_legend(order = 2)
  )+theme_bw()                                # 设置主题
p


  • 如果名字太长,可以换行显示
p + scale_y_discrete(labels=function(x) str_wrap(x, width=12)) +
  scale_x_discrete(labels=function(x) str_wrap(x, width=10)) # 名字换行      

5. 雷达图

  • dcast :  先换成短矩阵
ff <- choose_f[,c("Cluster","Description","Count")]
head(ff)
#          Cluster                         Description        Count
# 2   CCR2- HLA-DRhi C1                         endocytosis    17
# 51  CCR2- HLA-DRhi C1   response to tumor necrosis factor     7
# 74  CCR2- HLA-DRhi C1        response to interferon-gamma     5
# 93  CCR2- HLA-DRhi C1                leukocyte chemotaxis     6
# 128 CCR2- HLA-DRhi C1 antigen processing and presentation     4
# 785 CCR2- HLA-DRhi C1        response to oxidative stress     5

# 长数据换成短数据形式
dd=dcast(ff,Cluster~Description,value.var = 'Count')
  • 调整格式
rownames(dd) <- dd[,1]
dd <- dd[,-1]

# 第一行包含每个变量的最大值
# 第二行包含每个变量的最小值
dd=rbind(rep(20,5) , rep(0,5) , dd)

# 变量需要换成数值形式
for (i in 1:10) {
  #i=1
  dd[,i] <- as.numeric(dd[,i])
  
}
str(dd)
# 'data.frame': 5 obs. of  10 variables:
# $ antigen processing and presentation     : num  20 0 4 1 3
# $ cytokine production                     : num  20 0 7 15 17
# $ endocytosis                             : num  20 0 17 4 4
# $ I-kappaB kinase/NF-kappaB signaling     : num  20 0 1 6 4
# $ interleukin-10 production               : num  20 0 1 1 NA
# $ leukocyte chemotaxis                    : num  20 0 6 11 10
# $ response to endoplasmic reticulum stress: num  20 0 2 6 2
# $ response to interferon-gamma            : num  20 0 5 4 3
# $ response to oxidative stress            : num  20 0 5 12 8
# $ response to tumor necrosis factor       : num  20 0 7 14 3
  • radarchart : 终于到画图了
library(fmsb)
colnames(dd)
# 有些名字太长了,需要调整下距离
# https://stackoverflow.com/questions/69306607/how-to-move-radar-chart-spider-chart-labels-in-r-fmsb-for-r-shiny-so-labels-do
f=c("antigen processing and presentation" ,"cytokine production" ,                    
  "endocytosis" ,"       NF-kappaB 
  signaling"
,     
  "interleukin-10 
  production"
 ,"leukocyte chemotaxis" ,                   
  "       response to
                                  endoplasmic reticulum stress"
 ,
  "           response to 
              interferon-gamma"
,            
  "       response to 
               oxidative stress"
 ,
  "    response to 
                  tumor necrosis factor"
 )

# https://zhuanlan.zhihu.com/p/363992240
{cairo_pdf(file = "mouse-heart/fig8_radarchart.pdf"
          width = 20,
          height = 10
          family = "STSong")
#?radarchart
radarchart(dd, 
           axistype=0# 坐标轴的类型
           vlcex=2# 标签大小
           palcex=5# 轴四周字体大小缩放比例
           pcol=colors_border , #设置颜色
           pfcol=colors_in , # 内部填充色
           plwd=1.3 , #线条粗线
           plty=1,#虚线,实线
           vlabels = c(f), # 标签
           pty=32# 点的形状
           cglcol="black"# 雷达图网络格颜色
           cglty=3,  #背景线条虚线,实线
           cglwd=0.6 #背景线条粗细
)

legend(x=1.5, y=0.90, legend = rownames(dd[-c(1,2),]), 
       bty = "n"
       pch=20 , 
       col=colors_border, 
       text.col = "black"
       cex=1.5, pt.cex=2)


dev.off()
}







Don't forget to SubscribeFollow

Like & Share !


YuYuFiSH

邮箱:chenyu_202000@163.com