Open hlnicholls opened 9 months ago
Your code in the 2nd code box, starting from the line kegg_genes <- kegg[,]
, looks very complex to me. So what exactly do you want to achieve here? Could you provide a reproducible example?
Also note that (AFAIK) the function enrichDF2enrichResult
is not part of clusterProfiler
, but rather taken from a GitHub page called multienrichjam
(here)?
Yet, this thread may (still) be useful? https://support.bioconductor.org/p/p133748/
Thank you for your reply and for your help. It is taking from multienrichjam. Sorry I should've also mentioned previously that maybe 1-2 years ago I was able to create a cnetplot with color scaling working as I needed for this type of data (I think I used that bioconductor link you sent to set it up with using scale_color_gradientn()
). However, I've since reinstalled all the packages on a new computer and now I can't get that old code to work.
Ultimately I just want to color code my genes by their PoPs scores (in my d
I have a column of genes and a column of their PoPs scores). I want to see this for my genes that are in the top 5 most enriched pathways.
Here is my full code with example data:
library(data.table)
library(clusterProfiler)
library(tidyverse)
library(multienrichjam)
library(RColorBrewer)
library(ggraph)
library(ggfittext)
library(tidyr)
library(msigdbr)
library(org.Hs.eg.db)
library(biomaRt)
d <- structure(list(Gene = c("ACOT7", "ACTA2", "ACTN2", "ACTN4", "ADCY6",
"ADO", "ADPRHL1", "AFF4", "AGO2", "AHCY", "ALG10", "ALPK3", "AMOTL2",
"ANAPC7", "ANKS1A", "ANXA4", "ARFGAP2", "ARHGAP27", "ARIH2",
"ARMC9", "ARPC3", "ASIP", "ATP1B2", "ATP2B1", "ATXN2", "B3GNT7",
"BAG3", "BAIAP3", "BANK1", "BCAS3", "BHLHE41", "BMP7", "BMPR2",
"BRD2", "C1QTNF4", "C1orf21", "C1orf35", "C2orf42", "C2orf69",
"C3orf18", "CACFD1", "CACNB2", "CACNB3", "CALCB", "CALU", "CAMK2D",
"CAMK2G", "CASQ2", "CASZ1", "CCDC141"), Median_PoPS = c(0.040097016,
0.066717199, 0.119115226, 0.565579881, 0.152149101, -0.002287785,
0.030722876, 0.202042945, 0.121184633, 0.078079842, -0.098154534,
0.242834185, 0.113019505, 0.022269027, -0.037821713, 0.152854103,
0.054061764, 0.070453112, 0.112837399, 0.108880234, -0.203012964,
0.077142703, 0.225070177, -0.01134475, 0.162986282, 0.490838522,
0.45760657, 0.045157219, 0.114610058, 0.089620962, 0.193743901,
0.110843458, 0.123453215, 0.264372212, 0.04456751, 0.107923069,
-0.017319883, 0.08909364, -0.064415409, 0.203052356, 0.033044087,
0.376682059, 0.049786121, 0.000317271, 0.077634837, 0.171648497,
0.252097225, 0.440276142, 0.140784241, 1.090303171), entrezgene_id = c(11332L,
59L, 88L, 81L, 112L, 84890L, 113622L, 27125L, 27161L, 191L, 84920L,
57538L, 51421L, 51434L, 23294L, 307L, 84364L, 201176L, 10425L,
80210L, 10094L, 434L, 482L, 490L, 6311L, 93010L, 9531L, 8938L,
55024L, 54828L, 79365L, 655L, 659L, 6046L, 114900L, 81563L, 79169L,
54980L, 205327L, 51161L, 11094L, 783L, 784L, 797L, 813L, 817L,
818L, 845L, 54897L, 285025L)), class = c("data.table", "data.frame"
), row.names = c(NA, -50L), sorted = "Gene")
# Getting KEGG pathways for my genes:
kegg_organism = "hsa"
kegg_enrich <- enrichKEGG(gene = d$entrezgene_id,
organism = 'hsa',
pvalueCutoff = 0.05,
pAdjustMethod = 'BH')
kegg <- setReadable(kegg_enrich, 'org.Hs.eg.db', 'ENTREZID')
kegg_genes <- kegg[,]
# Getting the top 5 most significantly enriched pathways to plot their network:
keggtop5 <- kegg_genes[order(kegg_genes$p.adjust),]
keggtop5 <- keggtop5[1:5,]
top5all <- enrichDF2enrichResult(keggtop5)
gene_list_scores<-na.omit(gene_list_scores)
filtered_kegg_genes <- kegg_genes[!grepl("cancer", kegg_genes$Description, ignore.case = TRUE), ]
top5paths <- head(filtered_kegg_genes[order(filtered_kegg_genes$pvalue), ], 5)
top5all <- enrichDF2enrichResult(top5paths, pAdjustMethod='BH')
scores <- setNames(d$Median_PoPS, d$entrezgene_id)
p <- cnetplot(top5all, showCategory = 5,
foldChange=scores,
scale_color_gradient2(name='associated data', low='steelblue', high='firebrick'))
# or
p <- cnetplot(top5all,showCategory = 5, color.params = list(foldChange = scores)
So if I understand you correctly, you would like to visualize the network of the top 5 most significant gene sets in a cnetplot
, thereby using your preferred color scale for the values of Median_PoPS
?
I would do it like below; note that you can change the default colors using options(enrichplot.colours = c("steelblue","firebrick")
). See also: https://github.com/YuLab-SMU/enrichplot/issues/268
Moreover, since the output is a ggplot2
object, this can (even) be further modified if needed.
Also note that the object gene_list_scores
is not defined in your code snippet...
> library(clusterProfiler)
> library(ggplot2)
>
> ## generate input data
> d <- structure(list(Gene = c("ACOT7", "ACTA2", "ACTN2", "ACTN4", "ADCY6",
+ "ADO", "ADPRHL1", "AFF4", "AGO2", "AHCY", "ALG10", "ALPK3", "AMOTL2",
+ "ANAPC7", "ANKS1A", "ANXA4", "ARFGAP2", "ARHGAP27", "ARIH2",
+ "ARMC9", "ARPC3", "ASIP", "ATP1B2", "ATP2B1", "ATXN2", "B3GNT7",
+ "BAG3", "BAIAP3", "BANK1", "BCAS3", "BHLHE41", "BMP7", "BMPR2",
+ "BRD2", "C1QTNF4", "C1orf21", "C1orf35", "C2orf42", "C2orf69",
+ "C3orf18", "CACFD1", "CACNB2", "CACNB3", "CALCB", "CALU", "CAMK2D",
+ "CAMK2G", "CASQ2", "CASZ1", "CCDC141"), Median_PoPS = c(0.040097016,
+ 0.066717199, 0.119115226, 0.565579881, 0.152149101, -0.002287785,
+ 0.030722876, 0.202042945, 0.121184633, 0.078079842, -0.098154534,
+ 0.242834185, 0.113019505, 0.022269027, -0.037821713, 0.152854103,
+ 0.054061764, 0.070453112, 0.112837399, 0.108880234, -0.203012964,
+ 0.077142703, 0.225070177, -0.01134475, 0.162986282, 0.490838522,
+ 0.45760657, 0.045157219, 0.114610058, 0.089620962, 0.193743901,
+ 0.110843458, 0.123453215, 0.264372212, 0.04456751, 0.107923069,
+ -0.017319883, 0.08909364, -0.064415409, 0.203052356, 0.033044087,
+ 0.376682059, 0.049786121, 0.000317271, 0.077634837, 0.171648497,
+ 0.252097225, 0.440276142, 0.140784241, 1.090303171), entrezgene_id = c(11332L,
+ 59L, 88L, 81L, 112L, 84890L, 113622L, 27125L, 27161L, 191L, 84920L,
+ 57538L, 51421L, 51434L, 23294L, 307L, 84364L, 201176L, 10425L,
+ 80210L, 10094L, 434L, 482L, 490L, 6311L, 93010L, 9531L, 8938L,
+ 55024L, 54828L, 79365L, 655L, 659L, 6046L, 114900L, 81563L, 79169L,
+ 54980L, 205327L, 51161L, 11094L, 783L, 784L, 797L, 813L, 817L,
+ 818L, 845L, 54897L, 285025L)), class = c("data.table", "data.frame"
+ ), row.names = c(NA, -50L), sorted = "Gene")
>
> ## check
> head(d)
Key: <Gene>
Gene Median_PoPS entrezgene_id
<char> <num> <int>
1: ACOT7 0.040097016 11332
2: ACTA2 0.066717199 59
3: ACTN2 0.119115226 88
4: ACTN4 0.565579881 81
5: ADCY6 0.152149101 112
6: ADO -0.002287785 84890
>
> ## perform KEGG-based overrepresentation analysis
> kegg_enrich <- enrichKEGG(gene = d$entrezgene_id,
+ organism = 'hsa',
+ pvalueCutoff = 0.05,
+ pAdjustMethod = 'BH')
>
> ## converts geneids into symbols
> kegg <- setReadable(kegg_enrich, 'org.Hs.eg.db', 'ENTREZID')
>
> kegg
#
# over-representation test
#
#...@organism hsa
#...@ontology KEGG
#...@keytype ENTREZID
#...@gene chr [1:50] "11332" "59" "88" "81" "112" "84890" "113622" "27125" "27161" ...
#...pvalues adjusted by 'BH' with cutoff <0.05
#...19 enriched terms found
'data.frame': 19 obs. of 11 variables:
$ category : chr "Organismal Systems" "Organismal Systems" "Organismal Systems" "Organismal Systems" ...
$ subcategory: chr "Circulatory system" "Endocrine system" "Digestive system" "Endocrine system" ...
$ ID : chr "hsa04261" "hsa04925" "hsa04971" "hsa04921" ...
$ Description: chr "Adrenergic signaling in cardiomyocytes" "Aldosterone synthesis and secretion" "Gastric acid secretion" "Oxytocin signaling pathway" ...
$ GeneRatio : chr "7/29" "5/29" "4/29" "5/29" ...
$ BgRatio : chr "154/8662" "98/8662" "76/8662" "154/8662" ...
$ pvalue : num 5.51e-07 1.60e-05 1.10e-04 1.40e-04 1.78e-04 ...
$ p.adjust : num 6.44e-05 9.37e-04 3.63e-03 3.63e-03 3.63e-03 ...
$ qvalue : num 4.75e-05 6.91e-04 2.68e-03 2.68e-03 2.68e-03 ...
$ geneID : chr "ADCY6/ATP1B2/ATP2B1/CACNB2/CACNB3/CAMK2D/CAMK2G" "ADCY6/ATP1B2/ATP2B1/CAMK2D/CAMK2G" "ADCY6/ATP1B2/CAMK2D/CAMK2G" "ADCY6/CACNB2/CACNB3/CAMK2D/CAMK2G" ...
$ Count : int 7 5 4 5 4 4 4 3 5 4 ...
#...Citation
T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu.
clusterProfiler 4.0: A universal enrichment tool for interpreting omics data.
The Innovation. 2021, 2(3):100141
>
> ## prepare scores
> scores <- setNames(d$Median_PoPS, d$entrezgene_id)
>
>
> ####
> #### From now on I diverge from your code:
> ####
>
> ## Get the top 5 most significantly enriched pathways.
> ## Note that by default the results are already sorted on significance.
> top.n <- 5
> keggtop5 <- as.data.frame(kegg_enrich)$Description[1:top.n]
>
>
> ## set/change the color to what you prefer
> options(enrichplot.colours = c("steelblue","firebrick"))
>
> ## note the new way of what to use as color in the cnetplot
> color.params = list(foldChange = scores)
> p <- cnetplot(kegg,
+ showCategory = keggtop5, # names of gene sets/pathways
+ color.params=color.params)
Scale for size is already present.
Adding another scale for size, which will replace the existing scale.
>
> print(p)
>
> ## since 'p' is a ggplot2 object, this can be further modified if needed.
> ## along the same lines as: https://support.bioconductor.org/p/p133748/
> library(scales)
>
> min.value <- floor( min(p$data$color, na.rm = TRUE) )
> max.value <- ceiling( max(p$data$color, na.rm = TRUE) )
>
>
> #red-green colour scale
> p2 <- p + scale_color_gradientn(name = "fold change",
+ colours = c("green", "red"),
+ values = rescale(c(min.value, 0, max.value)),
+ limits= c(min.value, max.value),
+ breaks=c(min.value , 0, max.value) )
Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
>
> print(p2)
>
> #red-white-green colour scale, and legend box labelled 'Median PoPS'.
> p3 <- p + scale_color_gradientn(name = "Median PoPS",
+ colours = c("green", "white","red"),
+ values = rescale(c(min.value, 0, max.value)),
+ limits= c(min.value, max.value),
+ breaks=c(min.value , 0, max.value) )
Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
>
> print(p3)
>
>
Outputs:
p
(colour scale set to steelblue
- firebrick
via options(...)
):
p2
(colour scale set to red
- green
via scale_color_gradientn()
):
p3
(colour scale set to red
- white
- green
via scale_color_gradientn()
, and legend box labelled Median PoPS
)
This is exactly what I needed, thank you very much for taking the time to look into this! This has completely resolved my issue.
Thank you for developing this package, it is very useful.
I am trying to plot a cnetplot with a color scale relating to my foldchange values.
Running the example code works as expected:
However, my version of x is not a list but an enrichResult. When I try to run that, the code gets an error:
I have also tried:
These run but do not color code the nodes.