Open Shi-YuZhang opened 10 months ago
Note that gene ID mapping is currently only available for certain model organisms for which a Bioconductor OrgDb package exists, i.e., for these organisms:
> BiocManager::available("^org\\.[A-Z]")
[1] "org.Ag.eg.db" "org.At.tair.db" "org.Bt.eg.db"
[4] "org.Ce.eg.db" "org.Cf.eg.db" "org.Dm.eg.db"
[7] "org.Dr.eg.db" "org.EcK12.eg.db" "org.EcSakai.eg.db"
[10] "org.Gg.eg.db" "org.Hs.eg.db" "org.Mm.eg.db"
[13] "org.Mmu.eg.db" "org.Mxanthus.db" "org.Pt.eg.db"
[16] "org.Rn.eg.db" "org.Sc.sgd.db" "org.Ss.eg.db"
[19] "org.Xl.eg.db"
That said, you can turn to AnnotationHub and achieve your goal with a couple of extra commands, like this:
> library(AnnotationHub)
> ah <- AnnotationHub()
> query(ah, c("OrgDb", "Macaca"))
AnnotationHub with 5 records
# snapshotDate(): 2023-10-20
# $dataprovider: ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/
# $species: Macaca nemestrina, Macaca mulatta, Macaca irus, Macaca fascicula...
# $rdataclass: OrgDb
# additional mcols(): taxonomyid, genome, description,
# coordinate_1_based, maintainer, rdatadateadded, preparerclass, tags,
# rdatapath, sourceurl, sourcetype
# retrieve records with, e.g., 'object[["AH114088"]]'
title
AH114088 | org.Mmu.eg.db.sqlite
AH114408 | org.Macaca_nemestrina.eg.sqlite
AH114563 | org.Macaca_cynomolgus.eg.sqlite
AH114564 | org.Macaca_fascicularis.eg.sqlite
AH114565 | org.Macaca_irus.eg.sqlite
> org.db <- ah[["AH114564"]]
> library(EnrichmentBrowser)
> mcf <- getGenesets(org = "mcf", db = "kegg")
> mcf[1:3]
$`mcf00010_Glycolysis_/_Gluconeogenesis`
[1] "101865076" "101865104" "101865220" "101865678" "101866112" "101866226"
[7] "101866506" "101866609" "101867330" "101925242" "101925472" "101925598"
[13] "101925612" "101925639" "101925921" "101926834" "102114947" "102115022"
[19] "102116429" "102116859" "102117828" "102118498" "102119787" "102120140"
[25] "102120757" "102120771" "102120901" "102121518" "102121717" "102121906"
[31] "102122039" "102122135" "102122172" "102122307" "102122664" "102123018"
[37] "102123026" "102123383" "102123413" "102124145" "102124195" "102125243"
[43] "102125491" "102125557" "102125836" "102125852" "102127599" "102128279"
[49] "102129493" "102129929" "102130622" "102130836" "102130971" "102131289"
[55] "102131754" "102131932" "102133548" "102133980" "102134594" "102134909"
[61] "102135856" "102137352" "102138258" "102138838" "102138923" "102139491"
[67] "102139551" "102139699" "102139794" "102140005" "102140050" "102140136"
[73] "102140922" "102140927" "102140991" "102141145" "102141498" "102142152"
[79] "102143796" "102144419" "102144494" "102144820" "102145864" "102147228"
[85] "107126374" "123572777"
$`mcf00020_Citrate_cycle_(TCA_cycle)`
[1] "101865050" "101865119" "101865736" "101866578" "101867049" "101867276"
[7] "101867314" "101925784" "101925790" "101925901" "101926620" "102118371"
[13] "102122039" "102122263" "102124727" "102125836" "102125852" "102126003"
[19] "102127185" "102127751" "102129545" "102129952" "102131289" "102132064"
[25] "102134309" "102134909" "102135856" "102139439" "102140136" "102140991"
[31] "102142663" "102144513" "102145238" "102145957" "102146568"
$mcf00030_Pentose_phosphate_pathway
[1] "101865104" "101865220" "101865918" "101865992" "101866782" "101866917"
[7] "101867414" "101925639" "101925866" "101925921" "102115057" "102115771"
[13] "102117021" "102120422" "102124428" "102125557" "102129493" "102129791"
[19] "102130622" "102130836" "102132916" "102134335" "102135829" "102136564"
[25] "102137352" "102138258" "102140005" "102140428" "102140927" "102141498"
[31] "102142152" "102144892" "102146745"
> library(AnnotationDbi)
> mcf.sym <- lapply(mcf, function(s) mapIds(org.db, keys = s, keytype = "ENTREZID", column = "SYMBOL"))
> mcf.sym <- lapply(mcf.sym, unname)
> mcf.sym <- lapply(mcf.sym, sort)
> mcf.sym[1:3]
$`mcf00010_Glycolysis_/_Gluconeogenesis`
[1] "ACSS1" "ACSS2" "ADH4" "ADH5" "ADH6"
[6] "ADH7" "ADPGK" "AKR1A1" "ALDH1B1" "ALDH2"
[11] "ALDH3A1" "ALDH3A2" "ALDH3B1" "ALDH7A1" "ALDH9A1"
[16] "ALDOA" "ALDOB" "ALDOC" "BPGM" "DLAT"
[21] "DLD" "ENO1" "ENO2" "ENO3" "ENO4"
[26] "FBP1" "FBP2" "G6PC1" "G6PC2" "G6PC3"
[31] "GALM" "GAPDH" "GAPDHS" "GCK" "GPI"
[36] "HK1" "HK2" "HK3" "HKDC1" "LDHA"
[41] "LDHAL6A" "LDHAL6B" "LDHB" "LDHC" "LOC102115022"
[46] "LOC102116859" "LOC102117828" "LOC102118498" "LOC102119787" "LOC102120757"
[51] "LOC102121717" "LOC102122039" "LOC102122172" "LOC102122664" "LOC102123018"
[56] "LOC102123383" "LOC102123413" "LOC102124145" "LOC102124195" "LOC102130971"
[61] "LOC102131932" "LOC102138258" "LOC102139491" "LOC102140005" "LOC102140927"
[66] "LOC102144419" "LOC102144820" "LOC123572777" "MINPP1" "PCK1"
[71] "PCK2" "PDHA1" "PDHA2" "PDHB" "PFKL"
[76] "PFKM" "PFKP" "PGAM1" "PGAM2" "PGK1"
[81] "PGK2" "PGM1" "PGM2" "PKLR" "PKM"
[86] "TPI1"
$`mcf00020_Citrate_cycle_(TCA_cycle)`
[1] "ACLY" "ACO1" "ACO2" "CS" "DLAT"
[6] "DLD" "DLST" "FH" "IDH1" "IDH2"
[11] "IDH3A" "IDH3B" "IDH3G" "LOC102122039" "LOC102127185"
[16] "LOC102127751" "LOC102129952" "LOC102134309" "MDH1" "MDH2"
[21] "OGDH" "OGDHL" "PC" "PCK1" "PCK2"
[26] "PDHA1" "PDHA2" "PDHB" "SDHA" "SDHB"
[31] "SDHC" "SDHD" "SUCLA2" "SUCLG1" "SUCLG2"
$mcf00030_Pentose_phosphate_pathway
[1] "ALDOA" "ALDOB" "ALDOC" "DERA" "FBP1"
[6] "FBP2" "G6PD" "GLYCTK" "GPI" "H6PD"
[11] "IDNK" "LOC102138258" "LOC102140005" "LOC102140927" "PFKL"
[16] "PFKM" "PFKP" "PGD" "PGLS" "PGM1"
[21] "PGM2" "PRPS1" "PRPS1L1" "PRPS2" "RBKS"
[26] "RGN" "RPE" "RPIA" "SHPK" "TALDO1"
[31] "TKT" "TKTL1" "TKTL2"
Thank you for the convenient tool. However, when I run
mcf <- getGenesets(org = "mcf", db = "kegg", gene.id.type = "SYMBOL",cache = TRUE, return.type="list")
, it returns an error (the message was not preserved here). I had checked the three-letter organism code in the KEGG organism list (https://www.genome.jp/kegg/catalog/org_list.html): "mcf" stands for Macaca fascicularis (crab-eating macaque).