Open BioinformaNicks opened 3 years ago
So I think I got it working with a workaround:
# Build the TERM2GENE table for enricher() from the MSigDB Hallmark (H) and
# GO (C5) collections, plus a lookup table used later to map enriched gene-set
# names back to their GO accession and sub-collection.

# Fetch C5 once and reuse it: the same filtered table feeds both the lookup
# (go_jointable) and the TERM2GENE rows, so there is no need to query twice.
go_jointable <- msigdbr::msigdbr(species = "Homo sapiens", category = "C5") %>%
  dplyr::filter(gs_subcat != "HPO")  # drop the Human Phenotype Ontology sets

# distinct() with explicit columns keeps only those columns, so a separate
# select() is not needed.
go_m_t2g <- go_jointable %>%
  dplyr::distinct(gs_name, entrez_gene)

m_t2g <- msigdbr::msigdbr(species = "Homo sapiens", category = "H") %>%
  dplyr::distinct(gs_name, entrez_gene)

# Combined table: Hallmark rows first, then GO rows.
m_t2g <- dplyr::bind_rows(m_t2g, go_m_t2g)
Then:
# Run enricher() on the combined Hallmark + GO table, reduce redundancy among
# the GO terms with simplify() one ontology at a time, and merge the simplified
# GO rows back together with the untouched Hallmark rows.

enriched <- clusterProfiler::enricher(names(enrich_ready), TERM2GENE = m_t2g)
if (is.null(enriched)) {
  # Fail with a clear message instead of an obscure `@` error further down.
  stop("enricher() returned NULL: no input gene could be mapped to a gene set",
       call. = FALSE)
}
enricher_result <- enriched@result

# Attach the GO annotation columns to every enriched set. go_jointable has one
# row per gene, so match() takes the first row of each set; sets absent from
# go_jointable (the Hallmark sets) get NA in all three columns.
go_cols <- c("gs_name", "gs_subcat", "gs_exact_source")
enricher_result <- enricher_result %>%
  dplyr::mutate(
    go_jointable[match(enricher_result$ID, go_jointable$gs_name), go_cols]
  )

# Rows without a GO annotation are the Hallmark results; restore their
# original column layout.
hallmark_result <- enricher_result %>%
  dplyr::filter(is.na(gs_name)) %>%
  dplyr::select(!c("gs_name", "gs_subcat", "gs_exact_source"))

# GO rows: derive the ontology from the part of gs_subcat after the colon and
# swap the MSigDB set name for the GO accession stored in gs_exact_source,
# which is the ID simplify() is run on.
go_result <- dplyr::filter(enricher_result, !is.na(gs_name))
go_result <- dplyr::mutate(
  go_result,
  ONTOLOGY = stringr::str_split(go_result$gs_subcat, ":", simplify = TRUE)[, 2]
)
go_result$ID <- go_result$gs_exact_source
go_result <- go_result %>%
  dplyr::select(c(
    "ONTOLOGY", "ID", "Description", "GeneRatio", "BgRatio",
    "pvalue", "p.adjust", "qvalue", "geneID", "Count"
  ))
rownames(go_result) <- go_result$ID

go_result_bp <- dplyr::filter(go_result, ONTOLOGY == "BP")
go_result_mf <- dplyr::filter(go_result, ONTOLOGY == "MF")
go_result_cc <- dplyr::filter(go_result, ONTOLOGY == "CC")

# Fill in the slots that enricher() leaves generic.
enriched@ontology <- "BP"
enriched@keytype <- "ENTREZID"
enriched@organism <- "Homo sapiens"

# Simplify one ontology subset on a copy of the enrichResult. The ontology
# slot must name the branch the terms belong to, so it is set per subset
# rather than left at "BP" for the MF and CC terms. The call is
# namespace-qualified because purrr and igraph also export a simplify().
simplify_go_subset <- function(x, result, ontology) {
  x@ontology <- ontology
  x@result <- result
  clusterProfiler::simplify(x)@result
}

simplified_bp <- simplify_go_subset(enriched, go_result_bp, "BP")
simplified_mf <- simplify_go_subset(enriched, go_result_mf, "MF")
simplified_cc <- simplify_go_subset(enriched, go_result_cc, "CC")

# Recombine the simplified GO rows, put the set name (kept in Description)
# back into ID, then append the Hallmark rows and sort by adjusted p-value.
total_simplified <-
  dplyr::bind_rows(simplified_bp, simplified_mf, simplified_cc) %>%
  dplyr::select(!ONTOLOGY) %>%
  dplyr::mutate(ID = Description)
total_simplified <- dplyr::bind_rows(total_simplified, hallmark_result)
total_simplified <- total_simplified[order(total_simplified$p.adjust), ]

# Assign row names after the final bind so they match ID for every row,
# including the Hallmark rows appended last.
rownames(total_simplified) <- total_simplified$ID

enriched@result <- total_simplified
So I'm trying to run enricher() on both the MSigDB Hallmark set and the C5 (GO) set, by doing the following:
However, this leads to a lot of redundant GO terms in the enrichResult object. While simplify() can be applied to enrichGO() results, it cannot be applied to enricher() results, even when the gene sets are GO terms. Would it be possible to add this functionality?
Alternatively, do you know of any way to filter out the redundancy in the msigdbr C5 (GO) selection beforehand?