Closed pauGuas closed 1 year ago
Hi pauGaus,
There is no such option in Fantaxtic, but I've written you some code to do so - see below. I might implement this into the package at some point, but for now, just copy and run the functions below.
Hope that this solves your issue!
require("phyloseq")
require("fantaxtic")
require("tidyverse")
# This function gets the top n taxa for every sample in a phyloseq object
get_top_sample_taxa <- function(ps_obj, n_taxa = 1){
# Make sure taxa are rows
if (!phyloseq::taxa_are_rows(ps_obj)) {
otu_table(ps_obj) <- t(otu_tbl)
}
# Get the top taxa per sample
top_taxa <- otu_table(ps_obj) %>%
data.frame(taxid = row.names(.)) %>%
pivot_longer(!taxid, names_to = "sample_id", values_to = "abundance") %>%
group_by(sample_id) %>%
slice_max(abundance, n = n_taxa) %>%
arrange(desc(abundance)) %>%
mutate(tax_rank = rank(x = -abundance, ties.method = "first", na.last = T))
# Add taxonomic annotations
top_taxa <- ps_obj %>%
tax_table() %>%
data.frame(taxid = row.names(.)) %>%
right_join(top_taxa, by = "taxid")
# Add relative abundance
top_taxa <- ps_obj %>%
sample_sums() %>%
data.frame(sample_id = names(.),
library_size = .) %>%
right_join(top_taxa, by = "sample_id") %>%
rowwise() %>%
mutate(proportion = abundance / library_size)
# Reorder
top_taxa <- top_taxa %>%
select(!library_size) %>%
relocate(sample_id, tax_rank, taxid, abundance, proportion)
return(top_taxa)
}
# This function merges all taxa that are not specified in 'taxa' into a taxon
# named 'other_label' if 'discard_other = FALSE', else it removes them.
collapse_taxa <- function(ps_obj, taxa, discard_other = FALSE, other_label = "Other"){
# Make sure taxa are rows
if (!phyloseq::taxa_are_rows(ps_obj)) {
otu_table(ps_obj) <- t(otu_table(ps_obj)
}
# Merge or discard all other taxa
if (discard_other) {
ps_obj <- phyloseq::prune_taxa(taxa, ps_obj)
} else {
# Merge taxa
to_merge <- phyloseq::taxa_names(ps_obj)
to_merge <- to_merge[!(to_merge %in% taxa)]
ps_obj <- merge_taxa(ps_obj, to_merge)
# Update the taxon name to other_label
tax_tbl <- phyloseq::tax_table(ps_obj)
indx <- which(row.names(tax_tbl) %in% to_merge)
tax_tbl[indx, ] <- other_label
phyloseq::tax_table(ps_obj) <- tax_tbl
}
return(ps_obj)
}
# Load data
data(GlobalPatterns)
ps_obj <- GlobalPatterns
# Get the top n taxa per sample
top_taxa <- get_top_sample_taxa(ps_obj, n_taxa = 3)
top_taxa
### If you want to plot with fantaxtic, run the steps below ###
# Collapse taxa
ps_obj <- collapse_taxa(ps_obj, top_taxa$taxid)
# Generate labels
ps_obj <- name_taxa(ps_obj, label = "Unkown", species = T, other_label = "Other")
# Plot
fantaxtic_bar(ps_obj, color_by = "Phylum", label_by = "Species", other_label = "Other")
Let me know if this was useful.
Thank you so much for sending this code. I actually found some other code that worked before you sent this, so I decided not to risk confusing myself by trying yours! Thank you.
I'm glad to hear that you found a solution to your problem. Would you mind trying out the code above to see if it does what you want? I'd like to implement it into the package, but before I do so, any feedback would be appreciated.
This issue has now been resolved. To get the top taxa per sample, run:
data(GlobalPatterns)
top <- top_taxa(GlobalPatterns, n_taxa = 1, grouping = "sample_id")
Hi, I need to see what are the top taxa in each individual sample. Is there a way to do that using fantaxtic? I can only see the top taxa across all samples in the entire phyloseq object.