Open ValPoltr opened 3 years ago
Hi @ValPoltr, the Species names repeat in the plot because that is how those taxa are annotated in the `tax_table`. As to the second question, I think those gaps indicate that the Species fields of those taxa in the `tax_table` are `NA`.
Below I use the test dataset `GlobalPatterns` to show what is happening:
library(phyloseq)
data(GlobalPatterns)

# IDs of the 20 taxa with the largest total counts across all samples.
# head() is used instead of [1:20] so that an object with fewer than 20 taxa
# does not produce NA names.
top_taxa <- head(
  names(sort(taxa_sums(GlobalPatterns), decreasing = TRUE)),
  20
)

# Convert counts to per-sample proportions before pruning, so each proportion
# is computed against the sample's full total rather than only the kept taxa.
top_gp <- transform_sample_counts(GlobalPatterns, function(OTU) OTU / sum(OTU))
top_gp <- prune_taxa(top_taxa, top_gp)

# show Taxonomy Table
tax_table(top_gp)
Taxonomy Table: [20 taxa by 7 taxonomic ranks]:
Kingdom Phylum Class Order Family Genus Species
12812 "Bacteria" "Actinobacteria" "Actinobacteria" "koll13" NA NA NA
317658 "Bacteria" "Actinobacteria" "Actinobacteria" "Actinomycetales" "ACK-M1" NA NA
329744 "Bacteria" "Actinobacteria" "Actinobacteria" "Actinomycetales" "ACK-M1" NA NA
326977 "Bacteria" "Actinobacteria" "Actinobacteria" "Bifidobacteriales" "Bifidobacteriaceae" "Bifidobacterium" "Bifidobacteriumadolescentis"
317182 "Bacteria" "Cyanobacteria" "Chloroplast" "Stramenopiles" NA NA NA
549656 "Bacteria" "Cyanobacteria" "Chloroplast" "Stramenopiles" NA NA NA
279599 "Bacteria" "Cyanobacteria" "Nostocophycideae" "Nostocales" "Nostocaceae" "Dolichospermum" NA
263681 "Bacteria" "Cyanobacteria" "4C0d-2" "YS2" NA NA NA
360229 "Bacteria" "Proteobacteria" "Betaproteobacteria" "Neisseriales" "Neisseriaceae" "Neisseria" NA
536311 "Bacteria" "Proteobacteria" "Betaproteobacteria" "Neisseriales" "Neisseriaceae" "Kingella" NA
94166 "Bacteria" "Proteobacteria" "Gammaproteobacteria" "Pasteurellales" "Pasteurellaceae" "Haemophilus" "Haemophilusparainfluenzae"
550960 "Bacteria" "Proteobacteria" "Gammaproteobacteria" "Enterobacteriales" "Enterobacteriaceae" "Providencia" NA
158660 "Bacteria" "Bacteroidetes" "Bacteroidia" "Bacteroidales" "Bacteroidaceae" "Bacteroides" NA
331820 "Bacteria" "Bacteroidetes" "Bacteroidia" "Bacteroidales" "Bacteroidaceae" "Bacteroides" NA
244304 "Bacteria" "Bacteroidetes" "Bacteroidia" "Bacteroidales" "Bacteroidaceae" "Bacteroides" NA
298875 "Bacteria" "Firmicutes" "Clostridia" "Clostridiales" "Clostridiaceae" "Clostridium" NA
192573 "Bacteria" "Firmicutes" "Clostridia" "Clostridiales" "Ruminococcaceae" NA NA
171551 "Bacteria" "Firmicutes" "Clostridia" "Clostridiales" "Ruminococcaceae" "Faecalibacterium" "Faecalibacteriumprausnitzii"
189047 "Bacteria" "Firmicutes" "Clostridia" "Clostridiales" "Ruminococcaceae" NA NA
98605 "Bacteria" "Firmicutes" "Bacilli" "Lactobacillales" "Streptococcaceae" "Streptococcus" "Streptococcussanguinis"
# taxa.label = NULL (show taxa_names)
plot_heatmap(
  top_gp,
  method = "MDS", distance = "bray",
  taxa.label = NULL, trans = NULL,
  low = "beige", high = "red", na.value = "beige"
)

# taxa.label = "Genus"
plot_heatmap(
  top_gp,
  method = "MDS", distance = "bray",
  taxa.label = "Genus", trans = NULL,
  low = "beige", high = "red", na.value = "beige"
)

# taxa.label = "Species"
plot_heatmap(
  top_gp,
  method = "MDS", distance = "bray",
  taxa.label = "Species", trans = NULL,
  low = "beige", high = "red", na.value = "beige"
)
Hello,
I understood your comment @ycl6, but is it possible to merge the taxa at a given taxonomic level, in order to avoid the repetition on the y axis?
Many thanks
Yes @mafbio, you can use `tax_glom` to agglomerate taxa (see below). The abundances are summed in the agglomerated object, which you can then use to plot the heatmap.
As you can see from the resulting tax table, ASVs that have no taxonomy assignment at the specified rank (e.g. Genus) get removed. If this is not what you want, you need to modify your tax table so that, instead of `NA`, those entries carry a string description, e.g. "unassigned", "unknown", etc.
library(phyloseq)
data(GlobalPatterns)

# Names of the 20 most abundant taxa by total count over all samples.
# head() avoids the NA entries that [1:20] would create if fewer than 20
# taxa were present.
top_taxa <- head(
  names(sort(taxa_sums(GlobalPatterns), decreasing = TRUE)),
  20
)

# Per-sample relative abundance is computed on the full object first; the
# object is then restricted to the selected taxa.
top_gp <- transform_sample_counts(GlobalPatterns, function(OTU) OTU / sum(OTU))
top_gp <- prune_taxa(top_taxa, top_gp)
> top_gp
phyloseq-class experiment-level object
otu_table() OTU Table: [ 20 taxa and 26 samples ]
sample_data() Sample Data: [ 26 samples by 7 sample variables ]
tax_table() Taxonomy Table: [ 20 taxa by 7 taxonomic ranks ]
phy_tree() Phylogenetic Tree: [ 20 tips and 19 internal nodes ]
> tax_table(top_gp)
Taxonomy Table: [20 taxa by 7 taxonomic ranks]:
Kingdom Phylum Class Order Family Genus Species
12812 "Bacteria" "Actinobacteria" "Actinobacteria" "koll13" NA NA NA
317658 "Bacteria" "Actinobacteria" "Actinobacteria" "Actinomycetales" "ACK-M1" NA NA
329744 "Bacteria" "Actinobacteria" "Actinobacteria" "Actinomycetales" "ACK-M1" NA NA
326977 "Bacteria" "Actinobacteria" "Actinobacteria" "Bifidobacteriales" "Bifidobacteriaceae" "Bifidobacterium" "Bifidobacteriumadolescentis"
317182 "Bacteria" "Cyanobacteria" "Chloroplast" "Stramenopiles" NA NA NA
549656 "Bacteria" "Cyanobacteria" "Chloroplast" "Stramenopiles" NA NA NA
279599 "Bacteria" "Cyanobacteria" "Nostocophycideae" "Nostocales" "Nostocaceae" "Dolichospermum" NA
263681 "Bacteria" "Cyanobacteria" "4C0d-2" "YS2" NA NA NA
360229 "Bacteria" "Proteobacteria" "Betaproteobacteria" "Neisseriales" "Neisseriaceae" "Neisseria" NA
536311 "Bacteria" "Proteobacteria" "Betaproteobacteria" "Neisseriales" "Neisseriaceae" "Kingella" NA
94166 "Bacteria" "Proteobacteria" "Gammaproteobacteria" "Pasteurellales" "Pasteurellaceae" "Haemophilus" "Haemophilusparainfluenzae"
550960 "Bacteria" "Proteobacteria" "Gammaproteobacteria" "Enterobacteriales" "Enterobacteriaceae" "Providencia" NA
158660 "Bacteria" "Bacteroidetes" "Bacteroidia" "Bacteroidales" "Bacteroidaceae" "Bacteroides" NA
331820 "Bacteria" "Bacteroidetes" "Bacteroidia" "Bacteroidales" "Bacteroidaceae" "Bacteroides" NA
244304 "Bacteria" "Bacteroidetes" "Bacteroidia" "Bacteroidales" "Bacteroidaceae" "Bacteroides" NA
298875 "Bacteria" "Firmicutes" "Clostridia" "Clostridiales" "Clostridiaceae" "Clostridium" NA
192573 "Bacteria" "Firmicutes" "Clostridia" "Clostridiales" "Ruminococcaceae" NA NA
171551 "Bacteria" "Firmicutes" "Clostridia" "Clostridiales" "Ruminococcaceae" "Faecalibacterium" "Faecalibacteriumprausnitzii"
189047 "Bacteria" "Firmicutes" "Clostridia" "Clostridiales" "Ruminococcaceae" NA NA
98605 "Bacteria" "Firmicutes" "Bacilli" "Lactobacillales" "Streptococcaceae" "Streptococcus" "Streptococcussanguinis"
# Agglomerate taxa that share the same Genus annotation into one taxon each.
# In the printed result this reduces 20 taxa to 10: taxa whose Genus is NA
# are absent, and the Species column of the merged taxa is NA.
top_gp_g <- tax_glom(top_gp, taxrank = "Genus")
> top_gp_g
phyloseq-class experiment-level object
otu_table() OTU Table: [ 10 taxa and 26 samples ]
sample_data() Sample Data: [ 26 samples by 7 sample variables ]
tax_table() Taxonomy Table: [ 10 taxa by 7 taxonomic ranks ]
phy_tree() Phylogenetic Tree: [ 10 tips and 9 internal nodes ]
> tax_table(top_gp_g)
Taxonomy Table: [10 taxa by 7 taxonomic ranks]:
Kingdom Phylum Class Order Family Genus Species
326977 "Bacteria" "Actinobacteria" "Actinobacteria" "Bifidobacteriales" "Bifidobacteriaceae" "Bifidobacterium" NA
279599 "Bacteria" "Cyanobacteria" "Nostocophycideae" "Nostocales" "Nostocaceae" "Dolichospermum" NA
360229 "Bacteria" "Proteobacteria" "Betaproteobacteria" "Neisseriales" "Neisseriaceae" "Neisseria" NA
536311 "Bacteria" "Proteobacteria" "Betaproteobacteria" "Neisseriales" "Neisseriaceae" "Kingella" NA
94166 "Bacteria" "Proteobacteria" "Gammaproteobacteria" "Pasteurellales" "Pasteurellaceae" "Haemophilus" NA
550960 "Bacteria" "Proteobacteria" "Gammaproteobacteria" "Enterobacteriales" "Enterobacteriaceae" "Providencia" NA
331820 "Bacteria" "Bacteroidetes" "Bacteroidia" "Bacteroidales" "Bacteroidaceae" "Bacteroides" NA
298875 "Bacteria" "Firmicutes" "Clostridia" "Clostridiales" "Clostridiaceae" "Clostridium" NA
171551 "Bacteria" "Firmicutes" "Clostridia" "Clostridiales" "Ruminococcaceae" "Faecalibacterium" NA
98605 "Bacteria" "Firmicutes" "Bacilli" "Lactobacillales" "Streptococcaceae" "Streptococcus" NA
Thank you very much for the information. It has been a great help
Hello dear colleagues! I'm a student and I have just started learning these methods. Could someone help me understand some of the details of this plot:
# Names of the 20 most abundant taxa by total count over all samples.
# The original assigned this vector to `top100ITSsklearn` but then passed the
# undefined `top20ITSsklearn` to prune_taxa(); the name now matches its use.
# head() avoids NA names when the object has fewer than 20 taxa.
top20ITSsklearn <- head(
  names(sort(taxa_sums(ITSsklearn), decreasing = TRUE)),
  20
)

# Convert counts to per-sample proportions on the full object, then keep only
# the selected taxa.
ITSsklearn.top20 <- transform_sample_counts(
  ITSsklearn,
  function(OTU) OTU / sum(OTU)
)
ITSsklearn.top20 <- prune_taxa(top20ITSsklearn, ITSsklearn.top20)

# Heatmap labelled by Species with enlarged y-axis text.
# theme() and element_text() are ggplot2 functions, so ggplot2 must be
# attached for this call to work.
plot_heatmap(
  ITSsklearn.top20,
  method = "MDS", distance = "bray",
  taxa.label = "Species", trans = NULL,
  low = "beige", high = "red", na.value = "beige"
) +
  theme(axis.text.y = element_text(size = 16))