Closed Arrowessee closed 3 months ago
Hi @Arrowessee
Here's an example. I choose Phylum
because it seems simpler as a demo.
library(phyloseq)
library(ggplot2)
data(GlobalPatterns)
# To simplify the GP data
ps = subset_taxa(GlobalPatterns, Kingdom == "Bacteria")
ps = prune_taxa(taxa_sums(ps) > 1000, ps)
ps.ord = ordinate(ps, "NMDS", "bray")
#> Square root transformation
#> Wisconsin double standardization
#> Run 0 stress 0.1469555
#> Run 1 stress 0.1469555
#> ... Procrustes: rmse 2.780778e-06 max resid 7.810977e-06
#> ... Similar to previous best
#> Run 2 stress 0.1469555
#> ... Procrustes: rmse 2.767032e-06 max resid 8.208212e-06
#> ... Similar to previous best
#> Run 3 stress 0.1469555
#> ... Procrustes: rmse 2.397755e-06 max resid 5.909792e-06
#> ... Similar to previous best
#> Run 4 stress 0.1503528
#> Run 5 stress 0.1469555
#> ... New best solution
#> ... Procrustes: rmse 7.804064e-07 max resid 1.763328e-06
#> ... Similar to previous best
#> Run 6 stress 0.1469555
#> ... Procrustes: rmse 1.362189e-06 max resid 3.085764e-06
#> ... Similar to previous best
#> Run 7 stress 0.1503528
#> Run 8 stress 0.1469555
#> ... Procrustes: rmse 1.45389e-06 max resid 4.227427e-06
#> ... Similar to previous best
#> Run 9 stress 0.1503528
#> Run 10 stress 0.1503528
#> Run 11 stress 0.1503528
#> Run 12 stress 0.1469555
#> ... Procrustes: rmse 1.153258e-06 max resid 2.852589e-06
#> ... Similar to previous best
#> Run 13 stress 0.1503528
#> Run 14 stress 0.2255778
#> Run 15 stress 0.1469555
#> ... Procrustes: rmse 3.011263e-06 max resid 7.846365e-06
#> ... Similar to previous best
#> Run 16 stress 0.1469555
#> ... Procrustes: rmse 1.039727e-06 max resid 2.511837e-06
#> ... Similar to previous best
#> Run 17 stress 0.1469555
#> ... New best solution
#> ... Procrustes: rmse 8.777735e-07 max resid 2.324318e-06
#> ... Similar to previous best
#> Run 18 stress 0.1503529
#> Run 19 stress 0.1503529
#> Run 20 stress 0.1503528
#> *** Best solution repeated 1 times
# Obtain taxa scores
mat = as.data.frame(vegan::scores(ps.ord, choices = 1:2, display = "species", physeq = ps))
head(mat)
#> NMDS1 NMDS2
#> 12812 -0.8109251 1.0719368
#> 241082 -1.0036987 0.9680266
#> 349592 -0.9114754 0.9926004
#> 215846 -0.8446904 -0.1038471
#> 317641 -0.7087311 -0.2169197
#> 512041 -0.4267098 -0.7438115
# Combine with taxonomy table
mat = cbind(tax_table(ps), mat)
head(mat)
#> Kingdom Phylum Class Order Family
#> 12812 Bacteria Actinobacteria Actinobacteria koll13 <NA>
#> 241082 Bacteria Actinobacteria Actinobacteria koll13 <NA>
#> 349592 Bacteria Actinobacteria Actinobacteria koll13 <NA>
#> 215846 Bacteria Actinobacteria Actinobacteria Acidimicrobiales Microthrixaceae
#> 317641 Bacteria Actinobacteria Actinobacteria Acidimicrobiales <NA>
#> 512041 Bacteria Actinobacteria Actinobacteria Acidimicrobiales Iamiaceae
#> Genus Species NMDS1 NMDS2
#> 12812 <NA> <NA> -0.8109251 1.0719368
#> 241082 <NA> <NA> -1.0036987 0.9680266
#> 349592 <NA> <NA> -0.9114754 0.9926004
#> 215846 <NA> <NA> -0.8446904 -0.1038471
#> 317641 <NA> <NA> -0.7087311 -0.2169197
#> 512041 <NA> <NA> -0.4267098 -0.7438115
# Create a new column, keep selected taxa names and rename remaining as "Others"
selected = c("Bacteroidetes","Firmicutes","Proteobacteria")
mat$Taxa = mat$Phylum
mat$Taxa[!mat$Taxa %in% selected] = "Others"
head(mat)
#> Kingdom Phylum Class Order Family
#> 12812 Bacteria Actinobacteria Actinobacteria koll13 <NA>
#> 241082 Bacteria Actinobacteria Actinobacteria koll13 <NA>
#> 349592 Bacteria Actinobacteria Actinobacteria koll13 <NA>
#> 215846 Bacteria Actinobacteria Actinobacteria Acidimicrobiales Microthrixaceae
#> 317641 Bacteria Actinobacteria Actinobacteria Acidimicrobiales <NA>
#> 512041 Bacteria Actinobacteria Actinobacteria Acidimicrobiales Iamiaceae
#> Genus Species NMDS1 NMDS2 Taxa
#> 12812 <NA> <NA> -0.8109251 1.0719368 Others
#> 241082 <NA> <NA> -1.0036987 0.9680266 Others
#> 349592 <NA> <NA> -0.9114754 0.9926004 Others
#> 215846 <NA> <NA> -0.8446904 -0.1038471 Others
#> 317641 <NA> <NA> -0.7087311 -0.2169197 Others
#> 512041 <NA> <NA> -0.4267098 -0.7438115 Others
# Reorder taxa so that "Others" appears last
mat$Taxa = factor(mat$Taxa, levels = c(selected,"Others"))
levels(mat$Taxa)
#> [1] "Bacteroidetes" "Firmicutes" "Proteobacteria" "Others"
# Original plot
plot_ordination(ps, ps.ord, type = "taxa", color = "Phylum", title = "taxa")
# New plot
ggplot(mat, aes(NMDS1, NMDS2, color = Taxa)) + geom_point() +
guides(color = guide_legend("Phylum"))
Created on 2024-03-20 with reprex v2.0.2
Hi @Arrowessee
Here's an example. I choose
Phylum
because it seems simpler as a demo.library(phyloseq) library(ggplot2) data(GlobalPatterns) # To simplify the GP data ps = subset_taxa(GlobalPatterns, Kingdom == "Bacteria") ps = prune_taxa(taxa_sums(ps) > 1000, ps) ps.ord = ordinate(ps, "NMDS", "bray") #> Square root transformation #> Wisconsin double standardization #> Run 0 stress 0.1469555 #> Run 1 stress 0.1469555 #> ... Procrustes: rmse 2.780778e-06 max resid 7.810977e-06 #> ... Similar to previous best #> Run 2 stress 0.1469555 #> ... Procrustes: rmse 2.767032e-06 max resid 8.208212e-06 #> ... Similar to previous best #> Run 3 stress 0.1469555 #> ... Procrustes: rmse 2.397755e-06 max resid 5.909792e-06 #> ... Similar to previous best #> Run 4 stress 0.1503528 #> Run 5 stress 0.1469555 #> ... New best solution #> ... Procrustes: rmse 7.804064e-07 max resid 1.763328e-06 #> ... Similar to previous best #> Run 6 stress 0.1469555 #> ... Procrustes: rmse 1.362189e-06 max resid 3.085764e-06 #> ... Similar to previous best #> Run 7 stress 0.1503528 #> Run 8 stress 0.1469555 #> ... Procrustes: rmse 1.45389e-06 max resid 4.227427e-06 #> ... Similar to previous best #> Run 9 stress 0.1503528 #> Run 10 stress 0.1503528 #> Run 11 stress 0.1503528 #> Run 12 stress 0.1469555 #> ... Procrustes: rmse 1.153258e-06 max resid 2.852589e-06 #> ... Similar to previous best #> Run 13 stress 0.1503528 #> Run 14 stress 0.2255778 #> Run 15 stress 0.1469555 #> ... Procrustes: rmse 3.011263e-06 max resid 7.846365e-06 #> ... Similar to previous best #> Run 16 stress 0.1469555 #> ... Procrustes: rmse 1.039727e-06 max resid 2.511837e-06 #> ... Similar to previous best #> Run 17 stress 0.1469555 #> ... New best solution #> ... Procrustes: rmse 8.777735e-07 max resid 2.324318e-06 #> ... Similar to previous best #> Run 18 stress 0.1503529 #> Run 19 stress 0.1503529 #> Run 20 stress 0.1503528 #> *** Best solution repeated 1 times # Obtain taxa scores mat = as.data.frame(vegan::scores(ps.ord, choices = 1:2, display = "species", physeq = ps)) head(mat) #> NMDS1 NMDS2 #> 12812 -0.8109251 1.0719368 #> 241082 -1.0036987 0.9680266 #> 349592 -0.9114754 0.9926004 #> 215846 -0.8446904 -0.1038471 #> 317641 -0.7087311 -0.2169197 #> 512041 -0.4267098 -0.7438115 # Combine with taxonomy table mat = cbind(tax_table(ps), mat) head(mat) #> Kingdom Phylum Class Order Family #> 12812 Bacteria Actinobacteria Actinobacteria koll13 <NA> #> 241082 Bacteria Actinobacteria Actinobacteria koll13 <NA> #> 349592 Bacteria Actinobacteria Actinobacteria koll13 <NA> #> 215846 Bacteria Actinobacteria Actinobacteria Acidimicrobiales Microthrixaceae #> 317641 Bacteria Actinobacteria Actinobacteria Acidimicrobiales <NA> #> 512041 Bacteria Actinobacteria Actinobacteria Acidimicrobiales Iamiaceae #> Genus Species NMDS1 NMDS2 #> 12812 <NA> <NA> -0.8109251 1.0719368 #> 241082 <NA> <NA> -1.0036987 0.9680266 #> 349592 <NA> <NA> -0.9114754 0.9926004 #> 215846 <NA> <NA> -0.8446904 -0.1038471 #> 317641 <NA> <NA> -0.7087311 -0.2169197 #> 512041 <NA> <NA> -0.4267098 -0.7438115 # Create a new column, keep selected taxa names and rename remaining as "Others" selected = c("Bacteroidetes","Firmicutes","Proteobacteria") mat$Taxa = mat$Phylum mat$Taxa[!mat$Taxa %in% selected] = "Others" head(mat) #> Kingdom Phylum Class Order Family #> 12812 Bacteria Actinobacteria Actinobacteria koll13 <NA> #> 241082 Bacteria Actinobacteria Actinobacteria koll13 <NA> #> 349592 Bacteria Actinobacteria Actinobacteria koll13 <NA> #> 215846 Bacteria Actinobacteria Actinobacteria Acidimicrobiales Microthrixaceae #> 317641 Bacteria Actinobacteria Actinobacteria Acidimicrobiales <NA> #> 512041 Bacteria Actinobacteria Actinobacteria Acidimicrobiales Iamiaceae #> Genus Species NMDS1 NMDS2 Taxa #> 12812 <NA> <NA> -0.8109251 1.0719368 Others #> 241082 <NA> <NA> -1.0036987 0.9680266 Others #> 349592 <NA> <NA> -0.9114754 0.9926004 Others #> 215846 <NA> <NA> -0.8446904 -0.1038471 Others #> 317641 <NA> <NA> -0.7087311 -0.2169197 Others #> 512041 <NA> <NA> -0.4267098 -0.7438115 Others # Reorder taxa so that "Others" appears last mat$Taxa = factor(mat$Taxa, levels = c(selected,"Others")) levels(mat$Taxa) #> [1] "Bacteroidetes" "Firmicutes" "Proteobacteria" "Others" # Original plot plot_ordination(ps, ps.ord, type = "taxa", color = "Phylum", title = "taxa")
# New plot ggplot(mat, aes(NMDS1, NMDS2, color = Taxa)) + geom_point() + guides(color = guide_legend("Phylum"))
Created on 2024-03-20 with reprex v2.0.2
Thank you!!! So much this helps a lot!
Hi @Arrowessee Glad you find it useful :smile: Please close the issue if your question is resolved. Thanks.
Hi. I am very new in R so please pardon my very basic questions.
I am doing an ordination plot of my sequencing data and the genus legend is occupying too much space in the plot. Is there a way I can show the legend in a smaller or partial manner?
Thank you in advance for your help!