joey711 / phyloseq

phyloseq is a set of classes, wrappers, and tools (in R) to make it easier to import, store, and analyze phylogenetic sequencing data; and to reproducibly share that data and analysis with others. See the phyloseq front page:
http://joey711.github.io/phyloseq/
567 stars 187 forks source link

Showing top 5 genus in ordination plot legend #1732

Closed Arrowessee closed 3 months ago

Arrowessee commented 4 months ago

Hi. I am very new in R so please pardon my very basic questions.

I am doing an ordination plot of my sequencing data and the genus legend is occupying too much space in the plot. Is there a way I can show the legend in a smaller or partial manner?

Thank you in advance for your help!

ycl6 commented 3 months ago

Hi @Arrowessee

Here's an example. I choose Phylum because it seems simpler as a demo.

    library(phyloseq)
    library(ggplot2)

    data(GlobalPatterns)

    # To simplify the GP data
    ps = subset_taxa(GlobalPatterns, Kingdom == "Bacteria")
    ps = prune_taxa(taxa_sums(ps) > 1000, ps)

    ps.ord = ordinate(ps, "NMDS", "bray")
    #> Square root transformation
    #> Wisconsin double standardization
    #> Run 0 stress 0.1469555 
    #> Run 1 stress 0.1469555 
    #> ... Procrustes: rmse 2.780778e-06  max resid 7.810977e-06 
    #> ... Similar to previous best
    #> Run 2 stress 0.1469555 
    #> ... Procrustes: rmse 2.767032e-06  max resid 8.208212e-06 
    #> ... Similar to previous best
    #> Run 3 stress 0.1469555 
    #> ... Procrustes: rmse 2.397755e-06  max resid 5.909792e-06 
    #> ... Similar to previous best
    #> Run 4 stress 0.1503528 
    #> Run 5 stress 0.1469555 
    #> ... New best solution
    #> ... Procrustes: rmse 7.804064e-07  max resid 1.763328e-06 
    #> ... Similar to previous best
    #> Run 6 stress 0.1469555 
    #> ... Procrustes: rmse 1.362189e-06  max resid 3.085764e-06 
    #> ... Similar to previous best
    #> Run 7 stress 0.1503528 
    #> Run 8 stress 0.1469555 
    #> ... Procrustes: rmse 1.45389e-06  max resid 4.227427e-06 
    #> ... Similar to previous best
    #> Run 9 stress 0.1503528 
    #> Run 10 stress 0.1503528 
    #> Run 11 stress 0.1503528 
    #> Run 12 stress 0.1469555 
    #> ... Procrustes: rmse 1.153258e-06  max resid 2.852589e-06 
    #> ... Similar to previous best
    #> Run 13 stress 0.1503528 
    #> Run 14 stress 0.2255778 
    #> Run 15 stress 0.1469555 
    #> ... Procrustes: rmse 3.011263e-06  max resid 7.846365e-06 
    #> ... Similar to previous best
    #> Run 16 stress 0.1469555 
    #> ... Procrustes: rmse 1.039727e-06  max resid 2.511837e-06 
    #> ... Similar to previous best
    #> Run 17 stress 0.1469555 
    #> ... New best solution
    #> ... Procrustes: rmse 8.777735e-07  max resid 2.324318e-06 
    #> ... Similar to previous best
    #> Run 18 stress 0.1503529 
    #> Run 19 stress 0.1503529 
    #> Run 20 stress 0.1503528 
    #> *** Best solution repeated 1 times

    # Obtain taxa scores
    mat = as.data.frame(vegan::scores(ps.ord, choices = 1:2, display = "species", physeq = ps))
    head(mat)
    #>             NMDS1      NMDS2
    #> 12812  -0.8109251  1.0719368
    #> 241082 -1.0036987  0.9680266
    #> 349592 -0.9114754  0.9926004
    #> 215846 -0.8446904 -0.1038471
    #> 317641 -0.7087311 -0.2169197
    #> 512041 -0.4267098 -0.7438115

    # Combine with taxonomy table
    mat = cbind(tax_table(ps), mat)
    head(mat)
    #>         Kingdom         Phylum          Class            Order          Family
    #> 12812  Bacteria Actinobacteria Actinobacteria           koll13            <NA>
    #> 241082 Bacteria Actinobacteria Actinobacteria           koll13            <NA>
    #> 349592 Bacteria Actinobacteria Actinobacteria           koll13            <NA>
    #> 215846 Bacteria Actinobacteria Actinobacteria Acidimicrobiales Microthrixaceae
    #> 317641 Bacteria Actinobacteria Actinobacteria Acidimicrobiales            <NA>
    #> 512041 Bacteria Actinobacteria Actinobacteria Acidimicrobiales       Iamiaceae
    #>        Genus Species      NMDS1      NMDS2
    #> 12812   <NA>    <NA> -0.8109251  1.0719368
    #> 241082  <NA>    <NA> -1.0036987  0.9680266
    #> 349592  <NA>    <NA> -0.9114754  0.9926004
    #> 215846  <NA>    <NA> -0.8446904 -0.1038471
    #> 317641  <NA>    <NA> -0.7087311 -0.2169197
    #> 512041  <NA>    <NA> -0.4267098 -0.7438115

    # Create a new column, keep selected taxa names and rename remaining as "Others"
    selected = c("Bacteroidetes","Firmicutes","Proteobacteria")
    mat$Taxa = mat$Phylum
    mat$Taxa[!mat$Taxa %in% selected] = "Others"
    head(mat)
    #>         Kingdom         Phylum          Class            Order          Family
    #> 12812  Bacteria Actinobacteria Actinobacteria           koll13            <NA>
    #> 241082 Bacteria Actinobacteria Actinobacteria           koll13            <NA>
    #> 349592 Bacteria Actinobacteria Actinobacteria           koll13            <NA>
    #> 215846 Bacteria Actinobacteria Actinobacteria Acidimicrobiales Microthrixaceae
    #> 317641 Bacteria Actinobacteria Actinobacteria Acidimicrobiales            <NA>
    #> 512041 Bacteria Actinobacteria Actinobacteria Acidimicrobiales       Iamiaceae
    #>        Genus Species      NMDS1      NMDS2   Taxa
    #> 12812   <NA>    <NA> -0.8109251  1.0719368 Others
    #> 241082  <NA>    <NA> -1.0036987  0.9680266 Others
    #> 349592  <NA>    <NA> -0.9114754  0.9926004 Others
    #> 215846  <NA>    <NA> -0.8446904 -0.1038471 Others
    #> 317641  <NA>    <NA> -0.7087311 -0.2169197 Others
    #> 512041  <NA>    <NA> -0.4267098 -0.7438115 Others

    # Reorder taxa so that "Others" appears last
    mat$Taxa = factor(mat$Taxa, levels = c(selected,"Others"))
    levels(mat$Taxa)
    #> [1] "Bacteroidetes"  "Firmicutes"     "Proteobacteria" "Others"

    # Original plot
    plot_ordination(ps, ps.ord, type = "taxa", color = "Phylum", title = "taxa")

    # New plot
    ggplot(mat, aes(NMDS1, NMDS2, color = Taxa)) + geom_point() +
            guides(color = guide_legend("Phylum"))

Created on 2024-03-20 with reprex v2.0.2

Arrowessee commented 3 months ago

Hi @Arrowessee

Here's an example. I choose Phylum because it seems simpler as a demo.

    library(phyloseq)
    library(ggplot2)

    data(GlobalPatterns)

    # To simplify the GP data
    ps = subset_taxa(GlobalPatterns, Kingdom == "Bacteria")
    ps = prune_taxa(taxa_sums(ps) > 1000, ps)

    ps.ord = ordinate(ps, "NMDS", "bray")
    #> Square root transformation
    #> Wisconsin double standardization
    #> Run 0 stress 0.1469555 
    #> Run 1 stress 0.1469555 
    #> ... Procrustes: rmse 2.780778e-06  max resid 7.810977e-06 
    #> ... Similar to previous best
    #> Run 2 stress 0.1469555 
    #> ... Procrustes: rmse 2.767032e-06  max resid 8.208212e-06 
    #> ... Similar to previous best
    #> Run 3 stress 0.1469555 
    #> ... Procrustes: rmse 2.397755e-06  max resid 5.909792e-06 
    #> ... Similar to previous best
    #> Run 4 stress 0.1503528 
    #> Run 5 stress 0.1469555 
    #> ... New best solution
    #> ... Procrustes: rmse 7.804064e-07  max resid 1.763328e-06 
    #> ... Similar to previous best
    #> Run 6 stress 0.1469555 
    #> ... Procrustes: rmse 1.362189e-06  max resid 3.085764e-06 
    #> ... Similar to previous best
    #> Run 7 stress 0.1503528 
    #> Run 8 stress 0.1469555 
    #> ... Procrustes: rmse 1.45389e-06  max resid 4.227427e-06 
    #> ... Similar to previous best
    #> Run 9 stress 0.1503528 
    #> Run 10 stress 0.1503528 
    #> Run 11 stress 0.1503528 
    #> Run 12 stress 0.1469555 
    #> ... Procrustes: rmse 1.153258e-06  max resid 2.852589e-06 
    #> ... Similar to previous best
    #> Run 13 stress 0.1503528 
    #> Run 14 stress 0.2255778 
    #> Run 15 stress 0.1469555 
    #> ... Procrustes: rmse 3.011263e-06  max resid 7.846365e-06 
    #> ... Similar to previous best
    #> Run 16 stress 0.1469555 
    #> ... Procrustes: rmse 1.039727e-06  max resid 2.511837e-06 
    #> ... Similar to previous best
    #> Run 17 stress 0.1469555 
    #> ... New best solution
    #> ... Procrustes: rmse 8.777735e-07  max resid 2.324318e-06 
    #> ... Similar to previous best
    #> Run 18 stress 0.1503529 
    #> Run 19 stress 0.1503529 
    #> Run 20 stress 0.1503528 
    #> *** Best solution repeated 1 times

    # Obtain taxa scores
    mat = as.data.frame(vegan::scores(ps.ord, choices = 1:2, display = "species", physeq = ps))
    head(mat)
    #>             NMDS1      NMDS2
    #> 12812  -0.8109251  1.0719368
    #> 241082 -1.0036987  0.9680266
    #> 349592 -0.9114754  0.9926004
    #> 215846 -0.8446904 -0.1038471
    #> 317641 -0.7087311 -0.2169197
    #> 512041 -0.4267098 -0.7438115

    # Combine with taxonomy table
    mat = cbind(tax_table(ps), mat)
    head(mat)
    #>         Kingdom         Phylum          Class            Order          Family
    #> 12812  Bacteria Actinobacteria Actinobacteria           koll13            <NA>
    #> 241082 Bacteria Actinobacteria Actinobacteria           koll13            <NA>
    #> 349592 Bacteria Actinobacteria Actinobacteria           koll13            <NA>
    #> 215846 Bacteria Actinobacteria Actinobacteria Acidimicrobiales Microthrixaceae
    #> 317641 Bacteria Actinobacteria Actinobacteria Acidimicrobiales            <NA>
    #> 512041 Bacteria Actinobacteria Actinobacteria Acidimicrobiales       Iamiaceae
    #>        Genus Species      NMDS1      NMDS2
    #> 12812   <NA>    <NA> -0.8109251  1.0719368
    #> 241082  <NA>    <NA> -1.0036987  0.9680266
    #> 349592  <NA>    <NA> -0.9114754  0.9926004
    #> 215846  <NA>    <NA> -0.8446904 -0.1038471
    #> 317641  <NA>    <NA> -0.7087311 -0.2169197
    #> 512041  <NA>    <NA> -0.4267098 -0.7438115

    # Create a new column, keep selected taxa names and rename remaining as "Others"
    selected = c("Bacteroidetes","Firmicutes","Proteobacteria")
    mat$Taxa = mat$Phylum
    mat$Taxa[!mat$Taxa %in% selected] = "Others"
    head(mat)
    #>         Kingdom         Phylum          Class            Order          Family
    #> 12812  Bacteria Actinobacteria Actinobacteria           koll13            <NA>
    #> 241082 Bacteria Actinobacteria Actinobacteria           koll13            <NA>
    #> 349592 Bacteria Actinobacteria Actinobacteria           koll13            <NA>
    #> 215846 Bacteria Actinobacteria Actinobacteria Acidimicrobiales Microthrixaceae
    #> 317641 Bacteria Actinobacteria Actinobacteria Acidimicrobiales            <NA>
    #> 512041 Bacteria Actinobacteria Actinobacteria Acidimicrobiales       Iamiaceae
    #>        Genus Species      NMDS1      NMDS2   Taxa
    #> 12812   <NA>    <NA> -0.8109251  1.0719368 Others
    #> 241082  <NA>    <NA> -1.0036987  0.9680266 Others
    #> 349592  <NA>    <NA> -0.9114754  0.9926004 Others
    #> 215846  <NA>    <NA> -0.8446904 -0.1038471 Others
    #> 317641  <NA>    <NA> -0.7087311 -0.2169197 Others
    #> 512041  <NA>    <NA> -0.4267098 -0.7438115 Others

    # Reorder taxa so that "Others" appears last
    mat$Taxa = factor(mat$Taxa, levels = c(selected,"Others"))
    levels(mat$Taxa)
    #> [1] "Bacteroidetes"  "Firmicutes"     "Proteobacteria" "Others"

    # Original plot
    plot_ordination(ps, ps.ord, type = "taxa", color = "Phylum", title = "taxa")

    # New plot
    ggplot(mat, aes(NMDS1, NMDS2, color = Taxa)) + geom_point() +
            guides(color = guide_legend("Phylum"))

Created on 2024-03-20 with reprex v2.0.2

Thank you!!! So much this helps a lot!

ycl6 commented 3 months ago

Hi @Arrowessee Glad you find it useful :smile: Please close the issue if your question is resolved. Thanks.