gavinsimpson / ggvegan

ggplot-based plots for vegan
https://gavinsimpson.github.io/ggvegan/
GNU General Public License v2.0
113 stars 30 forks source link

Lollipop chart to illustrate the species scores to an axis #19

Open MikkoVihtakari opened 6 years ago

MikkoVihtakari commented 6 years ago

Could something like this be useful for the package? The graph is from the example.

image

#' @title Lollipop chart of species contributions to ordination axes
#' @description Creates a lollipop chart of species contributions to ordination axes with scalable point size
#' @param mod \code{\link[vegan]{cca}} or rda object
#' @param axis The ordination axis for which the chart should be made for
#' @param n Number of columns (=species) to show
#' @param sides Character vector specifying which columns should be shown. Options: \code{"both"} shows \code{n/2} from both sides of the axis ordered by species scores; \code{"contrib"} orders the species based on "contributions" to the axis (squared species scores); \code{pos} orders the species based on the species score to the axis and \code{neg} orders the species based on the inverted species score to the axis. 
#' @param point_cols a character vector of length 2 specifying the colors for negative and positive species scores respectively.
#' @param xlab X-axis label
#' @param ylab Y-axis label. If \code{NULL}, the label is generated automatically from the \code{\link[vegan]{cca}} or rda object. 
#' @param sp_names Optional data frame specifying the species names to be used in the plot. The first column has to specify the species names used in the \code{\link[vegan]{cca}} or rda object and the second column the species names that should be used to replace the original names.
#' @param point_size Single number specifying the size for "lollipop heads". 
#' @param text_col Character. Color of text for the points. The text spcifies species contributions to the axis in percentages.
#' @param map_size Logical. Should point size be mapped to species contributions to the axis?
#' @param break_interval Single number specifying the interval to be used for x-axis.
#' @import ggplot2 vegan
#' @importFrom plyr mapvalues
#' @importFrom plyr round_any
#' @author Mikko Vihtakari
#' @example data(dune)
#' axis_plot(rda(dune), break_interval = 0.5)
#' @export

axis_plot <- function(mod, axis = 1, n = 10, sides = "both", point_cols = c("#f8766d", "#00ba38"), xlab = NULL, sp_names = NULL, point_size = 6, text_col = "white", map_size = FALSE, break_interval = 0.1) {

  ## ####

  tmp <- scores(mod, display = "sp")[,axis]
  contrib <- 100*scores(mod, scaling = 0, display = "sp")[,axis]^2

  if(sides == "contrib") {
    tmp <- tmp[names(sort(contrib, decreasing = TRUE)[1:n])]
    tmp <- tmp[order(-tmp)]
  } else if(sides == "pos") {
    tmp <- tmp[order(-tmp)][1:n]
  } else if(sides == "neg") {
    tmp <- tmp[order(tmp)][1:n]
  } else {
    sp1 <- tmp[order(tmp)][1:ceiling(n/2)]
    sp2 <- tmp[order(-tmp)][1:ceiling(n/2)]
    tmp <- c(sp1, sp2)
    tmp <- tmp[order(tmp)]
  }

  xp <- data.frame(variable = names(tmp), value = unname(tmp))
  xp$variable <- factor(xp$variable, levels = xp$variable)
  xp$sign <- ifelse(sign(xp$value) < 0, "neg", "pos")
  xp <- merge(xp, data.frame(variable = names(contrib), contr = unname(contrib)), all.x = TRUE, sort = FALSE)

  if(is.null(xlab)) {
    xlab <- paste(names(axis.expl(mod)[axis]), "value")
  }

  if(!is.null(sp_names)) {
    levels(xp$variable) <- plyr::mapvalues(levels(xp$variable), as.character(sp_names[[1]]), as.character(sp_names[[2]]), warn_missing = FALSE)
  }

xmin <- plyr::round_any(min(xp$value), break_interval, floor)
xmax <- plyr::round_any(max(xp$value), break_interval, ceiling)

  ## ####
  out <- ggplot(data = xp, aes(x = variable, y = value, fill = sign, label = round(contr, 1))) +
    geom_hline(yintercept = 0, size = LS(0.5), color = "black") +
    geom_segment(aes(y = 0, x = variable, yend = value, xend = variable), size = LS(0.5)) +
    coord_flip() +
    scale_y_continuous(name = xlab, breaks = seq(xmin, xmax, break_interval), limits = c(xmin, xmax)) +
    xlab(ylab) + 
    theme_minimal(base_size = 8) +
    theme(legend.position = "none",
      axis.line.x = element_line(size = LS(0.5), color = "black"), 
      axis.ticks.x = element_line(size = LS(0.5), color = "black"),
      panel.grid.major = element_blank(), 
      panel.grid.minor = element_blank(),  
      plot.margin=unit(c(0.5, 0.2, 0.5, 0.1), units = "line"))
  ## ####

  if(map_size) {
    # ####
    out +
      geom_point(aes(size = contr), shape = 21) +
      geom_text(color= text_col, size = 1/3*point_size) +
      scale_fill_manual(values = point_cols) +
      scale_size(range = c(point_size-1, point_size+4))
    # ####
  } else {
    # ####
    out + 
      geom_point(size = point_size, shape = 21) +
      geom_text(color= text_col, size = 1/3*point_size) +
      scale_fill_manual(values = point_cols)
    # ####
  }

}

The current code requires a custom axis.expl function, which can be replaced:

#' @title Get percent of total inertia explained by principal or constrained axes
#' @param mod cca.object
#' @param axes A numeric vector indicating the axes for which percent explained inertia should be returned for
#' @return Returns a named vector containing explained inertia as percentages. Returns constrained inertia fo CCA and unconstrained for CA

axis.expl <- function(mod, axes = 1:2) {

  if(is.null(mod$CCA)) {
    sapply(axes, function(i) {
    100*mod$CA$eig[i]/mod$tot.chi
    })
  } else {
    sapply(axes, function(i) {
    100*mod$CCA$eig[i]/mod$tot.chi
    })
  }

}

The "contribution calculation" does not seem to work for cca objects at the moment, but fixing this should not be a major issue.