yerkes-gencore / gencoreBulk

An R package to facilitate bulk RNAseq analyses at the ENPRC Genomics Core
Other
1 stars 0 forks source link

fix scaling on gsea dotplots #34

Open derrik-gratz opened 3 months ago

derrik-gratz commented 3 months ago

use scale_size_area instead of scale radius. can set a ceiling that way

  scale_size_area(#name="NOM p-val", 
                  # limits=c(0.1,0.00001),
                 # range=range,
                 breaks=breaks,
                 labels=labels, 
                 max_size=range[2]) +
derrik-gratz commented 3 months ago

Nevermind, this has incosistent radius/area scaling with different pvalue ranges. Might have to just manually cap values prior to feeding to scale radius

derrik-gratz commented 3 months ago
library(scales)
reverselog_trans <- function(base = exp(1)) {
    trans <- function(x) -log(x, base)
    inv <- function(x) base^(-x)
    trans_new(paste0("reverselog-", format(base)), trans, inv, 
              log_breaks(base = base), 
              domain = c(1e-100, Inf))
}
gseaDotplot_joint_2 <- function(gsea_results,
                              pathway_order = NULL,
                              x_order = NULL,
                              significance = c(0.05, 0.01, 0.001),
                              range = c(1,8),
                              breaks = c(0.1,0.01,0.001,0.0001),
                              cap_values = FALSE,
                              # labels = c(0.1,0.01,0.001,0.0001),
                              p_val_col = 'pval',
                              use_shortened_pathways = TRUE){
  if (use_shortened_pathways) {
    gsea_results$pathway <- gsea_results$pathway_short
  }
  if (!is.null(pathway_order)) {
    if (all(pathway_order %in% unique(gsea_results$pathway))){
      pathway_order <- order(factor(gsea_results$pathway, levels = pathway_order))
      gsea_results$pathway <- .wrap_underscore_strings_balance(gsea_results$pathway,36)
      ## reordering
      gsea_results <- gsea_results[pathway_order,]
      gsea_results$pathway <- factor(gsea_results$pathway, levels = unique(gsea_results$pathway))
    } else {
      warning('pathways specified in pathway_order not found, defaulting to arbitrary order')
    }
  } else {
    gsea_results$pathway <- .wrap_underscore_strings_balance(gsea_results$pathway,36)
  }

  if (!is.null(x_order)) {
    if (all(x_order %in% unique(gsea_results$ID))){
      gsea_results$ID <- factor(gsea_results$ID, levels = x_order)
    } else {
      warning('Specified x_order not all found in data, defaulting to arbitrary order')
    }
  }
  if (cap_values) {
    ## Set the minimum value to be 1/10th of the smallest label
    gsea_results[[p_val_col]] <- pmax(tail(breaks, 1)/10, gsea_results[[p_val_col]])
  }

  gsea_results$label <- NA
  caption <- ''
  if (!is.null(significance)) {
    if (is.numeric(significance)) {
      label <- '*'
      for (cutoff in significance) {
        gsea_results$label <- ifelse(gsea_results[[p_val_col]] < cutoff, label, gsea_results$label)
        caption <- paste(caption, label, '<', cutoff, ';', sep = ' ')
        label <- paste0(label, '*')
      }
      # gsea_results$label[is.numeric(gsea_results$label)] <- NA
    } else {
      stop('Significance argument should be a numeric vector')
    }
  } 
  ggplot(gsea_results, aes(x=.data$ID, y=.data$pathway, size=.data[[p_val_col]],
                           color=.data$NES, label = .data$label)) + 
    geom_point() +
    scale_color_gradient2(low="blue",
                          mid="white",
                          high="red",
                          midpoint=0,
                          breaks=c(-2,-1,0,1,2),
                          limits=c(min(gsea_results$NES,-1),
                                   max(gsea_results$NES,1))) +
    theme_classic(11) +
    theme(panel.grid.major = element_line(colour = "grey92"),
          panel.grid.minor = element_line(colour = "grey92"),
          panel.grid.major.y = element_line(colour = "grey92"),#element_blank(),
          panel.grid.minor.y = element_line(colour = "grey92")) +
    theme(axis.text.x = element_text(angle = 90, 
                                     vjust = 0.5, 
                                     hjust=1)) +
    labs(x="Comparison",
         y="Gene set", 
         color = "Normalized\nenrichment\nscore",
         size="Adjusted\np-value",
         title="GSEA pathway enrichments",
         caption = caption) +
    scale_radius(#name="NOM p-val", 
                  # limits=c(0.1,0.00001),
                 range=range,
      trans=reverselog_trans(),
                 breaks=breaks,
                 # labels=labels,
      ) +
    geom_text(na.rm = TRUE, color = 'white', size = 3)
}