nhs-r-community / NHSRplotthedots

An SPC package to support NHSE/I 'Making Data Count' programme
https://nhs-r-community.github.io/NHSRplotthedots/
Other
47 stars 22 forks source link

Option to Implement to a Hard Zero Origin for y or x axis. #154

Closed HanselPalencia closed 2 years ago

HanselPalencia commented 2 years ago

Hi Everyone, so I had a project here at work recently where my boss asked me to implement some hard zeroes on some SPC charts. I was able to achieve this by manipulating the original ptd_create_ggplot funtion.

ptd_create_ggplot2 <- function(x,
                              point_size = 4,
                              percentage_y_axis = FALSE,
                              main_title,
                              x_axis_label,
                              y_axis_label,
                              fixed_x_axis_multiple = TRUE,
                              fixed_y_axis_multiple = TRUE,
                              x_axis_date_format = "%d/%m/%y",
                              x_axis_breaks = NULL,
                              y_axis_breaks = NULL,
                              icons_size = 8L,
                              icons_position = c("top right", "bottom right", "bottom left", "top left", "none"),
                              colours = ptd_spc_colours(),
                              theme_override = NULL,
                              break_lines = c("both", "limits", "process", "none"),
                              ...) {
  dots <- list(...)
  if (length(dots) > 0) {
    warning(
      "Unknown arguments provided by plot: ",
      paste(names(dots), collapse = ", "),
      "\nCheck for common spelling mistakes in arguments."
    )
  }

  assertthat::assert_that(
    inherits(x, "ptd_spc_df"),
    msg = "x argument must be an 'ptd_spc_df' object, created by ptd_spc()."
  )

  # argument needs to be called x for s3 plot method, but rename it to .data so it's more obvious through the rest of
  # the method
  .data <- x
  options <- attr(.data, "options")

  if (missing(main_title)) {
    main_title <- paste0(
      "SPC Chart of ",
      ptd_capitalise(options$value_field),
      ", starting ",
      format(min(.data[["x"]], na.rm = TRUE), format = "%d/%m/%Y")
    )
  }

  if (missing(x_axis_label)) {
    x_axis_label <- ptd_capitalise(options[["date_field"]])
  }
  if (missing(y_axis_label)) {
    y_axis_label <- ptd_capitalise(options[["value_field"]])
  }

  icons_position <- match.arg(icons_position)

  break_lines <- match.arg(break_lines)
  ptd_validate_plot_options(
    point_size,
    percentage_y_axis,
    main_title,
    x_axis_label,
    y_axis_label,
    fixed_x_axis_multiple,
    fixed_y_axis_multiple,
    x_axis_date_format,
    x_axis_breaks,
    y_axis_breaks,
    icons_size,
    icons_position,
    colours,
    theme_override,
    break_lines
  )

  colours_subset <- if (options[["improvement_direction"]] == "neutral") {
    colours[c("common_cause", "special_cause_neutral")]
  } else {
    colours[c("common_cause", "special_cause_improvement", "special_cause_concern")]
  }

  # apply a short groups warning caption if needed
  caption <- if (any(.data$short_group_warning)) {
    paste0(
      "Some trial limits created by groups of fewer than 12 points exist. \n",
      "These will become more reliable as more data is added."
    )
  }

  line_size <- point_size / 3

  break_limits <- break_lines %in% c("both", "limits")
  break_process <- break_lines %in% c("both", "process")

  plot <- ggplot(.data, aes(x = .data$x, y = .data$y)) +
    geom_line(aes(y = .data$upl, group = if (break_limits) .data$rebase_group else 0),
              linetype = "dashed", size = line_size, colour = colours$upl
    ) +
    geom_line(aes(y = .data$lpl, group = if (break_limits) .data$rebase_group else 0),
              linetype = "dashed", size = line_size, colour = colours$lpl
    ) +
    geom_line(aes(y = .data$target),
              linetype = "dashed", size = line_size, colour = colours$target, na.rm = TRUE
    ) +
    geom_line(aes(y = .data$trajectory),
              linetype = "dashed", size = line_size, colour = colours$trajectory, na.rm = TRUE
    ) +
    geom_line(aes(y = mean, group = if (break_limits) .data$rebase_group else 0),
              linetype = "solid", colour = colours$mean_line
    ) +
    geom_line(aes(group = if (break_process) .data$rebase_group else 0),
              linetype = "solid", size = line_size, colour = colours$value_line
    ) +
    geom_point(aes(colour = .data$point_type), size = point_size) +
    scale_colour_manual(
      values = colours_subset,
      labels = ptd_title_case
    ) +
    labs(
      title = main_title,
      x = x_axis_label,
      y = y_axis_label,
      caption = caption,
      group = NULL
    ) +
    theme_minimal() +
    theme(
      plot.background = element_rect(color = "grey", size = 1), # border around whole plot
      plot.margin = unit(c(5, 5, 5, 5), "mm"), # 5mm of white space around plot edge
      axis.text.x = element_text(angle = 45, hjust = 1),
      panel.grid = element_line(color = "grey70"), # gridline colour
      panel.grid.major.x = element_blank(), # remove major x gridlines
      panel.grid.minor.x = element_blank(), # remove minor x gridlines
      legend.position = "bottom",
      legend.title = element_blank()
    ) +
    theme_override

  plot <- plot + if (is.null(x_axis_breaks)) {
    scale_x_datetime(
      breaks = sort(unique(.data$x)),
      date_labels = x_axis_date_format
    )
  } else {
    scale_x_datetime(
      date_breaks = x_axis_breaks,
      date_labels = x_axis_date_format
    )
  }

  # Apply facet wrap if a facet field is present
  if (!is.null(options$facet_field)) {
    # For multiple facet chart, derived fixed/free scales value from x and y axis properties
    faces_scales <- if (fixed_x_axis_multiple) {
      ifelse(fixed_y_axis_multiple, "fixed", "free_y")
    } else {
      ifelse(fixed_y_axis_multiple, "free_x", "free")
    }

    plot <- plot +
      facet_wrap(vars(.data$f), scales = faces_scales)
  }

#### THIS SECTION HERE ####

  if (percentage_y_axis %||% FALSE) {
    plot <- plot +
      scale_y_continuous(labels = scales::label_percent(y_axis_breaks))
  } else if (!is.null(y_axis_breaks)) {
    yaxis <- c(.data[["y"]], .data[["upl"]], .data[["lpl"]], .data[["target"]])
    start <- 0
    end <- max(yaxis, na.rm = TRUE)+1
    plot <- plot +
      scale_y_continuous(limits = c(start, end))
  }

#### END SECTION CHANGES ####

  if (icons_position != "none") {
    plot <- plot +
      geom_ptd_icon(icons_size = icons_size, icons_position = icons_position)
  }

  plot
}

Basically its just a really simple limits change instead of an overall change in breaks, labels, etc. Can we get this added as regular functionality? (i.e. add some if else statements to account for if someone wants to change initial value of y axis)

I can do it if it's something that the overall community thinks is a good idea.

I would also like to note that attempting to manipulate the limits, labels, etc. is very difficult after the fact as when you feed this plot output into say ggplotly(), it absorbs the original ggplot() limits, labels, etc. Even using the layout() function in {plotly} does not correct the axis limits or labels.

tomjemmett commented 2 years ago

personally, I think this is outside the scope of this package, as it's already something that ggplot handles well with the ylim() function. for instance:

library(tidyverse)
library(NHSRplotthedots)

ae_df <- NHSRdatasets::ae_attendances |>
  summarise(across(where(is.numeric), sum), .groups = "drop") |>
  mutate(performance = 1 - breaches / attendances)

ae_spc <- ae_df |>
  ptd_spc(
    date_field = period,
    value_field = performance,
    target = 0.95
  )

plot(ae_spc) +
  ylim(0, NA) # lower bound set to 0, upper bound to NA (will figure out value from data)
ThomUK commented 2 years ago

I agree with Tom, I think this is out of scope for the package. The addition of the ylim() line solves the problem for both ggplot and plotly output, using the code below:

set.seed(1)
data <- rnorm(30) + 20
date <- seq.Date(from = as.Date("2020-01-01"), length.out = 30, by = "month")

dtf <- data.frame(
  data = data,
  date = date
)

p <- NHSRplotthedots::ptd_spc(dtf, data, date) %>% 
  ptd_create_ggplot() +
  ylim(0, NA)

p

plotly::ggplotly(p)