R4EPI / sitrep

Report templates and helper functions for applied epidemiology
https://r4epi.github.io/sitrep/
GNU General Public License v3.0
39 stars 14 forks source link

put geom_squares() function in {epikit} #279

Open aspina7 opened 2 years ago

aspina7 commented 2 years ago

Adapted from {incidence} but makes it possible to use directly with {ggplot2} maintaining the use of scale_x_date() functions. Seems to work dates or month (presumably works with whatever geom_histogram() is fed to it.... but need to add tests.

Also need to re-structure so can used it with the ggplot2 + rather than %>%

pacman::p_load("rio", "tidyverse", "lubridate")

## define a function for plotting squares 
## plot: ggplot object (geom_histogram)
## color: colour for the outline of the squares
## fill: colour for filling in the squares (default is NA)
## position: where the squares go (should inherit from the ggplot obj)
add_squares <- function(plot, 
                        color = "black", 
                        fill = NA, 
                        position = "stack"
                        ) {

  df <- ggplot_build(plot)$data[[1]]

  # define squares for plotting over 
  squaredf <- df[rep(seq.int(nrow(df)), df[["count"]]), ]
  squaredf[["count"]] <- 1

  squaredf <- mutate(squaredf, 
                     x = as.Date(x, origin = "1970-01-01"))

  ## add the squares to the basic plot 
  plot + 
    geom_histogram(data = squaredf, 
             mapping = aes(x = x, y = count),
             stat = "identity",
             color = "black",
             fill  = NA,
             position = "stack", 
             width = squaredf$xmax - squaredf$xmin
    )

}

# file import
linelist <- import("https://github.com/appliedepi/epirhandbook/raw/master/inst/extdata/case_linelists/linelist_cleaned.rds")

# fix factor levels
linelist <- linelist %>% 
  mutate(outcome = fct_explicit_na(outcome, na_level = "Missing"),
         outcome = fct_rev(outcome)
         )

# linelist for central hospital
central_linelist <- linelist %>%
  filter(hospital == "Central Hospital") %>% 
  mutate(epiweek = floor_date(date_onset,   "week", week_start = 1), 
         month = floor_date(date_onset,   "month")) %>% 
  select(date_onset, epiweek, outcome) %>% 
  arrange(date_onset)

############################### weekly #########################################

# weekly histo breaks for central hospital
weekly_breaks_central <- seq.Date(
  from = floor_date(min(central_linelist$date_onset, na.rm=T) - 1,   "week", week_start = 1), # monday before first case
  to   = ceiling_date(max(central_linelist$date_onset, na.rm=T) + 1, "week", week_start = 1), # monday after last case
  by   = "week")

# define total number 
numz <- paste0("N = ", nrow(central_linelist))

# define caption dates 
capz <- paste0("*Monday weeks from ", 
               min(central_linelist$date_onset, na.rm = TRUE) %>% 
                 format("%d %B %Y"), " to ", 
               max(central_linelist$date_onset, na.rm = TRUE) %>% 
                 format("%d %B %Y"), 
               ". \n", 
               sum(is.na(central_linelist$date_onset)), 
               " cases missing date of onset and not shown.")

## use the counts dataset (feed geom_col)
basic_plot <- central_linelist %>% 

  ggplot() + 

  # bar chart (if plotting from aggregated counts) 
  geom_histogram(
    # define what to plot and colour
    mapping = aes(x = date_onset, fill = outcome), 
    # define the breaks to use 
    breaks = weekly_breaks_central, 
    # start end closed
    closed = "left"

  ) + 

  # y-axis scale as before 
  scale_y_continuous(expand = c(0,0)) +

  # x-axis scale sets efficient date labels
  scale_x_date(
    expand = c(0,0),                       # remove excess x-axis space below and after case bars
    date_breaks = "months", 
    labels = scales::label_date_short()) + # auto efficient date labels

  scale_fill_brewer(type = "div", 
                    palette = 7) +

  # labels and theme
  labs(
    # Alex: would stay away from defining as "incidence":
      # while not technically wrong because it can be defined as cases/time-period, 
      # traditionalists would say it should be used as cases/population/time-period
    title = "Weekly cases of disease X, by outcome",
    subtitle = numz,
    x = "Week of symptom onset*",
    # Alex: dont need to say "weekly" here because the axis is should counts (small n)
      # time period denoted by the x-axis
    y = "Cases (n)",
    fill = "Outcome",
    # No need to repeat N here (as in title) 
    caption = capz)+
  theme_classic(16)+
  theme(legend.position = "right",
        plot.caption = element_text(hjust=0, face = "italic"))

## add the squares to the basic plot 
basic_plot %>% 
  add_squares()

############################### monthly #########################################

# monthly histo breaks for central hospital
monthly_breaks_central <- seq.Date(
  from = floor_date(min(central_linelist$date_onset, na.rm=T) - 1,   "month"),          
  to   = ceiling_date(max(central_linelist$date_onset, na.rm=T) + 1, "month"),
  by   = "month")

# define total number 
numz <- paste0("N = ", nrow(central_linelist))

# define caption dates 
capz <- paste0("*Monday weeks from ", 
               min(central_linelist$date_onset, na.rm = TRUE) %>% 
                 format("%d %B %Y"), " to ", 
               max(central_linelist$date_onset, na.rm = TRUE) %>% 
                 format("%d %B %Y"), 
               ". \n", 
               sum(is.na(central_linelist$date_onset)), 
               " cases missing date of onset and not shown.")

## use the counts dataset (feed geom_col)
basic_plot <- central_linelist %>% 

  ggplot() + 

  # bar chart (if plotting from aggregated counts) 
  geom_histogram(
    # define what to plot and colour
    mapping = aes(x = date_onset, fill = outcome), 
    # define the breaks to use 
    breaks = monthly_breaks_central, 
    # start end closed
    closed = "left"

  ) + 

  # y-axis scale as before 
  scale_y_continuous(expand = c(0,0)) +

  # x-axis scale sets efficient date labels
  scale_x_date(
    limits = c(min(monthly_breaks_central), max(monthly_breaks_central)),
    expand = c(0,0),                       # remove excess x-axis space below and after case bars
    date_breaks = "months", 
    labels = scales::label_date_short()) + # auto efficient date labels

  scale_fill_brewer(type = "div", 
                    palette = 7) +

  # labels and theme
  labs(
    # Alex: would stay away from defining as "incidence":
    # while not technically wrong because it can be defined as cases/time-period, 
    # traditionalists would say it should be used as cases/population/time-period
    title = "Weekly cases of disease X, by outcome",
    subtitle = numz,
    x = "Week of symptom onset*",
    # Alex: dont need to say "weekly" here because the axis is should counts (small n)
    # time period denoted by the x-axis
    y = "Cases (n)",
    fill = "Outcome",
    # No need to repeat N here (as in title) 
    caption = capz)+
  theme_classic(16)+
  theme(legend.position = "right",
        plot.caption = element_text(hjust=0, face = "italic"))

## add the squares to the basic plot 
basic_plot %>% 
  add_squares()