epiverse-trace / tutorials-middle

https://epiverse-trace.github.io/tutorials-middle/
Other
3 stars 1 forks source link

add cfr summative assessment scenario to workshop materials #112

Open avallecam opened 1 month ago

avallecam commented 1 month ago

Idea: Share two dataset examples: ebola and covid data at different moments in time.

Question: Why are the different rolling CFR curve trends between adjusted and naive for Ebola and Covid?

Task for the instructor: After showing rolling, showcase vignette on when cfr_time_varying() is appropriate (reference call out)

Goal: Communicate that for an appropriate estimate time-varying estimate, keep the data with the highest sample size

Filter one region only

library(cfr)
library(incidence2)
#> Loading required package: grates
library(tidyverse)

covid_delay <- epiparameter::epidist_db(
  disease = "covid",
  epi_dist = "onset-to-death",
  single_epidist = TRUE
)
#> Using Linton N, Kobayashi T, Yang Y, Hayashi K, Akhmetzhanov A, Jung S, Yuan
#> B, Kinoshita R, Nishiura H (2020). "Incubation Period and Other
#> Epidemiological Characteristics of 2019 Novel Coronavirus Infections
#> with Right Truncation: A Statistical Analysis of Publicly Available
#> Case Data." _Journal of Clinical Medicine_. doi:10.3390/jcm9020538
#> <https://doi.org/10.3390/jcm9020538>.. 
#> To retrieve the citation use the 'get_citation' function

covid_pre <- incidence2::covidregionaldataUK %>% 
  as_tibble() %>% 
  filter(region == "North East") %>% 
  incidence2::incidence(
    date_index = "date",
    counts = c("cases_new","deaths_new"),
    complete_dates = TRUE)
#> Warning in incidence2::incidence(): `cases_new` contains NA values. Consider
#> imputing these and calling `incidence()` again.

plot(covid_pre, fill = "count_variable")


covid_all <- covid_pre %>% 
  cfr::prepare_data(cases_variable = "cases_new",
                    deaths_variable = "deaths_new") 
#> NAs in cases and deaths are being replaced with 0s: Set `fill_NA = FALSE` to prevent this.

# rolling -----------------------------------------------------------------

# Calculate the CFR without correcting for delays on each day of the outbreak
rolling_cfr_naive <- cfr_rolling(
  data = covid_all
)
#> `cfr_rolling()` is a convenience function to help understand how additional data influences the overall (static) severity. Use `cfr_time_varying()` instead to estimate severity changes over the course of the outbreak.

# Calculate the rolling daily CFR while correcting for delays
rolling_cfr_corrected <- cfr_rolling(
  data = covid_all,
  delay_density = function(x) density(covid_delay,x) #,poisson_threshold = 100000
)
#> `cfr_rolling()` is a convenience function to help understand how additional data influences the overall (static) severity. Use `cfr_time_varying()` instead to estimate severity changes over the course of the outbreak.
#> Some daily ratios of total deaths to total cases with known outcome are below 0.01%: some CFR estimates may be unreliable.FALSE

# combine the data for plotting
rolling_cfr_naive$method <- "naive"
rolling_cfr_corrected$method <- "corrected"

data_cfr <- rbind(
  rolling_cfr_naive,
  rolling_cfr_corrected
)

# visualise both corrected and uncorrected rolling estimates
ggplot(data_cfr) +
  geom_ribbon(
    aes(
      date,
      ymin = severity_low, ymax = severity_high,
      fill = method
    ),
    alpha = 0.2, show.legend = FALSE
  ) +
  geom_line(
    aes(date, severity_estimate, colour = method)
  ) +
  labs(title = "rolling", x = "Date", y = "Disease severity")
#> Warning: Removed 65 rows containing missing values or values outside the scale range
#> (`geom_line()`).


# time varying ------------------------------------------------------------

# Calculate the CFR without correcting for delays on each day of the outbreak
time_varying_cfr_naive <- cfr_time_varying(
  data = covid_all
)

# Calculate the rolling daily CFR while correcting for delays
time_varying_cfr_corrected <- cfr_time_varying(
  data = covid_all,
  delay_density = function(x) density(covid_delay,x) #,poisson_threshold = 100000
)

# combine the data for plotting
time_varying_cfr_naive$method <- "naive"
time_varying_cfr_corrected$method <- "corrected"

data_cfr_timevarying <- rbind(
  time_varying_cfr_naive,
  time_varying_cfr_corrected
)

# visualise both corrected and uncorrected rolling estimates
ggplot(data_cfr_timevarying) +
  geom_ribbon(
    aes(
      date,
      ymin = severity_low, ymax = severity_high,
      fill = method
    ),
    alpha = 0.2, show.legend = FALSE
  ) +
  geom_line(
    aes(date, severity_estimate, colour = method)
  ) +
  labs(title = "time varying", x = "Date", y = "Disease severity")
#> Warning: Removed 80 rows containing missing values or values outside the scale range
#> (`geom_line()`).

Created on 2024-08-13 with reprex v2.1.0

avallecam commented 1 month ago

Keep all the regions

library(cfr)
library(incidence2)
#> Loading required package: grates
library(tidyverse)

covid_delay <- epiparameter::epidist_db(
  disease = "covid",
  epi_dist = "onset-to-death",
  single_epidist = TRUE
  )
#> Using Linton N, Kobayashi T, Yang Y, Hayashi K, Akhmetzhanov A, Jung S, Yuan
#> B, Kinoshita R, Nishiura H (2020). "Incubation Period and Other
#> Epidemiological Characteristics of 2019 Novel Coronavirus Infections
#> with Right Truncation: A Statistical Analysis of Publicly Available
#> Case Data." _Journal of Clinical Medicine_. doi:10.3390/jcm9020538
#> <https://doi.org/10.3390/jcm9020538>.. 
#> To retrieve the citation use the 'get_citation' function

covid_pre <- incidence2::covidregionaldataUK %>% 
  as_tibble() %>% 
  # filter(region == "North East") %>% 
  incidence2::incidence(
    date_index = "date",
    counts = c("cases_new","deaths_new"),
    complete_dates = TRUE)
#> Warning in incidence2::incidence(): `cases_new` contains NA values. Consider
#> imputing these and calling `incidence()` again.

plot(covid_pre, fill = "count_variable")


covid_all <- covid_pre %>% 
  cfr::prepare_data(cases_variable = "cases_new",
                    deaths_variable = "deaths_new") 
#> NAs in cases and deaths are being replaced with 0s: Set `fill_NA = FALSE` to prevent this.

# covid_section <- covid_all %>% 
#   dplyr::filter(date > ymd(20200305) & date < ymd(20200505))
# 
# covid_all %>% 
#   cfr::cfr_static()
# 
# covid_section %>% 
#   cfr::cfr_static(delay_density = function(x) density(covid_delay,x))

# rolling -----------------------------------------------------------------

# Calculate the CFR without correcting for delays on each day of the outbreak
rolling_cfr_naive <- cfr_rolling(
  data = covid_all
)
#> `cfr_rolling()` is a convenience function to help understand how additional data influences the overall (static) severity. Use `cfr_time_varying()` instead to estimate severity changes over the course of the outbreak.

# Calculate the rolling daily CFR while correcting for delays
rolling_cfr_corrected <- cfr_rolling(
  data = covid_all,
  delay_density = function(x) density(covid_delay,x) ,poisson_threshold = 100000
)
#> `cfr_rolling()` is a convenience function to help understand how additional data influences the overall (static) severity. Use `cfr_time_varying()` instead to estimate severity changes over the course of the outbreak.
#> Some daily ratios of total deaths to total cases with known outcome are below 0.01%: some CFR estimates may be unreliable.FALSE

# combine the data for plotting
rolling_cfr_naive$method <- "naive"
rolling_cfr_corrected$method <- "corrected"

data_cfr <- rbind(
  rolling_cfr_naive,
  rolling_cfr_corrected
)

# visualise both corrected and uncorrected rolling estimates
ggplot(data_cfr) +
  geom_ribbon(
    aes(
      date,
      ymin = severity_low, ymax = severity_high,
      fill = method
    ),
    alpha = 0.2, show.legend = FALSE
  ) +
  geom_line(
    aes(date, severity_estimate, colour = method)
  ) +
  scale_colour_brewer(
    palette = "Dark2",
    labels = c("Corrected CFR", "Naive CFR"),
    name = NULL
  ) +
  scale_fill_brewer(
    palette = "Dark2"
  ) + 
  labs(title = "rolling", x = "Date", y = "Disease severity")
#> Warning: Removed 71 rows containing missing values or values outside the scale range
#> (`geom_line()`).


# time varying ------------------------------------------------------------

# Calculate the CFR without correcting for delays on each day of the outbreak
time_varying_cfr_naive <- cfr_time_varying(
  data = covid_all
)

# Calculate the rolling daily CFR while correcting for delays
time_varying_cfr_corrected <- cfr_time_varying(
  data = covid_all,
  delay_density = function(x) density(covid_delay,x)#,poisson_threshold = 100000
)

# combine the data for plotting
time_varying_cfr_naive$method <- "naive"
time_varying_cfr_corrected$method <- "corrected"

data_cfr_timevarying <- rbind(
  time_varying_cfr_naive,
  time_varying_cfr_corrected
)

# visualise both corrected and uncorrected rolling estimates
ggplot(data_cfr_timevarying) +
  geom_ribbon(
    aes(
      date,
      ymin = severity_low, ymax = severity_high,
      fill = method
    ),
    alpha = 0.2, show.legend = FALSE
  ) +
  geom_line(
    aes(date, severity_estimate, colour = method)
  ) +
  scale_colour_brewer(
    palette = "Dark2",
    labels = c("Corrected CFR", "Naive CFR"),
    name = NULL
  ) +
  scale_fill_brewer(
    palette = "Dark2"
  ) + 
  labs(title = "time varying", x = "Date", y = "Disease severity")
#> Warning: Removed 75 rows containing missing values or values outside the scale range
#> (`geom_line()`).

Created on 2024-07-30 with reprex v2.1.0