epiverse-trace / tutorials-middle

Other

3 stars 1 forks source link

Idea: Share two dataset examples: ebola and covid data at different moments in time.

Question: Why are the different rolling CFR curve trends between adjusted and naive for Ebola and Covid?

Task for the instructor: After showing rolling, showcase vignette on when cfr_time_varying() is appropriate (reference call out)

Goal: Communicate that for an appropriate estimate time-varying estimate, keep the data with the highest sample size

Filter one region only

library(cfr)
library(incidence2)
#> Loading required package: grates

library(tidyverse)

covid_delay <- epiparameter::epidist_db(
  disease = "covid",
  epi_dist = "onset-to-death",
  single_epidist = TRUE
)
#> Using Linton N, Kobayashi T, Yang Y, Hayashi K, Akhmetzhanov A, Jung S, Yuan
#> B, Kinoshita R, Nishiura H (2020). "Incubation Period and Other
#> Epidemiological Characteristics of 2019 Novel Coronavirus Infections
#> with Right Truncation: A Statistical Analysis of Publicly Available
#> Case Data." _Journal of Clinical Medicine_. doi:10.3390/jcm9020538
#> <https://doi.org/10.3390/jcm9020538>.. 
#> To retrieve the citation use the 'get_citation' function


covid_pre <- incidence2::covidregionaldataUK %>% 
  as_tibble() %>% 
  filter(region == "North East") %>% 
  incidence2::incidence(
    date_index = "date",
    counts = c("cases_new","deaths_new"),
    complete_dates = TRUE)
#> Warning in incidence2::incidence(): `cases_new` contains NA values. Consider
#> imputing these and calling `incidence()` again.


plot(covid_pre, fill = "count_variable")


covid_all <- covid_pre %>% 
  cfr::prepare_data(cases_variable = "cases_new",
                    deaths_variable = "deaths_new") 
#> NAs in cases and deaths are being replaced with 0s: Set `fill_NA = FALSE` to prevent this.


# rolling -----------------------------------------------------------------

# Calculate the CFR without correcting for delays on each day of the outbreak
rolling_cfr_naive <- cfr_rolling(
  data = covid_all
)
#> `cfr_rolling()` is a convenience function to help understand how additional data influences the overall (static) severity. Use `cfr_time_varying()` instead to estimate severity changes over the course of the outbreak.


# Calculate the rolling daily CFR while correcting for delays
rolling_cfr_corrected <- cfr_rolling(
  data = covid_all,
  delay_density = function(x) density(covid_delay,x) #,poisson_threshold = 100000
)
#> `cfr_rolling()` is a convenience function to help understand how additional data influences the overall (static) severity. Use `cfr_time_varying()` instead to estimate severity changes over the course of the outbreak.
#> Some daily ratios of total deaths to total cases with known outcome are below 0.01%: some CFR estimates may be unreliable.FALSE


# combine the data for plotting
rolling_cfr_naive$method <- "naive"
rolling_cfr_corrected$method <- "corrected"

data_cfr <- rbind(
  rolling_cfr_naive,
  rolling_cfr_corrected
)

# visualise both corrected and uncorrected rolling estimates
ggplot(data_cfr) +
  geom_ribbon(
    aes(
      date,
      ymin = severity_low, ymax = severity_high,
      fill = method
    ),
    alpha = 0.2, show.legend = FALSE
  ) +
  geom_line(
    aes(date, severity_estimate, colour = method)
  ) +
  labs(title = "rolling", x = "Date", y = "Disease severity")
#> Warning: Removed 65 rows containing missing values or values outside the scale range
#> (`geom_line()`).


# time varying ------------------------------------------------------------

# Calculate the CFR without correcting for delays on each day of the outbreak
time_varying_cfr_naive <- cfr_time_varying(
  data = covid_all
)

# Calculate the rolling daily CFR while correcting for delays
time_varying_cfr_corrected <- cfr_time_varying(
  data = covid_all,
  delay_density = function(x) density(covid_delay,x) #,poisson_threshold = 100000
)

# combine the data for plotting
time_varying_cfr_naive$method <- "naive"
time_varying_cfr_corrected$method <- "corrected"

data_cfr_timevarying <- rbind(
  time_varying_cfr_naive,
  time_varying_cfr_corrected
)

# visualise both corrected and uncorrected rolling estimates
ggplot(data_cfr_timevarying) +
  geom_ribbon(
    aes(
      date,
      ymin = severity_low, ymax = severity_high,
      fill = method
    ),
    alpha = 0.2, show.legend = FALSE
  ) +
  geom_line(
    aes(date, severity_estimate, colour = method)
  ) +
  labs(title = "time varying", x = "Date", y = "Disease severity")
#> Warning: Removed 80 rows containing missing values or values outside the scale range
#> (`geom_line()`).

^{Created on 2024-08-13 with reprex v2.1.0}

Keep all the regions

library(cfr) library(incidence2) #> Loading required package: grates

library(tidyverse) covid_delay <- epiparameter::epidist_db( disease = "covid", epi_dist = "onset-to-death", single_epidist = TRUE ) #> Using Linton N, Kobayashi T, Yang Y, Hayashi K, Akhmetzhanov A, Jung S, Yuan #> B, Kinoshita R, Nishiura H (2020). "Incubation Period and Other #> Epidemiological Characteristics of 2019 Novel Coronavirus Infections #> with Right Truncation: A Statistical Analysis of Publicly Available #> Case Data." _Journal of Clinical Medicine_. doi:10.3390/jcm9020538 #> <https://doi.org/10.3390/jcm9020538>.. #> To retrieve the citation use the 'get_citation' function

covid_pre <- incidence2::covidregionaldataUK %>% as_tibble() %>% # filter(region == "North East") %>% incidence2::incidence( date_index = "date", counts = c("cases_new","deaths_new"), complete_dates = TRUE) #> Warning in incidence2::incidence(): `cases_new` contains NA values. Consider #> imputing these and calling `incidence()` again.

plot(covid_pre, fill = "count_variable")

covid_all <- covid_pre %>% cfr::prepare_data(cases_variable = "cases_new", deaths_variable = "deaths_new") #> NAs in cases and deaths are being replaced with 0s: Set `fill_NA = FALSE` to prevent this.

# covid_section <- covid_all %>% # dplyr::filter(date > ymd(20200305) & date < ymd(20200505)) # # covid_all %>% # cfr::cfr_static() # # covid_section %>% # cfr::cfr_static(delay_density = function(x) density(covid_delay,x)) # rolling ----------------------------------------------------------------- # Calculate the CFR without correcting for delays on each day of the outbreak rolling_cfr_naive <- cfr_rolling( data = covid_all ) #> `cfr_rolling()` is a convenience function to help understand how additional data influences the overall (static) severity. Use `cfr_time_varying()` instead to estimate severity changes over the course of the outbreak.

# Calculate the rolling daily CFR while correcting for delays rolling_cfr_corrected <- cfr_rolling( data = covid_all, delay_density = function(x) density(covid_delay,x) ,poisson_threshold = 100000 ) #> `cfr_rolling()` is a convenience function to help understand how additional data influences the overall (static) severity. Use `cfr_time_varying()` instead to estimate severity changes over the course of the outbreak. #> Some daily ratios of total deaths to total cases with known outcome are below 0.01%: some CFR estimates may be unreliable.FALSE

# combine the data for plotting rolling_cfr_naive$method <- "naive" rolling_cfr_corrected$method <- "corrected" data_cfr <- rbind( rolling_cfr_naive, rolling_cfr_corrected ) # visualise both corrected and uncorrected rolling estimates ggplot(data_cfr) + geom_ribbon( aes( date, ymin = severity_low, ymax = severity_high, fill = method ), alpha = 0.2, show.legend = FALSE ) + geom_line( aes(date, severity_estimate, colour = method) ) + scale_colour_brewer( palette = "Dark2", labels = c("Corrected CFR", "Naive CFR"), name = NULL ) + scale_fill_brewer( palette = "Dark2" ) + labs(title = "rolling", x = "Date", y = "Disease severity") #> Warning: Removed 71 rows containing missing values or values outside the scale range #> (`geom_line()`).

# time varying ------------------------------------------------------------ # Calculate the CFR without correcting for delays on each day of the outbreak time_varying_cfr_naive <- cfr_time_varying( data = covid_all ) # Calculate the rolling daily CFR while correcting for delays time_varying_cfr_corrected <- cfr_time_varying( data = covid_all, delay_density = function(x) density(covid_delay,x)#,poisson_threshold = 100000 ) # combine the data for plotting time_varying_cfr_naive$method <- "naive" time_varying_cfr_corrected$method <- "corrected" data_cfr_timevarying <- rbind( time_varying_cfr_naive, time_varying_cfr_corrected ) # visualise both corrected and uncorrected rolling estimates ggplot(data_cfr_timevarying) + geom_ribbon( aes( date, ymin = severity_low, ymax = severity_high, fill = method ), alpha = 0.2, show.legend = FALSE ) + geom_line( aes(date, severity_estimate, colour = method) ) + scale_colour_brewer( palette = "Dark2", labels = c("Corrected CFR", "Naive CFR"), name = NULL ) + scale_fill_brewer( palette = "Dark2" ) + labs(title = "time varying", x = "Date", y = "Disease severity") #> Warning: Removed 75 rows containing missing values or values outside the scale range #> (`geom_line()`).

^{Created on 2024-07-30 with reprex v2.1.0}

epiverse-trace / tutorials-middle

add cfr summative assessment scenario to workshop materials #112

Filter one region only

Keep all the regions