epiverse-trace / simulist

An R package for simulating line lists
https://epiverse-trace.github.io/simulist/
Other
4 stars 0 forks source link

how to increase the amount of deaths within hospitalized cases? (not urgent) #102

Closed avallecam closed 2 months ago

avallecam commented 2 months ago

No rush for this one. I'll use outbreaks::mers_korea_2015 for my burning need.

But tried a quick one to get a decent dataset suitable to estimate CFR. I tried combinations of hosp_risk and hosp_death_risk but got mostly none. For this, would it be better to simulate like 1000 datasets to get an extreme scenario? Again, it's not urgent, but I'm curious.

library(simulist)
library(epiparameter)
library(incidence2)
#> Loading required package: grates
library(cfr)
library(tidyverse)

set.seed(33)

# epiparameter ------------------------------------------------------------

# create COVID-19 contact distribution
contact_distribution <- epiparameter::epidist(
  disease = "COVID-19", 
  epi_dist = "contact distribution", 
  prob_distribution = "pois", 
  prob_distribution_params = c(mean = 2)
)
#> Citation cannot be created as author, year, journal or title is missing

# create COVID-19 infectious period
infect_period <- epiparameter::epidist(
  disease = "COVID-19",
  epi_dist = "infectious period",
  prob_distribution = "gamma",
  prob_distribution_params = c(shape = 1, scale = 1)
)
#> Citation cannot be created as author, year, journal or title is missing

# get onset to hospital admission from {epiparameter} database
onset_to_hosp <- epiparameter::epidist_db(
  disease = "COVID-19",
  epi_dist = "onset to hospitalisation",
  single_epidist = TRUE
)
#> Using Linton N, Kobayashi T, Yang Y, Hayashi K, Akhmetzhanov A, Jung S, Yuan
#> B, Kinoshita R, Nishiura H (2020). "Incubation Period and Other
#> Epidemiological Characteristics of 2019 Novel Coronavirus Infections
#> with Right Truncation: A Statistical Analysis of Publicly Available
#> Case Data." _Journal of Clinical Medicine_. doi:10.3390/jcm9020538
#> <https://doi.org/10.3390/jcm9020538>.. 
#> To retrieve the citation use the 'get_citation' function

# get onset to death from {epiparameter} database
onset_to_death <- epiparameter::epidist_db(
  disease = "COVID-19",
  epi_dist = "onset to death",
  single_epidist = TRUE
)
#> Using Linton N, Kobayashi T, Yang Y, Hayashi K, Akhmetzhanov A, Jung S, Yuan
#> B, Kinoshita R, Nishiura H (2020). "Incubation Period and Other
#> Epidemiological Characteristics of 2019 Novel Coronavirus Infections
#> with Right Truncation: A Statistical Analysis of Publicly Available
#> Case Data." _Journal of Clinical Medicine_. doi:10.3390/jcm9020538
#> <https://doi.org/10.3390/jcm9020538>.. 
#> To retrieve the citation use the 'get_citation' function

# simulist ----------------------------------------------------------------

linelist <- sim_linelist(
  contact_distribution = contact_distribution,
  infect_period = infect_period,
  prob_infect = 0.5,
  onset_to_hosp = onset_to_hosp,
  onset_to_death = onset_to_death,
  hosp_risk = 0.5,
  hosp_death_risk = 0.2,
  outbreak_start_date = as.Date("2019-12-01")
)

glimpse(linelist)
#> Rows: 279
#> Columns: 11
#> $ id                 <int> 1, 3, 4, 6, 8, 9, 10, 12, 13, 15, 17, 18, 20, 23, 2…
#> $ case_name          <chr> "Sadie Lansing", "Archibald Quintana", "Chager Blan…
#> $ case_type          <chr> "probable", "probable", "confirmed", "confirmed", "…
#> $ sex                <chr> "f", "m", "m", "f", "m", "f", "f", "f", "m", "f", "…
#> $ age                <int> 80, 68, 4, 77, 22, 45, 8, 52, 79, 80, 32, 3, 27, 25…
#> $ date_onset         <date> 2019-12-01, 2019-12-01, 2019-12-01, 2019-12-01, 20…
#> $ date_admission     <date> NA, NA, 2019-12-14, NA, NA, 2019-12-01, NA, NA, 20…
#> $ date_death         <date> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
#> $ date_first_contact <date> NA, 2019-11-30, 2019-12-02, 2019-11-26, 2019-12-03…
#> $ date_last_contact  <date> NA, 2019-12-04, 2019-12-05, 2019-12-05, 2019-12-07…
#> $ ct_value           <dbl> NA, NA, 24.8, 24.8, NA, 24.8, 24.8, NA, 24.8, 24.8,…

incidence_format <- linelist %>% 
  # incidence2 workflow
  incidence2::incidence(date_index = c("date_admission","date_death")) %>%
  incidence2::complete_dates()

incidence_format %>% 
  cfr::prepare_data(cases_variable = "date_admission",deaths_variable = "date_death")
#> NAs in cases and deaths are being replaced with 0s: Set `fill_NA = FALSE` to prevent this.
#>          date cases deaths
#> 1  2019-12-01     1      0
#> 2  2019-12-02     0      0
#> 3  2019-12-03     0      0
#> 4  2019-12-04     0      0
#> 5  2019-12-05     0      0
#> 6  2019-12-06     0      0
#> 7  2019-12-07     0      0
#> 8  2019-12-08     0      0
#> 9  2019-12-09     0      0
#> 10 2019-12-10     0      0
#> 11 2019-12-11     0      0
#> 12 2019-12-12     0      0
#> 13 2019-12-13     0      0
#> 14 2019-12-14     0      0
#> 15 2019-12-15     0      0
#> 16 2019-12-16     0      0
#> 17 2019-12-17     0      0
#> 18 2019-12-18     0      0
#> 19 2019-12-19     0      0
#> 20 2019-12-20     0      0
#> 21 2019-12-21     0      0
#> 22 2019-12-22     0      0
#> 23 2019-12-23     0      0
#> 24 2019-12-24     0      0
#> 25 2019-12-25     0      0
#> 26 2019-12-26     0      0
#> 27 2019-12-27     0      0
#> 28 2019-12-28     0      0
#> 29 2019-12-29     0      0
#> 30 2019-12-30     0      0
#> 31 2019-12-31     0      0
#> 32 2020-01-01     0      0
#> 33 2020-01-02     0      0
#> 34 2020-01-03     0      0

incidence_format %>% 
  cfr::prepare_data(cases_variable = "date_admission",deaths_variable = "date_death") %>% 
  cfr::cfr_static()
#> NAs in cases and deaths are being replaced with 0s: Set `fill_NA = FALSE` to prevent this.
#>   severity_mean severity_low severity_high
#> 1             0            0         0.975

Created on 2024-04-17 with reprex v2.1.0

joshwlambert commented 2 months ago

Note This response uses the time-varying-cfr branch on {simulist} which will soon be merged into main.

To increase the number of individuals that get hospitalised then increase the hosp_risk (the largest value is 1 which is every infection goes to hospital) and if you want the fatality risk for hospitalised individuals to increase, increase the hosp_death_risk (1 is again the largest value). Here is an example where both of those risks are high and shows that, even with stochasticity, these risks should be represented in the line list data.

library(simulist)
# load data required to simulate line list
contact_distribution <- epiparameter::epidist(
  disease = "COVID-19",
  epi_dist = "contact distribution",
  prob_distribution = "pois",
  prob_distribution_params = c(mean = 2)
)
#> Citation cannot be created as author, year, journal or title is missing

infect_period <- epiparameter::epidist(
  disease = "COVID-19",
  epi_dist = "infectious period",
  prob_distribution = "gamma",
  prob_distribution_params = c(shape = 1, scale = 1)
)
#> Citation cannot be created as author, year, journal or title is missing

# get onset to hospital admission from {epiparameter} database
onset_to_hosp <- epiparameter::epidist_db(
  disease = "COVID-19",
  epi_dist = "onset to hospitalisation",
  single_epidist = TRUE
)
#> Using Linton N, Kobayashi T, Yang Y, Hayashi K, Akhmetzhanov A, Jung S, Yuan
#> B, Kinoshita R, Nishiura H (2020). "Incubation Period and Other
#> Epidemiological Characteristics of 2019 Novel Coronavirus Infections
#> with Right Truncation: A Statistical Analysis of Publicly Available
#> Case Data." _Journal of Clinical Medicine_. doi:10.3390/jcm9020538
#> <https://doi.org/10.3390/jcm9020538>.. 
#> To retrieve the short citation use the 'get_citation' function

# get onset to death from {epiparameter} database
onset_to_death <- epiparameter::epidist_db(
  disease = "COVID-19",
  epi_dist = "onset to death",
  single_epidist = TRUE
)
#> Using Linton N, Kobayashi T, Yang Y, Hayashi K, Akhmetzhanov A, Jung S, Yuan
#> B, Kinoshita R, Nishiura H (2020). "Incubation Period and Other
#> Epidemiological Characteristics of 2019 Novel Coronavirus Infections
#> with Right Truncation: A Statistical Analysis of Publicly Available
#> Case Data." _Journal of Clinical Medicine_. doi:10.3390/jcm9020538
#> <https://doi.org/10.3390/jcm9020538>.. 
#> To retrieve the short citation use the 'get_citation' function
set.seed(12345)
# example with single hospitalisation risk for entire population
linelist <- sim_linelist(
  contact_distribution = contact_distribution,
  infect_period = infect_period,
  prob_infect = 0.5,
  onset_to_hosp = onset_to_hosp,
  onset_to_death = onset_to_death,
  hosp_risk = 0.9, 
  hosp_death_risk = 0.9
)
head(linelist)
#>   id          case_name case_type sex age date_onset date_admission   outcome
#> 1  1 Christopher Brooks  probable   m  55 2023-01-01     2023-01-01      died
#> 2  2     Chantell Tarar suspected   f  56 2023-01-03     2023-01-03      died
#> 3  3    Rifaah el-Mirza suspected   m  64 2023-01-01           <NA> recovered
#> 4  4      Charlie Weems confirmed   m  69 2023-01-04     2023-01-10      died
#> 5  5   Autumn Castaneda confirmed   f  42 2023-01-04           <NA> recovered
#> 6  8    Bakar el-Koroma suspected   m  42 2023-01-01     2023-01-09      died
#>   date_outcome date_first_contact date_last_contact ct_value
#> 1   2023-01-08               <NA>              <NA>       NA
#> 2   2023-01-16         2023-01-01        2023-01-04       NA
#> 3         <NA>         2022-12-27        2023-01-02       NA
#> 4   2023-01-13         2023-01-04        2023-01-06     24.5
#> 5         <NA>         2023-01-06        2023-01-09     24.5
#> 6   2023-01-09         2023-01-01        2023-01-04       NA
nrow(linelist)
#> [1] 4481
sum(!is.na(linelist$date_admission)) / nrow(linelist)
#> [1] 0.9000223
sum(linelist$outcome == "died") / nrow(linelist)
#> [1] 0.8136577

Created on 2024-04-17 with reprex v2.1.0

The reason the proportion of infections that go on to die is lower than the hosp_death_risk is because the non_hosp_death_risk (the case fatality risk for individuals that are not hospitalised is left as its default value (0.05)).

Please let me know if this is clear and answers your question, and if you would like this to be better documented in the package please say.

avallecam commented 2 months ago

The time-varying-cfr branch worked as expected when filling the inputs of this issue. Issue solved!

set.seed(12345)
# example with single hospitalisation risk for entire population
linelist <- sim_linelist(
  contact_distribution = contact_distribution,
  infect_period = infect_period,
  prob_infect = 0.5,
  onset_to_hosp = onset_to_hosp,
  onset_to_death = onset_to_death,
  hosp_risk = 0.5,
  non_hosp_death_risk = 0.05, 
  hosp_death_risk = 0.2
)

nrow(linelist)
#> [1] 4481

# hospital risk
sum(!is.na(linelist$date_admission)) / nrow(linelist)
#> [1] 0.5001116

# naive CFR
sum(linelist$outcome == "died") / nrow(linelist)
#> [1] 0.1216246

# HFR
sum(linelist$outcome == "died") / sum(!is.na(linelist$date_admission))
#> [1] 0.243195

Created on 2024-04-24 with reprex v2.1.0