epiverse-trace / episoap

[Not published - under active development] A Store of Outbreak Analytics Pipelines Provided as Rmarkdown Report Templates
https://epiverse-trace.github.io/episoap/
Other
4 stars 2 forks source link

update how to plug in `<epidist>` object to `epinow2` #130

Closed avallecam closed 4 months ago

avallecam commented 4 months ago

Hi @jamesmbaazam @joshwlambert

I wrote a summative connecting {epiparameter} and {EpiNow2} in https://github.com/epiverse-trace/howto/issues/34

After reading https://github.com/epiverse-trace/episoap/issues/113#issuecomment-1907832192 I understand that it's recommended to plug in directly to the pmf argument instead of plugging values to the mean, sd, max, and distribution arguments.

I'm getting different outputs, so interested in double-checking this with you

# howto epinow2 -----------------------------------------------------------

library(epiparameter)
library(EpiNow2)
library(tidyverse)

# cases -------------------------------------------------------------------

example_confirmed
#>            date confirm
#>   1: 2020-02-22      14
#>   2: 2020-02-23      62
#>   3: 2020-02-24      53
#>   4: 2020-02-25      97
#>   5: 2020-02-26      93
#>  ---                   
#> 126: 2020-06-26     296
#> 127: 2020-06-27     255
#> 128: 2020-06-28     175
#> 129: 2020-06-29     174
#> 130: 2020-06-30     126

# delay: generation time --------------------------------------------------------------

covid_serialint <- 
  epiparameter::epidist_db(
    disease = "covid",
    epi_dist = "serial",
    author = "Nishiura",
    single_epidist = T
  )
#> Using Nishiura H, Linton N, Akhmetzhanov A (2020). "Serial interval of novel
#> coronavirus (COVID-19) infections." _International Journal of
#> Infectious Diseases_. doi:10.1016/j.ijid.2020.02.060
#> <https://doi.org/10.1016/j.ijid.2020.02.060>.. 
#> To retrieve the short citation use the 'get_citation' function

covid_serialint
#> Disease: COVID-19
#> Pathogen: SARS-CoV-2
#> Epi Distribution: serial interval
#> Study: Nishiura H, Linton N, Akhmetzhanov A (2020). "Serial interval of novel
#> coronavirus (COVID-19) infections." _International Journal of
#> Infectious Diseases_. doi:10.1016/j.ijid.2020.02.060
#> <https://doi.org/10.1016/j.ijid.2020.02.060>.
#> Distribution: lnorm
#> Parameters:
#>   meanlog: 1.386
#>   sdlog: 0.568

# discretise continuous distribution -------------------------------------------------------------------

covid_serialint_discrete <- 
  epiparameter::discretise(covid_serialint)

# get maximum value from discrete distribution -------------------------------------------------------------------

covid_serialint_discrete_max <- 
  covid_serialint_discrete$prob_dist$q(p = 0.999)

covid_serialint_discrete_max
#> [1] 23

# (previously I used) ----------------------------------------------------------------

serial_interval_covid <- 
  EpiNow2::dist_spec(
    mean = covid_serialint$summary_stats$mean,
    sd = covid_serialint$summary_stats$sd,
    max = covid_serialint_discrete_max,
    distribution = "lognormal"
  )
#> Warning: The meaning of the 'max' argument has changed compared to previous versions. It now indicates the maximum of a distribution rather than the length of the probability mass function (including 0) that it represented previously. To replicate previous behaviour reduce max by 1.
#> This warning is displayed once every 8 hours.

serial_interval_covid
#> 
#>   Fixed distribution with PMF [0.18 0.1 0.079 0.065 0.056 0.049 0.044 0.04 0.037 0.034 0.031 0.029 0.028 0.026 0.025 0.024 0.022 0.021 0.021 0.02 0.019 0.018 0.018 0.017]

# (update, this is the expected output) ------------------------------------------------------------------

si_x <- seq(1L, to = covid_serialint_discrete_max, by = 1L)

serial_interval_covid_update <- 
  EpiNow2::dist_spec(pmf = covid_serialint_discrete$prob_dist$d(si_x))

serial_interval_covid_update
#> 
#>   Fixed distribution with PMF [0.0073 0.1 0.2 0.19 0.15 0.11 0.075 0.051 0.035 0.023 0.016 0.011 0.0076 0.0053 0.0037 0.0027 0.0019 0.0014 0.001 0.00074 0.00055 0.00041 0.00031]

Created on 2024-02-13 with reprex v2.0.2

avallecam commented 4 months ago

Updated now with the new branch EpiNow2@dist-interfase tagging @adamkucharski @Degoot-AM

# howto epinow2 -----------------------------------------------------------

library(epiparameter)
library(EpiNow2)
#> 
#> Attaching package: 'EpiNow2'
#> The following object is masked from 'package:epiparameter':
#> 
#>     discretise
#> The following object is masked from 'package:stats':
#> 
#>     Gamma
library(tidyverse)

# cases -------------------------------------------------------------------

example_confirmed
#>            date confirm
#>          <Date>   <num>
#>   1: 2020-02-22      14
#>   2: 2020-02-23      62
#>   3: 2020-02-24      53
#>   4: 2020-02-25      97
#>   5: 2020-02-26      93
#>  ---                   
#> 126: 2020-06-26     296
#> 127: 2020-06-27     255
#> 128: 2020-06-28     175
#> 129: 2020-06-29     174
#> 130: 2020-06-30     126

# delay: generation time --------------------------------------------------------------

covid_serialint <- 
  epiparameter::epidist_db(
    disease = "covid",
    epi_dist = "serial",
    author = "Nishiura",
    single_epidist = T
  )
#> Using Nishiura H, Linton N, Akhmetzhanov A (2020). "Serial interval of novel
#> coronavirus (COVID-19) infections." _International Journal of
#> Infectious Diseases_. doi:10.1016/j.ijid.2020.02.060
#> <https://doi.org/10.1016/j.ijid.2020.02.060>.. 
#> To retrieve the short citation use the 'get_citation' function

covid_serialint
#> Disease: COVID-19
#> Pathogen: SARS-CoV-2
#> Epi Distribution: serial interval
#> Study: Nishiura H, Linton N, Akhmetzhanov A (2020). "Serial interval of novel
#> coronavirus (COVID-19) infections." _International Journal of
#> Infectious Diseases_. doi:10.1016/j.ijid.2020.02.060
#> <https://doi.org/10.1016/j.ijid.2020.02.060>.
#> Distribution: lnorm
#> Parameters:
#>   meanlog: 1.386
#>   sdlog: 0.568

# discretise continuous distribution --------------------------------------

covid_serialint_discrete <- 
  epiparameter::discretise(covid_serialint)

# get maximum value from discrete distribution ----------------------------

covid_serialint_discrete_max <- 
  covid_serialint_discrete$prob_dist$q(p = 0.999)

covid_serialint_discrete_max
#> [1] 23

# get sequence of quantile values -----------------------------------------

si_x <- seq(1L, to = covid_serialint_discrete_max, by = 1L)

# version: EpiNow2 1.4.9 ---------------------------------------------------

serial_interval_covid_update <- 
  EpiNow2::dist_spec(pmf = covid_serialint_discrete$prob_dist$d(si_x))

serial_interval_covid_update
#> - nonparametric distribution
#>   PMF: [0.0073 0.1 0.2 0.19 0.15 0.11 0.075 0.051 0.034 0.023 0.016 0.011 0.0076 0.0053 0.0037 0.0027 0.0019 0.0014 0.001 0.00074 0.00055 0.00041 0.00031]

# new branch: EpiNow2@dist-interfase --------------------------------------

serial_interval_covid_branch_pmf <- 
  EpiNow2::pmf(covid_serialint_discrete$prob_dist$d(si_x))

serial_interval_covid_branch_pmf
#> - nonparametric distribution
#>   PMF: [0.0073 0.1 0.2 0.19 0.15 0.11 0.075 0.051 0.035 0.023 0.016 0.011 0.0076 0.0053 0.0037 0.0027 0.0019 0.0014 0.001 0.00074 0.00055 0.00041 0.00031]

# new branch: is this a simpler and valid alternative? --------------------------------

covid_serialint_parameters <- epiparameter::get_parameters(covid_serialint)

covid_serialint_parameters
#>   meanlog     sdlog 
#> 1.3862617 0.5679803

serial_interval_covid_branch_lognormalmax <- 
  EpiNow2::LogNormal(
    meanlog = covid_serialint_parameters["meanlog"], #shortcut: use the "tab" key ↹ within [...]
    sdlog = covid_serialint_parameters["sdlog"],
    max = covid_serialint_discrete_max)

serial_interval_covid_branch_lognormalmax
#> - lognormal distribution (max: 23):
#>   meanlog:
#>     1.4
#>   sdlog:
#>     0.57

Created on 2024-02-13 with reprex v2.0.2

These two alternative outputs serial_interval_covid_branch_pmf and serial_interval_covid_branch_lognormalmax work fine when running epinow()

epinow_estimates_alt <- epinow(
  # cases
  reported_cases = example_confirmed[1:60],
  # delays
  generation_time = generation_time_opts(serial_interval_covid_branch_lognormalmax),
  # computation
  stan = stan_opts(
    cores = 4, samples = 1000, chains = 3,
    control = list(adapt_delta = 0.99)
  )
)

summary(epinow_estimates_alt)
avallecam commented 4 months ago

Now I can see that the implementation I was doing here (chunk below) was not appropriate. I managed to see it thanks to the explicit output of the dist_spec() (now deprecated, but still informative) comparing the input and output.

# misspecification of 
# summary statistics (normally distributed) as distribution parameters (lognormal distribution)

covid_serialint
#> Disease: COVID-19
#> Pathogen: SARS-CoV-2
#> Epi Distribution: serial interval
#> Study: Nishiura H, Linton N, Akhmetzhanov A (2020). "Serial interval of novel
#> coronavirus (COVID-19) infections." _International Journal of
#> Infectious Diseases_. doi:10.1016/j.ijid.2020.02.060
#> <https://doi.org/10.1016/j.ijid.2020.02.060>.
#> Distribution: lnorm
#> Parameters:
#>   meanlog: 1.386
#>   sdlog: 0.568

# discretise continuous distribution --------------------------------------

covid_serialint_discrete <- 
  epiparameter::discretise(covid_serialint)

# get maximum value from discrete distribution ----------------------------

covid_serialint_discrete_max <- 
  covid_serialint_discrete$prob_dist$q(p = 0.999)

covid_serialint_discrete_max
#> [1] 23

# previous: EpiNow2 1.4.0 -------------------------------------------------

serial_interval_covid <- 
  EpiNow2::dist_spec(
    mean = covid_serialint$summary_stats$mean,
    sd = covid_serialint$summary_stats$sd,
    max = covid_serialint_discrete_max,
    distribution = "lognormal"
  )

serial_interval_covid
#> - lognormal distribution (max: 23):
#>   meanlog:
#>     - fixed value:
#>       4.7
#>   sdlog:
#>     - fixed value:
#>       2.9

instead, it should look like:

library(epiparameter)
library(EpiNow2)
#> 
#> Attaching package: 'EpiNow2'
#> The following object is masked from 'package:epiparameter':
#> 
#>     discretise
#> The following object is masked from 'package:stats':
#> 
#>     Gamma

covid_incubation <- epiparameter::epidist_db(
  disease = "covid",
  epi_dist = "incubation",
  author = "Natalie",
  single_epidist = T
)
#> Using Linton N, Kobayashi T, Yang Y, Hayashi K, Akhmetzhanov A, Jung S, Yuan
#> B, Kinoshita R, Nishiura H (2020). "Incubation Period and Other
#> Epidemiological Characteristics of 2019 Novel Coronavirus Infections
#> with Right Truncation: A Statistical Analysis of Publicly Available
#> Case Data." _Journal of Clinical Medicine_. doi:10.3390/jcm9020538
#> <https://doi.org/10.3390/jcm9020538>.. 
#> To retrieve the short citation use the 'get_citation' function

covid_incubation
#> Disease: COVID-19
#> Pathogen: SARS-CoV-2
#> Epi Distribution: incubation period
#> Study: Linton N, Kobayashi T, Yang Y, Hayashi K, Akhmetzhanov A, Jung S, Yuan
#> B, Kinoshita R, Nishiura H (2020). "Incubation Period and Other
#> Epidemiological Characteristics of 2019 Novel Coronavirus Infections
#> with Right Truncation: A Statistical Analysis of Publicly Available
#> Case Data." _Journal of Clinical Medicine_. doi:10.3390/jcm9020538
#> <https://doi.org/10.3390/jcm9020538>.
#> Distribution: lnorm
#> Parameters:
#>   meanlog: 1.611
#>   sdlog: 0.472

covid_incubation_parameters <- epiparameter::get_parameters(covid_incubation)

covid_incubation_parameters
#>   meanlog     sdlog 
#> 1.6111948 0.4723807

covid_incubation_discrete <- epiparameter::discretise(covid_incubation)

incubation_time_covid <- EpiNow2::dist_spec(
  mean = covid_incubation_parameters["meanlog"],
  sd = covid_incubation_parameters["sdlog"],
  max = covid_incubation_discrete$prob_dist$q(p = 0.999), #must for epinow()
  distribution = "lognormal"
)

incubation_time_covid
#> - lognormal distribution (max: 21):
#>   meanlog:
#>     - fixed value:
#>       1.611195
#>   sdlog:
#>     - fixed value:
#>       0.4723807

Created on 2024-02-14 with reprex v2.0.2

avallecam commented 4 months ago

this should be the final outlook, prioritizing EpiNow2::pmf() over EpiNow2::LogNormal(), from https://github.com/epiforecasts/EpiNow2/pull/504 using remotes::install_github("epiforecasts/EpiNow2@dist-interface") to install it.

# howto epinow2 -----------------------------------------------------------

library(epiparameter)
library(EpiNow2)

# cases -------------------------------------------------------------------

example_confirmed
#>            date confirm
#>          <Date>   <num>
#>   1: 2020-02-22      14
#>   2: 2020-02-23      62
#>   3: 2020-02-24      53
#>   4: 2020-02-25      97
#>   5: 2020-02-26      93
#>  ---                   
#> 126: 2020-06-26     296
#> 127: 2020-06-27     255
#> 128: 2020-06-28     175
#> 129: 2020-06-29     174
#> 130: 2020-06-30     126

# delay: generation time --------------------------------------------------------------

covid_serialint <- 
  epiparameter::epidist_db(
    disease = "covid",
    epi_dist = "serial",
    author = "Nishiura",
    single_epidist = T
  )
#> Using Nishiura H, Linton N, Akhmetzhanov A (2020). "Serial interval of novel
#> coronavirus (COVID-19) infections." _International Journal of
#> Infectious Diseases_. doi:10.1016/j.ijid.2020.02.060
#> <https://doi.org/10.1016/j.ijid.2020.02.060>.. 
#> To retrieve the short citation use the 'get_citation' function

covid_serialint
#> Disease: COVID-19
#> Pathogen: SARS-CoV-2
#> Epi Distribution: serial interval
#> Study: Nishiura H, Linton N, Akhmetzhanov A (2020). "Serial interval of novel
#> coronavirus (COVID-19) infections." _International Journal of
#> Infectious Diseases_. doi:10.1016/j.ijid.2020.02.060
#> <https://doi.org/10.1016/j.ijid.2020.02.060>.
#> Distribution: lnorm
#> Parameters:
#>   meanlog: 1.386
#>   sdlog: 0.568

# discretise continuous distribution --------------------------------------

covid_serialint_discrete <- 
  epiparameter::discretise(covid_serialint)

# get maximum value from discrete distribution ----------------------------

covid_serialint_discrete_max <- 
  covid_serialint_discrete$prob_dist$q(p = 0.999)

covid_serialint_discrete_max
#> [1] 23

# get sequence of quantile values -----------------------------------------

si_x <- seq(1L, to = covid_serialint_discrete_max, by = 1L)

# new branch: EpiNow2@dist-interfase --------------------------------------

serial_interval_covid_branch_pmf <- 
  EpiNow2::pmf(covid_serialint_discrete$prob_dist$d(si_x))

serial_interval_covid_branch_pmf
#> - nonparametric distribution
#>   PMF: [0.0073 0.1 0.2 0.19 0.15 0.11 0.075 0.051 0.035 0.023 0.016 0.011 0.0076 0.0053 0.0037 0.0027 0.0019 0.0014 0.001 0.00074 0.00055 0.00041 0.00031]

# epinow -------------------------------------------------------------------

epinow_estimates <- epinow(
  # cases
  reported_cases = example_confirmed[1:60],
  # delays
  generation_time = generation_time_opts(serial_interval_covid_branch_pmf),
  # computation
  stan = stan_opts(
    cores = 4, samples = 1000, chains = 3,
    control = list(adapt_delta = 0.99)
  )
)

summary(epinow_estimates)
plot(epinow_estimates)

Created on 2024-02-13 with reprex v2.0.2

avallecam commented 4 months ago

closing this issue given that after this exploration and user testing of the code in reprex, I solved my understanding of the current and coming package connection. more specific issues where raised from this.