grattan / covid19.model.sa2

4 stars 0 forks source link

Compare model results in March with actual case numbers #51

Closed wfmackey closed 4 years ago

wfmackey commented 4 years ago

This will be shown at the end of Chapter 3.

wfmackey commented 4 years ago

Playing around with this now. @HughParsonage can you tell me what is happening with initial case numbers for a given by_state = date? Looks very different to the covid19.model.sa2/inst/extdata/time_series_cases.fst file.


library(tidyverse)
  options(dplyr.summarise.inform = FALSE)
library(purrr)
library(data.table)
#> 
#> Attaching package: 'data.table'
#> The following objects are masked from 'package:dplyr':
#> 
#>     between, first, last
#> The following object is masked from 'package:purrr':
#> 
#>     transpose
library(patchwork)
library(covid19.model.sa2)
library(fst)
library(glue)
#> 
#> Attaching package: 'glue'
#> The following object is masked from 'package:dplyr':
#> 
#>     collapse
library(lubridate)
#> 
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:data.table':
#> 
#>     hour, isoweek, mday, minute, month, quarter, second, wday, week,
#>     yday, year
#> The following objects are masked from 'package:base':
#> 
#>     date, intersect, setdiff, union
library(janitor)
#> 
#> Attaching package: 'janitor'
#> The following objects are masked from 'package:stats':
#> 
#>     chisq.test, fisher.test

run_days <- 30
repeat_times <- 1

repeat_simulation <- function(.name,
                              .repeat = repeat_times, 
                              .days = run_days, 
                              PolicyPars = set_policypars(),
                              EpiPars = set_epipars(),
                              ...) {

  return_simulation <- function(x, 
                                to_return = .to_return,
                                .PolicyPars = PolicyPars,
                                .EpiPars = EpiPars) {

     rep_each <- function(x, len) {
        rep(x, each = len / length(x), length.out = len)
     }

    d <- 
    simulate_sa2(days_to_simulate = .days, 
                 returner = 3,
                 PolicyPars = .PolicyPars,
                 EpiPars = .EpiPars)

    # Status
      Status12 <- d[["Status12"]]

      DT <- data.table(N = Status12)
      DT[, day := rep_each(1:.days, .N)]
      DT[, state := rep_each(1:9, .N), by = .(day)]
      DT[, status := rep_each(-2:3, .N), by = .(day, state)]
      DT[, isolated := rep_len(0:1, .N)]
      DT[, runid := x]

    # New infections
      NewInfectionsByState <- d[["NewInfectionsByState"]]

      DT2 <- data.table(new_infections = NewInfectionsByState)
      DT2[, day := rep_each(1:.days, .N)]
      DT2[, state := rep_each(1:9, .N), by = .(day)]

      return(left_join(DT, DT2))

  }

  d <- map_dfr(1:.repeat, return_simulation)

  if (!dir.exists("output")) dir.create("output")
  write_fst(d, glue("output/{.name}.fst"))

}

plot_status <- function(data, keep_all_states = FALSE) {

  if (!keep_all_states) {
  data <- data %>% 
    filter(state <= 6 & state != 5)
  }

data %>% 
  filter(status >= 1) %>% 
  group_by(runid, state, day) %>% 
  summarise(N = sum(N)) %>% 
  ggplot(aes(day, N)) +
  geom_line() +
  geom_hline(yintercept = 0, linetype = "dashed") + 
  scale_y_continuous(limits = c(0, NA)) + 
  facet_wrap(vars(state), scales = "free_y", ncol = 1) + 
  theme(legend.position = "top")
}

plot_new_cases <- function(data, keep_all_states = FALSE) {
  if (!keep_all_states) {
    data <- data %>% 
      filter(state <= 6 & state != 5)
  }

  data %>% 
    group_by(runid, state, day) %>% 
    filter(row_number() == 1) %>% 
    ggplot(aes(day, new_infections)) +
    geom_line() + 
    facet_wrap(vars(state), scales = "free_y", ncol = 1)
}

# Looking at early days
from_date <- ymd("2020-03-05")
keep_states <- c("NSW", "Vic")

early_days <- repeat_simulation("early_days",
  .repeat = 1,
  .days = 21,
  by_state = from_date,
  EpiPars = set_epipars(a_workplace_rate = .5,
                        q_workplace = .01,
                        q_household = 0.02,
                        a_household_rate = 0.15, # def 0.15
                        q_supermarket = 0.002),
  PolicyPars = set_policypars(
                    supermarkets_open = TRUE,
                    max_persons_per_supermarket = 100,
                    schools_open = TRUE,
                    only_Year12 = FALSE,
                    school_days_per_wk = 5,
                    do_contact_tracing = FALSE,
                    workplaces_open = TRUE,
                    workplace_size_max = 100
                  ))
#> Joining, by = c("day", "state")

# Tidy early_days
early_days_data <- early_days %>% 
  mutate(state = strayr::strayr(state),
         infected = status >= 1) %>% 
  filter(infected) %>% 
  group_by(runid, state, day) %>% 
  summarise(cases = sum(N),
            new_cases = max(new_infections)) %>% 
  mutate(date = from_date + (day - 1)) %>% 
  filter(state %in% keep_states)

# Get actual case numbers
actual <- read_fst("../covid19.model.sa2/inst/extdata/time_series_cases.fst") %>% 
  clean_names() %>% 
  select(-total) %>% 
  pivot_longer(-date, names_to = "state", values_to = "cases") %>% 
  mutate(state = strayr::strayr(state)) %>% 
  filter(state %in% keep_states) %>% 
  group_by(state) %>% 
  mutate(new_cases = cases - lag(cases))

# Plot
early_days_data %>% 
  ggplot(aes(x = date, y = cases)) + 
  geom_line(aes(group = runid),
            alpha = .8, colour = "red") + 
  geom_line(data = actual, colour = "black") + 
  facet_wrap(vars(state)) + 
  scale_x_date(limits = c(from_date, from_date + 21)) + 
  scale_y_continuous() +
  theme_minimal()
#> Warning: Removed 97 row(s) containing missing values (geom_path).

Created on 2020-05-26 by the reprex package (v0.3.0)

HughParsonage commented 4 years ago

One minor issue: the data simply wasn't recorded back then. e.g. NSW did not start recording the number of cases by (our version of) status until April

wfmackey commented 4 years ago

So where does by_state = ymd() get its data from?

HughParsonage commented 4 years ago

by_state = ymd has no effect. It's the same as by_state = TRUE. You want .first_day = "<date>"

wfmackey commented 4 years ago

lol that will likely explain it then

HughParsonage commented 4 years ago

But to answer the essence, when using .first_day it simply tries to get the data, but it makes no check that the data is complete for the data required.

wfmackey commented 4 years ago

So in the example below, what's happening with .first_day?


library(tidyverse)
  options(dplyr.summarise.inform = FALSE)
library(purrr)
library(data.table)
#> 
#> Attaching package: 'data.table'
#> The following objects are masked from 'package:dplyr':
#> 
#>     between, first, last
#> The following object is masked from 'package:purrr':
#> 
#>     transpose
library(patchwork)
library(covid19.model.sa2)
library(fst)
library(glue)
#> 
#> Attaching package: 'glue'
#> The following object is masked from 'package:dplyr':
#> 
#>     collapse
library(lubridate)
#> 
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:data.table':
#> 
#>     hour, isoweek, mday, minute, month, quarter, second, wday, week,
#>     yday, year
#> The following objects are masked from 'package:base':
#> 
#>     date, intersect, setdiff, union
library(janitor)
#> 
#> Attaching package: 'janitor'
#> The following objects are masked from 'package:stats':
#> 
#>     chisq.test, fisher.test

run_days <- 30
repeat_times <- 1

repeat_simulation <- function(.name,
                              .repeat = repeat_times, 
                              .days = run_days, 
                              PolicyPars = set_policypars(),
                              EpiPars = set_epipars(),
                              ...) {

  return_simulation <- function(x, 
                                to_return = .to_return,
                                .PolicyPars = PolicyPars,
                                .EpiPars = EpiPars) {

     rep_each <- function(x, len) {
        rep(x, each = len / length(x), length.out = len)
     }

    d <- 
    simulate_sa2(days_to_simulate = .days, 
                 returner = 3,
                 PolicyPars = .PolicyPars,
                 EpiPars = .EpiPars)

    # Status
      Status12 <- d[["Status12"]]

      DT <- data.table(N = Status12)
      DT[, day := rep_each(1:.days, .N)]
      DT[, state := rep_each(1:9, .N), by = .(day)]
      DT[, status := rep_each(-2:3, .N), by = .(day, state)]
      DT[, isolated := rep_len(0:1, .N)]
      DT[, runid := x]

    # New infections
      NewInfectionsByState <- d[["NewInfectionsByState"]]

      DT2 <- data.table(new_infections = NewInfectionsByState)
      DT2[, day := rep_each(1:.days, .N)]
      DT2[, state := rep_each(1:9, .N), by = .(day)]

      return(left_join(DT, DT2))

  }

  d <- map_dfr(1:.repeat, return_simulation)

  if (!dir.exists("output")) dir.create("output")
  write_fst(d, glue("output/{.name}.fst"))

}

plot_status <- function(data, keep_all_states = FALSE) {

  if (!keep_all_states) {
  data <- data %>% 
    filter(state <= 6 & state != 5)
  }

data %>% 
  filter(status >= 1) %>% 
  group_by(runid, state, day) %>% 
  summarise(N = sum(N)) %>% 
  ggplot(aes(day, N)) +
  geom_line() +
  geom_hline(yintercept = 0, linetype = "dashed") + 
  scale_y_continuous(limits = c(0, NA)) + 
  facet_wrap(vars(state), scales = "free_y", ncol = 1) + 
  theme(legend.position = "top")
}

plot_new_cases <- function(data, keep_all_states = FALSE) {
  if (!keep_all_states) {
    data <- data %>% 
      filter(state <= 6 & state != 5)
  }

  data %>% 
    group_by(runid, state, day) %>% 
    filter(row_number() == 1) %>% 
    ggplot(aes(day, new_infections)) +
    geom_line() + 
    facet_wrap(vars(state), scales = "free_y", ncol = 1)
}

# Looking at early days
from_date <- ymd("2020-03-05")
keep_states <- c("NSW", "Vic")

early_days <- repeat_simulation("early_days",
  .repeat = 1,
  .days = 21,
  by_state = TRUE,
  .first_day = from_date,
  EpiPars = set_epipars(a_workplace_rate = .5,
                        q_workplace = .01,
                        q_household = 0.02,
                        a_household_rate = 0.15, # def 0.15
                        q_supermarket = 0.002),
  PolicyPars = set_policypars(
                    supermarkets_open = TRUE,
                    max_persons_per_supermarket = 100,
                    schools_open = TRUE,
                    only_Year12 = FALSE,
                    school_days_per_wk = 5,
                    do_contact_tracing = FALSE,
                    workplaces_open = TRUE,
                    workplace_size_max = 100
                  ))
#> Joining, by = c("day", "state")

# Tidy early_days
early_days_data <- early_days %>% 
  mutate(state = strayr::strayr(state),
         infected = status >= 1) %>% 
  filter(infected) %>% 
  group_by(runid, state, day) %>% 
  summarise(cases = sum(N),
            new_cases = max(new_infections)) %>% 
  mutate(date = from_date + (day - 1)) %>% 
  filter(state %in% keep_states)

# Get actual case numbers
actual <- read_fst("../covid19.model.sa2/inst/extdata/time_series_cases.fst") %>% 
  clean_names() %>% 
  select(-total) %>% 
  pivot_longer(-date, names_to = "state", values_to = "cases") %>% 
  mutate(state = strayr::strayr(state)) %>% 
  filter(state %in% keep_states) %>% 
  group_by(state) %>% 
  mutate(new_cases = cases - lag(cases))

# Plot
early_days_data %>% 
  ggplot(aes(x = date, y = cases)) + 
  geom_line(aes(group = runid),
            alpha = .8, colour = "red") + 
  geom_line(data = actual, colour = "black") + 
  facet_wrap(vars(state)) + 
  scale_x_date(limits = c(from_date, from_date + 21)) + 
  scale_y_continuous() +
  theme_minimal()
#> Warning: Removed 97 row(s) containing missing values (geom_path).

Created on 2020-05-28 by the reprex package (v0.3.0)

HughParsonage commented 4 years ago

From what I can see you're not passing .first_day to simulate_sa2. You need to add ... to the simulate_sa2 call.

wfmackey commented 4 years ago

IT'S PROBABLY THAT

HughParsonage commented 4 years ago

YOU IDIOT

hth

HughParsonage commented 4 years ago

BTW I'm presently working on a MultiPolicy argument to simulate_sa2 in order to properly model changes to the policy assumptions hitherhto

wfmackey commented 4 years ago

Given the state case data in time_series_cases.fst goes back to January, why is there a limit?

library(covid19.model.sa2)
library(lubridate)
#> 
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#> 
#>     date, intersect, setdiff, union

from_date <- ymd("2020-03-05")

early_days <- simulate_sa2(
                by_state = TRUE,
                .first_day = from_date,
                EpiPars = set_epipars(a_workplace_rate = .5,
                                      q_workplace = .01,
                                      q_household = 0.02,
                                      a_household_rate = 0.15, # def 0.15
                                      q_supermarket = 0.002),
                PolicyPars = set_policypars(
                  supermarkets_open = TRUE,
                  max_persons_per_supermarket = 100,
                  schools_open = TRUE,
                  only_Year12 = FALSE,
                  school_days_per_wk = 5,
                  do_contact_tracing = FALSE,
                  workplaces_open = TRUE,
                  workplace_size_max = 100)
                )
#> Error in set_initial_by_state(state, first_yday = .first_day): `first_yday = 65`, but the earliest allowed yday, given available data, is 79.

Created on 2020-05-28 by the reprex package (v0.3.0)

HughParsonage commented 4 years ago

Because the case data is cumulative; we need the recovered data to determine active cases.

HughParsonage commented 4 years ago

Also do have/know where I could find a consolidated list of previous policies?

HughParsonage commented 4 years ago

Also fun fact ymd is unnecessary: you can just pass the string.

wfmackey commented 4 years ago

Also do have/know where I could find a consolidated list of previous policies?

Anika keeping track here. But not really 'consolidated'. https://docs.google.com/spreadsheets/d/1ZqnCmSueVD26Xrw1hMszi5Q_aQwsZJe78Y5HCLwXQ64

wfmackey commented 4 years ago

Because the case data is cumulative; we need the recovered data to determine active cases.

Yep sure. Can't we use the illness_ inputs to back-out an estimate of active cases? Validating the model using March data would be really useful.

HughParsonage commented 4 years ago

Yeah, I did take a look at ways to get it out. The hesitation I had was that most of the cases back then (and even now probably) are due to overseas arrivals. As a result, their incubation and illness periods don't match the parameters.

wfmackey commented 4 years ago

Yeah that's fair. Maybe a way to achieve this kind of validation is to allow an argument (by_state or .first_day or an additional) to take a list/df of active cases by state as the starting point. Then the user -- me, in this case -- can provide their own starting point.

HughParsonage commented 4 years ago

So technically that is possible. You can use the myaus argument to simualte_sa2 together with set_initial_by_state to manually construct a table that is passed directly to do_au_simulate

wfmackey commented 4 years ago

That's great. Can you show a mwe of that?

HughParsonage commented 4 years ago

Emphasis on the word 'technically'

library(magrittr)
library(hutils)
library(data.table)
stopifnot(isNamespaceLoaded("covid19.model.sa2"))  # don't nest loading
#> Error: isNamespaceLoaded("covid19.model.sa2") is not TRUE
covid19.model.sa2::attachme()
#> The following object is masked from package:hutils:
#> 
#>     samp
myaus <- covid19.model.sa2:::read_typical()

cases_by_state <- read_sys("time_series_cases.fst")
deaths_by_state <- read_sys("time_series_deaths.fst")
recovered_by_state <- read_sys("time_series_recovered.fst")

# modify recovered by state, e.g.
states_in_file <- intersect(names(recovered_by_state), states())
approx_column <- function(x, n = length(x)) {
  force(n)
  as.integer(approx(seq_along(x), x, n = n)[["y"]])
}

# This won't work -- results in more 'recovered cases' than cases
# recovered_by_state[1, (states_in_file) := lapply(.SD, coalesce, 0L), .SDcols = c(states_in_file)]
# recovered_by_state[, (states_in_file) := lapply(.SD, approx_column), .SDcols = c(states_in_file)]
# recovered_by_state[1, (states_in_file) := lapply(.SD, coalesce, 0L), .SDcols = c(states_in_file)]
recovered_by_state[, (states_in_file) := lapply(.SD, coalesce, 0L), .SDcols = c(states_in_file)]

myaus[, Status := set_initial_by_state(state, 
                                       cases_by_state = cases_by_state,
                                       deaths_by_state = deaths_by_state,
                                       recovered_by_state = recovered_by_state,
                                       first_yday = "2020-03-05",
                                       .population = .N)]
myaus[Status != 0L, InfectedOn := yday("2020-03-05") - 11L]

nPlacesByDestType <-
  lapply(1:106, function(i) {
    if (i == 98L) {
      read_sys("nSupermarkets_by_sa2.fst",
               columns = "nSupermarkets")[[1L]]
    } else {
      integer(0)
    }
  })

aus <- copy(myaus)  # not necessary just in case you need original

out <- 
  do_au_simulate(Status = dollars(aus, Status),
                 InfectedOn = dollars(aus, InfectedOn),
                 SA2 = dollars(aus, sa2),
                 hid = dollars(aus, hid),
                 Age = dollars(aus, Age),
                 School = dollars(aus, short_school_id),
                 DZN = dollars(aus, short_dzn),
                 wid = dollars(aus, wid),
                 nColleagues = dollars(aus, nColleagues),
                 PlaceTypeBySA2 = integer(0),
                 LabourForceStatus = dollars(aus, LabourForceStatus),
                 SeedOriginal = 1:2048,
                 Policy = set_policypars(),
                 MultiPolicy = NULL,
                 nPlacesByDestType = nPlacesByDestType,
                 Epi = set_epipars(),
                 Incubation = rep(14L, nrow(aus)),
                 Illness = rep(1L, nrow(aus)),
                 nSupermarketsAvbl = integer(nrow(aus)),
                 SupermarketTypical = integer(nrow(aus)),
                 minPlaceID_nPlacesByDestType = minPlaceID_nPlacesByDestType,
                 yday_start = yday("2020-03-05"),   # as required
                 days_to_sim = 5,
                 N = nrow(aus),
                 display_progress = getOption("covid19.showProgress", 0L),
                 optionz = getOption("optionz", 0L),
                 nThread = getOption("covid19.model.sa2_nThread", 1L), 
                 returner = 2L)
invisible(out)  # All outputs are large

Created on 2020-05-28 by the reprex package (v0.3.0)

wfmackey commented 4 years ago

lol

wfmackey commented 4 years ago

I am imagining something like:

library(tidyverse)
library(covid19.model.sa2)

manual_initial_status <- tribble(
  ~state, ~active, ~critical, ~dead, ~healed,
   "NSW",     100,         9,     6,      30,
   "VIC",      40,         5,     4,      20,
   "QLD",      30,         2,     3,      10,
    "SA",      10,         0,     0,      10,
    "WA",      12,         0,     0,      10,
   "TAS",      20,         0,     0,       1,
    "NT",       1,         0,     0,       3,
   "ACT",       0,         0,     0,       1)
)

simulate_sa2(InitialStatus = manual_initial_status)
wfmackey commented 4 years ago

This is important for validation -- is it possible to make the above (or let .first_day estimate active cases from total cases) available soonish?

HughParsonage commented 4 years ago

Not today.

HughParsonage commented 4 years ago

But I agree that it's top priority.

wfmackey commented 4 years ago

Yep. Rough time estimate?

wfmackey commented 4 years ago

I don't think I'm using .first_day correctly? You can talk me through it at 2pm.


# Set up -------

library(tidyverse)
options(dplyr.summarise.inform = FALSE)
library(purrr)
library(data.table)
#> 
#> Attaching package: 'data.table'
#> The following objects are masked from 'package:dplyr':
#> 
#>     between, first, last
#> The following object is masked from 'package:purrr':
#> 
#>     transpose
library(patchwork)
library(covid19.model.sa2)
library(fst)
library(glue)
#> 
#> Attaching package: 'glue'
#> The following object is masked from 'package:dplyr':
#> 
#>     collapse
library(lubridate)
#> 
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:data.table':
#> 
#>     hour, isoweek, mday, minute, month, quarter, second, wday, week,
#>     yday, year
#> The following objects are masked from 'package:base':
#> 
#>     date, intersect, setdiff, union
library(janitor)
#> 
#> Attaching package: 'janitor'
#> The following objects are masked from 'package:stats':
#> 
#>     chisq.test, fisher.test
library(strayr)
library(grattantheme)

run_days <- 20
repeat_times <- 1

repeat_simulation <- function(.name,
                              .rebuild = "check",
                              .repeat = repeat_times, 
                              .days = run_days, 
                              PolicyPars = set_policypars(),
                              EpiPars = set_epipars(),
                              ...) {

  file_path <- glue("output/{.name}.fst")

  if (.rebuild == "check" & file.exists(file_path)) {
    message("Run found; loading")
    return(read_fst(file_path))
  }
  # is true otherwise, so proceed

  # Set epi settings
  message("Using epi parameters:")
  default_epi <- set_epipars(
    incubation_distribution = "pois",
    incubation_mean = 6,
    incubation_sigma = 0.5
  )

  EpiPars <- modifyList(default_epi, EpiPars)

  print(EpiPars)

  # Add new policy settings to defaults
  message("Using policy parameters:")
  default_policy <- set_policypars()

  PolicyPars <- modifyList(default_policy, PolicyPars)

  print(PolicyPars)

  return_simulation <- function(x, 
                                to_return = .to_return,
                                .PolicyPars = PolicyPars,
                                .EpiPars = EpiPars,
                                ...) {

    rep_each <- function(x, len) {
      rep(x, each = len / length(x), length.out = len)
    }

    d <- 
      simulate_sa2(days_to_simulate = .days, 
                   returner = 3,
                   PolicyPars = .PolicyPars,
                   EpiPars = .EpiPars,
                   ...)

    # Status
    Status12 <- d[["Status12"]]

    DT <- data.table(N = Status12)
    DT[, day := rep_each(1:.days, .N)]
    DT[, state := rep_each(1:9, .N), by = .(day)]
    DT[, status := rep_each(-2:3, .N), by = .(day, state)]
    DT[, isolated := rep_len(0:1, .N)]
    DT[, runid := x]

    # New infections
    NewInfectionsByState <- d[["NewInfectionsByState"]]

    DT2 <- data.table(new_infections = NewInfectionsByState)
    DT2[, day := rep_each(1:.days, .N)]
    DT2[, state := rep_each(1:9, .N), by = .(day)]

    return(left_join(DT, DT2))

  }

  d <- map_dfr(1:.repeat, return_simulation) %>% 
    mutate(state_code = state,
           state = strayr(state))

  if (!dir.exists("output")) dir.create("output")
  write_fst(d, file_path)

}

# Run -----

from_date <- ymd("2020-03-10")
keep_states <- c("NSW", "Vic")

early_days <- repeat_simulation("early_days",
                                .rebuild = TRUE,
                                .repeat = 1,
                                .days = 21,
                                .first_day = from_date,
                                EpiPars = set_epipars(a_workplace_rate = .5,
                                                      q_workplace = .01,
                                                      q_household = 0.02,
                                                      a_household_rate = 0.15, # def 0.15
                                                      q_supermarket = 0.002),
                                PolicyPars = set_policypars(
                                  supermarkets_open = TRUE,
                                  max_persons_per_supermarket = 100,
                                  schools_open = TRUE,
                                  only_Year12 = FALSE,
                                  school_days_per_wk = 5,
                                  do_contact_tracing = FALSE,
                                  workplaces_open = TRUE,
                                  workplace_size_max = 100
                                ))
#> Using epi parameters:
#> $CHECKED
#> [1] TRUE
#> 
#> $a_household_rate
#> [1] 0.15
#> 
#> $a_schools_rate
#> [1] 0.07
#> 
#> $a_workplace_rate
#> [1] 0.5
#> 
#> $illness_distribution
#> [1] 1
#> 
#> $illness_mean
#> [1] 15
#> 
#> $illness_sigma
#> [1] 1
#> 
#> $incubation_distribution
#> [1] 1
#> 
#> $incubation_mean
#> [1] 5
#> 
#> $incubation_sigma
#> [1] 0.44
#> 
#> $p_asympto
#> [1] 0.48
#> 
#> $p_critical
#> [1] 0.02
#> 
#> $p_death
#> [1] 0.01
#> 
#> $p_visit_major_event
#> [1] 0.01923077
#> 
#> $q_household
#> [1] 0.02
#> 
#> $q_major_event
#> [1] 2e-04
#> 
#> $q_places
#> [1] 0.002
#> 
#> $q_school
#> [1] 0.0003333333
#> 
#> $q_school_grade
#> [1] 0.002
#> 
#> $q_supermarket
#> [1] 0.002
#> 
#> $q_workplace
#> [1] 0.01
#> 
#> $resistance_threshold
#> [1] 0.4
#> Using policy parameters:
#> $school_lockdown_triggers_exist
#> [1] TRUE
#> 
#> $tests_by_state_was_null
#> [1] TRUE
#> 
#> $yday_start
#> [1] 0
#> 
#> $supermarkets_open
#> [1] TRUE
#> 
#> $schools_open
#> [1] TRUE
#> 
#> $only_Year12
#> [1] FALSE
#> 
#> $school_days_per_wk
#> $school_days_per_wk$AUS
#>  [1] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
#> 
#> $school_days_per_wk$NSW
#>  [1] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
#> 
#> $school_days_per_wk$VIC
#>  [1] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
#> 
#> $school_days_per_wk$QLD
#>  [1] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
#> 
#> $school_days_per_wk$SA
#>  [1] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
#> 
#> $school_days_per_wk$WA
#>  [1] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
#> 
#> $school_days_per_wk$TAS
#>  [1] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
#> 
#> $school_days_per_wk$NT
#>  [1] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
#> 
#> $school_days_per_wk$ACT
#>  [1] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
#> 
#> $school_days_per_wk$OTH
#>  [1] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
#> 
#> $school_days_per_wk$week15combns
#> $school_days_per_wk$week15combns[[1]]
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,]    1    2    3    4    5
#> 
#> $school_days_per_wk$week15combns[[2]]
#>      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
#> [1,]    1    1    1    1    2    2    2    3    3     4
#> [2,]    2    3    4    5    3    4    5    4    5     5
#> 
#> $school_days_per_wk$week15combns[[3]]
#>      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
#> [1,]    1    1    1    1    1    1    2    2    2     3
#> [2,]    2    2    2    3    3    4    3    3    4     4
#> [3,]    3    4    5    4    5    5    4    5    5     5
#> 
#> $school_days_per_wk$week15combns[[4]]
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,]    1    1    1    1    2
#> [2,]    2    2    2    3    3
#> [3,]    3    3    4    4    4
#> [4,]    4    5    5    5    5
#> 
#> $school_days_per_wk$week15combns[[5]]
#>      [,1]
#> [1,]    1
#> [2,]    2
#> [3,]    3
#> [4,]    4
#> [5,]    5
#> 
#> 
#> $school_days_per_wk$all_full_time
#> [1] TRUE
#> 
#> 
#> $do_contact_tracing
#> [1] FALSE
#> 
#> $contact_tracing_days_before_test
#> [1] 0
#> 
#> $contact_tracing_days_until_result
#> [1] 3
#> 
#> $contact_tracing_only_sympto
#> [1] TRUE
#> 
#> $contact_tracing_success
#> [1] 0.9
#> 
#> $tests_by_state
#>  [1] 19405  4276 11632  1110   799   857   469    16   246     0
#> 
#> $max_persons_per_event
#> [1] 5
#> 
#> $n_major_events_weekday
#> [1] 28
#> 
#> $n_major_events_weekend
#> [1] 56
#> 
#> $max_persons_per_supermarket
#> [1] 100
#> 
#> $cafes_open
#> [1] TRUE
#> 
#> $age_based_lockdown
#>   [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
#>  [38] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
#>  [75] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
#> 
#> $workplaces_open
#> [1] 1
#> 
#> $workplace_size_max
#> [1] 100
#> 
#> $workplace_size_beta
#> [1] 13
#> 
#> $workplace_size_lmu
#> [1] -1
#> 
#> $workplace_size_lsi
#> [1] -1
#> 
#> $travel_outside_sa2
#> [1] FALSE
#> 
#> $lockdown_triggers__schools
#> $lockdown_triggers__schools$AUS
#>                      default_schools_with_any_critical 
#>                                                      1 
#> default_schools_with_any_critical_duration_of_lockdown 
#>                                                     91 
#>                        default_schools_with_infections 
#>                                                      4 
#>   default_schools_with_infections_duration_of_lockdown 
#>                                                     28 
#>                    default_schools_with_infections_geq 
#>                                                      3 
#>                                     do_school_lockdown 
#>                                                      1 
#> 
#> $lockdown_triggers__schools$NSW
#>                      default_schools_with_any_critical 
#>                                                      1 
#> default_schools_with_any_critical_duration_of_lockdown 
#>                                                     91 
#>                        default_schools_with_infections 
#>                                                      4 
#>   default_schools_with_infections_duration_of_lockdown 
#>                                                     28 
#>                    default_schools_with_infections_geq 
#>                                                      3 
#>                                     do_school_lockdown 
#>                                                      1 
#> 
#> $lockdown_triggers__schools$VIC
#>                      default_schools_with_any_critical 
#>                                                      1 
#> default_schools_with_any_critical_duration_of_lockdown 
#>                                                     91 
#>                        default_schools_with_infections 
#>                                                      4 
#>   default_schools_with_infections_duration_of_lockdown 
#>                                                     28 
#>                    default_schools_with_infections_geq 
#>                                                      3 
#>                                     do_school_lockdown 
#>                                                      1 
#> 
#> $lockdown_triggers__schools$QLD
#>                      default_schools_with_any_critical 
#>                                                      1 
#> default_schools_with_any_critical_duration_of_lockdown 
#>                                                     91 
#>                        default_schools_with_infections 
#>                                                      4 
#>   default_schools_with_infections_duration_of_lockdown 
#>                                                     28 
#>                    default_schools_with_infections_geq 
#>                                                      3 
#>                                     do_school_lockdown 
#>                                                      1 
#> 
#> $lockdown_triggers__schools$SA
#>                      default_schools_with_any_critical 
#>                                                      1 
#> default_schools_with_any_critical_duration_of_lockdown 
#>                                                     91 
#>                        default_schools_with_infections 
#>                                                      4 
#>   default_schools_with_infections_duration_of_lockdown 
#>                                                     28 
#>                    default_schools_with_infections_geq 
#>                                                      3 
#>                                     do_school_lockdown 
#>                                                      1 
#> 
#> $lockdown_triggers__schools$WA
#>                      default_schools_with_any_critical 
#>                                                      1 
#> default_schools_with_any_critical_duration_of_lockdown 
#>                                                     91 
#>                        default_schools_with_infections 
#>                                                      4 
#>   default_schools_with_infections_duration_of_lockdown 
#>                                                     28 
#>                    default_schools_with_infections_geq 
#>                                                      3 
#>                                     do_school_lockdown 
#>                                                      1 
#> 
#> $lockdown_triggers__schools$TAS
#>                      default_schools_with_any_critical 
#>                                                      1 
#> default_schools_with_any_critical_duration_of_lockdown 
#>                                                     91 
#>                        default_schools_with_infections 
#>                                                      4 
#>   default_schools_with_infections_duration_of_lockdown 
#>                                                     28 
#>                    default_schools_with_infections_geq 
#>                                                      3 
#>                                     do_school_lockdown 
#>                                                      1 
#> 
#> $lockdown_triggers__schools$NT
#>                      default_schools_with_any_critical 
#>                                                      1 
#> default_schools_with_any_critical_duration_of_lockdown 
#>                                                     91 
#>                        default_schools_with_infections 
#>                                                      4 
#>   default_schools_with_infections_duration_of_lockdown 
#>                                                     28 
#>                    default_schools_with_infections_geq 
#>                                                      3 
#>                                     do_school_lockdown 
#>                                                      1 
#> 
#> $lockdown_triggers__schools$ACT
#>                      default_schools_with_any_critical 
#>                                                      1 
#> default_schools_with_any_critical_duration_of_lockdown 
#>                                                     91 
#>                        default_schools_with_infections 
#>                                                      4 
#>   default_schools_with_infections_duration_of_lockdown 
#>                                                     28 
#>                    default_schools_with_infections_geq 
#>                                                      3 
#>                                     do_school_lockdown 
#>                                                      1 
#> 
#> $lockdown_triggers__schools$OTH
#>                      default_schools_with_any_critical 
#>                                                      1 
#> default_schools_with_any_critical_duration_of_lockdown 
#>                                                     91 
#>                        default_schools_with_infections 
#>                                                      4 
#>   default_schools_with_infections_duration_of_lockdown 
#>                                                     28 
#>                    default_schools_with_infections_geq 
#>                                                      3 
#>                                     do_school_lockdown 
#>                                                      1 
#> 
#> 
#> attr(,"original_call")
#> set_policypars()
#> Joining, by = c("day", "state")

# Tidy early_days
early_days_data <- early_days %>% 
  mutate(state = strayr::strayr(state),
         infected = status >= 1) %>% 
  filter(infected) %>% 
  group_by(runid, state, day) %>% 
  summarise(cases = sum(N),
            new_cases = max(new_infections)) %>% 
  mutate(date = from_date + (day - 1)) %>% 
  filter(state %in% keep_states)

# Get actual case numbers
actual <- read_fst("../covid19.model.sa2/inst/extdata/time_series_cases.fst") %>% 
  clean_names() %>% 
  select(-total) %>% 
  pivot_longer(-date, names_to = "state", values_to = "cases") %>% 
  mutate(state = strayr::strayr(state)) %>% 
  filter(state %in% keep_states) %>%
  group_by(state) %>% 
  mutate(new_cases = cases - lag(cases))

# Plot
early_days_data %>% 
  ggplot(aes(x = date, y = cases)) + 
  geom_line(aes(group = runid),
            alpha = .8, colour = "red") + 
  geom_line(data = actual, colour = "black") + 
  facet_wrap(vars(state)) + 
  scale_x_date(limits = c(from_date, from_date + 21)) +
  scale_y_continuous() +
  theme_minimal()
#> Warning: Removed 100 row(s) containing missing values (geom_path).


# End reprex

Created on 2020-05-30 by the reprex package (v0.3.0)

HughParsonage commented 4 years ago

Initial result for VIC @wfmackey

library(hutils)
library(data.table)
library(ggplot2)
library(scales)
library(magrittr)

library(covid19.model.sa2)
rep_each <- function(x, len) {
  rep(x, each = len / length(x), length.out = len)
}

read_returner3 <- function(file) { 
  ans <- scan(file, what = integer(), quiet = TRUE)
  DT <- data.table(N = ans)
  DT[, day := rep_each(1:75, .N)]
  DT[, Date := as.Date("2020-03-22") + day]
  DT[, state := rep_each(1:9, .N), by = .(day)]
  DT[, status := rep_each(-2:3, .N), by = .(day, state)]
  DT[, isolated := rep_len(0:1, .N)]
  DT[]
}

RESULT <- "~/covid19-results-0527/"

Grid <- fread(file = file.path(RESULT, "Grid.csv"))
Grid[, I := .I]

time_series_cases <- read_sys("time_series_cases.fst")
time_series_healed <- read_sys("time_series_recovered.fst")
time_series_deaths <- read_sys("time_series_deaths.fst")

Imputed <-
  covid19.model.sa2:::impute_time_series(time_series_cases, 
                                         time_series_deaths,
                                         time_series_healed)

Cases_By_Date <-
  Imputed[[1]] %>%
  .[, Total := NULL] %>%
  melt.data.table(id.vars = c("Date"),
                  variable.name = "State",
                  variable.factor = FALSE, 
                  value.name = "CumCases") %>%
  setkey(Date, State) %>%
  .[]

Recovered_by_Date <- 
  Imputed[[3]] %>%
  drop_cols("Total") %>%
  melt.data.table(id.vars = "Date",
                  variable.name = "State",
                  variable.factor = FALSE,
                  value.name = "Healed") %>%
  setkey(Date, State) %>%
  .[]

Killed_by_Date <- 
  Imputed[[2]] %>%
  drop_cols("Total") %>%
  melt.data.table(id.vars = "Date",
                  variable.name = "State",
                  variable.factor = FALSE,
                  value.name = "Killed") %>%
  setkey(Date, State) %>%
  .[]

ActiveCases <-
  Cases_By_Date %>%
  .[Recovered_by_Date] %>%
  .[Killed_by_Date] %>%
  .[, Active := CumCases - Healed - Killed] %>%
  .[]

Forecast <-
  c(dir(file.path(RESULT, "14"), 
        pattern = "\\.csv$", 
        full.names = TRUE, 
        recursive = TRUE),
    dir(file.path(RESULT, "05"), 
        pattern = "\\.csv$", 
        full.names = TRUE, 
        recursive = TRUE)) %>%
  .[file.size(.) > 0] %>%
  .[!duplicated(sub("^([0-9]+).*$", "\\1", basename(.)))] %>%
  lapply(function(file.csv) {
    grid_id <- as.integer(sub("^([0-9]+).*$", "\\1", basename(file.csv)))
    read_returner3(file.csv) %>%
      .[, State := states()[state + 1L]] %>%
      .[, Date := as.Date("2020-03-22") + day] %>%
      .[status > 0, .(N = sum(N)), keyby = .(Date, State)] %>%
      .[, I := grid_id]
  }) %>%
  rbindlist(use.names = TRUE, fill = TRUE)
withr::with_seed(1, {
  Forecast[State == "VIC"] %>% 
    merge(ActiveCases, by = c("Date", "State")) %>%
    merge(Grid, by = "I") %>%
    .[I %in% sample(I, size = 1e3)]
}) %>%
  .[, q_supermarket := factor(q_supermarket, levels = c(1/5000, 1/4000, 1/3000, 1/1000), 
                              labels = c("1/5000", "1/4000", "1/3000", "1/1000"),
                              ordered = TRUE)] %>%
  ggplot(aes(x = Date, y = N, color = q_supermarket, group = I)) +
  geom_line() +
  scale_color_viridis_d() +
  theme_dark() + 
  geom_line(aes(y = Active), color = "red", size = 1.1)

Created on 2020-06-01 by the reprex package (v0.3.0)

wfmackey commented 4 years ago

Initial questions:

  1. What's happening between day 1 and day 2 of model results?
  2. Why is day 1 so much lower than reality?
HughParsonage commented 4 years ago

These are good questions I don't have answers to. I suspect it's something to do with the imputation of incubation/illness periods for those already infected before day 1. It's more difficult for these dates because data is more incomplete.

HughParsonage commented 4 years ago

Other question: why do the numbers not decrease more? Some policies should go to near zero.

HughParsonage commented 4 years ago

I think the comment from one of the reviewers concerning general social distancing might be useful. Could just be a parameter that scales all q_ parameters.

wfmackey commented 4 years ago

I think the comment from one of the reviewers concerning general social distancing might be useful. Could just be a parameter that scales all q_ parameters.

That would be useful.

wfmackey commented 4 years ago

These are good questions I don't have answers to. I suspect it's something to do with the imputation of incubation/illness periods for those already infected before day 1. It's more difficult for these dates because data is more incomplete.

Where does the model get it's initial status from, if not from the .fst files?

HughParsonage commented 4 years ago

It imputes the fst files from the VIC or TAS columns (depending on what's available). The number of quarantined cases is taken from the NSW's sources of infections file.

wfmackey commented 4 years ago

Comparing imputed figures with the model results is what we should be doing then right? As that is what the model is optimised on?

HughParsonage commented 4 years ago

Yes.

@wfmackey Do you reckon you could have a go at a technique for imputing the Status and InfectedOn variables from a given start date? I've gone in circles a bit. It's not easy but I'm possibly making it harder than it should be. The technique should take into account the number of people who are in quarantine and the given incubation and illness parameters, as well as the fst files obviously.

Way back at the start I used the (imputed) distribution of Victorian cases, but that makes less sense for a particular date, and doesn't map easily when trying to quarantine new cases.

wfmackey commented 4 years ago

Yep I can do that. I will create a fst file that has imputed number of active cases by state each day. That file can be read by simulate_sa2, and also can be exported for use. Does that work for you?

HughParsonage commented 4 years ago

Preferably a function that returns the imputed values rather than a file.

HughParsonage commented 4 years ago

Here's what I'm trying to fix: https://github.com/grattan/covid19.model.sa2/blob/24e5807c640336b4ae525db0d22aeab32096113d/R/simulate.R#L356

wfmackey commented 4 years ago

@HughParsonage thoughts on this approach?

We would need to decide whether to make the illness + testing delay fixed or dependant on user input of illness_ params. To ensure model comparisons are done from the same starting point, I think we should make it fixed.

If you're happy with it, I'll weave it into mutate_Status_InfectedOn:


  library(fst)
  library(tidyverse)
  library(data.table)
#> 
#> Attaching package: 'data.table'
#> The following objects are masked from 'package:dplyr':
#> 
#>     between, first, last
#> The following object is masked from 'package:purrr':
#> 
#>     transpose
  library(covid19.model.sa2)
  library(purrr)

  Cases <- read_fst("inst/extdata/time_series_cases.fst")
  Healed <- read_fst("inst/extdata/time_series_recovered.fst")

# Each person is
illness_mean <- 11
delay_in_testing_mean <- 2

cases <-
  Cases %>%
  select(-Total) %>%
  pivot_longer(-Date, names_to = "State", values_to = "Cases") %>%
  janitor::clean_names()

healed <-
  Healed %>%
  pivot_longer(-Date, names_to = "State", values_to = "Healed") %>%
  janitor::clean_names()

# uncount cases
d <- cases %>%
  arrange(state, date) %>%
  group_by(state) %>%
  mutate(new_cases = cases - lag(cases)) %>%
  select(-cases) %>%
  filter(!is.na(new_cases),
         new_cases > 0) %>%
  ungroup() %>%
  uncount(new_cases) %>%
  mutate(illness_duration = rpois(n(), illness_mean),
         testing_delay = rpois(n(), delay_in_testing_mean),
         days_remaining_from_test = pmax(1, illness_duration - testing_delay),
         healed_date = date + days_remaining_from_test)

hist(d$days_remaining_from_test)


get_long_date <- function(.start, .end, .state) {

  tibble(active_dates = seq.Date(.start, .end, 1),
         state = .state)
}

active <-
pmap_dfr(list(d$date,
              d$healed_date,
              d$state),
         get_long_date) %>%
  group_by(state, date = active_dates) %>%
  summarise(active_cases_imputed = n())
#> `summarise()` regrouping by 'state' (override with `.groups` argument)

# Compare with reported active in Victoria

cases %>%
  left_join(healed) %>%
  filter(state == "VIC") %>%
  filter(!is.na(healed)) %>%
  mutate(active_cases = cases - healed) %>%
  left_join(active) %>%
  ggplot(aes(x = date)) +
  geom_line(aes(y = active_cases), colour = "red") +
  geom_line(aes(y = active_cases_imputed), colour = "black")
#> Joining, by = c("date", "state")
#> Joining, by = c("date", "state")

Created on 2020-06-01 by the reprex package (v0.3.0)

HughParsonage commented 4 years ago

Seems to fail with NSW (VIC is in some ways the easier of the states):

image

(I just replaced "VIC" with "NSW" in the above.)

wfmackey commented 4 years ago

Sorry, this is a better way to show it. I have used illness_mean = 15 and testing_delay = 2 in the below. Looks like NSW and VIC are outliers in different directions.


  library(fst)
  library(tidyverse)
  library(data.table)
#> 
#> Attaching package: 'data.table'
#> The following objects are masked from 'package:dplyr':
#> 
#>     between, first, last
#> The following object is masked from 'package:purrr':
#> 
#>     transpose
  library(covid19.model.sa2)
  library(purrr)

  Cases <- read_fst("inst/extdata/time_series_cases.fst")
  Healed <- read_fst("inst/extdata/time_series_recovered.fst")

# Each person is
illness_mean <- 15
delay_in_testing_mean <- 2

cases <-
  Cases %>%
  select(-Total) %>%
  pivot_longer(-Date, names_to = "State", values_to = "Cases") %>%
  janitor::clean_names()

healed <-
  Healed %>%
  pivot_longer(-Date, names_to = "State", values_to = "Healed") %>%
  janitor::clean_names()

# uncount cases
d <- cases %>%
  arrange(state, date) %>%
  group_by(state) %>%
  mutate(new_cases = cases - lag(cases)) %>%
  select(-cases) %>%
  filter(!is.na(new_cases),
         new_cases > 0) %>%
  ungroup() %>%
  uncount(new_cases) %>%
  mutate(illness_duration = rpois(n(), illness_mean),
         testing_delay = rpois(n(), delay_in_testing_mean),
         days_remaining_from_test = pmax(1, illness_duration - testing_delay),
         healed_date = date + days_remaining_from_test)

hist(d$days_remaining_from_test)


get_long_date <- function(.start, .end, .state) {

  tibble(active_dates = seq.Date(.start, .end, 1),
         state = .state)
}

active <-
pmap_dfr(list(d$date,
              d$healed_date,
              d$state),
         get_long_date) %>%
  group_by(state, date = active_dates) %>%
  summarise(active_cases_imputed = n())
#> `summarise()` regrouping by 'state' (override with `.groups` argument)

# Compare with reported active in Victoria

combine_data <- cases %>%
  left_join(healed) %>%
  mutate(active_cases = cases - healed) %>%
  left_join(active)
#> Joining, by = c("date", "state")
#> Joining, by = c("date", "state")

combine_data %>%
  # filter(state == "ACT") %>%
  # filter(!is.na(healed)) %>%
  ggplot(aes(x = date)) +
  geom_line(aes(y = active_cases), colour = "red") +
  geom_line(aes(y = active_cases_imputed), colour = "black") +
  facet_wrap(vars(state), scales = "free_y")
#> Warning: Removed 47 row(s) containing missing values (geom_path).

Created on 2020-06-01 by the reprex package (v0.3.0)

wfmackey commented 4 years ago

Don't know what NSW is doing tbh.

HughParsonage commented 4 years ago

Excellent. I was always using NSW as the benchmark, which your chart shows was a ... dismal approach.

If you want to spend another 15 on it, feel free, but that's enough to go on. Thanks!!

wfmackey commented 4 years ago

Looks like each state models their recoveries differently. I suggest we take one approach and assert that the result is the number of active cases in each state at time t.

I'll keep plugging in numbers. It might be best for both if you add it to simulate.R. Wouldn't want a tidyverse depend!

HughParsonage commented 4 years ago

From NSW Health, (I think):

Recovery is based on self-reported information when the person is interviewed 21 days after the onset of symptoms.

https://www.health.nsw.gov.au/Infectious/covid-19/Pages/stats-nsw.aspx

wfmackey commented 4 years ago

That link is useful. I'll cite it in the report. Playing around some more: can't really match NSW reporting.


  library(fst)
  library(tidyverse)
  library(data.table)
#> 
#> Attaching package: 'data.table'
#> The following objects are masked from 'package:dplyr':
#> 
#>     between, first, last
#> The following object is masked from 'package:purrr':
#> 
#>     transpose
  library(covid19.model.sa2)
  library(purrr)

  Cases <- read_fst("inst/extdata/time_series_cases.fst")
  Healed <- read_fst("inst/extdata/time_series_recovered.fst")

# Each person is
illness_mean <- 18
illness_sigma <- 0.44
delay_in_testing_mean <- 2

cases <-
  Cases %>%
  select(-Total) %>%
  pivot_longer(-Date, names_to = "State", values_to = "Cases") %>%
  janitor::clean_names()

healed <-
  Healed %>%
  pivot_longer(-Date, names_to = "State", values_to = "Healed") %>%
  janitor::clean_names()

# uncount cases
dlong <- cases %>%
  arrange(state, date) %>%
  group_by(state) %>%
  mutate(new_cases = cases - lag(cases)) %>%
  select(-cases) %>%
  filter(!is.na(new_cases),
         new_cases > 0) %>%
  ungroup() %>%
  uncount(new_cases)

compare_cases <- function(dist = "pois",
                          illness_mean = 5,
                          illness_sigma = 0.44,
                          delay_in_testing_mean = 2) {

  n <- nrow(dlong)

  if (dist == "pois") {
    .illness_duration <- rpois(n, illness_mean)
    .testing_delay  <- rpois(n, delay_in_testing_mean)
  }

  if (dist == "lnorm") {
    .illness_duration <- rlnorm(n,
                                log(illness_mean),
                                illness_sigma) %>%
      round()

    .testing_delay <- rlnorm(n, log(delay_in_testing_mean), illness_sigma/5) %>%
      round()

  }

  d <- dlong %>%
    mutate(illness_duration = .illness_duration,
           testing_delay = .testing_delay,
           days_remaining_from_test = pmax(1, illness_duration - testing_delay),
           healed_date = date + days_remaining_from_test)

  hist(d$days_remaining_from_test)

  get_long_date <- function(.start, .end, .state) {

    tibble(active_dates = seq.Date(.start, .end, 1),
           state = .state)
  }

  active <-
  pmap_dfr(list(d$date,
                d$healed_date,
                d$state),
           get_long_date) %>%
    group_by(state, date = active_dates) %>%
    summarise(active_cases_imputed = n())

  # plot
  combine_data <- cases %>%
    left_join(healed) %>%
    mutate(active_cases = cases - healed) %>%
    left_join(active)

  combine_data %>%
    ggplot(aes(x = date)) +
    geom_line(aes(y = active_cases), colour = "red") +
    geom_line(aes(y = active_cases_imputed), colour = "black") +
    facet_wrap(vars(state), scales = "free_y")

}

compare_cases("pois", 15, .44, 2)
#> `summarise()` regrouping by 'state' (override with `.groups` argument)
#> Joining, by = c("date", "state")
#> Joining, by = c("date", "state")

#> Warning: Removed 47 row(s) containing missing values (geom_path).

compare_cases("pois", 10, .44, 2) # best for VIC
#> `summarise()` regrouping by 'state' (override with `.groups` argument)
#> Joining, by = c("date", "state")
#> Joining, by = c("date", "state")

#> Warning: Removed 47 row(s) containing missing values (geom_path).

compare_cases("lnorm", 15, 1.4, 2) # best for NSW??
#> `summarise()` regrouping by 'state' (override with `.groups` argument)
#> Joining, by = c("date", "state")
#> Joining, by = c("date", "state")

#> Warning: Removed 47 row(s) containing missing values (geom_path).

Created on 2020-06-01 by the reprex package (v0.3.0)