Closed wfmackey closed 4 years ago
Playing around with this now. @HughParsonage can you tell me what is happening with initial case numbers for a given by_state = date
? Looks very different to the covid19.model.sa2/inst/extdata/time_series_cases.fst
file.
library(tidyverse)
options(dplyr.summarise.inform = FALSE)
library(purrr)
library(data.table)
#>
#> Attaching package: 'data.table'
#> The following objects are masked from 'package:dplyr':
#>
#> between, first, last
#> The following object is masked from 'package:purrr':
#>
#> transpose
library(patchwork)
library(covid19.model.sa2)
library(fst)
library(glue)
#>
#> Attaching package: 'glue'
#> The following object is masked from 'package:dplyr':
#>
#> collapse
library(lubridate)
#>
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:data.table':
#>
#> hour, isoweek, mday, minute, month, quarter, second, wday, week,
#> yday, year
#> The following objects are masked from 'package:base':
#>
#> date, intersect, setdiff, union
library(janitor)
#>
#> Attaching package: 'janitor'
#> The following objects are masked from 'package:stats':
#>
#> chisq.test, fisher.test
run_days <- 30
repeat_times <- 1
repeat_simulation <- function(.name,
.repeat = repeat_times,
.days = run_days,
PolicyPars = set_policypars(),
EpiPars = set_epipars(),
...) {
return_simulation <- function(x,
to_return = .to_return,
.PolicyPars = PolicyPars,
.EpiPars = EpiPars) {
rep_each <- function(x, len) {
rep(x, each = len / length(x), length.out = len)
}
d <-
simulate_sa2(days_to_simulate = .days,
returner = 3,
PolicyPars = .PolicyPars,
EpiPars = .EpiPars)
# Status
Status12 <- d[["Status12"]]
DT <- data.table(N = Status12)
DT[, day := rep_each(1:.days, .N)]
DT[, state := rep_each(1:9, .N), by = .(day)]
DT[, status := rep_each(-2:3, .N), by = .(day, state)]
DT[, isolated := rep_len(0:1, .N)]
DT[, runid := x]
# New infections
NewInfectionsByState <- d[["NewInfectionsByState"]]
DT2 <- data.table(new_infections = NewInfectionsByState)
DT2[, day := rep_each(1:.days, .N)]
DT2[, state := rep_each(1:9, .N), by = .(day)]
return(left_join(DT, DT2))
}
d <- map_dfr(1:.repeat, return_simulation)
if (!dir.exists("output")) dir.create("output")
write_fst(d, glue("output/{.name}.fst"))
}
plot_status <- function(data, keep_all_states = FALSE) {
if (!keep_all_states) {
data <- data %>%
filter(state <= 6 & state != 5)
}
data %>%
filter(status >= 1) %>%
group_by(runid, state, day) %>%
summarise(N = sum(N)) %>%
ggplot(aes(day, N)) +
geom_line() +
geom_hline(yintercept = 0, linetype = "dashed") +
scale_y_continuous(limits = c(0, NA)) +
facet_wrap(vars(state), scales = "free_y", ncol = 1) +
theme(legend.position = "top")
}
plot_new_cases <- function(data, keep_all_states = FALSE) {
if (!keep_all_states) {
data <- data %>%
filter(state <= 6 & state != 5)
}
data %>%
group_by(runid, state, day) %>%
filter(row_number() == 1) %>%
ggplot(aes(day, new_infections)) +
geom_line() +
facet_wrap(vars(state), scales = "free_y", ncol = 1)
}
# Looking at early days
from_date <- ymd("2020-03-05")
keep_states <- c("NSW", "Vic")
early_days <- repeat_simulation("early_days",
.repeat = 1,
.days = 21,
by_state = from_date,
EpiPars = set_epipars(a_workplace_rate = .5,
q_workplace = .01,
q_household = 0.02,
a_household_rate = 0.15, # def 0.15
q_supermarket = 0.002),
PolicyPars = set_policypars(
supermarkets_open = TRUE,
max_persons_per_supermarket = 100,
schools_open = TRUE,
only_Year12 = FALSE,
school_days_per_wk = 5,
do_contact_tracing = FALSE,
workplaces_open = TRUE,
workplace_size_max = 100
))
#> Joining, by = c("day", "state")
# Tidy early_days
early_days_data <- early_days %>%
mutate(state = strayr::strayr(state),
infected = status >= 1) %>%
filter(infected) %>%
group_by(runid, state, day) %>%
summarise(cases = sum(N),
new_cases = max(new_infections)) %>%
mutate(date = from_date + (day - 1)) %>%
filter(state %in% keep_states)
# Get actual case numbers
actual <- read_fst("../covid19.model.sa2/inst/extdata/time_series_cases.fst") %>%
clean_names() %>%
select(-total) %>%
pivot_longer(-date, names_to = "state", values_to = "cases") %>%
mutate(state = strayr::strayr(state)) %>%
filter(state %in% keep_states) %>%
group_by(state) %>%
mutate(new_cases = cases - lag(cases))
# Plot
early_days_data %>%
ggplot(aes(x = date, y = cases)) +
geom_line(aes(group = runid),
alpha = .8, colour = "red") +
geom_line(data = actual, colour = "black") +
facet_wrap(vars(state)) +
scale_x_date(limits = c(from_date, from_date + 21)) +
scale_y_continuous() +
theme_minimal()
#> Warning: Removed 97 row(s) containing missing values (geom_path).
Created on 2020-05-26 by the reprex package (v0.3.0)
One minor issue: the data simply wasn't recorded back then. e.g. NSW did not start recording the number of cases by (our version of) status until April
So where does by_state = ymd()
get its data from?
by_state = ymd
has no effect. It's the same as by_state = TRUE
. You want .first_day = "<date>"
lol that will likely explain it then
But to answer the essence, when using .first_day
it simply tries to get the data, but it makes no check that the data is complete for the data required.
So in the example below, what's happening with .first_day
?
library(tidyverse)
options(dplyr.summarise.inform = FALSE)
library(purrr)
library(data.table)
#>
#> Attaching package: 'data.table'
#> The following objects are masked from 'package:dplyr':
#>
#> between, first, last
#> The following object is masked from 'package:purrr':
#>
#> transpose
library(patchwork)
library(covid19.model.sa2)
library(fst)
library(glue)
#>
#> Attaching package: 'glue'
#> The following object is masked from 'package:dplyr':
#>
#> collapse
library(lubridate)
#>
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:data.table':
#>
#> hour, isoweek, mday, minute, month, quarter, second, wday, week,
#> yday, year
#> The following objects are masked from 'package:base':
#>
#> date, intersect, setdiff, union
library(janitor)
#>
#> Attaching package: 'janitor'
#> The following objects are masked from 'package:stats':
#>
#> chisq.test, fisher.test
run_days <- 30
repeat_times <- 1
repeat_simulation <- function(.name,
.repeat = repeat_times,
.days = run_days,
PolicyPars = set_policypars(),
EpiPars = set_epipars(),
...) {
return_simulation <- function(x,
to_return = .to_return,
.PolicyPars = PolicyPars,
.EpiPars = EpiPars) {
rep_each <- function(x, len) {
rep(x, each = len / length(x), length.out = len)
}
d <-
simulate_sa2(days_to_simulate = .days,
returner = 3,
PolicyPars = .PolicyPars,
EpiPars = .EpiPars)
# Status
Status12 <- d[["Status12"]]
DT <- data.table(N = Status12)
DT[, day := rep_each(1:.days, .N)]
DT[, state := rep_each(1:9, .N), by = .(day)]
DT[, status := rep_each(-2:3, .N), by = .(day, state)]
DT[, isolated := rep_len(0:1, .N)]
DT[, runid := x]
# New infections
NewInfectionsByState <- d[["NewInfectionsByState"]]
DT2 <- data.table(new_infections = NewInfectionsByState)
DT2[, day := rep_each(1:.days, .N)]
DT2[, state := rep_each(1:9, .N), by = .(day)]
return(left_join(DT, DT2))
}
d <- map_dfr(1:.repeat, return_simulation)
if (!dir.exists("output")) dir.create("output")
write_fst(d, glue("output/{.name}.fst"))
}
plot_status <- function(data, keep_all_states = FALSE) {
if (!keep_all_states) {
data <- data %>%
filter(state <= 6 & state != 5)
}
data %>%
filter(status >= 1) %>%
group_by(runid, state, day) %>%
summarise(N = sum(N)) %>%
ggplot(aes(day, N)) +
geom_line() +
geom_hline(yintercept = 0, linetype = "dashed") +
scale_y_continuous(limits = c(0, NA)) +
facet_wrap(vars(state), scales = "free_y", ncol = 1) +
theme(legend.position = "top")
}
plot_new_cases <- function(data, keep_all_states = FALSE) {
if (!keep_all_states) {
data <- data %>%
filter(state <= 6 & state != 5)
}
data %>%
group_by(runid, state, day) %>%
filter(row_number() == 1) %>%
ggplot(aes(day, new_infections)) +
geom_line() +
facet_wrap(vars(state), scales = "free_y", ncol = 1)
}
# Looking at early days
from_date <- ymd("2020-03-05")
keep_states <- c("NSW", "Vic")
early_days <- repeat_simulation("early_days",
.repeat = 1,
.days = 21,
by_state = TRUE,
.first_day = from_date,
EpiPars = set_epipars(a_workplace_rate = .5,
q_workplace = .01,
q_household = 0.02,
a_household_rate = 0.15, # def 0.15
q_supermarket = 0.002),
PolicyPars = set_policypars(
supermarkets_open = TRUE,
max_persons_per_supermarket = 100,
schools_open = TRUE,
only_Year12 = FALSE,
school_days_per_wk = 5,
do_contact_tracing = FALSE,
workplaces_open = TRUE,
workplace_size_max = 100
))
#> Joining, by = c("day", "state")
# Tidy early_days
early_days_data <- early_days %>%
mutate(state = strayr::strayr(state),
infected = status >= 1) %>%
filter(infected) %>%
group_by(runid, state, day) %>%
summarise(cases = sum(N),
new_cases = max(new_infections)) %>%
mutate(date = from_date + (day - 1)) %>%
filter(state %in% keep_states)
# Get actual case numbers
actual <- read_fst("../covid19.model.sa2/inst/extdata/time_series_cases.fst") %>%
clean_names() %>%
select(-total) %>%
pivot_longer(-date, names_to = "state", values_to = "cases") %>%
mutate(state = strayr::strayr(state)) %>%
filter(state %in% keep_states) %>%
group_by(state) %>%
mutate(new_cases = cases - lag(cases))
# Plot
early_days_data %>%
ggplot(aes(x = date, y = cases)) +
geom_line(aes(group = runid),
alpha = .8, colour = "red") +
geom_line(data = actual, colour = "black") +
facet_wrap(vars(state)) +
scale_x_date(limits = c(from_date, from_date + 21)) +
scale_y_continuous() +
theme_minimal()
#> Warning: Removed 97 row(s) containing missing values (geom_path).
Created on 2020-05-28 by the reprex package (v0.3.0)
From what I can see you're not passing .first_day
to simulate_sa2
. You need to add ...
to the simulate_sa2
call.
IT'S PROBABLY THAT
YOU IDIOT
hth
BTW I'm presently working on a MultiPolicy
argument to simulate_sa2
in order to properly model changes to the policy assumptions hitherhto
Given the state case data in time_series_cases.fst
goes back to January, why is there a limit?
library(covid19.model.sa2)
library(lubridate)
#>
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#>
#> date, intersect, setdiff, union
from_date <- ymd("2020-03-05")
early_days <- simulate_sa2(
by_state = TRUE,
.first_day = from_date,
EpiPars = set_epipars(a_workplace_rate = .5,
q_workplace = .01,
q_household = 0.02,
a_household_rate = 0.15, # def 0.15
q_supermarket = 0.002),
PolicyPars = set_policypars(
supermarkets_open = TRUE,
max_persons_per_supermarket = 100,
schools_open = TRUE,
only_Year12 = FALSE,
school_days_per_wk = 5,
do_contact_tracing = FALSE,
workplaces_open = TRUE,
workplace_size_max = 100)
)
#> Error in set_initial_by_state(state, first_yday = .first_day): `first_yday = 65`, but the earliest allowed yday, given available data, is 79.
Created on 2020-05-28 by the reprex package (v0.3.0)
Because the case data is cumulative; we need the recovered
data to determine active cases.
Also do have/know where I could find a consolidated list of previous policies?
Also fun fact ymd
is unnecessary: you can just pass the string.
Also do have/know where I could find a consolidated list of previous policies?
Anika keeping track here. But not really 'consolidated'. https://docs.google.com/spreadsheets/d/1ZqnCmSueVD26Xrw1hMszi5Q_aQwsZJe78Y5HCLwXQ64
Because the case data is cumulative; we need the
recovered
data to determine active cases.
Yep sure. Can't we use the illness_
inputs to back-out an estimate of active cases? Validating the model using March data would be really useful.
Yeah, I did take a look at ways to get it out. The hesitation I had was that most of the cases back then (and even now probably) are due to overseas arrivals. As a result, their incubation and illness periods don't match the parameters.
Yeah that's fair. Maybe a way to achieve this kind of validation is to allow an argument (by_state
or .first_day
or an additional) to take a list/df of active cases by state as the starting point. Then the user -- me, in this case -- can provide their own starting point.
So technically that is possible. You can use the myaus
argument to simualte_sa2
together with set_initial_by_state
to manually construct a table that is passed directly to do_au_simulate
That's great. Can you show a mwe of that?
Emphasis on the word 'technically'
library(magrittr)
library(hutils)
library(data.table)
stopifnot(isNamespaceLoaded("covid19.model.sa2")) # don't nest loading
#> Error: isNamespaceLoaded("covid19.model.sa2") is not TRUE
covid19.model.sa2::attachme()
#> The following object is masked from package:hutils:
#>
#> samp
myaus <- covid19.model.sa2:::read_typical()
cases_by_state <- read_sys("time_series_cases.fst")
deaths_by_state <- read_sys("time_series_deaths.fst")
recovered_by_state <- read_sys("time_series_recovered.fst")
# modify recovered by state, e.g.
states_in_file <- intersect(names(recovered_by_state), states())
approx_column <- function(x, n = length(x)) {
force(n)
as.integer(approx(seq_along(x), x, n = n)[["y"]])
}
# This won't work -- results in more 'recovered cases' than cases
# recovered_by_state[1, (states_in_file) := lapply(.SD, coalesce, 0L), .SDcols = c(states_in_file)]
# recovered_by_state[, (states_in_file) := lapply(.SD, approx_column), .SDcols = c(states_in_file)]
# recovered_by_state[1, (states_in_file) := lapply(.SD, coalesce, 0L), .SDcols = c(states_in_file)]
recovered_by_state[, (states_in_file) := lapply(.SD, coalesce, 0L), .SDcols = c(states_in_file)]
myaus[, Status := set_initial_by_state(state,
cases_by_state = cases_by_state,
deaths_by_state = deaths_by_state,
recovered_by_state = recovered_by_state,
first_yday = "2020-03-05",
.population = .N)]
myaus[Status != 0L, InfectedOn := yday("2020-03-05") - 11L]
nPlacesByDestType <-
lapply(1:106, function(i) {
if (i == 98L) {
read_sys("nSupermarkets_by_sa2.fst",
columns = "nSupermarkets")[[1L]]
} else {
integer(0)
}
})
aus <- copy(myaus) # not necessary just in case you need original
out <-
do_au_simulate(Status = dollars(aus, Status),
InfectedOn = dollars(aus, InfectedOn),
SA2 = dollars(aus, sa2),
hid = dollars(aus, hid),
Age = dollars(aus, Age),
School = dollars(aus, short_school_id),
DZN = dollars(aus, short_dzn),
wid = dollars(aus, wid),
nColleagues = dollars(aus, nColleagues),
PlaceTypeBySA2 = integer(0),
LabourForceStatus = dollars(aus, LabourForceStatus),
SeedOriginal = 1:2048,
Policy = set_policypars(),
MultiPolicy = NULL,
nPlacesByDestType = nPlacesByDestType,
Epi = set_epipars(),
Incubation = rep(14L, nrow(aus)),
Illness = rep(1L, nrow(aus)),
nSupermarketsAvbl = integer(nrow(aus)),
SupermarketTypical = integer(nrow(aus)),
minPlaceID_nPlacesByDestType = minPlaceID_nPlacesByDestType,
yday_start = yday("2020-03-05"), # as required
days_to_sim = 5,
N = nrow(aus),
display_progress = getOption("covid19.showProgress", 0L),
optionz = getOption("optionz", 0L),
nThread = getOption("covid19.model.sa2_nThread", 1L),
returner = 2L)
invisible(out) # All outputs are large
Created on 2020-05-28 by the reprex package (v0.3.0)
lol
I am imagining something like:
library(tidyverse)
library(covid19.model.sa2)
manual_initial_status <- tribble(
~state, ~active, ~critical, ~dead, ~healed,
"NSW", 100, 9, 6, 30,
"VIC", 40, 5, 4, 20,
"QLD", 30, 2, 3, 10,
"SA", 10, 0, 0, 10,
"WA", 12, 0, 0, 10,
"TAS", 20, 0, 0, 1,
"NT", 1, 0, 0, 3,
"ACT", 0, 0, 0, 1)
)
simulate_sa2(InitialStatus = manual_initial_status)
This is important for validation -- is it possible to make the above (or let .first_day
estimate active cases from total cases) available soonish?
Not today.
But I agree that it's top priority.
Yep. Rough time estimate?
I don't think I'm using .first_day
correctly? You can talk me through it at 2pm.
# Set up -------
library(tidyverse)
options(dplyr.summarise.inform = FALSE)
library(purrr)
library(data.table)
#>
#> Attaching package: 'data.table'
#> The following objects are masked from 'package:dplyr':
#>
#> between, first, last
#> The following object is masked from 'package:purrr':
#>
#> transpose
library(patchwork)
library(covid19.model.sa2)
library(fst)
library(glue)
#>
#> Attaching package: 'glue'
#> The following object is masked from 'package:dplyr':
#>
#> collapse
library(lubridate)
#>
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:data.table':
#>
#> hour, isoweek, mday, minute, month, quarter, second, wday, week,
#> yday, year
#> The following objects are masked from 'package:base':
#>
#> date, intersect, setdiff, union
library(janitor)
#>
#> Attaching package: 'janitor'
#> The following objects are masked from 'package:stats':
#>
#> chisq.test, fisher.test
library(strayr)
library(grattantheme)
run_days <- 20
repeat_times <- 1
repeat_simulation <- function(.name,
.rebuild = "check",
.repeat = repeat_times,
.days = run_days,
PolicyPars = set_policypars(),
EpiPars = set_epipars(),
...) {
file_path <- glue("output/{.name}.fst")
if (.rebuild == "check" & file.exists(file_path)) {
message("Run found; loading")
return(read_fst(file_path))
}
# is true otherwise, so proceed
# Set epi settings
message("Using epi parameters:")
default_epi <- set_epipars(
incubation_distribution = "pois",
incubation_mean = 6,
incubation_sigma = 0.5
)
EpiPars <- modifyList(default_epi, EpiPars)
print(EpiPars)
# Add new policy settings to defaults
message("Using policy parameters:")
default_policy <- set_policypars()
PolicyPars <- modifyList(default_policy, PolicyPars)
print(PolicyPars)
return_simulation <- function(x,
to_return = .to_return,
.PolicyPars = PolicyPars,
.EpiPars = EpiPars,
...) {
rep_each <- function(x, len) {
rep(x, each = len / length(x), length.out = len)
}
d <-
simulate_sa2(days_to_simulate = .days,
returner = 3,
PolicyPars = .PolicyPars,
EpiPars = .EpiPars,
...)
# Status
Status12 <- d[["Status12"]]
DT <- data.table(N = Status12)
DT[, day := rep_each(1:.days, .N)]
DT[, state := rep_each(1:9, .N), by = .(day)]
DT[, status := rep_each(-2:3, .N), by = .(day, state)]
DT[, isolated := rep_len(0:1, .N)]
DT[, runid := x]
# New infections
NewInfectionsByState <- d[["NewInfectionsByState"]]
DT2 <- data.table(new_infections = NewInfectionsByState)
DT2[, day := rep_each(1:.days, .N)]
DT2[, state := rep_each(1:9, .N), by = .(day)]
return(left_join(DT, DT2))
}
d <- map_dfr(1:.repeat, return_simulation) %>%
mutate(state_code = state,
state = strayr(state))
if (!dir.exists("output")) dir.create("output")
write_fst(d, file_path)
}
# Run -----
from_date <- ymd("2020-03-10")
keep_states <- c("NSW", "Vic")
early_days <- repeat_simulation("early_days",
.rebuild = TRUE,
.repeat = 1,
.days = 21,
.first_day = from_date,
EpiPars = set_epipars(a_workplace_rate = .5,
q_workplace = .01,
q_household = 0.02,
a_household_rate = 0.15, # def 0.15
q_supermarket = 0.002),
PolicyPars = set_policypars(
supermarkets_open = TRUE,
max_persons_per_supermarket = 100,
schools_open = TRUE,
only_Year12 = FALSE,
school_days_per_wk = 5,
do_contact_tracing = FALSE,
workplaces_open = TRUE,
workplace_size_max = 100
))
#> Using epi parameters:
#> $CHECKED
#> [1] TRUE
#>
#> $a_household_rate
#> [1] 0.15
#>
#> $a_schools_rate
#> [1] 0.07
#>
#> $a_workplace_rate
#> [1] 0.5
#>
#> $illness_distribution
#> [1] 1
#>
#> $illness_mean
#> [1] 15
#>
#> $illness_sigma
#> [1] 1
#>
#> $incubation_distribution
#> [1] 1
#>
#> $incubation_mean
#> [1] 5
#>
#> $incubation_sigma
#> [1] 0.44
#>
#> $p_asympto
#> [1] 0.48
#>
#> $p_critical
#> [1] 0.02
#>
#> $p_death
#> [1] 0.01
#>
#> $p_visit_major_event
#> [1] 0.01923077
#>
#> $q_household
#> [1] 0.02
#>
#> $q_major_event
#> [1] 2e-04
#>
#> $q_places
#> [1] 0.002
#>
#> $q_school
#> [1] 0.0003333333
#>
#> $q_school_grade
#> [1] 0.002
#>
#> $q_supermarket
#> [1] 0.002
#>
#> $q_workplace
#> [1] 0.01
#>
#> $resistance_threshold
#> [1] 0.4
#> Using policy parameters:
#> $school_lockdown_triggers_exist
#> [1] TRUE
#>
#> $tests_by_state_was_null
#> [1] TRUE
#>
#> $yday_start
#> [1] 0
#>
#> $supermarkets_open
#> [1] TRUE
#>
#> $schools_open
#> [1] TRUE
#>
#> $only_Year12
#> [1] FALSE
#>
#> $school_days_per_wk
#> $school_days_per_wk$AUS
#> [1] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
#>
#> $school_days_per_wk$NSW
#> [1] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
#>
#> $school_days_per_wk$VIC
#> [1] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
#>
#> $school_days_per_wk$QLD
#> [1] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
#>
#> $school_days_per_wk$SA
#> [1] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
#>
#> $school_days_per_wk$WA
#> [1] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
#>
#> $school_days_per_wk$TAS
#> [1] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
#>
#> $school_days_per_wk$NT
#> [1] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
#>
#> $school_days_per_wk$ACT
#> [1] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
#>
#> $school_days_per_wk$OTH
#> [1] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
#>
#> $school_days_per_wk$week15combns
#> $school_days_per_wk$week15combns[[1]]
#> [,1] [,2] [,3] [,4] [,5]
#> [1,] 1 2 3 4 5
#>
#> $school_days_per_wk$week15combns[[2]]
#> [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
#> [1,] 1 1 1 1 2 2 2 3 3 4
#> [2,] 2 3 4 5 3 4 5 4 5 5
#>
#> $school_days_per_wk$week15combns[[3]]
#> [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
#> [1,] 1 1 1 1 1 1 2 2 2 3
#> [2,] 2 2 2 3 3 4 3 3 4 4
#> [3,] 3 4 5 4 5 5 4 5 5 5
#>
#> $school_days_per_wk$week15combns[[4]]
#> [,1] [,2] [,3] [,4] [,5]
#> [1,] 1 1 1 1 2
#> [2,] 2 2 2 3 3
#> [3,] 3 3 4 4 4
#> [4,] 4 5 5 5 5
#>
#> $school_days_per_wk$week15combns[[5]]
#> [,1]
#> [1,] 1
#> [2,] 2
#> [3,] 3
#> [4,] 4
#> [5,] 5
#>
#>
#> $school_days_per_wk$all_full_time
#> [1] TRUE
#>
#>
#> $do_contact_tracing
#> [1] FALSE
#>
#> $contact_tracing_days_before_test
#> [1] 0
#>
#> $contact_tracing_days_until_result
#> [1] 3
#>
#> $contact_tracing_only_sympto
#> [1] TRUE
#>
#> $contact_tracing_success
#> [1] 0.9
#>
#> $tests_by_state
#> [1] 19405 4276 11632 1110 799 857 469 16 246 0
#>
#> $max_persons_per_event
#> [1] 5
#>
#> $n_major_events_weekday
#> [1] 28
#>
#> $n_major_events_weekend
#> [1] 56
#>
#> $max_persons_per_supermarket
#> [1] 100
#>
#> $cafes_open
#> [1] TRUE
#>
#> $age_based_lockdown
#> [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
#> [38] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
#> [75] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
#>
#> $workplaces_open
#> [1] 1
#>
#> $workplace_size_max
#> [1] 100
#>
#> $workplace_size_beta
#> [1] 13
#>
#> $workplace_size_lmu
#> [1] -1
#>
#> $workplace_size_lsi
#> [1] -1
#>
#> $travel_outside_sa2
#> [1] FALSE
#>
#> $lockdown_triggers__schools
#> $lockdown_triggers__schools$AUS
#> default_schools_with_any_critical
#> 1
#> default_schools_with_any_critical_duration_of_lockdown
#> 91
#> default_schools_with_infections
#> 4
#> default_schools_with_infections_duration_of_lockdown
#> 28
#> default_schools_with_infections_geq
#> 3
#> do_school_lockdown
#> 1
#>
#> $lockdown_triggers__schools$NSW
#> default_schools_with_any_critical
#> 1
#> default_schools_with_any_critical_duration_of_lockdown
#> 91
#> default_schools_with_infections
#> 4
#> default_schools_with_infections_duration_of_lockdown
#> 28
#> default_schools_with_infections_geq
#> 3
#> do_school_lockdown
#> 1
#>
#> $lockdown_triggers__schools$VIC
#> default_schools_with_any_critical
#> 1
#> default_schools_with_any_critical_duration_of_lockdown
#> 91
#> default_schools_with_infections
#> 4
#> default_schools_with_infections_duration_of_lockdown
#> 28
#> default_schools_with_infections_geq
#> 3
#> do_school_lockdown
#> 1
#>
#> $lockdown_triggers__schools$QLD
#> default_schools_with_any_critical
#> 1
#> default_schools_with_any_critical_duration_of_lockdown
#> 91
#> default_schools_with_infections
#> 4
#> default_schools_with_infections_duration_of_lockdown
#> 28
#> default_schools_with_infections_geq
#> 3
#> do_school_lockdown
#> 1
#>
#> $lockdown_triggers__schools$SA
#> default_schools_with_any_critical
#> 1
#> default_schools_with_any_critical_duration_of_lockdown
#> 91
#> default_schools_with_infections
#> 4
#> default_schools_with_infections_duration_of_lockdown
#> 28
#> default_schools_with_infections_geq
#> 3
#> do_school_lockdown
#> 1
#>
#> $lockdown_triggers__schools$WA
#> default_schools_with_any_critical
#> 1
#> default_schools_with_any_critical_duration_of_lockdown
#> 91
#> default_schools_with_infections
#> 4
#> default_schools_with_infections_duration_of_lockdown
#> 28
#> default_schools_with_infections_geq
#> 3
#> do_school_lockdown
#> 1
#>
#> $lockdown_triggers__schools$TAS
#> default_schools_with_any_critical
#> 1
#> default_schools_with_any_critical_duration_of_lockdown
#> 91
#> default_schools_with_infections
#> 4
#> default_schools_with_infections_duration_of_lockdown
#> 28
#> default_schools_with_infections_geq
#> 3
#> do_school_lockdown
#> 1
#>
#> $lockdown_triggers__schools$NT
#> default_schools_with_any_critical
#> 1
#> default_schools_with_any_critical_duration_of_lockdown
#> 91
#> default_schools_with_infections
#> 4
#> default_schools_with_infections_duration_of_lockdown
#> 28
#> default_schools_with_infections_geq
#> 3
#> do_school_lockdown
#> 1
#>
#> $lockdown_triggers__schools$ACT
#> default_schools_with_any_critical
#> 1
#> default_schools_with_any_critical_duration_of_lockdown
#> 91
#> default_schools_with_infections
#> 4
#> default_schools_with_infections_duration_of_lockdown
#> 28
#> default_schools_with_infections_geq
#> 3
#> do_school_lockdown
#> 1
#>
#> $lockdown_triggers__schools$OTH
#> default_schools_with_any_critical
#> 1
#> default_schools_with_any_critical_duration_of_lockdown
#> 91
#> default_schools_with_infections
#> 4
#> default_schools_with_infections_duration_of_lockdown
#> 28
#> default_schools_with_infections_geq
#> 3
#> do_school_lockdown
#> 1
#>
#>
#> attr(,"original_call")
#> set_policypars()
#> Joining, by = c("day", "state")
# Tidy early_days
early_days_data <- early_days %>%
mutate(state = strayr::strayr(state),
infected = status >= 1) %>%
filter(infected) %>%
group_by(runid, state, day) %>%
summarise(cases = sum(N),
new_cases = max(new_infections)) %>%
mutate(date = from_date + (day - 1)) %>%
filter(state %in% keep_states)
# Get actual case numbers
actual <- read_fst("../covid19.model.sa2/inst/extdata/time_series_cases.fst") %>%
clean_names() %>%
select(-total) %>%
pivot_longer(-date, names_to = "state", values_to = "cases") %>%
mutate(state = strayr::strayr(state)) %>%
filter(state %in% keep_states) %>%
group_by(state) %>%
mutate(new_cases = cases - lag(cases))
# Plot
early_days_data %>%
ggplot(aes(x = date, y = cases)) +
geom_line(aes(group = runid),
alpha = .8, colour = "red") +
geom_line(data = actual, colour = "black") +
facet_wrap(vars(state)) +
scale_x_date(limits = c(from_date, from_date + 21)) +
scale_y_continuous() +
theme_minimal()
#> Warning: Removed 100 row(s) containing missing values (geom_path).
# End reprex
Created on 2020-05-30 by the reprex package (v0.3.0)
Initial result for VIC @wfmackey
library(hutils)
library(data.table)
library(ggplot2)
library(scales)
library(magrittr)
library(covid19.model.sa2)
rep_each <- function(x, len) {
rep(x, each = len / length(x), length.out = len)
}
read_returner3 <- function(file) {
ans <- scan(file, what = integer(), quiet = TRUE)
DT <- data.table(N = ans)
DT[, day := rep_each(1:75, .N)]
DT[, Date := as.Date("2020-03-22") + day]
DT[, state := rep_each(1:9, .N), by = .(day)]
DT[, status := rep_each(-2:3, .N), by = .(day, state)]
DT[, isolated := rep_len(0:1, .N)]
DT[]
}
RESULT <- "~/covid19-results-0527/"
Grid <- fread(file = file.path(RESULT, "Grid.csv"))
Grid[, I := .I]
time_series_cases <- read_sys("time_series_cases.fst")
time_series_healed <- read_sys("time_series_recovered.fst")
time_series_deaths <- read_sys("time_series_deaths.fst")
Imputed <-
covid19.model.sa2:::impute_time_series(time_series_cases,
time_series_deaths,
time_series_healed)
Cases_By_Date <-
Imputed[[1]] %>%
.[, Total := NULL] %>%
melt.data.table(id.vars = c("Date"),
variable.name = "State",
variable.factor = FALSE,
value.name = "CumCases") %>%
setkey(Date, State) %>%
.[]
Recovered_by_Date <-
Imputed[[3]] %>%
drop_cols("Total") %>%
melt.data.table(id.vars = "Date",
variable.name = "State",
variable.factor = FALSE,
value.name = "Healed") %>%
setkey(Date, State) %>%
.[]
Killed_by_Date <-
Imputed[[2]] %>%
drop_cols("Total") %>%
melt.data.table(id.vars = "Date",
variable.name = "State",
variable.factor = FALSE,
value.name = "Killed") %>%
setkey(Date, State) %>%
.[]
ActiveCases <-
Cases_By_Date %>%
.[Recovered_by_Date] %>%
.[Killed_by_Date] %>%
.[, Active := CumCases - Healed - Killed] %>%
.[]
Forecast <-
c(dir(file.path(RESULT, "14"),
pattern = "\\.csv$",
full.names = TRUE,
recursive = TRUE),
dir(file.path(RESULT, "05"),
pattern = "\\.csv$",
full.names = TRUE,
recursive = TRUE)) %>%
.[file.size(.) > 0] %>%
.[!duplicated(sub("^([0-9]+).*$", "\\1", basename(.)))] %>%
lapply(function(file.csv) {
grid_id <- as.integer(sub("^([0-9]+).*$", "\\1", basename(file.csv)))
read_returner3(file.csv) %>%
.[, State := states()[state + 1L]] %>%
.[, Date := as.Date("2020-03-22") + day] %>%
.[status > 0, .(N = sum(N)), keyby = .(Date, State)] %>%
.[, I := grid_id]
}) %>%
rbindlist(use.names = TRUE, fill = TRUE)
withr::with_seed(1, {
Forecast[State == "VIC"] %>%
merge(ActiveCases, by = c("Date", "State")) %>%
merge(Grid, by = "I") %>%
.[I %in% sample(I, size = 1e3)]
}) %>%
.[, q_supermarket := factor(q_supermarket, levels = c(1/5000, 1/4000, 1/3000, 1/1000),
labels = c("1/5000", "1/4000", "1/3000", "1/1000"),
ordered = TRUE)] %>%
ggplot(aes(x = Date, y = N, color = q_supermarket, group = I)) +
geom_line() +
scale_color_viridis_d() +
theme_dark() +
geom_line(aes(y = Active), color = "red", size = 1.1)
Created on 2020-06-01 by the reprex package (v0.3.0)
Initial questions:
These are good questions I don't have answers to. I suspect it's something to do with the imputation of incubation/illness periods for those already infected before day 1. It's more difficult for these dates because data is more incomplete.
Other question: why do the numbers not decrease more? Some policies should go to near zero.
I think the comment from one of the reviewers concerning general social distancing might be useful. Could just be a parameter that scales all q_
parameters.
I think the comment from one of the reviewers concerning general social distancing might be useful. Could just be a parameter that scales all
q_
parameters.
That would be useful.
These are good questions I don't have answers to. I suspect it's something to do with the imputation of incubation/illness periods for those already infected before day 1. It's more difficult for these dates because data is more incomplete.
Where does the model get it's initial status from, if not from the .fst
files?
It imputes the fst
files from the VIC or TAS columns (depending on what's available). The number of quarantined cases is taken from the NSW's sources of infections file.
Comparing imputed figures with the model results is what we should be doing then right? As that is what the model is optimised on?
Yes.
@wfmackey Do you reckon you could have a go at a technique for imputing the Status
and InfectedOn
variables from a given start date? I've gone in circles a bit. It's not easy but I'm possibly making it harder than it should be. The technique should take into account the number of people who are in quarantine and the given incubation and illness parameters, as well as the fst
files obviously.
Way back at the start I used the (imputed) distribution of Victorian cases, but that makes less sense for a particular date, and doesn't map easily when trying to quarantine new cases.
Yep I can do that. I will create a fst
file that has imputed number of active cases by state each day. That file can be read by simulate_sa2
, and also can be exported for use. Does that work for you?
Preferably a function that returns the imputed values rather than a file.
Here's what I'm trying to fix: https://github.com/grattan/covid19.model.sa2/blob/24e5807c640336b4ae525db0d22aeab32096113d/R/simulate.R#L356
@HughParsonage thoughts on this approach?
We would need to decide whether to make the illness + testing delay fixed or dependant on user input of illness_
params. To ensure model comparisons are done from the same starting point, I think we should make it fixed.
If you're happy with it, I'll weave it into mutate_Status_InfectedOn
:
library(fst)
library(tidyverse)
library(data.table)
#>
#> Attaching package: 'data.table'
#> The following objects are masked from 'package:dplyr':
#>
#> between, first, last
#> The following object is masked from 'package:purrr':
#>
#> transpose
library(covid19.model.sa2)
library(purrr)
Cases <- read_fst("inst/extdata/time_series_cases.fst")
Healed <- read_fst("inst/extdata/time_series_recovered.fst")
# Each person is
illness_mean <- 11
delay_in_testing_mean <- 2
cases <-
Cases %>%
select(-Total) %>%
pivot_longer(-Date, names_to = "State", values_to = "Cases") %>%
janitor::clean_names()
healed <-
Healed %>%
pivot_longer(-Date, names_to = "State", values_to = "Healed") %>%
janitor::clean_names()
# uncount cases
d <- cases %>%
arrange(state, date) %>%
group_by(state) %>%
mutate(new_cases = cases - lag(cases)) %>%
select(-cases) %>%
filter(!is.na(new_cases),
new_cases > 0) %>%
ungroup() %>%
uncount(new_cases) %>%
mutate(illness_duration = rpois(n(), illness_mean),
testing_delay = rpois(n(), delay_in_testing_mean),
days_remaining_from_test = pmax(1, illness_duration - testing_delay),
healed_date = date + days_remaining_from_test)
hist(d$days_remaining_from_test)
get_long_date <- function(.start, .end, .state) {
tibble(active_dates = seq.Date(.start, .end, 1),
state = .state)
}
active <-
pmap_dfr(list(d$date,
d$healed_date,
d$state),
get_long_date) %>%
group_by(state, date = active_dates) %>%
summarise(active_cases_imputed = n())
#> `summarise()` regrouping by 'state' (override with `.groups` argument)
# Compare with reported active in Victoria
cases %>%
left_join(healed) %>%
filter(state == "VIC") %>%
filter(!is.na(healed)) %>%
mutate(active_cases = cases - healed) %>%
left_join(active) %>%
ggplot(aes(x = date)) +
geom_line(aes(y = active_cases), colour = "red") +
geom_line(aes(y = active_cases_imputed), colour = "black")
#> Joining, by = c("date", "state")
#> Joining, by = c("date", "state")
Created on 2020-06-01 by the reprex package (v0.3.0)
Seems to fail with NSW (VIC is in some ways the easier of the states):
(I just replaced "VIC"
with "NSW"
in the above.)
Sorry, this is a better way to show it. I have used illness_mean = 15
and testing_delay = 2
in the below. Looks like NSW and VIC are outliers in different directions.
library(fst)
library(tidyverse)
library(data.table)
#>
#> Attaching package: 'data.table'
#> The following objects are masked from 'package:dplyr':
#>
#> between, first, last
#> The following object is masked from 'package:purrr':
#>
#> transpose
library(covid19.model.sa2)
library(purrr)
Cases <- read_fst("inst/extdata/time_series_cases.fst")
Healed <- read_fst("inst/extdata/time_series_recovered.fst")
# Each person is
illness_mean <- 15
delay_in_testing_mean <- 2
cases <-
Cases %>%
select(-Total) %>%
pivot_longer(-Date, names_to = "State", values_to = "Cases") %>%
janitor::clean_names()
healed <-
Healed %>%
pivot_longer(-Date, names_to = "State", values_to = "Healed") %>%
janitor::clean_names()
# uncount cases
d <- cases %>%
arrange(state, date) %>%
group_by(state) %>%
mutate(new_cases = cases - lag(cases)) %>%
select(-cases) %>%
filter(!is.na(new_cases),
new_cases > 0) %>%
ungroup() %>%
uncount(new_cases) %>%
mutate(illness_duration = rpois(n(), illness_mean),
testing_delay = rpois(n(), delay_in_testing_mean),
days_remaining_from_test = pmax(1, illness_duration - testing_delay),
healed_date = date + days_remaining_from_test)
hist(d$days_remaining_from_test)
get_long_date <- function(.start, .end, .state) {
tibble(active_dates = seq.Date(.start, .end, 1),
state = .state)
}
active <-
pmap_dfr(list(d$date,
d$healed_date,
d$state),
get_long_date) %>%
group_by(state, date = active_dates) %>%
summarise(active_cases_imputed = n())
#> `summarise()` regrouping by 'state' (override with `.groups` argument)
# Compare with reported active in Victoria
combine_data <- cases %>%
left_join(healed) %>%
mutate(active_cases = cases - healed) %>%
left_join(active)
#> Joining, by = c("date", "state")
#> Joining, by = c("date", "state")
combine_data %>%
# filter(state == "ACT") %>%
# filter(!is.na(healed)) %>%
ggplot(aes(x = date)) +
geom_line(aes(y = active_cases), colour = "red") +
geom_line(aes(y = active_cases_imputed), colour = "black") +
facet_wrap(vars(state), scales = "free_y")
#> Warning: Removed 47 row(s) containing missing values (geom_path).
Created on 2020-06-01 by the reprex package (v0.3.0)
Don't know what NSW is doing tbh.
Excellent. I was always using NSW as the benchmark, which your chart shows was a ... dismal approach.
If you want to spend another 15 on it, feel free, but that's enough to go on. Thanks!!
Looks like each state models their recoveries differently. I suggest we take one approach and assert that the result is the number of active cases in each state at time t
.
I'll keep plugging in numbers. It might be best for both if you add it to simulate.R
. Wouldn't want a tidyverse
depend!
From NSW Health, (I think):
Recovery is based on self-reported information when the person is interviewed 21 days after the onset of symptoms.
https://www.health.nsw.gov.au/Infectious/covid-19/Pages/stats-nsw.aspx
That link is useful. I'll cite it in the report. Playing around some more: can't really match NSW reporting.
library(fst)
library(tidyverse)
library(data.table)
#>
#> Attaching package: 'data.table'
#> The following objects are masked from 'package:dplyr':
#>
#> between, first, last
#> The following object is masked from 'package:purrr':
#>
#> transpose
library(covid19.model.sa2)
library(purrr)
Cases <- read_fst("inst/extdata/time_series_cases.fst")
Healed <- read_fst("inst/extdata/time_series_recovered.fst")
# Each person is
illness_mean <- 18
illness_sigma <- 0.44
delay_in_testing_mean <- 2
cases <-
Cases %>%
select(-Total) %>%
pivot_longer(-Date, names_to = "State", values_to = "Cases") %>%
janitor::clean_names()
healed <-
Healed %>%
pivot_longer(-Date, names_to = "State", values_to = "Healed") %>%
janitor::clean_names()
# uncount cases
dlong <- cases %>%
arrange(state, date) %>%
group_by(state) %>%
mutate(new_cases = cases - lag(cases)) %>%
select(-cases) %>%
filter(!is.na(new_cases),
new_cases > 0) %>%
ungroup() %>%
uncount(new_cases)
compare_cases <- function(dist = "pois",
illness_mean = 5,
illness_sigma = 0.44,
delay_in_testing_mean = 2) {
n <- nrow(dlong)
if (dist == "pois") {
.illness_duration <- rpois(n, illness_mean)
.testing_delay <- rpois(n, delay_in_testing_mean)
}
if (dist == "lnorm") {
.illness_duration <- rlnorm(n,
log(illness_mean),
illness_sigma) %>%
round()
.testing_delay <- rlnorm(n, log(delay_in_testing_mean), illness_sigma/5) %>%
round()
}
d <- dlong %>%
mutate(illness_duration = .illness_duration,
testing_delay = .testing_delay,
days_remaining_from_test = pmax(1, illness_duration - testing_delay),
healed_date = date + days_remaining_from_test)
hist(d$days_remaining_from_test)
get_long_date <- function(.start, .end, .state) {
tibble(active_dates = seq.Date(.start, .end, 1),
state = .state)
}
active <-
pmap_dfr(list(d$date,
d$healed_date,
d$state),
get_long_date) %>%
group_by(state, date = active_dates) %>%
summarise(active_cases_imputed = n())
# plot
combine_data <- cases %>%
left_join(healed) %>%
mutate(active_cases = cases - healed) %>%
left_join(active)
combine_data %>%
ggplot(aes(x = date)) +
geom_line(aes(y = active_cases), colour = "red") +
geom_line(aes(y = active_cases_imputed), colour = "black") +
facet_wrap(vars(state), scales = "free_y")
}
compare_cases("pois", 15, .44, 2)
#> `summarise()` regrouping by 'state' (override with `.groups` argument)
#> Joining, by = c("date", "state")
#> Joining, by = c("date", "state")
#> Warning: Removed 47 row(s) containing missing values (geom_path).
compare_cases("pois", 10, .44, 2) # best for VIC
#> `summarise()` regrouping by 'state' (override with `.groups` argument)
#> Joining, by = c("date", "state")
#> Joining, by = c("date", "state")
#> Warning: Removed 47 row(s) containing missing values (geom_path).
compare_cases("lnorm", 15, 1.4, 2) # best for NSW??
#> `summarise()` regrouping by 'state' (override with `.groups` argument)
#> Joining, by = c("date", "state")
#> Joining, by = c("date", "state")
#> Warning: Removed 47 row(s) containing missing values (geom_path).
Created on 2020-06-01 by the reprex package (v0.3.0)
This will be shown at the end of Chapter 3.