Closed dicook closed 1 year ago
Getting a bit further, but now finding that the time series are empty:
covid <- read_csv("https://raw.githubusercontent.com/numbats/eda/master/data/melb_lga_covid.csv") %>%
mutate(Buloke = as.numeric(ifelse(Buloke == "null", "0", Buloke))) %>%
mutate(Hindmarsh = as.numeric(ifelse(Hindmarsh == "null", "0", Hindmarsh))) %>%
mutate(Towong = as.numeric(ifelse(Towong == "null", "0", Towong))) %>%
pivot_longer(cols = Alpine:Yarriambiack, names_to="NAME", values_to="cases") %>%
mutate(Date = ydm(paste0("2020/",Date))) %>%
mutate(cases=replace_na(cases, 0))
covid <- covid %>%
group_by(NAME) %>%
mutate(new_cases = cases - dplyr::lag(cases))
covid <- covid %>%
select(-cases) %>%
rename(lga = NAME, date=Date, cases = new_cases)
covid_ts <- as_tsibble(covid, key=lga, index=date)
lga <- strayr::read_absmap("lga2018") |>
rename(lga = lga_name_2018) |>
filter(state_name_2016 == "Victoria")
covid_matching <- as_cubble(
list(spatial = lga, temporal = covid_ts),
key = lga, index = date,
output = "unmatch")
lga <- lga %>%
mutate(lga = ifelse(lga == "Colac-Otway (S)", "Colac Otway (S)", lga))
covid_matching <- as_cubble(
list(spatial = lga, temporal = covid_ts),
key = lga, index = date,
output = "unmatch")
covid_matching$paired %>% print(n=80)
covid_matching$others
lga <- lga %>%
filter(!(lga %in% covid_matching$others$spatial))
covid_cb <- as_cubble(
list(spatial = lga, temporal = covid_ts),
key = lga, index = date, coords = c(cent_long, cent_lat))
The cubble happily is created without any errors, but the time series component is empty:
> covid_cb
Error in data$ts[ts_size][[1]] : subscript out of bounds
Hi Di, this is me reproducing your example after some changes in the cubble code. Could you check if this meets your need?
library(tidyverse)
library(lubridate)
#>
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#>
#> date, intersect, setdiff, union
library(cubble)
library(tsibble)
#>
#> Attaching package: 'tsibble'
#> The following objects are masked from 'package:cubble':
#>
#> index, key_data, key_vars
#> The following object is masked from 'package:lubridate':
#>
#> interval
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, union
covid <- read_csv("https://raw.githubusercontent.com/numbats/eda/master/data/melb_lga_covid.csv") %>%
mutate(Buloke = as.numeric(ifelse(Buloke == "null", "0", Buloke))) %>%
mutate(Hindmarsh = as.numeric(ifelse(Hindmarsh == "null", "0", Hindmarsh))) %>%
mutate(Towong = as.numeric(ifelse(Towong == "null", "0", Towong))) %>%
pivot_longer(cols = Alpine:Yarriambiack, names_to="NAME", values_to="cases") %>%
mutate(Date = ydm(paste0("2020/",Date))) %>%
mutate(cases=replace_na(cases, 0))
#> Rows: 112 Columns: 80
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (4): Date, Buloke, Hindmarsh, Towong
#> dbl (76): Alpine, Ararat, Ballarat, Banyule, Bass Coast, Baw Baw, Bayside, B...
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
covid <- covid %>%
group_by(NAME) %>%
mutate(new_cases = cases - dplyr::lag(cases)) %>%
select(-cases) %>%
rename(lga = NAME, date=Date, cases = new_cases)
covid_ts <- as_tsibble(covid, key=lga, index=date)
lga <- strayr::read_absmap("lga2018") |>
rename(lga = lga_name_2018) |>
filter(state_name_2016 == "Victoria")
covid_matching <- as_cubble(
list(spatial = lga, temporal = covid_ts),
key = lga, index = date,coords = c(cent_long, cent_lat))
#> ! Some sites in the temporal table don't have spatial information
#> ! Some sites in the spatial table don't have temporal information
#> ! Use argument `output = "unmatch"` to check on the unmatched key
# instructed from above to use `output = "unmatch"` to check on matching
covid_matching <- as_cubble(
list(spatial = lga, temporal = covid_ts),
key = lga, index = date,coords = c(cent_long, cent_lat),
output = "unmatch")
covid_matching$paired
#> # A tibble: 78 × 2
#> spatial temporal
#> <chr> <chr>
#> 1 Alpine (S) Alpine
#> 2 Ararat (RC) Ararat
#> 3 Ballarat (C) Ballarat
#> 4 Banyule (C) Banyule
#> 5 Bass Coast (S) Bass Coast
#> 6 Baw Baw (S) Baw Baw
#> 7 Bayside (C) Bayside
#> 8 Benalla (RC) Benalla
#> 9 Boroondara (C) Boroondara
#> 10 Brimbank (C) Brimbank
#> # … with 68 more rows
covid_matching$others
#> $temporal
#> [1] "Colac Otway"
#>
#> $spatial
#> [1] "Colac-Otway (S)"
#> [2] "Unincorporated Vic"
#> [3] "No usual address (Vic.)"
#> [4] "Migratory - Offshore - Shipping (Vic.)"
# modify the lga suggested by the matching summary
lga <- lga %>%
mutate(lga = ifelse(lga == "Colac-Otway (S)", "Colac Otway", lga)) %>%
filter(!(lga %in% covid_matching$others$spatial))
# create a cubble again
covid_cb <- as_cubble(
list(spatial = lga, temporal = covid_ts),
key = lga, index = date, coords = c(cent_long, cent_lat))
covid_cb
#> # cubble: lga [78]: nested form [sf]
#> # bbox: [141.27, -38.63, 148.3, -34.86]
#> # temporal: date [date], cases [dbl]
#> lga_code_2018 state_code_2016 state_name_2016 areasqkm_2018 cent_long
#> <chr> <chr> <chr> <dbl> <dbl>
#> 1 20110 2 Victoria 4788. 147.
#> 2 20260 2 Victoria 4211. 143.
#> 3 20570 2 Victoria 739. 144.
#> 4 20660 2 Victoria 62.5 145.
#> 5 20740 2 Victoria 866. 146.
#> 6 20830 2 Victoria 4028. 146.
#> 7 20910 2 Victoria 37.2 145.
#> 8 21010 2 Victoria 2353. 146.
#> 9 21110 2 Victoria 60.2 145.
#> 10 21180 2 Victoria 123. 145.
#> # … with 68 more rows, and 4 more variables: cent_lat <dbl>, lga <chr>,
#> # ts <list>, geometry <MULTIPOLYGON [°]>
Created on 2022-09-19 by the reprex package (v2.0.1)
Yes, this works! Great, I like it and makes it much easier for the user now.
gives the following error:
All the time series are of the same length according to my check.