Open wfmackey opened 2 years ago
Something to start with perhaps?
library(tidyverse) library(glue) # Set up sacc_url <- "https://www.abs.gov.au/ausstats/subscriber.nsf/log?openagent&12690_SACC2008_DO0001_201608.xls&1269.0&Data%20Cubes&94D987C267F8BB83CA25744B00153EE5&0&Second%20Edition&19.05.2008&Previous" temp_dir <- tempdir() temp_path <- file.path(temp_dir, "sacc.xls") download.file(sacc_url, temp_path, mode = "wb") raw <- readxl::read_excel(temp_path, sheet = 5, range = "A7:D329", col_names = FALSE) %>% janitor::clean_names() # initial data sacc_2nd_raw <- raw %>% filter(!is.na(x1)) %>% select(code = 1, text = 2) %>% mutate(code = as.character(code)) major <- sacc_2nd_raw %>% filter(str_length(code) == 1) %>% rename(major_code = code, major = text) minor <- sacc_2nd_raw %>% filter(str_length(code) == 2) %>% rename(minor_code = code, minor = text) # combined sacc_2nd_countries <- sacc_2nd_raw %>% filter(str_length(code) == 4) %>% mutate(major_code = str_sub(code, 1, 1), minor_code = str_sub(code, 1, 2)) %>% left_join(major, by = "major_code") %>% left_join(minor, by = "minor_code") %>% select(major_code, major_group = major, minor_code, minor_group = minor, country_code = code, country = text)
# # A tibble: 252 x 6 # major_code major_group minor_code minor_group country_code country # <chr> <chr> <chr> <chr> <chr> <chr> # 1 1 OCEANIA AND ANTARCTICA 11 Australia (includes External Territories) 1101 Australia # 2 1 OCEANIA AND ANTARCTICA 11 Australia (includes External Territories) 1102 Norfolk Island # 3 1 OCEANIA AND ANTARCTICA 11 Australia (includes External Territories) 1199 Australian External Territories, nec # 4 1 OCEANIA AND ANTARCTICA 12 New Zealand 1201 New Zealand # 5 1 OCEANIA AND ANTARCTICA 13 Melanesia 1301 New Caledonia # 6 1 OCEANIA AND ANTARCTICA 13 Melanesia 1302 Papua New Guinea # 7 1 OCEANIA AND ANTARCTICA 13 Melanesia 1303 Solomon Islands # 8 1 OCEANIA AND ANTARCTICA 13 Melanesia 1304 Vanuatu # 9 1 OCEANIA AND ANTARCTICA 14 Micronesia 1401 Guam # 10 1 OCEANIA AND ANTARCTICA 14 Micronesia 1402 Kiribati # # ... with 242 more rows # # i Use `print(n = ...)` to see more rows
Something to start with perhaps?