runapp-aus / strayr

A catalogue of ready-to-use ABS coding structures. Package documentation can be found here: https://runapp-aus.github.io/strayr/
44 stars 14 forks source link

add SACC country codes #75

Open wfmackey opened 2 years ago

williamlai2 commented 1 year ago

Something to start with perhaps?

library(tidyverse)
library(glue)

# Set up
sacc_url <- "https://www.abs.gov.au/ausstats/subscriber.nsf/log?openagent&12690_SACC2008_DO0001_201608.xls&1269.0&Data%20Cubes&94D987C267F8BB83CA25744B00153EE5&0&Second%20Edition&19.05.2008&Previous"

temp_dir <- tempdir()
temp_path <- file.path(temp_dir, "sacc.xls")

download.file(sacc_url, temp_path, mode = "wb")

raw <- readxl::read_excel(temp_path,
                          sheet = 5,
                          range = "A7:D329",
                          col_names = FALSE) %>%
  janitor::clean_names()

# initial data
sacc_2nd_raw <- raw %>%
  filter(!is.na(x1)) %>%
  select(code = 1,
         text = 2) %>%
  mutate(code = as.character(code))

major <- sacc_2nd_raw %>% 
  filter(str_length(code) == 1) %>% 
  rename(major_code = code,
         major = text)

minor <- sacc_2nd_raw %>% 
  filter(str_length(code) == 2) %>% 
  rename(minor_code = code,
         minor = text)

# combined
sacc_2nd_countries <- sacc_2nd_raw %>% 
  filter(str_length(code) == 4) %>% 
  mutate(major_code = str_sub(code, 1, 1),
         minor_code = str_sub(code, 1, 2)) %>% 
  left_join(major, by = "major_code") %>% 
  left_join(minor, by = "minor_code") %>% 
  select(major_code, major_group = major, minor_code, minor_group = minor, country_code = code, country = text)
# # A tibble: 252 x 6
# major_code major_group            minor_code minor_group                               country_code country                             
# <chr>      <chr>                  <chr>      <chr>                                     <chr>        <chr>                               
# 1 1          OCEANIA AND ANTARCTICA 11         Australia (includes External Territories) 1101         Australia                           
# 2 1          OCEANIA AND ANTARCTICA 11         Australia (includes External Territories) 1102         Norfolk Island                      
# 3 1          OCEANIA AND ANTARCTICA 11         Australia (includes External Territories) 1199         Australian External Territories, nec
# 4 1          OCEANIA AND ANTARCTICA 12         New Zealand                               1201         New Zealand                         
# 5 1          OCEANIA AND ANTARCTICA 13         Melanesia                                 1301         New Caledonia                       
# 6 1          OCEANIA AND ANTARCTICA 13         Melanesia                                 1302         Papua New Guinea                    
# 7 1          OCEANIA AND ANTARCTICA 13         Melanesia                                 1303         Solomon Islands                     
# 8 1          OCEANIA AND ANTARCTICA 13         Melanesia                                 1304         Vanuatu                             
# 9 1          OCEANIA AND ANTARCTICA 14         Micronesia                                1401         Guam                                
# 10 1          OCEANIA AND ANTARCTICA 14         Micronesia                                1402         Kiribati                            
# # ... with 242 more rows
# # i Use `print(n = ...)` to see more rows