Open StatsRhian opened 3 months ago
Example of wrangling which might be needed
library("tidyverse")
library("glue")
library("readxl")
ics_name = "Lancashire and South Cumbria"
filename = glue("NHS Workforce Statistics, May 2023 England and Organisation.xlsx")
staff_group = read_xlsx(path = filename, sheet = "3. NHSE, Org & SG - FTE", skip = 5)
time_series = read_xlsx(path = filename, sheet = "5. All Staff, NHSE & Org - FTE",
range = "A5:FO766")
staff_group |>
janitor::clean_names() |>
dplyr::filter(is.na(nhs_england_region_name)) |> # Remove region aggregates
dplyr::mutate(organisation_name = if_else(!is.na(ics_name), "Total", organisation_name)) |>
dplyr::filter(!is.na(organisation_name))|> # Remove dud rows
tidyr::fill(ics_name) |>
dplyr::select(ics_name, organisation_name, total,
hchs_doctors, `nurses_health_visitors`,
`midwives`, `ambulance_staff`) |>
dplyr::mutate(dplyr::across(total:ambulance_staff, round)) |>
saveRDS(file = "workforce_staff_group.rds")
read_xlsx(path = filename, sheet = "5. All Staff, NHSE & Org - FTE",
range = "A5:FO766") |>
janitor::clean_names() |>
dplyr::filter(is.na(nhs_england_region_name)) |> # Remove region aggregates
dplyr::mutate(organisation_name = if_else(!is.na(ics_name), "Total", organisation_name)) |>
dplyr::filter(!is.na(organisation_name))|>
tidyr::fill(ics_name) |>
dplyr::select(-c(nhs_england_region_code, nhs_england_region_name, ics_code, organisation_code)) |>
pivot_longer(-c(ics_name, organisation_name), names_to = "date", values_to = "count") |>
mutate(date = as.Date(as.numeric(stringr::str_remove(date, "x")), origin = "1899-12-30"))|>
dplyr::mutate(count = round(count)) |>
saveRDS(file = "workforce_timeseries.rds")
Would be nice to add workforce statistics
Could make a nice time series dataset