nhs-r-community / NHSRdatasets

NHS and healthcare related datasets for training and learning R
https://nhs-r-community.github.io/NHSRdatasets
Creative Commons Zero v1.0 Universal
68 stars 24 forks source link

Add example NHSR workforce statistics datasets #70

Open StatsRhian opened 3 months ago

StatsRhian commented 3 months ago

Would be nice to add workforce statistics

Could make a nice time series dataset

StatsRhian commented 3 months ago

Example of wrangling which might be needed

library("tidyverse")
library("glue")
library("readxl")

ics_name = "Lancashire and South Cumbria"

filename = glue("NHS Workforce Statistics, May 2023 England and Organisation.xlsx")
staff_group = read_xlsx(path = filename, sheet = "3. NHSE, Org & SG - FTE", skip = 5)
time_series = read_xlsx(path = filename, sheet = "5. All Staff, NHSE & Org - FTE",
                        range = "A5:FO766")

  staff_group |>
  janitor::clean_names() |>
  dplyr::filter(is.na(nhs_england_region_name)) |> # Remove region aggregates
  dplyr::mutate(organisation_name = if_else(!is.na(ics_name), "Total", organisation_name)) |>
  dplyr::filter(!is.na(organisation_name))|>  # Remove dud rows
  tidyr::fill(ics_name) |>
  dplyr::select(ics_name, organisation_name, total,
                hchs_doctors, `nurses_health_visitors`,
                `midwives`, `ambulance_staff`) |>
    dplyr::mutate(dplyr::across(total:ambulance_staff, round)) |>
  saveRDS(file = "workforce_staff_group.rds")

  read_xlsx(path = filename, sheet = "5. All Staff, NHSE & Org - FTE",
            range = "A5:FO766") |>
    janitor::clean_names() |>
    dplyr::filter(is.na(nhs_england_region_name)) |> # Remove region aggregates
    dplyr::mutate(organisation_name = if_else(!is.na(ics_name), "Total", organisation_name)) |>
    dplyr::filter(!is.na(organisation_name))|>
    tidyr::fill(ics_name) |>
    dplyr::select(-c(nhs_england_region_code, nhs_england_region_name, ics_code, organisation_code)) |>
    pivot_longer(-c(ics_name, organisation_name), names_to = "date", values_to = "count") |>
    mutate(date = as.Date(as.numeric(stringr::str_remove(date, "x")), origin = "1899-12-30"))|>
    dplyr::mutate(count = round(count)) |>
    saveRDS(file = "workforce_timeseries.rds")