ctsit / redcapcustodian

Simplified, automated data management on REDCap systems
Other
12 stars 6 forks source link

Fix get redcap email revisions #34

Closed ChemiKyle closed 2 years ago

ChemiKyle commented 2 years ago

Closes #10

Rectifies schisms created in #31 by adjustments to reference function.

To see the upshot of the changes, run the following script in an interactive session:

a represents the output of the reference function (and the function in this PR), b is the altered function.
a_final and b_final are the respective outputs of the function described in #11

Note that update_redcap_email_addresses does not set all values to NA for carol, despite having no good emails.

detach("package:redcapcustodian", unload=TRUE)

library(redcapcustodian)
library(DBI)
library(tidyverse)

dbDisconnect(conn)
conn <- dbConnect(RSQLite::SQLite(), dbname = ":memory:")

create_test_table(conn, "redcap_user_information")

get_redcap_email_output <- dplyr::tribble(
  ~ui_id, ~username, ~email_field_name, ~email,
  1, "site_admin", "user_email", "joe.user@projectredcap.org",
  2, "admin", "user_email",  "admin@example.org",
  3, "alice", "user_email",  "alice@example.org",
  4, "bob",   "user_email",  "bob_a@example.org",
  4, "bob",   "user_email2", "bob_b@example.org",
  5, "carol", "user_email",  "carol_a@example.org",
  5, "carol", "user_email2", "carol_b@example.org",
  6, "dan",   "user_email",  "dan_a@example.org",
  6, "dan",   "user_email2", "dan_b@example.org",
  6, "dan",   "user_email3", "dan_c@example.org"
) %>% dplyr::mutate(ui_id = as.integer(ui_id))

get_bad_redcap_email_output <- dplyr::tribble(
  ~ui_id, ~username, ~email_field_name, ~email,
  1, "site_admin", "user_email", "joe.user@projectredcap",
  2, "admin", "user_email",  "admin@example",
  3, "alice", "user_email",  "alice@example",
  4, "bob",   "user_email",  "bob_a@example",
  4, "bob",   "user_email2", "bob_b@example",
  5, "carol", "user_email",  "carol_a@example",
  5, "carol", "user_email2", "carol_b@example",
  6, "dan",   "user_email",  "dan_a@example",
  6, "dan",   "user_email2", "dan_b@example",
  6, "dan",   "user_email3", "dan_c@example",
  6, "dan",   "user_email3", "dan_c@example"
) %>% dplyr::mutate(ui_id = as.integer(ui_id))

get_institutional_person_output <- dplyr::tribble(
  ~user_id, ~email,
  "site_admin", "joe.user@projectredcap.org",
  "admin", "admin@example.org",
  "alice", "alice@example.org",
  "dan",   "dan_b@example",
  "bob",   "",
  "carol", NA
)

bad_redcap_user_emails <- get_bad_redcap_email_output
person <- get_institutional_person_output

person_data_for_redcap_users_with_bad_emails <- person %>%
  dplyr::select(.data$user_id, .data$email) %>%
  dplyr::filter(.data$user_id %in% bad_redcap_user_emails$username)

replacement_email_addresses_for_bad_redcap_emails <- bad_redcap_user_emails %>%
    inner_join(person_data_for_redcap_users_with_bad_emails, by = c("username" = "user_id"), suffix = c(".bad", ".replacement")) %>%
    filter(email.bad != email.replacement) %>%
    filter(!is.na(email.replacement)) %>%
    mutate(corrected_email = email.replacement) %>%
    select(ui_id, username, email_field_name, corrected_email)

# initial function
a <- replacement_email_addresses_for_bad_redcap_emails %>%
  bind_rows(bad_redcap_user_emails) %>%
  group_by(ui_id, email_field_name) %>%
  # columnar equivalent of coalesce for each row
  # ensures retention of corrected_email where marked for deletion
  # https://stackoverflow.com/a/60645992/7418735
  summarise_all(~ na.omit(.)[1]) %>%
  ungroup()

# new revision
b <- bad_redcap_user_emails %>%
  dplyr::inner_join(person_data_for_redcap_users_with_bad_emails, by = c("username" = "user_id"), suffix = c(".bad", ".replacement")) %>%
  dplyr::filter(.data$email.bad != .data$email.replacement) %>%
  dplyr::filter(!is.na(.data$email.replacement)) %>%
  dplyr::filter(.data$email.replacement != "") %>%
  dplyr::mutate(corrected_email = .data$email.replacement) %>%
  dplyr::group_by(.data$ui_id, .data$email_field_name) %>%
  # columnar equivalent of coalesce for each row
  # ensures retention of corrected_email where marked for deletion
  # https://stackoverflow.com/a/60645992/7418735
  dplyr::summarise_all(~ na.omit(.)[1]) %>%
  dplyr::ungroup() %>%
  dplyr::select(
           .data$ui_id,
           .data$username,
           .data$email_field_name,
           .data$corrected_email
         )

update_redcap_email_addresses <- function(con, redcap_email_revisions) {
  # break down updated emails by field
  # columns (e.g. user_email<n>) cannot be parameterized
  # pivot_wider cannot be used as NAs in non-replacement fields result in overwrites
  # the solution is to create a list of lists, one list for each email_field_name
  redcap_email_change_groups <- redcap_email_revisions %>%
    select(email_field_name, corrected_email, ui_id) %>%
    group_split(email_field_name, .key = "email_field_name", .keep = F) %>%
    # RMySQL prepared statements do not allow named parameters
    lapply(as.list) %>%
    lapply(unname) %>%
    setNames(nm = c(paste0("user_email", seq_along(.))))

  # Write corrected emails to redcap db, erasing where necessary
  dbExecute(
    con,
    paste0(
      "UPDATE redcap_user_information ",
      "SET user_email = ? ",
      "WHERE ui_id = ?"
    ),
    redcap_email_change_groups$user_email1
  )

  dbExecute(
    con,
    paste0(
      "UPDATE redcap_user_information ",
      "SET user_email2 = ? ",
      "WHERE ui_id = ?"
    ),
    redcap_email_change_groups$user_email2
  )

  dbExecute(
    con,
    paste0(
      "UPDATE redcap_user_information ",
      "SET user_email3 = ? ",
      "WHERE ui_id = ?"
    ),
    redcap_email_change_groups$user_email3
  )
}

update_redcap_email_addresses(conn, a)

a_final <- tbl(conn, "redcap_user_information") %>%
  collect()

# create new table
dbDisconnect(conn)
conn <- dbConnect(RSQLite::SQLite(), dbname = ":memory:")

create_test_table(conn, "redcap_user_information")

update_redcap_email_addresses(conn, b)

b_final <- tbl(conn, "redcap_user_information") %>%
  collect()

identical(a_final, b_final)

a_final

b_final
pbchase commented 2 years ago

Addresses Issue #35