Rectifies schisms created in #31 by adjustments to reference function.
To see the upshot of the changes, run the following script in an interactive session:
a represents the output of the reference function (and the function in this PR), b is the altered function. a_final and b_final are the respective outputs of the function described in #11
Note that update_redcap_email_addresses does not set all values to NA for carol, despite having no good emails.
detach("package:redcapcustodian", unload=TRUE)
library(redcapcustodian)
library(DBI)
library(tidyverse)
dbDisconnect(conn)
conn <- dbConnect(RSQLite::SQLite(), dbname = ":memory:")
create_test_table(conn, "redcap_user_information")
get_redcap_email_output <- dplyr::tribble(
~ui_id, ~username, ~email_field_name, ~email,
1, "site_admin", "user_email", "joe.user@projectredcap.org",
2, "admin", "user_email", "admin@example.org",
3, "alice", "user_email", "alice@example.org",
4, "bob", "user_email", "bob_a@example.org",
4, "bob", "user_email2", "bob_b@example.org",
5, "carol", "user_email", "carol_a@example.org",
5, "carol", "user_email2", "carol_b@example.org",
6, "dan", "user_email", "dan_a@example.org",
6, "dan", "user_email2", "dan_b@example.org",
6, "dan", "user_email3", "dan_c@example.org"
) %>% dplyr::mutate(ui_id = as.integer(ui_id))
get_bad_redcap_email_output <- dplyr::tribble(
~ui_id, ~username, ~email_field_name, ~email,
1, "site_admin", "user_email", "joe.user@projectredcap",
2, "admin", "user_email", "admin@example",
3, "alice", "user_email", "alice@example",
4, "bob", "user_email", "bob_a@example",
4, "bob", "user_email2", "bob_b@example",
5, "carol", "user_email", "carol_a@example",
5, "carol", "user_email2", "carol_b@example",
6, "dan", "user_email", "dan_a@example",
6, "dan", "user_email2", "dan_b@example",
6, "dan", "user_email3", "dan_c@example",
6, "dan", "user_email3", "dan_c@example"
) %>% dplyr::mutate(ui_id = as.integer(ui_id))
get_institutional_person_output <- dplyr::tribble(
~user_id, ~email,
"site_admin", "joe.user@projectredcap.org",
"admin", "admin@example.org",
"alice", "alice@example.org",
"dan", "dan_b@example",
"bob", "",
"carol", NA
)
bad_redcap_user_emails <- get_bad_redcap_email_output
person <- get_institutional_person_output
person_data_for_redcap_users_with_bad_emails <- person %>%
dplyr::select(.data$user_id, .data$email) %>%
dplyr::filter(.data$user_id %in% bad_redcap_user_emails$username)
replacement_email_addresses_for_bad_redcap_emails <- bad_redcap_user_emails %>%
inner_join(person_data_for_redcap_users_with_bad_emails, by = c("username" = "user_id"), suffix = c(".bad", ".replacement")) %>%
filter(email.bad != email.replacement) %>%
filter(!is.na(email.replacement)) %>%
mutate(corrected_email = email.replacement) %>%
select(ui_id, username, email_field_name, corrected_email)
# initial function
a <- replacement_email_addresses_for_bad_redcap_emails %>%
bind_rows(bad_redcap_user_emails) %>%
group_by(ui_id, email_field_name) %>%
# columnar equivalent of coalesce for each row
# ensures retention of corrected_email where marked for deletion
# https://stackoverflow.com/a/60645992/7418735
summarise_all(~ na.omit(.)[1]) %>%
ungroup()
# new revision
b <- bad_redcap_user_emails %>%
dplyr::inner_join(person_data_for_redcap_users_with_bad_emails, by = c("username" = "user_id"), suffix = c(".bad", ".replacement")) %>%
dplyr::filter(.data$email.bad != .data$email.replacement) %>%
dplyr::filter(!is.na(.data$email.replacement)) %>%
dplyr::filter(.data$email.replacement != "") %>%
dplyr::mutate(corrected_email = .data$email.replacement) %>%
dplyr::group_by(.data$ui_id, .data$email_field_name) %>%
# columnar equivalent of coalesce for each row
# ensures retention of corrected_email where marked for deletion
# https://stackoverflow.com/a/60645992/7418735
dplyr::summarise_all(~ na.omit(.)[1]) %>%
dplyr::ungroup() %>%
dplyr::select(
.data$ui_id,
.data$username,
.data$email_field_name,
.data$corrected_email
)
update_redcap_email_addresses <- function(con, redcap_email_revisions) {
# break down updated emails by field
# columns (e.g. user_email<n>) cannot be parameterized
# pivot_wider cannot be used as NAs in non-replacement fields result in overwrites
# the solution is to create a list of lists, one list for each email_field_name
redcap_email_change_groups <- redcap_email_revisions %>%
select(email_field_name, corrected_email, ui_id) %>%
group_split(email_field_name, .key = "email_field_name", .keep = F) %>%
# RMySQL prepared statements do not allow named parameters
lapply(as.list) %>%
lapply(unname) %>%
setNames(nm = c(paste0("user_email", seq_along(.))))
# Write corrected emails to redcap db, erasing where necessary
dbExecute(
con,
paste0(
"UPDATE redcap_user_information ",
"SET user_email = ? ",
"WHERE ui_id = ?"
),
redcap_email_change_groups$user_email1
)
dbExecute(
con,
paste0(
"UPDATE redcap_user_information ",
"SET user_email2 = ? ",
"WHERE ui_id = ?"
),
redcap_email_change_groups$user_email2
)
dbExecute(
con,
paste0(
"UPDATE redcap_user_information ",
"SET user_email3 = ? ",
"WHERE ui_id = ?"
),
redcap_email_change_groups$user_email3
)
}
update_redcap_email_addresses(conn, a)
a_final <- tbl(conn, "redcap_user_information") %>%
collect()
# create new table
dbDisconnect(conn)
conn <- dbConnect(RSQLite::SQLite(), dbname = ":memory:")
create_test_table(conn, "redcap_user_information")
update_redcap_email_addresses(conn, b)
b_final <- tbl(conn, "redcap_user_information") %>%
collect()
identical(a_final, b_final)
a_final
b_final
Closes #10
Rectifies schisms created in #31 by adjustments to reference function.
To see the upshot of the changes, run the following script in an interactive session:
a
represents the output of the reference function (and the function in this PR),b
is the altered function.a_final
andb_final
are the respective outputs of the function described in #11Note that
update_redcap_email_addresses
does not set all values toNA
for carol, despite having no good emails.