MSKCC-Epi-Bio / gnomeR

Package to wrangle and visualize genomic data in R
https://mskcc-epi-bio.github.io/gnomeR/
Other
26 stars 16 forks source link

Add function to extract patient ID from sample ID #331

Closed karissawhiting closed 2 months ago

karissawhiting commented 10 months ago
library(dplyr)
library(stringr)

#' Extract patient IDs from IMPACT sample IDs
#'
#' Adds a `patient_id` column to `data`, derived from `sample_id` by dropping
#' everything from the first `-T` onwards (e.g. `"P-0000071-T01-IM3"` becomes
#' `"P-0000071"`).
#'
#' @param data A data frame with a `sample_id` column. Every value must match
#'   the pattern `^P-\d{1,}-T.*`; missing values are treated as invalid.
#'
#' @return `data` with an added (or overwritten) `patient_id` column.
#'
#' @details Aborts via `cli::cli_abort()` when `data` is not a data frame,
#'   lacks a `sample_id` column, or contains any `sample_id` that is `NA` or
#'   does not match the expected format. The offending values are listed in
#'   the error message.
extract_patient_id <- function(data) {
  # Without this guard a missing column yields NULL, `all(logical(0))` is TRUE,
  # and the failure only surfaces later as an unrelated "object not found".
  if (!is.data.frame(data) || !"sample_id" %in% names(data)) {
    cli::cli_abort(
      "{.arg data} must be a data frame with a {.code sample_id} column."
    )
  }

  # `[[` avoids the partial name matching that `$` performs on data frames.
  sample_ids <- as.character(data[["sample_id"]])
  is_valid <- stringr::str_detect(sample_ids, "^P-\\d{1,}-T.*")

  # str_detect() returns NA for missing IDs; count those as invalid so the
  # user gets the format error rather than "missing value where TRUE/FALSE
  # needed" from `if (NA)`.
  bad_ids <- unique(sample_ids[is.na(is_valid) | !is_valid])

  if (length(bad_ids) > 0) {
    cli::cli_abort(c(
      "Some {.code sample_id} values do not match the expected IMPACT sample format.",
      "x" = "Invalid value(s): {.val {bad_ids}}"
    ))
  }

  dplyr::mutate(
    data,
    patient_id = stringr::str_replace(sample_id, "-T.*", "")
  )
}

# Example: three IMPACT tumor sample IDs, one per row; the printed result
# carries an extra `patient_id` column.
df <- data.frame(sample_id = c(
  "P-0000071-T01-IM3",
  "P-0000072-T02-IM4",
  "P-0000073-T03-IM5"
))
transformed_df <- extract_patient_id(df)
print(transformed_df)