cells2numbers / migrationminer

MigrationmineR is a R package to analyse and profile in vitro cell tracking and migration data. It is belongs to the cytominer-verse used for morphological profiling and allows to create temporal or dynamic profiles.
Other
5 stars 0 forks source link

add function to correct for confounding variables #27

Open cells2numbers opened 5 years ago

cells2numbers commented 5 years ago

example code:

Correct for three variables (age, sex and date of recording). Each combination of subset / offset is corrected separetly


# helper function to iterate over all features 
get_lm_helper_function <- function(profiles_tmp) {
  # get feature names 
  # feature_names <- colnames(profiles_tmp) %>% str_subset("^Track")
  feature_names <- c("Track_Angle", "Track_Directionality", "Track_CI","Track_Speed","Track_yFMI", "Track_xFMI", "Track_CI","Track_DP", "Track_MSD", "Track_Speed_X","Track_Speed_Y", "Track_Speed_max")
  # calculate corrected values as residuals of lm
  df_result <- do.call(
    cbind,
    lapply(
      feature_names, 
      get_lm_model_fitted_values, 
      profiles_tmp = profiles_tmp
      )
    )
  # add experiment id Metadata_experiment
  df_result %<>% 
    mutate(Metadata_experiment = profiles_tmp$Metadata_experiment)
}

# linear model lm is used to remove date/age/sex effects
get_lm_model_fitted_values <- function(y, profiles_tmp){
  new_colname <- paste0(y)

  tryCatch({
      lm_formula <- paste0(y, " ~ Clinical_Parameter_Age + Clinical_Parameter_Sex + Clinical_Parameter_Date")
      lm_model <- lm(lm_formula, data = profiles_tmp,na.action = na.exclude) 
      fitted_values <- tibble( "V1" = residuals(lm_model, na.action=na.exclude))
      colnames(fitted_values) <- new_colname
    },
    fitted_values = function(err){
      fitted_values <- tibble("V1" = rep(NA, nrow(profiles_tmp))) 
      colnames(fitted_values) <- new_colname
      return(fitted_values)
    }
  )

  return(fitted_values)
} 

Correct values

profiles_corrected <- profiles %>% 
  filter(!is.na(Track_Directionality), Metadata_Frame_Subset < 180, Metadata_offset < 180) %>% 
  group_by(Metadata_Frame_Subset, Metadata_offset) %>% 
  do(get_lm_helper_function(.))