Closed jrosen48 closed 7 years ago
done
need to write new code
uv_outlier_detector <- function(x, na.rm = T, ...) {
qnt <- quantile(x, probs=c(.25, .75), na.rm = na.rm, ...)
H <- 1.5 * IQR(x, na.rm = na.rm)
y <- x
y[x < (qnt[1] - H)] <- NA
y[x > (qnt[2] + H)] <- NA
return(y)
}
uv_outlier_detector <- function(x, na.rm = T, ...) {
# need to figure out where this came from - from a SO question, can probably re-write
qnt <- quantile(x, probs=c(.25, .75), na.rm = na.rm, ...)
H <- 1.5 * IQR(x, na.rm = na.rm)
y <- x
y[x < (qnt[1] - H)] <- NA
y[x > (qnt[2] + H)] <- NA
y
}
remove_uv_out_func <- function(data){
x <- sapply(data, uv_outlier_detector)
return(x)
}
remove_uv_main_func <- function(data, removed_obs_df, cases_to_keep, print_status){
data_tmp <- remove_uv_out_func(data) # makes uv outliers na
if(print_status == T){
print(paste0("### Note: ", sum(is.na(data_tmp)), " cases with univariate outliers out of ", nrow(data_tmp), " cases removed, so ", nrow(data_tmp) - sum(is.na(data_tmp)), " used in subsequent analysis ###"))
}
if(any(is.na(data_tmp))){
x <- removed_obs_df[cases_to_keep, ]
y <- !complete.cases(data_tmp)
z <- x$row[y]
removed_obs_df$reason_removed[z] <- "uniivariate_outlier"
}
data_out <- data_tmp[complete.cases(data_tmp), ]
if(any(is.na(data_tmp))){
data_out <- list(data_out, removed_obs_df, uv_outliers_boolean_vector = y)
} else{
data_out <- list(data_out, removed_obs_df, uv_outliers_boolean_vector = NULL)
}
return(data_out)
}
remove_mv_out_func <- function(data){
mvout <- chemometrics::Moutlier(data, quantile = 0.99, plot = F)
the_index <- which(mvout$md > mvout$cutoff)
if (any(the_index) == T){
return(the_index)
} else{
return(data)
}
}
currently these are the only functions from other sources (mv is from chemometrics package)