jrosen48 / prcr

R package for person-centered analysis
https://jrosen48.github.io/prcr/
Other
5 stars 2 forks source link

write new functions for uv and mv outlier detection #7

Closed jrosen48 closed 7 years ago

jrosen48 commented 8 years ago

currently these are the only functions from other sources (mv is from chemometrics package)

jrosen48 commented 8 years ago

done

jrosen48 commented 8 years ago

need to write new code

jrosen48 commented 8 years ago
uv_outlier_detector <- function(x, na.rm = T, ...) {
    qnt <- quantile(x, probs=c(.25, .75), na.rm = na.rm, ...)
    H <- 1.5 * IQR(x, na.rm = na.rm)
    y <- x
    y[x < (qnt[1] - H)] <- NA
    y[x > (qnt[2] + H)] <- NA
    return(y)
}

uv_outlier_detector <- function(x, na.rm = T, ...) {
    # need to figure out where this came from - from a SO question, can probably re-write
    qnt <- quantile(x, probs=c(.25, .75), na.rm = na.rm, ...)
    H <- 1.5 * IQR(x, na.rm = na.rm)
    y <- x
    y[x < (qnt[1] - H)] <- NA
    y[x > (qnt[2] + H)] <- NA
    y
}

remove_uv_out_func <- function(data){
    x <- sapply(data, uv_outlier_detector)
    return(x)
}

remove_uv_main_func <- function(data, removed_obs_df, cases_to_keep, print_status){
    data_tmp <- remove_uv_out_func(data) # makes uv outliers na
    if(print_status == T){
        print(paste0("### Note: ", sum(is.na(data_tmp)), " cases with univariate outliers out of ", nrow(data_tmp), " cases removed, so ", nrow(data_tmp) - sum(is.na(data_tmp)), " used in subsequent analysis ###"))
    }
    if(any(is.na(data_tmp))){
        x <- removed_obs_df[cases_to_keep, ]
        y <- !complete.cases(data_tmp)
        z <- x$row[y]
        removed_obs_df$reason_removed[z] <- "uniivariate_outlier"
    }
    data_out <- data_tmp[complete.cases(data_tmp), ]
    if(any(is.na(data_tmp))){
        data_out <- list(data_out, removed_obs_df, uv_outliers_boolean_vector = y)
    } else{
        data_out <- list(data_out, removed_obs_df, uv_outliers_boolean_vector = NULL)
    }
    return(data_out)
}

remove_mv_out_func <- function(data){
    mvout <- chemometrics::Moutlier(data, quantile = 0.99, plot = F)
    the_index <- which(mvout$md > mvout$cutoff)
    if (any(the_index) == T){
        return(the_index)
    } else{
        return(data)
    }
}