Closed spsanderson closed 2 years ago
Function:
#' UMAP Projection #' #' @author Steven P. Sanderson II, MPH #' #' @description Create a umap object from the [uwot::umap()] function. #' #' @seealso #' * \url{https://cran.r-project.org/package=uwot} (CRAN) #' * \url{https://github.com/jlmelville/uwot} (GitHub) #' * \url{https://github.com/jlmelville/uwot} (arXiv paper) #' #' @details This takes in the user item table/matix that is produced by [kmeans_user_item_tbl()] #' function. This function uses the defaults of [uwot::umap()]. #' #' @param .data The data from the [kmeans_user_item_tbl()] function. #' @param .kmeans_map_tbl The data from the [kmeans_mapped_tbl()]. #' @param .k_cluster Pick the desired amount of clusters from your analysis of the scree plot. #' #' @examples #' library(healthyR.data) #' library(healthyR) #' library(dplyr) #' library(broom) #' #' data_tbl <- healthyR_data %>% #' filter(ip_op_flag == "I") %>% #' filter(payer_grouping != "Medicare B") %>% #' filter(payer_grouping != "?") %>% #' select(service_line, payer_grouping) %>% #' mutate(record = 1) %>% #' as_tibble() #' #' uit_tbl <- kmeans_user_item_tbl( #' .data = data_tbl #' , .row_input = service_line #' , .col_input = payer_grouping #' , .record_input = record #' ) #' #' kmm_tbl <- kmeans_mapped_tbl(uit_tbl) #' #' umap_list(.data = uit_tbl, kmm_tbl, 3) #' #' @return A list of tibbles and the umap object #' #' @export #' umap_list <- function(.data , .kmeans_map_tbl , .k_cluster = 5) { # * Tidyeval ---- k_cluster_var_expr <- .k_cluster # * Checks ---- if (!is.data.frame(.data)) { stop(call. = FALSE, "(.data) is not a data.frame/tibble. Please supply.") } if (!is.data.frame(.kmeans_map_tbl)) { stop(call. = FALSE, "(.kmeans_map_tbl) is not a data.frame/tibble. Please supply.") } # * Data ---- data <- tibble::as_tibble(.data) kmeans_map_tbl <- tibble::as_tibble(.kmeans_map_tbl) # * Manipulation ---- umap_obj <- data %>% dplyr::select(-1) %>% uwot::umap() umap_results_tbl <- umap_obj %>% tibble::as_tibble() %>% purrr::set_names("x", "y") %>% dplyr::bind_cols(data %>% dplyr::select(1)) kmeans_obj <- kmeans_map_tbl %>% dplyr::pull(k_means) %>% purrr::pluck(k_cluster_var_expr) kmeans_cluster_tbl <- kmeans_obj %>% broom::augment(data) %>% dplyr::select(1, .cluster) umap_kmeans_cluster_results_tbl <- umap_results_tbl %>% dplyr::left_join(kmeans_cluster_tbl) # * Data List ---- list_names <- df_list <- list( umap_obj = umap_obj, umap_results_tbl = umap_results_tbl, kmeans_obj = kmeans_obj, kmeans_cluster_tbl = kmeans_cluster_tbl, umap_kmeans_cluster_results_tbl = umap_kmeans_cluster_results_tbl ) # * Return ---- return(df_list) }
Example:
ump_lst <- umap_list(uit_tbl, kmm_tbl, 3) > ump_lst $umap_obj [,1] [,2] [1,] -0.74571982 2.2514666 [2,] -0.15199175 2.1459239 [3,] 0.68102262 -2.2383791 [4,] -0.38277975 1.0841764 [5,] 0.22941372 1.4139083 [6,] 1.11321011 -1.9530446 [7,] -0.58606782 -0.6579749 [8,] 0.77251217 -1.1631352 [9,] 0.32206175 -1.8929783 [10,] 0.03789804 -0.7778479 [11,] -0.02187505 1.7635368 [12,] -0.92922345 1.7515033 [13,] 0.22819314 0.9602579 [14,] -0.28279415 -0.2338883 [15,] 0.07320938 -1.5743767 [16,] 0.43210325 -1.3941590 [17,] 0.07226262 0.4439653 [18,] -0.90982023 1.2273057 [19,] -0.38926092 0.6284164 [20,] 0.48171482 -0.6196457 [21,] -0.27508488 -1.2331883 [22,] -0.50644556 1.8743180 [23,] 0.73746178 -1.8061606 attr(,"scaled:center") [1] -1.965624 -9.547336 $umap_results_tbl # A tibble: 23 × 3 x y service_line <dbl> <dbl> <chr> 1 -0.746 2.25 Alcohol Abuse 2 -0.152 2.15 Bariatric Surgery For Obesity 3 0.681 -2.24 Carotid Endarterectomy 4 -0.383 1.08 Cellulitis 5 0.229 1.41 Chest Pain 6 1.11 -1.95 CHF 7 -0.586 -0.658 COPD 8 0.773 -1.16 CVA 9 0.322 -1.89 GI Hemorrhage 10 0.0379 -0.778 Joint Replacement # … with 13 more rows # ℹ Use `print(n = ...)` to see more rows $kmeans_obj K-means clustering with 3 clusters of sizes 12, 5, 6 Cluster means: Blue Cross Commercial Compensation Exchange Plans HMO Medicaid 1 0.0783745 0.02182129 0.0043244347 0.006202137 0.04493860 0.03684344 2 0.1495475 0.03679700 0.0003066332 0.020729565 0.16252855 0.13072521 3 0.1170278 0.03141187 0.0101665392 0.013865190 0.09822472 0.08557952 Medicaid HMO Medicare A Medicare HMO No Fault Self Pay 1 0.08001653 0.5625037 0.15152338 0.003475542 0.009976485 2 0.31446157 0.1318675 0.03192357 0.001364577 0.019748398 3 0.14652195 0.3535395 0.10524131 0.007067791 0.031353724 Clustering vector: [1] 2 2 1 3 3 1 1 1 1 1 2 2 3 1 1 1 3 3 3 1 1 2 1 Within cluster sum of squares by cluster: [1] 0.09625399 0.19152559 0.08456928 (between_SS / total_SS = 73.6 %) Available components: [1] "cluster" "centers" "totss" "withinss" "tot.withinss" [6] "betweenss" "size" "iter" "ifault" $kmeans_cluster_tbl # A tibble: 23 × 2 service_line .cluster <chr> <fct> 1 Alcohol Abuse 2 2 Bariatric Surgery For Obesity 2 3 Carotid Endarterectomy 1 4 Cellulitis 3 5 Chest Pain 3 6 CHF 1 7 COPD 1 8 CVA 1 9 GI Hemorrhage 1 10 Joint Replacement 1 # … with 13 more rows # ℹ Use `print(n = ...)` to see more rows $umap_kmeans_cluster_results_tbl # A tibble: 23 × 4 x y service_line .cluster <dbl> <dbl> <chr> <fct> 1 -0.746 2.25 Alcohol Abuse 2 2 -0.152 2.15 Bariatric Surgery For Obesity 2 3 0.681 -2.24 Carotid Endarterectomy 1 4 -0.383 1.08 Cellulitis 3 5 0.229 1.41 Chest Pain 3 6 1.11 -1.95 CHF 1 7 -0.586 -0.658 COPD 1 8 0.773 -1.16 CVA 1 9 0.322 -1.89 GI Hemorrhage 1 10 0.0379 -0.778 Joint Replacement 1 # … with 13 more rows # ℹ Use `print(n = ...)` to see more rows
``
Function:
Example:
``