njtierney / naniar

Tidy data structures, summaries, and visualisations for missing data
http://naniar.njtierney.com/
Other
649 stars 54 forks source link

Impute mode #321

Closed njtierney closed 1 year ago

njtierney commented 1 year ago

Description

implements imputing the mode

Related Issue

213

Example

library(naniar)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
vec <- rnorm(10)

vec[sample(1:10, 3)] <- NA

impute_mode(vec)
#>  [1] -0.82831377  1.00094758  0.94702118  0.94702118  0.94702118  0.64740837
#>  [7]  1.79198464 -0.08941483  1.27700962 -0.21884888

dat <- tibble(
  num = rnorm(10),
  int = rpois(10, 5),
  fct = factor(LETTERS[1:10])
) %>%
  mutate(
    across(
      everything(),
      \(x) set_prop_miss(x, prop = 0.25)
    )
  )

dat
#> # A tibble: 10 × 3
#>        num   int fct  
#>      <dbl> <int> <fct>
#>  1 -0.321     NA A    
#>  2 NA          3 B    
#>  3 -0.200     NA C    
#>  4  3.34       3 D    
#>  5  0.807      4 E    
#>  6 -0.915      4 F    
#>  7 NA          3 <NA> 
#>  8  0.0148     7 <NA> 
#>  9 -0.374      5 I    
#> 10  0.0479     5 J

dat %>%
  nabular() %>%
  mutate(
    num = impute_mode(num)
  )
#> # A tibble: 10 × 6
#>        num   int fct   num_NA int_NA fct_NA
#>      <dbl> <int> <fct> <fct>  <fct>  <fct> 
#>  1 -0.321     NA A     !NA    NA     !NA   
#>  2 -0.183      3 B     NA     !NA    !NA   
#>  3 -0.200     NA C     !NA    NA     !NA   
#>  4  3.34       3 D     !NA    !NA    !NA   
#>  5  0.807      4 E     !NA    !NA    !NA   
#>  6 -0.915      4 F     !NA    !NA    !NA   
#>  7 -0.183      3 <NA>  NA     !NA    NA    
#>  8  0.0148     7 <NA>  !NA    !NA    NA    
#>  9 -0.374      5 I     !NA    !NA    !NA   
#> 10  0.0479     5 J     !NA    !NA    !NA

Created on 2023-04-10 with reprex v2.0.2

Session info ``` r sessioninfo::session_info() #> ─ Session info ─────────────────────────────────────────────────────────────── #> setting value #> version R version 4.2.3 (2023-03-15) #> os macOS Ventura 13.2 #> system aarch64, darwin20 #> ui X11 #> language (EN) #> collate en_US.UTF-8 #> ctype en_US.UTF-8 #> tz Australia/Hobart #> date 2023-04-10 #> pandoc 2.19.2 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown) #> #> ─ Packages ─────────────────────────────────────────────────────────────────── #> package * version date (UTC) lib source #> cli 3.6.0 2023-01-09 [1] CRAN (R 4.2.0) #> colorspace 2.1-0 2023-01-23 [1] CRAN (R 4.2.0) #> digest 0.6.31 2022-12-11 [1] CRAN (R 4.2.0) #> dplyr * 1.1.1 2023-03-22 [1] CRAN (R 4.2.0) #> evaluate 0.20 2023-01-17 [1] CRAN (R 4.2.0) #> fansi 1.0.4 2023-01-22 [1] CRAN (R 4.2.0) #> fastmap 1.1.0 2021-01-25 [1] CRAN (R 4.2.0) #> fs 1.6.1 2023-02-06 [1] CRAN (R 4.2.0) #> generics 0.1.3 2022-07-05 [1] CRAN (R 4.2.0) #> ggplot2 3.4.1 2023-02-10 [1] CRAN (R 4.2.0) #> glue 1.6.2 2022-02-24 [1] CRAN (R 4.2.0) #> gtable 0.3.1 2022-09-01 [1] CRAN (R 4.2.0) #> htmltools 0.5.4 2022-12-07 [1] CRAN (R 4.2.0) #> knitr 1.42 2023-01-25 [1] CRAN (R 4.2.0) #> lifecycle 1.0.3 2022-10-07 [1] CRAN (R 4.2.0) #> magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.2.0) #> munsell 0.5.0 2018-06-12 [1] CRAN (R 4.2.0) #> naniar * 1.0.0.9000 2023-04-10 [1] local #> pillar 1.8.1 2022-08-19 [1] CRAN (R 4.2.0) #> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.2.0) #> purrr 1.0.1 2023-01-10 [1] CRAN (R 4.2.0) #> R.cache 0.16.0 2022-07-21 [1] CRAN (R 4.2.0) #> R.methodsS3 1.8.2 2022-06-13 [1] CRAN (R 4.2.0) #> R.oo 1.25.0 2022-06-12 [1] CRAN (R 4.2.0) #> R.utils 2.12.2 2022-11-11 [1] CRAN (R 4.2.0) #> R6 2.5.1 2021-08-19 [1] CRAN (R 4.2.0) #> reprex 2.0.2 2022-08-17 [1] CRAN (R 4.2.0) #> rlang 1.1.0 2023-03-14 [1] CRAN (R 4.2.0) #> rmarkdown 2.20 2023-01-19 [1] CRAN (R 4.2.0) #> rstudioapi 0.14 2022-08-22 [1] CRAN (R 4.2.0) #> scales 1.2.1 2022-08-20 [1] CRAN (R 4.2.0) #> sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.2.0) #> styler 1.9.0 2023-01-15 [1] CRAN (R 4.2.0) #> tibble 3.2.1 2023-03-20 [1] CRAN (R 4.2.0) #> tidyselect 1.2.0 2022-10-10 [1] CRAN (R 4.2.0) #> utf8 1.2.3 2023-01-31 [1] CRAN (R 4.2.0) #> vctrs 0.6.1 2023-03-22 [1] CRAN (R 4.2.0) #> visdat 0.6.0 2023-02-02 [1] local #> withr 2.5.0 2022-03-03 [1] CRAN (R 4.2.0) #> xfun 0.37 2023-01-31 [1] CRAN (R 4.2.0) #> yaml 2.3.7 2023-01-23 [1] CRAN (R 4.2.0) #> #> [1] /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library #> #> ────────────────────────────────────────────────────────────────────────────── ```

Tests

Yes

NEWS + DESCRIPTION

Yes

njtierney commented 1 year ago
library(naniar)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
vec <- rnorm(10)

vec[sample(1:10, 3)] <- NA

impute_mode(vec)
#>  [1] -0.8667728 -1.3941443 -1.2821929 -1.1371585 -0.7508977 -1.0704500
#>  [7] -1.1450668 -1.1450668  1.1446806 -1.1450668

dat <- tibble(
  num = rnorm(10),
  int = as.integer(rpois(10, 5)),
  fct = factor(LETTERS[1:10])
) %>%
  mutate(
    across(
      everything(),
      \(x) set_prop_miss(x, prop = 0.25)
    )
  )

dat
#> # A tibble: 10 × 3
#>        num   int fct  
#>      <dbl> <int> <fct>
#>  1 NA          6 A    
#>  2 NA         NA B    
#>  3  0.364      7 C    
#>  4 -1.22       4 D    
#>  5  0.0346     3 <NA> 
#>  6  0.0860     5 F    
#>  7 -0.486      4 <NA> 
#>  8 -0.930      5 H    
#>  9  0.932     NA I    
#> 10 -0.946      5 J

dat %>%
  nabular() %>%
  mutate(
    num = impute_mode(num),
    int = impute_mode(int),
    fct = impute_mode(fct)
  )
#> # A tibble: 10 × 6
#>        num   int fct   num_NA int_NA fct_NA
#>      <dbl> <dbl> <fct> <fct>  <fct>  <fct> 
#>  1 -0.787      6 A     NA     !NA    !NA   
#>  2 -0.787      5 B     NA     NA     !NA   
#>  3  0.364      7 C     !NA    !NA    !NA   
#>  4 -1.22       4 D     !NA    !NA    !NA   
#>  5  0.0346     3 A     !NA    !NA    NA    
#>  6  0.0860     5 F     !NA    !NA    !NA   
#>  7 -0.486      4 A     !NA    !NA    NA    
#>  8 -0.930      5 H     !NA    !NA    !NA   
#>  9  0.932      5 I     !NA    NA     !NA   
#> 10 -0.946      5 J     !NA    !NA    !NA

Created on 2023-04-10 with reprex v2.0.2

Session info ``` r sessioninfo::session_info() #> ─ Session info ─────────────────────────────────────────────────────────────── #> setting value #> version R version 4.2.3 (2023-03-15) #> os macOS Ventura 13.2 #> system aarch64, darwin20 #> ui X11 #> language (EN) #> collate en_US.UTF-8 #> ctype en_US.UTF-8 #> tz Australia/Hobart #> date 2023-04-10 #> pandoc 2.19.2 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown) #> #> ─ Packages ─────────────────────────────────────────────────────────────────── #> package * version date (UTC) lib source #> cli 3.6.0 2023-01-09 [1] CRAN (R 4.2.0) #> colorspace 2.1-0 2023-01-23 [1] CRAN (R 4.2.0) #> digest 0.6.31 2022-12-11 [1] CRAN (R 4.2.0) #> dplyr * 1.1.1 2023-03-22 [1] CRAN (R 4.2.0) #> evaluate 0.20 2023-01-17 [1] CRAN (R 4.2.0) #> fansi 1.0.4 2023-01-22 [1] CRAN (R 4.2.0) #> fastmap 1.1.0 2021-01-25 [1] CRAN (R 4.2.0) #> fs 1.6.1 2023-02-06 [1] CRAN (R 4.2.0) #> generics 0.1.3 2022-07-05 [1] CRAN (R 4.2.0) #> ggplot2 3.4.1 2023-02-10 [1] CRAN (R 4.2.0) #> glue 1.6.2 2022-02-24 [1] CRAN (R 4.2.0) #> gtable 0.3.1 2022-09-01 [1] CRAN (R 4.2.0) #> htmltools 0.5.4 2022-12-07 [1] CRAN (R 4.2.0) #> knitr 1.42 2023-01-25 [1] CRAN (R 4.2.0) #> lifecycle 1.0.3 2022-10-07 [1] CRAN (R 4.2.0) #> magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.2.0) #> munsell 0.5.0 2018-06-12 [1] CRAN (R 4.2.0) #> naniar * 1.0.0.9000 2023-04-10 [1] local #> pillar 1.8.1 2022-08-19 [1] CRAN (R 4.2.0) #> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.2.0) #> purrr 1.0.1 2023-01-10 [1] CRAN (R 4.2.0) #> R.cache 0.16.0 2022-07-21 [1] CRAN (R 4.2.0) #> R.methodsS3 1.8.2 2022-06-13 [1] CRAN (R 4.2.0) #> R.oo 1.25.0 2022-06-12 [1] CRAN (R 4.2.0) #> R.utils 2.12.2 2022-11-11 [1] CRAN (R 4.2.0) #> R6 2.5.1 2021-08-19 [1] CRAN (R 4.2.0) #> reprex 2.0.2 2022-08-17 [1] CRAN (R 4.2.0) #> rlang 1.1.0 2023-03-14 [1] CRAN (R 4.2.0) #> rmarkdown 2.20 2023-01-19 [1] CRAN (R 4.2.0) #> rstudioapi 0.14 2022-08-22 [1] CRAN (R 4.2.0) #> scales 1.2.1 2022-08-20 [1] CRAN (R 4.2.0) #> sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.2.0) #> styler 1.9.0 2023-01-15 [1] CRAN (R 4.2.0) #> tibble 3.2.1 2023-03-20 [1] CRAN (R 4.2.0) #> tidyselect 1.2.0 2022-10-10 [1] CRAN (R 4.2.0) #> utf8 1.2.3 2023-01-31 [1] CRAN (R 4.2.0) #> vctrs 0.6.1 2023-03-22 [1] CRAN (R 4.2.0) #> visdat 0.6.0 2023-02-02 [1] local #> withr 2.5.0 2022-03-03 [1] CRAN (R 4.2.0) #> xfun 0.37 2023-01-31 [1] CRAN (R 4.2.0) #> yaml 2.3.7 2023-01-23 [1] CRAN (R 4.2.0) #> #> [1] /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library #> #> ────────────────────────────────────────────────────────────────────────────── ```