correctly implement `any_na()` (`any_miss()`) and `any_complete()` #329

njtierney closed 1 year ago

njtierney commented 1 year ago

Rework examples to demonstrate workflow for finding complete variables.


# for vectors
misses <- c(NA, NA, NA)
complete <- c(1, 2, 3)
mixture <- c(NA, 1, NA)

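all_na(misses)
#> [1] TRUE
all_na(complete)
#> [1] FALSE
all_na(mixture)
#> [1] FALSE
all_complete(misses)
#> [1] FALSE
all_complete(complete)
#> [1] TRUE
all_complete(mixture)
#> [1] FALSE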

any_na(misses)
#> [1] TRUE
any_na(complete)
#> [1] FALSE
any_na(mixture)
#> [1] TRUE

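# for data frames
all_na(airquality)
#> [1] FALSE
# all_miss() is an alias of all_na()
all_miss(airquality)
#> [1] FALSE
all_complete(airquality)
#> [1] FALSE

any_na(airquality)
#> [1] TRUE
any_complete(airquality)
#> [1] TRUE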

# use in identifying columns with all missing/complete

library(dplyr)
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>     filter, lag
#> The following objects are masked from 'package:base':
#>     intersect, setdiff, setequal, union
# convert to a tibble for nicer printing
aq <- as_tibble(airquality)
aq
#> # A tibble: 153 × 6
#>    Ozone Solar.R  Wind  Temp Month   Day
#>    <int>   <int> <dbl> <int> <int> <int>
#>  1    41     190   7.4    67     5     1
#>  2    36     118   8      72     5     2
#>  3    12     149  12.6    74     5     3
#>  4    18     313  11.5    62     5     4
#>  5    NA      NA  14.3    56     5     5
#>  6    28      NA  14.9    66     5     6
#>  7    23     299   8.6    65     5     7
#>  8    19      99  13.8    59     5     8
#>  9     8      19  20.1    61     5     9
#> 10    NA     194   8.6    69     5    10
#> # ℹ 143 more rows
# select variables with all missing values
aq %>% select(where(all_na))
#> # A tibble: 153 × 0
# there are none!
# select columns with any NA values
aq %>% select(where(any_na))
#> # A tibble: 153 × 2
#>    Ozone Solar.R
#>    <int>   <int>
#>  1    41     190
#>  2    36     118
#>  3    12     149
#>  4    18     313
#>  5    NA      NA
#>  6    28      NA
#>  7    23     299
#>  8    19      99
#>  9     8      19
#> 10    NA     194
#> # ℹ 143 more rows
# select only columns with all complete data
aq %>% select(where(all_complete))
#> # A tibble: 153 × 4
#>     Wind  Temp Month   Day
#>    <dbl> <int> <int> <int>
#>  1   7.4    67     5     1
#>  2   8      72     5     2
#>  3  12.6    74     5     3
#>  4  11.5    62     5     4
#>  5  14.3    56     5     5
#>  6  14.9    66     5     6
#>  7   8.6    65     5     7
#>  8  13.8    59     5     8
#>  9  20.1    61     5     9
#> 10   8.6    69     5    10
#> # ℹ 143 more rows

# select columns that contain any complete (non-missing) values
# (here that is every column, so all the data is returned)
aq %>% select(where(any_complete))
#> # A tibble: 153 × 6
#>    Ozone Solar.R  Wind  Temp Month   Day
#>    <int>   <int> <dbl> <int> <int> <int>
#>  1    41     190   7.4    67     5     1
#>  2    36     118   8      72     5     2
#>  3    12     149  12.6    74     5     3
#>  4    18     313  11.5    62     5     4
#>  5    NA      NA  14.3    56     5     5
#>  6    28      NA  14.9    66     5     6
#>  7    23     299   8.6    65     5     7
#>  8    19      99  13.8    59     5     8
#>  9     8      19  20.1    61     5     9
#> 10    NA     194   8.6    69     5    10
#> # ℹ 143 more rows

Created on 2023-04-28 with reprex v2.0.2

