cis-ds / reproducible-examples

0 stars 0 forks source link

01-drop-na-r #27

Open Stellaj240 opened 2 years ago

Stellaj240 commented 2 years ago
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(ggplot2)

# get data from rcfss package
# install latest version if not already installed
# devtools::install_github("uc-cfss/rcfss")
library(rcfss)

# Load the `mass_shootings` dataset (provided by the rcfss package attached
# above) into the workspace, then auto-print it to inspect its columns.
data("mass_shootings")
mass_shootings
#> # A tibble: 125 × 14
#>    case       year month   day location summary fatalities injured total_victims
#>    <chr>     <dbl> <chr> <int> <chr>    <chr>        <dbl>   <dbl>         <dbl>
#>  1 Oxford H…  2021 Nov      30 Oxford,… "Ethan…          4       7            11
#>  2 San Jose…  2021 May      26 San Jos… "Samue…          9       0             9
#>  3 FedEx wa…  2021 Apr      15 Indiana… "Brand…          8       7            15
#>  4 Orange o…  2021 Mar      31 Orange,… "Amina…          4       1             5
#>  5 Boulder …  2021 Mar      22 Boulder… "Ahmad…         10       0            10
#>  6 Atlanta …  2021 Mar      16 Atlanta… "Rober…          8       1             9
#>  7 Springfi…  2020 Mar      16 Springf… "Joaqu…          4       0             4
#>  8 Molson C…  2020 Feb      26 Milwauk… "Antho…          5       0             5
#>  9 Jersey C…  2019 Dec      10 Jersey … "David…          4       3             7
#> 10 Pensacol…  2019 Dec       6 Pensaco… "Ahmed…          3       8            11
#> # … with 115 more rows, and 5 more variables: location_type <chr>, male <lgl>,
#> #   age_of_shooter <dbl>, race <chr>, prior_mental_illness <chr>

# Generate a bar chart that identifies the number of mass shooters
# associated with each race category. The bars should be sorted
# from highest to lowest.

# Aggregate the data before plotting, then sort the bars from highest to
# lowest count with reorder().
#
# The original pipeline called drop_na(), which is exported by tidyr. tidyr was
# never attached in this script, so the reprex stopped with:
#   Error in drop_na(., race): could not find function "drop_na"
# filter(!is.na(race)) removes the same rows (those with a missing race) using
# dplyr, which is already loaded, so no extra package is needed.
mass_shootings %>%
  count(race) %>%
  filter(!is.na(race)) %>%
  ggplot(mapping = aes(x = reorder(race, -n), y = n)) +
  geom_col() +
  labs(
    # NOTE(review): the printed data includes incidents from 2021, so the
    # year range in this title may be out of date -- confirm before reuse.
    title = "Mass shootings in the United States (1982-2019)",
    x = "Race of perpetrator",
    y = "Number of incidents"
  )

Created on 2022-07-05 by the reprex package (v2.0.1)