cis-ds / Discussion

Public discussion
10 stars 15 forks source link

Allison Leon #158

Closed adleon53 closed 3 years ago

adleon53 commented 3 years ago
# Attach the packages used by this script: dplyr (data manipulation verbs
# and the %>% pipe) and ggplot2 (plotting).
# Only dplyr, ggplot2, and rcfss are attached in this script; functions from
# any other package need their own library() call or a pkg:: prefix.
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(ggplot2)

# Get the data from the rcfss package.
# Install the latest version first if it is not already installed:
# devtools::install_github("uc-cfss/rcfss")
library(rcfss)

# Load the mass_shootings dataset into the workspace, then print it to
# inspect its columns.
data("mass_shootings")
mass_shootings
#> # A tibble: 114 x 14
#>    case   year month   day location summary fatalities injured total_victims
#>    <chr> <dbl> <chr> <int> <chr>    <chr>        <dbl>   <dbl>         <dbl>
#>  1 Dayt…  2019 Aug       4 Dayton,… "PENDI…          9      27            36
#>  2 El P…  2019 Aug       3 El Paso… "PENDI…         20      26            46
#>  3 Gilr…  2019 Jul      28 Gilroy,… "Santi…          3      12            15
#>  4 Virg…  2019 May      31 Virgini… "DeWay…         12       4            16
#>  5 Harr…  2019 Feb      15 Aurora,… "Gary …          5       6            11
#>  6 Penn…  2019 Jan      24 State C… "Jorda…          3       1             4
#>  7 SunT…  2019 Jan      23 Sebring… "Zephe…          5       0             5
#>  8 Merc…  2018 Nov      19 Chicago… "Juan …          3       0             3
#>  9 Thou…  2018 Nov       7 Thousan… "Ian D…         12      22            34
#> 10 Tree…  2018 Oct      27 Pittsbu… "Rober…         11       6            17
#> # … with 104 more rows, and 5 more variables: location_type <chr>, male <lgl>,
#> #   age_of_shooter <dbl>, race <chr>, prior_mental_illness <chr>

# Generate a bar chart that identifies the number of mass shooters
# associated with each race category. The bars should be sorted
# from highest to lowest.

# Aggregate the data before plotting, then order the bars with reorder().
#
# Rows with a missing race are removed with dplyr's filter() rather than
# drop_na(): drop_na() is exported by tidyr, which this script never
# attaches, so calling it fails with 'could not find function "drop_na"'.
# reorder(race, -n) orders the race categories by descending count so the
# tallest bar comes first.
mass_shootings %>%
  filter(!is.na(race)) %>%
  count(race) %>%
  ggplot(mapping = aes(x = reorder(race, -n), y = n)) +
  geom_col() +
  labs(
    title = "Mass shootings in the United States (1982-2019)",
    x = "Race of perpetrator",
    y = "Number of incidents"
  )

Created on 2020-11-05 by the reprex package (v0.3.0)