opiniontext is an ongoing project to build a data package containing the text of US Supreme Court opinions that are publicly available at SupremeCourt.gov. The text data will be made readily available as text documents, CSV files, and R objects. The CSV files and R objects will contain the following columns:
You can install the development version of opiniontext from GitHub with:
# If devtools is not installed yet, uncomment and run the next line first:
# install.packages("devtools")
# Install the development version of opiniontext from its GitHub repository.
devtools::install_github("JCRascal/opiniontext")
library(opiniontext)
library(tidytext)
library(dplyr)
library(ggplot2)
# Count how often each author uses each word in the 2019 opinions.
# Rows whose author is "Per Curiam" or "Syllabus" are excluded, the text is
# split into one word per row, stop words are removed, and the remaining
# (author, word) pairs are tallied in descending order of frequency.
case_words <- opinions_2019 %>%
  filter(author != "Per Curiam", author != "Syllabus") %>%
  unnest_tokens(output = word, input = text) %>%
  anti_join(get_stopwords(), by = "word") %>%
  count(author, word, sort = TRUE)
# Attach each author's total word count to every (author, word) row.
# `total` is the sum of `n` per author and is computed before the short-word
# filter below, so it still includes words of one or two characters.
case_words <- case_words %>%
  group_by(author) %>%
  summarize(total = sum(n)) %>%
  # Name the join key explicitly: without `by`, dplyr guesses it from the
  # shared column names and prints a "Joining with ..." message.
  right_join(case_words, by = "author") %>%
  # Drop very short tokens (one or two characters).
  filter(nchar(word) > 2)
# Build the plotting data: score each word by tf-idf, treating every author
# as one document, then keep the ten highest-scoring words per author.
# slice_max() keeps ties by default, so an author can end up with more than
# ten rows when scores are tied at the cut-off.
case_plot <- case_words %>%
  bind_tf_idf(term = word, document = author, n = n) %>%
  group_by(author) %>%
  slice_max(order_by = tf_idf, n = 10) %>%
  ungroup() %>%
  # Order words by tf-idf separately within each author so that every facet
  # of the plot is sorted on its own.
  mutate(word = reorder_within(x = word, by = tf_idf, within = author))
# Draw one horizontal bar chart per author showing the selected words and
# their tf-idf scores. Facets get independent scales, and
# scale_x_reordered() cleans up the labels created by reorder_within().
ggplot(
  data = case_plot,
  mapping = aes(x = word, y = tf_idf, fill = author)
) +
  geom_col(show.legend = FALSE) +
  facet_wrap(vars(author), ncol = 4, scales = "free") +
  labs(x = NULL, y = "tf-idf") +
  coord_flip() +
  scale_x_reordered()