# Closed trinker closed 7 years ago
#' term_before(presidential_debates_2012$dialogue, 'president')
#' term_after(presidential_debates_2012$dialogue, 'president')
#' term_after(presidential_debates_2012$dialogue, 'oil')
#' term_first(presidential_debates_2012$dialogue)
#'
#' \dontrun{
#' library(dplyr); library(lexicon)
#'
#' pos_df_pronouns[['pronoun']][1:5] %>%
#' lapply(function(x){
#' term_after(presidential_debates_2012$dialogue, paste0("\\b", x, "\\b"))
#' }) %>%
#' setNames(pos_df_pronouns[['pronoun']][1:5])
#'
#' term_first(presidential_debates_2012$dialogue) %>%
#' filter(!term %in% tolower(sw_dolch) & !grepl("'", term))
#' }
term_before <- function(text.var, term, ignore.case = TRUE, ...) {
  # Count the words that appear immediately before each match of `term`.
  #
  # text.var:    text to search; passed unchanged to
  #              stringi::stri_extract_all_regex().
  # term:        regular expression (ICU syntax, as used by stringi) describing
  #              the anchor term.
  # ignore.case: scalar logical. If TRUE the pattern is prefixed with "(?i)" and
  #              the extracted words are lower-cased before counting.
  # ...:         accepted but not used by this function.
  #
  # Returns the counts as a tibble, most frequent word first; the column names
  # "term" and "frequency" are the ones handed to textshape::tidy_table().

  # `ignore.case` is a single flag, so a plain `if` is used, not `ifelse()`.
  case_flag <- if (ignore.case) "(?i)" else ""

  # `term` is wrapped in a non-capturing group so that an alternation such as
  # "oil|gas" is treated as one unit; without the group the leading `\\s` would
  # apply only to the first alternative.
  regex <- paste0(case_flag, "[A-Za-z'-]+(?=\\s(?:", term, "))")

  # Elements without a match come back as NA and are dropped before counting.
  trms <- na.omit(unlist(stringi::stri_extract_all_regex(text.var, regex)))
  if (ignore.case) {
    trms <- tolower(trms)
  }

  counts <- as.table(sort(table(trms), decreasing = TRUE))

  # dplyr::tbl_df() is deprecated; as_tibble() is its documented replacement.
  dplyr::as_tibble(textshape::tidy_table(counts, "term", "frequency"))
}
term_after <- function(text.var, term, ignore.case = TRUE, ...) {
  # Count the words that appear immediately after each match of `term`.
  #
  # text.var:    text to search; passed unchanged to
  #              stringi::stri_extract_all_regex().
  # term:        regular expression (ICU syntax, as used by stringi) describing
  #              the anchor term. It is placed inside a look-behind, and ICU
  #              requires look-behind patterns to have bounded length, so
  #              unbounded quantifiers (`*`, `+`) in `term` are rejected by the
  #              regex engine.
  # ignore.case: scalar logical. If TRUE the pattern is prefixed with "(?i)" and
  #              the extracted words are lower-cased before counting.
  # ...:         accepted but not used by this function.
  #
  # Returns the counts as a tibble, most frequent word first; the column names
  # "term" and "frequency" are the ones handed to textshape::tidy_table().

  # `ignore.case` is a single flag, so a plain `if` is used, not `ifelse()`.
  case_flag <- if (ignore.case) "(?i)" else ""

  # `term` is wrapped in a non-capturing group so that an alternation such as
  # "oil|gas" is treated as one unit; without the group the trailing `\\s`
  # would apply only to the last alternative.
  regex <- paste0(case_flag, "(?<=(?:", term, ")\\s)[A-Za-z'-]+")

  # Elements without a match come back as NA and are dropped before counting.
  trms <- na.omit(unlist(stringi::stri_extract_all_regex(text.var, regex)))
  if (ignore.case) {
    trms <- tolower(trms)
  }

  counts <- as.table(sort(table(trms), decreasing = TRUE))

  # dplyr::tbl_df() is deprecated; as_tibble() is its documented replacement.
  dplyr::as_tibble(textshape::tidy_table(counts, "term", "frequency"))
}
term_first <- function(text.var, ignore.case = TRUE, ...) {
  # Count the word that opens each element of `text.var`.
  #
  # text.var:    text to search; passed unchanged to
  #              stringi::stri_extract_all_regex().
  # ignore.case: scalar logical. If TRUE the pattern is prefixed with "(?i)" and
  #              the extracted words are lower-cased before counting, so "The"
  #              and "the" are tallied together.
  # ...:         accepted but not used by this function.
  #
  # Returns the counts as a tibble, most frequent word first; the column names
  # "term" and "frequency" are the ones handed to textshape::tidy_table().

  # `ignore.case` is a single flag, so a plain `if` is used, not `ifelse()`.
  case_flag <- if (ignore.case) "(?i)" else ""

  # `^` anchors the match to the very start of each element, so an element
  # that begins with anything other than a letter, apostrophe or hyphen
  # contributes nothing.
  regex <- paste0(case_flag, "^[A-Za-z'-]+")

  # Elements without a match come back as NA and are dropped before counting.
  trms <- na.omit(unlist(stringi::stri_extract_all_regex(text.var, regex)))
  if (ignore.case) {
    trms <- tolower(trms)
  }

  counts <- as.table(sort(table(trms), decreasing = TRUE))

  # dplyr::tbl_df() is deprecated; as_tibble() is its documented replacement.
  dplyr::as_tibble(textshape::tidy_table(counts, "term", "frequency"))
}