내장 데이터셋 sentiment_dic의 중복 데이터 2건 존재

중복 제거 로직을 포함하여 데이터를 다시 생성하는 스크립트를 다음과 같이 개발함:

library(dplyr)

# Rebuild the `sentiment_dic` dataset from the KnuSentiLex JSON file and store
# it as an .rda file under the project's data/ directory.

# Input JSON, resolved relative to the project root.
fname_sentiword <- here::here(
  "inst", "data", "KnuSentiLex", "SentiWord_info.json"
)

# NOTE(review): assumes the parsed JSON is a data frame with `word` and
# `polarity` columns -- confirm against the source file.
sentiment_dic <- jsonlite::fromJSON(fname_sentiword) %>%
  # De-duplication logic: keep only the first row for each distinct `word`.
  distinct(word, .keep_all = TRUE) %>%
  mutate(
    # Count of whitespace runs in `word`, plus one.
    n_gram = stringr::str_count(word, pattern = "\\s+") + 1L,
    polarity = as.integer(polarity)
  ) %>%
  tibble::as_tibble()

# Anchor the output to the project root as well, so the script writes to the
# same place regardless of the current working directory.
save(sentiment_dic, file = here::here("data", "sentiment_dic.rda"))

bit2r / bitTA

내장 데이터셋 sentiment_dic의 중복 데이터 2건 존재 #26