TeorinKim / NetworkAnalysis

0 stars 0 forks source link

R 형태소 분석 명령문 #20

Open TeorinKim opened 2 years ago

TeorinKim commented 2 years ago

# ---- Environment setup ----
# Installs (only when missing) and loads the packages used by the script
# below. Each install is guarded so re-running the script does not
# reinstall packages that are already available.

if (!requireNamespace("remotes", quietly = TRUE)) {
  install.packages("remotes")
}

if (!requireNamespace("multilinguer", quietly = TRUE)) {
  remotes::install_github("mrchypark/multilinguer")
}

library(multilinguer)

# The original invoked install_java() twice (once bare, once
# namespace-qualified); a single call is sufficient.
# NOTE(review): comment this line out once Java is set up on the machine.
multilinguer::install_java()

# NLP4kec is installed from a local archive, hence repos = NULL.
if (!requireNamespace("NLP4kec", quietly = TRUE)) {
  install.packages("d:/Download/NLP4kec_1.4.0.zip", repos = NULL)
}

library(NLP4kec)
library(rJava)

library(haven)

if (!requireNamespace("tm", quietly = TRUE)) {
  install.packages("tm")
}

library(tm)

# ---- Load inputs and build the cleaned corpus ----

# Stopword table; the `stopword` column is passed to removeWords() below.
불용어 <- read.csv(file = "d:/ML/stopwords.csv", header = TRUE, sep = ",")
stopifnot("stopword" %in% names(불용어))

# Morphological parsing of the source document with NLP4kec.
result <- file_parser_r(path = "d:/ML/헌법.xlsx", language = "ko")

# Collapse runs of spaces into a single space. The pasted original read
# gsub(" ", " ", result), which is a no-op.
# NOTE(review): presumably the first pattern was two spaces before the
# whitespace was collapsed on paste -- confirm.
결과 <- gsub(" +", " ", result)

전처리 <- 결과

# Build a volatile corpus, then strip punctuation, digits and stopwords.
말뭉치 <- VCorpus(VectorSource(전처리))
말뭉치 <- tm_map(말뭉치, removePunctuation)
말뭉치 <- tm_map(말뭉치, removeNumbers)
# as.character() guards against the column having been read as a factor.
말뭉치 <- tm_map(말뭉치, removeWords, as.character(불용어$stopword))
말뭉치 <- tm_map(말뭉치, PlainTextDocument)

# ---- Document-term / term-document matrices and term frequencies ----

# The control option is spelled `wordLengths`; the original `wordLenths`
# is not an option tm recognises, so the intended minimum term length
# of 2 was never applied.
길이_조건 <- list(wordLengths = c(2, Inf))

DTM <- DocumentTermMatrix(말뭉치, control = 길이_조건)

# The original built TDM with DocumentTermMatrix(), making it a copy of
# DTM; TermDocumentMatrix() gives the terms-by-documents layout the name
# implies.
# NOTE(review): confirm no later code relied on TDM being documents-by-terms.
TDM <- TermDocumentMatrix(말뭉치, control = 길이_조건)

# Trim whitespace from the term labels and keep terms longer than one
# character.
colnames(DTM) <- trimws(colnames(DTM))
DTM <- DTM[, nchar(colnames(DTM)) > 1]
DTM

# Term counts as a named numeric vector.
# NOTE(review): DTM[1, ] restricts the counts to the first document only;
# use as.matrix(DTM) if frequencies over all documents are wanted.
빈도 <- colSums(as.matrix(DTM[1, ]))

# Show the 32 most frequent terms.
빈도[head(order(-빈도), 32)]

# ---- Bar chart of the 20 most frequent terms ----

library(ggplot2)
library(dplyr)

# One row per term: the term label and its count from `빈도`.
단어 <- data.frame(단어 = names(빈도), freq = 빈도)

# Top 20 rows by descending frequency.
상위_단어 <- head(arrange(단어, -freq), 20)

# The original closed aes() too early: aes(x = ..., 20) mapped y to the
# constant 20 and `y = 빈도` was passed to ggplot() instead of aes().
# Map y to the `freq` column of the plotted data so bar heights match
# the counts.
ggplot(상위_단어, aes(x = reorder(단어, -freq), y = freq)) +
  geom_bar(stat = "identity")