Comparison/Contrast word clouds based on sentiment
Emotional analysis (any one lexicon)
Collect tweets (with R)
# Collect tweets from the Twitter API via the twitteR package.
library(twitteR)

# Replace the four placeholders below with your own app credentials
# (consumer key/secret and access token/secret).
consumer_key <- "..."
consumer_secret <- "...."
access_token <- "...."
access_secret <- "...."
setup_twitter_oauth(consumer_key, consumer_secret, access_token, access_secret)

# Pull up to 2000 #ClimateChange tweets posted since 2017-01-01;
# retryOnRateLimit keeps the search alive across rate-limit pauses.
tw <- twitteR::searchTwitter(
  "#ClimateChange",
  n = 2000,
  since = "2017-01-01",
  retryOnRateLimit = 1e3
)

# Flatten the list of status objects into a data frame and save it.
tdf <- twitteR::twListToDF(tw)
write.csv(tdf, "path/tweets.csv")  # to save the .csv file
# Unigram term-frequency word cloud --------------------------------------
cc <- TermDocumentMatrix(cleaned_corpus)
cc <- as.matrix(cc)

# Term frequency: total count of each term across all documents.
# BUG FIX: was rowSums(df) (the raw tweet data frame); the counts must
# come from the term-document matrix `cc` itself.
cc_frequency <- rowSums(cc)

# Sort term frequencies in descending order (spell out `decreasing`;
# the original relied on partial matching via `dec=`).
cc_frequency <- sort(cc_frequency, decreasing = TRUE)

# Build a term/frequency data frame for the word cloud.
cc_word_freqs <- data.frame(term = names(cc_frequency), num = cc_frequency)

# BUG FIX: the frequency column was read from the undefined object
# `first_word_freqs`; use `cc_word_freqs` consistently.
wordcloud(
  cc_word_freqs$term, cc_word_freqs$num,
  min.freq = 15, max.words = 2000,
  colors = brewer.pal(8, "Paired"),
  random.color = TRUE, random.order = FALSE,
  scale = c(3, 0.3)
)
# Bigram term-frequency word cloud ---------------------------------------
# BUG FIX: `cleaned_review_corpus` is never defined in this script; the
# cleaned corpus is stored in `cleaned_corpus`. `tokenizer` must be a
# bigram tokenizer defined before this point -- TODO confirm it exists.
bigram_tdm <- TermDocumentMatrix(
  cleaned_corpus,
  control = list(tokenize = tokenizer)
)
bigram_tdm_m <- as.matrix(bigram_tdm)

# Total frequency per bigram, sorted in descending order.
term_frequency <- rowSums(bigram_tdm_m)
term_frequency <- sort(term_frequency, decreasing = TRUE)

# Build the term/frequency data frame (the original created this same
# data frame twice in a row).
word_freqs <- data.frame(term = names(term_frequency), num = term_frequency)

wordcloud(
  word_freqs$term, word_freqs$num,
  min.freq = 15, max.words = 2000,
  colors = brewer.pal(8, "Paired")
)
# TF-IDF weighted word cloud ---------------------------------------------
# BUG FIX: `cleaned_review_corpus` is never defined; use `cleaned_corpus`.
tfidf_tdm <- TermDocumentMatrix(
  cleaned_corpus,
  control = list(weighting = weightTfIdf)
)
tfidf_tdm_m <- as.matrix(tfidf_tdm)

# Aggregate tf-idf weight per term, sorted in descending order.
term_frequency <- rowSums(tfidf_tdm_m)
term_frequency <- sort(term_frequency, decreasing = TRUE)

library(wordcloud)

# Build the term/weight data frame and draw the cloud.
word_freqs <- data.frame(term = names(term_frequency), num = term_frequency)
wordcloud(
  word_freqs$term, word_freqs$num,
  min.freq = 15, max.words = 2000,
  colors = brewer.pal(8, "Paired")
)
# TF-IDF word cloud ------------------------------------------------------
# NOTE(review): this repeats the tf-idf pipeline above, this time
# explicitly on `cleaned_corpus`; kept as in the original.
tfidf_tdm <- TermDocumentMatrix(
  cleaned_corpus,
  control = list(weighting = weightTfIdf)
)
tfidf_tdm_m <- as.matrix(tfidf_tdm)

# Aggregate tf-idf weight per term, sorted in descending order.
term_frequency <- rowSums(tfidf_tdm_m)
term_frequency <- sort(term_frequency, decreasing = TRUE)

# Build the term/weight data frame and draw the cloud.
word_freqs <- data.frame(term = names(term_frequency), num = term_frequency)
wordcloud(
  word_freqs$term, word_freqs$num,
  min.freq = 15, max.words = 2000,
  colors = brewer.pal(8, "Paired")
)
# ClimateChange: analysis of 2000 collected tweets -----------------------

# Save the query result, then read it back from disk.
write.csv(tdf, "path/tweets.csv")  # to save the .csv file

# BUG FIX: the original `read.csv(CCtweets.csv ')` had an unquoted file
# name with a stray quote -- a syntax error.
df <- read.csv("CCtweets.csv")

# Build a volatile corpus from the tweet text column.
corpus <- VCorpus(VectorSource(df$text))
# Corpus cleaning / pre-processing: strip URLs, expand abbreviations,
# lower-case, and remove punctuation, numbers, English stop words and
# extra whitespace. Returns the cleaned corpus.
clean_corpus <- function(cleaned_corpus) {
  # BUG FIX: "\S" is an invalid escape in an R string (parse error);
  # the regex needs "\\S+" to match non-whitespace after the URL scheme.
  removeURL <- content_transformer(function(x) {
    gsub("(f|ht)tp(s?)://\\S+", "", x, perl = TRUE)
  })
  cleaned_corpus <- tm_map(cleaned_corpus, removeURL)
  # replace_abbreviation comes from the qdap package -- TODO confirm it
  # is loaded before this function runs.
  cleaned_corpus <- tm_map(cleaned_corpus, content_transformer(replace_abbreviation))
  cleaned_corpus <- tm_map(cleaned_corpus, content_transformer(tolower))
  cleaned_corpus <- tm_map(cleaned_corpus, removePunctuation)
  cleaned_corpus <- tm_map(cleaned_corpus, removeNumbers)
  cleaned_corpus <- tm_map(cleaned_corpus, removeWords, stopwords("english"))
  cleaned_corpus <- tm_map(cleaned_corpus, stripWhitespace)
  cleaned_corpus
}
cleaned_corpus <- clean_corpus(corpus)
# Unigram term-frequency word cloud --------------------------------------
cc <- TermDocumentMatrix(cleaned_corpus)
cc <- as.matrix(cc)

# BUG FIX: was rowSums(df) (the raw tweet data frame); counts must come
# from the term-document matrix `cc`.
cc_frequency <- rowSums(cc)
cc_frequency <- sort(cc_frequency, decreasing = TRUE)

cc_word_freqs <- data.frame(term = names(cc_frequency), num = cc_frequency)

# BUG FIX: the frequency column was read from the undefined object
# `first_word_freqs`; use `cc_word_freqs` consistently.
wordcloud(
  cc_word_freqs$term, cc_word_freqs$num,
  min.freq = 15, max.words = 2000,
  colors = brewer.pal(8, "Paired"),
  random.color = TRUE, random.order = FALSE,
  scale = c(3, 0.3)
)
# ![climatechange unigram](https://user-images.githubusercontent.com/37715086/39090887-a0db493a-45b7-11e8-967d-efa199730c18.png)
# Bigram term-frequency word cloud ---------------------------------------
# BUG FIX: `cleaned_review_corpus` is never defined; use `cleaned_corpus`.
bigram_tdm <- TermDocumentMatrix(
  cleaned_corpus,
  control = list(tokenize = tokenizer)
)
bigram_tdm_m <- as.matrix(bigram_tdm)

# Total frequency per bigram, sorted in descending order.
term_frequency <- rowSums(bigram_tdm_m)
term_frequency <- sort(term_frequency, decreasing = TRUE)

word_freqs <- data.frame(term = names(term_frequency), num = term_frequency)
wordcloud(
  word_freqs$term, word_freqs$num,
  min.freq = 15, max.words = 2000,
  colors = brewer.pal(8, "Paired")
)
# ![climatechange bigram](https://user-images.githubusercontent.com/37715086/39090848-3d092382-45b7-11e8-8800-c79ab0f311e5.png)
# Trigram TF word cloud
# NOTE(review): the heading says "Trigram" but the code below is the
# tf-idf weighting pipeline -- a trigram tokenizer/TDM appears to be
# missing; confirm against the intended analysis.
word_freqs <- data.frame(term = names(term_frequency), num = term_frequency)
wordcloud(
  word_freqs$term, word_freqs$num,
  min.freq = 15, max.words = 2000,
  colors = brewer.pal(8, "Paired")
)

# tf-idf weighting
# BUG FIX: `cleaned_review_corpus` is never defined; use `cleaned_corpus`.
tfidf_tdm <- TermDocumentMatrix(
  cleaned_corpus,
  control = list(weighting = weightTfIdf)
)
tfidf_tdm_m <- as.matrix(tfidf_tdm)

# Aggregate tf-idf weight per term, sorted in descending order.
term_frequency <- rowSums(tfidf_tdm_m)
term_frequency <- sort(term_frequency, decreasing = TRUE)

library(wordcloud)

word_freqs <- data.frame(term = names(term_frequency), num = term_frequency)
wordcloud(
  word_freqs$term, word_freqs$num,
  min.freq = 15, max.words = 2000,
  colors = brewer.pal(8, "Paired")
)
# ![climatechange tf-idf word cloud](https://user-images.githubusercontent.com/37715086/38900510-a52775c8-4269-11e8-8a98-4d690cd587f7.png)
# TF-IDF word cloud ------------------------------------------------------
tfidf_tdm <- TermDocumentMatrix(
  cleaned_corpus,
  control = list(weighting = weightTfIdf)
)
tfidf_tdm_m <- as.matrix(tfidf_tdm)

# Aggregate tf-idf weight per term, sorted in descending order.
term_frequency <- rowSums(tfidf_tdm_m)
term_frequency <- sort(term_frequency, decreasing = TRUE)

word_freqs <- data.frame(term = names(term_frequency), num = term_frequency)
wordcloud(
  word_freqs$term, word_freqs$num,
  min.freq = 15, max.words = 2000,
  colors = brewer.pal(8, "Paired")
)
# ![climatechange comparison cloud](https://user-images.githubusercontent.com/37715086/39090276-e49f141c-45a8-11e8-8465-87f42383ba31.png)
# Bing sentiment: net (+1/-1 * count) sentiment per document -------------
bing_lex <- get_sentiments("bing")
text_bing_lex <- inner_join(text_tidy, bing_lex, by = c("term" = "word"))

# Map the sentiment label to a signed unit, weighted by term count.
text_bing_lex$sentiment_n <- ifelse(text_bing_lex$sentiment == "negative", -1, 1)
text_bing_lex$sentiment_value <- text_bing_lex$sentiment_n * text_bing_lex$count
text_bing_lex  # interactive inspection

# Sum signed sentiment values per document (tweet index).
bing_aggdata <- aggregate(
  text_bing_lex$sentiment_value,
  list(index = text_bing_lex$document),
  sum
)
bing_aggdata            # interactive inspection
sapply(bing_aggdata, typeof)  # interactive inspection
bing_aggdata$index <- as.numeric(bing_aggdata$index)

# Scatter plot and smoothed trend of sentiment over the tweet index.
ggplot(bing_aggdata, aes(index, x)) + geom_point()
ggplot(bing_aggdata, aes(index, x)) + geom_smooth()
# ![smooth sentment cc](https://user-images.githubusercontent.com/37715086/38902831-31bb56e8-4270-11e8-8a91-5bd061bdbb70.png)
# Comparison word cloud on sentiment -------------------------------------
# Cast terms into a term x sentiment matrix and contrast the two groups.
library(reshape2)
text_tidy %>%
  inner_join(get_sentiments("bing"), by = c("term" = "word")) %>%
  count(term, sentiment, sort = TRUE) %>%
  acast(term ~ sentiment, value.var = "n", fill = 0) %>%
  comparison.cloud(colors = brewer.pal(8, "Dark2"), max.words = 2000)
# ![climatechange comparison word cloud on sentiment](https://user-images.githubusercontent.com/37715086/38903164-9c8bcd26-4271-11e8-973d-e70f9e6ba53b.png)
# ClimateChange emotional analysis (NRC lexicon radar chart) -------------
library(radarchart)
nrc_lex <- get_sentiments("nrc")
nrc <- inner_join(text_tidy, nrc_lex, by = c("term" = "word"))

# Drop the generic positive/negative labels, keeping only the emotions.
nrc_noposneg <- nrc[!(nrc$sentiment %in% c("positive", "negative")), ]

# Total term count per emotion, rendered as a radar chart.
aggdata <- aggregate(
  nrc_noposneg$count,
  list(index = nrc_noposneg$sentiment),
  sum
)
chartJSRadar(aggdata)
# ![climatechange emotional analysis](https://user-images.githubusercontent.com/37715086/38903188-bacdbd1c-4271-11e8-8eba-7df6df1daf05.png)
:)
Unigram
Bigram
Trigram
TF-IDF
commonality cloud
Bing aggregate point
Bing aggregate smooth
word Comparison
Emotional analysis