
#ClimateChangeTwitterfeedtextanalysis #1

Mesikt opened this issue 6 years ago (status: Open)

Mesikt commented 6 years ago

ClimateChange 2000 tweets analysis

  1. TF word clouds (unigram, bigram, trigram)
  2. TF-IDF word cloud
  3. Sentiment analysis (any one lexicon)
  4. Comparison/contrast word clouds based on sentiment
  5. Emotional analysis (any one lexicon)

Collect tweets (in R). Change the four credential lines below to your own consumer_key, consumer_secret, access_token, and access_secret.

```r
library(twitteR)

consumer_key    <- "..."
consumer_secret <- "...."
access_token    <- "...."
access_secret   <- "...."
setup_twitter_oauth(consumer_key, consumer_secret, access_token, access_secret)

# Search for up to 2000 #ClimateChange tweets posted since 2017-01-01
tw <- twitteR::searchTwitter("#ClimateChange", n = 2000,
                             since = '2017-01-01', retryOnRateLimit = 1e3)
# Create a data frame out of the query result
tdf <- twitteR::twListToDF(tw)
```
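A quick sanity check on the returned data frame can catch an empty or truncated query before any downstream steps; this is a minimal sketch, not part of the original post:

```r
# Confirm how many tweets came back and peek at the text column
dim(tdf)
head(tdf$text, 3)
```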

Save the tweets to a .csv file and read them back:

```r
write.csv(tdf, "path/tweets.csv")  # to save the .csv file

# Read file
# (the post saves tweets.csv but reads CCtweets.csv; adjust the path to match)
df <- read.csv("CCtweets.csv")
```
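Depending on the R version (an assumption, since the post doesn't state one), read.csv may return the text column as a factor, which trips up VectorSource/VCorpus; coercing to character first is a cheap safeguard:

```r
# Ensure the tweet text is plain character before building the corpus
df$text <- as.character(df$text)
```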

Make a corpus from a vector source:

```r
library(tm)

corpus <- VCorpus(VectorSource(df$text))
```

Cleaning the corpus (pre-processing):

```r
library(qdap)  # provides replace_abbreviation()

clean_corpus <- function(cleaned_corpus){
  # Strip URLs (note the doubled backslash required inside an R string)
  removeURL <- content_transformer(function(x) gsub("(f|ht)tp(s?)://\\S+", "", x, perl = TRUE))
  cleaned_corpus <- tm_map(cleaned_corpus, removeURL)
  cleaned_corpus <- tm_map(cleaned_corpus, content_transformer(replace_abbreviation))
  cleaned_corpus <- tm_map(cleaned_corpus, content_transformer(tolower))
  cleaned_corpus <- tm_map(cleaned_corpus, removePunctuation)
  cleaned_corpus <- tm_map(cleaned_corpus, removeNumbers)
  cleaned_corpus <- tm_map(cleaned_corpus, removeWords, stopwords("english"))
  cleaned_corpus <- tm_map(cleaned_corpus, stripWhitespace)
  return(cleaned_corpus)
}

cleaned_corpus <- clean_corpus(corpus)
```
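To confirm the cleaning did what was intended, it can help to eyeball a few documents before building any matrices (a quick sketch, not part of the original post):

```r
# Print the first three cleaned tweets
for (i in 1:3) print(content(cleaned_corpus[[i]]))
```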

Unigram TF word cloud:

```r
library(wordcloud)

cc <- TermDocumentMatrix(cleaned_corpus)
cc <- as.matrix(cc)

# Term frequency (rowSums over the TDM matrix, not over df)
cc_frequency <- rowSums(cc)
# Sort term frequency in descending order
cc_frequency <- sort(cc_frequency, dec = TRUE)
# Create word_freqs
cc_word_freqs <- data.frame(term = names(cc_frequency), num = cc_frequency)
# Create a word cloud for the values in word_freqs
wordcloud(cc_word_freqs$term, cc_word_freqs$num,
          min.freq = 15, max.words = 2000,
          colors = brewer.pal(8, "Paired"),
          random.color = TRUE, random.order = FALSE, scale = c(3, 0.3))
```

Bigram TF word cloud:

```r
library(RWeka)

# The original references `tokenizer` without defining it; RWeka's
# NGramTokenizer is a common choice with tm and is assumed here
tokenizer <- function(x) NGramTokenizer(x, Weka_control(min = 2, max = 2))

bigram_tdm   <- TermDocumentMatrix(cleaned_corpus, control = list(tokenize = tokenizer))
bigram_tdm_m <- as.matrix(bigram_tdm)

# Term frequency, checked on the bigram matrix
term_frequency <- rowSums(bigram_tdm_m)
# Sort term frequency in descending order
term_frequency <- sort(term_frequency, dec = TRUE)

# Create word_freqs
word_freqs <- data.frame(term = names(term_frequency), num = term_frequency)
# Create a word cloud for the values in word_freqs
wordcloud(word_freqs$term, word_freqs$num,
          min.freq = 15, max.words = 2000, colors = brewer.pal(8, "Paired"))
```

Trigram TF word cloud:

```r
# Same pipeline with a trigram tokenizer (the TDM step was missing from
# the original post; reconstructed by analogy with the bigram step)
trigram_tokenizer <- function(x) NGramTokenizer(x, Weka_control(min = 3, max = 3))
trigram_tdm   <- TermDocumentMatrix(cleaned_corpus, control = list(tokenize = trigram_tokenizer))
trigram_tdm_m <- as.matrix(trigram_tdm)

term_frequency <- rowSums(trigram_tdm_m)
term_frequency <- sort(term_frequency, dec = TRUE)

# Create word_freqs
word_freqs <- data.frame(term = names(term_frequency), num = term_frequency)
# Create a word cloud for the values in word_freqs
wordcloud(word_freqs$term, word_freqs$num,
          min.freq = 15, max.words = 2000, colors = brewer.pal(8, "Paired"))
```

TF-IDF word cloud:

```r
tfidf_tdm   <- TermDocumentMatrix(cleaned_corpus, control = list(weighting = weightTfIdf))
tfidf_tdm_m <- as.matrix(tfidf_tdm)

# Per-term weight (rowSums of the tf-idf matrix)
term_frequency <- rowSums(tfidf_tdm_m)
# Sort in descending order
term_frequency <- sort(term_frequency, dec = TRUE)
# Create word_freqs
word_freqs <- data.frame(term = names(term_frequency), num = term_frequency)
# Create a word cloud for the values in word_freqs
# (note: tf-idf weights are fractional, so min.freq = 15 may be too high;
# lower it if the cloud comes out empty)
wordcloud(word_freqs$term, word_freqs$num,
          min.freq = 15, max.words = 2000, colors = brewer.pal(8, "Paired"))
```
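A quick side-by-side of the top terms under the two weightings shows concretely what TF-IDF changes; this sketch assumes cc_frequency (unigram TF, above) and term_frequency (TF-IDF, just computed) are both still in scope:

```r
head(names(cc_frequency), 10)    # top terms by raw frequency
head(names(term_frequency), 10)  # top terms by tf-idf weight
```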

Sentiment analysis with the bing lexicon:

```r
library(tidytext)
library(dplyr)
library(ggplot2)

# text_tidy is not built anywhere in the original post; tidying the unigram
# TDM into term / document / count rows is assumed here
text_tidy <- tidy(TermDocumentMatrix(cleaned_corpus))

bing_lex      <- get_sentiments("bing")
text_bing_lex <- inner_join(text_tidy, bing_lex, by = c("term" = "word"))
text_bing_lex$sentiment_n     <- ifelse(text_bing_lex$sentiment == "negative", -1, 1)
text_bing_lex$sentiment_value <- text_bing_lex$sentiment_n * text_bing_lex$count

# Aggregate sentiment per tweet (document index)
bing_aggdata <- aggregate(text_bing_lex$sentiment_value,
                          list(index = text_bing_lex$document), sum)
sapply(bing_aggdata, typeof)
bing_aggdata$index <- as.numeric(bing_aggdata$index)

ggplot(bing_aggdata, aes(index, x)) + geom_point()
ggplot(bing_aggdata, aes(index, x)) + geom_smooth()
```
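To see which words drive the bing scores, the standard tidytext pattern of counting contributions per word can be bolted on (a sketch, not from the original post):

```r
# Top ten contributors for each sentiment
text_bing_lex %>%
  count(term, sentiment, wt = count, sort = TRUE) %>%
  group_by(sentiment) %>%
  slice_head(n = 10)
```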

Comparison word cloud on sentiment:

```r
library(reshape2)

text_tidy %>%
  inner_join(get_sentiments("bing"), by = c("term" = "word")) %>%
  count(term, sentiment, sort = TRUE) %>%
  acast(term ~ sentiment, value.var = "n", fill = 0) %>%
  comparison.cloud(colors = brewer.pal(8, "Dark2"), max.words = 2000)
```
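The screenshots below also include a commonality cloud, but its code is missing from the write-up. It was presumably built from the same term-by-sentiment matrix; this is a reconstruction, not the author's confirmed code:

```r
# Words shared by positive and negative tweets
text_tidy %>%
  inner_join(get_sentiments("bing"), by = c("term" = "word")) %>%
  count(term, sentiment, sort = TRUE) %>%
  acast(term ~ sentiment, value.var = "n", fill = 0) %>%
  commonality.cloud(colors = "steelblue", max.words = 2000)
```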

Climate change emotional analysis (NRC lexicon):

```r
library(radarchart)

nrc_lex <- get_sentiments("nrc")
nrc     <- inner_join(text_tidy, nrc_lex, by = c("term" = "word"))
# Drop the plain positive/negative categories, keeping the eight emotions
nrc_noposneg <- nrc[!(nrc$sentiment %in% c("positive", "negative")), ]

aggdata <- aggregate(nrc_noposneg$count, list(index = nrc_noposneg$sentiment), sum)
chartJSRadar(aggdata)
```
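One optional refinement (not in the original post): a single dominant emotion can flatten the radar, so rescaling counts to percentage shares before plotting may read better:

```r
# Express each emotion as a share of all emotion-word matches
aggdata$x <- round(100 * aggdata$x / sum(aggdata$x), 1)
chartJSRadar(aggdata)
```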

:)

Attached plots:

  1. Unigram TF word cloud
  2. Bigram TF word cloud
  3. Trigram TF word cloud
  4. TF-IDF word cloud
  5. Commonality cloud
  6. Bing aggregate sentiment (points)
  7. Bing aggregate sentiment (smoothed)
  8. Sentiment comparison word cloud
  9. Emotional analysis radar chart

Mesikt commented 2 years ago

Climate Change 2000 tweets analysis