Open eakl opened 7 years ago
Original Post by nicogla
Congratulations!
Here is another (simpler) version of the code:
# Plot the locations of geotagged tweets that mention @realDonaldTrump on a
# map of the US states.
# NOTE(review): assumes the Twitter session has already been authenticated
# (e.g. with setup_twitter_oauth()) -- confirm before running.
library(maps)
library(twitteR)

searchtext <- "@realDonaldTrump"

# Request up to 10,000 tweets within 1500 miles of (40.375, -100).
DT.tweets <- searchTwitter(searchtext, n = 10000,
                           geocode = "40.375,-100,1500mi")
DT.DF <- twListToDF(DT.tweets)

# Keep only the tweets that carry a longitude.
SelDT.DF <- DT.DF[!is.na(DT.DF$longitude), ]

# Draw the base map once, after the slow search, then overlay the tweets.
usmap <- map("state")
points(SelDT.DF$longitude, SelDT.DF$latitude,
       pch = 19, col = "red", cex = 0.5)
And this is the resulting map: http://imgur.com/cyC1UNt
Original Post by RomainB_
So I adapted the previous code, I hope it's OK
# Compare where @HillaryClinton (blue) and @realDonaldTrump (red) are
# mentioned, by plotting geotagged tweets on a map of the US states.
library(devtools)
# We use a workaround for the connection: install twitteR from GitHub.
install_github("geoffjentry/twitteR")
library(twitteR)
library(maps)

# Replace the placeholders with the credentials of your Twitter application.
api_key <- "YOURKEY"
api_secret <- "YOURKEY"
access_token <- "YOURKEY"
access_token_secret <- "YOURKEY"
setup_twitter_oauth(api_key, api_secret, access_token, access_token_secret)

# Tweets mentioning @HillaryClinton within 1500 miles of (40.375, -100).
searchtext <- "@HillaryClinton"
DT.tweets <- searchTwitter(searchtext, n = 5000,
                           geocode = "40.375,-100,1500mi")
DT.DF <- twListToDF(DT.tweets)
# Keep only the tweets that carry a longitude.
SelDT.DF <- DT.DF[!is.na(DT.DF$longitude), ]

# Draw the base map once, then overlay the first candidate in blue.
usmap <- map("state")
points(SelDT.DF$longitude, SelDT.DF$latitude,
       pch = 19, col = "blue", cex = 0.5)

# Same search for @realDonaldTrump, overlaid on the same map in red.
searchtext2 <- "@realDonaldTrump"
DT.tweets2 <- searchTwitter(searchtext2, n = 5000,
                            geocode = "40.375,-100,1500mi")
DT.DF2 <- twListToDF(DT.tweets2)
SelDT.DF2 <- DT.DF2[!is.na(DT.DF2$longitude), ]
points(SelDT.DF2$longitude, SelDT.DF2$latitude,
       pch = 19, col = "red", cex = 0.5)
Original Post by AlexGuen
Map Sentiment Analysis Grey = Neutral Red = Bad SteelBlue = Good
# Sentiment map, step 1: fetch the tweets and load the opinion lexicons.
# NOTE(review): assumes an authenticated Twitter session and that both
# lexicon files sit in the working directory -- confirm before running.
# The base map is not drawn here; it is drawn right before the points are
# plotted, so an early draw would only be overwritten.
library(maps)
library(twitteR)

searchtext <- "@realDonaldTrump"
# Request up to 10,000 tweets within 1500 miles of (40.375, -100).
DT.tweets <- searchTwitter(searchtext, n = 10000,
                           geocode = "40.375,-100,1500mi")
DT.DF <- twListToDF(DT.tweets)

# 2.2 Extract text from lexicons
# Text following a ";" in the lexicon files is treated as a comment.
pos.words <- scan("positive-words.txt", what = "character", comment.char = ";")
neg.words <- scan("negative-words.txt", what = "character", comment.char = ";")
# 3.1.2 Write in function to score sentiment
library(plyr)
#' Score the sentiment of each sentence with a word-count heuristic.
#'
#' The score of a sentence is the number of its words found in `pos.words`
#' minus the number of its words found in `neg.words`.
#'
#' @param sentences Vector (or list) of sentences to score.
#' @param pos.words Character vector of positive terms.
#' @param neg.words Character vector of negative terms.
#' @param .progress Progress-bar name, forwarded to `plyr::laply()`.
#' @return A data frame with one row per sentence and the columns `score`
#'   and `text` (the sentences as they were passed in).
score.sentiment <- function(sentences, pos.words, neg.words,
                            .progress = 'none') {
  # Namespace-qualified calls replace require(): a missing package now fails
  # with a clear error instead of require() silently returning FALSE.
  score_one <- function(sentence, pos.words, neg.words) {
    # Strip punctuation, control characters and digits, then lower-case.
    sentence <- gsub('[[:punct:]]', '', sentence)
    sentence <- gsub('[[:cntrl:]]', '', sentence)
    sentence <- gsub('\\d+', '', sentence)
    sentence <- tolower(sentence)

    # str_split() returns a list; flatten it to a plain vector of words.
    words <- unlist(stringr::str_split(sentence, '\\s+'))

    # %in% yields TRUE/FALSE per word, which sum() counts as 1/0.
    sum(words %in% pos.words) - sum(words %in% neg.words)
  }

  # "l" (list/vector in) + "a" (array out) + "ply" = laply.
  scores <- plyr::laply(sentences, score_one, pos.words, neg.words,
                        .progress = .progress)

  data.frame(score = scores, text = sentences)
}
#' Clean the text of a list of tweets for sentiment scoring.
#'
#' @param tweets.df Despite the name, this is handed to `twListToDF()`, so it
#'   is the list of tweets (as passed by the caller from `searchTwitter()`),
#'   not a data frame.
#' @return A character vector with exactly one cleaned string per tweet, in
#'   the same order as the input, so that results can be matched back to the
#'   tweets by position.
clean.tweets <- function(tweets.df) {
  tweets <- twListToDF(tweets.df)

  # Keep one string per tweet. Splitting the text on ", " would yield more
  # strings than tweets and break any position-based match with the tweets.
  text <- tweets$text

  # remove usernames (\\w also covers the underscores allowed in handles)
  text <- gsub("@\\w*", "", text)
  # to ASCII; characters that cannot be represented are dropped, so no
  # separate pass for unrecognized characters is needed afterwards
  text <- iconv(text, "latin1", "ASCII", sub = "")
  # replace anything that is not a printable, non-space character by a space
  text <- gsub("[^[:graph:]]", " ", text)
  # remove punctuation
  text <- gsub("[[:punct:]]", "", text)
  # remove http links; this runs after the punctuation step on purpose: with
  # the punctuation gone a link is one alphanumeric run starting with "http"
  text <- gsub("http[[:alnum:]]*", "", text)
  # remove numbers
  text <- gsub("\\d", "", text)
  # to lowercase, before the stop words are removed, so that capitalised
  # stop words are matched as well
  text <- tolower(text)
  # remove "stop words"
  text <- tm::removeWords(text, tm::stopwords("english"))
  # strip whitespace
  tm::stripWhitespace(text)
}
# 4.1 Score tweets' sentiment
library(stringr)
# Score every tweet, attach the score to the geotagged tweets and plot them
# on the US map, coloured by opinion.
DT.score <- score.sentiment(clean.tweets(DT.tweets), pos.words, neg.words,
                            .progress = 'text')

# Scores are matched to tweets by row position.
# NOTE(review): this assumes DT.score has one row per tweet, in the same
# order as DT.DF -- confirm that clean.tweets() neither adds nor drops items.
# seq_len() keeps this working when a data frame has zero rows.
DT.score$Index <- seq_len(nrow(DT.score))
DT.DF$Index <- seq_len(nrow(DT.DF))

# Keep only the tweets that carry a longitude.
SelDT.DF <- DT.DF[!is.na(DT.DF$longitude), ]
SelDT.DF$score <- DT.score$score[match(SelDT.DF$Index, DT.score$Index)]

# Positive score = "Good", negative = "Bad", zero or missing = "Neutral".
# The labels are built as a full-length vector first so that the assignment
# also works when no tweet is geotagged.
opinion <- rep("Neutral", nrow(SelDT.DF))
opinion[which(SelDT.DF$score > 0)] <- "Good"
opinion[which(SelDT.DF$score < 0)] <- "Bad"
SelDT.DF$Opinion <- opinion

usmap <- map('state')

# Drawn in this order so that "Good" ends up on top of "Bad" and "Neutral".
opinion_colours <- c(Neutral = "grey", Bad = "red", Good = "steelblue")
for (label in names(opinion_colours)) {
  selected <- SelDT.DF$Opinion == label
  points(SelDT.DF$longitude[selected], SelDT.DF$latitude[selected],
         pch = 19, col = opinion_colours[[label]], cex = 0.5)
}
Original Post by SaturnMusic
How to get a free bounty in big data analytics