trinker / sentimentr

Dictionary based sentiment analysis that considers valence shifters
Other
426 stars 84 forks source link

sentiment_by.sentiment & sentiment_by.sentiment_by (plus profanity and emotion versions) bug #97

Closed trinker closed 5 years ago

trinker commented 5 years ago
pres <- get_sentences(presidential_debates_2012)
pres_sent <- sentiment_by(pres)
sentiment_by(pres_sent, by = c('person', 'time'))

#      time word_count        sd ave_sentiment
# 1:   <NA>      42689 0.2451383     0.1028443
# 2: person         10        NA     0.4427189
# 3:   time          9        NA     0.3750000

pres_sent2 <- sentiment(pres)
sentiment_by(pres_sent2, by = c('person', 'time'))

#====================================================================================

pres_prof <- profanity_by(pres)
profanity_by(pres_prof, by = c('person', 'time'))

#      time word_count profanity_count           sd ave_profanity
# 1: person         10               0           NA  0.000000e+00
# 2:   time          9               0           NA  0.000000e+00
# 3:   <NA>      42689               1 0.0008827429  2.342524e-05

pres_prof2 <- profanity(pres)
profanity_by(pres_prof2, by = c('person', 'time'))

#====================================================================================

pres_emo <- emotion_by(pres)
emotion_by(pres_emo, by = c('person', 'time'))

#       time         emotion_type word_count emotion_count          sd  ave_emotion
#  1: person                anger         10             0          NA 0.0000000000
#  2: person        anger_negated         10             0          NA 0.0000000000
#  3: person         anticipation         10             0          NA 0.0000000000
#  4: person anticipation_negated         10             0          NA 0.0000000000
#  5: person              disgust         10             0          NA 0.0000000000
#  6: person      disgust_negated         10             0          NA 0.0000000000
#  7: person                 fear         10             0          NA 0.0000000000
#  8: person         fear_negated         10             0          NA 0.0000000000
#  9: person                  joy         10             0          NA 0.0000000000
# 10: person          joy_negated         10             0          NA 0.0000000000
# 11: person              sadness         10             0          NA 0.0000000000
# 12: person      sadness_negated         10             0          NA 0.0000000000
# 13: person             surprise         10             0          NA 0.0000000000
# 14: person     surprise_negated         10             0          NA 0.0000000000
# 15: person                trust         10             0          NA 0.0000000000
# 16: person        trust_negated         10             0          NA 0.0000000000
# 17:   time                anger          9             0          NA 0.0000000000
# 18:   time        anger_negated          9             0          NA 0.0000000000
# 19:   time         anticipation          9             0          NA 0.0000000000
# 20:   time anticipation_negated          9             0          NA 0.0000000000
# 21:   time              disgust          9             0          NA 0.0000000000
# 22:   time      disgust_negated          9             0          NA 0.0000000000
# 23:   time                 fear          9             0          NA 0.0000000000
# 24:   time         fear_negated          9             0          NA 0.0000000000
# 25:   time                  joy          9             0          NA 0.0000000000
# 26:   time          joy_negated          9             0          NA 0.0000000000
# 27:   time              sadness          9             0          NA 0.0000000000
# 28:   time      sadness_negated          9             0          NA 0.0000000000
# 29:   time             surprise          9             0          NA 0.0000000000
# 30:   time     surprise_negated          9             0          NA 0.0000000000
# 31:   time                trust          9             3          NA 0.3333333333
# 32:   time        trust_negated          9             0          NA 0.0000000000
# 33:   <NA>                anger      42689           351 0.027994351 0.0082222587
# 34:   <NA>        anger_negated      42689            40 0.012895922 0.0009370095
# 35:   <NA>         anticipation      42689           832 0.053376685 0.0194897983
# 36:   <NA> anticipation_negated      42689            73 0.016252142 0.0017100424
# 37:   <NA>              disgust      42689           154 0.020035878 0.0036074867
# 38:   <NA>      disgust_negated      42689            17 0.004780696 0.0003982291
# 39:   <NA>                 fear      42689           510 0.034817032 0.0119468716
# 40:   <NA>         fear_negated      42689            75 0.016886758 0.0017568929
# 41:   <NA>                  joy      42689           598 0.047350996 0.0140082925
# 42:   <NA>          joy_negated      42689            64 0.020203098 0.0014992153
# 43:   <NA>              sadness      42689           422 0.032549813 0.0098854506
# 44:   <NA>      sadness_negated      42689            41 0.013538601 0.0009604348
# 45:   <NA>             surprise      42689           289 0.039821866 0.0067698939
# 46:   <NA>     surprise_negated      42689            27 0.007772531 0.0006324814
# 47:   <NA>                trust      42689          1597 0.104054383 0.0374101056
# 48:   <NA>        trust_negated      42689           134 0.025001228 0.0031389819
#       time         emotion_type word_count emotion_count          sd  ave_emotion
trinker commented 5 years ago

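Misuse: `by` should be given the grouping variables themselves as a list, not a character vector of column names. The proper way: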

> with(presidential_debates_2012,  sentiment_by(pres_sent2, by = list(person, time)))
       person   time word_count        sd ave_sentiment
 1:     OBAMA time 1       3599 0.2535006    0.12256892
 2:     OBAMA time 2       7477 0.2509177    0.11217673
 3:     OBAMA time 3       7243 0.2441394    0.07975688
 4:    ROMNEY time 1       4085 0.2525596    0.10151917
 5:    ROMNEY time 2       7536 0.2205169    0.08791018
 6:    ROMNEY time 3       8303 0.2623534    0.09968544
 7:   CROWLEY time 2       1672 0.2181662    0.19455290
 8:    LEHRER time 1        765 0.2973360    0.15473364
 9:  QUESTION time 2        583 0.1756778    0.03197751
10: SCHIEFFER time 3       1445 0.2345187    0.08843478
> with(presidential_debates_2012,  emotion_by(pres_emo, by = list(person, time)))
     person   time         emotion_type word_count emotion_count          sd  ave_emotion
  1: LEHRER time 1                anger        765             3 0.019164239 0.0039215686
  2: LEHRER time 1        anger_negated        765             0 0.000000000 0.0000000000
  3: LEHRER time 1         anticipation        765            16 0.049885491 0.0209150327
  4: LEHRER time 1 anticipation_negated        765             0 0.000000000 0.0000000000
  5: LEHRER time 1              disgust        765             1 0.004845471 0.0013071895
 ---                                                                                     
156:  OBAMA time 3      sadness_negated       7243             4 0.006132821 0.0005522574
157:  OBAMA time 3             surprise       7243            38 0.018355134 0.0052464448
158:  OBAMA time 3     surprise_negated       7243             6 0.006236082 0.0008283860
159:  OBAMA time 3                trust       7243           260 0.119461749 0.0358967279
160:  OBAMA time 3        trust_negated       7243            28 0.025254012 0.0038658015