cumulative method for termco

trinker commented 10 years ago

There should be a cumulative method for termco similar to: http://flowingdata.com/2014/09/17/search-for-word-usage-in-movies-and-television-over-time/

The output could be raw counts or counts relative to the number of words (target-word uses / total words).

trinker commented 10 years ago

This is an initial prototype, but I am not convinced it is useful yet.

# Prototype: running rate of each search term across the rows of the
# dialogue, drawn as one line per term.
#
# Attach every package the snippet calls so it runs from a clean session:
# termco(), mgsub() and raj.act.1 come from qdap; ggplot()/geom_line() come
# from ggplot2, which library(qdap) does not attach.
library(qdap)
library(ggplot2)
library(tidyr)
library(dplyr)

term <- c("the ", "she", " wh")
(out <- with(raj.act.1,  termco(dialogue, person, term)))

# Row-level counts are stored as the "by.row" attribute of the raw output.
# NOTE(review): column 1 looks like the grouping variable and column 2 the
# per-row word count -- confirm against termco's by.row layout.
dat <- attributes(out[["raw"]])[["by.row"]]

# Replace every term column (3 onward) with its running total divided by the
# running total of column 2. lapply() keeps the data frame's column types
# instead of coercing through a matrix the way apply() does.
denom <- cumsum(dat[[2]])
dat[, -c(1:2)] <- lapply(dat[, -c(1:2)], function(x) cumsum(x) / denom)

# Exploratory only: prints the running mean of column 2.
cummean(dat[[2]])

# Long format: one row per (row id, term). Term labels are rewritten from
# "term(xxx)" to "[xxx]" for the legend.
datw <- dat %>%
    mutate(n=qdapTools::id(dat, pad=FALSE)) %>%
    gather(terms, counts, 3:5) %>%
    mutate(terms =  mgsub(c("term\\(", "\\)$"), c("[", "]"), terms, fixed=FALSE))

ggplot(datw, aes(y=counts, x=n, color=terms)) +
    geom_line(size=1)

Imgur

trinker commented 10 years ago

# Cumulative counts of politeness terms in the 2012 presidential debates,
# grouped by person and debate (time).
#
# Attach every package the snippet calls so it runs from a clean session
# (termco/colsplit2df/Trim/pres_debates2012: qdap; plotting: ggplot2).
library(qdap)
library(ggplot2)
library(tidyr)
library(dplyr)

politness <- c("please", "excuse me", "thank you", "you welcome",
    "you're welcome", "i'm sorry", "forgive me", "pardon me")

out <- with(pres_debates2012, termco(dialogue, list(person, time), politness))

# TRUE for numeric columns that are zero in every row (terms never used).
all_zero <- function(x) is.numeric(x) && all(x == 0)

# dat: by-row counts turned into cumulative proportions of column 2, with
# never-used terms dropped. In this snippet it is only used further down to
# generate the row ids (n).
dat <- attributes(out[["raw"]])[["by.row"]]
dat[, -c(1:2)] <- apply(dat[, -c(1:2)], 2, function(x) cumsum(x)/cumsum(dat[[2]]))
dat <- dat[, !vapply(dat, all_zero, logical(1))]

# dat2: raw by-row counts with never-used terms dropped and the column names
# cleaned, e.g. "term(thank you)" -> "thank_you", "term(i'm sorry)" -> "im_sorry".
dat2 <- attributes(out[["raw"]])[["by.row"]]
dat2 <- dat2[, !vapply(dat2, all_zero, logical(1))]
colnames(dat2) <- gsub("\\s+", "_", (Trim(gsub("\\)$|term\\(|'", "",  colnames(dat2)))))

# Running totals per person within each debate, reshaped to long format.
# ungroup() so the grouping does not leak into later filter()/plot steps.
datw2 <- dat2 %>% colsplit2df(new.names=c("Person", "Time")) %>%
    mutate(n=qdapTools::id(dat, pad=FALSE), 
        Time=gsub("\\D", "", Time)
    ) %>%
    group_by(Time, Person) %>%
    mutate(please = cumsum(please), 
        excuse_me = cumsum(excuse_me), 
        thank_you = cumsum(thank_you), 
        im_sorry = cumsum(im_sorry)
    ) %>%
    ungroup() %>%
    gather(terms, counts, 4:7) 

ggplot(datw2, aes(y=counts, x=n, color=terms)) +
    geom_line(size=.75) + facet_grid(Person~Time, scales="free_x", space="free")

# The term labels here are the cleaned column names, so "thank you" must be
# excluded as "thank_you"; the bracketed form "[thank you]" never matches.
ggplot(datw2 %>% filter(terms != "thank_you"), aes(y=counts, x=n, color=terms)) +
    geom_line(size=.75) + facet_grid(Person~Time, scales="free_x", space="free")

datw2 %>% filter(terms != "thank_you" & Person %in% c("OBAMA", "ROMNEY")) %>%
ggplot(aes(y=counts, x=n, color=terms)) +
    geom_line(size=.75) + facet_grid(Person~Time, scales="free_x", space="free")

datw2 %>% filter(terms != "thank_you" & Person %in% c("OBAMA", "ROMNEY")) %>%
ggplot(aes(y=counts, x=n, color=Person)) +
    geom_line(size=.75) + facet_grid(terms~Time, scales="free_x", space="free")

# Same reshape without the cumulative sums: raw per-row counts.
datw3 <- dat2 %>% colsplit2df(new.names=c("Person", "Time")) %>%
    mutate(n=qdapTools::id(dat, pad=FALSE), 
        Time=gsub("\\D", "", Time)
    ) %>%
    gather(terms, counts, 4:7) 

datw3 %>% filter(terms != "thank_you" & Person %in% c("OBAMA", "ROMNEY")) %>%
ggplot(aes(y=counts, x=n, color=Person)) +
    geom_line(size=.75, alpha=.3) + 
    facet_grid(terms~Time, scales="free_x", space="free")

datw3 %>% filter(terms != "thank_you" & Person %in% c("OBAMA", "ROMNEY")) %>%
ggplot(aes(weight=counts, x=n, fill=terms)) +
    geom_bar(size=.75) + facet_grid(Person~Time, scales="free_x", space="free")

# Politeness terms for the next run, which also groups by role. Same vector
# as above; it was fused onto the end of the previous plot call.
politness <- c("please", "excuse me", "thank you", "you welcome",
    "you're welcome", "i'm sorry", "forgive me", "pardon me")

# Cumulative counts of politeness terms in the 2012 presidential debates,
# grouped by person, role and debate (time). Uses the `politness` vector
# defined just above.
#
# Attach every package the snippet calls so it runs from a clean session
# (termco/colsplit2df/Trim/pres_debates2012: qdap; plotting: ggplot2).
library(qdap)
library(ggplot2)
library(tidyr)
library(dplyr)

out <- with(pres_debates2012, termco(dialogue, list(person, role, time), politness))

# TRUE for numeric columns that are zero in every row (terms never used).
all_zero <- function(x) is.numeric(x) && all(x == 0)

# dat: by-row counts turned into cumulative proportions of column 2, with
# never-used terms dropped. In this snippet it is only used further down to
# generate the row ids (n).
dat <- attributes(out[["raw"]])[["by.row"]]
dat[, -c(1:2)] <- apply(dat[, -c(1:2)], 2, function(x) cumsum(x)/cumsum(dat[[2]]))
dat <- dat[, !vapply(dat, all_zero, logical(1))]

# dat2: raw by-row counts with never-used terms dropped and the column names
# cleaned, e.g. "term(thank you)" -> "thank_you", "term(i'm sorry)" -> "im_sorry".
dat2 <- attributes(out[["raw"]])[["by.row"]]
dat2 <- dat2[, !vapply(dat2, all_zero, logical(1))]
colnames(dat2) <- gsub("\\s+", "_", (Trim(gsub("\\)$|term\\(|'", "",  colnames(dat2)))))

# Running totals per person within each debate, reshaped to long format.
# ungroup() so the grouping does not leak into later filter()/plot steps.
datw2 <- dat2 %>% colsplit2df(new.names=c("Person", "Role", "Time")) %>%
    mutate(n=qdapTools::id(dat, pad=FALSE), 
        Time=gsub("\\D", "", Time)
    ) %>%
    group_by(Time, Person) %>%
    mutate(please = cumsum(please), 
        excuse_me = cumsum(excuse_me), 
        thank_you = cumsum(thank_you), 
        im_sorry = cumsum(im_sorry)
    ) %>%
    ungroup() %>%
    gather(terms, counts, 5:8) 

datw2 %>% filter(Role=="candidate") %>%
    ggplot(aes(y=counts, x=n, color=terms)) +
        geom_line(size=.885, alpha= .5) + facet_grid(Person~Time, scales="free_x", space="free") +
        theme_bw() + 
        guides(colour = guide_legend(override.aes = list(alpha = 1)))

# The term labels here are the cleaned column names, so "thank you" must be
# excluded as "thank_you"; the bracketed form "[thank you]" never matches.
ggplot(datw2 %>% filter(terms != "thank_you"), aes(y=counts, x=n, color=terms)) +
    geom_line(size=.75) + facet_grid(Person~Time, scales="free_x", space="free")

datw2 %>% filter(terms != "thank_you" & Role == "candidate") %>%
ggplot(aes(y=counts, x=n, color=terms)) +
    geom_line(size=.75) + facet_grid(Person~Time, scales="free_x", space="free")

datw2 %>% filter(terms != "thank_you" & Role == "candidate") %>%
ggplot(aes(y=counts, x=n, color=Person)) +
    geom_line(size=.75) + facet_grid(terms~Time, scales="free_x", space="free")

# Same reshape without the cumulative sums: raw per-row counts.
datw3 <- dat2 %>% colsplit2df(new.names=c("Person", "Role", "Time")) %>%
    mutate(n=qdapTools::id(dat, pad=FALSE), 
        Time=gsub("\\D", "", Time)
    ) %>%
    gather(terms, counts, 5:8) 

datw3 %>% filter(terms != "thank_you" & Role == "candidate") %>%
ggplot(aes(y=counts, x=n, color=Person)) +
    geom_line(size=.75, alpha=.3) + 
    facet_grid(terms~Time, scales="free_x", space="free")

datw3 %>% filter(terms != "thank_you" & Role == "candidate") %>%
ggplot(aes(weight=counts, x=n, fill=terms)) +
    geom_bar(size=.75) + facet_grid(Person~Time, scales="free_x", space="free")

trinker / qdap

cumulative method for termco #198