Open trinker opened 10 years ago
This is an initial prototype, but I am not convinced it is useful yet.
# Prototype 1: running proportion of each search term over the rows of
# `raj.act.1`, drawn as one line per term.
# NOTE(review): `termco`, `mgsub` and `raj.act.1` presumably come from qdap and
# `ggplot` from ggplot2; neither package is attached in this snippet, so both
# are assumed to be loaded already -- confirm.
term <- c("the ", "she", " wh")
(out <- with(raj.act.1, termco(dialogue, person, term)))

library(tidyr)
library(dplyr)

# Working data is the "by.row" attribute of the raw count table.
dat <- attr(out[["raw"]], "by.row", exact = TRUE)

# Every column after the first two is divided, cumulatively, by the running
# total of column 2 (presumably the per-row word count -- confirm).
term_cols <- -(1:2)
running_total <- cumsum(dat[[2]])
dat[, term_cols] <- apply(
  dat[, term_cols], 2,
  function(hits) cumsum(hits) / running_total
)
cummean(dat[[2]])

# Long format: add a row index, stack columns 3:5, then relabel
# "term(x)" as "[x]" for the legend.
datw <- mutate(dat, n = qdapTools::id(dat, pad = FALSE))
datw <- gather(datw, terms, counts, 3:5)
datw <- mutate(
  datw,
  terms = mgsub(c("term\\(", "\\)$"), c("[", "]"), terms, fixed = FALSE)
)

ggplot(datw, aes(y = counts, x = n, color = terms)) +
  geom_line(size = 1)
# Prototype 2 (data prep): cumulative counts of politeness terms in
# `pres_debates2012`, grouped by person and time.
# NOTE(review): `termco`, `Trim`, `colsplit2df` and `pres_debates2012` are
# presumably provided by qdap, which is assumed to be attached -- confirm.
politness <- c(
  "please", "excuse me", "thank you", "you welcome",
  "you're welcome", "i'm sorry", "forgive me", "pardon me"
)
out <- with(pres_debates2012, termco(dialogue, list(person, time), politness))

library(tidyr)
library(dplyr)

# Running proportions (cumulative term count / cumulative column 2) with the
# all-zero numeric columns dropped. Below, `dat` is only used as the input to
# `qdapTools::id()` when building the row index `n`.
dat <- attributes(out[["raw"]])[["by.row"]]
dat[, -c(1:2)] <- apply(
  dat[, -c(1:2)], 2,
  function(x) cumsum(x) / cumsum(dat[[2]])
)
# vapply() instead of sapply(): the result is always a logical vector.
dat <- dat[, !vapply(dat, function(x) is.numeric(x) && all(x == 0), logical(1))]

# Raw per-row counts, again without all-zero columns, with column names
# cleaned: "term(" / ")" / apostrophes removed and whitespace turned into "_"
# (so "term(thank you)" becomes "thank_you").
dat2 <- attributes(out[["raw"]])[["by.row"]]
dat2 <- dat2[
  , !vapply(dat2, function(x) is.numeric(x) && all(x == 0), logical(1))
]
colnames(dat2) <- gsub(
  "\\s+", "_",
  Trim(gsub("\\)$|term\\(|'", "", colnames(dat2)))
)

# Split the combined grouping column into Person/Time, add a row index,
# accumulate each term within Time x Person, then stack columns 4:7.
datw2 <- dat2 %>%
  colsplit2df(new.names = c("Person", "Time")) %>%
  mutate(
    n = qdapTools::id(dat, pad = FALSE),
    Time = gsub("\\D", "", Time)
  ) %>%
  group_by(Time, Person) %>%
  mutate(
    please = cumsum(please),
    excuse_me = cumsum(excuse_me),
    thank_you = cumsum(thank_you),
    im_sorry = cumsum(im_sorry)
  ) %>%
  # Drop the grouping once the cumulative sums are done so `datw2` is a plain
  # (ungrouped) frame for the plotting code.
  ungroup() %>%
  gather(terms, counts, 4:7)
# Line plots of the cumulative counts in `datw2`.
# NOTE(review): `ggplot` & co. are assumed to be available via an attached
# ggplot2 -- confirm.

# All terms, one panel per Person x Time.
ggplot(datw2, aes(y = counts, x = n, color = terms)) +
  geom_line(size = .75) +
  facet_grid(Person ~ Time, scales = "free_x", space = "free")

# Same plot without "thank you". The `terms` column holds the cleaned column
# names of `dat2` (whitespace replaced by "_"), so the value to exclude is
# "thank_you"; the previous label "[thank you]" never matched and the filter
# kept every row.
ggplot(
  datw2 %>% filter(terms != "thank_you"),
  aes(y = counts, x = n, color = terms)
) +
  geom_line(size = .75) +
  facet_grid(Person ~ Time, scales = "free_x", space = "free")

# Restricted to the two candidates, coloured by term.
datw2 %>%
  filter(terms != "thank_you" & Person %in% c("OBAMA", "ROMNEY")) %>%
  ggplot(aes(y = counts, x = n, color = terms)) +
  geom_line(size = .75) +
  facet_grid(Person ~ Time, scales = "free_x", space = "free")

# Restricted to the two candidates, coloured by person, one row per term.
datw2 %>%
  filter(terms != "thank_you" & Person %in% c("OBAMA", "ROMNEY")) %>%
  ggplot(aes(y = counts, x = n, color = Person)) +
  geom_line(size = .75) +
  facet_grid(terms ~ Time, scales = "free_x", space = "free")
# Non-cumulative variant: the per-row counts from `dat2` in long format
# (no grouped cumsum step).
datw3 <- dat2 %>%
  colsplit2df(new.names = c("Person", "Time")) %>%
  mutate(
    n = qdapTools::id(dat, pad = FALSE),
    Time = gsub("\\D", "", Time)
  ) %>%
  gather(terms, counts, 4:7)

# `terms` holds the cleaned column names of `dat2` (whitespace replaced by
# "_"), so "thank you" is excluded as "thank_you"; the previous label
# "[thank you]" never matched any row.
datw3 %>%
  filter(terms != "thank_you" & Person %in% c("OBAMA", "ROMNEY")) %>%
  ggplot(aes(y = counts, x = n, color = Person)) +
  geom_line(size = .75, alpha = .3) +
  facet_grid(terms ~ Time, scales = "free_x", space = "free")

# Bars weighted by the counts, filled by term.
datw3 %>%
  filter(terms != "thank_you" & Person %in% c("OBAMA", "ROMNEY")) %>%
  ggplot(aes(weight = counts, x = n, fill = terms)) +
  geom_bar(size = .75) +
  facet_grid(Person ~ Time, scales = "free_x", space = "free")

# Term list restated for the example that follows. It used to be fused onto
# the end of the plotting statement above, which made the script unparsable.
politness <- c(
  "please", "excuse me", "thank you", "you welcome",
  "you're welcome", "i'm sorry", "forgive me", "pardon me"
)
# Prototype 3 (data prep): as the previous example, but grouped by person,
# role and time. Uses the `politness` term vector defined earlier in the file.
# NOTE(review): `termco`, `Trim`, `colsplit2df` and `pres_debates2012` are
# presumably provided by qdap, which is assumed to be attached -- confirm.
out <- with(
  pres_debates2012,
  termco(dialogue, list(person, role, time), politness)
)

library(tidyr)
library(dplyr)

# Running proportions (cumulative term count / cumulative column 2) with the
# all-zero numeric columns dropped. Below, `dat` is only used as the input to
# `qdapTools::id()` when building the row index `n`.
dat <- attributes(out[["raw"]])[["by.row"]]
dat[, -c(1:2)] <- apply(
  dat[, -c(1:2)], 2,
  function(x) cumsum(x) / cumsum(dat[[2]])
)
# vapply() instead of sapply(): the result is always a logical vector.
dat <- dat[, !vapply(dat, function(x) is.numeric(x) && all(x == 0), logical(1))]

# Raw per-row counts, again without all-zero columns, with column names
# cleaned: "term(" / ")" / apostrophes removed and whitespace turned into "_"
# (so "term(thank you)" becomes "thank_you").
dat2 <- attributes(out[["raw"]])[["by.row"]]
dat2 <- dat2[
  , !vapply(dat2, function(x) is.numeric(x) && all(x == 0), logical(1))
]
colnames(dat2) <- gsub(
  "\\s+", "_",
  Trim(gsub("\\)$|term\\(|'", "", colnames(dat2)))
)

# Split the combined grouping column into Person/Role/Time, add a row index,
# accumulate each term within Time x Person, then stack columns 5:8.
datw2 <- dat2 %>%
  colsplit2df(new.names = c("Person", "Role", "Time")) %>%
  mutate(
    n = qdapTools::id(dat, pad = FALSE),
    Time = gsub("\\D", "", Time)
  ) %>%
  group_by(Time, Person) %>%
  mutate(
    please = cumsum(please),
    excuse_me = cumsum(excuse_me),
    thank_you = cumsum(thank_you),
    im_sorry = cumsum(im_sorry)
  ) %>%
  # Drop the grouping once the cumulative sums are done so `datw2` is a plain
  # (ungrouped) frame for the plotting code.
  ungroup() %>%
  gather(terms, counts, 5:8)
# Line plots of the cumulative counts in `datw2` (Person/Role/Time version).
# NOTE(review): `ggplot` & co. are assumed to be available via an attached
# ggplot2 -- confirm.

# Candidates only, all terms; lines are semi-transparent but the legend keys
# are forced to full opacity.
datw2 %>%
  filter(Role == "candidate") %>%
  ggplot(aes(y = counts, x = n, color = terms)) +
  geom_line(size = .885, alpha = .5) +
  facet_grid(Person ~ Time, scales = "free_x", space = "free") +
  theme_bw() +
  guides(colour = guide_legend(override.aes = list(alpha = 1)))

# Everyone, without "thank you". The `terms` column holds the cleaned column
# names of `dat2` (whitespace replaced by "_"), so the value to exclude is
# "thank_you"; the previous label "[thank you]" never matched and the filter
# kept every row.
ggplot(
  datw2 %>% filter(terms != "thank_you"),
  aes(y = counts, x = n, color = terms)
) +
  geom_line(size = .75) +
  facet_grid(Person ~ Time, scales = "free_x", space = "free")

# Candidates only, coloured by term.
datw2 %>%
  filter(terms != "thank_you" & Role == "candidate") %>%
  ggplot(aes(y = counts, x = n, color = terms)) +
  geom_line(size = .75) +
  facet_grid(Person ~ Time, scales = "free_x", space = "free")

# Candidates only, coloured by person, one row per term.
datw2 %>%
  filter(terms != "thank_you" & Role == "candidate") %>%
  ggplot(aes(y = counts, x = n, color = Person)) +
  geom_line(size = .75) +
  facet_grid(terms ~ Time, scales = "free_x", space = "free")
# Non-cumulative variant: the per-row counts from `dat2` in long format
# (no grouped cumsum step), Person/Role/Time version.
datw3 <- dat2 %>%
  colsplit2df(new.names = c("Person", "Role", "Time")) %>%
  mutate(
    n = qdapTools::id(dat, pad = FALSE),
    Time = gsub("\\D", "", Time)
  ) %>%
  gather(terms, counts, 5:8)

# `terms` holds the cleaned column names of `dat2` (whitespace replaced by
# "_"), so "thank you" is excluded as "thank_you"; the previous label
# "[thank you]" never matched any row.
datw3 %>%
  filter(terms != "thank_you" & Role == "candidate") %>%
  ggplot(aes(y = counts, x = n, color = Person)) +
  geom_line(size = .75, alpha = .3) +
  facet_grid(terms ~ Time, scales = "free_x", space = "free")

# Bars weighted by the counts, filled by term.
datw3 %>%
  filter(terms != "thank_you" & Role == "candidate") %>%
  ggplot(aes(weight = counts, x = n, fill = terms)) +
  geom_bar(size = .75) +
  facet_grid(Person ~ Time, scales = "free_x", space = "free")
There should be a `cumulative` method for `termco`,
similar to: http://flowingdata.com/2014/09/17/search-for-word-usage-in-movies-and-television-over-time/

This could be raw counts or relative to the number of words (target word use/words).