Closed geokilly closed 2 years ago
Hi,
Running findThoughts returns documents that have almost no apparent association with the topic (i.e. the frex words from the topic are not present in any of the documents).
I wonder whether I am missing something in my data, my code, or both.
Many thanks, Geoff
# Fit a structural topic model (stm) to the speeches and display the most
# representative documents for a single topic.

# Read the speeches; the "text" column of the CSV holds the document text.
TextB <- readtext("Speeches KW MIG.csv", text_field = "text")

# NOTE(review): Date is compared against the string "1996" -- confirm that
# this selects the intended rows for the way Date is stored in the CSV.
subtextB <- subset.data.frame(TextB, Date >= "1996")

# Recode Date as integer codes (one per distinct value) so that it can be
# passed to the spline term in the prevalence formula below.
subtextB$Date_ok <- as.numeric(as.factor(subtextB$Date))

newCorpB <- corpus(subtextB, docid_field = "doc_id")

# Tokenise, then drop English stopwords and stem the remaining tokens.
migTokB <- tokens(
  newCorpB,
  remove_punct = TRUE,
  remove_symbols = TRUE,
  remove_numbers = TRUE
) %>%
  tokens_remove(stopwords("english")) %>%
  tokens_wordstem()

dfm_speech <- dfm(migTokB)

# Keep features whose document frequency, as a proportion of documents,
# lies between 0.075 and 0.90.
dfm_speech.trim <- dfm_trim(
  dfm_speech,
  min_docfreq = 0.075,
  max_docfreq = 0.90,
  docfreq_type = "prop"
)

stm_speech <- convert(
  dfm_speech.trim,
  to = "stm",
  docvars = docvars(newCorpB)
)

stmFitted <- stm(
  documents = stm_speech$documents,
  vocab = stm_speech$vocab,
  K = 45,
  prevalence = ~s(Date_ok),
  max.em.its = 75,
  data = stm_speech$meta,
  init.type = "Spectral"
)

plot(stmFitted, type = "labels", labeltype = "frex")

# findThoughts() matches texts to model documents purely by position, so the
# texts must be exactly the documents the model was fitted on, in the same
# order. Select them by the document names carried on the converted object
# instead of taking every text in the corpus: if the conversion dropped any
# document (e.g. one left empty after trimming), positions would otherwise
# shift and unrelated documents would be displayed.
# as.character() is used rather than reaching into the corpus internals
# (newCorpB$documents$texts), which only exist in old quanteda releases.
docs_S <- as.character(newCorpB)[names(stm_speech$documents)]
stopifnot(
  length(docs_S) == length(stm_speech$documents),
  !anyNA(docs_S)
)

# Use one topic number for both the lookup and the plot. The original script
# searched topic 36 but plotted an object named thought35 under the title
# "Topic 35", i.e. not the result computed here.
# NOTE(review): topic 36 is taken from the findThoughts() call; change
# topic_id if topic 35 was the one intended.
topic_id <- 36
thought36 <- findThoughts(stmFitted, texts = docs_S, topic = topic_id, n = 4)
plotQuote(thought36, width = 90, main = paste("Topic", topic_id))

# Look up four documents by id (presumably the ones findThoughts() returned).
newCorpB[["Speeches KW MIG.csv.586"]]
newCorpB[["Speeches KW MIG.csv.666"]]
newCorpB[["Speeches KW MIG.csv.739"]]
newCorpB[["Speeches KW MIG.csv.434"]]

# Input data file read above: Speeches KW MIG.csv
Hi,
Running findThoughts returns documents that have almost no apparent association with the topic (i.e. the frex words from the topic are not present in any of the documents).
I wonder whether I am missing something in my data, my code, or both.
Many thanks, Geoff
# Fit a structural topic model (stm) to the speeches and display the most
# representative documents for a single topic.

# Read the speeches; the "text" column of the CSV holds the document text.
TextB <- readtext("Speeches KW MIG.csv", text_field = "text")

# NOTE(review): Date is compared against the string "1996" -- confirm that
# this selects the intended rows for the way Date is stored in the CSV.
subtextB <- subset.data.frame(TextB, Date >= "1996")

# Recode Date as integer codes (one per distinct value) so that it can be
# passed to the spline term in the prevalence formula below.
subtextB$Date_ok <- as.numeric(as.factor(subtextB$Date))

newCorpB <- corpus(subtextB, docid_field = "doc_id")

# Tokenise, then drop English stopwords and stem the remaining tokens.
migTokB <- tokens(
  newCorpB,
  remove_punct = TRUE,
  remove_symbols = TRUE,
  remove_numbers = TRUE
) %>%
  tokens_remove(stopwords("english")) %>%
  tokens_wordstem()

dfm_speech <- dfm(migTokB)

# Keep features whose document frequency, as a proportion of documents,
# lies between 0.075 and 0.90.
dfm_speech.trim <- dfm_trim(
  dfm_speech,
  min_docfreq = 0.075,
  max_docfreq = 0.90,
  docfreq_type = "prop"
)

stm_speech <- convert(
  dfm_speech.trim,
  to = "stm",
  docvars = docvars(newCorpB)
)

stmFitted <- stm(
  documents = stm_speech$documents,
  vocab = stm_speech$vocab,
  K = 45,
  prevalence = ~s(Date_ok),
  max.em.its = 75,
  data = stm_speech$meta,
  init.type = "Spectral"
)

plot(stmFitted, type = "labels", labeltype = "frex")

# findThoughts() matches texts to model documents purely by position, so the
# texts must be exactly the documents the model was fitted on, in the same
# order. Select them by the document names carried on the converted object
# instead of taking every text in the corpus: if the conversion dropped any
# document (e.g. one left empty after trimming), positions would otherwise
# shift and unrelated documents would be displayed.
# as.character() is used rather than reaching into the corpus internals
# (newCorpB$documents$texts), which only exist in old quanteda releases.
docs_S <- as.character(newCorpB)[names(stm_speech$documents)]
stopifnot(
  length(docs_S) == length(stm_speech$documents),
  !anyNA(docs_S)
)

# Use one topic number for both the lookup and the plot. The original script
# searched topic 36 but plotted an object named thought35 under the title
# "Topic 35", i.e. not the result computed here.
# NOTE(review): topic 36 is taken from the findThoughts() call; change
# topic_id if topic 35 was the one intended.
topic_id <- 36
thought36 <- findThoughts(stmFitted, texts = docs_S, topic = topic_id, n = 4)
plotQuote(thought36, width = 90, main = paste("Topic", topic_id))

# Look up four documents by id (presumably the ones findThoughts() returned).
newCorpB[["Speeches KW MIG.csv.586"]]
newCorpB[["Speeches KW MIG.csv.666"]]
newCorpB[["Speeches KW MIG.csv.739"]]
newCorpB[["Speeches KW MIG.csv.434"]]

# Input data file read above: Speeches KW MIG.csv