bnosac / doc2vec

Distributed Representations of Sentences and Documents
Other
46 stars 5 forks source link

boom #21

Closed jwijffels closed 2 years ago

jwijffels commented 2 years ago
# Load the package whose predict() method is being exercised.
library(doc2vec)

# Minimal training set: a single document containing a handful of words.
corpus <- data.frame(
  doc_id = 1,
  text = "here are some words for training the model"
)

# Fit a small PV-DM model on that one document.
# NOTE(review): min_count = 1 is presumably needed so that words occurring
# only once are kept in the vocabulary -- confirm against the package docs.
model <- paragraph2vec(
  x = corpus,
  type = "PV-DM",
  dim = 10,
  iter = 20,
  min_count = 1
)

# Control case: one long token made of a single repeated character.
# This input is reported to go through predict() without problems.
successtext <- "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG"

# Print the token length so it can be compared with the failing case.
nchar(successtext)

# Request a document embedding for the single new document "a".
predict(
  model,
  newdata = list(a = successtext),
  type = "embedding",
  which = "docs"
)

# Failing case: the same kind of single-character token as the control input.
# This input is reported to crash when passed to predict().
failtext <- "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG"

# Print the token length so it can be compared with the control case.
nchar(failtext)

# Same call as for the control input; only the text differs.
predict(
  model,
  newdata = list(a = failtext),
  type = "embedding",
  which = "docs"
)