Open Noeverer opened 4 years ago
Could you please make your examples reproducible? Ideally, you should be able to copy-paste them into a REPL to reproduce your problem.
Currently, it's not trivial to reproduce your problem, because some variables are not specified (e.g. train_corpus, num_topics, dictionary, etc).
I am getting the same problem. My corpus has 500 documents, and the dictionary has ~5000 unique tokens. Unfortunately, I am unsure how to get you the exact data to reproduce.
My machine details are:
This LdaMulticore code does not work:
# # Set training parameters.
# num_topics = 10
# chunksize = 2000
# workers=15
# passes = 20
# iterations = 50
# eval_every = 10 # Don't evaluate model perplexity, takes too much time.
# # Make a index to word dictionary.
# temp = dictionary[0] # This is only to "load" the dictionary.
# id2word = dictionary.id2token
# model = ldamulticore.LdaMulticore(
# corpus=corpus,
# id2word=id2word,
# workers=workers,
# chunksize=chunksize,
# batch=False,
# alpha='symmetric',
# eta='auto',
# iterations=iterations,
# num_topics=num_topics,
# passes=passes,
# eval_every=eval_every,
# decay=0.5,
# offset=1.0,
# gamma_threshold=0.001,
# random_state=None,
# minimum_probability=0.01,
# minimum_phi_value=0.01,
# per_word_topics=False,
# )
Here is the location of the process at keyboard interrupt:
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
<ipython-input-53-f4eefd2beb6e> in <module>
----> 1 lda = ldamulticore.LdaMulticore(corpus, id2word=id2word, num_topics=10, workers=15)
~/.conda/envs/sheth7/lib/python3.8/site-packages/gensim/models/ldamulticore.py in __init__(self, corpus, num_topics, id2word, workers, chunksize, passes, batch, alpha, eta, decay, offset, eval_every, iterations, gamma_threshold, random_state, minimum_probability, minimum_phi_value, per_word_topics, dtype)
177 raise NotImplementedError("auto-tuning alpha not implemented in multicore LDA; use plain LdaModel.")
178
--> 179 super(LdaMulticore, self).__init__(
180 corpus=corpus, num_topics=num_topics,
181 id2word=id2word, chunksize=chunksize, passes=passes, alpha=alpha, eta=eta,
~/.conda/envs/sheth7/lib/python3.8/site-packages/gensim/models/ldamodel.py in __init__(self, corpus, num_topics, id2word, distributed, chunksize, passes, update_every, alpha, eta, decay, offset, eval_every, iterations, gamma_threshold, minimum_probability, random_state, ns_conf, minimum_phi_value, per_word_topics, callbacks, dtype)
517 if corpus is not None:
518 use_numpy = self.dispatcher is not None
--> 519 self.update(corpus, chunks_as_numpy=use_numpy)
520
521 def init_dir_prior(self, prior, name):
~/.conda/envs/sheth7/lib/python3.8/site-packages/gensim/models/ldamulticore.py in update(self, corpus, chunks_as_numpy)
308 # wait for all outstanding jobs to finish
309 while queue_size[0] > 0:
--> 310 process_result_queue(force=True)
311
312 if reallen != lencorpus:
~/.conda/envs/sheth7/lib/python3.8/site-packages/gensim/models/ldamulticore.py in process_result_queue(force)
266 """
267 merged_new = False
--> 268 while not result_queue.empty():
269 other.merge(result_queue.get())
270 queue_size[0] -= 1
~/.conda/envs/sheth7/lib/python3.8/multiprocessing/queues.py in empty(self)
121
122 def empty(self):
--> 123 return not self._poll()
124
125 def full(self):
~/.conda/envs/sheth7/lib/python3.8/multiprocessing/connection.py in poll(self, timeout)
255 self._check_closed()
256 self._check_readable()
--> 257 return self._poll(timeout)
258
259 def __enter__(self):
~/.conda/envs/sheth7/lib/python3.8/multiprocessing/connection.py in _poll(self, timeout)
422
423 def _poll(self, timeout):
--> 424 r = wait([self], timeout)
425 return bool(r)
426
~/.conda/envs/sheth7/lib/python3.8/multiprocessing/connection.py in wait(object_list, timeout)
929
930 while True:
--> 931 ready = selector.select(timeout)
932 if ready:
933 return [key.fileobj for (key, events) in ready]
~/.conda/envs/sheth7/lib/python3.8/selectors.py in select(self, timeout)
413 ready = []
414 try:
--> 415 fd_event_list = self._selector.poll(timeout)
416 except InterruptedError:
417 return ready
When I run the non-multicore LdaModel, it runs and, surprisingly, uses all cores on my machine.
My PC has 64 CPU cores and a CUDA GPU, but when I use
this code, it outputs nothing and the machine seems to be stuck in an endless loop.
When I use the code below:
it works.
Why does multicore not work on Linux (Ubuntu)? Any help would be greatly appreciated.