aertslab / pycisTopic

pycisTopic is a Python module to simultaneously identify cell states and cis-regulatory topics from single cell epigenomics data.
Other
55 stars 10 forks source link

CalledProcessError: Command '['/home/taxue/mywork/pycisTopic/Mallet-202108/bin/mallet', 'import-file', '--preserve-case', '--keep-sequence', '--token-regex', '\\S+', '--input', './scratch/leuven/330/vsc33053/ray_spill/mallet/tutorial/corpus.txt', '--output', './scratch/leuven/330/vsc33053/ray_spill/mallet/tutorial/corpus.mallet']' returned non-zero exit status 127. — Bug report [BUG] #121

Open w1973145618 opened 3 months ago

w1973145618 commented 3 months ago

Describe the bug: CalledProcessError: Command '['/home/taxue/mywork/pycisTopic/Mallet-202108/bin/mallet', 'import-file', '--preserve-case', '--keep-sequence', '--token-regex', '\\S+', '--input', './scratch/leuven/330/vsc33053/ray_spill/mallet/tutorial/corpus.txt', '--output', './scratch/leuven/330/vsc33053/ray_spill/mallet/tutorial/corpus.mallet']' returned non-zero exit status 127.

To Reproduce:

```python
os.environ['MALLET_MEMORY'] = '200G'
from pycisTopic.lda_models import run_cgs_models_mallet

# Configure path Mallet
mallet_path="/home/taxue/mywork/pycisTopic/Mallet-202108/bin/mallet"

# Run models
models=run_cgs_models_mallet(
    cistopic_obj,
    n_topics=[2, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
    n_cpu=12,
    n_iter=500,
    random_state=555,
    alpha=50,
    alpha_by_topic=True,
    eta=0.1,
    eta_by_topic=False,
    tmp_path="./scratch/leuven/330/vsc33053/ray_spill/mallet/tutorial",
    save_path="./scratch/leuven/330/vsc33053/ray_spill/mallet/tutorial",
    mallet_path=mallet_path,
)
```

Error output CalledProcessError Traceback (most recent call last) File ~/anaconda3/Git/pycisTopic/src/pycisTopic/lda_models.py:532, in LDAMallet.convert_input(self, corpus) 531 try: --> 532 subprocess.check_output(args=cmd, shell=False, stderr=subprocess.STDOUT) 533 except subprocess.CalledProcessError as e:

File ~/anaconda3/envs/pycisTopic/lib/python3.11/subprocess.py:466, in check_output(timeout, *popenargs, **kwargs) 464 kwargs['input'] = empty --> 466 return run(*popenargs, stdout=PIPE, timeout=timeout, check=True, 467 **kwargs).stdout

File ~/anaconda3/envs/pycisTopic/lib/python3.11/subprocess.py:571, in run(input, capture_output, timeout, check, *popenargs, **kwargs) 570 if check and retcode: --> 571 raise CalledProcessError(retcode, process.args, 572 output=stdout, stderr=stderr) 573 return CompletedProcess(process.args, retcode, stdout, stderr)

CalledProcessError: Command '['/home/taxue/mywork/pycisTopic/Mallet-202108/bin/mallet', 'import-file', '--preserve-case', '--keep-sequence', '--token-regex', '\S+', '--input', './scratch/leuven/330/vsc33053/ray_spill/mallet/tutorial/corpus.txt', '--output', './scratch/leuven/330/vsc33053/ray_spill/mallet/tutorial/corpus.mallet']' returned non-zero exit status 127.

During handling of the above exception, another exception occurred:

RuntimeError Traceback (most recent call last) Cell In[47], line 6 4 mallet_path="/home/taxue/mywork/pycisTopic/Mallet-202108/bin/mallet" 5 # Run models ----> 6 models=run_cgs_models_mallet( 7 cistopic_obj, 8 n_topics=[2, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50], 9 n_cpu=12, 10 n_iter=500, 11 random_state=555, 12 alpha=50, 13 alpha_by_topic=True, 14 eta=0.1, 15 eta_by_topic=False, 16 tmp_path="./scratch/leuven/330/vsc33053/ray_spill/mallet/tutorial", 17 save_path="./scratch/leuven/330/vsc33053/ray_spill/mallet/tutorial", 18 mallet_path=mallet_path, 19 )

File ~/anaconda3/Git/pycisTopic/src/pycisTopic/lda_models.py:806, in run_cgs_models_mallet(cistopic_obj, n_topics, n_cpu, n_iter, random_state, alpha, alpha_by_topic, eta, eta_by_topic, top_topics_coh, tmp_path, save_path, reuse_corpus, mallet_path) 803 corpus = matutils.Sparse2Corpus(binary_matrix) 804 id2word = utils.FakeDict(len(region_names)) --> 806 model_list = [ 807 run_cgs_model_mallet( 808 binary_matrix=binary_matrix, 809 corpus=corpus, 810 id2word=id2word, 811 n_topics=n_topic, 812 cell_names=cell_names, 813 region_names=region_names, 814 n_cpu=n_cpu, 815 n_iter=n_iter, 816 random_state=random_state, 817 alpha=alpha, 818 alpha_by_topic=alpha_by_topic, 819 eta=eta, 820 eta_by_topic=eta_by_topic, 821 top_topics_coh=top_topics_coh, 822 tmp_path=tmp_path, 823 save_path=save_path, 824 reuse_corpus=reuse_corpus, 825 mallet_path=mallet_path, 826 ) 827 for n_topic in n_topics 828 ] 829 return model_list

File ~/anaconda3/Git/pycisTopic/src/pycisTopic/lda_models.py:807, in <listcomp>(.0) 803 corpus = matutils.Sparse2Corpus(binary_matrix) 804 id2word = utils.FakeDict(len(region_names)) 806 model_list = [ --> 807 run_cgs_model_mallet( 808 binary_matrix=binary_matrix, 809 corpus=corpus, 810 id2word=id2word, 811 n_topics=n_topic, 812 cell_names=cell_names, 813 region_names=region_names, 814 n_cpu=n_cpu, 815 n_iter=n_iter, 816 random_state=random_state, 817 alpha=alpha, 818 alpha_by_topic=alpha_by_topic, 819 eta=eta, 820 eta_by_topic=eta_by_topic, 821 top_topics_coh=top_topics_coh, 822 tmp_path=tmp_path, 823 save_path=save_path, 824 reuse_corpus=reuse_corpus, 825 mallet_path=mallet_path, 826 ) 827 for n_topic in n_topics 828 ] 829 return model_list

File ~/anaconda3/Git/pycisTopic/src/pycisTopic/lda_models.py:916, in run_cgs_model_mallet(binary_matrix, corpus, id2word, n_topics, cell_names, region_names, n_cpu, n_iter, random_state, alpha, alpha_by_topic, eta, eta_by_topic, top_topics_coh, tmp_path, save_path, reuse_corpus, mallet_path) 914 start = time.time() 915 log.info(f"Running model with {n_topics} topics") --> 916 model = LDAMallet( 917 corpus=corpus, 918 id2word=id2word, 919 num_topics=n_topics, 920 iterations=n_iter, 921 alpha=alpha, 922 eta=eta, 923 n_cpu=n_cpu, 924 tmp_dir=tmp_path, 925 random_seed=random_state, 926 reuse_corpus=reuse_corpus, 927 mallet_path=mallet_path, 928 ) 929 end_time = time.time() - start 931 # Get distributions

File ~/anaconda3/Git/pycisTopic/src/pycisTopic/lda_models.py:467, in LDAMallet.__init__(self, num_topics, corpus, alpha, eta, id2word, n_cpu, tmp_dir, optimize_interval, iterations, topic_threshold, random_seed, reuse_corpus, mallet_path) 465 self.mallet_path = mallet_path 466 if corpus is not None: --> 467 self.train(corpus, reuse_corpus)

File ~/anaconda3/Git/pycisTopic/src/pycisTopic/lda_models.py:552, in LDAMallet.train(self, corpus, reuse_corpus) 550 logger = logging.getLogger("LDAMalletWrapper") 551 if os.path.isfile(self.fcorpusmallet()) is False or reuse_corpus is False: --> 552 self.convert_input(corpus) 553 else: 554 logger.info("MALLET corpus already exists, training model")

File ~/anaconda3/Git/pycisTopic/src/pycisTopic/lda_models.py:534, in LDAMallet.convert_input(self, corpus) 532 subprocess.check_output(args=cmd, shell=False, stderr=subprocess.STDOUT) 533 except subprocess.CalledProcessError as e: --> 534 raise RuntimeError( 535 f"command '{e.cmd}' return with error (code {e.returncode}): {e.output}" 536 )

RuntimeError: command '['/home/taxue/mywork/pycisTopic/Mallet-202108/bin/mallet', 'import-file', '--preserve-case', '--keep-sequence', '--token-regex', '\S+', '--input', './scratch/leuven/330/vsc33053/ray_spill/mallet/tutorial/corpus.txt', '--output', './scratch/leuven/330/vsc33053/ray_spill/mallet/tutorial/corpus.mallet']' return with error (code 127): b'/home/taxue/mywork/pycisTopic/Mallet-202108/bin/mallet: \xe8\xa1\x8c 60: java: \xe6\x9c\xaa\xe6\x89\xbe\xe5\x88\xb0\xe5\x91\xbd\xe4\xbb\xa4\n'

w1973145618 commented 3 months ago

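“Mallet is based on Java; make sure Java is installed in the environment.” Exit status 127 means a command was not found: the UTF-8 bytes at the end of the error output decode to “行 60: java: 未找到命令”, i.e. "line 60: java: command not found", so the `mallet` launcher script could not find the `java` executable on the PATH.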