microsoft / nlp-recipes

Natural Language Processing Best Practices & Examples
MIT License
6.37k stars 917 forks source link

[BUG] java.lang.NullPointerException: java.lang.NullPointerException #550

Closed SlowMonk closed 4 years ago

SlowMonk commented 4 years ago
# Training features and labels must come from the same frame so that each
# document is paired with its own label when the pipeline is fitted.
X_train = train.loc[:, 'document'].values
y_train = train.loc[:, 'label'].values

# NOTE(review): despite its name, y_test holds the test *documents*, not the
# test labels, and it is not used by the code below — confirm the intended
# column before using it for evaluation.
y_test = test.loc[:,'document'].values

# TF-IDF features built with the custom tokenizer, followed by a
# MultinomialNB classifier.
# NOTE(review): the NullPointerException in this report is raised inside the
# tokenizer (komoran.morphs) while a document is being vectorized; presumably
# some entry of the 'document' column is whitespace-only or otherwise not
# parseable by KOMORAN — confirm and clean that column before fitting.
tfidf = TfidfVectorizer(tokenizer=tokenizer_morphs)
multi_nbc = Pipeline([('vect', tfidf), ('nbc', MultinomialNB())])

# Time the whole fit call (vectorization plus classifier training).
start = time()
multi_nbc.fit(X_train, y_train)
end = time()
print('Time: {:f}s'.format(end-start))

java.lang.NullPointerException Traceback (most recent call last)

in
1 start = time()
----> 2 multi_nbc.fit(X_train, y_train)
3 end = time()
4 print('Time: {:f}s'.format(end-start))

~/venv/lib/python3.7/site-packages/sklearn/pipeline.py in fit(self, X, y, **fit_params)
346 This estimator
347 """
--> 348 Xt, fit_params = self._fit(X, y, **fit_params)
349 with _print_elapsed_time('Pipeline',
350 self._log_message(len(self.steps) - 1)):

~/venv/lib/python3.7/site-packages/sklearn/pipeline.py in _fit(self, X, y, **fit_params)
311 message_clsname='Pipeline',
312 message=self._log_message(step_idx),
--> 313 **fit_params_steps[name])
314 # Replace the transformer of the step with the fitted
315 # transformer. This is necessary when loading the transformer

~/venv/lib/python3.7/site-packages/joblib/memory.py in __call__(self, *args, **kwargs)
353
354 def __call__(self, *args, **kwargs):
--> 355 return self.func(*args, **kwargs)
356
357 def call_and_shelve(self, *args, **kwargs):

~/venv/lib/python3.7/site-packages/sklearn/pipeline.py in _fit_transform_one(transformer, X, y, weight, message_clsname, message, **fit_params)
724 with _print_elapsed_time(message_clsname, message):
725 if hasattr(transformer, 'fit_transform'):
--> 726 res = transformer.fit_transform(X, y, **fit_params)
727 else:
728 res = transformer.fit(X, y, **fit_params).transform(X)

~/venv/lib/python3.7/site-packages/sklearn/feature_extraction/text.py in fit_transform(self, raw_documents, y)
1853 """
1854 self._check_params()
-> 1855 X = super().fit_transform(raw_documents)
1856 self._tfidf.fit(X)
1857 # X is already a transformed view of raw_documents so

~/venv/lib/python3.7/site-packages/sklearn/feature_extraction/text.py in fit_transform(self, raw_documents, y)
1218
1219 vocabulary, X = self._count_vocab(raw_documents,
-> 1220 self.fixed_vocabulary_)
1221
1222 if self.binary:

~/venv/lib/python3.7/site-packages/sklearn/feature_extraction/text.py in _count_vocab(self, raw_documents, fixed_vocab)
1129 for doc in raw_documents:
1130 feature_counter = {}
-> 1131 for feature in analyze(doc):
1132 try:
1133 feature_idx = vocabulary[feature]

~/venv/lib/python3.7/site-packages/sklearn/feature_extraction/text.py in _analyze(doc, analyzer, tokenizer, ngrams, preprocessor, decoder, stop_words)
103 doc = preprocessor(doc)
104 if tokenizer is not None:
--> 105 doc = tokenizer(doc)
106 if ngrams is not None:
107 if stop_words is not None:

in tokenizer_morphs(doc)
14
15 def tokenizer_morphs(doc):
---> 16 return komoran.morphs(doc)

~/venv/lib/python3.7/site-packages/konlpy/tag/_komoran.py in morphs(self, phrase)
87 """Parse phrase to morphemes."""
88
---> 89 return [s for s, t in self.pos(phrase)]
90
91 def __init__(self, jvmpath=None, userdic=None, modelpath=None, max_heap_size=1024):

~/venv/lib/python3.7/site-packages/konlpy/tag/_komoran.py in pos(self, phrase, flatten, join)
66 if not sentence:
67 continue
---> 68 result = self.jki.analyze(sentence).getTokenList()
69 result = [(token.getMorph(), token.getPos()) for token in result]
70

java.lang.NullPointerException: java.lang.NullPointerException
saidbleik commented 4 years ago

wrong repo?