from deepcut import DeepcutTokenizer
tokenizer = DeepcutTokenizer(ngram_range=(1,1),
max_df=1.0, min_df=0.0)
deepcut.tokenize('ตัดคำได้ดีมาก')
X = tokenizer.fit_tranform(['ฉันบินได้', 'ฉันกินข้าว', 'ฉันอยากบิน'])
----------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-5-9855ac2d86cd> in <module>
----> 1 X = tokenizer.fit_tranform(['ฉันบินได้', 'ฉันกินข้าว', 'ฉันอยากบิน'])
~/.pyenv/versions/3.6.6/envs/surasak/lib/python3.6/site-packages/deepcut/deepcut.py in fit_tranform(self, raw_documents)
276 sparse CSR format (see scipy)
277 """
--> 278 X = self.transform(raw_documents, new_document=True)
279 return X
280
~/.pyenv/versions/3.6.6/envs/surasak/lib/python3.6/site-packages/deepcut/deepcut.py in transform(self, raw_documents, new_document)
265 max_doc_count,
266 min_doc_count,
--> 267 self.max_features)
268 self.vocabulary_ = vocabulary
269
~/.pyenv/versions/3.6.6/envs/surasak/lib/python3.6/site-packages/deepcut/deepcut.py in _limit_features(self, X, vocabulary, high, low, limit)
212 removed_terms.add(term)
213 kept_indices = np.where(mask)[0]
--> 214 if not kept_indices:
215 raise ValueError("After pruning, no terms remain. Try a lower"
216 " min_df or a higher max_df.")
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
Environment: deepcut==0.6.1.0, Python 3.6.6