# Reproduction script: segment the same `text` with jieba and with jieba_fast,
# keeping only tokens whose length is between 2 and 4 characters inclusive.

# running jieba.cut() -- reported to complete normally
jieba_words = [word for word in jieba.cut(text, HMM=True) if 1 < len(word) <= 4]

# running jieba_fast.cut() -- same filter, written as an explicit loop so the
# failing statement matches the `for` line the traceback points at
jb_fast_words = []
for word in jieba_fast.cut(text, HMM=True):
    if 1 < len(word) <= 4:
        jb_fast_words.append(word)
其中 jieba.cut() 運行正常
jieba_fast.cut() 會報以下錯:
---------------------------------------------------------------------------
UnicodeDecodeError Traceback (most recent call last)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0x88 in position 0: invalid start byte
The above exception was the direct cause of the following exception:
SystemError Traceback (most recent call last)
<ipython-input-37-ab60a3468933> in <module>()
9 jb_fast_words = []
10
---> 11 for word in jieba_fast.cut(text, HMM=True):
12 if len(word) > 1 and len(word) <= 4:
13 jb_fast_words.append(word)
~/anaconda/envs/python3/lib/python3.6/site-packages/jieba_fast/__init__.py in cut(self, sentence, cut_all, HMM)
306 continue
307 if re_han.match(blk):
--> 308 for word in cut_block(blk):
309 yield word
310 else:
~/anaconda/envs/python3/lib/python3.6/site-packages/jieba_fast/__init__.py in __cut_DAG(self, sentence)
271 elif not self.FREQ.get(buf):
272 recognized = finalseg.cut(buf)
--> 273 for t in recognized:
274 yield t
275 else:
~/anaconda/envs/python3/lib/python3.6/site-packages/jieba_fast/finalseg/__init__.py in cut(sentence)
95 for blk in blocks:
96 if re_han.match(blk):
---> 97 for word in __cut(blk):
98 if word not in Force_Split_Words:
99 yield word
~/anaconda/envs/python3/lib/python3.6/site-packages/jieba_fast/finalseg/__init__.py in __cut(sentence)
67 def __cut(sentence):
68 global emit_P
---> 69 prob, pos_list = _jieba_fast_functions._viterbi(sentence, 'BMES', start_P, trans_P, emit_P)
70 begin, nexti = 0, 0
71 for i, char in enumerate(sentence):
SystemError: <built-in function _viterbi> returned a result with an error set
環境是 macOS 10.13.3, Python 3.6.1
其中 jieba.cut() 運行正常,而 jieba_fast.cut() 會報上述錯誤。
附上文本(UTF-8 的西遊記 1-50 回),感謝! jttw_1-50.txt