Closed dejungle closed 4 years ago
输出结果不正确。
分析句子“前进100米。”时出错。得到如下结果: [{"relate": "ATT", "cont": "\u524d\u8fdb", "id": 0, "parent": 1, "pos": "v"}, {"relate": "ATT", "cont": "1", "id": 1, "parent": 3, "pos": "n"}, {"relate": "ATT", "cont": "0", "id": 2, "parent": 3, "pos": "v"}, {"relate": "SBV", "cont": "0", "id": 3, "parent": 4, "pos": "n"}, {"relate": "HED", "cont": "\u7c73", "id": 4, "parent": -1, "pos": "v"}, {"relate": "VOB", "cont": "\u3002", "id": 5, "parent": 4, "pos": "n"}]
# Reproduction script: segment, POS-tag and dependency-parse one sentence
# with pyltp, then print one JSON record per word.
import json
import os  # needed for the os.path calls below; missing in the original snippet

import pyltp
from pyltp import (
    SentenceSplitter,
    Segmentor,
    Postagger,
    Parser,
    NamedEntityRecognizer,
    SementicRoleLabeller,
)

# Model files are looked up in ./ltp_data relative to the current working
# directory, so the script must be started from the directory containing it.
ROOTDIR = os.path.abspath(os.path.curdir)
MODELDIR = os.path.join(ROOTDIR, 'ltp_data')

# Word segmentation model.
segmentor = Segmentor()
segmentor.load(os.path.join(MODELDIR, 'cws.model'))

# Part-of-speech tagging model.
postagger = Postagger()
postagger.load(os.path.join(MODELDIR, 'pos.model'))

# Dependency parsing model.
parser = Parser()
parser.load(os.path.join(MODELDIR, 'parser.model'))

text = '前进100米。'
words = segmentor.segment(text)
postags = postagger.postag(words)
arcs = parser.parse(words, postags)

# One record per word. `arc.head - 1` shifts the parser's head index down by
# one so that it lines up with the 0-based 'id' field; in the outputs quoted
# in this report the HED (root) word consequently has parent -1.
res = [
    {
        'id': i,
        'cont': word,
        'pos': postag,
        'parent': arc.head - 1,
        'relate': arc.relation,
    }
    for i, (word, postag, arc) in enumerate(zip(words, postags, arcs))
]
print(json.dumps(res))
Nvidia Tegra TX2 JetPack 3.3 (Linux 16.04) python 2.7.6 pyltp 版本:0.1.9 LTP 版本:3.3.2 模型版本:3.3.1
[{"relate": "HED", "cont": "\u524d\u8fdb", "id": 0, "parent": -1, "pos": "v"}, {"relate": "ATT", "cont": "100", "id": 1, "parent": 2, "pos": "m"}, {"relate": "CMP", "cont": "\u7c73", "id": 2, "parent": 0, "pos": "q"}, {"relate": "WP", "cont": "\u3002", "id": 3, "parent": 0, "pos": "wp"}]
问题类型
输出结果不正确。
出错场景
分析句子“前进100米。”时出错。得到如下结果: [{"relate": "ATT", "cont": "\u524d\u8fdb", "id": 0, "parent": 1, "pos": "v"}, {"relate": "ATT", "cont": "1", "id": 1, "parent": 3, "pos": "n"}, {"relate": "ATT", "cont": "0", "id": 2, "parent": 3, "pos": "v"}, {"relate": "SBV", "cont": "0", "id": 3, "parent": 4, "pos": "n"}, {"relate": "HED", "cont": "\u7c73", "id": 4, "parent": -1, "pos": "v"}, {"relate": "VOB", "cont": "\u3002", "id": 5, "parent": 4, "pos": "n"}]
代码片段
# Reproduction script: segment, POS-tag and dependency-parse one sentence
# with pyltp, then print one JSON record per word.
import json
import os  # needed for the os.path calls below; missing in the original snippet

import pyltp
from pyltp import (
    SentenceSplitter,
    Segmentor,
    Postagger,
    Parser,
    NamedEntityRecognizer,
    SementicRoleLabeller,
)

# Model files are looked up in ./ltp_data relative to the current working
# directory, so the script must be started from the directory containing it.
ROOTDIR = os.path.abspath(os.path.curdir)
MODELDIR = os.path.join(ROOTDIR, 'ltp_data')

# Word segmentation model.
segmentor = Segmentor()
segmentor.load(os.path.join(MODELDIR, 'cws.model'))

# Part-of-speech tagging model.
postagger = Postagger()
postagger.load(os.path.join(MODELDIR, 'pos.model'))

# Dependency parsing model.
parser = Parser()
parser.load(os.path.join(MODELDIR, 'parser.model'))

text = '前进100米。'
words = segmentor.segment(text)
postags = postagger.postag(words)
arcs = parser.parse(words, postags)

# One record per word. `arc.head - 1` shifts the parser's head index down by
# one so that it lines up with the 0-based 'id' field; in the outputs quoted
# in this report the HED (root) word consequently has parent -1.
res = [
    {
        'id': i,
        'cont': word,
        'pos': postag,
        'parent': arc.head - 1,
        'relate': arc.relation,
    }
    for i, (word, postag, arc) in enumerate(zip(words, postags, arcs))
]
print(json.dumps(res))
运行环境
Nvidia Tegra TX2 JetPack 3.3 (Linux 16.04) python 2.7.6 pyltp 版本:0.1.9 LTP 版本:3.3.2 模型版本:3.3.1
期望结果
[{"relate": "HED", "cont": "\u524d\u8fdb", "id": 0, "parent": -1, "pos": "v"}, {"relate": "ATT", "cont": "100", "id": 1, "parent": 2, "pos": "m"}, {"relate": "CMP", "cont": "\u7c73", "id": 2, "parent": 0, "pos": "q"}, {"relate": "WP", "cont": "\u3002", "id": 3, "parent": 0, "pos": "wp"}]