Closed zhaoyiyong closed 2 years ago
import os import sys import json import rocketqa from pprint import pprint
def initmodel():
    """Load the DuReader dual encoder and cross encoder via rocketqa.

    Both models are configured with ``use_cuda=False``, ``device_id=0`` and
    ``batch_size=32``; only the model name differs.

    Returns:
        A ``(dual_encoder, cross_encoder)`` tuple holding whatever
        ``rocketqa.load_model`` returns for each configuration.
    """
    # Settings common to both encoders.
    shared = {"use_cuda": False, "device_id": 0, "batch_size": 32}
    # NOTE(review): the config is passed as one positional dict; some RocketQA
    # releases document keyword arguments instead - confirm against the
    # installed version.
    # Order matters for the return value: dual encoder first, then cross encoder.
    return tuple(
        rocketqa.load_model({"model": model_name, **shared})
        for model_name in ('zh_dureader_de', 'zh_dureader_ce')
    )
# Load both encoders once.
de, ce = initmodel()

# Query, paragraph and title all carry the same text.
q, p, t = ['教育储蓄险是什么险种'], ['教育储蓄险是什么险种'], ['教育储蓄险是什么险种']

# Score the same (query, para, title) triple with the dual encoder first,
# then with the cross encoder, printing each result.
descore = de.matching(query=q, para=p, title=t)
print(descore)
cescore = ce.matching(query=q, para=p, title=t)
print(cescore)
# Output reported alongside this snippet:
# [463.5213928222656]
# [0.7785768508911133]
估计是因为对于问答模型,title/para(t、p)和 query(q)一模一样并不代表它们能够回答 q 中表述的问题,所以分数很低。对问答数据来说,相关性更多是指答案和问题之间的相关性,而不仅仅是字面匹配。
import os import sys import json import rocketqa from pprint import pprint
def initmodel():
    """Load the DuReader dual encoder and cross encoder via rocketqa.

    Both models are configured with ``use_cuda=False``, ``device_id=0`` and
    ``batch_size=32``; only the model name differs.

    Returns:
        A ``(dual_encoder, cross_encoder)`` tuple holding whatever
        ``rocketqa.load_model`` returns for each configuration.
    """
    # Settings common to both encoders.
    shared = {"use_cuda": False, "device_id": 0, "batch_size": 32}
    # NOTE(review): the config is passed as one positional dict; some RocketQA
    # releases document keyword arguments instead - confirm against the
    # installed version.
    # Order matters for the return value: dual encoder first, then cross encoder.
    return tuple(
        rocketqa.load_model({"model": model_name, **shared})
        for model_name in ('zh_dureader_de', 'zh_dureader_ce')
    )
# Load both encoders once.
de, ce = initmodel()

# Query, paragraph and title all carry the same text.
q, p, t = ['教育储蓄险是什么险种'], ['教育储蓄险是什么险种'], ['教育储蓄险是什么险种']

# Score the same (query, para, title) triple with the dual encoder first,
# then with the cross encoder, printing each result.
descore = de.matching(query=q, para=p, title=t)
print(descore)
cescore = ce.matching(query=q, para=p, title=t)
print(cescore)
# Output reported alongside this snippet:
# [463.5213928222656]
# [0.7785768508911133]