import os
import numpy as np
from bert4keras.backend import keras, K
from bert4keras.models import build_transformer_model
from bert4keras.tokenizers import Tokenizer
from bert4keras.snippets import sequence_padding, AutoRegressiveDecoder
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession
# Limit GPU memory usage: let TensorFlow grow allocations on demand
config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)
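# A TF2-native alternative (an assumption, not from the original post): if you
# drop the compat.v1 session above, per-GPU memory growth can be enabled
# directly. Shown commented out because only one of the two approaches should
# run, and set_memory_growth must be called before any GPU is initialized.
# import tensorflow as tf
# for gpu in tf.config.experimental.list_physical_devices('GPU'):
#     tf.config.experimental.set_memory_growth(gpu, True)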
# Get the current working directory
dir_path = os.getcwd()
# BERT configuration: paths to the SimBERT checkpoint files
config_path = dir_path + \
    '/simbert/chinese_simbert_L-12_H-768_A-12/bert_config.json'
checkpoint_path = dir_path + \
    '/simbert/chinese_simbert_L-12_H-768_A-12/bert_model.ckpt'
dict_path = dir_path + '/simbert/chinese_simbert_L-12_H-768_A-12/vocab.txt'
# Build the tokenizer
tokenizer = Tokenizer(dict_path, do_lower_case=True)
# Build and load the model (UniLM application with a linear pooling head),
# then expose the pooled sentence vector as a standalone encoder
bert = build_transformer_model(
    config_path,
    checkpoint_path,
    with_pool='linear',
    application='unilm',
    return_keras_model=False,
)
encoder = keras.models.Model(bert.model.inputs, bert.model.outputs[0])
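# Quick sanity check (illustrative, not from the original post): encode one
# sentence and confirm the pooled vector has shape (1, 768) for this
# L-12 H-768 checkpoint.
x, s = tokenizer.encode(u'你好')
print(encoder.predict([np.array([x]), np.array([s])]).shape)  # -> (1, 768)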
def gen_all_sim_value(ques: list):
    """Treat ques[0] as the query and return its cosine similarity
    to every remaining sentence in ques."""
    X, S = [], []
    for que in ques:
        x, s = tokenizer.encode(que)
        X.append(x)
        S.append(s)
    X = sequence_padding(X)
    S = sequence_padding(S)
    Z = encoder.predict([X, S])
    # L2-normalize each sentence vector so the dot products below are
    # cosine similarities
    Z /= (Z**2).sum(axis=1, keepdims=True)**0.5
    res = np.dot(Z[1:], Z[0])
    res = list(res)
    return res
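# Illustrative usage (the sentences are made-up examples, not from the
# original post): the first sentence is the query, the rest are candidates.
if __name__ == '__main__':
    ques = [
        u'怎么开通微信支付',      # query
        u'如何开通微信支付功能',  # candidate 1
        u'今天天气怎么样',        # candidate 2
    ]
    # Prints one cosine similarity per candidate, in input order
    print(gen_all_sim_value(ques))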