bojone / bert4keras

keras implement of transformers for humans
https://kexue.fm/archives/6915
Apache License 2.0

With tf 2.3.0 and TF_KERAS=1 (i.e. tf.keras), prediction is slower than with tf 2.2.0 + Keras 2.3.1 #411

Open asd8095075 opened 2 years ago

asd8095075 commented 2 years ago

```python
import os
import numpy as np
from bert4keras.backend import keras, K
from bert4keras.models import build_transformer_model
from bert4keras.tokenizers import Tokenizer
from bert4keras.snippets import sequence_padding, AutoRegressiveDecoder
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

# Limit GPU memory usage
config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)

# Get the current working directory
dir_path = os.getcwd()

# BERT configuration
config_path = dir_path + \
    '/simbert/chinese_simbert_L-12_H-768_A-12/bert_config.json'
checkpoint_path = dir_path + \
    '/simbert/chinese_simbert_L-12_H-768_A-12/bert_model.ckpt'
dict_path = dir_path + '/simbert/chinese_simbert_L-12_H-768_A-12/vocab.txt'

# Build the tokenizer
tokenizer = Tokenizer(dict_path, do_lower_case=True)

# Build and load the model
bert = build_transformer_model(
    config_path,
    checkpoint_path,
    with_pool='linear',
    application='unilm',
    return_keras_model=False,
)
encoder = keras.models.Model(bert.model.inputs, bert.model.outputs[0])


def gen_all_sim_value(ques: list):
    X, S = [], []
    for que in ques:
        x, s = tokenizer.encode(que)
        X.append(x)
        S.append(s)
    X = sequence_padding(X)
    S = sequence_padding(S)
    Z = encoder.predict([X, S])
    Z /= (Z**2).sum(axis=1, keepdims=True)**0.5  # L2-normalize the sentence vectors
    res = np.dot(Z[1:], Z[0])  # similarity of each candidate to the first sentence
    res = list(res)
    return res
```
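For reference, prediction time here is dominated by encoder.predict, so a plain wall-clock benchmark around gen_all_sim_value makes the tf.keras vs Keras comparison concrete. This is only a sketch: the query list, warm-up call and repeat count below are illustrative assumptions, not taken from the original report.

```python
import time

# Hypothetical queries: the first entry is the reference sentence,
# the rest are candidates compared against it.
queries = ['怎么开通花呗', '花呗如何开通', '今天天气怎么样']

_ = gen_all_sim_value(queries)  # warm-up call so graph construction is not timed

start = time.perf_counter()
for _ in range(100):
    gen_all_sim_value(queries)
elapsed = time.perf_counter() - start
print('average prediction time: %.4f s' % (elapsed / 100))
```

Running the same script once under tf.keras and once under standalone Keras gives the timing gap the issue describes.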

bojone commented 2 years ago

I've noticed this too: regardless of the TF version, tf.keras is on average slower than standalone Keras, so I basically never use tf.keras (except for multi-GPU parallelism).

But I don't know the exact cause either; you could ask Google about it.
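For anyone reproducing the comparison: bert4keras selects its backend from the TF_KERAS environment variable, and the flag has to be set before bert4keras is imported. A minimal sketch, assuming the rest of the script above stays unchanged:

```python
import os

# Set BEFORE importing anything from bert4keras:
#   '1'        -> use tf.keras as the backend
#   unset/'0'  -> use standalone Keras (e.g. Keras 2.3.1 with tf 2.2)
os.environ['TF_KERAS'] = '1'

from bert4keras.backend import keras, K  # now bound to the chosen backend
```

Toggling this flag (with the matching TF/Keras versions installed) is enough to switch between the two setups being compared.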