The shapes of t1/t2 probably don't match. Can you paste the code for t1/t2?
No explicit shape was given:
t1_in = Input(shape=(None,))
t2_in = Input(shape=(None,))
t3_in = Input(shape=(None,))
t4_in = Input(shape=(None,))
BERT's input is [token_ids, segment_ids]; that is not how you are using it. Please refer to the examples in the example directory.
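For reference, a minimal sketch of that call pattern, assuming the toolkit4nlp API and the checkpoint paths that appear in the script later in this thread:

```python
import numpy as np
from toolkit4nlp.models import build_transformer_model
from toolkit4nlp.tokenizers import Tokenizer

# same paths as in the script below
config_path = '/home/mingming.xu/pretrain/NLP/chinese_roberta_wwm_ext_L-12_H-768_A-12/bert_config.json'
checkpoint_path = '/home/mingming.xu/pretrain/NLP/chinese_roberta_wwm_ext_L-12_H-768_A-12/bert_model.ckpt'
dict_path = '/home/mingming.xu/pretrain/NLP/chinese_roberta_wwm_ext_L-12_H-768_A-12/vocab.txt'

tokenizer = Tokenizer(dict_path, do_lower_case=True)
bert = build_transformer_model(config_path=config_path, checkpoint_path=checkpoint_path)

# encode() returns (token_ids, segment_ids); the model takes exactly this pair of inputs
token_ids, segment_ids = tokenizer.encode(u'这是一个例子')
hidden = bert.predict([np.array([token_ids]), np.array([segment_ids])])
print(hidden.shape)  # roughly (1, seq_len, 768) for a base-size checkpoint
```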
OK, I understand what you mean; here my t1 = token_ids and t2 = segment_ids. My code works fine with the keras_bert package (shown below), but after switching to your framework it no longer runs. I hope you can help me find the problem.
bert_model = load_trained_model_from_checkpoint(
    config_path, checkpoint_path, seq_len=None)
for l in bert_model.layers:
    l.trainable = True
x1_in = Input(shape=(None,))
m1_in = Input(shape=(None,))
x2_in = Input(shape=(None,))
m2_in = Input(shape=(None,))
c_in = Input(shape=(None,))
y_in = Input(shape=(None,))
mask1 = Lambda(lambda x: K.cast(K.greater(x, 0), 'float32'))(x1_in)
mask2 = Lambda(lambda x: K.cast(K.greater(x, 0), 'float32'))(x2_in)
q1 = bert_model([x1_in, m1_in])
q2 = bert_model([x2_in, m2_in])
x1_in = Input(shape=(None, ))
m1_in = Input(shape=(None, ))
x2_in = Input(shape=(None, ))
m2_in = Input(shape=(None, ))
q1 = bert([x1_in, m1_in])
q2 = bert([x2_in, m2_in])
model = Model([x1_in, m1_in, x2_in, m2_in], [q1, q2])
model.summary()
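For concreteness, a minimal usage sketch, assuming the `model` built in the demo above: with both query batches padded to one shared length, the two calls to the shared encoder line up and `predict` should run.

```python
import numpy as np

# dummy token/segment ids: batch of 2, with BOTH queries padded to the same length (8 here)
t1 = np.random.randint(1, 100, size=(2, 8))
s1 = np.zeros_like(t1)
t2 = np.random.randint(1, 100, size=(2, 8))
s2 = np.zeros_like(t2)

q1_out, q2_out = model.predict([t1, s1, t2, s2])
print(q1_out.shape, q2_out.shape)  # roughly (2, 8, 768) each for a base-size checkpoint
```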
Hello, I changed the inputs to the way you described and it still fails. Here is the original script: [baseline_my2.txt](https://github.com/xv44586/toolkit4nlp/files/5610600/baseline_my2.txt)
Su Jianlin's (苏神) most recent keras4bert reports the same error, but he hasn't had time to debug it. If I switch to keras_bert there is no problem, and his keras4bert version 0.5.8 can run this script.
import numpy as np
from tqdm import tqdm
from toolkit4nlp.utils import *
from toolkit4nlp.models import *
from toolkit4nlp.tokenizers import *
from toolkit4nlp.backend import *
from toolkit4nlp.layers import *
from toolkit4nlp.optimizers import *
maxlen = 128
batch_size = 16
epochs = 5
# BERT configuration
config_path = '/home/mingming.xu/pretrain/NLP/chinese_roberta_wwm_ext_L-12_H-768_A-12/bert_config.json'
checkpoint_path = '/home/mingming.xu/pretrain/NLP/chinese_roberta_wwm_ext_L-12_H-768_A-12/bert_model.ckpt'
dict_path = '/home/mingming.xu/pretrain/NLP/chinese_roberta_wwm_ext_L-12_H-768_A-12/vocab.txt'
# build the tokenizer
tokenizer = Tokenizer(dict_path, do_lower_case=True)
# load train_data (placeholder lists here; the actual loading code is omitted in this excerpt)
train_data = []
valid_data = []  # placeholder so valid_generator below can be built
class data_generator(DataGenerator):
    def __iter__(self, shuffle=False):
        batch_token_ids, batch_segment_ids, batch_token_ids_2, batch_segment_ids_2, batch_labels = [], [], [], [], []
        for is_end, (q_id, q, r_id, r, label) in self.get_sample(shuffle):
            label = int(label)
            token_ids, segment_ids = tokenizer.encode(q)
            token_ids_2, segment_ids_2 = tokenizer.encode(r)
            batch_token_ids.append(token_ids)
            batch_segment_ids.append(segment_ids)
            batch_token_ids_2.append(token_ids_2)
            batch_segment_ids_2.append(segment_ids_2)
            batch_labels.append([label])
            if is_end or len(batch_token_ids) == self.batch_size:
                batch_token_ids = pad_sequences(batch_token_ids, maxlen=maxlen)
                batch_segment_ids = pad_sequences(batch_segment_ids, maxlen=maxlen)
                batch_labels = pad_sequences(batch_labels)
                batch_token_ids_2 = pad_sequences(batch_token_ids_2, maxlen=maxlen)
                batch_segment_ids_2 = pad_sequences(batch_segment_ids_2, maxlen=maxlen)
                yield [batch_token_ids, batch_segment_ids, batch_token_ids_2, batch_segment_ids_2], batch_labels
                batch_token_ids, batch_segment_ids, batch_labels = [], [], []
                batch_token_ids_2, batch_segment_ids_2 = [], []
train_generator = data_generator(data=train_data, batch_size=batch_size)
valid_generator = data_generator(data=valid_data, batch_size=batch_size)
# load the pre-trained model
bert = build_transformer_model(
    config_path=config_path,
    checkpoint_path=checkpoint_path,
)
t1_in = Input(shape=(None,))
s1_in = Input(shape=(None,))
t2_in = Input(shape=(None,))
s2_in = Input(shape=(None,))
q1 = bert([t1_in, s1_in])
q2 = bert([t2_in, s2_in])
q1_output = Lambda(lambda x: x[:, 0])(q1)
q2_output = Lambda(lambda x: x[:, 0])(q2)
x = Concatenate(-1)([q1_output, q2_output])
x = Dense(1, activation='sigmoid')(x)
model = Model([t1_in, s1_in, t2_in, s2_in], x)
model.summary()
model.compile(
    loss=K.binary_crossentropy,
    optimizer=Adam(2e-5),
    metrics=['accuracy'],
)
model.fit_generator(train_generator.generator(), steps_per_epoch=len(train_generator), epochs=epochs)
1. You still haven't understood what BERT's inputs are.
2. You didn't change the code the way I suggested; what are mask1/mask2 for?
3. The runtime error is because samples within the same batch have different sizes. You need to make sure q1 and q2 have the same size within one batch; concretely, pad the sequences to the same length in data_generator before yielding them (see the demo above, and the padding sketch below).
4. Just use BERT directly; why attach so many extra layers?
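To make point 3 concrete, a minimal padding sketch in plain numpy; `pad_batch_to` is a made-up helper, not part of toolkit4nlp. Both queries in a batch are padded to one shared length, so the two calls to the shared bert model see inputs of identical shape.

```python
import numpy as np

def pad_batch_to(seqs, length):
    # hypothetical helper, not part of toolkit4nlp: right-pad each id list with 0 up to `length`
    out = np.zeros((len(seqs), length), dtype='int32')
    for i, s in enumerate(seqs):
        out[i, :len(s)] = s[:length]
    return out

# toy batch: two samples, query1 and query2 tokenized to different lengths
batch_token_ids   = [[101, 2769, 102], [101, 2577, 3221, 102]]
batch_token_ids_2 = [[101, 872, 1962, 1435, 102], [101, 511, 102]]

# pad BOTH sides of the pair to one shared length, so bert([t1, s1]) and bert([t2, s2])
# receive inputs of the same sequence length within the batch
shared_len = max(max(map(len, batch_token_ids)), max(map(len, batch_token_ids_2)))
t1 = pad_batch_to(batch_token_ids, shared_len)
t2 = pad_batch_to(batch_token_ids_2, shared_len)
print(t1.shape, t2.shape)  # both (2, 5)
```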
Thanks.
Hello! I am using your framework for question matching. When query1 and query2 are encoded separately, it throws an error. Could you help resolve this?
How it is called:
model = build_transformer_model(
    config_path=config_path,
    checkpoint_path=checkpoint_path,
    model=bert_type,
    return_keras_model=True,
)
The model here is initialized only once and is shared between the two calls.
t1 = model([t1, t2])  # BERT encoding of query1
t2 = model([t3, t4])  # BERT encoding of query2
tensorflow.python.framework.errors_impl.InvalidArgumentError: 2 root error(s) found.
  (0) Invalid argument: Incompatible shapes: [4,12,128,128] vs. [4,1,1,36]
      [[{{node model_1_1/Transformer-0-MultiHeadSelfAttention/sub_1}}]]
      [[Mean_2/_1007]]
  (1) Invalid argument: Incompatible shapes: [4,12,128,128] vs. [4,1,1,36]
      [[{{node model_1_1/Transformer-0-MultiHeadSelfAttention/sub_1}}]]
0 successful operations. 0 derived errors ignored.
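One common reading of point 4 in the reply above ("just use BERT directly") is the standard single-encoder setup: encode the question pair jointly and classify from the CLS vector, so each sample involves one BERT call and one sequence length. A rough sketch, assuming `tokenizer` and `bert` from the script above are already built, and assuming Tokenizer.encode accepts a text pair here as it does in bert4keras (an assumption, not confirmed in this thread):

```python
# assumes `tokenizer` and `bert` from the script above are already built
from toolkit4nlp.layers import *   # Input, Lambda, Dense (as used in the script above)
from toolkit4nlp.models import *   # Model

t_in = Input(shape=(None,))
s_in = Input(shape=(None,))
cls = Lambda(lambda x: x[:, 0])(bert([t_in, s_in]))  # CLS vector of the joint input
out = Dense(1, activation='sigmoid')(cls)
model = Model([t_in, s_in], out)

# assumption: encode() accepts a text pair and returns ids for "[CLS] q [SEP] r [SEP]"
token_ids, segment_ids = tokenizer.encode(u'问题一', u'问题二')
```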