xv44586 / toolkit4nlp

transformers implement (architecture, task example, serving and more)
Apache License 2.0

Error report #2

Closed: wangbq18 closed this issue 3 years ago

wangbq18 commented 3 years ago

Hello! I'm using your framework for question matching. When I encode query1 and query2 separately, I get an error. Can this be resolved?

How the model is built:

model = build_transformer_model(
    config_path=config_path,
    checkpoint_path=checkpoint_path,
    model=bert_type,
    return_keras_model=True,
)

The model is initialized only once here and shared by both calls:

t1 = model([t1, t2])  # BERT features for query1
t2 = model([t3, t4])  # BERT features for query2

tensorflow.python.framework.errors_impl.InvalidArgumentError: 2 root error(s) found.
  (0) Invalid argument: Incompatible shapes: [4,12,128,128] vs. [4,1,1,36]
      [[{{node model_1_1/Transformer-0-MultiHeadSelfAttention/sub_1}}]]
      [[Mean_2/_1007]]
  (1) Invalid argument: Incompatible shapes: [4,12,128,128] vs. [4,1,1,36]
      [[{{node model_1_1/Transformer-0-MultiHeadSelfAttention/sub_1}}]]
0 successful operations. 0 derived errors ignored.

xv44586 commented 3 years ago

The shapes of t1 and t2 probably don't match. Please paste the code that builds t1/t2.

wangbq18 commented 3 years ago

> The shapes of t1 and t2 probably don't match. Please paste the code that builds t1/t2.

No explicit shapes are given:

t1_in = Input(shape=(None,))
t2_in = Input(shape=(None,))
t3_in = Input(shape=(None,))
t4_in = Input(shape=(None,))

xv44586 commented 3 years ago

BERT's input is [token_ids, segment_ids]; that is not how you are using it. Please refer to the examples in the example directory.
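
For reference, a minimal sketch of what that input pair looks like (assuming dict_path points at the BERT vocab.txt and model is the one built with build_transformer_model above; the text is a placeholder):

import numpy as np
from toolkit4nlp.tokenizers import Tokenizer

tokenizer = Tokenizer(dict_path, do_lower_case=True)
# encode() returns two parallel id lists: token ids and segment ids
token_ids, segment_ids = tokenizer.encode(u'how is the weather today')
# the model takes exactly these two inputs, batched and of equal length
features = model.predict([np.array([token_ids]), np.array([segment_ids])])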

wangbq18 commented 3 years ago

> BERT's input is [token_ids, segment_ids]; that is not how you are using it. Please refer to the examples in the example directory.

OK, I understand what you mean. In my code, t1 = token_ids and t2 = segment_ids. The same code works fine with the keras_bert package (shown below); it only fails after switching to your framework. Could you take a look at what's going wrong?

bert_model = load_trained_model_from_checkpoint(
    config_path, checkpoint_path, seq_len=None)
for l in bert_model.layers:
    l.trainable = True

x1_in = Input(shape=(None,))
m1_in = Input(shape=(None,))
x2_in = Input(shape=(None,))
m2_in = Input(shape=(None,))
c_in = Input(shape=(None,))
y_in = Input(shape=(None,))

mask1 = Lambda(lambda x: K.cast(K.greater(x, 0), 'float32'))(x1_in)
mask2 = Lambda(lambda x: K.cast(K.greater(x, 0), 'float32'))(x2_in)

q1 = bert_model([x1_in, m1_in])
q2 = bert_model([x2_in, m2_in])

xv44586 commented 3 years ago

x1_in = Input(shape=(None, ))
m1_in = Input(shape=(None, ))
x2_in = Input(shape=(None, ))
m2_in = Input(shape=(None, ))

q1  = bert([x1_in, m1_in])
q2 = bert([x2_in, m2_in])

model = Model([x1_in, m1_in, x2_in, m2_in], [q1, q2])
model.summary()

wangbq18 commented 3 years ago


Hello, I changed the inputs to the way you described and it still fails. Here is the original script:
[baseline_my2.txt](https://github.com/xv44586/toolkit4nlp/files/5610600/baseline_my2.txt)

wangbq18 commented 3 years ago

Su Jianlin's keras4bert also reports the same error with its most recent version, but he hasn't had time to debug it. Switching to keras_bert works fine, and keras4bert version 0.5.8 can also run this script.

xv44586 commented 3 years ago

import numpy as np
from tqdm import tqdm

from toolkit4nlp.utils import *
from toolkit4nlp.models import *
from toolkit4nlp.tokenizers import *
from toolkit4nlp.backend import *
from toolkit4nlp.layers import *
from toolkit4nlp.optimizers import *

maxlen = 128
batch_size = 16
epochs = 5
# BERT configuration
config_path = '/home/mingming.xu/pretrain/NLP/chinese_roberta_wwm_ext_L-12_H-768_A-12/bert_config.json'
checkpoint_path = '/home/mingming.xu/pretrain/NLP/chinese_roberta_wwm_ext_L-12_H-768_A-12/bert_model.ckpt'
dict_path = '/home/mingming.xu/pretrain/NLP/chinese_roberta_wwm_ext_L-12_H-768_A-12/vocab.txt'

# build the tokenizer
tokenizer = Tokenizer(dict_path, do_lower_case=True)

# load train / validation data (left empty here as placeholders)
train_data = []
valid_data = []

class data_generator(DataGenerator):
    def __iter__(self, shuffle=False):
        batch_token_ids, batch_segment_ids, batch_token_ids_2, batch_segment_ids_2, batch_labels = [], [], [], [], []
        for is_end, (q_id, q, r_id, r, label) in self.get_sample(shuffle):
            label = int(label)

            token_ids, segment_ids = tokenizer.encode(q)
            token_ids_2, segment_ids_2 = tokenizer.encode(r)

            batch_token_ids.append(token_ids)
            batch_segment_ids.append(segment_ids)
            batch_token_ids_2.append(token_ids_2)
            batch_segment_ids_2.append(segment_ids_2)
            batch_labels.append([label])

            if is_end or len(batch_token_ids) == self.batch_size:
                batch_token_ids = pad_sequences(batch_token_ids, maxlen=maxlen)
                batch_segment_ids = pad_sequences(batch_segment_ids, maxlen=maxlen)
                batch_labels = pad_sequences(batch_labels)
                batch_token_ids_2 = pad_sequences(batch_token_ids_2,maxlen=maxlen)
                batch_segment_ids_2 = pad_sequences(batch_segment_ids_2,maxlen=maxlen)

                yield [batch_token_ids, batch_segment_ids, batch_token_ids_2, batch_segment_ids_2], batch_labels

                batch_token_ids, batch_segment_ids, batch_labels = [], [], []
                batch_token_ids_2, batch_segment_ids_2 = [], []

train_generator = data_generator(data=train_data, batch_size=batch_size)
valid_generator = data_generator(data=valid_data, batch_size=batch_size)

# load the pretrained model
bert = build_transformer_model(
    config_path=config_path,
    checkpoint_path=checkpoint_path,
)
t1_in = Input(shape=(None,))
s1_in = Input(shape=(None,))
t2_in = Input(shape=(None,))
s2_in = Input(shape=(None,))

q1 = bert([t1_in, s1_in])
q2 = bert([t2_in, s2_in])

q1_output = Lambda(lambda x: x[:, 0])(q1)
q2_output = Lambda(lambda x: x[:, 0])(q2)

x = Concatenate(-1)([q1_output, q2_output])
x = Dense(1, activation='sigmoid')(x)

model = Model([t1_in, s1_in, t2_in, s2_in], x)
model.summary()

model.compile(
    loss = K.binary_crossentropy,
    optimizer=Adam(2e-5),  
    metrics=['accuracy'],
)
model.fit_generator(train_generator.generator(), steps_per_epoch=len(train_generator), epochs=epochs)

1. You still haven't understood what BERT's input is.
2. You did not change the code the way I described. What are mask1/mask2 for?
3. The runtime error happens because samples within the same batch have different sizes. You need to make sure q1 and q2 have the same size within a batch, i.e. pad the sequences to the same length before the data_generator yields them; see the demo above and the short sketch below.
4. Just use BERT directly; why attach so many extra layers?
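
To make point 3 concrete, here is a minimal sketch of feeding the four-input model from the demo, with both queries padded to one common maxlen so that the attention-mask shapes agree (tokenizer, pad_sequences, maxlen and model are the ones defined above; the texts are placeholders):

# pad every field of both queries to the same fixed length
q_tokens, q_segs = tokenizer.encode(u'first query')
r_tokens, r_segs = tokenizer.encode(u'second query, of a different length')

inputs = [
    pad_sequences([q_tokens], maxlen=maxlen),
    pad_sequences([q_segs], maxlen=maxlen),
    pad_sequences([r_tokens], maxlen=maxlen),
    pad_sequences([r_segs], maxlen=maxlen),
]
prob = model.predict(inputs)  # shape (1, 1); no "Incompatible shapes" error
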
wangbq18 commented 3 years ago

Thank you.