Closed: junrong1 closed this issue 3 years ago
This is probably because the order of the elements in the batch returned by your dataloader does not match the order of the arguments that BertModel expects. In Transformers 2.9, the parameter order of BertModel.forward is:
def forward(
    self,
    input_ids=None,
    attention_mask=None,
    token_type_ids=None,
    position_ids=None,
    head_mask=None,
    inputs_embeds=None,
    encoder_hidden_states=None,
    encoder_attention_mask=None,
)
Does the order of the elements in the batch returned by your dataloader match this order?
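To see why the order matters (an editor's sketch, not part of the original reply): a tuple batch is unpacked positionally, so each element is bound to the forward parameter occupying the same slot in the signature above.

# Sketch: a tuple batch is unpacked positionally, so its element order must
# match BertModel.forward's parameter order.
import inspect
from transformers import BertModel

# Parameter names of forward(), in positional order (excluding self):
print(list(inspect.signature(BertModel.forward).parameters)[1:])

# A batch built as (input_ids, attention_mask, token_type_ids, labels) and
# forwarded as model(*batch) therefore binds the 4th element to position_ids,
# not labels, which can surface as a tensor-size RuntimeError like the one
# reported in this issue.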
I suggest defining a custom dataset that returns a dict instead of a tuple, with each key matching a parameter name of forward; that way this kind of alignment problem is much less likely. For example:
from torch.utils.data import Dataset

class DictDataset(Dataset):
    """Returns each example as a dict whose keys match the model's forward() parameter names."""

    def __init__(self, all_input_ids, all_input_mask, all_token_type_ids, all_labels):
        super(DictDataset, self).__init__()
        self.all_input_ids = all_input_ids
        self.all_input_mask = all_input_mask
        self.all_token_type_ids = all_token_type_ids
        self.all_labels = all_labels

    def __getitem__(self, index):
        return {
            'input_ids': self.all_input_ids[index],
            'attention_mask': self.all_input_mask[index],
            'token_type_ids': self.all_token_type_ids[index],
            'labels': self.all_labels[index],
        }

    def __len__(self):
        return len(self.all_labels)
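A possible way to wire this up (editor's sketch; the tensors, sizes, and dataloader name are purely illustrative):

import torch
from torch.utils.data import DataLoader

# Dummy data just to show the expected shapes; replace with your real features.
num_examples, seq_len = 100, 30
all_input_ids = torch.randint(0, 21128, (num_examples, seq_len))
all_input_mask = torch.ones(num_examples, seq_len, dtype=torch.long)
all_token_type_ids = torch.zeros(num_examples, seq_len, dtype=torch.long)
all_labels = torch.randint(0, 2, (num_examples,))

train_dataset = DictDataset(all_input_ids, all_input_mask, all_token_type_ids, all_labels)
train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True)

batch = next(iter(train_dataloader))
print(batch.keys())
# dict_keys(['input_ids', 'attention_mask', 'token_type_ids', 'labels'])
# The default collate_fn preserves the dict structure and stacks each field,
# so the batch can be passed to the model as keyword arguments: model(**batch).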
Changing the dataset fixed the dimension-mismatch problem, but then a new error appeared: TypeError: forward() got an unexpected keyword argument 'labels'
The full error message:

Traceback (most recent call last):
  File "/Users/ray.yao/opt/anaconda3/envs/text-align/lib/python3.8/site-packages/textbrewer/distiller_utils.py", line 265, in get_outputs_from_batch
    results_T = auto_forward(model_T, batch, args)
  File "/Users/ray.yao/opt/anaconda3/envs/text-align/lib/python3.8/site-packages/textbrewer/distiller_utils.py", line 287, in auto_forward
    results = model(**batch, **args)
  File "/Users/ray.yao/opt/anaconda3/envs/text-align/lib/python3.8/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
    result = self.forward(*input, **kwargs)
TypeError: forward() got an unexpected keyword argument 'labels'
python-BaseException
The transformers version I'm currently using is 2.9.0.
DictDataset is only an example; adjust the keys of the dict it returns to match the model you are actually using.
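For the 'labels' TypeError specifically: in transformers 2.9 the bare BertModel.forward has no labels parameter (task-specific heads such as BertForSequenceClassification do). If the teacher and student only need the encoder outputs, one option is to drop any batch keys the model does not accept before they reach forward. A minimal sketch, assuming a dict-style batch and using the batch_postprocessor argument of train() that is visible in the traceback below (make_batch_filter is my own helper name, not a TextBrewer API):

import inspect

def make_batch_filter(model):
    # Keys accepted by this model's forward().
    accepted = set(inspect.signature(model.forward).parameters)

    def batch_postprocessor(batch):
        # Drop keys (e.g. 'labels') that forward() would reject; leave tuple batches untouched.
        if isinstance(batch, dict):
            return {k: v for k, v in batch.items() if k in accepted}
        return batch

    return batch_postprocessor

# Hypothetical usage:
# distiller.train(optimizer, train_dataloader, num_epochs=num_epochs,
#                 scheduler_class=scheduler_class, scheduler_args=scheduler_args,
#                 batch_postprocessor=make_batch_filter(model_T), callback=None)

Alternatively, if the labels are needed for a hard-label loss, use a model class whose forward does accept labels (for example BertForSequenceClassification) instead of the bare BertModel.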
How exactly did you change the dataset? I've run into the same problem. Thanks!
import json

with open('config.json') as f:
    config = json.load(fp=f)
Traceback (most recent call last):
  File "/Users/ray.yao/Desktop/daas-text-align/model/know_dis.py", line 96, in <module>
    distiller.train(optimizer, train_dataloader, num_epochs=num_epochs, scheduler_class=scheduler_class, scheduler_args=scheduler_args, callback=None)
  File "/Users/ray.yao/opt/anaconda3/envs/text-align/lib/python3.8/site-packages/textbrewer/distiller_basic.py", line 283, in train
    self.train_with_num_epochs(optimizer, scheduler, tqdm_disable, dataloader, max_grad_norm, num_epochs, callback, batch_postprocessor, args)
  File "/Users/ray.yao/opt/anaconda3/envs/text-align/lib/python3.8/site-packages/textbrewer/distiller_basic.py", line 212, in train_with_num_epochs
    total_loss, losses_dict = self.train_on_batch(batch, args)
  File "/Users/ray.yao/opt/anaconda3/envs/text-align/lib/python3.8/site-packages/textbrewer/distiller_general.py", line 74, in train_on_batch
    (teacher_batch, results_T), (student_batch, results_S) = get_outputs_from_batch(batch, self.t_config.device, self.model_T, self.model_S, args)
  File "/Users/ray.yao/opt/anaconda3/envs/text-align/lib/python3.8/site-packages/textbrewer/distiller_utils.py", line 274, in get_outputs_from_batch
    results_T = auto_forward(model_T, batch, args)
  File "/Users/ray.yao/opt/anaconda3/envs/text-align/lib/python3.8/site-packages/textbrewer/distiller_utils.py", line 294, in auto_forward
    results = model(*batch, **args)
  File "/Users/ray.yao/opt/anaconda3/envs/text-align/lib/python3.8/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/Users/ray.yao/opt/anaconda3/envs/text-align/lib/python3.8/site-packages/transformers/modeling_bert.py", line 728, in forward
    embedding_output = self.embeddings(
  File "/Users/ray.yao/opt/anaconda3/envs/text-align/lib/python3.8/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/Users/ray.yao/opt/anaconda3/envs/text-align/lib/python3.8/site-packages/transformers/modeling_bert.py", line 177, in forward
    embeddings = inputs_embeds + position_embeddings + token_type_embeddings
RuntimeError: The size of tensor a (30) must match the size of tensor b (512) at non-singleton dimension 1

Process finished with exit code 1
inputs_embeds.size()         = [512, 30, 768]  # batch, seq_len, hid_dim
position_embeddings.size()   = [512, 768]
token_type_embeddings.size() = [512, 30, 768]
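One way to catch this kind of mismatch early (editor's sketch, reusing the hypothetical train_dataloader from the DictDataset example above): print the shapes in the first batch before calling distiller.train.

# Sketch: inspect the first batch so an out-of-order element (e.g. a 1-D labels
# tensor landing in the position_ids slot) is visible before training starts.
batch = next(iter(train_dataloader))

if isinstance(batch, dict):
    for name, tensor in batch.items():
        print(f"{name}: {tuple(tensor.shape)}")
else:  # tuple batch: elements are passed to forward() positionally
    for position, tensor in enumerate(batch):
        print(f"positional argument {position}: {tuple(tensor.shape)}")

# For BertModel, input_ids / attention_mask / token_type_ids should all be
# (batch_size, seq_len); a (batch_size,)-shaped tensor in one of those slots
# would explain a broadcast failure like the [512, 768] vs [512, 30, 768]
# mismatch shown above.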