Hi, thanks for providing this cool library!

We are implementing meta-learning with RoBERTa from Transformers. The code runs fine on a single GPU, but when we wrap it in DataParallel it raises KeyError: 'weight':
Traceback (most recent call last):
File "/home/hk/workshop_43/My_project/FSL/train_demo.py", line 268, in
main()
File "/home/hk/workshop_43/My_project/FSL/train_demo.py", line 254, in main
learning_rate=opt.lr, use_sgd_for_bert=opt.use_sgd_for_bert, grad_iter=opt.grad_iter)
File "/home/hk/workshop_43/My_project/FSL/fewshot_re_kit/maml.py", line 184, in train
loss, right = self.maml_iter(model, N_for_train, K, Q, na_rate, self.train_data_loader, optimizer, learning_rate, fp16)
File "/home/hk/workshop_43/My_project/FSL/fewshot_re_kit/maml.py", line 129, in maml_iter
num_adaptation_steps=self.num_adaptation_steps, fp16=fp16)
File "/home/hk/workshop_43/My_project/FSL/fewshot_re_kit/maml.py", line 113, in adapt
support_logits, support_pred = model(support, "", N, K, (QN+Qna_rate), params=None)
File "/home/hk/.conda/envs/fsl/lib/python3.7/site-packages/torch/nn/modules/module.py", line 722, in _call_impl
result = self.forward(input, kwargs)
File "/home/hk/.conda/envs/fsl/lib/python3.7/site-packages/apex/amp/_initialize.py", line 197, in new_fwd
applier(kwargs, input_caster))
File "/home/hk/workshop_43/My_project/FSL/models/question.py", line 47, in forward
logits = self.sentence_encoder(input, params=self.get_subdict(params, 'sentence_encoder'))
File "/home/hk/.conda/envs/fsl/lib/python3.7/site-packages/torch/nn/modules/module.py", line 722, in _call_impl
result = self.forward(input, kwargs)
File "/home/hk/.conda/envs/fsl/lib/python3.7/site-packages/torch/nn/parallel/data_parallel.py", line 155, in forward
outputs = self.parallel_apply(replicas, inputs, kwargs)
File "/home/hk/.conda/envs/fsl/lib/python3.7/site-packages/torch/nn/parallel/data_parallel.py", line 165, in parallel_apply
return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)])
File "/home/hk/.conda/envs/fsl/lib/python3.7/site-packages/torch/nn/parallel/parallel_apply.py", line 85, in parallel_apply
output.reraise()
File "/home/hk/.conda/envs/fsl/lib/python3.7/site-packages/torch/_utils.py", line 395, in reraise
raise self.exc_type(msg)
KeyError: Caught KeyError in replica 0 on device 0.
Original Traceback (most recent call last):
File "/home/hk/.conda/envs/fsl/lib/python3.7/site-packages/torch/nn/parallel/parallel_apply.py", line 60, in _worker
output = module(*input, **kwargs)
File "/home/hk/.conda/envs/fsl/lib/python3.7/site-packages/torch/nn/modules/module.py", line 722, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/hk/workshop_43/My_project/FSL/fewshot_re_kit/sentence_encoder.py", line 344, in forward
x = self.roberta(inputs['word'], attention_mask=inputs['mask'], params=self.get_subdict(params, 'roberta'))[0]
File "/home/hk/.conda/envs/fsl/lib/python3.7/site-packages/torch/nn/modules/module.py", line 722, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/hk/workshop_43/My_project/FSL/fewshot_re_kit/meta_transformers/meta_models/meta_roberta/modeling_roberta.py", line 924, in forward
params=self.get_subdict(params, 'roberta')
File "/home/hk/.conda/envs/fsl/lib/python3.7/site-packages/torch/nn/modules/module.py", line 722, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/hk/workshop_43/My_project/FSL/fewshot_re_kit/meta_transformers/meta_models/meta_roberta/modeling_roberta.py", line 835, in forward
params=self.get_subdict(params, 'encoder')
File "/home/hk/.conda/envs/fsl/lib/python3.7/site-packages/torch/nn/modules/module.py", line 722, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/hk/workshop_43/My_project/FSL/fewshot_re_kit/meta_transformers/meta_models/meta_roberta/modeling_roberta.py", line 522, in forward
output_attentions,
File "/home/hk/.conda/envs/fsl/lib/python3.7/site-packages/torch/nn/modules/module.py", line 722, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/hk/workshop_43/My_project/FSL/fewshot_re_kit/meta_transformers/meta_models/meta_roberta/modeling_roberta.py", line 405, in forward
params=self.get_subdict(params, 'attention')
File "/home/hk/.conda/envs/fsl/lib/python3.7/site-packages/torch/nn/modules/module.py", line 722, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/hk/workshop_43/My_project/FSL/fewshot_re_kit/meta_transformers/meta_models/meta_roberta/modeling_roberta.py", line 333, in forward
params=self.get_subdict(params, 'self')
File "/home/hk/.conda/envs/fsl/lib/python3.7/site-packages/torch/nn/modules/module.py", line 722, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/hk/workshop_43/My_project/FSL/fewshot_re_kit/meta_transformers/meta_models/meta_roberta/modeling_roberta.py", line 188, in forward
mixed_query_layer = self.query(hidden_states, params=self.get_subdict(params, 'query'))
File "/home/hk/.conda/envs/fsl/lib/python3.7/site-packages/torch/nn/modules/module.py", line 722, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/hk/.conda/envs/fsl/lib/python3.7/site-packages/torchmeta/modules/linear.py", line 14, in forward
return F.linear(input, params['weight'], bias)
KeyError: 'weight'
Process finished with exit code 1
(1) The problem seems similar to #97. I have tried the solution from #97 with the new Torchmeta 1.7, but the problem still exists.
(2) I also tried Torchmeta 1.5.3, which is the version that fixed #97, but it doesn't work either.
(3) I am using Torchmeta's DataParallel: from torchmeta.modules import DataParallel
(4) I have checked all the parameter keys; none of them has the 'module.' prefix.
(5) It seems the error happens when params=None reaches forward() (a minimal sketch of this failure mode is below):

def forward(self, input, params=None):
    if params is None:
        params = OrderedDict(self.named_parameters())
        # <-- self.named_parameters() returns nothing here
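If I understand it correctly, this happens because on PyTorch >= 1.5 the replicas created by nn.DataParallel no longer register their parameters, so named_parameters() is empty inside a replica and the params=None fallback finds no 'weight'. A minimal sketch of what I mean (hypothetical stand-alone example, needs a CUDA device):

import torch
import torch.nn as nn
from torch.nn.parallel import replicate

layer = nn.Linear(4, 4).cuda()
replica = replicate(layer, [0])[0]

# The original module exposes its parameters as expected...
print([name for name, _ in layer.named_parameters()])    # ['weight', 'bias']
# ...but the replica's _parameters dict is empty, so a params=None fallback
# like the one in torchmeta's MetaLinear finds no 'weight' and raises KeyError.
print([name for name, _ in replica.named_parameters()])  # []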
My model:

class Meta_Question(fewshot_re_kit.maml.Maml_Model):

    def __init__(self, sentence_encoder):
        fewshot_re_kit.maml.Maml_Model.__init__(self, sentence_encoder)
        self.drop = nn.Dropout()

    def forward(self, input, placehold, N, K, total_Q, params=None):
        logits = self.sentence_encoder(input, params=self.get_subdict(params, 'sentence_encoder'))
        logits = logits.view(-1, total_Q, N, K, 2)
        logits = logits.mean(3)  # (-1, total_Q, N, 2)
        logits_na, _ = logits[:, :, :, 0].min(2, keepdim=True)  # (-1, total_Q, 1)
        logits = logits[:, :, :, 1]  # (-1, total_Q, N)
        logits = torch.cat([logits, logits_na], 2)  # (B, total_Q, N + 1); the +1 is the none-of-the-above class
        _, pred = torch.max(logits.view(-1, N + 1), 1)
        return logits, pred
The sentence encoder inside it wraps RoBERTa for sequence classification:

self.sentence_encoder = Transofmer.RobertaForSequenceClassification
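For reference, adapt() currently calls model(support, "", N, K, (QN+Qna_rate), params=None) (see the traceback above). A rough, untested sketch of what I was planning to try next: build the params dict on the full, non-replicated meta-model with meta_named_parameters() (the Torchmeta MetaModule API) and pass it down explicitly, so the params=None fallback is never taken inside the replicas. I am not sure this is the intended way to combine MAML adaptation with DataParallel:

from collections import OrderedDict

# Untested sketch: collect the meta-parameters once on the outer model and pass them
# explicitly instead of params=None, so no replica relies on its own (empty)
# named_parameters().
params = OrderedDict(model.meta_named_parameters())
support_logits, support_pred = model(support, "", N, K, (QN + Qna_rate), params=params)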
Do you have any workaround for this? Thanks!