Open nanlin00 opened 2 months ago
Please share the exact steps you ran and compare them with the instructions at https://github.com/FlagOpen/FlagPerf/tree/main/training/nvidia/bert-paddle. Check the dataset and the environment (in a container or on bare metal). If you still hit this bug, you can also refer to https://github.com/PaddlePaddle/PaddleNLP/issues/6145 or report the bug to PaddlePaddle.
在A800服务器运行还是报一样的错
Please do not use the bert-pytorch docs to set up the environment for bert-paddle training.
Traceback (most recent call last): File "model_framework/bert/paddle/run_pretraining.py", line 136, in <module>
config, state = main()
File "model_framework/bert/paddle/run_pretraining.py", line 91, in main
eval_loss, eval_mlm_acc = evaluator.evaluate(trainer)
File "/usr/local/lib/python3.7/dist-packages/decorator.py", line 232, in fun
return caller(func, *(extras + args), **kw)
File "/usr/local/lib/python3.7/dist-packages/paddle/fluid/dygraph/base.py", line 347, in _decorate_function
return func(*args, **kwargs)
File "/nfs/aishPerf_test/training/model_framework/bert/paddle/train/evaluator.py", line 59, in evaluate
loss, mlm_acc, num_masked = trainer.inference(batch)
File "/nfs/aishPerf_test/training/model_framework/bert/paddle/train/trainer.py", line 228, in inference
return self.forward(batch)
File "/nfs/aishPerf_test/training/model_framework/bert/paddle/train/trainer.py", line 223, in forward
next_sentence_labels)
File "/usr/local/lib/python3.7/dist-packages/paddle/nn/layer/layers.py", line 1254, in __call__
return self.forward(*inputs, **kwargs)
File "/nfs/aishPerf_test/training/model_framework/bert/paddle/model/models/modeling.py", line 729, in forward
next_sentence_label, return_dict)
File "/usr/local/lib/python3.7/dist-packages/paddle/nn/layer/layers.py", line 1254, in __call__
return self.forward(*inputs, **kwargs)
File "/nfs/aishPerf_test/training/model_framework/bert/paddle/model/models/modeling.py", line 729, in forward
next_sentence_label, return_dict)
File "/usr/local/lib/python3.7/dist-packages/paddle/nn/layer/layers.py", line 1254, in __call__
return self.forward(*inputs, **kwargs)
File "/nfs/aishPerf_test/training/model_framework/bert/paddle/model/models/modeling.py", line 623, in forward
labels) #masked_positions这个参数应该是找到那些被mask的值
File "/usr/local/lib/python3.7/dist-packages/paddle/nn/layer/layers.py", line 1254, in __call__
return self.forward(*inputs, **kwargs)
File "/nfs/aishPerf_test/training/model_framework/bert/paddle/model/models/modeling.py", line 581, in forward
prediction_scores = self.predictions(sequence_output, masked_positions)
File "/usr/local/lib/python3.7/dist-packages/paddle/nn/layer/layers.py", line 1254, in __call__
return self.forward(*inputs, **kwargs)
File "/nfs/aishPerf_test/training/model_framework/bert/paddle/model/models/modeling.py", line 517, in forward
hidden_states = self.transform(hidden_states)
File "/usr/local/lib/python3.7/dist-packages/paddle/nn/layer/layers.py", line 1254, in __call__
return self.forward(*inputs, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/paddle/nn/layer/common.py", line 175, in forward
x=input, weight=self.weight, bias=self.bias, name=self.name
File "/usr/local/lib/python3.7/dist-packages/paddle/nn/functional/common.py", line 1842, in linear
return _C_ops.linear(x, weight, bias)
ValueError: (InvalidArgument) The Input(X) dims size must not be equal 0, but reviced dims size is 0.
[Hint: Expected phi::product(x.dims()) != 0, but received phi::product(x.dims()):0 == 0:0.] (at ../paddle/phi/kernels/impl/matmul_kernel_impl.h:978)