Traceback (most recent call last):
File "/data/anaconda3/envs/ymz_vecalign/bin/fairseq-train", line 8, in <module>
sys.exit(cli_main())
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/site-packages/fairseq_cli/train.py", line 557, in cli_main
distributed_utils.call_main(cfg, main)
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/site-packages/fairseq/distributed/utils.py", line 369, in call_main
main(cfg, **kwargs)
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/site-packages/fairseq_cli/train.py", line 190, in main
valid_losses, should_stop = train(cfg, trainer, task, epoch_itr)
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/contextlib.py", line 75, in inner
return func(*args, **kwds)
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/site-packages/fairseq_cli/train.py", line 316, in train
log_output = trainer.train_step(samples)
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/contextlib.py", line 75, in inner
return func(*args, **kwds)
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/site-packages/fairseq/trainer.py", line 857, in train_step
raise e
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/site-packages/fairseq/trainer.py", line 824, in train_step
loss, sample_size_i, logging_output = self.task.train_step(
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/site-packages/fairseq/tasks/fairseq_task.py", line 515, in train_step
loss, sample_size, logging_output = criterion(model, sample)
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/site-packages/fairseq/criterions/label_smoothed_cross_entropy.py", line 79, in forward
net_output = model(**sample["net_input"])
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/site-packages/fairseq/models/transformer/transformer_base.py", line 144, in forward
encoder_out = self.encoder(
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/site-packages/fairseq/models/transformer/transformer_encoder.py", line 165, in forward
return self.forward_scriptable(
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/site-packages/fairseq/models/transformer/transformer_encoder.py", line 294, in forward_scriptable
lr = layer(x, encoder_padding_mask=encoder_padding_mask_out)
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/site-packages/fairseq/modules/transformer_layer.py", line 351, in forward
x, _ = self.self_attn(
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/site-packages/fairseq/modules/multihead_attention.py", line 544, in forward
return F.multi_head_attention_forward(
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/site-packages/torch/nn/functional.py", line 5075, in multi_head_attention_forward
q, k, v = _in_projection(query, key, value, q_proj_weight, k_proj_weight, v_proj_weight, b_q, b_k, b_v)
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/site-packages/torch/nn/functional.py", line 4813, in _in_projection
return linear(q, w_q, b_q), linear(k, w_k, b_k), linear(v, w_v, b_v)
RuntimeError: CUDA error: CUBLAS_STATUS_INVALID_VALUE when calling `cublasGemmEx( handle, opa, opb, m, n, k, &falpha, a, CUDA_R_16F, lda, b, CUDA_R_16F, ldb, &fbeta, c, CUDA_R_16F, ldc, CUDA_R_32F, CUBLAS_GEMM_DFALT_TENSOR_OP)`
使用的是experiments/fine-tune-configs/en2de_config.yml
Traceback (most recent call last):
File "/data/anaconda3/envs/ymz_vecalign/bin/fairseq-train", line 8, in <module>
sys.exit(cli_main())
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/site-packages/fairseq_cli/train.py", line 557, in cli_main
distributed_utils.call_main(cfg, main)
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/site-packages/fairseq/distributed/utils.py", line 369, in call_main
main(cfg, **kwargs)
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/site-packages/fairseq_cli/train.py", line 190, in main
valid_losses, should_stop = train(cfg, trainer, task, epoch_itr)
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/contextlib.py", line 75, in inner
return func(*args, **kwds)
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/site-packages/fairseq_cli/train.py", line 316, in train
log_output = trainer.train_step(samples)
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/contextlib.py", line 75, in inner
return func(*args, **kwds)
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/site-packages/fairseq/trainer.py", line 857, in train_step
raise e
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/site-packages/fairseq/trainer.py", line 824, in train_step
loss, sample_size_i, logging_output = self.task.train_step(
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/site-packages/fairseq/tasks/fairseq_task.py", line 515, in train_step
loss, sample_size, logging_output = criterion(model, sample)
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/site-packages/fairseq/criterions/label_smoothed_cross_entropy.py", line 79, in forward
net_output = model(**sample["net_input"])
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/site-packages/fairseq/models/transformer/transformer_base.py", line 144, in forward
encoder_out = self.encoder(
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/site-packages/fairseq/models/transformer/transformer_encoder.py", line 165, in forward
return self.forward_scriptable(
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/site-packages/fairseq/models/transformer/transformer_encoder.py", line 294, in forward_scriptable
lr = layer(x, encoder_padding_mask=encoder_padding_mask_out)
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/site-packages/fairseq/modules/transformer_layer.py", line 351, in forward
x, _ = self.self_attn(
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/site-packages/fairseq/modules/multihead_attention.py", line 544, in forward
return F.multi_head_attention_forward(
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/site-packages/torch/nn/functional.py", line 5075, in multi_head_attention_forward
q, k, v = _in_projection(query, key, value, q_proj_weight, k_proj_weight, v_proj_weight, b_q, b_k, b_v)
File "/data/anaconda3/envs/ymz_vecalign/lib/python3.8/site-packages/torch/nn/functional.py", line 4813, in _in_projection
return linear(q, w_q, b_q), linear(k, w_k, b_k), linear(v, w_v, b_v)
RuntimeError: CUDA error: CUBLAS_STATUS_INVALID_VALUE when calling `cublasGemmEx( handle, opa, opb, m, n, k, &falpha, a, CUDA_R_16F, lda, b, CUDA_R_16F, ldb, &fbeta, c, CUDA_R_16F, ldc, CUDA_R_32F, CUBLAS_GEMM_DFALT_TENSOR_OP)`