TransMIL: Transformer based Correlated Multiple Instance Learning for Whole Slide Image Classification
363
stars
74
forks
source link
How to resolve "RuntimeError: CUDA error: CUBLAS_STATUS_INVALID_VALUE when calling `cublasGemmEx( handle, opa, opb, m, n, k, &falpha, a, CUDA_R_16F, lda, b, CUDA_R_16F, ldb, &fbeta, c, CUDA_R_16F, ldc, CUDA_R_32F, CUBLAS_GEMM_DFALT_TENSOR_OP)`"? #38
Traceback (most recent call last):
File "train.py", line 91, in <module>
main(cfg)
File "train.py", line 70, in main
trainer.fit(model = model, datamodule = dm)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 514, in fit
self.dispatch()
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 554, in dispatch
self.accelerator.start_training(self)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/accelerators/accelerator.py", line 74, in start_training
self.training_type_plugin.start_training(trainer)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py", line 111, in start_training
self._results = trainer.run_train()
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 645, in run_train
self.train_loop.run_training_epoch()
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/trainer/training_loop.py", line 493, in run_training_epoch
batch_output = self.run_training_batch(batch, batch_idx, dataloader_idx)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/trainer/training_loop.py", line 632, in run_training_batch
split_batch, batch_idx, opt_idx, optimizer, self.trainer.hiddens
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/trainer/training_loop.py", line 743, in training_step_and_backward
result = self.training_step(split_batch, batch_idx, opt_idx, hiddens)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/trainer/training_loop.py", line 293, in training_step
training_step_output = self.trainer.accelerator.training_step(args)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/accelerators/accelerator.py", line 157, in training_step
return self.training_type_plugin.training_step(*args)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py", line 122, in training_step
return self.lightning_module.training_step(*args, **kwargs)
File "/sharefiles1/boqiuhan/TransMIL-main/TransMIL-main/models/model_interface.py", line 81, in training_step
results_dict = self.model(data=data, label=label)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/sharefiles1/boqiuhan/TransMIL-main/TransMIL-main/models/TransMIL.py", line 77, in forward
h = self.layer1(h) #[B, N, 512]
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/sharefiles1/boqiuhan/TransMIL-main/TransMIL-main/models/TransMIL.py", line 24, in forward
x = x + self.attn(self.norm(x))
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/nystrom_attention/nystrom_attention.py", line 82, in forward
q, k, v = self.to_qkv(x).chunk(3, dim = -1)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/torch/nn/modules/linear.py", line 114, in forward
return F.linear(input, self.weight, self.bias)
RuntimeError: CUDA error: CUBLAS_STATUS_INVALID_VALUE when calling cublasGemmEx( handle, opa, opb, m, n, k, &falpha, a, CUDA_R_16F, lda, b, CUDA_R_16F, ldb, &fbeta, c, CUDA_R_16F, ldc, CUDA_R_32F, CUBLAS_GEMM_DFALT_TENSOR_OP)
Traceback (most recent call last):
File "train.py", line 91, in <module>
main(cfg)
File "train.py", line 70, in main
trainer.fit(model = model, datamodule = dm)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 514, in fit
self.dispatch()
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 554, in dispatch
self.accelerator.start_training(self)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/accelerators/accelerator.py", line 74, in start_training
self.training_type_plugin.start_training(trainer)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py", line 111, in start_training
self._results = trainer.run_train()
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 645, in run_train
self.train_loop.run_training_epoch()
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/trainer/training_loop.py", line 493, in run_training_epoch
batch_output = self.run_training_batch(batch, batch_idx, dataloader_idx)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/trainer/training_loop.py", line 632, in run_training_batch
split_batch, batch_idx, opt_idx, optimizer, self.trainer.hiddens
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/trainer/training_loop.py", line 743, in training_step_and_backward
result = self.training_step(split_batch, batch_idx, opt_idx, hiddens)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/trainer/training_loop.py", line 293, in training_step
training_step_output = self.trainer.accelerator.training_step(args)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/accelerators/accelerator.py", line 157, in training_step
return self.training_type_plugin.training_step(*args)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py", line 122, in training_step
return self.lightning_module.training_step(*args, **kwargs)
File "/sharefiles1/boqiuhan/TransMIL-main/TransMIL-main/models/model_interface.py", line 81, in training_step
results_dict = self.model(data=data, label=label)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/sharefiles1/boqiuhan/TransMIL-main/TransMIL-main/models/TransMIL.py", line 77, in forward
h = self.layer1(h) #[B, N, 512]
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/sharefiles1/boqiuhan/TransMIL-main/TransMIL-main/models/TransMIL.py", line 24, in forward
x = x + self.attn(self.norm(x))
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/nystrom_attention/nystrom_attention.py", line 82, in forward
q, k, v = self.to_qkv(x).chunk(3, dim = -1)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/torch/nn/modules/linear.py", line 114, in forward
return F.linear(input, self.weight, self.bias)
RuntimeError: CUDA error: CUBLAS_STATUS_INVALID_VALUE when calling
cublasGemmEx( handle, opa, opb, m, n, k, &falpha, a, CUDA_R_16F, lda, b, CUDA_R_16F, ldb, &fbeta, c, CUDA_R_16F, ldc, CUDA_R_32F, CUBLAS_GEMM_DFALT_TENSOR_OP)