Open jsong0041 opened 2 years ago
Hello @jsong0041, sorry for my late reply. Based on my experience, you are getting an out-of-memory error in the validation step, but it was logged in a different way. You can refer to issue #6 and the README to find which arguments you can change to make it work.
First of all, congratulations on your recent paper '3D-UCaps: 3D Capsules Unet for Volumetric Image Segmentation', accepted at MICCAI'21. It's really great work, and thank you very much for open-sourcing your code on GitHub.
As for the code, I used a new dataset in .tif format as input, but the following errors are thrown:
Validation sanity check: 0%| | 0/1 [00:00<?, ?it/s]C:/w/b/windows/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:312: block: [1926,0,0], thread: [32,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
...
=== Transform input info -- AsDiscrete ===
Traceback (most recent call last):
File "C:\Python36\lib\site-packages\monai\transforms\transform.py", line 84, in apply_transform
return _apply_transform(transform, data, unpack_items)
File "C:\Python36\lib\site-packages\monai\transforms\transform.py", line 52, in _apply_transform
return transform(parameters)
File "C:\Python36\lib\site-packages\monai\transforms\post\array.py", line 174, in __call__
img = one_hot(img, num_classes=_nclasses, dim=0)
File "C:\Python36\lib\site-packages\monai\networks\utils.py", line 86, in one_hot
labels = o.scatter_(dim=dim, index=labels.long(), value=1)
RuntimeError: CUDA error: device-side assert triggered

During handling of the above exception, another exception occurred:
Traceback (most recent call last): File "train.py", line 129, in <module>
trainer.fit(net, datamodule=data_module)
File "C:\Python36\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 741, in fit
self._fit_impl, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path
File "C:\Python36\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 685, in _call_and_handle_interrupt
return trainer_fn(*args, **kwargs)
File "C:\Python36\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 777, in _fit_impl
self._run(model, ckpt_path=ckpt_path)
File "C:\Python36\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1199, in _run
self._dispatch()
File "C:\Python36\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1279, in _dispatch
self.training_type_plugin.start_training(self)
File "C:\Python36\lib\site-packages\pytorch_lightning\plugins\training_type\training_type_plugin.py", line 202, in start_training
self._results = trainer.run_stage()
File "C:\Python36\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1289, in run_stage
return self._run_train()
File "C:\Python36\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1311, in _run_train
self._run_sanity_check(self.lightning_module)
File "C:\Python36\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1375, in _run_sanity_check
self._evaluation_loop.run()
File "C:\Python36\lib\site-packages\pytorch_lightning\loops\base.py", line 145, in run
self.advance(*args, **kwargs)
File "C:\Python36\lib\site-packages\pytorch_lightning\loops\dataloader\evaluation_loop.py", line 110, in advance
dl_outputs = self.epoch_loop.run(dataloader, dataloader_idx, dl_max_batches, self.num_dataloaders)
File "C:\Python36\lib\site-packages\pytorch_lightning\loops\base.py", line 145, in run
self.advance(*args, **kwargs)
File "C:\Python36\lib\site-packages\pytorch_lightning\loops\epoch\evaluation_epoch_loop.py", line 122, in advance
output = self._evaluation_step(batch, batch_idx, dataloader_idx)
File "C:\Python36\lib\site-packages\pytorch_lightning\loops\epoch\evaluation_epoch_loop.py", line 217, in _evaluation_step
output = self.trainer.accelerator.validation_step(step_kwargs)
File "C:\Python36\lib\site-packages\pytorch_lightning\accelerators\accelerator.py", line 239, in validation_step
return self.training_type_plugin.validation_step(*step_kwargs.values())
File "C:\Python36\lib\site-packages\pytorch_lightning\plugins\training_type\dp.py", line 104, in validation_step
return self.model(*args, **kwargs)
File "C:\Python36\lib\site-packages\torch\nn\modules\module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "C:\Python36\lib\site-packages\torch\nn\parallel\data_parallel.py", line 159, in forward
return self.module(*inputs[0], **kwargs[0])
File "C:\Python36\lib\site-packages\torch\nn\modules\module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "C:\Python36\lib\site-packages\pytorch_lightning\overrides\data_parallel.py", line 63, in forward
output = super().forward(*inputs, **kwargs)
File "C:\Python36\lib\site-packages\pytorch_lightning\overrides\base.py", line 92, in forward
output = self.module.validation_step(*inputs, **kwargs)
File "E:#project_b\3d-ucaps-master\module\ucaps.py", line 265, in validation_step
labels = [self.post_label(label) for label in decollate_batch(labels)]
File "E:#project_b\3d-ucaps-master\module\ucaps.py", line 265, in <listcomp>
labels = [self.post_label(label) for label in decollate_batch(labels)]
File "C:\Python36\lib\site-packages\monai\transforms\compose.py", line 159, in __call__
input = apply_transform(transform, input, self.map_items, self.unpack_items)
File "C:\Python36\lib\site-packages\monai\transforms\transform.py", line 107, in apply_transform
_log_stats(data=data)
File "C:\Python36\lib\site-packages\monai\transforms\transform.py", line 98, in _log_stats
datastats(img=data, data_shape=True, value_range=True, prefix=prefix) # type: ignore
File "C:\Python36\lib\site-packages\monai\transforms\utility\array.py", line 524, in __call__
lines.append(f"Value range: ({torch.min(img)}, {torch.max(img)})")
RuntimeError: CUDA error: device-side assert triggered
Any help is much appreciated.