I also changed train.py to use the CPU instead of the GPU so that the errors would be more understandable.
# if cfg.device.gpu_ids == -1:
# logger.info("Using CPU training")
# accelerator, devices, strategy = "cpu", None, None
# else:
# accelerator, devices, strategy = "gpu", cfg.device.gpu_ids, None
accelerator, devices, strategy = "cpu", None, None # CPU training
After running it, I get the following errors.
(.venv) mosminin@debian:~/dev/nanodet$ python tools/train.py /home/mosminin/dev/nanodet/config/nanodet-plus-m_416_person.yml
/home/mosminin/dev/nanodet/.venv/lib/python3.9/site-packages/pytorch_lightning/utilities/cloud_io.py:33: LightningDeprecationWarning: `pytorch_lightning.utilities.cloud_io.get_filesystem` has been deprecated in v1.8.0 and will be removed in v1.10.0. Please use `lightning_lite.utilities.cloud_io.get_filesystem` instead.
rank_zero_deprecation(
[NanoDet][12-18 14:05:30]INFO:Setting up data...
loading annotations into memory...
Done (t=4.35s)
creating index...
index created!
loading annotations into memory...
Done (t=0.16s)
creating index...
index created!
[NanoDet][12-18 14:05:35]INFO:Creating model...
model size is 1.0x
init weights...
=> loading pretrained model https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth
Finish initialize NanoDet-Plus Head.
GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/home/mosminin/dev/nanodet/.venv/lib/python3.9/site-packages/pytorch_lightning/trainer/setup.py:175: PossibleUserWarning: GPU available but not used. Set `accelerator` and `devices` using `Trainer(accelerator='gpu', devices=1)`.
rank_zero_warn(
| Name | Type | Params
------------------------------------------
0 | model | NanoDetPlus | 4.1 M
1 | avg_model | NanoDetPlus | 4.1 M
------------------------------------------
8.2 M Trainable params
0 Non-trainable params
8.2 M Total params
32.903 Total estimated model params size (MB)
[NanoDet][12-18 14:05:35]INFO:Weight Averaging is enabled
/home/mosminin/dev/nanodet/.venv/lib/python3.9/site-packages/torch/functional.py:504: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at ../aten/src/ATen/native/TensorShape.cpp:3190.)
return _VF.meshgrid(tensors, **kwargs) # type: ignore[attr-defined]
Traceback (most recent call last):
File "/home/mosminin/dev/nanodet/tools/train.py", line 147, in <module>
main(args)
File "/home/mosminin/dev/nanodet/tools/train.py", line 142, in main
trainer.fit(task, train_dataloader, val_dataloader)
File "/home/mosminin/dev/nanodet/.venv/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py", line 603, in fit
call._call_and_handle_interrupt(
File "/home/mosminin/dev/nanodet/.venv/lib/python3.9/site-packages/pytorch_lightning/trainer/call.py", line 38, in _call_and_handle_interrupt
return trainer_fn(*args, **kwargs)
File "/home/mosminin/dev/nanodet/.venv/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py", line 645, in _fit_impl
self._run(model, ckpt_path=self.ckpt_path)
File "/home/mosminin/dev/nanodet/.venv/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py", line 1098, in _run
results = self._run_stage()
File "/home/mosminin/dev/nanodet/.venv/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py", line 1177, in _run_stage
self._run_train()
File "/home/mosminin/dev/nanodet/.venv/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py", line 1200, in _run_train
self.fit_loop.run()
File "/home/mosminin/dev/nanodet/.venv/lib/python3.9/site-packages/pytorch_lightning/loops/loop.py", line 199, in run
self.advance(*args, **kwargs)
File "/home/mosminin/dev/nanodet/.venv/lib/python3.9/site-packages/pytorch_lightning/loops/fit_loop.py", line 267, in advance
self._outputs = self.epoch_loop.run(self._data_fetcher)
File "/home/mosminin/dev/nanodet/.venv/lib/python3.9/site-packages/pytorch_lightning/loops/loop.py", line 199, in run
self.advance(*args, **kwargs)
File "/home/mosminin/dev/nanodet/.venv/lib/python3.9/site-packages/pytorch_lightning/loops/epoch/training_epoch_loop.py", line 214, in advance
batch_output = self.batch_loop.run(kwargs)
File "/home/mosminin/dev/nanodet/.venv/lib/python3.9/site-packages/pytorch_lightning/loops/loop.py", line 199, in run
self.advance(*args, **kwargs)
File "/home/mosminin/dev/nanodet/.venv/lib/python3.9/site-packages/pytorch_lightning/loops/batch/training_batch_loop.py", line 88, in advance
outputs = self.optimizer_loop.run(optimizers, kwargs)
File "/home/mosminin/dev/nanodet/.venv/lib/python3.9/site-packages/pytorch_lightning/loops/loop.py", line 199, in run
self.advance(*args, **kwargs)
File "/home/mosminin/dev/nanodet/.venv/lib/python3.9/site-packages/pytorch_lightning/loops/optimization/optimizer_loop.py", line 200, in advance
result = self._run_optimization(kwargs, self._optimizers[self.optim_progress.optimizer_position])
File "/home/mosminin/dev/nanodet/.venv/lib/python3.9/site-packages/pytorch_lightning/loops/optimization/optimizer_loop.py", line 247, in _run_optimization
self._optimizer_step(optimizer, opt_idx, kwargs.get("batch_idx", 0), closure)
File "/home/mosminin/dev/nanodet/.venv/lib/python3.9/site-packages/pytorch_lightning/loops/optimization/optimizer_loop.py", line 357, in _optimizer_step
self.trainer._call_lightning_module_hook(
File "/home/mosminin/dev/nanodet/.venv/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py", line 1342, in _call_lightning_module_hook
output = fn(*args, **kwargs)
File "/home/mosminin/dev/nanodet/nanodet/trainer/task.py", line 281, in optimizer_step
optimizer.step(closure=optimizer_closure)
File "/home/mosminin/dev/nanodet/.venv/lib/python3.9/site-packages/pytorch_lightning/core/optimizer.py", line 169, in step
step_output = self._strategy.optimizer_step(self._optimizer, self._optimizer_idx, closure, **kwargs)
File "/home/mosminin/dev/nanodet/.venv/lib/python3.9/site-packages/pytorch_lightning/strategies/strategy.py", line 234, in optimizer_step
return self.precision_plugin.optimizer_step(
File "/home/mosminin/dev/nanodet/.venv/lib/python3.9/site-packages/pytorch_lightning/plugins/precision/precision_plugin.py", line 121, in optimizer_step
return optimizer.step(closure=closure, **kwargs)
File "/home/mosminin/dev/nanodet/.venv/lib/python3.9/site-packages/torch/optim/lr_scheduler.py", line 68, in wrapper
return wrapped(*args, **kwargs)
File "/home/mosminin/dev/nanodet/.venv/lib/python3.9/site-packages/torch/optim/optimizer.py", line 140, in wrapper
out = func(*args, **kwargs)
File "/home/mosminin/dev/nanodet/.venv/lib/python3.9/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context
return func(*args, **kwargs)
File "/home/mosminin/dev/nanodet/.venv/lib/python3.9/site-packages/torch/optim/adamw.py", line 120, in step
loss = closure()
File "/home/mosminin/dev/nanodet/.venv/lib/python3.9/site-packages/pytorch_lightning/plugins/precision/precision_plugin.py", line 107, in _wrap_closure
closure_result = closure()
File "/home/mosminin/dev/nanodet/.venv/lib/python3.9/site-packages/pytorch_lightning/loops/optimization/optimizer_loop.py", line 147, in __call__
self._result = self.closure(*args, **kwargs)
File "/home/mosminin/dev/nanodet/.venv/lib/python3.9/site-packages/pytorch_lightning/loops/optimization/optimizer_loop.py", line 133, in closure
step_output = self._step_fn()
File "/home/mosminin/dev/nanodet/.venv/lib/python3.9/site-packages/pytorch_lightning/loops/optimization/optimizer_loop.py", line 406, in _training_step
training_step_output = self.trainer._call_strategy_hook("training_step", *kwargs.values())
File "/home/mosminin/dev/nanodet/.venv/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py", line 1480, in _call_strategy_hook
output = fn(*args, **kwargs)
File "/home/mosminin/dev/nanodet/.venv/lib/python3.9/site-packages/pytorch_lightning/strategies/strategy.py", line 378, in training_step
return self.model.training_step(*args, **kwargs)
File "/home/mosminin/dev/nanodet/nanodet/trainer/task.py", line 78, in training_step
preds, loss, loss_states = self.model.forward_train(batch)
File "/home/mosminin/dev/nanodet/nanodet/model/arch/nanodet_plus.py", line 56, in forward_train
loss, loss_states = self.head.loss(head_out, gt_meta, aux_preds=aux_head_out)
File "/home/mosminin/dev/nanodet/nanodet/model/head/nanodet_plus_head.py", line 198, in loss
batch_assign_res = multi_apply(
File "/home/mosminin/dev/nanodet/nanodet/util/misc.py", line 24, in multi_apply
return tuple(map(list, zip(*map_results)))
File "/home/mosminin/dev/nanodet/.venv/lib/python3.9/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context
return func(*args, **kwargs)
File "/home/mosminin/dev/nanodet/nanodet/model/head/nanodet_plus_head.py", line 314, in target_assign_single_img
assign_result = self.assigner.assign(
File "/home/mosminin/dev/nanodet/nanodet/model/head/assigner/dsl_assigner.py", line 86, in assign
F.one_hot(gt_labels.to(torch.int64), pred_scores.shape[-1])
RuntimeError: Class values must be smaller than num_classes.
I used a converted COCO 2017 dataset in which only persons are labeled. Here is my config:
I also changed train.py to use the CPU instead of the GPU so that the errors would be more understandable.
After running it, I get the following errors.
What am I doing wrong?