ShaileshSardaTTL closed this issue 1 year ago
I found the solution for this. It was related to the learning rate. I used the following logic to scale the base learning rate to my batch size:

```python
num_gpu = 1
bs = num_gpu * 2
cfg.SOLVER.BASE_LR = 0.02 * bs / 16  # pick a good LR
```
Issue Resolved.
Traceback (most recent call last):
File "train_net.py", line 230, in <module>
launch(
File "/home/sss924877/tech_pub/venv_py3.8/lib/python3.8/site-packages/detectron2/engine/launch.py", line 82, in launch
main_func(*args)
File "train_net.py", line 192, in main
return trainer.train()
File "/home/sss924877/tech_pub/venv_py3.8/lib/python3.8/site-packages/detectron2/engine/defaults.py", line 489, in train
super().train(self.start_iter, self.max_iter)
File "/home/sss924877/tech_pub/venv_py3.8/lib/python3.8/site-packages/detectron2/engine/train_loop.py", line 149, in train
self.run_step()
File "/home/sss924877/tech_pub/venv_py3.8/lib/python3.8/site-packages/detectron2/engine/defaults.py", line 499, in run_step
self._trainer.run_step()
File "/home/sss924877/tech_pub/venv_py3.8/lib/python3.8/site-packages/detectron2/engine/train_loop.py", line 273, in run_step
loss_dict = self.model(data)
File "/home/sss924877/tech_pub/venv_py3.8/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/sss924877/tech_pub/venv_py3.8/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/home/sss924877/tech_pub/venv_py3.8/lib/python3.8/site-packages/detectron2/modeling/meta_arch/rcnn.py", line 157, in forward
proposals, proposal_losses = self.proposal_generator(images, features, gt_instances)
File "/home/sss924877/tech_pub/venv_py3.8/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/sss924877/tech_pub/venv_py3.8/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/home/sss924877/tech_pub/venv_py3.8/lib/python3.8/site-packages/detectron2/modeling/proposal_generator/rpn.py", line 477, in forward
proposals = self.predict_proposals(
File "/home/sss924877/tech_pub/venv_py3.8/lib/python3.8/site-packages/detectron2/modeling/proposal_generator/rpn.py", line 503, in predict_proposals
return find_top_rpn_proposals(
File "/home/sss924877/tech_pub/venv_py3.8/lib/python3.8/site-packages/detectron2/modeling/proposal_generator/proposal_utils.py", line 103, in find_top_rpn_proposals
raise FloatingPointError(
FloatingPointError: Predicted boxes or scores contain Inf/NaN. Training has diverged.