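I'm trying to train the model on the YouTube-VIS dataset following the instructions, but `loss_match` is zero at the very beginning of training (it is returned as the plain float `0.0` rather than a tensor), which makes training crash.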
Do you have any idea about this?
Here is the error info:
loss {'loss_rpn_cls': [tensor([0.5429], device='cuda:0', grad_fn=), tensor([0.1198], device='cuda:0', grad_fn=), tensor([0.0242], device='cuda:0', grad_fn=), tensor([0.0027], device='cuda:0', grad_fn=), tensor([0.], device='cuda:0', grad_fn=)], 'loss_rpn_reg': [tensor([0.], device='cuda:0', grad_fn=), tensor([0.], device='cuda:0', grad_fn=), tensor([0.0009], device='cuda:0', grad_fn=), tensor([0.], device='cuda:0', grad_fn=), tensor([0.], device='cuda:0', grad_fn=)], 'loss_cls': tensor([3.6361], device='cuda:0', grad_fn=), 'acc': tensor([1.4925], device='cuda:0'), 'loss_reg': tensor([4.3604e-06], device='cuda:0', grad_fn=), 'loss_match': 0.0, 'loss_mask': tensor([0.7678], device='cuda:0', grad_fn=)}
Traceback (most recent call last):
File "tools/train.py", line 93, in <module>
main()
File "tools/train.py", line 89, in main
logger=logger)
File "/data/qianyu/detection/MaskTrackRCNN/mmdet/apis/train.py", line 60, in train_detector
_non_dist_train(model, dataset, cfg, validate=validate)
File "/data/qianyu/detection/MaskTrackRCNN/mmdet/apis/train.py", line 122, in _non_dist_train
runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
File "/home/qianyu/anaconda3/envs/p3t4/lib/python3.7/site-packages/mmcv/runner/runner.py", line 358, in run
epoch_runner(data_loaders[i], **kwargs)
File "/home/qianyu/anaconda3/envs/p3t4/lib/python3.7/site-packages/mmcv/runner/runner.py", line 264, in train
self.model, data_batch, train_mode=True, **kwargs)
File "/data/qianyu/detection/MaskTrackRCNN/mmdet/apis/train.py", line 39, in batch_processor
loss, log_vars = parse_losses(losses)
File "/data/qianyu/detection/MaskTrackRCNN/mmdet/apis/train.py", line 25, in parse_losses
'{} is not a tensor or list of tensors'.format(loss_name))
TypeError: loss_match is not a tensor or list of tensors
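I'm trying to train the model on the YouTube-VIS dataset following the instructions, but `loss_match` is zero at the very beginning of training (it is returned as the plain float `0.0` rather than a tensor), which makes training crash. Do you have any idea what causes this?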
Here is the error info: loss {'loss_rpn_cls': [tensor([0.5429], device='cuda:0', grad_fn=), tensor([0.1198], device='cuda:0', grad_fn=), tensor([0.0242], device='cuda:0', grad_fn=), tensor([0.0027], device='cuda:0', grad_fn=), tensor([0.], device='cuda:0', grad_fn=)], 'loss_rpn_reg': [tensor([0.], device='cuda:0', grad_fn=), tensor([0.], device='cuda:0', grad_fn=), tensor([0.0009], device='cuda:0', grad_fn=), tensor([0.], device='cuda:0', grad_fn=), tensor([0.], device='cuda:0', grad_fn=)], 'loss_cls': tensor([3.6361], device='cuda:0', grad_fn=), 'acc': tensor([1.4925], device='cuda:0'), 'loss_reg': tensor([4.3604e-06], device='cuda:0', grad_fn=), 'loss_match': 0.0, 'loss_mask': tensor([0.7678], device='cuda:0', grad_fn=)}
Traceback (most recent call last):
File "tools/train.py", line 93, in <module>
main()
File "tools/train.py", line 89, in main
logger=logger)
File "/data/qianyu/detection/MaskTrackRCNN/mmdet/apis/train.py", line 60, in train_detector
_non_dist_train(model, dataset, cfg, validate=validate)
File "/data/qianyu/detection/MaskTrackRCNN/mmdet/apis/train.py", line 122, in _non_dist_train
runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
File "/home/qianyu/anaconda3/envs/p3t4/lib/python3.7/site-packages/mmcv/runner/runner.py", line 358, in run
epoch_runner(data_loaders[i], **kwargs)
File "/home/qianyu/anaconda3/envs/p3t4/lib/python3.7/site-packages/mmcv/runner/runner.py", line 264, in train
self.model, data_batch, train_mode=True, **kwargs)
File "/data/qianyu/detection/MaskTrackRCNN/mmdet/apis/train.py", line 39, in batch_processor
loss, log_vars = parse_losses(losses)
File "/data/qianyu/detection/MaskTrackRCNN/mmdet/apis/train.py", line 25, in parse_losses
'{} is not a tensor or list of tensors'.format(loss_name))
TypeError: loss_match is not a tensor or list of tensors