When I run the command `CUDA_VISIBLE_DEVICES=0 python tools/train_net.py --cfg cfgs/DensePose_COCO/e2e_parsing_rcnn_R-50-FPN_s1x_ms.yaml` (I only have one GPU), the `uv_ann_labels` line always fails with the following error:
Traceback (most recent call last):
File "tools/train_net.py", line 147, in <module>
main()
File "tools/train_net.py", line 142, in main
train(model, train_loader, optimizer, scheduler, checkpointer, training_logger)
File "tools/train_net.py", line 73, in train
outputs = model(images, targets)
File "/data/parsing_env/lib/python3.6/site-packages/torch/nn/modules/module.py", line 493, in __call__
result = self.forward(*input, **kwargs)
File "/data/parsing_env/lib/python3.6/site-packages/torch/nn/parallel/data_parallel.py", line 150, in forward
return self.module(*inputs[0], **kwargs[0])
File "/data/parsing_env/lib/python3.6/site-packages/torch/nn/modules/module.py", line 493, in __call__
result = self.forward(*input, **kwargs)
File "/data/parsing/rcnn/modeling/model_builder.py", line 138, in forward
x, result, loss_uv = self.UV_RCNN(conv_features, result, targets)
File "/data/parsing_env/lib/python3.6/site-packages/torch/nn/modules/module.py", line 493, in __call__
result = self.forward(*input, **kwargs)
File "/data/parsing/rcnn/modeling/uv_rcnn/uv_rcnn.py", line 31, in forward
return self._forward_train(conv_features, proposals, targets)
File "/data/parsing/rcnn/modeling/uv_rcnn/uv_rcnn.py", line 43, in _forward_train
loss_Upoints, loss_Vpoints, loss_seg_AnnIndex, loss_IndexUVPoints = self.loss_evaluator(uv_logits)
File "/data/parsing/rcnn/modeling/uv_rcnn/loss.py", line 159, in __call__
uv_ann_labels = torch.from_numpy(uv_ann_labels.astype('int64')).cuda(device_id)
RuntimeError: CUDA error: an illegal memory access was encountered
Has anybody run into the same problem and can help me?
When I run the command `CUDA_VISIBLE_DEVICES=0 python tools/train_net.py --cfg cfgs/DensePose_COCO/e2e_parsing_rcnn_R-50-FPN_s1x_ms.yaml` (I only have one GPU), the `uv_ann_labels` line always fails with the following error:
Traceback (most recent call last):
File "tools/train_net.py", line 147, in <module>
main()
File "tools/train_net.py", line 142, in main
train(model, train_loader, optimizer, scheduler, checkpointer, training_logger)
File "tools/train_net.py", line 73, in train
outputs = model(images, targets)
File "/data/parsing_env/lib/python3.6/site-packages/torch/nn/modules/module.py", line 493, in __call__
result = self.forward(*input, **kwargs)
File "/data/parsing_env/lib/python3.6/site-packages/torch/nn/parallel/data_parallel.py", line 150, in forward
return self.module(*inputs[0], **kwargs[0])
File "/data/parsing_env/lib/python3.6/site-packages/torch/nn/modules/module.py", line 493, in __call__
result = self.forward(*input, **kwargs)
File "/data/parsing/rcnn/modeling/model_builder.py", line 138, in forward
x, result, loss_uv = self.UV_RCNN(conv_features, result, targets)
File "/data/parsing_env/lib/python3.6/site-packages/torch/nn/modules/module.py", line 493, in __call__
result = self.forward(*input, **kwargs)
File "/data/parsing/rcnn/modeling/uv_rcnn/uv_rcnn.py", line 31, in forward
return self._forward_train(conv_features, proposals, targets)
File "/data/parsing/rcnn/modeling/uv_rcnn/uv_rcnn.py", line 43, in _forward_train
loss_Upoints, loss_Vpoints, loss_seg_AnnIndex, loss_IndexUVPoints = self.loss_evaluator(uv_logits)
File "/data/parsing/rcnn/modeling/uv_rcnn/loss.py", line 159, in __call__
uv_ann_labels = torch.from_numpy(uv_ann_labels.astype('int64')).cuda(device_id)
RuntimeError: CUDA error: an illegal memory access was encountered
Has anybody run into the same problem and can help me?