Traceback (most recent call last):
File "/sematicSegmentation/RDRNet/mmsegmentation/tools/train.py", line 109, in <module>
main()
File "/sematicSegmentation/RDRNet/mmsegmentation/tools/train.py", line 102, in main
runner.train()
File "/home/dell/Anaconda/envs/RDRNet/lib/python3.8/site-packages/mmengine/runner/runner.py", line 1721, in train
model = self.train_loop.run() # type: ignore
File "/home/dell/Anaconda/envs/RDRNet/lib/python3.8/site-packages/mmengine/runner/loops.py", line 278, in run
self.run_iter(data_batch)
File "/home/dell/Anaconda/envs/RDRNet/lib/python3.8/site-packages/mmengine/runner/loops.py", line 301, in run_iter
outputs = self.runner.model.train_step(
File "/home/dell/Anaconda/envs/RDRNet/lib/python3.8/site-packages/mmengine/model/base_model/base_model.py", line 114, in train_step
losses = self._run_forward(data, mode='loss') # type: ignore
File "/home/dell/Anaconda/envs/RDRNet/lib/python3.8/site-packages/mmengine/model/base_model/base_model.py", line 340, in _run_forward
results = self(data, mode=mode)
File "/home/dell/Anaconda/envs/RDRNet/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/sematicSegmentation/RDRNet/mmsegmentation/mmseg/models/segmentors/base.py", line 94, in forward
return self.loss(inputs, data_samples)
File "/sematicSegmentation/RDRNet/mmsegmentation/mmseg/models/segmentors/encoder_decoder.py", line 176, in loss
loss_decode = self._decode_head_forward_train(x, data_samples)
File "/sematicSegmentation/RDRNet/mmsegmentation/mmseg/models/segmentors/encoder_decoder.py", line 137, in _decode_head_forward_train
loss_decode = self.decode_head.loss(inputs, data_samples,
File "/sematicSegmentation/RDRNet/mmsegmentation/mmseg/models/decode_heads/decode_head.py", line 262, in loss
losses = self.loss_by_feat(seg_logits, batch_data_samples)
File "/sematicSegmentation/RDRNet/mmsegmentation/mmseg/models/decode_heads/ddr_head.py", line 112, in loss_by_feat
loss['loss_context'] = self.loss_decode[0](context_logit, seg_label)
File "/home/dell/Anaconda/envs/RDRNet/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/sematicSegmentation/RDRNet/mmsegmentation/mmseg/models/losses/ohem_cross_entropy_loss.py", line 81, in forward
pred = pred.gather(1, tmp_target.unsqueeze(1))
RuntimeError: CUDA error: device-side assert triggered
我按照 Cityscapes 数据集的格式生成了标签,共 8 个类别,mask 中的取值为 0-8。但是在计算损失时会报错,请大佬答疑解惑,谢谢!报错信息如下。
11/23 08:21:33 - mmengine - INFO - Iter(train) [ 50/120000] lr: 9.9963e-03 eta: 1 day, 1:05:13 time: 0.7358 data_time: 0.0070 memory: 13965 loss: 1.1757 decode.loss_context: 0.7401 decode.loss_spatial: 0.4356 decode.acc_seg: 100.0000
../aten/src/ATen/native/cuda/ScatterGatherKernel.cu:144: operator(): block: [15551,0,0], thread: [0,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
../aten/src/ATen/native/cuda/ScatterGatherKernel.cu:144: operator(): block: [15551,0,0], thread: [1,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
../aten/src/ATen/native/cuda/ScatterGatherKernel.cu:144: operator(): block: [15551,0,0], thread: [2,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
../aten/src/ATen/native/cuda/ScatterGatherKernel.cu:144: operator(): block: [15482,0,0], thread: [33,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
../aten/src/ATen/native/cuda/ScatterGatherKernel.cu:144: operator(): block: [15482,0,0], thread: [34,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
../aten/src/ATen/native/cuda/ScatterGatherKernel.cu:144: operator(): block: [15162,0,0], thread: [45,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
../aten/src/ATen/native/cuda/ScatterGatherKernel.cu:144: operator(): block: [15162,0,0], thread: [46,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
../aten/src/ATen/native/cuda/ScatterGatherKernel.cu:144: operator(): block: [15162,0,0], thread: [47,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
../aten/src/ATen/native/cuda/ScatterGatherKernel.cu:144: operator(): block: [15094,0,0], thread: [47,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
../aten/src/ATen/native/cuda/ScatterGatherKernel.cu:144: operator(): block: [15094,0,0], thread: [48,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
../aten/src/ATen/native/cuda/ScatterGatherKernel.cu:144: operator(): block: [15094,0,0], thread: [49,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
../aten/src/ATen/native/cuda/ScatterGatherKernel.cu:144: operator(): block: [15141,0,0], thread: [45,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
../aten/src/ATen/native/cuda/ScatterGatherKernel.cu:144: operator(): block: [15141,0,0], thread: [46,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
../aten/src/ATen/native/cuda/ScatterGatherKernel.cu:144: operator(): block: [15141,0,0], thread: [47,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
../aten/src/ATen/native/cuda/ScatterGatherKernel.cu:144: operator(): block: [15141,0,0], thread: [48,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
../aten/src/ATen/native/cuda/ScatterGatherKernel.cu:144: operator(): block: [15146,0,0], thread: [45,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
../aten/src/ATen/native/cuda/ScatterGatherKernel.cu:144: operator(): block: [15146,0,0], thread: [46,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
../aten/src/ATen/native/cuda/ScatterGatherKernel.cu:144: operator(): block: [15146,0,0], thread: [47,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
../aten/src/ATen/native/cuda/ScatterGatherKernel.cu:144: operator(): block: [15146,0,0], thread: [48,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
../aten/src/ATen/native/cuda/ScatterGatherKernel.cu:144: operator(): block: [15036,0,0], thread: [49,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
../aten/src/ATen/native/cuda/ScatterGatherKernel.cu:144: operator(): block: [15036,0,0], thread: [50,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
Traceback (most recent call last):
File "/sematicSegmentation/RDRNet/mmsegmentation/tools/train.py", line 109, in <module>
main()
File "/sematicSegmentation/RDRNet/mmsegmentation/tools/train.py", line 102, in main
runner.train()
File "/home/dell/Anaconda/envs/RDRNet/lib/python3.8/site-packages/mmengine/runner/runner.py", line 1721, in train
model = self.train_loop.run() # type: ignore
File "/home/dell/Anaconda/envs/RDRNet/lib/python3.8/site-packages/mmengine/runner/loops.py", line 278, in run
self.run_iter(data_batch)
File "/home/dell/Anaconda/envs/RDRNet/lib/python3.8/site-packages/mmengine/runner/loops.py", line 301, in run_iter
outputs = self.runner.model.train_step(
File "/home/dell/Anaconda/envs/RDRNet/lib/python3.8/site-packages/mmengine/model/base_model/base_model.py", line 114, in train_step
losses = self._run_forward(data, mode='loss') # type: ignore
File "/home/dell/Anaconda/envs/RDRNet/lib/python3.8/site-packages/mmengine/model/base_model/base_model.py", line 340, in _run_forward
results = self(data, mode=mode)
File "/home/dell/Anaconda/envs/RDRNet/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/sematicSegmentation/RDRNet/mmsegmentation/mmseg/models/segmentors/base.py", line 94, in forward
return self.loss(inputs, data_samples)
File "/sematicSegmentation/RDRNet/mmsegmentation/mmseg/models/segmentors/encoder_decoder.py", line 176, in loss
loss_decode = self._decode_head_forward_train(x, data_samples)
File "/sematicSegmentation/RDRNet/mmsegmentation/mmseg/models/segmentors/encoder_decoder.py", line 137, in _decode_head_forward_train
loss_decode = self.decode_head.loss(inputs, data_samples,
File "/sematicSegmentation/RDRNet/mmsegmentation/mmseg/models/decode_heads/decode_head.py", line 262, in loss
losses = self.loss_by_feat(seg_logits, batch_data_samples)
File "/sematicSegmentation/RDRNet/mmsegmentation/mmseg/models/decode_heads/ddr_head.py", line 112, in loss_by_feat
loss['loss_context'] = self.loss_decode[0](context_logit, seg_label)
File "/home/dell/Anaconda/envs/RDRNet/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/sematicSegmentation/RDRNet/mmsegmentation/mmseg/models/losses/ohem_cross_entropy_loss.py", line 81, in forward
pred = pred.gather(1, tmp_target.unsqueeze(1))
RuntimeError: CUDA error: device-side assert triggered