NVlabs / VoxFormer

Official PyTorch implementation of VoxFormer [CVPR 2023 Highlight]
Other
1.07k stars 87 forks source link

Stage 2 training issue #58

Closed yunlaoban closed 3 months ago

yunlaoban commented 4 months ago

Hi, thanks for your good work. How can I address this problem? `2024-07-24 15:21:13,812 - mmdet - INFO - workflow: [('train', 1)], max: 20 epochs Traceback (most recent call last): File "./tools/train.py", line 263, in main() File "./tools/train.py", line 252, in main custom_train_model( File "/media/pai/sda/sy/VoxFormer/projects/mmdet3d_plugin/voxformer/apis/train.py", line 27, in custom_train_model custom_train_detector( File "/media/pai/sda/sy/VoxFormer/projects/mmdet3d_plugin/voxformer/apis/mmdet_train.py", line 200, in custom_train_detector runner.run(data_loaders, cfg.workflow) File "/media/pai/sda/soft/anaconda3/envs/voxformer/lib/python3.8/site-packages/mmcv/runner/epoch_based_runner.py", line 127, in run epoch_runner(data_loaders[i], kwargs) File "/media/pai/sda/soft/anaconda3/envs/voxformer/lib/python3.8/site-packages/mmcv/runner/epoch_based_runner.py", line 50, in train self.run_iter(data_batch, train_mode=True, kwargs) File "/media/pai/sda/soft/anaconda3/envs/voxformer/lib/python3.8/site-packages/mmcv/runner/epoch_based_runner.py", line 29, in run_iter outputs = self.model.train_step(data_batch, self.optimizer, File "/media/pai/sda/soft/anaconda3/envs/voxformer/lib/python3.8/site-packages/mmcv/parallel/distributed.py", line 52, in train_step output = self.module.train_step(inputs[0], kwargs[0]) File "/media/pai/sda/soft/anaconda3/envs/voxformer/lib/python3.8/site-packages/mmdet/models/detectors/base.py", line 237, in train_step losses = self(data) File "/media/pai/sda/soft/anaconda3/envs/voxformer/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(input, kwargs) File "/media/pai/sda/sy/VoxFormer/projects/mmdet3d_plugin/voxformer/detectors/voxformer.py", line 108, in forward return self.forward_train(kwargs) File "/media/pai/sda/soft/anaconda3/envs/voxformer/lib/python3.8/site-packages/mmcv/runner/fp16_utils.py", line 98, in new_func return old_func(*args, kwargs) File 
"/media/pai/sda/sy/VoxFormer/projects/mmdet3d_plugin/voxformer/detectors/voxformer.py", line 138, in forward_train losses_pts = self.forward_pts_train(img_feats, img_metas, target) File "/media/pai/sda/sy/VoxFormer/projects/mmdet3d_plugin/voxformer/detectors/voxformer.py", line 93, in forward_pts_train outs = self.pts_bbox_head(img_feats, img_metas, target) File "/media/pai/sda/soft/anaconda3/envs/voxformer/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(*input, *kwargs) File "/media/pai/sda/sy/VoxFormer/projects/mmdet3d_plugin/voxformer/dense_heads/voxformer_head.py", line 95, in forward seed_feats = self.cross_transformer.get_vox_features( File "/media/pai/sda/soft/anaconda3/envs/voxformer/lib/python3.8/site-packages/mmcv/runner/fp16_utils.py", line 98, in new_func return old_func(args, kwargs) File "/media/pai/sda/sy/VoxFormer/projects/mmdet3d_plugin/voxformer/modules/transformer.py", line 136, in get_vox_features bev_embed = self.encoder( File "/media/pai/sda/soft/anaconda3/envs/voxformer/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(*input, kwargs) File "/media/pai/sda/soft/anaconda3/envs/voxformer/lib/python3.8/site-packages/mmcv/runner/fp16_utils.py", line 98, in new_func return old_func(*args, *kwargs) File "/media/pai/sda/sy/VoxFormer/projects/mmdet3d_plugin/voxformer/modules/encoder.py", line 205, in forward output = layer( File "/media/pai/sda/soft/anaconda3/envs/voxformer/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(input, kwargs) File "/media/pai/sda/sy/VoxFormer/projects/mmdet3d_plugin/voxformer/modules/encoder.py", line 372, in forward query = self.attentions[attn_index]( File "/media/pai/sda/soft/anaconda3/envs/voxformer/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(*input, kwargs) File 
"/media/pai/sda/soft/anaconda3/envs/voxformer/lib/python3.8/site-packages/mmcv/runner/fp16_utils.py", line 186, in new_func return old_func(*args, kwargs) File "/media/pai/sda/sy/VoxFormer/projects/mmdet3d_plugin/voxformer/modules/deformable_cross_attention.py", line 166, in forward queries = self.deformable_attention(query=queries_rebatch.view(bsself.num_cams, max_len, self.embed_dims), key=key, value=value, File "/media/pai/sda/soft/anaconda3/envs/voxformer/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(input, kwargs) File "/media/pai/sda/sy/VoxFormer/projects/mmdet3d_plugin/voxformer/modules/deformable_cross_attention.py", line 394, in forward output = MultiScaleDeformableAttnFunction.apply( File "/media/pai/sda/soft/anaconda3/envs/voxformer/lib/python3.8/site-packages/torch/cuda/amp/autocast_mode.py", line 219, in decorate_fwd return fwd(*args, kwargs) File "/media/pai/sda/sy/VoxFormer/projects/mmdet3d_plugin/voxformer/modules/multi_scale_deformable_attn_function.py", line 118, in forward output = ext_module.ms_deform_attn_forward( RuntimeError: ms_deform_attn_impl_forward: implementation for device cuda:1 not found.

Traceback (most recent call last): File "./tools/train.py", line 263, in main() File "./tools/train.py", line 252, in main custom_train_model( File "/media/pai/sda/sy/VoxFormer/projects/mmdet3d_plugin/voxformer/apis/train.py", line 27, in custom_train_model custom_train_detector( File "/media/pai/sda/sy/VoxFormer/projects/mmdet3d_plugin/voxformer/apis/mmdet_train.py", line 200, in custom_train_detector runner.run(data_loaders, cfg.workflow) File "/media/pai/sda/soft/anaconda3/envs/voxformer/lib/python3.8/site-packages/mmcv/runner/epoch_based_runner.py", line 127, in run epoch_runner(data_loaders[i], kwargs) File "/media/pai/sda/soft/anaconda3/envs/voxformer/lib/python3.8/site-packages/mmcv/runner/epoch_based_runner.py", line 50, in train self.run_iter(data_batch, train_mode=True, kwargs) File "/media/pai/sda/soft/anaconda3/envs/voxformer/lib/python3.8/site-packages/mmcv/runner/epoch_based_runner.py", line 29, in run_iter outputs = self.model.train_step(data_batch, self.optimizer, File "/media/pai/sda/soft/anaconda3/envs/voxformer/lib/python3.8/site-packages/mmcv/parallel/distributed.py", line 52, in train_step output = self.module.train_step(inputs[0], kwargs[0]) File "/media/pai/sda/soft/anaconda3/envs/voxformer/lib/python3.8/site-packages/mmdet/models/detectors/base.py", line 237, in train_step losses = self(data) File "/media/pai/sda/soft/anaconda3/envs/voxformer/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(input, kwargs) File "/media/pai/sda/sy/VoxFormer/projects/mmdet3d_plugin/voxformer/detectors/voxformer.py", line 108, in forward return self.forward_train(kwargs) File "/media/pai/sda/soft/anaconda3/envs/voxformer/lib/python3.8/site-packages/mmcv/runner/fp16_utils.py", line 98, in new_func return old_func(*args, kwargs) File "/media/pai/sda/sy/VoxFormer/projects/mmdet3d_plugin/voxformer/detectors/voxformer.py", line 138, in forward_train losses_pts = self.forward_pts_train(img_feats, img_metas, target) 
File "/media/pai/sda/sy/VoxFormer/projects/mmdet3d_plugin/voxformer/detectors/voxformer.py", line 93, in forward_pts_train outs = self.pts_bbox_head(img_feats, img_metas, target) File "/media/pai/sda/soft/anaconda3/envs/voxformer/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(*input, *kwargs) File "/media/pai/sda/sy/VoxFormer/projects/mmdet3d_plugin/voxformer/dense_heads/voxformer_head.py", line 95, in forward seed_feats = self.cross_transformer.get_vox_features( File "/media/pai/sda/soft/anaconda3/envs/voxformer/lib/python3.8/site-packages/mmcv/runner/fp16_utils.py", line 98, in new_func return old_func(args, kwargs) File "/media/pai/sda/sy/VoxFormer/projects/mmdet3d_plugin/voxformer/modules/transformer.py", line 136, in get_vox_features bev_embed = self.encoder( File "/media/pai/sda/soft/anaconda3/envs/voxformer/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(*input, kwargs) File "/media/pai/sda/soft/anaconda3/envs/voxformer/lib/python3.8/site-packages/mmcv/runner/fp16_utils.py", line 98, in new_func return old_func(*args, *kwargs) File "/media/pai/sda/sy/VoxFormer/projects/mmdet3d_plugin/voxformer/modules/encoder.py", line 205, in forward output = layer( File "/media/pai/sda/soft/anaconda3/envs/voxformer/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(input, kwargs) File "/media/pai/sda/sy/VoxFormer/projects/mmdet3d_plugin/voxformer/modules/encoder.py", line 372, in forward query = self.attentions[attn_index]( File "/media/pai/sda/soft/anaconda3/envs/voxformer/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(*input, kwargs) File "/media/pai/sda/soft/anaconda3/envs/voxformer/lib/python3.8/site-packages/mmcv/runner/fp16_utils.py", line 186, in new_func return old_func(*args, kwargs) File 
"/media/pai/sda/sy/VoxFormer/projects/mmdet3d_plugin/voxformer/modules/deformable_cross_attention.py", line 166, in forward queries = self.deformable_attention(query=queries_rebatch.view(bsself.num_cams, max_len, self.embed_dims), key=key, value=value, File "/media/pai/sda/soft/anaconda3/envs/voxformer/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(input, kwargs) File "/media/pai/sda/sy/VoxFormer/projects/mmdet3d_plugin/voxformer/modules/deformable_cross_attention.py", line 394, in forward output = MultiScaleDeformableAttnFunction.apply( File "/media/pai/sda/soft/anaconda3/envs/voxformer/lib/python3.8/site-packages/torch/cuda/amp/autocast_mode.py", line 219, in decorate_fwd return fwd(*args, kwargs) File "/media/pai/sda/sy/VoxFormer/projects/mmdet3d_plugin/voxformer/modules/multi_scale_deformable_attn_function.py", line 118, in forward output = ext_module.ms_deform_attn_forward( RuntimeError: ms_deform_attn_impl_forward: implementation for device cuda:0 not found. `