Closed randomfforest closed 1 year ago
Hi @randomfforest, have you solved the issue? Could you please share your method? Thanks.
你好,这个问题你解决了吗?
你好,这个问题你解决了吗?
Pred和gt维度不一样,调试了一下发现是pred维度的问题,然后往回推发现配置文件里面的 SparseEncoder里面的 sparse_shape的值应该是实际的点云范围差值除以voxel_size,而不是那个 (41, 1024, 1024)
Prerequisite
Task
I'm using the official example scripts/configs for the officially supported tasks/models/datasets.
Branch
main branch https://github.com/open-mmlab/mmdetection3d
Environment
System environment: sys.platform: linux Python: 3.10.0 (default, Mar 3 2022, 09:58:08) [GCC 7.5.0] CUDA available: True numpy_random_seed: 545278729 GPU 0: NVIDIA RTX A4000 Laptop GPU CUDA_HOME: /usr/local/cuda-11.7 NVCC: Cuda compilation tools, release 11.7, V11.7.99 GCC: gcc (Ubuntu 9.4.0-1ubuntu1~20.04.2) 9.4.0 PyTorch: 2.0.1+cu117 PyTorch compiling details: PyTorch built with:
Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.7, CUDNN_VERSION=8.5.0, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wunused-local-typedefs -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_DISABLE_GPU_ASSERTS=ON, TORCH_VERSION=2.0.1, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF,
TorchVision: 0.15.2+cu117 OpenCV: 4.8.0 MMEngine: 0.8.4
Runtime environment: cudnn_benchmark: False mp_cfg: {'mp_start_method': 'fork', 'opencv_num_threads': 0} dist_cfg: {'backend': 'nccl'} seed: 545278729 Distributed launcher: none Distributed training: False GPU number: 1
Reproduces the problem - code sample
voxel_size = [0.2, 0.2, 4] #xg point_cloud_range = [0, -40, -3, 70.4, 40, 1] model = dict( type='CenterPoint', data_preprocessor=dict( type='Det3DDataPreprocessor', voxel=True, voxel_layer=dict( max_num_points=20, point_cloud_range = point_cloud_range, #xg voxel_size=voxel_size,
max_voxels=(30000, 40000))), #xg
dataset_type = 'KittiDataset' data_root = 'data/kitti/' class_names = ['Pedestrian', 'Cyclist', 'Car']
point_cloud_range = [0, -40, -3, 70.4, 40, 1]
input_modality = dict(use_lidar=True, use_camera=False) metainfo = dict(classes=class_names) db_sampler = dict( data_root=data_root, info_path=data_root + 'kitti_dbinfos_train.pkl', rate=1.0, prepare=dict( filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)), classes=class_names,
sample_groups=dict(Car=12, Pedestrian=6, Cyclist=6), #centerpoint-kitti
train_pipeline = [ dict( type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, # x, y, z, intensity use_dim=4, backend_args=backend_args), # dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True), dict(type='ObjectSample', db_sampler=db_sampler), dict( type='ObjectNoise', num_try=100, translation_std=[1.0, 1.0, 0.5], global_rot_range=[0.0, 0.0], rot_range=[-0.78539816, 0.78539816]), dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), dict( type='GlobalRotScaleTrans', rot_range=[-0.78539816, 0.78539816], scale_ratio_range=[0.95, 1.05]), dict(type='PointsRangeFilter', point_cloud_range= point_cloud_range), dict(type='ObjectRangeFilter', point_cloud_range= point_cloud_range), dict(type='PointShuffle'), dict( type='Pack3DDetInputs', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) # test_pipeline = [ dict( type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4, backend_args=backend_args), dict( type='MultiScaleFlipAug3D', img_scale=(1333, 800), pts_scale_ratio=1, flip=False, transforms=[ dict( type='GlobalRotScaleTrans', rot_range=[0, 0], scale_ratio_range=[1., 1.], translation_std=[0, 0, 0]), dict(type='RandomFlip3D'), dict( type='PointsRangeFilter', point_cloud_range=point_cloud_range) ]), dict(type='Pack3DDetInputs', keys=['points']) ] eval_pipeline = [ dict( type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4, backend_args=backend_args), dict(type='Pack3DDetInputs', keys=['points']) ] train_dataloader = dict( batch_size=2, num_workers=4, persistent_workers=True, sampler=dict(type='DefaultSampler', shuffle=True), dataset=dict( type='RepeatDataset', times=2, # dataset=dict( type=dataset_type, data_root=data_root, ann_file='kitti_infos_train.pkl', # data_prefix=dict(pts='training/velodyne_reduced'), # pipeline=train_pipeline, modality=input_modality, test_mode=False, metainfo=metainfo,
we use box_type_3d='LiDAR' in kitti and nuscenes dataset
vis_backends = [dict(type='LocalVisBackend')] visualizer = dict( type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer') lr = 0.0018 optim_wrapper = dict( type='OptimWrapper', optimizer=dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01), clip_grad=dict(max_norm=10, norm_type=2)) param_scheduler = [ dict( type='CosineAnnealingLR', T_max=16, eta_min=lr * 10, begin=0, end=16, by_epoch=True, convert_to_iter_based=True), dict( type='CosineAnnealingLR', T_max=24, eta_min=lr * 1e-4, begin=16, end=40, by_epoch=True, convert_to_iter_based=True), dict( type='CosineAnnealingMomentum', T_max=16, eta_min=0.85 / 0.95, begin=0, end=16, by_epoch=True, convert_to_iter_based=True), dict( type='CosineAnnealingMomentum', T_max=24, eta_min=1, begin=16, end=40, by_epoch=True, convert_to_iter_based=True) ] train_cfg = dict(by_epoch=True, max_epochs=40, val_interval=1)
val_cfg = dict()
test_cfg = dict()
auto_scale_lr = dict(enable=False, base_batch_size=48) default_scope = 'mmdet3d' default_hooks = dict( timer=dict(type='IterTimerHook'), logger=dict(type='LoggerHook', interval=50), param_scheduler=dict(type='ParamSchedulerHook'), checkpoint=dict(type='CheckpointHook', interval=2), sampler_seed=dict(type='DistSamplerSeedHook'), visualization=dict(type='Det3DVisualizationHook')) env_cfg = dict( cudnn_benchmark=False, mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), dist_cfg=dict(backend='nccl'), ) log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True) log_level = 'INFO' load_from = None resume = False
Reproduces the problem - command or script
python tools/train.py config/centerpoint/centerpoint_kitti-3d.py
centerpoint_head.py(changed) def get_targets_single(self, gt_instances_3d: InstanceData) -> Tuple[Tensor]: """Generate training targets for a single sample.
Reproduces the problem - error message
Traceback (most recent call last): File "/home/ypx/mmdetection3d/tools/train.py", line 135, in <module>
main()
File "/home/ypx/mmdetection3d/tools/train.py", line 131, in main
runner.train()
File "/home/ypx/miniconda3/envs/mtdet/lib/python3.10/site-packages/mmengine/runner/runner.py", line 1745, in train
model = self.train_loop.run() # type: ignore
File "/home/ypx/miniconda3/envs/mtdet/lib/python3.10/site-packages/mmengine/runner/loops.py", line 96, in run
self.run_epoch()
File "/home/ypx/miniconda3/envs/mtdet/lib/python3.10/site-packages/mmengine/runner/loops.py", line 112, in run_epoch
self.run_iter(idx, data_batch)
File "/home/ypx/miniconda3/envs/mtdet/lib/python3.10/site-packages/mmengine/runner/loops.py", line 128, in run_iter
outputs = self.runner.model.train_step(
File "/home/ypx/miniconda3/envs/mtdet/lib/python3.10/site-packages/mmengine/model/base_model/base_model.py", line 114, in train_step
losses = self._run_forward(data, mode='loss') # type: ignore
File "/home/ypx/miniconda3/envs/mtdet/lib/python3.10/site-packages/mmengine/model/base_model/base_model.py", line 340, in _run_forward
results = self(data, mode=mode)
File "/home/ypx/miniconda3/envs/mtdet/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ypx/miniconda3/envs/mtdet/lib/python3.10/site-packages/mmdet3d-1.2.0-py3.10.egg/mmdet3d/models/detectors/base.py", line 75, in forward
return self.loss(inputs, data_samples, **kwargs)
File "/home/ypx/miniconda3/envs/mtdet/lib/python3.10/site-packages/mmdet3d-1.2.0-py3.10.egg/mmdet3d/models/detectors/mvx_two_stage.py", line 274, in loss
losses_pts = self.pts_bbox_head.loss(pts_feats, batch_data_samples,
File "/home/ypx/miniconda3/envs/mtdet/lib/python3.10/site-packages/mmdet3d-1.2.0-py3.10.egg/mmdet3d/models/dense_heads/centerpoint_head.py", line 622, in loss
losses = self.loss_by_feat(outs, batch_gt_instance_3d)
File "/home/ypx/miniconda3/envs/mtdet/lib/python3.10/site-packages/mmdet3d-1.2.0-py3.10.egg/mmdet3d/models/dense_heads/centerpoint_head.py", line 650, in loss_by_feat
loss_heatmap = self.loss_cls(
File "/home/ypx/miniconda3/envs/mtdet/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ypx/mmdetection/mmdet/models/losses/gaussian_focal_loss.py", line 176, in forward
loss_reg = self.loss_weight * gaussian_focal_loss(
File "/home/ypx/mmdetection/mmdet/models/losses/utils.py", line 121, in wrapper
loss = loss_func(pred, target, **kwargs)
File "/home/ypx/mmdetection/mmdet/models/losses/gaussian_focal_loss.py", line 35, in gaussian_focal_loss
pos_loss = -(pred + eps).log() * (1 - pred).pow(alpha) * pos_weights
RuntimeError: The size of tensor a (88) must match the size of tensor b (81) at non-singleton dimension 3
Additional information
I changed centerpoint_head.py because of the differences between the KITTI and nuScenes datasets, but there is still a problem. The error seems to come from 'loss' in centerpoint_head.py, but I do not know how to solve it.