# Learning-rate schedule: two consecutive cosine-annealing phases.
# Phase 1 (begin=0, end=15, i.e. 15 epochs): the learning rate is annealed
# from its starting value up to lr * 10.
# Phase 2 (begin=15, end=40, i.e. 25 epochs): the learning rate is annealed
# from lr * 10 down to lr * 1e-4.
# NOTE(review): the starting value of phase 1 is presumably the optimizer's
# base lr rather than 0 -- confirm against the scheduler implementation.
# Both phases are specified in epochs (by_epoch=True) and flagged for
# conversion to iteration-based updates (convert_to_iter_based=True).
dict(
type='CosineAnnealingLR',
T_max=15,
eta_min=lr * 10,
begin=0,
end=15,
by_epoch=True,
convert_to_iter_based=True),
dict(
type='CosineAnnealingLR',
T_max=25,
eta_min=lr * 1e-4,
begin=15,
end=40,
by_epoch=True,
convert_to_iter_based=True),
# Momentum schedule: mirrors the two learning-rate phases above.
# Phase 1 (begin=0, end=15, i.e. 15 epochs): momentum is annealed towards
# 0.85 / 0.95.
# Phase 2 (begin=15, end=40, i.e. 25 epochs): momentum is annealed from
# 0.85 / 0.95 to 1.
# NOTE(review): these targets look like multipliers relative to a base
# momentum of 0.95 rather than absolute values -- confirm against the
# optimizer settings in the base schedule.
dict(
type='CosineAnnealingMomentum',
T_max=15,
eta_min=0.85 / 0.95,
begin=0,
end=15,
by_epoch=True,
convert_to_iter_based=True),
dict(
type='CosineAnnealingMomentum',
T_max=25,
eta_min=1,
begin=15,
end=40,
by_epoch=True,
convert_to_iter_based=True)
I am trying to train the PV-RCNN model on the Waymo dataset. I processed the Waymo data as described in the documentation, which produced the following folders: kitti_format and waymo_format.
I modified the config file to use the Waymo base dataset file and to adjust the input channels. Training starts correctly and completes the first epoch without problems. However, when the program runs the evaluation at the end of the first epoch, it fails and throws an error.
Is the error caused by the way the data was converted, or directly by the model configuration, which may not support this dataset?
Prerequisite
Task
I have modified the scripts/configs, or I'm working on my own tasks/models/datasets.
Branch
main branch https://github.com/open-mmlab/mmdetection3d
Environment
sys.platform: linux Python: 3.8.19 (default, Mar 20 2024, 19:58:24) [GCC 11.2.0] CUDA available: True MUSA available: False numpy_random_seed: 2147483648 GPU 0,1: NVIDIA GeForce RTX 3090 CUDA_HOME: /usr/local/cuda NVCC: Cuda compilation tools, release 12.2, V12.2.140 GCC: gcc (Ubuntu 9.4.0-1ubuntu1~20.04.2) 9.4.0 PyTorch: 2.3.1 PyTorch compiling details: PyTorch built with:
TorchVision: 0.18.1 OpenCV: 4.10.0 MMEngine: 0.10.4 MMDetection: 3.3.0 MMDetection3D: 1.4.0+962f093 spconv2.0: False
Reproduces the problem - code sample
base = [ '../base/datasets/waymoD5-3d-3class.py', '../base/schedules/cyclic-40e.py', '../base/default_runtime.py' ]
voxel_size = [0.05, 0.05, 0.1] point_cloud_range = [0, -40, -3, 70.4, 40, 1]
data_root = 'data/waymo/kitti_format/' class_names = ['Pedestrian', 'Cyclist', 'Car'] metainfo = dict(CLASSES=class_names) backend_args = None db_sampler = dict( data_root=data_root, info_path=data_root + 'waymo_dbinfos_train.pkl', rate=1.0, prepare=dict( filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5, Pedestrian=5, Cyclist=5)), classes=class_names, sample_groups=dict(Car=15, Pedestrian=10, Cyclist=10), points_loader=dict( type='LoadPointsFromFile', coord_type='LIDAR', load_dim=6, use_dim=4, backend_args=backend_args), backend_args=backend_args)
train_pipeline = [ dict( type='LoadPointsFromFile', coord_type='LIDAR', load_dim=6, use_dim=4, backend_args=backend_args), dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True), dict(type='ObjectSample', db_sampler=db_sampler, use_ground_plane=False), dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), dict( type='GlobalRotScaleTrans', rot_range=[-0.78539816, 0.78539816], scale_ratio_range=[0.95, 1.05]), dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), dict(type='PointShuffle'), dict( type='Pack3DDetInputs', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) ] test_pipeline = [ dict( type='LoadPointsFromFile', coord_type='LIDAR', load_dim=6, use_dim=4, backend_args=backend_args), dict( type='MultiScaleFlipAug3D', img_scale=(1333, 800), pts_scale_ratio=1, flip=False, transforms=[ dict( type='GlobalRotScaleTrans', rot_range=[0, 0], scale_ratio_range=[1., 1.], translation_std=[0, 0, 0]), dict(type='RandomFlip3D'), dict( type='PointsRangeFilter', point_cloud_range=point_cloud_range) ]), dict(type='Pack3DDetInputs', keys=['points']) ]
model = dict( type='PointVoxelRCNN', data_preprocessor=dict( type='Det3DDataPreprocessor', voxel=True, voxel_layer=dict( max_num_points=5, # max_points_per_voxel point_cloud_range=point_cloud_range, voxel_size=voxel_size, max_voxels=(16000, 40000))), voxel_encoder=dict(type='HardSimpleVFE'), middle_encoder=dict( type='SparseEncoder', in_channels=4, sparse_shape=[41, 1600, 1408], order=('conv', 'norm', 'act'), encoder_paddings=((0, 0, 0), ((1, 1, 1), 0, 0), ((1, 1, 1), 0, 0), ((0, 1, 1), 0, 0)), return_middle_feats=True), points_encoder=dict( type='VoxelSetAbstraction', num_keypoints=2048, fused_out_channel=128, voxel_size=voxel_size, point_cloud_range=point_cloud_range, voxel_sa_cfgs_list=[ dict( type='StackedSAModuleMSG', in_channels=16, scale_factor=1, radius=(0.4, 0.8), sample_nums=(16, 16), mlp_channels=((16, 16), (16, 16)), use_xyz=True), dict( type='StackedSAModuleMSG', in_channels=32, scale_factor=2, radius=(0.8, 1.2), sample_nums=(16, 32), mlp_channels=((32, 32), (32, 32)), use_xyz=True), dict( type='StackedSAModuleMSG', in_channels=64, scale_factor=4, radius=(1.2, 2.4), sample_nums=(16, 32), mlp_channels=((64, 64), (64, 64)), use_xyz=True), dict( type='StackedSAModuleMSG', in_channels=64, scale_factor=8, radius=(2.4, 4.8), sample_nums=(16, 32), mlp_channels=((64, 64), (64, 64)), use_xyz=True) ], rawpoints_sa_cfgs=dict( type='StackedSAModuleMSG', in_channels=1, radius=(0.4, 0.8), sample_nums=(16, 16), mlp_channels=((16, 16), (16, 16)), use_xyz=True), bev_feat_channel=256, bev_scale_factor=8), backbone=dict( type='SECOND', in_channels=256, layer_nums=[5, 5], layer_strides=[1, 2], out_channels=[128, 256]), neck=dict( type='SECONDFPN', in_channels=[128, 256], upsample_strides=[1, 2], out_channels=[256, 256]), rpn_head=dict( type='PartA2RPNHead', num_classes=3, in_channels=512, feat_channels=512, use_direction_classifier=True, dir_offset=0.78539, anchor_generator=dict( type='Anchor3DRangeGenerator', ranges=[[0, -40.0, -0.6, 70.4, 40.0, -0.6], [0, -40.0, -0.6, 
70.4, 40.0, -0.6], [0, -40.0, -1.78, 70.4, 40.0, -1.78]], sizes=[[0.8, 0.6, 1.73], [1.76, 0.6, 1.73], [3.9, 1.6, 1.56]], rotations=[0, 1.57], reshape_out=False), diff_rad_by_sin=True, assigner_per_size=True, assign_per_class=True, bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'), loss_cls=dict( type='mmdet.FocalLoss', use_sigmoid=True, gamma=2.0, alpha=0.25, loss_weight=1.0), loss_bbox=dict( type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0), loss_dir=dict( type='mmdet.CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), roi_head=dict( type='PVRCNNRoiHead', num_classes=3, semantic_head=dict( type='ForegroundSegmentationHead', in_channels=640, extra_width=0.1, loss_seg=dict( type='mmdet.FocalLoss', use_sigmoid=True, reduction='sum', gamma=2.0, alpha=0.25, activated=True, loss_weight=1.0)), bbox_roi_extractor=dict( type='Batch3DRoIGridExtractor', grid_size=6, roi_layer=dict( type='StackedSAModuleMSG', in_channels=128, radius=(0.8, 1.6), sample_nums=(16, 16), mlp_channels=((64, 64), (64, 64)), use_xyz=True, pool_mod='max'), ), bbox_head=dict( type='PVRCNNBBoxHead', in_channels=128, grid_size=6, num_classes=3, class_agnostic=True, shared_fc_channels=(256, 256), reg_channels=(256, 256), cls_channels=(256, 256), dropout_ratio=0.3, with_corner_loss=True, bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'), loss_bbox=dict( type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, reduction='sum', loss_weight=1.0), loss_cls=dict( type='mmdet.CrossEntropyLoss', use_sigmoid=True, reduction='sum', loss_weight=1.0))),
# model training and testing settings
train_dataloader = dict( batch_size=2, num_workers=2, dataset=dict(dataset=dict(pipeline=train_pipeline, metainfo=metainfo))) test_dataloader = dict(dataset=dict(pipeline=test_pipeline, metainfo=metainfo)) eval_dataloader = dict(dataset=dict(pipeline=test_pipeline, metainfo=metainfo)) lr = 0.001 optim_wrapper = dict(optimizer=dict(lr=lr)) param_scheduler = [
# learning rate scheduler
]
Reproduces the problem - command or script
python tools/train.py configs/pv_rcnn/pv_rcnn_8xb2-80e_waymoD5-3d-3class.py
Reproduces the problem - error message
Additional information
I am trying to train the PV-RCNN model on the Waymo dataset. I processed the Waymo data as described in the documentation, which produced the following folders: kitti_format and waymo_format. I modified the config file to use the Waymo base dataset file and to adjust the input channels. Training starts correctly and completes the first epoch without problems. However, when the program runs the evaluation at the end of the first epoch, it fails and throws an error.
Is the error caused by the way the data was converted, or directly by the model configuration, which may not support this dataset?