Open antoniskef opened 1 year ago
Can you provide the specific config file?
thank you for your reply
voxel_size = [0.25, 0.25, 8] model = dict( type='MVXFasterRCNN', data_preprocessor=dict( type='Det3DDataPreprocessor', voxel=True, voxel_layer=dict( max_num_points=64, point_cloud_range=[-50, -50, -5, 50, 50, 3], voxel_size=voxel_size, max_voxels=(30000, 40000))), pts_voxel_encoder=dict( type='HardVFE', in_channels=4, feat_channels=[64, 64], with_distance=False, voxel_size=voxel_size, with_cluster_center=True, with_voxel_center=True, point_cloud_range=[-50, -50, -5, 50, 50, 3], norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)), pts_middle_encoder=dict( type='PointPillarsScatter', in_channels=64, output_shape=[400, 400]), pts_backbone=dict( type='SECOND', in_channels=64, norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), layer_nums=[3, 5, 5], layer_strides=[2, 2, 2], out_channels=[64, 128, 256]), pts_neck=dict( type='mmdet.FPN', norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), act_cfg=dict(type='ReLU'), in_channels=[64, 128, 256], out_channels=256, start_level=0, num_outs=3), pts_bbox_head=dict( type='Anchor3DHead', num_classes=10, in_channels=256, feat_channels=256, use_direction_classifier=True, anchor_generator=dict( type='AlignedAnchor3DRangeGenerator', ranges=[[-50, -50, -1.8, 50, 50, -1.8]], scales=[1, 2, 4], sizes=[ [2.5981, 0.8660, 1.], # 1.5 / sqrt(3) [1.7321, 0.5774, 1.], # 1 / sqrt(3) [1., 1., 1.], [0.4, 0.4, 1], ], custom_values=[0, 0], rotations=[0, 1.57], reshape_out=True), assigner_per_size=False, diff_rad_by_sin=True, dir_offset=-0.7854, # -pi / 4 bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9), loss_cls=dict( type='mmdet.FocalLoss', use_sigmoid=True, gamma=2.0, alpha=0.25, loss_weight=1.0), loss_bbox=dict( type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), loss_dir=dict( type='mmdet.CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
train_cfg=dict(
pts=dict(
assigner=dict(
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
allowed_border=0,
code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],
pos_weight=-1,
debug=False)),
test_cfg=dict(
pts=dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_pre=1000,
nms_thr=0.2,
score_thr=0.05,
min_bbox_size=0,
max_num=500)))
We use MVXFasterRCNN
as a detector to train pp-nus, which can also be used to train multi-modal detectors. For voxel encoder module, HardVFE
and PillarFeatureNet
share the same structure, but HardVFE
can also be used for multi-modal methods.
okay nice. but can you tell me why the feat_channels=[64, 64] are two in this and in the kitti implementation is one (pillar feature net is used there).
hv_pointpillars_secfpn_kitti:
voxel_size = [0.16, 0.16, 4]
model = dict( type='VoxelNet', voxel_layer=dict( max_num_points=32, # max_points_per_voxel point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1], voxel_size=voxel_size, max_voxels=(16000, 40000) # (training, testing) max_voxels ), voxel_encoder=dict( type='PillarFeatureNet', in_channels=4, feat_channels=[64], with_distance=False, voxel_size=voxel_size, point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1]), middle_encoder=dict( type='PointPillarsScatter', in_channels=64, output_shape=[496, 432]), backbone=dict( type='SECOND', in_channels=64, layer_nums=[3, 5, 5], layer_strides=[2, 2, 2], out_channels=[64, 128, 256]), neck=dict( type='SECONDFPN', in_channels=[64, 128, 256], upsample_strides=[1, 2, 4], out_channels=[128, 128, 128]), bbox_head=dict( type='Anchor3DHead', num_classes=3, in_channels=384, feat_channels=384, use_direction_classifier=True, anchor_generator=dict( type='Anchor3DRangeGenerator', ranges=[ [0, -39.68, -0.6, 70.4, 39.68, -0.6], [0, -39.68, -0.6, 70.4, 39.68, -0.6], [0, -39.68, -1.78, 70.4, 39.68, -1.78], ], sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]], rotations=[0, 1.57], reshape_out=False), diff_rad_by_sin=True, bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'), loss_cls=dict( type='FocalLoss', use_sigmoid=True, gamma=2.0, alpha=0.25, loss_weight=1.0), loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0), loss_dir=dict( type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
train_cfg=dict(
assigner=[
dict( # for Pedestrian
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.5,
neg_iou_thr=0.35,
min_pos_iou=0.35,
ignore_iof_thr=-1),
dict( # for Cyclist
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.5,
neg_iou_thr=0.35,
min_pos_iou=0.35,
ignore_iof_thr=-1),
dict( # for Car
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1),
],
allowed_border=0,
pos_weight=-1,
debug=False),
test_cfg=dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_thr=0.01,
score_thr=0.1,
min_bbox_size=0,
nms_pre=100,
max_num=50))
Hello, I would like to ask about the pointpillars implementation for NuScenes. How is this implementation pointpillars, although the voxel encoder is changed from pillar feature net to hardVFE and so no pillars are created?