SH-Tan opened this issue 2 years ago
You should only need to change in_channels in the config.
I changed in_channels in the dynamic voxel encoder to 4. Below is the current model config.
model = dict( type='DynamicCenterPoint',
voxel_layer=dict(
voxel_size=voxel_size,
max_num_points=5,
point_cloud_range=point_cloud_range,
max_voxels=(-1,-1)
),
voxel_encoder=dict(
type='DynamicVFE',
in_channels=4,
feat_channels=[64, 128],
with_distance=False,
voxel_size=voxel_size,
with_cluster_center=True,
with_voxel_center=True,
point_cloud_range=point_cloud_range,
norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)
),
middle_encoder=dict(
type='SSTInputLayerV2',
window_shape=window_shape,
sparse_shape=(468, 468, 1),
shuffle_voxels=True,
debug=True,
drop_info=drop_info,
pos_temperature=1000,
normalize_pos=False,
),
backbone=dict(
type='SSTv2',
d_model=[128,] * 4,
nhead=[4, ] * 4,
num_blocks=4,
dim_feedforward=[256, ] * 4,
output_shape=[468, 468],
num_attached_conv=4,
conv_kwargs=[
dict(kernel_size=3, dilation=1, padding=1, stride=1),
dict(kernel_size=3, dilation=1, padding=1, stride=1),
dict(kernel_size=3, dilation=1, padding=1, stride=1),
dict(kernel_size=3, dilation=2, padding=2, stride=1),
],
conv_in_channel=128,
conv_out_channel=128,
debug=True,
layer_cfg=dict(use_bn=False, cosine=True, tau_min=0.01),
checkpoint_blocks=[0, 1], # Consider removing it if the GPU memory is sufficient
conv_shortcut=True,
),
neck=dict(
type='SECONDFPN',
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
in_channels=[128,],
upsample_strides=[1,],
out_channels=[128, ]
),
bbox_head=dict(
type='CenterHead',
_delete_=True,
in_channels=128,
tasks=[
dict(num_class=3, class_names=['car', 'pedestrian', 'cyclist']),
],
common_heads=dict(
reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)
),
share_conv_channel=64,
bbox_coder=dict(
type='CenterPointBBoxCoder',
post_center_range=[0, -40, -3, 70.4, 40, 1],
max_num=4096,
score_threshold=0.1,
out_size_factor=1,
voxel_size=voxel_size[:2],
pc_range=point_cloud_range[:2],
code_size=9),
separate_head=dict(
type='DCNSeparateHead', init_bias=-2.19, final_kernel=3,
dcn_config=dict(
type='DCN',
in_channels=64,
out_channels=64,
kernel_size=3,
padding=1,
groups=4,
bias=False
), # mmcv 1.2.6 doesn't support bias=True anymore
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
),
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),
loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=2),
norm_bbox=True
),
# model training and testing settings
train_cfg=dict(
grid_size=[468, 468, 1],
voxel_size=voxel_size,
out_size_factor=1,
dense_reg=1, # not used
gaussian_overlap=0.1,
max_objs=500,
min_radius=2,
point_cloud_range=point_cloud_range,
code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0]
),
test_cfg=dict(
post_center_limit_range=[-80, -80, -10, 80, 80, 10],
max_per_img=500, # what is this
max_pool_nms=False,
min_radius=[4, 12, 10, 1, 0.85, 0.175], # not used in normal nms, task-wise
score_threshold=0.1,
pc_range=point_cloud_range[:2], # seems not used
out_size_factor=1,
voxel_size=voxel_size[:2],
nms_type='rotate',
pre_max_size=4096,
post_max_size=500,
nms_thr=0.7
)
)
You should only need to change in_channels in the config.
After making that change, I got the following error when running:
/pytorch/aten/src/ATen/native/cuda/IndexKernel.cu:93: operator(): block: [36,0,0], thread: [95,0,0] Assertion `index >= -sizes[i] && index < sizes[i] && "index out of bounds"` failed.
Traceback (most recent call last):
  File "tools/train.py", line 230, in <module>
    main()
  File "tools/train.py", line 220, in main
    train_model(
  File "/home/tan/SST/mmdet3d/apis/train.py", line 27, in train_model
    train_detector(
  File "/home/tan/anaconda3/envs/mmdet3d/lib/python3.8/site-packages/mmdet/apis/train.py", line 244, in train_detector
    runner.run(data_loaders, cfg.workflow)
  File "/home/tan/anaconda3/envs/mmdet3d/lib/python3.8/site-packages/mmcv/runner/epoch_based_runner.py", line 127, in run
    epoch_runner(data_loaders[i], **kwargs)
  File "/home/tan/anaconda3/envs/mmdet3d/lib/python3.8/site-packages/mmcv/runner/epoch_based_runner.py", line 50, in train
    self.run_iter(data_batch, train_mode=True, **kwargs)
  File "/home/tan/anaconda3/envs/mmdet3d/lib/python3.8/site-packages/mmcv/runner/epoch_based_runner.py", line 29, in run_iter
    outputs = self.model.train_step(data_batch, self.optimizer,
  File "/home/tan/anaconda3/envs/mmdet3d/lib/python3.8/site-packages/mmcv/parallel/data_parallel.py", line 75, in train_step
    return self.module.train_step(*inputs[0], **kwargs[0])
  File "/home/tan/anaconda3/envs/mmdet3d/lib/python3.8/site-packages/mmdet/models/detectors/base.py", line 248, in train_step
    losses = self(**data)
  File "/home/tan/anaconda3/envs/mmdet3d/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/tan/anaconda3/envs/mmdet3d/lib/python3.8/site-packages/mmcv/runner/fp16_utils.py", line 98, in new_func
    return old_func(*args, **kwargs)
  File "/home/tan/SST/mmdet3d/models/detectors/base.py", line 58, in forward
    return self.forward_train(**kwargs)
  File "/home/tan/SST/mmdet3d/models/detectors/dynamic_voxelnet.py", line 122, in forward_train
    x = self.extract_feat(points, img_metas)
  File "/home/tan/SST/mmdet3d/models/detectors/dynamic_voxelnet.py", line 46, in extract_feat
    x = self.backbone(x)
  File "/home/tan/anaconda3/envs/mmdet3d/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/tan/SST/mmdet3d/models/backbones/sst_v2.py", line 142, in forward
    temp = conv(output)
  File "/home/tan/anaconda3/envs/mmdet3d/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/tan/anaconda3/envs/mmdet3d/lib/python3.8/site-packages/torch/nn/modules/container.py", line 141, in forward
    input = module(input)
  File "/home/tan/anaconda3/envs/mmdet3d/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/tan/anaconda3/envs/mmdet3d/lib/python3.8/site-packages/torch/nn/modules/conv.py", line 446, in forward
    return self._conv_forward(input, self.weight, self.bias)
  File "/home/tan/anaconda3/envs/mmdet3d/lib/python3.8/site-packages/torch/nn/modules/conv.py", line 442, in _conv_forward
    return F.conv2d(input, weight, bias, self.stride,
RuntimeError: cuDNN error: CUDNN_STATUS_NOT_INITIALIZED
terminate called after throwing an instance of 'c10::CUDAError'
  what(): CUDA error: device-side assert triggered
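As an aside, a device-side assert like this is often reported at a later, unrelated op (here the conv / cuDNN call). A general PyTorch debugging step, not specific to this repo, is to make CUDA launches synchronous so the error is raised at the op that actually went out of bounds:

```python
# General PyTorch debugging sketch (not repo-specific): with synchronous launches,
# "device-side assert triggered" is reported at the offending op instead of a later one.
# The variable must be set before the first CUDA call, e.g. at the top of tools/train.py
# or exported in the shell before launching training.
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

import torch  # import torch only after the environment variable is set
```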
You need to change quite a few config parameters related to the point cloud range: for example, the point cloud range in the main config and the data config, and the output shape, grid size, and sparse shape in the main config.
You need to change quite a few config parameters related to the point cloud range: for example, the point cloud range in the main config and the data config, and the output shape, grid size, and sparse shape in the main config.
Is there any reference for setting these parameters, or is it mainly experience and trial and error?
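For what it's worth, these values are not really free parameters: they are all tied to point_cloud_range and voxel_size, so they can be computed rather than tuned. A minimal sanity check, assuming the KITTI range used above and an illustrative voxel size:

```python
import numpy as np

# Minimal sanity check (illustrative, assumed values): the BEV grid implied by
# point_cloud_range and voxel_size must agree with sparse_shape, output_shape and grid_size.
point_cloud_range = [0, -40, -3, 70.4, 40, 1]   # KITTI range, as used in this thread
voxel_size = [0.32, 0.32, 4]                    # assumed; substitute your own config values

extent = np.array(point_cloud_range[3:]) - np.array(point_cloud_range[:3])
grid = np.round(extent / np.array(voxel_size)).astype(int)
print(grid)  # -> [220 250   1] for the values above

# The related entries should then be updated consistently, e.g.
#   middle_encoder.sparse_shape -> (220, 250, 1)  (x/y order per the repo's convention)
#   backbone.output_shape       -> [220, 250]
#   train_cfg.grid_size         -> [220, 250, 1]
# If some of these are left at the Waymo values (468, 468) while others follow the
# KITTI range, indices can fall outside the dense grid, which is what the
# device-side assert above reports.
```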
It runs on KITTI.
segmentor = dict(
type='VoteSegmentor',
voxel_layer=dict(
voxel_size=seg_voxel_size,
max_num_points=-1,
point_cloud_range=point_cloud_range,
max_voxels=(-1, -1)
),
voxel_encoder=dict(
type='DynamicScatterVFE',
in_channels=4,
feat_channels=[64, 64],
voxel_size=seg_voxel_size,
with_cluster_center=True,
with_voxel_center=True,
point_cloud_range=point_cloud_range,
norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01),
unique_once=True,
),
middle_encoder=dict(
type='PseudoMiddleEncoderForSpconvFSD',
),
backbone=dict(
type='SimpleSparseUNet',
in_channels=64,
sparse_shape=[32, 640, 640],
order=('conv', 'norm', 'act'),
norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01),
base_channels=64,
output_channels=128,
encoder_channels=((64, ), (64, 64, 64), (64, 64, 64), (128, 128, 128), (256, 256, 256)),
encoder_paddings=((1, ), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1, 1), (1, 1, 1)),
decoder_channels=((256, 256, 128), (128, 128, 64), (64, 64, 64), (64, 64, 64), (64, 64, 64)),
decoder_paddings=((1, 1), (1, 0), (1, 0), (0, 0), (0, 1)), # decoder paddings seem useless in SubMConv
),
decode_neck=dict(
type='Voxel2PointScatterNeck',
voxel_size=seg_voxel_size,
point_cloud_range=point_cloud_range,
),
segmentation_head=dict(
type='VoteSegHead',
in_channel=67,
hidden_dims=[128, 128],
num_classes=num_classes,
dropout_ratio=0.0,
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='naiveSyncBN1d'),
act_cfg=dict(type='ReLU'),
loss_decode=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=3.0,
alpha=0.8,
loss_weight=1.0),
loss_vote=dict(
type='L1Loss',
loss_weight=1.0),
),
train_cfg=dict(
point_loss=True,
score_thresh=seg_score_thresh, # for training log
class_names=('Car', 'Ped', 'Cyc'), # for training log
centroid_offset=False,
),
)
model = dict(
type='FSD',
segmentor=segmentor,
backbone=dict(
type='SIR',
num_blocks=3,
in_channels=[83,] + [132, ] * 2,
feat_channels=[[128, 128], ] * 3,
rel_mlp_hidden_dims=[[16, 32],] * 3,
norm_cfg=dict(type='LN', eps=1e-3),
mode='max',
xyz_normalizer=[20, 20, 4],
act='gelu',
unique_once=True,
),
bbox_head=dict(
type='SparseClusterHeadV2',
num_classes=num_classes,
bbox_coder=dict(type='BasePointBBoxCoder'),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=2.0),
loss_center=dict(type='L1Loss', loss_weight=0.5),
loss_size=dict(type='L1Loss', loss_weight=0.5),
loss_rot=dict(type='L1Loss', loss_weight=0.2),
in_channel=128 * 3 * 2,
shared_mlp_dims=[1024, 1024],
train_cfg=None,
test_cfg=None,
norm_cfg=dict(type='LN'),
tasks=[
dict(class_names=['Car',]),
dict(class_names=['Pedestrian',]),
dict(class_names=['Cyclist',]),
],
class_names=class_names,
common_attrs=dict(
center=(3, 2, 128), dim=(3, 2, 128), rot=(2, 2, 128), # (out_dim, num_layers, hidden_dim)
),
num_cls_layer=2,
cls_hidden_dim=128,
separate_head=dict(
type='FSDSeparateHead',
norm_cfg=dict(type='LN'),
act='relu',
),
as_rpn=True,
),
roi_head=dict(
type='GroupCorrectionHead',
num_classes=num_classes,
roi_extractor=dict(
type='DynamicPointROIExtractor',
extra_wlh=[0.5, 0.5, 0.5],
max_inbox_point=256,
debug=False,
),
bbox_head=dict(
type='FullySparseBboxHead',
num_classes=num_classes,
num_blocks=6,
in_channels=[212, 145, 145, 145, 145, 145],
feat_channels=[[128, 128], ] * 6,
rel_mlp_hidden_dims=[[16, 32],] * 6,
rel_mlp_in_channels=[13, ] * 6,
reg_mlp=[512, 512],
cls_mlp=[512, 512],
mode='max',
xyz_normalizer=[20, 20, 4],
act='gelu',
geo_input=True,
with_corner_loss=True,
corner_loss_weight=1.0,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
norm_cfg=dict(type='LN', eps=1e-3),
unique_once=True,
loss_bbox=dict(
type='L1Loss',
reduction='mean',
loss_weight=2.0),
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=True,
reduction='mean',
loss_weight=1.0),
cls_dropout=0.1,
reg_dropout=0.1,
),
train_cfg=None,
test_cfg=None,
pretrained=None,
init_cfg=None
),
train_cfg=dict(
score_thresh=seg_score_thresh,
sync_reg_avg_factor=True,
pre_voxelization_size=(0.1, 0.1, 0.1),
disable_pretrain=True,
disable_pretrain_topks=[600, 200, 200],
rpn=dict(
use_rotate_nms=True,
nms_pre=-1,
nms_thr=None,
score_thr=0.1,
min_bbox_size=0,
max_num=500,
),
rcnn=dict(
assigner=[
dict( # Car
type='MaxIoUAssigner',
iou_calculator=dict(
type='BboxOverlaps3D', coordinate='lidar'),
pos_iou_thr=0.45,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1
),
dict( # Ped
type='MaxIoUAssigner',
iou_calculator=dict(
type='BboxOverlaps3D', coordinate='lidar'),
pos_iou_thr=0.35,
neg_iou_thr=0.35,
min_pos_iou=0.35,
ignore_iof_thr=-1
),
dict( # Cyc
type='MaxIoUAssigner',
iou_calculator=dict(
type='BboxOverlaps3D', coordinate='lidar'),
pos_iou_thr=0.35,
neg_iou_thr=0.35,
min_pos_iou=0.35,
ignore_iof_thr=-1
),
],
sampler=dict(
type='IoUNegPiecewiseSampler',
num=256,
pos_fraction=0.55,
neg_piece_fractions=[0.8, 0.2],
neg_iou_piece_thrs=[0.55, 0.1],
neg_pos_ub=-1,
add_gt_as_proposals=False,
return_iou=True
),
cls_pos_thr=(0.8, 0.65, 0.65),
cls_neg_thr=(0.2, 0.15, 0.15),
sync_reg_avg_factor=True,
sync_cls_avg_factor=True,
corner_loss_only_car=True,
class_names=class_names,
)
),
test_cfg=dict(
score_thresh=seg_score_thresh,
pre_voxelization_size=(0.1, 0.1, 0.1),
skip_rcnn=False,
rpn=dict(
use_rotate_nms=True,
nms_pre=-1,
nms_thr=0.25,
score_thr=0.1,
min_bbox_size=0,
max_num=500,
),
rcnn=dict(
use_rotate_nms=True,
nms_pre=-1,
nms_thr=0.25,
score_thr=0.1,
min_bbox_size=0,
max_num=500,
),
),
cluster_assigner=dict(
cluster_voxel_size=dict(
Car=(0.3, 0.3, 6),
Cyclist=(0.2, 0.2, 6),
Pedestrian=(0.05, 0.05, 6),
),
min_points=2,
point_cloud_range=point_cloud_range,
connected_dist=dict(
Car=0.6,
Cyclist=0.4,
Pedestrian=0.1,
), # xy-plane distance
class_names=class_names,
),
)
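The config above refers to a few variables defined earlier in the file (class_names, num_classes, point_cloud_range, seg_voxel_size, seg_score_thresh). For completeness, an assumed KITTI-style preamble; the values are illustrative only and not necessarily what was actually used:

```python
# Assumed preamble (illustrative): these names are referenced by the config above.
class_names = ['Car', 'Pedestrian', 'Cyclist']
num_classes = len(class_names)

point_cloud_range = [0, -40, -3, 70.4, 40, 1]

# sparse_shape=[32, 640, 640] in the U-Net suggests roughly range/640 in x-y and
# range/32 in z, but the exact voxel size here is a guess.
seg_voxel_size = (0.11, 0.125, 0.125)

# Per-class foreground score thresholds for the segmentor (values assumed).
seg_score_thresh = (0.4, 0.25, 0.25)
```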
It runs on KITTI. (The reply quotes the same KITTI config shown above.)
Hello, may I ask what training schedule and learning rate you used when training on KITTI?
Same as the original, and the results look pretty good.
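In case it helps others, "same as the original" would correspond to a standard mmdet3d cyclic AdamW schedule; a sketch in that style with assumed values (the actual numbers come from the original FSD config, which is not quoted in this thread):

```python
# Sketch of a standard mmdet3d-style schedule (values assumed for illustration).
optimizer = dict(type='AdamW', lr=1e-4, betas=(0.9, 0.999), weight_decay=0.05)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(
    policy='cyclic',
    target_ratio=(10, 1e-4),   # (peak_lr / base_lr, final_lr / base_lr)
    cyclic_times=1,
    step_ratio_up=0.4,
)
momentum_config = dict(
    policy='cyclic',
    target_ratio=(0.85 / 0.95, 1),
    cyclic_times=1,
    step_ratio_up=0.4,
)
runner = dict(type='EpochBasedRunner', max_epochs=12)
```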
It runs on KITTI. (The reply again quotes the same KITTI config shown above.)
Do you have any relevant configurations for SST with CenterHead?
Hi, I want to run on the KITTI dataset. I modified the point cloud range in the config, and now the input channels of DynamicVFE do not match the feature channels of the point cloud.
At line 43 of mmdet3d/models/detectors/dynamic_voxelnet.py, the voxelized point feature matrix has shape (M, 4). At line 141 of mmdet3d/models/voxel_encoders/utils.py, the input dimension of the linear layer built from the config is 5 + 3 + 3 = 11, which does not match the 4 + 3 + 3 = 10 implied above. I'm not sure what needs to be changed. When using the model on KITTI, do the network layer parameters also need to change?
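For reference, the mismatch described above comes from how the dynamic VFE builds the input to its first linear layer; a small sketch of that arithmetic (based on the numbers quoted in this thread, not the actual repo code, and vfe_input_dim is a hypothetical helper name):

```python
# Sketch of the first linear layer's input width in a dynamic VFE, based on the
# arithmetic above (the real code lives in mmdet3d/models/voxel_encoders/).
def vfe_input_dim(in_channels, with_cluster_center=True,
                  with_voxel_center=True, with_distance=False):
    dim = in_channels
    if with_cluster_center:
        dim += 3   # offset of each point to its voxel's mean (cluster) center
    if with_voxel_center:
        dim += 3   # offset of each point to the geometric voxel center
    if with_distance:
        dim += 1   # Euclidean distance of the point to the origin
    return dim

print(vfe_input_dim(5))  # 11 -> what a Waymo-style config (5-dim points) builds
print(vfe_input_dim(4))  # 10 -> what KITTI's 4-dim (x, y, z, intensity) points give
# So with KITTI data, voxel_encoder.in_channels must be set to 4, as suggested above.
```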