Closed · missTL closed this 1 month ago
Why do our reproduced results always stabilize around 65.0, compared to 66.4 in your paper?
There is some randomness in the final results. In our experience, it is normal to get results around 66.0. How many ‘grad_norm: nan’ entries are in your log? Too many ‘grad_norm: nan’ entries may cause a lower result, which should not happen.
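To check this quickly, here is a minimal sketch that counts NaN gradient norms in a run's text log. It assumes the default mmcv TextLoggerHook output (one `grad_norm:` entry per logged iteration); the log path is hypothetical.

```python
# Count how many logged iterations report a NaN gradient norm.
# Assumes the default mmcv TextLoggerHook text log; the path below is hypothetical.
from pathlib import Path

log_path = Path('work_dirs/maptrv2_nusc_r50_24ep/latest.log')  # hypothetical log file

text = log_path.read_text()
nan_count = text.count('grad_norm: nan')
total_iters = text.count('grad_norm:')
print(f'{nan_count} / {total_iters} logged iterations have grad_norm: nan')
```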
Why do our reproduced results always stabilize around 65.0, compared to 66.4 in your paper? The configuration is as follows:

```python
_base_ = ['../datasets/custom_nus-3d.py', '../_base_/default_runtime.py']
plugin = True
plugin_dir = 'projects/mmdet3d_plugin/'

# If point cloud range is changed, the models should also change their point
# cloud range accordingly
# point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
point_cloud_range = [-15.0, -30.0, -2.0, 15.0, 30.0, 2.0]
voxel_size = [0.15, 0.15, 4.0]
dbound = [1.0, 35.0, 0.5]

grid_config = {
    'x': [-30.0, -30.0, 0.15],  # useless
    'y': [-15.0, -15.0, 0.15],  # useless
    'z': [-10, 10, 20],         # useless
    'depth': [1.0, 35.0, 0.5],  # useful
}

img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)

# For nuScenes we usually do 10-class detection
class_names = [
    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
]

# map has classes: divider, ped_crossing, boundary
map_classes = ['divider', 'ped_crossing', 'boundary']
# fixed_ptsnum_per_line = 20
# map_classes = ['divider',]
num_vec = 50
fixed_ptsnum_per_gt_line = 20  # now only support fixed_pts > 0
fixed_ptsnum_per_pred_line = 20
eval_use_same_gt_sample_num_flag = True
num_map_classes = len(map_classes)

input_modality = dict(
    use_lidar=False, use_camera=True, use_radar=False, use_map=False,
    use_external=True)

_dim_ = 256
_pos_dim_ = _dim_ // 2
_ffn_dim_ = _dim_ * 2
_num_levels_ = 1
_num_points_in_pillar_ = 8
# bev_h_ = 50
# bev_w_ = 50
bev_h_ = 200
bev_w_ = 100
queue_length = 1  # each sequence contains `queue_length` frames.

aux_seg_cfg = dict(
    use_aux_seg=True, bev_seg=True, pv_seg=True, seg_classes=1,
    feat_down_sample=32, pv_thickness=1)

model = dict(
    type='MapTRv2',
    use_grid_mask=True,
    video_test_mode=False,
    pretrained=dict(img='ckpts/resnet50-19c8e357.pth'),
    img_backbone=dict(
        type='ResNet', depth=50, num_stages=4, out_indices=(3,), frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=False), norm_eval=True, style='pytorch'),
    img_neck=dict(
        type='FPN', in_channels=[2048], out_channels=_dim_, start_level=0,
        add_extra_convs='on_output', num_outs=_num_levels_, relu_before_extra_convs=True),
    pts_bbox_head=dict(
        type='MapTRv2Head', bev_h=bev_h_, bev_w=bev_w_, num_query=900,
        num_vec_one2one=100, num_vec_one2many=600, k_one2many=6,
        num_pts_per_vec=fixed_ptsnum_per_pred_line,  # one bbox
        num_pts_per_gt_vec=fixed_ptsnum_per_gt_line, dir_interval=1,
        query_embed_type='instance_pts',
        # ... (rest of the head and loss settings omitted in the post)
    ))

dataset_type = 'CustomNuScenesOfflineLocalMapDataset'
data_root =   # value not shown in the post
file_client_args = dict(backend='disk')

train_pipeline = [
    dict(type='LoadMultiViewImageFromFiles', to_float32=True),
    dict(type='RandomScaleImageMultiViewImage', scales=[0.5]),
    dict(type='PhotoMetricDistortionMultiViewImage'),
    dict(type='NormalizeMultiviewImage', **img_norm_cfg),
    dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=5, use_dim=5,
         file_client_args=file_client_args),
    dict(type='CustomPointToMultiViewDepth', downsample=1, grid_config=grid_config),
    dict(type='PadMultiViewImageDepth', size_divisor=32),
    dict(type='DefaultFormatBundle3D', with_gt=False, with_label=False,
         class_names=map_classes),
    dict(type='CustomCollect3D', keys=['img', 'gt_depth'])
]

test_pipeline = [
    dict(type='LoadMultiViewImageFromFiles', to_float32=True),
    dict(type='RandomScaleImageMultiViewImage', scales=[0.5]),
    dict(type='NormalizeMultiviewImage', **img_norm_cfg),
]

data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,  # TODO
    train=dict(
        type=dataset_type, data_root=data_root,
        ann_file=data_root + 'nuscenes_map_infos_temporal_train.pkl',
        pipeline=train_pipeline, classes=class_names, modality=input_modality,
        aux_seg=aux_seg_cfg, test_mode=False, use_valid_flag=True,
        bev_size=(bev_h_, bev_w_), pc_range=point_cloud_range,
        fixed_ptsnum_per_line=fixed_ptsnum_per_gt_line,
        eval_use_same_gt_sample_num_flag=eval_use_same_gt_sample_num_flag,
        padding_value=-10000, map_classes=map_classes, queue_length=queue_length,
        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
    ),
    # ... (val/test splits omitted in the post)
)

optimizer = dict(
    type='AdamW', lr=6e-4,
    paramwise_cfg=dict(custom_keys={'img_backbone': dict(lr_mult=0.1)}),
    weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))

# learning policy
lr_config = dict(
    policy='CosineAnnealing', warmup='linear', warmup_iters=500,
    warmup_ratio=1.0 / 3, min_lr_ratio=1e-3)
total_epochs = 24
evaluation = dict(interval=2, pipeline=test_pipeline, metric='chamfer',
                  save_best='NuscMap_chamfer/mAP', rule='greater')
# total_epochs = 50
# evaluation = dict(interval=1, pipeline=test_pipeline)
runner = dict(type='EpochBasedRunner', max_epochs=total_epochs)

log_config = dict(
    interval=50,
    hooks=[dict(type='TextLoggerHook'), dict(type='TensorboardLoggerHook')])
fp16 = dict(loss_scale=512.)
checkpoint_config = dict(max_keep_ckpts=1, interval=2)
find_unused_parameters = True
```
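Since `evaluation` above keeps the best `NuscMap_chamfer/mAP` checkpoint, one way to compare several reproduction runs against the ~66.0 figure is to read that metric back out of the training logs. Below is a minimal sketch, assuming the `<timestamp>.log.json` file written by TextLoggerHook and that the evaluation result appears under the same `NuscMap_chamfer/mAP` key used by `save_best` (both assumptions; the paths are hypothetical).

```python
# Pull the best evaluation mAP from each run's JSON log to compare runs.
# Assumptions: TextLoggerHook writes a <timestamp>.log.json with one JSON record
# per line, and evaluation results are logged under 'NuscMap_chamfer/mAP'
# (the key used by save_best above). Paths are hypothetical.
import json
from pathlib import Path

def best_map(log_json: Path, key: str = 'NuscMap_chamfer/mAP') -> float:
    """Return the best logged value of `key` over all evaluation records."""
    values = []
    for line in log_json.read_text().splitlines():
        try:
            record = json.loads(line)
        except json.JSONDecodeError:
            continue  # skip the env/config header or malformed lines
        if key in record:
            values.append(record[key])
    if not values:
        raise ValueError(f'no "{key}" entries found in {log_json}')
    return max(values)

# One .log.json per reproduction run under work_dirs/ (hypothetical layout).
for run_log in sorted(Path('work_dirs').rglob('*.log.json')):
    print(run_log, best_map(run_log))
```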