Closed: kyle263 closed this issue 1 year ago.
Hi, I followed the differences between voxel_rcnn_car.yaml and voxel_rcnn_3classes.yaml to modify VirConv-L.yaml as shown below, but after training I still get 0 for Pedestrian and Cyclist. Could you please release the multi-class version?
CLASS_NAMES: ['Car', 'Pedestrian', 'Cyclist'] DATA_CONFIG: _BASE_CONFIG_: cfgs/dataset_configs/kitti_dataset.yaml DATASET: 'KittiDatasetMM' INPUT_DISCARD_RATE: 0.8 # actually discard more than 90% virtual points of RGB image, # as we only saved less than 50% RGB points during depth2points conversion. LATER_FUSION: False MM_PATH: 'velodyne_depth' USE_VAN: True DATA_SPLIT: { 'train': train, 'test': val } INFO_PATH: { 'train': [kitti_infos_train.pkl], 'test': [kitti_infos_val.pkl], } DATA_AUGMENTOR: DISABLE_AUG_LIST: ['placeholder'] AUG_CONFIG_LIST: - NAME: gt_sampling USE_ROAD_PLANE: True DB_INFO_PATH: - kitti_dbinfos_train_mm.pkl PREPARE: { filter_by_min_points: ['Car:5', 'Pedestrian:5', 'Cyclist:5'], filter_by_difficulty: [-1], } SAMPLE_GROUPS: ['Car:10', 'Pedestrian:10', 'Cyclist:10'] NUM_POINT_FEATURES: 8 DATABASE_WITH_FAKELIDAR: False REMOVE_EXTRA_WIDTH: [0.0, 0.0, -0.2] LIMIT_WHOLE_SCENE: False - NAME: da_sampling USE_ROAD_PLANE: True DB_INFO_PATH: - kitti_dbinfos_train_mm.pkl PREPARE: { filter_by_min_points: ['Car:5', 'Pedestrian:5', 'Cyclist:5'], filter_by_difficulty: [-1], } SAMPLE_GROUPS: ['Car:10', 'Pedestrian:10', 'Cyclist:10'] MIN_SAMPLING_DIS: 0 MAX_SAMPLING_DIS: 20 OCCLUSION_NOISE: 0.2 OCCLUSION_OFFSET: 2. SAMPLING_METHOD: 'LiDAR-aware' VERT_RES: 0.006 HOR_RES: 0.003 NUM_POINT_FEATURES: 8 DATABASE_WITH_FAKELIDAR: False REMOVE_EXTRA_WIDTH: [0.0, 0.0, -0.2] LIMIT_WHOLE_SCENE: False - NAME: random_local_noise LOCAL_ROT_RANGE: [-0.78539816, 0.78539816] TRANSLATION_STD: [1.0, 1.0, 0.5] GLOBAL_ROT_RANGE: [0.0, 0.0] EXTRA_WIDTH: [0.2, 0.2, 0.] 
- NAME: random_world_rotation WORLD_ROT_ANGLE: [-0.78539816, 0.78539816] - NAME: random_world_flip ALONG_AXIS_LIST: ['x'] - NAME: random_world_scaling WORLD_SCALE_RANGE: [0.95, 1.05] - NAME: random_local_pyramid_aug DROP_PROB: 0.25 SPARSIFY_PROB: 0.05 SPARSIFY_MAX_NUM: 50 SWAP_PROB: 0.1 SWAP_MAX_NUM: 50 POINT_FEATURE_ENCODING: { encoding_type: absolute_coordinates_encoding_mm, used_feature_list: ['x', 'y', 'z', 'intensity'], src_feature_list: ['x', 'y', 'z', 'intensity'], num_features: 8 } DATA_PROCESSOR: - NAME: mask_points_and_boxes_outside_range REMOVE_OUTSIDE_BOXES: True - NAME: shuffle_points SHUFFLE_ENABLED: { 'train': True, 'test': False } - NAME: transform_points_to_voxels VOXEL_SIZE: [0.05, 0.05, 0.05] LIDAR_FIRST: True MAX_POINTS_PER_VOXEL: 5 MAX_NUMBER_OF_VOXELS: { 'train': 40000, # using 40000 voxels is enough after input discard 'test': 40000 # The 40000 voxels include both LiDAR and virtual points } MODEL: NAME: VoxelRCNN VFE: NAME: MeanVFE MODEL: 'max' BACKBONE_3D: NAME: VirConvL8x NUM_FILTERS: [16, 32, 64, 64] RETURN_NUM_FEATURES_AS_DICT: True OUT_FEATURES: 64 LAYER_DISCARD_RATE: 0.1 MAP_TO_BEV: NAME: HeightCompression NUM_BEV_FEATURES: 256 BACKBONE_2D: NAME: BaseBEVBackbone LAYER_NUMS: [4, 4] LAYER_STRIDES: [1, 2] NUM_FILTERS: [64, 128] UPSAMPLE_STRIDES: [1, 2] NUM_UPSAMPLE_FILTERS: [128, 128] DENSE_HEAD: NAME: AnchorHeadSingle CLASS_AGNOSTIC: False USE_DIRECTION_CLASSIFIER: True DIR_OFFSET: 0.78539 DIR_LIMIT_OFFSET: 0.0 NUM_DIR_BINS: 2 ANCHOR_GENERATOR_CONFIG: [ { 'class_name': 'Car', 'anchor_sizes': [[3.9, 1.6, 1.56]], 'anchor_rotations': [0, 1.57], 'anchor_bottom_heights': [-1.78], 'align_center': False, 'feature_map_stride': 8, 'matched_threshold': 0.6, 'unmatched_threshold': 0.45 }, { 'class_name': 'Pedestrian', 'anchor_sizes': [[0.8, 0.6, 1.73]], 'anchor_rotations': [0, 1.57], 'anchor_bottom_heights': [-0.6], 'align_center': False, 'feature_map_stride': 8, 'matched_threshold': 0.5, 'unmatched_threshold': 0.35 }, { 'class_name': 'Cyclist', 
'anchor_sizes': [[1.76, 0.6, 1.73]], 'anchor_rotations': [0, 1.57], 'anchor_bottom_heights': [-0.6], 'align_center': False, 'feature_map_stride': 8, 'matched_threshold': 0.5, 'unmatched_threshold': 0.35 } ] TARGET_ASSIGNER_CONFIG: NAME: AxisAlignedTargetAssigner POS_FRACTION: -1.0 SAMPLE_SIZE: 512 NORM_BY_NUM_EXAMPLES: False MATCH_HEIGHT: False BOX_CODER: ResidualCoder LOSS_CONFIG: LOSS_WEIGHTS: { 'cls_weight': 1.0, 'loc_weight': 2.0, 'dir_weight': 0.2, 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] } ROI_HEAD: NAME: TEDMHead CLASS_AGNOSTIC: True ROT_NUM: 1 # set 1 to simply keep a similar architecture as Voxel-RCNN SHARED_FC: [256, 256] CLS_FC: [256, 256] REG_FC: [256, 256] DP_RATIO: 0.01 NMS_CONFIG: TRAIN: NMS_TYPE: nms_gpu MULTI_CLASSES_NMS: False NMS_PRE_MAXSIZE: 4000 NMS_POST_MAXSIZE: 512 NMS_THRESH: 0.8 TEST: NMS_TYPE: nms_gpu MULTI_CLASSES_NMS: False USE_FAST_NMS: True SCORE_THRESH: 0.0 NMS_PRE_MAXSIZE: 2000 NMS_POST_MAXSIZE: 50 NMS_THRESH: 0.75 ROI_GRID_POOL: FEATURES_SOURCE: ['x_conv3','x_conv4'] PRE_MLP: True GRID_SIZE: 6 POOL_LAYERS: x_conv3: MLPS: [[32, 32], [32, 32]] QUERY_RANGES: [[2, 2, 2], [4, 4, 4]] POOL_RADIUS: [0.4, 0.8] NSAMPLE: [16, 16] POOL_METHOD: max_pool x_conv4: MLPS: [[32, 32], [32, 32]] QUERY_RANGES: [[2, 2, 2], [4, 4, 4]] POOL_RADIUS: [0.8, 1.6] NSAMPLE: [16, 16] POOL_METHOD: max_pool ROI_GRID_POOL_MM: FEATURES_SOURCE: ['x_conv3','x_conv4'] PRE_MLP: True GRID_SIZE: 4 POOL_LAYERS: x_conv3: MLPS: [[32, 32], [32, 32]] QUERY_RANGES: [[2, 2, 2], [4, 4, 4]] POOL_RADIUS: [0.4, 0.8] NSAMPLE: [16, 16] POOL_METHOD: max_pool x_conv4: MLPS: [[32, 32], [32, 32]] QUERY_RANGES: [[2, 2, 2], [4, 4, 4]] POOL_RADIUS: [0.8, 1.6] NSAMPLE: [16, 16] POOL_METHOD: max_pool TARGET_CONFIG: BOX_CODER: ResidualCoder STAGE0: ROI_PER_IMAGE: 160 FG_RATIO: 0.5 SAMPLE_ROI_BY_EACH_CLASS: True CLS_SCORE_TYPE: roi_iou_x CLS_FG_THRESH: [0.75] CLS_BG_THRESH: [0.25] CLS_BG_THRESH_LO: 0.1 HARD_BG_RATIO: 0.8 REG_FG_THRESH: [0.55] ENABLE_HARD_SAMPLING: True 
HARD_SAMPLING_THRESH: [0.5] HARD_SAMPLING_RATIO: [0.5] LOSS_CONFIG: CLS_LOSS: BinaryCrossEntropy REG_LOSS: smooth-l1 CORNER_LOSS_REGULARIZATION: True GRID_3D_IOU_LOSS: False LOSS_WEIGHTS: { 'rcnn_cls_weight': 1.0, 'rcnn_reg_weight': 1.0, 'rcnn_corner_weight': 1.0, 'rcnn_iou3d_weight': 1.0, 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] } POST_PROCESSING: RECALL_THRESH_LIST: [0.3, 0.5, 0.7] SCORE_THRESH: 0.5 OUTPUT_RAW_SCORE: False EVAL_METRIC: kitti NMS_CONFIG: MULTI_CLASSES_NMS: False NMS_TYPE: nms_gpu NMS_THRESH: 0.1 NMS_PRE_MAXSIZE: 4096 NMS_POST_MAXSIZE: 500 OPTIMIZATION: BATCH_SIZE_PER_GPU: 2 NUM_EPOCHS: 50 OPTIMIZER: adam_onecycle LR: 0.01 WEIGHT_DECAY: 0.01 MOMENTUM: 0.9 MOMS: [0.95, 0.85] PCT_START: 0.4 DIV_FACTOR: 10 DECAY_STEP_LIST: [35, 45] LR_DECAY: 0.1 LR_CLIP: 0.0000001 LR_WARMUP: False WARMUP_EPOCH: 1 GRAD_NORM_CLIP: 10
Please refer to issue #8.
Hi, I followed the differences between voxel_rcnn_car.yaml and voxel_rcnn_3classes.yaml to modify VirConv-L.yaml as shown below, but after training I still get 0 for Pedestrian and Cyclist. Could you please release the multi-class version?