kujason / avod

Code for 3D object detection for autonomous driving

multi_class train on this model #158

Closed Jiachenyin1 closed 4 years ago

Jiachenyin1 commented 5 years ago

I set up multi-class training with Car, Pedestrian, and Cyclist before generating the mini-batches, but when I evaluate the model on the val split, only one class (Car) appears in the results. Has anyone run into the same issue? If so, could you tell me why this happens? My config is below.
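One way to narrow this down (a debugging sketch, not something from this thread) is to tally the class names in whatever KITTI-format prediction .txt files your evaluation step writes out, to see whether Pedestrian and Cyclist boxes are missing entirely or are only being dropped by a score threshold. The directory path below is a placeholder, and the script assumes the standard KITTI detection format where the class name is the first field and the score is the last field of each line.

# Debugging sketch (hypothetical paths): count predicted classes in
# KITTI-format prediction files to see whether Pedestrian/Cyclist boxes
# are missing entirely or only suppressed by a score threshold.
import os
from collections import Counter

pred_dir = '/path/to/predictions'  # placeholder: wherever your eval step writes .txt files
score_thresh = 0.1                 # assumed: last column of each line is the score

counts_all = Counter()
counts_kept = Counter()

for fname in os.listdir(pred_dir):
    if not fname.endswith('.txt'):
        continue
    with open(os.path.join(pred_dir, fname)) as f:
        for line in f:
            fields = line.split()
            if not fields:
                continue
            cls_name = fields[0]
            score = float(fields[-1])
            counts_all[cls_name] += 1
            if score >= score_thresh:
                counts_kept[cls_name] += 1

print('all predictions:', dict(counts_all))
print('above threshold:', dict(counts_kept))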

Jiachenyin1 commented 5 years ago

# Example cars config with pyramid feature extractor and augmentation

# For each experiment, copy this config and modify the copy

model_config {
model_name: 'avod_model'
checkpoint_name: 'all_aug'

input_config {
    bev_depth: 6
    img_depth: 3

    img_dims_w: 1200
    img_dims_h: 360
}

rpn_config {
    rpn_proposal_roi_crop_size: 3
    rpn_fusion_method: 'mean'
    rpn_train_nms_size: 1024
    rpn_test_nms_size: 1024
    rpn_nms_iou_thresh: 0.8
}

avod_config {
    avod_proposal_roi_crop_size: 7
    avod_positive_selection: 'not_bkg'
    avod_nms_size: 100
    avod_nms_iou_thresh: 0.01
    avod_box_representation: 'box_4ca'
}

label_smoothing_epsilon: 0.001
expand_proposals_xz: 0.0
# To disable path drop, set both to 1.0
path_drop_probabilities: [0.9, 0.9]
train_on_all_samples: False
eval_all_samples: True

layers_config {
    bev_feature_extractor {
        bev_vgg_pyr {
            vgg_conv1: [2, 32]
            vgg_conv2: [2, 64]
            vgg_conv3: [3, 128]
            vgg_conv4: [3, 256]
            l2_weight_decay: 0.0005
        }
    }
    img_feature_extractor {
        img_vgg_pyr {
            vgg_conv1: [2, 32]
            vgg_conv2: [2, 64]
            vgg_conv3: [3, 128]
            vgg_conv4: [3, 256]
            l2_weight_decay: 0.0005
        }
    }

    rpn_config {
        cls_fc6: 256
        cls_fc7: 256
        reg_fc6: 256
        reg_fc7: 256
        l2_weight_decay: 0.0005
        keep_prob: 0.5
    }
    avod_config {
        # basic_fc_layers {
        #     num_layers: 3
        #     layer_sizes: [2048, 2048, 2048]
        #     l2_weight_decay: 0.005
        #     keep_prob: 0.5
        #     fusion_method: 'mean'  # 'mean' or 'concat'
        # }
        fusion_fc_layers {
            num_layers: 3
            layer_sizes: [2048, 2048, 2048]
            l2_weight_decay: 0.005
            keep_prob: 0.5
            fusion_method: 'mean'  # 'mean', 'concat', or 'max'
            fusion_type: 'early'  # 'early', 'late', 'deep'
        }
    }
}

# Loss function weights
loss_config {
    cls_loss_weight: 1.0
    reg_loss_weight: 5.0
    ang_loss_weight: 1.0
}

}

train_config {

batch_size: 1

optimizer {
    adam_optimizer {
        learning_rate {
            exponential_decay_learning_rate {
                initial_learning_rate: 0.0001
                decay_steps: 30000
                decay_factor: 0.8
            }
        }
    }
}

overwrite_checkpoints: False

max_checkpoints_to_keep: 20
max_iterations: 120000
checkpoint_interval: 2000

summary_interval: 50
summary_histograms: False
summary_img_images: True
summary_bev_images: True

allow_gpu_mem_growth: True

}

eval_config {
eval_interval: 2000
eval_mode: 'val'
ckpt_indices: -1
evaluate_repeatedly: True

allow_gpu_mem_growth: True

}

dataset_config {
name: 'kitti'

dataset_dir: '/notebooks/DATA/Kitti/object'
# data_split: 'train'

data_split_dir: 'training'
has_labels: True

cluster_split: 'train'
classes: ['Car', 'Pedestrian', 'Cyclist']
num_clusters: [2, 1, 1]

bev_source: 'lidar'
aug_list: ['flipping', 'pca_jitter']

kitti_utils_config {
    area_extents: [-40, 40, -5, 3, 0, 70]
    voxel_size: 0.1
    anchor_strides: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
    density_threshold: 1

    bev_generator {
        slices {
            height_lo: -0.2
            height_hi: 2.3
            num_slices: 5
        }
    }

    mini_batch_config {
        density_threshold: 1

        rpn_config {
            iou_2d_thresholds {
               neg_iou_lo: 0.0
               neg_iou_hi: 0.3
               pos_iou_lo: 0.45
               pos_iou_hi: 1.0
            }
            # iou_3d_thresholds {
            #     neg_iou_lo: 0.0
            #     neg_iou_hi: 0.3
            #     pos_iou_lo: 0.4
            #     pos_iou_hi: 1.0
            # }

            mini_batch_size: 512
        }

        avod_config {
            iou_2d_thresholds {
                neg_iou_lo: 0.0
                neg_iou_hi: 0.45
                pos_iou_lo: 0.55
                pos_iou_hi: 1.0
            }

            mini_batch_size: 1024
        }
    }
}

}
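For the multi-class setup, the relevant fields are classes and num_clusters: each class gets its own number of anchor-size clusters from the label dimensions (2 for Car, 1 each for Pedestrian and Cyclist). The snippet below is a purely illustrative sketch of what that per-class clustering means, not AVOD's own preprocessing code; the toy dimension arrays are made up.

# Illustration only (not AVOD's own clustering code): what the
# num_clusters: [2, 1, 1] setting conceptually does -- cluster the
# (length, width, height) of each class's labels into that many anchor sizes.
import numpy as np
from sklearn.cluster import KMeans

# hypothetical input: per-class arrays of box dimensions (l, w, h) in metres
label_dims = {
    'Car':        np.random.rand(500, 3) * [4.0, 1.8, 1.6] + [3.0, 1.4, 1.3],
    'Pedestrian': np.random.rand(200, 3) * [0.4, 0.3, 0.5] + [0.7, 0.5, 1.6],
    'Cyclist':    np.random.rand(150, 3) * [0.6, 0.3, 0.4] + [1.5, 0.5, 1.5],
}
num_clusters = {'Car': 2, 'Pedestrian': 1, 'Cyclist': 1}

for cls, dims in label_dims.items():
    kmeans = KMeans(n_clusters=num_clusters[cls], n_init=10).fit(dims)
    print(cls, 'anchor sizes (l, w, h):')
    print(np.round(kmeans.cluster_centers_, 2))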

kujason commented 4 years ago

You will need to make additional modifications to the code to support all 3 classes at once; just modifying the config will not work.
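To make the point above concrete: if parts of the post-processing or evaluation path assume a single foreground class, Pedestrian and Cyclist detections will never make it into the output files, which would match the behaviour described above. The sketch below is purely illustrative and not the repository's actual code; the background-at-index-0 convention, the function name, and the placeholder values are assumptions. It only shows the general idea of mapping the predicted class index back through the configured class list when writing KITTI-format results, instead of labelling everything as 'Car'.

# Illustrative sketch only -- not the actual AVOD code. The idea is that the
# predicted class index must be mapped back through the full class list
# (background assumed at index 0 here) rather than hard-coded to 'Car'.
import numpy as np

classes = ['Car', 'Pedestrian', 'Cyclist']   # from dataset_config.classes

def predictions_to_kitti_lines(boxes_2d, scores, class_indices):
    """boxes_2d: (N, 4) [x1, y1, x2, y2]; class_indices: (N,) ints, 0 = background."""
    lines = []
    for box, score, idx in zip(boxes_2d, scores, class_indices):
        if idx == 0:             # skip background
            continue
        name = classes[idx - 1]  # map index back to the configured class name
        x1, y1, x2, y2 = box
        # KITTI detection format: type, truncation, occlusion, alpha,
        # 2D bbox, then 3D dims/location/rotation (dummy values here), score
        lines.append('%s -1 -1 -10 %.2f %.2f %.2f %.2f '
                     '-1 -1 -1 -1000 -1000 -1000 -10 %.3f'
                     % (name, x1, y1, x2, y2, score))
    return lines

# toy example
boxes = np.array([[100, 120, 220, 200], [300, 140, 330, 210]], dtype=np.float32)
scores = np.array([0.91, 0.75])
cls_idx = np.array([1, 2])  # Car, Pedestrian
print('\n'.join(predictions_to_kitti_lines(boxes, scores, cls_idx)))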