Deepwise-AILab / ChestX-Det-Dataset

Apache License 2.0
34 stars 4 forks source link

Question about reproducibility of your baseline results #3

Open mohit-baldoni opened 2 years ago

mohit-baldoni commented 2 years ago

Hello,

I'm trying to reproduce your baseline results (not SAR-Net) using a publicly available version of Faster R-CNN (FPN), in particular Detectron2, but my AP50 score is well below the one you claim: about 35 instead of 41. I would like to assess the quality of your paper and try to improve on it in the context of a publication.

I converted the images from grayscale to RGB by stacking the same layer 3 times; is it necessary to normalize them somehow? I used 10% of the training set for validation, trying both random sampling and stratified random sampling. How many runs did you perform? Was the value you claim, APbb50 = 41.7, obtained as the best within a set of runs or as an average? In both cases, what are the mean and standard deviation of your values? Does the image augmentation include scaling and horizontal flipping only? Do you also apply rotations, crops or vertical flipping?

I'm using the dataset in this repo with the following Detectron2 configuration; could you please help me figure out what is wrong? Using this configuration, you should be able to help me reproduce your results without revealing any of your code.

{
  "detectron2_config": {
    "VERSION": 2,
    "MODEL": {
      "LOAD_PROPOSALS": false,
      "MASK_ON": false,
      "KEYPOINT_ON": false,
      "DEVICE": "cuda",
      "META_ARCHITECTURE": "GeneralizedRCNN",
      "WEIGHTS": "https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/MSRA/R-50.pkl",
      "PIXEL_MEAN": [103.53,116.28,123.675],
      "PIXEL_STD": [1,1,1],
      "BACKBONE": {
        "NAME": "build_resnet_fpn_backbone",
        "FREEZE_AT": 2
      },
      "FPN": {
        "IN_FEATURES": ["res2","res3","res4","res5"],
        "OUT_CHANNELS": 256,
        "NORM": "",
        "FUSE_TYPE": "sum"
      },
      "PROPOSAL_GENERATOR": {
        "NAME": "RPN",
        "MIN_SIZE": 0
      },
      "ANCHOR_GENERATOR": {
        "NAME": "DefaultAnchorGenerator",
        "SIZES": [[32],[64],[128],[256],[512]],
        "ASPECT_RATIOS": [[0.5, 1, 2]],
        "ANGLES": [[-90,0,90]],
        "OFFSET": 0
      },
      "RPN": {
        "HEAD_NAME": "StandardRPNHead",
        "IN_FEATURES": ["p2","p3","p4","p5","p6"],
        "BOUNDARY_THRESH": -1,
        "IOU_THRESHOLDS": [0.3,0.7],
        "IOU_LABELS": [0,-1,1],
        "BATCH_SIZE_PER_IMAGE": 512,
        "POSITIVE_FRACTION": 0.5,
        "BBOX_REG_LOSS_TYPE": "smooth_l1",
        "BBOX_REG_LOSS_WEIGHT": 1,
        "BBOX_REG_WEIGHTS": [1,1,1,1],
        "SMOOTH_L1_BETA": 0,
        "LOSS_WEIGHT": 1,
        "PRE_NMS_TOPK_TRAIN": 2000,
        "PRE_NMS_TOPK_TEST": 1000,
        "POST_NMS_TOPK_TRAIN": 1000,
        "POST_NMS_TOPK_TEST": 1000,
        "NMS_THRESH": 0.7,
        "CONV_DIMS": [
          -1
        ]
      },
      "ROI_HEADS": {
        "NAME": "StandardROIHeads",
        "NUM_CLASSES": 13,
        "IN_FEATURES": ["p2","p3","p4","p5"],
        "IOU_THRESHOLDS": [0.5],
        "IOU_LABELS": [0,1],
        "BATCH_SIZE_PER_IMAGE": 512,
        "POSITIVE_FRACTION": 0.25,
        "SCORE_THRESH_TEST": 0.05,
        "NMS_THRESH_TEST": 0.5,
        "PROPOSAL_APPEND_GT": true
      },
      "ROI_BOX_HEAD": {
        "NAME": "FastRCNNConvFCHead",
        "BBOX_REG_LOSS_TYPE": "smooth_l1",
        "BBOX_REG_LOSS_WEIGHT": 1,
        "BBOX_REG_WEIGHTS": [10,10,5,5],
        "SMOOTH_L1_BETA": 0,
        "POOLER_RESOLUTION": 7,
        "POOLER_SAMPLING_RATIO": 0,
        "POOLER_TYPE": "ROIAlignV2",
        "NUM_FC": 2,
        "FC_DIM": 1024,
        "NUM_CONV": 0,
        "CONV_DIM": 256,
        "NORM": "",
        "CLS_AGNOSTIC_BBOX_REG": false,
        "TRAIN_ON_PRED_BOXES": false
      },
      "ROI_BOX_CASCADE_HEAD": {
        "BBOX_REG_WEIGHTS": [
            [10,10,5,5],
            [20,20,10,10],
            [30,30,15,15]
        ],
        "IOUS": [0.5,0.6,0.7]
      },
      "ROI_MASK_HEAD": {
        "NAME": "MaskRCNNConvUpsampleHead",
        "POOLER_RESOLUTION": 14,
        "POOLER_SAMPLING_RATIO": 0,
        "NUM_CONV": 4,
        "CONV_DIM": 256,
        "NORM": "",
        "CLS_AGNOSTIC_MASK": false,
        "POOLER_TYPE": "ROIAlignV2"
      },
      "ROI_KEYPOINT_HEAD": {
        "NAME": "KRCNNConvDeconvUpsampleHead",
        "POOLER_RESOLUTION": 14,
        "POOLER_SAMPLING_RATIO": 0,
        "CONV_DIMS": [512,512,512,512,512,512,512,512],
        "NUM_KEYPOINTS": 17,
        "MIN_KEYPOINTS_PER_IMAGE": 1,
        "NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS": true,
        "LOSS_WEIGHT": 1,
        "POOLER_TYPE": "ROIAlignV2"
      },
      "SEM_SEG_HEAD": {
        "NAME": "SemSegFPNHead",
        "IN_FEATURES": ["p2","p3","p4","p5"],
        "IGNORE_VALUE": 255,
        "NUM_CLASSES": 54,
        "CONVS_DIM": 128,
        "COMMON_STRIDE": 4,
        "NORM": "GN",
        "LOSS_WEIGHT": 1
      },
      "PANOPTIC_FPN": {
        "INSTANCE_LOSS_WEIGHT": 1,
        "COMBINE": {
          "ENABLED": true,
          "OVERLAP_THRESH": 0.5,
          "STUFF_AREA_LIMIT": 4096,
          "INSTANCES_CONFIDENCE_THRESH": 0.5
        }
      },
      "RETINANET": {
        "NUM_CLASSES": 80,
        "IN_FEATURES": ["p3","p4","p5","p6","p7"],
        "NUM_CONVS": 4,
        "IOU_THRESHOLDS": [0.4,0.5],
        "IOU_LABELS": [0,-1,1],
        "PRIOR_PROB": 0.01,
        "SCORE_THRESH_TEST": 0.05,
        "TOPK_CANDIDATES_TEST": 1000,
        "NMS_THRESH_TEST": 0.5,
        "BBOX_REG_WEIGHTS": [1,1,1,1],
        "FOCAL_LOSS_GAMMA": 2,
        "FOCAL_LOSS_ALPHA": 0.25,
        "SMOOTH_L1_LOSS_BETA": 0.1,
        "BBOX_REG_LOSS_TYPE": "smooth_l1",
        "NORM": ""
      },
      "RESNETS": {
        "DEPTH": 50,
        "OUT_FEATURES": ["res2","res3","res4","res5"],
        "NUM_GROUPS": 1,
        "NORM": "FrozenBN",
        "WIDTH_PER_GROUP": 64,
        "STRIDE_IN_1X1": true,
        "RES5_DILATION": 1,
        "RES2_OUT_CHANNELS": 256,
        "STEM_OUT_CHANNELS": 64,
        "DEFORM_ON_PER_STAGE": [false,false,false,false],
        "DEFORM_MODULATED": false,
        "DEFORM_NUM_GROUPS": 1
      }
    },
    "INPUT": {
      "MIN_SIZE_TRAIN": [800,1400],
      "MIN_SIZE_TRAIN_SAMPLING": "range",
      "MAX_SIZE_TRAIN": 1400,
      "MIN_SIZE_TEST": 1200,
      "MAX_SIZE_TEST": 1200,
      "RANDOM_FLIP": "horizontal",
      "CROP": {
        "ENABLED": false,
        "TYPE": "relative_range",
        "SIZE": [0.9,0.9]
      },
      "FORMAT": "BGR",
      "MASK_FORMAT": "polygon"
    },
    "DATASETS": {
      "TRAIN": [
        "chest_dataset_train"
      ],
      "PROPOSAL_FILES_TRAIN": [],
      "PRECOMPUTED_PROPOSAL_TOPK_TRAIN": 2000,
      "TEST": [
        "chest_dataset_val"
      ],
      "PROPOSAL_FILES_TEST": [],
      "PRECOMPUTED_PROPOSAL_TOPK_TEST": 1000
    },
    "DATALOADER": {
      "NUM_WORKERS": 4,
      "ASPECT_RATIO_GROUPING": true,
      "SAMPLER_TRAIN": "TrainingSampler",
      "REPEAT_THRESHOLD": 0,
      "FILTER_EMPTY_ANNOTATIONS": true
    },
    "SOLVER": {
      "LR_SCHEDULER_NAME": "WarmupMultiStepLR",
      "MAX_ITER": 150000,
      "BASE_LR": 0.01,
      "MOMENTUM": 0.9,
      "NESTEROV": false,
      "WEIGHT_DECAY": 0.0001,
      "WEIGHT_DECAY_NORM": 0,
      "GAMMA": 0.1,
      "STEPS": [27000,54000,128000],
      "WARMUP_FACTOR": 0.001,
      "WARMUP_ITERS": 1000,
      "WARMUP_METHOD": "linear",
      "CHECKPOINT_PERIOD": 5000,
      "IMS_PER_BATCH": 2,
      "REFERENCE_WORLD_SIZE": 0,
      "BIAS_LR_FACTOR": 1,
      "WEIGHT_DECAY_BIAS": null,
      "CLIP_GRADIENTS": {
        "ENABLED": false,
        "CLIP_TYPE": "value",
        "CLIP_VALUE": 1,
        "NORM_TYPE": 2
      },
      "AMP": {
        "ENABLED": false
      }
    },
    "TEST": {
      "EXPECTED_RESULTS": [],
      "EVAL_PERIOD": 100,
      "KEYPOINT_OKS_SIGMAS": [],
      "DETECTIONS_PER_IMAGE": 100,
      "AUG": {
        "ENABLED": false,
        "MIN_SIZES": [400,500,600,700,800,900,1000,1100,1200],
        "MAX_SIZE": 4000,
        "FLIP": true
      },
      "PRECISE_BN": {
        "ENABLED": false,
        "NUM_ITER": 200
      }
    },
    "OUTPUT_DIR": "output_d90a05cb9adb4f7ea0f95e72ccfd3216",
    "SEED": -1,
    "CUDNN_BENCHMARK": false,
    "VIS_PERIOD": 0,
    "GLOBAL": {
      "HACK": 1
    }
  }
}