TRI-ML / dd3d

Official PyTorch implementation of DD3D: Is Pseudo-Lidar needed for Monocular 3D Object detection? (ICCV 2021), Dennis Park*, Rares Ambrus*, Vitor Guizilini, Jie Li, and Adrien Gaidon.
MIT License
451 stars 74 forks source link

'ValueError: cannot reshape array of size 14 into shape (4)' when running scripts #14

Closed komzy closed 2 years ago

komzy commented 2 years ago

Hi,

Whenever I run evaluation on a sample of the KITTI dataset, I get this error. I also get the same error when running the following script: ./scripts/visualize_dataloader.py +experiments=dd3d_kitti_dla34 SOLVER.IMS_PER_BATCH=4. Here are my terminal logs from running the above command:

No protocol specified
No protocol specified
No protocol specified
/usr/local/lib/python3.8/dist-packages/hydra/_internal/defaults_list.py:251: UserWarning: In 'visualize_dataloader': Defaults list is missing `_self_`. See https://hydra.cc/docs/upgrades/1.0_to_1.1/default_composition_order for more information
  warnings.warn(msg, UserWarning)
[11/02 13:22:33 tridet.utils.s3]: Downloading initial weights:
[11/02 13:22:33 tridet.utils.s3]:   src: https://tri-ml-public.s3.amazonaws.com/github/dd3d/pretrained/depth_pretrained_dla34-y1urdmir-20210422_165446-model_final-remapped.pth
[11/02 13:22:33 tridet.utils.s3]:   dst: /tmp/tmpwxys0idg.pth
835it [00:00, 25902.64it/s]
[11/02 13:28:08 tridet.utils.hydra.callbacks]: Rank of current process: 0. World size: 1
[11/02 13:28:08 tridet.utils.setup]: Working Directory: /workspace/dd3d/outputs/2021-11-02/13-22-33
[11/02 13:28:08 tridet.utils.setup]: Full config:
{
  "WANDB": {
    "ENABLED": false,
    "DRYRUN": false,
    "PROJECT": "dd3d",
    "GROUP": null,
    "TAGS": [
      "kitti-val",
      "dla34",
      "bn"
    ]
  },
  "EVAL_ONLY": false,
  "EVAL_ON_START": false,
  "ONLY_REGISTER_DATASETS": false,
  "OUTPUT_ROOT": "./outputs",
  "SYNC_OUTPUT_DIR_S3": {
    "ENABLED": false,
    "ROOT_IN_S3": "???",
    "PERIOD": 1000
  },
  "DATASET_ROOT": "/data/datasets/",
  "TMP_DIR": "/tmp/",
  "DATASETS": {
    "TRAIN": {
      "NAME": "kitti_3d_train",
      "CANONICAL_BOX3D_SIZES": [
        [
          1.61876949,
          3.89154523,
          1.52969237
        ],
        [
          0.62806586,
          0.82038497,
          1.76784787
        ],
        [
          0.56898187,
          1.77149234,
          1.7237099
        ],
        [
          1.9134491,
          5.15499603,
          2.18998422
        ],
        [
          2.61168401,
          9.22692319,
          3.36492722
        ],
        [
          0.5390196,
          1.08098042,
          1.28392158
        ],
        [
          2.36044838,
          15.56991038,
          3.5289238
        ],
        [
          1.24489164,
          2.51495357,
          1.61402478
        ]
      ],
      "DATASET_MAPPER": "default",
      "NUM_CLASSES": 5,
      "MEAN_DEPTH_PER_LEVEL": [
        32.594,
        15.178,
        8.424,
        5.004,
        4.662
      ],
      "STD_DEPTH_PER_LEVEL": [
        14.682,
        7.139,
        4.345,
        2.399,
        2.587
      ]
    },
    "TEST": {
      "NAME": "kitti_3d_val",
      "NUSC_SAMPLE_AGGREGATE_IN_INFERENCE": false,
      "DATASET_MAPPER": "default"
    }
  },
  "FE": {
    "FPN": {
      "IN_FEATURES": [
        "level3",
        "level4",
        "level5"
      ],
      "OUT_FEATURES": null,
      "OUT_CHANNELS": 256,
      "NORM": "FrozenBN",
      "FUSE_TYPE": "sum"
    },
    "BUILDER": "build_fcos_dla_fpn_backbone_p67",
    "BACKBONE": {
      "NAME": "DLA-34",
      "OUT_FEATURES": [
        "level3",
        "level4",
        "level5"
      ],
      "NORM": "FrozenBN"
    },
    "OUT_FEATURES": null
  },
  "DD3D": {
    "IN_FEATURES": null,
    "NUM_CLASSES": 5,
    "FEATURE_LOCATIONS_OFFSET": "none",
    "SIZES_OF_INTEREST": [
      64,
      128,
      256,
      512
    ],
    "INFERENCE": {
      "DO_NMS": true,
      "DO_POSTPROCESS": true,
      "DO_BEV_NMS": false,
      "BEV_NMS_IOU_THRESH": 0.3,
      "NUSC_SAMPLE_AGGREGATE": false
    },
    "FCOS2D": {
      "_VERSION": "v2",
      "NORM": "BN",
      "NUM_CLS_CONVS": 4,
      "NUM_BOX_CONVS": 4,
      "USE_DEFORMABLE": false,
      "USE_SCALE": true,
      "BOX2D_SCALE_INIT_FACTOR": 1.0,
      "LOSS": {
        "ALPHA": 0.25,
        "GAMMA": 2.0,
        "LOC_LOSS_TYPE": "giou"
      },
      "INFERENCE": {
        "THRESH_WITH_CTR": true,
        "PRE_NMS_THRESH": 0.05,
        "PRE_NMS_TOPK": 1000,
        "POST_NMS_TOPK": 100,
        "NMS_THRESH": 0.75
      }
    },
    "FCOS3D": {
      "NORM": "FrozenBN",
      "NUM_CONVS": 4,
      "USE_DEFORMABLE": false,
      "USE_SCALE": true,
      "DEPTH_SCALE_INIT_FACTOR": 0.3,
      "PROJ_CTR_SCALE_INIT_FACTOR": 1.0,
      "PER_LEVEL_PREDICTORS": false,
      "SCALE_DEPTH_BY_FOCAL_LENGTHS": true,
      "SCALE_DEPTH_BY_FOCAL_LENGTHS_FACTOR": 500.0,
      "MEAN_DEPTH_PER_LEVEL": [
        32.594,
        15.178,
        8.424,
        5.004,
        4.662
      ],
      "STD_DEPTH_PER_LEVEL": [
        14.682,
        7.139,
        4.345,
        2.399,
        2.587
      ],
      "MIN_DEPTH": 0.1,
      "MAX_DEPTH": 80.0,
      "CANONICAL_BOX3D_SIZES": [
        [
          1.61876949,
          3.89154523,
          1.52969237
        ],
        [
          0.62806586,
          0.82038497,
          1.76784787
        ],
        [
          0.56898187,
          1.77149234,
          1.7237099
        ],
        [
          1.9134491,
          5.15499603,
          2.18998422
        ],
        [
          2.61168401,
          9.22692319,
          3.36492722
        ],
        [
          0.5390196,
          1.08098042,
          1.28392158
        ],
        [
          2.36044838,
          15.56991038,
          3.5289238
        ],
        [
          1.24489164,
          2.51495357,
          1.61402478
        ]
      ],
      "CLASS_AGNOSTIC_BOX3D": false,
      "PREDICT_ALLOCENTRIC_ROT": true,
      "PREDICT_DISTANCE": false,
      "LOSS": {
        "SMOOTH_L1_BETA": 0.05,
        "MAX_LOSS_PER_GROUP_DISENT": 20.0,
        "CONF_3D_TEMPERATURE": 1.0,
        "WEIGHT_BOX3D": 2.0,
        "WEIGHT_CONF3D": 1.0
      },
      "PREPARE_TARGET": {
        "CENTER_SAMPLE": true,
        "POS_RADIUS": 1.5
      }
    }
  },
  "VIS": {
    "DATALOADER_ENABLED": true,
    "DATALOADER_PERIOD": 1000,
    "DATALOADER_MAX_NUM_SAMPLES": 10,
    "PREDICTIONS_ENABLED": true,
    "PREDICTIONS_MAX_NUM_SAMPLES": 20,
    "D2": {
      "DATALOADER": {
        "ENABLED": true,
        "SCALE": 1.0,
        "COLOR_MODE": "image"
      },
      "PREDICTIONS": {
        "ENABLED": true,
        "SCALE": 1.0,
        "COLOR_MODE": "image",
        "THRESHOLD": 0.4
      }
    },
    "BOX3D": {
      "DATALOADER": {
        "ENABLED": true,
        "SCALE": 1.0,
        "RENDER_LABELS": true
      },
      "PREDICTIONS": {
        "ENABLED": true,
        "SCALE": 1.0,
        "RENDER_LABELS": true,
        "THRESHOLD": 0.5,
        "MIN_DEPTH_CENTER": 0.0
      }
    }
  },
  "INPUT": {
    "FORMAT": "BGR",
    "AUG_ENABLED": true,
    "RESIZE": {
      "ENABLED": true,
      "MIN_SIZE_TRAIN": [
        288,
        304,
        320,
        336,
        352,
        368,
        384,
        400,
        416,
        448,
        480,
        512,
        544,
        576
      ],
      "MIN_SIZE_TRAIN_SAMPLING": "choice",
      "MAX_SIZE_TRAIN": 10000,
      "MIN_SIZE_TEST": 384,
      "MAX_SIZE_TEST": 100000
    },
    "CROP": {
      "ENABLED": false,
      "TYPE": "relative_range",
      "SIZE": [
        0.9,
        0.9
      ]
    },
    "RANDOM_FLIP": {
      "ENABLED": true,
      "HORIZONTAL": true,
      "VERTICAL": false
    },
    "COLOR_JITTER": {
      "ENABLED": true,
      "BRIGHTNESS": [
        0.2,
        0.2
      ],
      "SATURATION": [
        0.2,
        0.2
      ],
      "CONTRAST": [
        0.2,
        0.2
      ]
    }
  },
  "MODEL": {
    "DEVICE": "cuda",
    "META_ARCHITECTURE": "DD3D",
    "PIXEL_MEAN": [
      103.53,
      116.28,
      123.675
    ],
    "PIXEL_STD": [
      57.375,
      57.12,
      58.395
    ],
    "CKPT": "/tmp/tmpwxys0idg.pth",
    "BOX2D_ON": true,
    "BOX3D_ON": true,
    "DEPTH_ON": false,
    "CHECKPOINT": ""
  },
  "DATALOADER": {
    "TRAIN": {
      "NUM_WORKERS": 12,
      "FILTER_EMPTY_ANNOTATIONS": true,
      "SAMPLER": "RepeatFactorTrainingSampler",
      "REPEAT_THRESHOLD": 0.4,
      "ASPECT_RATIO_GROUPING": false
    },
    "TEST": {
      "NUM_WORKERS": 4,
      "SAMPLER": "InferenceSampler"
    }
  },
  "SOLVER": {
    "IMS_PER_BATCH": 4,
    "BASE_LR": 0.002,
    "MOMENTUM": 0.9,
    "NESTEROV": false,
    "WEIGHT_DECAY": 0.0001,
    "WEIGHT_DECAY_NORM": 0.0,
    "BIAS_LR_FACTOR": 1.0,
    "WEIGHT_DECAY_BIAS": 0.0001,
    "GAMMA": 0.1,
    "LR_SCHEDULER_NAME": "WarmupMultiStepLR",
    "STEPS": [
      21500,
      24000
    ],
    "WARMUP_FACTOR": 0.0001,
    "WARMUP_ITERS": 2000,
    "WARMUP_METHOD": "linear",
    "CLIP_GRADIENTS": {
      "ENABLED": false,
      "CLIP_TYPE": "value",
      "CLIP_VALUE": 1.0,
      "NORM_TYPE": 2.0
    },
    "CHECKPOINT_PERIOD": 2000,
    "MIXED_PRECISION_ENABLED": true,
    "DDP_FIND_UNUSED_PARAMETERS": false,
    "ACCUMULATE_GRAD_BATCHES": 1,
    "SYNCBN_USE_LOCAL_WORKERS": false,
    "MAX_ITER": 25000
  },
  "TEST": {
    "ENABLED": true,
    "EVAL_PERIOD": 2000,
    "EVAL_ON_START": false,
    "ADDITIONAL_EVAL_STEPS": [],
    "IMS_PER_BATCH": 80,
    "AUG": {
      "ENABLED": true,
      "MIN_SIZES": [
        320,
        384,
        448,
        512,
        576
      ],
      "MAX_SIZE": 100000,
      "FLIP": true
    }
  },
  "USE_TEST": false,
  "EVALUATORS": {
    "KITTI3D": {
      "IOU_THRESHOLDS": [
        0.5,
        0.7
      ],
      "ONLY_PREPARE_SUBMISSION": false
    }
  }
}
[11/02 13:28:08 tridet.data.datasets.kitti_3d]: KITTI-3D dataset(s): kitti_3d_train, kitti_3d_val 
Error executing job with overrides: ['+experiments=dd3d_kitti_dla34', 'SOLVER.IMS_PER_BATCH=4']
multiprocessing.pool.RemoteTraceback: 
"""
Traceback (most recent call last):
  File "/usr/lib/python3.8/multiprocessing/pool.py", line 125, in worker
    result = (True, func(*args, **kwds))
  File "/usr/lib/python3.8/multiprocessing/pool.py", line 48, in mapstar
    return list(map(*args))
  File "/workspace/dd3d/tridet/data/datasets/kitti_3d/build.py", line 123, in _read_calibration_file
    P_20 = calibration.loc[2].values[1:].reshape(-1, 4).astype(np.float64)
ValueError: cannot reshape array of size 14 into shape (4)
"""

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "./scripts/visualize_dataloader.py", line 26, in main
    dataset_names = register_datasets(cfg)
  File "/workspace/dd3d/tridet/data/datasets/__init__.py", line 19, in register_datasets
    dataset_names.extend(register_kitti_3d_datasets(required_datasets, cfg))
  File "/workspace/dd3d/tridet/data/datasets/kitti_3d/__init__.py", line 41, in register_kitti_3d_datasets
    fn(name, **kwargs)
  File "/workspace/dd3d/tridet/data/datasets/kitti_3d/build.py", line 312, in register_kitti_3d_metadata
    dataset_dicts = DatasetCatalog.get(dataset_name)
  File "/usr/local/lib/python3.8/dist-packages/detectron2/data/catalog.py", line 58, in get
    return f()
  File "/workspace/dd3d/tridet/data/datasets/kitti_3d/build.py", line 298, in build_monocular_kitti3d_dataset
    dataset = KITTI3DMonocularDataset(root_dir, mv3d_split, class_names, sensors, box2d_from_box3d, max_num_items)
  File "/workspace/dd3d/tridet/data/datasets/kitti_3d/build.py", line 283, in __init__
    self._kitti_dset = KITTI3DDataset(root_dir, mv3d_split, class_names, sensors, box2d_from_box3d, max_num_items)
  File "/workspace/dd3d/tridet/data/datasets/kitti_3d/build.py", line 73, in __init__
    self.calibration_table = self._parse_calibration_files()
  File "/workspace/dd3d/tridet/data/datasets/kitti_3d/build.py", line 95, in _parse_calibration_files
    (_proc.map(self._read_calibration_file, calibration_files))
  File "/usr/lib/python3.8/multiprocessing/pool.py", line 364, in map
    return self._map_async(func, iterable, mapstar, chunksize).get()
  File "/usr/lib/python3.8/multiprocessing/pool.py", line 771, in get
    raise self._value
ValueError: cannot reshape array of size 14 into shape (4)
komzy commented 2 years ago

I'm running it on Ubuntu 20.04, using the Docker image for CUDA 11.1. I had to change g++-4.8 to g++-9.

dennis-park-TRI commented 2 years ago

Could you check that you have the KITTI dataset organized correctly, as explained in the README? It looks like the raw data files (the calibration data) are in the wrong format.