Closed michelewang closed 3 years ago
You've chosen to report an unexpected problem or bug. Unless you already know the root cause of it, please include details about it by filling the issue template. The following information is missing: "Your Environment";
Does
print("XBDDATA_TRAIN", DatasetCatalog.get("xbddata_train"))
produce empty data? The error suggests that the custom data-loading function in your code returns empty data.
Hi @ppwwyyxx you were completely right! That was the issue. Thank you so much, I really appreciate it!
Instructions To Reproduce the 🐛 Bug:
""" DeepLab Training Script.
This script is a simplified version of the training script in detectron2/tools. """
import os import torch
import detectron2.data.transforms as T import detectron2.utils.comm as comm from detectron2.checkpoint import DetectionCheckpointer from detectron2.config import get_cfg from detectron2.data import DatasetMapper, MetadataCatalog, build_detection_train_loader from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, launch from detectron2.evaluation import CityscapesSemSegEvaluator, DatasetEvaluators, SemSegEvaluator from detectron2.projects.deeplab import add_deeplab_config, build_lr_scheduler
import numpy as np import json import matplotlib.pyplot as plt import cv2 import random import glob from datetime import datetime import pickle from pathlib import Path from tqdm import tqdm
from detectron2.data import DatasetCatalog, MetadataCatalog from detectron2.structures import BoxMode from detectron2.utils.visualizer import ColorMode
# import some common detectron2 utilities
from detectron2 import model_zoo from detectron2.engine import DefaultPredictor from detectron2.config import get_cfg from detectron2.utils.visualizer import Visualizer
def build_sem_seg_train_aug(cfg):
    """Assemble the list of training augmentations described by ``cfg``.

    The returned list always begins with a ``T.ResizeShortestEdge`` built
    from ``cfg.INPUT.MIN_SIZE_TRAIN``, ``cfg.INPUT.MAX_SIZE_TRAIN`` and
    ``cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING``, and always ends with a
    ``T.RandomFlip``. When ``cfg.INPUT.CROP.ENABLED`` is truthy, a
    ``T.RandomCrop_CategoryAreaConstraint`` is inserted between the two.

    Args:
        cfg: config node exposing the ``INPUT`` and
            ``MODEL.SEM_SEG_HEAD`` entries read below.

    Returns:
        list: the augmentation objects, in application order.
    """
    input_cfg = cfg.INPUT
    resize = T.ResizeShortestEdge(
        input_cfg.MIN_SIZE_TRAIN,
        input_cfg.MAX_SIZE_TRAIN,
        input_cfg.MIN_SIZE_TRAIN_SAMPLING,
    )
    pipeline = [resize]

    crop_cfg = input_cfg.CROP
    if crop_cfg.ENABLED:
        crop = T.RandomCrop_CategoryAreaConstraint(
            crop_cfg.TYPE,
            crop_cfg.SIZE,
            crop_cfg.SINGLE_CATEGORY_MAX_AREA,
            cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE,
        )
        pipeline.append(crop)

    pipeline.append(T.RandomFlip())
    return pipeline
class Trainer(DefaultTrainer):
    """
    We use the "DefaultTrainer", which contains a number of pre-defined
    pieces of logic for the standard training workflow. They may not work
    for you, especially if you are working on a new research project. In
    that case you can use the cleaner "SimpleTrainer", or write your own
    training loop.
    """
def setup(args):
    """Build the frozen config for this run and perform basic setup.

    Starts from ``get_cfg()``, adds the DeepLab keys via
    ``add_deeplab_config``, merges in ``args.config_file`` and then the
    ``args.opts`` overrides (in that order), freezes the result and hands it
    to ``default_setup`` together with ``args``.

    Args:
        args: parsed command-line arguments; ``config_file`` and ``opts``
            are read here.

    Returns:
        The frozen config object.
    """
    config = get_cfg()
    add_deeplab_config(config)

    # File values first, then command-line overrides on top of them.
    config.merge_from_file(args.config_file)
    config.merge_from_list(args.opts)

    config.freeze()
    default_setup(config, args)
    return config
# I CHANGED THIS PART
def get_building_dicts(img_dir): """This function loads the JSON file created with the annotator and converts it to the detectron2 metadata specifications. """ img_links = glob.glob(img_dir+"labels/*.json")
# only keep the images that include post
def main(args):
# I CHANGED THIS PART, REGISTERING MY DATASETS HERE
buildingmetadata = MetadataCatalog.get("xbddata" + d)
# Script entry point. The guard must compare the module's ``__name__`` with
# "__main__"; the bare ``name == "main"`` form references an undefined
# variable and would raise NameError.
if __name__ == "__main__":
    args = default_argument_parser().parse_args()
    print("Command Line Args:", args)
    # Hand ``main`` to ``launch`` along with the GPU/machine settings taken
    # from the parsed arguments; ``main`` receives ``args`` as its only
    # positional argument.
    launch(
        main,
        args.num_gpus,
        num_machines=args.num_machines,
        machine_rank=args.machine_rank,
        dist_url=args.dist_url,
        args=(args,),
    )
_BASE_: base.yaml MODEL: WEIGHTS: "detectron2://DeepLab/R-103.pkl" PIXEL_MEAN: [123.675, 116.280, 103.530] PIXEL_STD: [58.395, 57.120, 57.375] BACKBONE: NAME: "build_resnet_deeplab_backbone" RESNETS: DEPTH: 101 NORM: "SyncBN" OUT_FEATURES: ["res2", "res5"] RES5_MULTI_GRID: [1, 2, 4] STEM_TYPE: "deeplab" STEM_OUT_CHANNELS: 128 STRIDE_IN_1X1: False SEM_SEG_HEAD: NAME: "DeepLabV3PlusHead" IN_FEATURES: ["res2", "res5"] PROJECT_FEATURES: ["res2"] PROJECT_CHANNELS: [48] NORM: "SyncBN" COMMON_STRIDE: 4 INPUT: FORMAT: "RGB"
_BASE_: "../../../../configs/Base-RCNN-DilatedC5.yaml" MODEL: META_ARCHITECTURE: "SemanticSegmentor" BACKBONE: FREEZE_AT: 0 SEM_SEG_HEAD: NAME: "DeepLabV3Head" IN_FEATURES: ["res5"] ASPP_CHANNELS: 256 ASPP_DILATIONS: [6, 12, 18] ASPP_DROPOUT: 0.1 CONVS_DIM: 256 COMMON_STRIDE: 16 NUM_CLASSES: 19 LOSS_TYPE: "hard_pixel_mining" DATASETS: TRAIN: ("xbddata_train",) TEST: ("xbddata_test",) SOLVER: BASE_LR: 0.01 MAX_ITER: 90000 LR_SCHEDULER_NAME: "WarmupPolyLR" IMS_PER_BATCH: 16 INPUT: MIN_SIZE_TRAIN: (1024,) MIN_SIZE_TRAIN_SAMPLING: "choice" MIN_SIZE_TEST: 1024 MAX_SIZE_TRAIN: 1024 MAX_SIZE_TEST: 1024 CROP: ENABLED: True TYPE: "absolute" SIZE: (512, 1024) SINGLE_CATEGORY_MAX_AREA: 1.0 DATALOADER: NUM_WORKERS: 10
cuobjdump info : File '/n/home07/michelewang/.conda/envs/active/lib/python3.8/site-packages/detectron2/_C.cpython-38-x86_64-linux-gnu.so' does not contain device code cuobjdump info : File '/n/home07/michelewang/.conda/envs/active/lib/python3.8/site-packages/detectron2/_C.cpython-38-x86_64-linux-gnu.so' does not contain device code cuobjdump info : File '/n/home07/michelewang/.conda/envs/active/lib/python3.8/site-packages/detectron2/_C.cpython-38-x86_64-linux-gnu.so' does not contain device code cuobjdump info : File '/n/home07/michelewang/.conda/envs/active/lib/python3.8/site-packages/detectron2/_C.cpython-38-x86_64-linux-gnu.so' does not contain device code Traceback (most recent call last): File "train_net_xbd.py", line 194, in
launch(
File "/n/home07/michelewang/.conda/envs/active/lib/python3.8/site-packages/detectron2/engine/launch.py", line 55, in launch
mp.spawn(
File "/n/home07/michelewang/.conda/envs/active/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 199, in spawn
return start_processes(fn, args, nprocs, join, daemon, start_method='spawn')
File "/n/home07/michelewang/.conda/envs/active/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 157, in start_processes
while not context.join():
File "/n/home07/michelewang/.conda/envs/active/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 118, in join
raise Exception(msg)
Exception:
-- Process 2 terminated with the following error: Traceback (most recent call last): File "/n/home07/michelewang/.conda/envs/active/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 19, in _wrap fn(i, args) File "/n/home07/michelewang/.conda/envs/active/lib/python3.8/site-packages/detectron2/engine/launch.py", line 94, in _distributed_worker main_func(args) File "/n/home07/michelewang/thesis/detectron2/projects/DeepLab/train_net_xbd.py", line 186, in main trainer = Trainer(cfg) File "/n/home07/michelewang/.conda/envs/active/lib/python3.8/site-packages/detectron2/engine/defaults.py", line 312, in init data_loader = self.build_train_loader(cfg) File "/n/home07/michelewang/thesis/detectron2/projects/DeepLab/train_net_xbd.py", line 109, in build_train_loader return build_detection_train_loader(cfg, mapper=mapper) File "/n/home07/michelewang/.conda/envs/active/lib/python3.8/site-packages/detectron2/config/config.py", line 201, in wrapped explicit_args = _get_args_from_config(from_config, *args, *kwargs) File "/n/home07/michelewang/.conda/envs/active/lib/python3.8/site-packages/detectron2/config/config.py", line 238, in _get_args_from_config ret = from_config_func(args, kwargs) File "/n/home07/michelewang/.conda/envs/active/lib/python3.8/site-packages/detectron2/data/build.py", line 310, in _train_loader_from_config dataset = get_detection_dataset_dicts( File "/n/home07/michelewang/.conda/envs/active/lib/python3.8/site-packages/detectron2/data/build.py", line 231, in get_detection_dataset_dicts assert len(dicts), "Dataset '{}' is empty!".format(dataset_name) AssertionError: Dataset 'xbddata_train' is empty!**