Open YashRunwal opened 2 years ago
Hi,
I am training on a custom dataset with a single class (num_classes=1).
My exp script:
class Exp(MyExp):
    """Experiment settings for a one-class run on COCO-format annotations.

    Overrides model size, schedule, augmentation probabilities and dataset
    locations on top of ``MyExp`` and builds the training loader, the
    evaluation loader and the COCO evaluator.
    """

    def __init__(self):
        super().__init__()

        # Model / schedule.
        self.num_classes = 1
        self.depth = 0.33
        self.width = 0.50
        self.warmup_epochs = 1
        self.max_epoch = 100
        self.data_num_workers = 1
        self.eval_interval = 1

        # ---------- transform config ------------ #
        self.mosaic_prob = 1.0
        self.mixup_prob = 1.0
        self.hsv_prob = 1.0
        self.flip_prob = 0.5

        # Dataset location and annotation files.
        self.data_dir = r'datasets\classify'
        self.train_ann = "instances_train.json"
        self.val_ann = "instances_val.json"

        # Experiment name is this file's name up to its first dot.
        this_file = os.path.basename(os.path.realpath(__file__))
        self.exp_name = this_file.split(".")[0]

    def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img=False):
        """Build the training ``DataLoader`` and store the dataset on ``self.dataset``.

        Args:
            batch_size: Requested batch size; floor-divided by the world size
                when ``is_distributed`` is true.
            is_distributed: Whether to split the batch across processes.
            no_aug: When true, mosaic is switched off (``mosaic=not no_aug``).
            cache_img: Forwarded to ``COCODataset`` as ``cache``.

        Returns:
            The ``DataLoader`` wrapping the mosaic-augmented dataset.
        """
        from yolox.data import (
            COCODataset,
            TrainTransform,
            YoloBatchSampler,
            DataLoader,
            InfiniteSampler,
            MosaicDetection,
            worker_init_reset_seed,
        )
        from yolox.utils import (
            wait_for_the_master,
            get_local_rank,
        )

        rank = get_local_rank()

        # NOTE(review): the extent of this `with` block was reconstructed from
        # a flattened paste; only the base dataset construction is assumed to
        # be guarded -- confirm against the real file.
        with wait_for_the_master(rank):
            base_set = COCODataset(
                data_dir=self.data_dir,
                json_file=self.train_ann,
                name='train',
                img_size=self.input_size,
                preproc=TrainTransform(
                    max_labels=50,
                    flip_prob=self.flip_prob,
                    hsv_prob=self.hsv_prob,
                ),
                cache=cache_img,
            )

        self.dataset = MosaicDetection(
            base_set,
            mosaic=not no_aug,
            img_size=self.input_size,
            preproc=TrainTransform(
                max_labels=120,
                flip_prob=self.flip_prob,
                hsv_prob=self.hsv_prob,
            ),
            degrees=self.degrees,
            translate=self.translate,
            mosaic_scale=self.mosaic_scale,
            mixup_scale=self.mixup_scale,
            shear=self.shear,
            enable_mixup=self.enable_mixup,
            mosaic_prob=self.mosaic_prob,
            mixup_prob=self.mixup_prob,
        )

        per_device_batch = batch_size
        if is_distributed:
            per_device_batch = batch_size // dist.get_world_size()

        # A falsy seed falls back to 0.
        index_sampler = InfiniteSampler(len(self.dataset), seed=self.seed or 0)

        grouped_sampler = YoloBatchSampler(
            sampler=index_sampler,
            batch_size=per_device_batch,
            drop_last=False,
            mosaic=not no_aug,
        )

        loader_options = {
            "num_workers": self.data_num_workers,
            "pin_memory": True,
            "batch_sampler": grouped_sampler,
            # Make sure each process has different random seed, especially for
            # 'fork' method
            "worker_init_fn": worker_init_reset_seed,
        }
        return DataLoader(self.dataset, **loader_options)

    def get_eval_loader(self, batch_size, is_distributed, testdev=False, legacy=False):
        """Build the evaluation ``DataLoader``.

        Args:
            batch_size: Requested batch size; floor-divided by the world size
                when ``is_distributed`` is true.
            is_distributed: Selects a ``DistributedSampler`` (no shuffling)
                instead of a ``SequentialSampler``.
            testdev: When true, reads ``self.test_ann`` with name
                ``"test2017"`` instead of ``self.val_ann`` with name ``"val"``.
            legacy: Forwarded to ``ValTransform``.

        Returns:
            A ``torch.utils.data.DataLoader`` over the evaluation dataset.
        """
        from yolox.data import COCODataset, ValTransform

        eval_set = COCODataset(
            data_dir=self.data_dir,
            json_file=self.test_ann if testdev else self.val_ann,
            name="test2017" if testdev else "val",
            img_size=self.test_size,
            preproc=ValTransform(legacy=legacy),
        )

        if not is_distributed:
            per_device_batch = batch_size
            eval_sampler = torch.utils.data.SequentialSampler(eval_set)
        else:
            per_device_batch = batch_size // dist.get_world_size()
            eval_sampler = torch.utils.data.distributed.DistributedSampler(
                eval_set, shuffle=False
            )

        loader_options = {
            "num_workers": self.data_num_workers,
            "pin_memory": True,
            "sampler": eval_sampler,
            "batch_size": per_device_batch,
        }
        return torch.utils.data.DataLoader(eval_set, **loader_options)

    def get_evaluator(self, batch_size, is_distributed, testdev=False, legacy=False):
        """Return a ``COCOEvaluator`` wired to the loader from ``get_eval_loader``.

        All four arguments are passed straight through to ``get_eval_loader``;
        ``testdev`` is additionally forwarded to the evaluator.
        """
        from yolox.evaluators import COCOEvaluator

        return COCOEvaluator(
            dataloader=self.get_eval_loader(batch_size, is_distributed, testdev, legacy),
            img_size=self.test_size,
            confthre=self.test_conf,
            nmsthre=self.nmsthre,
            num_classes=self.num_classes,
            testdev=testdev,
        )
I have used:
Batch size = 1, devices = 1
But I am getting the following error:
I tried the following code snippet to check if I could reproduce the same issue:
import torch

# Backend switches: TF32 enabled for matmul and cuDNN, cuDNN autotuning on,
# deterministic algorithms off.
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.allow_tf32 = True

# Half-precision input batch on the GPU; gradients are tracked so that the
# backward pass below also runs through the convolution.
data = torch.randn(
    [4, 12, 320, 320],
    dtype=torch.half,
    device='cuda',
    requires_grad=True,
)

# Single 3x3 convolution, 12 -> 32 channels, moved to the GPU in fp16.
net = torch.nn.Conv2d(
    12,
    32,
    kernel_size=[3, 3],
    padding=[1, 1],
    stride=[1, 1],
    dilation=[1, 1],
    groups=1,
)
net = net.cuda().half()

# One forward and one backward pass, then wait for the queued GPU work so any
# asynchronous CUDA error surfaces here.
out = net(data)
out.backward(torch.randn_like(out))
torch.cuda.synchronize()
But this code snippet doesn't reproduce the same issue.
What could be the problem? I would really appreciate your help.
I'm not sure exactly what your problem is. Could you give me more details?
Hi,
I am training on a custom dataset with a single class (num_classes=1).
My exp script:
I have used:
But I am getting the following error:
I tried the following code snippet to check if I could reproduce the same issue:
But this code snippet doesn't reproduce the same issue.
What could be the problem? I would really appreciate your help.