Tutorial 5 Model training, issue with training the model using

🐛 Bug

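Training fails in Tutorial 5: calling `mlp.train_yolov5` with the Yolo5m-classifier model selected and the model type set to object detection raises `AttributeError: 'ClassificationModel' object has no attribute 'yaml'`.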

To Reproduce (REQUIRED)

Input: Project: KSO; Select model: Yolo5m-classifier; Model type: Object detection model

mlp.train_yolov5(
    exp_name.value,
    weights.artifact_path,
    project,
    epochs=epochs.value,
    batch_size=batch_size.value,
    img_size=(img_h.value, img_w.value),
)

Output:

AttributeError                            Traceback (most recent call last)
Cell In[16], line 1
----> 1 mlp.train_yolov5(
      2     exp_name.value,
      3     weights.artifact_path,
      4     project,
      5     epochs=epochs.value,
      6     batch_size=batch_size.value,
      7     img_size=(img_h.value, img_w.value),
      8 )

File /usr/src/app/kso-dev/kso_utils/project.py:1255, in MLProjectProcessor.train_yolov5(self, exp_name, weights, project, epochs, batch_size, img_size)
   1251 def train_yolov5(
   1252     self, exp_name, weights, project, epochs=50, batch_size=16, img_size=[640, 640]
   1253 ):
   1254     if self.model_type == 1:
-> 1255         self.modules["train"].run(
   1256             entity=self.team_name,
   1257             data=self.data_path,
   1258             hyp=self.hyp_path,
   1259             weights=weights,
   1260             project=project,
   1261             name=exp_name,
   1262             imgsz=img_size,
   1263             batch_size=int(batch_size),
   1264             epochs=epochs,
   1265             single_cls=False,
   1266             cache_images=True,
   1267             upload_dataset=True,
   1268         )
   1269     elif self.model_type == 2:
   1270         self.modules["train"].run(
   1271             entity=self.team_name,
   1272             data=self.data_path,
   (...)
   1278             epochs=epochs,
   1279         )

File /usr/src/app/kso/yolov5/train.py:627, in run(**kwargs)
    625 for k, v in kwargs.items():
    626     setattr(opt, k, v)
--> 627 main(opt)
    628 return opt

File /usr/src/app/kso/yolov5/train.py:527, in main(opt, callbacks)
    525 # Train
    526 if not opt.evolve:
--> 527     train(opt.hyp, opt, device, callbacks)
    529 # Evolve hyperparameters (optional)
    530 else:
    531     # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit)
    532     meta = {
    533         'lr0': (1, 1e-5, 1e-1),  # initial learning rate (SGD=1E-2, Adam=1E-3)
    534         'lrf': (1, 0.01, 1.0),  # final OneCycleLR learning rate (lr0 * lrf)
   (...)
    560         'mixup': (1, 0.0, 1.0),  # image mixup (probability)
    561         'copy_paste': (1, 0.0, 1.0)}  # segment copy-paste (probability)

File /usr/src/app/kso/yolov5/train.py:124, in train(hyp, opt, device, callbacks)
    122     weights = attempt_download(weights)  # download if not found locally
    123 ckpt = torch.load(weights, map_location='cpu')  # load checkpoint to CPU to avoid CUDA memory leak
--> 124 model = Model(cfg or ckpt['model'].yaml, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)  # create
    125 exclude = ['anchor'] if (cfg or hyp.get('anchors')) and not resume else []  # exclude keys
    126 csd = ckpt['model'].float().state_dict()  # checkpoint state_dict as FP32

File /usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py:1614, in Module.__getattr__(self, name)
   1612     if name in modules:
   1613         return modules[name]
-> 1614 raise AttributeError("'{}' object has no attribute '{}'".format(
   1615     type(self).__name__, name))

AttributeError: 'ClassificationModel' object has no attribute 'yaml'

Thanks @jannesgg, that might have been it. I tried the baseline-Yolov5 model instead, but got a different error:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[19], line 1
----> 1 mlp.train_yolov5(
      2     exp_name.value,
      3     weights.artifact_path,
      4     project,
      5     epochs=epochs.value,
      6     batch_size=batch_size.value,
      7     img_size=(img_h.value, img_w.value),
      8 )

File /usr/src/app/kso-dev/kso_utils/project.py:1255, in MLProjectProcessor.train_yolov5(self, exp_name, weights, project, epochs, batch_size, img_size)
   1251 def train_yolov5(
   1252     self, exp_name, weights, project, epochs=50, batch_size=16, img_size=[640, 640]
   1253 ):
   1254     if self.model_type == 1:
-> 1255         self.modules["train"].run(
   1256             entity=self.team_name,
   1257             data=self.data_path,
   1258             hyp=self.hyp_path,
   1259             weights=weights,
   1260             project=project,
   1261             name=exp_name,
   1262             imgsz=img_size,
   1263             batch_size=int(batch_size),
   1264             epochs=epochs,
   1265             single_cls=False,
   1266             cache_images=True,
   1267             upload_dataset=True,
   1268         )
   1269     elif self.model_type == 2:
   1270         self.modules["train"].run(
   1271             entity=self.team_name,
   1272             data=self.data_path,
   (...)
   1278             epochs=epochs,
   1279         )

File /usr/src/app/kso/yolov5/train.py:627, in run(**kwargs)
    625 for k, v in kwargs.items():
    626     setattr(opt, k, v)
--> 627 main(opt)
    628 return opt

File /usr/src/app/kso/yolov5/train.py:527, in main(opt, callbacks)
    525 # Train
    526 if not opt.evolve:
--> 527     train(opt.hyp, opt, device, callbacks)
    529 # Evolve hyperparameters (optional)
    530 else:
    531     # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit)
    532     meta = {
    533         'lr0': (1, 1e-5, 1e-1),  # initial learning rate (SGD=1E-2, Adam=1E-3)
    534         'lrf': (1, 0.01, 1.0),  # final OneCycleLR learning rate (lr0 * lrf)
   (...)
    560         'mixup': (1, 0.0, 1.0),  # image mixup (probability)
    561         'copy_paste': (1, 0.0, 1.0)}  # segment copy-paste (probability)

File /usr/src/app/kso/yolov5/train.py:187, in train(hyp, opt, device, callbacks)
    184     LOGGER.info('Using SyncBatchNorm()')
    186 # Trainloader
--> 187 train_loader, dataset = create_dataloader(train_path,
    188                                           imgsz,
    189                                           batch_size // WORLD_SIZE,
    190                                           gs,
    191                                           single_cls,
    192                                           hyp=hyp,
    193                                           augment=True,
    194                                           cache=None if opt.cache == 'val' else opt.cache,
    195                                           rect=opt.rect,
    196                                           rank=LOCAL_RANK,
    197                                           workers=workers,
    198                                           image_weights=opt.image_weights,
    199                                           quad=opt.quad,
    200                                           prefix=colorstr('train: '),
    201                                           shuffle=True)
    202 labels = np.concatenate(dataset.labels, 0)
    203 mlc = int(labels[:, 0].max())  # max label class

File /usr/src/app/kso/yolov5/utils/dataloaders.py:123, in create_dataloader(path, imgsz, batch_size, stride, single_cls, hyp, augment, cache, pad, rect, rank, workers, image_weights, quad, prefix, shuffle)
    121     shuffle = False
    122 with torch_distributed_zero_first(rank):  # init dataset *.cache only once if DDP
--> 123     dataset = LoadImagesAndLabels(
    124         path,
    125         imgsz,
    126         batch_size,
    127         augment=augment,  # augmentation
    128         hyp=hyp,  # hyperparameters
    129         rect=rect,  # rectangular batches
    130         cache_images=cache,
    131         single_cls=single_cls,
    132         stride=int(stride),
    133         pad=pad,
    134         image_weights=image_weights,
    135         prefix=prefix)
    137 batch_size = min(batch_size, len(dataset))
    138 nd = torch.cuda.device_count()  # number of CUDA devices

File /usr/src/app/kso/yolov5/utils/dataloaders.py:456, in LoadImagesAndLabels.__init__(self, path, img_size, batch_size, augment, hyp, rect, image_weights, cache_images, single_cls, stride, pad, min_items, prefix)
    454 self.rect = False if image_weights else rect
    455 self.mosaic = self.augment and not self.rect  # load 4 images at a time into a mosaic (only during training)
--> 456 self.mosaic_border = [-img_size // 2, -img_size // 2]
    457 self.stride = stride
    458 self.path = path

TypeError: bad operand type for unary -: 'list'

ocean-data-factory-sweden / kso

Tutorial 5 Model training, issue with training the model using #286

🐛 Bug

To Reproduce (REQUIRED)