Closed: armanivers closed this issue 1 week ago.
I have no clue how to debug this issue, which is most likely in Ultralytics and not Albumentations.
Could you provide a minimal sample of code that reproduces the issue?
P.S. Will be happy to reopen when there is more clarity.
Sorry about that!
Ultralytics v8.3.25, Albumentations v1.4.20
I'm guessing some kind of change in Ultralytics led to this, but I can't manage to downgrade Albumentations and Ultralytics to the last working version.
(It should be Ultralytics 8.2.50 and Albumentations 1.4.14, but it still doesn't work. Can you make it work?)
Snippet from the code I'm using: https://github.com/ultralytics/ultralytics/pull/6624
Using the code in the following way:
...
T = [
    A.Blur(p=0.01),
    A.MedianBlur(p=0.01),
    A.ToGray(p=0.01),
    A.CLAHE(p=0.01),
    A.RandomBrightnessContrast(p=0.0),
    A.RandomGamma(p=0.0),
    A.ImageCompression(quality_lower=75, p=0.0),
]  # transforms
bbox_params = A.BboxParams(format='yolo', label_fields=['class_labels', 'indices'])
self.transform = A.Compose(T, bbox_params=bbox_params)
...
def __call__(self, labels):
    """Generates object detections and returns a dictionary with detection results."""
    im = labels['img']
    cls = labels['cls']
    if len(cls):
        h, w = im.shape[:2]
        # Remember if instances are normalized before transform to restore later
        normalized = labels['instances'].normalized
        # Convert denormalized segments to masks
        labels['instances'].denormalize(w, h)
        masks = None
        if labels['instances'].segments.shape[0] > 0:
            masks = polygons2masks((h, w), labels['instances'].segments, color=1, downsample_ratio=1)
        labels['instances'].convert_bbox('xywh')
        labels['instances'].normalize(w, h)
        bboxes = labels['instances'].bboxes
        # TODO: add support for keypoints
        if self.transform and random.random() < self.p:
            new = self.transform(image=im,
                                 masks=masks,
                                 bboxes=bboxes,
                                 class_labels=cls,
                                 indices=np.arange(len(bboxes)))  # transformed
            labels['img'] = new['image']
            if len(new['class_labels']) > 0:  # skip update if no bbox in new im
                labels['cls'] = np.array(new['class_labels'])
                bboxes_new = np.array(new['bboxes'], dtype=np.float32)
                if masks is None:
                    labels['instances'].update(bboxes=bboxes_new)
                else:
                    masks_new = np.array(new['masks'])[new['indices']]  # use bbox indices to find matching masks
                    segments_new = masks2segments(masks_new, strategy='largest')
                    non_empty = [s.shape[0] != 0 for s in segments_new]  # find non-empty segments
                    segments_out = [segment for segment, flag in zip(segments_new, non_empty) if flag]
                    bboxes_out = bboxes_new[non_empty]
                    if len(segments_out) > 0:
                        segments_out = resample_segments(segments_out)
                        segments_out = np.stack(segments_out, axis=0)
                        segments_out /= (w, h)
                    else:
                        segments_out = np.zeros((0, 1000, 2), dtype=np.float32)
                    labels['instances'].update(bboxes=bboxes_out, segments=segments_out)
        if normalized:
            labels['instances'].normalize(w, h)
        else:
            labels['instances'].denormalize(w, h)
    return labels
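For anyone trying to reproduce this outside of a full training run, here is a minimal sketch (synthetic image, made-up box and mask, not from my dataset) that drives the same kind of Compose pipeline directly and prints what Albumentations hands back; swapping one of the failing transforms (e.g. ShiftScaleRotate) into the list should show the empty-result case:
import numpy as np
import albumentations as A

# Synthetic stand-ins for labels['img'], the masks and the YOLO boxes
im = np.random.randint(0, 255, (512, 512, 3), dtype=np.uint8)
masks = [np.zeros((512, 512), dtype=np.uint8)]
bboxes = np.array([[0.5, 0.5, 0.2, 0.2]], dtype=np.float32)  # one box in YOLO format

bbox_params = A.BboxParams(format='yolo', label_fields=['class_labels', 'indices'])
transform = A.Compose([A.Blur(p=0.01), A.MedianBlur(p=0.01), A.ToGray(p=0.01), A.CLAHE(p=0.01)],
                      bbox_params=bbox_params)

new = transform(image=im, masks=masks, bboxes=bboxes, class_labels=[0], indices=[0])
# Shape of the returned bboxes and number of surviving labels
print(np.array(new['bboxes'], dtype=np.float32).shape, len(new['class_labels']))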
A.ShiftScaleRotate(
    shift_limit=(-0.3, 0.3),       # ScaleFloatType
    scale_limit=(0, 0),            # ScaleFloatType
    rotate_limit=(0, 0),           # ScaleFloatType
    interpolation=1,               # <class 'int'>
    border_mode=0,                 # int
    value=0,                       # ColorType
    mask_value=0,                  # ColorType
    shift_limit_x=None,            # ScaleFloatType | None
    shift_limit_y=None,            # ScaleFloatType | None
    rotate_method="largest_box",   # Literal['largest_box', 'ellipse']
    always_apply=None,             # bool | None
    p=1.0,                         # float
)
results in
RuntimeError: Tensors must have same number of dimensions: got 2 and 1
Console log:
File ~/.local/lib/python3.11/site-packages/ultralytics/engine/model.py:802, in Model.train(self, trainer, **kwargs)
799 self.model = self.trainer.model
801 self.trainer.hub_session = self.session # attach optional HUB session
--> 802 self.trainer.train()
803 # Update model and cfg after training
804 if RANK in {-1, 0}:
File ~/.local/lib/python3.11/site-packages/ultralytics/engine/trainer.py:207, in BaseTrainer.train(self)
204 ddp_cleanup(self, str(file))
206 else:
--> 207 self._do_train(world_size)
File ~/.local/lib/python3.11/site-packages/ultralytics/engine/trainer.py:367, in BaseTrainer._do_train(self, world_size)
365 pbar = TQDM(enumerate(self.train_loader), total=nb)
366 self.tloss = None
--> 367 for i, batch in pbar:
368 self.run_callbacks("on_train_batch_start")
369 # Warmup
File ~/.local/lib/python3.11/site-packages/tqdm/std.py:1181, in tqdm.__iter__(self)
1178 time = self._time
1180 try:
-> 1181 for obj in iterable:
1182 yield obj
1183 # Update and possibly print the progressbar.
1184 # Note: does not call self.update(1) for speed optimisation.
File ~/.local/lib/python3.11/site-packages/ultralytics/data/build.py:48, in InfiniteDataLoader.__iter__(self)
46 """Creates a sampler that repeats indefinitely."""
47 for _ in range(len(self)):
---> 48 yield next(self.iterator)
File ~/.local/lib/python3.11/site-packages/torch/utils/data/dataloader.py:630, in _BaseDataLoaderIter.__next__(self)
627 if self._sampler_iter is None:
628 # TODO(https://github.com/pytorch/pytorch/issues/76750)
629 self._reset() # type: ignore[call-arg]
--> 630 data = self._next_data()
631 self._num_yielded += 1
632 if self._dataset_kind == _DatasetKind.Iterable and \
633 self._IterableDataset_len_called is not None and \
634 self._num_yielded > self._IterableDataset_len_called:
File ~/.local/lib/python3.11/site-packages/torch/utils/data/dataloader.py:1344, in _MultiProcessingDataLoaderIter._next_data(self)
1342 else:
1343 del self._task_info[idx]
-> 1344 return self._process_data(data)
File ~/.local/lib/python3.11/site-packages/torch/utils/data/dataloader.py:1370, in _MultiProcessingDataLoaderIter._process_data(self, data)
1368 self._try_put_index()
1369 if isinstance(data, ExceptionWrapper):
-> 1370 data.reraise()
1371 return data
File ~/.local/lib/python3.11/site-packages/torch/_utils.py:706, in ExceptionWrapper.reraise(self)
702 except TypeError:
703 # If the exception takes multiple arguments, don't try to
704 # instantiate since we don't know how to
705 raise RuntimeError(msg) from None
--> 706 raise exception
RuntimeError: Caught RuntimeError in DataLoader worker process 3.
Original Traceback (most recent call last):
File "/home/user/.local/lib/python3.11/site-packages/torch/utils/data/_utils/worker.py", line 309, in _worker_loop
data = fetcher.fetch(index) # type: ignore[possibly-undefined]
^^^^^^^^^^^^^^^^^^^^
File "/home/user/.local/lib/python3.11/site-packages/torch/utils/data/_utils/fetch.py", line 55, in fetch
return self.collate_fn(data)
^^^^^^^^^^^^^^^^^^^^^
File "/home/user/.local/lib/python3.11/site-packages/ultralytics/data/dataset.py", line 240, in collate_fn
value = torch.cat(value, 0)
^^^^^^^^^^^^^^^^^^^
RuntimeError: Tensors must have same number of dimensions: got 1 and 2
All three of these:
A.Perspective(p=0.7, scale=(0.05, 0.15), keep_size=True, pad_mode=0, pad_val=0, mask_pad_val=0, fit_output=False, interpolation=1, always_apply=None)
A.RandomResizedCrop(size=[512, 512], width=512, height=512, scale=(0.5, 1), ratio=(1, 1), interpolation=1, always_apply=None, p=1)
A.RandomSizedCrop(min_max_height=[100, 512], size=None, width=512, height=512, w2h_ratio=1, interpolation=1, always_apply=None, p=1)
result in
RuntimeError: Tensors must have same number of dimensions: got 1 and 2
and this during training:
Epoch GPU_mem box_loss seg_loss cls_loss dfl_loss Instances Size
1/95 10.1G 1.512 3.122 4.335 1.758 17 512: 32%|███▏ | 13/41 [00:05<00:10, 2.55it/s]
/home/user/.local/lib/python3.11/site-packages/albumentations/core/bbox_utils.py:478: RuntimeWarning: invalid value encountered in divide
  & (clipped_box_areas / denormalized_box_areas >= min_visibility - epsilon)
Console Log:
File ~/.local/lib/python3.11/site-packages/ultralytics/engine/model.py:802, in Model.train(self, trainer, **kwargs)
799 self.model = self.trainer.model
801 self.trainer.hub_session = self.session # attach optional HUB session
--> 802 self.trainer.train()
803 # Update model and cfg after training
804 if RANK in {-1, 0}:
File ~/.local/lib/python3.11/site-packages/ultralytics/engine/trainer.py:207, in BaseTrainer.train(self)
204 ddp_cleanup(self, str(file))
206 else:
--> 207 self._do_train(world_size)
File ~/.local/lib/python3.11/site-packages/ultralytics/engine/trainer.py:367, in BaseTrainer._do_train(self, world_size)
365 pbar = TQDM(enumerate(self.train_loader), total=nb)
366 self.tloss = None
--> 367 for i, batch in pbar:
368 self.run_callbacks("on_train_batch_start")
369 # Warmup
File ~/.local/lib/python3.11/site-packages/tqdm/std.py:1181, in tqdm.__iter__(self)
1178 time = self._time
1180 try:
-> 1181 for obj in iterable:
1182 yield obj
1183 # Update and possibly print the progressbar.
1184 # Note: does not call self.update(1) for speed optimisation.
File ~/.local/lib/python3.11/site-packages/ultralytics/data/build.py:48, in InfiniteDataLoader.__iter__(self)
46 """Creates a sampler that repeats indefinitely."""
47 for _ in range(len(self)):
---> 48 yield next(self.iterator)
File ~/.local/lib/python3.11/site-packages/torch/utils/data/dataloader.py:630, in _BaseDataLoaderIter.__next__(self)
627 if self._sampler_iter is None:
628 # TODO(https://github.com/pytorch/pytorch/issues/76750)
629 self._reset() # type: ignore[call-arg]
--> 630 data = self._next_data()
631 self._num_yielded += 1
632 if self._dataset_kind == _DatasetKind.Iterable and \
633 self._IterableDataset_len_called is not None and \
634 self._num_yielded > self._IterableDataset_len_called:
File ~/.local/lib/python3.11/site-packages/torch/utils/data/dataloader.py:1344, in _MultiProcessingDataLoaderIter._next_data(self)
1342 else:
1343 del self._task_info[idx]
-> 1344 return self._process_data(data)
File ~/.local/lib/python3.11/site-packages/torch/utils/data/dataloader.py:1370, in _MultiProcessingDataLoaderIter._process_data(self, data)
1368 self._try_put_index()
1369 if isinstance(data, ExceptionWrapper):
-> 1370 data.reraise()
1371 return data
File ~/.local/lib/python3.11/site-packages/torch/_utils.py:706, in ExceptionWrapper.reraise(self)
702 except TypeError:
703 # If the exception takes multiple arguments, don't try to
704 # instantiate since we don't know how to
705 raise RuntimeError(msg) from None
--> 706 raise exception
RuntimeError: Caught RuntimeError in DataLoader worker process 7.
Original Traceback (most recent call last):
File "/home/user/.local/lib/python3.11/site-packages/torch/utils/data/_utils/worker.py", line 309, in _worker_loop
data = fetcher.fetch(index) # type: ignore[possibly-undefined]
^^^^^^^^^^^^^^^^^^^^
File "/home/user/.local/lib/python3.11/site-packages/torch/utils/data/_utils/fetch.py", line 55, in fetch
return self.collate_fn(data)
^^^^^^^^^^^^^^^^^^^^^
File "/home/user/.local/lib/python3.11/site-packages/ultralytics/data/dataset.py", line 240, in collate_fn
value = torch.cat(value, 0)
^^^^^^^^^^^^^^^^^^^
RuntimeError: Tensors must have same number of dimensions: got 1 and 2
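If it helps, the RuntimeWarning above looks like a 0/0 division: when a box ends up with zero area, the visibility ratio becomes NaN. A tiny sketch with made-up values that produces the same warning:
import numpy as np

clipped_box_areas = np.array([0.0])        # area left after clipping to the image
denormalized_box_areas = np.array([0.0])   # original area of a degenerate, zero-area box
# 0 / 0 yields NaN and NumPy emits "RuntimeWarning: invalid value encountered in divide"
print(clipped_box_areas / denormalized_box_areas)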
Do you have an image that these transforms fail on?
I made a few changes in these transforms recently, so it could be a bug, but I cannot figure out the issue from the error in the Ultralytics output.
I'm using the train data from this public dataset
Do you know the exact image it fails on?
This I cannot specify; it should be one from the beginning. It's the exact dataset from GitHub; I assume it loads them in the default folder order?
This is the script I use to convert the masks to polygon files for YOLO training:
import os
import cv2

def convert_masks(input_dir, output_dir):
    for j in os.listdir(input_dir):
        image_path = os.path.join(input_dir, j)
        # load the binary mask and get its contours
        mask = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        _, mask = cv2.threshold(mask, 1, 255, cv2.THRESH_BINARY)
        H, W = mask.shape
        contours, hierarchy = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        # convert the contours to polygons
        polygons = []
        for cnt in contours:
            if cv2.contourArea(cnt) > 200:
                polygon = []
                for point in cnt:
                    x, y = point[0]
                    polygon.append(x / W)
                    polygon.append(y / H)
                polygons.append(polygon)
        # print the polygons
        with open('{}.txt'.format(os.path.join(output_dir, j)[:-4]), 'w') as f:
            for polygon in polygons:
                for p_, p in enumerate(polygon):
                    if p_ == len(polygon) - 1:
                        f.write('{}\n'.format(p))
                    elif p_ == 0:
                        f.write('0 {} '.format(p))
                    else:
                        f.write('{} '.format(p))
            f.close()

def main():
    '''
    train_dir = './yolov8-wound-seg/data_raw/FUSeg/train/labels'
    train_dir_out = './yolov8-wound-seg/data_raw/FUSeg/train/labels_txt'
    if not os.path.exists(train_dir_out):
        os.makedirs(train_dir_out)
    convert_masks(train_dir, train_dir_out)
    '''
    validation_dir = './yolov8-wound-seg/data_raw/FUSeg/validation/labels'
    validation_dir_out = './yolov8-wound-seg/data_raw/FUSeg/validation/labels_txt'
    if not os.path.exists(validation_dir_out):
        os.makedirs(validation_dir_out)
    convert_masks(validation_dir, validation_dir_out)

if __name__ == '__main__':
    main()
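As a quick sanity check on the generated label files (the path below is just the output folder from the script above), something like this can flag polygons with an odd coordinate count or values outside [0, 1]:
import os

def check_labels(label_dir):
    # Flag label lines whose polygons look malformed or out of range
    for name in os.listdir(label_dir):
        if not name.endswith('.txt'):
            continue
        with open(os.path.join(label_dir, name)) as f:
            for line_no, line in enumerate(f, 1):
                coords = [float(v) for v in line.split()[1:]]  # skip the class id
                if len(coords) < 6 or len(coords) % 2 != 0:
                    print('{}:{} suspicious polygon ({} coords)'.format(name, line_no, len(coords)))
                elif min(coords) < 0 or max(coords) > 1:
                    print('{}:{} coordinates outside [0, 1]'.format(name, line_no))

# check_labels('./yolov8-wound-seg/data_raw/FUSeg/validation/labels_txt')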
I need a minimal example to reproduce the issue; until then, let's assume that it is on the side of Ultralytics.
They may be more helpful: https://github.com/ultralytics/ultralytics
From version 1.4.16 of Albumentations, the filter_bboxes function (in the core/bbox_utils.py file) returns an empty NumPy array of shape (0, 4) if there are no target boxes left in the image after the transformation; check the pull request.
Ultralytics does not handle this situation.
Downgrading to <= 1.4.15 may be the easiest way.
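Untested sketch, not a verified fix: if the (0, 4) empty array really is the culprit, a small shape guard around the bboxes handling on the Ultralytics side (around the bboxes_new = np.array(...) line in the __call__ quoted above) might also work around it:
import numpy as np

def as_yolo_bboxes(bboxes):
    """Coerce the Albumentations output to a float32 array of shape (N, 4),
    even when no boxes survived the transform (hypothetical helper)."""
    arr = np.asarray(bboxes, dtype=np.float32)
    return arr.reshape(-1, 4) if arr.size else np.zeros((0, 4), dtype=np.float32)

# e.g. in the __call__ quoted earlier:
#   bboxes_new = as_yolo_bboxes(new['bboxes'])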
@sralvins thanks for the suggestion. Yes, that's the only fix that worked for me until now.
Albumentations 1.4.14 + Ultralytics 8.3.4 (installing the albucore version as an extra command probably wasn't necessary, but for me it sometimes led to Albumentations not being detected)
pip install albumentations==1.4.14
pip install albucore==0.0.14
pip install ultralytics==8.3.4
(Albumentations v1.4.15 doesn't get recognized for me on Ultralytics 8.3.4, i.e. the "Albumentations: ..." message (with the augmentations applied) doesn't appear during training, hence no data augmentation is done.) Any suggestion why some versions don't get detected sometimes?
P.S. I've been trying to find which image causes this problem; I've managed to locate it in the validation set of the dataset I mentioned. Still working on a clear minimal reproduction, sorry.
Did you try Albumentations version 1.4.21?
Hello,
I've been receiving the following error when training a YOLOv8 model with Albumentations:
RuntimeError: Tensors must have same number of dimensions: got 2 and 1
Until now I've only seen it happen when using this specific augmentation (shifting the picture):
A.ShiftScaleRotate(
    shift_limit=(-0.3, 0.3),       # ScaleFloatType
    scale_limit=(0, 0),            # ScaleFloatType
    rotate_limit=(0, 0),           # ScaleFloatType
    interpolation=1,               # <class 'int'>
    border_mode=0,                 # int
    value=0,                       # ColorType
    mask_value=0,                  # ColorType
    shift_limit_x=None,            # ScaleFloatType | None
    shift_limit_y=None,            # ScaleFloatType | None
    rotate_method="largest_box",   # Literal['largest_box', 'ellipse']
    always_apply=None,             # bool | None
    p=1.0,                         # float
)
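A minimal standalone sketch (dummy image and made-up box values, not from my dataset) that exercises the same transform outside of Ultralytics, in case it helps with reproducing:
import numpy as np
import albumentations as A

transform = A.Compose(
    [A.ShiftScaleRotate(shift_limit=(-0.3, 0.3), scale_limit=(0, 0), rotate_limit=(0, 0),
                        border_mode=0, p=1.0)],
    bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels']),
)

im = np.random.randint(0, 255, (512, 512, 3), dtype=np.uint8)   # dummy image
bboxes = [(0.1, 0.1, 0.15, 0.15)]                               # small box near the corner

out = transform(image=im, bboxes=bboxes, class_labels=[0])
# With a shift of up to 30% and a constant border, the box can be pushed out of
# the frame and dropped, so out['bboxes'] may come back empty here.
print(np.array(out['bboxes'], dtype=np.float32).shape)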
I'm sadly not very sure when the error started appearing.
Console Log:
RuntimeError Traceback (most recent call last)
Cell In[6], line 35
32 current_params = kfold_params[i]
34 model = YOLO('yolov8x-seg.pt')
---> 35 model_results = model.train(data=os.path.join(ROOT_DIR, f'config{i}.yaml'), imgsz=512, batch=16, deterministic=True, plots=True,
36 close_mosaic = 0,
37 optimizer = current_params["optimizer"],
38 epochs = current_params["epochs"],
39 lr0 = current_params["lr"],
40 dropout = current_params["dropout"],
41 # disable built-in augmentation, instead use Albumentations Library
42 augment=False, hsv_h=0, hsv_s=0, hsv_v=0, degrees=0, translate=0,
43 scale=0, shear=0.0, perspective=0, flipud=0, fliplr=0, bgr=0,
44 mosaic=0, mixup=0, copy_paste=0, erasing=0, crop_fraction=0)
45 results = model.val()
47 print("\n" + "#" * 60)
File ~/.local/lib/python3.11/site-packages/ultralytics/engine/model.py:802, in Model.train(self, trainer, **kwargs)
799 self.model = self.trainer.model
801 self.trainer.hub_session = self.session # attach optional HUB session
--> 802 self.trainer.train()
803 # Update model and cfg after training
804 if RANK in {-1, 0}:
File ~/.local/lib/python3.11/site-packages/ultralytics/engine/trainer.py:207, in BaseTrainer.train(self)
204 ddp_cleanup(self, str(file))
206 else:
--> 207 self._do_train(world_size)
File ~/.local/lib/python3.11/site-packages/ultralytics/engine/trainer.py:367, in BaseTrainer._do_train(self, world_size)
365 pbar = TQDM(enumerate(self.train_loader), total=nb)
366 self.tloss = None
--> 367 for i, batch in pbar:
368 self.run_callbacks("on_train_batch_start")
369 # Warmup
File ~/.local/lib/python3.11/site-packages/tqdm/std.py:1181, in tqdm.__iter__(self)
1178 time = self._time
1180 try:
-> 1181 for obj in iterable:
1182 yield obj
1183 # Update and possibly print the progressbar.
1184 # Note: does not call self.update(1) for speed optimisation.
File ~/.local/lib/python3.11/site-packages/ultralytics/data/build.py:48, in InfiniteDataLoader.__iter__(self)
46 """Creates a sampler that repeats indefinitely."""
47 for _ in range(len(self)):
---> 48 yield next(self.iterator)
File ~/.local/lib/python3.11/site-packages/torch/utils/data/dataloader.py:630, in _BaseDataLoaderIter.__next__(self)
627 if self._sampler_iter is None:
628 # TODO(https://github.com/pytorch/pytorch/issues/76750)
629 self._reset() # type: ignore[call-arg]
--> 630 data = self._next_data()
631 self._num_yielded += 1
632 if self._dataset_kind == _DatasetKind.Iterable and \
633 self._IterableDataset_len_called is not None and \
634 self._num_yielded > self._IterableDataset_len_called:
File ~/.local/lib/python3.11/site-packages/torch/utils/data/dataloader.py:1324, in _MultiProcessingDataLoaderIter._next_data(self)
1322 if len(self._task_info[self._rcvd_idx]) == 2:
1323 data = self._task_info.pop(self._rcvd_idx)[1]
-> 1324 return self._process_data(data)
1326 assert not self._shutdown and self._tasks_outstanding > 0
1327 idx, data = self._get_data()
File ~/.local/lib/python3.11/site-packages/torch/utils/data/dataloader.py:1370, in _MultiProcessingDataLoaderIter._process_data(self, data)
1368 self._try_put_index()
1369 if isinstance(data, ExceptionWrapper):
-> 1370 data.reraise()
1371 return data
File ~/.local/lib/python3.11/site-packages/torch/_utils.py:706, in ExceptionWrapper.reraise(self)
702 except TypeError:
703 # If the exception takes multiple arguments, don't try to
704 # instantiate since we don't know how to
705 raise RuntimeError(msg) from None
--> 706 raise exception
RuntimeError: Caught RuntimeError in DataLoader worker process 3.
Original Traceback (most recent call last):
File "/home/user/.local/lib/python3.11/site-packages/torch/utils/data/_utils/worker.py", line 309, in _worker_loop
data = fetcher.fetch(index) # type: ignore[possibly-undefined]
^^^^^^^^^^^^^^^^^^^^
File "/home/user/.local/lib/python3.11/site-packages/torch/utils/data/_utils/fetch.py", line 55, in fetch
return self.collate_fn(data)
^^^^^^^^^^^^^^^^^^^^^
File "/home/user/.local/lib/python3.11/site-packages/ultralytics/data/dataset.py", line 240, in collate_fn
value = torch.cat(value, 0)
^^^^^^^^^^^^^^^^^^^
RuntimeError: Tensors must have same number of dimensions: got 2 and 1