albumentations-team / albumentations

Fast and flexible image augmentation library. Paper about the library: https://www.mdpi.com/2078-2489/11/2/125
https://albumentations.ai
MIT License
14.22k stars 1.65k forks source link

Yolo Darknet bounding boxes are not shown #828

Open jigsawcoder opened 3 years ago

jigsawcoder commented 3 years ago

🐛 Bug

I am using a custom YOLO Darknet dataset with multiple classes per image. I tried to visualize the bounding boxes after converting them from the YOLO format to the one used by Albumentations, but I can't see any bounding boxes on the images.

I used the following code snippets to convert the bounding boxes from the YOLO format to the format used by Albumentations:

def denormalize_bbox(bbox, rows, cols):
    """Convert a normalized bounding box to absolute coordinates.

    The x coordinates are multiplied by ``cols`` and the y coordinates by
    ``rows``; anything after the first four items is passed through untouched.

    Args:
        bbox: Sequence starting with ``(x_min, y_min, x_max, y_max)``.
        rows: Number of image rows (height); must be greater than zero.
        cols: Number of image columns (width); must be greater than zero.

    Returns:
        Tuple ``(x_min, y_min, x_max, y_max, *extra)`` in absolute units.

    Raises:
        ValueError: If ``rows`` or ``cols`` is not positive.
    """
    corners, extra = bbox[:4], tuple(bbox[4:])
    left, top, right, bottom = corners

    if rows <= 0:
        raise ValueError("Argument rows must be positive integer")
    if cols <= 0:
        raise ValueError("Argument cols must be positive integer")

    scaled = (left * cols, top * rows, right * cols, bottom * rows)
    return scaled + extra

 def normalize_bbox(bbox, rows, cols):
     """Convert an absolute bounding box to normalized coordinates.

     The x coordinates are divided by ``cols`` and the y coordinates by
     ``rows``; anything after the first four items is passed through untouched.

     Args:
         bbox: Sequence starting with ``(x_min, y_min, x_max, y_max)``.
         rows: Number of image rows (height); must be greater than zero.
         cols: Number of image columns (width); must be greater than zero.

     Returns:
         Tuple ``(x_min, y_min, x_max, y_max, *extra)`` with the four
         coordinates expressed as fractions of the image size.

     Raises:
         ValueError: If ``rows`` or ``cols`` is not positive.
     """
     coords, remainder = bbox[:4], tuple(bbox[4:])
     x_lo, y_lo, x_hi, y_hi = coords

     if rows <= 0:
         raise ValueError("Argument rows must be positive integer")
     if cols <= 0:
         raise ValueError("Argument cols must be positive integer")

     fractions = (x_lo / cols, y_lo / rows, x_hi / cols, y_hi / rows)
     return fractions + remainder

def convert_bbox_to_albumentations(bbox, source_format, rows, cols, check_validity=False):
    """Convert a bounding box from ``source_format`` to the albumentations format.

    The result is ``(x_min, y_min, x_max, y_max)`` divided by the image size
    (via ``normalize_bbox``), followed by any extra items carried by ``bbox``.

    Args:
        bbox: Sequence or ``np.ndarray`` whose first four items describe the box:
            ``"coco"`` -> ``(x_min, y_min, width, height)`` in pixels,
            ``"pascal_voc"`` -> ``(x_min, y_min, x_max, y_max)`` in pixels,
            ``"yolo"`` -> ``(x_center, y_center, width, height)`` with every
            value in ``(0, 1]``.
        source_format: One of ``"coco"``, ``"pascal_voc"`` or ``"yolo"``.
        rows: Image height in pixels.
        cols: Image width in pixels.
        check_validity: When true, the converted box is passed to ``check_bbox``.

    Returns:
        Tuple ``(x_min, y_min, x_max, y_max, *extra)`` in normalized coordinates.

    Raises:
        ValueError: If ``source_format`` is unknown, or a YOLO value lies
            outside ``(0, 1]``.
    """
    if source_format not in {"coco", "pascal_voc", "yolo"}:
        raise ValueError(
            "Unknown source_format {}. Supported formats are: 'coco', 'pascal_voc' and 'yolo'".format(source_format)
        )
    if isinstance(bbox, np.ndarray):
        bbox = bbox.tolist()

    if source_format == "coco":
        (x_min, y_min, width, height), tail = bbox[:4], tuple(bbox[4:])
        x_max = x_min + width
        y_max = y_min + height
    elif source_format == "yolo":
        # https://github.com/pjreddie/darknet/blob/f6d861736038da22c9eb0739dca84003c5a5e275/scripts/voc_label.py#L12
        bbox, tail = bbox[:4], tuple(bbox[4:])
        _bbox = np.array(bbox[:4])
        if np.any((_bbox <= 0) | (_bbox > 1)):
            raise ValueError("In YOLO format all labels must be float and in range (0, 1]")

        # Scale the centre/size representation up to pixels before turning it
        # into corner coordinates.
        x, y, width, height = denormalize_bbox(bbox, rows, cols)

        # NOTE(review): corners are truncated to whole pixels with a +1 offset,
        # presumably to invert the "- 1" in the darknet script linked above.
        # This drops sub-pixel precision; confirm that is intended.
        x_min = int(x - width / 2 + 1)
        x_max = int(x_min + width)
        y_min = int(y - height / 2 + 1)
        y_max = int(y_min + height)
    else:
        # "pascal_voc" already uses corner coordinates.
        (x_min, y_min, x_max, y_max), tail = bbox[:4], tuple(bbox[4:])

    bbox = (x_min, y_min, x_max, y_max) + tail
    bbox = normalize_bbox(bbox, rows, cols)
    if check_validity:
        check_bbox(bbox)
    return bbox

Also, I used the following code for visualization:

BOX_COLOR = (255, 0, 0)  # Red
TEXT_COLOR = (255, 255, 255)  # White


def visualize_bbox(img, bbox, class_name, color=BOX_COLOR, thickness=2):
    """Draw a single labelled bounding box on ``img`` and return the image.

    Args:
        img: Image array; OpenCV draws directly onto it.
        bbox: Box whose first four items are ``(x_min, y_min, width, height)``
            in pixels. Normalized boxes, or boxes given as
            ``(x_min, y_min, x_max, y_max)``, must be converted to this layout
            first, otherwise the rectangle lands in the wrong place or
            collapses to a dot. Extra trailing items (e.g. a label) are ignored.
        class_name: Text written in the label above the box.
        color: Colour of the box outline and of the label background.
        thickness: Outline thickness in pixels.

    Returns:
        The ``img`` object that was passed in, with the box drawn on it.
    """
    # Only the geometry is needed; tolerate boxes that carry extra fields.
    x_min, y_min, w, h = bbox[:4]
    x_min, x_max, y_min, y_max = int(x_min), int(x_min + w), int(y_min), int(y_min + h)

    cv2.rectangle(img, (x_min, y_min), (x_max, y_max), color=color, thickness=thickness)

    ((text_width, text_height), _) = cv2.getTextSize(class_name, cv2.FONT_HERSHEY_SIMPLEX, 0.35, 1)
    # Filled label background sitting on top of the box; it follows ``color``
    # so a custom outline colour and its label stay visually paired.
    cv2.rectangle(img, (x_min, y_min - int(1.3 * text_height)), (x_min + text_width, y_min), color, -1)
    cv2.putText(
        img,
        text=class_name,
        org=(x_min, y_min - int(0.3 * text_height)),
        fontFace=cv2.FONT_HERSHEY_SIMPLEX,
        fontScale=0.35,
        color=TEXT_COLOR,
        lineType=cv2.LINE_AA,
    )
    return img

def visualize(image, bboxes, category_ids, category_id_to_name):
    """Show ``image`` with every bounding box and its class label drawn on it.

    Drawing happens on a copy, so ``image`` itself is left untouched. Boxes and
    category ids are paired positionally; each id is looked up in
    ``category_id_to_name`` to obtain the label text.
    """
    canvas = image.copy()
    for box, cat_id in zip(bboxes, category_ids):
        canvas = visualize_bbox(canvas, box, category_id_to_name[cat_id])

    plt.figure(figsize=(12, 12))
    plt.axis('off')
    plt.imshow(canvas)

This is the image I am getting. There are 7 bounding boxes in total in this image, but they are not drawn correctly. Is there something I am missing?

1 4 2 3

I am following these two links:

https://albumentations.ai/docs/examples/example_bboxes/

https://github.com/albumentations-team/albumentations/blob/13f44d1397f11cb371436715d50739a8c90b6abb/albumentations/augmentations/bbox_utils.py#L195

Environment

Additional context

Dipet commented 3 years ago

I didn't understand what you want to do. If you work with bboxes in yolo format you don't need to convert them to albumentations format.

import numpy as np
import albumentations as A
import random
import cv2 as cv
import matplotlib.pyplot as plt

def draw_bboxes(img, bboxes):
    """Draw YOLO-style boxes onto ``img`` in place.

    Each entry of ``bboxes`` is ``(x_center, y_center, width, height, label)``
    with the geometry given as fractions of the image size. The label is
    unpacked but not drawn.
    """
    img_h, img_w = img.shape[:2]
    # Outline thickness scales with the image height.
    line_width = img_h // 100

    for cx, cy, bw, bh, _label in bboxes:
        # Fractions -> pixels.
        cx, bw = cx * img_w, bw * img_w
        cy, bh = cy * img_h, bh * img_h

        # Centre/size -> integer corner coordinates.
        left = int(cx - bw / 2 + 1)
        right = int(left + bw)
        top = int(cy - bh / 2 + 1)
        bottom = int(top + bh)

        cv.rectangle(img, (left, top), (right, bottom), (255, 0, 0), thickness=line_width)

# Seed both random number generators so the demo is reproducible.
random.seed(0)
np.random.seed(0)

# Blank 1000x1000 three-channel canvas plus four boxes in YOLO layout:
# (x_center, y_center, width, height, label), geometry as image fractions.
image = np.zeros([1000, 1000, 3], dtype=np.uint8)
bboxes = [
    [0.05, 0.05, 0.1, 0.1, 0],
    [0.3, 0.3, 0.25, 0.25, 0],
    [0.25, 0.17, 0.38, 0.34, 0],
    [0.7, 0.3, 0.11, 0.33, 0],
]

# Passing format="yolo" lets albumentations consume the boxes directly, with
# no manual conversion step.
transforms = A.Compose(
    [A.Resize(100, 100)],
    bbox_params=A.BboxParams(format="yolo"),
)
res = transforms(image=image, bboxes=bboxes)

# Top panel: the input image; bottom panel: the resized output.
panels = (
    (211, "original", image, bboxes),
    (212, "result", res["image"], res["bboxes"]),
)
for position, title, canvas, boxes in panels:
    plt.subplot(position, title=title)
    draw_bboxes(canvas, boxes)
    plt.imshow(canvas, vmin=0, vmax=255)

plt.show()