ultralytics / yolov5

YOLOv5 🚀 in PyTorch > ONNX > CoreML > TFLite
https://docs.ultralytics.com
GNU Affero General Public License v3.0
50.87k stars 16.38k forks source link

How to save bounding box (xmin, ymin, xmax, ymax), class name, img height and width from inference? #3223

Closed Laudarisd closed 3 years ago

Laudarisd commented 3 years ago

I will really appreciate your suggestions.

Recently I have been working on an object detection project as a personal project. To run training I need lots of images and annotation files. So far I have managed to create almost 800 annotation files, but it takes a lot of time to collect such annotation files.

So I tried to do training for 800 images and corresponding annotation files with YOLO-V5. Now I have .pt model and it gives me around 80% detection box on my test images.

I want to save those bounding boxes, image names, and classes in a CSV file so that I can create more annotation files for the next training run.

So how can I extract those parameters to a CSV file?

Import library and functions

import argparse
import time
from pathlib import Path
import csv
import cv2
import torch
import torch.backends.cudnn as cudnn
from numpy import random

from models.experimental import attempt_load
from utils.datasets import LoadStreams, LoadImages
from utils.general import check_img_size, check_requirements, check_imshow, non_max_suppression, apply_classifier, \
    scale_coords, xyxy2xywh, strip_optimizer, set_logging, increment_path, save_one_box
from utils.plots import plot_one_box
from utils.torch_utils import select_device, load_classifier, time_synchronized

I guess this part loads model and classifier

def detect(opt):
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    save_img = not opt.nosave and not source.endswith('.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://', 'https://'))

    # Directories
    save_dir = increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

I guess in this part we get normalize boxes, prediction with classes and accuracy.

# Run inference
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

I guess we get tensor value from pred (in this section) which are related to bounding boxes.

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)
        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', im0s.copy(), getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string
                    #print(c)

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh)  # label format
                        print(line)
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or opt.save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if opt.hide_labels else (names[c] if opt.hide_conf else f'{names[c]} {conf:.2f}')
                        #print(label)

                        plot_one_box(xyxy, im0, label=label, color=colors[c], line_thickness=opt.line_thickness)
                        if opt.save_crop:
                            save_one_box(xyxy, im0s, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)

I tried to add this script after this pred section

make_csv = []
for i in pred:
 make_csv.append(i)
 #make_csv.append(label)
  with open('./data.csv', 'w',) as csvfile:
      writer = csv.writer(csvfile)
      writer.writerow(['image','class', 'height','widht', 'xmin','ymin','xmax','ymax'])
      writer.writerow(make_csv)

I somehow managed to save only one tensor value in a single cell (all values together), but couldn't loop through the results to write the other parameters.

Any help would be appreciated.

And it would be a great help for me to generate annotation files.

Hoping for your kind answer.

Thank you

glenn-jocher commented 3 years ago

@Laudarisd you can use the --save-txt argument in test.py and detect.py to autolabel images.

Laudarisd commented 3 years ago

Let me try. Thanks a lot.

Laudarisd commented 3 years ago

Thank you @glenn-jocher, it helped me. Is there any way to save a CSV file with xmin, ymin, xmax, ymax instead of the YOLO format?

Thank you for your time.

glenn-jocher commented 3 years ago

@Laudarisd you can customize the code here: https://github.com/ultralytics/yolov5/blob/3f74cd9ed1a17de94ed4a19dcefbaa3b27d16a95/detect.py#L101-L108

Laudarisd commented 3 years ago

Thank you @glenn-jocher, I will try. If I get stuck here I will come back. I really appreciate your answer and the time you took to review my question. Thanks a lot.

glenn-jocher commented 3 years ago

@Laudarisd also note that the YOLOv5 PyTorch Hub models can present results as pandas dataframes which include the class names:

import cv2
import torch
from PIL import Image

# Model: pretrained YOLOv5s fetched through PyTorch Hub
model = torch.hub.load('ultralytics/yolov5', 'yolov5s')

# Images: download the two sample files, then open one per library
for filename in ('zidane.jpg', 'bus.jpg'):
    torch.hub.download_url_to_file(f'https://ultralytics.com/images/{filename}', filename)
img1 = Image.open('zidane.jpg')  # PIL image
img2 = cv2.imread('bus.jpg')[..., ::-1]  # OpenCV image, channel axis reversed (BGR to RGB)
imgs = [img1, img2]  # batch of images

# Inference: the hub model runs NMS as part of the call
results = model(imgs, size=640)

# Results: print a summary and save the annotated images (or use .show())
results.print()
results.save()

# Per-image predictions, indexed in the same order as `imgs`
results.xyxy[0]  # img1 predictions (tensor)
results.pandas().xyxy[0]  # img1 predictions (pandas)
#      xmin    ymin    xmax   ymax  confidence  class    name
# 0  749.50   43.50  1148.0  704.5    0.874023      0  person
# 1  433.50  433.50   517.5  714.5    0.687988     27     tie
# 2  114.75  195.75  1095.0  708.0    0.624512      0  person
# 3  986.00  304.00  1028.0  420.0    0.286865     27     tie
glenn-jocher commented 3 years ago

@Laudarisd and the PyTorch Hub model results can be presented in 4 different formats, including xyxy and xyxy normalized: https://github.com/ultralytics/yolov5/blob/b7cd1f540d5815b0a1cf2e23ce82a5fdb8f6b525/models/common.py#L308-L311

Laudarisd commented 3 years ago

Thank you so much @glenn-jocher I will go through this one.

Laudarisd commented 3 years ago

Hi @glenn-jocher, when I tried to load the model from my local machine, it shows an `HTTPError: HTTP Error 404: Not Found` error.

Could you kindly give me a little hint?

I tried to load the model as

path = "./"

model = torch.hub.load(path, 'last')

Similarly, I tried to load the images as

for f in glob.glob('./test/*'):
    #torch.hub.download_url_to_file("./test/" + f, f)
    img = Image.open(f)

Thank you.

glenn-jocher commented 3 years ago

@Laudarisd see PyTorch Hub tutorial for directions on loading local YOLOv5 models:

YOLOv5 Tutorials

Laudarisd commented 3 years ago

Thanks @glenn-jocher .