Update multi_label parameter here as you see fit.
Also do not use exclamation marks in issue titles.
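For reference, a minimal sketch of where that parameter plugs in, hedged on the YOLOv5 utils of that era; in versions where multi_label is hardcoded inside utils/general.py (as multi_label = nc > 1), change it there instead of at the call site:

```python
# Sketch only: multi_label=True lets one box emit several class labels above
# conf_thres, which can surface the same object twice with different classes;
# multi_label=False keeps only the single best class per box before NMS.
pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                           classes=opt.classes, agnostic=opt.agnostic_nms,
                           multi_label=False)
```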
Thank you for your response. I have already tried that, but the same object is still detected more than once with different classes. Could you please tell me what the agnostic_nms option does?
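For context: agnostic_nms runs NMS across all classes at once instead of per class. A self-contained illustration using torchvision's nms (YOLOv5 applies the same class-offset trick internally; the boxes and class IDs below are made up):

```python
import torch
from torchvision.ops import nms

boxes = torch.tensor([[100., 100., 200., 200.],   # one object
                      [102., 101., 201., 199.]])  # near-duplicate of the same object
scores = torch.tensor([0.90, 0.85])
classes = torch.tensor([1, 0])  # two different predicted classes
iou_thres = 0.45
max_wh = 4096  # offset larger than any image side

# Class-aware NMS (the default): each class is shifted into its own coordinate
# range, so the two boxes never compete -> both survive (the duplicate stays).
keep_per_class = nms(boxes + classes[:, None].float() * max_wh, scores, iou_thres)

# Class-agnostic NMS (--agnostic-nms): all boxes compete regardless of class,
# so the overlapping near-duplicate is suppressed -> one box survives.
keep_agnostic = nms(boxes, scores, iou_thres)

print(len(keep_per_class), len(keep_agnostic))  # 2 1
```

With per-class NMS the two boxes never compete because the class offset pushes them apart; with agnostic NMS the near-duplicate is suppressed, which is exactly the duplicate-class symptom described in this issue.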
Hi @glenn-jocher, when I run YOLOv5 to detect only bicycles in a continuous video, the same bicycle is detected several times, but I want it detected just once, not repeatedly. Could you please tell me how I can do that? Thanks!
@mintu07ruet I don't understand what you are asking. Best practice is to include as much information as possible in your question: provide examples, screenshots, results, references, etc.
Hi @glenn-jocher,
Thank you for your response. Let me explain in more detail. I am detecting bicycles only, at a road intersection, from a video file. When a bicyclist crosses the road, yolov5p5 detects the same bicyclist several times, so my CSV output contains the same bicyclist repeated, as highlighted in yellow in the Excel image. In the two screenshots below you can see the same bicyclist (the same person) detected twice with a time gap. I want each bicyclist detected only once, the first time, not multiple times.
Here is the modified detect file. Please suggest where I should make changes. Thanks!
""" Run inference on images, videos, directories, streams, etc.
Usage: $ python path/to/detect.py --source path/to/img.jpg --weights yolov5s.pt --img 640 """
import argparse import sys import os import time from csv import DictWriter from pathlib import Path
import cv2 import torch import torch.backends.cudnn as cudnn from numpy import random import numpy as np from models.experimental import attempt_load from utils.datasets import LoadStreams, LoadImages from utils.general import check_img_size, non_max_suppression, apply_classifier, scale_coords, xyxy2xywh, \ strip_optimizer, set_logging, increment_path from utils.plots import plot_one_box from utils.torch_utils import select_device, load_classifier, time_synchronized
def frames_to_timestamp(frame_num, fps): total_seconds = frame_num / fps mins, secs = divmod(total_seconds, 60) hours, mins = divmod(mins, 60) return "{}:{}:{}".format(int(hours), int(mins), secs)
def detect(save_img=False): source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith( ('rtsp://', 'rtmp://', 'http://'))
    fps = None

    # Directories
    save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        # If the source is a video file, read its FPS so detections can be timestamped.
        source_path = Path(source)
        if source_path.is_file():
            vid = cv2.VideoCapture(str(source_path.absolute()))
            fps = vid.get(cv2.CAP_PROP_FPS)
            print("FPS of video before data loading: {0}".format(fps))
            vid.release()
        dataset = LoadImages(source, img_size=imgsz)
    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    with open('log_file.csv', 'a') as csvfile:
        header = (
            'Timestamp', 'Frame', 'Class', 'AutoSteer Confidence', 'Indicator', 'Indicator Confidence', 'ProPilot',
            'ProPilot Confidence', 'VID - Lanes Detection Status', 'VID LDS Confidence', 'VID - LDW',
            'VID LDW Confidence')
        csv_writer = DictWriter(csvfile, fieldnames=header, lineterminator='\n', delimiter=',')
        csv_writer.writeheader()
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = Path(path[i]), '%g: ' % i, im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = Path(path), '', im0s, getattr(dataset, 'frame', 0)
            save_path = str(save_dir / p.name)
            txt_path = str(save_dir / 'labels' / p.stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                tl1 = cnf1 = tl2 = cnf2 = tl3 = cnf3 = tl4 = cnf4 = tl5 = cnf5 = ''
                for *xyxy, conf, cls in reversed(det):
                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)
                    if save_txt:
                        icls = int(cls)
                        if icls == 0 or icls == 1:
                            tl1 = int(cls)
                            cnf1 = '%.2f' % conf
                        if icls == 2 or icls == 3:
                            tl2 = int(cls)
                            cnf2 = '%.2f' % conf
                        if icls == 4 or icls == 5 or icls == 6:
                            tl3 = int(cls)
                            cnf3 = '%.2f' % conf
                        if icls == 7 or icls == 8:
                            tl4 = int(cls)
                            cnf4 = '%.2f' % conf
                        if icls == 9 or icls == 10:
                            tl5 = int(cls)
                            cnf5 = '%.2f' % conf
                with open('log_file.csv', 'a') as csvfile:
                    csv_writer = DictWriter(csvfile, fieldnames=header, lineterminator='\n', delimiter=',')
                    csv_writer.writerow(
                        {'Timestamp': frames_to_timestamp(frame, fps), 'Frame': frame, 'Class': tl1,
                         'AutoSteer Confidence': cnf1, 'Indicator': tl2,
                         'Indicator Confidence': cnf2, 'ProPilot': tl3, 'ProPilot Confidence': cnf3,
                         'VID - Lanes Detection Status': tl4, 'VID LDS Confidence': cnf4, 'VID - LDW': tl5,
                         'VID LDW Confidence': cnf5})

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        print(f'FPS of video {fps}')
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    print('Done. (%.3fs)' % (time.time() - t0))
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', nargs='+', type=str, default='yolov5s.pt', help='model.pt path(s)')
    parser.add_argument('--source', type=str, default='data/images', help='source')  # file/folder, 0 for webcam
    parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='object confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_true', help='display results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--update', action='store_true', help='update all models')
    parser.add_argument('--project', default='runs/detect', help='save results to project/name')
    parser.add_argument('--name', default='exp', help='save results to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    opt = parser.parse_args()
    print(opt)

    with torch.no_grad():
        if opt.update:  # update all models (to fix SourceChangeWarning)
            for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt']:
                detect()
                strip_optimizer(opt.weights)
        else:
            detect()
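A quick, illustrative check of the frames_to_timestamp helper above (the 30 fps value is assumed): the seconds field keeps its fractional part and no zero-padding is applied, so downstream parsing should not assume strict HH:MM:SS.

```python
# Assuming a 30 fps video source
print(frames_to_timestamp(0, 30))       # 0:0:0.0
print(frames_to_timestamp(90, 30))      # 0:0:3.0
print(frames_to_timestamp(108000, 30))  # 1:0:0.0  (one hour of video)
```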
@mintu07ruet thanks for the details! YOLOv5 is just a detector, it does not track. To implement tracking in between detection frames you might want to look into Kalman Filters (i.e. EKF), KLT trackers (which use pyramid image patches), or other methods like deepsort, which has been implemented with YOLOv5, i.e.: https://github.com/mikel-brostrom/Yolov5_DeepSort_Pytorch
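For readers who want the idea without pulling in a full tracker: a bare-bones, illustrative IoU tracker (all names here are hypothetical, there is no motion model, and tracks never expire), which counts a bicycle only when a new track ID is created. For anything real, prefer the DeepSORT repo linked above.

```python
def iou(a, b):
    """IoU of two (x1, y1, x2, y2) boxes."""
    ix1, iy1 = max(a[0], b[0]), max(a[1], b[1])
    ix2, iy2 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0.0, ix2 - ix1) * max(0.0, iy2 - iy1)
    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    return inter / (area_a + area_b - inter + 1e-9)


class SimpleTracker:
    def __init__(self, iou_thres=0.3):
        self.iou_thres = iou_thres
        self.tracks = {}  # track id -> last seen box
        self.next_id = 0

    def update(self, boxes):
        """Match boxes to tracks by IoU; return (track_id, box, is_new) per box."""
        results, used = [], set()
        for box in boxes:
            best_id, best_iou = None, self.iou_thres
            for tid, prev in self.tracks.items():
                if tid not in used and iou(box, prev) > best_iou:
                    best_id, best_iou = tid, iou(box, prev)
            if best_id is None:  # no overlap with any live track -> new object
                best_id = self.next_id
                self.next_id += 1
                results.append((best_id, box, True))
            else:
                results.append((best_id, box, False))
            used.add(best_id)
            self.tracks[best_id] = box
        return results


# Usage per video frame: count only rows where is_new is True
tracker = SimpleTracker()
print(tracker.update([(100, 100, 200, 200)]))  # [(0, ..., True)]  first sighting
print(tracker.update([(105, 102, 205, 202)]))  # [(0, ..., False)] same bicycle
```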
Hi @glenn-jocher, thank you so much for sharing this. I will work on these things. Much appreciated!
@mintu07ruet I'm curious how you printed the results to the Excel/CSV file with YOLOv5. Can you share the timestamp output code?
""" Run inference on images, videos, directories, streams, etc.
Usage: $ python path/to/detect.py --source path/to/img.jpg --weights yolov5s.pt --img 640 """
import argparse import sys import os import time from csv import DictWriter from pathlib import Path
import cv2 import torch import torch.backends.cudnn as cudnn from numpy import random import numpy as np from models.experimental import attempt_load from utils.datasets import LoadStreams, LoadImages from utils.general import check_img_size, non_max_suppression, apply_classifier, scale_coords, xyxy2xywh, \ strip_optimizer, set_logging, increment_path from utils.plots import plot_one_box from utils.torch_utils import select_device, load_classifier, time_synchronized
SECONDS_DELAY_AFTER_DETECTION = 2
def frames_to_timestamp(frame_num, fps): """Converts the current frame number into a timestamp based on the video's fps. Anything over 24 hours is displayed accurately. Example for 100 hour timestamp: 100:00:00"""
total_seconds = frame_num / fps
mins, secs = divmod(total_seconds, 60)
hours, mins = divmod(mins, 60)
return "{}:{}:{}".format(int(hours), int(mins), secs)
def detect(save_img=False): source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith( ('rtsp://', 'rtmp://', 'http://'))
    fps = None
    last_frame_detected = None

    # Directories
    save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=12)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        # If the source is a video file, read its FPS so detections can be timestamped
        # and the detection cooldown can be expressed in frames.
        source_path = Path(source)
        if source_path.is_file():
            vid = cv2.VideoCapture(str(source_path.absolute()))
            fps = vid.get(cv2.CAP_PROP_FPS)
            # Start negative so the very first detection is always logged.
            last_frame_detected = (SECONDS_DELAY_AFTER_DETECTION * fps) * -1
            print("FPS of video before data loading: {0}".format(fps))
            print(f"INITIAL LAST_FRAME_DETECTED VALUE: {last_frame_detected}")
            vid.release()
        dataset = LoadImages(source, img_size=imgsz)
    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    with open(f"{save_dir / 'log_file.csv'}", 'a') as csvfile:
        header = (
            'Timestamp', 'Frame', 'Class')
        csv_writer = DictWriter(csvfile, fieldnames=header, lineterminator='\n', delimiter=',')
        csv_writer.writeheader()
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = Path(path[i]), '%g: ' % i, im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = Path(path), '', im0s, getattr(dataset, 'frame', 0)
            save_path = str(save_dir / p.name)
            txt_path = str(save_dir / 'labels' / p.stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if len(det) and ((frame - (last_frame_detected + (SECONDS_DELAY_AFTER_DETECTION * fps))) >= 0):
                # Log detections only if at least SECONDS_DELAY_AFTER_DETECTION seconds of
                # video have elapsed since the last logged detection, then record this
                # frame as the start of the next cooldown window.
                last_frame_detected = frame
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                tl1 = cnf1 = tl2 = cnf2 = tl3 = cnf3 = tl4 = cnf4 = tl5 = cnf5 = tl6 = cnf6 = ''
                for *xyxy, conf, cls in reversed(det):
                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)
                    if save_txt:
                        icls = int(cls)
                        if icls == 0 or icls == 1:
                            tl1 = int(cls)
                            cnf1 = '%.2f' % conf
                        if icls == 2 or icls == 3:
                            tl2 = int(cls)
                            cnf2 = '%.2f' % conf
                        if icls == 4 or icls == 5 or icls == 6:
                            tl3 = int(cls)
                            cnf3 = '%.2f' % conf
                        if icls == 7 or icls == 8:
                            tl4 = int(cls)
                            cnf4 = '%.2f' % conf
                        if icls == 9 or icls == 10:
                            tl5 = int(cls)
                            cnf5 = '%.2f' % conf
                        if icls == 17 or icls == 36:
                            tl6 = int(cls)
                            cnf6 = '%.2f' % conf
                with open(f"{save_dir / 'log_file.csv'}", 'a') as csvfile:
                    csv_writer = DictWriter(csvfile, fieldnames=header, lineterminator='\n', delimiter=',')
                    csv_writer.writerow(
                        {'Timestamp': frames_to_timestamp(frame, fps), 'Frame': frame, 'Class': tl1})

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        print(f'FPS of video {fps}')
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    print('Done. (%.3fs)' % (time.time() - t0))
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', nargs='+', type=str, default='yolov5s.pt', help='model.pt path(s)')
    parser.add_argument('--source', type=str, default='data/images', help='source')  # file/folder, 0 for webcam
    parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='object confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_true', help='display results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--update', action='store_true', help='update all models')
    parser.add_argument('--project', default='runs/detect', help='save results to project/name')
    parser.add_argument('--name', default='exp', help='save results to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    opt = parser.parse_args()
    print(opt)

    with torch.no_grad():
        if opt.update:  # update all models (to fix SourceChangeWarning)
            for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt']:
                detect()
                strip_optimizer(opt.weights)
        else:
            detect()
@oes5756, I used the modified script above to record the outcomes in the CSV/Excel file. Thanks!
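To distill what the script above does about duplicates: it is a time-based debounce, not tracking. Once a detection is logged, all detections are ignored for SECONDS_DELAY_AFTER_DETECTION seconds of video. A minimal restatement of that condition (the helper name is mine, not from the script):

```python
SECONDS_DELAY_AFTER_DETECTION = 2

def should_log(frame, last_frame_detected, fps, delay=SECONDS_DELAY_AFTER_DETECTION):
    """True once at least `delay` seconds of video have passed since the last log."""
    return frame - last_frame_detected >= delay * fps

# At 30 fps, a bicycle logged at frame 100 opens a 60-frame cooldown window:
assert should_log(0, -60, 30)        # initial negative value lets frame 0 log
assert not should_log(130, 100, 30)  # 30 frames later: still inside the window
assert should_log(160, 100, 30)      # 60 frames later: logging resumes
```

The obvious trade-off: two different bicycles arriving within the same window are merged into one logged event, which is why the tracking approaches suggested earlier are the more robust fix.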
🐛 Bug
It seems that for some reason the same object is detected twice with different classes, which creates a lot of false positives. Is there any way to deal with this problem?