Owen-Owen opened 1 year ago
+1
When I run the yolov7-pose branch, there is a --save-txt flag in detect.py, but it only saves the bounding-box information. I want both the bbox and the kpts info in one txt file. Who can help?!
@Owen-Owen @computer-vision666
!git clone https://github.com/WongKinYiu/yolov7.git
cd yolov7
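If any of the imports below fail, the repo's own dependency list should cover them (assuming the standard requirements.txt that ships with WongKinYiu/yolov7):
!pip install -r requirements.txt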
import argparse
import time
from pathlib import Path
import numpy
import cv2
import torch
import torch.backends.cudnn as cudnn
from numpy import random
from models.experimental import attempt_load
from utils.datasets import LoadStreams, LoadImages
from utils.general import check_img_size, check_requirements, check_imshow, non_max_suppression, apply_classifier, \
scale_coords, xyxy2xywh, strip_optimizer, set_logging, increment_path
from utils.plots import plot_one_box
from utils.torch_utils import select_device, load_classifier, time_synchronized, TracedModel
def detect(save_img=False):
bbox = None
source, weights, view_img, save_txt, imgsz, trace = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size, not opt.no_trace
save_img = not opt.nosave and not source.endswith('.txt') # save inference images
webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
('rtsp://', 'rtmp://', 'http://', 'https://'))
# Directories
save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # increment run
(save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir
# Initialize
set_logging()
device = select_device(opt.device)
half = device.type != 'cpu' # half precision only supported on CUDA
# Load model
model = attempt_load(weights, map_location=device) # load FP32 model
stride = int(model.stride.max()) # model stride
imgsz = check_img_size(imgsz, s=stride) # check img_size
if trace:
model = TracedModel(model, device, opt.img_size)
if half:
model.half() # to FP16
# Second-stage classifier
classify = False
if classify:
modelc = load_classifier(name='resnet101', n=2) # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])
        modelc.to(device).eval()  # load_state_dict does not return the module, so chain separately
# Set Dataloader
vid_path, vid_writer = None, None
if webcam:
view_img = check_imshow()
cudnn.benchmark = True # set True to speed up constant image size inference
dataset = LoadStreams(source, img_size=imgsz, stride=stride)
else:
dataset = LoadImages(source, img_size=imgsz, stride=stride)
# Get names and colors
names = model.module.names if hasattr(model, 'module') else model.names
colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]
# Run inference
if device.type != 'cpu':
model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters()))) # run once
old_img_w = old_img_h = imgsz
old_img_b = 1
t0 = time.time()
for path, img, im0s, vid_cap in dataset:
img = torch.from_numpy(img).to(device)
img = img.half() if half else img.float() # uint8 to fp16/32
img /= 255.0 # 0 - 255 to 0.0 - 1.0
if img.ndimension() == 3:
img = img.unsqueeze(0)
# Warmup
if device.type != 'cpu' and (old_img_b != img.shape[0] or old_img_h != img.shape[2] or old_img_w != img.shape[3]):
old_img_b = img.shape[0]
old_img_h = img.shape[2]
old_img_w = img.shape[3]
for i in range(3):
model(img, augment=opt.augment)[0]
# Inference
t1 = time_synchronized()
with torch.no_grad(): # Calculating gradients would cause a GPU memory leak
pred = model(img, augment=opt.augment)[0]
t2 = time_synchronized()
# Apply NMS
pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
t3 = time_synchronized()
# Apply Classifier
if classify:
pred = apply_classifier(pred, modelc, img, im0s)
# Process detections
for i, det in enumerate(pred): # detections per image
if webcam: # batch_size >= 1
p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count
else:
p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)
p = Path(p) # to Path
save_path = str(save_dir / p.name) # img.jpg
txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}') # img.txt
gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh
if len(det):
# Rescale boxes from img_size to im0 size
det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
# print(f"BOXES ---->>>> {det[:, :4]}")
bbox=det[:, :4]
bbox = bbox.numpy()
# Print results
# for c in det[:, -1].unique():
# n = (det[:, -1] == c).sum() # detections per class
# s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string
# # Write results
# for *xyxy, conf, cls in reversed(det):
# if save_txt: # Write to file
# xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh
# line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh) # label format
# with open(txt_path + '.txt', 'a') as f:
# f.write(('%g ' * len(line)).rstrip() % line + '\n')
# if save_img or view_img: # Add bbox to image
# label = f'{names[int(cls)]} {conf:.2f}'
# plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=1)
# # Print time (inference + NMS)
# print(f'{s}Done. ({(1E3 * (t2 - t1)):.1f}ms) Inference, ({(1E3 * (t3 - t2)):.1f}ms) NMS')
# # Stream results
# if view_img:
# cv2.imshow(str(p), im0)
# cv2.waitKey(1) # 1 millisecond
# # Save results (image with detections)
# if save_img:
# if dataset.mode == 'image':
# Image.fromarray(im0).resize((300,250)).show()
# # cv2.imwrite(save_path, im0)
# # print(f" The image with the result is saved in: {save_path}")
# print()
# else: # 'video' or 'stream'
# if vid_path != save_path: # new video
# vid_path = save_path
# if isinstance(vid_writer, cv2.VideoWriter):
# vid_writer.release() # release previous video writer
# if vid_cap: # video
# fps = vid_cap.get(cv2.CAP_PROP_FPS)
# w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
# h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# else: # stream
# fps, w, h = 30, im0.shape[1], im0.shape[0]
# save_path += '.mp4'
# vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
# vid_writer.write(im0)
# if save_txt or save_img:
# s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
# #print(f"Results saved to {save_dir}{s}")
# print(f'Done. ({time.time() - t0:.3f}s)')
return bbox
class options:
def __init__(self, weights, source, img_size=640, conf_thres=0.1, iou_thres=0.45, device='',
view_img=False, save_txt=False, save_conf=False, nosave=False, classes=None,
agnostic_nms=False, augment=False, update=False, project='runs/detect', name='exp',
exist_ok=False, no_trace=False):
self.weights=weights
self.source=source
self.img_size=img_size
self.conf_thres=conf_thres
self.iou_thres=iou_thres
self.device=device
self.view_img=view_img
self.save_txt=save_txt
self.save_conf=save_conf
self.nosave=nosave
self.classes=classes
self.agnostic_nms=agnostic_nms
self.augment=augment
self.update=update
self.project=project
self.name=name
self.exist_ok=exist_ok
self.no_trace=no_trace
if __name__ == '__main__':
    path = '<path to pretrained model>'             # directory that contains best.pt
    source = '<path to image dir to be inferenced>'
    opt = options(weights=f'{path}/best.pt', source=source)
print(opt)
#check_requirements(exclude=('pycocotools', 'thop'))
bbox = None
with torch.no_grad():
if opt.update: # update all models (to fix SourceChangeWarning)
for opt.weights in ['yolov7.pt']:
bbox = detect()
strip_optimizer(opt.weights)
else:
bbox = detect()
I edited the code from detect.py to get the bbox of an image in the source directory. You will have to set the path to the model weights and the source image directory according to their respective locations.
You might also have to change the bbox variable in the detect() function if there is more than one image in the directory: as written, bbox is overwritten on every iteration, so only the last image's boxes are returned. With multiple images you could change it to a key-value pair using the filename as the key and det[:, :4] as the value, but this should give you a general idea of how to get the bbox coordinates; a sketch of that change follows.
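A minimal sketch of that key-value change, assuming the edited detect() above (bboxes is an illustrative name, not part of detect.py):

# At the top of detect(), instead of `bbox = None`:
bboxes = {}  # filename -> (N, 4) array of xyxy boxes, one entry per image

# Inside `if len(det):`, after the scale_coords call, instead of the bbox assignment:
bboxes[p.name] = det[:, :4].cpu().numpy()  # keyed by filename, so earlier images are kept

# At the end of detect(), instead of `return bbox`:
return bboxes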
Hope this helps!!
Thank you so much, sir! But I want both the bbox info and the kpts info written to a txt file. I managed that myself, but I hit a new problem: the bbox info is normalized while the kpts info is raw. How do I get normalized kpts info?! Thanks again.
Use detect.py from the pose branch and divide the x, y keypoint coordinates by the image width and height: coordinate_x / width and coordinate_y / height. Make sure you are using coordinates that have been rescaled from img_size to im0 size, and then divide by the original image shape. Once you have the normalized coordinates, just save them in whatever file format you want.
Yeah, but how do I actually do that?
Using this code you get all 17 keypoints in the following format: [[0.345, 0.543, 0.6], [0.124, 0.653, 0.8], ..., [0.345, 0.543, 0.6]], where each inner list is [norm_coord_x, norm_coord_y, norm_conf]. In the pose branch's detect.py, add the code below after the line kpts = det[det_index, 6:]:
new_kpts = kpts.cpu().detach().numpy().tolist()
norm_kpts = [
[
new_kpts[idx] / im0.shape[1],
new_kpts[idx + 1] / im0.shape[0],
round(new_kpts[idx + 2], 3)
]
for idx in range(0, len(new_kpts), 3)
]
output_path = "#"
with open(output_path, "w") as output:
output.write(str(norm_kpts))
Note: this is sample code and it saves the keypoints to a separate file; you can modify it to suit your requirements.
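For the original question in this thread (bbox and kpts together in one txt file), a hedged follow-up sketch: it reuses norm_kpts from the snippet above together with names from detect.py's write loop (xyxy, cls, gn, txt_path, xyxy2xywh); the one-line-per-detection layout is illustrative, not an official label format.

# One line per detection: class, normalized xywh bbox, then the 17 normalized
# (x, y, conf) keypoint triplets, appended to the usual label file.
xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()
flat_kpts = [v for triplet in norm_kpts for v in triplet]  # flatten [[x, y, c], ...]
line = (int(cls), *xywh, *flat_kpts)
with open(txt_path + '.txt', 'a') as f:
    f.write(('%g ' * len(line)).rstrip() % line + '\n')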
@rd-neosoft Hello, thank you so much, sir! May I ask what the third number means? If it means 0 = unmarked, 1 = marked but invisible, 2 = marked and visible, how can I get that number unchanged?
Precision and recall are low in the training stage for yolov7-pose; I cannot figure out the reason.