Chris-hughes10 / Yolov7-training

A clean, modular implementation of the Yolov7 model family, which uses the official pretrained weights, with utilities for training the model on custom (non-COCO) tasks.

Testing the trained model #24

Closed dstkibrom closed 6 months ago

dstkibrom commented 6 months ago

Thank you very much for publishing this code; it has really helped us understand the YOLOv7 building blocks. I recently used your dataset to train the YOLOv7 model and it seems to be working fine: even the mAP on the cars dataset you released is around 0.98. However, after training I wanted to see whether I could run inference on test data with the trained model, so I wrote the following code:


from pathlib import Path

import numpy as np
import torch
from PIL import Image

from yolov7 import create_yolov7_model
from yolov7.dataset import create_yolov7_transforms
from yolov7.loss_factory import create_yolov7_loss
from yolov7.trainer import filter_eval_predictions

image_size: int = 640
num_classes = 1
pretrained = False
DATA_PATH = Path("Yolov7-trainer/datasets/data")
ckpt_path = "./lightning_logs/checkpoints/epoch=4-step=205.ckpt"
batch_size: int = 8

data_path = Path(DATA_PATH)
images_path = data_path / "training_images"
annotations_file_path = data_path / "annotations.csv"

model = create_yolov7_model(
    architecture="yolov7", num_classes=num_classes, pretrained=pretrained
)
loss_func = create_yolov7_loss(model, image_size=image_size)

optimizer = torch.optim.SGD(
    model.parameters(), lr=0.01, momentum=0.9, nesterov=True
)

# Yolov7Trainer here is my own PyTorch Lightning wrapper around the model,
# not the trainer class shipped with this repo
loaded_model = Yolov7Trainer.load_from_checkpoint(
    ckpt_path, model=model, optimizer=optimizer, loss_func=loss_func
)
loaded_model.eval()

# single image prediction
img = np.array(Image.open(f"{images_path}/vid_4_2160.jpg").convert("RGB"))
original_image_sizes = torch.tensor([img.shape[:2]])

transforms = create_yolov7_transforms()
img = transforms(image=img, bboxes=[], labels=[])

with torch.no_grad():
    img = torch.from_numpy(img["image"]).permute(2, 0, 1).unsqueeze(0).float()
    y_hat = loaded_model(img.cuda())
    preds = model.postprocess(y_hat, conf_thres=0.8)  # confidence threshold
    preds = filter_eval_predictions(preds)  # NMS
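
(For anyone reading along: as far as I can tell from the repo's eval code, postprocess followed by filter_eval_predictions returns one tensor per image, where each row is [x1, y1, x2, y2, confidence, class_id] in the coordinate frame of the transformed 640x640 image, so the predictions can be unpacked like this:)

boxes = preds[0][:, :4]      # xyxy corners, transformed-image coordinates
scores = preds[0][:, 4]      # confidence per detection
class_ids = preds[0][:, 5]   # predicted class index (0 = car here)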

After doing so, I tried to overlay the inference results on the image that was used for inference, but the results don't seem to align with the ground truth. Here is how I tried to draw the predicted bounding boxes:

import matplotlib.pyplot as plt
import torchvision
from matplotlib import patches

img = np.array(Image.open(f"{images_path}/vid_4_2160.jpg").convert("RGB"))
img = transforms(image=img, bboxes=[], labels=[])

fig, axe = plt.subplots()
axe.imshow(img["image"])

bboxes = preds[0][:, :4]
for box in bboxes.cpu():
    # Rectangle wants the top-left corner plus width and height,
    # so convert from xyxy to xywh first
    box_cvt = torchvision.ops.box_convert(box, in_fmt="xyxy", out_fmt="xywh")
    rect = patches.Rectangle(
        (box_cvt[0], box_cvt[1]),
        box_cvt[2],
        box_cvt[3],
        edgecolor="b",
        fill=False,
    )
    axe.add_patch(rect)

Here is the result of the above code run: [output image]

I am wondering whether I made a mistake in translating the predicted bounding boxes?
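
(In case it helps anyone else: the predicted boxes come out in the coordinates of the transformed 640x640 image, so they only line up when drawn on that transformed image, as above. To draw them on the original image instead, the resize-and-pad has to be undone first. A minimal sketch, assuming create_yolov7_transforms does a longest-side resize plus centre padding; the helper below is my own, not something shipped with the repo:)

import torch

def rescale_boxes_to_original(xyxy_boxes, resized_hw, original_hw):
    # undo a letterbox transform: longest-side resize followed by centre padding
    boxes = xyxy_boxes.clone()
    scale = min(resized_hw[0] / original_hw[0], resized_hw[1] / original_hw[1])
    pad_x = (resized_hw[1] - original_hw[1] * scale) / 2  # horizontal padding
    pad_y = (resized_hw[0] - original_hw[0] * scale) / 2  # vertical padding
    boxes[:, [0, 2]] = (boxes[:, [0, 2]] - pad_x) / scale
    boxes[:, [1, 3]] = (boxes[:, [1, 3]] - pad_y) / scale
    # clip to the original image bounds (original_hw is (height, width))
    boxes[:, [0, 2]] = boxes[:, [0, 2]].clamp(0, original_hw[1])
    boxes[:, [1, 3]] = boxes[:, [1, 3]].clamp(0, original_hw[0])
    return boxes

# e.g. original_boxes = rescale_boxes_to_original(preds[0][:, :4], (640, 640), original_image_sizes[0])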

dstkibrom commented 6 months ago

I think this is my problem, not one with the source code; I probably need to look over my PyTorch Lightning code. Doing inference with the original source code works with no problem:

from functools import partial
from pathlib import Path

import numpy as np
import torch
from PIL import Image

from yolov7 import create_yolov7_model
from yolov7.dataset import create_yolov7_transforms
from yolov7.loss_factory import create_yolov7_loss
from yolov7.trainer import Yolov7Trainer, filter_eval_predictions

image_size: int = 640
num_classes = 1
pretrained = False
batch_size: int = 8

model = create_yolov7_model(
    architecture="yolov7", num_classes=num_classes, pretrained=pretrained
)
loss_func = create_yolov7_loss(model, image_size=image_size)

optimizer = torch.optim.SGD(
    model.parameters(), lr=0.01, momentum=0.9, nesterov=True
)

trainer = Yolov7Trainer(
    model=model,
    optimizer=optimizer,
    loss_func=loss_func,
    filter_eval_predictions_fn=partial(
        filter_eval_predictions, confidence_threshold=0.01, nms_threshold=0.3
    ),
    callbacks=[],
)

trainer.load_checkpoint("../train_on_datasets/best_model.pt")

img = np.array(Image.open("../datasets/data/training_images/vid_4_2160.jpg").convert("RGB"))
original_image_sizes = torch.tensor([img.shape[:2]])

transforms = create_yolov7_transforms()
img = transforms(image=img, bboxes=[], labels=[])
img = torch.from_numpy(img["image"]).permute(2, 0, 1).unsqueeze(0).float()
preds = trainer.predict_on_image(img)  # already postprocessed

The prediction results are then plotted as follows:

import matplotlib.pyplot as plt
import torchvision
from matplotlib import patches

img = np.array(Image.open("../datasets/data/training_images/vid_4_2160.jpg").convert("RGB"))
img = transforms(image=img, bboxes=[], labels=[])

fig, axe = plt.subplots()
axe.imshow(img["image"])

bboxes = preds[0][:, :4]
conf = preds[0][:, 4]
print(conf)

for box, score in zip(bboxes.cpu(), conf):
    if score > 0.5:  # keep only confident detections
        box_cvt = torchvision.ops.box_convert(box, in_fmt="xyxy", out_fmt="xywh")
        rect = patches.Rectangle(
            (box_cvt[0], box_cvt[1]),
            box_cvt[2],
            box_cvt[3],
            edgecolor="b",
            fill=False,
        )
        axe.add_patch(rect)

I added the following method to yolov7/trainer.py for inference:

def predict_on_image(self, img):
    with torch.no_grad():
        fpn_heads_outputs = self.model(img)
        preds = self.get_model().postprocess(fpn_heads_outputs, conf_thres=0.001)

        if self.filter_eval_predictions is not None:
            preds = self.filter_eval_predictions(preds)
    return preds
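
(One caveat with this method: unlike the trainer's eval loop, it does not switch the model into eval mode, and the detection head only decodes boxes for postprocess when the model is in eval mode. A variant sketch with an explicit toggle; the eval() call is my addition, not from the repo:)

def predict_on_image(self, img):
    # guard against calling this while the model is still in training mode,
    # since postprocess expects the eval-mode decoded head outputs
    self.model.eval()
    with torch.no_grad():
        fpn_heads_outputs = self.model(img)
        preds = self.get_model().postprocess(fpn_heads_outputs, conf_thres=0.001)

        if self.filter_eval_predictions is not None:
            preds = self.filter_eval_predictions(preds)
    return preds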

[output image]