Explaining this model with grad-CAM

Hello, I used this YOLO version in my thesis, and now I want to explain the model's predictions by producing a heatmap with Grad-CAM.

I modified the YOLO.py code to make it work with the Grad-CAM algorithm. However, I got plenty of errors, and while I was fixing them, one of them could not be fixed no matter what I tried. It is the following:

"RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [1, 3, 20, 20, 9]], which is output 0 of SigmoidBackward0, is at version 2; expected version 0 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!"

This is the code I am using to produce the Grad-CAM heatmap:

import torch
import torch.nn.functional as F
from torchvision import models
import cv2
import numpy as np
import matplotlib.pyplot as plt

class GradCAM:
    """Compute a Grad-CAM heatmap for one predicted box of a detection model.

    A forward hook on ``target_layer`` records the layer's output feature map,
    and a tensor hook on that output records the gradient flowing back into it.
    ``generate_cam`` back-propagates the value at channel index 4 of the
    selected prediction and combines the two into a heatmap.

    NOTE(review): this class cannot prevent autograd errors raised inside the
    model itself. An "modified by an inplace operation" error mentioning
    ``SigmoidBackward0`` presumably comes from the model's own forward pass
    writing in place into a sigmoid output; that has to be made out-of-place
    in the model code - TODO confirm against the model source.

    Args:
        model: A ``torch.nn.Module``.
        target_layer: Name of the layer to explain, as it appears in
            ``model.named_modules()``.

    Raises:
        KeyError: If ``target_layer`` is not a module name of ``model``.
    """

    def __init__(self, model, target_layer):
        self.model = model
        self.target_layer = target_layer
        self.gradients = None
        self.activations = None
        # Handles of the module hooks, kept so they can be removed again.
        self._hook_handles = []
        self._register_hooks()

    def _register_hooks(self):
        """Attach the activation/gradient capturing hooks to the target layer."""
        modules = dict(self.model.named_modules())
        try:
            layer = modules[self.target_layer]
        except KeyError:
            raise KeyError(
                f"target layer {self.target_layer!r} not found in "
                "model.named_modules()"
            ) from None

        def forward_hook(module, inputs, output):
            if not isinstance(output, torch.Tensor):
                raise TypeError(
                    f"target layer {self.target_layer!r} must output a single "
                    f"tensor, got {type(output).__name__}"
                )
            self.activations = output.detach()
            # A tensor hook replaces the deprecated Module.register_backward_hook.
            # It can only be attached to tensors that take part in autograd.
            if output.requires_grad:
                output.register_hook(self._store_gradient)

        self._hook_handles.append(layer.register_forward_hook(forward_hook))

    def _store_gradient(self, grad):
        """Tensor hook: remember the gradient w.r.t. the target layer output."""
        self.gradients = grad.detach()

    def remove_hooks(self):
        """Detach every hook this instance registered on the model."""
        for handle in self._hook_handles:
            handle.remove()
        self._hook_handles.clear()

    @staticmethod
    def _normalize(cam):
        """Shift ``cam`` to start at 0 and scale it to peak at 1.

        A constant map is returned as all zeros instead of dividing by zero.
        """
        cam = cam - np.min(cam)
        peak = np.max(cam)
        if peak > 0:
            cam = cam / peak
        return cam

    def generate_cam(self, input_image, target_bbox_idx=None):
        """Return the Grad-CAM heatmap for ``input_image``.

        Args:
            input_image: Input batch; axes 2 and 3 are used as the height and
                width of the returned map. Only batch item 0 is explained.
            target_bbox_idx: Index of the prediction to explain. When ``None``,
                the prediction of batch item 0 with the largest value at
                channel index 4 is used.

        Returns:
            A 2-D ``float32`` NumPy array with values in ``[0, 1]``, resized to
            the input's spatial size.

        Raises:
            RuntimeError: If the hooks captured no activations or gradients.
        """
        self.model.eval()
        # Drop results of a previous call so stale values are never reused.
        self.gradients = None
        self.activations = None

        # Gradients are required even if the caller is inside torch.no_grad().
        with torch.enable_grad():
            output = self.model(input_image)
            if isinstance(output, tuple):
                output = output[0]  # Get the relevant tensor if model output is a tuple

            confidences = output[..., 4]
            if target_bbox_idx is None:
                # Restrict the search to batch item 0, the item indexed below.
                target_bbox_idx = torch.argmax(confidences[0]).item()

            self.model.zero_grad()
            confidences[0, target_bbox_idx].backward()

        if self.activations is None or self.gradients is None:
            raise RuntimeError(
                f"no activations/gradients were captured for target layer "
                f"{self.target_layer!r}"
            )

        gradients = self.gradients[0].float().cpu().numpy()
        activations = self.activations[0].float().cpu().numpy()

        # Channel weights are the spatially averaged gradients; the map is the
        # weighted sum of the activation channels.
        weights = gradients.mean(axis=(1, 2))
        cam = np.tensordot(weights, activations, axes=1)
        cam = np.maximum(cam, 0).astype(np.float32)

        # cv2.resize expects the target size as (width, height).
        height, width = input_image.shape[2], input_image.shape[3]
        cam = cv2.resize(cam, (width, height))
        return self._normalize(cam)

    def __call__(self, input_image, target_bbox_idx=None):
        """Shorthand for :meth:`generate_cam`."""
        return self.generate_cam(input_image, target_bbox_idx)

import sys  # required by sys.path.append below

sys.path.append('/content/gdrive/MyDrive/master')  # Add master to the system path
from models.yolo import Model  # Replace with your actual YOLO model import

# Initialize your model
# NOTE(review): only the architecture is built from the YAML config here; no
# checkpoint is loaded in this script, so load the trained weights before
# interpreting any heatmap - TODO confirm where the weights come from.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Model('/content/gdrive/MyDrive/master/cfg/deploy/yolov7.yaml').to(device)  # Replace with actual model initialization
model.eval()

# Initialize Grad-CAM
grad_cam = GradCAM(model, target_layer="model.24")  # Adjust the target layer name as needed

# Load and preprocess input image
def preprocess_image(
    img_path,
    size=(640, 640),
    mean=(0.485, 0.456, 0.406),
    std=(0.229, 0.224, 0.225),
):
    """Load an image file and turn it into a ``(1, 3, H, W)`` float tensor.

    The image is read with OpenCV, resized, scaled to ``[0, 1]``, flipped from
    OpenCV's BGR channel order to RGB, optionally standardised per channel,
    and moved to channel-first layout with a leading batch axis.

    NOTE(review): the default ``mean``/``std`` are kept from the original
    script. Confirm they match the preprocessing the model was trained with;
    pass ``mean=None, std=None`` to feed plain ``[0, 1]`` pixel values.

    Args:
        img_path: Path of the image file.
        size: Target size as ``(width, height)``, the order ``cv2.resize`` uses.
        mean: Per-channel values (RGB order) subtracted after scaling, or
            ``None`` to skip.
        std: Per-channel values (RGB order) to divide by, or ``None`` to skip.

    Returns:
        A ``torch.float32`` tensor of shape ``(1, 3, height, width)``.

    Raises:
        FileNotFoundError: If ``img_path`` is not an existing file.
        ValueError: If OpenCV cannot decode the file.
    """
    import os

    if not os.path.isfile(img_path):
        raise FileNotFoundError(f"image not found: {img_path}")

    # cv2.imread signals failure by returning None rather than raising.
    img = cv2.imread(img_path, 1)
    if img is None:
        raise ValueError(f"could not decode image: {img_path}")

    img = np.float32(cv2.resize(img, tuple(size))) / 255
    img = img[:, :, ::-1]  # BGR -> RGB
    if mean is not None:
        img -= np.asarray(mean, dtype=np.float64)
    if std is not None:
        img /= np.asarray(std, dtype=np.float64)

    # from_numpy cannot wrap the negatively strided view produced by the flip.
    img = np.ascontiguousarray(np.transpose(img, (2, 0, 1)))
    return torch.from_numpy(img).unsqueeze(0)

# Read the sample image as a batched tensor, then place it on the model's device.
input_image = preprocess_image(
    "/content/gdrive/MyDrive/mikroskop/1000_F_128042944_dhaclTQ6U0BCtNUSiguItJDid1f06qZa.jpg"
)
input_image = input_image.to(device)

# Compute the heatmap; with no box index given, GradCAM picks the box itself.
cam = grad_cam(input_image)

# Visualize the Grad-CAM
def visualize_cam(cam, img_path, alpha=0.5):
    """Blend a heatmap over an image and show the result in an OpenCV window.

    NOTE(review): ``cv2.imshow`` needs a GUI; it presumably will not work in a
    hosted notebook - confirm the runtime, and use the returned overlay with
    another display method if needed.

    Args:
        cam: 2-D ``float32`` heatmap with values expected in ``[0, 1]``.
        img_path: Path of the image the heatmap belongs to.
        alpha: Weight of the heatmap in the blend; the image gets
            ``1 - alpha``. The default of 0.5 weights both equally.

    Returns:
        The blended overlay as a float array in BGR channel order, scaled so
        that its largest value is 1.

    Raises:
        FileNotFoundError: If ``img_path`` is not an existing file.
        ValueError: If OpenCV cannot decode the file.
    """
    import os

    if not os.path.isfile(img_path):
        raise FileNotFoundError(f"image not found: {img_path}")

    # cv2.imread signals failure by returning None rather than raising.
    img = cv2.imread(img_path)
    if img is None:
        raise ValueError(f"could not decode image: {img_path}")

    cam = cv2.resize(cam, (img.shape[1], img.shape[0]))
    # NaN or out-of-range values would wrap around in the uint8 conversion.
    cam = np.clip(np.nan_to_num(cam), 0.0, 1.0)

    heatmap = cv2.applyColorMap(np.uint8(255 * cam), cv2.COLORMAP_JET)
    heatmap = np.float32(heatmap) / 255

    overlay = alpha * heatmap + (1.0 - alpha) * (np.float32(img) / 255)
    peak = np.max(overlay)
    if peak > 0:
        overlay = overlay / peak

    cv2.imshow("Grad-CAM", overlay)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    return overlay

# Overlay the heatmap on the same image file it was computed from.
visualize_cam(
    cam,
    "/content/gdrive/MyDrive/mikroskop/1000_F_128042944_dhaclTQ6U0BCtNUSiguItJDid1f06qZa.jpg",
)

WongKinYiu / yolov7

Explaining this model with grad-CAM #2063