jacobgil / pytorch-grad-cam

Advanced AI Explainability for computer vision. Support for CNNs, Vision Transformers, Classification, Object detection, Segmentation, Image similarity and more.
https://jacobgil.github.io/pytorch-gradcam-book
MIT License
9.79k stars 1.52k forks source link

How to Generate Attention Graphs on Custom Models ? #482

Open hsien999 opened 4 months ago

hsien999 commented 4 months ago

Hi, I'm working on the attention mechanism for face recognition models, I'm using the ir model as a backbone, but I don't know much about the details of the implementation of grad-cam, what exactly should I do, and do none of the targets defined in pytorch_grad_cam.utils.model_targets apply to face recognition and verification tasks? How do I rationalize the generation of attention maps? Is it possible to customize targets like cosine_similarity?

grad-cam            1.5.0
torch               1.8.1+cu101
torchvision         0.9.1+cu101

Here's how I realized it:

import warnings

import torch

from face_model import ir152

warnings.filterwarnings('ignore')
import numpy as np
import cv2
from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.image import show_cam_on_image, \
    preprocess_image
from PIL import Image

class FeatureVectorSimilarityTarget:
    def __init__(self, target_feature):
        self.target_feature_vector = target_feature

    def __call__(self, model_output):
        cosine_similarity = torch.nn.functional.cosine_similarity(model_output, self.target_feature_vector, dim=0)
        return cosine_similarity

def load_img(path):
    img = np.float32(cv2.resize(np.array(Image.open(path)), (112, 112))) / 255
    tensor = preprocess_image(img, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    return img, tensor

model = ir152.IR_152((112, 112))
model.load_state_dict(torch.load('face_model_weights/ir152.pth'))
model.to('cuda')
model.eval()

input_img, input_tensor = load_img('001.jpg')
_, target_tensor = load_img('002.jpg')
target_feature = model(target_tensor.to('cuda'))
targets = [FeatureVectorSimilarityTarget(target_feature)]
target_layers = [model]

with GradCAM(model=model, target_layers=target_layers) as cam:
    grayscale_cams = cam(input_tensor=input_tensor, targets=targets)
    cam_image = show_cam_on_image(input_img, grayscale_cams[0, :], use_rgb=True)
    cam = np.uint8(255 * grayscale_cams[0, :])
    cam = cv2.merge([cam, cam, cam])
    images = np.hstack((np.uint8(255 * input_img), cam, cam_image))
    Image.fromarray(images).save('cam.jpg')

I get the following error:

grayscale_cams = cam(input_tensor=input_tensor, targets=targets)
... ...
“torch/autograd/__init__.py", line 50, in _make_grads
RuntimeError: grad can be implicitly created only for scalar outputs

Here's the model:

class Backbone(Module):
    def __init__(self, input_size, num_layers, mode='ir'):
        super(Backbone, self).__init__()
        assert input_size[0] in [112, 224], "input_size should be [112, 112] or [224, 224]"
        assert num_layers in [50, 100, 152], "num_layers should be 50, 100 or 152"
        assert mode in ['ir', 'ir_se'], "mode should be ir or ir_se"
        blocks = get_blocks(num_layers)
        if mode == 'ir':
            unit_module = bottleneck_IR
        elif mode == 'ir_se':
            unit_module = bottleneck_IR_SE
        self.input_layer = Sequential(Conv2d(3, 64, (3, 3), 1, 1, bias=False),
                                      BatchNorm2d(64),
                                      PReLU(64))
        if input_size[0] == 112:
            self.output_layer = Sequential(BatchNorm2d(512),
                                           Dropout(0.4),
                                           Flatten(),
                                           Linear(512 * 7 * 7, 512),
                                           # BatchNorm1d(512, affine=False))
                                           BatchNorm1d(512))
        else:
            self.output_layer = Sequential(BatchNorm2d(512),
                                           Dropout(0.4),
                                           Flatten(),
                                           Linear(512 * 14 * 14, 512),
                                           #    BatchNorm1d(512, affine=False))
                                           BatchNorm1d(512))

        modules = [unit_module(bottleneck.in_channel, bottleneck.depth, bottleneck.stride)
                   for block in blocks for bottleneck in block]
        self.body = Sequential(*modules)

        self._initialize_weights()

    def forward(self, x):
        x = self.input_layer(x)
        x = self.body(x)
        conv_out = x.view(x.shape[0], -1)
        x = self.output_layer(x)

        # norm = torch.norm(x, p=2, dim=1)
        # x = torch.div(x, norm)

        # return x, conv_out
        return x

def IR_152(input_size):
    model = Backbone(input_size, 152, 'ir')

    return model