jacobgil / pytorch-grad-cam

Advanced AI Explainability for computer vision. Support for CNNs, Vision Transformers, Classification, Object detection, Segmentation, Image similarity and more.
https://jacobgil.github.io/pytorch-gradcam-book
MIT License
10.06k stars 1.52k forks source link

Gradients are: 'NoneType' object has no attribute 'shape' #193

Closed Anurag14 closed 2 years ago

Anurag14 commented 2 years ago

Hi, thank you for providing an open-source implementation of your work. I am trying to build on top of it to visualize the ResNet-18 encoder of my image captioning model. It is an encoder-decoder architecture with a ResNet-18 encoder and an RNN decoder. My code is below for reproducibility.

from pytorch_grad_cam import GradCAM, ScoreCAM, GradCAMPlusPlus, AblationCAM, XGradCAM, EigenCAM, FullGrad
from pytorch_grad_cam.utils.image import show_cam_on_image
from model import EncoderCNN, DecoderRNN

class CaptioningModelOutputWrapper(torch.nn.Module):
    """Wrap the captioning encoder and decoder behind a single-input forward.

    The ``embedding`` and ``gloves`` inputs are captured at construction
    time, so ``forward`` only needs the image tensor ``x``.

    Args:
        embedding: Extra input handed to the encoder alongside ``x``.
        gloves: Extra input handed to the decoder alongside the encoder
            features.
    """

    def __init__(self, embedding, gloves):
        super().__init__()

        # Inputs that are replayed on every forward call.
        self.embedding = embedding
        self.gloves = gloves

        # Fixed sizes used to build the two sub-networks.
        self.embedding_size = 300
        self.embed_size = 512
        self.hidden_size = 512
        # The vocabulary size is read from the module-level ``data_loader``.
        self.vocab_size = len(data_loader.dataset.vocab)

        # NOTE(review): the meaning of the extra 18 added to the embedding
        # size is not visible from this snippet -- confirm against EncoderCNN.
        self.encoder = EncoderCNN(self.embed_size, self.embedding_size + 18)
        self.decoder = DecoderRNN(
            self.embedding_size,
            self.embed_size,
            self.hidden_size,
            self.vocab_size,
        )

    def forward(self, x):
        """Encode ``x`` with the stored embedding, then decode with the stored gloves."""
        return self.decoder(self.encoder(x, self.embedding), self.gloves)

class CaptionModelOutputTarget:
    """Callable target that picks one entry out of a two-index model output.

    Args:
        index: First index into the model output.
        category: Second index into the model output.
    """

    def __init__(self, index, category):
        self.index, self.category = index, category

    def __call__(self, model_output):
        """Print the output's shape and return ``model_output[index, category]``."""
        # Debug aid: shows the shape of the object handed to the target.
        print(model_output.shape)
        position = (self.index, self.category)
        return model_output[position]

# Reproduction script: build the captioning wrapper from one batch and run
# Grad-CAM on it.
# NOTE(review): `torch` and `data_loader` are used here but are not imported
# or defined anywhere in this snippet.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
it=iter(data_loader)

# Take the first batch and move the three tensors the model consumes onto the
# selected device.
input_tensor, embedding, gloves, caption, string_caption, entity_name, path = next(it)
input_tensor, embedding, gloves = input_tensor.to(device), embedding.to(device), gloves.to(device)
model = CaptioningModelOutputWrapper(embedding, gloves)
model.to(device)

# Plain forward pass as a sanity check; its shape is the first line of the
# output pasted under "Error Stack".
outputs = model.forward(input_tensor)
print(outputs.shape)
# NOTE: this target layer is a slice of the encoder's ResNet. Per the
# follow-up comment in this thread, switching to
# model.encoder.resnet[-2][-1] resolved the reported error.
target_layers = [model.encoder.resnet[:-2]]
# Note: input_tensor can be a batch tensor with several images!
# Construct the CAM object once, and then re-use it on many images:
cam = GradCAM(model=model, target_layers=target_layers, use_cuda=True)
# Target the output entry at row 10 for the category stored in caption[0, 10].
targets = [CaptionModelOutputTarget(10,caption[0,10])]
# Train mode is forced so the RNN backward pass is allowed; the author reports
# a cuDNN error about RNN backward outside train mode otherwise.
cam.model = model.train()
# You can also pass aug_smooth=True and eigen_smooth=True, to apply smoothing.
grayscale_cam = cam(input_tensor=input_tensor, targets=targets)

# In this example grayscale_cam has only one image in the batch:
grayscale_cam = grayscale_cam[0, :]
# NOTE(review): `rgb_img` is not defined anywhere in this snippet.
visualization = show_cam_on_image(rgb_img, grayscale_cam, use_rgb=True)

Error Stack:

torch.Size([1, 100, 31480])
torch.Size([100, 31480])
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
/tmp/ipykernel_86959/3158658298.py in <module>
     13 cam.model = model.train()
     14 # You can also pass aug_smooth=True and eigen_smooth=True, to apply smoothing.
---> 15 grayscale_cam = cam(input_tensor=input_tensor, targets=targets)
     16 
     17 # In this example grayscale_cam has only one image in the batch:

~/anaconda3/envs/fake/lib/python3.9/site-packages/pytorch_grad_cam/base_cam.py in __call__(self, input_tensor, targets, aug_smooth, eigen_smooth)
    182                 input_tensor, targets, eigen_smooth)
    183 
--> 184         return self.forward(input_tensor,
    185                             targets, eigen_smooth)
    186 

~/anaconda3/envs/fake/lib/python3.9/site-packages/pytorch_grad_cam/base_cam.py in forward(self, input_tensor, targets, eigen_smooth)
     91         # use all conv layers for example, all Batchnorm layers,
     92         # or something else.
---> 93         cam_per_layer = self.compute_cam_per_layer(input_tensor,
     94                                                    targets,
     95                                                    eigen_smooth)

~/anaconda3/envs/fake/lib/python3.9/site-packages/pytorch_grad_cam/base_cam.py in compute_cam_per_layer(self, input_tensor, targets, eigen_smooth)
    123                 layer_grads = grads_list[i]
    124 
--> 125             cam = self.get_cam_image(input_tensor,
    126                                      target_layer,
    127                                      targets,

~/anaconda3/envs/fake/lib/python3.9/site-packages/pytorch_grad_cam/base_cam.py in get_cam_image(self, input_tensor, target_layer, targets, activations, grads, eigen_smooth)
     48                       eigen_smooth: bool = False) -> np.ndarray:
     49 
---> 50         weights = self.get_cam_weights(input_tensor,
     51                                        target_layer,
     52                                        targets,

~/anaconda3/envs/fake/lib/python3.9/site-packages/pytorch_grad_cam/grad_cam.py in get_cam_weights(self, input_tensor, target_layer, target_category, activations, grads)
     20                         activations,
     21                         grads):
---> 22         return np.mean(grads, axis=(2, 3))

AttributeError: 'NoneType' object has no attribute 'shape'

I added the line `cam.model = model.train()` to the training script in order to allow backpropagation through the RNN; otherwise it throws a cuDNN error saying that RNN backward cannot be called outside train mode. Solving that error leads to this one. It would really help if you could help debug why this issue occurs in the first place.

Anurag14 commented 2 years ago

I was able to resolve the above issue by using `target_layers=[model.encoder.resnet[-2][-1]]` instead, to find the right layer.