jacobgil / pytorch-grad-cam

Advanced AI Explainability for computer vision. Support for CNNs, Vision Transformers, Classification, Object detection, Segmentation, Image similarity and more.
https://jacobgil.github.io/pytorch-gradcam-book
MIT License
10.49k stars 1.55k forks source link

Shape mismatch for customized 3DCNN model #142

Closed vctorwei closed 3 years ago

vctorwei commented 3 years ago

Hi, I tried GradCAM on my own 3D CNN, and it seems to have a shape mismatch problem. My CNN model is the following:

class CNN(nn.Module):
    """3D convolutional network that produces two outputs.

    Three Conv3d -> BatchNorm3d -> ReLU -> MaxPool3d stages are followed by a
    stack of fully connected layers. ``forward`` returns a tuple
    ``(x1, x2)``: ``x1`` is the sigmoid of the final 1-unit layer and ``x2``
    is the softmax (over dim 1) of the 256-unit ``fc3`` output.
    """

    def __init__(self):
        super().__init__()

        # Convolutional stages; channel widths go 4 -> 32 -> 64 -> 128.
        self.layer1 = self._conv_stage(4, 32)
        self.layer2 = self._conv_stage(32, 64)
        self.layer3 = self._conv_stage(64, 128)

        # Fully connected head. 3456 = 128 * 3 * 3 * 3, which is the
        # flattened layer3 output for a 56 x 56 x 56 input volume.
        self.fc1 = self._dense_stage(3456, 2048)
        self.fc2 = self._dense_stage(2048, 1024)
        self.fc3 = nn.Linear(1024, 256)
        self.fc4 = nn.Linear(256, 1)

        # Stand-alone activations used directly in forward().
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
        self.softmax = nn.Softmax(dim=1)

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Build one Conv3d(k=5) -> BatchNorm3d -> ReLU -> MaxPool3d(2) stage."""
        return nn.Sequential(
            nn.Conv3d(in_channels, out_channels,
                      kernel_size=5, stride=1, padding=0),
            nn.BatchNorm3d(out_channels),
            nn.ReLU(),
            nn.MaxPool3d(kernel_size=2, stride=2),
        )

    @staticmethod
    def _dense_stage(in_features, out_features):
        """Build one Linear -> ReLU -> Dropout(0.6) stage."""
        return nn.Sequential(
            nn.Linear(in_features, out_features),
            nn.ReLU(),
            nn.Dropout(0.6),
        )

    def forward(self, x):
        """Run the network and return ``(sigmoid_output, softmax_output)``."""
        features = self.layer3(self.layer2(self.layer1(x)))

        # Collapse everything except the leading (batch) dimension.
        flat = features.reshape(features.size(0), -1)

        embedding = self.fc3(self.fc2(self.fc1(flat)))

        # The softmax branch reads the raw fc3 output; the sigmoid branch
        # applies ReLU and fc4 to that same output first.
        class_probs = self.softmax(embedding)
        score = self.sigmoid(self.fc4(self.relu(embedding)))
        return score, class_probs

The shape after each layer is the following: torch.Size([15, 4, 56, 56, 56]) (input: batch size 15, 4 channels, spatial dimensions 56 x 56 x 56), torch.Size([15, 32, 26, 26, 26]), torch.Size([15, 64, 11, 11, 11]), torch.Size([15, 128, 3, 3, 3]), torch.Size([15, 3456]), torch.Size([15, 2048]), torch.Size([15, 1024]), torch.Size([15, 256]), torch.Size([15, 256]), torch.Size([15, 1]).

I converted the model's output into a single output (x1) according to a related issue, and I changed GradCAM and BaseCAM according to this issue.

Finally, I ran this:

#modelCNN = CNN().to(device)
class SingleOutputModel(torch.nn.Module):
    """Adapter that exposes only the first output of a multi-output model.

    The wrapped model is expected to return an indexable result (for
    example a tuple); ``forward`` returns element 0 of that result.
    """

    def __init__(self, model):
        super().__init__()
        self.model = model

    def forward(self, x):
        """Call the wrapped model on ``x`` and return its first output."""
        outputs = self.model(x)
        first_output = outputs[0]
        return first_output
# Wrap the two-output model so Grad-CAM only sees its first output.
model_to_use_for_grad_cam = SingleOutputModel(modelCNN)
# Last module of layer1; presumably the MaxPool3d, assuming modelCNN is a CNN instance.
target_layers = modelCNN.layer1[-1]
# A single dataset sample. Per the reported error its size is [4, 56, 56, 56],
# i.e. it has no leading batch dimension, which Conv3d rejects.
input_tensor = torch.tensor(train_loader.dataset[0][0])
# NOTE(review): `target_layer=` matches the reporter's modified GradCAM/BaseCAM — confirm against the version in use.
cam = GradCAM(model=model_to_use_for_grad_cam, target_layer=target_layers, use_cuda=True)

The error message is:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-80-0272626a6dc8> in <module>()
     10 input_tensor = torch.tensor(train_loader.dataset[0][0])
     11 cam = GradCAM(model=model_to_use_for_grad_cam, target_layer=target_layers, use_cuda=True)
---> 12 grayscale_cam = cam(input_tensor=input_tensor)
     13 
     14 # In this example grayscale_cam has only one image in the batch:

11 frames
<ipython-input-16-8a8618c00724> in __call__(self, input_tensor, target_category, aug_smooth, eigen_smooth)
    127 
    128         return self.forward(input_tensor,
--> 129             target_category, eigen_smooth)
    130 
    131 

<ipython-input-16-8a8618c00724> in forward(self, input_tensor, target_category, eigen_smooth)
     57             input_tensor = input_tensor.cuda()
     58 
---> 59         output = self.activations_and_grads(input_tensor)
     60 
     61         if type(target_category) is int:

/usr/local/lib/python3.7/dist-packages/pytorch_grad_cam/activations_and_gradients.py in __call__(self, x)
     33         self.gradients = []
     34         self.activations = []
---> 35         return self.model(x)

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1049         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1050                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051             return forward_call(*input, **kwargs)
   1052         # Do not call functions when jit is used
   1053         full_backward_hooks, non_full_backward_hooks = [], []

<ipython-input-80-0272626a6dc8> in forward(self, x)
      5         self.model = model
      6     def forward(self, x):
----> 7         return self.model(x)[0]
      8 model_to_use_for_grad_cam = SingleOutputModel(modelCNN)
      9 target_layers = modelCNN.layer1[-1]

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1049         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1050                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051             return forward_call(*input, **kwargs)
   1052         # Do not call functions when jit is used
   1053         full_backward_hooks, non_full_backward_hooks = [], []

<ipython-input-11-1ab584951483> in forward(self, x)
     85         self.softmax = torch.nn.Softmax(dim=1)
     86     def forward(self, x):
---> 87         x = self.layer1(x)
     88         x = self.layer2(x)
     89         x = self.layer3(x)

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1049         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1050                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051             return forward_call(*input, **kwargs)
   1052         # Do not call functions when jit is used
   1053         full_backward_hooks, non_full_backward_hooks = [], []

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/container.py in forward(self, input)
    137     def forward(self, input):
    138         for module in self:
--> 139             input = module(input)
    140         return input
    141 

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1069             input = bw_hook.setup_input_hook(input)
   1070 
-> 1071         result = forward_call(*input, **kwargs)
   1072         if _global_forward_hooks or self._forward_hooks:
   1073             for hook in itertools.chain(

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/conv.py in forward(self, input)
    585 
    586     def forward(self, input: Tensor) -> Tensor:
--> 587         return self._conv_forward(input, self.weight, self.bias)
    588 
    589 

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/conv.py in _conv_forward(self, input, weight, bias)
    581             )
    582         return F.conv3d(
--> 583             input, weight, bias, self.stride, self.padding, self.dilation, self.groups
    584         )
    585 

RuntimeError: Expected 5-dimensional input for 5-dimensional weight [32, 4, 5, 5, 5], but got 4-dimensional input of size [4, 56, 56, 56] instead
vctorwei commented 3 years ago

Problem solved by adding a new leading (batch) dimension to the input tensor:

# Indexing with newaxis prepends a size-1 batch dimension; the result is then cast to float.
# NOTE(review): `newaxis` is not defined in this snippet — presumably numpy.newaxis; confirm.
input_tensor = torch.tensor(train_loader.dataset[0][0])[newaxis,:,:,:].to(torch.float)