frgfm / torch-scan

Seamless analysis of your PyTorch models (RAM usage, FLOPs, MACs, receptive field, etc.)
https://frgfm.github.io/torch-scan/latest
Apache License 2.0

OrderedDict mutated during iteration #49

Closed · harshraj22 closed this issue 3 years ago

harshraj22 commented 3 years ago

🐛 Bug

Calling torchscan's summary on the model below raises RuntimeError: OrderedDict mutated during iteration.

To Reproduce

Steps to reproduce the behavior:


import torch
from torch import nn
from torchscan import summary

class FeatExt(nn.Module):
    def __init__(self):
        super(FeatExt, self).__init__()

        # input.shape: (1, 105, 105)

        self.cnn1 = nn.Conv2d(1, 64, kernel_size=10) # (1, 105, 105) -> (64, 96, 96)
        self.relu1 = nn.ReLU()

        # feature maps.shape: (64, 96, 96)
        self.max_pool1 = nn.MaxPool2d(kernel_size=2) # (64, 96, 96) -> (64, 48, 48)

        # feature maps.shape: (64, 48, 48)
        self.cnn2 = nn.Conv2d(64, 128, kernel_size=7) # (64, 48, 48) -> (128, 42, 42)
        self.relu2 = nn.ReLU()

        # feature maps.shape: (128, 42, 42)
        self.max_pool2 = nn.MaxPool2d(kernel_size=2) # (128, 42, 42) -> (128, 21, 21)

        # feature maps.shape: (128, 21, 21)
        self.cnn3 = nn.Conv2d(128, 128, kernel_size=4) # (128, 21, 21) -> (128, 18, 18)
        self.relu3 = nn.ReLU()

        # feature maps.shape: (128, 18, 18)
        self.max_pool3 = nn.MaxPool2d(kernel_size=2) # (128, 18, 18) -> (128, 9, 9)

        # feature maps.shape: (128, 9, 9)
        self.cnn4 = nn.Conv2d(128, 256, kernel_size=4) # (128, 9, 9) -> (256, 6, 6)
        self.relu4 = nn.ReLU()

        # feature maps.shape: (256, 6, 6)
        # torch.flatten: (256, 6, 6) -> (9216)

        # feature maps.shape: (9216)
        self.fc1 = nn.Linear(9216, 4096) # (9216) -> (4096)
        self.sigmoid1 = nn.Sigmoid()

        # # feature maps.shape: (4096)
        # self.fc2 = nn.Linear(4096, 1)
        # self.sigmoid2 = nn.Sigmoid()

    def forward(self, x):
        x = self.max_pool1(self.relu1(self.cnn1(x)))

        x = self.max_pool2(self.relu2(self.cnn2(x)))

        x = self.max_pool3(self.relu3(self.cnn3(x)))

        x = self.relu4(self.cnn4(x))

        x = torch.flatten(x, start_dim=1)

        x = self.sigmoid1(self.fc1(x))

        # x = self.sigmoid2(self.fc2(x))

        return x

class SiameseNet(nn.Module):
    def __init__(self):
        super(SiameseNet, self).__init__()
        self.feat = FeatExt()
        # feature maps.shape: (4096)
        self.fc = nn.Linear(4096, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x1 = self.feat(x)
        x2 = self.feat(x)

        return self.sigmoid(self.fc((x1 - x2).abs()))

model = SiameseNet()
summary(model, (1, 105, 105))

Expected behavior

summary should print the per-layer summary table instead of raising an error.

Environment

Please describe your environment so that the bug can be easily reproduced:

Additional context

[screenshot of the traceback, ending in: RuntimeError: OrderedDict mutated during iteration]
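
For context, this is the generic error CPython raises when an OrderedDict gains or loses keys while being iterated over; a minimal sketch (independent of torchscan) that reproduces the same message:

from collections import OrderedDict

d = OrderedDict(a=1)
for key in d:
    # inserting a new key while the loop holds an iterator over d
    # raises: RuntimeError: OrderedDict mutated during iteration
    d[key + "_copy"] = d[key]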

frgfm commented 3 years ago

Hi there @harshraj22 👋

Thanks for reporting that! Would you mind sharing your environment setup?

Here is mine:

- PyTorch version: 1.9.0
- OS: Ubuntu 20.04.2 LTS (x86_64)
- Python version: 3.8 (64-bit runtime)
- Torchscan version: 0.1.1+61116d9

On my end, I ran the same snippet as above (with the imports and the model = SiameseNet() instantiation included) and it completed without error, yielding:


_________________________________________________________________
Layer               Type          Output Shape         Param #   
=================================================================
siamesenet          SiameseNet    (-1, 1)              0         
├─feat              FeatExt       (-1, 4096)           0         
|    └─cnn1         Conv2d        (-1, 64, 96, 96)     6,464     
|    └─relu1        ReLU          (-1, 64, 96, 96)     0         
|    └─max_pool1    MaxPool2d     (-1, 64, 48, 48)     0         
|    └─cnn2         Conv2d        (-1, 128, 42, 42)    401,536   
|    └─relu2        ReLU          (-1, 128, 42, 42)    0         
|    └─max_pool2    MaxPool2d     (-1, 128, 21, 21)    0         
|    └─cnn3         Conv2d        (-1, 128, 18, 18)    262,272   
|    └─relu3        ReLU          (-1, 128, 18, 18)    0         
|    └─max_pool3    MaxPool2d     (-1, 128, 9, 9)      0         
|    └─cnn4         Conv2d        (-1, 256, 6, 6)      524,544   
|    └─relu4        ReLU          (-1, 256, 6, 6)      0         
|    └─fc1          Linear        (-1, 4096)           37,752,832
|    └─sigmoid1     Sigmoid       (-1, 4096)           0         
├─fc                Linear        (-1, 1)              4,097     
├─sigmoid           Sigmoid       (-1, 1)              0         
=================================================================
Trainable params: 38,951,745
Non-trainable params: 0
Total params: 38,951,745
-----------------------------------------------------------------
Model size (params + buffers): 148.59 Mb
Framework & CUDA overhead: 0.00 Mb
Total RAM usage: 148.59 Mb
-----------------------------------------------------------------
Floating Point Operations on forward: 1.82 GFLOPs
Multiply-Accumulations on forward: 908.84 MMACs
Direct memory accesses on forward: 913.52 MDMAs
_________________________________________________________________
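
As a sanity check, the FLOPs and MACs figures above are consistent with the usual convention of counting two floating point operations per multiply-accumulate: 2 × 908.84 MMACs ≈ 1.82 GFLOPs.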

I suggest upgrading your PyTorch version and then installing torchscan from source (the latest version of the library) before trying again:

git clone https://github.com/frgfm/torch-scan.git
pip install -e torch-scan/. --upgrade
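
Once reinstalled, a quick way to confirm which versions are active (a small sketch, assuming both packages expose a __version__ attribute, as recent releases do):

import torch
import torchscan

print(torch.__version__)      # expect 1.9.0 or newer
print(torchscan.__version__)  # a source install reports 0.1.1+<commit hash>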
harshraj22 commented 3 years ago

Oh, thanks! Trying with your version works. Closing the issue. 😇

frgfm commented 3 years ago

No worries at all! Glad it helped :)