pranoyr / cnn-lstm

CNN LSTM architecture implemented in Pytorch for Video Classification
MIT License
260 stars 46 forks source link

cnnlstm: loss does not drop? #5

Closed AbnerAI closed 3 years ago

AbnerAI commented 3 years ago

Hi, The following code is fine-tuned according to the code you wrote. When I only use resnet for training, the loss can drop normally, but when I use resnet+lstm, the loss has been around 0.6 and does not drop. Could you please guide me?

 class CNNLSTM(nn.Module):
    def __init__(self, num_classes=2):
        super(CNNLSTM, self).__init__()
        self.resnet = resnet34(pretrained=True)
        self.resnet.fc = nn.Sequential(nn.Linear(self.resnet.fc.in_features, 300))  
        self.lstm = nn.LSTM(input_size=300, hidden_size=256, num_layers=3)
        self.fc1 = nn.Linear(256, 128) # Fully connected layer
        self.fc2 = nn.Linear(128, num_classes) # Fully connected layer

    def forward(self, x_3d):
        hidden = None
        x_3d = x_3d.unsqueeze(0) # add
        x_ = list()
        for t in range(x_3d.size(1)):
            # with torch.no_grad():
            x = self.resnet(x_3d[:, t, :, :, :]) # x_3d[:, t, :, :, :].shape: [1,3,224,224] 
            out, hidden = self.lstm(x.unsqueeze(0), hidden)    

            x = self.fc1(out[:, -1, :])
            x = F.relu(x)
            x = self.fc2(x)
            if t==0:
                x_ = x
            else:
                x_ = torch.cat([x_, x], dim=0)
        return x_ 
pranoyr commented 3 years ago
for t in range(x_3d.size(1)):
         # with torch.no_grad():
         x = self.resnet(x_3d[:, t, :, :, :]) # x_3d[:, t, :, :, :].shape: [1,3,224,224] 
         out, hidden = self.lstm(x.unsqueeze(0), hidden)    
 x = self.fc1(out[-1, :, :])