Neabfi opened this issue 4 years ago
Hello,
I am trying to reproduce the semantic segmentation results on Cityscapes. I reproduced the model and the training procedure described in the paper, and I'm using the MoCo v1 weights.
Here is the model I ended up with (the code is inspired by here):
```python
import torch
from torch import nn
import torch.nn.functional as F
import torchvision.models as models


class FCN(nn.Module):
    def __init__(self, classes=2, criterion=nn.CrossEntropyLoss(ignore_index=255)):
        super(FCN, self).__init__()
        self.criterion = criterion
        resnet = models.resnet.resnet50()
        self.layer0 = nn.Sequential(
            resnet.conv1,
            resnet.bn1,
            resnet.relu,
            resnet.maxpool,
        )
        self.layer1 = resnet.layer1
        self.layer2 = resnet.layer2
        self.layer3 = resnet.layer3
        self.layer4 = resnet.layer4
        # Replace the stride-2 conv in layer4 with a dilated stride-1 conv,
        # so the final feature map stays at output stride 16.
        self.layer4[0].conv2 = nn.Conv2d(512, 512, kernel_size=(3, 3), dilation=(2, 2),
                                         stride=(1, 1), padding=(2, 2), bias=False)
        self.layer4[0].downsample[0] = nn.Conv2d(1024, 2048, kernel_size=(1, 1),
                                                 stride=(1, 1), bias=False)
        self.classifier = nn.Sequential(
            nn.Conv2d(2048, 256, kernel_size=3, dilation=6),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, dilation=6),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, classes, kernel_size=1),
        )
        # 1x1 score layer for the skip connection from layer3 (FCN-16s style).
        self.score_pool4 = nn.Conv2d(1024, classes, kernel_size=1)
        self.upsampling2 = nn.ConvTranspose2d(classes, classes, kernel_size=4, stride=2, bias=False)
        self.upsampling16 = nn.ConvTranspose2d(classes, classes, kernel_size=32, stride=16, bias=False)

    def forward(self, x, y=None):
        input_shape = x.shape
        x = self.layer0(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x16 = self.layer3(x)    # stride-16 features, kept for the skip connection
        x32 = self.layer4(x16)  # dilated, so spatially still stride 16
        x32 = self.classifier(x32)
        x32 = self.upsampling2(x32)
        x16 = self.score_pool4(x16)
        # Crop the upsampled score map to the skip-feature size before fusing.
        x32 = x32[:, :, 1:1 + x16.size()[2], 1:1 + x16.size()[3]]
        y_pred = x32 + x16
        y_pred = self.upsampling16(y_pred)
        # Center-crop the final prediction back to the input resolution.
        cx = int((y_pred.shape[3] - input_shape[3]) / 2)
        cy = int((y_pred.shape[2] - input_shape[2]) / 2)
        y_pred = y_pred[:, :, cy:cy + input_shape[2], cx:cx + input_shape[3]]
        if self.training:
            main_loss = self.criterion(y_pred, y)
            return y_pred.max(1)[1], main_loss, torch.zeros_like(main_loss)
        else:
            return y_pred
```
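In case it matters, here is roughly how I load the MoCo v1 weights into the backbone before building the FCN. This is a minimal sketch: the checkpoint filename and the `module.encoder_q.` key prefix follow the official MoCo release, the rest is just illustrative. I then copy these weights into the FCN's stages (`layer0` wraps `conv1`/`bn1`, etc.).

```python
import torch
import torchvision.models as models

def moco_resnet50(checkpoint_path="moco_v1_200ep_pretrain.pth.tar"):
    """Build a torchvision ResNet-50 initialized from a MoCo v1 checkpoint."""
    resnet = models.resnet50()
    state_dict = torch.load(checkpoint_path, map_location="cpu")["state_dict"]
    prefix = "module.encoder_q."
    # Keep only the query-encoder weights and drop the prefix; skip the
    # 128-d projection head (fc), which segmentation does not use.
    backbone_state = {
        k[len(prefix):]: v
        for k, v in state_dict.items()
        if k.startswith(prefix) and not k.startswith(prefix + "fc")
    }
    # strict=False because the fc weights are deliberately left out.
    msg = resnet.load_state_dict(backbone_state, strict=False)
    assert set(msg.missing_keys) == {"fc.weight", "fc.bias"}, msg.missing_keys
    return resnet
```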
mIoU_train: [training-curve plot omitted]
Validation: mIoU/mAcc/allAcc 0.5278/0.6497/0.7965.
I'm definitely doing something wrong. Are you using different loss weights for the different classes?
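To be concrete, this is the kind of per-class weighting I mean (the weight values here are placeholders, just to illustrate the question):

```python
import torch
from torch import nn

# Hypothetical: one weight per Cityscapes train class (19 classes),
# e.g. to up-weight rare classes. These values are placeholders.
class_weights = torch.ones(19)
class_weights[17] = 2.0  # e.g. boost "motorcycle"

criterion = nn.CrossEntropyLoss(weight=class_weights, ignore_index=255)
```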