Open zpge opened 4 years ago
Hi @zpge
I have prepared a script that allows you to train on your own data using this repo's training strategy. It works on my machine with Pytorch-1.4.0 and Python-3.6.7 (Ubuntu 18.04.03). The modified training script works as well, but since I do not have sufficient training resources, I am unable to train it on my machine. Maybe we could collaborate over this, if you have not already found a solution. The following script works for me.
#!/usr/bin/python
# -*- encoding: utf-8 -*-
import torch
from torch.utils.data import Dataset
import torchvision.transforms as transforms
import os.path as osp
import os
from PIL import Image
import numpy as np
import json
from transform import *
class CamVid(Dataset):
    """CamVid-style segmentation dataset of paired image / annotation files.

    Images are read from ``<rootpth>/<mode>`` and label maps from
    ``<rootpth>/<mode>annot``. Files are paired by base name (file name with
    its extension removed). Raw label ids are remapped to training ids with
    the ``id`` -> ``trainId`` table loaded from a JSON info file.
    """

    def __init__(self, rootpth, cropsize=(640, 480), mode='train', *args,
                 info_pth='./camvid_info.json', **kwargs):
        """Index the image/label folders and build the pre-processing steps.

        Args:
            rootpth: Dataset root containing ``<mode>`` and ``<mode>annot``.
            cropsize: Size handed to ``RandomCrop`` for training samples.
            mode: One of ``'train'``, ``'val'`` or ``'test'``.
            *args: Accepted for call compatibility; ignored.
            info_pth: Path of the JSON file holding a list of objects with
                ``id`` and ``trainId`` keys. Defaults to the path that was
                previously hard-coded.
            **kwargs: Accepted for call compatibility; ignored.

        Raises:
            AssertionError: If ``mode`` is invalid or the image and label
                folders do not contain the same set of base names.
        """
        super(CamVid, self).__init__()
        assert mode in ('train', 'val', 'test')
        self.mode = mode
        self.ignore_lb = 255

        with open(info_pth, 'r') as fr:
            labels_info = json.load(fr)
        self.lb_map = {el['id']: el['trainId'] for el in labels_info}

        # Base name -> full path, for images and for ground-truth labels.
        self.imgs = self._index_dir(osp.join(rootpth, mode))
        self.labels = self._index_dir(osp.join(rootpth, mode + 'annot'))
        assert set(self.imgs) == set(self.labels), \
            'image and label folders must contain the same file names'

        # Sorted so that an index always refers to the same sample,
        # independent of the order os.listdir happens to return.
        self.imnames = sorted(self.imgs)
        self.len = len(self.imnames)

        ## pre-processing
        # NOTE(review): these are the ImageNet-style mean/std constants;
        # confirm they suit the target dataset.
        self.to_tensor = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        ])
        # Training-only augmentation; these classes come from the project's
        # `transform` module and are applied to an {'im', 'lb'} dict.
        self.trans_train = Compose([
            ColorJitter(
                brightness=0.5,
                contrast=0.5,
                saturation=0.5),
            HorizontalFlip(),
            RandomScale((0.75, 1.0, 1.25, 1.5, 1.75, 2.0)),
            RandomCrop(cropsize),
        ])

    @staticmethod
    def _index_dir(dirpth):
        """Map each file's base name (extension stripped) to its full path."""
        return {
            osp.splitext(fname)[0]: osp.join(dirpth, fname)
            for fname in sorted(os.listdir(dirpth))
        }

    def __getitem__(self, idx):
        """Return ``(img, label)`` for the sample at position ``idx``.

        ``img`` is the output of ``self.to_tensor``. ``label`` is an int64
        NumPy array with one extra leading axis, with ids remapped through
        ``convert_labels``. In ``'train'`` mode the pair is first passed
        through ``self.trans_train``.
        """
        fn = self.imnames[idx]
        # Force three channels so the 3-channel Normalize also works for
        # grayscale, palette or RGBA inputs.
        img = Image.open(self.imgs[fn]).convert('RGB')
        label = Image.open(self.labels[fn])
        if self.mode == 'train':
            im_lb = self.trans_train(dict(im=img, lb=label))
            img, label = im_lb['im'], im_lb['lb']
        img = self.to_tensor(img)
        # presumably the label image is single-channel, giving (1, H, W)
        # -- TODO confirm against the annotation files
        label = np.array(label).astype(np.int64)[np.newaxis, :]
        label = self.convert_labels(label)
        return img, label

    def __len__(self):
        """Return the number of indexed samples."""
        return self.len

    def convert_labels(self, label):
        """Remap raw ids in ``label`` to training ids, in place.

        Every element equal to a key of ``self.lb_map`` is replaced by the
        mapped value; other elements are left untouched. The same array
        object is returned.
        """
        # Compare against an untouched snapshot: matching on the array being
        # rewritten would remap a pixel twice whenever a target id is itself
        # a later key (e.g. {0: 1, 1: 2} would turn 0 into 2).
        raw = label.copy()
        for k, v in self.lb_map.items():
            label[raw == k] = v
        return label
if __name__ == "__main__":
    from tqdm import tqdm

    # Smoke test: walk the validation split and report every label id that
    # appears after remapping. `n_classes` is swallowed by **kwargs.
    dataset = CamVid('./CamVid/', n_classes=11, mode='val')
    seen_ids = set()
    for _, lb in tqdm(dataset):
        seen_ids.update(np.unique(lb).tolist())
    print(seen_ids)
Apart from this, you would need to create a camvid_info.json
just like cityscapes_info.json
, which could look something like:
[
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "void",
"ignoreInEval": true,
"id": 0,
"color": [
0,
0,
0
],
"trainId": 255
},
{
"hasInstances": false,
"category": "sky",
"catid": 1,
"name": "sky",
"ignoreInEval": false,
"id": 1,
"color": [
128,
128,
128
],
"trainId": 0
},
{
"hasInstances": false,
"category": "construction",
"catid": 2,
"name": "building",
"ignoreInEval": false,
"id": 2,
"color": [
128,
0,
0
],
"trainId": 1
},
{
"hasInstances": false,
"category": "construction",
"catid": 2,
"name": "pole",
"ignoreInEval": false,
"id": 3,
"color": [
192,
192,
128
],
"trainId": 2
},
{
"hasInstances": false,
"category": "construction",
"catid": 2,
"name": "road_marking",
"ignoreInEval": true,
"id": 4,
"color": [
255,
69,
0
],
"trainId": 255
},
{
"hasInstances": false,
"category": "construction",
"catid": 2,
"name": "road",
"ignoreInEval": false,
"id": 5,
"color": [
128,
64,
128
],
"trainId": 3
},
{
"hasInstances": false,
"category": "construction",
"catid": 2,
"name": "pavement",
"ignoreInEval": false,
"id": 6,
"color": [
60,
40,
222
],
"trainId": 4
},
{
"hasInstances": false,
"category": "vegetation",
"catid": 3,
"name": "tree",
"ignoreInEval": false,
"id": 7,
"color": [
128,
128,
0
],
"trainId": 5
},
{
"hasInstances": false,
"category": "construction",
"catid": 2,
"name": "sign_symbol",
"ignoreInEval": false,
"id": 8,
"color": [
192,
128,
128
],
"trainId": 6
},
{
"hasInstances": false,
"category": "construction",
"catid": 2,
"name": "fence",
"ignoreInEval": false,
"id": 9,
"color": [
64,
64,
128
],
"trainId": 7
},
{
"hasInstances": false,
"category": "traffic",
"catid": 4,
"name": "car",
"ignoreInEval": false,
"id": 10,
"color": [
64,
0,
128
],
"trainId": 8
},
{
"hasInstances": false,
"category": "traffic",
"catid": 4,
"name": "pedestrian",
"ignoreInEval": false,
"id": 11,
"color": [
64,
64,
0
],
"trainId": 9
},
{
"hasInstances": false,
"category": "traffic",
"catid": 4,
"name": "bicyclist",
"ignoreInEval": false,
"id": 12,
"color": [
0,
128,
192
],
"trainId": 10
}
]
Once this is done, you can call CamVid
into your training code, just like CityScapes
. Please let me know if all this works. @CoinCheung please let me know your thoughts as well. Also, I have used the same normalization values as CityScapes for now, I am working on that. So if you follow the directory structure of CamVid dataset, you can pre-process any dataset you like even your own. The reference CamVid dataset for this design can be seen here.
Cheers!
Hi @zpge
I have prepared a script that allows you to train on your own data using this repo's training strategy. It works on my machine with Pytorch-1.4.0 and Python-3.6.7 (Ubuntu 18.04.03). The modified training script works as well, but since I do not have sufficient training resources, I am unable to train it on my machine. Maybe we could collaborate over this, if you have not already found a solution. The following script works for me.
#!/usr/bin/python # -*- encoding: utf-8 -*- import torch from torch.utils.data import Dataset import torchvision.transforms as transforms import os.path as osp import os from PIL import Image import numpy as np import json from transform import * class CamVid(Dataset): def __init__(self, rootpth, cropsize=(640, 480), mode='train', *args, **kwargs): super(CamVid, self).__init__() assert mode in ('train', 'val', 'test') self.mode = mode self.ignore_lb = 255 train_folder = 'train' train_lbl_folder = 'trainannot' # Validation dataset root folders val_folder = 'val' val_lbl_folder = 'valannot' # Test dataset root folders test_folder = 'test' test_lbl_folder = 'testannot' # Images extension img_extension = '.png' with open('./camvid_info.json', 'r') as fr: labels_info = json.load(fr) self.lb_map = {el['id']: el['trainId'] for el in labels_info} ## parse img directory self.imgs = {} imgnames = [] impth = osp.join(rootpth, mode) # folders = os.listdir(impth) # for fd in folders: #fdpth = osp.join(impth, fd) im_names = os.listdir(impth) names = [el.replace('.png', '') for el in im_names] impths = [osp.join(impth, el) for el in im_names] imgnames.extend(names) self.imgs.update(dict(zip(names, impths))) ## parse gt directory self.labels = {} gtnames = [] gtpth = osp.join(rootpth, mode+'annot') # folders = os.listdir(gtpth) # for fd in folders: # fdpth = osp.join(gtpth, fd) lbnames = os.listdir(gtpth) names = [el.replace('.png', '') for el in lbnames] lbpths = [osp.join(gtpth, el) for el in lbnames] gtnames.extend(names) self.labels.update(dict(zip(names, lbpths))) self.imnames = imgnames self.len = len(self.imnames) assert set(imgnames) == set(gtnames) assert set(self.imnames) == set(self.imgs.keys()) assert set(self.imnames) == set(self.labels.keys()) ## pre-processing self.to_tensor = transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), ]) self.trans_train = Compose([ ColorJitter( brightness = 0.5, contrast = 0.5, 
saturation = 0.5), HorizontalFlip(), RandomScale((0.75, 1.0, 1.25, 1.5, 1.75, 2.0)), RandomCrop(cropsize) ]) def __getitem__(self, idx): fn = self.imnames[idx] impth = self.imgs[fn] lbpth = self.labels[fn] img = Image.open(impth) label = Image.open(lbpth) if self.mode == 'train': im_lb = dict(im = img, lb = label) im_lb = self.trans_train(im_lb) img, label = im_lb['im'], im_lb['lb'] img = self.to_tensor(img) label = np.array(label).astype(np.int64)[np.newaxis, :] label = self.convert_labels(label) return img, label def __len__(self): return self.len def convert_labels(self, label): for k, v in self.lb_map.items(): label[label == k] = v return label if __name__ == "__main__": from tqdm import tqdm ds = CamVid('./CamVid/', n_classes=11, mode='val') uni = [] for im, lb in tqdm(ds): lb_uni = np.unique(lb).tolist() uni.extend(lb_uni) #print(uni) print(set(uni))
Apart from this you would need to create a
camvid_info.json
just like cityscapes_info.json
, which could look something like:[ { "hasInstances": false, "category": "void", "catid": 0, "name": "void", "ignoreInEval": true, "id": 0, "color": [ 0, 0, 0 ], "trainId": 255 }, { "hasInstances": false, "category": "sky", "catid": 1, "name": "sky", "ignoreInEval": false, "id": 1, "color": [ 128, 128, 128 ], "trainId": 0 }, { "hasInstances": false, "category": "construction", "catid": 2, "name": "building", "ignoreInEval": false, "id": 2, "color": [ 128, 0, 0 ], "trainId": 1 }, { "hasInstances": false, "category": "construction", "catid": 2, "name": "pole", "ignoreInEval": false, "id": 3, "color": [ 192, 192, 128 ], "trainId": 2 }, { "hasInstances": false, "category": "construction", "catid": 2, "name": "road_marking", "ignoreInEval": true, "id": 4, "color": [ 255, 69, 0 ], "trainId": 255 }, { "hasInstances": false, "category": "construction", "catid": 2, "name": "road", "ignoreInEval": false, "id": 5, "color": [ 128, 64, 128 ], "trainId": 3 }, { "hasInstances": false, "category": "construction", "catid": 2, "name": "pavement", "ignoreInEval": false, "id": 6, "color": [ 60, 40, 222 ], "trainId": 4 }, { "hasInstances": false, "category": "vegetation", "catid": 3, "name": "tree", "ignoreInEval": false, "id": 7, "color": [ 128, 128, 0 ], "trainId": 5 }, { "hasInstances": false, "category": "construction", "catid": 2, "name": "sign_symbol", "ignoreInEval": false, "id": 8, "color": [ 192, 128, 128 ], "trainId": 6 }, { "hasInstances": false, "category": "construction", "catid": 2, "name": "fence", "ignoreInEval": false, "id": 9, "color": [ 64, 64, 128 ], "trainId": 7 }, { "hasInstances": false, "category": "traffic", "catid": 4, "name": "car", "ignoreInEval": false, "id": 10, "color": [ 64, 0, 128 ], "trainId": 8 }, { "hasInstances": false, "category": "traffic", "catid": 4, "name": "pedestrian", "ignoreInEval": false, "id": 11, "color": [ 64, 64, 0 ], "trainId": 9 }, { "hasInstances": false, "category": "traffic", "catid": 4, "name": "bicyclist", "ignoreInEval": false, 
"id": 11, "color": [ 0, 128, 192 ], "trainId": 10 } ]
Once this is done, you can call
CamVid
into your training code, just like CityScapes
. Please let me know if all this works. @CoinCheung please let me know your thoughts as well. Also, I have used the same normalization values as CityScapes for now, I am working on that. So if you follow the directory structure of CamVid dataset, you can pre-process any dataset you like even your own. The reference CamVid dataset for this design can be seen here. Cheers!
Thanks. It's very nice of you. I will try your method. I also tried another way following https://github.com/mcordts/cityscapesScripts/tree/master/cityscapesscripts. It works for me.
I have prepared a script that allows you to train on your own data using this repo's training strategy. It works on my machine with Pytorch-1.4.0 and Python-3.6.7 (Ubuntu 18.04.03). The modified training script works as well, but since I do not have sufficient training resources, I am unable to train it on my machine. Maybe we could collaborate over this, if you have not already found a solution. The following script works for me.
@dronefreak Thank you for the script. But since the number of classes here is different from that of the pretrained weights (which were trained on Cityscapes), will it still work if we initialize the weights for training from the pretrained weights?
Hi @poornimajd
You can load the weights as they are from a pre-trained model, no issues. Just one thing that you need to be careful about is that the final layers (which determine the number of classes) should be dropped, and the final layers should be fine-tuned on your new dataset. In this case of transfer learning, you need to change the number of classes in the .json file as well, because that file determines what kind of dataset your algorithm is being fine-tuned on. Hope this helps!
Thanks for the reply! I actually modified the cityscape.json file similar to the camvid one which you have shown, and also the cityscape.py file, i.e. the loading of the images and ground truth. I used 19 classes from my custom data, and since the number of classes is the same as that of the pretrained weights, I did not modify the last classification layer. But I got nan values for the IoU on some classes while training. I am not sure what caused this issue.
That may be because the classes might be under-represented. You may want to use class balancing techniques like ENet, or maybe use OHEM (Online hard example mining) loss to mitigate the under-representation issue. Also try increasing the number of training iterations.
Thanks for the quick reply! I will try out the techniques suggested by you.
Thanks for the quick reply! I will try out the techniques suggested by you.
Hello, I ran into the same problem as you! I tried to train on Pascal VOC 2012 (after changing pascal_info.json), but when I try to evaluate it, the mIoU is nan! This really confused me... have you solved it?
@leedoge, as suggested by @dronefreak, some classes were under-represented in my data. I just checked the IoU for each class, and it was nan for some under-represented classes. Hence the mean IoU will be nan. So just check the IoU for each class; to get rid of nan for those under-represented classes I either trained it more or just ignored those classes. The segmentation output looks good in spite of the nan mIoU.
I want to train on my own data. Can anyone give a short explanation of how to prepare and arrange the files?