Closed marcusvlc closed 5 years ago
Here is a custom dataset loader that you can modify to suit your needs:
# ============================================
__author__ = "Sachin Mehta"
__maintainer__ = "Sachin Mehta"
# ============================================
import os
from PIL import Image
from torchvision.transforms import Compose, RandomResizedCrop, RandomHorizontalFlip, ToTensor, Normalize, Resize
from transforms.classification.data_transforms import MEAN, STD
from torch.utils import data
class CustomDataset(data.Dataset):
    """Image-classification dataset driven by a text file of image/label pairs.

    ``root`` must contain ``train.txt`` (read when ``is_training`` is true) or
    ``val.txt`` (read otherwise). Every non-empty line names one image, given
    relative to ``root``, followed by its integer class label. The two fields
    may be separated by a comma or by whitespace::

        /images/image1.png, 0
        /images/image2.png 1

    Args:
        root: Directory that holds the list files and the referenced images.
        inp_size: Size passed to ``RandomResizedCrop`` (training) or used for
            both sides of ``Resize`` (validation).
        scale: Forwarded as ``scale`` to ``RandomResizedCrop``; only used
            when ``is_training`` is true.
        is_training: Selects the list file and the transform pipeline.

    Raises:
        OSError: If the list file cannot be opened.
        ValueError: If a line has no label or the label is not an integer.
        FileNotFoundError: If a listed image does not exist on disk.
    """

    def __init__(self, root, inp_size=224, scale=(0.2, 1.0), is_training=True):
        super(CustomDataset, self).__init__()
        self.root = root
        data_file = os.path.join(root, 'train.txt' if is_training else 'val.txt')

        self.images = []
        self.labels = []
        with open(data_file, 'r') as lines:
            for line_no, line in enumerate(lines, start=1):
                try:
                    parsed = self._parse_line(line)
                except ValueError as err:
                    raise ValueError(
                        '{}:{}: {}'.format(data_file, line_no, err)
                    ) from err
                if parsed is None:
                    # Blank line (e.g. a trailing newline at end of file).
                    continue
                rel_path, label = parsed
                # Plain concatenation instead of os.path.join: entries may
                # start with a separator ("/images/..."), and os.path.join
                # would then discard ``root`` entirely.
                rgb_img_loc = root + os.sep + rel_path
                # Explicit raise rather than assert so the check is not
                # stripped when Python runs with -O.
                if not os.path.isfile(rgb_img_loc):
                    raise FileNotFoundError(
                        '{}:{}: image not found: {}'.format(
                            data_file, line_no, rgb_img_loc
                        )
                    )
                self.images.append(rgb_img_loc)
                self.labels.append(label)

        self.transform = self.transforms(
            inp_size=inp_size, inp_scale=scale, is_training=is_training
        )

    @staticmethod
    def _parse_line(line):
        """Split one list-file line into ``(relative_path, int_label)``.

        Returns ``None`` for a blank line. Raises ``ValueError`` when the
        label is missing or is not an integer.
        """
        entry = line.strip()
        if not entry:
            return None

        # Comma-separated form: "path, label". Split on the LAST comma so a
        # comma inside the path does not break parsing.
        path, sep, label = entry.rpartition(',')
        if sep:
            try:
                return path.strip(), int(label)
            except ValueError:
                # The comma belongs to the path itself; fall through to the
                # whitespace-separated form.
                pass

        # Whitespace-separated form: "path label".
        fields = entry.split()
        if len(fields) < 2:
            raise ValueError(
                'expected "<image> <label>" or "<image>, <label>", got {!r}'.format(entry)
            )
        return fields[0], int(fields[1])

    def transforms(self, inp_size, inp_scale, is_training):
        """Build the preprocessing pipeline for the requested split.

        Training: random resized crop + random horizontal flip.
        Validation: plain resize to ``(inp_size, inp_size)``.
        Both end with ``ToTensor`` and ``Normalize(mean=MEAN, std=STD)``.
        """
        if is_training:
            return Compose(
                [
                    RandomResizedCrop(inp_size, scale=inp_scale),
                    RandomHorizontalFlip(),
                    ToTensor(),
                    Normalize(mean=MEAN, std=STD)
                ]
            )
        return Compose(
            [
                Resize(size=(inp_size, inp_size)),
                ToTensor(),
                Normalize(mean=MEAN, std=STD)
            ]
        )

    def __len__(self):
        """Return the number of image/label pairs that were loaded."""
        # The original returned len(self.ids), an attribute that is never set.
        return len(self.images)

    def __getitem__(self, index):
        """Return ``(transformed_image, label)`` for the sample at ``index``."""
        # convert() yields a new, fully loaded image, so the underlying file
        # can be closed as soon as the with-block exits.
        with Image.open(self.images[index]) as img:
            rgb_img = img.convert('RGB')
        target = self.labels[index]
        if self.transform is not None:
            rgb_img = self.transform(rgb_img)
        return rgb_img, target
Here is a custom dataset loader that you can modify to suit your needs:
# ============================================
__author__ = "Sachin Mehta"
__maintainer__ = "Sachin Mehta"
# ============================================
import os
from PIL import Image
from torchvision.transforms import Compose, RandomResizedCrop, RandomHorizontalFlip, ToTensor, Normalize, Resize
from transforms.classification.data_transforms import MEAN, STD
from torch.utils import data


class CustomDataset(data.Dataset):
    """Image-classification dataset driven by a text file of image/label pairs.

    ``root`` must contain ``train.txt`` (read when ``is_training`` is true) or
    ``val.txt`` (read otherwise). Every non-empty line names one image, given
    relative to ``root``, followed by its integer class label. The two fields
    may be separated by a comma or by whitespace::

        /images/image1.png, 0
        /images/image2.png 1

    Args:
        root: Directory that holds the list files and the referenced images.
        inp_size: Size passed to ``RandomResizedCrop`` (training) or used for
            both sides of ``Resize`` (validation).
        scale: Forwarded as ``scale`` to ``RandomResizedCrop``; only used
            when ``is_training`` is true.
        is_training: Selects the list file and the transform pipeline.

    Raises:
        OSError: If the list file cannot be opened.
        ValueError: If a line has no label or the label is not an integer.
        FileNotFoundError: If a listed image does not exist on disk.
    """

    def __init__(self, root, inp_size=224, scale=(0.2, 1.0), is_training=True):
        super(CustomDataset, self).__init__()
        self.root = root
        data_file = os.path.join(root, 'train.txt' if is_training else 'val.txt')

        self.images = []
        self.labels = []
        with open(data_file, 'r') as lines:
            for line_no, line in enumerate(lines, start=1):
                try:
                    parsed = self._parse_line(line)
                except ValueError as err:
                    raise ValueError(
                        '{}:{}: {}'.format(data_file, line_no, err)
                    ) from err
                if parsed is None:
                    # Blank line (e.g. a trailing newline at end of file).
                    continue
                rel_path, label = parsed
                # Plain concatenation instead of os.path.join: entries may
                # start with a separator ("/images/..."), and os.path.join
                # would then discard ``root`` entirely.
                rgb_img_loc = root + os.sep + rel_path
                # Explicit raise rather than assert so the check is not
                # stripped when Python runs with -O.
                if not os.path.isfile(rgb_img_loc):
                    raise FileNotFoundError(
                        '{}:{}: image not found: {}'.format(
                            data_file, line_no, rgb_img_loc
                        )
                    )
                self.images.append(rgb_img_loc)
                self.labels.append(label)

        self.transform = self.transforms(
            inp_size=inp_size, inp_scale=scale, is_training=is_training
        )

    @staticmethod
    def _parse_line(line):
        """Split one list-file line into ``(relative_path, int_label)``.

        Returns ``None`` for a blank line. Raises ``ValueError`` when the
        label is missing or is not an integer.
        """
        entry = line.strip()
        if not entry:
            return None

        # Comma-separated form: "path, label". Split on the LAST comma so a
        # comma inside the path does not break parsing.
        path, sep, label = entry.rpartition(',')
        if sep:
            try:
                return path.strip(), int(label)
            except ValueError:
                # The comma belongs to the path itself; fall through to the
                # whitespace-separated form.
                pass

        # Whitespace-separated form: "path label".
        fields = entry.split()
        if len(fields) < 2:
            raise ValueError(
                'expected "<image> <label>" or "<image>, <label>", got {!r}'.format(entry)
            )
        return fields[0], int(fields[1])

    def transforms(self, inp_size, inp_scale, is_training):
        """Build the preprocessing pipeline for the requested split.

        Training: random resized crop + random horizontal flip.
        Validation: plain resize to ``(inp_size, inp_size)``.
        Both end with ``ToTensor`` and ``Normalize(mean=MEAN, std=STD)``.
        """
        if is_training:
            return Compose(
                [
                    RandomResizedCrop(inp_size, scale=inp_scale),
                    RandomHorizontalFlip(),
                    ToTensor(),
                    Normalize(mean=MEAN, std=STD)
                ]
            )
        return Compose(
            [
                Resize(size=(inp_size, inp_size)),
                ToTensor(),
                Normalize(mean=MEAN, std=STD)
            ]
        )

    def __len__(self):
        """Return the number of image/label pairs that were loaded."""
        # The original returned len(self.ids), an attribute that is never set.
        return len(self.images)

    def __getitem__(self, index):
        """Return ``(transformed_image, label)`` for the sample at ``index``."""
        # convert() yields a new, fully loaded image, so the underlying file
        # can be closed as soon as the with-block exits.
        with Image.open(self.images[index]) as img:
            rgb_img = img.convert('RGB')
        target = self.labels[index]
        if self.transform is not None:
            rgb_img = self.transform(rgb_img)
        return rgb_img, target
This code is for image classification, right? I am working on object detection and want to train on my custom VOC dataset, which has only one class. I have images and their corresponding XML files in train/val/test folders. Can you help me train ESPNetv2 on my dataset? Thank you.
I opened an issue in the ESPNetv2 repository, but I'm still unsure how to train the network on other datasets. I have a proprietary dataset with only two classes (person and not person) and would like to train the ESPNetv2 network to classify them using my dataset. Is this possible? How should I structure my dataset for this? I saw that you only allow training and testing on specific datasets; is there any way I can use my own dataset instead?