Question about training/testing with other datasets

marcusvlc commented 5 years ago

I opened an issue in the ESPNetv2 repository, but I still have doubts about training the network on other datasets. I have a proprietary dataset that involves only two classes (person and not person) and would like to train the ESPNetv2 network to classify these classes using my dataset. Is this possible? How should I structure my dataset for this? I saw that you only allow training and testing on specific datasets; is there any way I can do that?

sacmehta commented 5 years ago

Here is a custom dataset loader that you can modify to suit your needs:

# ============================================
__author__ = "Sachin Mehta"
__maintainer__ = "Sachin Mehta"
# ============================================

import os
from PIL import Image
from torchvision.transforms import Compose, RandomResizedCrop, RandomHorizontalFlip, ToTensor, Normalize, Resize
from transforms.classification.data_transforms import MEAN, STD
from torch.utils import data

class CustomDataset(data.Dataset):
    """Image-classification dataset driven by a text file of image/label pairs.

    ``root`` must contain ``train.txt`` (read when ``is_training`` is true) or
    ``val.txt`` (read otherwise). Every non-empty line names one sample: an
    image path relative to ``root`` followed by an integer class label. The
    two fields may be separated by a comma or by whitespace, e.g.::

        /images/image1.png, 0
        /images/image2.png 1

    Args:
        root: Directory holding the list file and the images it refers to.
        inp_size: Side length of the square crop/resize fed to the network.
        scale: Area-scale range forwarded to ``RandomResizedCrop`` (training only).
        is_training: Selects ``train.txt`` plus augmentation when true,
            ``val.txt`` plus a plain resize when false.

    Raises:
        ValueError: A line of the list file has no parsable integer label.
        FileNotFoundError: A listed image does not exist under ``root``.
    """

    def __init__(self, root, inp_size=224, scale=(0.2, 1.0), is_training=True):
        super(CustomDataset, self).__init__()
        self.root = root

        data_file = os.path.join(root, 'train.txt' if is_training else 'val.txt')

        self.images = []
        self.labels = []
        with open(data_file, 'r') as lines:
            for line_no, line in enumerate(lines, start=1):
                line = line.strip()
                if not line:
                    # Tolerate blank lines (e.g. a trailing newline at end of file).
                    continue
                rel_path, label = self._parse_line(line, data_file, line_no)
                # Plain concatenation rather than os.path.join: entries written
                # with a leading separator ("/images/a.png") must still resolve
                # underneath root instead of replacing it.
                rgb_img_loc = root + os.sep + rel_path
                # Explicit raise instead of assert so the check survives `python -O`.
                if not os.path.isfile(rgb_img_loc):
                    raise FileNotFoundError(
                        '{}:{}: image file not found: {}'.format(data_file, line_no, rgb_img_loc)
                    )
                self.images.append(rgb_img_loc)
                self.labels.append(label)

        self.transform = self.transforms(inp_size=inp_size, inp_scale=scale, is_training=is_training)

    @staticmethod
    def _parse_line(line, data_file, line_no):
        """Split one stripped, non-empty list-file line into ``(relative_path, label)``.

        ``data_file`` and ``line_no`` are only used to build error messages.
        """
        # Comma-separated form: "<path>, <label>". Only accepted when the text
        # after the last comma is an integer, so a comma inside a file name
        # falls through to the whitespace form below.
        path_part, comma, label_part = line.rpartition(',')
        if comma:
            try:
                return path_part.strip(), int(label_part)
            except ValueError:
                pass

        # Whitespace-separated form: "<path> <label>"; extra tokens are ignored.
        tokens = line.split()
        if len(tokens) < 2:
            raise ValueError(
                '{}:{}: expected "<image path>, <label>" but got {!r}'.format(data_file, line_no, line)
            )
        try:
            label = int(tokens[1])
        except ValueError:
            raise ValueError(
                '{}:{}: label is not an integer: {!r}'.format(data_file, line_no, tokens[1])
            )
        return tokens[0], label

    def transforms(self, inp_size, inp_scale, is_training):
        """Build the preprocessing pipeline.

        Training uses a random resized crop and a random horizontal flip;
        evaluation uses a fixed resize to ``(inp_size, inp_size)``. Both end
        with tensor conversion and normalization by the imported MEAN/STD.
        """
        if is_training:
            return Compose(
                [
                    RandomResizedCrop(inp_size, scale=inp_scale),
                    RandomHorizontalFlip(),
                    ToTensor(),
                    Normalize(mean=MEAN, std=STD)
                ]
            )
        else:
            return Compose(
                [
                    Resize(size=(inp_size, inp_size)),
                    ToTensor(),
                    Normalize(mean=MEAN, std=STD)
                ]
            )

    def __len__(self):
        """Return the number of samples listed in the data file."""
        # The sample paths live in self.images; no `ids` attribute is ever set.
        return len(self.images)

    def __getitem__(self, index):
        """Return ``(image, label)`` for sample ``index``, with the transform applied."""
        # convert() loads the pixel data and returns a new image, so the file
        # handle can be released as soon as the with-block exits.
        with Image.open(self.images[index]) as img:
            rgb_img = img.convert('RGB')
        target = self.labels[index]
        if self.transform is not None:
            rgb_img = self.transform(rgb_img)

        return rgb_img, target

jigsawcoder commented 2 years ago

Here is a custom dataset loader that you can modify to suit your needs:

# ============================================
__author__ = "Sachin Mehta"
__maintainer__ = "Sachin Mehta"
# ============================================

import os
from PIL import Image
from torchvision.transforms import Compose, RandomResizedCrop, RandomHorizontalFlip, ToTensor, Normalize, Resize
from transforms.classification.data_transforms import MEAN, STD
from torch.utils import data

class CustomDataset(data.Dataset):
    """Image-classification dataset driven by a text file of image/label pairs.

    ``root`` must contain ``train.txt`` (read when ``is_training`` is true) or
    ``val.txt`` (read otherwise). Every non-empty line names one sample: an
    image path relative to ``root`` followed by an integer class label. The
    two fields may be separated by a comma or by whitespace, e.g.::

        /images/image1.png, 0
        /images/image2.png 1

    Args:
        root: Directory holding the list file and the images it refers to.
        inp_size: Side length of the square crop/resize fed to the network.
        scale: Area-scale range forwarded to ``RandomResizedCrop`` (training only).
        is_training: Selects ``train.txt`` plus augmentation when true,
            ``val.txt`` plus a plain resize when false.

    Raises:
        ValueError: A line of the list file has no parsable integer label.
        FileNotFoundError: A listed image does not exist under ``root``.
    """

    def __init__(self, root, inp_size=224, scale=(0.2, 1.0), is_training=True):
        super(CustomDataset, self).__init__()
        self.root = root

        data_file = os.path.join(root, 'train.txt' if is_training else 'val.txt')

        self.images = []
        self.labels = []
        with open(data_file, 'r') as lines:
            for line_no, line in enumerate(lines, start=1):
                line = line.strip()
                if not line:
                    # Tolerate blank lines (e.g. a trailing newline at end of file).
                    continue
                rel_path, label = self._parse_line(line, data_file, line_no)
                # Plain concatenation rather than os.path.join: entries written
                # with a leading separator ("/images/a.png") must still resolve
                # underneath root instead of replacing it.
                rgb_img_loc = root + os.sep + rel_path
                # Explicit raise instead of assert so the check survives `python -O`.
                if not os.path.isfile(rgb_img_loc):
                    raise FileNotFoundError(
                        '{}:{}: image file not found: {}'.format(data_file, line_no, rgb_img_loc)
                    )
                self.images.append(rgb_img_loc)
                self.labels.append(label)

        self.transform = self.transforms(inp_size=inp_size, inp_scale=scale, is_training=is_training)

    @staticmethod
    def _parse_line(line, data_file, line_no):
        """Split one stripped, non-empty list-file line into ``(relative_path, label)``.

        ``data_file`` and ``line_no`` are only used to build error messages.
        """
        # Comma-separated form: "<path>, <label>". Only accepted when the text
        # after the last comma is an integer, so a comma inside a file name
        # falls through to the whitespace form below.
        path_part, comma, label_part = line.rpartition(',')
        if comma:
            try:
                return path_part.strip(), int(label_part)
            except ValueError:
                pass

        # Whitespace-separated form: "<path> <label>"; extra tokens are ignored.
        tokens = line.split()
        if len(tokens) < 2:
            raise ValueError(
                '{}:{}: expected "<image path>, <label>" but got {!r}'.format(data_file, line_no, line)
            )
        try:
            label = int(tokens[1])
        except ValueError:
            raise ValueError(
                '{}:{}: label is not an integer: {!r}'.format(data_file, line_no, tokens[1])
            )
        return tokens[0], label

    def transforms(self, inp_size, inp_scale, is_training):
        """Build the preprocessing pipeline.

        Training uses a random resized crop and a random horizontal flip;
        evaluation uses a fixed resize to ``(inp_size, inp_size)``. Both end
        with tensor conversion and normalization by the imported MEAN/STD.
        """
        if is_training:
            return Compose(
                [
                    RandomResizedCrop(inp_size, scale=inp_scale),
                    RandomHorizontalFlip(),
                    ToTensor(),
                    Normalize(mean=MEAN, std=STD)
                ]
            )
        else:
            return Compose(
                [
                    Resize(size=(inp_size, inp_size)),
                    ToTensor(),
                    Normalize(mean=MEAN, std=STD)
                ]
            )

    def __len__(self):
        """Return the number of samples listed in the data file."""
        # The sample paths live in self.images; no `ids` attribute is ever set.
        return len(self.images)

    def __getitem__(self, index):
        """Return ``(image, label)`` for sample ``index``, with the transform applied."""
        # convert() loads the pixel data and returns a new image, so the file
        # handle can be released as soon as the with-block exits.
        with Image.open(self.images[index]) as img:
            rgb_img = img.convert('RGB')
        target = self.labels[index]
        if self.transform is not None:
            rgb_img = self.transform(rgb_img)

        return rgb_img, target

This code is for image classification, right? I am working on object detection and want to train on my custom VOC dataset with only 1 class. I have images and the corresponding XML files in train/val/test folders. Can you help me train ESPNetv2 on my dataset? Thank you.

sacmehta / EdgeNets

Question about training/testing with other datasets #9