Closed brando90 closed 1 year ago
Passing the dataset directly to a data loader doesn't actually work: the labels aren't the right type (they come back as floats, e.g. 16.0, rather than integers) :/
(Pdb) train_dataset[10002]
(tensor([[[-0.8120, -0.8120, -0.8120, ..., -0.8120, -0.8120, -0.8120],
[ 0.8150, 0.8008, 0.8008, ..., -0.8120, -0.8120, -0.8120],
[ 0.7725, 0.7725, 0.7442, ..., -0.8120, -0.8120, -0.8120],
...,
[ 1.5790, 1.5365, 1.5365, ..., -0.8120, -0.8120, -0.8120],
[ 1.5790, 1.5507, 1.5365, ..., -0.8120, -0.8120, -0.8120],
[ 1.5790, 1.5507, 1.5790, ..., -0.8120, -0.8120, -0.8120]],
[[-0.7703, -0.7703, -0.7703, ..., -0.7703, -0.7703, -0.7703],
[ 0.9580, 0.9433, 0.9433, ..., -0.7703, -0.7703, -0.7703],
[ 0.9580, 0.9580, 0.9140, ..., -0.7703, -0.7703, -0.7703],
...,
[ 1.7635, 1.7049, 1.7342, ..., -0.7703, -0.7703, -0.7703],
[ 1.7635, 1.7342, 1.7342, ..., -0.7703, -0.7703, -0.7703],
[ 1.7342, 1.7196, 1.7342, ..., -0.7703, -0.7703, -0.7703]],
[[-0.5726, -0.5726, -0.5726, ..., -0.5726, -0.5726, -0.5726],
[ 1.7708, 1.7570, 1.7570, ..., -0.5726, -0.5726, -0.5726],
[ 1.7708, 1.7708, 1.7294, ..., -0.5726, -0.5726, -0.5726],
...,
[ 2.0740, 2.0740, 2.0740, ..., -0.5726, -0.5726, -0.5726],
[ 2.0740, 2.0740, 2.0740, ..., -0.5726, -0.5726, -0.5726],
[ 2.0740, 2.0740, 2.0740, ..., -0.5726, -0.5726, -0.5726]]]), 16.0)
Even if I passed the `target_transform` argument it wouldn't work, since `__getitem__` never uses that function:
# Fetch one sample by index. Only the input transform is applied in this
# method; nothing here touches the label before it is returned.
def __getitem__(self, idx):
data = self.x[idx]
if self.transform:
data = self.transform(data)
# NOTE: the label is returned exactly as stored in self.y (no target transform call).
return data, self.y[idx]
#!/usr/bin/env python3
from __future__ import print_function
import os
import pickle
import numpy as np
import torch
import torch.utils.data as data
from learn2learn.data.utils import download_file_from_google_drive, download_file
def download_pkl(google_drive_id, data_root, mode):
    """
    Fetch the mini-ImageNet cache pickle of one split unless it is already on disk.

    **Arguments**

    * **google_drive_id** - Google Drive file id handed to the downloader.
    * **data_root** (str) - Directory in which the pickle is stored.
    * **mode** (str) - Split name; appended to 'mini-imagenet-cache-' to form the file name.
    """
    target = os.path.join(data_root, 'mini-imagenet-cache-' + mode) + '.pkl'
    if os.path.exists(target):
        print("Data was already downloaded")
        return
    print('Downloading:', target)
    download_file_from_google_drive(google_drive_id, target)
def index_classes(items):
    """
    Map every distinct item to a consecutive integer id.

    Ids start at 0 and are handed out in order of first appearance;
    repeated items keep the id they received the first time.
    """
    mapping = {}
    for item in items:
        # setdefault leaves already-seen items untouched.
        mapping.setdefault(item, len(mapping))
    return mapping
class MiniImagenet(data.Dataset):

    """
    [[Source]](https://github.com/learnables/learn2learn/blob/master/learn2learn/vision/datasets/mini_imagenet.py)

    **Description**

    The *mini*-ImageNet dataset was originally introduced by Vinyals et al., 2016.

    It consists of 60'000 colour images of sizes 84x84 pixels.
    The dataset is divided in 3 splits of 64 training, 16 validation, and 20 testing classes each containing 600 examples.
    The classes are sampled from the ImageNet dataset, and we use the splits from Ravi & Larochelle, 2017.

    Labels are stored in a NumPy array created with `np.ones`, so they are
    returned as floating-point scalars unless `target_transform` converts them
    (e.g. `target_transform=int`).

    **References**

    1. Vinyals et al. 2016. “Matching Networks for One Shot Learning.” NeurIPS.
    2. Ravi and Larochelle. 2017. “Optimization as a Model for Few-Shot Learning.” ICLR.

    **Arguments**

    * **root** (str) - Path to download the data.
    * **mode** (str, *optional*, default='train') - Which split to use.
        Must be 'train', 'validation', or 'test'.
    * **transform** (Transform, *optional*, default=None) - Input pre-processing.
    * **target_transform** (Transform, *optional*, default=None) - Target pre-processing.
    * **download** (bool, *optional*, default=False) - Download the dataset if it's not available.

    **Raises**

    * **ValueError** - If `mode` is not 'train', 'validation', or 'test'.

    **Example**

    ~~~python
    train_dataset = l2l.vision.datasets.MiniImagenet(root='./data', mode='train')
    train_dataset = l2l.data.MetaDataset(train_dataset)
    train_generator = l2l.data.TaskGenerator(dataset=train_dataset, ways=ways)
    ~~~

    """

    # Per-split download locations: (Google Drive file id, Dropbox fallback link).
    _SOURCES = {
        'test': (
            '1wpmY-hmiJUUlRBkO9ZDCXAcIpHEFdOhD',
            'https://www.dropbox.com/s/ye9jeb5tyz0x01b/mini-imagenet-cache-test.pkl?dl=1',
        ),
        'train': (
            '1I3itTXpXxGV68olxM5roceUMG8itH9Xj',
            'https://www.dropbox.com/s/9g8c6w345s2ek03/mini-imagenet-cache-train.pkl?dl=1',
        ),
        'validation': (
            '1KY5e491bkLFqJDp0-UWou3463Mo8AOco',
            'https://www.dropbox.com/s/ip1b7se3gij3r1b/mini-imagenet-cache-validation.pkl?dl=1',
        ),
    }

    def __init__(
        self,
        root,
        mode='train',
        transform=None,
        target_transform=None,
        download=False,
    ):
        super(MiniImagenet, self).__init__()
        # Validate the split first so that a bad mode has no side effects on disk.
        # (The previous code raised a tuple, which surfaces as a TypeError.)
        if mode not in self._SOURCES:
            raise ValueError('Needs to be train, test or validation')
        google_drive_file_id, dropbox_file_link = self._SOURCES[mode]

        self.root = os.path.expanduser(root)
        if not os.path.exists(self.root):
            # makedirs also handles a root whose parent directories are missing.
            os.makedirs(self.root)
        self.transform = transform
        self.target_transform = target_transform
        self.mode = mode
        self._bookkeeping_path = os.path.join(self.root, 'mini-imagenet-bookkeeping-' + mode + '.pkl')

        pickle_file = os.path.join(self.root, 'mini-imagenet-cache-' + mode + '.pkl')
        if download and not self._check_exists():
            print('Downloading mini-ImageNet --', mode)
            download_pkl(google_drive_file_id, self.root, mode)
        try:
            self.data = self._load_pickle(pickle_file)
        except pickle.UnpicklingError:
            # The file exists but is not a valid pickle (e.g. a failed download).
            # Checking for existence here would always skip the retry, so the
            # fallback only depends on whether downloading is allowed.
            if not download:
                raise
            print('Download failed. Re-trying mini-ImageNet --', mode)
            download_file(dropbox_file_link, pickle_file)
            self.data = self._load_pickle(pickle_file)

        # Stored images are channel-last; permute to channel-first float tensors.
        self.x = torch.from_numpy(self.data["image_data"]).permute(0, 3, 1, 2).float()
        self.y = np.ones(len(self.x))

        # TODO Remove index_classes from here
        self.class_idx = index_classes(self.data['class_dict'].keys())
        for class_name, idxs in self.data['class_dict'].items():
            for idx in idxs:
                self.y[idx] = self.class_idx[class_name]

    @staticmethod
    def _load_pickle(path):
        """Read and return the pickled split cache stored at `path`."""
        # NOTE(security): unpickling can execute arbitrary code; only load
        # cache files obtained from a trusted source.
        with open(path, 'rb') as f:
            return pickle.load(f)

    def __getitem__(self, idx):
        """Return the `(image, label)` pair at `idx` with both transforms applied."""
        image = self.x[idx]
        label = self.y[idx]
        if self.transform:
            image = self.transform(image)
        # Honour the documented target_transform argument (previously ignored).
        if self.target_transform:
            label = self.target_transform(label)
        return image, label

    def __len__(self):
        """Return the number of images in the split."""
        return len(self.x)

    def _check_exists(self):
        """Tell whether the cache pickle of the current split is present under `root`."""
        return os.path.exists(os.path.join(self.root, 'mini-imagenet-cache-' + self.mode + '.pkl'))
if __name__ == '__main__':
    # Manual smoke test: build (and download if needed) the default split,
    # then drop into the debugger to inspect it interactively.
    import pdb

    mi = MiniImagenet(root='./data', download=True)
    pdb.set_trace()
darn this is annoying, why doesn't this work :/
# Unwrap the underlying train/validation/test datasets from the
# TaskDataset -> MetaDataset wrappers and try to patch them so that labels
# come back as ints. (The enclosing function header is not part of this snippet.)
# - get the datasets
from learn2learn.data import TaskDataset, MetaDataset
if isinstance(tasksets.train, TaskDataset):
# todo: UnionMetaDataset, FilteredMetaDataset, ConcatDataset, ConcatDatasetMutuallyExclusiveLabels
if isinstance(tasksets.train.dataset, MetaDataset):
# for now only for mi & tiered
train_dataset = tasksets.train.dataset.dataset
valid_dataset = tasksets.validation.dataset.dataset
test_dataset = tasksets.test.dataset.dataset
train_dataset.target_transform = label_to_long
valid_dataset.target_transform = label_to_long
test_dataset.target_transform = label_to_long
# NOTE(review): assigning __getitem__ on an *instance* does not change what
# `train_dataset[i]` does. Python looks up special methods on the type, not on
# the instance, so indexing still runs the class's original __getitem__ and the
# assertion below still sees the untransformed label. Patch the class or wrap
# the dataset instead.
train_dataset.__getitem__ = get_item
valid_dataset.__getitem__ = get_item
test_dataset.__getitem__ = get_item
assert not isinstance(train_dataset[0][1], float)
# assert not isinstance(train_dataset[0], double)
# NOTE(review): st is not defined in this snippet; presumably a debugger
# breakpoint helper — TODO confirm and remove before real use.
st()
# elif isinstance(tasksets.train.dataset, UnionMetaDataset):
# train_dataset = tasksets.train.dataset.datasets[0]
# valid_dataset = tasksets.validation.dataset.datasets[0]
# test_dataset = tasksets.test.dataset.datasets[0]
# raise NotImplementedError
# elif isinstance(tasksets.train.dataset, ConcatDataset):
# train_dataset = tasksets.train.dataset.datasets[0]
# valid_dataset = tasksets.validation.dataset.datasets[0]
# test_dataset = tasksets.test.dataset.datasets[0]
# raise NotImplementedError
# elif isinstance(tasksets.train.dataset, ConcatDatasetMutuallyExclusiveLabels):
# train_dataset = tasksets.train.dataset.datasets[0]
# valid_dataset = tasksets.validation.dataset.datasets[0]
# test_dataset = tasksets.test.dataset.datasets[0]
# raise NotImplementedError
else:
raise ValueError(f'not implemented for {type(tasksets.train.dataset)}')
else:
raise ValueError(f'not implemented for {type(tasksets.train)}')
return train_dataset, valid_dataset, test_dataset
def label_to_long(label: int) -> int:
    """Return *label* converted to a built-in ``int`` via ``int(label)``."""
    as_integer = int(label)
    return as_integer
def get_item(self, index: int) -> tuple:
    """
    Return the ``(data, label)`` pair stored at *index*, applying both transforms.

    Written as a ``__getitem__`` replacement, so *self* is the dataset object:
    it must expose ``x``, ``y``, ``transform`` and ``target_transform``.

    Args:
        self: Dataset-like object holding the samples and the transforms.
        index: Position of the sample to fetch.

    Returns:
        A tuple ``(data, label)``. ``transform`` is applied to the data when it
        is truthy, and ``target_transform`` to the label when it is not None.
    """
    data = self.x[index]
    label = self.y[index]
    if self.transform:
        data = self.transform(data)
    if self.target_transform is not None:
        label = self.target_transform(label)
    # The leftover debugger call (st()) that halted every fetch was removed.
    return data, label
Hello @brando90, see how we do it in this example: https://github.com/learnables/learn2learn/blob/master/examples/vision/supervised_pretraining.py
@seba-1511 oh. Darn. Didn't see that. Thank you!
I ended up just creating a supervised-learning (SL) dataset class that converts the label to int... hopefully that works? There are no hidden bugs in doing that, right?
class USLDatasetFromL2L(datasets.Dataset):
    """
    Wrap a learn2learn dataset so that it yields labels converted by ``label_to_long``.

    Constructing the wrapper also sets ``target_transform`` on the wrapped
    dataset itself, i.e. the original object is modified.
    """

    def __init__(self, original_l2l_dataset: datasets.Dataset):
        wrapped = original_l2l_dataset
        self.original_l2l_dataset = wrapped
        self.transform = wrapped.transform
        # Install the label conversion on the wrapped dataset, then mirror it here.
        wrapped.target_transform = label_to_long
        self.target_transform = wrapped.target_transform

    def __getitem__(self, index: int) -> tuple:
        """Fetch item *index* from the wrapped dataset and post-process it."""
        sample, target = self.original_l2l_dataset[index]
        # Each transform is skipped when missing or when the value is already a Tensor.
        # NOTE(review): the wrapped dataset's own __getitem__ may already have applied
        # its transform, in which case a non-Tensor result is transformed twice — confirm.
        if not isinstance(sample, Tensor):
            if self.transform:
                sample = self.transform(sample)
        if not isinstance(target, Tensor):
            if self.target_transform:
                target = self.target_transform(target)
        return sample, target

    def __len__(self) -> int:
        """Return the number of items in the wrapped dataset."""
        return len(self.original_l2l_dataset)
e.g. the raw labels not being in the expected range?
I get an issue for mini-imagenet:
why?
I did what this suggested: https://github.com/learnables/learn2learn/issues/301