import numpy as np


def batches(self, batch_size, which_set="train"):
    x_name = which_set + "_x"
    y_name = which_set + "_y"

    num_examples = len(getattr(self, y_name))
    if self.shuffle:
        # Shuffle features, labels, and (for training) the label mask
        # with the same permutation so they stay aligned.
        idx = np.arange(num_examples)
        np.random.shuffle(idx)
        setattr(self, x_name, getattr(self, x_name)[idx])
        setattr(self, y_name, getattr(self, y_name)[idx])
        if which_set == "train":
            self.label_mask = self.label_mask[idx]

    dataset_x = getattr(self, x_name)
    dataset_y = getattr(self, y_name)
    for ii in range(0, num_examples, batch_size):
        x = dataset_x[ii:ii + batch_size]
        y = dataset_y[ii:ii + batch_size]

        if which_set == "train":
            # When we use the data for training, we need to include
            # the label mask, so we can pretend we don't have access
            # to some of the labels, as an exercise of our
            # semi-supervised learning ability.
            yield x, y, self.label_mask[ii:ii + batch_size]
        else:
            yield x, y
"dataset" should be replace with "self" at above batch method
"dataset" should be replace with "self" at above batch method
Credit to Phillip Dudero: https://discussions.udacity.com/t/getattr-dataset-in-dataset-batches/767950
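
For context, here is a minimal sketch of how this generator might be consumed in a training loop. The Dataset container below, its constructor arguments, and the toy data are assumptions made purely for illustration; only the batches method itself comes from the code above.

import numpy as np

# Hypothetical container exposing the attributes batches() expects:
# <set>_x / <set>_y arrays, a label_mask, and a shuffle flag.
class Dataset:
    def __init__(self, train_x, train_y, test_x, test_y, label_mask, shuffle=True):
        self.train_x, self.train_y = train_x, train_y
        self.test_x, self.test_y = test_x, test_y
        self.label_mask = label_mask
        self.shuffle = shuffle

Dataset.batches = batches  # attach the corrected method defined above

# Toy data: 100 examples, only the first 10 carry a usable label.
train_x = np.random.randn(100, 32, 32, 3)
train_y = np.random.randint(0, 10, size=100)
label_mask = np.concatenate([np.ones(10), np.zeros(90)])
data = Dataset(train_x, train_y, train_x[:20], train_y[:20], label_mask)

for x, y, mask in data.batches(batch_size=16):
    # mask selects which labels the supervised loss may see; the rest
    # of the batch would feed only the unsupervised term.
    print(x.shape, y.shape, int(mask.sum()))

Because which_set defaults to "train", each iteration yields a three-tuple (x, y, mask); calling data.batches(16, which_set="test") would yield plain (x, y) pairs instead.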