I ran into a problem when trying to load my local data: TFDS complains that there is no dataset_info.json.
What I've tried so far
Actually, I am following Yang Song's code for solving inverse problems, and I have already downloaded the data. Following the official documentation on how to build your own dataset, my dataset folder has this root structure:
--brats(2021)
|--dummy_data
|--__init__.py
|--brats.py
|--brats_test.py
|--checksums.tsv
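As far as I know, TFDS only discovers a hand-written builder once the module defining it has been imported (defining the class is what registers it under the name 'brats'). A minimal sketch of the lookup, assuming the folder above is importable as a brats package:

import tensorflow_datasets as tfds

import brats.brats  # noqa: F401 - importing the module registers the 'brats' builder

builder = tfds.builder('brats', data_dir='/home/data/Brats/training/')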
This is brats.py:
"""brats dataset."""
import tensorflow_datasets as tfds
import os
import SimpleITK as sitk
import numpy as np
import tensorflow as tf
_DESCRIPTION = """
BraTS 2021
"""
_CITATION = """
[1] U.Baid, et al., The RSNA-ASNR-MICCAI BraTS 2021 Benchmark on Brain Tumor Segmentation and Radiogenomic Classification, arXiv:2107.02314, 2021.
[2] B. H. Menze, A. Jakab, S. Bauer, J. Kalpathy-Cramer, K. Farahani, J. Kirby, et al. "The Multimodal Brain Tumor Image Segmentation Benchmark (BRATS)", IEEE Transactions on Medical Imaging 34(10), 1993-2024 (2015) DOI: 10.1109/TMI.2014.2377694
[3] S. Bakas, H. Akbari, A. Sotiras, M. Bilello, M. Rozycki, J.S. Kirby, et al., "Advancing The Cancer Genome Atlas glioma MRI collections with expert segmentation labels and radiomic features", Nature Scientific Data, 4:170117 (2017) DOI: 10.1038/sdata.2017.117
[4] S. Bakas, H. Akbari, A. Sotiras, M. Bilello, M. Rozycki, J. Kirby, et al., "Segmentation Labels and Radiomic Features for the Pre-operative Scans of the TCGA-GBM collection", The Cancer Imaging Archive, 2017. DOI: 10.7937/K9/TCIA.2017.KLXWJJ1Q
[5] S. Bakas, H. Akbari, A. Sotiras, M. Bilello, M. Rozycki, J. Kirby, et al., "Segmentation Labels and Radiomic Features for the Pre-operative Scans of the TCGA-LGG collection", The Cancer Imaging Archive, 2017. DOI: 10.7937/K9/TCIA.2017.GJQ7R0EF
"""
class Brats(tfds.core.GeneratorBasedBuilder):
"""DatasetBuilder for brats dataset."""
VERSION = tfds.core.Version('1.0.0')
RELEASE_NOTES = {
'1.0.0': 'Initial release.',
}
def _info(self) -> tfds.core.DatasetInfo:
"""Returns the dataset metadata."""
datasetInfo = tfds.core.DatasetInfo(
builder=self,
description=_DESCRIPTION,
features=tfds.features.FeaturesDict({
# These are the features of your dataset like images, labels ...
'image': tfds.features.Image(shape=(240, 240, 1)),
'label': tfds.features.ClassLabel(names=['t1', 't1ce', 't2', 'flair']),
}),
# If there's a common (input, target) tuple from the
# features, specify them here. They'll be used if
# `as_supervised=True` in `builder.as_dataset`.
supervised_keys=('image', 'label'), # Set to `None` to disable
citation=_CITATION,
)
return datasetInfo
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
"""Returns SplitGenerators."""
train_path = '/home/data/Brats/training/brats/1.0.0'
dl_manager.manual_dir = train_path
return {
'train': self._generate_examples(train_path),
}
def read_img(self, path):
return sitk.GetArrayFromImage(sitk.ReadImage(path))
def get_bound(self, data, return_coord=False):
"""
get the boundary of image z y x
data is padded with 0
"""
data_0 = data - data.min()
# display
# display_arr_stats(data_0)
z, y, x = np.where(data_0)
z_start, z_end = np.min(z), np.max(z)
y_start, y_end = np.min(y), np.max(y)
x_start, x_end = np.min(x), np.max(x)
indicator = np.ones_like(data, dtype=bool)
indicator[z_start:z_end, y_start:y_end, x_start:x_end] = False
if return_coord:
return z_start, z_end, y_start, y_end, x_start, x_end, indicator
return indicator
def mri_data_norm(self, data, scale=6.0, return_v=False):
# important to transfer datatype to keep division works
data = data.astype(float)
# get a box mask to remove background
min_z, max_z, min_y, max_y, min_x, max_x, indicator = self.get_bound(data, return_coord=True)
crop_data = np.array(data[min_z:max_z, min_y:max_y, min_x:max_x] * 1.0)
mean, std = np.mean(crop_data), np.std(crop_data)
# clip outliers
crop_data = np.clip(crop_data, max(mean - scale * std, crop_data.min()), min(mean + scale * std, crop_data.max()))
# normalize scale [0,1]
min_v = crop_data.min()
crop_data = np.array(crop_data - min_v)
max_v = crop_data.max() * 1.0
crop_data = np.array(crop_data) / max_v
data[min_z:max_z, min_y:max_y, min_x:max_x] = np.array(crop_data)
data[indicator] = 0
if return_v:
return np.array(data), [min_v, max_v, np.float(min_y), np.float(max_y), np.float(min_x), np.float(max_x)]
else:
return np.array(data)
def _generate_examples(self, path):
"""Yields examples."""
img_list = tf.io.gfile.listdir(path)
domains = ['t1', 't1ce', 't2', 'flair']
count = -1
for img_folder in img_list:
img_path = os.path.join(path, img_folder, os.path.split(img_folder)[-1] + '_t1.nii.gz')
img_array = self.read_img(img_path)
z, x, y = np.where(img_array)
z_min, z_max = np.min(z), np.max(z)
z_min = z_min + 40
z_max = z_max - 25
for domain in domains:
img_path = os.path.join(path, img_folder, os.path.split(img_folder)[-1] + f'_{domain}.nii.gz')
img_array = self.read_img(img_path)
img_array = self.mri_data_norm(img_array, scale=6.0)
for z_idx in range(z_min, z_max + 1):
img = img_array[z_idx, ...]
# Sanity check intensity values
assert np.min(img) >= 0.0 and np.max(img) <= 1.0 and np.max(img) >= 0.1
count += 1
yield count, {
'image': np.clip(img[..., None] * 255., 0.0, 255.).astype(np.uint8),
'label': domain
}
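For what it's worth, my understanding of the TFDS flow is that dataset_info.json is written when the dataset is actually generated, i.e. by download_and_prepare(), not when the builder class is defined. A minimal sketch of what I expected to work (the import path for Brats is an assumption based on the layout above):

from brats.brats import Brats

builder = Brats(data_dir='/home/data/Brats/training/')
builder.download_and_prepare()  # runs _split_generators/_generate_examples and
                                # writes dataset_info.json next to the record files
ds = builder.as_dataset(split='train')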
However, running the training script fails with the following traceback:

The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/lesjie/scoreInverseProblems_2/main.py", line 65, in <module>
app.run(main)
File "/root/miniconda3/envs/tf2.4/lib/python3.9/site-packages/absl/app.py", line 308, in run
_run_main(main, args)
File "/root/miniconda3/envs/tf2.4/lib/python3.9/site-packages/absl/app.py", line 254, in _run_main
sys.exit(main(argv))
File "/home/lesjie/scoreInverseProblems_2/main.py", line 54, in main
run_lib.train(FLAGS.config, FLAGS.workdir)
File "/home/lesjie/scoreInverseProblems_2/run_lib.py", line 77, in train
train_ds, eval_ds, _ = datasets.get_dataset(config,
File "/home/lesjie/scoreInverseProblems_2/datasets.py", line 122, in get_dataset
dataset_builder = tfds.builder(config.data.dataset.replace('_', ''), data_dir='/home/data/Brats/training/')
File "/root/miniconda3/envs/tf2.4/lib/python3.9/site-packages/tensorflow_datasets/core/load.py", line 177, in builder
return cls(**builder_kwargs) # pytype: disable=not-instantiable
File "/root/miniconda3/envs/tf2.4/lib/python3.9/contextlib.py", line 137, in __exit__
self.gen.throw(typ, value, traceback)
File "/root/miniconda3/envs/tf2.4/lib/python3.9/site-packages/tensorflow_datasets/core/utils/py_utils.py", line 399, in try_reraise
reraise(e, *args, **kwargs)
File "/root/miniconda3/envs/tf2.4/lib/python3.9/site-packages/tensorflow_datasets/core/utils/py_utils.py", line 365, in reraise
raise exception from e
FileNotFoundError: Failed to construct dataset brats: Try to load `DatasetInfo` from a directory which does not exist or does not contain `dataset_info.json`. Please delete the directory `/home/data/Brats/training/brats/1.0.0` if you are trying to re-generate the dataset.
Process finished with exit code 1
It would be nice if...
Actually, I am wondering: when I register my own local dataset, when and where does dataset_info.json get created? Is it written automatically, or do I have to create it manually?
It should be created automatically; remove the directory and try again. You could also write the DatasetInfo manually by following the method described in the DatasetInfo documentation.
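Concretely, a sketch of both routes (paths taken from the traceback; import path for Brats assumed as above). Note that _split_generators above reads its raw .nii.gz cases from that same 1.0.0 directory, so the raw files would need to live somewhere else before regenerating:

import shutil

from brats.brats import Brats

builder = Brats(data_dir='/home/data/Brats/training/')

# Route 1: delete the stale directory and regenerate; download_and_prepare()
# writes dataset_info.json automatically.
shutil.rmtree('/home/data/Brats/training/brats/1.0.0')
builder.download_and_prepare()

# Route 2 (alternative): write only the metadata built by _info() by hand;
# DatasetInfo exposes write_to_directory() for this.
builder.info.write_to_directory('/home/data/Brats/training/brats/1.0.0')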
Environment information (if applicable)
tensorflow-datasets version: 4.3
tensorflow version: 2.5