frankkramer-lab / MIScnn

A framework for Medical Image Segmentation with Convolutional Neural Networks and Deep Learning
GNU General Public License v3.0

FileNotFoundError in KiTS19 notebook example #77

Open · jumutc opened this issue 3 years ago

jumutc commented 3 years ago

The overall pipeline looks the same as in the example, except for some minor details:

# Library import
from miscnn.processing.preprocessor import Preprocessor

# Create and configure the Preprocessor class
pp = Preprocessor(data_io, data_aug=data_aug, batch_size=2, subfunctions=subfunctions, prepare_subfunctions=True, 
                  prepare_batches=False, analysis="patchwise-crop", patch_shape=(80, 160, 160),
                  use_multiprocessing=True)

# Adjust the patch overlap for predictions
pp.patchwise_overlap = (40, 80, 80)
pp.patchwise_skip_blanks = True
...
# Exclude suspicious samples from the data set
del sample_list[133]
del sample_list[125]
del sample_list[68]
del sample_list[37]
del sample_list[23]
del sample_list[15]

# Create the training/validation sample ID list
import numpy as np
np.random.seed(123)
sample_list = np.random.permutation(sample_list).tolist()
validation_samples = sorted(sample_list[:50])
training_samples = sorted(sample_list[50:])

model.evaluate(training_samples, validation_samples, epochs=epochs, callbacks=[cb_lr, cb_es, cb_mc])

The data was downloaded following the instructions at https://github.com/neheller/kits19, but in the end I get this error:

---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
<ipython-input-10-88f8eac878c5> in <module>
----> 1 model.evaluate(training_samples, validation_samples, epochs=epochs, callbacks=[cb_lr, cb_es, cb_mc])

c:\users\students\desktop\bacteria_cfu\code\miscnn-master\miscnn\neural_network\model.py in evaluate(self, training_samples, validation_samples, epochs, iterations, callbacks, class_weight)
    264                                            shuffle=self.shuffle_batches)
    265         # Run training & validation process with the Keras fit
--> 266         history = self.model.fit(dataGen_training,
    267                                  validation_data=dataGen_validation,
    268                                  callbacks=callbacks,

~\anaconda3\envs\cfu_segmentation\lib\site-packages\tensorflow\python\keras\engine\training.py in _method_wrapper(self, *args, **kwargs)
    106   def _method_wrapper(self, *args, **kwargs):
    107     if not self._in_multi_worker_mode():  # pylint: disable=protected-access
--> 108       return method(self, *args, **kwargs)
    109 
    110     # Running inside `run_distribute_coordinator` already.

~\anaconda3\envs\cfu_segmentation\lib\site-packages\tensorflow\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
   1047          training_utils.RespectCompiledTrainableState(self):
   1048       # Creates a `tf.data.Dataset` and handles batch and epoch iteration.
-> 1049       data_handler = data_adapter.DataHandler(
   1050           x=x,
   1051           y=y,

~\anaconda3\envs\cfu_segmentation\lib\site-packages\tensorflow\python\keras\engine\data_adapter.py in __init__(self, x, y, sample_weight, batch_size, steps_per_epoch, initial_epoch, epochs, shuffle, class_weight, max_queue_size, workers, use_multiprocessing, model, steps_per_execution)
   1103 
   1104     adapter_cls = select_data_adapter(x, y)
-> 1105     self._adapter = adapter_cls(
   1106         x,
   1107         y,

~\anaconda3\envs\cfu_segmentation\lib\site-packages\tensorflow\python\keras\engine\data_adapter.py in __init__(self, x, y, sample_weights, shuffle, workers, use_multiprocessing, max_queue_size, model, **kwargs)
    907     self._keras_sequence = x
    908     self._enqueuer = None
--> 909     super(KerasSequenceAdapter, self).__init__(
    910         x,
    911         shuffle=False,  # Shuffle is handed in the _make_callable override.

~\anaconda3\envs\cfu_segmentation\lib\site-packages\tensorflow\python\keras\engine\data_adapter.py in __init__(self, x, y, sample_weights, workers, use_multiprocessing, max_queue_size, model, **kwargs)
    784     # Since we have to know the dtype of the python generator when we build the
    785     # dataset, we have to look at a batch to infer the structure.
--> 786     peek, x = self._peek_and_restore(x)
    787     peek = self._standardize_batch(peek)
    788     peek = _process_tensorlike(peek)

~\anaconda3\envs\cfu_segmentation\lib\site-packages\tensorflow\python\keras\engine\data_adapter.py in _peek_and_restore(x)
    918   @staticmethod
    919   def _peek_and_restore(x):
--> 920     return x[0], x
    921 
    922   def _handle_multiprocessing(self, x, workers, use_multiprocessing,

c:\users\students\desktop\bacteria_cfu\code\miscnn-master\miscnn\neural_network\data_generator.py in __getitem__(self, idx)
     63         # Load a batch by generating it or by loading an already prepared
     64         if self.preprocessor.prepare_batches : batch = self.load_batch(idx)
---> 65         else : batch = self.generate_batch(idx)
     66         # Return the batch containing only an image or an image and segmentation
     67         if self.training:

c:\users\students\desktop\bacteria_cfu\code\miscnn-master\miscnn\neural_network\data_generator.py in generate_batch(self, idx)
    146                 self.sample_list.extend(samples)
    147             # create a new batch
--> 148             batches = self.preprocessor.run(samples, self.training,
    149                                             self.validation)
    150             # Create threading lock to avoid parallel access

c:\users\students\desktop\bacteria_cfu\code\miscnn-master\miscnn\processing\preprocessor.py in run(self, indices_list, training, validation)
    131                     sf.preprocessing(sample, training=training)
    132             # Load sample from file with already processed subfunctions
--> 133             else : sample = self.data_io.sample_loader(index, backup=True)
    134             # Cache sample object for prediction
    135             if not training : self.cache[index] = sample

c:\users\students\desktop\bacteria_cfu\code\miscnn-master\miscnn\data_loading\data_io.py in sample_loader(self, index, load_seg, load_pred, backup)
     84     def sample_loader(self, index, load_seg=True, load_pred=False, backup=False):
     85         # If sample is a backup -> load it from pickle
---> 86         if backup : return self.load_sample_pickle(index)
     87         # Load the image with the I/O interface
     88         image, extended = self.interface.load_image(index)

c:\users\students\desktop\bacteria_cfu\code\miscnn-master\miscnn\data_loading\data_io.py in load_sample_pickle(self, index)
    192         sample_path = os.path.join(self.batch_path, str(self.seed) + "." + \
    193                                    index + ".pickle")
--> 194         with open(sample_path,'rb') as reader:
    195             sample = pickle.load(reader)
    196         return sample

FileNotFoundError: [Errno 2] No such file or directory: 'batches\\17488990.case_00058.pickle'
jumutc commented 3 years ago

I have diagnosed the issue: the use_multiprocessing=True flag is the cause.
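
A minimal workaround sketch based on this diagnosis: the same Preprocessor setup as above, but with multiprocessing disabled (data_io, data_aug, and subfunctions are the objects from the earlier snippet). Note that the missing file name in the traceback is assembled as str(seed) + "." + index + ".pickle", so a seed mismatch between worker processes is one plausible, but unconfirmed, explanation.

# Workaround sketch: identical Preprocessor configuration, but with
# use_multiprocessing disabled so subfunction preparation and batch
# generation run in a single process
pp = Preprocessor(data_io, data_aug=data_aug, batch_size=2, subfunctions=subfunctions,
                  prepare_subfunctions=True, prepare_batches=False,
                  analysis="patchwise-crop", patch_shape=(80, 160, 160),
                  use_multiprocessing=False)  # changed from True

# Patch overlap and blank skipping stay as in the original pipeline
pp.patchwise_overlap = (40, 80, 80)
pp.patchwise_skip_blanks = True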

muellerdo commented 3 years ago

Hey @jumutc,

thanks for the bug report! Hmm, I cannot reproduce this batching I/O error on our side.

Could you please try to reproduce it in a Jupyter notebook or on Google Colab?

Cheers, Dominik
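
For reference, a self-contained sketch to reproduce the setup, assuming the API from the documented KiTS19 tutorial; the NIFTI_interface pattern, class count, Neural_Network defaults, and the train/validation split here are assumptions for illustration, not code from this thread:

# Reproduction sketch (assumes MIScnn's KiTS19 tutorial API and data
# downloaded to kits19/data as described above)
from miscnn.data_loading.interfaces import NIFTI_interface
from miscnn import Data_IO, Data_Augmentation, Preprocessor, Neural_Network

# KiTS19 CT volumes: 1 channel, 3 classes (background, kidney, tumor)
interface = NIFTI_interface(pattern="case_00[0-9]*", channels=1, classes=3)
data_io = Data_IO(interface, "kits19/data/")
sample_list = data_io.get_indiceslist()

data_aug = Data_Augmentation(cycles=2, scaling=True, rotations=True,
                             elastic_deform=True, mirror=True)

# Suspected trigger: prepare_subfunctions=True together with use_multiprocessing=True
pp = Preprocessor(data_io, data_aug=data_aug, batch_size=2, subfunctions=[],
                  prepare_subfunctions=True, prepare_batches=False,
                  analysis="patchwise-crop", patch_shape=(80, 160, 160),
                  use_multiprocessing=True)
pp.patchwise_overlap = (40, 80, 80)

model = Neural_Network(preprocessor=pp)
# A single short run should hit the backup-pickle lookup during batch generation
model.evaluate(sample_list[10:], sample_list[:10], epochs=1)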