Closed My12123 closed 1 year ago
This looks like a Windows- or Anaconda-specific problem. It may be related to https://discuss.pytorch.org/t/pytorch-windows-eoferror-ran-out-of-input-when-num-workers-0/25918
The following option may work as a workaround:
--num-workers 0
I rarely use Windows, and most of my other work is currently on Linux, so I'll check this later.
(nunif) F:\nunif>python train.py waifu2x --method scale --arch waifu2x.upcunet --data-dir F:\nunif\waifu2x --model-dir ./models/waifu2x_mymodel --num-workers 0
{'amp_float': 'fp16',
'arch': 'waifu2x.upcunet',
'b4b': False,
'batch_size': 16,
'checkpoint_file': None,
'da_chshuf_p': 0.0,
'da_grayscale_p': 0.0,
'da_jpeg_p': 0.0,
'da_scale_p': 0.25,
'da_unsharpmask_p': 0.0,
'data_dir': 'F:\nunif\waifu2x',
'deblur': 0.0,
'disable_amp': False,
'gpu': [0],
'handler': <function train at 0x000001EDFD7408B0>,
'hard_example': 'linear',
'learning_rate': 0.0002,
'learning_rate_cycles': 5,
'learning_rate_decay': 0.995,
'learning_rate_decay_step': [1],
'loss': None,
'max_epoch': 200,
'method': 'scale',
'model_dir': './models/waifu2x_mymodel',
'momentum': 0.9,
'noise_level': None,
'num_samples': 50000,
'num_workers': 0,
'optimizer': 'adamw',
'prefetch_factor': 4,
'reset_state': False,
'resize_blur_p': 0.1,
'resume': False,
'scheduler': 'cosine',
'seed': 71,
'size': 112,
'style': 'art',
'warmup_epoch': 0,
'warmup_learning_rate': 1e-06,
'weight_decay': 0.001}
Traceback (most recent call last):
File "F:\nunif\train.py", line 22, in <module>
@nagadomi how to fix it?
(nunif) F:\nunif>python train.py waifu2x --method scale --arch waifu2x.upcunet --data-dir F:\nunif\dataset --model-dir ./models/waifu2x_mymodel {'amp_float': 'fp16', 'arch': 'waifu2x.upcunet', 'b4b': False, 'batch_size': 16, 'checkpoint_file': None, 'da_chshuf_p': 0.0, 'da_grayscale_p': 0.0, 'da_jpeg_p': 0.0, 'da_scale_p': 0.25, 'da_unsharpmask_p': 0.0, 'data_dir': 'F:\nunif\dataset', 'deblur': 0.0, 'disable_amp': False, 'gpu': [0], 'handler': <function train at 0x0000024E0DE608B0>, 'hard_example': 'linear', 'learning_rate': 0.0002, 'learning_rate_cycles': 5, 'learning_rate_decay': 0.995, 'learning_rate_decay_step': [1], 'loss': None, 'max_epoch': 200, 'method': 'scale', 'model_dir': './models/waifu2x_mymodel', 'momentum': 0.9, 'noise_level': None, 'num_samples': 50000, 'num_workers': 2, 'optimizer': 'adamw', 'prefetch_factor': 4, 'reset_state': False, 'resize_blur_p': 0.1, 'resume': False, 'scheduler': 'cosine', 'seed': 71, 'size': 112, 'style': 'art', 'warmup_epoch': 0, 'warmup_learning_rate': 1e-06, 'weight_decay': 0.001} scheduler=cosine: max_epoch: 200 -> 199
epoch: 1, lr: [0.0002, 0.0002]
train 0%| | 0/3125 [00:00<?, ?it/s]
Traceback (most recent call last):
File "F:\nunif\train.py", line 22, in <module>
main()
File "F:\nunif\train.py", line 18, in main
args.handler(args)
File "F:\nunif\waifu2x\training\trainer.py", line 198, in train
trainer.fit()
File "F:\nunif\nunif\training\trainer.py", line 82, in fit
self.env.train(
File "F:\nunif\nunif\training\env.py", line 64, in train
for data in tqdm(loader, ncols=80):
File "F:\1\envs\nunif\lib\site-packages\tqdm\std.py", line 1178, in __iter__
for obj in iterable:
File "F:\1\envs\nunif\lib\site-packages\torch\utils\data\dataloader.py", line 430, in __iter__
self._iterator = self._get_iterator()
File "F:\1\envs\nunif\lib\site-packages\torch\utils\data\dataloader.py", line 381, in _get_iterator
return _MultiProcessingDataLoaderIter(self)
File "F:\1\envs\nunif\lib\site-packages\torch\utils\data\dataloader.py", line 1034, in __init__
w.start()
File "F:\1\envs\nunif\lib\multiprocessing\process.py", line 121, in start
self._popen = self._Popen(self)
File "F:\1\envs\nunif\lib\multiprocessing\context.py", line 224, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "F:\1\envs\nunif\lib\multiprocessing\context.py", line 336, in _Popen
return Popen(process_obj)
File "F:\1\envs\nunif\lib\multiprocessing\popen_spawn_win32.py", line 93, in __init__
reduction.dump(process_obj, to_child)
File "F:\1\envs\nunif\lib\multiprocessing\reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
AttributeError: Can't pickle local object 'HardExampleSampler.__init__.<locals>.<lambda>'
(nunif) F:\nunif>Traceback (most recent call last):
File "<string>", line 1, in <module>
File "F:\1\envs\nunif\lib\multiprocessing\spawn.py", line 116, in spawn_main
exitcode = _main(fd, parent_sentinel)
File "F:\1\envs\nunif\lib\multiprocessing\spawn.py", line 126, in _main
self = reduction.pickle.load(from_parent)
EOFError: Ran out of input