Open adhesivetape opened 3 years ago
When I run `python run.py --problem sdvrp --graph_size 20 --baseline rollout --run_name 'sdvrp20_rollout'`, I get:
python run.py --problem sdvrp --graph_size 20 --baseline rollout --run_name 'sdvrp20_rollout'
{'baseline': 'rollout', 'batch_size': 512, 'bl_alpha': 0.05, 'bl_warmup_epochs': 1, 'checkpoint_encoder': False, 'checkpoint_epochs': 1, 'data_distribution': None, 'embedding_dim': 128, 'epoch_size': 1280000, 'epoch_start': 0, 'eval_batch_size': 1024, 'eval_only': False, 'exp_beta': 0.8, 'graph_size': 20, 'hidden_dim': 128, 'load_path': None, 'log_dir': 'logs', 'log_step': 50, 'lr_critic': 0.0001, 'lr_decay': 1.0, 'lr_model': 0.0001, 'max_grad_norm': 1.0, 'model': 'attention', 'n_encode_layers': 3, 'n_epochs': 100, 'no_cuda': False, 'no_progress_bar': False, 'no_tensorboard': False, 'normalization': 'batch', 'output_dir': 'outputs', 'problem': 'sdvrp', 'resume': None, 'run_name': 'sdvrp20_rollout_20210308T181630', 'save_dir': 'outputs/sdvrp_20/sdvrp20_rollout_20210308T181630', 'seed': 1234, 'shrink_size': None, 'tanh_clipping': 10.0, 'use_cuda': True, 'val_dataset': None, 'val_size': 10000} Evaluating baseline model on evaluation dataset Start train epoch 0, lr=0.0001 for run sdvrp20_rollout_20210308T181630 0%| | 0/2500 [00:00<?, ?it/s]Traceback (most recent call last): File "run.py", line 172, in <module> run(get_options()) File "run.py", line 158, in run train_epoch( File "/tmp/pycharm_project_356/train.py", line 84, in train_epoch for batch_id, batch in enumerate(tqdm(training_dataloader, disable=opts.no_progress_bar)): File "/root/miniconda3/envs/python38/lib/python3.8/site-packages/tqdm/std.py", line 1178, in __iter__ for obj in iterable: File "/root/miniconda3/envs/python38/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 355, in __iter__ return self._get_iterator() File "/root/miniconda3/envs/python38/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 301, in _get_iterator return _MultiProcessingDataLoaderIter(self) File "/root/miniconda3/envs/python38/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 914, in __init__ w.start() File "/root/miniconda3/envs/python38/lib/python3.8/multiprocessing/process.py", line 
121, in start self._popen = self._Popen(self) File "/root/miniconda3/envs/python38/lib/python3.8/multiprocessing/context.py", line 224, in _Popen return _default_context.get_context().Process._Popen(process_obj) File "/root/miniconda3/envs/python38/lib/python3.8/multiprocessing/context.py", line 277, in _Popen return Popen(process_obj) File "/root/miniconda3/envs/python38/lib/python3.8/multiprocessing/popen_fork.py", line 19, in __init__ self._launch(process_obj) File "/root/miniconda3/envs/python38/lib/python3.8/multiprocessing/popen_fork.py", line 70, in _launch self.pid = os.fork() OSError: [Errno 12] Cannot allocate memory
How can I fix this? By the way, which version of TensorFlow is required?
I am not sure... this seems like a pretty weird error. Do you have sufficient memory? Are other problems working for you?
TensorFlow should not be required, except maybe for TensorBoard.
when I use
python run.py --problem sdvrp --graph_size 20 --baseline rollout --run_name 'sdvrp20_rollout'
I get the error shown above. How can I fix this? By the way, which version of TensorFlow is required?