Traceback (most recent call last):
File "tools/run_net.py", line 27, in <module>
main()
File "tools/run_net.py", line 19, in main
launch_job(cfg=cfg, init_method=args.init_method, func=train)
File "/home/yanai-lab/ide-k/ide-k/mygit/Template/tempmodel/utils/misc.py", line 43, in launch_job
daemon=daemon,
File "/usr/local/anaconda3/lib/python3.6/site-packages/torch/multiprocessing/spawn.py", line 171, in spawn
while not spawn_context.join():
File "/usr/local/anaconda3/lib/python3.6/site-packages/torch/multiprocessing/spawn.py", line 118, in join
raise Exception(msg)
Exception:
-- Process 0 terminated with the following error:
Traceback (most recent call last):
File "/usr/local/anaconda3/lib/python3.6/site-packages/torch/multiprocessing/spawn.py", line 19, in _wrap
fn(i, *args)
File "/home/yanai-lab/ide-k/ide-k/mygit/Template/tempmodel/utils/multiprocessing.py", line 35, in run
raise e
File "/home/yanai-lab/ide-k/ide-k/mygit/Template/tempmodel/utils/multiprocessing.py", line 32, in run
rank=rank,
File "/usr/local/anaconda3/lib/python3.6/site-packages/torch/distributed/distributed_c10d.py", line 406, in init_process_group
store, rank, world_size = next(rendezvous(url))
File "/usr/local/anaconda3/lib/python3.6/site-packages/torch/distributed/rendezvous.py", line 85, in _tcp_rendezvous_handler
raise _error("port number missing")
ValueError: Error initializing torch.distributed using tcp:// rendezvous: port number missing