Hi all,
Does anyone have any idea why I get this error? I have tried a lot of things but have no clue.
G:\Pinokio\api\comfyui.git\app\custom_nodes\Lora-Training-in-Comfy/sd-scripts/train_network.py
The following values were not passed to accelerate launch and had defaults used instead:
--num_processes was set to a value of 2
More than one GPU was found, enabling multi-GPU training.
If this was unintended please pass in --num_processes=1.
--num_machines was set to a value of 1
--mixed_precision was set to a value of 'no'
--dynamo_backend was set to a value of 'no'
To avoid this warning pass in values for each of the problematic parameters or run accelerate config.
[2024-03-18 21:29:12,526] torch.distributed.elastic.multiprocessing.redirects: [WARNING] NOTE: Redirects are currently not supported in Windows or MacOs.
[W socket.cpp:697] [c10d] The client socket has failed to connect to [AH-DellXPS]:29500 (system error: 10049 - The requested address is not valid in its context.).
Traceback (most recent call last):
File "G:\Pinokio\bin\miniconda\lib\runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "G:\Pinokio\bin\miniconda\lib\runpy.py", line 86, in _run_code
exec(code, run_globals)
File "G:\Pinokio\api\comfyui.git\app\env\lib\site-packages\accelerate\commands\launch.py", line 1033, in <module>
main()
File "G:\Pinokio\api\comfyui.git\app\env\lib\site-packages\accelerate\commands\launch.py", line 1029, in main
launch_command(args)
File "G:\Pinokio\api\comfyui.git\app\env\lib\site-packages\accelerate\commands\launch.py", line 1014, in launch_command
multi_gpu_launcher(args)
File "G:\Pinokio\api\comfyui.git\app\env\lib\site-packages\accelerate\commands\launch.py", line 672, in multi_gpu_launcher
distrib_run.run(args)
File "G:\Pinokio\api\comfyui.git\app\env\lib\site-packages\torch\distributed\run.py", line 803, in run
elastic_launch(
File "G:\Pinokio\api\comfyui.git\app\env\lib\site-packages\torch\distributed\launcher\api.py", line 135, in __call__
return launch_agent(self._config, self._entrypoint, list(args))
File "G:\Pinokio\api\comfyui.git\app\env\lib\site-packages\torch\distributed\launcher\api.py", line 259, in launch_agent
result = agent.run()
File "G:\Pinokio\api\comfyui.git\app\env\lib\site-packages\torch\distributed\elastic\metrics\api.py", line 123, in wrapper
result = f(*args, **kwargs)
File "G:\Pinokio\api\comfyui.git\app\env\lib\site-packages\torch\distributed\elastic\agent\server\api.py", line 727, in run
result = self._invoke_run(role)
File "G:\Pinokio\api\comfyui.git\app\env\lib\site-packages\torch\distributed\elastic\agent\server\api.py", line 862, in _invoke_run
self._initialize_workers(self._worker_group)
File "G:\Pinokio\api\comfyui.git\app\env\lib\site-packages\torch\distributed\elastic\metrics\api.py", line 123, in wrapper
result = f(*args, **kwargs)
File "G:\Pinokio\api\comfyui.git\app\env\lib\site-packages\torch\distributed\elastic\agent\server\api.py", line 699, in _initialize_workers
self._rendezvous(worker_group)
File "G:\Pinokio\api\comfyui.git\app\env\lib\site-packages\torch\distributed\elastic\metrics\api.py", line 123, in wrapper
result = f(*args, **kwargs)
File "G:\Pinokio\api\comfyui.git\app\env\lib\site-packages\torch\distributed\elastic\agent\server\api.py", line 542, in _rendezvous
store, group_rank, group_world_size = spec.rdzv_handler.next_rendezvous()
File "G:\Pinokio\api\comfyui.git\app\env\lib\site-packages\torch\distributed\elastic\rendezvous\static_tcp_rendezvous.py", line 55, in next_rendezvous
self._store = TCPStore( # type: ignore[call-arg]
torch.distributed.DistNetworkError: Unknown error
Train finished
Prompt executed in 9.62 seconds
Hi All Anyone with any idea why I get this? I tried a lot of things but no clue
G:\Pinokio\api\comfyui.git\app\custom_nodes\Lora-Training-in-Comfy/sd-scripts/train_network.py The following values were not passed to
main()
File "G:\Pinokio\api\comfyui.git\app\env\lib\site-packages\accelerate\commands\launch.py", line 1029, in main
launch_command(args)
File "G:\Pinokio\api\comfyui.git\app\env\lib\site-packages\accelerate\commands\launch.py", line 1014, in launch_command
multi_gpu_launcher(args)
File "G:\Pinokio\api\comfyui.git\app\env\lib\site-packages\accelerate\commands\launch.py", line 672, in multi_gpu_launcher
distrib_run.run(args)
File "G:\Pinokio\api\comfyui.git\app\env\lib\site-packages\torch\distributed\run.py", line 803, in run
elastic_launch(
File "G:\Pinokio\api\comfyui.git\app\env\lib\site-packages\torch\distributed\launcher\api.py", line 135, in __call__
return launch_agent(self._config, self._entrypoint, list(args))
File "G:\Pinokio\api\comfyui.git\app\env\lib\site-packages\torch\distributed\launcher\api.py", line 259, in launch_agent
result = agent.run()
File "G:\Pinokio\api\comfyui.git\app\env\lib\site-packages\torch\distributed\elastic\metrics\api.py", line 123, in wrapper
result = f(*args, **kwargs)
File "G:\Pinokio\api\comfyui.git\app\env\lib\site-packages\torch\distributed\elastic\agent\server\api.py", line 727, in run
result = self._invoke_run(role)
File "G:\Pinokio\api\comfyui.git\app\env\lib\site-packages\torch\distributed\elastic\agent\server\api.py", line 862, in _invoke_run
self._initialize_workers(self._worker_group)
File "G:\Pinokio\api\comfyui.git\app\env\lib\site-packages\torch\distributed\elastic\metrics\api.py", line 123, in wrapper
result = f(*args, **kwargs)
File "G:\Pinokio\api\comfyui.git\app\env\lib\site-packages\torch\distributed\elastic\agent\server\api.py", line 699, in _initialize_workers
self._rendezvous(worker_group)
File "G:\Pinokio\api\comfyui.git\app\env\lib\site-packages\torch\distributed\elastic\metrics\api.py", line 123, in wrapper
result = f(*args, **kwargs)
File "G:\Pinokio\api\comfyui.git\app\env\lib\site-packages\torch\distributed\elastic\agent\server\api.py", line 542, in _rendezvous
accelerate launch
and had defaults used instead:
--num_processes was set to a value of 2
More than one GPU was found, enabling multi-GPU training. If this was unintended please pass in --num_processes=1.
--num_machines was set to a value of 1
--mixed_precision was set to a value of 'no'
--dynamo_backend was set to a value of 'no'
To avoid this warning pass in values for each of the problematic parameters or run accelerate config
. [2024-03-18 21:29:12,526] torch.distributed.elastic.multiprocessing.redirects: [WARNING] NOTE: Redirects are currently not supported in Windows or MacOs.
[W socket.cpp:697] [c10d] The client socket has failed to connect to [AH-DellXPS]:29500 (system error: 10049 - The requested address is not valid in its context.).
Traceback (most recent call last):
File "G:\Pinokio\bin\miniconda\lib\runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "G:\Pinokio\bin\miniconda\lib\runpy.py", line 86, in _run_code
exec(code, run_globals)
File "G:\Pinokio\api\comfyui.git\app\env\lib\site-packages\accelerate\commands\launch.py", line 1033, in <module>
store, group_rank, group_world_size = spec.rdzv_handler.next_rendezvous()
File "G:\Pinokio\api\comfyui.git\app\env\lib\site-packages\torch\distributed\elastic\rendezvous\static_tcp_rendezvous.py", line 55, in next_rendezvous
self._store = TCPStore( # type: ignore[call-arg]
torch.distributed.DistNetworkError: Unknown error
Train finished
Prompt executed in 9.62 seconds