[X] I have checked other issues for similar problems.
Backend
Local
Interface Used
UI
CLI Command
No response
UI Screenshots & Parameters
Error Logs
[2024-06-20 07:53:13,283] [WARNING] [real_accelerator.py:162:get_accelerator] Setting accelerator to CPU. If you have GPU or other accelerator, we were unable to detect it.
[2024-06-20 07:53:13,284] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cpu (auto detect)
INFO: 10.16.44.20:42259 - "GET /ui/is_model_training HTTP/1.1" 200 OK
Traceback (most recent call last):
File "/app/env/bin/accelerate", line 8, in <module>
sys.exit(main())
File "/app/env/lib/python3.10/site-packages/accelerate/commands/accelerate_cli.py", line 48, in main
args.func(args)
File "/app/env/lib/python3.10/site-packages/accelerate/commands/launch.py", line 1082, in launch_command
deepspeed_launcher(args)
File "/app/env/lib/python3.10/site-packages/accelerate/commands/launch.py", line 786, in deepspeed_launcher
distrib_run.run(args)
File "/app/env/lib/python3.10/site-packages/torch/distributed/run.py", line 870, in run
elastic_launch(
File "/app/env/lib/python3.10/site-packages/torch/distributed/launcher/api.py", line 132, in __call__
return launch_agent(self._config, self._entrypoint, list(args))
File "/app/env/lib/python3.10/site-packages/torch/distributed/launcher/api.py", line 230, in launch_agent
spec = WorkerSpec(
File "<string>", line 14, in __init__
File "/app/env/lib/python3.10/site-packages/torch/distributed/elastic/agent/server/api.py", line 94, in __post_init__
assert self.local_world_size > 0
AssertionError
Additional Information
"""
elif isinstance(params, Seq2SeqParams):
    if num_gpus == 0:
        logger.warning("No GPU found. Forcing training on CPU. This will be super slow!")
        cmd = [
            "accelerate",
            "launch",
            "--cpu",
        ]
    if num_gpus == 1:  # should be elif?
        cmd = [
            "accelerate",
            "launch",
            "--num_machines",
            "1",
            "--num_processes",
            "1",
        ]
"""
Prerequisites
Backend
Local
Interface Used
UI
CLI Command
No response
UI Screenshots & Parameters
Error Logs
[2024-06-20 07:53:13,283] [WARNING] [real_accelerator.py:162:get_accelerator] Setting accelerator to CPU. If you have GPU or other accelerator, we were unable to detect it.
[2024-06-20 07:53:13,284] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cpu (auto detect)
INFO: 10.16.44.20:42259 - "GET /ui/is_model_training HTTP/1.1" 200 OK
Traceback (most recent call last):
File "/app/env/bin/accelerate", line 8, in <module>
sys.exit(main())
File "/app/env/lib/python3.10/site-packages/accelerate/commands/accelerate_cli.py", line 48, in main
args.func(args)
File "/app/env/lib/python3.10/site-packages/accelerate/commands/launch.py", line 1082, in launch_command
deepspeed_launcher(args)
File "/app/env/lib/python3.10/site-packages/accelerate/commands/launch.py", line 786, in deepspeed_launcher
distrib_run.run(args)
File "/app/env/lib/python3.10/site-packages/torch/distributed/run.py", line 870, in run
elastic_launch(
File "/app/env/lib/python3.10/site-packages/torch/distributed/launcher/api.py", line 132, in __call__
return launch_agent(self._config, self._entrypoint, list(args))
File "/app/env/lib/python3.10/site-packages/torch/distributed/launcher/api.py", line 230, in launch_agent
spec = WorkerSpec(
File "<string>", line 14, in __init__
File "/app/env/lib/python3.10/site-packages/torch/distributed/elastic/agent/server/api.py", line 94, in __post_init__
assert self.local_world_size > 0
AssertionError
Additional Information
"""
elif isinstance(params, Seq2SeqParams):
    if num_gpus == 0:
        logger.warning("No GPU found. Forcing training on CPU. This will be super slow!")
        cmd = [
            "accelerate",
            "launch",
            "--cpu",
        ]
    if num_gpus == 1:  # should be elif?
        cmd = [
            "accelerate",
            "launch",
            "--num_machines",
            "1",
            "--num_processes",
            "1",
        ]
"""