Closed noname1970 closed 3 months ago
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮ │ /content/kohya-trainer/train_network.py:873 in │ │ │ │ 870 │ args = parser.parse_args() │ │ 871 │ args = train_util.read_config_from_file(args, parser) │ │ 872 │ │ │ ❱ 873 │ train(args) │ │ 874 │ │ │ │ /content/kohya-trainer/train_network.py:161 in train │ │ │ │ 158 │ │ │ 159 │ # acceleratorを準備する │ │ 160 │ print("preparing accelerator") │ │ ❱ 161 │ accelerator, unwrap_model = train_util.prepare_accelerator(args) │ │ 162 │ is_main_process = accelerator.is_main_process │ │ 163 │ │ │ 164 │ # mixed precisionに対応した型を用意しておき適宜castする │ │ │ │ /content/kohya-trainer/library/train_util.py:3069 in prepare_accelerator │ │ │ │ 3066 │ │ │ if args.wandb_api_key is not None: │ │ 3067 │ │ │ │ wandb.login(key=args.wandb_api_key) │ │ 3068 │ │ │ ❱ 3069 │ accelerator = Accelerator( │ │ 3070 │ │ gradient_accumulation_steps=args.gradient_accumulation_steps, │ │ 3071 │ │ mixed_precision=args.mixed_precision, │ │ 3072 │ │ log_with=log_with, │ │ │ │ /usr/local/lib/python3.10/dist-packages/accelerate/accelerator.py:355 in init │ │ │ │ 352 │ │ if self.state.mixed_precision == "fp16" and self.distributed_type != Distributed │ │ 353 │ │ │ self.native_amp = True │ │ 354 │ │ │ if not torch.cuda.is_available() and not parse_flag_from_env("ACCELERATE_USE │ │ ❱ 355 │ │ │ │ raise ValueError(err.format(mode="fp16", requirement="a GPU")) │ │ 356 │ │ │ kwargs = self.scaler_handler.to_kwargs() if self.scaler_handler is not None │ │ 357 │ │ │ if self.distributed_type == DistributedType.FSDP: │ │ 358 │ │ │ │ from torch.distributed.fsdp.sharded_grad_scaler import ShardedGradScaler │ ╰──────────────────────────────────────────────────────────────────────────────────────────────────╯ ValueError: fp16 mixed precision requires a GPU ╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮ │ /usr/local/bin/accelerate:8 in │ │ │ │ 5 from 
accelerate.commands.accelerate_cli import main │ │ 6 if name == 'main': │ │ 7 │ sys.argv[0] = re.sub(r'(-script.pyw|.exe)?$', '', sys.argv[0]) │ │ ❱ 8 │ sys.exit(main()) │ │ 9 │ │ │ │ /usr/local/lib/python3.10/dist-packages/accelerate/commands/accelerate_cli.py:45 in main │ │ │ │ 42 │ │ exit(1) │ │ 43 │ │ │ 44 │ # Run │ │ ❱ 45 │ args.func(args) │ │ 46 │ │ 47 │ │ 48 if name == "main": │ │ │ │ /usr/local/lib/python3.10/dist-packages/accelerate/commands/launch.py:1104 in launch_command │ │ │ │ 1101 │ elif defaults is not None and defaults.compute_environment == ComputeEnvironment.AMA │ │ 1102 │ │ sagemaker_launcher(defaults, args) │ │ 1103 │ else: │ │ ❱ 1104 │ │ simple_launcher(args) │ │ 1105 │ │ 1106 │ │ 1107 def main(): │ │ │ │ /usr/local/lib/python3.10/dist-packages/accelerate/commands/launch.py:567 in simple_launcher │ │ │ │ 564 │ process = subprocess.Popen(cmd, env=current_env) │ │ 565 │ process.wait() │ │ 566 │ if process.returncode != 0: │ │ ❱ 567 │ │ raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd) │ │ 568 │ │ 569 │ │ 570 def multi_gpu_launcher(args): │ ╰──────────────────────────────────────────────────────────────────────────────────────────────────╯ CalledProcessError: Command '['/usr/bin/python3', 'train_network.py', '--dataset_config=/content/drive/MyDrive/Loras/YAGO/dataset_config.toml', '--config_file=/content/drive/MyDrive/Loras/YAGO/training_config.toml']' returned non-zero exit status 1.
Check and make sure you are using a GPU runtime on Colab. The error is telling you there is no GPU available. That is the only cause I can think of personally.
See Image below:
If you're not running one, click the down arrow and select "Change runtime type". Pick whichever GPU is available to you.
It works, thank you for the help :]
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮ │ /content/kohya-trainer/train_network.py:873 in │
│ │
│ 870 │ args = parser.parse_args() │
│ 871 │ args = train_util.read_config_from_file(args, parser) │
│ 872 │ │
│ ❱ 873 │ train(args) │
│ 874 │
│ │
│ /content/kohya-trainer/train_network.py:161 in train │
│ │
│ 158 │ │
│ 159 │ # acceleratorを準備する │
│ 160 │ print("preparing accelerator") │
│ ❱ 161 │ accelerator, unwrap_model = train_util.prepare_accelerator(args) │
│ 162 │ is_main_process = accelerator.is_main_process │
│ 163 │ │
│ 164 │ # mixed precisionに対応した型を用意しておき適宜castする │
│ │
│ /content/kohya-trainer/library/train_util.py:3069 in prepare_accelerator │
│ │
│ 3066 │ │ │ if args.wandb_api_key is not None: │
│ 3067 │ │ │ │ wandb.login(key=args.wandb_api_key) │
│ 3068 │ │
│ ❱ 3069 │ accelerator = Accelerator( │
│ 3070 │ │ gradient_accumulation_steps=args.gradient_accumulation_steps, │
│ 3071 │ │ mixed_precision=args.mixed_precision, │
│ 3072 │ │ log_with=log_with, │
│ │
│ /usr/local/lib/python3.10/dist-packages/accelerate/accelerator.py:355 in init │
│ │
│ 352 │ │ if self.state.mixed_precision == "fp16" and self.distributed_type != Distributed │
│ 353 │ │ │ self.native_amp = True │
│ 354 │ │ │ if not torch.cuda.is_available() and not parse_flag_from_env("ACCELERATE_USE │
│ ❱ 355 │ │ │ │ raise ValueError(err.format(mode="fp16", requirement="a GPU")) │
│ 356 │ │ │ kwargs = self.scaler_handler.to_kwargs() if self.scaler_handler is not None │
│ 357 │ │ │ if self.distributed_type == DistributedType.FSDP: │
│ 358 │ │ │ │ from torch.distributed.fsdp.sharded_grad_scaler import ShardedGradScaler │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
ValueError: fp16 mixed precision requires a GPU
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ /usr/local/bin/accelerate:8 in │
│ │
│ 5 from accelerate.commands.accelerate_cli import main │
│ 6 if name == 'main': │
│ 7 │ sys.argv[0] = re.sub(r'(-script.pyw|.exe)?$', '', sys.argv[0]) │
│ ❱ 8 │ sys.exit(main()) │
│ 9 │
│ │
│ /usr/local/lib/python3.10/dist-packages/accelerate/commands/accelerate_cli.py:45 in main │
│ │
│ 42 │ │ exit(1) │
│ 43 │ │
│ 44 │ # Run │
│ ❱ 45 │ args.func(args) │
│ 46 │
│ 47 │
│ 48 if name == "main": │
│ │
│ /usr/local/lib/python3.10/dist-packages/accelerate/commands/launch.py:1104 in launch_command │
│ │
│ 1101 │ elif defaults is not None and defaults.compute_environment == ComputeEnvironment.AMA │
│ 1102 │ │ sagemaker_launcher(defaults, args) │
│ 1103 │ else: │
│ ❱ 1104 │ │ simple_launcher(args) │
│ 1105 │
│ 1106 │
│ 1107 def main(): │
│ │
│ /usr/local/lib/python3.10/dist-packages/accelerate/commands/launch.py:567 in simple_launcher │
│ │
│ 564 │ process = subprocess.Popen(cmd, env=current_env) │
│ 565 │ process.wait() │
│ 566 │ if process.returncode != 0: │
│ ❱ 567 │ │ raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd) │
│ 568 │
│ 569 │
│ 570 def multi_gpu_launcher(args): │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
CalledProcessError: Command '['/usr/bin/python3', 'train_network.py',
'--dataset_config=/content/drive/MyDrive/Loras/YAGO/dataset_config.toml',
'--config_file=/content/drive/MyDrive/Loras/YAGO/training_config.toml']' returned non-zero exit
status 1.