Open AbbyaaS opened 9 months ago
When using kohya-LoRA-trainer-XL.ipynb (https://colab.research.google.com/github/panguin6010/kohya_ss_google_colab/blob/master/kohya_ss_colab.ipynb), the following error occurred. If anyone knows the solution to this, please let me know. What is "metadata"? Both the training images and their caption text files are uploaded directly to train_data_dir.
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮ │ /content/kohya-trainer/sdxl_train_network.py:174 in │ │ │ │ 171 │ args = train_util.read_config_from_file(args, parser) │ │ 172 │ │ │ 173 │ trainer = SdxlNetworkTrainer() │ │ ❱ 174 │ trainer.train(args) │ │ 175 │ │ │ │ /content/kohya-trainer/train_network.py:177 in train │ │ │ │ 174 │ │ │ │ │ } │ │ 175 │ │ │ │ │ 176 │ │ │ blueprint = blueprint_generator.generate(user_config, args, tokenizer=tokeni │ │ ❱ 177 │ │ │ train_dataset_group = config_util.generate_dataset_group_by_blueprint(bluepr │ │ 178 │ │ else: │ │ 179 │ │ │ # use arbitrary dataset class │ │ 180 │ │ │ train_dataset_group = train_util.load_arbitrary_dataset(args, tokenizer) │ │ │ │ /content/kohya-trainer/library/config_util.py:426 in generate_dataset_group_by_blueprint │ │ │ │ 423 │ dataset_klass = FineTuningDataset │ │ 424 │ │ │ 425 │ subsets = [subset_klass(asdict(subset_blueprint.params)) for subset_blueprint in d │ │ ❱ 426 │ dataset = dataset_klass(subsets=subsets, asdict(dataset_blueprint.params)) │ │ 427 │ datasets.append(dataset) │ │ 428 │ │ 429 # print info │ │ │ │ /content/kohya-trainer/library/train_util.py:1477 in init │ │ │ │ 1474 │ │ │ │ with open(subset.metadata_file, "rt", encoding="utf-8") as f: │ │ 1475 │ │ │ │ │ metadata = json.load(f) │ │ 1476 │ │ │ else: │ │ ❱ 1477 │ │ │ │ raise ValueError(f"no metadata / メタデータファイルがありません: {subset │ │ 1478 │ │ │ │ │ 1479 │ │ │ if len(metadata) < 1: │ │ 1480 │ │ │ │ print(f"ignore subset with '{subset.metadata_file}': no image entries fo │ ╰──────────────────────────────────────────────────────────────────────────────────────────────────╯ ValueError: no metadata / メタデータファイルがありません: /content/LoRA/meta_lat.json ╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮ │ /usr/local/bin/accelerate:8 in │ │ │ │ 5 from accelerate.commands.accelerate_cli import main │ │ 6 if name == 'main': │ │ 7 │ sys.argv[0] = 
re.sub(r'(-script.pyw|.exe)?$', '', sys.argv[0]) │ │ ❱ 8 │ sys.exit(main()) │ │ 9 │ │ │ │ /usr/local/lib/python3.10/dist-packages/accelerate/commands/accelerate_cli.py:45 in main │ │ │ │ 42 │ │ exit(1) │ │ 43 │ │ │ 44 │ # Run │ │ ❱ 45 │ args.func(args) │ │ 46 │ │ 47 │ │ 48 if name == "main": │ │ │ │ /usr/local/lib/python3.10/dist-packages/accelerate/commands/launch.py:918 in launch_command │ │ │ │ 915 │ elif defaults is not None and defaults.compute_environment == ComputeEnvironment.AMA │ │ 916 │ │ sagemaker_launcher(defaults, args) │ │ 917 │ else: │ │ ❱ 918 │ │ simple_launcher(args) │ │ 919 │ │ 920 │ │ 921 def main(): │ │ │ │ /usr/local/lib/python3.10/dist-packages/accelerate/commands/launch.py:580 in simple_launcher │ │ │ │ 577 │ process.wait() │ │ 578 │ if process.returncode != 0: │ │ 579 │ │ if not args.quiet: │ │ ❱ 580 │ │ │ raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd) │ │ 581 │ │ else: │ │ 582 │ │ │ sys.exit(1) │ │ 583 │ ╰──────────────────────────────────────────────────────────────────────────────────────────────────╯ CalledProcessError: Command '['/usr/bin/python3', 'sdxl_train_network.py', '--sample_prompts=/content/LoRA/config/sample_prompt.toml', '--config_file=/content/LoRA/config/config_file.toml', '--wandb_api_key=???????????']' returned non-zero exit status 1.
and the following is the training config: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ [sdxl_arguments] cache_text_encoder_outputs = true no_half_vae = true min_timestep = 0 max_timestep = 1000 shuffle_caption = false lowram = true
[model_arguments] pretrained_model_name_or_path = "Linaqruf/animagine-xl" vae = "/content/vae/sdxl_vae.safetensors"
[dataset_arguments] debug_dataset = false in_json = "/content/LoRA/meta_lat.json" train_data_dir = "/content/drive/MyDrive/kohya_ss/train_images/scheiren/10_scheiren" dataset_repeats = 10 keep_tokens = 1 resolution = "1024,1024" color_aug = false token_warmup_min = 1 token_warmup_step = 0
[training_arguments] output_dir = "/content/drive/MyDrive/kohya-trainer/output/sdxl_lora_scheiren1" output_name = "sdxl_lora_scheiren1" save_precision = "fp16" save_every_n_epochs = 2 train_batch_size = 4 max_token_length = 225 mem_eff_attn = false sdpa = false xformers = true max_train_epochs = 10 max_data_loader_n_workers = 8 persistent_data_loader_workers = true gradient_checkpointing = true gradient_accumulation_steps = 1 mixed_precision = "fp16"
[logging_arguments] log_with = "wandb" log_tracker_name = "sdxl_lora1" logging_dir = "/content/LoRA/logs"
[sample_prompt_arguments] sample_every_n_epochs = 2 sample_sampler = "euler_a"
[saving_arguments] save_model_as = "safetensors"
[optimizer_arguments] optimizer_type = "AdamW" learning_rate = 0.0001 max_grad_norm = 0 optimizer_args = [ "scale_parameter=False", "relative_step=False", "warmup_init=False",] lr_scheduler = "constant_with_warmup" lr_warmup_steps = 100
[additional_network_arguments] no_metadata = false network_module = "lycoris.kohya" network_dim = 8 network_alpha = 4 network_args = [ "algo=loha", "conv_dim=4", "conv_alpha=1",] network_train_unet_only = true
[advanced_training_config] save_state = false save_last_n_epochs_state = false caption_dropout_rate = 0 caption_tag_dropout_rate = 0.5 caption_dropout_every_n_epochs = 0 min_snr_gamma = 5 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Try changing `mixed_precision = "fp16"` to `mixed_precision = "no"`. Not really sure why, but that helped me.
I also just ran into this issue.
When using kohya-LoRA-trainer-XL.ipynb (https://colab.research.google.com/github/panguin6010/kohya_ss_google_colab/blob/master/kohya_ss_colab.ipynb), the following error occurred. If anyone knows the solution to this, please let me know. What is "metadata"? Both the training images and their caption text files are uploaded directly to train_data_dir.
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮ │ /content/kohya-trainer/sdxl_train_network.py:174 in │
│ │
│ 171 │ args = train_util.read_config_from_file(args, parser) │
│ 172 │ │
│ 173 │ trainer = SdxlNetworkTrainer() │
│ ❱ 174 │ trainer.train(args) │
│ 175 │
│ │
│ /content/kohya-trainer/train_network.py:177 in train │
│ │
│ 174 │ │ │ │ │ } │
│ 175 │ │ │ │
│ 176 │ │ │ blueprint = blueprint_generator.generate(user_config, args, tokenizer=tokeni │
│ ❱ 177 │ │ │ train_dataset_group = config_util.generate_dataset_group_by_blueprint(bluepr │
│ 178 │ │ else: │
│ 179 │ │ │ # use arbitrary dataset class │
│ 180 │ │ │ train_dataset_group = train_util.load_arbitrary_dataset(args, tokenizer) │
│ │
│ /content/kohya-trainer/library/config_util.py:426 in generate_dataset_group_by_blueprint │
│ │
│ 423 │ dataset_klass = FineTuningDataset │
│ 424 │ │
│ 425 │ subsets = [subset_klass(asdict(subset_blueprint.params)) for subset_blueprint in d │
│ ❱ 426 │ dataset = dataset_klass(subsets=subsets, asdict(dataset_blueprint.params)) │
│ 427 │ datasets.append(dataset) │
│ 428 │
│ 429 # print info │
│ │
│ /content/kohya-trainer/library/train_util.py:1477 in init │
│ │
│ 1474 │ │ │ │ with open(subset.metadata_file, "rt", encoding="utf-8") as f: │
│ 1475 │ │ │ │ │ metadata = json.load(f) │
│ 1476 │ │ │ else: │
│ ❱ 1477 │ │ │ │ raise ValueError(f"no metadata / メタデータファイルがありません: {subset │
│ 1478 │ │ │ │
│ 1479 │ │ │ if len(metadata) < 1: │
│ 1480 │ │ │ │ print(f"ignore subset with '{subset.metadata_file}': no image entries fo │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
ValueError: no metadata / メタデータファイルがありません: /content/LoRA/meta_lat.json
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ /usr/local/bin/accelerate:8 in │
│ │
│ 5 from accelerate.commands.accelerate_cli import main │
│ 6 if name == 'main': │
│ 7 │ sys.argv[0] = re.sub(r'(-script.pyw|.exe)?$', '', sys.argv[0]) │
│ ❱ 8 │ sys.exit(main()) │
│ 9 │
│ │
│ /usr/local/lib/python3.10/dist-packages/accelerate/commands/accelerate_cli.py:45 in main │
│ │
│ 42 │ │ exit(1) │
│ 43 │ │
│ 44 │ # Run │
│ ❱ 45 │ args.func(args) │
│ 46 │
│ 47 │
│ 48 if name == "main": │
│ │
│ /usr/local/lib/python3.10/dist-packages/accelerate/commands/launch.py:918 in launch_command │
│ │
│ 915 │ elif defaults is not None and defaults.compute_environment == ComputeEnvironment.AMA │
│ 916 │ │ sagemaker_launcher(defaults, args) │
│ 917 │ else: │
│ ❱ 918 │ │ simple_launcher(args) │
│ 919 │
│ 920 │
│ 921 def main(): │
│ │
│ /usr/local/lib/python3.10/dist-packages/accelerate/commands/launch.py:580 in simple_launcher │
│ │
│ 577 │ process.wait() │
│ 578 │ if process.returncode != 0: │
│ 579 │ │ if not args.quiet: │
│ ❱ 580 │ │ │ raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd) │
│ 581 │ │ else: │
│ 582 │ │ │ sys.exit(1) │
│ 583 │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
CalledProcessError: Command '['/usr/bin/python3', 'sdxl_train_network.py',
'--sample_prompts=/content/LoRA/config/sample_prompt.toml',
'--config_file=/content/LoRA/config/config_file.toml',
'--wandb_api_key=???????????']' returned non-zero exit status 1.
and the following is the training config: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ [sdxl_arguments] cache_text_encoder_outputs = true no_half_vae = true min_timestep = 0 max_timestep = 1000 shuffle_caption = false lowram = true
[model_arguments] pretrained_model_name_or_path = "Linaqruf/animagine-xl" vae = "/content/vae/sdxl_vae.safetensors"
[dataset_arguments] debug_dataset = false in_json = "/content/LoRA/meta_lat.json" train_data_dir = "/content/drive/MyDrive/kohya_ss/train_images/scheiren/10_scheiren" dataset_repeats = 10 keep_tokens = 1 resolution = "1024,1024" color_aug = false token_warmup_min = 1 token_warmup_step = 0
[training_arguments] output_dir = "/content/drive/MyDrive/kohya-trainer/output/sdxl_lora_scheiren1" output_name = "sdxl_lora_scheiren1" save_precision = "fp16" save_every_n_epochs = 2 train_batch_size = 4 max_token_length = 225 mem_eff_attn = false sdpa = false xformers = true max_train_epochs = 10 max_data_loader_n_workers = 8 persistent_data_loader_workers = true gradient_checkpointing = true gradient_accumulation_steps = 1 mixed_precision = "fp16"
[logging_arguments] log_with = "wandb" log_tracker_name = "sdxl_lora1" logging_dir = "/content/LoRA/logs"
[sample_prompt_arguments] sample_every_n_epochs = 2 sample_sampler = "euler_a"
[saving_arguments] save_model_as = "safetensors"
[optimizer_arguments] optimizer_type = "AdamW" learning_rate = 0.0001 max_grad_norm = 0 optimizer_args = [ "scale_parameter=False", "relative_step=False", "warmup_init=False",] lr_scheduler = "constant_with_warmup" lr_warmup_steps = 100
[additional_network_arguments] no_metadata = false network_module = "lycoris.kohya" network_dim = 8 network_alpha = 4 network_args = [ "algo=loha", "conv_dim=4", "conv_alpha=1",] network_train_unet_only = true
[advanced_training_config] save_state = false save_last_n_epochs_state = false caption_dropout_rate = 0 caption_tag_dropout_rate = 0.5 caption_dropout_every_n_epochs = 0 min_snr_gamma = 5 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^