Linaqruf / kohya-trainer

Adapted from https://note.com/kohya_ss/n/nbf7ce8d80f29 for easier cloning
Apache License 2.0

ValueError: no metadata / メタデータファイルがありません: /content/LoRA/meta_lat.json #327

Open AbbyaaS opened 9 months ago

AbbyaaS commented 9 months ago

When using kohya-LoRA-trainer-XL.ipynb (https://colab.research.google.com/github/panguin6010/kohya_ss_google_colab/blob/master/kohya_ss_colab.ipynb), the following error occurred. If anyone knows the solution, please let me know. What is the "metadata"? Both the training images and their caption text files were uploaded directly to train_data_dir.

Traceback (most recent call last):
  File "/content/kohya-trainer/sdxl_train_network.py", line 174, in <module>
    trainer.train(args)
  File "/content/kohya-trainer/train_network.py", line 177, in train
    train_dataset_group = config_util.generate_dataset_group_by_blueprint(blueprint.dataset_group)
  File "/content/kohya-trainer/library/config_util.py", line 426, in generate_dataset_group_by_blueprint
    dataset = dataset_klass(subsets=subsets, **asdict(dataset_blueprint.params))
  File "/content/kohya-trainer/library/train_util.py", line 1477, in __init__
    raise ValueError(f"no metadata / メタデータファイルがありません: {subset.metadata_file}")
ValueError: no metadata / メタデータファイルがありません: /content/LoRA/meta_lat.json

Traceback (most recent call last):
  File "/usr/local/bin/accelerate", line 8, in <module>
    sys.exit(main())
  File "/usr/local/lib/python3.10/dist-packages/accelerate/commands/accelerate_cli.py", line 45, in main
    args.func(args)
  File "/usr/local/lib/python3.10/dist-packages/accelerate/commands/launch.py", line 918, in launch_command
    simple_launcher(args)
  File "/usr/local/lib/python3.10/dist-packages/accelerate/commands/launch.py", line 580, in simple_launcher
    raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd)
CalledProcessError: Command '['/usr/bin/python3', 'sdxl_train_network.py', '--sample_prompts=/content/LoRA/config/sample_prompt.toml', '--config_file=/content/LoRA/config/config_file.toml', '--wandb_api_key=???????????']' returned non-zero exit status 1.
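The first traceback shows where the error comes from: because in_json is set in the config, the dataset is built as a FineTuningDataset, and library/train_util.py raises as soon as the file named by in_json does not exist. The "metadata" is not derived from the caption .txt files at train time; it is a JSON file that the notebook's data-preparation cells are supposed to write before training starts. Below is a hedged sketch of that preparation step, assuming the merge_captions_to_metadata.py and prepare_buckets_latents.py scripts in this repo's finetune/ directory and their usual flags (--caption_extension, --full_path, --max_resolution); the meta_cap.json intermediate name is my choice. Verify the exact invocation against the notebook's "Prepare Buckets and Latents" cell before relying on it.

```python
# Hedged sketch, not the notebook's exact cell: regenerate the metadata file
# that in_json points at. Script names and flags are assumed from this repo's
# finetune/ directory; paths are taken from the config below.
import subprocess

REPO = "/content/kohya-trainer"
train_data_dir = "/content/drive/MyDrive/kohya_ss/train_images/scheiren/10_scheiren"
meta_cap = "/content/LoRA/meta_cap.json"   # intermediate caption-only metadata
meta_lat = "/content/LoRA/meta_lat.json"   # final metadata with cached latents
model = "Linaqruf/animagine-xl"

# 1) merge the per-image .txt captions into a single metadata JSON
subprocess.run(
    ["python", "finetune/merge_captions_to_metadata.py",
     train_data_dir, meta_cap,
     "--caption_extension", ".txt", "--full_path"],
    cwd=REPO, check=True)

# 2) bucket the images and cache their VAE latents, writing meta_lat.json
subprocess.run(
    ["python", "finetune/prepare_buckets_latents.py",
     train_data_dir, meta_cap, meta_lat, model,
     "--batch_size", "4", "--max_resolution", "1024,1024",
     "--mixed_precision", "no", "--full_path"],
    cwd=REPO, check=True)
```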

And the following is the training config:

[sdxl_arguments]
cache_text_encoder_outputs = true
no_half_vae = true
min_timestep = 0
max_timestep = 1000
shuffle_caption = false
lowram = true

[model_arguments]
pretrained_model_name_or_path = "Linaqruf/animagine-xl"
vae = "/content/vae/sdxl_vae.safetensors"

[dataset_arguments]
debug_dataset = false
in_json = "/content/LoRA/meta_lat.json"
train_data_dir = "/content/drive/MyDrive/kohya_ss/train_images/scheiren/10_scheiren"
dataset_repeats = 10
keep_tokens = 1
resolution = "1024,1024"
color_aug = false
token_warmup_min = 1
token_warmup_step = 0

[training_arguments]
output_dir = "/content/drive/MyDrive/kohya-trainer/output/sdxl_lora_scheiren1"
output_name = "sdxl_lora_scheiren1"
save_precision = "fp16"
save_every_n_epochs = 2
train_batch_size = 4
max_token_length = 225
mem_eff_attn = false
sdpa = false
xformers = true
max_train_epochs = 10
max_data_loader_n_workers = 8
persistent_data_loader_workers = true
gradient_checkpointing = true
gradient_accumulation_steps = 1
mixed_precision = "fp16"

[logging_arguments]
log_with = "wandb"
log_tracker_name = "sdxl_lora1"
logging_dir = "/content/LoRA/logs"

[sample_prompt_arguments]
sample_every_n_epochs = 2
sample_sampler = "euler_a"

[saving_arguments]
save_model_as = "safetensors"

[optimizer_arguments]
optimizer_type = "AdamW"
learning_rate = 0.0001
max_grad_norm = 0
optimizer_args = ["scale_parameter=False", "relative_step=False", "warmup_init=False"]
lr_scheduler = "constant_with_warmup"
lr_warmup_steps = 100

[additional_network_arguments]
no_metadata = false
network_module = "lycoris.kohya"
network_dim = 8
network_alpha = 4
network_args = ["algo=loha", "conv_dim=4", "conv_alpha=1"]
network_train_unet_only = true

[advanced_training_config]
save_state = false
save_last_n_epochs_state = false
caption_dropout_rate = 0
caption_tag_dropout_rate = 0.5
caption_dropout_every_n_epochs = 0
min_snr_gamma = 5
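For reference, the file named by in_json maps each image to its caption and, after latent caching, its bucketed training resolution. The entry shape below is an assumption from memory of what the finetune scripts emit, written as a Python literal; "image_001" is a hypothetical file in train_data_dir, and the caption text is invented for illustration.

```python
# Assumed shape of one meta_lat.json entry (field names recalled from the
# finetune scripts' output; verify against a real file). Entries are keyed by
# the image path, with --full_path, minus the file extension.
example_entry = {
    "/content/drive/MyDrive/kohya_ss/train_images/scheiren/10_scheiren/image_001": {
        "caption": "1girl, scheiren, solo",   # written by merge_captions_to_metadata.py
        "train_resolution": [1024, 1024],     # added by prepare_buckets_latents.py
    }
}
```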

Karbadel commented 8 months ago

Try changing mixed_precision = "fp16" to "no". Not really sure why, but that helped me.
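If you want to try that without re-running the notebook's config cell, one quick way is to patch the generated TOML in place; the config path below comes from the accelerate command in the traceback above. Note this leaves save_precision = "fp16" untouched, so saved weights stay half precision.

```python
# Patch the generated config file in place to try mixed_precision = "no".
from pathlib import Path

cfg = Path("/content/LoRA/config/config_file.toml")
text = cfg.read_text(encoding="utf-8")
cfg.write_text(
    text.replace('mixed_precision = "fp16"', 'mixed_precision = "no"'),
    encoding="utf-8",
)
```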

bryanlovely commented 7 months ago

I also just ran into this issue.