kohya-ss / sd-scripts

Apache License 2.0
4.96k stars 834 forks source link

Training won't start; it gives me this error #1534

Closed Signorlimone closed 2 weeks ago

Signorlimone commented 3 weeks ago

```
2024-08-30 17:17:27 INFO move vae and unet to cpu to save memory flux_train_network.py:187
Traceback (most recent call last):
  File "C:\AI\kohya_ss\sd-scripts\flux_train_network.py", line 446, in <module>
    trainer.train(args)
  File "C:\AI\kohya_ss\sd-scripts\train_network.py", line 392, in train
    self.cache_text_encoder_outputs_if_needed(args, accelerator, unet, vae, text_encoders, train_dataset_group, weight_dtype)
  File "C:\AI\kohya_ss\sd-scripts\flux_train_network.py", line 191, in cache_text_encoder_outputs_if_needed
    unet.to("cpu")
  File "C:\AI\kohya_ss\venv\lib\site-packages\torch\nn\modules\module.py", line 1174, in to
    return self._apply(convert)
  File "C:\AI\kohya_ss\venv\lib\site-packages\torch\nn\modules\module.py", line 780, in _apply
    module._apply(fn)
  File "C:\AI\kohya_ss\venv\lib\site-packages\torch\nn\modules\module.py", line 805, in _apply
    param_applied = fn(param)
  File "C:\AI\kohya_ss\venv\lib\site-packages\torch\nn\modules\module.py", line 1167, in convert
    raise NotImplementedError(
NotImplementedError: Cannot copy out of meta tensor; no data! Please use torch.nn.Module.to_empty() instead of torch.nn.Module.to() when moving module from meta to a different device.
```
Traceback (most recent call last): File "C:\Users\RINO\AppData\Local\Programs\Python\Python310\lib\runpy.py", line 196, in _run_module_as_main return _run_code(code, main_globals, None, File "C:\Users\RINO\AppData\Local\Programs\Python\Python310\lib\runpy.py", line 86, in _run_code exec(code, run_globals) File "C:\AI\kohya_ss\venv\Scripts\accelerate.EXE__main__.py", line 7, in File "C:\AI\kohya_ss\venv\lib\site-packages\accelerate\commands\accelerate_cli.py", line 48, in main args.func(args) File "C:\AI\kohya_ss\venv\lib\site-packages\accelerate\commands\launch.py", line 1106, in launch_command simple_launcher(args) File "C:\AI\kohya_ss\venv\lib\site-packages\accelerate\commands\launch.py", line 704, in simple_launcher raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd) subprocess.CalledProcessError: Command '['C:\AI\kohya_ss\venv\Scripts\python.exe', 'C:/AI/kohya_ss/sd-scripts/flux_train_network.py', '--config_file', 'C:/Users/RINO/Pictures/LoRa_Training/CLAZY_XL/fluxmodels/config_lora-20240830-171623.toml', '--network_args', 'loraplus_unet_lr_ra

These are my settings:

{ "LoRA_type": "Flux1", "LyCORIS_preset": "full", "adaptive_noise_scale": 0, "additional_parameters": "--network_args \"loraplus_unet_lr_ratio=4\" ", "ae": "C:/AI/ComfyUI_windows_portable_nvidia/ComfyUI_windows_portable/ComfyUI/models/vae/FLUX1/ae.safetensors", "apply_t5_attn_mask": false, "async_upload": false, "block_alphas": "", "block_dims": "", "block_lr_zero_threshold": "", "bucket_no_upscale": true, "bucket_reso_steps": 64, "bypass_mode": false, "cache_latents": true, "cache_latents_to_disk": true, "caption_dropout_every_n_epochs": 0, "caption_dropout_rate": 0, "caption_extension": ".txt", "clip_l": "C:/AI/ComfyUI_windows_portable_nvidia/ComfyUI_windows_portable/ComfyUI/models/clip/clip_l.safetensors", "clip_skip": 1, "color_aug": false, "constrain": 0, "conv_alpha": 1, "conv_block_alphas": "", "conv_block_dims": "", "conv_dim": 1, "dataset_config": "", "debiased_estimation_loss": false, "decompose_both": false, "dim_from_weights": false, "discrete_flow_shift": 3, "dora_wd": false, "down_lr_weight": "", "dynamo_backend": "no", "dynamo_mode": "default", "dynamo_use_dynamic": false, "dynamo_use_fullgraph": false, "enable_bucket": true, "epoch": 10, "extra_accelerate_launch_args": "", "factor": -1, "flip_aug": false, "flux1_cache_text_encoder_outputs": true, "flux1_cache_text_encoder_outputs_to_disk": true, "flux1_checkbox": true, "fp8_base": false, "fp8_base_unet": false, "full_bf16": false, "full_fp16": false, "gpu_ids": "", "gradient_accumulation_steps": 1, "gradient_checkpointing": false, "guidance_scale": 3.5, "highvram": false, "huber_c": 0.1, "huber_schedule": "snr", "huggingface_path_in_repo": "", "huggingface_repo_id": "", "huggingface_repo_type": "", "huggingface_repo_visibility": "", "huggingface_token": "", "ip_noise_gamma": 0, "ip_noise_gamma_random_strength": false, "keep_tokens": 0, "learning_rate": 0.0005, "log_config": false, "log_tracker_config": "", "log_tracker_name": "", "log_with": "", "logging_dir": "", "loraplus_lr_ratio": 0, 
"loraplus_text_encoder_lr_ratio": 0, "loraplus_unet_lr_ratio": 0, "loss_type": "l2", "lowvram": false, "lr_scheduler": "cosine", "lr_scheduler_args": "", "lr_scheduler_num_cycles": 1, "lr_scheduler_power": 1, "lr_scheduler_type": "", "lr_warmup": 10, "main_process_port": 0, "masked_loss": false, "max_bucket_reso": 2048, "max_data_loader_n_workers": 0, "max_grad_norm": 1, "max_resolution": "512,512", "max_timestep": 1000, "max_token_length": 75, "max_train_epochs": 0, "max_train_steps": 1600, "mem_eff_attn": false, "mem_eff_save": false, "metadata_author": "", "metadata_description": "", "metadata_license": "", "metadata_tags": "", "metadata_title": "", "mid_lr_weight": "", "min_bucket_reso": 256, "min_snr_gamma": 0, "min_timestep": 0, "mixed_precision": "fp16", "model_list": "", "model_prediction_type": "sigma_scaled", "module_dropout": 0, "multi_gpu": false, "multires_noise_discount": 0.3, "multires_noise_iterations": 0, "network_alpha": 16, "network_dim": 2, "network_dropout": 0, "network_weights": "", "noise_offset": 0, "noise_offset_random_strength": false, "noise_offset_type": "Original", "num_cpu_threads_per_process": 2, "num_machines": 1, "num_processes": 1, "optimizer": "AdamW8bit", "optimizer_args": "", "output_dir": "C:/Users/RINO/Pictures/LoRa_Training/CLAZY_XL/fluxmodels", "output_name": "clayflux_0.1", "persistent_data_loader_workers": false, "pretrained_model_name_or_path": "C:/AI/kohya_ss/models/flux_dev.safetensors", "prior_loss_weight": 1, "random_crop": false, "rank_dropout": 0, "rank_dropout_scale": false, "reg_data_dir": "", "rescaled": false, "resume": "", "resume_from_huggingface": "", "sample_every_n_epochs": 0, "sample_every_n_steps": 50, "sample_prompts": "claymation, The movie poster for \"The Bowl Is Not Enough\" features James Pond, an anthropomorphic fish dressed as a suave secret agent, at the center of the action. 
James Pond is depicted in a sleek black tuxedo, holding a small, high-tech gadget in one fin while confidently pointing a stylish pistol with the other. His expression is serious, yet debonair, with a slight smirk that hints at his cleverness. Behind him, a dramatic backdrop showcases a swirling vortex of water, symbolizing high-stakes underwater action.Sharks, underwater fortresses, and enemy fish agents are visible within the vortex, creating a sense of intrigue and danger. At the bottom of the poster, the movie title \"The Bowl Is Not Enough\" is boldly displayed in glass letters half filled with waterThe color scheme is a mix of deep blues and blacks, with highlights of silver and white, enhancing the sleek, aquatic theme. --w512 --h512", "sample_sampler": "euler_a", "save_every_n_epochs": 1, "save_every_n_steps": 70, "save_last_n_steps": 0, "save_last_n_steps_state": 0, "save_model_as": "safetensors", "save_precision": "fp16", "save_state": true, "save_state_on_train_end": true, "save_state_to_huggingface": false, "scale_v_pred_loss_like_noise_pred": false, "scale_weight_norms": 0, "sdxl": false, "sdxl_cache_text_encoder_outputs": false, "sdxl_no_half_vae": false, "seed": 31337, "shuffle_caption": false, "split_mode": false, "split_qkv": false, "stop_text_encoder_training_pct": 0, "t5xxl": "C:/AI/kohya_ss/models/t5xxl_fp16.safetensors", "t5xxl_max_token_length": 512, "text_encoder_lr": 0, "timestep_sampling": "sigma", "train_batch_size": 1, "train_blocks": "single", "train_data_dir": "C:/Users/RINO/Pictures/LoRa_Training/CLAZY_XL/images", "train_norm": false, "train_on_input": true, "training_comment": "", "unet_lr": 0.0005, "unit": 1, "up_lr_weight": "", "use_cp": false, "use_scalar": false, "use_tucker": false, "v2": false, "v_parameterization": false, "v_pred_like_loss": 0, "vae": "", "vae_batch_size": 0, "wandb_api_key": "", "wandb_run_name": "", "weighted_captions": false, "xformers": "sdpa" }

If anyone can help, it would be much appreciated.

kohya-ss commented 3 weeks ago

The checkpoint seems to be fp8. Please enable the `fp8_base` option.

Signorlimone commented 2 weeks ago

thanks, it works now!

nephi-dev commented 2 weeks ago

I'm facing the same error, but the `fp8_base` parameter did not fix it for me.

nephi-dev commented 2 weeks ago

I'm facing the same error, but the `fp8_base` parameter did not fix it for me.

Fixed it: the culprit was the VAE.