Closed Arron17 closed 8 months ago
Try using AdamW instead of Prodigy. There have been a lot of updates to the SDXL code, and it is quite possible that it now takes more VRAM than the previous release because of bug fixes for things that were not properly implemented. As a result, the proper implementation now requires more VRAM (not really "more", but the actual proper amount of VRAM compared to when it was flawed).
SDXL is a pretty big beast to train. 24GB VRAM is borderline OK.
I can train an SDXL LoRA on a 4090 (24 GB), but I cannot train SDXL Dreambooth: it fails with a CUDA out-of-memory error. How can I solve this?
I was able to train SDXL LoRAs under this commit https://github.com/bmaltais/kohya_ss/commit/429d2f282fb32e7c63b5e5fe6dbc1ddef92bba58
With the exact same settings on the most recent update, I cannot; I get an out-of-memory error.
Prior to the update, I could train a LoRA at batch size 3 with a resolution of 768,768 without running out of VRAM. I could also train at 1024,1024 with batch size 2.
After the update, I can't even train at batch size 1 with a resolution of 768,768.
So there is a large regression
Graphics Card: RTX 3080 10GB
Example JSON that works on the listed commit, but does not work on the most recent update:
{ "LoRA_type": "Standard", "adaptive_noise_scale": 0, "additional_parameters": "--max_grad_norm=0", "block_alphas": "", "block_dims": "", "block_lr_zero_threshold": "", "bucket_no_upscale": true, "bucket_reso_steps": 64, "cache_latents": true, "cache_latents_to_disk": true, "caption_dropout_every_n_epochs": 0.0, "caption_dropout_rate": 0, "caption_extension": ".txt", "clip_skip": "1", "color_aug": false, "conv_alpha": 4, "conv_alphas": "", "conv_dim": 8, "conv_dims": "", "decompose_both": false, "dim_from_weights": false, "down_lr_weight": "", "enable_bucket": true, "epoch": 100, "factor": -1, "flip_aug": false, "full_fp16": false, "gradient_accumulation_steps": 1.0, "gradient_checkpointing": true, "keep_tokens": "0", "learning_rate": 1.0, "logging_dir": "D:/Stable-Diffusion-Training/SDXL LoRA/filename/log", "lora_network_weights": "", "lr_scheduler": "cosine", "lr_scheduler_num_cycles": "", "lr_scheduler_power": "", "lr_warmup": 0, "max_data_loader_n_workers": "0", "max_resolution": "1024,1024", "max_timestep": 1000, "max_token_length": "75", "max_train_epochs": "", "mem_eff_attn": true, "mid_lr_weight": "", "min_snr_gamma": 0, "min_timestep": 0, "mixed_precision": "bf16", "model_list": "custom", "module_dropout": 0, "multires_noise_discount": 0.2, "multires_noise_iterations": 8, "network_alpha": 8, "network_dim": 8, "network_dropout": 0, "no_token_padding": false, "noise_offset": 0.0357, "noise_offset_type": "Original", "num_cpu_threads_per_process": 2, "optimizer": "Prodigy", "optimizer_args": "decouple=True weight_decay=0.5 betas=0.9,0.99 use_bias_correction=True d_coef=2", "output_dir": "D:/Stable-Diffusion-Training/SDXL LoRA/filename/model", "output_name": "LoRAXL", "persistent_data_loader_workers": false, "pretrained_model_name_or_path": "D:/Code/Stable-Diffusion/AUTOMATIC1111/stable-diffusion-webui/models/Stable-diffusion/sdxl_base_pruned_no-ema.safetensors", "prior_loss_weight": 1.0, "random_crop": false, "rank_dropout": 0, "reg_data_dir": "", "resume": 
"", "sample_every_n_epochs": 0, "sample_every_n_steps": 0, "sample_prompts": "", "sample_sampler": "euler_a", "save_every_n_epochs": 2, "save_every_n_steps": 0, "save_last_n_steps": 0, "save_last_n_steps_state": 0, "save_model_as": "safetensors", "save_precision": "bf16", "save_state": false, "scale_v_pred_loss_like_noise_pred": false, "scale_weight_norms": 1, "sdxl": true, "sdxl_cache_text_encoder_outputs": true, "sdxl_no_half_vae": true, "seed": "12345", "shuffle_caption": false, "stop_text_encoder_training_pct": 0, "text_encoder_lr": 0.0, "train_batch_size": 1, "train_data_dir": "D:/Stable-Diffusion-Training/SDXL LoRA/filename/img", "train_on_input": false, "training_comment": "", "unet_lr": 1.0, "unit": 1, "up_lr_weight": "", "use_cp": true, "use_wandb": false, "v2": false, "v_parameterization": false, "vae_batch_size": 0, "wandb_api_key": "", "weighted_captions": false, "xformers": true }