Nerogar / OneTrainer

OneTrainer is a one-stop solution for all your stable diffusion training needs.
GNU Affero General Public License v3.0

[Bug]: DoRA training results = colored pixels #428

Closed MNeMoNiCuZ closed 3 months ago

MNeMoNiCuZ commented 3 months ago

What happened?

When training with Decompose Weights on, the training samples come out as colored pixel noise, as in the attached sample image: 2024-08-10_17-09-54-training-sample-300-37-4

I have set fused to false in my config.

What did you expect would happen?

Training!

Relevant log output

Here are my files.
Config.json:

{
    "__version": 4,
    "training_method": "LORA",
    "model_type": "STABLE_DIFFUSION_XL_10_BASE",
    "debug_mode": false,
    "debug_dir": "debug",
    "workspace_dir": "D:\\AI\\Training\\Styles\\JediStyle\\OneTrainer\\PDXL",
    "cache_dir": "D:\\AI\\Training\\Styles\\JediStyle\\OneTrainer\\PDXL",
    "tensorboard": true,
    "tensorboard_expose": true,
    "continue_last_backup": false,
    "include_train_config": "NONE",
    "base_model_name": "stabilityai/stable-diffusion-xl-base-1.0",
    "weight_dtype": "BFLOAT_16",
    "output_dtype": "BFLOAT_16",
    "output_model_format": "SAFETENSORS",
    "output_model_destination": "D:\\AI\\Training\\Styles\\JediStyle\\Model\\PDXL\\JediStylePDXL_DoRA.safetensors",
    "gradient_checkpointing": true,
    "force_circular_padding": false,
    "concept_file_name": "D:\\AI\\Training\\Styles\\JediStyle\\Config\\PDXL\\onetrainer_concepts_pdxl_DoRA.json",
    "aspect_ratio_bucketing": true,
    "latent_caching": true,
    "clear_cache_before_training": false,
    "learning_rate_scheduler": "COSINE_WITH_RESTARTS",
    "custom_learning_rate_scheduler": null,
    "scheduler_params": [],
    "learning_rate": 0.0003,
    "learning_rate_warmup_steps": 1,
    "learning_rate_cycles": 100,
    "epochs": 84,
    "batch_size": 12,
    "gradient_accumulation_steps": 1,
    "ema": "OFF",
    "ema_decay": 0.999,
    "ema_update_step_interval": 5,
    "dataloader_threads": 4,
    "train_device": "cuda:0",
    "temp_device": "cpu",
    "train_dtype": "BFLOAT_16",
    "fallback_train_dtype": "BFLOAT_16",
    "enable_autocast_cache": true,
    "only_cache": false,
    "resolution": "1024",
    "attention_mechanism": "XFORMERS",
    "align_prop": false,
    "align_prop_probability": 0.1,
    "align_prop_loss": "AESTHETIC",
    "align_prop_weight": 0.01,
    "align_prop_steps": 20,
    "align_prop_truncate_steps": 0.5,
    "align_prop_cfg_scale": 7.0,
    "mse_strength": 1.0,
    "mae_strength": 0.0,
    "vb_loss_strength": 1.0,
    "loss_weight_fn": "CONSTANT",
    "loss_weight_strength": 5.0,
    "dropout_probability": 0.15,
    "loss_scaler": "NONE",
    "learning_rate_scaler": "GRADIENT_ACCUMULATION",
    "offset_noise_weight": 0.1,
    "perturbation_noise_weight": 0.0075,
    "rescale_noise_scheduler_to_zero_terminal_snr": false,
    "force_v_prediction": false,
    "force_epsilon_prediction": false,
    "min_noising_strength": 0.0,
    "max_noising_strength": 1.0,
    "timestep_distribution": "SIGMOID",
    "noising_weight": 1,
    "noising_bias": 0.5,
    "unet": {
        "__version": 0,
        "model_name": "",
        "include": true,
        "train": true,
        "stop_training_after": 0,
        "stop_training_after_unit": "NEVER",
        "learning_rate": 0.00015,
        "weight_dtype": "NONE",
        "dropout_probability": 0.0,
        "train_embedding": true,
        "attention_mask": false
    },
    "prior": {
        "__version": 0,
        "model_name": "",
        "include": true,
        "train": true,
        "stop_training_after": 0,
        "stop_training_after_unit": "NEVER",
        "learning_rate": 0.00015,
        "weight_dtype": "NONE",
        "dropout_probability": 0.0,
        "train_embedding": true,
        "attention_mask": false
    },
    "text_encoder": {
        "__version": 0,
        "model_name": "",
        "include": true,
        "train": true,
        "stop_training_after": 80,
        "stop_training_after_unit": "EPOCH",
        "learning_rate": 2.5e-05,
        "weight_dtype": "NONE",
        "dropout_probability": 0.1,
        "train_embedding": false,
        "attention_mask": false
    },
    "text_encoder_layer_skip": 0,
    "text_encoder_2": {
        "__version": 0,
        "model_name": "",
        "include": true,
        "train": true,
        "stop_training_after": 80,
        "stop_training_after_unit": "EPOCH",
        "learning_rate": 2.5e-05,
        "weight_dtype": "NONE",
        "dropout_probability": 0.1,
        "train_embedding": true,
        "attention_mask": false
    },
    "text_encoder_2_layer_skip": 0,
    "text_encoder_3": {
        "__version": 0,
        "model_name": "",
        "include": true,
        "train": false,
        "stop_training_after": 30,
        "stop_training_after_unit": "EPOCH",
        "learning_rate": null,
        "weight_dtype": "NONE",
        "dropout_probability": 0.1,
        "train_embedding": true,
        "attention_mask": false
    },
    "text_encoder_3_layer_skip": 0,
    "vae": {
        "__version": 0,
        "model_name": "",
        "include": true,
        "train": false,
        "stop_training_after": null,
        "stop_training_after_unit": "NEVER",
        "learning_rate": null,
        "weight_dtype": "FLOAT_32",
        "dropout_probability": 0.0,
        "train_embedding": true,
        "attention_mask": false
    },
    "effnet_encoder": {
        "__version": 0,
        "model_name": "",
        "include": true,
        "train": false,
        "stop_training_after": null,
        "stop_training_after_unit": "NEVER",
        "learning_rate": null,
        "weight_dtype": "NONE",
        "dropout_probability": 0.0,
        "train_embedding": true,
        "attention_mask": false
    },
    "decoder": {
        "__version": 0,
        "model_name": "",
        "include": true,
        "train": false,
        "stop_training_after": null,
        "stop_training_after_unit": "NEVER",
        "learning_rate": null,
        "weight_dtype": "NONE",
        "dropout_probability": 0.0,
        "train_embedding": true,
        "attention_mask": false
    },
    "decoder_text_encoder": {
        "__version": 0,
        "model_name": "",
        "include": true,
        "train": false,
        "stop_training_after": null,
        "stop_training_after_unit": "NEVER",
        "learning_rate": null,
        "weight_dtype": "NONE",
        "dropout_probability": 0.0,
        "train_embedding": true,
        "attention_mask": false
    },
    "decoder_vqgan": {
        "__version": 0,
        "model_name": "",
        "include": true,
        "train": false,
        "stop_training_after": null,
        "stop_training_after_unit": "NEVER",
        "learning_rate": null,
        "weight_dtype": "NONE",
        "dropout_probability": 0.0,
        "train_embedding": true,
        "attention_mask": false
    },
    "masked_training": false,
    "unmasked_probability": 0.1,
    "unmasked_weight": 0.1,
    "normalize_masked_area_loss": false,
    "embedding_learning_rate": null,
    "preserve_embedding_norm": false,
    "embedding": {
        "__version": 0,
        "uuid": "d11c8d5e-0e34-4a70-90bb-0d7a06df0422",
        "model_name": "",
        "placeholder": "<embedding>",
        "train": false,
        "stop_training_after": null,
        "stop_training_after_unit": "NEVER",
        "token_count": 1,
        "initial_embedding_text": "*"
    },
    "additional_embeddings": [],
    "embedding_weight_dtype": "FLOAT_32",
    "peft_type": "LORA",
    "lora_model_name": "",
    "lora_rank": 32,
    "lora_alpha": 32.0,
    "lora_decompose": true,
    "lora_decompose_norm_epsilon": true,
    "lora_weight_dtype": "FLOAT_32",
    "lora_layers": "",
    "lora_layer_preset": null,
    "bundle_additional_embeddings": true,
    "optimizer": {
        "__version": 0,
        "optimizer": "ADAMW",
        "adam_w_mode": false,
        "alpha": null,
        "amsgrad": false,
        "beta1": 0.9,
        "beta2": 0.999,
        "beta3": 0.9999,
        "bias_correction": false,
        "block_wise": false,
        "capturable": false,
        "centered": false,
        "clip_threshold": null,
        "d0": null,
        "d_coef": null,
        "dampening": null,
        "decay_rate": null,
        "decouple": false,
        "differentiable": false,
        "eps": 1e-08,
        "eps2": null,
        "foreach": false,
        "fsdp_in_use": false,
        "fused": false,
        "fused_back_pass": false,
        "growth_rate": null,
        "initial_accumulator_value": null,
        "is_paged": false,
        "log_every": null,
        "lr_decay": null,
        "max_unorm": null,
        "maximize": false,
        "min_8bit_size": null,
        "momentum": null,
        "nesterov": false,
        "no_prox": false,
        "optim_bits": null,
        "percentile_clipping": null,
        "r": null,
        "relative_step": false,
        "safeguard_warmup": false,
        "scale_parameter": false,
        "stochastic_rounding": true,
        "use_bias_correction": false,
        "use_triton": false,
        "warmup_init": false,
        "weight_decay": 0.01,
        "weight_lr_power": null
    },
    "optimizer_defaults": {
        "CAME": {
            "__version": 0,
            "optimizer": "CAME",
            "adam_w_mode": false,
            "alpha": null,
            "amsgrad": false,
            "beta1": 0.9,
            "beta2": 0.999,
            "beta3": 0.9999,
            "bias_correction": false,
            "block_wise": false,
            "capturable": false,
            "centered": false,
            "clip_threshold": null,
            "d0": null,
            "d_coef": null,
            "dampening": null,
            "decay_rate": null,
            "decouple": false,
            "differentiable": false,
            "eps": 1e-30,
            "eps2": 1e-16,
            "foreach": false,
            "fsdp_in_use": false,
            "fused": false,
            "fused_back_pass": false,
            "growth_rate": null,
            "initial_accumulator_value": null,
            "is_paged": false,
            "log_every": null,
            "lr_decay": null,
            "max_unorm": null,
            "maximize": false,
            "min_8bit_size": null,
            "momentum": null,
            "nesterov": false,
            "no_prox": false,
            "optim_bits": null,
            "percentile_clipping": null,
            "r": null,
            "relative_step": false,
            "safeguard_warmup": false,
            "scale_parameter": false,
            "stochastic_rounding": true,
            "use_bias_correction": false,
            "use_triton": false,
            "warmup_init": false,
            "weight_decay": 0.01,
            "weight_lr_power": null
        }
    },
    "sample_definition_file_name": "D:\\AI\\Training\\Styles\\JediStyle\\Config\\PDXL\\onetrainer_samples_pdxl_DoRA.json",
    "sample_after": 100,
    "sample_after_unit": "STEP",
    "sample_image_format": "PNG",
    "samples_to_tensorboard": true,
    "non_ema_sampling": true,
    "backup_after": 100,
    "backup_after_unit": "STEP",
    "rolling_backup": true,
    "rolling_backup_count": 1,
    "backup_before_save": true,
    "save_after": 100,
    "save_after_unit": "STEP",
    "save_filename_prefix": "JediStyle_"
}
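
For anyone skimming the full config above, the parts that matter for this report are the DoRA flags and the learning rates. Here is a small, illustrative Python sketch that just pulls those keys back out of the file (the `config.json` path is a placeholder, not part of my actual setup):

```python
import json

# Placeholder path for illustration; point this at your own exported config.
with open("config.json", encoding="utf-8") as f:
    cfg = json.load(f)

# Keys relevant to the DoRA / learning-rate discussion in this issue.
relevant = {
    "peft_type": cfg["peft_type"],                              # "LORA"
    "lora_decompose": cfg["lora_decompose"],                    # true -> DoRA
    "lora_decompose_norm_epsilon": cfg["lora_decompose_norm_epsilon"],
    "lora_rank": cfg["lora_rank"],                              # 32
    "lora_alpha": cfg["lora_alpha"],                            # 32.0
    "global_learning_rate": cfg["learning_rate"],               # 3e-4
    "unet_learning_rate": cfg["unet"]["learning_rate"],         # 1.5e-4
    "text_encoder_learning_rate": cfg["text_encoder"]["learning_rate"],  # 2.5e-5
    "optimizer": cfg["optimizer"]["optimizer"],                 # "ADAMW"
    "fused": cfg["optimizer"]["fused"],                         # false
}
print(json.dumps(relevant, indent=2))
```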

Concepts.json:

[
    {
        "__version": 1,
        "image": {
            "__version": 0,
            "enable_crop_jitter": true,
            "enable_random_flip": false,
            "enable_fixed_flip": false,
            "enable_random_rotate": false,
            "enable_fixed_rotate": false,
            "random_rotate_max_angle": 0.0,
            "enable_random_brightness": false,
            "enable_fixed_brightness": false,
            "random_brightness_max_strength": 0.0,
            "enable_random_contrast": false,
            "enable_fixed_contrast": false,
            "random_contrast_max_strength": 0.0,
            "enable_random_saturation": false,
            "enable_fixed_saturation": false,
            "random_saturation_max_strength": 0.0,
            "enable_random_hue": false,
            "enable_fixed_hue": false,
            "random_hue_max_strength": 0.0,
            "enable_resolution_override": false,
            "resolution_override": "512",
            "enable_random_circular_mask_shrink": false,
            "enable_random_mask_rotate_crop": false
        },
        "text": {
            "__version": 0,
            "prompt_source": "sample",
            "prompt_path": "",
            "enable_tag_shuffling": false,
            "tag_delimiter": ",",
            "keep_tags_count": 1
        },
        "name": "JediStyle",
        "path": "D:\\AI\\Training\\Styles\\JediStyle\\img\\2_JediStyle",
        "seed": 1,
        "enabled": true,
        "include_subdirectories": true,
        "image_variations": 1,
        "text_variations": 1,
        "balancing": 100,
        "balancing_strategy": "SAMPLES",
        "loss_weight": 1.0
    }
]

Samples JSON:

[
    {
        "__version": 0,
        "enabled": true,
        "prompt": "JediStyle an apple and a banana fruit, score_9, score_8_up, score_7_up, score_6_up, score_5_up, score_4_up",
        "negative_prompt": "ugly",
        "height": 768,
        "width": 768,
        "seed": 1234546756789,
        "random_seed": false,
        "diffusion_steps": 30,
        "cfg_scale": 7.0,
        "noise_scheduler": "EULER_A",
        "sample_inpainting": false,
        "base_image_path": "",
        "mask_image_path": ""
    },
    {
        "__version": 0,
        "enabled": true,
        "prompt": "JediStyle parrot in the jungle tree, score_9, score_8_up, score_7_up, score_6_up, score_5_up, score_4_up",
        "height": 768,
        "width": 768,
        "seed": 2345634577890,
        "random_seed": false,
        "diffusion_steps": 30,
        "cfg_scale": 7.0,
        "noise_scheduler": "EULER_A",
        "sample_inpainting": false,
        "base_image_path": "",
        "mask_image_path": ""
    },
    {
        "__version": 0,
        "enabled": true,
        "prompt": "JediStyle motorbike driving on a dirt road, score_9, score_8_up, score_7_up, score_6_up, score_5_up, score_4_up",
        "height": 768,
        "width": 768,
        "seed": 345673458912,
        "random_seed": false,
        "diffusion_steps": 30,
        "cfg_scale": 7.0,
        "noise_scheduler": "EULER_A",
        "sample_inpainting": false,
        "base_image_path": "",
        "mask_image_path": ""
    },
    {
        "__version": 0,
        "enabled": true,
        "prompt": "JediStyle A beautiful woman dancing in the rain, sensual, medium shot, score_9, score_8_up, score_7_up, score_6_up, score_5_up, score_4_up",
        "height": 768,
        "width": 768,
        "seed": 1234534566789,
        "random_seed": false,
        "diffusion_steps": 30,
        "cfg_scale": 7.0,
        "noise_scheduler": "EULER_A",
        "sample_inpainting": false,
        "base_image_path": "",
        "mask_image_path": ""
    },
    {
        "__version": 0,
        "enabled": true,
        "prompt": "JediStyle coffee machine in the kitchen, score_9, score_8_up, score_7_up, score_6_up, score_5_up, score_4_up",
        "height": 768,
        "width": 768,
        "seed": 234567890,
        "random_seed": false,
        "diffusion_steps": 30,
        "cfg_scale": 7.0,
        "noise_scheduler": "EULER_A",
        "sample_inpainting": false,
        "base_image_path": "",
        "mask_image_path": ""
    },
    {
        "__version": 0,
        "enabled": true,
        "prompt": "JediStyle Middle Eastern warriors fighting to the death, blood, score_9, score_8_up, score_7_up, score_6_up, score_5_up, score_4_up",
        "height": 768,
        "width": 768,
        "seed": 345678912,
        "random_seed": false,
        "diffusion_steps": 30,
        "cfg_scale": 7.0,
        "noise_scheduler": "EULER_A",
        "sample_inpainting": false,
        "base_image_path": "",
        "mask_image_path": ""
    }
]

I'm trying to train a PDXL (Pony Diffusion XL) DoRA for a style model.


### Output of `pip freeze`

(venv) C:\AI!Training\OneTrainer>pip freeze
absl-py==2.1.0
accelerate==0.30.1
aiohappyeyeballs==2.3.4
aiohttp==3.10.1
aiosignal==1.3.1
antlr4-python3-runtime==4.9.3
async-timeout==4.0.3
attrs==24.1.0
bitsandbytes==0.43.1
certifi==2024.7.4
charset-normalizer==3.3.2
cloudpickle==3.0.0
colorama==0.4.6
coloredlogs==15.0.1
contourpy==1.2.1
customtkinter==5.2.2
cycler==0.12.1
dadaptation==3.2
darkdetect==0.8.0
-e git+https://github.com/huggingface/diffusers.git@dd4b731e68f88f58dfabfb68f28e00ede2bb90ae#egg=diffusers
filelock==3.15.4
flatbuffers==24.3.25
fonttools==4.53.1
frozenlist==1.4.1
fsspec==2024.6.1
ftfy==6.2.0
grpcio==1.65.4
huggingface-hub==0.23.3
humanfriendly==10.0
idna==3.7
importlib_metadata==8.2.0
intel-openmp==2021.4.0
invisible-watermark==0.2.0
Jinja2==3.1.4
kiwisolver==1.4.5
lightning-utilities==0.11.6
lion-pytorch==0.1.4
Markdown==3.6
markdown-it-py==3.0.0
MarkupSafe==2.1.5
matplotlib==3.9.0
mdurl==0.1.2
-e git+https://github.com/Nerogar/mgds.git@d38efdf377a2d52c32aebf7820f10342e16221bf#egg=mgds
mkl==2021.4.0
mpmath==1.3.0
multidict==6.0.5
networkx==3.3
numpy==1.26.4
omegaconf==2.3.0
onnxruntime-gpu==1.18.0
open-clip-torch==2.24.0
opencv-python==4.9.0.80
packaging==24.1
pillow==10.3.0
platformdirs==4.2.2
pooch==1.8.1
prodigyopt==1.0
protobuf==4.25.4
psutil==6.0.0
Pygments==2.18.0
pynvml==11.5.0
pyparsing==3.1.2
pyreadline3==3.4.1
python-dateutil==2.9.0.post0
pytorch-lightning==2.2.5
pytorch_optimizer==3.0.2
PyWavelets==1.6.0
PyYAML==6.0.1
regex==2024.7.24
requests==2.32.3
rich==13.7.1
safetensors==0.4.3
scalene==1.5.41
schedulefree==1.2.5
sentencepiece==0.2.0
six==1.16.0
sympy==1.13.1
tbb==2021.13.0
tensorboard==2.17.0
tensorboard-data-server==0.7.2
timm==1.0.8
tokenizers==0.19.1
torch==2.3.1+cu118
torchmetrics==1.4.1
torchvision==0.18.1+cu118
tqdm==4.66.4
transformers==4.42.3
typing_extensions==4.12.2
urllib3==2.2.2
wcwidth==0.2.13
Werkzeug==3.0.3
xformers==0.0.27+cu118
yarl==1.9.4
zipp==3.19.2

mx commented 3 months ago

I assume that image was from one of the samples during the training run? Turn down your learning rate.

MNeMoNiCuZ commented 3 months ago

Yes, all my samples are like this.

Does DoRA require much different LRs?

I'm running the same LR as I do with other SDXL models and it works just fine.
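
For context on why the learning rate can matter more here: DoRA reparameterizes each adapted weight into a learned magnitude vector and a normalized direction, roughly W' = m * (W0 + B·A) / ||W0 + B·A||, so the same LR now also drives the magnitude parameters, and direction updates get rescaled by the normalization. Below is a rough, illustrative PyTorch sketch of that decomposition; it is not OneTrainer's actual code, the layer name and the 1e-6 epsilon are made up for illustration, and the norm axis may differ from the real implementation:

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class DoRALinear(nn.Module):
    """Illustrative DoRA-style linear layer: magnitude * normalized(W0 + B @ A)."""

    def __init__(self, base: nn.Linear, rank: int = 32, alpha: float = 32.0):
        super().__init__()
        self.weight = base.weight                 # frozen base weight W0, shape (out, in)
        self.weight.requires_grad_(False)
        self.bias = base.bias
        out_f, in_f = base.weight.shape
        # Low-rank direction update, initialized so B @ A == 0 at the start.
        self.lora_a = nn.Parameter(torch.randn(rank, in_f) * 0.01)
        self.lora_b = nn.Parameter(torch.zeros(out_f, rank))
        self.scale = alpha / rank
        # Learned per-output-channel magnitude, initialized to the norm of W0.
        self.magnitude = nn.Parameter(base.weight.norm(dim=1, keepdim=True).clone())

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        directed = self.weight + self.scale * (self.lora_b @ self.lora_a)
        # Normalize each output row, then rescale by the learned magnitude.
        unit = directed / (directed.norm(dim=1, keepdim=True) + 1e-6)
        return F.linear(x, self.magnitude * unit, self.bias)
```

Because the magnitude and the low-rank direction are trained together, DoRA runs are often reported to be more sensitive to the learning rate than a plain LoRA at the same rank, which is presumably what the suggestion to turn the LR down is getting at.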