[Bug]: Prodigy LR stays at the Initial D value and is not dynamic

betterftr commented 4 months ago

What happened?

I am trying to train only the Stable Cascade text encoder with Prodigy, but no matter what I change, the LR stays at the Initial D value (`d0`) from the optimizer settings. It never moves, and the model does not learn anything.

(sd3_attention_mask branch)

What did you expect would happen?

config:

{
    "__version": 4,
    "training_method": "FINE_TUNE",
    "model_type": "STABLE_CASCADE_1",
    "debug_mode": false,
    "debug_dir": "C:\\train\\debug",
    "workspace_dir": "C:\\train",
    "cache_dir": "C:\\train",
    "tensorboard": true,
    "tensorboard_expose": false,
    "continue_last_backup": true,
    "include_train_config": "NONE",
    "base_model_name": "C:\\train\\cascade\\original-stable-cascade-prior",
    "weight_dtype": "BFLOAT_16",
    "output_dtype": "BFLOAT_16",
    "output_model_format": "SAFETENSORS",
    "output_model_destination": "C:\\train",
    "gradient_checkpointing": true,
    "force_circular_padding": false,
    "concept_file_name": "training_concepts/test.json",
    "concepts": [
        {
            "__version": 1,
            "image": {
                "__version": 0,
                "enable_crop_jitter": false,
                "enable_random_flip": false,
                "enable_fixed_flip": false,
                "enable_random_rotate": false,
                "enable_fixed_rotate": false,
                "random_rotate_max_angle": 0.0,
                "enable_random_brightness": false,
                "enable_fixed_brightness": false,
                "random_brightness_max_strength": 0.0,
                "enable_random_contrast": false,
                "enable_fixed_contrast": false,
                "random_contrast_max_strength": 0.0,
                "enable_random_saturation": false,
                "enable_fixed_saturation": false,
                "random_saturation_max_strength": 0.0,
                "enable_random_hue": false,
                "enable_fixed_hue": false,
                "random_hue_max_strength": 0.0,
                "enable_resolution_override": false,
                "resolution_override": "512",
                "enable_random_circular_mask_shrink": false,
                "enable_random_mask_rotate_crop": false
            },
            "text": {
                "__version": 0,
                "prompt_source": "sample",
                "prompt_path": "",
                "enable_tag_shuffling": true,
                "tag_delimiter": ",",
                "keep_tags_count": 0
            },
            "name": "test",
            "path": "F:/test",
            "seed": 484668417,
            "enabled": true,
            "include_subdirectories": false,
            "image_variations": 1,
            "text_variations": 70,
            "balancing": 1.0,
            "balancing_strategy": "REPEATS",
            "loss_weight": 1.0
        }
    ],
    "aspect_ratio_bucketing": true,
    "latent_caching": true,
    "clear_cache_before_training": false,
    "learning_rate_scheduler": "CONSTANT",
    "custom_learning_rate_scheduler": null,
    "scheduler_params": [],
    "learning_rate": 1.0,
    "learning_rate_warmup_steps": 10,
    "learning_rate_cycles": 1,
    "epochs": 1000,
    "batch_size": 40,
    "gradient_accumulation_steps": 1,
    "ema": "OFF",
    "ema_decay": 0.999,
    "ema_update_step_interval": 5,
    "dataloader_threads": 2,
    "train_device": "cuda",
    "temp_device": "cpu",
    "train_dtype": "BFLOAT_16",
    "fallback_train_dtype": "BFLOAT_16",
    "enable_autocast_cache": false,
    "only_cache": false,
    "resolution": "512",
    "attention_mechanism": "SDP",
    "align_prop": false,
    "align_prop_probability": 0.1,
    "align_prop_loss": "AESTHETIC",
    "align_prop_weight": 0.01,
    "align_prop_steps": 20,
    "align_prop_truncate_steps": 0.5,
    "align_prop_cfg_scale": 7.0,
    "mse_strength": 0.7,
    "mae_strength": 0.3,
    "vb_loss_strength": 1.0,
    "loss_weight_fn": "DEBIASED_ESTIMATION",
    "loss_weight_strength": 1.0,
    "dropout_probability": 0.0,
    "loss_scaler": "NONE",
    "learning_rate_scaler": "NONE",
    "offset_noise_weight": 0.0,
    "perturbation_noise_weight": 0.0,
    "rescale_noise_scheduler_to_zero_terminal_snr": false,
    "force_v_prediction": false,
    "force_epsilon_prediction": false,
    "min_noising_strength": 0.0,
    "max_noising_strength": 1.0,
    "timestep_distribution": "HEAVY_TAIL",
    "noising_weight": 1.3,
    "noising_bias": 0.0,
    "unet": {
        "__version": 0,
        "model_name": "",
        "include": true,
        "train": true,
        "stop_training_after": 0,
        "stop_training_after_unit": "NEVER",
        "learning_rate": null,
        "weight_dtype": "NONE",
        "dropout_probability": 0.0,
        "train_embedding": true
    },
    "prior": {
        "__version": 0,
        "model_name": "",
        "include": true,
        "train": false,
        "stop_training_after": 0,
        "stop_training_after_unit": "NEVER",
        "learning_rate": 5e-05,
        "weight_dtype": "NONE",
        "dropout_probability": 0.0,
        "train_embedding": true
    },
    "text_encoder": {
        "__version": 0,
        "model_name": "",
        "include": true,
        "train": true,
        "stop_training_after": 0,
        "stop_training_after_unit": "NEVER",
        "learning_rate": null,
        "weight_dtype": "NONE",
        "dropout_probability": 0.0,
        "train_embedding": true
    },
    "text_encoder_layer_skip": 0,
    "text_encoder_2": {
        "__version": 0,
        "model_name": "",
        "include": true,
        "train": true,
        "stop_training_after": 30,
        "stop_training_after_unit": "EPOCH",
        "learning_rate": null,
        "weight_dtype": "NONE",
        "dropout_probability": 0.0,
        "train_embedding": true
    },
    "text_encoder_2_layer_skip": 0,
    "text_encoder_3": {
        "__version": 0,
        "model_name": "",
        "include": true,
        "train": true,
        "stop_training_after": 30,
        "stop_training_after_unit": "EPOCH",
        "learning_rate": null,
        "weight_dtype": "NONE",
        "dropout_probability": 0.0,
        "train_embedding": true
    },
    "text_encoder_3_layer_skip": 0,
    "vae": {
        "__version": 0,
        "model_name": "",
        "include": true,
        "train": true,
        "stop_training_after": null,
        "stop_training_after_unit": "NEVER",
        "learning_rate": null,
        "weight_dtype": "FLOAT_32",
        "dropout_probability": 0.0,
        "train_embedding": true
    },
    "effnet_encoder": {
        "__version": 0,
        "model_name": "C:/train/cascade/effnet_encoder.safetensors",
        "include": true,
        "train": true,
        "stop_training_after": null,
        "stop_training_after_unit": "NEVER",
        "learning_rate": null,
        "weight_dtype": "NONE",
        "dropout_probability": 0.0,
        "train_embedding": true
    },
    "decoder": {
        "__version": 0,
        "model_name": "stabilityai/stable-cascade",
        "include": true,
        "train": true,
        "stop_training_after": null,
        "stop_training_after_unit": "NEVER",
        "learning_rate": null,
        "weight_dtype": "NONE",
        "dropout_probability": 0.0,
        "train_embedding": true
    },
    "decoder_text_encoder": {
        "__version": 0,
        "model_name": "",
        "include": true,
        "train": true,
        "stop_training_after": null,
        "stop_training_after_unit": "NEVER",
        "learning_rate": null,
        "weight_dtype": "NONE",
        "dropout_probability": 0.0,
        "train_embedding": true
    },
    "decoder_vqgan": {
        "__version": 0,
        "model_name": "",
        "include": true,
        "train": true,
        "stop_training_after": null,
        "stop_training_after_unit": "NEVER",
        "learning_rate": null,
        "weight_dtype": "NONE",
        "dropout_probability": 0.0,
        "train_embedding": true
    },
    "masked_training": false,
    "unmasked_probability": 0.1,
    "unmasked_weight": 0.1,
    "normalize_masked_area_loss": false,
    "embedding_learning_rate": null,
    "preserve_embedding_norm": false,
    "embedding": {
        "__version": 0,
        "uuid": "1154881e-7ea5-437a-9220-86b64dcd2509",
        "model_name": "",
        "placeholder": "<embedding>",
        "train": true,
        "stop_training_after": null,
        "stop_training_after_unit": "NEVER",
        "token_count": 1,
        "initial_embedding_text": "*"
    },
    "additional_embeddings": [],
    "embedding_weight_dtype": "FLOAT_32",
    "lora_model_name": "",
    "lora_rank": 16,
    "lora_alpha": 1.0,
    "lora_weight_dtype": "FLOAT_32",
    "bundle_additional_embeddings": true,
    "optimizer": {
        "__version": 0,
        "optimizer": "PRODIGY",
        "adam_w_mode": false,
        "alpha": null,
        "amsgrad": false,
        "beta1": 0.9,
        "beta2": 0.999,
        "beta3": null,
        "bias_correction": false,
        "block_wise": false,
        "capturable": false,
        "centered": false,
        "clip_threshold": null,
        "d0": 1e-07,
        "d_coef": 2.0,
        "dampening": null,
        "decay_rate": null,
        "decouple": true,
        "differentiable": false,
        "eps": 1e-08,
        "eps2": null,
        "foreach": false,
        "fsdp_in_use": false,
        "fused": false,
        "fused_back_pass": false,
        "growth_rate": "inf",
        "initial_accumulator_value": null,
        "is_paged": false,
        "log_every": null,
        "lr_decay": null,
        "max_unorm": null,
        "maximize": false,
        "min_8bit_size": null,
        "momentum": null,
        "nesterov": false,
        "no_prox": false,
        "optim_bits": null,
        "percentile_clipping": null,
        "r": null,
        "relative_step": false,
        "safeguard_warmup": true,
        "scale_parameter": false,
        "stochastic_rounding": true,
        "use_bias_correction": true,
        "use_triton": false,
        "warmup_init": false,
        "weight_decay": 0.05,
        "weight_lr_power": null
    }
}

Update:
I changed back to the default settings and the LR still does not seem to move; the samples are the same as the first one. Config:
{
    "__version": 4,
    "training_method": "FINE_TUNE",
    "model_type": "STABLE_CASCADE_1",
    "debug_mode": false,
    "debug_dir": "debug",
    "workspace_dir": "C:\\train",
    "cache_dir": "C:\\train",
    "tensorboard": true,
    "tensorboard_expose": false,
    "continue_last_backup": false,
    "include_train_config": "NONE",
    "base_model_name": "C:\\train\\cascade\\original-stable-cascade-prior",
    "weight_dtype": "BFLOAT_16",
    "output_dtype": "BFLOAT_16",
    "output_model_format": "SAFETENSORS",
    "output_model_destination": "C:\\train",
    "gradient_checkpointing": true,
    "force_circular_padding": false,
    "concept_file_name": "training_concepts/test.json",
    "concepts": [
        {
            "__version": 1,
            "image": {
                "__version": 0,
                "enable_crop_jitter": false,
                "enable_random_flip": false,
                "enable_fixed_flip": false,
                "enable_random_rotate": false,
                "enable_fixed_rotate": false,
                "random_rotate_max_angle": 0.0,
                "enable_random_brightness": false,
                "enable_fixed_brightness": false,
                "random_brightness_max_strength": 0.0,
                "enable_random_contrast": false,
                "enable_fixed_contrast": false,
                "random_contrast_max_strength": 0.0,
                "enable_random_saturation": false,
                "enable_fixed_saturation": false,
                "random_saturation_max_strength": 0.0,
                "enable_random_hue": false,
                "enable_fixed_hue": false,
                "random_hue_max_strength": 0.0,
                "enable_resolution_override": false,
                "resolution_override": "512",
                "enable_random_circular_mask_shrink": false,
                "enable_random_mask_rotate_crop": false
            },
            "text": {
                "__version": 0,
                "prompt_source": "sample",
                "prompt_path": "",
                "enable_tag_shuffling": true,
                "tag_delimiter": ",",
                "keep_tags_count": 0
            },
            "name": "test",
            "path": "F:/test",
            "seed": 484668417,
            "enabled": true,
            "include_subdirectories": false,
            "image_variations": 1,
            "text_variations": 70,
            "balancing": 1.0,
            "balancing_strategy": "REPEATS",
            "loss_weight": 1.0
        }
    ],
    "aspect_ratio_bucketing": true,
    "latent_caching": true,
    "clear_cache_before_training": false,
    "learning_rate_scheduler": "CONSTANT",
    "custom_learning_rate_scheduler": null,
    "scheduler_params": [],
    "learning_rate": 1.0,
    "learning_rate_warmup_steps": 10,
    "learning_rate_cycles": 1,
    "epochs": 1000,
    "batch_size": 40,
    "gradient_accumulation_steps": 1,
    "ema": "OFF",
    "ema_decay": 0.999,
    "ema_update_step_interval": 5,
    "dataloader_threads": 2,
    "train_device": "cuda",
    "temp_device": "cpu",
    "train_dtype": "BFLOAT_16",
    "fallback_train_dtype": "BFLOAT_16",
    "enable_autocast_cache": false,
    "only_cache": false,
    "resolution": "512",
    "attention_mechanism": "SDP",
    "align_prop": false,
    "align_prop_probability": 0.1,
    "align_prop_loss": "AESTHETIC",
    "align_prop_weight": 0.01,
    "align_prop_steps": 20,
    "align_prop_truncate_steps": 0.5,
    "align_prop_cfg_scale": 7.0,
    "mse_strength": 1.0,
    "mae_strength": 0.0,
    "vb_loss_strength": 1.0,
    "loss_weight_fn": "CONSTANT",
    "loss_weight_strength": 1.0,
    "dropout_probability": 0.0,
    "loss_scaler": "NONE",
    "learning_rate_scaler": "NONE",
    "offset_noise_weight": 0.0,
    "perturbation_noise_weight": 0.0,
    "rescale_noise_scheduler_to_zero_terminal_snr": false,
    "force_v_prediction": false,
    "force_epsilon_prediction": false,
    "min_noising_strength": 0.0,
    "max_noising_strength": 1.0,
    "timestep_distribution": "UNIFORM",
    "noising_weight": 0.0,
    "noising_bias": 0.0,
    "unet": {
        "__version": 0,
        "model_name": "",
        "include": true,
        "train": true,
        "stop_training_after": 0,
        "stop_training_after_unit": "NEVER",
        "learning_rate": null,
        "weight_dtype": "NONE",
        "dropout_probability": 0.0,
        "train_embedding": true
    },
    "prior": {
        "__version": 0,
        "model_name": "",
        "include": true,
        "train": false,
        "stop_training_after": 0,
        "stop_training_after_unit": "NEVER",
        "learning_rate": null,
        "weight_dtype": "NONE",
        "dropout_probability": 0.0,
        "train_embedding": true
    },
    "text_encoder": {
        "__version": 0,
        "model_name": "",
        "include": true,
        "train": true,
        "stop_training_after": 0,
        "stop_training_after_unit": "NEVER",
        "learning_rate": null,
        "weight_dtype": "NONE",
        "dropout_probability": 0.0,
        "train_embedding": true
    },
    "text_encoder_layer_skip": 0,
    "text_encoder_2": {
        "__version": 0,
        "model_name": "",
        "include": true,
        "train": true,
        "stop_training_after": 30,
        "stop_training_after_unit": "EPOCH",
        "learning_rate": null,
        "weight_dtype": "NONE",
        "dropout_probability": 0.0,
        "train_embedding": true
    },
    "text_encoder_2_layer_skip": 0,
    "text_encoder_3": {
        "__version": 0,
        "model_name": "",
        "include": true,
        "train": true,
        "stop_training_after": 30,
        "stop_training_after_unit": "EPOCH",
        "learning_rate": null,
        "weight_dtype": "NONE",
        "dropout_probability": 0.0,
        "train_embedding": true
    },
    "text_encoder_3_layer_skip": 0,
    "vae": {
        "__version": 0,
        "model_name": "",
        "include": true,
        "train": true,
        "stop_training_after": null,
        "stop_training_after_unit": "NEVER",
        "learning_rate": null,
        "weight_dtype": "FLOAT_32",
        "dropout_probability": 0.0,
        "train_embedding": true
    },
    "effnet_encoder": {
        "__version": 0,
        "model_name": "C:/train/cascade/effnet_encoder.safetensors",
        "include": true,
        "train": true,
        "stop_training_after": null,
        "stop_training_after_unit": "NEVER",
        "learning_rate": null,
        "weight_dtype": "NONE",
        "dropout_probability": 0.0,
        "train_embedding": true
    },
    "decoder": {
        "__version": 0,
        "model_name": "stabilityai/stable-cascade",
        "include": true,
        "train": true,
        "stop_training_after": null,
        "stop_training_after_unit": "NEVER",
        "learning_rate": null,
        "weight_dtype": "NONE",
        "dropout_probability": 0.0,
        "train_embedding": true
    },
    "decoder_text_encoder": {
        "__version": 0,
        "model_name": "",
        "include": true,
        "train": true,
        "stop_training_after": null,
        "stop_training_after_unit": "NEVER",
        "learning_rate": null,
        "weight_dtype": "NONE",
        "dropout_probability": 0.0,
        "train_embedding": true
    },
    "decoder_vqgan": {
        "__version": 0,
        "model_name": "",
        "include": true,
        "train": true,
        "stop_training_after": null,
        "stop_training_after_unit": "NEVER",
        "learning_rate": null,
        "weight_dtype": "NONE",
        "dropout_probability": 0.0,
        "train_embedding": true
    },
    "masked_training": false,
    "unmasked_probability": 0.1,
    "unmasked_weight": 0.1,
    "normalize_masked_area_loss": false,
    "embedding_learning_rate": null,
    "preserve_embedding_norm": false,
    "embedding": {
        "__version": 0,
        "uuid": "b61ef5e9-100f-422c-af62-3bfafc763091",
        "model_name": "",
        "placeholder": "<embedding>",
        "train": true,
        "stop_training_after": null,
        "stop_training_after_unit": "NEVER",
        "token_count": 1,
        "initial_embedding_text": "*"
    },
    "additional_embeddings": [],
    "embedding_weight_dtype": "FLOAT_32",
    "lora_model_name": "",
    "lora_rank": 16,
    "lora_alpha": 1.0,
    "lora_weight_dtype": "FLOAT_32",
    "bundle_additional_embeddings": true,
    "optimizer": {
        "__version": 0,
        "optimizer": "PRODIGY",
        "adam_w_mode": false,
        "alpha": null,
        "amsgrad": false,
        "beta1": 0.9,
        "beta2": 0.999,
        "beta3": null,
        "bias_correction": false,
        "block_wise": false,
        "capturable": false,
        "centered": false,
        "clip_threshold": null,
        "d0": 1e-07,
        "d_coef": 2.0,
        "dampening": null,
        "decay_rate": null,
        "decouple": true,
        "differentiable": false,
        "eps": 1e-08,
        "eps2": null,
        "foreach": false,
        "fsdp_in_use": false,
        "fused": false,
        "fused_back_pass": false,
        "growth_rate": "inf",
        "initial_accumulator_value": null,
        "is_paged": false,
        "log_every": null,
        "lr_decay": null,
        "max_unorm": null,
        "maximize": false,
        "min_8bit_size": null,
        "momentum": null,
        "nesterov": false,
        "no_prox": false,
        "optim_bits": null,
        "percentile_clipping": null,
        "r": null,
        "relative_step": false,
        "safeguard_warmup": true,
        "scale_parameter": false,
        "stochastic_rounding": true,
        "use_bias_correction": true,
        "use_triton": false,
        "warmup_init": false,
        "weight_decay": 0.05,
        "weight_lr_power": null

Relevant log output

No response

Output of `pip freeze`

(venv) C:\OneTrainer\venv>pip freeze
absl-py==2.1.0
accelerate==0.30.1
aiohttp==3.9.5
aiosignal==1.3.1
antlr4-python3-runtime==4.9.3
async-timeout==4.0.3
attrs==23.2.0
bitsandbytes==0.43.1
certifi==2024.7.4
charset-normalizer==3.3.2
cloudpickle==3.0.0
colorama==0.4.6
coloredlogs==15.0.1
contourpy==1.2.1
customtkinter==5.2.2
cycler==0.12.1
dadaptation==3.2
darkdetect==0.8.0
-e git+https://github.com/huggingface/diffusers.git@dd4b731e68f88f58dfabfb68f28e00ede2bb90ae#egg=diffusers
filelock==3.15.4
flatbuffers==24.3.25
fonttools==4.53.1
frozenlist==1.4.1
fsspec==2024.6.1
ftfy==6.2.0
grpcio==1.65.1
huggingface-hub==0.23.3
humanfriendly==10.0
idna==3.7
importlib_metadata==8.0.0
intel-openmp==2021.4.0
invisible-watermark==0.2.0
Jinja2==3.1.4
kiwisolver==1.4.5
lightning-utilities==0.11.5
lion-pytorch==0.1.4
Markdown==3.6
markdown-it-py==3.0.0
MarkupSafe==2.1.5
matplotlib==3.9.1
mdurl==0.1.2
-e git+https://github.com/Nerogar/mgds.git@5ea389a62408cff79a8f3a11b2f25c185c7c1c2e#egg=mgds
mkl==2021.4.0
mpmath==1.3.0
multidict==6.0.5
networkx==3.3
numpy==1.26.4
omegaconf==2.3.0
onnxruntime-gpu==1.18.1
open-clip-torch==2.24.0
opencv-python==4.9.0.80
packaging==24.1
pillow==10.3.0
platformdirs==4.2.2
pooch==1.8.1
prodigyopt==1.0
protobuf==4.25.3
psutil==6.0.0
Pygments==2.18.0
pynvml==11.5.0
pyparsing==3.1.2
pyreadline3==3.4.1
python-dateutil==2.9.0.post0
pytorch-lightning==2.2.5
PyWavelets==1.6.0
PyYAML==6.0.1
regex==2024.5.15
requests==2.32.3
rich==13.7.1
safetensors==0.4.3
scalene==1.5.41
schedulefree==1.2.5
sentencepiece==0.2.0
six==1.16.0
sympy==1.13.1
tbb==2021.13.0
tensorboard==2.17.0
tensorboard-data-server==0.7.2
timm==1.0.7
tokenizers==0.19.1
torch==2.3.1+cu118
torchmetrics==1.4.0.post0
torchvision==0.18.1+cu118
tqdm==4.66.4
transformers==4.42.3
typing_extensions==4.12.2
urllib3==2.2.2
wcwidth==0.2.13
Werkzeug==3.0.3
xformers==0.0.27+cu118
yarl==1.9.4
zipp==3.19.2

betterftr commented 4 months ago

Update: I did a fresh install on the master branch with the full default Stable Cascade settings. This time I even included training the prior (I thought the issue might be limited to text-encoder-only training), but the issue is still there. This is the config:

{
    "__version": 4,
    "training_method": "FINE_TUNE",
    "model_type": "STABLE_CASCADE_1",
    "debug_mode": false,
    "debug_dir": "C:/train/debug",
    "workspace_dir": "C:\\train",
    "cache_dir": "C:\\train",
    "tensorboard": true,
    "tensorboard_expose": false,
    "continue_last_backup": false,
    "include_train_config": "NONE",
    "base_model_name": "stabilityai/stable-cascade-prior",
    "weight_dtype": "BFLOAT_16",
    "output_dtype": "BFLOAT_16",
    "output_model_format": "SAFETENSORS",
    "output_model_destination": "models/model",
    "gradient_checkpointing": true,
    "force_circular_padding": false,
    "concept_file_name": "training_concepts/concepts.json",
    "concepts": null,
    "aspect_ratio_bucketing": true,
    "latent_caching": true,
    "clear_cache_before_training": false,
    "learning_rate_scheduler": "CONSTANT",
    "custom_learning_rate_scheduler": null,
    "scheduler_params": [],
    "learning_rate": 1.0,
    "learning_rate_warmup_steps": 10,
    "learning_rate_cycles": 1,
    "epochs": 100,
    "batch_size": 1,
    "gradient_accumulation_steps": 1,
    "ema": "OFF",
    "ema_decay": 0.999,
    "ema_update_step_interval": 5,
    "dataloader_threads": 2,
    "train_device": "cuda",
    "temp_device": "cpu",
    "train_dtype": "BFLOAT_16",
    "fallback_train_dtype": "BFLOAT_16",
    "enable_autocast_cache": false,
    "only_cache": false,
    "resolution": "512",
    "attention_mechanism": "XFORMERS",
    "align_prop": false,
    "align_prop_probability": 0.1,
    "align_prop_loss": "AESTHETIC",
    "align_prop_weight": 0.01,
    "align_prop_steps": 20,
    "align_prop_truncate_steps": 0.5,
    "align_prop_cfg_scale": 7.0,
    "mse_strength": 1.0,
    "mae_strength": 0.0,
    "vb_loss_strength": 1.0,
    "loss_weight_fn": "MIN_SNR_GAMMA",
    "loss_weight_strength": 5.0,
    "dropout_probability": 0.0,
    "loss_scaler": "NONE",
    "learning_rate_scaler": "NONE",
    "offset_noise_weight": 0.0,
    "perturbation_noise_weight": 0.0,
    "rescale_noise_scheduler_to_zero_terminal_snr": false,
    "force_v_prediction": false,
    "force_epsilon_prediction": false,
    "min_noising_strength": 0.0,
    "max_noising_strength": 1.0,
    "timestep_distribution": "UNIFORM",
    "noising_weight": 0.0,
    "noising_bias": 0.0,
    "unet": {
        "__version": 0,
        "model_name": "",
        "include": true,
        "train": true,
        "stop_training_after": 0,
        "stop_training_after_unit": "NEVER",
        "learning_rate": null,
        "weight_dtype": "NONE",
        "dropout_probability": 0.0,
        "train_embedding": true
    },
    "prior": {
        "__version": 0,
        "model_name": "",
        "include": true,
        "train": true,
        "stop_training_after": 0,
        "stop_training_after_unit": "NEVER",
        "learning_rate": null,
        "weight_dtype": "NONE",
        "dropout_probability": 0.0,
        "train_embedding": true
    },
    "text_encoder": {
        "__version": 0,
        "model_name": "",
        "include": true,
        "train": true,
        "stop_training_after": 0,
        "stop_training_after_unit": "NEVER",
        "learning_rate": null,
        "weight_dtype": "NONE",
        "dropout_probability": 0.0,
        "train_embedding": true
    },
    "text_encoder_layer_skip": 0,
    "text_encoder_2": {
        "__version": 0,
        "model_name": "",
        "include": true,
        "train": true,
        "stop_training_after": 30,
        "stop_training_after_unit": "EPOCH",
        "learning_rate": null,
        "weight_dtype": "NONE",
        "dropout_probability": 0.0,
        "train_embedding": true
    },
    "text_encoder_2_layer_skip": 0,
    "text_encoder_3": {
        "__version": 0,
        "model_name": "",
        "include": true,
        "train": true,
        "stop_training_after": 30,
        "stop_training_after_unit": "EPOCH",
        "learning_rate": null,
        "weight_dtype": "NONE",
        "dropout_probability": 0.0,
        "train_embedding": true
    },
    "text_encoder_3_layer_skip": 0,
    "vae": {
        "__version": 0,
        "model_name": "",
        "include": true,
        "train": true,
        "stop_training_after": null,
        "stop_training_after_unit": "NEVER",
        "learning_rate": null,
        "weight_dtype": "FLOAT_32",
        "dropout_probability": 0.0,
        "train_embedding": true
    },
    "effnet_encoder": {
        "__version": 0,
        "model_name": "C:/train/cascade/effnet_encoder.safetensors",
        "include": true,
        "train": true,
        "stop_training_after": null,
        "stop_training_after_unit": "NEVER",
        "learning_rate": null,
        "weight_dtype": "NONE",
        "dropout_probability": 0.0,
        "train_embedding": true
    },
    "decoder": {
        "__version": 0,
        "model_name": "stabilityai/stable-cascade",
        "include": true,
        "train": true,
        "stop_training_after": null,
        "stop_training_after_unit": "NEVER",
        "learning_rate": null,
        "weight_dtype": "NONE",
        "dropout_probability": 0.0,
        "train_embedding": true
    },
    "decoder_text_encoder": {
        "__version": 0,
        "model_name": "",
        "include": true,
        "train": true,
        "stop_training_after": null,
        "stop_training_after_unit": "NEVER",
        "learning_rate": null,
        "weight_dtype": "NONE",
        "dropout_probability": 0.0,
        "train_embedding": true
    },
    "decoder_vqgan": {
        "__version": 0,
        "model_name": "",
        "include": true,
        "train": true,
        "stop_training_after": null,
        "stop_training_after_unit": "NEVER",
        "learning_rate": null,
        "weight_dtype": "NONE",
        "dropout_probability": 0.0,
        "train_embedding": true
    },
    "masked_training": false,
    "unmasked_probability": 0.1,
    "unmasked_weight": 0.1,
    "normalize_masked_area_loss": false,
    "embedding_learning_rate": null,
    "preserve_embedding_norm": false,
    "embedding": {
        "__version": 0,
        "uuid": "0ed09eb2-a5b2-46f2-8837-0e2d2f38d351",
        "model_name": "",
        "placeholder": "<embedding>",
        "train": true,
        "stop_training_after": null,
        "stop_training_after_unit": "NEVER",
        "token_count": 1,
        "initial_embedding_text": "*"
    },
    "additional_embeddings": [],
    "embedding_weight_dtype": "FLOAT_32",
    "lora_model_name": "",
    "lora_rank": 16,
    "lora_alpha": 1.0,
    "lora_weight_dtype": "FLOAT_32",
    "bundle_additional_embeddings": true,
    "optimizer": {
        "__version": 0,
        "optimizer": "PRODIGY",
        "adam_w_mode": false,
        "alpha": null,
        "amsgrad": false,
        "beta1": 0.9,
        "beta2": 0.999,
        "beta3": null,
        "bias_correction": false,
        "block_wise": false,
        "capturable": false,
        "centered": false,
        "clip_threshold": null,
        "d0": 1e-06,
        "d_coef": 2.0,
        "dampening": null,
        "decay_rate": null,
        "decouple": true,
        "differentiable": false,
        "eps": 1e-08,
        "eps2": null,
        "foreach": false,
        "fsdp_in_use": false,
        "fused": false,
        "fused_back_pass": false,
        "growth_rate": "inf",
        "initial_accumulator_value": null,
        "is_paged": false,
        "log_every": null,
        "lr_decay": null,
        "max_unorm": null,
        "maximize": false,
        "min_8bit_size": null,
        "momentum": null,
        "nesterov": false,
        "no_prox": false,
        "optim_bits": null,
        "percentile_clipping": null,
        "r": null,
        "relative_step": false,
        "safeguard_warmup": true,
        "scale_parameter": false,
        "stochastic_rounding": true,
        "use_bias_correction": true,
        "use_triton": false,
        "warmup_init": false,
        "weight_decay": 0.05,
        "weight_lr_power": null,
        "decoupled_decay": false,
        "fixed_decay": false,
        "rectify": false,
        "degenerated_to_sgd": false,
        "k": null,
        "xi": null,
        "n_sma_threshold": null,
        "ams_bound": false,
        "adanorm": false,
        "adam_debias": false
    },
    "optimizer_defaults": {
        "ADAFACTOR": {
            "__version": 0,
            "optimizer": "ADAFACTOR",
            "adam_w_mode": false,
            "alpha": null,
            "amsgrad": false,
            "beta1": null,
            "beta2": null,
            "beta3": null,
            "bias_correction": false,
            "block_wise": false,
            "capturable": false,
            "centered": false,
            "clip_threshold": 1.0,
            "d0": null,
            "d_coef": null,
            "dampening": null,
            "decay_rate": -0.8,
            "decouple": false,
            "differentiable": false,
            "eps": 1e-30,
            "eps2": 0.001,
            "foreach": false,
            "fsdp_in_use": false,
            "fused": false,
            "fused_back_pass": false,
            "growth_rate": null,
            "initial_accumulator_value": null,
            "is_paged": false,
            "log_every": null,
            "lr_decay": null,
            "max_unorm": null,
            "maximize": false,
            "min_8bit_size": null,
            "momentum": null,
            "nesterov": false,
            "no_prox": false,
            "optim_bits": null,
            "percentile_clipping": null,
            "r": null,
            "relative_step": false,
            "safeguard_warmup": false,
            "scale_parameter": false,
            "stochastic_rounding": true,
            "use_bias_correction": false,
            "use_triton": false,
            "warmup_init": false,
            "weight_decay": 0.0,
            "weight_lr_power": null,
            "decoupled_decay": false,
            "fixed_decay": false,
            "rectify": false,
            "degenerated_to_sgd": false,
            "k": null,
            "xi": null,
            "n_sma_threshold": null,
            "ams_bound": false,
            "adanorm": false,
            "adam_debias": false
        },
        "PRODIGY": {
            "__version": 0,
            "optimizer": "PRODIGY",
            "adam_w_mode": false,
            "alpha": null,
            "amsgrad": false,
            "beta1": 0.9,
            "beta2": 0.999,
            "beta3": null,
            "bias_correction": false,
            "block_wise": false,
            "capturable": false,
            "centered": false,
            "clip_threshold": null,
            "d0": 1e-06,
            "d_coef": 2.0,
            "dampening": null,
            "decay_rate": null,
            "decouple": true,
            "differentiable": false,
            "eps": 1e-08,
            "eps2": null,
            "foreach": false,
            "fsdp_in_use": false,
            "fused": false,
            "fused_back_pass": false,
            "growth_rate": "inf",
            "initial_accumulator_value": null,
            "is_paged": false,
            "log_every": null,
            "lr_decay": null,
            "max_unorm": null,
            "maximize": false,
            "min_8bit_size": null,
            "momentum": null,
            "nesterov": false,
            "no_prox": false,
            "optim_bits": null,
            "percentile_clipping": null,
            "r": null,
            "relative_step": false,
            "safeguard_warmup": true,
            "scale_parameter": false,
            "stochastic_rounding": true,
            "use_bias_correction": true,
            "use_triton": false,
            "warmup_init": false,
            "weight_decay": 0.05,
            "weight_lr_power": null,
            "decoupled_decay": false,
            "fixed_decay": false,
            "rectify": false,
            "degenerated_to_sgd": false,
            "k": null,
            "xi": null,
            "n_sma_threshold": null,
            "ams_bound": false,
            "adanorm": false,
            "adam_debias": false
        }
    },
    "sample_definition_file_name": "training_samples/test.json",
    "samples": null,
    "sample_after": 5,
    "sample_after_unit": "NEVER",
    "sample_image_format": "JPG",
    "samples_to_tensorboard": true,
    "non_ema_sampling": true,
    "backup_after": 30,
    "backup_after_unit": "MINUTE",
    "rolling_backup": false,
    "rolling_backup_count": 3,
    "backup_before_save": true,
    "save_after": 0,
    "save_after_unit": "NEVER",
    "save_filename_prefix": ""
}

0x1355 commented 4 months ago

Having similar issue on main branch, but when training SDXL lora, I can see:

So Text Encoder LR displays as expected but UNET LR not.

betterftr commented 4 months ago

Found the problem: changing safeguard warmup/coef doesn't break it; changing initial D or bias correction does. Orange: Prodigy's default settings loaded. Blue: bias correction turned on. Green: initial D changed to 1e-07.

this is on master branch

mx commented 4 months ago

Having similar issue on main branch, but when training SDXL lora, I can see:

[snip]

So Text Encoder LR displays as expected but UNET LR not.

This just looks like a display scale issue, view the right graph at the same scale as the left graph.

0x1355 commented 4 months ago

Having similar issue on main branch, but when training SDXL lora, I can see: [snip] So Text Encoder LR displays as expected but UNET LR not.

This just looks like a display scale issue, view the right graph at the same scale as the left graph.

You are right. I missed the different scales.

Meanwhile, I am still having a similar problem to OP: even when using default Prodigy settings, the LR shows as I set it with the standard scheduler (constant, cosine, etc.) but is not adaptive.

I am now seeing the same behavior with other adaptive optimizers too. Here is data from a recent run, using Adafactor (adaptive) and cosine with restart. The lora is trained okay.

Is this expected behavior for adaptive schedulers? Or should I be seeing 'actual LR', like what OP posted above?

mx commented 4 months ago

Adafactor is different and will not show a derived LR, should you turn that mode on, on the tensorboard graphs.

For Prodigy, I'd need to see your graph and your settings. It should show the true LR.

0x1355 commented 4 months ago

A Prodigy default setting run, using cosine with hard restart, single cycle. Stopped early:

The whole setting or just the optimizer part?

run settings

```JSON { "__version": 4, "training_method": "LORA", "model_type": "STABLE_DIFFUSION_XL_10_BASE", "debug_mode": false, "debug_dir": "debug", "workspace_dir": "/workspace/storage", "cache_dir": "/workspace/storage/_CACHE_", "tensorboard": true, "tensorboard_expose": true, "continue_last_backup": false, "include_train_config": "NONE", "base_model_name": "/workspace/storage/base_model/base_model.safetensors", "weight_dtype": "FLOAT_16", "output_dtype": "FLOAT_16", "output_model_format": "SAFETENSORS", "output_model_destination": "/workspace/storage/trained_models/trained__20240730_062103.safetensors", "gradient_checkpointing": true, "force_circular_padding": false, "concept_file_name": "/workspace/OneTrainer/training_concepts/concepts.json", "concepts": [ { "__version": 1, "image": { "__version": 0, "enable_crop_jitter": false, "enable_random_flip": true, "enable_fixed_flip": false, "enable_random_rotate": false, "enable_fixed_rotate": false, "random_rotate_max_angle": 0.0, "enable_random_brightness": false, "enable_fixed_brightness": false, "random_brightness_max_strength": 0.0, "enable_random_contrast": false, "enable_fixed_contrast": false, "random_contrast_max_strength": 0.0, "enable_random_saturation": false, "enable_fixed_saturation": false, "random_saturation_max_strength": 0.0, "enable_random_hue": false, "enable_fixed_hue": false, "random_hue_max_strength": 0.0, "enable_resolution_override": true, "resolution_override": "1024", "enable_random_circular_mask_shrink": false, "enable_random_mask_rotate_crop": false }, "text": { "__version": 0, "prompt_source": "sample", "prompt_path": "", "enable_tag_shuffling": true, "tag_delimiter": ", ", "keep_tags_count": 1 }, "name": "square", "path": "/workspace/storage/data/1024x1024", "seed": -74751800, "enabled": true, "include_subdirectories": false, "image_variations": 2, "text_variations": 4, "balancing": 1.0, "balancing_strategy": "REPEATS", "loss_weight": 1.0 }, { "__version": 1, "image": { "__version": 0, 
"enable_crop_jitter": false, "enable_random_flip": true, "enable_fixed_flip": false, "enable_random_rotate": false, "enable_fixed_rotate": false, "random_rotate_max_angle": 0.0, "enable_random_brightness": false, "enable_fixed_brightness": false, "random_brightness_max_strength": 0.0, "enable_random_contrast": false, "enable_fixed_contrast": false, "random_contrast_max_strength": 0.0, "enable_random_saturation": false, "enable_fixed_saturation": false, "random_saturation_max_strength": 0.0, "enable_random_hue": false, "enable_fixed_hue": false, "random_hue_max_strength": 0.0, "enable_resolution_override": true, "resolution_override": "1344", "enable_random_circular_mask_shrink": false, "enable_random_mask_rotate_crop": false }, "text": { "__version": 0, "prompt_source": "sample", "prompt_path": "", "enable_tag_shuffling": true, "tag_delimiter": ", ", "keep_tags_count": 1 }, "name": "portrait", "path": "/workspace/storage/data/768x1344", "seed": 760525620, "enabled": true, "include_subdirectories": false, "image_variations": 2, "text_variations": 4, "balancing": 1.0, "balancing_strategy": "REPEATS", "loss_weight": 1.0 }, { "__version": 1, "image": { "__version": 0, "enable_crop_jitter": false, "enable_random_flip": true, "enable_fixed_flip": false, "enable_random_rotate": false, "enable_fixed_rotate": false, "random_rotate_max_angle": 0.0, "enable_random_brightness": false, "enable_fixed_brightness": false, "random_brightness_max_strength": 0.0, "enable_random_contrast": false, "enable_fixed_contrast": false, "random_contrast_max_strength": 0.0, "enable_random_saturation": false, "enable_fixed_saturation": false, "random_saturation_max_strength": 0.0, "enable_random_hue": false, "enable_fixed_hue": false, "random_hue_max_strength": 0.0, "enable_resolution_override": true, "resolution_override": "1344", "enable_random_circular_mask_shrink": false, "enable_random_mask_rotate_crop": false }, "text": { "__version": 0, "prompt_source": "sample", "prompt_path": "", 
"enable_tag_shuffling": true, "tag_delimiter": ", ", "keep_tags_count": 1 }, "name": "landscape", "path": "/workspace/storage/data/1344x768", "seed": -465686261, "enabled": true, "include_subdirectories": false, "image_variations": 2, "text_variations": 4, "balancing": 1.0, "balancing_strategy": "REPEATS", "loss_weight": 1.0 } ], "aspect_ratio_bucketing": true, "latent_caching": true, "clear_cache_before_training": false, "learning_rate_scheduler": "COSINE_WITH_HARD_RESTARTS", "custom_learning_rate_scheduler": null, "scheduler_params": [], "learning_rate": 1.0, "learning_rate_warmup_steps": 0, "learning_rate_cycles": 1, "epochs": 200, "batch_size": 16, "gradient_accumulation_steps": 1, "ema": "OFF", "ema_decay": 0.999, "ema_update_step_interval": 5, "dataloader_threads": 2, "train_device": "cuda", "temp_device": "cpu", "train_dtype": "FLOAT_16", "fallback_train_dtype": "BFLOAT_16", "enable_autocast_cache": true, "only_cache": false, "resolution": "1024", "attention_mechanism": "XFORMERS", "align_prop": false, "align_prop_probability": 0.1, "align_prop_loss": "AESTHETIC", "align_prop_weight": 0.01, "align_prop_steps": 20, "align_prop_truncate_steps": 0.5, "align_prop_cfg_scale": 7.0, "mse_strength": 1.0, "mae_strength": 0.0, "vb_loss_strength": 1.0, "loss_weight_fn": "CONSTANT", "loss_weight_strength": 5.0, "dropout_probability": 0.3, "loss_scaler": "NONE", "learning_rate_scaler": "NONE", "offset_noise_weight": 0.1, "perturbation_noise_weight": 0.0, "rescale_noise_scheduler_to_zero_terminal_snr": false, "force_v_prediction": false, "force_epsilon_prediction": false, "min_noising_strength": 0.0, "max_noising_strength": 1.0, "timestep_distribution": "UNIFORM", "noising_weight": 0.0, "noising_bias": 0.0, "unet": { "__version": 0, "model_name": "", "include": true, "train": true, "stop_training_after": 0, "stop_training_after_unit": "NEVER", "learning_rate": null, "weight_dtype": "NONE", "dropout_probability": 0.0, "train_embedding": true }, "prior": { "__version": 0, 
"model_name": "", "include": true, "train": true, "stop_training_after": 0, "stop_training_after_unit": "NEVER", "learning_rate": null, "weight_dtype": "NONE", "dropout_probability": 0.0, "train_embedding": true }, "text_encoder": { "__version": 0, "model_name": "", "include": true, "train": false, "stop_training_after": 30, "stop_training_after_unit": "EPOCH", "learning_rate": null, "weight_dtype": "NONE", "dropout_probability": 0.0, "train_embedding": true }, "text_encoder_layer_skip": 0, "text_encoder_2": { "__version": 0, "model_name": "", "include": true, "train": true, "stop_training_after": 30, "stop_training_after_unit": "EPOCH", "learning_rate": null, "weight_dtype": "NONE", "dropout_probability": 0.0, "train_embedding": true }, "text_encoder_2_layer_skip": 0, "text_encoder_3": { "__version": 0, "model_name": "", "include": true, "train": true, "stop_training_after": 30, "stop_training_after_unit": "EPOCH", "learning_rate": null, "weight_dtype": "NONE", "dropout_probability": 0.0, "train_embedding": true }, "text_encoder_3_layer_skip": 0, "vae": { "__version": 0, "model_name": "", "include": true, "train": true, "stop_training_after": null, "stop_training_after_unit": "NEVER", "learning_rate": null, "weight_dtype": "FLOAT_32", "dropout_probability": 0.0, "train_embedding": true }, "effnet_encoder": { "__version": 0, "model_name": "", "include": true, "train": true, "stop_training_after": null, "stop_training_after_unit": "NEVER", "learning_rate": null, "weight_dtype": "NONE", "dropout_probability": 0.0, "train_embedding": true }, "decoder": { "__version": 0, "model_name": "", "include": true, "train": true, "stop_training_after": null, "stop_training_after_unit": "NEVER", "learning_rate": null, "weight_dtype": "NONE", "dropout_probability": 0.0, "train_embedding": true }, "decoder_text_encoder": { "__version": 0, "model_name": "", "include": true, "train": true, "stop_training_after": null, "stop_training_after_unit": "NEVER", "learning_rate": null, 
"weight_dtype": "NONE", "dropout_probability": 0.0, "train_embedding": true }, "decoder_vqgan": { "__version": 0, "model_name": "", "include": true, "train": true, "stop_training_after": null, "stop_training_after_unit": "NEVER", "learning_rate": null, "weight_dtype": "NONE", "dropout_probability": 0.0, "train_embedding": true }, "masked_training": false, "unmasked_probability": 0.1, "unmasked_weight": 0.1, "normalize_masked_area_loss": false, "embedding_learning_rate": null, "preserve_embedding_norm": false, "embedding": { "__version": 0, "uuid": "b9cf8199-2697-43a6-b66b-bca6a6a4640e", "model_name": "", "placeholder": "", "train": true, "stop_training_after": null, "stop_training_after_unit": "NEVER", "token_count": 1, "initial_embedding_text": "*" }, "additional_embeddings": [], "embedding_weight_dtype": "FLOAT_32", "lora_model_name": "", "lora_rank": 128, "lora_alpha": 1.0, "lora_weight_dtype": "FLOAT_32", "bundle_additional_embeddings": true, "optimizer": { "__version": 0, "optimizer": "PRODIGY", "adam_w_mode": false, "alpha": null, "amsgrad": false, "beta1": 0.9, "beta2": 0.99, "beta3": null, "bias_correction": false, "block_wise": false, "capturable": false, "centered": false, "clip_threshold": null, "d0": 1e-06, "d_coef": 1.0, "dampening": null, "decay_rate": null, "decouple": true, "differentiable": false, "eps": 1e-08, "eps2": null, "foreach": false, "fsdp_in_use": false, "fused": false, "fused_back_pass": false, "growth_rate": "inf", "initial_accumulator_value": null, "is_paged": false, "log_every": null, "lr_decay": null, "max_unorm": null, "maximize": false, "min_8bit_size": null, "momentum": null, "nesterov": false, "no_prox": false, "optim_bits": null, "percentile_clipping": null, "r": null, "relative_step": false, "safeguard_warmup": true, "scale_parameter": false, "stochastic_rounding": true, "use_bias_correction": true, "use_triton": false, "warmup_init": false, "weight_decay": 0.01, "weight_lr_power": null, "decoupled_decay": false, "fixed_decay": 
false, "rectify": false, "degenerated_to_sgd": false, "k": null, "xi": null, "n_sma_threshold": null, "ams_bound": false, "adanorm": false, "adam_debias": false }, "optimizer_defaults": { "PRODIGY": { "__version": 0, "optimizer": "PRODIGY", "adam_w_mode": false, "alpha": null, "amsgrad": false, "beta1": 0.9, "beta2": 0.99, "beta3": null, "bias_correction": false, "block_wise": false, "capturable": false, "centered": false, "clip_threshold": null, "d0": 1e-06, "d_coef": 1.0, "dampening": null, "decay_rate": null, "decouple": true, "differentiable": false, "eps": 1e-08, "eps2": null, "foreach": false, "fsdp_in_use": false, "fused": false, "fused_back_pass": false, "growth_rate": "inf", "initial_accumulator_value": null, "is_paged": false, "log_every": null, "lr_decay": null, "max_unorm": null, "maximize": false, "min_8bit_size": null, "momentum": null, "nesterov": false, "no_prox": false, "optim_bits": null, "percentile_clipping": null, "r": null, "relative_step": false, "safeguard_warmup": true, "scale_parameter": false, "stochastic_rounding": true, "use_bias_correction": true, "use_triton": false, "warmup_init": false, "weight_decay": 0.01, "weight_lr_power": null, "decoupled_decay": false, "fixed_decay": false, "rectify": false, "degenerated_to_sgd": false, "k": null, "xi": null, "n_sma_threshold": null, "ams_bound": false, "adanorm": false, "adam_debias": false }, "ADAFACTOR": { "__version": 0, "optimizer": "ADAFACTOR", "adam_w_mode": false, "alpha": null, "amsgrad": false, "beta1": null, "beta2": null, "beta3": null, "bias_correction": false, "block_wise": false, "capturable": false, "centered": false, "clip_threshold": 1.0, "d0": null, "d_coef": null, "dampening": null, "decay_rate": -0.8, "decouple": false, "differentiable": false, "eps": 1e-30, "eps2": 0.001, "foreach": false, "fsdp_in_use": false, "fused": false, "fused_back_pass": true, "growth_rate": null, "initial_accumulator_value": null, "is_paged": false, "log_every": null, "lr_decay": null, 
"max_unorm": null, "maximize": false, "min_8bit_size": null, "momentum": null, "nesterov": false, "no_prox": false, "optim_bits": null, "percentile_clipping": null, "r": null, "relative_step": true, "safeguard_warmup": false, "scale_parameter": true, "stochastic_rounding": true, "use_bias_correction": false, "use_triton": false, "warmup_init": false, "weight_decay": 0.0, "weight_lr_power": null, "decoupled_decay": false, "fixed_decay": false, "rectify": false, "degenerated_to_sgd": false, "k": null, "xi": null, "n_sma_threshold": null, "ams_bound": false, "adanorm": false, "adam_debias": false } }, "sample_definition_file_name": "/workspace/OneTrainer/training_samples/pony.json", "samples": [], "sample_after": 1, "sample_after_unit": "NEVER", "sample_image_format": "PNG", "samples_to_tensorboard": true, "non_ema_sampling": true, "backup_after": 3, "backup_after_unit": "EPOCH", "rolling_backup": true, "rolling_backup_count": 50, "backup_before_save": false, "save_after": 5, "save_after_unit": "NEVER", "save_filename_prefix": "" } ```

mx commented 4 months ago

What exactly is the problem? That looks fine. Prodigy found your learning rate. OP is finding prodigy not moving from the d0 value

0x1355 commented 4 months ago

I was expecting the LR to be more 'rough' and not 'smooth', due to adaptive optimizer modifying LR throughout the run, like this example shown on the Optimizer wiki page:

Did you mean that in the run above, Prodigy found 0.0034 and set it just once, and didn't find a different LR for the rest of run?

mx commented 4 months ago

It's only rough like that image if it doesn't find a good enough estimate for the problem at first. It found its best estimate early, and never needed to increase it again.

Did you mean that in the run above, Prodigy found 0.0034 and set it just once, and didn't find a different LR for the rest of run?

Correct.

0x1355 commented 4 months ago

Interesting. Here is another run. Prodigy default. Completely different training data from the above.

I did 9 long runs like these, across 3 different datasets (300+ images each). Some with default Prodigy settings, some changed according to Optimizer wiki page. I saw the same behavior: LR set once.

Is that behavior so common? Or am I just lucky?

run settings

```JSON { "__version": 4, "training_method": "LORA", "model_type": "STABLE_DIFFUSION_XL_10_BASE", "debug_mode": false, "debug_dir": "debug", "workspace_dir": "/workspace/storage", "cache_dir": "/workspace/storage/_CACHE_", "tensorboard": true, "tensorboard_expose": true, "continue_last_backup": false, "include_train_config": "NONE", "base_model_name": "/workspace/storage/base_model/base_model.safetensors", "weight_dtype": "FLOAT_16", "output_dtype": "FLOAT_16", "output_model_format": "SAFETENSORS", "output_model_destination": "/workspace/storage/trained_models/trained__20240729_100634.safetensors", "gradient_checkpointing": true, "force_circular_padding": false, "concept_file_name": "/workspace/OneTrainer/training_concepts/concepts.json", "concepts": [ { "__version": 1, "image": { "__version": 0, "enable_crop_jitter": false, "enable_random_flip": true, "enable_fixed_flip": false, "enable_random_rotate": false, "enable_fixed_rotate": false, "random_rotate_max_angle": 0.0, "enable_random_brightness": false, "enable_fixed_brightness": false, "random_brightness_max_strength": 0.0, "enable_random_contrast": false, "enable_fixed_contrast": false, "random_contrast_max_strength": 0.0, "enable_random_saturation": false, "enable_fixed_saturation": false, "random_saturation_max_strength": 0.0, "enable_random_hue": false, "enable_fixed_hue": false, "random_hue_max_strength": 0.0, "enable_resolution_override": true, "resolution_override": "1024", "enable_random_circular_mask_shrink": false, "enable_random_mask_rotate_crop": false }, "text": { "__version": 0, "prompt_source": "sample", "prompt_path": "", "enable_tag_shuffling": true, "tag_delimiter": ", ", "keep_tags_count": 1 }, "name": "square", "path": "/workspace/storage/data/1024x1024", "seed": -74751800, "enabled": true, "include_subdirectories": false, "image_variations": 2, "text_variations": 4, "balancing": 1.0, "balancing_strategy": "REPEATS", "loss_weight": 1.0 }, { "__version": 1, "image": { "__version": 0, 
"enable_crop_jitter": false, "enable_random_flip": true, "enable_fixed_flip": false, "enable_random_rotate": false, "enable_fixed_rotate": false, "random_rotate_max_angle": 0.0, "enable_random_brightness": false, "enable_fixed_brightness": false, "random_brightness_max_strength": 0.0, "enable_random_contrast": false, "enable_fixed_contrast": false, "random_contrast_max_strength": 0.0, "enable_random_saturation": false, "enable_fixed_saturation": false, "random_saturation_max_strength": 0.0, "enable_random_hue": false, "enable_fixed_hue": false, "random_hue_max_strength": 0.0, "enable_resolution_override": true, "resolution_override": "1344", "enable_random_circular_mask_shrink": false, "enable_random_mask_rotate_crop": false }, "text": { "__version": 0, "prompt_source": "sample", "prompt_path": "", "enable_tag_shuffling": true, "tag_delimiter": ", ", "keep_tags_count": 1 }, "name": "portrait", "path": "/workspace/storage/data/768x1344", "seed": 760525620, "enabled": true, "include_subdirectories": false, "image_variations": 2, "text_variations": 4, "balancing": 1.0, "balancing_strategy": "REPEATS", "loss_weight": 1.0 }, { "__version": 1, "image": { "__version": 0, "enable_crop_jitter": false, "enable_random_flip": true, "enable_fixed_flip": false, "enable_random_rotate": false, "enable_fixed_rotate": false, "random_rotate_max_angle": 0.0, "enable_random_brightness": false, "enable_fixed_brightness": false, "random_brightness_max_strength": 0.0, "enable_random_contrast": false, "enable_fixed_contrast": false, "random_contrast_max_strength": 0.0, "enable_random_saturation": false, "enable_fixed_saturation": false, "random_saturation_max_strength": 0.0, "enable_random_hue": false, "enable_fixed_hue": false, "random_hue_max_strength": 0.0, "enable_resolution_override": true, "resolution_override": "1344", "enable_random_circular_mask_shrink": false, "enable_random_mask_rotate_crop": false }, "text": { "__version": 0, "prompt_source": "sample", "prompt_path": "", 
"enable_tag_shuffling": true, "tag_delimiter": ", ", "keep_tags_count": 1 }, "name": "landscape", "path": "/workspace/storage/data/1344x768", "seed": -465686261, "enabled": true, "include_subdirectories": false, "image_variations": 2, "text_variations": 4, "balancing": 1.0, "balancing_strategy": "REPEATS", "loss_weight": 1.0 } ], "aspect_ratio_bucketing": true, "latent_caching": true, "clear_cache_before_training": false, "learning_rate_scheduler": "COSINE_WITH_HARD_RESTARTS", "custom_learning_rate_scheduler": null, "scheduler_params": [], "learning_rate": 1.0, "learning_rate_warmup_steps": 0, "learning_rate_cycles": 1, "epochs": 100, "batch_size": 18, "gradient_accumulation_steps": 1, "ema": "OFF", "ema_decay": 0.999, "ema_update_step_interval": 5, "dataloader_threads": 2, "train_device": "cuda", "temp_device": "cpu", "train_dtype": "FLOAT_16", "fallback_train_dtype": "BFLOAT_16", "enable_autocast_cache": true, "only_cache": false, "resolution": "1024", "attention_mechanism": "XFORMERS", "align_prop": false, "align_prop_probability": 0.1, "align_prop_loss": "AESTHETIC", "align_prop_weight": 0.01, "align_prop_steps": 20, "align_prop_truncate_steps": 0.5, "align_prop_cfg_scale": 7.0, "mse_strength": 1.0, "mae_strength": 0.0, "vb_loss_strength": 1.0, "loss_weight_fn": "CONSTANT", "loss_weight_strength": 5.0, "dropout_probability": 0.3, "loss_scaler": "NONE", "learning_rate_scaler": "NONE", "offset_noise_weight": 0.1, "perturbation_noise_weight": 0.0, "rescale_noise_scheduler_to_zero_terminal_snr": false, "force_v_prediction": false, "force_epsilon_prediction": false, "min_noising_strength": 0.0, "max_noising_strength": 1.0, "timestep_distribution": "UNIFORM", "noising_weight": 0.0, "noising_bias": 0.0, "unet": { "__version": 0, "model_name": "", "include": true, "train": true, "stop_training_after": 0, "stop_training_after_unit": "NEVER", "learning_rate": null, "weight_dtype": "NONE", "dropout_probability": 0.0, "train_embedding": true }, "prior": { "__version": 0, 
"model_name": "", "include": true, "train": true, "stop_training_after": 0, "stop_training_after_unit": "NEVER", "learning_rate": null, "weight_dtype": "NONE", "dropout_probability": 0.0, "train_embedding": true }, "text_encoder": { "__version": 0, "model_name": "", "include": true, "train": false, "stop_training_after": 30, "stop_training_after_unit": "EPOCH", "learning_rate": null, "weight_dtype": "NONE", "dropout_probability": 0.0, "train_embedding": true }, "text_encoder_layer_skip": 0, "text_encoder_2": { "__version": 0, "model_name": "", "include": true, "train": false, "stop_training_after": 30, "stop_training_after_unit": "EPOCH", "learning_rate": null, "weight_dtype": "NONE", "dropout_probability": 0.0, "train_embedding": true }, "text_encoder_2_layer_skip": 0, "text_encoder_3": { "__version": 0, "model_name": "", "include": true, "train": true, "stop_training_after": 30, "stop_training_after_unit": "EPOCH", "learning_rate": null, "weight_dtype": "NONE", "dropout_probability": 0.0, "train_embedding": true }, "text_encoder_3_layer_skip": 0, "vae": { "__version": 0, "model_name": "", "include": true, "train": true, "stop_training_after": null, "stop_training_after_unit": "NEVER", "learning_rate": null, "weight_dtype": "FLOAT_32", "dropout_probability": 0.0, "train_embedding": true }, "effnet_encoder": { "__version": 0, "model_name": "", "include": true, "train": true, "stop_training_after": null, "stop_training_after_unit": "NEVER", "learning_rate": null, "weight_dtype": "NONE", "dropout_probability": 0.0, "train_embedding": true }, "decoder": { "__version": 0, "model_name": "", "include": true, "train": true, "stop_training_after": null, "stop_training_after_unit": "NEVER", "learning_rate": null, "weight_dtype": "NONE", "dropout_probability": 0.0, "train_embedding": true }, "decoder_text_encoder": { "__version": 0, "model_name": "", "include": true, "train": true, "stop_training_after": null, "stop_training_after_unit": "NEVER", "learning_rate": null, 
"weight_dtype": "NONE", "dropout_probability": 0.0, "train_embedding": true }, "decoder_vqgan": { "__version": 0, "model_name": "", "include": true, "train": true, "stop_training_after": null, "stop_training_after_unit": "NEVER", "learning_rate": null, "weight_dtype": "NONE", "dropout_probability": 0.0, "train_embedding": true }, "masked_training": false, "unmasked_probability": 0.1, "unmasked_weight": 0.1, "normalize_masked_area_loss": false, "embedding_learning_rate": null, "preserve_embedding_norm": false, "embedding": { "__version": 0, "uuid": "b9cf8199-2697-43a6-b66b-bca6a6a4640e", "model_name": "", "placeholder": "", "train": true, "stop_training_after": null, "stop_training_after_unit": "NEVER", "token_count": 1, "initial_embedding_text": "*" }, "additional_embeddings": [], "embedding_weight_dtype": "FLOAT_32", "lora_model_name": "", "lora_rank": 128, "lora_alpha": 1.0, "lora_weight_dtype": "FLOAT_32", "bundle_additional_embeddings": true, "optimizer": { "__version": 0, "optimizer": "PRODIGY", "adam_w_mode": false, "alpha": null, "amsgrad": false, "beta1": 0.9, "beta2": 0.99, "beta3": null, "bias_correction": false, "block_wise": false, "capturable": false, "centered": false, "clip_threshold": null, "d0": 1e-06, "d_coef": 1.0, "dampening": null, "decay_rate": null, "decouple": true, "differentiable": false, "eps": 1e-08, "eps2": null, "foreach": false, "fsdp_in_use": false, "fused": false, "fused_back_pass": false, "growth_rate": "inf", "initial_accumulator_value": null, "is_paged": false, "log_every": null, "lr_decay": null, "max_unorm": null, "maximize": false, "min_8bit_size": null, "momentum": null, "nesterov": false, "no_prox": false, "optim_bits": null, "percentile_clipping": null, "r": null, "relative_step": false, "safeguard_warmup": true, "scale_parameter": false, "stochastic_rounding": true, "use_bias_correction": true, "use_triton": false, "warmup_init": false, "weight_decay": 0.01, "weight_lr_power": null, "decoupled_decay": false, "fixed_decay": 
false, "rectify": false, "degenerated_to_sgd": false, "k": null, "xi": null, "n_sma_threshold": null, "ams_bound": false, "adanorm": false, "adam_debias": false }, "optimizer_defaults": { "PRODIGY": { "__version": 0, "optimizer": "PRODIGY", "adam_w_mode": false, "alpha": null, "amsgrad": false, "beta1": 0.9, "beta2": 0.99, "beta3": null, "bias_correction": false, "block_wise": false, "capturable": false, "centered": false, "clip_threshold": null, "d0": 1e-06, "d_coef": 1.0, "dampening": null, "decay_rate": null, "decouple": true, "differentiable": false, "eps": 1e-08, "eps2": null, "foreach": false, "fsdp_in_use": false, "fused": false, "fused_back_pass": false, "growth_rate": "inf", "initial_accumulator_value": null, "is_paged": false, "log_every": null, "lr_decay": null, "max_unorm": null, "maximize": false, "min_8bit_size": null, "momentum": null, "nesterov": false, "no_prox": false, "optim_bits": null, "percentile_clipping": null, "r": null, "relative_step": false, "safeguard_warmup": true, "scale_parameter": false, "stochastic_rounding": true, "use_bias_correction": true, "use_triton": false, "warmup_init": false, "weight_decay": 0.01, "weight_lr_power": null, "decoupled_decay": false, "fixed_decay": false, "rectify": false, "degenerated_to_sgd": false, "k": null, "xi": null, "n_sma_threshold": null, "ams_bound": false, "adanorm": false, "adam_debias": false }, "ADAFACTOR": { "__version": 0, "optimizer": "ADAFACTOR", "adam_w_mode": false, "alpha": null, "amsgrad": false, "beta1": null, "beta2": null, "beta3": null, "bias_correction": false, "block_wise": false, "capturable": false, "centered": false, "clip_threshold": 1.0, "d0": null, "d_coef": null, "dampening": null, "decay_rate": -0.8, "decouple": false, "differentiable": false, "eps": 1e-30, "eps2": 0.001, "foreach": false, "fsdp_in_use": false, "fused": false, "fused_back_pass": true, "growth_rate": null, "initial_accumulator_value": null, "is_paged": false, "log_every": null, "lr_decay": null, 
"max_unorm": null, "maximize": false, "min_8bit_size": null, "momentum": null, "nesterov": false, "no_prox": false, "optim_bits": null, "percentile_clipping": null, "r": null, "relative_step": true, "safeguard_warmup": false, "scale_parameter": true, "stochastic_rounding": true, "use_bias_correction": false, "use_triton": false, "warmup_init": false, "weight_decay": 0.0, "weight_lr_power": null, "decoupled_decay": false, "fixed_decay": false, "rectify": false, "degenerated_to_sgd": false, "k": null, "xi": null, "n_sma_threshold": null, "ams_bound": false, "adanorm": false, "adam_debias": false } }, "sample_definition_file_name": "/workspace/OneTrainer/training_samples/pony.json", "samples": [], "sample_after": 1, "sample_after_unit": "NEVER", "sample_image_format": "PNG", "samples_to_tensorboard": true, "non_ema_sampling": true, "backup_after": 3, "backup_after_unit": "EPOCH", "rolling_backup": true, "rolling_backup_count": 50, "backup_before_save": false, "save_after": 5, "save_after_unit": "NEVER", "save_filename_prefix": "" } ```

0x1355 commented 4 months ago

Adafactor is different and will not show a derived LR, should you turn that mode on, on the tensorboard graphs.

Side question: is there a way to make Adafactor log derived LR to tensorboard? Or is that inherently impossible due to how the Adafactor optimizer works?

It is working fine for me. I am just interested in how it adjusts LR under the hood.

betterftr commented 4 months ago

Update again: I reinstalled Windows; I have only git, Python 3.10, an NVIDIA driver and OneTrainer installed. Results are still the same. I also tested kohya to see whether it is maybe OT's problem, but got the same results (they have cu118+torch212): Kohya SDXL

deep orange=og prodigy settings
orange=text encoder only training with args: decouple=True weight_decay=0.05 d_coef=2 use_bias_correction=True safeguard_warmup=True betas=0.9,0.99
blue=unet only training with args: decouple=True weight_decay=0.05 d_coef=2 use_bias_correction=True safeguard_warmup=True betas=0.9,0.99

So this either means I have a problem with my clean windows 11 system or with my 4090.

So I went further and tested a different Prodigy implementation namely from pytorch-optimizer (https://pytorch-optimizers.readthedocs.io/en/latest/optimizer/)

I changed the Prodigy implementation in OT to this one (I had to rename some stuff, like decouple to weight_decouple, as per the documentation on their site), turned on bias correction, which gave me the problem in the original, and voila:

It is working. So the problem must be the original Prodigy optimizer, or something in it combined with something in my setup.

One problem still remains: the different colors are different combinations of safeguard warmup, decouple and bias correction; the orange one has a changed initial D (1e-07), which still breaks it even in this implementation:

Nerogar / OneTrainer