kohya-ss / sd-scripts


AssertionError: g_tokens must be None if l_tokens is None #1793

betterftr commented 1 week ago

I have an issue when trying to train SD3.5 Large: image sampling during training fails with the traceback below. Please help :(

Traceback (most recent call last):
  File "C:\sd-scripts\sd3_train.py", line 1074, in <module>
    train(args)
  File "C:\sd-scripts\sd3_train.py", line 949, in train
    sd3_train_utils.sample_images(
  File "C:\sd-scripts\library\sd3_train_utils.py", line 429, in sample_images
    sample_image_inference(
  File "C:\sd-scripts\library\sd3_train_utils.py", line 550, in sample_image_inference
    lg_out, t5_out, pooled, l_attn_mask, g_attn_mask, t5_attn_mask = encode_prompt(prompt)
                                                                     ^^^^^^^^^^^^^^^^^^^^^
  File "C:\sd-scripts\library\sd3_train_utils.py", line 538, in encode_prompt
    encoded_text_encoder_conds = encoding_strategy.encode_tokens(tokenize_strategy, text_encoders, tokens_and_masks)
                                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\sd-scripts\library\strategy_sd3.py", line 97, in encode_tokens
    assert g_tokens is None, "g_tokens must be None if l_tokens is None"
           ^^^^^^^^^^^^^^^^
AssertionError: g_tokens must be None if l_tokens is None
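
As far as I can tell from the message, the assertion enforces that the CLIP-L and CLIP-G token lists are either both present or both None, so the failure means l_tokens came back as None while g_tokens did not. A minimal sketch of that invariant (not the actual strategy_sd3.py code), just to illustrate what trips it:

# Minimal sketch of the invariant, not the library code: the CLIP-L and
# CLIP-G token lists are expected to be None (or present) together.
def check_token_pair(l_tokens, g_tokens):
    if l_tokens is None:
        assert g_tokens is None, "g_tokens must be None if l_tokens is None"
    # ... encoding would continue here ...

check_token_pair(l_tokens=None, g_tokens=[[49406, 49407]])  # raises the same AssertionError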

Launch command:

accelerate launch --mixed_precision bf16 --num_processes 1 --num_machines 1 --num_cpu_threads_per_process 2 C:/sd-scripts/sd3_train.py --config_file C:/train/test.toml

Training config (test.toml):

clip_l = "C:/train/sd3.5/text_encoder/model.safetensors"
clip_g = "C:/train/sd3.5/text_encoder_2/model.safetensors"
t5xxl = "C:/train/sd3.5/text_encoders/t5xxl_fp16.safetensors"
pretrained_model_name_or_path = "C:/train/sd3.5/sd3.5_large.safetensors"
skip_cache_check = false
blockwise_fused_optimizers = false
fused_backward_pass = true
disable_mmap_load_safetensors = true
highvram = true
cache_latents = true
cache_latents_to_disk = true
cache_text_encoder_outputs = true
cache_text_encoder_outputs_to_disk = true
caption_extension = ".txt"
dataset_config = "C:/train/test.toml"
gradient_accumulation_steps = 1
gradient_checkpointing = true
#enable_scaled_pos_embed = true
max_grad_norm = 0.0
huber_c = 0.1
huber_schedule = "snr"
logging_dir = "C:/train/tensorboard"
loss_type = "l2"
lr_scheduler = "constant_with_warmup"
lr_scheduler_args = []
max_timestep = 1000
min_snr_gamma = 5
ip_noise_gamma = 0.1
ip_noise_gamma_random_strength = true
noise_offset_type = "Original"
output_dir = "C:/train"
output_name = "last"
persistent_data_loader_workers = true
max_data_loader_n_workers = 2
sample_every_n_epochs = 1
sample_prompts = "C:/train/sample/test_prompt.txt"
sample_sampler = "euler_a"
save_every_n_epochs = 100
save_model_as = "diffusers"
save_precision = "bf16"
save_state = true
mixed_precision = "bf16"
sdpa = true
seed = 1234
max_train_epochs = 1000
optimizer_args = [ "relative_step=False", "scale_parameter=True", "warmup_init=False", "weight_decay=0.05"]
optimizer_type = "Adafactor"
lr_warmup_steps = 50
learning_rate = 1e-5
blocks_to_swap = 20
train_batch_size = 12
train_blocks = "all"
wandb_run_name = "last"

Dataset config:

[general]
# define common settings here
flip_aug = false
color_aug = false
shuffle_caption = false
caption_tag_dropout_rate = 0
caption_extension = ".txt"

[[datasets]]
# define the first resolution here
enable_bucket = true
min_bucket_reso = 64
max_bucket_reso = 512
bucket_reso_steps = 32
bucket_no_upscale = true
resolution = [512, 512]

  [[datasets.subsets]]
  image_dir = "F:/1_testp1"
  num_repeats = 1

[[datasets]]
# define the second resolution here
enable_bucket = true
min_bucket_reso = 64
max_bucket_reso = 512
bucket_reso_steps = 32
bucket_no_upscale = true
resolution = [512, 512]

  [[datasets.subsets]]
  image_dir = "F:/1_testp2"
  num_repeats = 1
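
In case it helps: clip_l and clip_g in my config point at the diffusers text_encoder / text_encoder_2 subfolder files, while t5xxl points at the standalone text_encoders file. A quick way to see which tensors those files actually contain (a hypothetical check, not part of sd-scripts; assumes the safetensors package is installed):

from safetensors import safe_open

# Print how many tensors each text encoder file holds and a few sample key
# names; paths are copied from the config above.
paths = {
    "clip_l": "C:/train/sd3.5/text_encoder/model.safetensors",
    "clip_g": "C:/train/sd3.5/text_encoder_2/model.safetensors",
    "t5xxl": "C:/train/sd3.5/text_encoders/t5xxl_fp16.safetensors",
}
for name, path in paths.items():
    with safe_open(path, framework="pt") as f:
        keys = sorted(f.keys())
        print(f"{name}: {len(keys)} tensors, e.g. {keys[:3]}")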