the problem is now - Githubissues

Dins-Sync commented 6 months ago

╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ /content/kohya-trainer/train_network.py:17 in <module>
│
│     14 from accelerate.utils import set_seed
│     15 from diffusers import DDPMScheduler
│     16
│ ❱   17 import library.train_util as train_util
│     18 from library.train_util import (
│     19 │   DreamBoothDataset,
│     20 )
│
│ /content/kohya-trainer/library/train_util.py:1861 in <module>
│
│   1858 │   │   return dq, dk, dv, None, None, None, None
│   1859
│   1860
│ ❱ 1861 def replace_unet_modules(unet: diffusers.models.unet_2d_condition.UNet2DConditionModel,
│   1862 │   # unet is not used currently, but it is here for future use
│   1863 │   if mem_eff_attn:
│   1864 │   │   replace_unet_cross_attn_to_memory_efficient()
│
│ /usr/local/lib/python3.10/dist-packages/diffusers/utils/import_utils.py:711 in __getattr__
│
│   708 │   │   │   module = self._get_module(self._class_to_module[name])
│   709 │   │   │   value = getattr(module, name)
│   710 │   │   else:
│ ❱ 711 │   │   │   raise AttributeError(f"module {self.__name__} has no attribute {name}")
│   712 │   │
│   713 │   │   setattr(self, name, value)
│   714 │   │   return value
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
AttributeError: module diffusers.models has no attribute unet_2d_condition
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ /usr/local/bin/accelerate:8 in <module>
│
│   5 from accelerate.commands.accelerate_cli import main
│   6 if __name__ == '__main__':
│   7 │   sys.argv[0] = re.sub(r'(-script.pyw|.exe)?$', '', sys.argv[0])
│ ❱ 8 │   sys.exit(main())
│   9
│
│ /usr/local/lib/python3.10/dist-packages/accelerate/commands/accelerate_cli.py:45 in main
│
│   42 │   │   exit(1)
│   43 │
│   44 │   # Run
│ ❱ 45 │   args.func(args)
│   46
│   47
│   48 if __name__ == "__main__":
│
│ /usr/local/lib/python3.10/dist-packages/accelerate/commands/launch.py:918 in launch_command
│
│   915 │   elif defaults is not None and defaults.compute_environment == ComputeEnvironment.AMA
│   916 │   │   sagemaker_launcher(defaults, args)
│   917 │   else:
│ ❱ 918 │   │   simple_launcher(args)
│   919
│   920
│   921 def main():
│
│ /usr/local/lib/python3.10/dist-packages/accelerate/commands/launch.py:580 in simple_launcher
│
│   577 │   process.wait()
│   578 │   if process.returncode != 0:
│   579 │   │   if not args.quiet:
│ ❱ 580 │   │   │   raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd)
│   581 │   │   else:
│   582 │   │   │   sys.exit(1)
│   583
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
CalledProcessError: Command '['/usr/bin/python3', 'train_network.py', '--dataset_config=/content/drive/MyDrive/Loras/example/dataset_config.toml', '--config_file=/content/drive/MyDrive/Loras/example/training_config.toml']' returned non-zero exit status 1.

dazaibsd commented 6 months ago

Same here!

illlliill0001 commented 6 months ago

Same here!

imalmg commented 6 months ago

me too

sdachen commented 6 months ago

I went into ~/kohya-trainer/library/train_util.py and changed replace_unet_modules to


from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Needed by type checkers only. The top-level export is used instead of a
    # versioned submodule path (diffusers.models.unet_2d_condition vs.
    # diffusers.models.unets.unet_2d_condition), and keeping the import out
    # of the runtime path means defining the function below can never fail
    # on a diffusers module-layout change.
    from diffusers import UNet2DConditionModel


def replace_unet_modules(unet: "UNet2DConditionModel", mem_eff_attn, xformers):
    """Swap in an alternative cross-attention implementation.

    Args:
        unet: The U-Net model (not its forward-pass output). Currently
            unused; kept in the signature for future use.
        mem_eff_attn: If truthy, call
            ``replace_unet_cross_attn_to_memory_efficient()``. Takes
            precedence over ``xformers``.
        xformers: If truthy (and ``mem_eff_attn`` is falsy), call
            ``replace_unet_cross_attn_to_xformers()``.

    Returns:
        None. When both flags are falsy nothing is replaced.
    """
    # unet is not used currently, but it is here for future use
    if mem_eff_attn:
        replace_unet_cross_attn_to_memory_efficient()
    elif xformers:
        replace_unet_cross_attn_to_xformers()

The code runs until


loading u-net: <All keys matched successfully>
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ /content/kohya-trainer/train_network.py:873 in <module>                                          │
│                                                                                                  │
│   870 │   args = parser.parse_args()                                                             │
│   871 │   args = train_util.read_config_from_file(args, parser)                                  │
│   872 │                                                                                          │
│ ❱ 873 │   train(args)                                                                            │
│   874                                                                                            │
│                                                                                                  │
│ /content/kohya-trainer/train_network.py:168 in train                                             │
│                                                                                                  │
│   165 │   weight_dtype, save_dtype = train_util.prepare_dtype(args)                              │
│   166 │                                                                                          │
│   167 │   # モデルを読み込む                                                                     │
│ ❱ 168 │   text_encoder, vae, unet, _ = train_util.load_target_model(args, weight_dtype, accele   │
│   169 │                                                                                          │
│   170 │   # モデルに xformers とか memory efficient attention を組み込む                         │
│   171 │   train_util.replace_unet_modules(unet, args.mem_eff_attn, args.xformers)                │
│                                                                                                  │
│ /content/kohya-trainer/library/train_util.py:3151 in load_target_model                           │
│                                                                                                  │
│   3148 │   │   if pi == accelerator.state.local_process_index:                                   │
│   3149 │   │   │   print(f"loading model for process {accelerator.state.local_process_index}/{a  │
│   3150 │   │   │                                                                                 │
│ ❱ 3151 │   │   │   text_encoder, vae, unet, load_stable_diffusion_format = _load_target_model(   │
│   3152 │   │   │   │   args, weight_dtype, accelerator.device if args.lowram else "cpu"          │
│   3153 │   │   │   )                                                                             │
│   3154                                                                                           │
│                                                                                                  │
│ /content/kohya-trainer/library/train_util.py:3117 in _load_target_model                          │
│                                                                                                  │
│   3114 │   load_stable_diffusion_format = os.path.isfile(name_or_path)  # determine SD or Diffu  │
│   3115 │   if load_stable_diffusion_format:                                                      │
│   3116 │   │   print(f"load StableDiffusion checkpoint: {name_or_path}")                         │
│ ❱ 3117 │   │   text_encoder, vae, unet = model_util.load_models_from_stable_diffusion_checkpoin  │
│   3118 │   else:                                                                                 │
│   3119 │   │   # Diffusers model is loaded to CPU                                                │
│   3120 │   │   print(f"load Diffusers pretrained models: {name_or_path}")                        │
│                                                                                                  │
│ /content/kohya-trainer/library/model_util.py:871 in load_models_from_stable_diffusion_checkpoint │
│                                                                                                  │
│    868 │   converted_vae_checkpoint = convert_ldm_vae_checkpoint(state_dict, vae_config)         │
│    869 │                                                                                         │
│    870 │   vae = AutoencoderKL(**vae_config).to(device)                                          │
│ ❱  871 │   info = vae.load_state_dict(converted_vae_checkpoint)                                  │
│    872 │   print("loading vae:", info)                                                           │
│    873 │                                                                                         │
│    874 │   # convert text_model                                                                  │
│                                                                                                  │
│ /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:2153 in load_state_dict       │
│                                                                                                  │
│   2150 │   │   │   │   │   │   ', '.join(f'"{k}"' for k in missing_keys)))                       │
│   2151 │   │                                                                                     │
│   2152 │   │   if len(error_msgs) > 0:                                                           │
│ ❱ 2153 │   │   │   raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format(     │
│   2154 │   │   │   │   │   │   │      self.__class__.__name__, "\n\t".join(error_msgs)))         │
│   2155 │   │   return _IncompatibleKeys(missing_keys, unexpected_keys)                           │
│   2156                                                                                           │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
RuntimeError: Error(s) in loading state_dict for AutoencoderKL:
        Missing key(s) in state_dict: "encoder.mid_block.attentions.0.to_q.weight", 
"encoder.mid_block.attentions.0.to_q.bias", "encoder.mid_block.attentions.0.to_k.weight", 
"encoder.mid_block.attentions.0.to_k.bias", "encoder.mid_block.attentions.0.to_v.weight", 
"encoder.mid_block.attentions.0.to_v.bias", "encoder.mid_block.attentions.0.to_out.0.weight", 
"encoder.mid_block.attentions.0.to_out.0.bias", "decoder.mid_block.attentions.0.to_q.weight", 
"decoder.mid_block.attentions.0.to_q.bias", "decoder.mid_block.attentions.0.to_k.weight", 
"decoder.mid_block.attentions.0.to_k.bias", "decoder.mid_block.attentions.0.to_v.weight", 
"decoder.mid_block.attentions.0.to_v.bias", "decoder.mid_block.attentions.0.to_out.0.weight", 
"decoder.mid_block.attentions.0.to_out.0.bias". 
        Unexpected key(s) in state_dict: "encoder.mid_block.attentions.0.key.bias", 
"encoder.mid_block.attentions.0.key.weight", "encoder.mid_block.attentions.0.proj_attn.bias", 
"encoder.mid_block.attentions.0.proj_attn.weight", "encoder.mid_block.attentions.0.query.bias", 
"encoder.mid_block.attentions.0.query.weight", "encoder.mid_block.attentions.0.value.bias", 
"encoder.mid_block.attentions.0.value.weight", "decoder.mid_block.attentions.0.key.bias", 
"decoder.mid_block.attentions.0.key.weight", "decoder.mid_block.attentions.0.proj_attn.bias", 
"decoder.mid_block.attentions.0.proj_attn.weight", "decoder.mid_block.attentions.0.query.bias", 
"decoder.mid_block.attentions.0.query.weight", "decoder.mid_block.attentions.0.value.bias", 
"decoder.mid_block.attentions.0.value.weight".

The U-Net is loaded successfully, but there are some issues with the VAE.

sdachen commented 6 months ago

I did some investigation, and I think using _convert_deprecated_attention_blocks is the solution. I will fix it later; I'm adding this here in case someone needs it now.

https://github.com/huggingface/diffusers/blob/25caf24ef90fc44074f4fd3712f6ed5a1db4a5c3/src/diffusers/models/modeling_utils.py#L929

I think one can also just load a model saved with a newer version.