bmaltais / kohya_ss

Apache License 2.0
9.51k stars 1.23k forks source link

Returned non-zero exit status 1 #1188

Closed Bellatrix8 closed 8 months ago

Bellatrix8 commented 1 year ago

I just upgraded from a much older version to the newest one and got this issue. How do I fix it?

╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮ │ C:\kohya\kohya_ss\train_network.py:974 in │ │ │ │ 971 │ args = train_util.read_config_from_file(args, parser) │ │ 972 │ │ │ 973 │ trainer = NetworkTrainer() │ │ ❱ 974 │ trainer.train(args) │ │ 975 │ │ │ │ C:\kohya\kohya_ss\train_network.py:250 in train │ │ │ │ 247 │ │ │ vae.requiresgrad(False) │ │ 248 │ │ │ vae.eval() │ │ 249 │ │ │ with torch.no_grad(): │ │ ❱ 250 │ │ │ │ train_dataset_group.cache_latents(vae, args.vae_batch_size, args.cache_l │ │ 251 │ │ │ vae.to("cpu") │ │ 252 │ │ │ if torch.cuda.is_available(): │ │ 253 │ │ │ │ torch.cuda.empty_cache() │ │ │ │ C:\kohya\kohya_ss\library\train_util.py:1730 in cache_latents │ │ │ │ 1727 │ def cache_latents(self, vae, vae_batch_size=1, cache_to_disk=False, is_main_process= │ │ 1728 │ │ for i, dataset in enumerate(self.datasets): │ │ 1729 │ │ │ print(f"[Dataset {i}]") │ │ ❱ 1730 │ │ │ dataset.cache_latents(vae, vae_batch_size, cache_to_disk, is_main_process) │ │ 1731 │ │ │ 1732 │ def is_latent_cacheable(self) -> bool: │ │ 1733 │ │ return all([dataset.is_latent_cacheable() for dataset in self.datasets]) │ │ │ │ C:\kohya\kohya_ss\library\train_util.py:913 in cache_latents │ │ │ │ 910 │ │ │ img_tensors = torch.stack(images, dim=0) │ │ 911 │ │ │ img_tensors = img_tensors.to(device=vae.device, dtype=vae.dtype) │ │ 912 │ │ │ │ │ ❱ 913 │ │ │ latents = vae.encode(img_tensors).latent_dist.sample().to("cpu") │ │ 914 │ │ │ │ │ 915 │ │ │ for info, latent in zip(batch, latents): │ │ 916 │ │ │ │ # check NaN │ │ │ │ C:\kohya\kohya_ss\venv\lib\site-packages\diffusers\utils\accelerate_utils.py:46 in wrapper │ │ │ │ 43 │ def wrapper(self, *args, kwargs): │ │ 44 │ │ if hasattr(self, "_hf_hook") and hasattr(self._hf_hook, "pre_forward"): │ │ 45 │ │ │ self._hf_hook.pre_forward(self) │ │ ❱ 46 │ │ return method(self, *args, *kwargs) │ │ 47 │ │ │ 48 │ return wrapper │ │ 49 │ │ │ │ 
C:\kohya\kohya_ss\venv\lib\site-packages\diffusers\models\autoencoder_kl.py:164 in encode │ │ │ │ 161 │ │ if self.use_tiling and (x.shape[-1] > self.tile_sample_min_size or x.shape[-2] > │ │ 162 │ │ │ return self.tiled_encode(x, return_dict=return_dict) │ │ 163 │ │ │ │ ❱ 164 │ │ h = self.encoder(x) │ │ 165 │ │ moments = self.quant_conv(h) │ │ 166 │ │ posterior = DiagonalGaussianDistribution(moments) │ │ 167 │ │ │ │ C:\kohya\kohya_ss\venv\lib\site-packages\torch\nn\modules\module.py:1130 in _call_impl │ │ │ │ 1127 │ │ # this function, and just call forward. │ │ 1128 │ │ if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks o │ │ 1129 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │ │ ❱ 1130 │ │ │ return forward_call(input, kwargs) │ │ 1131 │ │ # Do not call functions when jit is used │ │ 1132 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │ │ 1133 │ │ if self._backward_hooks or _global_backward_hooks: │ │ │ │ C:\kohya\kohya_ss\venv\lib\site-packages\diffusers\models\vae.py:142 in forward │ │ │ │ 139 │ │ │ │ sample = down_block(sample) │ │ 140 │ │ │ │ │ 141 │ │ │ # middle │ │ ❱ 142 │ │ │ sample = self.mid_block(sample) │ │ 143 │ │ │ │ 144 │ │ # post-process │ │ 145 │ │ sample = self.conv_norm_out(sample) │ │ │ │ C:\kohya\kohya_ss\venv\lib\site-packages\torch\nn\modules\module.py:1130 in _call_impl │ │ │ │ 1127 │ │ # this function, and just call forward. 
│ │ 1128 │ │ if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks o │ │ 1129 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │ │ ❱ 1130 │ │ │ return forward_call(*input, *kwargs) │ │ 1131 │ │ # Do not call functions when jit is used │ │ 1132 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │ │ 1133 │ │ if self._backward_hooks or _global_backward_hooks: │ │ │ │ C:\kohya\kohya_ss\venv\lib\site-packages\diffusers\models\unet_2d_blocks.py:472 in forward │ │ │ │ 469 │ │ hidden_states = self.resnets[0](hidden_states, temb) │ │ 470 │ │ for attn, resnet in zip(self.attentions, self.resnets[1:]): │ │ 471 │ │ │ if attn is not None: │ │ ❱ 472 │ │ │ │ hidden_states = attn(hidden_states, temb=temb) │ │ 473 │ │ │ hidden_states = resnet(hidden_states, temb) │ │ 474 │ │ │ │ 475 │ │ return hidden_states │ │ │ │ C:\kohya\kohya_ss\venv\lib\site-packages\torch\nn\modules\module.py:1130 in _call_impl │ │ │ │ 1127 │ │ # this function, and just call forward. │ │ 1128 │ │ if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks o │ │ 1129 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │ │ ❱ 1130 │ │ │ return forward_call(input, kwargs) │ │ 1131 │ │ # Do not call functions when jit is used │ │ 1132 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │ │ 1133 │ │ if self._backward_hooks or _global_backward_hooks: │ │ │ │ C:\kohya\kohya_ss\venv\lib\site-packages\diffusers\models\attention_processor.py:320 in forward │ │ │ │ 317 │ │ # The Attention class can call different attention processors / attention func │ │ 318 │ │ # here we simply pass along all tensors to the selected processor class │ │ 319 │ │ # For standard processors that are defined here, `cross_attention_kwargs` is e │ │ ❱ 320 │ │ return self.processor( │ │ 321 │ │ │ self, │ │ 322 │ │ │ hidden_states, │ │ 323 │ │ │ encoder_hidden_states=encoder_hidden_states, │ │ │ │ 
C:\kohya\kohya_ss\venv\lib\site-packages\diffusers\models\attention_processor.py:1045 in │ │ call │ │ │ │ 1042 │ │ key = attn.head_to_batch_dim(key).contiguous() │ │ 1043 │ │ value = attn.head_to_batch_dim(value).contiguous() │ │ 1044 │ │ │ │ ❱ 1045 │ │ hidden_states = xformers.ops.memory_efficient_attention( │ │ 1046 │ │ │ query, key, value, attn_bias=attention_mask, op=self.attention_op, scale=att │ │ 1047 │ │ ) │ │ 1048 │ │ hidden_states = hidden_states.to(query.dtype) │ ╰──────────────────────────────────────────────────────────────────────────────────────────────────╯ TypeError: memory_efficient_attention() got an unexpected keyword argument 'scale' ╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮ │ C:\Program │ │ Files\WindowsApps\PythonSoftwareFoundation.Python.3.10_3.10.3056.0_x64qbz5n2kfra8p0\lib\runpy. │ │ py:196 in _run_module_as_main │ │ │ │ 193 │ main_globals = sys.modules["main"].dict │ │ 194 │ if alter_argv: │ │ 195 │ │ sys.argv[0] = mod_spec.origin │ │ ❱ 196 │ return _run_code(code, main_globals, None, │ │ 197 │ │ │ │ │ "main", mod_spec) │ │ 198 │ │ 199 def run_module(mod_name, init_globals=None, │ │ │ │ C:\Program │ │ Files\WindowsApps\PythonSoftwareFoundation.Python.3.10_3.10.3056.0_x64qbz5n2kfra8p0\lib\runpy. 
│ │ py:86 in _run_code │ │ │ │ 83 │ │ │ │ │ loader = loader, │ │ 84 │ │ │ │ │ package = pkg_name, │ │ 85 │ │ │ │ │ spec = mod_spec) │ │ ❱ 86 │ exec(code, run_globals) │ │ 87 │ return run_globals │ │ 88 │ │ 89 def _run_module_code(code, init_globals=None, │ │ │ │ in :7 │ │ │ │ 4 from accelerate.commands.accelerate_cli import main │ │ 5 if name == 'main': │ │ 6 │ sys.argv[0] = re.sub(r'(-script.pyw|.exe)?$', '', sys.argv[0]) │ │ ❱ 7 │ sys.exit(main()) │ │ 8 │ │ │ │ C:\kohya\kohya_ss\venv\lib\site-packages\accelerate\commands\accelerate_cli.py:45 in main │ │ │ │ 42 │ │ exit(1) │ │ 43 │ │ │ 44 │ # Run │ │ ❱ 45 │ args.func(args) │ │ 46 │ │ 47 │ │ 48 if name == "main": │ │ │ │ C:\kohya\kohya_ss\venv\lib\site-packages\accelerate\commands\launch.py:918 in launch_command │ │ │ │ 915 │ elif defaults is not None and defaults.compute_environment == ComputeEnvironment.AMA │ │ 916 │ │ sagemaker_launcher(defaults, args) │ │ 917 │ else: │ │ ❱ 918 │ │ simple_launcher(args) │ │ 919 │ │ 920 │ │ 921 def main(): │ │ │ │ C:\kohya\kohya_ss\venv\lib\site-packages\accelerate\commands\launch.py:580 in simple_launcher │ │ │ │ 577 │ process.wait() │ │ 578 │ if process.returncode != 0: │ │ 579 │ │ if not args.quiet: │ │ ❱ 580 │ │ │ raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd) │ │ 581 │ │ else: │ │ 582 │ │ │ sys.exit(1) │ │ 583 │ ╰──────────────────────────────────────────────────────────────────────────────────────────────────╯ CalledProcessError: Command '['C:\kohya\kohya_ss\venv\Scripts\python.exe', './train_network.py', '--pretrained_model_name_or_path=D:/nai/nai.ckpt', '--train_data_dir=D:/trainer/image', '--resolution=512,512', '--output_dir=D:/trainer/model', '--logging_dir=D:/trainer/log', '--network_alpha=1', '--save_model_as=safetensors', '--network_module=networks.lora', '--network_dim=8', '--output_name=v1', '--lr_scheduler_num_cycles=10', '--no_half_vae', '--learning_rate=0.0001', '--lr_scheduler=constant', '--train_batch_size=1', 
'--max_train_steps=28560', '--save_every_n_epochs=1', '--mixed_precision=fp16', '--save_precision=fp16', '--seed=1234', '--caption_extension=.txt', '--cache_latents', '--optimizer_type=AdamW', '--max_data_loader_n_workers=1', '--clip_skip=2', '--bucket_reso_steps=64', '--mem_eff_attn', '--gradient_checkpointing', '--xformers', '--bucket_no_upscale']' returned non-zero exit status 1.

My settings are:

"pretrained_model_name_or_path": "D:/nai/nai.ckpt", "v2": false, "v_parameterization": false, "logging_dir": "D:/trainer/log", "train_data_dir": "D:/trainer/image", "reg_data_dir": "", "output_dir": "D:/trainer/model", "max_resolution": "512,512", "learning_rate": "0.0001", "lr_scheduler": "constant", "lr_warmup": "0", "train_batch_size": 1, "epoch": 10, "save_every_n_epochs": 1, "mixed_precision": "fp16", "save_precision": "fp16", "seed": "1234", "num_cpu_threads_per_process": 2, "cache_latents": true, "caption_extension": ".txt", "enable_bucket": false, "gradient_checkpointing": true, "full_fp16": false, "no_token_padding": false, "stop_text_encoder_training": 0, "xformers": true, "save_model_as": "safetensors", "shuffle_caption": false, "save_state": false, "resume": "", "prior_loss_weight": 1.0, "color_aug": false, "flip_aug": false, "clip_skip": 2, "vae": "", "output_name": "v1", "max_token_length": "75", "max_train_epochs": "", "max_data_loader_n_workers": "1", "mem_eff_attn": true, "gradient_accumulation_steps": 1.0, "model_list": "custom", "keep_tokens": "0", "persistent_data_loader_workers": false, "bucket_no_upscale": true, "random_crop": false, "bucket_reso_steps": 64.0, "caption_dropout_every_n_epochs": 0.0, "caption_dropout_rate": 0, "optimizer": "AdamW", "optimizer_args": "", "noise_offset": "", "sample_every_n_steps": 0, "sample_every_n_epochs": 0, "sample_sampler": "euler_a", "sample_prompts": "", "additional_parameters": "", "vae_batch_size": 0

demirklvc commented 1 year ago

I asked the same thing months ago; the creators didn't care to answer or help.

Bellatrix8 commented 1 year ago

> I asked the same thing months ago; the creators didn't care to answer or help.

If you still have the issue, I already found the solution: uncheck 'use xformers' and use fp16

mats4d commented 1 year ago

> > I asked the same thing months ago; the creators didn't care to answer or help.
>
> If you still have the issue, I already found the solution: uncheck 'use xformers' and use fp16

I will try that today, but I think I've tried that already.

I hope they fix it, because xformers speeds up the process quite a bit (and supposedly the quality drop is unnoticeable). We shall see, thanks!!

mats4d commented 1 year ago

> > I asked the same thing months ago; the creators didn't care to answer or help.
>
> If you still have the issue, I already found the solution: uncheck 'use xformers' and use fp16

Try installing the dev release of the new xformers; this worked for me, and it's all good again (for now).

https://pypi.org/project/xformers/0.0.21.dev565/#history

xiaoming9802 commented 1 year ago

I also encountered this problem. Please help.

19:27:00-497156 INFO Version: v21.8.2 19:27:00-512166 INFO Using CPU-only Torch 19:27:02-032364 INFO Torch 2.0.1+cu118 19:27:02-053425 INFO Torch backend: nVidia CUDA 11.8 cuDNN 8700 19:27:02-056979 INFO Torch detected GPU: GeForce GTX 1080 Ti VRAM 11264 Arch (6, 1) Cores 28 19:27:02-058973 INFO Verifying modules instalation status from requirements_windows_torch2.txt... 19:27:02-061965 INFO Verifying modules instalation status from requirements.txt... 19:27:04-974244 INFO headless: False 19:27:04-978259 INFO Load CSS... Running on local URL: http://127.0.0.1:7860

To create a public link, set share=True in launch(). 19:27:37-582413 INFO Loading config... 19:27:38-283491 INFO Loading config... 19:28:01-588060 INFO Start training LoRA Standard ... 19:28:01-589033 INFO Valid image folder names found in: E:\LoRA_test\image 19:28:01-590059 INFO Folder 30_nianping: 10 images found 19:28:01-591060 INFO Folder 30_nianping: 300 steps 19:28:01-592053 INFO Total steps: 300 19:28:01-593053 INFO Train batch size: 1 19:28:01-594048 INFO Gradient accumulation steps: 1.0 19:28:01-595045 INFO Epoch: 2 19:28:01-595045 INFO Regulatization factor: 1 19:28:01-596042 INFO max_train_steps (300 / 1 / 1.0 2 1) = 600 19:28:01-598010 INFO stop_text_encoder_training = 0 19:28:01-599034 INFO lr_warmup_steps = 6 19:28:01-600031 WARNING Here is the trainer command as a reference. It will not be executed:

accelerate launch --num_cpu_threads_per_process=6 "./train_network.py" --enable_bucket --pretrained_model_name_or_path="runwayml/stable-diffusion-v1-5" --train_data_dir="E:\LoRA_test\image" --resolution="512,512" --output_dir="E:\LoRA_test\model" --logging_dir="E:\LoRA_test\log" --network_alpha="128" --save_model_as=safetensors --network_module=networks.lora --text_encoder_lr=5e-05 --unet_lr=0.0001 --network_dim=128 --output_name="aixiaolong" --lr_scheduler_num_cycles="2" --no_half_vae --learning_rate="0.0001" --lr_scheduler="cosine_with_restarts" --lr_warmup_steps="6" --train_batch_size="1" --max_train_steps="600" --save_every_n_epochs="1" --mixed_precision="fp16" --save_precision="fp16" --seed="1234" --caption_extension=".txt" --cache_latents --optimizer_type="AdamW8bit" --max_data_loader_n_workers="0" --clip_skip=2 --bucket_reso_steps=64 --xformers --bucket_no_upscale 19:28:04-029947 INFO Start training LoRA Standard ... 19:28:04-031942 INFO Valid image folder names found in: E:\LoRA_test\image 19:28:04-033937 INFO Folder 30_nianping: 10 images found 19:28:04-034934 INFO Folder 30_nianping: 300 steps 19:28:04-035932 INFO Total steps: 300 19:28:04-036929 INFO Train batch size: 1 19:28:04-037926 INFO Gradient accumulation steps: 1.0 19:28:04-038923 INFO Epoch: 2 19:28:04-038923 INFO Regulatization factor: 1 19:28:04-039921 INFO max_train_steps (300 / 1 / 1.0 2 1) = 600 19:28:04-040919 INFO stop_text_encoder_training = 0 19:28:04-041915 INFO lr_warmup_steps = 6 19:28:04-042913 INFO Saving training config to E:\LoRA_test\model\aixiaolong_20230716-192804.json... 
19:28:04-048401 INFO accelerate launch --num_cpu_threads_per_process=6 "./train_network.py" --enable_bucket --pretrained_model_name_or_path="runwayml/stable-diffusion-v1-5" --train_data_dir="E:\LoRA_test\image" --resolution="512,512" --output_dir="E:\LoRA_test\model" --logging_dir="E:\LoRA_test\log" --network_alpha="128" --save_model_as=safetensors --network_module=networks.lora --text_encoder_lr=5e-05 --unet_lr=0.0001 --network_dim=128 --output_name="aixiaolong" --lr_scheduler_num_cycles="2" --no_half_vae --learning_rate="0.0001" --lr_scheduler="cosine_with_restarts" --lr_warmup_steps="6" --train_batch_size="1" --max_train_steps="600" --save_every_n_epochs="1" --mixed_precision="fp16" --save_precision="fp16" --seed="1234" --caption_extension=".txt" --cache_latents --optimizer_type="AdamW8bit" --max_data_loader_n_workers="0" --clip_skip=2 --bucket_reso_steps=64 --xformers --bucket_no_upscale [19:28:09] WARNING The following values were not passed to accelerate launch and had defaults used launch.py:890 instead: --num_processes was set to a value of 1 --num_machines was set to a value of 1 --mixed_precision was set to a value of 'no' --dynamo_backend was set to a value of 'no' To avoid this warning pass in values for each of the problematic parameters or run accelerate config. A matching Triton is not available, some optimizations will not be enabled. Error caught was: No module named 'triton' prepare tokenizer Using DreamBooth method. prepare images. found directory E:\LoRA_test\image\30_nianping contains 10 image files 300 train images with repeating. 0 reg images. no regularization images / 正則化画像が見つかりませんでした [Dataset 0] batch_size: 1 resolution: (512, 512) enable_bucket: True min_bucket_reso: 256 max_bucket_reso: 1024 bucket_reso_steps: 64 bucket_no_upscale: True

[Subset 0 of Dataset 0] image_dir: "E:\LoRA_test\image\30_nianping" image_count: 10 num_repeats: 30 shuffle_caption: False keep_tokens: 0 caption_dropout_rate: 0.0 caption_dropout_every_n_epoches: 0 caption_tag_dropout_rate: 0.0 color_aug: False flip_aug: False face_crop_aug_range: None random_crop: False token_warmup_min: 1, token_warmup_step: 0, is_reg: False class_tokens: nianping caption_extension: .txt

[Dataset 0] loading image sizes. 100%|████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 1662.76it/s] make buckets min_bucket_reso and max_bucket_reso are ignored if bucket_no_upscale is set, because bucket reso is defined by image size automatically / bucket_no_upscaleが指定された場合は、bucketの解像度は画像サイズから自動計算されるため、min_bucket_resoとmax_bucket_resoは無視されます number of images (including repeats) / 各bucketの画像枚数(繰り返し回数を含む) bucket 0: resolution (320, 512), count: 90 bucket 1: resolution (384, 448), count: 60 bucket 2: resolution (448, 448), count: 120 bucket 3: resolution (448, 512), count: 30 mean ar error (without repeats): 0.049647249649361735 preparing accelerator loading model for process 0/1 load Diffusers pretrained models: runwayml/stable-diffusion-v1-5 vae\diffusion_pytorch_model.safetensors not found You have disabled the safety checker for <class 'diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline'> by passing safety_checker=None. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 . UNet2DConditionModel: 64, 8, 768, False, False U-Net converted to original U-Net Enable xformers for U-Net import network module: networks.lora [Dataset 0] caching latents. checking cache validity... 100%|██████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<?, ?it/s] 100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:02<00:00, 4.82it/s] create LoRA network. 
base dim (rank): 128, alpha: 128.0 neuron dropout: p=None, rank dropout: p=None, module dropout: p=None create LoRA for Text Encoder: create LoRA for Text Encoder: 72 modules. create LoRA for U-Net: 192 modules. enable LoRA for text encoder enable LoRA for U-Net prepare optimizer, data loader etc.

===================================BUG REPORT=================================== Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues For effortless bug reporting copy-paste your error into this form: https://docs.google.com/forms/d/e/1FAIpQLScPB8emS3Thkp66nvqwmjTEgxp8Y9ufuWTzFyr9kJ5AoI47dQ/viewform?usp=sf_link

CUDA SETUP: Loading binary E:\kohya_ss\kohya_ss\venv\lib\site-packages\bitsandbytes\libbitsandbytes_cuda116.dll... use 8-bit AdamW optimizer | {} running training / 学習開始 num train images * repeats / 学習画像の数×繰り返し回数: 300 num reg images / 正則化画像の数: 0 num batches per epoch / 1epochのバッチ数: 300 num epochs / epoch数: 2 batch size per device / バッチサイズ: 1 gradient accumulation steps / 勾配を合計するステップ数 = 1 total optimization steps / 学習ステップ数: 600 steps: 0%| | 0/600 [00:00<?, ?it/s] epoch 1/2 Error no kernel image is available for execution on the device at line 167 in file D:\ai\tool\bitsandbytes\csrc\ops.cu ╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮ │ C:\Users\Administrator\AppData\Local\Programs\Python\Python310\lib\runpy.py:196 in │ │ _run_module_as_main │ │ │ │ 193 │ main_globals = sys.modules["main"].dict │ │ 194 │ if alter_argv: │ │ 195 │ │ sys.argv[0] = mod_spec.origin │ │ ❱ 196 │ return _run_code(code, main_globals, None, │ │ 197 │ │ │ │ │ "main", mod_spec) │ │ 198 │ │ 199 def run_module(mod_name, init_globals=None, │ │ │ │ C:\Users\Administrator\AppData\Local\Programs\Python\Python310\lib\runpy.py:86 in _run_code │ │ │ │ 83 │ │ │ │ │ loader = loader, │ │ 84 │ │ │ │ │ package = pkg_name, │ │ 85 │ │ │ │ │ spec = mod_spec) │ │ ❱ 86 │ exec(code, run_globals) │ │ 87 │ return run_globals │ │ 88 │ │ 89 def _run_module_code(code, init_globals=None, │ │ │ │ in :7 │ │ │ │ 4 from accelerate.commands.accelerate_cli import main │ │ 5 if name == 'main': │ │ 6 │ sys.argv[0] = re.sub(r'(-script.pyw|.exe)?$', '', sys.argv[0]) │ │ ❱ 7 │ sys.exit(main()) │ │ 8 │ │ │ │ E:\kohya_ss\kohya_ss\venv\lib\site-packages\accelerate\commands\accelerate_cli.py:45 in main │ │ │ │ 42 │ │ exit(1) │ │ 43 │ │ │ 44 │ # Run │ │ ❱ 45 │ args.func(args) │ │ 46 │ │ 47 │ │ 48 if name == "main": │ │ │ │ E:\kohya_ss\kohya_ss\venv\lib\site-packages\accelerate\commands\launch.py:918 in launch_command │ │ │ │ 915 │ elif defaults is not None and 
defaults.compute_environment == ComputeEnvironment.AMA │ │ 916 │ │ sagemaker_launcher(defaults, args) │ │ 917 │ else: │ │ ❱ 918 │ │ simple_launcher(args) │ │ 919 │ │ 920 │ │ 921 def main(): │ │ │ │ E:\kohya_ss\kohya_ss\venv\lib\site-packages\accelerate\commands\launch.py:580 in simple_launcher │ │ │ │ 577 │ process.wait() │ │ 578 │ if process.returncode != 0: │ │ 579 │ │ if not args.quiet: │ │ ❱ 580 │ │ │ raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd) │ │ 581 │ │ else: │ │ 582 │ │ │ sys.exit(1) │ │ 583 │ ╰──────────────────────────────────────────────────────────────────────────────────────────────────╯ CalledProcessError: Command '['E:\kohya_ss\kohya_ss\venv\Scripts\python.exe', './train_network.py', '--enable_bucket', '--pretrained_model_name_or_path=runwayml/stable-diffusion-v1-5', '--train_data_dir=E:\LoRA_test\image', '--resolution=512,512', '--output_dir=E:\LoRA_test\model', '--logging_dir=E:\LoRA_test\log', '--network_alpha=128', '--save_model_as=safetensors', '--network_module=networks.lora', '--text_encoder_lr=5e-05', '--unet_lr=0.0001', '--network_dim=128', '--output_name=aixiaolong', '--lr_scheduler_num_cycles=2', '--no_half_vae', '--learning_rate=0.0001', '--lr_scheduler=cosine_with_restarts', '--lr_warmup_steps=6', '--train_batch_size=1', '--max_train_steps=600', '--save_every_n_epochs=1', '--mixed_precision=fp16', '--save_precision=fp16', '--seed=1234', '--caption_extension=.txt', '--cache_latents', '--optimizer_type=AdamW8bit', '--max_data_loader_n_workers=0', '--clip_skip=2', '--bucket_reso_steps=64', '--xformers', '--bucket_no_upscale']' returned non-zero exit status 1. 19:33:05-517246 INFO Start training LoRA Standard ... 
19:33:05-520236 INFO Valid image folder names found in: E:\LoRA_test\image 19:33:05-522745 INFO Folder 30_nianping: 10 images found 19:33:05-523734 INFO Folder 30_nianping: 300 steps 19:33:05-525729 INFO Total steps: 300 19:33:05-526726 INFO Train batch size: 1 19:33:05-527724 INFO Gradient accumulation steps: 1.0 19:33:05-529718 INFO Epoch: 2 19:33:05-531713 INFO Regulatization factor: 1 19:33:05-534705 INFO max_train_steps (300 / 1 / 1.0 2 1) = 600 19:33:05-538694 INFO stop_text_encoder_training = 0 19:33:05-540689 INFO lr_warmup_steps = 6 19:33:05-541686 WARNING Here is the trainer command as a reference. It will not be executed:

accelerate launch --num_cpu_threads_per_process=6 "./train_network.py" --enable_bucket --pretrained_model_name_or_path="runwayml/stable-diffusion-v1-5" --train_data_dir="E:\LoRA_test\image" --resolution="512,512" --output_dir="E:\LoRA_test\model" --logging_dir="E:\LoRA_test\log" --network_alpha="64" --save_model_as=safetensors --network_module=networks.lora --text_encoder_lr=5e-05 --unet_lr=0.0001 --network_dim=64 --output_name="aixiaolong" --lr_scheduler_num_cycles="2" --no_half_vae --learning_rate="0.0001" --lr_scheduler="cosine_with_restarts" --lr_warmup_steps="6" --train_batch_size="1" --max_train_steps="600" --save_every_n_epochs="1" --mixed_precision="fp16" --save_precision="fp16" --seed="1234" --caption_extension=".txt" --cache_latents --optimizer_type="AdamW8bit" --max_data_loader_n_workers="0" --clip_skip=2 --bucket_reso_steps=64 --xformers --bucket_no_upscale 19:33:09-694179 INFO ['tensorboard.exe', '--logdir', 'E:\LoRA_test\log', '--host', '0.0.0.0', '--port', '6006'] 19:33:09-696174 INFO Starting tensorboard... TensorBoard 2.12.3 at http://0.0.0.0:6006/ (Press CTRL+C to quit) 19:33:14-715608 INFO Opening tensorboard url in browser... 
Exception in thread Reloader: Traceback (most recent call last): File "C:\Users\Administrator\AppData\Local\Programs\Python\Python310\lib\threading.py", line 1016, in _bootstrap_inner self.run() File "C:\Users\Administrator\AppData\Local\Programs\Python\Python310\lib\threading.py", line 953, in run self._target(*self._args, *self._kwargs) File "E:\kohya_ss\kohya_ss\venv\lib\site-packages\tensorboard\backend\event_processing\data_ingester.py", line 104, in _reload self._multiplexer.AddRunsFromDirectory(path, name) File "E:\kohya_ss\kohya_ss\venv\lib\site-packages\tensorboard\backend\event_processing\plugin_event_multiplexer.py", line 205, in AddRunsFromDirectory for subdir in io_wrapper.GetLogdirSubdirectories(path): File "E:\kohya_ss\kohya_ss\venv\lib\site-packages\tensorboard\backend\event_processing\io_wrapper.py", line 220, in return ( File "E:\kohya_ss\kohya_ss\venv\lib\site-packages\tensorboard\backend\event_processing\io_wrapper.py", line 172, in ListRecursivelyViaWalking for dirpath, , filenames in tf.io.gfile.walk(top, topdown=True): File "E:\kohya_ss\kohya_ss\venv\lib\site-packages\tensorflow\python\lib\io\file_io.py", line 876, in walk_v2 if is_directory(full_path): File "E:\kohya_ss\kohya_ss\venv\lib\site-packages\tensorflow\python\lib\io\file_io.py", line 689, in is_directory return is_directory_v2(dirname) File "E:\kohya_ss\kohya_ss\venv\lib\site-packages\tensorflow\python\lib\io\file_io.py", line 703, in is_directory_v2 return _pywrap_file_io.IsDirectory(compat.path_to_bytes(path)) UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa4 in position 23: invalid start byte 19:34:18-402653 INFO ['tensorboard.exe', '--logdir', 'E:\LoRA_test\log', '--host', '0.0.0.0', '--port', '6006'] 19:34:18-404647 INFO Tensorboard is already running. Terminating existing process before starting new one... 19:34:18-406623 INFO Stopping tensorboard process... 19:34:18-408617 INFO ...process stopped 19:34:18-410612 INFO Starting tensorboard... 
TensorBoard 2.12.3 at http://0.0.0.0:6006/ (Press CTRL+C to quit) 19:34:23-426548 INFO Opening tensorboard url in browser... 19:36:17-512693 INFO Start training LoRA Standard ... 19:36:17-514688 INFO Valid image folder names found in: E:\LoRA_test\image 19:36:17-517679 INFO Folder 30_nianping: 10 images found 19:36:17-518677 INFO Folder 30_nianping: 300 steps 19:36:17-520671 INFO Total steps: 300 19:36:17-521669 INFO Train batch size: 1 19:36:17-522665 INFO Gradient accumulation steps: 1.0 19:36:17-523663 INFO Epoch: 2 19:36:17-524661 INFO Regulatization factor: 1 19:36:17-526659 INFO max_train_steps (300 / 1 / 1.0 2 * 1) = 600 19:36:17-528650 INFO stop_text_encoder_training = 0 19:36:17-529648 INFO lr_warmup_steps = 6 19:36:17-530644 WARNING Here is the trainer command as a reference. It will not be executed:

accelerate launch --num_cpu_threads_per_process=6 "./train_network.py" --enable_bucket --pretrained_model_name_or_path="runwayml/stable-diffusion-v1-5" --train_data_dir="E:\LoRA_test\image" --resolution="512,512" --output_dir="E:\LoRA_test\model" --logging_dir="E:\LoRA_test\log" --network_alpha="64" --save_model_as=safetensors --network_module=networks.lora --text_encoder_lr=5e-05 --unet_lr=0.0001 --network_dim=64 --output_name="aixiaolong" --lr_scheduler_num_cycles="2" --no_half_vae --learning_rate="0.0001" --lr_scheduler="cosine_with_restarts" --lr_warmup_steps="6" --train_batch_size="1" --max_train_steps="600" --save_every_n_epochs="1" --mixed_precision="fp16" --save_precision="fp16" --seed="1234" --caption_extension=".txt" --cache_latents --optimizer_type="AdamW8bit" --max_data_loader_n_workers="0" --clip_skip=2 --bucket_reso_steps=64 --xformers --bucket_no_upscale 19:36:29-898751 INFO Start training LoRA Standard ... 19:36:29-900745 INFO Valid image folder names found in: E:\LoRA_test\image 19:36:29-902739 INFO Folder 30_nianping: 10 images found 19:36:29-903737 INFO Folder 30_nianping: 300 steps 19:36:29-904734 INFO Total steps: 300 19:36:29-905731 INFO Train batch size: 1 19:36:29-907726 INFO Gradient accumulation steps: 1.0 19:36:29-908723 INFO Epoch: 2 19:36:29-909720 INFO Regulatization factor: 1 19:36:29-911715 INFO max_train_steps (300 / 1 / 1.0 2 1) = 600 19:36:29-912713 INFO stop_text_encoder_training = 0 19:36:29-913710 INFO lr_warmup_steps = 6 19:36:29-914707 INFO Saving training config to E:\LoRA_test\model\aixiaolong_20230716-193629.json... 
19:36:29-918696 INFO accelerate launch --num_cpu_threads_per_process=6 "./train_network.py" --enable_bucket --pretrained_model_name_or_path="runwayml/stable-diffusion-v1-5" --train_data_dir="E:\LoRA_test\image" --resolution="512,512" --output_dir="E:\LoRA_test\model" --logging_dir="E:\LoRA_test\log" --network_alpha="64" --save_model_as=safetensors --network_module=networks.lora --text_encoder_lr=5e-05 --unet_lr=0.0001 --network_dim=64 --output_name="aixiaolong" --lr_scheduler_num_cycles="2" --no_half_vae --learning_rate="0.0001" --lr_scheduler="cosine_with_restarts" --lr_warmup_steps="6" --train_batch_size="1" --max_train_steps="600" --save_every_n_epochs="1" --mixed_precision="fp16" --save_precision="fp16" --seed="1234" --caption_extension=".txt" --cache_latents --optimizer_type="AdamW8bit" --max_data_loader_n_workers="0" --clip_skip=2 --bucket_reso_steps=64 --xformers --bucket_no_upscale [19:36:35] WARNING The following values were not passed to accelerate launch and had defaults used launch.py:890 instead: --num_processes was set to a value of 1 --num_machines was set to a value of 1 --mixed_precision was set to a value of 'no' --dynamo_backend was set to a value of 'no' To avoid this warning pass in values for each of the problematic parameters or run accelerate config. A matching Triton is not available, some optimizations will not be enabled. Error caught was: No module named 'triton' prepare tokenizer Using DreamBooth method. prepare images. found directory E:\LoRA_test\image\30_nianping contains 10 image files 300 train images with repeating. 0 reg images. no regularization images / 正則化画像が見つかりませんでした [Dataset 0] batch_size: 1 resolution: (512, 512) enable_bucket: True min_bucket_reso: 256 max_bucket_reso: 1024 bucket_reso_steps: 64 bucket_no_upscale: True

[Subset 0 of Dataset 0] image_dir: "E:\LoRA_test\image\30_nianping" image_count: 10 num_repeats: 30 shuffle_caption: False keep_tokens: 0 caption_dropout_rate: 0.0 caption_dropout_every_n_epoches: 0 caption_tag_dropout_rate: 0.0 color_aug: False flip_aug: False face_crop_aug_range: None random_crop: False token_warmup_min: 1, token_warmup_step: 0, is_reg: False class_tokens: nianping caption_extension: .txt

[Dataset 0] loading image sizes. 100%|████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 1671.30it/s] make buckets min_bucket_reso and max_bucket_reso are ignored if bucket_no_upscale is set, because bucket reso is defined by image size automatically / bucket_no_upscaleが指定された場合は、bucketの解像度は画像サイズから自動計算されるため、min_bucket_resoとmax_bucket_resoは無視されます number of images (including repeats) / 各bucketの画像枚数(繰り返し回数を含む) bucket 0: resolution (320, 512), count: 90 bucket 1: resolution (384, 448), count: 60 bucket 2: resolution (448, 448), count: 120 bucket 3: resolution (448, 512), count: 30 mean ar error (without repeats): 0.049647249649361735 preparing accelerator loading model for process 0/1 load Diffusers pretrained models: runwayml/stable-diffusion-v1-5 vae\diffusion_pytorch_model.safetensors not found You have disabled the safety checker for <class 'diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline'> by passing safety_checker=None. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 . UNet2DConditionModel: 64, 8, 768, False, False U-Net converted to original U-Net Enable xformers for U-Net import network module: networks.lora [Dataset 0] caching latents. checking cache validity... 100%|██████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<?, ?it/s] 100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:02<00:00, 4.76it/s] create LoRA network. 
base dim (rank): 64, alpha: 64.0 neuron dropout: p=None, rank dropout: p=None, module dropout: p=None create LoRA for Text Encoder: create LoRA for Text Encoder: 72 modules. create LoRA for U-Net: 192 modules. enable LoRA for text encoder enable LoRA for U-Net prepare optimizer, data loader etc.

===================================BUG REPORT=================================== Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues For effortless bug reporting copy-paste your error into this form: https://docs.google.com/forms/d/e/1FAIpQLScPB8emS3Thkp66nvqwmjTEgxp8Y9ufuWTzFyr9kJ5AoI47dQ/viewform?usp=sf_link

CUDA SETUP: Loading binary E:\kohya_ss\kohya_ss\venv\lib\site-packages\bitsandbytes\libbitsandbytes_cuda116.dll... use 8-bit AdamW optimizer | {} running training / 学習開始 num train images * repeats / 学習画像の数×繰り返し回数: 300 num reg images / 正則化画像の数: 0 num batches per epoch / 1epochのバッチ数: 300 num epochs / epoch数: 2 batch size per device / バッチサイズ: 1 gradient accumulation steps / 勾配を合計するステップ数 = 1 total optimization steps / 学習ステップ数: 600 steps: 0%| | 0/600 [00:00<?, ?it/s] epoch 1/2 Error no kernel image is available for execution on the device at line 167 in file D:\ai\tool\bitsandbytes\csrc\ops.cu ╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮ │ C:\Users\Administrator\AppData\Local\Programs\Python\Python310\lib\runpy.py:196 in │ │ _run_module_as_main │ │ │ │ 193 │ main_globals = sys.modules["main"].dict │ │ 194 │ if alter_argv: │ │ 195 │ │ sys.argv[0] = mod_spec.origin │ │ ❱ 196 │ return _run_code(code, main_globals, None, │ │ 197 │ │ │ │ │ "main", mod_spec) │ │ 198 │ │ 199 def run_module(mod_name, init_globals=None, │ │ │ │ C:\Users\Administrator\AppData\Local\Programs\Python\Python310\lib\runpy.py:86 in _run_code │ │ │ │ 83 │ │ │ │ │ loader = loader, │ │ 84 │ │ │ │ │ package = pkg_name, │ │ 85 │ │ │ │ │ spec = mod_spec) │ │ ❱ 86 │ exec(code, run_globals) │ │ 87 │ return run_globals │ │ 88 │ │ 89 def _run_module_code(code, init_globals=None, │ │ │ │ in :7 │ │ │ │ 4 from accelerate.commands.accelerate_cli import main │ │ 5 if name == 'main': │ │ 6 │ sys.argv[0] = re.sub(r'(-script.pyw|.exe)?$', '', sys.argv[0]) │ │ ❱ 7 │ sys.exit(main()) │ │ 8 │ │ │ │ E:\kohya_ss\kohya_ss\venv\lib\site-packages\accelerate\commands\accelerate_cli.py:45 in main │ │ │ │ 42 │ │ exit(1) │ │ 43 │ │ │ 44 │ # Run │ │ ❱ 45 │ args.func(args) │ │ 46 │ │ 47 │ │ 48 if name == "main": │ │ │ │ E:\kohya_ss\kohya_ss\venv\lib\site-packages\accelerate\commands\launch.py:918 in launch_command │ │ │ │ 915 │ elif defaults is not None and 
defaults.compute_environment == ComputeEnvironment.AMA │ │ 916 │ │ sagemaker_launcher(defaults, args) │ │ 917 │ else: │ │ ❱ 918 │ │ simple_launcher(args) │ │ 919 │ │ 920 │ │ 921 def main(): │ │ │ │ E:\kohya_ss\kohya_ss\venv\lib\site-packages\accelerate\commands\launch.py:580 in simple_launcher │ │ │ │ 577 │ process.wait() │ │ 578 │ if process.returncode != 0: │ │ 579 │ │ if not args.quiet: │ │ ❱ 580 │ │ │ raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd) │ │ 581 │ │ else: │ │ 582 │ │ │ sys.exit(1) │ │ 583 │ ╰──────────────────────────────────────────────────────────────────────────────────────────────────╯ CalledProcessError: Command '['E:\kohya_ss\kohya_ss\venv\Scripts\python.exe', './train_network.py', '--enable_bucket', '--pretrained_model_name_or_path=runwayml/stable-diffusion-v1-5', '--train_data_dir=E:\LoRA_test\image', '--resolution=512,512', '--output_dir=E:\LoRA_test\model', '--logging_dir=E:\LoRA_test\log', '--network_alpha=64', '--save_model_as=safetensors', '--network_module=networks.lora', '--text_encoder_lr=5e-05', '--unet_lr=0.0001', '--network_dim=64', '--output_name=aixiaolong', '--lr_scheduler_num_cycles=2', '--no_half_vae', '--learning_rate=0.0001', '--lr_scheduler=cosine_with_restarts', '--lr_warmup_steps=6', '--train_batch_size=1', '--max_train_steps=600', '--save_every_n_epochs=1', '--mixed_precision=fp16', '--save_precision=fp16', '--seed=1234', '--caption_extension=.txt', '--cache_latents', '--optimizer_type=AdamW8bit', '--max_data_loader_n_workers=0', '--clip_skip=2', '--bucket_reso_steps=64', '--xformers', '--bucket_no_upscale']' returned non-zero exit status 1.

cwinebrenner commented 1 year ago

Try training with the 1.5 model rather than the new 2.x versions; that fixed it for me.

bleetube commented 1 year ago

As mats4d pointed out, xformers needs an update. Try `pip install -U xformers` and turn xformers back on. v0.0.21 appears to fix this issue; v0.0.20 (currently used by the kohya_ss release version) has the problem.