bmaltais / kohya_ss


SDXL training issue #1879

Closed · Bellatrix8 closed this 5 months ago

Bellatrix8 commented 9 months ago

I am having this issue, how can I solve it? (I have a 1080 Ti.)

```
Traceback (most recent call last):
  File "C:\kohya_ss\sdxl_train_network.py", line 167, in <module>
    trainer.train(args)
  File "C:\kohya_ss\train_network.py", line 213, in train
    model_version, text_encoder, vae, unet = self.load_target_model(args, weight_dty...
  File "C:\kohya_ss\sdxl_train_network.py", line 34, in load_target_model
    ) = sdxl_train_util.load_target_model(args, accelerator, sdxl_model_util.MODEL_V...
  File "C:\kohya_ss\library\sdxl_train_util.py", line 34, in load_target_model
    ) = _load_target_model(args, model_version, weight_dtype, accelerator.device...
  File "C:\kohya_ss\library\sdxl_train_util.py", line 69, in _load_target_model
    ) = sdxl_model_util.load_models_from_sdxl_checkpoint(model_version, name_or_path, de...
  File "C:\kohya_ss\library\sdxl_model_util.py", line 169, in load_models_from_sdxl_checkpoint
    text_model2 = CLIPTextModelWithProjection(text_model2_cfg)
  File "C:\Users\Utilizator\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\transformers\models\clip\modeling_clip.py", line 1188, in __init__
    self.text_model = CLIPTextTransformer(config)
  File "C:\Users\Utilizator\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\transformers\models\clip\modeling_clip.py", line 700, in __init__
    self.encoder = CLIPEncoder(config)
  File "C:\Users\Utilizator\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\transformers\models\clip\modeling_clip.py", line 585, in __init__
    self.layers = nn.ModuleList([CLIPEncoderLayer(config) for _ in range(config.num_...
  File "C:\Users\Utilizator\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\transformers\models\clip\modeling_clip.py", line 585, in <listcomp>
    self.layers = nn.ModuleList([CLIPEncoderLayer(config) for _ in range(config.num_...
  File "C:\Users\Utilizator\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\transformers\models\clip\modeling_clip.py", line 358, in __init__
    self.self_attn = CLIPAttention(config)
  File "C:\Users\Utilizator\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\transformers\models\clip\modeling_clip.py", line 255, in __init__
    self.out_proj = nn.Linear(self.embed_dim, self.embed_dim)
  File "C:\Users\Utilizator\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\torch\nn\modules\linear.py", line 96, in __init__
    self.weight = Parameter(torch.empty((out_features, in_features), factory_kwarg...
RuntimeError: [enforce fail at ..\c10\core\impl\alloc_cpu.cpp:72] data. DefaultCPUAllocator: not enough memory: you tried to allocate 6553600 bytes.
```

```
Traceback (most recent call last):
  File "C:\Users\Utilizator\AppData\Local\Programs\Python\Python310\lib\runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "C:\Users\Utilizator\AppData\Local\Programs\Python\Python310\lib\runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "...", line 7, in <module>
    sys.exit(main())
  File "C:\Users\Utilizator\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\accelerate\commands\accelerate_cli.py", line 45, in main
    args.func(args)
  File "C:\Users\Utilizator\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\accelerate\commands\launch.py", line 918, in launch_command
    simple_launcher(args)
  File "C:\Users\Utilizator\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\accelerate\commands\launch.py", line 580, in simple_launcher
    raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd)
CalledProcessError: Command '['C:\Users\Utilizator\AppData\Local\Programs\Python\Python310\python.exe', './sdxl_train_network.py', '--pretrained_model_name_or_path=E:/stable diffusion/stable-diffusion-webui/models/Stable-diffusion/animagineXLV3_v30Base.safetensors', '--train_data_dir=D:/trainer/trained//image', '--resolution=1024,1024', '--output_dir=D:/trainer/trained//model', '--logging_dir=D:/trainer/trained//log', '--network_alpha=6', '--save_model_as=safetensors', '--network_module=networks.lora', '--text_encoder_lr=0.0001', '--unet_lr=0.0005', '--network_dim=12', '--output_name=_XL_v1', '--lr_scheduler_num_cycles=10', '--no_half_vae', '--learning_rate=0.0002', '--lr_scheduler=cosine_with_restarts', '--train_batch_size=1', '--max_train_steps=8640', '--save_every_n_epochs=1', '--mixed_precision=fp16', '--save_precision=fp16', '--seed=1234', '--caption_extension=.txt', '--cache_latents', '--optimizer_type=AdamW', '--max_data_loader_n_workers=1', '--clip_skip=2', '--bucket_reso_steps=64', '--min_snr_gamma=5', '--mem_eff_attn', '--gradient_checkpointing', '--bucket_no_upscale', '--noise_offset=0.0357']' returned non-zero exit status 1.
```
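
For context, the inner RuntimeError is raised by PyTorch's DefaultCPUAllocator while transformers constructs the second SDXL text encoder, so the allocation that fails (6,553,600 bytes, exactly one 1280x1280 float32 weight) happens in system RAM, before anything is moved to the GPU. Below is a minimal sketch that isolates just that step, assuming torch and transformers are installed as in the traceback; the config values are approximations of the OpenCLIP ViT-bigG text encoder, and the exact numbers kohya_ss uses live in library/sdxl_model_util.py:

```python
# Minimal sketch: rebuild only the step that fails above, i.e. constructing the
# second SDXL text encoder on the CPU. Config values are assumptions approximating
# OpenCLIP ViT-bigG; see library/sdxl_model_util.py for the values kohya_ss uses.
from transformers import CLIPTextConfig, CLIPTextModelWithProjection

cfg = CLIPTextConfig(
    vocab_size=49408,
    hidden_size=1280,            # a 1280x1280 float32 weight is 6,553,600 bytes, the failing allocation
    intermediate_size=5120,
    num_hidden_layers=32,
    num_attention_heads=20,
    max_position_embeddings=77,
    projection_dim=1280,
)

# This call allocates every weight tensor in system RAM. If it raises the same
# "DefaultCPUAllocator: not enough memory" error, the machine is short on RAM
# (or the Windows pagefile is too small), not on GPU VRAM.
text_encoder_2 = CLIPTextModelWithProjection(cfg)
print(sum(p.numel() for p in text_encoder_2.parameters()) / 1e6, "M parameters")
```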

sm079 commented 8 months ago

It looks like you're running out of VRAM.
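
Worth noting, though, that the failing allocator in the traceback is the DefaultCPUAllocator, so it may be system RAM (or the Windows pagefile) rather than GPU VRAM that is exhausted while the checkpoint and text encoders are loaded on the CPU. A quick pre-flight check of available memory, as a sketch assuming psutil is installed (it is not part of kohya_ss):

```python
# Sketch, assuming psutil is installed (pip install psutil); not part of kohya_ss.
# Prints how much system RAM and pagefile-backed memory is free before training starts.
import psutil

vm = psutil.virtual_memory()
sm = psutil.swap_memory()
print(f"RAM available: {vm.available / 2**30:.1f} GiB of {vm.total / 2**30:.1f} GiB")
print(f"Swap/pagefile free: {sm.free / 2**30:.1f} GiB of {sm.total / 2**30:.1f} GiB")
```

The `--lowram` option visible in the traceback (next to the `# work on low-ram device` comment in library/sdxl_train_util.py) is aimed at machines with limited system RAM, though whether it helps here depends on the kohya_ss version and on how much VRAM the 1080 Ti has free.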