bmaltais / kohya_ss

Apache License 2.0
9.54k stars 1.23k forks

SDXL LoRA trained on latest commit (master) gives an error (RuntimeError: The size of tensor a (2048) must match the size of tensor b (768) at non-singleton dimension 1) in stable diffusion webui #1423

Closed agentj01 closed 8 months ago

agentj01 commented 1 year ago
*** Error completing request
*** Arguments: ('task(xd6t04ckfb6nht1)', '1girl\n <lora:last:1>', '', [], 30, 16, False, False, 1, 1, 7, -1.0, -1.0, 0, 0, 0, False, 768, 2048, False, 0.5, 1.5, '4x-UltraSharp', 20, 0, 0, 0, '', '', [], <gradio.routes.Request object at 0x00000227645CE590>, 0, False, 'MultiDiffusion', False, True, 1024, 1024, 96, 96, 48, 4, 'None', 2, False, 10, 1, 1, 64, False, False, False, False, False, 0.4, 0.4, 0.2, 0.2, '', '', 'Background', 0.2, -1.0, False, 0.4, 0.4, 0.2, 0.2, '', '', 'Background', 0.2, -1.0, False, 0.4, 0.4, 0.2, 0.2, '', '', 'Background', 0.2, -1.0, False, 0.4, 0.4, 0.2, 0.2, '', '', 'Background', 0.2, -1.0, False, 0.4, 0.4, 0.2, 0.2, '', '', 'Background', 0.2, -1.0, False, 0.4, 0.4, 0.2, 0.2, '', '', 'Background', 0.2, -1.0, False, 0.4, 0.4, 0.2, 0.2, '', '', 'Background', 0.2, -1.0, False, 0.4, 0.4, 0.2, 0.2, '', '', 'Background', 0.2, -1.0, False, 3072, 192, True, True, True, False, True, False, 1, False, False, False, 1.1, 1.5, 100, 0.7, False, False, True, False, False, 0, 'Gustavosta/MagicPrompt-Stable-Diffusion', '', <scripts.controlnet_ui.controlnet_ui_group.UiControlNetUnit object at 0x00000227645CEBF0>, <scripts.controlnet_ui.controlnet_ui_group.UiControlNetUnit object at 0x00000227645CDBD0>, <scripts.controlnet_ui.controlnet_ui_group.UiControlNetUnit object at 0x0000022764589960>, False, '', 0.5, True, False, '', 'Lerp', False, False, 'None', 20, False, False, 'positive', 'comma', 0, False, False, '', 1, '', [], 0, '', [], 0, '', [], True, False, False, False, 0, None, None, False, None, None, False, None, None, False, 50) {}
    Traceback (most recent call last):
      File "I:\stable-diffusion-webui\modules\call_queue.py", line 58, in f
        res = list(func(*args, **kwargs))
      File "I:\stable-diffusion-webui\modules\call_queue.py", line 37, in f
        res = func(*args, **kwargs)
      File "I:\stable-diffusion-webui\modules\txt2img.py", line 62, in txt2img
        processed = processing.process_images(p)
      File "I:\stable-diffusion-webui\modules\processing.py", line 677, in process_images
        res = process_images_inner(p)
      File "I:\stable-diffusion-webui\extensions\sd-webui-controlnet\scripts\batch_hijack.py", line 42, in processing_process_images_hijack
        return getattr(processing, '__controlnet_original_process_images_inner')(p, *args, **kwargs)
      File "I:\stable-diffusion-webui\modules\processing.py", line 794, in process_images_inner
        samples_ddim = p.sample(conditioning=p.c, unconditional_conditioning=p.uc, seeds=p.seeds, subseeds=p.subseeds, subseed_strength=p.subseed_strength, prompts=p.prompts)
      File "I:\stable-diffusion-webui\modules\processing.py", line 1054, in sample
        samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.txt2img_image_conditioning(x))
      File "I:\stable-diffusion-webui\modules\sd_samplers_kdiffusion.py", line 464, in sample
        samples = self.launch_sampling(steps, lambda: self.func(self.model_wrap_cfg, x, extra_args={
      File "I:\stable-diffusion-webui\modules\sd_samplers_kdiffusion.py", line 303, in launch_sampling
        return func()
      File "I:\stable-diffusion-webui\modules\sd_samplers_kdiffusion.py", line 464, in <lambda>
        samples = self.launch_sampling(steps, lambda: self.func(self.model_wrap_cfg, x, extra_args={
      File "I:\stable-diffusion-webui\venv\lib\site-packages\torch\utils\_contextlib.py", line 115, in decorate_context
        return func(*args, **kwargs)
      File "I:\stable-diffusion-webui\repositories\k-diffusion\k_diffusion\sampling.py", line 594, in sample_dpmpp_2m
        denoised = model(x, sigmas[i] * s_in, **extra_args)
      File "I:\stable-diffusion-webui\venv\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
        return forward_call(*args, **kwargs)
      File "I:\stable-diffusion-webui\modules\sd_samplers_kdiffusion.py", line 183, in forward
        x_out = self.inner_model(x_in, sigma_in, cond=make_condition_dict(cond_in, image_cond_in))
      File "I:\stable-diffusion-webui\venv\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
        return forward_call(*args, **kwargs)
      File "I:\stable-diffusion-webui\repositories\k-diffusion\k_diffusion\external.py", line 112, in forward
        eps = self.get_eps(input * c_in, self.sigma_to_t(sigma), **kwargs)
      File "I:\stable-diffusion-webui\repositories\k-diffusion\k_diffusion\external.py", line 138, in get_eps
        return self.inner_model.apply_model(*args, **kwargs)
      File "I:\stable-diffusion-webui\modules\sd_models_xl.py", line 37, in apply_model
        return self.model(x, t, cond)
      File "I:\stable-diffusion-webui\venv\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
        return forward_call(*args, **kwargs)
      File "I:\stable-diffusion-webui\modules\sd_hijack_utils.py", line 17, in <lambda>
        setattr(resolved_obj, func_path[-1], lambda *args, **kwargs: self(*args, **kwargs))
      File "I:\stable-diffusion-webui\modules\sd_hijack_utils.py", line 28, in __call__
        return self.__orig_func(*args, **kwargs)
      File "I:\stable-diffusion-webui\repositories\generative-models\sgm\modules\diffusionmodules\wrappers.py", line 28, in forward
        return self.diffusion_model(
      File "I:\stable-diffusion-webui\venv\lib\site-packages\torch\nn\modules\module.py", line 1538, in _call_impl
        result = forward_call(*args, **kwargs)
      File "I:\stable-diffusion-webui\repositories\generative-models\sgm\modules\diffusionmodules\openaimodel.py", line 993, in forward
        h = module(h, emb, context)
      File "I:\stable-diffusion-webui\venv\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
        return forward_call(*args, **kwargs)
      File "I:\stable-diffusion-webui\repositories\generative-models\sgm\modules\diffusionmodules\openaimodel.py", line 100, in forward
        x = layer(x, context)
      File "I:\stable-diffusion-webui\venv\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
        return forward_call(*args, **kwargs)
      File "I:\stable-diffusion-webui\repositories\generative-models\sgm\modules\attention.py", line 627, in forward
        x = block(x, context=context[i])
      File "I:\stable-diffusion-webui\venv\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
        return forward_call(*args, **kwargs)
      File "I:\stable-diffusion-webui\repositories\generative-models\sgm\modules\attention.py", line 459, in forward
        return checkpoint(
      File "I:\stable-diffusion-webui\repositories\generative-models\sgm\modules\diffusionmodules\util.py", line 165, in checkpoint
        return CheckpointFunction.apply(func, len(inputs), *args)
      File "I:\stable-diffusion-webui\venv\lib\site-packages\torch\autograd\function.py", line 506, in apply
        return super().apply(*args, **kwargs)  # type: ignore[misc]
      File "I:\stable-diffusion-webui\repositories\generative-models\sgm\modules\diffusionmodules\util.py", line 182, in forward
        output_tensors = ctx.run_function(*ctx.input_tensors)
      File "I:\stable-diffusion-webui\venv\lib\site-packages\tomesd\patch.py", line 55, in _forward
        x = u_c(self.attn2(m_c(self.norm2(x)), context=context)) + x
      File "I:\stable-diffusion-webui\venv\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
        return forward_call(*args, **kwargs)
      File "I:\stable-diffusion-webui\modules\sd_hijack_optimizations.py", line 479, in xformers_attention_forward
        k_in = self.to_k(context_k)
      File "I:\stable-diffusion-webui\venv\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
        return forward_call(*args, **kwargs)
      File "I:\stable-diffusion-webui\extensions-builtin\Lora\networks.py", line 359, in network_Linear_forward
        network_apply_weights(self)
      File "I:\stable-diffusion-webui\extensions-builtin\Lora\networks.py", line 295, in network_apply_weights
        self.weight += updown
    RuntimeError: The size of tensor a (2048) must match the size of tensor b (768) at non-singleton dimension 1

I trained other SDXL LoRAs on a previous commit and they worked fine. I was on version 21.8.5. I hadn't run git pull, but LoRAs trained after that point started giving this error. The only thing I remember is that a LoRA/LyCORIS module updated when I started the GUI. I also get this error on every version I try, so checking out another commit or downloading another release is not going to fix it.
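For anyone else hitting the 2048-vs-768 mismatch: 768 is the SD1.x cross-attention width, 1024 is SD2.x, and 2048 is SDXL, so the webui is applying a LoRA whose dimensions don't match the loaded checkpoint. Below is a minimal sketch for checking which base model a LoRA file was trained against, assuming a .safetensors file with kohya-style key names (the key pattern and the filename are illustrative, not taken from this repo):

    from safetensors import safe_open

    def report_context_dim(path: str) -> None:
        """Print the cross-attention input width of the first attn2 to_k LoRA layer."""
        with safe_open(path, framework="pt", device="cpu") as f:
            for key in f.keys():
                # kohya-style key, e.g. lora_unet_..._attn2_to_k.lora_down.weight
                if "attn2_to_k" in key and key.endswith("lora_down.weight"):
                    dim = f.get_tensor(key).shape[1]
                    print(f"{key} -> context dim {dim} (768=SD1.x, 1024=SD2.x, 2048=SDXL)")
                    return
        print("no attn2 to_k lora_down key found")

    report_context_dim("last.safetensors")  # hypothetical filename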

SShadowS commented 1 year ago

Same with BLIP captioning

Batch size 1: RuntimeError: The size of tensor a (10) must match the size of tensor b (100) at non-singleton dimension 0

Batch size 5: RuntimeError: The size of tensor a (50) must match the size of tensor b (500) at non-singleton dimension 0

Traceback (most recent call last):
  File "c:\AI\kohya_ss\finetune\make_captions.py", line 200, in <module>
    main(args)
  File "c:\AI\kohya_ss\finetune\make_captions.py", line 144, in main
    run_batch(b_imgs)
  File "c:\AI\kohya_ss\finetune\make_captions.py", line 97, in run_batch
    captions = model.generate(
  File "c:\AI\kohya_ss\finetune\blip\blip.py", line 158, in generate
    outputs = self.text_decoder.generate(input_ids=input_ids,
  File "c:\AI\kohya_ss\venv\lib\site-packages\torch\utils\_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "c:\AI\kohya_ss\venv\lib\site-packages\transformers\generation\utils.py", line 1611, in generate
    return self.beam_search(
  File "c:\AI\kohya_ss\venv\lib\site-packages\transformers\generation\utils.py", line 2909, in beam_search
    outputs = self(
  File "c:\AI\kohya_ss\venv\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "c:\AI\kohya_ss\finetune\blip\med.py", line 886, in forward
    outputs = self.bert(
  File "c:\AI\kohya_ss\venv\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "c:\AI\kohya_ss\finetune\blip\med.py", line 781, in forward
    encoder_outputs = self.encoder(
  File "c:\AI\kohya_ss\venv\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "c:\AI\kohya_ss\finetune\blip\med.py", line 445, in forward
    layer_outputs = layer_module(
  File "c:\AI\kohya_ss\venv\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "c:\AI\kohya_ss\finetune\blip\med.py", line 361, in forward
    cross_attention_outputs = self.crossattention(
  File "c:\AI\kohya_ss\venv\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "c:\AI\kohya_ss\finetune\blip\med.py", line 277, in forward
    self_outputs = self.self(
  File "c:\AI\kohya_ss\venv\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "c:\AI\kohya_ss\finetune\blip\med.py", line 178, in forward
    attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))

ChristopherDicke commented 1 year ago

I get the same error. When batch_size and the number of beams are both set to 1, the error isn't thrown.
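That fits the numbers reported above: in each case tensor b is exactly num_beams times tensor a (10 vs 100, 50 vs 500), which is what you would get if the image embeddings were expanded per beam one time too many while the text ids were expanded once. The snippet below is only an illustration of that arithmetic under that assumption, not the actual BLIP/transformers code; with num_beams=1 both expansions are no-ops, which would explain why the error disappears.

    import torch

    batch_size, num_beams = 1, 10

    image_embeds = torch.randn(batch_size, 577, 768)          # ViT features per image
    input_ids = torch.zeros(batch_size, 1, dtype=torch.long)  # decoder start tokens

    # expansion done once (e.g. before calling generate):
    image_embeds = image_embeds.repeat_interleave(num_beams, dim=0)  # batch 1 -> 10
    # the same expansion applied again by the generation loop:
    image_embeds = image_embeds.repeat_interleave(num_beams, dim=0)  # batch 10 -> 100
    input_ids = input_ids.repeat_interleave(num_beams, dim=0)        # batch 1 -> 10

    # cross-attention now pairs 10 query rows with 100 key/value rows:
    # "The size of tensor a (10) must match the size of tensor b (100) at dimension 0"
    print(input_ids.shape[0], image_embeds.shape[0])  # 10 100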

agentj01 commented 1 year ago

Well, I found the issue, though I'm not even sure why it is a thing. If you leave the default name "last" and run inference with the LoRA directly, you get the error I posted. Simply renaming the LoRA fixes it. Tested and reproduced on 21.8.5 and the latest commit (21.8.8).
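If renaming really is the trigger, one guess (purely an assumption, not confirmed anywhere in this thread) is a filename collision: the webui resolves <lora:last:1> by name, so an older file also called last.safetensors somewhere under the LoRA folder could shadow the new SDXL one. A quick sketch to check for duplicate stems, with the directory path as a placeholder for your own install:

    from collections import defaultdict
    from pathlib import Path

    lora_dir = Path(r"I:\stable-diffusion-webui\models\Lora")  # adjust to your install

    stems = defaultdict(list)
    for p in lora_dir.rglob("*.safetensors"):
        stems[p.stem.lower()].append(p)

    for stem, paths in stems.items():
        if len(paths) > 1:
            print(f"duplicate LoRA name '{stem}':")
            for p in paths:
                print(f"  {p}")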

JC0NN0R commented 1 year ago

Well, I found the issue, though I'm not even sure why it is a thing. If you leave the default name "last" and run inference with the LoRA directly, you get the error I posted. Simply renaming the LoRA fixes it. Tested and reproduced on 21.8.5 and the latest commit (21.8.8).

I'm not sure this fixed it. I just tried your fix and it didn't work for me; I still got this message. I used images of varying sizes, though, because some of the images had things that needed to be removed, so I cropped some of them. None of the images are traditional image sizes, and I'm sure most of them weren't multiples of 2. Could that possibly be the problem?

Here is the Traceback:

Traceback (most recent call last):
  File "H:\StableDiffusion\stable-diffusion-webui\modules\call_queue.py", line 58, in f
    res = list(func(*args, **kwargs))
  File "H:\StableDiffusion\stable-diffusion-webui\modules\call_queue.py", line 37, in f
    res = func(*args, **kwargs)
  File "H:\StableDiffusion\stable-diffusion-webui\modules\txt2img.py", line 62, in txt2img
    processed = processing.process_images(p)
  File "H:\StableDiffusion\stable-diffusion-webui\modules\processing.py", line 677, in process_images
    res = process_images_inner(p)
  File "H:\StableDiffusion\stable-diffusion-webui\modules\processing.py", line 794, in process_images_inner
    samples_ddim = p.sample(conditioning=p.c, unconditional_conditioning=p.uc, seeds=p.seeds, subseeds=p.subseeds, subseed_strength=p.subseed_strength, prompts=p.prompts)
  File "H:\StableDiffusion\stable-diffusion-webui\modules\processing.py", line 1054, in sample
    samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.txt2img_image_conditioning(x))
  File "H:\StableDiffusion\stable-diffusion-webui\modules\sd_samplers_kdiffusion.py", line 464, in sample
    samples = self.launch_sampling(steps, lambda: self.func(self.model_wrap_cfg, x, extra_args={
  File "H:\StableDiffusion\stable-diffusion-webui\modules\sd_samplers_kdiffusion.py", line 303, in launch_sampling
    return func()
  File "H:\StableDiffusion\stable-diffusion-webui\modules\sd_samplers_kdiffusion.py", line 464, in <lambda>
    samples = self.launch_sampling(steps, lambda: self.func(self.model_wrap_cfg, x, extra_args={
  File "H:\StableDiffusion\stable-diffusion-webui\venv\lib\site-packages\torch\utils\_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "H:\StableDiffusion\stable-diffusion-webui\repositories\k-diffusion\k_diffusion\sampling.py", line 145, in sample_euler_ancestral
    denoised = model(x, sigmas[i] * s_in, **extra_args)
  File "H:\StableDiffusion\stable-diffusion-webui\venv\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "H:\StableDiffusion\stable-diffusion-webui\modules\sd_samplers_kdiffusion.py", line 202, in forward
    x_out[a:b] = self.inner_model(x_in[a:b], sigma_in[a:b], cond=make_condition_dict(c_crossattn, image_cond_in[a:b]))
  File "H:\StableDiffusion\stable-diffusion-webui\venv\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "H:\StableDiffusion\stable-diffusion-webui\repositories\k-diffusion\k_diffusion\external.py", line 112, in forward
    eps = self.get_eps(input * c_in, self.sigma_to_t(sigma), **kwargs)
  File "H:\StableDiffusion\stable-diffusion-webui\repositories\k-diffusion\k_diffusion\external.py", line 138, in get_eps
    return self.inner_model.apply_model(*args, **kwargs)
  File "H:\StableDiffusion\stable-diffusion-webui\modules\sd_models_xl.py", line 37, in apply_model
    return self.model(x, t, cond)
  File "H:\StableDiffusion\stable-diffusion-webui\venv\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "H:\StableDiffusion\stable-diffusion-webui\modules\sd_hijack_utils.py", line 17, in <lambda>
    setattr(resolved_obj, func_path[-1], lambda *args, **kwargs: self(*args, **kwargs))
  File "H:\StableDiffusion\stable-diffusion-webui\modules\sd_hijack_utils.py", line 28, in __call__
    return self.__orig_func(*args, **kwargs)
  File "H:\StableDiffusion\stable-diffusion-webui\repositories\generative-models\sgm\modules\diffusionmodules\wrappers.py", line 28, in forward
    return self.diffusion_model(
  File "H:\StableDiffusion\stable-diffusion-webui\venv\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "H:\StableDiffusion\stable-diffusion-webui\repositories\generative-models\sgm\modules\diffusionmodules\openaimodel.py", line 993, in forward
    h = module(h, emb, context)
  File "H:\StableDiffusion\stable-diffusion-webui\venv\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "H:\StableDiffusion\stable-diffusion-webui\repositories\generative-models\sgm\modules\diffusionmodules\openaimodel.py", line 100, in forward
    x = layer(x, context)
  File "H:\StableDiffusion\stable-diffusion-webui\venv\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "H:\StableDiffusion\stable-diffusion-webui\repositories\generative-models\sgm\modules\attention.py", line 627, in forward
    x = block(x, context=context[i])
  File "H:\StableDiffusion\stable-diffusion-webui\venv\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "H:\StableDiffusion\stable-diffusion-webui\repositories\generative-models\sgm\modules\attention.py", line 459, in forward
    return checkpoint(
  File "H:\StableDiffusion\stable-diffusion-webui\repositories\generative-models\sgm\modules\diffusionmodules\util.py", line 165, in checkpoint
    return CheckpointFunction.apply(func, len(inputs), *args)
  File "H:\StableDiffusion\stable-diffusion-webui\venv\lib\site-packages\torch\autograd\function.py", line 506, in apply
    return super().apply(*args, **kwargs)  # type: ignore[misc]
  File "H:\StableDiffusion\stable-diffusion-webui\repositories\generative-models\sgm\modules\diffusionmodules\util.py", line 182, in forward
    output_tensors = ctx.run_function(*ctx.input_tensors)
  File "H:\StableDiffusion\stable-diffusion-webui\repositories\generative-models\sgm\modules\attention.py", line 478, in _forward
    self.attn2(
  File "H:\StableDiffusion\stable-diffusion-webui\venv\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "H:\StableDiffusion\stable-diffusion-webui\modules\sd_hijack_optimizations.py", line 224, in split_cross_attention_forward
    k_in = self.to_k(context_k)
  File "H:\StableDiffusion\stable-diffusion-webui\venv\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "H:\StableDiffusion\stable-diffusion-webui\extensions-builtin\Lora\networks.py", line 359, in network_Linear_forward
    network_apply_weights(self)
  File "H:\StableDiffusion\stable-diffusion-webui\extensions-builtin\Lora\networks.py", line 295, in network_apply_weights
    self.weight += updown
RuntimeError: The size of tensor a (2048) must match the size of tensor b (1024) at non-singleton dimension 1

shssoichiro commented 1 year ago

I'm also getting this error when attempting BLIP captioning: RuntimeError: The size of tensor a (12) must match the size of tensor b (144) at non-singleton dimension 0. Probably not a coincidence that the size of tensor a (12) is equal to the beam count I chose.

Edit: This seems to be a duplicate of #1236. A workaround for BLIP captioning is noted there, though it may or may not help with SDXL training.