luosiallen / latent-consistency-model

Latent Consistency Models: Synthesizing High-Resolution Images with Few-Step Inference
MIT License

`mat1 and mat2 must have the same dtype, but got Float and Half` #46

swim2sun closed this issue 8 months ago

swim2sun commented 8 months ago

I get a `mat1 and mat2 must have the same dtype, but got Float and Half` error when running the sample code:

import torch
import cv2
import numpy as np
from PIL import Image

from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, LCMScheduler
from diffusers.utils import load_image, make_image_grid

# load the example image and resize it to the SD 1.5 resolution
image = load_image(
    "https://hf.co/datasets/huggingface/documentation-images/resolve/main/diffusers/input_image_vermeer.png"
).resize((512, 512))

image = np.array(image)

# extract Canny edges and stack them into a 3-channel conditioning image
low_threshold = 100
high_threshold = 200

image = cv2.Canny(image, low_threshold, high_threshold)
image = image[:, :, None]
image = np.concatenate([image, image, image], axis=2)
canny_image = Image.fromarray(image)

# load the Canny ControlNet and the SD 1.5 pipeline, both in fp16
controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny", torch_dtype=torch.float16)
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    controlnet=controlnet,
    torch_dtype=torch.float16,
    safety_checker=None,
    variant="fp16"
).to("cuda")

# set scheduler
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)

# load LCM-LoRA
pipe.load_lora_weights("latent-consistency/lcm-lora-sdv1-5")

generator = torch.manual_seed(0)
image = pipe(
    "the mona lisa",
    image=canny_image,
    num_inference_steps=4,
    guidance_scale=1.5,
    controlnet_conditioning_scale=0.8,
    cross_attention_kwargs={"scale": 1},
    generator=generator,
).images[0]
make_image_grid([canny_image, image], rows=1, cols=2)
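
Before calling the pipeline, a quick dtype audit can help narrow this down (a sketch using the standard diffusers pipeline attributes; every component below was loaded with torch_dtype=torch.float16, so each should report float16):

# optional dtype audit (sketch): all components were loaded in fp16,
# so each line should print torch.float16
for name, module in [
    ("unet", pipe.unet),
    ("controlnet", pipe.controlnet),
    ("vae", pipe.vae),
    ("text_encoder", pipe.text_encoder),
]:
    print(name, next(module.parameters()).dtype)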

Here is the exception traceback:

RuntimeError                              Traceback (most recent call last)
Cell In[1], line 39
     36 pipe.load_lora_weights("latent-consistency/lcm-lora-sdv1-5")
     38 generator = torch.manual_seed(0)
---> 39 image = pipe(
     40     "the mona lisa",
     41     image=canny_image,
     42     num_inference_steps=4,
     43     guidance_scale=1.5,
     44     controlnet_conditioning_scale=0.8,
     45     cross_attention_kwargs={"scale": 1},
     46     generator=generator,
     47 ).images[0]
     48 make_image_grid([canny_image, image], rows=1, cols=2)

File ~\venv\Lib\site-packages\torch\utils\_contextlib.py:115, in context_decorator.<locals>.decorate_context(*args, **kwargs)
    112 @functools.wraps(func)
    113 def decorate_context(*args, **kwargs):
    114     with ctx_factory():
--> 115         return func(*args, **kwargs)

File ~\venv\Lib\site-packages\diffusers\pipelines\controlnet\pipeline_controlnet.py:1010, in StableDiffusionControlNetPipeline.__call__(self, prompt, image, height, width, num_inference_steps, guidance_scale, negative_prompt, num_images_per_prompt, eta, generator, latents, prompt_embeds, negative_prompt_embeds, output_type, return_dict, callback, callback_steps, cross_attention_kwargs, controlnet_conditioning_scale, guess_mode, control_guidance_start, control_guidance_end, clip_skip)
   1007     controlnet_cond_scale = controlnet_cond_scale[0]
   1008 cond_scale = controlnet_cond_scale * controlnet_keep[i]
-> 1010 down_block_res_samples, mid_block_res_sample = self.controlnet(
   1011     control_model_input,
   1012     t,
   1013     encoder_hidden_states=controlnet_prompt_embeds,
   1014     controlnet_cond=image,
   1015     conditioning_scale=cond_scale,
   1016     guess_mode=guess_mode,
   1017     return_dict=False,
   1018 )

[repeated torch\nn\modules\module.py dispatch frames (Module._wrapped_call_impl, line 1518; Module._call_impl, line 1527) trimmed for readability]

File ~\venv\Lib\site-packages\diffusers\models\controlnet.py:736, in ControlNetModel.forward(self, sample, timestep, encoder_hidden_states, controlnet_cond, conditioning_scale, class_labels, timestep_cond, attention_mask, added_cond_kwargs, cross_attention_kwargs, guess_mode, return_dict)
    731 # timesteps does not contain any weights and will always return f32 tensors
    732 # but time_embedding might actually be running in fp16. so we need to cast here.
    733 # there might be better ways to encapsulate this.
    734 t_emb = t_emb.to(dtype=sample.dtype)
--> 736 emb = self.time_embedding(t_emb, timestep_cond)

File ~\venv\Lib\site-packages\diffusers\models\embeddings.py:226, in TimestepEmbedding.forward(self, sample, condition)
    224 if condition is not None:
    225     sample = sample + self.cond_proj(condition)
--> 226 sample = self.linear_1(sample)

File ~\venv\Lib\site-packages\diffusers\models\lora.py:300, in LoRACompatibleLinear.forward(self, hidden_states, scale)
    298 def forward(self, hidden_states: torch.Tensor, scale: float = 1.0) -> torch.Tensor:
    299     if self.lora_layer is None:
--> 300         out = super().forward(hidden_states)
    301     return out

File ~\venv\Lib\site-packages\torch\nn\modules\linear.py:114, in Linear.forward(self, input)
    113 def forward(self, input: Tensor) -> Tensor:
--> 114     return F.linear(input, self.weight, self.bias)

RuntimeError: mat1 and mat2 must have the same dtype, but got Float and Half

My OS is Windows 11, with CUDA 11.8.
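
For what it's worth, the final frame shows a float32 activation reaching an fp16 Linear weight. The check that fires lives in PyTorch itself; a minimal sketch, independent of diffusers, reproduces the same message (the tensor shapes here are made up for illustration):

import torch
import torch.nn.functional as F

# F.linear requires input and weight to share a dtype; mixing float32
# and float16 raises the same RuntimeError as in the traceback above
x = torch.randn(2, 320)                          # float32 activation
w = torch.randn(1280, 320, dtype=torch.float16)  # fp16 weight
try:
    F.linear(x, w)
except RuntimeError as e:
    print(e)  # mat1 and mat2 must have the same dtype, but got Float and Half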

luosiallen commented 8 months ago

Update to diffusers 0.23.0: `pip install diffusers==0.23.0`
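
If it helps, a quick sanity check after upgrading and before re-running the repro (nothing here beyond the standard __version__ attribute):

import diffusers

# confirm the running environment picked up the pinned version
print(diffusers.__version__)  # expect "0.23.0" after the pip install above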