High-Resolution Image Synthesis with Latent Diffusion Models
MIT License
37.84k
stars
4.88k
forks
source link
Error on RuntimeError: Expected attn_mask dtype to be bool or to match query dtype, but got attn_mask.dtype: float and query.dtype: c10::BFloat16 instead. #347
After giving
python scripts/txt2img.py --prompt "a professional photograph of an astronaut riding a horse" --ckpt model_files/v2-1_768-ema-pruned.ckpt --config configs/stable-diffusion/v2-inference-v.yaml --H 768 --W 768
I am getting the error below:
Creating invisible watermark encoder (see https://github.com/ShieldMnt/invisible-watermark)...
data: 0%| | 0/1 [00:00<?, ?it/s]
Sampling: 0%| | 0/3 [00:00<?, ?it/s]
Traceback (most recent call last):
File "scripts/txt2img.py", line 388, in <module>
main(opt)
File "scripts/txt2img.py", line 342, in main
uc = model.get_learned_conditioning(batch_size * [""])
File "/home/chidanand/LLM_experiments/stablediffusion/ldm/models/diffusion/ddpm.py", line 665, in get_learned_conditioning
c = self.cond_stage_model.encode(c)
File "/home/chidanand/LLM_experiments/stablediffusion/ldm/modules/encoders/modules.py", line 236, in encode
return self(text)
File "/home/chidanand/anaconda/envs/control/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/chidanand/anaconda/envs/control/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/home/chidanand/LLM_experiments/stablediffusion/ldm/modules/encoders/modules.py", line 213, in forward
z = self.encode_with_transformer(tokens.to(self.device))
File "/home/chidanand/LLM_experiments/stablediffusion/ldm/modules/encoders/modules.py", line 220, in encode_with_transformer
x = self.text_transformer_forward(x, attn_mask=self.model.attn_mask)
File "/home/chidanand/LLM_experiments/stablediffusion/ldm/modules/encoders/modules.py", line 232, in text_transformer_forward
x = r(x, attn_mask=attn_mask)
File "/home/chidanand/anaconda/envs/control/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/chidanand/anaconda/envs/control/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/home/chidanand/anaconda/envs/control/lib/python3.8/site-packages/open_clip/model.py", line 329, in forward
x = x + self.ln_attn(self.attention(self.ln_1(x), attn_mask=attn_mask))
File "/home/chidanand/anaconda/envs/control/lib/python3.8/site-packages/open_clip/model.py", line 321, in attention
return self.attn(x, x, x, need_weights=False, attn_mask=attn_mask)[0]
File "/home/chidanand/anaconda/envs/control/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/chidanand/anaconda/envs/control/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/home/chidanand/anaconda/envs/control/lib/python3.8/site-packages/torch/nn/modules/activation.py", line 1241, in forward
attn_output, attn_output_weights = F.multi_head_attention_forward(
File "/home/chidanand/anaconda/envs/control/lib/python3.8/site-packages/torch/nn/functional.py", line 5440, in multi_head_attention_forward
attn_output = scaled_dot_product_attention(q, k, v, attn_mask, dropout_p, is_causal)
RuntimeError: Expected attn_mask dtype to be bool or to match query dtype, but got attn_mask.dtype: float and query.dtype: c10::BFloat16 instead
After giving python scripts/txt2img.py --prompt "a professional photograph of an astronaut riding a horse" --ckpt model_files/v2-1_768-ema-pruned.ckpt --config configs/stable-diffusion/v2-inference-v.yaml --H 768 --W 768
I am getting the error below: Creating invisible watermark encoder (see https://github.com/ShieldMnt/invisible-watermark)... data: 0%| | 0/1 [00:00<?, ?it/s] Sampling: 0%| | 0/3 [00:00<?, ?it/s] Traceback (most recent call last): File "scripts/txt2img.py", line 388, in <module>
main(opt)
File "scripts/txt2img.py", line 342, in main
uc = model.get_learned_conditioning(batch_size * [""])
File "/home/chidanand/LLM_experiments/stablediffusion/ldm/models/diffusion/ddpm.py", line 665, in get_learned_conditioning
c = self.cond_stage_model.encode(c)
File "/home/chidanand/LLM_experiments/stablediffusion/ldm/modules/encoders/modules.py", line 236, in encode
return self(text)
File "/home/chidanand/anaconda/envs/control/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/chidanand/anaconda/envs/control/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/home/chidanand/LLM_experiments/stablediffusion/ldm/modules/encoders/modules.py", line 213, in forward
z = self.encode_with_transformer(tokens.to(self.device))
File "/home/chidanand/LLM_experiments/stablediffusion/ldm/modules/encoders/modules.py", line 220, in encode_with_transformer
x = self.text_transformer_forward(x, attn_mask=self.model.attn_mask)
File "/home/chidanand/LLM_experiments/stablediffusion/ldm/modules/encoders/modules.py", line 232, in text_transformer_forward
x = r(x, attn_mask=attn_mask)
File "/home/chidanand/anaconda/envs/control/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/chidanand/anaconda/envs/control/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/home/chidanand/anaconda/envs/control/lib/python3.8/site-packages/open_clip/model.py", line 329, in forward
x = x + self.ln_attn(self.attention(self.ln_1(x), attn_mask=attn_mask))
File "/home/chidanand/anaconda/envs/control/lib/python3.8/site-packages/open_clip/model.py", line 321, in attention
return self.attn(x, x, x, need_weights=False, attn_mask=attn_mask)[0]
File "/home/chidanand/anaconda/envs/control/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/chidanand/anaconda/envs/control/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/home/chidanand/anaconda/envs/control/lib/python3.8/site-packages/torch/nn/modules/activation.py", line 1241, in forward
attn_output, attn_output_weights = F.multi_head_attention_forward(
File "/home/chidanand/anaconda/envs/control/lib/python3.8/site-packages/torch/nn/functional.py", line 5440, in multi_head_attention_forward
attn_output = scaled_dot_product_attention(q, k, v, attn_mask, dropout_p, is_causal)
RuntimeError: Expected attn_mask dtype to be bool or to match query dtype, but got attn_mask.dtype: float and query.dtype: c10::BFloat16 instead