Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads.
making attention of type 'vanilla-xformers' with 512 in_channels
building MemoryEfficientAttnBlock with 512 in_channels...
Working with z of shape (1, 4, 32, 32) = 4096 dimensions.
making attention of type 'vanilla-xformers' with 512 in_channels
building MemoryEfficientAttnBlock with 512 in_channels...
--- using zero snr---
Traceback (most recent call last):
File "/home/work/dev/LucidFusion/demo.py", line 128, in <module>
main()
File "/home/work/dev/LucidFusion/demo.py", line 120, in main
pipeline = setup_crm_diffusion()
File "/home/work/dev/LucidFusion/data/diffusion_data.py", line 40, in setup_crm_diffusion
pipeline = TwoStagePipeline(
File "/home/work/dev/LucidFusion/CRM/pipelines.py", line 42, in __init__
self.stage1_sampler = get_obj_from_str(stage1_sampler_config.target)(
File "/home/work/dev/LucidFusion/CRM/libs/sample.py", line 32, in __init__
uc = model.get_learned_conditioning([neg_texts]).to(device)
File "/home/work/dev/LucidFusion/CRM/imagedream/ldm/interface.py", line 185, in get_learned_conditioning
return self.clip_model(prompts)
File "/home/sekkit/miniconda3/envs/DreamMat/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/sekkit/miniconda3/envs/DreamMat/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/home/work/dev/LucidFusion/CRM/imagedream/ldm/modules/encoders/modules.py", line 218, in forward
z = self.encode_with_transformer(tokens.to(self.device))
File "/home/work/dev/LucidFusion/CRM/imagedream/ldm/modules/encoders/modules.py", line 282, in encode_with_transformer
x = self.text_transformer_forward(x, attn_mask=self.model.attn_mask)
File "/home/work/dev/LucidFusion/CRM/imagedream/ldm/modules/encoders/modules.py", line 297, in text_transformer_forward
x = r(x, attn_mask=attn_mask)
File "/home/sekkit/miniconda3/envs/DreamMat/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/sekkit/miniconda3/envs/DreamMat/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/home/sekkit/miniconda3/envs/DreamMat/lib/python3.10/site-packages/open_clip/transformer.py", line 263, in forward
x = q_x + self.ls_1(self.attention(q_x=self.ln_1(q_x), k_x=k_x, v_x=v_x, attn_mask=attn_mask))
File "/home/sekkit/miniconda3/envs/DreamMat/lib/python3.10/site-packages/open_clip/transformer.py", line 250, in attention
return self.attn(
File "/home/sekkit/miniconda3/envs/DreamMat/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/sekkit/miniconda3/envs/DreamMat/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/home/sekkit/miniconda3/envs/DreamMat/lib/python3.10/site-packages/torch/nn/modules/activation.py", line 1266, in forward
attn_output, attn_output_weights = F.multi_head_attention_forward(
File "/home/sekkit/miniconda3/envs/DreamMat/lib/python3.10/site-packages/torch/nn/functional.py", line 5382, in multi_head_attention_forward
raise RuntimeError(f"The shape of the 2D attn_mask is {attn_mask.shape}, but should be {correct_2d_size}.")
RuntimeError: The shape of the 2D attn_mask is torch.Size([77, 77]), but should be (1, 1).
Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads. Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads. making attention of type 'vanilla-xformers' with 512 in_channels building MemoryEfficientAttnBlock with 512 in_channels... Working with z of shape (1, 4, 32, 32) = 4096 dimensions. making attention of type 'vanilla-xformers' with 512 in_channels building MemoryEfficientAttnBlock with 512 in_channels... --- using zero snr--- Traceback (most recent call last): File "/home/work/dev/LucidFusion/demo.py", line 128, in <module>
main()
File "/home/work/dev/LucidFusion/demo.py", line 120, in main
pipeline = setup_crm_diffusion()
File "/home/work/dev/LucidFusion/data/diffusion_data.py", line 40, in setup_crm_diffusion
pipeline = TwoStagePipeline(
File "/home/work/dev/LucidFusion/CRM/pipelines.py", line 42, in __init__
self.stage1_sampler = get_obj_from_str(stage1_sampler_config.target)(
File "/home/work/dev/LucidFusion/CRM/libs/sample.py", line 32, in __init__
uc = model.get_learned_conditioning([neg_texts]).to(device)
File "/home/work/dev/LucidFusion/CRM/imagedream/ldm/interface.py", line 185, in get_learned_conditioning
return self.clip_model(prompts)
File "/home/sekkit/miniconda3/envs/DreamMat/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/sekkit/miniconda3/envs/DreamMat/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/home/work/dev/LucidFusion/CRM/imagedream/ldm/modules/encoders/modules.py", line 218, in forward
z = self.encode_with_transformer(tokens.to(self.device))
File "/home/work/dev/LucidFusion/CRM/imagedream/ldm/modules/encoders/modules.py", line 282, in encode_with_transformer
x = self.text_transformer_forward(x, attn_mask=self.model.attn_mask)
File "/home/work/dev/LucidFusion/CRM/imagedream/ldm/modules/encoders/modules.py", line 297, in text_transformer_forward
x = r(x, attn_mask=attn_mask)
File "/home/sekkit/miniconda3/envs/DreamMat/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/sekkit/miniconda3/envs/DreamMat/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/home/sekkit/miniconda3/envs/DreamMat/lib/python3.10/site-packages/open_clip/transformer.py", line 263, in forward
x = q_x + self.ls_1(self.attention(q_x=self.ln_1(q_x), k_x=k_x, v_x=v_x, attn_mask=attn_mask))
File "/home/sekkit/miniconda3/envs/DreamMat/lib/python3.10/site-packages/open_clip/transformer.py", line 250, in attention
return self.attn(
File "/home/sekkit/miniconda3/envs/DreamMat/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/sekkit/miniconda3/envs/DreamMat/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/home/sekkit/miniconda3/envs/DreamMat/lib/python3.10/site-packages/torch/nn/modules/activation.py", line 1266, in forward
attn_output, attn_output_weights = F.multi_head_attention_forward(
File "/home/sekkit/miniconda3/envs/DreamMat/lib/python3.10/site-packages/torch/nn/functional.py", line 5382, in multi_head_attention_forward
raise RuntimeError(f"The shape of the 2D attn_mask is {attn_mask.shape}, but should be {correct_2d_size}.")
RuntimeError: The shape of the 2D attn_mask is torch.Size([77, 77]), but should be (1, 1).