Open · DogsHeadZ opened this issue 1 year ago
me too...
Solved it by installing diffusers==0.10.0 (pip install diffusers==0.10.0); you can try that.
Solved! Thanks!!!
Doesn't work for me, could you share more details? Thanks.
See https://github.com/google/prompt-to-prompt/issues/37#issuecomment-1468319082 and install diffusers==0.10.0.
it works. thanks!
Using diffusers==0.10.0 raises another error for me: no attribute 'StableUnCLIPImg2ImgPipeline'
https://github.com/google/prompt-to-prompt/issues/37#issuecomment-1468319082
The actual problem is that in newer versions the UNet code was rewritten: the attention class that was called CrossAttention in diffusers == 0.10.0 is called Attention in diffusers == 0.17.1, so the prompt-to-prompt hooks never attach and the attention maps aren't stored.
So, to fix this, one should change register_recr to match the new class name:
def register_recr(net_, count, place_in_unet):
    if net_.__class__.__name__ == 'Attention':  # was 'CrossAttention' in diffusers 0.10.0
        net_.forward = ca_forward(net_, place_in_unet)
        return count + 1
And rewrite ca_forward. For version 0.17.1 it will look like this:
def ca_forward(self, place_in_unet):
    # in diffusers 0.17.1, to_out is a ModuleList([Linear, Dropout]); take the Linear
    to_out = self.to_out
    if type(to_out) is torch.nn.modules.container.ModuleList:
        to_out = self.to_out[0]
    else:
        to_out = self.to_out

    def forward(hidden_states, encoder_hidden_states=None, attention_mask=None, temb=None):
        is_cross = encoder_hidden_states is not None

        residual = hidden_states

        if self.spatial_norm is not None:
            hidden_states = self.spatial_norm(hidden_states, temb)

        input_ndim = hidden_states.ndim

        if input_ndim == 4:
            batch_size, channel, height, width = hidden_states.shape
            hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2)

        batch_size, sequence_length, _ = (
            hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape
        )
        attention_mask = self.prepare_attention_mask(attention_mask, sequence_length, batch_size)

        if self.group_norm is not None:
            hidden_states = self.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2)

        query = self.to_q(hidden_states)

        if encoder_hidden_states is None:
            encoder_hidden_states = hidden_states
        elif self.norm_cross:
            encoder_hidden_states = self.norm_encoder_hidden_states(encoder_hidden_states)

        key = self.to_k(encoder_hidden_states)
        value = self.to_v(encoder_hidden_states)

        query = self.head_to_batch_dim(query)
        key = self.head_to_batch_dim(key)
        value = self.head_to_batch_dim(value)

        attention_probs = self.get_attention_scores(query, key, attention_mask)
        # hand the attention map to the prompt-to-prompt controller so it can store (and edit) it
        attention_probs = controller(attention_probs, is_cross, place_in_unet)

        hidden_states = torch.bmm(attention_probs, value)
        hidden_states = self.batch_to_head_dim(hidden_states)

        # linear proj
        hidden_states = to_out(hidden_states)

        if input_ndim == 4:
            hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width)

        if self.residual_connection:
            hidden_states = hidden_states + residual

        hidden_states = hidden_states / self.rescale_output_factor

        return hidden_states

    return forward
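For anyone wondering where these functions live: both are nested inside register_attention_control(model, controller) in ptp_utils.py. A minimal sketch of the surrounding registration loop, assuming the stock prompt-to-prompt layout:

def register_attention_control(model, controller):
    # ca_forward and register_recr are defined here, as shown above

    cross_att_count = 0
    for name, module in model.unet.named_children():
        # record whether each attention layer sits in the down, mid or up part
        # of the UNet so the controller can aggregate maps per block
        if "down" in name:
            cross_att_count += register_recr(module, 0, "down")
        elif "up" in name:
            cross_att_count += register_recr(module, 0, "up")
        elif "mid" in name:
            cross_att_count += register_recr(module, 0, "mid")
    controller.num_att_layers = cross_att_count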
Thanks! But after changing to version 0.17.1 and modifying the code as you instructed, I still get an error when calling the cell in the Cross-Attention Visualization section.
g_cpu = torch.Generator().manual_seed(88)
prompts = ["A painting of a squirrel eating a burger"]
controller = AttentionStore()
image, x_t = run_and_display(prompts, controller, latent=None, run_baseline=False, generator=g_cpu)
show_cross_attention(controller, res=16, from_where=("up", "down"))
TypeError: PNDMScheduler.set_timesteps() got an unexpected keyword argument 'offset'
That's a different problem :) The solution is here:
https://github.com/google/prompt-to-prompt/issues/37#issuecomment-1468319082
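In case the link goes stale: the TypeError itself tells you what to change. Somewhere in the notebook set_timesteps is called with offset, which newer schedulers no longer accept, so dropping that kwarg should resolve it (a hedged sketch; your exact call site may differ):

# old call that newer PNDMScheduler rejects:
# model.scheduler.set_timesteps(num_inference_steps, offset=1)
# newer diffusers: set_timesteps no longer takes `offset`
model.scheduler.set_timesteps(num_inference_steps)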
change show_cross_attention(controller, 16, ["up", "down"])
to show_cross_attention(controller, 24, ["up", "down"])
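The right res depends on the checkpoint: the stored cross-attention resolutions scale with the latent size, so a 512x512 model typically yields 16x16 maps while a 768x768 model yields 24x24. If you're unsure which resolutions your run produced, you can inspect the controller directly (a minimal sketch, assuming the stock AttentionStore from this repo, which keeps maps under keys like "up_cross"):

import math

attn = controller.get_average_attention()
for key, maps in attn.items():
    for item in maps:
        # each stored map is (batch*heads, res*res, tokens); recover res from dim 1
        print(key, int(math.sqrt(item.shape[1])))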
@anvilarth you're a lifesaver
AttributeError: 'Attention' object has no attribute 'spatial_norm'
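That attribute only exists in newer diffusers releases; on versions in between, a defensive guard avoids the AttributeError (a small sketch replacing the two spatial_norm lines in the forward above):

# guard for diffusers versions where Attention has no spatial_norm attribute
if getattr(self, "spatial_norm", None) is not None:
    hidden_states = self.spatial_norm(hidden_states, temb)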
solved thanks
you're the answer!
Which file do we make these changes in?
Could somebody attach the environment they use and the updated ipynb for ptp-stable?
Running show_cross_attention(controller, 16, ["up", "down"]) throws a KeyError. What's the problem?