Error while using t2iadapter_sketch_sd15v2

Hi,

I run the example code from here, as follows:

import torch
from PIL import Image
from controlnet_aux import PidiNetDetector
import numpy as np
from diffusers import (
    T2IAdapter,
    StableDiffusionAdapterPipeline
)

image = Image.open('image.png')

processor = PidiNetDetector.from_pretrained('lllyasviel/Annotators')

sketch_image = processor(image)

sketch_image.save('sketch.png')

print(np.array(sketch_image).shape)

adapter = T2IAdapter.from_pretrained("TencentARC/t2iadapter_sketch_sd15v2", torch_dtype=torch.float16)
pipe = StableDiffusionAdapterPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", adapter=adapter, safety_checker=None, torch_dtype=torch.float16, variant="fp16"
)

pipe.to('cuda')

generator = torch.Generator().manual_seed(0)

sketch_image_out = pipe(prompt="royal chamber with fancy bed", image=[sketch_image], generator=generator).images[0]

sketch_image_out.save('sketch_out.png')

Notice that I use _TencentARC/t2iadapter_sketchsd15v2 and runwayml/stable-diffusion-v1-5. Then I got the error as follows:

╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ E:\huggingface\test_t2i_adapter.py:29 in <module>                                                │
│                                                                                                  │
│   26                                                                                             │
│   27 generator = torch.Generator().manual_seed(0)                                                │
│   28                                                                                             │
│ ❱ 29 sketch_image_out = pipe(prompt="royal chamber with fancy bed", image=[sketch_image], gen    │
│   30                                                                                             │
│   31 sketch_image_out.save('sketch_out.png')                                                     │
│   32                                                                                             │
│                                                                                                  │
│ D:\Anaconda3\envs\xxx\lib\site-packages\torch\autograd\grad_mode.py:27 in                │
│ decorate_context                                                                                 │
│                                                                                                  │
│    24 │   │   @functools.wraps(func)                                                             │
│    25 │   │   def decorate_context(*args, **kwargs):                                             │
│    26 │   │   │   with self.clone():                                                             │
│ ❱  27 │   │   │   │   return func(*args, **kwargs)                                               │
│    28 │   │   return cast(F, decorate_context)                                                   │
│    29 │                                                                                          │
│    30 │   def _wrap_generator(self, func):                                                       │
│                                                                                                  │
│ D:\Anaconda3\envs\xxx\lib\site-packages\diffusers\pipelines\t2i_adapter\pipeline_stable_ │
│ diffusion_adapter.py:734 in __call__                                                             │
│                                                                                                  │
│   731 │   │   │   for k, v in enumerate(adapter_state):                                          │
│   732 │   │   │   │   adapter_state[k] = v                                                       │
│   733 │   │   else:                                                                              │
│ ❱ 734 │   │   │   adapter_state = self.adapter(adapter_input)                                    │
│   735 │   │   │   for k, v in enumerate(adapter_state):                                          │
│   736 │   │   │   │   adapter_state[k] = v * adapter_conditioning_scale                          │
│   737 │   │   if num_images_per_prompt > 1:                                                      │
│                                                                                                  │
│ D:\Anaconda3\envs\xxx\lib\site-packages\torch\nn\modules\module.py:1130 in _call_impl    │
│                                                                                                  │
│   1127 │   │   # this function, and just call forward.                                           │
│   1128 │   │   if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks o  │
│   1129 │   │   │   │   or _global_forward_hooks or _global_forward_pre_hooks):                   │
│ ❱ 1130 │   │   │   return forward_call(*input, **kwargs)                                         │
│   1131 │   │   # Do not call functions when jit is used                                          │
│   1132 │   │   full_backward_hooks, non_full_backward_hooks = [], []                             │
│   1133 │   │   if self._backward_hooks or _global_backward_hooks:                                │
│                                                                                                  │
│ D:\Anaconda3\envs\xxx\lib\site-packages\diffusers\models\adapter.py:258 in forward       │
│                                                                                                  │
│   255 │   │   │   raise ValueError(f"unknown adapter_type: {type}. Choose either 'full_adapter   │
│   256 │                                                                                          │
│   257 │   def forward(self, x: torch.Tensor) -> List[torch.Tensor]:                              │
│ ❱ 258 │   │   return self.adapter(x)                                                             │
│   259 │                                                                                          │
│   260 │   @property                                                                              │
│   261 │   def total_downscale_factor(self):                                                      │
│                                                                                                  │
│ D:\Anaconda3\envs\xxx\lib\site-packages\torch\nn\modules\module.py:1130 in _call_impl    │
│                                                                                                  │
│   1127 │   │   # this function, and just call forward.                                           │
│   1128 │   │   if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks o  │
│   1129 │   │   │   │   or _global_forward_hooks or _global_forward_pre_hooks):                   │
│ ❱ 1130 │   │   │   return forward_call(*input, **kwargs)                                         │
│   1131 │   │   # Do not call functions when jit is used                                          │
│   1132 │   │   full_backward_hooks, non_full_backward_hooks = [], []                             │
│   1133 │   │   if self._backward_hooks or _global_backward_hooks:                                │
│                                                                                                  │
│ D:\Anaconda3\envs\xxx\lib\site-packages\diffusers\models\adapter.py:297 in forward       │
│                                                                                                  │
│   294 │                                                                                          │
│   295 │   def forward(self, x: torch.Tensor) -> List[torch.Tensor]:                              │
│   296 │   │   x = self.unshuffle(x)                                                              │
│ ❱ 297 │   │   x = self.conv_in(x)                                                                │
│   298 │   │                                                                                      │
│   299 │   │   features = []                                                                      │
│   300                                                                                            │
│                                                                                                  │
│ D:\Anaconda3\envs\xxx\lib\site-packages\torch\nn\modules\module.py:1130 in _call_impl    │
│                                                                                                  │
│   1127 │   │   # this function, and just call forward.                                           │
│   1128 │   │   if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks o  │
│   1129 │   │   │   │   or _global_forward_hooks or _global_forward_pre_hooks):                   │
│ ❱ 1130 │   │   │   return forward_call(*input, **kwargs)                                         │
│   1131 │   │   # Do not call functions when jit is used                                          │
│   1132 │   │   full_backward_hooks, non_full_backward_hooks = [], []                             │
│   1133 │   │   if self._backward_hooks or _global_backward_hooks:                                │
│                                                                                                  │
│ D:\Anaconda3\envs\xxx\lib\site-packages\torch\nn\modules\conv.py:457 in forward          │
│                                                                                                  │
│    454 │   │   │   │   │   │   self.padding, self.dilation, self.groups)                         │
│    455 │                                                                                         │
│    456 │   def forward(self, input: Tensor) -> Tensor:                                           │
│ ❱  457 │   │   return self._conv_forward(input, self.weight, self.bias)                          │
│    458                                                                                           │
│    459 class Conv3d(_ConvNd):                                                                    │
│    460 │   __doc__ = r"""Applies a 3D convolution over an input signal composed of several inpu  │
│                                                                                                  │
│ D:\Anaconda3\envs\xxx\lib\site-packages\torch\nn\modules\conv.py:453 in _conv_forward    │
│                                                                                                  │
│    450 │   │   │   return F.conv2d(F.pad(input, self._reversed_padding_repeated_twice, mode=sel  │
│    451 │   │   │   │   │   │   │   weight, bias, self.stride,                                    │
│    452 │   │   │   │   │   │   │   _pair(0), self.dilation, self.groups)                         │
│ ❱  453 │   │   return F.conv2d(input, weight, bias, self.stride,                                 │
│    454 │   │   │   │   │   │   self.padding, self.dilation, self.groups)                         │
│    455 │                                                                                         │
│    456 │   def forward(self, input: Tensor) -> Tensor:                                           │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
RuntimeError: Given groups=1, weight of size [320, 64, 3, 3], expected input[1, 192, 80, 64] to have 64 channels, but got 192 channels instead

The version of diffusers is 0.21.4.

It seems that the weight for T2I Adapter is not matched with Stable Diffusion 1.5.

Please help me out!

TencentARC / T2I-Adapter

Error while using t2iadapter_sketch_sd15v2 #96