aiXia121 opened 1 week ago
My test code, following your *.ipynb:
```python
import numpy as np
import torch
from PIL import Image

from diffusers import StableDiffusionXLPipeline
from controlnet_lite import ControlNetLLLite

# Load the SDXL base pipeline in fp16.
model_id = "/cv_huggingface_models/sdxl_model/stable-diffusion-xl-base-1.0"
pipeline = StableDiffusionXLPipeline.from_pretrained(
    model_id, torch_dtype=torch.float16, variant="fp16"
).to("cuda")

# Load the LLLite weights.
path = "kohya_controllllite_xl_canny.safetensors"
controlnet = ControlNetLLLite(path)

# Conditioning image for the canny control.
image = Image.open("hf-logo.png")
control_image = np.array(image)
print("image.size -> {}".format(image.size))

# Attach the control to the pipeline's UNet.
conditioning_weight = 1
controlnet.apply(pipe=pipeline, cond=control_image, weight=conditioning_weight)

with torch.inference_mode():
    image = pipeline(
        prompt="aerial view, a futuristic research complex in a bright foggy jungle, hard lighting",
        num_inference_steps=20,
    ).images[0]

image_name = "test_out.png"
image.save(image_name)
```
Thanks, looking forward to your reply. Here is the full log:
```
$ python test_kohya.py
Loading pipeline components...: 100%|██████████| 7/7 [00:00<00:00,  7.78it/s]
image.size -> (1024, 1024)
  0%|          | 0/20 [00:00<?, ?it/s]
Traceback (most recent call last):
  File "/aigc-nas01/yongbo/project_memegif_create/deploy_dev/kohya_control/test_kohya.py", line 85, in <module>
    image = pipeline(prompt="aerial view, a futuristic research complex in a bright foggy jungle, hard lighting", num_inference_steps=20).images[0]
  File "/opt/conda/envs/p310t201/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "/opt/conda/envs/p310t201/lib/python3.10/site-packages/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py", line 1174, in __call__
    noise_pred = self.unet(
  File "/opt/conda/envs/p310t201/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/opt/conda/envs/p310t201/lib/python3.10/site-packages/diffusers/models/unets/unet_2d_condition.py", line 1216, in forward
    sample, res_samples = downsample_block(
  File "/opt/conda/envs/p310t201/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/opt/conda/envs/p310t201/lib/python3.10/site-packages/diffusers/models/unets/unet_2d_blocks.py", line 1279, in forward
    hidden_states = attn(
  File "/opt/conda/envs/p310t201/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/opt/conda/envs/p310t201/lib/python3.10/site-packages/diffusers/models/transformers/transformer_2d.py", line 397, in forward
    hidden_states = block(
  File "/opt/conda/envs/p310t201/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/opt/conda/envs/p310t201/lib/python3.10/site-packages/diffusers/models/attention.py", line 329, in forward
    attn_output = self.attn1(
  File "/opt/conda/envs/p310t201/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/opt/conda/envs/p310t201/lib/python3.10/site-packages/diffusers/models/attention_processor.py", line 522, in forward
    return self.processor(
  File "/opt/conda/envs/p310t201/lib/python3.10/site-packages/diffusers/models/attention_processor.py", line 1259, in __call__
    query = attn.to_q(hidden_states)
  File "/opt/conda/envs/p310t201/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/opt/conda/envs/p310t201/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "/aigc-nas01/yongbo/project_memegif_create/deploy_dev/kohya_control/controlnet_lite.py", line 228, in forward
    hack = hack + module(x) * weight
  File "/opt/conda/envs/p310t201/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/aigc-nas01/yongbo/project_memegif_create/deploy_dev/kohya_control/controlnet_lite.py", line 77, in forward
    cx = self.conditioning1(self.cond_image.to(x.device, dtype=x.dtype))
  File "/opt/conda/envs/p310t201/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/opt/conda/envs/p310t201/lib/python3.10/site-packages/torch/nn/modules/container.py", line 217, in forward
    input = module(input)
  File "/opt/conda/envs/p310t201/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/opt/conda/envs/p310t201/lib/python3.10/site-packages/torch/nn/modules/conv.py", line 463, in forward
    return self._conv_forward(input, self.weight, self.bias)
  File "/opt/conda/envs/p310t201/lib/python3.10/site-packages/torch/nn/modules/conv.py", line 459, in _conv_forward
    return F.conv2d(input, weight, bias, self.stride,
RuntimeError: Given groups=1, weight of size [16, 3, 4, 4], expected input[1, 4, 1024, 1024] to have 3 channels, but got 4 channels instead
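```

Reading the error, the conditioning conv weight is `[16, 3, 4, 4]`, so it expects a 3-channel input, but the input is `[1, 4, 1024, 1024]`. My guess is that `hf-logo.png` loads as RGBA and the alpha channel is passed through as a fourth channel. A minimal sketch of the workaround I plan to try, assuming the control image just needs to be plain RGB:

```python
from PIL import Image
import numpy as np

# Drop the alpha channel so the conditioning conv sees 3 channels, not 4.
image = Image.open("hf-logo.png").convert("RGB")
control_image = np.array(image)  # shape (H, W, 3) instead of (H, W, 4)
```

Is this the expected input format for `ControlNetLLLite.apply`, or should the wrapper handle RGBA images itself?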