Closed Bilal143260 closed 5 months ago
I have trained the IP-Adapter using tutorial_train.py. I got a model.safetensors file with a size of 1.29 GB. I used the following code to convert it:
import torch


def split_ip_adapter_state_dict(sd):
    """Split a flat training state dict into the two sub-dicts that the
    IPAdapter loader expects.

    Keys prefixed "image_proj_model." go into the ``image_proj`` dict and
    keys prefixed "adapter_modules." go into the ``ip_adapter`` dict, with
    the prefix stripped. UNet keys are dropped — they are not part of the
    exported adapter checkpoint.

    :param sd: mapping of flat parameter names to tensors
    :return: tuple ``(image_proj_sd, ip_sd)``
    """
    image_proj_sd = {}
    ip_sd = {}
    for k in sd:
        if k.startswith("unet"):
            continue
        elif k.startswith("image_proj_model"):
            # removeprefix strips only the leading prefix; the original
            # str.replace would also rewrite later occurrences of the
            # substring anywhere inside the key.
            image_proj_sd[k.removeprefix("image_proj_model.")] = sd[k]
        elif k.startswith("adapter_modules"):
            ip_sd[k.removeprefix("adapter_modules.")] = sd[k]
    return image_proj_sd, ip_sd


if __name__ == "__main__":
    # Import here so the pure conversion helper above can be imported
    # without requiring safetensors to be installed.
    from safetensors.torch import load_file

    ckpt = "/home/bilal/IP-Adapter-Controlnet/output/checkpoint-140000/model.safetensors"
    sd = load_file(ckpt, device="cpu")
    image_proj_sd, ip_sd = split_ip_adapter_state_dict(sd)
    torch.save({"image_proj": image_proj_sd, "ip_adapter": ip_sd}, "ip_adapter.bin")
and for inference I used the following code:
import torch
from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline, StableDiffusionInpaintPipelineLegacy, DDIMScheduler, AutoencoderKL
from PIL import Image
from ip_adapter import IPAdapter

base_model_path = "runwayml/stable-diffusion-v1-5"
vae_model_path = "stabilityai/sd-vae-ft-mse"
image_encoder_path = "/home/bilal/IP-Adapter-Controlnet/models/image_encoder"
ip_ckpt = "/home/bilal/IP-Adapter-Controlnet/ip_adapter.bin"
device = "cuda"


def image_grid(imgs, rows, cols):
    """Paste ``rows * cols`` equally-sized PIL images into one grid image.

    :param imgs: list of PIL images, all the same size
    :param rows: number of grid rows
    :param cols: number of grid columns
    :return: a new RGB PIL image containing the grid
    """
    assert len(imgs) == rows * cols
    w, h = imgs[0].size
    grid = Image.new('RGB', size=(cols * w, rows * h))
    for i, img in enumerate(imgs):
        # Column index is i % cols, row index is i // cols.
        grid.paste(img, box=(i % cols * w, i // cols * h))
    return grid


if __name__ == "__main__":
    noise_scheduler = DDIMScheduler(
        num_train_timesteps=1000,
        beta_start=0.00085,
        beta_end=0.012,
        beta_schedule="scaled_linear",
        clip_sample=False,
        set_alpha_to_one=False,
        steps_offset=1,
    )
    vae = AutoencoderKL.from_pretrained(vae_model_path).to(dtype=torch.float16)

    # Load the SD pipeline.
    pipe = StableDiffusionPipeline.from_pretrained(
        base_model_path,
        torch_dtype=torch.float16,
        scheduler=noise_scheduler,
        vae=vae,
        feature_extractor=None,
        safety_checker=None
    )

    # Read the image prompt. PIL's resize returns a NEW image; the original
    # code called image.resize(...) and discarded the result, so the prompt
    # image was never actually resized.
    image = Image.open("assets/images/woman.png")
    image = image.resize((256, 256))

    # Load the ip-adapter.
    ip_model = IPAdapter(pipe, image_encoder_path, ip_ckpt, device)

    # Generate image variations from the image prompt.
    images = ip_model.generate(pil_image=image, num_samples=4, num_inference_steps=50, seed=42)
    grid = image_grid(images, 1, 4)
but I got the following error:
raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format( RuntimeError: Error(s) in loading state_dict for ModuleList: Missing key(s) in state_dict: "1.to_k_ip.weight", "1.to_v_ip.weight", "3.to_k_ip.weight", "3.to_v_ip.weight", "5.to_k_ip.weight", "5.to_v_ip.weight", "7.to_k_ip.weight", "7.to_v_ip.weight", "9.to_k_ip.weight", "9.to_v_ip.weight", "11.to_k_ip.weight", "11.to_v_ip.weight", "13.to_k_ip.weight", "13.to_v_ip.weight", "15.to_k_ip.weight", "15.to_v_ip.weight", "17.to_k_ip.weight", "17.to_v_ip.weight", "19.to_k_ip.weight", "19.to_v_ip.weight", "21.to_k_ip.weight", "21.to_v_ip.weight", "23.to_k_ip.weight", "23.to_v_ip.weight", "25.to_k_ip.weight", "25.to_v_ip.weight", "27.to_k_ip.weight", "27.to_v_ip.weight", "29.to_k_ip.weight", "29.to_v_ip.weight", "31.to_k_ip.weight", "31.to_v_ip.weight"
https://github.com/tencent-ailab/IP-Adapter/issues/246
I have trained the IP-Adapter using tutorial_train.py. I got a model.safetensors file with a size of 1.29 GB. I used the following code to convert it.
and for inference I used the following code,
but I got the following error.