MaxTran96 opened 1 year ago
If you wish to write a script that runs inference without using the UI, this information might be helpful:
https://github.com/comfyanonymous/ComfyUI/tree/master/script_examples
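For reference, those examples drive a running ComfyUI server through its HTTP API. A minimal sketch of that approach (assuming the server is already running on the default port 8188 and that you exported your workflow with "Save (API Format)"):

```python
import json
import urllib.request

def queue_prompt(workflow: dict, server: str = "127.0.0.1:8188") -> dict:
    # POST the workflow graph to the running ComfyUI server's /prompt endpoint
    data = json.dumps({"prompt": workflow}).encode("utf-8")
    req = urllib.request.Request(f"http://{server}/prompt", data=data)
    return json.loads(urllib.request.urlopen(req).read())

with open("workflow_api.json") as f:   # workflow exported from the UI in API format
    workflow = json.load(f)

response = queue_prompt(workflow)
print(response)  # contains the prompt_id of the queued job
```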
Well, what I mean is a standalone Python script that runs inference without using ComfyUI, if such code is available.
Hi, does anyone know of code to run inference that can load a fine-tuned LoRA model?
I tried:
```python
# Imports needed for this snippet
from collections import defaultdict

import torch
from diffusers import DiffusionPipeline
from safetensors.torch import load_file


def load_lora_weights(pipeline, checkpoint_path, multiplier, device, dtype):
    LORA_PREFIX_UNET = "lora_unet"
    LORA_PREFIX_TEXT_ENCODER = "lora_te"

    # load LoRA weight from .safetensors
    state_dict = load_file(checkpoint_path, device=device)

    updates = defaultdict(dict)
    for key, value in state_dict.items():
        # it is suggested to print out the key, it usually will be something like below
        # "lora_te_text_model_encoder_layers_0_self_attn_k_proj.lora_down.weight"
        layer, elem = key.split('.', 1)
        updates[layer][elem] = value

    # directly update weight in diffusers model
    for layer, elems in updates.items():
        if "text" in layer:
            layer_infos = layer.split(LORA_PREFIX_TEXT_ENCODER + "_")[-1].split("_")
            curr_layer = pipeline.text_encoder
        else:
            layer_infos = layer.split(LORA_PREFIX_UNET + "_")[-1].split("_")
            curr_layer = pipeline.unet

        # find the target layer
        temp_name = layer_infos.pop(0)
        while len(layer_infos) > -1:
            try:
                curr_layer = curr_layer.__getattr__(temp_name)
                if len(layer_infos) > 0:
                    temp_name = layer_infos.pop(0)
                elif len(layer_infos) == 0:
                    break
            except Exception:
                if len(temp_name) > 0:
                    temp_name += "_" + layer_infos.pop(0)
                else:
                    temp_name = layer_infos.pop(0)

        # get elements for this layer
        weight_up = elems['lora_up.weight'].to(dtype)
        weight_down = elems['lora_down.weight'].to(dtype)
        alpha = elems['alpha']
        if alpha:
            alpha = alpha.item() / weight_up.shape[1]
        else:
            alpha = 1.0

        # update weight
        if len(weight_up.shape) == 4:
            curr_layer.weight.data += multiplier * alpha * torch.mm(weight_up.squeeze(3).squeeze(2), weight_down.squeeze(3).squeeze(2)).unsqueeze(2).unsqueeze(3)
        else:
            curr_layer.weight.data += multiplier * alpha * torch.mm(weight_up, weight_down)

    return pipeline


dtype = torch.float16
variant = 'fp16'
STABLE_DIFFUSION_SDXL = 'stabilityai/stable-diffusion-xl-base-0.9'
lora_path = 'my_finetuned_lora.safetensors'  # path to the fine-tuned LoRA file

pipe = DiffusionPipeline.from_pretrained(
    STABLE_DIFFUSION_SDXL,
    torch_dtype=dtype,
    use_safetensors=True,
    safety_checker=None,
    variant=variant
).to('cuda')

pipe = load_lora_weights(pipe, lora_path, 1.0, 'cuda', torch.float16)
```
It doesn't generate images of the subject I fine-tuned the model with.
Try this (credit to abhishekkrthakur)
```python
from diffusers import DiffusionPipeline, StableDiffusionXLImg2ImgPipeline
import torch

model = "stabilityai/stable-diffusion-xl-base-1.0"

pipe = DiffusionPipeline.from_pretrained(
    model,
    torch_dtype=torch.float16,
)
pipe.to("cuda")
pipe.load_lora_weights("model/", weight_name="pytorch_lora_weights.safetensors")

refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0",
    torch_dtype=torch.float16,
)
refiner.to("cuda")

prompt = "a portrait of sks dog, pixar, cartoon, 3d, headshots, fantasy, 4k, uhd"

for seed in range(10):
    generator = torch.Generator("cuda").manual_seed(seed)
    image = pipe(prompt=prompt, generator=generator, num_inference_steps=25)
    image = image.images[0]
    image.save(f"images/{seed}.png")
    image = refiner(prompt=prompt, generator=generator, image=image)
    image = image.images[0]
    image.save(f"images_refined/{seed}.png")
```
I'm actually having the opposite problem: I can't get my LoRA to work using the UI, but I can using that code. Could you share the workflow you used to get the LoRA to work with SDXL in the UI?
I think I have tried that, but it failed:
https://github.com/huggingface/diffusers/issues/4302#issuecomment-1654783299
Can you share the .json file that you used in ComfyUI to get the LoRA to work?
I don't think I can share the .json file here.
Hi, does anybody know how to apply the LoRA weights to the refiner as well?
You can use this code to run inference with a LoRA. It incorporates the base SDXL model as well as the refiner. It works; however, as noted in the outstanding issue #1053, the VAE Decode step adds an additional 10 GB of VRAM usage on the GPU that does not occur when running through the UI. I'm waiting on a response there to understand how to mitigate this.
```python
from nodes import KSamplerAdvanced
from nodes import VAEDecode
from nodes import LoraLoader
from nodes import EmptyLatentImage
from nodes import CLIPTextEncode
from nodes import SaveImage
from nodes import CheckpointLoaderSimple


def main():
    checkpointloadersimple = CheckpointLoaderSimple()
    checkpointloadersimple_4 = checkpointloadersimple.load_checkpoint(ckpt_name="sd_xl_base_1.0.safetensors")

    emptylatentimage = EmptyLatentImage()
    emptylatentimage_5 = emptylatentimage.generate(width=1024, height=1024, batch_size=1)

    checkpointloadersimple_12 = checkpointloadersimple.load_checkpoint(ckpt_name="sd_xl_refiner_1.0.safetensors")

    loraloader = LoraLoader()
    loraloader_49 = loraloader.load_lora(lora_name="my_lora.safetensors", strength_model=1, strength_clip=1, model=checkpointloadersimple_4[0], clip=checkpointloadersimple_4[1])

    cliptextencode = CLIPTextEncode()
    cliptextencode_6 = cliptextencode.encode(text="evening sunset scenery blue sky nature, glass bottle with a galaxy in it", clip=loraloader_49[1])
    cliptextencode_7 = cliptextencode.encode(text="text, watermark", clip=loraloader_49[1])

    ksampleradvanced = KSamplerAdvanced()
    ksampleradvanced_10 = ksampleradvanced.sample(add_noise="enable", noise_seed=721897303308196, steps=25, cfg=8, sampler_name="euler", scheduler="normal", start_at_step=0, end_at_step=20, return_with_leftover_noise="enable", model=loraloader_49[0], positive=cliptextencode_6[0], negative=cliptextencode_7[0], latent_image=emptylatentimage_5[0])

    cliptextencode_15 = cliptextencode.encode(text="evening sunset scenery blue sky nature, glass bottle with a galaxy in it", clip=checkpointloadersimple_12[1])
    cliptextencode_16 = cliptextencode.encode(text="text, watermark", clip=checkpointloadersimple_12[1])

    ksampleradvanced_11 = ksampleradvanced.sample(add_noise="disable", noise_seed=0, steps=25, cfg=8, sampler_name="euler", scheduler="normal", start_at_step=20, end_at_step=10000, return_with_leftover_noise="disable", model=checkpointloadersimple_12[0], positive=cliptextencode_15[0], negative=cliptextencode_16[0], latent_image=ksampleradvanced_10[0])

    vaedecode = VAEDecode()
    vaedecode_17 = vaedecode.decode(samples=ksampleradvanced_11[0], vae=checkpointloadersimple_12[2])

    saveimage = SaveImage()
    saveimage_19 = saveimage.save_images(filename_prefix="ComfyUI", images=vaedecode_17[0].detach())


if __name__ == "__main__":
    main()
```
Hi, may I know how you export the ComfyUI inference code?
@Austinzs277 Sure, I created a script that generates the code I pasted above when given a workflow_api.json file exported from the ComfyUI GUI. I plan on sharing the code in a public repo this week.
Hi, I got this when running your inference code:
```
Traceback (most recent call last):
  File "<stdin>", line 2, in <module>
  File "<stdin>", line 3, in main
  File "/home/ubuntu/content/ComfyUI/nodes.py", line 446, in load_checkpoint
    out = comfy.sd.load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, embedding_directory=folder_paths.get_folder_paths("embeddings"))
  File "/home/ubuntu/content/ComfyUI/comfy/sd.py", line 1149, in load_checkpoint_guess_config
    sd = utils.load_torch_file(ckpt_path)
  File "/home/ubuntu/content/ComfyUI/comfy/utils.py", line 10, in load_torch_file
    if ckpt.lower().endswith(".safetensors"):
AttributeError: 'NoneType' object has no attribute 'lower'
```
I only changed this line:

```python
loraloader_49 = loraloader.load_lora(lora_name="/home/content/VenusAI/Avatar/models/fixed_sdxl.safetensors"
```

in:
```python
def main():
    checkpointloadersimple = CheckpointLoaderSimple()
    checkpointloadersimple_4 = checkpointloadersimple.load_checkpoint(ckpt_name="sd_xl_base_1.0.safetensors")

    emptylatentimage = EmptyLatentImage()
    emptylatentimage_5 = emptylatentimage.generate(width=1024, height=1024, batch_size=1)

    checkpointloadersimple_12 = checkpointloadersimple.load_checkpoint(ckpt_name="sd_xl_refiner_1.0.safetensors")

    loraloader = LoraLoader()
    loraloader_49 = loraloader.load_lora(lora_name="/home/content/VenusAI/Avatar/models/fixed_sdxl.safetensors", strength_model=1, strength_clip=1, model=checkpointloadersimple_4[0], clip=checkpointloadersimple_4[1])

    cliptextencode = CLIPTextEncode()
    cliptextencode_6 = cliptextencode.encode(text="evening sunset scenery blue sky nature, glass bottle with a galaxy in it", clip=loraloader_49[1])
    cliptextencode_7 = cliptextencode.encode(text="text, watermark", clip=loraloader_49[1])

    ksampleradvanced = KSamplerAdvanced()
    ksampleradvanced_10 = ksampleradvanced.sample(add_noise="enable", noise_seed=721897303308196, steps=25, cfg=8, sampler_name="euler", scheduler="normal", start_at_step=0, end_at_step=20, return_with_leftover_noise="enable", model=loraloader_49[0], positive=cliptextencode_6[0], negative=cliptextencode_7[0], latent_image=emptylatentimage_5[0])

    cliptextencode_15 = cliptextencode.encode(text="evening sunset scenery blue sky nature, glass bottle with a galaxy in it", clip=checkpointloadersimple_12[1])
    cliptextencode_16 = cliptextencode.encode(text="text, watermark", clip=checkpointloadersimple_12[1])

    ksampleradvanced_11 = ksampleradvanced.sample(add_noise="disable", noise_seed=0, steps=25, cfg=8, sampler_name="euler", scheduler="normal", start_at_step=20, end_at_step=10000, return_with_leftover_noise="disable", model=checkpointloadersimple_12[0], positive=cliptextencode_15[0], negative=cliptextencode_16[0], latent_image=ksampleradvanced_10[0])

    vaedecode = VAEDecode()
    vaedecode_17 = vaedecode.decode(samples=ksampleradvanced_11[0], vae=checkpointloadersimple_12[2])

    saveimage = SaveImage()
    saveimage_19 = saveimage.save_images(filename_prefix="ComfyUI", images=vaedecode_17[0].detach())
```
Try running the script from the ComfyUI folder (the same place as main.py) and make sure your LoRA is placed in the models/loras/ folder. Also, only include the file name of the LoRA, not the full path.
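In other words, only the load_lora line changes (assuming the file has been copied into ComfyUI/models/loras/):

```python
loraloader_49 = loraloader.load_lora(
    lora_name="fixed_sdxl.safetensors",  # file name only, resolved against models/loras/
    strength_model=1,
    strength_clip=1,
    model=checkpointloadersimple_4[0],
    clip=checkpointloadersimple_4[1],
)
```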
Got it, but where does it save the image to?
The output folder, with the ComfyUI prefix. It's exactly the same as you would get from running it in the GUI, but you don't need to run the server and you get to see how each piece of code works together.
I got a CUDA out of memory error. Is there any way to get around this? I'm using a g5.4xlarge EC2 instance on AWS.
That must be happening at the VAEDecode().decode() step. That's where the VRAM currently spikes. However, I'm running this code on an RTX 4090 (24 GB of VRAM) and it caps out at around 23 GB of VRAM.
So can I remove that VAE step for now? How would the code look?
Try replacing `vaedecode = VAEDecode()` with `vaedecode = VAEDecodeTiled()`. That reduced total VRAM usage to 18 GB for me.
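Applied to the script above, that swap looks like this (a sketch; note that newer ComfyUI versions may also expect a tile_size argument on the tiled decoder):

```python
from nodes import VAEDecodeTiled

vaedecode = VAEDecodeTiled()
# decodes the latent in tiles, which keeps peak VRAM lower than a full-frame decode
vaedecode_17 = vaedecode.decode(samples=ksampleradvanced_11[0], vae=checkpointloadersimple_12[2])
```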
Hi, what do these two sets of arguments do: `start_at_step=0, end_at_step=20, return_with_leftover_noise="enable"` and `start_at_step=20, end_at_step=10000, return_with_leftover_noise="disable"`?
Those are default parameters in the SDXL workflow example. I think it's basically the refiner model picking up where the base model left off: the base model denoises steps 0 through 20 of the 25 and returns a still-noisy latent, and the refiner continues from step 20 to the end (end_at_step=10000 just means "run until the last step"). If you've looked at outputs from both, the output from the refiner model is usually a nicer, more detailed version of the base model output. Kind of like image to image.
" the output from the base model is usually a nicer, more detailed version of the base model output." -> you mean the output from the refined model is usually a nicer ... ?
I looked at the nodes.py file and it seems like there is no way to pass `num_images_per_prompt=self.num_images_output`, as in:
```python
self.pipe = DiffusionPipeline.from_pretrained(
    self.stable_diffusion_id,
    custom_pipeline="lpw_stable_diffusion",
    unet=self.unet,
    safety_checker=None,
    text_encoder=self.text_encoder,
    scheduler=self.scheduler,
    torch_dtype=torch.float16,
)

with autocast("cuda"), torch.inference_mode():
    images = self.pipe(
        positive_prompt,
        height=height,
        width=width,
        negative_prompt=negative_prompt,
        num_images_per_prompt=self.num_images_output,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
        generator=torch.Generator(device=device).manual_seed(self.seed),
    ).images
```
So is a for loop the only way to generate multiple images in one run? It's taking a very long time, though.
You're right, I edited my typo in the last comment. And you can definitely increase the batch size to produce more than one image at a time. But what is probably taking you a long time is that the models are being loaded onto the GPU on each run. Try loading the base and refiner models onto your GPU outside of the for loop. Once those are loaded, you should be able to generate a photo every few seconds.
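Roughly like this (a sketch based on the script above, run from the ComfyUI folder, reusing the prompt from the earlier diffusers example): everything is loaded and encoded once, and only the sampling, decode, and save steps run inside the loop, so the checkpoints and LoRA stay on the GPU between iterations.

```python
from nodes import (CheckpointLoaderSimple, LoraLoader, CLIPTextEncode,
                   EmptyLatentImage, KSamplerAdvanced, VAEDecode, SaveImage)

def main():
    # load checkpoints, LoRA, prompt encodings, and the empty latent once
    base = CheckpointLoaderSimple().load_checkpoint(ckpt_name="sd_xl_base_1.0.safetensors")
    refiner = CheckpointLoaderSimple().load_checkpoint(ckpt_name="sd_xl_refiner_1.0.safetensors")
    lora = LoraLoader().load_lora(lora_name="my_lora.safetensors", strength_model=1,
                                  strength_clip=1, model=base[0], clip=base[1])

    encode = CLIPTextEncode()
    pos_base = encode.encode(text="a portrait of sks dog", clip=lora[1])
    neg_base = encode.encode(text="text, watermark", clip=lora[1])
    pos_ref = encode.encode(text="a portrait of sks dog", clip=refiner[1])
    neg_ref = encode.encode(text="text, watermark", clip=refiner[1])

    latent = EmptyLatentImage().generate(width=1024, height=1024, batch_size=1)
    sampler = KSamplerAdvanced()
    decode = VAEDecode()
    save = SaveImage()

    for seed in range(10):  # only sampling, decoding, and saving happen per image
        base_out = sampler.sample(add_noise="enable", noise_seed=seed, steps=25, cfg=8,
                                  sampler_name="euler", scheduler="normal",
                                  start_at_step=0, end_at_step=20,
                                  return_with_leftover_noise="enable",
                                  model=lora[0], positive=pos_base[0], negative=neg_base[0],
                                  latent_image=latent[0])
        refined = sampler.sample(add_noise="disable", noise_seed=0, steps=25, cfg=8,
                                 sampler_name="euler", scheduler="normal",
                                 start_at_step=20, end_at_step=10000,
                                 return_with_leftover_noise="disable",
                                 model=refiner[0], positive=pos_ref[0], negative=neg_ref[0],
                                 latent_image=base_out[0])
        images = decode.decode(samples=refined[0], vae=refiner[2])
        save.save_images(filename_prefix=f"ComfyUI_{seed}", images=images[0].detach())

if __name__ == "__main__":
    main()
```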
Got it. How do I increase the batch size?
Update the batch_size argument here:

```python
emptylatentimage_5 = emptylatentimage.generate(width=1024, height=1024, batch_size=1)
```

Change batch_size from 1 to whatever you want your batch size to be.
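For example, to get four images per run (SaveImage will write every image in the decoded batch):

```python
emptylatentimage_5 = emptylatentimage.generate(width=1024, height=1024, batch_size=4)
```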
@MaxTran96 Check out this repo I made to convert any native ComfyUI workflow to working Python code. https://github.com/pydn/ComfyUI-to-Python-Extension
I think it should be helpful for what you are trying to achieve. I also added in logic to be able to quickly generate multiple images without unnecessarily loading models or encoding text. Let me know what you think.
@Austinzcs I just open-sourced my code for converting ComfyUI workflows to Python. Take a look here: https://github.com/pydn/ComfyUI-to-Python-Extension
Hello, how do you run inference with a .safetensors LoRA file produced by LoRA training on the SDXL base model? Is there a specific Python script I need to run? I know you can do that via the UI, but I'm hoping to do it via code.