chengzeyi / stable-fast

Best inference performance optimization framework for HuggingFace Diffusers on NVIDIA GPUs.
MIT License
1.16k stars 70 forks source link

Can't load/unload lora dynamically #91

Open zhangjun opened 9 months ago

zhangjun commented 9 months ago

Hi, I want to load LoRA weights dynamically, but I found that a LoRA can't be unloaded: the results of the remaining rounds are all the same. The code is as follows.

import time
import torch

from diffusers import EulerAncestralDiscreteScheduler, StableDiffusionPipeline

# Optional dependency: record whether stable-fast could be imported so the
# rest of the script can tell if `compile` / `CompilationConfig` exist.
use_sfast = False
try:
    from sfast.compilers.stable_diffusion_pipeline_compiler import (compile, CompilationConfig)
except ImportError:
    # Only a missing/unimportable package is expected here; any other error
    # (or Ctrl-C) should propagate instead of being reported as "not found".
    print("stable fast not found!")
else:
    use_sfast = True
    print("sfast import succeed!")
# Toggle for loading/fusing a LoRA around each generation in the loop below.
use_lora = True

# Base checkpoint used for every generation.
model_path = "stabilityai/stable-diffusion-2-1"

# Build the fp16 Stable Diffusion pipeline with an Euler-Ancestral scheduler
# and the safety checker disabled, then move it to the GPU.
scheduler = EulerAncestralDiscreteScheduler.from_pretrained(
    model_path, subfolder="scheduler"
)
pipeline = StableDiffusionPipeline.from_pretrained(
    model_path,
    scheduler=scheduler,
    torch_dtype=torch.float16,
    safety_checker=None,
)

pipeline.to("cuda")

# NOTE: no LoRA is loaded or fused at this point. Fusing 'style-A' here and
# then fusing it again in the generation loop (which only unfuses once per
# iteration) would leave the base weights permanently altered, and the issue
# discussion recommends compiling before loading any LoRA. LoRA weights are
# loaded per iteration in the generation loop instead.

# Compile the pipeline with stable-fast only when it was imported
# successfully; otherwise `CompilationConfig` / `compile` are undefined and
# the plain diffusers pipeline is used as-is.
if use_sfast:
    config = CompilationConfig.Default()
    # Enable the optional accelerators only when their packages are present.
    try:
        import xformers  # noqa: F401  (presence check only)
    except ImportError:
        print('xformers not installed, skip')
    else:
        config.enable_xformers = True
    try:
        import triton  # noqa: F401  (presence check only)
    except ImportError:
        print('Triton not installed, skip')
    else:
        config.enable_triton = True
        torch.backends.cuda.matmul.allow_tf32 = True
    config.enable_cuda_graph = True
    # config.trace_scheduler = False
    config.prefer_lowp_gemm = True
    pipeline = compile(pipeline, config)

# Generation settings shared by every iteration.
prompt = '1boy,black hair,curly hair,brown eyes,casual blue t-shirt,cargo green shorts,playful red cap,friendly yellow smile, solo'
neg_prompt = 'monochrome, lowres, bad anatomy, worst quality, low quality'
height, width = 576, 768
num_inference_steps = 30
seed = 2019

# LoRA checkpoints to cycle through, one per iteration.
lora_list = [
    'style-A.safetensors',
    'style-B.safetensors',
]
for i in range(4):
    # Cycle over however many LoRAs are listed rather than assuming two.
    lora_path = lora_list[i % len(lora_list)]
    t0 = time.time()
    if use_lora:
        print(f"use lora {lora_path}")
        pipeline.load_lora_weights(lora_path)
        pipeline.fuse_lora(lora_scale=1.0)
    t1 = time.time()
    # A freshly seeded generator per iteration keeps the starting noise the
    # same, so differences between images come from the LoRA only.
    sfast_inputs = dict(
        prompt=prompt,
        negative_prompt=neg_prompt,
        generator=torch.Generator(device='cuda').manual_seed(seed),
        width=width,
        height=height,
        num_inference_steps=num_inference_steps,
    )
    image = pipeline(**sfast_inputs).images[0]
    t2 = time.time()
    if use_lora:
        # Undo this iteration's LoRA before the next one is loaded.
        pipeline.unfuse_lora()
        pipeline.unload_lora_weights()
    # Timings: LoRA load+fuse, inference, LoRA unfuse+unload.
    print("cost: ", t1 - t0, t2 - t1, time.time() - t2)
    image.save(f'image-{i}.jpg')
    print(i, ", cost: ", time.time() - t0)

# Also keep the last generated image under a fixed name.
image.save('image2.jpg')
yangshaobooo commented 8 months ago

I encountered the same issue. Have you resolved it?

zhangjun commented 7 months ago

I encountered the same issue. Have you resolved it?

Compile first, and then load the LoRA or switch LoRA files.