TencentARC / PhotoMaker

PhotoMaker [CVPR 2024]
https://photo-maker.github.io/
Other
9.58k stars 768 forks source link

the result of running is noise, why? please help #98

Open dnnyyq opened 10 months ago

dnnyyq commented 10 months ago

i runned the demo by notebook, but I got the noise like this image

infer code is

import torch
import os
from diffusers.utils import load_image
from diffusers import EulerDiscreteScheduler, DDIMScheduler
from photomaker import PhotoMakerStableDiffusionXLPipeline

import  os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

base_model_path = "./SG161222"
photomaker_path = "./ckpt/photomaker-v1.bin"

### Load base model
pipe = PhotoMakerStableDiffusionXLPipeline.from_pretrained(
    base_model_path,  # can change to any base model based on SDXL
    torch_dtype=torch.bfloat16, 
    use_safetensors=True, 
    variant="fp16"
).to(device)

### Load PhotoMaker checkpoint
pipe.load_photomaker_adapter(
    os.path.dirname(photomaker_path),
    subfolder="",
    weight_name=os.path.basename(photomaker_path),
    trigger_word="img"  # define the trigger word
)     

#pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)
pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
### Also can cooperate with other LoRA modules
# pipe.load_lora_weights(os.path.dirname(lora_path), weight_name=lora_model_name, adapter_name="xl_more_art-full")
# pipe.set_adapters(["photomaker", "xl_more_art-full"], adapter_weights=[1.0, 0.5])

pipe.fuse_lora()

### define the input ID images
input_folder_name = './examples/newton_man'
image_basename_list = os.listdir(input_folder_name)
image_path_list = sorted([os.path.join(input_folder_name, basename) for basename in image_basename_list])

input_id_images = []
for image_path in image_path_list:
    input_id_images.append(load_image(image_path))

# Note that the trigger word `img` must follow the class word for personalization
num_steps = 50
style_strength_ratio = 20
start_merge_step = int(float(style_strength_ratio) / 100 * num_steps)
if start_merge_step > 30:
    start_merge_step = 30

prompt = "a half-body portrait of a man img wearing the sunglasses in Iron man suit, best quality"
negative_prompt = "(asymmetry, worst quality, low quality, illustration, 3d, 2d, painting, cartoons, sketch), open mouth, grayscale"
generator = torch.Generator(device=device).manual_seed(42)
images = pipe(
    prompt=prompt,
    input_id_images=input_id_images,
    negative_prompt=negative_prompt,
    num_images_per_prompt=1,
    num_inference_steps=num_steps,
    start_merge_step=10,
    generator=generator,
).images
print(len(images))
for i, image in enumerate(images):
    image.save('out_photomaker_{}.png'.format(i))
dnnyyq commented 10 months ago

base_model_path content are from SG161222--RealVisXL_V3.0-11ee564ebf4bd96d90ed5d473cb8e7f2e6450bcf.tar

SG161222/
├── model_index.json
├── scheduler
├── text_encoder
├── text_encoder_2
├── tokenizer
├── tokenizer_2
├── unet
└── vae
Paper99 commented 10 months ago

Did you try others base model, e.g., SDXL 1.0?

dnnyyq commented 10 months ago

I found that my GPU do not support bfloat16, it was solved by changing bfloat16 to float16.