TencentARC / PhotoMaker

PhotoMaker [CVPR 2024]
https://photo-maker.github.io/
Other
9.39k stars 749 forks source link

Input ... is not valid. Should be a string, a list/tuple of strings or a list/tuple of integers. #94

Closed machineminded closed 8 months ago

machineminded commented 8 months ago

Here is my code...

import torch
import os
import sys
sys.path.append('../photomaker')

from PIL import Image
from diffusers import EulerDiscreteScheduler
from photomaker import PhotoMakerStableDiffusionXLPipeline
from huggingface_hub import hf_hub_download

# base_model_path = 'SG161222/RealVisXL_V3.0'
base_model_path = "E:\\github\\stable-diffusion-webui\\models\\Stable-diffusion\\sdxl\\RealVisXL_V3.0.safetensors"
photomaker_ckpt = hf_hub_download(repo_id="TencentARC/PhotoMaker", filename="photomaker-v1.bin", repo_type="model")

def generate_photomaker(prompt, input_id_images, negative_prompt, steps, seed):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    pipe = PhotoMakerStableDiffusionXLPipeline.from_single_file(
        base_model_path,
        torch_dtype=torch.float16,
        use_safetensors=True,
        # variant=variant
    ).to(device)

    pipe.load_photomaker_adapter(
        os.path.dirname(photomaker_ckpt),
        subfolder="",
        weight_name=os.path.basename(photomaker_ckpt),
        trigger_word="img"
    )

    pipe.id_encoder.to(device)

    pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)

    pipe.fuse_lora()

    generator = torch.Generator(device=device).manual_seed(seed)

    input_id_images = []
    for img in input_id_images:
        input_id_images.append(Image.fromarray(img))

    images = pipe(
        prompt=prompt,
        input_id_images=input_id_images,
        negative_prompt=negative_prompt,
        num_images_per_prompt=1,
        num_inference_steps=steps,
        start_merge_step=10,
        generator=generator,
    ).images

    return images

After invoking the generate_photomaker method, I get this error:

Doing the PhotoMaker...
<class 'numpy.ndarray'>
Loading PhotoMaker components [1] id_encoder from [C:\Users\MachineMinded\.cache\huggingface\hub\models--TencentARC--PhotoMaker\snapshots\3602d02ba7cc99ce8886e24063ed10e4f2510c84]...
Loading PhotoMaker components [2] lora_weights from [C:\Users\MachineMinded\.cache\huggingface\hub\models--TencentARC--PhotoMaker\snapshots\3602d02ba7cc99ce8886e24063ed10e4f2510c84]
Traceback (most recent call last):
  File "E:\github\Fooocus-inswapper\modules\async_worker.py", line 848, in worker
    handler(task)
  File "E:\github\Fooocus-inswapper\venv\lib\site-packages\torch\utils\_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "E:\github\Fooocus-inswapper\venv\lib\site-packages\torch\utils\_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "E:\github\Fooocus-inswapper\modules\async_worker.py", line 772, in handler
    imgs = generate_photomaker(positive_cond, [photomaker_source_image], negative_cond, steps, task['task_seed'])
  File "E:\github\Fooocus-inswapper\modules\pm.py", line 44, in generate_photomaker
    images = pipe(
  File "E:\github\Fooocus-inswapper\venv\lib\site-packages\torch\utils\_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "E:\github\Fooocus-inswapper\photomaker\pipeline.py", line 336, in __call__
    ) = self.encode_prompt_with_trigger_word(
  File "E:\github\Fooocus-inswapper\photomaker\pipeline.py", line 163, in encode_prompt_with_trigger_word
    input_ids = tokenizer.encode(prompt) # TODO: batch encode
  File "E:\github\Fooocus-inswapper\venv\lib\site-packages\transformers\tokenization_utils_base.py", line 2574, in encode
    encoded_inputs = self.encode_plus(
  File "E:\github\Fooocus-inswapper\venv\lib\site-packages\transformers\tokenization_utils_base.py", line 2982, in encode_plus
    return self._encode_plus(
  File "E:\github\Fooocus-inswapper\venv\lib\site-packages\transformers\tokenization_utils.py", line 719, in _encode_plus
    first_ids = get_input_ids(text)
  File "E:\github\Fooocus-inswapper\venv\lib\site-packages\transformers\tokenization_utils.py", line 705, in get_input_ids
    raise ValueError(
ValueError: Input [[tensor([[[-3.8946, -2.4532,  4.4858,  ...,  0.1694,  0.4139, -0.2779],
         [ 0.2651,  0.0117,  0.6631,  ...,  0.0095,  0.2409, -0.2889],
         [ 0.2398,  0.5118, -0.1032,  ...,  0.9963,  0.2142,  0.3009],
         ...,
         [-0.1065,  0.4647,  0.0722,  ...,  0.3633,  0.3654,  0.6109],
         [-0.1561, -0.2883, -0.4512,  ..., -0.0046, -1.3019,  0.6663],
         [-0.3591, -0.2266, -0.6351,  ..., -0.4206, -0.7663, -1.0454]]]), {'pooled_output': tensor([[-0.6228, -0.0542, -0.3522,  ..., -0.6064,  0.0334,  0.1428]])}]] is not valid. Should be a string, a list/tuple of strings or a list/tuple of integers.
Total time: 20.25 seconds

I am using the latest version of transformers.

Paper99 commented 8 months ago

Hi, @machineminded. Is it solved?

machineminded commented 8 months ago

Hello @Paper99 , yes this is solved. I was passing the wrong data type :) Thank you.