kamalkraj / stable-diffusion-tritonserver

Deploy stable diffusion model with onnx/tensorrt + tritonserver
Apache License 2.0

How to modify the default height and width? #4

Open whalefa1I opened 1 year ago

whalefa1I commented 1 year ago

I need a picture with a width of 1024 and a height of 768, so I modified the unet configuration file:

  {
    name: "sample"
    data_type: TYPE_FP32
    dims: [ -1, 4, -1, -1 ]  # was [ -1, 4, 64, 64 ]
  },

but I got an error like:

tritonclient.utils.InferenceServerException: Failed to process the request(s) for model instance 'stable_diffusion', message: TritonModelException: [request id: <id_unknown>] unexpected shape for input 'sample' for model 'unet'. Expected [-1,4,64,64], got [2,4,128,96]

So how to modify the default height and width?
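
The dims in config.pbtxt only describe the model's interface; if the UNet ONNX graph itself was exported with fixed 64x64 latents, it also has to be re-exported with dynamic spatial axes before Triton will accept other sizes. A minimal sketch of such an export (the model id, wrapper, and opset here are assumptions, not the repo's actual convert script):

import torch
from diffusers import UNet2DConditionModel

# Wrapper so the traced graph returns a plain tensor instead of a dataclass.
class UNetWrapper(torch.nn.Module):
    def __init__(self, unet):
        super().__init__()
        self.unet = unet

    def forward(self, sample, timestep, encoder_hidden_states):
        return self.unet(sample, timestep, encoder_hidden_states, return_dict=False)[0]

unet = UNet2DConditionModel.from_pretrained(
    "CompVis/stable-diffusion-v1-4", subfolder="unet"  # assumed model id
).eval()

# Dummy inputs: latents for a 768x1024 image are (batch, 4, height/8, width/8).
sample = torch.randn(2, 4, 96, 128)
timestep = torch.tensor([1.0])
encoder_hidden_states = torch.randn(2, 77, 768)

torch.onnx.export(
    UNetWrapper(unet),
    (sample, timestep, encoder_hidden_states),
    "unet.onnx",
    input_names=["sample", "timestep", "encoder_hidden_states"],
    output_names=["out_sample"],
    # mark batch and spatial axes dynamic so shapes like [2, 4, 128, 96] are accepted
    dynamic_axes={
        "sample": {0: "batch", 2: "height", 3: "width"},
        "out_sample": {0: "batch", 2: "height", 3: "width"},
    },
    opset_version=14,
)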

whalefa1I commented 1 year ago

Do you have a converted ONNX model (1.4 or 1.5) that can be deployed in Triton successfully, by any chance? My ONNX model converted from the V2 branch cannot return an image successfully, while the Master branch works well for me. When I call it through Triton I get:

1119 14:06:51.895234 100 pb_stub.cc:409] Failed to process the request(s) for model 'stable_diffusion', message: TritonModelException: GPU tensors are not supported.

At: /models/stable_diffusion/1/model.py(270): execute
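
The "GPU tensors are not supported" error usually means execute() in model.py built its output pb_utils.Tensor from a CUDA tensor. A hedged sketch of the kind of fix (run_pipeline is a hypothetical stand-in for the diffusion loop around line 270):

import triton_python_backend_utils as pb_utils

class TritonPythonModel:
    def execute(self, requests):
        responses = []
        for request in requests:
            # run_pipeline is a hypothetical stand-in for the diffusion loop
            images = self.run_pipeline(request)
            # pb_utils.Tensor wants host memory: move the torch tensor off the GPU
            out = pb_utils.Tensor("IMAGES", images.detach().cpu().numpy())
            responses.append(pb_utils.InferenceResponse(output_tensors=[out]))
        return responses

The client script I used: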

import numpy as np
import tritonclient.http

model_name = "stable_diffusion"
url = "0.0.0.0:8010"
model_version = "1"
batch_size = 1

prompt = "A small cabin on top of a snowy mountain in the style of Disney, artstation"
samples = 1  # number of images to generate
steps = 45
guidance_scale = 7.5
seed = 1024

triton_client = tritonclient.http.InferenceServerClient(url=url, verbose=False)
assert triton_client.is_model_ready(
    model_name=model_name, model_version=model_version
), f"model {model_name} not yet ready"

model_metadata = triton_client.get_model_metadata(model_name=model_name, model_version=model_version)
model_config = triton_client.get_model_config(model_name=model_name, model_version=model_version)

prompt_in = tritonclient.http.InferInput(name="PROMPT", shape=(batch_size,), datatype="BYTES")
samples_in = tritonclient.http.InferInput("SAMPLES", (batch_size, ), "INT32")
steps_in = tritonclient.http.InferInput("STEPS", (batch_size, ), "INT32")
guidance_scale_in = tritonclient.http.InferInput("GUIDANCE_SCALE", (batch_size, ), "FP32")
seed_in = tritonclient.http.InferInput("SEED", (batch_size, ), "INT64")
height = tritonclient.http.InferInput("HEIGHT", (batch_size, ), "INT32")
width = tritonclient.http.InferInput("WIDTH", (batch_size, ), "INT32")

images = tritonclient.http.InferRequestedOutput(name="IMAGES", binary_data=False)

prompt_in.set_data_from_numpy(np.asarray([prompt] * batch_size, dtype=object))
samples_in.set_data_from_numpy(np.asarray([samples], dtype=np.int32))
steps_in.set_data_from_numpy(np.asarray([steps], dtype=np.int32))
guidance_scale_in.set_data_from_numpy(np.asarray([guidance_scale], dtype=np.float32))
seed_in.set_data_from_numpy(np.asarray([seed], dtype=np.int64))
height.set_data_from_numpy(np.asarray([512], dtype=np.int32))  # change these two to the target resolution
width.set_data_from_numpy(np.asarray([512], dtype=np.int32))

response = triton_client.infer(
    model_name=model_name, model_version=model_version,
    inputs=[prompt_in,samples_in,steps_in,guidance_scale_in,seed_in,height,width],
    outputs=[images]
)
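
The images can then be read back from the response; a minimal sketch, assuming the IMAGES output arrives as a (samples, H, W, 3) uint8 array:

from PIL import Image

result = response.as_numpy("IMAGES")
for idx, img in enumerate(result):
    Image.fromarray(img.astype(np.uint8)).save(f"output_{idx}.png")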
whalefa1I commented 1 year ago

When I put the V2-converted models into the Docker environment, which launches successfully with your Hugging Face models, I got an error. The script I used:

import numpy as np
import sys
import tritonclient.http as httpclient
from transformers import CLIPTokenizer
import torch
import inspect
import json
from diffusers.schedulers import (
    DDIMScheduler,
    PNDMScheduler,
    LMSDiscreteScheduler,
    EulerDiscreteScheduler,
    EulerAncestralDiscreteScheduler,
    # DPMSolverMultistepScheduler,
)

prompt = "A small cabin on top of a snowy mountain in the style of Disney, artstation"
samples = 1  # number of images to generate
steps = 45
guidance_scale = 7.5
seed = 1024
# model
model_name = "stable_diffusion"
text_encoder_name = "text_encoder"
unet_name = 'unet'

url = "0.0.0.0:8010"
model_version = "1"
batch_size = 1
height = 1024
width = 768
num_inference_steps = 50
eta = 0.0
do_classifier_free_guidance = guidance_scale > 1.0

try:
    triton_client = httpclient.InferenceServerClient(url=url, verbose=0)
except Exception as e:
    print("channel creation failed: " + str(e))
    sys.exit(1)

tokenizer = CLIPTokenizer.from_pretrained(
    r"/models/stable-diffusion/tokenizer"
)
text_input = tokenizer(
    prompt,
    padding="max_length",
    max_length=tokenizer.model_max_length,
    truncation=False,
    return_tensors="pt",
)
input_ids = text_input['input_ids']
inputs = []
outputs = []
inputs.append(httpclient.InferInput('input_ids', [1, 77], "INT32"))
inputs[0].set_data_from_numpy(np.array(input_ids).astype(np.int32), binary_data=False)

outputs.append(httpclient.InferRequestedOutput('last_hidden_state'))
outputs.append(httpclient.InferRequestedOutput('pooler_output'))

response = triton_client.infer(
    model_name=text_encoder_name, #model_version=model_version,
    inputs=inputs,
    outputs=outputs
)
last_hidden_state = torch.tensor(response.as_numpy('last_hidden_state'))
pooler_output = response.as_numpy('pooler_output')

text_embeddings = torch.repeat_interleave(last_hidden_state, batch_size, dim=0)
text_embeddings_0 = np.repeat(response.as_numpy('last_hidden_state'), batch_size, axis=0)

latents_shape = (batch_size, 4, height // 8, width // 8)

generator = torch.Generator(device='cpu').manual_seed(seed)
latents = torch.randn(
    latents_shape, generator=generator, device='cpu'
)

# look up the scheduler class named in the config (eval maps the name string to one of the imported classes)
scheduler_name = json.load(open(r"/models/stable-diffusion/scheduler/scheduler_config.json"))["_class_name"]
scheduler = eval(scheduler_name).from_config(r"/models/stable-diffusion/scheduler")
# self.scheduler = self.scheduler.set_format("pt")
# set timesteps
accepts_offset = "offset" in set(
    inspect.signature(scheduler.set_timesteps).parameters.keys()
)
extra_set_kwargs = {}
if accepts_offset:
    extra_set_kwargs["offset"] = 1

scheduler.set_timesteps(num_inference_steps, **extra_set_kwargs)

accepts_eta = "eta" in set(
    inspect.signature(scheduler.step).parameters.keys()
)
extra_step_kwargs = {}
if accepts_eta:
    extra_step_kwargs["eta"] = eta

for i, t in enumerate(scheduler.timesteps):
    # expand the latents if we are doing classifier free guidance
    latent_model_input = (
        torch.cat([latents] * 2) if do_classifier_free_guidance else latents
    )
    latent_model_input_0 = (np.concatenate([np.array(latents)] * 2)) if do_classifier_free_guidance else np.array(latents)
    if isinstance(scheduler, LMSDiscreteScheduler):
        sigma = scheduler.sigmas[i]
        latent_model_input = latent_model_input / ((sigma**2 + 1) ** 0.5)
    latent_model_input = latent_model_input.type(dtype=torch.float16)
    latent_model_input_0 = latent_model_input_0.astype(np.float16)
    timestep = t[None].type(dtype=torch.float16)
    timestep_0 = np.array(t[None]).astype(np.float16)
    encoder_hidden_states = text_embeddings.type(dtype=torch.float16)
    encoder_hidden_states_0 = text_embeddings_0.astype(np.float16)
    inputs = []
    outputs = []
    inputs.append(httpclient.InferInput('sample', [2, 4, 128, 96], "FP16"))  # (2 * batch, 4, height // 8, width // 8)
    inputs[0].set_data_from_numpy(latent_model_input_0, binary_data=True)
    inputs.append(httpclient.InferInput('timestep', [1,], "FP16"))
    inputs[1].set_data_from_numpy(timestep_0, binary_data=True)
    # note: batch 1 here vs. batch 2 for 'sample'; classifier-free guidance normally concatenates uncond and text embeddings
    inputs.append(httpclient.InferInput('encoder_hidden_states', [1, 77, 768], "FP16"))
    inputs[2].set_data_from_numpy(encoder_hidden_states_0, binary_data=True)
    outputs.append(httpclient.InferRequestedOutput('out_sample'))
    response_unet = triton_client.infer(
        model_name=unet_name, model_version=model_version,
        inputs=inputs,
        outputs=outputs
    )
    noise_pred = torch.tensor(response_unet.as_numpy('out_sample'))  # response_unet, not response (the text-encoder result)
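    # Hedged completion: the prediction has to feed back into the scheduler,
    # otherwise the latents never change. Assumes out_sample stacks
    # [uncond, text] along the batch axis, which also requires batching the
    # unconditional embeddings into encoder_hidden_states above.
    noise_pred = noise_pred.float()
    if do_classifier_free_guidance:
        noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
        noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
    latents = scheduler.step(noise_pred, t, latents, **extra_step_kwargs).prev_sample

# after the loop, the final latents would go to the vae model to decode the image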
kamalkraj commented 1 year ago

I haven't checked the v2 models yet; I will check and update.