Open whalefa1I opened 1 year ago
Do you have a converted ONNX model (1.4 or 1.5) that can be deployed in Triton successfully, by any chance? My ONNX model converted from the V2 branch cannot return an image successfully, and the Master branch works well for me. However, I cannot get an image from Triton: "1119 14:06:51.895234 100 pb_stub.cc:409] Failed to process the request(s) for model 'stable_diffusion', message: TritonModelException: GPU tensors are not supported.
At: /models/stable_diffusion/1/model.py(270): execute "
import numpy as np
import tritonclient.http
# Client script: send one text-to-image request to the "stable_diffusion"
# model served by Triton over HTTP.

# --- Request parameters ---------------------------------------------------
model_name = "stable_diffusion"
url = "0.0.0.0:8010"
model_version = "1"
batch_size = 1
prompt = "A small cabin on top of a snowy mountain in the style of Disney, artstation"
samples = 1  # no. of images to generate
steps = 45
guidance_scale = 7.5
seed = 1024

triton_client = tritonclient.http.InferenceServerClient(url=url, verbose=False)

# Fail fast with an explicit exception: a bare `assert` is stripped when
# Python runs with -O, which would let the request go to an unready model.
if not triton_client.is_model_ready(
    model_name=model_name, model_version=model_version
):
    raise RuntimeError(f"model {model_name} not yet ready")

# Fetched for inspection only; nothing below reads them.
model_metadata = triton_client.get_model_metadata(model_name=model_name, model_version=model_version)
model_config = triton_client.get_model_config(model_name=model_name, model_version=model_version)

# --- Inputs ---------------------------------------------------------------
# Every input is declared with shape (batch_size,), so every array below is
# built with exactly batch_size elements. set_data_from_numpy rejects an
# array whose shape differs from the declared one, so length-1 arrays only
# work when batch_size == 1.
prompt_in = tritonclient.http.InferInput(name="PROMPT", shape=(batch_size,), datatype="BYTES")
samples_in = tritonclient.http.InferInput("SAMPLES", (batch_size,), "INT32")
steps_in = tritonclient.http.InferInput("STEPS", (batch_size,), "INT32")
guidance_scale_in = tritonclient.http.InferInput("GUIDANCE_SCALE", (batch_size,), "FP32")
seed_in = tritonclient.http.InferInput("SEED", (batch_size,), "INT64")
height = tritonclient.http.InferInput("HEIGHT", (batch_size,), "INT32")
width = tritonclient.http.InferInput("WIDTH", (batch_size,), "INT32")

# binary_data=False asks for the output in the JSON body of the response.
images = tritonclient.http.InferRequestedOutput(name="IMAGES", binary_data=False)

prompt_in.set_data_from_numpy(np.asarray([prompt] * batch_size, dtype=object))
samples_in.set_data_from_numpy(np.asarray([samples] * batch_size, dtype=np.int32))
steps_in.set_data_from_numpy(np.asarray([steps] * batch_size, dtype=np.int32))
guidance_scale_in.set_data_from_numpy(np.asarray([guidance_scale] * batch_size, dtype=np.float32))
seed_in.set_data_from_numpy(np.asarray([seed] * batch_size, dtype=np.int64))
height.set_data_from_numpy(np.asarray([512] * batch_size, dtype=np.int32))
width.set_data_from_numpy(np.asarray([512] * batch_size, dtype=np.int32))

# --- Inference ------------------------------------------------------------
response = triton_client.infer(
    model_name=model_name,
    model_version=model_version,
    inputs=[prompt_in, samples_in, steps_in, guidance_scale_in, seed_in, height, width],
    outputs=[images],
)
When I put the V2-converted models into the Docker environment, which launches successfully with your Hugging Face models, I got an error like:
import numpy as np
import sys
import tritonclient.http as httpclient
from transformers import CLIPTokenizer
import torch
import inspect
import json
from diffusers.schedulers import (
DDIMScheduler,
PNDMScheduler,
LMSDiscreteScheduler,
EulerDiscreteScheduler,
EulerAncestralDiscreteScheduler,
# DPMSolverMultistepScheduler,
)
# ---- Generation settings -------------------------------------------------
prompt = "A small cabin on top of a snowy mountain in the style of Disney, artstation"
seed = 1024
samples = 1  # no.of images to generate
steps = 45
num_inference_steps = 50
eta = 0.0
guidance_scale = 7.5
# Guidance is treated as enabled whenever the scale exceeds 1.0.
do_classifier_free_guidance = guidance_scale > 1.0

# ---- Output geometry and batching ----------------------------------------
batch_size = 1
height = 1024
width = 768

# ---- Triton deployment ---------------------------------------------------
# model
model_name = "stable_diffusion"
text_encoder_name = "text_encoder"
unet_name = "unet"
model_version = "1"
model_nameurl = "0.0.0.0:8010"

# Create the HTTP client; report the failure and exit with status 1 if the
# client cannot be constructed.
try:
    triton_client = httpclient.InferenceServerClient(
        url=model_nameurl,
        verbose=0,
    )
except Exception as err:
    print("channel creation failed: " + str(err))
    sys.exit(1)
# Tokenize the prompt and run it through the Triton-hosted text encoder.
tokenizer = CLIPTokenizer.from_pretrained(
    r"/models/stable-diffusion/tokenizer"
)
text_input = tokenizer(
    prompt,
    padding="max_length",
    max_length=tokenizer.model_max_length,
    # Truncate over-long prompts to model_max_length. With truncation
    # disabled, a long prompt yields more tokens than the padded length and
    # the request tensor no longer matches a fixed-size encoder input.
    truncation=True,
    return_tensors="pt",
)
input_ids = text_input['input_ids']

# Declare the input with the array's real shape instead of a hard-coded
# [1, 77], so the declared shape and the payload can never disagree.
input_ids_np = np.array(input_ids).astype(np.int32)
inputs = [
    httpclient.InferInput('input_ids', list(input_ids_np.shape), "INT32"),
]
inputs[0].set_data_from_numpy(input_ids_np, binary_data=False)
outputs = [
    httpclient.InferRequestedOutput('last_hidden_state'),
    httpclient.InferRequestedOutput('pooler_output'),
]

# No model_version is passed for the text encoder.
response = triton_client.infer(
    model_name=text_encoder_name,
    inputs=inputs,
    outputs=outputs,
)

hidden_np = response.as_numpy('last_hidden_state')
last_hidden_state = torch.tensor(hidden_np)
pooler_output = response.as_numpy('pooler_output')

# Repeat the prompt embedding once per batch element, kept both as a torch
# tensor and as a numpy array (the numpy copy is what gets sent to Triton).
text_embeddings = torch.repeat_interleave(last_hidden_state, batch_size, dim=0)
text_embeddings_0 = np.repeat(hidden_np, batch_size, axis=0)
# Initial noise: 4 channels at 1/8 of the requested image resolution, drawn
# from a CPU generator seeded with `seed` so runs are reproducible.
latents_shape = (batch_size, 4, height // 8, width // 8)
generator = torch.Generator(device='cpu').manual_seed(seed)
latents = torch.randn(latents_shape, generator=generator, device='cpu')

# Resolve the scheduler class named in the config through an explicit table
# of the scheduler classes this script imports. This replaces eval() on a
# string read from disk and closes the config file deterministically.
_scheduler_classes = {
    cls.__name__: cls
    for cls in (
        DDIMScheduler,
        PNDMScheduler,
        LMSDiscreteScheduler,
        EulerDiscreteScheduler,
        EulerAncestralDiscreteScheduler,
    )
}
with open(
    r"/models/stable-diffusion/scheduler/scheduler_config.json",
    encoding="utf-8",
) as _scheduler_config_file:
    _scheduler_class_name = json.load(_scheduler_config_file)["_class_name"]
if _scheduler_class_name not in _scheduler_classes:
    raise ValueError(
        f"unsupported scheduler {_scheduler_class_name!r}; "
        f"expected one of {sorted(_scheduler_classes)}"
    )
scheduler = _scheduler_classes[_scheduler_class_name].from_config(
    r"/models/stable-diffusion/scheduler"
)

# set timesteps
# Only pass `offset` to set_timesteps when this scheduler's signature has it.
accepts_offset = "offset" in inspect.signature(scheduler.set_timesteps).parameters
extra_set_kwargs = {}
if accepts_offset:
    extra_set_kwargs["offset"] = 1
scheduler.set_timesteps(num_inference_steps, **extra_set_kwargs)

# Likewise, only prepare `eta` for scheduler.step when step() accepts it.
accepts_eta = "eta" in inspect.signature(scheduler.step).parameters
extra_step_kwargs = {}
if accepts_eta:
    extra_step_kwargs["eta"] = eta
# Denoising loop: one UNet request to Triton per scheduler timestep.
# NOTE(review): the visible snippet ends after reading the UNet output; the
# guidance combination and scheduler.step update are not shown here.
for i, t in enumerate(scheduler.timesteps):
    # expand the latents if we are doing classifier free guidance
    latent_model_input = (
        torch.cat([latents] * 2) if do_classifier_free_guidance else latents
    )
    if isinstance(scheduler, LMSDiscreteScheduler):
        sigma = scheduler.sigmas[i]
        latent_model_input = latent_model_input / ((sigma**2 + 1) ** 0.5)
    latent_model_input = latent_model_input.type(dtype=torch.float16)
    # Build the numpy payload from the tensor AFTER the optional LMS scaling,
    # so the array that is actually sent carries the scaling as well.
    latent_model_input_0 = np.array(latent_model_input)

    timestep = t[None].type(dtype=torch.float16)
    timestep_0 = np.array(t[None]).astype(np.float16)

    # NOTE(review): with classifier-free guidance the sample batch is doubled
    # while the embeddings keep batch_size rows (no unconditional embeddings
    # are built in the visible code) — confirm the UNet accepts this.
    encoder_hidden_states = text_embeddings.type(dtype=torch.float16)
    encoder_hidden_states_0 = text_embeddings_0.astype(np.float16)

    # Declare each input with the real shape of its array rather than
    # hard-coded sizes, so changing height/width/batch_size or disabling
    # guidance does not produce a declared-shape/payload mismatch.
    inputs = [
        httpclient.InferInput('sample', list(latent_model_input_0.shape), "FP16"),
        httpclient.InferInput('timestep', list(timestep_0.shape), "FP16"),
        httpclient.InferInput(
            'encoder_hidden_states', list(encoder_hidden_states_0.shape), "FP16"
        ),
    ]
    inputs[0].set_data_from_numpy(latent_model_input_0, binary_data=True)
    inputs[1].set_data_from_numpy(timestep_0, binary_data=True)
    inputs[2].set_data_from_numpy(encoder_hidden_states_0, binary_data=True)
    outputs = [httpclient.InferRequestedOutput('out_sample')]

    response_unet = triton_client.infer(
        model_name=unet_name,
        model_version=model_version,
        inputs=inputs,
        outputs=outputs,
    )
    # Read the UNet output from the UNet response; the text-encoder
    # `response` has no 'out_sample' output.
    last_hidden_state = torch.tensor(response_unet.as_numpy('out_sample'))
I haven't checked the V2 models yet; I will check and update.
I need a picture with a width of 1024 and a height of 768, and I modified the UNet configuration file,
but I got an error like:
So how do I modify the default height and width?