zhuraromdev opened 1 month ago
Code:
import gradio as gr
from PIL import Image
import io
import logging
import base64
from lmdeploy import pipeline, TurbomindEngineConfig, GenerationConfig
# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Model setup
model_id = "PATH_TO_PRIVATE_QUANTIZED_HF_MODEL"
backend_config = TurbomindEngineConfig(cache_max_entry_count=0.2, tp=1)
pipe = pipeline(model_id, backend_config=backend_config)
# gr.ChatInterface calls fn(message, history, *additional_inputs), so the
# parameters after history must match the order of additional_inputs below.
def respond(
    message,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
    image,
):
    logger.info("Received a new message.")
    messages = [{"role": "system", "content": str(system_message)}]  # ensure system_message is a string

    # Ensure history is a list of [user, assistant] pairs
    if isinstance(history, str):
        logger.warning(f"Expected history to be a list of pairs, but got string: {history}")
        history = []
    elif isinstance(history, list):
        for val in history:
            if isinstance(val, (list, tuple)) and len(val) == 2:
                messages.append({"role": "user", "content": val[0]})
                messages.append({"role": "assistant", "content": val[1]})
            else:
                logger.warning(f"Unexpected format in history: {val}")
    else:
        logger.warning(f"Unexpected type for history: {type(history)}")
    messages.append({"role": "user", "content": message})
    # NOTE: `messages` (system prompt + history) is built here but never sent to
    # the pipeline below; only the current turn in `prompts` reaches the model.

    image_prompt = None
    if isinstance(image, Image.Image):
        try:
            buffered = io.BytesIO()
            image.save(buffered, format="PNG")
            img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
            image_prompt = {'type': 'image_url', 'image_url': {'url': 'data:image/png;base64,' + img_str}}
        except Exception as e:
            logger.error(f"Error processing image: {e}")

    prompts = [
        {
            'role': 'user',
            'content': [{'type': 'text', 'text': message}]
        }
    ]
    if image_prompt:
        prompts[0]['content'].append(image_prompt)

    try:
        # The pipeline takes sampling parameters via GenerationConfig rather
        # than as keyword arguments.
        gen_config = GenerationConfig(
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
        )
        outputs = pipe(prompts, gen_config=gen_config)
        # A single conversation usually yields a single Response object, a
        # batch a list; handle both and read the generated text from .text.
        out = outputs[0] if isinstance(outputs, list) else outputs
        yield out.text
    except Exception as e:
        logger.error(f"Error during chat completion: {e}")
        yield "An error occurred during the chat completion process."
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
        gr.Image(type="pil", label="Input Image (optional)"),
    ],
)

if __name__ == "__main__":
    demo.launch()
Describe the bug
Hello, I have created a Space on HF and am trying to send the image from the Gradio input to the quantized model, but I am getting an error. Is it possible to read an image from the local machine, or is a URL the only way to send an image to the VLM offline pipeline?
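From the lmdeploy docs, the offline VLM pipeline also accepts an image object directly in a (text, image) tuple, and lmdeploy.vl.load_image reads from a local path as well as a URL, so I would expect something like the following sketch to work (the model and image paths are placeholders):

from lmdeploy import pipeline
from lmdeploy.vl import load_image

pipe = pipeline("PATH_TO_PRIVATE_QUANTIZED_HF_MODEL")

# load_image accepts a local file path as well as an http(s) URL.
image = load_image("/path/to/local/image.png")

# A (text, image) tuple is the documented single-prompt VLM format;
# the result is a Response object whose generated text is in .text.
response = pipe(("Describe this image.", image))
print(response.text)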
Reproduction
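The full Gradio app above reproduces the issue. Below is a minimal standalone script that takes Gradio out of the loop and builds the same base64 data-URL message as respond() (same placeholder model path; the local image path is hypothetical, and whether the offline pipeline accepts base64 data URLs in this message format is exactly what I am unsure about):

import base64
from lmdeploy import pipeline, TurbomindEngineConfig, GenerationConfig

pipe = pipeline(
    "PATH_TO_PRIVATE_QUANTIZED_HF_MODEL",
    backend_config=TurbomindEngineConfig(cache_max_entry_count=0.2, tp=1),
)

# Read a local file and wrap it in a data URL, mirroring what respond() builds.
with open("/path/to/local/image.png", "rb") as f:
    img_b64 = base64.b64encode(f.read()).decode("utf-8")

messages = [{
    "role": "user",
    "content": [
        {"type": "text", "text": "Describe this image."},
        {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_b64}"}},
    ],
}]

out = pipe(messages, gen_config=GenerationConfig(max_new_tokens=512))
out = out[0] if isinstance(out, list) else out  # a single conversation usually yields a single Response
print(out.text)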