An example vLLM client that should also support vision:
```python
import base64

import filetype
import httpx

# VLM_MODEL, VLLM_URL, VLLM_HEALTHCHECK, VLLM_READY_TIMEOUT,
# ALLOWED_IMAGE_TYPES, and wait_for_ready are defined elsewhere.


class VLMClient:
    def __init__(self, vlm_model: str = VLM_MODEL, vllm_url: str = VLLM_URL):
        self._vlm_model = vlm_model
        self._vllm_client = httpx.AsyncClient(base_url=vllm_url)
        if VLLM_HEALTHCHECK:
            # Block until the vLLM server reports healthy or the timeout elapses
            wait_for_ready(
                server_url=vllm_url,
                wait_seconds=VLLM_READY_TIMEOUT,
                health_endpoint="health",
            )

    @property
    def vlm_model(self) -> str:
        return self._vlm_model

    async def __call__(
        self,
        prompt: str,
        image_bytes: bytes | None = None,
        image_filetype: filetype.Type | None = None,
        max_tokens: int = 10,
    ) -> str:
        # Assemble the message content, starting with the text part
        message_content: list[dict[str, str | dict]] = [
            {
                "type": "text",
                "text": prompt,
            }
        ]
        if image_bytes is not None:
            if image_filetype is None:
                image_filetype = filetype.guess(image_bytes)
            if image_filetype is None:
                raise ValueError("Could not determine image filetype")
            if image_filetype not in ALLOWED_IMAGE_TYPES:
                raise ValueError(
                    f"Image type {image_filetype} is not supported. "
                    f"Allowed types: {ALLOWED_IMAGE_TYPES}"
                )
            # Inline the image as a base64-encoded data URL
            image_b64 = base64.b64encode(image_bytes).decode("utf-8")
            message_content.append(
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:{image_filetype.mime};base64,{image_b64}",
                    },
                }
            )
        # Put together the request payload
        payload = {
            "model": self.vlm_model,
            "messages": [{"role": "user", "content": message_content}],
            "max_tokens": max_tokens,
            # "logprobs": True,
            # "top_logprobs": 1,
        }
        response = await self._vllm_client.post("/v1/chat/completions", json=payload)
        response.raise_for_status()
        data = response.json()
        response_text: str = (
            data.get("choices", [{}])[0].get("message", {}).get("content", "").strip()
        )
        return response_text
```
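
For reference, a minimal usage sketch, assuming a vLLM server is already reachable at `VLLM_URL`; the image path and prompt here are illustrative:

```python
import asyncio


async def main() -> None:
    client = VLMClient()
    # Hypothetical local image; any PNG/JPEG in ALLOWED_IMAGE_TYPES would do
    with open("example.png", "rb") as f:
        image_bytes = f.read()
    answer = await client("What is shown in this image?", image_bytes=image_bytes)
    print(answer)


asyncio.run(main())
```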
GPT-4o introduces a new message content type that contains images, encoded either as a URL or as base64. Example:
https://platform.openai.com/docs/guides/vision
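
A sketch of what the two encodings look like in a chat completions request body, following the linked guide; the model name, prompt, and URLs here are placeholders:

```python
payload = {
    "model": "gpt-4o",
    "messages": [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What is in this image?"},
                # Variant 1: image referenced by URL
                {
                    "type": "image_url",
                    "image_url": {"url": "https://example.com/cat.png"},
                },
                # Variant 2: image inlined as a base64 data URL
                {
                    "type": "image_url",
                    "image_url": {"url": "data:image/png;base64,iVBORw0KGgo..."},
                },
            ],
        }
    ],
}
```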
Milestone 1

- Tracing
- Instrumentation
- Testing

Milestone N

- Image tracing
- Context Attributes
- Config
- Suppress Tracing
- UI / Javascript
- Testing
- Documentation
- Evals