neuralmagic / deepsparse

Sparsity-aware deep learning inference runtime for CPUs
https://neuralmagic.com/deepsparse/
Other
2.97k stars 171 forks source link

[OpenAI][Server] Enable OpenAI text generation streaming #1486

Closed dsikka closed 8 months ago

dsikka commented 8 months ago

Summary

Testing

After starting the server with `--integration openai`, we can send the request below using the OpenAI Python client:

import openai
from openai import OpenAI

# Point the OpenAI client at the server running on localhost:5543.
# The API key "EMPTY" is presumably just a placeholder for the local
# server -- confirm it is not validated.
client = OpenAI(base_url="http://localhost:5543/v1", api_key="EMPTY")

# Call the model-listing endpoint; the result is not used further in this
# snippet.
models = client.models.list()

# NOTE(review): looks like a Hugging Face model stub ("hf:" prefix) -- confirm.
model = "hf:mgoin/TinyStories-1M-ds"
print(f"Accessing model API '{model}'")

# Chat Completions API, with streaming enabled.
# NOTE(review): `messages` is passed as a bare string here, whereas the OpenAI
# chat API normally takes a list of {"role": ..., "content": ...} dicts --
# confirm the server is meant to accept both forms.
stream = True
completion = client.chat.completions.create(
    messages="The sun shined",
    stream=stream,
    max_tokens=10,
    model=model
)

# With stream=True the response is consumed by iterating over it; each item
# is printed as it arrives (see the ChatCompletionChunk lines under "Output").
for c in completion:
    print(c)

Output:



ChatCompletionChunk(id='cmpl-d494a29cdd274d28a73d372356154f6f', choices=[Choice(delta=ChoiceDelta(content=' the', function_call=None, role=None, tool_calls=None), finish_reason=None, index=None)], created=1702935583, model='/home/dsikka/.cache/huggingface/hub/models--mgoin--TinyStories-1M-ds/snapshots/ca4ce12f6093b31f6c3f1e398f4b04b113e26bb7/model.onnx', object='chat.completion.chunk', system_fingerprint=None)
ChatCompletionChunk(id='cmpl-d494a29cdd274d28a73d372356154f6f', choices=[Choice(delta=ChoiceDelta(content=' sky', function_call=None, role=None, tool_calls=None), finish_reason=None, index=None)], created=1702935583, model='/home/dsikka/.cache/huggingface/hub/models--mgoin--TinyStories-1M-ds/snapshots/ca4ce12f6093b31f6c3f1e398f4b04b113e26bb7/model.onnx', object='chat.completion.chunk', system_fingerprint=None)
ChatCompletionChunk(id='cmpl-d494a29cdd274d28a73d372356154f6f', choices=[Choice(delta=ChoiceDelta(content='.', function_call=None, role=None, tool_calls=None), finish_reason=None, index=None)], created=1702935583, model='/home/dsikka/.cache/huggingface/hub/models--mgoin--TinyStories-1M-ds/snapshots/ca4ce12f6093b31f6c3f1e398f4b04b113e26bb7/model.onnx', object='chat.completion.chunk', system_fingerprint=None)
ChatCompletionChunk(id='cmpl-d494a29cdd274d28a73d372356154f6f', choices=[Choice(delta=ChoiceDelta(content=' The', function_call=None, role=None, tool_calls=None), finish_reason=None, index=None)], created=1702935583, model='/home/dsikka/.cache/huggingface/hub/models--mgoin--TinyStories-1M-ds/snapshots/ca4ce12f6093b31f6c3f1e398f4b04b113e26bb7/model.onnx', object='chat.completion.chunk', system_fingerprint=None)
ChatCompletionChunk(id='cmpl-d494a29cdd274d28a73d372356154f6f', choices=[Choice(delta=ChoiceDelta(content=' sun', function_call=None, role=None, tool_calls=None), finish_reason=None, index=None)], created=1702935583, model='/home/dsikka/.cache/huggingface/hub/models--mgoin--TinyStories-1M-ds/snapshots/ca4ce12f6093b31f6c3f1e398f4b04b113e26bb7/model.onnx', object='chat.completion.chunk', system_fingerprint=None)
ChatCompletionChunk(id='cmpl-d494a29cdd274d28a73d372356154f6f', choices=[Choice(delta=ChoiceDelta(content=' was', function_call=None, role=None, tool_calls=None), finish_reason=None, index=None)], created=1702935583, model='/home/dsikka/.cache/huggingface/hub/models--mgoin--TinyStories-1M-ds/snapshots/ca4ce12f6093b31f6c3f1e398f4b04b113e26bb7/model.onnx', object='chat.completion.chunk', system_fingerprint=None)
ChatCompletionChunk(id='cmpl-d494a29cdd274d28a73d372356154f6f', choices=[Choice(delta=ChoiceDelta(content=' shining', function_call=None, role=None, tool_calls=None), finish_reason=None, index=None)], created=1702935583, model='/home/dsikka/.cache/huggingface/hub/models--mgoin--TinyStories-1M-ds/snapshots/ca4ce12f6093b31f6c3f1e398f4b04b113e26bb7/model.onnx', object='chat.completion.chunk', system_fingerprint=None)
ChatCompletionChunk(id='cmpl-d494a29cdd274d28a73d372356154f6f', choices=[Choice(delta=ChoiceDelta(content=' and', function_call=None, role=None, tool_calls=None), finish_reason=None, index=None)], created=1702935583, model='/home/dsikka/.cache/huggingface/hub/models--mgoin--TinyStories-1M-ds/snapshots/ca4ce12f6093b31f6c3f1e398f4b04b113e26bb7/model.onnx', object='chat.completion.chunk', system_fingerprint=None)
ChatCompletionChunk(id='cmpl-d494a29cdd274d28a73d372356154f6f', choices=[Choice(delta=ChoiceDelta(content=' the', function_call=None, role=None, tool_calls=None), finish_reason=None, index=None)], created=1702935583, model='/home/dsikka/.cache/huggingface/hub/models--mgoin--TinyStories-1M-ds/snapshots/ca4ce12f6093b31f6c3f1e398f4b04b113e26bb7/model.onnx', object='chat.completion.chunk', system_fingerprint=None)
ChatCompletionChunk(id='cmpl-d494a29cdd274d28a73d372356154f6f', choices=[Choice(delta=ChoiceDelta(content='', function_call=None, role=None, tool_calls=None), finish_reason='length', index=None)], created=1702935583, model='/home/dsikka/.cache/huggingface/hub/models--mgoin--TinyStories-1M-ds/snapshots/ca4ce12f6093b31f6c3f1e398f4b04b113e26bb7/model.onnx', object='chat.completion.chunk', system_fingerprint=None)