In my Python script, I use the `yield` keyword to stream the result token by token, but the page in the browser updates in much larger chunks than a single token.
Have you searched existing issues? 🔎
[X] I have searched and found no existing issues
Reproduction
import gradio as gr
import openai
import os
# Page heading; formatted into the Markdown header of the UI.
TITLE="Streaming Test"
# openai client configured with a custom base_url (api.xverse.cn). The API key
# is read from the API_KEY environment variable, so a missing variable raises
# KeyError as soon as this module is executed.
client = openai.Client(
base_url="https://api.xverse.cn/v1",
api_key=os.environ["API_KEY"]
)
def predict(msg, history=None):
    """Stream the assistant's reply to ``msg`` as it is generated.

    Args:
        msg: The new user message.
        history: Flat list of earlier turns, alternating user message and
            assistant reply. It is extended in place with the new turn.
            ``None`` (the default) starts from an empty conversation.

    Yields:
        ``(pairs, history)`` after every chunk that carries text. ``pairs`` is
        the list of ``(user, assistant)`` tuples including the partial reply
        to ``msg``; ``history`` is the updated flat list.
    """
    # A literal ``[]`` default is created once and shared by every call, which
    # would leak turns from one conversation into the next.
    if history is None:
        history = []

    messages = []
    tuples = []
    # Walk the flat history two entries at a time: (user, assistant).
    for user_text, assistant_text in zip(history[::2], history[1::2]):
        messages.append({"role": "user", "content": user_text})
        messages.append({"role": "assistant", "content": assistant_text})
        tuples.append((user_text, assistant_text))
    messages.append({"role": "user", "content": msg})

    response = client.chat.completions.create(
        model=os.environ["MODEL"],
        messages=messages,
        max_tokens=2048,
        top_p=0.85,
        temperature=0.5,
        presence_penalty=1.1,
        stream=True,
    )

    snippet = ""
    # Track whether the new turn has been added yet. Counting chunks is not
    # reliable: if the first chunk has no text, the "update last entry" branch
    # would run first and overwrite the previous turn (or fail on an empty
    # list).
    turn_started = False
    for chunk in response:
        # A chunk may arrive with an empty ``choices`` list (for example a
        # trailing usage-only chunk); there is nothing to display for it.
        if not chunk.choices:
            continue
        delta_text = chunk.choices[0].delta.content
        if delta_text is None:
            continue

        snippet += delta_text
        if turn_started:
            tuples[-1] = (msg, snippet)
            history[-1] = snippet
        else:
            tuples.append((msg, snippet))
            history.extend((msg, snippet))
            turn_started = True
        yield tuples, history
def reset():
    """Return ``(None, [])``: an empty chat display value and a new empty history list."""
    cleared_chat = None
    empty_history = []
    return cleared_chat, empty_history
def clear_textbox():
    """Build the component update that sets the value to an empty string."""
    blank = ""
    return gr.update(value=blank)
# Stylesheet passed to gr.Blocks(css=...): centers <h1> text and displays it
# as a block element.
css = """
h1 {
text-align: center;
display: block;
}
"""
# Sample prompts offered by gr.Examples below. The script referenced
# ``examples`` without defining it anywhere, which raised NameError while the
# UI was being built.
# NOTE(review): placeholder prompts - replace with ones suited to the model.
examples = [
    "Hello! Please introduce yourself.",
    "Write a short poem about autumn.",
    "Explain what a generator function is in Python.",
]

with gr.Blocks(css=css) as chat_demo:
    gr.Markdown("""# <center><font size=8>{}</center>""".format(TITLE))
    chatbot = gr.Chatbot(elem_id="chatbot", height=550, bubble_full_width=False, likeable=False)
    # Conversation history that predict() reads and returns: a flat list
    # alternating user message and assistant reply.
    state = gr.State([])

    with gr.Row():
        txt = gr.Textbox(show_label=False, placeholder="Enter text and press enter", container=False)
    with gr.Row():
        submit_btn = gr.Button(value="Submit")
        reset_btn = gr.Button(value="Reset")

    # Pressing Enter and clicking Submit behave the same way: one listener
    # streams the reply into the chatbot and state, and a second listener on
    # the same event empties the textbox.
    txt.submit(fn=predict, inputs=[txt, state], outputs=[chatbot, state])
    txt.submit(fn=clear_textbox, inputs=None, outputs=[txt])
    submit_btn.click(fn=predict, inputs=[txt, state], outputs=[chatbot, state])
    submit_btn.click(fn=clear_textbox, inputs=None, outputs=[txt])

    # Reset clears both the visible chat and the stored history.
    reset_btn.click(fn=reset, inputs=None, outputs=[chatbot, state])

    gr.Examples(examples=examples, inputs=[txt])
if __name__ == "__main__":
    # queue() hands back the same Blocks object, so launch() is chained onto it.
    chat_demo.queue().launch(share=False)
Screenshot
No response
Logs
No response
System Info
Gradio Environment Information:
------------------------------
Operating System: Linux
gradio version: 4.24.0
gradio_client version: 0.14.0
------------------------------------------------
gradio dependencies in your environment:
aiofiles: 23.2.1
altair: 5.2.0
fastapi: 0.110.0
ffmpy: 0.3.2
gradio-client==0.14.0 is not installed.
httpx: 0.27.0
huggingface-hub: 0.22.2
importlib-resources: 6.4.0
jinja2: 3.1.3
markupsafe: 2.1.5
matplotlib: 3.8.3
numpy: 1.26.4
orjson: 3.10.0
packaging: 24.0
pandas: 2.2.1
pillow: 10.2.0
pydantic: 2.6.4
pydub: 0.25.1
python-multipart: 0.0.9
pyyaml: 6.0.1
ruff: 0.3.4
semantic-version: 2.10.0
tomlkit==0.12.0 is not installed.
typer: 0.12.0
typing-extensions: 4.10.0
uvicorn: 0.29.0
authlib; extra == 'oauth' is not installed.
itsdangerous; extra == 'oauth' is not installed.
Describe the bug
In my Python script, I use the `yield` keyword to stream the result token by token, but the page in the browser updates in much larger chunks than a single token.
Have you searched existing issues? 🔎
Reproduction
Screenshot
No response
Logs
No response
System Info
Severity
Blocking usage of gradio