Start client:

```python
from langchain.prompts import PromptTemplate
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.schema import HumanMessage, SystemMessage, AIMessage
import json
from langchain.chat_models import ChatOpenAI

llm = ChatOpenAI(
    temperature=1.0,
    openai_api_base="http://localhost:5000/v1",
    openai_api_key="Test",
    streaming=True,
    max_tokens=1024,
)
```
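As a sanity check before the constrained tests (not one of the test runs above, just a minimal sketch), a plain unconstrained call should confirm the local OpenAI-compatible server is reachable:

```python
# Hypothetical sanity check: plain chat call against the local endpoint,
# no outlines-specific extra_body.
reply = llm.invoke([HumanMessage(content="Say hi.")])
print(reply.content)
```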
Test choices:

```python
messages = [
    SystemMessage(
        content="You are a helpful assistant."
    ),
    HumanMessage(
        content="Who is better bob or fred?"
    ),
]
for chunk in llm.stream(messages, extra_body={"outlines_type": "choices", "choices": ["bob", "fred"]}):
    print(chunk.content, end="", flush=True)
```
Output:

```
bob
```
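Since `extra_body` is simply forwarded in the request body of the OpenAI-compatible endpoint, the same constraint should presumably also work without streaming. A minimal sketch (an assumption, not one of the tests above):

```python
# Sketch (assumption): same choices constraint, but as a single
# non-streaming call instead of llm.stream.
response = llm.invoke(messages, extra_body={"outlines_type": "choices", "choices": ["bob", "fred"]})
print(response.content)  # expected to be "bob" or "fred"
```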
Test JSON:

```python
from enum import Enum
from pydantic import BaseModel, constr
import json

class Weapon(str, Enum):
    sword = "sword"
    axe = "axe"
    mace = "mace"
    spear = "spear"
    bow = "bow"
    crossbow = "crossbow"

class Armor(str, Enum):
    leather = "leather"
    chainmail = "chainmail"
    plate = "plate"

class Character(BaseModel):
    name: constr(max_length=10)
    age: int
    armor: Armor
    weapon: Weapon
    strength: int

messages = [
    SystemMessage(
        content="You are a helpful assistant."
    ),
    HumanMessage(
        content=f"Give me an interesting character description based on the following schema: {json.dumps(Character.schema())}"
    ),
]
for chunk in llm.stream(messages, extra_body={"outlines_type": "json", "json": json.dumps(Character.schema())}):
    print(chunk.content, end="", flush=True)
```
Output:

```
{ "name": "Eldric the" , "age": 37, "armor": "chainmail", "weapon": "sword", "strength": 87 }
```
Test Regex:

```python
messages = [
    SystemMessage(
        content="You are a helpful assistant."
    ),
    HumanMessage(
        content="Choose between bob and fred."
    ),
]
for chunk in llm.stream(messages, extra_body={"outlines_type": "regex", "regex": "bob|fred"}):
    print(chunk.content, end="", flush=True)
```
Output:

```
bob
```
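Any regex supported by outlines should work the same way; for example, a pattern that constrains the answer to a three-digit number (a sketch, not one of the runs above):

```python
messages = [
    SystemMessage(
        content="You are a helpful assistant."
    ),
    HumanMessage(
        content="Pick a number between 100 and 999."
    ),
]
# Sketch: constrain the output to three digits via the regex extra_body.
for chunk in llm.stream(messages, extra_body={"outlines_type": "regex", "regex": "[1-9][0-9]{2}"}):
    print(chunk.content, end="", flush=True)
```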
Test stop_at keyword:

```python
messages = [
    SystemMessage(
        content="You are a helpful assistant."
    ),
    HumanMessage(
        content="Instruction: Always answer Questions in the form Question: What is 2+1?\nwith\nAnswer: 2+1=3\nQuestion: What is 21+1?\n"
    ),
]
for chunk in llm.stream(messages, extra_body={"outlines_type": "text", "stop_at": "+"}):
    print(chunk.content, end="", flush=True)
```
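No output is shown for this test; to inspect where generation stops, the stream can be collected into a single string first (a small sketch using the same extra_body):

```python
# Sketch: join the streamed chunks to see the full response; with
# stop_at "+", generation should stop once a "+" is produced.
full_text = "".join(
    chunk.content
    for chunk in llm.stream(messages, extra_body={"outlines_type": "text", "stop_at": "+"})
)
print(full_text)
```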
To test keyboard interrupts, I ran the code below, interrupted it during generation, and then ran it again; it seemed to work.
```python
messages = [
    SystemMessage(
        content="You are a helpful assistant."
    ),
    HumanMessage(
        content="What is your name?"
    ),
]
for chunk in llm.stream(messages):
    print(chunk.content, end="", flush=True)
```
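The same interrupt-and-retry flow can also be written out explicitly; a minimal sketch of what was done by hand, assuming a plain KeyboardInterrupt from Ctrl-C:

```python
# Sketch: cancel one stream mid-generation, then issue the request again
# to check that the server recovers cleanly.
try:
    for chunk in llm.stream(messages):
        print(chunk.content, end="", flush=True)
except KeyboardInterrupt:
    print("\nInterrupted; retrying the same request...")
    for chunk in llm.stream(messages):
        print(chunk.content, end="", flush=True)
```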
Let me know if there should be more tests!
This is a PR to adapt outlines for exllamav2 dynamic generation. I think this will remove the need for this PR, as exllamav2 should do this under the hood.
To run this, you need to install outlines from my branch. This is currently a PR in outlines too (here), so in the future a regular outlines install might be enough.
I started the server with:

and tested the code for: