from openai import OpenAI
# Smoke-test a chat completion against a local OpenAI-compatible server.
# The server is expected to be listening at ENDPOINT; the API key is a
# placeholder because local inference servers typically skip authentication.
ENDPOINT = "http://localhost:3928/v1"
MODEL = "meta-llama3.1-8b-instruct"

client = OpenAI(base_url=ENDPOINT, api_key="not-needed")

# Keep the request payload in a dict so the messages list is easy to extend.
completion_payload = {
    "messages": [
        {"role": "user", "content": "Who won the world series in 2020?"}
    ]
}

# Non-streaming request. logprobs=True together with top_logprobs=2 asks the
# server to report the two most likely tokens at each generated position.
response = client.chat.completions.create(
    model=MODEL,
    messages=completion_payload["messages"],
    temperature=0.6,
    top_p=0.9,
    logprobs=True,
    top_logprobs=2,
    stream=False,
)

print(response)
Fix #262
You can test this feature by using the openai Python library, as shown above.