vllm-project / vllm

A high-throughput and memory-efficient inference and serving engine for LLMs
https://docs.vllm.ai
Apache License 2.0
26.78k stars 3.92k forks source link

[Bug]: TypeError: Object of type ModelMetaclass is not JSON serializable when using guided decoding #7061

Open LIUKAI0815 opened 1 month ago

LIUKAI0815 commented 1 month ago

Your current environment

The output of `python collect_env.py`

🐛 Describe the bug

class AnswerFormat(BaseModel):
    # Structured output contract for the classifier: the echoed query and
    # its predicted emotion label.
    query: str
    emotion: str


def qwen2(coontent):
    """Classify the emotion of *coontent* via a vLLM OpenAI-compatible server.

    Sends a few-shot classification prompt to the server with guided JSON
    decoding enabled and returns the model's message content (expected to
    conform to ``AnswerFormat``).

    Args:
        coontent: The user query text to classify.  (Name kept as-is —
            it is part of the public signature; note it is a typo of
            "content".)

    Returns:
        The raw assistant message content string from the completion.
    """
    from openai import OpenAI

    openai_api_key = "EMPTY"
    openai_api_base = ""  # NOTE(review): must be set to the vLLM server URL, e.g. "http://host:8000/v1"

    client = OpenAI(
        api_key=openai_api_key,
        base_url=openai_api_base,
    )

    model = 'Qwen2-72B-Instruct-awq'
    prompt = """
<role>
    Classify emotion among positive, negative, neutral.
</role>
<fewShotExample>
    query:打造装备神火被吞。
    emotion:neutral
    -------------------
    query:好的,明白了。
    emotion:positive
    -------------------
    query:你这不是坑人嘛。
    emotion:negative
</fewShotExample>
<expectedAnswer>
    query:{query}
    emotion:
</expectedAnswer>
"""
    prompt = prompt.replace("{query}", coontent)
    chat_completion = client.chat.completions.create(
        messages=[{
            "role": "system",
            "content": "You are a helpful assistant."
        },
        {
            "role": "user",
            "content": prompt
        }],
        model=model,
        extra_body={
            # BUG FIX: "guided_json" must be a JSON-serializable schema
            # (a dict), not the Pydantic model class itself.  Passing
            # [AnswerFormat] ships the ModelMetaclass object, which raises
            # "TypeError: Object of type ModelMetaclass is not JSON
            # serializable" when the request body is encoded.
            "guided_json": AnswerFormat.model_json_schema(),
            "guided_decoding_backend": "lm-format-enforcer"
        }
    )

    print("Chat completion results:")
    print(chat_completion.choices[0].message.content)
    return chat_completion.choices[0].message.content


if __name__ == "__main__":
    # Guarded so importing this module does not fire a network request.
    qwen2('游戏维护期间能玩吗?')

WuNein commented 1 month ago

extra_body={ "guided_json": AnswerFormat.model_json_schema(), "guided_decoding_backend": "lm-format-enforcer" },