# Sanity-check the guided-decoding response: the server must return an id
# and exactly three choices, each of which parses as JSON and satisfies
# TEST_SCHEMA (the same schema passed via guided_json).
assert completion.id is not None
assert completion.choices is not None and len(completion.choices) == 3
for choice in completion.choices:
    assert choice.text is not None
    print(choice.text)
    parsed = json.loads(choice.text)
    jsonschema.validate(instance=parsed, schema=TEST_SCHEMA)
## Error
File "/workspace/vllm/entrypoints/openai/serving_completion.py", line 128, in create_completion
await get_guided_decoding_logits_processor(
File "/workspace/vllm/model_executor/guided_decoding.py", line 76, in get_guided_decoding_logits_processor
result = await loop.run_in_executor(global_thread_pool,
File "/usr/lib/python3.10/concurrent/futures/thread.py", line 58, in run
result = self.fn(*self.args, **self.kwargs)
File "/workspace/vllm/model_executor/guided_decoding.py", line 123, in _get_cached_logits_processor
return JSONLogitsProcessor(guide, tokenizer)
File "/workspace/vllm/model_executor/guided_logits_processors.py", line 154, in __init__
super().__init__(regex_string, tokenizer)
File "/workspace/vllm/model_executor/guided_logits_processors.py", line 117, in __init__
fsm = RegexFSM(regex_string, tokenizer)
File "/usr/local/lib/python3.10/dist-packages/outlines/fsm/fsm.py", line 121, in __init__
self.states_to_token_maps, self.empty_token_ids = create_states_mapping(
File "/usr/local/lib/python3.10/dist-packages/outlines/caching.py", line 74, in wrapper
result = cached_function(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/outlines/fsm/fsm.py", line 104, in create_states_mapping
states_to_token_maps, empty_token_ids = create_fsm_index_tokenizer(
File "/usr/local/lib/python3.10/dist-packages/outlines/fsm/regex.py", line 571, in create_fsm_index_tokenizer
vocabulary, empty_token_ids = reduced_vocabulary(tokenizer)
File "/usr/local/lib/python3.10/dist-packages/outlines/fsm/regex.py", line 545, in reduced_vocabulary
token_str = tokenizer.convert_token_to_string(token)
File "/workspace/vllm/model_executor/guided_logits_processors.py", line 53, in convert_token_to_string
if token.startswith(SPIECE_UNDERLINE) or token == "<0x20>":
TypeError: startswith first arg must be bytes or a tuple of bytes, not str
## Your current environment
docker run -it --rm -p 8000:8000 --gpus='"device=0"' --name qwen-int4-vllm -v /data/Qwen-14B-Chat-Int4:/model/Qwen-14B-Chat-Int4 --entrypoint=python3 vllm:0.4.0 -m vllm.entrypoints.openai.api_server --model /model/Qwen-14B-Chat-Int4 --trust-remote-code --quantization gptq
"""Reproduction script: guided JSON decoding against a local vLLM OpenAI server.

Requests three completions constrained by ``guided_json`` and validates each
returned choice against the same schema with ``jsonschema``.
"""
import json

import jsonschema
import openai

# OpenAI-compatible client pointed at the locally served vLLM instance.
client = openai.OpenAI(
    base_url="http://127.0.0.1:8000/v1",
    api_key="token-abc123",
)

MODEL_NAME = '/model/Qwen-14B-Chat-Int4'

# Schema used both to guide decoding (server side) and to validate the
# output (client side).
TEST_SCHEMA = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "age": {"type": "integer"},
        "skills": {
            "type": "array",
            "items": {"type": "string", "maxLength": 10},
            "minItems": 3,
        },
        "work history": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "company": {"type": "string"},
                    "duration": {"type": "string"},
                    "position": {"type": "string"},
                },
                "required": ["company", "position"],
            },
        },
    },
    "required": ["name", "age", "skills", "work history"],
}

completion = client.completions.create(
    model=MODEL_NAME,
    prompt=f"Give an example JSON for an employee profile "
    f"that fits this schema: {TEST_SCHEMA}",
    n=3,
    temperature=1.0,
    max_tokens=500,
    extra_body=dict(guided_json=TEST_SCHEMA),
)

# Every choice must be present, parseable JSON, and schema-conformant.
assert completion.id is not None
assert completion.choices is not None and len(completion.choices) == 3
for i in range(3):
    assert completion.choices[i].text is not None
    print(completion.choices[i].text)
    output_json = json.loads(completion.choices[i].text)
    jsonschema.validate(instance=output_json, schema=TEST_SCHEMA)
File "/workspace/vllm/entrypoints/openai/serving_completion.py", line 128, in create_completion await get_guided_decoding_logits_processor( File "/workspace/vllm/model_executor/guided_decoding.py", line 76, in get_guided_decoding_logits_processor result = await loop.run_in_executor(global_thread_pool, File "/usr/lib/python3.10/concurrent/futures/thread.py", line 58, in run result = self.fn(*self.args, **self.kwargs) File "/workspace/vllm/model_executor/guided_decoding.py", line 123, in _get_cached_logits_processor return JSONLogitsProcessor(guide, tokenizer) File "/workspace/vllm/model_executor/guided_logits_processors.py", line 154, in __init__ super().__init__(regex_string, tokenizer) File "/workspace/vllm/model_executor/guided_logits_processors.py", line 117, in __init__ fsm = RegexFSM(regex_string, tokenizer) File "/usr/local/lib/python3.10/dist-packages/outlines/fsm/fsm.py", line 121, in __init__ self.states_to_token_maps, self.empty_token_ids = create_states_mapping( File "/usr/local/lib/python3.10/dist-packages/outlines/caching.py", line 74, in wrapper result = cached_function(*args, **kwargs) File "/usr/local/lib/python3.10/dist-packages/outlines/fsm/fsm.py", line 104, in create_states_mapping states_to_token_maps, empty_token_ids = create_fsm_index_tokenizer( File "/usr/local/lib/python3.10/dist-packages/outlines/fsm/regex.py", line 571, in create_fsm_index_tokenizer vocabulary, empty_token_ids = reduced_vocabulary(tokenizer) File "/usr/local/lib/python3.10/dist-packages/outlines/fsm/regex.py", line 545, in reduced_vocabulary token_str = tokenizer.convert_token_to_string(token) File "/workspace/vllm/model_executor/guided_logits_processors.py", line 53, in convert_token_to_string if token.startswith(SPIECE_UNDERLINE) or token == "<0x20>": TypeError: startswith first arg must be bytes or a tuple of bytes, not str