Closed 11 months ago
I use lmdeploy serve api_server to deploy the internlm-chat-20b model, and then use OpenCompass to evaluate it.
To do that, I added a new model class named 'FastAPI', which is similar to the OpenAI interface.
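For context, the server was started with lmdeploy's api_server command (something like lmdeploy serve api_server /path/to/internlm-chat-20b --server-port 23333, where the model path is a placeholder and the port matches the fastapi_url used below). The custom model class follows: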
import time
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, List, Optional, Union

import requests
from lmdeploy.serve.openai.api_client import APIClient

from opencompass.registry import MODELS
from opencompass.utils.prompt import PromptList

from .base_api import BaseAPIModel

PromptType = Union[PromptList, str]
@MODELS.register_module()
class FastAPI(BaseAPIModel):
def __init__(self,
fastapi_url: str,
path: str = 'fastapi',
retry: int = 2,
query_per_second: int = 2,
max_seq_len: int = 2048,
meta_template: Optional[Dict] = None):
super().__init__(path=path,
max_seq_len=max_seq_len,
meta_template=meta_template,
query_per_second=query_per_second,
retry=retry)
self.api_client = APIClient(fastapi_url)
        self.model_name = self.api_client.available_models[0]
print("init success\n")
    def generate(
        self,
        inputs: List[PromptType],
        max_out_len: int = 512,
    ) -> List[str]:
        """Generate results given a list of inputs.

        Args:
            inputs (List[PromptType]): A list of strings or PromptLists.
                Each PromptList should be organized in OpenCompass'
                API format.
            max_out_len (int): The maximum length of the output.

        Returns:
            List[str]: A list of generated strings.
        """
with ThreadPoolExecutor() as executor:
results = list(
executor.map(self._generate, inputs,
[max_out_len] * len(inputs))
)
self.flush()
return results
    def _generate(self, input: PromptType, max_out_len: int) -> str:
        """Generate a result for a single input.

        Args:
            input (PromptType): A string or PromptList. The PromptList
                should be organized in OpenCompass' API format.
            max_out_len (int): The maximum length of the output.

        Returns:
            str: The generated string.
        """
assert isinstance(input, (str, PromptList))
if isinstance(input, str):
messages = [{'role': 'user', 'content': input}]
else:
messages = []
for item in input:
msg = {'content': item['prompt']}
if item['role'] == 'HUMAN':
msg['role'] = 'user'
elif item['role'] == 'BOT':
msg['role'] = 'assistant'
elif item['role'] == 'SYSTEM':
msg['role'] = 'system'
messages.append(msg)
        max_num_retries = 0
        while max_num_retries < self.retry:
            self.acquire()
            try:
                # chat_completions_v1 yields parsed JSON responses; with
                # streaming disabled there is exactly one, so take it
                # directly rather than re-encoding it with json.dumps.
                # Passing max_tokens assumes the server honours the
                # OpenAI-compatible output-length field.
                response = next(
                    self.api_client.chat_completions_v1(
                        model=self.model_name,
                        messages=messages,
                        max_tokens=max_out_len))
                self.release()
            except requests.ConnectionError:
                self.logger.error('Got connection error, retrying...')
                self.release()
                self.wait()
                continue
            try:
                return response['choices'][0]['message']['content'].strip()
            except KeyError:
                if 'error' in response:
                    if response['error']['code'] == 'rate_limit_exceeded':
                        time.sleep(1)
                        continue
                    self.logger.error('Found error message in response: %s',
                                      str(response['error']))
            max_num_retries += 1
raise RuntimeError('Calling FastAPI failed after retrying for '
f'{max_num_retries} times. Check the logs for '
'details.')
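As a quick smoke test of the server itself, independent of OpenCompass, the underlying APIClient can be exercised directly. A minimal sketch, reusing the URL from my config (the prompt is just an example):

from lmdeploy.serve.openai.api_client import APIClient

api_client = APIClient('http://10.198.34.111:23333')
model_name = api_client.available_models[0]

# chat_completions_v1 yields parsed responses; with streaming off
# there is exactly one.
for output in api_client.chat_completions_v1(
        model=model_name,
        messages=[{'role': 'user', 'content': 'Hi, how are you?'}]):
    print(output)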
By the way, the config file is really hard to understand!
You may need batch_size=1 in your model config:
models = [
    dict(
        type=FastAPI,
        abbr="internlm-chat-20b-turbomind-fastapi",  # model alias, for display
        # path="/mnt/lustrenew/shengkejun/internlm/models/internlm-chat-20B-turbomind",
        fastapi_url="http://10.198.34.111:23333",
        max_seq_len=2048,
        meta_template=meta_template,
        batch_size=1,
    )
]
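The meta_template referenced above is not shown in the snippet; OpenCompass API examples typically define it along these lines (a sketch, assuming the standard two-role API template):

meta_template = dict(round=[
    dict(role='HUMAN', api_role='HUMAN'),
    dict(role='BOT', api_role='BOT', generate=True),
])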
It does work.
Prerequisites
Type of problem
I am evaluating with officially supported tasks/models/datasets.
Environment
Not an environment problem.
Reproducing the problem - code/configuration sample
eval_api_fastapi.py was created by me; its content is the code shown above.
Reproducing the problem - command or script
python run.py configs/api_examples/eval_api_fastapi.py -w outputs/turbomind
Reproducing the problem - error message
12/07 17:24:14 - OpenCompass - INFO - Task [internlm-chat-20b-turbomind-fastapi/gsm8k] init success
12/07 17:26:28 - OpenCompass - INFO - Start inferencing [internlm-chat-20b-turbomind-fastapi/gsm8k]
[2023-12-07 17:26:29,922] [opencompass.openicl.icl_inferencer.icl_gen_inferencer] [INFO] Starting inference process...
  0%|          | 0/1319 [00:01<?, ?it/s]
Traceback (most recent call last):
  File "testxxx/internlm/opencompass/opencompass/tasks/openicl_infer.py", line 148, in <module>
    inferencer.run()
  File "testxxx/internlm/opencompass/opencompass/tasks/openicl_infer.py", line 78, in run
    self._inference()
  File "testxxx/internlm/opencompass/opencompass/tasks/openicl_infer.py", line 121, in _inference
    inferencer.inference(retriever,
  File "testxxx/internlm/opencompass/opencompass/openicl/icl_inferencer/icl_gen_inferencer.py", line 126, in inference
    entry, golds = list(zip(*datum))
ValueError: too many values to unpack (expected 2)
list(zip(*datum)) is: [({'section': 'round', 'pos': 'begin'}, 'J'), ({'prompt': "Question: Angelo and Melanie want to plan how many hours over the next week they should study together for their test next week. They have 2 chapters of their textbook to study and 4 worksheets to memorize. They figure out that they should dedicate 3 hours to each chapter of their textbook and 1.5 hours for each worksheet. If they plan to study no more than 4 hours each day, how many days should they plan to study total over the next week if they take a 10-minute break every hour, include 3 10-minute snack breaks each day, and 30 minutes for lunch each day?\nLet's think step by step\nAnswer:", 'role': 'HUMAN'}, 'a')]
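For what it's worth, the unpack itself fails whenever zip(*datum) yields anything other than exactly two tuples, which is presumably why batch_size=1 changes the shape of datum. A minimal, hypothetical illustration:

# Each batch element carries three fields, so zip(*datum) yields
# three tuples and the two-name unpack fails.
datum = [('prompt_1', 'meta_1', 'gold_1'),
         ('prompt_2', 'meta_2', 'gold_2')]
entry, golds = list(zip(*datum))  # ValueError: too many values to unpack (expected 2)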
Other information
No response