Open zhuifengzl opened 7 months ago
我用的模型是 RWKV-4-World-CHNtuned-3B-v1-20230625-ctx4096.pth,也转换过 RWKV-4-World-CHNtuned-3B-v1-20230625-ctx4096-fp16.bin,两个都测试过。即使修改了 backend-python\routes\completion.py 中的设置并重启程序,也不生效。是这个模型不支持吗?相关代码如下: class ChatCompletionBody(ModelConfigBody): messages: Union[List[Message], None] model: Union[str, None] = "rwkv" stream: bool = False stop: Union[str, List[str], None] = default_stop user_name: Union[str, None] = Field( None, description="Internal user name", min_length=1 ) assistant_name: Union[str, None] = Field( None, description="Internal assistant name", min_length=1 ) system_name: Union[str, None] = Field( None, description="Internal system name", min_length=1 ) presystem: bool = Field( True, description="Whether to insert default system prompt at the beginning" )
model_config = {
"json_schema_extra": {
"example": {
"messages": [
{"role": Role.User.value, "content": "hello", "raw": False}
],
"model": "rwkv",
"stream": False,
"stop": None,
"user_name": None,
"assistant_name": None,
"system_name": None,
"presystem": True,
"max_tokens": 1000,
"temperature": 1,
"top_p": 0.3,
"presence_penalty": 0,
"frequency_penalty": 1,
}
}
}
# Request body for a plain text-completion call. Extends ModelConfigBody, which
# is defined elsewhere; presumably it supplies the sampling fields that appear
# in the example below (max_tokens, temperature, ...) -- TODO confirm.
class CompletionBody(ModelConfigBody):
    # Prompt text: a single string, a list of strings, or None. No default is
    # given, so the field is declared without a fallback value.
    prompt: Union[str, List[str], None]
    # Model identifier; defaults to "rwkv".
    model: Union[str, None] = "rwkv"
    # Streaming flag; off by default.
    stream: bool = False
    # Stop sequence(s): one string, a list of strings, or None (the default).
    stop: Union[str, List[str], None] = None

    # Example payload attached to the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "prompt": "The following is an epic science fiction masterpiece that is immortalized, "
                + "with delicate descriptions and grand depictions of interstellar civilization wars.\nChapter 1.\n",
                "model": "rwkv",
                "stream": False,
                "stop": None,
                "max_tokens": 100,
                "temperature": 1,
                "top_p": 0.3,
                "presence_penalty": 0,
                "frequency_penalty": 1,
            }
        }
    }
好像每次程序启动都会重置修改过的参数,那个流式(stream)参数是不能修改吗?
模型用 https://huggingface.co/BlinkDL/rwkv-6-world/blob/main/RWKV-x060-World-3B-v2.1-20240417-ctx4096.pth
另外显存有多少?显存够的话,解码参数就选 cuda fp16
12g的显存
@zhuifengzl 参数是调用 API 的时候传递的,可以改,你不用去改源码。载入模型的时候,把"载入显存层数"拉满。
好的,我试试,感谢哈
public struct LocalSendData
{
public string model;
public bool stream;
public bool presystem;
public int max_tokens;
public double temperature;
public double top_p;
public double presence_penalty;
public double frequency_penalty;
public List
// Build the chat-completion request body: fixed sampling settings plus a
// single user message carrying `content`.
LocalSendData local = new LocalSendData();
local.model = "rwkv";
local.stream = false;
local.presystem = true;
local.max_tokens = 10000;
local.temperature = 1.2;
local.top_p = 0.5;
local.presence_penalty = 0.4;
local.frequency_penalty = 0.4;
local.messages = new List<LocalSendDataMes>();
local.messages.Add(new LocalSendDataMes() { role = "user", content = content, raw = false });

// Serialize the payload to JSON and log it for debugging.
postData = JsonMapper.ToJson(local);
Debug.Log(postData);

// POST the JSON to "<configured base path>/chat/completions".
using (var request = new UnityWebRequest(ConfigExcelMgr.instance.excelData.str_localPath + "/chat/completions", "POST"))
{
    request.SetRequestHeader("Accept", "application/json, text/plain, */*");
    request.SetRequestHeader("Content-Type", "application/json");
    request.uploadHandler = new UploadHandlerRaw(Encoding.UTF8.GetBytes(postData));
    request.downloadHandler = new DownloadHandlerBuffer();
    UnityWebRequestAsyncOperation asyncOp = request.SendWebRequest();

    // NOTE(review): neither local is read in the visible excerpt; kept because
    // their scope extends past it -- confirm whether they are still needed.
    int dataIndex = 0;
    string text = "";

    // Keep yielding until the request has completed.
    while (!asyncOp.isDone)
    {
        yield return wait_internal;
    }

    if (request.result == UnityWebRequest.Result.ConnectionError || request.result == UnityWebRequest.Result.ProtocolError)
    {
        // Log and stop here: the body of a failed request is not a valid
        // completion payload, so it must not reach the JSON parsing below.
        Debug.LogError("Error: " + request.error);
    }
    else
    {
        string jsondata = request.downloadHandler.text;

        // An empty body cannot be parsed either, so skip it as well.
        if (!string.IsNullOrEmpty(jsondata))
        {
            JsonData jd = JsonMapper.ToObject(jsondata);

            // Non-streaming reply: the text is read from choices[0].message.content.
            string js_text = (string)jd["choices"][0]["message"]["content"];
            callback?.Invoke(js_text, true);
        }
    }
    // NOTE(review): the enclosing using-block is not closed within this excerpt;
    // its closing brace is expected to follow in the original method.
这是我写的代码,我不太懂,帮忙看看应该怎么修改,才能提高接口的回答速度呢?
2024-04-19 16:22:02,475 - INFO Client: Address(host='192.168.31.39', port=63902) Url: http://192.168.31.39:8000/chat/completions Body: {"max_tokens": 1000, "temperature": 1.2, "top_p": 0.5, "presence_penalty": 0.4, "frequency_penalty": 0.4, "penalty_decay": null, "top_k": null, "global_penalty": null, "messages": [{"role": "user", "content": "喂", "raw": false}], "model": "rwkv", "stream": false, "stop": ["\n\nUser", "\n\nQuestion", "\n\nQ", "\n\nHuman", "\n\nBob", "\n\nAssistant", "\n\nAnswer", "\n\nA", "\n\nBot", "\n\nAlice", "\n\nUser", "\n\nAss"], "user_name": null, "assistant_name": null, "system_name": null, "presystem": true} Data: Hello! How can I assist you today? Finished. RequestsNum: 0