function-calling-upgrade-sei:add wip annotation

feat(backend): python-backend file change, add wip annotation

function call的调用示例代码, 使用方式见此: https://github.com/josStorer/RWKV-Runner/pull/364/files#diff-d16d47854f9f4a27efd8520938502741093433195730311b1b83ab4ea3000382 此PR完成后, 期望该示例代码能够跑通, 返回一个非None结果

核心要进行的开发部分: https://github.com/josStorer/RWKV-Runner/pull/364/files#diff-4e4a05876ad4fc0cfe0ed3cbdc219a3859cb0518161d9e959d52820520c87227R408

以下是RWKV Mobius模型(https://huggingface.co/TimeMobius/Mobius-RWKV-r6-12B), 进行function call的prompt示例:

System: You are a helpful assistant with access to the following functions. Use them if required -[{
  "name": "get_exchange_rate",
  "description": "Get the exchange rate between two currencies",
  "parameters": {
    "type": "object",
    "properties": {
    "base_currency": {
    "type": "string",
    "description": "The currency to convert from"
    },
    "target_currency": {
    "type": "string",
    "description": "The currency to convert to"
    }
    },
    "required": [
    "base_currency",
    "target_currency"
    ]
  }
},{
  "name": "get_current_weather",
  "description": "Get the current weather in a given location",
  "parameters": {
    "type": "object",
    "properties": {
    "location": {
    "type": "string",
    "description": "The city and state, e.g. San Francisco, CA"
    },
    "unit": {
    "type": "string",
    "enum": ["celsius", "fahrenheit"]
    }
    },
    "required": [
    "location"
    ]
  }
}]

User: 美元对人民币汇率

Assistant: get_exchange_rate
```python
tool_call("base_currency"= "USD", "target_currency"= "CNY")
```

Observation: {"status":"success", "data":{"exchange_rate":7.25}}

Assistant: 美元兑人民币当前汇率是7.25。


非stream模式下的function call响应示例, 代码需要修改适配新增的function call相关字段

```json
{
    "id": "chatcmpl-9pCMwSnconjiQaCEPIpYNE85imPIc",
    "object": "chat.completion",
    "created": 1721989302,
    "model": "gpt-3.5-turbo-0125",
    "choices": [
        {
            "index": 0,
            "message": {
                "role": "assistant",
                "content": null,
                "tool_calls": [
                    {
                        "id": "call_i1i5tCZ3DwqFS6rkeChQpCeP",
                        "type": "function",
                        "function": {
                            "name": "get_current_weather",
                            "arguments": "{\"location\": \"San Francisco\", \"unit\": \"celsius\"}"
                        }
                    },
                    {
                        "id": "call_uH91rW0x7Bh0KzncuiK8PEcE",
                        "type": "function",
                        "function": {
                            "name": "get_current_weather",
                            "arguments": "{\"location\": \"Tokyo\", \"unit\": \"celsius\"}"
                        }
                    },
                    {
                        "id": "call_8iDBQBBVo5HiFcUFzOLMwpjp",
                        "type": "function",
                        "function": {
                            "name": "get_current_weather",
                            "arguments": "{\"location\": \"Paris\", \"unit\": \"celsius\"}"
                        }
                    }
                ]
            },
            "logprobs": null,
            "finish_reason": "tool_calls"
        }
    ],
    "usage": {
        "prompt_tokens": 88,
        "completion_tokens": 77,
        "total_tokens": 165
    }
}
```

stream模式下的响应示例

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"role":"assistant","content":null},"logprobs":null,"finish_reason":null}]}

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"id":"call_s0N8ds232IKkopAi2rq3SQh4","type":"function","function":{"name":"get_current_weather","arguments":""}}]},"logprobs":null,"finish_reason":null}]}

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\"lo"}}]},"logprobs":null,"finish_reason":null}]}

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"catio"}}]},"logprobs":null,"finish_reason":null}]}

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"n\": \"S"}}]},"logprobs":null,"finish_reason":null}]}

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"an F"}}]},"logprobs":null,"finish_reason":null}]}

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"ranci"}}]},"logprobs":null,"finish_reason":null}]}

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"sco\", "}}]},"logprobs":null,"finish_reason":null}]}

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"uni"}}]},"logprobs":null,"finish_reason":null}]}

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"t\": \""}}]},"logprobs":null,"finish_reason":null}]}

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"celsiu"}}]},"logprobs":null,"finish_reason":null}]}

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"s\"}"}}]},"logprobs":null,"finish_reason":null}]}

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":1,"id":"call_qiZYb2S6gLot9SbB9DpQhrdv","type":"function","function":{"name":"get_current_weather","arguments":""}}]},"logprobs":null,"finish_reason":null}]}

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":1,"function":{"arguments":"{\"lo"}}]},"logprobs":null,"finish_reason":null}]}

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":1,"function":{"arguments":"catio"}}]},"logprobs":null,"finish_reason":null}]}

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":1,"function":{"arguments":"n\": \"T"}}]},"logprobs":null,"finish_reason":null}]}

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":1,"function":{"arguments":"okyo"}}]},"logprobs":null,"finish_reason":null}]}

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":1,"function":{"arguments":"\", \"u"}}]},"logprobs":null,"finish_reason":null}]}

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":1,"function":{"arguments":"nit\": "}}]},"logprobs":null,"finish_reason":null}]}

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":1,"function":{"arguments":"\"cel"}}]},"logprobs":null,"finish_reason":null}]}

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":1,"function":{"arguments":"sius\""}}]},"logprobs":null,"finish_reason":null}]}

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":1,"function":{"arguments":"}"}}]},"logprobs":null,"finish_reason":null}]}

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":2,"id":"call_u6RjCxUBAKsqJuBvFnFgpAVR","type":"function","function":{"name":"get_current_weather","arguments":""}}]},"logprobs":null,"finish_reason":null}]}

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":2,"function":{"arguments":"{\"lo"}}]},"logprobs":null,"finish_reason":null}]}

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":2,"function":{"arguments":"catio"}}]},"logprobs":null,"finish_reason":null}]}

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":2,"function":{"arguments":"n\": \"P"}}]},"logprobs":null,"finish_reason":null}]}

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":2,"function":{"arguments":"aris"}}]},"logprobs":null,"finish_reason":null}]}

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":2,"function":{"arguments":"\", \"u"}}]},"logprobs":null,"finish_reason":null}]}

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":2,"function":{"arguments":"nit\": "}}]},"logprobs":null,"finish_reason":null}]}

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":2,"function":{"arguments":"\"cel"}}]},"logprobs":null,"finish_reason":null}]}

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":2,"function":{"arguments":"sius\""}}]},"logprobs":null,"finish_reason":null}]}

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":2,"function":{"arguments":"}"}}]},"logprobs":null,"finish_reason":null}]}

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}],"usage":null}

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":88,"completion_tokens":77,"total_tokens":165}}

data: [DONE]

根据我的理解现在主要需要先后实现两方面的需求：

1. Python 后端对比 OpenAI Function Call 接口，对于请求处理和响应生成都缺少一部分属性，需要实现这些属性。
2. 类似于 stream 属性，实现通过 tools 属性检查 Function Call 模式，解析 tools 属性，按照 Function Call 模式生成响应。

为了先实现需求1，我预计可能需要进行以下工作(routes/completion.py)：

类似于 message 属性，检查缺少的复杂属性并创建对应 BaseModel 子类，

e.g.
可能的“复杂”属性: tools、function、parameters、properties
```python3
class Tools(BaseModel):
    type: str = Field(min_length=0)
    function: Union[Function, None]
```
向 ChatCompletionBody类、BaseModel 子类补充简单属性
设置属性默认值、校验方法(if required)
简单测试修改后的 ChatCompletionBody，例如使用 quick_log 等方法

如果有不正确或者可以改进的地方可以补充 :D

当前最新的 Commit , 只看功能的话, 最新的提交能简单实现在非stream模式下, 单个函数的调用, 能够通过 function_call.py 的测试. 如果关注返回内容的话就会发现内容很不符合预期

现存主要问题

控制进入 tools (function call) 模式的 Prompts 只能产生单个 function call, 不符合OpenAI Tools接口下的Function Call功能
具有致命缺陷的 json 响应构建方式：tool_calls 写死只包含一个 function, function 属性只接受内容为json的字符串
没有做任何 stream 模式的工作
响应构建全部集中在了 eval_rwkv 函数中, eval_rwkv函数过于庞大

已完成工作

调整部分属性默认值 https://github.com/EliwiiKeeya/RWKV-Runner/commit/542a9eb01cb4163455308c5659028a7f17e1342d (schema.py)
兼容了对 role 为 tool 的 message 进行预处理的功能 https://github.com/EliwiiKeeya/RWKV-Runner/commit/c9e9d51e92de8e419d3e3fa416621f556c53aba2 (File completion.py, in line 387, in chat_template)
简单提供了一个能产生 json 内容字符串 function call 的 prompt https://github.com/EliwiiKeeya/RWKV-Runner/commit/c9e9d51e92de8e419d3e3fa416621f556c53aba2 (File completion.py, in line 456, in chat_with_tools)
能对不同请求分别产生对应响应, 且符合 OpenAI Client 接口

可能会有未说明或未表述清楚的地方，欢迎随时向我提问

一些困惑:

如果使用了 chat_template 的 presystem, 对应的 prompts 会被放在 function call prompt 的后面, 这样会产生问题么
function call prompt 写在 chat_with_tools 中是否妥当，还是说应该放在 chat_template 中
text mode 也需要适配 function call 么
构建 chat, tools 响应时, tool_calls id 怎么生成

就是对function call请求的响应
```json
{
    "id": "",
    "object": "chat.completion",
    "created": 1722434073,
    "model": "RWKV-x060-World-3B-v2.1-20240417-ctx4096",
    "choices": [
        {
            "index": 0,
            "message": {
                "role": "assistant",
                "content": null,
                "tool_calls": [
                    {
                        "id": "", // here
                        "type": "function",
                        "function": {
                            "name": "get_current_weather",
                            "arguments": "{\"location\": \"San Francisco, Tokyo, Paris\"}"
                        }
                    }
                ]
            },
            "logprobs": null,
            "finish_reason": "tool_calls"
        }
    ],
    "usage": {
        "prompt_tokens": 210,
        "completion_tokens": 28,
        "total_tokens": 238
    }
}
```
json 作为令模型在 Function Call 模式下的格式是不是不太合适

e.g.
``` User: Assistant: {{"name": "", "arguments": '{{"": "", "": "", ...}}'}} 也就是说最终模型响应的字符串为: " {{"name": "", "arguments": '{{"": "", "": "", ...}}'}}" 将在最后调用 literal_eval 方法转为字典 ```

现在你可以用你的fork发起一个新的PR, 这样方便github直接查看diff 如果对我下面对应的逐点的回答有任何不清楚的地方, 可以继续直接问

目前因为这套function call prompt只适用于Mobius模型, 这是个12B模型, 本地运行和测试可能不太方便, 所以可以改动后让我来测, 先以程序逻辑上实现无误为主

另外tests目录应该和routes同级

现存主要问题

目前受限于模型本身和当前使用的prompt, 暂时以只能产生单个function call作为本PR的最终结果
和上个问题类似, 由于现阶段模型和prompt限制, 只能产生单个function call, 因此写死一个固定长度为1的数组可以作为本PR的最终结果. 但是注意, 产生的响应是不能和本PR目前这样直接返回的, 而是要对响应文本进行后处理的, 目前的响应看https://github.com/josStorer/RWKV-Runner/pull/364#issuecomment-2254054091 的第一个prompt示例, 结果是函数名加上一个python代码块, 需要将其参数提取出来, 处理成可返回的响应文本, 对应这里这个TODO: https://github.com/josStorer/RWKV-Runner/pull/364/files#diff-4e4a05876ad4fc0cfe0ed3cbdc219a3859cb0518161d9e959d52820520c87227R419
可以先实现非stream
可以不写在eval_rwkv中, eval_rwkv是通用的prompt补全, 而function call依然基于补全续写, 通过调用eval_rwkv得到一个响应, 再后处理为function call的json, 并在chat_with_tools中返回这个响应, 而不是写在eval_rwkv中, 类似的, 调用chat_with_tools前, 就已经对请求参数做了一些检查校验, 以及pydantic自动的校验, 不需要像现在这样把一些校验写在eval_rwkv中

已完成工作

注意部分字段, 根据openai格式, 是必填的, 不应都改为Optional: https://platform.openai.com/docs/api-reference/chat/create

一些困惑

没问题
chat_template是一个通用基本模板, function call实际上是在标准的chat下加了额外的prompt模板信息, 额外的后处理步骤, 没有相互冲突的部分, 因此可以不放在chat_template中
text mode是指什么, 如果是/completions, 不需要实现, /completions相当于eval_rwkv最原生公开的api, 实际上chat和function call, 都能由客户端侧, 通过/completions实现
使用call_前缀, 加上一个随机的24位字符串即可, 参考上面我的评论, 另外目前由于只支持单个function call, 所以这个字段只有兼容性用途, 没有任何实际意义
注意遵循我上面评论中, 第一个代码块所提及的prompt, 只需要按照这个prompt实现即可, 目前你这里提及的这段prompt实际上是无法正确完成function call的. function call的整个流程是, 对话并提供可用工具, AI视情况使用工具, 或直接产生回答, 若使用工具, 响应中会包含tool_calls参数, 用户需要在本地执行对应函数调用后, 将调用结果作为tool角色的内容, 再次发送, AI依据调用结果再次生成实际可用的响应, 至此完成闭环. 其中这里tool_calls参数我们需要对eval_rwkv产生的响应进行字符串后处理才能得到, 而用户提供可用工具时, 传入的tools参数, 我们只需要提取function的内容即可, 参考这里: https://github.com/josStorer/RWKV-Runner/pull/364/files#diff-d16d47854f9f4a27efd8520938502741093433195730311b1b83ab4ea3000382R39, 然后重新构造一个数组, 插入到function call的System中即可, 构造完成的System看先前评论的第一个代码块: https://github.com/josStorer/RWKV-Runner/pull/364#issuecomment-2254054091 这个代码块实际上是整个function call流程完成, 并生成最终响应的prompt, 上述两个步骤分别对应这里两个Assistant:

截止当前最新的一次提交:https://github.com/EliwiiKeeya/RWKV-Runner/commit/6dfc9ec168656cc792767aad86cd91d931bf3c1c diff直达

现存问题:

现在先实现非stream模式， stream模式之后再说

已完成工作:

修改 Prompts 为 Mobius 示例内容
创建 postprocess_response 函数功能: 响应文本后处理
还原 rwkv_eval()
实现 tool_calls id 的生成
对照 OpenAI文档调整参数属性(检查请求)
实现 function_call.py 的测试, 我本地确实装载不了12B模型, 服务端第一次响应我开调试直接注入替换下面这样的字符串, 最终能成功返回一个响应
```
# 设置 content 的值为这个字符串
f"""
```python\n
get_current_wether("location"= "Tokyo", "unit"= "None")\n
```"""
```

我觉得可以调整一下tool call 的 prompt, 用12B模型测试 function_call.py 检查一下功能了

困惑:

我查看了Mobius模型进行function call的prompt示例

Tool Call 的 name 固定为 Observation, 依照"非stream模式下的function call响应示例"的格式预期发送的第二次请求内容中tool 具有 name 属性, 值为调用的函数名get_exchange_rate, 是否需要将tools name修改为动态获取

由function_call.py产生的第二次请求body
```json { "messages": [ { "role": "user", "content": "What's the weather like in SanFrancisco, Tokyo, and Paris?" }, { "content": null, "role": "assistant", "tool_calls": [ { "id": "", "function": { "arguments": "{\"location\": \"San Francisco, Tokyo, Paris\"}", "name": "get_current_weather" }, "type": "function" } ] }, { "tool_call_id": "", "role": "tool", "name": "get_current_weather", "content": "{\"location\": \"Tokyo\", \"temperature\":\"10\", \"unit\": null}" } ], "model": "gpt-4o" } ```
- python代码块函数名似乎直接变成 tool_call, 而实际的函数名被放在代码块外面, 我提交了一个测试文件, 其中包含我预期的输入格式，我不确定我预期接受并处理的格式是否合适
  快速掌握
```
import re

def postprocess_response(s):
    """Print the function calls found in the fenced code blocks of a response.

    Every ``` fenced block in ``s`` is split into lines. For each line that
    looks like ``name(...)`` the raw line, the function name and the
    ``"key"= "value"`` arguments (rendered as the text of a dict) are printed.
    Lines that contain no call (blank lines, plain text) are skipped instead
    of raising.

    Args:
        s: Raw response text, possibly containing markdown code fences.

    Returns:
        None. The parsed pieces are only printed.
    """
    REGEX_BLOCKS = r'```[\w]*(.*?)```'
    # One plain `\w+` group: the previous `(\w+)*` nested quantifier could
    # backtrack exponentially on a long word that is not followed by `(`.
    REGEX_FUNCTIONS = r'(\w+)\('
    REGEX_ARGS = r'"([^"]+)"\s*=\s*"([^"]+)"'

    blocks = re.findall(REGEX_BLOCKS, s, re.DOTALL)
    print(f"Blocks:\n{blocks}")
    for block in blocks:
        functions = block.strip().split('\n')
        print(f"Functions:\n{functions}")
        print()
        for function in functions:
            match = re.search(REGEX_FUNCTIONS, function)
            if match is None:
                # Not a call line; previously this crashed with
                # AttributeError on `None.group(1)`.
                continue
            name = match.group(1)
            arguments = f"{dict(re.findall(REGEX_ARGS, function))}"

            print(function)
            print(name)
            print(arguments)
            print()

    return

# Manual smoke test: builds a sample response string and passes it to
# postprocess_response, which prints what it parsed.
if __name__ == '__main__':
    # Sample text containing a fenced python block surrounded by free text.
    # NOTE(review): `str` shadows the builtin of the same name — consider renaming.
    str = """
    some texts
    some texts
    some texts
    some texts

    ```python\n
    get_current_wether("location"= "Tokyo", "unit"= "None")\n
```
```
some texts
some texts
some texts
some texts
"""

# Alternative sample (disabled): a function name followed by a python block
# holding several tool_call lines.
# str = """ get_exchange_rate
# ```python
# tool_call("base_currency"="feat(Backend)", "target_currency"="CNY"),
# tool_call2("base_currency"="CNY", "target_currency"="USD"),
# ```"""

# NOTE(review): as pasted, this call sits at module level, outside the
# `if __name__` guard above — indentation may have been lost; confirm.
postprocess_response(str)
```
```
</details>
```

更多信息：

请求貌似没有 n 参数, 当前 choice 写死为一个
postprocess_response 原理为正则查找推理结果的 markdown 代码块, 逐行解析每一个function 整合成一个tool_calls列表
现在 function_call.py 一轮 tool_call 的处理流程是：
1. 客户端发送 User 请求1, message 包含 tools 属性
2. 服务端接收请求将所有 content 属性和 tools call prompt(Role: System) 一块捏成一个 prompt 再生成一个推理结果, 最终尝试后处理字符串
3. 服务端返回 Assistant 响应1, content 属性不包含值, tool_calls 属性包含值
4. 客户端发送 Tool 请求2, message 依次包含
  - 请求1历史(role, content, 不包含 tools)
  - 响应1历史(role, content=null, tool_calls)
  - 请求2(role, name="Observation", content="函数返回值")
5. 服务端接收请求将所有 content 属性(请求2)捏成 prompt 生成一个推理结果
6. 服务端返回 Assistant 响应2

是否需要将tools name修改为动态获取

这个我没理解是什么意思

python代码块函数名似乎直接变成 tool_call, 而实际的函数名被放在代码块外面, 我提交了一个测试文件, 其中包含我预期的输入格式, 我不确定我预期接受并处理的格式是否合适

目前训练的模型就是这样的, 所以正则需要解析 \n```python\ntool_call(之前的文本作为function name, 如果解析失败, 则视为模型认为不需要调用function, 只是普通回答

请求貌似没有 n 参数, 当前 choice 写死为一个

目前还不支持batch推理, 所以没实现n参数

postprocess_response 原理为正则查找推理结果的 markdown 代码块, 逐行解析每一个function 整合成一个tool_calls列表

未来如果要支持多个function call我觉得你设想的这个多行调用格式是合理的, 但目前的模型尚不支持, 仍需要改为上面说的格式, 但我觉得你目前的多行解析逻辑可以先注释保留, 以便未来使用

目前训练的模型就是这样的, 所以正则需要解析 \n```python\ntool_call(之前的文本作为function name, 如果解析失败, 则视为模型认为不需要调用function, 只是普通回答. 未来如果要支持多个function call我觉得你设想的这个多行调用格式是合理的, 但目前的模型尚不支持, 仍需要改为上面说的格式, 但我觉得你目前的多行解析逻辑可以先注释保留, 以便未来使用

我已修改新的 postprocess_response 函数及测试文件并提交 https://github.com/josStorer/RWKV-Runner/pull/368/commits/ff5c44f99b10bf04cf6283f15bbfdc74ab453e05

是否需要将tools name修改为动态获取

这个我没理解是什么意思

关于这个问题, 简单来说就是我看到 tool 的"name"属性在 chat_template 函数合成 prompt 的过程中没有使用

// OpenAI 库产生的第二次请求
{
    "messages": [
        {
            "role": "user",
            "content": "What's the weather like in SanFrancisco, Tokyo, and Paris?"
        },
        {
            "content": null,
            "role": "assistant",
            "tool_calls": [
                {
                    "id": "",
                    "function": {
                        "arguments": "{\"location\": \"San Francisco, Tokyo, Paris\"}",
                        "name": "get_current_weather"
                    },
                    "type": "function"
                }
            ]
        },
        {
            "tool_call_id": "",
            "role": "tool",
            "name": "get_current_weather",
            "content": "{\"location\": \"Tokyo\", \"temperature\":\"10\", \"unit\": null}"
        }
    ],
    "model": "gpt-4o"
}

# chat_template 函数片段
system = "System" if body.system_name is None else body.system_name
tool = "Obersavtion"
for message in body.messages:
    ...

所以我在想是否需要改成这个样子:

system = "System" if body.system_name is None else body.system_name
tool = "Obersavtion" if body.tool_name is None else body.tool_name # !: 当前 ChatCompletionBody 还没有 tool_name 属性
for message in body.messages:
    ...

不用改成这样, 目前的模型只能用Observation

现在可以检查或测试非 stream 模式下还有哪些问题。我近期将会着手 stream 模式下实现单个函数的 function call 的工作。我想确定期望的 stream 模式下 function call 处理流程。我研究了之前给出的 chatgpt 的响应示例, 拆分为三部分放在设想的处理流程中作为参照。

设想的处理流程:

客户端发送请求

考虑到现在 tool_choice 暂时仅开发 "auto" , 因此令服务端检测请求 body 中 tools 属性是否为 None, 将 tools 属性不为 None 视作开始输出 tool call 的标志, 此时产生一个 role 为 assistant, content 为 None 的响应

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"role":"assistant","content":null},"logprobs":null,"finish_reason":null}]}

开始推理并持续检查推理结果, 直到推理结果中出现 <函数名>\n```<(缺省)编程语言>\ntool_call( 的字段, 中间不产生任何响应

产生一个 tool_calls 的首次响应, 这是一个特殊响应, 包含了此次 tool call 的基本信息

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"id":"call_s0N8ds232IKkopAi2rq3SQh4","type":"function","function":{"name":"get_current_weather","arguments":""}}]},"logprobs":null,"finish_reason":null}]}

开始对之后每个产生的 delta:
- 先进行后处理, 例如将出现的双引号 (") 替换为反斜杠+双引号 (\"), 以及如果是最外层括号, 那么外面加上花括号等。
- 再将后处理结果打包成响应发送给客户端。
- 重复, 直到出现的所有括号全部闭合。

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\"lo"}}]},"logprobs":null,"finish_reason":null}]}

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"catio"}}]},"logprobs":null,"finish_reason":null}]}

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"n\": \"S"}}]},"logprobs":null,"finish_reason":null}]}

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"an F"}}]},"logprobs":null,"finish_reason":null}]}

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"ranci"}}]},"logprobs":null,"finish_reason":null}]}

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"sco\", "}}]},"logprobs":null,"finish_reason":null}]}

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"uni"}}]},"logprobs":null,"finish_reason":null}]}

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"t\": \""}}]},"logprobs":null,"finish_reason":null}]}

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"celsiu"}}]},"logprobs":null,"finish_reason":null}]}

data: {"id":"chatcmpl-9pBFVkUYV8ueHkN1JcluTwrcScTzZ","object":"chat.completion.chunk","created":1721984997,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"s\"}"}}]},"logprobs":null,"finish_reason":null}]}

注意到已超过三天没有新的Comment产生, 我将进一步描述我的问题。请问非 stream 模式的测试还顺利么? 我们应该开始着手实现 stream 模式的 function call 了么？

stream 模式相关问题:

关于 stream 模式下 function call 处理这一块, 我提出的设想是基于当前 Mobius prompt 示例做出的。我不确定当前设想的模式是否合理，或者是否需要提出新的 prompt 示例进行开发, 请进行确认。如果短期内仍仅需关注非 stream 模式的开发, 或者其他原因, 那么我将暂时保留这部分问题。

@EliwiiKeeya 最近在忙别的事, 我下午给你一个答复

到 #368 继续讨论吧

josStorer / RWKV-Runner