abetlen / llama-cpp-python

Python bindings for llama.cpp
https://llama-cpp-python.readthedocs.io
MIT License
8.16k stars 970 forks source link

fix: chat API logprobs format #1788

Open domdomegg opened 1 month ago

domdomegg commented 1 month ago

Summary

The OpenAI-compatible server should match the response structure of the OpenAI API for chat completions. Unfortunately, there is a discrepancy in the format of logprobs: the server currently returns the logprobs format of the completions API rather than that of the chat completions API.

This PR fixes the logprobs format returned by the chat API so that it matches the chat completions API.

Issues fixed

Fixes #1787

domdomegg commented 1 month ago

Demo

Request

```json
{
  "model": "gpt-3.5-turbo",
  "messages": [
    {
      "role": "user",
      "content": "What is the capital of France?"
    }
  ],
  "logprobs": true,
  "top_logprobs": 10,
  "max_tokens": 5
}
```
Response

```json
{
  "id": "chatcmpl-1898ccce-2bf6-431c-b9e0-2a82e90a9604",
  "object": "chat.completion",
  "created": 1728184671,
  "model": "gpt-3.5-turbo",
  "choices": [
    {
      "index": 0,
      "message": {
        "content": "The capital of France is",
        "role": "assistant"
      },
      "logprobs": {
        "content": [
          {
            "token": "The",
            "logprob": -0.008244173601269722,
            "bytes": null,
            "top_logprobs": [
              { "token": "The", "logprob": -0.008244173601269722, "bytes": null },
              { "token": "Paris", "logprob": -5.3227219581604, "bytes": null },
              { "token": "Sure", "logprob": -5.770838260650635, "bytes": null },
              { "token": "Answer", "logprob": -9.54023265838623, "bytes": null },
              { "token": "Yes", "logprob": -9.896768569946289, "bytes": null },
              { "token": "France", "logprob": -10.62641429901123, "bytes": null },
              { "token": " The", "logprob": -11.367059707641602, "bytes": null },
              { "token": "According", "logprob": -11.45943546295166, "bytes": null },
              { "token": "**", "logprob": -11.586193084716797, "bytes": null },
              { "token": " Paris", "logprob": -11.59852409362793, "bytes": null }
            ]
          },
          {
            "token": " capital",
            "logprob": -0.0005453529884107411,
            "bytes": null,
            "top_logprobs": [
              { "token": " capital", "logprob": -0.0005453529884107411, "bytes": null },
              { "token": " Capital", "logprob": -7.571288108825684, "bytes": null },
              { "token": " city", "logprob": -11.57780647277832, "bytes": null },
              { "token": " current", "logprob": -12.473557472229004, "bytes": null },
              { "token": " correct", "logprob": -12.674555778503418, "bytes": null },
              { "token": " ", "logprob": -12.77519416809082, "bytes": null },
              { "token": " answer", "logprob": -12.833593368530273, "bytes": null },
              { "token": " French", "logprob": -13.656529426574707, "bytes": null },
              { "token": " Paris", "logprob": -13.73013687133789, "bytes": null },
              { "token": " **", "logprob": -13.916248321533203, "bytes": null }
            ]
          },
          {
            "token": " of",
            "logprob": -0.019254328683018684,
            "bytes": null,
            "top_logprobs": [
              { "token": " of", "logprob": -0.019254328683018684, "bytes": null },
              { "token": " city", "logprob": -3.9625728130340576, "bytes": null },
              { "token": " and", "logprob": -10.33055305480957, "bytes": null },
              { "token": " ", "logprob": -12.015106201171875, "bytes": null },
              { "token": " is", "logprob": -12.049043655395508, "bytes": null },
              { "token": " City", "logprob": -12.161520957946777, "bytes": null },
              { "token": " o", "logprob": -12.770393371582031, "bytes": null },
              { "token": " cities", "logprob": -14.372736930847168, "bytes": null },
              { "token": " của", "logprob": -14.63923454284668, "bytes": null },
              { "token": " ", "logprob": -14.65132999420166, "bytes": null }
            ]
          },
          {
            "token": " France",
            "logprob": -0.0000252720492426306,
            "bytes": null,
            "top_logprobs": [
              { "token": " France", "logprob": -0.0000252720492426306, "bytes": null },
              { "token": " the", "logprob": -11.084362030029297, "bytes": null },
              { "token": " ", "logprob": -12.06197738647461, "bytes": null },
              { "token": "France", "logprob": -12.9952974319458, "bytes": null },
              { "token": " French", "logprob": -13.759483337402344, "bytes": null },
              { "token": " is", "logprob": -15.239158630371094, "bytes": null },
              { "token": " **", "logprob": -15.40572452545166, "bytes": null },
              { "token": " france", "logprob": -15.767807960510254, "bytes": null },
              { "token": " ", "logprob": -16.346908569335938, "bytes": null },
              { "token": " Frankreich", "logprob": -17.035612106323242, "bytes": null }
            ]
          },
          {
            "token": " is",
            "logprob": -0.000060437283536884934,
            "bytes": null,
            "top_logprobs": [
              { "token": " is", "logprob": -0.000060437283536884934, "bytes": null },
              { "token": " ", "logprob": -9.920828819274902, "bytes": null },
              { "token": ",", "logprob": -12.151354789733887, "bytes": null },
              { "token": " was", "logprob": -13.53709602355957, "bytes": null },
              { "token": " ", "logprob": -14.004632949829102, "bytes": null },
              { "token": " in", "logprob": -14.70918083190918, "bytes": null },
              { "token": " **", "logprob": -14.768845558166504, "bytes": null },
              { "token": " the", "logprob": -14.776985168457031, "bytes": null },
              { "token": " ", "logprob": -14.940979957580566, "bytes": null },
              { "token": " Is", "logprob": -14.942352294921875, "bytes": null }
            ]
          }
        ],
        "refusal": null
      },
      "finish_reason": "length"
    }
  ],
  "usage": {
    "prompt_tokens": 29,
    "completion_tokens": 5,
    "total_tokens": 34
  }
}
```