enricoros / big-AGI

Generative AI suite powered by state-of-the-art models, providing advanced AI/AGI functions. It features AI personas, AGI functions, multi-model chats, text-to-image, voice, response streaming, code highlighting and execution, PDF import, presets for developers, and much more. Deploy on-prem or in the cloud.
https://big-agi.com
MIT License
5.61k stars · 1.3k forks

[BUG] Response from OpenAI compatible API with LocalAI not shown #681

Open tsoernes opened 1 week ago

tsoernes commented 1 week ago

Description

I'm attempting to build an API to interact with a RAG chatbot that I've made.

The API should be OpenAI-compatible, but I cannot get big-AGI to display the response that the API sends back.

I have logging enabled on the API side; it receives the request from big-AGI and, as far as I can tell, responds correctly, but no response message is shown. When I send a message from big-AGI, it waits for the API to respond (showing the typing animation) but displays no message content from the response.

Example "messages" key from a request received by the API:

[{'role': 'system', 'content': ''}, {'role': 'user', 'content': 'hvilke brønner er i maria feltet'}]

And the response the API sends back, with status code 200 and mimetype="application/json":

{ 'choices': [ { 'content_filter_results': { 'hate': { 'filtered': False,
                                                       'severity': 'safe'},
                                             'self_harm': { 'filtered': False,
                                                            'severity': 'safe'},
                                             'sexual': { 'filtered': False,
                                                         'severity': 'safe'},
                                             'violence': { 'filtered': False,
                                                           'severity': 'safe'}},
                 'finish_reason': 'stop',
                 'index': 0,
                 'logprobs': None,
                 'message': { 'content': 'SOME MESSAGE',
                              'refusal': None,
                              'role': 'assistant'}}],
  'created': 1731670808,
  'id': 'chatcmpl-jYloJjizjHXfWIfhwHSjxZTjTvtAZ',
  'model': 'gpt-4o',
  'object': 'chat.completion',
  'prompt_filter_results': [{'content_filter_results': {}, 'prompt_index': 0}],
  'system_fingerprint': 'fp_000eow_rag',
  'usage': {'completion_tokens': 1, 'prompt_tokens': 1, 'total_tokens': 2}}
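As a sanity check (a minimal sketch, not big-AGI's actual parsing code), the payload above is a well-formed non-streaming chat.completion, and the assistant text extracts the way an OpenAI client would read it:

```python
import json

# A trimmed copy of the response above, as JSON
payload = json.loads("""{
  "id": "chatcmpl-jYloJjizjHXfWIfhwHSjxZTjTvtAZ",
  "object": "chat.completion",
  "created": 1731670808,
  "model": "gpt-4o",
  "choices": [{
    "index": 0,
    "finish_reason": "stop",
    "message": {"role": "assistant", "content": "SOME MESSAGE", "refusal": null}
  }]
}""")

# Extract the assistant text the way a non-streaming client would
text = payload["choices"][0]["message"]["content"]  # → "SOME MESSAGE"
```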

And here is the code for the API. It is an Azure Function running locally:

from string import ascii_letters
import os
import sys
import random
import json
import time
import azure.functions as func
import logging
import stackprinter
from query_pipeline import run_query, format_response

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load environment variable and initialize LLM client
app = func.FunctionApp()
function_key = os.environ["FUNCTION_KEY"]

default_response = {
  "id": "chatcmpl-ATocTflkPHoUliboru2hb5h9tk0ox",
  "object": "chat.completion",
  "created": 1731669457,
  "model": "gpt-4o",
  "system_fingerprint": "fp_000eow_rag",
  "choices": [{
    "index": 0,
    "message": {
      "role": "assistant",
      "content": "PLACEHOLDER",
      "refusal": None,
    },
    "logprobs": None,
    "finish_reason": "stop",
    "content_filter_results": {
      "hate": {"filtered": False, "severity": "safe"},
      "self_harm": {"filtered": False, "severity": "safe"},
      "sexual": {"filtered": False, "severity": "safe"},
      "violence": {"filtered": False, "severity": "safe"},
    },
  }],
  "usage": {
    "prompt_tokens": 1,
    "completion_tokens": 1,
    "total_tokens": 2,
  },
  "prompt_filter_results": [{"prompt_index": 0, "content_filter_results": {}}],
}

@app.route(route="v1/chat/completions", auth_level=func.AuthLevel.ANONYMOUS)
def chat_completions(req: func.HttpRequest) -> func.HttpResponse:
    js = req.get_json()
    messages = js["messages"]
    print(f"{messages=}")
    # Fill in the per-request fields of the canned response
    default_response["id"] = "chatcmpl-" + "".join(random.choices(ascii_letters, k=29))
    default_response["created"] = int(time.time())
    default_response["choices"][0]["message"]["content"] = "SOME MESSAGE"
    js = json.dumps(default_response)
    print(f"Returning {js}")
    return func.HttpResponse(js, status_code=200, mimetype="application/json")

@app.route(route="v1/models", auth_level=func.AuthLevel.ANONYMOUS)
def models(req: func.HttpRequest) -> func.HttpResponse:
    # OpenAI API compatible model listing, guarded by the function key
    token = req.headers.get("Authorization")
    if not token:
        return func.HttpResponse("Unauthorized", status_code=401)
    token = token.replace("Bearer ", "")
    if token != function_key:
        return func.HttpResponse("Incorrect function key", status_code=401)

    models = {
        "object": "list",
        "data": [
            {
                "id": "gpt-4o",
                "object": "model",
                "created": 1700000000,
                "owned_by": "Bouvet",
            },
        ],
    }
    return func.HttpResponse(json.dumps(models), status_code=200, mimetype="application/json")

Device and browser

Chrome

Screenshots and more

(two screenshots attached)

enricoros commented 1 week ago

Any messages in the console for big-AGI? And what branch was this on?

tsoernes commented 1 week ago

@enricoros There are no messages in the console, besides big-AGI pinging the default chat LLM (not the LocalAI RAG one) to make a title for the chat.

This is on the v1-dev branch

enricoros commented 1 week ago

@tsoernes could you try the v2-dev branch? Its AI engine was rewritten from scratch and is much more powerful (and will tell you about non-compliances via strict schema parsing). v1-dev is unsupported and will fade away soon (it doesn't support images, etc.)

tsoernes commented 1 week ago

I have tried it now.

It says:

(screenshot attached)

The debug log from the server shows:

{ 'choices': [ { 'content_filter_results': { 'hate': { 'filtered': False,
                                                       'severity': 'safe'},
                                             'self_harm': { 'filtered': False,
                                                            'severity': 'safe'},
                                             'sexual': { 'filtered': False,
                                                         'severity': 'safe'},
                                             'violence': { 'filtered': False,
                                                           'severity': 'safe'}},
                 'finish_reason': 'stop',
                 'index': 0,
                 'logprobs': None,
                 'message': { 'content': 'SOME MESSAGE',
                              'refusal': None,
                              'role': 'assistant'}}],
  'created': 1731916661,
  'id': 'chatcmpl-TfZobWLVlSwzcTnAIMuGWFfwQKWrj',
  'model': 'gpt-4o',
  'object': 'chat.completion',
  'prompt_filter_results': [{'content_filter_results': {}, 'prompt_index': 0}],
  'system_fingerprint': 'fp_000eow_rag',
  'usage': {'completion_tokens': 1, 'prompt_tokens': 1, 'total_tokens': 2}}

From the code:

default_response = {
  "id": "chatcmpl-ATocTflkPHoUliboru2hb5h9tk0ox",
  "object": "chat.completion",
  "created": 1731669457,
  "model": "gpt-4o",
  "system_fingerprint": "fp_000eow_rag",
  "choices": [{
    "index": 0,
    "message": {
      "role": "assistant",
      "content": "PLACEHOLDER",
      "refusal": None,
    },
    "logprobs": None,
    "finish_reason": "stop",
    "content_filter_results": {
      "hate": {"filtered": False, "severity": "safe"},
      "self_harm": {"filtered": False, "severity": "safe"},
      "sexual": {"filtered": False, "severity": "safe"},
      "violence": {"filtered": False, "severity": "safe"},
    },
  }],
  "usage": {
    "prompt_tokens": 1,
    "completion_tokens": 1,
    "total_tokens": 2,
  },
  "prompt_filter_results": [{"prompt_index": 0, "content_filter_results": {}}],
}

@app.route(route="v1/chat/completions", auth_level=func.AuthLevel.ANONYMOUS)
def chat_completions(req: func.HttpRequest) -> func.HttpResponse:
    js = req.get_json()
    messages = js["messages"]
    print(f"{messages=}")
    # Fill in the per-request fields of the canned response
    default_response["id"] = "chatcmpl-" + "".join(random.choices(ascii_letters, k=29))
    default_response["created"] = int(time.time())
    default_response["choices"][0]["message"]["content"] = "SOME MESSAGE"
    js = json.dumps(default_response)
    print(f"Returning {js}")
    return func.HttpResponse(js, status_code=200, mimetype="application/json")

tsoernes commented 6 days ago

For some reason, the API response is shown as the chat title header, and nothing is shown as the message:

(screenshot attached)

enricoros commented 6 days ago

@tsoernes I think this is because big-AGI is expecting a streaming answer (chat.completion.chunk objects) but getting a full chat.completion object instead.

I think the fix won't be hard; the streaming flag needs to be set to false in the code when performing the AIX call.

2 options: 1. Change the server to reply with a stream (SSE, chat.completion.chunk objects), or 2. Change big-AGI to not expect a streaming answer when making a streaming request, or to never make a streaming request for this model.
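For option 1, a minimal sketch of what that SSE body could look like (assuming the whole stream can be returned at once via `func.HttpResponse` with mimetype `text/event-stream`; a real server would flush each event as it is produced):

```python
import json
import time


def sse_chunks(content: str, model: str = "gpt-4o", chunk_id: str = "chatcmpl-demo"):
    """Yield an OpenAI-style SSE stream (chat.completion.chunk events)
    for a single assistant message, followed by the [DONE] sentinel."""
    created = int(time.time())

    def event(delta, finish_reason=None):
        chunk = {
            "id": chunk_id,
            "object": "chat.completion.chunk",
            "created": created,
            "model": model,
            "choices": [{"index": 0, "delta": delta, "finish_reason": finish_reason}],
        }
        return f"data: {json.dumps(chunk)}\n\n"

    # First chunk carries the role, later chunks carry content deltas,
    # and the final chunk carries finish_reason before the [DONE] sentinel.
    yield event({"role": "assistant", "content": ""})
    yield event({"content": content})
    yield event({}, finish_reason="stop")
    yield "data: [DONE]\n\n"


stream_body = "".join(sse_chunks("SOME MESSAGE"))
```

Returning `stream_body` with mimetype `text/event-stream` should satisfy a client that sent `"stream": true`, though only a chunked/flushed response gives actual incremental streaming.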