netease-youdao / QAnything

Question and Answer based on Anything.
https://qanything.ai
GNU Affero General Public License v3.0
11.92k stars 1.16k forks source link

use AsyncOpenAI in llm_for_openai_api.py for performance consideration #417

Closed SoonyangZhang closed 4 months ago

SoonyangZhang commented 4 months ago

Please Describe The Problem To Be Solved: use `AsyncOpenAI` instead of the synchronous client, so API calls do not block the event loop.

(Optional): Suggest A Solution

SoonyangZhang commented 4 months ago

in llm_for_openai_api.py

from openai import AsyncOpenAI

class OpenAILLM(BaseAnswer, ABC):

change in `__init__`:

def __init__(self):
    """Set up the wrapper with an asynchronous OpenAI client."""
    super().__init__()
    # AsyncOpenAI lets event-loop based callers await completions
    # instead of blocking a worker thread on HTTP I/O.
    self.client = AsyncOpenAI(api_key=OPENAI_API_KEY, base_url=OPENAI_API_BASE)

# change in _call
async def _call(self, prompt: str, history: List[List[str]], streaming: bool=False) -> str:
    messages = []
    for pair in history:
        question, answer = pair
        messages.append({"role": "user", "content": question})
        messages.append({"role": "assistant", "content": answer})
    messages.append({"role": "user", "content": prompt})
    logging.info(messages)
    try:
        asyn_stream = await self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            stream=True,
            max_tokens=self.max_token,
            # temperature=self.temperature,
            stop=[self.stop_words] if self.stop_words is not None else None,)
        if streaming:
            async for completion in asyn_stream:
                text = completion.choices[0].delta.content
                # text is  none type for first value
                if text:
                    delta = {'answer': text}
                    yield f"data: " + json.dumps(delta, ensure_ascii=False)
        else:
            final_result = ""
            async for completion in asyn_stream:
                text = completion.choices[0].delta.content
                final_result = final_result + text
            delta = {'answer': final_result}
            yield f"data: " + json.dumps(delta, ensure_ascii=False)
    except Exception as e:
        logging.info(f"Error calling API: {e}")
        delta = {'answer': f"{e}"}
        yield f"data: " + json.dumps(delta, ensure_ascii=False)
    finally:
        logging.info("[debug] try-finally")
        yield f"data: [DONE]\n\n"

# change https://github.com/netease-youdao/QAnything/blob/master/qanything_kernel/connector/llm/llm_for_openai_api.py#L193
async def generatorAnswer(self, prompt: str,
                    history: List[List[str]] = [],
                    streaming: bool = False) -> AnswerResult:
    # change https://github.com/netease-youdao/QAnything/blob/master/qanything_kernel/connector/llm/llm_for_openai_api.py#L206
    async for response_text in response:

change https://github.com/netease-youdao/QAnything/blob/master/qanything_kernel/core/local_doc_qa.py#L241C1-L243C76

    async for answer_result in self.llm.generatorAnswer(prompt=prompt,
                                                  history=chat_history,
                                                  streaming=streaming):