zilliztech / GPTCache

Semantic cache for LLMs. Fully integrated with LangChain and llama_index.
https://gptcache.readthedocs.io
MIT License
6.89k stars 480 forks source link

problem with langchain chat_history #611

Open oussamaJmaaa opened 4 months ago

oussamaJmaaa commented 4 months ago
          The code works perfectly, but the chat history is not being saved and the chatbot has no memory. How can I fix that, please?

Hi @Yafaa5, I ran the demo code and the cache works well. My full code:

import getpass
import time

from langchain.chains import LLMChain, ConversationalRetrievalChain
from langchain.chains.question_answering import load_qa_chain
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.vectorstores import Milvus

from gptcache import cache
from gptcache.adapter.langchain_models import LangChainChat
from gptcache.embedding import Onnx
from gptcache.manager import CacheBase, VectorBase, get_data_manager
from gptcache.processor.pre import get_messages_last_content
from gptcache.similarity_evaluation.distance import SearchDistanceEvaluation

# Prompt interactively for the OpenAI API key so it is never hard-coded in the script.
openai_key = getpass.getpass("Enter your OpenAI key: ")

# diff 1
# openai_client = OpenAI(model=os.getenv("EMBEDDING_MODEL_DEPLOYMENT"))
# Local ONNX embedding model shipped with GPTCache; replaces the remote OpenAI
# embedding client from the original snippet (see "diff 1" above).
openai_client = Onnx()

# get the content(only question) form the prompt to cache
def get_content_func(data, **_):
    """Return the question portion of a prompt dict for use as a cache key.

    Takes everything after the last occurrence of the literal "Question" in
    ``data["prompt"]``; if the marker is absent the whole prompt is returned.

    NOTE(review): this function is defined but not passed to ``cache.init``
    below (which uses ``get_messages_last_content`` instead) — confirm which
    pre-embedding function is intended.
    """
    # Default to "" so a missing "prompt" key yields an empty cache key
    # instead of raising AttributeError on None.
    return data.get("prompt", "").split("Question")[-1]

# GPTCache storage: SQLite for scalar/cache metadata, FAISS for the vectors.
cache_base = CacheBase('sqlite')
# Vector index dimension must match the ONNX embedding model's output size.
vector_base = VectorBase('faiss', dimension=openai_client.dimension, collection_name='chatbot')
data_manager = get_data_manager(cache_base, vector_base)
# Initialize the global GPTCache instance.
# NOTE(review): the pre-embedding function used here is
# get_messages_last_content, NOT the get_content_func defined above —
# confirm which one is intended.
cache.init(
    pre_embedding_func=get_messages_last_content,
    embedding_func=openai_client.to_embeddings,
    data_manager=data_manager,
    similarity_evaluation=SearchDistanceEvaluation(),
)

# diff 2
# Wrap the LangChain chat model with GPTCache's adapter so calls go through
# the semantic cache first (see "diff 2").
gpt_client = LangChainChat(chat=ChatOpenAI(openai_api_key=openai_key))

# Prompt used by the "stuff" QA chain: the retrieved documents are injected
# into {context} and the (possibly rephrased) user question into {question}.
QUESTION_ANSWER_PROMPT = """
        [INSTRUCTION]: You are a helpful chatbot that has to satisfy user requests in its
        original language in the [USER REQUEST] section to the best of your capabilities.

        [SOURCES OF INFORMATION]:{context}
        [USER REQUEST]: {question}"""

question_prompt_template = PromptTemplate(template=QUESTION_ANSWER_PROMPT, input_variables=["context", "question"])

# Prompt used by the question-generator chain: condenses {chat_history} plus
# the follow-up {question} into a standalone question for retrieval.
CONDENSE_PROMPT = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.
        Chat History:
        {chat_history}
        Follow Up Input: {question}
        Standalone question:"""

condense_prompt_template = PromptTemplate.from_template(CONDENSE_PROMPT)

# Chain that "stuffs" all retrieved documents into one prompt and answers.
doc_chain = load_qa_chain(gpt_client, chain_type="stuff", prompt=question_prompt_template)

# Chain that rewrites a follow-up question into a standalone one using chat history.
question_generator = LLMChain(llm=gpt_client, prompt=condense_prompt_template)

# diff 3
# NOTE(review): the vector store is built from an EMPTY text list, so the
# retriever has no documents to return — presumably real texts were loaded in
# the original setup; verify this (see "diff 3").
vector_store = Milvus.from_texts(texts=[], embedding=OpenAIEmbeddings(openai_api_key=openai_key))
# Assembled directly via the constructor rather than the from_llm() factory,
# so both sub-chains must be supplied explicitly.
question_answer_chain = ConversationalRetrievalChain(
    retriever=vector_store.as_retriever(),
    combine_docs_chain=doc_chain, return_source_documents=True, question_generator=question_generator,
    return_generated_question=True, verbose=True)

# Retrieval tuning passed through to the vector store search.
vectordbkwargs = {"search_distance": 0.7}

# ConversationalRetrievalChain expects chat_history as a list of
# (question, answer) tuples. The original snippet set it to "" (twice) and
# never appended to it, which is why the chatbot had no memory.
chat_history = []
user_query = "Who won the competition?"
start_time = time.time()
result = question_answer_chain({"question": user_query, "chat_history": chat_history, "vectordbkwargs": vectordbkwargs})
print("Time consuming: {:.2f}s".format(time.time() - start_time))
# Persist this turn so a follow-up question can be condensed against it.
chat_history.append((user_query, result["answer"]))

I have marked the code that differs from yours with comments: diff 1, diff 2, diff 3.

The test result: image

So I guess it may be caused by an unstable network.

Originally posted by @oussamaJmaaa in https://github.com/zilliztech/GPTCache/issues/481#issuecomment-1975252635