zilliztech / GPTCache

Semantic cache for LLMs. Fully integrated with LangChain and llama_index.
https://gptcache.readthedocs.io
MIT License

[Bug]: Cache entries are stored in the database as hex-escaped garbage instead of Chinese characters #626

Closed: Songjiadong closed 5 months ago

Songjiadong commented 5 months ago

Current Behavior

The cache is stored in the database as hex-escaped garbage instead of Chinese characters. I tried to fix it with post_func: the traced output is the correct Chinese text, but what ends up in the database is still hex data.

import json
import time
from typing import List, Any

from gptcache import Cache
from gptcache.manager import CacheBase, VectorBase, get_data_manager
from gptcache.similarity_evaluation.distance import SearchDistanceEvaluation
from langchain.chains.llm import LLMChain
from langchain_community.embeddings.xinference import XinferenceEmbeddings
from langchain_core.globals import set_llm_cache
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.embeddings import Embeddings  # embeddings interface, used as a type hint below
from gptcache.embedding.langchain import LangChain
from config import MILVUS, XINFERENCE_URL, EMBEDDING_MODEL_NAME, CACHE_DB_URL, LLM_MODEL_NAME, API_KEY, API_BASE, \
    LLM_MAX_TOKENS, LLM_TEMPERATURE, APP_VERBOSE
from langchain_community.cache import GPTCache
from gptcache import Config

def __get_content_func(data, **_):
    # Pre-embedding function: extract the latest "Human:" turn from the
    # serialized LangChain prompt, so only that text is embedded.
    prompt = data.get("prompt")
    dc = json.loads(prompt)
    result = dc[0].get("kwargs").get("content")
    split_0 = str(result.split("Human:")[-1])
    human = split_0.split('AI:')[0]
    if APP_VERBOSE is True:
        print(f"MIIC Cache:{human}")
    return human

def __get_messages_func(messages: List[Any], **_) -> Any:
    # Post-processing function: round-trip each cached answer through
    # json.loads/json.dumps with ensure_ascii=False so \uXXXX escapes
    # become readable Chinese again before the answer is returned.
    result = []
    for message in messages:
        items = json.loads(message)
        result_list = []
        for item in items:
            content = json.loads(item)
            new_item = json.dumps(content, ensure_ascii=False)
            result_list.append(new_item)
        messages_str = json.dumps(result_list, ensure_ascii=False)
        result.append(messages_str)
    if APP_VERBOSE is True:
        print(f"MIIC Cache Response Decode:{result}")
    return result[0]

def init_miic_cache(embeddings: Embeddings):
    cache_base = CacheBase(name='mysql', sql_url=CACHE_DB_URL, table_name='gptcache')
    vector_base = VectorBase(name='milvus',
                             host=MILVUS["host"],
                             port=MILVUS["port"],
                             user=MILVUS["user"],
                             password=MILVUS["password"],
                             top_k=1,
                             index_params={
                                 "metric_type": "IP",
                                 "index_type": "IVF_FLAT",
                                 "params": {"nprobe": 10, "nlist": 128}
                             },
                             search_params={
                                 "metric_type": "IP",
                                 "index_type": "IVF_FLAT",
                                 "params": {"nprobe": 10, "nlist": 128}
                             },
                             dimension=1024,
                             collection_name="gptcache")
    data_manager = get_data_manager(cache_base, vector_base, max_size=1000)

    def init_gptcache(cache_obj: Cache, llm: str):
        # Called by langchain_community's GPTCache wrapper once per LLM string.
        encoder = LangChain(embeddings=embeddings, dimension=1024)
        cache_obj.init(pre_embedding_func=__get_content_func,
                       data_manager=data_manager,
                       similarity_evaluation=SearchDistanceEvaluation(),
                       embedding_func=encoder.to_embeddings,
                       post_func=__get_messages_func,
                       config=Config(similarity_threshold=0.6))

    set_llm_cache(GPTCache(init_func=init_gptcache))

if __name__ == "__main__":
    xinference = XinferenceEmbeddings(
        server_url=XINFERENCE_URL, model_uid=EMBEDDING_MODEL_NAME
    )
    init_miic_cache(embeddings=xinference)
    cache_llm = ChatOpenAI(
        model=LLM_MODEL_NAME,
        openai_api_key=API_KEY,
        openai_api_base=API_BASE,
        max_tokens=LLM_MAX_TOKENS,
        temperature=LLM_TEMPERATURE
    )
    prompt = PromptTemplate.from_template(template="{input}")
    chain = LLMChain(llm=cache_llm, prompt=prompt)
    # Repeat the same prompt: the first call should hit the LLM, the
    # following calls should be answered from the semantic cache.
    for _ in range(4):
        start_time = time.time()
        message = chain.invoke({"input": "你好介绍一下自己"})
        print(f"》》》》{message}")
        print("Time consuming: {:.2f}s".format(time.time() - start_time))
    # Call the LLM directly as well, bypassing the chain.
    for _ in range(2):
        start_time = time.time()
        message = cache_llm.invoke("你好介绍一下自己")
        print(f"》》》》{message}")
        print("Time consuming: {:.2f}s".format(time.time() - start_time))
    print("finished")

Expected Behavior

No response

Steps To Reproduce

No response

Environment

No response

Anything else?

No response

SimFG commented 5 months ago

@Songjiadong you need to check the MySQL encoding configuration.
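
One quick way to rule that out is to force utf8mb4 on the connection GPTCache uses. A minimal sketch, assuming CACHE_DB_URL is a plain SQLAlchemy MySQL URL with no query string yet:

# Sketch: force utf8mb4 on the MySQL connection used by GPTCache's CacheBase.
# Assumes CACHE_DB_URL looks like "mysql+pymysql://user:pass@host:3306/db".
cache_base = CacheBase(
    name='mysql',
    sql_url=CACHE_DB_URL + "?charset=utf8mb4",
    table_name='gptcache',
)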

Songjiadong commented 5 months ago

@SimFG The question is stored as readable Chinese; only the answer is garbled, so it is not a MySQL encoding problem. I suspect that when the data is written to the database, the JSON serialization does not set ensure_ascii=False, but I could not find where the write is triggered.

SimFG commented 5 months ago

here is the code line of saving data: https://github.com/zilliztech/GPTCache/blob/acc20f05400dabdcde451194e9bb73b986747685/gptcache/adapter/adapter.py#L257

Songjiadong commented 5 months ago

@SimFG Thanks. I traced through the code you pointed to, and the problem appears to be in langchain_community's cache.py:

        handled_data = _dumps_generations(return_val)
        put(prompt, handled_data, cache_obj=_gptcache)

Alternatively, decoding the data in the put method of GPTCache's adapter/api.py before passing it on would also fix the problem.
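
A sketch of that second option: wrap gptcache.adapter.api.put and re-serialize the answer with ensure_ascii=False before it is stored. The two-level json.loads mirrors the structure that _dumps_generations produces; put_readable is a made-up wrapper name, not a GPTCache API:

import json
from gptcache.adapter.api import put

def put_readable(prompt: str, handled_data: str, **kwargs):
    # _dumps_generations json-dumps a list of already-dumped generation
    # strings, and both passes default to ensure_ascii=True, so the stored
    # text is full of \uXXXX escapes. Decode both levels, then dump again
    # with ensure_ascii=False.
    items = json.loads(handled_data)
    decoded = [json.dumps(json.loads(item), ensure_ascii=False) for item in items]
    put(prompt, json.dumps(decoded, ensure_ascii=False), **kwargs)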

Songjiadong commented 5 months ago

@SimFG One more question I'd like to ask: the current cache is written through LangChain's global cache. If I want to add sessions, how should I do that? I saw LangChainLLMs in the docs before, but using it raises errors.

SimFG commented 5 months ago

You may not be able to use the session function. LangChainLLMs is currently not maintained because LangChain updates too quickly, so you may need to implement session-like behavior yourself outside GPTCache.
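
One way to approximate that is to fold a session id into the text that gets embedded, so entries from different sessions never match each other. A minimal sketch building on the __get_content_func above; make_session_pre_func and the session id format are made-up, not a GPTCache API:

def make_session_pre_func(session_id: str):
    # Prefix the embedded text with the session id so similarity search
    # only matches answers cached within the same session.
    def pre_func(data, **params):
        human = __get_content_func(data, **params)
        return f"[session:{session_id}] {human}"
    return pre_func

# Usage: cache_obj.init(pre_embedding_func=make_session_pre_func("user-42"), ...)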