langchain-ai / langchain

🦜🔗 Build context-aware reasoning applications
https://python.langchain.com
MIT License

REGRESSION: ChatLiteLLM: ValidationError only when using cache #22389

Closed thiswillbeyourgithub closed 1 month ago

thiswillbeyourgithub commented 3 months ago

Checked other resources

Example Code

import os
from pathlib import Path

from langchain.globals import set_verbose, set_debug, set_llm_cache
from langchain_community.chat_models import ChatLiteLLM
from langchain_community.cache import SQLiteCache
from langchain_core.output_parsers.string import StrOutputParser

os.environ["OPENAI_API_KEY"] = Path("OPENAI_API_KEY.txt").read_text().strip()
set_verbose(True)
set_debug(True)
Path("test_cache.db").unlink(missing_ok=True)
set_llm_cache(SQLiteCache(database_path="test_cache.db"))

llm = ChatLiteLLM(
    model_name="openai/gpt-4o",
    cache=True,
    verbose=True,
    temperature=0,
    )
print(llm.predict("this is a test"))  # works fine because cache empty
print("Success 1/2")
print(llm.predict("this is a test"))  # fails
print("Success 2/2")

Error Message and Stack Trace (if applicable)

Success 1/2
[llm/start] [llm:ChatLiteLLM] Entering LLM run with input:
{
  "prompts": [
    "Human: this is a test"
  ]
}
Retrieving a cache value that could not be deserialized properly. This is likely due to the cache being in an older format. Please recreate your cache to avoid this error.
[llm/error] [llm:ChatLiteLLM] [3ms] LLM run errored with error:
"ValidationError(model='ChatResult', errors=[{'loc': ('generations', 0, 'type'), 'msg': \"unexpected value; permitted: 'ChatGeneration'\", 'type': 'value_error.const', 'ctx': {'given': 'Generation', 'permitted': ('ChatGeneration',)}}, {'loc': ('generations', 0, 'message'), 'msg': 'field required', 'type': 'value_error.missing'}, {'loc': ('generations', 0, '__root__'), 'msg': 'Error while initializing ChatGeneration', 'type': 'value_error'}])Traceback (most recent call last):\n\n\n  File \"/home/USER/.pyenv/versions/doctoolsllm/lib/python3.11/site-packages/langchain_core/language_models/chat_models.py\", line 446, in generate\n    self._generate_with_cache(\n\n\n  File \"/home/USER/.pyenv/versions/doctoolsllm/lib/python3.11/site-packages/langchain_core/language_models/chat_models.py\", line 634, in _generate_with_cache\n    return ChatResult(generations=cache_val)\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n\n\n  File \"/home/USER/.pyenv/versions/doctoolsllm/lib/python3.11/site-packages/pydantic/v1/main.py\", line 341, in __init__\n    raise validation_error\n\n\npydantic.v1.error_wrappers.ValidationError: 3 validation errors for ChatResult\ngenerations -> 0 -> type\n  unexpected value; permitted: 'ChatGeneration' (type=value_error.const; given=Generation; permitted=('ChatGeneration',))\ngenerations -> 0 -> message\n  field required (type=value_error.missing)\ngenerations -> 0 -> __root__\n  Error while initializing ChatGeneration (type=value_error)"
Traceback (most recent call last):
  File "/home/LOCAL_PATH/DocToolsLLM/test.py", line 23, in <module>
    print(llm.predict("this is a test"))
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/USER/.pyenv/versions/doctoolsllm/lib/python3.11/site-packages/langchain_core/_api/deprecation.py", line 148, in warning_emitting_wrapper
    return wrapped(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/USER/.pyenv/versions/doctoolsllm/lib/python3.11/site-packages/langchain_core/language_models/chat_models.py", line 885, in predict
    result = self([HumanMessage(content=text)], stop=_stop, **kwargs)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/USER/.pyenv/versions/doctoolsllm/lib/python3.11/site-packages/langchain_core/_api/deprecation.py", line 148, in warning_emitting_wrapper
    return wrapped(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/USER/.pyenv/versions/doctoolsllm/lib/python3.11/site-packages/langchain_core/language_models/chat_models.py", line 847, in __call__
    generation = self.generate(
                 ^^^^^^^^^^^^^^
  File "/home/USER/.pyenv/versions/doctoolsllm/lib/python3.11/site-packages/langchain_core/language_models/chat_models.py", line 456, in generate
    raise e
  File "/home/USER/.pyenv/versions/doctoolsllm/lib/python3.11/site-packages/langchain_core/language_models/chat_models.py", line 446, in generate
    self._generate_with_cache(
  File "/home/USER/.pyenv/versions/doctoolsllm/lib/python3.11/site-packages/langchain_core/language_models/chat_models.py", line 634, in _generate_with_cache
    return ChatResult(generations=cache_val)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/USER/.pyenv/versions/doctoolsllm/lib/python3.11/site-packages/pydantic/v1/main.py", line 341, in __init__
    raise validation_error
pydantic.v1.error_wrappers.ValidationError: 3 validation errors for ChatResult
generations -> 0 -> type
  unexpected value; permitted: 'ChatGeneration' (type=value_error.const; given=Generation; permitted=('ChatGeneration',))
generations -> 0 -> message
  field required (type=value_error.missing)
generations -> 0 -> __root__
  Error while initializing ChatGeneration (type=value_error)
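
For readers skimming the traceback: the failure is a plain type mismatch. The cache hands back generic Generation objects, but ChatResult (used by chat models such as ChatLiteLLM) only accepts ChatGeneration entries, which additionally require a message field. The snippet below is not part of the original report; it is a minimal sketch that reproduces the same ValidationError using nothing but langchain_core:

from langchain_core.messages import AIMessage
from langchain_core.outputs import ChatGeneration, ChatResult, Generation

# What a chat model expects to rebuild from the cache: ChatGeneration wraps a message.
ok = ChatResult(generations=[ChatGeneration(message=AIMessage(content="hi"))])
print(ok.generations[0].text)  # "hi"

# What the cache actually returns after the failed deserialization: a plain Generation.
try:
    ChatResult(generations=[Generation(text="hi")])
except Exception as err:
    print(type(err).__name__)  # ValidationError, with the same three errors as above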

Description

System Info

System Information

OS: Linux
OS Version: #35~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Tue May 7 09:00:52 UTC 2
Python Version: 3.11.7 (main, Dec 28 2023, 19:03:16) [GCC 11.4.0]

Package Information

langchain_core: 0.2.3
langchain: 0.2.1
langchain_community: 0.2.1
langsmith: 0.1.67
langchain_mistralai: 0.1.7
langchain_openai: 0.1.8
langchain_text_splitters: 0.2.0

Packages not installed (Not Necessarily a Problem)

The following packages were not found:

langgraph
langserve

litellm==1.39.6

keenborder786 commented 3 months ago

You are right, this is a bug with SQLAlchemyCache for which I have raised a PR.
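
For what it's worth, the warning in the debug log above ("Retrieving a cache value that could not be deserialized properly...") points at the likely mechanism. Roughly, and this is a simplified, assumed sketch rather than the verbatim langchain_community source, SQLAlchemyCache.lookup tries to deserialize each stored row and, on failure, falls back to wrapping the raw string in a plain Generation:

from langchain_core.load import loads
from langchain_core.outputs import Generation

def _rows_to_generations(rows):
    # Assumed sketch of the fallback: if a stored blob cannot be rebuilt into
    # its original class (e.g. a ChatGeneration), a plain Generation is
    # returned instead, which chat models then reject with the ValidationError.
    try:
        return [loads(row[0]) for row in rows]
    except Exception:
        return [Generation(text=row[0]) for row in rows]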

thiswillbeyourgithub commented 1 month ago

@keenborder786 @eyurtsev Hi! Can I ask what the status of the fix is? I'm forced to re-pay for a lot of queries and it's really a pain.

thiswillbeyourgithub commented 1 month ago

In the end I coded a workaround cache. It's a class that inherits from BaseCache and acts like InMemoryCache, but it actually stores a zlib-compressed dill.dumps of each ChatGeneration (since json cannot serialize a ChatGeneration object).

"""
source : https://api.python.langchain.com/en/latest/_modules/langchain_community/cache.html#InMemoryCache

This workaround is to solve this: https://github.com/langchain-ai/langchain/issues/22389
Create a caching class that behaves like an in-memory cache but actually persists to SQLite

"""

import zlib
import sqlite3
import dill
from pathlib import Path
from typing import Union, Any, Optional
from threading import Lock

from langchain_core.caches import RETURN_VAL_TYPE, BaseCache

class SQLiteCacheFixed(BaseCache):
    """In-memory cache that persists every entry to a SQLite database."""

    def __init__(
        self,
        database_path: Union[str, Path],
        ) -> None:
        self.lock = Lock()
        self.database_path = Path(database_path)
        if self.database_path.exists():
            # Existing database: reload its entries into the in-memory dict.
            self.clear()
        else:
            conn = sqlite3.connect(self.database_path)
            cursor = conn.cursor()
            with self.lock:
                cursor.execute('''CREATE TABLE IF NOT EXISTS saved_llm_calls
                                (id INTEGER PRIMARY KEY AUTOINCREMENT,
                                data TEXT)''')
                conn.commit()
                conn.close()
            self._cache = {}

    def lookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]:
        """Look up based on prompt and llm_string."""
        key = (prompt, llm_string)
        if key in self._cache:
            return self._cache[key]

    def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None:
        """Update cache based on prompt and llm_string."""
        key = (prompt, llm_string)
        if key in self._cache and self._cache[key] == return_val:
            return
        self._cache[(prompt, llm_string)] = return_val
        data = zlib.compress(dill.dumps({"key": key, "value": return_val}))
        conn = sqlite3.connect(self.database_path)
        cursor = conn.cursor()
        with self.lock:
            cursor.execute("INSERT INTO saved_llm_calls (data) VALUES (?)", (data,))
            conn.commit()
            conn.close()

    def clear(self, **kwargs: Any) -> None:
        """(Re)load the in-memory cache from the SQLite database.

        Despite the name imposed by the BaseCache interface, this does not
        wipe the database; it is used on init to restore persisted entries.
        """
        conn = sqlite3.connect(self.database_path)
        cursor = conn.cursor()
        with self.lock:
            cursor.execute('''CREATE TABLE IF NOT EXISTS saved_llm_calls
                            (id INTEGER PRIMARY KEY AUTOINCREMENT,
                            data TEXT)''')
            cursor.execute("SELECT data FROM saved_llm_calls")
            rows = cursor.fetchall()
            conn.commit()
            conn.close()
        datas = [
            dill.loads(
                zlib.decompress(row[0])
            ) for row in rows
        ]
        self._cache = {
            d["key"]: d["value"]
            for d in datas
        }

    async def alookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]:
        """Look up based on prompt and llm_string."""
        return self.lookup(prompt, llm_string)

    async def aupdate(
        self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE
    ) -> None:
        """Update cache based on prompt and llm_string."""
        self.update(prompt, llm_string, return_val)

    async def aclear(self, **kwargs: Any) -> None:
        """Clear cache."""
        self.clear()
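
A usage sketch, mirroring the reproduction script at the top (same file names and model, which are assumptions carried over from there), to wire the workaround in place of SQLiteCache:

from langchain.globals import set_llm_cache
from langchain_community.chat_models import ChatLiteLLM

set_llm_cache(SQLiteCacheFixed(database_path="test_cache.db"))

llm = ChatLiteLLM(model_name="openai/gpt-4o", cache=True, temperature=0)
print(llm.predict("this is a test"))  # first call hits the API and is stored
print(llm.predict("this is a test"))  # second call is served from the cache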