langchain-ai / langchain

🦜🔗 Build context-aware reasoning applications
https://python.langchain.com
MIT License

[ERROR] [Server Error] {"title":"'messages' array must only contain objects with a 'content' field that is not empty"} #25495

Closed: DiogoR23 closed this issue 6 days ago

DiogoR23 commented 3 weeks ago

Checked other resources

Example Code

import logging
import os

from dotenv import load_dotenv
from openai import OpenAI

from langchain.tools.retriever import create_retriever_tool
from langchain_community.utilities.cassandra import SetupMode  # import path may vary across langchain-community versions
from langchain_community.vectorstores import Cassandra
from langchain_openai import OpenAIEmbeddings

# connect_to_cassandra, fetch_articles_from_cassandra, load_data_chat_format and
# save_answer_question are my own helpers (defined elsewhere).

logging.basicConfig(level=logging.DEBUG,
                    format='%(levelname)s - %(message)s')

load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
BASE_URL = os.getenv("BASE_URL")
CASSANDRA_KEYSPACE = os.getenv("CASSANDRA_KEYSPACE")

def connect_to_cassandra_vstore(session):
    """
    Create a Cassandra Vector Store from a session.
    """
    logging.debug("Creating OpenAIEmbeddings...")
    embeddings = OpenAIEmbeddings(
        api_key=OPENAI_API_KEY,
        base_url=BASE_URL,
        model="CompendiumLabs/bge-large-en-v1.5-gguf",
        check_embedding_ctx_length=False,
        deployment="text-embedding-3-large",
        dimensions=768
    )

    logging.debug("Creating Cassandra vector store...")
    vstore = Cassandra(
        embedding=embeddings,
        session=session,
        table_name="vector_store",
        keyspace=CASSANDRA_KEYSPACE,
        setup_mode=SetupMode.SYNC
    )

    logging.debug("Cassandra vector store created successfully.")

    return vstore

def main():
    ai_answer = []
    user_question = []
    session = None  # make sure 'session' exists for the finally block even if connecting fails
    try:
        logging.debug("Connecting to Cassandra...")
        session = connect_to_cassandra()

        logging.debug("Fetching articles from Cassandra...")
        articles = fetch_articles_from_cassandra(session)
        logging.debug("Successfully, fetched articles!")

        logging.debug("Loading data in chat format...")
        prepared_data = load_data_chat_format(articles)
        logging.debug("Successfully data loaded in chat format!")

        logging.debug("Creating retriever tool...")
        vstore = connect_to_cassandra_vstore(session=session)
        retriever = vstore.as_retriever(search_kwargs={"k": 3})
        retriever_tool = create_retriever_tool(
            retriever=retriever,
            name="law_search_tool",
            description=("""Search for information about Portuguese laws.
                         For any questions about some law or any doubts the user has about Portuguese rules, you must use this tool!"""
                        )
        )
        print("Retriever Tool -> ", retriever_tool)
        logging.debug("Successfully created retriever tool!")

        prompt = ("You are an intelligent assistant specialized in Portuguese law."
                  "Your role is to provide accurate and detailed information about Portuguese laws using the provided database."
                  "When answering user queries, refer to specific laws and articles where applicable. Ensure your responses are precise and useful."
                  )

        logging.debug("Initializing OpenAI client...")
        client = OpenAI(base_url=BASE_URL, api_key=OPENAI_API_KEY)
        logging.debug("OpenAI client initialized!")

        history = [
            {"role": "system", "content": prompt},
            {"role": "user", "content": "Hello, introduce yourself to someone opening this program for the first time. Be concise."},
            {"role": "user", "content": prepared_data}
        ]

        while True:
            user_input = input("User --> ")

            retriever_response = retriever_tool.invoke({"query": user_input})
            print("Retriever Response:", retriever_response)
            logging.debug("Similarity Search: \n")
            input_data = history + [{"role": "assistant", "content": retriever_response}]

            completion = client.chat.completions.create(
                model="LM Studio Community/Meta-Llama-3-8B-Instruct-GGUF",
                messages=input_data,
                temperature=0.7,
                stream=True,
            )

            response = ""

            for chunk in completion:
                if chunk.choices[0].delta.content:
                    response += chunk.choices[0].delta.content

            logging.debug(f"Jarvis --> {response}")

            history.append({"role": "user", "content": user_input})
            history.append({"role": "assistant", "content": response})

            ai_answer.append(response)
            user_question.append(user_input)

    except Exception as e:
        logging.error(f"Error initializing the system: {e}")

    finally:
        if session:
            save_answer_question(answers_history=ai_answer, input_history=user_question, session=session)
            session.shutdown()

Error Message and Stack Trace (if applicable)

Error during chat interaction: Error code: 400 - {'error': "'messages' array must only contain objects with a 'content' field that is not empty."}

Description

I am trying to create a retriever tool using the create_retriever_tool() function from the langchain library. I first create a vector store using Cassandra, and then I create a retriever from it.

I am using LM Studio as my local server; here is what it shows (screenshot omitted).

I know for a fact that the problem has to do with the retriever_tool, because when I try to print its response, nothing appears.
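
The 400 itself comes from the server rejecting any entry in the messages array whose content is an empty string, and my code appends the retriever output as an assistant message unconditionally. A guard like this (only a sketch, reusing the names from the example code above) avoids ever sending an empty message, though it does not fix the empty retrieval itself:

retriever_response = retriever_tool.invoke({"query": user_input})

input_data = list(history)
if retriever_response and retriever_response.strip():
    input_data.append({"role": "assistant", "content": retriever_response})
else:
    # Skip the empty message so no entry in 'messages' has content == ""
    logging.warning("Retriever returned no content; sending history only.")

completion = client.chat.completions.create(
    model="LM Studio Community/Meta-Llama-3-8B-Instruct-GGUF",
    messages=input_data,
    temperature=0.7,
    stream=True,
)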

System Info

Poetry dependencies (from pyproject.toml):

python = "^3.10"
pytest-playwright = "^0.5.1"
cassandra-driver = "^3.29.1"
playwright = "^1.45.0"
beautifulsoup4 = "^4.12.3"
lxml = "^5.2.2"
langchain = {extras = ["all"], version = "^0.2.5"}
jsonpatch = "^1.33"
jsonpointer = "^2.4"
langchain-astradb = "*"
langchain-core = "*"
langchain-openai = "*"
langchain-text-splitters = "*"
langchainhub = "*"
langsmith = "*"
numpy = "*"
openai = "*"
python-dotenv = "*"
requests = "*"
nltk = "*"
tenacity = "*"
langchain-experimental = "^0.0.61"
astrapy = "^1.2.1"
bson = "^0.5.10"
transformers = "^4.41.2"
langchain-huggingface = "^0.0.3"
text-generation = "^0.7.0"
cassio = "^0.1.8"
langchain-community = "^0.2.11"
ccurme commented 3 weeks ago

It looks like retriever_response is "".

I'd suggest debugging this via the underlying vector store. e.g., try

vstore.similarity_search(user_input)

and see if the content is empty.
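
Spelled out a bit more, a sketch of that check (reusing vstore and user_input from your example code):

# Inspect what the vector store actually returns for the query.
docs = vstore.similarity_search(user_input, k=3)
print(f"{len(docs)} documents returned")
for doc in docs:
    print(repr(doc.page_content[:200]))  # empty strings here would explain the 400 error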

DiogoR23 commented 3 weeks ago

Yeah, it is empty (screenshot omitted).
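
One extra check on my side (a sketch with the same vstore) is to insert a known text and search for it again, to tell an empty table apart from a broken embedding call:

# If this round-trip also comes back empty, the documents were probably
# never stored, or were stored with incompatible embeddings.
vstore.add_texts(["sanity-check document about Portuguese law"])
print(vstore.similarity_search("sanity-check document", k=1))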

DiogoR23 commented 1 week ago

I used a different embedding model, nomic-ai/nomic-embed-text-v1.5, and it no longer gave me this error.

def connect_to_cassandra_vstore(session):
    """
    Create a Cassandra Vector Store from a session.
    """
    logging.debug("Creating OpenAIEmbeddings...")
    embeddings = OpenAIEmbeddings(
        api_key=OPENAI_API_KEY,
        base_url=BASE_URL,
        model="nomic-ai/nomic-embed-text-v1.5-GGUF",
        check_embedding_ctx_length=False,
        deployment="text-embedding-3-large",
        dimensions=768
    )

    logging.debug("Creating Cassandra vector store...")
    vstore = Cassandra(
        embedding=embeddings,
        session=session,
        table_name="vector_store",
        keyspace=CASSANDRA_KEYSPACE,
        setup_mode=SetupMode.SYNC
    )

    logging.debug("Cassandra vector store created successfully.")

    return vstore
DiogoR23 commented 1 week ago

I thought that using the new, smaller model had fixed the problem, but it did not. It gave me another error, Error initializing the system: Error from server: code=2200 [Invalid query] message="Not enough bytes to read a vector<float, 1024>", which I have already fixed, and now this error is appearing again: Error initializing the system: Error code: 400 - {'error': "'messages' array must only contain objects with a 'content' field that is not empty."}.
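
For reference, that code=2200 message usually points at a dimension mismatch: the vector_store table expects vector<float, 1024>, while the embeddings being written or queried have a different length (I pass dimensions=768). A quick check, as a sketch with the embeddings object from connect_to_cassandra_vstore (the commented DROP is only an illustration and deletes the stored vectors):

# Verify the embedding size actually produced by the model.
dim = len(embeddings.embed_query("dimension check"))
print(f"Embedding dimension: {dim}")  # must match the vector<float, N> column of the table

# If it does not match, the simplest way out during development is to
# recreate the table (this throws away previously stored vectors):
# session.execute(f"DROP TABLE IF EXISTS {CASSANDRA_KEYSPACE}.vector_store")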

DiogoR23 commented 6 days ago

I finally managed to debug this error.

Instead of invoking the retriever tool directly with the user's question, I created an AgentExecutor using a PromptTemplate, like this:

...
        vstore = connect_to_cassandra_vstore(session=session)
        retriever = vstore.as_retriever(search_kwargs={"k": 100})
        tool = create_retriever_tool(
            retriever=retriever,
            name="law_search_tool",
            description=("""Search for information about Portuguese laws.
                         For any questions about some law or any doubts the user has about Portuguese rules, you must use this tool!"""
                        )
        )
        tools = [tool]
        logging.debug("Successfully created retriever tool!")
        print(f"Retriever Tool -> {tool}")

        prompt_template = PromptTemplate(
            input_variables=["input", "agent_scratchpad"],
            template=("You are an intelligent assistant specialized in Portuguese law. "
                      "Your role is to provide accurate and detailed information about Portuguese laws using the provided database. "
                      "When answering user queries, refer to specific laws and articles where applicable. "
                      "Ensure your responses are precise and useful.\n\n"
                      "Query: {input}\n"
                      "{agent_scratchpad}")
        )

        logging.debug("Initializing OpenAI client...")
        llm = ChatOpenAI(
            api_key=OPENAI_API_KEY,
            base_url=BASE_URL,
            model="LM Studio Community/Meta-Llama-3-8B-Instruct-GGUF",
            temperature=0.7,
        )
        agent = create_openai_tools_agent(llm=llm, tools=tools, prompt=prompt_template)
        agent_executor = AgentExecutor(agent=agent, tools=tools)
        logging.debug("OpenAI client initialized!")

        while (user_input := input("User ('q' to quit) -> ")) != "q":
            result = agent_executor.invoke({"input": user_input, "agent_scratchpad": ""})
            response = result["output"]

            logging.debug(f"Response --> {response}")

            ai_answer.append(response)
            user_question.append(user_input)

    except Exception as e:
        logging.error(f"Error initializing the system: {e}")

    finally:
        if session:
            save_answer_question(answers_history=ai_answer, input_history=user_question, session=session)
            session.shutdown()
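
For completeness, the snippet above assumes imports roughly like these (paths as of langchain 0.2.x; adjust to the installed versions):

from langchain.agents import AgentExecutor, create_openai_tools_agent
from langchain.tools.retriever import create_retriever_tool
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI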

Closing this issue.