Closed snassimr closed 2 months ago
Yes, storing an agent is supported in `llama_index` for the Multi-Document Agent.
The `MultiDocumentAgentsPack` class initializes and stores agents for each document in the `self.agents` dictionary.
Additionally, a top-level agent is also stored in `self.top_agent` [1].
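To serialize and deserialize the state of an agent, you can use the `to_json` method to serialize the state and the `from_json` method to deserialize it.
Here is an example based on the `OneDriveReader` class (note that the example itself calls `json()` and `parse_raw()` rather than `to_json` and `from_json`):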
from llama_index.readers.microsoft_onedrive import OneDriveReader
# Placeholder credentials used only for this example
test_client_id = "test_client_id"
test_tenant_id = "test_tenant_id"

# Build the reader from the placeholder credentials
reader = OneDriveReader(client_id=test_client_id, tenant_id=test_tenant_id)

# Dump the reader to a JSON string, leaving out fields that were never set
json_data = reader.json(exclude_unset=True)

# Rebuild a second reader instance from that JSON string
new_reader = OneDriveReader.parse_raw(json_data)

# Both identifiers must survive the round trip unchanged
assert new_reader.client_id == reader.client_id
assert new_reader.tenant_id == reader.tenant_id
This code demonstrates how to serialize the state of a `OneDriveReader` instance to JSON and then deserialize it back to a new instance, ensuring that the state is preserved [2][3][4].
Additionally, the `create_llama_agent` method facilitates saving and loading agents to/from disk using the `agent_path` parameter [5].
To continue talking to Dosu, mention @dosu.
After some review, I didn't find a reasonable answer to my question: `MultiDocumentAgentsPack` doesn't contain save/restore functionality, and link [5] contains information about a LangChain agent, not the Multi-Document Agent.
You are correct. The `MultiDocumentAgentsPack` class does not directly provide save/restore functionality.
However, you can implement custom serialization and deserialization for the `MultiDocumentAgentsPack` by leveraging the `to_json` and `from_json` methods of the components within the pack.
Here is an example of how you might implement custom serialization and deserialization for `MultiDocumentAgentsPack`:
import json
def serialize_multi_document_agents_pack(pack: "MultiDocumentAgentsPack") -> str:
    """Serialize a pack's agents, top-level agent and object index to JSON.

    Each component is serialized by calling its own ``to_json()`` method; the
    results are bundled under the keys ``"agents"``, ``"top_agent"`` and
    ``"obj_index"`` and dumped as a single JSON string.

    NOTE(review): this assumes every agent and the object index expose a
    ``to_json()`` method -- confirm against the installed llama_index version
    before relying on it.

    Args:
        pack: Object whose ``agents`` (mapping of title to agent),
            ``top_agent`` and ``obj_index`` attributes are serialized.

    Returns:
        A JSON string holding the three serialized components.
    """
    # The annotation is a string so the function can be defined even when
    # MultiDocumentAgentsPack has not been imported yet.
    data = {
        "agents": {title: agent.to_json() for title, agent in pack.agents.items()},
        "top_agent": pack.top_agent.to_json(),
        "obj_index": pack.obj_index.to_json(),
    }
    return json.dumps(data)
def deserialize_multi_document_agents_pack(json_data: str) -> "MultiDocumentAgentsPack":
    """Rebuild a pack from a JSON string holding its serialized components.

    The JSON string must contain the keys ``"agents"``, ``"top_agent"`` and
    ``"obj_index"``.

    NOTE(review): this relies on ``OpenAIAgent.from_json`` and
    ``ObjectIndex.from_json`` being available -- confirm against the installed
    llama_index version.

    Args:
        json_data: JSON string with the three serialized components.

    Returns:
        A pack whose ``agents``, ``top_agent`` and ``obj_index`` attributes
        have been restored.

    Raises:
        KeyError: If one of the three expected keys is missing.
    """
    data = json.loads(json_data)
    # __new__ creates the instance without running __init__, so only the
    # three attributes assigned below are set on the returned pack.
    pack = MultiDocumentAgentsPack.__new__(MultiDocumentAgentsPack)
    pack.agents = {
        title: OpenAIAgent.from_json(agent_json)
        for title, agent_json in data["agents"].items()
    }
    pack.top_agent = OpenAIAgent.from_json(data["top_agent"])
    pack.obj_index = ObjectIndex.from_json(data["obj_index"])
    return pack
# Serialize the pack to a JSON string
json_data = serialize_multi_document_agents_pack(multi_document_agents_pack)

# Save to file (explicit encoding so the result does not depend on the
# platform's default)
with open("multi_document_agents_pack.json", "w", encoding="utf-8") as f:
    f.write(json_data)

# Load from file
with open("multi_document_agents_pack.json", "r", encoding="utf-8") as f:
    json_data = f.read()

# Deserialize the pack from the JSON string that was read back
multi_document_agents_pack = deserialize_multi_document_agents_pack(json_data)
This approach allows you to save and restore the state of your `MultiDocumentAgentsPack` by serializing its components to JSON and then deserializing them back. This should help you follow the training/inference paradigm you mentioned.
To continue talking to Dosu, mention @dosu.
I am getting `AttributeError: 'OpenAIAgent' object has no attribute 'to_json'`. Is there a fix for this?
To fix the `AttributeError` and properly serialize and deserialize the `MultiDocumentAgentsPack`, you need to implement the `to_json` and `from_json` methods for the `OpenAIAgent` class. Here are the implementations:
OpenAIAgent class:
import json
class OpenAIAgent:
    """Sketch of JSON (de)serialization methods for an OpenAI agent.

    Only ``to_json`` and ``from_json`` are shown; the rest of the class is
    elided.

    NOTE(review): the ``to_dict`` / ``from_dict`` methods called on the tools,
    LLM, memory, messages, callback manager and tool retriever are assumed to
    exist -- confirm against the installed llama_index version.
    """

    # Existing methods...

    def to_json(self) -> str:
        """Serialize the OpenAIAgent to a JSON string.

        Returns:
            A JSON object string with the keys ``tools``, ``llm``, ``memory``,
            ``prefix_messages``, ``verbose``, ``max_function_calls``,
            ``callback_manager`` and ``tool_retriever``. The last two are
            ``null`` when the corresponding attribute is ``None``.
        """
        # Compare against None explicitly: a component that is present but
        # evaluates as falsy must still be serialized.
        callback_manager = (
            self.callback_manager.to_dict()
            if self.callback_manager is not None
            else None
        )
        tool_retriever = (
            self._tool_retriever.to_dict()
            if self._tool_retriever is not None
            else None
        )
        return json.dumps({
            "tools": [tool.to_dict() for tool in self._get_tools("")],
            "llm": self._llm.to_dict(),
            "memory": self.memory.to_dict(),
            "prefix_messages": [message.to_dict() for message in self.prefix_messages],
            "verbose": self._verbose,
            "max_function_calls": self._max_function_calls,
            "callback_manager": callback_manager,
            "tool_retriever": tool_retriever,
        })

    @classmethod
    def from_json(cls, json_str: str) -> "OpenAIAgent":
        """Deserialize a JSON string to an OpenAIAgent instance.

        Args:
            json_str: JSON object string with the keys written by ``to_json``.

        Returns:
            A new instance built from the deserialized components.

        Raises:
            KeyError: If one of the expected keys is missing.
        """
        data = json.loads(json_str)
        tools = [BaseTool.from_dict(tool_data) for tool_data in data["tools"]]
        llm = OpenAI.from_dict(data["llm"])
        memory = BaseMemory.from_dict(data["memory"])
        prefix_messages = [
            ChatMessage.from_dict(msg_data) for msg_data in data["prefix_messages"]
        ]
        # Mirror to_json: only a JSON null means "component absent".
        callback_manager = (
            CallbackManager.from_dict(data["callback_manager"])
            if data["callback_manager"] is not None
            else None
        )
        tool_retriever = (
            ObjectRetriever.from_dict(data["tool_retriever"])
            if data["tool_retriever"] is not None
            else None
        )
        return cls(
            tools=tools,
            llm=llm,
            memory=memory,
            prefix_messages=prefix_messages,
            verbose=data["verbose"],
            max_function_calls=data["max_function_calls"],
            callback_manager=callback_manager,
            tool_retriever=tool_retriever,
        )
MultiDocumentAgentsPack class:
You can then use these methods in your `MultiDocumentAgentsPack` class for serialization and deserialization:
import json
from typing import Any, Dict, List
class MultiDocumentAgentsPack(BaseLlamaPack):
    """Multi-document Agents pack."""

    def __init__(
        self,
        docs: List[Document],
        doc_titles: List[str],
        doc_descriptions: List[str],
        **kwargs: Any,
    ) -> None:
        """Build the pack from documents, their titles and their descriptions.

        Args:
            docs: Documents the pack is built from.
            doc_titles: Titles of the documents.
            doc_descriptions: Descriptions of the documents.
            **kwargs: Additional keyword arguments.
        """
        # serialize() reads these three attributes, so they must be retained
        # on the instance.
        self.docs = docs
        self.doc_titles = doc_titles
        self.doc_descriptions = doc_descriptions
        # Initialization code...

    def get_modules(self) -> Dict[str, Any]:
        """Get modules."""
        return {
            "top_agent": self.top_agent,
            "obj_index": self.obj_index,
            "doc_agents": self.agents,
        }

    def run(self, *args: Any, **kwargs: Any) -> Any:
        """Run the pipeline."""
        return self.top_agent.query(*args, **kwargs)

    def serialize(self) -> str:
        """Serialize the MultiDocumentAgentsPack to a JSON string.

        NOTE(review): assumes the documents expose ``to_dict()`` and that the
        agents and the object index expose ``to_json()`` -- confirm against
        the installed llama_index version.

        Returns:
            A JSON object string with the keys ``docs``, ``doc_titles``,
            ``doc_descriptions``, ``agents``, ``top_agent`` and ``obj_index``.
        """
        data = {
            "docs": [doc.to_dict() for doc in self.docs],
            "doc_titles": self.doc_titles,
            "doc_descriptions": self.doc_descriptions,
            "agents": {title: agent.to_json() for title, agent in self.agents.items()},
            "top_agent": self.top_agent.to_json(),
            "obj_index": self.obj_index.to_json(),
        }
        return json.dumps(data)

    @classmethod
    def deserialize(cls, data: str) -> "MultiDocumentAgentsPack":
        """Deserialize a JSON string to a MultiDocumentAgentsPack.

        Args:
            data: JSON object string with the keys written by ``serialize``.

        Returns:
            A pack whose agents, top-level agent and object index have been
            replaced by the deserialized ones.

        Raises:
            KeyError: If one of the expected keys is missing.
        """
        data_dict = json.loads(data)
        docs = [Document.from_dict(doc) for doc in data_dict["docs"]]
        doc_titles = data_dict["doc_titles"]
        doc_descriptions = data_dict["doc_descriptions"]
        # NOTE(review): this runs __init__ in full before the stored
        # components are swapped in -- confirm that cost is acceptable.
        instance = cls(docs, doc_titles, doc_descriptions)
        instance.agents = {
            title: OpenAIAgent.from_json(agent)
            for title, agent in data_dict["agents"].items()
        }
        instance.top_agent = OpenAIAgent.from_json(data_dict["top_agent"])
        instance.obj_index = ObjectIndex.from_json(data_dict["obj_index"])
        return instance
This should resolve the `AttributeError` and allow you to serialize and deserialize the `MultiDocumentAgentsPack` properly.
To continue talking to Dosu, mention @dosu.
Question Validation
Question
Hi,
Is storing an agent supported in llama_index? Although most of the time is spent on index building, I'd like to save the agent so that I can follow a training/inference paradigm. I am using the Multi-Document Agent (https://docs.llamaindex.ai/en/stable/examples/agent/multi_document_agents/).
This capability was developed at the time in rags (https://github.com/run-llama/rags).
Thanks