Open mjspeck opened 3 weeks ago
Can you point me to documentation that explains how Nodes and Documents are related in llama_index? From what I understand, a Document is just a parent Node.
This is the current implementation.
def to_llama_index_nodes(self):
    """Convert this document's nodes into llama_index nodes.

    Builds a llama_index ``Document`` from this object's id and file
    metadata, then passes it to ``_nodes_to_llama_index`` and returns the
    result of that call.

    Returns:
        The value returned by ``self._nodes_to_llama_index`` for the
        constructed llama_index document.

    Raises:
        ImportError: If ``llama_index.core.schema.Document`` cannot be
            imported.
    """
    try:
        from llama_index.core.schema import Document as LlamaIndexDocument
    except ImportError as err:
        raise ImportError(
            "llama_index is not installed. Please install it with `pip install llama-index`."
        ) from err

    document_id = self.id_

    # NOTE(review): assumes creation_date and last_modified_date are set and
    # expose ``isoformat()`` -- confirm against the class definition.
    file_metadata = {
        "file_name": self.filename,
        "file_size": self.file_size,
        "creation_date": self.creation_date.isoformat(),
        "last_modified_date": self.last_modified_date.isoformat(),
    }

    # Metadata keys handed to the document's two exclusion lists; the LLM
    # list is the embedding list plus the file name.
    embed_excluded_keys = ["file_size", "creation_date", "last_modified_date"]
    llm_excluded_keys = ["file_name", *embed_excluded_keys]

    parent_document = LlamaIndexDocument(
        id_=document_id,
        metadata=file_metadata,
        excluded_embed_metadata_keys=embed_excluded_keys,
        excluded_llm_metadata_keys=llm_excluded_keys,
    )
    return self._nodes_to_llama_index(parent_document)
def _nodes_to_llama_index(self, llama_index_doc):
    """Convert ``self.nodes`` to llama_index nodes and link them together.

    The nodes are sorted, converted with each node's ``to_llama_index()``,
    chained to their neighbours through NEXT/PREVIOUS relationships, and
    each is given ``llama_index_doc`` as its PARENT relationship.

    Args:
        llama_index_doc: Object whose ``as_related_node_info()`` result is
            stored as the PARENT relationship of every converted node.

    Returns:
        The list of converted nodes, in sorted order.

    Raises:
        ImportError: If ``llama_index.core.schema.NodeRelationship`` cannot
            be imported.
    """
    try:
        from llama_index.core.schema import NodeRelationship
    except ImportError as err:
        raise ImportError(
            "llama_index is not installed. Please install it with `pip install llama-index`."
        ) from err

    converted = [node.to_llama_index() for node in sorted(self.nodes)]

    # Walk adjacent pairs so each node points forward to its successor and
    # each successor points back to its predecessor.
    for earlier, later in zip(converted, converted[1:]):
        earlier.relationships[NodeRelationship.NEXT] = later.as_related_node_info()
        later.relationships[NodeRelationship.PREVIOUS] = (
            earlier.as_related_node_info()
        )

    # NOTE(review): the parent is the document itself, referenced through the
    # same related-node-info mechanism as ordinary nodes.
    for converted_node in converted:
        parent_info = llama_index_doc.as_related_node_info()
        converted_node.relationships[NodeRelationship.PARENT] = parent_info

    return converted
Description
It would be great to have, in addition to the `to_llama_index_nodes` method, a `to_llama_index_document` method on the `openparse.schemas.ParsedDocument` class that returns a valid `llama_index.core.schema.Document` object.