run-llama / llama_index

LlamaIndex is a data framework for your LLM applications
https://docs.llamaindex.ai
MIT License
35.75k stars 5.05k forks source link

[Question]: I followed the LlamaIndex Nebula example, and this error occurred: #16274

Open abc-w opened 17 hours ago

abc-w commented 17 hours ago

Question Validation

Question

from llama_index.core.indices.property_graph import PropertyGraphIndex from llama_index.core.storage.storage_context import StorageContext from llama_index.llms.openai import OpenAI

index = PropertyGraphIndex.from_documents( documents, property_graph_store=graph_store, vector_store=vec_store, show_progress=True, )

index.storage_context.vector_store.persist("./data/nebula_vec_store.json")

Versions: Python 3.12.2, llama-index-graph-stores-nebula 0.3.0, llama-index 0.11.14. I followed the LlamaIndex Nebula example, and this error occurred: image


Exception Traceback (most recent call last) Cell In[34], line 5 2 from llama_index.core.storage.storage_context import StorageContext 3 from llama_index.llms.openai import OpenAI ----> 5 index = PropertyGraphIndex.from_documents( 6 documents, 7 property_graph_store=graph_store, 8 vector_store=vec_store, 9 show_progress=True, 10 ) 12 index.storage_context.vector_store.persist("./data/nebula_vec_store.json")

File /data/pingchuan/miniconda3/lib/python3.12/site-packages/llama_index/core/indices/base.py:119, in BaseIndex.from_documents(cls, documents, storage_context, show_progress, callback_manager, transformations, kwargs) 110 docstore.set_document_hash(doc.get_doc_id(), doc.hash) 112 nodes = run_transformations( 113 documents, # type: ignore 114 transformations, 115 show_progress=show_progress, 116 kwargs, 117 ) --> 119 return cls( 120 nodes=nodes, 121 storage_context=storage_context, 122 callback_manager=callback_manager, 123 show_progress=show_progress, 124 transformations=transformations, 125 **kwargs, 126 )

File /data/pingchuan/miniconda3/lib/python3.12/site-packages/llama_index/core/indices/property_graph/base.py:134, in PropertyGraphIndex.init(self, nodes, llm, kg_extractors, property_graph_store, vector_store, use_async, embed_model, embed_kg_nodes, callback_manager, transformations, storage_context, show_progress, kwargs) 128 self._embed_kg_nodes = embed_kg_nodes 129 self._override_vector_store = ( 130 vector_store is not None 131 or not storage_context.property_graph_store.supports_vector_queries 132 ) --> 134 super().init( 135 nodes=nodes, 136 callback_manager=callback_manager, 137 storage_context=storage_context, 138 transformations=transformations, 139 show_progress=show_progress, 140 kwargs, 141 )

File /data/pingchuan/miniconda3/lib/python3.12/site-packages/llama_index/core/indices/base.py:77, in BaseIndex.init(self, nodes, objects, index_struct, storage_context, callback_manager, transformations, show_progress, kwargs) 75 if index_struct is None: 76 nodes = nodes or [] ---> 77 index_struct = self.build_index_from_nodes( 78 nodes + objects, # type: ignore 79 kwargs, # type: ignore 80 ) 81 self._index_struct = index_struct 82 self._storage_context.index_store.add_index_struct(self._index_struct)

File /data/pingchuan/miniconda3/lib/python3.12/site-packages/llama_index/core/indices/base.py:185, in BaseIndex.build_index_from_nodes(self, nodes, build_kwargs) 183 """Build the index from nodes.""" 184 self._docstore.add_documents(nodes, allow_update=True) --> 185 return self._build_index_from_nodes(nodes, build_kwargs)

File /data/pingchuan/miniconda3/lib/python3.12/site-packages/llama_index/core/indices/property_graph/base.py:334, in PropertyGraphIndex._build_index_from_nodes(self, nodes, build_kwargs) 330 def _build_index_from_nodes( 331 self, nodes: Optional[Sequence[BaseNode]], build_kwargs: Any 332 ) -> IndexLPG: 333 """Build index from nodes.""" --> 334 nodes = self._insert_nodes(nodes or []) 336 # this isn't really used or needed 337 return IndexLPG()

File /data/pingchuan/miniconda3/lib/python3.12/site-packages/llama_index/core/indices/property_graph/base.py:293, in PropertyGraphIndex._insert_nodes(self, nodes) 290 self._insert_nodes_to_vector_index(kg_nodes_to_insert) 292 if len(nodes) > 0: --> 293 self.property_graph_store.upsert_llama_nodes(nodes) 295 if len(kg_nodes_to_insert) > 0: 296 self.property_graph_store.upsert_nodes(kg_nodes_to_insert)

File /data/pingchuan/miniconda3/lib/python3.12/site-packages/llama_index/core/graph_stores/types.py:367, in PropertyGraphStore.upsert_llama_nodes(self, llama_nodes) 358 metadata_dict = node_to_metadata_dict(llama_node, remove_text=True) 359 converted_nodes.append( 360 ChunkNode( 361 text=llama_node.get_content(metadata_mode=MetadataMode.NONE), (...) 365 ) 366 ) --> 367 self.upsert_nodes(converted_nodes)

File /data/pingchuan/miniconda3/lib/python3.12/site-packages/llama_index/graph_stores/nebula/nebula_property_graph.py:252, in NebulaPropertyGraphStore.upsert_nodes(self, nodes) 250 insertquery += f'"{chunk.id}":($chunk{i}),' 251 insert_query = insert_query[:-1] # Remove trailing comma --> 252 self.structured_query( 253 insert_query, 254 param_map={ 255 f"chunk_{i}": chunk.text for i, chunk in enumerate(chunk_list) 256 }, 257 ) 259 if entity_list: 260 # model with tag Entity and other tags(label) if applicable 261 # need to add properties as well, for extractors like SchemaLLMPathExtractor there is no properties (...) 266 267 # The meta tag Entity is used to store the entity name 268 insert_query = "INSERT VERTEX Entity__ (name) VALUES "

File /data/pingchuan/miniconda3/lib/python3.12/site-packages/llama_index/graph_stores/nebula/nebula_property_graph.py:550, in NebulaPropertyGraphStore.structured_query(self, query, param_map) 548 result = self._client.execute_parameter(query, build_param_map(param_map)) 549 if not result.is_succeeded(): --> 550 raise Exception( 551 "NebulaGraph query failed:", 552 result.error_msg(), 553 "Statement:", 554 query, 555 "Params:", 556 param_map, 557 ) 558 full_result = [ 559 { 560 key: result.row_values(row_index)[i].cast_primitive() (...) 563 for row_index in range(result.row_size()) 564 ] 565 if self.sanitize_query_output: 566 # Not applicable for NebulaGraph for now though

Exception: ('NebulaGraph query failed:', 'Wrong vertex id type: "6777185c-88e8-4634-97dd-dbfb93ab6332"', 'Statement:', 'INSERT VERTEX Chunk__ (text) VALUES "6777185c-88e8-4634-97dd-dbfb93ab6332":($chunk_0),"2b00ad27-bcf3-4269-8165-b84824b6bc5f":($chunk_1),"522c0eb6-beef-45c4-9b9f-b2f43ddb165b":($chunk_2),"8ec0b00b-5c3e-467f-89c5-5ef19d688662":($chunk_3),"62d9a209-e571-4e95-a0fd-1777f338ab44":($chunk_4),"93b541c7-623e-45c9-819a-28690e3bd272":($chunk_5),"6b4743f6-605d-4d2b-a214-2213c9c862e9":($chunk_6),"f94f76da-4dda-4bba-acb6-3dcdab051cef":($chunk_7),"c1acd5d9-c520-44d9-864e-e43a78989d07":($chunk_8),"01c5aef8-9eae-48c3-ba25-87ef1ace3145":($chunk_9),"3822cee4-6c24-4f35-bfea-729cc5cbec85":($chunk_10),"80e524cd-a525-4641-be29-1a7115489bc4":($chunk_11),"852a19b1-20d3-477b-bc5d-7d03f5ab46ca":($chunk_12),"8802294a-4356-49dc-9cc7-88d9c29f7d07":($chunk_13),"ba0d6a29-b687-4e8e-b9fe-e1a9ea8caf51":($chunk_14),"445e9a6b-f00a-4d66-8295-d444b17be69b":($chunk_15),"0867ee18-a0dc-4c63-b3fd-9b06cf29b85a":($chunk_16),"98961abe-f0c8-4e2a-905b-33ff3c8e5e8a":($chunk_17),"b1689ff2-a88d-4ce4-8a48-c43ae75e105a":($chunk_18),"cb2f6bcf-2662-4970-819f-9a369d1c4c69":($chunk_19),"2240fe62-0321-4163-8df7-7f471e9e90be":($chunk_20),"bd881a11-b66d-461a-bd47-381f1ab2189d":($chunk_21)', 'Params:', {'chunk_0': 'What I Worked On\n\nFebruary 2021\n\nBefore college the two main things I worked on, outside of school, were writing and programming. I didn\'t write essays."})

dosubot[bot] commented 17 hours ago

Hey there, @abc-w! I'm here to help you with any bugs, questions, or contributions you have in mind. Let's tackle this together!

I found a similar open issue that might be relevant to your problem:

The error you're encountering, "Wrong vertex id type," indicates that the vertex IDs (vids) you are using do not match the expected type defined in your NebulaGraph space. The vertex ID type can either be INT64 or FIXED_STRING.

To resolve this, you need to ensure that the vertex IDs you are using match the expected type. Here is a relevant snippet that shows how to handle different vertex ID types:

def prepare_subjs_param(
    subjs: Optional[List[str]], vid_type: str = "FIXED_STRING(256)"
) -> Dict:
    """Build a NebulaGraph query parameter map for the given subject vids.

    Args:
        subjs: Subject vertex ids as strings, or None.
        vid_type: The graph space's vid type, either "INT64" or
            "FIXED_STRING(N)". For INT64 spaces, only digit strings are
            valid vertex ids; non-digit entries are silently filtered out.

    Returns:
        A dict mapping "subjs" to a ``ttypes.Value`` holding an NList of
        the converted ids, or ``{}`` when there is nothing to query
        (``subjs`` is None, or no valid ids remain after filtering).

    Raises:
        TypeError: If any subject is not a str.
    """
    if subjs is None:
        return {}
    from nebula3.common import ttypes

    # INT64 spaces accept only integer vids; drop anything non-numeric.
    if vid_type == "INT64":
        valid_subjs = [subj for subj in subjs if subj.isdigit()]
        if not valid_subjs:
            # Log the ORIGINAL input (not the now-empty filtered list) so
            # the caller can see which ids were rejected.
            logger.warning(
                "KG is with INT64 vid type, but no digit string is provided. "
                "Return empty subjs, and no query will be executed. "
                f"subjs: {subjs}"
            )
            return {}
        subjs = valid_subjs

    subjs_list = []
    for subj in subjs:
        if not isinstance(subj, str):
            raise TypeError(f"Subject should be str, but got {type(subj).__name__}.")
        subj_byte = ttypes.Value()
        if vid_type == "INT64":
            # Safe: the filtering above guarantees subj is a digit string.
            subj_byte.set_iVal(int(subj))
        else:
            subj_byte.set_sVal(subj)
        subjs_list.append(subj_byte)

    subjs_byte = ttypes.Value()
    subjs_byte.set_lVal(ttypes.NList(values=subjs_list))
    return {"subjs": subjs_byte}

Ensure that the vertex IDs you are using in your queries match the expected type. If the type is INT64, the IDs should be numeric strings. If the type is FIXED_STRING, the IDs should be strings.

Example usage:

# Pick the vid type that matches your NebulaGraph space definition.
vid_type = "INT64"  # alternatively "FIXED_STRING(256)"
# For an INT64 space the ids must be digit strings.
subjs = ["123", "456"]  # Ensure these match the expected type
params = prepare_subjs_param(subjs, vid_type)

By ensuring that the vertex IDs match the expected type, you can avoid the "Wrong vertex id type" error [2].

To continue talking to Dosu, mention @dosu.