milvus-io / milvus-lite

A lightweight version of Milvus
Apache License 2.0
281 stars 33 forks source link

Sparse Index Create Issue after insert data #213

Open bdockbockd opened 1 month ago

bdockbockd commented 1 month ago

Environment Base image: python=3.8.5

pymilvus==2.4.7
milvus-lite==2.4.10
74 collection_meta.cpp:148] [SERVER][CreateIndex][grpcpp_sync_ser] Add index failed, err: unrecognized token: ""

RPC error: [create_index], <MilvusException: (code=5, message=: internal error)>, <Time:{'RPC start': '2024-10-08 15:08:18.884977', 'RPC error': '2024-10-08 15:08:18.886644'}>
Failed to create an index on collection
  self.client.create_index(collection_name=collection_name, index_params=index_params)
  File "/azureml-envs/azureml_aac38e7fe8910eda9496864610df677c/lib/python3.8/site-packages/pymilvus/milvus_client/milvus_client.py", line 153, in create_index
    self._create_index(collection_name, index_param, timeout=timeout, **kwargs)
  File "/azureml-envs/azureml_aac38e7fe8910eda9496864610df677c/lib/python3.8/site-packages/pymilvus/milvus_client/milvus_client.py", line 175, in _create_index
    raise ex from ex
  File "/azureml-envs/azureml_aac38e7fe8910eda9496864610df677c/lib/python3.8/site-packages/pymilvus/milvus_client/milvus_client.py", line 164, in _create_index
    conn.create_index(
  File "/azureml-envs/azureml_aac38e7fe8910eda9496864610df677c/lib/python3.8/site-packages/pymilvus/decorators.py", line 147, in handler
    raise e from e
  File "/azureml-envs/azureml_aac38e7fe8910eda9496864610df677c/lib/python3.8/site-packages/pymilvus/decorators.py", line 143, in handler
    return func(*args, **kwargs)
  File "/azureml-envs/azureml_aac38e7fe8910eda9496864610df677c/lib/python3.8/site-packages/pymilvus/decorators.py", line 182, in handler
    return func(self, *args, **kwargs)
  File "/azureml-envs/azureml_aac38e7fe8910eda9496864610df677c/lib/python3.8/site-packages/pymilvus/decorators.py", line 122, in handler
    raise e from e
  File "/azureml-envs/azureml_aac38e7fe8910eda9496864610df677c/lib/python3.8/site-packages/pymilvus/decorators.py", line 87, in handler
    return func(*args, **kwargs)
  File "/azureml-envs/azureml_aac38e7fe8910eda9496864610df677c/lib/python3.8/site-packages/pymilvus/client/grpc_handler.py", line 987, in create_index
    check_status(status)
  File "/azureml-envs/azureml_aac38e7fe8910eda9496864610df677c/lib/python3.8/site-packages/pymilvus/client/utils.py", line 63, in check_status
    raise MilvusException(status.code, status.reason, status.error_code)
pymilvus.exceptions.MilvusException: <MilvusException: (code=5, message=: internal error)>
junjiejiangjjj commented 1 month ago

Hi @bdockbockd, can you provide the code that reproduces this?

bdockbockd commented 1 month ago

Here is the relevant code:

            self.client = MilvusClient(f'milvus_demo.db')
    def create_index(self, collection_name):
        """Create an index on the sparse vector field.

        Builds a ``SPARSE_WAND`` index named ``sparse_inverted_index`` on the
        ``sparse_vector`` field using the ``IP`` metric, then asks the client
        to create it on the given collection.

        Args:
            collection_name: Name of the collection to index.
        """
        index_params = self.client.prepare_index_params()
        index_params.add_index(
            field_name="sparse_vector",
            index_name="sparse_inverted_index",
            index_type="SPARSE_WAND",
            metric_type="IP",
            params={"drop_ratio_build": 0.2},
        )
        # NOTE: this is the call reported to fail with
        # "MilvusException: (code=5, message=: internal error)".
        self.client.create_index(collection_name=collection_name, index_params=index_params)

Relevant error output:

74 collection_meta.cpp:148] [SERVER][CreateIndex][grpcpp_sync_ser] Add index failed, err: unrecognized token: "�"

RPC error: [create_index], <MilvusException: (code=5, message=: internal error)>, <Time:{'RPC start': '2024-10-08 15:08:18.884977', 'RPC error': '2024-10-08 15:08:18.886644'}>
Failed to create an index on collection
bdockbockd commented 1 month ago

My schema

    def create_collection(self, collection_name):
        """Create a collection with a schema including a sparse vector field.

        The schema is created with ``auto_id`` enabled and declares four
        fields: a VARCHAR primary key ``pk``, a VARCHAR ``uid``, a DOUBLE
        ``number`` and a SPARSE_FLOAT_VECTOR ``sparse_vector``.

        Args:
            collection_name: Name of the collection to create.
        """
        schema = self.client.create_schema(
            auto_id=True,
            # The keyword is singular. The misspelled ``enable_dynamic_fields``
            # is accepted as an arbitrary kwarg and silently ignored, leaving
            # the dynamic field disabled.
            enable_dynamic_field=True,
        )
        schema.add_field(field_name="pk", datatype=DataType.VARCHAR, is_primary=True, max_length=100)
        schema.add_field(field_name="uid", datatype=DataType.VARCHAR, max_length=100)
        schema.add_field(field_name="number", datatype=DataType.DOUBLE)
        schema.add_field(field_name="sparse_vector", datatype=DataType.SPARSE_FLOAT_VECTOR)

        self.client.create_collection(collection_name=collection_name, schema=schema)

Before the create_index operation, data appears to be inserted without any errors.

junjiejiangjjj commented 1 month ago

The cause may be an invalid collection_name. The following code runs without errors:

from pymilvus import MilvusClient, DataType
# Minimal end-to-end check: create a collection with a sparse vector field,
# build a SPARSE_WAND index on it, and list the resulting indexes.
milvus_client = MilvusClient("milvus_demo.db")
collection_name = "my_collection"

schema = milvus_client.create_schema(
    auto_id=True,
    # The keyword is singular; the misspelled ``enable_dynamic_fields`` is
    # silently ignored and leaves the dynamic field disabled.
    enable_dynamic_field=True,
)
schema.add_field(field_name="pk", datatype=DataType.VARCHAR, is_primary=True, max_length=100)
schema.add_field(field_name="uid", datatype=DataType.VARCHAR, max_length=100)
schema.add_field(field_name="number", datatype=DataType.DOUBLE)
schema.add_field(field_name="sparse_vector", datatype=DataType.SPARSE_FLOAT_VECTOR)
milvus_client.create_collection(collection_name=collection_name, schema=schema)

index_params = milvus_client.prepare_index_params()
index_params.add_index(
    field_name="sparse_vector",
    index_name="sparse_inverted_index",
    index_type="SPARSE_WAND",
    metric_type="IP",
    params={"drop_ratio_build": 0.2},
)

milvus_client.create_index(collection_name=collection_name, index_params=index_params)
print(milvus_client.list_indexes(collection_name=collection_name))