zilliztech / milvus-backup

Backup and restore tool for Milvus
Apache License 2.0
110 stars 38 forks source link

[Bug]: If there are upsert/delete operations on the collection, the data obtained from backup and the original data may be inconsistent #316

Closed zhuwenxing closed 2 weeks ago

zhuwenxing commented 3 months ago

Current Behavior

[2024-03-22T08:25:07.710Z] [2024-03-22 08:25:05 - INFO - ci_test]: src count: [{'count(*)': 2900}], dist count: [{'count(*)': 3000}] (client_base.py:383)
[2024-03-22T08:25:07.708Z] [2024-03-22 08:25:06 - INFO - ci_test]: *********************************** teardown *********************************** (client_base.py:46)

[2024-03-22T08:25:07.708Z] [2024-03-22 08:25:06 - INFO - ci_test]: [teardown_method] Start teardown test case test_milvus_restore_back_with_delete... (client_base.py:47)

[2024-03-22T08:25:07.708Z] [2024-03-22 08:25:06 - INFO - ci_test]: [test][2024-03-22T08:25:06Z] [0.00233042s] restore_backup_duXrsMH1 drop -> None (wrapper.py:30)

[2024-03-22T08:25:07.708Z] [2024-03-22 08:25:06 - INFO - ci_test]: [teardown_class] Start teardown class... (client_base.py:32)

[2024-03-22T08:25:07.708Z] 

[2024-03-22T08:25:07.708Z] 

[2024-03-22T08:25:07.708Z] =================================== FAILURES ===================================

[2024-03-22T08:25:07.708Z] ____________ TestRestoreBackup.test_milvus_restore_back_with_delete ____________

[2024-03-22T08:25:07.708Z] 

[2024-03-22T08:25:07.708Z] self = <test_restore_backup.TestRestoreBackup object at 0x7f73173af580>

[2024-03-22T08:25:07.708Z] 

[2024-03-22T08:25:07.708Z]     @pytest.mark.tags(CaseLabel.L1)

[2024-03-22T08:25:07.708Z]     def test_milvus_restore_back_with_delete(self):

[2024-03-22T08:25:07.708Z]         self._connect()

[2024-03-22T08:25:07.708Z]         name_origin = cf.gen_unique_str(prefix)

[2024-03-22T08:25:07.708Z]         back_up_name = cf.gen_unique_str(backup_prefix)

[2024-03-22T08:25:07.708Z]         fields = [cf.gen_int64_field(name="int64", is_primary=True),

[2024-03-22T08:25:07.708Z]                     cf.gen_int64_field(name="key"),

[2024-03-22T08:25:07.708Z]                     cf.gen_json_field(name="json"),

[2024-03-22T08:25:07.708Z]                     cf.gen_array_field(name="var_array", element_type=DataType.VARCHAR),

[2024-03-22T08:25:07.708Z]                     cf.gen_array_field(name="int_array", element_type=DataType.INT64),

[2024-03-22T08:25:07.708Z]                     cf.gen_float_vec_field(name="float_vector", dim=128),

[2024-03-22T08:25:07.708Z]                     ]

[2024-03-22T08:25:07.708Z]         default_schema = cf.gen_collection_schema(fields)

[2024-03-22T08:25:07.708Z]         collection_w = self.init_collection_wrap(name=name_origin, schema=default_schema, active_trace=True)

[2024-03-22T08:25:07.708Z]         nb = 3000

[2024-03-22T08:25:07.708Z]         data = [

[2024-03-22T08:25:07.708Z]             [i for i in range(nb)],

[2024-03-22T08:25:07.708Z]             [i % 3 for i in range(nb)],

[2024-03-22T08:25:07.708Z]             [{f"key_{str(i)}": i} for i in range(nb)],

[2024-03-22T08:25:07.708Z]             [[str(x) for x in range(10)] for i in range(nb)],

[2024-03-22T08:25:07.708Z]             [[int(x) for x in range(10)] for i in range(nb)],

[2024-03-22T08:25:07.708Z]             [[np.float32(i) for i in range(128)] for _ in range(nb)],

[2024-03-22T08:25:07.708Z]         ]

[2024-03-22T08:25:07.708Z]         res, result = collection_w.insert(data=data)

[2024-03-22T08:25:07.708Z]         pk = res.primary_keys

[2024-03-22T08:25:07.708Z]         # delete first 100 rows

[2024-03-22T08:25:07.708Z]         delete_ids = pk[:100]

[2024-03-22T08:25:07.708Z]         collection_w.delete(expr=f"int64 in {delete_ids}")

[2024-03-22T08:25:07.708Z]         res = client.create_backup({"async": False, "backup_name": back_up_name, "collection_names": [name_origin]})

[2024-03-22T08:25:07.708Z]         log.info(f"create_backup {res}")

[2024-03-22T08:25:07.708Z]         res = client.list_backup()

[2024-03-22T08:25:07.708Z]         log.info(f"list_backup {res}")

[2024-03-22T08:25:07.708Z]         if "data" in res:

[2024-03-22T08:25:07.708Z]             all_backup = [r["name"] for r in res["data"]]

[2024-03-22T08:25:07.708Z]         else:

[2024-03-22T08:25:07.708Z]             all_backup = []

[2024-03-22T08:25:07.709Z]         assert back_up_name in all_backup

[2024-03-22T08:25:07.709Z]         backup = client.get_backup(back_up_name)

[2024-03-22T08:25:07.709Z]         assert backup["data"]["name"] == back_up_name

[2024-03-22T08:25:07.709Z]         backup_collections = [backup["collection_name"]for backup in backup["data"]["collection_backups"]]

[2024-03-22T08:25:07.709Z]         assert name_origin in backup_collections

[2024-03-22T08:25:07.709Z]         res = client.restore_backup({"async": False, "backup_name": back_up_name, "collection_names": [name_origin],

[2024-03-22T08:25:07.709Z]                                      "collection_suffix": suffix})

[2024-03-22T08:25:07.709Z]         log.info(f"restore_backup: {res}")

[2024-03-22T08:25:07.709Z]         res, _ = self.utility_wrap.list_collections()

[2024-03-22T08:25:07.709Z]         assert name_origin + suffix in res

[2024-03-22T08:25:07.709Z]         output_fields = None

[2024-03-22T08:25:07.709Z] >       self.compare_collections(name_origin, name_origin + suffix, output_fields=output_fields, verify_by_query=True)

[2024-03-22T08:25:07.709Z] 

[2024-03-22T08:25:07.709Z] test_restore_backup.py:531: 

[2024-03-22T08:25:07.709Z] _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

[2024-03-22T08:25:07.709Z] 

[2024-03-22T08:25:07.709Z] self = <test_restore_backup.TestRestoreBackup object at 0x7f73173af580>

[2024-03-22T08:25:07.709Z] src_name = 'restore_backup_duXrsMH1', dist_name = 'restore_backup_duXrsMH1_bak'

[2024-03-22T08:25:07.709Z] output_fields = ['*'], verify_by_query = True

[2024-03-22T08:25:07.709Z] 

[2024-03-22T08:25:07.709Z]     def compare_collections(self, src_name, dist_name, output_fields=None, verify_by_query=False):

[2024-03-22T08:25:07.709Z]         if output_fields is None:

[2024-03-22T08:25:07.709Z]             output_fields = ["*"]

[2024-03-22T08:25:07.709Z]         collection_src, _ = self.collection_wrap.init_collection(name=src_name)

[2024-03-22T08:25:07.709Z]         collection_dist, _ = self.collection_wrap.init_collection(name=dist_name)

[2024-03-22T08:25:07.709Z]         log.info(f"collection_src schema: {collection_src.schema}")

[2024-03-22T08:25:07.709Z]         log.info(f"collection_dist schema: {collection_dist.schema}")

[2024-03-22T08:25:07.709Z]         assert collection_src.schema == collection_dist.schema

[2024-03-22T08:25:07.709Z]         # get partitions

[2024-03-22T08:25:07.709Z]         partitions_src = collection_src.partitions

[2024-03-22T08:25:07.709Z]         partitions_dist = collection_dist.partitions

[2024-03-22T08:25:07.709Z]         log.info(f"partitions_src: {partitions_src}, partitions_dist: {partitions_dist}")

[2024-03-22T08:25:07.709Z]         assert len(partitions_src) == len(partitions_dist)

[2024-03-22T08:25:07.709Z]         # get num entities

[2024-03-22T08:25:07.709Z]         src_num = collection_src.num_entities

[2024-03-22T08:25:07.709Z]         dist_num = collection_dist.num_entities

[2024-03-22T08:25:07.709Z]         log.info(f"src_num: {src_num}, dist_num: {dist_num}")

[2024-03-22T08:25:07.709Z]         if not verify_by_query:

[2024-03-22T08:25:07.709Z]             assert src_num == dist_num

[2024-03-22T08:25:07.709Z]             return

[2024-03-22T08:25:07.709Z]         for coll in [collection_src, collection_dist]:

[2024-03-22T08:25:07.709Z]             is_binary = self.is_binary_by_schema(coll.schema)

[2024-03-22T08:25:07.709Z]             try:

[2024-03-22T08:25:07.709Z]                 if is_binary:

[2024-03-22T08:25:07.709Z]                     coll.create_index(ct.default_binary_vec_field_name, ct.default_bin_flat_index,

[2024-03-22T08:25:07.709Z]                                       index_name=cf.gen_unique_str())

[2024-03-22T08:25:07.709Z]                 else:

[2024-03-22T08:25:07.709Z]                     coll.create_index(ct.default_float_vec_field_name, ct.default_index, index_name=cf.gen_unique_str())

[2024-03-22T08:25:07.709Z]             except Exception as e:

[2024-03-22T08:25:07.709Z]                 log.error(f"collection {coll.name} create index failed with error: {e}")

[2024-03-22T08:25:07.709Z]             coll.load()

[2024-03-22T08:25:07.709Z]             time.sleep(5)

[2024-03-22T08:25:07.709Z]         # get entities by count

[2024-03-22T08:25:07.709Z]         src_count = collection_src.query(

[2024-03-22T08:25:07.709Z]             expr="",

[2024-03-22T08:25:07.709Z]             output_fields=["count(*)"]

[2024-03-22T08:25:07.709Z]         )

[2024-03-22T08:25:07.709Z]         dist_count = collection_dist.query(

[2024-03-22T08:25:07.709Z]             expr="",

[2024-03-22T08:25:07.709Z]             output_fields=["count(*)"]

[2024-03-22T08:25:07.709Z]         )

[2024-03-22T08:25:07.709Z]         log.info(f"src count: {src_count}, dist count: {dist_count}")

[2024-03-22T08:25:07.709Z]         src_res = collection_src.query(expr=f'{ct.default_int64_field_name} >= 0',

[2024-03-22T08:25:07.709Z]                                        output_fields=output_fields)

[2024-03-22T08:25:07.709Z]         # log.info(f"src res: {len(src_res)}, src res: {src_res[-1]}")

[2024-03-22T08:25:07.709Z]         dist_res = collection_dist.query(expr=f'{ct.default_int64_field_name} >= 0',

[2024-03-22T08:25:07.709Z]                                          output_fields=output_fields)

[2024-03-22T08:25:07.709Z]         # log.info(f"dist res: {len(dist_res)}, dist res: {dist_res[-1]}")

[2024-03-22T08:25:07.709Z] >       assert len(dist_res) == len(src_res)

[2024-03-22T08:25:07.709Z] E       AssertionError

[2024-03-22T08:25:07.709Z] 

[2024-03-22T08:25:07.709Z] ../base/client_base.py:390: AssertionError

[2024-03-22T08:25:07.709Z] ------------------------------ Captured log setup ------------------------------

[2024-03-22T08:25:07.709Z] [2024-03-22 08:24:09 - INFO - ci_test]: ################################################################################ (conftest.py:197)

[2024-03-22T08:25:07.709Z] [2024-03-22 08:24:09 - INFO - ci_test]: [initialize_milvus] Log cleaned up, start testing... (conftest.py:198)

[2024-03-22T08:25:07.709Z] [2024-03-22 08:24:09 - INFO - ci_test]: [setup_class] Start setup class... (client_base.py:29)

[2024-03-22T08:25:07.709Z] [2024-03-22 08:24:09 - INFO - ci_test]: *********************************** setup *********************************** (client_base.py:35)

[2024-03-22T08:25:07.709Z] [2024-03-22 08:24:09 - INFO - ci_test]: [setup_method] Start setup test case test_milvus_restore_back_with_delete. (client_base.py:36)

[2024-03-22T08:25:07.709Z] ------------------------------ Captured log call -------------------------------

[2024-03-22T08:25:07.709Z] [2024-03-22 08:24:09 - DEBUG - ci_test]: (api_request)  : [Connections.connect] args: ['default'], kwargs: {'host': '10.255.110.222', 'port': 19530} (api_request.py:56)

[2024-03-22T08:25:07.709Z] [2024-03-22 08:24:09 - DEBUG - ci_test]: (api_response) : None  (api_request.py:31)

[2024-03-22T08:25:07.709Z] [2024-03-22 08:24:09 - DEBUG - ci_test]: (api_request)  : [FieldSchema] args: ['int64', <DataType.INT64: 5>, ''], kwargs: {'is_primary': True} (api_request.py:56)

[2024-03-22T08:25:07.709Z] [2024-03-22 08:24:09 - DEBUG - ci_test]: (api_response) : {'name': 'int64', 'description': '', 'type': <DataType.INT64: 5>, 'is_primary': True, 'auto_id': False}  (api_request.py:31)

[2024-03-22T08:25:07.709Z] [2024-03-22 08:24:09 - DEBUG - ci_test]: (api_request)  : [FieldSchema] args: ['key', <DataType.INT64: 5>, ''], kwargs: {'is_primary': False} (api_request.py:56)

[2024-03-22T08:25:07.709Z] [2024-03-22 08:24:09 - DEBUG - ci_test]: (api_response) : {'name': 'key', 'description': '', 'type': <DataType.INT64: 5>}  (api_request.py:31)

[2024-03-22T08:25:07.709Z] [2024-03-22 08:24:09 - DEBUG - ci_test]: (api_request)  : [FieldSchema] args: ['json', <DataType.JSON: 23>, ''], kwargs: {'is_primary': False} (api_request.py:56)

[2024-03-22T08:25:07.709Z] [2024-03-22 08:24:09 - DEBUG - ci_test]: (api_response) : {'name': 'json', 'description': '', 'type': <DataType.JSON: 23>}  (api_request.py:31)

[2024-03-22T08:25:07.709Z] [2024-03-22 08:24:09 - DEBUG - ci_test]: (api_request)  : [FieldSchema] args: ['var_array', <DataType.ARRAY: 22>, ''], kwargs: {'is_primary': False, 'element_type': <DataType.VARCHAR: 21>, 'max_capacity': 2000, 'max_length': 1500} (api_request.py:56)

[2024-03-22T08:25:07.709Z] [2024-03-22 08:24:09 - DEBUG - ci_test]: (api_response) : {'name': 'var_array', 'description': '', 'type': <DataType.ARRAY: 22>, 'params': {'max_length': 1500, 'max_capacity': 2000}, 'element_type': <DataType.VARCHAR: 21>}  (api_request.py:31)

[2024-03-22T08:25:07.709Z] [2024-03-22 08:24:09 - DEBUG - ci_test]: (api_request)  : [FieldSchema] args: ['int_array', <DataType.ARRAY: 22>, ''], kwargs: {'is_primary': False, 'element_type': <DataType.INT64: 5>, 'max_capacity': 2000, 'max_length': 1500} (api_request.py:56)

[2024-03-22T08:25:07.709Z] [2024-03-22 08:24:09 - DEBUG - ci_test]: (api_response) : {'name': 'int_array', 'description': '', 'type': <DataType.ARRAY: 22>, 'params': {'max_length': 1500, 'max_capacity': 2000}, 'element_type': <DataType.INT64: 5>}  (api_request.py:31)

[2024-03-22T08:25:07.709Z] [2024-03-22 08:24:09 - DEBUG - ci_test]: (api_request)  : [FieldSchema] args: ['float_vector', <DataType.FLOAT_VECTOR: 101>, ''], kwargs: {'dim': 128, 'is_primary': False} (api_request.py:56)

[2024-03-22T08:25:07.709Z] [2024-03-22 08:24:09 - DEBUG - ci_test]: (api_response) : {'name': 'float_vector', 'description': '', 'type': <DataType.FLOAT_VECTOR: 101>, 'params': {'dim': 128}}  (api_request.py:31)

[2024-03-22T08:25:07.709Z] [2024-03-22 08:24:09 - DEBUG - ci_test]: (api_request)  : [CollectionSchema] args: [[{'name': 'int64', 'description': '', 'type': <DataType.INT64: 5>, 'is_primary': True, 'auto_id': False}, {'name': 'key', 'description': '', 'type': <DataType.INT64: 5>}, {'name': 'json', 'description': '', 'type': <DataType.JSON: 23>}, {'name': 'var_array', 'description': '', 'type': <DataType.ARR......, kwargs: {'primary_field': None, 'auto_id': False} (api_request.py:56)

[2024-03-22T08:25:07.709Z] [2024-03-22 08:24:09 - DEBUG - ci_test]: (api_response) : {'auto_id': False, 'description': '', 'fields': [{'name': 'int64', 'description': '', 'type': <DataType.INT64: 5>, 'is_primary': True, 'auto_id': False}, {'name': 'key', 'description': '', 'type': <DataType.INT64: 5>}, {'name': 'json', 'description': '', 'type': <DataType.JSON: 23>}, {'name': 'var_a......  (api_request.py:31)

[2024-03-22T08:25:07.709Z] [2024-03-22 08:24:09 - DEBUG - ci_test]: (api_request)  : [Connections.has_connection] args: ['default'], kwargs: {} (api_request.py:56)

[2024-03-22T08:25:07.709Z] [2024-03-22 08:24:09 - DEBUG - ci_test]: (api_response) : True  (api_request.py:31)

[2024-03-22T08:25:07.709Z] [2024-03-22 08:24:09 - DEBUG - ci_test]: (api_request)  : [Collection] args: ['restore_backup_duXrsMH1', {'auto_id': False, 'description': '', 'fields': [{'name': 'int64', 'description': '', 'type': <DataType.INT64: 5>, 'is_primary': True, 'auto_id': False}, {'name': 'key', 'description': '', 'type': <DataType.INT64: 5>}, {'name': 'json', 'description': '', 'type': <DataType......, kwargs: {'consistency_level': 'Strong', 'shards_num': 2} (api_request.py:56)

[2024-03-22T08:25:07.710Z] [2024-03-22 08:24:09 - DEBUG - ci_test]: (api_response) : <Collection>:

[2024-03-22T08:25:07.710Z] -------------

[2024-03-22T08:25:07.710Z] <name>: restore_backup_duXrsMH1

[2024-03-22T08:25:07.710Z] <description>: 

[2024-03-22T08:25:07.710Z] <schema>: {'auto_id': False, 'description': '', 'fields': [{'name': 'int64', 'description': '', 'type': <DataType.INT64: 5>, 'is_primary': True, 'auto_id': False}, {'name': 'key', 'description': '', 'type': <DataType.INT64: ......  (api_request.py:31)

[2024-03-22T08:25:07.710Z] [2024-03-22 08:24:09 - DEBUG - ci_test]: (api_request)  : [Collection.insert] args: [[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,......, kwargs: {'timeout': 120} (api_request.py:56)

[2024-03-22T08:25:07.710Z] [2024-03-22 08:24:10 - DEBUG - ci_test]: (api_response) : (insert count: 3000, delete count: 0, upsert count: 0, timestamp: 448553510534381572, success count: 3000, err count: 0)  (api_request.py:31)

[2024-03-22T08:25:07.710Z] [2024-03-22 08:24:10 - INFO - ci_test]: [test][2024-03-22T08:24:09Z] [0.36862773s] restore_backup_duXrsMH1 insert -> (insert count: 3000, delete count: 0, upsert count: 0, timestamp: 448553510534381572, success count: 3000, err count: 0) (wrapper.py:30)

[2024-03-22T08:25:07.710Z] [2024-03-22 08:24:10 - DEBUG - ci_test]: (api_request)  : [Collection.delete] args: ['int64 in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74......, kwargs: {} (api_request.py:56)

[2024-03-22T08:25:07.710Z] [2024-03-22 08:24:10 - DEBUG - ci_test]: (api_response) : (insert count: 0, delete count: 100, upsert count: 0, timestamp: 0, success count: 0, err count: 0)  (api_request.py:31)

[2024-03-22T08:25:07.710Z] [2024-03-22 08:24:10 - INFO - ci_test]: [test][2024-03-22T08:24:10Z] [0.00592285s] restore_backup_duXrsMH1 delete -> (insert count: 0, delete count: 100, upsert count: 0, timestamp: 0, success count: 0, err count: 0) (wrapper.py:30)

[2024-03-22T08:25:07.710Z] [2024-03-22 08:24:13 - INFO - ci_test]: create_backup {'requestId': '8ed53720-e825-11ee-8042-26e975841c69', 'msg': 'success', 'data': {'id': '8ed53720-e825-11ee-8042-26e975841c69', 'state_code': 2, 'start_time': 1711095850304, 'end_time': 1711095853426, 'name': 'backup_w2RrggvM', 'backup_timestamp': 1711095850305, 'size': 0, 'milvus_version': '91749583'}} (test_restore_backup.py:513)

[2024-03-22T08:25:07.710Z] [2024-03-22 08:24:13 - INFO - ci_test]: list_backup {'requestId': '90c06842-e825-11ee-8042-26e975841c69', 'msg': 'success', 'data': [{'id': '8ed53720-e825-11ee-8042-26e975841c69', 'state_code': 2, 'start_time': 1711095850304, 'end_time': 1711095853426, 'name': 'backup_w2RrggvM', 'backup_timestamp': 1711095850305, 'size': 46197, 'milvus_version': '91749583'}]} (test_restore_backup.py:515)

[2024-03-22T08:25:07.710Z] [2024-03-22 08:24:43 - INFO - ci_test]: restore_backup: {'requestId': '90d0b3a3-e825-11ee-8042-26e975841c69', 'msg': 'success', 'data': {'id': '90d0ca21-e825-11ee-8042-26e975841c69', 'state_code': 2, 'start_time': 1711095853, 'end_time': 1711095883, 'collection_restore_tasks': [{'id': '90d16cd0-e825-11ee-8042-26e975841c69', 'state_code': 2, 'start_time': 1711095853, 'target_collection_name': 'restore_backup_duXrsMH1_bak', 'restored_size': 46197, 'to_restore_size': 46197, 'progress': 100, 'target_db_name': 'default'}], 'restored_size': 0, 'to_restore_size': 0, 'progress': 100}} (test_restore_backup.py:527)

[2024-03-22T08:25:07.710Z] [2024-03-22 08:24:43 - DEBUG - ci_test]: (api_request)  : [list_collections] args: [20, 'default'], kwargs: {} (api_request.py:56)

[2024-03-22T08:25:07.710Z] [2024-03-22 08:24:43 - DEBUG - ci_test]: (api_response) : ['restore_backup_duXrsMH1', 'restore_backup_duXrsMH1_bak']  (api_request.py:31)

[2024-03-22T08:25:07.710Z] [2024-03-22 08:24:43 - DEBUG - ci_test]: (api_request)  : [Collection] args: ['restore_backup_duXrsMH1', None, 'default'], kwargs: {'consistency_level': 'Strong', 'shards_num': 2} (api_request.py:56)

[2024-03-22T08:25:07.710Z] [2024-03-22 08:24:43 - DEBUG - ci_test]: (api_response) : <Collection>:

[2024-03-22T08:25:07.710Z] -------------

[2024-03-22T08:25:07.710Z] <name>: restore_backup_duXrsMH1

[2024-03-22T08:25:07.710Z] <description>: 

[2024-03-22T08:25:07.710Z] <schema>: {'auto_id': False, 'description': '', 'fields': [{'name': 'int64', 'description': '', 'type': <DataType.INT64: 5>, 'is_primary': True, 'auto_id': False}, {'name': 'key', 'description': '', 'type': <DataType.INT64: ......  (api_request.py:31)

[2024-03-22T08:25:07.710Z] [2024-03-22 08:24:43 - DEBUG - ci_test]: (api_request)  : [Collection] args: ['restore_backup_duXrsMH1_bak', None, 'default'], kwargs: {'consistency_level': 'Strong', 'shards_num': 2} (api_request.py:56)

[2024-03-22T08:25:07.710Z] [2024-03-22 08:24:43 - DEBUG - ci_test]: (api_response) : <Collection>:

[2024-03-22T08:25:07.710Z] -------------

[2024-03-22T08:25:07.710Z] <name>: restore_backup_duXrsMH1_bak

[2024-03-22T08:25:07.710Z] <description>: 

[2024-03-22T08:25:07.710Z] <schema>: {'auto_id': False, 'description': '', 'fields': [{'name': 'int64', 'description': '', 'type': <DataType.INT64: 5>, 'is_primary': True, 'auto_id': False}, {'name': 'key', 'description': '', 'type': <DataType.INT......  (api_request.py:31)

[2024-03-22T08:25:07.710Z] [2024-03-22 08:24:43 - INFO - ci_test]: collection_src schema: {'auto_id': False, 'description': '', 'fields': [{'name': 'int64', 'description': '', 'type': <DataType.INT64: 5>, 'is_primary': True, 'auto_id': False}, {'name': 'key', 'description': '', 'type': <DataType.INT64: 5>}, {'name': 'json', 'description': '', 'type': <DataType.JSON: 23>}, {'name': 'var_array', 'description': '', 'type': <DataType.ARRAY: 22>, 'params': {'max_length': 1500, 'max_capacity': 2000}, 'element_type': 21}, {'name': 'int_array', 'description': '', 'type': <DataType.ARRAY: 22>, 'params': {'max_length': 1500, 'max_capacity': 2000}, 'element_type': 5}, {'name': 'float_vector', 'description': '', 'type': <DataType.FLOAT_VECTOR: 101>, 'params': {'dim': 128}}]} (client_base.py:347)

[2024-03-22T08:25:07.710Z] [2024-03-22 08:24:43 - INFO - ci_test]: collection_dist schema: {'auto_id': False, 'description': '', 'fields': [{'name': 'int64', 'description': '', 'type': <DataType.INT64: 5>, 'is_primary': True, 'auto_id': False}, {'name': 'key', 'description': '', 'type': <DataType.INT64: 5>}, {'name': 'json', 'description': '', 'type': <DataType.JSON: 23>}, {'name': 'var_array', 'description': '', 'type': <DataType.ARRAY: 22>, 'params': {'max_length': 1500, 'max_capacity': 2000}, 'element_type': 21}, {'name': 'int_array', 'description': '', 'type': <DataType.ARRAY: 22>, 'params': {'max_length': 1500, 'max_capacity': 2000}, 'element_type': 5}, {'name': 'float_vector', 'description': '', 'type': <DataType.FLOAT_VECTOR: 101>, 'params': {'dim': 128}}]} (client_base.py:348)

[2024-03-22T08:25:07.710Z] [2024-03-22 08:24:43 - INFO - ci_test]: partitions_src: [{"name":"_default","collection_name":"restore_backup_duXrsMH1","description":""}], partitions_dist: [{"name":"_default","collection_name":"restore_backup_duXrsMH1_bak","description":""}] (client_base.py:353)

[2024-03-22T08:25:07.710Z] [2024-03-22 08:24:43 - INFO - ci_test]: src_num: 3000, dist_num: 3000 (client_base.py:358)

[2024-03-22T08:25:07.710Z] [2024-03-22 08:25:05 - INFO - ci_test]: src count: [{'count(*)': 2900}], dist count: [{'count(*)': 3000}] (client_base.py:383)

Expected Behavior

No response

Steps To Reproduce

1. Create collection `a`.
2. Insert 3000 rows into `a`.
3. Delete 100 rows from `a`.
4. Create a backup and restore it to `a_bak`.
5. Compare `a` and `a_bak`.

Result:
The count of `a` is 2900, but the count of `a_bak` is 3000, i.e. the deletion of 100 rows is not reflected in the restored collection.

Environment

No response

Anything else?

log: backup.log

zhuwenxing commented 2 weeks ago

Still reproduced with 2.4-20240612-eeba8511-amd64. Failed job: https://qa-jenkins.milvus.io/blue/organizations/jenkins/milvus_backup_api_test/detail/milvus_backup_api_test/52/pipeline Log: artifacts-backup-test-52-server-logs.tar.gz

[2024-06-12T07:28:27.505Z] [2024-06-12 07:28:09 - DEBUG - ci_test]: (api_request)  : [list_collections] args: [20, 'default'], kwargs: {} (api_request.py:56)

[2024-06-12T07:28:27.505Z] [2024-06-12 07:28:09 - DEBUG - ci_test]: (api_response) : ['restore_backup_kLaOPrH4', 'restore_backup_kLaOPrH4_bak']  (api_request.py:31)

[2024-06-12T07:28:27.505Z] [2024-06-12 07:28:09 - DEBUG - ci_test]: (api_request)  : [Collection] args: ['restore_backup_kLaOPrH4', None, 'default'], kwargs: {'consistency_level': 'Strong', 'shards_num': 2} (api_request.py:56)

[2024-06-12T07:28:27.505Z] [2024-06-12 07:28:09 - DEBUG - ci_test]: (api_response) : <Collection>:

[2024-06-12T07:28:27.505Z] -------------

[2024-06-12T07:28:27.505Z] <name>: restore_backup_kLaOPrH4

[2024-06-12T07:28:27.505Z] <description>: 

[2024-06-12T07:28:27.505Z] <schema>: {'auto_id': False, 'description': '', 'fields': [{'name': 'int64', 'description': '', 'type': <DataType.INT64: 5>, 'is_primary': True, 'auto_id': False}, {'name': 'key', 'description': '', 'type': <DataType.INT64: ......  (api_request.py:31)

[2024-06-12T07:28:27.505Z] [2024-06-12 07:28:09 - DEBUG - ci_test]: (api_request)  : [Collection] args: ['restore_backup_kLaOPrH4_bak', None, 'default'], kwargs: {'consistency_level': 'Strong', 'shards_num': 2} (api_request.py:56)

[2024-06-12T07:28:27.505Z] [2024-06-12 07:28:09 - DEBUG - ci_test]: (api_response) : <Collection>:

[2024-06-12T07:28:27.505Z] -------------

[2024-06-12T07:28:27.505Z] <name>: restore_backup_kLaOPrH4_bak

[2024-06-12T07:28:27.505Z] <description>: 

[2024-06-12T07:28:27.505Z] <schema>: {'auto_id': False, 'description': '', 'fields': [{'name': 'int64', 'description': '', 'type': <DataType.INT64: 5>, 'is_primary': True, 'auto_id': False}, {'name': 'key', 'description': '', 'type': <DataType.INT......  (api_request.py:31)

[2024-06-12T07:28:27.505Z] [2024-06-12 07:28:09 - INFO - ci_test]: collection_src schema: {'auto_id': False, 'description': '', 'fields': [{'name': 'int64', 'description': '', 'type': <DataType.INT64: 5>, 'is_primary': True, 'auto_id': False}, {'name': 'key', 'description': '', 'type': <DataType.INT64: 5>}, {'name': 'json', 'description': '', 'type': <DataType.JSON: 23>}, {'name': 'var_array', 'description': '', 'type': <DataType.ARRAY: 22>, 'params': {'max_length': 1500, 'max_capacity': 2000}, 'element_type': <DataType.VARCHAR: 21>}, {'name': 'int_array', 'description': '', 'type': <DataType.ARRAY: 22>, 'params': {'max_length': 1500, 'max_capacity': 2000}, 'element_type': <DataType.INT64: 5>}, {'name': 'float_vector', 'description': '', 'type': <DataType.FLOAT_VECTOR: 101>, 'params': {'dim': 128}}], 'enable_dynamic_field': False} (client_base.py:347)

[2024-06-12T07:28:27.505Z] [2024-06-12 07:28:09 - INFO - ci_test]: collection_dist schema: {'auto_id': False, 'description': '', 'fields': [{'name': 'int64', 'description': '', 'type': <DataType.INT64: 5>, 'is_primary': True, 'auto_id': False}, {'name': 'key', 'description': '', 'type': <DataType.INT64: 5>}, {'name': 'json', 'description': '', 'type': <DataType.JSON: 23>}, {'name': 'var_array', 'description': '', 'type': <DataType.ARRAY: 22>, 'params': {'max_length': 1500, 'max_capacity': 2000}, 'element_type': <DataType.VARCHAR: 21>}, {'name': 'int_array', 'description': '', 'type': <DataType.ARRAY: 22>, 'params': {'max_length': 1500, 'max_capacity': 2000}, 'element_type': <DataType.INT64: 5>}, {'name': 'float_vector', 'description': '', 'type': <DataType.FLOAT_VECTOR: 101>, 'params': {'dim': 128}}], 'enable_dynamic_field': False} (client_base.py:348)

[2024-06-12T07:28:27.505Z] [2024-06-12 07:28:09 - INFO - ci_test]: partitions_src: [{"name":"_default","collection_name":"restore_backup_kLaOPrH4","description":""}], partitions_dist: [{"name":"_default","collection_name":"restore_backup_kLaOPrH4_bak","description":""}] (client_base.py:353)

[2024-06-12T07:28:27.505Z] [2024-06-12 07:28:09 - INFO - ci_test]: src_num: 3000, dist_num: 3000 (client_base.py:358)

[2024-06-12T07:28:27.505Z] [2024-06-12 07:28:25 - INFO - ci_test]: src count: [{'count(*)': 2900}], dist count: [{'count(*)': 3000}] (client_base.py:383)