Open zhuwenxing opened 4 hours ago
- Milvus version: - Deployment mode(standalone or cluster): - MQ type(rocksmq, pulsar or kafka): - SDK version(e.g. pymilvus v2.0.0rc2): - OS(Ubuntu or CentOS): - CPU/Memory: - GPU: - Others:
pytest : test] self = <test_search.TestSearchGroupBy object at 0x7fe84181ff70> [pytest : test] [pytest : test] @pytest.mark.tags(CaseLabel.L0) [pytest : test] def test_search_group_size_default(self): [pytest : test] """ [pytest : test] target: test search group by [pytest : test] method: 1. create a collection with 3 different float vectors [pytest : test] 2. build index with 3 different index types and metrics [pytest : test] 2. search on 3 different float vector fields with group by varchar field with group size [pytest : test] verify results entity = limit * group_size and group size is full if group_strict_size is True [pytest : test] verify results group counts = limit if group_strict_size is False [pytest : test] """ [pytest : test] self._connect() [pytest : test] dense_types = ["FLOAT16_VECTOR", "FLOAT_VECTOR", "BFLOAT16_VECTOR"] [pytest : test] dims = [16, 128, 64] [pytest : test] index_types = ["FLAT", "IVF_SQ8", "HNSW"] [pytest : test] metrics = ct.float_metrics [pytest : test] fields = [cf.gen_int64_field(is_primary=True), cf.gen_string_field()] [pytest : test] for i in range(len(dense_types)): [pytest : test] fields.append(cf.gen_float_vec_field(name=dense_types[i], [pytest : test] vector_data_type=dense_types[i], dim=dims[i])) [pytest : test] schema = cf.gen_collection_schema(fields, auto_id=True) [pytest : test] collection_w = self.init_collection_wrap(name=prefix, schema=schema) [pytest : test] [pytest : test] # insert with the same values for scalar fields [pytest : test] nb = 100 [pytest : test] for _ in range(100): [pytest : test] string_values = pd.Series(data=[str(i) for i in range(nb)], dtype="string") [pytest : test] data = [string_values] [pytest : test] for i in range(len(dense_types)): [pytest : test] data.append(cf.gen_vectors(dim=dims[i], nb=nb, vector_data_type=dense_types[i])) [pytest : test] collection_w.insert(data) [pytest : test] [pytest : test] collection_w.flush() [pytest : test] for i in range(len(dense_types)): [pytest : test] 
_index_params = {"index_type": index_types[i], "metric_type": metrics[i], [pytest : test] "params": cf.get_index_params_params(index_types[i])} [pytest : test] collection_w.create_index(dense_types[i], _index_params) [pytest : test] collection_w.load() [pytest : test] [pytest : test] nq = 2 [pytest : test] limit = 50 [pytest : test] group_size = 5 [pytest : test] for j in range(len(dense_types)): [pytest : test] search_vectors = cf.gen_vectors(nq, dim=dims[j], vector_data_type=dense_types[j]) [pytest : test] search_params = {"params": cf.get_search_params_params(index_types[j])} [pytest : test] # when group_strict_size=true, it shall return results with entities = limit * group_size [pytest : test] res1 = collection_w.search(data=search_vectors, anns_field=dense_types[j], [pytest : test] param=search_params, limit=limit, # consistency_level=CONSISTENCY_STRONG, [pytest : test] group_by_field=ct.default_string_field_name, [pytest : test] group_size=group_size, group_strict_size=True, [pytest : test] output_fields=[ct.default_string_field_name])[0] [pytest : test] for i in range(nq): [pytest : test] for l in range(limit): [pytest : test] group_values = [] [pytest : test] for k in range(10): [pytest : test] group_values.append(res1[i][l].fields.get(ct.default_string_field_name)) [pytest : test] assert len(set(group_values)) == 1 [pytest : test] assert len(res1[i]) == limit * group_size [pytest : test] [pytest : test] # when group_strict_size=false, it shall return results with group counts = limit [pytest : test] res1 = collection_w.search(data=search_vectors, anns_field=dense_types[j], [pytest : test] param=search_params, limit=limit, # consistency_level=CONSISTENCY_STRONG, [pytest : test] group_by_field=ct.default_string_field_name, [pytest : test] group_size=group_size, group_strict_size=False, [pytest : test] output_fields=[ct.default_string_field_name])[0] [pytest : test] for i in range(nq): [pytest : test] group_values = [] [pytest : test] for l in 
range(len(res1[i])): [pytest : test] group_values.append(res1[i][l].fields.get(ct.default_string_field_name)) [pytest : test] assert len(set(group_values)) == limit [pytest : test] [pytest : test] # hybrid search group by [pytest : test] req_list = [] [pytest : test] for j in range(len(dense_types)): [pytest : test] search_params = { [pytest : test] "data": cf.gen_vectors(nq, dim=dims[j], vector_data_type=dense_types[j]), [pytest : test] "anns_field": dense_types[j], [pytest : test] "param": {"params": cf.get_search_params_params(index_types[j])}, [pytest : test] "limit": limit, [pytest : test] "expr": "int64 > 0"} [pytest : test] req = AnnSearchRequest(**search_params) [pytest : test] req_list.append(req) [pytest : test] # 4. hybrid search group by [pytest : test] import numpy as np [pytest : test] rank_scorers = ["max", "avg", "sum"] [pytest : test] for scorer in rank_scorers: [pytest : test] res = collection_w.hybrid_search(req_list, WeightedRanker(0.3, 0.3, 0.3), limit=limit, [pytest : test] group_by_field=ct.default_string_field_name, [pytest : test] group_size=group_size, rank_group_scorer=scorer, [pytest : test] output_fields=[ct.default_string_field_name])[0] [pytest : test] for i in range(nq): [pytest : test] group_values = [] [pytest : test] for l in range(len(res[i])): [pytest : test] group_values.append(res[i][l].fields.get(ct.default_string_field_name)) [pytest : test] > assert len(set(group_values)) == limit [pytest : test] E AssertionError: assert 36 == 50 [pytest : test] E + where 36 = len({'11', '13', '17', '18', '19', '2', ...}) [pytest : test] E + where {'11', '13', '17', '18', '19', '2', ...} = set(['42', '74', '47', '53', '85', '93', ...])
No response
failed job: https://jenkins.milvus.io:18080/blue/organizations/jenkins/Milvus%20HA%20CI/detail/PR-36381/5/pipeline/94
This test case appears to fail with relatively high probability: it has failed on both branches.
Is there an existing issue for this?
Environment
Current Behavior
Expected Behavior
No response
Steps To Reproduce
No response
Milvus Log
failed job: https://jenkins.milvus.io:18080/blue/organizations/jenkins/Milvus%20HA%20CI/detail/PR-36381/5/pipeline/94
Anything else?
No response