Open LLLjun opened 2 years ago
I also encountered the same issue: QPS and recall didn't change as expected. Here is the code I tested, using the default configuration of the sift1m dataset:
# Synthetic benchmark data: uniformly random float32 base vectors and queries.
vector_number = 100000
vector_dimension = 1000
query_number = 1000

# Sample float32 directly; np.random.rand(...).astype(np.float32) would first
# materialise a float64 array twice the size of the final one.
rng = np.random.default_rng()
x = rng.random((vector_number, vector_dimension), dtype=np.float32)
q = rng.random((query_number, vector_dimension), dtype=np.float32)

# Metadata blob: the decimal id of each vector, each followed by a newline.
# Built with join() rather than repeated += to avoid quadratic copying.
m = ''.join(f"{i}\n" for i in range(vector_number))
# Create the SPANN index object for float vectors of the benchmark dimension.
index = SPTAG.AnnIndex('SPANN', 'Float', vector_dimension)

# Build parameters grouped by the section they are registered under.
# They are applied in exactly the order listed here.
# NOTE(review): the original carried a commented-out alternative to "Ratio":
#   index.SetBuildParam("Count", "200", "SelectHead")
build_settings = [
    ("Base", [
        ("IndexAlgoType", "BKT"),
        ("IndexDirectory", "spann_index"),
        ("DistCalcMethod", "L2"),
    ]),
    ("SelectHead", [
        ("isExecute", "true"),
        ("NumberOfThreads", '64'),
        ("Ratio", "0.16"),
        ("TreeNumber", "1"),
        ("BKTKmeansK", "32"),
        ("BKTLeafSize", "8"),
        ("SaveBKT", "false"),
        ("SplitFactor", "6"),
        ("SplitThreshold", "100"),
        ("BKTLambdaFactor", "-1"),
        ("SamplesNumber", "1000"),
        ("SelectThreshold", "50"),
    ]),
    ("BuildHead", [
        ("isExecute", "true"),
        ("NeighborhoodSize", "32"),
        ("TPTNumber", "32"),
        ("TPTLeafSize", "2000"),
        ("MaxCheck", "8192"),
        ("MaxCheckForRefineGraph", "8192"),
        ("RefineIterations", "3"),
        ("NumberOfThreads", "64"),
        ("BKTLambdaFactor", "-1"),
    ]),
    ("BuildSSDIndex", [
        ("isExecute", "true"),
        ("BuildSsdIndex", "true"),
        ("InternalResultNum", "64"),
        ("ReplicaCount", "8"),
        ("PostingPageLimit", "12"),
        ("NumberOfThreads", "64"),
        ("MaxCheck", "8192"),
    ]),
]
for section_name, section_params in build_settings:
    for param_name, param_value in section_params:
        index.SetBuildParam(param_name, param_value, section_name)

# Remove any index directory left over from a previous run before building.
if os.path.exists("spann_index"):
    shutil.rmtree("spann_index")

print ("Build.............................")
st = time.time()
# NOTE(review): the two trailing False flags are presumably "with meta index"
# and "normalized" — confirm against the SPTAG Python wrapper.
index.BuildWithMetaData(x, m, vector_number, False, False)
et = time.time()
build_time = et - st
print("Build time : ", build_time)
# Sweep MaxCheck x SearchPostingPageLimit and time one pass over all queries
# for every combination.
maxcheck = [100, 200, 400, 1000, 2000]
searchPostingPageLimit = [1, 5, 10, 40, 100]

# Loop variables are deliberately not called `m`: that name already holds the
# metadata string passed to BuildWithMetaData and must not be overwritten.
for max_check in maxcheck:
    for page_limit in searchPostingPageLimit:
        # Every parameter is re-applied for each combination, in the same order
        # as the original script; only MaxCheck and SearchPostingPageLimit vary.
        search_settings = [
            ("isExecute", "true"),
            ("BuildSsdIndex", "false"),
            ("InternalResultNum", "32"),
            ("NumberOfThreads", "4"),
            ("HashTableExponent", "4"),
            ("ResultNum", "10"),
            ("MaxCheck", str(max_check)),
            ("MaxDistRatio", "10000"),
            ("SearchPostingPageLimit", str(page_limit)),
        ]
        for param_name, param_value in search_settings:
            index.SetSearchParam(param_name, param_value, "SearchSSDIndex")

        # perf_counter() is monotonic, so the interval cannot be skewed by
        # wall-clock adjustments during the run.
        st = time.perf_counter()
        for t in tqdm(range(q.shape[0])):
            # Search k=3 nearest vectors for query vector q[t]
            result = index.SearchWithMetaData(q[t], 3)
        et = time.perf_counter()
        search_time = et - st
        print(f"{max_check}/{page_limit} Search time : ", search_time)
Hi, I encountered the same issue. Have you figured out the reason? Thanks in advance!
I ran the code following the example, but while searching I found that the MaxCheck parameter doesn't adjust the recall as described.
[1] [query] [maxcheck] [avg] [99%] [95%] [recall] [qps] [mem]
[1] 0-10000 16384 0.0028 0.0090 0.0064 0.8103 2886.7361 0GB
[1] 0-10000 8192 0.0024 0.0086 0.0056 0.8103 3368.8591 0GB
[1] 0-10000 4096 0.0015 0.0058 0.0033 0.8103 5320.0259 0GB
[1] 0-10000 2048 0.0015 0.0060 0.0035 0.8103 5267.8604 0GB
[1] 0-10000 1024 0.0016 0.0055 0.0036 0.8103 5104.4990 0GB
[1] 0-10000 512 0.0014 0.0050 0.0032 0.8103 5527.0239 0GB
[1] 0-10000 256 0.0016 0.0054 0.0037 0.8103 4964.4180 0GB