Reproduce LVQ and Leanvec speedup

Hi, I am using the cpp example in the documentation with the test dataset svs/data/test_dataset, but LVQ and Leanvec indexes takes longer to complete the search. Can anyone help me to check what may have caused this issue?

running lvq8 vs uncompressed:

//! [Example All]

//! [Includes]
// SVS Dependencies
#include "svs/orchestrators/vamana.h"  // bulk of the dependencies required.
#include "svs/core/recall.h"           // Convenient k-recall@n computation.
#include "svs/extensions/vamana/lvq.h" // LVQ extensions for the Vamana index.

// Alternative main definition
#include "svsmain.h"

// stl
#include <chrono>
#include <map>
#include <string>
#include <string_view>
#include <vector>
//! [Includes]

const std::string save_prefix = "index/";

//! [Helper Utilities]
double run_recall(
    svs::Vamana& index,
    const svs::data::SimpleData<float>& queries,
    const svs::data::SimpleData<uint32_t>& groundtruth,
    size_t search_window_size,
    size_t num_neighbors,
    std::string_view message = ""
) {
    index.set_num_threads(12);
    auto search_start = std::chrono::high_resolution_clock::now();
    index.set_search_window_size(search_window_size);
    auto results = index.search(queries, num_neighbors);
    auto search_end = std::chrono::high_resolution_clock::now();

    double recall = svs::k_recall_at_n(groundtruth, results, num_neighbors, num_neighbors);
    if (!message.empty()) {
        fmt::print("[{}] ", message);
    }
    auto duration = std::chrono::duration<double>(search_end - search_start).count();
    fmt::print("Windowsize = {}, Duration = {}, Recall = {}\n", search_window_size,  duration, recall);
    return recall;
}

const bool DEBUG = false;
void check(double expected, double got, double eps = 0.001) {
    double diff = std::abs(expected - got);
    if constexpr (DEBUG) {
        fmt::print("Expected {}. Got {}\n", expected, got);
    } else {
        if (diff > eps) {
            throw ANNEXCEPTION("Expected ", expected, ". Got ", got, '!');
        }
    }
}
//! [Helper Utilities]

// Alternative main definition
int svs_main(std::vector<std::string> args) {
    //! [Argument Extraction]
    const size_t nargs = args.size();
    if (nargs != 4) {
        throw ANNEXCEPTION("Expected 3 arguments. Instead, got ", nargs, '!');
    }
    const std::string& data_vecs = args.at(1);
    const std::string& query_vecs = args.at(2);
    const std::string& groundtruth_vecs = args.at(3);
    //! [Argument Extraction]

    // Building the index

    //! [Build Parameters]
    auto parameters = svs::index::vamana::VamanaBuildParameters{
        1.2,  // alpha
        64,   // graph max degree
        128,  // search window size
        1024, // max candidate pool size
        60,   // prune to degree
        true, // full search history
    };
    //! [Build Parameters]

    //! [Index Build]
    auto uncompressed_build_start = std::chrono::high_resolution_clock::now();
    size_t num_threads = 12;
    svs::Vamana index = svs::Vamana::build<float>(
        parameters, svs::VectorDataLoader<float>(data_vecs), svs::DistanceL2(), num_threads
    );
    auto uncompressed_build_end = std::chrono::high_resolution_clock::now();
    printf("Uncompressed build time = %.6f\n", std::chrono::duration<double>(uncompressed_build_end - uncompressed_build_start).count());
    //! [Index Build]

    // Searching the index

    //! [Load Aux]
    // Load the queries and ground truth.
    auto queries = svs::load_data<float>(query_vecs);
    auto groundtruth = svs::load_data<uint32_t>(groundtruth_vecs);
    //! [Load Aux]

    //! [Perform Queries]
    auto recall = run_recall(index, queries, groundtruth, 30, 10, "Uncompressed");
    //! [Perform Queries]

    //! [Saving]
    index.save( save_prefix+"example_config",  save_prefix+"example_graph",  save_prefix+"example_data");
    //! [Saving]

    // Reloading a saved index

    //! [Loading]
    // We can reload an index from a previously saved set of files.
    index = svs::Vamana::assemble<float>(
         save_prefix+"example_config",
        svs::GraphLoader( save_prefix+"example_graph"),
        svs::VectorDataLoader<float>( save_prefix+"example_data"),
        svs::DistanceType::L2,
        4 // num_threads
    );

    recall = run_recall(index, queries, groundtruth, 30, 10, "Reload");
    printf("Recall = %f\n", recall);
    //! [Loading]

    // Search using vector compression

    //! [Compressed Loader]
    // Quantization
    size_t padding = 32;
    namespace lvq = svs::quantization::lvq;

    // Wrap the compressor object in a lazy functor.
    // This will defer loading and compression of the LVQ dataset until the threadpool
    // used in the index has been created.
    auto compressor = svs::lib::Lazy([=](svs::threads::ThreadPool auto& threadpool) {
        auto data = svs::VectorDataLoader<float, 128>( save_prefix+"example_data").load();
        return lvq::LVQDataset<8, 0, 128>::compress(data, threadpool, padding);
    });
    index = svs::Vamana::assemble<float>(
         save_prefix+"example_config", svs::GraphLoader( save_prefix+"example_graph"), compressor, svs::DistanceL2()
    );
    //! [Compressed Loader]

    //! [Search Compressed]
    recall = run_recall(index, queries, groundtruth, 30, 10, "Compressed Load");
    printf("Recall = %f\n", recall);
    //! [Search Compressed]

    //! [Build Index Compressed]
    // Compressed building
    auto compressed_build_start = std::chrono::high_resolution_clock::now();
    index =
        svs::Vamana::build<float>(parameters, compressor, svs::DistanceL2(), num_threads);
    auto compressed_build_end = std::chrono::high_resolution_clock::now();
    printf("Compressed build time = %.6f\n", std::chrono::duration<double>(compressed_build_end - compressed_build_start).count());
    recall = run_recall(index, queries, groundtruth, 30, 10, "Compressed Build");
    auto compressed_search_end = std::chrono::high_resolution_clock::now();
    //! [Build Index Compressed]

    //! [Save Index Compressed]
    index.save( save_prefix+"example_compressed_config",  save_prefix+"example_compressed_graph",  save_prefix+"example_compressed_data");
    //! [Save Index Compressed]

    //! [Loading compressed]
    // We can reload an index from a previously saved set of files.
    index = svs::Vamana::assemble<float>(
        save_prefix + "example_compressed_config",
        svs::GraphLoader(save_prefix + "example_compressed_graph"),
        // svs::VectorDataLoader<float>(save_prefix + "example_compressed_data"),
        svs::lib::Lazy([&]() {
            return svs::lib::load_from_disk<svs::quantization::lvq::LVQDataset<
                8, 0>>(save_prefix + "example_compressed_data");
        }),
        svs::DistanceL2(),
        4  // num_threads
    );
    recall = run_recall(index, queries, groundtruth, 30, 10, "Load compressed");
    printf("Recall = %f\n", recall);
    //! [Loading compressed]

    return 0;
}

// Special main providing some helpful utilties.
SVS_DEFINE_MAIN();
//! [Example All]

// ./vamana ../svs/data/test_dataset/data_f32.fvecs ../svs/data/test_dataset/queries_f32.fvecs ../svs/data/test_dataset/groundtruth_euclidean.ivecs

runing leanvec

//! [Example All]

#define SPDLOG_FMT_EXTERNAL 1

//! [Includes]
// SVS Dependencies
// #include "svs/leanvec/leanvec.h"
#include "svs/extensions/vamana/leanvec.h"

#include "svs/core/recall.h"            // Convenient k-recall@n computation.
#include "svs/extensions/vamana/lvq.h"  // LVQ extensions for the Vamana index.
#include "svs/orchestrators/dynamic_vamana.h"  // bulk of the dependencies required.

// Alternative main definition
#include "svsmain.h"

// stl
#include <map>
#include <string>
#include <string_view>
#include <vector>
//! [Includes]

const std::string save_prefix = "index/";

// Alternative main definition
int svs_main(std::vector<std::string> args) {
  //! [Argument Extraction]
  const size_t nargs = args.size();
  if (nargs != 4) {
    throw ANNEXCEPTION("Expected 3 arguments. Instead, got ", nargs, '!');
  }
  const std::string& data_vecs = args.at(1);
  const std::string& query_vecs = args.at(2);
  const std::string& groundtruth_vecs = args.at(3);
  //! [Argument Extraction]

  //! [Build Parameters]
  auto parameters = svs::index::vamana::VamanaBuildParameters{
      1.2,   // alpha
      64,    // graph max degree
      128,   // search window size
      1024,  // max candidate pool size
      60,    // prune to degree
      true,  // full search history
  };
  size_t num_threads = 4;
  //! [Build Parameters]

  auto data_vecs_ = svs::load_data<float>(data_vecs);
  //! [Load Aux]
  // Load the queries and ground truth.
  auto queries = svs::load_data<float>(query_vecs);
  auto groundtruth = svs::load_data<uint32_t>(groundtruth_vecs);
  //   print the dimension
  fmt::print("Data count: {}\n", data_vecs_.size());                    // 10000
  fmt::print("Data dimension: {}\n", data_vecs_.dimensions());          // 128
  fmt::print("Queries count: {}\n", queries.size());                    // 1000
  fmt::print("Queries dimension: {}\n", queries.dimensions());          // 128
  fmt::print("Groundtruth count: {}\n", groundtruth.size());            // 1000
  fmt::print("Groundtruth dimension: {}\n", groundtruth.dimensions());  // 100
  //! [Load Aux]

  std::vector<size_t> ids;
  ids.reserve(10000);
  for (size_t i = 0; i < 10000; i++) {
    ids.push_back(i);
  }

  //! [Search Parameters]
  auto search_params = svs::index::vamana::VamanaSearchParameters();
  search_params.buffer_config(svs::index::vamana::SearchBufferConfig(30));
  //! [Search Parameters]

  bool use_pca = true;  // they only support loading pre-generated OOD matrices

  auto leanvec_data =
      svs::leanvec::LeanDataset<svs::leanvec::UsingLVQ<8>,
                                svs::leanvec::UsingLVQ<8>, 64,
                                svs::Dynamic>::reduce(data_vecs_, num_threads);

  auto loader = svs::lib::Lazy([&leanvec_data]() { return leanvec_data; });

  // Building the index with original vectors
  {
    auto build_start = std::chrono::high_resolution_clock::now();
    auto index = svs::Vamana::build<float>(parameters, loader,
                                           svs::DistanceL2(), num_threads);
    auto build_end = std::chrono::high_resolution_clock::now();
    printf("Build time = %.6f\n",
           std::chrono::duration<double>(build_end - build_start).count());

    index.set_search_parameters(search_params);
    {
      auto search_start = std::chrono::high_resolution_clock::now();
      auto results = index.search(queries, 10);
      auto search_end = std::chrono::high_resolution_clock::now();
      printf("Search time = %.6f\n",
             std::chrono::duration<double>(search_end - search_start).count());

      double recall = svs::k_recall_at_n(groundtruth, results);
      fmt::print("Recall = {}\n", recall);
    }
    //! [Saving]
    index.save(save_prefix + "example_config", save_prefix + "example_graph",
               save_prefix + "example_data");
    //! [Saving]
  }

  // Reloading a saved index
  // We can reload an index from a previously saved set of files.
  {
    auto reloaded = svs::Vamana::assemble<float>(
        save_prefix + "example_config",
        svs::graphs::SimpleBlockedGraph<uint32_t>::load(save_prefix +
                                                        "example_graph"),
        svs::lib::Lazy([&]() {
          return svs::lib::load_from_disk<svs::leanvec::LeanDataset<
              svs::leanvec::UsingLVQ<8>, svs::leanvec::UsingLVQ<8>, 64,
              svs::Dynamic>>(save_prefix + "example_data");
        }),
        svs::DistanceL2(),
        4  // num_threads
    );
    auto search_start = std::chrono::high_resolution_clock::now();
    auto results = reloaded.search(queries, 10);
    auto search_end = std::chrono::high_resolution_clock::now();
    printf("Reload search time = %.6f\n",
           std::chrono::duration<double>(search_end - search_start).count());

    double recall = svs::k_recall_at_n(groundtruth, results);
    fmt::print("Recall = {}\n", recall);
  }

  return 0;
}

// Special main providing some helpful utilties.
SVS_DEFINE_MAIN();
//! [Example All]

// ./leanvec ../svs/data/test_dataset/data_f32.fvecs ../svs/data/test_dataset/queries_f32.fvecs ../svs/data/test_dataset/groundtruth_euclidean.ivecs

Measured search duration

Uncompressed build time = 0.972131
[Uncompressed] Windowsize = 30, Duration = 0.006569277, Recall = 0.8218
[Reload] Windowsize = 30, Duration = 0.006516681000000001, Recall = 0.8218
Recall = 0.821800
[Compressed Load] Windowsize = 30, Duration = 0.029918565, Recall = 0.822
Recall = 0.822000
Compressed build time = 5.918356
[Compressed Build] Windowsize = 30, Duration = 0.035164214, Recall = 0.8215
[Load compressed] Windowsize = 30, Duration = 0.040216505, Recall = 0.8215
Recall = 0.821500

Data count: 10000
Data dimension: 128
Queries count: 1000
Queries dimension: 128
Groundtruth count: 1000
Groundtruth dimension: 100
Build time = 4.987378
Search time = 0.132416
Recall = 0.3101
Reload search time = 0.129005
Recall = 0.3101

Search duration: Leanvec > lvq > uncompressed ?

Build flags:

# building svs
cmake .. -DSVS_EXPERIMENTAL_LEANVEC=1 -DSVS_BUILD_EXAMPLES=YES -DSVS_BUILD_BENCHMARK=YES -DCMAKE_CXX_COMPILER=g++-11 -DCMAKE_INSTALL_PREFIX=../../install
# for the executable
g++-11 -Ofast -lrt -std=c++2a -DHAVE_CXX0X -march=native -fpic -w -fopenmp -ftree-vectorize -ftree-vectorizer-verbose=0 -I. -I$(HOME)/intel/oneapi/mkl/2024.0//include -I$(HOME)/lvq/install/include -DSPDLOG_FMT_EXTERNAL -I$(HOME)/lvq/install/include/eve-2022.9.1 -DSVS_VERSION_MAJOR=0 -DSVS_VERSION_MINOR=0 -DSVS_VERSION_PATCH=3 -DSVS_ENABLE_NUMA=0 -DSVS_CHECK_BOUNDS=1 -DSVS_INITIALIZE_LOGGER=1 vamana.cpp -o vamana -I. -L$(HOME)/lvq/install/lib -l:libfmt.a -l:libspdlog.a -pthread -lm -fopenmp -L$(HOME)/intel/oneapi/mkl/2024.0/lib/ -lmkl_rt

intel / ScalableVectorSearch

Reproduce LVQ and Leanvec speedup #55