nmslib / hnswlib

Header-only C++/python library for fast approximate nearest neighbors
https://github.com/nmslib/hnswlib
Apache License 2.0
4.3k stars 633 forks source link

Regarding the identification of elements with a high contribution rate in the inspection vector. #560

Open screwface106 opened 4 months ago

screwface106 commented 4 months ago

This logic ranks the squared differences between the query vector parameters and the learned vector parameters used in Euclidean distance calculations. If this logic is customized and added to the HNSW algorithm, it could enable the calculation of the contribution of each feature to the detection of outliers.

include

include

include

include

// Function to calculate the sum of squared differences between vectors std::vector computeSquaredDifferences(const std::vector& v1, const std::vector& v2) { std::vector differences(v1.size()); for (size_t i = 0; i < v1.size(); i++) { differences[i] = pow(v1[i] - v2[i], 2); } return differences; }

// Function to rank contributions from the sum of squared differences std::vector rankContributions(const std::vector& squaredDifferences) { std::vector indices(squaredDifferences.size()); for (size_t i = 0; i < indices.size(); i++) indices[i] = i;

// Sort in descending order
std::sort(indices.begin(), indices.end(), [&](size_t a, size_t b) {
    return squaredDifferences[a] > squaredDifferences[b];
});

return indices;

}

int main() { // Example vectors std::vector learnedVector = {1.0, 2.0, 3.0}; std::vector queryVector = {1.1, 2.1, 3.5};

// Calculate squared differences
auto squaredDifferences = computeSquaredDifferences(learnedVector, queryVector);

// Get ranking of contributions
auto rankings = rankContributions(squaredDifferences);

// Output rankings
for (auto idx : rankings) {
    std::cout << "Feature " << idx << " Contribution: " << squaredDifferences[idx] << std::endl;
}

return 0;

}