// <!-- // The MIT License (MIT) // // Copyright (c) 2024 Kris Jusiak kris@jusiak.net // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in all // copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. //
// --> Overview / Examples / API / FAQ
A static perfect hash function maps a set of keys known in advance to a set of values with no collisions.
-DNTEST
- see FAQ)-Wall -Wextra -Werror -pedantic -pedantic-errors -fno-exceptions -fno-rtti
)Hello world (https://godbolt.org/z/dzd6o3Pxo)
enum class color { red, green, blue };
constexpr auto colors = std::array{
std::pair{"red"sv, color::red},
std::pair{"green"sv, color::green},
std::pair{"blue"sv, color::blue},
};
static_assert(color::green == mph::lookup<colors>("green"));
static_assert(color::red == mph::lookup<colors>("red"));
static_assert(color::blue == mph::lookup<colors>("blue"));
std::print("{}", mph::lookup<colors>("green"sv)); // prints 1
mph::lookup
assumes only valid input and returns mapped value direclty.
static_assert(not mph::find<colors>("unknown"));
static_assert(mph::find<colors>("green"));
static_assert(mph::find<colors>("red"));
static_assert(mph::find<colors>("blue"));
std::print("{}", *mph::find<colors>("green"sv)); // prints 1
mph::find
doesnt assume valid input and returns optional of mapped value.
Performance (https://godbolt.org/z/rqYj9a1cr)
int lookup(int id) {
static constexpr std::array ids{
std::pair{54u, 91u},
std::pair{64u, 324u},
std::pair{91u, 234u},
};
return mph::lookup<ids>(id);
}
lookup: // g++ -DNDEBUG -std=c++20 -O3
imull $1275516394, %edi, %eax
shrl $23, %eax
movl $24029728, %ecx
shrxl %eax, %ecx, %eax
andl $511, %eax
retq
Performance (https://godbolt.org/z/vv6W4nGfb)
int lookup(int id) {
static constexpr std::array ids{
std::pair{54u, 91u},
std::pair{324u, 54u},
std::pair{64u, 324u},
std::pair{234u, 64u},
std::pair{91u, 234u},
};
return mph::lookup<ids>(id);
}
lookup: // g++ -DNDEBUG -std=c++20 -O3
andl $7, %edi
leaq lookup(%rip), %rax
movl (%rax,%rdi,4), %eax
retq
lookup:
.long 324
.long 0
.long 64
.long 234
.long 54
.long 0
.long 91
Performance (https://godbolt.org/z/qMzxKK4sd)
int find(int id) {
static constexpr std::array ids{
std::pair{27629, 1},
std::pair{6280, 2},
// 1..128 pairs...
std::pair{33691, 128},
};
return *mph::find<ids>(id);
}
find: // g++ -DNDEBUG -std=c++20 -O3 -mbmi2 -mavx512f
vpbroadcastd %edi, %zmm0
shll $4, %edi
movzbl %dil, %ecx
leaq find
vpcmpeqd (%rdx,%rcx,4), %zmm0, %k0
kmovw %k0, %esi
kortestw %k0, %k0
rep bsfq %rax, %rax
movl $64, %eax
addl %eax, %ecx
xorl %eax, %eax
testw %si, %si
cmovnel 1024(%rdx,%rcx,4), %eax
vzeroupper
retq
find:
... // see godbolt
Performance (https://godbolt.org/z/KaKzf7Pax)
int find(std::span<const char, 8> str) {
static constexpr auto symbols = std::array{
std::pair{"AMZN "sv, 1},
std::pair{"AAPL "sv, 2},
std::pair{"GOOGL "sv, 3},
std::pair{"META "sv, 4},
std::pair{"MSFT "sv, 5},
std::pair{"NVDA "sv, 6},
std::pair{"TSLA "sv, 7},
};
return *mph::find<symbols>(str);
}
find: // g++ -DNDEBUG -std=c++20 -O3 -mbmi2
movq 8(%rsi), %rax
movl $1031, %ecx
leaq find(%rip), %rdx
xorl %esi, %esi
movq (%rax), %rax
pextq %rcx, %rax, %rcx
shll $4, %ecx
cmpq (%rcx,%rdx), %rax
movzbl 8(%rcx,%rdx), %eax
cmovnel %esi, %eax
retq
find:
... // see godbolt
Performance (https://godbolt.org/z/fdMPsYWjE)
int find(std::string_view str) {
using std::literals::operator""sv;
// values assigned from 0..N-1
static constexpr std::array symbols{
"BTC "sv, "ETH "sv, "BNB "sv,
"SOL "sv, "XRP "sv, "DOGE"sv,
"TON "sv, "ADA "sv, "SHIB"sv,
"AVAX"sv, "LINK"sv, "BCH "sv,
};
return *mph::find<symbols>(str);
}
find: // g++ -DNDEBUG -std=c++20 -O3 -mbmi2
shll $3, %edi
bzhil %edi, (%rsi), %eax
movl $789, %ecx
pextl %ecx, %eax, %ecx
leaq find(%rip), %rdx
xorl %esi, %esi
cmpl (%rdx,%rcx,8), %eax
movzbl 4(%rdx,%rcx,8), %eax
cmovnel %esi, %eax
retq
find:
... // see godbolt
lookup/find
customization point - https://godbolt.org/z/enqeGxKK9to
customization point - https://godbolt.org/z/jTMx4n6j3branchless dispatcher
- https://godbolt.org/z/5PTE3ercEenum_to_string
- https://godbolt.org/z/ojohP6j7fstring_to_enum
- https://godbolt.org/z/83vGhY7M8
clang++ -std=c++20 -O3 -DNDEBUG -mbmi2 benchmark.cpp
| ns/op | op/s | err% |total | benchmark
|------:|---------------:|-----:|-----:|:----------
| 12.25 | 81,602,449.70 | 0.3% | 0.15 | `random_strings_5_len_4.std.map`
| 5.56 | 179,750,906.50 | 0.2% | 0.07 | `random_strings_5_len_4.std.unordered_map`
| 9.17 | 109,096,850.98 | 0.2% | 0.11 | `random_strings_5_len_4.boost.unordered_map`
| 13.48 | 74,210,250.54 | 0.3% | 0.16 | `random_strings_5_len_4.boost.flat_map`
| 7.70 | 129,942,965.18 | 0.3% | 0.09 | `random_strings_5_len_4.gperf`
| 1.61 | 621,532,188.81 | 0.1% | 0.02 | `random_strings_5_len_4.mph`
| 14.66 | 68,218,086.71 | 0.8% | 0.18 | `random_strings_5_len_8.std.map`
| 13.45 | 74,365,239.56 | 0.2% | 0.16 | `random_strings_5_len_8.std.unordered_map`
| 9.68 | 103,355,605.09 | 0.2% | 0.12 | `random_strings_5_len_8.boost.unordered_map`
| 16.00 | 62,517,180.19 | 0.4% | 0.19 | `random_strings_5_len_8.boost.flat_map`
| 7.70 | 129,809,356.36 | 0.3% | 0.09 | `random_strings_5_len_8.gperf`
| 1.58 | 633,084,194.24 | 0.1% | 0.02 | `random_strings_5_len_8.mph`
| 17.21 | 58,109,576.87 | 0.3% | 0.21 | `random_strings_6_len_2_5.std.map`
| 15.28 | 65,461,167.99 | 0.2% | 0.18 | `random_strings_6_len_2_5.std.unordered_map`
| 12.21 | 81,931,391.20 | 0.4% | 0.15 | `random_strings_6_len_2_5.boost.unordered_map`
| 17.15 | 58,323,741.08 | 0.5% | 0.21 | `random_strings_6_len_2_5.boost.flat_map`
| 7.94 | 125,883,197.55 | 0.5% | 0.09 | `random_strings_6_len_2_5.gperf`
| 6.05 | 165,239,616.00 | 0.5% | 0.07 | `random_strings_6_len_2_5.mph`
| 31.61 | 31,631,402.94 | 0.2% | 0.38 | `random_strings_100_len_8.std.map`
| 15.32 | 65,280,594.09 | 0.2% | 0.18 | `random_strings_100_len_8.std.unordered_map`
| 17.13 | 58,383,850.20 | 0.3% | 0.20 | `random_strings_100_len_8.boost.unordered_map`
| 31.42 | 31,822,519.67 | 0.2% | 0.38 | `random_strings_100_len_8.boost.flat_map`
| 8.04 | 124,397,773.85 | 0.2% | 0.10 | `random_strings_100_len_8.gperf`
| 1.58 | 632,813,481.73 | 0.1% | 0.02 | `random_strings_100_len_8.mph`
| 32.62 | 30,656,015.03 | 0.3% | 0.39 | `random_strings_100_len_1_8.std.map`
| 19.34 | 51,697,107.73 | 0.5% | 0.23 | `random_strings_100_len_1_8.std.unordered_map`
| 19.51 | 51,254,525.17 | 0.3% | 0.23 | `random_strings_100_len_1_8.boost.unordered_map`
| 33.58 | 29,780,574.17 | 0.6% | 0.40 | `random_strings_100_len_1_8.boost.flat_map`
| 13.06 | 76,577,037.07 | 0.7% | 0.16 | `random_strings_100_len_1_8.gperf`
| 6.02 | 166,100,665.07 | 0.2% | 0.07 | `random_strings_100_len_1_8.mph`
| 1.28 | 778,723,795.75 | 0.1% | 0.02 | `random_uints_5.mph`
g++ -std=c++20 -O3 -DNDEBUG -mbmi2 benchmark.cpp
| ns/op | op/s | err% |total | benchmark
|------:|---------------:|-----:|-----:|:----------
| 12.28 | 81,460,330.38 | 0.9% | 0.15 | `random_strings_5_len_4.std.map`
| 5.29 | 188,967,241.90 | 0.3% | 0.06 | `random_strings_5_len_4.std.unordered_map`
| 9.69 | 103,163,192.67 | 0.2% | 0.12 | `random_strings_5_len_4.boost.unordered_map`
| 13.56 | 73,756,333.08 | 0.4% | 0.16 | `random_strings_5_len_4.boost.flat_map`
| 7.69 | 130,055,662.66 | 0.6% | 0.09 | `random_strings_5_len_4.gperf`
| 1.39 | 718,910,252.82 | 0.1% | 0.02 | `random_strings_5_len_4.mph`
| 14.26 | 70,103,007.82 | 2.4% | 0.17 | `random_strings_5_len_8.std.map`
| 13.36 | 74,871,047.51 | 0.4% | 0.16 | `random_strings_5_len_8.std.unordered_map`
| 9.82 | 101,802,074.00 | 0.3% | 0.12 | `random_strings_5_len_8.boost.unordered_map`
| 15.97 | 62,621,571.95 | 0.3% | 0.19 | `random_strings_5_len_8.boost.flat_map`
| 7.92 | 126,265,206.30 | 0.3% | 0.09 | `random_strings_5_len_8.gperf`
| 1.40 | 713,596,376.62 | 0.4% | 0.02 | `random_strings_5_len_8.mph`
| 15.98 | 62,576,142.34 | 0.5% | 0.19 | `random_strings_6_len_2_5.std.map`
| 17.56 | 56,957,868.12 | 0.5% | 0.21 | `random_strings_6_len_2_5.std.unordered_map`
| 11.68 | 85,637,378.45 | 0.3% | 0.14 | `random_strings_6_len_2_5.boost.unordered_map`
| 17.25 | 57,965,732.68 | 0.6% | 0.21 | `random_strings_6_len_2_5.boost.flat_map`
| 9.13 | 109,580,632.48 | 0.7% | 0.11 | `random_strings_6_len_2_5.gperf`
| 7.17 | 139,563,745.72 | 0.4% | 0.09 | `random_strings_6_len_2_5.mph`
| 30.20 | 33,117,522.76 | 0.7% | 0.36 | `random_strings_100_len_8.std.map`
| 15.01 | 66,627,962.89 | 0.4% | 0.18 | `random_strings_100_len_8.std.unordered_map`
| 16.79 | 59,559,414.60 | 0.6% | 0.20 | `random_strings_100_len_8.boost.unordered_map`
| 31.36 | 31,884,629.57 | 0.8% | 0.38 | `random_strings_100_len_8.boost.flat_map`
| 7.75 | 128,973,947.61 | 0.7% | 0.09 | `random_strings_100_len_8.gperf`
| 1.50 | 667,041,673.54 | 0.1% | 0.02 | `random_strings_100_len_8.mph`
| 30.92 | 32,340,612.08 | 0.4% | 0.37 | `random_strings_100_len_1_8.std.map`
| 25.35 | 39,450,222.09 | 0.4% | 0.30 | `random_strings_100_len_1_8.std.unordered_map`
| 19.76 | 50,609,820.90 | 0.2% | 0.24 | `random_strings_100_len_1_8.boost.unordered_map`
| 32.39 | 30,878,018.77 | 0.6% | 0.39 | `random_strings_100_len_1_8.boost.flat_map`
| 11.20 | 89,270,687.92 | 0.2% | 0.13 | `random_strings_100_len_1_8.gperf`
| 7.17 | 139,471,159.67 | 0.5% | 0.09 | `random_strings_100_len_1_8.mph`
| 1.93 | 519,047,110.39 | 0.3% | 0.02 | `random_uints_5.mph`
namespace mph {
/**
* Static [minimal] perfect hash lookup function
* @tparam entries constexpr array of keys or key/value pairs
*/
template<const auto& entries>
inline constexpr auto lookup = [](const auto& key) {
if constexpr(constexpr lookup$magic_lut<entries> lookup{}; lookup) {
return lookup(key);
} else {
return lookup$pext<entries>(key);
}
};
/**
* Static perfect hash find function
* @tparam entries constexpr array of keys or key/value pairs
*/
template<const auto& entries>
inline constexpr auto find =
[]<u8 probability = 50u>(const auto& key, const auto& unknown = {}) -> optional {
if constexpr (entries.size() == 0u) {
return unknown;
} else if constexpr (entries.size() <= 64u) {
return find$pext<entries>.operator()<probability>(key, unknown);
} else {
constexpr auto bucket_size = simd_size_v<key_type, simd_abi::native<key_type>>;
return find$simd<entries, bucket_size>.operator()<probability>(key, unknown);
}
};
} // namespace mph
Trade-offs?
mph
supports different types of key/value pairs and thousands of key/value pairs, but not millions - (see benchmarks).
uint128_t
, that includes strings.mph
will SFINAE away lookup
function.lookup
implementation), for example:template<const auto& entries> requires (entries.size() > 1'000'000)
inline constexpr auto mph::find =
[](const auto& key, const auto& unknown = {}) -> optional { ... }
How mph
is working under the hood?
mph
takes advantage of knowing the key/value pairs at compile-time as well as the specific hardware instructions. The following is a pseudo code of thelookup
algorithm for minimal perfect hash table.
def lookup$magic_lut[entries: array](key : any, max_attempts = 100'000):
# 0. magic and lut for entries [compile-time]
nbits = sizeof(u32) * CHAR_BIT - countl_zero(max(entries.second))
mask = (1u << nbits) - 1u;
shift = sizeof(u32) * CHAR_BIT - nbits;
lut = {};
while max_attempts--:
magic = rand()
for k, v in entries:
lut |= v << (k * magic >> shift);
for k, v in entries:
if (lut >> (k * magic >> shift) & mask) != v:
lut = {}
break
assert magic != 0 and lut != 0 and shift != 0 and mask != 0
# 1. lookup [run-time]
return (lut >> ((key * magic) >> shift)) & mask;
The following is a pseudo code of the
find
algorithm for perfect hash table.
# word: 00101011
# mask: 11100001
# &: 000____1
# pext: ____0001 # intel/intrinsics-guide/index.html#text=pext
def pext(a : uN, mask : uN):
dst, m, k = ([], 0, 0)
while m < nbits(a):
if mask[m] == 1:
dst.append(a[m])
k += 1
m += 1
return uN(dst)
def find$pext[entries: array](key : any, unknown: any):
# 0. find mask which uniquely identifies all keys [compile-time]
mask = 0b111111...
for i in range(nbits(mask)):
masked = []
mask.unset(i)
for k, v in entries:
masked.append(k & mask)
if not unique(masked):
mask.set(i)
assert unique(masked)
assert mask != ~mask{}
# 1. create lookup table [compile-time]
lookup = array(typeof(entries[0]), 2**popcount(mask))
for k, v in entries:
lookup[pext(k, mask)] = (k, v)
# 2. lookup [run-time] # if key is a string convert to integral first (memcpy)
k, v = lookup[pext(key, mask)]
if k == key: # cmove
return v
else:
return unknown
def find$simd[entries: array](key : any, unknown: any):
# 0. find mask which uniquely identifies all keys [compile-time]
mask = 0b111111...
bucket_size = simd_size_v<entries[0].first, native>
for i in range(nbits(mask)):
masked = []
mask.unset(i)
for k, v in entries:
masked.append(k & mask)
if not unique(masked, bucket_size):
mask.set(i)
assert unique(masked, bucket_size)
assert mask != ~mask{}
# 1. create lookup table [compile-time]
keys = array(typeof(entries[0].first), bucket_size * 2**popcount(mask))
values = array(typeof(entries[0].second), bucket_size * 2**popcount(mask))
for k, v in entries:
slot = pext(k, mask)
while (keys[slot]) slot++;
keys[slot] = k
values[slot] = v
# 2. lookup [run-time] # if key is a string convert to integral first (memcpy)
index = bucket_size * pext(key, mask)
match = k == keys[&index] # simd element-wise comparison
if any_of(match):
return values[index + find_first_set(match)]
else:
return unknown
How to tweak lookup/find
performance for my data/use case?
Always measure!
span
, array
.probability
values to optimize lookups. Especially benefitial if its known that input keys are always coming from predefined entries
(probability = 100) as it will avoid the comparison.hardware_destructive_interference_size
- usually 64u
) to the lookup/find
. That will align the underlying lookup table.How to fix compilation error constexpr evaluation hit maximum step limit
?
The following options can be used to increase the limits, however, compilation-times should be monitored.
gcc: -fconstexpr-ops-limit=N
clang: -fconstexpr-steps=N
Is support for bmi2 instructions required?
mph
works on platforms withoutbmi2
instructions which can be emulated with some limitations (*).
// bmi2
mov ecx, 789
pext ecx, eax, ecx
// no bmi2
mov ecx, eax
and ecx, 789
imul ecx, ecx, 57
shr ecx, 2
and ecx, 248
https://stackoverflow.com/questions/14547087/extracting-bits-with-a-single-multiplication (*)
How to disable cmov
generation?
Set
probability
value to something else than50u
(default) - it means that the input data is predictable in some way andjmp
will be generated instead. Additionaly the following compiler options can be used.
clang: -mllvm -x86-cmov-converter=false
How to disable running tests at compile-time?
When
-DNTEST
is defined static_asserts tests wont be executed upon inclusion. Note: Use with caution as disabling tests means that there are no gurantees upon inclusion that given compiler/env combination works as expected.
How to integrate with CMake.FetchContent?
include(FetchContent)
FetchContent_Declare(
qlibs.mph
GIT_REPOSITORY https://github.com/qlibs/mph
GIT_TAG v5.0.3
)
FetchContent_MakeAvailable(qlibs.mph)
target_link_libraries(${PROJECT_NAME} PUBLIC qlibs.mph);
Similar projects?
gperf, frozen, nbperf, cmph, perfecthash, lemonhash, pthash, shockhash, burr, hash-prospector
Acknowledgments
https://lemire.me/blog, http://0x80.pl, https://easyperf.net, https://www.jabperf.com, https://johnnysswlab.com, pefect-hashing, gperf, cmph, smasher, minimal perfect hashing, hash functions