Open MichaelMure opened 4 years ago
Actually I was incorrect, I didn't realize that I was actually filling the filter.
Now, if you really want, you could use the following code to make the insert ~20% faster when the filter is filled:
// taken from https://nuclear.llnl.gov/CNP/rng/rngman/node4.html
const a = uint64(2862933555777941757)
const b = uint64(3037000493)
// Linear Congruential Generator
// See https://link.springer.com/chapter/10.1007/978-1-4615-2317-8_3
type LCG struct {
state uint64
}
func (l *LCG) Intn(n int) int {
for {
state := atomic.LoadUint64(&l.state)
newState := a*state + b
// Replace only if the state is still the same
swapped := atomic.CompareAndSwapUint64(&l.state, state, newState)
if swapped {
return int(newState % uint64(n))
}
}
}
diff --git a/filter-experiment/cuckoofilter/cuckoofilter.go b/filter-experiment/cuckoofilter/cuckoofilter.go
index 2ba5613..9ba8439 100644
--- a/filter-experiment/cuckoofilter/cuckoofilter.go
+++ b/filter-experiment/cuckoofilter/cuckoofilter.go
@@ -3,7 +3,6 @@ package cuckoo
import (
"fmt"
"math/bits"
- "math/rand"
)
const maxCuckooCount = 500
@@ -13,6 +12,7 @@ type Filter struct {
buckets []bucket
count uint
bucketPow uint
+ gen LCG
}
// NewFilter returns a new cuckoofilter with a given capacity.
@@ -45,8 +45,8 @@ func (cf *Filter) Reset() {
cf.count = 0
}
-func randi(i1, i2 uint) uint {
- if rand.Intn(2) == 0 {
+func (cf *Filter) randi(i1, i2 uint) uint {
+ if cf.gen.Intn(2) == 0 {
return i1
}
return i2
@@ -58,7 +58,7 @@ func (cf *Filter) Insert(data []byte) bool {
if cf.insert(fp, i1) || cf.insert(fp, i2) {
return true
}
- return cf.reinsert(fp, randi(i1, i2))
+ return cf.reinsert(fp, cf.randi(i1, i2))
}
// InsertUnique inserts data into the counter if not exists and returns true upon success
@@ -79,7 +79,7 @@ func (cf *Filter) insert(fp byte, i uint) bool {
func (cf *Filter) reinsert(fp byte, i uint) bool {
for k := 0; k < maxCuckooCount; k++ {
- j := rand.Intn(bucketSize)
+ j := cf.gen.Intn(bucketSize)
oldfp := fp
fp = cf.buckets[i][j]
cf.buckets[i][j] = oldfp
Profiling a bit this package, I found that about 75% of the time spend for
Insert
is callingrand.Intn(bucketSize)
. This is with an almost empty filter so I expect it's getting worse as the it fill.I expect the requirement for randomness here is fairly low (it's just drawing a number between 0 and 3 to not do the same thing each time), there should be ways to do that much faster, and especially without mutex locking as
rand.Intn
has.