seiflotfy / cuckoofilter

Cuckoo Filter: Practically Better Than Bloom
MIT License
1.13k stars 109 forks source link

Faster insert when the filter is filled #32

Open MichaelMure opened 4 years ago

MichaelMure commented 4 years ago

Profiling a bit this package, I found that about 75% of the time spend for Insert is calling rand.Intn(bucketSize). This is with an almost empty filter so I expect it's getting worse as the it fill.

I expect the requirement for randomness here is fairly low (it's just drawing a number between 0 and 3 to not do the same thing each time), there should be ways to do that much faster, and especially without mutex locking as rand.Intn has.

MichaelMure commented 4 years ago

Actually I was incorrect, I didn't realize that I was actually filling the filter.

Now, if you really want, you could use the following code to make the insert ~20% faster when the filter is filled:

// taken from https://nuclear.llnl.gov/CNP/rng/rngman/node4.html
const a = uint64(2862933555777941757)
const b = uint64(3037000493)

// Linear Congruential Generator
// See https://link.springer.com/chapter/10.1007/978-1-4615-2317-8_3
type LCG struct {
    state uint64
}

func (l *LCG) Intn(n int) int {
    for {
        state := atomic.LoadUint64(&l.state)

        newState := a*state + b

        // Replace only if the state is still the same
        swapped := atomic.CompareAndSwapUint64(&l.state, state, newState)
        if swapped {
            return int(newState % uint64(n))
        }
    }
}
diff --git a/filter-experiment/cuckoofilter/cuckoofilter.go b/filter-experiment/cuckoofilter/cuckoofilter.go
index 2ba5613..9ba8439 100644
--- a/filter-experiment/cuckoofilter/cuckoofilter.go
+++ b/filter-experiment/cuckoofilter/cuckoofilter.go
@@ -3,7 +3,6 @@ package cuckoo
 import (
        "fmt"
        "math/bits"
-       "math/rand"
 )

 const maxCuckooCount = 500
@@ -13,6 +12,7 @@ type Filter struct {
        buckets   []bucket
        count     uint
        bucketPow uint
+       gen       LCG
 }

 // NewFilter returns a new cuckoofilter with a given capacity.
@@ -45,8 +45,8 @@ func (cf *Filter) Reset() {
        cf.count = 0
 }

-func randi(i1, i2 uint) uint {
-       if rand.Intn(2) == 0 {
+func (cf *Filter) randi(i1, i2 uint) uint {
+       if cf.gen.Intn(2) == 0 {
                return i1
        }
        return i2
@@ -58,7 +58,7 @@ func (cf *Filter) Insert(data []byte) bool {
        if cf.insert(fp, i1) || cf.insert(fp, i2) {
                return true
        }
-       return cf.reinsert(fp, randi(i1, i2))
+       return cf.reinsert(fp, cf.randi(i1, i2))
 }

 // InsertUnique inserts data into the counter if not exists and returns true upon success
@@ -79,7 +79,7 @@ func (cf *Filter) insert(fp byte, i uint) bool {

 func (cf *Filter) reinsert(fp byte, i uint) bool {
        for k := 0; k < maxCuckooCount; k++ {
-               j := rand.Intn(bucketSize)
+               j := cf.gen.Intn(bucketSize)
                oldfp := fp
                fp = cf.buckets[i][j]
                cf.buckets[i][j] = oldfp