lxzan / memorycache

Minimalist in-memory KV storage, powered by a hashmap and a minimal quad heap.
https://pkg.go.dev/github.com/lxzan/memorycache
MIT License

Memorycache benchmarks #13

Closed: maypok86 closed this issue 11 months ago

maypok86 commented 11 months ago

I think the atomic counter is not needed in the benchmarks, and something strange is happening with the capacity, so I suggest this code:

```go
package benchmark

import (
    "testing"
    "time"

    "github.com/dgraph-io/ristretto"
    "github.com/lxzan/memorycache"
    "github.com/lxzan/memorycache/internal/utils"
    "github.com/maypok86/otter"
)

const (
    benchcount = 1280000
    capacity   = benchcount / 10
)

var benchkeys = make([]string, 0, benchcount)

func init() {
    for i := 0; i < benchcount; i++ {
        benchkeys = append(benchkeys, string(utils.AlphabetNumeric.Generate(16)))
    }
}

func BenchmarkMemoryCache_Set(b *testing.B) {
    var mc = memorycache.New(
        memorycache.WithBucketNum(128),
        memorycache.WithBucketSize(capacity/1280, capacity/128),
    )
    b.RunParallel(func(pb *testing.PB) {
        i := 0
        for pb.Next() {
            index := i % benchcount
            i++
            mc.Set(benchkeys[index], 1, time.Hour)
        }
    })
}

func BenchmarkMemoryCache_Get(b *testing.B) {
    var mc = memorycache.New(
        memorycache.WithBucketNum(128),
        memorycache.WithBucketSize(capacity/1280, capacity/128),
    )
    for i := 0; i < benchcount; i++ {
        mc.Set(benchkeys[i%benchcount], 1, time.Hour)
    }

    b.ResetTimer()
    b.RunParallel(func(pb *testing.PB) {
        i := 0
        for pb.Next() {
            index := i % benchcount
            i++
            mc.Get(benchkeys[index])
        }
    })
}

func BenchmarkMemoryCache_SetAndGet(b *testing.B) {
    var mc = memorycache.New(
        memorycache.WithBucketNum(128),
        memorycache.WithBucketSize(capacity/1280, capacity/128),
    )
    for i := 0; i < benchcount; i++ {
        mc.Set(benchkeys[i%benchcount], 1, time.Hour)
    }

    b.ResetTimer()
    b.RunParallel(func(pb *testing.PB) {
        i := 0
        for pb.Next() {
            index := i % benchcount
            i++
            if index&7 == 0 {
                mc.Set(benchkeys[index], 1, time.Hour)
            } else {
                mc.Get(benchkeys[index])
            }
        }
    })
}

func BenchmarkRistretto_Set(b *testing.B) {
    var mc, _ = ristretto.NewCache(&ristretto.Config{
        NumCounters: 10 * capacity, // number of keys to track frequency of
        MaxCost:     capacity,      // maximum cost of cache
        BufferItems: 64,            // number of keys per Get buffer
    })
    b.RunParallel(func(pb *testing.PB) {
        i := 0
        for pb.Next() {
            index := i % benchcount
            i++
            mc.SetWithTTL(benchkeys[index], 1, 1, time.Hour)
        }
    })
}

func BenchmarkRistretto_Get(b *testing.B) {
    var mc, _ = ristretto.NewCache(&ristretto.Config{
        NumCounters: 10 * capacity, // number of keys to track frequency of
        MaxCost:     capacity,      // maximum cost of cache
        BufferItems: 64,            // number of keys per Get buffer
    })
    for i := 0; i < benchcount; i++ {
        mc.SetWithTTL(benchkeys[i%benchcount], 1, 1, time.Hour)
    }

    b.ResetTimer()
    b.RunParallel(func(pb *testing.PB) {
        i := 0
        for pb.Next() {
            index := i % benchcount
            i++
            mc.Get(benchkeys[index])
        }
    })
}

func BenchmarkRistretto_SetAndGet(b *testing.B) {
    var mc, _ = ristretto.NewCache(&ristretto.Config{
        NumCounters: 10 * capacity, // number of keys to track frequency of
        MaxCost:     capacity,      // maximum cost of cache
        BufferItems: 64,            // number of keys per Get buffer
    })
    for i := 0; i < benchcount; i++ {
        mc.SetWithTTL(benchkeys[i%benchcount], 1, 1, time.Hour)
    }

    b.ResetTimer()
    b.RunParallel(func(pb *testing.PB) {
        i := 0
        for pb.Next() {
            index := i % benchcount
            i++
            if index&7 == 0 {
                mc.SetWithTTL(benchkeys[index], 1, 1, time.Hour)
            } else {
                mc.Get(benchkeys[index])
            }
        }
    })
}

func BenchmarkOtter_Set(b *testing.B) {
    var mc, _ = otter.MustBuilder[string, int](capacity).Build()
    b.RunParallel(func(pb *testing.PB) {
        i := 0
        for pb.Next() {
            index := i % benchcount
            i++
            mc.SetWithTTL(benchkeys[index], 1, time.Hour)
        }
    })
}

func BenchmarkOtter_Get(b *testing.B) {
    mc, _ := otter.MustBuilder[string, int](capacity).Build()
    for i := 0; i < benchcount; i++ {
        mc.SetWithTTL(benchkeys[i%benchcount], 1, time.Hour)
    }

    b.ResetTimer()
    b.RunParallel(func(pb *testing.PB) {
        i := 0
        for pb.Next() {
            index := i % benchcount
            i++
            mc.Get(benchkeys[index])
        }
    })
}

func BenchmarkOtter_SetAndGet(b *testing.B) {
    mc, _ := otter.MustBuilder[string, int](capacity).Build()
    for i := 0; i < benchcount; i++ {
        mc.SetWithTTL(benchkeys[i%benchcount], 1, time.Hour)
    }

    b.ResetTimer()
    b.RunParallel(func(pb *testing.PB) {
        i := 0
        for pb.Next() {
            index := i % benchcount
            i++
            if index&7 == 0 {
                mc.SetWithTTL(benchkeys[index], 1, time.Hour)
            } else {
                mc.Get(benchkeys[index])
            }
        }
    })
}
```

maypok86 commented 11 months ago

And on these benchmarks I got these results:

➜ go test -run='^$' -cpu=8 -bench . -benchmem -timeout=0
goos: darwin
goarch: arm64
pkg: github.com/lxzan/memorycache/benchmark
BenchmarkMemoryCache_Set-8              27894867                78.19 ns/op           55 B/op          0 allocs/op
BenchmarkMemoryCache_Get-8              65804805                17.21 ns/op            0 B/op          0 allocs/op
BenchmarkMemoryCache_SetAndGet-8        67827427                25.67 ns/op            3 B/op          0 allocs/op
BenchmarkRistretto_Set-8                19721312                99.02 ns/op          102 B/op          2 allocs/op
BenchmarkRistretto_Get-8                41125585                26.89 ns/op           18 B/op          1 allocs/op
BenchmarkRistretto_SetAndGet-8          35126967                33.68 ns/op           29 B/op          1 allocs/op
BenchmarkOtter_Set-8                    17611048                64.37 ns/op           32 B/op          0 allocs/op
BenchmarkOtter_Get-8                    35887419                31.57 ns/op            5 B/op          0 allocs/op
BenchmarkOtter_SetAndGet-8              32818468                35.92 ns/op            5 B/op          0 allocs/op
PASS
ok      github.com/lxzan/memorycache/benchmark  45.032s
lxzan commented 11 months ago

I didn't realize that atomic operations had such a large impact on benchmark results.

lxzan commented 11 months ago

I used the code you shared to run the benchmark. Maybe otter works fine on darwin/arm64, but it has bugs on amd64.

goos: windows
goarch: amd64
pkg: github.com/lxzan/memorycache/benchmark
cpu: AMD Ryzen 5 PRO 4650G with Radeon Graphics
BenchmarkMemoryCache_Set
BenchmarkMemoryCache_Set-12             19173800                72.80 ns/op
BenchmarkMemoryCache_Get
BenchmarkMemoryCache_Get-12             57163272                27.71 ns/op
BenchmarkMemoryCache_SetAndGet
BenchmarkMemoryCache_SetAndGet-12       43583112                30.95 ns/op
BenchmarkRistretto_Set
BenchmarkRistretto_Set-12               18181927                57.00 ns/op
BenchmarkRistretto_Get
BenchmarkRistretto_Get-12               61429777                23.37 ns/op
BenchmarkRistretto_SetAndGet
BenchmarkRistretto_SetAndGet-12         39921752                28.41 ns/op
BenchmarkOtter_Set
BenchmarkOtter_Set-12                   13182075               183.7 ns/op
BenchmarkOtter_Get
BenchmarkOtter_Get-12                   35014639                31.21 ns/op
BenchmarkOtter_SetAndGet
BenchmarkOtter_SetAndGet-12              2260393              1721 ns/op
PASS
goos: linux
goarch: amd64
pkg: github.com/lxzan/memorycache/benchmark
cpu: AMD Ryzen 5 PRO 4650G with Radeon Graphics
BenchmarkMemoryCache_Set-12             19678532               107.3 ns/op            50 B/op          0 allocs/op
BenchmarkMemoryCache_Get-12             39053472                33.69 ns/op            0 B/op          0 allocs/op
BenchmarkMemoryCache_SetAndGet-12       36526280                45.92 ns/op            2 B/op          0 allocs/op
BenchmarkRistretto_Set-12               16221285                79.46 ns/op           99 B/op          2 allocs/op
BenchmarkRistretto_Get-12               48637548                27.88 ns/op           16 B/op          1 allocs/op
BenchmarkRistretto_SetAndGet-12         24576748                52.45 ns/op           27 B/op          1 allocs/op
BenchmarkOtter_Set-12                   13831672               191.3 ns/op            38 B/op          0 allocs/op
BenchmarkOtter_Get-12                   33329025                36.29 ns/op            4 B/op          0 allocs/op
BenchmarkOtter_SetAndGet-12               359437              3032 ns/op             205 B/op          0 allocs/op
PASS
ok      github.com/lxzan/memorycache/benchmark  99.506s
lxzan commented 11 months ago
  1. I can't run the cachebench test because the test data files are too large, and Internet access is limited in China.

  2. A map combined with a doubly linked list or a heap can implement the LRU algorithm (see the sketch after this list): https://leetcode.com/problems/lru-cache/description/

  3. I don't think you should use generic types in cache libraries. If you use `any`, one instance is enough.
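For reference, here is a minimal map-plus-doubly-linked-list LRU along the lines of the LeetCode problem linked above; this is an illustrative sketch, not memorycache's implementation:

```go
package main

import (
    "container/list"
    "fmt"
)

type entry struct {
    key, value int
}

// LRUCache pairs a hash map (O(1) lookup) with a doubly linked list
// (O(1) reordering), so both Get and Put run in O(1), in contrast to
// the O(log n) of a heap-based ordering.
type LRUCache struct {
    cap   int
    items map[int]*list.Element
    order *list.List // front = most recently used, back = least
}

func NewLRUCache(cap int) *LRUCache {
    return &LRUCache{
        cap:   cap,
        items: make(map[int]*list.Element),
        order: list.New(),
    }
}

func (c *LRUCache) Get(key int) (int, bool) {
    el, ok := c.items[key]
    if !ok {
        return 0, false
    }
    c.order.MoveToFront(el) // touch: mark as most recently used
    return el.Value.(*entry).value, true
}

func (c *LRUCache) Put(key, value int) {
    if el, ok := c.items[key]; ok {
        el.Value.(*entry).value = value
        c.order.MoveToFront(el)
        return
    }
    if c.order.Len() >= c.cap {
        back := c.order.Back() // evict the least recently used entry
        delete(c.items, back.Value.(*entry).key)
        c.order.Remove(back)
    }
    c.items[key] = c.order.PushFront(&entry{key, value})
}

func main() {
    c := NewLRUCache(2)
    c.Put(1, 1)
    c.Put(2, 2)
    c.Get(1)    // touches key 1
    c.Put(3, 3) // evicts key 2, the least recently used
    _, ok := c.Get(2)
    fmt.Println(ok) // false
}
```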

maypok86 commented 11 months ago

Oh, I'm back with answers and news

  1. Atomics allow the cache to scale, and the gap will only get bigger as the number of cores increases; a sharded map with mutexes can't scale like that.

  2. Yes, the repository with the ristretto benchmarks is very large and I didn't want to download it either, but nothing stops you from downloading the individual *.gz files and copying the code. All the traces together weigh about 100 MB, or even 50 MB if S3 is excluded.

  3. Of course, with a map and a heap you can implement LRU, but I never understood why you would spend O(log n) per operation instead of O(1). I still don't understand how the LRU list coexists with TTL in a single heap, or why the policy ignores reads (but it's okay, it's better to check that with benchmarks).

  4. And this is flame-war territory. For example, at work I more often see ids (int or uuid), or a combination of them, used as keys. To use them with a cache without generics that stores only string keys, you have to allocate strings and convert such keys to strings.

  5. Many thanks for the bug report; I believe I fixed it. The updated version can be installed like this:

    go get github.com/maypok86/otter@dev
  6. I also went to the trouble of running the hit ratio benchmarks from theine and otter. In the theine benchmarks, memorycache ran in the following configuration:

```go
package clients

import (
    "image/color"
    "time"

    "github.com/lxzan/memorycache"
    "gonum.org/v1/plot/vg/draw"
)

type Memory struct {
    client *memorycache.MemoryCache
}

func (c Memory) Style() *Style {
    return &Style{
        Color: color.RGBA{G: 28, A: 255},
        Shape: draw.PlusGlyph{},
    }
}

func (c *Memory) Init(cap int) {
    client := memorycache.New(
        memorycache.WithBucketNum(128),
        memorycache.WithBucketSize(cap/128/10, (cap+127)/128),
    )
    c.client = client
}

func (c *Memory) Name() string { return "Memory" }

func (c *Memory) Get(key string) (string, bool) {
    v, ok := c.client.Get(key)
    if ok {
        return v.(string), true
    }
    return "", false
}

func (c *Memory) Set(key string, value string) {
    c.client.Set(key, value, time.Hour)
}

func (c *Memory) Close() {
    c.client.Stop()
    c.client.Clear()
    c.client = nil
}
```


Division with rounding up is used because memorycache rounds the number of shards up to the nearest power of two (which was not obvious at all), and the capacity in the benchmarks is not evenly divisible by it. I attach pictures with the results.
![zipf](https://github.com/lxzan/memorycache/assets/72698179/393a203f-fc32-4ef4-8af1-4bb5c81a80f3)
![s3](https://github.com/lxzan/memorycache/assets/72698179/b0e85b7e-b5ca-491d-a26d-adfb5e6fab9e)
![ds1](https://github.com/lxzan/memorycache/assets/72698179/bbd5b2d8-c9dd-4527-9601-1a4deb9b967b)

Also, here is the result of running the otter benchmarks with a similar memorycache configuration: [ratio.txt](https://github.com/lxzan/memorycache/files/13539300/ratio.txt)

The upshot is that memorycache has a much lower hit ratio than theine, and I bet that when choosing a library, the majority will prefer the slower theine.
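To make the bucket-size arithmetic above concrete, here is a small self-contained illustration; the power-of-two rounding is inferred from the description above rather than copied from memorycache's source:

```go
package main

import "fmt"

// toPowerOfTwo rounds n up to the nearest power of two; that memorycache
// does this with the bucket count is an assumption taken from the
// discussion above, not a copy of its code.
func toPowerOfTwo(n int) int {
    p := 1
    for p < n {
        p *= 2
    }
    return p
}

func main() {
    cap := 100000                  // a hit ratio benchmark capacity, not divisible by 128
    fmt.Println(toPowerOfTwo(100)) // 128: a requested bucket count of 100 becomes 128
    fmt.Println(cap / 128)         // 781: floor division loses 32 slots (781*128 = 99968)
    fmt.Println((cap + 127) / 128) // 782: ceiling division keeps at least cap slots
}
```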

7. Now for the punch line. I rewrote the memorycache benchmarks a bit and tested on two types of traces: scrambled zipfian and random.
```go
package benchmark

import (
    "math/rand"
    "strconv"
    "testing"
    "time"

    "github.com/dgraph-io/ristretto"
    "github.com/lxzan/memorycache"
    "github.com/maypok86/otter"
    "github.com/pingcap/go-ycsb/pkg/generator"
)

type Client interface {
    Init(cap int)
    Get(key string) (any, bool)
    Set(key string, value any)
    Name() string
    Close()
}

type Memory struct {
    client *memorycache.MemoryCache
}

func (c *Memory) Init(cap int) {
    client := memorycache.New(
        memorycache.WithBucketNum(128),
        memorycache.WithBucketSize(cap/128/10, cap/128),
    )
    c.client = client
}

func (c *Memory) Name() string {
    return "Memory"
}

func (c *Memory) Get(key string) (any, bool) {
    return c.client.Get(key)
}

func (c *Memory) Set(key string, value any) {
    c.client.Set(key, value, time.Hour)
}

func (c *Memory) Close() {
    c.client.Stop()
    c.client.Clear()
}

type Otter[K comparable, V any] struct {
    client *otter.Cache[K, V]
}

func (c *Otter[K, V]) Init(cap int) {
    client, err := otter.MustBuilder[K, V](cap).Build()
    if err != nil {
        panic(err)
    }
    c.client = client
}

func (c *Otter[K, V]) Name() string {
    return "Otter"
}

func (c *Otter[K, V]) Get(key K) (V, bool) {
    return c.client.Get(key)
}

func (c *Otter[K, V]) Set(key K, value V) {
    c.client.SetWithTTL(key, value, time.Hour)
}

func (c *Otter[K, V]) Close() {
    c.client.Close()
}

type Ristretto struct {
    client *ristretto.Cache
}

func (c *Ristretto) Init(cap int) {
    client, err := ristretto.NewCache(&ristretto.Config{
        NumCounters: int64(cap * 10),
        MaxCost:     int64(cap),
        BufferItems: 64,
    })
    if err != nil {
        panic(err)
    }
    c.client = client
}

func (c *Ristretto) Name() string {
    return "Ristretto"
}

func (c *Ristretto) Get(key string) (any, bool) {
    return c.client.Get(key)
}

func (c *Ristretto) Set(key string, value any) {
    c.client.SetWithTTL(key, value, 1, time.Hour)
}

func (c *Ristretto) Close() {
    c.client.Close()
}

const (
    benchcount = 3840000
    capacity   = benchcount / 10
)

var benchkeys = make([]string, 0, benchcount)

func init() {
    // populate using realistic distribution
    z := generator.NewScrambledZipfian(0, benchcount/3, generator.ZipfianConstant)
    r := rand.New(rand.NewSource(time.Now().UnixNano()))

    for i := 0; i < benchcount; i++ {
        //benchkeys = append(benchkeys, string(utils.AlphabetNumeric.Generate(16)))
        benchkeys = append(benchkeys, strconv.Itoa(int(z.Next(r))))
    }
}

var benchClients = []Client{
    &Memory{},
    &Ristretto{},
    &Otter[string, any]{},
}

func BenchmarkCache_Get(b *testing.B) {
    for _, mc := range benchClients {
        mc.Init(capacity)
        for i := 0; i < benchcount; i++ {
            mc.Set(benchkeys[i], "1")
        }
        b.ResetTimer()
        b.Run(mc.Name(), func(b *testing.B) {
            b.RunParallel(func(pb *testing.PB) {
                i := 0
                for pb.Next() {
                    mc.Get(benchkeys[i%benchcount])
                    i++
                }
            })
        })
        mc.Close()
    }
}

func BenchmarkCache_Set(b *testing.B) {
    for _, mc := range benchClients {
        mc.Init(capacity)
        b.ResetTimer()
        b.Run(mc.Name(), func(b *testing.B) {
            b.RunParallel(func(pb *testing.PB) {
                i := 0
                for pb.Next() {
                    mc.Set(benchkeys[i%benchcount], "1")
                    i++
                }
            })
        })
        mc.Close()
    }
}

func BenchmarkCache_SetGet(b *testing.B) {
    for _, mc := range benchClients {
        mc.Init(capacity)
        for i := 0; i < benchcount; i++ {
            mc.Set(benchkeys[i], "1")
        }
        b.ResetTimer()
        b.Run(mc.Name(), func(b *testing.B) {
            b.RunParallel(func(pb *testing.PB) {
                i := 0
                for pb.Next() {
                    if i&7 == 0 {
                        mc.Set(benchkeys[i%benchcount], "1")
                    } else {
                        mc.Get(benchkeys[i%benchcount])
                    }
                    i++
                }
            })
        })
        mc.Close()
    }
}
```

All benchmarks were run with the command

go test -run='^$' -cpu=8 -bench . -benchmem -timeout=0
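(The -run='^$' pattern matches no test names, so only the benchmarks execute, and -timeout=0 disables the overall test timeout.)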

And now their results are as follows

Scrambled zipfian (mac)

goos: darwin
goarch: arm64
pkg: github.com/lxzan/memorycache/benchmark
BenchmarkCache_Get/Memory-8             58323561                28.81 ns/op            0 B/op          0 allocs/op
BenchmarkCache_Get/Ristretto-8          40171566                27.97 ns/op           18 B/op          1 allocs/op
BenchmarkCache_Get/Otter-8              58735746                20.29 ns/op            0 B/op          0 allocs/op
BenchmarkCache_Set/Memory-8             34251462                47.14 ns/op           10 B/op          0 allocs/op
BenchmarkCache_Set/Ristretto-8           9676972               112.8 ns/op           101 B/op          2 allocs/op
BenchmarkCache_Set/Otter-8              29418043                46.07 ns/op           12 B/op          0 allocs/op
BenchmarkCache_SetGet/Memory-8          47939595                30.44 ns/op            0 B/op          0 allocs/op
BenchmarkCache_SetGet/Ristretto-8       15708579                66.94 ns/op           31 B/op          1 allocs/op
BenchmarkCache_SetGet/Otter-8           43945512                23.10 ns/op            2 B/op          0 allocs/op
PASS
ok      github.com/lxzan/memorycache/benchmark  19.541s

Scrambled zipfian (linux)

goos: linux
goarch: amd64
pkg: github.com/lxzan/memorycache/benchmark
cpu: AMD EPYC 7663 56-Core Processor
BenchmarkCache_Get/Memory-8             17790519                92.28 ns/op            0 B/op          0 allocs/op
BenchmarkCache_Get/Ristretto-8          20446557                61.80 ns/op           17 B/op          1 allocs/op
BenchmarkCache_Get/Otter-8              18460036                59.78 ns/op            0 B/op          0 allocs/op
BenchmarkCache_Set/Memory-8             10917672               103.9 ns/op             5 B/op          0 allocs/op
BenchmarkCache_Set/Ristretto-8           5078590               228.7 ns/op           101 B/op          2 allocs/op
BenchmarkCache_Set/Otter-8              17816726               153.1 ns/op            12 B/op          0 allocs/op
BenchmarkCache_SetGet/Memory-8          15022515                82.15 ns/op            0 B/op          0 allocs/op
BenchmarkCache_SetGet/Ristretto-8        7374313               147.9 ns/op            29 B/op          1 allocs/op
BenchmarkCache_SetGet/Otter-8           19831430                54.10 ns/op            0 B/op          0 allocs/op
PASS

Random (mac)

goos: darwin
goarch: arm64
pkg: github.com/lxzan/memorycache/benchmark
BenchmarkCache_Get/Memory-8             47254713                24.98 ns/op            0 B/op          0 allocs/op
BenchmarkCache_Get/Ristretto-8          56881586                25.67 ns/op           18 B/op          1 allocs/op
BenchmarkCache_Get/Otter-8              68656597                15.70 ns/op            0 B/op          0 allocs/op
BenchmarkCache_Set/Memory-8             30147376                74.02 ns/op           42 B/op          0 allocs/op
BenchmarkCache_Set/Ristretto-8          15542128                94.62 ns/op          100 B/op          2 allocs/op
BenchmarkCache_Set/Otter-8              18378082                85.23 ns/op           23 B/op          0 allocs/op
BenchmarkCache_SetGet/Memory-8          48471139                30.68 ns/op            1 B/op          0 allocs/op
BenchmarkCache_SetGet/Ristretto-8       34942609                34.37 ns/op           29 B/op          1 allocs/op
BenchmarkCache_SetGet/Otter-8           51608001                22.25 ns/op            3 B/op          0 allocs/op
PASS
ok      github.com/lxzan/memorycache/benchmark  22.653s

Random (linux)

goos: linux
goarch: amd64
pkg: github.com/lxzan/memorycache/benchmark
cpu: AMD EPYC 7663 56-Core Processor
BenchmarkCache_Get/Memory-8             18087849               100.9 ns/op             0 B/op          0 allocs/op
BenchmarkCache_Get/Ristretto-8          21577394                54.34 ns/op           16 B/op          1 allocs/op
BenchmarkCache_Get/Otter-8              19081875                54.22 ns/op            0 B/op          0 allocs/op
BenchmarkCache_Set/Memory-8             11988404               181.2 ns/op            28 B/op          0 allocs/op
BenchmarkCache_Set/Ristretto-8          13593417               114.0 ns/op            99 B/op          2 allocs/op
BenchmarkCache_Set/Otter-8               9905139               337.8 ns/op            21 B/op          0 allocs/op
BenchmarkCache_SetGet/Memory-8          12151204                84.96 ns/op            0 B/op          0 allocs/op
BenchmarkCache_SetGet/Ristretto-8       15254090                71.99 ns/op           27 B/op          1 allocs/op
BenchmarkCache_SetGet/Otter-8           21679112                50.55 ns/op            2 B/op          0 allocs/op
PASS
ok      github.com/lxzan/memorycache/benchmark  44.890s

In the end, otter is almost always faster, except under a load of 100% random writes, but that is absolutely not a realistic cache load (and random keys are not realistic either).

lxzan commented 11 months ago

If you don't reach the maximum capacity limit, the hit rate is the same for every library, right?

In web application caching scenarios, MC is less likely to reach the maximum capacity limit than the others, because MC's expiration checking is almost real-time, whereas ristretto relies on random checking, right? If every element in the test data has a fixed expiration time of one hour, a large number of elements will be evicted within a short period, which will lead to worse hit rate results for MC.

If MC's comparison function used access count + timestamp, would the hit rate increase?

maypok86 commented 11 months ago
  1. Hmm, obviously yes: in that case the hit ratio should always be 100% (except for ristretto, but that's a separate story). But then you don't need an eviction policy at all, and that situation is very rare; it effectively means you can fit all your data in RAM, which is almost never the case.

  2. Strong statement, but it's not true, simply because theine uses a timer wheel (effectively a more advanced version of the heap), ristretto uses a bucket-based algorithm with very comparable efficiency, and otter uses a combination of a bucket-based algorithm and the probabilistic algorithm from redis (sketched after this list; this can be changed in a couple of days). All of these algorithms are about as efficient as a regular heap. In effect you're arguing that the heap removes items better, but there are a few problems. First, TTLs are usually quite large: a rarely used item is likely to be evicted within a short period anyway, while a frequently used item is likely to be requested again soon, so the small delay is unlikely to have any effect (redis has been doing this for years without complaints from users). As for removing a large number of items, ristretto removes just as many, and if you mean that they will all share the same expiry time while the eviction order is undefined (again a consequence of keeping the LRU list and TTL in the same heap), that's unfortunate: you save time on fetching the current time, but you can't tell people running 10000+ RPS in production, "Please slow down."

  3. It may or may not increase; I expected the result to be worse. The only thing I can say for sure is that the hit ratio decreases as you increase the number of shards. As I said, keeping the LRU list and TTL in the same heap, ignoring reads, and cutting the LRU into parts doesn't look good (you already had to get creative here just to store as many items as the others).
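For context on point 2, redis-style probabilistic expiration samples random keys and keeps purging while a large fraction of each sample turns out to be expired. A simplified, self-contained sketch (not otter's or redis's actual code):

```go
package main

import (
    "fmt"
    "time"
)

// toyCache is a throwaway stand-in: key -> absolute expiry time.
type toyCache struct {
    expireAt map[string]time.Time
}

// sampleAndExpire checks up to n keys and deletes the expired ones.
// Go's randomized map iteration order serves as the random sample here.
func (c *toyCache) sampleAndExpire(n int, now time.Time) (expired, sampled int) {
    for key, exp := range c.expireAt {
        if sampled == n {
            break
        }
        sampled++
        if now.After(exp) {
            delete(c.expireAt, key)
            expired++
        }
    }
    return expired, sampled
}

// activeExpireCycle keeps sampling batches and deleting expired keys
// until fewer than 25% of a batch turn out to be expired, the stopping
// rule redis's active expiration is known for.
func (c *toyCache) activeExpireCycle(sampleSize int) {
    now := time.Now()
    for {
        expired, sampled := c.sampleAndExpire(sampleSize, now)
        if sampled == 0 || expired*4 < sampled {
            return
        }
    }
}

func main() {
    c := &toyCache{expireAt: map[string]time.Time{
        "fresh":   time.Now().Add(time.Hour),
        "stale-1": time.Now().Add(-time.Minute),
        "stale-2": time.Now().Add(-time.Minute),
    }}
    c.activeExpireCycle(20)
    fmt.Println(len(c.expireAt)) // 1: only the fresh key survives
}
```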

lxzan commented 11 months ago

Timestamp caching (updated once per second) prevents MC's GetWithTTL from having the desired effect in the hit rate test:


If you disable caching on line 114, you'll get a much better hit rate.

```go
func (c *MemoryCache) getExp(d time.Duration) int64 {
    if d <= 0 {
        return math.MaxInt
    }
    return time.Now().UnixMilli() + d.Milliseconds()
}
```
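For readers unfamiliar with the technique under discussion: a cached timestamp is typically an atomic value refreshed by a background ticker, so hot paths avoid calling time.Now(). A minimal sketch of the pattern (a hypothetical illustration, not memorycache's exact code):

```go
package main

import (
    "fmt"
    "sync/atomic"
    "time"
)

// cachedNow is refreshed once per second by a background goroutine, so
// hot paths read the time with one atomic load instead of calling
// time.Now().UnixMilli() on every operation.
var cachedNow atomic.Int64

func startClock() {
    cachedNow.Store(time.Now().UnixMilli())
    go func() {
        for range time.Tick(time.Second) {
            cachedNow.Store(time.Now().UnixMilli())
        }
    }()
}

// expiryFor shows the trade-off under discussion: every write within
// the same second gets the same expiry timestamp, so a burst of writes
// (as in a load test) expires all at once.
func expiryFor(d time.Duration) int64 {
    return cachedNow.Load() + d.Milliseconds()
}

func main() {
    startClock()
    fmt.Println(expiryFor(time.Hour))
}
```

The client wrapper used for the hit rate test follows: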
```go
package client

import (
    "time"

    "github.com/lxzan/memorycache"
)

const shard = 16

type MemoryCache[K comparable, V any] struct {
    client *memorycache.MemoryCache
}

func (c *MemoryCache[K, V]) Init(cap int) {
    c.client = memorycache.New(
        memorycache.WithBucketNum(shard),
        memorycache.WithBucketSize(cap/10/shard, cap/shard),
    )
}

func (c *MemoryCache[K, V]) Name() string {
    return "MemoryCache"
}

func (c *MemoryCache[K, V]) Get(key K) (value V, exist bool) {
    v, ok := c.client.GetWithTTL(ToString(key), time.Hour)
    if !ok {
        return
    }
    return v.(V), true
}

func (c *MemoryCache[K, V]) Set(key K, value V) {
    c.client.Set(ToString(key), value, time.Hour)
}

func (c *MemoryCache[K, V]) Close() {
    c.client.Stop()
}

func ToString(v any) string {
    switch result := v.(type) {
    case string:
        return result
    default:
        return ""
    }
}
```

Zipf:
        Capacity: 500
                Optimal ratio: 25.74
                Otter ratio: 17.44
                Theine ratio: 23.35
                Ristretto ratio: 2.88
                MemoryCache ratio: 12.06
        Capacity: 1000
                Optimal ratio: 30.15
                Otter ratio: 21.02
                Theine ratio: 27.68
                Ristretto ratio: 4.70
                MemoryCache ratio: 16.82
        Capacity: 2000
                Optimal ratio: 34.56
                Otter ratio: 25.07
                Theine ratio: 31.78
                Ristretto ratio: 6.65
                MemoryCache ratio: 21.66
        Capacity: 5000
                Optimal ratio: 40.36
                Otter ratio: 31.37
                Theine ratio: 36.61
                Ristretto ratio: 9.73
                MemoryCache ratio: 27.97
        Capacity: 10000
                Optimal ratio: 44.78
                Otter ratio: 36.31
                Theine ratio: 40.11
                Ristretto ratio: 12.49
                MemoryCache ratio: 32.92
        Capacity: 20000
                Optimal ratio: 48.69
                Otter ratio: 41.03
                Theine ratio: 43.24
                Ristretto ratio: 16.06
                MemoryCache ratio: 37.73
        Capacity: 40000
                Optimal ratio: 52.11
                Otter ratio: 44.95
                Theine ratio: 45.70
                Ristretto ratio: 20.90
                MemoryCache ratio: 42.48
        Capacity: 80000
                Optimal ratio: 54.33
                Otter ratio: 48.28
                Theine ratio: 48.31
                Ristretto ratio: 26.35
                MemoryCache ratio: 47.02

P3:
        Capacity: 25000
                Optimal ratio: 18.75
                Otter ratio: 2.84
                Theine ratio: 8.16
                Ristretto ratio: 0.44
                MemoryCache ratio: 2.61
        Capacity: 50000
                Optimal ratio: 30.91
                Otter ratio: 12.47
                Theine ratio: 18.91
                Ristretto ratio: 0.82
                MemoryCache ratio: 6.86
        Capacity: 100000
                Optimal ratio: 45.14
                Otter ratio: 54.73
                Theine ratio: 37.41
                Ristretto ratio: 1.45
                MemoryCache ratio: 29.40
        Capacity: 200000
                Optimal ratio: 63.06
                Otter ratio: 69.55
                Theine ratio: 53.94
                Ristretto ratio: 2.51
                MemoryCache ratio: 59.35
        Capacity: 300000
                Optimal ratio: 74.29
                Otter ratio: 76.93
                Theine ratio: 68.78
                Ristretto ratio: 3.62
                MemoryCache ratio: 68.95
        Capacity: 400000
                Optimal ratio: 79.24
                Otter ratio: 79.30
                Theine ratio: 76.16
                Ristretto ratio: 4.66
                MemoryCache ratio: 75.28
        Capacity: 500000
                Optimal ratio: 80.51
                Otter ratio: 80.47
                Theine ratio: 79.01
                Ristretto ratio: 5.85
                MemoryCache ratio: 79.45
        Capacity: 600000
                Optimal ratio: 80.51
                Otter ratio: 80.51
                Theine ratio: 79.91
                Ristretto ratio: 6.70
                MemoryCache ratio: 80.16

OLTP:
        Capacity: 250
                Optimal ratio: 20.10
                Otter ratio: 25.83
                Theine ratio: 19.29
                Ristretto ratio: 1.28
                MemoryCache ratio: 12.04
        Capacity: 500
                Optimal ratio: 26.40
                Otter ratio: 32.68
                Theine ratio: 28.27
                Ristretto ratio: 2.30
                MemoryCache ratio: 20.85
        Capacity: 750
                Optimal ratio: 30.18
                Otter ratio: 37.82
                Theine ratio: 33.87
                Ristretto ratio: 3.25
                MemoryCache ratio: 27.24
        Capacity: 1000
                Optimal ratio: 33.26
                Otter ratio: 41.04
                Theine ratio: 37.29
                Ristretto ratio: 4.07
                MemoryCache ratio: 32.43
        Capacity: 1250
                Optimal ratio: 35.14
                Otter ratio: 43.39
                Theine ratio: 39.69
                Ristretto ratio: 4.52
                MemoryCache ratio: 36.26
        Capacity: 1500
                Optimal ratio: 36.49
                Otter ratio: 45.50
                Theine ratio: 41.06
                Ristretto ratio: 5.17
                MemoryCache ratio: 38.78
        Capacity: 1750
                Optimal ratio: 37.71
                Otter ratio: 47.22
                Theine ratio: 40.99
                Ristretto ratio: 5.75
                MemoryCache ratio: 40.86
        Capacity: 2000
                Optimal ratio: 39.18
                Otter ratio: 48.63
                Theine ratio: 44.11
                Ristretto ratio: 6.25
                MemoryCache ratio: 42.44
maypok86 commented 11 months ago

These changes make MC more similar to classic LRU (in fact, MC is now an LRU cut into 16 parts), BUT what is the point of expiry if every Get bumps the expiry time forward? Also, on the most complex traces the hit ratio even worsened: for example, at capacity = 8000000 on DS1 the hit ratio used to be 58% and is now 43%. Results of the full run: r.txt

Also, these changes obviously slow down MC. For example:

Scrambled zipfian (8 cpu)

goos: darwin
goarch: arm64
pkg: github.com/lxzan/memorycache/benchmark
BenchmarkCache_Get/Memory-8             21284205                54.05 ns/op
BenchmarkCache_Get/Ristretto-8          51188260                28.41 ns/op
BenchmarkCache_Get/Theine-8             10862472               101.0 ns/op
BenchmarkCache_Get/Otter-8              58896658                19.96 ns/op
BenchmarkCache_Set/Memory-8             19290187                90.47 ns/op
BenchmarkCache_Set/Ristretto-8           8643814               133.5 ns/op
BenchmarkCache_Set/Theine-8              3288130               370.5 ns/op
BenchmarkCache_Set/Otter-8              25777232                45.29 ns/op
BenchmarkCache_SetGet/Memory-8          17409510                61.39 ns/op
BenchmarkCache_SetGet/Ristretto-8       12512870                94.95 ns/op
BenchmarkCache_SetGet/Theine-8          10701565               110.9 ns/op
BenchmarkCache_SetGet/Otter-8           39549194                25.84 ns/op
PASS
ok      github.com/lxzan/memorycache/benchmark  29.431s

Scrambled zipfian (10 cpu)

goos: darwin
goarch: arm64
pkg: github.com/lxzan/memorycache/benchmark
BenchmarkCache_Get/Memory-10            16925833                65.10 ns/op
BenchmarkCache_Get/Ristretto-10         46420614                27.59 ns/op
BenchmarkCache_Get/Theine-10            11783352                96.27 ns/op
BenchmarkCache_Get/Otter-10             52399650                19.17 ns/op
BenchmarkCache_Set/Memory-10            16972125                87.19 ns/op
BenchmarkCache_Set/Ristretto-10          8934678               142.5 ns/op
BenchmarkCache_Set/Theine-10             3719070               336.7 ns/op
BenchmarkCache_Set/Otter-10             35732010                37.69 ns/op
BenchmarkCache_SetGet/Memory-10         17723926                71.56 ns/op
BenchmarkCache_SetGet/Ristretto-10      13688382                86.66 ns/op
BenchmarkCache_SetGet/Theine-10          6222848               186.7 ns/op
BenchmarkCache_SetGet/Otter-10          49031374                21.20 ns/op
PASS
ok      github.com/lxzan/memorycache/benchmark  27.198s

Random (8 cpu)

goos: darwin
goarch: arm64
pkg: github.com/lxzan/memorycache/benchmark
BenchmarkCache_Get/Memory-8             33311780                36.62 ns/op
BenchmarkCache_Get/Ristretto-8          42969618                29.08 ns/op
BenchmarkCache_Get/Theine-8             11034896                92.26 ns/op
BenchmarkCache_Get/Otter-8              60822754                17.59 ns/op
BenchmarkCache_Set/Memory-8             13982930                96.16 ns/op
BenchmarkCache_Set/Ristretto-8          16769505                94.12 ns/op
BenchmarkCache_Set/Theine-8              4216605               302.2 ns/op
BenchmarkCache_Set/Otter-8              19645789                86.18 ns/op
BenchmarkCache_SetGet/Memory-8          18841964                55.93 ns/op
BenchmarkCache_SetGet/Ristretto-8       29058194                41.28 ns/op
BenchmarkCache_SetGet/Theine-8          13093878                95.86 ns/op
BenchmarkCache_SetGet/Otter-8           36327537                30.75 ns/op
PASS
ok      github.com/lxzan/memorycache/benchmark  32.883s

Random (10 cpu)

goos: darwin
goarch: arm64
pkg: github.com/lxzan/memorycache/benchmark
BenchmarkCache_Get/Memory-10            28998932                35.77 ns/op
BenchmarkCache_Get/Ristretto-10         48598736                23.21 ns/op
BenchmarkCache_Get/Theine-10            10401951               102.0 ns/op
BenchmarkCache_Get/Otter-10             77731116                14.74 ns/op
BenchmarkCache_Set/Memory-10            14441642                95.81 ns/op
BenchmarkCache_Set/Ristretto-10         17092383                68.19 ns/op
BenchmarkCache_Set/Theine-10             4749001               255.9 ns/op
BenchmarkCache_Set/Otter-10             20907091                71.31 ns/op
BenchmarkCache_SetGet/Memory-10         20492385                54.05 ns/op
BenchmarkCache_SetGet/Ristretto-10      27400563                40.59 ns/op
BenchmarkCache_SetGet/Theine-10          9810357               105.3 ns/op
BenchmarkCache_SetGet/Otter-10          44856319                24.40 ns/op
PASS
ok      github.com/lxzan/memorycache/benchmark  31.929s

And if you put time.Now().UnixMilli() here, it will be even slower.

lxzan commented 11 months ago

> These changes make MC more similar to classic LRU

That's why I said that MC implements LRU.

> And if you put time.Now().UnixMilli() here, it will be even slower

This is not necessary for a real project. Load testing writes a lot of data in a short period, and because of the timestamp caching it all expires at the same time.

MC's hit rate is a little worse when memory is tight. If it ever becomes popular, I'll improve it to increase the hit rate.

maypok86 commented 11 months ago

Okay, well, I got what I wanted, thank you.