From 03284ca422855c22f78fabc19050734205aeea58 Mon Sep 17 00:00:00 2001 From: Patrick Mylund Nielsen Date: Mon, 22 Dec 2014 10:37:59 -0500 Subject: [PATCH] Replace sharded hash function/misc mods --- cache_test.go | 28 ++---------------- sharded.go | 79 ++++++++++++++++++++++++++++++++++++++++++------- sharded_test.go | 68 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 138 insertions(+), 37 deletions(-) create mode 100644 sharded_test.go diff --git a/cache_test.go b/cache_test.go index 8f77235..d5b2a60 100644 --- a/cache_test.go +++ b/cache_test.go @@ -1472,7 +1472,8 @@ func BenchmarkRWMutexMapGetConcurrent(b *testing.B) { func BenchmarkCacheGetManyConcurrent(b *testing.B) { // This is the same as BenchmarkCacheGetConcurrent, but its result - // can be compared against BenchmarkShardedCacheGetManyConcurrent. + // can be compared against BenchmarkShardedCacheGetManyConcurrent + // in sharded_test.go. b.StopTimer() n := 10000 tc := New(DefaultExpiration, 0) @@ -1497,31 +1498,6 @@ func BenchmarkCacheGetManyConcurrent(b *testing.B) { wg.Wait() } -func BenchmarkShardedCacheGetManyConcurrent(b *testing.B) { - b.StopTimer() - n := 10000 - tsc := unexportedNewSharded(20, DefaultExpiration, 0) - keys := make([]string, n) - for i := 0; i < n; i++ { - k := "foo" + strconv.Itoa(n) - keys[i] = k - tsc.Set(k, "bar", DefaultExpiration) - } - each := b.N / n - wg := new(sync.WaitGroup) - wg.Add(n) - for _, v := range keys { - go func() { - for j := 0; j < each; j++ { - tsc.Get(v) - } - wg.Done() - }() - } - b.StartTimer() - wg.Wait() -} - func BenchmarkCacheSet(b *testing.B) { b.StopTimer() tc := New(DefaultExpiration, 0) diff --git a/sharded.go b/sharded.go index ebf2662..7ae92af 100644 --- a/sharded.go +++ b/sharded.go @@ -1,8 +1,11 @@ package cache import ( - "encoding/binary" - "hash/fnv" + "crypto/rand" + "math" + "math/big" + insecurerand "math/rand" + "os" "runtime" "time" ) @@ -10,8 +13,9 @@ import ( // This is an experimental and unexported (for now) attempt at making a cache // with better algorithmic complexity than the standard one, namely by // preventing write locks of the entire cache when an item is added. As of the -// time of writing, the overhead of selecting buckets is much too great to be -// worth it except perhaps with extremely large caches. +// time of writing, the overhead of selecting buckets results in cache +// operations being about twice as slow as for the standard cache with small +// total cache sizes, and faster for larger ones. // // See cache_test.go for a few benchmarks. @@ -20,16 +24,46 @@ type unexportedShardedCache struct { } type shardedCache struct { + seed uint32 m uint32 cs []*cache janitor *shardedJanitor } +// djb2 with better shuffling. 5x faster than FNV with the hash.Hash overhead. +func djb33(seed uint32, k string) uint32 { + var ( + l = uint32(len(k)) + d = 5381 + seed + l + i = uint32(0) + ) + // Why is all this 5x faster than a for loop? + if l >= 4 { + for i < l-4 { + d = (d * 33) ^ uint32(k[i]) + d = (d * 33) ^ uint32(k[i+1]) + d = (d * 33) ^ uint32(k[i+2]) + d = (d * 33) ^ uint32(k[i+3]) + i += 4 + } + } + switch l - i { + case 1: + case 2: + d = (d * 33) ^ uint32(k[i]) + case 3: + d = (d * 33) ^ uint32(k[i]) + d = (d * 33) ^ uint32(k[i+1]) + case 4: + d = (d * 33) ^ uint32(k[i]) + d = (d * 33) ^ uint32(k[i+1]) + d = (d * 33) ^ uint32(k[i+2]) + } + return d ^ (d >> 16) +} + func (sc *shardedCache) bucket(k string) *cache { - h := fnv.New32() - h.Write([]byte(k)) - n := binary.BigEndian.Uint32(h.Sum(nil)) - return sc.cs[n%sc.m] + return sc.cs[djb33(sc.seed, k)%sc.m] } func (sc *shardedCache) Set(k string, x interface{}, d time.Duration) { @@ -70,6 +104,19 @@ func (sc *shardedCache) DeleteExpired() { } } +// Returns the items in the cache. This may include items that have expired, +// but have not yet been cleaned up. If this is significant, the Expiration +// fields of the items should be checked. Note that explicit synchronization +// is needed to use a cache and its corresponding Items() return values at +// the same time, as the maps are shared. +func (sc *shardedCache) Items() []map[string]*Item { + res := make([]map[string]*Item, len(sc.cs)) + for i, v := range sc.cs { + res[i] = v.Items() + } + return res +} + func (sc *shardedCache) Flush() { for _, v := range sc.cs { v.Flush() @@ -107,9 +154,19 @@ func runShardedJanitor(sc *shardedCache, ci time.Duration) { } func newShardedCache(n int, de time.Duration) *shardedCache { + max := big.NewInt(0).SetUint64(uint64(math.MaxUint32)) + rnd, err := rand.Int(rand.Reader, max) + var seed uint32 + if err != nil { + os.Stderr.Write([]byte("WARNING: go-cache's newShardedCache failed to read from the system CSPRNG (/dev/urandom or equivalent.) Your system's security may be compromised. Continuing with an insecure seed.\n")) + seed = insecurerand.Uint32() + } else { + seed = uint32(rnd.Uint64()) + } sc := &shardedCache{ - m: uint32(n - 1), - cs: make([]*cache, n), + seed: seed, + m: uint32(n), + cs: make([]*cache, n), } for i := 0; i < n; i++ { c := &cache{ @@ -121,7 +178,7 @@ func newShardedCache(n int, de time.Duration) *shardedCache { return sc } -func unexportedNewSharded(shards int, defaultExpiration, cleanupInterval time.Duration) *unexportedShardedCache { +func unexportedNewSharded(defaultExpiration, cleanupInterval time.Duration, shards int) *unexportedShardedCache { if defaultExpiration == 0 { defaultExpiration = -1 } diff --git a/sharded_test.go b/sharded_test.go new file mode 100644 index 0000000..6ef5eb3 --- /dev/null +++ b/sharded_test.go @@ -0,0 +1,68 @@ +package cache + +import ( + "strconv" + "sync" + "testing" +) + +// func TestDjb33(t *testing.T) { +// } + +var shardedKeys = []string{ + "f", + "fo", + "foo", + "barf", + "barfo", + "foobar", + "bazbarf", + "bazbarfo", + "bazbarfoo", + "foobarbazq", + "foobarbazqu", + "foobarbazquu", + "foobarbazquux", +} + +func TestShardedCache(t *testing.T) { + tc := unexportedNewSharded(DefaultExpiration, 0, 13) + for _, v := range shardedKeys { + tc.Set(v, "value", DefaultExpiration) + } +} + +func BenchmarkShardedCacheGet(b *testing.B) { + b.StopTimer() + tc := unexportedNewSharded(DefaultExpiration, 0, 10) + tc.Set("foobarba", "zquux", DefaultExpiration) + b.StartTimer() + for i := 0; i < b.N; i++ { + tc.Get("foobarba") + } +} + +func BenchmarkShardedCacheGetManyConcurrent(b *testing.B) { + b.StopTimer() + n := 10000 + tsc := unexportedNewSharded(DefaultExpiration, 0, 20) + keys := make([]string, n) + for i := 0; i < n; i++ { + k := "foo" + strconv.Itoa(n) + keys[i] = k + tsc.Set(k, "bar", DefaultExpiration) + } + each := b.N / n + wg := new(sync.WaitGroup) + wg.Add(n) + for _, v := range keys { + go func() { + for j := 0; j < each; j++ { + tsc.Get(v) + } + wg.Done() + }() + } + b.StartTimer() + wg.Wait() +}