mem_speed_test.go
package cache

/*
These benchmarks measure raw in-memory throughput, without any simulated database latency.
They mainly show that there is no serious lock contention, and that throughput actually
improves as more processes and goroutines are added.
Typical throughput on my laptop is ~1M ops/sec (~1 usec/op) for the good configurations.
*/
import (
	"fmt"
	"math/rand"
	"runtime"
	"strconv"
	"sync"
	"testing"
	"time"
)
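
// The benchmark names encode the arguments passed to doBenchmark: <N>pr is GOMAXPROCS,
// <N>strp is the number of lock stripes, <N>gort is the number of worker goroutines,
// <N>maxsize is the cache size limit, and the final token is the max entry age.
// Run them with the standard tooling, e.g.: go test -bench=BenchmarkMem
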
func BenchmarkMem_8pr_64strp_16gort_200maxsize_HourExpire(b *testing.B) {
	doBenchmark(b.N, b, 8, 64, 16, int64(200), time.Hour, int64(900), 0)
}

func BenchmarkMem_8pr_32strp_16gort_200maxsize_HourExpire(b *testing.B) {
	doBenchmark(b.N, b, 8, 32, 16, int64(200), time.Hour, int64(900), 0)
}

func BenchmarkMem_4pr_32strp_16gort_200maxsize_HourExpire(b *testing.B) {
	doBenchmark(b.N, b, 4, 32, 16, int64(200), time.Hour, int64(900), 0)
}

func BenchmarkMem_4pr_16strp_16gort_200maxsize_HourExpire(b *testing.B) {
	doBenchmark(b.N, b, 4, 16, 16, int64(200), time.Hour, int64(900), 0)
}

func BenchmarkMem_4pr_64strp_16gort_200maxsize_HourExpire(b *testing.B) {
	doBenchmark(b.N, b, 4, 64, 16, int64(200), time.Hour, int64(900), 0)
}

func BenchmarkMem_4pr_64strp_8gort_200maxsize_HourExpire(b *testing.B) {
	doBenchmark(b.N, b, 4, 64, 8, int64(200), time.Hour, int64(900), 0)
}

func BenchmarkMem_4pr_64strp_32gort_200maxsize_HourExpire(b *testing.B) {
	doBenchmark(b.N, b, 4, 64, 32, int64(200), time.Hour, int64(900), 0)
}

// A degenerate case: many procs and goroutines competing for a single stripe lock.
func BenchmarkMem_8pr_1strp_16gort_200maxsize_HourExpire(b *testing.B) {
	doBenchmark(b.N, b, 8, 1, 16, int64(200), time.Hour, int64(900), 0)
}

// The single-threaded case: GOMAXPROCS=1, so the goroutines run concurrently
// but never in parallel.
func BenchmarkMem_1pr_1strp_16gort_200maxsize_HourExpire(b *testing.B) {
	doBenchmark(b.N, b, 1, 1, 16, int64(200), time.Hour, int64(900), 0)
}

// A single worker goroutine, so operations are issued serially.
func BenchmarkMem_8pr_1strp_1gort_200maxsize_HourExpire(b *testing.B) {
	doBenchmark(b.N, b, 8, 1, 1, int64(200), time.Hour, int64(900), 0)
}
// // This isn't a proper golang "benchmark," it hard-codes the number of iterations and has
// // no meaningful concept of latency per operation. So that's why it doesn't use go's builtin
// // benchmark system.
// func TestCacheWarmup(t *testing.T) {
// runBenchmark := func(count int) float64 {
// beforeNanos := time.Now().UnixNano()
// doBenchmark(count, nil, 8, 128, 128, int64(100000), time.Hour, int64(900000), 0)
// duration := time.Now().UnixNano() - beforeNanos
// throughput := float64(count) / float64(duration)
// return throughput
// }
// firstCount := 10000
// firstThroughput := runBenchmark(firstCount)
// fmt.Printf("%d ops throughput=%f\n", firstCount, firstThroughput)
// secondCount := firstCount * 5
// secondThroughput := runBenchmark(secondCount)
// fmt.Printf("%d ops throughput=%f\n", secondCount, secondThroughput)
// speedUp := secondThroughput / firstThroughput
// fmt.Printf("Speedup: %f\n", speedUp)
// }

// doBenchmark runs numOps cache operations spread across numGoRoutines goroutines,
// with GOMAXPROCS set to gomaxprocs and the cache configured with numStripes lock
// stripes, a maxSize entry limit, and a maxAge expiration. fakeDbMax and simLatency
// configure the backing FakeDatabase.
func doBenchmark(numOps int, b *testing.B, gomaxprocs int, numStripes int, numGoRoutines int,
	maxSize int64, maxAge time.Duration, fakeDbMax int64, simLatency time.Duration) {
	if b != nil {
		b.StopTimer()
	}
	origGomaxprocs := runtime.GOMAXPROCS(-1) // Save the current setting so it can be restored
	runtime.GOMAXPROCS(gomaxprocs)
db := &FakeDatabase{fakeDbMax, simLatency}
loader := func(key string) (interface{}, error) {
return db.Get(key)
}
sizer := func(x interface{}) int64 {
return 1
}
	randMax := int64(1000000)

	// Divide numOps as evenly as possible across the goroutines.
	opsForGoroutine := make([]int, numGoRoutines)
	for i := 0; i < numGoRoutines; i++ {
		opsForGoroutine[i] = numOps / numGoRoutines
	}
	remainder := numOps % numGoRoutines
	for i := 0; i < remainder; i++ {
		opsForGoroutine[i] += 1
	}

	initWg := new(sync.WaitGroup)
	finishedWg := new(sync.WaitGroup)
	initWg.Add(1)
	finishedWg.Add(numGoRoutines)
	cache := NewCache(numStripes, loader, sizer)
	for i := 0; i < numGoRoutines; i++ {
		go func(numOpsForThread int, seed int) {
			rng := rand.New(rand.NewSource(int64(seed)))
			// Key lookup frequency follows a Zipf distribution, to simulate the fact
			// that some cache keys are much more popular than others.
			// The Zipf parameters are eyeballed: the output distribution merely looks
			// plausible (see printZipfHistogram at the bottom of this file). They
			// probably shouldn't be hardcoded, and should be taken as arguments instead.
			zipf := rand.NewZipf(rng, 1.1, 10, uint64(randMax))
			initWg.Wait() // Block until all goroutines are ready
			for j := 0; j < numOpsForThread; j++ {
				r := zipf.Uint64()
				result, err := cache.GetOrLoad(strconv.FormatInt(int64(r), 10))
				cache.Expire(maxSize, maxAge) // Enforce cache size and max age constraints
				if err != nil {
					panic(fmt.Sprintf("Cache lookup error: %v", err))
				}
				// FakeDatabase returns 10*key for keys within fakeDbMax, and nil beyond it.
				if r <= uint64(fakeDbMax) && result != 10*int(r) {
					panic(fmt.Sprintf("Unexpected result %T:%d looking up %T:%d",
						result, result, r, r))
				} else if r > uint64(fakeDbMax) && result != nil {
					panic("Result should have been nil")
				}
			}
			finishedWg.Done()
		}(opsForGoroutine[i], i)
	}

	if b != nil {
		b.StartTimer()
	}
	initWg.Done()     // Unblock worker goroutines
	finishedWg.Wait() // Wait for workers to finish
	runtime.GOMAXPROCS(origGomaxprocs) // Restore the original setting
}