forked from dgryski/go-cuckoof
/
cuckoof.go
216 lines (170 loc) · 4.55 KB
/
cuckoof.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
// Package cuckoof implements a cuckoo filter.
/*
This implements a (2,4)-cuckoo filter (two candidate buckets per item, four
entries per bucket) with 8-bit fingerprints. This gives a false positive rate
of approximately 3.1%.
http://mybiasedcoin.blogspot.nl/2014/10/cuckoo-filters.html
https://www.cs.cmu.edu/~dga/papers/cuckoo-conext2014.pdf
https://www.cs.cmu.edu/~binfan/papers/login_cuckoofilter.pdf
*/
package cuckoof
import (
"math/rand"
"github.com/dchest/siphash"
)
// CF is a cuckoo filter storing one-byte fingerprints in rows of four slots.
type CF struct {
// TODO(dgryski): add larger fingerprints
// t is the hash table; each row holds up to four one-byte fingerprints.
t [][4]byte
// occupied holds one bit per slot of t, packed two rows per byte: the low
// nybble describes the even row and the high nybble the odd row.
occupied []byte
// rnd is the state of the xorshift generator used for random choices while
// inserting (which candidate row to start from, which slot to evict).
rnd uint64
}
// New returns a new cuckoo filter with size hash table rows, each of which
// holds up to four fingerprints. Size must be a positive power of two no
// larger than 1<<31; New panics otherwise.
func New(size int) *CF {
	// size&(size-1) is also zero for size == 0, so positivity has to be
	// checked separately: a table with no rows would make every operation
	// divide by zero when reducing the hash modulo the table length.
	if size <= 0 || size&(size-1) != 0 {
		panic("cuckoof: size must be a power of two")
	}
	// Row indexes are uint32 values reduced modulo uint32(len(t)); a larger
	// table would truncate that length.
	if uint64(size) > 1<<31 {
		panic("cuckoof: size too large")
	}

	// The xorshift generator maps 0 to 0 forever, so never seed it with 0.
	seed := uint64(rand.Int63())
	if seed == 0 {
		seed = 1
	}

	// Two rows share one occupancy byte; round up so that a single-row
	// filter still gets the byte it needs.
	// TODO(dgryski): size is a power of two, all `%len(t)` should become bitmasks instead
	return &CF{
		t:        make([][4]byte, size),
		occupied: make([]byte, (size+1)/2),
		rnd:      seed,
	}
}
// Insert adds an element to the filter and returns if the insertion was
// successful.
//
// The element's fingerprint is stored in the first free slot of one of its two
// candidate rows. If both rows are full, stored fingerprints are displaced to
// their alternate rows, up to 500 times, until a free slot turns up. When no
// slot is found the filter is considered full: every displacement is undone,
// the table is left exactly as it was before the call, and Insert returns
// false. (Only the random generator state advances.)
func (cf *CF) Insert(x []byte) bool {
	// maxKicks bounds the number of displacements tried before giving up.
	const maxKicks = 500

	rows := uint32(len(cf.t))
	h := siphash.Hash(0, 0, x)
	f := byte(h >> 32)
	i1 := uint32(h) % rows
	i2 := (i1 ^ hashfp(f)) % rows

	if idx, ok := cf.hasSpace(i1); ok {
		cf.setOccupied(i1, idx, f)
		return true
	}
	if idx, ok := cf.hasSpace(i2); ok {
		cf.setOccupied(i2, idx, f)
		return true
	}

	// Both candidate rows are full: start displacing from one of them, chosen
	// at random.
	i := i1
	cf.rnd = rnd(cf.rnd)
	if cf.rnd&1 == 1 {
		i = i2
	}

	// kick records a single displacement so that it can be undone.
	type kick struct {
		row  uint32
		slot uint8
		old  byte
	}
	var path [maxKicks]kick

	for n := 0; n < maxKicks; n++ {
		// Row i is known to be full here, so any slot holds a live
		// fingerprint; swap ours with a randomly chosen one.
		cf.rnd = rnd(cf.rnd)
		slot := uint8(cf.rnd & 3)
		old := cf.t[i][slot]
		cf.t[i][slot] = f
		path[n] = kick{row: i, slot: slot, old: old}
		f = old

		// The displaced fingerprint moves to its alternate row.
		i = (i ^ hashfp(f)) % rows
		if idx, ok := cf.hasSpace(i); ok {
			cf.setOccupied(i, idx, f)
			return true
		}
	}

	// No room was found. Without a rollback the fingerprint still held in f
	// would be dropped, turning an earlier successful insert into a false
	// negative. Undo the displacements newest-first so that slots visited
	// more than once end up with their original content.
	for n := maxKicks - 1; n >= 0; n-- {
		k := path[n]
		cf.t[k.row][k.slot] = k.old
	}
	return false
}
// Lookup reports whether a fingerprint matching x is stored in either of the
// two rows x hashes to. Only the 8-bit fingerprint is compared, so a true
// result may be a false positive.
func (cf *CF) Lookup(x []byte) bool {
	rows := uint32(len(cf.t))
	sum := siphash.Hash(0, 0, x)
	primary, fp := uint32(sum)%rows, byte(sum>>32)

	if !cf.hasFP(primary, fp) {
		// Not in the primary row: try the alternate one, derived from the
		// primary index and the fingerprint.
		alternate := (primary ^ hashfp(fp)) % rows
		return cf.hasFP(alternate, fp)
	}
	return true
}
// Delete removes one stored fingerprint matching x from the filter, looking
// in x's primary row first and its alternate row second. It returns whether a
// matching fingerprint was found and removed.
func (cf *CF) Delete(x []byte) bool {
	rows := uint32(len(cf.t))
	sum := siphash.Hash(0, 0, x)
	primary, fp := uint32(sum)%rows, byte(sum>>32)

	if !cf.delFP(primary, fp) {
		// Nothing removed from the primary row: fall back to the alternate.
		alternate := (primary ^ hashfp(fp)) % rows
		return cf.delFP(alternate, fp)
	}
	return true
}
// evict stores f in a randomly chosen slot of row and returns the fingerprint
// that slot held before. The occupancy bits are neither read nor changed; the
// generator state in cf.rnd is advanced by one step.
func (cf *CF) evict(row uint32, f byte) byte {
	cf.rnd = rnd(cf.rnd)
	// The two low bits of the fresh state select one of the four slots.
	slot := &cf.t[row][cf.rnd&3]
	victim := *slot
	*slot = f
	return victim
}
// hasFP reports whether any occupied slot of row holds fingerprint f. Slots
// whose occupancy bit is clear are ignored, whatever byte they contain.
func (cf *CF) hasFP(row uint32, f byte) bool {
	// Odd rows live in the high nybble of their occupancy byte.
	bits := (cf.occupied[row/2] >> (4 * uint(row&1))) & 0x0F
	for slot := uint(0); slot < 4; slot++ {
		if bits&(1<<slot) != 0 && cf.t[row][slot] == f {
			return true
		}
	}
	return false
}
// delFP clears the occupancy bit of the first occupied slot in row that holds
// fingerprint f, and returns whether such a slot was found. The stored byte
// itself is left in place; only the occupancy bit marks the slot as free.
func (cf *CF) delFP(row uint32, f byte) bool {
	// Odd rows live in the high nybble of their occupancy byte.
	shift := 4 * uint(row&1)
	bits := (cf.occupied[row/2] >> shift) & 0x0F
	for slot := uint(0); slot < 4; slot++ {
		mask := byte(1) << slot
		if bits&mask == 0 || cf.t[row][slot] != f {
			continue
		}
		cf.occupied[row/2] &^= mask << shift
		return true
	}
	return false
}
// setOccupied stores fingerprint f in slot idx of row and sets that slot's
// occupancy bit.
func (cf *CF) setOccupied(row uint32, idx byte, f byte) {
	cf.t[row][idx] = f
	// Odd rows use the high nybble of the shared occupancy byte.
	mask := (byte(1) << idx) << (4 * uint(row&1))
	cf.occupied[row/2] |= mask
}
// freebits maps an occupancy nybble to the offset of its lowest clear bit.
// Only nybbles whose bit 0 is set have a non-zero answer, so just those are
// spelled out; every other entry, including the full nybble 0xF which has no
// clear bit at all, is left at 0.
var freebits = [16]byte{
	0x1: 1, // 0001
	0x3: 2, // 0011
	0x5: 1, // 0101
	0x7: 3, // 0111
	0x9: 1, // 1001
	0xB: 2, // 1011
	0xD: 1, // 1101
	0xF: 0, // 1111: no free slot, callers must check for this separately
}
// hasSpace looks for a free slot in row. It returns the index of the lowest
// free slot and true, or 0 and false when all four slots are occupied.
func (cf *CF) hasSpace(row uint32) (byte, bool) {
	nybble := cf.occupied[row/2]
	if row&1 != 0 {
		// Odd rows are described by the high nybble.
		nybble >>= 4
	}
	nybble &= 0x0F
	slot := freebits[nybble]
	return slot, nybble != 0x0F
}
// TODO(dgryski): make rnd a type that can respond with 1-4 *bits*
// rnd advances an xorshift/multiply random number generator by one step: it
// takes the current state x and returns the next one. A state of 0 maps to 0.
func rnd(x uint64) uint64 {
	const multiplier = 2685821657736338717

	x ^= x >> 12
	x ^= x << 25
	x ^= x >> 27
	return x * multiplier
}
// hashfp hashes a fingerprint by running it through two rounds of the rnd
// generator and folding the 64-bit result into 32 bits.
func hashfp(b byte) uint32 {
	mixed := rnd(rnd(uint64(b)))
	lo, hi := uint32(mixed), uint32(mixed>>32)
	return lo ^ hi
}