func NewBloomWrap(capSize uint) BloomWrap { wrap := []bloom.Bloom{} newFilter := func(h hash.Hash) { filter := standard.New(capSize) filter.SetHasher(h) wrap = append(wrap, filter) } newFilter(cityhash.New64()) newFilter(fnv.New64()) newFilter(md5.New()) log.Println("bloomLen: ", len(wrap)) return wrap }
func BenchmarkBloomCityHash(b *testing.B) { var lines []string lines = append(lines, web2...) for len(lines) < b.N { lines = append(lines, web2...) } bf := New(uint(b.N)) bf.SetHasher(cityhash.New64()) fn := 0 b.ResetTimer() for l := 0; l < b.N; l++ { if !(bf.Add([]byte(lines[l])).Check([]byte(lines[l]))) { fn++ } } b.StopTimer() }
func generateGarbage() { var capSize uint = 1000000000 filter := standard.New(capSize) filter.SetHasher(cityhash.New64()) v := []byte("Love") b := filter.Add(v).Check(v) log.Println("check @v:", b) bad := 0 for i := 0; i < 1000000000; i++ { if i%1000000 == 0 { log.Println(i) debug.FreeOSMemory() } d := []byte(fmt.Sprint("data", i)) if filter.Check(d) == true { bad++ // panic(fmt.Sprint("should not exist @d:",string(d))) } if flag := filter.Add(d).Check(d); flag == false { panic(d) } } log.Println("====>>That is all @bad:", bad) }
func TestBloomFilter(t *testing.T) { l := []uint{uint(len(web2)), 200000, 100000, 50000} h := []hash.Hash{fnv.New64(), crc64.New(crc64.MakeTable(crc64.ECMA)), murmur3.New64(), cityhash.New64(), md5.New(), sha1.New()} n := []string{"fnv.New64()", "crc64.New()", "murmur3.New64()", "cityhash.New64()", "md5.New()", "sha1.New()"} for i := range l { for j := range h { fmt.Printf("\n\nTesting %s with size %d\n", n[j], l[i]) bf := New(l[i]) bf.SetHasher(h[j]) testBloomFilter(t, bf) } } }
func TestBloomFilter(t *testing.T) { l := []uint{uint(len(web2)), 200000, 100000, 50000} h := []hash.Hash{fnv.New64(), crc64.New(crc64.MakeTable(crc64.ECMA)), murmur3.New64(), cityhash.New64(), md5.New(), sha1.New()} n := []string{"fnv.New64()", "crc64.New()", "murmur3.New64()", "cityhash.New64()", "md5.New()", "sha1.New()"} b := []func(uint) bloom.Bloom{standard.New, partitioned.New} bn := []string{"standard", "partitioned"} for i := range l { for j := range h { for k := range b { fmt.Printf("\n\nTesting %s with size %d using %s\n", n[j], l[i], bn[k]) bf := New(l[i]) bf.SetHasher(h[j]) bf.(*ScalableBloom).SetBloomFilter(b[k]) bf.Reset() testBloomFilter(t, bf) } } } }