func main() { var docs = [][]byte{ []byte("Z2X6629402"), []byte("q2X6629302"), []byte("ZyX6629302"), } fmt.Printf("%b\n", 8) uint64s, _ := strconv.ParseUint("1001100110111100011011010111101101100111100111000110111010010", 2, 64) fmt.Printf("64:%x\n", uint64s) hashes := make([]uint64, len(docs)) for i, d := range docs { hashes[i] = simhash.Simhash(simhash.NewWordFeatureSet(d)) fmt.Printf("Simhash of %s: %x b:%b\n", d, hashes[i], hashes[i]) } q, e := strconv.ParseUint("13378daf6cf38dd2", 16, 64) if e != nil { fmt.Println(e.Error()) } q2, e := strconv.ParseUint("f597e9511bbc518d", 16, 64) if e != nil { fmt.Println(e.Error()) } fmt.Printf("simhash:%d\n", simhash.Compare(q, q2)) fmt.Printf("Comparison of `%s` and `%s`: %d\n", docs[0], docs[1], simhash.Compare(hashes[0], hashes[1])) fmt.Printf("Comparison of `%s` and `%s`: %d\n", docs[0], docs[2], simhash.Compare(hashes[0], hashes[2])) }
func TestFingerprint(t *testing.T) { const s1 = ` <html> <head> </head> <body> <p>Hello, World</p> </body> </html> ` const s2 = ` <html> <head> </head> <body> <p>你好,世界</p> <p>维基 百科</p> </body> </html> ` const s3 = ` <html> <head> </head> <body> <p>Hello, World</p> <p>你好,世界</p> </body> </html> ` f1, _ := Compute(strings.NewReader(s1), 4096, 2) f2, _ := Compute(strings.NewReader(s2), 4096, 2) f3, _ := Compute(strings.NewReader(s3), 4096, 2) if d := simhash.Compare(f1, f2); d > 3 { t.Errorf("distance should <= 3, actual: %d\n", d) } if d := simhash.Compare(f2, f3); d > 3 { t.Errorf("distance should <= 3, actual: %d\n", d) } }
// Distance return the similarity distance between two fingerprint. func Distance(a, b uint64) uint8 { return simhash.Compare(a, b) }
// Distance returns the similarity distance between the two fingerprints a
// and b. The value comes from simhash.Compare and is converted to int.
func Distance(a, b uint64) int {
	d := simhash.Compare(a, b)
	return int(d)
}