func Hash(key string) []int { m := []int{0, 0, 0, 0} h0 := fnv.New64() io.WriteString(h0, key) hash0 := h0.Sum(nil) h1 := mmh3.New128() io.WriteString(h1, key) hash1 := h1.Sum(nil) m[0] += int(hash0[0]) m[0] += int(hash0[1]) m[0] += int(hash0[2]) m[0] += int(hash0[3]) m[0] += int(hash0[4]) m[0] += int(hash0[5]) m[0] += int(hash0[6]) m[0] += int(hash0[7]) m[3] += int(hash1[0]) m[3] += int(hash1[1]) m[3] += int(hash1[2]) m[3] += int(hash1[3]) m[3] += int(hash1[4]) m[3] += int(hash1[5]) m[3] += int(hash1[6]) m[3] += int(hash1[7]) m[3] += int(hash1[8]) m[3] += int(hash1[9]) m[3] += int(hash1[10]) m[3] += int(hash1[11]) m[3] += int(hash1[12]) m[3] += int(hash1[13]) m[3] += int(hash1[14]) m[3] += int(hash1[15]) m[1] = m[0] + (BloomFilterDoubleHashI0 * m[3]) + (BloomFilterDoubleHashI0 * BloomFilterDoubleHashI0) m[2] = m[0] + (BloomFilterDoubleHashI1 * m[3]) + (BloomFilterDoubleHashI1 * BloomFilterDoubleHashI1) return m }
func (db *Db) index() { t0 := time.Now() hashes := make(map[string]string) hasher := mmh3.New128() hashed := make(map[string]int) for index, f := range db.Files { hashed[s("%s-%d", f.MurmurHash2m, f.Size)] = index } filepath.Walk(filepath.Dir(db.path), func(path string, info os.FileInfo, err error) error { if err != nil { return nil } if info.IsDir() { return nil } ext := strings.ToLower(filepath.Ext(path)) mimeType := mime.TypeByExtension(ext) if !strings.HasPrefix(mimeType, "video") { for _, e := range videoExtensions { if e == ext { goto is_video } } return nil } is_video: p("%s\n", path) hasher.Reset() f, err := os.Open(path) if err != nil { return nil } defer f.Close() io.CopyN(hasher, f, 1024*1024*2) //h := string(hasher.Sum(nil)) h := s("%x", hasher.Sum(nil)) if name, has := hashes[h]; has { // duplicated file or conflict p("=== duplicated file or hash conflict. stop processing ===\n") p("%s\n", name) p("%s\n", path) panic("stop") } hashes[h] = path // update file info index, has := hashed[s("%s-%d", h, info.Size())] if !has { // new file db.Files = append(db.Files, &FileInfo{ MurmurHash2m: h, Size: info.Size(), }) index = len(db.Files) - 1 } // add file paths fileinfo := db.Files[index] has = false for _, p := range fileinfo.Filepaths { if p == path { has = true break } } if !has { fileinfo.Filepaths = append(fileinfo.Filepaths, path) } // update path info pathInfo, ok := db.Paths[path] if !ok { pathInfo = new(PathInfo) db.Paths[path] = pathInfo } pathInfo.Index = index pathInfo.ModTime = info.ModTime() return nil }) p("=== %d files indexed ===\n", len(db.Files)) p("%v\n", time.Now().Sub(t0)) // clear paths p("=== clear path ===\n") for path, _ := range db.Paths { _, err := os.Stat(path) if err != nil { delete(db.Paths, path) p("%s\n", path) } } err := db.Save() if err != nil { panic(err) } }