Beispiel #1
0
// Hash maps key to four integer values for bloom-filter probing.
//
// Two digests of key are computed: a 64-bit FNV digest and an mmh3.New128
// digest. Each digest is collapsed to a single int by adding up its bytes
// (the first 8 bytes of the FNV digest, the first 16 bytes of the mmh3
// digest). With a = FNV byte sum and b = mmh3 byte sum, the result is
//
//	[a, a + I0*b + I0*I0, a + I1*b + I1*I1, b]
//
// where I0 and I1 are BloomFilterDoubleHashI0 and BloomFilterDoubleHashI1.
func Hash(key string) []int {
	fnvHasher := fnv.New64()
	io.WriteString(fnvHasher, key)
	fnvSum := fnvHasher.Sum(nil)

	murmurHasher := mmh3.New128()
	io.WriteString(murmurHasher, key)
	murmurSum := murmurHasher.Sum(nil)

	// Fold the FNV digest into one value by summing its 8 bytes.
	base := 0
	for _, b := range fnvSum[:8] {
		base += int(b)
	}

	// Fold the first 16 bytes of the mmh3 digest the same way.
	step := 0
	for _, b := range murmurSum[:16] {
		step += int(b)
	}

	return []int{
		base,
		base + (BloomFilterDoubleHashI0 * step) + (BloomFilterDoubleHashI0 * BloomFilterDoubleHashI0),
		base + (BloomFilterDoubleHashI1 * step) + (BloomFilterDoubleHashI1 * BloomFilterDoubleHashI1),
		step,
	}
}
Beispiel #2
0
// index scans the directory containing db.path for video files, updates
// db.Files and db.Paths to reflect what was found, drops db.Paths entries
// whose path can no longer be stat'ed, and saves the database.
//
// A file counts as a video when the MIME type derived from its lower-cased
// extension starts with "video", or when that extension appears in
// videoExtensions. Each video is identified by the hex digest of (at most)
// its first 2 MiB together with its size. Files that cannot be opened or
// read are skipped.
//
// index panics when two files met during one scan produce the same digest,
// and when db.Save returns an error.
func (db *Db) index() {
	t0 := time.Now()

	// Digest -> first path that produced it during this scan.
	seen := make(map[string]string)
	hasher := mmh3.New128()

	// "<digest>-<size>" -> position in db.Files for already recorded files.
	known := make(map[string]int, len(db.Files))
	for i, f := range db.Files {
		known[s("%s-%d", f.MurmurHash2m, f.Size)] = i
	}

	// The callback always returns nil, so Walk has no error to report.
	filepath.Walk(filepath.Dir(db.path), func(path string, info os.FileInfo, err error) error {
		if err != nil || info.IsDir() {
			return nil
		}

		ext := strings.ToLower(filepath.Ext(path))
		if !strings.HasPrefix(mime.TypeByExtension(ext), "video") {
			// The MIME table does not call it a video; fall back to the
			// explicit extension list.
			listedExt := false
			for _, e := range videoExtensions {
				if e == ext {
					listedExt = true
					break
				}
			}
			if !listedExt {
				return nil
			}
		}
		p("%s\n", path)

		hasher.Reset()
		f, err := os.Open(path)
		if err != nil {
			return nil
		}
		defer f.Close()
		// io.EOF only means the file is shorter than 2 MiB, which is fine.
		// Any other error would leave a digest of partial content, so the
		// file is skipped instead of being recorded under a wrong digest.
		if _, err := io.CopyN(hasher, f, 1024*1024*2); err != nil && err != io.EOF {
			p("skip %s: %v\n", path, err)
			return nil
		}

		h := s("%x", hasher.Sum(nil))
		if name, dup := seen[h]; dup { // duplicated file or conflict
			p("=== duplicated file or hash conflict. stop processing ===\n")
			p("%s\n", name)
			p("%s\n", path)
			panic("stop")
		}
		seen[h] = path

		// Find the existing file record, or create one for a new file.
		index, found := known[s("%s-%d", h, info.Size())]
		if !found {
			db.Files = append(db.Files, &FileInfo{
				MurmurHash2m: h,
				Size:         info.Size(),
			})
			index = len(db.Files) - 1
		}

		// Record this path on the file record unless it is already there.
		fileinfo := db.Files[index]
		recorded := false
		for _, existing := range fileinfo.Filepaths {
			if existing == path {
				recorded = true
				break
			}
		}
		if !recorded {
			fileinfo.Filepaths = append(fileinfo.Filepaths, path)
		}

		// Create or refresh the per-path entry.
		pathInfo, ok := db.Paths[path]
		if !ok {
			pathInfo = new(PathInfo)
			db.Paths[path] = pathInfo
		}
		pathInfo.Index = index
		pathInfo.ModTime = info.ModTime()

		return nil
	})
	p("=== %d files indexed ===\n", len(db.Files))
	p("%v\n", time.Since(t0))

	// Remove path entries that can no longer be stat'ed.
	p("=== clear path ===\n")
	for path := range db.Paths {
		if _, err := os.Stat(path); err != nil {
			delete(db.Paths, path)
			p("%s\n", path)
		}
	}

	if err := db.Save(); err != nil {
		panic(err)
	}
}