Ejemplo n.º 1
0
// new_docsummary_internal builds a DocumentSummary for the subtree below n,
// visiting children through foreach_child. A nil node yields an empty
// summary.
//
// Per child:
//   - comment and doctype nodes are skipped;
//   - text nodes are written to f, and the byte count returned by f.Write is
//     added to WordCount (and to LinkWordCount when node_is_in_a reports
//     true); the raw text is appended to Text;
//   - "img" elements are collected in Images, other media in Medias;
//   - "a" elements bump LinkCount and contribute only the images and medias
//     found beneath them;
//   - anything else is summarised recursively and merged with add.
func new_docsummary_internal(n *html.Node, f hash.Hash64) *DocumentSummary {
	summary := &DocumentSummary{}
	if n == nil {
		return summary
	}
	foreach_child(n, func(child *html.Node) {
		// Nodes that never contribute to the summary.
		if child.Type == html.CommentNode || child.Type == html.DoctypeNode {
			return
		}
		if child.Type == html.TextNode {
			written, _ := f.Write([]byte(child.Data))
			summary.WordCount += written
			if node_is_in_a(child) {
				summary.LinkWordCount += written
			}
			summary.Text += child.Data
			return
		}
		if child.Data == "img" {
			summary.Images = append(summary.Images, make_mediasummary(child))
			return
		}
		if node_is_media(child) {
			summary.Medias = append(summary.Medias, make_mediasummary(child))
			return
		}
		if child.Data == "a" {
			summary.LinkCount++
			anchor := new_docsummary_internal(child, f)
			summary.Images = append(summary.Images, anchor.Images...)
			summary.Medias = append(summary.Medias, anchor.Medias...)
			return
		}
		summary.add(new_docsummary_internal(child, f))
	})
	return summary
}
Ejemplo n.º 2
0
// TestRef runs every reference vector in data through the streaming hashers
// (New32, New64, New128) and the one-shot helpers (Sum32, Sum128) and reports
// any digest that differs from the expected value stored with the vector.
func TestRef(t *testing.T) {
	for _, tc := range data {
		// 32-bit: streaming, then one-shot.
		var stream32 hash.Hash32 = New32()
		stream32.Write([]byte(tc.s))
		got32 := stream32.Sum32()
		if got32 != tc.h32 {
			t.Errorf("'%s': 0x%x (want 0x%x)", tc.s, got32, tc.h32)
		}

		got32 = Sum32([]byte(tc.s))
		if got32 != tc.h32 {
			t.Errorf("'%s': 0x%x (want 0x%x)", tc.s, got32, tc.h32)
		}

		// 64-bit streaming.
		var stream64 hash.Hash64 = New64()
		stream64.Write([]byte(tc.s))
		got64 := stream64.Sum64()
		if got64 != tc.h64_1 {
			t.Errorf("'%s': 0x%x (want 0x%x)", tc.s, got64, tc.h64_1)
		}

		// 128-bit: streaming, then one-shot. Both halves must match.
		var stream128 Hash128 = New128()
		stream128.Write([]byte(tc.s))
		first, second := stream128.Sum128()
		if first != tc.h64_1 || second != tc.h64_2 {
			t.Errorf("'%s': 0x%x-0x%x (want 0x%x-0x%x)", tc.s, first, second, tc.h64_1, tc.h64_2)
		}

		first, second = Sum128([]byte(tc.s))
		if first != tc.h64_1 || second != tc.h64_2 {
			t.Errorf("'%s': 0x%x-0x%x (want 0x%x-0x%x)", tc.s, first, second, tc.h64_1, tc.h64_2)
		}
	}
}
Ejemplo n.º 3
0
Archivo: bloom.go Proyecto: uluyol/misc
// makeHashes feeds d into h and splits the resulting 64-bit sum into two
// 32-bit values, returned as {low 32 bits, high 32 bits}.
//
// h is not reset first, so any state already written to it is part of the
// sum. A Write error is returned unchanged together with a nil slice.
func makeHashes(d []byte, h hash.Hash64) ([]uint32, error) {
	if _, err := h.Write(d); err != nil {
		return nil, err
	}
	sum := h.Sum64()
	halves := make([]uint32, 2)
	halves[0] = uint32(sum)       // low word
	halves[1] = uint32(sum >> 32) // high word
	return halves, nil
}
Ejemplo n.º 4
0
// ID returns an FNV-64 hash of the daemon's host, port and username
// concatenated in that order. It identifies a daemon connection to the same
// SMTP server independent of the scope ID. If writing to the hash fails the
// error is logged and 0 is returned.
func (dm *Daemon) ID() uint64 {
	var hasher hash.Hash64 = fnv.New64()
	payload := []byte(dm.getHost() + strconv.Itoa(dm.getPort()) + dm.getUsername())
	_, err := hasher.Write(payload)
	if err != nil {
		log.Error("mail.daemon.ID", "err", err, "hashWrite", string(payload))
		return 0
	}
	return hasher.Sum64()
}
Ejemplo n.º 5
0
// TestRef runs every reference vector in data through three entry points per
// digest width: the streaming hasher's typed sum (Sum32/Sum64/Sum128), the
// streaming hasher's byte-slice Sum rendered as hex, and the one-shot
// package-level helper. Each result is compared with the expected value
// stored alongside the vector.
func TestRef(t *testing.T) {
	for _, tc := range data {
		// ---- 32-bit ----
		var stream32 hash.Hash32 = New32()
		stream32.Write([]byte(tc.s))
		got32 := stream32.Sum32()
		if got32 != tc.h32 {
			t.Errorf("'%s': 0x%x (want 0x%x)", tc.s, got32, tc.h32)
		}

		var bytes32 hash.Hash32 = New32()
		bytes32.Write([]byte(tc.s))
		wantHex := fmt.Sprintf("%08x", tc.h32)
		gotHex := fmt.Sprintf("%x", bytes32.Sum(nil))
		if gotHex != wantHex {
			t.Errorf("'%s': %s (want %s)", tc.s, gotHex, wantHex)
		}

		got32 = Sum32([]byte(tc.s))
		if got32 != tc.h32 {
			t.Errorf("'%s': 0x%x (want 0x%x)", tc.s, got32, tc.h32)
		}

		// ---- 64-bit ----
		var stream64 hash.Hash64 = New64()
		stream64.Write([]byte(tc.s))
		got64 := stream64.Sum64()
		if got64 != tc.h64_1 {
			t.Errorf("'%s': 0x%x (want 0x%x)", tc.s, got64, tc.h64_1)
		}

		var bytes64 hash.Hash64 = New64()
		bytes64.Write([]byte(tc.s))
		wantHex = fmt.Sprintf("%016x", tc.h64_1)
		gotHex = fmt.Sprintf("%x", bytes64.Sum(nil))
		if gotHex != wantHex {
			t.Errorf("Sum64: '%s': %s (want %s)", tc.s, gotHex, wantHex)
		}

		got64 = Sum64([]byte(tc.s))
		if got64 != tc.h64_1 {
			t.Errorf("Sum64: '%s': 0x%x (want 0x%x)", tc.s, got64, tc.h64_1)
		}

		// ---- 128-bit ----
		var stream128 Hash128 = New128()
		stream128.Write([]byte(tc.s))
		first, second := stream128.Sum128()
		if first != tc.h64_1 || second != tc.h64_2 {
			t.Errorf("New128: '%s': 0x%x-0x%x (want 0x%x-0x%x)", tc.s, first, second, tc.h64_1, tc.h64_2)
		}

		var bytes128 Hash128 = New128()
		bytes128.Write([]byte(tc.s))
		wantHex = fmt.Sprintf("%016x%016x", tc.h64_1, tc.h64_2)
		gotHex = fmt.Sprintf("%x", bytes128.Sum(nil))
		if gotHex != wantHex {
			t.Errorf("New128: '%s': %s (want %s)", tc.s, gotHex, wantHex)
		}

		first, second = Sum128([]byte(tc.s))
		if first != tc.h64_1 || second != tc.h64_2 {
			t.Errorf("Sum128: '%s': 0x%x-0x%x (want 0x%x-0x%x)", tc.s, first, second, tc.h64_1, tc.h64_2)
		}
	}
}
Ejemplo n.º 6
0
// benchmarkHash measures hashing one uid string per iteration with h. The
// uids are fetched before the timer is reset so that only Reset, the string
// write and Sum64 are timed. The last sum is stored in the package-level
// result variable so the loop body has an observable effect.
func benchmarkHash(b *testing.B, h hash.Hash64) {
	inputs := getUids(b.N)
	var last uint64
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		h.Reset()
		io.WriteString(h, inputs[iter])
		last = h.Sum64()
	}
	result = last
}
Ejemplo n.º 7
0
// hashUpdateOrdered resets h and returns its 64-bit sum over a followed by b,
// each encoded as 8 little-endian bytes. Because the operands are fed in
// order, swapping a and b generally yields a different result.
//
// Both values are encoded into one fixed-size buffer and handed to h in a
// single Write. This produces the same byte stream as two binary.Write calls
// but avoids boxing each operand into an interface and allocating a
// temporary buffer per call.
//
// It panics if h.Write reports an error; hash.Hash documents that Write never
// returns one, so a failure indicates a broken Hash64 implementation.
func hashUpdateOrdered(h hash.Hash64, a, b uint64) uint64 {
	// For ordered updates, use a real hash function.
	h.Reset()

	var buf [16]byte
	binary.LittleEndian.PutUint64(buf[:8], a)
	binary.LittleEndian.PutUint64(buf[8:], b)
	if _, err := h.Write(buf[:]); err != nil {
		panic(err)
	}

	return h.Sum64()
}
Ejemplo n.º 8
0
// Get hashes host, port and username (concatenated in that order) with
// FNV-64 and compares the result with the ID remembered from the previous
// call.
//
// When the hash differs from u.lastID, lastID is updated and the new ID is
// returned with hasChanged set to true, meaning someone updated the
// configuration. Otherwise the unchanged ID is returned with hasChanged set
// to false. If writing to the hash fails the error is logged and (0, false)
// is returned.
func (u *uniqueID) Get() (id uint64, hasChanged bool) {
	var hasher hash.Hash64 = fnv.New64()
	payload := []byte(u.getHost() + strconv.Itoa(u.getPort()) + u.getUsername())
	if _, err := hasher.Write(payload); err != nil {
		log.Error("mail.daemon.ID", "err", err, "hashWrite", string(payload))
		return 0, false
	}

	current := hasher.Sum64()
	if current == u.lastID {
		return current, false // has not changed
	}
	u.lastID = current
	return current, true
}
Ejemplo n.º 9
0
// testCollissions hashes uidSize uid strings with h, resetting the hasher
// before each one, and fails the test if any two of them produce the same
// 64-bit sum.
func testCollissions(t *testing.T, h hash.Hash64) {
	inputs := getUids(uidSize)
	seen := make(map[uint64]bool)
	collisions := 0

	for idx := 0; idx < uidSize; idx++ {
		h.Reset()
		io.WriteString(h, inputs[idx])
		sum := h.Sum64()
		if seen[sum] {
			collisions++
			continue
		}
		seen[sum] = true
	}

	if collisions == 0 {
		return
	}
	t.Errorf("Found %v collissions for uidSize %v\n", collisions, uidSize)
}
Ejemplo n.º 10
0
// writeChecksumBlock writes a checksum block to output as three big-endian
// pieces: a zero uint16 (the file path length, which is always zero for this
// block), one byte holding blockTypeChecksum, and the current 64-bit sum of
// hash. The first write error stops the sequence and is returned.
func writeChecksumBlock(hash hash.Hash64, output io.Writer) error {
	// file path length... zero
	if err := binary.Write(output, binary.BigEndian, uint16(0)); err != nil {
		return err
	}
	if _, err := output.Write([]byte{byte(blockTypeChecksum)}); err != nil {
		return err
	}
	return binary.Write(output, binary.BigEndian, hash.Sum64())
}
Ejemplo n.º 11
0
// Hash64 returns the 64-bit sum of s computed with h. The hasher is reset
// before s is written, so whatever h held before the call does not affect
// the result; h is left holding the state for s afterwards.
func Hash64(s string, h hash.Hash64) uint64 {
	h.Reset()
	payload := []byte(s)
	// hash.Hash documents that Write never returns an error.
	_, _ = h.Write(payload)
	return h.Sum64()
}
Ejemplo n.º 12
0
// Get reads the object stored under key and returns its payload together
// with a store.Stat describing it.
//
// As read here, the file starts with a 40-byte header: an 8-byte FNV
// checksum followed by a 32-byte SHA-256 value. Everything after the header
// is the payload. Unless opts.NoVerify is set, an FNV-1a 64 hash is computed
// over the SHA-256 bytes plus the payload and compared with the stored
// checksum; on mismatch the object is passed to ds.quarantine and
// ErrCorruptObject is returned. The SHA-256 value is not verified in this
// function; it is only copied into the returned Stat.
//
// Other failure modes: store.ErrCancelled when opts.Cancel fires (checked
// before opening and after each read chunk), store.ErrNotFound when
// findAndOpen returns no file handle, and any I/O error encountered while
// reading.
//
// The directory read lock is held for the duration of the call.
func (ds *Directory) Get(key string, opts store.GetOptions) ([]byte, store.Stat, error) {
	ds.mu.RLock()
	defer ds.mu.RUnlock()

	// Non-blocking cancellation check before doing any work.
	select {
	case <-opts.Cancel:
		return nil, store.Stat{}, store.ErrCancelled
	default:
	}

	fh, path, err := ds.findAndOpen(key)
	if err != nil {
		return nil, store.Stat{}, err
	}
	if fh == nil {
		return nil, store.Stat{}, store.ErrNotFound
	}
	defer fh.Close()

	// The stored checksum is read straight from the file handle, so it is
	// never fed into the verifying hash set up below.
	var expectedFNV [8]byte
	_, err = io.ReadFull(fh, expectedFNV[:])
	if err != nil {
		return nil, store.Stat{}, err
	}

	// When verifying, every byte read through rdr from here on is also
	// written into fnver by the TeeReader.
	var rdr io.Reader
	var fnver hash.Hash64
	if opts.NoVerify {
		rdr = fh
	} else {
		fnver = fnv.New64a()
		rdr = io.TeeReader(fh, fnver)
	}

	var expectedSHA256 [32]byte
	_, err = io.ReadFull(rdr, expectedSHA256[:])
	if err != nil {
		return nil, store.Stat{}, err
	}

	fi, err := fh.Stat()
	if err != nil {
		return nil, store.Stat{}, err
	}

	// Payload size = file size minus the 8-byte FNV and 32-byte SHA-256
	// header fields read above.
	size := fi.Size() - 40

	// Reject sizes that do not survive a round trip through int, since the
	// buffer length below is an int.
	if int64(int(size)) != size {
		return nil, store.Stat{}, errors.New("file is too big")
	}

	writeTime := fi.ModTime().Unix()

	// Read the payload in chunks of at most 1 MiB so cancellation can be
	// observed between chunks.
	data := make([]byte, int(size))
	at := 0
	for at < len(data) {
		readInto := data[at:]
		if len(readInto) > 1024*1024 {
			readInto = readInto[:1024*1024]
		}

		n, err := rdr.Read(readInto)
		at += n
		if err != nil {
			// EOF ends the loop; a short read is detected by the length
			// check after the loop.
			if err == io.EOF {
				break
			}
			return nil, store.Stat{}, err
		}

		select {
		case <-opts.Cancel:
			return nil, store.Stat{}, store.ErrCancelled
		default:
		}
	}

	if at != len(data) {
		return nil, store.Stat{}, errors.New("file was shortened during read")
	}

	if !opts.NoVerify {
		actualFNV := fnver.Sum(nil)

		if !bytes.Equal(actualFNV, expectedFNV[:]) {
			// Explicit close before quarantining; the deferred Close above
			// will run again on return.
			fh.Close()

			// TODO: this relocking is racy. The read lock is dropped before
			// the write lock is taken for quarantine, then re-acquired so
			// that the deferred RUnlock above stays balanced.
			ds.mu.RUnlock()
			ds.mu.Lock()
			ds.quarantine(key, path)
			ds.mu.Unlock()
			ds.mu.RLock()
			return nil, store.Stat{}, ErrCorruptObject
		}
	}

	return data, store.Stat{
		SHA256:    expectedSHA256,
		Size:      size,
		WriteTime: writeTime,
	}, nil
}
Ejemplo n.º 13
0
// hashKernel writes data to hash, takes the digest bytes from Sum(nil) and
// then resets hash so it is clean for the next call. It returns the two base
// values from which the k hashes are derived: first the big-endian uint32 in
// digest bytes 4..7, then the big-endian uint32 in digest bytes 0..3.
//
// hash is not reset before the write, so the caller must pass a clean hasher
// on the first call.
func hashKernel(data []byte, hash hash.Hash64) (uint32, uint32) {
	hash.Write(data)
	digest := hash.Sum(nil)
	hash.Reset()

	tail := binary.BigEndian.Uint32(digest[4:8])
	head := binary.BigEndian.Uint32(digest[:4])
	return tail, head
}