Esempio n. 1
0
func TestGolden(t *testing.T) {
	for _, g := range golden {
		in := g.in

		// We test the vanilla implementation
		p := []byte(g.in)
		vanilla := adler32.New()
		vanilla.Write(p)
		if got := vanilla.Sum32(); got != g.out {
			t.Errorf("vanilla implentation: for %q, expected 0x%x, got 0x%x", in, g.out, got)
			continue
		}

		// We test the rolling implementation by prefixing the slice by a
		// space, writing it to our rolling hash, and then rolling once
		q := []byte(" ")
		q = append(q, p...)
		rolling := rollsum.New()
		rolling.Write(q[:len(q)-1])
		rolling.Roll(q[len(q)-1])
		if got := rolling.Sum32(); got != g.out {
			t.Errorf("rolling implentation: for %q, expected 0x%x, got 0x%x", in, g.out, got)
			continue
		}
	}
}
Esempio n. 2
0
func TestUninitialized(t *testing.T) {
	s := []byte(data)
	hash := rollsum.New()
	err := hash.Roll(s[0])

	if err == nil {
		t.Fatal("Rolling with an uninitialized window should trigger an error")
	}
}
Esempio n. 3
0
func BenchmarkWeakHashAdler32(b *testing.B) {
	data := make([]byte, size)
	hf := adler32.New()

	for i := 0; i < b.N; i++ {
		hf.Write(data)
	}

	_ = hf.Sum32()
	b.SetBytes(size)
}
Esempio n. 4
0
func BenchmarkWeakHashAdler32Roll(b *testing.B) {
	data := make([]byte, size)
	hf := adler32.New()
	hf.Write(data)

	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		for i := 0; i <= size; i++ {
			hf.Roll('a')
		}
	}

	b.SetBytes(size)
}
Esempio n. 5
0
func BenchmarkRolling128B(b *testing.B) {
	b.SetBytes(1024)
	window := make([]byte, 128)
	for i := range window {
		window[i] = byte(i)
	}

	h := rollsum.New()
	in := make([]byte, 0, h.Size())

	b.ResetTimer()
	h.Write(window)
	for i := 0; i < b.N; i++ {
		h.Roll(byte(128 + i))
		h.Sum(in)
	}
}
Esempio n. 6
0
func chop(r *bufio.Reader, level int) (rollsum uint32, id string, n int, err error) {

	m := boundaryMask(level)

	if level > 0 {

		entries := make(map[int]string)
		offset := 0
		for {
			rollsum, id, n, err := chop(r, level-1)
			entries[offset] = id
			offset += n
			if (rollsum&m == m && level < levelmax) || err != nil {
				resb, _ := json.Marshal(entries)
				id = store(resb)
				return rollsum, id, offset, err
			}
		}
	} else {
		data := make([]byte, 128, 4*(1<<chunkbits))
		hash := adler32.New()

		n, err := r.Read(data)
		if err != nil {
			if err == io.EOF && n > 0 {
				data = data[:n]
				hash.Write(data)
				return hash.Sum32(), store(data), n, err
			}
			return 0, "", 0, err
		}
		hash.Write(data)
		for hash.Sum32()&m != m {
			b, err := r.ReadByte()
			if err != nil {
				break
			}
			hash.Roll(b)
			data = append(data, b)
		}
		return hash.Sum32(), store(data), n, err
	}

}
Esempio n. 7
0
// Find finds all the blocks of the given size within io.Reader that matches
// the hashes provided, and returns a hash -> slice of offsets within reader
// map, that produces the same weak hash.
func Find(ir io.Reader, hashesToFind []uint32, size int) (map[uint32][]int64, error) {
	if ir == nil {
		return nil, nil
	}

	r := bufio.NewReader(ir)
	hf := adler32.New()

	n, err := io.CopyN(hf, r, int64(size))
	if err == io.EOF {
		return nil, nil
	}
	if err != nil {
		return nil, err
	}
	if n != int64(size) {
		return nil, io.ErrShortBuffer
	}

	offsets := make(map[uint32][]int64)
	for _, hashToFind := range hashesToFind {
		offsets[hashToFind] = nil
	}

	var i int64
	var hash uint32
	for {
		hash = hf.Sum32()
		if existing, ok := offsets[hash]; ok {
			offsets[hash] = append(existing, i)
		}
		i++

		bt, err := r.ReadByte()
		if err == io.EOF {
			break
		} else if err != nil {
			return offsets, err
		}
		hf.Roll(bt)
	}
	return offsets, nil
}
Esempio n. 8
0
	{0xd0201df6, "'Invariant assertions' is the most elegant programming technique!  -Tom Szymanski"},
	{0x211297c8, strings.Repeat("\xff", 5548) + "8"},
	{0xbaa198c8, strings.Repeat("\xff", 5549) + "9"},
	{0x553499be, strings.Repeat("\xff", 5550) + "0"},
	{0xf0c19abe, strings.Repeat("\xff", 5551) + "1"},
	{0x8d5c9bbe, strings.Repeat("\xff", 5552) + "2"},
	{0x2af69cbe, strings.Repeat("\xff", 5553) + "3"},
	{0xc9809dbe, strings.Repeat("\xff", 5554) + "4"},
	{0x69189ebe, strings.Repeat("\xff", 5555) + "5"},
	{0x86af0001, strings.Repeat("\x00", 1e5)},
	{0x79660b4d, strings.Repeat("a", 1e5)},
	{0x110588ee, strings.Repeat("ABCDEFGHIJKLMNOPQRSTUVWXYZ", 1e4)},
}

// This is a no-op to prove that rollsum.Hash32 implements hash.Hash32
var _ = hash.Hash32(rollsum.New())

func TestGolden(t *testing.T) {
	for _, g := range golden {
		in := g.in

		// We test the vanilla implementation
		p := []byte(g.in)
		vanilla := adler32.New()
		vanilla.Write(p)
		if got := vanilla.Sum32(); got != g.out {
			t.Errorf("vanilla implentation: for %q, expected 0x%x, got 0x%x", in, g.out, got)
			continue
		}

		// We test the rolling implementation by prefixing the slice by a
Esempio n. 9
0
func Example() {
	s := []byte(data)

	vanilla := adler32.New()
	rolling := rollsum.New()

	// arbitrary window len
	n := 16

	// Load the window into the rolling hash
	rolling.Write(s[:n])

	// Roll it and compare the result with full re-calculus every time
	for i := n; i < len(s); i++ {

		vanilla.Reset()
		vanilla.Write(s[i-n+1 : i+1])

		err := rolling.Roll(s[i])
		if err != nil {
			log.Fatal(err)
		}

		fmt.Printf("%v: checksum %x\n", string(s[i-n+1:i+1]), rolling.Sum32())

		if vanilla.Sum32() != rolling.Sum32() {
			log.Fatalf("%v: expected %x, got %x",
				s[i-n+1:i+1], vanilla.Sum32(), rolling.Sum32())
		}
	}

	// Output:
	// he quick brown f: checksum 31e905d9
	// e quick brown fo: checksum 314805e0
	//  quick brown fox: checksum 30ea05f3
	// quick brown fox : checksum 34dc05f3
	// uick brown fox j: checksum 33b705ec
	// ick brown fox ju: checksum 325205ec
	// ck brown fox jum: checksum 31b105f0
	// k brown fox jump: checksum 317d05fd
	//  brown fox jumps: checksum 30d10605
	// brown fox jumps : checksum 34d50605
	// rown fox jumps o: checksum 34c60612
	// own fox jumps ov: checksum 33bb0616
	// wn fox jumps ove: checksum 32d6060c
	// n fox jumps over: checksum 316c0607
	//  fox jumps over : checksum 304405b9
	// fox jumps over t: checksum 3450060d
	// ox jumps over th: checksum 33fe060f
	// x jumps over the: checksum 33120605
	//  jumps over the : checksum 313e05ad
	// jumps over the l: checksum 353605f9
	// umps over the la: checksum 348505f0
	// mps over the laz: checksum 332905f5
	// ps over the lazy: checksum 32590601
	// s over the lazy : checksum 310905b1
	//  over the lazy d: checksum 2f7a05a2
	// over the lazy do: checksum 336a05f1
	// ver the lazy dog: checksum 326205e9

}
Esempio n. 10
0
// Blocks returns the blockwise hash of the reader.
func Blocks(r io.Reader, blocksize int, sizehint int64, counter Counter) ([]protocol.BlockInfo, error) {
	hf := sha256.New()
	hashLength := hf.Size()
	whf := adler32.New()
	mhf := io.MultiWriter(hf, whf)

	var blocks []protocol.BlockInfo
	var hashes, thisHash []byte

	if sizehint >= 0 {
		// Allocate contiguous blocks for the BlockInfo structures and their
		// hashes once and for all, and stick to the specified size.
		r = io.LimitReader(r, sizehint)
		numBlocks := int(sizehint / int64(blocksize))
		blocks = make([]protocol.BlockInfo, 0, numBlocks)
		hashes = make([]byte, 0, hashLength*numBlocks)
	}

	// A 32k buffer is used for copying into the hash function.
	buf := make([]byte, 32<<10)

	var offset int64
	for {
		lr := io.LimitReader(r, int64(blocksize))
		n, err := io.CopyBuffer(mhf, lr, buf)
		if err != nil {
			return nil, err
		}

		if n == 0 {
			break
		}

		if counter != nil {
			counter.Update(n)
		}

		// Carve out a hash-sized chunk of "hashes" to store the hash for this
		// block.
		hashes = hf.Sum(hashes)
		thisHash, hashes = hashes[:hashLength], hashes[hashLength:]

		b := protocol.BlockInfo{
			Size:     int32(n),
			Offset:   offset,
			Hash:     thisHash,
			WeakHash: whf.Sum32(),
		}

		blocks = append(blocks, b)
		offset += n

		hf.Reset()
		whf.Reset()
	}

	if len(blocks) == 0 {
		// Empty file
		blocks = append(blocks, protocol.BlockInfo{
			Offset: 0,
			Size:   0,
			Hash:   SHA256OfNothing,
		})
	}

	return blocks, nil
}