Ejemplo n.º 1
0
// NewRollsum32 returns a pointer to a new Rollsum32 configured for the given
// block size. The Rollsum32Base field records blocksize, and the buffer field
// is a C2 circular buffer created via circularbuffer.MakeC2Buffer with
// int(blocksize).
func NewRollsum32(blocksize uint) *Rollsum32 {
	base := Rollsum32Base{blockSize: blocksize}
	window := circularbuffer.MakeC2Buffer(int(blocksize))

	return &Rollsum32{
		Rollsum32Base: base,
		buffer:        window,
	}
}
Ejemplo n.º 2
0
// BenchmarkIncrementalRollsum32WithC2 measures advancing a Rollsum32Base by one
// byte per iteration, with a C2 circular buffer supplying the evicted bytes.
//
// The C2 version should avoid all allocations in the main loop, and beat the pants off the
// other versions.
func BenchmarkIncrementalRollsum32WithC2(b *testing.B) {
	const blockSize = 100
	r := NewRollsum32Base(blockSize)
	buffer := make([]byte, blockSize)
	b.SetBytes(1)
	cbuffer := circularbuffer.MakeC2Buffer(blockSize)

	// Prime both the checksum and the circular buffer with one full block so
	// that every write in the timed loop evicts a byte.
	r.AddBytes(buffer)
	cbuffer.Write(buffer)

	b.ReportAllocs()
	checksum := make([]byte, 16)
	increment := make([]byte, 1)

	// The benchmark timer is already running when this function is entered, so
	// StartTimer would be a no-op here. ResetTimer discards the elapsed time
	// and allocation counts accumulated by the setup above, so only the loop
	// is measured.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		cbuffer.Write(increment)
		r.AddAndRemoveBytes(increment, cbuffer.Evicted(), blockSize)
		r.GetSum(checksum)
	}
	b.StopTimer()
}
Ejemplo n.º 3
0
// startFindMatchingBlocks_int scans comparison for blocks whose checksums are
// present in reference and sends one BlockMatchResult on results per match.
//
// Parameters:
//   - results: receives matches and errors; it is closed when this method returns.
//   - comparison: the stream being scanned.
//   - baseOffset: added to the scan position to form each result's ComparisonOffset.
//   - generator: supplies the block size, the weak rolling hash and the strong hash.
//   - reference: the index that weak and strong checksums are looked up in.
//
// Failures are delivered as a BlockMatchResult whose Err field is set, after
// which the method returns. A failure to read the first full block (including
// a comparison shorter than one block) is reported this way. So is any later
// read error other than io.EOF / io.ErrUnexpectedEOF.
//
// Every loop iteration increments c.Comparisons; c.WeakHashHits and
// c.StrongHashHits are incremented on weak and strong matches respectively.
//
// TODO: When matching duplicated blocks, a channel of BlockMatchResult slices would be more efficient
func (c *Comparer) startFindMatchingBlocks_int(
	results chan<- BlockMatchResult,
	comparison io.Reader,
	baseOffset int64,
	generator *filechecksum.FileChecksumGenerator,
	reference Index,
) {
	defer close(results)

	block := make([]byte, generator.BlockSize)
	var err error

	reportErr := func(err error) {
		results <- BlockMatchResult{
			Err: err,
		}
	}

	_, err = io.ReadFull(comparison, block)

	if err != nil {
		reportErr(
			fmt.Errorf("Error reading first block in comparison: %v", err),
		)
		return
	}

	generator.WeakRollingHash.SetBlock(block)
	singleByte := make([]byte, 1)
	weaksum := make([]byte, generator.WeakRollingHash.Size())
	strongSum := make([]byte, 0, generator.GetStrongHash().Size())

	// blockMemory mirrors the bytes currently covered by the weak hash so the
	// strong hash can be computed over them and evicted bytes can be recovered.
	blockMemory := circularbuffer.MakeC2Buffer(int(generator.BlockSize))
	blockMemory.Write(block)

	strong := generator.GetStrongHash()
	// i is the running scan position; it grows by the number of bytes read
	// and by one for each byte truncated after end of input.
	i := int64(0)
	next := READ_NEXT_BYTE

	for {

		atomic.AddInt64(&c.Comparisons, 1)

		// look for a weak match
		generator.WeakRollingHash.GetSum(weaksum)
		if weakMatchList := reference.FindWeakChecksum2(weaksum); weakMatchList != nil {
			atomic.AddInt64(&c.WeakHashHits, 1)

			block = blockMemory.GetBlock()

			strong.Reset()
			strong.Write(block)
			strongSum = strong.Sum(strongSum)
			strongList := reference.FindStrongChecksum2(strongSum, weakMatchList)

			// clear the slice (keeps its capacity for the next Sum call)
			strongSum = strongSum[:0]

			// If there are many matches, it means that this block is
			// duplicated in the reference.
			// since we care about finding all the blocks in the reference,
			// we must report all of them
			off := i + baseOffset
			for _, strongMatch := range strongList {
				results <- BlockMatchResult{
					ComparisonOffset: off,
					BlockIdx:         strongMatch.ChunkOffset,
				}
			}

			if len(strongList) > 0 {
				atomic.AddInt64(&c.StrongHashHits, 1)
				if next == READ_NONE {
					// found the match at the end, so exit
					break
				}
				// No point looking for a match that overlaps this block
				next = READ_NEXT_BLOCK
			}
		}

		var n int
		var readBytes []byte

		// When next is READ_NONE neither case runs: n stays 0 and err keeps
		// the io.EOF assigned below.
		switch next {
		case READ_NEXT_BYTE:
			n, err = comparison.Read(singleByte)
			readBytes = singleByte
		case READ_NEXT_BLOCK:
			n, err = io.ReadFull(comparison, block)
			readBytes = block[:n]
			next = READ_NEXT_BYTE
		}

		// Any failure other than running out of input is fatal. Without this
		// check such an error was never acted on: the loop only ends once
		// next is READ_NONE, so a reader that kept failing would spin forever
		// and the error would never reach the caller.
		if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
			reportErr(err)
			return
		}

		if uint(n) == generator.BlockSize {
			// Use readBytes rather than block. For a full block read they are
			// the same slice; when BlockSize is 1 a single-byte read also
			// lands here and must feed the byte just read, not stale block
			// contents.
			generator.WeakRollingHash.SetBlock(readBytes)
			blockMemory.Write(readBytes)
			i += int64(n)
		} else if n > 0 {
			// Length must be captured before the write so the hash is told
			// the length the buffer had when the bytes were added.
			prevLen := blockMemory.Len()
			blockMemory.Write(readBytes)
			generator.WeakRollingHash.AddAndRemoveBytes(
				readBytes,
				blockMemory.Evicted(),
				prevLen,
			)
			i += int64(n)
		}

		// Normalise both end-of-input errors to io.EOF and stop reading.
		if next != READ_NONE && (err == io.EOF || err == io.ErrUnexpectedEOF) {
			err = io.EOF
			next = READ_NONE
		}

		if next == READ_NONE {
			if blockMemory.Empty() {
				break
			}

			// Input is exhausted: drop one byte per iteration from
			// blockMemory and remove it from the weak hash, until
			// blockMemory is empty.
			prevLen := blockMemory.Len()
			removedByte := blockMemory.Truncate(1)
			generator.WeakRollingHash.RemoveBytes(removedByte, prevLen)
			i += 1
		}
	}

	// Safeguard only: both breaks above happen with next == READ_NONE, which
	// is set together with err = io.EOF.
	if err != io.EOF {
		reportErr(err)
	}
}