// NewRollsum32 creates a Rollsum32 for the given block size, backed by a
// C2 circular buffer that remembers the current window of bytes.
func NewRollsum32(blocksize uint) *Rollsum32 {
	return &Rollsum32{
		Rollsum32Base: Rollsum32Base{
			blockSize: blocksize,
		},
		buffer: circularbuffer.MakeC2Buffer(int(blocksize)),
	}
}
// The C2 version should avoid all allocations in the main loop, and beat the
// pants off the other versions.
func BenchmarkIncrementalRollsum32WithC2(b *testing.B) {
	const BLOCK_SIZE = 100
	r := NewRollsum32Base(BLOCK_SIZE)
	buffer := make([]byte, BLOCK_SIZE)
	b.SetBytes(1)

	cbuffer := circularbuffer.MakeC2Buffer(BLOCK_SIZE)

	r.AddBytes(buffer)
	cbuffer.Write(buffer)

	b.ReportAllocs()
	checksum := make([]byte, 16)
	increment := make([]byte, 1)

	// Exclude the setup above from the timing and allocation counts.
	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		cbuffer.Write(increment)
		r.AddAndRemoveBytes(increment, cbuffer.Evicted(), BLOCK_SIZE)
		r.GetSum(checksum)
	}

	b.StopTimer()
}
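// A minimal usage sketch (not part of the original benchmarks): rolling the
// 32-bit weak checksum across a byte slice one byte at a time, using only the
// calls exercised above (NewRollsum32Base, AddBytes, AddAndRemoveBytes, GetSum
// and the C2 circular buffer). The helper name rollAcross and the 4-byte sum
// size are assumptions for illustration; it expects len(data) >= blockSize.
func rollAcross(data []byte, blockSize int) [][]byte {
	r := NewRollsum32Base(uint(blockSize))
	window := circularbuffer.MakeC2Buffer(blockSize)

	// Prime the rolling hash and the window with the first full block.
	r.AddBytes(data[:blockSize])
	window.Write(data[:blockSize])

	sum := make([]byte, 4)
	sums := make([][]byte, 0, len(data)-blockSize+1)

	r.GetSum(sum)
	sums = append(sums, append([]byte(nil), sum...))

	// Slide the window forward one byte at a time: write the new byte into
	// the circular buffer, then feed the evicted byte back to the hash.
	for i := blockSize; i < len(data); i++ {
		window.Write(data[i : i+1])
		r.AddAndRemoveBytes(data[i:i+1], window.Evicted(), blockSize)
		r.GetSum(sum)
		sums = append(sums, append([]byte(nil), sum...))
	}

	return sums
}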
/*
TODO: When matching duplicated blocks, a channel of BlockMatchResult slices
would be more efficient.
*/
func (c *Comparer) startFindMatchingBlocks_int(
	results chan<- BlockMatchResult,
	comparison io.Reader,
	baseOffset int64,
	generator *filechecksum.FileChecksumGenerator,
	reference Index,
) {
	defer close(results)

	block := make([]byte, generator.BlockSize)
	var err error

	ReportErr := func(err error) {
		results <- BlockMatchResult{
			Err: err,
		}
	}

	_, err = io.ReadFull(comparison, block)

	if err != nil {
		ReportErr(
			fmt.Errorf("Error reading first block in comparison: %v", err),
		)
		return
	}

	generator.WeakRollingHash.SetBlock(block)
	singleByte := make([]byte, 1)
	weaksum := make([]byte, generator.WeakRollingHash.Size())
	strongSum := make([]byte, 0, generator.GetStrongHash().Size())
	blockMemory := circularbuffer.MakeC2Buffer(int(generator.BlockSize))
	blockMemory.Write(block)

	strong := generator.GetStrongHash()

	// i is the offset of the current window within the comparison stream
	i := int64(0)
	next := READ_NEXT_BYTE

	//ReadLoop:
	for {
		atomic.AddInt64(&c.Comparisons, 1)

		// look for a weak match
		generator.WeakRollingHash.GetSum(weaksum)

		if weakMatchList := reference.FindWeakChecksum2(weaksum); weakMatchList != nil {
			atomic.AddInt64(&c.WeakHashHits, 1)

			block = blockMemory.GetBlock()

			strong.Reset()
			strong.Write(block)
			strongSum = strong.Sum(strongSum)

			strongList := reference.FindStrongChecksum2(strongSum, weakMatchList)

			// clear the slice
			strongSum = strongSum[:0]

			// If there are many matches, it means that this block is
			// duplicated in the reference.
			// Since we care about finding all the blocks in the reference,
			// we must report all of them.
			off := i + baseOffset

			for _, strongMatch := range strongList {
				results <- BlockMatchResult{
					ComparisonOffset: off,
					BlockIdx:         strongMatch.ChunkOffset,
				}
			}

			if len(strongList) > 0 {
				atomic.AddInt64(&c.StrongHashHits, 1)

				if next == READ_NONE {
					// found the match at the end, so exit
					break
				}

				// No point looking for a match that overlaps this block
				next = READ_NEXT_BLOCK
			}
		}

		var n int
		var readBytes []byte

		// Advance the comparison: a single byte after a miss, or a whole
		// block after a match.
		switch next {
		case READ_NEXT_BYTE:
			n, err = comparison.Read(singleByte)
			readBytes = singleByte
		case READ_NEXT_BLOCK:
			n, err = io.ReadFull(comparison, block)
			readBytes = block[:n]
			next = READ_NEXT_BYTE
		}

		// A full block replaces the window outright; a partial read rolls
		// the hash forward by however many bytes were read.
		if uint(n) == generator.BlockSize {
			generator.WeakRollingHash.SetBlock(block)
			blockMemory.Write(block)
			i += int64(n)
		} else if n > 0 {
			b_len := blockMemory.Len()
			blockMemory.Write(readBytes)
			generator.WeakRollingHash.AddAndRemoveBytes(
				readBytes,
				blockMemory.Evicted(),
				b_len,
			)
			i += int64(n)
		}

		if next != READ_NONE && (err == io.EOF || err == io.ErrUnexpectedEOF) {
			err = io.EOF
			next = READ_NONE
		}

		// Once the input is exhausted, shrink the window from the front one
		// byte at a time, so a shorter trailing window can still match a
		// short final block in the reference.
		if next == READ_NONE {
			if blockMemory.Empty() {
				break
			}

			b_len := blockMemory.Len()
			removedByte := blockMemory.Truncate(1)
			generator.WeakRollingHash.RemoveBytes(removedByte, b_len)
			i++
		}
	}

	if err != io.EOF {
		ReportErr(err)
		return
	}
}
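// A minimal driver sketch (not part of the original file) showing one way to
// run startFindMatchingBlocks_int and gather its output. The helper name
// findMatches is an assumption; the generator and reference index are taken as
// parameters because their construction is outside the scope of this function.
func (c *Comparer) findMatches(
	comparison io.Reader,
	baseOffset int64,
	generator *filechecksum.FileChecksumGenerator,
	reference Index,
) ([]BlockMatchResult, error) {
	results := make(chan BlockMatchResult)

	go c.startFindMatchingBlocks_int(
		results,
		comparison,
		baseOffset,
		generator,
		reference,
	)

	matches := make([]BlockMatchResult, 0)

	// startFindMatchingBlocks_int closes the results channel when it is done,
	// and reports any failure as a result carrying a non-nil Err.
	for r := range results {
		if r.Err != nil {
			return matches, r.Err
		}
		matches = append(matches, r)
	}

	return matches, nil
}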