func compare( original string, modified string, block_size uint, ) (results <-chan BlockMatchResult, err error) { originalFileContent := bytes.NewBufferString(original) generator := filechecksum.NewFileChecksumGenerator(block_size) _, reference, _, err := indexbuilder.BuildChecksumIndex( generator, originalFileContent, ) if err != nil { return } modifiedContent := bytes.NewBufferString(modified) results = (&Comparer{}).StartFindMatchingBlocks( modifiedContent, 0, generator, reference, ) return }
func BenchmarkIndexComparisons(b *testing.B) { b.ReportAllocs() const SIZE = 200 * KB b.SetBytes(SIZE) file := readers.NewSizedNonRepeatingSequence(6, SIZE) generator := filechecksum.NewFileChecksumGenerator(8 * KB) _, index, _, err := indexbuilder.BuildChecksumIndex(generator, file) if err != nil { b.Fatal(err) } b.StartTimer() for i := 0; i < b.N; i++ { // must reinitialize the file for each comparison otherFile := readers.NewSizedNonRepeatingSequence(745656, SIZE) compare := &comparer.Comparer{} m := compare.StartFindMatchingBlocks(otherFile, 0, generator, index) for _, ok := <-m; ok; { } } b.StopTimer() }
func TestTwoComparisons(t *testing.T) { const BLOCK_SIZE = 4 const ORIGINAL_STRING = "The quick brown fox jumped over the lazy dog" const MODIFIED_STRING = "The qwik brown fox jumped 0v3r the lazy" numMatchers := int64(4) sectionSize := int64(len(ORIGINAL_STRING)) / numMatchers sectionSize += int64(BLOCK_SIZE) - (sectionSize % int64(BLOCK_SIZE)) merger := &MatchMerger{} originalFile := bytes.NewReader([]byte(ORIGINAL_STRING)) modifiedFile := bytes.NewReader([]byte(MODIFIED_STRING)) generator := filechecksum.NewFileChecksumGenerator(BLOCK_SIZE) _, reference, _, _ := indexbuilder.BuildChecksumIndex( generator, originalFile, ) for i := int64(0); i < numMatchers; i++ { compare := &Comparer{} offset := sectionSize * i t.Logf("Section %v: %v-%v", i, offset, offset+sectionSize) sectionReader := bufio.NewReaderSize( io.NewSectionReader(modifiedFile, offset, sectionSize+BLOCK_SIZE), 100000, // 1 MB buffer ) // Bakes in the assumption about how to generate checksums (extract) sectionGenerator := filechecksum.NewFileChecksumGenerator( uint(BLOCK_SIZE), ) matchStream := compare.StartFindMatchingBlocks( sectionReader, offset, sectionGenerator, reference, ) merger.StartMergeResultStream(matchStream, int64(BLOCK_SIZE)) } merged := merger.GetMergedBlocks() missing := merged.GetMissingBlocks(uint(len(ORIGINAL_STRING) / BLOCK_SIZE)) expected := []string{ "quic", "ed over ", " dog", } t.Logf("Missing blocks: %v", len(missing)) for x, v := range missing { start := v.StartBlock * BLOCK_SIZE end := (v.EndBlock + 1) * BLOCK_SIZE if end > uint(len(ORIGINAL_STRING)) { end = uint(len(ORIGINAL_STRING)) } s := ORIGINAL_STRING[start:end] if s != expected[x] { t.Errorf( "Wrong block %v (%v-%v): %#v (expected %#v)", x, v.StartBlock, v.EndBlock, s, expected[x], ) } else { t.Logf( "Correct block %v (%v-%v): %#v (expected %#v)", x, v.StartBlock, v.EndBlock, s, expected[x], ) } } }