Beispiel #1
0
// compare builds a checksum index over original, using a generator configured
// with block_size, and then starts a matching pass of modified against that
// index.
//
// It returns the result channel produced by StartFindMatchingBlocks. If the
// index cannot be built, results is nil and err carries the failure.
func compare(
	original string,
	modified string,
	block_size uint,
) (results <-chan BlockMatchResult, err error) {
	originalReader := bytes.NewBufferString(original)
	checksums := filechecksum.NewFileChecksumGenerator(block_size)

	// Only the second and last return values are needed here.
	_, index, _, err := indexbuilder.BuildChecksumIndex(checksums, originalReader)
	if err != nil {
		return nil, err
	}

	// The same generator that produced the index is reused for the comparison.
	// NOTE(review): the literal 0 is presumably the starting offset of the
	// modified stream — confirm against StartFindMatchingBlocks.
	return (&Comparer{}).StartFindMatchingBlocks(
		bytes.NewBufferString(modified),
		0,
		checksums,
		index,
	), nil
}
Beispiel #2
0
// BenchmarkIndexComparisons measures one full comparison of a generated
// 200*KB sequence against a checksum index that is built once, up front, from
// a sequence generated with a different first argument (presumably a seed —
// confirm against readers.NewSizedNonRepeatingSequence).
func BenchmarkIndexComparisons(b *testing.B) {
	b.ReportAllocs()

	const size = 200 * KB
	b.SetBytes(size)

	file := readers.NewSizedNonRepeatingSequence(6, size)
	generator := filechecksum.NewFileChecksumGenerator(8 * KB)
	_, index, _, err := indexbuilder.BuildChecksumIndex(generator, file)
	if err != nil {
		b.Fatal(err)
	}

	// The benchmark timer is already running when this function is entered,
	// so StartTimer would be a no-op here. ResetTimer discards the time and
	// allocations spent building the index above.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		// must reinitialize the file for each comparison
		otherFile := readers.NewSizedNonRepeatingSequence(745656, size)
		m := (&comparer.Comparer{}).StartFindMatchingBlocks(otherFile, 0, generator, index)

		// Drain the result channel until it is closed. A three-clause for
		// loop with the receive in its init statement would receive only
		// once and then test a stale ok value forever.
		for range m {
		}
	}

	b.StopTimer()
}
Beispiel #3
0
// TestTwoComparisons splits the modified content into several sections, runs
// one Comparer per section against a checksum index of the original content,
// merges the per-section match streams, and verifies that the blocks reported
// as missing correspond to the expected substrings of the original.
func TestTwoComparisons(t *testing.T) {
	const blockSize = 4
	const originalString = "The quick brown fox jumped over the lazy dog"
	const modifiedString = "The qwik brown fox jumped 0v3r the lazy"

	numMatchers := int64(4)
	sectionSize := int64(len(originalString)) / numMatchers
	// Push the section size up to the next multiple of the block size (this
	// adds a whole extra block when it is already aligned).
	sectionSize += int64(blockSize) - (sectionSize % int64(blockSize))

	merger := &MatchMerger{}

	originalFile := bytes.NewReader([]byte(originalString))
	modifiedFile := bytes.NewReader([]byte(modifiedString))
	generator := filechecksum.NewFileChecksumGenerator(blockSize)

	_, reference, _, err := indexbuilder.BuildChecksumIndex(
		generator,
		originalFile,
	)
	if err != nil {
		// Without a reference index every later assertion is meaningless.
		t.Fatal(err)
	}

	for i := int64(0); i < numMatchers; i++ {
		compare := &Comparer{}
		offset := sectionSize * i

		t.Logf("Section %v: %v-%v", i, offset, offset+sectionSize)

		// Each section reads blockSize bytes past its nominal end, so
		// neighbouring sections overlap by one block.
		sectionReader := bufio.NewReaderSize(
			io.NewSectionReader(modifiedFile, offset, sectionSize+blockSize),
			100000, // 100000-byte read buffer
		)

		// Bakes in the assumption about how to generate checksums (extract)
		sectionGenerator := filechecksum.NewFileChecksumGenerator(
			uint(blockSize),
		)

		matchStream := compare.StartFindMatchingBlocks(
			sectionReader, offset, sectionGenerator, reference,
		)

		merger.StartMergeResultStream(matchStream, int64(blockSize))
	}

	merged := merger.GetMergedBlocks()
	missing := merged.GetMissingBlocks(uint(len(originalString) / blockSize))

	expected := []string{
		"quic", "ed over ", " dog",
	}

	t.Logf("Missing blocks: %v", len(missing))

	// A shorter result must not pass silently, and a longer one must not
	// index past the end of expected.
	if len(missing) != len(expected) {
		t.Errorf(
			"Wrong number of missing blocks: %v (expected %v)",
			len(missing), len(expected),
		)
	}

	for x, v := range missing {
		if x >= len(expected) {
			t.Errorf(
				"Unexpected extra missing block %v (%v-%v)",
				x, v.StartBlock, v.EndBlock,
			)
			continue
		}

		start := v.StartBlock * blockSize
		end := (v.EndBlock + 1) * blockSize
		// Clamp to the string length so a span reaching past the final
		// block does not slice out of range.
		if end > uint(len(originalString)) {
			end = uint(len(originalString))
		}
		s := originalString[start:end]

		if s != expected[x] {
			t.Errorf(
				"Wrong block %v (%v-%v): %#v (expected %#v)",
				x, v.StartBlock, v.EndBlock, s, expected[x],
			)
		} else {
			t.Logf(
				"Correct block %v (%v-%v): %#v (expected %#v)",
				x, v.StartBlock, v.EndBlock, s, expected[x],
			)
		}
	}
}