Example No. 1
func compare(
	original string,
	modified string,
	block_size uint,
) (results <-chan BlockMatchResult, err error) {

	originalFileContent := bytes.NewBufferString(original)
	generator := filechecksum.NewFileChecksumGenerator(block_size)

	_, reference, _, err := indexbuilder.BuildChecksumIndex(
		generator,
		originalFileContent,
	)

	if err != nil {
		return
	}

	modifiedContent := bytes.NewBufferString(modified)

	results = (&Comparer{}).StartFindMatchingBlocks(
		modifiedContent,
		0,
		generator,
		reference,
	)

	return
}
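As a usage note, the results channel returned by compare is closed by the comparer when it finishes, so a caller can simply range over it. The driver below is a hypothetical sketch: the function name is made up, the strings and block size are borrowed from the test further down, and it assumes the same package and imports as the example above.

func demoCompare() {
	results, err := compare(
		"The quick brown fox jumped over the lazy dog",
		"The qwik brown fox jumped 0v3r the lazy",
		4, // tiny block size, matching the other toy examples on this page
	)

	if err != nil {
		fmt.Printf("Error: %v\n", err)
		return
	}

	// The channel is closed once all matches have been reported, so ranging
	// over it both waits for completion and counts the matches.
	matches := 0
	for range results {
		matches++
	}

	fmt.Printf("Matched blocks: %v\n", matches)
}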
Example No. 2
func readIndex(r io.Reader, blocksize uint) (
	i *index.ChecksumIndex,
	checksumLookup filechecksum.ChecksumLookup,
	blockCount uint,
	err error,
) {
	generator := filechecksum.NewFileChecksumGenerator(blocksize)

	readChunks, e := chunks.LoadChecksumsFromReader(
		r,
		generator.WeakRollingHash.Size(),
		generator.StrongHash.Size(),
	)

	err = e

	if err != nil {
		return
	}

	checksumLookup = chunks.StrongChecksumGetter(readChunks)
	i = index.MakeChecksumIndex(readChunks)
	blockCount = uint(len(readChunks))

	return
}
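A hedged sketch of feeding a saved index back into readIndex. The function name, error handling and log line are invented; note also that a real .gosync file written by the Build command at the bottom of this page starts with headers, which would have to be read off the stream before calling readIndex (skipped here for brevity).

func loadIndexFile(path string, blocksize uint) (*index.ChecksumIndex, filechecksum.ChecksumLookup, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, nil, err
	}
	defer f.Close()

	// NOTE: header parsing is omitted in this sketch; f is assumed to be
	// positioned at the start of the checksum stream.
	idx, checksumLookup, blockCount, err := readIndex(f, blocksize)
	if err != nil {
		return nil, nil, err
	}

	fmt.Printf("Loaded index with %v blocks\n", blockCount)
	return idx, checksumLookup, nil
}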
Example No. 3
func BenchmarkIndexComparisons(b *testing.B) {
	b.ReportAllocs()

	const SIZE = 200 * KB
	b.SetBytes(SIZE)

	file := readers.NewSizedNonRepeatingSequence(6, SIZE)
	generator := filechecksum.NewFileChecksumGenerator(8 * KB)
	_, index, _, err := indexbuilder.BuildChecksumIndex(generator, file)

	if err != nil {
		b.Fatal(err)
	}

	b.ResetTimer() // exclude the index construction above from the measured time
	for i := 0; i < b.N; i++ {
		// must reinitialize the file for each comparison
		otherFile := readers.NewSizedNonRepeatingSequence(745656, SIZE)
		compare := &comparer.Comparer{}
		m := compare.StartFindMatchingBlocks(otherFile, 0, generator, index)

		// Drain the match channel so the comparison runs to completion
		for range m {
		}
	}

	b.StopTimer()
}
Example No. 4
func fetchIndex(indexFileUrl string) (referenceFileIndex *index.ChecksumIndex, checksumLookup filechecksum.ChecksumLookup, fileSize int64, err error) {
	generator := filechecksum.NewFileChecksumGenerator(BLOCK_SIZE)

	_, referenceFileIndex, checksumLookup, err = indexbuilder.BuildIndexFromString(generator, REFERENCE)

	if err != nil {
		return
	}

	fileSize = int64(len([]byte(REFERENCE)))

	return
}
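The values returned by fetchIndex map directly onto a BasicSummary, in the same way the http block source example further down builds one. A sketch of that wiring follows; the helper name is made up and the block count rounding is copied from that example.

func summaryFromIndex(indexFileUrl string) (*BasicSummary, error) {
	referenceFileIndex, checksumLookup, fileSize, err := fetchIndex(indexFileUrl)
	if err != nil {
		return nil, err
	}

	// Round up so a trailing partial block is still counted.
	blockCount := fileSize / BLOCK_SIZE
	if fileSize%BLOCK_SIZE != 0 {
		blockCount++
	}

	return &BasicSummary{
		ChecksumIndex:  referenceFileIndex,
		ChecksumLookup: checksumLookup,
		BlockCount:     uint(blockCount),
		BlockSize:      uint(BLOCK_SIZE),
		FileSize:       fileSize,
	}, nil
}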
Example No. 5
// Patch the files
func (rsync *RSync) Patch() (err error) {
	numMatchers := int64(DefaultConcurrency)
	blockSize := rsync.Summary.GetBlockSize()
	sectionSize := rsync.Summary.GetFileSize() / numMatchers
	sectionSize += int64(blockSize) - (sectionSize % int64(blockSize))

	merger := &comparer.MatchMerger{}

	for i := int64(0); i < numMatchers; i++ {
		compare := &comparer.Comparer{}
		offset := sectionSize * i

		sectionReader := bufio.NewReaderSize(
			io.NewSectionReader(rsync.Input, offset, sectionSize+int64(blockSize)),
			megabyte, // 1 MB buffer
		)

		// Bakes in the assumption about how to generate checksums (extract)
		sectionGenerator := filechecksum.NewFileChecksumGenerator(
			uint(blockSize),
		)

		matchStream := compare.StartFindMatchingBlocks(
			sectionReader, offset, sectionGenerator, rsync.Summary,
		)

		merger.StartMergeResultStream(matchStream, int64(blockSize))
	}

	mergedBlocks := merger.GetMergedBlocks()
	missing := mergedBlocks.GetMissingBlocks(rsync.Summary.GetBlockCount() - 1)

	return sequential.SequentialPatcher(
		rsync.Input,
		rsync.Source,
		toPatcherMissingSpan(missing, int64(blockSize)),
		toPatcherFoundSpan(mergedBlocks, int64(blockSize)),
		20*megabyte,
		rsync.Output,
	)
}
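The section size arithmetic at the top of Patch rounds each worker's slice up to a whole number of blocks. Pulled out on its own (with made-up numbers) it looks like the sketch below; note that, as written above, it adds a full extra block even when the division is already exact.

func sectionSizeFor(fileSize, workers, blockSize int64) int64 {
	sectionSize := fileSize / workers
	// Round the section up to a whole number of blocks.
	sectionSize += blockSize - (sectionSize % blockSize)
	return sectionSize
}

// For example, sectionSizeFor(45, 4, 4) == 12, so four workers reading
// 12 bytes each (plus a block of overlap) cover the whole 45 byte file.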
Example No. 6
func BenchmarkStrongComparison(b *testing.B) {
	b.ReportAllocs()
	b.SetBytes(1)

	const BLOCK_SIZE = 8
	generator := filechecksum.NewFileChecksumGenerator(BLOCK_SIZE)

	b.StartTimer()

	results := (&Comparer{}).StartFindMatchingBlocks(
		readers.OneReader(b.N+BLOCK_SIZE),
		0,
		generator,
		&NegativeStrongIndex{},
	)

	// Drain the result channel so the comparison runs to completion
	for range results {
	}

	b.StopTimer()
}
Example No. 7
func multithreadedMatching(
	localFile *os.File,
	idx *index.ChecksumIndex,
	localFileSize,
	matcherCount int64,
	blocksize uint,
) (*comparer.MatchMerger, *comparer.Comparer) {
	// Note: since not all sections of the file involve equal work,
	// it would be better to divide things into more sections and
	// pull work from a queue channel as each one finishes
	sectionSize := localFileSize / matcherCount
	sectionSize += int64(blocksize) - (sectionSize % int64(blocksize))
	merger := &comparer.MatchMerger{}
	compare := &comparer.Comparer{}

	for i := int64(0); i < matcherCount; i++ {
		offset := sectionSize * i

		// Sections must overlap by blocksize (strictly blocksize - 1?)
		if i > 0 {
			offset -= int64(blocksize)
		}

		sectionReader := bufio.NewReaderSize(
			io.NewSectionReader(localFile, offset, sectionSize),
			MB,
		)

		sectionGenerator := filechecksum.NewFileChecksumGenerator(uint(blocksize))

		matchStream := compare.StartFindMatchingBlocks(
			sectionReader, offset, sectionGenerator, idx)

		merger.StartMergeResultStream(matchStream, int64(blocksize))
	}

	return merger, compare
}
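A hedged sketch of consuming what multithreadedMatching returns, mirroring what Patch above and the test below do with the merger; the function name, the hard-coded four matchers and the print statement are placeholders.

func reportMissingBlocks(
	localFile *os.File,
	idx *index.ChecksumIndex,
	localFileSize int64,
	blockCount uint,
	blocksize uint,
) {
	merger, _ := multithreadedMatching(localFile, idx, localFileSize, 4, blocksize)

	// As in Patch: collect the merged match stream, then ask for the
	// block ranges that were never matched.
	merged := merger.GetMergedBlocks()
	missing := merged.GetMissingBlocks(blockCount - 1)

	fmt.Printf("%v missing block spans\n", len(missing))
}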
Example No. 8
func TestTwoComparisons(t *testing.T) {
	const BLOCK_SIZE = 4
	const ORIGINAL_STRING = "The quick brown fox jumped over the lazy dog"
	const MODIFIED_STRING = "The qwik brown fox jumped 0v3r the lazy"

	numMatchers := int64(4)
	sectionSize := int64(len(ORIGINAL_STRING)) / numMatchers
	sectionSize += int64(BLOCK_SIZE) - (sectionSize % int64(BLOCK_SIZE))

	merger := &MatchMerger{}

	originalFile := bytes.NewReader([]byte(ORIGINAL_STRING))
	modifiedFile := bytes.NewReader([]byte(MODIFIED_STRING))
	generator := filechecksum.NewFileChecksumGenerator(BLOCK_SIZE)

	_, reference, _, _ := indexbuilder.BuildChecksumIndex(
		generator,
		originalFile,
	)

	for i := int64(0); i < numMatchers; i++ {
		compare := &Comparer{}
		offset := sectionSize * i

		t.Logf("Section %v: %v-%v", i, offset, offset+sectionSize)

		sectionReader := bufio.NewReaderSize(
			io.NewSectionReader(modifiedFile, offset, sectionSize+BLOCK_SIZE),
			100000, // ~100 kB buffer
		)

		// Bakes in the assumption about how to generate checksums (extract)
		sectionGenerator := filechecksum.NewFileChecksumGenerator(
			uint(BLOCK_SIZE),
		)

		matchStream := compare.StartFindMatchingBlocks(
			sectionReader, offset, sectionGenerator, reference,
		)

		merger.StartMergeResultStream(matchStream, int64(BLOCK_SIZE))
	}

	merged := merger.GetMergedBlocks()
	missing := merged.GetMissingBlocks(uint(len(ORIGINAL_STRING) / BLOCK_SIZE))

	expected := []string{
		"quic", "ed over ", " dog",
	}

	t.Logf("Missing blocks: %v", len(missing))

	for x, v := range missing {
		start := v.StartBlock * BLOCK_SIZE
		end := (v.EndBlock + 1) * BLOCK_SIZE
		if end > uint(len(ORIGINAL_STRING)) {
			end = uint(len(ORIGINAL_STRING))
		}
		s := ORIGINAL_STRING[start:end]

		if s != expected[x] {
			t.Errorf(
				"Wrong block %v (%v-%v): %#v (expected %#v)",
				x, v.StartBlock, v.EndBlock, s, expected[x],
			)
		} else {
			t.Logf(
				"Correct block %v (%v-%v): %#v (expected %#v)",
				x, v.StartBlock, v.EndBlock, s, expected[x],
			)
		}
	}
}
Example No. 9
func Example() {
	// Due to the short example strings, we use a very small block size;
	// a block size this small would inflate the transfer in practice!
	const blockSize = 4

	// This is the "file" as described by the authoritative version
	const reference = "The quick brown fox jumped over the lazy dog"

	// This is what we have locally. Not too far off, but not correct.
	const localVersion = "The qwik brown fox jumped 0v3r the lazy"

	generator := filechecksum.NewFileChecksumGenerator(blockSize)
	_, referenceFileIndex, _, err := indexbuilder.BuildIndexFromString(
		generator,
		reference,
	)

	if err != nil {
		return
	}

	referenceAsBytes := []byte(reference)
	localVersionAsBytes := []byte(localVersion)

	blockCount := len(referenceAsBytes) / blockSize
	if len(referenceAsBytes)%blockSize != 0 {
		blockCount++
	}

	inputFile := bytes.NewReader(localVersionAsBytes)
	patchedFile := bytes.NewBuffer(nil)

	// This is more complicated than usual, because we're using in-memory
	// "files" and sources. Normally you would use MakeRSync
	summary := &BasicSummary{
		ChecksumIndex:  referenceFileIndex,
		ChecksumLookup: nil,
		BlockCount:     uint(blockCount),
		BlockSize:      blockSize,
		FileSize:       int64(len(referenceAsBytes)),
	}

	rsync := &RSync{
		Input:  inputFile,
		Output: patchedFile,
		Source: blocksources.NewReadSeekerBlockSource(
			bytes.NewReader(referenceAsBytes),
			blocksources.MakeNullFixedSizeResolver(uint64(blockSize)),
		),
		Summary: summary,
		OnClose: nil,
	}

	if err := rsync.Patch(); err != nil {
		fmt.Printf("Error: %v", err)
		return
	}

	fmt.Printf("Patched result: \"%s\"\n", patchedFile.Bytes())
	// Output:
	// Patched result: "The quick brown fox jumped over the lazy dog"
}
Example No. 10
// This is exceedingly similar to the module Example, but uses the http blocksource and a local http server
func Example_httpBlockSource() {
	PORT := <-setupServer()
	LOCAL_URL := fmt.Sprintf("http://localhost:%v/content", PORT)

	generator := filechecksum.NewFileChecksumGenerator(BLOCK_SIZE)
	_, referenceFileIndex, checksumLookup, err := indexbuilder.BuildIndexFromString(generator, REFERENCE)

	if err != nil {
		return
	}

	fileSize := int64(len([]byte(REFERENCE)))

	// This would normally be saved in a file

	blockCount := fileSize / BLOCK_SIZE
	if fileSize%BLOCK_SIZE != 0 {
		blockCount++
	}

	fs := &BasicSummary{
		ChecksumIndex:  referenceFileIndex,
		ChecksumLookup: checksumLookup,
		BlockCount:     uint(blockCount),
		BlockSize:      uint(BLOCK_SIZE),
		FileSize:       fileSize,
	}

	/*
		// Normally, this would be:
		rsync, err := MakeRSync(
			"toPatch.file",
			"http://localhost/content",
			"out.file",
			fs,
		)
	*/
	// Need to replace the output and the input
	inputFile := bytes.NewReader([]byte(LOCAL_VERSION))
	patchedFile := bytes.NewBuffer(nil)

	resolver := blocksources.MakeFileSizedBlockResolver(
		uint64(fs.GetBlockSize()),
		fs.GetFileSize(),
	)

	rsync := &RSync{
		Input:  inputFile,
		Output: patchedFile,
		Source: blocksources.NewHttpBlockSource(
			LOCAL_URL,
			1,
			resolver,
			&filechecksum.HashVerifier{
				Hash:                md5.New(),
				BlockSize:           fs.GetBlockSize(),
				BlockChecksumGetter: fs,
			},
		),
		Summary: fs,
		OnClose: nil,
	}

	err = rsync.Patch()

	if err != nil {
		fmt.Printf("Error: %v\n", err)
		return
	}

	err = rsync.Close()

	if err != nil {
		fmt.Printf("Error: %v\n", err)
		return
	}

	fmt.Printf("Patched content: \"%v\"\n", patchedFile.String())

	// Just for inspection
	remoteReferenceSource := rsync.Source.(*blocksources.BlockSourceBase)
	fmt.Printf("Downloaded Bytes: %v\n", remoteReferenceSource.ReadBytes())

	// Output:
	// Patched content: "The quick brown fox jumped over the lazy dog"
	// Downloaded Bytes: 16
}
Example No. 11
func Build(c *cli.Context) {
	filename := c.Args()[0]
	blocksize := uint32(c.Int("blocksize"))
	generator := filechecksum.NewFileChecksumGenerator(uint(blocksize))
	inputFile, err := os.Open(filename)

	if err != nil {
		absInputPath, err2 := filepath.Abs(filename)
		if err2 == nil {
			handleFileError(absInputPath, err)
		} else {
			handleFileError(filename, err)
		}

		os.Exit(1)
	}

	s, err := inputFile.Stat()
	if err != nil {
		handleFileError(filename, err)
		os.Exit(1)
	}
	file_size := s.Size()

	defer inputFile.Close()

	ext := filepath.Ext(filename)
	outfilePath := filename[:len(filename)-len(ext)] + ".gosync"
	outputFile, err := os.Create(outfilePath)

	if err != nil {
		handleFileError(outfilePath, err)
		os.Exit(1)
	}

	defer outputFile.Close()

	if err = writeHeaders(
		outputFile,
		magicString,
		blocksize,
		file_size,
		[]uint16{majorVersion, minorVersion, patchVersion},
	); err != nil {
		fmt.Fprintf(
			os.Stderr,
			"Error writing headers for %v: %v\n",
			filename,
			err,
		)
		os.Exit(2)
	}

	start := time.Now()
	_, err = generator.GenerateChecksums(inputFile, outputFile)
	end := time.Now()

	if err != nil {
		fmt.Fprintf(
			os.Stderr,
			"Error generating checksums for %v: %v\n",
			filename,
			err,
		)
		os.Exit(2)
	}

	inputFileInfo, err := os.Stat(filename)
	if err != nil {
		fmt.Fprintf(
			os.Stderr,
			"Error getting file info for %v: %v\n",
			filename,
			err,
		)
		os.Exit(2)
	}

	fmt.Fprintf(
		os.Stderr,
		"Index for %v byte file generated in %v (%v bytes/s)\n",
		inputFileInfo.Size(),
		end.Sub(start),
		float64(inputFileInfo.Size())/end.Sub(start).Seconds(),
	)
}
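Build has the classic codegangsta/cli v1 action signature (func(*cli.Context) with no error return), so wiring it into an application presumably looks something like the sketch below; the app name, usage text and default block size are invented for illustration.

func main() {
	app := cli.NewApp()
	app.Name = "gosync"

	app.Commands = []cli.Command{
		{
			Name:   "build",
			Usage:  "build a .gosync index for a file",
			Action: Build,
			Flags: []cli.Flag{
				cli.IntFlag{
					Name:  "blocksize",
					Value: 8192,
					Usage: "block size in bytes",
				},
			},
		},
	}

	app.Run(os.Args)
}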