func compare( original string, modified string, block_size uint, ) (results <-chan BlockMatchResult, err error) { originalFileContent := bytes.NewBufferString(original) generator := filechecksum.NewFileChecksumGenerator(block_size) _, reference, _, err := indexbuilder.BuildChecksumIndex( generator, originalFileContent, ) if err != nil { return } modifiedContent := bytes.NewBufferString(modified) results = (&Comparer{}).StartFindMatchingBlocks( modifiedContent, 0, generator, reference, ) return }
func readIndex(r io.Reader, blocksize uint) ( i *index.ChecksumIndex, checksumLookup filechecksum.ChecksumLookup, blockCount uint, err error, ) { generator := filechecksum.NewFileChecksumGenerator(blocksize) readChunks, e := chunks.LoadChecksumsFromReader( r, generator.WeakRollingHash.Size(), generator.StrongHash.Size(), ) err = e if err != nil { return } checksumLookup = chunks.StrongChecksumGetter(readChunks) i = index.MakeChecksumIndex(readChunks) blockCount = uint(len(readChunks)) return }
func BenchmarkIndexComparisons(b *testing.B) { b.ReportAllocs() const SIZE = 200 * KB b.SetBytes(SIZE) file := readers.NewSizedNonRepeatingSequence(6, SIZE) generator := filechecksum.NewFileChecksumGenerator(8 * KB) _, index, _, err := indexbuilder.BuildChecksumIndex(generator, file) if err != nil { b.Fatal(err) } b.StartTimer() for i := 0; i < b.N; i++ { // must reinitialize the file for each comparison otherFile := readers.NewSizedNonRepeatingSequence(745656, SIZE) compare := &comparer.Comparer{} m := compare.StartFindMatchingBlocks(otherFile, 0, generator, index) for _, ok := <-m; ok; { } } b.StopTimer() }
func fetchIndex(indexFileUrl string) (referenceFileIndex *index.ChecksumIndex, checksumLookup filechecksum.ChecksumLookup, fileSize int64, err error) { generator := filechecksum.NewFileChecksumGenerator(BLOCK_SIZE) _, referenceFileIndex, checksumLookup, err = indexbuilder.BuildIndexFromString(generator, REFERENCE) if err != nil { return } fileSize = int64(len([]byte(REFERENCE))) return }
// Patch the files: splits rsync.Input into DefaultConcurrency sections,
// runs one comparer per section against the reference summary, merges the
// resulting match streams, and hands found/missing spans to the sequential
// patcher which writes the reconstructed file to rsync.Output.
func (rsync *RSync) Patch() (err error) {
	numMatchers := int64(DefaultConcurrency)
	blockSize := rsync.Summary.GetBlockSize()

	// Round each section up to a whole number of blocks.
	sectionSize := rsync.Summary.GetFileSize() / numMatchers
	sectionSize += int64(blockSize) - (sectionSize % int64(blockSize))

	merger := &comparer.MatchMerger{}

	for i := int64(0); i < numMatchers; i++ {
		compare := &comparer.Comparer{}
		offset := sectionSize * i

		// Each section reads one extra block past its end so matches that
		// straddle a section boundary can still be found.
		// NOTE(review): unlike other multi-matcher code in this codebase,
		// the offset is NOT pulled back by blockSize for i > 0 — confirm
		// matches starting just before a section boundary cannot be missed.
		sectionReader := bufio.NewReaderSize(
			io.NewSectionReader(rsync.Input, offset, sectionSize+int64(blockSize)),
			megabyte, // 1 MB buffer
		)

		// Bakes in the assumption about how to generate checksums (extract)
		sectionGenerator := filechecksum.NewFileChecksumGenerator(
			uint(blockSize),
		)

		matchStream := compare.StartFindMatchingBlocks(
			sectionReader, offset, sectionGenerator, rsync.Summary,
		)

		merger.StartMergeResultStream(matchStream, int64(blockSize))
	}

	// Presumably waits for all merge streams to finish — confirm in
	// comparer.MatchMerger before relying on ordering here.
	mergedBlocks := merger.GetMergedBlocks()
	missing := mergedBlocks.GetMissingBlocks(rsync.Summary.GetBlockCount() - 1)

	return sequential.SequentialPatcher(
		rsync.Input,
		rsync.Source,
		toPatcherMissingSpan(missing, int64(blockSize)),
		toPatcherFoundSpan(mergedBlocks, int64(blockSize)),
		20*megabyte,
		rsync.Output,
	)
}
func BenchmarkStrongComparison(b *testing.B) { b.ReportAllocs() b.SetBytes(1) const BLOCK_SIZE = 8 generator := filechecksum.NewFileChecksumGenerator(BLOCK_SIZE) b.StartTimer() results := (&Comparer{}).StartFindMatchingBlocks( readers.OneReader(b.N+BLOCK_SIZE), 0, generator, &NegativeStrongIndex{}, ) for _, ok := <-results; ok; { } b.StopTimer() }
func multithreadedMatching( localFile *os.File, idx *index.ChecksumIndex, localFileSize, matcherCount int64, blocksize uint, ) (*comparer.MatchMerger, *comparer.Comparer) { // Note: Since not all sections of the file are equal in work // it would be better to divide things up into more sections and // pull work from a queue channel as each finish sectionSize := localFileSize / matcherCount sectionSize += int64(blocksize) - (sectionSize % int64(blocksize)) merger := &comparer.MatchMerger{} compare := &comparer.Comparer{} for i := int64(0); i < matcherCount; i++ { offset := sectionSize * i // Sections must overlap by blocksize (strictly blocksize - 1?) if i > 0 { offset -= int64(blocksize) } sectionReader := bufio.NewReaderSize( io.NewSectionReader(localFile, offset, sectionSize), MB, ) sectionGenerator := filechecksum.NewFileChecksumGenerator(uint(blocksize)) matchStream := compare.StartFindMatchingBlocks( sectionReader, offset, sectionGenerator, idx) merger.StartMergeResultStream(matchStream, int64(blocksize)) } return merger, compare }
// TestTwoComparisons runs four overlapping section comparers over a
// modified string, merges their matches against the original string's
// checksum index, and verifies that exactly the changed regions are
// reported missing.
func TestTwoComparisons(t *testing.T) {
	const BLOCK_SIZE = 4
	const ORIGINAL_STRING = "The quick brown fox jumped over the lazy dog"
	const MODIFIED_STRING = "The qwik brown fox jumped 0v3r the lazy"

	numMatchers := int64(4)
	// Round the per-section size up to a whole number of blocks.
	sectionSize := int64(len(ORIGINAL_STRING)) / numMatchers
	sectionSize += int64(BLOCK_SIZE) - (sectionSize % int64(BLOCK_SIZE))

	merger := &MatchMerger{}

	originalFile := bytes.NewReader([]byte(ORIGINAL_STRING))
	modifiedFile := bytes.NewReader([]byte(MODIFIED_STRING))

	generator := filechecksum.NewFileChecksumGenerator(BLOCK_SIZE)
	_, reference, _, _ := indexbuilder.BuildChecksumIndex(
		generator,
		originalFile,
	)

	for i := int64(0); i < numMatchers; i++ {
		compare := &Comparer{}
		offset := sectionSize * i

		t.Logf("Section %v: %v-%v", i, offset, offset+sectionSize)

		// Each section reads one extra block so boundary-straddling
		// matches are not lost.
		sectionReader := bufio.NewReaderSize(
			io.NewSectionReader(modifiedFile, offset, sectionSize+BLOCK_SIZE),
			100000, // ~100 kB buffer (previous comment incorrectly said 1 MB)
		)

		// Bakes in the assumption about how to generate checksums (extract)
		sectionGenerator := filechecksum.NewFileChecksumGenerator(
			uint(BLOCK_SIZE),
		)

		matchStream := compare.StartFindMatchingBlocks(
			sectionReader, offset, sectionGenerator, reference,
		)

		merger.StartMergeResultStream(matchStream, int64(BLOCK_SIZE))
	}

	merged := merger.GetMergedBlocks()
	missing := merged.GetMissingBlocks(uint(len(ORIGINAL_STRING) / BLOCK_SIZE))

	// The substrings of ORIGINAL_STRING that the patcher would need to
	// fetch, in block order.
	expected := []string{
		"quic",
		"ed over ",
		" dog",
	}

	t.Logf("Missing blocks: %v", len(missing))

	for x, v := range missing {
		// Convert block span back to a byte range, clamped to the end
		// of the original string (last block may be short).
		start := v.StartBlock * BLOCK_SIZE
		end := (v.EndBlock + 1) * BLOCK_SIZE
		if end > uint(len(ORIGINAL_STRING)) {
			end = uint(len(ORIGINAL_STRING))
		}
		s := ORIGINAL_STRING[start:end]

		if s != expected[x] {
			t.Errorf(
				"Wrong block %v (%v-%v): %#v (expected %#v)",
				x, v.StartBlock, v.EndBlock, s, expected[x],
			)
		} else {
			t.Logf(
				"Correct block %v (%v-%v): %#v (expected %#v)",
				x, v.StartBlock, v.EndBlock, s, expected[x],
			)
		}
	}
}
func Example() { // due to short example strings, use a very small block size // using one this small in practice would increase your file transfer! const blockSize = 4 // This is the "file" as described by the authoritive version const reference = "The quick brown fox jumped over the lazy dog" // This is what we have locally. Not too far off, but not correct. const localVersion = "The qwik brown fox jumped 0v3r the lazy" generator := filechecksum.NewFileChecksumGenerator(blockSize) _, referenceFileIndex, _, err := indexbuilder.BuildIndexFromString( generator, reference, ) if err != nil { return } referenceAsBytes := []byte(reference) localVersionAsBytes := []byte(localVersion) blockCount := len(referenceAsBytes) / blockSize if len(referenceAsBytes)%blockSize != 0 { blockCount++ } inputFile := bytes.NewReader(localVersionAsBytes) patchedFile := bytes.NewBuffer(nil) // This is more complicated than usual, because we're using in-memory // "files" and sources. Normally you would use MakeRSync summary := &BasicSummary{ ChecksumIndex: referenceFileIndex, ChecksumLookup: nil, BlockCount: uint(blockCount), BlockSize: blockSize, FileSize: int64(len(referenceAsBytes)), } rsync := &RSync{ Input: inputFile, Output: patchedFile, Source: blocksources.NewReadSeekerBlockSource( bytes.NewReader(referenceAsBytes), blocksources.MakeNullFixedSizeResolver(uint64(blockSize)), ), Summary: summary, OnClose: nil, } if err := rsync.Patch(); err != nil { fmt.Printf("Error: %v", err) return } fmt.Printf("Patched result: \"%s\"\n", patchedFile.Bytes()) // Output: // Patched result: "The quick brown fox jumped over the lazy dog" }
// This is exceedingly similar to the module Example, but uses the http blocksource and a local http server
func Example_httpBlockSource() {
	// setupServer returns the port of a local server serving REFERENCE.
	PORT := <-setupServer()
	LOCAL_URL := fmt.Sprintf("http://localhost:%v/content", PORT)

	generator := filechecksum.NewFileChecksumGenerator(BLOCK_SIZE)
	_, referenceFileIndex, checksumLookup, err := indexbuilder.BuildIndexFromString(generator, REFERENCE)

	if err != nil {
		return
	}

	fileSize := int64(len([]byte(REFERENCE)))

	// This would normally be saved in a file
	// Round the block count up if the reference doesn't divide evenly.
	blockCount := fileSize / BLOCK_SIZE
	if fileSize%BLOCK_SIZE != 0 {
		blockCount++
	}

	fs := &BasicSummary{
		ChecksumIndex:  referenceFileIndex,
		ChecksumLookup: checksumLookup,
		BlockCount:     uint(blockCount),
		BlockSize:      uint(BLOCK_SIZE),
		FileSize:       fileSize,
	}

	/*
		// Normally, this would be:
		rsync, err := MakeRSync(
			"toPatch.file",
			"http://localhost/content",
			"out.file",
			fs,
		)
	*/
	// Need to replace the output and the input
	inputFile := bytes.NewReader([]byte(LOCAL_VERSION))
	patchedFile := bytes.NewBuffer(nil)

	resolver := blocksources.MakeFileSizedBlockResolver(
		uint64(fs.GetBlockSize()),
		fs.GetFileSize(),
	)

	rsync := &RSync{
		Input:  inputFile,
		Output: patchedFile,
		Source: blocksources.NewHttpBlockSource(
			LOCAL_URL,
			1,
			resolver,
			// Each downloaded block is verified against its MD5 strong
			// checksum from the summary before use.
			&filechecksum.HashVerifier{
				Hash:                md5.New(),
				BlockSize:           fs.GetBlockSize(),
				BlockChecksumGetter: fs,
			},
		),
		Summary: fs,
		OnClose: nil,
	}

	err = rsync.Patch()

	if err != nil {
		fmt.Printf("Error: %v\n", err)
		return
	}

	err = rsync.Close()

	if err != nil {
		fmt.Printf("Error: %v\n", err)
		return
	}

	fmt.Printf("Patched content: \"%v\"\n", patchedFile.String())

	// Just for inspection
	remoteReferenceSource := rsync.Source.(*blocksources.BlockSourceBase)
	fmt.Printf("Downloaded Bytes: %v\n", remoteReferenceSource.ReadBytes())

	// Output:
	// Patched content: "The quick brown fox jumped over the lazy dog"
	// Downloaded Bytes: 16
}
func Build(c *cli.Context) { filename := c.Args()[0] blocksize := uint32(c.Int("blocksize")) generator := filechecksum.NewFileChecksumGenerator(uint(blocksize)) inputFile, err := os.Open(filename) if err != nil { absInputPath, err2 := filepath.Abs(filename) if err2 == nil { handleFileError(absInputPath, err) } else { handleFileError(filename, err) } os.Exit(1) } s, _ := inputFile.Stat() // TODO: Error? file_size := s.Size() defer inputFile.Close() ext := filepath.Ext(filename) outfilePath := filename[:len(filename)-len(ext)] + ".gosync" outputFile, err := os.Create(outfilePath) if err != nil { handleFileError(outfilePath, err) os.Exit(1) } defer outputFile.Close() if err = writeHeaders( outputFile, magicString, blocksize, file_size, []uint16{majorVersion, minorVersion, patchVersion}, ); err != nil { fmt.Fprintf( os.Stderr, "Error getting file info: %v\n", filename, err, ) os.Exit(2) } start := time.Now() _, err = generator.GenerateChecksums(inputFile, outputFile) end := time.Now() if err != nil { fmt.Fprintf( os.Stderr, "Error generating checksum: %v\n", filename, err, ) os.Exit(2) } inputFileInfo, err := os.Stat(filename) if err != nil { fmt.Fprintf( os.Stderr, "Error getting file info: %v\n", filename, err, ) os.Exit(2) } fmt.Fprintf( os.Stderr, "Index for %v file generated in %v (%v bytes/S)\n", inputFileInfo.Size(), end.Sub(start), float64(inputFileInfo.Size())/end.Sub(start).Seconds(), ) }