func TestChunker(t *testing.T) { // setup data source buf := getRandom(23, 32*1024*1024) ch := chunker.New(bytes.NewReader(buf), testPol, sha256.New()) chunks := testWithData(t, ch, chunks1) // test reader for i, c := range chunks { rd := c.Reader(bytes.NewReader(buf)) h := sha256.New() n, err := io.Copy(h, rd) if err != nil { t.Fatalf("io.Copy(): %v", err) } if uint(n) != chunks1[i].Length { t.Fatalf("reader returned wrong number of bytes: expected %d, got %d", chunks1[i].Length, n) } d := h.Sum(nil) if !bytes.Equal(d, chunks1[i].Digest) { t.Fatalf("wrong hash returned: expected %02x, got %02x", chunks1[i].Digest, d) } } // setup nullbyte data source buf = bytes.Repeat([]byte{0}, len(chunks2)*chunker.MinSize) ch = chunker.New(bytes.NewReader(buf), testPol, sha256.New()) testWithData(t, ch, chunks2) }
func benchmarkChunker(b *testing.B, hash hash.Hash) { size := 10 * 1024 * 1024 rd := bytes.NewReader(getRandom(23, size)) b.ResetTimer() b.SetBytes(int64(size)) var chunks int for i := 0; i < b.N; i++ { chunks = 0 rd.Seek(0, 0) ch := chunker.New(rd, testPol, hash) for { _, err := ch.Next() if err == io.EOF { break } if err != nil { b.Fatalf("Unexpected error occurred: %v", err) } chunks++ } } b.Logf("%d chunks, average chunk size: %d bytes", chunks, size/chunks) }
// saveFile reads from rd and saves the blobs in the repository. The list of // IDs is returned. func (fs fakeFileSystem) saveFile(rd io.Reader) (blobs IDs) { blobs = IDs{} ch := chunker.New(rd, fs.repo.Config().ChunkerPolynomial) for { chunk, err := ch.Next(getBuf()) if errors.Cause(err) == io.EOF { break } if err != nil { fs.t.Fatalf("unable to save chunk in repo: %v", err) } id := Hash(chunk.Data) if !fs.blobIsKnown(id, DataBlob) { _, err := fs.repo.SaveBlob(DataBlob, chunk.Data, id) if err != nil { fs.t.Fatalf("error saving chunk: %v", err) } fs.knownBlobs.Insert(id) } freeBuf(chunk.Data) blobs = append(blobs, id) } return blobs }
func BenchmarkNewChunker(b *testing.B) { p, err := chunker.RandomPolynomial() OK(b, err) b.ResetTimer() for i := 0; i < b.N; i++ { chunker.New(bytes.NewBuffer(nil), p, nil) } }
func benchmarkChunkEncryptP(b *testing.PB, buf []byte, rd Rdr, key *crypto.Key) { ch := chunker.New(rd, testPol) for { chunk, err := ch.Next(buf) if err == io.EOF { break } // reduce length of chunkBuf crypto.Encrypt(key, chunk.Data, chunk.Data) } }
func TestChunkerWithoutHash(t *testing.T) { // setup data source buf := getRandom(23, 32*1024*1024) ch := chunker.New(bytes.NewReader(buf), testPol, nil) chunks := testWithData(t, ch, chunks1) // test reader for i, c := range chunks { rd := c.Reader(bytes.NewReader(buf)) buf2, err := ioutil.ReadAll(rd) if err != nil { t.Fatalf("io.Copy(): %v", err) } if uint(len(buf2)) != chunks1[i].Length { t.Fatalf("reader returned wrong number of bytes: expected %d, got %d", chunks1[i].Length, uint(len(buf2))) } if uint(len(buf2)) != chunks1[i].Length { t.Fatalf("wrong number of bytes returned: expected %02x, got %02x", chunks[i].Length, len(buf2)) } if !bytes.Equal(buf[c.Start:c.Start+c.Length], buf2) { t.Fatalf("invalid data for chunk returned: expected %02x, got %02x", buf[c.Start:c.Start+c.Length], buf2) } } // setup nullbyte data source buf = bytes.Repeat([]byte{0}, len(chunks2)*chunker.MinSize) ch = chunker.New(bytes.NewReader(buf), testPol, sha256.New()) testWithData(t, ch, chunks2) }
func getRandomData(seed int, size int) []chunker.Chunk { buf := Random(seed, size) var chunks []chunker.Chunk chunker := chunker.New(bytes.NewReader(buf), testPol) for { c, err := chunker.Next(nil) if err == io.EOF { break } chunks = append(chunks, c) } return chunks }
func benchmarkChunkEncryptP(b *testing.PB, buf []byte, rd Rdr, key *crypto.Key) { ch := chunker.New(rd, testPol, sha256.New()) for { chunk, err := ch.Next() if err == io.EOF { break } // reduce length of chunkBuf buf = buf[:chunk.Length] io.ReadFull(chunk.Reader(rd), buf) crypto.Encrypt(key, buf, buf) } }
func getRandomData(seed int, size int) ([]byte, []*chunker.Chunk) { buf := Random(seed, size) chunks := []*chunker.Chunk{} chunker := chunker.New(bytes.NewReader(buf), testPol, sha256.New()) for { c, err := chunker.Next() if err == io.EOF { break } chunks = append(chunks, c) } return buf, chunks }
func benchmarkChunkEncrypt(b testing.TB, buf, buf2 []byte, rd Rdr, key *crypto.Key) { rd.Seek(0, 0) ch := chunker.New(rd, testPol) for { chunk, err := ch.Next(buf) if err == io.EOF { break } OK(b, err) // reduce length of buf Assert(b, uint(len(chunk.Data)) == chunk.Length, "invalid length: got %d, expected %d", len(chunk.Data), chunk.Length) _, err = crypto.Encrypt(key, buf2, chunk.Data) OK(b, err) } }
// SaveFile stores the content of the file on the backend as a Blob by calling // Save for each chunk. func (arch *Archiver) SaveFile(p *restic.Progress, node *restic.Node) (*restic.Node, error) { file, err := fs.Open(node.Path) defer file.Close() if err != nil { return node, errors.Wrap(err, "Open") } debug.RunHook("archiver.SaveFile", node.Path) node, err = arch.reloadFileIfChanged(node, file) if err != nil { return node, err } chnker := chunker.New(file, arch.repo.Config().ChunkerPolynomial) resultChannels := [](<-chan saveResult){} for { chunk, err := chnker.Next(getBuf()) if errors.Cause(err) == io.EOF { break } if err != nil { return node, errors.Wrap(err, "chunker.Next") } resCh := make(chan saveResult, 1) go arch.saveChunk(chunk, p, <-arch.blobToken, file, resCh) resultChannels = append(resultChannels, resCh) } results, err := waitForResults(resultChannels) if err != nil { return node, err } err = updateNodeContent(node, results) return node, err }
// saveFile reads from rd and saves the blobs in the repository. The list of // IDs is returned. func saveFile(t testing.TB, repo *repository.Repository, rd io.Reader) (blobs backend.IDs) { ch := chunker.New(rd, repo.Config.ChunkerPolynomial) for { chunk, err := ch.Next(getBuf()) if err == io.EOF { break } if err != nil { t.Fatalf("unabel to save chunk in repo: %v", err) } id, err := repo.SaveAndEncrypt(pack.Data, chunk.Data, nil) if err != nil { t.Fatalf("error saving chunk: %v", err) } blobs = append(blobs, id) } return blobs }
// SaveFile stores the content of the file on the backend as a Blob by calling // Save for each chunk. func (arch *Archiver) SaveFile(p *Progress, node *Node) error { file, err := node.OpenForReading() defer file.Close() if err != nil { return err } node, err = arch.reloadFileIfChanged(node, file) if err != nil { return err } chnker := chunker.New(file, arch.repo.Config.ChunkerPolynomial, sha256.New()) resultChannels := [](<-chan saveResult){} for { chunk, err := chnker.Next() if err == io.EOF { break } if err != nil { return errors.Annotate(err, "SaveFile() chunker.Next()") } resCh := make(chan saveResult, 1) go arch.saveChunk(chunk, p, <-arch.blobToken, file, resCh) resultChannels = append(resultChannels, resCh) } results, err := waitForResults(resultChannels) if err != nil { return err } err = updateNodeContent(node, results) return err }
func benchmarkChunkEncrypt(b testing.TB, buf, buf2 []byte, rd Rdr, key *crypto.Key) { rd.Seek(0, 0) ch := chunker.New(rd, testPol, sha256.New()) for { chunk, err := ch.Next() if err == io.EOF { break } OK(b, err) // reduce length of buf buf = buf[:chunk.Length] n, err := io.ReadFull(chunk.Reader(rd), buf) OK(b, err) Assert(b, uint(n) == chunk.Length, "invalid length: got %d, expected %d", n, chunk.Length) _, err = crypto.Encrypt(key, buf2, buf) OK(b, err) } }
func TestChunkerWithRandomPolynomial(t *testing.T) { // setup data source buf := getRandom(23, 32*1024*1024) // generate a new random polynomial start := time.Now() p, err := chunker.RandomPolynomial() OK(t, err) t.Logf("generating random polynomial took %v", time.Since(start)) start = time.Now() ch := chunker.New(bytes.NewReader(buf), p, sha256.New()) t.Logf("creating chunker took %v", time.Since(start)) // make sure that first chunk is different c, err := ch.Next() Assert(t, c.Cut != chunks1[0].CutFP, "Cut point is the same") Assert(t, c.Length != chunks1[0].Length, "Length is the same") Assert(t, !bytes.Equal(c.Digest, chunks1[0].Digest), "Digest is the same") }
// ArchiveReader reads from the reader and archives the data. Returned is the
// resulting snapshot and its ID.
func ArchiveReader(repo *repository.Repository, p *Progress, rd io.Reader, name string) (*Snapshot, backend.ID, error) {
	debug.Log("ArchiveReader", "start archiving %s", name)
	sn, err := NewSnapshot([]string{name})
	if err != nil {
		return nil, backend.ID{}, err
	}

	p.Start()
	defer p.Done()

	// split the stream into content-defined chunks
	chnker := chunker.New(rd, repo.Config.ChunkerPolynomial)

	var ids backend.IDs
	var fileSize uint64

	for {
		chunk, err := chnker.Next(getBuf())
		if err == io.EOF {
			break
		}

		if err != nil {
			return nil, backend.ID{}, err
		}

		id := backend.Hash(chunk.Data)

		// deduplicate: only store blobs the index does not already know
		if !repo.Index().Has(id) {
			_, err := repo.SaveAndEncrypt(pack.Data, chunk.Data, nil)
			if err != nil {
				return nil, backend.ID{}, err
			}
			debug.Log("ArchiveReader", "saved blob %v (%d bytes)\n", id.Str(), chunk.Length)
		} else {
			debug.Log("ArchiveReader", "blob %v already saved in the repo\n", id.Str())
		}

		// return the chunk buffer to the pool
		freeBuf(chunk.Data)

		ids = append(ids, id)

		p.Report(Stat{Bytes: uint64(chunk.Length)})
		fileSize += uint64(chunk.Length)
	}

	// the snapshot references a single synthetic file node holding all chunks
	tree := &Tree{
		Nodes: []*Node{
			&Node{
				Name:       name,
				AccessTime: time.Now(),
				ModTime:    time.Now(),
				Type:       "file",
				Mode:       0644,
				Size:       fileSize,
				UID:        sn.UID,
				GID:        sn.GID,
				User:       sn.Username,
				Content:    ids,
			},
		},
	}

	treeID, err := saveTreeJSON(repo, tree)
	if err != nil {
		return nil, backend.ID{}, err
	}

	sn.Tree = &treeID
	debug.Log("ArchiveReader", "tree saved as %v", treeID.Str())

	id, err := repo.SaveJSONUnpacked(backend.Snapshot, sn)
	if err != nil {
		return nil, backend.ID{}, err
	}

	sn.id = &id
	debug.Log("ArchiveReader", "snapshot saved as %v", id.Str())

	// persist pending packs and the index so the snapshot is usable
	err = repo.Flush()
	if err != nil {
		return nil, backend.ID{}, err
	}

	err = repo.SaveIndex()
	if err != nil {
		return nil, backend.ID{}, err
	}

	return sn, id, nil
}
// ArchiveReader reads from the reader and archives the data. Returned is the
// resulting snapshot and its ID.
func ArchiveReader(repo restic.Repository, p *restic.Progress, rd io.Reader, name string, tags []string) (*restic.Snapshot, restic.ID, error) {
	debug.Log("start archiving %s", name)
	sn, err := restic.NewSnapshot([]string{name}, tags)
	if err != nil {
		return nil, restic.ID{}, err
	}

	p.Start()
	defer p.Done()

	// split the stream into content-defined chunks
	chnker := chunker.New(rd, repo.Config().ChunkerPolynomial)

	var ids restic.IDs
	var fileSize uint64

	for {
		chunk, err := chnker.Next(getBuf())
		if errors.Cause(err) == io.EOF {
			break
		}

		if err != nil {
			return nil, restic.ID{}, errors.Wrap(err, "chunker.Next()")
		}

		id := restic.Hash(chunk.Data)

		// deduplicate: only store blobs the index does not already know
		if !repo.Index().Has(id, restic.DataBlob) {
			_, err := repo.SaveBlob(restic.DataBlob, chunk.Data, id)
			if err != nil {
				return nil, restic.ID{}, err
			}
			debug.Log("saved blob %v (%d bytes)\n", id.Str(), chunk.Length)
		} else {
			debug.Log("blob %v already saved in the repo\n", id.Str())
		}

		// return the chunk buffer to the pool
		freeBuf(chunk.Data)

		ids = append(ids, id)

		p.Report(restic.Stat{Bytes: uint64(chunk.Length)})
		fileSize += uint64(chunk.Length)
	}

	// the snapshot references a single synthetic file node holding all chunks
	tree := &restic.Tree{
		Nodes: []*restic.Node{
			&restic.Node{
				Name:       name,
				AccessTime: time.Now(),
				ModTime:    time.Now(),
				Type:       "file",
				Mode:       0644,
				Size:       fileSize,
				UID:        sn.UID,
				GID:        sn.GID,
				User:       sn.Username,
				Content:    ids,
			},
		},
	}

	treeID, err := repo.SaveTree(tree)
	if err != nil {
		return nil, restic.ID{}, err
	}

	sn.Tree = &treeID
	debug.Log("tree saved as %v", treeID.Str())

	id, err := repo.SaveJSONUnpacked(restic.SnapshotFile, sn)
	if err != nil {
		return nil, restic.ID{}, err
	}

	debug.Log("snapshot saved as %v", id.Str())

	// persist pending packs and the index so the snapshot is usable
	err = repo.Flush()
	if err != nil {
		return nil, restic.ID{}, err
	}

	err = repo.SaveIndex()
	if err != nil {
		return nil, restic.ID{}, err
	}

	return sn, id, nil
}