func (arch *Archiver) saveChunk(chunk chunker.Chunk, p *restic.Progress, token struct{}, file fs.File, resultChannel chan<- saveResult) {
	defer freeBuf(chunk.Data)

	id := restic.Hash(chunk.Data)
	err := arch.Save(restic.DataBlob, chunk.Data, id)
	// TODO handle error
	if err != nil {
		panic(err)
	}

	p.Report(restic.Stat{Bytes: uint64(chunk.Length)})
	arch.blobToken <- token
	resultChannel <- saveResult{id: id, bytes: uint64(chunk.Length)}
}
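// A minimal caller sketch for saveChunk (hypothetical, not part of the
// original source): arch.blobToken acts as a counting semaphore, so a
// dispatcher takes a token before spawning each goroutine and saveChunk
// returns it when done, bounding the number of concurrent blob saves.
func (arch *Archiver) saveChunks(chunks <-chan chunker.Chunk, p *restic.Progress, file fs.File) <-chan saveResult {
	results := make(chan saveResult)
	go func() {
		for chunk := range chunks {
			token := <-arch.blobToken // blocks until a concurrency slot is free
			go arch.saveChunk(chunk, p, token, file, results)
		}
	}()
	return results
}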
// Scan traverses the dirs to collect restic.Stat information while emitting progress
// information with p.
func Scan(dirs []string, filter pipe.SelectFunc, p *restic.Progress) (restic.Stat, error) {
	p.Start()
	defer p.Done()

	var stat restic.Stat

	for _, dir := range dirs {
		debug.Log("Start for %v", dir)
		err := fs.Walk(dir, func(str string, fi os.FileInfo, err error) error {
			// TODO: integrate error reporting
			if err != nil {
				fmt.Fprintf(os.Stderr, "error for %v: %v\n", str, err)
				return nil
			}
			if fi == nil {
				fmt.Fprintf(os.Stderr, "error for %v: FileInfo is nil\n", str)
				return nil
			}

			if !filter(str, fi) {
				debug.Log("path %v excluded", str)
				if fi.IsDir() {
					return filepath.SkipDir
				}
				return nil
			}

			s := restic.Stat{}
			if fi.IsDir() {
				s.Dirs++
			} else {
				s.Files++
				if isRegularFile(fi) {
					s.Bytes += uint64(fi.Size())
				}
			}

			p.Report(s)
			stat.Add(s)

			// TODO: handle error?
			return nil
		})

		debug.Log("Done for %v, err: %v", dir, err)
		if err != nil {
			return restic.Stat{}, errors.Wrap(err, "fs.Walk")
		}
	}

	return stat, nil
}
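// Hypothetical usage sketch for Scan (not from the original source): an
// accept-everything filter matching the shape of pipe.SelectFunc, with the
// collected stat printed at the end. Passing nil for the progress assumes
// that restic.Progress methods tolerate a nil receiver.
func scanExample() error {
	acceptAll := func(item string, fi os.FileInfo) bool { return true }
	stat, err := Scan([]string{"/home/user"}, acceptAll, nil)
	if err != nil {
		return err
	}
	fmt.Printf("%d dirs, %d files, %d bytes\n", stat.Dirs, stat.Files, stat.Bytes)
	return nil
}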
// ReadData loads all data from the repository and checks the integrity.
func (c *Checker) ReadData(p *restic.Progress, errChan chan<- error, done <-chan struct{}) {
	defer close(errChan)

	p.Start()
	defer p.Done()

	worker := func(wg *sync.WaitGroup, in <-chan backend.ID) {
		defer wg.Done()
		for {
			var id backend.ID
			var ok bool

			select {
			case <-done:
				return
			case id, ok = <-in:
				if !ok {
					return
				}
			}

			err := checkPack(c.repo, id)
			p.Report(restic.Stat{Blobs: 1})
			if err == nil {
				continue
			}

			select {
			case <-done:
				return
			case errChan <- err:
			}
		}
	}

	ch := c.repo.List(backend.Data, done)

	var wg sync.WaitGroup
	for i := 0; i < defaultParallelism; i++ {
		wg.Add(1)
		go worker(&wg, ch)
	}

	wg.Wait()
}
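// The concurrency pattern used above, in isolation (illustrative sketch with
// generic names, not part of the original source): a fixed number of workers
// drain a shared channel, and every receive or send also selects on done so
// cancellation cannot deadlock a blocked worker.
func boundedWorkers(in <-chan int, done <-chan struct{}, parallelism int, work func(int)) {
	var wg sync.WaitGroup
	for i := 0; i < parallelism; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for {
				select {
				case <-done:
					return
				case v, ok := <-in:
					if !ok {
						return
					}
					work(v)
				}
			}
		}()
	}
	wg.Wait()
}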
// New creates a new index for repo from scratch.
func New(repo restic.Repository, p *restic.Progress) (*Index, error) {
	done := make(chan struct{})
	defer close(done)

	p.Start()
	defer p.Done()

	ch := make(chan worker.Job)
	go list.AllPacks(repo, ch, done)

	idx := newIndex()

	for job := range ch {
		p.Report(restic.Stat{Blobs: 1})

		packID := job.Data.(restic.ID)
		if job.Error != nil {
			fmt.Fprintf(os.Stderr, "unable to list pack %v: %v\n", packID.Str(), job.Error)
			continue
		}

		j := job.Result.(list.Result)

		debug.Log("pack %v contains %d blobs", packID.Str(), len(j.Entries()))

		err := idx.AddPack(packID, j.Size(), j.Entries())
		if err != nil {
			return nil, err
		}

		pack := Pack{Entries: j.Entries(), Size: j.Size()}
		idx.Packs[packID] = pack
	}

	return idx, nil
}
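// Hypothetical usage sketch (not from the original source): rebuild the
// index for an already-open repository and report how many packs it covers.
// The nil progress again assumes a nil *restic.Progress is tolerated.
func rebuildIndexExample(repo restic.Repository) error {
	idx, err := New(repo, nil)
	if err != nil {
		return err
	}
	fmt.Printf("index covers %d packs\n", len(idx.Packs))
	return nil
}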
// Load creates an index by loading all index files from the repo.
func Load(repo restic.Repository, p *restic.Progress) (*Index, error) {
	debug.Log("loading indexes")

	p.Start()
	defer p.Done()

	done := make(chan struct{})
	defer close(done)

	supersedes := make(map[restic.ID]restic.IDSet)
	results := make(map[restic.ID]map[restic.ID]Pack)

	index := newIndex()

	for id := range repo.List(restic.IndexFile, done) {
		p.Report(restic.Stat{Blobs: 1})

		debug.Log("Load index %v", id.Str())
		idx, err := loadIndexJSON(repo, id)
		if err != nil {
			return nil, err
		}

		res := make(map[restic.ID]Pack)

		supersedes[id] = restic.NewIDSet()
		for _, sid := range idx.Supersedes {
			debug.Log(" index %v supersedes %v", id.Str(), sid)
			supersedes[id].Insert(sid)
		}

		for _, jpack := range idx.Packs {
			entries := make([]restic.Blob, 0, len(jpack.Blobs))
			for _, blob := range jpack.Blobs {
				entry := restic.Blob{
					ID:     blob.ID,
					Type:   blob.Type,
					Offset: blob.Offset,
					Length: blob.Length,
				}
				entries = append(entries, entry)
			}

			if err = index.AddPack(jpack.ID, 0, entries); err != nil {
				return nil, err
			}
		}

		results[id] = res
		index.IndexIDs.Insert(id)
	}

	for superID, list := range supersedes {
		for indexID := range list {
			if _, ok := results[indexID]; !ok {
				continue
			}
			debug.Log(" removing index %v, superseded by %v", indexID.Str(), superID.Str())
			fmt.Fprintf(os.Stderr, "index %v can be removed, superseded by index %v\n", indexID.Str(), superID.Str())
			delete(results, indexID)
		}
	}

	return index, nil
}
// Snapshot creates a snapshot of the given paths. If parentID is set, it is
// used to compare the files to the ones archived at the time this snapshot was
// taken.
func (arch *Archiver) Snapshot(p *restic.Progress, paths, tags []string, parentID *restic.ID) (*restic.Snapshot, restic.ID, error) {
	paths = unique(paths)
	sort.Sort(baseNameSlice(paths))

	debug.Log("start for %v", paths)

	debug.RunHook("Archiver.Snapshot", nil)

	// signal the whole pipeline to stop
	done := make(chan struct{})

	var err error

	p.Start()
	defer p.Done()

	// create new snapshot
	sn, err := restic.NewSnapshot(paths, tags)
	if err != nil {
		return nil, restic.ID{}, err
	}
	sn.Excludes = arch.Excludes

	jobs := archivePipe{}

	// use parent snapshot (if some was given)
	if parentID != nil {
		sn.Parent = parentID

		// load parent snapshot
		parent, err := restic.LoadSnapshot(arch.repo, *parentID)
		if err != nil {
			return nil, restic.ID{}, err
		}

		// start walker on old tree
		ch := make(chan walk.TreeJob)
		go walk.Tree(arch.repo, *parent.Tree, done, ch)
		jobs.Old = ch
	} else {
		// use closed channel
		ch := make(chan walk.TreeJob)
		close(ch)
		jobs.Old = ch
	}

	// start walker
	pipeCh := make(chan pipe.Job)
	resCh := make(chan pipe.Result, 1)
	go func() {
		pipe.Walk(paths, arch.SelectFilter, done, pipeCh, resCh)
		debug.Log("pipe.Walk done")
	}()
	jobs.New = pipeCh

	ch := make(chan pipe.Job)
	go jobs.compare(done, ch)

	var wg sync.WaitGroup
	entCh := make(chan pipe.Entry)
	dirCh := make(chan pipe.Dir)

	// split
	wg.Add(1)
	go func() {
		pipe.Split(ch, dirCh, entCh)
		debug.Log("split done")
		close(dirCh)
		close(entCh)
		wg.Done()
	}()

	// run workers
	for i := 0; i < maxConcurrency; i++ {
		wg.Add(2)
		go arch.fileWorker(&wg, p, done, entCh)
		go arch.dirWorker(&wg, p, done, dirCh)
	}

	// run index saver
	var wgIndexSaver sync.WaitGroup
	stopIndexSaver := make(chan struct{})
	wgIndexSaver.Add(1)
	go arch.saveIndexes(&wgIndexSaver, stopIndexSaver)

	// wait for all workers to terminate
	debug.Log("wait for workers")
	wg.Wait()

	// stop index saver
	close(stopIndexSaver)
	wgIndexSaver.Wait()

	debug.Log("workers terminated")

	// receive the top-level tree
	root := (<-resCh).(*restic.Node)
	debug.Log("root node received: %v", root.Subtree.Str())
	sn.Tree = root.Subtree

	// save snapshot
	id, err := arch.repo.SaveJSONUnpacked(restic.SnapshotFile, sn)
	if err != nil {
		return nil, restic.ID{}, err
	}

	debug.Log("saved snapshot %v", id.Str())

	// flush repository
	err = arch.repo.Flush()
	if err != nil {
		return nil, restic.ID{}, err
	}

	// save index
	err = arch.repo.SaveIndex()
	if err != nil {
		debug.Log("error saving index: %v", err)
		return nil, restic.ID{}, err
	}

	debug.Log("saved indexes")

	return sn, id, nil
}
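// Hypothetical caller sketch for Snapshot (not from the original source):
// archive two paths with a tag, passing nil for both the progress and the
// parent ID (a nil parentID skips the comparison with an earlier snapshot).
func snapshotExample(arch *Archiver) error {
	sn, id, err := arch.Snapshot(nil, []string{"/etc", "/home/user"}, []string{"daily"}, nil)
	if err != nil {
		return err
	}
	fmt.Printf("snapshot %v saved, tree %v\n", id.Str(), sn.Tree.Str())
	return nil
}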
func (arch *Archiver) dirWorker(wg *sync.WaitGroup, p *restic.Progress, done <-chan struct{}, dirCh <-chan pipe.Dir) {
	debug.Log("start")
	defer func() {
		debug.Log("done")
		wg.Done()
	}()
	for {
		select {
		case dir, ok := <-dirCh:
			if !ok {
				// channel is closed
				return
			}
			debug.Log("save dir %v (%d entries), error %v\n", dir.Path(), len(dir.Entries), dir.Error())

			// ignore dir nodes with errors
			if dir.Error() != nil {
				fmt.Fprintf(os.Stderr, "error walking dir %v: %v\n", dir.Path(), dir.Error())
				dir.Result() <- nil
				p.Report(restic.Stat{Errors: 1})
				continue
			}

			tree := restic.NewTree()

			// wait for all content
			for _, ch := range dir.Entries {
				debug.Log("receiving result from %v", ch)
				res := <-ch

				// if we get a nil pointer here, an error has happened while
				// processing this entry. Ignore it for now.
				if res == nil {
					debug.Log("got nil result?")
					continue
				}

				// else insert node
				node := res.(*restic.Node)
				tree.Insert(node)

				if node.Type == "dir" {
					debug.Log("got tree node for %s: %v", node.Path, node.Subtree)

					if node.Subtree.IsNull() {
						panic("invalid null subtree ID")
					}
				}
			}

			node := &restic.Node{}

			if dir.Path() != "" && dir.Info() != nil {
				n, err := restic.NodeFromFileInfo(dir.Path(), dir.Info())
				if err != nil {
					n.Error = err.Error()
					dir.Result() <- n
					continue
				}
				node = n
			}

			if err := dir.Error(); err != nil {
				node.Error = err.Error()
			}

			id, err := arch.SaveTreeJSON(tree)
			if err != nil {
				panic(err)
			}
			debug.Log("save tree for %s: %v", dir.Path(), id.Str())
			if id.IsNull() {
				panic("invalid null subtree ID returned from SaveTreeJSON()")
			}
			node.Subtree = &id

			debug.Log("sending result to %v", dir.Result())

			dir.Result() <- node
			if dir.Path() != "" {
				p.Report(restic.Stat{Dirs: 1})
			}
		case <-done:
			// pipeline was cancelled
			return
		}
	}
}
func (arch *Archiver) fileWorker(wg *sync.WaitGroup, p *restic.Progress, done <-chan struct{}, entCh <-chan pipe.Entry) {
	defer func() {
		debug.Log("done")
		wg.Done()
	}()
	for {
		select {
		case e, ok := <-entCh:
			if !ok {
				// channel is closed
				return
			}

			debug.Log("got job %v", e)

			// check for errors
			if e.Error() != nil {
				debug.Log("job %v has errors: %v", e.Path(), e.Error())
				// TODO: integrate error reporting
				fmt.Fprintf(os.Stderr, "error for %v: %v\n", e.Path(), e.Error())
				// ignore this file
				e.Result() <- nil
				p.Report(restic.Stat{Errors: 1})
				continue
			}

			node, err := restic.NodeFromFileInfo(e.Fullpath(), e.Info())
			if err != nil {
				// TODO: integrate error reporting
				debug.Log("restic.NodeFromFileInfo returned error for %v: %v", node.Path, err)
				e.Result() <- nil
				p.Report(restic.Stat{Errors: 1})
				continue
			}

			// try to use old node, if present
			if e.Node != nil {
				debug.Log(" %v use old data", e.Path())

				oldNode := e.Node.(*restic.Node)

				// check if all content is still available in the repository
				contentMissing := false
				for _, blob := range oldNode.Content {
					if !arch.repo.Index().Has(blob, restic.DataBlob) {
						debug.Log(" %v not using old data, %v is missing", e.Path(), blob.Str())
						contentMissing = true
						break
					}
				}

				if !contentMissing {
					node.Content = oldNode.Content
					debug.Log(" %v content is complete", e.Path())
				}
			} else {
				debug.Log(" %v no old data", e.Path())
			}

			// otherwise read file normally
			if node.Type == "file" && len(node.Content) == 0 {
				debug.Log(" read and save %v, content: %v", e.Path(), node.Content)
				err = arch.SaveFile(p, node)
				if err != nil {
					// TODO: integrate error reporting
					fmt.Fprintf(os.Stderr, "error for %v: %v\n", node.Path, err)
					// ignore this file
					e.Result() <- nil
					p.Report(restic.Stat{Errors: 1})
					continue
				}
			} else {
				// report old data size
				p.Report(restic.Stat{Bytes: node.Size})
			}

			debug.Log(" processed %v, %d blobs", e.Path(), len(node.Content))
			e.Result() <- node
			p.Report(restic.Stat{Files: 1})
		case <-done:
			// pipeline was cancelled
			return
		}
	}
}
// ArchiveReader reads from the reader and archives the data. Returned is the
// resulting snapshot and its ID.
func ArchiveReader(repo restic.Repository, p *restic.Progress, rd io.Reader, name string, tags []string) (*restic.Snapshot, restic.ID, error) {
	debug.Log("start archiving %s", name)
	sn, err := restic.NewSnapshot([]string{name}, tags)
	if err != nil {
		return nil, restic.ID{}, err
	}

	p.Start()
	defer p.Done()

	chnker := chunker.New(rd, repo.Config().ChunkerPolynomial)

	var ids restic.IDs
	var fileSize uint64

	for {
		chunk, err := chnker.Next(getBuf())
		if errors.Cause(err) == io.EOF {
			break
		}

		if err != nil {
			return nil, restic.ID{}, errors.Wrap(err, "chunker.Next()")
		}

		id := restic.Hash(chunk.Data)

		if !repo.Index().Has(id, restic.DataBlob) {
			_, err := repo.SaveBlob(restic.DataBlob, chunk.Data, id)
			if err != nil {
				return nil, restic.ID{}, err
			}
			debug.Log("saved blob %v (%d bytes)\n", id.Str(), chunk.Length)
		} else {
			debug.Log("blob %v already saved in the repo\n", id.Str())
		}

		freeBuf(chunk.Data)

		ids = append(ids, id)

		p.Report(restic.Stat{Bytes: uint64(chunk.Length)})
		fileSize += uint64(chunk.Length)
	}

	tree := &restic.Tree{
		Nodes: []*restic.Node{
			&restic.Node{
				Name:       name,
				AccessTime: time.Now(),
				ModTime:    time.Now(),
				Type:       "file",
				Mode:       0644,
				Size:       fileSize,
				UID:        sn.UID,
				GID:        sn.GID,
				User:       sn.Username,
				Content:    ids,
			},
		},
	}

	treeID, err := repo.SaveTree(tree)
	if err != nil {
		return nil, restic.ID{}, err
	}
	sn.Tree = &treeID
	debug.Log("tree saved as %v", treeID.Str())

	id, err := repo.SaveJSONUnpacked(restic.SnapshotFile, sn)
	if err != nil {
		return nil, restic.ID{}, err
	}

	debug.Log("snapshot saved as %v", id.Str())

	err = repo.Flush()
	if err != nil {
		return nil, restic.ID{}, err
	}

	err = repo.SaveIndex()
	if err != nil {
		return nil, restic.ID{}, err
	}

	return sn, id, nil
}
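// Hypothetical usage sketch (not from the original source): archive data
// streamed from stdin under the name "stdin", in the style of a
// `backup --stdin` caller, assuming an already-open repository.
func archiveStdinExample(repo restic.Repository) error {
	sn, id, err := ArchiveReader(repo, nil, os.Stdin, "stdin", nil)
	if err != nil {
		return err
	}
	fmt.Printf("archived %v as snapshot %v\n", sn.Paths, id.Str())
	return nil
}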