// New returns a new checker which runs on repo.
func New(repo restic.Repository) *Checker {
	c := &Checker{
		packs:       restic.NewIDSet(),
		blobs:       restic.NewIDSet(),
		masterIndex: repository.NewMasterIndex(),
		indexes:     make(map[restic.ID]*repository.Index),
		repo:        repo,
	}

	c.blobRefs.M = make(map[restic.ID]uint)

	return c
}
// newIndex returns a new, empty Index.
func newIndex() *Index {
	return &Index{
		Packs:    make(map[restic.ID]Pack),
		Blobs:    make(map[restic.BlobHandle]Blob),
		IndexIDs: restic.NewIDSet(),
	}
}
// NewSnapshotsDir returns a new dir object for the snapshots.
func NewSnapshotsDir(repo restic.Repository, ownerIsRoot bool) *SnapshotsDir {
	debug.Log("fuse mount initiated")
	return &SnapshotsDir{
		repo:           repo,
		knownSnapshots: make(map[string]SnapshotWithId),
		ownerIsRoot:    ownerIsRoot,
		processed:      restic.NewIDSet(),
	}
}
// listPacks returns the set of all data pack IDs in repo.
func listPacks(t *testing.T, repo restic.Repository) restic.IDSet {
	done := make(chan struct{})
	defer close(done)

	list := restic.NewIDSet()
	for id := range repo.List(restic.DataFile, done) {
		list.Insert(id)
	}

	return list
}
// Packs returns all packs in this index.
func (idx *Index) Packs() restic.IDSet {
	idx.m.Lock()
	defer idx.m.Unlock()

	packs := restic.NewIDSet()
	for _, list := range idx.pack {
		for _, entry := range list {
			packs.Insert(entry.packID)
		}
	}

	return packs
}
// RebuildIndex lists all packs in the repo, writes a new index and removes all
// old indexes. This operation should only be done with an exclusive lock in
// place.
func RebuildIndex(repo restic.Repository) error {
	debug.Log("start rebuilding index")

	done := make(chan struct{})
	defer close(done)

	ch := make(chan worker.Job)
	go list.AllPacks(repo, ch, done)

	idx := NewIndex()
	for job := range ch {
		id := job.Data.(restic.ID)

		if job.Error != nil {
			fmt.Fprintf(os.Stderr, "error for pack %v: %v\n", id, job.Error)
			continue
		}

		res := job.Result.(list.Result)

		for _, entry := range res.Entries() {
			pb := restic.PackedBlob{
				Blob:   entry,
				PackID: res.PackID(),
			}
			idx.Store(pb)
		}
	}

	oldIndexes := restic.NewIDSet()
	for id := range repo.List(restic.IndexFile, done) {
		idx.AddToSupersedes(id)
		oldIndexes.Insert(id)
	}

	id, err := SaveIndex(repo, idx)
	if err != nil {
		debug.Log("error saving index: %v", err)
		return err
	}
	debug.Log("new index saved as %v", id.Str())

	for indexID := range oldIndexes {
		err := repo.Backend().Remove(restic.IndexFile, indexID.String())
		if err != nil {
			fmt.Fprintf(os.Stderr, "unable to remove index %v: %v\n", indexID.Str(), err)
		}
	}

	return nil
}
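// Usage sketch for RebuildIndex: since it requires an exclusive lock, a caller
// would typically lock first. This is only illustrative and borrows the lock
// helpers lockRepoExclusive/unlockRepo from runPrune below:
//
//	lock, err := lockRepoExclusive(repo)
//	defer unlockRepo(lock)
//	if err != nil {
//		return err
//	}
//
//	if err := RebuildIndex(repo); err != nil {
//		return err
//	}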
// PacksForBlobs returns the set of packs in which the blobs are contained.
func (idx *Index) PacksForBlobs(blobs restic.BlobSet) (packs restic.IDSet) {
	packs = restic.NewIDSet()
	for h := range blobs {
		blob, ok := idx.Blobs[h]
		if !ok {
			continue
		}

		for id := range blob.Packs {
			packs.Insert(id)
		}
	}

	return packs
}
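// Example for PacksForBlobs (a minimal sketch; blobID is a hypothetical
// placeholder for a blob known to be in the index):
//
//	blobs := restic.NewBlobSet(restic.BlobHandle{ID: blobID, Type: restic.DataBlob})
//	for id := range idx.PacksForBlobs(blobs) {
//		fmt.Printf("blob is stored in pack %v\n", id.Str())
//	}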
// findPacksForBlobs returns the set of packs that contain the given blobs,
// according to the repository index.
func findPacksForBlobs(t *testing.T, repo restic.Repository, blobs restic.BlobSet) restic.IDSet {
	packs := restic.NewIDSet()

	idx := repo.Index()
	for h := range blobs {
		list, err := idx.Lookup(h.ID, h.Type)
		if err != nil {
			t.Fatal(err)
		}

		for _, pb := range list {
			packs.Insert(pb.PackID)
		}
	}

	return packs
}
// AddPack adds a pack to the index. If this pack is already in the index, an
// error is returned.
func (idx *Index) AddPack(id restic.ID, size int64, entries []restic.Blob) error {
	if _, ok := idx.Packs[id]; ok {
		return errors.Errorf("pack %v already present in the index", id.Str())
	}

	idx.Packs[id] = Pack{Size: size, Entries: entries}

	for _, entry := range entries {
		h := restic.BlobHandle{ID: entry.ID, Type: entry.Type}
		if _, ok := idx.Blobs[h]; !ok {
			idx.Blobs[h] = Blob{
				Size:  int64(entry.Length),
				Packs: restic.NewIDSet(),
			}
		}

		idx.Blobs[h].Packs.Insert(id)
	}

	return nil
}
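// Example for AddPack (a minimal sketch; packID and blobID are hypothetical
// placeholders):
//
//	entries := []restic.Blob{
//		{ID: blobID, Type: restic.DataBlob, Offset: 0, Length: 100},
//	}
//	if err := idx.AddPack(packID, 100, entries); err != nil {
//		// handle error
//	}
//
//	// adding the same pack a second time returns an error
//	err := idx.AddPack(packID, 100, entries) // err != nil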
// New returns a new archiver.
func New(repo restic.Repository) *Archiver {
	arch := &Archiver{
		repo:      repo,
		blobToken: make(chan struct{}, maxConcurrentBlobs),
		knownBlobs: struct {
			restic.IDSet
			sync.Mutex
		}{
			IDSet: restic.NewIDSet(),
		},
	}

	for i := 0; i < maxConcurrentBlobs; i++ {
		arch.blobToken <- struct{}{}
	}

	arch.Error = archiverAbortOnAllErrors
	arch.SelectFilter = archiverAllowAllFiles
	return arch
}
func TestIndexPacks(t *testing.T) {
	idx := repository.NewIndex()
	packs := restic.NewIDSet()

	for i := 0; i < 20; i++ {
		packID := restic.NewRandomID()
		idx.Store(restic.PackedBlob{
			Blob: restic.Blob{
				Type:   restic.DataBlob,
				ID:     restic.NewRandomID(),
				Offset: 0,
				Length: 23,
			},
			PackID: packID,
		})

		packs.Insert(packID)
	}

	idxPacks := idx.Packs()
	Assert(t, packs.Equals(idxPacks), "packs in index do not match packs added to index")
}
// Packs checks that all packs referenced in the index are still available and
// there are no packs that aren't in an index. errChan is closed after all
// packs have been checked.
func (c *Checker) Packs(errChan chan<- error, done <-chan struct{}) {
	defer close(errChan)

	debug.Log("checking for %d packs", len(c.packs))
	seenPacks := restic.NewIDSet()

	var workerWG sync.WaitGroup
	IDChan := make(chan restic.ID)
	for i := 0; i < defaultParallelism; i++ {
		workerWG.Add(1)
		go packIDTester(c.repo, IDChan, errChan, &workerWG, done)
	}

	for id := range c.packs {
		seenPacks.Insert(id)
		IDChan <- id
	}
	close(IDChan)

	debug.Log("waiting for %d workers to terminate", defaultParallelism)
	workerWG.Wait()
	debug.Log("workers terminated")

	for id := range c.repo.List(restic.DataFile, done) {
		debug.Log("check data blob %v", id.Str())
		if !seenPacks.Has(id) {
			c.orphanedPacks = append(c.orphanedPacks, id)
			select {
			case <-done:
				return
			case errChan <- PackError{ID: id, Orphaned: true, Err: errors.New("not referenced in any index")}:
			}
		}
	}
}
// Load creates an index by loading all index files from the repo.
func Load(repo restic.Repository, p *restic.Progress) (*Index, error) {
	debug.Log("loading indexes")

	p.Start()
	defer p.Done()

	done := make(chan struct{})
	defer close(done)

	supersedes := make(map[restic.ID]restic.IDSet)
	results := make(map[restic.ID]map[restic.ID]Pack)

	index := newIndex()

	for id := range repo.List(restic.IndexFile, done) {
		p.Report(restic.Stat{Blobs: 1})

		debug.Log("Load index %v", id.Str())
		idx, err := loadIndexJSON(repo, id)
		if err != nil {
			return nil, err
		}

		res := make(map[restic.ID]Pack)
		supersedes[id] = restic.NewIDSet()
		for _, sid := range idx.Supersedes {
			debug.Log(" index %v supersedes %v", id.Str(), sid)
			supersedes[id].Insert(sid)
		}

		for _, jpack := range idx.Packs {
			entries := make([]restic.Blob, 0, len(jpack.Blobs))
			for _, blob := range jpack.Blobs {
				entry := restic.Blob{
					ID:     blob.ID,
					Type:   blob.Type,
					Offset: blob.Offset,
					Length: blob.Length,
				}
				entries = append(entries, entry)
			}

			if err = index.AddPack(jpack.ID, 0, entries); err != nil {
				return nil, err
			}
		}

		results[id] = res
		index.IndexIDs.Insert(id)
	}

	for superID, list := range supersedes {
		for indexID := range list {
			if _, ok := results[indexID]; !ok {
				continue
			}
			debug.Log(" removing index %v, superseded by %v", indexID.Str(), superID.Str())
			fmt.Fprintf(os.Stderr, "index %v can be removed, superseded by index %v\n", indexID.Str(), superID.Str())
			delete(results, indexID)
		}
	}

	return index, nil
}
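// Usage sketch for Load (a minimal example; it assumes that passing a nil
// *restic.Progress is safe because the Progress methods check for a nil
// receiver):
//
//	idx, err := index.Load(repo, nil)
//	if err != nil {
//		return err
//	}
//	fmt.Printf("index covers %d packs\n", len(idx.Packs))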
func runPrune(gopts GlobalOptions) error {
	repo, err := OpenRepository(gopts)
	if err != nil {
		return err
	}

	lock, err := lockRepoExclusive(repo)
	defer unlockRepo(lock)
	if err != nil {
		return err
	}

	err = repo.LoadIndex()
	if err != nil {
		return err
	}

	done := make(chan struct{})
	defer close(done)

	var stats struct {
		blobs     int
		packs     int
		snapshots int
		bytes     int64
	}

	Verbosef("counting files in repo\n")
	for range repo.List(restic.DataFile, done) {
		stats.packs++
	}

	Verbosef("building new index for repo\n")

	bar := newProgressMax(!gopts.Quiet, uint64(stats.packs), "packs")
	idx, err := index.New(repo, bar)
	if err != nil {
		return err
	}

	for _, pack := range idx.Packs {
		stats.bytes += pack.Size
	}
	Verbosef("repository contains %v packs (%v blobs) with %v bytes\n",
		len(idx.Packs), len(idx.Blobs), formatBytes(uint64(stats.bytes)))

	blobCount := make(map[restic.BlobHandle]int)
	duplicateBlobs := 0
	duplicateBytes := 0

	// find duplicate blobs
	for _, p := range idx.Packs {
		for _, entry := range p.Entries {
			stats.blobs++
			h := restic.BlobHandle{ID: entry.ID, Type: entry.Type}
			blobCount[h]++

			if blobCount[h] > 1 {
				duplicateBlobs++
				duplicateBytes += int(entry.Length)
			}
		}
	}

	Verbosef("processed %d blobs: %d duplicate blobs, %v duplicate data\n",
		stats.blobs, duplicateBlobs, formatBytes(uint64(duplicateBytes)))
	Verbosef("load all snapshots\n")

	// find referenced blobs
	snapshots, err := restic.LoadAllSnapshots(repo)
	if err != nil {
		return err
	}

	stats.snapshots = len(snapshots)

	Verbosef("find data that is still in use for %d snapshots\n", stats.snapshots)

	usedBlobs := restic.NewBlobSet()
	seenBlobs := restic.NewBlobSet()

	bar = newProgressMax(!gopts.Quiet, uint64(len(snapshots)), "snapshots")
	bar.Start()
	for _, sn := range snapshots {
		debug.Log("process snapshot %v", sn.ID().Str())

		err = restic.FindUsedBlobs(repo, *sn.Tree, usedBlobs, seenBlobs)
		if err != nil {
			return err
		}

		debug.Log("found %v used blobs so far, snapshot %v done", len(usedBlobs), sn.ID().Str())
		bar.Report(restic.Stat{Blobs: 1})
	}
	bar.Done()

	Verbosef("found %d of %d data blobs still in use, removing %d blobs\n",
		len(usedBlobs), stats.blobs, stats.blobs-len(usedBlobs))

	// find packs that need a rewrite
	rewritePacks := restic.NewIDSet()
	for h, blob := range idx.Blobs {
		if !usedBlobs.Has(h) {
			rewritePacks.Merge(blob.Packs)
			continue
		}

		if blobCount[h] > 1 {
			rewritePacks.Merge(blob.Packs)
		}
	}

	removeBytes := 0

	// find packs that are unneeded
	removePacks := restic.NewIDSet()
	for packID, p := range idx.Packs {
		hasActiveBlob := false
		for _, blob := range p.Entries {
			h := restic.BlobHandle{ID: blob.ID, Type: blob.Type}
			if usedBlobs.Has(h) {
				hasActiveBlob = true
				continue
			}

			removeBytes += int(blob.Length)
		}

		if hasActiveBlob {
			continue
		}

		removePacks.Insert(packID)

		if !rewritePacks.Has(packID) {
			return errors.Fatalf("pack %v is unneeded, but not contained in rewritePacks", packID.Str())
		}

		rewritePacks.Delete(packID)
	}

	Verbosef("will delete %d packs and rewrite %d packs, this frees %s\n",
		len(removePacks), len(rewritePacks), formatBytes(uint64(removeBytes)))

	err = repository.Repack(repo, rewritePacks, usedBlobs)
	if err != nil {
		return err
	}

	for packID := range removePacks {
		err = repo.Backend().Remove(restic.DataFile, packID.String())
		if err != nil {
			Warnf("unable to remove file %v from the repository\n", packID.Str())
		}
	}

	Verbosef("creating new index\n")

	stats.packs = 0
	for range repo.List(restic.DataFile, done) {
		stats.packs++
	}
	bar = newProgressMax(!gopts.Quiet, uint64(stats.packs), "packs")
	idx, err = index.New(repo, bar)
	if err != nil {
		return err
	}

	var supersedes restic.IDs
	for idxID := range repo.List(restic.IndexFile, done) {
		err := repo.Backend().Remove(restic.IndexFile, idxID.String())
		if err != nil {
			fmt.Fprintf(os.Stderr, "unable to remove index %v: %v\n", idxID.Str(), err)
		}

		supersedes = append(supersedes, idxID)
	}

	id, err := idx.Save(repo, supersedes)
	if err != nil {
		return err
	}
	Verbosef("saved new index as %v\n", id.Str())

	Verbosef("done\n")
	return nil
}
// LoadIndex loads all index files.
func (c *Checker) LoadIndex() (hints []error, errs []error) {
	debug.Log("Start")

	type indexRes struct {
		Index *repository.Index
		ID    string
	}

	indexCh := make(chan indexRes)

	worker := func(id restic.ID, done <-chan struct{}) error {
		debug.Log("worker got index %v", id)
		idx, err := repository.LoadIndexWithDecoder(c.repo, id, repository.DecodeIndex)
		if errors.Cause(err) == repository.ErrOldIndexFormat {
			debug.Log("index %v has old format", id.Str())
			hints = append(hints, ErrOldIndexFormat{id})

			idx, err = repository.LoadIndexWithDecoder(c.repo, id, repository.DecodeOldIndex)
		}

		if err != nil {
			return err
		}

		select {
		case indexCh <- indexRes{Index: idx, ID: id.String()}:
		case <-done:
		}

		return nil
	}

	var perr error
	go func() {
		defer close(indexCh)
		debug.Log("start loading indexes in parallel")
		perr = repository.FilesInParallel(c.repo.Backend(), restic.IndexFile, defaultParallelism,
			repository.ParallelWorkFuncParseID(worker))
		debug.Log("loading indexes finished, error: %v", perr)
	}()

	done := make(chan struct{})
	defer close(done)

	if perr != nil {
		errs = append(errs, perr)
		return hints, errs
	}

	packToIndex := make(map[restic.ID]restic.IDSet)

	for res := range indexCh {
		debug.Log("process index %v", res.ID)
		idxID, err := restic.ParseID(res.ID)
		if err != nil {
			errs = append(errs, errors.Errorf("unable to parse as index ID: %v", res.ID))
			continue
		}

		c.indexes[idxID] = res.Index
		c.masterIndex.Insert(res.Index)

		debug.Log("process blobs")
		cnt := 0
		for blob := range res.Index.Each(done) {
			c.packs.Insert(blob.PackID)
			c.blobs.Insert(blob.ID)
			c.blobRefs.M[blob.ID] = 0
			cnt++

			if _, ok := packToIndex[blob.PackID]; !ok {
				packToIndex[blob.PackID] = restic.NewIDSet()
			}
			packToIndex[blob.PackID].Insert(idxID)
		}

		debug.Log("%d blobs processed", cnt)
	}

	debug.Log("done, error %v", perr)

	debug.Log("checking for duplicate packs")
	for packID := range c.packs {
		debug.Log(" check pack %v: contained in %d indexes", packID.Str(), len(packToIndex[packID]))
		if len(packToIndex[packID]) > 1 {
			hints = append(hints, ErrDuplicatePacks{
				PackID:  packID,
				Indexes: packToIndex[packID],
			})
		}
	}

	c.repo.SetIndex(c.masterIndex)

	return hints, errs
}
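// Usage sketch for the Checker, tying New, LoadIndex and Packs together (a
// minimal example; the error handling is only illustrative):
//
//	chkr := checker.New(repo)
//	hints, errs := chkr.LoadIndex()
//	if len(errs) > 0 {
//		// the index is damaged, report and abort
//	}
//	_ = hints
//
//	done := make(chan struct{})
//	defer close(done)
//
//	errChan := make(chan error)
//	go chkr.Packs(errChan, done)
//	for err := range errChan {
//		fmt.Fprintf(os.Stderr, "pack error: %v\n", err)
//	}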