// New returns a new checker which runs on repo. func New(repo *repository.Repository) *Checker { c := &Checker{ packs: backend.NewIDSet(), blobs: backend.NewIDSet(), masterIndex: repository.NewMasterIndex(), indexes: make(map[backend.ID]*repository.Index), repo: repo, } c.blobRefs.M = make(map[backend.ID]uint) return c }
func (cmd CmdOptimize) Execute(args []string) error { if len(args) != 0 { return errors.New("optimize has no arguments") } repo, err := cmd.global.OpenRepository() if err != nil { return err } cmd.global.Verbosef("Create exclusive lock for repository\n") lock, err := lockRepoExclusive(repo) defer unlockRepo(lock) if err != nil { return err } chkr := checker.New(repo) cmd.global.Verbosef("Load indexes\n") _, errs := chkr.LoadIndex() if len(errs) > 0 { for _, err := range errs { cmd.global.Warnf("error: %v\n", err) } return fmt.Errorf("LoadIndex returned errors") } done := make(chan struct{}) errChan := make(chan error) go chkr.Structure(errChan, done) for err := range errChan { if e, ok := err.(checker.TreeError); ok { cmd.global.Warnf("error for tree %v:\n", e.ID.Str()) for _, treeErr := range e.Errors { cmd.global.Warnf(" %v\n", treeErr) } } else { cmd.global.Warnf("error: %v\n", err) } } unusedBlobs := backend.NewIDSet(chkr.UnusedBlobs()...) cmd.global.Verbosef("%d unused blobs found, repacking...\n", len(unusedBlobs)) repacker := checker.NewRepacker(repo, unusedBlobs) err = repacker.Repack() if err != nil { return err } cmd.global.Verbosef("repacking done\n") return nil }
func TestIDSet(t *testing.T) { set := backend.NewIDSet() for i, test := range idsetTests { seen := set.Has(test.id) if seen != test.seen { t.Errorf("IDSet test %v failed: wanted %v, got %v", i, test.seen, seen) } set.Insert(test.id) } }
// Packs returns all packs in this index func (idx *Index) Packs() backend.IDSet { idx.m.Lock() defer idx.m.Unlock() packs := backend.NewIDSet() for _, entry := range idx.pack { packs.Insert(entry.packID) } return packs }
// FindBlobsForPacks returns the set of blobs contained in a pack of packs. func FindBlobsForPacks(repo *repository.Repository, packs backend.IDSet) (backend.IDSet, error) { blobs := backend.NewIDSet() for packID := range packs { for _, packedBlob := range repo.Index().ListPack(packID) { blobs.Insert(packedBlob.ID) } } return blobs, nil }
// FindPacksForBlobs returns the set of packs that contain the blobs. func FindPacksForBlobs(repo *repository.Repository, blobs backend.IDSet) (backend.IDSet, error) { packs := backend.NewIDSet() idx := repo.Index() for id := range blobs { blob, err := idx.Lookup(id) if err != nil { return nil, err } packs.Insert(blob.PackID) } return packs, nil }
func TestIndexPacks(t *testing.T) { idx := repository.NewIndex() packs := backend.NewIDSet() for i := 0; i < 20; i++ { packID := randomID() idx.Store(repository.PackedBlob{ Type: pack.Data, ID: randomID(), PackID: packID, Offset: 0, Length: 23, }) packs.Insert(packID) } idxPacks := idx.Packs() Assert(t, packs.Equals(idxPacks), "packs in index do not match packs added to index") }
// NewArchiver returns a new archiver. func NewArchiver(repo *repository.Repository) *Archiver { arch := &Archiver{ repo: repo, blobToken: make(chan struct{}, maxConcurrentBlobs), knownBlobs: struct { backend.IDSet sync.Mutex }{ IDSet: backend.NewIDSet(), }, } for i := 0; i < maxConcurrentBlobs; i++ { arch.blobToken <- struct{}{} } arch.Error = archiverAbortOnAllErrors arch.SelectFilter = archiverAllowAllFiles return arch }
func TestRepacker(t *testing.T) { WithTestEnvironment(t, checkerTestData, func(repodir string) { repo := OpenLocalRepo(t, repodir) OK(t, repo.LoadIndex()) repo.Backend().Remove(backend.Snapshot, "c2b53c5e6a16db92fbb9aa08bd2794c58b379d8724d661ee30d20898bdfdff22") unusedBlobs := backend.IDSet{ ParseID("5714f7274a8aa69b1692916739dc3835d09aac5395946b8ec4f58e563947199a"): struct{}{}, ParseID("08d0444e9987fa6e35ce4232b2b71473e1a8f66b2f9664cc44dc57aad3c5a63a"): struct{}{}, ParseID("356493f0b00a614d36c698591bbb2b1d801932d85328c1f508019550034549fc"): struct{}{}, ParseID("b8a6bcdddef5c0f542b4648b2ef79bc0ed4377d4109755d2fb78aff11e042663"): struct{}{}, } chkr := checker.New(repo) _, errs := chkr.LoadIndex() OKs(t, errs) errs = checkStruct(chkr) OKs(t, errs) list := backend.NewIDSet(chkr.UnusedBlobs()...) if !unusedBlobs.Equals(list) { t.Fatalf("expected unused blobs:\n %v\ngot:\n %v", unusedBlobs, list) } repacker := checker.NewRepacker(repo, unusedBlobs) OK(t, repacker.Repack()) chkr = checker.New(repo) _, errs = chkr.LoadIndex() OKs(t, errs) OKs(t, checkPacks(chkr)) OKs(t, checkStruct(chkr)) blobs := chkr.UnusedBlobs() Assert(t, len(blobs) == 0, "expected zero unused blobs, got %v", blobs) }) }
// Packs checks that all packs referenced in the index are still available and // there are no packs that aren't in an index. errChan is closed after all // packs have been checked. func (c *Checker) Packs(errChan chan<- error, done <-chan struct{}) { defer close(errChan) debug.Log("Checker.Packs", "checking for %d packs", len(c.packs)) seenPacks := backend.NewIDSet() var workerWG sync.WaitGroup IDChan := make(chan backend.ID) for i := 0; i < defaultParallelism; i++ { workerWG.Add(1) go packIDTester(c.repo, IDChan, errChan, &workerWG, done) } for id := range c.packs { seenPacks.Insert(id) IDChan <- id } close(IDChan) debug.Log("Checker.Packs", "waiting for %d workers to terminate", defaultParallelism) workerWG.Wait() debug.Log("Checker.Packs", "workers terminated") for id := range c.repo.List(backend.Data, done) { debug.Log("Checker.Packs", "check data blob %v", id.Str()) if !seenPacks.Has(id) { c.orphanedPacks = append(c.orphanedPacks, id) select { case <-done: return case errChan <- PackError{ID: id, Orphaned: true, Err: errors.New("not referenced in any index")}: } } } }
} }) } func TestRebuildIndexAlwaysFull(t *testing.T) { repository.IndexFull = func(*repository.Index) bool { return true } TestRebuildIndex(t) } var optimizeTests = []struct { testFilename string snapshots backend.IDSet }{ { filepath.Join("..", "..", "restic", "checker", "testdata", "checker-test-repo.tar.gz"), backend.NewIDSet(ParseID("a13c11e582b77a693dd75ab4e3a3ba96538a056594a4b9076e4cacebe6e06d43")), }, { filepath.Join("testdata", "old-index-repo.tar.gz"), nil, }, { filepath.Join("testdata", "old-index-repo.tar.gz"), backend.NewIDSet( ParseID("f7d83db709977178c9d1a09e4009355e534cde1a135b8186b8b118a3fc4fcd41"), ParseID("51d249d28815200d59e4be7b3f21a157b864dc343353df9d8e498220c2499b02"), ), }, } func TestCheckRestoreNoLock(t *testing.T) {
// LoadIndex loads all index files. func (c *Checker) LoadIndex() (hints []error, errs []error) { debug.Log("LoadIndex", "Start") type indexRes struct { Index *repository.Index ID string } indexCh := make(chan indexRes) worker := func(id backend.ID, done <-chan struct{}) error { debug.Log("LoadIndex", "worker got index %v", id) idx, err := repository.LoadIndexWithDecoder(c.repo, id, repository.DecodeIndex) if err == repository.ErrOldIndexFormat { debug.Log("LoadIndex", "index %v has old format", id.Str()) hints = append(hints, ErrOldIndexFormat{id}) idx, err = repository.LoadIndexWithDecoder(c.repo, id, repository.DecodeOldIndex) } if err != nil { return err } select { case indexCh <- indexRes{Index: idx, ID: id.String()}: case <-done: } return nil } var perr error go func() { defer close(indexCh) debug.Log("LoadIndex", "start loading indexes in parallel") perr = repository.FilesInParallel(c.repo.Backend(), backend.Index, defaultParallelism, repository.ParallelWorkFuncParseID(worker)) debug.Log("LoadIndex", "loading indexes finished, error: %v", perr) }() done := make(chan struct{}) defer close(done) if perr != nil { errs = append(errs, perr) return hints, errs } packToIndex := make(map[backend.ID]backend.IDSet) for res := range indexCh { debug.Log("LoadIndex", "process index %v", res.ID) idxID, err := backend.ParseID(res.ID) if err != nil { errs = append(errs, fmt.Errorf("unable to parse as index ID: %v", res.ID)) continue } c.indexes[idxID] = res.Index c.masterIndex.Insert(res.Index) debug.Log("LoadIndex", "process blobs") cnt := 0 for blob := range res.Index.Each(done) { c.packs.Insert(blob.PackID) c.blobs.Insert(blob.ID) c.blobRefs.M[blob.ID] = 0 cnt++ if _, ok := packToIndex[blob.PackID]; !ok { packToIndex[blob.PackID] = backend.NewIDSet() } packToIndex[blob.PackID].Insert(idxID) } debug.Log("LoadIndex", "%d blobs processed", cnt) } debug.Log("LoadIndex", "done, error %v", perr) debug.Log("LoadIndex", "checking for duplicate packs") for packID := range c.packs 
{ debug.Log("LoadIndex", " check pack %v: contained in %d indexes", packID.Str(), len(packToIndex[packID])) if len(packToIndex[packID]) > 1 { hints = append(hints, ErrDuplicatePacks{ PackID: packID, Indexes: packToIndex[packID], }) } } c.repo.SetIndex(c.masterIndex) return hints, errs }
// RebuildIndex combines all existing indexes of the repository into new
// indexes and then adds the packs that were not listed in any of the old
// indexes.
//
// The function works in three phases:
//  1. Load every existing index and copy its blobs into a combined index,
//     skipping blobs (identified by ID and type) that were already copied.
//     The combined index is stored whenever repository.IndexFull reports it
//     as full, and once more at the end if it is not empty.
//  2. Remove all old index files from the backend.
//  3. List all data files; each pack that was not seen in an old index is
//     loaded, its content hash is compared with its ID, and its entries are
//     added to the combined index, which is stored in the same way as in
//     phase 1.
//
// The first error encountered aborts the rebuild and is returned.
func (cmd CmdRebuildIndex) RebuildIndex() error {
	debug.Log("RebuildIndex.RebuildIndex", "start")

	// done is passed to the List and Each calls below and is closed when
	// this function returns.
	done := make(chan struct{})
	defer close(done)

	// Collect the IDs of all existing indexes first; these are the indexes
	// that get merged in phase 1 and removed in phase 2.
	indexIDs := backend.NewIDSet()
	for id := range cmd.repo.List(backend.Index, done) {
		indexIDs.Insert(id)
	}

	cmd.global.Printf("rebuilding index from %d indexes\n", len(indexIDs))

	debug.Log("RebuildIndex.RebuildIndex", "found %v indexes", len(indexIDs))

	combinedIndex := repository.NewIndex()
	// packsDone records every pack referenced by an old index, so that
	// phase 3 can tell which packs are not indexed.
	packsDone := backend.NewIDSet()

	// Blob is the key used to detect blobs that occur in several indexes.
	type Blob struct {
		id  backend.ID
		tpe pack.BlobType
	}
	blobsDone := make(map[Blob]struct{})

	// i only counts the indexes loaded so far for the progress message.
	i := 0
	for indexID := range indexIDs {
		cmd.global.Printf(" loading index %v\n", i)

		debug.Log("RebuildIndex.RebuildIndex", "load index %v", indexID.Str())
		idx, err := repository.LoadIndex(cmd.repo, indexID.String())
		if err != nil {
			return err
		}

		debug.Log("RebuildIndex.RebuildIndex", "adding blobs from index %v", indexID.Str())

		for packedBlob := range idx.Each(done) {
			// The pack counts as indexed even if the blob itself turns out
			// to be a duplicate.
			packsDone.Insert(packedBlob.PackID)
			b := Blob{
				id:  packedBlob.ID,
				tpe: packedBlob.Type,
			}
			if _, ok := blobsDone[b]; ok {
				continue
			}

			blobsDone[b] = struct{}{}
			combinedIndex.Store(packedBlob)
		}

		combinedIndex.AddToSupersedes(indexID)

		// storeIndex returns the index to continue with.
		// NOTE(review): presumably a fresh, empty index after the full one
		// was saved -- confirm against storeIndex.
		if repository.IndexFull(combinedIndex) {
			combinedIndex, err = cmd.storeIndex(combinedIndex)
			if err != nil {
				return err
			}
		}

		i++
	}

	// Store whatever is left over from phase 1.
	var err error
	if combinedIndex.Length() > 0 {
		combinedIndex, err = cmd.storeIndex(combinedIndex)
		if err != nil {
			return err
		}
	}

	cmd.global.Printf("removing %d old indexes\n", len(indexIDs))
	for id := range indexIDs {
		debug.Log("RebuildIndex.RebuildIndex", "remove index %v", id.Str())

		// This err shadows the function-level err for this iteration only.
		err := cmd.repo.Backend().Remove(backend.Index, id.String())
		if err != nil {
			debug.Log("RebuildIndex.RebuildIndex", "error removing index %v: %v", id.Str(), err)
			return err
		}
	}

	cmd.global.Printf("checking for additional packs\n")
	newPacks := 0
	// buf is passed to LoadAll and replaced by its result on every
	// iteration, so it is carried from one pack to the next.
	var buf []byte
	for packID := range cmd.repo.List(backend.Data, done) {
		if packsDone.Has(packID) {
			continue
		}

		debug.Log("RebuildIndex.RebuildIndex", "pack %v not indexed", packID.Str())
		newPacks++

		// Loop-local err; it shadows the function-level err inside this
		// loop body.
		var err error

		h := backend.Handle{Type: backend.Data, Name: packID.String()}
		buf, err = backend.LoadAll(cmd.repo.Backend(), h, buf)
		if err != nil {
			debug.Log("RebuildIndex.RebuildIndex", "error while loading pack %v", packID.Str())
			return fmt.Errorf("error while loading pack %v: %v", packID.Str(), err)
		}

		// The hash of the loaded data must equal the pack ID; otherwise the
		// rebuild is aborted.
		hash := backend.Hash(buf)
		if !hash.Equal(packID) {
			debug.Log("RebuildIndex.RebuildIndex", "Pack ID does not match, want %v, got %v", packID.Str(), hash.Str())
			return fmt.Errorf("Pack ID does not match, want %v, got %v", packID.Str(), hash.Str())
		}

		up, err := pack.NewUnpacker(cmd.repo.Key(), bytes.NewReader(buf))
		if err != nil {
			debug.Log("RebuildIndex.RebuildIndex", "error while unpacking pack %v", packID.Str())
			return err
		}

		// Add every entry of the pack to the combined index.
		for _, blob := range up.Entries {
			debug.Log("RebuildIndex.RebuildIndex", "pack %v: blob %v", packID.Str(), blob)
			combinedIndex.Store(repository.PackedBlob{
				Type:   blob.Type,
				ID:     blob.ID,
				PackID: packID,
				Offset: blob.Offset,
				Length: blob.Length,
			})
		}

		if repository.IndexFull(combinedIndex) {
			combinedIndex, err = cmd.storeIndex(combinedIndex)
			if err != nil {
				return err
			}
		}
	}

	// Store whatever is left over from phase 3.
	if combinedIndex.Length() > 0 {
		combinedIndex, err = cmd.storeIndex(combinedIndex)
		if err != nil {
			return err
		}
	}

	cmd.global.Printf("added %d packs to the index\n", newPacks)

	debug.Log("RebuildIndex.RebuildIndex", "done")

	return nil
}