// Execute runs the 'optimize' command: it loads the indexes, checks the tree
// structure and repacks all blobs that are no longer referenced.
func (cmd CmdOptimize) Execute(args []string) error {
	if len(args) != 0 {
		return errors.New("optimize has no arguments")
	}

	repo, err := cmd.global.OpenRepository()
	if err != nil {
		return err
	}

	cmd.global.Verbosef("Create exclusive lock for repository\n")
	lock, err := lockRepoExclusive(repo)
	defer unlockRepo(lock)
	if err != nil {
		return err
	}

	chkr := checker.New(repo)

	cmd.global.Verbosef("Load indexes\n")
	_, errs := chkr.LoadIndex()

	if len(errs) > 0 {
		for _, err := range errs {
			cmd.global.Warnf("error: %v\n", err)
		}
		return fmt.Errorf("LoadIndex returned errors")
	}

	done := make(chan struct{})
	errChan := make(chan error)
	go chkr.Structure(errChan, done)

	for err := range errChan {
		if e, ok := err.(checker.TreeError); ok {
			cmd.global.Warnf("error for tree %v:\n", e.ID.Str())
			for _, treeErr := range e.Errors {
				cmd.global.Warnf("  %v\n", treeErr)
			}
		} else {
			cmd.global.Warnf("error: %v\n", err)
		}
	}

	unusedBlobs := backend.NewIDSet(chkr.UnusedBlobs()...)
	cmd.global.Verbosef("%d unused blobs found, repacking...\n", len(unusedBlobs))

	repacker := checker.NewRepacker(repo, unusedBlobs)
	err = repacker.Repack()
	if err != nil {
		return err
	}

	cmd.global.Verbosef("repacking done\n")
	return nil
}
func TestIDSet(t *testing.T) {
	set := backend.NewIDSet()
	for i, test := range idsetTests {
		seen := set.Has(test.id)
		if seen != test.seen {
			t.Errorf("IDSet test %v failed: wanted %v, got %v", i, test.seen, seen)
		}
		set.Insert(test.id)
	}
}
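// The idsetTests table that drives TestIDSet is not shown above. A minimal
// sketch of what such a table could look like, assuming the randomID() helper
// used in the other tests: each entry records whether the ID is expected to
// already be in the set when it is checked (false on first sight, true once
// the same ID reappears after having been inserted). The concrete IDs and
// layout are illustrative, not taken from the source.

var sketchID1, sketchID2 = randomID(), randomID()

var idsetTests = []struct {
	id   backend.ID
	seen bool
}{
	{sketchID1, false}, // first occurrence: not yet in the set
	{sketchID2, false},
	{sketchID1, true}, // repeated ID: must already be present
	{sketchID2, true},
}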
// NewMasterIndex creates a new master index.
func NewMasterIndex() *MasterIndex {
	return &MasterIndex{
		inFlight: struct {
			backend.IDSet
			sync.RWMutex
		}{
			IDSet: backend.NewIDSet(),
		},
	}
}
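// The anonymous struct above embeds an IDSet together with the RWMutex that
// guards it, so callers lock and use the set through a single field. A minimal
// sketch of how such an in-flight set might be updated and queried; the method
// names markInFlight and isInFlight are hypothetical, not part of the source:

func (mi *MasterIndex) markInFlight(id backend.ID) {
	mi.inFlight.Lock() // write lock from the embedded sync.RWMutex
	defer mi.inFlight.Unlock()
	mi.inFlight.Insert(id) // Insert from the embedded backend.IDSet
}

func (mi *MasterIndex) isInFlight(id backend.ID) bool {
	mi.inFlight.RLock() // a read lock suffices for Has
	defer mi.inFlight.RUnlock()
	return mi.inFlight.Has(id)
}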
// FindBlobsForPacks returns the set of blobs contained in the given set of packs.
func FindBlobsForPacks(repo *repository.Repository, packs backend.IDSet) (backend.IDSet, error) {
	blobs := backend.NewIDSet()

	for packID := range packs {
		for _, packedBlob := range repo.Index().ListPack(packID) {
			blobs.Insert(packedBlob.ID)
		}
	}

	return blobs, nil
}
// Packs returns all packs in this index
func (idx *Index) Packs() backend.IDSet {
	idx.m.Lock()
	defer idx.m.Unlock()

	packs := backend.NewIDSet()
	for _, entry := range idx.pack {
		packs.Insert(entry.packID)
	}

	return packs
}
// FindPacksForBlobs returns the set of packs that contain the blobs.
func FindPacksForBlobs(repo *repository.Repository, blobs backend.IDSet) (backend.IDSet, error) {
	packs := backend.NewIDSet()
	idx := repo.Index()

	for id := range blobs {
		blob, err := idx.Lookup(id)
		if err != nil {
			return nil, err
		}
		packs.Insert(blob.PackID)
	}

	return packs, nil
}
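// FindPacksForBlobs and FindBlobsForPacks are complementary: the first maps a
// set of blobs to the packs holding them, the second maps those packs back to
// every blob they contain. A minimal sketch of how a repacking step could
// combine them to find both the packs to rewrite and the blobs that must be
// kept; repackCandidates is a hypothetical helper, not part of the source:

func repackCandidates(repo *repository.Repository, unused backend.IDSet) (packs, keep backend.IDSet, err error) {
	// packs holding at least one unused blob
	packs, err = FindPacksForBlobs(repo, unused)
	if err != nil {
		return nil, nil, err
	}

	// all blobs stored in those packs
	all, err := FindBlobsForPacks(repo, packs)
	if err != nil {
		return nil, nil, err
	}

	// blobs that must survive the rewrite: everything except the unused ones
	keep = backend.NewIDSet()
	for id := range all {
		if !unused.Has(id) {
			keep.Insert(id)
		}
	}

	return packs, keep, nil
}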
func TestIndexPacks(t *testing.T) {
	idx := repository.NewIndex()
	packs := backend.NewIDSet()

	for i := 0; i < 20; i++ {
		packID := randomID()
		idx.Store(pack.Data, randomID(), packID, 0, 23)
		packs.Insert(packID)
	}

	idxPacks := idx.Packs()
	Assert(t, packs.Equals(idxPacks), "packs in index do not match packs added to index")
}
func TestIndexPacks(t *testing.T) {
	idx := repository.NewIndex()
	packs := backend.NewIDSet()

	for i := 0; i < 20; i++ {
		packID := randomID()
		idx.Store(repository.PackedBlob{
			Type:   pack.Data,
			ID:     randomID(),
			PackID: packID,
			Offset: 0,
			Length: 23,
		})
		packs.Insert(packID)
	}

	idxPacks := idx.Packs()
	Assert(t, packs.Equals(idxPacks), "packs in index do not match packs added to index")
}
func TestSet(t *testing.T) {
	s := backend.NewIDSet()

	testID := randomID()
	err := s.Find(testID)
	Assert(t, err != nil, "found test ID in IDSet before insertion")

	for i := 0; i < 238; i++ {
		s.Insert(randomID())
	}

	s.Insert(testID)
	OK(t, s.Find(testID))

	for i := 0; i < 80; i++ {
		s.Insert(randomID())
	}

	s.Insert(testID)
	OK(t, s.Find(testID))
}
// NewArchiver returns a new archiver.
func NewArchiver(repo *repository.Repository) *Archiver {
	arch := &Archiver{
		repo:      repo,
		blobToken: make(chan struct{}, maxConcurrentBlobs),
		knownBlobs: struct {
			backend.IDSet
			sync.Mutex
		}{
			IDSet: backend.NewIDSet(),
		},
	}

	for i := 0; i < maxConcurrentBlobs; i++ {
		arch.blobToken <- struct{}{}
	}

	arch.Error = archiverAbortOnAllErrors
	arch.SelectFilter = archiverAllowAllFiles
	return arch
}
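// blobToken acts as a counting semaphore: the channel is pre-filled with
// maxConcurrentBlobs tokens, and a worker takes one before saving a blob and
// returns it afterwards. A minimal sketch of that pattern; saveBlobLimited is
// a hypothetical helper, not part of the source:

func (arch *Archiver) saveBlobLimited(save func() error) error {
	token := <-arch.blobToken // block until a slot is free
	defer func() { arch.blobToken <- token }() // release the slot when done
	return save()
}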
func TestRepacker(t *testing.T) {
	WithTestEnvironment(t, checkerTestData, func(repodir string) {
		repo := OpenLocalRepo(t, repodir)
		OK(t, repo.LoadIndex())

		repo.Backend().Remove(backend.Snapshot, "c2b53c5e6a16db92fbb9aa08bd2794c58b379d8724d661ee30d20898bdfdff22")

		unusedBlobs := backend.IDSet{
			ParseID("5714f7274a8aa69b1692916739dc3835d09aac5395946b8ec4f58e563947199a"): struct{}{},
			ParseID("08d0444e9987fa6e35ce4232b2b71473e1a8f66b2f9664cc44dc57aad3c5a63a"): struct{}{},
			ParseID("356493f0b00a614d36c698591bbb2b1d801932d85328c1f508019550034549fc"): struct{}{},
			ParseID("b8a6bcdddef5c0f542b4648b2ef79bc0ed4377d4109755d2fb78aff11e042663"): struct{}{},
		}

		chkr := checker.New(repo)
		_, errs := chkr.LoadIndex()
		OKs(t, errs)

		errs = checkStruct(chkr)
		OKs(t, errs)

		list := backend.NewIDSet(chkr.UnusedBlobs()...)
		if !unusedBlobs.Equals(list) {
			t.Fatalf("expected unused blobs:\n  %v\ngot:\n  %v", unusedBlobs, list)
		}

		repacker := checker.NewRepacker(repo, unusedBlobs)
		OK(t, repacker.Repack())

		chkr = checker.New(repo)
		_, errs = chkr.LoadIndex()
		OKs(t, errs)
		OKs(t, checkPacks(chkr))
		OKs(t, checkStruct(chkr))

		blobs := chkr.UnusedBlobs()
		Assert(t, len(blobs) == 0, "expected zero unused blobs, got %v", blobs)
	})
}
// LoadIndex loads all index files.
func (c *Checker) LoadIndex() (hints []error, errs []error) {
	debug.Log("LoadIndex", "Start")

	type indexRes struct {
		Index *repository.Index
		ID    string
	}

	indexCh := make(chan indexRes)

	worker := func(id backend.ID, done <-chan struct{}) error {
		debug.Log("LoadIndex", "worker got index %v", id)
		idx, err := repository.LoadIndexWithDecoder(c.repo, id.String(), repository.DecodeIndex)
		if err == repository.ErrOldIndexFormat {
			debug.Log("LoadIndex", "index %v has old format", id.Str())
			hints = append(hints, ErrOldIndexFormat{id})

			idx, err = repository.LoadIndexWithDecoder(c.repo, id.String(), repository.DecodeOldIndex)
		}

		if err != nil {
			return err
		}

		select {
		case indexCh <- indexRes{Index: idx, ID: id.String()}:
		case <-done:
		}

		return nil
	}

	var perr error
	go func() {
		defer close(indexCh)
		debug.Log("LoadIndex", "start loading indexes in parallel")
		perr = repository.FilesInParallel(c.repo.Backend(), backend.Index, defaultParallelism,
			repository.ParallelWorkFuncParseID(worker))
		debug.Log("LoadIndex", "loading indexes finished, error: %v", perr)
	}()

	done := make(chan struct{})
	defer close(done)

	if perr != nil {
		errs = append(errs, perr)
		return hints, errs
	}

	packToIndex := make(map[backend.ID]backend.IDSet)

	for res := range indexCh {
		debug.Log("LoadIndex", "process index %v", res.ID)
		idxID, err := backend.ParseID(res.ID)
		if err != nil {
			errs = append(errs, fmt.Errorf("unable to parse as index ID: %v", res.ID))
			continue
		}

		c.indexes[idxID] = res.Index
		c.masterIndex.Insert(res.Index)

		debug.Log("LoadIndex", "process blobs")
		cnt := 0
		for blob := range res.Index.Each(done) {
			c.packs[blob.PackID] = struct{}{}
			c.blobs[blob.ID] = struct{}{}
			c.blobRefs.M[blob.ID] = 0
			cnt++

			if _, ok := packToIndex[blob.PackID]; !ok {
				packToIndex[blob.PackID] = backend.NewIDSet()
			}
			packToIndex[blob.PackID].Insert(idxID)
		}

		debug.Log("LoadIndex", "%d blobs processed", cnt)
	}

	debug.Log("LoadIndex", "done, error %v", perr)

	debug.Log("LoadIndex", "checking for duplicate packs")
	for packID := range c.packs {
		debug.Log("LoadIndex", "  check pack %v: contained in %d indexes", packID.Str(), len(packToIndex[packID]))
		if len(packToIndex[packID]) > 1 {
			hints = append(hints, ErrDuplicatePacks{
				PackID:  packID,
				Indexes: packToIndex[packID],
			})
		}
	}

	c.repo.SetIndex(c.masterIndex)

	return hints, errs
}
	}
	})
}

func TestRebuildIndexAlwaysFull(t *testing.T) {
	repository.IndexFull = func(*repository.Index) bool { return true }
	TestRebuildIndex(t)
}

var optimizeTests = []struct {
	testFilename string
	snapshots    backend.IDSet
}{
	{
		filepath.Join("..", "..", "checker", "testdata", "checker-test-repo.tar.gz"),
		backend.NewIDSet(ParseID("a13c11e582b77a693dd75ab4e3a3ba96538a056594a4b9076e4cacebe6e06d43")),
	},
	{
		filepath.Join("testdata", "old-index-repo.tar.gz"),
		nil,
	},
	{
		filepath.Join("testdata", "old-index-repo.tar.gz"),
		backend.NewIDSet(
			ParseID("f7d83db709977178c9d1a09e4009355e534cde1a135b8186b8b118a3fc4fcd41"),
			ParseID("51d249d28815200d59e4be7b3f21a157b864dc343353df9d8e498220c2499b02"),
		),
	},
}

func TestOptimizeRemoveUnusedBlobs(t *testing.T) {
func fsckTree(global CmdFsck, repo *repository.Repository, id backend.ID) error {
	debug.Log("restic.fsckTree", "checking tree %v", id.Str())

	tree, err := restic.LoadTree(repo, id)
	if err != nil {
		return err
	}

	// if orphan check is active, record storage id
	if global.o_trees != nil {
		// add ID to list
		global.o_trees.Insert(id)
	}

	var firstErr error

	seenIDs := backend.NewIDSet()

	for i, node := range tree.Nodes {
		if node.Name == "" {
			return fmt.Errorf("node %v of tree %v has no name", i, id.Str())
		}

		if node.Type == "" {
			return fmt.Errorf("node %q of tree %v has no type", node.Name, id.Str())
		}

		switch node.Type {
		case "file":
			if node.Content == nil {
				debug.Log("restic.fsckTree", "file node %q of tree %v has no content: %v", node.Name, id, node)
				return fmt.Errorf("file node %q of tree %v has no content: %v", node.Name, id, node)
			}

			if node.Content == nil && node.Error == "" {
				debug.Log("restic.fsckTree", "file node %q of tree %v has no content", node.Name, id)
				return fmt.Errorf("file node %q of tree %v has no content", node.Name, id)
			}

			// record ids
			for _, id := range node.Content {
				seenIDs.Insert(id)
			}

			debug.Log("restic.fsckTree", "check file %v (%v)", node.Name, id.Str())
			bytes, err := fsckFile(global, repo, node.Content)
			if err != nil {
				return err
			}

			if bytes != node.Size {
				debug.Log("restic.fsckTree", "file node %q of tree %v has size %d, but only %d bytes could be found", node.Name, id, node.Size, bytes)
				return fmt.Errorf("file node %q of tree %v has size %d, but only %d bytes could be found", node.Name, id, node.Size, bytes)
			}

		case "dir":
			if node.Subtree == nil {
				return fmt.Errorf("dir node %q of tree %v has no subtree", node.Name, id)
			}

			// record id
			seenIDs.Insert(node.Subtree)

			err = fsckTree(global, repo, node.Subtree)
			if err != nil {
				firstErr = err
				fmt.Fprintf(os.Stderr, "%v\n", err)
			}
		}
	}

	// check map for unused ids
	// for _, id := range tree.Map.IDs() {
	// 	if seenIDs.Find(id) != nil {
	// 		return fmt.Errorf("tree %v: map contains unused ID %v", id, id)
	// 	}
	// }

	return firstErr
}
func (cmd CmdFsck) Execute(args []string) error {
	if len(args) != 0 {
		return errors.New("fsck has no arguments")
	}

	if cmd.RemoveOrphaned && !cmd.Orphaned {
		cmd.Orphaned = true
	}

	s, err := cmd.global.OpenRepository()
	if err != nil {
		return err
	}

	err = s.LoadIndex()
	if err != nil {
		return err
	}

	if cmd.Snapshot != "" {
		id, err := restic.FindSnapshot(s, cmd.Snapshot)
		if err != nil {
			return fmt.Errorf("invalid id %q: %v", cmd.Snapshot, err)
		}

		err = fsckSnapshot(cmd, s, id)
		if err != nil {
			fmt.Fprintf(os.Stderr, "check for snapshot %v failed\n", id)
		}

		return err
	}

	if cmd.Orphaned {
		cmd.o_data = backend.NewIDSet()
		cmd.o_trees = backend.NewIDSet()
	}

	done := make(chan struct{})
	defer close(done)

	var firstErr error
	for id := range s.List(backend.Snapshot, done) {
		err = fsckSnapshot(cmd, s, id)
		if err != nil {
			fmt.Fprintf(os.Stderr, "check for snapshot %v failed\n", id)
			firstErr = err
		}
	}

	if !cmd.Orphaned {
		return firstErr
	}

	debug.Log("restic.fsck", "starting orphaned check\n")

	cnt := make(map[pack.BlobType]*backend.IDSet)
	cnt[pack.Data] = cmd.o_data
	cnt[pack.Tree] = cmd.o_trees

	for blob := range s.Index().Each(done) {
		debug.Log("restic.fsck", "checking %v blob %v\n", blob.Type, blob.ID)

		err = cnt[blob.Type].Find(blob.ID)
		if err != nil {
			debug.Log("restic.fsck", "  blob %v is orphaned\n", blob.ID)

			if !cmd.RemoveOrphaned {
				fmt.Printf("orphaned %v blob %v\n", blob.Type, blob.ID)
				continue
			}

			fmt.Printf("removing orphaned %v blob %v\n", blob.Type, blob.ID)
			// err := s.Remove(d.tpe, name)
			// if err != nil {
			// 	return err
			// }
			return errors.New("not implemented")
		}
	}

	return firstErr
}
func (cmd CmdRebuildIndex) RebuildIndex() error {
	debug.Log("RebuildIndex.RebuildIndex", "start")

	done := make(chan struct{})
	defer close(done)

	indexIDs := backend.NewIDSet()
	for id := range cmd.repo.List(backend.Index, done) {
		indexIDs.Insert(id)
	}

	cmd.global.Printf("rebuilding index from %d indexes\n", len(indexIDs))

	debug.Log("RebuildIndex.RebuildIndex", "found %v indexes", len(indexIDs))

	combinedIndex := repository.NewIndex()
	packsDone := backend.NewIDSet()

	type Blob struct {
		id  backend.ID
		tpe pack.BlobType
	}
	blobsDone := make(map[Blob]struct{})

	i := 0
	for indexID := range indexIDs {
		cmd.global.Printf("  loading index %v\n", i)

		debug.Log("RebuildIndex.RebuildIndex", "load index %v", indexID.Str())
		idx, err := repository.LoadIndex(cmd.repo, indexID.String())
		if err != nil {
			return err
		}

		debug.Log("RebuildIndex.RebuildIndex", "adding blobs from index %v", indexID.Str())
		for packedBlob := range idx.Each(done) {
			packsDone.Insert(packedBlob.PackID)

			b := Blob{
				id:  packedBlob.ID,
				tpe: packedBlob.Type,
			}

			if _, ok := blobsDone[b]; ok {
				continue
			}
			blobsDone[b] = struct{}{}

			combinedIndex.Store(packedBlob)
		}

		combinedIndex.AddToSupersedes(indexID)

		if repository.IndexFull(combinedIndex) {
			combinedIndex, err = cmd.storeIndex(combinedIndex)
			if err != nil {
				return err
			}
		}

		i++
	}

	var err error
	if combinedIndex.Length() > 0 {
		combinedIndex, err = cmd.storeIndex(combinedIndex)
		if err != nil {
			return err
		}
	}

	cmd.global.Printf("removing %d old indexes\n", len(indexIDs))
	for id := range indexIDs {
		debug.Log("RebuildIndex.RebuildIndex", "remove index %v", id.Str())

		err := cmd.repo.Backend().Remove(backend.Index, id.String())
		if err != nil {
			debug.Log("RebuildIndex.RebuildIndex", "error removing index %v: %v", id.Str(), err)
			return err
		}
	}

	cmd.global.Printf("checking for additional packs\n")
	newPacks := 0
	for packID := range cmd.repo.List(backend.Data, done) {
		if packsDone.Has(packID) {
			continue
		}

		debug.Log("RebuildIndex.RebuildIndex", "pack %v not indexed", packID.Str())
		newPacks++

		rd, err := cmd.repo.Backend().GetReader(backend.Data, packID.String(), 0, 0)
		if err != nil {
			debug.Log("RebuildIndex.RebuildIndex", "GetReader returned error: %v", err)
			return err
		}

		var readSeeker io.ReadSeeker
		if r, ok := rd.(io.ReadSeeker); ok {
			debug.Log("RebuildIndex.RebuildIndex", "reader is seekable")
			readSeeker = r
		} else {
			debug.Log("RebuildIndex.RebuildIndex", "reader is not seekable, loading contents to ram")
			buf, err := ioutil.ReadAll(rd)
			if err != nil {
				return err
			}

			readSeeker = bytes.NewReader(buf)
		}

		up, err := pack.NewUnpacker(cmd.repo.Key(), readSeeker)
		if err != nil {
			debug.Log("RebuildIndex.RebuildIndex", "error while unpacking pack %v", packID.Str())
			return err
		}

		for _, blob := range up.Entries {
			debug.Log("RebuildIndex.RebuildIndex", "pack %v: blob %v", packID.Str(), blob)
			combinedIndex.Store(repository.PackedBlob{
				Type:   blob.Type,
				ID:     blob.ID,
				PackID: packID,
				Offset: blob.Offset,
				Length: blob.Length,
			})
		}

		err = rd.Close()
		debug.Log("RebuildIndex.RebuildIndex", "error closing reader for pack %v: %v", packID.Str(), err)

		if repository.IndexFull(combinedIndex) {
			combinedIndex, err = cmd.storeIndex(combinedIndex)
			if err != nil {
				return err
			}
		}
	}

	if combinedIndex.Length() > 0 {
		combinedIndex, err = cmd.storeIndex(combinedIndex)
		if err != nil {
			return err
		}
	}

	cmd.global.Printf("added %d packs to the index\n", newPacks)

	debug.Log("RebuildIndex.RebuildIndex", "done")
	return nil
}
func (cmd CmdRebuildIndex) RebuildIndex() error {
	debug.Log("RebuildIndex.RebuildIndex", "start")

	done := make(chan struct{})
	defer close(done)

	indexIDs := backend.NewIDSet()
	for id := range cmd.repo.List(backend.Index, done) {
		indexIDs.Insert(id)
	}

	cmd.global.Printf("rebuilding index from %d indexes\n", len(indexIDs))

	debug.Log("RebuildIndex.RebuildIndex", "found %v indexes", len(indexIDs))

	combinedIndex := repository.NewIndex()
	packsDone := backend.NewIDSet()

	type Blob struct {
		id  backend.ID
		tpe pack.BlobType
	}
	blobsDone := make(map[Blob]struct{})

	i := 0
	for indexID := range indexIDs {
		cmd.global.Printf("  loading index %v\n", i)

		debug.Log("RebuildIndex.RebuildIndex", "load index %v", indexID.Str())
		idx, err := repository.LoadIndex(cmd.repo, indexID.String())
		if err != nil {
			return err
		}

		debug.Log("RebuildIndex.RebuildIndex", "adding blobs from index %v", indexID.Str())
		for packedBlob := range idx.Each(done) {
			packsDone.Insert(packedBlob.PackID)

			b := Blob{
				id:  packedBlob.ID,
				tpe: packedBlob.Type,
			}

			if _, ok := blobsDone[b]; ok {
				continue
			}
			blobsDone[b] = struct{}{}

			combinedIndex.Store(packedBlob)
		}

		combinedIndex.AddToSupersedes(indexID)

		if repository.IndexFull(combinedIndex) {
			combinedIndex, err = cmd.storeIndex(combinedIndex)
			if err != nil {
				return err
			}
		}

		i++
	}

	var err error
	if combinedIndex.Length() > 0 {
		combinedIndex, err = cmd.storeIndex(combinedIndex)
		if err != nil {
			return err
		}
	}

	cmd.global.Printf("removing %d old indexes\n", len(indexIDs))
	for id := range indexIDs {
		debug.Log("RebuildIndex.RebuildIndex", "remove index %v", id.Str())

		err := cmd.repo.Backend().Remove(backend.Index, id.String())
		if err != nil {
			debug.Log("RebuildIndex.RebuildIndex", "error removing index %v: %v", id.Str(), err)
			return err
		}
	}

	cmd.global.Printf("checking for additional packs\n")
	newPacks := 0
	var buf []byte
	for packID := range cmd.repo.List(backend.Data, done) {
		if packsDone.Has(packID) {
			continue
		}

		debug.Log("RebuildIndex.RebuildIndex", "pack %v not indexed", packID.Str())
		newPacks++

		var err error

		h := backend.Handle{Type: backend.Data, Name: packID.String()}
		buf, err = backend.LoadAll(cmd.repo.Backend(), h, buf)
		if err != nil {
			debug.Log("RebuildIndex.RebuildIndex", "error while loading pack %v", packID.Str())
			return fmt.Errorf("error while loading pack %v: %v", packID.Str(), err)
		}

		hash := backend.Hash(buf)
		if !hash.Equal(packID) {
			debug.Log("RebuildIndex.RebuildIndex", "Pack ID does not match, want %v, got %v", packID.Str(), hash.Str())
			return fmt.Errorf("Pack ID does not match, want %v, got %v", packID.Str(), hash.Str())
		}

		up, err := pack.NewUnpacker(cmd.repo.Key(), bytes.NewReader(buf))
		if err != nil {
			debug.Log("RebuildIndex.RebuildIndex", "error while unpacking pack %v", packID.Str())
			return err
		}

		for _, blob := range up.Entries {
			debug.Log("RebuildIndex.RebuildIndex", "pack %v: blob %v", packID.Str(), blob)
			combinedIndex.Store(repository.PackedBlob{
				Type:   blob.Type,
				ID:     blob.ID,
				PackID: packID,
				Offset: blob.Offset,
				Length: blob.Length,
			})
		}

		if repository.IndexFull(combinedIndex) {
			combinedIndex, err = cmd.storeIndex(combinedIndex)
			if err != nil {
				return err
			}
		}
	}

	if combinedIndex.Length() > 0 {
		combinedIndex, err = cmd.storeIndex(combinedIndex)
		if err != nil {
			return err
		}
	}

	cmd.global.Printf("added %d packs to the index\n", newPacks)

	debug.Log("RebuildIndex.RebuildIndex", "done")
	return nil
}