Beispiel #1
0
// New creates a Checker bound to repo. The pack and blob ID sets, the master
// index, the per-index map and the blob reference counter map are all
// allocated here.
func New(repo restic.Repository) *Checker {
	checker := new(Checker)

	checker.packs = restic.NewIDSet()
	checker.blobs = restic.NewIDSet()
	checker.masterIndex = repository.NewMasterIndex()
	checker.indexes = make(map[restic.ID]*repository.Index)
	checker.repo = repo
	checker.blobRefs.M = make(map[restic.ID]uint)

	return checker
}
Beispiel #2
0
// newIndex returns an empty Index whose pack map, blob map and index ID set
// are already allocated.
func newIndex() *Index {
	idx := new(Index)
	idx.Packs = make(map[restic.ID]Pack)
	idx.Blobs = make(map[restic.BlobHandle]Blob)
	idx.IndexIDs = restic.NewIDSet()
	return idx
}
Beispiel #3
0
// NewSnapshotsDir builds the directory object for the snapshots of repo.
// ownerIsRoot is stored unchanged on the returned value; the map of known
// snapshots and the set of processed IDs start out empty.
func NewSnapshotsDir(repo restic.Repository, ownerIsRoot bool) *SnapshotsDir {
	debug.Log("fuse mount initiated")

	dir := &SnapshotsDir{repo: repo, ownerIsRoot: ownerIsRoot}
	dir.knownSnapshots = make(map[string]SnapshotWithId)
	dir.processed = restic.NewIDSet()

	return dir
}
Beispiel #4
0
// listPacks collects the IDs of all data files that repo.List reports into a
// set. The listing is cancelled via its done channel when the function
// returns.
func listPacks(t *testing.T, repo restic.Repository) restic.IDSet {
	stop := make(chan struct{})
	defer close(stop)

	ids := restic.NewIDSet()
	ch := repo.List(restic.DataFile, stop)
	for packID := range ch {
		ids.Insert(packID)
	}
	return ids
}
Beispiel #5
0
// Packs returns the set of pack IDs referenced by any entry of this index.
// The index mutex is held while the entries are walked.
func (idx *Index) Packs() restic.IDSet {
	idx.m.Lock()
	defer idx.m.Unlock()

	result := restic.NewIDSet()

	for _, entries := range idx.pack {
		for _, e := range entries {
			result.Insert(e.packID)
		}
	}

	return result
}
Beispiel #6
0
// RebuildIndex lists all packs in the repo, stores their blobs in a fresh
// index, saves that index and afterwards removes the index files that existed
// before. This operation should only be done with an exclusive lock in place.
//
// Packs that could not be listed and index files that could not be removed
// are reported on stderr and otherwise ignored; only a failure to save the
// new index is returned as an error.
func RebuildIndex(repo restic.Repository) error {
	debug.Log("start rebuilding index")

	done := make(chan struct{})
	defer close(done)

	jobs := make(chan worker.Job)
	go list.AllPacks(repo, jobs, done)

	newIdx := NewIndex()
	for job := range jobs {
		packID := job.Data.(restic.ID)

		if job.Error != nil {
			fmt.Fprintf(os.Stderr, "error for pack %v: %v\n", packID, job.Error)
			continue
		}

		result := job.Result.(list.Result)
		for _, blob := range result.Entries() {
			newIdx.Store(restic.PackedBlob{
				Blob:   blob,
				PackID: result.PackID(),
			})
		}
	}

	// Remember every existing index file: the new index supersedes them and
	// they are deleted once the new index has been written.
	obsolete := restic.NewIDSet()
	for oldID := range repo.List(restic.IndexFile, done) {
		newIdx.AddToSupersedes(oldID)
		obsolete.Insert(oldID)
	}

	newID, err := SaveIndex(repo, newIdx)
	if err != nil {
		debug.Log("error saving index: %v", err)
		return err
	}
	debug.Log("new index saved as %v", newID.Str())

	for oldID := range obsolete {
		if rmErr := repo.Backend().Remove(restic.IndexFile, oldID.String()); rmErr != nil {
			fmt.Fprintf(os.Stderr, "unable to remove index %v: %v\n", oldID.Str(), rmErr)
		}
	}

	return nil
}
Beispiel #7
0
// PacksForBlobs returns the IDs of all packs that contain at least one of the
// given blobs. Blobs that are not present in the index are skipped.
func (idx *Index) PacksForBlobs(blobs restic.BlobSet) (packs restic.IDSet) {
	packs = restic.NewIDSet()

	for handle := range blobs {
		if known, found := idx.Blobs[handle]; found {
			for packID := range known.Packs {
				packs.Insert(packID)
			}
		}
	}

	return packs
}
Beispiel #8
0
// findPacksForBlobs looks up every blob of blobs in the index of repo and
// returns the set of pack IDs from the lookup results. A failed lookup aborts
// the test via t.Fatal.
func findPacksForBlobs(t *testing.T, repo restic.Repository, blobs restic.BlobSet) restic.IDSet {
	result := restic.NewIDSet()
	index := repo.Index()

	for handle := range blobs {
		found, err := index.Lookup(handle.ID, handle.Type)
		if err != nil {
			t.Fatal(err)
		}

		for _, packed := range found {
			result.Insert(packed.PackID)
		}
	}

	return result
}
Beispiel #9
0
// AddPack registers the pack id with the given size and entries in the index.
// An error is returned when a pack with the same ID is already present. Each
// entry is additionally recorded in the blob map, which tracks for every blob
// handle the packs it is contained in.
func (idx *Index) AddPack(id restic.ID, size int64, entries []restic.Blob) error {
	if _, exists := idx.Packs[id]; exists {
		return errors.Errorf("pack %v already present in the index", id.Str())
	}

	idx.Packs[id] = Pack{Size: size, Entries: entries}

	for _, e := range entries {
		handle := restic.BlobHandle{ID: e.ID, Type: e.Type}

		blob, known := idx.Blobs[handle]
		if !known {
			// First occurrence of this blob: create its record.
			blob = Blob{
				Size:  int64(e.Length),
				Packs: restic.NewIDSet(),
			}
			idx.Blobs[handle] = blob
		}

		blob.Packs.Insert(id)
	}

	return nil
}
Beispiel #10
0
// New creates an Archiver for repo. The blob token channel is buffered and
// pre-filled with maxConcurrentBlobs tokens, the set of known blobs starts
// empty, and the Error and SelectFilter callbacks are set to
// archiverAbortOnAllErrors and archiverAllowAllFiles.
func New(repo restic.Repository) *Archiver {
	arch := &Archiver{
		repo:      repo,
		blobToken: make(chan struct{}, maxConcurrentBlobs),
		knownBlobs: struct {
			restic.IDSet
			sync.Mutex
		}{
			IDSet: restic.NewIDSet(),
		},
	}

	// Hand out all tokens up front so the channel starts completely filled.
	for remaining := maxConcurrentBlobs; remaining > 0; remaining-- {
		arch.blobToken <- struct{}{}
	}

	arch.Error = archiverAbortOnAllErrors
	arch.SelectFilter = archiverAllowAllFiles

	return arch
}
Beispiel #11
0
// TestIndexPacks stores blobs from 20 randomly named packs in a new index and
// asserts that Packs() reports exactly those pack IDs.
func TestIndexPacks(t *testing.T) {
	const numPacks = 20

	idx := repository.NewIndex()
	expected := restic.NewIDSet()

	for n := 0; n < numPacks; n++ {
		id := restic.NewRandomID()
		blob := restic.Blob{
			Type:   restic.DataBlob,
			ID:     restic.NewRandomID(),
			Offset: 0,
			Length: 23,
		}

		idx.Store(restic.PackedBlob{Blob: blob, PackID: id})
		expected.Insert(id)
	}

	Assert(t, expected.Equals(idx.Packs()), "packs in index do not match packs added to index")
}
Beispiel #12
0
// Packs checks that all packs referenced in the index are still available and
// there are no packs that aren't in an index. errChan is closed after all
// packs have been checked.
//
// Every pack ID known to the checker is handed to one of defaultParallelism
// packIDTester goroutines. Afterwards all data files in the repository are
// listed, and each one that was not referenced by an index is recorded in
// c.orphanedPacks and reported on errChan as a PackError.
//
// Closing done cancels the check: no further IDs are handed to the workers,
// the function waits for the workers to finish and then returns (which closes
// errChan).
func (c *Checker) Packs(errChan chan<- error, done <-chan struct{}) {
	defer close(errChan)

	debug.Log("checking for %d packs", len(c.packs))
	seenPacks := restic.NewIDSet()

	var workerWG sync.WaitGroup

	idCh := make(chan restic.ID)
	for i := 0; i < defaultParallelism; i++ {
		workerWG.Add(1)
		go packIDTester(c.repo, idCh, errChan, &workerWG, done)
	}

	// Feed the workers, but never block on the send once done is closed:
	// with no worker receiving any more, a plain send would hang forever.
	cancelled := false
feed:
	for id := range c.packs {
		seenPacks.Insert(id)
		select {
		case idCh <- id:
		case <-done:
			cancelled = true
			break feed
		}
	}
	close(idCh)

	// The workers send on errChan, so they must have terminated before the
	// deferred close(errChan) runs, also in the cancelled case.
	debug.Log("waiting for %d workers to terminate", defaultParallelism)
	workerWG.Wait()
	debug.Log("workers terminated")

	if cancelled {
		return
	}

	for id := range c.repo.List(restic.DataFile, done) {
		debug.Log("check data blob %v", id.Str())
		if !seenPacks.Has(id) {
			c.orphanedPacks = append(c.orphanedPacks, id)
			select {
			case <-done:
				return
			case errChan <- PackError{ID: id, Orphaned: true, Err: errors.New("not referenced in any index")}:
			}
		}
	}
}
Beispiel #13
0
// Load builds an Index from all index files listed in the repo. Every index
// file is decoded with loadIndexJSON and its packs are added to the result
// with a size of 0; the ID of each loaded file is recorded in IndexIDs.
// Progress is reported on p, one blob per index file.
//
// Index files that are superseded by another loaded index file are reported
// on stderr as removable. The first load or AddPack failure aborts the whole
// operation and is returned.
func Load(repo restic.Repository, p *restic.Progress) (*Index, error) {
	debug.Log("loading indexes")

	p.Start()
	defer p.Done()

	done := make(chan struct{})
	defer close(done)

	// supersededBy maps an index ID to the IDs it declares as superseded;
	// loadedIDs tracks which index files were loaded (only the keys matter).
	supersededBy := make(map[restic.ID]restic.IDSet)
	loadedIDs := make(map[restic.ID]map[restic.ID]Pack)

	combined := newIndex()

	for indexID := range repo.List(restic.IndexFile, done) {
		p.Report(restic.Stat{Blobs: 1})

		debug.Log("Load index %v", indexID.Str())
		jsonIdx, err := loadIndexJSON(repo, indexID)
		if err != nil {
			return nil, err
		}

		replaced := restic.NewIDSet()
		for _, oldID := range jsonIdx.Supersedes {
			debug.Log("  index %v supersedes %v", indexID.Str(), oldID)
			replaced.Insert(oldID)
		}
		supersededBy[indexID] = replaced

		for _, jpack := range jsonIdx.Packs {
			blobs := make([]restic.Blob, 0, len(jpack.Blobs))
			for _, jblob := range jpack.Blobs {
				blobs = append(blobs, restic.Blob{
					ID:     jblob.ID,
					Type:   jblob.Type,
					Offset: jblob.Offset,
					Length: jblob.Length,
				})
			}

			if addErr := combined.AddPack(jpack.ID, 0, blobs); addErr != nil {
				return nil, addErr
			}
		}

		loadedIDs[indexID] = make(map[restic.ID]Pack)
		combined.IndexIDs.Insert(indexID)
	}

	for superID, replaced := range supersededBy {
		for oldID := range replaced {
			if _, loaded := loadedIDs[oldID]; loaded {
				debug.Log("  removing index %v, superseded by %v", oldID.Str(), superID.Str())
				fmt.Fprintf(os.Stderr, "index %v can be removed, superseded by index %v\n", oldID.Str(), superID.Str())
				delete(loadedIDs, oldID)
			}
		}
	}

	return combined, nil
}
Beispiel #14
0
// runPrune removes data from the repository that is no longer referenced by
// any snapshot. It works in these steps:
//
//  1. open the repository, take an exclusive lock and load the index,
//  2. build a fresh index from the packs and gather statistics about packs,
//     blobs and duplicate blobs,
//  3. walk all snapshots to find the blobs that are still in use,
//  4. determine the packs that contain unused or duplicate blobs (rewrite)
//     and the packs that contain no used blob at all (remove),
//  5. repack the former, delete the latter,
//  6. build and save a new index, then remove the index files it supersedes.
//
// Failures to remove individual pack or index files are only reported; every
// other error aborts the run and is returned.
func runPrune(gopts GlobalOptions) error {
	repo, err := OpenRepository(gopts)
	if err != nil {
		return err
	}

	// NOTE(review): unlockRepo is deferred before the error check, so it also
	// runs when locking failed; confirm it tolerates the lock value returned
	// alongside an error.
	lock, err := lockRepoExclusive(repo)
	defer unlockRepo(lock)
	if err != nil {
		return err
	}

	err = repo.LoadIndex()
	if err != nil {
		return err
	}

	done := make(chan struct{})
	defer close(done)

	var stats struct {
		blobs     int
		packs     int
		snapshots int
		bytes     int64
	}

	Verbosef("counting files in repo\n")
	for range repo.List(restic.DataFile, done) {
		stats.packs++
	}

	Verbosef("building new index for repo\n")

	bar := newProgressMax(!gopts.Quiet, uint64(stats.packs), "packs")
	idx, err := index.New(repo, bar)
	if err != nil {
		return err
	}

	for _, pack := range idx.Packs {
		stats.bytes += pack.Size
	}
	Verbosef("repository contains %v packs (%v blobs) with %v bytes\n",
		len(idx.Packs), len(idx.Blobs), formatBytes(uint64(stats.bytes)))

	blobCount := make(map[restic.BlobHandle]int)
	duplicateBlobs := 0
	duplicateBytes := 0

	// find duplicate blobs
	for _, p := range idx.Packs {
		for _, entry := range p.Entries {
			stats.blobs++
			h := restic.BlobHandle{ID: entry.ID, Type: entry.Type}
			blobCount[h]++

			// every occurrence after the first one counts as a duplicate
			if blobCount[h] > 1 {
				duplicateBlobs++
				duplicateBytes += int(entry.Length)
			}
		}
	}

	Verbosef("processed %d blobs: %d duplicate blobs, %v duplicate\n",
		stats.blobs, duplicateBlobs, formatBytes(uint64(duplicateBytes)))
	Verbosef("load all snapshots\n")

	// find referenced blobs
	snapshots, err := restic.LoadAllSnapshots(repo)
	if err != nil {
		return err
	}

	stats.snapshots = len(snapshots)

	Verbosef("find data that is still in use for %d snapshots\n", stats.snapshots)

	usedBlobs := restic.NewBlobSet()
	seenBlobs := restic.NewBlobSet()

	bar = newProgressMax(!gopts.Quiet, uint64(len(snapshots)), "snapshots")
	bar.Start()
	for _, sn := range snapshots {
		debug.Log("process snapshot %v", sn.ID().Str())

		err = restic.FindUsedBlobs(repo, *sn.Tree, usedBlobs, seenBlobs)
		if err != nil {
			return err
		}

		// usedBlobs accumulates over all snapshots handled so far, so the
		// count logged here is a running total.
		debug.Log("processed snapshot %v, %d blobs in use so far", sn.ID().Str(), len(usedBlobs))
		bar.Report(restic.Stat{Blobs: 1})
	}
	bar.Done()

	Verbosef("found %d of %d data blobs still in use, removing %d blobs\n",
		len(usedBlobs), stats.blobs, stats.blobs-len(usedBlobs))

	// find packs that need a rewrite: those containing an unused blob or a
	// blob that is stored more than once
	rewritePacks := restic.NewIDSet()
	for h, blob := range idx.Blobs {
		if !usedBlobs.Has(h) {
			rewritePacks.Merge(blob.Packs)
			continue
		}

		if blobCount[h] > 1 {
			rewritePacks.Merge(blob.Packs)
		}
	}

	removeBytes := 0

	// find packs that are unneeded
	removePacks := restic.NewIDSet()
	for packID, p := range idx.Packs {

		hasActiveBlob := false
		for _, blob := range p.Entries {
			h := restic.BlobHandle{ID: blob.ID, Type: blob.Type}
			if usedBlobs.Has(h) {
				hasActiveBlob = true
				continue
			}

			removeBytes += int(blob.Length)
		}

		if hasActiveBlob {
			continue
		}

		removePacks.Insert(packID)

		// a pack without any used blob must already have been selected for
		// rewriting above; it is moved from the rewrite set to the remove set
		if !rewritePacks.Has(packID) {
			return errors.Fatalf("pack %v is unneeded, but not contained in rewritePacks", packID.Str())
		}

		rewritePacks.Delete(packID)
	}

	Verbosef("will delete %d packs and rewrite %d packs, this frees %s\n",
		len(removePacks), len(rewritePacks), formatBytes(uint64(removeBytes)))

	err = repository.Repack(repo, rewritePacks, usedBlobs)
	if err != nil {
		return err
	}

	for packID := range removePacks {
		err = repo.Backend().Remove(restic.DataFile, packID.String())
		if err != nil {
			Warnf("unable to remove file %v from the repository\n", packID.Str())
		}
	}

	Verbosef("creating new index\n")

	stats.packs = 0
	for range repo.List(restic.DataFile, done) {
		stats.packs++
	}
	bar = newProgressMax(!gopts.Quiet, uint64(stats.packs), "packs")
	idx, err = index.New(repo, bar)
	if err != nil {
		return err
	}

	// Collect the existing index files first and delete them only after the
	// new index has been saved; otherwise a failed save would leave the
	// repository without any index.
	var supersedes restic.IDs
	for idxID := range repo.List(restic.IndexFile, done) {
		supersedes = append(supersedes, idxID)
	}

	id, err := idx.Save(repo, supersedes)
	if err != nil {
		return err
	}
	Verbosef("saved new index as %v\n", id.Str())

	for _, idxID := range supersedes {
		err := repo.Backend().Remove(restic.IndexFile, idxID.String())
		if err != nil {
			fmt.Fprintf(os.Stderr, "unable to remove index %v: %v\n", idxID.Str(), err)
		}
	}

	Verbosef("done\n")
	return nil
}
Beispiel #15
0
// LoadIndex loads all index files of the repository in parallel, inserts them
// into the checker's master index and records the packs and blobs they
// reference. Finally the master index is set as the repository's index.
//
// hints contains non-fatal findings: index files in the old format
// (ErrOldIndexFormat) and packs that are listed in more than one index
// (ErrDuplicatePacks). errs contains index IDs that could not be parsed and
// the error returned by the parallel loader, if any. An old-format hint is
// only recorded for indexes that could be loaded; an index that fails to load
// is covered by the loader error in errs.
func (c *Checker) LoadIndex() (hints []error, errs []error) {
	debug.Log("Start")
	type indexRes struct {
		Index *repository.Index
		ID    string
		// oldFormat is set when the index had to be decoded with the old
		// decoder. It travels with the result so that hints is only ever
		// modified by the goroutine running LoadIndex, not by the workers.
		oldFormat bool
	}

	indexCh := make(chan indexRes)

	worker := func(id restic.ID, done <-chan struct{}) error {
		debug.Log("worker got index %v", id)
		oldFormat := false
		idx, err := repository.LoadIndexWithDecoder(c.repo, id, repository.DecodeIndex)
		if errors.Cause(err) == repository.ErrOldIndexFormat {
			debug.Log("index %v has old format", id.Str())
			oldFormat = true

			idx, err = repository.LoadIndexWithDecoder(c.repo, id, repository.DecodeOldIndex)
		}

		if err != nil {
			return err
		}

		select {
		case indexCh <- indexRes{Index: idx, ID: id.String(), oldFormat: oldFormat}:
		case <-done:
		}

		return nil
	}

	var perr error
	go func() {
		defer close(indexCh)
		debug.Log("start loading indexes in parallel")
		perr = repository.FilesInParallel(c.repo.Backend(), restic.IndexFile, defaultParallelism,
			repository.ParallelWorkFuncParseID(worker))
		debug.Log("loading indexes finished, error: %v", perr)
	}()

	done := make(chan struct{})
	defer close(done)

	packToIndex := make(map[restic.ID]restic.IDSet)

	for res := range indexCh {
		debug.Log("process index %v", res.ID)
		idxID, err := restic.ParseID(res.ID)
		if err != nil {
			errs = append(errs, errors.Errorf("unable to parse as index ID: %v", res.ID))
			continue
		}

		if res.oldFormat {
			hints = append(hints, ErrOldIndexFormat{idxID})
		}

		c.indexes[idxID] = res.Index
		c.masterIndex.Insert(res.Index)

		debug.Log("process blobs")
		cnt := 0
		for blob := range res.Index.Each(done) {
			c.packs.Insert(blob.PackID)
			c.blobs.Insert(blob.ID)
			c.blobRefs.M[blob.ID] = 0
			cnt++

			if _, ok := packToIndex[blob.PackID]; !ok {
				packToIndex[blob.PackID] = restic.NewIDSet()
			}
			packToIndex[blob.PackID].Insert(idxID)
		}

		debug.Log("%d blobs processed", cnt)
	}

	// indexCh is closed by the loader goroutine only after perr has been
	// assigned, so once the loop above has ended perr can be read safely.
	// Reading it earlier would race with the goroutine and always see nil.
	debug.Log("done, error %v", perr)
	if perr != nil {
		errs = append(errs, perr)
	}

	debug.Log("checking for duplicate packs")
	for packID := range c.packs {
		debug.Log("  check pack %v: contained in %d indexes", packID.Str(), len(packToIndex[packID]))
		if len(packToIndex[packID]) > 1 {
			hints = append(hints, ErrDuplicatePacks{
				PackID:  packID,
				Indexes: packToIndex[packID],
			})
		}
	}

	c.repo.SetIndex(c.masterIndex)

	return hints, errs
}