Example #1
0
func (s *EtcdServer) applySnapshot(ep *etcdProgress, apply *apply) {
	if raft.IsEmptySnap(apply.snapshot) {
		return
	}

	if apply.snapshot.Metadata.Index <= ep.appliedi {
		plog.Panicf("snapshot index [%d] should > appliedi[%d] + 1",
			apply.snapshot.Metadata.Index, ep.appliedi)
	}

	if s.cfg.V3demo {
		snapfn, err := s.r.storage.DBFilePath(apply.snapshot.Metadata.Index)
		if err != nil {
			plog.Panicf("get database snapshot file path error: %v", err)
		}

		fn := path.Join(s.cfg.SnapDir(), databaseFilename)
		if err := os.Rename(snapfn, fn); err != nil {
			plog.Panicf("rename snapshot file error: %v", err)
		}

		// TODO: recover lessor

		newbe := backend.NewDefaultBackend(fn)
		if err := s.kv.Restore(newbe); err != nil {
			plog.Panicf("restore KV error: %v", err)
		}

		// Closing old backend might block until all the txns
		// on the backend are finished.
		// We do not want to wait on closing the old backend.
		oldbe := s.be
		go func() {
			if err := oldbe.Close(); err != nil {
				plog.Panicf("close backend error: %v", err)
			}
		}()

		s.be = newbe
	}
	if err := s.store.Recovery(apply.snapshot.Data); err != nil {
		plog.Panicf("recovery store error: %v", err)
	}
	s.cluster.Recover()

	// recover raft transport
	s.r.transport.RemoveAllPeers()
	for _, m := range s.cluster.Members() {
		if m.ID == s.ID() {
			continue
		}
		s.r.transport.AddPeer(m.ID, m.PeerURLs)
	}

	ep.appliedi = apply.snapshot.Metadata.Index
	ep.snapi = ep.appliedi
	ep.confState = apply.snapshot.Metadata.ConfState
	plog.Infof("recovered from incoming snapshot at index %d", ep.snapi)
}
Example #2
0
// makeDB copies the database snapshot to the snapshot directory
func makeDB(snapdir, dbfile string) {
	f, ferr := os.OpenFile(dbfile, os.O_RDONLY, 0600)
	if ferr != nil {
		ExitWithError(ExitInvalidInput, ferr)
	}
	defer f.Close()

	if err := os.MkdirAll(snapdir, 0755); err != nil {
		ExitWithError(ExitIO, err)
	}

	dbpath := path.Join(snapdir, "db")
	db, dberr := os.OpenFile(dbpath, os.O_WRONLY|os.O_CREATE, 0600)
	if dberr != nil {
		ExitWithError(ExitIO, dberr)
	}
	if _, err := io.Copy(db, f); err != nil {
		ExitWithError(ExitIO, err)
	}
	db.Close()

	// update consistentIndex so applies go through on etcdserver despite
	// having a new raft instance
	be := backend.NewDefaultBackend(dbpath)
	s := storage.NewStore(be, nil, &initIndex{})
	id := s.TxnBegin()
	btx := be.BatchTx()
	del := func(k, v []byte) error {
		_, _, err := s.TxnDeleteRange(id, k, nil)
		return err
	}
	// delete stored members from old cluster since using new members
	btx.UnsafeForEach([]byte("members"), del)
	btx.UnsafeForEach([]byte("members_removed"), del)
	// trigger write-out of new consistent index
	s.TxnEnd(id)
	s.Commit()
	s.Close()
}
Example #3
0
// NewServer creates a new EtcdServer from the supplied configuration. The
// configuration is considered static for the lifetime of the EtcdServer.
func NewServer(cfg *ServerConfig) (*EtcdServer, error) {
	st := store.New(StoreClusterPrefix, StoreKeysPrefix)
	var (
		w  *wal.WAL
		n  raft.Node
		s  *raft.MemoryStorage
		id types.ID
		cl *cluster
	)

	if terr := fileutil.TouchDirAll(cfg.DataDir); terr != nil {
		return nil, fmt.Errorf("cannot access data directory: %v", terr)
	}

	if !cfg.V3demo && fileutil.Exist(path.Join(cfg.SnapDir(), databaseFilename)) {
		return nil, errors.New("experimental-v3demo cannot be disabled once it is enabled")
	}

	// Run the migrations.
	dataVer, err := version.DetectDataDir(cfg.DataDir)
	if err != nil {
		return nil, err
	}
	if err := upgradeDataDir(cfg.DataDir, cfg.Name, dataVer); err != nil {
		return nil, err
	}

	haveWAL := wal.Exist(cfg.WALDir())
	ss := snap.New(cfg.SnapDir())

	prt, err := rafthttp.NewRoundTripper(cfg.PeerTLSInfo, cfg.peerDialTimeout())
	if err != nil {
		return nil, err
	}
	var remotes []*Member
	switch {
	case !haveWAL && !cfg.NewCluster:
		if err := cfg.VerifyJoinExisting(); err != nil {
			return nil, err
		}
		cl, err = newClusterFromURLsMap(cfg.InitialClusterToken, cfg.InitialPeerURLsMap)
		if err != nil {
			return nil, err
		}
		existingCluster, err := GetClusterFromRemotePeers(getRemotePeerURLs(cl, cfg.Name), prt)
		if err != nil {
			return nil, fmt.Errorf("cannot fetch cluster info from peer urls: %v", err)
		}
		if err := ValidateClusterAndAssignIDs(cl, existingCluster); err != nil {
			return nil, fmt.Errorf("error validating peerURLs %s: %v", existingCluster, err)
		}
		if !isCompatibleWithCluster(cl, cl.MemberByName(cfg.Name).ID, prt) {
			return nil, fmt.Errorf("incomptible with current running cluster")
		}

		remotes = existingCluster.Members()
		cl.SetID(existingCluster.id)
		cl.SetStore(st)
		cfg.Print()
		id, n, s, w = startNode(cfg, cl, nil)
	case !haveWAL && cfg.NewCluster:
		if err := cfg.VerifyBootstrap(); err != nil {
			return nil, err
		}
		cl, err = newClusterFromURLsMap(cfg.InitialClusterToken, cfg.InitialPeerURLsMap)
		if err != nil {
			return nil, err
		}
		m := cl.MemberByName(cfg.Name)
		if isMemberBootstrapped(cl, cfg.Name, prt, cfg.bootstrapTimeout()) {
			return nil, fmt.Errorf("member %s has already been bootstrapped", m.ID)
		}
		if cfg.ShouldDiscover() {
			var str string
			var err error
			str, err = discovery.JoinCluster(cfg.DiscoveryURL, cfg.DiscoveryProxy, m.ID, cfg.InitialPeerURLsMap.String())
			if err != nil {
				return nil, &DiscoveryError{Op: "join", Err: err}
			}
			urlsmap, err := types.NewURLsMap(str)
			if err != nil {
				return nil, err
			}
			if checkDuplicateURL(urlsmap) {
				return nil, fmt.Errorf("discovery cluster %s has duplicate url", urlsmap)
			}
			if cl, err = newClusterFromURLsMap(cfg.InitialClusterToken, urlsmap); err != nil {
				return nil, err
			}
		}
		cl.SetStore(st)
		cfg.PrintWithInitial()
		id, n, s, w = startNode(cfg, cl, cl.MemberIDs())
	case haveWAL:
		if err := fileutil.IsDirWriteable(cfg.MemberDir()); err != nil {
			return nil, fmt.Errorf("cannot write to member directory: %v", err)
		}

		if err := fileutil.IsDirWriteable(cfg.WALDir()); err != nil {
			return nil, fmt.Errorf("cannot write to WAL directory: %v", err)
		}

		if cfg.ShouldDiscover() {
			plog.Warningf("discovery token ignored since a cluster has already been initialized. Valid log found at %q", cfg.WALDir())
		}
		var snapshot *raftpb.Snapshot
		var err error
		snapshot, err = ss.Load()
		if err != nil && err != snap.ErrNoSnapshot {
			return nil, err
		}
		if snapshot != nil {
			if err := st.Recovery(snapshot.Data); err != nil {
				plog.Panicf("recovered store from snapshot error: %v", err)
			}
			plog.Infof("recovered store from snapshot at index %d", snapshot.Metadata.Index)
		}
		cfg.Print()
		if !cfg.ForceNewCluster {
			id, cl, n, s, w = restartNode(cfg, snapshot)
		} else {
			id, cl, n, s, w = restartAsStandaloneNode(cfg, snapshot)
		}
		cl.SetStore(st)
		cl.Recover()
	default:
		return nil, fmt.Errorf("unsupported bootstrap config")
	}

	if terr := fileutil.TouchDirAll(cfg.MemberDir()); terr != nil {
		return nil, fmt.Errorf("cannot access member directory: %v", terr)
	}

	sstats := &stats.ServerStats{
		Name: cfg.Name,
		ID:   id.String(),
	}
	sstats.Initialize()
	lstats := stats.NewLeaderStats(id.String())

	srv := &EtcdServer{
		cfg:       cfg,
		snapCount: cfg.SnapCount,
		errorc:    make(chan error, 1),
		store:     st,
		r: raftNode{
			Node:        n,
			ticker:      time.Tick(time.Duration(cfg.TickMs) * time.Millisecond),
			raftStorage: s,
			storage:     NewStorage(w, ss),
		},
		id:            id,
		attributes:    Attributes{Name: cfg.Name, ClientURLs: cfg.ClientURLs.StringSlice()},
		cluster:       cl,
		stats:         sstats,
		lstats:        lstats,
		SyncTicker:    time.Tick(500 * time.Millisecond),
		peerRt:        prt,
		reqIDGen:      idutil.NewGenerator(uint16(id), time.Now()),
		forceVersionC: make(chan struct{}),
		msgSnapC:      make(chan raftpb.Message, maxInFlightMsgSnap),
	}

	if cfg.V3demo {
		srv.be = backend.NewDefaultBackend(path.Join(cfg.SnapDir(), databaseFilename))
		srv.lessor = lease.NewLessor(srv.be)
		srv.kv = dstorage.New(srv.be, srv.lessor, &srv.consistIndex)
		srv.authStore = auth.NewAuthStore(srv)
		if h := cfg.AutoCompactionRetention; h != 0 {
			srv.compactor = compactor.NewPeriodic(h, srv.kv, srv)
			srv.compactor.Run()
		}
	}

	// TODO: move transport initialization near the definition of remote
	tr := &rafthttp.Transport{
		TLSInfo:     cfg.PeerTLSInfo,
		DialTimeout: cfg.peerDialTimeout(),
		ID:          id,
		URLs:        cfg.PeerURLs,
		ClusterID:   cl.ID(),
		Raft:        srv,
		Snapshotter: ss,
		ServerStats: sstats,
		LeaderStats: lstats,
		ErrorC:      srv.errorc,
		V3demo:      cfg.V3demo,
	}
	if err := tr.Start(); err != nil {
		return nil, err
	}
	// add all remotes into transport
	for _, m := range remotes {
		if m.ID != id {
			tr.AddRemote(m.ID, m.PeerURLs)
		}
	}
	for _, m := range cl.Members() {
		if m.ID != id {
			tr.AddPeer(m.ID, m.PeerURLs)
		}
	}
	srv.r.transport = tr

	return srv, nil
}