Exemple #1
0
func (n *Node) loadAndStart(ctx context.Context, forceNewCluster bool) error {
	walDir := n.walDir()
	snapDir := n.snapDir()

	if !fileutil.Exist(snapDir) {
		// If snapshots created by the etcd-v2 code exist, hard link
		// them at the new path. This prevents etc-v2 creating
		// snapshots that are visible to us, but out of sync with our
		// WALs, after a downgrade.
		legacySnapDir := n.legacySnapDir()
		if fileutil.Exist(legacySnapDir) {
			if err := migrateSnapshots(legacySnapDir, snapDir); err != nil {
				return err
			}
		} else if err := os.MkdirAll(snapDir, 0700); err != nil {
			return errors.Wrap(err, "failed to create snapshot directory")
		}
	}

	// Create a snapshotter
	n.snapshotter = snap.New(snapDir)

	if !wal.Exist(walDir) {
		// If wals created by the etcd-v2 wal code exist, copy them to
		// the new path to avoid adding backwards-incompatible entries
		// to those files.
		legacyWALDir := n.legacyWALDir()
		if !wal.Exist(legacyWALDir) {
			return errNoWAL
		}

		if err := migrateWALs(legacyWALDir, walDir); err != nil {
			return err
		}
	}

	// Load snapshot data
	snapshot, err := n.snapshotter.Load()
	if err != nil && err != snap.ErrNoSnapshot {
		return err
	}

	if snapshot != nil {
		// Load the snapshot data into the store
		if err := n.restoreFromSnapshot(snapshot.Data, forceNewCluster); err != nil {
			return err
		}
	}

	// Read logs to fully catch up store
	if err := n.readWAL(ctx, snapshot, forceNewCluster); err != nil {
		return err
	}

	return nil
}
Exemple #2
0
func (n *Node) loadAndStart(ctx context.Context, forceNewCluster bool) error {
	walDir := n.walDir()
	snapDir := n.snapDir()

	if err := os.MkdirAll(snapDir, 0700); err != nil {
		return fmt.Errorf("create snapshot directory error: %v", err)
	}

	// Create a snapshotter
	n.snapshotter = snap.New(snapDir)

	if !wal.Exist(walDir) {
		return errNoWAL
	}

	// Load snapshot data
	snapshot, err := n.snapshotter.Load()
	if err != nil && err != snap.ErrNoSnapshot {
		return err
	}

	if snapshot != nil {
		// Load the snapshot data into the store
		if err := n.restoreFromSnapshot(snapshot.Data, forceNewCluster); err != nil {
			return err
		}
	}

	// Read logs to fully catch up store
	if err := n.readWAL(ctx, snapshot, forceNewCluster); err != nil {
		return err
	}

	return nil
}
Exemple #3
0
// startRaft starts a raft node from the given wal dir.
// If the wal dir does not exist, startRaft will start a new raft node.
// If the wal dir exists, startRaft will restart the previous raft node.
// startRaft returns the started raft node and the opened wal.
func startRaft(id int64, peerIDs []int64, waldir string) (raft.Node, *wal.WAL) {
	if !wal.Exist(waldir) {
		w, err := wal.Create(waldir)
		if err != nil {
			log.Fatal(err)
		}
		n := raft.Start(id, peerIDs, 10, 1)
		return n, w
	}

	// restart a node from previous wal
	// TODO(xiangli): check snapshot; not open from one
	w, err := wal.OpenAtIndex(waldir, 1)
	if err != nil {
		log.Fatal(err)
	}
	wid, st, ents, err := w.ReadAll()
	// TODO(xiangli): save/recovery nodeID?
	if wid != 0 {
		log.Fatalf("unexpected nodeid %d: nodeid should always be zero until we save nodeid into wal", wid)
	}
	if err != nil {
		log.Fatal(err)
	}
	n := raft.Restart(id, peerIDs, 10, 1, st, ents)
	return n, w
}
Exemple #4
0
func isMemberInitialized(cfg *Config) bool {
	waldir := cfg.WalDir
	if waldir == "" {
		waldir = path.Join(cfg.Dir, "member", "wal")
	}

	return wal.Exist(waldir)
}
Exemple #5
0
func (rc *raftNode) startRaft() {
	if !fileutil.Exist(rc.snapdir) {
		if err := os.Mkdir(rc.snapdir, 0750); err != nil {
			log.Fatalf("raftexample: cannot create dir for snapshot (%v)", err)
		}
	}
	rc.snapshotter = snap.New(rc.snapdir)
	rc.snapshotterReady <- rc.snapshotter

	oldwal := wal.Exist(rc.waldir)
	rc.wal = rc.replayWAL()

	rpeers := make([]raft.Peer, len(rc.peers))
	for i := range rpeers {
		rpeers[i] = raft.Peer{ID: uint64(i + 1)}
	}
	c := &raft.Config{
		ID:              uint64(rc.id),
		ElectionTick:    10,
		HeartbeatTick:   1,
		Storage:         rc.raftStorage,
		MaxSizePerMsg:   1024 * 1024,
		MaxInflightMsgs: 256,
	}

	if oldwal {
		rc.node = raft.RestartNode(c)
	} else {
		startPeers := rpeers
		if rc.join {
			startPeers = nil
		}
		rc.node = raft.StartNode(c, startPeers)
	}

	ss := &stats.ServerStats{}
	ss.Initialize()

	rc.transport = &rafthttp.Transport{
		ID:          types.ID(rc.id),
		ClusterID:   0x1000,
		Raft:        rc,
		ServerStats: ss,
		LeaderStats: stats.NewLeaderStats(strconv.Itoa(rc.id)),
		ErrorC:      make(chan error),
	}

	rc.transport.Start()
	for i := range rc.peers {
		if i+1 != rc.id {
			rc.transport.AddPeer(types.ID(i+1), []string{rc.peers[i]})
		}
	}

	go rc.serveRaft()
	go rc.serveChannels()
}
Exemple #6
0
func (n *Node) loadAndStart(ctx context.Context, forceNewCluster bool) error {
	walDir := n.walDir()
	snapDir := n.snapDir()

	if err := os.MkdirAll(snapDir, 0700); err != nil {
		return fmt.Errorf("create snapshot directory error: %v", err)
	}

	// Create a snapshotter
	n.snapshotter = snap.New(snapDir)

	if !wal.Exist(walDir) {
		raftNode := &api.RaftMember{
			RaftID: n.Config.ID,
			Addr:   n.Address,
		}
		metadata, err := raftNode.Marshal()
		if err != nil {
			return fmt.Errorf("error marshalling raft node: %v", err)
		}
		n.wal, err = wal.Create(walDir, metadata)
		if err != nil {
			return fmt.Errorf("create wal error: %v", err)
		}

		n.cluster.AddMember(&membership.Member{RaftMember: raftNode})
		n.startNodePeers = []raft.Peer{{ID: n.Config.ID, Context: metadata}}

		return nil
	}

	// Load snapshot data
	snapshot, err := n.snapshotter.Load()
	if err != nil && err != snap.ErrNoSnapshot {
		return err
	}

	if snapshot != nil {
		// Load the snapshot data into the store
		if err := n.restoreFromSnapshot(snapshot.Data, forceNewCluster); err != nil {
			return err
		}
	}

	// Read logs to fully catch up store
	if err := n.readWAL(ctx, snapshot, forceNewCluster); err != nil {
		return err
	}

	n.Node = raft.RestartNode(n.Config)
	return nil
}
func dump5(dataDir string) ([]raftpb.Entry, error) {
	wd5 := walDir5(dataDir)
	if !wal.Exist(wd5) {
		return nil, fmt.Errorf("No wal exists at %s", wd5)
	}

	w, err := wal.Open(wd5, walpb.Snapshot{})
	if err != nil {
		return nil, err
	}
	defer w.Close()

	_, _, ents, err := w.ReadAll()
	return ents, err
}
Exemple #8
0
func (rc *raftNode) startRaft() {
	oldwal := wal.Exist(rc.waldir)
	rc.wal = rc.replayWAL()

	rpeers := make([]raft.Peer, len(rc.peers))
	for i := range rpeers {
		rpeers[i] = raft.Peer{ID: uint64(i + 1)}
	}
	c := &raft.Config{
		ID:              uint64(rc.id),
		ElectionTick:    10,
		HeartbeatTick:   1,
		Storage:         rc.raftStorage,
		MaxSizePerMsg:   1024 * 1024,
		MaxInflightMsgs: 256,
	}

	if oldwal {
		rc.node = raft.RestartNode(c)
	} else {
		rc.node = raft.StartNode(c, rpeers)
	}

	ss := &stats.ServerStats{}
	ss.Initialize()

	rc.transport = &rafthttp.Transport{
		ID:          types.ID(rc.id),
		ClusterID:   0x1000,
		Raft:        rc,
		ServerStats: ss,
		LeaderStats: stats.NewLeaderStats(strconv.Itoa(rc.id)),
		ErrorC:      make(chan error),
	}

	rc.transport.Start()
	for i := range rc.peers {
		if i+1 != rc.id {
			rc.transport.AddPeer(types.ID(i+1), []string{rc.peers[i]})
		}
	}

	go rc.serveRaft()
	go rc.serveChannels()
}
Exemple #9
0
// openWAL returns a WAL ready for reading.
func (rc *raftNode) openWAL() *wal.WAL {
	if wal.Exist(rc.waldir) == false {
		if err := os.Mkdir(rc.waldir, 0750); err != nil {
			log.Fatalf("raftexample: cannot create dir for wal (%v)", err)
		}

		w, err := wal.Create(rc.waldir, nil)
		if err != nil {
			log.Fatalf("raftexample: create wal error (%v)", err)
		}
		w.Close()
	}

	w, err := wal.Open(rc.waldir, walpb.Snapshot{})
	if err != nil {
		log.Fatalf("raftexample: error loading wal (%v)", err)
	}

	return w
}
Exemple #10
0
// NewServer creates a new EtcdServer from the supplied configuration. The
// configuration is considered static for the lifetime of the EtcdServer.
func NewServer(cfg *ServerConfig) (srv *EtcdServer, err error) {
	st := store.New(StoreClusterPrefix, StoreKeysPrefix)

	var (
		w  *wal.WAL
		n  raft.Node
		s  *raft.MemoryStorage
		id types.ID
		cl *membership.RaftCluster
	)

	if terr := fileutil.TouchDirAll(cfg.DataDir); terr != nil {
		return nil, fmt.Errorf("cannot access data directory: %v", terr)
	}

	// Run the migrations.
	dataVer, err := version.DetectDataDir(cfg.DataDir)
	if err != nil {
		return nil, err
	}
	if err = upgradeDataDir(cfg.DataDir, cfg.Name, dataVer); err != nil {
		return nil, err
	}

	haveWAL := wal.Exist(cfg.WALDir())

	if err = os.MkdirAll(cfg.SnapDir(), privateDirMode); err != nil && !os.IsExist(err) {
		plog.Fatalf("create snapshot directory error: %v", err)
	}
	ss := snap.New(cfg.SnapDir())
	be := backend.NewDefaultBackend(path.Join(cfg.SnapDir(), databaseFilename))
	defer func() {
		if err != nil {
			be.Close()
		}
	}()

	prt, err := rafthttp.NewRoundTripper(cfg.PeerTLSInfo, cfg.peerDialTimeout())
	if err != nil {
		return nil, err
	}
	var remotes []*membership.Member
	switch {
	case !haveWAL && !cfg.NewCluster:
		if err = cfg.VerifyJoinExisting(); err != nil {
			return nil, err
		}
		cl, err = membership.NewClusterFromURLsMap(cfg.InitialClusterToken, cfg.InitialPeerURLsMap)
		if err != nil {
			return nil, err
		}
		existingCluster, gerr := GetClusterFromRemotePeers(getRemotePeerURLs(cl, cfg.Name), prt)
		if gerr != nil {
			return nil, fmt.Errorf("cannot fetch cluster info from peer urls: %v", gerr)
		}
		if err = membership.ValidateClusterAndAssignIDs(cl, existingCluster); err != nil {
			return nil, fmt.Errorf("error validating peerURLs %s: %v", existingCluster, err)
		}
		if !isCompatibleWithCluster(cl, cl.MemberByName(cfg.Name).ID, prt) {
			return nil, fmt.Errorf("incomptible with current running cluster")
		}

		remotes = existingCluster.Members()
		cl.SetID(existingCluster.ID())
		cl.SetStore(st)
		cl.SetBackend(be)
		cfg.Print()
		id, n, s, w = startNode(cfg, cl, nil)
	case !haveWAL && cfg.NewCluster:
		if err = cfg.VerifyBootstrap(); err != nil {
			return nil, err
		}
		cl, err = membership.NewClusterFromURLsMap(cfg.InitialClusterToken, cfg.InitialPeerURLsMap)
		if err != nil {
			return nil, err
		}
		m := cl.MemberByName(cfg.Name)
		if isMemberBootstrapped(cl, cfg.Name, prt, cfg.bootstrapTimeout()) {
			return nil, fmt.Errorf("member %s has already been bootstrapped", m.ID)
		}
		if cfg.ShouldDiscover() {
			var str string
			str, err = discovery.JoinCluster(cfg.DiscoveryURL, cfg.DiscoveryProxy, m.ID, cfg.InitialPeerURLsMap.String())
			if err != nil {
				return nil, &DiscoveryError{Op: "join", Err: err}
			}
			var urlsmap types.URLsMap
			urlsmap, err = types.NewURLsMap(str)
			if err != nil {
				return nil, err
			}
			if checkDuplicateURL(urlsmap) {
				return nil, fmt.Errorf("discovery cluster %s has duplicate url", urlsmap)
			}
			if cl, err = membership.NewClusterFromURLsMap(cfg.InitialClusterToken, urlsmap); err != nil {
				return nil, err
			}
		}
		cl.SetStore(st)
		cl.SetBackend(be)
		cfg.PrintWithInitial()
		id, n, s, w = startNode(cfg, cl, cl.MemberIDs())
	case haveWAL:
		if err = fileutil.IsDirWriteable(cfg.MemberDir()); err != nil {
			return nil, fmt.Errorf("cannot write to member directory: %v", err)
		}

		if err = fileutil.IsDirWriteable(cfg.WALDir()); err != nil {
			return nil, fmt.Errorf("cannot write to WAL directory: %v", err)
		}

		if cfg.ShouldDiscover() {
			plog.Warningf("discovery token ignored since a cluster has already been initialized. Valid log found at %q", cfg.WALDir())
		}
		var snapshot *raftpb.Snapshot
		snapshot, err = ss.Load()
		if err != nil && err != snap.ErrNoSnapshot {
			return nil, err
		}
		if snapshot != nil {
			if err = st.Recovery(snapshot.Data); err != nil {
				plog.Panicf("recovered store from snapshot error: %v", err)
			}
			plog.Infof("recovered store from snapshot at index %d", snapshot.Metadata.Index)
		}
		cfg.Print()
		if !cfg.ForceNewCluster {
			id, cl, n, s, w = restartNode(cfg, snapshot)
		} else {
			id, cl, n, s, w = restartAsStandaloneNode(cfg, snapshot)
		}
		cl.SetStore(st)
		cl.SetBackend(be)
		cl.Recover()
	default:
		return nil, fmt.Errorf("unsupported bootstrap config")
	}

	if terr := fileutil.TouchDirAll(cfg.MemberDir()); terr != nil {
		return nil, fmt.Errorf("cannot access member directory: %v", terr)
	}

	sstats := &stats.ServerStats{
		Name: cfg.Name,
		ID:   id.String(),
	}
	sstats.Initialize()
	lstats := stats.NewLeaderStats(id.String())

	srv = &EtcdServer{
		readych:   make(chan struct{}),
		Cfg:       cfg,
		snapCount: cfg.SnapCount,
		errorc:    make(chan error, 1),
		store:     st,
		r: raftNode{
			Node:        n,
			ticker:      time.Tick(time.Duration(cfg.TickMs) * time.Millisecond),
			raftStorage: s,
			storage:     NewStorage(w, ss),
		},
		id:            id,
		attributes:    membership.Attributes{Name: cfg.Name, ClientURLs: cfg.ClientURLs.StringSlice()},
		cluster:       cl,
		stats:         sstats,
		lstats:        lstats,
		SyncTicker:    time.Tick(500 * time.Millisecond),
		peerRt:        prt,
		reqIDGen:      idutil.NewGenerator(uint16(id), time.Now()),
		forceVersionC: make(chan struct{}),
		msgSnapC:      make(chan raftpb.Message, maxInFlightMsgSnap),
	}

	srv.applyV2 = &applierV2store{store: srv.store, cluster: srv.cluster}

	srv.be = be
	srv.lessor = lease.NewLessor(srv.be)
	srv.kv = mvcc.New(srv.be, srv.lessor, &srv.consistIndex)
	srv.consistIndex.setConsistentIndex(srv.kv.ConsistentIndex())
	srv.authStore = auth.NewAuthStore(srv.be)
	if h := cfg.AutoCompactionRetention; h != 0 {
		srv.compactor = compactor.NewPeriodic(h, srv.kv, srv)
		srv.compactor.Run()
	}

	if err = srv.restoreAlarms(); err != nil {
		return nil, err
	}

	// TODO: move transport initialization near the definition of remote
	tr := &rafthttp.Transport{
		TLSInfo:     cfg.PeerTLSInfo,
		DialTimeout: cfg.peerDialTimeout(),
		ID:          id,
		URLs:        cfg.PeerURLs,
		ClusterID:   cl.ID(),
		Raft:        srv,
		Snapshotter: ss,
		ServerStats: sstats,
		LeaderStats: lstats,
		ErrorC:      srv.errorc,
	}
	if err = tr.Start(); err != nil {
		return nil, err
	}
	// add all remotes into transport
	for _, m := range remotes {
		if m.ID != id {
			tr.AddRemote(m.ID, m.PeerURLs)
		}
	}
	for _, m := range cl.Members() {
		if m.ID != id {
			tr.AddPeer(m.ID, m.PeerURLs)
		}
	}
	srv.r.transport = tr

	return srv, nil
}
Exemple #11
0
// NewServer creates a new EtcdServer from the supplied configuration. The
// configuration is considered static for the lifetime of the EtcdServer.
func NewServer(cfg *ServerConfig) (srv *EtcdServer, err error) {
	st := store.New(StoreClusterPrefix, StoreKeysPrefix)

	var (
		w  *wal.WAL
		n  raft.Node
		s  *raft.MemoryStorage
		id types.ID
		cl *membership.RaftCluster
	)

	if terr := fileutil.TouchDirAll(cfg.DataDir); terr != nil {
		return nil, fmt.Errorf("cannot access data directory: %v", terr)
	}

	haveWAL := wal.Exist(cfg.WALDir())

	if err = fileutil.TouchDirAll(cfg.SnapDir()); err != nil {
		plog.Fatalf("create snapshot directory error: %v", err)
	}
	ss := snap.New(cfg.SnapDir())

	bepath := path.Join(cfg.SnapDir(), databaseFilename)
	beExist := fileutil.Exist(bepath)

	var be backend.Backend
	beOpened := make(chan struct{})
	go func() {
		be = backend.NewDefaultBackend(bepath)
		beOpened <- struct{}{}
	}()

	select {
	case <-beOpened:
	case <-time.After(time.Second):
		plog.Warningf("another etcd process is running with the same data dir and holding the file lock.")
		plog.Warningf("waiting for it to exit before starting...")
		<-beOpened
	}

	defer func() {
		if err != nil {
			be.Close()
		}
	}()

	prt, err := rafthttp.NewRoundTripper(cfg.PeerTLSInfo, cfg.peerDialTimeout())
	if err != nil {
		return nil, err
	}
	var (
		remotes  []*membership.Member
		snapshot *raftpb.Snapshot
	)

	switch {
	case !haveWAL && !cfg.NewCluster:
		if err = cfg.VerifyJoinExisting(); err != nil {
			return nil, err
		}
		cl, err = membership.NewClusterFromURLsMap(cfg.InitialClusterToken, cfg.InitialPeerURLsMap)
		if err != nil {
			return nil, err
		}
		existingCluster, gerr := GetClusterFromRemotePeers(getRemotePeerURLs(cl, cfg.Name), prt)
		if gerr != nil {
			return nil, fmt.Errorf("cannot fetch cluster info from peer urls: %v", gerr)
		}
		if err = membership.ValidateClusterAndAssignIDs(cl, existingCluster); err != nil {
			return nil, fmt.Errorf("error validating peerURLs %s: %v", existingCluster, err)
		}
		if !isCompatibleWithCluster(cl, cl.MemberByName(cfg.Name).ID, prt) {
			return nil, fmt.Errorf("incompatible with current running cluster")
		}

		remotes = existingCluster.Members()
		cl.SetID(existingCluster.ID())
		cl.SetStore(st)
		cl.SetBackend(be)
		cfg.Print()
		id, n, s, w = startNode(cfg, cl, nil)
	case !haveWAL && cfg.NewCluster:
		if err = cfg.VerifyBootstrap(); err != nil {
			return nil, err
		}
		cl, err = membership.NewClusterFromURLsMap(cfg.InitialClusterToken, cfg.InitialPeerURLsMap)
		if err != nil {
			return nil, err
		}
		m := cl.MemberByName(cfg.Name)
		if isMemberBootstrapped(cl, cfg.Name, prt, cfg.bootstrapTimeout()) {
			return nil, fmt.Errorf("member %s has already been bootstrapped", m.ID)
		}
		if cfg.ShouldDiscover() {
			var str string
			str, err = discovery.JoinCluster(cfg.DiscoveryURL, cfg.DiscoveryProxy, m.ID, cfg.InitialPeerURLsMap.String())
			if err != nil {
				return nil, &DiscoveryError{Op: "join", Err: err}
			}
			var urlsmap types.URLsMap
			urlsmap, err = types.NewURLsMap(str)
			if err != nil {
				return nil, err
			}
			if checkDuplicateURL(urlsmap) {
				return nil, fmt.Errorf("discovery cluster %s has duplicate url", urlsmap)
			}
			if cl, err = membership.NewClusterFromURLsMap(cfg.InitialClusterToken, urlsmap); err != nil {
				return nil, err
			}
		}
		cl.SetStore(st)
		cl.SetBackend(be)
		cfg.PrintWithInitial()
		id, n, s, w = startNode(cfg, cl, cl.MemberIDs())
	case haveWAL:
		if err = fileutil.IsDirWriteable(cfg.MemberDir()); err != nil {
			return nil, fmt.Errorf("cannot write to member directory: %v", err)
		}

		if err = fileutil.IsDirWriteable(cfg.WALDir()); err != nil {
			return nil, fmt.Errorf("cannot write to WAL directory: %v", err)
		}

		if cfg.ShouldDiscover() {
			plog.Warningf("discovery token ignored since a cluster has already been initialized. Valid log found at %q", cfg.WALDir())
		}
		snapshot, err = ss.Load()
		if err != nil && err != snap.ErrNoSnapshot {
			return nil, err
		}
		if snapshot != nil {
			if err = st.Recovery(snapshot.Data); err != nil {
				plog.Panicf("recovered store from snapshot error: %v", err)
			}
			plog.Infof("recovered store from snapshot at index %d", snapshot.Metadata.Index)
		}
		cfg.Print()
		if !cfg.ForceNewCluster {
			id, cl, n, s, w = restartNode(cfg, snapshot)
		} else {
			id, cl, n, s, w = restartAsStandaloneNode(cfg, snapshot)
		}
		cl.SetStore(st)
		cl.SetBackend(be)
		cl.Recover(api.UpdateCapability)
		if cl.Version() != nil && !cl.Version().LessThan(semver.Version{Major: 3}) && !beExist {
			os.RemoveAll(bepath)
			return nil, fmt.Errorf("database file (%v) of the backend is missing", bepath)
		}
	default:
		return nil, fmt.Errorf("unsupported bootstrap config")
	}

	if terr := fileutil.TouchDirAll(cfg.MemberDir()); terr != nil {
		return nil, fmt.Errorf("cannot access member directory: %v", terr)
	}

	sstats := &stats.ServerStats{
		Name: cfg.Name,
		ID:   id.String(),
	}
	sstats.Initialize()
	lstats := stats.NewLeaderStats(id.String())

	heartbeat := time.Duration(cfg.TickMs) * time.Millisecond
	srv = &EtcdServer{
		readych:   make(chan struct{}),
		Cfg:       cfg,
		snapCount: cfg.SnapCount,
		errorc:    make(chan error, 1),
		store:     st,
		r: raftNode{
			isIDRemoved: func(id uint64) bool { return cl.IsIDRemoved(types.ID(id)) },
			Node:        n,
			ticker:      time.Tick(heartbeat),
			// set up contention detectors for raft heartbeat message.
			// expect to send a heartbeat within 2 heartbeat intervals.
			td:          contention.NewTimeoutDetector(2 * heartbeat),
			heartbeat:   heartbeat,
			raftStorage: s,
			storage:     NewStorage(w, ss),
			msgSnapC:    make(chan raftpb.Message, maxInFlightMsgSnap),
			readStateC:  make(chan raft.ReadState, 1),
		},
		id:            id,
		attributes:    membership.Attributes{Name: cfg.Name, ClientURLs: cfg.ClientURLs.StringSlice()},
		cluster:       cl,
		stats:         sstats,
		lstats:        lstats,
		SyncTicker:    time.Tick(500 * time.Millisecond),
		peerRt:        prt,
		reqIDGen:      idutil.NewGenerator(uint16(id), time.Now()),
		forceVersionC: make(chan struct{}),
	}

	srv.applyV2 = &applierV2store{store: srv.store, cluster: srv.cluster}

	srv.be = be
	minTTL := time.Duration((3*cfg.ElectionTicks)/2) * heartbeat

	// always recover lessor before kv. When we recover the mvcc.KV it will reattach keys to its leases.
	// If we recover mvcc.KV first, it will attach the keys to the wrong lessor before it recovers.
	srv.lessor = lease.NewLessor(srv.be, int64(math.Ceil(minTTL.Seconds())))
	srv.kv = mvcc.New(srv.be, srv.lessor, &srv.consistIndex)
	if beExist {
		kvindex := srv.kv.ConsistentIndex()
		// TODO: remove kvindex != 0 checking when we do not expect users to upgrade
		// etcd from pre-3.0 release.
		if snapshot != nil && kvindex < snapshot.Metadata.Index {
			if kvindex != 0 {
				return nil, fmt.Errorf("database file (%v index %d) does not match with snapshot (index %d).", bepath, kvindex, snapshot.Metadata.Index)
			}
			plog.Warningf("consistent index never saved (snapshot index=%d)", snapshot.Metadata.Index)
		}
	}
	srv.consistIndex.setConsistentIndex(srv.kv.ConsistentIndex())

	srv.authStore = auth.NewAuthStore(srv.be)
	if h := cfg.AutoCompactionRetention; h != 0 {
		srv.compactor = compactor.NewPeriodic(h, srv.kv, srv)
		srv.compactor.Run()
	}

	srv.applyV3Base = &applierV3backend{srv}
	if err = srv.restoreAlarms(); err != nil {
		return nil, err
	}

	// TODO: move transport initialization near the definition of remote
	tr := &rafthttp.Transport{
		TLSInfo:     cfg.PeerTLSInfo,
		DialTimeout: cfg.peerDialTimeout(),
		ID:          id,
		URLs:        cfg.PeerURLs,
		ClusterID:   cl.ID(),
		Raft:        srv,
		Snapshotter: ss,
		ServerStats: sstats,
		LeaderStats: lstats,
		ErrorC:      srv.errorc,
	}
	if err = tr.Start(); err != nil {
		return nil, err
	}
	// add all remotes into transport
	for _, m := range remotes {
		if m.ID != id {
			tr.AddRemote(m.ID, m.PeerURLs)
		}
	}
	for _, m := range cl.Members() {
		if m.ID != id {
			tr.AddPeer(m.ID, m.PeerURLs)
		}
	}
	srv.r.transport = tr

	return srv, nil
}
Exemple #12
0
// NewServer creates a new EtcdServer from the supplied configuration. The
// configuration is considered static for the lifetime of the EtcdServer.
func NewServer(cfg *ServerConfig) (*EtcdServer, error) {
	st := store.New(StoreClusterPrefix, StoreKeysPrefix)
	var w *wal.WAL
	var n raft.Node
	var s *raft.MemoryStorage
	var id types.ID
	var cl *cluster

	// Run the migrations.
	dataVer, err := version.DetectDataDir(cfg.DataDir)
	if err != nil {
		return nil, err
	}
	if err := upgradeDataDir(cfg.DataDir, cfg.Name, dataVer); err != nil {
		return nil, err
	}

	haveWAL := wal.Exist(cfg.WALDir())
	ss := snap.New(cfg.SnapDir())

	var remotes []*Member
	switch {
	case !haveWAL && !cfg.NewCluster:
		if err := cfg.VerifyJoinExisting(); err != nil {
			return nil, err
		}
		cl, err = newClusterFromURLsMap(cfg.InitialClusterToken, cfg.InitialPeerURLsMap)
		if err != nil {
			return nil, err
		}
		existingCluster, err := GetClusterFromRemotePeers(getRemotePeerURLs(cl, cfg.Name), cfg.Transport)
		if err != nil {
			return nil, fmt.Errorf("cannot fetch cluster info from peer urls: %v", err)
		}
		if err := ValidateClusterAndAssignIDs(cl, existingCluster); err != nil {
			return nil, fmt.Errorf("error validating peerURLs %s: %v", existingCluster, err)
		}
		if !isCompatibleWithCluster(cl, cl.MemberByName(cfg.Name).ID, cfg.Transport) {
			return nil, fmt.Errorf("incomptible with current running cluster")
		}

		remotes = existingCluster.Members()
		cl.SetID(existingCluster.id)
		cl.SetStore(st)
		cfg.Print()
		id, n, s, w = startNode(cfg, cl, nil)
	case !haveWAL && cfg.NewCluster:
		if err := cfg.VerifyBootstrap(); err != nil {
			return nil, err
		}
		cl, err = newClusterFromURLsMap(cfg.InitialClusterToken, cfg.InitialPeerURLsMap)
		if err != nil {
			return nil, err
		}
		m := cl.MemberByName(cfg.Name)
		if isMemberBootstrapped(cl, cfg.Name, cfg.Transport) {
			return nil, fmt.Errorf("member %s has already been bootstrapped", m.ID)
		}
		if cfg.ShouldDiscover() {
			str, err := discovery.JoinCluster(cfg.DiscoveryURL, cfg.DiscoveryProxy, m.ID, cfg.InitialPeerURLsMap.String())
			if err != nil {
				return nil, err
			}
			urlsmap, err := types.NewURLsMap(str)
			if err != nil {
				return nil, err
			}
			if checkDuplicateURL(urlsmap) {
				return nil, fmt.Errorf("discovery cluster %s has duplicate url", urlsmap)
			}
			if cl, err = newClusterFromURLsMap(cfg.InitialClusterToken, urlsmap); err != nil {
				return nil, err
			}
		}
		cl.SetStore(st)
		cfg.PrintWithInitial()
		id, n, s, w = startNode(cfg, cl, cl.MemberIDs())
	case haveWAL:
		if err := fileutil.IsDirWriteable(cfg.DataDir); err != nil {
			return nil, fmt.Errorf("cannot write to data directory: %v", err)
		}

		if err := fileutil.IsDirWriteable(cfg.MemberDir()); err != nil {
			return nil, fmt.Errorf("cannot write to member directory: %v", err)
		}

		if cfg.ShouldDiscover() {
			plog.Warningf("discovery token ignored since a cluster has already been initialized. Valid log found at %q", cfg.WALDir())
		}
		snapshot, err := ss.Load()
		if err != nil && err != snap.ErrNoSnapshot {
			return nil, err
		}
		if snapshot != nil {
			if err := st.Recovery(snapshot.Data); err != nil {
				plog.Panicf("recovered store from snapshot error: %v", err)
			}
			plog.Infof("recovered store from snapshot at index %d", snapshot.Metadata.Index)
		}
		cfg.Print()
		if snapshot != nil {
			plog.Infof("loaded cluster information from store: %s", cl)
		}
		if !cfg.ForceNewCluster {
			id, cl, n, s, w = restartNode(cfg, snapshot)
		} else {
			id, cl, n, s, w = restartAsStandaloneNode(cfg, snapshot)
		}
		cl.SetStore(st)
		cl.Recover()
	default:
		return nil, fmt.Errorf("unsupported bootstrap config")
	}

	sstats := &stats.ServerStats{
		Name: cfg.Name,
		ID:   id.String(),
	}
	sstats.Initialize()
	lstats := stats.NewLeaderStats(id.String())

	srv := &EtcdServer{
		cfg:       cfg,
		snapCount: cfg.SnapCount,
		errorc:    make(chan error, 1),
		store:     st,
		r: raftNode{
			Node:        n,
			ticker:      time.Tick(time.Duration(cfg.TickMs) * time.Millisecond),
			raftStorage: s,
			storage:     NewStorage(w, ss),
		},
		id:            id,
		attributes:    Attributes{Name: cfg.Name, ClientURLs: cfg.ClientURLs.StringSlice()},
		cluster:       cl,
		stats:         sstats,
		lstats:        lstats,
		SyncTicker:    time.Tick(500 * time.Millisecond),
		reqIDGen:      idutil.NewGenerator(uint8(id), time.Now()),
		forceVersionC: make(chan struct{}),
	}

	// TODO: move transport initialization near the definition of remote
	tr := rafthttp.NewTransporter(cfg.Transport, id, cl.ID(), srv, srv.errorc, sstats, lstats)
	// add all remotes into transport
	for _, m := range remotes {
		if m.ID != id {
			tr.AddRemote(m.ID, m.PeerURLs)
		}
	}
	for _, m := range cl.Members() {
		if m.ID != id {
			tr.AddPeer(m.ID, m.PeerURLs)
		}
	}
	srv.r.transport = tr
	return srv, nil
}
Exemple #13
0
// BootstrapFromDisk creates a new snapshotter and wal, and also reads the latest snapshot and WALs from disk
func (e *EncryptedRaftLogger) BootstrapFromDisk(ctx context.Context, oldEncryptionKeys ...[]byte) (*raftpb.Snapshot, WALData, error) {
	e.encoderMu.Lock()
	defer e.encoderMu.Unlock()

	walDir := e.walDir()
	snapDir := e.snapDir()

	encrypter, decrypter := encryption.Defaults(e.EncryptionKey)
	if oldEncryptionKeys != nil {
		decrypters := []encryption.Decrypter{decrypter}
		for _, key := range oldEncryptionKeys {
			_, d := encryption.Defaults(key)
			decrypters = append(decrypters, d)
		}
		decrypter = MultiDecrypter(decrypters)
	}

	snapFactory := NewSnapFactory(encrypter, decrypter)

	if !fileutil.Exist(snapDir) {
		// If snapshots created by the etcd-v2 code exist, or by swarmkit development version,
		// read the latest snapshot and write it encoded to the new path.  The new path
		// prevents etc-v2 creating snapshots that are visible to us, but not encoded and
		// out of sync with our WALs, after a downgrade.
		for _, dirs := range versionedWALSnapDirs[1:] {
			legacySnapDir := filepath.Join(e.StateDir, dirs.snap)
			if fileutil.Exist(legacySnapDir) {
				if err := MigrateSnapshot(legacySnapDir, snapDir, OriginalSnap, snapFactory); err != nil {
					return nil, WALData{}, err
				}
				break
			}
		}
	}
	// ensure the new directory exists
	if err := os.MkdirAll(snapDir, 0700); err != nil {
		return nil, WALData{}, errors.Wrap(err, "failed to create snapshot directory")
	}

	var (
		snapshotter Snapshotter
		walObj      WAL
		err         error
	)

	// Create a snapshotter and load snapshot data
	snapshotter = snapFactory.New(snapDir)
	snapshot, err := snapshotter.Load()
	if err != nil && err != snap.ErrNoSnapshot {
		return nil, WALData{}, err
	}

	walFactory := NewWALFactory(encrypter, decrypter)
	var walsnap walpb.Snapshot
	if snapshot != nil {
		walsnap.Index = snapshot.Metadata.Index
		walsnap.Term = snapshot.Metadata.Term
	}

	if !wal.Exist(walDir) {
		var walExists bool
		// If wals created by the etcd-v2 wal code exist, read the latest ones based
		// on this snapshot and encode them to wals in the new path to avoid adding
		// backwards-incompatible entries to those files.
		for _, dirs := range versionedWALSnapDirs[1:] {
			legacyWALDir := filepath.Join(e.StateDir, dirs.wal)
			if !wal.Exist(legacyWALDir) {
				continue
			}
			if err = MigrateWALs(ctx, legacyWALDir, walDir, OriginalWAL, walFactory, walsnap); err != nil {
				return nil, WALData{}, err
			}
			walExists = true
			break
		}
		if !walExists {
			return nil, WALData{}, ErrNoWAL
		}
	}

	walObj, waldata, err := ReadRepairWAL(ctx, walDir, walsnap, walFactory)
	if err != nil {
		return nil, WALData{}, err
	}

	e.snapshotter = snapshotter
	e.wal = walObj

	return snapshot, waldata, nil
}
Exemple #14
0
// NewServer creates a new EtcdServer from the supplied configuration. The
// configuration is considered static for the lifetime of the EtcdServer.
func NewServer(cfg *ServerConfig) *EtcdServer {
	if err := os.MkdirAll(cfg.SnapDir(), privateDirMode); err != nil {
		log.Fatalf("etcdserver: cannot create snapshot directory: %v", err)
	}
	ss := snap.New(cfg.SnapDir())
	st := store.New()
	var w *wal.WAL
	var n raft.Node
	var id uint64
	haveWAL := wal.Exist(cfg.WALDir())
	switch {
	case !haveWAL && cfg.ClusterState == ClusterStateValueExisting:
		cl, err := GetClusterFromPeers(cfg.Cluster.PeerURLs())
		if err != nil {
			log.Fatal(err)
		}
		if err := cfg.Cluster.ValidateAndAssignIDs(cl.Members()); err != nil {
			log.Fatalf("etcdserver: %v", err)
		}
		cfg.Cluster.SetID(cl.id)
		cfg.Cluster.SetStore(st)
		id, n, w = startNode(cfg, nil)
	case !haveWAL && cfg.ClusterState == ClusterStateValueNew:
		if err := cfg.VerifyBootstrapConfig(); err != nil {
			log.Fatalf("etcdserver: %v", err)
		}
		m := cfg.Cluster.MemberByName(cfg.Name)
		if cfg.ShouldDiscover() {
			d, err := discovery.New(cfg.DiscoveryURL, m.ID, cfg.Cluster.String())
			if err != nil {
				log.Fatalf("etcdserver: cannot init discovery %v", err)
			}
			s, err := d.Discover()
			if err != nil {
				log.Fatalf("etcdserver: %v", err)
			}
			if cfg.Cluster, err = NewClusterFromString(cfg.Cluster.name, s); err != nil {
				log.Fatalf("etcdserver: %v", err)
			}
		}
		cfg.Cluster.SetStore(st)
		id, n, w = startNode(cfg, cfg.Cluster.MemberIDs())
	case haveWAL:
		if cfg.ShouldDiscover() {
			log.Printf("etcdserver: warn: ignoring discovery: etcd has already been initialized and has a valid log in %q", cfg.WALDir())
		}
		var index uint64
		snapshot, err := ss.Load()
		if err != nil && err != snap.ErrNoSnapshot {
			log.Fatal(err)
		}
		if snapshot != nil {
			log.Printf("etcdserver: recovering from snapshot at index %d", snapshot.Index)
			st.Recovery(snapshot.Data)
			index = snapshot.Index
		}
		cfg.Cluster = NewClusterFromStore(cfg.Cluster.name, st)
		id, n, w = restartNode(cfg, index, snapshot)
	default:
		log.Fatalf("etcdserver: unsupported bootstrap config")
	}

	sstats := &stats.ServerStats{
		Name: cfg.Name,
		ID:   strutil.IDAsHex(id),
	}
	lstats := stats.NewLeaderStats(strutil.IDAsHex(id))

	s := &EtcdServer{
		store:      st,
		node:       n,
		id:         id,
		attributes: Attributes{Name: cfg.Name, ClientURLs: cfg.ClientURLs.StringSlice()},
		Cluster:    cfg.Cluster,
		storage: struct {
			*wal.WAL
			*snap.Snapshotter
		}{w, ss},
		stats:      sstats,
		lstats:     lstats,
		send:       Sender(cfg.Transport, cfg.Cluster, sstats, lstats),
		Ticker:     time.Tick(100 * time.Millisecond),
		SyncTicker: time.Tick(500 * time.Millisecond),
		snapCount:  cfg.SnapCount,
	}
	return s
}
Exemple #15
0
// NewServer creates a new EtcdServer from the supplied configuration. The
// configuration is considered static for the lifetime of the EtcdServer.
func NewServer(cfg *ServerConfig) *EtcdServer {
	m := cfg.Cluster.FindName(cfg.Name)
	if m == nil {
		// Should never happen
		log.Fatalf("could not find name %v in cluster!", cfg.Name)
	}
	snapdir := path.Join(cfg.DataDir, "snap")
	if err := os.MkdirAll(snapdir, privateDirMode); err != nil {
		log.Fatalf("etcdserver: cannot create snapshot directory: %v", err)
	}
	ss := snap.New(snapdir)
	st := store.New()
	var w *wal.WAL
	var n raft.Node
	var err error
	waldir := path.Join(cfg.DataDir, "wal")
	if !wal.Exist(waldir) {
		if w, err = wal.Create(waldir); err != nil {
			log.Fatal(err)
		}
		n = raft.StartNode(m.ID, cfg.Cluster.IDs(), 10, 1)
	} else {
		var index int64
		snapshot, err := ss.Load()
		if err != nil && err != snap.ErrNoSnapshot {
			log.Fatal(err)
		}
		if snapshot != nil {
			log.Printf("etcdserver: restart from snapshot at index %d", snapshot.Index)
			st.Recovery(snapshot.Data)
			index = snapshot.Index
		}

		// restart a node from previous wal
		if w, err = wal.OpenAtIndex(waldir, index); err != nil {
			log.Fatal(err)
		}
		wid, st, ents, err := w.ReadAll()
		if err != nil {
			log.Fatal(err)
		}
		// TODO(xiangli): save/recovery nodeID?
		if wid != 0 {
			log.Fatalf("unexpected nodeid %d: nodeid should always be zero until we save nodeid into wal", wid)
		}
		n = raft.RestartNode(m.ID, cfg.Cluster.IDs(), 10, 1, snapshot, st, ents)
	}

	cls := NewClusterStore(st, *cfg.Cluster)

	s := &EtcdServer{
		store: st,
		node:  n,
		name:  cfg.Name,
		storage: struct {
			*wal.WAL
			*snap.Snapshotter
		}{w, ss},
		send:         Sender(cfg.Transport, cls),
		clientURLs:   cfg.ClientURLs,
		ticker:       time.Tick(100 * time.Millisecond),
		syncTicker:   time.Tick(500 * time.Millisecond),
		snapCount:    cfg.SnapCount,
		ClusterStore: cls,
	}
	return s
}