// Start implements replication.LogReplicator
func (l *raftLog) Start(lo uint64) error {
	inited, err := l.config.Storage.(*raftStorage).IsInitialized()
	if err != nil {
		return err
	}
	if inited {
		l.config.Applied = lo
		l.node = raft.RestartNode(&l.config)
	} else {
		if lo != 0 {
			log.Panicf("storage uninitialized but state machine not fresh: lo = %d", lo)
		}
		// Add a dummy first entry
		hardState, confState, err := l.config.Storage.InitialState()
		if err != nil {
			return err
		}
		confNodes := make([]raft.Peer, 0, len(confState.Nodes))
		for _, id := range confState.Nodes {
			confNodes = append(confNodes, raft.Peer{ID: id})
		}
		l.config.Storage.(*raftStorage).save(hardState, make([]raftpb.Entry, 1))
		l.node = raft.StartNode(&l.config, confNodes)
	}
	l.leaderHintSet = make(chan bool, COMMITTED_BUFFER)
	l.waitCommitted = make(chan replication.LogEntry, COMMITTED_BUFFER)
	l.stop = make(chan struct{})
	l.stopped = make(chan struct{})
	l.grpcDropClient = make(chan uint64)
	l.grpcClientCache = make(map[uint64]proto.RaftClient)
	go l.run()
	return nil
}
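The Applied assignment on the restart path above is worth calling out: in etcd/raft, Config.Applied is the index of the last entry the application has already applied to its state machine, and it should only be set when restarting, so raft does not re-deliver entries the state machine has already seen. A minimal sketch of that pattern, assuming the caller has recovered appliedIndex itself; the package name, function name, and coreos/etcd import path are illustrative only.

package raftrestart

import "github.com/coreos/etcd/raft"

// restartWithApplied is a sketch only. cfg is assumed to already carry the
// node ID, tick settings, and a Storage rebuilt from snapshot + WAL; the
// caller recovers appliedIndex from its own durable state machine.
func restartWithApplied(cfg raft.Config, appliedIndex uint64) raft.Node {
	// Applied should only be set on restart; raft will not return entries
	// at or below this index to the application again.
	cfg.Applied = appliedIndex
	return raft.RestartNode(&cfg)
}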
// restart restarts the node. restart a started node
// blocks and might affect the future stop operation.
func (n *node) restart() {
	// wait for the shutdown
	<-n.stopc
	n.Node = raft.RestartNode(n.id, 10, 1, n.storage, 0)
	n.start()
	n.iface.connect()
}
func restartNode(cfg *ServerConfig, snapshot *raftpb.Snapshot) (types.ID, *cluster, raft.Node, *raftStorage, *wal.WAL) {
	var walsnap walpb.Snapshot
	if snapshot != nil {
		walsnap.Index, walsnap.Term = snapshot.Metadata.Index, snapshot.Metadata.Term
	}
	w, id, cid, st, ents := readWAL(cfg.WALDir(), walsnap)

	plog.Infof("restarting member %s in cluster %s at commit index %d", id, cid, st.Commit)
	cl := newCluster("")
	cl.SetID(cid)
	s := newRaftStorage()
	if snapshot != nil {
		s.ApplySnapshot(*snapshot)
	}
	s.SetHardState(st)
	s.Append(ents)
	c := &raft.Config{
		ID:              uint64(id),
		ElectionTick:    cfg.ElectionTicks,
		HeartbeatTick:   1,
		Storage:         s,
		MaxSizePerMsg:   maxSizePerMsg,
		MaxInflightMsgs: maxInflightMsgs,
	}
	n := raft.RestartNode(c)
	raftStatus = n.Status
	advanceTicksForElection(n, c.ElectionTick)
	return id, cl, n, s, w
}
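Most of the restart paths in these examples share the same storage-rebuild step: replay the snapshot, hard state, and WAL entries into a raft.MemoryStorage, then hand that storage to RestartNode with no peer list, since membership is recovered from the log itself. Below is a minimal sketch of that core, assuming the WAL and snapshot have already been read elsewhere; the package name, function name, and coreos/etcd import paths are illustrative, and note that a few of the older examples in this listing still use a pre-Config RestartNode signature with positional arguments.

package raftrestart

import (
	"github.com/coreos/etcd/raft"
	"github.com/coreos/etcd/raft/raftpb"
)

// rebuildAndRestart is a sketch only: id, snapshot, hardState, and entries
// are assumed to have been recovered from the snapshotter and WAL already.
func rebuildAndRestart(id uint64, snapshot *raftpb.Snapshot, hardState raftpb.HardState, entries []raftpb.Entry) raft.Node {
	// Rebuild the in-memory log in the same order the originals use:
	// snapshot first, then the persisted hard state, then the entry tail.
	storage := raft.NewMemoryStorage()
	if snapshot != nil {
		storage.ApplySnapshot(*snapshot)
	}
	storage.SetHardState(hardState)
	storage.Append(entries)

	c := &raft.Config{
		ID:              id,
		ElectionTick:    10,
		HeartbeatTick:   1,
		Storage:         storage,
		MaxSizePerMsg:   1024 * 1024,
		MaxInflightMsgs: 256,
	}
	// RestartNode takes no peer list: the configuration is recovered from
	// the snapshot's ConfState and any conf-change entries in the log.
	return raft.RestartNode(c)
}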
// InitAndStartNode gets called after having at least one membership sync with the cluster.
func (n *node) InitAndStartNode(wal *raftwal.Wal) {
	restart, err := n.initFromWal(wal)
	x.Check(err)

	if restart {
		fmt.Printf("RESTARTING\n")
		n.raft = raft.RestartNode(n.cfg)
	} else {
		if groups().HasPeer(n.gid) {
			n.joinPeers()
			n.raft = raft.StartNode(n.cfg, nil)
		} else {
			peers := []raft.Peer{{ID: n.id}}
			n.raft = raft.StartNode(n.cfg, peers)
			// Trigger election, so this node can become the leader of this single-node cluster.
			n.canCampaign = true
		}
	}
	go n.processCommitCh()
	go n.Run()
	// TODO: Find a better way to snapshot, so we don't lose the membership
	// state information, which isn't persisted.
	// go n.snapshotPeriodically()
	go n.batchAndSendMessages()
}
func (rc *raftNode) startRaft() {
	if !fileutil.Exist(rc.snapdir) {
		if err := os.Mkdir(rc.snapdir, 0750); err != nil {
			log.Fatalf("raftexample: cannot create dir for snapshot (%v)", err)
		}
	}
	rc.snapshotter = snap.New(rc.snapdir)
	rc.snapshotterReady <- rc.snapshotter

	oldwal := wal.Exist(rc.waldir)
	rc.wal = rc.replayWAL()

	rpeers := make([]raft.Peer, len(rc.peers))
	for i := range rpeers {
		rpeers[i] = raft.Peer{ID: uint64(i + 1)}
	}
	c := &raft.Config{
		ID:              uint64(rc.id),
		ElectionTick:    10,
		HeartbeatTick:   1,
		Storage:         rc.raftStorage,
		MaxSizePerMsg:   1024 * 1024,
		MaxInflightMsgs: 256,
	}

	if oldwal {
		rc.node = raft.RestartNode(c)
	} else {
		startPeers := rpeers
		if rc.join {
			startPeers = nil
		}
		rc.node = raft.StartNode(c, startPeers)
	}

	ss := &stats.ServerStats{}
	ss.Initialize()

	rc.transport = &rafthttp.Transport{
		ID:          types.ID(rc.id),
		ClusterID:   0x1000,
		Raft:        rc,
		ServerStats: ss,
		LeaderStats: stats.NewLeaderStats(strconv.Itoa(rc.id)),
		ErrorC:      make(chan error),
	}

	rc.transport.Start()
	for i := range rc.peers {
		if i+1 != rc.id {
			rc.transport.AddPeer(types.ID(i+1), []string{rc.peers[i]})
		}
	}

	go rc.serveRaft()
	go rc.serveChannels()
}
func (n *Node) loadAndStart(ctx context.Context, forceNewCluster bool) error {
	walDir := n.walDir()
	snapDir := n.snapDir()

	if err := os.MkdirAll(snapDir, 0700); err != nil {
		return fmt.Errorf("create snapshot directory error: %v", err)
	}

	// Create a snapshotter
	n.snapshotter = snap.New(snapDir)

	if !wal.Exist(walDir) {
		raftNode := &api.RaftMember{
			RaftID: n.Config.ID,
			Addr:   n.Address,
		}
		metadata, err := raftNode.Marshal()
		if err != nil {
			return fmt.Errorf("error marshalling raft node: %v", err)
		}
		n.wal, err = wal.Create(walDir, metadata)
		if err != nil {
			return fmt.Errorf("create wal error: %v", err)
		}

		n.cluster.AddMember(&membership.Member{RaftMember: raftNode})
		n.startNodePeers = []raft.Peer{{ID: n.Config.ID, Context: metadata}}
		return nil
	}

	// Load snapshot data
	snapshot, err := n.snapshotter.Load()
	if err != nil && err != snap.ErrNoSnapshot {
		return err
	}

	if snapshot != nil {
		// Load the snapshot data into the store
		if err := n.restoreFromSnapshot(snapshot.Data, forceNewCluster); err != nil {
			return err
		}
	}

	// Read logs to fully catch up store
	if err := n.readWAL(ctx, snapshot, forceNewCluster); err != nil {
		return err
	}

	n.Node = raft.RestartNode(n.Config)
	return nil
}
func restartNode(cfg *ServerConfig, index uint64, snapshot *raftpb.Snapshot) (types.ID, raft.Node, *raft.MemoryStorage, *wal.WAL) {
	w, id, cid, st, ents := readWAL(cfg.WALDir(), index)
	cfg.Cluster.SetID(cid)

	log.Printf("etcdserver: restart member %s in cluster %s at commit index %d", id, cfg.Cluster.ID(), st.Commit)
	s := raft.NewMemoryStorage()
	if snapshot != nil {
		s.ApplySnapshot(*snapshot)
	}
	s.SetHardState(st)
	s.Append(ents)
	n := raft.RestartNode(uint64(id), 10, 1, s)
	return id, n, s, w
}
func restartAsStandaloneNode(cfg *ServerConfig, snapshot *raftpb.Snapshot) (types.ID, *cluster, raft.Node, *raftStorage, *wal.WAL) {
	var walsnap walpb.Snapshot
	if snapshot != nil {
		walsnap.Index, walsnap.Term = snapshot.Metadata.Index, snapshot.Metadata.Term
	}
	w, id, cid, st, ents := readWAL(cfg.WALDir(), walsnap)

	// discard the previously uncommitted entries
	for i, ent := range ents {
		if ent.Index > st.Commit {
			plog.Infof("discarding %d uncommitted WAL entries ", len(ents)-i)
			ents = ents[:i]
			break
		}
	}

	// force append the configuration change entries
	toAppEnts := createConfigChangeEnts(getIDs(snapshot, ents), uint64(id), st.Term, st.Commit)
	ents = append(ents, toAppEnts...)

	// force commit newly appended entries
	err := w.Save(raftpb.HardState{}, toAppEnts)
	if err != nil {
		plog.Fatalf("%v", err)
	}
	if len(ents) != 0 {
		st.Commit = ents[len(ents)-1].Index
	}

	plog.Printf("forcing restart of member %s in cluster %s at commit index %d", id, cid, st.Commit)
	cl := newCluster("")
	cl.SetID(cid)
	s := newRaftStorage()
	if snapshot != nil {
		s.ApplySnapshot(*snapshot)
	}
	s.SetHardState(st)
	s.Append(ents)
	c := &raft.Config{
		ID:              uint64(id),
		ElectionTick:    cfg.ElectionTicks,
		HeartbeatTick:   1,
		Storage:         s,
		MaxSizePerMsg:   maxSizePerMsg,
		MaxInflightMsgs: maxInflightMsgs,
	}
	n := raft.RestartNode(c)
	raftStatus = n.Status
	return id, cl, n, s, w
}
// restart restarts the node. restart a started node
// blocks and might affect the future stop operation.
func (n *node) restart() {
	// wait for the shutdown
	<-n.stopc
	c := &raft.Config{
		ID:              n.id,
		ElectionTick:    10,
		HeartbeatTick:   1,
		Storage:         n.storage,
		MaxSizePerMsg:   1024 * 1024,
		MaxInflightMsgs: 256,
	}
	n.Node = raft.RestartNode(c)
	n.start()
	n.iface.connect()
}
func (rc *raftNode) startRaft() {
	oldwal := wal.Exist(rc.waldir)
	rc.wal = rc.replayWAL()

	rpeers := make([]raft.Peer, len(rc.peers))
	for i := range rpeers {
		rpeers[i] = raft.Peer{ID: uint64(i + 1)}
	}
	c := &raft.Config{
		ID:              uint64(rc.id),
		ElectionTick:    10,
		HeartbeatTick:   1,
		Storage:         rc.raftStorage,
		MaxSizePerMsg:   1024 * 1024,
		MaxInflightMsgs: 256,
	}

	if oldwal {
		rc.node = raft.RestartNode(c)
	} else {
		rc.node = raft.StartNode(c, rpeers)
	}

	ss := &stats.ServerStats{}
	ss.Initialize()

	rc.transport = &rafthttp.Transport{
		ID:          types.ID(rc.id),
		ClusterID:   0x1000,
		Raft:        rc,
		ServerStats: ss,
		LeaderStats: stats.NewLeaderStats(strconv.Itoa(rc.id)),
		ErrorC:      make(chan error),
	}

	rc.transport.Start()
	for i := range rc.peers {
		if i+1 != rc.id {
			rc.transport.AddPeer(types.ID(i+1), []string{rc.peers[i]})
		}
	}

	go rc.serveRaft()
	go rc.serveChannels()
}
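The raftexample snippets above hinge on one decision: if a WAL already exists, the node has run before and must be restarted from its replayed storage; only a brand-new node may pass an initial peer list to StartNode. Here is a minimal, self-contained sketch of that decision, assuming the WAL has already been replayed into storage elsewhere; the package name, function name, and coreos/etcd import paths are illustrative only.

package raftrestart

import (
	"github.com/coreos/etcd/raft"
	"github.com/coreos/etcd/wal"
)

// startOrRestart is a sketch only: storage is assumed to already contain the
// replayed WAL (and snapshot, if any) for this node.
func startOrRestart(id uint64, walDir string, storage *raft.MemoryStorage, peerIDs []uint64) raft.Node {
	c := &raft.Config{
		ID:              id,
		ElectionTick:    10,
		HeartbeatTick:   1,
		Storage:         storage,
		MaxSizePerMsg:   1024 * 1024,
		MaxInflightMsgs: 256,
	}
	if wal.Exist(walDir) {
		// The node has been a cluster member before; its membership is in
		// the replayed log, so no peer list is passed on restart.
		return raft.RestartNode(c)
	}
	peers := make([]raft.Peer, 0, len(peerIDs))
	for _, pid := range peerIDs {
		peers = append(peers, raft.Peer{ID: pid})
	}
	return raft.StartNode(c, peers)
}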
func restartNode(cfg *ServerConfig, snapshot *raftpb.Snapshot) (types.ID, raft.Node, *raft.MemoryStorage, *wal.WAL) {
	var walsnap walpb.Snapshot
	if snapshot != nil {
		walsnap.Index, walsnap.Term = snapshot.Metadata.Index, snapshot.Metadata.Term
	}
	w, id, cid, st, ents := readWAL(cfg.WALDir(), walsnap)
	cfg.Cluster.SetID(cid)

	log.Printf("etcdserver: restart member %s in cluster %s at commit index %d", id, cfg.Cluster.ID(), st.Commit)

	s := raft.NewMemoryStorage()
	if snapshot != nil {
		s.ApplySnapshot(*snapshot)
	}
	s.SetHardState(st)
	s.Append(ents)
	n := raft.RestartNode(uint64(id), cfg.ElectionTicks, 1, s, 0)
	return id, n, s, w
}
func restartNode(cfg *ServerConfig, index uint64, snapshot *raftpb.Snapshot) (id uint64, n raft.Node, w *wal.WAL) {
	var err error
	// restart a node from previous wal
	if w, err = wal.OpenAtIndex(cfg.WALDir(), index); err != nil {
		log.Fatal(err)
	}
	wmetadata, st, ents, err := w.ReadAll()
	if err != nil {
		log.Fatal(err)
	}
	var metadata pb.Metadata
	pbutil.MustUnmarshal(&metadata, wmetadata)
	id = metadata.NodeID
	cfg.Cluster.SetID(metadata.ClusterID)

	log.Printf("etcdserver: restart member %x in cluster %x at commit index %d", id, cfg.Cluster.ID(), st.Commit)
	n = raft.RestartNode(id, 10, 1, snapshot, st, ents)
	return
}
func restartAsStandaloneNode(cfg *ServerConfig, snapshot *raftpb.Snapshot) (types.ID, raft.Node, *raft.MemoryStorage, *wal.WAL) {
	var walsnap walpb.Snapshot
	if snapshot != nil {
		walsnap.Index, walsnap.Term = snapshot.Metadata.Index, snapshot.Metadata.Term
	}
	w, id, cid, st, ents := readWAL(cfg.WALDir(), walsnap)
	cfg.Cluster.SetID(cid)

	// discard the previously uncommitted entries
	for i, ent := range ents {
		if ent.Index > st.Commit {
			log.Printf("etcdserver: discarding %d uncommitted WAL entries ", len(ents)-i)
			ents = ents[:i]
			break
		}
	}

	// force append the configuration change entries
	toAppEnts := createConfigChangeEnts(getIDs(snapshot, ents), uint64(id), st.Term, st.Commit)
	ents = append(ents, toAppEnts...)

	// force commit newly appended entries
	for _, e := range toAppEnts {
		err := w.SaveEntry(&e)
		if err != nil {
			log.Fatalf("etcdserver: %v", err)
		}
	}
	if len(ents) != 0 {
		st.Commit = ents[len(ents)-1].Index
	}

	log.Printf("etcdserver: forcing restart of member %s in cluster %s at commit index %d", id, cfg.Cluster.ID(), st.Commit)
	s := raft.NewMemoryStorage()
	if snapshot != nil {
		s.ApplySnapshot(*snapshot)
	}
	s.SetHardState(st)
	s.Append(ents)
	n := raft.RestartNode(uint64(id), 10, 1, s)
	return id, n, s, w
}
func (n *Node) loadAndStart(ctx context.Context, forceNewCluster bool) error {
	walDir := n.walDir()
	snapDir := n.snapDir()

	if err := os.MkdirAll(snapDir, 0700); err != nil {
		return fmt.Errorf("create snapshot directory error: %v", err)
	}

	// Create a snapshotter
	n.snapshotter = snap.New(snapDir)

	if !wal.Exist(walDir) {
		return errNoWAL
	}

	// Load snapshot data
	snapshot, err := n.snapshotter.Load()
	if err != nil && err != snap.ErrNoSnapshot {
		return err
	}

	if snapshot != nil {
		// Load the snapshot data into the store
		if err := n.restoreFromSnapshot(snapshot.Data, forceNewCluster); err != nil {
			return err
		}
	}

	// Read logs to fully catch up store
	if err := n.readWAL(ctx, snapshot, forceNewCluster); err != nil {
		return err
	}

	n.Node = raft.RestartNode(n.Config)
	return nil
}
// NewNode generates a new Raft node
func NewNode(ctx context.Context, opts NewNodeOptions) (*Node, error) {
	cfg := opts.Config
	if cfg == nil {
		cfg = DefaultNodeConfig()
	}
	if opts.TickInterval == 0 {
		opts.TickInterval = time.Second
	}

	raftID, err := identity.ParseNodeID(opts.ID)
	if err != nil {
		return nil, err
	}

	raftStore := raft.NewMemoryStorage()

	ctx, cancel := context.WithCancel(ctx)

	n := &Node{
		Ctx:            ctx,
		cancel:         cancel,
		cluster:        membership.NewCluster(),
		tlsCredentials: opts.TLSCredentials,
		raftStore:      raftStore,
		Address:        opts.Addr,
		Config: &raft.Config{
			ElectionTick:    cfg.ElectionTick,
			HeartbeatTick:   cfg.HeartbeatTick,
			Storage:         raftStore,
			MaxSizePerMsg:   cfg.MaxSizePerMsg,
			MaxInflightMsgs: cfg.MaxInflightMsgs,
			Logger:          cfg.Logger,
			ID:              raftID,
		},
		forceNewCluster:     opts.ForceNewCluster,
		stopCh:              make(chan struct{}),
		doneCh:              make(chan struct{}),
		StateDir:            opts.StateDir,
		joinAddr:            opts.JoinAddr,
		sendTimeout:         2 * time.Second,
		leadershipBroadcast: events.NewBroadcaster(),
	}
	n.memoryStore = store.NewMemoryStore(n)

	if opts.ClockSource == nil {
		n.ticker = clock.NewClock().NewTicker(opts.TickInterval)
	} else {
		n.ticker = opts.ClockSource.NewTicker(opts.TickInterval)
	}
	if opts.SendTimeout != 0 {
		n.sendTimeout = opts.SendTimeout
	}

	if err := n.loadAndStart(ctx, opts.ForceNewCluster); err != nil {
		n.ticker.Stop()
		return nil, err
	}

	snapshot, err := raftStore.Snapshot()
	// Snapshot never returns an error
	if err != nil {
		panic("could not get snapshot of raft store")
	}

	n.confState = snapshot.Metadata.ConfState
	n.appliedIndex = snapshot.Metadata.Index
	n.snapshotIndex = snapshot.Metadata.Index
	n.reqIDGen = idutil.NewGenerator(uint16(n.Config.ID), time.Now())
	n.wait = newWait()

	if n.startNodePeers != nil {
		if n.joinAddr != "" {
			c, err := n.ConnectToMember(n.joinAddr, 10*time.Second)
			if err != nil {
				return nil, err
			}
			defer func() {
				_ = c.Conn.Close()
			}()

			ctx, cancel := context.WithTimeout(n.Ctx, 10*time.Second)
			defer cancel()
			resp, err := c.Join(ctx, &api.JoinRequest{
				Addr: n.Address,
			})
			if err != nil {
				return nil, err
			}

			n.Node = raft.StartNode(n.Config, []raft.Peer{})

			if err := n.registerNodes(resp.Members); err != nil {
				return nil, err
			}
		} else {
			n.Node = raft.StartNode(n.Config, n.startNodePeers)
			if err := n.Campaign(n.Ctx); err != nil {
				return nil, err
			}
		}
		return n, nil
	}

	if n.joinAddr != "" {
		n.Config.Logger.Warning("ignoring request to join cluster, because raft state already exists")
	}
	n.Node = raft.RestartNode(n.Config)
	return n, nil
}
// JoinAndStart joins and starts the raft server
func (n *Node) JoinAndStart(ctx context.Context) (err error) {
	ctx, cancel := n.WithContext(ctx)
	defer func() {
		cancel()
		if err != nil {
			n.done()
		}
	}()

	loadAndStartErr := n.loadAndStart(ctx, n.opts.ForceNewCluster)
	if loadAndStartErr != nil && loadAndStartErr != errNoWAL {
		return loadAndStartErr
	}

	snapshot, err := n.raftStore.Snapshot()
	// Snapshot never returns an error
	if err != nil {
		panic("could not get snapshot of raft store")
	}
	n.confState = snapshot.Metadata.ConfState
	n.appliedIndex = snapshot.Metadata.Index
	n.snapshotIndex = snapshot.Metadata.Index

	if loadAndStartErr == errNoWAL {
		if n.opts.JoinAddr != "" {
			c, err := n.ConnectToMember(n.opts.JoinAddr, 10*time.Second)
			if err != nil {
				return err
			}
			client := api.NewRaftMembershipClient(c.Conn)
			defer func() {
				_ = c.Conn.Close()
			}()

			joinCtx, joinCancel := context.WithTimeout(ctx, 10*time.Second)
			defer joinCancel()
			resp, err := client.Join(joinCtx, &api.JoinRequest{
				Addr: n.opts.Addr,
			})
			if err != nil {
				return err
			}

			n.Config.ID = resp.RaftID

			if _, err := n.createWAL(n.opts.ID); err != nil {
				return err
			}

			n.raftNode = raft.StartNode(n.Config, []raft.Peer{})

			if err := n.registerNodes(resp.Members); err != nil {
				// Only log a WAL close failure if one actually occurred.
				if walErr := n.wal.Close(); walErr != nil {
					log.G(ctx).WithError(walErr).Error("raft: error closing WAL")
				}
				return err
			}
		} else {
			// First member in the cluster, self-assign ID
			n.Config.ID = uint64(rand.Int63()) + 1
			peer, err := n.createWAL(n.opts.ID)
			if err != nil {
				return err
			}
			n.raftNode = raft.StartNode(n.Config, []raft.Peer{peer})
			n.campaignWhenAble = true
		}
		atomic.StoreUint32(&n.isMember, 1)
		return nil
	}

	if n.opts.JoinAddr != "" {
		log.G(ctx).Warning("ignoring request to join cluster, because raft state already exists")
	}
	n.campaignWhenAble = true
	n.raftNode = raft.RestartNode(n.Config)
	atomic.StoreUint32(&n.isMember, 1)
	return nil
}
// JoinAndStart joins and starts the raft server
func (n *Node) JoinAndStart() error {
	loadAndStartErr := n.loadAndStart(n.Ctx, n.opts.ForceNewCluster)
	if loadAndStartErr != nil && loadAndStartErr != errNoWAL {
		n.ticker.Stop()
		return loadAndStartErr
	}

	snapshot, err := n.raftStore.Snapshot()
	// Snapshot never returns an error
	if err != nil {
		panic("could not get snapshot of raft store")
	}
	n.confState = snapshot.Metadata.ConfState
	n.appliedIndex = snapshot.Metadata.Index
	n.snapshotIndex = snapshot.Metadata.Index

	if loadAndStartErr == errNoWAL {
		if n.joinAddr != "" {
			c, err := n.ConnectToMember(n.joinAddr, 10*time.Second)
			if err != nil {
				return err
			}
			client := api.NewRaftMembershipClient(c.Conn)
			defer func() {
				_ = c.Conn.Close()
			}()

			ctx, cancel := context.WithTimeout(n.Ctx, 10*time.Second)
			defer cancel()
			resp, err := client.Join(ctx, &api.JoinRequest{
				Addr: n.Address,
			})
			if err != nil {
				return err
			}

			n.Config.ID = resp.RaftID

			if _, err := n.createWAL(n.opts.ID); err != nil {
				return err
			}

			n.Node = raft.StartNode(n.Config, []raft.Peer{})

			if err := n.registerNodes(resp.Members); err != nil {
				return err
			}
		} else {
			// First member in the cluster, self-assign ID
			n.Config.ID = uint64(rand.Int63()) + 1
			peer, err := n.createWAL(n.opts.ID)
			if err != nil {
				return err
			}
			n.Node = raft.StartNode(n.Config, []raft.Peer{peer})
			if err := n.Campaign(n.Ctx); err != nil {
				return err
			}
		}
		atomic.StoreUint32(&n.isMember, 1)
		return nil
	}

	if n.joinAddr != "" {
		n.Config.Logger.Warning("ignoring request to join cluster, because raft state already exists")
	}

	n.Node = raft.RestartNode(n.Config)
	atomic.StoreUint32(&n.isMember, 1)
	return nil
}
// NewServer creates a new EtcdServer from the supplied configuration. The
// configuration is considered static for the lifetime of the EtcdServer.
func NewServer(cfg *ServerConfig) *EtcdServer {
	m := cfg.Cluster.FindName(cfg.Name)
	if m == nil {
		// Should never happen
		log.Fatalf("could not find name %v in cluster!", cfg.Name)
	}
	snapdir := path.Join(cfg.DataDir, "snap")
	if err := os.MkdirAll(snapdir, privateDirMode); err != nil {
		log.Fatalf("etcdserver: cannot create snapshot directory: %v", err)
	}
	ss := snap.New(snapdir)
	st := store.New()
	var w *wal.WAL
	var n raft.Node
	var err error
	waldir := path.Join(cfg.DataDir, "wal")
	if !wal.Exist(waldir) {
		if w, err = wal.Create(waldir); err != nil {
			log.Fatal(err)
		}
		n = raft.StartNode(m.ID, cfg.Cluster.IDs(), 10, 1)
	} else {
		var index int64
		snapshot, err := ss.Load()
		if err != nil && err != snap.ErrNoSnapshot {
			log.Fatal(err)
		}
		if snapshot != nil {
			log.Printf("etcdserver: restart from snapshot at index %d", snapshot.Index)
			st.Recovery(snapshot.Data)
			index = snapshot.Index
		}

		// restart a node from previous wal
		if w, err = wal.OpenAtIndex(waldir, index); err != nil {
			log.Fatal(err)
		}
		wid, st, ents, err := w.ReadAll()
		if err != nil {
			log.Fatal(err)
		}
		// TODO(xiangli): save/recovery nodeID?
		if wid != 0 {
			log.Fatalf("unexpected nodeid %d: nodeid should always be zero until we save nodeid into wal", wid)
		}
		n = raft.RestartNode(m.ID, cfg.Cluster.IDs(), 10, 1, snapshot, st, ents)
	}

	cls := NewClusterStore(st, *cfg.Cluster)

	s := &EtcdServer{
		store: st,
		node:  n,
		name:  cfg.Name,
		storage: struct {
			*wal.WAL
			*snap.Snapshotter
		}{w, ss},
		send:         Sender(cfg.Transport, cls),
		clientURLs:   cfg.ClientURLs,
		ticker:       time.Tick(100 * time.Millisecond),
		syncTicker:   time.Tick(500 * time.Millisecond),
		snapCount:    cfg.SnapCount,
		ClusterStore: cls,
	}
	return s
}