func (n *node) ProposeConfChange(ctx context.Context, cc pb.ConfChange) error { data, err := cc.Marshal() if err != nil { return err } return n.Step(ctx, pb.Message{Type: pb.MsgProp, Entries: []pb.Entry{{Type: pb.EntryConfChange, Data: data}}}) }
// TestMultiNodeStart ensures that a node can be started correctly. The node should // start with correct configuration change entries, and can accept and commit // proposals. func TestMultiNodeStart(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() cc := raftpb.ConfChange{Type: raftpb.ConfChangeAddNode, NodeID: 1} ccdata, err := cc.Marshal() if err != nil { t.Fatalf("unexpected marshal error: %v", err) } wants := []Ready{ { SoftState: &SoftState{Lead: 1, RaftState: StateLeader}, HardState: raftpb.HardState{Term: 2, Commit: 2, Vote: 1}, Entries: []raftpb.Entry{ {Type: raftpb.EntryConfChange, Term: 1, Index: 1, Data: ccdata}, {Term: 2, Index: 2}, }, CommittedEntries: []raftpb.Entry{ {Type: raftpb.EntryConfChange, Term: 1, Index: 1, Data: ccdata}, {Term: 2, Index: 2}, }, }, { HardState: raftpb.HardState{Term: 2, Commit: 3, Vote: 1}, Entries: []raftpb.Entry{{Term: 2, Index: 3, Data: []byte("foo")}}, CommittedEntries: []raftpb.Entry{{Term: 2, Index: 3, Data: []byte("foo")}}, }, } mn := StartMultiNode(1) storage := NewMemoryStorage() mn.CreateGroup(1, newTestConfig(1, nil, 10, 1, storage), []Peer{{ID: 1}}) mn.Campaign(ctx, 1) gs := <-mn.Ready() g := gs[1] if !reflect.DeepEqual(g, wants[0]) { t.Fatalf("#%d: g = %+v,\n w %+v", 1, g, wants[0]) } else { storage.Append(g.Entries) mn.Advance(gs) } mn.Propose(ctx, 1, []byte("foo")) if gs2 := <-mn.Ready(); !reflect.DeepEqual(gs2[1], wants[1]) { t.Errorf("#%d: g = %+v,\n w %+v", 2, gs2[1], wants[1]) } else { storage.Append(gs2[1].Entries) mn.Advance(gs2) } select { case rd := <-mn.Ready(): t.Errorf("unexpected Ready: %+v", rd) case <-time.After(time.Millisecond): } }
func (mn *multiNode) ProposeConfChange(ctx context.Context, group uint64, cc pb.ConfChange) error { data, err := cc.Marshal() if err != nil { return err } return mn.Step(ctx, group, pb.Message{ Type: pb.MsgProp, Entries: []pb.Entry{ {Type: pb.EntryConfChange, Data: data}, }, }) }
// TestMultiNodeProposeConfig ensures that multiNode.ProposeConfChange // sends the given configuration proposal to the underlying raft. func TestMultiNodeProposeConfig(t *testing.T) { mn := newMultiNode(1) go mn.run() s := NewMemoryStorage() mn.CreateGroup(1, newTestConfig(1, nil, 10, 1, s), []Peer{{ID: 1}}) mn.Campaign(context.TODO(), 1) proposed := false var lastIndex uint64 var ccdata []byte for { rds := <-mn.Ready() rd := rds[1] s.Append(rd.Entries) // change the step function to appendStep until this raft becomes leader if !proposed && rd.SoftState.Lead == mn.id { cc := raftpb.ConfChange{Type: raftpb.ConfChangeAddNode, NodeID: 1} var err error ccdata, err = cc.Marshal() if err != nil { t.Fatal(err) } mn.ProposeConfChange(context.TODO(), 1, cc) proposed = true } mn.Advance(rds) var err error lastIndex, err = s.LastIndex() if err != nil { t.Fatal(err) } if lastIndex >= 3 { break } } mn.Stop() entries, err := s.Entries(lastIndex, lastIndex+1, noLimit) if err != nil { t.Fatal(err) } if len(entries) != 1 { t.Fatalf("len(entries) = %d, want %d", len(entries), 1) } if entries[0].Type != raftpb.EntryConfChange { t.Fatalf("type = %v, want %v", entries[0].Type, raftpb.EntryConfChange) } if !bytes.Equal(entries[0].Data, ccdata) { t.Errorf("data = %v, want %v", entries[0].Data, ccdata) } }
// TestNodeProposeConfig ensures that node.ProposeConfChange sends the given configuration proposal // to the underlying raft. func TestNodeProposeConfig(t *testing.T) { msgs := []raftpb.Message{} appendStep := func(r *raft, m raftpb.Message) { msgs = append(msgs, m) } n := newNode() s := NewMemoryStorage() r := newRaft(1, []uint64{1}, 10, 1, s, 0) go n.run(r) n.Campaign(context.TODO()) for { rd := <-n.Ready() s.Append(rd.Entries) // change the step function to appendStep until this raft becomes leader if rd.SoftState.Lead == r.id { r.step = appendStep n.Advance() break } n.Advance() } cc := raftpb.ConfChange{Type: raftpb.ConfChangeAddNode, NodeID: 1} ccdata, err := cc.Marshal() if err != nil { t.Fatal(err) } n.ProposeConfChange(context.TODO(), cc) n.Stop() if len(msgs) != 1 { t.Fatalf("len(msgs) = %d, want %d", len(msgs), 1) } if msgs[0].Type != raftpb.MsgProp { t.Errorf("msg type = %d, want %d", msgs[0].Type, raftpb.MsgProp) } if !reflect.DeepEqual(msgs[0].Entries[0].Data, ccdata) { t.Errorf("data = %v, want %v", msgs[0].Entries[0].Data, ccdata) } }
// applyConfChange applies a ConfChange to the server. It is only // invoked with a ConfChange that has already passed through Raft func (s *EtcdServer) applyConfChange(cc raftpb.ConfChange, confState *raftpb.ConfState) (bool, error) { if err := s.cluster.ValidateConfigurationChange(cc); err != nil { cc.NodeID = raft.None s.r.ApplyConfChange(cc) return false, err } *confState = *s.r.ApplyConfChange(cc) switch cc.Type { case raftpb.ConfChangeAddNode: m := new(Member) if err := json.Unmarshal(cc.Context, m); err != nil { plog.Panicf("unmarshal member should never fail: %v", err) } if cc.NodeID != uint64(m.ID) { plog.Panicf("nodeID should always be equal to member ID") } s.cluster.AddMember(m) if m.ID == s.id { plog.Noticef("added local member %s %v to cluster %s", m.ID, m.PeerURLs, s.cluster.ID()) } else { s.r.transport.AddPeer(m.ID, m.PeerURLs) plog.Noticef("added member %s %v to cluster %s", m.ID, m.PeerURLs, s.cluster.ID()) } case raftpb.ConfChangeRemoveNode: id := types.ID(cc.NodeID) s.cluster.RemoveMember(id) if id == s.id { return true, nil } else { s.r.transport.RemovePeer(id) plog.Noticef("removed member %s from cluster %s", id, s.cluster.ID()) } case raftpb.ConfChangeUpdateNode: m := new(Member) if err := json.Unmarshal(cc.Context, m); err != nil { plog.Panicf("unmarshal member should never fail: %v", err) } if cc.NodeID != uint64(m.ID) { plog.Panicf("nodeID should always be equal to member ID") } s.cluster.UpdateRaftAttributes(m.ID, m.RaftAttributes) if m.ID == s.id { plog.Noticef("update local member %s %v in cluster %s", m.ID, m.PeerURLs, s.cluster.ID()) } else { s.r.transport.UpdatePeer(m.ID, m.PeerURLs) plog.Noticef("update member %s %v in cluster %s", m.ID, m.PeerURLs, s.cluster.ID()) } } return false, nil }
// StartNode returns a new Node given a unique raft id, a list of raft peers, and // the election and heartbeat timeouts in units of ticks. // It appends a ConfChangeAddNode entry for each given peer to the initial log. func StartNode(id uint64, peers []Peer, election, heartbeat int, storage Storage) Node { n := newNode() r := newRaft(id, nil, election, heartbeat, storage, 0) // become the follower at term 1 and apply initial configuration // entires of term 1 r.becomeFollower(1, None) for _, peer := range peers { cc := pb.ConfChange{Type: pb.ConfChangeAddNode, NodeID: peer.ID, Context: peer.Context} d, err := cc.Marshal() if err != nil { panic("unexpected marshal error") } e := pb.Entry{Type: pb.EntryConfChange, Term: 1, Index: r.raftLog.lastIndex() + 1, Data: d} r.raftLog.append(e) } // Mark these initial entries as committed. // TODO(bdarnell): These entries are still unstable; do we need to preserve // the invariant that committed < unstable? r.raftLog.committed = r.raftLog.lastIndex() r.Commit = r.raftLog.committed // Now apply them, mainly so that the application can call Campaign // immediately after StartNode in tests. Note that these nodes will // be added to raft twice: here and when the application's Ready // loop calls ApplyConfChange. The calls to addNode must come after // all calls to raftLog.append so progress.next is set after these // bootstrapping entries (it is an error if we try to append these // entries since they have already been committed). // We do not set raftLog.applied so the application will be able // to observe all conf changes via Ready.CommittedEntries. for _, peer := range peers { r.addNode(peer.ID) } go n.run(r) return &n }
// configure sends a configuration change through consensus and // then waits for it to be applied to the server. It // will block until the change is performed or there is an error. func (s *EtcdServer) configure(ctx context.Context, cc raftpb.ConfChange) error { cc.ID = s.reqIDGen.Next() ch := s.w.Register(cc.ID) if err := s.r.ProposeConfChange(ctx, cc); err != nil { s.w.Trigger(cc.ID, nil) return err } select { case x := <-ch: if err, ok := x.(error); ok { return err } if x != nil { log.Panicf("return type should always be error") } return nil case <-ctx.Done(): s.w.Trigger(cc.ID, nil) // GC wait return parseCtxErr(ctx.Err()) case <-s.done: return ErrStopped } }
// TestNodeStart ensures that a node can be started correctly. The node should // start with correct configuration change entries, and can accept and commit // proposals. func TestNodeStart(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() cc := raftpb.ConfChange{Type: raftpb.ConfChangeAddNode, NodeID: 1} ccdata, err := cc.Marshal() if err != nil { t.Fatalf("unexpected marshal error: %v", err) } wants := []Ready{ { SoftState: &SoftState{Lead: 1, RaftState: StateLeader}, HardState: raftpb.HardState{Term: 2, Commit: 2, Vote: 1}, Entries: []raftpb.Entry{ {Type: raftpb.EntryConfChange, Term: 1, Index: 1, Data: ccdata}, {Term: 2, Index: 2}, }, CommittedEntries: []raftpb.Entry{ {Type: raftpb.EntryConfChange, Term: 1, Index: 1, Data: ccdata}, {Term: 2, Index: 2}, }, }, { HardState: raftpb.HardState{Term: 2, Commit: 3, Vote: 1}, Entries: []raftpb.Entry{{Term: 2, Index: 3, Data: []byte("foo")}}, CommittedEntries: []raftpb.Entry{{Term: 2, Index: 3, Data: []byte("foo")}}, }, } storage := NewMemoryStorage() c := &Config{ ID: 1, ElectionTick: 10, HeartbeatTick: 1, Storage: storage, MaxSizePerMsg: noLimit, MaxInflightMsgs: 256, } n := StartNode(c, []Peer{{ID: 1}}) n.Campaign(ctx) g := <-n.Ready() if !reflect.DeepEqual(g, wants[0]) { t.Fatalf("#%d: g = %+v,\n w %+v", 1, g, wants[0]) } else { storage.Append(g.Entries) n.Advance() } n.Propose(ctx, []byte("foo")) if g2 := <-n.Ready(); !reflect.DeepEqual(g2, wants[1]) { t.Errorf("#%d: g = %+v,\n w %+v", 2, g2, wants[1]) } else { storage.Append(g2.Entries) n.Advance() } select { case rd := <-n.Ready(): t.Errorf("unexpected Ready: %+v", rd) case <-time.After(time.Millisecond): } }
func (mn *multiNode) run() { groups := map[uint64]*groupState{} rds := map[uint64]Ready{} var advancec chan map[uint64]Ready for { // Only select readyc if we have something to report and we are not // currently waiting for an advance. readyc := mn.readyc if len(rds) == 0 || advancec != nil { readyc = nil } // group points to the group that was touched on this iteration (if any) var group *groupState select { case gc := <-mn.groupc: gc.config.ID = mn.id r := newRaft(gc.config) group = &groupState{ id: gc.id, raft: r, } groups[gc.id] = group lastIndex, err := gc.config.Storage.LastIndex() if err != nil { panic(err) // TODO(bdarnell) } // If the log is empty, this is a new group (like StartNode); otherwise it's // restoring an existing group (like RestartNode). // TODO(bdarnell): rethink group initialization and whether the application needs // to be able to tell us when it expects the group to exist. if lastIndex == 0 { r.becomeFollower(1, None) ents := make([]pb.Entry, len(gc.peers)) for i, peer := range gc.peers { cc := pb.ConfChange{Type: pb.ConfChangeAddNode, NodeID: peer.ID, Context: peer.Context} data, err := cc.Marshal() if err != nil { panic("unexpected marshal error") } ents[i] = pb.Entry{Type: pb.EntryConfChange, Term: 1, Index: uint64(i + 1), Data: data} } r.raftLog.append(ents...) r.raftLog.committed = uint64(len(ents)) for _, peer := range gc.peers { r.addNode(peer.ID) } } // Set the initial hard and soft states after performing all initialization. group.prevSoftSt = r.softState() group.prevHardSt = r.HardState close(gc.done) case gr := <-mn.rmgroupc: delete(groups, gr.id) delete(rds, gr.id) close(gr.done) case mm := <-mn.propc: // TODO(bdarnell): single-node impl doesn't read from propc unless the group // has a leader; we can't do that since we have one propc for many groups. // We'll have to buffer somewhere on a group-by-group basis, or just let // raft.Step drop any such proposals on the floor. mm.msg.From = mn.id group = groups[mm.group] group.raft.Step(mm.msg) case mm := <-mn.recvc: group = groups[mm.group] if _, ok := group.raft.prs[mm.msg.From]; ok || !IsResponseMsg(mm.msg) { group.raft.Step(mm.msg) } case mcc := <-mn.confc: group = groups[mcc.group] if mcc.msg.NodeID == None { group.raft.resetPendingConf() select { case mcc.ch <- pb.ConfState{Nodes: group.raft.nodes()}: case <-mn.done: } break } switch mcc.msg.Type { case pb.ConfChangeAddNode: group.raft.addNode(mcc.msg.NodeID) case pb.ConfChangeRemoveNode: group.raft.removeNode(mcc.msg.NodeID) case pb.ConfChangeUpdateNode: group.raft.resetPendingConf() default: panic("unexpected conf type") } select { case mcc.ch <- pb.ConfState{Nodes: group.raft.nodes()}: case <-mn.done: } case <-mn.tickc: // TODO(bdarnell): instead of calling every group on every tick, // we should have a priority queue of groups based on their next // time-based event. for _, g := range groups { g.raft.tick() rd := g.newReady() if rd.containsUpdates() { rds[g.id] = rd } } case readyc <- rds: // Clear outgoing messages as soon as we've passed them to the application. for g := range rds { groups[g].raft.msgs = nil } rds = map[uint64]Ready{} advancec = mn.advancec case advs := <-advancec: for groupID, rd := range advs { group, ok := groups[groupID] if !ok { continue } group.commitReady(rd) // We've been accumulating new entries in rds which may now be obsolete. // Drop the old Ready object and create a new one if needed. delete(rds, groupID) newRd := group.newReady() if newRd.containsUpdates() { rds[groupID] = newRd } } advancec = nil case ms := <-mn.status: if group, ok := groups[ms.group]; ok { s := getStatus(group.raft) ms.ch <- &s } else { ms.ch <- nil } case <-mn.stop: close(mn.done) return } if group != nil { rd := group.newReady() if rd.containsUpdates() { rds[group.id] = rd } } } }