func (n *node) ProposeConfChange(ctx context.Context, cc pb.ConfChange) error { data, err := cc.Marshal() if err != nil { return err } return n.Step(ctx, pb.Message{Type: pb.MsgProp, Entries: []pb.Entry{{Type: pb.EntryConfChange, Data: data}}}) }
func (c *RemoveCommand) Data2() ([]byte, error) { req2 := raftpb.ConfChange{ ID: 0, Type: raftpb.ConfChangeRemoveNode, NodeID: c.id, } return req2.Marshal() }
// TestMultiNodeStart ensures that a node can be started correctly. The node should
// start with correct configuration change entries, and can accept and commit
// proposals.
func TestMultiNodeStart(t *testing.T) {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// The bootstrap ConfChange that CreateGroup is expected to append for peer 1.
	cc := raftpb.ConfChange{Type: raftpb.ConfChangeAddNode, NodeID: 1}
	ccdata, err := cc.Marshal()
	if err != nil {
		t.Fatalf("unexpected marshal error: %v", err)
	}

	// wants[0]: the Ready produced after Campaign — the group becomes leader at
	// term 2, with the bootstrap conf change (term 1) and the leader's empty
	// entry (term 2) both appended and committed.
	// wants[1]: the Ready produced after proposing "foo" at index 3.
	wants := []Ready{
		{
			SoftState: &SoftState{Lead: 1, RaftState: StateLeader},
			HardState: raftpb.HardState{Term: 2, Commit: 2, Vote: 1},
			Entries: []raftpb.Entry{
				{Type: raftpb.EntryConfChange, Term: 1, Index: 1, Data: ccdata},
				{Term: 2, Index: 2},
			},
			CommittedEntries: []raftpb.Entry{
				{Type: raftpb.EntryConfChange, Term: 1, Index: 1, Data: ccdata},
				{Term: 2, Index: 2},
			},
		},
		{
			HardState:        raftpb.HardState{Term: 2, Commit: 3, Vote: 1},
			Entries:          []raftpb.Entry{{Term: 2, Index: 3, Data: []byte("foo")}},
			CommittedEntries: []raftpb.Entry{{Term: 2, Index: 3, Data: []byte("foo")}},
		},
	}

	mn := StartMultiNode(1, 10, 1)
	storage := NewMemoryStorage()
	mn.CreateGroup(1, []Peer{{ID: 1}}, storage)
	mn.Campaign(ctx, 1)

	// First Ready: election result plus bootstrap entries. The entries must be
	// appended to storage and Advance called before the next Ready is served.
	gs := <-mn.Ready()
	g := gs[1]
	if !reflect.DeepEqual(g, wants[0]) {
		t.Fatalf("#%d: g = %+v,\n w %+v", 1, g, wants[0])
	} else {
		storage.Append(g.Entries)
		mn.Advance(gs)
	}

	// Second Ready: the committed proposal.
	mn.Propose(ctx, 1, []byte("foo"))
	if gs2 := <-mn.Ready(); !reflect.DeepEqual(gs2[1], wants[1]) {
		t.Errorf("#%d: g = %+v,\n w %+v", 2, gs2[1], wants[1])
	} else {
		storage.Append(gs2[1].Entries)
		mn.Advance(gs2)
	}

	// After everything is applied there must be no further Ready pending.
	select {
	case rd := <-mn.Ready():
		t.Errorf("unexpected Ready: %+v", rd)
	case <-time.After(time.Millisecond):
	}
}
func (mn *multiNode) ProposeConfChange(ctx context.Context, group uint64, cc pb.ConfChange) error { data, err := cc.Marshal() if err != nil { return err } return mn.Step(ctx, group, pb.Message{ Type: pb.MsgProp, Entries: []pb.Entry{ {Type: pb.EntryConfChange, Data: data}, }, }) }
// TestMultiNodeProposeConfig ensures that multiNode.ProposeConfChange
// sends the given configuration proposal to the underlying raft.
func TestMultiNodeProposeConfig(t *testing.T) {
	mn := newMultiNode(1, 10, 1)
	go mn.run()
	s := NewMemoryStorage()
	mn.CreateGroup(1, []Peer{{ID: 1}}, s)
	mn.Campaign(context.TODO(), 1)
	proposed := false
	var lastIndex uint64
	var ccdata []byte
	for {
		rds := <-mn.Ready()
		rd := rds[1]
		s.Append(rd.Entries)
		// Propose the conf change exactly once, as soon as this node
		// observes itself becoming leader.
		if !proposed && rd.SoftState.Lead == mn.id {
			cc := raftpb.ConfChange{Type: raftpb.ConfChangeAddNode, NodeID: 1}
			var err error
			ccdata, err = cc.Marshal()
			if err != nil {
				t.Fatal(err)
			}
			mn.ProposeConfChange(context.TODO(), 1, cc)
			proposed = true
		}
		mn.Advance(rds)

		var err error
		lastIndex, err = s.LastIndex()
		if err != nil {
			t.Fatal(err)
		}
		// Index 3 is where the conf change lands: bootstrap conf change (1),
		// leader's empty entry (2), then our proposal (3).
		if lastIndex >= 3 {
			break
		}
	}
	mn.Stop()

	// Verify the last appended entry is the conf change we proposed.
	entries, err := s.Entries(lastIndex, lastIndex+1, noLimit)
	if err != nil {
		t.Fatal(err)
	}
	if len(entries) != 1 {
		t.Fatalf("len(entries) = %d, want %d", len(entries), 1)
	}
	if entries[0].Type != raftpb.EntryConfChange {
		t.Fatalf("type = %v, want %v", entries[0].Type, raftpb.EntryConfChange)
	}
	if !bytes.Equal(entries[0].Data, ccdata) {
		t.Errorf("data = %v, want %v", entries[0].Data, ccdata)
	}
}
// StartNode returns a new Node given a unique raft id, a list of raft peers, and
// the election and heartbeat timeouts in units of ticks.
// It appends a ConfChangeAddNode entry for each given peer to the initial log.
func StartNode(id uint64, peers []Peer, election, heartbeat int, storage Storage) Node {
	n := newNode()
	r := newRaft(id, nil, election, heartbeat, storage, 0)

	// become the follower at term 1 and apply initial configuration
	// entries of term 1
	r.becomeFollower(1, None)
	for _, peer := range peers {
		cc := pb.ConfChange{Type: pb.ConfChangeAddNode, NodeID: peer.ID, Context: peer.Context}
		d, err := cc.Marshal()
		if err != nil {
			panic("unexpected marshal error")
		}
		// Each bootstrap entry goes at the next free index, term 1.
		e := pb.Entry{Type: pb.EntryConfChange, Term: 1, Index: r.raftLog.lastIndex() + 1, Data: d}
		r.raftLog.append(e)
	}
	// Mark these initial entries as committed.
	// TODO(bdarnell): These entries are still unstable; do we need to preserve
	// the invariant that committed < unstable?
	r.raftLog.committed = r.raftLog.lastIndex()
	r.Commit = r.raftLog.committed
	// Now apply them, mainly so that the application can call Campaign
	// immediately after StartNode in tests. Note that these nodes will
	// be added to raft twice: here and when the application's Ready
	// loop calls ApplyConfChange. The calls to addNode must come after
	// all calls to raftLog.append so progress.next is set after these
	// bootstrapping entries (it is an error if we try to append these
	// entries since they have already been committed).
	// We do not set raftLog.applied so the application will be able
	// to observe all conf changes via Ready.CommittedEntries.
	for _, peer := range peers {
		r.addNode(peer.ID)
	}

	go n.run(r)
	return &n
}
// run is the event loop for a multiNode. It owns all per-group raft state
// machines and serializes every mutation of them through the channels it
// selects on; no lock is held because nothing else touches `groups`.
// It accumulates pending Ready structs in `rds` and hands them to the
// application via readyc, then waits for the matching Advance before
// offering the next batch.
func (mn *multiNode) run() {
	groups := map[uint64]*groupState{}
	rds := map[uint64]Ready{}
	var advancec chan map[uint64]Ready
	for {
		// Only select readyc if we have something to report and we are not
		// currently waiting for an advance.
		readyc := mn.readyc
		if len(rds) == 0 || advancec != nil {
			readyc = nil
		}

		// group points to the group that was touched on this iteration (if any)
		var group *groupState
		select {
		case gc := <-mn.groupc:
			// Group creation request: build a fresh raft state machine
			// backed by the supplied storage.
			// TODO(bdarnell): pass applied through gc and into newRaft. Or get rid of it?
			r := newRaft(mn.id, nil, mn.election, mn.heartbeat, gc.storage, 0)
			group = &groupState{
				id:   gc.id,
				raft: r,
			}
			groups[gc.id] = group
			lastIndex, err := gc.storage.LastIndex()
			if err != nil {
				panic(err) // TODO(bdarnell)
			}
			// If the log is empty, this is a new group (like StartNode); otherwise it's
			// restoring an existing group (like RestartNode).
			// TODO(bdarnell): rethink group initialization and whether the application needs
			// to be able to tell us when it expects the group to exist.
			if lastIndex == 0 {
				r.becomeFollower(1, None)
				// Bootstrap the log with one ConfChangeAddNode entry per
				// peer, committed immediately (mirrors StartNode).
				ents := make([]pb.Entry, len(gc.peers))
				for i, peer := range gc.peers {
					cc := pb.ConfChange{Type: pb.ConfChangeAddNode, NodeID: peer.ID, Context: peer.Context}
					data, err := cc.Marshal()
					if err != nil {
						panic("unexpected marshal error")
					}
					ents[i] = pb.Entry{Type: pb.EntryConfChange, Term: 1, Index: uint64(i + 1), Data: data}
				}
				r.raftLog.append(ents...)
				r.raftLog.committed = uint64(len(ents))
				for _, peer := range gc.peers {
					r.addNode(peer.ID)
				}
			}

			// Set the initial hard and soft states after performing all initialization.
			group.prevSoftSt = r.softState()
			group.prevHardSt = r.HardState
			close(gc.done)

		case gr := <-mn.rmgroupc:
			// Group removal: drop the state machine and any pending Ready.
			delete(groups, gr.id)
			delete(rds, gr.id)
			close(gr.done)

		case mm := <-mn.propc:
			// TODO(bdarnell): single-node impl doesn't read from propc unless the group
			// has a leader; we can't do that since we have one propc for many groups.
			// We'll have to buffer somewhere on a group-by-group basis, or just let
			// raft.Step drop any such proposals on the floor.
			mm.msg.From = mn.id
			group = groups[mm.group]
			group.raft.Step(mm.msg)

		case mm := <-mn.recvc:
			group = groups[mm.group]
			// Only step messages from known peers, unless the message is a
			// request (responses from removed peers are dropped).
			if _, ok := group.raft.prs[mm.msg.From]; ok || !IsResponseMsg(mm.msg) {
				group.raft.Step(mm.msg)
			}

		case mcc := <-mn.confc:
			group = groups[mcc.group]
			// A zero NodeID means "no-op": just clear the pending conf flag
			// and report the current membership.
			if mcc.msg.NodeID == None {
				group.raft.resetPendingConf()
				select {
				case mcc.ch <- pb.ConfState{Nodes: group.raft.nodes()}:
				case <-mn.done:
				}
				break
			}
			switch mcc.msg.Type {
			case pb.ConfChangeAddNode:
				group.raft.addNode(mcc.msg.NodeID)
			case pb.ConfChangeRemoveNode:
				group.raft.removeNode(mcc.msg.NodeID)
			case pb.ConfChangeUpdateNode:
				group.raft.resetPendingConf()
			default:
				panic("unexpected conf type")
			}
			// Report the resulting membership back to the caller, unless
			// we are shutting down.
			select {
			case mcc.ch <- pb.ConfState{Nodes: group.raft.nodes()}:
			case <-mn.done:
			}

		case <-mn.tickc:
			// TODO(bdarnell): instead of calling every group on every tick,
			// we should have a priority queue of groups based on their next
			// time-based event.
			for _, g := range groups {
				g.raft.tick()
				rd := g.newReady()
				if rd.containsUpdates() {
					rds[g.id] = rd
				}
			}

		case readyc <- rds:
			// Clear outgoing messages as soon as we've passed them to the application.
			for g := range rds {
				groups[g].raft.msgs = nil
			}
			rds = map[uint64]Ready{}
			// From now on, block new Readys until the application Advances.
			advancec = mn.advancec

		case advs := <-advancec:
			for groupID, rd := range advs {
				// Note: this `group` shadows the outer `group` variable,
				// so the post-select newReady refresh below does not run
				// for groups advanced here (they are refreshed inline).
				group, ok := groups[groupID]
				if !ok {
					continue
				}
				group.commitReady(rd)

				// We've been accumulating new entries in rds which may now be obsolete.
				// Drop the old Ready object and create a new one if needed.
				delete(rds, groupID)
				newRd := group.newReady()
				if newRd.containsUpdates() {
					rds[groupID] = newRd
				}
			}
			advancec = nil

		case ms := <-mn.status:
			ms.ch <- getStatus(groups[ms.group].raft)

		case <-mn.stop:
			close(mn.done)
			return
		}

		// If this iteration touched a group, refresh its pending Ready.
		if group != nil {
			rd := group.newReady()
			if rd.containsUpdates() {
				rds[group.id] = rd
			}
		}
	}
}