// ProposeConfChange proposes a configuration change by marshaling cc into an
// EntryConfChange entry and stepping it into raft as a MsgProp.
func (n *node) ProposeConfChange(ctx context.Context, cc pb.ConfChange) error {
	data, err := cc.Marshal()
	if err != nil {
		return err
	}
	return n.Step(ctx, pb.Message{Type: pb.MsgProp, Entries: []pb.Entry{{Type: pb.EntryConfChange, Data: data}}})
}
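// Usage sketch (illustrative, not part of this file): a caller proposes a
// membership change and later applies the committed EntryConfChange from its
// Ready loop. The variables n, ctx, and rd are assumed from the surrounding
// application code.
//
//	cc := pb.ConfChange{Type: pb.ConfChangeAddNode, NodeID: 2}
//	if err := n.ProposeConfChange(ctx, cc); err != nil {
//		// proposal was not accepted (e.g. the node stopped)
//	}
//	// Later, when the entry commits and shows up in a Ready:
//	for _, e := range rd.CommittedEntries {
//		if e.Type == pb.EntryConfChange {
//			var cc pb.ConfChange
//			cc.Unmarshal(e.Data)
//			n.ApplyConfChange(cc)
//		}
//	}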
// TestMultiNodeStart ensures that a multiNode can be started correctly. The
// node should start with the correct configuration change entries and then be
// able to accept and commit proposals.
func TestMultiNodeStart(t *testing.T) {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	cc := raftpb.ConfChange{Type: raftpb.ConfChangeAddNode, NodeID: 1}
	ccdata, err := cc.Marshal()
	if err != nil {
		t.Fatalf("unexpected marshal error: %v", err)
	}
	wants := []Ready{
		{
			SoftState: &SoftState{Lead: 1, RaftState: StateLeader},
			HardState: raftpb.HardState{Term: 2, Commit: 2, Vote: 1},
			Entries: []raftpb.Entry{
				{Type: raftpb.EntryConfChange, Term: 1, Index: 1, Data: ccdata},
				{Term: 2, Index: 2},
			},
			CommittedEntries: []raftpb.Entry{
				{Type: raftpb.EntryConfChange, Term: 1, Index: 1, Data: ccdata},
				{Term: 2, Index: 2},
			},
		},
		{
			HardState:        raftpb.HardState{Term: 2, Commit: 3, Vote: 1},
			Entries:          []raftpb.Entry{{Term: 2, Index: 3, Data: []byte("foo")}},
			CommittedEntries: []raftpb.Entry{{Term: 2, Index: 3, Data: []byte("foo")}},
		},
	}
	mn := StartMultiNode(1)
	storage := NewMemoryStorage()
	mn.CreateGroup(1, newTestConfig(1, nil, 10, 1, storage), []Peer{{ID: 1}})
	mn.Campaign(ctx, 1)
	gs := <-mn.Ready()
	g := gs[1]
	if !reflect.DeepEqual(g, wants[0]) {
		t.Fatalf("#%d: g = %+v,\n w %+v", 1, g, wants[0])
	} else {
		storage.Append(g.Entries)
		mn.Advance(gs)
	}

	mn.Propose(ctx, 1, []byte("foo"))
	if gs2 := <-mn.Ready(); !reflect.DeepEqual(gs2[1], wants[1]) {
		t.Errorf("#%d: g = %+v,\n w %+v", 2, gs2[1], wants[1])
	} else {
		storage.Append(gs2[1].Entries)
		mn.Advance(gs2)
	}

	select {
	case rd := <-mn.Ready():
		t.Errorf("unexpected Ready: %+v", rd)
	case <-time.After(time.Millisecond):
	}
}
// ProposeConfChange proposes a configuration change for the given group by
// marshaling cc into an EntryConfChange entry and stepping it into that
// group's raft as a MsgProp.
func (mn *multiNode) ProposeConfChange(ctx context.Context, group uint64, cc pb.ConfChange) error {
	data, err := cc.Marshal()
	if err != nil {
		return err
	}
	return mn.Step(ctx, group, pb.Message{
		Type: pb.MsgProp,
		Entries: []pb.Entry{
			{Type: pb.EntryConfChange, Data: data},
		},
	})
}
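// Illustrative only: the multiNode variant mirrors the single-node flow,
// except that the proposal is routed to one specific group. The group ID (1)
// and the variables mn and ctx are assumptions for this sketch.
//
//	cc := pb.ConfChange{Type: pb.ConfChangeAddNode, NodeID: 2}
//	if err := mn.ProposeConfChange(ctx, 1, cc); err != nil {
//		// proposal was dropped or the node stopped
//	}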
// TestMultiNodeProposeConfig ensures that multiNode.ProposeConfChange
// sends the given configuration proposal to the underlying raft.
func TestMultiNodeProposeConfig(t *testing.T) {
	mn := newMultiNode(1)
	go mn.run()
	s := NewMemoryStorage()
	mn.CreateGroup(1, newTestConfig(1, nil, 10, 1, s), []Peer{{ID: 1}})
	mn.Campaign(context.TODO(), 1)
	proposed := false
	var lastIndex uint64
	var ccdata []byte
	for {
		rds := <-mn.Ready()
		rd := rds[1]
		s.Append(rd.Entries)
		// Propose the conf change once this node becomes leader.
		if !proposed && rd.SoftState.Lead == mn.id {
			cc := raftpb.ConfChange{Type: raftpb.ConfChangeAddNode, NodeID: 1}
			var err error
			ccdata, err = cc.Marshal()
			if err != nil {
				t.Fatal(err)
			}
			mn.ProposeConfChange(context.TODO(), 1, cc)
			proposed = true
		}
		mn.Advance(rds)

		var err error
		lastIndex, err = s.LastIndex()
		if err != nil {
			t.Fatal(err)
		}
		if lastIndex >= 3 {
			break
		}
	}
	mn.Stop()

	entries, err := s.Entries(lastIndex, lastIndex+1, noLimit)
	if err != nil {
		t.Fatal(err)
	}
	if len(entries) != 1 {
		t.Fatalf("len(entries) = %d, want %d", len(entries), 1)
	}
	if entries[0].Type != raftpb.EntryConfChange {
		t.Fatalf("type = %v, want %v", entries[0].Type, raftpb.EntryConfChange)
	}
	if !bytes.Equal(entries[0].Data, ccdata) {
		t.Errorf("data = %v, want %v", entries[0].Data, ccdata)
	}
}
// processConfChange encodes the requesting GroupNode into the conf change's
// context, proposes the change to the given group, and waits until the change
// is applied, the context is canceled, or the node is stopped.
func (n *MultiNode) processConfChange(ctx context.Context, group uint64, cc raftpb.ConfChange, gn GroupNode) error {
	if group == 0 || gn.Node == 0 || gn.Group != group {
		glog.Fatalf("invalid group node: %v", gn)
	}

	id := n.genID()
	req := Request{Data: gn}
	var err error
	cc.Context, err = n.encReq(id, req)
	if err != nil {
		return err
	}

	ch := n.line.wait(id, req)
	d, err := cc.Marshal()
	if err != nil {
		return err
	}

	// Hand the proposal to the raft event loop.
	select {
	case n.propc <- multiMessage{
		group: group,
		msg: raftpb.Message{
			Type:    raftpb.MsgProp,
			Entries: []raftpb.Entry{{Type: raftpb.EntryConfChange, Data: d}},
		},
	}:
	case <-ctx.Done():
		n.line.cancel(id)
		return ctx.Err()
	case <-n.done:
		return ErrStopped
	}

	// Wait for the change to be applied.
	select {
	case res := <-ch:
		return res.Err
	case <-ctx.Done():
		n.line.cancel(id)
		return ctx.Err()
	case <-n.done:
		return ErrStopped
	}
}
// TestNodeProposeConfig ensures that node.ProposeConfChange sends the given
// configuration proposal to the underlying raft.
func TestNodeProposeConfig(t *testing.T) {
	msgs := []raftpb.Message{}
	appendStep := func(r *raft, m raftpb.Message) {
		msgs = append(msgs, m)
	}
	n := newNode()
	s := NewMemoryStorage()
	r := newTestRaft(1, []uint64{1}, 10, 1, s)
	go n.run(r)
	n.Campaign(context.TODO())
	for {
		rd := <-n.Ready()
		s.Append(rd.Entries)
		// Once this raft becomes leader, replace its step function with
		// appendStep so subsequent proposals are captured in msgs.
		if rd.SoftState.Lead == r.id {
			r.step = appendStep
			n.Advance()
			break
		}
		n.Advance()
	}
	cc := raftpb.ConfChange{Type: raftpb.ConfChangeAddNode, NodeID: 1}
	ccdata, err := cc.Marshal()
	if err != nil {
		t.Fatal(err)
	}
	n.ProposeConfChange(context.TODO(), cc)
	n.Stop()

	if len(msgs) != 1 {
		t.Fatalf("len(msgs) = %d, want %d", len(msgs), 1)
	}
	if msgs[0].Type != raftpb.MsgProp {
		t.Errorf("msg type = %d, want %d", msgs[0].Type, raftpb.MsgProp)
	}
	if !reflect.DeepEqual(msgs[0].Entries[0].Data, ccdata) {
		t.Errorf("data = %v, want %v", msgs[0].Entries[0].Data, ccdata)
	}
}
// StartNode returns a new Node given configuration and a list of raft peers.
// It appends a ConfChangeAddNode entry for each given peer to the initial log.
func StartNode(c *Config, peers []Peer) Node {
	r := newRaft(c)

	// Become the follower at term 1 and apply the initial configuration
	// entries of term 1.
	r.becomeFollower(1, None)
	for _, peer := range peers {
		cc := pb.ConfChange{Type: pb.ConfChangeAddNode, NodeID: peer.ID, Context: peer.Context}
		d, err := cc.Marshal()
		if err != nil {
			panic("unexpected marshal error")
		}
		e := pb.Entry{Type: pb.EntryConfChange, Term: 1, Index: r.raftLog.lastIndex() + 1, Data: d}
		r.raftLog.append(e)
	}

	// Mark these initial entries as committed.
	// TODO(bdarnell): These entries are still unstable; do we need to preserve
	// the invariant that committed < unstable?
	r.raftLog.committed = r.raftLog.lastIndex()
	r.Commit = r.raftLog.committed

	// Now apply them, mainly so that the application can call Campaign
	// immediately after StartNode in tests. Note that these nodes will
	// be added to raft twice: here and when the application's Ready
	// loop calls ApplyConfChange. The calls to addNode must come after
	// all calls to raftLog.append so progress.next is set after these
	// bootstrapping entries (it is an error if we try to append these
	// entries since they have already been committed).
	// We do not set raftLog.applied so the application will be able
	// to observe all conf changes via Ready.CommittedEntries.
	for _, peer := range peers {
		r.addNode(peer.ID)
	}

	n := newNode()
	go n.run(r)
	return &n
}
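// A minimal bootstrap sketch built on this API (configuration values taken
// from TestNodeStart below; message transport, apply logic, and error handling
// are elided):
//
//	storage := NewMemoryStorage()
//	c := &Config{
//		ID:              1,
//		ElectionTick:    10,
//		HeartbeatTick:   1,
//		Storage:         storage,
//		MaxSizePerMsg:   4096,
//		MaxInflightMsgs: 256,
//	}
//	n := StartNode(c, []Peer{{ID: 1}})
//	for {
//		rd := <-n.Ready()
//		storage.Append(rd.Entries)
//		// ... send rd.Messages, apply rd.CommittedEntries ...
//		n.Advance()
//	}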
// run is the multiNode event loop. It multiplexes group creation and removal,
// proposals, incoming messages, conf changes, ticks, and status requests, and
// publishes accumulated per-group Ready structs to the application.
func (mn *multiNode) run() {
	groups := map[uint64]*groupState{}
	rds := map[uint64]Ready{}
	var advancec chan map[uint64]Ready
	for {
		// Only select readyc if we have something to report and we are not
		// currently waiting for an advance.
		readyc := mn.readyc
		if len(rds) == 0 || advancec != nil {
			readyc = nil
		}

		// group points to the group that was touched on this iteration (if any).
		var group *groupState
		select {
		case gc := <-mn.groupc:
			gc.config.ID = mn.id
			r := newRaft(gc.config)
			group = &groupState{
				id:   gc.id,
				raft: r,
			}
			groups[gc.id] = group
			lastIndex, err := gc.config.Storage.LastIndex()
			if err != nil {
				panic(err) // TODO(bdarnell)
			}
			// If the log is empty, this is a new group (like StartNode); otherwise it's
			// restoring an existing group (like RestartNode).
			// TODO(bdarnell): rethink group initialization and whether the application needs
			// to be able to tell us when it expects the group to exist.
			if lastIndex == 0 {
				r.becomeFollower(1, None)
				ents := make([]pb.Entry, len(gc.peers))
				for i, peer := range gc.peers {
					cc := pb.ConfChange{Type: pb.ConfChangeAddNode, NodeID: peer.ID, Context: peer.Context}
					data, err := cc.Marshal()
					if err != nil {
						panic("unexpected marshal error")
					}
					ents[i] = pb.Entry{Type: pb.EntryConfChange, Term: 1, Index: uint64(i + 1), Data: data}
				}
				r.raftLog.append(ents...)
				r.raftLog.committed = uint64(len(ents))
				for _, peer := range gc.peers {
					r.addNode(peer.ID)
				}
			}

			// Set the initial hard and soft states after performing all initialization.
			group.prevSoftSt = r.softState()
			group.prevHardSt = r.HardState
			close(gc.done)

		case gr := <-mn.rmgroupc:
			delete(groups, gr.id)
			delete(rds, gr.id)
			close(gr.done)

		case mm := <-mn.propc:
			// TODO(bdarnell): single-node impl doesn't read from propc unless the group
			// has a leader; we can't do that since we have one propc for many groups.
			// We'll have to buffer somewhere on a group-by-group basis, or just let
			// raft.Step drop any such proposals on the floor.
			mm.msg.From = mn.id
			var ok bool
			if group, ok = groups[mm.group]; ok {
				group.raft.Step(mm.msg)
			}

		case mm := <-mn.recvc:
			group = groups[mm.group]
			if _, ok := group.raft.prs[mm.msg.From]; ok || !IsResponseMsg(mm.msg) {
				group.raft.Step(mm.msg)
			}

		case mcc := <-mn.confc:
			group = groups[mcc.group]
			if mcc.msg.NodeID == None {
				group.raft.resetPendingConf()
				select {
				case mcc.ch <- pb.ConfState{Nodes: group.raft.nodes()}:
				case <-mn.done:
				}
				break
			}
			switch mcc.msg.Type {
			case pb.ConfChangeAddNode:
				group.raft.addNode(mcc.msg.NodeID)
			case pb.ConfChangeRemoveNode:
				group.raft.removeNode(mcc.msg.NodeID)
			case pb.ConfChangeUpdateNode:
				group.raft.resetPendingConf()
			default:
				panic("unexpected conf type")
			}
			select {
			case mcc.ch <- pb.ConfState{Nodes: group.raft.nodes()}:
			case <-mn.done:
			}

		case <-mn.tickc:
			// TODO(bdarnell): instead of calling every group on every tick,
			// we should have a priority queue of groups based on their next
			// time-based event.
			for _, g := range groups {
				g.raft.tick()
				rd := g.newReady()
				if rd.containsUpdates() {
					rds[g.id] = rd
				}
			}

		case readyc <- rds:
			// Clear outgoing messages as soon as we've passed them to the application.
			for g := range rds {
				groups[g].raft.msgs = nil
			}
			rds = map[uint64]Ready{}
			advancec = mn.advancec

		case advs := <-advancec:
			for groupID, rd := range advs {
				g, ok := groups[groupID]
				if !ok {
					continue
				}
				g.commitReady(rd)

				// We've been accumulating new entries in rds which may now be obsolete.
				// Drop the old Ready object and create a new one if needed.
				delete(rds, groupID)
				newRd := g.newReady()
				if newRd.containsUpdates() {
					rds[groupID] = newRd
				}
			}
			advancec = nil

		case ms := <-mn.status:
			if g, ok := groups[ms.group]; ok {
				s := getStatus(g.raft)
				ms.ch <- &s
			} else {
				ms.ch <- nil
			}

		case <-mn.stop:
			close(mn.done)
			return
		}

		if group != nil {
			rd := group.newReady()
			if rd.containsUpdates() {
				rds[group.id] = rd
			}
		}
	}
}
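// For context, a minimal consumer of this loop (illustrative; "storages" is a
// hypothetical map from group ID to *MemoryStorage): Ready() yields a map with
// one Ready per updated group, and the same map must be handed back through
// Advance() once its entries have been persisted.
//
//	for {
//		rds := <-mn.Ready()
//		for gid, rd := range rds {
//			storages[gid].Append(rd.Entries)
//			// ... send rd.Messages, apply rd.CommittedEntries ...
//		}
//		mn.Advance(rds)
//	}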
// TestNodeStart ensures that a node can be started correctly. The node should
// start with the correct configuration change entries and then be able to
// accept and commit proposals.
func TestNodeStart(t *testing.T) {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	cc := raftpb.ConfChange{Type: raftpb.ConfChangeAddNode, NodeID: 1}
	ccdata, err := cc.Marshal()
	if err != nil {
		t.Fatalf("unexpected marshal error: %v", err)
	}
	wants := []Ready{
		{
			SoftState: &SoftState{Lead: 1, RaftState: StateLeader},
			HardState: raftpb.HardState{Term: 2, Commit: 2, Vote: 1},
			Entries: []raftpb.Entry{
				{Type: raftpb.EntryConfChange, Term: 1, Index: 1, Data: ccdata},
				{Term: 2, Index: 2},
			},
			CommittedEntries: []raftpb.Entry{
				{Type: raftpb.EntryConfChange, Term: 1, Index: 1, Data: ccdata},
				{Term: 2, Index: 2},
			},
		},
		{
			HardState:        raftpb.HardState{Term: 2, Commit: 3, Vote: 1},
			Entries:          []raftpb.Entry{{Term: 2, Index: 3, Data: []byte("foo")}},
			CommittedEntries: []raftpb.Entry{{Term: 2, Index: 3, Data: []byte("foo")}},
		},
	}
	storage := NewMemoryStorage()
	c := &Config{
		ID:              1,
		ElectionTick:    10,
		HeartbeatTick:   1,
		Storage:         storage,
		MaxSizePerMsg:   noLimit,
		MaxInflightMsgs: 256,
	}
	n := StartNode(c, []Peer{{ID: 1}})
	n.Campaign(ctx)
	g := <-n.Ready()
	if !reflect.DeepEqual(g, wants[0]) {
		t.Fatalf("#%d: g = %+v,\n w %+v", 1, g, wants[0])
	} else {
		storage.Append(g.Entries)
		n.Advance()
	}

	n.Propose(ctx, []byte("foo"))
	if g2 := <-n.Ready(); !reflect.DeepEqual(g2, wants[1]) {
		t.Errorf("#%d: g = %+v,\n w %+v", 2, g2, wants[1])
	} else {
		storage.Append(g2.Entries)
		n.Advance()
	}

	select {
	case rd := <-n.Ready():
		t.Errorf("unexpected Ready: %+v", rd)
	case <-time.After(time.Millisecond):
	}
}
// save persists the state carried by a Ready on behalf of the group: it saves
// any snapshot and the hard state to disk, appends new entries to the raft
// storage, applies committed configuration changes, and finally hands the
// Ready off for application.
func (g *group) save(rdsv readySaved) error {
	glog.V(3).Infof("%v saving state", g)
	if rdsv.ready.SoftState != nil && rdsv.ready.SoftState.Lead != 0 {
		g.node.notifyElection(g.id)
	}

	// Apply the incoming snapshot to storage if it is newer than the snapshot
	// we have already saved.
	if !etcdraft.IsEmptySnap(rdsv.ready.Snapshot) {
		if err := g.diskStorage.SaveSnap(rdsv.ready.Snapshot); err != nil {
			glog.Fatalf("err in save snapshot: %v", err)
		}
		g.raftStorage.ApplySnapshot(rdsv.ready.Snapshot)
		glog.Infof("%v saved incoming snapshot at index %d", g, rdsv.ready.Snapshot.Metadata.Index)
	}

	err := g.diskStorage.Save(rdsv.ready.HardState, rdsv.ready.Entries)
	if err != nil {
		glog.Fatalf("err in raft storage save: %v", err)
	}
	glog.V(3).Infof("%v saved state on disk", g)

	g.raftStorage.Append(rdsv.ready.Entries)
	glog.V(3).Infof("%v appended entries in storage", g)

	// Apply config changes in the node as soon as possible,
	// before applying other entries in the state machine.
	for _, e := range rdsv.ready.CommittedEntries {
		if e.Type != raftpb.EntryConfChange {
			continue
		}
		if e.Index <= g.saved {
			continue
		}
		g.saved = e.Index

		var cc raftpb.ConfChange
		pbutil.MustUnmarshal(&cc, e.Data)
		if glog.V(2) {
			glog.Infof("%v applies conf change %s: %s", g, formatConfChange(cc), formatEntry(e))
		}

		if err := g.validConfChange(cc); err != nil {
			glog.Errorf("%v received an invalid conf change for node %v: %v", g, cc.NodeID, err)
			cc.NodeID = etcdraft.None
			g.node.node.ApplyConfChange(g.id, cc)
			continue
		}

		cch := make(chan struct{})
		go func() {
			g.confState = *g.node.node.ApplyConfChange(g.id, cc)
			close(cch)
		}()

		select {
		case <-g.node.done:
			return ErrStopped
		case <-cch:
		}
	}
	glog.V(3).Infof("%v successfully saved ready", g)

	rdsv.saved <- struct{}{}
	select {
	case g.applyc <- rdsv.ready:
	case <-g.node.done:
	}
	return nil
}