Exemple #1
0
// ProposeConfChange marshals cc and hands it to n.Step as a MsgProp message
// carrying a single EntryConfChange entry. It returns the marshal error if
// marshaling fails, otherwise whatever n.Step returns.
func (n *node) ProposeConfChange(ctx context.Context, cc pb.ConfChange) error {
	payload, err := cc.Marshal()
	if err != nil {
		return err
	}
	entry := pb.Entry{Type: pb.EntryConfChange, Data: payload}
	proposal := pb.Message{Type: pb.MsgProp, Entries: []pb.Entry{entry}}
	return n.Step(ctx, proposal)
}
Exemple #2
0
// Data2 returns the marshaled form of a raftpb.ConfChange of type
// ConfChangeRemoveNode whose NodeID is c.id. The change's ID is left at zero.
func (c *RemoveCommand) Data2() ([]byte, error) {
	var change raftpb.ConfChange
	change.Type = raftpb.ConfChangeRemoveNode
	change.NodeID = c.id
	return change.Marshal()
}
// TestMultiNodeStart checks the Ready values produced by a freshly started
// single-peer group: the first Ready must carry the bootstrap configuration
// change entry plus the entry at term 2, the second must carry the proposed
// "foo" entry, and no further Ready may follow.
func TestMultiNodeStart(t *testing.T) {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	addNode := raftpb.ConfChange{Type: raftpb.ConfChangeAddNode, NodeID: 1}
	ccdata, err := addNode.Marshal()
	if err != nil {
		t.Fatalf("unexpected marshal error: %v", err)
	}

	// Expected Ready after the group is created and campaigns.
	wantElected := Ready{
		SoftState: &SoftState{Lead: 1, RaftState: StateLeader},
		HardState: raftpb.HardState{Term: 2, Commit: 2, Vote: 1},
		Entries: []raftpb.Entry{
			{Type: raftpb.EntryConfChange, Term: 1, Index: 1, Data: ccdata},
			{Term: 2, Index: 2},
		},
		CommittedEntries: []raftpb.Entry{
			{Type: raftpb.EntryConfChange, Term: 1, Index: 1, Data: ccdata},
			{Term: 2, Index: 2},
		},
	}
	// Expected Ready after proposing "foo".
	wantProposed := Ready{
		HardState:        raftpb.HardState{Term: 2, Commit: 3, Vote: 1},
		Entries:          []raftpb.Entry{{Term: 2, Index: 3, Data: []byte("foo")}},
		CommittedEntries: []raftpb.Entry{{Term: 2, Index: 3, Data: []byte("foo")}},
	}

	mn := StartMultiNode(1, 10, 1)
	storage := NewMemoryStorage()
	mn.CreateGroup(1, []Peer{{ID: 1}}, storage)
	mn.Campaign(ctx, 1)

	first := <-mn.Ready()
	got := first[1]
	if !reflect.DeepEqual(got, wantElected) {
		t.Fatalf("#%d: g = %+v,\n             w   %+v", 1, got, wantElected)
	}
	storage.Append(got.Entries)
	mn.Advance(first)

	mn.Propose(ctx, 1, []byte("foo"))
	second := <-mn.Ready()
	if reflect.DeepEqual(second[1], wantProposed) {
		storage.Append(second[1].Entries)
		mn.Advance(second)
	} else {
		t.Errorf("#%d: g = %+v,\n             w   %+v", 2, second[1], wantProposed)
	}

	// Nothing else should become ready within a millisecond.
	select {
	case extra := <-mn.Ready():
		t.Errorf("unexpected Ready: %+v", extra)
	case <-time.After(time.Millisecond):
	}
}
Exemple #4
0
// ProposeConfChange marshals cc and passes it to mn.Step for the given group
// as a MsgProp message carrying a single EntryConfChange entry. It returns
// the marshal error if marshaling fails, otherwise whatever mn.Step returns.
func (mn *multiNode) ProposeConfChange(ctx context.Context, group uint64, cc pb.ConfChange) error {
	payload, err := cc.Marshal()
	if err != nil {
		return err
	}
	entry := pb.Entry{Type: pb.EntryConfChange, Data: payload}
	proposal := pb.Message{Type: pb.MsgProp, Entries: []pb.Entry{entry}}
	return mn.Step(ctx, group, proposal)
}
// TestMultiNodeProposeConfig ensures that multiNode.ProposeConfChange
// sends the given configuration proposal to the underlying raft.
//
// The test drives the Ready/Advance loop by hand, appending every batch of
// entries to the memory storage. Once a Ready reports this node as leader it
// proposes a single ConfChangeAddNode, and it stops looping when the storage
// holds at least three entries. The last stored entry must then be the
// proposed configuration change.
func TestMultiNodeProposeConfig(t *testing.T) {
	mn := newMultiNode(1, 10, 1)
	go mn.run()
	s := NewMemoryStorage()
	mn.CreateGroup(1, []Peer{{ID: 1}}, s)
	mn.Campaign(context.TODO(), 1)
	proposed := false
	var lastIndex uint64
	var ccdata []byte
	for {
		rds := <-mn.Ready()
		rd := rds[1]
		s.Append(rd.Entries)
		// Propose the configuration change exactly once, as soon as a Ready
		// reports this node as the leader. SoftState is a pointer and is nil
		// when a Ready carries no soft-state update (or when group 1 is
		// missing from rds), so it must be checked before it is dereferenced.
		if !proposed && rd.SoftState != nil && rd.SoftState.Lead == mn.id {
			cc := raftpb.ConfChange{Type: raftpb.ConfChangeAddNode, NodeID: 1}
			var err error
			ccdata, err = cc.Marshal()
			if err != nil {
				t.Fatal(err)
			}
			// A failed proposal would otherwise show up only as a confusing
			// mismatch (or a hang) further down.
			if err := mn.ProposeConfChange(context.TODO(), 1, cc); err != nil {
				t.Fatal(err)
			}
			proposed = true
		}
		mn.Advance(rds)

		var err error
		lastIndex, err = s.LastIndex()
		if err != nil {
			t.Fatal(err)
		}
		if lastIndex >= 3 {
			break
		}
	}
	mn.Stop()

	// Fetch only the final entry: [lastIndex, lastIndex+1).
	entries, err := s.Entries(lastIndex, lastIndex+1, noLimit)
	if err != nil {
		t.Fatal(err)
	}
	if len(entries) != 1 {
		t.Fatalf("len(entries) = %d, want %d", len(entries), 1)
	}
	if entries[0].Type != raftpb.EntryConfChange {
		t.Fatalf("type = %v, want %v", entries[0].Type, raftpb.EntryConfChange)
	}
	if !bytes.Equal(entries[0].Data, ccdata) {
		t.Errorf("data = %v, want %v", entries[0].Data, ccdata)
	}
}
Exemple #6
0
// applyConfChange applies a ConfChange to the server. It is only
// invoked with a ConfChange that has already passed through Raft.
//
// If the cluster rejects the change, the change is still handed to raft with
// its NodeID replaced by raft.None and the validation error is returned.
// Otherwise the ConfState returned by raft is stored in *confState and the
// cluster membership and transport are updated according to cc.Type.
//
// The boolean result is true only when the change removes the local member.
func (s *EtcdServer) applyConfChange(cc raftpb.ConfChange, confState *raftpb.ConfState) (bool, error) {
	if verr := s.Cluster.ValidateConfigurationChange(cc); verr != nil {
		cc.NodeID = raft.None
		s.r.ApplyConfChange(cc)
		return false, verr
	}
	*confState = *s.r.ApplyConfChange(cc)

	switch cc.Type {
	case raftpb.ConfChangeAddNode:
		added := new(Member)
		if err := json.Unmarshal(cc.Context, added); err != nil {
			log.Panicf("unmarshal member should never fail: %v", err)
		}
		if cc.NodeID != uint64(added.ID) {
			log.Panicf("nodeID should always be equal to member ID")
		}
		s.Cluster.AddMember(added)
		// Only remote members get a transport peer.
		if added.ID != s.id {
			s.r.transport.AddPeer(added.ID, added.PeerURLs)
			log.Printf("etcdserver: added member %s %v to cluster %s", added.ID, added.PeerURLs, s.Cluster.ID())
		} else {
			log.Printf("etcdserver: added local member %s %v to cluster %s", added.ID, added.PeerURLs, s.Cluster.ID())
		}

	case raftpb.ConfChangeRemoveNode:
		removed := types.ID(cc.NodeID)
		s.Cluster.RemoveMember(removed)
		if removed == s.id {
			// The local member was removed; report it to the caller.
			return true, nil
		}
		s.r.transport.RemovePeer(removed)
		log.Printf("etcdserver: removed member %s from cluster %s", removed, s.Cluster.ID())

	case raftpb.ConfChangeUpdateNode:
		updated := new(Member)
		if err := json.Unmarshal(cc.Context, updated); err != nil {
			log.Panicf("unmarshal member should never fail: %v", err)
		}
		if cc.NodeID != uint64(updated.ID) {
			log.Panicf("nodeID should always be equal to member ID")
		}
		s.Cluster.UpdateRaftAttributes(updated.ID, updated.RaftAttributes)
		// Only remote members have a transport peer to update.
		if updated.ID != s.id {
			s.r.transport.UpdatePeer(updated.ID, updated.PeerURLs)
			log.Printf("etcdserver: update member %s %v in cluster %s", updated.ID, updated.PeerURLs, s.Cluster.ID())
		} else {
			log.Printf("etcdserver: update local member %s %v in cluster %s", updated.ID, updated.PeerURLs, s.Cluster.ID())
		}
	}
	return false, nil
}
Exemple #7
0
// StartNode returns a new Node given a unique raft id, a list of raft peers, and
// the election and heartbeat timeouts in units of ticks.
// It appends a ConfChangeAddNode entry for each given peer to the initial log.
func StartNode(id uint64, peers []Peer, election, heartbeat int, storage Storage) Node {
	n := newNode()
	r := newRaft(id, nil, election, heartbeat, storage, 0)

	// Start as a follower at term 1 and write one term-1 configuration
	// entry per peer into the log.
	r.becomeFollower(1, None)
	for i := range peers {
		change := pb.ConfChange{
			Type:    pb.ConfChangeAddNode,
			NodeID:  peers[i].ID,
			Context: peers[i].Context,
		}
		data, err := change.Marshal()
		if err != nil {
			panic("unexpected marshal error")
		}
		r.raftLog.append(pb.Entry{
			Type:  pb.EntryConfChange,
			Term:  1,
			Index: r.raftLog.lastIndex() + 1,
			Data:  data,
		})
	}

	// Mark these initial entries as committed.
	// TODO(bdarnell): These entries are still unstable; do we need to preserve
	// the invariant that committed < unstable?
	last := r.raftLog.lastIndex()
	r.raftLog.committed = last
	r.Commit = last

	// Apply the entries right away, mainly so that the application can call
	// Campaign immediately after StartNode in tests. These nodes end up being
	// added to raft twice: once here and once when the application's Ready
	// loop calls ApplyConfChange. addNode must run only after every
	// raftLog.append call so that progress.next points past the bootstrap
	// entries (appending them again would be an error because they are
	// already committed). raftLog.applied is deliberately left untouched so
	// the application still observes every conf change through
	// Ready.CommittedEntries.
	for i := range peers {
		r.addNode(peers[i].ID)
	}

	go n.run(r)
	return &n
}
Exemple #8
0
// configure proposes cc through consensus and blocks until one of the
// following happens: the change is applied (the value delivered on the wait
// channel is returned if it is an error, nil otherwise), ctx is done, or the
// server is stopped. A fresh request ID is assigned to cc before proposing.
func (s *EtcdServer) configure(ctx context.Context, cc raftpb.ConfChange) error {
	cc.ID = s.reqIDGen.Next()
	applied := s.w.Register(cc.ID)

	if perr := s.r.ProposeConfChange(ctx, cc); perr != nil {
		// The proposal never went out; release the registered wait.
		s.w.Trigger(cc.ID, nil)
		return perr
	}

	select {
	case result := <-applied:
		switch v := result.(type) {
		case nil:
			return nil
		case error:
			return v
		default:
			log.Panicf("return type should always be error")
			return nil
		}
	case <-ctx.Done():
		s.w.Trigger(cc.ID, nil) // GC wait
		return parseCtxErr(ctx.Err())
	case <-s.done:
		return ErrStopped
	}
}
Exemple #9
0
// run is the event loop of a multiNode. It keeps the per-group raft state in
// the local groups map and the pending, not-yet-delivered Ready values in the
// local rds map, and serves one request per iteration from the multiNode's
// channels: group creation/removal, proposals, received messages,
// configuration changes, ticks, Ready delivery, Advance, status queries and
// stop.
//
// After each iteration that touched a single group, that group's Ready is
// recomputed and stored in rds if it contains updates. The loop returns after
// closing mn.done when a value is received on mn.stop.
//
// Proposals and received messages that name a group not present in groups
// (never created, or already removed) are dropped instead of dereferencing a
// nil *groupState.
func (mn *multiNode) run() {
	groups := map[uint64]*groupState{}
	rds := map[uint64]Ready{}
	// advancec is non-nil only between delivering a Ready batch and receiving
	// the matching Advance.
	var advancec chan map[uint64]Ready
	for {
		// Only select readyc if we have something to report and we are not
		// currently waiting for an advance.
		readyc := mn.readyc
		if len(rds) == 0 || advancec != nil {
			readyc = nil
		}

		// group points to the group that was touched on this iteration (if any)
		var group *groupState
		select {
		case gc := <-mn.groupc:
			// TODO(bdarnell): pass applied through gc and into newRaft. Or get rid of it?
			r := newRaft(mn.id, nil, mn.election, mn.heartbeat, gc.storage, 0)
			group = &groupState{
				id:   gc.id,
				raft: r,
			}
			groups[gc.id] = group
			lastIndex, err := gc.storage.LastIndex()
			if err != nil {
				panic(err) // TODO(bdarnell)
			}
			// If the log is empty, this is a new group (like StartNode); otherwise it's
			// restoring an existing group (like RestartNode).
			// TODO(bdarnell): rethink group initialization and whether the application needs
			// to be able to tell us when it expects the group to exist.
			if lastIndex == 0 {
				r.becomeFollower(1, None)
				// One term-1 ConfChangeAddNode entry per peer, at indexes 1..len(peers).
				ents := make([]pb.Entry, len(gc.peers))
				for i, peer := range gc.peers {
					cc := pb.ConfChange{Type: pb.ConfChangeAddNode, NodeID: peer.ID, Context: peer.Context}
					data, err := cc.Marshal()
					if err != nil {
						panic("unexpected marshal error")
					}
					ents[i] = pb.Entry{Type: pb.EntryConfChange, Term: 1, Index: uint64(i + 1), Data: data}
				}
				r.raftLog.append(ents...)
				r.raftLog.committed = uint64(len(ents))
				// Peers are added only after all bootstrap entries are appended.
				for _, peer := range gc.peers {
					r.addNode(peer.ID)
				}
			}
			// Set the initial hard and soft states after performing all initialization.
			group.prevSoftSt = r.softState()
			group.prevHardSt = r.HardState
			close(gc.done)

		case gr := <-mn.rmgroupc:
			// Forget the group and any Ready still pending for it.
			delete(groups, gr.id)
			delete(rds, gr.id)
			close(gr.done)

		case mm := <-mn.propc:
			// TODO(bdarnell): single-node impl doesn't read from propc unless the group
			// has a leader; we can't do that since we have one propc for many groups.
			// We'll have to buffer somewhere on a group-by-group basis, or just let
			// raft.Step drop any such proposals on the floor.
			mm.msg.From = mn.id
			// Drop proposals for groups we do not know about; group stays nil
			// so no Ready is recomputed below.
			if g, ok := groups[mm.group]; ok {
				group = g
				group.raft.Step(mm.msg)
			}

		case mm := <-mn.recvc:
			// Drop messages for groups we do not know about; a message may
			// arrive for a group that has already been removed.
			if g, ok := groups[mm.group]; ok {
				group = g
				// Response messages are stepped only when the sender is in prs.
				if _, known := group.raft.prs[mm.msg.From]; known || !IsResponseMsg(mm.msg) {
					group.raft.Step(mm.msg)
				}
			}

		case mcc := <-mn.confc:
			// NOTE(review): groups[mcc.group] is nil for an unknown group and
			// the dereference below will panic. Not changed here because the
			// reply the caller expects on mcc.ch in that case is not visible
			// from this function.
			group = groups[mcc.group]
			if mcc.msg.NodeID == None {
				group.raft.resetPendingConf()
				select {
				case mcc.ch <- pb.ConfState{Nodes: group.raft.nodes()}:
				case <-mn.done:
				}
				// Leaves the enclosing select, not the for loop.
				break
			}
			switch mcc.msg.Type {
			case pb.ConfChangeAddNode:
				group.raft.addNode(mcc.msg.NodeID)
			case pb.ConfChangeRemoveNode:
				group.raft.removeNode(mcc.msg.NodeID)
			case pb.ConfChangeUpdateNode:
				group.raft.resetPendingConf()
			default:
				panic("unexpected conf type")
			}
			select {
			case mcc.ch <- pb.ConfState{Nodes: group.raft.nodes()}:
			case <-mn.done:
			}

		case <-mn.tickc:
			// TODO(bdarnell): instead of calling every group on every tick,
			// we should have a priority queue of groups based on their next
			// time-based event.
			for _, g := range groups {
				g.raft.tick()
				rd := g.newReady()
				if rd.containsUpdates() {
					rds[g.id] = rd
				}
			}

		case readyc <- rds:
			// Clear outgoing messages as soon as we've passed them to the application.
			for g := range rds {
				groups[g].raft.msgs = nil
			}
			rds = map[uint64]Ready{}
			// Start waiting for the matching Advance.
			advancec = mn.advancec

		case advs := <-advancec:
			for groupID, rd := range advs {
				// This group shadows the outer variable on purpose: Ready values
				// are recomputed here per group, not after the select.
				group, ok := groups[groupID]
				if !ok {
					continue
				}
				group.commitReady(rd)

				// We've been accumulating new entries in rds which may now be obsolete.
				// Drop the old Ready object and create a new one if needed.
				delete(rds, groupID)
				newRd := group.newReady()
				if newRd.containsUpdates() {
					rds[groupID] = newRd
				}
			}
			advancec = nil

		case ms := <-mn.status:
			// NOTE(review): groups[ms.group] is nil for an unknown group and
			// this will panic; confirm callers only query existing groups.
			ms.ch <- getStatus(groups[ms.group].raft)

		case <-mn.stop:
			close(mn.done)
			return
		}

		// Refresh the pending Ready for the single group touched above.
		if group != nil {
			rd := group.newReady()
			if rd.containsUpdates() {
				rds[group.id] = rd
			}
		}
	}
}