// InitialState implements the raft.Storage interface.
func (r *Range) InitialState() (raftpb.HardState, raftpb.ConfState, error) {
	var hs raftpb.HardState
	found, err := engine.MVCCGetProto(r.rm.Engine(), keys.RaftHardStateKey(r.Desc().RaftID),
		proto.ZeroTimestamp, true, nil, &hs)
	if err != nil {
		return raftpb.HardState{}, raftpb.ConfState{}, err
	}
	if !found {
		// We don't have a saved HardState, so set up the defaults.
		if r.isInitialized() {
			// Set the initial log term and commit index.
			hs.Term = raftInitialLogTerm
			hs.Commit = raftInitialLogIndex

			atomic.StoreUint64(&r.lastIndex, raftInitialLogIndex)
		} else {
			// This is a new range we are receiving from another node. Start
			// from zero so we will receive a snapshot.
			atomic.StoreUint64(&r.lastIndex, 0)
		}
	}

	var cs raftpb.ConfState
	// For uninitialized ranges, membership is unknown at this point.
	if found || r.isInitialized() {
		for _, rep := range r.Desc().Replicas {
			cs.Nodes = append(cs.Nodes, uint64(proto.MakeRaftNodeID(rep.NodeID, rep.StoreID)))
		}
	}

	return hs, cs, nil
}

// Snapshot implements the raft.Storage interface.
func (r *Replica) Snapshot() (raftpb.Snapshot, error) {
	// Copy all the data from a consistent RocksDB snapshot into a RaftSnapshotData.
	snap := r.rm.NewSnapshot()
	defer snap.Close()
	var snapData proto.RaftSnapshotData

	// Read the range metadata from the snapshot instead of the members
	// of the Range struct because they might be changed concurrently.
	appliedIndex, err := r.loadAppliedIndex(snap)
	if err != nil {
		return raftpb.Snapshot{}, err
	}
	var desc proto.RangeDescriptor
	// We ignore intents on the range descriptor (consistent=false) because we
	// know they cannot be committed yet; operations that modify range
	// descriptors resolve their own intents when they commit.
	ok, err := engine.MVCCGetProto(snap, keys.RangeDescriptorKey(r.Desc().StartKey),
		r.rm.Clock().Now(), false /* !consistent */, nil, &desc)
	if err != nil {
		return raftpb.Snapshot{}, util.Errorf("failed to get desc: %s", err)
	}
	if !ok {
		return raftpb.Snapshot{}, util.Errorf("couldn't find range descriptor")
	}

	// Store the RangeDescriptor as metadata; it will be retrieved by ApplySnapshot().
	snapData.RangeDescriptor = desc

	// Iterate over all the data in the range, including local-only data like
	// the response cache.
	iter := newRangeDataIterator(r.Desc(), snap)
	// Close the iterator when done to avoid leaking the underlying engine iterator.
	defer iter.Close()
	for ; iter.Valid(); iter.Next() {
		snapData.KV = append(snapData.KV,
			&proto.RaftSnapshotData_KeyValue{Key: iter.Key(), Value: iter.Value()})
	}

	data, err := gogoproto.Marshal(&snapData)
	if err != nil {
		return raftpb.Snapshot{}, err
	}

	// Synthesize our raftpb.ConfState from desc.
	var cs raftpb.ConfState
	for _, rep := range desc.Replicas {
		cs.Nodes = append(cs.Nodes, uint64(proto.MakeRaftNodeID(rep.NodeID, rep.StoreID)))
	}

	term, err := r.Term(appliedIndex)
	if err != nil {
		return raftpb.Snapshot{}, util.Errorf("failed to fetch term of %d: %s", appliedIndex, err)
	}

	return raftpb.Snapshot{
		Data: data,
		Metadata: raftpb.SnapshotMetadata{
			Index:     appliedIndex,
			Term:      term,
			ConfState: cs,
		},
	}, nil
}
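
// TestRaftNodeID verifies that MakeRaftNodeID packs the node ID into the
// high 32 bits and the store ID into the low 32 bits of a RaftNodeID,
// that DecodeRaftNodeID recovers both, and that invalid IDs panic.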
func TestRaftNodeID(t *testing.T) {
	defer leaktest.AfterTest(t)
	cases := []struct {
		nodeID   proto.NodeID
		storeID  proto.StoreID
		expected proto.RaftNodeID
	}{
		{0, 1, 1},
		{1, 1, 0x100000001},
		{2, 3, 0x200000003},
		{math.MaxInt32, math.MaxInt32, 0x7fffffff7fffffff},
	}
	for _, c := range cases {
		x := proto.MakeRaftNodeID(c.nodeID, c.storeID)
		if x != c.expected {
			t.Errorf("makeRaftNodeID(%v, %v) returned %v; expected %v",
				c.nodeID, c.storeID, x, c.expected)
		}
		n, s := proto.DecodeRaftNodeID(x)
		if n != c.nodeID || s != c.storeID {
			t.Errorf("decodeRaftNodeID(%v) returned %v, %v; expected %v, %v",
				x, n, s, c.nodeID, c.storeID)
		}
	}

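	// Invalid node or store IDs must cause MakeRaftNodeID to panic.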
	panicCases := []struct {
		nodeID  proto.NodeID
		storeID proto.StoreID
	}{
		{1, 0},
		{1, -1},
		{-1, 1},
	}
	for _, c := range panicCases {
		func() {
			defer func() {
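				// Swallow the expected panic; if MakeRaftNodeID does not
				// panic, the Errorf below reports the failure.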
				_ = recover()
			}()
			x := proto.MakeRaftNodeID(c.nodeID, c.storeID)
			t.Errorf("makeRaftNodeID(%v, %v) returned %v; expected panic",
				c.nodeID, c.storeID, x)
		}()
	}
}
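
// TestSendAndReceive creates a network of stores spread across several
// servers, sends one message from every store to every store, and verifies
// that each message arrives exactly once.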
func TestSendAndReceive(t *testing.T) {
	defer leaktest.AfterTest(t)
	stopper := stop.NewStopper()
	defer stopper.Stop()
	nodeRPCContext := rpc.NewContext(nodeTestBaseContext, hlc.NewClock(hlc.UnixNano), stopper)
	g := gossip.New(nodeRPCContext, gossip.TestInterval, gossip.TestBootstrap)

	// Create several servers, each of which has two stores (a multiraft
	// node ID addresses a store).
	const numServers = 3
	const storesPerServer = 2
	const numStores = numServers * storesPerServer
	// servers has length numServers.
	servers := []*rpc.Server{}
	// The remaining slices have length numStores (note that several stores
	// share a transport).
	nextNodeID := proto.NodeID(1)
	nodeIDs := []proto.RaftNodeID{}
	transports := []multiraft.Transport{}
	channels := []channelServer{}
	for serverIndex := 0; serverIndex < numServers; serverIndex++ {
		server := rpc.NewServer(util.CreateTestAddr("tcp"), nodeRPCContext)
		if err := server.Start(); err != nil {
			t.Fatal(err)
		}
		defer server.Close()

		transport, err := newRPCTransport(g, server, nodeRPCContext)
		if err != nil {
			t.Fatalf("Unexpected error creating transport, Error: %s", err)
		}
		defer transport.Close()

		for store := 0; store < storesPerServer; store++ {
			protoNodeID := nextNodeID
			nodeID := proto.MakeRaftNodeID(protoNodeID, 1)
			nextNodeID++

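			// Buffer up to 10 messages with no artificial delay (assuming
			// newChannelServer takes a capacity and a per-message delay).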
			channel := newChannelServer(10, 0)
			if err := transport.Listen(nodeID, channel); err != nil {
				t.Fatal(err)
			}

			addr := server.Addr()
			if err := g.AddInfoProto(gossip.MakeNodeIDKey(protoNodeID),
				&proto.NodeDescriptor{
					Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()),
				},
				time.Hour); err != nil {
				t.Fatal(err)
			}

			nodeIDs = append(nodeIDs, nodeID)
			transports = append(transports, transport)
			channels = append(channels, channel)
		}

		servers = append(servers, server)
	}

	// Each store sends one message to each store.
	for from := 0; from < numStores; from++ {
		for to := 0; to < numStores; to++ {
			req := &multiraft.RaftMessageRequest{
				GroupID: 1,
				Message: raftpb.Message{
					From: uint64(nodeIDs[from]),
					To:   uint64(nodeIDs[to]),
					Type: raftpb.MsgHeartbeat,
				},
			}

			if err := transports[from].Send(req); err != nil {
				t.Errorf("Unable to send message from %d to %d: %s", nodeIDs[from], nodeIDs[to], err)
			}
		}
	}

	// Read all the messages from the channels. Note that the transport
	// does not guarantee in-order delivery between independent
	// transports, so we just verify that the right number of messages
	// end up in each channel.
	for to := 0; to < numStores; to++ {
		for from := 0; from < numStores; from++ {
			select {
			case req := <-channels[to].ch:
				if req.Message.To != uint64(nodeIDs[to]) {
					t.Errorf("invalid message received on channel %d (expected from %d): %+v",
						nodeIDs[to], nodeIDs[from], req)
				}
			case <-time.After(5 * time.Second):
				t.Fatal("timed out waiting for message")
			}
		}

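		// Verify that no unexpected messages remain on this channel.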
		select {
		case req := <-channels[to].ch:
			t.Errorf("got unexpected message %+v on channel %d", req, nodeIDs[to])
		default:
		}
	}
}

// TestInOrderDelivery verifies that for a given pair of nodes, raft
// messages are delivered in order.
func TestInOrderDelivery(t *testing.T) {
	defer leaktest.AfterTest(t)
	stopper := stop.NewStopper()
	defer stopper.Stop()
	nodeRPCContext := rpc.NewContext(nodeTestBaseContext, hlc.NewClock(hlc.UnixNano), stopper)
	g := gossip.New(nodeRPCContext, gossip.TestInterval, gossip.TestBootstrap)

	server := rpc.NewServer(util.CreateTestAddr("tcp"), nodeRPCContext)
	if err := server.Start(); err != nil {
		t.Fatal(err)
	}
	defer server.Close()

	const numMessages = 100
	protoNodeID := proto.NodeID(1)
	raftNodeID := proto.MakeRaftNodeID(protoNodeID, 1)
	serverTransport, err := newRPCTransport(g, server, nodeRPCContext)
	if err != nil {
		t.Fatal(err)
	}
	defer serverTransport.Close()
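	// Consume messages slowly (assuming newChannelServer's second argument
	// is a per-message delay) so that sends queue up behind the receiver
	// and any reordering would be visible.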
	serverChannel := newChannelServer(numMessages, 10*time.Millisecond)
	if err := serverTransport.Listen(raftNodeID, serverChannel); err != nil {
		t.Fatal(err)
	}
	addr := server.Addr()
	if err := g.AddInfoProto(gossip.MakeNodeIDKey(protoNodeID),
		&proto.NodeDescriptor{
			Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()),
		},
		time.Hour); err != nil {
		t.Fatal(err)
	}

	clientNodeID := proto.MakeRaftNodeID(2, 2)
	clientTransport, err := newRPCTransport(g, nil, nodeRPCContext)
	if err != nil {
		t.Fatal(err)
	}
	defer clientTransport.Close()

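	// Use the Commit field as a sequence number so the receiver can verify
	// delivery order.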
	for i := 0; i < numMessages; i++ {
		req := &multiraft.RaftMessageRequest{
			GroupID: 1,
			Message: raftpb.Message{
				To:     uint64(raftNodeID),
				From:   uint64(clientNodeID),
				Commit: uint64(i),
			},
		}
		if err := clientTransport.Send(req); err != nil {
			t.Errorf("failed to send message %d: %s", i, err)
		}
	}

	for i := 0; i < numMessages; i++ {
		req := <-serverChannel.ch
		if req.Message.Commit != uint64(i) {
			t.Errorf("messages out of order: got %d while expecting %d", req.Message.Commit, i)
		}
	}
}