// InitialState implements the raft.Storage interface. func (r *Range) InitialState() (raftpb.HardState, raftpb.ConfState, error) { var hs raftpb.HardState found, err := engine.MVCCGetProto(r.rm.Engine(), keys.RaftHardStateKey(r.Desc().RaftID), proto.ZeroTimestamp, true, nil, &hs) if err != nil { return raftpb.HardState{}, raftpb.ConfState{}, err } if !found { // We don't have a saved HardState, so set up the defaults. if r.isInitialized() { // Set the initial log term. hs.Term = raftInitialLogTerm hs.Commit = raftInitialLogIndex atomic.StoreUint64(&r.lastIndex, raftInitialLogIndex) } else { // This is a new range we are receiving from another node. Start // from zero so we will receive a snapshot. atomic.StoreUint64(&r.lastIndex, 0) } } var cs raftpb.ConfState // For uninitalized ranges, membership is unknown at this point. if found || r.isInitialized() { for _, rep := range r.Desc().Replicas { cs.Nodes = append(cs.Nodes, uint64(proto.MakeRaftNodeID(rep.NodeID, rep.StoreID))) } } return hs, cs, nil }
// Snapshot implements the raft.Storage interface. func (r *Replica) Snapshot() (raftpb.Snapshot, error) { // Copy all the data from a consistent RocksDB snapshot into a RaftSnapshotData. snap := r.rm.NewSnapshot() defer snap.Close() var snapData proto.RaftSnapshotData // Read the range metadata from the snapshot instead of the members // of the Range struct because they might be changed concurrently. appliedIndex, err := r.loadAppliedIndex(snap) if err != nil { return raftpb.Snapshot{}, err } var desc proto.RangeDescriptor // We ignore intents on the range descriptor (consistent=false) because we // know they cannot be committed yet; operations that modify range // descriptors resolve their own intents when they commit. ok, err := engine.MVCCGetProto(snap, keys.RangeDescriptorKey(r.Desc().StartKey), r.rm.Clock().Now(), false /* !consistent */, nil, &desc) if err != nil { return raftpb.Snapshot{}, util.Errorf("failed to get desc: %s", err) } if !ok { return raftpb.Snapshot{}, util.Errorf("couldn't find range descriptor") } // Store RangeDescriptor as metadata, it will be retrieved by ApplySnapshot() snapData.RangeDescriptor = desc // Iterate over all the data in the range, including local-only data like // the response cache. for iter := newRangeDataIterator(r.Desc(), snap); iter.Valid(); iter.Next() { snapData.KV = append(snapData.KV, &proto.RaftSnapshotData_KeyValue{Key: iter.Key(), Value: iter.Value()}) } data, err := gogoproto.Marshal(&snapData) if err != nil { return raftpb.Snapshot{}, err } // Synthesize our raftpb.ConfState from desc. var cs raftpb.ConfState for _, rep := range desc.Replicas { cs.Nodes = append(cs.Nodes, uint64(proto.MakeRaftNodeID(rep.NodeID, rep.StoreID))) } term, err := r.Term(appliedIndex) if err != nil { return raftpb.Snapshot{}, util.Errorf("failed to fetch term of %d: %s", appliedIndex, err) } return raftpb.Snapshot{ Data: data, Metadata: raftpb.SnapshotMetadata{ Index: appliedIndex, Term: term, ConfState: cs, }, }, nil }
func TestRaftNodeID(t *testing.T) { defer leaktest.AfterTest(t) cases := []struct { nodeID proto.NodeID storeID proto.StoreID expected proto.RaftNodeID }{ {0, 1, 1}, {1, 1, 0x100000001}, {2, 3, 0x200000003}, {math.MaxInt32, math.MaxInt32, 0x7fffffff7fffffff}, } for _, c := range cases { x := proto.MakeRaftNodeID(c.nodeID, c.storeID) if x != c.expected { t.Errorf("makeRaftNodeID(%v, %v) returned %v; expected %v", c.nodeID, c.storeID, x, c.expected) } n, s := proto.DecodeRaftNodeID(x) if n != c.nodeID || s != c.storeID { t.Errorf("decodeRaftNodeID(%v) returned %v, %v; expected %v, %v", x, n, s, c.nodeID, c.storeID) } } panicCases := []struct { nodeID proto.NodeID storeID proto.StoreID }{ {1, 0}, {1, -1}, {-1, 1}, } for _, c := range panicCases { func() { defer func() { _ = recover() }() x := proto.MakeRaftNodeID(c.nodeID, c.storeID) t.Errorf("makeRaftNodeID(%v, %v) returned %v; expected panic", c.nodeID, c.storeID, x) }() } }
func TestSendAndReceive(t *testing.T) { defer leaktest.AfterTest(t) stopper := stop.NewStopper() defer stopper.Stop() nodeRPCContext := rpc.NewContext(nodeTestBaseContext, hlc.NewClock(hlc.UnixNano), stopper) g := gossip.New(nodeRPCContext, gossip.TestInterval, gossip.TestBootstrap) // Create several servers, each of which has two stores (A multiraft node ID addresses // a store). const numServers = 3 const storesPerServer = 2 const numStores = numServers * storesPerServer // servers has length numServers. servers := []*rpc.Server{} // All the rest have length numStores (note that several stores share a transport). nextNodeID := proto.NodeID(1) nodeIDs := []proto.RaftNodeID{} transports := []multiraft.Transport{} channels := []channelServer{} for serverIndex := 0; serverIndex < numServers; serverIndex++ { server := rpc.NewServer(util.CreateTestAddr("tcp"), nodeRPCContext) if err := server.Start(); err != nil { t.Fatal(err) } defer server.Close() transport, err := newRPCTransport(g, server, nodeRPCContext) if err != nil { t.Fatalf("Unexpected error creating transport, Error: %s", err) } defer transport.Close() for store := 0; store < storesPerServer; store++ { protoNodeID := nextNodeID nodeID := proto.MakeRaftNodeID(protoNodeID, 1) nextNodeID++ channel := newChannelServer(10, 0) if err := transport.Listen(nodeID, channel); err != nil { t.Fatal(err) } addr := server.Addr() if err := g.AddInfoProto(gossip.MakeNodeIDKey(protoNodeID), &proto.NodeDescriptor{ Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()), }, time.Hour); err != nil { t.Fatal(err) } nodeIDs = append(nodeIDs, nodeID) transports = append(transports, transport) channels = append(channels, channel) } servers = append(servers, server) } // Each store sends one message to each store. for from := 0; from < numStores; from++ { for to := 0; to < numStores; to++ { req := &multiraft.RaftMessageRequest{ GroupID: 1, Message: raftpb.Message{ From: uint64(nodeIDs[from]), To: uint64(nodeIDs[to]), Type: raftpb.MsgHeartbeat, }, } if err := transports[from].Send(req); err != nil { t.Errorf("Unable to send message from %d to %d: %s", nodeIDs[from], nodeIDs[to], err) } } } // Read all the messages from the channels. Note that the transport // does not guarantee in-order delivery between independent // transports, so we just verify that the right number of messages // end up in each channel. for to := 0; to < numStores; to++ { for from := 0; from < numStores; from++ { select { case req := <-channels[to].ch: if req.Message.To != uint64(nodeIDs[to]) { t.Errorf("invalid message received on channel %d (expected from %d): %+v", nodeIDs[to], nodeIDs[from], req) } case <-time.After(5 * time.Second): t.Fatal("timed out waiting for message") } } select { case req := <-channels[to].ch: t.Errorf("got unexpected message %+v on channel %d", req, nodeIDs[to]) default: } } }
// TestInOrderDelivery verifies that for a given pair of nodes, raft // messages are delivered in order. func TestInOrderDelivery(t *testing.T) { defer leaktest.AfterTest(t) stopper := stop.NewStopper() defer stopper.Stop() nodeRPCContext := rpc.NewContext(nodeTestBaseContext, hlc.NewClock(hlc.UnixNano), stopper) g := gossip.New(nodeRPCContext, gossip.TestInterval, gossip.TestBootstrap) server := rpc.NewServer(util.CreateTestAddr("tcp"), nodeRPCContext) if err := server.Start(); err != nil { t.Fatal(err) } defer server.Close() const numMessages = 100 protoNodeID := proto.NodeID(1) raftNodeID := proto.MakeRaftNodeID(protoNodeID, 1) serverTransport, err := newRPCTransport(g, server, nodeRPCContext) if err != nil { t.Fatal(err) } defer serverTransport.Close() serverChannel := newChannelServer(numMessages, 10*time.Millisecond) if err := serverTransport.Listen(raftNodeID, serverChannel); err != nil { t.Fatal(err) } addr := server.Addr() if err := g.AddInfoProto(gossip.MakeNodeIDKey(protoNodeID), &proto.NodeDescriptor{ Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()), }, time.Hour); err != nil { t.Fatal(err) } clientNodeID := proto.MakeRaftNodeID(2, 2) clientTransport, err := newRPCTransport(g, nil, nodeRPCContext) if err != nil { t.Fatal(err) } defer clientTransport.Close() for i := 0; i < numMessages; i++ { req := &multiraft.RaftMessageRequest{ GroupID: 1, Message: raftpb.Message{ To: uint64(raftNodeID), From: uint64(clientNodeID), Commit: uint64(i), }, } if err := clientTransport.Send(req); err != nil { t.Errorf("failed to send message %d: %s", i, err) } } for i := 0; i < numMessages; i++ { req := <-serverChannel.ch if req.Message.Commit != uint64(i) { t.Errorf("messages out of order: got %d while expecting %d", req.Message.Commit, i) } } }