func TestEntryCache(t *testing.T) { defer leaktest.AfterTest(t)() rec := newRaftEntryCache(100) rangeID := roachpb.RangeID(2) // Add entries for range 1, indexes (1-10). ents := addEntries(rec, rangeID, 1, 11) // Fetch all data with an exact match. verifyGet(t, rec, rangeID, 1, 11, ents, 11) // Fetch point entry. verifyGet(t, rec, rangeID, 1, 2, ents[0:1], 2) // Fetch overlapping first half. verifyGet(t, rec, rangeID, 0, 5, []raftpb.Entry{}, 0) // Fetch overlapping second half. verifyGet(t, rec, rangeID, 9, 12, ents[8:], 11) // Fetch data from earlier range. verifyGet(t, rec, roachpb.RangeID(1), 1, 11, []raftpb.Entry{}, 1) // Fetch data from later range. verifyGet(t, rec, roachpb.RangeID(3), 1, 11, []raftpb.Entry{}, 1) // Create a gap in the entries. rec.delEntries(rangeID, 4, 8) // Fetch all data; verify we get only first three. verifyGet(t, rec, rangeID, 1, 11, ents[0:3], 4) // Try to fetch from within the gap; expect no entries. verifyGet(t, rec, rangeID, 5, 11, []raftpb.Entry{}, 5) // Fetch after the gap. verifyGet(t, rec, rangeID, 8, 11, ents[7:], 11) // Delete the prefix of entries. rec.delEntries(rangeID, 1, 3) // Verify entries are gone. verifyGet(t, rec, rangeID, 1, 5, []raftpb.Entry{}, 1) // Delete the suffix of entries. rec.delEntries(rangeID, 10, 11) // Verify get of entries at end of range. verifyGet(t, rec, rangeID, 8, 11, ents[7:9], 10) }
// TestHeartbeatResponseFanout check 2 raft groups on the same node distribution, // but each group has different Term, heartbeat response from each group should // not disturb other group's Term or Leadership func TestHeartbeatResponseFanout(t *testing.T) { defer leaktest.AfterTest(t) stopper := stop.NewStopper() defer stopper.Stop() cluster := newTestCluster(nil, 3, stopper, t) groupID1 := roachpb.RangeID(1) cluster.createGroup(groupID1, 0, 3 /* replicas */) groupID2 := roachpb.RangeID(2) cluster.createGroup(groupID2, 0, 3 /* replicas */) leaderIndex := 0 cluster.elect(leaderIndex, groupID1) // GroupID2 will have 3 round of election, so it will have different // term with groupID1, but both leader on the same node. for i := 2; i >= 0; i-- { leaderIndex = i cluster.elect(leaderIndex, groupID2) } // Send a coalesced heartbeat. // Heartbeat response from groupID2 will have a big term than which from groupID1. cluster.nodes[0].coalescedHeartbeat() // Start submit a command to see if groupID1's leader changed? cluster.nodes[0].SubmitCommand(groupID1, makeCommandID(), []byte("command")) select { case _ = <-cluster.events[0].CommandCommitted: log.Infof("SubmitCommand succeed after Heartbeat Response fanout") case <-time.After(500 * time.Millisecond): t.Fatalf("No leader after Heartbeat Response fanout") } }
func TestLeaderCache(t *testing.T) { defer leaktest.AfterTest(t)() lc := newLeaderCache(3) if repDesc, ok := lc.Lookup(12); ok { t.Errorf("lookup of missing key returned: %+v", repDesc) } rangeID := roachpb.RangeID(5) replica := roachpb.ReplicaDescriptor{StoreID: 1} lc.Update(rangeID, replica) if repDesc, ok := lc.Lookup(rangeID); !ok { t.Fatalf("expected %+v", replica) } else if repDesc != replica { t.Errorf("expected %+v, got %+v", replica, repDesc) } newReplica := roachpb.ReplicaDescriptor{StoreID: 7} lc.Update(rangeID, newReplica) if repDesc, ok := lc.Lookup(rangeID); !ok { t.Fatalf("expected %+v", replica) } else if repDesc != newReplica { t.Errorf("expected %+v, got %+v", newReplica, repDesc) } lc.Update(rangeID, roachpb.ReplicaDescriptor{}) if repDesc, ok := lc.Lookup(rangeID); ok { t.Errorf("lookup of evicted key returned: %+v", repDesc) } for i := 10; i < 20; i++ { lc.Update(roachpb.RangeID(i), replica) } _, ok16 := lc.Lookup(16) _, ok17 := lc.Lookup(17) if ok16 || !ok17 { t.Fatalf("unexpected policy used in cache") } }
func localRangeIDKeyParse(input string) (remainder string, key roachpb.Key) { var rangeID int64 var err error input = mustShiftSlash(input) if endPos := strings.Index(input, "/"); endPos > 0 { rangeID, err = strconv.ParseInt(input[:endPos], 10, 64) if err != nil { panic(err) } input = input[endPos:] } else { panic(errors.Errorf("illegal RangeID: %q", input)) } input = mustShiftSlash(input) var infix string infix, input = mustShift(input) var replicated bool switch { case bytes.Equal(localRangeIDUnreplicatedInfix, []byte(infix)): case bytes.Equal(localRangeIDReplicatedInfix, []byte(infix)): replicated = true default: panic(errors.Errorf("invalid infix: %q", infix)) } input = mustShiftSlash(input) // Get the suffix. var suffix roachpb.RKey for _, s := range rangeIDSuffixDict { if strings.HasPrefix(input, s.name) { input = input[len(s.name):] if s.psFunc != nil { remainder, key = s.psFunc(roachpb.RangeID(rangeID), input) return } suffix = roachpb.RKey(s.suffix) break } } maker := MakeRangeIDUnreplicatedKey if replicated { maker = MakeRangeIDReplicatedKey } if suffix != nil { if input != "" { panic(&errUglifyUnsupported{errors.New("nontrivial detail")}) } var detail roachpb.RKey // TODO(tschottdorf): can't do this, init cycle: // detail, err := UglyPrint(input) // if err != nil { // return "", nil, err // } remainder = "" key = maker(roachpb.RangeID(rangeID), suffix, roachpb.RKey(detail)) return } panic(&errUglifyUnsupported{errors.New("unhandled general range key")}) }
// TestRaftAfterRemoveRange verifies that the raft state removes // a remote node correctly after the Replica was removed from the Store. func TestRaftAfterRemoveRange(t *testing.T) { defer leaktest.AfterTest(t) mtc := startMultiTestContext(t, 3) defer mtc.Stop() // Make the split. splitArgs := adminSplitArgs(roachpb.KeyMin, []byte("b")) if _, err := client.SendWrapped(rg1(mtc.stores[0]), nil, &splitArgs); err != nil { t.Fatal(err) } rangeID := roachpb.RangeID(2) mtc.replicateRange(rangeID, 1, 2) mtc.unreplicateRange(rangeID, 2) mtc.unreplicateRange(rangeID, 1) // Wait for the removal to be processed. util.SucceedsWithin(t, time.Second, func() error { _, err := mtc.stores[1].GetReplica(rangeID) if _, ok := err.(*roachpb.RangeNotFoundError); ok { return nil } else if err != nil { return err } return util.Errorf("range still exists") }) replica1 := roachpb.ReplicaDescriptor{ ReplicaID: roachpb.ReplicaID(mtc.stores[1].StoreID()), NodeID: roachpb.NodeID(mtc.stores[1].StoreID()), StoreID: mtc.stores[1].StoreID(), } replica2 := roachpb.ReplicaDescriptor{ ReplicaID: roachpb.ReplicaID(mtc.stores[2].StoreID()), NodeID: roachpb.NodeID(mtc.stores[2].StoreID()), StoreID: mtc.stores[2].StoreID(), } if err := mtc.transport.Send(&storage.RaftMessageRequest{ GroupID: 0, ToReplica: replica1, FromReplica: replica2, Message: raftpb.Message{ From: uint64(replica2.ReplicaID), To: uint64(replica1.ReplicaID), Type: raftpb.MsgHeartbeat, }}); err != nil { t.Fatal(err) } // Execute another replica change to ensure that raft has processed // the heartbeat just sent. mtc.replicateRange(roachpb.RangeID(1), 1) // Expire leases to ensure any remaining intent resolutions can complete. // TODO(bdarnell): understand why some tests need this. mtc.expireLeaderLeases() }
func TestLeaderCache(t *testing.T) { defer leaktest.AfterTest(t) lc := newLeaderCache(3) if r := lc.Lookup(12); r.StoreID != 0 { t.Fatalf("lookup of missing key returned replica: %v", r) } replica := roachpb.ReplicaDescriptor{StoreID: 1} lc.Update(5, replica) if r := lc.Lookup(5); r.StoreID != 1 { t.Errorf("expected %v, got %v", replica, r) } newReplica := roachpb.ReplicaDescriptor{StoreID: 7} lc.Update(5, newReplica) r := lc.Lookup(5) if r.StoreID != 7 { t.Errorf("expected %v, got %v", newReplica, r) } lc.Update(5, roachpb.ReplicaDescriptor{}) r = lc.Lookup(5) if r.StoreID != 0 { t.Fatalf("evicted leader returned: %v", r) } for i := 10; i < 20; i++ { lc.Update(roachpb.RangeID(i), replica) } if lc.Lookup(16).StoreID != 0 || lc.Lookup(17).StoreID == 0 { t.Errorf("unexpected policy used in cache") } }
func runDebugRaftLog(cmd *cobra.Command, args []string) error { stopper := stop.NewStopper() defer stopper.Stop() if len(args) != 2 { return errors.New("required arguments: dir range_id") } db, err := openStore(cmd, args[0], stopper) if err != nil { return err } rangeIDInt, err := strconv.ParseInt(args[1], 10, 64) if err != nil { return err } rangeID := roachpb.RangeID(rangeIDInt) start := engine.MakeMVCCMetadataKey(keys.RaftLogPrefix(rangeID)) end := engine.MakeMVCCMetadataKey(keys.RaftLogPrefix(rangeID).PrefixEnd()) if err := db.Iterate(start, end, printRaftLogEntry); err != nil { return err } return nil }
// newTestRangeSet creates a new range set that has the count number of ranges. func newTestRangeSet(count int, t *testing.T) *testRangeSet { rs := &testRangeSet{replicasByKey: btree.New(64 /* degree */)} for i := 0; i < count; i++ { desc := &roachpb.RangeDescriptor{ RangeID: roachpb.RangeID(i), StartKey: roachpb.RKey(fmt.Sprintf("%03d", i)), EndKey: roachpb.RKey(fmt.Sprintf("%03d", i+1)), } // Initialize the range stat so the scanner can use it. rng := &Replica{ RangeID: desc.RangeID, } rng.mu.state.Stats = enginepb.MVCCStats{ KeyBytes: 1, ValBytes: 2, KeyCount: 1, LiveCount: 1, } if err := rng.setDesc(desc); err != nil { t.Fatal(err) } if exRngItem := rs.replicasByKey.ReplaceOrInsert(rng); exRngItem != nil { t.Fatalf("failed to insert range %s", rng) } } return rs }
func TestLeaderElectionEvent(t *testing.T) { defer leaktest.AfterTest(t) // Leader election events are fired when the leader commits an entry, not when it // issues a call for votes. stopper := stop.NewStopper() cluster := newTestCluster(nil, 3, stopper, t) defer stopper.Stop() groupID := roachpb.RangeID(1) cluster.createGroup(groupID, 0, 3) // Process a Ready with a new leader but no new commits. // This happens while an election is in progress. // This may be dirty, but it seems this is the only way to make testrace pass. cluster.nodes[1].callbackChan <- func() { cluster.nodes[1].maybeSendLeaderEvent(groupID, cluster.nodes[1].groups[groupID], &raft.Ready{ SoftState: &raft.SoftState{ Lead: 3, }, }) } // Trigger multiraft another round select cluster.tickers[1].Tick() // No events are sent. select { case e := <-cluster.events[1].LeaderElection: t.Fatalf("got unexpected event %v", e) case <-time.After(200 * time.Millisecond): } // Now there are new committed entries. A new leader always commits an entry // to conclude the election. entry := raftpb.Entry{ Index: 42, Term: 42, } // This may be dirty, but it seems this is the only way to make testrace pass. cluster.nodes[1].callbackChan <- func() { cluster.nodes[1].maybeSendLeaderEvent(groupID, cluster.nodes[1].groups[groupID], &raft.Ready{ Entries: []raftpb.Entry{entry}, CommittedEntries: []raftpb.Entry{entry}, }) } cluster.tickers[1].Tick() // Now we get an event. select { case e := <-cluster.events[1].LeaderElection: if !reflect.DeepEqual(e, &EventLeaderElection{ GroupID: groupID, ReplicaID: 3, Term: 42, }) { t.Errorf("election event did not match expectations: %+v", e) } case <-time.After(200 * time.Millisecond): t.Fatal("didn't get expected event") } }
// TestLeaderRemoveSelf verifies that a leader can remove itself // without panicking and future access to the range returns a // RangeNotFoundError (not RaftGroupDeletedError, and even before // the ReplicaGCQueue has run). func TestLeaderRemoveSelf(t *testing.T) { defer leaktest.AfterTest(t) mtc := startMultiTestContext(t, 2) defer mtc.Stop() // Disable the replica GC queue. This verifies that the replica is // considered removed even before the gc queue has run, and also // helps avoid a deadlock at shutdown. mtc.stores[0].DisableReplicaGCQueue(true) raftID := roachpb.RangeID(1) mtc.replicateRange(raftID, 1) // Remove the replica from first store. mtc.unreplicateRange(raftID, 0) getArgs := getArgs([]byte("a")) // Force the read command request a new lease. clock := mtc.clocks[0] header := roachpb.Header{} header.Timestamp = clock.Update(clock.Now().Add(int64(storage.DefaultLeaderLeaseDuration), 0)) // Expect get a RangeNotFoundError. _, pErr := client.SendWrappedWith(rg1(mtc.stores[0]), nil, header, &getArgs) if _, ok := pErr.GoError().(*roachpb.RangeNotFoundError); !ok { t.Fatalf("expect get RangeNotFoundError, actual get %v ", pErr) } }
func parseRangeID(arg string) (roachpb.RangeID, error) { rangeIDInt, err := strconv.ParseInt(arg, 10, 64) if err != nil { return 0, err } return roachpb.RangeID(rangeIDInt), nil }
// sendAttempt gathers and rearranges the replicas, and makes an RPC call. func (ds *DistSender) sendAttempt(trace *tracer.Trace, ba roachpb.BatchRequest, desc *roachpb.RangeDescriptor) (*roachpb.BatchResponse, *roachpb.Error) { defer trace.Epoch("sending RPC")() leader := ds.leaderCache.Lookup(roachpb.RangeID(desc.RangeID)) // Try to send the call. replicas := newReplicaSlice(ds.gossip, desc) // Rearrange the replicas so that those replicas with long common // prefix of attributes end up first. If there's no prefix, this is a // no-op. order := ds.optimizeReplicaOrder(replicas) // If this request needs to go to a leader and we know who that is, move // it to the front. if !(ba.IsReadOnly() && ba.ReadConsistency == roachpb.INCONSISTENT) && leader.StoreID > 0 { if i := replicas.FindReplica(leader.StoreID); i >= 0 { replicas.MoveToFront(i) order = rpc.OrderStable } } br, err := ds.sendRPC(trace, desc.RangeID, replicas, order, ba) if err != nil { return nil, roachpb.NewError(err) } // Untangle the error from the received response. pErr := br.Error br.Error = nil // scrub the response error return br, pErr }
// handleWriteReady converts a set of raft.Ready structs into a writeRequest // to be persisted, marks the group as writing and sends it to the writeTask. func (s *state) handleWriteReady(readyGroups map[uint64]raft.Ready) { if log.V(6) { log.Infof("node %v write ready, preparing request", s.nodeID) } writeRequest := newWriteRequest() for groupID, ready := range readyGroups { raftGroupID := roachpb.RangeID(groupID) g, ok := s.groups[raftGroupID] if !ok { if log.V(6) { log.Infof("dropping write request to group %d", groupID) } continue } g.writing = true gwr := &groupWriteRequest{} if !raft.IsEmptyHardState(ready.HardState) { gwr.state = ready.HardState } if !raft.IsEmptySnap(ready.Snapshot) { gwr.snapshot = ready.Snapshot } if len(ready.Entries) > 0 { gwr.entries = ready.Entries } writeRequest.groups[raftGroupID] = gwr } s.writeTask.in <- writeRequest }
func TestMembershipChange(t *testing.T) { defer leaktest.AfterTest(t) stopper := stop.NewStopper() cluster := newTestCluster(nil, 4, stopper, t) defer stopper.Stop() // Create a group with a single member, cluster.nodes[0]. groupID := roachpb.RangeID(1) cluster.createGroup(groupID, 0, 1) // An automatic election is triggered since this is a single-node Raft group, // so we don't need to call triggerElection. // Consume and apply the membership change events. for i := 0; i < 4; i++ { go func(i int) { for { e, ok := <-cluster.events[i].MembershipChangeCommitted if !ok { return } e.Callback(nil) } }(i) } // Add each of the other three nodes to the cluster. for i := 1; i < 4; i++ { ch := cluster.nodes[0].ChangeGroupMembership(groupID, makeCommandID(), raftpb.ConfChangeAddNode, roachpb.ReplicaDescriptor{ NodeID: cluster.nodes[i].nodeID, StoreID: roachpb.StoreID(cluster.nodes[i].nodeID), ReplicaID: roachpb.ReplicaID(cluster.nodes[i].nodeID), }, nil) <-ch } // TODO(bdarnell): verify that the channel events are sent out correctly. /* for i := 0; i < 10; i++ { log.Infof("tick %d", i) cluster.tickers[0].Tick() time.Sleep(5 * time.Millisecond) } // Each node is notified of each other node's joining. for i := 0; i < 4; i++ { for j := 1; j < 4; j++ { select { case e := <-cluster.events[i].MembershipChangeCommitted: if e.NodeID != cluster.nodes[j].nodeID { t.Errorf("node %d expected event for %d, got %d", i, j, e.NodeID) } default: t.Errorf("node %d did not get expected event for %d", i, j) } } }*/ }
func TestSlowStorage(t *testing.T) { defer leaktest.AfterTest(t) stopper := stop.NewStopper() cluster := newTestCluster(nil, 3, stopper, t) defer stopper.Stop() groupID := roachpb.RangeID(1) cluster.createGroup(groupID, 0, 3) cluster.triggerElection(0, groupID) // Block the storage on the last node. cluster.storages[2].Block() // Submit a command to the leader cluster.nodes[0].SubmitCommand(groupID, makeCommandID(), []byte("command")) // Even with the third node blocked, the other nodes can make progress. for i := 0; i < 2; i++ { events := cluster.events[i] log.Infof("waiting for event to be commited on node %v", i) commit := <-events.CommandCommitted if string(commit.Command) != "command" { t.Errorf("unexpected value in committed command: %v", commit.Command) } } // Ensure that node 2 is in fact blocked. time.Sleep(time.Millisecond) select { case commit := <-cluster.events[2].CommandCommitted: t.Errorf("didn't expect commits on node 2 but got %v", commit) default: } // After unblocking the third node, it will catch up. cluster.storages[2].Unblock() log.Infof("waiting for event to be commited on node 2") // When we unblock, the backlog is not guaranteed to be processed in order, // and in some cases the leader may need to retransmit some messages. for i := 0; i < 3; i++ { select { case commit := <-cluster.events[2].CommandCommitted: if string(commit.Command) != "command" { t.Errorf("unexpected value in committed command: %v", commit.Command) } return case <-time.After(5 * time.Millisecond): // Tick both node's clocks. The ticks on the follower node don't // really do anything, but they do ensure that that goroutine is // getting scheduled (and the real-time delay allows rpc responses // to pass between the nodes) cluster.tickers[0].Tick() cluster.tickers[2].Tick() } } }
func parseRangeID(arg string) (roachpb.RangeID, error) { rangeIDInt, err := strconv.ParseInt(arg, 10, 64) if err != nil { return 0, err } if rangeIDInt < 1 { return 0, fmt.Errorf("illegal RangeID: %d", rangeIDInt) } return roachpb.RangeID(rangeIDInt), nil }
// handleWriteReady converts a set of raft.Ready structs into a writeRequest // to be persisted, marks the group as writing and sends it to the writeTask. func (s *state) handleWriteReady() { if log.V(6) { log.Infof("node %v write ready, preparing request", s.nodeID) } s.lockStorage() defer s.unlockStorage() writeRequest := newWriteRequest() for groupID, ready := range s.readyGroups { raftGroupID := roachpb.RangeID(groupID) g, ok := s.groups[raftGroupID] if !ok { if log.V(6) { log.Infof("dropping write request to group %d", groupID) } continue } g.writing = true gwr := &groupWriteRequest{} var err error gwr.replicaID, err = s.Storage().ReplicaIDForStore(roachpb.RangeID(groupID), s.storeID) if err != nil { if log.V(1) { log.Warningf("failed to look up replica ID for range %v (disabling replica ID check): %s", groupID, err) } gwr.replicaID = 0 } if !raft.IsEmptyHardState(ready.HardState) { gwr.state = ready.HardState } if !raft.IsEmptySnap(ready.Snapshot) { gwr.snapshot = ready.Snapshot } if len(ready.Entries) > 0 { gwr.entries = ready.Entries } writeRequest.groups[raftGroupID] = gwr } s.writeTask.in <- writeRequest }
// TestReplicateAfterSplit verifies that a new replica whose start key // is not KeyMin replicating to a fresh store can apply snapshots correctly. func TestReplicateAfterSplit(t *testing.T) { defer leaktest.AfterTest(t) mtc := startMultiTestContext(t, 2) defer mtc.Stop() rangeID := roachpb.RangeID(1) splitKey := roachpb.Key("m") key := roachpb.Key("z") store0 := mtc.stores[0] // Make the split splitArgs := adminSplitArgs(roachpb.KeyMin, splitKey) if _, err := client.SendWrapped(rg1(store0), nil, &splitArgs); err != nil { t.Fatal(err) } rangeID2 := store0.LookupReplica(roachpb.RKey(key), nil).RangeID if rangeID2 == rangeID { t.Errorf("got same range id after split") } // Issue an increment for later check. incArgs := incrementArgs(key, 11) if _, err := client.SendWrappedWith(rg1(store0), nil, roachpb.Header{ RangeID: rangeID2, }, &incArgs); err != nil { t.Fatal(err) } // Now add the second replica. mtc.replicateRange(rangeID2, 0, 1) if mtc.stores[1].LookupReplica(roachpb.RKey(key), nil).GetMaxBytes() == 0 { t.Error("Range MaxBytes is not set after snapshot applied") } // Once it catches up, the effects of increment commands can be seen. if err := util.IsTrueWithin(func() bool { getArgs := getArgs(key) // Reading on non-leader replica should use inconsistent read reply, err := client.SendWrappedWith(rg1(mtc.stores[1]), nil, roachpb.Header{ RangeID: rangeID2, ReadConsistency: roachpb.INCONSISTENT, }, &getArgs) if err != nil { return false } getResp := reply.(*roachpb.GetResponse) if log.V(1) { log.Infof("read value %d", mustGetInt(getResp.Value)) } return mustGetInt(getResp.Value) == 11 }, replicaReadTimeout); err != nil { t.Fatal(err) } }
// addRange adds a new range to the cluster but does not attach it to any // store. func (c *Cluster) addRange() *Range { rangeID := roachpb.RangeID(len(c.ranges)) newRng := newRange(rangeID, c.allocator) c.ranges[rangeID] = newRng // Save a sorted array of range IDs to avoid having to calculate them // multiple times. c.rangeIDs = append(c.rangeIDs, rangeID) sort.Sort(c.rangeIDs) return newRng }
// TestReplicaGCQueueDropReplica verifies that a removed replica is // immediately cleaned up. func TestReplicaGCQueueDropReplicaDirect(t *testing.T) { defer leaktest.AfterTest(t)() mtc := &multiTestContext{} const numStores = 3 rangeID := roachpb.RangeID(1) // In this test, the Replica on the second Node is removed, and the test // verifies that that Node adds this Replica to its RangeGCQueue. However, // the queue does a consistent lookup which will usually be read from // Node 1. Hence, if Node 1 hasn't processed the removal when Node 2 has, // no GC will take place since the consistent RangeLookup hits the first // Node. We use the TestingCommandFilter to make sure that the second Node // waits for the first. ctx := storage.TestStoreContext() mtc.storeContext = &ctx mtc.storeContext.TestingKnobs.TestingCommandFilter = func(filterArgs storageutils.FilterArgs) error { et, ok := filterArgs.Req.(*roachpb.EndTransactionRequest) if !ok || filterArgs.Sid != 2 { return nil } rct := et.InternalCommitTrigger.GetChangeReplicasTrigger() if rct == nil || rct.ChangeType != roachpb.REMOVE_REPLICA { return nil } util.SucceedsSoon(t, func() error { r, err := mtc.stores[0].GetReplica(rangeID) if err != nil { return err } if i, _ := r.Desc().FindReplica(2); i >= 0 { return errors.New("expected second node gone from first node's known replicas") } return nil }) return nil } mtc.Start(t, numStores) defer mtc.Stop() mtc.replicateRange(rangeID, 1, 2) mtc.unreplicateRange(rangeID, 1) // Make sure the range is removed from the store. util.SucceedsSoon(t, func() error { if _, err := mtc.stores[1].GetReplica(rangeID); !testutils.IsError(err, "range .* was not found") { return util.Errorf("expected range removal") } return nil }) }
// sendSingleRange gathers and rearranges the replicas, and makes an RPC call. func (ds *DistSender) sendSingleRange( ctx context.Context, ba roachpb.BatchRequest, desc *roachpb.RangeDescriptor, ) (*roachpb.BatchResponse, *roachpb.Error) { // Hack: avoid formatting the message passed to Span.LogEvent for // opentracing.noopSpans. We can't actually tell if we have a noopSpan, but // we can see if the span as a NoopTracer. Note that this particular // invocation is expensive because we're pretty-printing keys. // // TODO(tschottdorf): This hack can go away when something like // Span.LogEventf is added. sp := opentracing.SpanFromContext(ctx) if sp != nil && sp.Tracer() != (opentracing.NoopTracer{}) { sp.LogEvent(fmt.Sprintf("sending RPC to [%s, %s)", desc.StartKey, desc.EndKey)) } // Try to send the call. replicas := newReplicaSlice(ds.gossip, desc) // Rearrange the replicas so that those replicas with long common // prefix of attributes end up first. If there's no prefix, this is a // no-op. order := ds.optimizeReplicaOrder(replicas) // If this request needs to go to a leader and we know who that is, move // it to the front. if !(ba.IsReadOnly() && ba.ReadConsistency == roachpb.INCONSISTENT) { if leader := ds.leaderCache.Lookup(roachpb.RangeID(desc.RangeID)); leader.StoreID > 0 { if i := replicas.FindReplica(leader.StoreID); i >= 0 { replicas.MoveToFront(i) order = orderStable } } } // TODO(tschottdorf): should serialize the trace here, not higher up. br, pErr := ds.sendRPC(ctx, desc.RangeID, replicas, order, ba) if pErr != nil { return nil, pErr } // If the reply contains a timestamp, update the local HLC with it. if br.Error != nil && br.Error.Now != roachpb.ZeroTimestamp { ds.clock.Update(br.Error.Now) } else if br.Now != roachpb.ZeroTimestamp { ds.clock.Update(br.Now) } // Untangle the error from the received response. pErr = br.Error br.Error = nil // scrub the response error return br, pErr }
// TestRaftRemoveRace adds and removes a replica repeatedly in an // attempt to reproduce a race // (https://github.com/cockroachdb/cockroach/issues/1911). Note that // 10 repetitions is not enough to reliably reproduce the problem, but // it's better than any other tests we have for this (increasing the // number of repetitions adds an unacceptable amount of test runtime). func TestRaftRemoveRace(t *testing.T) { defer leaktest.AfterTest(t) mtc := startMultiTestContext(t, 3) defer mtc.Stop() rangeID := roachpb.RangeID(1) mtc.replicateRange(rangeID, 1, 2) for i := 0; i < 10; i++ { mtc.unreplicateRange(rangeID, 2) mtc.replicateRange(rangeID, 2) } }
// TestReproposeConfigChange verifies the behavior when multiple // configuration changes are in flight at once. Raft prohibits this, // but any configuration changes that are dropped by this rule should // be reproposed when the previous change completes. func TestReproposeConfigChange(t *testing.T) { defer leaktest.AfterTest(t) stopper := stop.NewStopper() defer stopper.Stop() const clusterSize = 4 const groupSize = 3 cluster := newTestCluster(nil, clusterSize, stopper, t) const groupID = roachpb.RangeID(1) const leader = 0 const proposer = 1 cluster.createGroup(groupID, leader, groupSize) cluster.elect(leader, groupID) targetDesc := roachpb.ReplicaDescriptor{ NodeID: cluster.nodes[groupSize].nodeID, StoreID: roachpb.StoreID(cluster.nodes[groupSize].nodeID), ReplicaID: roachpb.ReplicaID(cluster.nodes[groupSize].nodeID), } // Add a node and immediately remove it without waiting for the // first change to commit. addErrCh := cluster.nodes[proposer].ChangeGroupMembership(groupID, makeCommandID(), raftpb.ConfChangeAddNode, targetDesc, nil) removeErrCh := cluster.nodes[proposer].ChangeGroupMembership(groupID, makeCommandID(), raftpb.ConfChangeRemoveNode, targetDesc, nil) // The add command will commit first; then it needs to be applied. // Apply it on the proposer node before the leader. e := <-cluster.events[proposer].MembershipChangeCommitted e.Callback(nil) e = <-cluster.events[leader].MembershipChangeCommitted e.Callback(nil) // Now wait for both commands to commit. select { case err := <-addErrCh: if err != nil { t.Errorf("add failed: %s", err) } case <-time.After(time.Second): t.Errorf("add timed out") } select { case err := <-removeErrCh: if err != nil { t.Errorf("remove failed: %s", err) } case <-time.After(time.Second): t.Errorf("remove timed out") } }
// TestBookieReserveMaxBytes ensures that over-booking doesn't occur when trying // to reserve more bytes than maxReservedBytes. func TestBookieReserveMaxBytes(t *testing.T) { defer leaktest.AfterTest(t)() previousReservedBytes := 10 stopper, _, b := createTestBookie(time.Hour, previousReservedBytes*2, int64(previousReservedBytes)) defer stopper.Stop() // Load up reservations with a size of 1 each. for i := 1; i <= previousReservedBytes; i++ { req := roachpb.ReservationRequest{ StoreRequestHeader: roachpb.StoreRequestHeader{ StoreID: roachpb.StoreID(i), NodeID: roachpb.NodeID(i), }, RangeID: roachpb.RangeID(i), RangeSize: 1, } if !b.Reserve(context.Background(), req, nil).Reserved { t.Errorf("%d: could not add reservation", i) } verifyBookie(t, b, i, i, int64(i)) } overbookedReq := roachpb.ReservationRequest{ StoreRequestHeader: roachpb.StoreRequestHeader{ StoreID: roachpb.StoreID(previousReservedBytes + 1), NodeID: roachpb.NodeID(previousReservedBytes + 1), }, RangeID: roachpb.RangeID(previousReservedBytes + 1), RangeSize: 1, } if b.Reserve(context.Background(), overbookedReq, nil).Reserved { t.Errorf("expected reservation to fail due to too many already existing reservations, but it succeeded") } // The same numbers from the last call to verifyBookie. verifyBookie(t, b, previousReservedBytes, previousReservedBytes, int64(previousReservedBytes)) }
func TestEntryCacheClearTo(t *testing.T) { defer leaktest.AfterTest(t)() rangeID := roachpb.RangeID(1) rec := newRaftEntryCache(100) rec.addEntries(rangeID, []raftpb.Entry{newEntry(2, 1)}) rec.addEntries(rangeID, []raftpb.Entry{newEntry(20, 1), newEntry(21, 1)}) rec.clearTo(rangeID, 21) if ents, _, _ := rec.getEntries(rangeID, 2, 21, 0); len(ents) != 0 { t.Errorf("expected no entries after clearTo") } if ents, _, _ := rec.getEntries(rangeID, 21, 22, 0); len(ents) != 1 { t.Errorf("expected entry 22 to remain in the cache clearTo") } }
// TestRaftRemoveRace adds and removes a replica repeatedly in an // attempt to reproduce a race // (https://github.com/cockroachdb/cockroach/issues/1911). Note that // 10 repetitions is not enough to reliably reproduce the problem, but // it's better than any other tests we have for this (increasing the // number of repetitions adds an unacceptable amount of test runtime). func TestRaftRemoveRace(t *testing.T) { defer leaktest.AfterTest(t) t.Skip("TODO(bdarnell): https://github.com/cockroachdb/cockroach/issues/2878") mtc := startMultiTestContext(t, 3) defer mtc.Stop() rangeID := roachpb.RangeID(1) mtc.replicateRange(rangeID, 0, 1, 2) for i := 0; i < 10; i++ { mtc.unreplicateRange(rangeID, 0, 2) mtc.replicateRange(rangeID, 0, 2) } }
func runSubmitCommandBenchmark(numNodes, pendingCommands int, b *testing.B) { stopper := stop.NewStopper() defer stopper.Stop() cluster := newTestCluster(NewLocalTransport(), numNodes, stopper, b) groupID := roachpb.RangeID(1) cluster.createGroup(groupID, 0, numNodes) if numNodes > 1 { cluster.elect(0, groupID) } // Rate limiting: the producer (main) thread writes to the throttle // channel before submitting a command. The consumer thread started // here reads from the throttle channel as commands are committed. throttle := make(chan struct{}, pendingCommands) stopper.RunWorker(func() { for { select { case <-cluster.events[0].CommandCommitted: case <-stopper.ShouldStop(): return } select { case <-throttle: case <-stopper.ShouldStop(): return } } }) // Also drain the other nodes' CommandCommitted channels to prevent deadlock. for i := 1; i < numNodes; i++ { ch := cluster.events[i].CommandCommitted stopper.RunWorker(func() { for range ch { } }) } b.ResetTimer() for i := 0; i < b.N; i++ { throttle <- struct{}{} // SubmitCommand returns a `chan error` but we can't read from it // here without serializing all the commands. _ = cluster.nodes[0].SubmitCommand(groupID, makeCommandID(), nil) } // Flush out all remaining commands. for i := 0; i < pendingCommands; i++ { throttle <- struct{}{} } b.StopTimer() }
// TestRaftRemoveRace adds and removes a replica repeatedly in an // attempt to reproduce a race // (https://github.com/cockroachdb/cockroach/issues/1911). Note that // 10 repetitions is not enough to reliably reproduce the problem, but // it's better than any other tests we have for this (increasing the // number of repetitions adds an unacceptable amount of test runtime). func TestRaftRemoveRace(t *testing.T) { defer leaktest.AfterTest(t) t.Skip("#3187, #3178 and others; very flaky since PR #3175") mtc := startMultiTestContext(t, 3) defer mtc.Stop() rangeID := roachpb.RangeID(1) mtc.replicateRange(rangeID, 0, 1, 2) for i := 0; i < 10; i++ { mtc.unreplicateRange(rangeID, 0, 2) mtc.replicateRange(rangeID, 0, 2) } }
func TestInitialLeaderElection(t *testing.T) { defer leaktest.AfterTest(t) // Run the test three times, each time triggering a different node's election clock. // The node that requests an election first should win. for leaderIndex := 0; leaderIndex < 3; leaderIndex++ { log.Infof("testing leader election for node %v", leaderIndex) stopper := stop.NewStopper() cluster := newTestCluster(nil, 3, stopper, t) groupID := roachpb.RangeID(1) cluster.createGroup(groupID, 0, 3) cluster.elect(leaderIndex, groupID) stopper.Stop() } }
// TestReplicateAfterSplit verifies that a new replica whose start key // is not KeyMin replicating to a fresh store can apply snapshots correctly. func TestReplicateAfterSplit(t *testing.T) { defer leaktest.AfterTest(t) mtc := startMultiTestContext(t, 2) defer mtc.Stop() rangeID := roachpb.RangeID(1) splitKey := roachpb.Key("m") key := roachpb.Key("z") store0 := mtc.stores[0] // Make the split splitArgs := adminSplitArgs(roachpb.KeyMin, splitKey) if _, err := client.SendWrapped(rg1(store0), nil, &splitArgs); err != nil { t.Fatal(err) } rangeID2 := store0.LookupReplica(roachpb.RKey(key), nil).RangeID if rangeID2 == rangeID { t.Errorf("got same range id after split") } // Issue an increment for later check. incArgs := incrementArgs(key, 11) if _, err := client.SendWrappedWith(rg1(store0), nil, roachpb.Header{ RangeID: rangeID2, }, &incArgs); err != nil { t.Fatal(err) } // Now add the second replica. mtc.replicateRange(rangeID2, 1) if mtc.stores[1].LookupReplica(roachpb.RKey(key), nil).GetMaxBytes() == 0 { t.Error("Range MaxBytes is not set after snapshot applied") } // Once it catches up, the effects of increment commands can be seen. util.SucceedsWithin(t, replicaReadTimeout, func() error { getArgs := getArgs(key) // Reading on non-leader replica should use inconsistent read if reply, err := client.SendWrappedWith(rg1(mtc.stores[1]), nil, roachpb.Header{ RangeID: rangeID2, ReadConsistency: roachpb.INCONSISTENT, }, &getArgs); err != nil { return util.Errorf("failed to read data: %s", err) } else if e, v := int64(11), mustGetInt(reply.(*roachpb.GetResponse).Value); v != e { return util.Errorf("failed to read correct data: expected %d, got %d", e, v) } return nil }) }