func TestStoresVisitStores(t *testing.T) {
	defer leaktest.AfterTest(t)()
	ls := NewStores(log.AmbientContext{}, hlc.NewClock(hlc.UnixNano))
	numStores := 10
	for i := 0; i < numStores; i++ {
		ls.AddStore(&Store{Ident: roachpb.StoreIdent{StoreID: roachpb.StoreID(i)}})
	}

	visit := make([]bool, numStores)
	err := ls.VisitStores(func(s *Store) error { visit[s.Ident.StoreID] = true; return nil })
	if err != nil {
		t.Errorf("unexpected error on visit: %s", err.Error())
	}
	for i, visited := range visit {
		if !visited {
			t.Errorf("store %d was not visited", i)
		}
	}

	errBoom := errors.New("boom")
	if err := ls.VisitStores(func(s *Store) error { return errBoom }); err != errBoom {
		t.Errorf("got unexpected error %v", err)
	}
}
func createReplicaSlice() ReplicaSlice {
	rs := ReplicaSlice(nil)
	for i := 0; i < 5; i++ {
		rs = append(rs, ReplicaInfo{ReplicaDescriptor: roachpb.ReplicaDescriptor{StoreID: roachpb.StoreID(i + 1)}})
	}
	return rs
}
func TestStorePoolGetStoreDetails(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper, g, _, sp := createTestStorePool(TestTimeUntilStoreDeadOff, false /* deterministic */)
	defer stopper.Stop()
	sg := gossiputil.NewStoreGossiper(g)
	sg.GossipStores(uniqueStore, t)

	sp.mu.Lock()
	defer sp.mu.Unlock()
	if detail := sp.getStoreDetailLocked(roachpb.StoreID(1)); detail.dead {
		t.Errorf("Present storeDetail came back as dead, expected it to be alive. %+v", detail)
	}
	if detail := sp.getStoreDetailLocked(roachpb.StoreID(2)); detail.dead {
		t.Errorf("Absent storeDetail came back as dead, expected it to be alive. %+v", detail)
	}
}
func TestStorePoolGetStoreDetails(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper, g, _, sp, _ := createTestStorePool(
		TestTimeUntilStoreDead, false /* deterministic */, false /* defaultNodeLiveness */)
	defer stopper.Stop()
	sg := gossiputil.NewStoreGossiper(g)
	sg.GossipStores(uniqueStore, t)

	sp.mu.Lock()
	defer sp.mu.Unlock()
	if detail := sp.getStoreDetailLocked(roachpb.StoreID(1)); detail.desc != nil {
		t.Errorf("unexpected fetched store ID 1: %+v", detail.desc)
	}
	if detail := sp.getStoreDetailLocked(roachpb.StoreID(2)); detail.desc == nil {
		t.Errorf("failed to fetch store ID 2")
	}
}
// allocateStoreIDs increments the store id generator key for the
// specified node to allocate "inc" new, unique store ids. The
// first ID in a contiguous range is returned on success.
func allocateStoreIDs(
	ctx context.Context, nodeID roachpb.NodeID, inc int64, db *client.DB,
) (roachpb.StoreID, error) {
	r, err := db.Inc(ctx, keys.StoreIDGenerator, inc)
	if err != nil {
		return 0, errors.Errorf("unable to allocate %d store IDs for node %d: %s", inc, nodeID, err)
	}
	return roachpb.StoreID(r.ValueInt() - inc + 1), nil
}
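// Illustrative sketch (not part of the source above): the Inc call reserves the
// contiguous ID range [r.ValueInt()-inc+1, r.ValueInt()], and allocateStoreIDs
// returns the low end of that range. The hypothetical helper below only expands
// a (counter, inc) pair into the concrete store IDs it covers.
func allocatedStoreIDs(counter, inc int64) []roachpb.StoreID {
	ids := make([]roachpb.StoreID, 0, inc)
	for id := counter - inc + 1; id <= counter; id++ {
		ids = append(ids, roachpb.StoreID(id))
	}
	return ids
}

// For example, with the generator key previously at 3 and inc == 2, the
// increment returns 5 and allocatedStoreIDs(5, 2) yields [4 5].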
// getRemoveTarget queries the allocator for the store that contains a replica
// that can be removed.
func (r *Range) getRemoveTarget() (roachpb.StoreID, error) {
	// Pass in an invalid store ID since we don't consider range leases as part
	// of the simulator.
	removeStore, err := r.allocator.RemoveTarget(r.desc.Replicas, roachpb.StoreID(-1))
	if err != nil {
		return 0, err
	}
	return removeStore.StoreID, nil
}
// GetFirstStoreID is a utility function returning the StoreID of the first
// store on this node.
func (ts *TestServer) GetFirstStoreID() roachpb.StoreID {
	firstStoreID := roachpb.StoreID(-1)
	err := ts.Stores().VisitStores(func(s *storage.Store) error {
		if firstStoreID == -1 {
			firstStoreID = s.Ident.StoreID
		}
		return nil
	})
	if err != nil {
		panic(err)
	}
	return firstStoreID
}
// TestBookieReserveMaxBytes ensures that over-booking doesn't occur when trying
// to reserve more bytes than maxReservedBytes.
func TestBookieReserveMaxBytes(t *testing.T) {
	defer leaktest.AfterTest(t)()

	previousReservedBytes := 10

	stopper, _, b := createTestBookie(time.Hour, previousReservedBytes*2, int64(previousReservedBytes))
	defer stopper.Stop()

	// Load up reservations with a size of 1 each.
	for i := 1; i <= previousReservedBytes; i++ {
		req := ReservationRequest{
			StoreRequestHeader: StoreRequestHeader{
				StoreID: roachpb.StoreID(i),
				NodeID:  roachpb.NodeID(i),
			},
			RangeID:   roachpb.RangeID(i),
			RangeSize: 1,
		}
		if !b.Reserve(context.Background(), req, nil).Reserved {
			t.Errorf("%d: could not add reservation", i)
		}
		verifyBookie(t, b, i, i, int64(i))
	}

	overbookedReq := ReservationRequest{
		StoreRequestHeader: StoreRequestHeader{
			StoreID: roachpb.StoreID(previousReservedBytes + 1),
			NodeID:  roachpb.NodeID(previousReservedBytes + 1),
		},
		RangeID:   roachpb.RangeID(previousReservedBytes + 1),
		RangeSize: 1,
	}
	if b.Reserve(context.Background(), overbookedReq, nil).Reserved {
		t.Errorf("expected reservation to fail due to too many already existing reservations, but it succeeded")
	}
	// The same numbers from the last call to verifyBookie.
	verifyBookie(t, b, previousReservedBytes, previousReservedBytes, int64(previousReservedBytes))
}
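// Illustrative sketch (not the actual bookie): the over-booking guard exercised
// by the test above can be pictured as a running byte budget that rejects any
// reservation pushing the outstanding total past maxReservedBytes. This
// simplified, hypothetical stand-in omits the per-range tracking, timeouts, and
// dead-replica checks that the real implementation performs.
type simpleBookie struct {
	maxReservedBytes int64
	reservedBytes    int64
}

func (sb *simpleBookie) reserve(size int64) bool {
	if sb.reservedBytes+size > sb.maxReservedBytes {
		return false // would over-book the store
	}
	sb.reservedBytes += size
	return true
}

// With maxReservedBytes == 10 and ten 1-byte reservations already accepted, an
// eleventh reserve(1) returns false, mirroring the rejection the test asserts
// for overbookedReq.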
func TestStoresRemoveStore(t *testing.T) {
	defer leaktest.AfterTest(t)()
	ls := NewStores(log.AmbientContext{}, hlc.NewClock(hlc.UnixNano))

	storeID := roachpb.StoreID(89)

	ls.AddStore(&Store{Ident: roachpb.StoreIdent{StoreID: storeID}})
	ls.RemoveStore(&Store{Ident: roachpb.StoreIdent{StoreID: storeID}})

	if ls.HasStore(storeID) {
		t.Errorf("expected local sender to remove storeID=%d", storeID)
	}
}
func TestStoresGetStoreCount(t *testing.T) {
	defer leaktest.AfterTest(t)()
	ls := NewStores(log.AmbientContext{}, hlc.NewClock(hlc.UnixNano))
	if ls.GetStoreCount() != 0 {
		t.Errorf("expected 0 stores in new local sender")
	}

	expectedCount := 10
	for i := 0; i < expectedCount; i++ {
		ls.AddStore(&Store{Ident: roachpb.StoreIdent{StoreID: roachpb.StoreID(i)}})
	}
	if count := ls.GetStoreCount(); count != expectedCount {
		t.Errorf("expected store count to be %d but was %d", expectedCount, count)
	}
}
// TODO(bram): This test suite is not even close to exhaustive. The scores are // not checked and each rule should have many more test cases. Also add a // corrupt replica test and remove the 0 range ID used when calling // getStoreList. func TestRuleSolver(t *testing.T) { defer leaktest.AfterTest(t)() stopper, _, _, storePool := createTestStorePool( TestTimeUntilStoreDeadOff, /* deterministic */ false, ) defer stopper.Stop() storeUSa15 := roachpb.StoreID(1) // us-a-1-5 storeUSa1 := roachpb.StoreID(2) // us-a-1 storeUSb := roachpb.StoreID(3) // us-b storeDead := roachpb.StoreID(4) storeEurope := roachpb.StoreID(5) // eur-a-1-5 mockStorePool(storePool, []roachpb.StoreID{storeUSa15, storeUSa1, storeUSb, storeEurope}, []roachpb.StoreID{storeDead}, nil) // tierSetup returns a tier struct constructed using the passed in values. // If any value is an empty string, it is not included. tierSetup := func(datacenter, floor, rack, slot string) []roachpb.Tier { var tiers []roachpb.Tier if datacenter != "" { tiers = append(tiers, roachpb.Tier{Key: "datacenter", Value: datacenter}) } if floor != "" { tiers = append(tiers, roachpb.Tier{Key: "floor", Value: floor}) } if rack != "" { tiers = append(tiers, roachpb.Tier{Key: "rack", Value: rack}) } if slot != "" { tiers = append(tiers, roachpb.Tier{Key: "slot", Value: slot}) } return tiers } // capacitySetup returns a store capacity in which the total capacity is // always 100 and available and range count are passed in. capacitySetup := func(available int64, rangeCount int32) roachpb.StoreCapacity { return roachpb.StoreCapacity{ Capacity: 100, Available: available, RangeCount: rangeCount, } } storePool.mu.Lock() storePool.mu.storeDetails[storeUSa15].desc.Attrs.Attrs = []string{"a"} storePool.mu.storeDetails[storeUSa15].desc.Node.Locality.Tiers = tierSetup("us", "a", "1", "5") storePool.mu.storeDetails[storeUSa15].desc.Capacity = capacitySetup(1, 99) storePool.mu.nodeLocalities[roachpb.NodeID(storeUSa15)] = storePool.mu.storeDetails[storeUSa15].desc.Node.Locality storePool.mu.storeDetails[storeUSa1].desc.Attrs.Attrs = []string{"a", "b"} storePool.mu.storeDetails[storeUSa1].desc.Node.Locality.Tiers = tierSetup("us", "a", "1", "") storePool.mu.storeDetails[storeUSa1].desc.Capacity = capacitySetup(100, 0) storePool.mu.nodeLocalities[roachpb.NodeID(storeUSa1)] = storePool.mu.storeDetails[storeUSa1].desc.Node.Locality storePool.mu.storeDetails[storeUSb].desc.Attrs.Attrs = []string{"a", "b", "c"} storePool.mu.storeDetails[storeUSb].desc.Node.Locality.Tiers = tierSetup("us", "b", "", "") storePool.mu.storeDetails[storeUSb].desc.Capacity = capacitySetup(50, 50) storePool.mu.nodeLocalities[roachpb.NodeID(storeUSb)] = storePool.mu.storeDetails[storeUSb].desc.Node.Locality storePool.mu.storeDetails[storeEurope].desc.Node.Locality.Tiers = tierSetup("eur", "a", "1", "5") storePool.mu.storeDetails[storeEurope].desc.Capacity = capacitySetup(60, 40) storePool.mu.nodeLocalities[roachpb.NodeID(storeEurope)] = storePool.mu.storeDetails[storeEurope].desc.Node.Locality storePool.mu.Unlock() testCases := []struct { name string rule rule c config.Constraints existing []roachpb.ReplicaDescriptor expected []roachpb.StoreID }{ { name: "no constraints or rules", expected: []roachpb.StoreID{storeUSa15, storeUSa1, storeUSb, storeEurope}, }, { name: "white list rule", rule: func(state solveState) (float64, bool) { switch state.store.StoreID { case storeUSa15: return 0, true case storeUSb: return 1, true default: return 0, false } }, expected: []roachpb.StoreID{storeUSb, storeUSa15}, }, { 
name: "ruleReplicasUniqueNodes - 2 available nodes", rule: ruleReplicasUniqueNodes, existing: []roachpb.ReplicaDescriptor{ {NodeID: roachpb.NodeID(storeUSa15)}, {NodeID: roachpb.NodeID(storeUSb)}, }, expected: []roachpb.StoreID{storeUSa1, storeEurope}, }, { name: "ruleReplicasUniqueNodes - 0 available nodes", rule: ruleReplicasUniqueNodes, existing: []roachpb.ReplicaDescriptor{ {NodeID: roachpb.NodeID(storeUSa15)}, {NodeID: roachpb.NodeID(storeUSa1)}, {NodeID: roachpb.NodeID(storeUSb)}, {NodeID: roachpb.NodeID(storeEurope)}, }, expected: nil, }, { name: "ruleConstraints - required constraints", rule: ruleConstraints, c: config.Constraints{ Constraints: []config.Constraint{ {Value: "b", Type: config.Constraint_REQUIRED}, }, }, expected: []roachpb.StoreID{storeUSa1, storeUSb}, }, { name: "ruleConstraints - required locality constraints", rule: ruleConstraints, c: config.Constraints{ Constraints: []config.Constraint{ {Key: "datacenter", Value: "us", Type: config.Constraint_REQUIRED}, }, }, expected: []roachpb.StoreID{storeUSa15, storeUSa1, storeUSb}, }, { name: "ruleConstraints - prohibited constraints", rule: ruleConstraints, c: config.Constraints{ Constraints: []config.Constraint{ {Value: "b", Type: config.Constraint_PROHIBITED}, }, }, expected: []roachpb.StoreID{storeUSa15, storeEurope}, }, { name: "ruleConstraints - prohibited locality constraints", rule: ruleConstraints, c: config.Constraints{ Constraints: []config.Constraint{ {Key: "datacenter", Value: "us", Type: config.Constraint_PROHIBITED}, }, }, expected: []roachpb.StoreID{storeEurope}, }, { name: "ruleConstraints - positive constraints", rule: ruleConstraints, c: config.Constraints{ Constraints: []config.Constraint{ {Value: "a"}, {Value: "b"}, {Value: "c"}, }, }, expected: []roachpb.StoreID{storeUSb, storeUSa1, storeUSa15, storeEurope}, }, { name: "ruleConstraints - positive locality constraints", rule: ruleConstraints, c: config.Constraints{ Constraints: []config.Constraint{ {Key: "datacenter", Value: "eur"}, }, }, expected: []roachpb.StoreID{storeEurope, storeUSa15, storeUSa1, storeUSb}, }, { name: "ruleDiversity - no existing replicas", rule: ruleDiversity, existing: nil, expected: []roachpb.StoreID{storeUSa15, storeUSa1, storeUSb, storeEurope}, }, { name: "ruleDiversity - one existing replicas", rule: ruleDiversity, existing: []roachpb.ReplicaDescriptor{ {NodeID: roachpb.NodeID(storeUSa15)}, }, expected: []roachpb.StoreID{storeEurope, storeUSb, storeUSa1, storeUSa15}, }, { name: "ruleDiversity - two existing replicas", rule: ruleDiversity, existing: []roachpb.ReplicaDescriptor{ {NodeID: roachpb.NodeID(storeUSa15)}, {NodeID: roachpb.NodeID(storeEurope)}, }, expected: []roachpb.StoreID{storeUSb, storeUSa1, storeUSa15, storeEurope}, }, { name: "ruleCapacity", rule: ruleCapacity, expected: []roachpb.StoreID{storeUSa1, storeEurope, storeUSb}, }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { var solver ruleSolver if tc.rule != nil { solver = ruleSolver{tc.rule} } sl, _, _ := storePool.getStoreList(roachpb.RangeID(0)) candidates, err := solver.Solve( sl, tc.c, tc.existing, storePool.getNodeLocalities(tc.existing), ) if err != nil { t.Fatal(err) } sort.Sort(byScoreAndID(candidates)) if len(candidates) != len(tc.expected) { t.Fatalf("length of %+v should match %+v", candidates, tc.expected) } for i, expected := range tc.expected { if actual := candidates[i].store.StoreID; actual != expected { t.Errorf("candidates[%d].store.StoreID = %d; not %d; %+v", i, actual, expected, candidates) } } }) } }
func TestSendAndReceive(t *testing.T) { defer leaktest.AfterTest(t)() rttc := newRaftTransportTestContext(t) defer rttc.Stop() // Create several servers, each of which has two stores (A raft // node ID addresses a store). Node 1 has stores 1 and 2, node 2 has // stores 3 and 4, etc. // // We suppose that range 1 is replicated across the odd-numbered // stores in reverse order to ensure that the various IDs are not // equal: replica 1 is store 5, replica 2 is store 3, and replica 3 // is store 1. const numNodes = 3 const storesPerNode = 2 nextNodeID := roachpb.NodeID(2) nextStoreID := roachpb.StoreID(2) // Per-node state. transports := map[roachpb.NodeID]*storage.RaftTransport{} // Per-store state. storeNodes := map[roachpb.StoreID]roachpb.NodeID{} channels := map[roachpb.StoreID]channelServer{} replicaIDs := map[roachpb.StoreID]roachpb.ReplicaID{ 1: 3, 3: 2, 5: 1, } messageTypes := map[raftpb.MessageType]struct{}{ raftpb.MsgHeartbeat: {}, } for nodeIndex := 0; nodeIndex < numNodes; nodeIndex++ { nodeID := nextNodeID nextNodeID++ transports[nodeID] = rttc.AddNode(nodeID) for storeIndex := 0; storeIndex < storesPerNode; storeIndex++ { storeID := nextStoreID nextStoreID++ storeNodes[storeID] = nodeID channels[storeID] = rttc.ListenStore(nodeID, storeID) } } messageTypeCounts := make(map[roachpb.StoreID]map[raftpb.MessageType]int) // Each store sends one snapshot and one heartbeat to each store, including // itself. for toStoreID, toNodeID := range storeNodes { if _, ok := messageTypeCounts[toStoreID]; !ok { messageTypeCounts[toStoreID] = make(map[raftpb.MessageType]int) } for fromStoreID, fromNodeID := range storeNodes { baseReq := storage.RaftMessageRequest{ RangeID: 1, Message: raftpb.Message{ From: uint64(fromStoreID), To: uint64(toStoreID), }, FromReplica: roachpb.ReplicaDescriptor{ NodeID: fromNodeID, StoreID: fromStoreID, }, ToReplica: roachpb.ReplicaDescriptor{ NodeID: toNodeID, StoreID: toStoreID, }, } for messageType := range messageTypes { req := baseReq req.Message.Type = messageType if !transports[fromNodeID].SendAsync(&req) { t.Errorf("unable to send %s from %d to %d", req.Message.Type, fromNodeID, toNodeID) } messageTypeCounts[toStoreID][req.Message.Type]++ } } } // Read all the messages from the channels. Note that the transport // does not guarantee in-order delivery between independent // transports, so we just verify that the right number of messages // end up in each channel. for toStoreID := range storeNodes { for len(messageTypeCounts[toStoreID]) > 0 { req := <-channels[toStoreID].ch if req.Message.To != uint64(toStoreID) { t.Errorf("got unexpected message %v on channel %d", req, toStoreID) } if typeCounts, ok := messageTypeCounts[toStoreID]; ok { if _, ok := typeCounts[req.Message.Type]; ok { typeCounts[req.Message.Type]-- if typeCounts[req.Message.Type] == 0 { delete(typeCounts, req.Message.Type) } } else { t.Errorf("expected %v to have key %v, but it did not", typeCounts, req.Message.Type) } } else { t.Errorf("expected %v to have key %v, but it did not", messageTypeCounts, toStoreID) } } delete(messageTypeCounts, toStoreID) select { case req := <-channels[toStoreID].ch: t.Errorf("got unexpected message %v on channel %d", req, toStoreID) case <-time.After(100 * time.Millisecond): } } if len(messageTypeCounts) > 0 { t.Errorf("remaining messages expected: %v", messageTypeCounts) } // Real raft messages have different node/store/replica IDs. 
// Send a message from replica 2 (on store 3, node 2) to replica 1 (on store 5, node 3) fromStoreID := roachpb.StoreID(3) toStoreID := roachpb.StoreID(5) expReq := &storage.RaftMessageRequest{ RangeID: 1, Message: raftpb.Message{ Type: raftpb.MsgApp, From: uint64(replicaIDs[fromStoreID]), To: uint64(replicaIDs[toStoreID]), }, FromReplica: roachpb.ReplicaDescriptor{ NodeID: storeNodes[fromStoreID], StoreID: fromStoreID, ReplicaID: replicaIDs[fromStoreID], }, ToReplica: roachpb.ReplicaDescriptor{ NodeID: storeNodes[toStoreID], StoreID: toStoreID, ReplicaID: replicaIDs[toStoreID], }, } if !transports[storeNodes[fromStoreID]].SendAsync(expReq) { t.Errorf("unable to send message from %d to %d", fromStoreID, toStoreID) } if req := <-channels[toStoreID].ch; !proto.Equal(req, expReq) { t.Errorf("got unexpected message %v on channel %d", req, toStoreID) } select { case req := <-channels[toStoreID].ch: t.Errorf("got unexpected message %v on channel %d", req, toStoreID) default: } }
// TestBookieReserve ensures that you can never have more than one reservation // for a specific rangeID at a time, and that both `Reserve` and `Fill` function // correctly. func TestBookieReserve(t *testing.T) { defer leaktest.AfterTest(t)() b := createTestBookie(5, defaultMaxReservedBytes) testCases := []struct { rangeID int reserve bool // true for reserve, false for fill expSuc bool // is the operation expected to succeed expOut int // expected number of reserved replicas expBytes int64 // expected number of bytes being reserved deadReplicas []roachpb.ReplicaIdent // dead replicas that we should not reserve over }{ {rangeID: 1, reserve: true, expSuc: true, expOut: 1, expBytes: 1}, {rangeID: 1, reserve: true, expSuc: false, expOut: 1, expBytes: 1}, {rangeID: 1, reserve: false, expSuc: true, expOut: 0, expBytes: 0}, {rangeID: 1, reserve: false, expSuc: false, expOut: 0, expBytes: 0}, {rangeID: 2, reserve: true, expSuc: true, expOut: 1, expBytes: 2}, {rangeID: 3, reserve: true, expSuc: true, expOut: 2, expBytes: 5}, {rangeID: 1, reserve: true, expSuc: true, expOut: 3, expBytes: 6}, {rangeID: 2, reserve: true, expSuc: false, expOut: 3, expBytes: 6}, {rangeID: 2, reserve: false, expSuc: true, expOut: 2, expBytes: 4}, {rangeID: 2, reserve: false, expSuc: false, expOut: 2, expBytes: 4}, {rangeID: 3, reserve: false, expSuc: true, expOut: 1, expBytes: 1}, {rangeID: 1, reserve: false, expSuc: true, expOut: 0, expBytes: 0}, {rangeID: 2, reserve: false, expSuc: false, expOut: 0, expBytes: 0}, {rangeID: 0, reserve: true, expSuc: false, expOut: 0, expBytes: 0, deadReplicas: []roachpb.ReplicaIdent{{RangeID: 0}}}, {rangeID: 0, reserve: true, expSuc: true, expOut: 1, expBytes: 0, deadReplicas: []roachpb.ReplicaIdent{{RangeID: 1}}}, {rangeID: 0, reserve: false, expSuc: true, expOut: 0, expBytes: 0}, } ctx := context.Background() for i, testCase := range testCases { if testCase.reserve { // Try to reserve the range. req := reservationRequest{ StoreRequestHeader: StoreRequestHeader{ StoreID: roachpb.StoreID(i), NodeID: roachpb.NodeID(i), }, RangeID: roachpb.RangeID(testCase.rangeID), RangeSize: int64(testCase.rangeID), } if resp := b.Reserve(ctx, req, testCase.deadReplicas); resp.Reserved != testCase.expSuc { if testCase.expSuc { t.Errorf("%d: expected a successful reservation, was rejected", i) } else { t.Errorf("%d: expected no reservation, but it was accepted", i) } } } else { // Fill the reservation. if filled := b.Fill(ctx, roachpb.RangeID(testCase.rangeID)); filled != testCase.expSuc { if testCase.expSuc { t.Errorf("%d: expected a successful filled reservation, was rejected", i) } else { t.Errorf("%d: expected no reservation to be filled, but it was accepted", i) } } } verifyBookie(t, b, testCase.expOut, testCase.expBytes) } // Test that repeated requests with the same store and node number extend // the timeout of the pre-existing reservation. repeatReq := reservationRequest{ StoreRequestHeader: StoreRequestHeader{ StoreID: 100, NodeID: 100, }, RangeID: 100, RangeSize: 100, } for i := 1; i < 10; i++ { if !b.Reserve(context.Background(), repeatReq, nil).Reserved { t.Errorf("%d: could not add repeated reservation", i) } verifyBookie(t, b, 1, 100) } // Test rejecting a reservation due to disk space constraints. overfilledReq := reservationRequest{ StoreRequestHeader: StoreRequestHeader{ StoreID: 200, NodeID: 200, }, RangeID: 200, RangeSize: 200, } b.mu.Lock() // Set the bytes have 1 less byte free than needed by the reservation. 
b.metrics.Available.Update(b.mu.size + (2 * overfilledReq.RangeSize) - 1) b.mu.Unlock() if b.Reserve(context.Background(), overfilledReq, nil).Reserved { t.Errorf("expected reservation to fail due to disk space constraints, but it succeeded") } verifyBookie(t, b, 1, 100) // The same numbers from the last call to verifyBookie. }
// TestCandidateSelection tests select{good,bad} and {best,worst}constraints. func TestCandidateSelection(t *testing.T) { defer leaktest.AfterTest(t)() type scoreTuple struct { constraint int capacity int } genCandidates := func(scores []scoreTuple, idShift int) candidateList { var cl candidateList for i, score := range scores { cl = append(cl, candidate{ store: roachpb.StoreDescriptor{ StoreID: roachpb.StoreID(i + idShift), }, constraint: float64(score.constraint), capacity: float64(score.capacity), valid: true, }) } sort.Sort(sort.Reverse(byScore(cl))) return cl } formatter := func(cl candidateList) string { var buffer bytes.Buffer for i, c := range cl { if i != 0 { buffer.WriteRune(',') } buffer.WriteString(fmt.Sprintf("%d:%d", int(c.constraint), int(c.capacity))) } return buffer.String() } testCases := []struct { candidates []scoreTuple best []scoreTuple worst []scoreTuple good scoreTuple bad scoreTuple }{ { candidates: []scoreTuple{{0, 0}}, best: []scoreTuple{{0, 0}}, worst: []scoreTuple{{0, 0}}, good: scoreTuple{0, 0}, bad: scoreTuple{0, 0}, }, { candidates: []scoreTuple{{0, 1}, {0, 0}}, best: []scoreTuple{{0, 1}, {0, 0}}, worst: []scoreTuple{{0, 1}, {0, 0}}, good: scoreTuple{0, 1}, bad: scoreTuple{0, 0}, }, { candidates: []scoreTuple{{0, 2}, {0, 1}, {0, 0}}, best: []scoreTuple{{0, 2}, {0, 1}, {0, 0}}, worst: []scoreTuple{{0, 2}, {0, 1}, {0, 0}}, good: scoreTuple{0, 1}, bad: scoreTuple{0, 0}, }, { candidates: []scoreTuple{{1, 0}, {0, 1}}, best: []scoreTuple{{1, 0}}, worst: []scoreTuple{{0, 1}}, good: scoreTuple{1, 0}, bad: scoreTuple{0, 1}, }, { candidates: []scoreTuple{{1, 0}, {0, 2}, {0, 1}}, best: []scoreTuple{{1, 0}}, worst: []scoreTuple{{0, 2}, {0, 1}}, good: scoreTuple{1, 0}, bad: scoreTuple{0, 1}, }, { candidates: []scoreTuple{{1, 1}, {1, 0}, {0, 2}}, best: []scoreTuple{{1, 1}, {1, 0}}, worst: []scoreTuple{{0, 2}}, good: scoreTuple{1, 1}, bad: scoreTuple{0, 2}, }, { candidates: []scoreTuple{{1, 1}, {1, 0}, {0, 3}, {0, 2}}, best: []scoreTuple{{1, 1}, {1, 0}}, worst: []scoreTuple{{0, 3}, {0, 2}}, good: scoreTuple{1, 1}, bad: scoreTuple{0, 2}, }, } pickResult := func(cl candidateList, storeID roachpb.StoreID) *candidate { for _, c := range cl { if c.store.StoreID == storeID { return &c } } return nil } allocRand := makeAllocatorRand(rand.NewSource(0)) for _, tc := range testCases { cl := genCandidates(tc.candidates, 1) t.Run(fmt.Sprintf("best-%s", formatter(cl)), func(t *testing.T) { if a, e := cl.best(), genCandidates(tc.best, 1); !reflect.DeepEqual(a, e) { t.Errorf("expected:%s actual:%s diff:%v", formatter(e), formatter(a), pretty.Diff(e, a)) } }) t.Run(fmt.Sprintf("worst-%s", formatter(cl)), func(t *testing.T) { // Shifting the ids is required to match the end of the list. 
if a, e := cl.worst(), genCandidates( tc.worst, len(tc.candidates)-len(tc.worst)+1, ); !reflect.DeepEqual(a, e) { t.Errorf("expected:%s actual:%s diff:%v", formatter(e), formatter(a), pretty.Diff(e, a)) } }) t.Run(fmt.Sprintf("good-%s", formatter(cl)), func(t *testing.T) { goodStore := cl.selectGood(allocRand) if goodStore == nil { t.Fatalf("no good store found") } good := pickResult(cl, goodStore.StoreID) if good == nil { t.Fatalf("candidate for store %d not found in candidate list: %s", goodStore.StoreID, cl) } actual := scoreTuple{int(good.constraint), int(good.capacity)} if actual != tc.good { t.Errorf("expected:%v actual:%v", tc.good, actual) } }) t.Run(fmt.Sprintf("bad-%s", formatter(cl)), func(t *testing.T) { badStore := cl.selectBad(allocRand) if badStore == nil { t.Fatalf("no bad store found") } bad := pickResult(cl, badStore.StoreID) if bad == nil { t.Fatalf("candidate for store %d not found in candidate list: %s", badStore.StoreID, cl) } actual := scoreTuple{int(bad.constraint), int(bad.capacity)} if actual != tc.bad { t.Errorf("expected:%v actual:%v", tc.bad, actual) } }) } }
return tiers } // testStoreCapacitySetup returns a store capacity in which the total capacity // is always 100 and available and range count are passed in. func testStoreCapacitySetup(available int64, rangeCount int32) roachpb.StoreCapacity { return roachpb.StoreCapacity{ Capacity: 100, Available: available, RangeCount: rangeCount, } } // This is a collection of test stores used by a suite of tests. var ( testStoreUSa15 = roachpb.StoreID(1) // us-a-1-5 testStoreUSa1 = roachpb.StoreID(2) // us-a-1 testStoreUSb = roachpb.StoreID(3) // us-b testStoreEurope = roachpb.StoreID(4) // eur-a-1-5 testStores = []roachpb.StoreDescriptor{ { StoreID: testStoreUSa15, Attrs: roachpb.Attributes{ Attrs: []string{"a"}, }, Node: roachpb.NodeDescriptor{ NodeID: roachpb.NodeID(testStoreUSa15), Locality: roachpb.Locality{ Tiers: testStoreTierSetup("us", "a", "1", "5"), },
// TestStatusSummaries verifies that status summaries are written correctly for // both the Node and stores within the node. func TestStatusSummaries(t *testing.T) { defer leaktest.AfterTest(t)() // ======================================== // Start test server and wait for full initialization. // ======================================== srv, _, kvDB := serverutils.StartServer(t, base.TestServerArgs{ DisableEventLog: true, }) defer srv.Stopper().Stop() ts := srv.(*TestServer) ctx := context.TODO() // Retrieve the first store from the Node. s, err := ts.node.stores.GetStore(roachpb.StoreID(1)) if err != nil { t.Fatal(err) } s.WaitForInit() content := "junk" leftKey := "a" // Scan over all keys to "wake up" all replicas (force a lease holder election). if _, err := kvDB.Scan(context.TODO(), keys.MetaMax, keys.MaxKey, 0); err != nil { t.Fatal(err) } // Wait for full replication of initial ranges. initialRanges := ExpectedInitialRangeCount() util.SucceedsSoon(t, func() error { for i := 1; i <= int(initialRanges); i++ { if s.RaftStatus(roachpb.RangeID(i)) == nil { return errors.Errorf("Store %d replica %d is not present in raft", s.StoreID(), i) } } return nil }) // ======================================== // Construct an initial expectation for NodeStatus to compare to the first // status produced by the server. // ======================================== expectedNodeStatus := &status.NodeStatus{ Desc: ts.node.Descriptor, StartedAt: 0, UpdatedAt: 0, Metrics: map[string]float64{ "exec.success": 0, "exec.error": 0, }, } expectedStoreStatuses := make(map[roachpb.StoreID]status.StoreStatus) if err := ts.node.stores.VisitStores(func(s *storage.Store) error { desc, err := s.Descriptor() if err != nil { t.Fatal(err) } expectedReplicas := 0 if s.StoreID() == roachpb.StoreID(1) { expectedReplicas = initialRanges } stat := status.StoreStatus{ Desc: *desc, Metrics: map[string]float64{ "replicas": float64(expectedReplicas), "replicas.leaseholders": float64(expectedReplicas), "livebytes": 0, "keybytes": 0, "valbytes": 0, "livecount": 0, "keycount": 0, "valcount": 0, }, } expectedNodeStatus.StoreStatuses = append(expectedNodeStatus.StoreStatuses, stat) expectedStoreStatuses[s.StoreID()] = stat return nil }); err != nil { t.Fatal(err) } // Function to force summaries to be written synchronously, including all // data currently in the event pipeline. Only one of the stores has // replicas, so there are no concerns related to quorum writes; if there // were multiple replicas, more care would need to be taken in the initial // syncFeed(). forceWriteStatus := func() { if err := ts.node.computePeriodicMetrics(ctx, 0); err != nil { t.Fatalf("error publishing store statuses: %s", err) } if err := ts.WriteSummaries(); err != nil { t.Fatalf("error writing summaries: %s", err) } } // Verify initial status. forceWriteStatus() expectedNodeStatus = compareNodeStatus(t, ts, expectedNodeStatus, 1) for _, s := range expectedNodeStatus.StoreStatuses { expectedStoreStatuses[s.Desc.StoreID] = s } // ======================================== // Put some data into the K/V store and confirm change to status. // ======================================== splitKey := "b" rightKey := "c" // Write some values left and right of the proposed split key. if err := ts.db.Put(ctx, leftKey, content); err != nil { t.Fatal(err) } if err := ts.db.Put(ctx, rightKey, content); err != nil { t.Fatal(err) } // Increment metrics on the node expectedNodeStatus.Metrics["exec.success"] += 2 // Increment metrics on the first store. 
store1 := expectedStoreStatuses[roachpb.StoreID(1)].Metrics store1["livecount"]++ store1["keycount"]++ store1["valcount"]++ store1["livebytes"]++ store1["keybytes"]++ store1["valbytes"]++ forceWriteStatus() expectedNodeStatus = compareNodeStatus(t, ts, expectedNodeStatus, 2) for _, s := range expectedNodeStatus.StoreStatuses { expectedStoreStatuses[s.Desc.StoreID] = s } // ======================================== // Perform an admin split and verify that status is updated. // ======================================== // Split the range. if err := ts.db.AdminSplit(context.TODO(), splitKey); err != nil { t.Fatal(err) } // Write on both sides of the split to ensure that the raft machinery // is running. if err := ts.db.Put(ctx, leftKey, content); err != nil { t.Fatal(err) } if err := ts.db.Put(ctx, rightKey, content); err != nil { t.Fatal(err) } // Increment metrics on the node expectedNodeStatus.Metrics["exec.success"] += 2 // Increment metrics on the first store. store1 = expectedStoreStatuses[roachpb.StoreID(1)].Metrics store1["replicas"]++ store1["replicas.leaders"]++ store1["replicas.leaseholders"]++ store1["ranges"]++ forceWriteStatus() expectedNodeStatus = compareNodeStatus(t, ts, expectedNodeStatus, 3) for _, s := range expectedNodeStatus.StoreStatuses { expectedStoreStatuses[s.Desc.StoreID] = s } }
// TestMetricsRecorder verifies that the metrics recorder properly formats the // statistics from various registries, both for Time Series and for Status // Summaries. func TestMetricsRecorder(t *testing.T) { defer leaktest.AfterTest(t)() // ======================================== // Construct a series of fake descriptors for use in test. // ======================================== nodeDesc := roachpb.NodeDescriptor{ NodeID: roachpb.NodeID(1), } storeDesc1 := roachpb.StoreDescriptor{ StoreID: roachpb.StoreID(1), Capacity: roachpb.StoreCapacity{ Capacity: 100, Available: 50, }, } storeDesc2 := roachpb.StoreDescriptor{ StoreID: roachpb.StoreID(2), Capacity: roachpb.StoreCapacity{ Capacity: 200, Available: 75, }, } // ======================================== // Create registries and add them to the recorder (two node-level, two // store-level). // ======================================== reg1 := metric.NewRegistry() store1 := fakeStore{ storeID: roachpb.StoreID(1), desc: storeDesc1, registry: metric.NewRegistry(), } store2 := fakeStore{ storeID: roachpb.StoreID(2), desc: storeDesc2, registry: metric.NewRegistry(), } manual := hlc.NewManualClock(100) recorder := NewMetricsRecorder(hlc.NewClock(manual.UnixNano, time.Nanosecond)) recorder.AddStore(store1) recorder.AddStore(store2) recorder.AddNode(reg1, nodeDesc, 50) // Ensure the metric system's view of time does not advance during this test // as the test expects time to not advance too far which would age the actual // data (e.g. in histogram's) unexpectedly. defer metric.TestingSetNow(func() time.Time { return time.Unix(0, manual.UnixNano()).UTC() })() // ======================================== // Generate Metrics Data & Expected Results // ======================================== // Flatten the four registries into an array for ease of use. regList := []struct { reg *metric.Registry prefix string source int64 isNode bool }{ { reg: reg1, prefix: "one.", source: 1, isNode: true, }, { reg: reg1, prefix: "two.", source: 1, isNode: true, }, { reg: store1.registry, prefix: "", source: int64(store1.storeID), isNode: false, }, { reg: store2.registry, prefix: "", source: int64(store2.storeID), isNode: false, }, } // Every registry will have a copy of the following metrics. metricNames := []struct { name string typ string val int64 }{ {"testGauge", "gauge", 20}, {"testGaugeFloat64", "floatgauge", 20}, {"testCounter", "counter", 5}, {"testCounterWithRates", "counterwithrates", 2}, {"testHistogram", "histogram", 10}, {"testLatency", "latency", 10}, // Stats needed for store summaries. {"ranges", "counter", 1}, {"replicas.leaders", "gauge", 1}, {"replicas.leaseholders", "gauge", 1}, {"ranges", "gauge", 1}, {"ranges.available", "gauge", 1}, } // Add the metrics to each registry and set their values. At the same time, // generate expected time series results and status summary metric values. var expected []tspb.TimeSeriesData expectedNodeSummaryMetrics := make(map[string]float64) expectedStoreSummaryMetrics := make(map[string]float64) // addExpected generates expected data for a single metric data point. addExpected := func(prefix, name string, source, time, val int64, isNode bool) { // Generate time series data. tsPrefix := "cr.node." if !isNode { tsPrefix = "cr.store." } expect := tspb.TimeSeriesData{ Name: tsPrefix + prefix + name, Source: strconv.FormatInt(source, 10), Datapoints: []tspb.TimeSeriesDatapoint{ { TimestampNanos: time, Value: float64(val), }, }, } expected = append(expected, expect) // Generate status summary data. 
if isNode { expectedNodeSummaryMetrics[prefix+name] = float64(val) } else { // This can overwrite the previous value, but this is expected as // all stores in our tests have identical values; when comparing // status summaries, the same map is used as expected data for all // stores. expectedStoreSummaryMetrics[prefix+name] = float64(val) } } for _, reg := range regList { for _, data := range metricNames { switch data.typ { case "gauge": g := metric.NewGauge(metric.Metadata{Name: reg.prefix + data.name}) reg.reg.AddMetric(g) g.Update(data.val) addExpected(reg.prefix, data.name, reg.source, 100, data.val, reg.isNode) case "floatgauge": g := metric.NewGaugeFloat64(metric.Metadata{Name: reg.prefix + data.name}) reg.reg.AddMetric(g) g.Update(float64(data.val)) addExpected(reg.prefix, data.name, reg.source, 100, data.val, reg.isNode) case "counter": c := metric.NewCounter(metric.Metadata{Name: reg.prefix + data.name}) reg.reg.AddMetric(c) c.Inc((data.val)) addExpected(reg.prefix, data.name, reg.source, 100, data.val, reg.isNode) case "counterwithrates": r := metric.NewCounterWithRates(metric.Metadata{Name: reg.prefix + data.name}) reg.reg.AddMetric(r) r.Inc(data.val) addExpected(reg.prefix, data.name, reg.source, 100, data.val, reg.isNode) case "histogram": h := metric.NewHistogram(metric.Metadata{Name: reg.prefix + data.name}, time.Second, 1000, 2) reg.reg.AddMetric(h) h.RecordValue(data.val) for _, q := range recordHistogramQuantiles { addExpected(reg.prefix, data.name+q.suffix, reg.source, 100, data.val, reg.isNode) } case "latency": l := metric.NewLatency(metric.Metadata{Name: reg.prefix + data.name}, time.Hour) reg.reg.AddMetric(l) l.RecordValue(data.val) // Latency is simply three histograms (at different resolution // time scales). for _, q := range recordHistogramQuantiles { addExpected(reg.prefix, data.name+q.suffix, reg.source, 100, data.val, reg.isNode) } default: t.Fatalf("unexpected: %+v", data) } } } // ======================================== // Verify time series data // ======================================== actual := recorder.GetTimeSeriesData() // Actual comparison is simple: sort the resulting arrays by time and name, // and use reflect.DeepEqual. sort.Sort(byTimeAndName(actual)) sort.Sort(byTimeAndName(expected)) if a, e := actual, expected; !reflect.DeepEqual(a, e) { t.Errorf("recorder did not yield expected time series collection; diff:\n %v", pretty.Diff(e, a)) } // ======================================== // Verify node summary generation // ======================================== expectedNodeSummary := &NodeStatus{ Desc: nodeDesc, BuildInfo: build.GetInfo(), StartedAt: 50, UpdatedAt: 100, Metrics: expectedNodeSummaryMetrics, StoreStatuses: []StoreStatus{ { Desc: storeDesc1, Metrics: expectedStoreSummaryMetrics, }, { Desc: storeDesc2, Metrics: expectedStoreSummaryMetrics, }, }, } nodeSummary := recorder.GetStatusSummary() if nodeSummary == nil { t.Fatalf("recorder did not return nodeSummary") } sort.Sort(byStoreDescID(nodeSummary.StoreStatuses)) if a, e := nodeSummary, expectedNodeSummary; !reflect.DeepEqual(a, e) { t.Errorf("recorder did not produce expected NodeSummary; diff:\n %v", pretty.Diff(e, a)) } }
// TestLeaseMetricsOnSplitAndTransfer verifies that lease-related metrics // are updated after splitting a range and then initiating one successful // and one failing lease transfer. func TestLeaseMetricsOnSplitAndTransfer(t *testing.T) { defer leaktest.AfterTest(t)() var injectLeaseTransferError atomic.Value sc := storage.TestStoreConfig(nil) sc.TestingKnobs.DisableSplitQueue = true sc.TestingKnobs.TestingCommandFilter = func(filterArgs storagebase.FilterArgs) *roachpb.Error { if args, ok := filterArgs.Req.(*roachpb.TransferLeaseRequest); ok { if val := injectLeaseTransferError.Load(); val != nil && val.(bool) { // Note that we can't just return an error here as we only // end up counting failures in the metrics if the command // makes it through to being executed. So use a fake store ID. args.Lease.Replica.StoreID = roachpb.StoreID(1000) } } return nil } mtc := &multiTestContext{storeConfig: &sc} defer mtc.Stop() mtc.Start(t, 2) // Up-replicate to two replicas. keyMinReplica0 := mtc.stores[0].LookupReplica(roachpb.RKeyMin, nil) mtc.replicateRange(keyMinReplica0.RangeID, 1) // Split the key space at key "a". splitKey := roachpb.RKey("a") splitArgs := adminSplitArgs(splitKey.AsRawKey(), splitKey.AsRawKey()) if _, pErr := client.SendWrapped( context.Background(), rg1(mtc.stores[0]), splitArgs, ); pErr != nil { t.Fatal(pErr) } // Now, a successful transfer from LHS replica 0 to replica 1. injectLeaseTransferError.Store(false) if err := mtc.dbs[0].AdminTransferLease( context.TODO(), keyMinReplica0.Desc().StartKey.AsRawKey(), mtc.stores[1].StoreID(), ); err != nil { t.Fatalf("unable to transfer lease to replica 1: %s", err) } // Wait for all replicas to process. testutils.SucceedsSoon(t, func() error { for i := 0; i < 2; i++ { r := mtc.stores[i].LookupReplica(roachpb.RKeyMin, nil) if l, _ := r.GetLease(); l.Replica.StoreID != mtc.stores[1].StoreID() { return errors.Errorf("expected lease to transfer to replica 2: got %s", l) } } return nil }) // Next a failed transfer from RHS replica 0 to replica 1. injectLeaseTransferError.Store(true) keyAReplica0 := mtc.stores[0].LookupReplica(splitKey, nil) if err := mtc.dbs[0].AdminTransferLease( context.TODO(), keyAReplica0.Desc().StartKey.AsRawKey(), mtc.stores[1].StoreID(), ); err == nil { t.Fatal("expected an error transferring to an unknown store ID") } metrics := mtc.stores[0].Metrics() if a, e := metrics.LeaseTransferSuccessCount.Count(), int64(1); a != e { t.Errorf("expected %d lease transfer successes; got %d", e, a) } if a, e := metrics.LeaseTransferErrorCount.Count(), int64(1); a != e { t.Errorf("expected %d lease transfer errors; got %d", e, a) } // Expire current leases and put a key to RHS of split to request // an epoch-based lease. testutils.SucceedsSoon(t, func() error { mtc.expireLeases(context.TODO()) if err := mtc.stores[0].DB().Put(context.TODO(), "a", "foo"); err != nil { return err } // Update replication gauges on store 1 and verify we have 1 each of // expiration and epoch leases. These values are counted from store 1 // because it will have the higher replica IDs. Expire leases to make // sure that epoch-based leases are used for the split range. 
if err := mtc.stores[1].ComputeMetrics(context.Background(), 0); err != nil { return err } metrics = mtc.stores[1].Metrics() if a, e := metrics.LeaseExpirationCount.Value(), int64(1); a != e { return errors.Errorf("expected %d expiration lease count; got %d", e, a) } if a, e := metrics.LeaseEpochCount.Value(), int64(1); a != e { return errors.Errorf("expected %d epoch lease count; got %d", e, a) } return nil }) }
func TestLogSplits(t *testing.T) {
	defer leaktest.AfterTest(t)()
	s, db, kvDB := serverutils.StartServer(t, base.TestServerArgs{})
	defer s.Stopper().Stop()

	countSplits := func() int {
		var count int
		// TODO(mrtracy): this should be a parameterized query, but due to #3660
		// it does not work. This should be changed when #3660 is fixed.
		err := db.QueryRow(fmt.Sprintf(`SELECT COUNT(*) FROM system.rangelog WHERE eventType = '%s'`, string(storage.RangeEventLogSplit))).Scan(&count)
		if err != nil {
			t.Fatal(err)
		}
		return count
	}

	// Count the number of split events.
	initialSplits := server.ExpectedInitialRangeCount() - 1
	if a, e := countSplits(), initialSplits; a != e {
		t.Fatalf("expected %d initial splits, found %d", e, a)
	}

	// Generate an explicit split event.
	if err := kvDB.AdminSplit(context.TODO(), "splitkey"); err != nil {
		t.Fatal(err)
	}
	// Verify that the count has increased by one.
	if a, e := countSplits(), initialSplits+1; a != e {
		t.Fatalf("expected %d splits, found %d", e, a)
	}

	// Verify that RangeID always increases (a good way to see that the splits
	// are logged correctly).
	// TODO(mrtracy): Change to parameterized query when #3660 is fixed.
	rows, err := db.Query(fmt.Sprintf(`SELECT rangeID, otherRangeID, info FROM system.rangelog WHERE eventType = '%s'`, string(storage.RangeEventLogSplit)))
	if err != nil {
		t.Fatal(err)
	}
	for rows.Next() {
		var rangeID int64
		var otherRangeID gosql.NullInt64
		var infoStr gosql.NullString
		if err := rows.Scan(&rangeID, &otherRangeID, &infoStr); err != nil {
			t.Fatal(err)
		}
		if !otherRangeID.Valid {
			t.Errorf("otherRangeID not recorded for split of range %d", rangeID)
		}
		if otherRangeID.Int64 <= rangeID {
			t.Errorf("otherRangeID %d is not greater than rangeID %d", otherRangeID.Int64, rangeID)
		}
		// Verify that info returns a json struct.
		if !infoStr.Valid {
			t.Errorf("info not recorded for split of range %d", rangeID)
		}
		var info struct {
			UpdatedDesc roachpb.RangeDescriptor
			NewDesc     roachpb.RangeDescriptor
		}
		if err := json.Unmarshal([]byte(infoStr.String), &info); err != nil {
			t.Errorf("error unmarshalling info string for split of range %d: %s", rangeID, err)
			continue
		}
		if int64(info.UpdatedDesc.RangeID) != rangeID {
			t.Errorf("recorded wrong updated descriptor %s for split of range %d", info.UpdatedDesc, rangeID)
		}
		if int64(info.NewDesc.RangeID) != otherRangeID.Int64 {
			t.Errorf("recorded wrong new descriptor %s for split of range %d", info.NewDesc, rangeID)
		}
	}
	if rows.Err() != nil {
		t.Fatal(rows.Err())
	}

	// This code assumes that there is only one TestServer, and thus that
	// StoreID 1 is present on the testserver. If this assumption changes in the
	// future, *any* store will work, but a new method will need to be added to
	// Stores (or a creative usage of VisitStores could suffice).
	store, pErr := s.(*server.TestServer).Stores().GetStore(roachpb.StoreID(1))
	if pErr != nil {
		t.Fatal(pErr)
	}
	minSplits := int64(initialSplits + 1)
	// Verify that the minimum number of splits has occurred. This is a min
	// instead of an exact number, because the number of splits seems to vary
	// between different runs of this test.
	if a := store.Metrics().RangeSplits.Count(); a < minSplits {
		t.Errorf("splits = %d < min %d", a, minSplits)
	}
}
// TestReservationQueue checks to ensure that the expiration loop functions // correctly expiring any unfilled reservations in a number of different cases. func TestReservationQueue(t *testing.T) { defer leaktest.AfterTest(t)() // This test loads up 7 reservations at once, so set the queue higher to // accommodate them. stopper, mc, b := createTestBookie(time.Microsecond, 20, defaultMaxReservedBytes) defer stopper.Stop() bytesPerReservation := int64(100) // Load a collection of reservations into the bookie. for i := 1; i <= 10; i++ { // Ensure all the reservations expire 100 nanoseconds apart. mc.Increment(100) if !b.Reserve(context.Background(), ReservationRequest{ StoreRequestHeader: StoreRequestHeader{ StoreID: roachpb.StoreID(i), NodeID: roachpb.NodeID(i), }, RangeID: roachpb.RangeID(i), RangeSize: bytesPerReservation, }, nil).Reserved { t.Fatalf("could not book a reservation for reservation number %d", i) } } verifyBookie(t, b, 10 /*reservations*/, 10 /*queue*/, 10*bytesPerReservation /*bytes*/) // Fill reservation 2. if !b.Fill(context.Background(), 2) { t.Fatalf("Could not fill reservation 2") } // After filling a reservation, wait a full cycle so that it can be timed // out. verifyBookie(t, b, 9 /*reservations*/, 10 /*queue*/, 9*bytesPerReservation /*bytes*/) // Expire reservation 1. expireNextReservation(t, mc, b) verifyBookie(t, b, 8 /*reservations*/, 9 /*queue*/, 8*bytesPerReservation /*bytes*/) // Fill reservations 4 and 6. if !b.Fill(context.Background(), 4) { t.Fatalf("Could not fill reservation 4") } if !b.Fill(context.Background(), 6) { t.Fatalf("Could not fill reservation 6") } verifyBookie(t, b, 6 /*reservations*/, 9 /*queue*/, 6*bytesPerReservation /*bytes*/) expireNextReservation(t, mc, b) // Expire 2 (already filled) verifyBookie(t, b, 6 /*reservations*/, 8 /*queue*/, 6*bytesPerReservation /*bytes*/) expireNextReservation(t, mc, b) // Expire 3 verifyBookie(t, b, 5 /*reservations*/, 7 /*queue*/, 5*bytesPerReservation /*bytes*/) expireNextReservation(t, mc, b) // Expire 4 (already filled) verifyBookie(t, b, 5 /*reservations*/, 6 /*queue*/, 5*bytesPerReservation /*bytes*/) // Add three new reservations, 1 and 2, which have already been filled and // timed out, and 6, which has been filled by not timed out. Only increment // by 10 here to ensure we don't expire any of the other reservations. 
mc.Increment(10) if !b.Reserve(context.Background(), ReservationRequest{ StoreRequestHeader: StoreRequestHeader{ StoreID: roachpb.StoreID(11), NodeID: roachpb.NodeID(11), }, RangeID: roachpb.RangeID(1), RangeSize: bytesPerReservation, }, nil).Reserved { t.Fatalf("could not book a reservation for reservation number 1 (second pass)") } verifyBookie(t, b, 6 /*reservations*/, 7 /*queue*/, 6*bytesPerReservation /*bytes*/) mc.Increment(10) if !b.Reserve(context.Background(), ReservationRequest{ StoreRequestHeader: StoreRequestHeader{ StoreID: roachpb.StoreID(12), NodeID: roachpb.NodeID(12), }, RangeID: roachpb.RangeID(2), RangeSize: bytesPerReservation, }, nil).Reserved { t.Fatalf("could not book a reservation for reservation number 2 (second pass)") } verifyBookie(t, b, 7 /*reservations*/, 8 /*queue*/, 7*bytesPerReservation /*bytes*/) mc.Increment(10) if !b.Reserve(context.Background(), ReservationRequest{ StoreRequestHeader: StoreRequestHeader{ StoreID: roachpb.StoreID(13), NodeID: roachpb.NodeID(13), }, RangeID: roachpb.RangeID(6), RangeSize: bytesPerReservation, }, nil).Reserved { t.Fatalf("could not book a reservation for reservation number 6 (second pass)") } verifyBookie(t, b, 8 /*reservations*/, 9 /*queue*/, 8*bytesPerReservation /*bytes*/) // Fill 1 a second time. if !b.Fill(context.Background(), 1) { t.Fatalf("Could not fill reservation 1 (second pass)") } verifyBookie(t, b, 7 /*reservations*/, 9 /*queue*/, 7*bytesPerReservation /*bytes*/) // Expire all the remaining reservations one at a time. expireNextReservation(t, mc, b) // Expire 5 verifyBookie(t, b, 6 /*reservations*/, 8 /*queue*/, 6*bytesPerReservation /*bytes*/) expireNextReservation(t, mc, b) // Expire 6(1) - already filled verifyBookie(t, b, 6 /*reservations*/, 7 /*queue*/, 6*bytesPerReservation /*bytes*/) expireNextReservation(t, mc, b) // Expire 7 verifyBookie(t, b, 5 /*reservations*/, 6 /*queue*/, 5*bytesPerReservation /*bytes*/) expireNextReservation(t, mc, b) // Expire 8 verifyBookie(t, b, 4 /*reservations*/, 5 /*queue*/, 4*bytesPerReservation /*bytes*/) expireNextReservation(t, mc, b) // Expire 9 verifyBookie(t, b, 3 /*reservations*/, 4 /*queue*/, 3*bytesPerReservation /*bytes*/) expireNextReservation(t, mc, b) // Expire 10 verifyBookie(t, b, 2 /*reservations*/, 3 /*queue*/, 2*bytesPerReservation /*bytes*/) expireNextReservation(t, mc, b) // Expire 1(2) - already filled verifyBookie(t, b, 2 /*reservations*/, 2 /*queue*/, 2*bytesPerReservation /*bytes*/) expireNextReservation(t, mc, b) // Expire 2(2) verifyBookie(t, b, 1 /*reservations*/, 1 /*queue*/, 1*bytesPerReservation /*bytes*/) expireNextReservation(t, mc, b) // Expire 6(2) verifyBookie(t, b, 0 /*reservations*/, 0 /*queue*/, 0 /*bytes*/) }
// bootstrapCluster bootstraps multiple stores using the provided
// engines and cluster ID. The first bootstrapped store contains a
// single range spanning all keys. Initial range lookup metadata is
// populated for the range. Returns the cluster ID.
func bootstrapCluster(engines []engine.Engine, txnMetrics kv.TxnMetrics) (uuid.UUID, error) {
	clusterID := uuid.MakeV4()
	stopper := stop.NewStopper()
	defer stopper.Stop()

	cfg := storage.StoreConfig{}
	cfg.ScanInterval = 10 * time.Minute
	cfg.MetricsSampleInterval = time.Duration(math.MaxInt64)
	cfg.ConsistencyCheckInterval = 10 * time.Minute
	cfg.Clock = hlc.NewClock(hlc.UnixNano)
	cfg.AmbientCtx.Tracer = tracing.NewTracer()
	// Create a KV DB with a local sender.
	stores := storage.NewStores(cfg.AmbientCtx, cfg.Clock)
	sender := kv.NewTxnCoordSender(cfg.AmbientCtx, stores, cfg.Clock, false, stopper, txnMetrics)
	cfg.DB = client.NewDB(sender)
	cfg.Transport = storage.NewDummyRaftTransport()
	for i, eng := range engines {
		sIdent := roachpb.StoreIdent{
			ClusterID: clusterID,
			NodeID:    FirstNodeID,
			StoreID:   roachpb.StoreID(i + 1),
		}

		// The bootstrapping store will not connect to other nodes so its
		// StoreConfig doesn't really matter.
		s := storage.NewStore(cfg, eng, &roachpb.NodeDescriptor{NodeID: FirstNodeID})

		// Verify the store isn't already part of a cluster.
		if s.Ident.ClusterID != *uuid.EmptyUUID {
			return uuid.UUID{}, errors.Errorf("storage engine already belongs to a cluster (%s)", s.Ident.ClusterID)
		}

		// Bootstrap store to persist the store ident.
		if err := s.Bootstrap(sIdent); err != nil {
			return uuid.UUID{}, err
		}
		// Create first range, writing directly to engine. Note this does
		// not create the range, just its data. Only do this if this is the
		// first store.
		if i == 0 {
			initialValues := GetBootstrapSchema().GetInitialValues()
			if err := s.BootstrapRange(initialValues); err != nil {
				return uuid.UUID{}, err
			}
		}
		if err := s.Start(context.Background(), stopper); err != nil {
			return uuid.UUID{}, err
		}

		stores.AddStore(s)

		ctx := context.TODO()
		// Initialize node and store ids. Only initialize the node once.
		if i == 0 {
			if nodeID, err := allocateNodeID(ctx, cfg.DB); nodeID != sIdent.NodeID || err != nil {
				return uuid.UUID{}, errors.Errorf("expected to initialize node id allocator to %d, got %d: %s",
					sIdent.NodeID, nodeID, err)
			}
		}
		if storeID, err := allocateStoreIDs(ctx, sIdent.NodeID, 1, cfg.DB); storeID != sIdent.StoreID || err != nil {
			return uuid.UUID{}, errors.Errorf("expected to initialize store id allocator to %d, got %d: %s",
				sIdent.StoreID, storeID, err)
		}
	}
	return clusterID, nil
}
// getNextStoreID gets the store ID that should be used when adding a new store
// to the node.
func (n *Node) getNextStoreID() roachpb.StoreID {
	return roachpb.StoreID((int(n.desc.NodeID) * 1000) + len(n.stores))
}
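// Illustrative note (not part of the source above): the formula derives store
// IDs deterministically from the node ID and the number of stores already on
// the node, so a node with ID 2 that already has three stores hands out
// 2*1000 + 3 = 2003 next. A hypothetical helper mirroring the arithmetic:
func nextStoreIDFor(nodeID roachpb.NodeID, existingStores int) roachpb.StoreID {
	return roachpb.StoreID(int(nodeID)*1000 + existingStores)
}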
func (m *StoreRequestHeader) Unmarshal(dAtA []byte) error { l := len(dAtA) iNdEx := 0 for iNdEx < l { preIndex := iNdEx var wire uint64 for shift := uint(0); ; shift += 7 { if shift >= 64 { return ErrIntOverflowApi } if iNdEx >= l { return io.ErrUnexpectedEOF } b := dAtA[iNdEx] iNdEx++ wire |= (uint64(b) & 0x7F) << shift if b < 0x80 { break } } fieldNum := int32(wire >> 3) wireType := int(wire & 0x7) if wireType == 4 { return fmt.Errorf("proto: StoreRequestHeader: wiretype end group for non-group") } if fieldNum <= 0 { return fmt.Errorf("proto: StoreRequestHeader: illegal tag %d (wire type %d)", fieldNum, wire) } switch fieldNum { case 1: if wireType != 0 { return fmt.Errorf("proto: wrong wireType = %d for field NodeID", wireType) } m.NodeID = 0 for shift := uint(0); ; shift += 7 { if shift >= 64 { return ErrIntOverflowApi } if iNdEx >= l { return io.ErrUnexpectedEOF } b := dAtA[iNdEx] iNdEx++ m.NodeID |= (github_com_cockroachdb_cockroach_pkg_roachpb.NodeID(b) & 0x7F) << shift if b < 0x80 { break } } case 2: if wireType != 0 { return fmt.Errorf("proto: wrong wireType = %d for field StoreID", wireType) } m.StoreID = 0 for shift := uint(0); ; shift += 7 { if shift >= 64 { return ErrIntOverflowApi } if iNdEx >= l { return io.ErrUnexpectedEOF } b := dAtA[iNdEx] iNdEx++ m.StoreID |= (github_com_cockroachdb_cockroach_pkg_roachpb.StoreID(b) & 0x7F) << shift if b < 0x80 { break } } default: iNdEx = preIndex skippy, err := skipApi(dAtA[iNdEx:]) if err != nil { return err } if skippy < 0 { return ErrInvalidLengthApi } if (iNdEx + skippy) > l { return io.ErrUnexpectedEOF } iNdEx += skippy } } if iNdEx > l { return io.ErrUnexpectedEOF } return nil }
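// Illustrative sketch (not part of the generated code above): the field loops
// in Unmarshal decode protobuf base-128 varints. Each byte contributes its low
// seven bits, and a set high bit means more bytes follow. A hypothetical
// standalone decoder of the same encoding:
func decodeVarint(buf []byte) (val uint64, n int, ok bool) {
	var shift uint
	for i, b := range buf {
		if shift >= 64 {
			return 0, 0, false // overflow, analogous to ErrIntOverflowApi
		}
		val |= (uint64(b) & 0x7F) << shift
		if b < 0x80 {
			return val, i + 1, true
		}
		shift += 7
	}
	return 0, 0, false // truncated input, analogous to io.ErrUnexpectedEOF
}

// decodeVarint([]byte{0x96, 0x01}) returns (150, 2, true).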
// TestTimeSeriesMaintenanceQueueServer verifies that the time series // maintenance queue runs correctly on a test server. func TestTimeSeriesMaintenanceQueueServer(t *testing.T) { defer leaktest.AfterTest(t)() s, _, _ := serverutils.StartServer(t, base.TestServerArgs{}) defer s.Stopper().Stop() tsrv := s.(*server.TestServer) tsdb := tsrv.TsDB() // Populate time series data into the server. One time series, with one // datapoint at the current time and one datapoint older than the pruning // threshold. Datapoint timestamps are set to the midpoint of sample duration // periods; this simplifies verification. seriesName := "test.metric" now := tsrv.Clock().PhysicalNow() pastThreshold := now - (ts.Resolution10s.PruneThreshold() * 2) sampleDuration := ts.Resolution10s.SampleDuration() datapoints := []tspb.TimeSeriesDatapoint{ { TimestampNanos: pastThreshold - pastThreshold%sampleDuration + sampleDuration/2, Value: 200.0, }, { TimestampNanos: now - now%sampleDuration + sampleDuration/2, Value: 100.0, }, } if err := tsdb.StoreData(context.TODO(), ts.Resolution10s, []tspb.TimeSeriesData{ { Name: seriesName, Source: "source1", Datapoints: datapoints, }, }); err != nil { t.Fatal(err) } // getDatapoints queries all datapoints in the series from the beginning // of time to a point in the near future. getDatapoints := func() ([]tspb.TimeSeriesDatapoint, error) { dps, _, err := tsdb.Query( context.TODO(), tspb.Query{Name: seriesName}, ts.Resolution10s, ts.Resolution10s.SampleDuration(), 0, now+ts.Resolution10s.SlabDuration(), ) return dps, err } // Verify the datapoints are all present. actualDatapoints, err := getDatapoints() if err != nil { t.Fatal(err) } if a, e := actualDatapoints, datapoints; !reflect.DeepEqual(a, e) { t.Fatalf("got datapoints %v, expected %v, diff: %s", a, e, pretty.Diff(a, e)) } // Force pruning. storeID := roachpb.StoreID(1) store, err := tsrv.Stores().GetStore(roachpb.StoreID(1)) if err != nil { t.Fatalf("error retrieving store %d: %s", storeID, err) } store.ForceTimeSeriesMaintenanceQueueProcess() // Verify the older datapoint has been pruned. util.SucceedsSoon(t, func() error { actualDatapoints, err = getDatapoints() if err != nil { return err } if a, e := actualDatapoints, datapoints[1:]; !reflect.DeepEqual(a, e) { return fmt.Errorf("got datapoints %v, expected %v, diff: %s", a, e, pretty.Diff(a, e)) } return nil }) }
func TestLogRebalances(t *testing.T) { defer leaktest.AfterTest(t)() s, _, db := serverutils.StartServer(t, base.TestServerArgs{}) defer s.Stopper().Stop() // Use a client to get the RangeDescriptor for the first range. We will use // this range's information to log fake rebalance events. desc := &roachpb.RangeDescriptor{} if err := db.GetProto(context.TODO(), keys.RangeDescriptorKey(roachpb.RKeyMin), desc); err != nil { t.Fatal(err) } // This code assumes that there is only one TestServer, and thus that // StoreID 1 is present on the testserver. If this assumption changes in the // future, *any* store will work, but a new method will need to be added to // Stores (or a creative usage of VisitStores could suffice). store, err := s.(*server.TestServer).Stores().GetStore(roachpb.StoreID(1)) if err != nil { t.Fatal(err) } // Log several fake events using the store. logEvent := func(changeType roachpb.ReplicaChangeType) { if err := db.Txn(context.TODO(), func(txn *client.Txn) error { return store.LogReplicaChangeTest(txn, changeType, desc.Replicas[0], *desc) }); err != nil { t.Fatal(err) } } checkMetrics := func(expAdds, expRemoves int64) { if a, e := store.Metrics().RangeAdds.Count(), expAdds; a != e { t.Errorf("range adds %d != expected %d", a, e) } if a, e := store.Metrics().RangeRemoves.Count(), expRemoves; a != e { t.Errorf("range removes %d != expected %d", a, e) } } logEvent(roachpb.ADD_REPLICA) checkMetrics(1 /*add*/, 0 /*remove*/) logEvent(roachpb.ADD_REPLICA) checkMetrics(2 /*adds*/, 0 /*remove*/) logEvent(roachpb.REMOVE_REPLICA) checkMetrics(2 /*adds*/, 1 /*remove*/) // Open a SQL connection to verify that the events have been logged. pgURL, cleanupFn := sqlutils.PGUrl(t, s.ServingAddr(), "TestLogRebalances", url.User(security.RootUser)) defer cleanupFn() sqlDB, err := gosql.Open("postgres", pgURL.String()) if err != nil { t.Fatal(err) } defer sqlDB.Close() // verify that two add replica events have been logged. // TODO(mrtracy): placeholders still appear to be broken, this query should // be using a string placeholder for the eventType value. rows, err := sqlDB.Query(`SELECT rangeID, info FROM system.rangelog WHERE eventType = 'add'`) if err != nil { t.Fatal(err) } var count int for rows.Next() { count++ var rangeID int64 var infoStr gosql.NullString if err := rows.Scan(&rangeID, &infoStr); err != nil { t.Fatal(err) } if a, e := roachpb.RangeID(rangeID), desc.RangeID; a != e { t.Errorf("wrong rangeID %d recorded for add event, expected %d", a, e) } // Verify that info returns a json struct. if !infoStr.Valid { t.Errorf("info not recorded for add replica of range %d", rangeID) } var info struct { AddReplica roachpb.ReplicaDescriptor UpdatedDesc roachpb.RangeDescriptor } if err := json.Unmarshal([]byte(infoStr.String), &info); err != nil { t.Errorf("error unmarshalling info string for add replica %d: %s", rangeID, err) continue } if int64(info.UpdatedDesc.RangeID) != rangeID { t.Errorf("recorded wrong updated descriptor %s for add replica of range %d", info.UpdatedDesc, rangeID) } if a, e := info.AddReplica, desc.Replicas[0]; a != e { t.Errorf("recorded wrong updated replica %s for add replica of range %d, expected %s", a, rangeID, e) } } if rows.Err() != nil { t.Fatal(rows.Err()) } if a, e := count, 2; a != e { t.Errorf("expected %d AddReplica events logged, found %d", e, a) } // verify that one remove replica event was logged. 
rows, err = sqlDB.Query(`SELECT rangeID, info FROM system.rangelog WHERE eventType = 'remove'`) if err != nil { t.Fatal(err) } count = 0 for rows.Next() { count++ var rangeID int64 var infoStr gosql.NullString if err := rows.Scan(&rangeID, &infoStr); err != nil { t.Fatal(err) } if a, e := roachpb.RangeID(rangeID), desc.RangeID; a != e { t.Errorf("wrong rangeID %d recorded for remove event, expected %d", a, e) } // Verify that info returns a json struct. if !infoStr.Valid { t.Errorf("info not recorded for remove replica of range %d", rangeID) } var info struct { RemovedReplica roachpb.ReplicaDescriptor UpdatedDesc roachpb.RangeDescriptor } if err := json.Unmarshal([]byte(infoStr.String), &info); err != nil { t.Errorf("error unmarshalling info string for remove replica %d: %s", rangeID, err) continue } if int64(info.UpdatedDesc.RangeID) != rangeID { t.Errorf("recorded wrong updated descriptor %s for remove replica of range %d", info.UpdatedDesc, rangeID) } if a, e := info.RemovedReplica, desc.Replicas[0]; a != e { t.Errorf("recorded wrong updated replica %s for remove replica of range %d, expected %s", a, rangeID, e) } } if rows.Err() != nil { t.Fatal(rows.Err()) } if a, e := count, 1; a != e { t.Errorf("expected %d RemoveReplica events logged, found %d", e, a) } }
// TestTimeSeriesMaintenanceQueueServer verifies that the time series // maintenance queue runs correctly on a test server. func TestTimeSeriesMaintenanceQueueServer(t *testing.T) { defer leaktest.AfterTest(t)() s, _, db := serverutils.StartServer(t, base.TestServerArgs{ Knobs: base.TestingKnobs{ Store: &storage.StoreTestingKnobs{ DisableScanner: true, }, }, }) defer s.Stopper().Stop() tsrv := s.(*server.TestServer) tsdb := tsrv.TsDB() // Populate time series data into the server. One time series, with one // datapoint at the current time and two datapoints older than the pruning // threshold. Datapoint timestamps are set to the midpoint of sample duration // periods; this simplifies verification. seriesName := "test.metric" sourceName := "source1" now := tsrv.Clock().PhysicalNow() nearPast := now - (ts.Resolution10s.PruneThreshold() * 2) farPast := now - (ts.Resolution10s.PruneThreshold() * 4) sampleDuration := ts.Resolution10s.SampleDuration() datapoints := []tspb.TimeSeriesDatapoint{ { TimestampNanos: farPast - farPast%sampleDuration + sampleDuration/2, Value: 100.0, }, { TimestampNanos: nearPast - (nearPast)%sampleDuration + sampleDuration/2, Value: 200.0, }, { TimestampNanos: now - now%sampleDuration + sampleDuration/2, Value: 300.0, }, } if err := tsdb.StoreData(context.TODO(), ts.Resolution10s, []tspb.TimeSeriesData{ { Name: seriesName, Source: sourceName, Datapoints: datapoints, }, }); err != nil { t.Fatal(err) } // Generate a split key at a timestamp halfway between near past and far past. splitKey := ts.MakeDataKey( seriesName, sourceName, ts.Resolution10s, farPast+(nearPast-farPast)/2, ) // Force a range split in between near past and far past. This guarantees // that the pruning operation will issue a DeleteRange which spans ranges. if err := db.AdminSplit(context.TODO(), splitKey); err != nil { t.Fatal(err) } // getDatapoints queries all datapoints in the series from the beginning // of time to a point in the near future. getDatapoints := func() ([]tspb.TimeSeriesDatapoint, error) { dps, _, err := tsdb.Query( context.TODO(), tspb.Query{Name: seriesName}, ts.Resolution10s, ts.Resolution10s.SampleDuration(), 0, now+ts.Resolution10s.SlabDuration(), ) return dps, err } // Verify the datapoints are all present. actualDatapoints, err := getDatapoints() if err != nil { t.Fatal(err) } if a, e := actualDatapoints, datapoints; !reflect.DeepEqual(a, e) { t.Fatalf("got datapoints %v, expected %v, diff: %s", a, e, pretty.Diff(a, e)) } // Force pruning. storeID := roachpb.StoreID(1) store, err := tsrv.Stores().GetStore(roachpb.StoreID(1)) if err != nil { t.Fatalf("error retrieving store %d: %s", storeID, err) } store.ForceTimeSeriesMaintenanceQueueProcess() // Verify the older datapoint has been pruned. util.SucceedsSoon(t, func() error { actualDatapoints, err = getDatapoints() if err != nil { return err } if a, e := actualDatapoints, datapoints[2:]; !reflect.DeepEqual(a, e) { return fmt.Errorf("got datapoints %v, expected %v, diff: %s", a, e, pretty.Diff(a, e)) } return nil }) }