// TestLeastUseful verifies that the least-contributing peer node
// can be determined.
func TestLeastUseful(t *testing.T) {
	defer leaktest.AfterTest(t)()
	nodes := []roachpb.NodeID{
		roachpb.NodeID(1),
		roachpb.NodeID(2),
	}
	is, stopper := newTestInfoStore()
	defer stopper.Stop()

	set := makeNodeSet(3, metric.NewGauge(metric.Metadata{Name: ""}))
	if is.leastUseful(set) != 0 {
		t.Error("not expecting a node from an empty set")
	}

	inf1 := is.newInfo(nil, time.Second)
	inf1.NodeID = 1
	inf1.PeerID = 1
	if err := is.addInfo("a1", inf1); err != nil {
		t.Fatal(err)
	}
	if is.leastUseful(set) != 0 {
		t.Error("not expecting a node from an empty set")
	}

	set.addNode(nodes[0])
	if is.leastUseful(set) != nodes[0] {
		t.Error("expecting nodes[0] as least useful")
	}

	inf2 := is.newInfo(nil, time.Second)
	inf2.NodeID = 2
	inf2.PeerID = 1
	if err := is.addInfo("a2", inf2); err != nil {
		t.Fatal(err)
	}
	if is.leastUseful(set) != nodes[0] {
		t.Error("expecting nodes[0] as least useful")
	}

	set.addNode(nodes[1])
	if is.leastUseful(set) != nodes[1] {
		t.Error("expecting nodes[1] as least useful")
	}

	inf3 := is.newInfo(nil, time.Second)
	inf3.NodeID = 2
	inf3.PeerID = 2
	if err := is.addInfo("a3", inf3); err != nil {
		t.Fatal(err)
	}
	if is.leastUseful(set) != nodes[1] {
		t.Error("expecting nodes[1] as least useful")
	}
}
func TestPreexistingReplicaCheck(t *testing.T) {
	defer leaktest.AfterTest(t)()

	var existing []roachpb.ReplicaDescriptor
	for i := 2; i < 10; i += 2 {
		existing = append(existing, roachpb.ReplicaDescriptor{NodeID: roachpb.NodeID(i)})
	}
	for i := 1; i < 10; i++ {
		if e, a := i%2 != 0, preexistingReplicaCheck(roachpb.NodeID(i), existing); e != a {
			t.Errorf("NodeID %d expected to be %t, got %t", i, e, a)
		}
	}
}
// allocateNodeID increments the node id generator key to allocate
// a new, unique node id.
func allocateNodeID(ctx context.Context, db *client.DB) (roachpb.NodeID, error) {
	r, err := db.Inc(ctx, keys.NodeIDGenerator, 1)
	if err != nil {
		return 0, errors.Errorf("unable to allocate node ID: %s", err)
	}
	return roachpb.NodeID(r.ValueInt()), nil
}
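// The sketch below is a hypothetical caller, not code from this repository:
// it illustrates how the allocated ID might be stamped onto a fresh node
// descriptor during bootstrap. Only allocateNodeID's signature above is taken
// as given; assignNodeID itself is invented for illustration.
func assignNodeID(ctx context.Context, db *client.DB, desc *roachpb.NodeDescriptor) error {
	id, err := allocateNodeID(ctx, db)
	if err != nil {
		return err
	}
	// Record the freshly allocated, cluster-unique ID on the descriptor.
	desc.NodeID = id
	return nil
}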
func TestNodeSetAsSlice(t *testing.T) {
	defer leaktest.AfterTest(t)()
	nodes := makeNodeSet(2, metric.NewGauge(metric.Metadata{Name: ""}))
	node0 := roachpb.NodeID(1)
	node1 := roachpb.NodeID(2)
	nodes.addNode(node0)
	nodes.addNode(node1)

	nodeArr := nodes.asSlice()
	if len(nodeArr) != 2 {
		t.Error("expected slice of length 2:", nodeArr)
	}
	if (nodeArr[0] != node0 && nodeArr[0] != node1) ||
		(nodeArr[1] != node1 && nodeArr[1] != node0) {
		t.Error("expected slice to contain both node0 and node1:", nodeArr)
	}
}
func TestGetNodeLocalities(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper, g, _, sp, _ := createTestStorePool(
		TestTimeUntilStoreDead, false /* deterministic */, false /* defaultNodeLiveness */)
	defer stopper.Stop()
	sg := gossiputil.NewStoreGossiper(g)

	// Creates a node with a locality with the number of tiers passed in. The
	// NodeID is the same as the tier count.
	createDescWithLocality := func(tierCount int) roachpb.NodeDescriptor {
		nodeDescriptor := roachpb.NodeDescriptor{NodeID: roachpb.NodeID(tierCount)}
		for i := 1; i <= tierCount; i++ {
			value := fmt.Sprintf("%d", i)
			nodeDescriptor.Locality.Tiers = append(nodeDescriptor.Locality.Tiers, roachpb.Tier{
				Key:   value,
				Value: value,
			})
		}
		return nodeDescriptor
	}

	stores := []*roachpb.StoreDescriptor{
		{
			StoreID: 1,
			Node:    createDescWithLocality(1),
		},
		{
			StoreID: 2,
			Node:    createDescWithLocality(2),
		},
		{
			StoreID: 3,
			Node:    createDescWithLocality(3),
		},
		{
			StoreID: 4,
			Node:    createDescWithLocality(2),
		},
	}

	sg.GossipStores(stores, t)

	var existingReplicas []roachpb.ReplicaDescriptor
	for _, store := range stores {
		existingReplicas = append(existingReplicas, roachpb.ReplicaDescriptor{NodeID: store.Node.NodeID})
	}

	localities := sp.getNodeLocalities(existingReplicas)
	for _, store := range stores {
		locality, ok := localities[store.Node.NodeID]
		if !ok {
			t.Fatalf("could not find locality for node %d", store.Node.NodeID)
		}
		if e, a := int(store.Node.NodeID), len(locality.Tiers); e != a {
			t.Fatalf("for node %d, expected %d tiers, only got %d", store.Node.NodeID, e, a)
		}
	}
}
// addNewNodeWithStore adds a new node with a single store.
func (c *Cluster) addNewNodeWithStore() {
	nodeID := roachpb.NodeID(len(c.nodes))
	c.nodes[nodeID] = newNode(nodeID, c.gossip)
	// Only output if we're running the simulation.
	if c.epoch >= 0 {
		fmt.Fprintf(c.actionWriter, "%d:\tNode %d added\n", c.epoch, nodeID)
	}
	c.addStore(nodeID)
}
func TestNodeSetFilter(t *testing.T) {
	defer leaktest.AfterTest(t)()
	nodes1 := makeNodeSet(2, metric.NewGauge(metric.Metadata{Name: ""}))
	node0 := roachpb.NodeID(1)
	node1 := roachpb.NodeID(2)
	nodes1.addNode(node0)
	nodes1.addNode(node1)

	nodes2 := makeNodeSet(1, metric.NewGauge(metric.Metadata{Name: ""}))
	nodes2.addNode(node1)

	filtered := nodes1.filter(func(a roachpb.NodeID) bool {
		return !nodes2.hasNode(a)
	})
	if filtered.len() != 1 || filtered.hasNode(node1) || !filtered.hasNode(node0) {
		t.Errorf("expected filter to leave node0: %+v", filtered)
	}
}
func TestNodeSetAddAndRemoveNode(t *testing.T) {
	defer leaktest.AfterTest(t)()
	nodes := makeNodeSet(2, metric.NewGauge(metric.Metadata{Name: ""}))
	node0 := roachpb.NodeID(1)
	node1 := roachpb.NodeID(2)
	nodes.addNode(node0)
	nodes.addNode(node1)
	if !nodes.hasNode(node0) || !nodes.hasNode(node1) {
		t.Error("failed to locate added nodes")
	}
	nodes.removeNode(node0)
	if nodes.hasNode(node0) || !nodes.hasNode(node1) {
		t.Error("failed to remove node0", nodes)
	}
	nodes.removeNode(node1)
	if nodes.hasNode(node0) || nodes.hasNode(node1) {
		t.Error("failed to remove node1", nodes)
	}
}
// TestInfoStoreMostDistant verifies selection of most distant node &
// associated hops.
func TestInfoStoreMostDistant(t *testing.T) {
	defer leaktest.AfterTest(t)()
	nodes := []roachpb.NodeID{
		roachpb.NodeID(1),
		roachpb.NodeID(2),
		roachpb.NodeID(3),
	}
	is, stopper := newTestInfoStore()
	defer stopper.Stop()
	// Add info from each address, with hop count equal to index+1.
	for i := 0; i < len(nodes); i++ {
		inf := is.newInfo(nil, time.Second)
		inf.Hops = uint32(i + 1)
		inf.NodeID = nodes[i]
		if err := is.addInfo(fmt.Sprintf("b.%d", i), inf); err != nil {
			t.Fatal(err)
		}
		nodeID, hops := is.mostDistant(func(roachpb.NodeID) bool { return false })
		if nodeID != inf.NodeID {
			t.Errorf("%d: expected node %d; got %d", i, inf.NodeID, nodeID)
		}
		if hops != inf.Hops {
			t.Errorf("%d: expected hops %d; got %d", i, inf.Hops, hops)
		}
	}

	// Finally, simulate a Gossip instance that has an outgoing connection
	// and expect the outgoing connection to not be recommended even though
	// it's the furthest node away.
	filteredNode := nodes[len(nodes)-1]
	expectedNode := nodes[len(nodes)-2]
	// In this test's setup the hop count equals the node ID.
	expectedHops := expectedNode
	nodeID, hops := is.mostDistant(func(nodeID roachpb.NodeID) bool {
		return nodeID == filteredNode
	})
	if nodeID != expectedNode {
		t.Errorf("expected node %d; got %d", expectedNode, nodeID)
	}
	if hops != uint32(expectedHops) {
		t.Errorf("expected hops %d; got %d", expectedHops, hops)
	}
}
func TestNodeSetMaxSize(t *testing.T) {
	defer leaktest.AfterTest(t)()
	nodes := makeNodeSet(1, metric.NewGauge(metric.Metadata{Name: ""}))
	if !nodes.hasSpace() {
		t.Error("set should have space")
	}
	nodes.addNode(roachpb.NodeID(1))
	if nodes.hasSpace() {
		t.Error("set should have no space")
	}
}
// NodeIDFromKey attempts to extract a NodeID from the provided key.
// The key should have been constructed by MakeNodeIDKey.
// Returns an error if the key is not of the correct type or is not parsable.
func NodeIDFromKey(key string) (roachpb.NodeID, error) {
	trimmedKey := strings.TrimPrefix(key, KeyNodeIDPrefix+separator)
	if trimmedKey == key {
		return 0, errors.Errorf("%q is not a NodeID Key", key)
	}
	nodeID, err := strconv.ParseInt(trimmedKey, 10, 64)
	if err != nil {
		return 0, errors.Wrapf(err, "failed parsing NodeID from key %q", key)
	}
	return roachpb.NodeID(nodeID), nil
}
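// A minimal round-trip sketch, not taken from this repository: it assumes
// MakeNodeIDKey (referenced in the doc comment above, but not shown here) has
// the signature MakeNodeIDKey(roachpb.NodeID) string. The non-matching key is
// an arbitrary string chosen to miss KeyNodeIDPrefix.
func TestNodeIDKeyRoundTrip(t *testing.T) {
	defer leaktest.AfterTest(t)()
	id := roachpb.NodeID(42)
	parsed, err := NodeIDFromKey(MakeNodeIDKey(id))
	if err != nil {
		t.Fatal(err)
	}
	if parsed != id {
		t.Errorf("expected NodeID %d, got %d", id, parsed)
	}
	// A key that was not built by MakeNodeIDKey should be rejected.
	if _, err := NodeIDFromKey("not-a-node-key"); err == nil {
		t.Error("expected an error for a non-NodeID key")
	}
}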
// Start starts the test cluster by bootstrapping an in-memory store
// (defaults to maximum of 50M). The server is started, launching the
// node RPC server and all HTTP endpoints. Use the value of
// TestServer.Addr after Start() for client connections. Use Stop()
// to shut down the server after the test completes.
func (ltc *LocalTestCluster) Start(t util.Tester, baseCtx *base.Config, initSender InitSenderFn) {
	ambient := log.AmbientContext{Tracer: tracing.NewTracer()}
	nc := &base.NodeIDContainer{}
	ambient.AddLogTag("n", nc)

	nodeID := roachpb.NodeID(1)
	nodeDesc := &roachpb.NodeDescriptor{NodeID: nodeID}
	ltc.tester = t
	ltc.Manual = hlc.NewManualClock(0)
	ltc.Clock = hlc.NewClock(ltc.Manual.UnixNano)
	ltc.Stopper = stop.NewStopper()
	rpcContext := rpc.NewContext(ambient, baseCtx, ltc.Clock, ltc.Stopper)
	server := rpc.NewServer(rpcContext) // never started
	ltc.Gossip = gossip.New(ambient, nc, rpcContext, server, nil, ltc.Stopper, metric.NewRegistry())
	ltc.Eng = engine.NewInMem(roachpb.Attributes{}, 50<<20)
	ltc.Stopper.AddCloser(ltc.Eng)

	ltc.Stores = storage.NewStores(ambient, ltc.Clock)

	ltc.Sender = initSender(nodeDesc, ambient.Tracer, ltc.Clock, ltc.Latency, ltc.Stores,
		ltc.Stopper, ltc.Gossip)
	if ltc.DBContext == nil {
		dbCtx := client.DefaultDBContext()
		ltc.DBContext = &dbCtx
	}
	ltc.DB = client.NewDBWithContext(ltc.Sender, *ltc.DBContext)
	transport := storage.NewDummyRaftTransport()
	cfg := storage.TestStoreConfig()
	if ltc.RangeRetryOptions != nil {
		cfg.RangeRetryOptions = *ltc.RangeRetryOptions
	}
	cfg.AmbientCtx = ambient
	cfg.Clock = ltc.Clock
	cfg.DB = ltc.DB
	cfg.Gossip = ltc.Gossip
	cfg.Transport = transport
	cfg.MetricsSampleInterval = metric.TestSampleInterval
	ltc.Store = storage.NewStore(cfg, ltc.Eng, nodeDesc)
	if err := ltc.Store.Bootstrap(roachpb.StoreIdent{NodeID: nodeID, StoreID: 1}); err != nil {
		t.Fatalf("unable to start local test cluster: %s", err)
	}
	ltc.Stores.AddStore(ltc.Store)
	if err := ltc.Store.BootstrapRange(nil); err != nil {
		t.Fatalf("unable to start local test cluster: %s", err)
	}
	if err := ltc.Store.Start(context.Background(), ltc.Stopper); err != nil {
		t.Fatalf("unable to start local test cluster: %s", err)
	}
	nc.Set(context.TODO(), nodeDesc.NodeID)
	if err := ltc.Gossip.SetNodeDescriptor(nodeDesc); err != nil {
		t.Fatalf("unable to set node descriptor: %s", err)
	}
}
func (s *statusServer) parseNodeID(nodeIDParam string) (roachpb.NodeID, bool, error) {
	// No parameter provided or set to local.
	if len(nodeIDParam) == 0 || localRE.MatchString(nodeIDParam) {
		return s.gossip.NodeID.Get(), true, nil
	}

	id, err := strconv.ParseInt(nodeIDParam, 10, 64)
	if err != nil {
		return 0, false, fmt.Errorf("node id could not be parsed: %s", err)
	}
	nodeID := roachpb.NodeID(id)
	return nodeID, nodeID == s.gossip.NodeID.Get(), nil
}
func TestNodeSetHasNode(t *testing.T) {
	defer leaktest.AfterTest(t)()
	nodes := makeNodeSet(2, metric.NewGauge(metric.Metadata{Name: ""}))
	node := roachpb.NodeID(1)
	if nodes.hasNode(node) {
		t.Error("node wasn't added and should not be valid")
	}

	// Add node and verify it's valid.
	nodes.addNode(node)
	if !nodes.hasNode(node) {
		t.Error("node was added and should be valid")
	}
}
// TestBookieReserveMaxBytes ensures that over-booking doesn't occur when trying
// to reserve more bytes than maxReservedBytes.
func TestBookieReserveMaxBytes(t *testing.T) {
	defer leaktest.AfterTest(t)()

	previousReservedBytes := 10

	stopper, _, b := createTestBookie(time.Hour, previousReservedBytes*2, int64(previousReservedBytes))
	defer stopper.Stop()

	// Load up reservations with a size of 1 each.
	for i := 1; i <= previousReservedBytes; i++ {
		req := ReservationRequest{
			StoreRequestHeader: StoreRequestHeader{
				StoreID: roachpb.StoreID(i),
				NodeID:  roachpb.NodeID(i),
			},
			RangeID:   roachpb.RangeID(i),
			RangeSize: 1,
		}
		if !b.Reserve(context.Background(), req, nil).Reserved {
			t.Errorf("%d: could not add reservation", i)
		}
		verifyBookie(t, b, i, i, int64(i))
	}

	overbookedReq := ReservationRequest{
		StoreRequestHeader: StoreRequestHeader{
			StoreID: roachpb.StoreID(previousReservedBytes + 1),
			NodeID:  roachpb.NodeID(previousReservedBytes + 1),
		},
		RangeID:   roachpb.RangeID(previousReservedBytes + 1),
		RangeSize: 1,
	}
	if b.Reserve(context.Background(), overbookedReq, nil).Reserved {
		t.Errorf("expected reservation to fail due to too many already existing reservations, but it succeeded")
	}
	// The same numbers from the last call to verifyBookie.
	verifyBookie(t, b, previousReservedBytes, previousReservedBytes, int64(previousReservedBytes))
}
func (t *leaseTest) node(nodeID uint32) *csql.LeaseManager {
	mgr := t.nodes[nodeID]
	if mgr == nil {
		nc := &base.NodeIDContainer{}
		nc.Set(context.TODO(), roachpb.NodeID(nodeID))
		mgr = csql.NewLeaseManager(
			nc,
			*t.kvDB,
			t.server.Clock(),
			t.leaseManagerTestingKnobs,
			t.server.Stopper(),
			&csql.MemoryMetrics{},
		)
		t.nodes[nodeID] = mgr
	}
	return mgr
}
// TestClientRegisterWithInitNodeID verifies two clients' gossip requests
// with NodeID 0.
func TestClientRegisterWithInitNodeID(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop()

	// Create three gossip nodes, and connect to the first with NodeID 0.
	var g []*Gossip
	var gossipAddr string
	for i := 0; i < 3; i++ {
		RPCContext := rpc.NewContext(log.AmbientContext{}, &base.Config{Insecure: true}, nil, stopper)

		server := rpc.NewServer(RPCContext)
		ln, err := netutil.ListenAndServeGRPC(stopper, server, util.IsolatedTestAddr)
		if err != nil {
			t.Fatal(err)
		}

		// Connect to the first gossip node.
		if gossipAddr == "" {
			gossipAddr = ln.Addr().String()
		}

		var resolvers []resolver.Resolver
		resolver, err := resolver.NewResolver(gossipAddr)
		if err != nil {
			t.Fatal(err)
		}
		resolvers = append(resolvers, resolver)
		// Node ID must be non-zero.
		gnode := NewTest(
			roachpb.NodeID(i+1), RPCContext, server, resolvers, stopper, metric.NewRegistry(),
		)
		g = append(g, gnode)
		gnode.Start(ln.Addr())
	}

	util.SucceedsSoon(t, func() error {
		// The first gossip node should have two gossip client addresses
		// in its nodeMap if all three gossip nodes registered successfully.
		g[0].mu.Lock()
		defer g[0].mu.Unlock()
		if a, e := len(g[0].mu.nodeMap), 2; a != e {
			return errors.Errorf("expected %s to contain %d nodes, got %d", g[0].mu.nodeMap, e, a)
		}
		return nil
	})
}
// TestClientNodeID verifies a client's gossip request with correct NodeID.
func TestClientNodeID(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	disconnected := make(chan *client, 1)

	localNodeID := roachpb.NodeID(1)
	local, remote := startFakeServerGossips(t, localNodeID, stopper)

	// Use an insecure context. We're talking to a tcp socket which is not in the certs.
	rpcContext := newInsecureRPCContext(stopper)
	c := newClient(log.AmbientContext{}, &remote.nodeAddr, makeMetrics())
	disconnected <- c

	defer func() {
		stopper.Stop()
		if c != <-disconnected {
			t.Errorf("expected client disconnect after remote close")
		}
	}()

	// A gossip client may fail to start if the grpc connection times out which
	// can happen under load (such as in CircleCI or using `make stress`). So we
	// loop creating clients until success or the test times out.
	for {
		// Wait for c.gossip to start.
		select {
		case receivedNodeID := <-remote.nodeIDChan:
			if receivedNodeID != localNodeID {
				t.Fatalf("client should send NodeID with %v, got %v", localNodeID, receivedNodeID)
			}
			return
		case <-disconnected:
			// The client hasn't been started or failed to start, loop and try again.
			c.start(local, disconnected, rpcContext, stopper, rpcContext.NewBreaker())
		}
	}
}
// TestGossipCullNetwork verifies that a client will be culled from
// the network periodically (at cullInterval duration intervals).
func TestGossipCullNetwork(t *testing.T) {
	defer leaktest.AfterTest(t)()

	stopper := stop.NewStopper()
	defer stopper.Stop()
	local := startGossip(1, stopper, t, metric.NewRegistry())
	local.SetCullInterval(5 * time.Millisecond)

	local.mu.Lock()
	for i := 0; i < minPeers; i++ {
		peer := startGossip(roachpb.NodeID(i+2), stopper, t, metric.NewRegistry())
		local.startClient(peer.GetNodeAddr())
	}
	local.mu.Unlock()

	const slowGossipDuration = time.Minute

	if err := util.RetryForDuration(slowGossipDuration, func() error {
		if peers := len(local.Outgoing()); peers != minPeers {
			return errors.Errorf("%d of %d peers connected", peers, minPeers)
		}
		return nil
	}); err != nil {
		t.Fatalf("condition failed to evaluate within %s: %s", slowGossipDuration, err)
	}

	local.manage()

	if err := util.RetryForDuration(slowGossipDuration, func() error {
		// Verify that a client is closed within the cull interval.
		if peers := len(local.Outgoing()); peers != minPeers-1 {
			return errors.Errorf("%d of %d peers connected", peers, minPeers-1)
		}
		return nil
	}); err != nil {
		t.Fatalf("condition failed to evaluate within %s: %s", slowGossipDuration, err)
	}
}
// This is a collection of test stores used by a suite of tests.
var (
	testStoreUSa15  = roachpb.StoreID(1) // us-a-1-5
	testStoreUSa1   = roachpb.StoreID(2) // us-a-1
	testStoreUSb    = roachpb.StoreID(3) // us-b
	testStoreEurope = roachpb.StoreID(4) // eur-a-1-5

	testStores = []roachpb.StoreDescriptor{
		{
			StoreID: testStoreUSa15,
			Attrs: roachpb.Attributes{
				Attrs: []string{"a"},
			},
			Node: roachpb.NodeDescriptor{
				NodeID: roachpb.NodeID(testStoreUSa15),
				Locality: roachpb.Locality{
					Tiers: testStoreTierSetup("us", "a", "1", "5"),
				},
			},
			Capacity: testStoreCapacitySetup(1, 99),
		},
		{
			StoreID: testStoreUSa1,
			Attrs: roachpb.Attributes{
				Attrs: []string{"a", "b"},
			},
			Node: roachpb.NodeDescriptor{
				NodeID: roachpb.NodeID(testStoreUSa1),
				Locality: roachpb.Locality{
					Tiers: testStoreTierSetup("us", "a", "1", "",
func TestStorePoolFindDeadReplicas(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper, g, _, sp, mnl := createTestStorePool(
		TestTimeUntilStoreDead, false /* deterministic */, false /* defaultNodeLiveness */)
	defer stopper.Stop()
	sg := gossiputil.NewStoreGossiper(g)

	stores := []*roachpb.StoreDescriptor{
		{
			StoreID: 1,
			Node:    roachpb.NodeDescriptor{NodeID: 1},
		},
		{
			StoreID: 2,
			Node:    roachpb.NodeDescriptor{NodeID: 2},
		},
		{
			StoreID: 3,
			Node:    roachpb.NodeDescriptor{NodeID: 3},
		},
		{
			StoreID: 4,
			Node:    roachpb.NodeDescriptor{NodeID: 4},
		},
		{
			StoreID: 5,
			Node:    roachpb.NodeDescriptor{NodeID: 5},
		},
	}

	replicas := []roachpb.ReplicaDescriptor{
		{
			NodeID:    1,
			StoreID:   1,
			ReplicaID: 1,
		},
		{
			NodeID:    2,
			StoreID:   2,
			ReplicaID: 2,
		},
		{
			NodeID:    3,
			StoreID:   3,
			ReplicaID: 4,
		},
		{
			NodeID:    4,
			StoreID:   5,
			ReplicaID: 4,
		},
		{
			NodeID:    5,
			StoreID:   5,
			ReplicaID: 5,
		},
	}

	sg.GossipStores(stores, t)
	for i := 1; i <= 5; i++ {
		mnl.setLive(roachpb.NodeID(i), true)
	}

	deadReplicas := sp.deadReplicas(0, replicas)
	if len(deadReplicas) > 0 {
		t.Fatalf("expected no dead replicas initially, found %d (%v)", len(deadReplicas), deadReplicas)
	}
	// Mark nodes 4 & 5 as dead.
	mnl.setLive(4, false)
	mnl.setLive(5, false)

	deadReplicas = sp.deadReplicas(0, replicas)
	if a, e := deadReplicas, replicas[3:]; !reflect.DeepEqual(a, e) {
		t.Fatalf("findDeadReplicas did not return expected values; got \n%v, expected \n%v", a, e)
	}
}
// TestStorePoolGetStoreList ensures that the store list returns only stores
// that are live and match the attribute criteria.
func TestStorePoolGetStoreList(t *testing.T) {
	defer leaktest.AfterTest(t)()
	// We're going to manually mark stores dead in this test.
	stopper, g, _, sp, mnl := createTestStorePool(
		TestTimeUntilStoreDead, false /* deterministic */, false /* defaultNodeLiveness */)
	defer stopper.Stop()
	sg := gossiputil.NewStoreGossiper(g)
	constraints := config.Constraints{Constraints: []config.Constraint{{Value: "ssd"}, {Value: "dc"}}}
	required := []string{"ssd", "dc"}
	// Nothing yet.
	sl, _, _ := sp.getStoreList(roachpb.RangeID(0))
	sl = sl.filter(constraints)
	if len(sl.stores) != 0 {
		t.Errorf("expected no stores, instead %+v", sl.stores)
	}

	matchingStore := roachpb.StoreDescriptor{
		StoreID: 1,
		Node:    roachpb.NodeDescriptor{NodeID: 1},
		Attrs:   roachpb.Attributes{Attrs: required},
	}
	supersetStore := roachpb.StoreDescriptor{
		StoreID: 2,
		Node:    roachpb.NodeDescriptor{NodeID: 2},
		Attrs:   roachpb.Attributes{Attrs: append(required, "db")},
	}
	unmatchingStore := roachpb.StoreDescriptor{
		StoreID: 3,
		Node:    roachpb.NodeDescriptor{NodeID: 3},
		Attrs:   roachpb.Attributes{Attrs: []string{"ssd", "otherdc"}},
	}
	emptyStore := roachpb.StoreDescriptor{
		StoreID: 4,
		Node:    roachpb.NodeDescriptor{NodeID: 4},
		Attrs:   roachpb.Attributes{},
	}
	deadStore := roachpb.StoreDescriptor{
		StoreID: 5,
		Node:    roachpb.NodeDescriptor{NodeID: 5},
		Attrs:   roachpb.Attributes{Attrs: required},
	}
	declinedStore := roachpb.StoreDescriptor{
		StoreID: 6,
		Node:    roachpb.NodeDescriptor{NodeID: 6},
		Attrs:   roachpb.Attributes{Attrs: required},
	}
	corruptReplicaStore := roachpb.StoreDescriptor{
		StoreID: 7,
		Node:    roachpb.NodeDescriptor{NodeID: 7},
		Attrs:   roachpb.Attributes{Attrs: required},
	}

	corruptedRangeID := roachpb.RangeID(1)

	// Gossip and mark all alive initially.
	sg.GossipStores([]*roachpb.StoreDescriptor{
		&matchingStore,
		&supersetStore,
		&unmatchingStore,
		&emptyStore,
		&deadStore,
		&declinedStore,
		&corruptReplicaStore,
	}, t)
	for i := 1; i <= 7; i++ {
		mnl.setLive(roachpb.NodeID(i), true)
	}

	// Add some corrupt replicas that should not affect getStoreList().
	sp.mu.Lock()
	sp.mu.storeDetails[matchingStore.StoreID].deadReplicas[roachpb.RangeID(10)] =
		[]roachpb.ReplicaDescriptor{{
			StoreID: matchingStore.StoreID,
			NodeID:  matchingStore.Node.NodeID,
		}}
	sp.mu.storeDetails[matchingStore.StoreID].deadReplicas[roachpb.RangeID(11)] =
		[]roachpb.ReplicaDescriptor{{
			StoreID: matchingStore.StoreID,
			NodeID:  matchingStore.Node.NodeID,
		}}
	sp.mu.storeDetails[corruptReplicaStore.StoreID].deadReplicas[roachpb.RangeID(10)] =
		[]roachpb.ReplicaDescriptor{{
			StoreID: corruptReplicaStore.StoreID,
			NodeID:  corruptReplicaStore.Node.NodeID,
		}}
	sp.mu.Unlock()

	if err := verifyStoreList(
		sp,
		constraints,
		corruptedRangeID,
		[]int{
			int(matchingStore.StoreID),
			int(supersetStore.StoreID),
			int(deadStore.StoreID),
			int(declinedStore.StoreID),
			int(corruptReplicaStore.StoreID),
		},
		/* expectedAliveStoreCount */ 7,
		/* expectedThrottledStoreCount */ 0,
	); err != nil {
		t.Error(err)
	}

	// Set deadStore as dead.
	mnl.setLive(deadStore.Node.NodeID, false)
	sp.mu.Lock()
	// Set declinedStore as throttled.
	sp.mu.storeDetails[declinedStore.StoreID].throttledUntil = sp.clock.Now().GoTime().Add(time.Hour)
	// Add a corrupt replica to corruptReplicaStore.
	sp.mu.storeDetails[corruptReplicaStore.StoreID].deadReplicas[roachpb.RangeID(1)] =
		[]roachpb.ReplicaDescriptor{{
			StoreID: corruptReplicaStore.StoreID,
			NodeID:  corruptReplicaStore.Node.NodeID,
		}}
	sp.mu.Unlock()

	if err := verifyStoreList(
		sp,
		constraints,
		corruptedRangeID,
		[]int{
			int(matchingStore.StoreID),
			int(supersetStore.StoreID),
		},
		/* expectedAliveStoreCount */ 6,
		/* expectedThrottledStoreCount */ 1,
	); err != nil {
		t.Error(err)
	}
}
func TestSchemaChangeLease(t *testing.T) {
	defer leaktest.AfterTest(t)()
	params, _ := createTestServerParams()
	s, sqlDB, kvDB := serverutils.StartServer(t, params)
	defer s.Stopper().Stop()

	// Set MinSchemaChangeLeaseDuration to always expire the lease.
	minLeaseDuration := csql.MinSchemaChangeLeaseDuration
	csql.MinSchemaChangeLeaseDuration = 2 * csql.SchemaChangeLeaseDuration
	defer func() {
		csql.MinSchemaChangeLeaseDuration = minLeaseDuration
	}()

	if _, err := sqlDB.Exec(`
CREATE DATABASE t;
CREATE TABLE t.test (k CHAR PRIMARY KEY, v CHAR);
`); err != nil {
		t.Fatal(err)
	}

	var lease sqlbase.TableDescriptor_SchemaChangeLease
	var id = sqlbase.ID(keys.MaxReservedDescID + 2)
	var node = roachpb.NodeID(2)
	changer := csql.NewSchemaChangerForTesting(id, 0, node, *kvDB, nil)

	// Acquire a lease.
	lease, err := changer.AcquireLease()
	if err != nil {
		t.Fatal(err)
	}
	if !validExpirationTime(lease.ExpirationTime) {
		t.Fatalf("invalid expiration time: %s", time.Unix(0, lease.ExpirationTime))
	}

	// Acquiring another lease will fail.
	if _, err := changer.AcquireLease(); !testutils.IsError(
		err, "an outstanding schema change lease exists",
	) {
		t.Fatal(err)
	}

	// Extend the lease.
	newLease, err := changer.ExtendLease(lease)
	if err != nil {
		t.Fatal(err)
	}
	if !validExpirationTime(newLease.ExpirationTime) {
		t.Fatalf("invalid expiration time: %s", time.Unix(0, newLease.ExpirationTime))
	}

	// The new lease is a brand new lease.
	if newLease == lease {
		t.Fatalf("lease was not extended: %v", lease)
	}

	// Extending an old lease fails.
	if _, err := changer.ExtendLease(lease); !testutils.IsError(err, "table: .* has lease") {
		t.Fatal(err)
	}

	// Releasing an old lease fails.
	err = changer.ReleaseLease(lease)
	if err == nil {
		t.Fatal("releasing an old lease succeeded")
	}

	// Release the lease.
	err = changer.ReleaseLease(newLease)
	if err != nil {
		t.Fatal(err)
	}

	// Extending the lease fails.
	_, err = changer.ExtendLease(newLease)
	if err == nil {
		t.Fatalf("was able to extend an already released lease: %d, %v", id, lease)
	}

	// Acquiring the lease succeeds.
	lease, err = changer.AcquireLease()
	if err != nil {
		t.Fatal(err)
	}

	// Set MinSchemaChangeLeaseDuration to not expire the lease.
	csql.MinSchemaChangeLeaseDuration = minLeaseDuration
	newLease, err = changer.ExtendLease(lease)
	if err != nil {
		t.Fatal(err)
	}
	// The old lease is renewed.
	if newLease != lease {
		t.Fatalf("acquired new lease: %v, old lease: %v", newLease, lease)
	}
}
func TestSchemaChangeProcess(t *testing.T) {
	defer leaktest.AfterTest(t)()
	// The descriptor changes made must have an immediate effect
	// so disable leases on tables.
	defer csql.TestDisableTableLeases()()

	params, _ := createTestServerParams()
	// Disable external processing of mutations.
	params.Knobs.SQLSchemaChanger = &csql.SchemaChangerTestingKnobs{
		AsyncExecNotification: asyncSchemaChangerDisabled,
	}
	s, sqlDB, kvDB := serverutils.StartServer(t, params)
	defer s.Stopper().Stop()

	var id = sqlbase.ID(keys.MaxReservedDescID + 2)
	var node = roachpb.NodeID(2)
	stopper := stop.NewStopper()
	leaseMgr := csql.NewLeaseManager(
		&base.NodeIDContainer{},
		*kvDB,
		hlc.NewClock(hlc.UnixNano, time.Nanosecond),
		csql.LeaseManagerTestingKnobs{},
		stopper,
		&csql.MemoryMetrics{},
	)
	defer stopper.Stop()
	changer := csql.NewSchemaChangerForTesting(id, 0, node, *kvDB, leaseMgr)

	if _, err := sqlDB.Exec(`
CREATE DATABASE t;
CREATE TABLE t.test (k CHAR PRIMARY KEY, v CHAR, INDEX foo(v));
INSERT INTO t.test VALUES ('a', 'b'), ('c', 'd');
`); err != nil {
		t.Fatal(err)
	}

	// Read table descriptor for version.
	tableDesc := sqlbase.GetTableDescriptor(kvDB, "t", "test")
	expectedVersion := tableDesc.Version

	desc, err := changer.MaybeIncrementVersion()
	if err != nil {
		t.Fatal(err)
	}
	tableDesc = desc.GetTable()
	newVersion := tableDesc.Version
	if newVersion != expectedVersion {
		t.Fatalf("bad version; e = %d, v = %d", expectedVersion, newVersion)
	}
	isDone, err := changer.IsDone()
	if err != nil {
		t.Fatal(err)
	}
	if !isDone {
		t.Fatalf("table expected to not have an outstanding schema change: %v", tableDesc)
	}

	// Check that MaybeIncrementVersion increments the version
	// correctly.
	expectedVersion++
	tableDesc.UpVersion = true
	if err := kvDB.Put(
		context.TODO(),
		sqlbase.MakeDescMetadataKey(tableDesc.ID),
		sqlbase.WrapDescriptor(tableDesc),
	); err != nil {
		t.Fatal(err)
	}
	isDone, err = changer.IsDone()
	if err != nil {
		t.Fatal(err)
	}
	if isDone {
		t.Fatalf("table expected to have an outstanding schema change: %v", desc.GetTable())
	}
	desc, err = changer.MaybeIncrementVersion()
	if err != nil {
		t.Fatal(err)
	}
	tableDesc = desc.GetTable()
	savedTableDesc := sqlbase.GetTableDescriptor(kvDB, "t", "test")
	newVersion = tableDesc.Version
	if newVersion != expectedVersion {
		t.Fatalf("bad version in returned desc; e = %d, v = %d", expectedVersion, newVersion)
	}
	newVersion = savedTableDesc.Version
	if newVersion != expectedVersion {
		t.Fatalf("bad version in saved desc; e = %d, v = %d", expectedVersion, newVersion)
	}
	isDone, err = changer.IsDone()
	if err != nil {
		t.Fatal(err)
	}
	if !isDone {
		t.Fatalf("table expected to not have an outstanding schema change: %v", tableDesc)
	}

	// Check that RunStateMachineBeforeBackfill doesn't do anything
	// if there are no mutations queued.
	if err := changer.RunStateMachineBeforeBackfill(); err != nil {
		t.Fatal(err)
	}
	tableDesc = sqlbase.GetTableDescriptor(kvDB, "t", "test")
	newVersion = tableDesc.Version
	if newVersion != expectedVersion {
		t.Fatalf("bad version; e = %d, v = %d", expectedVersion, newVersion)
	}

	// Check that RunStateMachineBeforeBackfill functions properly.
	expectedVersion = tableDesc.Version
	// Make a copy of the index for use in a mutation.
	index := protoutil.Clone(&tableDesc.Indexes[0]).(*sqlbase.IndexDescriptor)
	index.Name = "bar"
	index.ID = tableDesc.NextIndexID
	tableDesc.NextIndexID++
	changer = csql.NewSchemaChangerForTesting(id, tableDesc.NextMutationID, node, *kvDB, leaseMgr)
	tableDesc.Mutations = append(tableDesc.Mutations, sqlbase.DescriptorMutation{
		Descriptor_: &sqlbase.DescriptorMutation_Index{Index: index},
		Direction:   sqlbase.DescriptorMutation_ADD,
		State:       sqlbase.DescriptorMutation_DELETE_ONLY,
		MutationID:  tableDesc.NextMutationID,
	})
	tableDesc.NextMutationID++

	// Run state machine in both directions.
	for _, direction := range []sqlbase.DescriptorMutation_Direction{
		sqlbase.DescriptorMutation_ADD, sqlbase.DescriptorMutation_DROP,
	} {
		tableDesc.Mutations[0].Direction = direction
		expectedVersion++
		if err := kvDB.Put(
			context.TODO(),
			sqlbase.MakeDescMetadataKey(tableDesc.ID),
			sqlbase.WrapDescriptor(tableDesc),
		); err != nil {
			t.Fatal(err)
		}
		// The expected end state.
		expectedState := sqlbase.DescriptorMutation_WRITE_ONLY
		if direction == sqlbase.DescriptorMutation_DROP {
			expectedState = sqlbase.DescriptorMutation_DELETE_ONLY
		}
		// Run two times to ensure idempotency of operations.
		for i := 0; i < 2; i++ {
			if err := changer.RunStateMachineBeforeBackfill(); err != nil {
				t.Fatal(err)
			}

			tableDesc = sqlbase.GetTableDescriptor(kvDB, "t", "test")
			newVersion = tableDesc.Version
			if newVersion != expectedVersion {
				t.Fatalf("bad version; e = %d, v = %d", expectedVersion, newVersion)
			}
			state := tableDesc.Mutations[0].State
			if state != expectedState {
				t.Fatalf("bad state; e = %d, v = %d", expectedState, state)
			}
		}
	}
	// RunStateMachineBeforeBackfill() doesn't complete the schema change.
	isDone, err = changer.IsDone()
	if err != nil {
		t.Fatal(err)
	}
	if isDone {
		t.Fatalf("table expected to have an outstanding schema change: %v", tableDesc)
	}
}
func TestSendAndReceive(t *testing.T) {
	defer leaktest.AfterTest(t)()
	rttc := newRaftTransportTestContext(t)
	defer rttc.Stop()

	// Create several servers, each of which has two stores (a raft
	// node ID addresses a store). Node 1 has stores 1 and 2, node 2 has
	// stores 3 and 4, etc.
	//
	// We suppose that range 1 is replicated across the odd-numbered
	// stores in reverse order to ensure that the various IDs are not
	// equal: replica 1 is store 5, replica 2 is store 3, and replica 3
	// is store 1.
	const numNodes = 3
	const storesPerNode = 2
	nextNodeID := roachpb.NodeID(2)
	nextStoreID := roachpb.StoreID(2)

	// Per-node state.
	transports := map[roachpb.NodeID]*storage.RaftTransport{}

	// Per-store state.
	storeNodes := map[roachpb.StoreID]roachpb.NodeID{}
	channels := map[roachpb.StoreID]channelServer{}
	replicaIDs := map[roachpb.StoreID]roachpb.ReplicaID{
		1: 3,
		3: 2,
		5: 1,
	}

	messageTypes := map[raftpb.MessageType]struct{}{
		raftpb.MsgHeartbeat: {},
	}

	for nodeIndex := 0; nodeIndex < numNodes; nodeIndex++ {
		nodeID := nextNodeID
		nextNodeID++
		transports[nodeID] = rttc.AddNode(nodeID)

		for storeIndex := 0; storeIndex < storesPerNode; storeIndex++ {
			storeID := nextStoreID
			nextStoreID++

			storeNodes[storeID] = nodeID

			channels[storeID] = rttc.ListenStore(nodeID, storeID)
		}
	}

	messageTypeCounts := make(map[roachpb.StoreID]map[raftpb.MessageType]int)

	// Each store sends one message of each type in messageTypes (currently
	// just a heartbeat) to every store, including itself.
	for toStoreID, toNodeID := range storeNodes {
		if _, ok := messageTypeCounts[toStoreID]; !ok {
			messageTypeCounts[toStoreID] = make(map[raftpb.MessageType]int)
		}

		for fromStoreID, fromNodeID := range storeNodes {
			baseReq := storage.RaftMessageRequest{
				RangeID: 1,
				Message: raftpb.Message{
					From: uint64(fromStoreID),
					To:   uint64(toStoreID),
				},
				FromReplica: roachpb.ReplicaDescriptor{
					NodeID:  fromNodeID,
					StoreID: fromStoreID,
				},
				ToReplica: roachpb.ReplicaDescriptor{
					NodeID:  toNodeID,
					StoreID: toStoreID,
				},
			}

			for messageType := range messageTypes {
				req := baseReq
				req.Message.Type = messageType

				if !transports[fromNodeID].SendAsync(&req) {
					t.Errorf("unable to send %s from %d to %d", req.Message.Type, fromNodeID, toNodeID)
				}
				messageTypeCounts[toStoreID][req.Message.Type]++
			}
		}
	}

	// Read all the messages from the channels. Note that the transport
	// does not guarantee in-order delivery between independent
	// transports, so we just verify that the right number of messages
	// end up in each channel.
	for toStoreID := range storeNodes {
		for len(messageTypeCounts[toStoreID]) > 0 {
			req := <-channels[toStoreID].ch
			if req.Message.To != uint64(toStoreID) {
				t.Errorf("got unexpected message %v on channel %d", req, toStoreID)
			}

			if typeCounts, ok := messageTypeCounts[toStoreID]; ok {
				if _, ok := typeCounts[req.Message.Type]; ok {
					typeCounts[req.Message.Type]--
					if typeCounts[req.Message.Type] == 0 {
						delete(typeCounts, req.Message.Type)
					}
				} else {
					t.Errorf("expected %v to have key %v, but it did not", typeCounts, req.Message.Type)
				}
			} else {
				t.Errorf("expected %v to have key %v, but it did not", messageTypeCounts, toStoreID)
			}
		}

		delete(messageTypeCounts, toStoreID)

		select {
		case req := <-channels[toStoreID].ch:
			t.Errorf("got unexpected message %v on channel %d", req, toStoreID)
		case <-time.After(100 * time.Millisecond):
		}
	}

	if len(messageTypeCounts) > 0 {
		t.Errorf("remaining messages expected: %v", messageTypeCounts)
	}

	// Real raft messages have different node/store/replica IDs.
	// Send a message from replica 2 (on store 3, node 2) to replica 1 (on store 5, node 3).
	fromStoreID := roachpb.StoreID(3)
	toStoreID := roachpb.StoreID(5)
	expReq := &storage.RaftMessageRequest{
		RangeID: 1,
		Message: raftpb.Message{
			Type: raftpb.MsgApp,
			From: uint64(replicaIDs[fromStoreID]),
			To:   uint64(replicaIDs[toStoreID]),
		},
		FromReplica: roachpb.ReplicaDescriptor{
			NodeID:    storeNodes[fromStoreID],
			StoreID:   fromStoreID,
			ReplicaID: replicaIDs[fromStoreID],
		},
		ToReplica: roachpb.ReplicaDescriptor{
			NodeID:    storeNodes[toStoreID],
			StoreID:   toStoreID,
			ReplicaID: replicaIDs[toStoreID],
		},
	}
	if !transports[storeNodes[fromStoreID]].SendAsync(expReq) {
		t.Errorf("unable to send message from %d to %d", fromStoreID, toStoreID)
	}
	if req := <-channels[toStoreID].ch; !proto.Equal(req, expReq) {
		t.Errorf("got unexpected message %v on channel %d", req, toStoreID)
	}

	select {
	case req := <-channels[toStoreID].ch:
		t.Errorf("got unexpected message %v on channel %d", req, toStoreID)
	default:
	}
}
// TestBookieReserve ensures that you can never have more than one reservation
// for a specific rangeID at a time, and that both `Reserve` and `Fill` function
// correctly.
func TestBookieReserve(t *testing.T) {
	defer leaktest.AfterTest(t)()
	b := createTestBookie(5, defaultMaxReservedBytes)

	testCases := []struct {
		rangeID      int
		reserve      bool                   // true for reserve, false for fill
		expSuc       bool                   // is the operation expected to succeed
		expOut       int                    // expected number of reserved replicas
		expBytes     int64                  // expected number of bytes being reserved
		deadReplicas []roachpb.ReplicaIdent // dead replicas that we should not reserve over
	}{
		{rangeID: 1, reserve: true, expSuc: true, expOut: 1, expBytes: 1},
		{rangeID: 1, reserve: true, expSuc: false, expOut: 1, expBytes: 1},
		{rangeID: 1, reserve: false, expSuc: true, expOut: 0, expBytes: 0},
		{rangeID: 1, reserve: false, expSuc: false, expOut: 0, expBytes: 0},
		{rangeID: 2, reserve: true, expSuc: true, expOut: 1, expBytes: 2},
		{rangeID: 3, reserve: true, expSuc: true, expOut: 2, expBytes: 5},
		{rangeID: 1, reserve: true, expSuc: true, expOut: 3, expBytes: 6},
		{rangeID: 2, reserve: true, expSuc: false, expOut: 3, expBytes: 6},
		{rangeID: 2, reserve: false, expSuc: true, expOut: 2, expBytes: 4},
		{rangeID: 2, reserve: false, expSuc: false, expOut: 2, expBytes: 4},
		{rangeID: 3, reserve: false, expSuc: true, expOut: 1, expBytes: 1},
		{rangeID: 1, reserve: false, expSuc: true, expOut: 0, expBytes: 0},
		{rangeID: 2, reserve: false, expSuc: false, expOut: 0, expBytes: 0},
		{rangeID: 0, reserve: true, expSuc: false, expOut: 0, expBytes: 0,
			deadReplicas: []roachpb.ReplicaIdent{{RangeID: 0}}},
		{rangeID: 0, reserve: true, expSuc: true, expOut: 1, expBytes: 0,
			deadReplicas: []roachpb.ReplicaIdent{{RangeID: 1}}},
		{rangeID: 0, reserve: false, expSuc: true, expOut: 0, expBytes: 0},
	}

	ctx := context.Background()
	for i, testCase := range testCases {
		if testCase.reserve {
			// Try to reserve the range.
			req := reservationRequest{
				StoreRequestHeader: StoreRequestHeader{
					StoreID: roachpb.StoreID(i),
					NodeID:  roachpb.NodeID(i),
				},
				RangeID:   roachpb.RangeID(testCase.rangeID),
				RangeSize: int64(testCase.rangeID),
			}
			if resp := b.Reserve(ctx, req, testCase.deadReplicas); resp.Reserved != testCase.expSuc {
				if testCase.expSuc {
					t.Errorf("%d: expected a successful reservation, was rejected", i)
				} else {
					t.Errorf("%d: expected no reservation, but it was accepted", i)
				}
			}
		} else {
			// Fill the reservation.
			if filled := b.Fill(ctx, roachpb.RangeID(testCase.rangeID)); filled != testCase.expSuc {
				if testCase.expSuc {
					t.Errorf("%d: expected a successful filled reservation, was rejected", i)
				} else {
					t.Errorf("%d: expected no reservation to be filled, but it was accepted", i)
				}
			}
		}

		verifyBookie(t, b, testCase.expOut, testCase.expBytes)
	}

	// Test that repeated requests with the same store and node number extend
	// the timeout of the pre-existing reservation.
	repeatReq := reservationRequest{
		StoreRequestHeader: StoreRequestHeader{
			StoreID: 100,
			NodeID:  100,
		},
		RangeID:   100,
		RangeSize: 100,
	}
	for i := 1; i < 10; i++ {
		if !b.Reserve(context.Background(), repeatReq, nil).Reserved {
			t.Errorf("%d: could not add repeated reservation", i)
		}
		verifyBookie(t, b, 1, 100)
	}

	// Test rejecting a reservation due to disk space constraints.
	overfilledReq := reservationRequest{
		StoreRequestHeader: StoreRequestHeader{
			StoreID: 200,
			NodeID:  200,
		},
		RangeID:   200,
		RangeSize: 200,
	}

	b.mu.Lock()
	// Set the available bytes to 1 byte less than the reservation requires.
	b.metrics.Available.Update(b.mu.size + (2 * overfilledReq.RangeSize) - 1)
	b.mu.Unlock()

	if b.Reserve(context.Background(), overfilledReq, nil).Reserved {
		t.Errorf("expected reservation to fail due to disk space constraints, but it succeeded")
	}
	verifyBookie(t, b, 1, 100) // The same numbers from the last call to verifyBookie.
}
func TestDiversityRemovalScore(t *testing.T) {
	defer leaktest.AfterTest(t)()

	testCases := []struct {
		name     string
		expected map[roachpb.StoreID]float64
	}{
		{
			name: "four existing replicas",
			expected: map[roachpb.StoreID]float64{
				testStoreUSa15:  1,
				testStoreUSa1:   1,
				testStoreUSb:    1,
				testStoreEurope: 1.0 / 2.0,
			},
		},
		{
			name: "three existing replicas - testStoreUSa15",
			expected: map[roachpb.StoreID]float64{
				testStoreUSa1:   1,
				testStoreUSb:    1,
				testStoreEurope: 1.0 / 2.0,
			},
		},
		{
			name: "three existing replicas - testStoreUSa1",
			expected: map[roachpb.StoreID]float64{
				testStoreUSa15:  1,
				testStoreUSb:    1,
				testStoreEurope: 1.0 / 2.0,
			},
		},
		{
			name: "three existing replicas - testStoreUSb",
			expected: map[roachpb.StoreID]float64{
				testStoreUSa15:  1,
				testStoreUSa1:   1,
				testStoreEurope: 1.0 / 4.0,
			},
		},
		{
			name: "three existing replicas - testStoreEurope",
			expected: map[roachpb.StoreID]float64{
				testStoreUSa15: 1.0 / 2.0,
				testStoreUSa1:  1.0 / 2.0,
				testStoreUSb:   1.0 / 4.0,
			},
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			existingNodeLocalities := make(map[roachpb.NodeID]roachpb.Locality)
			for _, s := range testStores {
				if _, ok := tc.expected[s.StoreID]; ok {
					existingNodeLocalities[roachpb.NodeID(s.Node.NodeID)] = s.Node.Locality
				}
			}
			for _, s := range testStores {
				if _, ok := tc.expected[s.StoreID]; !ok {
					continue
				}
				actualScore := diversityRemovalScore(s.Node.NodeID, existingNodeLocalities)
				expectedScore, ok := tc.expected[s.StoreID]
				if !ok {
					t.Fatalf("no expected score found for storeID %d", s.StoreID)
				}
				if actualScore != expectedScore {
					t.Errorf("store %d expected diversity removal score: %.2f, actual %.2f",
						s.StoreID, expectedScore, actualScore)
				}
			}
		})
	}
}
func TestDiversityScore(t *testing.T) {
	defer leaktest.AfterTest(t)()

	testCases := []struct {
		name     string
		existing []roachpb.NodeID
		expected map[roachpb.StoreID]float64
	}{
		{
			name: "no existing replicas",
			expected: map[roachpb.StoreID]float64{
				testStoreUSa15:  1,
				testStoreUSa1:   1,
				testStoreUSb:    1,
				testStoreEurope: 1,
			},
		},
		{
			name: "one existing replica",
			existing: []roachpb.NodeID{
				roachpb.NodeID(testStoreUSa15),
			},
			expected: map[roachpb.StoreID]float64{
				testStoreUSa15:  0,
				testStoreUSa1:   1.0 / 4.0,
				testStoreUSb:    1.0 / 2.0,
				testStoreEurope: 1,
			},
		},
		{
			name: "two existing replicas",
			existing: []roachpb.NodeID{
				roachpb.NodeID(testStoreUSa15),
				roachpb.NodeID(testStoreEurope),
			},
			expected: map[roachpb.StoreID]float64{
				testStoreUSa15:  0,
				testStoreUSa1:   1.0 / 4.0,
				testStoreUSb:    1.0 / 2.0,
				testStoreEurope: 0,
			},
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			existingNodeLocalities := make(map[roachpb.NodeID]roachpb.Locality)
			for _, nodeID := range tc.existing {
				for _, s := range testStores {
					if s.Node.NodeID == nodeID {
						existingNodeLocalities[roachpb.NodeID(s.Node.NodeID)] = s.Node.Locality
					}
				}
			}
			for _, s := range testStores {
				actualScore := diversityScore(s, existingNodeLocalities)
				expectedScore, ok := tc.expected[s.StoreID]
				if !ok {
					t.Fatalf("no expected score found for storeID %d", s.StoreID)
				}
				if actualScore != expectedScore {
					t.Errorf("store %d expected diversity score: %.2f, actual %.2f",
						s.StoreID, expectedScore, actualScore)
				}
			}
		})
	}
}
// Get returns the current node ID; 0 if it is unset.
func (n *NodeIDContainer) Get() roachpb.NodeID {
	return roachpb.NodeID(atomic.LoadInt32(&n.nodeID))
}
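// A minimal usage sketch, not from this repository: it is assumed to live
// outside package base (hence the base. qualifier), and Set's (ctx, NodeID)
// signature is taken from the calls in LocalTestCluster.Start above.
func ExampleNodeIDContainer() {
	nc := &base.NodeIDContainer{}
	fmt.Println(nc.Get()) // 0 while unset
	nc.Set(context.TODO(), roachpb.NodeID(3))
	fmt.Println(nc.Get()) // 3 once set
}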