// checkGossip fetches the gossip infoStore from each node and invokes the given
// function. The test passes if the function returns nil for every node,
// retrying for up to the given duration.
func checkGossip(t *testing.T, c cluster.Cluster, d time.Duration, f checkGossipFunc) {
	util.SucceedsWithin(t, d, func() error {
		select {
		case <-stopper:
			t.Fatalf("interrupted")
			return nil
		case <-time.After(1 * time.Second):
		}

		for i := 0; i < c.NumNodes(); i++ {
			var m map[string]interface{}
			if err := getJSON(c.URL(i), "/_status/gossip/local", &m); err != nil {
				return err
			}
			infos, ok := m["infos"].(map[string]interface{})
			if !ok {
				return errors.New("no infos yet")
			}
			if err := f(infos); err != nil {
				return util.Errorf("node %d: %s", i, err)
			}
		}
		return nil
	})
}
// checkGossip fetches the gossip infoStore from each node and invokes the given
// function. The test passes if the function returns nil for every node,
// retrying for up to the given duration.
func checkGossip(t *testing.T, c cluster.Cluster, d time.Duration, f checkGossipFunc) {
	err := util.RetryForDuration(d, func() error {
		select {
		case <-stopper:
			t.Fatalf("interrupted")
			return nil
		case <-time.After(1 * time.Second):
		}

		var infoStatus gossip.InfoStatus
		for i := 0; i < c.NumNodes(); i++ {
			if err := util.GetJSON(cluster.HTTPClient, c.URL(i)+"/_status/gossip/local", &infoStatus); err != nil {
				return err
			}
			if err := f(infoStatus.Infos); err != nil {
				return errors.Errorf("node %d: %s", i, err)
			}
		}
		return nil
	})

	if err != nil {
		t.Fatal(errors.Errorf("condition failed to evaluate within %s: %s", d, err))
	}
}
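// hasPeers (used by testBuildInfoInner below) is not part of this excerpt. A
// minimal sketch of such a checkGossipFunc is given here, assuming that
// checkGossipFunc takes the Infos map from gossip.InfoStatus and that node
// gossip keys use a "node:" prefix; both the name and the key convention are
// assumptions for illustration, not the project's actual helper.
func hasPeersSketch(expected int) checkGossipFunc {
	return func(infos map[string]gossip.Info) error {
		// Count the gossip entries that describe node descriptors.
		count := 0
		for k := range infos {
			if strings.HasPrefix(k, "node:") {
				count++
			}
		}
		if count != expected {
			return errors.Errorf("expected %d peers, found %d", expected, count)
		}
		return nil
	}
}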
func testBuildInfoInner(t *testing.T, c cluster.Cluster, cfg cluster.TestConfig) {
	checkGossip(t, c, 20*time.Second, hasPeers(c.NumNodes()))

	var details server.DetailsResponse
	util.SucceedsSoon(t, func() error {
		select {
		case <-stopper:
			t.Fatalf("interrupted")
		default:
		}
		return util.GetJSON(cluster.HTTPClient, c.URL(0)+"/_status/details/local", &details)
	})

	bi := details.BuildInfo
	testData := map[string]string{
		"go_version":   bi.GoVersion,
		"tag":          bi.Tag,
		"time":         bi.Time,
		"dependencies": bi.Dependencies,
	}
	for key, val := range testData {
		if val == "" {
			t.Errorf("build info not set for \"%s\"", key)
		}
	}
}
func testStatusServerInner(t *testing.T, c cluster.Cluster, cfg cluster.TestConfig) {
	// Get the ids for each node.
	idMap := make(map[int]string)
	for i := 0; i < c.NumNodes(); i++ {
		var detail details
		if err := getJSON(c.URL(i), "/_status/details/local", &detail); err != nil {
			t.Fatal(err)
		}
		idMap[i] = detail.NodeID.String()
	}

	// Check the local response for every node.
	for i := 0; i < c.NumNodes(); i++ {
		checkNode(t, c, i, idMap[i], "local", idMap[i])
		get(t, c.URL(i), "/_status/nodes")
		get(t, c.URL(i), "/_status/stores")
	}

	// Proxy from the first node to the last node.
	firstNode := 0
	lastNode := c.NumNodes() - 1
	firstID := idMap[firstNode]
	lastID := idMap[lastNode]
	checkNode(t, c, firstNode, firstID, lastID, lastID)

	// And from the last node to the first node.
	checkNode(t, c, lastNode, lastID, firstID, firstID)

	// And from the last node to the last node.
	checkNode(t, c, lastNode, lastID, lastID, lastID)
}
func postFreeze(
	c cluster.Cluster, freeze bool, timeout time.Duration,
) (serverpb.ClusterFreezeResponse, error) {
	httpClient := cluster.HTTPClient
	httpClient.Timeout = timeout

	var resp serverpb.ClusterFreezeResponse
	log.Infof("requesting: freeze=%t, timeout=%s", freeze, timeout)

	cb := func(v proto.Message) {
		oldNum := resp.RangesAffected
		resp = *v.(*serverpb.ClusterFreezeResponse)
		if oldNum > resp.RangesAffected {
			resp.RangesAffected = oldNum
		}
		if (resp != serverpb.ClusterFreezeResponse{}) {
			log.Infof("%+v", &resp)
		}
	}
	err := util.StreamJSON(
		httpClient,
		c.URL(0)+"/_admin/v1/cluster/freeze",
		&serverpb.ClusterFreezeRequest{Freeze: freeze},
		&serverpb.ClusterFreezeResponse{},
		cb,
	)
	return resp, err
}
func testStatusServerInner(t *testing.T, c cluster.Cluster, cfg cluster.TestConfig) {
	// Get the ids for each node.
	idMap := make(map[int]roachpb.NodeID)
	for i := 0; i < c.NumNodes(); i++ {
		var details server.DetailsResponse
		if err := util.GetJSON(cluster.HTTPClient, c.URL(i)+"/_status/details/local", &details); err != nil {
			t.Fatal(err)
		}
		idMap[i] = details.NodeID
	}

	// Check the local response for every node.
	for i := 0; i < c.NumNodes(); i++ {
		id := idMap[i]
		checkNode(t, c, i, id, id, id)
		get(t, c.URL(i), "/_status/nodes")
	}

	// Proxy from the first node to the last node.
	firstNode := 0
	lastNode := c.NumNodes() - 1
	firstID := idMap[firstNode]
	lastID := idMap[lastNode]
	checkNode(t, c, firstNode, firstID, lastID, lastID)

	// And from the last node to the first node.
	checkNode(t, c, lastNode, lastID, firstID, firstID)

	// And from the last node to the last node.
	checkNode(t, c, lastNode, lastID, lastID, lastID)
}
func testBuildInfoInner(t *testing.T, c cluster.Cluster, cfg cluster.TestConfig) {
	checkGossip(t, c, 20*time.Second, hasPeers(c.NumNodes()))

	util.SucceedsSoon(t, func() error {
		select {
		case <-stopper:
			t.Fatalf("interrupted")
			return nil
		default:
		}

		var r struct {
			BuildInfo map[string]string
		}
		if err := getJSON(c.URL(0), "/_status/details/local", &r); err != nil {
			return err
		}
		for _, key := range []string{"goVersion", "tag", "time", "dependencies"} {
			if val, ok := r.BuildInfo[key]; !ok {
				t.Errorf("build info missing for \"%s\"", key)
			} else if val == "" {
				t.Errorf("build info not set for \"%s\"", key)
			}
		}
		return nil
	})
}
func postFreeze(c cluster.Cluster, freeze bool) (server.ClusterFreezeResponse, error) {
	httpClient := cluster.HTTPClient()
	httpClient.Timeout = 10 * time.Second

	var resp server.ClusterFreezeResponse
	err := postJSON(httpClient, c.URL(0), "/_admin/v1/cluster/freeze",
		&server.ClusterFreezeRequest{Freeze: freeze}, &resp)
	return resp, err
}
func postFreeze(
	c cluster.Cluster, freeze bool, timeout time.Duration,
) (server.ClusterFreezeResponse, error) {
	httpClient := cluster.HTTPClient
	httpClient.Timeout = timeout

	var resp server.ClusterFreezeResponse
	err := util.PostJSON(
		httpClient,
		c.URL(0)+"/_admin/v1/cluster/freeze",
		&server.ClusterFreezeRequest{Freeze: freeze},
		&resp,
	)
	return resp, err
}
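// Illustrative only: one way a freeze/unfreeze test might drive postFreeze.
// The function name, the one-minute timeout, and the assertions below are
// assumptions for this sketch, not taken from the original test.
func exampleFreezeUnfreeze(t *testing.T, c cluster.Cluster) {
	// Freeze the cluster and expect at least one range to be affected.
	if resp, err := postFreeze(c, true, time.Minute); err != nil {
		t.Fatal(err)
	} else if resp.RangesAffected == 0 {
		t.Fatal("freeze affected no ranges")
	}
	// Thaw the cluster again so later tests see a writable cluster.
	if _, err := postFreeze(c, false, time.Minute); err != nil {
		t.Fatal(err)
	}
}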
func testAdminLossOfQuorumInner(t *testing.T, c cluster.Cluster, cfg cluster.TestConfig) { if c.NumNodes() < 2 { t.Logf("skipping test %s because given cluster has too few nodes", cfg.Name) return } // Get the ids for each node. nodeIDs := make([]roachpb.NodeID, c.NumNodes()) for i := 0; i < c.NumNodes(); i++ { var details serverpb.DetailsResponse if err := util.GetJSON(cluster.HTTPClient, c.URL(i)+"/_status/details/local", &details); err != nil { t.Fatal(err) } nodeIDs[i] = details.NodeID } // Leave only the first node alive. for i := 1; i < c.NumNodes(); i++ { if err := c.Kill(i); err != nil { t.Fatal(err) } } // Retrieve node statuses. var nodes serverpb.NodesResponse if err := util.GetJSON(cluster.HTTPClient, c.URL(0)+"/_status/nodes", &nodes); err != nil { t.Fatal(err) } for _, nodeID := range nodeIDs { var nodeStatus status.NodeStatus if err := util.GetJSON(cluster.HTTPClient, c.URL(0)+"/_status/nodes/"+strconv.Itoa(int(nodeID)), &nodeStatus); err != nil { t.Fatal(err) } } // Retrieve time-series data. nowNanos := timeutil.Now().UnixNano() queryRequest := tspb.TimeSeriesQueryRequest{ StartNanos: nowNanos - 10*time.Second.Nanoseconds(), EndNanos: nowNanos, Queries: []tspb.Query{ {Name: "doesnt_matter", Sources: []string{}}, }, } var queryResponse tspb.TimeSeriesQueryResponse if err := util.PostJSON(cluster.HTTPClient, c.URL(0)+"/ts/query", &queryRequest, &queryResponse); err != nil { t.Fatal(err) } // TODO(cdo): When we're able to issue SQL queries without a quorum, test all // admin endpoints that issue SQL queries here. }
func testAdminLossOfQuorumInner(t *testing.T, c cluster.Cluster, cfg cluster.TestConfig) { if c.NumNodes() < 2 { t.Logf("skipping test %s because given cluster has too few nodes", cfg.Name) return } // Get the ids for each node. idMap := make(map[int]string) for i := 0; i < c.NumNodes(); i++ { var detail details if err := getJSON(c.URL(i), "/_status/details/local", &detail); err != nil { t.Fatal(err) } idMap[i] = detail.NodeID.String() } // Leave only the first node alive. for i := 1; i < c.NumNodes(); i++ { if err := c.Kill(i); err != nil { t.Fatal(err) } } // Retrieve node statuses. var nodeStatuses interface{} if err := getJSON(c.URL(0), "/_status/nodes/", &nodeStatuses); err != nil { t.Fatal(err) } for i := 0; i < c.NumNodes(); i++ { var nodeStatus interface{} url := fmt.Sprintf("/_status/nodes/%s", idMap[i]) if err := getJSON(c.URL(0), url, &nodeStatus); err != nil { t.Fatal(err) } } // Retrieve time-series data. nowNanos := timeutil.Now().UnixNano() queryRequest := ts.TimeSeriesQueryRequest{ StartNanos: nowNanos - 10*time.Second.Nanoseconds(), EndNanos: nowNanos, Queries: []ts.Query{ {Name: "doesnt_matter", Sources: []string{}}, }, } var queryResponse ts.TimeSeriesQueryResponse if err := postJSON(cluster.HTTPClient(), c.URL(0), "/ts/query", &queryRequest, &queryResponse); err != nil { t.Fatal(err) } // TODO(cdo): When we're able to issue SQL queries without a quorum, test all // admin endpoints that issue SQL queries here. }
// checkNode checks all the endpoints of the status server hosted by node and // requests info for the node with otherNodeID. That node could be the same // other node, the same node or "local". func checkNode(t *testing.T, c cluster.Cluster, i int, nodeID, otherNodeID, expectedNodeID string) { var detail details if err := getJSON(c.URL(i), "/_status/details/"+otherNodeID, &detail); err != nil { t.Fatal(util.ErrorfSkipFrames(1, "unable to parse details - %s", err)) } if actualNodeID := detail.NodeID.String(); actualNodeID != expectedNodeID { t.Fatal(util.ErrorfSkipFrames(1, "%s calling %s: node ids don't match - expected %s, actual %s", nodeID, otherNodeID, expectedNodeID, actualNodeID)) } get(t, c.URL(i), fmt.Sprintf("/_status/gossip/%s", otherNodeID)) get(t, c.URL(i), fmt.Sprintf("/_status/logfiles/%s", otherNodeID)) get(t, c.URL(i), fmt.Sprintf("/_status/logs/%s", otherNodeID)) get(t, c.URL(i), fmt.Sprintf("/_status/stacks/%s", otherNodeID)) get(t, c.URL(i), fmt.Sprintf("/_status/nodes/%s", otherNodeID)) }
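// getJSON, used by the older variants in this file, is not shown in this
// excerpt. A minimal sketch, assuming it issues a GET against url+rel with the
// cluster's HTTP client and decodes the JSON response body into v; the name
// and the error handling here are assumptions. It relies on the standard
// library's encoding/json and net/http packages.
func getJSONSketch(url, rel string, v interface{}) error {
	httpClient := cluster.HTTPClient()
	resp, err := httpClient.Get(url + rel)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("GET %s%s: unexpected status %s", url, rel, resp.Status)
	}
	return json.NewDecoder(resp.Body).Decode(v)
}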
// checkNode checks all the endpoints of the status server hosted by node and // requests info for the node with otherNodeID. That node could be the same // other node, the same node or "local". func checkNode(t *testing.T, c cluster.Cluster, i int, nodeID, otherNodeID, expectedNodeID roachpb.NodeID) { urlIDs := []string{otherNodeID.String()} if nodeID == otherNodeID { urlIDs = append(urlIDs, "local") } var details server.DetailsResponse for _, urlID := range urlIDs { if err := util.GetJSON(cluster.HTTPClient, c.URL(i)+"/_status/details/"+urlID, &details); err != nil { t.Fatal(util.ErrorfSkipFrames(1, "unable to parse details - %s", err)) } if details.NodeID != expectedNodeID { t.Fatal(util.ErrorfSkipFrames(1, "%d calling %s: node ids don't match - expected %d, actual %d", nodeID, urlID, expectedNodeID, details.NodeID)) } get(t, c.URL(i), fmt.Sprintf("/_status/gossip/%s", urlID)) get(t, c.URL(i), fmt.Sprintf("/_status/nodes/%s", urlID)) get(t, c.URL(i), fmt.Sprintf("/_status/logfiles/%s", urlID)) get(t, c.URL(i), fmt.Sprintf("/_status/logs/%s", urlID)) get(t, c.URL(i), fmt.Sprintf("/_status/stacks/%s", urlID)) } }
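// get, used throughout this file to hit status endpoints, is also not part of
// this excerpt. A minimal sketch, assuming it only needs to assert that the
// endpoint answers with HTTP 200 and return the body; the name and the body
// handling (io/ioutil, net/http) are assumptions for illustration.
func getSketch(t *testing.T, base, rel string) []byte {
	httpClient := cluster.HTTPClient
	resp, err := httpClient.Get(base + rel)
	if err != nil {
		t.Fatal(err)
	}
	defer resp.Body.Close()
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		t.Fatal(err)
	}
	if resp.StatusCode != http.StatusOK {
		t.Fatalf("could not GET %s%s - status: %d - body: %s", base, rel, resp.StatusCode, body)
	}
	return body
}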