Example #1
func testAdminLossOfQuorumInner(t *testing.T, c cluster.Cluster, cfg cluster.TestConfig) {
	if c.NumNodes() < 2 {
		t.Logf("skipping test %s because given cluster has too few nodes", cfg.Name)
		return
	}

	// Get the ids for each node.
	nodeIDs := make([]roachpb.NodeID, c.NumNodes())
	for i := 0; i < c.NumNodes(); i++ {
		var details serverpb.DetailsResponse
		if err := util.GetJSON(cluster.HTTPClient, c.URL(i)+"/_status/details/local", &details); err != nil {
			t.Fatal(err)
		}
		nodeIDs[i] = details.NodeID
	}

	// Leave only the first node alive.
	for i := 1; i < c.NumNodes(); i++ {
		if err := c.Kill(i); err != nil {
			t.Fatal(err)
		}
	}

	// Retrieve node statuses.
	var nodes serverpb.NodesResponse
	if err := util.GetJSON(cluster.HTTPClient, c.URL(0)+"/_status/nodes", &nodes); err != nil {
		t.Fatal(err)
	}

	for _, nodeID := range nodeIDs {
		var nodeStatus status.NodeStatus
		if err := util.GetJSON(cluster.HTTPClient, c.URL(0)+"/_status/nodes/"+strconv.Itoa(int(nodeID)), &nodeStatus); err != nil {
			t.Fatal(err)
		}
	}

	// Retrieve time-series data.
	nowNanos := timeutil.Now().UnixNano()
	queryRequest := tspb.TimeSeriesQueryRequest{
		StartNanos: nowNanos - 10*time.Second.Nanoseconds(),
		EndNanos:   nowNanos,
		Queries: []tspb.Query{
			{Name: "doesnt_matter", Sources: []string{}},
		},
	}
	var queryResponse tspb.TimeSeriesQueryResponse
	if err := util.PostJSON(cluster.HTTPClient, c.URL(0)+"/ts/query",
		&queryRequest, &queryResponse); err != nil {
		t.Fatal(err)
	}

	// TODO(cdo): When we're able to issue SQL queries without a quorum, test all
	// admin endpoints that issue SQL queries here.
}
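For reference, util.PostJSON presumably marshals the request to JSON, issues an HTTP POST, and decodes the JSON response into its last argument. A minimal sketch under that assumption (a hypothetical stand-in, not the actual cockroach/util implementation):

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

// postJSONSketch POSTs request as JSON to url and decodes the JSON response
// body into response. Hypothetical stand-in for util.PostJSON.
func postJSONSketch(client *http.Client, url string, request, response interface{}) error {
	body, err := json.Marshal(request)
	if err != nil {
		return err
	}
	resp, err := client.Post(url, "application/json", bytes.NewReader(body))
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("unexpected status %s posting to %s", resp.Status, url)
	}
	return json.NewDecoder(resp.Body).Decode(response)
}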
Example #2
// checkGossip fetches the gossip infoStore from each node and invokes the given
// function. The test passes if the function returns nil for every node,
// retrying for up to the given duration.
func checkGossip(t *testing.T, c cluster.Cluster, d time.Duration, f checkGossipFunc) {
	err := util.RetryForDuration(d, func() error {
		select {
		case <-stopper:
			t.Fatalf("interrupted")
			return nil
		case <-time.After(1 * time.Second):
		}

		var infoStatus gossip.InfoStatus
		for i := 0; i < c.NumNodes(); i++ {
			if err := util.GetJSON(cluster.HTTPClient, c.URL(i)+"/_status/gossip/local", &infoStatus); err != nil {
				return err
			}
			if err := f(infoStatus.Infos); err != nil {
				return errors.Errorf("node %d: %s", i, err)
			}
		}

		return nil
	})
	if err != nil {
		t.Fatal(errors.Errorf("condition failed to evaluate within %s: %s", d, err))
	}
}
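util.RetryForDuration retries the closure until it returns nil or the duration runs out. A minimal sketch with those semantics (assumed behavior, not the actual cockroach/util implementation):

import "time"

// retryForDurationSketch calls fn until it returns nil or d has elapsed,
// returning the last error seen. Hypothetical stand-in for util.RetryForDuration.
func retryForDurationSketch(d time.Duration, fn func() error) error {
	deadline := time.Now().Add(d)
	var err error
	for {
		if err = fn(); err == nil {
			return nil
		}
		if time.Now().After(deadline) {
			return err
		}
		time.Sleep(100 * time.Millisecond) // brief pause between attempts
	}
}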
Example #3
// GetAddress returns a net.Addr or error.
func (nl *nodeLookupResolver) GetAddress() (net.Addr, error) {
	if nl.httpClient == nil {
		tlsConfig, err := nl.context.GetClientTLSConfig()
		if err != nil {
			return nil, err
		}
		nl.httpClient = &http.Client{
			Transport: &http.Transport{TLSClientConfig: tlsConfig},
			Timeout:   base.NetworkTimeout,
		}
	}

	local := struct {
		Address util.UnresolvedAddr `json:"address"`
		// We ignore all other fields.
	}{}

	log.Infof("querying %s for gossip nodes", nl.addr)
	// TODO(marc): put common URIs in base and reuse everywhere.
	if err := util.GetJSON(nl.httpClient, nl.context.HTTPRequestScheme(), nl.addr, "/_status/details/local", &local); err != nil {
		return nil, err
	}

	addr, err := resolveAddress(local.Address.Network(), local.Address.String())
	if err != nil {
		return nil, err
	}
	return addr, nil
}
Example #4
func testBuildInfoInner(t *testing.T, c cluster.Cluster, cfg cluster.TestConfig) {
	checkGossip(t, c, 20*time.Second, hasPeers(c.NumNodes()))

	var details server.DetailsResponse
	util.SucceedsSoon(t, func() error {
		select {
		case <-stopper:
			t.Fatalf("interrupted")
		default:
		}
		return util.GetJSON(cluster.HTTPClient, c.URL(0)+"/_status/details/local", &details)
	})

	bi := details.BuildInfo
	testData := map[string]string{
		"go_version":   bi.GoVersion,
		"tag":          bi.Tag,
		"time":         bi.Time,
		"dependencies": bi.Dependencies,
	}
	for key, val := range testData {
		if val == "" {
			t.Errorf("build info not set for \"%s\"", key)
		}
	}
}
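util.SucceedsSoon presumably keeps retrying the closure until it returns nil, failing the test if it still errors past some deadline. A sketch under that assumption (the real helper's deadline and backoff may differ):

import (
	"testing"
	"time"
)

// succeedsSoonSketch retries fn until it returns nil, calling t.Fatalf with
// the last error if it keeps failing past an assumed deadline. Hypothetical
// stand-in for util.SucceedsSoon.
func succeedsSoonSketch(t *testing.T, fn func() error) {
	deadline := time.Now().Add(45 * time.Second) // assumed deadline
	var err error
	for {
		if err = fn(); err == nil {
			return
		}
		if time.Now().After(deadline) {
			t.Fatalf("condition failed to evaluate in time: %s", err)
		}
		time.Sleep(100 * time.Millisecond)
	}
}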
Example #5
func testStatusServerInner(t *testing.T, c cluster.Cluster, cfg cluster.TestConfig) {
	// Get the ids for each node.
	idMap := make(map[int]roachpb.NodeID)
	for i := 0; i < c.NumNodes(); i++ {
		var details server.DetailsResponse
		if err := util.GetJSON(cluster.HTTPClient, c.URL(i)+"/_status/details/local", &details); err != nil {
			t.Fatal(err)
		}
		idMap[i] = details.NodeID
	}

	// Check local response for every node.
	for i := 0; i < c.NumNodes(); i++ {
		id := idMap[i]
		checkNode(t, c, i, id, id, id)
		get(t, c.URL(i), "/_status/nodes")
	}

	// Proxy from the first node to the last node.
	firstNode := 0
	lastNode := c.NumNodes() - 1
	firstID := idMap[firstNode]
	lastID := idMap[lastNode]
	checkNode(t, c, firstNode, firstID, lastID, lastID)

	// And from the last node to the first node.
	checkNode(t, c, lastNode, lastID, firstID, firstID)

	// And from the last node to the last node.
	checkNode(t, c, lastNode, lastID, lastID, lastID)
}
Example #6
// apiGet issues a GET to the provided server using the given API path and unmarshals
// the result into the v parameter.
func apiGet(s *TestServer, path string, v interface{}) error {
	apiPath := apiEndpoint + path
	client, err := s.Ctx.GetHTTPClient()
	if err != nil {
		return err
	}
	return util.GetJSON(client, s.Ctx.HTTPRequestScheme(), s.HTTPAddr(), apiPath, v)
}
Example #7
// getJSON is a convenience wrapper around cockroach/util.GetJSON(), which retrieves
// a URL specified by the parameters and unmarshals the result into the supplied
// interface.
func getJSON(tls bool, hostport, path string, v interface{}) error {
	scheme := "https"
	if !tls {
		scheme = "http"
	}

	return util.GetJSON(&HTTPClient, scheme, hostport, path, v)
}
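The examples above use two forms of util.GetJSON (one taking a full URL, one taking scheme, host, and path separately); both presumably issue an HTTP GET and unmarshal the JSON body into v. A minimal sketch of the URL form (hypothetical, not the actual cockroach/util implementation):

import (
	"encoding/json"
	"fmt"
	"net/http"
)

// getJSONSketch fetches url with client and decodes the JSON response body
// into v. Hypothetical stand-in for util.GetJSON.
func getJSONSketch(client *http.Client, url string, v interface{}) error {
	resp, err := client.Get(url)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("unexpected status %s fetching %s", resp.Status, url)
	}
	return json.NewDecoder(resp.Body).Decode(v)
}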
Example #8
// apiGet issues a GET to the provided server using the given API path and
// unmarshals the result into response.
func apiGet(s TestServer, path string, response proto.Message) error {
	apiPath := apiEndpoint + path
	client, err := s.Ctx.GetHTTPClient()
	if err != nil {
		return err
	}
	return util.GetJSON(client, s.Ctx.AdminURL()+apiPath, response)
}
Example #9
// printRebalanceStats prints the time it took for rebalancing to finish and the
// final standard deviation of replica counts across stores.
func (at *allocatorTest) printRebalanceStats(db *gosql.DB, host string, adminPort int) error {
	// TODO(cuongdo): Output these in a machine-friendly way and graph.

	// Output time it took to rebalance.
	{
		var rebalanceIntervalStr string
		var rebalanceInterval time.Duration
		q := `SELECT (SELECT MAX(timestamp) FROM rangelog) - ` +
			`(select MAX(timestamp) FROM eventlog WHERE eventType='` + string(sql.EventLogNodeJoin) + `')`
		if err := db.QueryRow(q).Scan(&rebalanceIntervalStr); err != nil {
			return err
		}
		rebalanceInterval, err := time.ParseDuration(rebalanceIntervalStr)
		if err != nil {
			return err
		}
		if rebalanceInterval < 0 {
			// This can happen with single-node clusters.
			rebalanceInterval = time.Duration(0)
		}
		log.Infof("cluster took %s to rebalance", rebalanceInterval)
	}

	// Output # of range events that occurred. All other things being equal,
	// larger numbers are worse and potentially indicate thrashing.
	{
		var rangeEvents int64
		q := `SELECT COUNT(*) from rangelog`
		if err := db.QueryRow(q).Scan(&rangeEvents); err != nil {
			return err
		}
		log.Infof("%d range events", rangeEvents)
	}

	// Output standard deviation of the replica counts for all stores.
	{
		var client http.Client
		var nodesResp serverpb.NodesResponse
		url := fmt.Sprintf("http://%s:%d/_status/nodes", host, adminPort)
		if err := util.GetJSON(client, url, &nodesResp); err != nil {
			return err
		}
		var replicaCounts stats.Float64Data
		for _, node := range nodesResp.Nodes {
			for _, ss := range node.StoreStatuses {
				replicaCounts = append(replicaCounts, float64(ss.Metrics["replicas"]))
			}
		}
		stddev, err := stats.StdDevP(replicaCounts)
		if err != nil {
			return err
		}
		log.Infof("stddev(replica count) = %.2f", stddev)
	}

	return nil
}
Example #10
// queryCount returns the total SQL queries executed by the cluster.
func (cl continuousLoadTest) queryCount(f *terrafarm.Farmer) (float64, error) {
	var client http.Client
	var resp status.NodeStatus
	host := f.Nodes()[0]
	if err := util.GetJSON(client, "http://"+host+":8080/_status/nodes/local", &resp); err != nil {
		return 0, err
	}
	count, ok := resp.Metrics["sql.query.count"]
	if !ok {
		return 0, errors.New("couldn't find SQL query count metric")
	}
	return count, nil
}
Example #11
// GetAddress returns a net.Addr or error.
// Upon errors, we set exhausted=true, then flip it back when called again.
func (nl *nodeLookupResolver) GetAddress() (net.Addr, error) {
	// TODO(marc): this is a bit of a hack to allow the server to start.
	// In single-node setups, this resolver will never return anything since
	// the status handlers are not serving yet. Instead, we specify multiple
	// gossip addresses (--gossip=localhost,http-lb=lb). We need this one to
	// be exhausted from time to time so that we have a chance to hit the fixed address.
	// Remove once the status pages are served before we've established a connection to
	// the gossip network.
	if nl.exhausted {
		nl.exhausted = false
		return nil, util.Errorf("skipping temporarily-exhausted resolver")
	}

	if nl.httpClient == nil {
		tlsConfig, err := nl.context.GetClientTLSConfig()
		if err != nil {
			return nil, err
		}
		nl.httpClient = &http.Client{
			Transport: &http.Transport{TLSClientConfig: tlsConfig},
			Timeout:   base.NetworkTimeout,
		}
	}

	nl.exhausted = true

	local := struct {
		Address util.UnresolvedAddr `json:"address"`
		// We ignore all other fields.
	}{}

	log.Infof("querying %s for gossip nodes", nl.addr)
	// TODO(marc): put common URIs in base and reuse everywhere.
	if err := util.GetJSON(nl.httpClient, nl.context.HTTPRequestScheme(), nl.addr, "/_status/details/local", &local); err != nil {
		return nil, err
	}

	addr, err := resolveAddress(local.Address.Network(), local.Address.String())
	if err != nil {
		return nil, err
	}
	nl.exhausted = false
	log.Infof("found gossip node: %+v", addr)
	return addr, nil
}
Example #12
func (at *allocatorTest) stdDev() (float64, error) {
	host := at.f.Nodes()[0]
	var client http.Client
	var nodesResp serverpb.NodesResponse
	url := fmt.Sprintf("http://%s:%s/_status/nodes", host, adminPort)
	if err := util.GetJSON(client, url, &nodesResp); err != nil {
		return 0, err
	}
	var replicaCounts stats.Float64Data
	for _, node := range nodesResp.Nodes {
		for _, ss := range node.StoreStatuses {
			replicaCounts = append(replicaCounts, float64(ss.Metrics["replicas"]))
		}
	}
	stdDev, err := stats.StdDevP(replicaCounts)
	if err != nil {
		return 0, err
	}
	return stdDev, nil
}
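stats.StdDevP used here and in Example #9 computes the population standard deviation, i.e. the square root of the mean squared deviation from the mean. A self-contained sketch of that formula (a hypothetical helper, not the actual stats package implementation):

import (
	"errors"
	"math"
)

// stdDevPSketch returns the population standard deviation of xs:
// sqrt(mean((x - mean(xs))^2)). Hypothetical stand-in for stats.StdDevP.
func stdDevPSketch(xs []float64) (float64, error) {
	if len(xs) == 0 {
		return 0, errors.New("empty input")
	}
	var sum float64
	for _, x := range xs {
		sum += x
	}
	mean := sum / float64(len(xs))
	var sq float64
	for _, x := range xs {
		d := x - mean
		sq += d * d
	}
	return math.Sqrt(sq / float64(len(xs))), nil
}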
Example #13
// checkNode checks all the endpoints of the status server hosted by node and
// requests info for the node with otherNodeID. That node could be another
// node, the same node, or "local".
func checkNode(t *testing.T, c cluster.Cluster, i int, nodeID, otherNodeID, expectedNodeID roachpb.NodeID) {
	urlIDs := []string{otherNodeID.String()}
	if nodeID == otherNodeID {
		urlIDs = append(urlIDs, "local")
	}
	var details server.DetailsResponse
	for _, urlID := range urlIDs {
		if err := util.GetJSON(cluster.HTTPClient, c.URL(i)+"/_status/details/"+urlID, &details); err != nil {
			t.Fatal(util.ErrorfSkipFrames(1, "unable to parse details - %s", err))
		}
		if details.NodeID != expectedNodeID {
			t.Fatal(util.ErrorfSkipFrames(1, "%d calling %s: node ids don't match - expected %d, actual %d", nodeID, urlID, expectedNodeID, details.NodeID))
		}

		get(t, c.URL(i), fmt.Sprintf("/_status/gossip/%s", urlID))
		get(t, c.URL(i), fmt.Sprintf("/_status/nodes/%s", urlID))
		get(t, c.URL(i), fmt.Sprintf("/_status/logfiles/%s", urlID))
		get(t, c.URL(i), fmt.Sprintf("/_status/logs/%s", urlID))
		get(t, c.URL(i), fmt.Sprintf("/_status/stacks/%s", urlID))
	}
}
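The unexported get helper is not shown in this listing; presumably it just issues a GET against the URL and fails the test on a transport error or non-200 status. A purely hypothetical shape for it could be:

import (
	"fmt"
	"io/ioutil"
	"net/http"
	"testing"
)

// getSketch issues a GET against base+path and fails the test on error or a
// non-200 response. Hypothetical shape only; the real helper likely uses the
// cluster's HTTP client.
func getSketch(t *testing.T, base, path string) []byte {
	resp, err := http.Get(base + path)
	if err != nil {
		t.Fatal(err)
	}
	defer resp.Body.Close()
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		t.Fatal(err)
	}
	if resp.StatusCode != http.StatusOK {
		t.Fatal(fmt.Errorf("unexpected status %s for %s", resp.Status, path))
	}
	return body
}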
Example #14
func TestStopServer(t *testing.T) {
	defer leaktest.AfterTest(t)()

	tc := StartTestCluster(t, 3, base.TestClusterArgs{ReplicationMode: base.ReplicationAuto})
	defer tc.Stopper().Stop()
	if err := tc.WaitForFullReplication(); err != nil {
		t.Fatal(err)
	}

	// Connect to server 1, ensure it is answering requests over HTTP and GRPC.
	server1 := tc.Server(1)
	var response serverpb.HealthResponse

	httpClient1, err := server1.GetHTTPClient()
	if err != nil {
		t.Fatal(err)
	}
	url := server1.AdminURL() + "/_admin/v1/health"
	if err := util.GetJSON(httpClient1, url, &response); err != nil {
		t.Fatal(err)
	}

	rpcContext := rpc.NewContext(
		tc.Server(1).RPCContext().Context, tc.Server(1).Clock(), tc.Stopper(),
	)
	conn, err := rpcContext.GRPCDial(server1.ServingAddr())
	if err != nil {
		t.Fatal(err)
	}
	adminClient1 := serverpb.NewAdminClient(conn)
	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
	defer cancel()
	if _, err := adminClient1.Health(ctx, &serverpb.HealthRequest{}); err != nil {
		t.Fatal(err)
	}

	// Stop server 1.
	tc.StopServer(1)

	// Verify HTTP and GRPC requests to server now fail.
	httpErrorText := "connection refused"
	if err := util.GetJSON(httpClient1, url, &response); err == nil {
		t.Fatal("Expected HTTP Request to fail after server stopped")
	} else if !testutils.IsError(err, httpErrorText) {
		t.Fatalf("Expected error from server with text %q, got error with text %q", httpErrorText, err.Error())
	}

	grpcErrorText := "rpc error"
	if _, err := adminClient1.Health(ctx, &serverpb.HealthRequest{}); err == nil {
		t.Fatal("Expected GRPC Request to fail after server stopped")
	} else if !testutils.IsError(err, grpcErrorText) {
		t.Fatalf("Expected error from GRPC with text %q, got error with text %q", grpcErrorText, err.Error())
	}

	// Verify that request to Server 0 still works.
	httpClient1, err = tc.Server(0).GetHTTPClient()
	if err != nil {
		t.Fatal(err)
	}
	url = tc.Server(0).AdminURL() + "/_admin/v1/health"
	if err := util.GetJSON(httpClient1, url, &response); err != nil {
		t.Fatal(err)
	}
}
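testutils.IsError above checks whether an error's text matches a pattern; a minimal equivalent based on regexp matching (assumed behavior, not the actual testutils helper):

import "regexp"

// isErrorSketch reports whether err is non-nil and its message matches the
// regular expression re. Hypothetical stand-in for testutils.IsError.
func isErrorSketch(err error, re string) bool {
	if err == nil {
		return false
	}
	matched, matchErr := regexp.MatchString(re, err.Error())
	return matchErr == nil && matched
}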
Example #15
func TestAdminAPITableStats(t *testing.T) {
	defer leaktest.AfterTest(t)()
	const nodeCount = 3
	tc := testcluster.StartTestCluster(t, nodeCount, base.TestClusterArgs{
		ReplicationMode: base.ReplicationAuto,
		ServerArgs: base.TestServerArgs{
			ScanInterval:    time.Millisecond,
			ScanMaxIdleTime: time.Millisecond,
		},
	})
	defer tc.Stopper().Stop()
	if err := tc.WaitForFullReplication(); err != nil {
		t.Fatal(err)
	}
	server0 := tc.Server(0)

	// Create clients (SQL, HTTP) connected to server 0.
	db := tc.ServerConn(0)

	client, err := server0.GetHTTPClient()
	if err != nil {
		t.Fatal(err)
	}

	client.Timeout = base.NetworkTimeout * 3

	// Make a single table and insert some data. The database and table have
	// names which require escaping, in order to verify that database and
	// table names are being handled correctly.
	if _, err := db.Exec(`CREATE DATABASE "test test"`); err != nil {
		t.Fatal(err)
	}
	if _, err := db.Exec(`
		CREATE TABLE "test test"."foo foo" (
			id INT PRIMARY KEY,
			val STRING
		)`,
	); err != nil {
		t.Fatal(err)
	}
	for i := 0; i < 10; i++ {
		if _, err := db.Exec(`
			INSERT INTO "test test"."foo foo" VALUES(
				$1, $2
			)`, i, "test",
		); err != nil {
			t.Fatal(err)
		}
	}

	url := server0.AdminURL() + "/_admin/v1/databases/test test/tables/foo foo/stats"
	var tsResponse serverpb.TableStatsResponse

	// The new SQL table may not yet have split into its own range. Wait for
	// this to occur, and for full replication.
	util.SucceedsSoon(t, func() error {
		if err := util.GetJSON(client, url, &tsResponse); err != nil {
			return err
		}
		if tsResponse.RangeCount != 1 {
			return errors.Errorf("Table range not yet separated.")
		}
		if tsResponse.NodeCount != nodeCount {
			return errors.Errorf("Table range not yet replicated to %d nodes.", 3)
		}
		if a, e := tsResponse.ReplicaCount, int64(nodeCount); a != e {
			return errors.Errorf("expected %d replicas, found %d", e, a)
		}
		return nil
	})

	// These two conditions *must* be true, given that the above
	// SucceedsSoon has succeeded.
	if a, e := tsResponse.Stats.KeyCount, int64(20); a < e {
		t.Fatalf("expected at least 20 total keys, found %d", a)
	}
	if len(tsResponse.MissingNodes) > 0 {
		t.Fatalf("expected no missing nodes, found %v", tsResponse.MissingNodes)
	}

	// Kill a node, ensure it shows up in MissingNodes and that ReplicaCount is
	// lower.
	tc.StopServer(1)

	if err := util.GetJSON(client, url, &tsResponse); err != nil {
		t.Fatal(err)
	}
	if a, e := tsResponse.NodeCount, int64(nodeCount); a != e {
		t.Errorf("expected %d nodes, found %d", e, a)
	}
	if a, e := tsResponse.RangeCount, int64(1); a != e {
		t.Errorf("expected %d ranges, found %d", e, a)
	}
	if a, e := tsResponse.ReplicaCount, int64((nodeCount/2)+1); a != e {
		t.Errorf("expected %d replicas, found %d", e, a)
	}
	if a, e := tsResponse.Stats.KeyCount, int64(10); a < e {
		t.Errorf("expected at least 10 total keys, found %d", a)
	}
	if len(tsResponse.MissingNodes) != 1 {
		t.Errorf("expected one missing node, found %v", tsResponse.MissingNodes)
	}

	// Call TableStats with a very low timeout. This tests that fan-out queries
	// do not leak goroutines if the calling context is abandoned.
	// Interestingly, the call can actually sometimes succeed, despite the small
	// timeout; however, in aggregate (or in stress tests) this will suffice for
	// detecting leaks.
	client.Timeout = 1 * time.Nanosecond
	_ = util.GetJSON(client, url, &tsResponse)
}