Ejemplo n.º 1
0
// writeSummaries retrieves status summaries from the supplied
// NodeStatusRecorder and persists them to the cockroach data store.
func (n *Node) writeSummaries(ctx context.Context) error {
	var err error
	if runErr := n.stopper.RunTask(func() {
		nodeStatus := n.recorder.GetStatusSummary()
		if nodeStatus != nil {
			key := keys.NodeStatusKey(nodeStatus.Desc.NodeID)
			// We use PutInline to store only a single version of the node
			// status. There's not much point in keeping the historical
			// versions as we keep all of the constituent data as
			// timeseries. Further, due to the size of the build info in the
			// node status, writing one of these every 10s will generate
			// more versions than will easily fit into a range over the
			// course of a day.
			if err = n.storeCfg.DB.PutInline(ctx, key, nodeStatus); err != nil {
				return
			}
			if log.V(2) {
				statusJSON, err := json.Marshal(nodeStatus)
				if err != nil {
					log.Errorf(ctx, "error marshaling nodeStatus to json: %s", err)
				}
				log.Infof(ctx, "node %d status: %s", nodeStatus.Desc.NodeID, statusJSON)
			}
		}
	}); runErr != nil {
		err = runErr
	}
	return err
}
Ejemplo n.º 2
0
// WriteStatusSummary generates a summary and immediately writes it to the given
// client.
func (mr *MetricsRecorder) WriteStatusSummary(ctx context.Context, db *client.DB) error {
	mr.writeSummaryMu.Lock()
	defer mr.writeSummaryMu.Unlock()

	nodeStatus := mr.GetStatusSummary()
	if nodeStatus != nil {
		key := keys.NodeStatusKey(nodeStatus.Desc.NodeID)
		// We use PutInline to store only a single version of the node status.
		// There's not much point in keeping the historical versions as we keep
		// all of the constituent data as timeseries. Further, due to the size
		// of the build info in the node status, writing one of these every 10s
		// will generate more versions than will easily fit into a range over
		// the course of a day.
		if err := db.PutInline(ctx, key, nodeStatus); err != nil {
			return err
		}
		if log.V(2) {
			statusJSON, err := json.Marshal(nodeStatus)
			if err != nil {
				log.Errorf(ctx, "error marshaling nodeStatus to json: %s", err)
			}
			log.Infof(ctx, "node %d status: %s", nodeStatus.Desc.NodeID, statusJSON)
		}
	}
	return nil
}
Ejemplo n.º 3
0
// handleNodeStatus handles GET requests for a single node's status.
func (s *statusServer) Node(
	ctx context.Context, req *serverpb.NodeRequest,
) (*status.NodeStatus, error) {
	ctx = s.AnnotateCtx(ctx)
	nodeID, _, err := s.parseNodeID(req.NodeId)
	if err != nil {
		return nil, grpc.Errorf(codes.InvalidArgument, err.Error())
	}

	key := keys.NodeStatusKey(nodeID)
	b := &client.Batch{}
	b.Get(key)
	if err := s.db.Run(ctx, b); err != nil {
		log.Error(ctx, err)
		return nil, grpc.Errorf(codes.Internal, err.Error())
	}

	var nodeStatus status.NodeStatus
	if err := b.Results[0].Rows[0].ValueProto(&nodeStatus); err != nil {
		err = errors.Errorf("could not unmarshal NodeStatus from %s: %s", key, err)
		log.Error(ctx, err)
		return nil, grpc.Errorf(codes.Internal, err.Error())
	}
	return &nodeStatus, nil
}
Ejemplo n.º 4
0
// compareNodeStatus ensures that the actual node status for the passed in
// node is updated correctly. It checks that the Node Descriptor, StoreIDs,
// RangeCount, StartedAt, ReplicatedRangeCount and are exactly correct and that
// the bytes and counts for Live, Key and Val are at least the expected value.
// And that UpdatedAt has increased.
// The latest actual stats are returned.
func compareNodeStatus(
	t *testing.T, ts *TestServer, expectedNodeStatus *status.NodeStatus, testNumber int,
) *status.NodeStatus {
	// ========================================
	// Read NodeStatus from server and validate top-level fields.
	// ========================================
	nodeStatusKey := keys.NodeStatusKey(ts.node.Descriptor.NodeID)
	nodeStatus := &status.NodeStatus{}
	if err := ts.db.GetProto(context.TODO(), nodeStatusKey, nodeStatus); err != nil {
		t.Fatalf("%d: failure getting node status: %s", testNumber, err)
	}

	// Descriptor values should be exactly equal to expected.
	if a, e := nodeStatus.Desc, expectedNodeStatus.Desc; !reflect.DeepEqual(a, e) {
		t.Errorf("%d: Descriptor does not match expected.\nexpected: %s\nactual: %s", testNumber, e, a)
	}

	// ========================================
	// Ensure all expected stores are represented in the node status.
	// ========================================
	storesToMap := func(ns *status.NodeStatus) map[roachpb.StoreID]status.StoreStatus {
		strMap := make(map[roachpb.StoreID]status.StoreStatus, len(ns.StoreStatuses))
		for _, str := range ns.StoreStatuses {
			strMap[str.Desc.StoreID] = str
		}
		return strMap
	}
	actualStores := storesToMap(nodeStatus)
	expectedStores := storesToMap(expectedNodeStatus)

	if a, e := len(actualStores), len(expectedStores); a != e {
		t.Errorf("%d: actual status contained %d stores, expected %d", testNumber, a, e)
	}
	for key := range expectedStores {
		if _, ok := actualStores[key]; !ok {
			t.Errorf("%d: actual node status did not contain expected store %d", testNumber, key)
		}
	}
	if t.Failed() {
		t.FailNow()
	}

	// ========================================
	// Ensure all metric sets (node and store level) are consistent with
	// expected status.
	// ========================================

	// CompareMetricMaps accepts an actual and expected metric maps, along with
	// two lists of string keys. For metrics with keys in the 'equal' map, the
	// actual value must be equal to the expected value. For keys in the
	// 'greater' map, the actul value must be greater than or equal to the
	// expected value.
	compareMetricMaps := func(actual, expected map[string]float64, equal, greater []string) {
		// Make sure the actual value map contains all values in expected map.
		for key := range expected {
			if _, ok := actual[key]; !ok {
				t.Errorf("%d: actual node status did not contain expected metric %s", testNumber, key)
			}
		}
		if t.Failed() {
			return
		}

		// For each equal key, ensure that the actual value is equal to expected
		// key.
		for _, key := range equal {
			if _, ok := actual[key]; !ok {
				t.Errorf("%d, actual node status did not contain expected 'equal' metric key %s", testNumber, key)
				continue
			}
			if a, e := actual[key], expected[key]; a != e {
				t.Errorf("%d: %s does not match expected value.\nExpected %f, Actual %f", testNumber, key, e, a)
			}
		}
		for _, key := range greater {
			if _, ok := actual[key]; !ok {
				t.Errorf("%d: actual node status did not contain expected 'greater' metric key %s", testNumber, key)
				continue
			}
			if a, e := actual[key], expected[key]; a < e {
				t.Errorf("%d: %s is not greater than or equal to expected value.\nExpected %f, Actual %f", testNumber, key, e, a)
			}
		}
	}

	compareMetricMaps(nodeStatus.Metrics, expectedNodeStatus.Metrics, nil, []string{
		"exec.success",
		"exec.error",
	})

	for key := range actualStores {
		// Directly verify a subset of metrics which have predictable output.
		compareMetricMaps(actualStores[key].Metrics, expectedStores[key].Metrics,
			[]string{
				"replicas",
				"replicas.leaseholders",
			},
			[]string{
				"livecount",
				"keycount",
				"valcount",
			})
	}

	if t.Failed() {
		t.FailNow()
	}

	return nodeStatus
}