// writeSummaries retrieves status summaries from the supplied // NodeStatusRecorder and persists them to the cockroach data store. func (n *Node) writeSummaries(ctx context.Context) error { var err error if runErr := n.stopper.RunTask(func() { nodeStatus := n.recorder.GetStatusSummary() if nodeStatus != nil { key := keys.NodeStatusKey(nodeStatus.Desc.NodeID) // We use PutInline to store only a single version of the node // status. There's not much point in keeping the historical // versions as we keep all of the constituent data as // timeseries. Further, due to the size of the build info in the // node status, writing one of these every 10s will generate // more versions than will easily fit into a range over the // course of a day. if err = n.storeCfg.DB.PutInline(ctx, key, nodeStatus); err != nil { return } if log.V(2) { statusJSON, err := json.Marshal(nodeStatus) if err != nil { log.Errorf(ctx, "error marshaling nodeStatus to json: %s", err) } log.Infof(ctx, "node %d status: %s", nodeStatus.Desc.NodeID, statusJSON) } } }); runErr != nil { err = runErr } return err }
// WriteStatusSummary generates a summary and immediately writes it to the given // client. func (mr *MetricsRecorder) WriteStatusSummary(ctx context.Context, db *client.DB) error { mr.writeSummaryMu.Lock() defer mr.writeSummaryMu.Unlock() nodeStatus := mr.GetStatusSummary() if nodeStatus != nil { key := keys.NodeStatusKey(nodeStatus.Desc.NodeID) // We use PutInline to store only a single version of the node status. // There's not much point in keeping the historical versions as we keep // all of the constituent data as timeseries. Further, due to the size // of the build info in the node status, writing one of these every 10s // will generate more versions than will easily fit into a range over // the course of a day. if err := db.PutInline(ctx, key, nodeStatus); err != nil { return err } if log.V(2) { statusJSON, err := json.Marshal(nodeStatus) if err != nil { log.Errorf(ctx, "error marshaling nodeStatus to json: %s", err) } log.Infof(ctx, "node %d status: %s", nodeStatus.Desc.NodeID, statusJSON) } } return nil }
// handleNodeStatus handles GET requests for a single node's status. func (s *statusServer) Node( ctx context.Context, req *serverpb.NodeRequest, ) (*status.NodeStatus, error) { ctx = s.AnnotateCtx(ctx) nodeID, _, err := s.parseNodeID(req.NodeId) if err != nil { return nil, grpc.Errorf(codes.InvalidArgument, err.Error()) } key := keys.NodeStatusKey(nodeID) b := &client.Batch{} b.Get(key) if err := s.db.Run(ctx, b); err != nil { log.Error(ctx, err) return nil, grpc.Errorf(codes.Internal, err.Error()) } var nodeStatus status.NodeStatus if err := b.Results[0].Rows[0].ValueProto(&nodeStatus); err != nil { err = errors.Errorf("could not unmarshal NodeStatus from %s: %s", key, err) log.Error(ctx, err) return nil, grpc.Errorf(codes.Internal, err.Error()) } return &nodeStatus, nil }
// compareNodeStatus ensures that the actual node status for the passed in // node is updated correctly. It checks that the Node Descriptor, StoreIDs, // RangeCount, StartedAt, ReplicatedRangeCount and are exactly correct and that // the bytes and counts for Live, Key and Val are at least the expected value. // And that UpdatedAt has increased. // The latest actual stats are returned. func compareNodeStatus( t *testing.T, ts *TestServer, expectedNodeStatus *status.NodeStatus, testNumber int, ) *status.NodeStatus { // ======================================== // Read NodeStatus from server and validate top-level fields. // ======================================== nodeStatusKey := keys.NodeStatusKey(ts.node.Descriptor.NodeID) nodeStatus := &status.NodeStatus{} if err := ts.db.GetProto(context.TODO(), nodeStatusKey, nodeStatus); err != nil { t.Fatalf("%d: failure getting node status: %s", testNumber, err) } // Descriptor values should be exactly equal to expected. if a, e := nodeStatus.Desc, expectedNodeStatus.Desc; !reflect.DeepEqual(a, e) { t.Errorf("%d: Descriptor does not match expected.\nexpected: %s\nactual: %s", testNumber, e, a) } // ======================================== // Ensure all expected stores are represented in the node status. // ======================================== storesToMap := func(ns *status.NodeStatus) map[roachpb.StoreID]status.StoreStatus { strMap := make(map[roachpb.StoreID]status.StoreStatus, len(ns.StoreStatuses)) for _, str := range ns.StoreStatuses { strMap[str.Desc.StoreID] = str } return strMap } actualStores := storesToMap(nodeStatus) expectedStores := storesToMap(expectedNodeStatus) if a, e := len(actualStores), len(expectedStores); a != e { t.Errorf("%d: actual status contained %d stores, expected %d", testNumber, a, e) } for key := range expectedStores { if _, ok := actualStores[key]; !ok { t.Errorf("%d: actual node status did not contain expected store %d", testNumber, key) } } if t.Failed() { t.FailNow() } // ======================================== // Ensure all metric sets (node and store level) are consistent with // expected status. // ======================================== // CompareMetricMaps accepts an actual and expected metric maps, along with // two lists of string keys. For metrics with keys in the 'equal' map, the // actual value must be equal to the expected value. For keys in the // 'greater' map, the actul value must be greater than or equal to the // expected value. compareMetricMaps := func(actual, expected map[string]float64, equal, greater []string) { // Make sure the actual value map contains all values in expected map. for key := range expected { if _, ok := actual[key]; !ok { t.Errorf("%d: actual node status did not contain expected metric %s", testNumber, key) } } if t.Failed() { return } // For each equal key, ensure that the actual value is equal to expected // key. for _, key := range equal { if _, ok := actual[key]; !ok { t.Errorf("%d, actual node status did not contain expected 'equal' metric key %s", testNumber, key) continue } if a, e := actual[key], expected[key]; a != e { t.Errorf("%d: %s does not match expected value.\nExpected %f, Actual %f", testNumber, key, e, a) } } for _, key := range greater { if _, ok := actual[key]; !ok { t.Errorf("%d: actual node status did not contain expected 'greater' metric key %s", testNumber, key) continue } if a, e := actual[key], expected[key]; a < e { t.Errorf("%d: %s is not greater than or equal to expected value.\nExpected %f, Actual %f", testNumber, key, e, a) } } } compareMetricMaps(nodeStatus.Metrics, expectedNodeStatus.Metrics, nil, []string{ "exec.success", "exec.error", }) for key := range actualStores { // Directly verify a subset of metrics which have predictable output. compareMetricMaps(actualStores[key].Metrics, expectedStores[key].Metrics, []string{ "replicas", "replicas.leaseholders", }, []string{ "livecount", "keycount", "valcount", }) } if t.Failed() { t.FailNow() } return nodeStatus }