// Cluster returns cluster metadata.
func (s *adminServer) Cluster(_ context.Context, req *ClusterRequest) (*ClusterResponse, error) {
	clusterID := s.node.ClusterID
	if uuid.Equal(clusterID, *uuid.EmptyUUID) {
		return nil, grpc.Errorf(codes.Unavailable, "cluster ID not yet available")
	}
	return &ClusterResponse{ClusterID: clusterID.String()}, nil
}
// Exec executes fn in the context of a distributed transaction.
// Execution is controlled by opt (see comments in TxnExecOptions).
//
// opt is passed to fn, and it's valid for fn to modify opt as it sees
// fit during each execution attempt.
//
// It's valid for txn to be nil (meaning the txn has already aborted) if fn
// can handle that. This is useful for continuing transactions that have been
// aborted because of an error in a previous batch of statements in the hope
// that a ROLLBACK will reset the state. Neither opt.AutoRetry nor
// opt.AutoCommit can be set in this case.
//
// When this method returns, txn might be in any state; Exec does not attempt
// to clean up the transaction before returning an error. In case of
// TransactionAbortedError, txn is reset to a fresh transaction, ready to be
// used.
func (txn *Txn) Exec(
	opt TxnExecOptions,
	fn func(txn *Txn, opt *TxnExecOptions) error,
) (err error) {
	// Run fn in a retry loop until we encounter a success or an error
	// condition this loop isn't capable of handling.
	var retryOptions retry.Options
	if txn == nil && (opt.AutoRetry || opt.AutoCommit) {
		panic("asked to retry or commit a txn that is already aborted")
	}

	// Ensure that a RetryableTxnError escaping this function is not used by
	// another (higher-level) Exec() invocation to restart its unrelated
	// transaction. Technically, setting TxnID to nil here is best-effort and
	// doesn't ensure that (the error will be wrongly used if the outer txn also
	// has a nil TxnID).
	// TODO(andrei): set TxnID to a bogus non-nil value once we get rid of the
	// retErr.Transaction field.
	defer func() {
		if retErr, ok := err.(*roachpb.RetryableTxnError); ok {
			retErr.TxnID = nil
			retErr.Transaction = nil
		}
	}()

	if opt.AutoRetry {
		retryOptions = txn.db.ctx.TxnRetryOptions
	}

	for r := retry.Start(retryOptions); r.Next(); {
		if txn != nil {
			// If we're looking at a brand new transaction, then communicate
			// what should be used as initial timestamp for the KV txn created
			// by TxnCoordSender.
			if txn.Proto.OrigTimestamp == hlc.ZeroTimestamp {
				txn.Proto.OrigTimestamp = opt.MinInitialTimestamp
			}
		}

		err = fn(txn, &opt)
		if err == nil && opt.AutoCommit && txn.Proto.Status == roachpb.PENDING {
			// fn succeeded, but didn't commit.
			err = txn.Commit()
			if err != nil {
				if _, retryable := err.(*roachpb.RetryableTxnError); !retryable {
					// We can't retry, so let the caller know we tried to
					// autocommit.
					err = &AutoCommitError{cause: err}
				}
			}
		}

		if !opt.AutoRetry {
			break
		}

		if retErr, retryable := err.(*roachpb.RetryableTxnError); !retryable {
			break
		} else {
			// Make sure the txn record that err carries is for this txn.
			// If it's not, we terminate the "retryable" character of the error.
			if txn.Proto.ID != nil {
				if retErr.TxnID == nil {
					return errors.New(retErr.Error())
				}
				if !uuid.Equal(*retErr.TxnID, *txn.Proto.ID) {
					return errors.New(retErr.Error())
				}
			}
			if !retErr.Backoff {
				r.Reset()
			}
		}

		if log.V(2) {
			log.Infof(context.TODO(), "automatically retrying transaction: %s because of error: %s",
				txn.DebugName(), err)
		}
	}

	return err
}
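// The following usage sketch is not part of the original file; it is added
// only to illustrate how a caller typically drives Exec. It assumes the
// client package's NewTxn constructor and Txn.Put keep their usual
// signatures; the function name exampleExecUsage is purely illustrative.
func exampleExecUsage(db DB) error {
	// Assumed constructor: NewTxn(ctx, db) returns a *Txn bound to db.
	txn := NewTxn(context.TODO(), db)
	return txn.Exec(
		// AutoRetry: Exec re-runs fn whenever it returns a RetryableTxnError.
		// AutoCommit: Exec commits if fn returns nil and the txn is still
		// PENDING; a non-retryable commit failure surfaces as *AutoCommitError.
		TxnExecOptions{AutoRetry: true, AutoCommit: true},
		func(txn *Txn, opt *TxnExecOptions) error {
			// All work issued through txn in here is retried as a unit.
			return txn.Put("example-key", "example-value")
		})
}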
func testEventLogInner(t *testing.T, c cluster.Cluster, cfg cluster.TestConfig) {
	num := c.NumNodes()
	if num <= 0 {
		t.Fatalf("expected at least one node in cluster, got %d", num)
	}
	var confirmedClusterID uuid.UUID
	type nodeEventInfo struct {
		Descriptor roachpb.NodeDescriptor
		ClusterID  uuid.UUID
	}

	// Verify that a node_join message was logged for each node in the cluster.
	// We expect there to eventually be one such message for each node in the
	// cluster, and each message must be correctly formatted.
	util.SucceedsSoon(t, func() error {
		db := makePGClient(t, c.PGUrl(0))
		defer db.Close()

		// Query all node join events. There should be one for each node in the
		// cluster.
		rows, err := db.Query(
			"SELECT targetID, info FROM system.eventlog WHERE eventType = $1",
			string(csql.EventLogNodeJoin))
		if err != nil {
			return err
		}
		seenIds := make(map[int64]struct{})
		var clusterID uuid.UUID
		for rows.Next() {
			var targetID int64
			var infoStr gosql.NullString
			if err := rows.Scan(&targetID, &infoStr); err != nil {
				t.Fatal(err)
			}

			// Verify the stored node descriptor.
			if !infoStr.Valid {
				t.Fatalf("info not recorded for node join, target node %d", targetID)
			}
			var info nodeEventInfo
			if err := json.Unmarshal([]byte(infoStr.String), &info); err != nil {
				t.Fatal(err)
			}
			if a, e := int64(info.Descriptor.NodeID), targetID; a != e {
				t.Fatalf("Node join with targetID %d had descriptor for wrong node %d", e, a)
			}

			// Verify cluster ID is recorded, and is the same for all nodes.
			if uuid.Equal(info.ClusterID, *uuid.EmptyUUID) {
				t.Fatalf("Node join recorded nil cluster id, info: %v", info)
			}
			if uuid.Equal(clusterID, *uuid.EmptyUUID) {
				clusterID = info.ClusterID
			} else if !uuid.Equal(clusterID, info.ClusterID) {
				t.Fatalf(
					"Node join recorded different cluster ID than earlier node. Expected %s, got %s. Info: %v",
					clusterID, info.ClusterID, info)
			}

			// Verify that all NodeIDs are different.
			if _, ok := seenIds[targetID]; ok {
				t.Fatalf("Node ID %d seen in two different node join messages", targetID)
			}
			seenIds[targetID] = struct{}{}
		}
		if err := rows.Err(); err != nil {
			return err
		}

		if a, e := len(seenIds), c.NumNodes(); a != e {
			return errors.Errorf("expected %d node join messages, found %d: %v", e, a, seenIds)
		}

		confirmedClusterID = clusterID
		return nil
	})

	// Stop and restart node 0, and verify the node restart message.
	if err := c.Kill(0); err != nil {
		t.Fatal(err)
	}
	if err := c.Restart(0); err != nil {
		t.Fatal(err)
	}

	util.SucceedsSoon(t, func() error {
		db := makePGClient(t, c.PGUrl(0))
		defer db.Close()

		// Query all node restart events. There should only be one.
		rows, err := db.Query(
			"SELECT targetID, info FROM system.eventlog WHERE eventType = $1",
			string(csql.EventLogNodeRestart))
		if err != nil {
			return err
		}

		seenCount := 0
		for rows.Next() {
			var targetID int64
			var infoStr gosql.NullString
			if err := rows.Scan(&targetID, &infoStr); err != nil {
				t.Fatal(err)
			}

			// Verify the stored node descriptor.
			if !infoStr.Valid {
				t.Fatalf("info not recorded for node restart, target node %d", targetID)
			}
			var info nodeEventInfo
			if err := json.Unmarshal([]byte(infoStr.String), &info); err != nil {
				t.Fatal(err)
			}
			if a, e := int64(info.Descriptor.NodeID), targetID; a != e {
				t.Fatalf("Node restart with targetID %d had descriptor for wrong node %d", e, a)
			}

			// Verify the cluster ID matches the one recorded at join time.
			if !uuid.Equal(confirmedClusterID, info.ClusterID) {
				t.Fatalf(
					"Node restart recorded different cluster ID than earlier join. Expected %s, got %s. Info: %v",
					confirmedClusterID, info.ClusterID, info)
			}

			seenCount++
		}
		if err := rows.Err(); err != nil {
			return err
		}
		if seenCount != 1 {
			return errors.Errorf("Expected only one node restart event, found %d", seenCount)
		}
		return nil
	})
}