예제 #1
0
// Cluster returns cluster metadata.
func (s *adminServer) Cluster(_ context.Context, req *ClusterRequest) (*ClusterResponse, error) {
	clusterID := s.node.ClusterID
	if uuid.Equal(clusterID, *uuid.EmptyUUID) {
		return nil, grpc.Errorf(codes.Unavailable, "cluster ID not yet available")
	}
	return &ClusterResponse{ClusterID: clusterID.String()}, nil
}
예제 #2
0
// Exec executes fn in the context of a distributed transaction.
// Execution is controlled by opt (see comments in TxnExecOptions).
//
// opt is passed to fn, and it's valid for fn to modify opt as it sees
// fit during each execution attempt.
//
// It's valid for txn to be nil (meaning the txn has already aborted) if fn
// can handle that. This is useful for continuing transactions that have been
// aborted because of an error in a previous batch of statements in the hope
// that a ROLLBACK will reset the state. Neither opt.AutoRetry not opt.AutoCommit
// can be set in this case.
//
// When this method returns, txn might be in any state; Exec does not attempt
// to clean up the transaction before returning an error. In case of
// TransactionAbortedError, txn is reset to a fresh transaction, ready to be
// used.
func (txn *Txn) Exec(
	opt TxnExecOptions,
	fn func(txn *Txn, opt *TxnExecOptions) error,
) (err error) {
	// Run fn in a retry loop until we encounter a success or
	// error condition this loop isn't capable of handling.
	var retryOptions retry.Options
	if txn == nil && (opt.AutoRetry || opt.AutoCommit) {
		panic("asked to retry or commit a txn that is already aborted")
	}

	// Ensure that a RetryableTxnError escaping this function is not used by
	// another (higher-level) Exec() invocation to restart its unrelated
	// transaction. Technically, setting TxnID to nil here is best-effort and
	// doesn't ensure that (the error will be wrongly used if the outer txn also
	// has a nil TxnID).
	// TODO(andrei): set TxnID to a bogus non-nil value once we get rid of the
	// retErr.Transaction field.
	defer func() {
		if retErr, ok := err.(*roachpb.RetryableTxnError); ok {
			retErr.TxnID = nil
			retErr.Transaction = nil
		}
	}()

	if opt.AutoRetry {
		retryOptions = txn.db.ctx.TxnRetryOptions
	}

	for r := retry.Start(retryOptions); r.Next(); {
		if txn != nil {
			// If we're looking at a brand new transaction, then communicate
			// what should be used as initial timestamp for the KV txn created
			// by TxnCoordSender.
			if txn.Proto.OrigTimestamp == hlc.ZeroTimestamp {
				txn.Proto.OrigTimestamp = opt.MinInitialTimestamp
			}
		}

		err = fn(txn, &opt)
		if err == nil && opt.AutoCommit && txn.Proto.Status == roachpb.PENDING {
			// fn succeeded, but didn't commit.
			err = txn.Commit()
			if err != nil {
				if _, retryable := err.(*roachpb.RetryableTxnError); !retryable {
					// We can't retry, so let the caller know we tried to
					// autocommit.
					err = &AutoCommitError{cause: err}
				}
			}
		}

		if !opt.AutoRetry {
			break
		}

		if retErr, retryable := err.(*roachpb.RetryableTxnError); !retryable {
			break
		} else {
			// Make sure the txn record that err carries is for this txn.
			// If it's not, we terminate the "retryable" character of the error.
			if txn.Proto.ID != nil {
				if retErr.TxnID == nil {
					return errors.New(retErr.Error())
				}
				if !uuid.Equal(*retErr.TxnID, *txn.Proto.ID) {
					return errors.New(retErr.Error())
				}
			}

			if !retErr.Backoff {
				r.Reset()
			}
		}
		if log.V(2) {
			log.Infof(context.TODO(), "automatically retrying transaction: %s because of error: %s",
				txn.DebugName(), err)
		}
	}

	return err
}
예제 #3
0
func testEventLogInner(t *testing.T, c cluster.Cluster, cfg cluster.TestConfig) {
	num := c.NumNodes()
	if num <= 0 {
		t.Fatalf("%d nodes in cluster", num)
	}

	var confirmedClusterID uuid.UUID
	type nodeEventInfo struct {
		Descriptor roachpb.NodeDescriptor
		ClusterID  uuid.UUID
	}

	// Verify that a node_join message was logged for each node in the cluster.
	// We expect there to eventually be one such message for each node in the
	// cluster, and each message must be correctly formatted.
	util.SucceedsSoon(t, func() error {
		db := makePGClient(t, c.PGUrl(0))
		defer db.Close()

		// Query all node join events. There should be one for each node in the
		// cluster.
		rows, err := db.Query(
			"SELECT targetID, info FROM system.eventlog WHERE eventType = $1",
			string(csql.EventLogNodeJoin))
		if err != nil {
			return err
		}
		seenIds := make(map[int64]struct{})
		var clusterID uuid.UUID
		for rows.Next() {
			var targetID int64
			var infoStr gosql.NullString
			if err := rows.Scan(&targetID, &infoStr); err != nil {
				t.Fatal(err)
			}

			// Verify the stored node descriptor.
			if !infoStr.Valid {
				t.Fatalf("info not recorded for node join, target node %d", targetID)
			}
			var info nodeEventInfo
			if err := json.Unmarshal([]byte(infoStr.String), &info); err != nil {
				t.Fatal(err)
			}
			if a, e := int64(info.Descriptor.NodeID), targetID; a != e {
				t.Fatalf("Node join with targetID %d had descriptor for wrong node %d", e, a)
			}

			// Verify cluster ID is recorded, and is the same for all nodes.
			if uuid.Equal(info.ClusterID, *uuid.EmptyUUID) {
				t.Fatalf("Node join recorded nil cluster id, info: %v", info)
			}
			if uuid.Equal(clusterID, *uuid.EmptyUUID) {
				clusterID = info.ClusterID
			} else if !uuid.Equal(clusterID, info.ClusterID) {
				t.Fatalf(
					"Node join recorded different cluster ID than earlier node. Expected %s, got %s. Info: %v",
					clusterID, info.ClusterID, info)
			}

			// Verify that all NodeIDs are different.
			if _, ok := seenIds[targetID]; ok {
				t.Fatalf("Node ID %d seen in two different node join messages", targetID)
			}
			seenIds[targetID] = struct{}{}
		}
		if err := rows.Err(); err != nil {
			return err
		}

		if a, e := len(seenIds), c.NumNodes(); a != e {
			return errors.Errorf("expected %d node join messages, found %d: %v", e, a, seenIds)
		}

		confirmedClusterID = clusterID
		return nil
	})

	// Stop and Start Node 0, and verify the node restart message.
	if err := c.Kill(0); err != nil {
		t.Fatal(err)
	}
	if err := c.Restart(0); err != nil {
		t.Fatal(err)
	}

	util.SucceedsSoon(t, func() error {
		db := makePGClient(t, c.PGUrl(0))
		defer db.Close()

		// Query all node restart events. There should only be one.
		rows, err := db.Query(
			"SELECT targetID, info FROM system.eventlog WHERE eventType = $1",
			string(csql.EventLogNodeRestart))
		if err != nil {
			return err
		}

		seenCount := 0
		for rows.Next() {
			var targetID int64
			var infoStr gosql.NullString
			if err := rows.Scan(&targetID, &infoStr); err != nil {
				t.Fatal(err)
			}

			// Verify the stored node descriptor.
			if !infoStr.Valid {
				t.Fatalf("info not recorded for node join, target node %d", targetID)
			}
			var info nodeEventInfo
			if err := json.Unmarshal([]byte(infoStr.String), &info); err != nil {
				t.Fatal(err)
			}
			if a, e := int64(info.Descriptor.NodeID), targetID; a != e {
				t.Fatalf("Node join with targetID %d had descriptor for wrong node %d", e, a)
			}

			// Verify cluster ID is recorded, and is the same for all nodes.
			if !uuid.Equal(confirmedClusterID, info.ClusterID) {
				t.Fatalf(
					"Node restart recorded different cluster ID than earlier join. Expected %s, got %s. Info: %v",
					confirmedClusterID, info.ClusterID, info)
			}

			seenCount++
		}
		if err := rows.Err(); err != nil {
			return err
		}
		if seenCount != 1 {
			return errors.Errorf("Expected only one node restart event, found %d", seenCount)
		}
		return nil
	})
}