Example #1
func (ctx *Context) runHeartbeat(cc *grpc.ClientConn, remoteAddr string) error {
	request := PingRequest{
		Addr:           ctx.Addr,
		MaxOffsetNanos: ctx.localClock.MaxOffset().Nanoseconds(),
	}
	heartbeatClient := NewHeartbeatClient(cc)

	var heartbeatTimer timeutil.Timer
	defer heartbeatTimer.Stop()

	// Give the first iteration a wait-free heartbeat attempt.
	nextHeartbeat := 0 * time.Nanosecond
	for {
		heartbeatTimer.Reset(nextHeartbeat)
		select {
		case <-ctx.Stopper.ShouldStop():
			return nil
		case <-heartbeatTimer.C:
			heartbeatTimer.Read = true
		}

		sendTime := ctx.localClock.PhysicalTime()
		response, err := ctx.heartbeat(heartbeatClient, request)
		ctx.setConnHealthy(remoteAddr, err == nil)
		if err == nil {
			receiveTime := ctx.localClock.PhysicalTime()

			// Only update the clock offset measurement if we actually got a
			// successful response from the server.
			if pingDuration := receiveTime.Sub(sendTime); pingDuration > maximumPingDurationMult*ctx.localClock.MaxOffset() {
				request.Offset.Reset()
			} else {
				// Offset and error are measured using the remote clock reading
				// technique described in
				// http://se.inf.tu-dresden.de/pubs/papers/SRDS1994.pdf, page 6.
				// However, we assume that drift and min message delay are 0, for
				// now.
				request.Offset.MeasuredAt = receiveTime.UnixNano()
				request.Offset.Uncertainty = (pingDuration / 2).Nanoseconds()
				remoteTimeNow := time.Unix(0, response.ServerTime).Add(pingDuration / 2)
				request.Offset.Offset = remoteTimeNow.Sub(receiveTime).Nanoseconds()
			}
			ctx.RemoteClocks.UpdateOffset(remoteAddr, request.Offset)

			if cb := ctx.HeartbeatCB; cb != nil {
				cb()
			}
		}

		// If the heartbeat timed out, run the next one immediately. Otherwise,
		// wait out the heartbeat interval on the next iteration.
		if grpc.Code(err) == codes.DeadlineExceeded {
			nextHeartbeat = 0
		} else {
			nextHeartbeat = ctx.HeartbeatInterval
		}
	}
}
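Every example in this listing follows the same idiom: the pooled timeutil.Timer is Reset at the top of the loop, and the consumer sets timer.Read = true immediately after receiving from timer.C so that the next Reset knows the channel has already been drained. The sketch below shows roughly what that bookkeeping looks like with the standard library's time.Timer alone; the pollUntilStopped name, the wake channel, and the poll callback are illustrative only, and the description of timeutil.Timer's internals is a reading of these examples rather than its documented contract.

package timerloop

import "time"

// pollUntilStopped is a minimal sketch of the loop shape shared by the
// examples, written against the standard library's time.Timer instead of
// timeutil.Timer. With time.Timer, Reset is only safe once any pending
// expiration has been drained from C; timeutil.Timer folds that dance into
// Reset, and the Read field is how callers tell it the channel was already
// received from.
func pollUntilStopped(stop, wake <-chan struct{}, interval time.Duration, poll func()) {
	timer := time.NewTimer(interval)
	defer timer.Stop()
	for {
		select {
		case <-timer.C:
			// We received from the channel ourselves; this is the moment the
			// examples record with timer.Read = true.
			poll()
		case <-wake:
			// The timer may have fired while this case was being handled, so
			// stop it and drain any pending expiration before the Reset below.
			if !timer.Stop() {
				select {
				case <-timer.C:
				default:
				}
			}
			poll()
		case <-stop:
			return
		}
		timer.Reset(interval)
	}
}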
Example #2
// bootstrap connects the node to the gossip network. Bootstrapping
// commences in the event there are no connected clients or the
// sentinel gossip info is not available. After a successful bootstrap
// connection, this method will block on the stalled condvar, which
// receives notifications that gossip network connectivity has been
// lost and requires re-bootstrapping.
func (g *Gossip) bootstrap() {
	g.server.stopper.RunWorker(func() {
		ctx := g.AnnotateCtx(context.Background())
		ctx = log.WithLogTag(ctx, "bootstrap", nil)
		var bootstrapTimer timeutil.Timer
		defer bootstrapTimer.Stop()
		for {
			if g.server.stopper.RunTask(func() {
				g.mu.Lock()
				defer g.mu.Unlock()
				haveClients := g.outgoing.len() > 0
				haveSentinel := g.mu.is.getInfo(KeySentinel) != nil
				log.Eventf(ctx, "have clients: %t, have sentinel: %t", haveClients, haveSentinel)
				if !haveClients || !haveSentinel {
					// Try to get another bootstrap address from the resolvers.
					if addr := g.getNextBootstrapAddress(); addr != nil {
						g.startClient(addr, g.NodeID.Get())
					} else {
						bootstrapAddrs := make([]string, 0, len(g.bootstrapping))
						for addr := range g.bootstrapping {
							bootstrapAddrs = append(bootstrapAddrs, addr)
						}
						log.Eventf(ctx, "no next bootstrap address; currently bootstrapping: %v", bootstrapAddrs)
						// We couldn't start a client, signal that we're stalled so that
						// we'll retry.
						g.maybeSignalStatusChangeLocked()
					}
				}
			}) != nil {
				return
			}

			// Pause an interval before next possible bootstrap.
			bootstrapTimer.Reset(g.bootstrapInterval)
			log.Eventf(ctx, "sleeping %s until bootstrap", g.bootstrapInterval)
			select {
			case <-bootstrapTimer.C:
				bootstrapTimer.Read = true
				// break
			case <-g.server.stopper.ShouldStop():
				return
			}
			log.Eventf(ctx, "idling until bootstrap required")
			// Block until we need bootstrapping again.
			select {
			case <-g.stalledCh:
				log.Eventf(ctx, "detected stall; commencing bootstrap")
				// break
			case <-g.server.stopper.ShouldStop():
				return
			}
		}
	})
}
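The bootstrap loop above blocks on g.stalledCh until something signals that gossip connectivity was lost. A plausible shape for that signal, sketched below with hypothetical names, is a one-slot channel written with a non-blocking send so that repeated status changes coalesce into a single wakeup; this is an assumption about what maybeSignalStatusChangeLocked feeds, not a copy of it.

package gossipstall

// stallNotifier sketches the coalescing wakeup assumed above: a buffered
// channel of capacity one, signalled without blocking, so a waiter such as
// the bootstrap loop is woken at most once no matter how many status changes
// occur while it is busy.
type stallNotifier struct {
	ch chan struct{}
}

func newStallNotifier() *stallNotifier {
	return &stallNotifier{ch: make(chan struct{}, 1)}
}

// signal queues a wakeup; if one is already pending, it is a no-op.
func (n *stallNotifier) signal() {
	select {
	case n.ch <- struct{}{}:
	default:
	}
}

// wakeups returns the channel to select on, as bootstrap does with stalledCh.
func (n *stallNotifier) wakeups() <-chan struct{} { return n.ch }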
Example #3
// start will run continuously and expire old reservations.
func (b *bookie) start(stopper *stop.Stopper) {
	stopper.RunWorker(func() {
		var timeoutTimer timeutil.Timer
		defer timeoutTimer.Stop()
		ctx := context.TODO()
		for {
			var timeout time.Duration
			b.mu.Lock()
			nextExpiration := b.mu.queue.peek()
			if nextExpiration == nil {
				// No reservations to expire.
				timeout = b.reservationTimeout
			} else {
				now := b.clock.Now()
				if now.GoTime().After(nextExpiration.expireAt.GoTime()) {
					// We have a reservation expiration, remove it.
					expiredReservation := b.mu.queue.dequeue()
					// Is it an active reservation?
					if b.mu.reservationsByRangeID[expiredReservation.RangeID] == expiredReservation {
						b.fillReservationLocked(ctx, expiredReservation)
					} else if log.V(2) {
						log.Infof(ctx, "[r%d] expired reservation has already been filled",
							expiredReservation.RangeID)
					}
					// Set the timeout to 0 to force another peek.
					timeout = 0
				} else {
					timeout = nextExpiration.expireAt.GoTime().Sub(now.GoTime())
				}
			}
			b.mu.Unlock()
			timeoutTimer.Reset(timeout)
			select {
			case <-timeoutTimer.C:
				timeoutTimer.Read = true
			case <-stopper.ShouldStop():
				return
			}
		}
	})
}
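Both this loop and the StorePool loop in Example #5 compute their sleep duration the same way: a full default interval when the queue is empty, zero when the head of the queue has already expired (so the next iteration peeks again immediately), and otherwise exactly the time remaining until the head expires. A small helper capturing that decision might look like the following; the name and the zero-time convention for an empty queue are illustrative.

package expiry

import "time"

// nextWakeup returns how long an expiration loop should sleep, given the
// earliest deadline in its queue (zero time meaning the queue is empty) and
// the fallback interval to use in that case (reservationTimeout above, or
// timeUntilStoreDead in Example #5).
func nextWakeup(now, earliest time.Time, fallback time.Duration) time.Duration {
	if earliest.IsZero() {
		// Nothing queued; check again after the full interval.
		return fallback
	}
	if !now.Before(earliest) {
		// The head of the queue has already expired; process it and re-peek
		// immediately by sleeping for zero time.
		return 0
	}
	// Sleep exactly until the head of the queue expires.
	return earliest.Sub(now)
}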
Example #4
// WaitForRebalance waits until there have been no recent range adds, removes, or
// splits; that is, until the cluster is balanced or until `StableInterval`
// elapses, whichever comes first. It then prints stats about the rebalancing
// process. If the replica count appears unbalanced, an error is returned.
//
// This method is crude but necessary. If we were to wait until range counts
// were just about even, we'd miss potential post-rebalance thrashing.
func (at *allocatorTest) WaitForRebalance(ctx context.Context, t *testing.T) error {
	const statsInterval = 20 * time.Second

	db, err := gosql.Open("postgres", at.f.PGUrl(ctx, 0))
	if err != nil {
		return err
	}
	defer func() {
		_ = db.Close()
	}()

	var statsTimer timeutil.Timer
	var assertTimer timeutil.Timer
	defer statsTimer.Stop()
	defer assertTimer.Stop()
	statsTimer.Reset(statsInterval)
	assertTimer.Reset(0)
	for {
		select {
		case <-statsTimer.C:
			statsTimer.Read = true
			stats, err := at.allocatorStats(db)
			if err != nil {
				return err
			}

			log.Info(ctx, stats)
			if StableInterval <= stats.ElapsedSinceLastEvent {
				host := at.f.Hostname(0)
				log.Infof(context.Background(), "replica count = %f, max = %f", stats.ReplicaCountStdDev, *flagATMaxStdDev)
				if stats.ReplicaCountStdDev > *flagATMaxStdDev {
					_ = at.printRebalanceStats(db, host)
					return errors.Errorf(
						"%s elapsed without changes, but replica count standard "+
							"deviation is %.2f (>%.2f)", stats.ElapsedSinceLastEvent,
						stats.ReplicaCountStdDev, *flagATMaxStdDev)
				}
				return at.printRebalanceStats(db, host)
			}
			statsTimer.Reset(statsInterval)
		case <-assertTimer.C:
			assertTimer.Read = true
			at.f.Assert(ctx, t)
			assertTimer.Reset(time.Minute)
		case <-stopper.ShouldStop():
			return errors.New("interrupted")
		}
	}
}
Example #5
// start will run continuously and mark stores as offline if they haven't been
// heard from in longer than timeUntilStoreDead.
func (sp *StorePool) start(stopper *stop.Stopper) {
	stopper.RunWorker(func() {
		var timeoutTimer timeutil.Timer
		defer timeoutTimer.Stop()
		for {
			var timeout time.Duration
			sp.mu.Lock()
			detail := sp.mu.queue.peek()
			if detail == nil {
				// No stores yet, wait the full timeout.
				timeout = sp.timeUntilStoreDead
			} else {
				// Check to see if the store should be marked as dead.
				deadAsOf := detail.lastUpdatedTime.GoTime().Add(sp.timeUntilStoreDead)
				now := sp.clock.Now()
				if now.GoTime().After(deadAsOf) {
					deadDetail := sp.mu.queue.dequeue()
					deadDetail.markDead(now)
					// The next store might be dead as well, set the timeout to
					// 0 to process it immediately.
					timeout = 0
				} else {
					// Store is still alive, schedule the next check for when
					// it should timeout.
					timeout = deadAsOf.Sub(now.GoTime())
				}
			}
			sp.mu.Unlock()
			timeoutTimer.Reset(timeout)
			select {
			case <-timeoutTimer.C:
				timeoutTimer.Read = true
			case <-stopper.ShouldStop():
				return
			}
		}
	})
}
Example #6
// sendToReplicas sends one or more RPCs to clients specified by the
// slice of replicas. On success, Send returns the first successful
// reply. If an error occurs which is not specific to a single
// replica, it's returned immediately. Otherwise, when all replicas
// have been tried and failed, returns a send error.
func (ds *DistSender) sendToReplicas(
	opts SendOptions,
	rangeID roachpb.RangeID,
	replicas ReplicaSlice,
	args roachpb.BatchRequest,
	rpcContext *rpc.Context,
) (*roachpb.BatchResponse, error) {
	if len(replicas) < 1 {
		return nil, roachpb.NewSendError(
			fmt.Sprintf("insufficient replicas (%d) to satisfy send request of %d",
				len(replicas), 1))
	}

	var ambiguousResult bool
	var haveCommit bool
	// We only check for committed txns, not aborts because aborts may
	// be retried without any risk of inconsistencies.
	if etArg, ok := args.GetArg(roachpb.EndTransaction); ok &&
		etArg.(*roachpb.EndTransactionRequest).Commit {
		haveCommit = true
	}
	done := make(chan BatchCall, len(replicas))

	transportFactory := opts.transportFactory
	if transportFactory == nil {
		transportFactory = grpcTransportFactory
	}
	transport, err := transportFactory(opts, rpcContext, replicas, args)
	if err != nil {
		return nil, err
	}
	defer transport.Close()
	if transport.IsExhausted() {
		return nil, roachpb.NewSendError(
			fmt.Sprintf("sending to all %d replicas failed", len(replicas)))
	}

	// Send the first request.
	pending := 1
	log.VEventf(opts.ctx, 2, "sending RPC for batch: %s", args.Summary())
	transport.SendNext(done)

	// Wait for completions. This loop will retry operations that fail
	// with errors that reflect per-replica state and may succeed on
	// other replicas.
	var sendNextTimer timeutil.Timer
	defer sendNextTimer.Stop()
	for {
		sendNextTimer.Reset(opts.SendNextTimeout)
		select {
		case <-sendNextTimer.C:
			sendNextTimer.Read = true
			// On successive RPC timeouts, send to additional replicas if available.
			if !transport.IsExhausted() {
				log.VEventf(opts.ctx, 2, "timeout, trying next peer")
				pending++
				transport.SendNext(done)
			}

		case call := <-done:
			pending--
			err := call.Err
			if err == nil {
				if log.V(2) {
					log.Infof(opts.ctx, "RPC reply: %s", call.Reply)
				} else if log.V(1) && call.Reply.Error != nil {
					log.Infof(opts.ctx, "application error: %s", call.Reply.Error)
				}

				if call.Reply.Error == nil {
					return call.Reply, nil
				} else if !ds.handlePerReplicaError(opts.ctx, transport, rangeID, call.Reply.Error) {
					// The error received is not specific to this replica, so we
					// should return it instead of trying other replicas. However,
					// if we're trying to commit a transaction and there are
					// still other RPCs outstanding or an ambiguous RPC error
					// was already received, we must return an ambiguous commit
					// error instead of the returned error.
					if haveCommit && (pending > 0 || ambiguousResult) {
						return nil, roachpb.NewAmbiguousResultError()
					}
					return call.Reply, nil
				}

				// Extract the detail so it can be included in the error
				// message if this is our last replica.
				//
				// TODO(bdarnell): The last error is not necessarily the best
				// one to return; we may want to remember the "best" error
				// we've seen (for example, a NotLeaseHolderError conveys more
				// information than a RangeNotFound).
				err = call.Reply.Error.GoError()
			} else {
				if log.V(1) {
					log.Warningf(opts.ctx, "RPC error: %s", err)
				}
				// All connection errors except for an unavailable node (this
				// is GRPC's fail-fast error), may mean that the request
				// succeeded on the remote server, but we were unable to
				// receive the reply. Set the ambiguous commit flag.
				//
				// We retry ambiguous commit batches to avoid returning the
				// unrecoverable AmbiguousResultError. This is safe because
				// repeating an already-successfully applied batch is
				// guaranteed to return either a TransactionReplayError (in
				// case the replay happens at the original leader), or a
				// TransactionRetryError (in case the replay happens at a new
				// leader). If the original attempt merely timed out or was
				// lost, then the batch will succeed and we can be assured the
				// commit was applied just once.
				//
				// The Unavailable code is used by GRPC to indicate that a
				// request fails fast and is not sent, so we can be sure there
				// is no ambiguity on these errors. Note that these are common
				// if a node is down.
				// See https://github.com/grpc/grpc-go/blob/52f6504dc290bd928a8139ba94e3ab32ed9a6273/call.go#L182
				// See https://github.com/grpc/grpc-go/blob/52f6504dc290bd928a8139ba94e3ab32ed9a6273/stream.go#L158
				if haveCommit && grpc.Code(err) != codes.Unavailable {
					ambiguousResult = true
				}
			}

			// Send to additional replicas if available.
			if !transport.IsExhausted() {
				log.VEventf(opts.ctx, 2, "error, trying next peer: %s", err)
				pending++
				transport.SendNext(done)
			}
			if pending == 0 {
				if ambiguousResult {
					err = roachpb.NewAmbiguousResultError()
				} else {
					err = roachpb.NewSendError(
						fmt.Sprintf("sending to all %d replicas failed; last error: %v", len(replicas), err),
					)
				}
				if log.V(2) {
					log.ErrEvent(opts.ctx, err.Error())
				}
				return nil, err
			}
		}
	}
}
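Stripped of gRPC, error classification, and the ambiguous-commit handling, the core of sendToReplicas is a hedged fan-out: one attempt is always outstanding, each SendNextTimeout without a reply starts another attempt against the next replica, and the first clean reply wins. The sketch below is that skeleton only, with invented names (sendHedged, send, result) and a plain string standing in for the batch response; it deliberately omits the per-replica error handling and ambiguity tracking shown above.

package hedged

import (
	"context"
	"fmt"
	"time"
)

// result pairs a reply with its error, playing the role of BatchCall.
type result struct {
	reply string
	err   error
}

// sendHedged issues the request to replicas one at a time, starting an
// additional attempt whenever sendNextTimeout elapses without a reply, and
// returns the first successful reply. When every attempt has failed it
// returns the last error seen.
func sendHedged(
	ctx context.Context,
	replicas []string,
	sendNextTimeout time.Duration,
	send func(ctx context.Context, replica string) result,
) (string, error) {
	if len(replicas) == 0 {
		return "", fmt.Errorf("no replicas to send to")
	}
	// Buffered so that late replies never block an abandoned goroutine.
	done := make(chan result, len(replicas))
	next, pending := 0, 0
	start := func() {
		go func(r string) { done <- send(ctx, r) }(replicas[next])
		next++
		pending++
	}
	start()

	timer := time.NewTimer(sendNextTimeout)
	defer timer.Stop()
	var lastErr error
	for {
		select {
		case <-timer.C:
			// No reply within the hedging timeout: try the next replica, if
			// any remain, while the earlier attempts stay outstanding.
			if next < len(replicas) {
				start()
			}
			timer.Reset(sendNextTimeout)
		case res := <-done:
			pending--
			if res.err == nil {
				return res.reply, nil
			}
			lastErr = res.err
			if next < len(replicas) {
				start()
			} else if pending == 0 {
				return "", fmt.Errorf("sending to all %d replicas failed; last error: %v",
					len(replicas), lastErr)
			}
		case <-ctx.Done():
			return "", ctx.Err()
		}
	}
}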
Example #7
// Run performs a continuous load test with the given parameters, checking the
// health of the cluster regularly. Any failure of a CockroachDB node or load
// generator constitutes a test failure. The test runs for the duration given
// by the `test.timeout` flag, minus the time it takes to reliably tear down
// the test cluster.
func (cl continuousLoadTest) Run(ctx context.Context, t testing.TB) {
	f := MakeFarmer(t, cl.Prefix+cl.shortTestTimeout(), stopper)
	// If the timeout flag was set, calculate an appropriate lower timeout by
	// subtracting expected cluster creation and teardown times to allow for
	// proper shutdown at the end of the test.
	if fl := flag.Lookup("test.timeout"); fl != nil {
		timeout, err := time.ParseDuration(fl.Value.String())
		if err != nil {
			t.Fatal(err)
		}
		// We've empirically observed 6-7 minute teardown times. We set aside a
		// larger duration to account for outliers and setup time.
		if setupTeardownDuration := 10 * time.Minute; timeout <= setupTeardownDuration {
			t.Fatalf("test.timeout must be greater than create/destroy interval %s", setupTeardownDuration)
		} else {
			var cancel context.CancelFunc
			ctx, cancel = context.WithTimeout(ctx, timeout-setupTeardownDuration)
			defer cancel()
		}
	}
	if deadline, ok := ctx.Deadline(); ok {
		log.Infof(ctx, "load test will end at %s", deadline)
	} else {
		log.Infof(ctx, "load test will run indefinitely")
	}

	// Start cluster and load.
	defer func() {
		if r := recover(); r != nil {
			t.Errorf("recovered from panic to destroy cluster: %v", r)
		}
		f.MustDestroy(t)
	}()
	f.AddVars["benchmark_name"] = cl.BenchmarkPrefix + cl.shortTestTimeout()
	if cl.CockroachDiskSizeGB != 0 {
		f.AddVars["cockroach_disk_size"] = strconv.Itoa(cl.CockroachDiskSizeGB)
	}
	if err := f.Resize(cl.NumNodes); err != nil {
		t.Fatal(err)
	}
	CheckGossip(ctx, t, f, longWaitTime, HasPeers(cl.NumNodes))
	start := timeutil.Now()
	if err := f.StartLoad(ctx, cl.Process, *flagCLTWriters); err != nil {
		t.Fatal(err)
	}
	f.Assert(ctx, t)

	// Run load, checking the health of the cluster periodically.
	const healthCheckInterval = 2 * time.Minute
	var healthCheckTimer timeutil.Timer
	healthCheckTimer.Reset(healthCheckInterval)
	lastQueryCount := 0.0
	lastHealthCheck := timeutil.Now()
	for {
		select {
		case <-healthCheckTimer.C:
			// Check that all nodes are up and that queries are being processed.
			healthCheckTimer.Read = true
			f.Assert(ctx, t)
			queryCount, err := cl.queryCount(f)
			if err != nil {
				t.Fatal(err)
			}
			if lastQueryCount == queryCount {
				t.Fatalf("no queries in the last %s", healthCheckInterval)
			}
			qps := (queryCount - lastQueryCount) / timeutil.Now().Sub(lastHealthCheck).Seconds()
			if qps < *flagCLTMinQPS {
				t.Fatalf("qps (%.1f) < min (%.1f)", qps, *flagCLTMinQPS)
			}
			lastQueryCount = queryCount
			lastHealthCheck = timeutil.Now()
			healthCheckTimer.Reset(healthCheckInterval)
			log.Infof(ctx, "health check ok %s (%.1f qps)", timeutil.Now().Sub(start), qps)
		case <-ctx.Done():
			log.Infof(ctx, "load test finished")
			f.Assert(ctx, t)
			if err := f.Stop(ctx, 0, cl.Process); err != nil {
				t.Error(err)
			}
			return
		case <-stopper.ShouldStop():
			t.Fatal("interrupted")
		}
	}
}
Example #8
// printStatsLoop blocks and periodically logs transaction statistics
// (throughput, success rates, durations, ...). Note that this only captures
// write txns, since read-only txns are stateless as far as TxnCoordSender is
// concerned.
func (tc *TxnCoordSender) printStatsLoop(ctx context.Context) {
	res := time.Millisecond // for duration logging resolution
	var statusLogTimer timeutil.Timer
	defer statusLogTimer.Stop()
	scale := metric.Scale1M
	for {
		statusLogTimer.Reset(statusLogInterval)
		select {
		case <-statusLogTimer.C:
			statusLogTimer.Read = true
			// Take a snapshot of metrics. There's some chance of skew, since the snapshots are
			// not done atomically, but that should be fine for these debug stats.
			metrics := tc.metrics
			durations, durationsWindow := metrics.Durations.Windowed()
			restarts, restartsWindow := metrics.Restarts.Windowed()
			if restartsWindow != durationsWindow {
				log.Fatalf(ctx,
					"misconfigured windowed histograms: %s != %s",
					restartsWindow,
					durationsWindow,
				)
			}
			commitRate := metrics.Commits.Rates[scale].Value()
			commit1PCRate := metrics.Commits1PC.Rates[scale].Value()
			abortRate := metrics.Aborts.Rates[scale].Value()
			abandonRate := metrics.Abandons.Rates[scale].Value()

			// Show transaction stats over the last minute. Maybe this should
			// be shorter in the future. We'll revisit if we get sufficient
			// feedback.
			totalRate := commitRate + abortRate + abandonRate
			var pCommitted, pCommitted1PC, pAbandoned, pAborted float64
			if totalRate > 0 {
				pCommitted = 100 * (commitRate / totalRate)
				pCommitted1PC = 100 * (commit1PCRate / totalRate)
				pAborted = 100 * (abortRate / totalRate)
				pAbandoned = 100 * (abandonRate / totalRate)
			}

			dMean := durations.Mean()
			dDev := durations.StdDev()
			dMax := durations.Max()
			rMean := restarts.Mean()
			rDev := restarts.StdDev()
			rMax := restarts.Max()
			num := durations.TotalCount()

			// We could skip calculating everything if !log.V(1) but we want to make
			// sure the code above doesn't silently break.
			if log.V(1) {
				log.Infof(ctx,
					"txn coordinator: %.2f txn/sec, %.2f/%.2f/%.2f/%.2f %%cmmt/cmmt1pc/abrt/abnd, "+
						"%s/%s/%s avg/σ/max duration, %.1f/%.1f/%d avg/σ/max restarts (%d samples over %s)",
					totalRate, pCommitted, pCommitted1PC, pAborted, pAbandoned,
					util.TruncateDuration(time.Duration(dMean), res),
					util.TruncateDuration(time.Duration(dDev), res),
					util.TruncateDuration(time.Duration(dMax), res),
					rMean, rDev, rMax, num, restartsWindow,
				)
			}
		case <-tc.stopper.ShouldStop():
			return
		}
	}
}
Example #9
// processQueue opens a Raft client stream and sends messages from the
// designated queue (ch) via that stream, exiting when an error is received or
// when it idles out. All messages remaining in the queue at that point are
// lost and a new instance of processQueue will be started by the next message
// to be sent.
func (t *RaftTransport) processQueue(
	nodeID roachpb.NodeID,
	ch chan *RaftMessageRequest,
	stats *raftTransportStats,
	stream MultiRaft_RaftMessageBatchClient,
) error {
	errCh := make(chan error, 1)

	// Starting workers in a task prevents data races during shutdown.
	if err := t.rpcContext.Stopper.RunTask(func() {
		t.rpcContext.Stopper.RunWorker(func() {
			ctx := t.AnnotateCtx(context.Background())
			errCh <- func() error {
				for {
					resp, err := stream.Recv()
					if err != nil {
						return err
					}
					atomic.AddInt64(&stats.clientRecv, 1)
					t.recvMu.Lock()
					handler, ok := t.recvMu.handlers[resp.ToReplica.StoreID]
					t.recvMu.Unlock()
					if !ok {
						log.Warningf(ctx, "no handler found for store %s in response %s",
							resp.ToReplica.StoreID, resp)
						continue
					}
					if err := handler.HandleRaftResponse(stream.Context(), resp); err != nil {
						return err
					}
				}
			}()
		})
	}); err != nil {
		return err
	}

	var raftIdleTimer timeutil.Timer
	defer raftIdleTimer.Stop()
	batch := &RaftMessageRequestBatch{}
	for {
		raftIdleTimer.Reset(raftIdleTimeout)
		select {
		case <-t.rpcContext.Stopper.ShouldStop():
			return nil
		case <-raftIdleTimer.C:
			raftIdleTimer.Read = true
			return nil
		case err := <-errCh:
			return err
		case req := <-ch:
			batch.Requests = append(batch.Requests, *req)

			// Pull off as many queued requests as possible.
			//
			// TODO(peter): Think about limiting the size of the batch we send.
			for done := false; !done; {
				select {
				case req = <-ch:
					batch.Requests = append(batch.Requests, *req)
				default:
					done = true
				}
			}

			err := stream.Send(batch)
			batch.Requests = batch.Requests[:0]

			atomic.AddInt64(&stats.clientSent, 1)
			if err != nil {
				return err
			}
		}
	}
}
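The batching step in the req := <-ch case above is a common non-blocking drain: having received one request, the loop keeps pulling from the channel until a receive would block, so a single stream.Send carries everything that was already queued. Isolated, with a placeholder message type and function name, it looks like this:

package raftbatch

// message is a placeholder for *RaftMessageRequest.
type message struct {
	payload string
}

// drainInto appends first to batch and then keeps appending whatever is
// already queued on ch, returning as soon as a receive would block (or the
// channel is closed), so the caller can send one batch per wakeup.
func drainInto(batch []message, first message, ch <-chan message) []message {
	batch = append(batch, first)
	for {
		select {
		case m, ok := <-ch:
			if !ok {
				// Channel closed; nothing more will ever arrive.
				return batch
			}
			batch = append(batch, m)
		default:
			// Channel is empty right now; stop collecting.
			return batch
		}
	}
}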
Example #10
// Run performs a continuous load test with the given parameters, checking the
// health of the cluster regularly. Any failure of a CockroachDB node or load
// generator constitutes a test failure. The test runs for the duration given
// by the `test.timeout` flag, minus the time it takes to reliably tear down
// the test cluster.
func (cl continuousLoadTest) Run(t *testing.T) {
	f := farmer(t, cl.Prefix+cl.shortTestTimeout())
	ctx, err := WithClusterTimeout(context.Background())
	if err != nil {
		t.Fatal(err)
	}
	if deadline, ok := ctx.Deadline(); ok {
		log.Infof(ctx, "load test will end at %s", deadline)
	} else {
		log.Infof(ctx, "load test will run indefinitely")
	}

	// Start cluster and load.
	defer func() {
		if r := recover(); r != nil {
			t.Errorf("recovered from panic to destroy cluster: %v", r)
		}
		f.MustDestroy(t)
	}()
	f.AddVars["benchmark_name"] = cl.BenchmarkPrefix + cl.shortTestTimeout()
	if cl.CockroachDiskSizeGB != 0 {
		f.AddVars["cockroach_disk_size"] = strconv.Itoa(cl.CockroachDiskSizeGB)
	}
	if err := f.Resize(cl.NumNodes); err != nil {
		t.Fatal(err)
	}
	checkGossip(t, f, longWaitTime, hasPeers(cl.NumNodes))
	start := timeutil.Now()
	if err := cl.startLoad(f); err != nil {
		t.Fatal(err)
	}
	cl.assert(t, f)

	// Run load, checking the health of the cluster periodically.
	const healthCheckInterval = 2 * time.Minute
	var healthCheckTimer timeutil.Timer
	healthCheckTimer.Reset(healthCheckInterval)
	lastQueryCount := 0.0
	lastHealthCheck := timeutil.Now()
	for {
		select {
		case <-healthCheckTimer.C:
			// Check that all nodes are up and that queries are being processed.
			healthCheckTimer.Read = true
			cl.assert(t, f)
			queryCount, err := cl.queryCount(f)
			if err != nil {
				t.Fatal(err)
			}
			if lastQueryCount == queryCount {
				t.Fatalf("no queries in the last %s", healthCheckInterval)
			}
			qps := (queryCount - lastQueryCount) / timeutil.Now().Sub(lastHealthCheck).Seconds()
			if qps < *flagCLTMinQPS {
				t.Fatalf("qps (%.1f) < min (%.1f)", qps, *flagCLTMinQPS)
			}
			lastQueryCount = queryCount
			lastHealthCheck = timeutil.Now()
			healthCheckTimer.Reset(healthCheckInterval)
			log.Infof(ctx, "health check ok %s (%.1f qps)", timeutil.Now().Sub(start), qps)
		case <-ctx.Done():
			log.Infof(ctx, "load test finished")
			cl.assert(t, f)
			if err := f.Stop(0, cl.Process); err != nil {
				t.Error(err)
			}
			return
		case <-stopper:
			t.Fatal("interrupted")
		}
	}
}