Beispiel #1
0
// WaitForRebalance waits until there's been no recent range adds, removes, and
// splits. We wait until the cluster is balanced or until `StableInterval`
// elapses, whichever comes first. Then, it prints stats about the rebalancing
// process.
//
// This method is crude but necessary. If we were to wait until range counts
// were just about even, we'd miss potential post-rebalance thrashing.
func (at *allocatorTest) WaitForRebalance() error {
	db, err := gosql.Open("postgres", at.f.PGUrl(1))
	if err != nil {
		return err
	}
	defer func() {
		_ = db.Close()
	}()

	var timer timeutil.Timer
	defer timer.Stop()
	timer.Reset(0)
	for {
		select {
		case <-timer.C:
			timer.Read = true
			stable, err := at.checkAllocatorStable(db)
			if err != nil {
				return err
			}
			if stable {
				host := at.f.Nodes()[0]
				if err := at.printRebalanceStats(db, host, 8080); err != nil {
					return err
				}
				return nil
			}
		case <-stopper:
			return errors.New("interrupted")
		}

		timer.Reset(10 * time.Second)
	}
}
Beispiel #2
0
// processQueue creates a client and sends messages from its designated queue
// via that client, exiting when the client fails or when it idles out. All
// messages remaining in the queue at that point are lost and a new instance of
// processQueue should be started by the next message to be sent.
// TODO(tschottdorf) should let raft know if the node is down;
// need a feedback mechanism for that. Potentially easiest is to arrange for
// the next call to Send() to fail appropriately.
func (t *RaftTransport) processQueue(ch chan *RaftMessageRequest, toReplica roachpb.ReplicaDescriptor) error {
	addr, err := t.resolver(toReplica.NodeID)
	if err != nil {
		return err
	}

	conn, err := t.rpcContext.GRPCDial(addr.String())
	if err != nil {
		return err
	}
	client := NewMultiRaftClient(conn)

	ctx, cancel := context.WithCancel(context.TODO())
	defer cancel()
	stream, err := client.RaftMessage(ctx)
	if err != nil {
		return err
	}

	errCh := make(chan error, 1)

	// Starting workers in a task prevents data races during shutdown.
	if err := t.rpcContext.Stopper.RunTask(func() {
		t.rpcContext.Stopper.RunWorker(func() {
			errCh <- stream.RecvMsg(new(RaftMessageResponse))
		})
	}); err != nil {
		return err
	}

	var raftIdleTimer timeutil.Timer
	defer raftIdleTimer.Stop()
	for {
		raftIdleTimer.Reset(raftIdleTimeout)
		select {
		case <-t.rpcContext.Stopper.ShouldStop():
			return nil
		case <-raftIdleTimer.C:
			raftIdleTimer.Read = true
			return nil
		case err := <-errCh:
			return err
		case req := <-ch:
			err := stream.Send(req)
			if req.Message.Type == raftpb.MsgSnap {
				select {
				case <-t.rpcContext.Stopper.ShouldStop():
					return nil
				case t.SnapshotStatusChan <- RaftSnapshotStatus{req, err}:
				}

			}
			if err != nil {
				return err
			}
		}
	}
}
Beispiel #3
0
func (ctx *Context) runHeartbeat(cc *grpc.ClientConn, remoteAddr string) error {
	request := PingRequest{Addr: ctx.Addr}
	heartbeatClient := NewHeartbeatClient(cc)

	var heartbeatTimer timeutil.Timer
	defer heartbeatTimer.Stop()
	for {
		// If we should stop, return immediately. Note that we check this
		// at the beginning and end of the loop because we may 'continue'
		// before reaching the end.
		select {
		case <-ctx.Stopper.ShouldStop():
			return nil
		default:
		}
		sendTime := ctx.localClock.PhysicalTime()
		response, err := ctx.heartbeat(heartbeatClient, request)
		ctx.setConnHealthy(remoteAddr, err == nil)
		if err != nil {
			if grpc.Code(err) == codes.DeadlineExceeded {
				continue
			}
			return err
		}
		receiveTime := ctx.localClock.PhysicalTime()

		// Only update the clock offset measurement if we actually got a
		// successful response from the server.
		if pingDuration := receiveTime.Sub(sendTime); pingDuration > maximumPingDurationMult*ctx.localClock.MaxOffset() {
			request.Offset.Reset()
		} else {
			// Offset and error are measured using the remote clock reading
			// technique described in
			// http://se.inf.tu-dresden.de/pubs/papers/SRDS1994.pdf, page 6.
			// However, we assume that drift and min message delay are 0, for
			// now.
			request.Offset.MeasuredAt = receiveTime.UnixNano()
			request.Offset.Uncertainty = (pingDuration / 2).Nanoseconds()
			remoteTimeNow := time.Unix(0, response.ServerTime).Add(pingDuration / 2)
			request.Offset.Offset = remoteTimeNow.Sub(receiveTime).Nanoseconds()
		}
		ctx.RemoteClocks.UpdateOffset(remoteAddr, request.Offset)

		if cb := ctx.HeartbeatCB; cb != nil {
			cb()
		}

		// Wait after the heartbeat so that the first iteration gets a wait-free
		// heartbeat attempt.
		heartbeatTimer.Reset(ctx.HeartbeatInterval)
		select {
		case <-ctx.Stopper.ShouldStop():
			return nil
		case <-heartbeatTimer.C:
			heartbeatTimer.Read = true
		}
	}
}
// startStats blocks and periodically logs transaction statistics (throughput,
// success rates, durations, ...). Note that this only captures write txns,
// since read-only txns are stateless as far as TxnCoordSender is concerned.
// stats).
func (tc *TxnCoordSender) startStats() {
	res := time.Millisecond // for duration logging resolution
	var statusLogTimer timeutil.Timer
	defer statusLogTimer.Stop()
	scale := metric.Scale1M
	for {
		statusLogTimer.Reset(statusLogInterval)
		select {
		case <-statusLogTimer.C:
			statusLogTimer.Read = true
			if !log.V(1) {
				continue
			}

			// Take a snapshot of metrics. There's some chance of skew, since the snapshots are
			// not done atomically, but that should be fine for these debug stats.
			metrics := tc.metrics
			durations := metrics.Durations[scale].Current()
			restarts := metrics.Restarts.Current()
			commitRate := metrics.Commits.Rates[scale].Value()
			commit1PCRate := metrics.Commits1PC.Rates[scale].Value()
			abortRate := metrics.Aborts.Rates[scale].Value()
			abandonRate := metrics.Abandons.Rates[scale].Value()

			// Show transaction stats over the last minute. Maybe this should be shorter in the future.
			// We'll revisit if we get sufficient feedback.
			totalRate := commitRate + abortRate + abandonRate
			var pCommitted, pCommitted1PC, pAbandoned, pAborted float64
			if totalRate > 0 {
				pCommitted = 100 * (commitRate / totalRate)
				pCommitted1PC = 100 * (commit1PCRate / totalRate)
				pAborted = 100 * (abortRate / totalRate)
				pAbandoned = 100 * (abandonRate / totalRate)
			}

			dMean := durations.Mean()
			dDev := durations.StdDev()
			dMax := durations.Max()
			rMean := restarts.Mean()
			rDev := restarts.StdDev()
			rMax := restarts.Max()
			num := durations.TotalCount()

			log.Infof(tc.ctx,
				"txn coordinator: %.2f txn/sec, %.2f/%.2f/%.2f/%.2f %%cmmt/cmmt1pc/abrt/abnd, %s/%s/%s avg/σ/max duration, %.1f/%.1f/%d avg/σ/max restarts (%d samples)",
				totalRate, pCommitted, pCommitted1PC, pAborted, pAbandoned,
				util.TruncateDuration(time.Duration(dMean), res),
				util.TruncateDuration(time.Duration(dDev), res),
				util.TruncateDuration(time.Duration(dMax), res),
				rMean, rDev, rMax, num,
			)

		case <-tc.stopper.ShouldStop():
			return
		}
	}
}
Beispiel #5
0
func (ctx *Context) runHeartbeat(cc *grpc.ClientConn, remoteAddr string) error {
	request := PingRequest{Addr: ctx.Addr}
	heartbeatClient := NewHeartbeatClient(cc)

	var heartbeatTimer timeutil.Timer
	defer heartbeatTimer.Stop()

	// Give the first iteration a wait-free heartbeat attempt.
	nextHeartbeat := 0 * time.Nanosecond
	for {
		heartbeatTimer.Reset(nextHeartbeat)
		select {
		case <-ctx.Stopper.ShouldStop():
			return nil
		case <-heartbeatTimer.C:
			heartbeatTimer.Read = true
		}

		sendTime := ctx.localClock.PhysicalTime()
		response, err := ctx.heartbeat(heartbeatClient, request)
		ctx.setConnHealthy(remoteAddr, err == nil)
		if err == nil {
			receiveTime := ctx.localClock.PhysicalTime()

			// Only update the clock offset measurement if we actually got a
			// successful response from the server.
			if pingDuration := receiveTime.Sub(sendTime); pingDuration > maximumPingDurationMult*ctx.localClock.MaxOffset() {
				request.Offset.Reset()
			} else {
				// Offset and error are measured using the remote clock reading
				// technique described in
				// http://se.inf.tu-dresden.de/pubs/papers/SRDS1994.pdf, page 6.
				// However, we assume that drift and min message delay are 0, for
				// now.
				request.Offset.MeasuredAt = receiveTime.UnixNano()
				request.Offset.Uncertainty = (pingDuration / 2).Nanoseconds()
				remoteTimeNow := time.Unix(0, response.ServerTime).Add(pingDuration / 2)
				request.Offset.Offset = remoteTimeNow.Sub(receiveTime).Nanoseconds()
			}
			ctx.RemoteClocks.UpdateOffset(remoteAddr, request.Offset)

			if cb := ctx.HeartbeatCB; cb != nil {
				cb()
			}
		}

		// If the heartbeat timed out, run the next one immediately. Otherwise,
		// wait out the heartbeat interval on the next iteration.
		if grpc.Code(err) == codes.DeadlineExceeded {
			nextHeartbeat = 0
		} else {
			nextHeartbeat = ctx.HeartbeatInterval
		}
	}
}
Beispiel #6
0
// bootstrap connects the node to the gossip network. Bootstrapping
// commences in the event there are no connected clients or the
// sentinel gossip info is not available. After a successful bootstrap
// connection, this method will block on the stalled condvar, which
// receives notifications that gossip network connectivity has been
// lost and requires re-bootstrapping.
func (g *Gossip) bootstrap() {
	g.server.stopper.RunWorker(func() {
		ctx := log.WithLogTag(g.ctx, "bootstrap", nil)
		var bootstrapTimer timeutil.Timer
		defer bootstrapTimer.Stop()
		for {
			if g.server.stopper.RunTask(func() {
				g.mu.Lock()
				defer g.mu.Unlock()
				haveClients := g.outgoing.len() > 0
				haveSentinel := g.mu.is.getInfo(KeySentinel) != nil
				log.Eventf(ctx, "have clients: %t, have sentinel: %t", haveClients, haveSentinel)
				if !haveClients || !haveSentinel {
					// Try to get another bootstrap address from the resolvers.
					if addr := g.getNextBootstrapAddress(); addr != nil {
						g.startClient(addr, g.mu.is.NodeID)
					} else {
						bootstrapAddrs := make([]string, 0, len(g.bootstrapping))
						for addr := range g.bootstrapping {
							bootstrapAddrs = append(bootstrapAddrs, addr)
						}
						log.Eventf(ctx, "no next bootstrap address; currently bootstrapping: %v", bootstrapAddrs)
						// We couldn't start a client, signal that we're stalled so that
						// we'll retry.
						g.maybeSignalStatusChangeLocked()
					}
				}
			}) != nil {
				return
			}

			// Pause an interval before next possible bootstrap.
			bootstrapTimer.Reset(g.bootstrapInterval)
			log.Eventf(ctx, "sleeping %s until bootstrap", g.bootstrapInterval)
			select {
			case <-bootstrapTimer.C:
				bootstrapTimer.Read = true
				// break
			case <-g.server.stopper.ShouldStop():
				return
			}
			log.Eventf(ctx, "idling until bootstrap required")
			// Block until we need bootstrapping again.
			select {
			case <-g.stalledCh:
				log.Eventf(ctx, "detected stall; commencing bootstrap")
				// break
			case <-g.server.stopper.ShouldStop():
				return
			}
		}
	})
}
Beispiel #7
0
// bootstrap connects the node to the gossip network. Bootstrapping
// commences in the event there are no connected clients or the
// sentinel gossip info is not available. After a successful bootstrap
// connection, this method will block on the stalled condvar, which
// receives notifications that gossip network connectivity has been
// lost and requires re-bootstrapping.
func (g *Gossip) bootstrap() {
	stopper := g.server.stopper

	stopper.RunWorker(func() {
		var bootstrapTimer timeutil.Timer
		defer bootstrapTimer.Stop()
		for {
			if stopper.RunTask(func() {
				g.mu.Lock()
				defer g.mu.Unlock()
				haveClients := g.outgoing.len() > 0
				haveSentinel := g.is.getInfo(KeySentinel) != nil
				if !haveClients || !haveSentinel {
					// Try to get another bootstrap address from the resolvers.
					if addr := g.getNextBootstrapAddress(); addr != nil {
						g.startClient(addr, stopper)
					} else {
						// We couldn't start a client, signal that we're stalled so that
						// we'll retry.
						g.maybeSignalStalledLocked()
					}
				}
			}) != nil {
				return
			}

			// Pause an interval before next possible bootstrap.
			bootstrapTimer.Reset(g.bootstrapInterval)
			select {
			case <-bootstrapTimer.C:
				bootstrapTimer.Read = true
				// break
			case <-stopper.ShouldStop():
				return
			}
			// Block until we need bootstrapping again.
			select {
			case <-g.stalled:
				// break
			case <-stopper.ShouldStop():
				return
			}
		}
	})
}
Beispiel #8
0
// start will run continuously and expire old reservations.
func (b *bookie) start(stopper *stop.Stopper) {
	stopper.RunWorker(func() {
		var timeoutTimer timeutil.Timer
		defer timeoutTimer.Stop()
		for {
			var timeout time.Duration
			b.mu.Lock()
			nextExpiration := b.mu.queue.peek()
			if nextExpiration == nil {
				// No reservations to expire.
				timeout = b.reservationTimeout
			} else {
				now := b.clock.Now()
				if now.GoTime().After(nextExpiration.expireAt.GoTime()) {
					// We have a reservation expiration, remove it.
					expiredReservation := b.mu.queue.dequeue()
					// Is it an active reservation?
					if b.mu.reservationsByRangeID[expiredReservation.RangeID] == expiredReservation {
						b.fillReservationLocked(expiredReservation)
					} else if log.V(2) {
						log.Infof(context.TODO(), "the reservation for rangeID %d has already been filled.",
							expiredReservation.RangeID)
					}
					// Set the timeout to 0 to force another peek.
					timeout = 0
				} else {
					timeout = nextExpiration.expireAt.GoTime().Sub(now.GoTime())
				}
			}
			b.mu.Unlock()
			timeoutTimer.Reset(timeout)
			select {
			case <-timeoutTimer.C:
				timeoutTimer.Read = true
			case <-stopper.ShouldStop():
				return
			}
		}
	})
}
Beispiel #9
0
// start will run continuously and mark stores as offline if they haven't been
// heard from in longer than timeUntilStoreDead.
func (sp *StorePool) start(stopper *stop.Stopper) {
	stopper.RunWorker(func() {
		var timeoutTimer timeutil.Timer
		defer timeoutTimer.Stop()
		for {
			var timeout time.Duration
			sp.mu.Lock()
			detail := sp.mu.queue.peek()
			if detail == nil {
				// No stores yet, wait the full timeout.
				timeout = sp.timeUntilStoreDead
			} else {
				// Check to see if the store should be marked as dead.
				deadAsOf := detail.lastUpdatedTime.GoTime().Add(sp.timeUntilStoreDead)
				now := sp.clock.Now()
				if now.GoTime().After(deadAsOf) {
					deadDetail := sp.mu.queue.dequeue()
					deadDetail.markDead(now)
					// The next store might be dead as well, set the timeout to
					// 0 to process it immediately.
					timeout = 0
				} else {
					// Store is still alive, schedule the next check for when
					// it should timeout.
					timeout = deadAsOf.Sub(now.GoTime())
				}
			}
			sp.mu.Unlock()
			timeoutTimer.Reset(timeout)
			select {
			case <-timeoutTimer.C:
				timeoutTimer.Read = true
			case <-stopper.ShouldStop():
				return
			}
		}
	})
}
Beispiel #10
0
// sendToReplicas sends one or more RPCs to clients specified by the slice of
// replicas. On success, Send returns the first successful reply. Otherwise,
// Send returns an error if and as soon as the number of failed RPCs exceeds
// the available endpoints less the number of required replies.
func (ds *DistSender) sendToReplicas(
	opts SendOptions,
	rangeID roachpb.RangeID,
	replicas ReplicaSlice,
	args roachpb.BatchRequest,
	rpcContext *rpc.Context,
) (*roachpb.BatchResponse, error) {
	if len(replicas) < 1 {
		return nil, roachpb.NewSendError(
			fmt.Sprintf("insufficient replicas (%d) to satisfy send request of %d",
				len(replicas), 1))
	}

	done := make(chan BatchCall, len(replicas))

	transportFactory := opts.transportFactory
	if transportFactory == nil {
		transportFactory = grpcTransportFactory
	}
	transport, err := transportFactory(opts, rpcContext, replicas, args)
	if err != nil {
		return nil, err
	}
	defer transport.Close()
	if transport.IsExhausted() {
		return nil, roachpb.NewSendError(
			fmt.Sprintf("sending to all %d replicas failed", len(replicas)))
	}

	// Send the first request.
	pending := 1
	transport.SendNext(done)

	// Wait for completions. This loop will retry operations that fail
	// with errors that reflect per-replica state and may succeed on
	// other replicas.
	var sendNextTimer timeutil.Timer
	defer sendNextTimer.Stop()
	for {
		sendNextTimer.Reset(opts.SendNextTimeout)
		select {
		case <-sendNextTimer.C:
			sendNextTimer.Read = true
			// On successive RPC timeouts, send to additional replicas if available.
			if !transport.IsExhausted() {
				log.Trace(opts.Context, "timeout, trying next peer")
				pending++
				transport.SendNext(done)
			}

		case call := <-done:
			pending--
			err := call.Err
			if err == nil {
				if log.V(2) {
					log.Infof(opts.Context, "RPC reply: %+v", call.Reply)
				} else if log.V(1) && call.Reply.Error != nil {
					log.Infof(opts.Context, "application error: %s", call.Reply.Error)
				}

				if !ds.handlePerReplicaError(rangeID, call.Reply.Error) {
					return call.Reply, nil
				}

				// Extract the detail so it can be included in the error
				// message if this is our last replica.
				//
				// TODO(bdarnell): The last error is not necessarily the best
				// one to return; we may want to remember the "best" error
				// we've seen (for example, a NotLeaseHolderError conveys more
				// information than a RangeNotFound).
				err = call.Reply.Error.GoError()
			} else if log.V(1) {
				log.Warningf(opts.Context, "RPC error: %s", err)
			}

			// Send to additional replicas if available.
			if !transport.IsExhausted() {
				log.Tracef(opts.Context, "error, trying next peer: %s", err)
				pending++
				transport.SendNext(done)
			}
			if pending == 0 {
				return nil, roachpb.NewSendError(
					fmt.Sprintf("sending to all %d replicas failed; last error: %v",
						len(replicas), err))
			}
		}
	}
}
Beispiel #11
0
// processQueue opens a Raft client stream and sends messages from the
// designated queue (ch) via that stream, exiting when an error is received or
// when it idles out. All messages remaining in the queue at that point are
// lost and a new instance of processQueue will be started by the next message
// to be sent.
func (t *RaftTransport) processQueue(
	nodeID roachpb.NodeID,
	ch chan *RaftMessageRequest,
	stats *raftTransportStats,
	stream MultiRaft_RaftMessageClient,
) error {
	errCh := make(chan error, 1)

	// Starting workers in a task prevents data races during shutdown.
	if err := t.rpcContext.Stopper.RunTask(func() {
		t.rpcContext.Stopper.RunWorker(func() {
			errCh <- func() error {
				for {
					resp, err := stream.Recv()
					if err != nil {
						return err
					}
					atomic.AddInt64(&stats.clientRecv, 1)
					t.mu.Lock()
					handler, ok := t.mu.handlers[resp.ToReplica.StoreID]
					t.mu.Unlock()
					if !ok {
						log.Warningf(context.TODO(), "no handler found for store %s in response %s",
							resp.ToReplica.StoreID, resp)
						continue
					}
					handler.HandleRaftResponse(stream.Context(), resp)
				}
			}()
		})
	}); err != nil {
		return err
	}

	var raftIdleTimer timeutil.Timer
	defer raftIdleTimer.Stop()
	for {
		raftIdleTimer.Reset(raftIdleTimeout)
		select {
		case <-t.rpcContext.Stopper.ShouldStop():
			return nil
		case <-raftIdleTimer.C:
			raftIdleTimer.Read = true
			return nil
		case err := <-errCh:
			return err
		case req := <-ch:
			err := stream.Send(req)
			atomic.AddInt64(&stats.clientSent, 1)
			if req.Message.Type == raftpb.MsgSnap {
				select {
				case <-t.rpcContext.Stopper.ShouldStop():
					return nil
				case t.SnapshotStatusChan <- RaftSnapshotStatus{req, err}:
				}
			}
			if err != nil {
				return err
			}
		}
	}
}
Beispiel #12
0
// WaitForRebalance waits until there's been no recent range adds, removes, and
// splits. We wait until the cluster is balanced or until `StableInterval`
// elapses, whichever comes first. Then, it prints stats about the rebalancing
// process. If the replica count appears unbalanced, an error is returned.
//
// This method is crude but necessary. If we were to wait until range counts
// were just about even, we'd miss potential post-rebalance thrashing.
func (at *allocatorTest) WaitForRebalance(t *testing.T) error {
	const statsInterval = 20 * time.Second

	db, err := gosql.Open("postgres", at.f.PGUrl(0))
	if err != nil {
		return err
	}
	defer func() {
		_ = db.Close()
	}()

	var statsTimer timeutil.Timer
	var assertTimer timeutil.Timer
	defer statsTimer.Stop()
	defer assertTimer.Stop()
	statsTimer.Reset(statsInterval)
	assertTimer.Reset(0)
	for {
		select {
		case <-statsTimer.C:
			statsTimer.Read = true
			stats, err := at.allocatorStats(db)
			if err != nil {
				return err
			}

			log.Info(context.Background(), stats)
			if StableInterval <= stats.ElapsedSinceLastEvent {
				host := at.f.Nodes()[0]
				log.Infof(context.Background(), "replica count = %f, max = %f", stats.ReplicaCountStdDev, *flagATMaxStdDev)
				if stats.ReplicaCountStdDev > *flagATMaxStdDev {
					_ = at.printRebalanceStats(db, host)
					return errors.Errorf(
						"%s elapsed without changes, but replica count standard "+
							"deviation is %.2f (>%.2f)", stats.ElapsedSinceLastEvent,
						stats.ReplicaCountStdDev, *flagATMaxStdDev)
				}
				return at.printRebalanceStats(db, host)
			}
			statsTimer.Reset(statsInterval)
		case <-assertTimer.C:
			assertTimer.Read = true
			at.f.Assert(t)
			assertTimer.Reset(time.Minute)
		case <-stopper:
			return errors.New("interrupted")
		}
	}
}
// Run performs a continuous load test with the given parameters, checking the
// health of the cluster regularly. Any failure of a CockroachDB node or load
// generator constitutes a test failure. The test runs for the duration given
// by the `test.timeout` flag, minus the time it takes to reliably tear down
// the test cluster.
func (cl continuousLoadTest) Run(t *testing.T) {
	f := farmer(t, cl.Prefix+cl.shortTestTimeout())
	ctx, err := WithClusterTimeout(context.Background())
	if err != nil {
		t.Fatal(err)
	}
	if deadline, ok := ctx.Deadline(); ok {
		log.Infof(ctx, "load test will end at %s", deadline)
	} else {
		log.Infof(ctx, "load test will run indefinitely")
	}

	// Start cluster and load.
	defer func() {
		if r := recover(); r != nil {
			t.Errorf("recovered from panic to destroy cluster: %v", r)
		}
		f.MustDestroy(t)
	}()
	f.AddVars["benchmark_name"] = cl.BenchmarkPrefix + cl.shortTestTimeout()
	if cl.CockroachDiskSizeGB != 0 {
		f.AddVars["cockroach_disk_size"] = strconv.Itoa(cl.CockroachDiskSizeGB)
	}
	if err := f.Resize(cl.NumNodes); err != nil {
		t.Fatal(err)
	}
	checkGossip(t, f, longWaitTime, hasPeers(cl.NumNodes))
	start := timeutil.Now()
	if err := cl.startLoad(f); err != nil {
		t.Fatal(err)
	}
	cl.assert(t, f)

	// Run load, checking the health of the cluster periodically.
	const healthCheckInterval = 2 * time.Minute
	var healthCheckTimer timeutil.Timer
	healthCheckTimer.Reset(healthCheckInterval)
	lastQueryCount := 0.0
	lastHealthCheck := timeutil.Now()
	for {
		select {
		case <-healthCheckTimer.C:
			// Check that all nodes are up and that queries are being processed.
			healthCheckTimer.Read = true
			cl.assert(t, f)
			queryCount, err := cl.queryCount(f)
			if err != nil {
				t.Fatal(err)
			}
			if lastQueryCount == queryCount {
				t.Fatalf("no queries in the last %s", healthCheckInterval)
			}
			qps := (queryCount - lastQueryCount) / timeutil.Now().Sub(lastHealthCheck).Seconds()
			if qps < *flagCLTMinQPS {
				t.Fatalf("qps (%.1f) < min (%.1f)", qps, *flagCLTMinQPS)
			}
			lastQueryCount = queryCount
			lastHealthCheck = timeutil.Now()
			healthCheckTimer.Reset(healthCheckInterval)
			log.Infof(ctx, "health check ok %s (%.1f qps)", timeutil.Now().Sub(start), qps)
		case <-ctx.Done():
			log.Infof(ctx, "load test finished")
			cl.assert(t, f)
			if err := f.Stop(0, cl.Process); err != nil {
				t.Error(err)
			}
			return
		case <-stopper:
			t.Fatal("interrupted")
		}
	}
}
Beispiel #14
0
// processQueue creates a client and sends messages from its designated queue
// via that client, exiting when the client fails or when it idles out. All
// messages remaining in the queue at that point are lost and a new instance of
// processQueue should be started by the next message to be sent.
// TODO(tschottdorf) should let raft know if the node is down;
// need a feedback mechanism for that. Potentially easiest is to arrange for
// the next call to Send() to fail appropriately.
func (t *RaftTransport) processQueue(nodeID roachpb.NodeID) {
	t.mu.Lock()
	ch, ok := t.mu.queues[nodeID]
	t.mu.Unlock()
	if !ok {
		return
	}
	// Clean-up when the loop below shuts down.
	defer func() {
		t.mu.Lock()
		delete(t.mu.queues, nodeID)
		t.mu.Unlock()
	}()

	addr, err := t.resolver(nodeID)
	if err != nil {
		if log.V(1) {
			log.Errorf("failed to get address for node %d: %s", nodeID, err)
		}
		return
	}

	if log.V(1) {
		log.Infof("dialing node %d at %s", nodeID, addr)
	}
	conn, err := t.rpcContext.GRPCDial(addr.String())
	if err != nil {
		if log.V(1) {
			log.Errorf("failed to dial: %s", err)
		}
		return
	}
	client := NewMultiRaftClient(conn)
	ctx, cancel := context.WithCancel(context.TODO())
	defer cancel()
	if log.V(1) {
		log.Infof("establishing Raft transport stream to node %d at %s", nodeID, addr)
	}
	// We start two streams; one will be used for snapshots, the other for all
	// other traffic. This is done to prevent snapshots from blocking other
	// traffic.
	streams := make([]MultiRaft_RaftMessageClient, 2)
	for i := range streams {
		stream, err := client.RaftMessage(ctx)
		if err != nil {
			if log.V(1) {
				log.Errorf("failed to establish Raft transport stream to node %d at %s: %s", nodeID, addr, err)
			}
			return
		}
		streams[i] = stream
	}

	errCh := make(chan error, len(streams))

	// Starting workers in a task prevents data races during shutdown.
	t.rpcContext.Stopper.RunTask(func() {
		for i := range streams {
			// Avoid closing over a `range` binding.
			stream := streams[i]

			t.rpcContext.Stopper.RunWorker(func() {
				// NB: only one error will ever be read from this channel. That's fine,
				// given that the channel is buffered to the maximum number of errors
				// that will be written to it.
				errCh <- stream.RecvMsg(new(RaftMessageResponse))
			})
		}
	})

	snapStream := streams[0]
	restStream := streams[1]

	var raftIdleTimer timeutil.Timer
	defer raftIdleTimer.Stop()
	for {
		raftIdleTimer.Reset(raftIdleTimeout)
		select {
		case <-t.rpcContext.Stopper.ShouldStop():
			return
		case <-raftIdleTimer.C:
			raftIdleTimer.Read = true
			if log.V(1) {
				log.Infof("closing Raft transport to %d at %s due to inactivity", nodeID, addr)
			}
			return
		case err := <-errCh:
			if log.V(1) {
				if err != nil {
					log.Infof("remote node %d at %s closed Raft transport with error: %s", nodeID, addr, err)
				} else {
					log.Infof("remote node %d at %s closed Raft transport", nodeID, addr)
				}
			}
			return
		case req := <-ch:
			if req.Message.Type == raftpb.MsgSnap {
				t.rpcContext.Stopper.RunAsyncTask(func() {
					err := snapStream.Send(req)
					if err != nil {
						log.Errorf("failed to send Raft snapshot to node %d at %s: %s", nodeID, addr, err)
					} else if log.V(1) {
						log.Infof("successfully sent a Raft snapshot to node %d at %s", nodeID, addr)
					}
					t.SnapshotStatusChan <- RaftSnapshotStatus{req, err}
				})
			} else {
				if err := restStream.Send(req); err != nil {
					log.Error(err)
					return
				}
			}
		}
	}
}