Example #1
// RangeLookup dispatches a RangeLookup request for the given
// metadata key to the replicas of the given range. Note that we allow
// inconsistent reads when doing range lookups for efficiency. Getting
// stale data is not a correctness problem but instead may
// infrequently result in additional latency as additional range
// lookups may be required. Note also that RangeLookup bypasses the
// DistSender's Send() method, so there is no error inspection and
// retry logic here; this is not an issue since the lookup performs a
// single inconsistent read only.
//
// considerIntents and useReverseScan are copied into the request's
// ConsiderIntents and Reverse fields; MaxRanges is taken from
// ds.rangeLookupMaxRanges. On success the Ranges of the first response in
// the batch are returned. An error returned by sendRPC, or an error carried
// in the batch response, is returned unchanged with a nil slice.
func (ds *DistSender) RangeLookup(key roachpb.RKey, desc *roachpb.RangeDescriptor, considerIntents, useReverseScan bool) ([]roachpb.RangeDescriptor, *roachpb.Error) {
	ba := roachpb.BatchRequest{}
	// Ask for an inconsistent read; see the function comment for why stale
	// results are acceptable here.
	ba.ReadConsistency = roachpb.INCONSISTENT
	// NOTE(review): the statement that opens the request literal below (and
	// the matching closing braces) is not visible in this excerpt. Presumably
	// it adds a range lookup request to ba -- confirm against the full file.
		Span: roachpb.Span{
			// We can interpret the RKey as a Key here since it's a metadata
			// lookup; those are never local.
			Key: key.AsRawKey(),
		MaxRanges:       ds.rangeLookupMaxRanges,
		ConsiderIntents: considerIntents,
		Reverse:         useReverseScan,
	// Build the candidate replica list from gossip and the supplied descriptor.
	replicas := newReplicaSlice(ds.gossip, desc)
	// TODO(tschottdorf) consider a Trace here, potentially that of the request
	// that had the cache miss and waits for the result.
	sp := tracing.NoopSpan()
	// Send directly via sendRPC with the orderRandom ordering, using a no-op
	// tracing span.
	br, err := ds.sendRPC(sp, desc.RangeID, replicas, orderRandom, ba)
	if err != nil {
		return nil, err
	// An error embedded in the batch response is surfaced to the caller as-is.
	if br.Error != nil {
		return nil, br.Error
	// Unchecked index and single-value type assertion: this panics if the
	// batch response is empty or its first entry is not a
	// *roachpb.RangeLookupResponse.
	return br.Responses[0].GetInner().(*roachpb.RangeLookupResponse).Ranges, nil
Example #2
// processReplica processes a single replica. This should not be
// called externally to the queue. bq.mu.Lock should not be held
// while calling this method.
//
// The replica is skipped (nil is returned and an event is logged) when no
// system config is available, when the range needs a split and the queue
// implementation does not accept unsplit ranges, or when the queue requires
// the leader lease and redirectOnOrAcquireLeaderLease returns an error.
// Otherwise the error from bq.impl.process is returned, or nil on success.
func (bq *baseQueue) processReplica(repl *Replica, clock *hlc.Clock) error {
	// Load the system config.
	cfg := bq.gossip.GetSystemConfig()
	if cfg == nil {
		bq.eventLog.Infof(log.V(1), "no system config available. skipping")
		return nil

	desc := repl.Desc()
	if !bq.impl.acceptsUnsplitRanges() && cfg.NeedsSplit(desc.StartKey, desc.EndKey) {
		// Range needs to be split due to zone configs, but queue does
		// not accept unsplit ranges.
		bq.eventLog.Infof(log.V(3), "%s: split needed; skipping", repl)
		return nil

	// If the queue requires a replica to have the range leader lease in
	// order to be processed, check whether this replica has leader lease
	// and renew or acquire if necessary.
	if bq.impl.needsLeaderLease() {
		// Invoke redirectOnOrAcquireLeaderLease with a no-op tracing span. A
		// failure is only logged; the replica is skipped and the error is not
		// propagated to the caller.
		if err := repl.redirectOnOrAcquireLeaderLease(tracing.NoopSpan()); err != nil {
			bq.eventLog.Infof(log.V(3), "%s: could not acquire leader lease; skipping", repl)
			return nil

	bq.eventLog.Infof(log.V(3), "%s: processing", repl)
	// Record the wall-clock start so the elapsed time can be logged below.
	start := time.Now()
	// The queue implementation receives the current clock reading, the
	// replica, and the system config loaded above.
	if err := bq.impl.process(clock.Now(), repl, cfg); err != nil {
		return err
	bq.eventLog.Infof(log.V(2), "%s: done: %s", repl, time.Since(start))
	return nil
Example #3
// send sends one or more RPCs to clients specified by the slice of
// replicas. On success, send returns the first successful reply. Otherwise,
// send returns an error if and as soon as the number of failed RPCs exceeds
// the available endpoints less the number of required replies.
// TODO(pmattis): Get rid of the getArgs function which requires the caller to
// maintain a map from address to replica. Instead, pass in the list of
// replicas instead of a list of addresses and use that to populate the
// requests.
// NOTE(review): no getArgs parameter appears in the signature below, so the
// TODO above may be stale -- confirm.
//
// An empty replicas slice yields a SendError immediately. opts.Trace is
// optional; a no-op span is substituted when it is nil.
func send(opts SendOptions, replicas ReplicaSlice,
	args roachpb.BatchRequest, context *rpc.Context) (proto.Message, error) {
	sp := opts.Trace
	// Fall back to a no-op span so the LogEvent calls below are always safe.
	if sp == nil {
		sp = tracing.NoopSpan()

	// At least one replica is required to send anything.
	if len(replicas) < 1 {
		return nil, roachpb.NewSendError(
			fmt.Sprintf("insufficient replicas (%d) to satisfy send request of %d",
				len(replicas), 1), false)

	// Completion channel with one buffer slot per replica.
	done := make(chan *netrpc.Call, len(replicas))

	// Build one batchClient per replica. The replica pointer is taken by
	// indexing into replicas rather than from the loop variable.
	clients := make([]batchClient, 0, len(replicas))
	for i, replica := range replicas {
		clients = append(clients, batchClient{
			Client:  rpc.NewClient(&replica.NodeDesc.Address, context),
			replica: &replicas[i],
			args:    args,

	var orderedClients []batchClient
	switch opts.Ordering {
	case orderStable:
		orderedClients = clients
	case orderRandom:
		// Randomly permute order, but keep known-unhealthy clients last.
		// NOTE(review): only the swap of a healthy client into position
		// nHealthy is visible here; the increment of nHealthy, any other
		// select case, and the shuffle itself are not shown in this excerpt.
		var nHealthy int
		for i, client := range clients {
			select {
			case <-client.Healthy():
				clients[i], clients[nHealthy] = clients[nHealthy], clients[i]


		orderedClients = clients
	// TODO(spencer): going to need to also sort by affinity; closest
	// ping time should win. Makes sense to have the rpc client/server
	// heartbeat measure ping times. With a bit of seasoning, each
	// node will be able to order the healthy replicas based on latency.

	// Send the first request.
	sendOneFn(&orderedClients[0], opts.Timeout, context, sp, done)
	// orderedClients now holds only the clients that have not been tried yet.
	orderedClients = orderedClients[1:]

	// NOTE(review): no statement in this excerpt increments either counter;
	// presumably that happens in lines missing from the error path below.
	var errors, retryableErrors int

	// Wait for completions.
	// NOTE(review): no Reset of sendNextTimer is visible in this excerpt, so
	// it is unclear from here when the timer case can fire -- confirm.
	var sendNextTimer util.Timer
	defer sendNextTimer.Stop()
	for {
		select {
		case <-sendNextTimer.C:
			// Record that the timer's channel value has been consumed
			// (presumably required by util.Timer's contract -- confirm).
			sendNextTimer.Read = true
			// On successive RPC timeouts, send to additional replicas if available.
			if len(orderedClients) > 0 {
				sp.LogEvent("timeout, trying next peer")
				sendOneFn(&orderedClients[0], opts.Timeout, context, sp, done)
				orderedClients = orderedClients[1:]

		case call := <-done:
			if call.Error == nil {
				// Verify response data integrity if this is a proto response.
				// A failed Verify, or a non-Response reply to a Request,
				// converts the call into an error.
				if req, reqOk := call.Args.(roachpb.Request); reqOk {
					if resp, respOk := call.Reply.(roachpb.Response); respOk {
						if err := resp.Verify(req); err != nil {
							call.Error = err
					} else {
						call.Error = util.Errorf("response to proto request must be a proto")
			err := call.Error
			if err == nil {
				if log.V(2) {
					log.Infof("successful reply: %+v", call.Reply)

				// The first successful reply ends the call.
				return call.Reply.(proto.Message), nil

			// Error handling.
			if log.V(1) {
				log.Warningf("error reply: %s", err)


			// Since we have a reconnecting client here, disconnect errors are retryable.
			disconnected := err == netrpc.ErrShutdown || err == io.ErrUnexpectedEOF
			// NOTE(review): the body of this condition is not visible in this
			// excerpt; presumably it counts the error as retryable.
			if retryErr, ok := err.(retry.Retryable); disconnected || (ok && retryErr.CanRetry()) {

			// Give up once the error count reaches the number of replicas.
			// The second argument to NewSendError is true when at least one
			// error was counted as retryable (presumably a "can retry" flag
			// -- confirm against NewSendError).
			if remainingNonErrorRPCs := len(replicas) - errors; remainingNonErrorRPCs < 1 {
				return nil, roachpb.NewSendError(
					fmt.Sprintf("too many errors encountered (%d of %d total): %v",
						errors, len(clients), err), remainingNonErrorRPCs+retryableErrors >= 1)
			// Send to additional replicas if available.
			if len(orderedClients) > 0 {
				sp.LogEvent("error, trying next peer")
				sendOneFn(&orderedClients[0], opts.Timeout, context, sp, done)
				orderedClients = orderedClients[1:]