// RangeLookup dispatches a RangeLookup request for the given
// metadata key to the replicas of the given range. Note that we allow
// inconsistent reads when doing range lookups for efficiency. Getting
// stale data is not a correctness problem but instead may
// infrequently result in additional latency as additional range
// lookups may be required. Note also that rangeLookup bypasses the
// DistSender's Send() method, so there is no error inspection and
// retry logic here; this is not an issue since the lookup performs a
// single inconsistent read only.
func (ds *DistSender) RangeLookup(key roachpb.RKey, desc *roachpb.RangeDescriptor, considerIntents, useReverseScan bool) ([]roachpb.RangeDescriptor, *roachpb.Error) {
	ba := roachpb.BatchRequest{}
	ba.ReadConsistency = roachpb.INCONSISTENT
	ba.Add(&roachpb.RangeLookupRequest{
		Span: roachpb.Span{
			// We can interpret the RKey as a Key here since it's a metadata
			// lookup; those are never local.
			Key: key.AsRawKey(),
		},
		MaxRanges:       ds.rangeLookupMaxRanges,
		ConsiderIntents: considerIntents,
		Reverse:         useReverseScan,
	})
	replicas := newReplicaSlice(ds.gossip, desc)
	// TODO(tschottdorf) consider a Trace here, potentially that of the request
	// that had the cache miss and waits for the result.
	sp := tracing.NoopSpan()
	br, err := ds.sendRPC(sp, desc.RangeID, replicas, orderRandom, ba)
	if err != nil {
		return nil, err
	}
	if br.Error != nil {
		return nil, br.Error
	}
	return br.Responses[0].GetInner().(*roachpb.RangeLookupResponse).Ranges, nil
}
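// The following is a hedged, self-contained sketch (illustration only, not
// CockroachDB code) of why a stale answer from an inconsistent RangeLookup is
// harmless, as the comment above claims: a caller can validate that the
// descriptor it got back actually covers the key and, if not, evict the cached
// entry and look it up again, paying only extra latency. The rangeDesc type and
// the lookup/evict helpers are hypothetical stand-ins; assumes imports of
// "bytes" and "errors".
type rangeDesc struct {
	startKey, endKey []byte
}

// contains reports whether key falls within [startKey, endKey).
func (d rangeDesc) contains(key []byte) bool {
	return bytes.Compare(key, d.startKey) >= 0 && bytes.Compare(key, d.endKey) < 0
}

// descriptorForKey retries the (possibly stale) lookup until it returns a
// descriptor that covers key, or the attempt budget is exhausted.
func descriptorForKey(key []byte, attempts int,
	lookup func(key []byte) (rangeDesc, error),
	evict func(key []byte)) (rangeDesc, error) {
	for i := 0; i < attempts; i++ {
		desc, err := lookup(key)
		if err != nil {
			return rangeDesc{}, err
		}
		if desc.contains(key) {
			return desc, nil
		}
		// Stale metadata: the range has since split or moved. Drop the cached
		// entry and look it up again.
		evict(key)
	}
	return rangeDesc{}, errors.New("range lookup kept returning stale descriptors")
}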
// processReplica processes a single replica. This should not be
// called externally to the queue. bq.mu.Lock should not be held
// while calling this method.
func (bq *baseQueue) processReplica(repl *Replica, clock *hlc.Clock) error {
	// Load the system config.
	cfg := bq.gossip.GetSystemConfig()
	if cfg == nil {
		bq.eventLog.Infof(log.V(1), "no system config available. skipping")
		return nil
	}

	desc := repl.Desc()
	if !bq.impl.acceptsUnsplitRanges() && cfg.NeedsSplit(desc.StartKey, desc.EndKey) {
		// Range needs to be split due to zone configs, but queue does
		// not accept unsplit ranges.
		bq.eventLog.Infof(log.V(3), "%s: split needed; skipping", repl)
		return nil
	}

	// If the queue requires a replica to have the range leader lease in
	// order to be processed, check whether this replica has the leader
	// lease and renew or acquire it if necessary.
	if bq.impl.needsLeaderLease() {
		// Acquire or renew the leader lease; if this replica cannot get
		// the lease, skip processing.
		if err := repl.redirectOnOrAcquireLeaderLease(tracing.NoopSpan()); err != nil {
			bq.eventLog.Infof(log.V(3), "%s: could not acquire leader lease; skipping", repl)
			return nil
		}
	}

	bq.eventLog.Infof(log.V(3), "%s: processing", repl)
	start := time.Now()
	if err := bq.impl.process(clock.Now(), repl, cfg); err != nil {
		return err
	}
	bq.eventLog.Infof(log.V(2), "%s: done: %s", repl, time.Since(start))
	return nil
}
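// A hedged sketch (not taken from the source tree) of the queue-implementation
// interface that baseQueue appears to rely on above, inferred from the calls
// bq.impl.acceptsUnsplitRanges(), bq.impl.needsLeaderLease(), and
// bq.impl.process(...). The interface name and the exact parameter types are
// assumptions made for illustration only.
type queueImpl interface {
	// acceptsUnsplitRanges returns true if the queue can process ranges that
	// still need to be split according to zone configs.
	acceptsUnsplitRanges() bool
	// needsLeaderLease returns true if the replica must hold the range leader
	// lease before process is invoked.
	needsLeaderLease() bool
	// process does the queue-specific work for one replica at the given
	// timestamp, consulting the gossiped system config.
	process(now roachpb.Timestamp, repl *Replica, sysCfg *config.SystemConfig) error
}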
// Send sends one or more RPCs to clients specified by the slice of
// replicas. On success, Send returns the first successful reply. Otherwise,
// Send returns an error if and as soon as the number of failed RPCs exceeds
// the available endpoints less the number of required replies.
//
// TODO(pmattis): Get rid of the getArgs function which requires the caller to
// maintain a map from address to replica. Instead, pass in the list of
// replicas rather than a list of addresses and use that to populate the
// requests.
func send(opts SendOptions, replicas ReplicaSlice, args roachpb.BatchRequest,
	context *rpc.Context) (proto.Message, error) {
	sp := opts.Trace
	if sp == nil {
		sp = tracing.NoopSpan()
	}

	if len(replicas) < 1 {
		return nil, roachpb.NewSendError(
			fmt.Sprintf("insufficient replicas (%d) to satisfy send request of %d",
				len(replicas), 1), false)
	}

	done := make(chan *netrpc.Call, len(replicas))

	clients := make([]batchClient, 0, len(replicas))
	for i, replica := range replicas {
		clients = append(clients, batchClient{
			Client:  rpc.NewClient(&replica.NodeDesc.Address, context),
			replica: &replicas[i],
			args:    args,
		})
	}

	var orderedClients []batchClient
	switch opts.Ordering {
	case orderStable:
		orderedClients = clients
	case orderRandom:
		// Randomly permute order, but keep known-unhealthy clients last.
		var nHealthy int
		for i, client := range clients {
			select {
			case <-client.Healthy():
				clients[i], clients[nHealthy] = clients[nHealthy], clients[i]
				nHealthy++
			default:
			}
		}
		shuffleClients(clients[:nHealthy])
		shuffleClients(clients[nHealthy:])
		orderedClients = clients
	}
	// TODO(spencer): going to need to also sort by affinity; closest
	// ping time should win. Makes sense to have the rpc client/server
	// heartbeat measure ping times. With a bit of seasoning, each
	// node will be able to order the healthy replicas based on latency.

	// Send the first request.
	sendOneFn(&orderedClients[0], opts.Timeout, context, sp, done)
	orderedClients = orderedClients[1:]

	var errors, retryableErrors int

	// Wait for completions.
	var sendNextTimer util.Timer
	defer sendNextTimer.Stop()
	for {
		sendNextTimer.Reset(opts.SendNextTimeout)
		select {
		case <-sendNextTimer.C:
			sendNextTimer.Read = true
			// On successive RPC timeouts, send to additional replicas if available.
			if len(orderedClients) > 0 {
				sp.LogEvent("timeout, trying next peer")
				sendOneFn(&orderedClients[0], opts.Timeout, context, sp, done)
				orderedClients = orderedClients[1:]
			}

		case call := <-done:
			if call.Error == nil {
				// Verify response data integrity if this is a proto response.
				if req, reqOk := call.Args.(roachpb.Request); reqOk {
					if resp, respOk := call.Reply.(roachpb.Response); respOk {
						if err := resp.Verify(req); err != nil {
							call.Error = err
						}
					} else {
						call.Error = util.Errorf("response to proto request must be a proto")
					}
				}
			}

			err := call.Error
			if err == nil {
				if log.V(2) {
					log.Infof("successful reply: %+v", call.Reply)
				}
				return call.Reply.(proto.Message), nil
			}

			// Error handling.
			if log.V(1) {
				log.Warningf("error reply: %s", err)
			}
			errors++

			// Since we have a reconnecting client here, disconnect errors are retryable.
			disconnected := err == netrpc.ErrShutdown || err == io.ErrUnexpectedEOF
			if retryErr, ok := err.(retry.Retryable); disconnected || (ok && retryErr.CanRetry()) {
				retryableErrors++
			}

			if remainingNonErrorRPCs := len(replicas) - errors; remainingNonErrorRPCs < 1 {
				return nil, roachpb.NewSendError(
					fmt.Sprintf("too many errors encountered (%d of %d total): %v",
						errors, len(clients), err),
					remainingNonErrorRPCs+retryableErrors >= 1)
			}

			// Send to additional replicas if available.
			if len(orderedClients) > 0 {
				sp.LogEvent("error, trying next peer")
				sendOneFn(&orderedClients[0], opts.Timeout, context, sp, done)
				orderedClients = orderedClients[1:]
			}
		}
	}
}
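// A minimal, self-contained sketch (illustration only, not CockroachDB code) of
// the fan-out strategy implemented by send above: issue the RPC to one replica,
// arm a send-next timer, try the next replica whenever the timer fires or an
// attempt fails, and return the first successful reply. The attempt type and
// the tryReplicas helper are hypothetical stand-ins; assumes imports of
// "errors" and "time".
type attempt struct {
	reply interface{}
	err   error
}

// tryReplicas fans calls out to the given replica addresses. call is expected
// to be non-blocking and to send its result on done exactly once.
func tryReplicas(addrs []string, sendNext time.Duration,
	call func(addr string, done chan<- attempt)) (interface{}, error) {
	if len(addrs) == 0 {
		return nil, errors.New("no replicas to send to")
	}
	done := make(chan attempt, len(addrs))
	pending := addrs

	// Send the first request immediately.
	call(pending[0], done)
	pending = pending[1:]

	timer := time.NewTimer(sendNext)
	defer timer.Stop()
	var failures int
	for {
		select {
		case <-timer.C:
			// The outstanding attempt is slow; speculatively try the next
			// replica if one remains, then re-arm the timer.
			if len(pending) > 0 {
				call(pending[0], done)
				pending = pending[1:]
			}
			timer.Reset(sendNext)
		case a := <-done:
			if a.err == nil {
				// First successful reply wins.
				return a.reply, nil
			}
			failures++
			if failures == len(addrs) {
				return nil, errors.New("all replicas failed")
			}
			// Try the next replica, if any, without waiting for the timer.
			if len(pending) > 0 {
				call(pending[0], done)
				pending = pending[1:]
			}
		}
	}
}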