Example #1
// sendToReplicas sends one or more RPCs to clients specified by the
// slice of replicas. On success, it returns the first successful
// reply. If an error occurs that is not specific to a single
// replica, it is returned immediately. Otherwise, once all replicas
// have been tried and have failed, a send error is returned.
func (ds *DistSender) sendToReplicas(
	opts SendOptions,
	rangeID roachpb.RangeID,
	replicas ReplicaSlice,
	args roachpb.BatchRequest,
	rpcContext *rpc.Context,
) (*roachpb.BatchResponse, error) {
	if len(replicas) < 1 {
		return nil, roachpb.NewSendError(
			fmt.Sprintf("insufficient replicas (%d) to satisfy send request of %d",
				len(replicas), 1))
	}

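	// ambiguousResult is set when an RPC error leaves it unknown whether
	// the request was applied on the remote server; haveCommit is set when
	// this batch commits a transaction, in which case such ambiguity must
	// be surfaced to the caller.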
	var ambiguousResult bool
	var haveCommit bool
	// We only check for committed txns, not aborts, because aborts may
	// be retried without any risk of inconsistencies.
	if etArg, ok := args.GetArg(roachpb.EndTransaction); ok &&
		etArg.(*roachpb.EndTransactionRequest).Commit {
		haveCommit = true
	}
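	// done receives one BatchCall per outstanding RPC. It is buffered to
	// the number of replicas so that replies arriving after this function
	// has returned do not block their senders.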
	done := make(chan BatchCall, len(replicas))

	transportFactory := opts.transportFactory
	if transportFactory == nil {
		transportFactory = grpcTransportFactory
	}
	transport, err := transportFactory(opts, rpcContext, replicas, args)
	if err != nil {
		return nil, err
	}
	defer transport.Close()
	if transport.IsExhausted() {
		return nil, roachpb.NewSendError(
			fmt.Sprintf("sending to all %d replicas failed", len(replicas)))
	}

	// Send the first request.
	pending := 1
	log.VEventf(opts.ctx, 2, "sending RPC for batch: %s", args.Summary())
	transport.SendNext(done)

	// Wait for completions. This loop will retry operations that fail
	// with errors that reflect per-replica state and may succeed on
	// other replicas.
	var sendNextTimer timeutil.Timer
	defer sendNextTimer.Stop()
	for {
		sendNextTimer.Reset(opts.SendNextTimeout)
		select {
		case <-sendNextTimer.C:
			sendNextTimer.Read = true
			// On successive RPC timeouts, send to additional replicas if available.
			if !transport.IsExhausted() {
				log.VEventf(opts.ctx, 2, "timeout, trying next peer")
				pending++
				transport.SendNext(done)
			}

		case call := <-done:
			pending--
			err := call.Err
			if err == nil {
				if log.V(2) {
					log.Infof(opts.ctx, "RPC reply: %s", call.Reply)
				} else if log.V(1) && call.Reply.Error != nil {
					log.Infof(opts.ctx, "application error: %s", call.Reply.Error)
				}

				if call.Reply.Error == nil {
					return call.Reply, nil
				} else if !ds.handlePerReplicaError(opts.ctx, transport, rangeID, call.Reply.Error) {
					// The error received is not specific to this replica, so we
					// should return it instead of trying other replicas. However,
					// if we're trying to commit a transaction and there are
					// still other RPCs outstanding or an ambiguous RPC error
					// was already received, we must return an ambiguous commit
					// error instead of the returned error.
					if haveCommit && (pending > 0 || ambiguousResult) {
						return nil, roachpb.NewAmbiguousResultError()
					}
					return call.Reply, nil
				}

				// Extract the detail so it can be included in the error
				// message if this is our last replica.
				//
				// TODO(bdarnell): The last error is not necessarily the best
				// one to return; we may want to remember the "best" error
				// we've seen (for example, a NotLeaseHolderError conveys more
				// information than a RangeNotFound).
				err = call.Reply.Error.GoError()
			} else {
				if log.V(1) {
					log.Warningf(opts.ctx, "RPC error: %s", err)
				}
				// Any connection error other than an unavailable node (which
				// is GRPC's fail-fast error) may mean that the request
				// succeeded on the remote server but that we were unable to
				// receive the reply. Set the ambiguous commit flag.
				//
				// We retry ambiguous commit batches to avoid returning the
				// unrecoverable AmbiguousResultError. This is safe because
				// repeating an already-successfully applied batch is
				// guaranteed to return either a TransactionReplayError (in
				// case the replay happens at the original leader), or a
				// TransactionRetryError (in case the replay happens at a new
				// leader). If the original attempt merely timed out or was
				// lost, then the batch will succeed and we can be assured the
				// commit was applied just once.
				//
				// The Unavailable code is used by GRPC to indicate that a
				// request fails fast and is not sent, so we can be sure there
				// is no ambiguity on these errors. Note that these are common
				// if a node is down.
				// See https://github.com/grpc/grpc-go/blob/52f6504dc290bd928a8139ba94e3ab32ed9a6273/call.go#L182
				// See https://github.com/grpc/grpc-go/blob/52f6504dc290bd928a8139ba94e3ab32ed9a6273/stream.go#L158
				if haveCommit && grpc.Code(err) != codes.Unavailable {
					ambiguousResult = true
				}
			}

			// Send to additional replicas if available.
			if !transport.IsExhausted() {
				log.VEventf(opts.ctx, 2, "error, trying next peer: %s", err)
				pending++
				transport.SendNext(done)
			}
			if pending == 0 {
				if ambiguousResult {
					err = roachpb.NewAmbiguousResultError()
				} else {
					err = roachpb.NewSendError(
						fmt.Sprintf("sending to all %d replicas failed; last error: %v", len(replicas), err),
					)
				}
				if log.V(2) {
					log.ErrEvent(opts.ctx, err.Error())
				}
				return nil, err
			}
		}
	}
}
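
The control flow above (a buffered completion channel, a pending counter, and a per-attempt timeout that triggers a speculative send to the next replica) can be illustrated outside the DistSender. The standalone sketch below uses hypothetical names (sendWithFallback, call, and the fake replica closures) and a plain time.After in place of timeutil.Timer; only the retry and fan-out pattern mirrors sendToReplicas, not its API.

package main

import (
	"errors"
	"fmt"
	"time"
)

// call is a hypothetical stand-in for BatchCall: a reply or an error.
type call struct {
	reply string
	err   error
}

// sendWithFallback tries replicas in order: it starts the first attempt,
// then starts another whenever the per-attempt timeout fires or an attempt
// fails, and returns the first successful reply.
func sendWithFallback(replicas []func() call, timeout time.Duration) (string, error) {
	// Buffered like `done` above so that late replies never block.
	done := make(chan call, len(replicas))
	next := 0
	sendNext := func() {
		r := replicas[next]
		next++
		go func() { done <- r() }()
	}

	pending := 1
	sendNext()

	var lastErr error
	for {
		select {
		case <-time.After(timeout):
			// Timeout: speculatively try an additional replica if one is left.
			if next < len(replicas) {
				pending++
				sendNext()
			}
		case c := <-done:
			pending--
			if c.err == nil {
				return c.reply, nil
			}
			lastErr = c.err
			// Per-replica failure: fall through to the next replica if one is left.
			if next < len(replicas) {
				pending++
				sendNext()
			}
			if pending == 0 {
				return "", fmt.Errorf("sending to all %d replicas failed; last error: %v",
					len(replicas), lastErr)
			}
		}
	}
}

func main() {
	slow := func() call { time.Sleep(50 * time.Millisecond); return call{reply: "slow ok"} }
	failing := func() call { return call{err: errors.New("range not found")} }
	fast := func() call { return call{reply: "fast ok"} }

	reply, err := sendWithFallback([]func() call{slow, failing, fast}, 10*time.Millisecond)
	fmt.Println(reply, err)
}

With the 10ms timeout above, the slow replica is outpaced by the timer, the second replica fails immediately, and the third returns the reply, matching the way sendToReplicas falls through to the next peer on a per-replica error.
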
Example #2
func (n *Node) batchInternal(
	ctx context.Context, args *roachpb.BatchRequest,
) (*roachpb.BatchResponse, error) {
	// TODO(marc): grpc's authentication model (which gives credential access in
	// the request handler) doesn't really fit with the current design of the
	// security package (which assumes that TLS state is only given at connection
	// time) - that should be fixed.
	if p, ok := peer.FromContext(ctx); ok {
		if tlsInfo, ok := p.AuthInfo.(credentials.TLSInfo); ok {
			certUser, err := security.GetCertificateUser(&tlsInfo.State)
			if err != nil {
				return nil, err
			}
			if certUser != security.NodeUser {
				return nil, errors.Errorf("user %s is not allowed", certUser)
			}
		}
	}

	var br *roachpb.BatchResponse

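	// snowballInfo accumulates encoded trace spans for a snowball-traced
	// request; done is set once the response has been assembled so that
	// spans recorded after that point are discarded.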
	type snowballInfo struct {
		syncutil.Mutex
		collectedSpans [][]byte
		done           bool
	}
	var snowball *snowballInfo

	if err := n.stopper.RunTaskWithErr(func() error {
		const opName = "node.Batch"
		sp, err := tracing.JoinOrNew(n.storeCfg.AmbientCtx.Tracer, args.TraceContext, opName)
		if err != nil {
			return err
		}
		// If this is a snowball span, it gets special treatment: it skips the
		// regular tracing machinery, and we instead send the collected spans
		// back with the response. This is more expensive, but then again,
		// those are individual requests traced by users, so they can afford
		// to be.
		if sp.BaggageItem(tracing.Snowball) != "" {
			sp.LogEvent("delegating to snowball tracing")
			sp.Finish()

			snowball = new(snowballInfo)
			recorder := func(rawSpan basictracer.RawSpan) {
				snowball.Lock()
				defer snowball.Unlock()
				if snowball.done {
					// This is a late span that we must discard because the request was
					// already completed.
					return
				}
				encSp, err := tracing.EncodeRawSpan(&rawSpan, nil)
				if err != nil {
					log.Warning(ctx, err)
					// Don't record a span that failed to encode.
					return
				}
				snowball.collectedSpans = append(snowball.collectedSpans, encSp)
			}

			if sp, err = tracing.JoinOrNewSnowball(opName, args.TraceContext, recorder); err != nil {
				return err
			}
		}
		defer sp.Finish()
		traceCtx := opentracing.ContextWithSpan(ctx, sp)
		log.Event(traceCtx, args.Summary())

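		// Execute the batch against the local stores, recording the call
		// latency and folding any error into the response.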
		tStart := timeutil.Now()
		var pErr *roachpb.Error
		br, pErr = n.stores.Send(traceCtx, *args)
		if pErr != nil {
			br = &roachpb.BatchResponse{}
			log.ErrEventf(traceCtx, "%T", pErr.GetDetail())
		}
		if br.Error != nil {
			panic(roachpb.ErrorUnexpectedlySet(n.stores, br))
		}
		n.metrics.callComplete(timeutil.Since(tStart), pErr)
		br.Error = pErr
		return nil
	}); err != nil {
		return nil, err
	}

	if snowball != nil {
		snowball.Lock()
		br.CollectedSpans = snowball.collectedSpans
		snowball.done = true
		snowball.Unlock()
	}

	return br, nil
}
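
The identity check at the top of batchInternal relies on security.GetCertificateUser, which is internal to CockroachDB. Assuming it simply reports the Common Name of the first verified peer certificate (an assumption, not confirmed by this snippet), a standalone equivalent can be sketched with only the gRPC peer and credentials packages; certUserFromContext and checkNodeUser are hypothetical names.

// Package authsketch is a standalone sketch of the peer-identity check in
// batchInternal. It assumes the "user" is the Common Name of the first
// verified peer certificate; the real security.GetCertificateUser may differ.
package authsketch

import (
	"context"
	"fmt"

	"google.golang.org/grpc/credentials"
	"google.golang.org/grpc/peer"
)

// certUserFromContext extracts the client certificate user from a gRPC
// request context, returning false if the connection is not TLS or no
// certificate was presented.
func certUserFromContext(ctx context.Context) (string, bool) {
	p, ok := peer.FromContext(ctx)
	if !ok {
		return "", false
	}
	tlsInfo, ok := p.AuthInfo.(credentials.TLSInfo)
	if !ok {
		return "", false
	}
	certs := tlsInfo.State.PeerCertificates
	if len(certs) == 0 {
		return "", false
	}
	return certs[0].Subject.CommonName, true
}

// checkNodeUser mirrors the guard in batchInternal: if peer TLS information
// is available, only the node user may issue Batch RPCs; otherwise the
// request is allowed through, as in the original.
func checkNodeUser(ctx context.Context, nodeUser string) error {
	if user, ok := certUserFromContext(ctx); ok && user != nodeUser {
		return fmt.Errorf("user %s is not allowed", user)
	}
	return nil
}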