// SendNext asynchronously sends the batch request held by the client at
// gt.clientIndex in gt.orderedClients. The reply and error returned by the
// Batch call are delivered on done as a BatchCall: on success the reply is
// set; otherwise the error is set.
//
// The client's replica is marked pending before the goroutine is started and
// is unmarked again just before the result is sent on done.
func (gt *grpcTransport) SendNext(ctx context.Context, done chan<- BatchCall) {
	client := gt.orderedClients[gt.clientIndex]
	gt.setPending(client.args.Replica, true)

	// Fork the original context as this async send may outlast the
	// caller's context.
	ctx, sp := tracing.ForkCtxSpan(ctx, "grpcTransport SendNext")
	go func() {
		// The forked span is finished when the goroutine exits.
		defer tracing.FinishSpan(sp)
		reply, err := func() (*roachpb.BatchResponse, error) {
			// Local path: taken only when local calls are enabled and the
			// rpcContext returns a non-nil local server for this address.
			if enableLocalCalls {
				if localServer := gt.rpcContext.GetLocalInternalServerForAddr(client.remoteAddr); localServer != nil {
					// Clone the request. At the time of writing, Replica may mutate it
					// during command execution which can lead to data races.
					// TODO(tamird): we should clone all of client.args.Header, but the
					// assertions in protoutil.Clone fire and there seems to be no
					// reasonable workaround.
					origTxn := client.args.Txn
					if origTxn != nil {
						// Only the transaction is cloned; client.args.Txn is
						// repointed at the copy before the request is sent.
						clonedTxn := origTxn.Clone()
						client.args.Txn = &clonedTxn
					log.VEvent(ctx, 2, "sending request to local server")
					return localServer.Batch(ctx, &client.args)

			// Remote path: issue the Batch RPC through the client's connection.
			log.VEventf(ctx, 2, "sending request to %s", client.remoteAddr)
			reply, err := client.client.Batch(ctx, &client.args)
			if reply != nil {
				// Check each response against the request at the same index.
				// The err declared by the if statement below is scoped to that
				// statement and is distinct from the Batch err above; a failed
				// verification is logged.
				for i := range reply.Responses {
					if err := reply.Responses[i].GetInner().Verify(client.args.Requests[i].GetInner()); err != nil {
						log.Error(ctx, err)
			return reply, err
		// The call has completed: clear the pending marker, then hand the
		// result to the receiver.
		gt.setPending(client.args.Replica, false)
		done <- BatchCall{Reply: reply, Err: err}
// MaybeAdd adds the specified replica if bq.shouldQueue specifies it
// should be queued. Replicas are added to the queue using the priority
// returned by bq.shouldQueue. If the queue is too full, the replica may
// not be added, as the replica with the lowest priority will be
// dropped.
//
// Before consulting shouldQueue, a series of guards is evaluated: the queue
// being stopped, the replica being uninitialized, the system config being
// unavailable, the range requiring a split, and (when the queue needs the
// lease) a valid lease being owned by another store.
func (bq *baseQueue) MaybeAdd(repl *Replica, now hlc.Timestamp) {
	// Load the system config.
	// Both values are computed before the deferred Unlock below is
	// registered; requiresSplit is only evaluated when a config is available.
	cfg, cfgOk := bq.gossip.GetSystemConfig()
	requiresSplit := cfgOk && bq.requiresSplit(cfg, repl)

	// NOTE(review): the matching bq.mu.Lock() is not visible in this view —
	// confirm the mutex is acquired before this deferred Unlock.
	defer bq.mu.Unlock()

	// Guard: the queue has been stopped.
	if bq.mu.stopped {

	// Guard: the replica is not yet initialized.
	if !repl.IsInitialized() {

	// Build a logging context annotated by both the queue and the replica.
	ctx := repl.AnnotateCtx(bq.AnnotateCtx(context.TODO()))

	// Guard: no system config was available above.
	if !cfgOk {
		log.VEvent(ctx, 1, "no system config available. skipping")

	if requiresSplit {
		// Range needs to be split due to zone configs, but queue does
		// not accept unsplit ranges.
		log.VEventf(ctx, 1, "split needed; not adding")

	if bq.needsLease {
		// Check to see if either we own the lease or do not know who the lease
		// holder is.
		// The guard fires only when the lease is valid at now AND is owned by
		// a store other than this replica's store.
		if lease, _ := repl.getLease(); repl.IsLeaseValid(lease, now) &&
			!lease.OwnedBy(repl.store.StoreID()) {
			log.VEventf(ctx, 1, "needs lease; not adding: %+v", lease)

	// Ask the queue implementation whether to queue the replica and at what
	// priority, then hand both to addInternal.
	should, priority := bq.impl.shouldQueue(ctx, now, repl, cfg)
	// NOTE(review): isExpectedQueueError is also called when err is nil;
	// presumably it treats nil as expected — verify, otherwise "unable to
	// add" is logged on success.
	if _, err := bq.addInternal(ctx, repl.Desc(), should, priority); !isExpectedQueueError(err) {
		log.Errorf(ctx, "unable to add: %s", err)
// PlanAndRun generates a physical plan from a planNode tree and executes it. It
// assumes that the tree is supported (see CheckSupport).
// Note that errors that happen while actually running the flow are reported to
// recv, not returned by this function.
//
// The work proceeds in phases: build the physical plan, group the plan's
// processors into one FlowSpec per node, set up the flows on every other node
// over gRPC (one node at a time), and finally set up and start the flow for
// this node with recv as the receiver.
//
// Errors returned directly are those from plan creation, from attaching the
// trace to a SetupFlowRequest, from dialing a remote node, and from a remote
// or local flow setup.
func (dsp *distSQLPlanner) PlanAndRun(
	ctx context.Context, txn *client.Txn, tree planNode, recv *distSQLReceiver,
) error {
	// Trigger limit propagation.
	tree.SetLimitHint(math.MaxInt64, true)

	// Planning state: the caller's context, a fresh span resolver iterator,
	// and a node-ID-to-address map that is seeded below with this node.
	planCtx := planningCtx{
		ctx:           ctx,
		spanIter:      dsp.spanResolver.NewSpanResolverIterator(),
		nodeAddresses: make(map[roachpb.NodeID]string),
	thisNodeID := dsp.nodeDesc.NodeID
	planCtx.nodeAddresses[thisNodeID] = dsp.nodeDesc.Address.String()

	log.VEvent(ctx, 1, "creating DistSQL plan")

	plan, err := dsp.createPlanForNode(&planCtx, tree)
	if err != nil {
		return err

	// If we don't already have a single result router on this node, add a final
	// stage.
	// NOTE(review): the call that receives the arguments on the next line is
	// not visible in this view; the stage uses a Noop processor core on
	// thisNodeID.
	if len(plan.resultRouters) != 1 ||
		plan.processors[plan.resultRouters[0]].node != thisNodeID {
			&plan, thisNodeID, distsqlrun.ProcessorCoreUnion{Noop: &distsqlrun.NoopCoreSpec{}},
		// Sanity check: exactly one result router must remain after the
		// final stage has been added.
		if len(plan.resultRouters) != 1 {
			panic(fmt.Sprintf("%d results after single group stage", len(plan.resultRouters)))

	// Set up the endpoints for p.streams.
	dsp.populateEndpoints(&planCtx, &plan)

	// Set up the endpoint for the final result.
	// A SYNC_RESPONSE stream is appended to the first output of the single
	// result router's processor spec.
	finalOut := &plan.processors[plan.resultRouters[0]].spec.Output[0]
	finalOut.Streams = append(finalOut.Streams, distsqlrun.StreamEndpointSpec{
		Type: distsqlrun.StreamEndpointSpec_SYNC_RESPONSE,

	// Give the receiver the plan's column mapping.
	recv.resultToStreamColMap = plan.planToStreamColMap

	// Split the processors by nodeID to create the FlowSpecs.
	// All flows share one freshly generated FlowID. nodeIDMap maps a node ID
	// to its index in the parallel slices nodeIDs and flows.
	flowID := distsqlrun.FlowID{UUID: uuid.MakeV4()}
	nodeIDMap := make(map[roachpb.NodeID]int)
	// nodeAddresses contains addresses for the nodes that were referenced during
	// planning, so we're likely going to have this many nodes (and we have one
	// flow per node).
	nodeIDs := make([]roachpb.NodeID, 0, len(planCtx.nodeAddresses))
	flows := make([]distsqlrun.FlowSpec, 0, len(planCtx.nodeAddresses))

	for _, p := range plan.processors {
		idx, ok := nodeIDMap[p.node]
		if !ok {
			// First processor seen for this node: create its flow and record
			// the node's index.
			flow := distsqlrun.FlowSpec{FlowID: flowID}
			idx = len(flows)
			flows = append(flows, flow)
			nodeIDs = append(nodeIDs, p.node)
			nodeIDMap[p.node] = idx
		flows[idx].Processors = append(flows[idx].Processors, p.spec)

	// Optionally log a JSON diagram of the plan, labelling each flow with the
	// string form of its node ID.
	if logPlanDiagram {
		log.VEvent(ctx, 1, "creating plan diagram")
		nodeNames := make([]string, len(nodeIDs))
		for i, n := range nodeIDs {
			nodeNames[i] = n.String()

		var buf bytes.Buffer
		// A diagram failure is only logged; it does not abort execution.
		if err := distsqlrun.GeneratePlanDiagram(flows, nodeNames, &buf); err != nil {
			log.Infof(ctx, "Error generating diagram: %s", err)
		} else {
			log.Infof(ctx, "Plan diagram JSON:\n%s", buf.String())

	log.VEvent(ctx, 1, "running DistSQL plan")

	// Start the flows on all other nodes.
	for i, nodeID := range nodeIDs {
		if nodeID == thisNodeID {
			// Skip this node.
			// Its flow is set up locally after this loop.
		req := distsqlrun.SetupFlowRequest{
			Txn:  txn.Proto,
			Flow: flows[i],
		if err := distsqlrun.SetFlowRequestTrace(ctx, &req); err != nil {
			return err
		// Dial the address recorded for this node during planning.
		conn, err := dsp.rpcContext.GRPCDial(planCtx.nodeAddresses[nodeID])
		if err != nil {
			return err
		client := distsqlrun.NewDistSQLClient(conn)
		// TODO(radu): we are not waiting for the flows to complete, but we are
		// still waiting for a round trip; we should start the flows in parallel, at
		// least if there are enough of them.
		// NOTE(review): this RPC uses context.Background() rather than ctx, so
		// cancellation of the caller's context does not reach it — confirm
		// this is intentional.
		if resp, err := client.SetupFlow(context.Background(), &req); err != nil {
			return err
		} else if resp.Error != nil {
			return resp.Error.GoError()
	// Set up the flow for this node, using recv as the receiver.
	// The lookup relies on at least one processor having been placed on
	// thisNodeID; a missing key would yield index 0 and select flows[0].
	localReq := distsqlrun.SetupFlowRequest{
		Txn:  txn.Proto,
		Flow: flows[nodeIDMap[thisNodeID]],
	if err := distsqlrun.SetFlowRequestTrace(ctx, &localReq); err != nil {
		return err
	flow, err := dsp.distSQLSrv.SetupSyncFlow(ctx, &localReq, recv)
	if err != nil {
		return err
	// TODO(radu): this should go through the flow scheduler.
	// Start is given a no-op callback.
	flow.Start(func() {})

	return nil