// SendNext invokes the specified RPC on the supplied client when the
// client is ready. On success, the reply is sent on the channel;
// otherwise an error is sent.
func (gt *grpcTransport) SendNext(ctx context.Context, done chan<- BatchCall) {
	client := gt.orderedClients[gt.clientIndex]
	gt.clientIndex++
	gt.setPending(client.args.Replica, true)

	// Fork the original context as this async send may outlast the
	// caller's context.
	ctx, sp := tracing.ForkCtxSpan(ctx, "grpcTransport SendNext")

	go func() {
		defer tracing.FinishSpan(sp)
		gt.opts.metrics.SentCount.Inc(1)
		reply, err := func() (*roachpb.BatchResponse, error) {
			if enableLocalCalls {
				if localServer := gt.rpcContext.GetLocalInternalServerForAddr(client.remoteAddr); localServer != nil {
					// Clone the request. At the time of writing, Replica may mutate it
					// during command execution which can lead to data races.
					//
					// TODO(tamird): we should clone all of client.args.Header, but the
					// assertions in protoutil.Clone fire and there seems to be no
					// reasonable workaround.
					origTxn := client.args.Txn
					if origTxn != nil {
						clonedTxn := origTxn.Clone()
						client.args.Txn = &clonedTxn
					}
					gt.opts.metrics.LocalSentCount.Inc(1)
					log.VEvent(ctx, 2, "sending request to local server")
					return localServer.Batch(ctx, &client.args)
				}
			}
			log.VEventf(ctx, 2, "sending request to %s", client.remoteAddr)
			reply, err := client.client.Batch(ctx, &client.args)
			if reply != nil {
				for i := range reply.Responses {
					if err := reply.Responses[i].GetInner().Verify(client.args.Requests[i].GetInner()); err != nil {
						log.Error(ctx, err)
					}
				}
			}
			return reply, err
		}()
		gt.setPending(client.args.Replica, false)
		done <- BatchCall{Reply: reply, Err: err}
	}()
}
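// Example (not part of the original code): a minimal sketch of how a caller
// might consume the BatchCall that SendNext delivers asynchronously, assuming
// the caller also wants to respect context cancellation. The helper name is
// illustrative only.
func waitForBatchCall(ctx context.Context, done <-chan BatchCall) (*roachpb.BatchResponse, error) {
	select {
	case call := <-done:
		// SendNext populates either the reply or the error.
		return call.Reply, call.Err
	case <-ctx.Done():
		return nil, ctx.Err()
	}
}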
// MaybeAdd adds the specified replica if bq.shouldQueue specifies it
// should be queued. Replicas are added to the queue using the priority
// returned by bq.shouldQueue. If the queue is too full, the replica may
// not be added, as the replica with the lowest priority will be
// dropped.
func (bq *baseQueue) MaybeAdd(repl *Replica, now hlc.Timestamp) {
	// Load the system config.
	cfg, cfgOk := bq.gossip.GetSystemConfig()
	requiresSplit := cfgOk && bq.requiresSplit(cfg, repl)

	bq.mu.Lock()
	defer bq.mu.Unlock()

	if bq.mu.stopped {
		return
	}

	if !repl.IsInitialized() {
		return
	}

	ctx := repl.AnnotateCtx(bq.AnnotateCtx(context.TODO()))

	if !cfgOk {
		log.VEvent(ctx, 1, "no system config available. skipping")
		return
	}

	if requiresSplit {
		// Range needs to be split due to zone configs, but queue does
		// not accept unsplit ranges.
		log.VEventf(ctx, 1, "split needed; not adding")
		return
	}

	if bq.needsLease {
		// Check to see if either we own the lease or do not know who the lease
		// holder is.
		if lease, _ := repl.getLease(); repl.IsLeaseValid(lease, now) &&
			!lease.OwnedBy(repl.store.StoreID()) {
			log.VEventf(ctx, 1, "needs lease; not adding: %+v", lease)
			return
		}
	}

	should, priority := bq.impl.shouldQueue(ctx, now, repl, cfg)
	if _, err := bq.addInternal(ctx, repl.Desc(), should, priority); !isExpectedQueueError(err) {
		log.Errorf(ctx, "unable to add: %s", err)
	}
}
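// Sketch (assumption, not the actual implementation): an
// isExpectedQueueError-style predicate typically treats nil and a small set
// of benign sentinel errors as "expected", which is why the addInternal call
// above only logs genuine failures. The sentinel errors below are hypothetical
// stand-ins; this sketch assumes the standard library "errors" package.
var (
	errSketchQueueDisabled = errors.New("queue disabled")
	errSketchAlreadyQueued = errors.New("replica already queued")
)

func isExpectedQueueErrorSketch(err error) bool {
	return err == nil || err == errSketchQueueDisabled || err == errSketchAlreadyQueued
}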
// PlanAndRun generates a physical plan from a planNode tree and executes it. It
// assumes that the tree is supported (see CheckSupport).
//
// Note that errors that happen while actually running the flow are reported to
// recv, not returned by this function.
func (dsp *distSQLPlanner) PlanAndRun(
	ctx context.Context, txn *client.Txn, tree planNode, recv *distSQLReceiver,
) error {
	// Trigger limit propagation.
	tree.SetLimitHint(math.MaxInt64, true)

	planCtx := planningCtx{
		ctx:           ctx,
		spanIter:      dsp.spanResolver.NewSpanResolverIterator(),
		nodeAddresses: make(map[roachpb.NodeID]string),
	}
	thisNodeID := dsp.nodeDesc.NodeID
	planCtx.nodeAddresses[thisNodeID] = dsp.nodeDesc.Address.String()

	log.VEvent(ctx, 1, "creating DistSQL plan")

	plan, err := dsp.createPlanForNode(&planCtx, tree)
	if err != nil {
		return err
	}

	// If we don't already have a single result router on this node, add a final
	// stage.
	if len(plan.resultRouters) != 1 ||
		plan.processors[plan.resultRouters[0]].node != thisNodeID {
		dsp.addSingleGroupStage(
			&plan, thisNodeID, distsqlrun.ProcessorCoreUnion{Noop: &distsqlrun.NoopCoreSpec{}},
		)
		if len(plan.resultRouters) != 1 {
			panic(fmt.Sprintf("%d results after single group stage", len(plan.resultRouters)))
		}
	}

	// Set up the endpoints for p.streams.
	dsp.populateEndpoints(&planCtx, &plan)

	// Set up the endpoint for the final result.
	finalOut := &plan.processors[plan.resultRouters[0]].spec.Output[0]
	finalOut.Streams = append(finalOut.Streams, distsqlrun.StreamEndpointSpec{
		Type: distsqlrun.StreamEndpointSpec_SYNC_RESPONSE,
	})

	recv.resultToStreamColMap = plan.planToStreamColMap

	// Split the processors by nodeID to create the FlowSpecs.
	flowID := distsqlrun.FlowID{UUID: uuid.MakeV4()}
	nodeIDMap := make(map[roachpb.NodeID]int)
	// nodeAddresses contains addresses for the nodes that were referenced during
	// planning, so we're likely going to have this many nodes (and we have one
	// flow per node).
	nodeIDs := make([]roachpb.NodeID, 0, len(planCtx.nodeAddresses))
	flows := make([]distsqlrun.FlowSpec, 0, len(planCtx.nodeAddresses))

	for _, p := range plan.processors {
		idx, ok := nodeIDMap[p.node]
		if !ok {
			flow := distsqlrun.FlowSpec{FlowID: flowID}
			idx = len(flows)
			flows = append(flows, flow)
			nodeIDs = append(nodeIDs, p.node)
			nodeIDMap[p.node] = idx
		}
		flows[idx].Processors = append(flows[idx].Processors, p.spec)
	}

	if logPlanDiagram {
		log.VEvent(ctx, 1, "creating plan diagram")
		nodeNames := make([]string, len(nodeIDs))
		for i, n := range nodeIDs {
			nodeNames[i] = n.String()
		}
		var buf bytes.Buffer
		if err := distsqlrun.GeneratePlanDiagram(flows, nodeNames, &buf); err != nil {
			log.Infof(ctx, "Error generating diagram: %s", err)
		} else {
			log.Infof(ctx, "Plan diagram JSON:\n%s", buf.String())
		}
	}

	log.VEvent(ctx, 1, "running DistSQL plan")

	// Start the flows on all other nodes.
	for i, nodeID := range nodeIDs {
		if nodeID == thisNodeID {
			// Skip this node.
			continue
		}
		req := distsqlrun.SetupFlowRequest{
			Txn:  txn.Proto,
			Flow: flows[i],
		}
		if err := distsqlrun.SetFlowRequestTrace(ctx, &req); err != nil {
			return err
		}
		conn, err := dsp.rpcContext.GRPCDial(planCtx.nodeAddresses[nodeID])
		if err != nil {
			return err
		}
		client := distsqlrun.NewDistSQLClient(conn)
		// TODO(radu): we are not waiting for the flows to complete, but we are
		// still waiting for a round trip; we should start the flows in parallel, at
		// least if there are enough of them.
		if resp, err := client.SetupFlow(context.Background(), &req); err != nil {
			return err
		} else if resp.Error != nil {
			return resp.Error.GoError()
		}
	}

	localReq := distsqlrun.SetupFlowRequest{
		Txn:  txn.Proto,
		Flow: flows[nodeIDMap[thisNodeID]],
	}
	if err := distsqlrun.SetFlowRequestTrace(ctx, &localReq); err != nil {
		return err
	}
	flow, err := dsp.distSQLSrv.SetupSyncFlow(ctx, &localReq, recv)
	if err != nil {
		return err
	}
	// TODO(radu): this should go through the flow scheduler.
	flow.Start(func() {})
	flow.Wait()
	flow.Cleanup()

	return nil
}
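// Example (hypothetical caller sketch, not from the original file): PlanAndRun
// assumes the planNode tree has already been vetted, so a caller would gate on
// a CheckSupport-style check first and fall back to local execution otherwise.
// The helper name, the CheckSupport signature, and the fallback behavior below
// are assumptions.
func maybeRunWithDistSQL(
	ctx context.Context, dsp *distSQLPlanner, txn *client.Txn, tree planNode, recv *distSQLReceiver,
) (ran bool, err error) {
	if err := dsp.CheckSupport(tree); err != nil {
		// Not supported by DistSQL; the caller would run the tree locally.
		return false, nil
	}
	// Planning and flow-setup errors are returned here; errors during flow
	// execution are reported to recv instead.
	return true, dsp.PlanAndRun(ctx, txn, tree, recv)
}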