// Run is part of the processor interface.
func (m *mergeJoiner) Run(wg *sync.WaitGroup) {
	if wg != nil {
		defer wg.Done()
	}

	ctx, span := tracing.ChildSpan(m.ctx, "merge joiner")
	defer tracing.FinishSpan(span)

	if log.V(2) {
		log.Infof(ctx, "starting merge joiner run")
		defer log.Infof(ctx, "exiting merge joiner run")
	}

	for {
		batch, err := m.streamMerger.NextBatch()
		if err != nil || len(batch) == 0 {
			m.output.Close(err)
			return
		}
		for _, rowPair := range batch {
			row, _, err := m.render(rowPair[0], rowPair[1])
			if err != nil {
				m.output.Close(err)
				return
			}
			if row != nil && !m.output.PushRow(row) {
				if log.V(2) {
					log.Infof(ctx, "no more rows required")
				}
				m.output.Close(nil)
				return
			}
		}
	}
}
// Run is part of the processor interface.
func (h *hashJoiner) Run(wg *sync.WaitGroup) {
	if wg != nil {
		defer wg.Done()
	}

	ctx, span := tracing.ChildSpan(h.ctx, "hash joiner")
	defer tracing.FinishSpan(span)

	if log.V(2) {
		log.Infof(ctx, "starting hash joiner run")
		defer log.Infof(ctx, "exiting hash joiner run")
	}

	if err := h.buildPhase(ctx); err != nil {
		h.output.Close(err)
		return
	}
	if h.joinType == rightOuter || h.joinType == fullOuter {
		for k, bucket := range h.buckets {
			bucket.seen = make([]bool, len(bucket.rows))
			h.buckets[k] = bucket
		}
	}
	err := h.probePhase(ctx)
	h.output.Close(err)
}
// Run is part of the processor interface.
func (d *distinct) Run(wg *sync.WaitGroup) {
	if wg != nil {
		defer wg.Done()
	}

	ctx, span := tracing.ChildSpan(d.ctx, "distinct")
	defer tracing.FinishSpan(span)

	if log.V(2) {
		log.Infof(ctx, "starting distinct process")
		defer log.Infof(ctx, "exiting distinct")
	}

	var scratch []byte
	for {
		row, err := d.input.NextRow()
		if err != nil || row == nil {
			d.output.Close(err)
			return
		}

		// If we are processing DISTINCT(x, y) and the input stream is ordered
		// by x, we define x to be our group key. Our seen set at any given time
		// is only the set of all rows with the same group key. The encoding of
		// the row is the key we use in our 'seen' set.
		encoding, err := d.encode(scratch, row)
		if err != nil {
			d.output.Close(err)
			return
		}

		// The 'seen' set is reset whenever we find consecutive rows differing on
		// the group key, thus avoiding the need to store encodings of all rows.
		matched, err := d.matchLastGroupKey(row)
		if err != nil {
			d.output.Close(err)
			return
		}
		if !matched {
			d.lastGroupKey = row
			d.seen = make(map[string]struct{})
		}

		key := string(encoding)
		if _, ok := d.seen[key]; !ok {
			d.seen[key] = struct{}{}
			if !d.output.PushRow(row) {
				if log.V(2) {
					log.Infof(ctx, "no more rows required")
				}
				d.output.Close(nil)
				return
			}
		}
		scratch = encoding[:0]
	}
}
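The ordered-DISTINCT idea above can be shown in isolation. The following is a minimal sketch under illustrative names (streamingDistinct, emit, the string-pair rows); it is not the processor's code, but it demonstrates the same technique: when the input is ordered by the group key, the 'seen' set only ever has to hold encodings for the current group.

// streamingDistinct is an illustrative sketch, not the actual processor code.
// It deduplicates a stream of (groupKey, value) pairs that arrives ordered by
// groupKey, resetting the 'seen' set whenever the group key changes.
func streamingDistinct(rows [][2]string, emit func([2]string) bool) {
	var lastGroupKey string
	seen := map[string]struct{}{}
	for _, row := range rows {
		if row[0] != lastGroupKey {
			// A new group key: earlier encodings can never repeat, so drop them.
			lastGroupKey = row[0]
			seen = map[string]struct{}{}
		}
		key := row[0] + "\x00" + row[1] // stand-in for the row encoding
		if _, ok := seen[key]; ok {
			continue // duplicate within the current group
		}
		seen[key] = struct{}{}
		if !emit(row) {
			return // consumer needs no more rows
		}
	}
}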
// Run is part of the processor interface.
func (ev *evaluator) Run(wg *sync.WaitGroup) {
	if wg != nil {
		defer wg.Done()
	}

	ctx, span := tracing.ChildSpan(ev.ctx, "evaluator")
	defer tracing.FinishSpan(span)

	if log.V(2) {
		log.Infof(ctx, "starting evaluator process")
		defer log.Infof(ctx, "exiting evaluator")
	}

	first := true
	for {
		row, err := ev.input.NextRow()
		if err != nil || row == nil {
			ev.output.Close(err)
			return
		}

		if first {
			first = false

			types := make([]sqlbase.ColumnType_Kind, len(row))
			for i := range types {
				types[i] = row[i].Type
			}
			for i, expr := range ev.specExprs {
				err := ev.exprs[i].init(expr, types, ev.flowCtx.evalCtx)
				if err != nil {
					ev.output.Close(err)
					return
				}
				ev.exprTypes[i] = sqlbase.DatumTypeToColumnKind(ev.exprs[i].expr.ResolvedType())
			}
		}

		outRow, err := ev.eval(row)
		if err != nil {
			ev.output.Close(err)
			return
		}

		if log.V(3) {
			log.Infof(ctx, "pushing %s\n", outRow)
		}

		// Push the row to the output RowReceiver; stop if they don't need more
		// rows.
		if !ev.output.PushRow(outRow) {
			if log.V(2) {
				log.Infof(ctx, "no more rows required")
			}
			ev.output.Close(nil)
			return
		}
	}
}
// RunLimitedAsyncTask runs function f in a goroutine, using the given
// channel as a semaphore to limit the number of tasks that are run
// concurrently to the channel's capacity. If wait is true, blocks
// until the semaphore is available in order to push back on callers
// that may be trying to create many tasks. If wait is false, returns
// immediately with an error if the semaphore is not
// available. Returns an error if the Stopper is quiescing, in which
// case the function is not executed.
func (s *Stopper) RunLimitedAsyncTask(
	ctx context.Context, sem chan struct{}, wait bool, f func(context.Context),
) error {
	file, line, _ := caller.Lookup(1)
	key := taskKey{file, line}

	// Wait for permission to run from the semaphore.
	select {
	case sem <- struct{}{}:
	case <-ctx.Done():
		return ctx.Err()
	case <-s.ShouldQuiesce():
		return errUnavailable
	default:
		if !wait {
			return ErrThrottled
		}
		log.Infof(context.TODO(), "stopper throttling task from %s:%d due to semaphore", file, line)
		// Retry the select without the default.
		select {
		case sem <- struct{}{}:
		case <-ctx.Done():
			return ctx.Err()
		case <-s.ShouldQuiesce():
			return errUnavailable
		}
	}

	// Check for canceled context: it's possible to get the semaphore even
	// if the context is canceled.
	select {
	case <-ctx.Done():
		<-sem
		return ctx.Err()
	default:
	}

	if !s.runPrelude(key) {
		<-sem
		return errUnavailable
	}

	ctx, span := tracing.ForkCtxSpan(ctx, fmt.Sprintf("%s:%d", file, line))
	go func() {
		defer s.Recover()
		defer s.runPostlude(key)
		defer func() { <-sem }()
		defer tracing.FinishSpan(span)
		f(ctx)
	}()
	return nil
}
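A minimal caller-side sketch of how RunLimitedAsyncTask might be used, assuming a *Stopper s and the hypothetical names items and process; the semaphore channel's capacity is the concurrency bound.

// Hypothetical usage sketch: bound concurrent work items to 8 at a time.
sem := make(chan struct{}, 8) // capacity = max concurrent tasks
for _, item := range items {  // items and process are illustrative names
	item := item // capture the loop variable for the async closure
	if err := s.RunLimitedAsyncTask(ctx, sem, true /* wait */, func(ctx context.Context) {
		process(ctx, item)
	}); err != nil {
		// With wait=true this means the Stopper is quiescing or ctx was
		// canceled; ErrThrottled is only returned when wait is false.
		return err
	}
}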
// Run is part of the processor interface.
func (tr *tableReader) Run(wg *sync.WaitGroup) {
	if wg != nil {
		defer wg.Done()
	}

	ctx, span := tracing.ChildSpan(tr.ctx, "table reader")
	defer tracing.FinishSpan(span)

	txn := tr.flowCtx.setupTxn(ctx)

	log.VEventf(ctx, 1, "starting (filter: %s)", &tr.filter)
	if log.V(1) {
		defer log.Infof(ctx, "exiting")
	}

	if err := tr.fetcher.StartScan(
		txn, tr.spans, true /* limit batches */, tr.getLimitHint(),
	); err != nil {
		log.Errorf(ctx, "scan error: %s", err)
		tr.output.Close(err)
		return
	}
	var rowIdx int64
	for {
		outRow, err := tr.nextRow()
		if err != nil || outRow == nil {
			tr.output.Close(err)
			return
		}
		if log.V(3) {
			log.Infof(ctx, "pushing row %s", outRow)
		}
		// Push the row to the output RowReceiver; stop if they don't need more
		// rows.
		if !tr.output.PushRow(outRow) {
			log.VEventf(ctx, 1, "no more rows required")
			tr.output.Close(nil)
			return
		}
		rowIdx++
		if tr.hardLimit != 0 && rowIdx == tr.hardLimit {
			// We sent tr.hardLimit rows.
			tr.output.Close(nil)
			return
		}
	}
}
// SendNext invokes the specified RPC on the supplied client when the
// client is ready. On success, the reply is sent on the channel;
// otherwise an error is sent.
func (gt *grpcTransport) SendNext(ctx context.Context, done chan<- BatchCall) {
	client := gt.orderedClients[gt.clientIndex]
	gt.clientIndex++
	gt.setPending(client.args.Replica, true)

	// Fork the original context as this async send may outlast the
	// caller's context.
	ctx, sp := tracing.ForkCtxSpan(ctx, "grpcTransport SendNext")
	go func() {
		defer tracing.FinishSpan(sp)
		gt.opts.metrics.SentCount.Inc(1)
		reply, err := func() (*roachpb.BatchResponse, error) {
			if enableLocalCalls {
				if localServer := gt.rpcContext.GetLocalInternalServerForAddr(client.remoteAddr); localServer != nil {
					// Clone the request. At the time of writing, Replica may mutate it
					// during command execution which can lead to data races.
					//
					// TODO(tamird): we should clone all of client.args.Header, but the
					// assertions in protoutil.Clone fire and there seems to be no
					// reasonable workaround.
					origTxn := client.args.Txn
					if origTxn != nil {
						clonedTxn := origTxn.Clone()
						client.args.Txn = &clonedTxn
					}
					gt.opts.metrics.LocalSentCount.Inc(1)
					log.VEvent(ctx, 2, "sending request to local server")
					return localServer.Batch(ctx, &client.args)
				}
			}
			log.VEventf(ctx, 2, "sending request to %s", client.remoteAddr)
			reply, err := client.client.Batch(ctx, &client.args)
			if reply != nil {
				for i := range reply.Responses {
					if err := reply.Responses[i].GetInner().Verify(client.args.Requests[i].GetInner()); err != nil {
						log.Error(ctx, err)
					}
				}
			}
			return reply, err
		}()
		gt.setPending(client.args.Replica, false)
		done <- BatchCall{Reply: reply, Err: err}
	}()
}
// RunAsyncTask runs function f in a goroutine. It returns an error when the
// Stopper is quiescing, in which case the function is not executed.
func (s *Stopper) RunAsyncTask(ctx context.Context, f func(context.Context)) error {
	file, line, _ := caller.Lookup(1)
	key := taskKey{file, line}
	if !s.runPrelude(key) {
		return errUnavailable
	}

	ctx, span := tracing.ForkCtxSpan(ctx, fmt.Sprintf("%s:%d", file, line))

	// Call f.
	go func() {
		defer s.Recover()
		defer s.runPostlude(key)
		defer tracing.FinishSpan(span)

		f(ctx)
	}()
	return nil
}
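For comparison, a caller-side sketch of the unlimited variant, with refreshCache standing in for some hypothetical background work; there is no semaphore here, so the only error is the quiescing case.

// Hypothetical usage sketch: fire-and-forget background work.
if err := s.RunAsyncTask(ctx, func(ctx context.Context) {
	refreshCache(ctx) // illustrative background task
}); err != nil {
	// The Stopper is quiescing; the task was never started.
	log.Infof(ctx, "skipping background refresh: %v", err)
}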
// Run is part of the processor interface.
func (h *hashJoiner) Run(wg *sync.WaitGroup) {
	if wg != nil {
		defer wg.Done()
	}

	ctx, span := tracing.ChildSpan(h.ctx, "hash joiner")
	defer tracing.FinishSpan(span)

	if log.V(2) {
		log.Infof(ctx, "starting hash joiner run")
		defer log.Infof(ctx, "exiting hash joiner run")
	}

	if err := h.buildPhase(ctx); err != nil {
		h.output.Close(err)
		return
	}
	err := h.probePhase(ctx)
	h.output.Close(err)
}
// Run is part of the processor interface.
func (ev *evaluator) Run(wg *sync.WaitGroup) {
	if wg != nil {
		defer wg.Done()
	}

	ctx, span := tracing.ChildSpan(ev.ctx, "evaluator")
	defer tracing.FinishSpan(span)

	if log.V(2) {
		log.Infof(ctx, "starting evaluator process")
		defer log.Infof(ctx, "exiting evaluator")
	}

	for {
		row, err := ev.input.NextRow()
		if err != nil || row == nil {
			ev.output.Close(err)
			return
		}

		outRow, err := ev.eval(row)
		if err != nil {
			ev.output.Close(err)
			return
		}

		if log.V(3) {
			log.Infof(ctx, "pushing %s\n", outRow)
		}

		// Push the row to the output RowReceiver; stop if they don't need more
		// rows.
		if !ev.output.PushRow(outRow) {
			if log.V(2) {
				log.Infof(ctx, "no more rows required")
			}
			ev.output.Close(nil)
			return
		}
	}
}
// mainLoop runs the join reader's main loop and returns any error encountered.
// It does not close the output.
func (jr *joinReader) mainLoop() error {
	primaryKeyPrefix := sqlbase.MakeIndexKeyPrefix(&jr.desc, jr.index.ID)

	var alloc sqlbase.DatumAlloc
	spans := make(roachpb.Spans, 0, joinReaderBatchSize)

	ctx, span := tracing.ChildSpan(jr.ctx, "join reader")
	defer tracing.FinishSpan(span)

	txn := jr.flowCtx.setupTxn(ctx)

	log.VEventf(ctx, 1, "starting (filter: %s)", &jr.filter)
	if log.V(1) {
		defer log.Infof(ctx, "exiting")
	}

	for {
		// TODO(radu): figure out how to send smaller batches if the source has
		// a soft limit (perhaps send the batch out if we don't get a result
		// within a certain amount of time).
		for spans = spans[:0]; len(spans) < joinReaderBatchSize; {
			row, err := jr.input.NextRow()
			if err != nil {
				return err
			}
			if row == nil {
				if len(spans) == 0 {
					return nil
				}
				break
			}
			key, err := jr.generateKey(row, &alloc, primaryKeyPrefix)
			if err != nil {
				return err
			}
			spans = append(spans, roachpb.Span{
				Key:    key,
				EndKey: key.PrefixEnd(),
			})
		}

		err := jr.fetcher.StartScan(txn, spans, false /* no batch limits */, 0)
		if err != nil {
			log.Errorf(ctx, "scan error: %s", err)
			return err
		}

		// TODO(radu): we are consuming all results from a fetch before starting
		// the next batch. We could start the next batch early while we are
		// outputting rows.
		for {
			outRow, err := jr.nextRow()
			if err != nil {
				return err
			}
			if outRow == nil {
				// Done.
				break
			}
			if log.V(3) {
				log.Infof(ctx, "pushing row %s", outRow)
			}
			// Push the row to the output RowReceiver; stop if they don't need more
			// rows.
			if !jr.output.PushRow(outRow) {
				log.VEventf(ctx, 1, "no more rows required")
				return nil
			}
		}

		if len(spans) != joinReaderBatchSize {
			// This was the last batch.
			return nil
		}
	}
}
// Run is part of the processor interface.
func (s *sorter) Run(wg *sync.WaitGroup) {
	if wg != nil {
		defer wg.Done()
	}

	ctx, span := tracing.ChildSpan(s.ctx, "sorter")
	defer tracing.FinishSpan(span)

	if log.V(2) {
		log.Infof(ctx, "starting sorter run")
		defer log.Infof(ctx, "exiting sorter run")
	}

	switch {
	case s.matchLen == 0 && s.limit == 0:
		// No specified ordering match length and no specified limit; no
		// optimizations are possible, so we simply load all rows into memory and
		// sort all values in place. This has a worst-case time complexity of
		// O(n*log(n)) and a worst-case space complexity of O(n).
		ss := newSortAllStrategy(
			&sorterValues{
				ordering: s.ordering,
			})
		err := ss.Execute(s)
		if err != nil {
			log.Errorf(ctx, "error sorting rows in memory: %s", err)
		}
		s.output.Close(err)
	case s.matchLen == 0:
		// No specified ordering match length but a specified limit; we can
		// optimize our sort procedure by maintaining a max-heap populated with
		// only the smallest k rows seen. This has a worst-case time complexity of
		// O(n*log(k)) and a worst-case space complexity of O(k).
		ss := newSortTopKStrategy(
			&sorterValues{
				ordering: s.ordering,
			}, s.limit)
		err := ss.Execute(s)
		if err != nil {
			log.Errorf(ctx, "error sorting rows: %s", err)
		}
		s.output.Close(err)
	case s.matchLen != 0:
		// An ordering match length is specified but no limit. We can use the
		// existing ordering to avoid loading all the rows into memory: if we're
		// scanning an index with a prefix matching an ordering prefix, we only
		// accumulate values for equal fields in this prefix, sort the accumulated
		// chunk and then output.
		ss := newSortChunksStrategy(
			&sorterValues{
				ordering: s.ordering,
			})
		err := ss.Execute(s)
		if err != nil {
			log.Errorf(ctx, "error sorting rows: %s", err)
		}
		s.output.Close(err)
	default:
		// TODO(irfansharif): Add optimization for the case where both the
		// ordering match length and the limit are specified.
		panic("optimization not implemented yet")
	}
}
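The top-k case above relies on a bounded max-heap. The following standalone sketch over plain ints, using container/heap, illustrates the O(k)-space, O(n*log(k))-time technique the comment describes; it is not the sorter's actual newSortTopKStrategy code.

package main

import (
	"container/heap"
	"fmt"
)

// maxHeap is a max-heap of ints, used to retain the k smallest values seen.
type maxHeap []int

func (h maxHeap) Len() int            { return len(h) }
func (h maxHeap) Less(i, j int) bool  { return h[i] > h[j] } // reversed: largest value at the root
func (h maxHeap) Swap(i, j int)       { h[i], h[j] = h[j], h[i] }
func (h *maxHeap) Push(x interface{}) { *h = append(*h, x.(int)) }
func (h *maxHeap) Pop() interface{} {
	old := *h
	n := len(old)
	x := old[n-1]
	*h = old[:n-1]
	return x
}

// topK returns the k smallest values of in. It never holds more than k values,
// evicting the current worst candidate whenever a smaller value arrives.
func topK(in []int, k int) []int {
	h := &maxHeap{}
	heap.Init(h)
	for _, v := range in {
		if h.Len() < k {
			heap.Push(h, v)
		} else if v < (*h)[0] {
			// v beats the largest retained value: evict the root and keep v.
			heap.Pop(h)
			heap.Push(h, v)
		}
	}
	return *h
}

func main() {
	// Prints the three smallest values (in heap order, not sorted).
	fmt.Println(topK([]int{9, 3, 7, 1, 8, 2}, 3))
}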