// Run is part of the processor interface. func (h *hashJoiner) Run(wg *sync.WaitGroup) { if wg != nil { defer wg.Done() } ctx, span := tracing.ChildSpan(h.ctx, "hash joiner") defer tracing.FinishSpan(span) if log.V(2) { log.Infof(ctx, "starting hash joiner run") defer log.Infof(ctx, "exiting hash joiner run") } if err := h.buildPhase(ctx); err != nil { h.output.Close(err) return } if h.joinType == rightOuter || h.joinType == fullOuter { for k, bucket := range h.buckets { bucket.seen = make([]bool, len(bucket.rows)) h.buckets[k] = bucket } } err := h.probePhase(ctx) h.output.Close(err) }
// Run is part of the processor interface. func (m *mergeJoiner) Run(wg *sync.WaitGroup) { if wg != nil { defer wg.Done() } ctx, span := tracing.ChildSpan(m.ctx, "merge joiner") defer tracing.FinishSpan(span) if log.V(2) { log.Infof(ctx, "starting merge joiner run") defer log.Infof(ctx, "exiting merge joiner run") } for { batch, err := m.streamMerger.NextBatch() if err != nil || len(batch) == 0 { m.output.Close(err) return } for _, rowPair := range batch { row, _, err := m.render(rowPair[0], rowPair[1]) if err != nil { m.output.Close(err) return } if row != nil && !m.output.PushRow(row) { if log.V(2) { log.Infof(ctx, "no more rows required") } m.output.Close(nil) return } } } }
// Run is part of the processor interface. func (d *distinct) Run(wg *sync.WaitGroup) { if wg != nil { defer wg.Done() } ctx, span := tracing.ChildSpan(d.ctx, "distinct") defer tracing.FinishSpan(span) if log.V(2) { log.Infof(ctx, "starting distinct process") defer log.Infof(ctx, "exiting distinct") } var scratch []byte for { row, err := d.input.NextRow() if err != nil || row == nil { d.output.Close(err) return } // If we are processing DISTINCT(x, y) and the input stream is ordered // by x, we define x to be our group key. Our seen set at any given time // is only the set of all rows with the same group key. The encoding of // the row is the key we use in our 'seen' set. encoding, err := d.encode(scratch, row) if err != nil { d.output.Close(err) return } // The 'seen' set is reset whenever we find consecutive rows differing on the // group key thus avoiding the need to store encodings of all rows. matched, err := d.matchLastGroupKey(row) if err != nil { d.output.Close(err) return } if !matched { d.lastGroupKey = row d.seen = make(map[string]struct{}) } key := string(encoding) if _, ok := d.seen[key]; !ok { d.seen[key] = struct{}{} if !d.output.PushRow(row) { if log.V(2) { log.Infof(ctx, "no more rows required") } d.output.Close(nil) return } } scratch = encoding[:0] } }
// Run is part of the processor interface. func (ev *evaluator) Run(wg *sync.WaitGroup) { if wg != nil { defer wg.Done() } ctx, span := tracing.ChildSpan(ev.ctx, "evaluator") defer tracing.FinishSpan(span) if log.V(2) { log.Infof(ctx, "starting evaluator process") defer log.Infof(ctx, "exiting evaluator") } first := true for { row, err := ev.input.NextRow() if err != nil || row == nil { ev.output.Close(err) return } if first { first = false types := make([]sqlbase.ColumnType_Kind, len(row)) for i := range types { types[i] = row[i].Type } for i, expr := range ev.specExprs { err := ev.exprs[i].init(expr, types, ev.flowCtx.evalCtx) if err != nil { ev.output.Close(err) return } ev.exprTypes[i] = sqlbase.DatumTypeToColumnKind(ev.exprs[i].expr.ResolvedType()) } } outRow, err := ev.eval(row) if err != nil { ev.output.Close(err) return } if log.V(3) { log.Infof(ctx, "pushing %s\n", outRow) } // Push the row to the output RowReceiver; stop if they don't need more // rows. if !ev.output.PushRow(outRow) { if log.V(2) { log.Infof(ctx, "no more rows required") } ev.output.Close(nil) return } } }
// Run is part of the processor interface. func (tr *tableReader) Run(wg *sync.WaitGroup) { if wg != nil { defer wg.Done() } ctx, span := tracing.ChildSpan(tr.ctx, "table reader") defer tracing.FinishSpan(span) txn := tr.flowCtx.setupTxn(ctx) log.VEventf(ctx, 1, "starting (filter: %s)", &tr.filter) if log.V(1) { defer log.Infof(ctx, "exiting") } if err := tr.fetcher.StartScan( txn, tr.spans, true /* limit batches */, tr.getLimitHint(), ); err != nil { log.Errorf(ctx, "scan error: %s", err) tr.output.Close(err) return } var rowIdx int64 for { outRow, err := tr.nextRow() if err != nil || outRow == nil { tr.output.Close(err) return } if log.V(3) { log.Infof(ctx, "pushing row %s", outRow) } // Push the row to the output RowReceiver; stop if they don't need more // rows. if !tr.output.PushRow(outRow) { log.VEventf(ctx, 1, "no more rows required") tr.output.Close(nil) return } rowIdx++ if tr.hardLimit != 0 && rowIdx == tr.hardLimit { // We sent tr.hardLimit rows. tr.output.Close(nil) return } } }
// Run is part of the processor interface. func (h *hashJoiner) Run(wg *sync.WaitGroup) { if wg != nil { defer wg.Done() } ctx, span := tracing.ChildSpan(h.ctx, "hash joiner") defer tracing.FinishSpan(span) if log.V(2) { log.Infof(ctx, "starting hash joiner run") defer log.Infof(ctx, "exiting hash joiner run") } if err := h.buildPhase(ctx); err != nil { h.output.Close(err) return } err := h.probePhase(ctx) h.output.Close(err) }
// Run is part of the processor interface. func (ev *evaluator) Run(wg *sync.WaitGroup) { if wg != nil { defer wg.Done() } ctx, span := tracing.ChildSpan(ev.ctx, "evaluator") defer tracing.FinishSpan(span) if log.V(2) { log.Infof(ctx, "starting evaluator process") defer log.Infof(ctx, "exiting evaluator") } for { row, err := ev.input.NextRow() if err != nil || row == nil { ev.output.Close(err) return } outRow, err := ev.eval(row) if err != nil { ev.output.Close(err) return } if log.V(3) { log.Infof(ctx, "pushing %s\n", outRow) } // Push the row to the output RowReceiver; stop if they don't need more // rows. if !ev.output.PushRow(outRow) { if log.V(2) { log.Infof(ctx, "no more rows required") } ev.output.Close(nil) return } } }
// mainLoop runs the mainLoop and returns any error. // It does not close the output. func (jr *joinReader) mainLoop() error { primaryKeyPrefix := sqlbase.MakeIndexKeyPrefix(&jr.desc, jr.index.ID) var alloc sqlbase.DatumAlloc spans := make(roachpb.Spans, 0, joinReaderBatchSize) ctx, span := tracing.ChildSpan(jr.ctx, "join reader") defer tracing.FinishSpan(span) txn := jr.flowCtx.setupTxn(ctx) log.VEventf(ctx, 1, "starting (filter: %s)", &jr.filter) if log.V(1) { defer log.Infof(ctx, "exiting") } for { // TODO(radu): figure out how to send smaller batches if the source has // a soft limit (perhaps send the batch out if we don't get a result // within a certain amount of time). for spans = spans[:0]; len(spans) < joinReaderBatchSize; { row, err := jr.input.NextRow() if err != nil { return err } if row == nil { if len(spans) == 0 { return nil } break } key, err := jr.generateKey(row, &alloc, primaryKeyPrefix) if err != nil { return err } spans = append(spans, roachpb.Span{ Key: key, EndKey: key.PrefixEnd(), }) } err := jr.fetcher.StartScan(txn, spans, false /* no batch limits */, 0) if err != nil { log.Errorf(ctx, "scan error: %s", err) return err } // TODO(radu): we are consuming all results from a fetch before starting // the next batch. We could start the next batch early while we are // outputting rows. for { outRow, err := jr.nextRow() if err != nil { return err } if outRow == nil { // Done. break } if log.V(3) { log.Infof(ctx, "pushing row %s", outRow) } // Push the row to the output RowReceiver; stop if they don't need more // rows. if !jr.output.PushRow(outRow) { log.VEventf(ctx, 1, "no more rows required") return nil } } if len(spans) != joinReaderBatchSize { // This was the last batch. return nil } } }
// Run is part of the processor interface. func (s *sorter) Run(wg *sync.WaitGroup) { if wg != nil { defer wg.Done() } ctx, span := tracing.ChildSpan(s.ctx, "sorter") defer tracing.FinishSpan(span) if log.V(2) { log.Infof(ctx, "starting sorter run") defer log.Infof(ctx, "exiting sorter run") } switch { case s.matchLen == 0 && s.limit == 0: // No specified ordering match length and unspecified limit, no optimizations possible so we // simply load all rows into memory and sort all values in-place. It has a worst-case time // complexity of O(n*log(n)) and a worst-case space complexity of O(n). ss := newSortAllStrategy( &sorterValues{ ordering: s.ordering, }) err := ss.Execute(s) if err != nil { log.Errorf(ctx, "error sorting rows in memory: %s", err) } s.output.Close(err) case s.matchLen == 0: // No specified ordering match length but specified limit, we can optimize our sort procedure by // maintaining a max-heap populated with only the smallest k rows seen. It has a worst-case time // complexity of O(n*log(k)) and a worst-case space complexity of O(k). ss := newSortTopKStrategy( &sorterValues{ ordering: s.ordering, }, s.limit) err := ss.Execute(s) if err != nil { log.Errorf(ctx, "error sorting rows: %s", err) } s.output.Close(err) case s.matchLen != 0: // Ordering match length is specified, but no specified limit. We will be able to use // existing ordering in order to avoid loading all the rows into memory. If we're scanning // an index with a prefix matching an ordering prefix, we can only accumulate values for // equal fields in this prefix, sort the accumulated chunk and then output. ss := newSortChunksStrategy( &sorterValues{ ordering: s.ordering, }) err := ss.Execute(s) if err != nil { log.Errorf(ctx, "error sorting rows: %s", err) } s.output.Close(err) default: // TODO(irfansharif): Add optimization for case where both ordering match length and limit is // specified. panic("optimization no implemented yet") } }