Exemple #1
0
// Run is part of the processor interface.
func (h *hashJoiner) Run(wg *sync.WaitGroup) {
	if wg != nil {
		defer wg.Done()
	}

	ctx, span := tracing.ChildSpan(h.ctx, "hash joiner")
	defer tracing.FinishSpan(span)

	if log.V(2) {
		log.Infof(ctx, "starting hash joiner run")
		defer log.Infof(ctx, "exiting hash joiner run")
	}

	if err := h.buildPhase(ctx); err != nil {
		h.output.Close(err)
		return
	}
	if h.joinType == rightOuter || h.joinType == fullOuter {
		for k, bucket := range h.buckets {
			bucket.seen = make([]bool, len(bucket.rows))
			h.buckets[k] = bucket
		}
	}
	err := h.probePhase(ctx)
	h.output.Close(err)
}
Exemple #2
0
// Run is part of the processor interface.
func (m *mergeJoiner) Run(wg *sync.WaitGroup) {
	if wg != nil {
		defer wg.Done()
	}

	ctx, span := tracing.ChildSpan(m.ctx, "merge joiner")
	defer tracing.FinishSpan(span)

	if log.V(2) {
		log.Infof(ctx, "starting merge joiner run")
		defer log.Infof(ctx, "exiting merge joiner run")
	}

	for {
		batch, err := m.streamMerger.NextBatch()
		if err != nil || len(batch) == 0 {
			m.output.Close(err)
			return
		}
		for _, rowPair := range batch {
			row, _, err := m.render(rowPair[0], rowPair[1])
			if err != nil {
				m.output.Close(err)
				return
			}
			if row != nil && !m.output.PushRow(row) {
				if log.V(2) {
					log.Infof(ctx, "no more rows required")
				}
				m.output.Close(nil)
				return
			}
		}
	}
}
Exemple #3
0
// Run is part of the processor interface.
func (d *distinct) Run(wg *sync.WaitGroup) {
	if wg != nil {
		defer wg.Done()
	}

	ctx, span := tracing.ChildSpan(d.ctx, "distinct")
	defer tracing.FinishSpan(span)

	if log.V(2) {
		log.Infof(ctx, "starting distinct process")
		defer log.Infof(ctx, "exiting distinct")
	}

	var scratch []byte
	for {
		row, err := d.input.NextRow()
		if err != nil || row == nil {
			d.output.Close(err)
			return
		}

		// If we are processing DISTINCT(x, y) and the input stream is ordered
		// by x, we define x to be our group key. Our seen set at any given time
		// is only the set of all rows with the same group key. The encoding of
		// the row is the key we use in our 'seen' set.
		encoding, err := d.encode(scratch, row)
		if err != nil {
			d.output.Close(err)
			return
		}

		// The 'seen' set is reset whenever we find consecutive rows differing on the
		// group key thus avoiding the need to store encodings of all rows.
		matched, err := d.matchLastGroupKey(row)
		if err != nil {
			d.output.Close(err)
			return
		}

		if !matched {
			d.lastGroupKey = row
			d.seen = make(map[string]struct{})
		}

		key := string(encoding)
		if _, ok := d.seen[key]; !ok {
			d.seen[key] = struct{}{}
			if !d.output.PushRow(row) {
				if log.V(2) {
					log.Infof(ctx, "no more rows required")
				}
				d.output.Close(nil)
				return
			}
		}
		scratch = encoding[:0]
	}
}
Exemple #4
0
// Run is part of the processor interface.
func (ev *evaluator) Run(wg *sync.WaitGroup) {
	if wg != nil {
		defer wg.Done()
	}

	ctx, span := tracing.ChildSpan(ev.ctx, "evaluator")
	defer tracing.FinishSpan(span)

	if log.V(2) {
		log.Infof(ctx, "starting evaluator process")
		defer log.Infof(ctx, "exiting evaluator")
	}

	first := true
	for {
		row, err := ev.input.NextRow()
		if err != nil || row == nil {
			ev.output.Close(err)
			return
		}

		if first {
			first = false

			types := make([]sqlbase.ColumnType_Kind, len(row))
			for i := range types {
				types[i] = row[i].Type
			}
			for i, expr := range ev.specExprs {
				err := ev.exprs[i].init(expr, types, ev.flowCtx.evalCtx)
				if err != nil {
					ev.output.Close(err)
					return
				}
				ev.exprTypes[i] = sqlbase.DatumTypeToColumnKind(ev.exprs[i].expr.ResolvedType())
			}
		}

		outRow, err := ev.eval(row)
		if err != nil {
			ev.output.Close(err)
			return
		}

		if log.V(3) {
			log.Infof(ctx, "pushing %s\n", outRow)
		}
		// Push the row to the output RowReceiver; stop if they don't need more
		// rows.
		if !ev.output.PushRow(outRow) {
			if log.V(2) {
				log.Infof(ctx, "no more rows required")
			}
			ev.output.Close(nil)
			return
		}
	}
}
Exemple #5
0
// Run is part of the processor interface.
func (tr *tableReader) Run(wg *sync.WaitGroup) {
	if wg != nil {
		defer wg.Done()
	}

	ctx, span := tracing.ChildSpan(tr.ctx, "table reader")
	defer tracing.FinishSpan(span)

	txn := tr.flowCtx.setupTxn(ctx)

	log.VEventf(ctx, 1, "starting (filter: %s)", &tr.filter)
	if log.V(1) {
		defer log.Infof(ctx, "exiting")
	}

	if err := tr.fetcher.StartScan(
		txn, tr.spans, true /* limit batches */, tr.getLimitHint(),
	); err != nil {
		log.Errorf(ctx, "scan error: %s", err)
		tr.output.Close(err)
		return
	}
	var rowIdx int64
	for {
		outRow, err := tr.nextRow()
		if err != nil || outRow == nil {
			tr.output.Close(err)
			return
		}
		if log.V(3) {
			log.Infof(ctx, "pushing row %s", outRow)
		}
		// Push the row to the output RowReceiver; stop if they don't need more
		// rows.
		if !tr.output.PushRow(outRow) {
			log.VEventf(ctx, 1, "no more rows required")
			tr.output.Close(nil)
			return
		}
		rowIdx++
		if tr.hardLimit != 0 && rowIdx == tr.hardLimit {
			// We sent tr.hardLimit rows.
			tr.output.Close(nil)
			return
		}
	}
}
// Run is part of the processor interface.
func (h *hashJoiner) Run(wg *sync.WaitGroup) {
	if wg != nil {
		defer wg.Done()
	}

	ctx, span := tracing.ChildSpan(h.ctx, "hash joiner")
	defer tracing.FinishSpan(span)

	if log.V(2) {
		log.Infof(ctx, "starting hash joiner run")
		defer log.Infof(ctx, "exiting hash joiner run")
	}

	if err := h.buildPhase(ctx); err != nil {
		h.output.Close(err)
		return
	}
	err := h.probePhase(ctx)
	h.output.Close(err)
}
Exemple #7
0
// Run is part of the processor interface.
func (ev *evaluator) Run(wg *sync.WaitGroup) {
	if wg != nil {
		defer wg.Done()
	}

	ctx, span := tracing.ChildSpan(ev.ctx, "evaluator")
	defer tracing.FinishSpan(span)

	if log.V(2) {
		log.Infof(ctx, "starting evaluator process")
		defer log.Infof(ctx, "exiting evaluator")
	}

	for {
		row, err := ev.input.NextRow()
		if err != nil || row == nil {
			ev.output.Close(err)
			return
		}

		outRow, err := ev.eval(row)
		if err != nil {
			ev.output.Close(err)
			return
		}

		if log.V(3) {
			log.Infof(ctx, "pushing %s\n", outRow)
		}
		// Push the row to the output RowReceiver; stop if they don't need more
		// rows.
		if !ev.output.PushRow(outRow) {
			if log.V(2) {
				log.Infof(ctx, "no more rows required")
			}
			ev.output.Close(nil)
			return
		}
	}
}
Exemple #8
0
// mainLoop runs the mainLoop and returns any error.
// It does not close the output.
func (jr *joinReader) mainLoop() error {
	primaryKeyPrefix := sqlbase.MakeIndexKeyPrefix(&jr.desc, jr.index.ID)

	var alloc sqlbase.DatumAlloc
	spans := make(roachpb.Spans, 0, joinReaderBatchSize)

	ctx, span := tracing.ChildSpan(jr.ctx, "join reader")
	defer tracing.FinishSpan(span)

	txn := jr.flowCtx.setupTxn(ctx)

	log.VEventf(ctx, 1, "starting (filter: %s)", &jr.filter)
	if log.V(1) {
		defer log.Infof(ctx, "exiting")
	}

	for {
		// TODO(radu): figure out how to send smaller batches if the source has
		// a soft limit (perhaps send the batch out if we don't get a result
		// within a certain amount of time).
		for spans = spans[:0]; len(spans) < joinReaderBatchSize; {
			row, err := jr.input.NextRow()
			if err != nil {
				return err
			}
			if row == nil {
				if len(spans) == 0 {
					return nil
				}
				break
			}
			key, err := jr.generateKey(row, &alloc, primaryKeyPrefix)
			if err != nil {
				return err
			}

			spans = append(spans, roachpb.Span{
				Key:    key,
				EndKey: key.PrefixEnd(),
			})
		}

		err := jr.fetcher.StartScan(txn, spans, false /* no batch limits */, 0)
		if err != nil {
			log.Errorf(ctx, "scan error: %s", err)
			return err
		}

		// TODO(radu): we are consuming all results from a fetch before starting
		// the next batch. We could start the next batch early while we are
		// outputting rows.
		for {
			outRow, err := jr.nextRow()
			if err != nil {
				return err
			}
			if outRow == nil {
				// Done.
				break
			}
			if log.V(3) {
				log.Infof(ctx, "pushing row %s", outRow)
			}
			// Push the row to the output RowReceiver; stop if they don't need more
			// rows.
			if !jr.output.PushRow(outRow) {
				log.VEventf(ctx, 1, "no more rows required")
				return nil
			}
		}

		if len(spans) != joinReaderBatchSize {
			// This was the last batch.
			return nil
		}
	}
}
Exemple #9
0
// Run is part of the processor interface.
func (s *sorter) Run(wg *sync.WaitGroup) {
	if wg != nil {
		defer wg.Done()
	}

	ctx, span := tracing.ChildSpan(s.ctx, "sorter")
	defer tracing.FinishSpan(span)

	if log.V(2) {
		log.Infof(ctx, "starting sorter run")
		defer log.Infof(ctx, "exiting sorter run")
	}

	switch {
	case s.matchLen == 0 && s.limit == 0:
		// No specified ordering match length and unspecified limit, no optimizations possible so we
		// simply load all rows into memory and sort all values in-place. It has a worst-case time
		// complexity of O(n*log(n)) and a worst-case space complexity of O(n).
		ss := newSortAllStrategy(
			&sorterValues{
				ordering: s.ordering,
			})
		err := ss.Execute(s)
		if err != nil {
			log.Errorf(ctx, "error sorting rows in memory: %s", err)
		}

		s.output.Close(err)
	case s.matchLen == 0:
		// No specified ordering match length but specified limit, we can optimize our sort procedure by
		// maintaining a max-heap populated with only the smallest k rows seen. It has a worst-case time
		// complexity of O(n*log(k)) and a worst-case space complexity of O(k).
		ss := newSortTopKStrategy(
			&sorterValues{
				ordering: s.ordering,
			}, s.limit)
		err := ss.Execute(s)
		if err != nil {
			log.Errorf(ctx, "error sorting rows: %s", err)
		}

		s.output.Close(err)
	case s.matchLen != 0:
		// Ordering match length is specified, but no specified limit. We will be able to use
		// existing ordering in order to avoid loading all the rows into memory. If we're scanning
		// an index with a prefix matching an ordering prefix, we can only accumulate values for
		// equal fields in this prefix, sort the accumulated chunk and then output.
		ss := newSortChunksStrategy(
			&sorterValues{
				ordering: s.ordering,
			})
		err := ss.Execute(s)
		if err != nil {
			log.Errorf(ctx, "error sorting rows: %s", err)
		}

		s.output.Close(err)
	default:
		// TODO(irfansharif): Add optimization for case where both ordering match length and limit is
		// specified.
		panic("optimization no implemented yet")
	}
}