Example #1
0
// makeEqualityPredicate constructs a joinPredicate object for joins whose
// condition is a conjunction of column equalities. The i-th equality
// compares the column named leftColNames[i] in the left source with the
// column named rightColNames[i] in the right source.
//
// The first numMergedEqualityColumns equality pairs are additionally
// "merged": one result column is emitted for each of them at the
// beginning of the result row, and the corresponding original columns
// on both sides are marked hidden.
//
// The concatInfos parameter is not referenced by this function.
//
// It returns the predicate together with the dataSourceInfo describing
// the join's result columns (the same object stored in the predicate's
// info field). An error is returned if pickUsingColumn fails for one of
// the names, or if no equality comparison function exists for the
// types of a column pair.
//
// The function panics if leftColNames and rightColNames differ in
// length, or if numMergedEqualityColumns exceeds the number of
// equality pairs.
func makeEqualityPredicate(
	left, right *dataSourceInfo,
	leftColNames, rightColNames parser.NameList,
	numMergedEqualityColumns int,
	concatInfos *dataSourceInfo,
) (resPred *joinPredicate, info *dataSourceInfo, err error) {
	if len(leftColNames) != len(rightColNames) {
		// The lengths are integers: format them with %d (not %q, which
		// would render them as quoted character literals).
		panic(fmt.Errorf("left columns' length %d doesn't match right columns' length %d in EqualityPredicate",
			len(leftColNames), len(rightColNames)))
	}
	if len(leftColNames) < numMergedEqualityColumns {
		panic(fmt.Errorf("cannot merge %d columns, only %d columns to compare", numMergedEqualityColumns, len(leftColNames)))
	}

	// Prepare the arrays populated below.
	cmpOps := make([]func(*parser.EvalContext, parser.Datum, parser.Datum) (parser.DBool, error), len(leftColNames))
	leftEqualityIndices := make([]int, len(leftColNames))
	rightEqualityIndices := make([]int, len(rightColNames))

	// usedLeft/usedRight map each source column index to the position of
	// the merged equality pair it participates in, or invalidColIdx if it
	// is not merged. They are collected in order to determine below which
	// columns must be hidden; they are only allocated when merging result
	// columns.
	var usedLeft, usedRight []int
	var columns ResultColumns
	if numMergedEqualityColumns > 0 {
		usedLeft = make([]int, len(left.sourceColumns))
		for i := range usedLeft {
			usedLeft[i] = invalidColIdx
		}
		usedRight = make([]int, len(right.sourceColumns))
		for i := range usedRight {
			usedRight[i] = invalidColIdx
		}
		nResultColumns := len(left.sourceColumns) + len(right.sourceColumns) - numMergedEqualityColumns
		columns = make(ResultColumns, 0, nResultColumns)
	}

	// Find out which columns are involved in EqualityPredicate.
	for i := range leftColNames {
		leftColName := leftColNames[i].Normalize()
		rightColName := rightColNames[i].Normalize()

		// Find the column name on the left.
		leftIdx, leftType, err := pickUsingColumn(left.sourceColumns, leftColName, "left")
		if err != nil {
			return nil, nil, err
		}

		// Find the column name on the right.
		rightIdx, rightType, err := pickUsingColumn(right.sourceColumns, rightColName, "right")
		if err != nil {
			return nil, nil, err
		}

		// Remember the indices.
		leftEqualityIndices[i] = leftIdx
		rightEqualityIndices[i] = rightIdx

		// Memoize the comparison function.
		fn, found := parser.FindEqualComparisonFunction(leftType, rightType)
		if !found {
			return nil, nil, fmt.Errorf("JOIN/USING types %s for left column %s and %s for right column %s cannot be matched",
				leftType, leftColName, rightType, rightColName)
		}
		cmpOps[i] = fn

		if i < numMergedEqualityColumns {
			usedLeft[leftIdx] = i
			usedRight[rightIdx] = i

			// Merged columns come first in the results.
			columns = append(columns, left.sourceColumns[leftIdx])
		}
	}

	// Now, prepare/complete the metadata for the result columns.
	// The structure of the join data source results is like this:
	// - first, all the merged equality/USING columns;
	// - then all the left columns,
	// - then all the right columns,
	// The duplicate columns appended after the equality/USING columns
	// are hidden so that they are invisible to star expansion, but
	// not omitted so that they can still be selected separately.

	// Finish collecting the column definitions from the left and
	// right data sources. Note that c is a copy of the source column, so
	// setting c.hidden does not modify the input data sources.
	for i, c := range left.sourceColumns {
		if usedLeft != nil && usedLeft[i] != invalidColIdx {
			c.hidden = true
		}
		columns = append(columns, c)
	}
	for i, c := range right.sourceColumns {
		if usedRight != nil && usedRight[i] != invalidColIdx {
			c.hidden = true
		}
		columns = append(columns, c)
	}

	// Compute the mappings from table aliases to column sets from
	// both sides into a new alias-columnset mapping for the result
	// rows. We need to be extra careful about the aliases
	// for the anonymous table, which needs to be merged.
	aliases := make(sourceAliases, 0, len(left.sourceAliases)+len(right.sourceAliases))

	// collectAliases copies every named (non-anonymous) alias, shifting
	// its column indices by offset so that they refer to positions in the
	// join's result columns.
	collectAliases := func(sourceAliases sourceAliases, offset int) {
		for _, alias := range sourceAliases {
			if alias.name == anonymousTable {
				continue
			}
			newRange := make([]int, len(alias.columnRange))
			for i, colIdx := range alias.columnRange {
				newRange[i] = colIdx + offset
			}
			aliases = append(aliases, sourceAlias{name: alias.name, columnRange: newRange})
		}
	}
	collectAliases(left.sourceAliases, numMergedEqualityColumns)
	collectAliases(right.sourceAliases, numMergedEqualityColumns+len(left.sourceColumns))

	anonymousAlias := sourceAlias{name: anonymousTable, columnRange: nil}
	var hiddenLeftNames, hiddenRightNames []string

	// All the merged columns at the beginning belong to the
	// anonymous data source. The names to hide are those of the source
	// columns that were actually merged, i.e. the columns found at the
	// equality indices -- not the first numMergedEqualityColumns columns
	// of each source, which are unrelated in general.
	for i := 0; i < numMergedEqualityColumns; i++ {
		anonymousAlias.columnRange = append(anonymousAlias.columnRange, i)
		hiddenLeftNames = append(hiddenLeftNames,
			parser.ReNormalizeName(left.sourceColumns[leftEqualityIndices[i]].Name))
		hiddenRightNames = append(hiddenRightNames,
			parser.ReNormalizeName(right.sourceColumns[rightEqualityIndices[i]].Name))
	}

	// Now collect the other table-less columns into the anonymous data
	// source, but hide (skip) those that are already merged.
	collectAnonymousAliases := func(
		sourceAliases sourceAliases, hiddenNames []string, cols ResultColumns, offset int,
	) {
		for _, alias := range sourceAliases {
			if alias.name != anonymousTable {
				continue
			}
			for _, colIdx := range alias.columnRange {
				isHidden := false
				for _, hiddenName := range hiddenNames {
					if parser.ReNormalizeName(cols[colIdx].Name) == hiddenName {
						isHidden = true
						break
					}
				}
				if !isHidden {
					anonymousAlias.columnRange = append(anonymousAlias.columnRange, colIdx+offset)
				}
			}
		}
	}
	collectAnonymousAliases(left.sourceAliases, hiddenLeftNames, left.sourceColumns,
		numMergedEqualityColumns)
	collectAnonymousAliases(right.sourceAliases, hiddenRightNames, right.sourceColumns,
		numMergedEqualityColumns+len(left.sourceColumns))

	// Only register the anonymous alias if it received at least one column.
	if anonymousAlias.columnRange != nil {
		aliases = append(aliases, anonymousAlias)
	}

	info = &dataSourceInfo{
		sourceColumns: columns,
		sourceAliases: aliases,
	}

	pred := &joinPredicate{
		numLeftCols:              len(left.sourceColumns),
		numRightCols:             len(right.sourceColumns),
		leftColNames:             leftColNames,
		rightColNames:            rightColNames,
		numMergedEqualityColumns: numMergedEqualityColumns,
		cmpFunctions:             cmpOps,
		leftEqualityIndices:      leftEqualityIndices,
		rightEqualityIndices:     rightEqualityIndices,
		info:                     info,
	}
	// We must initialize the indexed var helper in all cases, even when
	// there is no on condition, so that getNeededColumns() does not get
	// confused.
	pred.iVarHelper = parser.MakeIndexedVarHelper(pred, len(columns))
	return pred, info, nil
}
Example #2
0
// tryAddEqualityFilter attempts to turn the given filter expression into
// an equality predicate. It returns true iff the transformation succeeds.
//
// The transformation only applies to filters of the form `a = b` where
// both operands are IndexedVars, one referring to a column of the left
// source and the other to a column of the right source. IndexedVar
// indices are interpreted relative to the join's full column set:
// first the merged equality columns, then the left columns, then the
// right columns.
//
// On success the column pair is appended to the predicate's equality
// indices, column names and comparison functions, unless that exact
// pair is already registered, in which case nothing is modified. When
// false is returned the predicate is left unmodified.
//
// The function panics if no equality comparison function can be found
// for the operand types.
func (p *joinPredicate) tryAddEqualityFilter(filter parser.Expr, left, right *dataSourceInfo) bool {
	c, ok := filter.(*parser.ComparisonExpr)
	if !ok || c.Operator != parser.EQ {
		return false
	}
	lhs, ok := c.Left.(*parser.IndexedVar)
	if !ok {
		return false
	}
	rhs, ok := c.Right.(*parser.IndexedVar)
	if !ok {
		return false
	}

	sourceBoundary := p.numMergedEqualityColumns + len(left.sourceColumns)
	if (lhs.Idx >= sourceBoundary && rhs.Idx >= sourceBoundary) ||
		(lhs.Idx < sourceBoundary && rhs.Idx < sourceBoundary) {
		// Both variables are on the same side of the join (e.g. `a JOIN b ON a.x = a.y`).
		return false
	}

	// Normalize the operand order so that lhs is the operand below the
	// source boundary and rhs the one in the right source.
	if lhs.Idx > rhs.Idx {
		lhs, rhs = rhs, lhs
	}

	if lhs.Idx < p.numMergedEqualityColumns {
		// The operand refers to one of the merged columns at the start of
		// the result row, not to a column of the left source. There is no
		// left column index for it, so leave the filter alone rather than
		// computing a negative index below.
		return false
	}

	// At this point we have an equality, so we can add it to the list
	// of equality columns.

	// To do this we must be a bit careful: the expression contains
	// IndexedVars, and the column indices at this point will refer to
	// the full column set of the joinPredicate, including the
	// merged columns.
	leftColIdx := lhs.Idx - p.numMergedEqualityColumns
	rightColIdx := rhs.Idx - sourceBoundary

	// Also, we will want to avoid redundant equality checks.
	for i := range p.leftEqualityIndices {
		if p.leftEqualityIndices[i] == leftColIdx && p.rightEqualityIndices[i] == rightColIdx {
			// The filter is already there; simply absorb it and say we succeeded.
			return true
		}
	}

	// First resolve the comparison function. We can't use the
	// ComparisonExpr's memoized comparison directly, because we may
	// have swapped the operands above.
	lhsType, rhsType := lhs.ResolvedType(), rhs.ResolvedType()
	fn, found := parser.FindEqualComparisonFunction(lhsType, rhsType)
	if !found {
		// This is ... unexpected. This means we have a valid ON
		// expression of the form "a = b" but the expression "b = a" is
		// invalid. We could simply avoid the optimization but this is
		// really a bug in the built-in semantics so we want to complain
		// loudly. Report the resolved types of the operands: the Go type
		// of both operands is always *parser.IndexedVar and carries no
		// information.
		panic(fmt.Errorf("predicate %s is valid, but '%s = %s' cannot be type checked", c, lhsType, rhsType))
	}

	// Everything has been validated; only now mutate the predicate so
	// that its parallel slices always stay the same length.
	p.cmpFunctions = append(p.cmpFunctions, fn)
	p.leftEqualityIndices = append(p.leftEqualityIndices, leftColIdx)
	p.rightEqualityIndices = append(p.rightEqualityIndices, rightColIdx)
	p.leftColNames = append(p.leftColNames, parser.Name(left.sourceColumns[leftColIdx].Name))
	p.rightColNames = append(p.rightColNames, parser.Name(right.sourceColumns[rightColIdx].Name))

	return true
}
Example #3
0
// optimizeOnPredicate tries to turn the filter in an onPredicate into
// equality columns in the joinPredicate, which enables faster
// joins.
//
// The optimization only applies when pred.filter has the form `a = b`
// where both operands are IndexedVars, one referring to a column of
// the left source and the other to a column of the right source.
// IndexedVar indices are interpreted relative to the join's full column
// set: first the merged equality columns, then the left columns, then
// the right columns. In every other case pred is returned unmodified.
//
// When the optimization applies, the column pair is appended to the
// predicate's equality indices, column names and comparison functions
// (unless that exact pair is already registered) and pred.filter is
// set to nil.
//
// The concatInfos argument is not referenced by this function. The
// returned values are always pred, pred.info and a nil error. The
// function panics if no equality comparison function can be found for
// the operand types.
func optimizeOnPredicate(
	pred *joinPredicate, left, right *dataSourceInfo, concatInfos *dataSourceInfo,
) (*joinPredicate, *dataSourceInfo, error) {
	c, ok := pred.filter.(*parser.ComparisonExpr)
	if !ok || c.Operator != parser.EQ {
		return pred, pred.info, nil
	}
	lhs, ok := c.Left.(*parser.IndexedVar)
	if !ok {
		return pred, pred.info, nil
	}
	rhs, ok := c.Right.(*parser.IndexedVar)
	if !ok {
		return pred, pred.info, nil
	}

	sourceBoundary := pred.numMergedEqualityColumns + len(left.sourceColumns)
	if (lhs.Idx >= sourceBoundary && rhs.Idx >= sourceBoundary) ||
		(lhs.Idx < sourceBoundary && rhs.Idx < sourceBoundary) {
		// Both variables are on the same side of the join (e.g. `a JOIN b ON a.x = a.y`).
		return pred, pred.info, nil
	}

	// Normalize the operand order so that lhs is the operand below the
	// source boundary and rhs the one in the right source.
	if lhs.Idx > rhs.Idx {
		lhs, rhs = rhs, lhs
	}

	if lhs.Idx < pred.numMergedEqualityColumns {
		// The operand refers to one of the merged columns at the start of
		// the result row, not to a column of the left source. There is no
		// left column index for it, so keep the filter as-is rather than
		// computing a negative index below.
		return pred, pred.info, nil
	}

	// At this point we have an equality, so we can add it to the list
	// of equality columns.

	// To do this we must be a bit careful: the expression contains
	// IndexedVars, and the column indices at this point will refer to
	// the full column set of the joinPredicate, including the
	// merged columns.
	leftColIdx := lhs.Idx - pred.numMergedEqualityColumns
	rightColIdx := rhs.Idx - sourceBoundary

	// Avoid redundant equality checks: if this pair is already an
	// equality column pair, the filter adds nothing and can be dropped.
	for i := range pred.leftEqualityIndices {
		if pred.leftEqualityIndices[i] == leftColIdx && pred.rightEqualityIndices[i] == rightColIdx {
			pred.filter = nil
			return pred, pred.info, nil
		}
	}

	// Resolve the comparison function. We can't use the
	// ComparisonExpr's memoized comparison directly, because we may
	// have swapped the operands above.
	lhsType, rhsType := lhs.ResolvedType(), rhs.ResolvedType()
	fn, found := parser.FindEqualComparisonFunction(lhsType, rhsType)
	if !found {
		// This is ... unexpected. This means we have a valid ON
		// expression of the form "a = b" but the expression "b = a" is
		// invalid. We could simply avoid the optimization but this is
		// really a bug in the built-in semantics so we want to complain
		// loudly. Report the resolved types of the operands: the Go type
		// of both operands is always *parser.IndexedVar and carries no
		// information.
		panic(fmt.Errorf("predicate %s is valid, but '%s = %s' cannot be type checked", c, lhsType, rhsType))
	}

	// Everything has been validated; only now mutate the predicate so
	// that its parallel slices always stay the same length.
	pred.cmpFunctions = append(pred.cmpFunctions, fn)
	pred.leftEqualityIndices = append(pred.leftEqualityIndices, leftColIdx)
	pred.rightEqualityIndices = append(pred.rightEqualityIndices, rightColIdx)
	pred.leftColNames = append(pred.leftColNames, parser.Name(left.sourceColumns[leftColIdx].Name))
	pred.rightColNames = append(pred.rightColNames, parser.Name(right.sourceColumns[rightColIdx].Name))

	// The filter is optimized away now.
	pred.filter = nil

	return pred, pred.info, nil
}