// makeEqualityPredicate constructs a joinPredicate object for joins. The join // condition includes equality between numMergedEqualityColumns columns, // specified by leftColNames and rightColNames. func makeEqualityPredicate( left, right *dataSourceInfo, leftColNames, rightColNames parser.NameList, numMergedEqualityColumns int, concatInfos *dataSourceInfo, ) (resPred *joinPredicate, info *dataSourceInfo, err error) { if len(leftColNames) != len(rightColNames) { panic(fmt.Errorf("left columns' length %q doesn't match right columns' length %q in EqualityPredicate", len(leftColNames), len(rightColNames))) } if len(leftColNames) < numMergedEqualityColumns { panic(fmt.Errorf("cannot merge %d columns, only %d columns to compare", numMergedEqualityColumns, len(leftColNames))) } // Prepare the arrays populated below. cmpOps := make([]func(*parser.EvalContext, parser.Datum, parser.Datum) (parser.DBool, error), len(leftColNames)) leftEqualityIndices := make([]int, len(leftColNames)) rightEqualityIndices := make([]int, len(rightColNames)) // usedLeft represents the list of indices that participate in the // equality predicate. They are collected in order to determine // below which columns remain after the equality; this is used // only when merging result columns. var usedLeft, usedRight []int var columns ResultColumns if numMergedEqualityColumns > 0 { usedLeft = make([]int, len(left.sourceColumns)) for i := range usedLeft { usedLeft[i] = invalidColIdx } usedRight = make([]int, len(right.sourceColumns)) for i := range usedRight { usedRight[i] = invalidColIdx } nResultColumns := len(left.sourceColumns) + len(right.sourceColumns) - numMergedEqualityColumns columns = make(ResultColumns, 0, nResultColumns) } // Find out which columns are involved in EqualityPredicate. for i := range leftColNames { leftColName := leftColNames[i].Normalize() rightColName := rightColNames[i].Normalize() // Find the column name on the left. leftIdx, leftType, err := pickUsingColumn(left.sourceColumns, leftColName, "left") if err != nil { return nil, nil, err } // Find the column name on the right. rightIdx, rightType, err := pickUsingColumn(right.sourceColumns, rightColName, "right") if err != nil { return nil, nil, err } // Remember the indices. leftEqualityIndices[i] = leftIdx rightEqualityIndices[i] = rightIdx // Memoize the comparison function. fn, found := parser.FindEqualComparisonFunction(leftType, rightType) if !found { return nil, nil, fmt.Errorf("JOIN/USING types %s for left column %s and %s for right column %s cannot be matched", leftType, leftColName, rightType, rightColName) } cmpOps[i] = fn if i < numMergedEqualityColumns { usedLeft[leftIdx] = i usedRight[rightIdx] = i // Merged columns come first in the results. columns = append(columns, left.sourceColumns[leftIdx]) } } // Now, prepare/complete the metadata for the result columns. // The structure of the join data source results is like this: // - first, all the equality/USING columns; // - then all the left columns, // - then all the right columns, // The duplicate columns appended after the equality/USING columns // are hidden so that they are invisible to star expansion, but // not omitted so that they can still be selected separately. // Finish collecting the column definitions from the left and // right data sources. for i, c := range left.sourceColumns { if usedLeft != nil && usedLeft[i] != invalidColIdx { c.hidden = true } columns = append(columns, c) } for i, c := range right.sourceColumns { if usedRight != nil && usedRight[i] != invalidColIdx { c.hidden = true } columns = append(columns, c) } // Compute the mappings from table aliases to column sets from // both sides into a new alias-columnset mapping for the result // rows. We need to be extra careful about the aliases // for the anonymous table, which needs to be merged. aliases := make(sourceAliases, 0, len(left.sourceAliases)+len(right.sourceAliases)) collectAliases := func(sourceAliases sourceAliases, offset int) { for _, alias := range sourceAliases { if alias.name == anonymousTable { continue } newRange := make([]int, len(alias.columnRange)) for i, colIdx := range alias.columnRange { newRange[i] = colIdx + offset } aliases = append(aliases, sourceAlias{name: alias.name, columnRange: newRange}) } } collectAliases(left.sourceAliases, numMergedEqualityColumns) collectAliases(right.sourceAliases, numMergedEqualityColumns+len(left.sourceColumns)) anonymousAlias := sourceAlias{name: anonymousTable, columnRange: nil} var hiddenLeftNames, hiddenRightNames []string // All the merged columns at the beginning belong to the // anonymous data source. for i := 0; i < numMergedEqualityColumns; i++ { anonymousAlias.columnRange = append(anonymousAlias.columnRange, i) hiddenLeftNames = append(hiddenLeftNames, parser.ReNormalizeName(left.sourceColumns[i].Name)) hiddenRightNames = append(hiddenRightNames, parser.ReNormalizeName(right.sourceColumns[i].Name)) } // Now collect the other table-less columns into the anonymous data // source, but hide (skip) those that are already merged. collectAnonymousAliases := func( sourceAliases sourceAliases, hiddenNames []string, cols ResultColumns, offset int, ) { for _, alias := range sourceAliases { if alias.name != anonymousTable { continue } for _, colIdx := range alias.columnRange { isHidden := false for _, hiddenName := range hiddenNames { if parser.ReNormalizeName(cols[colIdx].Name) == hiddenName { isHidden = true break } } if !isHidden { anonymousAlias.columnRange = append(anonymousAlias.columnRange, colIdx+offset) } } } } collectAnonymousAliases(left.sourceAliases, hiddenLeftNames, left.sourceColumns, numMergedEqualityColumns) collectAnonymousAliases(right.sourceAliases, hiddenRightNames, right.sourceColumns, numMergedEqualityColumns+len(left.sourceColumns)) if anonymousAlias.columnRange != nil { aliases = append(aliases, anonymousAlias) } info = &dataSourceInfo{ sourceColumns: columns, sourceAliases: aliases, } pred := &joinPredicate{ numLeftCols: len(left.sourceColumns), numRightCols: len(right.sourceColumns), leftColNames: leftColNames, rightColNames: rightColNames, numMergedEqualityColumns: numMergedEqualityColumns, cmpFunctions: cmpOps, leftEqualityIndices: leftEqualityIndices, rightEqualityIndices: rightEqualityIndices, info: info, } // We must initialize the indexed var helper in all cases, even when // there is no on condition, so that getNeededColumns() does not get // confused. pred.iVarHelper = parser.MakeIndexedVarHelper(pred, len(columns)) return pred, info, nil }
// tryAddEqualityFilter attempts to turn the given filter expression into // an equality predicate. It returns true iff the transformation succeeds. func (p *joinPredicate) tryAddEqualityFilter(filter parser.Expr, left, right *dataSourceInfo) bool { c, ok := filter.(*parser.ComparisonExpr) if !ok || c.Operator != parser.EQ { return false } lhs, ok := c.Left.(*parser.IndexedVar) if !ok { return false } rhs, ok := c.Right.(*parser.IndexedVar) if !ok { return false } sourceBoundary := p.numMergedEqualityColumns + len(left.sourceColumns) if (lhs.Idx >= sourceBoundary && rhs.Idx >= sourceBoundary) || (lhs.Idx < sourceBoundary && rhs.Idx < sourceBoundary) { // Both variables are on the same side of the join (e.g. `a JOIN b ON a.x = a.y`). return false } if lhs.Idx > rhs.Idx { lhs, rhs = rhs, lhs } // At this point we have an equality, so we can add it to the list // of equality columns. // To do this we must be a bit careful: the expression contains // IndexedVars, and the column indices at this point will refer to // the full column set of the joinPredicate, including the // merged columns. leftColIdx := lhs.Idx - p.numMergedEqualityColumns rightColIdx := rhs.Idx - len(left.sourceColumns) - p.numMergedEqualityColumns // Also, we will want to avoid redundant equality checks. for i := range p.leftEqualityIndices { if p.leftEqualityIndices[i] == leftColIdx && p.rightEqualityIndices[i] == rightColIdx { // The filter is already there; simply absorb it and say we succeeded. return true } } // First resolve the comparison function. We can't use the // ComparisonExpr's memoized comparison directly, because we may // have swapped the operands above. fn, found := parser.FindEqualComparisonFunction(lhs.ResolvedType(), rhs.ResolvedType()) if !found { // This is ... unexpected. This means we have a valid ON // expression of the form "a = b" but the expression "b = a" is // invalid. We could simply avoid the optimization but this is // really a bug in the built-in semantics so we want to complain // loudly. panic(fmt.Errorf("predicate %s is valid, but '%T = %T' cannot be type checked", c, lhs, rhs)) } p.cmpFunctions = append(p.cmpFunctions, fn) p.leftEqualityIndices = append(p.leftEqualityIndices, leftColIdx) p.rightEqualityIndices = append(p.rightEqualityIndices, rightColIdx) p.leftColNames = append(p.leftColNames, parser.Name(left.sourceColumns[leftColIdx].Name)) p.rightColNames = append(p.rightColNames, parser.Name(right.sourceColumns[rightColIdx].Name)) return true }
// optimizeOnPredicate tries to turn the filter in an onPredicate into // equality columns in the joinPredicate, which enables faster // joins. The concatInfos argument, if provided, must be a // precomputed concatenation of the left and right dataSourceInfos. func optimizeOnPredicate( pred *joinPredicate, left, right *dataSourceInfo, concatInfos *dataSourceInfo, ) (*joinPredicate, *dataSourceInfo, error) { c, ok := pred.filter.(*parser.ComparisonExpr) if !ok || c.Operator != parser.EQ { return pred, pred.info, nil } lhs, ok := c.Left.(*parser.IndexedVar) if !ok { return pred, pred.info, nil } rhs, ok := c.Right.(*parser.IndexedVar) if !ok { return pred, pred.info, nil } sourceBoundary := pred.numMergedEqualityColumns + len(left.sourceColumns) if (lhs.Idx >= sourceBoundary && rhs.Idx >= sourceBoundary) || (lhs.Idx < sourceBoundary && rhs.Idx < sourceBoundary) { // Both variables are on the same side of the join (e.g. `a JOIN b ON a.x = a.y`). return pred, pred.info, nil } if lhs.Idx > rhs.Idx { lhs, rhs = rhs, lhs } // At this point we have an equality, so we can add it to the list // of equality columns. // First resolve the comparison function. We can't use the // ComparisonExpr's memoized comparison directly, because we may // have swapped the operands above. fn, found := parser.FindEqualComparisonFunction(lhs.ResolvedType(), rhs.ResolvedType()) if !found { // This is ... unexpected. This means we have a valid ON // expression of the form "a = b" but the expression "b = a" is // invalid. We could simply avoid the optimization but this is // really a bug in the built-in semantics so we want to complain // loudly. panic(fmt.Errorf("predicate %s is valid, but '%T = %T' cannot be type checked", c, lhs, rhs)) } pred.cmpFunctions = append(pred.cmpFunctions, fn) // To do this we must be a bit careful: the expression contains // IndexedVars, and the column indices at this point will refer to // the full column set of the joinPredicate, including the // merged columns. leftColIdx := lhs.Idx - pred.numMergedEqualityColumns rightColIdx := rhs.Idx - len(left.sourceColumns) - pred.numMergedEqualityColumns pred.leftEqualityIndices = append(pred.leftEqualityIndices, leftColIdx) pred.rightEqualityIndices = append(pred.rightEqualityIndices, rightColIdx) pred.leftColNames = append(pred.leftColNames, parser.Name(left.sourceColumns[leftColIdx].Name)) pred.rightColNames = append(pred.rightColNames, parser.Name(right.sourceColumns[rightColIdx].Name)) // The filter is optimized away now. pred.filter = nil return pred, pred.info, nil }