// PruneColumns implements LogicalPlan interface. func (p *Join) PruneColumns(parentUsedCols []*expression.Column) { for _, eqCond := range p.EqualConditions { parentUsedCols = append(parentUsedCols, expression.ExtractColumns(eqCond)...) } for _, leftCond := range p.LeftConditions { parentUsedCols = append(parentUsedCols, expression.ExtractColumns(leftCond)...) } for _, rightCond := range p.RightConditions { parentUsedCols = append(parentUsedCols, expression.ExtractColumns(rightCond)...) } for _, otherCond := range p.OtherConditions { parentUsedCols = append(parentUsedCols, expression.ExtractColumns(otherCond)...) } lChild := p.GetChildByIndex(0).(LogicalPlan) rChild := p.GetChildByIndex(1).(LogicalPlan) var leftCols, rightCols []*expression.Column for _, col := range parentUsedCols { if lChild.GetSchema().GetIndex(col) != -1 { leftCols = append(leftCols, col) } else if rChild.GetSchema().GetIndex(col) != -1 { rightCols = append(rightCols, col) } } lChild.PruneColumns(leftCols) rChild.PruneColumns(rightCols) composedSchema := append(lChild.GetSchema().Clone(), rChild.GetSchema().Clone()...) if p.JoinType == SemiJoin { p.schema = lChild.GetSchema().Clone() } else if p.JoinType == SemiJoinWithAux { p.schema = append(lChild.GetSchema().Clone(), p.schema[len(p.schema)-1]) } else { p.schema = composedSchema } p.schema.InitIndices() }
// PruneColumns implements LogicalPlan interface. // e.g. For query select b.c, (select count(*) from a where a.id = b.id) from b. Its plan is Projection->Apply->TableScan. // The schema of b is (a,b,c,id). When Pruning Apply, the parentUsedCols is (c, extra), outerSchema is (a,b,c,id). // Then after pruning inner plan, the childOuterUsedCols schema in apply becomes (id). // Now there're two columns in parentUsedCols, c is the column from Apply's child ---- TableScan, but extra isn't. // So only c in parentUsedCols and id in outerSchema can be passed to TableScan. func (p *Apply) PruneColumns(parentUsedCols []*expression.Column) { child := p.GetChildByIndex(0).(LogicalPlan) innerPlan := p.GetChildByIndex(1).(LogicalPlan) var usedCols []*expression.Column if p.Checker != nil { parentUsedCols = append(parentUsedCols, expression.ExtractColumns(p.Checker.Condition)...) } for _, col := range parentUsedCols { if child.GetSchema().GetIndex(col) != -1 { usedCols = append(usedCols, col) } } innerPlan.PruneColumns(innerPlan.GetSchema()) corCols := innerPlan.extractCorrelatedCols() for _, corCol := range corCols { idx := child.GetSchema().GetIndex(&corCol.Column) if idx != -1 { usedCols = append(usedCols, &corCol.Column) } } child.PruneColumns(usedCols) combinedSchema := append(child.GetSchema().Clone(), innerPlan.GetSchema().Clone()...) if p.Checker == nil { p.schema = combinedSchema } else { p.schema = append(child.GetSchema().Clone(), p.schema[len(p.schema)-1]) } p.schema.InitIndices() }
// PredicatePushDown implements LogicalPlan PredicatePushDown interface. func (p *Apply) PredicatePushDown(predicates []expression.Expression) (ret []expression.Expression, retPlan LogicalPlan, err error) { child := p.GetChildByIndex(0).(LogicalPlan) var push []expression.Expression for _, cond := range predicates { extractedCols := expression.ExtractColumns(cond) canPush := true for _, col := range extractedCols { if child.GetSchema().GetIndex(col) == -1 { canPush = false break } } if canPush { push = append(push, cond) } else { ret = append(ret, cond) } } childRet, _, err := child.PredicatePushDown(push) if err != nil { return nil, nil, errors.Trace(err) } _, p.children[1], err = p.children[1].(LogicalPlan).PredicatePushDown(nil) if err != nil { return nil, nil, errors.Trace(err) } return append(ret, childRet...), p, nil }
// PredicatePushDown implements LogicalPlan PredicatePushDown interface. func (p *Projection) PredicatePushDown(predicates []expression.Expression) (ret []expression.Expression, retPlan LogicalPlan, err error) { retPlan = p var push []expression.Expression for _, cond := range predicates { canSubstitute := true extractedCols := expression.ExtractColumns(cond) for _, col := range extractedCols { id := p.GetSchema().GetIndex(col) if _, ok := p.Exprs[id].(*expression.ScalarFunction); ok { canSubstitute = false break } } if canSubstitute { push = append(push, expression.ColumnSubstitute(cond, p.GetSchema(), p.Exprs)) } else { ret = append(ret, cond) } } child := p.GetChildByIndex(0).(LogicalPlan) restConds, _, err1 := child.PredicatePushDown(push) if err1 != nil { return nil, nil, errors.Trace(err1) } if len(restConds) > 0 { err1 = addSelection(p, child, restConds, p.allocator) if err1 != nil { return nil, nil, errors.Trace(err1) } } return }
// PruneColumns implements LogicalPlan interface. func (p *Selection) PruneColumns(parentUsedCols []*expression.Column) { child := p.GetChildByIndex(0).(LogicalPlan) for _, cond := range p.Conditions { parentUsedCols = append(parentUsedCols, expression.ExtractColumns(cond)...) } child.PruneColumns(parentUsedCols) p.SetSchema(child.GetSchema()) }
// PruneColumns implements LogicalPlan interface. func (p *Sort) PruneColumns(parentUsedCols []*expression.Column) { child := p.GetChildByIndex(0).(LogicalPlan) for _, item := range p.ByItems { parentUsedCols = append(parentUsedCols, expression.ExtractColumns(item.Expr)...) } child.PruneColumns(parentUsedCols) p.SetSchema(p.GetChildByIndex(0).GetSchema()) }
// PruneColumns implements LogicalPlan interface. func (p *Aggregation) PruneColumns(parentUsedCols []*expression.Column) { child := p.GetChildByIndex(0).(LogicalPlan) used := getUsedList(parentUsedCols, p.schema) for i := len(used) - 1; i >= 0; i-- { if !used[i] { p.schema = append(p.schema[:i], p.schema[i+1:]...) p.AggFuncs = append(p.AggFuncs[:i], p.AggFuncs[i+1:]...) } } var selfUsedCols []*expression.Column for _, aggrFunc := range p.AggFuncs { for _, arg := range aggrFunc.GetArgs() { selfUsedCols = append(selfUsedCols, expression.ExtractColumns(arg)...) } } for _, expr := range p.GroupByItems { selfUsedCols = append(selfUsedCols, expression.ExtractColumns(expr)...) } child.PruneColumns(selfUsedCols) p.schema.InitIndices() }
// collectGbyCols collects all columns from gby-items and join-conditions and splits them into two parts: "leftGbyCols" and // "rightGbyCols". e.g. For query "SELECT SUM(B.id) FROM A, B WHERE A.c1 = B.c1 AND A.c2 != B.c2 GROUP BY B.c3" , the optimized // query should be "SELECT SUM(B.agg) FROM A, (SELECT SUM(id) as agg, c1, c2, c3 FROM B GROUP BY id, c1, c2, c3) as B // WHERE A.c1 = B.c1 AND A.c2 != B.c2 GROUP BY B.c3". As you see, all the columns appearing in join-conditions should be // treated as group by columns in join subquery. func (a *aggPushDownSolver) collectGbyCols(agg *Aggregation, join *Join) (leftGbyCols, rightGbyCols []*expression.Column) { leftChild := join.GetChildByIndex(0) for _, gbyExpr := range agg.GroupByItems { cols := expression.ExtractColumns(gbyExpr) for _, col := range cols { if leftChild.GetSchema().GetIndex(col) != -1 { leftGbyCols = append(leftGbyCols, col) } else { rightGbyCols = append(rightGbyCols, col) } } } // extract equal conditions for _, eqFunc := range join.EqualConditions { leftGbyCols = a.addGbyCol(leftGbyCols, eqFunc.Args[0].(*expression.Column)) rightGbyCols = a.addGbyCol(rightGbyCols, eqFunc.Args[1].(*expression.Column)) } for _, leftCond := range join.LeftConditions { cols := expression.ExtractColumns(leftCond) leftGbyCols = a.addGbyCol(leftGbyCols, cols...) } for _, rightCond := range join.RightConditions { cols := expression.ExtractColumns(rightCond) rightGbyCols = a.addGbyCol(rightGbyCols, cols...) } for _, otherCond := range join.OtherConditions { cols := expression.ExtractColumns(otherCond) for _, col := range cols { if leftChild.GetSchema().GetIndex(col) != -1 { leftGbyCols = a.addGbyCol(leftGbyCols, col) } else { rightGbyCols = a.addGbyCol(rightGbyCols, col) } } } return }
// PruneColumns implements LogicalPlan interface. func (p *Projection) PruneColumns(parentUsedCols []*expression.Column) { child := p.GetChildByIndex(0).(LogicalPlan) var selfUsedCols []*expression.Column used := getUsedList(parentUsedCols, p.schema) for i := len(used) - 1; i >= 0; i-- { if !used[i] && exprHasSetVar(p.Exprs[i]) { p.schema = append(p.schema[:i], p.schema[i+1:]...) p.Exprs = append(p.Exprs[:i], p.Exprs[i+1:]...) } } for _, expr := range p.Exprs { selfUsedCols = append(selfUsedCols, expression.ExtractColumns(expr)...) } child.PruneColumns(selfUsedCols) p.schema.InitIndices() }
// checkIndexCondition will check whether all columns of condition is index columns or primary key column. func checkIndexCondition(condition expression.Expression, indexColumns []*model.IndexColumn, pKName model.CIStr) bool { cols := expression.ExtractColumns(condition) for _, col := range cols { if pKName.L == col.ColName.L { continue } isIndexColumn := false for _, indCol := range indexColumns { if col.ColName.L == indCol.Name.L && indCol.Length == types.UnspecifiedLength { isIndexColumn = true break } } if !isIndexColumn { return false } } return true }
// getAggFuncChildIdx gets which children it belongs to, 0 stands for left, 1 stands for right, -1 stands for both. func (a *aggPushDownSolver) getAggFuncChildIdx(aggFunc expression.AggregationFunction, schema expression.Schema) int { fromLeft, fromRight := false, false var cols []*expression.Column for _, arg := range aggFunc.GetArgs() { cols = append(cols, expression.ExtractColumns(arg)...) } for _, col := range cols { if schema.GetIndex(col) != -1 { fromLeft = true } else { fromRight = true } } if fromLeft && fromRight { return -1 } else if fromLeft { return 0 } return 1 }
func extractOnCondition(conditions []expression.Expression, left LogicalPlan, right LogicalPlan) ( eqCond []*expression.ScalarFunction, leftCond []expression.Expression, rightCond []expression.Expression, otherCond []expression.Expression) { for _, expr := range conditions { binop, ok := expr.(*expression.ScalarFunction) if ok && binop.FuncName.L == ast.EQ { ln, lOK := binop.Args[0].(*expression.Column) rn, rOK := binop.Args[1].(*expression.Column) if lOK && rOK { if left.GetSchema().GetIndex(ln) != -1 && right.GetSchema().GetIndex(rn) != -1 { eqCond = append(eqCond, binop) continue } if left.GetSchema().GetIndex(rn) != -1 && right.GetSchema().GetIndex(ln) != -1 { cond, _ := expression.NewFunction(ast.EQ, types.NewFieldType(mysql.TypeTiny), rn, ln) eqCond = append(eqCond, cond.(*expression.ScalarFunction)) continue } } } columns := expression.ExtractColumns(expr) allFromLeft, allFromRight := true, true for _, col := range columns { if left.GetSchema().GetIndex(col) == -1 { allFromLeft = false } if right.GetSchema().GetIndex(col) == -1 { allFromRight = false } } if allFromRight { rightCond = append(rightCond, expr) } else if allFromLeft { leftCond = append(leftCond, expr) } else { otherCond = append(otherCond, expr) } } return }
// PredicatePushDown implements LogicalPlan PredicatePushDown interface. func (p *Aggregation) PredicatePushDown(predicates []expression.Expression) (ret []expression.Expression, retPlan LogicalPlan, err error) { retPlan = p var exprsOriginal []expression.Expression var condsToPush []expression.Expression for _, fun := range p.AggFuncs { exprsOriginal = append(exprsOriginal, fun.GetArgs()[0]) } for _, cond := range predicates { switch cond.(type) { case *expression.Constant: condsToPush = append(condsToPush, cond) // Consider SQL list "select sum(b) from t group by a having 1=0". "1=0" is a constant predicate which should be // retained and pushed down at the same time. Because we will get a wrong query result that contains one column // with value 0 rather than an empty query result. ret = append(ret, cond) case *expression.ScalarFunction: extractedCols := expression.ExtractColumns(cond) ok := true for _, col := range extractedCols { if p.getGbyColIndex(col) == -1 { ok = false break } } if ok { newFunc := expression.ColumnSubstitute(cond.Clone(), p.GetSchema(), exprsOriginal) condsToPush = append(condsToPush, newFunc) } else { ret = append(ret, cond) } default: ret = append(ret, cond) } } p.baseLogicalPlan.PredicatePushDown(condsToPush) return }
// reorderJoin implements a simple join reorder algorithm. It will extract all the equal conditions and compose them to a graph. // Then walk through the graph and pick the nodes connected by some edges to compose a join tree. // We will pick the node with least result set as early as possible. func (e *joinReOrderSolver) reorderJoin(group []LogicalPlan, conds []expression.Expression) { e.graph = make([]edgeList, len(group)) e.group = group e.visited = make([]bool, len(group)) e.resultJoin = nil e.groupRank = make([]*rankInfo, len(group)) for i := 0; i < len(e.groupRank); i++ { e.groupRank[i] = &rankInfo{ nodeID: i, rate: 1.0, } } for _, cond := range conds { if f, ok := cond.(*expression.ScalarFunction); ok { if f.FuncName.L == ast.EQ { lCol, lok := f.Args[0].(*expression.Column) rCol, rok := f.Args[1].(*expression.Column) if lok && rok { lID := findColumnIndexByGroup(group, lCol) rID := findColumnIndexByGroup(group, rCol) if lID != rID { e.graph[lID] = append(e.graph[lID], &rankInfo{nodeID: rID}) e.graph[rID] = append(e.graph[rID], &rankInfo{nodeID: lID}) continue } } } id := -1 rate := 1.0 cols := expression.ExtractColumns(f) for _, col := range cols { idx := findColumnIndexByGroup(group, col) if id == -1 { switch f.FuncName.L { case ast.EQ: rate *= 0.1 case ast.LT, ast.LE, ast.GE, ast.GT: rate *= 0.3 // TODO: Estimate it more precisely in future. default: rate *= 0.9 } id = idx } else { id = -1 break } } if id != -1 { e.groupRank[id].rate *= rate } } } for _, node := range e.graph { for _, edge := range node { edge.rate = e.groupRank[edge.nodeID].rate } } sort.Sort(e) for _, edge := range e.graph { sort.Sort(edge) } var cartesianJoinGroup []LogicalPlan for j := 0; j < len(e.groupRank); j++ { i := e.groupRank[j].nodeID if !e.visited[i] { e.resultJoin = e.group[i] e.walkGraphAndComposeJoin(i) cartesianJoinGroup = append(cartesianJoinGroup, e.resultJoin) } } e.makeBushyJoin(cartesianJoinGroup) }