// isDecomposable checks if an aggregate function is decomposable. An aggregation function $F$ is decomposable // if there exist aggregation functions F_1 and F_2 such that F(S_1 union all S_2) = F_2(F_1(S_1),F_1(S_2)), // where S_1 and S_2 are two sets of values. We call S_1 and S_2 partial groups. // It's easy to see that max, min, first row is decomposable, no matter whether it's distinct, but sum(distinct) and // count(distinct) is not. // Currently we don't support avg and concat. func (a *aggPushDownSolver) isDecomposable(fun expression.AggregationFunction) bool { switch fun.GetName() { case ast.AggFuncAvg, ast.AggFuncGroupConcat: // TODO: Support avg push down. return false case ast.AggFuncMax, ast.AggFuncMin, ast.AggFuncFirstRow: return true case ast.AggFuncSum, ast.AggFuncCount: return !fun.IsDistinct() default: return false } }
// getAggFuncChildIdx gets which children it belongs to, 0 stands for left, 1 stands for right, -1 stands for both. func (a *aggPushDownSolver) getAggFuncChildIdx(aggFunc expression.AggregationFunction, schema expression.Schema) int { fromLeft, fromRight := false, false var cols []*expression.Column for _, arg := range aggFunc.GetArgs() { cols = append(cols, expression.ExtractColumns(arg)...) } for _, col := range cols { if schema.GetIndex(col) != -1 { fromLeft = true } else { fromRight = true } } if fromLeft && fromRight { return -1 } else if fromLeft { return 0 } return 1 }
func (b *executorBuilder) newAggFuncToPBExpr(client kv.Client, aggFunc expression.AggregationFunction, tbl *model.TableInfo) *tipb.Expr { var tp tipb.ExprType switch aggFunc.GetName() { case ast.AggFuncCount: tp = tipb.ExprType_Count case ast.AggFuncFirstRow: tp = tipb.ExprType_First case ast.AggFuncGroupConcat: tp = tipb.ExprType_GroupConcat case ast.AggFuncMax: tp = tipb.ExprType_Max case ast.AggFuncMin: tp = tipb.ExprType_Min case ast.AggFuncSum: tp = tipb.ExprType_Sum case ast.AggFuncAvg: tp = tipb.ExprType_Avg } if !client.SupportRequestType(kv.ReqTypeSelect, int64(tp)) { return nil } children := make([]*tipb.Expr, 0, len(aggFunc.GetArgs())) for _, arg := range aggFunc.GetArgs() { pbArg := b.newExprToPBExpr(client, arg, tbl) if pbArg == nil { return nil } children = append(children, pbArg) } return &tipb.Expr{Tp: tp.Enum(), Children: children} }
func aggFuncToPBExpr(sc *variable.StatementContext, client kv.Client, aggFunc expression.AggregationFunction) *tipb.Expr { pc := pbConverter{client: client, sc: sc} var tp tipb.ExprType switch aggFunc.GetName() { case ast.AggFuncCount: tp = tipb.ExprType_Count case ast.AggFuncFirstRow: tp = tipb.ExprType_First case ast.AggFuncGroupConcat: tp = tipb.ExprType_GroupConcat case ast.AggFuncMax: tp = tipb.ExprType_Max case ast.AggFuncMin: tp = tipb.ExprType_Min case ast.AggFuncSum: tp = tipb.ExprType_Sum case ast.AggFuncAvg: tp = tipb.ExprType_Avg } if !client.SupportRequestType(kv.ReqTypeSelect, int64(tp)) { return nil } children := make([]*tipb.Expr, 0, len(aggFunc.GetArgs())) for _, arg := range aggFunc.GetArgs() { pbArg := pc.exprToPB(arg) if pbArg == nil { return nil } children = append(children, pbArg) } return &tipb.Expr{Tp: tp, Children: children} }
// decompose splits an aggregate function to two parts: a final mode function and a partial mode function. Currently // there are no differences between partial mode and complete mode, so we can confuse them. func (a *aggPushDownSolver) decompose(aggFunc expression.AggregationFunction, schema expression.Schema, id string) ([]expression.AggregationFunction, expression.Schema) { // Result is a slice because avg should be decomposed to sum and count. Currently we don't process this case. result := []expression.AggregationFunction{aggFunc.Clone()} for _, aggFunc := range result { schema = append(schema, &expression.Column{ ColName: model.NewCIStr(fmt.Sprintf("join_agg_%d", len(schema))), // useless but for debug FromID: id, Position: len(schema), RetType: aggFunc.GetType(), }) } aggFunc.SetArgs(expression.Schema2Exprs(schema[len(schema)-len(result):])) aggFunc.SetMode(expression.FinalMode) return result, schema }
// needValue reports whether af is one of sum, avg, first row, max, min or
// group concat.
// NOTE(review): presumably these are the aggregates whose result carries a
// value component; confirm against callers.
func needValue(af expression.AggregationFunction) bool {
	switch af.GetName() {
	case ast.AggFuncSum, ast.AggFuncAvg, ast.AggFuncFirstRow,
		ast.AggFuncMax, ast.AggFuncMin, ast.AggFuncGroupConcat:
		return true
	}
	return false
}
// needCount reports whether af is count or avg.
// NOTE(review): presumably these are the aggregates whose result carries a
// count component; confirm against callers.
func needCount(af expression.AggregationFunction) bool {
	name := af.GetName()
	return name == ast.AggFuncCount || name == ast.AggFuncAvg
}