func TestSplitWithEmptyBoundaryList(t *testing.T) { splitParams, err := NewSplitParamsGivenNumRowsPerQueryPart( "select * from test_table", map[string]interface{}{"foo": int64(100)}, []sqlparser.ColIdent{ sqlparser.NewColIdent("id"), sqlparser.NewColIdent("user_id"), }, /* splitColumns */ 1000, getTestSchema()) if err != nil { t.Fatalf("SplitParams.Initialize() failed with: %v", err) } splitter := NewSplitter(splitParams, &FakeSplitAlgorithm{ boundaries: []tuple{}, splitColumns: splitParams.splitColumns, }) var queryParts []querytypes.QuerySplit queryParts, err = splitter.Split() if err != nil { t.Errorf("Splitter.Split() failed with: %v", err) } expected := []querytypes.QuerySplit{ { Sql: "select * from test_table", BindVariables: map[string]interface{}{ "foo": int64(100), }, }, } verifyQueryPartsEqual(t, expected, queryParts) }
func TestWithRealEqualSplits(t *testing.T) { splitParams, err := NewSplitParamsGivenSplitCount( "select * from test_table", map[string]interface{}{}, []sqlparser.ColIdent{sqlparser.NewColIdent("id"), sqlparser.NewColIdent("user_id")}, 3, /* split_count */ getTestSchema()) if err != nil { t.Fatalf("want: nil, got: %v", err) } mockCtrl := gomock.NewController(t) defer mockCtrl.Finish() mockSQLExecuter := splitquery_testing.NewMockSQLExecuter(mockCtrl) expectedCall1 := mockSQLExecuter.EXPECT().SQLExecute( "select min(id), max(id) from test_table", nil /* Bind Variables */) expectedCall1.Return( &sqltypes.Result{ Rows: [][]sqltypes.Value{ {int64Value(10), int64Value(3010)}, }, }, nil) equalSplits, err := NewEqualSplitsAlgorithm(splitParams, mockSQLExecuter) splitter := NewSplitter(splitParams, equalSplits) queryParts, err := splitter.Split() if err != nil { t.Errorf("Splitter.Split() failed with: %v", err) } expected := []querytypes.QuerySplit{ { Sql: "select * from test_table where id < :_splitquery_end_id", BindVariables: map[string]interface{}{ "_splitquery_end_id": int64(1010), }, }, { Sql: "select * from test_table where" + " (:_splitquery_start_id <= id)" + " and" + " (id < :_splitquery_end_id)", BindVariables: map[string]interface{}{ "_splitquery_start_id": int64(1010), "_splitquery_end_id": int64(2010), }, }, { Sql: "select * from test_table where" + " :_splitquery_start_id <= id", BindVariables: map[string]interface{}{ "_splitquery_start_id": int64(2010), }, }, } verifyQueryPartsEqual(t, expected, queryParts) }
func TestSQLExecuterReturnsError(t *testing.T) { mockCtrl := gomock.NewController(t) defer mockCtrl.Finish() splitParams, err := NewSplitParamsGivenNumRowsPerQueryPart( "select * from test_table where int_col > 5", nil, /* bindVariables */ []sqlparser.ColIdent{ sqlparser.NewColIdent("id"), sqlparser.NewColIdent("user_id"), }, /* splitColumns */ 1000, getTestSchema(), ) if err != nil { t.Fatalf("NewSplitParamsGivenNumRowsPerQueryPart failed with: %v", err) } mockSQLExecuter := splitquery_testing.NewMockSQLExecuter(mockCtrl) expectedCall1 := mockSQLExecuter.EXPECT().SQLExecute( "select id, user_id from test_table"+ " where int_col > 5"+ " order by id asc, user_id asc"+ " limit 1000, 1", map[string]interface{}{}) expectedCall1.Return( &sqltypes.Result{ Rows: [][]sqltypes.Value{ {int64Value(1), int64Value(1)}}, }, nil) expectedCall2 := mockSQLExecuter.EXPECT().SQLExecute( "select id, user_id from test_table"+ " where (int_col > 5) and"+ " (:_splitquery_prev_id < id or"+ " (:_splitquery_prev_id = id and :_splitquery_prev_user_id <= user_id))"+ " order by id asc, user_id asc"+ " limit 1000, 1", map[string]interface{}{ "_splitquery_prev_id": int64(1), "_splitquery_prev_user_id": int64(1), }) expectedErr := fmt.Errorf("Error accessing database!") expectedCall2.Return(nil, expectedErr) algorithm, err := NewFullScanAlgorithm(splitParams, mockSQLExecuter) if err != nil { t.Fatalf("NewFullScanAlgorithm failed with: %v", err) } boundaries, err := algorithm.generateBoundaries() if err != expectedErr { t.Fatalf("FullScanAlgorithm.generateBoundaries() did not fail as expected. err: %v", err) } if boundaries != nil { t.Fatalf("boundaries: %v, expected: nil", boundaries) } }
func Example() { // 1. Create a SplitParams object. // There are two "constructors": NewSplitParamsGivenSplitCount and // NewSplitParamsGivenNumRowsPerQueryPart. They each take several parameters including a "schema" // object which should be a map[string]*schema.Table that maps a table name to its schema.Table // object. It is used for error-checking the split columns and their types. We use an empty // object for this toy example, but in real code this object must have correct entries. // // This schema can is typically derived from tabletserver.TabletServer.qe.schemaInfo. schema := map[string]*schema.Table{} splitParams, err := NewSplitParamsGivenSplitCount( "SELECT * FROM table WHERE id > :id", // SQL query map[string]interface{}{"id": int64(5)}, // Bind Variables []sqlparser.ColIdent{ sqlparser.NewColIdent("id"), sqlparser.NewColIdent("user_id"), }, // SplitColumns 1000, // SplitCount schema) if err != nil { panic(fmt.Sprintf("NewSplitParamsGivenSplitCount failed with: %v", err)) } // 2. Create the SplitAlgorithmInterface object used for splitting. // SplitQuery supports multiple algorithms for splitting the query. These are encapsulated as // types implementing the SplitAlgorithmInterface. Currently two algorithms are supported // represented by the FullScanAlgorithm and EqualSplitsAlgorithm types. See the documentation // of these types for more details on each algorithm. // To do the split we'll need to create an object of one of these types and pass it to the // Splitter (see below). Here we use the FullScan algorithm. // We also pass a type implementing the SQLExecuter interface that the algorithm will // use to send statements to MySQL. algorithm, err := NewFullScanAlgorithm(splitParams, getSQLExecuter()) if err != nil { panic(fmt.Sprintf("NewFullScanAlgorithm failed with: %v", err)) } // 3. Create a splitter object. Always succeeds. splitter := NewSplitter(splitParams, algorithm) // 4. Call splitter.Split() to Split the query. 
// The result is a slice of querytypes.QuerySplit objects (and an error object). queryParts, err := splitter.Split() if err != nil { panic(fmt.Sprintf("splitter.Split() failed with: %v", err)) } fmt.Println(queryParts) }
// PushSelect pushes the select expression into the route. func (rb *route) PushSelect(expr *sqlparser.NonStarExpr, _ *route) (colsym *colsym, colnum int, err error) { colsym = newColsym(rb, rb.Symtab()) colsym.Alias = expr.As if col, ok := expr.Expr.(*sqlparser.ColName); ok { // If no alias was specified, then the base name // of the column becomes the alias. if colsym.Alias.Original() == "" { colsym.Alias = col.Name } // We should always allow other parts of the query to reference // the fully qualified name of the column. if tab, ok := col.Metadata.(*tabsym); ok { colsym.QualifiedName = sqlparser.NewColIdent(sqlparser.String(tab.Alias) + "." + col.Name.Original()) } colsym.Vindex = rb.Symtab().Vindex(col, rb, true) colsym.Underlying = newColref(col) } else { if rb.IsRHS { return nil, 0, errors.New("unsupported: complex left join and column expressions") } // We should ideally generate an alias based on the // expression, but we currently don't have the ability // to reference such expressions. So, we leave the // alias blank. } rb.Select.SelectExprs = append(rb.Select.SelectExprs, expr) rb.Colsyms = append(rb.Colsyms, colsym) return colsym, len(rb.Colsyms) - 1, nil }
// PushStar pushes the '*' expression into the route. func (rb *route) PushStar(expr *sqlparser.StarExpr) *colsym { colsym := newColsym(rb, rb.Symtab()) colsym.Alias = sqlparser.NewColIdent(sqlparser.String(expr)) rb.Select.SelectExprs = append(rb.Select.SelectExprs, expr) rb.Colsyms = append(rb.Colsyms, colsym) return colsym }
// PushStar pushes the '*' expression into the route. func (rb *route) PushStar(expr *sqlparser.StarExpr) *colsym { colsym := newColsym(rb, rb.Symtab()) // This is not perfect, but it should be good enough. // We'll match unqualified column names against Alias // and qualified column names against QualifiedName. // If someone uses 'select *' and then uses table.col // in the HAVING clause, then things won't match. But // such cases are easy to correct in the application. if expr.TableName == "" { colsym.Alias = sqlparser.NewColIdent(sqlparser.String(expr)) } else { colsym.QualifiedName = sqlparser.NewColIdent(sqlparser.String(expr)) } rb.Select.SelectExprs = append(rb.Select.SelectExprs, expr) rb.Colsyms = append(rb.Colsyms, colsym) return colsym }
func TestSmallNumberOfRows(t *testing.T) { mockCtrl := gomock.NewController(t) defer mockCtrl.Finish() splitParams, err := NewSplitParamsGivenNumRowsPerQueryPart( "select * from test_table where int_col > 5", nil, /* bindVariables */ []sqlparser.ColIdent{ sqlparser.NewColIdent("id"), sqlparser.NewColIdent("user_id"), }, /* splitColumns */ 1000, getTestSchema(), ) if err != nil { t.Fatalf("NewSplitParamsGivenNumRowsPerQueryPart failed with: %v", err) } mockSQLExecuter := splitquery_testing.NewMockSQLExecuter(mockCtrl) expectedCall1 := mockSQLExecuter.EXPECT().SQLExecute( "select id, user_id from test_table"+ " where int_col > 5"+ " order by id asc, user_id asc"+ " limit 1000, 1", map[string]interface{}{}) expectedCall1.Return( &sqltypes.Result{Rows: [][]sqltypes.Value{}}, nil) algorithm, err := NewFullScanAlgorithm(splitParams, mockSQLExecuter) if err != nil { t.Fatalf("NewFullScanAlgorithm failed with: %v", err) } boundaries, err := algorithm.generateBoundaries() if err != nil { t.Fatalf("FullScanAlgorithm.generateBoundaries() failed with: %v", err) } expectedBoundaries := []tuple{} if !reflect.DeepEqual(expectedBoundaries, boundaries) { t.Fatalf("expected: %v, got: %v", expectedBoundaries, boundaries) } }
// SupplyCol changes the router to supply the requested column // name, and returns the result column number. If the column // is already in the list, it's reused. func (rb *route) SupplyCol(ref colref) int { for i, colsym := range rb.Colsyms { if colsym.Underlying == ref { return i } } ts := ref.Meta.(*tabsym) rb.Colsyms = append(rb.Colsyms, &colsym{ Alias: sqlparser.NewColIdent(string(ts.Alias) + "." + ref.Name), Underlying: ref, }) rb.Select.SelectExprs = append( rb.Select.SelectExprs, &sqlparser.NonStarExpr{ Expr: &sqlparser.ColName{ Metadata: ref.Meta, Qualifier: &sqlparser.TableName{Name: ts.ASTName}, Name: sqlparser.NewColIdent(ref.Name), }, }, ) return len(rb.Colsyms) - 1 }
// Find returns the route for the symbol referenced by col. // If a reference is found, the column's Metadata is set to point // it. Subsequent searches will reuse this meatadata. // If autoResolve is true, and there is only one table in the symbol table, // then an unqualified reference is assumed to be implicitly against // that table. The table info doesn't contain the full list of columns. // So, any column reference is presumed valid. If a Colsyms scope is // present, then the table scope is not searched. If a symbol is found // in the current symtab, then isLocal is set to true. Otherwise, the // search is continued in the outer symtab. If so, isLocal will be set // to false. If the symbol was not found, an error is returned. // isLocal must be checked before you can push-down (or pull-out) // a construct. // If a symbol was found in an outer scope, then the column reference // is added to the Externs field. func (st *symtab) Find(col *sqlparser.ColName, autoResolve bool) (rb *route, isLocal bool, err error) { if m, ok := col.Metadata.(sym); ok { return m.Route(), m.Symtab() == st, nil } if len(st.Colsyms) != 0 { name := sqlparser.String(col) starname := sqlparser.String(&sqlparser.ColName{ Name: sqlparser.NewColIdent("*"), Qualifier: col.Qualifier, }) for _, colsym := range st.Colsyms { if colsym.Alias.EqualString(name) || colsym.Alias.EqualString(starname) || colsym.Alias.EqualString("*") { col.Metadata = colsym return colsym.Route(), true, nil } } if st.Outer != nil { // autoResolve only allowed for innermost scope. 
rb, _, err = st.Outer.Find(col, false) if err == nil { st.Externs = append(st.Externs, col) } return rb, false, err } return nil, false, fmt.Errorf("symbol %s not found", sqlparser.String(col)) } qualifier := sqlparser.TableIdent(sqlparser.String(col.Qualifier)) if qualifier == "" && autoResolve && len(st.tables) == 1 { for _, t := range st.tables { qualifier = t.Alias break } } alias := st.findTable(qualifier) if alias == nil { if st.Outer != nil { // autoResolve only allowed for innermost scope. rb, _, err = st.Outer.Find(col, false) if err == nil { st.Externs = append(st.Externs, col) } return rb, false, err } return nil, false, fmt.Errorf("symbol %s not found", sqlparser.String(col)) } col.Metadata = alias return alias.Route(), true, nil }
// PushSelect pushes the select expression into the route. func (rb *route) PushSelect(expr *sqlparser.NonStarExpr, _ *route) (colsym *colsym, colnum int, err error) { colsym = newColsym(rb, rb.Symtab()) if expr.As.Original() != "" { colsym.Alias = expr.As } if col, ok := expr.Expr.(*sqlparser.ColName); ok { if colsym.Alias.Original() == "" { colsym.Alias = sqlparser.NewColIdent(sqlparser.String(col)) } colsym.Vindex = rb.Symtab().Vindex(col, rb, true) colsym.Underlying = newColref(col) } else { if rb.IsRHS { return nil, 0, errors.New("unsupported: complex left join and column expressions") } } rb.Select.SelectExprs = append(rb.Select.SelectExprs, expr) rb.Colsyms = append(rb.Colsyms, colsym) return colsym, len(rb.Colsyms) - 1, nil }
// SplitQueryV2 splits a query + bind variables into smaller queries that return a
// subset of rows from the original query. This is the new version that supports multiple
// split columns and multiple split algorithms.
// See the documentation of SplitQueryRequest in proto/vtgate.proto for more details.
//
// The parameters are checked by validateSplitQueryParameters, turned into
// split params by createSplitParams, and the chosen algorithm is run through
// a splitquery.Splitter. The error from Split() is passed through
// splitQueryToTabletError before being returned.
func (tsv *TabletServer) SplitQueryV2(
	ctx context.Context,
	target *querypb.Target,
	sql string,
	bindVariables map[string]interface{},
	splitColumns []string,
	splitCount int64,
	numRowsPerQueryPart int64,
	algorithm querypb.SplitQueryRequest_Algorithm,
) (splits []querytypes.QuerySplit, err error) {
	logStats := newLogStats("SplitQuery", ctx)
	logStats.OriginalSQL = sql
	logStats.BindVariables = bindVariables
	// handleError receives a pointer to the named return value err, so it
	// observes whatever error this function finally returns.
	defer handleError(&err, logStats, tsv.qe.queryServiceStats)
	if err = tsv.startRequest(target, false, false); err != nil {
		return nil, err
	}
	// We don't set a timeout for SplitQueryV2.
	// SplitQuery using the Full Scan algorithm can take a while and
	// we don't expect too many of these queries to run concurrently.
	// endRequest is deferred only once startRequest has succeeded.
	defer tsv.endRequest(false)
	// Convert the split column names into sqlparser.ColIdent values.
	ciSplitColumns := make([]sqlparser.ColIdent, 0, len(splitColumns))
	for _, s := range splitColumns {
		ciSplitColumns = append(ciSplitColumns, sqlparser.NewColIdent(s))
	}
	// This err shadows the named return value, but "return nil, err" still
	// assigns it to the named result for the deferred handleError.
	if err := validateSplitQueryParameters(
		target,
		sql,
		bindVariables,
		splitColumns,
		splitCount,
		numRowsPerQueryPart,
		algorithm,
	); err != nil {
		return nil, err
	}
	schema := getSchemaForSplitQuery(tsv.qe.schemaInfo)
	splitParams, err := createSplitParams(
		sql, bindVariables, ciSplitColumns, splitCount, numRowsPerQueryPart, schema)
	if err != nil {
		return nil, err
	}
	// Record per-table stats on exit; the start time is captured now, when
	// the defer statement is evaluated.
	defer func(start time.Time) {
		splitTableName := splitParams.GetSplitTableName()
		addUserTableQueryStats(
			tsv.qe.queryServiceStats,
			ctx,
			splitTableName,
			"SplitQuery",
			int64(time.Now().Sub(start)))
	}(time.Now())
	sqlExecuter, err := newSplitQuerySQLExecuter(ctx, logStats, tsv.qe)
	if err != nil {
		return nil, err
	}
	// NOTE(review): done() presumably releases resources held by the
	// executer — confirm against newSplitQuerySQLExecuter.
	defer sqlExecuter.done()
	algorithmObject, err := createSplitQueryAlgorithmObject(algorithm, splitParams, sqlExecuter)
	if err != nil {
		return nil, err
	}
	result, err := splitquery.NewSplitter(splitParams, algorithmObject).Split()
	return result, splitQueryToTabletError(err)
}
func TestSplit1SplitColumn(t *testing.T) { splitParams, err := NewSplitParamsGivenNumRowsPerQueryPart( "select * from test_table", map[string]interface{}{}, []sqlparser.ColIdent{sqlparser.NewColIdent("id")}, 1000, // numRowsPerQueryPart getTestSchema()) if err != nil { t.Fatalf("SplitParams.Initialize() failed with: %v", err) } splitter := NewSplitter(splitParams, &FakeSplitAlgorithm{ boundaries: []tuple{ {int64Value(1)}, {int64Value(10)}, {int64Value(50)}, }, splitColumns: splitParams.splitColumns, }) var queryParts []querytypes.QuerySplit queryParts, err = splitter.Split() if err != nil { t.Errorf("Splitter.Split() failed with: %v", err) } expected := []querytypes.QuerySplit{ { Sql: "select * from test_table where id < :_splitquery_end_id", BindVariables: map[string]interface{}{ "_splitquery_end_id": int64(1), }, }, { Sql: "select * from test_table where" + " (:_splitquery_start_id <= id)" + " and" + " (id < :_splitquery_end_id)", BindVariables: map[string]interface{}{ "_splitquery_start_id": int64(1), "_splitquery_end_id": int64(10), }, }, { Sql: "select * from test_table where" + " (:_splitquery_start_id <= id)" + " and" + " (id < :_splitquery_end_id)", BindVariables: map[string]interface{}{ "_splitquery_start_id": int64(10), "_splitquery_end_id": int64(50), }, }, { Sql: "select * from test_table where" + " :_splitquery_start_id <= id", BindVariables: map[string]interface{}{ "_splitquery_start_id": int64(50), }, }, } verifyQueryPartsEqual(t, expected, queryParts) }
func TestSplitWithWhereClause(t *testing.T) { splitParams, err := NewSplitParamsGivenNumRowsPerQueryPart( "select * from test_table where name!='foo'", map[string]interface{}{}, []sqlparser.ColIdent{ sqlparser.NewColIdent("id"), sqlparser.NewColIdent("user_id"), }, /* splitColumns */ 1000, // numRowsPerQueryPart getTestSchema()) if err != nil { t.Fatalf("SplitParams.Initialize() failed with: %v", err) } splitter := NewSplitter(splitParams, &FakeSplitAlgorithm{ boundaries: []tuple{ {int64Value(1), int64Value(2)}, {int64Value(1), int64Value(3)}, {int64Value(5), int64Value(1)}, }, splitColumns: splitParams.splitColumns, }) var queryParts []querytypes.QuerySplit queryParts, err = splitter.Split() if err != nil { t.Errorf("Splitter.Split() failed with: %v", err) } expected := []querytypes.QuerySplit{ { Sql: "select * from test_table where (name != 'foo') and" + " (id < :_splitquery_end_id or" + " (id = :_splitquery_end_id and user_id < :_splitquery_end_user_id))", BindVariables: map[string]interface{}{ "_splitquery_end_id": int64(1), "_splitquery_end_user_id": int64(2), }, }, { Sql: "select * from test_table where (name != 'foo') and" + " ((:_splitquery_start_id < id or" + " (:_splitquery_start_id = id and :_splitquery_start_user_id <= user_id))" + " and" + " (id < :_splitquery_end_id or" + " (id = :_splitquery_end_id and user_id < :_splitquery_end_user_id)))", BindVariables: map[string]interface{}{ "_splitquery_start_id": int64(1), "_splitquery_start_user_id": int64(2), "_splitquery_end_id": int64(1), "_splitquery_end_user_id": int64(3), }, }, { Sql: "select * from test_table where (name != 'foo') and" + " ((:_splitquery_start_id < id or" + " (:_splitquery_start_id = id and :_splitquery_start_user_id <= user_id))" + " and" + " (id < :_splitquery_end_id or" + " (id = :_splitquery_end_id and user_id < :_splitquery_end_user_id)))", BindVariables: map[string]interface{}{ "_splitquery_start_id": int64(1), "_splitquery_start_user_id": int64(3), "_splitquery_end_id": 
int64(5), "_splitquery_end_user_id": int64(1), }, }, { Sql: "select * from test_table where (name != 'foo') and" + " (:_splitquery_start_id < id or" + " (:_splitquery_start_id = id and :_splitquery_start_user_id <= user_id))", BindVariables: map[string]interface{}{ "_splitquery_start_user_id": int64(1), "_splitquery_start_id": int64(5), }, }, } verifyQueryPartsEqual(t, expected, queryParts) }
) var splitParamsTestCases = []struct { SQL string BindVariables map[string]interface{} SplitColumnNames []sqlparser.ColIdent NumRowsPerQueryPart int64 SplitCount int64 ExpectedErrorRegex *regexp.Regexp ExpectedSplitParams SplitParams }{ { // Test NewSplitParamsGivenSplitCount; correct input. SQL: "select id from test_table", BindVariables: map[string]interface{}{"foo": "123"}, SplitColumnNames: []sqlparser.ColIdent{sqlparser.NewColIdent("id")}, SplitCount: 100, ExpectedSplitParams: SplitParams{ splitCount: 100, numRowsPerQueryPart: 10, // TableRows of 'test_table' should be 1000 splitColumns: []*schema.TableColumn{getTestSchemaColumn("test_table", "id")}, splitTableSchema: testSchema["test_table"], }, }, { // Test NewSplitParamsGivenNumRowsPerQueryPart; correct input. SQL: "select user_id from test_table", BindVariables: map[string]interface{}{"foo": "123"}, SplitColumnNames: []sqlparser.ColIdent{sqlparser.NewColIdent("id")}, NumRowsPerQueryPart: 100,