// NewSplitCloneWorker returns a new SplitCloneWorker object. func NewSplitCloneWorker(wr *wrangler.Wrangler, cell, keyspace, shard string, excludeTables []string, strategyStr string, sourceReaderCount, destinationPackCount int, minTableSizeForSplit uint64, destinationWriterCount int) (Worker, error) { strategy, err := mysqlctl.NewSplitStrategy(wr.Logger(), strategyStr) if err != nil { return nil, err } return &SplitCloneWorker{ wr: wr, cell: cell, keyspace: keyspace, shard: shard, excludeTables: excludeTables, strategy: strategy, sourceReaderCount: sourceReaderCount, destinationPackCount: destinationPackCount, minTableSizeForSplit: minTableSizeForSplit, destinationWriterCount: destinationWriterCount, cleaner: &wrangler.Cleaner{}, state: stateSCNotSarted, ev: &events.SplitClone{ Cell: cell, Keyspace: keyspace, Shard: shard, ExcludeTables: excludeTables, Strategy: strategy.String(), }, }, nil }
func zkResolveWildcards(wr *wrangler.Wrangler, args []string) ([]string, error) { zkts, ok := wr.TopoServer().(*zktopo.Server) if !ok { return args, nil } return zk.ResolveWildcards(zkts.GetZConn(), args) }
// StartActionLoop will start the action loop for a fake tablet, // using ft.FakeMysqlDaemon as the backing mysqld. func (ft *FakeTablet) StartActionLoop(t *testing.T, wr *wrangler.Wrangler) { if ft.Agent != nil { t.Fatalf("Agent for %v is already running", ft.Tablet.Alias) } // Listen on a random port var err error ft.Listener, err = net.Listen("tcp", ":0") if err != nil { t.Fatalf("Cannot listen: %v", err) } port := ft.Listener.Addr().(*net.TCPAddr).Port // create a test agent on that port, and re-read the record // (it has new ports and IP) ft.Agent = tabletmanager.NewTestActionAgent(wr.TopoServer(), ft.Tablet.Alias, port, ft.FakeMysqlDaemon) ft.Tablet = ft.Agent.Tablet().Tablet // create the RPC server ft.RpcServer = rpcplus.NewServer() gorpctmserver.RegisterForTest(ft.RpcServer, ft.Agent) // create the HTTP server, serve the server from it handler := http.NewServeMux() bsonrpc.ServeCustomRPC(handler, ft.RpcServer, false) httpServer := http.Server{ Handler: handler, } go httpServer.Serve(ft.Listener) }
// runSqlCommands will send the sql commands to the remote tablet. func runSqlCommands(wr *wrangler.Wrangler, ti *topo.TabletInfo, commands []string, abort chan struct{}, disableBinLogs bool) error { for _, command := range commands { command, err := fillStringTemplate(command, map[string]string{"DatabaseName": ti.DbName()}) if err != nil { return fmt.Errorf("fillStringTemplate failed: %v", err) } ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second) _, err = wr.TabletManagerClient().ExecuteFetch(ctx, ti, command, 0, false, disableBinLogs) if err != nil { return err } cancel() // check on abort select { case <-abort: return nil default: break } } return nil }
func commandExportZkns(wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error { if err := subFlags.Parse(args); err != nil { return err } if subFlags.NArg() != 1 { return fmt.Errorf("action ExportZkns requires <cell name|zk vt root path>") } cell, err := zkVtPathToCell(subFlags.Arg(0)) if err != nil { return err } return wr.ExportZkns(cell) }
func commandExportZknsForKeyspace(wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error { if err := subFlags.Parse(args); err != nil { return err } if subFlags.NArg() != 1 { return fmt.Errorf("action ExportZknsForKeyspace requires <keyspace|zk global keyspace path>") } keyspace, err := zkKeyspaceParamToKeyspace(subFlags.Arg(0)) if err != nil { return err } return wr.ExportZknsForKeyspace(keyspace) }
func commandReparentTablet(wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error { if err := subFlags.Parse(args); err != nil { return err } if subFlags.NArg() != 1 { return fmt.Errorf("action ReparentTablet requires <tablet alias|zk tablet path>") } tabletAlias, err := tabletParamToTabletAlias(subFlags.Arg(0)) if err != nil { return err } return wr.ReparentTablet(tabletAlias) }
func commandReparentShard(wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error { leaveMasterReadOnly := subFlags.Bool("leave-master-read-only", false, "leaves the master read-only after reparenting") force := subFlags.Bool("force", false, "will force the reparent even if the master is already correct") if err := subFlags.Parse(args); err != nil { return err } if subFlags.NArg() != 2 { return fmt.Errorf("action ReparentShard requires <keyspace/shard|zk shard path> <tablet alias|zk tablet path>") } keyspace, shard, err := shardParamToKeyspaceShard(subFlags.Arg(0)) if err != nil { return err } tabletAlias, err := tabletParamToTabletAlias(subFlags.Arg(1)) if err != nil { return err } return wr.ReparentShard(keyspace, shard, tabletAlias, *leaveMasterReadOnly, *force) }
// buildSQLFromChunks returns the SQL command to run to insert the data // using the chunks definitions into the provided table. func buildSQLFromChunks(wr *wrangler.Wrangler, td *myproto.TableDefinition, chunks []string, chunkIndex int, source string) string { selectSQL := "SELECT " + strings.Join(td.Columns, ", ") + " FROM " + td.Name if chunks[chunkIndex] != "" || chunks[chunkIndex+1] != "" { wr.Logger().Infof("Starting to stream all data from tablet %v table %v between '%v' and '%v'", source, td.Name, chunks[chunkIndex], chunks[chunkIndex+1]) clauses := make([]string, 0, 2) if chunks[chunkIndex] != "" { clauses = append(clauses, td.PrimaryKeyColumns[0]+">="+chunks[chunkIndex]) } if chunks[chunkIndex+1] != "" { clauses = append(clauses, td.PrimaryKeyColumns[0]+"<"+chunks[chunkIndex+1]) } selectSQL += " WHERE " + strings.Join(clauses, " AND ") } else { wr.Logger().Infof("Starting to stream all data from tablet %v table %v", source, td.Name) } if len(td.PrimaryKeyColumns) > 0 { selectSQL += " ORDER BY " + strings.Join(td.PrimaryKeyColumns, ", ") } return selectSQL }
// CreateTestTablet creates the test tablet in the topology. 'uid' // has to be between 0 and 99. All the tablet info will be derived // from that. Look at the implementation if you need values. // Use TabletOption implementations if you need to change values at creation. func NewFakeTablet(t *testing.T, wr *wrangler.Wrangler, cell string, uid uint32, tabletType topo.TabletType, options ...TabletOption) *FakeTablet { if uid < 0 || uid > 99 { t.Fatalf("uid has to be between 0 and 99: %v", uid) } state := topo.STATE_READ_ONLY if tabletType == topo.TYPE_MASTER { state = topo.STATE_READ_WRITE } tablet := &topo.Tablet{ Alias: topo.TabletAlias{Cell: cell, Uid: uid}, Hostname: fmt.Sprintf("%vhost", cell), Portmap: map[string]int{ "vt": 8100 + int(uid), "mysql": 3300 + int(uid), "vts": 8200 + int(uid), }, IPAddr: fmt.Sprintf("%v.0.0.1", 100+uid), Keyspace: "test_keyspace", Shard: "0", Type: tabletType, State: state, } for _, option := range options { option(tablet) } if err := wr.InitTablet(tablet, false, true, false); err != nil { t.Fatalf("cannot create tablet %v: %v", uid, err) } // create a FakeMysqlDaemon with the right information by default fakeMysqlDaemon := &mysqlctl.FakeMysqlDaemon{} if !tablet.Parent.IsZero() { fakeMysqlDaemon.MasterAddr = fmt.Sprintf("%v.0.0.1:%v", 100+tablet.Parent.Uid, 3300+int(tablet.Parent.Uid)) } fakeMysqlDaemon.MysqlPort = 3300 + int(uid) return &FakeTablet{ Tablet: tablet, FakeMysqlDaemon: fakeMysqlDaemon, } }
func keyspacesWithOverlappingShards(wr *wrangler.Wrangler) ([]map[string]string, error) { keyspaces, err := wr.TopoServer().GetKeyspaces() if err != nil { return nil, err } wg := sync.WaitGroup{} mu := sync.Mutex{} // protects result result := make([]map[string]string, 0, len(keyspaces)) rec := concurrency.AllErrorRecorder{} for _, keyspace := range keyspaces { wg.Add(1) go func(keyspace string) { defer wg.Done() osList, err := topotools.FindOverlappingShards(wr.TopoServer(), keyspace) if err != nil { rec.RecordError(err) return } mu.Lock() for _, os := range osList { result = append(result, map[string]string{ "Keyspace": os.Left[0].Keyspace(), "Shard": os.Left[0].ShardName(), }) } mu.Unlock() }(keyspace) } wg.Wait() if rec.HasErrors() { return nil, rec.Error() } if len(result) == 0 { return nil, fmt.Errorf("There are no keyspaces with overlapping shards") } return result, nil }
// findHealthyRdonlyEndPoint returns a random healthy endpoint. // Since we don't want to use them all, we require at least 2 servers // are healthy. func findHealthyRdonlyEndPoint(wr *wrangler.Wrangler, cell, keyspace, shard string) (topo.TabletAlias, error) { endPoints, err := wr.TopoServer().GetEndPoints(cell, keyspace, shard, topo.TYPE_RDONLY) if err != nil { return topo.TabletAlias{}, fmt.Errorf("GetEndPoints(%v,%v,%v,rdonly) failed: %v", cell, keyspace, shard, err) } healthyEndpoints := make([]topo.EndPoint, 0, len(endPoints.Entries)) for _, entry := range endPoints.Entries { if len(entry.Health) == 0 { healthyEndpoints = append(healthyEndpoints, entry) } } if len(healthyEndpoints) <= 1 { return topo.TabletAlias{}, fmt.Errorf("Not enough endpoints to chose from in (%v,%v/%v), have %v healthy ones", cell, keyspace, shard, len(healthyEndpoints)) } // random server in the list is what we want index := rand.Intn(len(healthyEndpoints)) return topo.TabletAlias{ Cell: cell, Uid: healthyEndpoints[index].Uid, }, nil }
// executeFetchLoop loops over the provided insertChannel // and sends the commands to the provided tablet. func executeFetchLoop(wr *wrangler.Wrangler, ti *topo.TabletInfo, insertChannel chan string, abort chan struct{}, disableBinLogs bool) error { for { select { case cmd, ok := <-insertChannel: if !ok { // no more to read, we're done return nil } cmd = "INSERT INTO `" + ti.DbName() + "`." + cmd ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second) _, err := wr.TabletManagerClient().ExecuteFetch(ctx, ti, cmd, 0, false, disableBinLogs) if err != nil { return fmt.Errorf("ExecuteFetch failed: %v", err) } cancel() case <-abort: // FIXME(alainjobart): note this select case // could be starved here, and we might miss // the abort in some corner cases. return nil } } }
// keyspacesWithServedFrom returns all the keyspaces that have ServedFrom set // to one value. func keyspacesWithServedFrom(wr *wrangler.Wrangler) ([]string, error) { keyspaces, err := wr.TopoServer().GetKeyspaces() if err != nil { return nil, err } wg := sync.WaitGroup{} mu := sync.Mutex{} // protects result result := make([]string, 0, len(keyspaces)) rec := concurrency.AllErrorRecorder{} for _, keyspace := range keyspaces { wg.Add(1) go func(keyspace string) { defer wg.Done() ki, err := wr.TopoServer().GetKeyspace(keyspace) if err != nil { rec.RecordError(err) return } if len(ki.ServedFromMap) > 0 { mu.Lock() result = append(result, keyspace) mu.Unlock() } }(keyspace) } wg.Wait() if rec.HasErrors() { return nil, rec.Error() } if len(result) == 0 { return nil, fmt.Errorf("There are no keyspaces with ServedFrom") } return result, nil }
// findChecker: // - find a rdonly instance in the keyspace / shard // - mark it as checker // - tag it with our worker process func findChecker(wr *wrangler.Wrangler, cleaner *wrangler.Cleaner, cell, keyspace, shard string) (topo.TabletAlias, error) { tabletAlias, err := findHealthyRdonlyEndPoint(wr, cell, keyspace, shard) if err != nil { return topo.TabletAlias{}, err } // We add the tag before calling ChangeSlaveType, so the destination // vttablet reloads the worker URL when it reloads the tablet. ourURL := servenv.ListeningURL.String() wr.Logger().Infof("Adding tag[worker]=%v to tablet %v", ourURL, tabletAlias) if err := wr.TopoServer().UpdateTabletFields(tabletAlias, func(tablet *topo.Tablet) error { if tablet.Tags == nil { tablet.Tags = make(map[string]string) } tablet.Tags["worker"] = ourURL return nil }); err != nil { return topo.TabletAlias{}, err } // we remove the tag *before* calling ChangeSlaveType back, so // we need to record this tag change after the change slave // type change in the cleaner. defer wrangler.RecordTabletTagAction(cleaner, tabletAlias, "worker", "") wr.Logger().Infof("Changing tablet %v to 'checker'", tabletAlias) wr.ResetActionTimeout(30 * time.Second) if err := wr.ChangeType(tabletAlias, topo.TYPE_CHECKER, false /*force*/); err != nil { return topo.TabletAlias{}, err } // Record a clean-up action to take the tablet back to rdonly. // We will alter this one later on and let the tablet go back to // 'spare' if we have stopped replication for too long on it. wrangler.RecordChangeSlaveTypeAction(cleaner, tabletAlias, topo.TYPE_RDONLY) return tabletAlias, nil }
// shardsWithSources returns all the shards that have SourceShards set // with no Tables list. func shardsWithSources(wr *wrangler.Wrangler) ([]map[string]string, error) { keyspaces, err := wr.TopoServer().GetKeyspaces() if err != nil { return nil, err } wg := sync.WaitGroup{} mu := sync.Mutex{} // protects result result := make([]map[string]string, 0, len(keyspaces)) rec := concurrency.AllErrorRecorder{} for _, keyspace := range keyspaces { wg.Add(1) go func(keyspace string) { defer wg.Done() shards, err := wr.TopoServer().GetShardNames(keyspace) if err != nil { rec.RecordError(err) return } for _, shard := range shards { wg.Add(1) go func(keyspace, shard string) { defer wg.Done() si, err := wr.TopoServer().GetShard(keyspace, shard) if err != nil { rec.RecordError(err) return } if len(si.SourceShards) > 0 && len(si.SourceShards[0].Tables) == 0 { mu.Lock() result = append(result, map[string]string{ "Keyspace": keyspace, "Shard": shard, }) mu.Unlock() } }(keyspace, shard) } }(keyspace) } wg.Wait() if rec.HasErrors() { return nil, rec.Error() } if len(result) == 0 { return nil, fmt.Errorf("There are no shards with SourceShards") } return result, nil }
func commandDemoteMaster(wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error { if err := subFlags.Parse(args); err != nil { return err } if subFlags.NArg() != 1 { return fmt.Errorf("action DemoteMaster requires <tablet alias|zk tablet path>") } tabletAlias, err := tabletParamToTabletAlias(subFlags.Arg(0)) if err != nil { return err } tabletInfo, err := wr.TopoServer().GetTablet(tabletAlias) if err != nil { return err } return wr.TabletManagerClient().DemoteMaster(wr.Context(), tabletInfo) }
func commandPruneActionLogs(wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error { keepCount := subFlags.Int("keep-count", 10, "count to keep") if err := subFlags.Parse(args); err != nil { return err } if subFlags.NArg() == 0 { return fmt.Errorf("action PruneActionLogs requires <zk action log path> ...") } paths, err := zkResolveWildcards(wr, subFlags.Args()) if err != nil { return err } zkts, ok := wr.TopoServer().(*zktopo.Server) if !ok { return fmt.Errorf("PruneActionLogs requires a zktopo.Server") } var errCount sync2.AtomicInt32 wg := sync.WaitGroup{} for _, zkActionLogPath := range paths { wg.Add(1) go func(zkActionLogPath string) { defer wg.Done() purgedCount, err := zkts.PruneActionLogs(zkActionLogPath, *keepCount) if err == nil { wr.Logger().Infof("%v pruned %v", zkActionLogPath, purgedCount) } else { wr.Logger().Errorf("%v pruning failed: %v", zkActionLogPath, err) errCount.Add(1) } }(zkActionLogPath) } wg.Wait() if errCount.Get() > 0 { return fmt.Errorf("some errors occurred, check the log") } return nil }
// findChunks returns an array of chunks to use for splitting up a table // into multiple data chunks. It only works for tables with a primary key // (and the primary key first column is an integer type). // The array will always look like: // "", "value1", "value2", "" // A non-split tablet will just return: // "", "" func findChunks(wr *wrangler.Wrangler, ti *topo.TabletInfo, td *myproto.TableDefinition, minTableSizeForSplit uint64, sourceReaderCount int) ([]string, error) { result := []string{"", ""} // eliminate a few cases we don't split tables for if len(td.PrimaryKeyColumns) == 0 { // no primary key, what can we do? return result, nil } if td.DataLength < minTableSizeForSplit { // table is too small to split up return result, nil } // get the min and max of the leading column of the primary key query := fmt.Sprintf("SELECT MIN(%v), MAX(%v) FROM %v.%v", td.PrimaryKeyColumns[0], td.PrimaryKeyColumns[0], ti.DbName(), td.Name) ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second) qr, err := wr.TabletManagerClient().ExecuteFetch(ctx, ti, query, 1, true, false) if err != nil { wr.Logger().Infof("Not splitting table %v into multiple chunks: %v", td.Name, err) return result, nil } cancel() if len(qr.Rows) != 1 { wr.Logger().Infof("Not splitting table %v into multiple chunks, cannot get min and max", td.Name) return result, nil } if qr.Rows[0][0].IsNull() || qr.Rows[0][1].IsNull() { wr.Logger().Infof("Not splitting table %v into multiple chunks, min or max is NULL: %v %v", td.Name, qr.Rows[0][0], qr.Rows[0][1]) return result, nil } switch qr.Fields[0].Type { case mproto.VT_TINY, mproto.VT_SHORT, mproto.VT_LONG, mproto.VT_LONGLONG, mproto.VT_INT24: minNumeric := sqltypes.MakeNumeric(qr.Rows[0][0].Raw()) maxNumeric := sqltypes.MakeNumeric(qr.Rows[0][1].Raw()) if qr.Rows[0][0].Raw()[0] == '-' { // signed values, use int64 min, err := minNumeric.ParseInt64() if err != nil { wr.Logger().Infof("Not splitting table %v into multiple chunks, cannot convert min: %v %v", td.Name, minNumeric, err) return result, nil } max, err := maxNumeric.ParseInt64() if err != nil { wr.Logger().Infof("Not splitting table %v into multiple chunks, cannot convert max: %v %v", td.Name, maxNumeric, err) return result, nil } interval := (max - min) / int64(sourceReaderCount) if interval == 0 { wr.Logger().Infof("Not splitting table %v into multiple chunks, interval=0: %v %v", td.Name, max, min) return result, nil } result = make([]string, sourceReaderCount+1) result[0] = "" result[sourceReaderCount] = "" for i := int64(1); i < int64(sourceReaderCount); i++ { result[i] = fmt.Sprintf("%v", min+interval*i) } return result, nil } // unsigned values, use uint64 min, err := minNumeric.ParseUint64() if err != nil { wr.Logger().Infof("Not splitting table %v into multiple chunks, cannot convert min: %v %v", td.Name, minNumeric, err) return result, nil } max, err := maxNumeric.ParseUint64() if err != nil { wr.Logger().Infof("Not splitting table %v into multiple chunks, cannot convert max: %v %v", td.Name, maxNumeric, err) return result, nil } interval := (max - min) / uint64(sourceReaderCount) if interval == 0 { wr.Logger().Infof("Not splitting table %v into multiple chunks, interval=0: %v %v", td.Name, max, min) return result, nil } result = make([]string, sourceReaderCount+1) result[0] = "" result[sourceReaderCount] = "" for i := uint64(1); i < uint64(sourceReaderCount); i++ { result[i] = fmt.Sprintf("%v", min+interval*i) } return result, nil case mproto.VT_FLOAT, mproto.VT_DOUBLE: min, err := strconv.ParseFloat(qr.Rows[0][0].String(), 64) if err != nil { wr.Logger().Infof("Not splitting table %v into multiple chunks, cannot convert min: %v %v", td.Name, qr.Rows[0][0], err) return result, nil } max, err := strconv.ParseFloat(qr.Rows[0][1].String(), 64) if err != nil { wr.Logger().Infof("Not splitting table %v into multiple chunks, cannot convert max: %v %v", td.Name, qr.Rows[0][1].String(), err) return result, nil } interval := (max - min) / float64(sourceReaderCount) if interval == 0 { wr.Logger().Infof("Not splitting table %v into multiple chunks, interval=0: %v %v", td.Name, max, min) return result, nil } result = make([]string, sourceReaderCount+1) result[0] = "" result[sourceReaderCount] = "" for i := 1; i < sourceReaderCount; i++ { result[i] = fmt.Sprintf("%v", min+interval*float64(i)) } return result, nil } wr.Logger().Infof("Not splitting table %v into multiple chunks, primary key not numeric", td.Name) return result, nil }