// findDestinationMasters finds for each destination shard the current master. func (scw *SplitCloneWorker) findDestinationMasters(ctx context.Context) error { scw.setState(WorkerStateFindTargets) // Make sure we find a master for each destination shard and log it. scw.wr.Logger().Infof("Finding a MASTER tablet for each destination shard...") for _, si := range scw.destinationShards { waitCtx, waitCancel := context.WithTimeout(ctx, *waitForHealthyTabletsTimeout) defer waitCancel() if err := scw.tsc.WaitForTablets(waitCtx, scw.cell, si.Keyspace(), si.ShardName(), []topodatapb.TabletType{topodatapb.TabletType_MASTER}); err != nil { return fmt.Errorf("cannot find MASTER tablet for destination shard for %v/%v (in cell: %v): %v", si.Keyspace(), si.ShardName(), scw.cell, err) } masters := scw.tsc.GetHealthyTabletStats(si.Keyspace(), si.ShardName(), topodatapb.TabletType_MASTER) if len(masters) == 0 { return fmt.Errorf("cannot find MASTER tablet for destination shard for %v/%v (in cell: %v) in HealthCheck: empty TabletStats list", si.Keyspace(), si.ShardName(), scw.cell) } master := masters[0] // Get the MySQL database name of the tablet. keyspaceAndShard := topoproto.KeyspaceShardString(si.Keyspace(), si.ShardName()) scw.destinationDbNames[keyspaceAndShard] = topoproto.TabletDbName(master.Tablet) // TODO(mberlin): Verify on the destination master that the // _vt.blp_checkpoint table has the latest schema. scw.wr.Logger().Infof("Using tablet %v as destination master for %v/%v", topoproto.TabletAliasString(master.Tablet.Alias), si.Keyspace(), si.ShardName()) } scw.wr.Logger().Infof("NOTE: The used master of a destination shard might change over the course of the copy, e.g. due to a reparent. The HealthCheck module will track and log master changes, and any error message will always refer to the actually used master address.") return nil }
// Run is part of the Task interface. func (t *SplitCloneTask) Run(parameters map[string]string) ([]*automationpb.TaskContainer, string, error) { // TODO(mberlin): Add parameters for the following options? // '--source_reader_count', '1', // '--destination_writer_count', '1', args := []string{"SplitClone"} if online := parameters["online"]; online != "" { args = append(args, "--online="+online) } if offline := parameters["offline"]; offline != "" { args = append(args, "--offline="+offline) } if excludeTables := parameters["exclude_tables"]; excludeTables != "" { args = append(args, "--exclude_tables="+excludeTables) } if writeQueryMaxRows := parameters["write_query_max_rows"]; writeQueryMaxRows != "" { args = append(args, "--write_query_max_rows="+writeQueryMaxRows) } if writeQueryMaxSize := parameters["write_query_max_size"]; writeQueryMaxSize != "" { args = append(args, "--write_query_max_size="+writeQueryMaxSize) } if minHealthyRdonlyTablets := parameters["min_healthy_rdonly_tablets"]; minHealthyRdonlyTablets != "" { args = append(args, "--min_healthy_rdonly_tablets="+minHealthyRdonlyTablets) } args = append(args, topoproto.KeyspaceShardString(parameters["keyspace"], parameters["source_shard"])) output, err := ExecuteVtworker(context.TODO(), parameters["vtworker_endpoint"], args) // TODO(mberlin): Remove explicit reset when vtworker supports it implicitly. if err == nil { // Ignore output and error of the Reset. ExecuteVtworker(context.TODO(), parameters["vtworker_endpoint"], []string{"Reset"}) } return nil, output, err }
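For illustration, here is a minimal, self-contained sketch of the parameter-to-flag mapping this Run performs. The parameter values and the endpoint are invented; only keys the code above actually reads are used, and the final positional argument is the "keyspace/shard" string that topoproto.KeyspaceShardString would produce.

package main

import "fmt"

func main() {
	// Hypothetical input; keys mirror those read by SplitCloneTask.Run above.
	parameters := map[string]string{
		"keyspace":                   "test_keyspace",
		"source_shard":               "80-",
		"online":                     "true",
		"write_query_max_rows":       "10000",
		"min_healthy_rdonly_tablets": "2",
		"vtworker_endpoint":          "localhost:15032",
	}
	args := []string{"SplitClone"}
	// Same flag order as in Run; empty parameters are skipped.
	for _, flag := range []string{"online", "offline", "exclude_tables", "write_query_max_rows", "write_query_max_size", "min_healthy_rdonly_tablets"} {
		if v := parameters[flag]; v != "" {
			args = append(args, "--"+flag+"="+v)
		}
	}
	// Positional argument: the "keyspace/shard" string.
	args = append(args, parameters["keyspace"]+"/"+parameters["source_shard"])
	fmt.Println(args)
	// Output: [SplitClone --online=true --write_query_max_rows=10000 --min_healthy_rdonly_tablets=2 test_keyspace/80-]
}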
// shardsWithTablesSources returns all the shards that have SourceShards set // to one value, with an array of Tables. func shardsWithTablesSources(ctx context.Context, wr *wrangler.Wrangler) ([]map[string]string, error) { shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout) keyspaces, err := wr.TopoServer().GetKeyspaces(shortCtx) cancel() if err != nil { return nil, fmt.Errorf("failed to get list of keyspaces: %v", err) } wg := sync.WaitGroup{} mu := sync.Mutex{} // protects result result := make([]map[string]string, 0, len(keyspaces)) rec := concurrency.AllErrorRecorder{} for _, keyspace := range keyspaces { wg.Add(1) go func(keyspace string) { defer wg.Done() shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout) shards, err := wr.TopoServer().GetShardNames(shortCtx, keyspace) cancel() if err != nil { rec.RecordError(fmt.Errorf("failed to get list of shards for keyspace '%v': %v", keyspace, err)) return } for _, shard := range shards { wg.Add(1) go func(keyspace, shard string) { defer wg.Done() shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout) si, err := wr.TopoServer().GetShard(shortCtx, keyspace, shard) cancel() if err != nil { rec.RecordError(fmt.Errorf("failed to get details for shard '%v': %v", topoproto.KeyspaceShardString(keyspace, shard), err)) return } if len(si.SourceShards) == 1 && len(si.SourceShards[0].Tables) > 0 { mu.Lock() result = append(result, map[string]string{ "Keyspace": keyspace, "Shard": shard, }) mu.Unlock() } }(keyspace, shard) } }(keyspace) } wg.Wait() if rec.HasErrors() { return nil, rec.Error() } if len(result) == 0 { return nil, fmt.Errorf("there are no shards with SourceShards") } return result, nil }
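The function above uses a common Go fan-out pattern: one goroutine per keyspace (and per shard), a mutex-protected slice for results, and a WaitGroup to join everything before errors are checked. A minimal, runnable sketch of that shape, with the topo lookups replaced by a stub:

package main

import (
	"fmt"
	"sync"
)

func main() {
	keyspaces := []string{"ks1", "ks2", "ks3"}
	var (
		wg     sync.WaitGroup
		mu     sync.Mutex // protects result, like in shardsWithTablesSources
		result []map[string]string
	)
	for _, keyspace := range keyspaces {
		wg.Add(1)
		go func(keyspace string) {
			defer wg.Done()
			// The real code does topo lookups here; this stub pretends every
			// keyspace has a single shard "0" with SourceShards set.
			mu.Lock()
			result = append(result, map[string]string{"Keyspace": keyspace, "Shard": "0"})
			mu.Unlock()
		}(keyspace)
	}
	wg.Wait()
	fmt.Println(result)
}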
// Run is part of the Task interface. func (t *VerticalSplitDiffTask) Run(parameters map[string]string) ([]*automationpb.TaskContainer, string, error) { args := []string{"VerticalSplitDiff"} args = append(args, topoproto.KeyspaceShardString(parameters["dest_keyspace"], parameters["shard"])) output, err := ExecuteVtworker(context.TODO(), parameters["vtworker_endpoint"], args) // TODO(mberlin): Remove explicit reset when vtworker supports it implicitly. if err == nil { // Ignore output and error of the Reset. ExecuteVtworker(context.TODO(), parameters["vtworker_endpoint"], []string{"Reset"}) } return nil, output, err }
// Run is part of the Task interface. func (t *MigrateServedTypesTask) Run(parameters map[string]string) ([]*automationpb.TaskContainer, string, error) { args := []string{"MigrateServedTypes"} if cells := parameters["cells"]; cells != "" { args = append(args, "--cells="+cells) } if reverse := parameters["reverse"]; reverse != "" { args = append(args, "--reverse="+reverse) } args = append(args, topoproto.KeyspaceShardString(parameters["keyspace"], parameters["source_shard"]), parameters["type"]) output, err := ExecuteVtctl(context.TODO(), parameters["vtctld_endpoint"], args) return nil, output, err }
// Run is part of the Task interface. func (t *VerticalSplitDiffTask) Run(parameters map[string]string) ([]*automationpb.TaskContainer, string, error) { // Run a "Reset" first to clear the state of a previously finished command. // This reset is best effort; we ignore its output and error. // TODO(mberlin): Remove explicit reset when vtworker supports it implicitly. ExecuteVtworker(context.TODO(), parameters["vtworker_endpoint"], []string{"Reset"}) args := []string{"VerticalSplitDiff"} if minHealthyRdonlyTablets := parameters["min_healthy_rdonly_tablets"]; minHealthyRdonlyTablets != "" { args = append(args, "--min_healthy_rdonly_tablets="+minHealthyRdonlyTablets) } args = append(args, topoproto.KeyspaceShardString(parameters["dest_keyspace"], parameters["shard"])) output, err := ExecuteVtworker(context.TODO(), parameters["vtworker_endpoint"], args) return nil, output, err }
func (scw *SplitCloneWorker) createThrottlers() error { scw.throttlersMu.Lock() defer scw.throttlersMu.Unlock() for _, si := range scw.destinationShards { // Set up the throttler for each destination shard. keyspaceAndShard := topoproto.KeyspaceShardString(si.Keyspace(), si.ShardName()) t, err := throttler.NewThrottler(keyspaceAndShard, "transactions", scw.destinationWriterCount, scw.maxTPS, scw.maxReplicationLag) if err != nil { return fmt.Errorf("cannot instantiate throttler: %v", err) } scw.throttlers[keyspaceAndShard] = t } return nil }
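The throttlers created here live only for the duration of one clone phase: one throttler per destination shard, one registered thread per destination writer, a ThreadFinished call when each writer exits, and a Close when the phase ends. Below is a sketch of that lifecycle using only the throttler calls that appear in this code; the function name, its parameters, and the example values are assumptions for illustration.

func writeWithThrottler(writerCount int, maxTPS int64) error {
	// Sketch only: mirrors the lifecycle used above, not the worker's actual wiring.
	t, err := throttler.NewThrottler(
		"test_keyspace/80-", // name: the keyspace/shard string also used as the map key
		"transactions",      // unit reported by the throttler
		writerCount,         // one registered thread per destination writer
		maxTPS,              // maximum transactions per second
		throttler.ReplicationLagModuleDisabled, // no lag-based feedback in this sketch
	)
	if err != nil {
		return fmt.Errorf("cannot instantiate throttler: %v", err)
	}
	defer t.Close() // throttlers only live for the duration of one clone phase

	var wg sync.WaitGroup
	for threadID := 0; threadID < writerCount; threadID++ {
		wg.Add(1)
		go func(threadID int) {
			defer wg.Done()
			defer t.ThreadFinished(threadID) // unregister this writer when it exits
			// ... per-thread write loop would go here ...
		}(threadID)
	}
	wg.Wait()
	return nil
}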
// Run is part of the Task interface. func (t *VerticalSplitCloneTask) Run(parameters map[string]string) ([]*automationpb.TaskContainer, string, error) { // TODO(mberlin): Add parameters for the following options? // '--source_reader_count', '1', // '--destination_pack_count', '1', // '--destination_writer_count', '1', args := []string{"VerticalSplitClone"} args = append(args, "--tables="+parameters["tables"]) args = append(args, topoproto.KeyspaceShardString(parameters["dest_keyspace"], parameters["shard"])) output, err := ExecuteVtworker(context.TODO(), parameters["vtworker_endpoint"], args) // TODO(mberlin): Remove explicit reset when vtworker supports it implicitly. if err == nil { // Ignore output and error of the Reset. ExecuteVtworker(context.TODO(), parameters["vtworker_endpoint"], []string{"Reset"}) } return nil, output, err }
// Run is part of the Task interface. func (t *SplitCloneTask) Run(parameters map[string]string) ([]*automationpb.TaskContainer, string, error) { // Run a "Reset" first to clear the state of a previously finished command. // This reset is best effort; we ignore its output and error. // TODO(mberlin): Remove explicit reset when vtworker supports it implicitly. ExecuteVtworker(context.TODO(), parameters["vtworker_endpoint"], []string{"Reset"}) // TODO(mberlin): Add parameters for the following options? // '--source_reader_count', '1', // '--destination_writer_count', '1', args := []string{"SplitClone"} if online := parameters["online"]; online != "" { args = append(args, "--online="+online) } if offline := parameters["offline"]; offline != "" { args = append(args, "--offline="+offline) } if excludeTables := parameters["exclude_tables"]; excludeTables != "" { args = append(args, "--exclude_tables="+excludeTables) } if chunkCount := parameters["chunk_count"]; chunkCount != "" { args = append(args, "--chunk_count="+chunkCount) } if minRowsPerChunk := parameters["min_rows_per_chunk"]; minRowsPerChunk != "" { args = append(args, "--min_rows_per_chunk="+minRowsPerChunk) } if writeQueryMaxRows := parameters["write_query_max_rows"]; writeQueryMaxRows != "" { args = append(args, "--write_query_max_rows="+writeQueryMaxRows) } if writeQueryMaxSize := parameters["write_query_max_size"]; writeQueryMaxSize != "" { args = append(args, "--write_query_max_size="+writeQueryMaxSize) } if minHealthyRdonlyTablets := parameters["min_healthy_rdonly_tablets"]; minHealthyRdonlyTablets != "" { args = append(args, "--min_healthy_rdonly_tablets="+minHealthyRdonlyTablets) } if maxTPS := parameters["max_tps"]; maxTPS != "" { args = append(args, "--max_tps="+maxTPS) } if maxReplicationLag := parameters["max_replication_lag"]; maxReplicationLag != "" { args = append(args, "--max_replication_lag="+maxReplicationLag) } args = append(args, topoproto.KeyspaceShardString(parameters["keyspace"], parameters["source_shard"])) output, err := ExecuteVtworker(context.TODO(), parameters["vtworker_endpoint"], args) return nil, output, err }
// Run is part of the Task interface. func (t *SplitDiffTask) Run(parameters map[string]string) ([]*automationpb.TaskContainer, string, error) { args := []string{"SplitDiff"} if excludeTables := parameters["exclude_tables"]; excludeTables != "" { args = append(args, "--exclude_tables="+excludeTables) } if minHealthyRdonlyTablets := parameters["min_healthy_rdonly_tablets"]; minHealthyRdonlyTablets != "" { args = append(args, "--min_healthy_rdonly_tablets="+minHealthyRdonlyTablets) } args = append(args, topoproto.KeyspaceShardString(parameters["keyspace"], parameters["dest_shard"])) output, err := ExecuteVtworker(context.TODO(), parameters["vtworker_endpoint"], args) // TODO(mberlin): Remove explicit reset when vtworker supports it implicitly. if err == nil { // Ignore output and error of the Reset. ExecuteVtworker(context.TODO(), parameters["vtworker_endpoint"], []string{"Reset"}) } return nil, output, err }
// Run is part of the Task interface. func (t *VerticalSplitCloneTask) Run(parameters map[string]string) ([]*automationpb.TaskContainer, string, error) { // TODO(mberlin): Add parameters for the following options? // '--source_reader_count', '1', // '--destination_writer_count', '1', args := []string{"VerticalSplitClone"} args = append(args, "--tables="+parameters["tables"]) if online := parameters["online"]; online != "" { args = append(args, "--online="+online) } if offline := parameters["offline"]; offline != "" { args = append(args, "--offline="+offline) } if chunkCount := parameters["chunk_count"]; chunkCount != "" { args = append(args, "--chunk_count="+chunkCount) } if minRowsPerChunk := parameters["min_rows_per_chunk"]; minRowsPerChunk != "" { args = append(args, "--min_rows_per_chunk="+minRowsPerChunk) } if writeQueryMaxRows := parameters["write_query_max_rows"]; writeQueryMaxRows != "" { args = append(args, "--write_query_max_rows="+writeQueryMaxRows) } if writeQueryMaxSize := parameters["write_query_max_size"]; writeQueryMaxSize != "" { args = append(args, "--write_query_max_size="+writeQueryMaxSize) } if minHealthyRdonlyTablets := parameters["min_healthy_rdonly_tablets"]; minHealthyRdonlyTablets != "" { args = append(args, "--min_healthy_rdonly_tablets="+minHealthyRdonlyTablets) } if maxTPS := parameters["max_tps"]; maxTPS != "" { args = append(args, "--max_tps="+maxTPS) } if maxReplicationLag := parameters["max_replication_lag"]; maxReplicationLag != "" { args = append(args, "--max_replication_lag="+maxReplicationLag) } args = append(args, topoproto.KeyspaceShardString(parameters["dest_keyspace"], parameters["shard"])) output, err := ExecuteVtworker(context.TODO(), parameters["vtworker_endpoint"], args) // TODO(mberlin): Remove explicit reset when vtworker supports it implicitly. if err == nil { // Ignore output and error of the Reset. ExecuteVtworker(context.TODO(), parameters["vtworker_endpoint"], []string{"Reset"}) } return nil, output, err }
// findTargets phase: // - find one rdonly in the source shard // - mark it as 'worker' pointing back to us // - get the aliases of all the targets func (vscw *VerticalSplitCloneWorker) findTargets(ctx context.Context) error { vscw.setState(WorkerStateFindTargets) // find an appropriate tablet in the source shard var err error vscw.sourceAlias, err = FindWorkerTablet(ctx, vscw.wr, vscw.cleaner, nil /* tsc */, vscw.cell, vscw.sourceKeyspace, "0", vscw.minHealthyRdonlyTablets) if err != nil { return fmt.Errorf("FindWorkerTablet() failed for %v/%v/0: %v", vscw.cell, vscw.sourceKeyspace, err) } vscw.wr.Logger().Infof("Using tablet %v as the source", topoproto.TabletAliasString(vscw.sourceAlias)) // get the tablet info for it shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout) ti, err := vscw.wr.TopoServer().GetTablet(shortCtx, vscw.sourceAlias) cancel() if err != nil { return fmt.Errorf("cannot read tablet %v: %v", topoproto.TabletAliasString(vscw.sourceAlias), err) } vscw.sourceTablet = ti.Tablet // stop replication on it shortCtx, cancel = context.WithTimeout(ctx, *remoteActionsTimeout) err = vscw.wr.TabletManagerClient().StopSlave(shortCtx, vscw.sourceTablet) cancel() if err != nil { return fmt.Errorf("cannot stop replication on tablet %v", topoproto.TabletAliasString(vscw.sourceAlias)) } wrangler.RecordStartSlaveAction(vscw.cleaner, vscw.sourceTablet) // Initialize healthcheck and add destination shards to it. vscw.healthCheck = discovery.NewHealthCheck(*remoteActionsTimeout, *healthcheckRetryDelay, *healthCheckTimeout) vscw.tsc = discovery.NewTabletStatsCache(vscw.healthCheck, vscw.cell) watcher := discovery.NewShardReplicationWatcher(vscw.wr.TopoServer(), vscw.healthCheck, vscw.cell, vscw.destinationKeyspace, vscw.destinationShard, *healthCheckTopologyRefresh, discovery.DefaultTopoReadConcurrency) vscw.destinationShardWatchers = append(vscw.destinationShardWatchers, watcher) // Make sure we find a master for each destination shard and log it. vscw.wr.Logger().Infof("Finding a MASTER tablet for each destination shard...") waitCtx, waitCancel := context.WithTimeout(ctx, *waitForHealthyTabletsTimeout) defer waitCancel() if err := vscw.tsc.WaitForTablets(waitCtx, vscw.cell, vscw.destinationKeyspace, vscw.destinationShard, []topodatapb.TabletType{topodatapb.TabletType_MASTER}); err != nil { return fmt.Errorf("cannot find MASTER tablet for destination shard for %v/%v (in cell: %v): %v", vscw.destinationKeyspace, vscw.destinationShard, vscw.cell, err) } masters := vscw.tsc.GetHealthyTabletStats(vscw.destinationKeyspace, vscw.destinationShard, topodatapb.TabletType_MASTER) if len(masters) == 0 { return fmt.Errorf("cannot find MASTER tablet for destination shard for %v/%v (in cell: %v) in HealthCheck: empty TabletStats list", vscw.destinationKeyspace, vscw.destinationShard, vscw.cell) } master := masters[0] // Get the MySQL database name of the tablet. keyspaceAndShard := topoproto.KeyspaceShardString(vscw.destinationKeyspace, vscw.destinationShard) vscw.destinationDbNames[keyspaceAndShard] = topoproto.TabletDbName(master.Tablet) // TODO(mberlin): Verify on the destination master that the // _vt.blp_checkpoint table has the latest schema. vscw.wr.Logger().Infof("Using tablet %v as destination master for %v/%v", topoproto.TabletAliasString(master.Tablet.Alias), vscw.destinationKeyspace, vscw.destinationShard) vscw.wr.Logger().Infof("NOTE: The used master of a destination shard might change over the course of the copy, e.g. due to a reparent. The HealthCheck module will track and log master changes, and any error message will always refer to the actually used master address.") return nil }
func (p *shardTabletProvider) description() string { return topoproto.KeyspaceShardString(p.keyspace, p.shard) }
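topoproto.KeyspaceShardString joins keyspace and shard with a slash; the resulting "keyspace/shard" string is used throughout this code both as a human-readable description and as the key into maps such as destinationDbNames and throttlers. A standalone illustration with a local stand-in for the helper (names and values are illustrative):

package main

import "fmt"

// keyspaceShardString is a stand-in for topoproto.KeyspaceShardString, shown
// only to illustrate the "keyspace/shard" format used as a map key above.
func keyspaceShardString(keyspace, shard string) string {
	return fmt.Sprintf("%v/%v", keyspace, shard)
}

func main() {
	key := keyspaceShardString("test_keyspace", "80-")
	fmt.Println(key) // test_keyspace/80-

	destinationDbNames := map[string]string{key: "vt_test_keyspace"}
	fmt.Println(destinationDbNames[key]) // vt_test_keyspace
}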
// copy phase: // - copy the data from source tablets to destination masters (with replication on) // Assumes that the schema has already been created on each destination tablet // (probably from vtctl's CopySchemaShard) func (scw *LegacySplitCloneWorker) copy(ctx context.Context) error { scw.setState(WorkerStateCloneOffline) start := time.Now() defer func() { statsStateDurationsNs.Set(string(WorkerStateCloneOffline), time.Now().Sub(start).Nanoseconds()) }() // get source schema from the first shard // TODO(alainjobart): for now, we assume the schema is compatible // on all source shards. Furthermore, we estimate the number of rows // in each source shard for each table to be about the same // (rowCount is used to estimate an ETA) shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout) sourceSchemaDefinition, err := scw.wr.GetSchema(shortCtx, scw.sourceAliases[0], nil, scw.excludeTables, false /* includeViews */) cancel() if err != nil { return fmt.Errorf("cannot get schema from source %v: %v", topoproto.TabletAliasString(scw.sourceAliases[0]), err) } if len(sourceSchemaDefinition.TableDefinitions) == 0 { return fmt.Errorf("no tables matching the table filter in tablet %v", topoproto.TabletAliasString(scw.sourceAliases[0])) } for _, td := range sourceSchemaDefinition.TableDefinitions { if len(td.Columns) == 0 { return fmt.Errorf("schema for table %v has no columns", td.Name) } } scw.wr.Logger().Infof("Source tablet 0 has %v tables to copy", len(sourceSchemaDefinition.TableDefinitions)) scw.tableStatusList.initialize(sourceSchemaDefinition) // In parallel, setup the channels to send SQL data chunks to for each destination tablet: // // mu protects the context for cancelation, and firstError mu := sync.Mutex{} var firstError error ctx, cancelCopy := context.WithCancel(ctx) processError := func(format string, args ...interface{}) { scw.wr.Logger().Errorf(format, args...) mu.Lock() if firstError == nil { firstError = fmt.Errorf(format, args...) cancelCopy() } mu.Unlock() } insertChannels := make([]chan string, len(scw.destinationShards)) destinationWaitGroup := sync.WaitGroup{} for shardIndex, si := range scw.destinationShards { // we create one channel per destination tablet. It // is sized to have a buffer of a maximum of // destinationWriterCount * 2 items, to hopefully // always have data. We then have // destinationWriterCount go routines reading from it. 
insertChannels[shardIndex] = make(chan string, scw.destinationWriterCount*2) go func(keyspace, shard string, insertChannel chan string) { for j := 0; j < scw.destinationWriterCount; j++ { destinationWaitGroup.Add(1) go func(threadID int) { defer destinationWaitGroup.Done() keyspaceAndShard := topoproto.KeyspaceShardString(keyspace, shard) throttler := scw.destinationThrottlers[keyspaceAndShard] defer throttler.ThreadFinished(threadID) executor := newExecutor(scw.wr, scw.tsc, throttler, keyspace, shard, threadID) if err := executor.fetchLoop(ctx, insertChannel); err != nil { processError("executor.fetchLoop failed: %v", err) } }(j) } }(si.Keyspace(), si.ShardName(), insertChannels[shardIndex]) } // read the vschema if needed var keyspaceSchema *vindexes.KeyspaceSchema if *useV3ReshardingMode { kschema, err := scw.wr.TopoServer().GetVSchema(ctx, scw.keyspace) if err != nil { return fmt.Errorf("cannot load VSchema for keyspace %v: %v", scw.keyspace, err) } if kschema == nil { return fmt.Errorf("no VSchema for keyspace %v", scw.keyspace) } keyspaceSchema, err = vindexes.BuildKeyspaceSchema(kschema, scw.keyspace) if err != nil { return fmt.Errorf("cannot build vschema for keyspace %v: %v", scw.keyspace, err) } } // Now for each table, read data chunks and send them to all // insertChannels sourceWaitGroup := sync.WaitGroup{} for shardIndex := range scw.sourceShards { sema := sync2.NewSemaphore(scw.sourceReaderCount, 0) for tableIndex, td := range sourceSchemaDefinition.TableDefinitions { var keyResolver keyspaceIDResolver if *useV3ReshardingMode { keyResolver, err = newV3ResolverFromTableDefinition(keyspaceSchema, td) if err != nil { return fmt.Errorf("cannot resolve v3 sharding keys for keyspace %v: %v", scw.keyspace, err) } } else { keyResolver, err = newV2Resolver(scw.keyspaceInfo, td) if err != nil { return fmt.Errorf("cannot resolve sharding keys for keyspace %v: %v", scw.keyspace, err) } } rowSplitter := NewRowSplitter(scw.destinationShards, keyResolver) chunks, err := generateChunks(ctx, scw.wr, scw.sourceTablets[shardIndex], td, scw.sourceReaderCount, defaultMinRowsPerChunk) if err != nil { return err } scw.tableStatusList.setThreadCount(tableIndex, len(chunks)-1) for _, c := range chunks { sourceWaitGroup.Add(1) go func(td *tabletmanagerdatapb.TableDefinition, tableIndex int, chunk chunk) { defer sourceWaitGroup.Done() sema.Acquire() defer sema.Release() scw.tableStatusList.threadStarted(tableIndex) // Start streaming from the source tablets. 
tp := newSingleTabletProvider(ctx, scw.wr.TopoServer(), scw.sourceAliases[shardIndex]) rr, err := NewRestartableResultReader(ctx, scw.wr.Logger(), tp, td, chunk, false /* allowMultipleRetries */) if err != nil { processError("NewRestartableResultReader failed: %v", err) return } defer rr.Close(ctx) // process the data dbNames := make([]string, len(scw.destinationShards)) for i, si := range scw.destinationShards { keyspaceAndShard := topoproto.KeyspaceShardString(si.Keyspace(), si.ShardName()) dbNames[i] = scw.destinationDbNames[keyspaceAndShard] } if err := scw.processData(ctx, dbNames, td, tableIndex, rr, rowSplitter, insertChannels, scw.destinationPackCount); err != nil { processError("processData failed: %v", err) } scw.tableStatusList.threadDone(tableIndex) }(td, tableIndex, c) } } } sourceWaitGroup.Wait() for shardIndex := range scw.destinationShards { close(insertChannels[shardIndex]) } destinationWaitGroup.Wait() if firstError != nil { return firstError } // then create and populate the blp_checkpoint table if scw.strategy.skipPopulateBlpCheckpoint { scw.wr.Logger().Infof("Skipping populating the blp_checkpoint table") } else { queries := make([]string, 0, 4) queries = append(queries, binlogplayer.CreateBlpCheckpoint()...) flags := "" if scw.strategy.dontStartBinlogPlayer { flags = binlogplayer.BlpFlagDontStart } // get the current position from the sources for shardIndex := range scw.sourceShards { shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout) status, err := scw.wr.TabletManagerClient().SlaveStatus(shortCtx, scw.sourceTablets[shardIndex]) cancel() if err != nil { return err } queries = append(queries, binlogplayer.PopulateBlpCheckpoint(uint32(shardIndex), status.Position, scw.maxTPS, throttler.ReplicationLagModuleDisabled, time.Now().Unix(), flags)) } for _, si := range scw.destinationShards { destinationWaitGroup.Add(1) go func(keyspace, shard string) { defer destinationWaitGroup.Done() scw.wr.Logger().Infof("Making and populating blp_checkpoint table") keyspaceAndShard := topoproto.KeyspaceShardString(keyspace, shard) if err := runSQLCommands(ctx, scw.wr, scw.tsc, keyspace, shard, scw.destinationDbNames[keyspaceAndShard], queries); err != nil { processError("blp_checkpoint queries failed: %v", err) } }(si.Keyspace(), si.ShardName()) } destinationWaitGroup.Wait() if firstError != nil { return firstError } } // Now we're done with data copy, update the shard's source info. // TODO(alainjobart) this is a superset, some shards may not // overlap, have to deal with this better (for N -> M splits // where both N>1 and M>1) if scw.strategy.skipSetSourceShards { scw.wr.Logger().Infof("Skipping setting SourceShard on destination shards.") } else { for _, si := range scw.destinationShards { scw.wr.Logger().Infof("Setting SourceShard on shard %v/%v", si.Keyspace(), si.ShardName()) shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout) err := scw.wr.SetSourceShards(shortCtx, si.Keyspace(), si.ShardName(), scw.sourceAliases, nil) cancel() if err != nil { return fmt.Errorf("failed to set source shards: %v", err) } } } err = scw.findRefreshTargets(ctx) if err != nil { return fmt.Errorf("failed before refreshing state on destination tablets: %v", err) } // And force a state refresh (re-read topo) on all destination tablets. // The master tablet will end up starting filtered replication // at this point. 
for shardIndex := range scw.destinationShards { for _, tabletAlias := range scw.refreshAliases[shardIndex] { destinationWaitGroup.Add(1) go func(ti *topo.TabletInfo) { defer destinationWaitGroup.Done() scw.wr.Logger().Infof("Refreshing state on tablet %v", ti.AliasString()) shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout) err := scw.wr.TabletManagerClient().RefreshState(shortCtx, ti.Tablet) cancel() if err != nil { processError("RefreshState failed on tablet %v: %v", ti.AliasString(), err) } }(scw.refreshTablets[shardIndex][*tabletAlias]) } } destinationWaitGroup.Wait() return firstError }
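The copy pipeline above is a bounded producer/consumer setup: a buffered channel per destination shard (capacity destinationWriterCount*2), destinationWriterCount consumer goroutines draining it, one producer goroutine per chunk, and a close once all producers are done so the consumers' loops terminate. A minimal, self-contained sketch of that shape; all names and counts here are illustrative:

package main

import (
	"fmt"
	"sync"
)

func main() {
	const writerCount = 2
	// Buffered like the worker's insertChannels: writerCount*2 so the
	// writers rarely starve while producers are reading the next rows.
	insertChannel := make(chan string, writerCount*2)

	var writers sync.WaitGroup // plays the role of destinationWaitGroup
	for threadID := 0; threadID < writerCount; threadID++ {
		writers.Add(1)
		go func(threadID int) {
			defer writers.Done()
			// Consumer: drains statements until the channel is closed.
			for stmt := range insertChannel {
				fmt.Printf("writer %d executes: %s\n", threadID, stmt)
			}
		}(threadID)
	}

	var producers sync.WaitGroup // plays the role of sourceWaitGroup
	for chunk := 0; chunk < 3; chunk++ {
		producers.Add(1)
		go func(chunk int) {
			defer producers.Done()
			insertChannel <- fmt.Sprintf("INSERT ... /* chunk %d */", chunk)
		}(chunk)
	}

	producers.Wait()     // sourceWaitGroup.Wait()
	close(insertChannel) // lets the writers' range loops terminate
	writers.Wait()       // destinationWaitGroup.Wait()
}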
// findTargets phase: // - find one rdonly in the source shard // - mark it as 'worker' pointing back to us // - get the aliases of all the targets func (scw *LegacySplitCloneWorker) findTargets(ctx context.Context) error { scw.setState(WorkerStateFindTargets) var err error // find an appropriate tablet in the source shards scw.sourceAliases = make([]*topodatapb.TabletAlias, len(scw.sourceShards)) for i, si := range scw.sourceShards { scw.sourceAliases[i], err = FindWorkerTablet(ctx, scw.wr, scw.cleaner, scw.tsc, scw.cell, si.Keyspace(), si.ShardName(), scw.minHealthyRdonlyTablets) if err != nil { return fmt.Errorf("FindWorkerTablet() failed for %v/%v/%v: %v", scw.cell, si.Keyspace(), si.ShardName(), err) } scw.wr.Logger().Infof("Using tablet %v as source for %v/%v", topoproto.TabletAliasString(scw.sourceAliases[i]), si.Keyspace(), si.ShardName()) } // get the tablet info for them, and stop their replication scw.sourceTablets = make([]*topodatapb.Tablet, len(scw.sourceAliases)) for i, alias := range scw.sourceAliases { shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout) ti, err := scw.wr.TopoServer().GetTablet(shortCtx, alias) cancel() if err != nil { return fmt.Errorf("cannot read tablet %v: %v", topoproto.TabletAliasString(alias), err) } scw.sourceTablets[i] = ti.Tablet shortCtx, cancel = context.WithTimeout(ctx, *remoteActionsTimeout) err = scw.wr.TabletManagerClient().StopSlave(shortCtx, scw.sourceTablets[i]) cancel() if err != nil { return fmt.Errorf("cannot stop replication on tablet %v", topoproto.TabletAliasString(alias)) } wrangler.RecordStartSlaveAction(scw.cleaner, scw.sourceTablets[i]) } // Initialize healthcheck and add destination shards to it. scw.healthCheck = discovery.NewHealthCheck(*remoteActionsTimeout, *healthcheckRetryDelay, *healthCheckTimeout) scw.tsc = discovery.NewTabletStatsCache(scw.healthCheck, scw.cell) for _, si := range scw.destinationShards { watcher := discovery.NewShardReplicationWatcher(scw.wr.TopoServer(), scw.healthCheck, scw.cell, si.Keyspace(), si.ShardName(), *healthCheckTopologyRefresh, discovery.DefaultTopoReadConcurrency) scw.destinationShardWatchers = append(scw.destinationShardWatchers, watcher) } // Make sure we find a master for each destination shard and log it. scw.wr.Logger().Infof("Finding a MASTER tablet for each destination shard...") for _, si := range scw.destinationShards { waitCtx, waitCancel := context.WithTimeout(ctx, 10*time.Second) defer waitCancel() if err := scw.tsc.WaitForTablets(waitCtx, scw.cell, si.Keyspace(), si.ShardName(), []topodatapb.TabletType{topodatapb.TabletType_MASTER}); err != nil { return fmt.Errorf("cannot find MASTER tablet for destination shard for %v/%v: %v", si.Keyspace(), si.ShardName(), err) } masters := scw.tsc.GetHealthyTabletStats(si.Keyspace(), si.ShardName(), topodatapb.TabletType_MASTER) if len(masters) == 0 { return fmt.Errorf("cannot find MASTER tablet for destination shard for %v/%v in HealthCheck: empty TabletStats list", si.Keyspace(), si.ShardName()) } master := masters[0] // Get the MySQL database name of the tablet. 
shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout) ti, err := scw.wr.TopoServer().GetTablet(shortCtx, master.Tablet.Alias) cancel() if err != nil { return fmt.Errorf("cannot get the TabletInfo for destination master (%v) to find out its db name: %v", topoproto.TabletAliasString(master.Tablet.Alias), err) } keyspaceAndShard := topoproto.KeyspaceShardString(si.Keyspace(), si.ShardName()) scw.destinationDbNames[keyspaceAndShard] = ti.DbName() // TODO(mberlin): Verify on the destination master that the // _vt.blp_checkpoint table has the latest schema. scw.wr.Logger().Infof("Using tablet %v as destination master for %v/%v", topoproto.TabletAliasString(master.Tablet.Alias), si.Keyspace(), si.ShardName()) } scw.wr.Logger().Infof("NOTE: The used master of a destination shard might change over the course of the copy, e.g. due to a reparent. The HealthCheck module will track and log master changes, and any error message will always refer to the actually used master address.") // Set up the throttler for each destination shard. for _, si := range scw.destinationShards { keyspaceAndShard := topoproto.KeyspaceShardString(si.Keyspace(), si.ShardName()) t, err := throttler.NewThrottler( keyspaceAndShard, "transactions", scw.destinationWriterCount, scw.maxTPS, throttler.ReplicationLagModuleDisabled) if err != nil { return fmt.Errorf("cannot instantiate throttler: %v", err) } scw.destinationThrottlers[keyspaceAndShard] = t } return nil }
// copy phase: // - copy the data from source tablets to destination masters (with replication on) // Assumes that the schema has already been created on each destination tablet // (probably from vtctl's CopySchemaShard) func (scw *SplitCloneWorker) clone(ctx context.Context, state StatusWorkerState) error { if state != WorkerStateCloneOnline && state != WorkerStateCloneOffline { panic(fmt.Sprintf("invalid state passed to clone(): %v", state)) } scw.setState(state) start := time.Now() defer func() { statsStateDurationsNs.Set(string(state), time.Now().Sub(start).Nanoseconds()) }() var firstSourceTablet *topodatapb.Tablet if state == WorkerStateCloneOffline { // Use the first source tablet which we took offline. firstSourceTablet = scw.sourceTablets[0] } else { // Pick any healthy serving source tablet. si := scw.sourceShards[0] tablets := scw.tsc.GetTabletStats(si.Keyspace(), si.ShardName(), topodatapb.TabletType_RDONLY) if len(tablets) == 0 { // We fail fast on this problem and don't retry because at the start all tablets should be healthy. return fmt.Errorf("no healthy RDONLY tablet in source shard (%v) available (required to find out the schema)", topoproto.KeyspaceShardString(si.Keyspace(), si.ShardName())) } firstSourceTablet = tablets[0].Tablet } var statsCounters []*stats.Counters var tableStatusList *tableStatusList switch state { case WorkerStateCloneOnline: statsCounters = []*stats.Counters{statsOnlineInsertsCounters, statsOnlineUpdatesCounters, statsOnlineDeletesCounters, statsOnlineEqualRowsCounters} tableStatusList = scw.tableStatusListOnline case WorkerStateCloneOffline: statsCounters = []*stats.Counters{statsOfflineInsertsCounters, statsOfflineUpdatesCounters, statsOfflineDeletesCounters, statsOfflineEqualRowsCounters} tableStatusList = scw.tableStatusListOffline } // The throttlers exist only for the duration of this clone() call. // That means a SplitClone invocation with both online and offline phases // will create throttlers for each phase. if err := scw.createThrottlers(); err != nil { return err } defer scw.closeThrottlers() sourceSchemaDefinition, err := scw.getSourceSchema(ctx, firstSourceTablet) if err != nil { return err } scw.wr.Logger().Infof("Source tablet 0 has %v tables to copy", len(sourceSchemaDefinition.TableDefinitions)) tableStatusList.initialize(sourceSchemaDefinition) // In parallel, setup the channels to send SQL data chunks to for each destination tablet: // // mu protects the context for cancelation, and firstError mu := sync.Mutex{} var firstError error ctx, cancelCopy := context.WithCancel(ctx) processError := func(format string, args ...interface{}) { scw.wr.Logger().Errorf(format, args...) mu.Lock() if firstError == nil { firstError = fmt.Errorf(format, args...) cancelCopy() } mu.Unlock() } insertChannels := make([]chan string, len(scw.destinationShards)) destinationWaitGroup := sync.WaitGroup{} for shardIndex, si := range scw.destinationShards { // We create one channel per destination tablet. It is sized to have a // buffer of a maximum of destinationWriterCount * 2 items, to hopefully // always have data. We then have destinationWriterCount go routines reading // from it. 
insertChannels[shardIndex] = make(chan string, scw.destinationWriterCount*2) for j := 0; j < scw.destinationWriterCount; j++ { destinationWaitGroup.Add(1) go func(keyspace, shard string, insertChannel chan string, throttler *throttler.Throttler, threadID int) { defer destinationWaitGroup.Done() defer throttler.ThreadFinished(threadID) executor := newExecutor(scw.wr, scw.tsc, throttler, keyspace, shard, threadID) if err := executor.fetchLoop(ctx, insertChannel); err != nil { processError("executor.fetchLoop failed: %v", err) } }(si.Keyspace(), si.ShardName(), insertChannels[shardIndex], scw.getThrottler(si.Keyspace(), si.ShardName()), j) } } // Now for each table, read data chunks and send them to all // insertChannels sourceWaitGroup := sync.WaitGroup{} sema := sync2.NewSemaphore(scw.sourceReaderCount, 0) for tableIndex, td := range sourceSchemaDefinition.TableDefinitions { td = reorderColumnsPrimaryKeyFirst(td) keyResolver, err := scw.createKeyResolver(td) if err != nil { return fmt.Errorf("cannot resolve sharding keys for keyspace %v: %v", scw.destinationKeyspace, err) } // TODO(mberlin): We're going to chunk *all* source shards based on the MIN // and MAX values of the *first* source shard. Is this going to be a problem? chunks, err := generateChunks(ctx, scw.wr, firstSourceTablet, td, scw.chunkCount, scw.minRowsPerChunk) if err != nil { return err } tableStatusList.setThreadCount(tableIndex, len(chunks)) for _, c := range chunks { sourceWaitGroup.Add(1) go func(td *tabletmanagerdatapb.TableDefinition, tableIndex int, chunk chunk) { defer sourceWaitGroup.Done() errPrefix := fmt.Sprintf("table=%v chunk=%v", td.Name, chunk) // We need our own error per Go routine to avoid races. var err error sema.Acquire() defer sema.Release() tableStatusList.threadStarted(tableIndex) if state == WorkerStateCloneOnline { // Wait for enough healthy tablets (they might have become unhealthy // and their replication lag might have increased since we started.) if err := scw.waitForTablets(ctx, scw.sourceShards, *retryDuration); err != nil { processError("%v: No healthy source tablets found (gave up after %v): %v", errPrefix, *retryDuration, err) return } } // Set up readers for the diff. There will be one reader for every // source and destination shard. sourceReaders := make([]ResultReader, len(scw.sourceShards)) destReaders := make([]ResultReader, len(scw.destinationShards)) for shardIndex, si := range scw.sourceShards { var tp tabletProvider allowMultipleRetries := true if state == WorkerStateCloneOffline { tp = newSingleTabletProvider(ctx, scw.wr.TopoServer(), scw.offlineSourceAliases[shardIndex]) // allowMultipleRetries is false to avoid that we'll keep retrying // on the same tablet alias for hours. This guards us against the // situation that an offline tablet gets restarted and serves again. // In that case we cannot use it because its replication is no // longer stopped at the same point as we took it offline initially. 
allowMultipleRetries = false } else { tp = newShardTabletProvider(scw.tsc, scw.tabletTracker, si.Keyspace(), si.ShardName()) } sourceResultReader, err := NewRestartableResultReader(ctx, scw.wr.Logger(), tp, td, chunk, allowMultipleRetries) if err != nil { processError("%v: NewRestartableResultReader for source: %v failed: %v", errPrefix, tp.description(), err) return } defer sourceResultReader.Close() sourceReaders[shardIndex] = sourceResultReader } // Wait for enough healthy tablets (they might have become unhealthy // and their replication lag might have increased due to a previous // chunk pipeline.) if err := scw.waitForTablets(ctx, scw.destinationShards, *retryDuration); err != nil { processError("%v: No healthy destination tablets found (gave up after %v): %v", errPrefix, *retryDuration, err) return } for shardIndex, si := range scw.destinationShards { tp := newShardTabletProvider(scw.tsc, scw.tabletTracker, si.Keyspace(), si.ShardName()) destResultReader, err := NewRestartableResultReader(ctx, scw.wr.Logger(), tp, td, chunk, true /* allowMultipleRetries */) if err != nil { processError("%v: NewRestartableResultReader for destination: %v failed: %v", errPrefix, tp.description(), err) return } defer destResultReader.Close() destReaders[shardIndex] = destResultReader } var sourceReader ResultReader var destReader ResultReader if len(sourceReaders) >= 2 { sourceReader, err = NewResultMerger(sourceReaders, len(td.PrimaryKeyColumns)) if err != nil { processError("%v: NewResultMerger for source tablets failed: %v", errPrefix, err) return } } else { sourceReader = sourceReaders[0] } if len(destReaders) >= 2 { destReader, err = NewResultMerger(destReaders, len(td.PrimaryKeyColumns)) if err != nil { processError("%v: NewResultMerger for destination tablets failed: %v", errPrefix, err) return } } else { destReader = destReaders[0] } dbNames := make([]string, len(scw.destinationShards)) for i, si := range scw.destinationShards { keyspaceAndShard := topoproto.KeyspaceShardString(si.Keyspace(), si.ShardName()) dbNames[i] = scw.destinationDbNames[keyspaceAndShard] } // Compare the data and reconcile any differences. differ, err := NewRowDiffer2(ctx, sourceReader, destReader, td, tableStatusList, tableIndex, scw.destinationShards, keyResolver, insertChannels, ctx.Done(), dbNames, scw.writeQueryMaxRows, scw.writeQueryMaxSize, scw.writeQueryMaxRowsDelete, statsCounters) if err != nil { processError("%v: NewRowDiffer2 failed: %v", errPrefix, err) return } // Ignore the diff report because all diffs should get reconciled. _ /* DiffReport */, err = differ.Diff() if err != nil { processError("%v: RowDiffer2 failed: %v", errPrefix, err) return } tableStatusList.threadDone(tableIndex) }(td, tableIndex, c) } } sourceWaitGroup.Wait() for shardIndex := range scw.destinationShards { close(insertChannels[shardIndex]) } destinationWaitGroup.Wait() if firstError != nil { return firstError } if state == WorkerStateCloneOffline { // Create and populate the blp_checkpoint table to give filtered replication // a starting point. if scw.strategy.skipPopulateBlpCheckpoint { scw.wr.Logger().Infof("Skipping populating the blp_checkpoint table") } else { queries := make([]string, 0, 4) queries = append(queries, binlogplayer.CreateBlpCheckpoint()...) 
flags := "" if scw.strategy.dontStartBinlogPlayer { flags = binlogplayer.BlpFlagDontStart } // get the current position from the sources for shardIndex := range scw.sourceShards { shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout) status, err := scw.wr.TabletManagerClient().SlaveStatus(shortCtx, scw.sourceTablets[shardIndex]) cancel() if err != nil { return err } // TODO(mberlin): Fill in scw.maxReplicationLag once the adaptive // throttler is enabled by default. queries = append(queries, binlogplayer.PopulateBlpCheckpoint(uint32(shardIndex), status.Position, scw.maxTPS, throttler.ReplicationLagModuleDisabled, time.Now().Unix(), flags)) } for _, si := range scw.destinationShards { destinationWaitGroup.Add(1) go func(keyspace, shard string) { defer destinationWaitGroup.Done() scw.wr.Logger().Infof("Making and populating blp_checkpoint table") keyspaceAndShard := topoproto.KeyspaceShardString(keyspace, shard) if err := runSQLCommands(ctx, scw.wr, scw.tsc, keyspace, shard, scw.destinationDbNames[keyspaceAndShard], queries); err != nil { processError("blp_checkpoint queries failed: %v", err) } }(si.Keyspace(), si.ShardName()) } destinationWaitGroup.Wait() if firstError != nil { return firstError } } // Configure filtered replication by setting the SourceShard info. // The master tablets won't enable filtered replication (the binlog player) // until they re-read the topology due to a restart or a reload. // TODO(alainjobart) this is a superset, some shards may not // overlap, have to deal with this better (for N -> M splits // where both N>1 and M>1) if scw.strategy.skipSetSourceShards { scw.wr.Logger().Infof("Skipping setting SourceShard on destination shards.") } else { for _, si := range scw.destinationShards { scw.wr.Logger().Infof("Setting SourceShard on shard %v/%v (tables: %v)", si.Keyspace(), si.ShardName(), scw.tables) shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout) err := scw.wr.SetSourceShards(shortCtx, si.Keyspace(), si.ShardName(), scw.offlineSourceAliases, scw.tables) cancel() if err != nil { return fmt.Errorf("failed to set source shards: %v", err) } } } // Force a state refresh (re-read topo) on all destination tablets. // The master tablet will end up starting filtered replication at this point. // // Find all tablets first, then refresh the state on each in parallel. err = scw.findRefreshTargets(ctx) if err != nil { return fmt.Errorf("failed before refreshing state on destination tablets: %v", err) } for shardIndex := range scw.destinationShards { for _, tabletAlias := range scw.refreshAliases[shardIndex] { destinationWaitGroup.Add(1) go func(ti *topo.TabletInfo) { defer destinationWaitGroup.Done() scw.wr.Logger().Infof("Refreshing state on tablet %v", ti.AliasString()) shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout) err := scw.wr.TabletManagerClient().RefreshState(shortCtx, ti.Tablet) cancel() if err != nil { processError("RefreshState failed on tablet %v: %v", ti.AliasString(), err) } }(scw.refreshTablets[shardIndex][*tabletAlias]) } } } // clonePhase == offline destinationWaitGroup.Wait() return firstError }
func (scw *SplitCloneWorker) getThrottlerLocked(keyspace, shard string) *throttler.Throttler { keyspaceAndShard := topoproto.KeyspaceShardString(keyspace, shard) return scw.throttlers[keyspaceAndShard] }
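By Go convention the Locked suffix means the caller already holds throttlersMu. The clone code above calls an unexported getThrottler helper; a plausible shape for that wrapper is sketched below as an assumption, not as code taken from the worker:

// Assumed wrapper, for illustration only: take the mutex that guards
// scw.throttlers and delegate to the *Locked variant above.
func (scw *SplitCloneWorker) getThrottler(keyspace, shard string) *throttler.Throttler {
	scw.throttlersMu.Lock()
	defer scw.throttlersMu.Unlock()
	return scw.getThrottlerLocked(keyspace, shard)
}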
// Run is part of the Task interface. func (t *WaitForFilteredReplicationTask) Run(parameters map[string]string) ([]*automationpb.TaskContainer, string, error) { keyspaceAndShard := topoproto.KeyspaceShardString(parameters["keyspace"], parameters["shard"]) output, err := ExecuteVtctl(context.TODO(), parameters["vtctld_endpoint"], []string{"WaitForFilteredReplication", "-max_delay", parameters["max_delay"], keyspaceAndShard}) return nil, output, err }
// clone phase: // - copy the data from source tablets to destination masters (with replication on) // Assumes that the schema has already been created on each destination tablet // (probably from vtctl's CopySchemaShard) func (vscw *VerticalSplitCloneWorker) clone(ctx context.Context) error { vscw.setState(WorkerStateCloneOffline) start := time.Now() defer func() { statsStateDurationsNs.Set(string(WorkerStateCloneOffline), time.Now().Sub(start).Nanoseconds()) }() // get source schema shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout) sourceSchemaDefinition, err := vscw.wr.GetSchema(shortCtx, vscw.sourceAlias, vscw.tables, nil, true) cancel() if err != nil { return fmt.Errorf("cannot get schema from source %v: %v", topoproto.TabletAliasString(vscw.sourceAlias), err) } if len(sourceSchemaDefinition.TableDefinitions) == 0 { return fmt.Errorf("no tables matching the table filter") } vscw.wr.Logger().Infof("Source tablet has %v tables to copy", len(sourceSchemaDefinition.TableDefinitions)) vscw.tableStatusList.initialize(sourceSchemaDefinition) // In parallel, setup the channels to send SQL data chunks to // for each destination tablet. // // mu protects firstError mu := sync.Mutex{} var firstError error ctx, cancelCopy := context.WithCancel(ctx) processError := func(format string, args ...interface{}) { vscw.wr.Logger().Errorf(format, args...) mu.Lock() if firstError == nil { firstError = fmt.Errorf(format, args...) cancelCopy() } mu.Unlock() } destinationWaitGroup := sync.WaitGroup{} // we create one channel for the destination tablet. It // is sized to have a buffer of a maximum of // destinationWriterCount * 2 items, to hopefully // always have data. We then have // destinationWriterCount go routines reading from it. insertChannel := make(chan string, vscw.destinationWriterCount*2) // Set up the throttler for the destination shard. keyspaceAndShard := topoproto.KeyspaceShardString(vscw.destinationKeyspace, vscw.destinationShard) destinationThrottler, err := throttler.NewThrottler( keyspaceAndShard, "transactions", vscw.destinationWriterCount, vscw.maxTPS, throttler.ReplicationLagModuleDisabled) if err != nil { return fmt.Errorf("cannot instantiate throttler: %v", err) } for j := 0; j < vscw.destinationWriterCount; j++ { destinationWaitGroup.Add(1) go func(threadID int) { defer destinationWaitGroup.Done() defer destinationThrottler.ThreadFinished(threadID) executor := newExecutor(vscw.wr, vscw.tsc, destinationThrottler, vscw.destinationKeyspace, vscw.destinationShard, threadID) if err := executor.fetchLoop(ctx, insertChannel); err != nil { processError("executor.fetchLoop failed: %v", err) } }(j) } // Now for each table, read data chunks and send them to insertChannel sourceWaitGroup := sync.WaitGroup{} sema := sync2.NewSemaphore(vscw.sourceReaderCount, 0) dbName := vscw.destinationDbNames[topoproto.KeyspaceShardString(vscw.destinationKeyspace, vscw.destinationShard)] for tableIndex, td := range sourceSchemaDefinition.TableDefinitions { if td.Type == tmutils.TableView { continue } chunks, err := generateChunks(ctx, vscw.wr, vscw.sourceTablet, td, vscw.minTableSizeForSplit, vscw.sourceReaderCount) if err != nil { return err } vscw.tableStatusList.setThreadCount(tableIndex, len(chunks)-1) for _, c := range chunks { sourceWaitGroup.Add(1) go func(td *tabletmanagerdatapb.TableDefinition, tableIndex int, chunk chunk) { defer sourceWaitGroup.Done() sema.Acquire() defer sema.Release() vscw.tableStatusList.threadStarted(tableIndex) // Start streaming from the source tablet. 
rr, err := NewRestartableResultReader(ctx, vscw.wr.Logger(), vscw.wr.TopoServer(), vscw.sourceAlias, td, chunk) if err != nil { processError("NewRestartableResultReader failed: %v", err) return } defer rr.Close() // process the data if err := vscw.processData(ctx, dbName, td, tableIndex, rr, insertChannel, vscw.destinationPackCount); err != nil { processError("ResultReader failed: %v", err) } vscw.tableStatusList.threadDone(tableIndex) }(td, tableIndex, c) } } sourceWaitGroup.Wait() close(insertChannel) destinationWaitGroup.Wait() // Stop Throttler. destinationThrottler.Close() if firstError != nil { return firstError } // then create and populate the blp_checkpoint table if vscw.strategy.skipPopulateBlpCheckpoint { vscw.wr.Logger().Infof("Skipping populating the blp_checkpoint table") } else { // get the current position from the source shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout) status, err := vscw.wr.TabletManagerClient().SlaveStatus(shortCtx, vscw.sourceTablet) cancel() if err != nil { return err } queries := make([]string, 0, 4) queries = append(queries, binlogplayer.CreateBlpCheckpoint()...) flags := "" if vscw.strategy.dontStartBinlogPlayer { flags = binlogplayer.BlpFlagDontStart } queries = append(queries, binlogplayer.PopulateBlpCheckpoint(0, status.Position, vscw.maxTPS, throttler.ReplicationLagModuleDisabled, time.Now().Unix(), flags)) vscw.wr.Logger().Infof("Making and populating blp_checkpoint table") if err := runSQLCommands(ctx, vscw.wr, vscw.tsc, vscw.destinationKeyspace, vscw.destinationShard, dbName, queries); err != nil { processError("blp_checkpoint queries failed: %v", err) } if firstError != nil { return firstError } } // Now we're done with data copy, update the shard's source info. if vscw.strategy.skipSetSourceShards { vscw.wr.Logger().Infof("Skipping setting SourceShard on destination shard.") } else { vscw.wr.Logger().Infof("Setting SourceShard on shard %v/%v", vscw.destinationKeyspace, vscw.destinationShard) shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout) err := vscw.wr.SetSourceShards(shortCtx, vscw.destinationKeyspace, vscw.destinationShard, []*topodatapb.TabletAlias{vscw.sourceAlias}, vscw.tables) cancel() if err != nil { return fmt.Errorf("Failed to set source shards: %v", err) } } err = vscw.findRefreshTargets(ctx) if err != nil { return fmt.Errorf("failed before refreshing state on destination tablets: %v", err) } // And force a state refresh (re-read topo) on all destination tablets. // The master tablet will end up starting filtered replication // at this point. for _, tabletAlias := range vscw.refreshAliases { destinationWaitGroup.Add(1) go func(ti *topo.TabletInfo) { defer destinationWaitGroup.Done() vscw.wr.Logger().Infof("Refreshing state on tablet %v", ti.AliasString()) shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout) err := vscw.wr.TabletManagerClient().RefreshState(shortCtx, ti.Tablet) cancel() if err != nil { processError("RefreshState failed on tablet %v: %v", ti.AliasString(), err) } }(vscw.refreshTablets[*tabletAlias]) } destinationWaitGroup.Wait() return firstError }
// Run is part of the Task interface. func (t *HorizontalReshardingTask) Run(parameters map[string]string) ([]*automationpb.TaskContainer, string, error) { // Required parameters. // Example: test_keyspace keyspace := parameters["keyspace"] // Example: 10-20 sourceShards := strings.Split(parameters["source_shard_list"], ",") // Example: 10-18,18-20 destShards := strings.Split(parameters["dest_shard_list"], ",") // Example: localhost:15000 vtctldEndpoint := parameters["vtctld_endpoint"] // Example: localhost:15001 vtworkerEndpoint := parameters["vtworker_endpoint"] // Optional parameters. // Example: unrelated1,unrelated2 excludeTables := parameters["exclude_tables"] // Example: 1 minHealthyRdonlyTablets := parameters["min_healthy_rdonly_tablets"] var newTasks []*automationpb.TaskContainer copySchemaTasks := NewTaskContainer() for _, destShard := range destShards { AddTask(copySchemaTasks, "CopySchemaShardTask", map[string]string{ "source_keyspace_and_shard": topoproto.KeyspaceShardString(keyspace, sourceShards[0]), "dest_keyspace_and_shard": topoproto.KeyspaceShardString(keyspace, destShard), "exclude_tables": excludeTables, "vtctld_endpoint": vtctldEndpoint, }) } newTasks = append(newTasks, copySchemaTasks) splitCloneTasks := NewTaskContainer() for _, sourceShard := range sourceShards { // TODO(mberlin): Add a semaphore as argument to limit the parallelism. AddTask(splitCloneTasks, "SplitCloneTask", map[string]string{ "keyspace": keyspace, "source_shard": sourceShard, "vtworker_endpoint": vtworkerEndpoint, "exclude_tables": excludeTables, "min_healthy_rdonly_tablets": minHealthyRdonlyTablets, }) } newTasks = append(newTasks, splitCloneTasks) // TODO(mberlin): When the framework supports nesting tasks, these wait tasks should be run before each SplitDiff. waitTasks := NewTaskContainer() for _, destShard := range destShards { AddTask(waitTasks, "WaitForFilteredReplicationTask", map[string]string{ "keyspace": keyspace, "shard": destShard, "max_delay": "30s", "vtctld_endpoint": vtctldEndpoint, }) } newTasks = append(newTasks, waitTasks) // TODO(mberlin): Run all SplitDiffTasks in parallel which do not use overlapping source shards for the comparison. for _, destShard := range destShards { splitDiffTask := NewTaskContainer() AddTask(splitDiffTask, "SplitDiffTask", map[string]string{ "keyspace": keyspace, "dest_shard": destShard, "vtworker_endpoint": vtworkerEndpoint, "exclude_tables": excludeTables, "min_healthy_rdonly_tablets": minHealthyRdonlyTablets, }) newTasks = append(newTasks, splitDiffTask) } for _, servedType := range []string{"rdonly", "replica", "master"} { migrateServedTypesTasks := NewTaskContainer() for _, sourceShard := range sourceShards { AddTask(migrateServedTypesTasks, "MigrateServedTypesTask", map[string]string{ "keyspace": keyspace, "source_shard": sourceShard, "type": servedType, "vtctld_endpoint": vtctldEndpoint, }) } newTasks = append(newTasks, migrateServedTypesTasks) } return newTasks, "", nil }
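Concretely, the containers are emitted in one fixed order: copy schema, clone, wait for filtered replication, diff, then migrate served types; tasks within a container may run concurrently while the containers themselves run sequentially. The sketch below spells that order out for hypothetical inputs (keyspace test_keyspace, source shard 10-20, destination shards 10-18 and 18-20); it only prints labels and is not part of the task:

package main

import "fmt"

func main() {
	// One inner slice per emitted task container, in the order produced above.
	phases := [][]string{
		{"CopySchemaShardTask test_keyspace/10-20 -> test_keyspace/10-18", "CopySchemaShardTask test_keyspace/10-20 -> test_keyspace/18-20"},
		{"SplitCloneTask test_keyspace/10-20"},
		{"WaitForFilteredReplicationTask test_keyspace/10-18", "WaitForFilteredReplicationTask test_keyspace/18-20"},
		{"SplitDiffTask test_keyspace/10-18"},
		{"SplitDiffTask test_keyspace/18-20"},
		{"MigrateServedTypesTask test_keyspace/10-20 rdonly"},
		{"MigrateServedTypesTask test_keyspace/10-20 replica"},
		{"MigrateServedTypesTask test_keyspace/10-20 master"},
	}
	for i, phase := range phases {
		fmt.Println(i+1, phase)
	}
}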
// Run is part of the Task interface. func (t *VerticalSplitTask) Run(parameters map[string]string) ([]*automationpb.TaskContainer, string, error) { // Example: source_keyspace sourceKeyspace := parameters["source_keyspace"] // Example: destination_keyspace destKeyspace := parameters["dest_keyspace"] // Example: 10-20 shards := strings.Split(parameters["shard_list"], ",") // Example: table1,table2 tables := parameters["tables"] // Example: localhost:15000 vtctldEndpoint := parameters["vtctld_endpoint"] // Example: localhost:15001 vtworkerEndpoint := parameters["vtworker_endpoint"] var newTasks []*automationpb.TaskContainer copySchemaTasks := NewTaskContainer() for _, shard := range shards { AddTask(copySchemaTasks, "CopySchemaShardTask", map[string]string{ "source_keyspace_and_shard": topoproto.KeyspaceShardString(sourceKeyspace, shard), "dest_keyspace_and_shard": topoproto.KeyspaceShardString(destKeyspace, shard), "tables": tables, "vtctld_endpoint": vtctldEndpoint, }) } newTasks = append(newTasks, copySchemaTasks) vSplitCloneTasks := NewTaskContainer() for _, shard := range shards { // TODO(mberlin): Add a semaphore as argument to limit the parallelism. AddTask(vSplitCloneTasks, "VerticalSplitCloneTask", map[string]string{ "dest_keyspace": destKeyspace, "shard": shard, "tables": tables, "vtworker_endpoint": vtworkerEndpoint, }) } newTasks = append(newTasks, vSplitCloneTasks) // TODO(mberlin): When the framework supports nesting tasks, these wait tasks should be run before each SplitDiff. waitTasks := NewTaskContainer() for _, shard := range shards { AddTask(waitTasks, "WaitForFilteredReplicationTask", map[string]string{ "keyspace": destKeyspace, "shard": shard, "max_delay": "30s", "vtctld_endpoint": vtctldEndpoint, }) } newTasks = append(newTasks, waitTasks) // TODO(mberlin): Run all SplitDiffTasks in parallel which do not use overlapping source shards for the comparison. for _, shard := range shards { vSplitDiffTask := NewTaskContainer() AddTask(vSplitDiffTask, "VerticalSplitDiffTask", map[string]string{ "dest_keyspace": destKeyspace, "shard": shard, "vtworker_endpoint": vtworkerEndpoint, }) newTasks = append(newTasks, vSplitDiffTask) } for _, servedType := range []string{"rdonly", "replica", "master"} { migrateServedTypesTasks := NewTaskContainer() for _, shard := range shards { AddTask(migrateServedTypesTasks, "MigrateServedFromTask", map[string]string{ "dest_keyspace": destKeyspace, "shard": shard, "type": servedType, "vtctld_endpoint": vtctldEndpoint, }) } newTasks = append(newTasks, migrateServedTypesTasks) } return newTasks, "", nil }