// TestVtctlThrottlerCommands tests all vtctl commands from the
// "Resharding Throttler" group.
func TestVtctlThrottlerCommands(t *testing.T) {
	// Run a throttler server using the default process throttle manager.
	listener, err := net.Listen("tcp", ":0")
	if err != nil {
		t.Fatalf("Cannot listen: %v", err)
	}
	s := grpc.NewServer()
	go s.Serve(listener)
	grpcthrottlerserver.StartServer(s, throttler.GlobalManager)
	addr := fmt.Sprintf("localhost:%v", listener.Addr().(*net.TCPAddr).Port)

	ts := zktestserver.New(t, []string{"cell1", "cell2"})
	vp := NewVtctlPipe(t, ts)
	defer vp.Close()

	// Get and set rate commands do not fail when no throttler is registered.
	{
		got, err := vp.RunAndOutput([]string{"ThrottlerMaxRates", "-server", addr})
		if err != nil {
			t.Fatalf("VtctlPipe.RunAndOutput() failed: %v", err)
		}
		want := "no active throttlers"
		if !strings.Contains(got, want) {
			t.Fatalf("ThrottlerMaxRates() = %v, want substring = %v", got, want)
		}
	}
	{
		got, err := vp.RunAndOutput([]string{"ThrottlerSetMaxRate", "-server", addr, "23"})
		if err != nil {
			t.Fatalf("VtctlPipe.RunAndOutput() failed: %v", err)
		}
		want := "no active throttlers"
		if !strings.Contains(got, want) {
			t.Fatalf("ThrottlerSetMaxRate(23) = %v, want substring = %v", got, want)
		}
	}

	// Add a throttler and check the commands again.
	t1, err := throttler.NewThrottler("t1", "TPS", 1 /* threadCount */, 2323, throttler.ReplicationLagModuleDisabled)
	if err != nil {
		t.Fatal(err)
	}
	defer t1.Close()
	// MaxRates() will return the initial rate.
	expectRate(t, vp, addr, "2323")

	// Disable the module by setting the rate to 'unlimited'.
	setRate(t, vp, addr, "unlimited")
	expectRate(t, vp, addr, "unlimited")

	// Re-enable it by setting a limit.
	setRate(t, vp, addr, "9999")
	expectRate(t, vp, addr, "9999")
}
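// The helpers used above (setRate, expectRate) are not shown in this
// excerpt. A minimal sketch of what they might look like, assuming they
// wrap VtctlPipe.RunAndOutput the same way the inline checks do:

// setRate is a hypothetical helper: it invokes the vtctl
// ThrottlerSetMaxRate command and fails the test on any error.
func setRate(t *testing.T, vp *VtctlPipe, addr, rate string) {
	if _, err := vp.RunAndOutput([]string{"ThrottlerSetMaxRate", "-server", addr, rate}); err != nil {
		t.Fatalf("ThrottlerSetMaxRate(%v) failed: %v", rate, err)
	}
}

// expectRate is a hypothetical helper: it queries the current max rates
// and asserts that the expected rate appears in the output.
func expectRate(t *testing.T, vp *VtctlPipe, addr, rate string) {
	got, err := vp.RunAndOutput([]string{"ThrottlerMaxRates", "-server", addr})
	if err != nil {
		t.Fatalf("ThrottlerMaxRates failed: %v", err)
	}
	if !strings.Contains(got, rate) {
		t.Fatalf("ThrottlerMaxRates() = %v, want substring = %v", got, rate)
	}
}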
func (tf *testFixture) setUp() error {
	for _, name := range throttlerNames {
		t, err := throttler.NewThrottler(name, "TPS", 1 /* threadCount */, 1, throttler.ReplicationLagModuleDisabled)
		if err != nil {
			return err
		}
		tf.throttlers = append(tf.throttlers, t)
	}
	return nil
}
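// Every throttler registered in setUp must eventually be closed so it
// unregisters from the global manager. A sketch of the matching cleanup,
// assuming the fixture only needs to release what setUp created:
func (tf *testFixture) tearDown() {
	for _, t := range tf.throttlers {
		t.Close()
	}
	tf.throttlers = nil
}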
func (scw *SplitCloneWorker) createThrottlers() error {
	scw.throttlersMu.Lock()
	defer scw.throttlersMu.Unlock()

	for _, si := range scw.destinationShards {
		// Set up the throttler for each destination shard.
		keyspaceAndShard := topoproto.KeyspaceShardString(si.Keyspace(), si.ShardName())
		t, err := throttler.NewThrottler(keyspaceAndShard, "transactions", scw.destinationWriterCount, scw.maxTPS, scw.maxReplicationLag)
		if err != nil {
			return fmt.Errorf("cannot instantiate throttler: %v", err)
		}
		scw.throttlers[keyspaceAndShard] = t
	}
	return nil
}
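// createThrottlers needs a matching cleanup step. A hypothetical sketch
// of that counterpart, assuming the same mutex guards the map:
func (scw *SplitCloneWorker) closeThrottlers() {
	scw.throttlersMu.Lock()
	defer scw.throttlersMu.Unlock()

	for keyspaceAndShard, t := range scw.throttlers {
		t.Close()
		delete(scw.throttlers, keyspaceAndShard)
	}
}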
func newClient(master *master, replica *replica) *client {
	t, err := throttler.NewThrottler("client", "TPS", 1, throttler.MaxRateModuleDisabled, 5 /* seconds */)
	if err != nil {
		log.Fatal(err)
	}

	healthCheck := discovery.NewHealthCheck(1*time.Minute, 5*time.Second, 1*time.Minute)
	c := &client{
		master:      master,
		healthCheck: healthCheck,
		throttler:   t,
		stopChan:    make(chan struct{}),
	}
	c.healthCheck.SetListener(c, false /* sendDownEvents */)
	c.healthCheck.AddTablet(replica.fakeTablet.Tablet, "name")
	return c
}
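// The client presumably drives its writes through the throttler. A sketch
// of the canonical blocking loop, following the same Throttle()/backoff
// pattern used by BinlogPlayer.ApplyBinlogEvents below; c.execute() is a
// hypothetical helper standing in for one throttled transaction:
func (c *client) throttledLoop() {
	for {
		select {
		case <-c.stopChan:
			return
		default:
		}
		// Block until the throttler admits the next request.
		for {
			backoff := c.throttler.Throttle(0 /* threadID */)
			if backoff == throttler.NotThrottled {
				break
			}
			time.Sleep(backoff)
		}
		c.execute() // hypothetical: issue one transaction against the master
	}
}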
func newReplica(lagUpdateInterval, degrationInterval, degrationDuration time.Duration) *replica {
	t := &testing.T{}
	ts := zktestserver.New(t, []string{"cell1"})
	wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient())

	db := fakesqldb.Register()
	fakeTablet := testlib.NewFakeTablet(t, wr, "cell1", 0,
		topodatapb.TabletType_REPLICA, db, testlib.TabletKeyspaceShard(t, "ks", "-80"))
	fakeTablet.StartActionLoop(t, wr)

	target := querypb.Target{
		Keyspace:   "ks",
		Shard:      "-80",
		TabletType: topodatapb.TabletType_REPLICA,
	}
	qs := fakes.NewStreamHealthQueryService(target)
	grpcqueryservice.Register(fakeTablet.RPCServer, qs)

	throttler, err := throttler.NewThrottler("replica", "TPS", 1, *rate, throttler.ReplicationLagModuleDisabled)
	if err != nil {
		log.Fatal(err)
	}

	var nextDegration time.Time
	if degrationInterval != time.Duration(0) {
		nextDegration = time.Now().Add(degrationInterval)
	}
	r := &replica{
		fakeTablet:        fakeTablet,
		qs:                qs,
		throttler:         throttler,
		replicationStream: make(chan time.Time, 1*1024*1024),
		lagUpdateInterval: lagUpdateInterval,
		degrationInterval: degrationInterval,
		degrationDuration: degrationDuration,
		nextDegration:     nextDegration,
		stopChan:          make(chan struct{}),
	}
	r.wg.Add(1)
	go r.processReplicationStream()
	return r
}
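// processReplicationStream is not shown in this excerpt. A rough sketch of
// how the degration window set up above could be applied; the actual demo
// logic may differ, and the halved rate is purely an assumption:
func (r *replica) currentRate(now time.Time) int64 {
	if !r.nextDegration.IsZero() && now.After(r.nextDegration) {
		if now.Before(r.nextDegration.Add(r.degrationDuration)) {
			// Inside the degration window: pretend we can only
			// sustain half the configured rate.
			return *rate / 2
		}
		// Window is over; schedule the next degration.
		r.nextDegration = now.Add(r.degrationInterval)
	}
	return *rate
}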
// clone phase:
// - copy the data from source tablets to destination masters (with replication on)
// Assumes that the schema has already been created on each destination tablet
// (probably from vtctl's CopySchemaShard)
func (vscw *VerticalSplitCloneWorker) clone(ctx context.Context) error {
	vscw.setState(WorkerStateCloneOffline)
	start := time.Now()
	defer func() {
		statsStateDurationsNs.Set(string(WorkerStateCloneOffline), time.Now().Sub(start).Nanoseconds())
	}()

	// get source schema
	shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout)
	sourceSchemaDefinition, err := vscw.wr.GetSchema(shortCtx, vscw.sourceAlias, vscw.tables, nil, true)
	cancel()
	if err != nil {
		return fmt.Errorf("cannot get schema from source %v: %v", topoproto.TabletAliasString(vscw.sourceAlias), err)
	}
	if len(sourceSchemaDefinition.TableDefinitions) == 0 {
		return fmt.Errorf("no tables matching the table filter")
	}
	vscw.wr.Logger().Infof("Source tablet has %v tables to copy", len(sourceSchemaDefinition.TableDefinitions))
	vscw.tableStatusList.initialize(sourceSchemaDefinition)

	// In parallel, setup the channels to send SQL data chunks to
	// for each destination tablet.
	//
	// mu protects firstError
	mu := sync.Mutex{}
	var firstError error

	ctx, cancelCopy := context.WithCancel(ctx)
	processError := func(format string, args ...interface{}) {
		vscw.wr.Logger().Errorf(format, args...)
		mu.Lock()
		if firstError == nil {
			firstError = fmt.Errorf(format, args...)
			cancelCopy()
		}
		mu.Unlock()
	}

	destinationWaitGroup := sync.WaitGroup{}

	// we create one channel for the destination tablet. It
	// is sized to have a buffer of a maximum of
	// destinationWriterCount * 2 items, to hopefully
	// always have data. We then have
	// destinationWriterCount go routines reading from it.
	insertChannel := make(chan string, vscw.destinationWriterCount*2)
	// Set up the throttler for the destination shard.
	keyspaceAndShard := topoproto.KeyspaceShardString(vscw.destinationKeyspace, vscw.destinationShard)
	destinationThrottler, err := throttler.NewThrottler(
		keyspaceAndShard, "transactions", vscw.destinationWriterCount, vscw.maxTPS, throttler.ReplicationLagModuleDisabled)
	if err != nil {
		return fmt.Errorf("cannot instantiate throttler: %v", err)
	}
	for j := 0; j < vscw.destinationWriterCount; j++ {
		destinationWaitGroup.Add(1)
		go func(threadID int) {
			defer destinationWaitGroup.Done()
			defer destinationThrottler.ThreadFinished(threadID)

			executor := newExecutor(vscw.wr, vscw.tsc, destinationThrottler, vscw.destinationKeyspace, vscw.destinationShard, threadID)
			if err := executor.fetchLoop(ctx, insertChannel); err != nil {
				processError("executor.fetchLoop failed: %v", err)
			}
		}(j)
	}

	// Now for each table, read data chunks and send them to insertChannel
	sourceWaitGroup := sync.WaitGroup{}
	sema := sync2.NewSemaphore(vscw.sourceReaderCount, 0)
	dbName := vscw.destinationDbNames[topoproto.KeyspaceShardString(vscw.destinationKeyspace, vscw.destinationShard)]
	for tableIndex, td := range sourceSchemaDefinition.TableDefinitions {
		if td.Type == tmutils.TableView {
			continue
		}

		chunks, err := generateChunks(ctx, vscw.wr, vscw.sourceTablet, td, vscw.minTableSizeForSplit, vscw.sourceReaderCount)
		if err != nil {
			return err
		}
		vscw.tableStatusList.setThreadCount(tableIndex, len(chunks)-1)

		for _, c := range chunks {
			sourceWaitGroup.Add(1)
			go func(td *tabletmanagerdatapb.TableDefinition, tableIndex int, chunk chunk) {
				defer sourceWaitGroup.Done()

				sema.Acquire()
				defer sema.Release()

				vscw.tableStatusList.threadStarted(tableIndex)

				// Start streaming from the source tablet.
				rr, err := NewRestartableResultReader(ctx, vscw.wr.Logger(), vscw.wr.TopoServer(), vscw.sourceAlias, td, chunk)
				if err != nil {
					processError("NewRestartableResultReader failed: %v", err)
					return
				}
				defer rr.Close()

				// process the data
				if err := vscw.processData(ctx, dbName, td, tableIndex, rr, insertChannel, vscw.destinationPackCount); err != nil {
					processError("processData failed: %v", err)
				}
				vscw.tableStatusList.threadDone(tableIndex)
			}(td, tableIndex, c)
		}
	}
	sourceWaitGroup.Wait()

	close(insertChannel)
	destinationWaitGroup.Wait()
	// Stop Throttler.
	destinationThrottler.Close()
	if firstError != nil {
		return firstError
	}

	// then create and populate the blp_checkpoint table
	if vscw.strategy.skipPopulateBlpCheckpoint {
		vscw.wr.Logger().Infof("Skipping populating the blp_checkpoint table")
	} else {
		// get the current position from the source
		shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout)
		status, err := vscw.wr.TabletManagerClient().SlaveStatus(shortCtx, vscw.sourceTablet)
		cancel()
		if err != nil {
			return err
		}

		queries := make([]string, 0, 4)
		queries = append(queries, binlogplayer.CreateBlpCheckpoint()...)
		flags := ""
		if vscw.strategy.dontStartBinlogPlayer {
			flags = binlogplayer.BlpFlagDontStart
		}
		queries = append(queries, binlogplayer.PopulateBlpCheckpoint(0, status.Position, vscw.maxTPS, throttler.ReplicationLagModuleDisabled, time.Now().Unix(), flags))
		vscw.wr.Logger().Infof("Making and populating blp_checkpoint table")
		if err := runSQLCommands(ctx, vscw.wr, vscw.tsc, vscw.destinationKeyspace, vscw.destinationShard, dbName, queries); err != nil {
			processError("blp_checkpoint queries failed: %v", err)
		}
		if firstError != nil {
			return firstError
		}
	}

	// Now we're done with data copy, update the shard's source info.
	if vscw.strategy.skipSetSourceShards {
		vscw.wr.Logger().Infof("Skipping setting SourceShard on destination shard.")
	} else {
		vscw.wr.Logger().Infof("Setting SourceShard on shard %v/%v", vscw.destinationKeyspace, vscw.destinationShard)
		shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout)
		err := vscw.wr.SetSourceShards(shortCtx, vscw.destinationKeyspace, vscw.destinationShard, []*topodatapb.TabletAlias{vscw.sourceAlias}, vscw.tables)
		cancel()
		if err != nil {
			return fmt.Errorf("failed to set source shards: %v", err)
		}
	}

	err = vscw.findRefreshTargets(ctx)
	if err != nil {
		return fmt.Errorf("failed before refreshing state on destination tablets: %v", err)
	}
	// And force a state refresh (re-read topo) on all destination tablets.
	// The master tablet will end up starting filtered replication
	// at this point.
	for _, tabletAlias := range vscw.refreshAliases {
		destinationWaitGroup.Add(1)
		go func(ti *topo.TabletInfo) {
			defer destinationWaitGroup.Done()
			vscw.wr.Logger().Infof("Refreshing state on tablet %v", ti.AliasString())
			shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout)
			err := vscw.wr.TabletManagerClient().RefreshState(shortCtx, ti.Tablet)
			cancel()
			if err != nil {
				processError("RefreshState failed on tablet %v: %v", ti.AliasString(), err)
			}
		}(vscw.refreshTablets[*tabletAlias])
	}
	destinationWaitGroup.Wait()
	return firstError
}
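// Each writer goroutine above owns one throttler thread (threadID) and
// must pair Throttle() calls with a final ThreadFinished(). A hypothetical
// reduction of what executor.fetchLoop plausibly does with the throttler;
// the exec callback stands in for the actual write path:
func fetchLoopSketch(ctx context.Context, t *throttler.Throttler, threadID int, insertChannel chan string, exec func(string) error) error {
	for sql := range insertChannel {
		// Block until the throttler admits this thread's next transaction.
		for {
			backoff := t.Throttle(threadID)
			if backoff == throttler.NotThrottled {
				break
			}
			time.Sleep(backoff)
		}
		if err := exec(sql); err != nil {
			return err
		}
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}
	}
	return nil
}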
// findTargets phase:
// - find one rdonly in the source shard
// - mark it as 'worker' pointing back to us
// - get the aliases of all the targets
func (scw *LegacySplitCloneWorker) findTargets(ctx context.Context) error {
	scw.setState(WorkerStateFindTargets)
	var err error

	// find an appropriate tablet in the source shards
	scw.sourceAliases = make([]*topodatapb.TabletAlias, len(scw.sourceShards))
	for i, si := range scw.sourceShards {
		scw.sourceAliases[i], err = FindWorkerTablet(ctx, scw.wr, scw.cleaner, scw.tsc, scw.cell, si.Keyspace(), si.ShardName(), scw.minHealthyRdonlyTablets)
		if err != nil {
			return fmt.Errorf("FindWorkerTablet() failed for %v/%v/%v: %v", scw.cell, si.Keyspace(), si.ShardName(), err)
		}
		scw.wr.Logger().Infof("Using tablet %v as source for %v/%v", topoproto.TabletAliasString(scw.sourceAliases[i]), si.Keyspace(), si.ShardName())
	}

	// get the tablet info for them, and stop their replication
	scw.sourceTablets = make([]*topodatapb.Tablet, len(scw.sourceAliases))
	for i, alias := range scw.sourceAliases {
		shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout)
		ti, err := scw.wr.TopoServer().GetTablet(shortCtx, alias)
		cancel()
		if err != nil {
			return fmt.Errorf("cannot read tablet %v: %v", topoproto.TabletAliasString(alias), err)
		}
		scw.sourceTablets[i] = ti.Tablet

		shortCtx, cancel = context.WithTimeout(ctx, *remoteActionsTimeout)
		err = scw.wr.TabletManagerClient().StopSlave(shortCtx, scw.sourceTablets[i])
		cancel()
		if err != nil {
			return fmt.Errorf("cannot stop replication on tablet %v: %v", topoproto.TabletAliasString(alias), err)
		}

		wrangler.RecordStartSlaveAction(scw.cleaner, scw.sourceTablets[i])
	}

	// Initialize healthcheck and add destination shards to it.
	scw.healthCheck = discovery.NewHealthCheck(*remoteActionsTimeout, *healthcheckRetryDelay, *healthCheckTimeout)
	scw.tsc = discovery.NewTabletStatsCache(scw.healthCheck, scw.cell)
	for _, si := range scw.destinationShards {
		watcher := discovery.NewShardReplicationWatcher(scw.wr.TopoServer(), scw.healthCheck,
			scw.cell, si.Keyspace(), si.ShardName(),
			*healthCheckTopologyRefresh, discovery.DefaultTopoReadConcurrency)
		scw.destinationShardWatchers = append(scw.destinationShardWatchers, watcher)
	}

	// Make sure we find a master for each destination shard and log it.
	scw.wr.Logger().Infof("Finding a MASTER tablet for each destination shard...")
	for _, si := range scw.destinationShards {
		waitCtx, waitCancel := context.WithTimeout(ctx, 10*time.Second)
		defer waitCancel()
		if err := scw.tsc.WaitForTablets(waitCtx, scw.cell, si.Keyspace(), si.ShardName(), []topodatapb.TabletType{topodatapb.TabletType_MASTER}); err != nil {
			return fmt.Errorf("cannot find MASTER tablet for destination shard for %v/%v: %v", si.Keyspace(), si.ShardName(), err)
		}
		masters := scw.tsc.GetHealthyTabletStats(si.Keyspace(), si.ShardName(), topodatapb.TabletType_MASTER)
		if len(masters) == 0 {
			return fmt.Errorf("cannot find MASTER tablet for destination shard for %v/%v in HealthCheck: empty TabletStats list", si.Keyspace(), si.ShardName())
		}
		master := masters[0]

		// Get the MySQL database name of the tablet.
		shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout)
		ti, err := scw.wr.TopoServer().GetTablet(shortCtx, master.Tablet.Alias)
		cancel()
		if err != nil {
			return fmt.Errorf("cannot get the TabletInfo for destination master (%v) to find out its db name: %v", topoproto.TabletAliasString(master.Tablet.Alias), err)
		}
		keyspaceAndShard := topoproto.KeyspaceShardString(si.Keyspace(), si.ShardName())
		scw.destinationDbNames[keyspaceAndShard] = ti.DbName()

		// TODO(mberlin): Verify on the destination master that the
		// _vt.blp_checkpoint table has the latest schema.

		scw.wr.Logger().Infof("Using tablet %v as destination master for %v/%v", topoproto.TabletAliasString(master.Tablet.Alias), si.Keyspace(), si.ShardName())
	}
	scw.wr.Logger().Infof("NOTE: The used master of a destination shard might change over the course of the copy e.g. due to a reparent. The HealthCheck module will track and log master changes and any error message will always refer to the actually used master address.")

	// Set up the throttler for each destination shard.
	for _, si := range scw.destinationShards {
		keyspaceAndShard := topoproto.KeyspaceShardString(si.Keyspace(), si.ShardName())
		t, err := throttler.NewThrottler(
			keyspaceAndShard, "transactions", scw.destinationWriterCount, scw.maxTPS, throttler.ReplicationLagModuleDisabled)
		if err != nil {
			return fmt.Errorf("cannot instantiate throttler: %v", err)
		}
		scw.destinationThrottlers[keyspaceAndShard] = t
	}

	return nil
}
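// Note that this legacy variant disables the replication-lag module
// (throttler.ReplicationLagModuleDisabled), while createThrottlers above
// passes scw.maxReplicationLag. When the module is active, something must
// feed lag observations into the throttler. A sketch of such a feedback
// hook; the listener interface and the RecordReplicationLag signature are
// assumptions based on the discovery and throttler packages:
type lagFeeder struct {
	t *throttler.Throttler
}

// StatsUpdate forwards every healthcheck update to the throttler so its
// replication-lag module can adapt the rate. (Signature assumed.)
func (f *lagFeeder) StatsUpdate(ts *discovery.TabletStats) {
	f.t.RecordReplicationLag(time.Now(), ts)
}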
// copy phase:
// - copy the data from source tablets to destination masters (with replication on)
// Assumes that the schema has already been created on each destination tablet
// (probably from vtctl's CopySchemaShard)
func (scw *SplitCloneWorker) clone(ctx context.Context, state StatusWorkerState) error {
	if state != WorkerStateCloneOnline && state != WorkerStateCloneOffline {
		panic(fmt.Sprintf("invalid state passed to clone(): %v", state))
	}
	scw.setState(state)
	start := time.Now()
	defer func() {
		statsStateDurationsNs.Set(string(state), time.Now().Sub(start).Nanoseconds())
	}()

	var firstSourceTablet *topodatapb.Tablet
	if state == WorkerStateCloneOffline {
		// Use the first source tablet which we took offline.
		firstSourceTablet = scw.sourceTablets[0]
	} else {
		// Pick any healthy serving source tablet.
		si := scw.sourceShards[0]
		tablets := discovery.RemoveUnhealthyTablets(
			scw.healthCheck.GetTabletStatsFromTarget(si.Keyspace(), si.ShardName(), topodatapb.TabletType_RDONLY))
		if len(tablets) == 0 {
			// We fail fast on this problem and don't retry because at the start all tablets should be healthy.
			return fmt.Errorf("no healthy RDONLY tablet in source shard (%v) available (required to find out the schema)", topoproto.KeyspaceShardString(si.Keyspace(), si.ShardName()))
		}
		firstSourceTablet = tablets[0].Tablet
	}
	var statsCounters []*stats.Counters
	var tableStatusList *tableStatusList
	switch state {
	case WorkerStateCloneOnline:
		statsCounters = []*stats.Counters{statsOnlineInsertsCounters, statsOnlineUpdatesCounters, statsOnlineDeletesCounters}
		tableStatusList = scw.tableStatusListOnline
	case WorkerStateCloneOffline:
		statsCounters = []*stats.Counters{statsOfflineInsertsCounters, statsOfflineUpdatesCounters, statsOfflineDeletesCounters}
		tableStatusList = scw.tableStatusListOffline
	}

	sourceSchemaDefinition, err := scw.getSourceSchema(ctx, firstSourceTablet)
	if err != nil {
		return err
	}
	scw.wr.Logger().Infof("Source tablet 0 has %v tables to copy", len(sourceSchemaDefinition.TableDefinitions))
	tableStatusList.initialize(sourceSchemaDefinition)

	// In parallel, setup the channels to send SQL data chunks to for each destination tablet:
	//
	// mu protects the context for cancelation, and firstError
	mu := sync.Mutex{}
	var firstError error

	ctx, cancelCopy := context.WithCancel(ctx)
	processError := func(format string, args ...interface{}) {
		scw.wr.Logger().Errorf(format, args...)
		mu.Lock()
		if firstError == nil {
			firstError = fmt.Errorf(format, args...)
			cancelCopy()
		}
		mu.Unlock()
	}

	insertChannels := make([]chan string, len(scw.destinationShards))
	destinationThrottlers := make([]*throttler.Throttler, len(scw.destinationShards))
	destinationWaitGroup := sync.WaitGroup{}
	for shardIndex, si := range scw.destinationShards {
		// we create one channel per destination tablet. It
		// is sized to have a buffer of a maximum of
		// destinationWriterCount * 2 items, to hopefully
		// always have data. We then have
		// destinationWriterCount go routines reading from it.
		insertChannels[shardIndex] = make(chan string, scw.destinationWriterCount*2)

		// Set up the throttler for each destination shard.
		keyspaceAndShard := topoproto.KeyspaceShardString(si.Keyspace(), si.ShardName())
		t, err := throttler.NewThrottler(
			keyspaceAndShard, "transactions", scw.destinationWriterCount, scw.maxTPS, throttler.ReplicationLagModuleDisabled)
		if err != nil {
			return fmt.Errorf("cannot instantiate throttler: %v", err)
		}
		destinationThrottlers[shardIndex] = t
		defer func(i int) {
			if t := destinationThrottlers[i]; t != nil {
				t.Close()
			}
		}(shardIndex)

		go func(keyspace, shard string, insertChannel chan string) {
			for j := 0; j < scw.destinationWriterCount; j++ {
				destinationWaitGroup.Add(1)
				go func(throttler *throttler.Throttler, threadID int) {
					defer destinationWaitGroup.Done()
					defer throttler.ThreadFinished(threadID)

					executor := newExecutor(scw.wr, scw.healthCheck, throttler, keyspace, shard, threadID)
					if err := executor.fetchLoop(ctx, insertChannel); err != nil {
						processError("executor.fetchLoop failed: %v", err)
					}
				}(t, j)
			}
		}(si.Keyspace(), si.ShardName(), insertChannels[shardIndex])
	}

	// Now for each table, read data chunks and send them to all
	// insertChannels
	sourceWaitGroup := sync.WaitGroup{}
	sema := sync2.NewSemaphore(scw.sourceReaderCount, 0)
	for tableIndex, td := range sourceSchemaDefinition.TableDefinitions {
		if td.Type == tmutils.TableView {
			continue
		}
		td = reorderColumnsPrimaryKeyFirst(td)

		var keyResolver keyspaceIDResolver
		if *useV3ReshardingMode {
			keyResolver, err = newV3ResolverFromTableDefinition(scw.keyspaceSchema, td)
			if err != nil {
				return fmt.Errorf("cannot resolve v3 sharding keys for keyspace %v: %v", scw.keyspace, err)
			}
		} else {
			keyResolver, err = newV2Resolver(scw.keyspaceInfo, td)
			if err != nil {
				return fmt.Errorf("cannot resolve sharding keys for keyspace %v: %v", scw.keyspace, err)
			}
		}

		// TODO(mberlin): We're going to chunk *all* source shards based on the MIN
		// and MAX values of the *first* source shard. Is this going to be a problem?
		chunks, err := generateChunks(ctx, scw.wr, firstSourceTablet, td, scw.minTableSizeForSplit, scw.sourceReaderCount)
		if err != nil {
			return err
		}
		tableStatusList.setThreadCount(tableIndex, len(chunks))

		for _, c := range chunks {
			sourceWaitGroup.Add(1)
			go func(td *tabletmanagerdatapb.TableDefinition, tableIndex int, chunk chunk) {
				defer sourceWaitGroup.Done()
				// We need our own error per Go routine to avoid races.
				var err error

				sema.Acquire()
				defer sema.Release()

				tableStatusList.threadStarted(tableIndex)

				if state == WorkerStateCloneOnline {
					// Wait for enough healthy tablets (they might have become unhealthy
					// and their replication lag might have increased since we started.)
					if err := scw.waitForTablets(ctx, scw.sourceShards, *retryDuration); err != nil {
						processError("table=%v chunk=%v: no healthy source tablets found (gave up after %v): %v", td.Name, chunk, *retryDuration, err)
						return
					}
				}

				// Set up readers for the diff. There will be one reader for every
				// source and destination shard.
				sourceReaders := make([]ResultReader, len(scw.sourceShards))
				destReaders := make([]ResultReader, len(scw.destinationShards))
				for shardIndex, si := range scw.sourceShards {
					var sourceAlias *topodatapb.TabletAlias
					if state == WorkerStateCloneOffline {
						// Use the source tablet which we took offline for this phase.
						sourceAlias = scw.sourceAliases[shardIndex]
					} else {
						// Pick any healthy serving source tablet.
						tablets := discovery.RemoveUnhealthyTablets(
							scw.healthCheck.GetTabletStatsFromTarget(si.Keyspace(), si.ShardName(), topodatapb.TabletType_RDONLY))
						if len(tablets) == 0 {
							processError("no healthy RDONLY tablets in source shard (%v) available", topoproto.KeyspaceShardString(si.Keyspace(), si.ShardName()))
							return
						}
						sourceAlias = scw.tabletTracker.Track(tablets)
						defer scw.tabletTracker.Untrack(sourceAlias)
					}

					sourceResultReader, err := NewRestartableResultReader(ctx, scw.wr.Logger(), scw.wr.TopoServer(), sourceAlias, td, chunk)
					if err != nil {
						processError("NewRestartableResultReader for source tablet: %v failed: %v", sourceAlias, err)
						return
					}
					defer sourceResultReader.Close()
					sourceReaders[shardIndex] = sourceResultReader
				}

				// Wait for enough healthy tablets (they might have become unhealthy
				// and their replication lag might have increased due to a previous
				// chunk pipeline.)
				if err := scw.waitForTablets(ctx, scw.destinationShards, *retryDuration); err != nil {
					processError("table=%v chunk=%v: no healthy destination tablets found (gave up after %v): %v", td.Name, chunk, *retryDuration, err)
					return
				}

				for shardIndex, si := range scw.destinationShards {
					// Pick any healthy serving destination tablet.
					tablets := discovery.RemoveUnhealthyTablets(
						scw.healthCheck.GetTabletStatsFromTarget(si.Keyspace(), si.ShardName(), topodatapb.TabletType_RDONLY))
					if len(tablets) == 0 {
						processError("no healthy RDONLY tablets in destination shard (%v) available", topoproto.KeyspaceShardString(si.Keyspace(), si.ShardName()))
						return
					}
					destAlias := scw.tabletTracker.Track(tablets)
					defer scw.tabletTracker.Untrack(destAlias)

					destResultReader, err := NewRestartableResultReader(ctx, scw.wr.Logger(), scw.wr.TopoServer(), destAlias, td, chunk)
					if err != nil {
						processError("NewRestartableResultReader for dest tablet: %v failed: %v", destAlias, err)
						return
					}
					defer destResultReader.Close()
					destReaders[shardIndex] = destResultReader
				}

				var sourceReader ResultReader
				var destReader ResultReader
				if len(sourceReaders) >= 2 {
					sourceReader, err = NewResultMerger(sourceReaders, len(td.PrimaryKeyColumns))
					if err != nil {
						processError("NewResultMerger for source tablets failed: %v", err)
						return
					}
				} else {
					sourceReader = sourceReaders[0]
				}
				if len(destReaders) >= 2 {
					destReader, err = NewResultMerger(destReaders, len(td.PrimaryKeyColumns))
					if err != nil {
						processError("NewResultMerger for dest tablets failed: %v", err)
						return
					}
				} else {
					destReader = destReaders[0]
				}

				dbNames := make([]string, len(scw.destinationShards))
				for i, si := range scw.destinationShards {
					keyspaceAndShard := topoproto.KeyspaceShardString(si.Keyspace(), si.ShardName())
					dbNames[i] = scw.destinationDbNames[keyspaceAndShard]
				}
				// Compare the data and reconcile any differences.
				differ, err := NewRowDiffer2(ctx, sourceReader, destReader, td, tableStatusList, tableIndex,
					scw.destinationShards, keyResolver,
					insertChannels, ctx.Done(), dbNames, scw.writeQueryMaxRows, scw.writeQueryMaxSize, scw.writeQueryMaxRowsDelete, statsCounters)
				if err != nil {
					processError("NewRowDiffer2 failed: %v", err)
					return
				}
				// Ignore the diff report because all diffs should get reconciled.
				_ /* DiffReport */, err = differ.Diff()
				if err != nil {
					processError("RowDiffer2 failed: %v", err)
					return
				}

				tableStatusList.threadDone(tableIndex)
			}(td, tableIndex, c)
		}
	}
	sourceWaitGroup.Wait()

	for shardIndex := range scw.destinationShards {
		close(insertChannels[shardIndex])
	}
	destinationWaitGroup.Wait()
	// Stop Throttlers.
	for i, t := range destinationThrottlers {
		t.Close()
		destinationThrottlers[i] = nil
	}
	if firstError != nil {
		return firstError
	}

	if state == WorkerStateCloneOffline {
		// Create and populate the blp_checkpoint table to give filtered replication
		// a starting point.
		if scw.strategy.skipPopulateBlpCheckpoint {
			scw.wr.Logger().Infof("Skipping populating the blp_checkpoint table")
		} else {
			queries := make([]string, 0, 4)
			queries = append(queries, binlogplayer.CreateBlpCheckpoint()...)
			flags := ""
			if scw.strategy.dontStartBinlogPlayer {
				flags = binlogplayer.BlpFlagDontStart
			}

			// get the current position from the sources
			for shardIndex := range scw.sourceShards {
				shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout)
				status, err := scw.wr.TabletManagerClient().SlaveStatus(shortCtx, scw.sourceTablets[shardIndex])
				cancel()
				if err != nil {
					return err
				}

				queries = append(queries, binlogplayer.PopulateBlpCheckpoint(uint32(shardIndex), status.Position, scw.maxTPS, throttler.ReplicationLagModuleDisabled, time.Now().Unix(), flags))
			}

			for _, si := range scw.destinationShards {
				destinationWaitGroup.Add(1)
				go func(keyspace, shard string) {
					defer destinationWaitGroup.Done()
					scw.wr.Logger().Infof("Making and populating blp_checkpoint table")
					keyspaceAndShard := topoproto.KeyspaceShardString(keyspace, shard)
					if err := runSQLCommands(ctx, scw.wr, scw.healthCheck, keyspace, shard, scw.destinationDbNames[keyspaceAndShard], queries); err != nil {
						processError("blp_checkpoint queries failed: %v", err)
					}
				}(si.Keyspace(), si.ShardName())
			}
			destinationWaitGroup.Wait()
			if firstError != nil {
				return firstError
			}
		}

		// Configure filtered replication by setting the SourceShard info.
		// The master tablets won't enable filtered replication (the binlog player)
		// until they re-read the topology due to a restart or a reload.
		// TODO(alainjobart) this is a superset, some shards may not
		// overlap, have to deal with this better (for N -> M splits
		// where both N>1 and M>1)
		if scw.strategy.skipSetSourceShards {
			scw.wr.Logger().Infof("Skipping setting SourceShard on destination shards.")
		} else {
			for _, si := range scw.destinationShards {
				scw.wr.Logger().Infof("Setting SourceShard on shard %v/%v", si.Keyspace(), si.ShardName())
				shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout)
				err := scw.wr.SetSourceShards(shortCtx, si.Keyspace(), si.ShardName(), scw.sourceAliases, nil)
				cancel()
				if err != nil {
					return fmt.Errorf("failed to set source shards: %v", err)
				}
			}
		}

		// Force a state refresh (re-read topo) on all destination tablets.
		// The master tablet will end up starting filtered replication at this point.
		//
		// Find all tablets first, then refresh the state on each in parallel.
		err = scw.findRefreshTargets(ctx)
		if err != nil {
			return fmt.Errorf("failed before refreshing state on destination tablets: %v", err)
		}
		for shardIndex := range scw.destinationShards {
			for _, tabletAlias := range scw.refreshAliases[shardIndex] {
				destinationWaitGroup.Add(1)
				go func(ti *topo.TabletInfo) {
					defer destinationWaitGroup.Done()
					scw.wr.Logger().Infof("Refreshing state on tablet %v", ti.AliasString())
					shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout)
					err := scw.wr.TabletManagerClient().RefreshState(shortCtx, ti.Tablet)
					cancel()
					if err != nil {
						processError("RefreshState failed on tablet %v: %v", ti.AliasString(), err)
					}
				}(scw.refreshTablets[shardIndex][*tabletAlias])
			}
		}
	} // clonePhase == offline

	destinationWaitGroup.Wait()
	return firstError
}
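// Both clone variants seed filtered replication via
// binlogplayer.PopulateBlpCheckpoint. For orientation, a hypothetical
// stand-in showing the kind of statement it could emit; the exact table
// schema and column names are assumptions, not confirmed by this code:
func populateBlpCheckpointSketch(index uint32, position string, maxTPS, maxReplicationLag, timeUpdated int64, flags string) string {
	return fmt.Sprintf("INSERT INTO _vt.blp_checkpoint "+
		"(source_shard_uid, pos, max_tps, max_replication_lag, time_updated, transaction_timestamp, flags) "+
		"VALUES (%v, '%v', %v, %v, %v, 0, '%v')",
		index, position, maxTPS, maxReplicationLag, timeUpdated, flags)
}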
// ApplyBinlogEvents makes an RPC request to BinlogServer
// and processes the events. It will return nil if the provided context
// was canceled, or if we reached the stopping point.
// It will return io.EOF if the server stops sending us updates.
// It may return any other error it encounters.
func (blp *BinlogPlayer) ApplyBinlogEvents(ctx context.Context) error {
	// Instantiate the throttler based on the configuration stored in the db.
	maxTPS, maxReplicationLag, err := blp.readThrottlerSettings()
	if err != nil {
		log.Error(err)
		return err
	}
	t, err := throttler.NewThrottler(
		fmt.Sprintf("BinlogPlayer/%d", blp.uid), "transactions", 1 /* threadCount */, maxTPS, maxReplicationLag)
	if err != nil {
		err := fmt.Errorf("failed to instantiate throttler: %v", err)
		log.Error(err)
		return err
	}
	defer t.Close()

	// Log the mode of operation and when the player stops.
	if len(blp.tables) > 0 {
		log.Infof("BinlogPlayer client %v for tables %v starting @ '%v', server: %v",
			blp.uid,
			blp.tables,
			blp.position,
			blp.tablet,
		)
	} else {
		log.Infof("BinlogPlayer client %v for keyrange '%v-%v' starting @ '%v', server: %v",
			blp.uid,
			hex.EncodeToString(blp.keyRange.Start),
			hex.EncodeToString(blp.keyRange.End),
			blp.position,
			blp.tablet,
		)
	}
	if !blp.stopPosition.IsZero() {
		// We need to stop at some point. Sanity check the point.
		switch {
		case blp.position.Equal(blp.stopPosition):
			log.Infof("Not starting BinlogPlayer, we're already at the desired position %v", blp.stopPosition)
			return nil
		case blp.position.AtLeast(blp.stopPosition):
			return fmt.Errorf("starting point %v greater than stopping point %v", blp.position, blp.stopPosition)
		default:
			log.Infof("Will stop player when reaching %v", blp.stopPosition)
		}
	}

	clientFactory, ok := clientFactories[*binlogPlayerProtocol]
	if !ok {
		return fmt.Errorf("no binlog player client factory named %v", *binlogPlayerProtocol)
	}
	blplClient := clientFactory()
	err = blplClient.Dial(blp.tablet, *BinlogPlayerConnTimeout)
	if err != nil {
		err := fmt.Errorf("error dialing binlog server: %v", err)
		log.Error(err)
		return err
	}
	defer blplClient.Close()

	// Get the current charset of our connection, so we can ask the stream server
	// to check that they match. The streamer will also only send per-statement
	// charset data if that statement's charset is different from what we specify.
	if dbClient, ok := blp.dbClient.(*DBClient); ok {
		blp.defaultCharset, err = dbClient.dbConn.GetCharset()
		if err != nil {
			return fmt.Errorf("can't get charset to request binlog stream: %v", err)
		}
		log.Infof("original charset: %v", blp.defaultCharset)
		blp.currentCharset = blp.defaultCharset
		// Restore original charset when we're done.
		defer func() {
			// If the connection has been closed, there's no need to restore
			// this connection-specific setting.
			if dbClient.dbConn == nil {
				return
			}
			log.Infof("restoring original charset %v", blp.defaultCharset)
			if csErr := dbClient.dbConn.SetCharset(blp.defaultCharset); csErr != nil {
				log.Errorf("can't restore original charset %v: %v", blp.defaultCharset, csErr)
			}
		}()
	}

	var stream BinlogTransactionStream
	if len(blp.tables) > 0 {
		stream, err = blplClient.StreamTables(ctx, replication.EncodePosition(blp.position), blp.tables, blp.defaultCharset)
	} else {
		stream, err = blplClient.StreamKeyRange(ctx, replication.EncodePosition(blp.position), blp.keyRange, blp.defaultCharset)
	}
	if err != nil {
		err := fmt.Errorf("error sending streaming query to binlog server: %v", err)
		log.Error(err)
		return err
	}

	for {
		// Block if we are throttled.
		for {
			backoff := t.Throttle(0 /* threadID */)
			if backoff == throttler.NotThrottled {
				break
			}
			// We don't bother checking for context cancellation here because the
			// sleep will block only up to 1 second. (Usually, backoff is 1s / rate
			// e.g. a rate of 1000 TPS results in a backoff of 1 ms.)
			time.Sleep(backoff)
		}

		// get the response
		response, err := stream.Recv()
		if err != nil {
			switch err {
			case context.Canceled:
				return nil
			default:
				// if the context is canceled, we
				// return nil (some RPC
				// implementations will remap the
				// context error to their own errors)
				select {
				case <-ctx.Done():
					if ctx.Err() == context.Canceled {
						return nil
					}
				default:
				}
				return fmt.Errorf("error received from Stream: %v", err)
			}
		}

		// process the transaction
		for {
			ok, err = blp.processTransaction(response)
			if err != nil {
				return fmt.Errorf("error in processing binlog event: %v", err)
			}
			if ok {
				if !blp.stopPosition.IsZero() {
					if blp.position.AtLeast(blp.stopPosition) {
						log.Infof("Reached stopping position, done playing logs")
						return nil
					}
				}
				break
			}
			log.Infof("Retrying txn")
			time.Sleep(1 * time.Second)
		}
	}
}
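// readThrottlerSettings is referenced above but not shown. A hypothetical
// reduction, assuming the settings live in the same _vt.blp_checkpoint row
// that PopulateBlpCheckpoint writes; the column names, the ExecuteFetch
// signature, and the ParseInt64 helpers are assumptions:
func (blp *BinlogPlayer) readThrottlerSettingsSketch() (int64, int64, error) {
	query := fmt.Sprintf("SELECT max_tps, max_replication_lag FROM _vt.blp_checkpoint WHERE source_shard_uid=%v", blp.uid)
	qr, err := blp.dbClient.ExecuteFetch(query, 1)
	if err != nil {
		return 0, 0, err
	}
	if len(qr.Rows) != 1 {
		return 0, 0, fmt.Errorf("checkpoint information not available in db for %v", blp.uid)
	}
	maxTPS, err := qr.Rows[0][0].ParseInt64()
	if err != nil {
		return 0, 0, fmt.Errorf("failed to parse max_tps column: %v", err)
	}
	maxReplicationLag, err := qr.Rows[0][1].ParseInt64()
	if err != nil {
		return 0, 0, fmt.Errorf("failed to parse max_replication_lag column: %v", err)
	}
	return maxTPS, maxReplicationLag, nil
}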