func createDiscoveryGateway(hc discovery.HealthCheck, topoServer topo.Server, serv topo.SrvTopoServer, cell string, retryCount int) Gateway {
	dg := &discoveryGateway{
		hc:                hc,
		tsc:               discovery.NewTabletStatsCache(hc, cell),
		topoServer:        topoServer,
		srvTopoServer:     serv,
		localCell:         cell,
		retryCount:        retryCount,
		tabletsWatchers:   make([]*discovery.TopologyWatcher, 0, 1),
		statusAggregators: make(map[string]*TabletStatusAggregator),
	}
	log.Infof("loading tablets for cells: %v", *cellsToWatch)
	for _, c := range strings.Split(*cellsToWatch, ",") {
		if c == "" {
			continue
		}
		var tr discovery.TabletRecorder = dg.hc
		if len(tabletFilters) > 0 {
			fbs, err := discovery.NewFilterByShard(dg.hc, tabletFilters)
			if err != nil {
				log.Fatalf("Cannot parse tablet_filters parameter: %v", err)
			}
			tr = fbs
		}
		ctw := discovery.NewCellTabletsWatcher(dg.topoServer, tr, c, *refreshInterval, *topoReadConcurrency)
		dg.tabletsWatchers = append(dg.tabletsWatchers, ctw)
	}
	return dg
}
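// A minimal sketch of the per-cell watcher wiring used above, shown in
// isolation. It relies on the same discovery APIs as createDiscoveryGateway;
// the filter strings, refresh interval, and concurrency below are
// hypothetical placeholder values, and the "keyspace|shard" filter syntax is
// an assumption.
func watchCellSketch(hc discovery.HealthCheck, ts topo.Server, cell string) *discovery.TopologyWatcher {
	// By default, every discovered tablet is recorded straight into the
	// healthcheck.
	var tr discovery.TabletRecorder = hc

	// Optionally narrow discovery to a few keyspace/shard pairs; the filter
	// wraps the healthcheck and drops tablets that do not match.
	filters := []string{"ks1|0", "ks2|-80"} // hypothetical filters
	if len(filters) > 0 {
		fbs, err := discovery.NewFilterByShard(hc, filters)
		if err != nil {
			log.Fatalf("Cannot parse tablet_filters parameter: %v", err)
		}
		tr = fbs
	}

	// Poll the cell's tablet records once a minute with up to five concurrent
	// topology reads (both values are illustrative).
	return discovery.NewCellTabletsWatcher(ts, tr, cell, 1*time.Minute, 5)
}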
// newBinlogPlayerController instantiates a new BinlogPlayerController.
// Use Start() and Stop() to start and stop it.
// Once stopped, you should call Close() to stop and free resources, e.g. the
// healthcheck instance.
func newBinlogPlayerController(ts topo.Server, vtClientFactory func() binlogplayer.VtClient, mysqld mysqlctl.MysqlDaemon, cell string, keyRange *topodatapb.KeyRange, sourceShard *topodatapb.Shard_SourceShard, dbName string) *BinlogPlayerController {
	healthCheck := discovery.NewHealthCheck(*binlogplayer.BinlogPlayerConnTimeout, *healthcheckRetryDelay, *healthCheckTimeout)
	return &BinlogPlayerController{
		ts:                ts,
		vtClientFactory:   vtClientFactory,
		mysqld:            mysqld,
		cell:              cell,
		keyRange:          keyRange,
		dbName:            dbName,
		sourceShard:       sourceShard,
		binlogPlayerStats: binlogplayer.NewStats(),
		// Note: healthCheck and shardReplicationWatcher remain active independent
		// of whether the BinlogPlayerController is Start()'d or Stop()'d.
		// Use Close() after Stop() to finally close them and free their resources.
		healthCheck:             healthCheck,
		tabletStatsCache:        discovery.NewTabletStatsCache(healthCheck, cell),
		shardReplicationWatcher: discovery.NewShardReplicationWatcher(ts, healthCheck, cell, sourceShard.Keyspace, sourceShard.Shard, *healthCheckTopologyRefresh, discovery.DefaultTopoReadConcurrency),
	}
}
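// A hedged lifecycle sketch based only on the doc comment above: Start() and
// Stop() control playback, while Close() is what finally frees the
// healthcheck and shard replication watcher. The Start(ctx) signature is an
// assumption; all arguments are placeholders.
func binlogPlayerLifecycleSketch(ts topo.Server, factory func() binlogplayer.VtClient, mysqld mysqlctl.MysqlDaemon, cell string, keyRange *topodatapb.KeyRange, source *topodatapb.Shard_SourceShard, dbName string) {
	bpc := newBinlogPlayerController(ts, factory, mysqld, cell, keyRange, source, dbName)
	bpc.Start(context.Background()) // assumed signature
	// ... binlog events are replayed while the controller runs ...
	bpc.Stop()
	// healthCheck and shardReplicationWatcher stay active across Start/Stop
	// cycles; only Close() releases them.
	bpc.Close()
}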
// FindHealthyRdonlyTablet returns a random healthy RDONLY tablet.
// Since we don't want to use them all, we require at least
// minHealthyRdonlyTablets servers to be healthy.
// May block up to -wait_for_healthy_rdonly_tablets_timeout.
func FindHealthyRdonlyTablet(ctx context.Context, wr *wrangler.Wrangler, tsc *discovery.TabletStatsCache, cell, keyspace, shard string, minHealthyRdonlyTablets int) (*topodatapb.TabletAlias, error) {
	if tsc == nil {
		// No healthcheck instance provided. Create one.
		healthCheck := discovery.NewHealthCheck(*remoteActionsTimeout, *healthcheckRetryDelay, *healthCheckTimeout)
		tsc = discovery.NewTabletStatsCache(healthCheck, cell)
		watcher := discovery.NewShardReplicationWatcher(wr.TopoServer(), healthCheck, cell, keyspace, shard, *healthCheckTopologyRefresh, discovery.DefaultTopoReadConcurrency)
		defer watcher.Stop()
		defer healthCheck.Close()
	}
	healthyTablets, err := waitForHealthyRdonlyTablets(ctx, wr, tsc, cell, keyspace, shard, minHealthyRdonlyTablets, *waitForHealthyTabletsTimeout)
	if err != nil {
		return nil, err
	}
	// A random server in the list is what we want.
	index := rand.Intn(len(healthyTablets))
	return healthyTablets[index].Tablet.Alias, nil
}
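// Example call, as a sketch: passing tsc=nil makes FindHealthyRdonlyTablet
// create (and tear down) its own healthcheck and watcher. The cell, keyspace,
// and shard names are hypothetical.
func pickRdonlySketch(ctx context.Context, wr *wrangler.Wrangler) (*topodatapb.TabletAlias, error) {
	// Require at least two healthy RDONLY tablets so that taking one for the
	// worker still leaves capacity for serving traffic.
	return FindHealthyRdonlyTablet(ctx, wr, nil /* tsc */, "cell1", "ks1", "-80", 2)
}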
// findTargets phase:
// - find one rdonly in the source shard
// - mark it as 'worker' pointing back to us
// - get the aliases of all the targets
func (vscw *VerticalSplitCloneWorker) findTargets(ctx context.Context) error {
	vscw.setState(WorkerStateFindTargets)

	// find an appropriate tablet in the source shard
	var err error
	vscw.sourceAlias, err = FindWorkerTablet(ctx, vscw.wr, vscw.cleaner, nil /* tsc */, vscw.cell, vscw.sourceKeyspace, "0", vscw.minHealthyRdonlyTablets)
	if err != nil {
		return fmt.Errorf("FindWorkerTablet() failed for %v/%v/0: %v", vscw.cell, vscw.sourceKeyspace, err)
	}
	vscw.wr.Logger().Infof("Using tablet %v as the source", topoproto.TabletAliasString(vscw.sourceAlias))

	// get the tablet info for it
	shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout)
	ti, err := vscw.wr.TopoServer().GetTablet(shortCtx, vscw.sourceAlias)
	cancel()
	if err != nil {
		return fmt.Errorf("cannot read tablet %v: %v", topoproto.TabletAliasString(vscw.sourceAlias), err)
	}
	vscw.sourceTablet = ti.Tablet

	// stop replication on it
	shortCtx, cancel = context.WithTimeout(ctx, *remoteActionsTimeout)
	err = vscw.wr.TabletManagerClient().StopSlave(shortCtx, vscw.sourceTablet)
	cancel()
	if err != nil {
		return fmt.Errorf("cannot stop replication on tablet %v", topoproto.TabletAliasString(vscw.sourceAlias))
	}
	wrangler.RecordStartSlaveAction(vscw.cleaner, vscw.sourceTablet)

	// Initialize healthcheck and add destination shards to it.
	vscw.healthCheck = discovery.NewHealthCheck(*remoteActionsTimeout, *healthcheckRetryDelay, *healthCheckTimeout)
	vscw.tsc = discovery.NewTabletStatsCache(vscw.healthCheck, vscw.cell)
	watcher := discovery.NewShardReplicationWatcher(vscw.wr.TopoServer(), vscw.healthCheck, vscw.cell, vscw.destinationKeyspace, vscw.destinationShard, *healthCheckTopologyRefresh, discovery.DefaultTopoReadConcurrency)
	vscw.destinationShardWatchers = append(vscw.destinationShardWatchers, watcher)

	// Make sure we find a master for each destination shard and log it.
	vscw.wr.Logger().Infof("Finding a MASTER tablet for each destination shard...")
	waitCtx, waitCancel := context.WithTimeout(ctx, *waitForHealthyTabletsTimeout)
	defer waitCancel()
	if err := vscw.tsc.WaitForTablets(waitCtx, vscw.cell, vscw.destinationKeyspace, vscw.destinationShard, []topodatapb.TabletType{topodatapb.TabletType_MASTER}); err != nil {
		return fmt.Errorf("cannot find MASTER tablet for destination shard for %v/%v (in cell: %v): %v", vscw.destinationKeyspace, vscw.destinationShard, vscw.cell, err)
	}
	masters := vscw.tsc.GetHealthyTabletStats(vscw.destinationKeyspace, vscw.destinationShard, topodatapb.TabletType_MASTER)
	if len(masters) == 0 {
		return fmt.Errorf("cannot find MASTER tablet for destination shard for %v/%v (in cell: %v) in HealthCheck: empty TabletStats list", vscw.destinationKeyspace, vscw.destinationShard, vscw.cell)
	}
	master := masters[0]

	// Get the MySQL database name of the tablet.
	keyspaceAndShard := topoproto.KeyspaceShardString(vscw.destinationKeyspace, vscw.destinationShard)
	vscw.destinationDbNames[keyspaceAndShard] = topoproto.TabletDbName(master.Tablet)

	// TODO(mberlin): Verify on the destination master that the
	// _vt.blp_checkpoint table has the latest schema.

	vscw.wr.Logger().Infof("Using tablet %v as destination master for %v/%v", topoproto.TabletAliasString(master.Tablet.Alias), vscw.destinationKeyspace, vscw.destinationShard)
	vscw.wr.Logger().Infof("NOTE: The used master of a destination shard might change over the course of the copy, e.g. due to a reparent. The HealthCheck module will track and log master changes, and any error message will always refer to the currently used master address.")

	return nil
}
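// A compact sketch of the "wait for a healthy MASTER" pattern used in
// findTargets above, assuming the same TabletStatsCache APIs; the timeout is
// illustrative.
func waitForMasterSketch(ctx context.Context, tsc *discovery.TabletStatsCache, cell, keyspace, shard string) (*topodatapb.Tablet, error) {
	waitCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
	defer cancel()
	if err := tsc.WaitForTablets(waitCtx, cell, keyspace, shard, []topodatapb.TabletType{topodatapb.TabletType_MASTER}); err != nil {
		return nil, fmt.Errorf("no MASTER tablet seen for %v/%v in cell %v: %v", keyspace, shard, cell, err)
	}
	// WaitForTablets returning success does not guarantee the stats list is
	// still non-empty, so re-check before indexing.
	masters := tsc.GetHealthyTabletStats(keyspace, shard, topodatapb.TabletType_MASTER)
	if len(masters) == 0 {
		return nil, fmt.Errorf("empty TabletStats list for %v/%v MASTER", keyspace, shard)
	}
	return masters[0].Tablet, nil
}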
// findTargets phase:
// - find one rdonly in the source shard
// - mark it as 'worker' pointing back to us
// - get the aliases of all the targets
func (scw *LegacySplitCloneWorker) findTargets(ctx context.Context) error {
	scw.setState(WorkerStateFindTargets)
	var err error

	// find an appropriate tablet in the source shards
	scw.sourceAliases = make([]*topodatapb.TabletAlias, len(scw.sourceShards))
	for i, si := range scw.sourceShards {
		scw.sourceAliases[i], err = FindWorkerTablet(ctx, scw.wr, scw.cleaner, scw.tsc, scw.cell, si.Keyspace(), si.ShardName(), scw.minHealthyRdonlyTablets)
		if err != nil {
			return fmt.Errorf("FindWorkerTablet() failed for %v/%v/%v: %v", scw.cell, si.Keyspace(), si.ShardName(), err)
		}
		scw.wr.Logger().Infof("Using tablet %v as source for %v/%v", topoproto.TabletAliasString(scw.sourceAliases[i]), si.Keyspace(), si.ShardName())
	}

	// get the tablet info for them, and stop their replication
	scw.sourceTablets = make([]*topodatapb.Tablet, len(scw.sourceAliases))
	for i, alias := range scw.sourceAliases {
		shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout)
		ti, err := scw.wr.TopoServer().GetTablet(shortCtx, alias)
		cancel()
		if err != nil {
			return fmt.Errorf("cannot read tablet %v: %v", topoproto.TabletAliasString(alias), err)
		}
		scw.sourceTablets[i] = ti.Tablet

		shortCtx, cancel = context.WithTimeout(ctx, *remoteActionsTimeout)
		err = scw.wr.TabletManagerClient().StopSlave(shortCtx, scw.sourceTablets[i])
		cancel()
		if err != nil {
			return fmt.Errorf("cannot stop replication on tablet %v", topoproto.TabletAliasString(alias))
		}
		wrangler.RecordStartSlaveAction(scw.cleaner, scw.sourceTablets[i])
	}

	// Initialize healthcheck and add destination shards to it.
	scw.healthCheck = discovery.NewHealthCheck(*remoteActionsTimeout, *healthcheckRetryDelay, *healthCheckTimeout)
	scw.tsc = discovery.NewTabletStatsCache(scw.healthCheck, scw.cell)
	for _, si := range scw.destinationShards {
		watcher := discovery.NewShardReplicationWatcher(scw.wr.TopoServer(), scw.healthCheck, scw.cell, si.Keyspace(), si.ShardName(), *healthCheckTopologyRefresh, discovery.DefaultTopoReadConcurrency)
		scw.destinationShardWatchers = append(scw.destinationShardWatchers, watcher)
	}

	// Make sure we find a master for each destination shard and log it.
	scw.wr.Logger().Infof("Finding a MASTER tablet for each destination shard...")
	for _, si := range scw.destinationShards {
		waitCtx, waitCancel := context.WithTimeout(ctx, 10*time.Second)
		defer waitCancel()
		if err := scw.tsc.WaitForTablets(waitCtx, scw.cell, si.Keyspace(), si.ShardName(), []topodatapb.TabletType{topodatapb.TabletType_MASTER}); err != nil {
			return fmt.Errorf("cannot find MASTER tablet for destination shard for %v/%v: %v", si.Keyspace(), si.ShardName(), err)
		}
		masters := scw.tsc.GetHealthyTabletStats(si.Keyspace(), si.ShardName(), topodatapb.TabletType_MASTER)
		if len(masters) == 0 {
			return fmt.Errorf("cannot find MASTER tablet for destination shard for %v/%v in HealthCheck: empty TabletStats list", si.Keyspace(), si.ShardName())
		}
		master := masters[0]

		// Get the MySQL database name of the tablet.
		shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout)
		ti, err := scw.wr.TopoServer().GetTablet(shortCtx, master.Tablet.Alias)
		cancel()
		if err != nil {
			return fmt.Errorf("cannot get the TabletInfo for destination master (%v) to find out its db name: %v", topoproto.TabletAliasString(master.Tablet.Alias), err)
		}
		keyspaceAndShard := topoproto.KeyspaceShardString(si.Keyspace(), si.ShardName())
		scw.destinationDbNames[keyspaceAndShard] = ti.DbName()

		// TODO(mberlin): Verify on the destination master that the
		// _vt.blp_checkpoint table has the latest schema.

		scw.wr.Logger().Infof("Using tablet %v as destination master for %v/%v", topoproto.TabletAliasString(master.Tablet.Alias), si.Keyspace(), si.ShardName())
	}
	scw.wr.Logger().Infof("NOTE: The used master of a destination shard might change over the course of the copy, e.g. due to a reparent. The HealthCheck module will track and log master changes, and any error message will always refer to the currently used master address.")

	// Set up the throttler for each destination shard.
	for _, si := range scw.destinationShards {
		keyspaceAndShard := topoproto.KeyspaceShardString(si.Keyspace(), si.ShardName())
		t, err := throttler.NewThrottler(keyspaceAndShard, "transactions", scw.destinationWriterCount, scw.maxTPS, throttler.ReplicationLagModuleDisabled)
		if err != nil {
			return fmt.Errorf("cannot instantiate throttler: %v", err)
		}
		scw.destinationThrottlers[keyspaceAndShard] = t
	}
	return nil
}
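// A hedged sketch of how a per-shard throttler like the ones created above
// might gate writes. It assumes the throttler exposes a Throttle(threadID)
// method returning the required backoff duration; the method name and its
// semantics are assumptions, not confirmed API.
func throttledWriteSketch(t *throttler.Throttler, threadID int, write func() error) error {
	// Keep backing off until the throttler grants this thread a slot.
	for backoff := t.Throttle(threadID); backoff > 0; backoff = t.Throttle(threadID) { // assumed method
		time.Sleep(backoff)
	}
	return write()
}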
// init phase:
// - read the destination keyspace, make sure it has 'servedFrom' values
func (scw *SplitCloneWorker) init(ctx context.Context) error {
	scw.setState(WorkerStateInit)
	var err error

	// read the keyspace and validate it
	shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout)
	scw.keyspaceInfo, err = scw.wr.TopoServer().GetKeyspace(shortCtx, scw.keyspace)
	cancel()
	if err != nil {
		return fmt.Errorf("cannot read keyspace %v: %v", scw.keyspace, err)
	}

	// find the OverlappingShards in the keyspace
	shortCtx, cancel = context.WithTimeout(ctx, *remoteActionsTimeout)
	osList, err := topotools.FindOverlappingShards(shortCtx, scw.wr.TopoServer(), scw.keyspace)
	cancel()
	if err != nil {
		return fmt.Errorf("cannot FindOverlappingShards in %v: %v", scw.keyspace, err)
	}

	// find the shard we mentioned in there, if any
	os := topotools.OverlappingShardsForShard(osList, scw.shard)
	if os == nil {
		return fmt.Errorf("the specified shard %v/%v is not in any overlapping shard", scw.keyspace, scw.shard)
	}
	scw.wr.Logger().Infof("Found overlapping shards: %+v\n", os)

	// one side should have served types, the other one none,
	// figure out which is which, then double check them all
	if len(os.Left[0].ServedTypes) > 0 {
		scw.sourceShards = os.Left
		scw.destinationShards = os.Right
	} else {
		scw.sourceShards = os.Right
		scw.destinationShards = os.Left
	}

	// Verify that filtered replication is not already enabled.
	for _, si := range scw.destinationShards {
		if len(si.SourceShards) > 0 {
			return fmt.Errorf("destination shard %v/%v has filtered replication already enabled from a previous resharding (ShardInfo is set)."+
				" This requires manual intervention, e.g. use vtctl SourceShardDelete to remove it", si.Keyspace(), si.ShardName())
		}
	}

	// validate all serving types
	servingTypes := []topodatapb.TabletType{topodatapb.TabletType_MASTER, topodatapb.TabletType_REPLICA, topodatapb.TabletType_RDONLY}
	for _, st := range servingTypes {
		for _, si := range scw.sourceShards {
			if si.GetServedType(st) == nil {
				return fmt.Errorf("source shard %v/%v is not serving type %v", si.Keyspace(), si.ShardName(), st)
			}
		}
	}
	for _, si := range scw.destinationShards {
		if len(si.ServedTypes) > 0 {
			return fmt.Errorf("destination shard %v/%v is serving some types", si.Keyspace(), si.ShardName())
		}
	}

	// read the vschema if needed
	var keyspaceSchema *vindexes.KeyspaceSchema
	if *useV3ReshardingMode {
		kschema, err := scw.wr.TopoServer().GetVSchema(ctx, scw.keyspace)
		if err != nil {
			return fmt.Errorf("cannot load VSchema for keyspace %v: %v", scw.keyspace, err)
		}
		if kschema == nil {
			return fmt.Errorf("no VSchema for keyspace %v", scw.keyspace)
		}
		keyspaceSchema, err = vindexes.BuildKeyspaceSchema(kschema, scw.keyspace)
		if err != nil {
			return fmt.Errorf("cannot build vschema for keyspace %v: %v", scw.keyspace, err)
		}
		scw.keyspaceSchema = keyspaceSchema
	}

	// Initialize healthcheck and add destination shards to it.
	scw.healthCheck = discovery.NewHealthCheck(*remoteActionsTimeout, *healthcheckRetryDelay, *healthCheckTimeout)
	scw.tsc = discovery.NewTabletStatsCache(scw.healthCheck, scw.cell)
	allShards := append(scw.sourceShards, scw.destinationShards...)
	for _, si := range allShards {
		watcher := discovery.NewShardReplicationWatcher(scw.wr.TopoServer(), scw.healthCheck, scw.cell, si.Keyspace(), si.ShardName(), *healthCheckTopologyRefresh, discovery.DefaultTopoReadConcurrency)
		scw.shardWatchers = append(scw.shardWatchers, watcher)
	}
	return nil
}
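// A minimal sketch of the source/destination classification in init above,
// under the code's own assumption that exactly one side of an overlapping
// shard pair still has served types. The OverlappingShards/ShardInfo types
// are assumed to match their usage above.
func classifyOverlapSketch(os *topotools.OverlappingShards) (sources, destinations []*topo.ShardInfo) {
	// Example: resharding ks/0 into ks/-80 and ks/80-. Before traffic is
	// migrated, ks/0 serves MASTER/REPLICA/RDONLY while the new shards serve
	// nothing, so the side without served types must be the destination.
	if len(os.Left[0].ServedTypes) > 0 {
		return os.Left, os.Right
	}
	return os.Right, os.Left
}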
func (wr *Wrangler) waitForDrainInCell(ctx context.Context, cell, keyspace, shard string, servedType topodatapb.TabletType, retryDelay, healthCheckTopologyRefresh, healthcheckRetryDelay, healthCheckTimeout, initialWait time.Duration) error {
	// Create the healthcheck module, with a cache.
	hc := discovery.NewHealthCheck(healthCheckTimeout /* connectTimeout */, healthcheckRetryDelay, healthCheckTimeout)
	defer hc.Close()
	tsc := discovery.NewTabletStatsCache(hc, cell)

	// Create a tablet watcher.
	watcher := discovery.NewShardReplicationWatcher(wr.TopoServer(), hc, cell, keyspace, shard, healthCheckTopologyRefresh, discovery.DefaultTopoReadConcurrency)
	defer watcher.Stop()

	// Wait for at least one tablet.
	if err := tsc.WaitForTablets(ctx, cell, keyspace, shard, []topodatapb.TabletType{servedType}); err != nil {
		return fmt.Errorf("%v: error waiting for initial %v tablets for %v/%v: %v", cell, servedType, keyspace, shard, err)
	}

	wr.Logger().Infof("%v: Waiting for %.1f seconds to make sure that the discovery module retrieves healthcheck information from all tablets.", cell, initialWait.Seconds())
	// Wait at least -initial_wait to make sure we see all healthy tablets;
	// otherwise we might miss some. The parameter defaults to the healthcheck
	// timeout, and there is no point in waiting longer: we would only pick up
	// slow tablets, and vtgate would not serve from such tablets anyway.
	time.Sleep(initialWait)

	// Now check the QPS rate of all tablets until the timeout expires.
	startTime := time.Now()
	for {
		// map key: tablet uid
		drainedHealthyTablets := make(map[uint32]*discovery.TabletStats)
		notDrainedHealthyTablets := make(map[uint32]*discovery.TabletStats)

		healthyTablets := tsc.GetHealthyTabletStats(keyspace, shard, servedType)
		for _, ts := range healthyTablets {
			ts := ts // copy: do not store the address of the loop variable
			if ts.Stats.Qps == 0.0 {
				drainedHealthyTablets[ts.Tablet.Alias.Uid] = &ts
			} else {
				notDrainedHealthyTablets[ts.Tablet.Alias.Uid] = &ts
			}
		}

		if len(drainedHealthyTablets) == len(healthyTablets) {
			wr.Logger().Infof("%v: All %d healthy tablets were drained after %.1f seconds (not counting %.1f seconds for the initial wait).", cell, len(healthyTablets), time.Since(startTime).Seconds(), initialWait.Seconds())
			break
		}

		// Continue waiting, sleep in between.
		deadlineString := ""
		if d, ok := ctx.Deadline(); ok {
			deadlineString = fmt.Sprintf(" up to %.1f more seconds", time.Until(d).Seconds())
		}
		wr.Logger().Infof("%v: Waiting%v for all healthy tablets to be drained (%d/%d done).", cell, deadlineString, len(drainedHealthyTablets), len(healthyTablets))

		timer := time.NewTimer(retryDelay)
		select {
		case <-ctx.Done():
			timer.Stop()
			var l []string
			for _, ts := range notDrainedHealthyTablets {
				l = append(l, formatTabletStats(ts))
			}
			return fmt.Errorf("%v: WaitForDrain failed for %v tablets in %v/%v. Only %d/%d tablets were drained. err: %v. List of tablets which were not drained: %v", cell, servedType, keyspace, shard, len(drainedHealthyTablets), len(healthyTablets), ctx.Err(), strings.Join(l, ";"))
		case <-timer.C:
		}
	}

	return nil
}
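// A hedged sketch of driving waitForDrainInCell across several cells in
// parallel. The real WaitForDrain command presumably fans out similarly, but
// this helper is illustrative only.
func waitForDrainAllCellsSketch(ctx context.Context, wr *Wrangler, cells []string, keyspace, shard string, servedType topodatapb.TabletType, retryDelay, topologyRefresh, hcRetryDelay, hcTimeout, initialWait time.Duration) error {
	errs := make(chan error, len(cells))
	for _, cell := range cells {
		go func(cell string) {
			errs <- wr.waitForDrainInCell(ctx, cell, keyspace, shard, servedType, retryDelay, topologyRefresh, hcRetryDelay, hcTimeout, initialWait)
		}(cell)
	}
	// Collect a result from every cell and report the first failure, if any.
	var firstErr error
	for range cells {
		if err := <-errs; err != nil && firstErr == nil {
			firstErr = err
		}
	}
	return firstErr
}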