// RebuildShard updates the SrvShard objects and underlying serving graph. // // Re-read from TopologyServer to make sure we are using the side // effects of all actions. // // This function will start each cell over from the beginning on ErrBadVersion, // so it doesn't need a lock on the shard. func RebuildShard(ctx context.Context, log logutil.Logger, ts topo.Server, keyspace, shard string, cells []string, lockTimeout time.Duration) (*topo.ShardInfo, error) { log.Infof("RebuildShard %v/%v", keyspace, shard) span := trace.NewSpanFromContext(ctx) span.StartLocal("topotools.RebuildShard") defer span.Finish() ctx = trace.NewContext(ctx, span) // read the existing shard info. It has to exist. shardInfo, err := ts.GetShard(ctx, keyspace, shard) if err != nil { return nil, err } // rebuild all cells in parallel wg := sync.WaitGroup{} rec := concurrency.AllErrorRecorder{} for _, cell := range shardInfo.Cells { // skip this cell if we shouldn't rebuild it if !topo.InCellList(cell, cells) { continue } wg.Add(1) go func(cell string) { defer wg.Done() rec.RecordError(rebuildCellSrvShard(ctx, log, ts, shardInfo, cell)) }(cell) } wg.Wait() return shardInfo, rec.Error() }
// refreshTablet needs to be run after an action may have changed the current // state of the tablet. func (agent *ActionAgent) refreshTablet(ctx context.Context, reason string) error { log.Infof("Executing post-action state refresh: %v", reason) span := trace.NewSpanFromContext(ctx) span.StartLocal("ActionAgent.refreshTablet") span.Annotate("reason", reason) defer span.Finish() ctx = trace.NewContext(ctx, span) // Actions should have side effects on the tablet, so reload the data. ti, err := agent.TopoServer.GetTablet(ctx, agent.TabletAlias) if err != nil { log.Warningf("Failed rereading tablet after %v - services may be inconsistent: %v", reason, err) return fmt.Errorf("refreshTablet failed rereading tablet after %v: %v", reason, err) } tablet := ti.Tablet if updatedTablet := agent.checkTabletMysqlPort(ctx, tablet); updatedTablet != nil { tablet = updatedTablet } agent.updateState(ctx, tablet, reason) log.Infof("Done with post-action state refresh") return nil }
// refreshTablet needs to be run after an action may have changed the current // state of the tablet. func (agent *ActionAgent) refreshTablet(ctx context.Context, reason string) error { log.Infof("Executing post-action state refresh") span := trace.NewSpanFromContext(ctx) span.StartLocal("ActionAgent.refreshTablet") span.Annotate("reason", reason) defer span.Finish() ctx = trace.NewContext(ctx, span) // Save the old tablet so callbacks can have a better idea of // the precise nature of the transition. oldTablet := agent.Tablet().Tablet // Actions should have side effects on the tablet, so reload the data. ti, err := agent.readTablet(ctx) if err != nil { log.Warningf("Failed rereading tablet after %v - services may be inconsistent: %v", reason, err) return fmt.Errorf("Failed rereading tablet after %v: %v", reason, err) } if updatedTablet := agent.checkTabletMysqlPort(ctx, ti); updatedTablet != nil { agent.mutex.Lock() agent._tablet = updatedTablet agent.mutex.Unlock() } if err := agent.updateState(ctx, oldTablet, reason); err != nil { return err } log.Infof("Done with post-action state refresh") return nil }
// FindAllTabletAliasesInShardByCell uses the replication graph to find all the // tablet aliases in the given shard. // // It can return ErrPartialResult if some cells were not fetched, // in which case the result only contains the cells that were fetched. // // The tablet aliases are sorted by cell, then by UID. func FindAllTabletAliasesInShardByCell(ctx context.Context, ts Server, keyspace, shard string, cells []string) ([]TabletAlias, error) { span := trace.NewSpanFromContext(ctx) span.StartLocal("topo.FindAllTabletAliasesInShardbyCell") span.Annotate("keyspace", keyspace) span.Annotate("shard", shard) span.Annotate("num_cells", len(cells)) defer span.Finish() ctx = trace.NewContext(ctx, span) // read the shard information to find the cells si, err := GetShard(ctx, ts, keyspace, shard) if err != nil { return nil, err } resultAsMap := make(map[TabletAlias]bool) if si.MasterAlias != nil && !TabletAliasIsZero(si.MasterAlias) { if InCellList(si.MasterAlias.Cell, cells) { resultAsMap[ProtoToTabletAlias(si.MasterAlias)] = true } } // read the replication graph in each cell and add all found tablets wg := sync.WaitGroup{} mutex := sync.Mutex{} rec := concurrency.AllErrorRecorder{} for _, cell := range si.Cells { if !InCellList(cell, cells) { continue } wg.Add(1) go func(cell string) { defer wg.Done() sri, err := ts.GetShardReplication(ctx, cell, keyspace, shard) if err != nil { rec.RecordError(fmt.Errorf("GetShardReplication(%v, %v, %v) failed: %v", cell, keyspace, shard, err)) return } mutex.Lock() for _, node := range sri.Nodes { resultAsMap[ProtoToTabletAlias(node.TabletAlias)] = true } mutex.Unlock() }(cell) } wg.Wait() err = nil if rec.HasErrors() { log.Warningf("FindAllTabletAliasesInShard(%v,%v): got partial result: %v", keyspace, shard, rec.Error()) err = ErrPartialResult } result := make([]TabletAlias, 0, len(resultAsMap)) for a := range resultAsMap { result = append(result, a) } sort.Sort(TabletAliasList(result)) return result, err }