Exemple #1
0
// RebuildShard updates the SrvShard objects and underlying serving graph.
//
// Re-read from TopologyServer to make sure we are using the side
// effects of all actions.
//
// This function will start each cell over from the beginning on ErrBadVersion,
// so it doesn't need a lock on the shard.
func RebuildShard(ctx context.Context, log logutil.Logger, ts topo.Server, keyspace, shard string, cells []string, lockTimeout time.Duration) (*topo.ShardInfo, error) {
	log.Infof("RebuildShard %v/%v", keyspace, shard)

	span := trace.NewSpanFromContext(ctx)
	span.StartLocal("topotools.RebuildShard")
	defer span.Finish()
	ctx = trace.NewContext(ctx, span)

	// read the existing shard info. It has to exist.
	shardInfo, err := ts.GetShard(ctx, keyspace, shard)
	if err != nil {
		return nil, err
	}

	// rebuild all cells in parallel
	wg := sync.WaitGroup{}
	rec := concurrency.AllErrorRecorder{}
	for _, cell := range shardInfo.Cells {
		// skip this cell if we shouldn't rebuild it
		if !topo.InCellList(cell, cells) {
			continue
		}

		wg.Add(1)
		go func(cell string) {
			defer wg.Done()
			rec.RecordError(rebuildCellSrvShard(ctx, log, ts, shardInfo, cell))
		}(cell)
	}
	wg.Wait()

	return shardInfo, rec.Error()
}
Exemple #2
0
// refreshTablet needs to be run after an action may have changed the current
// state of the tablet.
func (agent *ActionAgent) refreshTablet(ctx context.Context, reason string) error {
	log.Infof("Executing post-action state refresh: %v", reason)

	span := trace.NewSpanFromContext(ctx)
	span.StartLocal("ActionAgent.refreshTablet")
	span.Annotate("reason", reason)
	defer span.Finish()
	ctx = trace.NewContext(ctx, span)

	// Actions should have side effects on the tablet, so reload the data.
	ti, err := agent.TopoServer.GetTablet(ctx, agent.TabletAlias)
	if err != nil {
		log.Warningf("Failed rereading tablet after %v - services may be inconsistent: %v", reason, err)
		return fmt.Errorf("refreshTablet failed rereading tablet after %v: %v", reason, err)
	}
	tablet := ti.Tablet

	if updatedTablet := agent.checkTabletMysqlPort(ctx, tablet); updatedTablet != nil {
		tablet = updatedTablet
	}

	agent.updateState(ctx, tablet, reason)
	log.Infof("Done with post-action state refresh")
	return nil
}
Exemple #3
0
// refreshTablet needs to be run after an action may have changed the current
// state of the tablet.
func (agent *ActionAgent) refreshTablet(ctx context.Context, reason string) error {
	log.Infof("Executing post-action state refresh")

	span := trace.NewSpanFromContext(ctx)
	span.StartLocal("ActionAgent.refreshTablet")
	span.Annotate("reason", reason)
	defer span.Finish()
	ctx = trace.NewContext(ctx, span)

	// Save the old tablet so callbacks can have a better idea of
	// the precise nature of the transition.
	oldTablet := agent.Tablet().Tablet

	// Actions should have side effects on the tablet, so reload the data.
	ti, err := agent.readTablet(ctx)
	if err != nil {
		log.Warningf("Failed rereading tablet after %v - services may be inconsistent: %v", reason, err)
		return fmt.Errorf("Failed rereading tablet after %v: %v", reason, err)
	}

	if updatedTablet := agent.checkTabletMysqlPort(ctx, ti); updatedTablet != nil {
		agent.mutex.Lock()
		agent._tablet = updatedTablet
		agent.mutex.Unlock()
	}

	if err := agent.updateState(ctx, oldTablet, reason); err != nil {
		return err
	}
	log.Infof("Done with post-action state refresh")
	return nil
}
Exemple #4
0
// FindAllTabletAliasesInShardByCell uses the replication graph to find all the
// tablet aliases in the given shard.
//
// It can return ErrPartialResult if some cells were not fetched,
// in which case the result only contains the cells that were fetched.
//
// The tablet aliases are sorted by cell, then by UID.
func FindAllTabletAliasesInShardByCell(ctx context.Context, ts Server, keyspace, shard string, cells []string) ([]TabletAlias, error) {
	span := trace.NewSpanFromContext(ctx)
	span.StartLocal("topo.FindAllTabletAliasesInShardbyCell")
	span.Annotate("keyspace", keyspace)
	span.Annotate("shard", shard)
	span.Annotate("num_cells", len(cells))
	defer span.Finish()
	ctx = trace.NewContext(ctx, span)

	// read the shard information to find the cells
	si, err := GetShard(ctx, ts, keyspace, shard)
	if err != nil {
		return nil, err
	}

	resultAsMap := make(map[TabletAlias]bool)
	if si.MasterAlias != nil && !TabletAliasIsZero(si.MasterAlias) {
		if InCellList(si.MasterAlias.Cell, cells) {
			resultAsMap[ProtoToTabletAlias(si.MasterAlias)] = true
		}
	}

	// read the replication graph in each cell and add all found tablets
	wg := sync.WaitGroup{}
	mutex := sync.Mutex{}
	rec := concurrency.AllErrorRecorder{}
	for _, cell := range si.Cells {
		if !InCellList(cell, cells) {
			continue
		}
		wg.Add(1)
		go func(cell string) {
			defer wg.Done()
			sri, err := ts.GetShardReplication(ctx, cell, keyspace, shard)
			if err != nil {
				rec.RecordError(fmt.Errorf("GetShardReplication(%v, %v, %v) failed: %v", cell, keyspace, shard, err))
				return
			}

			mutex.Lock()
			for _, node := range sri.Nodes {
				resultAsMap[ProtoToTabletAlias(node.TabletAlias)] = true
			}
			mutex.Unlock()
		}(cell)
	}
	wg.Wait()
	err = nil
	if rec.HasErrors() {
		log.Warningf("FindAllTabletAliasesInShard(%v,%v): got partial result: %v", keyspace, shard, rec.Error())
		err = ErrPartialResult
	}

	result := make([]TabletAlias, 0, len(resultAsMap))
	for a := range resultAsMap {
		result = append(result, a)
	}
	sort.Sort(TabletAliasList(result))
	return result, err
}