Пример #1
0
// findCellsForRebuild will find all the cells in the given keyspace
// and create an entry if the map for them
func (wr *Wrangler) findCellsForRebuild(ki *topo.KeyspaceInfo, keyspace string, shards []string, cells []string, srvKeyspaceMap map[string]*topo.SrvKeyspace) error {
	er := concurrency.FirstErrorRecorder{}
	mu := sync.Mutex{}
	wg := sync.WaitGroup{}
	for _, shard := range shards {
		wg.Add(1)
		go func(shard string) {
			if si, err := wr.ts.GetShard(keyspace, shard); err != nil {
				er.RecordError(fmt.Errorf("GetShard(%v,%v) failed: %v", keyspace, shard, err))
			} else {
				mu.Lock()
				for _, cell := range si.Cells {
					if !topo.InCellList(cell, cells) {
						continue
					}
					if _, ok := srvKeyspaceMap[cell]; !ok {
						srvKeyspaceMap[cell] = &topo.SrvKeyspace{
							Shards:             make([]topo.SrvShard, 0, 16),
							ShardingColumnName: ki.ShardingColumnName,
							ShardingColumnType: ki.ShardingColumnType,
							ServedFrom:         ki.ServedFrom,
						}
					}
				}
				mu.Unlock()
			}
			wg.Done()
		}(shard)
	}
	wg.Wait()
	return er.Error()
}
Пример #2
0
// Clone will do all the necessary actions to copy all the data from a
// source to a set of destinations.
func (wr *Wrangler) Clone(srcTabletAlias topo.TabletAlias, dstTabletAliases []topo.TabletAlias, forceMasterSnapshot bool, snapshotConcurrency, fetchConcurrency, fetchRetryCount int, serverMode bool) error {
	// make sure the destination can be restored into (otherwise
	// there is no point in taking the snapshot in the first place),
	// and reserve it.
	reserved := make([]topo.TabletAlias, 0, len(dstTabletAliases))
	for _, dstTabletAlias := range dstTabletAliases {
		err := wr.ReserveForRestore(srcTabletAlias, dstTabletAlias)
		if err != nil {
			wr.UnreserveForRestoreMulti(reserved)
			return err
		}
		reserved = append(reserved, dstTabletAlias)
		wr.Logger().Infof("Successfully reserved %v for restore", dstTabletAlias)
	}

	// take the snapshot, or put the server in SnapshotSource mode
	// srcFilePath, parentAlias, slaveStartRequired, readWrite
	sr, originalType, err := wr.Snapshot(srcTabletAlias, forceMasterSnapshot, snapshotConcurrency, serverMode)
	if err != nil {
		// The snapshot failed so un-reserve the destinations and return
		wr.UnreserveForRestoreMulti(reserved)
		return err
	}

	// try to restore the snapshot
	// In serverMode, and in the case where we're replicating from
	// the master, we can't wait for replication, as the master is down.
	wg := sync.WaitGroup{}
	rec := concurrency.FirstErrorRecorder{}
	for _, dstTabletAlias := range dstTabletAliases {
		wg.Add(1)
		go func(dstTabletAlias topo.TabletAlias) {
			e := wr.Restore(srcTabletAlias, sr.ManifestPath, dstTabletAlias, sr.ParentAlias, fetchConcurrency, fetchRetryCount, true, serverMode && originalType == topo.TYPE_MASTER)
			rec.RecordError(e)
			wg.Done()
		}(dstTabletAlias)
	}
	wg.Wait()
	err = rec.Error()

	// in any case, fix the server
	if serverMode {
		resetErr := wr.SnapshotSourceEnd(srcTabletAlias, sr.SlaveStartRequired, sr.ReadOnly, originalType)
		if resetErr != nil {
			if err == nil {
				// If there is no other error, this matters.
				err = resetErr
			} else {
				// In the context of a larger failure, just log a note to cleanup.
				wr.Logger().Errorf("Failed to reset snapshot source: %v - vtctl SnapshotSourceEnd is required", resetErr)
			}
		}
	}

	return err
}
Пример #3
0
// FindAllShardsInKeyspace reads and returns all the existing shards in
// a keyspace. It doesn't take any lock.
func (ts Server) FindAllShardsInKeyspace(ctx context.Context, keyspace string) (map[string]*ShardInfo, error) {
	shards, err := ts.GetShardNames(ctx, keyspace)
	if err != nil {
		return nil, err
	}

	result := make(map[string]*ShardInfo, len(shards))
	wg := sync.WaitGroup{}
	mu := sync.Mutex{}
	rec := concurrency.FirstErrorRecorder{}
	for _, shard := range shards {
		wg.Add(1)
		go func(shard string) {
			defer wg.Done()
			si, err := ts.GetShard(ctx, keyspace, shard)
			if err != nil {
				rec.RecordError(fmt.Errorf("GetShard(%v,%v) failed: %v", keyspace, shard, err))
				return
			}
			mu.Lock()
			result[shard] = si
			mu.Unlock()
		}(shard)
	}
	wg.Wait()
	if rec.HasErrors() {
		return nil, rec.Error()
	}
	return result, nil
}
Пример #4
0
// FindAllShardsInKeyspace reads and returns all the existing shards in
// a keyspace. It doesn't take any lock.
func (ts Server) FindAllShardsInKeyspace(ctx context.Context, keyspace string) (map[string]*ShardInfo, error) {
	shards, err := ts.GetShardNames(ctx, keyspace)
	if err != nil {
		return nil, fmt.Errorf("failed to get list of shards for keyspace '%v': %v", keyspace, err)
	}

	result := make(map[string]*ShardInfo, len(shards))
	wg := sync.WaitGroup{}
	mu := sync.Mutex{}
	rec := concurrency.FirstErrorRecorder{}
	for _, shard := range shards {
		wg.Add(1)
		go func(shard string) {
			defer wg.Done()
			si, err := ts.GetShard(ctx, keyspace, shard)
			if err != nil {
				if err == ErrNoNode {
					log.Warningf("GetShard(%v,%v) returned ErrNoNode, consider checking the topology.", keyspace, shard)
				} else {
					rec.RecordError(fmt.Errorf("GetShard(%v,%v) failed: %v", keyspace, shard, err))
				}
				return
			}
			mu.Lock()
			result[shard] = si
			mu.Unlock()
		}(shard)
	}
	wg.Wait()
	if rec.HasErrors() {
		return nil, rec.Error()
	}
	return result, nil
}
Пример #5
0
// This function should only be used with an action lock on the keyspace
// - otherwise the consistency of the serving graph data can't be
// guaranteed.
//
// Take data from the global keyspace and rebuild the local serving
// copies in each cell.
func (wr *Wrangler) rebuildKeyspace(keyspace string, cells []string) error {
	log.Infof("rebuildKeyspace %v", keyspace)

	ki, err := wr.ts.GetKeyspace(keyspace)
	if err != nil {
		// Temporary change: we try to keep going even if node
		// doesn't exist
		if err != topo.ErrNoNode {
			return err
		}
		ki = topo.NewKeyspaceInfo(keyspace, &topo.Keyspace{})
	}

	shards, err := wr.ts.GetShardNames(keyspace)
	if err != nil {
		return err
	}

	// Rebuild all shards in parallel.
	wg := sync.WaitGroup{}
	er := concurrency.FirstErrorRecorder{}
	for _, shard := range shards {
		wg.Add(1)
		go func(shard string) {
			if err := wr.RebuildShardGraph(keyspace, shard, cells); err != nil {
				er.RecordError(fmt.Errorf("RebuildShardGraph failed: %v/%v %v", keyspace, shard, err))
			}
			wg.Done()
		}(shard)
	}
	wg.Wait()
	if er.HasErrors() {
		return er.Error()
	}

	// Scan the first shard to discover which cells need local serving data.
	aliases, err := topo.FindAllTabletAliasesInShard(wr.ts, keyspace, shards[0])
	if err != nil {
		return err
	}

	// srvKeyspaceMap is a map:
	//   key: local keyspace {cell,keyspace}
	//   value: topo.SrvKeyspace object being built
	srvKeyspaceMap := make(map[cellKeyspace]*topo.SrvKeyspace)
	for _, alias := range aliases {
		keyspaceLocation := cellKeyspace{alias.Cell, keyspace}
		if _, ok := srvKeyspaceMap[keyspaceLocation]; !ok {
			// before adding keyspaceLocation to the map of
			// of KeyspaceByPath, we check this is a
			// serving tablet. No serving tablet in shard
			// 0 means we're not rebuilding the serving
			// graph in that cell.  This is somewhat
			// expensive, but we only do it on all the
			// non-serving tablets in a shard before we
			// find a serving tablet.
			ti, err := wr.ts.GetTablet(alias)
			if err != nil {
				return err
			}
			if !ti.IsInServingGraph() {
				continue
			}

			srvKeyspaceMap[keyspaceLocation] = &topo.SrvKeyspace{
				Shards:             make([]topo.SrvShard, 0, 16),
				ShardingColumnName: ki.ShardingColumnName,
				ShardingColumnType: ki.ShardingColumnType,
				ServedFrom:         ki.ServedFrom,
			}
		}
	}

	// for each entry in the srvKeyspaceMap map, we do the following:
	// - read the ShardInfo structures for each shard
	// - compute the union of the db types (replica, master, ...)
	// - sort the shards in the list by range
	// - check the ranges are compatible (no hole, covers everything)
	for ck, srvKeyspace := range srvKeyspaceMap {
		keyspaceDbTypes := make(map[topo.TabletType]bool)
		srvKeyspace.Partitions = make(map[topo.TabletType]*topo.KeyspacePartition)
		for _, shard := range shards {
			srvShard, err := wr.ts.GetSrvShard(ck.cell, ck.keyspace, shard)
			if err != nil {
				return err
			}
			for _, tabletType := range srvShard.TabletTypes {
				keyspaceDbTypes[tabletType] = true
			}

			// for each type this shard is supposed to serve,
			// add it to srvKeyspace.Partitions
			for _, tabletType := range srvShard.ServedTypes {
				if _, ok := srvKeyspace.Partitions[tabletType]; !ok {
					srvKeyspace.Partitions[tabletType] = &topo.KeyspacePartition{
						Shards: make([]topo.SrvShard, 0)}
				}
				srvKeyspace.Partitions[tabletType].Shards = append(srvKeyspace.Partitions[tabletType].Shards, *srvShard)
			}
		}

		srvKeyspace.TabletTypes = make([]topo.TabletType, 0, len(keyspaceDbTypes))
		for dbType := range keyspaceDbTypes {
			srvKeyspace.TabletTypes = append(srvKeyspace.TabletTypes, dbType)
		}

		first := true
		for tabletType, partition := range srvKeyspace.Partitions {
			topo.SrvShardArray(partition.Shards).Sort()

			// check the first Start is MinKey, the last End is MaxKey,
			// and the values in between match: End[i] == Start[i+1]
			if partition.Shards[0].KeyRange.Start != key.MinKey {
				return fmt.Errorf("Keyspace partition for %v does not start with %v", tabletType, key.MinKey)
			}
			if partition.Shards[len(partition.Shards)-1].KeyRange.End != key.MaxKey {
				return fmt.Errorf("Keyspace partition for %v does not end with %v", tabletType, key.MaxKey)
			}
			for i := range partition.Shards[0 : len(partition.Shards)-1] {
				if partition.Shards[i].KeyRange.End != partition.Shards[i+1].KeyRange.Start {
					return fmt.Errorf("Non-contiguous KeyRange values for %v at shard %v to %v: %v != %v", tabletType, i, i+1, partition.Shards[i].KeyRange.End.Hex(), partition.Shards[i+1].KeyRange.Start.Hex())
				}
			}

			// backfill Shards
			if first {
				first = false
				srvKeyspace.Shards = partition.Shards
			}
		}
	}

	// and then finally save the keyspace objects
	for ck, srvKeyspace := range srvKeyspaceMap {
		if err := wr.ts.UpdateSrvKeyspace(ck.cell, ck.keyspace, srvKeyspace); err != nil {
			return fmt.Errorf("writing serving data failed: %v", err)
		}
	}
	return nil
}
Пример #6
0
// This function should only be used with an action lock on the keyspace
// - otherwise the consistency of the serving graph data can't be
// guaranteed.
//
// Take data from the global keyspace and rebuild the local serving
// copies in each cell.
func (wr *Wrangler) rebuildKeyspace(keyspace string, cells []string) error {
	relog.Info("rebuildKeyspace %v", keyspace)
	shards, err := wr.ts.GetShardNames(keyspace)
	if err != nil {
		return err
	}

	// Rebuild all shards in parallel.
	wg := sync.WaitGroup{}
	er := concurrency.FirstErrorRecorder{}
	for _, shard := range shards {
		wg.Add(1)
		go func(shard string) {
			if err := wr.RebuildShardGraph(keyspace, shard, cells); err != nil {
				er.RecordError(fmt.Errorf("RebuildShardGraph failed: %v/%v %v", keyspace, shard, err))
			}
			wg.Done()
		}(shard)
	}
	wg.Wait()
	if er.HasErrors() {
		return er.Error()
	}

	// Scan the first shard to discover which cells need local serving data.
	aliases, err := topo.FindAllTabletAliasesInShard(wr.ts, keyspace, shards[0])
	if err != nil {
		return err
	}

	// srvKeyspaceByPath is a map:
	//   key: local keyspace {cell,keyspace}
	//   value: topo.SrvKeyspace object being built
	srvKeyspaceByPath := make(map[cellKeyspace]*topo.SrvKeyspace)
	for _, alias := range aliases {
		keyspaceLocation := cellKeyspace{alias.Cell, keyspace}
		if _, ok := srvKeyspaceByPath[keyspaceLocation]; !ok {
			// before adding keyspaceLocation to the map of
			// of KeyspaceByPath, we check this is a
			// serving tablet. No serving tablet in shard
			// 0 means we're not rebuilding the serving
			// graph in that cell.  This is somewhat
			// expensive, but we only do it on all the
			// non-serving tablets in a shard before we
			// find a serving tablet.
			ti, err := wr.ts.GetTablet(alias)
			if err != nil {
				return err
			}
			if !ti.IsServingType() {
				continue
			}

			srvKeyspaceByPath[keyspaceLocation] = &topo.SrvKeyspace{Shards: make([]topo.SrvShard, 0, 16)}
		}
	}

	// for each entry in the srvKeyspaceByPath map, we do the following:
	// - read the ShardInfo structures for each shard
	//    - prune the AddrsByType field, result would be too big
	// - compute the union of the db types (replica, master, ...)
	// - sort the shards in the list by range
	// - check the ranges are compatible (no hole, covers everything)
	for srvPath, srvKeyspace := range srvKeyspaceByPath {
		keyspaceDbTypes := make(map[topo.TabletType]bool)
		for _, shard := range shards {
			srvShard, err := wr.ts.GetSrvShard(srvPath.cell, srvPath.keyspace, shard)
			if err != nil {
				return err
			}
			for dbType, _ := range srvShard.AddrsByType {
				keyspaceDbTypes[topo.TabletType(dbType)] = true
			}
			// Prune addrs, this is unnecessarily expensive right now. It is easier to
			// load on-demand since we have to do that anyway on a reconnect.
			srvShard.AddrsByType = nil
			srvKeyspace.Shards = append(srvKeyspace.Shards, *srvShard)
		}
		tabletTypes := make([]topo.TabletType, 0, len(keyspaceDbTypes))
		for dbType, _ := range keyspaceDbTypes {
			tabletTypes = append(tabletTypes, dbType)
		}
		srvKeyspace.TabletTypes = tabletTypes
		// FIXME(msolomon) currently this only works when the shards are range-based
		topo.SrvShardArray(srvKeyspace.Shards).Sort()

		// check the first Start is MinKey, the last End is MaxKey,
		// and the values in between match: End[i] == Start[i+1]
		if srvKeyspace.Shards[0].KeyRange.Start != key.MinKey {
			return fmt.Errorf("Keyspace does not start with %v", key.MinKey)
		}
		if srvKeyspace.Shards[len(srvKeyspace.Shards)-1].KeyRange.End != key.MaxKey {
			return fmt.Errorf("Keyspace does not end with %v", key.MaxKey)
		}
		for i, _ := range srvKeyspace.Shards[0 : len(srvKeyspace.Shards)-1] {
			if srvKeyspace.Shards[i].KeyRange.End != srvKeyspace.Shards[i+1].KeyRange.Start {
				return fmt.Errorf("Non-contiguous KeyRange values at shard %v to %v: %v != %v", i, i+1, srvKeyspace.Shards[i].KeyRange.End.Hex(), srvKeyspace.Shards[i+1].KeyRange.Start.Hex())
			}
		}
	}

	// and then finally save the keyspace objects
	for srvPath, srvKeyspace := range srvKeyspaceByPath {
		if err := wr.ts.UpdateSrvKeyspace(srvPath.cell, srvPath.keyspace, srvKeyspace); err != nil {
			return fmt.Errorf("writing serving data failed: %v", err)
		}
	}
	return nil
}
Пример #7
0
// This function should only be used with an action lock on the keyspace
// - otherwise the consistency of the serving graph data can't be
// guaranteed.
//
// Take data from the global keyspace and rebuild the local serving
// copies in each cell.
func (wr *Wrangler) rebuildKeyspace(keyspace string, cells []string, shardCache map[string]*topo.ShardInfo) error {
	wr.logger.Infof("rebuildKeyspace %v", keyspace)

	ki, err := wr.ts.GetKeyspace(keyspace)
	if err != nil {
		return err
	}

	shards, err := wr.ts.GetShardNames(keyspace)
	if err != nil {
		return err
	}

	// Rebuild all shards in parallel.
	wg := sync.WaitGroup{}
	er := concurrency.FirstErrorRecorder{}
	for _, shard := range shards {
		wg.Add(1)
		go func(shard string) {
			if err := wr.RebuildShardGraph(keyspace, shard, cells); err != nil {
				er.RecordError(fmt.Errorf("RebuildShardGraph failed: %v/%v %v", keyspace, shard, err))
			}
			wg.Done()
		}(shard)
	}
	wg.Wait()
	if er.HasErrors() {
		return er.Error()
	}

	// Build the list of cells to work on: we get the union
	// of all the Cells of all the Shards, limited to the provided cells.
	//
	// srvKeyspaceMap is a map:
	//   key: cell
	//   value: topo.SrvKeyspace object being built
	srvKeyspaceMap := make(map[string]*topo.SrvKeyspace)
	if err := wr.findCellsForRebuild(ki, keyspace, shards, cells, srvKeyspaceMap); err != nil {
		return err
	}

	// Then we add the cells from the keyspaces we might be 'ServedFrom'.
	for _, servedFrom := range ki.ServedFrom {
		servedFromShards, err := wr.ts.GetShardNames(servedFrom)
		if err != nil {
			return err
		}
		if err := wr.findCellsForRebuild(ki, servedFrom, servedFromShards, cells, srvKeyspaceMap); err != nil {
			return err
		}
	}

	// for each entry in the srvKeyspaceMap map, we do the following:
	// - read the SrvShard structures for each shard / cell
	// - if not present, build an empty one from global Shard
	// - compute the union of the db types (replica, master, ...)
	// - sort the shards in the list by range
	// - check the ranges are compatible (no hole, covers everything)
	if shardCache == nil {
		shardCache = make(map[string]*topo.ShardInfo)
	}
	for cell, srvKeyspace := range srvKeyspaceMap {
		keyspaceDbTypes := make(map[topo.TabletType]bool)
		srvKeyspace.Partitions = make(map[topo.TabletType]*topo.KeyspacePartition)
		for _, shard := range shards {
			srvShard, err := wr.ts.GetSrvShard(cell, keyspace, shard)
			switch err {
			case nil:
				// we keep going
			case topo.ErrNoNode:
				wr.logger.Infof("Cell %v for %v/%v has no SvrShard, using Shard data with no TabletTypes instead", cell, keyspace, shard)
				si, ok := shardCache[shard]
				if !ok {
					si, err = wr.ts.GetShard(keyspace, shard)
					if err != nil {
						return fmt.Errorf("GetShard(%v, %v) (backup for GetSrvShard in cell %v) failed: %v", keyspace, shard, cell, err)
					}
					shardCache[shard] = si
				}
				srvShard = &topo.SrvShard{
					Name:        si.ShardName(),
					KeyRange:    si.KeyRange,
					ServedTypes: si.ServedTypes,
					MasterCell:  si.MasterAlias.Cell,
				}
			default:
				return err
			}
			for _, tabletType := range srvShard.TabletTypes {
				keyspaceDbTypes[tabletType] = true
			}

			// for each type this shard is supposed to serve,
			// add it to srvKeyspace.Partitions
			for _, tabletType := range srvShard.ServedTypes {
				if _, ok := srvKeyspace.Partitions[tabletType]; !ok {
					srvKeyspace.Partitions[tabletType] = &topo.KeyspacePartition{
						Shards: make([]topo.SrvShard, 0)}
				}
				srvKeyspace.Partitions[tabletType].Shards = append(srvKeyspace.Partitions[tabletType].Shards, *srvShard)
			}
		}

		srvKeyspace.TabletTypes = make([]topo.TabletType, 0, len(keyspaceDbTypes))
		for dbType := range keyspaceDbTypes {
			srvKeyspace.TabletTypes = append(srvKeyspace.TabletTypes, dbType)
		}

		first := true
		for tabletType, partition := range srvKeyspace.Partitions {
			topo.SrvShardArray(partition.Shards).Sort()

			// check the first Start is MinKey, the last End is MaxKey,
			// and the values in between match: End[i] == Start[i+1]
			if partition.Shards[0].KeyRange.Start != key.MinKey {
				return fmt.Errorf("keyspace partition for %v in cell %v does not start with %v", tabletType, cell, key.MinKey)
			}
			if partition.Shards[len(partition.Shards)-1].KeyRange.End != key.MaxKey {
				return fmt.Errorf("keyspace partition for %v in cell %v does not end with %v", tabletType, cell, key.MaxKey)
			}
			for i := range partition.Shards[0 : len(partition.Shards)-1] {
				if partition.Shards[i].KeyRange.End != partition.Shards[i+1].KeyRange.Start {
					return fmt.Errorf("non-contiguous KeyRange values for %v in cell %v at shard %v to %v: %v != %v", tabletType, cell, i, i+1, partition.Shards[i].KeyRange.End.Hex(), partition.Shards[i+1].KeyRange.Start.Hex())
				}
			}

			// backfill Shards
			if first {
				first = false
				srvKeyspace.Shards = partition.Shards
			}
		}
	}

	// and then finally save the keyspace objects
	for cell, srvKeyspace := range srvKeyspaceMap {
		wr.logger.Infof("updating keyspace serving graph in cell %v for %v", cell, keyspace)
		if err := wr.ts.UpdateSrvKeyspace(cell, keyspace, srvKeyspace); err != nil {
			return fmt.Errorf("writing serving data failed: %v", err)
		}
	}
	return nil
}
Пример #8
0
// rebuildKeyspace should only be used with an action lock on the keyspace
// - otherwise the consistency of the serving graph data can't be
// guaranteed.
//
// Take data from the global keyspace and rebuild the local serving
// copies in each cell.
func rebuildKeyspace(ctx context.Context, log logutil.Logger, ts topo.Server, keyspace string, cells []string, rebuildSrvShards bool) error {
	log.Infof("rebuildKeyspace %v", keyspace)

	ki, err := ts.GetKeyspace(ctx, keyspace)
	if err != nil {
		return err
	}

	var shardCache map[string]*topo.ShardInfo
	if rebuildSrvShards {
		shards, err := ts.GetShardNames(ctx, keyspace)
		if err != nil {
			return nil
		}

		// Rebuild all shards in parallel, save the shards
		shardCache = make(map[string]*topo.ShardInfo)
		wg := sync.WaitGroup{}
		mu := sync.Mutex{}
		rec := concurrency.FirstErrorRecorder{}
		for _, shard := range shards {
			wg.Add(1)
			go func(shard string) {
				if shardInfo, err := RebuildShard(ctx, log, ts, keyspace, shard, cells); err != nil {
					rec.RecordError(fmt.Errorf("RebuildShard failed: %v/%v %v", keyspace, shard, err))
				} else {
					mu.Lock()
					shardCache[shard] = shardInfo
					mu.Unlock()
				}
				wg.Done()
			}(shard)
		}
		wg.Wait()
		if rec.HasErrors() {
			return rec.Error()
		}

	} else {
		shardCache, err = ts.FindAllShardsInKeyspace(ctx, keyspace)
		if err != nil {
			return err
		}
	}

	// Build the list of cells to work on: we get the union
	// of all the Cells of all the Shards, limited to the provided cells.
	//
	// srvKeyspaceMap is a map:
	//   key: cell
	//   value: topo.SrvKeyspace object being built
	srvKeyspaceMap := make(map[string]*topodatapb.SrvKeyspace)
	findCellsForRebuild(ki, shardCache, cells, srvKeyspaceMap)

	// Then we add the cells from the keyspaces we might be 'ServedFrom'.
	for _, ksf := range ki.ServedFroms {
		servedFromShards, err := ts.FindAllShardsInKeyspace(ctx, ksf.Keyspace)
		if err != nil {
			return err
		}
		findCellsForRebuild(ki, servedFromShards, cells, srvKeyspaceMap)
	}

	// for each entry in the srvKeyspaceMap map, we do the following:
	// - read the SrvShard structures for each shard / cell
	// - if not present, build an empty one from global Shard
	// - compute the union of the db types (replica, master, ...)
	// - sort the shards in the list by range
	// - check the ranges are compatible (no hole, covers everything)
	for cell, srvKeyspace := range srvKeyspaceMap {
		for _, si := range shardCache {
			servedTypes := si.GetServedTypesPerCell(cell)

			// for each type this shard is supposed to serve,
			// add it to srvKeyspace.Partitions
			for _, tabletType := range servedTypes {
				partition := topoproto.SrvKeyspaceGetPartition(srvKeyspace, tabletType)
				if partition == nil {
					partition = &topodatapb.SrvKeyspace_KeyspacePartition{
						ServedType: tabletType,
					}
					srvKeyspace.Partitions = append(srvKeyspace.Partitions, partition)
				}
				partition.ShardReferences = append(partition.ShardReferences, &topodatapb.ShardReference{
					Name:     si.ShardName(),
					KeyRange: si.KeyRange,
				})
			}
		}

		if err := orderAndCheckPartitions(cell, srvKeyspace); err != nil {
			return err
		}
	}

	// and then finally save the keyspace objects
	for cell, srvKeyspace := range srvKeyspaceMap {
		log.Infof("updating keyspace serving graph in cell %v for %v", cell, keyspace)
		if err := ts.UpdateSrvKeyspace(ctx, cell, keyspace, srvKeyspace); err != nil {
			return fmt.Errorf("writing serving data failed: %v", err)
		}
	}
	return nil
}
Пример #9
0
// This function should only be used with an action lock on the keyspace
// - otherwise the consistency of the serving graph data can't be
// guaranteed.
//
// Take data from the global keyspace and rebuild the local serving
// copies in each cell.
func (wr *Wrangler) rebuildKeyspace(keyspace string, cells []string) error {
	wr.logger.Infof("rebuildKeyspace %v", keyspace)

	ki, err := wr.ts.GetKeyspace(keyspace)
	if err != nil {
		return err
	}

	shards, err := wr.ts.GetShardNames(keyspace)
	if err != nil {
		return err
	}

	// Rebuild all shards in parallel, save the shards
	shardCache := make(map[string]*topo.ShardInfo)
	wg := sync.WaitGroup{}
	mu := sync.Mutex{}
	rec := concurrency.FirstErrorRecorder{}
	for _, shard := range shards {
		wg.Add(1)
		go func(shard string) {
			if shardInfo, err := wr.RebuildShardGraph(keyspace, shard, cells); err != nil {
				rec.RecordError(fmt.Errorf("RebuildShardGraph failed: %v/%v %v", keyspace, shard, err))
			} else {
				mu.Lock()
				shardCache[shard] = shardInfo
				mu.Unlock()
			}
			wg.Done()
		}(shard)
	}
	wg.Wait()
	if rec.HasErrors() {
		return rec.Error()
	}

	// Build the list of cells to work on: we get the union
	// of all the Cells of all the Shards, limited to the provided cells.
	//
	// srvKeyspaceMap is a map:
	//   key: cell
	//   value: topo.SrvKeyspace object being built
	srvKeyspaceMap := make(map[string]*topo.SrvKeyspace)
	wr.findCellsForRebuild(ki, shardCache, cells, srvKeyspaceMap)

	// Then we add the cells from the keyspaces we might be 'ServedFrom'.
	for _, ksf := range ki.ServedFromMap {
		servedFromShards, err := topo.FindAllShardsInKeyspace(wr.ts, ksf.Keyspace)
		if err != nil {
			return err
		}
		wr.findCellsForRebuild(ki, servedFromShards, cells, srvKeyspaceMap)
	}

	// for each entry in the srvKeyspaceMap map, we do the following:
	// - read the SrvShard structures for each shard / cell
	// - if not present, build an empty one from global Shard
	// - compute the union of the db types (replica, master, ...)
	// - sort the shards in the list by range
	// - check the ranges are compatible (no hole, covers everything)
	for cell, srvKeyspace := range srvKeyspaceMap {
		keyspaceDbTypes := make(map[topo.TabletType]bool)
		srvKeyspace.Partitions = make(map[topo.TabletType]*topo.KeyspacePartition)
		for shard, si := range shardCache {
			srvShard, err := wr.ts.GetSrvShard(cell, keyspace, shard)
			switch err {
			case nil:
				// we keep going
			case topo.ErrNoNode:
				wr.logger.Infof("Cell %v for %v/%v has no SvrShard, using Shard data with no TabletTypes instead", cell, keyspace, shard)
				srvShard = &topo.SrvShard{
					Name:        si.ShardName(),
					KeyRange:    si.KeyRange,
					ServedTypes: si.GetServedTypesPerCell(cell),
					MasterCell:  si.MasterAlias.Cell,
				}
			default:
				return err
			}
			for _, tabletType := range srvShard.TabletTypes {
				keyspaceDbTypes[tabletType] = true
			}

			// for each type this shard is supposed to serve,
			// add it to srvKeyspace.Partitions
			for _, tabletType := range srvShard.ServedTypes {
				if _, ok := srvKeyspace.Partitions[tabletType]; !ok {
					srvKeyspace.Partitions[tabletType] = &topo.KeyspacePartition{
						Shards: make([]topo.SrvShard, 0)}
				}
				srvKeyspace.Partitions[tabletType].Shards = append(srvKeyspace.Partitions[tabletType].Shards, *srvShard)
			}
		}

		srvKeyspace.TabletTypes = make([]topo.TabletType, 0, len(keyspaceDbTypes))
		for dbType := range keyspaceDbTypes {
			srvKeyspace.TabletTypes = append(srvKeyspace.TabletTypes, dbType)
		}

		if err := wr.checkPartitions(cell, srvKeyspace); err != nil {
			return err
		}
	}

	// and then finally save the keyspace objects
	for cell, srvKeyspace := range srvKeyspaceMap {
		wr.logger.Infof("updating keyspace serving graph in cell %v for %v", cell, keyspace)
		if err := wr.ts.UpdateSrvKeyspace(cell, keyspace, srvKeyspace); err != nil {
			return fmt.Errorf("writing serving data failed: %v", err)
		}
	}
	return nil
}