Example #1
func NewSplitStrategy(logger logutil.Logger, argsStr string) (*SplitStrategy, error) {
	var args []string
	if argsStr != "" {
		args = strings.Split(argsStr, " ")
	}
	flagSet := flag.NewFlagSet("strategy", flag.ContinueOnError)
	flagSet.SetOutput(logutil.NewLoggerWriter(logger))
	flagSet.Usage = func() {
		logger.Printf("Strategy flag has the following options:\n")
		flagSet.PrintDefaults()
	}
	populateBlpCheckpoint := flagSet.Bool("populate_blp_checkpoint", false, "populates the blp checkpoint table")
	dontStartBinlogPlayer := flagSet.Bool("dont_start_binlog_player", false, "do not start the binlog player after restore is complete")
	skipSetSourceShards := flagSet.Bool("skip_set_source_shards", false, "do not set the SourceShard field on destination shards")
	if err := flagSet.Parse(args); err != nil {
		return nil, fmt.Errorf("cannot parse strategy: %v", err)
	}
	if flagSet.NArg() > 0 {
		return nil, fmt.Errorf("strategy doesn't have positional arguments")
	}

	return &SplitStrategy{
		PopulateBlpCheckpoint: *populateBlpCheckpoint,
		DontStartBinlogPlayer: *dontStartBinlogPlayer,
		SkipSetSourceShards:   *skipSetSourceShards,
	}, nil
}
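
A minimal usage sketch, assuming logutil.NewConsoleLogger is the package's stock console logger (the flag names come from the example above):

logger := logutil.NewConsoleLogger()
strategy, err := NewSplitStrategy(logger, "-populate_blp_checkpoint -dont_start_binlog_player")
if err != nil {
	logger.Errorf("invalid strategy: %v", err)
	return
}
if strategy.PopulateBlpCheckpoint {
	// seed the blp_checkpoint table before starting the copy
}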
Example #2
// FixShardReplication will fix the first problem it encounters within
// a ShardReplication object
func FixShardReplication(ts Server, logger logutil.Logger, cell, keyspace, shard string) error {
	sri, err := ts.GetShardReplication(cell, keyspace, shard)
	if err != nil {
		return err
	}

	for _, rl := range sri.ReplicationLinks {
		ti, err := ts.GetTablet(rl.TabletAlias)
		if err == ErrNoNode {
			logger.Warningf("Tablet %v is in the replication graph, but does not exist, removing it", rl.TabletAlias)
			return RemoveShardReplicationRecord(ts, cell, keyspace, shard, rl.TabletAlias)
		}
		if err != nil {
			// unknown error, we probably don't want to continue
			return err
		}

		if ti.Type == TYPE_SCRAP {
			logger.Warningf("Tablet %v is in the replication graph, but is scrapped, removing it", rl.TabletAlias)
			return RemoveShardReplicationRecord(ts, cell, keyspace, shard, rl.TabletAlias)
		}

		logger.Infof("Keeping tablet %v in the replication graph", rl.TabletAlias)
	}

	logger.Infof("All entries in replication graph are valid")
	return nil
}
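
A hedged usage sketch: one repair pass over a shard's replication graph, where ts and logger are assumed to be an existing Server and logutil.Logger:

if err := FixShardReplication(ts, logger, "cell1", "test_keyspace", "0"); err != nil {
	logger.Errorf("FixShardReplication failed: %v", err)
}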
Example #3
// TableScanByKeyRange returns a QueryResultReader that gets all the
// rows from a table that match the supplied KeyRange, ordered by
// Primary Key. The returned columns are ordered with the Primary Key
// columns in front.
func TableScanByKeyRange(log logutil.Logger, ts topo.Server, tabletAlias topo.TabletAlias, tableDefinition *myproto.TableDefinition, keyRange key.KeyRange, keyspaceIdType key.KeyspaceIdType) (*QueryResultReader, error) {
	where := ""
	switch keyspaceIdType {
	case key.KIT_UINT64:
		if keyRange.Start != key.MinKey {
			if keyRange.End != key.MaxKey {
				// have start & end
				where = fmt.Sprintf("WHERE keyspace_id >= %v AND keyspace_id < %v ", uint64FromKeyspaceId(keyRange.Start), uint64FromKeyspaceId(keyRange.End))
			} else {
				// have start only
				where = fmt.Sprintf("WHERE keyspace_id >= %v ", uint64FromKeyspaceId(keyRange.Start))
			}
		} else {
			if keyRange.End != key.MaxKey {
				// have end only
				where = fmt.Sprintf("WHERE keyspace_id < %v ", uint64FromKeyspaceId(keyRange.End))
			}
		}
	case key.KIT_BYTES:
		if keyRange.Start != key.MinKey {
			if keyRange.End != key.MaxKey {
				// have start & end
				where = fmt.Sprintf("WHERE HEX(keyspace_id) >= '%v' AND HEX(keyspace_id) < '%v' ", keyRange.Start.Hex(), keyRange.End.Hex())
			} else {
				// have start only
				where = fmt.Sprintf("WHERE HEX(keyspace_id) >= '%v' ", keyRange.Start.Hex())
			}
		} else {
			if keyRange.End != key.MaxKey {
				// have end only
				where = fmt.Sprintf("WHERE HEX(keyspace_id) < '%v' ", keyRange.End.Hex())
			}
		}
	default:
		return nil, fmt.Errorf("Unsupported KeyspaceIdType: %v", keyspaceIdType)
	}

	sql := fmt.Sprintf("SELECT %v FROM %v %vORDER BY %v", strings.Join(orderedColumns(tableDefinition), ", "), tableDefinition.Name, where, strings.Join(tableDefinition.PrimaryKeyColumns, ", "))
	log.Infof("SQL query for %v/%v: %v", tabletAlias, tableDefinition.Name, sql)
	return NewQueryResultReaderForTablet(ts, tabletAlias, sql)
}
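
A hedged sketch of scanning the 40-80 range of a uint64-hashed keyspace; key.Uint64Key and its KeyspaceId conversion are assumed to behave as in the key package, and the other arguments come from the surrounding context:

keyRange := key.KeyRange{
	Start: key.Uint64Key(0x4000000000000000).KeyspaceId(),
	End:   key.Uint64Key(0x8000000000000000).KeyspaceId(),
}
qrr, err := TableScanByKeyRange(log, ts, tabletAlias, tableDefinition, keyRange, key.KIT_UINT64)
if err != nil {
	return err
}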
Example #4
// This piece runs on the presumably empty machine acting as the target in the
// create replica action.
//
// validate target (self)
// shutdown_mysql()
// create temp data directory /vt/target/vt_<keyspace>
// copy compressed data files via HTTP
// verify hash of compressed files
// uncompress into /vt/vt_<target-uid>/data/vt_<keyspace>
// start_mysql()
// clean up compressed files
func (mysqld *Mysqld) RestoreFromSnapshot(logger logutil.Logger, snapshotManifest *SnapshotManifest, fetchConcurrency, fetchRetryCount int, dontWaitForSlaveStart bool, hookExtraEnv map[string]string) error {
	if snapshotManifest == nil {
		return errors.New("RestoreFromSnapshot: nil snapshotManifest")
	}

	logger.Infof("ValidateCloneTarget")
	if err := mysqld.ValidateCloneTarget(hookExtraEnv); err != nil {
		return err
	}

	logger.Infof("Shutdown mysqld")
	if err := mysqld.Shutdown(true, MysqlWaitTime); err != nil {
		return err
	}

	logger.Infof("Fetch snapshot")
	if err := mysqld.fetchSnapshot(snapshotManifest, fetchConcurrency, fetchRetryCount); err != nil {
		return err
	}

	logger.Infof("Restart mysqld")
	if err := mysqld.Start(MysqlWaitTime); err != nil {
		return err
	}

	cmdList, err := mysqld.StartReplicationCommands(snapshotManifest.ReplicationStatus)
	if err != nil {
		return err
	}
	if err := mysqld.ExecuteSuperQueryList(cmdList); err != nil {
		return err
	}

	if !dontWaitForSlaveStart {
		if err := mysqld.WaitForSlaveStart(SlaveStartDeadline); err != nil {
			return err
		}
	}

	h := hook.NewSimpleHook("postflight_restore")
	h.ExtraEnv = hookExtraEnv
	if err := h.ExecuteOptional(); err != nil {
		return err
	}

	return nil
}
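
A hedged sketch of a restore call; the manifest sm would normally be fetched from the source tablet, and the concurrency/retry values are arbitrary:

if err := mysqld.RestoreFromSnapshot(logger, sm, 3 /*fetchConcurrency*/, 3 /*fetchRetryCount*/, false /*dontWaitForSlaveStart*/, hookExtraEnv); err != nil {
	logger.Errorf("RestoreFromSnapshot failed: %v", err)
}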
Example #5
// RestartSlavesExternal will tell all the slaves in the provided list
// that they have a new master, and also tell all the masters. The
// old masters will be forced to spare if they don't answer.
// We execute all the actions in parallel.
func RestartSlavesExternal(ts topo.Server, log logutil.Logger, slaveTabletMap, masterTabletMap map[topo.TabletAlias]*topo.TabletInfo, masterElectTabletAlias topo.TabletAlias, slaveWasRestarted func(*topo.TabletInfo, *actionnode.SlaveWasRestartedArgs) error) {
	wg := sync.WaitGroup{}

	swrd := actionnode.SlaveWasRestartedArgs{
		Parent: masterElectTabletAlias,
	}

	log.Infof("Updating individual tablets with the right master...")

	// do all the slaves
	for _, ti := range slaveTabletMap {
		wg.Add(1)
		go func(ti *topo.TabletInfo) {
			if err := slaveWasRestarted(ti, &swrd); err != nil {
				log.Warningf("Slave %v had an error: %v", ti.Alias, err)
			}
			wg.Done()
		}(ti)
	}

	// and do the old master and any straggler, if possible.
	for _, ti := range masterTabletMap {
		wg.Add(1)
		go func(ti *topo.TabletInfo) {
			err := slaveWasRestarted(ti, &swrd)
			if err != nil {
				// the old master can be annoying if left
				// around in the replication graph, so if we
				// can't restart it, we force it to spare.
				// We don't rebuild the Shard just yet though.
				log.Warningf("Old master %v is not restarting in time, forcing it to spare: %v", ti.Alias, err)

				ti.Type = topo.TYPE_SPARE
				ti.Parent = masterElectTabletAlias
				if err := topo.UpdateTablet(context.TODO(), ts, ti); err != nil {
					log.Warningf("Failed to change old master %v to spare: %v", ti.Alias, err)
				}
			}
			wg.Done()
		}(ti)
	}
	wg.Wait()
}
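
A hedged sketch of the slaveWasRestarted callback, which is typically an RPC to each tablet; tmc stands in for a hypothetical tablet-manager client:

RestartSlavesExternal(ts, log, slaveTabletMap, masterTabletMap, masterElectTabletAlias,
	func(ti *topo.TabletInfo, swrd *actionnode.SlaveWasRestartedArgs) error {
		return tmc.SlaveWasRestarted(ti, swrd) // hypothetical RPC wrapper
	})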
Example #6
// TableScan returns a QueryResultReader that gets all the rows from a
// table, ordered by Primary Key. The returned columns are ordered
// with the Primary Key columns in front.
func TableScan(log logutil.Logger, ts topo.Server, tabletAlias topo.TabletAlias, tableDefinition *myproto.TableDefinition) (*QueryResultReader, error) {
	sql := fmt.Sprintf("SELECT %v FROM %v ORDER BY %v", strings.Join(orderedColumns(tableDefinition), ", "), tableDefinition.Name, strings.Join(tableDefinition.PrimaryKeyColumns, ", "))
	log.Infof("SQL query for %v/%v: %v", tabletAlias, tableDefinition.Name, sql)
	return NewQueryResultReaderForTablet(ts, tabletAlias, sql)
}
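
A hedged sketch of consuming the reader; Next is assumed to return a nil row once the stream is exhausted, matching its use in the differ below:

qrr, err := TableScan(log, ts, tabletAlias, tableDefinition)
if err != nil {
	return err
}
for {
	row, err := qrr.Next()
	if err != nil {
		return err
	}
	if row == nil {
		break // no more rows
	}
	// process row ([]sqltypes.Value, primary key columns first)
}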
Example #7
// Go runs the diff. If there is no error, it will drain both sides.
// If an error occurs, it will just return it and stop.
func (rd *RowSubsetDiffer) Go(log logutil.Logger) (dr DiffReport, err error) {

	dr.startingTime = time.Now()
	defer dr.ComputeQPS()

	var superset []sqltypes.Value
	var subset []sqltypes.Value
	advanceSuperset := true
	advanceSubset := true
	for {
		if advanceSuperset {
			superset, err = rd.superset.Next()
			if err != nil {
				return
			}
			advanceSuperset = false
		}
		if advanceSubset {
			subset, err = rd.subset.Next()
			if err != nil {
				return
			}
			advanceSubset = false
		}
		dr.processedRows++
		if superset == nil {
			// no more rows from the superset
			if subset == nil {
				// no more rows from subset either, we're done
				return
			}

			// drain subset, update count
			if count, err := rd.subset.Drain(); err != nil {
				return dr, err
			} else {
				dr.extraRowsRight += 1 + count
			}
			return
		}
		if subset == nil {
			// no more rows from the subset
			// we know we have rows from superset, drain
			if _, err := rd.superset.Drain(); err != nil {
				return dr, err
			}
			return
		}

		// we have both superset and subset, compare
		f := RowsEqual(superset, subset)
		if f == -1 {
			// rows are the same, next
			dr.matchingRows++
			advanceSuperset = true
			advanceSubset = true
			continue
		}

		if f >= rd.pkFieldCount {
			// rows have the same primary key, only content is different
			if dr.mismatchedRows < 10 {
				log.Errorf("Different content %v in same PK: %v != %v", dr.mismatchedRows, superset, subset)
			}
			dr.mismatchedRows++
			advanceSuperset = true
			advanceSubset = true
			continue
		}

		// have to find the 'smallest' row and advance it
		c, err := CompareRows(rd.superset.Fields(), rd.pkFieldCount, superset, subset)
		if err != nil {
			return dr, err
		}
		if c < 0 {
			advanceSuperset = true
			continue
		} else if c > 0 {
			if dr.extraRowsRight < 10 {
				log.Errorf("Extra row %v on subset: %v", dr.extraRowsRight, subset)
			}
			dr.extraRowsRight++
			advanceSubset = true
			continue
		}

		// After looking at primary keys more carefully,
		// they're the same. Logging a regular difference
		// then, and advancing both.
		if dr.mismatchedRows < 10 {
			log.Errorf("Different content %v in same PK: %v != %v", dr.mismatchedRows, superset, subset)
		}
		dr.mismatchedRows++
		advanceSuperset = true
		advanceSubset = true
	}
}
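
A hedged in-package sketch of driving the differ; NewRowSubsetDiffer is assumed to pair a superset and a subset QueryResultReader with the number of primary key fields:

differ := NewRowSubsetDiffer(supersetReader, subsetReader, len(tableDefinition.PrimaryKeyColumns))
dr, err := differ.Go(log)
if err != nil {
	return err
}
log.Infof("diff report: %v", dr)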
Example #8
// This function runs on the machine acting as the source for the clone.
//
// Check master/slave status and determine restore needs.
// If this instance is a slave, stop replication, otherwise place in read-only mode.
// Record replication position.
// Shutdown mysql
// Check paths for storing data
//
// Depending on the serverMode flag, we do the following:
// serverMode = false:
//   Compress /vt/vt_[0-9a-f]+/data/vt_.+
//   Compute hash (of compressed files, as we serve .gz files here)
//   Place in /vt/clone_src where they will be served by http server (not rpc)
//   Restart mysql
// serverMode = true:
//   Make symlinks for /vt/vt_[0-9a-f]+/data/vt_.+ to innodb files
//   Compute hash (of uncompressed files, as we serve uncompressed files)
//   Place symlinks in /vt/clone_src where they will be served by http server
//   Leave mysql stopped, return slaveStartRequired, readOnly
func (mysqld *Mysqld) CreateSnapshot(logger logutil.Logger, dbName, sourceAddr string, allowHierarchicalReplication bool, concurrency int, serverMode bool, hookExtraEnv map[string]string) (snapshotManifestUrlPath string, slaveStartRequired, readOnly bool, err error) {
	if dbName == "" {
		return "", false, false, errors.New("CreateSnapshot failed: no database name provided")
	}

	if err = mysqld.validateCloneSource(serverMode, hookExtraEnv); err != nil {
		return
	}

	// save initial state so we can restore on Start()
	slaveStartRequired = false
	sourceIsMaster := false
	readOnly = true

	slaveStatus, err := mysqld.SlaveStatus()
	if err == nil {
		slaveStartRequired = slaveStatus.SlaveRunning()
	} else if err == ErrNotSlave {
		sourceIsMaster = true
	} else {
		// If we can't get any data, just fail.
		return
	}

	readOnly, err = mysqld.IsReadOnly()
	if err != nil {
		return
	}

	// Stop sources of writes so we can get a consistent replication position.
	// If the source is a slave use the master replication position
	// unless we are allowing hierarchical replicas.
	masterAddr := ""
	var replicationPosition proto.ReplicationPosition
	if sourceIsMaster {
		if err = mysqld.SetReadOnly(true); err != nil {
			return
		}
		replicationPosition, err = mysqld.MasterPosition()
		if err != nil {
			return
		}
		masterAddr = mysqld.IpAddr()
	} else {
		if err = mysqld.StopSlave(hookExtraEnv); err != nil {
			return
		}
		var slaveStatus *proto.ReplicationStatus
		slaveStatus, err = mysqld.SlaveStatus()
		if err != nil {
			return
		}
		replicationPosition = slaveStatus.Position

		// We are a slave, check our replication strategy before
		// choosing the master address.
		if allowHierarchicalReplication {
			masterAddr = mysqld.IpAddr()
		} else {
			masterAddr, err = mysqld.GetMasterAddr()
			if err != nil {
				return
			}
		}
	}

	if err = mysqld.Shutdown(true, MysqlWaitTime); err != nil {
		return
	}

	var smFile string
	dataFiles, snapshotErr := mysqld.createSnapshot(logger, concurrency, serverMode)
	if snapshotErr != nil {
		logger.Errorf("CreateSnapshot failed: %v", snapshotErr)
	} else {
		var sm *SnapshotManifest
		sm, snapshotErr = newSnapshotManifest(sourceAddr, mysqld.IpAddr(),
			masterAddr, dbName, dataFiles, replicationPosition, proto.ReplicationPosition{})
		if snapshotErr != nil {
			logger.Errorf("CreateSnapshot failed: %v", snapshotErr)
		} else {
			smFile = path.Join(mysqld.SnapshotDir, SnapshotManifestFile)
			if snapshotErr = writeJson(smFile, sm); snapshotErr != nil {
				logger.Errorf("CreateSnapshot failed: %v", snapshotErr)
			}
		}
	}

	// restore our state if required
	if serverMode && snapshotErr == nil {
		logger.Infof("server mode snapshot worked, not restarting mysql")
	} else {
		if err = mysqld.SnapshotSourceEnd(slaveStartRequired, readOnly, false /*deleteSnapshot*/, hookExtraEnv); err != nil {
			return
		}
	}

	if snapshotErr != nil {
		return "", slaveStartRequired, readOnly, snapshotErr
	}
	relative, err := filepath.Rel(mysqld.SnapshotDir, smFile)
	if err != nil {
		return "", slaveStartRequired, readOnly, nil
	}
	return path.Join(SnapshotURLPath, relative), slaveStartRequired, readOnly, nil
}
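
A hedged sketch of taking a regular (non-server-mode) snapshot and logging the returned state; sourceAddr and hookExtraEnv come from the caller:

urlPath, slaveStartRequired, readOnly, err := mysqld.CreateSnapshot(
	logger, "vt_test_keyspace", sourceAddr, false /*allowHierarchicalReplication*/,
	4 /*concurrency*/, false /*serverMode*/, hookExtraEnv)
if err != nil {
	logger.Errorf("CreateSnapshot failed: %v", err)
	return
}
logger.Infof("manifest at %v, slaveStartRequired=%v, readOnly=%v", urlPath, slaveStartRequired, readOnly)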
Example #9
func (mysqld *Mysqld) createSnapshot(logger logutil.Logger, concurrency int, serverMode bool) ([]SnapshotFile, error) {
	sources := make([]string, 0, 128)
	destinations := make([]string, 0, 128)

	// clean out and start fresh
	logger.Infof("removing previous snapshots: %v", mysqld.SnapshotDir)
	if err := os.RemoveAll(mysqld.SnapshotDir); err != nil {
		return nil, err
	}

	// FIXME(msolomon) innodb paths must match patterns in mycnf -
	// probably belongs as a derived path.
	type snapPair struct{ srcDir, dstDir string }
	dps := []snapPair{
		{mysqld.config.InnodbDataHomeDir, path.Join(mysqld.SnapshotDir, innodbDataSubdir)},
		{mysqld.config.InnodbLogGroupHomeDir, path.Join(mysqld.SnapshotDir, innodbLogSubdir)},
	}

	dataDirEntries, err := ioutil.ReadDir(mysqld.config.DataDir)
	if err != nil {
		return nil, err
	}

	for _, de := range dataDirEntries {
		dbDirPath := path.Join(mysqld.config.DataDir, de.Name())
		// If this is not a directory, try to evaluate it as a symlink.
		if !de.IsDir() {
			dbDirPath, err = filepath.EvalSymlinks(dbDirPath)
			if err != nil {
				return nil, err
			}
			de, err = os.Stat(dbDirPath)
			if err != nil {
				return nil, err
			}
		}
		if de.IsDir() {
			// Copy anything that defines a db.opt file - that includes empty databases.
			_, err := os.Stat(path.Join(dbDirPath, "db.opt"))
			if err == nil {
				dps = append(dps, snapPair{dbDirPath, path.Join(mysqld.SnapshotDir, dataDir, de.Name())})
			} else {
				// Look for at least one .frm file
				dbDirEntries, err := ioutil.ReadDir(dbDirPath)
				if err == nil {
					for _, dbEntry := range dbDirEntries {
						if strings.HasSuffix(dbEntry.Name(), ".frm") {
							dps = append(dps, snapPair{dbDirPath, path.Join(mysqld.SnapshotDir, dataDir, de.Name())})
							break
						}
					}
				} else {
					return nil, err
				}
			}
		}
	}

	for _, dp := range dps {
		if err := os.MkdirAll(dp.dstDir, 0775); err != nil {
			return nil, err
		}
		if s, d, err := findFilesToServe(dp.srcDir, dp.dstDir, !serverMode); err != nil {
			return nil, err
		} else {
			sources = append(sources, s...)
			destinations = append(destinations, d...)
		}
	}

	return newSnapshotFiles(sources, destinations, mysqld.SnapshotDir, concurrency, !serverMode)
}
Example #10
func logStuff(logger logutil.Logger, count int) {
	for i := 0; i < count; i++ {
		logger.Infof(testLogString)
	}
}
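
A hedged test sketch: logutil.NewMemoryLogger is assumed to buffer events (as in the logutil package) so the emitted count can be asserted:

ml := logutil.NewMemoryLogger()
logStuff(ml, 10)
if got := len(ml.Events); got != 10 {
	t.Errorf("logStuff logged %v events, want 10", got)
}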
Example #11
// Update shard file with new master, replicas, etc.
//
// Re-read from TopologyServer to make sure we are using the side
// effects of all actions.
//
// This function locks individual SrvShard paths, so it doesn't need a lock
// on the shard.
func RebuildShard(ctx context.Context, log logutil.Logger, ts topo.Server, keyspace, shard string, cells []string, timeout time.Duration, interrupted chan struct{}) (*topo.ShardInfo, error) {
	log.Infof("RebuildShard %v/%v", keyspace, shard)

	span := trace.NewSpanFromContext(ctx)
	span.StartLocal("topotools.RebuildShard")
	defer span.Finish()
	ctx = trace.NewContext(ctx, span)

	// read the existing shard info. It has to exist.
	shardInfo, err := ts.GetShard(keyspace, shard)
	if err != nil {
		return nil, err
	}

	// rebuild all cells in parallel
	wg := sync.WaitGroup{}
	rec := concurrency.AllErrorRecorder{}
	for _, cell := range shardInfo.Cells {
		// skip this cell if we shouldn't rebuild it
		if !topo.InCellList(cell, cells) {
			continue
		}

		// start with the master if it's in the current cell
		tabletsAsMap := make(map[topo.TabletAlias]bool)
		if shardInfo.MasterAlias.Cell == cell {
			tabletsAsMap[shardInfo.MasterAlias] = true
		}

		wg.Add(1)
		go func(cell string) {
			defer wg.Done()

			// Lock the SrvShard so we don't race with other rebuilds of the same
			// shard in the same cell (e.g. from our peer tablets).
			actionNode := actionnode.RebuildSrvShard()
			lockPath, err := actionNode.LockSrvShard(ctx, ts, cell, keyspace, shard, timeout, interrupted)
			if err != nil {
				rec.RecordError(err)
				return
			}

			// read the ShardReplication object to find tablets
			sri, err := ts.GetShardReplication(cell, keyspace, shard)
			if err != nil {
				rec.RecordError(fmt.Errorf("GetShardReplication(%v, %v, %v) failed: %v", cell, keyspace, shard, err))
				return
			}

			// add all relevant tablets to the map
			for _, rl := range sri.ReplicationLinks {
				tabletsAsMap[rl.TabletAlias] = true
				if rl.Parent.Cell == cell {
					tabletsAsMap[rl.Parent] = true
				}
			}

			// convert the map to a list
			aliases := make([]topo.TabletAlias, 0, len(tabletsAsMap))
			for a := range tabletsAsMap {
				aliases = append(aliases, a)
			}

			// read all the Tablet records
			tablets, err := topo.GetTabletMap(ctx, ts, aliases)
			switch err {
			case nil:
				// keep going, we're good
			case topo.ErrPartialResult:
				log.Warningf("Got ErrPartialResult from topo.GetTabletMap in cell %v, some tablets may not be added properly to serving graph", cell)
			default:
				rec.RecordError(fmt.Errorf("GetTabletMap in cell %v failed: %v", cell, err))
				return
			}

			// write the data we need to
			rebuildErr := rebuildCellSrvShard(ctx, log, ts, shardInfo, cell, tablets)

			// and unlock
			if err := actionNode.UnlockSrvShard(ctx, ts, cell, keyspace, shard, lockPath, rebuildErr); err != nil {
				rec.RecordError(err)
			}
		}(cell)
	}
	wg.Wait()

	return shardInfo, rec.Error()
}
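
A hedged sketch of a full rebuild across all cells (a nil cell list is assumed to mean "all cells", per topo.InCellList); the timeout and interrupt channel are placeholders:

interrupted := make(chan struct{})
shardInfo, err := RebuildShard(context.TODO(), logger, ts, "test_keyspace", "0", nil, 30*time.Second, interrupted)
if err != nil {
	logger.Errorf("RebuildShard failed: %v", err)
	return
}
logger.Infof("rebuilt shard %v/%v", shardInfo.Keyspace(), shardInfo.ShardName())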
Example #12
// rebuildCellSrvShard computes and writes the serving graph data to a
// single cell
func rebuildCellSrvShard(ctx context.Context, log logutil.Logger, ts topo.Server, shardInfo *topo.ShardInfo, cell string, tablets map[topo.TabletAlias]*topo.TabletInfo) error {
	log.Infof("rebuildCellSrvShard %v/%v in cell %v", shardInfo.Keyspace(), shardInfo.ShardName(), cell)

	// Get all existing db types so that any that are no longer
	// in use can be removed after the rebuild.
	existingTabletTypes, err := ts.GetSrvTabletTypesPerShard(cell, shardInfo.Keyspace(), shardInfo.ShardName())
	if err != nil {
		if err != topo.ErrNoNode {
			return err
		}
	}

	// Update db type addresses in the serving graph
	//
	// locationAddrsMap is a map:
	//   key: tabletType
	//   value: EndPoints (list of server records)
	locationAddrsMap := make(map[topo.TabletType]*topo.EndPoints)
	for _, tablet := range tablets {
		if !tablet.IsInReplicationGraph() {
			// only valid case is a scrapped master in the
			// catastrophic reparent case
			if tablet.Parent.Uid != topo.NO_TABLET {
				log.Warningf("Tablet %v should not be in the replication graph, please investigate (it is being ignored in the rebuild)", tablet.Alias)
			}
			continue
		}

		// Check IsInServingGraph, we don't want to add tablets that
		// are not serving
		if !tablet.IsInServingGraph() {
			continue
		}

		// Check the Keyspace and Shard for the tablet are right
		if tablet.Keyspace != shardInfo.Keyspace() || tablet.Shard != shardInfo.ShardName() {
			return fmt.Errorf("CRITICAL: tablet %v is in replication graph for shard %v/%v but belongs to shard %v:%v", tablet.Alias, shardInfo.Keyspace(), shardInfo.ShardName(), tablet.Keyspace, tablet.Shard)
		}

		// Add the tablet to the list
		addrs, ok := locationAddrsMap[tablet.Type]
		if !ok {
			addrs = topo.NewEndPoints()
			locationAddrsMap[tablet.Type] = addrs
		}
		entry, err := tablet.Tablet.EndPoint()
		if err != nil {
			log.Warningf("EndPointForTablet failed for tablet %v: %v", tablet.Alias, err)
			continue
		}
		addrs.Entries = append(addrs.Entries, *entry)
	}

	// we're gonna parallelize a lot here:
	// - writing all the tabletTypes records
	// - removing the unused records
	// - writing SrvShard
	rec := concurrency.AllErrorRecorder{}
	wg := sync.WaitGroup{}

	// write all the EndPoints nodes everywhere we want them
	for tabletType, addrs := range locationAddrsMap {
		wg.Add(1)
		go func(tabletType topo.TabletType, addrs *topo.EndPoints) {
			log.Infof("saving serving graph for cell %v shard %v/%v tabletType %v", cell, shardInfo.Keyspace(), shardInfo.ShardName(), tabletType)
			span := trace.NewSpanFromContext(ctx)
			span.StartClient("TopoServer.UpdateEndPoints")
			span.Annotate("tablet_type", string(tabletType))
			if err := ts.UpdateEndPoints(cell, shardInfo.Keyspace(), shardInfo.ShardName(), tabletType, addrs); err != nil {
				rec.RecordError(fmt.Errorf("writing endpoints for cell %v shard %v/%v tabletType %v failed: %v", cell, shardInfo.Keyspace(), shardInfo.ShardName(), tabletType, err))
			}
			span.Finish()
			wg.Done()
		}(tabletType, addrs)
	}

	// Delete any pre-existing paths that were not updated by this process.
	// That's the existingTabletTypes - locationAddrsMap
	for _, tabletType := range existingTabletTypes {
		if _, ok := locationAddrsMap[tabletType]; !ok {
			wg.Add(1)
			go func(tabletType topo.TabletType) {
				log.Infof("removing stale db type from serving graph: %v", tabletType)
				span := trace.NewSpanFromContext(ctx)
				span.StartClient("TopoServer.DeleteEndPoints")
				span.Annotate("tablet_type", string(tabletType))
				if err := ts.DeleteEndPoints(cell, shardInfo.Keyspace(), shardInfo.ShardName(), tabletType); err != nil {
					log.Warningf("unable to remove stale db type %v from serving graph: %v", tabletType, err)
				}
				span.Finish()
				wg.Done()
			}(tabletType)
		}
	}

	// Update srvShard object
	wg.Add(1)
	go func() {
		log.Infof("updating shard serving graph in cell %v for %v/%v", cell, shardInfo.Keyspace(), shardInfo.ShardName())
		srvShard := &topo.SrvShard{
			Name:        shardInfo.ShardName(),
			KeyRange:    shardInfo.KeyRange,
			ServedTypes: shardInfo.GetServedTypesPerCell(cell),
			MasterCell:  shardInfo.MasterAlias.Cell,
			TabletTypes: make([]topo.TabletType, 0, len(locationAddrsMap)),
		}
		for tabletType := range locationAddrsMap {
			srvShard.TabletTypes = append(srvShard.TabletTypes, tabletType)
		}

		span := trace.NewSpanFromContext(ctx)
		span.StartClient("TopoServer.UpdateSrvShard")
		span.Annotate("keyspace", shardInfo.Keyspace())
		span.Annotate("shard", shardInfo.ShardName())
		span.Annotate("cell", cell)
		if err := ts.UpdateSrvShard(cell, shardInfo.Keyspace(), shardInfo.ShardName(), srvShard); err != nil {
			rec.RecordError(fmt.Errorf("writing serving data in cell %v for %v/%v failed: %v", cell, shardInfo.Keyspace(), shardInfo.ShardName(), err))
		}
		span.Finish()
		wg.Done()
	}()

	wg.Wait()
	return rec.Error()
}