// ChangeType changes the type of the tablet and possibly also updates
// the health information for it. Make this external, since these
// transitions need to be forced from time to time.
//
// - if health is nil, we don't touch the Tablet's Health record.
// - if health is an empty map, we clear the Tablet's Health record.
// - if health has values, we overwrite the Tablet's Health record.
func ChangeType(ctx context.Context, ts topo.Server, tabletAlias topo.TabletAlias, newType topo.TabletType, health map[string]string) error {
	tablet, err := ts.GetTablet(ctx, tabletAlias)
	if err != nil {
		return err
	}

	if !topo.IsTrivialTypeChange(tablet.Type, newType) {
		return fmt.Errorf("cannot change tablet type %v -> %v %v", tablet.Type, newType, tabletAlias)
	}

	tablet.Type = newType
	if newType == topo.TYPE_IDLE {
		tablet.Keyspace = ""
		tablet.Shard = ""
		tablet.KeyRange = key.KeyRange{}
		tablet.Health = health
	}
	if health != nil {
		if len(health) == 0 {
			tablet.Health = nil
		} else {
			tablet.Health = health
		}
	}
	return topo.UpdateTablet(ctx, ts, tablet)
}
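// changeTypeExample is a hypothetical caller, not part of the original
// code: a minimal sketch of the three health-map cases accepted by
// ChangeType. It assumes ctx, ts, and alias are valid, and the health
// key/value strings are illustrative only, not a fixed schema.
func changeTypeExample(ctx context.Context, ts topo.Server, alias topo.TabletAlias) error {
	// nil health: leave the tablet's Health record untouched.
	if err := ChangeType(ctx, ts, alias, topo.TYPE_REPLICA, nil); err != nil {
		return err
	}

	// empty map: clear the Health record.
	if err := ChangeType(ctx, ts, alias, topo.TYPE_SPARE, map[string]string{}); err != nil {
		return err
	}

	// non-empty map: overwrite the Health record.
	return ChangeType(ctx, ts, alias, topo.TYPE_REPLICA, map[string]string{"replication_lag": "high"})
}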
// RebuildShard updates the SrvShard objects and underlying serving graph.
//
// Re-read from TopologyServer to make sure we are using the side
// effects of all actions.
//
// This function will start each cell over from the beginning on ErrBadVersion,
// so it doesn't need a lock on the shard.
func RebuildShard(ctx context.Context, log logutil.Logger, ts topo.Server, keyspace, shard string, cells []string, lockTimeout time.Duration) (*topo.ShardInfo, error) {
	log.Infof("RebuildShard %v/%v", keyspace, shard)

	span := trace.NewSpanFromContext(ctx)
	span.StartLocal("topotools.RebuildShard")
	defer span.Finish()
	ctx = trace.NewContext(ctx, span)

	// read the existing shard info. It has to exist.
	shardInfo, err := ts.GetShard(ctx, keyspace, shard)
	if err != nil {
		return nil, err
	}

	// rebuild all cells in parallel
	wg := sync.WaitGroup{}
	rec := concurrency.AllErrorRecorder{}
	for _, cell := range shardInfo.Cells {
		// skip this cell if we shouldn't rebuild it
		if !topo.InCellList(cell, cells) {
			continue
		}

		wg.Add(1)
		go func(cell string) {
			defer wg.Done()
			rec.RecordError(rebuildCellSrvShard(ctx, log, ts, shardInfo, cell))
		}(cell)
	}
	wg.Wait()

	return shardInfo, rec.Error()
}
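// rebuildShardExample is a hypothetical caller, shown only to
// illustrate the RebuildShard signature above; the keyspace, shard,
// cell names, and timeout value are made up.
func rebuildShardExample(ctx context.Context, logger logutil.Logger, ts topo.Server) error {
	shardInfo, err := RebuildShard(ctx, logger, ts, "test_keyspace", "-80", []string{"cell1", "cell2"}, 30*time.Second)
	if err != nil {
		return err
	}
	logger.Infof("rebuilt serving graph for %v/%v in the requested cells", shardInfo.Keyspace(), shardInfo.ShardName())
	return nil
}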
// GetTabletMap tries to read all the tablets in the provided list,
// and returns them in a map, keyed by alias.
// If error is not nil, the results in the dictionary are incomplete.
func GetTabletMap(ts topo.Server, tabletAliases []topo.TabletAlias) (map[topo.TabletAlias]*topo.TabletInfo, error) {
	wg := sync.WaitGroup{}
	mutex := sync.Mutex{}

	tabletMap := make(map[topo.TabletAlias]*topo.TabletInfo)
	var someError error

	for _, tabletAlias := range tabletAliases {
		wg.Add(1)
		go func(tabletAlias topo.TabletAlias) {
			defer wg.Done()
			tabletInfo, err := ts.GetTablet(tabletAlias)
			mutex.Lock()
			if err != nil {
				relog.Warning("%v: %v", tabletAlias, err)
				// There can be data races removing nodes - ignore them for now.
				if err != topo.ErrNoNode {
					someError = err
				}
			} else {
				tabletMap[tabletAlias] = tabletInfo
			}
			mutex.Unlock()
		}(tabletAlias)
	}
	wg.Wait()
	return tabletMap, someError
}
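// getTabletMapExample is a hypothetical caller illustrating the
// partial-result contract of GetTabletMap above; ts and aliases are
// assumed to be supplied by the surrounding code.
func getTabletMapExample(ts topo.Server, aliases []topo.TabletAlias) {
	tabletMap, err := GetTabletMap(ts, aliases)
	if err != nil {
		// The map is still usable: it only lacks the tablets that
		// failed to load for reasons other than topo.ErrNoNode.
		relog.Warning("GetTabletMap returned partial results: %v", err)
	}
	for alias, ti := range tabletMap {
		relog.Info("%v: type %v", alias, ti.Type)
	}
}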
// UnlockShard unlocks a previously locked shard.
func (n *ActionNode) UnlockShard(ctx context.Context, ts topo.Server, keyspace, shard string, lockPath string, actionError error) error {
	span := trace.NewSpanFromContext(ctx)
	span.StartClient("TopoServer.UnlockShardForAction")
	span.Annotate("action", n.Action)
	span.Annotate("keyspace", keyspace)
	span.Annotate("shard", shard)
	defer span.Finish()

	// first update the actionNode
	if actionError != nil {
		log.Infof("Unlocking shard %v/%v for action %v with error %v", keyspace, shard, n.Action, actionError)
		n.Error = actionError.Error()
		n.State = ActionStateFailed
	} else {
		log.Infof("Unlocking shard %v/%v for successful action %v", keyspace, shard, n.Action)
		n.Error = ""
		n.State = ActionStateDone
	}
	err := ts.UnlockShardForAction(ctx, keyspace, shard, lockPath, n.ToJSON())
	if actionError != nil {
		if err != nil {
			// this error will be masked by the action error
			log.Warningf("UnlockShardForAction failed: %v", err)
		}
		return actionError
	}
	return err
}
// GetAllTabletsAcrossCells returns all tablets from known cells.
// If it returns topo.ErrPartialResult, then the list is valid, but partial.
func GetAllTabletsAcrossCells(ctx context.Context, ts topo.Server) ([]*topo.TabletInfo, error) {
	cells, err := ts.GetKnownCells(ctx)
	if err != nil {
		return nil, err
	}

	results := make([][]*topo.TabletInfo, len(cells))
	errors := make([]error, len(cells))
	wg := sync.WaitGroup{}
	wg.Add(len(cells))
	for i, cell := range cells {
		go func(i int, cell string) {
			results[i], errors[i] = GetAllTablets(ctx, ts, cell)
			wg.Done()
		}(i, cell)
	}
	wg.Wait()

	err = nil
	var allTablets []*topo.TabletInfo
	for i := range cells {
		if errors[i] == nil {
			allTablets = append(allTablets, results[i]...)
		} else {
			err = topo.ErrPartialResult
		}
	}
	return allTablets, err
}
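// listTabletsExample is a hypothetical caller showing how the
// topo.ErrPartialResult contract of GetAllTabletsAcrossCells can be
// handled; ctx and ts are assumed to be valid.
func listTabletsExample(ctx context.Context, ts topo.Server) error {
	tablets, err := GetAllTabletsAcrossCells(ctx, ts)
	switch err {
	case nil:
		// Full result: every known cell answered.
	case topo.ErrPartialResult:
		// One or more cells failed; tablets from the healthy cells
		// are still valid, so keep going with a warning.
		log.Warningf("got tablets from a subset of cells only")
	default:
		return err
	}
	for _, ti := range tablets {
		log.Infof("%v", ti.Alias)
	}
	return nil
}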
// createSourceTablet is a test helper that creates the source tablet
// in the given keyspace/shard and registers its endpoints.
func createSourceTablet(t *testing.T, ts topo.Server, keyspace, shard string) {
	vshard, kr, err := topo.ValidateShardName(shard)
	if err != nil {
		t.Fatalf("ValidateShardName(%v) failed: %v", shard, err)
	}

	ctx := context.Background()
	tablet := &pb.Tablet{
		Alias: &pb.TabletAlias{
			Cell: "cell1",
			Uid:  100,
		},
		Type:     pb.TabletType_REPLICA,
		KeyRange: kr,
		Keyspace: keyspace,
		Shard:    vshard,
		PortMap: map[string]int32{
			"vt": 80,
		},
	}
	if err := ts.CreateTablet(ctx, tablet); err != nil {
		t.Fatalf("CreateTablet failed: %v", err)
	}
	if err := topotools.UpdateTabletEndpoints(ctx, ts, tablet); err != nil {
		t.Fatalf("topotools.UpdateTabletEndpoints failed: %v", err)
	}
}
// GetAllTabletsAccrossCells returns all tablets from known cells.
func GetAllTabletsAccrossCells(ts topo.Server) ([]*topo.TabletInfo, error) {
	cells, err := ts.GetKnownCells()
	if err != nil {
		return nil, err
	}

	// Buffer the channels so the remaining goroutines can finish and be
	// collected even if we return early on the first error.
	results := make(chan []*topo.TabletInfo, len(cells))
	errors := make(chan error, len(cells))
	for _, cell := range cells {
		go func(cell string) {
			tablets, err := GetAllTablets(ts, cell)
			if err != nil && err != topo.ErrNoNode {
				errors <- err
				return
			}
			results <- tablets
		}(cell)
	}

	allTablets := make([]*topo.TabletInfo, 0)
	for range cells {
		select {
		case tablets := <-results:
			allTablets = append(allTablets, tablets...)
		case err := <-errors:
			return nil, err
		}
	}
	return allTablets, nil
}
// GetAllTablets returns a sorted list of tablets.
func GetAllTablets(ctx context.Context, ts topo.Server, cell string) ([]*topo.TabletInfo, error) {
	aliases, err := ts.GetTabletsByCell(ctx, cell)
	if err != nil {
		return nil, err
	}
	sort.Sort(topo.TabletAliasList(aliases))

	tabletMap, err := topo.GetTabletMap(ctx, ts, aliases)
	if err != nil {
		// we got an error other than topo.ErrNoNode
		return nil, err
	}
	tablets := make([]*topo.TabletInfo, 0, len(aliases))
	for _, tabletAlias := range aliases {
		tabletInfo, ok := tabletMap[*tabletAlias]
		if !ok {
			// tablet disappeared on us (GetTabletMap ignores
			// topo.ErrNoNode), just echo a warning
			log.Warningf("failed to load tablet %v", tabletAlias)
		} else {
			tablets = append(tablets, tabletInfo)
		}
	}
	return tablets, nil
}
// NewQueryResultReaderForTablet creates a new QueryResultReader for
// the provided tablet / sql query.
func NewQueryResultReaderForTablet(ctx context.Context, ts topo.Server, tabletAlias topo.TabletAlias, sql string) (*QueryResultReader, error) {
	tablet, err := ts.GetTablet(ctx, tabletAlias)
	if err != nil {
		return nil, err
	}

	endPoint, err := tablet.EndPoint()
	if err != nil {
		return nil, err
	}

	conn, err := tabletconn.GetDialer()(ctx, *endPoint, tablet.Keyspace, tablet.Shard, *remoteActionsTimeout)
	if err != nil {
		return nil, err
	}

	sr, clientErrFn, err := conn.StreamExecute(ctx, sql, make(map[string]interface{}), 0)
	if err != nil {
		return nil, err
	}

	// read the columns, or grab the error
	cols, ok := <-sr
	if !ok {
		return nil, fmt.Errorf("Cannot read Fields for query '%v': %v", sql, clientErrFn())
	}

	return &QueryResultReader{
		Output:      sr,
		Fields:      cols.Fields,
		conn:        conn,
		clientErrFn: clientErrFn,
	}, nil
}
// SlaveWasRestarted updates the parent record for a tablet after a
// reparent, then fixes up the replication graph.
func SlaveWasRestarted(ts topo.Server, tabletAlias topo.TabletAlias, swrd *actionnode.SlaveWasRestartedArgs) error {
	tablet, err := ts.GetTablet(tabletAlias)
	if err != nil {
		return err
	}

	// Once this action completes, update the authoritative tablet node first.
	tablet.Parent = swrd.Parent
	if tablet.Type == topo.TYPE_MASTER {
		tablet.Type = topo.TYPE_SPARE
		tablet.State = topo.STATE_READ_ONLY
	}
	err = topo.UpdateTablet(ts, tablet)
	if err != nil {
		return err
	}

	// Update the new tablet location in the replication graph now that
	// we've updated the tablet.
	err = topo.CreateTabletReplicationData(ts, tablet.Tablet)
	if err != nil && err != topo.ErrNodeExists {
		return err
	}
	return nil
}
// TabletExternallyReparented updates all topo records so the current
// tablet is the new master for this shard. It is called by the RPC
// server.
func TabletExternallyReparented(ts topo.Server, tabletAlias topo.TabletAlias, actionTimeout, lockTimeout time.Duration) error {
	// we're apparently not the master yet, so let's do the work
	tablet, err := ts.GetTablet(tabletAlias)
	if err != nil {
		return err
	}

	// quick check on the shard
	shardInfo, err := ts.GetShard(tablet.Keyspace, tablet.Shard)
	if err != nil {
		return err
	}
	if shardInfo.MasterAlias == tabletAlias {
		return nil
	}

	// grab the shard lock
	actionNode := actionnode.ShardExternallyReparented(tabletAlias)
	interrupted := make(chan struct{})
	lockPath, err := actionNode.LockShard(ts, tablet.Keyspace, tablet.Shard, lockTimeout, interrupted)
	if err != nil {
		return err
	}

	// do the work
	err = tabletExternallyReparentedLocked(ts, tablet, actionTimeout, lockTimeout, interrupted)

	// release the lock in any case
	return actionNode.UnlockShard(ts, tablet.Keyspace, tablet.Shard, lockPath, err)
}
func newRealtimeStats(ts topo.Server) (*realtimeStats, error) {
	hc := discovery.NewHealthCheck(*vtctl.HealthCheckTimeout, *vtctl.HealthcheckRetryDelay, *vtctl.HealthCheckTimeout)
	tabletStatsCache := &tabletStatsCache{
		statuses: make(map[string]map[string]*discovery.TabletStats),
	}
	hc.SetListener(tabletStatsCache)
	r := &realtimeStats{
		healthCheck: hc,
		tabletStats: tabletStatsCache,
	}

	// Get the list of all tablets from all cells and monitor the topology
	// for added or removed tablets with a CellTabletsWatcher.
	cells, err := ts.GetKnownCells(context.Background())
	if err != nil {
		return r, fmt.Errorf("error when getting cells: %v", err)
	}
	var watchers []*discovery.TopologyWatcher
	for _, cell := range cells {
		watcher := discovery.NewCellTabletsWatcher(ts, hc, cell, *vtctl.HealthCheckTopologyRefresh, discovery.DefaultTopoReadConcurrency)
		watchers = append(watchers, watcher)
	}
	r.cellWatchers = watchers
	return r, nil
}
// checkKeyspaceLockTimeout checks that taking a held keyspace lock
// times out, can be interrupted, and that unlocking is not idempotent.
func checkKeyspaceLockTimeout(ctx context.Context, t *testing.T, ts topo.Server) {
	lockPath, err := ts.LockKeyspaceForAction(ctx, "test_keyspace", "fake-content")
	if err != nil {
		t.Fatalf("LockKeyspaceForAction: %v", err)
	}

	// test we can't take the lock again
	fastCtx, cancel := context.WithTimeout(ctx, timeUntilLockIsTaken)
	if _, err := ts.LockKeyspaceForAction(fastCtx, "test_keyspace", "unused-fake-content"); err != topo.ErrTimeout {
		t.Fatalf("LockKeyspaceForAction(again): %v", err)
	}
	cancel()

	// test we can interrupt taking the lock
	interruptCtx, cancel := context.WithCancel(ctx)
	go func() {
		time.Sleep(timeUntilLockIsTaken)
		cancel()
	}()
	if _, err := ts.LockKeyspaceForAction(interruptCtx, "test_keyspace", "unused-fake-content"); err != topo.ErrInterrupted {
		t.Fatalf("LockKeyspaceForAction(interrupted): %v", err)
	}

	if err := ts.UnlockKeyspaceForAction(ctx, "test_keyspace", lockPath, "fake-results"); err != nil {
		t.Fatalf("UnlockKeyspaceForAction(): %v", err)
	}

	// test we can't unlock again
	if err := ts.UnlockKeyspaceForAction(ctx, "test_keyspace", lockPath, "fake-results"); err == nil {
		t.Fatalf("UnlockKeyspaceForAction(again) worked")
	}
}
// NewRestartableResultReader creates a new RestartableResultReader for
// the provided tablet and chunk.
// It will automatically create the necessary query to read all rows within
// the chunk.
// NOTE: We assume that the Columns field in "td" was ordered by a preceding
// call to reorderColumnsPrimaryKeyFirst().
func NewRestartableResultReader(ctx context.Context, logger logutil.Logger, ts topo.Server, tabletAlias *topodatapb.TabletAlias, td *tabletmanagerdatapb.TableDefinition, chunk chunk) (*RestartableResultReader, error) {
	shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout)
	tablet, err := ts.GetTablet(shortCtx, tabletAlias)
	cancel()
	if err != nil {
		return nil, fmt.Errorf("tablet=%v table=%v chunk=%v: Failed to resolve tablet alias: %v", topoproto.TabletAliasString(tabletAlias), td.Name, chunk, err)
	}

	conn, err := tabletconn.GetDialer()(tablet.Tablet, *remoteActionsTimeout)
	if err != nil {
		return nil, fmt.Errorf("tablet=%v table=%v chunk=%v: Failed to get dialer for tablet: %v", topoproto.TabletAliasString(tabletAlias), td.Name, chunk, err)
	}

	r := &RestartableResultReader{
		ctx:    ctx,
		logger: logger,
		tablet: tablet.Tablet,
		td:     td,
		chunk:  chunk,
		conn:   conn,
	}

	if err := r.startStream(); err != nil {
		return nil, err
	}
	logger.Infof("tablet=%v table=%v chunk=%v: Starting to stream rows using query '%v'.", topoproto.TabletAliasString(tabletAlias), td.Name, chunk, r.query)
	return r, nil
}
// NewQueryResultReaderForTablet creates a new QueryResultReader for
// the provided tablet / sql query.
func NewQueryResultReaderForTablet(ctx context.Context, ts topo.Server, tabletAlias *topodatapb.TabletAlias, sql string) (*QueryResultReader, error) {
	shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout)
	tablet, err := ts.GetTablet(shortCtx, tabletAlias)
	cancel()
	if err != nil {
		return nil, err
	}

	conn, err := tabletconn.GetDialer()(tablet.Tablet, *remoteActionsTimeout)
	if err != nil {
		return nil, err
	}

	stream, err := conn.StreamExecute(ctx, &querypb.Target{
		Keyspace:   tablet.Tablet.Keyspace,
		Shard:      tablet.Tablet.Shard,
		TabletType: tablet.Tablet.Type,
	}, sql, make(map[string]interface{}), nil)
	if err != nil {
		return nil, err
	}

	// read the columns, or grab the error
	cols, err := stream.Recv()
	if err != nil {
		return nil, fmt.Errorf("Cannot read Fields for query '%v': %v", sql, err)
	}

	return &QueryResultReader{
		output: stream,
		fields: cols.Fields,
		conn:   conn,
	}, nil
}
// createSourceTablet is a helper method to create the source tablet
// in the given keyspace/shard.
func createSourceTablet(t *testing.T, name string, ts topo.Server, keyspace, shard string) {
	vshard, kr, err := topo.ValidateShardName(shard)
	if err != nil {
		t.Fatalf("ValidateShardName(%v) failed: %v", shard, err)
	}

	ctx := context.Background()
	tablet := &topodatapb.Tablet{
		Alias: &topodatapb.TabletAlias{
			Cell: "cell1",
			Uid:  100,
		},
		Keyspace: keyspace,
		Shard:    vshard,
		Type:     topodatapb.TabletType_REPLICA,
		KeyRange: kr,
		PortMap: map[string]int32{
			"vt": 80,
		},
	}
	if err := ts.CreateTablet(ctx, tablet); err != nil {
		t.Fatalf("CreateTablet failed: %v", err)
	}

	// register a tablet conn dialer that will return the instance
	// we want
	tabletconn.RegisterDialer(name, func(tablet *topodatapb.Tablet, timeout time.Duration) (tabletconn.TabletConn, error) {
		return &fakeTabletConn{
			tablet: tablet,
		}, nil
	})
	flag.Set("tablet_protocol", name)
}
// UnlockShard unlocks a previously locked shard.
func (n *ActionNode) UnlockShard(ctx context.Context, ts topo.Server, keyspace, shard string, lockPath string, actionError error) error {
	// Detach from the parent timeout, but copy the trace span.
	// We need to still release the lock even if the parent context timed out.
	ctx = trace.CopySpan(context.TODO(), ctx)
	ctx, cancel := context.WithTimeout(ctx, DefaultLockTimeout)
	defer cancel()

	span := trace.NewSpanFromContext(ctx)
	span.StartClient("TopoServer.UnlockShardForAction")
	span.Annotate("action", n.Action)
	span.Annotate("keyspace", keyspace)
	span.Annotate("shard", shard)
	defer span.Finish()

	// first update the actionNode
	if actionError != nil {
		log.Infof("Unlocking shard %v/%v for action %v with error %v", keyspace, shard, n.Action, actionError)
		n.Error = actionError.Error()
		n.State = ActionStateFailed
	} else {
		log.Infof("Unlocking shard %v/%v for successful action %v", keyspace, shard, n.Action)
		n.Error = ""
		n.State = ActionStateDone
	}
	err := ts.UnlockShardForAction(ctx, keyspace, shard, lockPath, n.ToJSON())
	if actionError != nil {
		if err != nil {
			// this error will be masked by the action error
			log.Warningf("UnlockShardForAction failed: %v", err)
		}
		return actionError
	}
	return err
}
// CopyKeyspaces will create the keyspaces in the destination topo.
func CopyKeyspaces(fromTS, toTS topo.Server) {
	keyspaces, err := fromTS.GetKeyspaces()
	if err != nil {
		log.Fatalf("fromTS.GetKeyspaces failed: %v", err)
	}

	wg := sync.WaitGroup{}
	rec := concurrency.AllErrorRecorder{}
	for _, keyspace := range keyspaces {
		wg.Add(1)
		go func(keyspace string) {
			defer wg.Done()
			if err := toTS.CreateKeyspace(keyspace); err != nil {
				if err == topo.ErrNodeExists {
					log.Warningf("keyspace %v already exists", keyspace)
				} else {
					rec.RecordError(err)
				}
			}
		}(keyspace)
	}
	wg.Wait()
	if rec.HasErrors() {
		log.Fatalf("copyKeyspaces failed: %v", rec.Error())
	}
}
// SlaveWasPromoted updates the replication graph after a slave tablet
// was promoted to master.
func SlaveWasPromoted(ts topo.Server, tabletAlias topo.TabletAlias) error {
	tablet, err := ts.GetTablet(tabletAlias)
	if err != nil {
		return err
	}
	return updateReplicationGraphForPromotedSlave(ts, tablet)
}
// ChangeOwnType is like ChangeType, except it fails if you no longer own the
// tablet record, as determined by CheckOwnership().
//
// Note that oldTablet is only used for its Alias, and to call CheckOwnership().
// Other fields in oldTablet have no effect on the update, which will read the
// latest tablet record before setting the type and health info (just like
// ChangeType() does).
//
// If successful, the updated tablet record is returned.
func ChangeOwnType(ctx context.Context, ts topo.Server, oldTablet *topodatapb.Tablet, newType topodatapb.TabletType, health map[string]string) (*topodatapb.Tablet, error) {
	return ts.UpdateTabletFields(ctx, oldTablet.Alias, func(tablet *topodatapb.Tablet) error {
		if err := CheckOwnership(oldTablet, tablet); err != nil {
			return err
		}
		return changeType(tablet, newType, health)
	})
}
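// changeOwnTypeExample is a hypothetical caller: it sketches how a
// process holding its own tablet record might demote itself, failing
// if another process took over the record in the meantime. oldTablet
// is assumed to be the record this process last wrote.
func changeOwnTypeExample(ctx context.Context, ts topo.Server, oldTablet *topodatapb.Tablet) error {
	newTablet, err := ChangeOwnType(ctx, ts, oldTablet, topodatapb.TabletType_SPARE, nil)
	if err != nil {
		// Either CheckOwnership failed or the update itself failed;
		// in both cases we no longer know the record is ours.
		return err
	}
	log.Infof("tablet %v is now %v", newTablet.Alias, newTablet.Type)
	return nil
}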
// addSrvkeyspace reads the SrvKeyspace record for the given cell and
// keyspace and adds it to the srvKeyspaces map.
func addSrvkeyspace(ctx context.Context, ts topo.Server, cell, keyspace string, srvKeyspaces map[string]interface{}) error {
	srvKeyspace, err := ts.GetSrvKeyspace(ctx, cell, keyspace)
	if err != nil {
		return fmt.Errorf("GetSrvKeyspace(%v, %v) failed: %v", cell, keyspace, err)
	}
	srvKeyspaces[keyspace] = srvKeyspace
	return nil
}
// FindOverlappingShards will return an array of OverlappingShards
// for the provided keyspace.
// We do not support more than two overlapping shards (for instance,
// having 40-80, 40-60 and 40-50 in the same keyspace is not supported and
// will return an error).
// If shards don't perfectly overlap, they are not returned.
func FindOverlappingShards(ctx context.Context, ts topo.Server, keyspace string) ([]*OverlappingShards, error) {
	shardMap, err := ts.FindAllShardsInKeyspace(ctx, keyspace)
	if err != nil {
		return nil, err
	}
	return findOverlappingShards(shardMap)
}
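// overlappingShardsExample is a hypothetical caller illustrating
// FindOverlappingShards during resharding: for a keyspace with shards
// -80, 80-, -40 and 40-80, it would pair -80 with {-40, 40-80}. It
// assumes OverlappingShards exposes its two sides as Left and Right;
// ctx and ts are assumed to be valid.
func overlappingShardsExample(ctx context.Context, ts topo.Server, keyspace string) error {
	overlapping, err := FindOverlappingShards(ctx, ts, keyspace)
	if err != nil {
		return err
	}
	for _, os := range overlapping {
		log.Infof("left: %v right: %v", os.Left, os.Right)
	}
	return nil
}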
// UpdateSrvShard creates the SrvShard object based on the global ShardInfo,
// and writes it to the given cell.
func UpdateSrvShard(ctx context.Context, ts topo.Server, cell string, si *topo.ShardInfo) error {
	srvShard := &topo.SrvShard{
		Name:       si.ShardName(),
		KeyRange:   si.KeyRange,
		MasterCell: si.MasterAlias.Cell,
	}
	return ts.UpdateSrvShard(ctx, cell, si.Keyspace(), si.ShardName(), srvShard)
}
// UpdateTabletEndpoints fixes up any entries in the serving graph that relate
// to a given tablet.
func UpdateTabletEndpoints(ctx context.Context, ts topo.Server, tablet *topo.Tablet) (err error) {
	if *lockSrvShard {
		// This lock is only necessary until all tablets are upgraded to lock-free.
		actionNode := actionnode.RebuildSrvShard()
		lockPath, err := actionNode.LockSrvShard(ctx, ts, tablet.Alias.Cell, tablet.Keyspace, tablet.Shard)
		if err != nil {
			return fmt.Errorf("can't lock shard for UpdateTabletEndpoints(%v): %v", tablet, err)
		}

		defer func() {
			actionNode.UnlockSrvShard(ctx, ts, tablet.Alias.Cell, tablet.Keyspace, tablet.Shard, lockPath, err)
		}()
	}

	srvTypes, err := ts.GetSrvTabletTypesPerShard(ctx, tablet.Alias.Cell, tablet.Keyspace, tablet.Shard)
	if err != nil {
		if err != topo.ErrNoNode {
			return err
		}
		// It's fine if there are no existing types.
		srvTypes = nil
	}

	wg := sync.WaitGroup{}
	errs := concurrency.AllErrorRecorder{}

	// Update the list that the tablet is supposed to be in (if any).
	if tablet.IsInServingGraph() {
		endpoint, err := tablet.EndPoint()
		if err != nil {
			return err
		}

		wg.Add(1)
		go func() {
			defer wg.Done()
			errs.RecordError(
				updateEndpoint(ctx, ts, tablet.Alias.Cell, tablet.Keyspace, tablet.Shard,
					tablet.Type, endpoint))
		}()
	}

	// Remove it from any other lists it isn't supposed to be in.
	for _, srvType := range srvTypes {
		if srvType != tablet.Type {
			wg.Add(1)
			go func(tabletType topo.TabletType) {
				defer wg.Done()
				errs.RecordError(
					removeEndpoint(ctx, ts, tablet.Alias.Cell, tablet.Keyspace, tablet.Shard,
						tabletType, tablet.Alias.Uid))
			}(srvType)
		}
	}

	wg.Wait()
	return errs.Error()
}
// ChangeOwnType is like ChangeType, except it fails if you no longer own the
// tablet record, as determined by CheckOwnership().
//
// Note that oldTablet is only used for its Alias, and to call CheckOwnership().
// Other fields in oldTablet have no effect on the update, which will read the
// latest tablet record before setting the type (just like ChangeType() does).
//
// If successful, the updated tablet record is returned.
func ChangeOwnType(ctx context.Context, ts topo.Server, oldTablet *topodatapb.Tablet, newType topodatapb.TabletType) (*topodatapb.Tablet, error) {
	return ts.UpdateTabletFields(ctx, oldTablet.Alias, func(tablet *topodatapb.Tablet) error {
		if err := CheckOwnership(oldTablet, tablet); err != nil {
			return err
		}
		tablet.Type = newType
		return nil
	})
}
// SetBlacklistedTables updates the BlacklistedTables field for a tablet.
// Make this external, since these transitions need to be forced from time
// to time.
func SetBlacklistedTables(ts topo.Server, tabletAlias topo.TabletAlias, tables []string) error {
	tablet, err := ts.GetTablet(tabletAlias)
	if err != nil {
		return err
	}

	tablet.BlacklistedTables = tables
	return topo.UpdateTablet(ts, tablet)
}
// RebuildKeyspace rebuilds the serving graph data while locking out
// other changes.
func RebuildKeyspace(ctx context.Context, log logutil.Logger, ts topo.Server, keyspace string, cells []string) (err error) {
	ctx, unlock, lockErr := ts.LockKeyspace(ctx, keyspace, "RebuildKeyspace")
	if lockErr != nil {
		return lockErr
	}
	defer unlock(&err)

	return RebuildKeyspaceLocked(ctx, log, ts, keyspace, cells)
}
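// lockAndDoExample is a hypothetical sketch of the locking pattern used
// by RebuildKeyspace above: LockKeyspace returns an unlock function
// that, when deferred with a pointer to the named return value, records
// the operation's final error as it releases the lock.
func lockAndDoExample(ctx context.Context, ts topo.Server, keyspace string) (err error) {
	ctx, unlock, lockErr := ts.LockKeyspace(ctx, keyspace, "lockAndDoExample")
	if lockErr != nil {
		return lockErr
	}
	defer unlock(&err)

	// ... perform topo mutations under the lock, assigning to err ...
	return nil
}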
// CheckKeyspaceLock checks we can take a keyspace lock as expected.
func CheckKeyspaceLock(ctx context.Context, t *testing.T, ts topo.Server) {
	if err := ts.CreateKeyspace(ctx, "test_keyspace", &pb.Keyspace{}); err != nil {
		t.Fatalf("CreateKeyspace: %v", err)
	}

	checkKeyspaceLockTimeout(ctx, t, ts)
	checkKeyspaceLockMissing(ctx, t, ts)
	checkKeyspaceLockUnblocks(ctx, t, ts)
}
// CopyShardReplications will create the ShardReplication objects in
// the destination topo.
func CopyShardReplications(ctx context.Context, fromTS, toTS topo.Impl) {
	keyspaces, err := fromTS.GetKeyspaces(ctx)
	if err != nil {
		log.Fatalf("fromTS.GetKeyspaces: %v", err)
	}
	tts := topo.Server{
		Impl: toTS,
	}

	wg := sync.WaitGroup{}
	rec := concurrency.AllErrorRecorder{}
	for _, keyspace := range keyspaces {
		wg.Add(1)
		go func(keyspace string) {
			defer wg.Done()
			shards, err := fromTS.GetShardNames(ctx, keyspace)
			if err != nil {
				rec.RecordError(fmt.Errorf("GetShardNames(%v): %v", keyspace, err))
				return
			}

			for _, shard := range shards {
				wg.Add(1)
				go func(keyspace, shard string) {
					defer wg.Done()

					// read the source shard to get the cells
					s, _, err := fromTS.GetShard(ctx, keyspace, shard)
					if err != nil {
						rec.RecordError(fmt.Errorf("GetShard(%v, %v): %v", keyspace, shard, err))
						return
					}

					for _, cell := range s.Cells {
						sri, err := fromTS.GetShardReplication(ctx, cell, keyspace, shard)
						if err != nil {
							rec.RecordError(fmt.Errorf("GetShardReplication(%v, %v, %v): %v", cell, keyspace, shard, err))
							continue
						}

						if err := tts.UpdateShardReplicationFields(ctx, cell, keyspace, shard, func(oldSR *topodatapb.ShardReplication) error {
							*oldSR = *sri.ShardReplication
							return nil
						}); err != nil {
							rec.RecordError(fmt.Errorf("UpdateShardReplicationFields(%v, %v, %v): %v", cell, keyspace, shard, err))
						}
					}
				}(keyspace, shard)
			}
		}(keyspace)
	}
	wg.Wait()
	if rec.HasErrors() {
		log.Fatalf("copyShardReplications failed: %v", rec.Error())
	}
}
// CopyShards will create the shards in the destination topo.
func CopyShards(fromTS, toTS topo.Server, deleteKeyspaceShards bool) {
	keyspaces, err := fromTS.GetKeyspaces()
	if err != nil {
		log.Fatalf("fromTS.GetKeyspaces: %v", err)
	}

	wg := sync.WaitGroup{}
	rec := concurrency.AllErrorRecorder{}
	for _, keyspace := range keyspaces {
		wg.Add(1)
		go func(keyspace string) {
			defer wg.Done()
			shards, err := fromTS.GetShardNames(keyspace)
			if err != nil {
				rec.RecordError(fmt.Errorf("GetShardNames(%v): %v", keyspace, err))
				return
			}

			if deleteKeyspaceShards {
				if err := toTS.DeleteKeyspaceShards(keyspace); err != nil {
					rec.RecordError(fmt.Errorf("DeleteKeyspaceShards(%v): %v", keyspace, err))
					return
				}
			}

			for _, shard := range shards {
				wg.Add(1)
				go func(keyspace, shard string) {
					defer wg.Done()
					if err := topo.CreateShard(toTS, keyspace, shard); err != nil {
						if err == topo.ErrNodeExists {
							log.Warningf("shard %v/%v already exists", keyspace, shard)
						} else {
							rec.RecordError(fmt.Errorf("CreateShard(%v, %v): %v", keyspace, shard, err))
							return
						}
					}

					si, err := fromTS.GetShard(keyspace, shard)
					if err != nil {
						rec.RecordError(fmt.Errorf("GetShard(%v, %v): %v", keyspace, shard, err))
						return
					}

					if err := toTS.UpdateShard(si); err != nil {
						rec.RecordError(fmt.Errorf("UpdateShard(%v, %v): %v", keyspace, shard, err))
					}
				}(keyspace, shard)
			}
		}(keyspace)
	}
	wg.Wait()
	if rec.HasErrors() {
		log.Fatalf("copyShards failed: %v", rec.Error())
	}
}