// MigrateServedTypes is used during horizontal splits to migrate a // served type from a list of shards to another. func (wr *Wrangler) MigrateServedTypes(ctx context.Context, keyspace, shard string, cells []string, servedType topodatapb.TabletType, reverse, skipReFreshState bool, filteredReplicationWaitTime time.Duration) error { if servedType == topodatapb.TabletType_MASTER { // we cannot migrate a master back, since when master migration // is done, the source shards are dead if reverse { return fmt.Errorf("Cannot migrate master back to %v/%v", keyspace, shard) } // we cannot skip refresh state for a master if skipReFreshState { return fmt.Errorf("Cannot skip refresh state for master migration on %v/%v", keyspace, shard) } } // find overlapping shards in this keyspace wr.Logger().Infof("Finding the overlapping shards in keyspace %v", keyspace) osList, err := topotools.FindOverlappingShards(ctx, wr.ts, keyspace) if err != nil { return fmt.Errorf("FindOverlappingShards failed: %v", err) } // find our shard in there os := topotools.OverlappingShardsForShard(osList, shard) if os == nil { return fmt.Errorf("Shard %v is not involved in any overlapping shards", shard) } // find which list is which: the sources have no source // shards, the destination have source shards. We check the // first entry in the lists, then just check they're // consistent var sourceShards []*topo.ShardInfo var destinationShards []*topo.ShardInfo if len(os.Left[0].SourceShards) == 0 { sourceShards = os.Left destinationShards = os.Right } else { sourceShards = os.Right destinationShards = os.Left } // Verify the sources has the type we're migrating (or not if reverse) for _, si := range sourceShards { if err := si.CheckServedTypesMigration(servedType, cells, !reverse); err != nil { return err } } // Verify the destinations do not have the type we're // migrating (or do if reverse) for _, si := range destinationShards { if err := si.CheckServedTypesMigration(servedType, cells, reverse); err != nil { return err } } // lock the shards: sources, then destinations // (note they're all ordered by shard name) actionNode := actionnode.MigrateServedTypes(servedType) sourceLockPath := make([]string, len(sourceShards)) for i, si := range sourceShards { sourceLockPath[i], err = wr.lockShard(ctx, si.Keyspace(), si.ShardName(), actionNode) if err != nil { wr.Logger().Errorf("Failed to lock source shard %v/%v, may need to unlock other shards manually", si.Keyspace(), si.ShardName()) return err } } destinationLockPath := make([]string, len(destinationShards)) for i, si := range destinationShards { destinationLockPath[i], err = wr.lockShard(ctx, si.Keyspace(), si.ShardName(), actionNode) if err != nil { wr.Logger().Errorf("Failed to lock destination shard %v/%v, may need to unlock other shards manually", si.Keyspace(), si.ShardName()) return err } } // record the action error and all unlock errors rec := concurrency.AllErrorRecorder{} // execute the migration rec.RecordError(wr.migrateServedTypes(ctx, keyspace, sourceShards, destinationShards, cells, servedType, reverse, filteredReplicationWaitTime)) // unlock the shards, we're done for i := len(destinationShards) - 1; i >= 0; i-- { rec.RecordError(wr.unlockShard(ctx, destinationShards[i].Keyspace(), destinationShards[i].ShardName(), actionNode, destinationLockPath[i], nil)) } for i := len(sourceShards) - 1; i >= 0; i-- { rec.RecordError(wr.unlockShard(ctx, sourceShards[i].Keyspace(), sourceShards[i].ShardName(), actionNode, sourceLockPath[i], nil)) } // rebuild the keyspace serving graph if there was no error if !rec.HasErrors() { rec.RecordError(wr.RebuildKeyspaceGraph(ctx, keyspace, cells, false)) } // Send a refresh to the tablets we just disabled, iff: // - we're not migrating a master // - we don't have any errors // - we're not told to skip the refresh if servedType != topodatapb.TabletType_MASTER && !rec.HasErrors() && !skipReFreshState { var refreshShards []*topo.ShardInfo if reverse { // For a backwards migration, we just disabled query service on the destination shards refreshShards = destinationShards } else { // For a forwards migration, we just disabled query service on the source shards refreshShards = sourceShards } for _, si := range refreshShards { rec.RecordError(wr.RefreshTablesByShard(ctx, si, servedType, cells)) } } return rec.Error() }
func (wr *Wrangler) MigrateServedTypes(keyspace, shard string, servedType topo.TabletType, reverse, skipRebuild bool) error { if servedType == topo.TYPE_MASTER { // we cannot migrate a master back, since when master migration // is done, the source shards are dead if reverse { return fmt.Errorf("Cannot migrate master back to %v/%v", keyspace, shard) } // we cannot skip rebuild for a master if skipRebuild { return fmt.Errorf("Cannot skip rebuild for master migration on %v/%v", keyspace, shard) } } // first figure out the destination shards // TODO(alainjobart) for now we only look in the same keyspace. // We might want to look elsewhere eventually too, maybe through // an extra command line parameter? shardNames, err := wr.ts.GetShardNames(keyspace) if err != nil { return nil } destinationShards := make([]*topo.ShardInfo, 0, 0) for _, shardName := range shardNames { si, err := wr.ts.GetShard(keyspace, shardName) if err != nil { return err } for _, sourceShard := range si.SourceShards { if sourceShard.Keyspace == keyspace && sourceShard.Shard == shard { // this shard is replicating from the source shard we specified log.Infof("Found %v/%v as a destination shard", si.Keyspace(), si.ShardName()) destinationShards = append(destinationShards, si) break } } } if len(destinationShards) == 0 { return fmt.Errorf("Cannot find any destination shard replicating from %v/%v", keyspace, shard) } // TODO(alainjobart) for a reverse split, we also need to find // more sources. For now, a single source is all we need, but we // still use a list of sources to not have to change the code later. sourceShards := make([]*topo.ShardInfo, 0, 0) // Verify the source has the type we're migrating si, err := wr.ts.GetShard(keyspace, shard) if err != nil { return err } foundType := topo.IsTypeInList(servedType, si.ServedTypes) if reverse { if foundType { return fmt.Errorf("Source shard %v/%v is already serving type %v", keyspace, shard, servedType) } } else { if !foundType { return fmt.Errorf("Source shard %v/%v is not serving type %v", keyspace, shard, servedType) } } if servedType == topo.TYPE_MASTER && len(si.ServedTypes) > 1 { return fmt.Errorf("Cannot migrate master out of %v/%v until everything else is migrated out", keyspace, shard) } sourceShards = append(sourceShards, si) // lock the shards: sources, then destinations // (note they're all ordered by shard name) actionNode := actionnode.MigrateServedTypes(servedType) sourceLockPath := make([]string, len(sourceShards)) for i, si := range sourceShards { sourceLockPath[i], err = wr.lockShard(si.Keyspace(), si.ShardName(), actionNode) if err != nil { log.Errorf("Failed to lock source shard %v/%v, may need to unlock other shards manually", si.Keyspace(), si.ShardName()) return err } } destinationLockPath := make([]string, len(destinationShards)) for i, si := range destinationShards { destinationLockPath[i], err = wr.lockShard(si.Keyspace(), si.ShardName(), actionNode) if err != nil { log.Errorf("Failed to lock destination shard %v/%v, may need to unlock other shards manually", si.Keyspace(), si.ShardName()) return err } } // record the action error and all unlock errors rec := concurrency.AllErrorRecorder{} // execute the migration shardCache := make(map[string]*topo.ShardInfo) rec.RecordError(wr.migrateServedTypes(keyspace, sourceShards, destinationShards, servedType, reverse, shardCache)) // unlock the shards, we're done for i := len(destinationShards) - 1; i >= 0; i-- { rec.RecordError(wr.unlockShard(destinationShards[i].Keyspace(), destinationShards[i].ShardName(), actionNode, destinationLockPath[i], nil)) } for i := len(sourceShards) - 1; i >= 0; i-- { rec.RecordError(wr.unlockShard(sourceShards[i].Keyspace(), sourceShards[i].ShardName(), actionNode, sourceLockPath[i], nil)) } // rebuild the keyspace serving graph if there was no error if rec.Error() == nil { if skipRebuild { log.Infof("Skipping keyspace rebuild, please run it at earliest convenience") } else { rec.RecordError(wr.RebuildKeyspaceGraph(keyspace, nil, shardCache)) } } return rec.Error() }
// MigrateServedTypes is used during horizontal splits to migrate a // served type from a list of shards to another. func (wr *Wrangler) MigrateServedTypes(keyspace, shard string, servedType topo.TabletType, reverse, skipRebuild bool) error { if servedType == topo.TYPE_MASTER { // we cannot migrate a master back, since when master migration // is done, the source shards are dead if reverse { return fmt.Errorf("Cannot migrate master back to %v/%v", keyspace, shard) } // we cannot skip rebuild for a master if skipRebuild { return fmt.Errorf("Cannot skip rebuild for master migration on %v/%v", keyspace, shard) } } // find overlapping shards in this keyspace wr.Logger().Infof("Finding the overlapping shards in keyspace %v", keyspace) osList, err := topotools.FindOverlappingShards(wr.ts, keyspace) if err != nil { return fmt.Errorf("FindOverlappingShards failed: %v", err) } // find our shard in there os := topotools.OverlappingShardsForShard(osList, shard) if os == nil { return fmt.Errorf("Shard %v is not involved in any overlapping shards", shard) } // find which list is which: the sources have no source // shards, the destination have source shards. We check the // first entry in the lists, then just check they're // consistent var sourceShards []*topo.ShardInfo var destinationShards []*topo.ShardInfo if len(os.Left[0].SourceShards) == 0 { sourceShards = os.Left destinationShards = os.Right } else { sourceShards = os.Right destinationShards = os.Left } // Verify the sources has the type we're migrating (or not if reverse) for _, si := range sourceShards { foundType := topo.IsTypeInList(servedType, si.ServedTypes) if reverse { if foundType { return fmt.Errorf("Source shard %v/%v is already serving type %v", si.Keyspace(), si.ShardName(), servedType) } } else { if !foundType { return fmt.Errorf("Source shard %v/%v is not serving type %v", si.Keyspace, si.ShardName(), servedType) } } if servedType == topo.TYPE_MASTER && len(si.ServedTypes) > 1 { return fmt.Errorf("Cannot migrate master out of %v/%v until everything else is migrated out", si.Keyspace(), si.ShardName()) } } // Verify the destinations do not have the type we're // migrating (or do if reverse) for _, si := range destinationShards { foundType := topo.IsTypeInList(servedType, si.ServedTypes) if reverse { if !foundType { return fmt.Errorf("Destination shard %v/%v is not serving type %v", si.Keyspace, si.ShardName(), servedType) } } else { if foundType { return fmt.Errorf("Destination shard %v/%v is already serving type %v", si.Keyspace(), si.ShardName(), servedType) } } } // lock the shards: sources, then destinations // (note they're all ordered by shard name) actionNode := actionnode.MigrateServedTypes(servedType) sourceLockPath := make([]string, len(sourceShards)) for i, si := range sourceShards { sourceLockPath[i], err = wr.lockShard(si.Keyspace(), si.ShardName(), actionNode) if err != nil { wr.Logger().Errorf("Failed to lock source shard %v/%v, may need to unlock other shards manually", si.Keyspace(), si.ShardName()) return err } } destinationLockPath := make([]string, len(destinationShards)) for i, si := range destinationShards { destinationLockPath[i], err = wr.lockShard(si.Keyspace(), si.ShardName(), actionNode) if err != nil { wr.Logger().Errorf("Failed to lock destination shard %v/%v, may need to unlock other shards manually", si.Keyspace(), si.ShardName()) return err } } // record the action error and all unlock errors rec := concurrency.AllErrorRecorder{} // execute the migration shardCache := make(map[string]*topo.ShardInfo) rec.RecordError(wr.migrateServedTypes(keyspace, sourceShards, destinationShards, servedType, reverse, shardCache)) // unlock the shards, we're done for i := len(destinationShards) - 1; i >= 0; i-- { rec.RecordError(wr.unlockShard(destinationShards[i].Keyspace(), destinationShards[i].ShardName(), actionNode, destinationLockPath[i], nil)) } for i := len(sourceShards) - 1; i >= 0; i-- { rec.RecordError(wr.unlockShard(sourceShards[i].Keyspace(), sourceShards[i].ShardName(), actionNode, sourceLockPath[i], nil)) } // rebuild the keyspace serving graph if there was no error if rec.Error() == nil { if skipRebuild { wr.Logger().Infof("Skipping keyspace rebuild, please run it at earliest convenience") } else { rec.RecordError(wr.RebuildKeyspaceGraph(keyspace, nil, shardCache)) } } return rec.Error() }