// init phase:
// - read the destination keyspace, make sure it has 'servedFrom' values
func (scw *SplitCloneWorker) init() error {
	scw.setState(stateSCInit)
	var err error

	// read the keyspace and validate it
	scw.keyspaceInfo, err = scw.wr.TopoServer().GetKeyspace(scw.keyspace)
	if err != nil {
		return fmt.Errorf("cannot read keyspace %v: %v", scw.keyspace, err)
	}

	// find the OverlappingShards in the keyspace
	osList, err := topotools.FindOverlappingShards(scw.wr.TopoServer(), scw.keyspace)
	if err != nil {
		return fmt.Errorf("cannot FindOverlappingShards in %v: %v", scw.keyspace, err)
	}

	// find the shard we mentioned in there, if any
	os := topotools.OverlappingShardsForShard(osList, scw.shard)
	if os == nil {
		return fmt.Errorf("the specified shard %v/%v is not in any overlapping shard", scw.keyspace, scw.shard)
	}

	// one side should have served types, the other one none,
	// figure out which is which, then double check them all
	if len(os.Left[0].ServedTypes) > 0 {
		scw.sourceShards = os.Left
		scw.destinationShards = os.Right
	} else {
		scw.sourceShards = os.Right
		scw.destinationShards = os.Left
	}

	// validate all serving types
	servingTypes := []topo.TabletType{topo.TYPE_MASTER, topo.TYPE_REPLICA, topo.TYPE_RDONLY}
	for _, st := range servingTypes {
		for _, si := range scw.sourceShards {
			if !topo.IsTypeInList(st, si.ServedTypes) {
				return fmt.Errorf("source shard %v/%v is not serving type %v", si.Keyspace(), si.ShardName(), st)
			}
		}
	}
	for _, si := range scw.destinationShards {
		if len(si.ServedTypes) > 0 {
			return fmt.Errorf("destination shard %v/%v is serving some types", si.Keyspace(), si.ShardName())
		}
	}

	return nil
}
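// topo.IsTypeInList is used above but not defined in this excerpt. A
// minimal sketch of the membership check it presumably performs is
// below; this is an assumption for illustration, not the actual
// library code.
func isTypeInListSketch(tabletType topo.TabletType, types []topo.TabletType) bool {
	for _, t := range types {
		if t == tabletType {
			return true
		}
	}
	return false
}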
// MigrateServedTypes is used during horizontal splits to migrate a
// served type from a list of shards to another.
func (wr *Wrangler) MigrateServedTypes(keyspace, shard string, servedType topo.TabletType, reverse, skipRebuild bool) error {
	if servedType == topo.TYPE_MASTER {
		// we cannot migrate a master back, since when master migration
		// is done, the source shards are dead
		if reverse {
			return fmt.Errorf("Cannot migrate master back to %v/%v", keyspace, shard)
		}
		// we cannot skip rebuild for a master
		if skipRebuild {
			return fmt.Errorf("Cannot skip rebuild for master migration on %v/%v", keyspace, shard)
		}
	}

	// first figure out the destination shards
	// TODO(alainjobart) for now we only look in the same keyspace.
	// We might want to look elsewhere eventually too, maybe through
	// an extra command line parameter?
	shardNames, err := wr.ts.GetShardNames(keyspace)
	if err != nil {
		return err
	}
	destinationShards := make([]*topo.ShardInfo, 0, 0)
	for _, shardName := range shardNames {
		si, err := wr.ts.GetShard(keyspace, shardName)
		if err != nil {
			return err
		}

		for _, sourceShard := range si.SourceShards {
			if sourceShard.Keyspace == keyspace && sourceShard.Shard == shard {
				// this shard is replicating from the source shard we specified
				log.Infof("Found %v/%v as a destination shard", si.Keyspace(), si.ShardName())
				destinationShards = append(destinationShards, si)
				break
			}
		}
	}
	if len(destinationShards) == 0 {
		return fmt.Errorf("Cannot find any destination shard replicating from %v/%v", keyspace, shard)
	}

	// TODO(alainjobart) for a reverse split, we also need to find
	// more sources. For now, a single source is all we need, but we
	// still use a list of sources to not have to change the code later.
	sourceShards := make([]*topo.ShardInfo, 0, 0)

	// Verify the source has the type we're migrating
	si, err := wr.ts.GetShard(keyspace, shard)
	if err != nil {
		return err
	}
	foundType := topo.IsTypeInList(servedType, si.ServedTypes)
	if reverse {
		if foundType {
			return fmt.Errorf("Source shard %v/%v is already serving type %v", keyspace, shard, servedType)
		}
	} else {
		if !foundType {
			return fmt.Errorf("Source shard %v/%v is not serving type %v", keyspace, shard, servedType)
		}
	}
	if servedType == topo.TYPE_MASTER && len(si.ServedTypes) > 1 {
		return fmt.Errorf("Cannot migrate master out of %v/%v until everything else is migrated out", keyspace, shard)
	}
	sourceShards = append(sourceShards, si)

	// lock the shards: sources, then destinations
	// (note they're all ordered by shard name)
	actionNode := actionnode.MigrateServedTypes(servedType)
	sourceLockPath := make([]string, len(sourceShards))
	for i, si := range sourceShards {
		sourceLockPath[i], err = wr.lockShard(si.Keyspace(), si.ShardName(), actionNode)
		if err != nil {
			log.Errorf("Failed to lock source shard %v/%v, may need to unlock other shards manually", si.Keyspace(), si.ShardName())
			return err
		}
	}
	destinationLockPath := make([]string, len(destinationShards))
	for i, si := range destinationShards {
		destinationLockPath[i], err = wr.lockShard(si.Keyspace(), si.ShardName(), actionNode)
		if err != nil {
			log.Errorf("Failed to lock destination shard %v/%v, may need to unlock other shards manually", si.Keyspace(), si.ShardName())
			return err
		}
	}

	// record the action error and all unlock errors
	rec := concurrency.AllErrorRecorder{}

	// execute the migration
	shardCache := make(map[string]*topo.ShardInfo)
	rec.RecordError(wr.migrateServedTypes(keyspace, sourceShards, destinationShards, servedType, reverse, shardCache))

	// unlock the shards, we're done
	for i := len(destinationShards) - 1; i >= 0; i-- {
		rec.RecordError(wr.unlockShard(destinationShards[i].Keyspace(), destinationShards[i].ShardName(), actionNode, destinationLockPath[i], nil))
	}
	for i := len(sourceShards) - 1; i >= 0; i-- {
		rec.RecordError(wr.unlockShard(sourceShards[i].Keyspace(), sourceShards[i].ShardName(), actionNode, sourceLockPath[i], nil))
	}

	// rebuild the keyspace serving graph if there was no error
	if rec.Error() == nil {
		if skipRebuild {
			log.Infof("Skipping keyspace rebuild, please run it at earliest convenience")
		} else {
			rec.RecordError(wr.RebuildKeyspaceGraph(keyspace, nil, shardCache))
		}
	}

	return rec.Error()
}
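// Example invocation (hypothetical keyspace and shard names): migrating
// the rdonly serving type away from a source shard during a horizontal
// split. The wrangler wr is assumed to be already connected to the
// topology server.
func migrateRdonlyExample(wr *Wrangler) error {
	// forward migration: rdonly traffic moves to the destination shards
	return wr.MigrateServedTypes("test_keyspace", "0", topo.TYPE_RDONLY, false /* reverse */, false /* skipRebuild */)
}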
// migrateServedTypes operates with all concerned shards locked.
func (wr *Wrangler) migrateServedTypes(keyspace string, sourceShards, destinationShards []*topo.ShardInfo, servedType topo.TabletType, reverse bool, shardCache map[string]*topo.ShardInfo) (err error) {
	// re-read all the shards so we are up to date
	for i, si := range sourceShards {
		if sourceShards[i], err = wr.ts.GetShard(si.Keyspace(), si.ShardName()); err != nil {
			return err
		}
		shardCache[si.ShardName()] = sourceShards[i]
	}
	for i, si := range destinationShards {
		if destinationShards[i], err = wr.ts.GetShard(si.Keyspace(), si.ShardName()); err != nil {
			return err
		}
		shardCache[si.ShardName()] = destinationShards[i]
	}

	ev := &events.MigrateServedTypes{
		Keyspace:          *topo.NewKeyspaceInfo(keyspace, nil, -1),
		SourceShards:      sourceShards,
		DestinationShards: destinationShards,
		ServedType:        servedType,
		Reverse:           reverse,
	}
	event.DispatchUpdate(ev, "start")
	defer func() {
		if err != nil {
			event.DispatchUpdate(ev, "failed: "+err.Error())
		}
	}()

	// check and update all shard records, in memory only
	for _, si := range sourceShards {
		if reverse {
			// need to add to source
			if topo.IsTypeInList(servedType, si.ServedTypes) {
				return fmt.Errorf("Source shard %v/%v is already serving type %v", si.Keyspace(), si.ShardName(), servedType)
			}
			si.ServedTypes = append(si.ServedTypes, servedType)
		} else {
			// need to remove from source
			var found bool
			if si.ServedTypes, found = removeType(servedType, si.ServedTypes); !found {
				return fmt.Errorf("Source shard %v/%v is not serving type %v", si.Keyspace(), si.ShardName(), servedType)
			}
		}
	}
	for _, si := range destinationShards {
		if reverse {
			// need to remove from destination
			var found bool
			if si.ServedTypes, found = removeType(servedType, si.ServedTypes); !found {
				return fmt.Errorf("Destination shard %v/%v is not serving type %v", si.Keyspace(), si.ShardName(), servedType)
			}
		} else {
			// need to add to destination
			if topo.IsTypeInList(servedType, si.ServedTypes) {
				return fmt.Errorf("Destination shard %v/%v is already serving type %v", si.Keyspace(), si.ShardName(), servedType)
			}
			si.ServedTypes = append(si.ServedTypes, servedType)
		}
	}

	// For master type migration, need to:
	// - switch the source shards to read-only
	// - gather all replication points
	// - wait for filtered replication to catch up before we continue
	// - disable filtered replication after the fact
	if servedType == topo.TYPE_MASTER {
		event.DispatchUpdate(ev, "setting all source masters read-only")
		err := wr.makeMastersReadOnly(sourceShards)
		if err != nil {
			return err
		}

		event.DispatchUpdate(ev, "getting positions of source masters")
		masterPositions, err := wr.getMastersPosition(sourceShards)
		if err != nil {
			return err
		}

		event.DispatchUpdate(ev, "waiting for destination masters to catch up")
		if err := wr.waitForFilteredReplication(masterPositions, destinationShards); err != nil {
			return err
		}

		for _, si := range destinationShards {
			si.SourceShards = nil
		}
	}

	// All is good, we can save the shards now
	event.DispatchUpdate(ev, "updating source shards")
	for _, si := range sourceShards {
		if err := topo.UpdateShard(wr.ts, si); err != nil {
			return err
		}
		shardCache[si.ShardName()] = si
	}
	event.DispatchUpdate(ev, "updating destination shards")
	for _, si := range destinationShards {
		if err := topo.UpdateShard(wr.ts, si); err != nil {
			return err
		}
		shardCache[si.ShardName()] = si
	}

	// And tell the new shards masters they can now be read-write.
	// Invoking a remote action will also make the tablet stop filtered
	// replication.
	if servedType == topo.TYPE_MASTER {
		event.DispatchUpdate(ev, "setting destination masters read-write")
		if err := wr.makeMastersReadWrite(destinationShards); err != nil {
			return err
		}
	}

	event.DispatchUpdate(ev, "finished")
	return nil
}
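// removeType is referenced by migrateServedTypes but not defined in
// this excerpt. Its contract, inferred from the call sites above:
// return the list without the given type, plus whether the type was
// found. A minimal sketch (assumed, not the actual implementation):
func removeTypeSketch(tabletType topo.TabletType, types []topo.TabletType) ([]topo.TabletType, bool) {
	result := make([]topo.TabletType, 0, len(types))
	found := false
	for _, t := range types {
		if t == tabletType {
			found = true
		} else {
			result = append(result, t)
		}
	}
	return result, found
}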
// InitTablet initializes the tablet record if necessary.
func (agent *ActionAgent) InitTablet(port, securePort, gRPCPort int) error {
	// only enabled if one of init_tablet_type (when healthcheck
	// is disabled) or init_keyspace (when healthcheck is enabled)
	// is passed in, then check other parameters
	if *initTabletType == "" && *initKeyspace == "" {
		return nil
	}

	// figure out our default target type
	var tabletType topo.TabletType
	if *initTabletType != "" {
		if *targetTabletType != "" {
			log.Fatalf("cannot specify both target_tablet_type and init_tablet_type parameters (as they might conflict)")
		}

		// use the type specified on the command line
		tabletType = topo.TabletType(*initTabletType)
		if !topo.IsTypeInList(tabletType, topo.AllTabletTypes) {
			log.Fatalf("InitTablet encountered unknown init_tablet_type '%v'", *initTabletType)
		}
		if tabletType == topo.TYPE_MASTER || tabletType == topo.TYPE_SCRAP {
			// We disallow TYPE_MASTER, so we don't have to change
			// shard.MasterAlias, and deal with the corner cases.
			// We also disallow TYPE_SCRAP, obviously.
			log.Fatalf("init_tablet_type cannot be %v", tabletType)
		}
	} else if *targetTabletType != "" {
		if tabletType := topo.TabletType(*targetTabletType); tabletType == topo.TYPE_MASTER {
			log.Fatalf("target_tablet_type cannot be '%v'. Use '%v' instead.", tabletType, topo.TYPE_REPLICA)
		}

		// use spare, the healthcheck will turn us into what
		// we need to be eventually
		tabletType = topo.TYPE_SPARE
	} else {
		log.Fatalf("if init tablet is enabled, one of init_tablet_type or target_tablet_type needs to be specified")
	}

	// create a context for this whole operation
	ctx, cancel := context.WithTimeout(agent.batchCtx, *initTimeout)
	defer cancel()

	// if we're assigned to a shard, make sure it exists, see if
	// we are its master, and update its cells list if necessary
	if tabletType != topo.TYPE_IDLE {
		if *initKeyspace == "" || *initShard == "" {
			log.Fatalf("if init tablet is enabled and the target type is not idle, init_keyspace and init_shard also need to be specified")
		}
		shard, _, err := topo.ValidateShardName(*initShard)
		if err != nil {
			log.Fatalf("cannot validate shard name: %v", err)
		}

		log.Infof("Reading shard record %v/%v", *initKeyspace, shard)

		// read the shard, create it if necessary
		si, err := topotools.GetOrCreateShard(ctx, agent.TopoServer, *initKeyspace, shard)
		if err != nil {
			return fmt.Errorf("InitTablet cannot GetOrCreateShard shard: %v", err)
		}
		if si.MasterAlias == agent.TabletAlias {
			// we are the current master for this shard (probably
			// means the master tablet process was just restarted),
			// so InitTablet as master.
			tabletType = topo.TYPE_MASTER
		}

		// See if we need to add the tablet's cell to the shard's cell
		// list. If we do, it has to be under the shard lock.
		if !si.HasCell(agent.TabletAlias.Cell) {
			actionNode := actionnode.UpdateShard()
			lockPath, err := actionNode.LockShard(ctx, agent.TopoServer, *initKeyspace, shard)
			if err != nil {
				return fmt.Errorf("LockShard(%v/%v) failed: %v", *initKeyspace, shard, err)
			}

			// re-read the shard with the lock
			si, err = agent.TopoServer.GetShard(ctx, *initKeyspace, shard)
			if err != nil {
				return actionNode.UnlockShard(ctx, agent.TopoServer, *initKeyspace, shard, lockPath, err)
			}

			// see if we really need to update it now
			if !si.HasCell(agent.TabletAlias.Cell) {
				si.Cells = append(si.Cells, agent.TabletAlias.Cell)

				// write it back
				if err := topo.UpdateShard(ctx, agent.TopoServer, si); err != nil {
					return actionNode.UnlockShard(ctx, agent.TopoServer, *initKeyspace, shard, lockPath, err)
				}
			}

			// and unlock
			if err := actionNode.UnlockShard(ctx, agent.TopoServer, *initKeyspace, shard, lockPath, nil); err != nil {
				return err
			}
		}
	}
	log.Infof("Initializing the tablet for type %v", tabletType)

	// figure out the hostname
	hostname := *tabletHostname
	if hostname == "" {
		var err error
		hostname, err = netutil.FullyQualifiedHostname()
		if err != nil {
			return err
		}
	}

	// create and populate tablet record
	tablet := &topo.Tablet{
		Alias:          agent.TabletAlias,
		Hostname:       hostname,
		Portmap:        make(map[string]int),
		Keyspace:       *initKeyspace,
		Shard:          *initShard,
		Type:           tabletType,
		DbNameOverride: *initDbNameOverride,
		Tags:           initTags,
	}
	if port != 0 {
		tablet.Portmap["vt"] = port
	}
	if securePort != 0 {
		tablet.Portmap["vts"] = securePort
	}
	if gRPCPort != 0 {
		tablet.Portmap["grpc"] = gRPCPort
	}
	if err := tablet.Complete(); err != nil {
		return fmt.Errorf("InitTablet tablet.Complete failed: %v", err)
	}

	// now try to create the record
	err := topo.CreateTablet(ctx, agent.TopoServer, tablet)
	switch err {
	case nil:
		// it worked, we're good, can update the replication graph
		if tablet.IsInReplicationGraph() {
			if err := topo.UpdateTabletReplicationData(ctx, agent.TopoServer, tablet); err != nil {
				return fmt.Errorf("UpdateTabletReplicationData failed: %v", err)
			}
		}
	case topo.ErrNodeExists:
		// The node already exists, will just try to update
		// it. So we read it first.
		oldTablet, err := agent.TopoServer.GetTablet(ctx, tablet.Alias)
		if err != nil {
			return fmt.Errorf("InitTablet failed to read existing tablet record: %v", err)
		}

		// Sanity check the keyspace and shard
		if oldTablet.Keyspace != tablet.Keyspace || oldTablet.Shard != tablet.Shard {
			return fmt.Errorf("InitTablet failed because existing tablet keyspace and shard %v/%v differ from the provided ones %v/%v", oldTablet.Keyspace, oldTablet.Shard, tablet.Keyspace, tablet.Shard)
		}

		// And overwrite the rest
		*(oldTablet.Tablet) = *tablet
		if err := topo.UpdateTablet(ctx, agent.TopoServer, oldTablet); err != nil {
			return fmt.Errorf("UpdateTablet failed: %v", err)
		}

		// Note we don't need to UpdateTabletReplicationData
		// as the tablet already existed with the right data
		// in the replication graph
	default:
		return fmt.Errorf("CreateTablet failed: %v", err)
	}

	// and now update the serving graph. Note we do that in any case,
	// to clean any inaccurate record from any part of the serving graph.
	if tabletType != topo.TYPE_IDLE {
		if err := topotools.UpdateTabletEndpoints(ctx, agent.TopoServer, tablet); err != nil {
			return fmt.Errorf("UpdateTabletEndpoints failed: %v", err)
		}
	}

	return nil
}
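// The init_* flags referenced by InitTablet are defined elsewhere in
// the package. A sketch of the assumed declarations (flag names match
// the code above; defaults and help strings are illustrative, and
// flag/time come from the standard library; initTags, a map-valued
// flag, is omitted from this sketch):
var (
	initTabletType     = flag.String("init_tablet_type", "", "tablet type to use when creating the tablet record")
	targetTabletType   = flag.String("target_tablet_type", "", "tablet type to promote to when healthy (healthcheck mode)")
	initKeyspace       = flag.String("init_keyspace", "", "keyspace this tablet belongs to")
	initShard          = flag.String("init_shard", "", "shard this tablet belongs to")
	initDbNameOverride = flag.String("init_db_name_override", "", "override for the database name")
	tabletHostname     = flag.String("tablet_hostname", "", "hostname to use instead of the auto-detected one")
	initTimeout        = flag.Duration("init_timeout", 1*time.Minute, "time to wait for tablet initialization")
)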
// migrateServedTypes operates with all concerned shards locked.
func (wr *Wrangler) migrateServedTypes(sourceShards, destinationShards []*topo.ShardInfo, servedType topo.TabletType, reverse bool) error {
	// re-read all the shards so we are up to date
	var err error
	for i, si := range sourceShards {
		if sourceShards[i], err = wr.ts.GetShard(si.Keyspace(), si.ShardName()); err != nil {
			return err
		}
	}
	for i, si := range destinationShards {
		if destinationShards[i], err = wr.ts.GetShard(si.Keyspace(), si.ShardName()); err != nil {
			return err
		}
	}

	// check and update all shard records, in memory only
	for _, si := range sourceShards {
		if reverse {
			// need to add to source
			if topo.IsTypeInList(servedType, si.ServedTypes) {
				return fmt.Errorf("Source shard %v/%v is already serving type %v", si.Keyspace(), si.ShardName(), servedType)
			}
			si.ServedTypes = append(si.ServedTypes, servedType)
		} else {
			// need to remove from source
			var found bool
			if si.ServedTypes, found = removeType(servedType, si.ServedTypes); !found {
				return fmt.Errorf("Source shard %v/%v is not serving type %v", si.Keyspace(), si.ShardName(), servedType)
			}
		}
	}
	for _, si := range destinationShards {
		if reverse {
			// need to remove from destination
			var found bool
			if si.ServedTypes, found = removeType(servedType, si.ServedTypes); !found {
				return fmt.Errorf("Destination shard %v/%v is not serving type %v", si.Keyspace(), si.ShardName(), servedType)
			}
		} else {
			// need to add to destination
			if topo.IsTypeInList(servedType, si.ServedTypes) {
				return fmt.Errorf("Destination shard %v/%v is already serving type %v", si.Keyspace(), si.ShardName(), servedType)
			}
			si.ServedTypes = append(si.ServedTypes, servedType)
		}
	}

	// For master type migration, need to:
	// - switch the source shards to read-only
	// - gather all replication points
	// - wait for filtered replication to catch up before we continue
	// - disable filtered replication after the fact
	if servedType == topo.TYPE_MASTER {
		err := wr.makeMastersReadOnly(sourceShards)
		if err != nil {
			return err
		}

		masterPositions, err := wr.getMastersPosition(sourceShards)
		if err != nil {
			return err
		}

		if err := wr.waitForFilteredReplication(masterPositions, destinationShards); err != nil {
			return err
		}

		for _, si := range destinationShards {
			si.SourceShards = nil
		}
	}

	// All is good, we can save the shards now
	for _, si := range sourceShards {
		if err := wr.ts.UpdateShard(si); err != nil {
			return err
		}
	}
	for _, si := range destinationShards {
		if err := wr.ts.UpdateShard(si); err != nil {
			return err
		}
	}

	// And tell the new shards masters they can now be read-write.
	// Invoking a remote action will also make the tablet stop filtered
	// replication.
	if servedType == topo.TYPE_MASTER {
		if err := wr.makeMastersReadWrite(destinationShards); err != nil {
			return err
		}
	}

	return nil
}
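// A quick illustration (hypothetical values) of the in-memory
// served-type transition performed above for a forward rdonly
// migration: the type is removed from the source list and appended to
// the destination list before anything is written back to the topology.
func servedTypeTransitionExample() {
	source := []topo.TabletType{topo.TYPE_MASTER, topo.TYPE_REPLICA, topo.TYPE_RDONLY}
	destination := []topo.TabletType{}

	source, found := removeType(topo.TYPE_RDONLY, source)
	if !found {
		log.Fatalf("source was not serving rdonly")
	}
	if !topo.IsTypeInList(topo.TYPE_RDONLY, destination) {
		destination = append(destination, topo.TYPE_RDONLY)
	}
	log.Infof("source now serves %v, destination now serves %v", source, destination)
}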
// MigrateServedTypes is used during horizontal splits to migrate a
// served type from a list of shards to another.
func (wr *Wrangler) MigrateServedTypes(keyspace, shard string, servedType topo.TabletType, reverse, skipRebuild bool) error {
	if servedType == topo.TYPE_MASTER {
		// we cannot migrate a master back, since when master migration
		// is done, the source shards are dead
		if reverse {
			return fmt.Errorf("Cannot migrate master back to %v/%v", keyspace, shard)
		}
		// we cannot skip rebuild for a master
		if skipRebuild {
			return fmt.Errorf("Cannot skip rebuild for master migration on %v/%v", keyspace, shard)
		}
	}

	// find overlapping shards in this keyspace
	wr.Logger().Infof("Finding the overlapping shards in keyspace %v", keyspace)
	osList, err := topotools.FindOverlappingShards(wr.ts, keyspace)
	if err != nil {
		return fmt.Errorf("FindOverlappingShards failed: %v", err)
	}

	// find our shard in there
	os := topotools.OverlappingShardsForShard(osList, shard)
	if os == nil {
		return fmt.Errorf("Shard %v is not involved in any overlapping shards", shard)
	}

	// find which list is which: the sources have no source
	// shards, the destinations have source shards. We check the
	// first entry in the lists, then just check they're
	// consistent
	var sourceShards []*topo.ShardInfo
	var destinationShards []*topo.ShardInfo
	if len(os.Left[0].SourceShards) == 0 {
		sourceShards = os.Left
		destinationShards = os.Right
	} else {
		sourceShards = os.Right
		destinationShards = os.Left
	}

	// Verify the sources have the type we're migrating (or not if reverse)
	for _, si := range sourceShards {
		foundType := topo.IsTypeInList(servedType, si.ServedTypes)
		if reverse {
			if foundType {
				return fmt.Errorf("Source shard %v/%v is already serving type %v", si.Keyspace(), si.ShardName(), servedType)
			}
		} else {
			if !foundType {
				return fmt.Errorf("Source shard %v/%v is not serving type %v", si.Keyspace(), si.ShardName(), servedType)
			}
		}

		if servedType == topo.TYPE_MASTER && len(si.ServedTypes) > 1 {
			return fmt.Errorf("Cannot migrate master out of %v/%v until everything else is migrated out", si.Keyspace(), si.ShardName())
		}
	}

	// Verify the destinations do not have the type we're
	// migrating (or do if reverse)
	for _, si := range destinationShards {
		foundType := topo.IsTypeInList(servedType, si.ServedTypes)
		if reverse {
			if !foundType {
				return fmt.Errorf("Destination shard %v/%v is not serving type %v", si.Keyspace(), si.ShardName(), servedType)
			}
		} else {
			if foundType {
				return fmt.Errorf("Destination shard %v/%v is already serving type %v", si.Keyspace(), si.ShardName(), servedType)
			}
		}
	}

	// lock the shards: sources, then destinations
	// (note they're all ordered by shard name)
	actionNode := actionnode.MigrateServedTypes(servedType)
	sourceLockPath := make([]string, len(sourceShards))
	for i, si := range sourceShards {
		sourceLockPath[i], err = wr.lockShard(si.Keyspace(), si.ShardName(), actionNode)
		if err != nil {
			wr.Logger().Errorf("Failed to lock source shard %v/%v, may need to unlock other shards manually", si.Keyspace(), si.ShardName())
			return err
		}
	}
	destinationLockPath := make([]string, len(destinationShards))
	for i, si := range destinationShards {
		destinationLockPath[i], err = wr.lockShard(si.Keyspace(), si.ShardName(), actionNode)
		if err != nil {
			wr.Logger().Errorf("Failed to lock destination shard %v/%v, may need to unlock other shards manually", si.Keyspace(), si.ShardName())
			return err
		}
	}

	// record the action error and all unlock errors
	rec := concurrency.AllErrorRecorder{}

	// execute the migration
	shardCache := make(map[string]*topo.ShardInfo)
	rec.RecordError(wr.migrateServedTypes(keyspace, sourceShards, destinationShards, servedType, reverse, shardCache))

	// unlock the shards, we're done
	for i := len(destinationShards) - 1; i >= 0; i-- {
		rec.RecordError(wr.unlockShard(destinationShards[i].Keyspace(), destinationShards[i].ShardName(), actionNode, destinationLockPath[i], nil))
	}
	for i := len(sourceShards) - 1; i >= 0; i-- {
		rec.RecordError(wr.unlockShard(sourceShards[i].Keyspace(), sourceShards[i].ShardName(), actionNode, sourceLockPath[i], nil))
	}

	// rebuild the keyspace serving graph if there was no error
	if rec.Error() == nil {
		if skipRebuild {
			wr.Logger().Infof("Skipping keyspace rebuild, please run it at earliest convenience")
		} else {
			rec.RecordError(wr.RebuildKeyspaceGraph(keyspace, nil, shardCache))
		}
	}

	return rec.Error()
}
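// concurrency.AllErrorRecorder, used above to collect the migration
// error plus every unlock error, aggregates all recorded errors rather
// than stopping at the first one. A small usage sketch (the errors are
// hypothetical; RecordError is a no-op for nil):
func errorRecorderExample() error {
	rec := concurrency.AllErrorRecorder{}
	rec.RecordError(nil)                         // ignored
	rec.RecordError(fmt.Errorf("unlock failed")) // recorded
	return rec.Error()                           // non-nil because one error was recorded
}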