// replicaMigrateServedFrom handles the slave (replica, rdonly) migration. func (wr *Wrangler) replicaMigrateServedFrom(ctx context.Context, ki *topo.KeyspaceInfo, sourceShard *topo.ShardInfo, destinationShard *topo.ShardInfo, servedType topodatapb.TabletType, cells []string, reverse bool, tables []string, ev *events.MigrateServedFrom) error { // Save the destination keyspace (its ServedFrom has been changed) event.DispatchUpdate(ev, "updating keyspace") if err := wr.ts.UpdateKeyspace(ctx, ki); err != nil { return err } // Save the source shard (its blacklisted tables field has changed) event.DispatchUpdate(ev, "updating source shard") if err := sourceShard.UpdateSourceBlacklistedTables(servedType, cells, reverse, tables); err != nil { return fmt.Errorf("UpdateSourceBlacklistedTables(%v/%v) failed: %v", sourceShard.Keyspace(), sourceShard.ShardName(), err) } if err := wr.ts.UpdateShard(ctx, sourceShard); err != nil { return fmt.Errorf("UpdateShard(%v/%v) failed: %v", sourceShard.Keyspace(), sourceShard.ShardName(), err) } // Now refresh the source servers so they reload their // blacklisted table list event.DispatchUpdate(ev, "refreshing sources tablets state so they update their blacklisted tables") if err := wr.RefreshTablesByShard(ctx, sourceShard, servedType, cells); err != nil { return err } return nil }
func (wr *Wrangler) MultiRestore(dstTabletAlias topo.TabletAlias, sources []topo.TabletAlias, concurrency, fetchConcurrency, insertTableConcurrency, fetchRetryCount int, strategy string) (err error) { var ti *topo.TabletInfo ti, err = wr.ts.GetTablet(dstTabletAlias) if err != nil { return } args := &actionnode.MultiRestoreArgs{SrcTabletAliases: sources, Concurrency: concurrency, FetchConcurrency: fetchConcurrency, InsertTableConcurrency: insertTableConcurrency, FetchRetryCount: fetchRetryCount, Strategy: strategy} ev := &events.MultiRestore{ Tablet: *ti.Tablet, Args: *args, } event.DispatchUpdate(ev, "starting") defer func() { if err != nil { event.DispatchUpdate(ev, "failed: "+err.Error()) } }() actionPath, err := wr.ai.MultiRestore(dstTabletAlias, args) if err != nil { return err } if err := wr.WaitForCompletion(actionPath); err != nil { return err } event.DispatchUpdate(ev, "finished") return nil }
// replicaMigrateServedFrom handles the slave (replica, rdonly) migration. func (wr *Wrangler) replicaMigrateServedFrom(ki *topo.KeyspaceInfo, sourceShard *topo.ShardInfo, destinationShard *topo.ShardInfo, servedType topo.TabletType, reverse bool, tables []string, ev *events.MigrateServedFrom) error { // Save the destination keyspace (its ServedFrom has been changed) event.DispatchUpdate(ev, "updating keyspace") if err := topo.UpdateKeyspace(wr.ts, ki); err != nil { return err } // Save the source shard (its blacklisted tables field has changed) event.DispatchUpdate(ev, "updating source shard") if sourceShard.BlacklistedTablesMap == nil { sourceShard.BlacklistedTablesMap = make(map[topo.TabletType][]string) } if reverse { delete(sourceShard.BlacklistedTablesMap, servedType) } else { sourceShard.BlacklistedTablesMap[servedType] = tables } if err := topo.UpdateShard(wr.ts, sourceShard); err != nil { return err } // Now refresh the source servers so they reload their // blacklisted table list event.DispatchUpdate(ev, "refreshing sources tablets state so they update their blacklisted tables") if err := wr.RefreshTablesByShard(sourceShard.Keyspace(), sourceShard.ShardName(), servedType); err != nil { return err } return nil }
// reparentShardExternal handles an external reparent. // // The ev parameter is an event struct prefilled with information that the // caller has on hand, which would be expensive for us to re-query. func (wr *Wrangler) reparentShardExternal(ev *events.Reparent, slaveTabletMap, masterTabletMap map[topo.TabletAlias]*topo.TabletInfo, masterElectTablet *topo.TabletInfo) error { event.DispatchUpdate(ev, "starting external") // we fix the new master in the replication graph event.DispatchUpdate(ev, "checking if new master was promoted") err := wr.slaveWasPromoted(masterElectTablet) if err != nil { // This suggests that the master-elect is dead. This is bad. return fmt.Errorf("slaveWasPromoted(%v) failed: %v", masterElectTablet, err) } // Once the slave is promoted, remove it from our maps delete(slaveTabletMap, masterElectTablet.Alias) delete(masterTabletMap, masterElectTablet.Alias) // Then fix all the slaves, including the old master. This // last step is very likely to time out for some tablets (one // random guy is dead, the old master is dead, ...). We // execute them all in parallel until we get to // wr.ActionTimeout(). After this, no other action with a // timeout is executed, so even if we got to the timeout, // we're still good. event.DispatchUpdate(ev, "restarting slaves") topotools.RestartSlavesExternal(wr.ts, wr.logger, slaveTabletMap, masterTabletMap, masterElectTablet.Alias, wr.slaveWasRestarted) return nil }
func (wr *Wrangler) migrateServedFrom(ki *topo.KeyspaceInfo, destinationShard *topo.ShardInfo, servedType topo.TabletType, reverse bool) (err error) { // re-read and update keyspace info record ki, err = wr.ts.GetKeyspace(ki.KeyspaceName()) if err != nil { return err } if reverse { if _, ok := ki.ServedFrom[servedType]; ok { return fmt.Errorf("Destination Keyspace %s is not serving type %v", ki.KeyspaceName(), servedType) } ki.ServedFrom[servedType] = destinationShard.SourceShards[0].Keyspace } else { if _, ok := ki.ServedFrom[servedType]; !ok { return fmt.Errorf("Destination Keyspace %s is already serving type %v", ki.KeyspaceName(), servedType) } delete(ki.ServedFrom, servedType) } // re-read and check the destination shard destinationShard, err = wr.ts.GetShard(destinationShard.Keyspace(), destinationShard.ShardName()) if err != nil { return err } if len(destinationShard.SourceShards) != 1 { return fmt.Errorf("Destination shard %v/%v is not a vertical split target", destinationShard.Keyspace(), destinationShard.ShardName()) } tables := destinationShard.SourceShards[0].Tables // read the source shard, we'll need its master, and we'll need to // update the blacklisted tables. var sourceShard *topo.ShardInfo sourceShard, err = wr.ts.GetShard(destinationShard.SourceShards[0].Keyspace, destinationShard.SourceShards[0].Shard) if err != nil { return err } ev := &events.MigrateServedFrom{ Keyspace: *ki, SourceShard: *sourceShard, DestinationShard: *destinationShard, ServedType: servedType, Reverse: reverse, } event.DispatchUpdate(ev, "start") defer func() { if err != nil { event.DispatchUpdate(ev, "failed: "+err.Error()) } }() if servedType == topo.TYPE_MASTER { err = wr.masterMigrateServedFrom(ki, sourceShard, destinationShard, servedType, tables, ev) } else { err = wr.replicaMigrateServedFrom(ki, sourceShard, destinationShard, servedType, reverse, tables, ev) } event.DispatchUpdate(ev, "finished") return }
func (wr *Wrangler) migrateServedFrom(ctx context.Context, ki *topo.KeyspaceInfo, destinationShard *topo.ShardInfo, servedType topodatapb.TabletType, cells []string, reverse bool, filteredReplicationWaitTime time.Duration) (err error) { // re-read and update keyspace info record ki, err = wr.ts.GetKeyspace(ctx, ki.KeyspaceName()) if err != nil { return err } if reverse { ki.UpdateServedFromMap(servedType, cells, destinationShard.SourceShards[0].Keyspace, false, nil) } else { ki.UpdateServedFromMap(servedType, cells, destinationShard.SourceShards[0].Keyspace, true, destinationShard.Cells) } // re-read and check the destination shard destinationShard, err = wr.ts.GetShard(ctx, destinationShard.Keyspace(), destinationShard.ShardName()) if err != nil { return err } if len(destinationShard.SourceShards) != 1 { return fmt.Errorf("Destination shard %v/%v is not a vertical split target", destinationShard.Keyspace(), destinationShard.ShardName()) } tables := destinationShard.SourceShards[0].Tables // read the source shard, we'll need its master, and we'll need to // update the blacklisted tables. var sourceShard *topo.ShardInfo sourceShard, err = wr.ts.GetShard(ctx, destinationShard.SourceShards[0].Keyspace, destinationShard.SourceShards[0].Shard) if err != nil { return err } ev := &events.MigrateServedFrom{ KeyspaceName: ki.KeyspaceName(), SourceShard: *sourceShard, DestinationShard: *destinationShard, ServedType: servedType, Reverse: reverse, } event.DispatchUpdate(ev, "start") defer func() { if err != nil { event.DispatchUpdate(ev, "failed: "+err.Error()) } }() if servedType == topodatapb.TabletType_MASTER { err = wr.masterMigrateServedFrom(ctx, ki, sourceShard, destinationShard, tables, ev, filteredReplicationWaitTime) } else { err = wr.replicaMigrateServedFrom(ctx, ki, sourceShard, destinationShard, servedType, cells, reverse, tables, ev) } event.DispatchUpdate(ev, "finished") return }
func (vscw *VerticalSplitCloneWorker) setState(state string) { vscw.mu.Lock() vscw.state = state vscw.mu.Unlock() event.DispatchUpdate(vscw.ev, state) }
func (scw *SplitCloneWorker) setState(state string) { scw.mu.Lock() scw.state = state scw.mu.Unlock() event.DispatchUpdate(scw.ev, state) }
func (wr *Wrangler) MultiSnapshot(keyRanges []key.KeyRange, tabletAlias topo.TabletAlias, concurrency int, tables, excludeTables []string, forceMasterSnapshot, skipSlaveRestart bool, maximumFilesize uint64) (manifests []string, parent topo.TabletAlias, err error) { var ti *topo.TabletInfo ti, err = wr.ts.GetTablet(tabletAlias) if err != nil { return } args := &actionnode.MultiSnapshotArgs{KeyRanges: keyRanges, Concurrency: concurrency, Tables: tables, ExcludeTables: excludeTables, SkipSlaveRestart: skipSlaveRestart, MaximumFilesize: maximumFilesize} ev := &events.MultiSnapshot{ Tablet: *ti.Tablet, Args: *args, } if len(tables) > 0 { event.DispatchUpdate(ev, "starting table") } else { event.DispatchUpdate(ev, "starting keyrange") } defer func() { if err != nil { event.DispatchUpdate(ev, "failed: "+err.Error()) } }() restoreAfterSnapshot, err := wr.prepareToSnapshot(ti, forceMasterSnapshot) if err != nil { return } defer func() { err = replaceError(err, restoreAfterSnapshot()) }() actionPath, err := wr.ai.MultiSnapshot(tabletAlias, args) if err != nil { return } results, err := wr.WaitForCompletionReply(actionPath) if err != nil { return } reply := results.(*actionnode.MultiSnapshotReply) event.DispatchUpdate(ev, "finished") return reply.ManifestPaths, reply.ParentAlias, nil }
func (vscw *VerticalSplitCloneWorker) recordError(err error) { vscw.mu.Lock() vscw.state = stateVSCError vscw.err = err vscw.mu.Unlock() event.DispatchUpdate(vscw.ev, "error: "+err.Error()) }
func (scw *SplitCloneWorker) recordError(err error) { scw.mu.Lock() scw.state = stateSCError scw.err = err scw.mu.Unlock() event.DispatchUpdate(scw.ev, "error: "+err.Error()) }
// EmergencyReparentShard will make the provided tablet the master for // the shard, when the old master is completely unreachable. func (wr *Wrangler) EmergencyReparentShard(ctx context.Context, keyspace, shard string, masterElectTabletAlias *topodatapb.TabletAlias, waitSlaveTimeout time.Duration) (err error) { // lock the shard ctx, unlock, lockErr := wr.ts.LockShard(ctx, keyspace, shard, fmt.Sprintf("EmergencyReparentShard(%v)", topoproto.TabletAliasString(masterElectTabletAlias))) if lockErr != nil { return lockErr } defer unlock(&err) // Create reusable Reparent event with available info ev := &events.Reparent{} // do the work err = wr.emergencyReparentShardLocked(ctx, ev, keyspace, shard, masterElectTabletAlias, waitSlaveTimeout) if err != nil { event.DispatchUpdate(ev, "failed EmergencyReparentShard: "+err.Error()) } else { event.DispatchUpdate(ev, "finished EmergencyReparentShard") } return err }
// EmergencyReparentShard will make the provided tablet the master for // the shard, when the old master is completely unreachable. func (wr *Wrangler) EmergencyReparentShard(ctx context.Context, keyspace, shard string, masterElectTabletAlias *pb.TabletAlias, waitSlaveTimeout time.Duration) error { // lock the shard actionNode := actionnode.ReparentShard(emergencyReparentShardOperation, masterElectTabletAlias) lockPath, err := wr.lockShard(ctx, keyspace, shard, actionNode) if err != nil { return err } // Create reusable Reparent event with available info ev := &events.Reparent{} // do the work err = wr.emergencyReparentShardLocked(ctx, ev, keyspace, shard, masterElectTabletAlias, waitSlaveTimeout) if err != nil { event.DispatchUpdate(ev, "failed EmergencyReparentShard: "+err.Error()) } else { event.DispatchUpdate(ev, "finished EmergencyReparentShard") } // and unlock return wr.unlockShard(ctx, keyspace, shard, actionNode, lockPath, err) }
func TestUpdateDispatch(t *testing.T) { triggered := false event.AddListener(func(ev *testEvent) { triggered = true }) want := "status" ev := &testEvent{} event.DispatchUpdate(ev, "status") if ev.Status != want { t.Errorf("ev.Status = %#v, want %#v", ev.Status, want) } if !triggered { t.Errorf("listener wasn't triggered on Dispatch()") } }
func (wr *Wrangler) migrateServedFrom(ki *topo.KeyspaceInfo, si *topo.ShardInfo, servedType topo.TabletType, reverse bool) (err error) { // re-read and update keyspace info record ki, err = wr.ts.GetKeyspace(ki.KeyspaceName()) if err != nil { return err } if reverse { if _, ok := ki.ServedFrom[servedType]; ok { return fmt.Errorf("Destination Keyspace %s is not serving type %v", ki.KeyspaceName(), servedType) } ki.ServedFrom[servedType] = si.SourceShards[0].Keyspace } else { if _, ok := ki.ServedFrom[servedType]; !ok { return fmt.Errorf("Destination Keyspace %s is already serving type %v", ki.KeyspaceName(), servedType) } delete(ki.ServedFrom, servedType) } // re-read and check the destination shard si, err = wr.ts.GetShard(si.Keyspace(), si.ShardName()) if err != nil { return err } if len(si.SourceShards) != 1 { return fmt.Errorf("Destination shard %v/%v is not a vertical split target", si.Keyspace(), si.ShardName()) } tables := si.SourceShards[0].Tables // read the source shard, we'll need its master sourceShard, err := wr.ts.GetShard(si.SourceShards[0].Keyspace, si.SourceShards[0].Shard) if err != nil { return err } ev := &events.MigrateServedFrom{ Keyspace: *ki, SourceShard: *sourceShard, DestinationShard: *si, ServedType: servedType, Reverse: reverse, } event.DispatchUpdate(ev, "start") defer func() { if err != nil { event.DispatchUpdate(ev, "failed: "+err.Error()) } }() // For master type migration, need to: // - switch the source shard to read-only // - gather the replication point // - wait for filtered replication to catch up before we continue // - disable filtered replication after the fact var sourceMasterTabletInfo *topo.TabletInfo if servedType == topo.TYPE_MASTER { // set master to read-only event.DispatchUpdate(ev, "setting source shard master to read-only") actionPath, err := wr.ai.SetReadOnly(sourceShard.MasterAlias) if err != nil { return err } if err := wr.WaitForCompletion(actionPath); err != nil { return err } // get the position event.DispatchUpdate(ev, "getting master position") sourceMasterTabletInfo, err = wr.ts.GetTablet(sourceShard.MasterAlias) if err != nil { return err } masterPosition, err := wr.ai.MasterPosition(sourceMasterTabletInfo, wr.ActionTimeout()) if err != nil { return err } // wait for it event.DispatchUpdate(ev, "waiting for destination master to catch up to source master") if err := wr.ai.WaitBlpPosition(si.MasterAlias, blproto.BlpPosition{ Uid: 0, Position: masterPosition, }, wr.ActionTimeout()); err != nil { return err } // and clear the shard record si.SourceShards = nil } // All is good, we can save the keyspace and shard (if needed) now event.DispatchUpdate(ev, "updating keyspace") if err = topo.UpdateKeyspace(wr.ts, ki); err != nil { return err } event.DispatchUpdate(ev, "updating destination shard") if servedType == topo.TYPE_MASTER { if err := topo.UpdateShard(wr.ts, si); err != nil { return err } } // Tell the new shards masters they can now be read-write. // Invoking a remote action will also make the tablet stop filtered // replication. 
event.DispatchUpdate(ev, "setting destination shard masters read-write") if servedType == topo.TYPE_MASTER { if err := wr.makeMastersReadWrite([]*topo.ShardInfo{si}); err != nil { return err } } // Now blacklist the table list on the right servers event.DispatchUpdate(ev, "setting blacklisted tables on source shard") if servedType == topo.TYPE_MASTER { if err := wr.ai.SetBlacklistedTables(sourceMasterTabletInfo, tables, wr.ActionTimeout()); err != nil { return err } } else { // We use the list of tables that are replicating // for the blacklist. In case of a reverse move, we clear the // blacklist. if reverse { tables = nil } if err := wr.SetBlacklistedTablesByShard(sourceShard.Keyspace(), sourceShard.ShardName(), servedType, tables); err != nil { return err } } event.DispatchUpdate(ev, "finished") return nil }
// migrateServedTypes operates with all concerned shards locked. func (wr *Wrangler) migrateServedTypes(keyspace string, sourceShards, destinationShards []*topo.ShardInfo, servedType topo.TabletType, reverse bool, shardCache map[string]*topo.ShardInfo) (err error) { // re-read all the shards so we are up to date for i, si := range sourceShards { if sourceShards[i], err = wr.ts.GetShard(si.Keyspace(), si.ShardName()); err != nil { return err } shardCache[si.ShardName()] = sourceShards[i] } for i, si := range destinationShards { if destinationShards[i], err = wr.ts.GetShard(si.Keyspace(), si.ShardName()); err != nil { return err } shardCache[si.ShardName()] = destinationShards[i] } ev := &events.MigrateServedTypes{ Keyspace: *topo.NewKeyspaceInfo(keyspace, nil, -1), SourceShards: sourceShards, DestinationShards: destinationShards, ServedType: servedType, Reverse: reverse, } event.DispatchUpdate(ev, "start") defer func() { if err != nil { event.DispatchUpdate(ev, "failed: "+err.Error()) } }() // check and update all shard records, in memory only for _, si := range sourceShards { if reverse { // need to add to source if topo.IsTypeInList(servedType, si.ServedTypes) { return fmt.Errorf("Source shard %v/%v is already serving type %v", si.Keyspace(), si.ShardName(), servedType) } si.ServedTypes = append(si.ServedTypes, servedType) } else { // need to remove from source var found bool if si.ServedTypes, found = removeType(servedType, si.ServedTypes); !found { return fmt.Errorf("Source shard %v/%v is not serving type %v", si.Keyspace(), si.ShardName(), servedType) } } } for _, si := range destinationShards { if reverse { // need to remove from destination var found bool if si.ServedTypes, found = removeType(servedType, si.ServedTypes); !found { return fmt.Errorf("Destination shard %v/%v is not serving type %v", si.Keyspace(), si.ShardName(), servedType) } } else { // need to add to destination if topo.IsTypeInList(servedType, si.ServedTypes) { return fmt.Errorf("Destination shard %v/%v is already serving type %v", si.Keyspace(), si.ShardName(), servedType) } si.ServedTypes = append(si.ServedTypes, servedType) } } // For master type migration, need to: // - switch the source shards to read-only // - gather all replication points // - wait for filtered replication to catch up before we continue // - disable filtered replication after the fact if servedType == topo.TYPE_MASTER { event.DispatchUpdate(ev, "setting all source masters read-only") err := wr.makeMastersReadOnly(sourceShards) if err != nil { return err } event.DispatchUpdate(ev, "getting positions of source masters") masterPositions, err := wr.getMastersPosition(sourceShards) if err != nil { return err } event.DispatchUpdate(ev, "waiting for destination masters to catch up") if err := wr.waitForFilteredReplication(masterPositions, destinationShards); err != nil { return err } for _, si := range destinationShards { si.SourceShards = nil } } // All is good, we can save the shards now event.DispatchUpdate(ev, "updating source shards") for _, si := range sourceShards { if err := topo.UpdateShard(wr.ts, si); err != nil { return err } shardCache[si.ShardName()] = si } event.DispatchUpdate(ev, "updating destination shards") for _, si := range destinationShards { if err := topo.UpdateShard(wr.ts, si); err != nil { return err } shardCache[si.ShardName()] = si } // And tell the new shards masters they can now be read-write. // Invoking a remote action will also make the tablet stop filtered // replication. 
if servedType == topo.TYPE_MASTER { event.DispatchUpdate(ev, "setting destination masters read-write") if err := wr.makeMastersReadWrite(destinationShards); err != nil { return err } } event.DispatchUpdate(ev, "finished") return nil }
func (scw *SplitCloneWorker) setErrorState(err error) { scw.SetState(WorkerStateError) event.DispatchUpdate(scw.ev, "error: "+err.Error()) }
func (scw *SplitCloneWorker) setState(state StatusWorkerState) { scw.SetState(state) event.DispatchUpdate(scw.ev, state.String()) }
func tabletExternallyReparentedLocked(ts topo.Server, tablet *topo.TabletInfo, actionTimeout, lockTimeout time.Duration, interrupted chan struct{}) (err error) { // read the shard, make sure again the master is not already good. // critical read, we want up to date info (and the shard is locked). shardInfo, err := ts.GetShardCritical(tablet.Keyspace, tablet.Shard) if err != nil { return err } if shardInfo.MasterAlias == tablet.Alias { return fmt.Errorf("this tablet is already the master") } // Read the tablets, make sure the master elect is known to the shard // (it's this tablet, so it better be!). // Note we will keep going with a partial tablet map, which usually // happens when a cell is not reachable. After these checks, the // guarantees we'll have are: // - global cell is reachable (we just locked and read the shard) // - the local cell that contains the new master is reachable // (as we're going to check the new master is in the list) // That should be enough. tabletMap, err := topo.GetTabletMapForShard(ts, tablet.Keyspace, tablet.Shard) switch err { case nil: // keep going case topo.ErrPartialResult: log.Warningf("Got topo.ErrPartialResult from GetTabletMapForShard, may need to re-init some tablets") default: return err } masterElectTablet, ok := tabletMap[tablet.Alias] if !ok { return fmt.Errorf("this master-elect tablet %v not found in replication graph %v/%v %v", tablet.Alias, tablet.Keyspace, tablet.Shard, topotools.MapKeys(tabletMap)) } // Create reusable Reparent event with available info ev := &events.Reparent{ ShardInfo: *shardInfo, NewMaster: *tablet.Tablet, } if oldMasterTablet, ok := tabletMap[shardInfo.MasterAlias]; ok { ev.OldMaster = *oldMasterTablet.Tablet } defer func() { if err != nil { event.DispatchUpdate(ev, "failed: "+err.Error()) } }() // sort the tablets, and handle them slaveTabletMap, masterTabletMap := topotools.SortedTabletMap(tabletMap) event.DispatchUpdate(ev, "starting external from tablet") // we fix the new master in the replication graph event.DispatchUpdate(ev, "mark ourself as new master") err = updateReplicationGraphForPromotedSlave(ts, tablet) if err != nil { // This suggests we can't talk to topo server. This is bad. return fmt.Errorf("updateReplicationGraphForPromotedSlave failed: %v", err) } // Once this tablet is promoted, remove it from our maps delete(slaveTabletMap, tablet.Alias) delete(masterTabletMap, tablet.Alias) // Then fix all the slaves, including the old master. This // last step is very likely to time out for some tablets (one // random guy is dead, the old master is dead, ...). We // execute them all in parallel until we get to // wr.ActionTimeout(). After this, no other action with a // timeout is executed, so even if we got to the timeout, // we're still good. event.DispatchUpdate(ev, "restarting slaves") logger := logutil.NewConsoleLogger() ai := initiator.NewActionInitiator(ts) topotools.RestartSlavesExternal(ts, logger, slaveTabletMap, masterTabletMap, masterElectTablet.Alias, func(ti *topo.TabletInfo, swrd *actionnode.SlaveWasRestartedArgs) error { return ai.RpcSlaveWasRestarted(ti, swrd, actionTimeout) }) // Compute the list of Cells we need to rebuild: old master and // all other cells if reparenting to another cell. 
cells := []string{shardInfo.MasterAlias.Cell} if shardInfo.MasterAlias.Cell != tablet.Alias.Cell { cells = nil } // now update the master record in the shard object event.DispatchUpdate(ev, "updating shard record") log.Infof("Updating Shard's MasterAlias record") shardInfo.MasterAlias = tablet.Alias if err = topo.UpdateShard(ts, shardInfo); err != nil { return err } // and rebuild the shard serving graph event.DispatchUpdate(ev, "rebuilding shard serving graph") log.Infof("Rebuilding shard serving graph data") if err = topotools.RebuildShard(logger, ts, tablet.Keyspace, tablet.Shard, cells, lockTimeout, interrupted); err != nil { return err } event.DispatchUpdate(ev, "finished") return nil }
// TabletExternallyReparented updates all topo records so the current // tablet is the new master for this shard. // Should be called under RPCWrapLock. func (agent *ActionAgent) TabletExternallyReparented(ctx context.Context, externalID string) error { startTime := time.Now() // If there is a finalize step running, wait for it to finish or time out // before checking the global shard record again. if agent.finalizeReparentCtx != nil { select { case <-agent.finalizeReparentCtx.Done(): agent.finalizeReparentCtx = nil case <-ctx.Done(): return ctx.Err() } } tablet := agent.Tablet() // Check the global shard record. si, err := agent.TopoServer.GetShard(ctx, tablet.Keyspace, tablet.Shard) if err != nil { log.Warningf("fastTabletExternallyReparented: failed to read global shard record for %v/%v: %v", tablet.Keyspace, tablet.Shard, err) return err } if topoproto.TabletAliasEqual(si.MasterAlias, tablet.Alias) { // We may get called on the current master even when nothing has changed. // If the global shard record is already updated, it means we successfully // finished a previous reparent to this tablet. return nil } // Remember when we were first told we're the master. // If another tablet claims to be master and offers a more recent time, // that tablet will be trusted over us. agent.mutex.Lock() agent._tabletExternallyReparentedTime = startTime agent._replicationDelay = 0 agent.mutex.Unlock() // Create a reusable Reparent event with available info. ev := &events.Reparent{ ShardInfo: *si, NewMaster: *tablet, OldMaster: topodatapb.Tablet{ Alias: si.MasterAlias, Type: topodatapb.TabletType_MASTER, }, ExternalID: externalID, } defer func() { if err != nil { event.DispatchUpdate(ev, "failed: "+err.Error()) } }() event.DispatchUpdate(ev, "starting external from tablet (fast)") var wg sync.WaitGroup var errs concurrency.AllErrorRecorder // Execute state change to master by force-updating only the local copy of the // tablet record. The actual record in topo will be updated later. log.Infof("fastTabletExternallyReparented: executing change callback for state change to MASTER") oldTablet := proto.Clone(tablet).(*topodatapb.Tablet) tablet.Type = topodatapb.TabletType_MASTER tablet.HealthMap = nil agent.setTablet(tablet) wg.Add(1) go func() { defer wg.Done() // This is where updateState will block for gracePeriod, while it gives // vtgate a chance to stop sending replica queries. if err := agent.updateState(ctx, oldTablet, "fastTabletExternallyReparented"); err != nil { errs.RecordError(fmt.Errorf("fastTabletExternallyReparented: failed to change tablet state to MASTER: %v", err)) } }() wg.Add(1) go func() { defer wg.Done() // Directly write the new master endpoint in the serving graph. // We will do a true rebuild in the background soon, but in the meantime, // this will be enough for clients to re-resolve the new master. 
event.DispatchUpdate(ev, "writing new master endpoint") log.Infof("fastTabletExternallyReparented: writing new master endpoint to serving graph") ep, err := topo.TabletEndPoint(tablet) if err != nil { errs.RecordError(fmt.Errorf("fastTabletExternallyReparented: failed to generate EndPoint for tablet %v: %v", tablet.Alias, err)) return } err = topo.UpdateEndPoints(ctx, agent.TopoServer, tablet.Alias.Cell, si.Keyspace(), si.ShardName(), topodatapb.TabletType_MASTER, &topodatapb.EndPoints{Entries: []*topodatapb.EndPoint{ep}}, -1) if err != nil { errs.RecordError(fmt.Errorf("fastTabletExternallyReparented: failed to update master endpoint: %v", err)) return } externalReparentStats.Record("NewMasterVisible", startTime) }() // Wait for serving state grace period and serving graph update. wg.Wait() if errs.HasErrors() { return errs.Error() } // Start the finalize stage with a background context, but connect the trace. bgCtx, cancel := context.WithTimeout(agent.batchCtx, *finalizeReparentTimeout) bgCtx = trace.CopySpan(bgCtx, ctx) agent.finalizeReparentCtx = bgCtx go func() { err := agent.finalizeTabletExternallyReparented(bgCtx, si, ev) cancel() if err != nil { log.Warningf("finalizeTabletExternallyReparented error: %v", err) event.DispatchUpdate(ev, "failed: "+err.Error()) return } externalReparentStats.Record("FullRebuild", startTime) }() return nil }
func (wr *Wrangler) initShardMasterLocked(ctx context.Context, ev *events.Reparent, keyspace, shard string, masterElectTabletAlias *pb.TabletAlias, force bool, waitSlaveTimeout time.Duration) error { shardInfo, err := wr.ts.GetShard(ctx, keyspace, shard) if err != nil { return err } ev.ShardInfo = *shardInfo event.DispatchUpdate(ev, "reading tablet map") tabletMap, err := wr.ts.GetTabletMapForShard(ctx, keyspace, shard) if err != nil { return err } // Check the master elect is in tabletMap masterElectTabletInfo, ok := tabletMap[*masterElectTabletAlias] if !ok { return fmt.Errorf("master-elect tablet %v is not in the shard", topoproto.TabletAliasString(masterElectTabletAlias)) } ev.NewMaster = *masterElectTabletInfo.Tablet // Check the master is the only master is the shard, or -force was used. _, masterTabletMap := topotools.SortedTabletMap(tabletMap) if !topoproto.TabletAliasEqual(shardInfo.MasterAlias, masterElectTabletAlias) { if !force { return fmt.Errorf("master-elect tablet %v is not the shard master, use -force to proceed anyway", topoproto.TabletAliasString(masterElectTabletAlias)) } wr.logger.Warningf("master-elect tablet %v is not the shard master, proceeding anyway as -force was used", topoproto.TabletAliasString(masterElectTabletAlias)) } if _, ok := masterTabletMap[*masterElectTabletAlias]; !ok { if !force { return fmt.Errorf("master-elect tablet %v is not a master in the shard, use -force to proceed anyway", topoproto.TabletAliasString(masterElectTabletAlias)) } wr.logger.Warningf("master-elect tablet %v is not a master in the shard, proceeding anyway as -force was used", topoproto.TabletAliasString(masterElectTabletAlias)) } haveOtherMaster := false for alias, ti := range masterTabletMap { if !topoproto.TabletAliasEqual(&alias, masterElectTabletAlias) && ti.Type != pb.TabletType_SCRAP { haveOtherMaster = true } } if haveOtherMaster { if !force { return fmt.Errorf("master-elect tablet %v is not the only master in the shard, use -force to proceed anyway", topoproto.TabletAliasString(masterElectTabletAlias)) } wr.logger.Warningf("master-elect tablet %v is not the only master in the shard, proceeding anyway as -force was used", topoproto.TabletAliasString(masterElectTabletAlias)) } // First phase: reset replication on all tablets. If anyone fails, // we stop. It is probably because it is unreachable, and may leave // an unstable database process in the mix, with a database daemon // at a wrong replication spot. 
event.DispatchUpdate(ev, "resetting replication on all tablets") wg := sync.WaitGroup{} rec := concurrency.AllErrorRecorder{} for alias, tabletInfo := range tabletMap { wg.Add(1) go func(alias pb.TabletAlias, tabletInfo *topo.TabletInfo) { defer wg.Done() wr.logger.Infof("resetting replication on tablet %v", topoproto.TabletAliasString(&alias)) if err := wr.TabletManagerClient().ResetReplication(ctx, tabletInfo); err != nil { rec.RecordError(fmt.Errorf("Tablet %v ResetReplication failed (either fix it, or Scrap it): %v", topoproto.TabletAliasString(&alias), err)) } }(alias, tabletInfo) } wg.Wait() if err := rec.Error(); err != nil { return err } // Tell the new master to break its slaves, return its replication // position wr.logger.Infof("initializing master on %v", topoproto.TabletAliasString(masterElectTabletAlias)) event.DispatchUpdate(ev, "initializing master") rp, err := wr.TabletManagerClient().InitMaster(ctx, masterElectTabletInfo) if err != nil { return err } // Now tell the new master to insert the reparent_journal row, // and tell everybody else to become a slave of the new master, // and wait for the row in the reparent_journal table. // We start all these in parallel, to handle the semi-sync // case: for the master to be able to commit its row in the // reparent_journal table, it needs connected slaves. event.DispatchUpdate(ev, "reparenting all tablets") now := time.Now().UnixNano() wgMaster := sync.WaitGroup{} wgSlaves := sync.WaitGroup{} var masterErr error for alias, tabletInfo := range tabletMap { if topoproto.TabletAliasEqual(&alias, masterElectTabletAlias) { wgMaster.Add(1) go func(alias pb.TabletAlias, tabletInfo *topo.TabletInfo) { defer wgMaster.Done() wr.logger.Infof("populating reparent journal on new master %v", topoproto.TabletAliasString(&alias)) masterErr = wr.TabletManagerClient().PopulateReparentJournal(ctx, tabletInfo, now, initShardMasterOperation, &alias, rp) }(alias, tabletInfo) } else { wgSlaves.Add(1) go func(alias pb.TabletAlias, tabletInfo *topo.TabletInfo) { defer wgSlaves.Done() wr.logger.Infof("initializing slave %v", topoproto.TabletAliasString(&alias)) if err := wr.TabletManagerClient().InitSlave(ctx, tabletInfo, masterElectTabletAlias, rp, now); err != nil { rec.RecordError(fmt.Errorf("Tablet %v InitSlave failed: %v", topoproto.TabletAliasString(&alias), err)) } }(alias, tabletInfo) } } // After the master is done, we can update the shard record // (note with semi-sync, it also means at least one slave is done) wgMaster.Wait() if masterErr != nil { wgSlaves.Wait() return fmt.Errorf("failed to PopulateReparentJournal on master: %v", masterErr) } if !topoproto.TabletAliasEqual(shardInfo.MasterAlias, masterElectTabletAlias) { shardInfo.MasterAlias = masterElectTabletAlias if err := wr.ts.UpdateShard(ctx, shardInfo); err != nil { wgSlaves.Wait() return fmt.Errorf("failed to update shard master record: %v", err) } } // Wait for the slaves to complete. If some of them fail, we // don't want to rebuild the shard serving graph (the failure // will most likely be a timeout, and our context will be // expired, so the rebuild will fail anyway) wgSlaves.Wait() if err := rec.Error(); err != nil { return err } // Then we rebuild the entire serving graph for the shard, // to account for all changes. event.DispatchUpdate(ev, "rebuilding shard graph") _, err = wr.RebuildShardGraph(ctx, keyspace, shard, nil) return err }
func (wr *Wrangler) initShardMasterLocked(ctx context.Context, ev *events.Reparent, keyspace, shard string, masterElectTabletAlias *topodatapb.TabletAlias, force bool, waitSlaveTimeout time.Duration) error { shardInfo, err := wr.ts.GetShard(ctx, keyspace, shard) if err != nil { return err } ev.ShardInfo = *shardInfo event.DispatchUpdate(ev, "reading tablet map") tabletMap, err := wr.ts.GetTabletMapForShard(ctx, keyspace, shard) if err != nil { return err } // Check the master elect is in tabletMap masterElectTabletInfo, ok := tabletMap[*masterElectTabletAlias] if !ok { return fmt.Errorf("master-elect tablet %v is not in the shard", topoproto.TabletAliasString(masterElectTabletAlias)) } ev.NewMaster = *masterElectTabletInfo.Tablet // Check the master is the only master is the shard, or -force was used. _, masterTabletMap := topotools.SortedTabletMap(tabletMap) if !topoproto.TabletAliasEqual(shardInfo.MasterAlias, masterElectTabletAlias) { if !force { return fmt.Errorf("master-elect tablet %v is not the shard master, use -force to proceed anyway", topoproto.TabletAliasString(masterElectTabletAlias)) } wr.logger.Warningf("master-elect tablet %v is not the shard master, proceeding anyway as -force was used", topoproto.TabletAliasString(masterElectTabletAlias)) } if _, ok := masterTabletMap[*masterElectTabletAlias]; !ok { if !force { return fmt.Errorf("master-elect tablet %v is not a master in the shard, use -force to proceed anyway", topoproto.TabletAliasString(masterElectTabletAlias)) } wr.logger.Warningf("master-elect tablet %v is not a master in the shard, proceeding anyway as -force was used", topoproto.TabletAliasString(masterElectTabletAlias)) } haveOtherMaster := false for alias := range masterTabletMap { if !topoproto.TabletAliasEqual(&alias, masterElectTabletAlias) { haveOtherMaster = true } } if haveOtherMaster { if !force { return fmt.Errorf("master-elect tablet %v is not the only master in the shard, use -force to proceed anyway", topoproto.TabletAliasString(masterElectTabletAlias)) } wr.logger.Warningf("master-elect tablet %v is not the only master in the shard, proceeding anyway as -force was used", topoproto.TabletAliasString(masterElectTabletAlias)) } // First phase: reset replication on all tablets. If anyone fails, // we stop. It is probably because it is unreachable, and may leave // an unstable database process in the mix, with a database daemon // at a wrong replication spot. 
event.DispatchUpdate(ev, "resetting replication on all tablets") wg := sync.WaitGroup{} rec := concurrency.AllErrorRecorder{} for alias, tabletInfo := range tabletMap { wg.Add(1) go func(alias topodatapb.TabletAlias, tabletInfo *topo.TabletInfo) { defer wg.Done() wr.logger.Infof("resetting replication on tablet %v", topoproto.TabletAliasString(&alias)) if err := wr.tmc.ResetReplication(ctx, tabletInfo.Tablet); err != nil { rec.RecordError(fmt.Errorf("Tablet %v ResetReplication failed (either fix it, or Scrap it): %v", topoproto.TabletAliasString(&alias), err)) } }(alias, tabletInfo) } wg.Wait() if err := rec.Error(); err != nil { return err } // Tell the new master to break its slaves, return its replication // position wr.logger.Infof("initializing master on %v", topoproto.TabletAliasString(masterElectTabletAlias)) event.DispatchUpdate(ev, "initializing master") rp, err := wr.tmc.InitMaster(ctx, masterElectTabletInfo.Tablet) if err != nil { return err } // Now tell the new master to insert the reparent_journal row, // and tell everybody else to become a slave of the new master, // and wait for the row in the reparent_journal table. // We start all these in parallel, to handle the semi-sync // case: for the master to be able to commit its row in the // reparent_journal table, it needs connected slaves. event.DispatchUpdate(ev, "reparenting all tablets") now := time.Now().UnixNano() wgMaster := sync.WaitGroup{} wgSlaves := sync.WaitGroup{} var masterErr error for alias, tabletInfo := range tabletMap { if topoproto.TabletAliasEqual(&alias, masterElectTabletAlias) { wgMaster.Add(1) go func(alias topodatapb.TabletAlias, tabletInfo *topo.TabletInfo) { defer wgMaster.Done() wr.logger.Infof("populating reparent journal on new master %v", topoproto.TabletAliasString(&alias)) masterErr = wr.tmc.PopulateReparentJournal(ctx, tabletInfo.Tablet, now, initShardMasterOperation, &alias, rp) }(alias, tabletInfo) } else { wgSlaves.Add(1) go func(alias topodatapb.TabletAlias, tabletInfo *topo.TabletInfo) { defer wgSlaves.Done() wr.logger.Infof("initializing slave %v", topoproto.TabletAliasString(&alias)) if err := wr.tmc.InitSlave(ctx, tabletInfo.Tablet, masterElectTabletAlias, rp, now); err != nil { rec.RecordError(fmt.Errorf("Tablet %v InitSlave failed: %v", topoproto.TabletAliasString(&alias), err)) } }(alias, tabletInfo) } } // After the master is done, we can update the shard record // (note with semi-sync, it also means at least one slave is done) wgMaster.Wait() if masterErr != nil { wgSlaves.Wait() return fmt.Errorf("failed to PopulateReparentJournal on master: %v", masterErr) } if !topoproto.TabletAliasEqual(shardInfo.MasterAlias, masterElectTabletAlias) { if _, err := wr.ts.UpdateShardFields(ctx, keyspace, shard, func(si *topo.ShardInfo) error { si.MasterAlias = masterElectTabletAlias return nil }); err != nil { wgSlaves.Wait() return fmt.Errorf("failed to update shard master record: %v", err) } } // Wait for the slaves to complete. If some of them fail, we // don't want to rebuild the shard serving graph (the failure // will most likely be a timeout, and our context will be // expired, so the rebuild will fail anyway) wgSlaves.Wait() if err := rec.Error(); err != nil { return err } // Create database if necessary on the master. Slaves will get it too through // replication. Since the user called InitShardMaster, they've told us to // assume that whatever data is on all the slaves is what they intended. 
// If the database doesn't exist, it means the user intends for these tablets // to begin serving with no data (i.e. first time initialization). createDB := fmt.Sprintf("CREATE DATABASE IF NOT EXISTS `%s`", topoproto.TabletDbName(masterElectTabletInfo.Tablet)) if _, err := wr.tmc.ExecuteFetchAsDba(ctx, masterElectTabletInfo.Tablet, false, []byte(createDB), 1, false, true); err != nil { return fmt.Errorf("failed to create database: %v", err) } return nil }
// masterMigrateServedFrom handles the master migration. The ordering is // a bit different than for rdonly / replica to guarantee a smooth transition. // // The order is as follows: // - Add BlacklistedTables on the source shard map for master // - Refresh the source master, so it stops writing on the tables // - Get the source master position, wait until destination master reaches it // - Clear SourceShard on the destination Shard // - Refresh the destination master, so its stops its filtered // replication and starts accepting writes func (wr *Wrangler) masterMigrateServedFrom(ctx context.Context, ki *topo.KeyspaceInfo, sourceShard *topo.ShardInfo, destinationShard *topo.ShardInfo, tables []string, ev *events.MigrateServedFrom, filteredReplicationWaitTime time.Duration) error { // Read the data we need sourceMasterTabletInfo, err := wr.ts.GetTablet(ctx, sourceShard.MasterAlias) if err != nil { return err } destinationMasterTabletInfo, err := wr.ts.GetTablet(ctx, destinationShard.MasterAlias) if err != nil { return err } // Update source shard (more blacklisted tables) event.DispatchUpdate(ev, "updating source shard") if err := sourceShard.UpdateSourceBlacklistedTables(topodatapb.TabletType_MASTER, nil, false, tables); err != nil { return fmt.Errorf("UpdateSourceBlacklistedTables(%v/%v) failed: %v", sourceShard.Keyspace(), sourceShard.ShardName(), err) } if err := wr.ts.UpdateShard(ctx, sourceShard); err != nil { return fmt.Errorf("UpdateShard(%v/%v) failed: %v", sourceShard.Keyspace(), sourceShard.ShardName(), err) } // Now refresh the blacklisted table list on the source master event.DispatchUpdate(ev, "refreshing source master so it updates its blacklisted tables") if err := wr.tmc.RefreshState(ctx, sourceMasterTabletInfo); err != nil { return err } // get the position event.DispatchUpdate(ev, "getting master position") masterPosition, err := wr.tmc.MasterPosition(ctx, sourceMasterTabletInfo) if err != nil { return err } // wait for it event.DispatchUpdate(ev, "waiting for destination master to catch up to source master") if err := wr.tmc.WaitBlpPosition(ctx, destinationMasterTabletInfo, &tabletmanagerdatapb.BlpPosition{ Uid: 0, Position: masterPosition, }, filteredReplicationWaitTime); err != nil { return err } // Update the destination keyspace (its ServedFrom has changed) event.DispatchUpdate(ev, "updating keyspace") if err = wr.ts.UpdateKeyspace(ctx, ki); err != nil { return err } // Update the destination shard (no more source shard) event.DispatchUpdate(ev, "updating destination shard") destinationShard.SourceShards = nil if err := wr.ts.UpdateShard(ctx, destinationShard); err != nil { return err } // Tell the new shards masters they can now be read-write. // Invoking a remote action will also make the tablet stop filtered // replication. event.DispatchUpdate(ev, "setting destination shard masters read-write") if err := wr.refreshMasters(ctx, []*topo.ShardInfo{destinationShard}); err != nil { return err } return nil }
func (vscw *VerticalSplitCloneWorker) setState(state StatusWorkerState) { vscw.SetState(state) event.DispatchUpdate(vscw.ev, state.String()) }
func (vscw *VerticalSplitCloneWorker) setErrorState(err error) { vscw.SetState(WorkerStateError) event.DispatchUpdate(vscw.ev, "error: "+err.Error()) }
// migrateServedTypes operates with all concerned shards locked. func (wr *Wrangler) migrateServedTypes(ctx context.Context, keyspace string, sourceShards, destinationShards []*topo.ShardInfo, cells []string, servedType topodatapb.TabletType, reverse bool, filteredReplicationWaitTime time.Duration) (err error) { // re-read all the shards so we are up to date wr.Logger().Infof("Re-reading all shards") for i, si := range sourceShards { if sourceShards[i], err = wr.ts.GetShard(ctx, si.Keyspace(), si.ShardName()); err != nil { return err } } for i, si := range destinationShards { if destinationShards[i], err = wr.ts.GetShard(ctx, si.Keyspace(), si.ShardName()); err != nil { return err } } ev := &events.MigrateServedTypes{ KeyspaceName: keyspace, SourceShards: sourceShards, DestinationShards: destinationShards, ServedType: servedType, Reverse: reverse, } event.DispatchUpdate(ev, "start") defer func() { if err != nil { event.DispatchUpdate(ev, "failed: "+err.Error()) } }() // For master type migration, need to: // - switch the source shards to read-only by disabling query service // - gather all replication points // - wait for filtered replication to catch up before we continue // - disable filtered replication after the fact if servedType == topodatapb.TabletType_MASTER { event.DispatchUpdate(ev, "disabling query service on all source masters") for _, si := range sourceShards { if err := si.UpdateDisableQueryService(topodatapb.TabletType_MASTER, nil, true); err != nil { return err } if err := wr.ts.UpdateShard(ctx, si); err != nil { return err } } if err := wr.refreshMasters(ctx, sourceShards); err != nil { return err } event.DispatchUpdate(ev, "getting positions of source masters") masterPositions, err := wr.getMastersPosition(ctx, sourceShards) if err != nil { return err } event.DispatchUpdate(ev, "waiting for destination masters to catch up") if err := wr.waitForFilteredReplication(ctx, masterPositions, destinationShards, filteredReplicationWaitTime); err != nil { return err } for _, si := range destinationShards { si.SourceShards = nil } } // Check and update all shard records, in memory only. // We remember if we need to refresh the state of the source tablets // so their query service is enabled again, for reverse migration. needToRefreshSourceTablets := false for _, si := range sourceShards { if err := si.UpdateServedTypesMap(servedType, cells, !reverse); err != nil { return err } if tc := si.GetTabletControl(servedType); reverse && tc != nil && tc.DisableQueryService { // this is a backward migration, where the // source tablets were disabled previously, so // we need to refresh them if err := si.UpdateDisableQueryService(servedType, cells, false); err != nil { return err } needToRefreshSourceTablets = true } if !reverse && servedType != topodatapb.TabletType_MASTER { // this is a forward migration, we need to disable // query service on the source shards. // (this was already done for masters earlier) if err := si.UpdateDisableQueryService(servedType, cells, true); err != nil { return err } } } // We remember if we need to refresh the state of the destination tablets // so their query service will be enabled. needToRefreshDestinationTablets := false for _, si := range destinationShards { if err := si.UpdateServedTypesMap(servedType, cells, reverse); err != nil { return err } if tc := si.GetTabletControl(servedType); !reverse && tc != nil && tc.DisableQueryService { // This is a forwards migration, and the destination query service was already in a disabled state. 
// We need to enable and force a refresh, otherwise it's possible that both the source and destination // will have query service disabled at the same time, and queries would have nowhere to go. if err := si.UpdateDisableQueryService(servedType, cells, false); err != nil { return err } needToRefreshDestinationTablets = true } if reverse && servedType != topodatapb.TabletType_MASTER { // this is a backwards migration, we need to disable // query service on the destination shards. // (we're not allowed to reverse a master migration) if err := si.UpdateDisableQueryService(servedType, cells, true); err != nil { return err } } } // All is good, we can save the shards now event.DispatchUpdate(ev, "updating source shards") for _, si := range sourceShards { if err := wr.ts.UpdateShard(ctx, si); err != nil { return err } } if needToRefreshSourceTablets { event.DispatchUpdate(ev, "refreshing source shard tablets so they restart their query service") for _, si := range sourceShards { wr.RefreshTablesByShard(ctx, si, servedType, cells) } } event.DispatchUpdate(ev, "updating destination shards") for _, si := range destinationShards { if err := wr.ts.UpdateShard(ctx, si); err != nil { return err } } if needToRefreshDestinationTablets { event.DispatchUpdate(ev, "refreshing destination shard tablets so they restart their query service") for _, si := range destinationShards { wr.RefreshTablesByShard(ctx, si, servedType, cells) } } // And tell the new shards masters they can now be read-write. // Invoking a remote action will also make the tablet stop filtered // replication. if servedType == topodatapb.TabletType_MASTER { event.DispatchUpdate(ev, "setting destination masters read-write") if err := wr.refreshMasters(ctx, destinationShards); err != nil { return err } } event.DispatchUpdate(ev, "finished") return nil }
func (wr *Wrangler) plannedReparentShardLocked(ctx context.Context, ev *events.Reparent, keyspace, shard string, masterElectTabletAlias *pb.TabletAlias, waitSlaveTimeout time.Duration) error { shardInfo, err := wr.ts.GetShard(ctx, keyspace, shard) if err != nil { return err } ev.ShardInfo = *shardInfo event.DispatchUpdate(ev, "reading tablet map") tabletMap, err := wr.ts.GetTabletMapForShard(ctx, keyspace, shard) if err != nil { return err } // Check corner cases we're going to depend on masterElectTabletInfo, ok := tabletMap[*masterElectTabletAlias] if !ok { return fmt.Errorf("master-elect tablet %v is not in the shard", topoproto.TabletAliasString(masterElectTabletAlias)) } ev.NewMaster = *masterElectTabletInfo.Tablet if topoproto.TabletAliasEqual(shardInfo.MasterAlias, masterElectTabletAlias) { return fmt.Errorf("master-elect tablet %v is already the master", topoproto.TabletAliasString(masterElectTabletAlias)) } oldMasterTabletInfo, ok := tabletMap[*shardInfo.MasterAlias] if !ok { return fmt.Errorf("old master tablet %v is not in the shard", topoproto.TabletAliasString(shardInfo.MasterAlias)) } ev.OldMaster = *oldMasterTabletInfo.Tablet // Demote the current master, get its replication position wr.logger.Infof("demote current master %v", shardInfo.MasterAlias) event.DispatchUpdate(ev, "demoting old master") rp, err := wr.tmc.DemoteMaster(ctx, oldMasterTabletInfo) if err != nil { return fmt.Errorf("old master tablet %v DemoteMaster failed: %v", topoproto.TabletAliasString(shardInfo.MasterAlias), err) } // Wait on the master-elect tablet until it reaches that position, // then promote it wr.logger.Infof("promote slave %v", topoproto.TabletAliasString(masterElectTabletAlias)) event.DispatchUpdate(ev, "promoting slave") rp, err = wr.tmc.PromoteSlaveWhenCaughtUp(ctx, masterElectTabletInfo, rp) if err != nil { return fmt.Errorf("master-elect tablet %v failed to catch up with replication or be upgraded to master: %v", topoproto.TabletAliasString(masterElectTabletAlias), err) } // Go through all the tablets: // - new master: populate the reparent journal // - everybody else: reparent to new master, wait for row event.DispatchUpdate(ev, "reparenting all tablets") now := time.Now().UnixNano() wgMaster := sync.WaitGroup{} wgSlaves := sync.WaitGroup{} rec := concurrency.AllErrorRecorder{} var masterErr error for alias, tabletInfo := range tabletMap { if topoproto.TabletAliasEqual(&alias, masterElectTabletAlias) { wgMaster.Add(1) go func(alias pb.TabletAlias, tabletInfo *topo.TabletInfo) { defer wgMaster.Done() wr.logger.Infof("populating reparent journal on new master %v", topoproto.TabletAliasString(&alias)) masterErr = wr.TabletManagerClient().PopulateReparentJournal(ctx, tabletInfo, now, plannedReparentShardOperation, &alias, rp) }(alias, tabletInfo) } else { wgSlaves.Add(1) go func(alias pb.TabletAlias, tabletInfo *topo.TabletInfo) { defer wgSlaves.Done() wr.logger.Infof("setting new master on slave %v", topoproto.TabletAliasString(&alias)) // also restart replication on old master forceStartSlave := topoproto.TabletAliasEqual(&alias, oldMasterTabletInfo.Alias) if err := wr.TabletManagerClient().SetMaster(ctx, tabletInfo, masterElectTabletAlias, now, forceStartSlave); err != nil { rec.RecordError(fmt.Errorf("Tablet %v SetMaster failed: %v", topoproto.TabletAliasString(&alias), err)) return } }(alias, tabletInfo) } } // After the master is done, we can update the shard record // (note with semi-sync, it also means at least one slave is done) wgMaster.Wait() if masterErr != nil { wgSlaves.Wait() 
return fmt.Errorf("failed to PopulateReparentJournal on master: %v", masterErr) } wr.logger.Infof("updating shard record with new master %v", masterElectTabletAlias) shardInfo.MasterAlias = masterElectTabletAlias if err := wr.ts.UpdateShard(ctx, shardInfo); err != nil { wgSlaves.Wait() return fmt.Errorf("failed to update shard master record: %v", err) } // Wait for the slaves to complete. If some of them fail, we // will rebuild the shard serving graph anyway wgSlaves.Wait() if err := rec.Error(); err != nil { wr.Logger().Errorf("Some slaves failed to reparent: %v", err) return err } // Then we rebuild the entire serving graph for the shard, // to account for all changes. wr.logger.Infof("rebuilding shard graph") event.DispatchUpdate(ev, "rebuilding shard serving graph") _, err = wr.RebuildShardGraph(ctx, keyspace, shard, nil) return err }
func (wr *Wrangler) shardExternallyReparentedLocked(keyspace, shard string, masterElectTabletAlias topo.TabletAlias) (err error) { // read the shard, make sure the master is not already good. shardInfo, err := wr.ts.GetShard(keyspace, shard) if err != nil { return err } if shardInfo.MasterAlias == masterElectTabletAlias { return fmt.Errorf("master-elect tablet %v is already master", masterElectTabletAlias) } // Read the tablets, make sure the master elect is known to us. // Note we will keep going with a partial tablet map, which usually // happens when a cell is not reachable. After these checks, the // guarantees we'll have are: // - global cell is reachable (we just locked and read the shard) // - the local cell that contains the new master is reachable // (as we're going to check the new master is in the list) // That should be enough. tabletMap, err := topo.GetTabletMapForShard(wr.ts, keyspace, shard) switch err { case nil: // keep going case topo.ErrPartialResult: wr.logger.Warningf("Got topo.ErrPartialResult from GetTabletMapForShard, may need to re-init some tablets") default: return err } masterElectTablet, ok := tabletMap[masterElectTabletAlias] if !ok { return fmt.Errorf("master-elect tablet %v not found in replication graph %v/%v %v", masterElectTabletAlias, keyspace, shard, topotools.MapKeys(tabletMap)) } // Create reusable Reparent event with available info ev := &events.Reparent{ ShardInfo: *shardInfo, NewMaster: *masterElectTablet.Tablet, } if oldMasterTablet, ok := tabletMap[shardInfo.MasterAlias]; ok { ev.OldMaster = *oldMasterTablet.Tablet } defer func() { if err != nil { event.DispatchUpdate(ev, "failed: "+err.Error()) } }() // sort the tablets, and handle them slaveTabletMap, masterTabletMap := topotools.SortedTabletMap(tabletMap) err = wr.reparentShardExternal(ev, slaveTabletMap, masterTabletMap, masterElectTablet) if err != nil { wr.logger.Infof("Skipping shard rebuild with failed reparent") return err } // Compute the list of Cells we need to rebuild: old master and // all other cells if reparenting to another cell. cells := []string{shardInfo.MasterAlias.Cell} if shardInfo.MasterAlias.Cell != masterElectTabletAlias.Cell { cells = nil } // now update the master record in the shard object event.DispatchUpdate(ev, "updating shard record") wr.logger.Infof("Updating Shard's MasterAlias record") shardInfo.MasterAlias = masterElectTabletAlias if err = topo.UpdateShard(wr.ts, shardInfo); err != nil { return err } // and rebuild the shard serving graph event.DispatchUpdate(ev, "rebuilding shard serving graph") wr.logger.Infof("Rebuilding shard serving graph data") if _, err = topotools.RebuildShard(wr.logger, wr.ts, masterElectTablet.Keyspace, masterElectTablet.Shard, cells, wr.lockTimeout, interrupted); err != nil { return err } event.DispatchUpdate(ev, "finished") return nil }
func (wr *Wrangler) emergencyReparentShardLocked(ctx context.Context, ev *events.Reparent, keyspace, shard string, masterElectTabletAlias *pb.TabletAlias, waitSlaveTimeout time.Duration) error {
	shardInfo, err := wr.ts.GetShard(ctx, keyspace, shard)
	if err != nil {
		return err
	}
	ev.ShardInfo = *shardInfo

	event.DispatchUpdate(ev, "reading all tablets")
	tabletMap, err := wr.ts.GetTabletMapForShard(ctx, keyspace, shard)
	if err != nil {
		return err
	}

	// Check the corner cases we're going to depend on.
	masterElectTabletInfo, ok := tabletMap[*masterElectTabletAlias]
	if !ok {
		return fmt.Errorf("master-elect tablet %v is not in the shard", topoproto.TabletAliasString(masterElectTabletAlias))
	}
	ev.NewMaster = *masterElectTabletInfo.Tablet
	if topoproto.TabletAliasEqual(shardInfo.MasterAlias, masterElectTabletAlias) {
		return fmt.Errorf("master-elect tablet %v is already the master", topoproto.TabletAliasString(masterElectTabletAlias))
	}

	// Deal with the old master: try to remote-scrap it; if it's
	// truly dead, we force-scrap it. Remove it from our map in any case.
	if shardInfo.HasMaster() {
		scrapOldMaster := true
		oldMasterTabletInfo, ok := tabletMap[*shardInfo.MasterAlias]
		if ok {
			delete(tabletMap, *shardInfo.MasterAlias)
		} else {
			oldMasterTabletInfo, err = wr.ts.GetTablet(ctx, shardInfo.MasterAlias)
			if err != nil {
				wr.logger.Warningf("cannot read old master tablet %v, won't touch it: %v", topoproto.TabletAliasString(shardInfo.MasterAlias), err)
				scrapOldMaster = false
			}
		}

		if scrapOldMaster {
			ev.OldMaster = *oldMasterTabletInfo.Tablet
			wr.logger.Infof("scrapping old master %v", topoproto.TabletAliasString(shardInfo.MasterAlias))

			ctx, cancel := context.WithTimeout(ctx, waitSlaveTimeout)
			defer cancel()

			if err := wr.tmc.Scrap(ctx, oldMasterTabletInfo); err != nil {
				wr.logger.Warningf("remote scrap of old master failed, will force the scrap: %v", err)
				if err := topotools.Scrap(ctx, wr.ts, shardInfo.MasterAlias, true); err != nil {
					wr.logger.Warningf("old master topo scrapping failed, continuing anyway: %v", err)
				}
			}
		}
	}

	// Stop replication on all slaves, and get their current
	// replication position.
	event.DispatchUpdate(ev, "stop replication on all slaves")
	wg := sync.WaitGroup{}
	mu := sync.Mutex{}
	statusMap := make(map[pb.TabletAlias]myproto.ReplicationStatus)
	for alias, tabletInfo := range tabletMap {
		wg.Add(1)
		go func(alias pb.TabletAlias, tabletInfo *topo.TabletInfo) {
			defer wg.Done()
			wr.logger.Infof("getting replication position from %v", topoproto.TabletAliasString(&alias))
			ctx, cancel := context.WithTimeout(ctx, waitSlaveTimeout)
			defer cancel()
			rp, err := wr.TabletManagerClient().StopReplicationAndGetStatus(ctx, tabletInfo)
			if err != nil {
				wr.logger.Warningf("failed to get replication status from %v, ignoring tablet: %v", topoproto.TabletAliasString(&alias), err)
				return
			}
			mu.Lock()
			statusMap[alias] = rp
			mu.Unlock()
		}(alias, tabletInfo)
	}
	wg.Wait()

	// Verify the master-elect is alive and has the most advanced position.
	masterElectStatus, ok := statusMap[*masterElectTabletAlias]
	if !ok {
		return fmt.Errorf("couldn't get master elect %v replication position", topoproto.TabletAliasString(masterElectTabletAlias))
	}
	for alias, status := range statusMap {
		if topoproto.TabletAliasEqual(&alias, masterElectTabletAlias) {
			continue
		}
		if !masterElectStatus.Position.AtLeast(status.Position) {
			return fmt.Errorf("tablet %v is more advanced than master elect tablet %v: %v > %v", topoproto.TabletAliasString(&alias), topoproto.TabletAliasString(masterElectTabletAlias), status.Position, masterElectStatus.Position)
		}
	}

	// Promote the master-elect.
	wr.logger.Infof("promote slave %v", topoproto.TabletAliasString(masterElectTabletAlias))
	event.DispatchUpdate(ev, "promoting slave")
	rp, err := wr.tmc.PromoteSlave(ctx, masterElectTabletInfo)
	if err != nil {
		return fmt.Errorf("master-elect tablet %v failed to be promoted to master: %v", topoproto.TabletAliasString(masterElectTabletAlias), err)
	}

	// Reset replication on all slaves to point to the new master, and
	// insert a test row in the new master.
	// Go through all the tablets:
	// - new master: populate the reparent journal
	// - everybody else: reparent to the new master, wait for the row
	event.DispatchUpdate(ev, "reparenting all tablets")
	now := time.Now().UnixNano()
	wgMaster := sync.WaitGroup{}
	wgSlaves := sync.WaitGroup{}
	rec := concurrency.AllErrorRecorder{}
	var masterErr error
	for alias, tabletInfo := range tabletMap {
		if topoproto.TabletAliasEqual(&alias, masterElectTabletAlias) {
			wgMaster.Add(1)
			go func(alias pb.TabletAlias, tabletInfo *topo.TabletInfo) {
				defer wgMaster.Done()
				wr.logger.Infof("populating reparent journal on new master %v", topoproto.TabletAliasString(&alias))
				masterErr = wr.TabletManagerClient().PopulateReparentJournal(ctx, tabletInfo, now, emergencyReparentShardOperation, &alias, rp)
			}(alias, tabletInfo)
		} else {
			wgSlaves.Add(1)
			go func(alias pb.TabletAlias, tabletInfo *topo.TabletInfo) {
				defer wgSlaves.Done()
				wr.logger.Infof("setting new master on slave %v", topoproto.TabletAliasString(&alias))
				forceStartSlave := false
				if status, ok := statusMap[alias]; ok {
					forceStartSlave = status.SlaveIORunning || status.SlaveSQLRunning
				}
				if err := wr.TabletManagerClient().SetMaster(ctx, tabletInfo, masterElectTabletAlias, now, forceStartSlave); err != nil {
					rec.RecordError(fmt.Errorf("tablet %v SetMaster failed: %v", topoproto.TabletAliasString(&alias), err))
				}
			}(alias, tabletInfo)
		}
	}

	// After the master is done, we can update the shard record
	// (note that with semi-sync, it also means at least one slave is done).
	wgMaster.Wait()
	if masterErr != nil {
		wgSlaves.Wait()
		return fmt.Errorf("failed to PopulateReparentJournal on master: %v", masterErr)
	}
	wr.logger.Infof("updating shard record with new master %v", topoproto.TabletAliasString(masterElectTabletAlias))
	shardInfo.MasterAlias = masterElectTabletAlias
	if err := wr.ts.UpdateShard(ctx, shardInfo); err != nil {
		wgSlaves.Wait()
		return fmt.Errorf("failed to update shard master record: %v", err)
	}

	// Wait for the slaves to complete. If some of them fail, we
	// will rebuild the shard serving graph anyway.
	wgSlaves.Wait()
	if err := rec.Error(); err != nil {
		wr.Logger().Errorf("Some slaves failed to reparent: %v", err)
		return err
	}

	// Then we rebuild the entire serving graph for the shard,
	// to account for all changes.
	wr.logger.Infof("rebuilding shard graph")
	event.DispatchUpdate(ev, "rebuilding shard serving graph")
	_, err = wr.RebuildShardGraph(ctx, keyspace, shard, nil)
	return err
}
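// NOTE: illustrative sketch, not part of the wrangler code above. The
// "master-elect must have the most advanced position" check is the safety
// core of the emergency reparent: promoting a lagging tablet would silently
// lose transactions that other replicas already applied. The real code
// compares replication positions with Position.AtLeast on GTID sets; the toy
// gtidSet type and checkMasterElectMostAdvanced function below are invented
// stand-ins that show the same set-containment logic.
package main

import "fmt"

// gtidSet is a toy replication position: the set of transaction IDs a tablet
// has applied.
type gtidSet map[int64]bool

// atLeast reports whether s contains everything in other, i.e. whether a
// tablet at position s has applied at least as much as one at position other.
func (s gtidSet) atLeast(other gtidSet) bool {
	for txn := range other {
		if !s[txn] {
			return false
		}
	}
	return true
}

// checkMasterElectMostAdvanced mirrors the validation loop above: the
// candidate must be at least as far along as every replica we could reach.
func checkMasterElectMostAdvanced(candidate string, positions map[string]gtidSet) error {
	electPos, ok := positions[candidate]
	if !ok {
		return fmt.Errorf("couldn't get master elect %v replication position", candidate)
	}
	for tablet, pos := range positions {
		if tablet == candidate {
			continue
		}
		if !electPos.atLeast(pos) {
			return fmt.Errorf("tablet %v is more advanced than master elect tablet %v", tablet, candidate)
		}
	}
	return nil
}

func main() {
	positions := map[string]gtidSet{
		"cell1-0000000101": {1: true, 2: true, 3: true},
		"cell1-0000000102": {1: true, 2: true},
	}
	fmt.Println(checkMasterElectMostAdvanced("cell1-0000000101", positions)) // <nil>
	fmt.Println(checkMasterElectMostAdvanced("cell1-0000000102", positions)) // error: 101 is more advanced
}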
// finalizeTabletExternallyReparented performs slow, synchronized reconciliation
// tasks that ensure topology is self-consistent, and then marks the reparent as
// finished by updating the global shard record.
func (agent *ActionAgent) finalizeTabletExternallyReparented(ctx context.Context, si *topo.ShardInfo, ev *events.Reparent) (err error) {
	var wg sync.WaitGroup
	var errs concurrency.AllErrorRecorder
	oldMasterAlias := si.MasterAlias

	// Update the tablet records and serving graph for the old and new master concurrently.
	event.DispatchUpdate(ev, "updating old and new master tablet records")
	log.Infof("finalizeTabletExternallyReparented: updating tablet records")
	wg.Add(1)
	go func() {
		defer wg.Done()
		// Update our own record to master.
		updatedTablet, err := agent.TopoServer.UpdateTabletFields(ctx, agent.TabletAlias,
			func(tablet *topodatapb.Tablet) error {
				tablet.Type = topodatapb.TabletType_MASTER
				tablet.HealthMap = nil
				return nil
			})
		if err != nil {
			errs.RecordError(err)
			return
		}

		// Update the serving graph for the tablet.
		if updatedTablet != nil {
			errs.RecordError(
				topotools.UpdateTabletEndpoints(ctx, agent.TopoServer, updatedTablet))
		}
	}()

	if !topoproto.TabletAliasIsZero(oldMasterAlias) {
		wg.Add(1)
		go func() {
			// Forcibly demote the old master in topology, since we can't rely on the
			// old master to be up to change its own record.
			oldMasterTablet, err := agent.TopoServer.UpdateTabletFields(ctx, oldMasterAlias,
				func(tablet *topodatapb.Tablet) error {
					tablet.Type = topodatapb.TabletType_SPARE
					return nil
				})
			if err != nil {
				errs.RecordError(err)
				wg.Done()
				return
			}

			// We now know more about the old master, so add it to the event data.
			ev.OldMaster = *oldMasterTablet

			// Update the serving graph.
			errs.RecordError(
				topotools.UpdateTabletEndpoints(ctx, agent.TopoServer, oldMasterTablet))
			wg.Done()

			// Tell the old master to re-read its tablet record and change its state.
			// We don't need to wait for it.
			tmc := tmclient.NewTabletManagerClient()
			tmc.RefreshState(ctx, topo.NewTabletInfo(oldMasterTablet, -1))
		}()
	}

	tablet := agent.Tablet()

	// Wait for the tablet records to be updated. At that point, any rebuild will
	// see the new master, so we're ready to mark the reparent as done in the
	// global shard record.
	wg.Wait()
	if errs.HasErrors() {
		return errs.Error()
	}

	// Update the master field in the global shard record. We don't use a lock
	// here anymore. The lock was only to ensure that the global shard record
	// didn't get modified between the time when we read it and the time when we
	// write it back. Now we use an update loop pattern to do that instead.
	event.DispatchUpdate(ev, "updating global shard record")
	log.Infof("finalizeTabletExternallyReparented: updating global shard record")
	si, err = agent.TopoServer.UpdateShardFields(ctx, tablet.Keyspace, tablet.Shard, func(shard *topodatapb.Shard) error {
		shard.MasterAlias = tablet.Alias
		return nil
	})
	if err != nil {
		return err
	}

	// We already took care of updating the serving graph for the old and new masters.
	// All that's left, in the case of a cross-cell reparent, is to update the
	// master cell setting in the SrvShard records of all cells.
	if oldMasterAlias == nil || oldMasterAlias.Cell != tablet.Alias.Cell {
		event.DispatchUpdate(ev, "rebuilding shard serving graph")
		log.Infof("finalizeTabletExternallyReparented: updating SrvShard in all cells for cross-cell reparent")
		if err := topotools.UpdateAllSrvShards(ctx, agent.TopoServer, si); err != nil {
			return err
		}
	}

	event.DispatchUpdate(ev, "finished")
	return nil
}
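// NOTE: illustrative sketch, not part of the agent code above. The comment in
// finalizeTabletExternallyReparented mentions replacing the shard lock with an
// "update loop pattern". The standalone program below shows that pattern
// against a hypothetical versioned store; versionedStore, shardRecord and
// updateShardFields are invented for the example and are not the Vitess topo
// API. The point is the retry on a compare-and-swap failure: no lock is held,
// but a stale write is rejected and simply re-applied on top of the fresh
// record, so lost updates cannot happen.
package main

import (
	"errors"
	"fmt"
)

// errBadVersion signals that a concurrent writer changed the record between
// our read and our write.
var errBadVersion = errors.New("bad version")

// shardRecord is a toy stand-in for the global shard record.
type shardRecord struct {
	MasterAlias string
	version     int64
}

// versionedStore is a hypothetical store with optimistic concurrency: a write
// succeeds only if the caller presents the version it read.
type versionedStore struct {
	rec shardRecord
}

func (s *versionedStore) get() shardRecord { return s.rec }

func (s *versionedStore) put(rec shardRecord) error {
	if rec.version != s.rec.version {
		return errBadVersion
	}
	rec.version++
	s.rec = rec
	return nil
}

// updateShardFields retries the read-modify-write until it wins the race.
func updateShardFields(s *versionedStore, update func(*shardRecord) error) (shardRecord, error) {
	for {
		rec := s.get()
		if err := update(&rec); err != nil {
			return shardRecord{}, err
		}
		switch err := s.put(rec); {
		case err == nil:
			return rec, nil
		case errors.Is(err, errBadVersion):
			continue // someone else wrote first; re-read and try again
		default:
			return shardRecord{}, err
		}
	}
}

func main() {
	store := &versionedStore{rec: shardRecord{MasterAlias: "cell1-0000000100"}}
	rec, err := updateShardFields(store, func(r *shardRecord) error {
		r.MasterAlias = "cell1-0000000101"
		return nil
	})
	fmt.Println(rec.MasterAlias, err) // cell1-0000000101 <nil>
}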