// WaitBlpPosition will wait for the filtered replication to reach at least // the provided position. func WaitBlpPosition(mysqld MysqlDaemon, sql string, replicationPosition string, waitTimeout time.Duration) error { position, err := replication.DecodePosition(replicationPosition) if err != nil { return err } timeOut := time.Now().Add(waitTimeout) for { if time.Now().After(timeOut) { break } qr, err := mysqld.FetchSuperQuery(sql) if err != nil { return err } if len(qr.Rows) != 1 { return fmt.Errorf("QueryBlpCheckpoint(%v) returned unexpected row count: %v", sql, len(qr.Rows)) } var pos replication.Position if !qr.Rows[0][0].IsNull() { pos, err = replication.DecodePosition(qr.Rows[0][0].String()) if err != nil { return err } } if pos.AtLeast(position) { return nil } log.Infof("Sleeping 1 second waiting for binlog replication(%v) to catch up: %v != %v", sql, pos, position) time.Sleep(1 * time.Second) } return fmt.Errorf("WaitBlpPosition(%v) timed out", sql) }
func positionCmd(subFlags *flag.FlagSet, args []string) error { subFlags.Parse(args) if len(args) < 3 { return fmt.Errorf("Not enough arguments for position operation.") } pos1, err := replication.DecodePosition(args[1]) if err != nil { return err } switch args[0] { case "equal": pos2, err := replication.DecodePosition(args[2]) if err != nil { return err } fmt.Println(pos1.Equal(pos2)) case "at_least": pos2, err := replication.DecodePosition(args[2]) if err != nil { return err } fmt.Println(pos1.AtLeast(pos2)) case "append": gtid, err := replication.DecodeGTID(args[2]) if err != nil { return err } fmt.Println(replication.AppendGTID(pos1, gtid)) } return nil }
// WaitBlpPosition will wait for the filtered replication to reach at least // the provided position. func WaitBlpPosition(ctx context.Context, mysqld MysqlDaemon, sql string, replicationPosition string) error { position, err := replication.DecodePosition(replicationPosition) if err != nil { return err } for { select { case <-ctx.Done(): return ctx.Err() default: } qr, err := mysqld.FetchSuperQuery(ctx, sql) if err != nil { return err } if len(qr.Rows) != 1 { return fmt.Errorf("QueryBlpCheckpoint(%v) returned unexpected row count: %v", sql, len(qr.Rows)) } var pos replication.Position if !qr.Rows[0][0].IsNull() { pos, err = replication.DecodePosition(qr.Rows[0][0].String()) if err != nil { return err } } if pos.AtLeast(position) { return nil } log.Infof("Sleeping 1 second waiting for binlog replication(%v) to catch up: %v != %v", sql, pos, position) time.Sleep(1 * time.Second) } }
// StreamTables is part of the UpdateStream interface func (updateStream *UpdateStreamImpl) StreamTables(ctx context.Context, position string, tables []string, charset *binlogdatapb.Charset, sendReply func(reply *binlogdatapb.BinlogTransaction) error) (err error) { pos, err := replication.DecodePosition(position) if err != nil { return err } updateStream.actionLock.Lock() if !updateStream.IsEnabled() { updateStream.actionLock.Unlock() log.Errorf("Unable to serve client request: Update stream service is not enabled") return fmt.Errorf("update stream service is not enabled") } updateStream.stateWaitGroup.Add(1) updateStream.actionLock.Unlock() defer updateStream.stateWaitGroup.Done() streamCount.Add("Tables", 1) defer streamCount.Add("Tables", -1) log.Infof("ServeUpdateStream starting @ %#v", pos) // Calls cascade like this: binlog.Streamer->TablesFilterFunc->func(*binlogdatapb.BinlogTransaction)->sendReply f := TablesFilterFunc(tables, func(reply *binlogdatapb.BinlogTransaction) error { tablesStatements.Add(int64(len(reply.Statements))) tablesTransactions.Add(1) return sendReply(reply) }) bls := NewStreamer(updateStream.dbname, updateStream.mysqld, charset, pos, 0, f) streamCtx, cancel := context.WithCancel(ctx) i := updateStream.streams.Add(cancel) defer updateStream.streams.Delete(i) return bls.Stream(streamCtx) }
// PromoteSlaveWhenCaughtUp waits for this slave to be caught up on // replication up to the provided point, and then makes the slave the // shard master. func (agent *ActionAgent) PromoteSlaveWhenCaughtUp(ctx context.Context, position string) (string, error) { pos, err := replication.DecodePosition(position) if err != nil { return "", err } // TODO(alainjobart) change the flavor API to take the context directly // For now, extract the timeout from the context, or wait forever var waitTimeout time.Duration if deadline, ok := ctx.Deadline(); ok { waitTimeout = deadline.Sub(time.Now()) if waitTimeout <= 0 { waitTimeout = time.Millisecond } } if err := agent.MysqlDaemon.WaitMasterPos(pos, waitTimeout); err != nil { return "", err } pos, err = agent.MysqlDaemon.PromoteSlave(agent.hookExtraEnv()) if err != nil { return "", err } if err := agent.MysqlDaemon.SetReadOnly(false); err != nil { return "", err } if _, err := topotools.ChangeType(ctx, agent.TopoServer, agent.TabletAlias, topodatapb.TabletType_MASTER, topotools.ClearHealthMap); err != nil { return "", err } return replication.EncodePosition(pos), nil }
func (maxPosSearch *maxReplPosSearch) processTablet(tablet *topodatapb.Tablet) { defer maxPosSearch.waitGroup.Done() maxPosSearch.wrangler.logger.Infof("getting replication position from %v", topoproto.TabletAliasString(tablet.Alias)) slaveStatusCtx, cancelSlaveStatus := context.WithTimeout(maxPosSearch.ctx, maxPosSearch.waitSlaveTimeout) defer cancelSlaveStatus() status, err := maxPosSearch.wrangler.tmc.SlaveStatus(slaveStatusCtx, tablet) if err != nil { maxPosSearch.wrangler.logger.Warningf("failed to get replication status from %v, ignoring tablet: %v", topoproto.TabletAliasString(tablet.Alias), err) return } replPos, err := replication.DecodePosition(status.Position) if err != nil { maxPosSearch.wrangler.logger.Warningf("cannot decode slave %v position %v: %v", topoproto.TabletAliasString(tablet.Alias), status.Position, err) return } maxPosSearch.maxPosLock.Lock() if maxPosSearch.maxPosTablet == nil || !maxPosSearch.maxPos.AtLeast(replPos) { maxPosSearch.maxPos = replPos maxPosSearch.maxPosTablet = tablet } maxPosSearch.maxPosLock.Unlock() }
// StreamTables is part of the UpdateStream interface func (updateStream *UpdateStreamImpl) StreamTables(position string, tables []string, charset *binlogdatapb.Charset, sendReply func(reply *binlogdatapb.BinlogTransaction) error) (err error) { pos, err := replication.DecodePosition(position) if err != nil { return err } updateStream.actionLock.Lock() if !updateStream.IsEnabled() { updateStream.actionLock.Unlock() log.Errorf("Unable to serve client request: Update stream service is not enabled") return fmt.Errorf("update stream service is not enabled") } updateStream.stateWaitGroup.Add(1) updateStream.actionLock.Unlock() defer updateStream.stateWaitGroup.Done() streamCount.Add("Tables", 1) defer streamCount.Add("Tables", -1) log.Infof("ServeUpdateStream starting @ %#v", pos) // Calls cascade like this: binlog.Streamer->TablesFilterFunc->func(*binlogdatapb.BinlogTransaction)->sendReply f := TablesFilterFunc(tables, func(reply *binlogdatapb.BinlogTransaction) error { keyrangeStatements.Add(int64(len(reply.Statements))) keyrangeTransactions.Add(1) return sendReply(reply) }) bls := NewStreamer(updateStream.dbname, updateStream.mysqld, charset, pos, f) svm := &sync2.ServiceManager{} svm.Go(bls.Stream) updateStream.streams.Add(svm) defer updateStream.streams.Delete(svm) return svm.Join() }
// InitSlave sets replication master and position, and waits for the // reparent_journal table entry up to context timeout func (agent *ActionAgent) InitSlave(ctx context.Context, parent *topodatapb.TabletAlias, position string, timeCreatedNS int64) error { pos, err := replication.DecodePosition(position) if err != nil { return err } ti, err := agent.TopoServer.GetTablet(ctx, parent) if err != nil { return err } cmds, err := agent.MysqlDaemon.SetSlavePositionCommands(pos) if err != nil { return err } cmds2, err := agent.MysqlDaemon.SetMasterCommands(ti.Hostname, int(ti.PortMap["mysql"])) if err != nil { return err } cmds = append(cmds, cmds2...) cmds = append(cmds, "START SLAVE") if err := agent.MysqlDaemon.ExecuteSuperQueryList(cmds); err != nil { return err } agent.initReplication = true // wait until we get the replicated row, or our context times out return agent.MysqlDaemon.WaitForReparentJournal(ctx, timeCreatedNS) }
// PromoteSlaveWhenCaughtUp waits for this slave to be caught up on // replication up to the provided point, and then makes the slave the // shard master. func (agent *ActionAgent) PromoteSlaveWhenCaughtUp(ctx context.Context, position string) (string, error) { pos, err := replication.DecodePosition(position) if err != nil { return "", err } if err := agent.MysqlDaemon.WaitMasterPos(ctx, pos); err != nil { return "", err } pos, err = agent.MysqlDaemon.PromoteSlave(agent.hookExtraEnv()) if err != nil { return "", err } // If using semi-sync, we need to enable it before going read-write. if *enableSemiSync { if err := agent.enableSemiSync(true); err != nil { return "", err } } if err := agent.MysqlDaemon.SetReadOnly(false); err != nil { return "", err } if _, err := topotools.ChangeType(ctx, agent.TopoServer, agent.TabletAlias, topodatapb.TabletType_MASTER); err != nil { return "", err } return replication.EncodePosition(pos), nil }
// ServeUpdateStream is part of the UpdateStream interface func (updateStream *UpdateStreamImpl) ServeUpdateStream(position string, sendReply func(reply *binlogdatapb.StreamEvent) error) (err error) { pos, err := replication.DecodePosition(position) if err != nil { return err } updateStream.actionLock.Lock() if !updateStream.IsEnabled() { updateStream.actionLock.Unlock() log.Errorf("Unable to serve client request: update stream service is not enabled") return fmt.Errorf("update stream service is not enabled") } updateStream.stateWaitGroup.Add(1) updateStream.actionLock.Unlock() defer updateStream.stateWaitGroup.Done() streamCount.Add("Updates", 1) defer streamCount.Add("Updates", -1) log.Infof("ServeUpdateStream starting @ %#v", pos) evs := NewEventStreamer(updateStream.dbname, updateStream.mysqld, pos, func(reply *binlogdatapb.StreamEvent) error { if reply.Category == binlogdatapb.StreamEvent_SE_ERR { updateStreamErrors.Add("UpdateStream", 1) } else { updateStreamEvents.Add(reply.Category.String(), 1) } return sendReply(reply) }) svm := &sync2.ServiceManager{} svm.Go(evs.Stream) updateStream.streams.Add(svm) defer updateStream.streams.Delete(svm) return svm.Join() }
// PopulateReparentJournal adds an entry into the reparent_journal table. func (agent *ActionAgent) PopulateReparentJournal(ctx context.Context, timeCreatedNS int64, actionName string, masterAlias *topodatapb.TabletAlias, position string) error { pos, err := replication.DecodePosition(position) if err != nil { return err } cmds := mysqlctl.CreateReparentJournal() cmds = append(cmds, mysqlctl.PopulateReparentJournal(timeCreatedNS, actionName, topoproto.TabletAliasString(masterAlias), pos)) return agent.MysqlDaemon.ExecuteSuperQueryList(cmds) }
// NewBinlogPlayerKeyRange returns a new BinlogPlayer pointing at the server // replicating the provided keyrange, starting at the startPosition, // and updating _vt.blp_checkpoint with uid=startPosition.Uid. // If !stopPosition.IsZero(), it will stop when reaching that position. func NewBinlogPlayerKeyRange(dbClient VtClient, tablet *topodatapb.Tablet, keyRange *topodatapb.KeyRange, uid uint32, startPosition string, stopPosition string, blplStats *Stats) (*BinlogPlayer, error) { result := &BinlogPlayer{ tablet: tablet, dbClient: dbClient, keyRange: keyRange, uid: uid, blplStats: blplStats, } var err error result.position, err = replication.DecodePosition(startPosition) if err != nil { return nil, err } if stopPosition != "" { result.stopPosition, err = replication.DecodePosition(stopPosition) if err != nil { return nil, err } } return result, nil }
// Fresher compares two event tokens. It returns a negative number if // ev1<ev2, zero if they're equal, and a positive number if // ev1>ev2. In case of doubt (we don't have enough information to know // for sure), it returns a negative number. func Fresher(ev1, ev2 *querypb.EventToken) int { if ev1 == nil || ev2 == nil { // Either one is nil, we don't know. return -1 } if ev1.Timestamp != ev2.Timestamp { // The timestamp is enough to set them apart. return int(ev1.Timestamp - ev2.Timestamp) } if ev1.Shard != "" && ev1.Shard == ev2.Shard { // They come from the same shard. See if we have positions. if ev1.Position == "" || ev2.Position == "" { return -1 } // We can parse them. pos1, err := replication.DecodePosition(ev1.Position) if err != nil { return -1 } pos2, err := replication.DecodePosition(ev2.Position) if err != nil { return -1 } // Then compare. if pos1.Equal(pos2) { return 0 } if pos1.AtLeast(pos2) { return 1 } return -1 } // We do not know. return -1 }
// NewBinlogPlayerKeyRange returns a new BinlogPlayer pointing at the server // replicating the provided keyrange, starting at the startPosition, // and updating _vt.blp_checkpoint with uid=startPosition.Uid. // If !stopPosition.IsZero(), it will stop when reaching that position. func NewBinlogPlayerKeyRange(dbClient VtClient, endPoint *pbt.EndPoint, keyspaceIDType pbt.KeyspaceIdType, keyRange *pbt.KeyRange, uid uint32, startPosition string, stopPosition string, blplStats *Stats) (*BinlogPlayer, error) { result := &BinlogPlayer{ endPoint: endPoint, dbClient: dbClient, keyspaceIDType: keyspaceIDType, keyRange: keyRange, uid: uid, blplStats: blplStats, } var err error result.position, err = replication.DecodePosition(startPosition) if err != nil { return nil, err } if stopPosition != "" { result.stopPosition, err = replication.DecodePosition(stopPosition) if err != nil { return nil, err } } return result, nil }
// NewBinlogPlayerTables returns a new BinlogPlayer pointing at the server // replicating the provided tables, starting at the startPosition, // and updating _vt.blp_checkpoint with uid=startPosition.Uid. // If !stopPosition.IsZero(), it will stop when reaching that position. func NewBinlogPlayerTables(dbClient VtClient, endPoint *pbt.EndPoint, tables []string, uid uint32, startPosition string, stopPosition string, blplStats *Stats) (*BinlogPlayer, error) { result := &BinlogPlayer{ endPoint: endPoint, dbClient: dbClient, tables: tables, uid: uid, blplStats: blplStats, } var err error result.position, err = replication.DecodePosition(startPosition) if err != nil { return nil, err } if stopPosition != "" { var err error result.stopPosition, err = replication.DecodePosition(stopPosition) if err != nil { return nil, err } } return result, nil }
// StopSlaveMinimum will stop the slave after it reaches at least the // provided position. Works both when Vitess manages // replication or not (using hook if not). func (agent *ActionAgent) StopSlaveMinimum(ctx context.Context, position string, waitTime time.Duration) (string, error) { pos, err := replication.DecodePosition(position) if err != nil { return "", err } if err := agent.MysqlDaemon.WaitMasterPos(pos, waitTime); err != nil { return "", err } if err := mysqlctl.StopSlave(agent.MysqlDaemon, agent.hookExtraEnv()); err != nil { return "", err } pos, err = agent.MysqlDaemon.MasterPosition() if err != nil { return "", err } return replication.EncodePosition(pos), nil }
// InitSlave sets replication master and position, and waits for the // reparent_journal table entry up to context timeout func (agent *ActionAgent) InitSlave(ctx context.Context, parent *topodatapb.TabletAlias, position string, timeCreatedNS int64) error { if err := agent.lock(ctx); err != nil { return err } defer agent.unlock() pos, err := replication.DecodePosition(position) if err != nil { return err } ti, err := agent.TopoServer.GetTablet(ctx, parent) if err != nil { return err } agent.setSlaveStopped(false) // If using semi-sync, we need to enable it before connecting to master. if *enableSemiSync { if err := agent.enableSemiSync(false); err != nil { return err } } cmds, err := agent.MysqlDaemon.SetSlavePositionCommands(pos) if err != nil { return err } cmds2, err := agent.MysqlDaemon.SetMasterCommands(ti.Hostname, int(ti.PortMap["mysql"])) if err != nil { return err } cmds = append(cmds, cmds2...) cmds = append(cmds, "START SLAVE") if err := agent.MysqlDaemon.ExecuteSuperQueryList(ctx, cmds); err != nil { return err } agent.initReplication = true // wait until we get the replicated row, or our context times out return agent.MysqlDaemon.WaitForReparentJournal(ctx, timeCreatedNS) }
// StopSlaveMinimum will stop the slave after it reaches at least the // provided position. Works both when Vitess manages // replication or not (using hook if not). func (agent *ActionAgent) StopSlaveMinimum(ctx context.Context, position string, waitTime time.Duration) (string, error) { pos, err := replication.DecodePosition(position) if err != nil { return "", err } waitCtx, cancel := context.WithTimeout(ctx, waitTime) defer cancel() if err := agent.MysqlDaemon.WaitMasterPos(waitCtx, pos); err != nil { return "", err } if err := agent.StopSlave(ctx); err != nil { return "", err } pos, err = agent.MysqlDaemon.MasterPosition() if err != nil { return "", err } return replication.EncodePosition(pos), nil }
// ReloadSchema will reload the schema // Should be called under RPCWrap. // This doesn't need the action mutex because periodic schema reloads happen // in the background anyway. func (agent *ActionAgent) ReloadSchema(ctx context.Context, waitPosition string) error { if agent.DBConfigs.IsZero() { // we skip this for test instances that can't connect to the DB anyway return nil } if waitPosition != "" { pos, err := replication.DecodePosition(waitPosition) if err != nil { return fmt.Errorf("ReloadSchema: can't parse wait position (%q): %v", waitPosition, err) } log.Infof("ReloadSchema: waiting for replication position: %v", waitPosition) if err := agent.MysqlDaemon.WaitMasterPos(ctx, pos); err != nil { return err } } log.Infof("ReloadSchema requested via RPC") return agent.QueryServiceControl.ReloadSchema(ctx) }
// UpdateStream streams binlog events. func (tsv *TabletServer) UpdateStream(ctx context.Context, target *querypb.Target, position string, timestamp int64, sendReply func(*querypb.StreamEvent) error) error { // Parse the position if needed. var p replication.Position var err error if timestamp == 0 { p, err = replication.DecodePosition(position) if err != nil { return NewTabletError(vtrpcpb.ErrorCode_BAD_INPUT, "cannot parse position: %v", err) } } else if position != "" { return NewTabletError(vtrpcpb.ErrorCode_BAD_INPUT, "only one of position and timestamp should be specified") } // Validate proper target is used. if err = tsv.startRequest(target, false, false); err != nil { return err } defer tsv.endRequest(false) s := binlog.NewEventStreamer(tsv.dbconfigs.App.DbName, tsv.mysqld, p, timestamp, func(event *querypb.StreamEvent) error { return sendReply(event) }) // Create a cancelable wrapping context. streamCtx, streamCancel := context.WithCancel(ctx) i := tsv.updateStreamList.Add(streamCancel) defer tsv.updateStreamList.Delete(i) // And stream with it. err = s.Stream(streamCtx) switch err { case mysqlctl.ErrBinlogUnavailable: return NewTabletError(vtrpcpb.ErrorCode_QUERY_NOT_SERVED, "%v", err) case nil: return nil default: return NewTabletError(vtrpcpb.ErrorCode_INTERNAL_ERROR, "%v", err) } }
// writeRecoveryPosition will write the current GTID as the recovery position // for the next transaction. // We will also try to get the timestamp for the transaction. Two cases: // - we have statements, and they start with a SET TIMESTAMP that we // can parse: then we update transaction_timestamp in blp_checkpoint // with it, and set SecondsBehindMaster to now() - transaction_timestamp // - otherwise (the statements are probably filtered out), we leave // transaction_timestamp alone (keeping the old value), and we don't // change SecondsBehindMaster func (blp *BinlogPlayer) writeRecoveryPosition(tx *binlogdatapb.BinlogTransaction) error { position, err := replication.DecodePosition(tx.EventToken.Position) if err != nil { return err } now := time.Now().Unix() blp.position = position updateRecovery := updateBlpCheckpoint(blp.uid, blp.position, now, tx.EventToken.Timestamp) qr, err := blp.exec(updateRecovery) if err != nil { return fmt.Errorf("Error %v in writing recovery info %v", err, updateRecovery) } if qr.RowsAffected != 1 { return fmt.Errorf("Cannot update blp_recovery table, affected %v rows", qr.RowsAffected) } blp.blplStats.SetLastPosition(blp.position) if tx.EventToken.Timestamp != 0 { blp.blplStats.SecondsBehindMaster.Set(now - tx.EventToken.Timestamp) } return nil }
func (wr *Wrangler) emergencyReparentShardLocked(ctx context.Context, ev *events.Reparent, keyspace, shard string, masterElectTabletAlias *topodatapb.TabletAlias, waitSlaveTimeout time.Duration) error { shardInfo, err := wr.ts.GetShard(ctx, keyspace, shard) if err != nil { return err } ev.ShardInfo = *shardInfo event.DispatchUpdate(ev, "reading all tablets") tabletMap, err := wr.ts.GetTabletMapForShard(ctx, keyspace, shard) if err != nil { return err } // Check corner cases we're going to depend on masterElectTabletInfo, ok := tabletMap[*masterElectTabletAlias] if !ok { return fmt.Errorf("master-elect tablet %v is not in the shard", topoproto.TabletAliasString(masterElectTabletAlias)) } ev.NewMaster = *masterElectTabletInfo.Tablet if topoproto.TabletAliasEqual(shardInfo.MasterAlias, masterElectTabletAlias) { return fmt.Errorf("master-elect tablet %v is already the master", topoproto.TabletAliasString(masterElectTabletAlias)) } // Deal with the old master: try to remote-scrap it, if it's // truely dead we force-scrap it. Remove it from our map in any case. if shardInfo.HasMaster() { deleteOldMaster := true oldMasterTabletInfo, ok := tabletMap[*shardInfo.MasterAlias] if ok { delete(tabletMap, *shardInfo.MasterAlias) } else { oldMasterTabletInfo, err = wr.ts.GetTablet(ctx, shardInfo.MasterAlias) if err != nil { wr.logger.Warningf("cannot read old master tablet %v, won't touch it: %v", topoproto.TabletAliasString(shardInfo.MasterAlias), err) deleteOldMaster = false } } if deleteOldMaster { ev.OldMaster = *oldMasterTabletInfo.Tablet wr.logger.Infof("deleting old master %v", topoproto.TabletAliasString(shardInfo.MasterAlias)) ctx, cancel := context.WithTimeout(ctx, waitSlaveTimeout) defer cancel() if err := topotools.DeleteTablet(ctx, wr.ts, oldMasterTabletInfo.Tablet); err != nil { wr.logger.Warningf("failed to delete old master tablet %v: %v", topoproto.TabletAliasString(shardInfo.MasterAlias), err) } } } // Stop replication on all slaves, get their current // replication position event.DispatchUpdate(ev, "stop replication on all slaves") wg := sync.WaitGroup{} mu := sync.Mutex{} statusMap := make(map[topodatapb.TabletAlias]*replicationdatapb.Status) for alias, tabletInfo := range tabletMap { wg.Add(1) go func(alias topodatapb.TabletAlias, tabletInfo *topo.TabletInfo) { defer wg.Done() wr.logger.Infof("getting replication position from %v", topoproto.TabletAliasString(&alias)) ctx, cancel := context.WithTimeout(ctx, waitSlaveTimeout) defer cancel() rp, err := wr.TabletManagerClient().StopReplicationAndGetStatus(ctx, tabletInfo) if err != nil { wr.logger.Warningf("failed to get replication status from %v, ignoring tablet: %v", topoproto.TabletAliasString(&alias), err) return } mu.Lock() statusMap[alias] = rp mu.Unlock() }(alias, tabletInfo) } wg.Wait() // Verify masterElect is alive and has the most advanced position masterElectStatus, ok := statusMap[*masterElectTabletAlias] if !ok { return fmt.Errorf("couldn't get master elect %v replication position", topoproto.TabletAliasString(masterElectTabletAlias)) } masterElectPos, err := replication.DecodePosition(masterElectStatus.Position) if err != nil { return fmt.Errorf("cannot decode master elect position %v: %v", masterElectStatus.Position, err) } for alias, status := range statusMap { if topoproto.TabletAliasEqual(&alias, masterElectTabletAlias) { continue } pos, err := replication.DecodePosition(status.Position) if err != nil { return fmt.Errorf("cannot decode slave %v position %v: %v", topoproto.TabletAliasString(&alias), status.Position, err) } if !masterElectPos.AtLeast(pos) { return fmt.Errorf("tablet %v is more advanced than master elect tablet %v: %v > %v", topoproto.TabletAliasString(&alias), topoproto.TabletAliasString(masterElectTabletAlias), status.Position, masterElectStatus) } } // Promote the masterElect wr.logger.Infof("promote slave %v", topoproto.TabletAliasString(masterElectTabletAlias)) event.DispatchUpdate(ev, "promoting slave") rp, err := wr.tmc.PromoteSlave(ctx, masterElectTabletInfo) if err != nil { return fmt.Errorf("master-elect tablet %v failed to be upgraded to master: %v", topoproto.TabletAliasString(masterElectTabletAlias), err) } // Reset replication on all slaves to point to the new master, and // insert test row in the new master. // Go through all the tablets: // - new master: populate the reparent journal // - everybody else: reparent to new master, wait for row event.DispatchUpdate(ev, "reparenting all tablets") now := time.Now().UnixNano() wgMaster := sync.WaitGroup{} wgSlaves := sync.WaitGroup{} rec := concurrency.AllErrorRecorder{} var masterErr error for alias, tabletInfo := range tabletMap { if topoproto.TabletAliasEqual(&alias, masterElectTabletAlias) { wgMaster.Add(1) go func(alias topodatapb.TabletAlias, tabletInfo *topo.TabletInfo) { defer wgMaster.Done() wr.logger.Infof("populating reparent journal on new master %v", topoproto.TabletAliasString(&alias)) masterErr = wr.TabletManagerClient().PopulateReparentJournal(ctx, tabletInfo, now, emergencyReparentShardOperation, &alias, rp) }(alias, tabletInfo) } else { wgSlaves.Add(1) go func(alias topodatapb.TabletAlias, tabletInfo *topo.TabletInfo) { defer wgSlaves.Done() wr.logger.Infof("setting new master on slave %v", topoproto.TabletAliasString(&alias)) forceStartSlave := false if status, ok := statusMap[alias]; ok { forceStartSlave = status.SlaveIoRunning || status.SlaveSqlRunning } if err := wr.TabletManagerClient().SetMaster(ctx, tabletInfo, masterElectTabletAlias, now, forceStartSlave); err != nil { rec.RecordError(fmt.Errorf("Tablet %v SetMaster failed: %v", topoproto.TabletAliasString(&alias), err)) } }(alias, tabletInfo) } } // After the master is done, we can update the shard record // (note with semi-sync, it also means at least one slave is done) wgMaster.Wait() if masterErr != nil { wgSlaves.Wait() return fmt.Errorf("failed to PopulateReparentJournal on master: %v", masterErr) } wr.logger.Infof("updating shard record with new master %v", topoproto.TabletAliasString(masterElectTabletAlias)) if _, err := wr.ts.UpdateShardFields(ctx, keyspace, shard, func(s *topodatapb.Shard) error { s.MasterAlias = masterElectTabletAlias return nil }); err != nil { wgSlaves.Wait() return fmt.Errorf("failed to update shard master record: %v", err) } // Wait for the slaves to complete. If some of them fail, we // will rebuild the shard serving graph anyway wgSlaves.Wait() if err := rec.Error(); err != nil { wr.Logger().Errorf("Some slaves failed to reparent: %v", err) return err } // Then we rebuild the entire serving graph for the shard, // to account for all changes. wr.logger.Infof("rebuilding shard graph") event.DispatchUpdate(ev, "rebuilding shard serving graph") _, err = wr.RebuildShardGraph(ctx, keyspace, shard, nil) return err }