Exemple #1
0
// WaitBlpPosition will wait for the filtered replication to reach at least
// the provided position.
func WaitBlpPosition(mysqld MysqlDaemon, sql string, replicationPosition string, waitTimeout time.Duration) error {
	position, err := replication.DecodePosition(replicationPosition)
	if err != nil {
		return err
	}
	timeOut := time.Now().Add(waitTimeout)
	for {
		if time.Now().After(timeOut) {
			break
		}

		qr, err := mysqld.FetchSuperQuery(sql)
		if err != nil {
			return err
		}
		if len(qr.Rows) != 1 {
			return fmt.Errorf("QueryBlpCheckpoint(%v) returned unexpected row count: %v", sql, len(qr.Rows))
		}
		var pos replication.Position
		if !qr.Rows[0][0].IsNull() {
			pos, err = replication.DecodePosition(qr.Rows[0][0].String())
			if err != nil {
				return err
			}
		}
		if pos.AtLeast(position) {
			return nil
		}

		log.Infof("Sleeping 1 second waiting for binlog replication(%v) to catch up: %v != %v", sql, pos, position)
		time.Sleep(1 * time.Second)
	}

	return fmt.Errorf("WaitBlpPosition(%v) timed out", sql)
}
Exemple #2
0
func positionCmd(subFlags *flag.FlagSet, args []string) error {
	subFlags.Parse(args)
	if len(args) < 3 {
		return fmt.Errorf("Not enough arguments for position operation.")
	}

	pos1, err := replication.DecodePosition(args[1])
	if err != nil {
		return err
	}

	switch args[0] {
	case "equal":
		pos2, err := replication.DecodePosition(args[2])
		if err != nil {
			return err
		}
		fmt.Println(pos1.Equal(pos2))
	case "at_least":
		pos2, err := replication.DecodePosition(args[2])
		if err != nil {
			return err
		}
		fmt.Println(pos1.AtLeast(pos2))
	case "append":
		gtid, err := replication.DecodeGTID(args[2])
		if err != nil {
			return err
		}
		fmt.Println(replication.AppendGTID(pos1, gtid))
	}

	return nil
}
Exemple #3
0
// WaitBlpPosition will wait for the filtered replication to reach at least
// the provided position.
func WaitBlpPosition(ctx context.Context, mysqld MysqlDaemon, sql string, replicationPosition string) error {
	position, err := replication.DecodePosition(replicationPosition)
	if err != nil {
		return err
	}
	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}

		qr, err := mysqld.FetchSuperQuery(ctx, sql)
		if err != nil {
			return err
		}
		if len(qr.Rows) != 1 {
			return fmt.Errorf("QueryBlpCheckpoint(%v) returned unexpected row count: %v", sql, len(qr.Rows))
		}
		var pos replication.Position
		if !qr.Rows[0][0].IsNull() {
			pos, err = replication.DecodePosition(qr.Rows[0][0].String())
			if err != nil {
				return err
			}
		}
		if pos.AtLeast(position) {
			return nil
		}

		log.Infof("Sleeping 1 second waiting for binlog replication(%v) to catch up: %v != %v", sql, pos, position)
		time.Sleep(1 * time.Second)
	}
}
Exemple #4
0
// StreamTables is part of the UpdateStream interface
func (updateStream *UpdateStreamImpl) StreamTables(ctx context.Context, position string, tables []string, charset *binlogdatapb.Charset, sendReply func(reply *binlogdatapb.BinlogTransaction) error) (err error) {
	pos, err := replication.DecodePosition(position)
	if err != nil {
		return err
	}

	updateStream.actionLock.Lock()
	if !updateStream.IsEnabled() {
		updateStream.actionLock.Unlock()
		log.Errorf("Unable to serve client request: Update stream service is not enabled")
		return fmt.Errorf("update stream service is not enabled")
	}
	updateStream.stateWaitGroup.Add(1)
	updateStream.actionLock.Unlock()
	defer updateStream.stateWaitGroup.Done()

	streamCount.Add("Tables", 1)
	defer streamCount.Add("Tables", -1)
	log.Infof("ServeUpdateStream starting @ %#v", pos)

	// Calls cascade like this: binlog.Streamer->TablesFilterFunc->func(*binlogdatapb.BinlogTransaction)->sendReply
	f := TablesFilterFunc(tables, func(reply *binlogdatapb.BinlogTransaction) error {
		tablesStatements.Add(int64(len(reply.Statements)))
		tablesTransactions.Add(1)
		return sendReply(reply)
	})
	bls := NewStreamer(updateStream.dbname, updateStream.mysqld, charset, pos, 0, f)

	streamCtx, cancel := context.WithCancel(ctx)
	i := updateStream.streams.Add(cancel)
	defer updateStream.streams.Delete(i)

	return bls.Stream(streamCtx)
}
// PromoteSlaveWhenCaughtUp waits for this slave to be caught up on
// replication up to the provided point, and then makes the slave the
// shard master.
func (agent *ActionAgent) PromoteSlaveWhenCaughtUp(ctx context.Context, position string) (string, error) {
	pos, err := replication.DecodePosition(position)
	if err != nil {
		return "", err
	}

	// TODO(alainjobart) change the flavor API to take the context directly
	// For now, extract the timeout from the context, or wait forever
	var waitTimeout time.Duration
	if deadline, ok := ctx.Deadline(); ok {
		waitTimeout = deadline.Sub(time.Now())
		if waitTimeout <= 0 {
			waitTimeout = time.Millisecond
		}
	}
	if err := agent.MysqlDaemon.WaitMasterPos(pos, waitTimeout); err != nil {
		return "", err
	}

	pos, err = agent.MysqlDaemon.PromoteSlave(agent.hookExtraEnv())
	if err != nil {
		return "", err
	}

	if err := agent.MysqlDaemon.SetReadOnly(false); err != nil {
		return "", err
	}

	if _, err := topotools.ChangeType(ctx, agent.TopoServer, agent.TabletAlias, topodatapb.TabletType_MASTER, topotools.ClearHealthMap); err != nil {
		return "", err
	}

	return replication.EncodePosition(pos), nil
}
Exemple #6
0
func (maxPosSearch *maxReplPosSearch) processTablet(tablet *topodatapb.Tablet) {
	defer maxPosSearch.waitGroup.Done()
	maxPosSearch.wrangler.logger.Infof("getting replication position from %v", topoproto.TabletAliasString(tablet.Alias))

	slaveStatusCtx, cancelSlaveStatus := context.WithTimeout(maxPosSearch.ctx, maxPosSearch.waitSlaveTimeout)
	defer cancelSlaveStatus()

	status, err := maxPosSearch.wrangler.tmc.SlaveStatus(slaveStatusCtx, tablet)
	if err != nil {
		maxPosSearch.wrangler.logger.Warningf("failed to get replication status from %v, ignoring tablet: %v", topoproto.TabletAliasString(tablet.Alias), err)
		return
	}
	replPos, err := replication.DecodePosition(status.Position)
	if err != nil {
		maxPosSearch.wrangler.logger.Warningf("cannot decode slave %v position %v: %v", topoproto.TabletAliasString(tablet.Alias), status.Position, err)
		return
	}

	maxPosSearch.maxPosLock.Lock()
	if maxPosSearch.maxPosTablet == nil || !maxPosSearch.maxPos.AtLeast(replPos) {
		maxPosSearch.maxPos = replPos
		maxPosSearch.maxPosTablet = tablet
	}
	maxPosSearch.maxPosLock.Unlock()
}
Exemple #7
0
// StreamTables is part of the UpdateStream interface
func (updateStream *UpdateStreamImpl) StreamTables(position string, tables []string, charset *binlogdatapb.Charset, sendReply func(reply *binlogdatapb.BinlogTransaction) error) (err error) {
	pos, err := replication.DecodePosition(position)
	if err != nil {
		return err
	}

	updateStream.actionLock.Lock()
	if !updateStream.IsEnabled() {
		updateStream.actionLock.Unlock()
		log.Errorf("Unable to serve client request: Update stream service is not enabled")
		return fmt.Errorf("update stream service is not enabled")
	}
	updateStream.stateWaitGroup.Add(1)
	updateStream.actionLock.Unlock()
	defer updateStream.stateWaitGroup.Done()

	streamCount.Add("Tables", 1)
	defer streamCount.Add("Tables", -1)
	log.Infof("ServeUpdateStream starting @ %#v", pos)

	// Calls cascade like this: binlog.Streamer->TablesFilterFunc->func(*binlogdatapb.BinlogTransaction)->sendReply
	f := TablesFilterFunc(tables, func(reply *binlogdatapb.BinlogTransaction) error {
		keyrangeStatements.Add(int64(len(reply.Statements)))
		keyrangeTransactions.Add(1)
		return sendReply(reply)
	})
	bls := NewStreamer(updateStream.dbname, updateStream.mysqld, charset, pos, f)

	svm := &sync2.ServiceManager{}
	svm.Go(bls.Stream)
	updateStream.streams.Add(svm)
	defer updateStream.streams.Delete(svm)
	return svm.Join()
}
// InitSlave sets replication master and position, and waits for the
// reparent_journal table entry up to context timeout
func (agent *ActionAgent) InitSlave(ctx context.Context, parent *topodatapb.TabletAlias, position string, timeCreatedNS int64) error {
	pos, err := replication.DecodePosition(position)
	if err != nil {
		return err
	}
	ti, err := agent.TopoServer.GetTablet(ctx, parent)
	if err != nil {
		return err
	}

	cmds, err := agent.MysqlDaemon.SetSlavePositionCommands(pos)
	if err != nil {
		return err
	}
	cmds2, err := agent.MysqlDaemon.SetMasterCommands(ti.Hostname, int(ti.PortMap["mysql"]))
	if err != nil {
		return err
	}
	cmds = append(cmds, cmds2...)
	cmds = append(cmds, "START SLAVE")

	if err := agent.MysqlDaemon.ExecuteSuperQueryList(cmds); err != nil {
		return err
	}
	agent.initReplication = true

	// wait until we get the replicated row, or our context times out
	return agent.MysqlDaemon.WaitForReparentJournal(ctx, timeCreatedNS)
}
Exemple #9
0
// PromoteSlaveWhenCaughtUp waits for this slave to be caught up on
// replication up to the provided point, and then makes the slave the
// shard master.
func (agent *ActionAgent) PromoteSlaveWhenCaughtUp(ctx context.Context, position string) (string, error) {
	pos, err := replication.DecodePosition(position)
	if err != nil {
		return "", err
	}

	if err := agent.MysqlDaemon.WaitMasterPos(ctx, pos); err != nil {
		return "", err
	}

	pos, err = agent.MysqlDaemon.PromoteSlave(agent.hookExtraEnv())
	if err != nil {
		return "", err
	}

	// If using semi-sync, we need to enable it before going read-write.
	if *enableSemiSync {
		if err := agent.enableSemiSync(true); err != nil {
			return "", err
		}
	}

	if err := agent.MysqlDaemon.SetReadOnly(false); err != nil {
		return "", err
	}

	if _, err := topotools.ChangeType(ctx, agent.TopoServer, agent.TabletAlias, topodatapb.TabletType_MASTER); err != nil {
		return "", err
	}

	return replication.EncodePosition(pos), nil
}
Exemple #10
0
// ServeUpdateStream is part of the UpdateStream interface
func (updateStream *UpdateStreamImpl) ServeUpdateStream(position string, sendReply func(reply *binlogdatapb.StreamEvent) error) (err error) {
	pos, err := replication.DecodePosition(position)
	if err != nil {
		return err
	}

	updateStream.actionLock.Lock()
	if !updateStream.IsEnabled() {
		updateStream.actionLock.Unlock()
		log.Errorf("Unable to serve client request: update stream service is not enabled")
		return fmt.Errorf("update stream service is not enabled")
	}
	updateStream.stateWaitGroup.Add(1)
	updateStream.actionLock.Unlock()
	defer updateStream.stateWaitGroup.Done()

	streamCount.Add("Updates", 1)
	defer streamCount.Add("Updates", -1)
	log.Infof("ServeUpdateStream starting @ %#v", pos)

	evs := NewEventStreamer(updateStream.dbname, updateStream.mysqld, pos, func(reply *binlogdatapb.StreamEvent) error {
		if reply.Category == binlogdatapb.StreamEvent_SE_ERR {
			updateStreamErrors.Add("UpdateStream", 1)
		} else {
			updateStreamEvents.Add(reply.Category.String(), 1)
		}
		return sendReply(reply)
	})

	svm := &sync2.ServiceManager{}
	svm.Go(evs.Stream)
	updateStream.streams.Add(svm)
	defer updateStream.streams.Delete(svm)
	return svm.Join()
}
// PopulateReparentJournal adds an entry into the reparent_journal table.
func (agent *ActionAgent) PopulateReparentJournal(ctx context.Context, timeCreatedNS int64, actionName string, masterAlias *topodatapb.TabletAlias, position string) error {
	pos, err := replication.DecodePosition(position)
	if err != nil {
		return err
	}
	cmds := mysqlctl.CreateReparentJournal()
	cmds = append(cmds, mysqlctl.PopulateReparentJournal(timeCreatedNS, actionName, topoproto.TabletAliasString(masterAlias), pos))

	return agent.MysqlDaemon.ExecuteSuperQueryList(cmds)
}
Exemple #12
0
// NewBinlogPlayerKeyRange returns a new BinlogPlayer pointing at the server
// replicating the provided keyrange, starting at the startPosition,
// and updating _vt.blp_checkpoint with uid=startPosition.Uid.
// If !stopPosition.IsZero(), it will stop when reaching that position.
func NewBinlogPlayerKeyRange(dbClient VtClient, tablet *topodatapb.Tablet, keyRange *topodatapb.KeyRange, uid uint32, startPosition string, stopPosition string, blplStats *Stats) (*BinlogPlayer, error) {
	result := &BinlogPlayer{
		tablet:    tablet,
		dbClient:  dbClient,
		keyRange:  keyRange,
		uid:       uid,
		blplStats: blplStats,
	}
	var err error
	result.position, err = replication.DecodePosition(startPosition)
	if err != nil {
		return nil, err
	}
	if stopPosition != "" {
		result.stopPosition, err = replication.DecodePosition(stopPosition)
		if err != nil {
			return nil, err
		}
	}
	return result, nil
}
Exemple #13
0
// Fresher compares two event tokens.  It returns a negative number if
// ev1<ev2, zero if they're equal, and a positive number if
// ev1>ev2. In case of doubt (we don't have enough information to know
// for sure), it returns a negative number.
func Fresher(ev1, ev2 *querypb.EventToken) int {
	if ev1 == nil || ev2 == nil {
		// Either one is nil, we don't know.
		return -1
	}

	if ev1.Timestamp != ev2.Timestamp {
		// The timestamp is enough to set them apart.
		return int(ev1.Timestamp - ev2.Timestamp)
	}

	if ev1.Shard != "" && ev1.Shard == ev2.Shard {
		// They come from the same shard. See if we have positions.
		if ev1.Position == "" || ev2.Position == "" {
			return -1
		}

		// We can parse them.
		pos1, err := replication.DecodePosition(ev1.Position)
		if err != nil {
			return -1
		}
		pos2, err := replication.DecodePosition(ev2.Position)
		if err != nil {
			return -1
		}

		// Then compare.
		if pos1.Equal(pos2) {
			return 0
		}
		if pos1.AtLeast(pos2) {
			return 1
		}
		return -1
	}

	// We do not know.
	return -1
}
Exemple #14
0
// NewBinlogPlayerKeyRange returns a new BinlogPlayer pointing at the server
// replicating the provided keyrange, starting at the startPosition,
// and updating _vt.blp_checkpoint with uid=startPosition.Uid.
// If !stopPosition.IsZero(), it will stop when reaching that position.
func NewBinlogPlayerKeyRange(dbClient VtClient, endPoint *pbt.EndPoint, keyspaceIDType pbt.KeyspaceIdType, keyRange *pbt.KeyRange, uid uint32, startPosition string, stopPosition string, blplStats *Stats) (*BinlogPlayer, error) {
	result := &BinlogPlayer{
		endPoint:       endPoint,
		dbClient:       dbClient,
		keyspaceIDType: keyspaceIDType,
		keyRange:       keyRange,
		uid:            uid,
		blplStats:      blplStats,
	}
	var err error
	result.position, err = replication.DecodePosition(startPosition)
	if err != nil {
		return nil, err
	}
	if stopPosition != "" {
		result.stopPosition, err = replication.DecodePosition(stopPosition)
		if err != nil {
			return nil, err
		}
	}
	return result, nil
}
Exemple #15
0
// NewBinlogPlayerTables returns a new BinlogPlayer pointing at the server
// replicating the provided tables, starting at the startPosition,
// and updating _vt.blp_checkpoint with uid=startPosition.Uid.
// If !stopPosition.IsZero(), it will stop when reaching that position.
func NewBinlogPlayerTables(dbClient VtClient, endPoint *pbt.EndPoint, tables []string, uid uint32, startPosition string, stopPosition string, blplStats *Stats) (*BinlogPlayer, error) {
	result := &BinlogPlayer{
		endPoint:  endPoint,
		dbClient:  dbClient,
		tables:    tables,
		uid:       uid,
		blplStats: blplStats,
	}
	var err error
	result.position, err = replication.DecodePosition(startPosition)
	if err != nil {
		return nil, err
	}
	if stopPosition != "" {
		var err error
		result.stopPosition, err = replication.DecodePosition(stopPosition)
		if err != nil {
			return nil, err
		}
	}
	return result, nil
}
// StopSlaveMinimum will stop the slave after it reaches at least the
// provided position. Works both when Vitess manages
// replication or not (using hook if not).
func (agent *ActionAgent) StopSlaveMinimum(ctx context.Context, position string, waitTime time.Duration) (string, error) {
	pos, err := replication.DecodePosition(position)
	if err != nil {
		return "", err
	}
	if err := agent.MysqlDaemon.WaitMasterPos(pos, waitTime); err != nil {
		return "", err
	}
	if err := mysqlctl.StopSlave(agent.MysqlDaemon, agent.hookExtraEnv()); err != nil {
		return "", err
	}
	pos, err = agent.MysqlDaemon.MasterPosition()
	if err != nil {
		return "", err
	}
	return replication.EncodePosition(pos), nil
}
Exemple #17
0
// InitSlave sets replication master and position, and waits for the
// reparent_journal table entry up to context timeout
func (agent *ActionAgent) InitSlave(ctx context.Context, parent *topodatapb.TabletAlias, position string, timeCreatedNS int64) error {
	if err := agent.lock(ctx); err != nil {
		return err
	}
	defer agent.unlock()

	pos, err := replication.DecodePosition(position)
	if err != nil {
		return err
	}
	ti, err := agent.TopoServer.GetTablet(ctx, parent)
	if err != nil {
		return err
	}

	agent.setSlaveStopped(false)

	// If using semi-sync, we need to enable it before connecting to master.
	if *enableSemiSync {
		if err := agent.enableSemiSync(false); err != nil {
			return err
		}
	}

	cmds, err := agent.MysqlDaemon.SetSlavePositionCommands(pos)
	if err != nil {
		return err
	}
	cmds2, err := agent.MysqlDaemon.SetMasterCommands(ti.Hostname, int(ti.PortMap["mysql"]))
	if err != nil {
		return err
	}
	cmds = append(cmds, cmds2...)
	cmds = append(cmds, "START SLAVE")

	if err := agent.MysqlDaemon.ExecuteSuperQueryList(ctx, cmds); err != nil {
		return err
	}
	agent.initReplication = true

	// wait until we get the replicated row, or our context times out
	return agent.MysqlDaemon.WaitForReparentJournal(ctx, timeCreatedNS)
}
Exemple #18
0
// StopSlaveMinimum will stop the slave after it reaches at least the
// provided position. Works both when Vitess manages
// replication or not (using hook if not).
func (agent *ActionAgent) StopSlaveMinimum(ctx context.Context, position string, waitTime time.Duration) (string, error) {
	pos, err := replication.DecodePosition(position)
	if err != nil {
		return "", err
	}
	waitCtx, cancel := context.WithTimeout(ctx, waitTime)
	defer cancel()
	if err := agent.MysqlDaemon.WaitMasterPos(waitCtx, pos); err != nil {
		return "", err
	}
	if err := agent.StopSlave(ctx); err != nil {
		return "", err
	}
	pos, err = agent.MysqlDaemon.MasterPosition()
	if err != nil {
		return "", err
	}
	return replication.EncodePosition(pos), nil
}
Exemple #19
0
// ReloadSchema will reload the schema
// Should be called under RPCWrap.
// This doesn't need the action mutex because periodic schema reloads happen
// in the background anyway.
func (agent *ActionAgent) ReloadSchema(ctx context.Context, waitPosition string) error {
	if agent.DBConfigs.IsZero() {
		// we skip this for test instances that can't connect to the DB anyway
		return nil
	}

	if waitPosition != "" {
		pos, err := replication.DecodePosition(waitPosition)
		if err != nil {
			return fmt.Errorf("ReloadSchema: can't parse wait position (%q): %v", waitPosition, err)
		}
		log.Infof("ReloadSchema: waiting for replication position: %v", waitPosition)
		if err := agent.MysqlDaemon.WaitMasterPos(ctx, pos); err != nil {
			return err
		}
	}

	log.Infof("ReloadSchema requested via RPC")
	return agent.QueryServiceControl.ReloadSchema(ctx)
}
Exemple #20
0
// UpdateStream streams binlog events.
func (tsv *TabletServer) UpdateStream(ctx context.Context, target *querypb.Target, position string, timestamp int64, sendReply func(*querypb.StreamEvent) error) error {
	// Parse the position if needed.
	var p replication.Position
	var err error
	if timestamp == 0 {
		p, err = replication.DecodePosition(position)
		if err != nil {
			return NewTabletError(vtrpcpb.ErrorCode_BAD_INPUT, "cannot parse position: %v", err)
		}
	} else if position != "" {
		return NewTabletError(vtrpcpb.ErrorCode_BAD_INPUT, "only one of position and timestamp should be specified")
	}

	// Validate proper target is used.
	if err = tsv.startRequest(target, false, false); err != nil {
		return err
	}
	defer tsv.endRequest(false)

	s := binlog.NewEventStreamer(tsv.dbconfigs.App.DbName, tsv.mysqld, p, timestamp, func(event *querypb.StreamEvent) error {
		return sendReply(event)
	})

	// Create a cancelable wrapping context.
	streamCtx, streamCancel := context.WithCancel(ctx)
	i := tsv.updateStreamList.Add(streamCancel)
	defer tsv.updateStreamList.Delete(i)

	// And stream with it.
	err = s.Stream(streamCtx)
	switch err {
	case mysqlctl.ErrBinlogUnavailable:
		return NewTabletError(vtrpcpb.ErrorCode_QUERY_NOT_SERVED, "%v", err)
	case nil:
		return nil
	default:
		return NewTabletError(vtrpcpb.ErrorCode_INTERNAL_ERROR, "%v", err)
	}
}
Exemple #21
0
// writeRecoveryPosition will write the current GTID as the recovery position
// for the next transaction.
// We will also try to get the timestamp for the transaction. Two cases:
// - we have statements, and they start with a SET TIMESTAMP that we
//   can parse: then we update transaction_timestamp in blp_checkpoint
//   with it, and set SecondsBehindMaster to now() - transaction_timestamp
// - otherwise (the statements are probably filtered out), we leave
//   transaction_timestamp alone (keeping the old value), and we don't
//   change SecondsBehindMaster
func (blp *BinlogPlayer) writeRecoveryPosition(tx *binlogdatapb.BinlogTransaction) error {
	position, err := replication.DecodePosition(tx.EventToken.Position)
	if err != nil {
		return err
	}

	now := time.Now().Unix()

	blp.position = position
	updateRecovery := updateBlpCheckpoint(blp.uid, blp.position, now, tx.EventToken.Timestamp)

	qr, err := blp.exec(updateRecovery)
	if err != nil {
		return fmt.Errorf("Error %v in writing recovery info %v", err, updateRecovery)
	}
	if qr.RowsAffected != 1 {
		return fmt.Errorf("Cannot update blp_recovery table, affected %v rows", qr.RowsAffected)
	}
	blp.blplStats.SetLastPosition(blp.position)
	if tx.EventToken.Timestamp != 0 {
		blp.blplStats.SecondsBehindMaster.Set(now - tx.EventToken.Timestamp)
	}
	return nil
}
Exemple #22
0
func (wr *Wrangler) emergencyReparentShardLocked(ctx context.Context, ev *events.Reparent, keyspace, shard string, masterElectTabletAlias *topodatapb.TabletAlias, waitSlaveTimeout time.Duration) error {
	shardInfo, err := wr.ts.GetShard(ctx, keyspace, shard)
	if err != nil {
		return err
	}
	ev.ShardInfo = *shardInfo

	event.DispatchUpdate(ev, "reading all tablets")
	tabletMap, err := wr.ts.GetTabletMapForShard(ctx, keyspace, shard)
	if err != nil {
		return err
	}

	// Check corner cases we're going to depend on
	masterElectTabletInfo, ok := tabletMap[*masterElectTabletAlias]
	if !ok {
		return fmt.Errorf("master-elect tablet %v is not in the shard", topoproto.TabletAliasString(masterElectTabletAlias))
	}
	ev.NewMaster = *masterElectTabletInfo.Tablet
	if topoproto.TabletAliasEqual(shardInfo.MasterAlias, masterElectTabletAlias) {
		return fmt.Errorf("master-elect tablet %v is already the master", topoproto.TabletAliasString(masterElectTabletAlias))
	}

	// Deal with the old master: try to remote-scrap it, if it's
	// truely dead we force-scrap it. Remove it from our map in any case.
	if shardInfo.HasMaster() {
		deleteOldMaster := true
		oldMasterTabletInfo, ok := tabletMap[*shardInfo.MasterAlias]
		if ok {
			delete(tabletMap, *shardInfo.MasterAlias)
		} else {
			oldMasterTabletInfo, err = wr.ts.GetTablet(ctx, shardInfo.MasterAlias)
			if err != nil {
				wr.logger.Warningf("cannot read old master tablet %v, won't touch it: %v", topoproto.TabletAliasString(shardInfo.MasterAlias), err)
				deleteOldMaster = false
			}
		}

		if deleteOldMaster {
			ev.OldMaster = *oldMasterTabletInfo.Tablet
			wr.logger.Infof("deleting old master %v", topoproto.TabletAliasString(shardInfo.MasterAlias))

			ctx, cancel := context.WithTimeout(ctx, waitSlaveTimeout)
			defer cancel()

			if err := topotools.DeleteTablet(ctx, wr.ts, oldMasterTabletInfo.Tablet); err != nil {
				wr.logger.Warningf("failed to delete old master tablet %v: %v", topoproto.TabletAliasString(shardInfo.MasterAlias), err)
			}
		}
	}

	// Stop replication on all slaves, get their current
	// replication position
	event.DispatchUpdate(ev, "stop replication on all slaves")
	wg := sync.WaitGroup{}
	mu := sync.Mutex{}
	statusMap := make(map[topodatapb.TabletAlias]*replicationdatapb.Status)
	for alias, tabletInfo := range tabletMap {
		wg.Add(1)
		go func(alias topodatapb.TabletAlias, tabletInfo *topo.TabletInfo) {
			defer wg.Done()
			wr.logger.Infof("getting replication position from %v", topoproto.TabletAliasString(&alias))
			ctx, cancel := context.WithTimeout(ctx, waitSlaveTimeout)
			defer cancel()
			rp, err := wr.TabletManagerClient().StopReplicationAndGetStatus(ctx, tabletInfo)
			if err != nil {
				wr.logger.Warningf("failed to get replication status from %v, ignoring tablet: %v", topoproto.TabletAliasString(&alias), err)
				return
			}
			mu.Lock()
			statusMap[alias] = rp
			mu.Unlock()
		}(alias, tabletInfo)
	}
	wg.Wait()

	// Verify masterElect is alive and has the most advanced position
	masterElectStatus, ok := statusMap[*masterElectTabletAlias]
	if !ok {
		return fmt.Errorf("couldn't get master elect %v replication position", topoproto.TabletAliasString(masterElectTabletAlias))
	}
	masterElectPos, err := replication.DecodePosition(masterElectStatus.Position)
	if err != nil {
		return fmt.Errorf("cannot decode master elect position %v: %v", masterElectStatus.Position, err)
	}
	for alias, status := range statusMap {
		if topoproto.TabletAliasEqual(&alias, masterElectTabletAlias) {
			continue
		}
		pos, err := replication.DecodePosition(status.Position)
		if err != nil {
			return fmt.Errorf("cannot decode slave %v position %v: %v", topoproto.TabletAliasString(&alias), status.Position, err)
		}
		if !masterElectPos.AtLeast(pos) {
			return fmt.Errorf("tablet %v is more advanced than master elect tablet %v: %v > %v", topoproto.TabletAliasString(&alias), topoproto.TabletAliasString(masterElectTabletAlias), status.Position, masterElectStatus)
		}
	}

	// Promote the masterElect
	wr.logger.Infof("promote slave %v", topoproto.TabletAliasString(masterElectTabletAlias))
	event.DispatchUpdate(ev, "promoting slave")
	rp, err := wr.tmc.PromoteSlave(ctx, masterElectTabletInfo)
	if err != nil {
		return fmt.Errorf("master-elect tablet %v failed to be upgraded to master: %v", topoproto.TabletAliasString(masterElectTabletAlias), err)
	}

	// Reset replication on all slaves to point to the new master, and
	// insert test row in the new master.
	// Go through all the tablets:
	// - new master: populate the reparent journal
	// - everybody else: reparent to new master, wait for row
	event.DispatchUpdate(ev, "reparenting all tablets")
	now := time.Now().UnixNano()
	wgMaster := sync.WaitGroup{}
	wgSlaves := sync.WaitGroup{}
	rec := concurrency.AllErrorRecorder{}
	var masterErr error
	for alias, tabletInfo := range tabletMap {
		if topoproto.TabletAliasEqual(&alias, masterElectTabletAlias) {
			wgMaster.Add(1)
			go func(alias topodatapb.TabletAlias, tabletInfo *topo.TabletInfo) {
				defer wgMaster.Done()
				wr.logger.Infof("populating reparent journal on new master %v", topoproto.TabletAliasString(&alias))
				masterErr = wr.TabletManagerClient().PopulateReparentJournal(ctx, tabletInfo, now, emergencyReparentShardOperation, &alias, rp)
			}(alias, tabletInfo)
		} else {
			wgSlaves.Add(1)
			go func(alias topodatapb.TabletAlias, tabletInfo *topo.TabletInfo) {
				defer wgSlaves.Done()
				wr.logger.Infof("setting new master on slave %v", topoproto.TabletAliasString(&alias))
				forceStartSlave := false
				if status, ok := statusMap[alias]; ok {
					forceStartSlave = status.SlaveIoRunning || status.SlaveSqlRunning
				}
				if err := wr.TabletManagerClient().SetMaster(ctx, tabletInfo, masterElectTabletAlias, now, forceStartSlave); err != nil {
					rec.RecordError(fmt.Errorf("Tablet %v SetMaster failed: %v", topoproto.TabletAliasString(&alias), err))
				}
			}(alias, tabletInfo)
		}
	}

	// After the master is done, we can update the shard record
	// (note with semi-sync, it also means at least one slave is done)
	wgMaster.Wait()
	if masterErr != nil {
		wgSlaves.Wait()
		return fmt.Errorf("failed to PopulateReparentJournal on master: %v", masterErr)
	}
	wr.logger.Infof("updating shard record with new master %v", topoproto.TabletAliasString(masterElectTabletAlias))
	if _, err := wr.ts.UpdateShardFields(ctx, keyspace, shard, func(s *topodatapb.Shard) error {
		s.MasterAlias = masterElectTabletAlias
		return nil
	}); err != nil {
		wgSlaves.Wait()
		return fmt.Errorf("failed to update shard master record: %v", err)
	}

	// Wait for the slaves to complete. If some of them fail, we
	// will rebuild the shard serving graph anyway
	wgSlaves.Wait()
	if err := rec.Error(); err != nil {
		wr.Logger().Errorf("Some slaves failed to reparent: %v", err)
		return err
	}

	// Then we rebuild the entire serving graph for the shard,
	// to account for all changes.
	wr.logger.Infof("rebuilding shard graph")
	event.DispatchUpdate(ev, "rebuilding shard serving graph")
	_, err = wr.RebuildShardGraph(ctx, keyspace, shard, nil)
	return err
}