// Return a replication state that will reparent a slave to the // correct master for a specified position. func (mysqld *Mysqld) ReparentPosition(slavePosition *proto.ReplicationPosition) (rs *proto.ReplicationState, waitPosition *proto.ReplicationPosition, reparentTime int64, err error) { qr, err := mysqld.fetchSuperQuery(fmt.Sprintf("SELECT time_created_ns, new_addr, new_position, wait_position FROM _vt.reparent_log WHERE last_position = '%v'", slavePosition.MapKey())) if err != nil { return } if len(qr.Rows) != 1 { err = fmt.Errorf("no reparent for position: %v", slavePosition.MapKey()) return } reparentTime, err = qr.Rows[0][0].ParseInt64() if err != nil { err = fmt.Errorf("bad reparent time: %v %v %v", slavePosition.MapKey(), qr.Rows[0][0], err) return } file, pos, err := parseReplicationPosition(qr.Rows[0][2].String()) if err != nil { return } rs, err = proto.NewReplicationState(qr.Rows[0][1].String()) if err != nil { return } rs.ReplicationPosition.MasterLogFile = file rs.ReplicationPosition.MasterLogPosition = uint(pos) file, pos, err = parseReplicationPosition(qr.Rows[0][3].String()) if err != nil { return } waitPosition = new(proto.ReplicationPosition) waitPosition.MasterLogFile = file waitPosition.MasterLogPosition = pos return }
// Check all the tablets to see if we can proceed with reparenting. // masterPosition is supplied from the demoted master if we are doing // this gracefully. func (wr *Wrangler) checkSlaveConsistency(tabletMap map[uint32]*topo.TabletInfo, masterPosition *myproto.ReplicationPosition) error { log.V(6).Infof("checkSlaveConsistency %v %#v", mapKeys(tabletMap), masterPosition) // FIXME(msolomon) Something still feels clumsy here and I can't put my finger on it. calls := make(chan *rpcContext, len(tabletMap)) f := func(ti *topo.TabletInfo) { ctx := &rpcContext{tablet: ti} defer func() { calls <- ctx }() var args *myproto.ReplicationPosition if masterPosition != nil { // If the master position is known, do our best to wait for replication to catch up. args = masterPosition } else { // In the case where a master is down, look for the last bit of data copied and wait // for that to apply. That gives us a chance to wait for all data. replPos, err := wr.ai.SlavePosition(ti, wr.actionTimeout()) if err != nil { ctx.err = err return } args = &myproto.ReplicationPosition{ MasterLogFile: replPos.MasterLogFileIo, MasterLogPositionIo: replPos.MasterLogPositionIo, } } // This option waits for the SQL thread to apply all changes to this instance. rp, err := wr.ai.WaitSlavePosition(ti, args, wr.actionTimeout()) if err != nil { ctx.err = err return } ctx.position = rp } for _, tablet := range tabletMap { // Pass loop variable explicitly so we don't have a concurrency issue. go f(tablet) } // map positions to tablets positionMap := make(map[string][]uint32) for i := 0; i < len(tabletMap); i++ { ctx := <-calls mapKey := "unavailable-tablet-error" if ctx.err == nil { mapKey = ctx.position.MapKey() } if _, ok := positionMap[mapKey]; !ok { positionMap[mapKey] = make([]uint32, 0, 32) } positionMap[mapKey] = append(positionMap[mapKey], ctx.tablet.Alias.Uid) } if len(positionMap) == 1 { // great, everyone agrees // demotedMasterReplicationState is nil if demotion failed if masterPosition != nil { demotedMapKey := masterPosition.MapKey() if _, ok := positionMap[demotedMapKey]; !ok { for slaveMapKey := range positionMap { return fmt.Errorf("slave position doesn't match demoted master: %v != %v", demotedMapKey, slaveMapKey) } } } } else { // FIXME(msolomon) in the event of a crash, do you pick replica that is // furthest along or do you promote the majority? data loss vs availability // sounds like you pick the latest group and reclone. items := make([]string, 0, 32) for slaveMapKey, uids := range positionMap { tabletPaths := make([]string, len(uids)) for i, uid := range uids { tabletPaths[i] = tabletMap[uid].Alias.String() } items = append(items, fmt.Sprintf(" %v\n %v", slaveMapKey, strings.Join(tabletPaths, "\n "))) } sort.Strings(items) return fmt.Errorf("inconsistent slaves, mark some offline with vtctl ScrapTablet\n%v", strings.Join(items, "\n")) } return nil }