// BinlogInfo returns the filename and position for a Google MySQL group_id.
// This command only exists in Google MySQL.
func (mysqld *Mysqld) BinlogInfo(pos proto.ReplicationPosition) (fileName string, filePos uint, err error) {
	if pos.IsZero() {
		return fileName, filePos, fmt.Errorf("input position for BinlogInfo is uninitialized")
	}
	// Extract the group_id from the GoogleGTID. We can't just use String() on the
	// ReplicationPosition, because that includes the server_id.
	gtid, ok := pos.GTIDSet.(proto.GoogleGTID)
	if !ok {
		return "", 0, fmt.Errorf("Non-Google GTID in BinlogInfo(%#v), which is only supported on Google MySQL", pos)
	}
	info, err := mysqld.fetchSuperQueryMap(fmt.Sprintf("SHOW BINLOG INFO FOR %v", gtid.GroupID))
	if err != nil {
		return "", 0, err
	}
	fileName = info["Log_name"]
	temp, err := strconv.ParseUint(info["Pos"], 10, 32)
	if err != nil {
		return fileName, filePos, err
	}
	filePos = uint(temp)
	return fileName, filePos, err
}
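// The following is a minimal, hypothetical sketch of how a caller might use
// BinlogInfo to translate a group_id into a binlog coordinate. The GroupID
// field on proto.GoogleGTID and this wrapper's name and signature are
// illustrative assumptions, not part of the function above.
func binlogCoordinatesForGroupID(mysqld *Mysqld, groupID uint64) (string, uint, error) {
	// Wrap the group_id in a GoogleGTID-based replication position, since
	// BinlogInfo only accepts Google MySQL positions.
	pos := proto.ReplicationPosition{GTIDSet: proto.GoogleGTID{GroupID: groupID}}
	fileName, filePos, err := mysqld.BinlogInfo(pos)
	if err != nil {
		return "", 0, fmt.Errorf("cannot resolve group_id %v: %v", groupID, err)
	}
	return fileName, filePos, nil
}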
// Check all the tablets to see if we can proceed with reparenting.
// masterPosition is supplied from the demoted master if we are doing
// this gracefully.
func (wr *Wrangler) checkSlaveConsistency(tabletMap map[uint32]*topo.TabletInfo, masterPosition myproto.ReplicationPosition) error {
	wr.logger.Infof("checkSlaveConsistency %v %#v", topotools.MapKeys(tabletMap), masterPosition)

	// FIXME(msolomon) Something still feels clumsy here and I can't put my finger on it.
	calls := make(chan *rpcContext, len(tabletMap))
	f := func(ti *topo.TabletInfo) {
		ctx := &rpcContext{tablet: ti}
		defer func() {
			calls <- ctx
		}()

		if !masterPosition.IsZero() {
			// If the master position is known, do our best to wait for replication to catch up.
			status, err := wr.tmc.WaitSlavePosition(ti, masterPosition, wr.ActionTimeout())
			if err != nil {
				ctx.err = err
				return
			}
			ctx.status = status
		} else {
			// If the master is down, just get the slave status.
			status, err := wr.tmc.SlaveStatus(ti, wr.ActionTimeout())
			if err != nil {
				ctx.err = err
				return
			}
			ctx.status = status
		}
	}

	for _, tablet := range tabletMap {
		// Pass the loop variable explicitly so we don't have a concurrency issue.
		go f(tablet)
	}

	// Map positions to tablets.
	positionMap := make(map[string][]uint32)
	for i := 0; i < len(tabletMap); i++ {
		ctx := <-calls
		mapKey := "unavailable-tablet-error"
		if ctx.err == nil {
			mapKey = ctx.status.Position.String()
		}
		if _, ok := positionMap[mapKey]; !ok {
			positionMap[mapKey] = make([]uint32, 0, 32)
		}
		positionMap[mapKey] = append(positionMap[mapKey], ctx.tablet.Alias.Uid)
	}

	if len(positionMap) == 1 {
		// Great, everyone agrees.
		// masterPosition is zero if the demotion failed.
		if !masterPosition.IsZero() {
			demotedMapKey := masterPosition.String()
			if _, ok := positionMap[demotedMapKey]; !ok {
				for slaveMapKey := range positionMap {
					return fmt.Errorf("slave position doesn't match demoted master: %v != %v", demotedMapKey, slaveMapKey)
				}
			}
		}
	} else {
		// FIXME(msolomon) in the event of a crash, do you pick the replica that is
		// furthest along or do you promote the majority? data loss vs availability
		// sounds like you pick the latest group and reclone.
		items := make([]string, 0, 32)
		for slaveMapKey, uids := range positionMap {
			tabletPaths := make([]string, len(uids))
			for i, uid := range uids {
				tabletPaths[i] = tabletMap[uid].Alias.String()
			}
			items = append(items, fmt.Sprintf(" %v\n %v", slaveMapKey, strings.Join(tabletPaths, "\n ")))
		}
		sort.Strings(items)
		return fmt.Errorf("inconsistent slaves, mark some offline with vtctl ScrapTablet\n%v", strings.Join(items, "\n"))
	}
	return nil
}
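// A hypothetical sketch of how a reparent flow might call checkSlaveConsistency;
// the wrapper name and its arguments (tabletMap, demotedMasterPosition) are
// assumptions for illustration, not code from the function above.
func (wr *Wrangler) verifySlavesBeforePromotion(tabletMap map[uint32]*topo.TabletInfo, demotedMasterPosition myproto.ReplicationPosition) error {
	// For a graceful reparent, pass the demoted master's position so every
	// slave must catch up to and match it. For a dead-master reparent, pass a
	// zero position: checkSlaveConsistency then only requires the slaves to
	// agree with each other.
	if err := wr.checkSlaveConsistency(tabletMap, demotedMasterPosition); err != nil {
		return fmt.Errorf("slaves not ready for promotion: %v", err)
	}
	return nil
}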