Ejemplo n.º 1
0
// BinlogInfo returns the filename and position for a Google MySQL group_id.
// This command only exists in Google MySQL.
func (mysqld *Mysqld) BinlogInfo(pos proto.ReplicationPosition) (fileName string, filePos uint, err error) {
	if pos.IsZero() {
		return fileName, filePos, fmt.Errorf("input position for BinlogInfo is uninitialized")
	}
	// Extract the group_id from the GoogleGTID. We can't just use String() on the
	// ReplicationPosition, because that includes the server_id.
	gtid, ok := pos.GTIDSet.(proto.GoogleGTID)
	if !ok {
		return "", 0, fmt.Errorf("Non-Google GTID in BinlogInfo(%#v), which is only supported on Google MySQL", pos)
	}
	info, err := mysqld.fetchSuperQueryMap(fmt.Sprintf("SHOW BINLOG INFO FOR %v", gtid.GroupID))
	if err != nil {
		return "", 0, err
	}
	fileName = info["Log_name"]
	temp, err := strconv.ParseUint(info["Pos"], 10, 32)
	if err != nil {
		return fileName, filePos, err
	}
	filePos = uint(temp)
	return fileName, filePos, err
}
Ejemplo n.º 2
0
// Check all the tablets to see if we can proceed with reparenting.
// masterPosition is supplied from the demoted master if we are doing
// this gracefully.
func (wr *Wrangler) checkSlaveConsistency(tabletMap map[uint32]*topo.TabletInfo, masterPosition myproto.ReplicationPosition) error {
	wr.logger.Infof("checkSlaveConsistency %v %#v", topotools.MapKeys(tabletMap), masterPosition)

	// FIXME(msolomon) Something still feels clumsy here and I can't put my finger on it.
	calls := make(chan *rpcContext, len(tabletMap))
	f := func(ti *topo.TabletInfo) {
		ctx := &rpcContext{tablet: ti}
		defer func() {
			calls <- ctx
		}()

		if !masterPosition.IsZero() {
			// If the master position is known, do our best to wait for replication to catch up.
			status, err := wr.tmc.WaitSlavePosition(ti, masterPosition, wr.ActionTimeout())
			if err != nil {
				ctx.err = err
				return
			}
			ctx.status = status
		} else {
			// If the master is down, just get the slave status.
			status, err := wr.tmc.SlaveStatus(ti, wr.ActionTimeout())
			if err != nil {
				ctx.err = err
				return
			}
			ctx.status = status
		}
	}

	for _, tablet := range tabletMap {
		// Pass loop variable explicitly so we don't have a concurrency issue.
		go f(tablet)
	}

	// map positions to tablets
	positionMap := make(map[string][]uint32)
	for i := 0; i < len(tabletMap); i++ {
		ctx := <-calls
		mapKey := "unavailable-tablet-error"
		if ctx.err == nil {
			mapKey = ctx.status.Position.String()
		}
		if _, ok := positionMap[mapKey]; !ok {
			positionMap[mapKey] = make([]uint32, 0, 32)
		}
		positionMap[mapKey] = append(positionMap[mapKey], ctx.tablet.Alias.Uid)
	}

	if len(positionMap) == 1 {
		// great, everyone agrees
		// demotedMasterReplicationState is nil if demotion failed
		if !masterPosition.IsZero() {
			demotedMapKey := masterPosition.String()
			if _, ok := positionMap[demotedMapKey]; !ok {
				for slaveMapKey := range positionMap {
					return fmt.Errorf("slave position doesn't match demoted master: %v != %v", demotedMapKey,
						slaveMapKey)
				}
			}
		}
	} else {
		// FIXME(msolomon) in the event of a crash, do you pick replica that is
		// furthest along or do you promote the majority? data loss vs availability
		// sounds like you pick the latest group and reclone.
		items := make([]string, 0, 32)
		for slaveMapKey, uids := range positionMap {
			tabletPaths := make([]string, len(uids))
			for i, uid := range uids {
				tabletPaths[i] = tabletMap[uid].Alias.String()
			}
			items = append(items, fmt.Sprintf("  %v\n    %v", slaveMapKey, strings.Join(tabletPaths, "\n    ")))
		}
		sort.Strings(items)
		return fmt.Errorf("inconsistent slaves, mark some offline with vtctl ScrapTablet\n%v", strings.Join(items, "\n"))
	}
	return nil
}