Example #1
0
// WaitBlpPosition polls the binlog player checkpoint for bp.Uid until the
// recorded replication position is at least bp.Position, or until
// waitTimeout has elapsed.
//
// It returns nil once the position has been reached, the underlying error if
// the checkpoint query or the position decoding fails, and a timeout error
// if the deadline passes first. A NULL checkpoint value is compared as the
// zero ReplicationPosition.
func (mysqld *Mysqld) WaitBlpPosition(bp *blproto.BlpPosition, waitTimeout time.Duration) error {
	deadline := time.Now().Add(waitTimeout)

	// The deadline is only checked at the top of each iteration, so the
	// checkpoint is polled roughly once per second until it expires.
	for !time.Now().After(deadline) {
		result, err := mysqld.fetchSuperQuery(binlogplayer.QueryBlpCheckpoint(bp.Uid))
		if err != nil {
			return err
		}
		if n := len(result.Rows); n != 1 {
			return fmt.Errorf("QueryBlpCheckpoint(%v) returned unexpected row count: %v", bp.Uid, n)
		}

		// current stays at its zero value when the stored checkpoint is NULL.
		var current proto.ReplicationPosition
		if row := result.Rows[0]; !row[0].IsNull() {
			current, err = proto.DecodeReplicationPosition(row[0].String())
			if err != nil {
				return err
			}
		}
		if current.AtLeast(bp.Position) {
			return nil
		}

		log.Infof("Sleeping 1 second waiting for binlog replication(%v) to catch up: %v != %v", bp.Uid, current, bp.Position)
		time.Sleep(time.Second)
	}

	return fmt.Errorf("WaitBlpPosition(%v) timed out", bp.Uid)
}
Example #2
0
// BinlogInfo returns the binlog file name and file position for the group_id
// of a Google MySQL GTID, as reported by SHOW BINLOG INFO FOR.
// This command only exists in Google MySQL.
//
// pos must be non-zero and its GTIDSet must be a proto.GoogleGTID; otherwise
// an error is returned. A query failure or an unparsable "Pos" column is
// also returned as an error. On every error path fileName and filePos are
// returned as their zero values.
func (mysqld *Mysqld) BinlogInfo(pos proto.ReplicationPosition) (fileName string, filePos uint, err error) {
	if pos.IsZero() {
		return "", 0, fmt.Errorf("input position for BinlogInfo is uninitialized")
	}
	// Extract the group_id from the GoogleGTID. We can't just use String() on the
	// ReplicationPosition, because that includes the server_id.
	gtid, ok := pos.GTIDSet.(proto.GoogleGTID)
	if !ok {
		return "", 0, fmt.Errorf("Non-Google GTID in BinlogInfo(%#v), which is only supported on Google MySQL", pos)
	}
	info, err := mysqld.fetchSuperQueryMap(fmt.Sprintf("SHOW BINLOG INFO FOR %v", gtid.GroupID))
	if err != nil {
		return "", 0, err
	}
	// bitSize 32 makes ParseUint reject anything that does not fit in 32 bits,
	// so the conversion to uint below cannot truncate.
	parsed, err := strconv.ParseUint(info["Pos"], 10, 32)
	if err != nil {
		// Do not hand back a half-filled result together with an error.
		return "", 0, err
	}
	return info["Log_name"], uint(parsed), nil
}
Example #3
0
// checkSlaveConsistency checks all the tablets in tabletMap to see whether
// reparenting can proceed. masterPosition is supplied from the demoted
// master when the reparent is done gracefully; it is the zero position
// otherwise.
//
// Every tablet is queried concurrently. When masterPosition is non-zero each
// tablet is asked to wait for that position (bounded by wr.ActionTimeout());
// when it is zero only the current slave status is fetched. Tablets are then
// grouped by the string form of their reported position, with all failed
// calls grouped under one shared error key.
//
// It returns nil when all tablets fall into a single group and, if
// masterPosition is non-zero, that group's key equals masterPosition. It
// returns an error listing the groups whenever the number of groups is not
// exactly one, which includes an empty tabletMap.
func (wr *Wrangler) checkSlaveConsistency(tabletMap map[uint32]*topo.TabletInfo, masterPosition myproto.ReplicationPosition) error {
	wr.logger.Infof("checkSlaveConsistency %v %#v", topotools.MapKeys(tabletMap), masterPosition)

	// FIXME(msolomon) Something still feels clumsy here and I can't put my finger on it.
	// The channel is buffered for one result per tablet, so no worker ever
	// blocks on its send.
	results := make(chan *rpcContext, len(tabletMap))
	probe := func(ti *topo.TabletInfo) {
		res := &rpcContext{tablet: ti}
		// Always report back, on success or failure, so the collector below
		// receives exactly one result per tablet.
		defer func() {
			results <- res
		}()

		if masterPosition.IsZero() {
			// If the master is down, just get the slave status.
			if status, err := wr.tmc.SlaveStatus(wr.ctx, ti); err != nil {
				res.err = err
			} else {
				res.status = status
			}
			return
		}

		// If the master position is known, do our best to wait for replication to catch up.
		if status, err := wr.tmc.WaitSlavePosition(context.TODO(), ti, masterPosition, wr.ActionTimeout()); err != nil {
			res.err = err
		} else {
			res.status = status
		}
	}

	for _, ti := range tabletMap {
		// The tablet is passed as an argument so each goroutine gets its own
		// value rather than sharing the loop variable.
		go probe(ti)
	}

	// Group tablet uids by the position they reported.
	byPosition := make(map[string][]uint32)
	for pending := len(tabletMap); pending > 0; pending-- {
		res := <-results
		key := "unavailable-tablet-error"
		if res.err == nil {
			key = res.status.Position.String()
		}
		byPosition[key] = append(byPosition[key], res.tablet.Alias.Uid)
	}

	if len(byPosition) != 1 {
		// FIXME(msolomon) in the event of a crash, do you pick replica that is
		// furthest along or do you promote the majority? data loss vs availability
		// sounds like you pick the latest group and reclone.
		groups := make([]string, 0, 32)
		for key, uids := range byPosition {
			paths := make([]string, len(uids))
			for i, uid := range uids {
				paths[i] = tabletMap[uid].Alias.String()
			}
			groups = append(groups, fmt.Sprintf("  %v\n    %v", key, strings.Join(paths, "\n    ")))
		}
		// Map iteration order is random; sort so the message is stable.
		sort.Strings(groups)
		return fmt.Errorf("inconsistent slaves, mark some offline with vtctl ScrapTablet\n%v", strings.Join(groups, "\n"))
	}

	// Great, everyone agrees. Without a known master position there is
	// nothing further to compare against.
	if masterPosition.IsZero() {
		return nil
	}
	masterKey := masterPosition.String()
	if _, ok := byPosition[masterKey]; ok {
		return nil
	}
	// byPosition holds exactly one key here; ranging is just the way to read it.
	for slaveKey := range byPosition {
		return fmt.Errorf("slave position doesn't match demoted master: %v != %v", masterKey,
			slaveKey)
	}
	return nil
}