Пример #1
0
// RefreshTablesByShard calls RefreshState on all the tables of a
// given type in a shard. It would work for the master, but the
// discovery wouldn't be very efficient.
func (wr *Wrangler) RefreshTablesByShard(si *topo.ShardInfo, tabletType topo.TabletType, cells []string) error {
	tabletMap, err := topo.GetTabletMapForShardByCell(wr.ts, si.Keyspace(), si.ShardName(), cells)
	switch err {
	case nil:
		// keep going
	case topo.ErrPartialResult:
		wr.Logger().Warningf("RefreshTablesByShard: got partial result for shard %v/%v, may not refresh all tablets everywhere", si.Keyspace(), si.ShardName())
	default:
		return err
	}

	// ignore errors in this phase
	wg := sync.WaitGroup{}
	for _, ti := range tabletMap {
		if ti.Type != tabletType {
			continue
		}

		wg.Add(1)
		go func(ti *topo.TabletInfo) {
			if err := wr.tmc.RefreshState(ti, wr.ActionTimeout()); err != nil {
				wr.Logger().Warningf("RefreshTablesByShard: failed to refresh %v: %v", ti.Alias, err)
			}
			wg.Done()
		}(ti)
	}
	wg.Wait()

	return nil
}
Пример #2
0
// replicaMigrateServedFrom handles the slave (replica, rdonly) migration.
func (wr *Wrangler) replicaMigrateServedFrom(ki *topo.KeyspaceInfo, sourceShard *topo.ShardInfo, destinationShard *topo.ShardInfo, servedType topo.TabletType, reverse bool, tables []string, ev *events.MigrateServedFrom) error {
	// Save the destination keyspace (its ServedFrom has been changed)
	event.DispatchUpdate(ev, "updating keyspace")
	if err := topo.UpdateKeyspace(wr.ts, ki); err != nil {
		return err
	}

	// Save the source shard (its blacklisted tables field has changed)
	event.DispatchUpdate(ev, "updating source shard")
	if sourceShard.BlacklistedTablesMap == nil {
		sourceShard.BlacklistedTablesMap = make(map[topo.TabletType][]string)
	}
	if reverse {
		delete(sourceShard.BlacklistedTablesMap, servedType)
	} else {
		sourceShard.BlacklistedTablesMap[servedType] = tables
	}
	if err := topo.UpdateShard(wr.ts, sourceShard); err != nil {
		return err
	}

	// Now refresh the source servers so they reload their
	// blacklisted table list
	event.DispatchUpdate(ev, "refreshing sources tablets state so they update their blacklisted tables")
	if err := wr.RefreshTablesByShard(sourceShard.Keyspace(), sourceShard.ShardName(), servedType); err != nil {
		return err
	}

	return nil
}
Пример #3
0
// replicaMigrateServedFrom handles the slave (replica, rdonly) migration.
func (wr *Wrangler) replicaMigrateServedFrom(ctx context.Context, ki *topo.KeyspaceInfo, sourceShard *topo.ShardInfo, destinationShard *topo.ShardInfo, servedType topodatapb.TabletType, cells []string, reverse bool, tables []string, ev *events.MigrateServedFrom) error {
	// Save the destination keyspace (its ServedFrom has been changed)
	event.DispatchUpdate(ev, "updating keyspace")
	if err := wr.ts.UpdateKeyspace(ctx, ki); err != nil {
		return err
	}

	// Save the source shard (its blacklisted tables field has changed)
	event.DispatchUpdate(ev, "updating source shard")
	if err := sourceShard.UpdateSourceBlacklistedTables(servedType, cells, reverse, tables); err != nil {
		return fmt.Errorf("UpdateSourceBlacklistedTables(%v/%v) failed: %v", sourceShard.Keyspace(), sourceShard.ShardName(), err)
	}
	if err := wr.ts.UpdateShard(ctx, sourceShard); err != nil {
		return fmt.Errorf("UpdateShard(%v/%v) failed: %v", sourceShard.Keyspace(), sourceShard.ShardName(), err)
	}

	// Now refresh the source servers so they reload their
	// blacklisted table list
	event.DispatchUpdate(ev, "refreshing sources tablets state so they update their blacklisted tables")
	if err := wr.RefreshTablesByShard(ctx, sourceShard, servedType, cells); err != nil {
		return err
	}

	return nil
}
Пример #4
0
func (wr *Wrangler) validateReplication(shardInfo *topo.ShardInfo, tabletMap map[topo.TabletAlias]*topo.TabletInfo, results chan<- vresult) {
	_, ok := tabletMap[shardInfo.MasterAlias]
	if !ok {
		results <- vresult{shardInfo.MasterAlias.String(), fmt.Errorf("master not in tablet map")}
		return
	}

	actionPath, err := wr.ai.GetSlaves(shardInfo.MasterAlias)
	if err != nil {
		results <- vresult{shardInfo.MasterAlias.String(), err}
		return
	}
	sa, err := wr.ai.WaitForCompletionReply(actionPath, wr.actionTimeout())
	if err != nil {
		results <- vresult{shardInfo.MasterAlias.String(), err}
		return
	}
	slaveAddrs := sa.(*tm.SlaveList).Addrs
	if len(slaveAddrs) == 0 {
		results <- vresult{shardInfo.MasterAlias.String(), fmt.Errorf("no slaves found")}
		return
	}

	// Some addresses don't resolve in all locations, just use IP address
	if err != nil {
		results <- vresult{shardInfo.MasterAlias.String(), fmt.Errorf("resolve slaves failed: %v", err)}
		return
	}

	tabletIpMap := make(map[string]*topo.Tablet)
	for _, tablet := range tabletMap {
		ipAddr, _, err := net.SplitHostPort(tablet.MysqlIpAddr)
		if err != nil {
			results <- vresult{tablet.Alias().String(), fmt.Errorf("bad mysql addr: %v %v", tablet.MysqlIpAddr, err)}
			continue
		}
		tabletIpMap[ipAddr] = tablet.Tablet
	}

	// See if every slave is in the replication graph.
	for _, slaveAddr := range slaveAddrs {
		if tabletIpMap[slaveAddr] == nil {
			results <- vresult{shardInfo.Keyspace() + "/" + shardInfo.ShardName(), fmt.Errorf("slave not in replication graph: %v (mysql instance without vttablet?)", slaveAddr)}
		}
	}

	// See if every entry in the replication graph is connected to the master.
	for _, tablet := range tabletMap {
		if !tablet.IsSlaveType() {
			continue
		}

		ipAddr, _, err := net.SplitHostPort(tablet.MysqlIpAddr)
		if err != nil {
			results <- vresult{tablet.Alias().String(), fmt.Errorf("bad mysql addr: %v", err)}
		} else if !strInList(slaveAddrs, ipAddr) {
			results <- vresult{tablet.Alias().String(), fmt.Errorf("slave not replicating: %v %q", ipAddr, slaveAddrs)}
		}
	}
}
Пример #5
0
func (wr *Wrangler) migrateServedFrom(ki *topo.KeyspaceInfo, destinationShard *topo.ShardInfo, servedType topo.TabletType, reverse bool) (err error) {

	// re-read and update keyspace info record
	ki, err = wr.ts.GetKeyspace(ki.KeyspaceName())
	if err != nil {
		return err
	}
	if reverse {
		if _, ok := ki.ServedFrom[servedType]; ok {
			return fmt.Errorf("Destination Keyspace %s is not serving type %v", ki.KeyspaceName(), servedType)
		}
		ki.ServedFrom[servedType] = destinationShard.SourceShards[0].Keyspace
	} else {
		if _, ok := ki.ServedFrom[servedType]; !ok {
			return fmt.Errorf("Destination Keyspace %s is already serving type %v", ki.KeyspaceName(), servedType)
		}
		delete(ki.ServedFrom, servedType)
	}

	// re-read and check the destination shard
	destinationShard, err = wr.ts.GetShard(destinationShard.Keyspace(), destinationShard.ShardName())
	if err != nil {
		return err
	}
	if len(destinationShard.SourceShards) != 1 {
		return fmt.Errorf("Destination shard %v/%v is not a vertical split target", destinationShard.Keyspace(), destinationShard.ShardName())
	}
	tables := destinationShard.SourceShards[0].Tables

	// read the source shard, we'll need its master, and we'll need to
	// update the blacklisted tables.
	var sourceShard *topo.ShardInfo
	sourceShard, err = wr.ts.GetShard(destinationShard.SourceShards[0].Keyspace, destinationShard.SourceShards[0].Shard)
	if err != nil {
		return err
	}

	ev := &events.MigrateServedFrom{
		Keyspace:         *ki,
		SourceShard:      *sourceShard,
		DestinationShard: *destinationShard,
		ServedType:       servedType,
		Reverse:          reverse,
	}
	event.DispatchUpdate(ev, "start")
	defer func() {
		if err != nil {
			event.DispatchUpdate(ev, "failed: "+err.Error())
		}
	}()

	if servedType == topo.TYPE_MASTER {
		err = wr.masterMigrateServedFrom(ki, sourceShard, destinationShard, servedType, tables, ev)
	} else {
		err = wr.replicaMigrateServedFrom(ki, sourceShard, destinationShard, servedType, reverse, tables, ev)
	}
	event.DispatchUpdate(ev, "finished")
	return
}
Пример #6
0
// UpdateSrvShard creates the SrvShard object based on the global ShardInfo,
// and writes it to the given cell.
func UpdateSrvShard(ctx context.Context, ts topo.Server, cell string, si *topo.ShardInfo) error {
	srvShard := &topo.SrvShard{
		Name:       si.ShardName(),
		KeyRange:   si.KeyRange,
		MasterCell: si.MasterAlias.Cell,
	}
	return ts.UpdateSrvShard(ctx, cell, si.Keyspace(), si.ShardName(), srvShard)
}
Пример #7
0
func (wr *Wrangler) shardReplicationStatuses(ctx context.Context, shardInfo *topo.ShardInfo) ([]*topo.TabletInfo, []*myproto.ReplicationStatus, error) {
	// FIXME(msolomon) this assumes no hierarchical replication, which is currently the case.
	tabletMap, err := wr.ts.GetTabletMapForShard(ctx, shardInfo.Keyspace(), shardInfo.ShardName())
	if err != nil {
		return nil, nil, err
	}
	tablets := topotools.CopyMapValues(tabletMap, []*topo.TabletInfo{}).([]*topo.TabletInfo)
	stats, err := wr.tabletReplicationStatuses(ctx, tablets)
	return tablets, stats, err
}
Пример #8
0
func (wr *Wrangler) shardReplicationPositions(shardInfo *topo.ShardInfo) ([]*topo.TabletInfo, []*mysqlctl.ReplicationPosition, error) {
	// FIXME(msolomon) this assumes no hierarchical replication, which is currently the case.
	tabletMap, err := GetTabletMapForShard(wr.ts, shardInfo.Keyspace(), shardInfo.ShardName())
	if err != nil {
		return nil, nil, err
	}
	tablets := CopyMapValues(tabletMap, []*topo.TabletInfo{}).([]*topo.TabletInfo)
	positions, err := wr.tabletReplicationPositions(tablets)
	return tablets, positions, err
}
Пример #9
0
func (zkts *Server) UpdateShard(si *topo.ShardInfo) error {
	shardPath := path.Join(globalKeyspacesPath, si.Keyspace(), "shards", si.ShardName())
	_, err := zkts.zconn.Set(shardPath, jscfg.ToJson(si.Shard), -1)
	if err != nil {
		if zookeeper.IsError(err, zookeeper.ZNONODE) {
			err = topo.ErrNoNode
		}
	}
	return err
}
Пример #10
0
// updateShardCellsAndMaster will update the 'Cells' and possibly
// MasterAlias records for the shard, if needed.
func (wr *Wrangler) updateShardCellsAndMaster(ctx context.Context, si *topo.ShardInfo, tabletAlias topo.TabletAlias, tabletType topo.TabletType, force bool) error {
	// See if we need to update the Shard:
	// - add the tablet's cell to the shard's Cells if needed
	// - change the master if needed
	shardUpdateRequired := false
	if !si.HasCell(tabletAlias.Cell) {
		shardUpdateRequired = true
	}
	if tabletType == topo.TYPE_MASTER && si.MasterAlias != tabletAlias {
		shardUpdateRequired = true
	}
	if !shardUpdateRequired {
		return nil
	}

	actionNode := actionnode.UpdateShard()
	keyspace := si.Keyspace()
	shard := si.ShardName()
	lockPath, err := wr.lockShard(ctx, keyspace, shard, actionNode)
	if err != nil {
		return err
	}

	// re-read the shard with the lock
	si, err = wr.ts.GetShard(ctx, keyspace, shard)
	if err != nil {
		return wr.unlockShard(ctx, keyspace, shard, actionNode, lockPath, err)
	}

	// update it
	wasUpdated := false
	if !si.HasCell(tabletAlias.Cell) {
		si.Cells = append(si.Cells, tabletAlias.Cell)
		wasUpdated = true
	}
	if tabletType == topo.TYPE_MASTER && si.MasterAlias != tabletAlias {
		if !si.MasterAlias.IsZero() && !force {
			return wr.unlockShard(ctx, keyspace, shard, actionNode, lockPath, fmt.Errorf("creating this tablet would override old master %v in shard %v/%v", si.MasterAlias, keyspace, shard))
		}
		si.MasterAlias = tabletAlias
		wasUpdated = true
	}

	if wasUpdated {
		// write it back
		if err := topo.UpdateShard(ctx, wr.ts, si); err != nil {
			return wr.unlockShard(ctx, keyspace, shard, actionNode, lockPath, err)
		}
	}

	// and unlock
	return wr.unlockShard(ctx, keyspace, shard, actionNode, lockPath, err)
}
Пример #11
0
func (wr *Wrangler) migrateServedFrom(ctx context.Context, ki *topo.KeyspaceInfo, destinationShard *topo.ShardInfo, servedType topodatapb.TabletType, cells []string, reverse bool, filteredReplicationWaitTime time.Duration) (err error) {

	// re-read and update keyspace info record
	ki, err = wr.ts.GetKeyspace(ctx, ki.KeyspaceName())
	if err != nil {
		return err
	}
	if reverse {
		ki.UpdateServedFromMap(servedType, cells, destinationShard.SourceShards[0].Keyspace, false, nil)
	} else {
		ki.UpdateServedFromMap(servedType, cells, destinationShard.SourceShards[0].Keyspace, true, destinationShard.Cells)
	}

	// re-read and check the destination shard
	destinationShard, err = wr.ts.GetShard(ctx, destinationShard.Keyspace(), destinationShard.ShardName())
	if err != nil {
		return err
	}
	if len(destinationShard.SourceShards) != 1 {
		return fmt.Errorf("Destination shard %v/%v is not a vertical split target", destinationShard.Keyspace(), destinationShard.ShardName())
	}
	tables := destinationShard.SourceShards[0].Tables

	// read the source shard, we'll need its master, and we'll need to
	// update the blacklisted tables.
	var sourceShard *topo.ShardInfo
	sourceShard, err = wr.ts.GetShard(ctx, destinationShard.SourceShards[0].Keyspace, destinationShard.SourceShards[0].Shard)
	if err != nil {
		return err
	}

	ev := &events.MigrateServedFrom{
		KeyspaceName:     ki.KeyspaceName(),
		SourceShard:      *sourceShard,
		DestinationShard: *destinationShard,
		ServedType:       servedType,
		Reverse:          reverse,
	}
	event.DispatchUpdate(ev, "start")
	defer func() {
		if err != nil {
			event.DispatchUpdate(ev, "failed: "+err.Error())
		}
	}()

	if servedType == topodatapb.TabletType_MASTER {
		err = wr.masterMigrateServedFrom(ctx, ki, sourceShard, destinationShard, tables, ev, filteredReplicationWaitTime)
	} else {
		err = wr.replicaMigrateServedFrom(ctx, ki, sourceShard, destinationShard, servedType, cells, reverse, tables, ev)
	}
	event.DispatchUpdate(ev, "finished")
	return
}
Пример #12
0
func (tee *Tee) UpdateShard(si *topo.ShardInfo) error {
	if err := tee.primary.UpdateShard(si); err != nil {
		// failed on primary, not updating secondary
		return err
	}

	if err := tee.secondary.UpdateShard(si); err != nil {
		// not critical enough to fail
		log.Warningf("secondary.UpdateShard(%v,%v) failed: %v", si.Keyspace(), si.ShardName(), err)
	}
	return nil
}
Пример #13
0
func (wr *Wrangler) applySchemaShard(ctx context.Context, shardInfo *topo.ShardInfo, preflight *myproto.SchemaChangeResult, masterTabletAlias *pb.TabletAlias, change string, newParentTabletAlias *pb.TabletAlias, simple, force bool, waitSlaveTimeout time.Duration) (*myproto.SchemaChangeResult, error) {

	// find all the shards we need to handle
	aliases, err := topo.FindAllTabletAliasesInShard(ctx, wr.ts, shardInfo.Keyspace(), shardInfo.ShardName())
	if err != nil {
		return nil, err
	}

	// build the array of tabletStatus we're going to use
	statusArray := make([]*tabletStatus, 0, len(aliases)-1)
	for _, alias := range aliases {
		if alias == masterTabletAlias {
			// we skip the master
			continue
		}

		ti, err := wr.ts.GetTablet(ctx, alias)
		if err != nil {
			return nil, err
		}
		statusArray = append(statusArray, &tabletStatus{ti: ti})
	}

	// get schema on all tablets.
	log.Infof("Getting schema on all tablets for shard %v/%v", shardInfo.Keyspace(), shardInfo.ShardName())
	wg := &sync.WaitGroup{}
	for _, status := range statusArray {
		wg.Add(1)
		go func(status *tabletStatus) {
			status.beforeSchema, status.lastError = wr.tmc.GetSchema(ctx, status.ti, nil, nil, false)
			wg.Done()
		}(status)
	}
	wg.Wait()

	// quick check for errors
	for _, status := range statusArray {
		if status.lastError != nil {
			return nil, fmt.Errorf("Error getting schema on tablet %v: %v", status.ti.Alias, status.lastError)
		}
	}

	// simple or complex?
	if simple {
		return wr.applySchemaShardSimple(ctx, statusArray, preflight, masterTabletAlias, change, force)
	}

	return wr.applySchemaShardComplex(ctx, statusArray, shardInfo, preflight, masterTabletAlias, change, newParentTabletAlias, force, waitSlaveTimeout)
}
Пример #14
0
// updateShardCellsAndMaster will update the 'Cells' and possibly
// MasterAlias records for the shard, if needed.
func (wr *Wrangler) updateShardCellsAndMaster(ctx context.Context, si *topo.ShardInfo, tabletAlias *topodatapb.TabletAlias, tabletType topodatapb.TabletType, allowMasterOverride bool) error {
	// See if we need to update the Shard:
	// - add the tablet's cell to the shard's Cells if needed
	// - change the master if needed
	shardUpdateRequired := false
	if !si.HasCell(tabletAlias.Cell) {
		shardUpdateRequired = true
	}
	if tabletType == topodatapb.TabletType_MASTER && !topoproto.TabletAliasEqual(si.MasterAlias, tabletAlias) {
		shardUpdateRequired = true
	}
	if !shardUpdateRequired {
		return nil
	}

	// we do need to update the shard, lock it to not interfere with
	// reparenting operations.
	actionNode := actionnode.UpdateShard()
	keyspace := si.Keyspace()
	shard := si.ShardName()
	lockPath, err := wr.lockShard(ctx, keyspace, shard, actionNode)
	if err != nil {
		return err
	}

	// run the update
	_, err = wr.ts.UpdateShardFields(ctx, keyspace, shard, func(s *topodatapb.Shard) error {
		wasUpdated := false
		if !topoproto.ShardHasCell(s, tabletAlias.Cell) {
			s.Cells = append(s.Cells, tabletAlias.Cell)
			wasUpdated = true
		}

		if tabletType == topodatapb.TabletType_MASTER && !topoproto.TabletAliasEqual(s.MasterAlias, tabletAlias) {
			if !topoproto.TabletAliasIsZero(s.MasterAlias) && !allowMasterOverride {
				return fmt.Errorf("creating this tablet would override old master %v in shard %v/%v", topoproto.TabletAliasString(s.MasterAlias), keyspace, shard)
			}
			s.MasterAlias = tabletAlias
			wasUpdated = true
		}

		if !wasUpdated {
			return topo.ErrNoUpdateNeeded
		}
		return nil
	})
	return wr.unlockShard(ctx, keyspace, shard, actionNode, lockPath, err)
}
Пример #15
0
func (wr *Wrangler) validateReplication(shardInfo *topo.ShardInfo, tabletMap map[topo.TabletAlias]*topo.TabletInfo, results chan<- vresult) {
	masterTablet, ok := tabletMap[shardInfo.MasterAlias]
	if !ok {
		results <- vresult{shardInfo.MasterAlias.String(), fmt.Errorf("master not in tablet map")}
		return
	}

	slaveList, err := wr.ai.GetSlaves(masterTablet, wr.ActionTimeout())
	if err != nil {
		results <- vresult{shardInfo.MasterAlias.String(), err}
		return
	}
	if len(slaveList) == 0 {
		results <- vresult{shardInfo.MasterAlias.String(), fmt.Errorf("no slaves found")}
		return
	}

	// Some addresses don't resolve in all locations, just use IP address
	if err != nil {
		results <- vresult{shardInfo.MasterAlias.String(), fmt.Errorf("resolve slaves failed: %v", err)}
		return
	}

	tabletIpMap := make(map[string]*topo.Tablet)
	slaveIpMap := make(map[string]bool)
	for _, tablet := range tabletMap {
		tabletIpMap[normalizeIP(tablet.IPAddr)] = tablet.Tablet
	}

	// See if every slave is in the replication graph.
	for _, slaveAddr := range slaveList {
		if tabletIpMap[normalizeIP(slaveAddr)] == nil {
			results <- vresult{shardInfo.Keyspace() + "/" + shardInfo.ShardName(), fmt.Errorf("slave not in replication graph: %v (mysql instance without vttablet?)", slaveAddr)}
		}
		slaveIpMap[normalizeIP(slaveAddr)] = true
	}

	// See if every entry in the replication graph is connected to the master.
	for _, tablet := range tabletMap {
		if !tablet.IsSlaveType() {
			continue
		}

		if !slaveIpMap[normalizeIP(tablet.IPAddr)] {
			results <- vresult{tablet.Alias.String(), fmt.Errorf("slave not replicating: %v %q", tablet.IPAddr, slaveList)}
		}
	}
}
Пример #16
0
func (zkts *Server) UpdateShard(si *topo.ShardInfo) error {
	shardPath := path.Join(globalKeyspacesPath, si.Keyspace(), "shards", si.ShardName())
	_, err := zkts.zconn.Set(shardPath, jscfg.ToJson(si.Shard), -1)
	if err != nil {
		if zookeeper.IsError(err, zookeeper.ZNONODE) {
			err = topo.ErrNoNode
		}
		return err
	}

	event.Dispatch(&events.ShardChange{
		ShardInfo: *si,
		Status:    "updated",
	})
	return nil
}
Пример #17
0
// UpdateShard is part of the topo.Server interface
func (zkts *Server) UpdateShard(ctx context.Context, si *topo.ShardInfo, existingVersion int64) (int64, error) {
	shardPath := path.Join(globalKeyspacesPath, si.Keyspace(), "shards", si.ShardName())
	stat, err := zkts.zconn.Set(shardPath, jscfg.ToJSON(si.Shard), int(existingVersion))
	if err != nil {
		if zookeeper.IsError(err, zookeeper.ZNONODE) {
			err = topo.ErrNoNode
		}
		return -1, err
	}

	event.Dispatch(&events.ShardChange{
		ShardInfo: *si,
		Status:    "updated",
	})
	return int64(stat.Version()), nil
}
Пример #18
0
func (wr *Wrangler) validateReplication(ctx context.Context, shardInfo *topo.ShardInfo, tabletMap map[topodatapb.TabletAlias]*topo.TabletInfo, results chan<- error) {
	if shardInfo.MasterAlias == nil {
		results <- fmt.Errorf("no master in shard record %v/%v", shardInfo.Keyspace(), shardInfo.ShardName())
		return
	}

	masterTabletInfo, ok := tabletMap[*shardInfo.MasterAlias]
	if !ok {
		results <- fmt.Errorf("master %v not in tablet map", topoproto.TabletAliasString(shardInfo.MasterAlias))
		return
	}

	slaveList, err := wr.tmc.GetSlaves(ctx, masterTabletInfo.Tablet)
	if err != nil {
		results <- fmt.Errorf("GetSlaves(%v) failed: %v", masterTabletInfo, err)
		return
	}
	if len(slaveList) == 0 {
		results <- fmt.Errorf("no slaves of tablet %v found", topoproto.TabletAliasString(shardInfo.MasterAlias))
		return
	}

	tabletIPMap := make(map[string]*topodatapb.Tablet)
	slaveIPMap := make(map[string]bool)
	for _, tablet := range tabletMap {
		tabletIPMap[normalizeIP(tablet.Ip)] = tablet.Tablet
	}

	// See if every slave is in the replication graph.
	for _, slaveAddr := range slaveList {
		if tabletIPMap[normalizeIP(slaveAddr)] == nil {
			results <- fmt.Errorf("slave %v not in replication graph for shard %v/%v (mysql instance without vttablet?)", slaveAddr, shardInfo.Keyspace(), shardInfo.ShardName())
		}
		slaveIPMap[normalizeIP(slaveAddr)] = true
	}

	// See if every entry in the replication graph is connected to the master.
	for _, tablet := range tabletMap {
		if !tablet.IsSlaveType() {
			continue
		}

		if !slaveIPMap[normalizeIP(tablet.Ip)] {
			results <- fmt.Errorf("slave %v not replicating: %v slave list: %q", topoproto.TabletAliasString(tablet.Alias), tablet.Ip, slaveList)
		}
	}
}
Пример #19
0
// Update shard file with new master, replicas, etc.
//
// Re-read from TopologyServer to make sure we are using the side
// effects of all actions.
//
// This function should only be used with an action lock on the shard
// - otherwise the consistency of the serving graph data can't be
// guaranteed.
func (wr *Wrangler) rebuildShard(keyspace, shard string, options rebuildShardOptions) error {
	log.Infof("rebuildShard %v/%v", keyspace, shard)

	// read the existing shard info. It has to exist.

	var (
		shardInfo *topo.ShardInfo
		err       error
	)
	if options.Critical {
		shardInfo, err = wr.ts.GetShardCritical(keyspace, shard)
	} else {
		shardInfo, err = wr.ts.GetShard(keyspace, shard)
	}

	if err != nil {
		return err
	}

	tabletMap, err := GetTabletMapForShardByCell(wr.ts, keyspace, shard, options.Cells)
	if err != nil {
		if options.IgnorePartialResult && err == topo.ErrPartialResult {
			log.Warningf("rebuildShard: got topo.ErrPartialResult from GetTabletMapForShard, but skipping error as it was expected")
		} else {
			return err
		}
	}

	tablets := make([]*topo.TabletInfo, 0, len(tabletMap))
	for _, ti := range tabletMap {
		if ti.Keyspace != shardInfo.Keyspace() || ti.Shard != shardInfo.ShardName() {
			return fmt.Errorf("CRITICAL: tablet %v is in replication graph for shard %v/%v but belongs to shard %v:%v (maybe remove its replication path in shard %v/%v)", ti.Alias, keyspace, shard, ti.Keyspace, ti.Shard, keyspace, shard)
		}
		if !ti.IsInReplicationGraph() {
			// only valid case is a scrapped master in the
			// catastrophic reparent case
			if ti.Parent.Uid != topo.NO_TABLET {
				log.Warningf("Tablet %v should not be in the replication graph, please investigate (it will be ignored in the rebuild)", ti.Alias)
			}
		}
		tablets = append(tablets, ti)
	}

	return wr.rebuildShardSrvGraph(shardInfo, tablets, options.Cells)
}
Пример #20
0
// UpdateShard implements topo.Server.
func (s *Server) UpdateShard(ctx context.Context, si *topo.ShardInfo, existingVersion int64) (int64, error) {
	data := jscfg.ToJSON(si.Shard)

	resp, err := s.getGlobal().CompareAndSwap(shardFilePath(si.Keyspace(), si.ShardName()),
		data, 0 /* ttl */, "" /* prevValue */, uint64(existingVersion))
	if err != nil {
		return -1, convertError(err)
	}
	if resp.Node == nil {
		return -1, ErrBadResponse
	}

	event.Dispatch(&events.ShardChange{
		ShardInfo: *si,
		Status:    "updated",
	})
	return int64(resp.Node.ModifiedIndex), nil
}
Пример #21
0
// updateShardCellsAndMaster will update the 'Cells' and possibly
// MasterAlias records for the shard, if needed.
func (wr *Wrangler) updateShardCellsAndMaster(ctx context.Context, si *topo.ShardInfo, tabletAlias *topodatapb.TabletAlias, tabletType topodatapb.TabletType, allowMasterOverride bool) error {
	// See if we need to update the Shard:
	// - add the tablet's cell to the shard's Cells if needed
	// - change the master if needed
	shardUpdateRequired := false
	if !si.HasCell(tabletAlias.Cell) {
		shardUpdateRequired = true
	}
	if tabletType == topodatapb.TabletType_MASTER && !topoproto.TabletAliasEqual(si.MasterAlias, tabletAlias) {
		shardUpdateRequired = true
	}
	if !shardUpdateRequired {
		return nil
	}

	// run the update
	_, err := wr.ts.UpdateShardFields(ctx, si.Keyspace(), si.ShardName(), func(s *topo.ShardInfo) error {
		wasUpdated := false
		if !s.HasCell(tabletAlias.Cell) {
			s.Cells = append(s.Cells, tabletAlias.Cell)
			wasUpdated = true
		}

		if tabletType == topodatapb.TabletType_MASTER && !topoproto.TabletAliasEqual(s.MasterAlias, tabletAlias) {
			if !topoproto.TabletAliasIsZero(s.MasterAlias) && !allowMasterOverride {
				return fmt.Errorf("creating this tablet would override old master %v in shard %v/%v", topoproto.TabletAliasString(s.MasterAlias), si.Keyspace(), si.ShardName())
			}
			s.MasterAlias = tabletAlias
			wasUpdated = true
		}

		if !wasUpdated {
			return topo.ErrNoUpdateNeeded
		}
		return nil
	})
	return err
}
Пример #22
0
// RefreshTablesByShard calls RefreshState on all the tables of a
// given type in a shard. It would work for the master, but the
// discovery wouldn't be very efficient.
func (wr *Wrangler) RefreshTablesByShard(ctx context.Context, si *topo.ShardInfo, tabletType topodatapb.TabletType, cells []string) error {
	wr.Logger().Infof("RefreshTablesByShard called on shard %v/%v", si.Keyspace(), si.ShardName())
	tabletMap, err := wr.ts.GetTabletMapForShardByCell(ctx, si.Keyspace(), si.ShardName(), cells)
	switch err {
	case nil:
		// keep going
	case topo.ErrPartialResult:
		wr.Logger().Warningf("RefreshTablesByShard: got partial result for shard %v/%v, may not refresh all tablets everywhere", si.Keyspace(), si.ShardName())
	default:
		return err
	}

	// ignore errors in this phase
	wg := sync.WaitGroup{}
	for _, ti := range tabletMap {
		if ti.Type != tabletType {
			continue
		}

		wg.Add(1)
		go func(ti *topo.TabletInfo) {
			wr.Logger().Infof("Calling RefreshState on tablet %v", ti.AliasString())
			// Setting an upper bound timeout to fail faster in case of an error.
			// Using 60 seconds because RefreshState should not take more than 30 seconds.
			// (RefreshState will restart the tablet's QueryService and most time will be spent on the shutdown, i.e. waiting up to 30 seconds on transactions (see Config.TransactionTimeout)).
			ctx, cancel := context.WithTimeout(ctx, 60*time.Second)
			if err := wr.tmc.RefreshState(ctx, ti); err != nil {
				wr.Logger().Warningf("RefreshTablesByShard: failed to refresh %v: %v", ti.AliasString(), err)
			}
			cancel()
			wg.Done()
		}(ti)
	}
	wg.Wait()

	return nil
}
Пример #23
0
func (wr *Wrangler) applySchemaShardComplex(ctx context.Context, statusArray []*tabletStatus, shardInfo *topo.ShardInfo, preflight *myproto.SchemaChangeResult, masterTabletAlias *pb.TabletAlias, change string, newParentTabletAlias *pb.TabletAlias, force bool, waitSlaveTimeout time.Duration) (*myproto.SchemaChangeResult, error) {
	// apply the schema change to all replica / slave tablets
	for _, status := range statusArray {
		// if already applied, we skip this guy
		diffs := myproto.DiffSchemaToArray("after", preflight.AfterSchema, topo.TabletAliasString(status.ti.Alias), status.beforeSchema)
		if len(diffs) == 0 {
			log.Infof("Tablet %v already has the AfterSchema, skipping", status.ti.Alias)
			continue
		}

		// make sure the before schema matches
		diffs = myproto.DiffSchemaToArray("master", preflight.BeforeSchema, topo.TabletAliasString(status.ti.Alias), status.beforeSchema)
		if len(diffs) > 0 {
			if force {
				log.Warningf("Tablet %v has inconsistent schema, ignoring: %v", status.ti.Alias, strings.Join(diffs, "\n"))
			} else {
				return nil, fmt.Errorf("Tablet %v has inconsistent schema: %v", status.ti.Alias, strings.Join(diffs, "\n"))
			}
		}

		// take this guy out of the serving graph if necessary
		ti, err := wr.ts.GetTablet(ctx, status.ti.Alias)
		if err != nil {
			return nil, err
		}
		typeChangeRequired := ti.IsInServingGraph()
		if typeChangeRequired {
			// note we want to update the serving graph there
			err = wr.changeTypeInternal(ctx, ti.Alias, pb.TabletType_SCHEMA_UPGRADE)
			if err != nil {
				return nil, err
			}
		}

		// apply the schema change
		log.Infof("Applying schema change to slave %v in complex mode", status.ti.Alias)
		sc := &myproto.SchemaChange{Sql: change, Force: force, AllowReplication: false, BeforeSchema: preflight.BeforeSchema, AfterSchema: preflight.AfterSchema}
		_, err = wr.ApplySchema(ctx, status.ti.Alias, sc)
		if err != nil {
			return nil, err
		}

		// put this guy back into the serving graph
		if typeChangeRequired {
			err = wr.changeTypeInternal(ctx, ti.Alias, ti.Tablet.Type)
			if err != nil {
				return nil, err
			}
		}
	}

	// if newParentTabletAlias is passed in, use that as the new master
	if !topo.TabletAliasIsZero(newParentTabletAlias) {
		log.Infof("Reparenting with new master set to %v", newParentTabletAlias)
		oldMasterAlias := shardInfo.MasterAlias

		// Create reusable Reparent event with available info
		ev := &events.Reparent{}

		if err := wr.plannedReparentShardLocked(ctx, ev, shardInfo.Keyspace(), shardInfo.ShardName(), newParentTabletAlias, waitSlaveTimeout); err != nil {
			return nil, err
		}

		// Here we would apply the schema change to the old
		// master, but we just scrap it, to be consistent
		// with the previous implementation of the reparent.
		// (this code will be refactored at some point anyway)
		if err := wr.Scrap(ctx, oldMasterAlias, false, false); err != nil {
			wr.Logger().Warningf("Scrapping old master %v from shard %v/%v failed: %v", oldMasterAlias, shardInfo.Keyspace(), shardInfo.ShardName(), err)
		}
	}
	return &myproto.SchemaChangeResult{BeforeSchema: preflight.BeforeSchema, AfterSchema: preflight.AfterSchema}, nil
}
Пример #24
0
func (wr *Wrangler) applySchemaShardComplex(statusArray []*TabletStatus, shardInfo *topo.ShardInfo, preflight *mysqlctl.SchemaChangeResult, masterTabletAlias topo.TabletAlias, change string, newParentTabletAlias topo.TabletAlias, force bool) (*mysqlctl.SchemaChangeResult, error) {
	// apply the schema change to all replica / slave tablets
	for _, status := range statusArray {
		// if already applied, we skip this guy
		diffs := mysqlctl.DiffSchemaToArray("after", preflight.AfterSchema, status.ti.Alias().String(), status.beforeSchema)
		if len(diffs) == 0 {
			relog.Info("Tablet %v already has the AfterSchema, skipping", status.ti.Alias())
			continue
		}

		// make sure the before schema matches
		diffs = mysqlctl.DiffSchemaToArray("master", preflight.BeforeSchema, status.ti.Alias().String(), status.beforeSchema)
		if len(diffs) > 0 {
			if force {
				relog.Warning("Tablet %v has inconsistent schema, ignoring: %v", status.ti.Alias(), strings.Join(diffs, "\n"))
			} else {
				return nil, fmt.Errorf("Tablet %v has inconsistent schema: %v", status.ti.Alias(), strings.Join(diffs, "\n"))
			}
		}

		// take this guy out of the serving graph if necessary
		ti, err := wr.ts.GetTablet(status.ti.Alias())
		if err != nil {
			return nil, err
		}
		typeChangeRequired := ti.Tablet.IsServingType()
		if typeChangeRequired {
			// note we want to update the serving graph there
			err = wr.changeTypeInternal(ti.Alias(), topo.TYPE_SCHEMA_UPGRADE)
			if err != nil {
				return nil, err
			}
		}

		// apply the schema change
		relog.Info("Applying schema change to slave %v in complex mode", status.ti.Alias())
		sc := &mysqlctl.SchemaChange{Sql: change, Force: force, AllowReplication: false, BeforeSchema: preflight.BeforeSchema, AfterSchema: preflight.AfterSchema}
		_, err = wr.ApplySchema(status.ti.Alias(), sc)
		if err != nil {
			return nil, err
		}

		// put this guy back into the serving graph
		if typeChangeRequired {
			err = wr.changeTypeInternal(ti.Alias(), ti.Tablet.Type)
			if err != nil {
				return nil, err
			}
		}
	}

	// if newParentTabletAlias is passed in, use that as the new master
	if newParentTabletAlias != (topo.TabletAlias{}) {
		relog.Info("Reparenting with new master set to %v", newParentTabletAlias)
		tabletMap, err := GetTabletMapForShard(wr.ts, shardInfo.Keyspace(), shardInfo.ShardName())
		if err != nil {
			return nil, err
		}

		slaveTabletMap, foundMaster, err := slaveTabletMap(tabletMap)
		if err != nil {
			return nil, err
		}

		newMasterTablet, err := wr.ts.GetTablet(newParentTabletAlias)
		if err != nil {
			return nil, err
		}

		err = wr.reparentShardGraceful(slaveTabletMap, foundMaster, newMasterTablet /*leaveMasterReadOnly*/, false)
		if err != nil {
			return nil, err
		}

		// Here we would apply the schema change to the old
		// master, but after a reparent it's in Scrap state,
		// so no need to.  When/if reparent leaves the
		// original master in a different state (like replica
		// or rdonly), then we should apply the schema there
		// too.
		relog.Info("Skipping schema change on old master %v in complex mode, it's been Scrapped", masterTabletAlias)
	}
	return &mysqlctl.SchemaChangeResult{BeforeSchema: preflight.BeforeSchema, AfterSchema: preflight.AfterSchema}, nil
}
Пример #25
0
func (wr *Wrangler) applySchemaShard(shardInfo *topo.ShardInfo, preflight *mysqlctl.SchemaChangeResult, masterTabletAlias topo.TabletAlias, change string, newParentTabletAlias topo.TabletAlias, simple, force bool) (*mysqlctl.SchemaChangeResult, error) {

	// find all the shards we need to handle
	aliases, err := topo.FindAllTabletAliasesInShard(wr.ts, shardInfo.Keyspace(), shardInfo.ShardName())
	if err != nil {
		return nil, err
	}

	// build the array of TabletStatus we're going to use
	statusArray := make([]*TabletStatus, 0, len(aliases)-1)
	for _, alias := range aliases {
		if alias == masterTabletAlias {
			// we skip the master
			continue
		}

		ti, err := wr.ts.GetTablet(alias)
		if err != nil {
			return nil, err
		}
		if ti.Type == topo.TYPE_LAG {
			// lag tablets are usually behind, not replicating,
			// and a general pain. So let's just skip them
			// all together.
			// TODO(alainjobart) figure out other types to skip:
			// ValidateSchemaShard only does the serving types.
			// We do everything in the replication graph
			// but LAG. This seems fine for now.
			relog.Info("Skipping tablet %v as it is LAG", ti.Alias())
			continue
		}

		statusArray = append(statusArray, &TabletStatus{ti: ti})
	}

	// get schema on all tablets.
	relog.Info("Getting schema on all tablets for shard %v/%v", shardInfo.Keyspace(), shardInfo.ShardName())
	wg := &sync.WaitGroup{}
	for _, status := range statusArray {
		wg.Add(1)
		go func(status *TabletStatus) {
			status.beforeSchema, status.lastError = wr.GetSchemaTablet(status.ti, nil, false)
			wg.Done()
		}(status)
	}
	wg.Wait()

	// quick check for errors
	for _, status := range statusArray {
		if status.lastError != nil {
			return nil, fmt.Errorf("Error getting schema on tablet %v: %v", status.ti.Alias(), status.lastError)
		}
	}

	// simple or complex?
	if simple {
		return wr.applySchemaShardSimple(statusArray, preflight, masterTabletAlias, change, force)
	}

	return wr.applySchemaShardComplex(statusArray, shardInfo, preflight, masterTabletAlias, change, newParentTabletAlias, force)
}
Пример #26
0
// rebuildCellSrvShard computes and writes the serving graph data to a
// single cell
func rebuildCellSrvShard(ctx context.Context, log logutil.Logger, ts topo.Server, si *topo.ShardInfo, cell string) (err error) {
	log.Infof("rebuildCellSrvShard %v/%v in cell %v", si.Keyspace(), si.ShardName(), cell)

	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}

		// Read existing EndPoints node versions, so we know if any
		// changes sneak in after we read the tablets.
		versions, err := getEndPointsVersions(ctx, ts, cell, si.Keyspace(), si.ShardName())

		// Get all tablets in this cell/shard.
		tablets, err := ts.GetTabletMapForShardByCell(ctx, si.Keyspace(), si.ShardName(), []string{cell})
		if err != nil {
			if err != topo.ErrPartialResult {
				return err
			}
			log.Warningf("Got ErrPartialResult from topo.GetTabletMapForShardByCell(%v), some tablets may not be added properly to serving graph", cell)
		}

		// Build up the serving graph from scratch.
		serving := make(map[pb.TabletType]*pb.EndPoints)
		for _, tablet := range tablets {
			// Only add serving types.
			if !tablet.IsInServingGraph() {
				continue
			}

			// Check the Keyspace and Shard for the tablet are right.
			if tablet.Keyspace != si.Keyspace() || tablet.Shard != si.ShardName() {
				return fmt.Errorf("CRITICAL: tablet %v is in replication graph for shard %v/%v but belongs to shard %v:%v", tablet.Alias, si.Keyspace(), si.ShardName(), tablet.Keyspace, tablet.Shard)
			}

			// Add the tablet to the list.
			endpoints, ok := serving[tablet.Type]
			if !ok {
				endpoints = topo.NewEndPoints()
				serving[tablet.Type] = endpoints
			}
			entry, err := topo.TabletEndPoint(tablet.Tablet)
			if err != nil {
				log.Warningf("EndPointForTablet failed for tablet %v: %v", tablet.Alias, err)
				continue
			}
			endpoints.Entries = append(endpoints.Entries, entry)
		}

		wg := sync.WaitGroup{}
		fatalErrs := concurrency.AllErrorRecorder{}
		retryErrs := concurrency.AllErrorRecorder{}

		// Write nodes that should exist.
		for tabletType, endpoints := range serving {
			wg.Add(1)
			go func(tabletType pb.TabletType, endpoints *pb.EndPoints) {
				defer wg.Done()

				log.Infof("saving serving graph for cell %v shard %v/%v tabletType %v", cell, si.Keyspace(), si.ShardName(), tabletType)

				version, ok := versions[tabletType]
				if !ok {
					// This type didn't exist when we first checked.
					// Try to create, but only if it still doesn't exist.
					if err := ts.CreateEndPoints(ctx, cell, si.Keyspace(), si.ShardName(), tabletType, endpoints); err != nil {
						log.Warningf("CreateEndPoints(%v, %v, %v) failed during rebuild: %v", cell, si, tabletType, err)
						switch err {
						case topo.ErrNodeExists:
							retryErrs.RecordError(err)
						default:
							fatalErrs.RecordError(err)
						}
					}
					return
				}

				// Update only if the version matches.
				if err := ts.UpdateEndPoints(ctx, cell, si.Keyspace(), si.ShardName(), tabletType, endpoints, version); err != nil {
					log.Warningf("UpdateEndPoints(%v, %v, %v) failed during rebuild: %v", cell, si, tabletType, err)
					switch err {
					case topo.ErrBadVersion, topo.ErrNoNode:
						retryErrs.RecordError(err)
					default:
						fatalErrs.RecordError(err)
					}
				}
			}(tabletType, endpoints)
		}

		// Delete nodes that shouldn't exist.
		for tabletType, version := range versions {
			if _, ok := serving[tabletType]; !ok {
				wg.Add(1)
				go func(tabletType pb.TabletType, version int64) {
					defer wg.Done()
					log.Infof("removing stale db type from serving graph: %v", tabletType)
					if err := ts.DeleteEndPoints(ctx, cell, si.Keyspace(), si.ShardName(), tabletType, version); err != nil && err != topo.ErrNoNode {
						log.Warningf("DeleteEndPoints(%v, %v, %v) failed during rebuild: %v", cell, si, tabletType, err)
						switch err {
						case topo.ErrNoNode:
							// Someone else deleted it, which is fine.
						case topo.ErrBadVersion:
							retryErrs.RecordError(err)
						default:
							fatalErrs.RecordError(err)
						}
					}
				}(tabletType, version)
			}
		}

		// Update srvShard object
		wg.Add(1)
		go func() {
			defer wg.Done()
			log.Infof("updating shard serving graph in cell %v for %v/%v", cell, si.Keyspace(), si.ShardName())
			if err := UpdateSrvShard(ctx, ts, cell, si); err != nil {
				fatalErrs.RecordError(err)
				log.Warningf("writing serving data in cell %v for %v/%v failed: %v", cell, si.Keyspace(), si.ShardName(), err)
			}
		}()

		wg.Wait()

		// If there are any fatal errors, give up.
		if fatalErrs.HasErrors() {
			return fatalErrs.Error()
		}
		// If there are any retry errors, try again.
		if retryErrs.HasErrors() {
			continue
		}
		// Otherwise, success!
		return nil
	}
}
Пример #27
0
func (wr *Wrangler) migrateServedFrom(ki *topo.KeyspaceInfo, si *topo.ShardInfo, servedType topo.TabletType, reverse bool) (err error) {

	// re-read and update keyspace info record
	ki, err = wr.ts.GetKeyspace(ki.KeyspaceName())
	if err != nil {
		return err
	}
	if reverse {
		if _, ok := ki.ServedFrom[servedType]; ok {
			return fmt.Errorf("Destination Keyspace %s is not serving type %v", ki.KeyspaceName(), servedType)
		}
		ki.ServedFrom[servedType] = si.SourceShards[0].Keyspace
	} else {
		if _, ok := ki.ServedFrom[servedType]; !ok {
			return fmt.Errorf("Destination Keyspace %s is already serving type %v", ki.KeyspaceName(), servedType)
		}
		delete(ki.ServedFrom, servedType)
	}

	// re-read and check the destination shard
	si, err = wr.ts.GetShard(si.Keyspace(), si.ShardName())
	if err != nil {
		return err
	}
	if len(si.SourceShards) != 1 {
		return fmt.Errorf("Destination shard %v/%v is not a vertical split target", si.Keyspace(), si.ShardName())
	}
	tables := si.SourceShards[0].Tables

	// read the source shard, we'll need its master
	sourceShard, err := wr.ts.GetShard(si.SourceShards[0].Keyspace, si.SourceShards[0].Shard)
	if err != nil {
		return err
	}

	ev := &events.MigrateServedFrom{
		Keyspace:         *ki,
		SourceShard:      *sourceShard,
		DestinationShard: *si,
		ServedType:       servedType,
		Reverse:          reverse,
	}
	event.DispatchUpdate(ev, "start")
	defer func() {
		if err != nil {
			event.DispatchUpdate(ev, "failed: "+err.Error())
		}
	}()

	// For master type migration, need to:
	// - switch the source shard to read-only
	// - gather the replication point
	// - wait for filtered replication to catch up before we continue
	// - disable filtered replication after the fact
	var sourceMasterTabletInfo *topo.TabletInfo
	if servedType == topo.TYPE_MASTER {
		// set master to read-only
		event.DispatchUpdate(ev, "setting source shard master to read-only")
		actionPath, err := wr.ai.SetReadOnly(sourceShard.MasterAlias)
		if err != nil {
			return err
		}
		if err := wr.WaitForCompletion(actionPath); err != nil {
			return err
		}

		// get the position
		event.DispatchUpdate(ev, "getting master position")
		sourceMasterTabletInfo, err = wr.ts.GetTablet(sourceShard.MasterAlias)
		if err != nil {
			return err
		}
		masterPosition, err := wr.ai.MasterPosition(sourceMasterTabletInfo, wr.ActionTimeout())
		if err != nil {
			return err
		}

		// wait for it
		event.DispatchUpdate(ev, "waiting for destination master to catch up to source master")
		if err := wr.ai.WaitBlpPosition(si.MasterAlias, blproto.BlpPosition{
			Uid:      0,
			Position: masterPosition,
		}, wr.ActionTimeout()); err != nil {
			return err
		}

		// and clear the shard record
		si.SourceShards = nil
	}

	// All is good, we can save the keyspace and shard (if needed) now
	event.DispatchUpdate(ev, "updating keyspace")
	if err = topo.UpdateKeyspace(wr.ts, ki); err != nil {
		return err
	}
	event.DispatchUpdate(ev, "updating destination shard")
	if servedType == topo.TYPE_MASTER {
		if err := topo.UpdateShard(wr.ts, si); err != nil {
			return err
		}
	}

	// Tell the new shards masters they can now be read-write.
	// Invoking a remote action will also make the tablet stop filtered
	// replication.
	event.DispatchUpdate(ev, "setting destination shard masters read-write")
	if servedType == topo.TYPE_MASTER {
		if err := wr.makeMastersReadWrite([]*topo.ShardInfo{si}); err != nil {
			return err
		}
	}

	// Now blacklist the table list on the right servers
	event.DispatchUpdate(ev, "setting blacklisted tables on source shard")
	if servedType == topo.TYPE_MASTER {
		if err := wr.ai.SetBlacklistedTables(sourceMasterTabletInfo, tables, wr.ActionTimeout()); err != nil {
			return err
		}
	} else {
		// We use the list of tables that are replicating
		// for the blacklist. In case of a reverse move, we clear the
		// blacklist.
		if reverse {
			tables = nil
		}
		if err := wr.SetBlacklistedTablesByShard(sourceShard.Keyspace(), sourceShard.ShardName(), servedType, tables); err != nil {
			return err
		}
	}

	event.DispatchUpdate(ev, "finished")
	return nil
}
Пример #28
0
// Write serving graph data to the cells
func (wr *Wrangler) rebuildShardSrvGraph(shardInfo *topo.ShardInfo, tablets []*topo.TabletInfo, cells []string) error {
	log.Infof("rebuildShardSrvGraph %v/%v", shardInfo.Keyspace(), shardInfo.ShardName())

	// Get all existing db types so they can be removed if nothing
	// had been editted.  This applies to all cells, which can't
	// be determined until you walk through all the tablets.
	//
	// existingDbTypeLocations is a map:
	//   key: {cell,keyspace,shard,tabletType}
	//   value: true
	existingDbTypeLocations := make(map[cellKeyspaceShardType]bool)

	// Update db type addresses in the serving graph
	//
	// locationAddrsMap is a map:
	//   key: {cell,keyspace,shard,tabletType}
	//   value: topo.EndPoints (list of server records)
	locationAddrsMap := make(map[cellKeyspaceShardType]*topo.EndPoints)

	// we keep track of the existingDbTypeLocations we've already looked at
	knownShardLocations := make(map[cellKeyspaceShard]bool)

	for _, tablet := range tablets {
		// only look at tablets in the cells we want to rebuild
		if !topo.InCellList(tablet.Tablet.Alias.Cell, cells) {
			continue
		}

		// this is {cell,keyspace,shard}
		// we'll get the children to find the existing types
		shardLocation := cellKeyspaceShard{tablet.Tablet.Alias.Cell, tablet.Tablet.Keyspace, tablet.Shard}
		// only need to do this once per cell
		if !knownShardLocations[shardLocation] {
			log.Infof("Getting tablet types on cell %v for %v/%v", tablet.Tablet.Alias.Cell, tablet.Tablet.Keyspace, tablet.Shard)
			tabletTypes, err := wr.ts.GetSrvTabletTypesPerShard(tablet.Tablet.Alias.Cell, tablet.Tablet.Keyspace, tablet.Shard)
			if err != nil {
				if err != topo.ErrNoNode {
					return err
				}
			} else {
				for _, tabletType := range tabletTypes {
					existingDbTypeLocations[cellKeyspaceShardType{tablet.Tablet.Alias.Cell, tablet.Tablet.Keyspace, tablet.Shard, tabletType}] = true
				}
			}
			knownShardLocations[shardLocation] = true
		}

		// Check IsInServingGraph after we have populated
		// existingDbTypeLocations so we properly prune data
		// if the definition of serving type changes.
		if !tablet.IsInServingGraph() {
			continue
		}

		location := cellKeyspaceShardType{tablet.Tablet.Alias.Cell, tablet.Keyspace, tablet.Shard, tablet.Type}
		addrs, ok := locationAddrsMap[location]
		if !ok {
			addrs = topo.NewEndPoints()
			locationAddrsMap[location] = addrs
		}

		entry, err := tabletmanager.EndPointForTablet(tablet.Tablet)
		if err != nil {
			log.Warningf("EndPointForTablet failed for tablet %v: %v", tablet.Alias, err)
			continue
		}
		addrs.Entries = append(addrs.Entries, *entry)
	}

	// we're gonna parallelize a lot here
	rec := concurrency.AllErrorRecorder{}
	wg := sync.WaitGroup{}

	// write all the {cell,keyspace,shard,type}
	// nodes everywhere we want them
	for location, addrs := range locationAddrsMap {
		wg.Add(1)
		go func(location cellKeyspaceShardType, addrs *topo.EndPoints) {
			log.Infof("saving serving graph for cell %v shard %v/%v tabletType %v", location.cell, location.keyspace, location.shard, location.tabletType)
			if err := wr.ts.UpdateEndPoints(location.cell, location.keyspace, location.shard, location.tabletType, addrs); err != nil {
				rec.RecordError(fmt.Errorf("writing endpoints for cell %v shard %v/%v tabletType %v failed: %v", location.cell, location.keyspace, location.shard, location.tabletType, err))
			}
			wg.Done()
		}(location, addrs)
	}

	// Delete any pre-existing paths that were not updated by this process.
	// That's the existingDbTypeLocations - locationAddrsMap
	for dbTypeLocation := range existingDbTypeLocations {
		if _, ok := locationAddrsMap[dbTypeLocation]; !ok {
			cell := dbTypeLocation.cell
			if !topo.InCellList(cell, cells) {
				continue
			}

			wg.Add(1)
			go func(dbTypeLocation cellKeyspaceShardType) {
				log.Infof("removing stale db type from serving graph: %v", dbTypeLocation)
				if err := wr.ts.DeleteSrvTabletType(dbTypeLocation.cell, dbTypeLocation.keyspace, dbTypeLocation.shard, dbTypeLocation.tabletType); err != nil {
					log.Warningf("unable to remove stale db type %v from serving graph: %v", dbTypeLocation, err)
				}
				wg.Done()
			}(dbTypeLocation)
		}
	}

	// wait until we're done with the background stuff to do the rest
	// FIXME(alainjobart) this wouldn't be necessary if UpdateSrvShard
	// below was creating the zookeeper nodes recursively.
	wg.Wait()
	if err := rec.Error(); err != nil {
		return err
	}

	// Update per-shard information per cell-specific serving path.
	//
	// srvShardByPath is a map:
	//   key: {cell,keyspace,shard}
	//   value: topo.SrvShard
	// this will create all the SrvShard objects
	srvShardByPath := make(map[cellKeyspaceShard]*topo.SrvShard)
	for location := range locationAddrsMap {
		// location will be {cell,keyspace,shard,type}
		srvShardPath := cellKeyspaceShard{location.cell, location.keyspace, location.shard}
		srvShard, ok := srvShardByPath[srvShardPath]
		if !ok {
			srvShard = &topo.SrvShard{
				KeyRange:    shardInfo.KeyRange,
				ServedTypes: shardInfo.ServedTypes,
				TabletTypes: make([]topo.TabletType, 0, 2),
			}
			srvShardByPath[srvShardPath] = srvShard
		}
		foundType := false
		for _, t := range srvShard.TabletTypes {
			if t == location.tabletType {
				foundType = true
			}
		}
		if !foundType {
			srvShard.TabletTypes = append(srvShard.TabletTypes, location.tabletType)
		}
	}

	// Save the shard entries
	for cks, srvShard := range srvShardByPath {
		wg.Add(1)
		go func(cks cellKeyspaceShard, srvShard *topo.SrvShard) {
			log.Infof("updating shard serving graph in cell %v for %v/%v", cks.cell, cks.keyspace, cks.shard)
			if err := wr.ts.UpdateSrvShard(cks.cell, cks.keyspace, cks.shard, srvShard); err != nil {
				rec.RecordError(fmt.Errorf("writing serving data in cell %v for %v/%v failed: %v", cks.cell, cks.keyspace, cks.shard, err))
			}
			wg.Done()
		}(cks, srvShard)
	}
	wg.Wait()
	return rec.Error()
}
Пример #29
0
func (wr *Wrangler) applySchemaShard(ctx context.Context, shardInfo *topo.ShardInfo, preflight *tmutils.SchemaChangeResult, masterTabletAlias *topodatapb.TabletAlias, change string, newParentTabletAlias *topodatapb.TabletAlias, force bool, waitSlaveTimeout time.Duration) (*tmutils.SchemaChangeResult, error) {

	// find all the shards we need to handle
	aliases, err := wr.ts.FindAllTabletAliasesInShard(ctx, shardInfo.Keyspace(), shardInfo.ShardName())
	if err != nil {
		return nil, err
	}

	// build the array of tabletStatus we're going to use
	statusArray := make([]*tabletStatus, 0, len(aliases)-1)
	for _, alias := range aliases {
		if alias == masterTabletAlias {
			// we skip the master
			continue
		}

		ti, err := wr.ts.GetTablet(ctx, alias)
		if err != nil {
			return nil, err
		}
		statusArray = append(statusArray, &tabletStatus{ti: ti})
	}

	// get schema on all tablets.
	log.Infof("Getting schema on all tablets for shard %v/%v", shardInfo.Keyspace(), shardInfo.ShardName())
	wg := &sync.WaitGroup{}
	for _, status := range statusArray {
		wg.Add(1)
		go func(status *tabletStatus) {
			status.beforeSchema, status.lastError = wr.tmc.GetSchema(ctx, status.ti, nil, nil, false)
			wg.Done()
		}(status)
	}
	wg.Wait()

	// quick check for errors
	for _, status := range statusArray {
		if status.lastError != nil {
			return nil, fmt.Errorf("Error getting schema on tablet %v: %v", status.ti.AliasString(), status.lastError)
		}
	}

	// check all tablets have the same schema as the master's
	// BeforeSchema. If not, we shouldn't proceed
	log.Infof("Checking schema on all tablets")
	for _, status := range statusArray {
		diffs := tmutils.DiffSchemaToArray("master", preflight.BeforeSchema, topoproto.TabletAliasString(status.ti.Alias), status.beforeSchema)
		if len(diffs) > 0 {
			if force {
				log.Warningf("Tablet %v has inconsistent schema, ignoring: %v", status.ti.AliasString(), strings.Join(diffs, "\n"))
			} else {
				return nil, fmt.Errorf("Tablet %v has inconsistent schema: %v", status.ti.AliasString(), strings.Join(diffs, "\n"))
			}
		}
	}

	// we're good, just send to the master
	log.Infof("Applying schema change to master")
	sc := &tmutils.SchemaChange{
		SQL:              change,
		Force:            force,
		AllowReplication: true,
		BeforeSchema:     preflight.BeforeSchema,
		AfterSchema:      preflight.AfterSchema,
	}
	return wr.ApplySchema(ctx, masterTabletAlias, sc)
}
Пример #30
0
// UpdateShard is part of the topo.Server interface
func (tee *Tee) UpdateShard(ctx context.Context, si *topo.ShardInfo, existingVersion int64) (newVersion int64, err error) {
	if newVersion, err = tee.primary.UpdateShard(ctx, si, existingVersion); err != nil {
		// failed on primary, not updating secondary
		return
	}

	// if we have a mapping between shard version in first topo
	// and shard version in second topo, replace the version number.
	// if not, this will probably fail and log.
	tee.mu.Lock()
	svm, ok := tee.shardVersionMapping[si.Keyspace()+"/"+si.ShardName()]
	if ok && svm.readFromVersion == existingVersion {
		existingVersion = svm.readFromSecondVersion
		delete(tee.shardVersionMapping, si.Keyspace()+"/"+si.ShardName())
	}
	tee.mu.Unlock()
	if newVersion2, serr := tee.secondary.UpdateShard(ctx, si, existingVersion); serr != nil {
		// not critical enough to fail
		if serr == topo.ErrNoNode {
			// the shard doesn't exist on the secondary, let's
			// just create it
			if serr = tee.secondary.CreateShard(ctx, si.Keyspace(), si.ShardName(), si.Shard); serr != nil {
				log.Warningf("secondary.CreateShard(%v,%v) failed (after UpdateShard returned ErrNoNode): %v", si.Keyspace(), si.ShardName(), serr)
			} else {
				log.Infof("secondary.UpdateShard(%v, %v) failed with ErrNoNode, CreateShard then worked.", si.Keyspace(), si.ShardName())
				si, gerr := tee.secondary.GetShard(ctx, si.Keyspace(), si.ShardName())
				if gerr != nil {
					log.Warningf("Failed to re-read shard(%v, %v) after creating it on secondary: %v", si.Keyspace(), si.ShardName(), gerr)
				} else {
					tee.mu.Lock()
					tee.shardVersionMapping[si.Keyspace()+"/"+si.ShardName()] = versionMapping{
						readFromVersion:       newVersion,
						readFromSecondVersion: si.Version(),
					}
					tee.mu.Unlock()
				}
			}
		} else {
			log.Warningf("secondary.UpdateShard(%v, %v) failed: %v", si.Keyspace(), si.ShardName(), serr)
		}
	} else {
		tee.mu.Lock()
		tee.shardVersionMapping[si.Keyspace()+"/"+si.ShardName()] = versionMapping{
			readFromVersion:       newVersion,
			readFromSecondVersion: newVersion2,
		}
		tee.mu.Unlock()
	}
	return
}