Example #1
// DeleteKeyspaceShards implements topo.Server.
func (s *Server) DeleteKeyspaceShards(ctx context.Context, keyspace string) error {
	shards, err := s.GetShardNames(ctx, keyspace)
	if err != nil {
		return err
	}

	wg := sync.WaitGroup{}
	rec := concurrency.AllErrorRecorder{}
	global := s.getGlobal()
	for _, shard := range shards {
		wg.Add(1)
		go func(shard string) {
			defer wg.Done()
			_, err := global.Delete(shardDirPath(keyspace, shard), true /* recursive */)
			rec.RecordError(convertError(err))
		}(shard)
	}
	wg.Wait()

	if err = rec.Error(); err != nil {
		return err
	}

	event.Dispatch(&events.KeyspaceChange{
		KeyspaceInfo: *topo.NewKeyspaceInfo(keyspace, nil, -1),
		Status:       "deleted all shards",
	})
	return nil
}
Example #2
// CreateKeyspace is part of the topo.Server interface
func (zkts *Server) CreateKeyspace(ctx context.Context, keyspace string, value *pb.Keyspace) error {
	keyspacePath := path.Join(globalKeyspacesPath, keyspace)
	pathList := []string{
		keyspacePath,
		path.Join(keyspacePath, "action"),
		path.Join(keyspacePath, "actionlog"),
		path.Join(keyspacePath, "shards"),
	}

	alreadyExists := false
	for i, zkPath := range pathList {
		c := ""
		if i == 0 {
			c = jscfg.ToJSON(value)
		}
		_, err := zk.CreateRecursive(zkts.zconn, zkPath, c, 0, zookeeper.WorldACL(zookeeper.PERM_ALL))
		if err != nil {
			if zookeeper.IsError(err, zookeeper.ZNODEEXISTS) {
				alreadyExists = true
			} else {
				return fmt.Errorf("error creating keyspace: %v %v", zkPath, err)
			}
		}
	}
	if alreadyExists {
		return topo.ErrNodeExists
	}

	event.Dispatch(&events.KeyspaceChange{
		KeyspaceInfo: *topo.NewKeyspaceInfo(keyspace, value, -1),
		Status:       "created",
	})
	return nil
}
Example #3
func (wr *Wrangler) setKeyspaceShardingInfo(keyspace, shardingColumnName string, shardingColumnType key.KeyspaceIdType, force bool) error {
	ki, err := wr.ts.GetKeyspace(keyspace)
	if err != nil {
		// Temporary change: we try to keep going even if the
		// node doesn't exist
		if err != topo.ErrNoNode {
			return err
		}
		ki = topo.NewKeyspaceInfo(keyspace, &topo.Keyspace{})
	}

	if ki.ShardingColumnName != "" && ki.ShardingColumnName != shardingColumnName {
		if force {
			log.Warningf("Forcing keyspace ShardingColumnName change from %v to %v", ki.ShardingColumnName, shardingColumnName)
		} else {
			return fmt.Errorf("Cannot change ShardingColumnName from %v to %v (use -force to override)", ki.ShardingColumnName, shardingColumnName)
		}
	}

	if ki.ShardingColumnType != key.KIT_UNSET && ki.ShardingColumnType != shardingColumnType {
		if force {
			log.Warningf("Forcing keyspace ShardingColumnType change from %v to %v", ki.ShardingColumnType, shardingColumnType)
		} else {
			return fmt.Errorf("Cannot change ShardingColumnType from %v to %v (use -force to override)", ki.ShardingColumnType, shardingColumnType)
		}
	}

	ki.ShardingColumnName = shardingColumnName
	ki.ShardingColumnType = shardingColumnType
	return wr.ts.UpdateKeyspace(ki)
}
Example #4
// CreateKeyspace implements topo.Server.
func (s *Server) CreateKeyspace(ctx context.Context, keyspace string, value *topo.Keyspace) error {
	data := jscfg.ToJSON(value)
	global := s.getGlobal()

	resp, err := global.Create(keyspaceFilePath(keyspace), data, 0 /* ttl */)
	if err != nil {
		return convertError(err)
	}
	if err := initLockFile(global, keyspaceDirPath(keyspace)); err != nil {
		return err
	}

	// We don't return ErrBadResponse in this case because the Create() succeeded
	// and we don't really need the version to satisfy our contract - we're only
	// logging it.
	version := int64(-1)
	if resp.Node != nil {
		version = int64(resp.Node.ModifiedIndex)
	}
	event.Dispatch(&events.KeyspaceChange{
		KeyspaceInfo: *topo.NewKeyspaceInfo(keyspace, value, version),
		Status:       "created",
	})
	return nil
}
Example #5
// DeleteKeyspace implements topo.Server.
func (s *Server) DeleteKeyspace(ctx context.Context, keyspace string) error {
	_, err := s.getGlobal().Delete(keyspaceDirPath(keyspace), true /* recursive */)
	if err != nil {
		return convertError(err)
	}

	event.Dispatch(&events.KeyspaceChange{
		KeyspaceInfo: *topo.NewKeyspaceInfo(keyspace, nil, -1),
		Status:       "deleted",
	})
	return nil
}
Example #6
// DeleteKeyspaceShards is part of the topo.Server interface
func (zkts *Server) DeleteKeyspaceShards(ctx context.Context, keyspace string) error {
	shardsPath := path.Join(globalKeyspacesPath, keyspace, "shards")
	if err := zk.DeleteRecursive(zkts.zconn, shardsPath, -1); err != nil && !zookeeper.IsError(err, zookeeper.ZNONODE) {
		return err
	}

	event.Dispatch(&events.KeyspaceChange{
		KeyspaceInfo: *topo.NewKeyspaceInfo(keyspace, nil, -1),
		Status:       "deleted all shards",
	})
	return nil
}
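Example #7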
func TestKeyspaceChangeSyslog(t *testing.T) {
	wantSev, wantMsg := syslog.LOG_INFO, "keyspace-123 [keyspace] status"
	kc := &KeyspaceChange{
		KeyspaceInfo: *topo.NewKeyspaceInfo("keyspace-123", nil, -1),
		Status:       "status",
	}
	gotSev, gotMsg := kc.Syslog()

	if gotSev != wantSev {
		t.Errorf("wrong severity: got %v, want %v", gotSev, wantSev)
	}
	if gotMsg != wantMsg {
		t.Errorf("wrong message: got %v, want %v", gotMsg, wantMsg)
	}
}
Example #8
// GetKeyspace implements topo.Server.
func (s *Server) GetKeyspace(ctx context.Context, keyspace string) (*topo.KeyspaceInfo, error) {
	resp, err := s.getGlobal().Get(keyspaceFilePath(keyspace), false /* sort */, false /* recursive */)
	if err != nil {
		return nil, convertError(err)
	}
	if resp.Node == nil {
		return nil, ErrBadResponse
	}

	value := &topo.Keyspace{}
	if err := json.Unmarshal([]byte(resp.Node.Value), value); err != nil {
		return nil, fmt.Errorf("bad keyspace data (%v): %q", err, resp.Node.Value)
	}

	return topo.NewKeyspaceInfo(keyspace, value, int64(resp.Node.ModifiedIndex)), nil
}
Example #9
// DeleteKeyspace is part of the topo.Server interface.
func (zkts *Server) DeleteKeyspace(ctx context.Context, keyspace string) error {
	keyspacePath := path.Join(globalKeyspacesPath, keyspace)
	err := zk.DeleteRecursive(zkts.zconn, keyspacePath, -1)
	if err != nil {
		if zookeeper.IsError(err, zookeeper.ZNONODE) {
			err = topo.ErrNoNode
		}
		return err
	}

	event.Dispatch(&events.KeyspaceChange{
		KeyspaceInfo: *topo.NewKeyspaceInfo(keyspace, nil, -1),
		Status:       "deleted",
	})
	return nil
}
Example #10
// GetKeyspace is part of the topo.Server interface
func (zkts *Server) GetKeyspace(ctx context.Context, keyspace string) (*topo.KeyspaceInfo, error) {
	keyspacePath := path.Join(globalKeyspacesPath, keyspace)
	data, stat, err := zkts.zconn.Get(keyspacePath)
	if err != nil {
		if zookeeper.IsError(err, zookeeper.ZNONODE) {
			err = topo.ErrNoNode
		}
		return nil, err
	}

	k := &pb.Keyspace{}
	if err = json.Unmarshal([]byte(data), k); err != nil {
		return nil, fmt.Errorf("bad keyspace data %v", err)
	}

	return topo.NewKeyspaceInfo(keyspace, k, int64(stat.Version())), nil
}
Example #11
func TestMigrateServedFromSyslogReverse(t *testing.T) {
	wantSev, wantMsg := syslog.LOG_INFO, "keyspace-1 [migrate served-from keyspace-2/source-shard <- keyspace-1/dest-shard] status"
	ev := &MigrateServedFrom{
		Keyspace:         *topo.NewKeyspaceInfo("keyspace-1", nil, -1),
		SourceShard:      *topo.NewShardInfo("keyspace-2", "source-shard", nil, -1),
		DestinationShard: *topo.NewShardInfo("keyspace-1", "dest-shard", nil, -1),
		Reverse:          true,
		StatusUpdater:    base.StatusUpdater{Status: "status"},
	}
	gotSev, gotMsg := ev.Syslog()

	if gotSev != wantSev {
		t.Errorf("wrong severity: got %v, want %v", gotSev, wantSev)
	}
	if gotMsg != wantMsg {
		t.Errorf("wrong message: got %v, want %v", gotMsg, wantMsg)
	}
}
Example #12
func TestMigrateServedTypesSyslogReverse(t *testing.T) {
	wantSev, wantMsg := syslog.LOG_INFO, "keyspace-1 [migrate served-types {src1, src2} <- {dst1, dst2}] status"
	ev := &MigrateServedTypes{
		Keyspace: *topo.NewKeyspaceInfo("keyspace-1", nil, -1),
		SourceShards: []*topo.ShardInfo{
			topo.NewShardInfo("keyspace-1", "src1", nil, -1),
			topo.NewShardInfo("keyspace-1", "src2", nil, -1),
		},
		DestinationShards: []*topo.ShardInfo{
			topo.NewShardInfo("keyspace-1", "dst1", nil, -1),
			topo.NewShardInfo("keyspace-1", "dst2", nil, -1),
		},
		Reverse:       true,
		StatusUpdater: base.StatusUpdater{Status: "status"},
	}
	gotSev, gotMsg := ev.Syslog()

	if gotSev != wantSev {
		t.Errorf("wrong severity: got %v, want %v", gotSev, wantSev)
	}
	if gotMsg != wantMsg {
		t.Errorf("wrong message: got %v, want %v", gotMsg, wantMsg)
	}
}
Example #13
// InitAgent initializes the agent within vttablet.
func InitAgent(
	tabletAlias topo.TabletAlias,
	dbcfgs *dbconfigs.DBConfigs,
	mycnf *mysqlctl.Mycnf,
	port, securePort int,
	overridesFile string,
) (agent *tabletmanager.ActionAgent, err error) {
	schemaOverrides := loadSchemaOverrides(overridesFile)

	topoServer := topo.GetServer()
	mysqld := mysqlctl.NewMysqld(mycnf, &dbcfgs.Dba, &dbcfgs.Repl)

	statsType := stats.NewString("TabletType")
	statsKeyspace := stats.NewString("TabletKeyspace")
	statsShard := stats.NewString("TabletShard")
	statsKeyRangeStart := stats.NewString("TabletKeyRangeStart")
	statsKeyRangeEnd := stats.NewString("TabletKeyRangeEnd")

	agent, err = tabletmanager.NewActionAgent(topoServer, tabletAlias, mysqld)
	if err != nil {
		return nil, err
	}

	// Start the binlog player services, not playing at start.
	agent.BinlogPlayerMap = tabletmanager.NewBinlogPlayerMap(topoServer, &dbcfgs.App.ConnectionParams, mysqld)
	tabletmanager.RegisterBinlogPlayerMap(agent.BinlogPlayerMap)

	// Action agent listens to changes in zookeeper and makes
	// modifications to this tablet.
	agent.AddChangeCallback(func(oldTablet, newTablet topo.Tablet) {
		allowQuery := true
		var shardInfo *topo.ShardInfo
		var keyspaceInfo *topo.KeyspaceInfo
		if newTablet.Type == topo.TYPE_MASTER {
			// read the shard to get SourceShards
			shardInfo, err = topoServer.GetShard(newTablet.Keyspace, newTablet.Shard)
			if err != nil {
				log.Errorf("Cannot read shard for this tablet %v: %v", newTablet.Alias, err)
			} else {
				allowQuery = len(shardInfo.SourceShards) == 0
			}

			// read the keyspace to get ShardingColumnType
			keyspaceInfo, err = topoServer.GetKeyspace(newTablet.Keyspace)
			switch err {
			case nil:
				// continue
			case topo.ErrNoNode:
				// backward compatible mode
				keyspaceInfo = topo.NewKeyspaceInfo(newTablet.Keyspace, &topo.Keyspace{})
			default:
				log.Errorf("Cannot read keyspace for this tablet %v: %v", newTablet.Alias, err)
				keyspaceInfo = nil
			}
		}

		if newTablet.IsRunningQueryService() && allowQuery {
			if dbcfgs.App.DbName == "" {
				dbcfgs.App.DbName = newTablet.DbName()
			}
			dbcfgs.App.Keyspace = newTablet.Keyspace
			dbcfgs.App.Shard = newTablet.Shard
			if newTablet.Type != topo.TYPE_MASTER {
				dbcfgs.App.EnableInvalidator = true
			} else {
				dbcfgs.App.EnableInvalidator = false
			}
			// Transitioning from replica to master, first disconnect
			// existing connections. "false" indicates that clients must
			// re-resolve their endpoint before reconnecting.
			if newTablet.Type == topo.TYPE_MASTER && oldTablet.Type != topo.TYPE_MASTER {
				ts.DisallowQueries()
			}
			qrs := ts.LoadCustomRules()
			if newTablet.KeyRange.IsPartial() {
				qr := ts.NewQueryRule("enforce keyspace_id range", "keyspace_id_not_in_range", ts.QR_FAIL_QUERY)
				qr.AddPlanCond(sqlparser.PLAN_INSERT_PK)
				err = qr.AddBindVarCond("keyspace_id", true, true, ts.QR_NOTIN, newTablet.KeyRange)
				if err != nil {
					log.Warningf("Unable to add keyspace rule: %v", err)
				} else {
					qrs.Add(qr)
				}
			}
			ts.AllowQueries(&dbcfgs.App, schemaOverrides, qrs, mysqld)
			// Disable before enabling to force existing streams to stop.
			binlog.DisableUpdateStreamService()
			binlog.EnableUpdateStreamService(dbcfgs)
		} else {
			ts.DisallowQueries()
			binlog.DisableUpdateStreamService()
		}

		statsType.Set(string(newTablet.Type))
		statsKeyspace.Set(newTablet.Keyspace)
		statsShard.Set(newTablet.Shard)
		statsKeyRangeStart.Set(string(newTablet.KeyRange.Start.Hex()))
		statsKeyRangeEnd.Set(string(newTablet.KeyRange.End.Hex()))

		// See if we need to start or stop any binlog player
		if newTablet.Type == topo.TYPE_MASTER {
			agent.BinlogPlayerMap.RefreshMap(newTablet, keyspaceInfo, shardInfo)
		} else {
			agent.BinlogPlayerMap.StopAllPlayersAndReset()
		}
	})

	if err := agent.Start(mysqld.Port(), port, securePort); err != nil {
		return nil, err
	}

	// register the RPC services from the agent
	agent.RegisterQueryService()

	return agent, nil
}
Example #14
// changeCallback is run after every action that might
// have changed something in the tablet record.
func (agent *ActionAgent) changeCallback(oldTablet, newTablet topo.Tablet) {

	allowQuery := true
	var shardInfo *topo.ShardInfo
	var keyspaceInfo *topo.KeyspaceInfo
	if newTablet.Type == topo.TYPE_MASTER {
		// read the shard to get SourceShards
		var err error
		shardInfo, err = agent.TopoServer.GetShard(newTablet.Keyspace, newTablet.Shard)
		if err != nil {
			log.Errorf("Cannot read shard for this tablet %v: %v", newTablet.Alias, err)
		} else {
			allowQuery = len(shardInfo.SourceShards) == 0
		}

		// read the keyspace to get ShardingColumnType
		keyspaceInfo, err = agent.TopoServer.GetKeyspace(newTablet.Keyspace)
		switch err {
		case nil:
			// continue
		case topo.ErrNoNode:
			// backward compatible mode
			keyspaceInfo = topo.NewKeyspaceInfo(newTablet.Keyspace, &topo.Keyspace{})
		default:
			log.Errorf("Cannot read keyspace for this tablet %v: %v", newTablet.Alias, err)
			keyspaceInfo = nil
		}
	}

	if newTablet.IsRunningQueryService() && allowQuery {
		// There are a few transitions when we're
		// going to need to restart the query service:
		// - transitioning from replica to master, so clients
		//   that were already connected don't keep on using
		//   the master as replica or rdonly.
		// - having different parameters for the query
		//   service. It needs to stop and restart with the
		//   new parameters. That includes:
		//   - changing KeyRange
		//   - changing the BlacklistedTables list
		if (newTablet.Type == topo.TYPE_MASTER &&
			oldTablet.Type != topo.TYPE_MASTER) ||
			(newTablet.KeyRange != oldTablet.KeyRange) ||
			!reflect.DeepEqual(newTablet.BlacklistedTables, oldTablet.BlacklistedTables) {
			agent.disallowQueries()
		}
		if err := agent.allowQueries(&newTablet); err != nil {
			log.Errorf("Cannot start query service: %v", err)
		}

		// Disable before enabling to force existing streams to stop.
		binlog.DisableUpdateStreamService()
		binlog.EnableUpdateStreamService(agent.DBConfigs.App.DbName, agent.Mysqld)
	} else {
		agent.disallowQueries()
		binlog.DisableUpdateStreamService()
	}

	statsType.Set(string(newTablet.Type))
	statsKeyspace.Set(newTablet.Keyspace)
	statsShard.Set(newTablet.Shard)
	statsKeyRangeStart.Set(string(newTablet.KeyRange.Start.Hex()))
	statsKeyRangeEnd.Set(string(newTablet.KeyRange.End.Hex()))

	// See if we need to start or stop any binlog player
	if newTablet.Type == topo.TYPE_MASTER {
		agent.BinlogPlayerMap.RefreshMap(newTablet, keyspaceInfo, shardInfo)
	} else {
		agent.BinlogPlayerMap.StopAllPlayersAndReset()
	}
}
Example #15
// This function should only be used with an action lock on the keyspace
// - otherwise the consistency of the serving graph data can't be
// guaranteed.
//
// Take data from the global keyspace and rebuild the local serving
// copies in each cell.
func (wr *Wrangler) rebuildKeyspace(keyspace string, cells []string, shardCache map[string]*topo.ShardInfo) error {
	log.Infof("rebuildKeyspace %v", keyspace)

	ki, err := wr.ts.GetKeyspace(keyspace)
	if err != nil {
		// Temporary change: we try to keep going even if the
		// node doesn't exist
		if err != topo.ErrNoNode {
			return err
		}
		ki = topo.NewKeyspaceInfo(keyspace, &topo.Keyspace{})
	}

	shards, err := wr.ts.GetShardNames(keyspace)
	if err != nil {
		return err
	}

	// Rebuild all shards in parallel.
	wg := sync.WaitGroup{}
	er := concurrency.FirstErrorRecorder{}
	for _, shard := range shards {
		wg.Add(1)
		go func(shard string) {
			if err := wr.RebuildShardGraph(keyspace, shard, cells); err != nil {
				er.RecordError(fmt.Errorf("RebuildShardGraph failed: %v/%v %v", keyspace, shard, err))
			}
			wg.Done()
		}(shard)
	}
	wg.Wait()
	if er.HasErrors() {
		return er.Error()
	}

	// Build the list of cells to work on: we get the union
	// of all the Cells of all the Shards, limited to the provided cells.
	//
	// srvKeyspaceMap is a map:
	//   key: cell
	//   value: topo.SrvKeyspace object being built
	srvKeyspaceMap := make(map[string]*topo.SrvKeyspace)
	if err := wr.findCellsForRebuild(ki, keyspace, shards, cells, srvKeyspaceMap); err != nil {
		return err
	}

	// Then we add the cells from the keyspaces we might be 'ServedFrom'.
	for _, servedFrom := range ki.ServedFrom {
		servedFromShards, err := wr.ts.GetShardNames(servedFrom)
		if err != nil {
			return err
		}
		if err := wr.findCellsForRebuild(ki, servedFrom, servedFromShards, cells, srvKeyspaceMap); err != nil {
			return err
		}
	}

	// for each entry in the srvKeyspaceMap map, we do the following:
	// - read the SrvShard structures for each shard / cell
	// - if not present, build an empty one from global Shard
	// - compute the union of the db types (replica, master, ...)
	// - sort the shards in the list by range
	// - check the ranges are compatible (no hole, covers everything)
	if shardCache == nil {
		shardCache = make(map[string]*topo.ShardInfo)
	}
	for cell, srvKeyspace := range srvKeyspaceMap {
		keyspaceDbTypes := make(map[topo.TabletType]bool)
		srvKeyspace.Partitions = make(map[topo.TabletType]*topo.KeyspacePartition)
		for _, shard := range shards {
			srvShard, err := wr.ts.GetSrvShard(cell, keyspace, shard)
			switch err {
			case nil:
				// we keep going
			case topo.ErrNoNode:
				log.Infof("Cell %v for %v/%v has no SvrShard, using Shard data with no TabletTypes instead", cell, keyspace, shard)
				si, ok := shardCache[shard]
				if !ok {
					si, err = wr.ts.GetShard(keyspace, shard)
					if err != nil {
						return fmt.Errorf("GetShard(%v, %v) (backup for GetSrvShard in cell %v) failed: %v", keyspace, shard, cell, err)
					}
					shardCache[shard] = si
				}
				srvShard = &topo.SrvShard{
					Name:        si.ShardName(),
					KeyRange:    si.KeyRange,
					ServedTypes: si.ServedTypes,
					MasterCell:  si.MasterAlias.Cell,
				}
			default:
				return err
			}
			for _, tabletType := range srvShard.TabletTypes {
				keyspaceDbTypes[tabletType] = true
			}

			// for each type this shard is supposed to serve,
			// add it to srvKeyspace.Partitions
			for _, tabletType := range srvShard.ServedTypes {
				if _, ok := srvKeyspace.Partitions[tabletType]; !ok {
					srvKeyspace.Partitions[tabletType] = &topo.KeyspacePartition{
						Shards: make([]topo.SrvShard, 0)}
				}
				srvKeyspace.Partitions[tabletType].Shards = append(srvKeyspace.Partitions[tabletType].Shards, *srvShard)
			}
		}

		srvKeyspace.TabletTypes = make([]topo.TabletType, 0, len(keyspaceDbTypes))
		for dbType := range keyspaceDbTypes {
			srvKeyspace.TabletTypes = append(srvKeyspace.TabletTypes, dbType)
		}

		first := true
		for tabletType, partition := range srvKeyspace.Partitions {
			topo.SrvShardArray(partition.Shards).Sort()

			// check the first Start is MinKey, the last End is MaxKey,
			// and the values in between match: End[i] == Start[i+1]
			if partition.Shards[0].KeyRange.Start != key.MinKey {
				return fmt.Errorf("keyspace partition for %v in cell %v does not start with %v", tabletType, cell, key.MinKey)
			}
			if partition.Shards[len(partition.Shards)-1].KeyRange.End != key.MaxKey {
				return fmt.Errorf("keyspace partition for %v in cell %v does not end with %v", tabletType, cell, key.MaxKey)
			}
			for i := range partition.Shards[0 : len(partition.Shards)-1] {
				if partition.Shards[i].KeyRange.End != partition.Shards[i+1].KeyRange.Start {
					return fmt.Errorf("non-contiguous KeyRange values for %v in cell %v at shard %v to %v: %v != %v", tabletType, cell, i, i+1, partition.Shards[i].KeyRange.End.Hex(), partition.Shards[i+1].KeyRange.Start.Hex())
				}
			}

			// backfill Shards
			if first {
				first = false
				srvKeyspace.Shards = partition.Shards
			}
		}
	}

	// and then finally save the keyspace objects
	for cell, srvKeyspace := range srvKeyspaceMap {
		log.Infof("updating keyspace serving graph in cell %v for %v", cell, keyspace)
		if err := wr.ts.UpdateSrvKeyspace(cell, keyspace, srvKeyspace); err != nil {
			return fmt.Errorf("writing serving data failed: %v", err)
		}
	}
	return nil
}
Example #16
// This function should only be used with an action lock on the keyspace
// - otherwise the consistency of the serving graph data can't be
// guaranteed.
//
// Take data from the global keyspace and rebuild the local serving
// copies in each cell.
func (wr *Wrangler) rebuildKeyspace(keyspace string, cells []string) error {
	log.Infof("rebuildKeyspace %v", keyspace)

	ki, err := wr.ts.GetKeyspace(keyspace)
	if err != nil {
		// Temporary change: we try to keep going even if the
		// node doesn't exist
		if err != topo.ErrNoNode {
			return err
		}
		ki = topo.NewKeyspaceInfo(keyspace, &topo.Keyspace{})
	}

	shards, err := wr.ts.GetShardNames(keyspace)
	if err != nil {
		return err
	}

	// Rebuild all shards in parallel.
	wg := sync.WaitGroup{}
	er := concurrency.FirstErrorRecorder{}
	for _, shard := range shards {
		wg.Add(1)
		go func(shard string) {
			if err := wr.RebuildShardGraph(keyspace, shard, cells); err != nil {
				er.RecordError(fmt.Errorf("RebuildShardGraph failed: %v/%v %v", keyspace, shard, err))
			}
			wg.Done()
		}(shard)
	}
	wg.Wait()
	if er.HasErrors() {
		return er.Error()
	}

	// Scan the first shard to discover which cells need local serving data.
	aliases, err := topo.FindAllTabletAliasesInShard(wr.ts, keyspace, shards[0])
	if err != nil {
		return err
	}

	// srvKeyspaceMap is a map:
	//   key: local keyspace {cell,keyspace}
	//   value: topo.SrvKeyspace object being built
	srvKeyspaceMap := make(map[cellKeyspace]*topo.SrvKeyspace)
	for _, alias := range aliases {
		keyspaceLocation := cellKeyspace{alias.Cell, keyspace}
		if _, ok := srvKeyspaceMap[keyspaceLocation]; !ok {
			// before adding keyspaceLocation to the map
			// of KeyspaceByPath, we check this is a
			// serving tablet. No serving tablet in shard
			// 0 means we're not rebuilding the serving
			// graph in that cell.  This is somewhat
			// expensive, but we only do it on all the
			// non-serving tablets in a shard before we
			// find a serving tablet.
			ti, err := wr.ts.GetTablet(alias)
			if err != nil {
				return err
			}
			if !ti.IsInServingGraph() {
				continue
			}

			srvKeyspaceMap[keyspaceLocation] = &topo.SrvKeyspace{
				Shards:             make([]topo.SrvShard, 0, 16),
				ShardingColumnName: ki.ShardingColumnName,
				ShardingColumnType: ki.ShardingColumnType,
				ServedFrom:         ki.ServedFrom,
			}
		}
	}

	// for each entry in the srvKeyspaceMap map, we do the following:
	// - read the ShardInfo structures for each shard
	// - compute the union of the db types (replica, master, ...)
	// - sort the shards in the list by range
	// - check the ranges are compatible (no hole, covers everything)
	for ck, srvKeyspace := range srvKeyspaceMap {
		keyspaceDbTypes := make(map[topo.TabletType]bool)
		srvKeyspace.Partitions = make(map[topo.TabletType]*topo.KeyspacePartition)
		for _, shard := range shards {
			srvShard, err := wr.ts.GetSrvShard(ck.cell, ck.keyspace, shard)
			if err != nil {
				return err
			}
			for _, tabletType := range srvShard.TabletTypes {
				keyspaceDbTypes[tabletType] = true
			}

			// for each type this shard is supposed to serve,
			// add it to srvKeyspace.Partitions
			for _, tabletType := range srvShard.ServedTypes {
				if _, ok := srvKeyspace.Partitions[tabletType]; !ok {
					srvKeyspace.Partitions[tabletType] = &topo.KeyspacePartition{
						Shards: make([]topo.SrvShard, 0)}
				}
				srvKeyspace.Partitions[tabletType].Shards = append(srvKeyspace.Partitions[tabletType].Shards, *srvShard)
			}
		}

		srvKeyspace.TabletTypes = make([]topo.TabletType, 0, len(keyspaceDbTypes))
		for dbType := range keyspaceDbTypes {
			srvKeyspace.TabletTypes = append(srvKeyspace.TabletTypes, dbType)
		}

		first := true
		for tabletType, partition := range srvKeyspace.Partitions {
			topo.SrvShardArray(partition.Shards).Sort()

			// check the first Start is MinKey, the last End is MaxKey,
			// and the values in between match: End[i] == Start[i+1]
			if partition.Shards[0].KeyRange.Start != key.MinKey {
				return fmt.Errorf("Keyspace partition for %v does not start with %v", tabletType, key.MinKey)
			}
			if partition.Shards[len(partition.Shards)-1].KeyRange.End != key.MaxKey {
				return fmt.Errorf("Keyspace partition for %v does not end with %v", tabletType, key.MaxKey)
			}
			for i := range partition.Shards[0 : len(partition.Shards)-1] {
				if partition.Shards[i].KeyRange.End != partition.Shards[i+1].KeyRange.Start {
					return fmt.Errorf("Non-contiguous KeyRange values for %v at shard %v to %v: %v != %v", tabletType, i, i+1, partition.Shards[i].KeyRange.End.Hex(), partition.Shards[i+1].KeyRange.Start.Hex())
				}
			}

			// backfill Shards
			if first {
				first = false
				srvKeyspace.Shards = partition.Shards
			}
		}
	}

	// and then finally save the keyspace objects
	for ck, srvKeyspace := range srvKeyspaceMap {
		if err := wr.ts.UpdateSrvKeyspace(ck.cell, ck.keyspace, srvKeyspace); err != nil {
			return fmt.Errorf("writing serving data failed: %v", err)
		}
	}
	return nil
}
Example #17
// migrateServedTypes operates with all concerned shards locked.
func (wr *Wrangler) migrateServedTypes(ctx context.Context, keyspace string, sourceShards, destinationShards []*topo.ShardInfo, cells []string, servedType pb.TabletType, reverse bool, filteredReplicationWaitTime time.Duration) (err error) {

	// re-read all the shards so we are up to date
	wr.Logger().Infof("Re-reading all shards")
	for i, si := range sourceShards {
		if sourceShards[i], err = wr.ts.GetShard(ctx, si.Keyspace(), si.ShardName()); err != nil {
			return err
		}
	}
	for i, si := range destinationShards {
		if destinationShards[i], err = wr.ts.GetShard(ctx, si.Keyspace(), si.ShardName()); err != nil {
			return err
		}
	}

	ev := &events.MigrateServedTypes{
		Keyspace:          *topo.NewKeyspaceInfo(keyspace, nil, -1),
		SourceShards:      sourceShards,
		DestinationShards: destinationShards,
		ServedType:        servedType,
		Reverse:           reverse,
	}
	event.DispatchUpdate(ev, "start")
	defer func() {
		if err != nil {
			event.DispatchUpdate(ev, "failed: "+err.Error())
		}
	}()

	// For master type migration, need to:
	// - switch the source shards to read-only by disabling query service
	// - gather all replication points
	// - wait for filtered replication to catch up before we continue
	// - disable filtered replication after the fact
	if servedType == pb.TabletType_MASTER {
		event.DispatchUpdate(ev, "disabling query service on all source masters")
		for _, si := range sourceShards {
			if err := si.UpdateDisableQueryService(pb.TabletType_MASTER, nil, true); err != nil {
				return err
			}
			if err := topo.UpdateShard(ctx, wr.ts, si); err != nil {
				return err
			}
		}
		if err := wr.refreshMasters(ctx, sourceShards); err != nil {
			return err
		}

		event.DispatchUpdate(ev, "getting positions of source masters")
		masterPositions, err := wr.getMastersPosition(ctx, sourceShards)
		if err != nil {
			return err
		}

		event.DispatchUpdate(ev, "waiting for destination masters to catch up")
		if err := wr.waitForFilteredReplication(ctx, masterPositions, destinationShards, filteredReplicationWaitTime); err != nil {
			return err
		}

		for _, si := range destinationShards {
			si.SourceShards = nil
		}
	}

	// Check and update all shard records, in memory only.
	// We remember if we need to refresh the state of the source tablets
	// so their query service is enabled again, for reverse migration.
	needToRefreshSourceTablets := false
	for _, si := range sourceShards {
		if err := si.UpdateServedTypesMap(servedType, cells, !reverse); err != nil {
			return err
		}
		if tc := si.GetTabletControl(servedType); reverse && tc != nil && tc.DisableQueryService {
			// this is a backward migration, where the
			// source tablets were disabled previously, so
			// we need to refresh them
			if err := si.UpdateDisableQueryService(servedType, cells, false); err != nil {
				return err
			}
			needToRefreshSourceTablets = true
		}
		if !reverse && servedType != pb.TabletType_MASTER {
			// this is a forward migration, we need to disable
			// query service on the source shards.
			// (this was already done for masters earlier)
			if err := si.UpdateDisableQueryService(servedType, cells, true); err != nil {
				return err
			}
		}
	}
	// We remember if we need to refresh the state of the destination tablets
	// so their query service will be enabled.
	needToRefreshDestinationTablets := false
	for _, si := range destinationShards {
		if err := si.UpdateServedTypesMap(servedType, cells, reverse); err != nil {
			return err
		}
		if tc := si.GetTabletControl(servedType); !reverse && tc != nil && tc.DisableQueryService {
			// This is a forwards migration, and the destination query service was already in a disabled state.
			// We need to enable and force a refresh, otherwise it's possible that both the source and destination
			// will have query service disabled at the same time, and queries would have nowhere to go.
			if err := si.UpdateDisableQueryService(servedType, cells, false); err != nil {
				return err
			}
			needToRefreshDestinationTablets = true
		}
		if reverse && servedType != pb.TabletType_MASTER {
			// this is a backwards migration, we need to disable
			// query service on the destination shards.
			// (we're not allowed to reverse a master migration)
			if err := si.UpdateDisableQueryService(servedType, cells, true); err != nil {
				return err
			}
		}
	}

	// All is good, we can save the shards now
	event.DispatchUpdate(ev, "updating source shards")
	for _, si := range sourceShards {
		if err := topo.UpdateShard(ctx, wr.ts, si); err != nil {
			return err
		}
	}
	if needToRefreshSourceTablets {
		event.DispatchUpdate(ev, "refreshing source shard tablets so they restart their query service")
		for _, si := range sourceShards {
			wr.RefreshTablesByShard(ctx, si, servedType, cells)
		}
	}
	event.DispatchUpdate(ev, "updating destination shards")
	for _, si := range destinationShards {
		if err := topo.UpdateShard(ctx, wr.ts, si); err != nil {
			return err
		}
	}
	if needToRefreshDestinationTablets {
		event.DispatchUpdate(ev, "refreshing destination shard tablets so they restart their query service")
		for _, si := range destinationShards {
			wr.RefreshTablesByShard(ctx, si, servedType, cells)
		}
	}

	// And tell the new shards masters they can now be read-write.
	// Invoking a remote action will also make the tablet stop filtered
	// replication.
	if servedType == pb.TabletType_MASTER {
		event.DispatchUpdate(ev, "setting destination masters read-write")
		if err := wr.refreshMasters(ctx, destinationShards); err != nil {
			return err
		}
	}

	event.DispatchUpdate(ev, "finished")
	return nil
}
Example #18
// migrateServedTypes operates with all concerned shards locked.
func (wr *Wrangler) migrateServedTypes(keyspace string, sourceShards, destinationShards []*topo.ShardInfo, servedType topo.TabletType, reverse bool, shardCache map[string]*topo.ShardInfo) (err error) {

	// re-read all the shards so we are up to date
	for i, si := range sourceShards {
		if sourceShards[i], err = wr.ts.GetShard(si.Keyspace(), si.ShardName()); err != nil {
			return err
		}
		shardCache[si.ShardName()] = sourceShards[i]
	}
	for i, si := range destinationShards {
		if destinationShards[i], err = wr.ts.GetShard(si.Keyspace(), si.ShardName()); err != nil {
			return err
		}
		shardCache[si.ShardName()] = destinationShards[i]
	}

	ev := &events.MigrateServedTypes{
		Keyspace:          *topo.NewKeyspaceInfo(keyspace, nil, -1),
		SourceShards:      sourceShards,
		DestinationShards: destinationShards,
		ServedType:        servedType,
		Reverse:           reverse,
	}
	event.DispatchUpdate(ev, "start")
	defer func() {
		if err != nil {
			event.DispatchUpdate(ev, "failed: "+err.Error())
		}
	}()

	// check and update all shard records, in memory only
	for _, si := range sourceShards {
		if reverse {
			// need to add to source
			if topo.IsTypeInList(servedType, si.ServedTypes) {
				return fmt.Errorf("Source shard %v/%v is already serving type %v", si.Keyspace(), si.ShardName(), servedType)
			}
			si.ServedTypes = append(si.ServedTypes, servedType)
		} else {
			// need to remove from source
			var found bool
			if si.ServedTypes, found = removeType(servedType, si.ServedTypes); !found {
				return fmt.Errorf("Source shard %v/%v is not serving type %v", si.Keyspace(), si.ShardName(), servedType)
			}
		}
	}
	for _, si := range destinationShards {
		if reverse {
			// need to remove from destination
			var found bool
			if si.ServedTypes, found = removeType(servedType, si.ServedTypes); !found {
				return fmt.Errorf("Destination shard %v/%v is not serving type %v", si.Keyspace(), si.ShardName(), servedType)
			}
		} else {
			// need to add to destination
			if topo.IsTypeInList(servedType, si.ServedTypes) {
				return fmt.Errorf("Destination shard %v/%v is already serving type %v", si.Keyspace(), si.ShardName(), servedType)
			}
			si.ServedTypes = append(si.ServedTypes, servedType)
		}
	}

	// For master type migration, need to:
	// - switch the source shards to read-only
	// - gather all replication points
	// - wait for filtered replication to catch up before we continue
	// - disable filtered replication after the fact
	if servedType == topo.TYPE_MASTER {
		event.DispatchUpdate(ev, "setting all source masters read-only")
		err := wr.makeMastersReadOnly(sourceShards)
		if err != nil {
			return err
		}

		event.DispatchUpdate(ev, "getting positions of source masters")
		masterPositions, err := wr.getMastersPosition(sourceShards)
		if err != nil {
			return err
		}

		event.DispatchUpdate(ev, "waiting for destination masters to catch up")
		if err := wr.waitForFilteredReplication(masterPositions, destinationShards); err != nil {
			return err
		}

		for _, si := range destinationShards {
			si.SourceShards = nil
		}
	}

	// All is good, we can save the shards now
	event.DispatchUpdate(ev, "updating source shards")
	for _, si := range sourceShards {
		if err := topo.UpdateShard(wr.ts, si); err != nil {
			return err
		}
		shardCache[si.ShardName()] = si
	}
	event.DispatchUpdate(ev, "updating destination shards")
	for _, si := range destinationShards {
		if err := topo.UpdateShard(wr.ts, si); err != nil {
			return err
		}
		shardCache[si.ShardName()] = si
	}

	// And tell the new shards masters they can now be read-write.
	// Invoking a remote action will also make the tablet stop filtered
	// replication.
	if servedType == topo.TYPE_MASTER {
		event.DispatchUpdate(ev, "setting destination masters read-write")
		if err := wr.makeMastersReadWrite(destinationShards); err != nil {
			return err
		}
	}

	event.DispatchUpdate(ev, "finished")
	return nil
}
Example #19
// InitAgent initializes the agent within vttablet.
func InitAgent(
	tabletAlias topo.TabletAlias,
	dbcfgs *dbconfigs.DBConfigs,
	mycnf *mysqlctl.Mycnf,
	port, securePort int,
	overridesFile string,
) (agent *tabletmanager.ActionAgent, err error) {
	schemaOverrides := loadSchemaOverrides(overridesFile)

	topoServer := topo.GetServer()
	mysqld := mysqlctl.NewMysqld(mycnf, &dbcfgs.Dba, &dbcfgs.Repl)

	statsType := stats.NewString("TabletType")
	statsKeyspace := stats.NewString("TabletKeyspace")
	statsShard := stats.NewString("TabletShard")
	statsKeyRangeStart := stats.NewString("TabletKeyRangeStart")
	statsKeyRangeEnd := stats.NewString("TabletKeyRangeEnd")

	agent, err = tabletmanager.NewActionAgent(topoServer, tabletAlias, mysqld)
	if err != nil {
		return nil, err
	}

	// Start the binlog player services, not playing at start.
	agent.BinlogPlayerMap = tabletmanager.NewBinlogPlayerMap(topoServer, &dbcfgs.App.ConnectionParams, mysqld)
	tabletmanager.RegisterBinlogPlayerMap(agent.BinlogPlayerMap)

	// Action agent listens to changes in zookeeper and makes
	// modifications to this tablet.
	agent.AddChangeCallback(func(oldTablet, newTablet topo.Tablet) {
		allowQuery := true
		var shardInfo *topo.ShardInfo
		var keyspaceInfo *topo.KeyspaceInfo
		if newTablet.Type == topo.TYPE_MASTER {
			// read the shard to get SourceShards
			shardInfo, err = topoServer.GetShard(newTablet.Keyspace, newTablet.Shard)
			if err != nil {
				log.Errorf("Cannot read shard for this tablet %v: %v", newTablet.Alias, err)
			} else {
				allowQuery = len(shardInfo.SourceShards) == 0
			}

			// read the keyspace to get ShardingColumnType
			keyspaceInfo, err = topoServer.GetKeyspace(newTablet.Keyspace)
			switch err {
			case nil:
				// continue
			case topo.ErrNoNode:
				// backward compatible mode
				keyspaceInfo = topo.NewKeyspaceInfo(newTablet.Keyspace, &topo.Keyspace{})
			default:
				log.Errorf("Cannot read keyspace for this tablet %v: %v", newTablet.Alias, err)
				keyspaceInfo = nil
			}
		}

		if newTablet.IsRunningQueryService() && allowQuery {
			if dbcfgs.App.DbName == "" {
				dbcfgs.App.DbName = newTablet.DbName()
			}
			dbcfgs.App.Keyspace = newTablet.Keyspace
			dbcfgs.App.Shard = newTablet.Shard
			if newTablet.Type != topo.TYPE_MASTER {
				dbcfgs.App.EnableInvalidator = true
			} else {
				dbcfgs.App.EnableInvalidator = false
			}

			// There are a few transitions when we're
			// going to need to restart the query service:
			// - transitioning from replica to master, so clients
			//   that were already connected don't keep on using
			//   the master as replica or rdonly.
			// - having different parameters for the query
			//   service. It needs to stop and restart with the
			//   new parameters. That includes:
			//   - changing KeyRange
			//   - changing the BlacklistedTables list
			if (newTablet.Type == topo.TYPE_MASTER &&
				oldTablet.Type != topo.TYPE_MASTER) ||
				(newTablet.KeyRange != oldTablet.KeyRange) ||
				!reflect.DeepEqual(newTablet.BlacklistedTables, oldTablet.BlacklistedTables) {
				ts.DisallowQueries()
			}
			qrs := ts.LoadCustomRules()
			if newTablet.KeyRange.IsPartial() {
				qr := ts.NewQueryRule("enforce keyspace_id range", "keyspace_id_not_in_range", ts.QR_FAIL_QUERY)
				qr.AddPlanCond(sqlparser.PLAN_INSERT_PK)
				err = qr.AddBindVarCond("keyspace_id", true, true, ts.QR_NOTIN, newTablet.KeyRange)
				if err != nil {
					log.Warningf("Unable to add keyspace rule: %v", err)
				} else {
					qrs.Add(qr)
				}
			}
			if len(newTablet.BlacklistedTables) > 0 {
				log.Infof("Blacklisting tables %v", strings.Join(newTablet.BlacklistedTables, ", "))
				qr := ts.NewQueryRule("enforce blacklisted tables", "blacklisted_table", ts.QR_FAIL_QUERY)
				for _, t := range newTablet.BlacklistedTables {
					qr.AddTableCond(t)
				}
				qrs.Add(qr)
			}
			ts.AllowQueries(&dbcfgs.App, schemaOverrides, qrs, mysqld)
			// Disable before enabling to force existing streams to stop.
			binlog.DisableUpdateStreamService()
			binlog.EnableUpdateStreamService(dbcfgs)
		} else {
			ts.DisallowQueries()
			binlog.DisableUpdateStreamService()
		}

		statsType.Set(string(newTablet.Type))
		statsKeyspace.Set(newTablet.Keyspace)
		statsShard.Set(newTablet.Shard)
		statsKeyRangeStart.Set(string(newTablet.KeyRange.Start.Hex()))
		statsKeyRangeEnd.Set(string(newTablet.KeyRange.End.Hex()))

		// See if we need to start or stop any binlog player
		if newTablet.Type == topo.TYPE_MASTER {
			agent.BinlogPlayerMap.RefreshMap(newTablet, keyspaceInfo, shardInfo)
		} else {
			agent.BinlogPlayerMap.StopAllPlayersAndReset()
		}
	})

	if err := agent.Start(mysqld.Port(), port, securePort); err != nil {
		return nil, err
	}

	// register the RPC services from the agent
	agent.RegisterQueryService()

	// start health check if needed
	initHeathCheck(agent)

	return agent, nil
}