// File: utils.go — Project: Eric-Chen/vitess

// If error is not nil, the results in the dictionary are incomplete.
func GetTabletMapForShard(ts topo.Server, keyspace, shard string) (map[topo.TabletAlias]*topo.TabletInfo, error) {
	aliases, err := topo.FindAllTabletAliasesInShard(ts, keyspace, shard)
	if err != nil {
		return nil, err

	return GetTabletMap(ts, aliases)
// findTargets phase:
// - find one rdonly in the source shard
// - mark it as 'checker' pointing back to us
// - get the aliases of all the targets
func (vscw *VerticalSplitCloneWorker) findTargets() error {

	// find an appropriate endpoint in the source shard
	var err error
	vscw.sourceAlias, err = findChecker(vscw.wr, vscw.cleaner, vscw.cell, vscw.sourceKeyspace, "0")
	if err != nil {
		return fmt.Errorf("cannot find checker for %v/%v/0: %v", vscw.cell, vscw.sourceKeyspace, err)
	vscw.wr.Logger().Infof("Using tablet %v as the source", vscw.sourceAlias)

	// get the tablet info for it
	vscw.sourceTablet, err = vscw.wr.TopoServer().GetTablet(vscw.sourceAlias)
	if err != nil {
		return fmt.Errorf("cannot read tablet %v: %v", vscw.sourceTablet, err)

	// stop replication on it
	if err := vscw.wr.TabletManagerClient().StopSlave(vscw.sourceTablet, 30*time.Second); err != nil {
		return fmt.Errorf("cannot stop replication on tablet %v", vscw.sourceAlias)

	wrangler.RecordStartSlaveAction(vscw.cleaner, vscw.sourceTablet, 30*time.Second)
	action, err := wrangler.FindChangeSlaveTypeActionByTarget(vscw.cleaner, vscw.sourceAlias)
	if err != nil {
		return fmt.Errorf("cannot find ChangeSlaveType action for %v: %v", vscw.sourceAlias, err)
	action.TabletType = topo.TYPE_SPARE

	// find all the targets in the destination keyspace / shard
	vscw.destinationAliases, err = topo.FindAllTabletAliasesInShard(vscw.wr.TopoServer(), vscw.destinationKeyspace, vscw.destinationShard)
	if err != nil {
		return fmt.Errorf("cannot find all target tablets in %v/%v: %v", vscw.destinationKeyspace, vscw.destinationShard, err)
	vscw.wr.Logger().Infof("Found %v target aliases", len(vscw.destinationAliases))

	// get the TabletInfo for all targets
	vscw.destinationTablets, err = topo.GetTabletMap(vscw.wr.TopoServer(), vscw.destinationAliases)
	if err != nil {
		return fmt.Errorf("cannot read all target tablets in %v/%v: %v", vscw.destinationKeyspace, vscw.destinationShard, err)

	// find and validate the master
	for tabletAlias, ti := range vscw.destinationTablets {
		if ti.Type == topo.TYPE_MASTER {
			if vscw.destinationMasterAlias.IsZero() {
				vscw.destinationMasterAlias = tabletAlias
			} else {
				return fmt.Errorf("multiple masters in destination shard: %v and %v at least", vscw.destinationMasterAlias, tabletAlias)
	if vscw.destinationMasterAlias.IsZero() {
		return fmt.Errorf("no master in destination shard")

	return nil
// findTargets phase:
// - find one rdonly in the source shard
// - mark it as 'checker' pointing back to us
// - get the aliases of all the targets
func (scw *SplitCloneWorker) findTargets() error {
	var err error

	// find an appropriate endpoint in the source shards
	scw.sourceAliases = make([]topo.TabletAlias, len(scw.sourceShards))
	for i, si := range scw.sourceShards {
		scw.sourceAliases[i], err = findChecker(scw.wr, scw.cleaner, scw.cell, si.Keyspace(), si.ShardName())
		if err != nil {
			return fmt.Errorf("cannot find checker for %v/%v/%v: %v", scw.cell, si.Keyspace(), si.ShardName(), err)
		scw.wr.Logger().Infof("Using tablet %v as source for %v/%v", scw.sourceAliases[i], si.Keyspace(), si.ShardName())

	// get the tablet info for them
	scw.sourceTablets = make([]*topo.TabletInfo, len(scw.sourceAliases))
	for i, alias := range scw.sourceAliases {
		scw.sourceTablets[i], err = scw.wr.TopoServer().GetTablet(alias)
		if err != nil {
			return fmt.Errorf("cannot read tablet %v: %v", alias, err)

	// find all the targets in the destination shards
	scw.destinationAliases = make([][]topo.TabletAlias, len(scw.destinationShards))
	scw.destinationTablets = make([]map[topo.TabletAlias]*topo.TabletInfo, len(scw.destinationShards))
	scw.destinationMasterAliases = make([]topo.TabletAlias, len(scw.destinationShards))
	for shardIndex, si := range scw.destinationShards {
		scw.destinationAliases[shardIndex], err = topo.FindAllTabletAliasesInShard(scw.wr.TopoServer(), si.Keyspace(), si.ShardName())
		if err != nil {
			return fmt.Errorf("cannot find all target tablets in %v/%v: %v", si.Keyspace(), si.ShardName(), err)
		scw.wr.Logger().Infof("Found %v target aliases in shard %v/%v", len(scw.destinationAliases[shardIndex]), si.Keyspace(), si.ShardName())

		// get the TabletInfo for all targets
		scw.destinationTablets[shardIndex], err = topo.GetTabletMap(scw.wr.TopoServer(), scw.destinationAliases[shardIndex])
		if err != nil {
			return fmt.Errorf("cannot read all target tablets in %v/%v: %v", si.Keyspace(), si.ShardName(), err)

		// find and validate the master
		for tabletAlias, ti := range scw.destinationTablets[shardIndex] {
			if ti.Type == topo.TYPE_MASTER {
				if scw.destinationMasterAliases[shardIndex].IsZero() {
					scw.destinationMasterAliases[shardIndex] = tabletAlias
				} else {
					return fmt.Errorf("multiple masters in destination shard: %v and %v at least", scw.destinationMasterAliases[shardIndex], tabletAlias)
		if scw.destinationMasterAliases[shardIndex].IsZero() {
			return fmt.Errorf("no master in destination shard")

	return nil
// FIXME(msolomon) This validate presumes the master is up and running.
// Even when that isn't true, there are validation processes that might be valuable.
func (wr *Wrangler) validateShard(ctx context.Context, keyspace, shard string, pingTablets bool, wg *sync.WaitGroup, results chan<- error) {
	shardInfo, err := wr.ts.GetShard(ctx, keyspace, shard)
	if err != nil {
		results <- fmt.Errorf("TopologyServer.GetShard(%v, %v) failed: %v", keyspace, shard, err)

	aliases, err := topo.FindAllTabletAliasesInShard(ctx, wr.ts, keyspace, shard)
	if err != nil {
		results <- fmt.Errorf("TopologyServer.FindAllTabletAliasesInShard(%v, %v) failed: %v", keyspace, shard, err)

	tabletMap, _ := topo.GetTabletMap(ctx, wr.ts, aliases)

	var masterAlias *pb.TabletAlias
	for _, alias := range aliases {
		tabletInfo, ok := tabletMap[*alias]
		if !ok {
			results <- fmt.Errorf("tablet %v not found in map", alias)
		if tabletInfo.Type == pb.TabletType_MASTER {
			if masterAlias != nil {
				results <- fmt.Errorf("shard %v/%v already has master %v but found other master %v", keyspace, shard, masterAlias, alias)
			} else {
				masterAlias = alias

	if masterAlias == nil {
		results <- fmt.Errorf("no master for shard %v/%v", keyspace, shard)
	} else if !topo.TabletAliasEqual(shardInfo.MasterAlias, masterAlias) {
		results <- fmt.Errorf("master mismatch for shard %v/%v: found %v, expected %v", keyspace, shard, masterAlias, shardInfo.MasterAlias)

	for _, alias := range aliases {
		go func(alias *pb.TabletAlias) {
			defer wg.Done()
			if err := topo.Validate(ctx, wr.ts, alias); err != nil {
				results <- fmt.Errorf("Validate(%v) failed: %v", alias, err)
			} else {
				wr.Logger().Infof("tablet %v is valid", alias)

	if pingTablets {
		wr.validateReplication(ctx, shardInfo, tabletMap, results)
		wr.pingTablets(ctx, tabletMap, wg, results)

// File: split.go — Project: ninqing/vitess

func (wr *Wrangler) ShardMultiRestore(keyspace, shard string, sources []topo.TabletAlias, tables []string, concurrency, fetchConcurrency, insertTableConcurrency, fetchRetryCount int, strategy string) error {

	// check parameters
	if len(tables) > 0 && len(sources) > 1 {
		return fmt.Errorf("ShardMultiRestore can only handle one source when tables are specified")

	// lock the shard to perform the changes we need done
	actionNode := actionnode.ShardMultiRestore(&actionnode.MultiRestoreArgs{
		SrcTabletAliases:       sources,
		Concurrency:            concurrency,
		FetchConcurrency:       fetchConcurrency,
		InsertTableConcurrency: insertTableConcurrency,
		FetchRetryCount:        fetchRetryCount,
		Strategy:               strategy})
	lockPath, err := wr.lockShard(keyspace, shard, actionNode)
	if err != nil {
		return err

	mrErr := wr.SetSourceShards(keyspace, shard, sources, tables)
	err = wr.unlockShard(keyspace, shard, actionNode, lockPath, mrErr)
	if err != nil {
		if mrErr != nil {
			log.Errorf("unlockShard got error back: %v", err)
			return mrErr
		return err
	if mrErr != nil {
		return mrErr

	// find all tablets in the shard
	destTablets, err := topo.FindAllTabletAliasesInShard(wr.ts, keyspace, shard)
	if err != nil {
		return err

	// now launch MultiRestore on all tablets we need to do
	rec := cc.AllErrorRecorder{}
	wg := sync.WaitGroup{}
	for _, tabletAlias := range destTablets {
		go func(tabletAlias topo.TabletAlias) {
			log.Infof("Starting multirestore on tablet %v", tabletAlias)
			err := wr.MultiRestore(tabletAlias, sources, concurrency, fetchConcurrency, insertTableConcurrency, fetchRetryCount, strategy)
			log.Infof("Multirestore on tablet %v is done (err=%v)", tabletAlias, err)

	return rec.Error()
func (wr *Wrangler) ValidatePermissionsKeyspace(keyspace string) error {
	// find all the shards
	shards, err := wr.ts.GetShardNames(keyspace)
	if err != nil {
		return err

	// corner cases
	if len(shards) == 0 {
		return fmt.Errorf("No shards in keyspace %v", keyspace)
	if len(shards) == 1 {
		return wr.ValidatePermissionsShard(keyspace, shards[0])

	// find the reference permissions using the first shard's master
	si, err := wr.ts.GetShard(keyspace, shards[0])
	if err != nil {
		return err
	if si.MasterAlias.Uid == topo.NO_TABLET {
		return fmt.Errorf("No master in shard %v/%v", keyspace, shards[0])
	referenceAlias := si.MasterAlias
	log.Infof("Gathering permissions for reference master %v", referenceAlias)
	referencePermissions, err := wr.GetPermissions(si.MasterAlias)
	if err != nil {
		return err

	// then diff with all tablets but master 0
	er := concurrency.AllErrorRecorder{}
	wg := sync.WaitGroup{}
	for _, shard := range shards {
		aliases, err := topo.FindAllTabletAliasesInShard(wr.ts, keyspace, shard)
		if err != nil {

		for _, alias := range aliases {
			if alias == si.MasterAlias {

			go wr.diffPermissions(referencePermissions, referenceAlias, alias, &wg, &er)
	if er.HasErrors() {
		return fmt.Errorf("Permissions diffs:\n%v", er.Error().Error())
	return nil
// ValidateVersionKeyspace validates all versions are the same in all
// tablets in a keyspace
func (wr *Wrangler) ValidateVersionKeyspace(ctx context.Context, keyspace string) error {
	// find all the shards
	shards, err := wr.ts.GetShardNames(ctx, keyspace)
	if err != nil {
		return err

	// corner cases
	if len(shards) == 0 {
		return fmt.Errorf("No shards in keyspace %v", keyspace)
	if len(shards) == 1 {
		return wr.ValidateVersionShard(ctx, keyspace, shards[0])

	// find the reference version using the first shard's master
	si, err := wr.ts.GetShard(ctx, keyspace, shards[0])
	if err != nil {
		return err
	if topo.TabletAliasIsZero(si.MasterAlias) {
		return fmt.Errorf("No master in shard %v/%v", keyspace, shards[0])
	referenceAlias := si.MasterAlias
	log.Infof("Gathering version for reference master %v", topo.TabletAliasString(referenceAlias))
	referenceVersion, err := wr.GetVersion(ctx, referenceAlias)
	if err != nil {
		return err

	// then diff with all tablets but master 0
	er := concurrency.AllErrorRecorder{}
	wg := sync.WaitGroup{}
	for _, shard := range shards {
		aliases, err := topo.FindAllTabletAliasesInShard(ctx, wr.ts, keyspace, shard)
		if err != nil {

		for _, alias := range aliases {
			if topo.TabletAliasEqual(alias, si.MasterAlias) {

			go wr.diffVersion(ctx, referenceVersion, referenceAlias, alias, &wg, &er)
	if er.HasErrors() {
		return fmt.Errorf("Version diffs:\n%v", er.Error().Error())
	return nil
// FIXME(msolomon) This validate presumes the master is up and running.
// Even when that isn't true, there are validation processes that might be valuable.
func (wr *Wrangler) validateShard(keyspace, shard string, pingTablets bool, wg *sync.WaitGroup, results chan<- vresult) {
	shardInfo, err := wr.ts.GetShard(keyspace, shard)
	if err != nil {
		results <- vresult{keyspace + "/" + shard, err}

	aliases, err := topo.FindAllTabletAliasesInShard(wr.ts, keyspace, shard)
	if err != nil {
		results <- vresult{keyspace + "/" + shard, err}

	tabletMap, _ := GetTabletMap(wr.ts, aliases)

	var masterAlias topo.TabletAlias
	for _, alias := range aliases {
		tabletInfo, ok := tabletMap[alias]
		if !ok {
			results <- vresult{alias.String(), fmt.Errorf("tablet not found in map")}
		if tabletInfo.Parent.Uid == topo.NO_TABLET {
			if masterAlias.Cell != "" {
				results <- vresult{alias.String(), fmt.Errorf("tablet already has a master %v", masterAlias)}
			} else {
				masterAlias = alias

	if masterAlias.Cell == "" {
		results <- vresult{keyspace + "/" + shard, fmt.Errorf("no master for shard")}
	} else if shardInfo.MasterAlias != masterAlias {
		results <- vresult{keyspace + "/" + shard, fmt.Errorf("master mismatch for shard: found %v, expected %v", masterAlias, shardInfo.MasterAlias)}

	for _, alias := range aliases {
		tabletReplicationPath := masterAlias.String()
		if alias != masterAlias {
			tabletReplicationPath += "/" + alias.String()
		go func(alias topo.TabletAlias) {
			results <- vresult{tabletReplicationPath, topo.Validate(wr.ts, alias, tabletReplicationPath)}

	if pingTablets {
		wr.validateReplication(shardInfo, tabletMap, results)
		wr.pingTablets(tabletMap, wg, results)

// File: schema.go — Project: anusornc/vitess

func (wr *Wrangler) applySchemaShard(ctx context.Context, shardInfo *topo.ShardInfo, preflight *myproto.SchemaChangeResult, masterTabletAlias *pb.TabletAlias, change string, newParentTabletAlias *pb.TabletAlias, simple, force bool, waitSlaveTimeout time.Duration) (*myproto.SchemaChangeResult, error) {

	// find all the shards we need to handle
	aliases, err := topo.FindAllTabletAliasesInShard(ctx, wr.ts, shardInfo.Keyspace(), shardInfo.ShardName())
	if err != nil {
		return nil, err

	// build the array of tabletStatus we're going to use
	statusArray := make([]*tabletStatus, 0, len(aliases)-1)
	for _, alias := range aliases {
		if alias == masterTabletAlias {
			// we skip the master

		ti, err := wr.ts.GetTablet(ctx, alias)
		if err != nil {
			return nil, err
		statusArray = append(statusArray, &tabletStatus{ti: ti})

	// get schema on all tablets.
	log.Infof("Getting schema on all tablets for shard %v/%v", shardInfo.Keyspace(), shardInfo.ShardName())
	wg := &sync.WaitGroup{}
	for _, status := range statusArray {
		go func(status *tabletStatus) {
			status.beforeSchema, status.lastError = wr.tmc.GetSchema(ctx, status.ti, nil, nil, false)

	// quick check for errors
	for _, status := range statusArray {
		if status.lastError != nil {
			return nil, fmt.Errorf("Error getting schema on tablet %v: %v", status.ti.Alias, status.lastError)

	// simple or complex?
	if simple {
		return wr.applySchemaShardSimple(ctx, statusArray, preflight, masterTabletAlias, change, force)

	return wr.applySchemaShardComplex(ctx, statusArray, shardInfo, preflight, masterTabletAlias, change, newParentTabletAlias, force, waitSlaveTimeout)
// File: utils.go — Project: ZhuoRoger/vitess

// GetTabletMapForShard returns the tablets for a shard. It can return
// topo.ErrPartialResult if it couldn't read all the cells, or all
// the individual tablets, in which case the map is valid, but partial.
func GetTabletMapForShard(ts topo.Server, keyspace, shard string) (map[topo.TabletAlias]*topo.TabletInfo, error) {
	// if we get a partial result, we keep going. It most likely means
	// a cell is out of commission.
	aliases, err := topo.FindAllTabletAliasesInShard(ts, keyspace, shard)
	if err != nil && err != topo.ErrPartialResult {
		return nil, err

	// get the tablets for the cells we were able to reach, forward
	// topo.ErrPartialResult from FindAllTabletAliasesInShard
	result, gerr := GetTabletMap(ts, aliases)
	if gerr == nil && err != nil {
		gerr = err
	return result, gerr
// Validate all tablets in all discoverable cells, even if they are
// not in the replication graph.
func (wr *Wrangler) validateAllTablets(ctx context.Context, wg *sync.WaitGroup, results chan<- error) {
	cellSet := make(map[string]bool, 16)

	keyspaces, err := wr.ts.GetKeyspaces(ctx)
	if err != nil {
		results <- fmt.Errorf("TopologyServer.GetKeyspaces failed: %v", err)
	for _, keyspace := range keyspaces {
		shards, err := wr.ts.GetShardNames(ctx, keyspace)
		if err != nil {
			results <- fmt.Errorf("TopologyServer.GetShardNames(%v) failed: %v", keyspace, err)

		for _, shard := range shards {
			aliases, err := topo.FindAllTabletAliasesInShard(ctx, wr.ts, keyspace, shard)
			if err != nil {
				results <- fmt.Errorf("TopologyServer.FindAllTabletAliasesInShard(%v, %v) failed: %v", keyspace, shard, err)
			for _, alias := range aliases {
				cellSet[alias.Cell] = true

	for cell := range cellSet {
		aliases, err := wr.ts.GetTabletsByCell(ctx, cell)
		if err != nil {
			results <- fmt.Errorf("TopologyServer.GetTabletsByCell(%v) failed: %v", cell, err)

		for _, alias := range aliases {
			go func(alias *pb.TabletAlias) {
				defer wg.Done()
				if err := topo.Validate(ctx, wr.ts, alias); err != nil {
					results <- fmt.Errorf("Validate(%v) failed: %v", alias, err)
				} else {
					wr.Logger().Infof("tablet %v is valid", alias)
func (wr *Wrangler) ShardMultiRestore(keyspace, shard string, sources []topo.TabletAlias, concurrency, fetchConcurrency, insertTableConcurrency, fetchRetryCount int, strategy string) error {
	// lock the shard to perform the changes we need done
	actionNode := wr.ai.ShardMultiRestore(&tm.MultiRestoreArgs{
		SrcTabletAliases:       sources,
		Concurrency:            concurrency,
		FetchConcurrency:       fetchConcurrency,
		InsertTableConcurrency: insertTableConcurrency,
		FetchRetryCount:        fetchRetryCount,
		Strategy:               strategy})
	lockPath, err := wr.lockShard(keyspace, shard, actionNode)
	if err != nil {
		return err

	mrErr := wr.shardMultiRestore(keyspace, shard, sources, concurrency, fetchConcurrency, insertTableConcurrency, fetchRetryCount, strategy)
	err = wr.unlockShard(keyspace, shard, actionNode, lockPath, mrErr)
	if err != nil {
		return err
	if mrErr != nil {
		return mrErr

	// find all tablets in the shard
	destTablets, err := topo.FindAllTabletAliasesInShard(wr.ts, keyspace, shard)
	if err != nil {
		return err

	// now launch MultiRestore on all tablets we need to do
	rec := cc.AllErrorRecorder{}
	wg := sync.WaitGroup{}
	for _, tabletAlias := range destTablets {
		go func(tabletAlias topo.TabletAlias) {
			log.Infof("Starting multirestore on tablet %v", tabletAlias)
			err := wr.MultiRestore(tabletAlias, sources, concurrency, fetchConcurrency, insertTableConcurrency, fetchRetryCount, strategy)
			log.Infof("Multirestore on tablet %v is done (err=%v)", tabletAlias, err)

	return rec.Error()
// Validate all tablets in all discoverable cells, even if they are
// not in the replication graph.
func (wr *Wrangler) validateAllTablets(wg *sync.WaitGroup, results chan<- vresult) {

	cellSet := make(map[string]bool, 16)

	keyspaces, err := wr.ts.GetKeyspaces()
	if err != nil {
		results <- vresult{"TopologyServer.GetKeyspaces", err}
	for _, keyspace := range keyspaces {
		shards, err := wr.ts.GetShardNames(keyspace)
		if err != nil {
			results <- vresult{"TopologyServer.GetShardNames(" + keyspace + ")", err}

		for _, shard := range shards {
			aliases, err := topo.FindAllTabletAliasesInShard(wr.ts, keyspace, shard)
			if err != nil {
				results <- vresult{"TopologyServer.FindAllTabletAliasesInShard(" + keyspace + "," + shard + ")", err}
			for _, alias := range aliases {
				cellSet[alias.Cell] = true

	for cell := range cellSet {
		aliases, err := wr.ts.GetTabletsByCell(cell)
		if err != nil {
			results <- vresult{"GetTabletsByCell(" + cell + ")", err}
		} else {
			for _, alias := range aliases {
				go func(alias topo.TabletAlias) {
					results <- vresult{alias.String(), topo.Validate(wr.ts, alias)}
// Does a topo lookup for a single shard, and returns:
//	1. Slice of all tablet aliases for the shard.
//	2. Map of tablet alias : tablet record for all tablets.
func resolveReloadTabletsForShard(ctx context.Context, keyspace, shard string, wr *wrangler.Wrangler) (reloadAliases []topo.TabletAlias, reloadTablets map[topo.TabletAlias]*topo.TabletInfo, err error) {
	// Keep a long timeout, because we really don't want the copying to succeed, and then the worker to fail at the end.
	shortCtx, cancel := context.WithTimeout(ctx, 5*time.Minute)
	reloadAliases, err = topo.FindAllTabletAliasesInShard(shortCtx, wr.TopoServer(), keyspace, shard)
	if err != nil {
		return nil, nil, fmt.Errorf("cannot find all reload target tablets in %v/%v: %v", keyspace, shard, err)
	wr.Logger().Infof("Found %v reload target aliases in shard %v/%v", len(reloadAliases), keyspace, shard)

	shortCtx, cancel = context.WithTimeout(ctx, 5*time.Minute)
	reloadTablets, err = topo.GetTabletMap(shortCtx, wr.TopoServer(), reloadAliases)
	if err != nil {
		return nil, nil, fmt.Errorf("cannot read all reload target tablets in %v/%v: %v",
			keyspace, shard, err)
	return reloadAliases, reloadTablets, nil
// ValidateVersionShard validates all versions are the same in all
// tablets in a shard
func (wr *Wrangler) ValidateVersionShard(ctx context.Context, keyspace, shard string) error {
	si, err := wr.ts.GetShard(ctx, keyspace, shard)
	if err != nil {
		return err

	// get version from the master, or error
	if topo.TabletAliasIsZero(si.MasterAlias) {
		return fmt.Errorf("No master in shard %v/%v", keyspace, shard)
	log.Infof("Gathering version for master %v", topo.TabletAliasString(si.MasterAlias))
	masterVersion, err := wr.GetVersion(ctx, si.MasterAlias)
	if err != nil {
		return err

	// read all the aliases in the shard, that is all tablets that are
	// replicating from the master
	aliases, err := topo.FindAllTabletAliasesInShard(ctx, wr.ts, keyspace, shard)
	if err != nil {
		return err

	// then diff with all slaves
	er := concurrency.AllErrorRecorder{}
	wg := sync.WaitGroup{}
	for _, alias := range aliases {
		if topo.TabletAliasEqual(alias, si.MasterAlias) {

		go wr.diffVersion(ctx, masterVersion, si.MasterAlias, alias, &wg, &er)
	if er.HasErrors() {
		return fmt.Errorf("Version diffs:\n%v", er.Error().Error())
	return nil
// File: schema.go — Project: shrutip/vitess

func (wr *Wrangler) ValidateSchemaShard(keyspace, shard string, includeViews bool) error {
	si, err := wr.ts.GetShard(keyspace, shard)
	if err != nil {
		return err

	// get schema from the master, or error
	if si.MasterAlias.Uid == topo.NO_TABLET {
		return fmt.Errorf("No master in shard %v/%v", keyspace, shard)
	relog.Info("Gathering schema for master %v", si.MasterAlias)
	masterSchema, err := wr.GetSchema(si.MasterAlias, nil, includeViews)
	if err != nil {
		return err

	// read all the aliases in the shard, that is all tablets that are
	// replicating from the master
	aliases, err := topo.FindAllTabletAliasesInShard(wr.ts, keyspace, shard)
	if err != nil {
		return err

	// then diff with all slaves
	er := concurrency.AllErrorRecorder{}
	wg := sync.WaitGroup{}
	for _, alias := range aliases {
		if alias == si.MasterAlias {

		go wr.diffSchema(masterSchema, si.MasterAlias, alias, includeViews, &wg, &er)
	if er.HasErrors() {
		return fmt.Errorf("Schema diffs:\n%v", er.Error().Error())
	return nil
// ValidatePermissionsShard validates all the permissions are the same
// in a shard
func (wr *Wrangler) ValidatePermissionsShard(ctx context.Context, keyspace, shard string) error {
	si, err := wr.ts.GetShard(ctx, keyspace, shard)
	if err != nil {
		return err

	// get permissions from the master, or error
	if topo.TabletAliasIsZero(si.MasterAlias) {
		return fmt.Errorf("No master in shard %v/%v", keyspace, shard)
	log.Infof("Gathering permissions for master %v", si.MasterAlias)
	masterPermissions, err := wr.GetPermissions(ctx, topo.ProtoToTabletAlias(si.MasterAlias))
	if err != nil {
		return err

	// read all the aliases in the shard, that is all tablets that are
	// replicating from the master
	aliases, err := topo.FindAllTabletAliasesInShard(ctx, wr.ts, keyspace, shard)
	if err != nil {
		return err

	// then diff all of them, except master
	er := concurrency.AllErrorRecorder{}
	wg := sync.WaitGroup{}
	for _, alias := range aliases {
		if alias == topo.ProtoToTabletAlias(si.MasterAlias) {
		go wr.diffPermissions(ctx, masterPermissions, topo.ProtoToTabletAlias(si.MasterAlias), alias, &wg, &er)
	if er.HasErrors() {
		return fmt.Errorf("Permissions diffs:\n%v", er.Error().Error())
	return nil
// File: api.go — Project: zhzhy917/vitess

func initAPI(ctx context.Context, ts topo.Server, actions *ActionRepository) {
	tabletHealthCache := newTabletHealthCache(ts)

	// Cells
	handleCollection("cells", func(r *http.Request) (interface{}, error) {
		if getItemPath(r.URL.Path) != "" {
			return nil, errors.New("cells can only be listed, not retrieved")
		return ts.GetKnownCells(ctx)

	// Keyspaces
	handleCollection("keyspaces", func(r *http.Request) (interface{}, error) {
		keyspace := getItemPath(r.URL.Path)

		// List all keyspaces.
		if keyspace == "" {
			return ts.GetKeyspaces(ctx)

		// Perform an action on a keyspace.
		if r.Method == "POST" {
			if err := r.ParseForm(); err != nil {
				return nil, err
			action := r.FormValue("action")
			if action == "" {
				return nil, errors.New("must specify action")
			return actions.ApplyKeyspaceAction(ctx, action, keyspace, r), nil

		// Get the keyspace record.
		return ts.GetKeyspace(ctx, keyspace)

	// Shards
	handleCollection("shards", func(r *http.Request) (interface{}, error) {
		shardPath := getItemPath(r.URL.Path)
		if !strings.Contains(shardPath, "/") {
			return nil, fmt.Errorf("invalid shard path: %q", shardPath)
		parts := strings.SplitN(shardPath, "/", 2)
		keyspace := parts[0]
		shard := parts[1]

		// List the shards in a keyspace.
		if shard == "" {
			return ts.GetShardNames(ctx, keyspace)

		// Perform an action on a shard.
		if r.Method == "POST" {
			if err := r.ParseForm(); err != nil {
				return nil, err
			action := r.FormValue("action")
			if action == "" {
				return nil, errors.New("must specify action")
			return actions.ApplyShardAction(ctx, action, keyspace, shard, r), nil

		// Get the shard record.
		return ts.GetShard(ctx, keyspace, shard)

	// Tablets
	handleCollection("tablets", func(r *http.Request) (interface{}, error) {
		tabletPath := getItemPath(r.URL.Path)

		// List tablets based on query params.
		if tabletPath == "" {
			if err := r.ParseForm(); err != nil {
				return nil, err
			shardRef := r.FormValue("shard")
			cell := r.FormValue("cell")

			if shardRef != "" {
				// Look up by keyspace/shard, and optionally cell.
				keyspace, shard, err := topo.ParseKeyspaceShardString(shardRef)
				if err != nil {
					return nil, err
				if cell != "" {
					return topo.FindAllTabletAliasesInShardByCell(ctx, ts, keyspace, shard, []string{cell})
				return topo.FindAllTabletAliasesInShard(ctx, ts, keyspace, shard)

			// Get all tablets in a cell.
			if cell == "" {
				return nil, errors.New("cell param required")
			return ts.GetTabletsByCell(ctx, cell)

		// Get tablet health.
		if parts := strings.Split(tabletPath, "/"); len(parts) == 2 && parts[1] == "health" {
			tabletAlias, err := topo.ParseTabletAliasString(parts[0])
			if err != nil {
				return nil, err
			return tabletHealthCache.Get(ctx, tabletAlias)

		tabletAlias, err := topo.ParseTabletAliasString(tabletPath)
		if err != nil {
			return nil, err

		// Perform an action on a tablet.
		if r.Method == "POST" {
			if err := r.ParseForm(); err != nil {
				return nil, err
			action := r.FormValue("action")
			if action == "" {
				return nil, errors.New("must specify action")
			return actions.ApplyTabletAction(ctx, action, tabletAlias, r), nil

		// Get the tablet record.
		return ts.GetTablet(ctx, tabletAlias)

	// EndPoints
	handleCollection("endpoints", func(r *http.Request) (interface{}, error) {
		// We expect cell/keyspace/shard/tabletType.
		epPath := getItemPath(r.URL.Path)
		parts := strings.Split(epPath, "/")
		if len(parts) != 4 {
			return nil, fmt.Errorf("invalid cell/keyspace/shard/tabletType: %q", epPath)

		if parts[3] == "" {
			// tabletType is empty, so list the tablet types.
			return ts.GetSrvTabletTypesPerShard(ctx, parts[0], parts[1], parts[2])

		// Get the endpoints object for a specific type.
		ep, _, err := ts.GetEndPoints(ctx, parts[0], parts[1], parts[2], topo.TabletType(parts[3]))
		return ep, err
func initAPI(ctx context.Context, ts topo.Server) {
	// Get Cells
	handleGet("cells", func(r *http.Request) (interface{}, error) {
		if getItemPath(r.URL.Path) != "" {
			return nil, errors.New("cells can only be listed, not retrieved")
		return ts.GetKnownCells(ctx)

	// Get Keyspaces
	handleGet("keyspaces", func(r *http.Request) (interface{}, error) {
		keyspace := getItemPath(r.URL.Path)
		if keyspace == "" {
			return ts.GetKeyspaces(ctx)
		return ts.GetKeyspace(ctx, keyspace)

	// Get Shards
	handleGet("shards", func(r *http.Request) (interface{}, error) {
		shardPath := getItemPath(r.URL.Path)
		if !strings.Contains(shardPath, "/") {
			return nil, fmt.Errorf("invalid shard path: %q", shardPath)
		parts := strings.SplitN(shardPath, "/", 2)
		if parts[1] == "" {
			// It's just a keyspace. List the shards.
			return ts.GetShardNames(ctx, parts[0])
		// It's a keyspace/shard reference.
		return ts.GetShard(ctx, parts[0], parts[1])

	// Get Tablets
	handleGet("tablets", func(r *http.Request) (interface{}, error) {
		tabletPath := getItemPath(r.URL.Path)
		if tabletPath == "" {
			// List tablets based on query params.
			if err := r.ParseForm(); err != nil {
				return nil, err
			shardRef := r.FormValue("shard")
			cell := r.FormValue("cell")

			if shardRef != "" {
				// Look up by keyspace/shard, and optionally cell.
				keyspace, shard, err := topo.ParseKeyspaceShardString(shardRef)
				if err != nil {
					return nil, err
				if cell != "" {
					return topo.FindAllTabletAliasesInShardByCell(ctx, ts, keyspace, shard, []string{cell})
				return topo.FindAllTabletAliasesInShard(ctx, ts, keyspace, shard)

			// Get all tablets in a cell.
			if cell == "" {
				return nil, errors.New("cell param required")
			return ts.GetTabletsByCell(ctx, cell)

		// Get a specific tablet.
		tabletAlias, err := topo.ParseTabletAliasString(tabletPath)
		if err != nil {
			return nil, err
		return ts.GetTablet(ctx, tabletAlias)

	// Get EndPoints
	handleGet("endpoints", func(r *http.Request) (interface{}, error) {
		// We expect cell/keyspace/shard/tabletType.
		epPath := getItemPath(r.URL.Path)
		parts := strings.Split(epPath, "/")
		if len(parts) != 4 {
			return nil, fmt.Errorf("invalid cell/keyspace/shard/tabletType: %q", epPath)

		if parts[3] == "" {
			// tabletType is empty, so list the tablet types.
			return ts.GetSrvTabletTypesPerShard(ctx, parts[0], parts[1], parts[2])

		// Get the endpoints object for a specific type.
		ep, _, err := ts.GetEndPoints(ctx, parts[0], parts[1], parts[2], topo.TabletType(parts[3]))
		return ep, err
// File: schema.go — Project: anusornc/vitess

// ValidateSchemaKeyspace will diff the schema from all the tablets in
// the keyspace.
//
// It lists the shards of the keyspace, reads the schema of the first shard's
// master with wr.GetSchema and uses it as the reference passed to
// wr.diffSchema for the other tablet aliases found in every shard. A keyspace
// without shards is an error; a keyspace with exactly one shard is delegated
// to wr.ValidateSchemaShard. Whatever ends up in the error recorder is
// returned as one "Schema diffs" error; otherwise nil is returned.
//
// NOTE(review): this listing appears to have lost lines (closing braces, and
// the bodies of several `if` statements are empty) — confirm the exact
// control flow against the original file.
func (wr *Wrangler) ValidateSchemaKeyspace(ctx context.Context, keyspace string, excludeTables []string, includeViews bool) error {
	// find all the shards
	shards, err := wr.ts.GetShardNames(ctx, keyspace)
	if err != nil {
		return err

	// corner cases
	if len(shards) == 0 {
		return fmt.Errorf("No shards in keyspace %v", keyspace)
	if len(shards) == 1 {
		return wr.ValidateSchemaShard(ctx, keyspace, shards[0], excludeTables, includeViews)

	// find the reference schema using the first shard's master
	si, err := wr.ts.GetShard(ctx, keyspace, shards[0])
	if err != nil {
		return err
	if topo.TabletAliasIsZero(si.MasterAlias) {
		return fmt.Errorf("No master in shard %v/%v", keyspace, shards[0])
	referenceAlias := si.MasterAlias
	log.Infof("Gathering schema for reference master %v", referenceAlias)
	referenceSchema, err := wr.GetSchema(ctx, referenceAlias, nil, excludeTables, includeViews)
	if err != nil {
		return err

	// then diff with all other tablets everywhere
	// er and wg are handed by pointer to every diffSchema goroutine below.
	er := concurrency.AllErrorRecorder{}
	wg := sync.WaitGroup{}

	// first diff the slaves in the reference shard 0
	aliases, err := topo.FindAllTabletAliasesInShard(ctx, wr.ts, keyspace, shards[0])
	if err != nil {
		return err

	for _, alias := range aliases {
		// NOTE(review): the body of this check is empty in the listing —
		// presumably the reference master itself is skipped; confirm.
		if topo.TabletAliasEqual(alias, si.MasterAlias) {

		go wr.diffSchema(ctx, referenceSchema, referenceAlias, alias, excludeTables, includeViews, &wg, &er)

	// then diffs all tablets in the other shards
	for _, shard := range shards[1:] {
		// si and err shadow the outer variables for the duration of this
		// iteration.
		si, err := wr.ts.GetShard(ctx, keyspace, shard)
		// NOTE(review): error branch is empty in the listing; verify how a
		// GetShard failure is handled.
		if err != nil {

		if topo.TabletAliasIsZero(si.MasterAlias) {
			er.RecordError(fmt.Errorf("No master in shard %v/%v", keyspace, shard))

		aliases, err := topo.FindAllTabletAliasesInShard(ctx, wr.ts, keyspace, shard)
		// NOTE(review): error branch is empty in the listing; verify.
		if err != nil {

		for _, alias := range aliases {
			go wr.diffSchema(ctx, referenceSchema, referenceAlias, alias, excludeTables, includeViews, &wg, &er)
	// NOTE(review): no wg.Add / wg.Wait call is visible in this listing;
	// verify the goroutines are awaited before er is inspected.
	if er.HasErrors() {
		return fmt.Errorf("Schema diffs:\n%v", er.Error().Error())
	return nil
文件: schema.go 项目: shrutip/vitess
// applySchemaShard builds a TabletStatus for the tablets of the shard, reads
// the current schema of each one with wr.GetSchemaTablet, and then hands the
// collected statuses to applySchemaShardSimple (when simple is true) or to
// applySchemaShardComplex (otherwise). It returns the result of that call, or
// an error as soon as a topology read or a schema read fails.
//
// NOTE(review): this listing appears to have lost lines (closing braces and
// short statements) — confirm the exact control flow against the original
// file.
func (wr *Wrangler) applySchemaShard(shardInfo *topo.ShardInfo, preflight *mysqlctl.SchemaChangeResult, masterTabletAlias topo.TabletAlias, change string, newParentTabletAlias topo.TabletAlias, simple, force bool) (*mysqlctl.SchemaChangeResult, error) {

	// find all the shards we need to handle
	aliases, err := topo.FindAllTabletAliasesInShard(wr.ts, shardInfo.Keyspace(), shardInfo.ShardName())
	if err != nil {
		return nil, err

	// build the array of TabletStatus we're going to use
	// NOTE(review): the capacity is len(aliases)-1; make panics at run time
	// on a negative capacity, which happens here if aliases is empty.
	statusArray := make([]*TabletStatus, 0, len(aliases)-1)
	for _, alias := range aliases {
		if alias == masterTabletAlias {
			// we skip the master
			// NOTE(review): the skipping statement itself is not visible
			// in this listing.

		ti, err := wr.ts.GetTablet(alias)
		if err != nil {
			return nil, err
		if ti.Type == topo.TYPE_LAG {
			// lag tablets are usually behind, not replicating,
			// and a general pain. So let's just skip them
			// all together.
			// TODO(alainjobart) figure out other types to skip:
			// ValidateSchemaShard only does the serving types.
			// We do everything in the replication graph
			// but LAG. This seems fine for now.
			relog.Info("Skipping tablet %v as it is LAG", ti.Alias())

		statusArray = append(statusArray, &TabletStatus{ti: ti})

	// get schema on all tablets.
	relog.Info("Getting schema on all tablets for shard %v/%v", shardInfo.Keyspace(), shardInfo.ShardName())
	wg := &sync.WaitGroup{}
	for _, status := range statusArray {
		// Each goroutine stores its result and error on its own status
		// entry (beforeSchema / lastError).
		// NOTE(review): the wg.Add / wg.Done / wg.Wait calls and the
		// closure invocation are not visible in this listing; verify.
		go func(status *TabletStatus) {
			status.beforeSchema, status.lastError = wr.GetSchemaTablet(status.ti, nil, false)

	// quick check for errors
	// The first tablet with a recorded error aborts the whole operation.
	for _, status := range statusArray {
		if status.lastError != nil {
			return nil, fmt.Errorf("Error getting schema on tablet %v: %v", status.ti.Alias(), status.lastError)

	// simple or complex?
	if simple {
		return wr.applySchemaShardSimple(statusArray, preflight, masterTabletAlias, change, force)

	return wr.applySchemaShardComplex(statusArray, shardInfo, preflight, masterTabletAlias, change, newParentTabletAlias, force)
文件: api.go 项目: haoqoo/vitess
// initAPI registers the HTTP API handlers: collection handlers for "cells",
// "keyspaces", "shards", "tablets" and "endpoints" (through handleCollection),
// plus two handlers registered directly with http.HandleFunc under apiPrefix
// for "schema/apply" and "vschema/". Each collection handler is a func that
// receives the request and returns (interface{}, error). Topology reads go
// through ts; POST actions are dispatched to the actions repository.
//
// NOTE(review): this listing appears to have lost lines (closing braces and
// short statements), and the last two handlers are visibly truncated —
// confirm against the original file.
func initAPI(ctx context.Context, ts topo.Server, actions *ActionRepository) {
	tabletHealthCache := newTabletHealthCache(ts)

	// Cells
	handleCollection("cells", func(r *http.Request) (interface{}, error) {
		// Any non-empty item path is rejected: only the list is served.
		if getItemPath(r.URL.Path) != "" {
			return nil, errors.New("cells can only be listed, not retrieved")
		return ts.GetKnownCells(ctx)

	// Keyspaces
	handleCollection("keyspaces", func(r *http.Request) (interface{}, error) {
		keyspace := getItemPath(r.URL.Path)

		// List all keyspaces.
		if keyspace == "" {
			return ts.GetKeyspaces(ctx)

		// Perform an action on a keyspace.
		if r.Method == "POST" {
			if err := r.ParseForm(); err != nil {
				return nil, err
			// The action name comes from the "action" form value and is
			// mandatory.
			action := r.FormValue("action")
			if action == "" {
				return nil, errors.New("must specify action")
			return actions.ApplyKeyspaceAction(ctx, action, keyspace, r), nil

		// Get the keyspace record.
		return ts.GetKeyspace(ctx, keyspace)

	// Shards
	handleCollection("shards", func(r *http.Request) (interface{}, error) {
		shardPath := getItemPath(r.URL.Path)
		// The path must contain a "/", which also guarantees that the
		// SplitN below yields exactly two parts.
		if !strings.Contains(shardPath, "/") {
			return nil, fmt.Errorf("invalid shard path: %q", shardPath)
		parts := strings.SplitN(shardPath, "/", 2)
		keyspace := parts[0]
		shard := parts[1]

		// List the shards in a keyspace.
		// (i.e. the path was "<keyspace>/" with nothing after the slash)
		if shard == "" {
			return ts.GetShardNames(ctx, keyspace)

		// Perform an action on a shard.
		if r.Method == "POST" {
			if err := r.ParseForm(); err != nil {
				return nil, err
			action := r.FormValue("action")
			if action == "" {
				return nil, errors.New("must specify action")
			return actions.ApplyShardAction(ctx, action, keyspace, shard, r), nil

		// Get the shard record.
		return ts.GetShard(ctx, keyspace, shard)

	// Tablets
	handleCollection("tablets", func(r *http.Request) (interface{}, error) {
		tabletPath := getItemPath(r.URL.Path)

		// List tablets based on query params.
		if tabletPath == "" {
			if err := r.ParseForm(); err != nil {
				return nil, err
			// "shard" is parsed by topo.ParseKeyspaceShardString below;
			// "cell" optionally narrows the lookup.
			shardRef := r.FormValue("shard")
			cell := r.FormValue("cell")

			if shardRef != "" {
				// Look up by keyspace/shard, and optionally cell.
				keyspace, shard, err := topo.ParseKeyspaceShardString(shardRef)
				if err != nil {
					return nil, err
				if cell != "" {
					return topo.FindAllTabletAliasesInShardByCell(ctx, ts, keyspace, shard, []string{cell})
				return topo.FindAllTabletAliasesInShard(ctx, ts, keyspace, shard)

			// Get all tablets in a cell.
			// Without a shard reference the cell parameter is mandatory.
			if cell == "" {
				return nil, errors.New("cell param required")
			return ts.GetTabletsByCell(ctx, cell)

		// Get tablet health.
		// Matches a path made of exactly two segments whose second one is
		// "health"; the first segment is parsed as the tablet alias.
		if parts := strings.Split(tabletPath, "/"); len(parts) == 2 && parts[1] == "health" {
			tabletAlias, err := topo.ParseTabletAliasString(parts[0])
			if err != nil {
				return nil, err
			return tabletHealthCache.Get(ctx, tabletAlias)

		// Any other path is parsed as a tablet alias in its entirety.
		tabletAlias, err := topo.ParseTabletAliasString(tabletPath)
		if err != nil {
			return nil, err

		// Perform an action on a tablet.
		if r.Method == "POST" {
			if err := r.ParseForm(); err != nil {
				return nil, err
			action := r.FormValue("action")
			if action == "" {
				return nil, errors.New("must specify action")
			return actions.ApplyTabletAction(ctx, action, tabletAlias, r), nil

		// Get the tablet record.
		return ts.GetTablet(ctx, tabletAlias)

	// EndPoints
	handleCollection("endpoints", func(r *http.Request) (interface{}, error) {
		// We expect cell/keyspace/shard/tabletType.
		epPath := getItemPath(r.URL.Path)
		parts := strings.Split(epPath, "/")
		// Exactly four segments are required; the last one may be empty
		// (path ending in "/").
		if len(parts) != 4 {
			return nil, fmt.Errorf("invalid cell/keyspace/shard/tabletType: %q", epPath)

		if parts[3] == "" {
			// tabletType is empty, so list the tablet types.
			return ts.GetSrvTabletTypesPerShard(ctx, parts[0], parts[1], parts[2])

		// Get the endpoints object for a specific type.
		// The second return value of GetEndPoints is discarded.
		ep, _, err := ts.GetEndPoints(ctx, parts[0], parts[1], parts[2], topo.TabletType(parts[3]))
		return ep, err

	// Schema Change
	http.HandleFunc(apiPrefix+"schema/apply", func(w http.ResponseWriter, r *http.Request) {
		// The request is unmarshalled into a Keyspace/SQL pair.
		req := struct{ Keyspace, SQL string }{}
		if err := unmarshalRequest(r, &req); err != nil {
			httpErrorf(w, r, "can't unmarshal request: %v", err)

		// NOTE(review): the arguments of NewTabletExecutor and the call that
		// consumes the UI controller and executor are missing from this
		// listing; confirm against the original file.
		executor := schemamanager.NewTabletExecutor(

			schemamanager.NewUIController(req.SQL, req.Keyspace, w), executor)

	// VSchema
	http.HandleFunc(apiPrefix+"vschema/", func(w http.ResponseWriter, r *http.Request) {
		// The topo server must also implement topo.Schemafier.
		schemafier, ok := ts.(topo.Schemafier)
		if !ok {
			httpErrorf(w, r, "%T doesn't support schemafier API", ts)

		// Save VSchema
		// On POST the raw request body is saved as the new vschema.
		if r.Method == "POST" {
			vschema, err := ioutil.ReadAll(r.Body)
			if err != nil {
				httpErrorf(w, r, "can't read request body: %v", err)
			if err := schemafier.SaveVSchema(ctx, string(vschema)); err != nil {
				httpErrorf(w, r, "can't save vschema: %v", err)

		// Get VSchema
		vschema, err := schemafier.GetVSchema(ctx)
		if err != nil {
			httpErrorf(w, r, "can't get vschema: %v", err)
		// NOTE(review): the listing ends here; the code that writes vschema
		// to the response is not visible.
		w.Header().Set("Content-Type", jsonContentType)
// listActionsByShard prints, through wr.Logger(), the action nodes stored for
// the shard itself and then the action nodes of the tablets found in the
// shard. It requires the wrangler's topology server to be a *zktopo.Server
// and returns an error otherwise, or when the shard actions or the tablet
// aliases cannot be read. Failures to read one tablet's actions are only
// logged as warnings.
//
// NOTE(review): this listing appears to have lost lines (closing braces and
// short statements) — confirm the exact control flow against the original
// file.
func listActionsByShard(wr *wrangler.Wrangler, keyspace, shard string) error {
	// only works with Server
	zkts, ok := wr.TopoServer().(*zktopo.Server)
	if !ok {
		return fmt.Errorf("listActionsByShard only works with zktopo.Server")

	// print the shard action nodes
	shardActionPath := zkts.ShardActionPath(keyspace, shard)
	shardActionNodes, err := getActions(wr, zkts.GetZConn(), shardActionPath)
	if err != nil {
		return err
	for _, shardAction := range shardActionNodes {
		wr.Logger().Printf("%v\n", fmtAction(shardAction))

	// get and print the tablet action nodes
	// actionMap collects the tablet action nodes keyed by node.Path; it is
	// filled by the goroutines started below.
	wg := sync.WaitGroup{}
	mu := sync.Mutex{}
	actionMap := make(map[string]*actionnode.ActionNode)

	// f reads the action nodes under one tablet action path and stores them
	// in actionMap.
	f := func(actionPath string) {
		defer wg.Done()
		actionNodes, err := getActions(wr, zkts.GetZConn(), actionPath)
		if err != nil {
			wr.Logger().Warningf("listActionsByShard %v", err)
		// NOTE(review): no mu.Lock is visible around this map write in the
		// listing; verify the concurrent writes are guarded.
		for _, node := range actionNodes {
			actionMap[node.Path] = node

	tabletAliases, err := topo.FindAllTabletAliasesInShard(wr.TopoServer(), keyspace, shard)
	if err != nil {
		return err
	for _, tabletAlias := range tabletAliases {
		actionPath := zktopo.TabletActionPathForAlias(tabletAlias)
		// NOTE(review): err is not reassigned by the line above, and the
		// function already returned on a non-nil err before the loop, so
		// in the visible code this warning branch cannot be taken.
		if err != nil {
			wr.Logger().Warningf("listActionsByShard %v", err)
		} else {
			go f(actionPath)

	// NOTE(review): the matching wg.Add / wg.Wait / mu.Lock calls are not
	// visible in this listing; verify they exist before this deferred unlock.
	defer mu.Unlock()

	// Print every collected action; nil entries are reported as warnings.
	keys := topotools.CopyMapKeys(actionMap, []string{}).([]string)
	for _, key := range keys {
		action := actionMap[key]
		if action == nil {
			wr.Logger().Warningf("nil action: %v", key)
		} else {
			wr.Logger().Printf("%v\n", fmtAction(action))
	return nil
文件: zkns.go 项目: haoqoo/vitess
// ExportZknsForKeyspace exports addresses from the VT serving graph to a legacy zkns server.
//
// It only works when the wrangler's topology server is a *zktopo.Server. The
// cells to process are the cells of the tablet aliases found in the first
// shard of the keyspace. For each such cell, the nodes under
// /zk/<cell>/vt/ns/<keyspace> are exported with wr.exportVtnsToZkns to
// /zk/<cell>/zkns/vt/<keyspace>, and zkns paths that existed before but were
// not rewritten during the export are deleted.
//
// NOTE(review): this listing appears to have lost lines (closing braces and
// short statements) — confirm the exact control flow against the original
// file.
func (wr *Wrangler) ExportZknsForKeyspace(ctx context.Context, keyspace string) error {
	zkTopo, ok := wr.ts.(*zktopo.Server)
	if !ok {
		return fmt.Errorf("ExportZknsForKeyspace only works with zktopo")
	zconn := zkTopo.GetZConn()

	shardNames, err := wr.ts.GetShardNames(ctx, keyspace)
	if err != nil {
		return err

	// Scan the first shard to discover which cells need local serving data.
	// NOTE(review): shardNames[0] is indexed without a visible length check;
	// a keyspace without shards would panic here.
	aliases, err := topo.FindAllTabletAliasesInShard(ctx, wr.ts, keyspace, shardNames[0])
	if err != nil {
		return err

	// cellMap is used as a set of the cells seen in the aliases.
	cellMap := make(map[string]bool)
	for _, alias := range aliases {
		cellMap[alias.Cell] = true

	for cell := range cellMap {
		vtnsRootPath := fmt.Sprintf("/zk/%v/vt/ns/%v", cell, keyspace)
		zknsRootPath := fmt.Sprintf("/zk/%v/zkns/vt/%v", cell, keyspace)

		// Get the existing list of zkns children. If they don't get rewritten,
		// delete them as stale entries.
		zknsChildren, err := zk.ChildrenRecursive(zconn, zknsRootPath)
		if err != nil {
			// A missing root node is treated as "no children".
			if zookeeper.IsError(err, zookeeper.ZNONODE) {
				zknsChildren = make([]string, 0)
			} else {
				return err
		// Every existing zkns path starts out as stale; paths written by the
		// export below are removed from this set.
		staleZknsPaths := make(map[string]bool)
		for _, child := range zknsChildren {
			staleZknsPaths[path.Join(zknsRootPath, child)] = true

		vtnsChildren, err := zk.ChildrenRecursive(zconn, vtnsRootPath)
		if err != nil {
			// A missing root node is treated as "no children".
			if zookeeper.IsError(err, zookeeper.ZNONODE) {
				vtnsChildren = make([]string, 0)
			} else {
				return err
		for _, child := range vtnsChildren {
			vtnsAddrPath := path.Join(vtnsRootPath, child)
			zknsAddrPath := path.Join(zknsRootPath, child)

			_, stat, err := zconn.Get(vtnsAddrPath)
			if err != nil {
				return err
			// Leaf nodes correspond to zkns vdns files in the old setup.
			// NOTE(review): the body of this check is empty in the listing —
			// presumably nodes with children are skipped; confirm.
			if stat.NumChildren() > 0 {
			zknsPathsWritten, err := wr.exportVtnsToZkns(ctx, zconn, vtnsAddrPath, zknsAddrPath)
			if err != nil {
				return err
			log.V(6).Infof("zknsPathsWritten: %v", zknsPathsWritten)
			for _, zkPath := range zknsPathsWritten {
				delete(staleZknsPaths, zkPath)
		log.V(6).Infof("staleZknsPaths: %v", staleZknsPaths)
		prunePaths := make([]string, 0, len(staleZknsPaths))
		for prunePath := range staleZknsPaths {
			prunePaths = append(prunePaths, prunePath)
		// Prune paths in reverse order so we remove children first
		// NOTE(review): no sort of prunePaths is visible in this listing, and
		// Go map iteration order is unspecified, so reverse iteration alone
		// does not guarantee children come first; confirm a sort exists.
		// Delete errors of kind ZNOTEMPTY are tolerated.
		for i := len(prunePaths) - 1; i >= 0; i-- {
			log.Infof("prune stale zkns path %v", prunePaths[i])
			if err := zconn.Delete(prunePaths[i], -1); err != nil && !zookeeper.IsError(err, zookeeper.ZNOTEMPTY) {
				return err
	return nil
文件: rebuild.go 项目: rn2dy/vitess
// This function should only be used with an action lock on the keyspace
// - otherwise the consistency of the serving graph data can't be
// guaranteed.
// Take data from the global keyspace and rebuild the local serving
// copies in each cell.
//
// Steps visible in this function: read the keyspace record (a topo.ErrNoNode
// error is tolerated and replaced by an empty keyspace), rebuild every shard
// graph with wr.RebuildShardGraph, build one topo.SrvKeyspace per cell found
// in the first shard, fill in its tablet types and per-type partitions from
// the SrvShard of every shard, check each partition covers MinKey..MaxKey
// without gaps, and finally write each SrvKeyspace with UpdateSrvKeyspace.
//
// NOTE(review): this listing appears to have lost lines (closing braces and
// short statements) — confirm the exact control flow against the original
// file.
func (wr *Wrangler) rebuildKeyspace(keyspace string, cells []string) error {
	log.Infof("rebuildKeyspace %v", keyspace)

	ki, err := wr.ts.GetKeyspace(keyspace)
	if err != nil {
		// Temporary change: we try to keep going even if node
		// doesn't exist
		if err != topo.ErrNoNode {
			return err
		ki = topo.NewKeyspaceInfo(keyspace, &topo.Keyspace{})

	shards, err := wr.ts.GetShardNames(keyspace)
	if err != nil {
		return err

	// Rebuild all shards in parallel.
	// NOTE(review): the wg.Add / wg.Done / wg.Wait calls and the closure
	// invocation are not visible in this listing; verify the goroutines are
	// awaited before er is inspected.
	wg := sync.WaitGroup{}
	er := concurrency.FirstErrorRecorder{}
	for _, shard := range shards {
		go func(shard string) {
			if err := wr.RebuildShardGraph(keyspace, shard, cells); err != nil {
				er.RecordError(fmt.Errorf("RebuildShardGraph failed: %v/%v %v", keyspace, shard, err))
	if er.HasErrors() {
		return er.Error()

	// Scan the first shard to discover which cells need local serving data.
	// NOTE(review): shards[0] is indexed without a visible length check; a
	// keyspace without shards would panic here.
	aliases, err := topo.FindAllTabletAliasesInShard(wr.ts, keyspace, shards[0])
	if err != nil {
		return err

	// srvKeyspaceMap is a map:
	//   key: local keyspace {cell,keyspace}
	//   value: topo.SrvKeyspace object being built
	srvKeyspaceMap := make(map[cellKeyspace]*topo.SrvKeyspace)
	for _, alias := range aliases {
		keyspaceLocation := cellKeyspace{alias.Cell, keyspace}
		if _, ok := srvKeyspaceMap[keyspaceLocation]; !ok {
			// before adding keyspaceLocation to the map, we check
			// this is a serving tablet. No serving tablet in shard
			// 0 means we're not rebuilding the serving
			// graph in that cell.  This is somewhat
			// expensive, but we only do it on all the
			// non-serving tablets in a shard before we
			// find a serving tablet.
			ti, err := wr.ts.GetTablet(alias)
			if err != nil {
				return err
			// NOTE(review): the body of this check is empty in the
			// listing — presumably the tablet is skipped; confirm.
			if !ti.IsInServingGraph() {

			// The sharding column and ServedFrom settings are copied from
			// the keyspace record.
			srvKeyspaceMap[keyspaceLocation] = &topo.SrvKeyspace{
				Shards:             make([]topo.SrvShard, 0, 16),
				ShardingColumnName: ki.ShardingColumnName,
				ShardingColumnType: ki.ShardingColumnType,
				ServedFrom:         ki.ServedFrom,

	// for each entry in the srvKeyspaceMap map, we do the following:
	// - read the ShardInfo structures for each shard
	// - compute the union of the db types (replica, master, ...)
	// - sort the shards in the list by range
	// - check the ranges are compatible (no hole, covers everything)
	// NOTE(review): no sort call is visible in this listing; confirm the
	// shards are ordered before the range checks below.
	for ck, srvKeyspace := range srvKeyspaceMap {
		// keyspaceDbTypes is used as a set of the tablet types seen in any
		// shard of this cell.
		keyspaceDbTypes := make(map[topo.TabletType]bool)
		srvKeyspace.Partitions = make(map[topo.TabletType]*topo.KeyspacePartition)
		for _, shard := range shards {
			srvShard, err := wr.ts.GetSrvShard(ck.cell, ck.keyspace, shard)
			if err != nil {
				return err
			for _, tabletType := range srvShard.TabletTypes {
				keyspaceDbTypes[tabletType] = true

			// for each type this shard is supposed to serve,
			// add it to srvKeyspace.Partitions
			for _, tabletType := range srvShard.ServedTypes {
				if _, ok := srvKeyspace.Partitions[tabletType]; !ok {
					srvKeyspace.Partitions[tabletType] = &topo.KeyspacePartition{
						Shards: make([]topo.SrvShard, 0)}
				srvKeyspace.Partitions[tabletType].Shards = append(srvKeyspace.Partitions[tabletType].Shards, *srvShard)

		srvKeyspace.TabletTypes = make([]topo.TabletType, 0, len(keyspaceDbTypes))
		for dbType := range keyspaceDbTypes {
			srvKeyspace.TabletTypes = append(srvKeyspace.TabletTypes, dbType)

		// first is true until one partition has been copied into
		// srvKeyspace.Shards (see "backfill Shards" below).
		first := true
		for tabletType, partition := range srvKeyspace.Partitions {

			// check the first Start is MinKey, the last End is MaxKey,
			// and the values in between match: End[i] == Start[i+1]
			if partition.Shards[0].KeyRange.Start != key.MinKey {
				return fmt.Errorf("Keyspace partition for %v does not start with %v", tabletType, key.MinKey)
			if partition.Shards[len(partition.Shards)-1].KeyRange.End != key.MaxKey {
				return fmt.Errorf("Keyspace partition for %v does not end with %v", tabletType, key.MaxKey)
			// Iterates over every shard but the last so that i+1 stays in
			// range.
			for i := range partition.Shards[0 : len(partition.Shards)-1] {
				if partition.Shards[i].KeyRange.End != partition.Shards[i+1].KeyRange.Start {
					return fmt.Errorf("Non-contiguous KeyRange values for %v at shard %v to %v: %v != %v", tabletType, i, i+1, partition.Shards[i].KeyRange.End.Hex(), partition.Shards[i+1].KeyRange.Start.Hex())

			// backfill Shards
			// Map iteration order is unspecified, so which partition is
			// used for the backfill is not deterministic.
			if first {
				first = false
				srvKeyspace.Shards = partition.Shards

	// and then finally save the keyspace objects
	for ck, srvKeyspace := range srvKeyspaceMap {
		if err := wr.ts.UpdateSrvKeyspace(ck.cell, ck.keyspace, srvKeyspace); err != nil {
			return fmt.Errorf("writing serving data failed: %v", err)
	return nil
文件: rebuild.go 项目: shrutip/vitess
// This function should only be used with an action lock on the keyspace
// - otherwise the consistency of the serving graph data can't be
// guaranteed.
// Take data from the global keyspace and rebuild the local serving
// copies in each cell.
//
// Steps visible in this function: rebuild every shard graph with
// wr.RebuildShardGraph, create one topo.SrvKeyspace per cell found in the
// first shard, append the SrvShard of every shard to it (with AddrsByType
// cleared), record the union of tablet types, check the shard key ranges
// cover MinKey..MaxKey without gaps, and finally write each SrvKeyspace with
// UpdateSrvKeyspace.
//
// NOTE(review): this listing appears to have lost lines (closing braces and
// short statements) — confirm the exact control flow against the original
// file.
func (wr *Wrangler) rebuildKeyspace(keyspace string, cells []string) error {
	relog.Info("rebuildKeyspace %v", keyspace)
	shards, err := wr.ts.GetShardNames(keyspace)
	if err != nil {
		return err

	// Rebuild all shards in parallel.
	// NOTE(review): the wg.Add / wg.Done / wg.Wait calls and the closure
	// invocation are not visible in this listing; verify the goroutines are
	// awaited before er is inspected.
	wg := sync.WaitGroup{}
	er := concurrency.FirstErrorRecorder{}
	for _, shard := range shards {
		go func(shard string) {
			if err := wr.RebuildShardGraph(keyspace, shard, cells); err != nil {
				er.RecordError(fmt.Errorf("RebuildShardGraph failed: %v/%v %v", keyspace, shard, err))
	if er.HasErrors() {
		return er.Error()

	// Scan the first shard to discover which cells need local serving data.
	// NOTE(review): shards[0] is indexed without a visible length check; a
	// keyspace without shards would panic here.
	aliases, err := topo.FindAllTabletAliasesInShard(wr.ts, keyspace, shards[0])
	if err != nil {
		return err

	// srvKeyspaceByPath is a map:
	//   key: local keyspace {cell,keyspace}
	//   value: topo.SrvKeyspace object being built
	srvKeyspaceByPath := make(map[cellKeyspace]*topo.SrvKeyspace)
	for _, alias := range aliases {
		keyspaceLocation := cellKeyspace{alias.Cell, keyspace}
		if _, ok := srvKeyspaceByPath[keyspaceLocation]; !ok {
			// before adding keyspaceLocation to the map
			// srvKeyspaceByPath, we check this is a
			// serving tablet. No serving tablet in shard
			// 0 means we're not rebuilding the serving
			// graph in that cell.  This is somewhat
			// expensive, but we only do it on all the
			// non-serving tablets in a shard before we
			// find a serving tablet.
			ti, err := wr.ts.GetTablet(alias)
			if err != nil {
				return err
			// NOTE(review): the body of this check is empty in the
			// listing — presumably the tablet is skipped; confirm.
			if !ti.IsServingType() {

			srvKeyspaceByPath[keyspaceLocation] = &topo.SrvKeyspace{Shards: make([]topo.SrvShard, 0, 16)}

	// for each entry in the srvKeyspaceByPath map, we do the following:
	// - read the ShardInfo structures for each shard
	//    - prune the AddrsByType field, result would be too big
	// - compute the union of the db types (replica, master, ...)
	// - sort the shards in the list by range
	// - check the ranges are compatible (no hole, covers everything)
	// NOTE(review): no sort call is visible in this listing; confirm the
	// shards are ordered before the range checks below.
	for srvPath, srvKeyspace := range srvKeyspaceByPath {
		// keyspaceDbTypes is used as a set of the tablet types seen in any
		// shard of this cell.
		keyspaceDbTypes := make(map[topo.TabletType]bool)
		for _, shard := range shards {
			srvShard, err := wr.ts.GetSrvShard(srvPath.cell, srvPath.keyspace, shard)
			if err != nil {
				return err
			for dbType, _ := range srvShard.AddrsByType {
				keyspaceDbTypes[topo.TabletType(dbType)] = true
			// Prune addrs, this is unnecessarily expensive right now. It is easier to
			// load on-demand since we have to do that anyway on a reconnect.
			srvShard.AddrsByType = nil
			srvKeyspace.Shards = append(srvKeyspace.Shards, *srvShard)
		tabletTypes := make([]topo.TabletType, 0, len(keyspaceDbTypes))
		for dbType, _ := range keyspaceDbTypes {
			tabletTypes = append(tabletTypes, dbType)
		srvKeyspace.TabletTypes = tabletTypes
		// FIXME(msolomon) currently this only works when the shards are range-based

		// check the first Start is MinKey, the last End is MaxKey,
		// and the values in between match: End[i] == Start[i+1]
		if srvKeyspace.Shards[0].KeyRange.Start != key.MinKey {
			return fmt.Errorf("Keyspace does not start with %v", key.MinKey)
		if srvKeyspace.Shards[len(srvKeyspace.Shards)-1].KeyRange.End != key.MaxKey {
			return fmt.Errorf("Keyspace does not end with %v", key.MaxKey)
		// Iterates over every shard but the last so that i+1 stays in range.
		for i, _ := range srvKeyspace.Shards[0 : len(srvKeyspace.Shards)-1] {
			if srvKeyspace.Shards[i].KeyRange.End != srvKeyspace.Shards[i+1].KeyRange.Start {
				return fmt.Errorf("Non-contiguous KeyRange values at shard %v to %v: %v != %v", i, i+1, srvKeyspace.Shards[i].KeyRange.End.Hex(), srvKeyspace.Shards[i+1].KeyRange.Start.Hex())

	// and then finally save the keyspace objects
	for srvPath, srvKeyspace := range srvKeyspaceByPath {
		if err := wr.ts.UpdateSrvKeyspace(srvPath.cell, srvPath.keyspace, srvKeyspace); err != nil {
			return fmt.Errorf("writing serving data failed: %v", err)
	return nil