// findMasterTargets looks up the master for the destination shard, and set the destinations appropriately.
// It should be used if vtworker will only want to write to masters.
func (vscw *VerticalSplitCloneWorker) findMasterTargets() error {
	var err error
	// find all the targets in the destination keyspace / shard
	vscw.reloadAliases, err = topo.FindAllTabletAliasesInShard(context.TODO(), vscw.wr.TopoServer(), vscw.destinationKeyspace, vscw.destinationShard)
	if err != nil {
		return fmt.Errorf("cannot find all reload target tablets in %v/%v: %v", vscw.destinationKeyspace, vscw.destinationShard, err)
	}
	vscw.wr.Logger().Infof("Found %v reload target aliases", len(vscw.reloadAliases))

	// get the TabletInfo for all targets
	vscw.reloadTablets, err = topo.GetTabletMap(context.TODO(), vscw.wr.TopoServer(), vscw.reloadAliases)
	if err != nil {
		return fmt.Errorf("cannot read all reload target tablets in %v/%v: %v", vscw.destinationKeyspace, vscw.destinationShard, err)
	}

	// find and validate the master
	for tabletAlias, ti := range vscw.reloadTablets {
		if ti.Type == topo.TYPE_MASTER {
			if vscw.destinationMasterAlias.IsZero() {
				vscw.destinationMasterAlias = tabletAlias
				vscw.destinationAliases = []topo.TabletAlias{tabletAlias}
				vscw.destinationTablets = map[topo.TabletAlias]*topo.TabletInfo{tabletAlias: ti}
			} else {
				return fmt.Errorf("multiple masters in destination shard: %v and %v at least", vscw.destinationMasterAlias, tabletAlias)
			}
		}
	}
	if vscw.destinationMasterAlias.IsZero() {
		return fmt.Errorf("no master in destination shard")
	}
	vscw.wr.Logger().Infof("Found target master alias %v in shard %v/%v", vscw.destinationMasterAlias, vscw.destinationKeyspace, vscw.destinationShard)

	return nil
}
Example #2
0
func (wr *Wrangler) ValidateVersionKeyspace(keyspace string) error {
	// find all the shards
	shards, err := wr.ts.GetShardNames(keyspace)
	if err != nil {
		return err
	}

	// corner cases
	if len(shards) == 0 {
		return fmt.Errorf("No shards in keyspace %v", keyspace)
	}
	sort.Strings(shards)
	if len(shards) == 1 {
		return wr.ValidateVersionShard(keyspace, shards[0])
	}

	// find the reference version using the first shard's master
	si, err := wr.ts.GetShard(keyspace, shards[0])
	if err != nil {
		return err
	}
	if si.MasterAlias.Uid == topo.NO_TABLET {
		return fmt.Errorf("No master in shard %v/%v", keyspace, shards[0])
	}
	referenceAlias := si.MasterAlias
	log.Infof("Gathering version for reference master %v", referenceAlias)
	referenceVersion, err := wr.GetVersion(referenceAlias)
	if err != nil {
		return err
	}

	// then diff with all tablets but master 0
	er := concurrency.AllErrorRecorder{}
	wg := sync.WaitGroup{}
	for _, shard := range shards {
		aliases, err := topo.FindAllTabletAliasesInShard(context.TODO(), wr.ts, keyspace, shard)
		if err != nil {
			er.RecordError(err)
			continue
		}

		for _, alias := range aliases {
			if alias == si.MasterAlias {
				continue
			}

			wg.Add(1)
			go wr.diffVersion(referenceVersion, referenceAlias, alias, &wg, &er)
		}
	}
	wg.Wait()
	if er.HasErrors() {
		return fmt.Errorf("Version diffs:\n%v", er.Error().Error())
	}
	return nil
}
Example #3
0
// FIXME(msolomon) This validate presumes the master is up and running.
// Even when that isn't true, there are validation processes that might be valuable.
func (wr *Wrangler) validateShard(keyspace, shard string, pingTablets bool, wg *sync.WaitGroup, results chan<- error) {
	shardInfo, err := wr.ts.GetShard(keyspace, shard)
	if err != nil {
		results <- fmt.Errorf("TopologyServer.GetShard(%v, %v) failed: %v", keyspace, shard, err)
		return
	}

	aliases, err := topo.FindAllTabletAliasesInShard(wr.ctx, wr.ts, keyspace, shard)
	if err != nil {
		results <- fmt.Errorf("TopologyServer.FindAllTabletAliasesInShard(%v, %v) failed: %v", keyspace, shard, err)
		return
	}

	tabletMap, _ := topo.GetTabletMap(wr.ctx, wr.ts, aliases)

	var masterAlias topo.TabletAlias
	for _, alias := range aliases {
		tabletInfo, ok := tabletMap[alias]
		if !ok {
			results <- fmt.Errorf("tablet %v not found in map", alias)
			continue
		}
		if tabletInfo.Parent.Uid == topo.NO_TABLET {
			if masterAlias.Cell != "" {
				results <- fmt.Errorf("tablet %v already has a master %v", alias, masterAlias)
			} else {
				masterAlias = alias
			}
		}
	}

	if masterAlias.Cell == "" {
		results <- fmt.Errorf("no master for shard %v/%v", keyspace, shard)
	} else if shardInfo.MasterAlias != masterAlias {
		results <- fmt.Errorf("master mismatch for shard %v/%v: found %v, expected %v", keyspace, shard, masterAlias, shardInfo.MasterAlias)
	}

	for _, alias := range aliases {
		wg.Add(1)
		go func(alias topo.TabletAlias) {
			defer wg.Done()
			if err := topo.Validate(wr.ts, alias); err != nil {
				results <- fmt.Errorf("Validate(%v) failed: %v", alias, err)
			} else {
				wr.Logger().Infof("tablet %v is valid", alias)
			}
		}(alias)
	}

	if pingTablets {
		wr.validateReplication(shardInfo, tabletMap, results)
		wr.pingTablets(tabletMap, wg, results)
	}

	return
}
Example #4
0
// Validate all tablets in all discoverable cells, even if they are
// not in the replication graph.
func (wr *Wrangler) validateAllTablets(wg *sync.WaitGroup, results chan<- error) {
	cellSet := make(map[string]bool, 16)

	keyspaces, err := wr.ts.GetKeyspaces()
	if err != nil {
		results <- fmt.Errorf("TopologyServer.GetKeyspaces failed: %v", err)
		return
	}
	for _, keyspace := range keyspaces {
		shards, err := wr.ts.GetShardNames(keyspace)
		if err != nil {
			results <- fmt.Errorf("TopologyServer.GetShardNames(%v) failed: %v", keyspace, err)
			return
		}

		for _, shard := range shards {
			aliases, err := topo.FindAllTabletAliasesInShard(wr.ctx, wr.ts, keyspace, shard)
			if err != nil {
				results <- fmt.Errorf("TopologyServer.FindAllTabletAliasesInShard(%v, %v) failed: %v", keyspace, shard, err)
				return
			}
			for _, alias := range aliases {
				cellSet[alias.Cell] = true
			}
		}
	}

	for cell := range cellSet {
		aliases, err := wr.ts.GetTabletsByCell(cell)
		if err != nil {
			results <- fmt.Errorf("TopologyServer.GetTabletsByCell(%v) failed: %v", cell, err)
			continue
		}

		for _, alias := range aliases {
			wg.Add(1)
			go func(alias topo.TabletAlias) {
				defer wg.Done()
				if err := topo.Validate(wr.ts, alias); err != nil {
					results <- fmt.Errorf("Validate(%v) failed: %v", alias, err)
				} else {
					wr.Logger().Infof("tablet %v is valid", alias)
				}
			}(alias)
		}
	}
}
Example #5
0
// findMasterTargets looks up the masters for all destination shards, and set the destinations appropriately.
// It should be used if vtworker will only want to write to masters.
func (scw *SplitCloneWorker) findMasterTargets() error {
	var err error

	scw.destinationAliases = make([][]topo.TabletAlias, len(scw.destinationShards))
	scw.destinationTablets = make([]map[topo.TabletAlias]*topo.TabletInfo, len(scw.destinationShards))
	scw.destinationMasterAliases = make([]topo.TabletAlias, len(scw.destinationShards))

	scw.reloadAliases = make([][]topo.TabletAlias, len(scw.destinationShards))
	scw.reloadTablets = make([]map[topo.TabletAlias]*topo.TabletInfo, len(scw.destinationShards))

	for shardIndex, si := range scw.destinationShards {
		scw.reloadAliases[shardIndex], err = topo.FindAllTabletAliasesInShard(context.TODO(), scw.wr.TopoServer(), si.Keyspace(), si.ShardName())
		if err != nil {
			return fmt.Errorf("cannot find all reload target tablets in %v/%v: %v", si.Keyspace(), si.ShardName(), err)
		}
		scw.wr.Logger().Infof("Found %v reload target aliases in shard %v/%v", len(scw.reloadAliases[shardIndex]), si.Keyspace(), si.ShardName())

		// get the TabletInfo for all targets
		scw.reloadTablets[shardIndex], err = topo.GetTabletMap(context.TODO(), scw.wr.TopoServer(), scw.reloadAliases[shardIndex])
		if err != nil {
			return fmt.Errorf("cannot read all reload target tablets in %v/%v: %v", si.Keyspace(), si.ShardName(), err)
		}

		// find and validate the master
		for tabletAlias, ti := range scw.reloadTablets[shardIndex] {
			if ti.Type == topo.TYPE_MASTER {
				if scw.destinationMasterAliases[shardIndex].IsZero() {
					scw.destinationMasterAliases[shardIndex] = tabletAlias
					scw.destinationAliases[shardIndex] = []topo.TabletAlias{tabletAlias}
					scw.destinationTablets[shardIndex] = map[topo.TabletAlias]*topo.TabletInfo{tabletAlias: ti}
				} else {
					return fmt.Errorf("multiple masters in destination shard: %v and %v at least", scw.destinationMasterAliases[shardIndex], tabletAlias)
				}
			}
		}
		if scw.destinationMasterAliases[shardIndex].IsZero() {
			return fmt.Errorf("no master in destination shard")
		}
		scw.wr.Logger().Infof("Found target master alias %v in shard %v/%v", scw.destinationMasterAliases[shardIndex], si.Keyspace(), si.ShardName())
	}

	return nil
}
Example #6
0
func (wr *Wrangler) ValidateVersionShard(keyspace, shard string) error {
	si, err := wr.ts.GetShard(keyspace, shard)
	if err != nil {
		return err
	}

	// get version from the master, or error
	if si.MasterAlias.Uid == topo.NO_TABLET {
		return fmt.Errorf("No master in shard %v/%v", keyspace, shard)
	}
	log.Infof("Gathering version for master %v", si.MasterAlias)
	masterVersion, err := wr.GetVersion(si.MasterAlias)
	if err != nil {
		return err
	}

	// read all the aliases in the shard, that is all tablets that are
	// replicating from the master
	aliases, err := topo.FindAllTabletAliasesInShard(context.TODO(), wr.ts, keyspace, shard)
	if err != nil {
		return err
	}

	// then diff with all slaves
	er := concurrency.AllErrorRecorder{}
	wg := sync.WaitGroup{}
	for _, alias := range aliases {
		if alias == si.MasterAlias {
			continue
		}

		wg.Add(1)
		go wr.diffVersion(masterVersion, si.MasterAlias, alias, &wg, &er)
	}
	wg.Wait()
	if er.HasErrors() {
		return fmt.Errorf("Version diffs:\n%v", er.Error().Error())
	}
	return nil
}
Example #7
0
// Export addresses from the VT serving graph to a legacy zkns server.
func (wr *Wrangler) ExportZknsForKeyspace(keyspace string) error {
	zkTopo, ok := wr.ts.(*zktopo.Server)
	if !ok {
		return fmt.Errorf("ExportZknsForKeyspace only works with zktopo")
	}
	zconn := zkTopo.GetZConn()

	shardNames, err := wr.ts.GetShardNames(keyspace)
	if err != nil {
		return err
	}

	// Scan the first shard to discover which cells need local serving data.
	aliases, err := topo.FindAllTabletAliasesInShard(context.TODO(), wr.ts, keyspace, shardNames[0])
	if err != nil {
		return err
	}

	cellMap := make(map[string]bool)
	for _, alias := range aliases {
		cellMap[alias.Cell] = true
	}

	for cell := range cellMap {
		vtnsRootPath := fmt.Sprintf("/zk/%v/vt/ns/%v", cell, keyspace)
		zknsRootPath := fmt.Sprintf("/zk/%v/zkns/vt/%v", cell, keyspace)

		// Get the existing list of zkns children. If they don't get rewritten,
		// delete them as stale entries.
		zknsChildren, err := zk.ChildrenRecursive(zconn, zknsRootPath)
		if err != nil {
			if zookeeper.IsError(err, zookeeper.ZNONODE) {
				zknsChildren = make([]string, 0)
			} else {
				return err
			}
		}
		staleZknsPaths := make(map[string]bool)
		for _, child := range zknsChildren {
			staleZknsPaths[path.Join(zknsRootPath, child)] = true
		}

		vtnsChildren, err := zk.ChildrenRecursive(zconn, vtnsRootPath)
		if err != nil {
			if zookeeper.IsError(err, zookeeper.ZNONODE) {
				vtnsChildren = make([]string, 0)
			} else {
				return err
			}
		}
		for _, child := range vtnsChildren {
			vtnsAddrPath := path.Join(vtnsRootPath, child)
			zknsAddrPath := path.Join(zknsRootPath, child)

			_, stat, err := zconn.Get(vtnsAddrPath)
			if err != nil {
				return err
			}
			// Leaf nodes correspond to zkns vdns files in the old setup.
			if stat.NumChildren() > 0 {
				continue
			}
			zknsPathsWritten, err := wr.exportVtnsToZkns(zconn, vtnsAddrPath, zknsAddrPath)
			if err != nil {
				return err
			}
			log.V(6).Infof("zknsPathsWritten: %v", zknsPathsWritten)
			for _, zkPath := range zknsPathsWritten {
				delete(staleZknsPaths, zkPath)
			}
		}
		log.V(6).Infof("staleZknsPaths: %v", staleZknsPaths)
		prunePaths := make([]string, 0, len(staleZknsPaths))
		for prunePath := range staleZknsPaths {
			prunePaths = append(prunePaths, prunePath)
		}
		sort.Strings(prunePaths)
		// Prune paths in reverse order so we remove children first
		for i := len(prunePaths) - 1; i >= 0; i-- {
			log.Infof("prune stale zkns path %v", prunePaths[i])
			if err := zconn.Delete(prunePaths[i], -1); err != nil && !zookeeper.IsError(err, zookeeper.ZNOTEMPTY) {
				return err
			}
		}
	}
	return nil
}
Example #8
0
func (wr *Wrangler) applySchemaShard(shardInfo *topo.ShardInfo, preflight *myproto.SchemaChangeResult, masterTabletAlias topo.TabletAlias, change string, newParentTabletAlias topo.TabletAlias, simple, force bool) (*myproto.SchemaChangeResult, error) {

	// find all the shards we need to handle
	aliases, err := topo.FindAllTabletAliasesInShard(context.TODO(), wr.ts, shardInfo.Keyspace(), shardInfo.ShardName())
	if err != nil {
		return nil, err
	}

	// build the array of TabletStatus we're going to use
	statusArray := make([]*TabletStatus, 0, len(aliases)-1)
	for _, alias := range aliases {
		if alias == masterTabletAlias {
			// we skip the master
			continue
		}

		ti, err := wr.ts.GetTablet(alias)
		if err != nil {
			return nil, err
		}
		if ti.Type == topo.TYPE_LAG {
			// lag tablets are usually behind, not replicating,
			// and a general pain. So let's just skip them
			// all together.
			// TODO(alainjobart) figure out other types to skip:
			// ValidateSchemaShard only does the serving types.
			// We do everything in the replication graph
			// but LAG. This seems fine for now.
			log.Infof("Skipping tablet %v as it is LAG", ti.Alias)
			continue
		}

		statusArray = append(statusArray, &TabletStatus{ti: ti})
	}

	// get schema on all tablets.
	log.Infof("Getting schema on all tablets for shard %v/%v", shardInfo.Keyspace(), shardInfo.ShardName())
	wg := &sync.WaitGroup{}
	for _, status := range statusArray {
		wg.Add(1)
		go func(status *TabletStatus) {
			status.beforeSchema, status.lastError = wr.tmc.GetSchema(wr.ctx, status.ti, nil, nil, false)
			wg.Done()
		}(status)
	}
	wg.Wait()

	// quick check for errors
	for _, status := range statusArray {
		if status.lastError != nil {
			return nil, fmt.Errorf("Error getting schema on tablet %v: %v", status.ti.Alias, status.lastError)
		}
	}

	// simple or complex?
	if simple {
		return wr.applySchemaShardSimple(statusArray, preflight, masterTabletAlias, change, force)
	}

	return wr.applySchemaShardComplex(statusArray, shardInfo, preflight, masterTabletAlias, change, newParentTabletAlias, force)
}
Example #9
0
// ValidateSchemaShard will diff the schema from all the tablets in
// the keyspace.
func (wr *Wrangler) ValidateSchemaKeyspace(keyspace string, excludeTables []string, includeViews bool) error {
	// find all the shards
	shards, err := wr.ts.GetShardNames(keyspace)
	if err != nil {
		return err
	}

	// corner cases
	if len(shards) == 0 {
		return fmt.Errorf("No shards in keyspace %v", keyspace)
	}
	sort.Strings(shards)
	if len(shards) == 1 {
		return wr.ValidateSchemaShard(keyspace, shards[0], excludeTables, includeViews)
	}

	// find the reference schema using the first shard's master
	si, err := wr.ts.GetShard(keyspace, shards[0])
	if err != nil {
		return err
	}
	if si.MasterAlias.Uid == topo.NO_TABLET {
		return fmt.Errorf("No master in shard %v/%v", keyspace, shards[0])
	}
	referenceAlias := si.MasterAlias
	log.Infof("Gathering schema for reference master %v", referenceAlias)
	referenceSchema, err := wr.GetSchema(referenceAlias, nil, excludeTables, includeViews)
	if err != nil {
		return err
	}

	// then diff with all other tablets everywhere
	er := concurrency.AllErrorRecorder{}
	wg := sync.WaitGroup{}

	// first diff the slaves in the reference shard 0
	aliases, err := topo.FindAllTabletAliasesInShard(context.TODO(), wr.ts, keyspace, shards[0])
	if err != nil {
		return err
	}

	for _, alias := range aliases {
		if alias == si.MasterAlias {
			continue
		}

		wg.Add(1)
		go wr.diffSchema(referenceSchema, referenceAlias, alias, excludeTables, includeViews, &wg, &er)
	}

	// then diffs all tablets in the other shards
	for _, shard := range shards[1:] {
		si, err := wr.ts.GetShard(keyspace, shard)
		if err != nil {
			er.RecordError(err)
			continue
		}

		if si.MasterAlias.Uid == topo.NO_TABLET {
			er.RecordError(fmt.Errorf("No master in shard %v/%v", keyspace, shard))
			continue
		}

		aliases, err := topo.FindAllTabletAliasesInShard(context.TODO(), wr.ts, keyspace, shard)
		if err != nil {
			er.RecordError(err)
			continue
		}

		for _, alias := range aliases {
			wg.Add(1)
			go wr.diffSchema(referenceSchema, referenceAlias, alias, excludeTables, includeViews, &wg, &er)
		}
	}
	wg.Wait()
	if er.HasErrors() {
		return fmt.Errorf("Schema diffs:\n%v", er.Error().Error())
	}
	return nil
}