// findMasterTargets looks up the master for the destination shard, and set the destinations appropriately. // It should be used if vtworker will only want to write to masters. func (vscw *VerticalSplitCloneWorker) findMasterTargets() error { var err error // find all the targets in the destination keyspace / shard vscw.reloadAliases, err = topo.FindAllTabletAliasesInShard(context.TODO(), vscw.wr.TopoServer(), vscw.destinationKeyspace, vscw.destinationShard) if err != nil { return fmt.Errorf("cannot find all reload target tablets in %v/%v: %v", vscw.destinationKeyspace, vscw.destinationShard, err) } vscw.wr.Logger().Infof("Found %v reload target aliases", len(vscw.reloadAliases)) // get the TabletInfo for all targets vscw.reloadTablets, err = topo.GetTabletMap(context.TODO(), vscw.wr.TopoServer(), vscw.reloadAliases) if err != nil { return fmt.Errorf("cannot read all reload target tablets in %v/%v: %v", vscw.destinationKeyspace, vscw.destinationShard, err) } // find and validate the master for tabletAlias, ti := range vscw.reloadTablets { if ti.Type == topo.TYPE_MASTER { if vscw.destinationMasterAlias.IsZero() { vscw.destinationMasterAlias = tabletAlias vscw.destinationAliases = []topo.TabletAlias{tabletAlias} vscw.destinationTablets = map[topo.TabletAlias]*topo.TabletInfo{tabletAlias: ti} } else { return fmt.Errorf("multiple masters in destination shard: %v and %v at least", vscw.destinationMasterAlias, tabletAlias) } } } if vscw.destinationMasterAlias.IsZero() { return fmt.Errorf("no master in destination shard") } vscw.wr.Logger().Infof("Found target master alias %v in shard %v/%v", vscw.destinationMasterAlias, vscw.destinationKeyspace, vscw.destinationShard) return nil }
func (wr *Wrangler) ValidateVersionKeyspace(keyspace string) error { // find all the shards shards, err := wr.ts.GetShardNames(keyspace) if err != nil { return err } // corner cases if len(shards) == 0 { return fmt.Errorf("No shards in keyspace %v", keyspace) } sort.Strings(shards) if len(shards) == 1 { return wr.ValidateVersionShard(keyspace, shards[0]) } // find the reference version using the first shard's master si, err := wr.ts.GetShard(keyspace, shards[0]) if err != nil { return err } if si.MasterAlias.Uid == topo.NO_TABLET { return fmt.Errorf("No master in shard %v/%v", keyspace, shards[0]) } referenceAlias := si.MasterAlias log.Infof("Gathering version for reference master %v", referenceAlias) referenceVersion, err := wr.GetVersion(referenceAlias) if err != nil { return err } // then diff with all tablets but master 0 er := concurrency.AllErrorRecorder{} wg := sync.WaitGroup{} for _, shard := range shards { aliases, err := topo.FindAllTabletAliasesInShard(context.TODO(), wr.ts, keyspace, shard) if err != nil { er.RecordError(err) continue } for _, alias := range aliases { if alias == si.MasterAlias { continue } wg.Add(1) go wr.diffVersion(referenceVersion, referenceAlias, alias, &wg, &er) } } wg.Wait() if er.HasErrors() { return fmt.Errorf("Version diffs:\n%v", er.Error().Error()) } return nil }
// FIXME(msolomon) This validate presumes the master is up and running. // Even when that isn't true, there are validation processes that might be valuable. func (wr *Wrangler) validateShard(keyspace, shard string, pingTablets bool, wg *sync.WaitGroup, results chan<- error) { shardInfo, err := wr.ts.GetShard(keyspace, shard) if err != nil { results <- fmt.Errorf("TopologyServer.GetShard(%v, %v) failed: %v", keyspace, shard, err) return } aliases, err := topo.FindAllTabletAliasesInShard(wr.ctx, wr.ts, keyspace, shard) if err != nil { results <- fmt.Errorf("TopologyServer.FindAllTabletAliasesInShard(%v, %v) failed: %v", keyspace, shard, err) return } tabletMap, _ := topo.GetTabletMap(wr.ctx, wr.ts, aliases) var masterAlias topo.TabletAlias for _, alias := range aliases { tabletInfo, ok := tabletMap[alias] if !ok { results <- fmt.Errorf("tablet %v not found in map", alias) continue } if tabletInfo.Parent.Uid == topo.NO_TABLET { if masterAlias.Cell != "" { results <- fmt.Errorf("tablet %v already has a master %v", alias, masterAlias) } else { masterAlias = alias } } } if masterAlias.Cell == "" { results <- fmt.Errorf("no master for shard %v/%v", keyspace, shard) } else if shardInfo.MasterAlias != masterAlias { results <- fmt.Errorf("master mismatch for shard %v/%v: found %v, expected %v", keyspace, shard, masterAlias, shardInfo.MasterAlias) } for _, alias := range aliases { wg.Add(1) go func(alias topo.TabletAlias) { defer wg.Done() if err := topo.Validate(wr.ts, alias); err != nil { results <- fmt.Errorf("Validate(%v) failed: %v", alias, err) } else { wr.Logger().Infof("tablet %v is valid", alias) } }(alias) } if pingTablets { wr.validateReplication(shardInfo, tabletMap, results) wr.pingTablets(tabletMap, wg, results) } return }
// Validate all tablets in all discoverable cells, even if they are // not in the replication graph. func (wr *Wrangler) validateAllTablets(wg *sync.WaitGroup, results chan<- error) { cellSet := make(map[string]bool, 16) keyspaces, err := wr.ts.GetKeyspaces() if err != nil { results <- fmt.Errorf("TopologyServer.GetKeyspaces failed: %v", err) return } for _, keyspace := range keyspaces { shards, err := wr.ts.GetShardNames(keyspace) if err != nil { results <- fmt.Errorf("TopologyServer.GetShardNames(%v) failed: %v", keyspace, err) return } for _, shard := range shards { aliases, err := topo.FindAllTabletAliasesInShard(wr.ctx, wr.ts, keyspace, shard) if err != nil { results <- fmt.Errorf("TopologyServer.FindAllTabletAliasesInShard(%v, %v) failed: %v", keyspace, shard, err) return } for _, alias := range aliases { cellSet[alias.Cell] = true } } } for cell := range cellSet { aliases, err := wr.ts.GetTabletsByCell(cell) if err != nil { results <- fmt.Errorf("TopologyServer.GetTabletsByCell(%v) failed: %v", cell, err) continue } for _, alias := range aliases { wg.Add(1) go func(alias topo.TabletAlias) { defer wg.Done() if err := topo.Validate(wr.ts, alias); err != nil { results <- fmt.Errorf("Validate(%v) failed: %v", alias, err) } else { wr.Logger().Infof("tablet %v is valid", alias) } }(alias) } } }
// findMasterTargets looks up the masters for all destination shards, and set the destinations appropriately. // It should be used if vtworker will only want to write to masters. func (scw *SplitCloneWorker) findMasterTargets() error { var err error scw.destinationAliases = make([][]topo.TabletAlias, len(scw.destinationShards)) scw.destinationTablets = make([]map[topo.TabletAlias]*topo.TabletInfo, len(scw.destinationShards)) scw.destinationMasterAliases = make([]topo.TabletAlias, len(scw.destinationShards)) scw.reloadAliases = make([][]topo.TabletAlias, len(scw.destinationShards)) scw.reloadTablets = make([]map[topo.TabletAlias]*topo.TabletInfo, len(scw.destinationShards)) for shardIndex, si := range scw.destinationShards { scw.reloadAliases[shardIndex], err = topo.FindAllTabletAliasesInShard(context.TODO(), scw.wr.TopoServer(), si.Keyspace(), si.ShardName()) if err != nil { return fmt.Errorf("cannot find all reload target tablets in %v/%v: %v", si.Keyspace(), si.ShardName(), err) } scw.wr.Logger().Infof("Found %v reload target aliases in shard %v/%v", len(scw.reloadAliases[shardIndex]), si.Keyspace(), si.ShardName()) // get the TabletInfo for all targets scw.reloadTablets[shardIndex], err = topo.GetTabletMap(context.TODO(), scw.wr.TopoServer(), scw.reloadAliases[shardIndex]) if err != nil { return fmt.Errorf("cannot read all reload target tablets in %v/%v: %v", si.Keyspace(), si.ShardName(), err) } // find and validate the master for tabletAlias, ti := range scw.reloadTablets[shardIndex] { if ti.Type == topo.TYPE_MASTER { if scw.destinationMasterAliases[shardIndex].IsZero() { scw.destinationMasterAliases[shardIndex] = tabletAlias scw.destinationAliases[shardIndex] = []topo.TabletAlias{tabletAlias} scw.destinationTablets[shardIndex] = map[topo.TabletAlias]*topo.TabletInfo{tabletAlias: ti} } else { return fmt.Errorf("multiple masters in destination shard: %v and %v at least", scw.destinationMasterAliases[shardIndex], tabletAlias) } } } if scw.destinationMasterAliases[shardIndex].IsZero() { return fmt.Errorf("no master in destination shard") } scw.wr.Logger().Infof("Found target master alias %v in shard %v/%v", scw.destinationMasterAliases[shardIndex], si.Keyspace(), si.ShardName()) } return nil }
func (wr *Wrangler) ValidateVersionShard(keyspace, shard string) error { si, err := wr.ts.GetShard(keyspace, shard) if err != nil { return err } // get version from the master, or error if si.MasterAlias.Uid == topo.NO_TABLET { return fmt.Errorf("No master in shard %v/%v", keyspace, shard) } log.Infof("Gathering version for master %v", si.MasterAlias) masterVersion, err := wr.GetVersion(si.MasterAlias) if err != nil { return err } // read all the aliases in the shard, that is all tablets that are // replicating from the master aliases, err := topo.FindAllTabletAliasesInShard(context.TODO(), wr.ts, keyspace, shard) if err != nil { return err } // then diff with all slaves er := concurrency.AllErrorRecorder{} wg := sync.WaitGroup{} for _, alias := range aliases { if alias == si.MasterAlias { continue } wg.Add(1) go wr.diffVersion(masterVersion, si.MasterAlias, alias, &wg, &er) } wg.Wait() if er.HasErrors() { return fmt.Errorf("Version diffs:\n%v", er.Error().Error()) } return nil }
// Export addresses from the VT serving graph to a legacy zkns server. func (wr *Wrangler) ExportZknsForKeyspace(keyspace string) error { zkTopo, ok := wr.ts.(*zktopo.Server) if !ok { return fmt.Errorf("ExportZknsForKeyspace only works with zktopo") } zconn := zkTopo.GetZConn() shardNames, err := wr.ts.GetShardNames(keyspace) if err != nil { return err } // Scan the first shard to discover which cells need local serving data. aliases, err := topo.FindAllTabletAliasesInShard(context.TODO(), wr.ts, keyspace, shardNames[0]) if err != nil { return err } cellMap := make(map[string]bool) for _, alias := range aliases { cellMap[alias.Cell] = true } for cell := range cellMap { vtnsRootPath := fmt.Sprintf("/zk/%v/vt/ns/%v", cell, keyspace) zknsRootPath := fmt.Sprintf("/zk/%v/zkns/vt/%v", cell, keyspace) // Get the existing list of zkns children. If they don't get rewritten, // delete them as stale entries. zknsChildren, err := zk.ChildrenRecursive(zconn, zknsRootPath) if err != nil { if zookeeper.IsError(err, zookeeper.ZNONODE) { zknsChildren = make([]string, 0) } else { return err } } staleZknsPaths := make(map[string]bool) for _, child := range zknsChildren { staleZknsPaths[path.Join(zknsRootPath, child)] = true } vtnsChildren, err := zk.ChildrenRecursive(zconn, vtnsRootPath) if err != nil { if zookeeper.IsError(err, zookeeper.ZNONODE) { vtnsChildren = make([]string, 0) } else { return err } } for _, child := range vtnsChildren { vtnsAddrPath := path.Join(vtnsRootPath, child) zknsAddrPath := path.Join(zknsRootPath, child) _, stat, err := zconn.Get(vtnsAddrPath) if err != nil { return err } // Leaf nodes correspond to zkns vdns files in the old setup. if stat.NumChildren() > 0 { continue } zknsPathsWritten, err := wr.exportVtnsToZkns(zconn, vtnsAddrPath, zknsAddrPath) if err != nil { return err } log.V(6).Infof("zknsPathsWritten: %v", zknsPathsWritten) for _, zkPath := range zknsPathsWritten { delete(staleZknsPaths, zkPath) } } log.V(6).Infof("staleZknsPaths: %v", staleZknsPaths) prunePaths := make([]string, 0, len(staleZknsPaths)) for prunePath := range staleZknsPaths { prunePaths = append(prunePaths, prunePath) } sort.Strings(prunePaths) // Prune paths in reverse order so we remove children first for i := len(prunePaths) - 1; i >= 0; i-- { log.Infof("prune stale zkns path %v", prunePaths[i]) if err := zconn.Delete(prunePaths[i], -1); err != nil && !zookeeper.IsError(err, zookeeper.ZNOTEMPTY) { return err } } } return nil }
func (wr *Wrangler) applySchemaShard(shardInfo *topo.ShardInfo, preflight *myproto.SchemaChangeResult, masterTabletAlias topo.TabletAlias, change string, newParentTabletAlias topo.TabletAlias, simple, force bool) (*myproto.SchemaChangeResult, error) { // find all the shards we need to handle aliases, err := topo.FindAllTabletAliasesInShard(context.TODO(), wr.ts, shardInfo.Keyspace(), shardInfo.ShardName()) if err != nil { return nil, err } // build the array of TabletStatus we're going to use statusArray := make([]*TabletStatus, 0, len(aliases)-1) for _, alias := range aliases { if alias == masterTabletAlias { // we skip the master continue } ti, err := wr.ts.GetTablet(alias) if err != nil { return nil, err } if ti.Type == topo.TYPE_LAG { // lag tablets are usually behind, not replicating, // and a general pain. So let's just skip them // all together. // TODO(alainjobart) figure out other types to skip: // ValidateSchemaShard only does the serving types. // We do everything in the replication graph // but LAG. This seems fine for now. log.Infof("Skipping tablet %v as it is LAG", ti.Alias) continue } statusArray = append(statusArray, &TabletStatus{ti: ti}) } // get schema on all tablets. log.Infof("Getting schema on all tablets for shard %v/%v", shardInfo.Keyspace(), shardInfo.ShardName()) wg := &sync.WaitGroup{} for _, status := range statusArray { wg.Add(1) go func(status *TabletStatus) { status.beforeSchema, status.lastError = wr.tmc.GetSchema(wr.ctx, status.ti, nil, nil, false) wg.Done() }(status) } wg.Wait() // quick check for errors for _, status := range statusArray { if status.lastError != nil { return nil, fmt.Errorf("Error getting schema on tablet %v: %v", status.ti.Alias, status.lastError) } } // simple or complex? if simple { return wr.applySchemaShardSimple(statusArray, preflight, masterTabletAlias, change, force) } return wr.applySchemaShardComplex(statusArray, shardInfo, preflight, masterTabletAlias, change, newParentTabletAlias, force) }
// ValidateSchemaShard will diff the schema from all the tablets in // the keyspace. func (wr *Wrangler) ValidateSchemaKeyspace(keyspace string, excludeTables []string, includeViews bool) error { // find all the shards shards, err := wr.ts.GetShardNames(keyspace) if err != nil { return err } // corner cases if len(shards) == 0 { return fmt.Errorf("No shards in keyspace %v", keyspace) } sort.Strings(shards) if len(shards) == 1 { return wr.ValidateSchemaShard(keyspace, shards[0], excludeTables, includeViews) } // find the reference schema using the first shard's master si, err := wr.ts.GetShard(keyspace, shards[0]) if err != nil { return err } if si.MasterAlias.Uid == topo.NO_TABLET { return fmt.Errorf("No master in shard %v/%v", keyspace, shards[0]) } referenceAlias := si.MasterAlias log.Infof("Gathering schema for reference master %v", referenceAlias) referenceSchema, err := wr.GetSchema(referenceAlias, nil, excludeTables, includeViews) if err != nil { return err } // then diff with all other tablets everywhere er := concurrency.AllErrorRecorder{} wg := sync.WaitGroup{} // first diff the slaves in the reference shard 0 aliases, err := topo.FindAllTabletAliasesInShard(context.TODO(), wr.ts, keyspace, shards[0]) if err != nil { return err } for _, alias := range aliases { if alias == si.MasterAlias { continue } wg.Add(1) go wr.diffSchema(referenceSchema, referenceAlias, alias, excludeTables, includeViews, &wg, &er) } // then diffs all tablets in the other shards for _, shard := range shards[1:] { si, err := wr.ts.GetShard(keyspace, shard) if err != nil { er.RecordError(err) continue } if si.MasterAlias.Uid == topo.NO_TABLET { er.RecordError(fmt.Errorf("No master in shard %v/%v", keyspace, shard)) continue } aliases, err := topo.FindAllTabletAliasesInShard(context.TODO(), wr.ts, keyspace, shard) if err != nil { er.RecordError(err) continue } for _, alias := range aliases { wg.Add(1) go wr.diffSchema(referenceSchema, referenceAlias, alias, excludeTables, includeViews, &wg, &er) } } wg.Wait() if er.HasErrors() { return fmt.Errorf("Schema diffs:\n%v", er.Error().Error()) } return nil }