// If error is not nil, the results in the dictionary are incomplete. func GetTabletMapForShard(ts topo.Server, keyspace, shard string) (map[topo.TabletAlias]*topo.TabletInfo, error) { aliases, err := topo.FindAllTabletAliasesInShard(ts, keyspace, shard) if err != nil { return nil, err } return GetTabletMap(ts, aliases) }
// findTargets phase: // - find one rdonly in the source shard // - mark it as 'checker' pointing back to us // - get the aliases of all the targets func (vscw *VerticalSplitCloneWorker) findTargets() error { vscw.setState(stateVSCFindTargets) // find an appropriate endpoint in the source shard var err error vscw.sourceAlias, err = findChecker(vscw.wr, vscw.cleaner, vscw.cell, vscw.sourceKeyspace, "0") if err != nil { return fmt.Errorf("cannot find checker for %v/%v/0: %v", vscw.cell, vscw.sourceKeyspace, err) } vscw.wr.Logger().Infof("Using tablet %v as the source", vscw.sourceAlias) // get the tablet info for it vscw.sourceTablet, err = vscw.wr.TopoServer().GetTablet(vscw.sourceAlias) if err != nil { return fmt.Errorf("cannot read tablet %v: %v", vscw.sourceTablet, err) } // stop replication on it if err := vscw.wr.TabletManagerClient().StopSlave(vscw.sourceTablet, 30*time.Second); err != nil { return fmt.Errorf("cannot stop replication on tablet %v", vscw.sourceAlias) } wrangler.RecordStartSlaveAction(vscw.cleaner, vscw.sourceTablet, 30*time.Second) action, err := wrangler.FindChangeSlaveTypeActionByTarget(vscw.cleaner, vscw.sourceAlias) if err != nil { return fmt.Errorf("cannot find ChangeSlaveType action for %v: %v", vscw.sourceAlias, err) } action.TabletType = topo.TYPE_SPARE // find all the targets in the destination keyspace / shard vscw.destinationAliases, err = topo.FindAllTabletAliasesInShard(vscw.wr.TopoServer(), vscw.destinationKeyspace, vscw.destinationShard) if err != nil { return fmt.Errorf("cannot find all target tablets in %v/%v: %v", vscw.destinationKeyspace, vscw.destinationShard, err) } vscw.wr.Logger().Infof("Found %v target aliases", len(vscw.destinationAliases)) // get the TabletInfo for all targets vscw.destinationTablets, err = topo.GetTabletMap(vscw.wr.TopoServer(), vscw.destinationAliases) if err != nil { return fmt.Errorf("cannot read all target tablets in %v/%v: %v", vscw.destinationKeyspace, vscw.destinationShard, err) } // find and validate the master for tabletAlias, ti := range vscw.destinationTablets { if ti.Type == topo.TYPE_MASTER { if vscw.destinationMasterAlias.IsZero() { vscw.destinationMasterAlias = tabletAlias } else { return fmt.Errorf("multiple masters in destination shard: %v and %v at least", vscw.destinationMasterAlias, tabletAlias) } } } if vscw.destinationMasterAlias.IsZero() { return fmt.Errorf("no master in destination shard") } return nil }
// findTargets phase: // - find one rdonly in the source shard // - mark it as 'checker' pointing back to us // - get the aliases of all the targets func (scw *SplitCloneWorker) findTargets() error { scw.setState(stateSCFindTargets) var err error // find an appropriate endpoint in the source shards scw.sourceAliases = make([]topo.TabletAlias, len(scw.sourceShards)) for i, si := range scw.sourceShards { scw.sourceAliases[i], err = findChecker(scw.wr, scw.cleaner, scw.cell, si.Keyspace(), si.ShardName()) if err != nil { return fmt.Errorf("cannot find checker for %v/%v/%v: %v", scw.cell, si.Keyspace(), si.ShardName(), err) } scw.wr.Logger().Infof("Using tablet %v as source for %v/%v", scw.sourceAliases[i], si.Keyspace(), si.ShardName()) } // get the tablet info for them scw.sourceTablets = make([]*topo.TabletInfo, len(scw.sourceAliases)) for i, alias := range scw.sourceAliases { scw.sourceTablets[i], err = scw.wr.TopoServer().GetTablet(alias) if err != nil { return fmt.Errorf("cannot read tablet %v: %v", alias, err) } } // find all the targets in the destination shards scw.destinationAliases = make([][]topo.TabletAlias, len(scw.destinationShards)) scw.destinationTablets = make([]map[topo.TabletAlias]*topo.TabletInfo, len(scw.destinationShards)) scw.destinationMasterAliases = make([]topo.TabletAlias, len(scw.destinationShards)) for shardIndex, si := range scw.destinationShards { scw.destinationAliases[shardIndex], err = topo.FindAllTabletAliasesInShard(scw.wr.TopoServer(), si.Keyspace(), si.ShardName()) if err != nil { return fmt.Errorf("cannot find all target tablets in %v/%v: %v", si.Keyspace(), si.ShardName(), err) } scw.wr.Logger().Infof("Found %v target aliases in shard %v/%v", len(scw.destinationAliases[shardIndex]), si.Keyspace(), si.ShardName()) // get the TabletInfo for all targets scw.destinationTablets[shardIndex], err = topo.GetTabletMap(scw.wr.TopoServer(), scw.destinationAliases[shardIndex]) if err != nil { return fmt.Errorf("cannot read all target tablets in %v/%v: %v", si.Keyspace(), si.ShardName(), err) } // find and validate the master for tabletAlias, ti := range scw.destinationTablets[shardIndex] { if ti.Type == topo.TYPE_MASTER { if scw.destinationMasterAliases[shardIndex].IsZero() { scw.destinationMasterAliases[shardIndex] = tabletAlias } else { return fmt.Errorf("multiple masters in destination shard: %v and %v at least", scw.destinationMasterAliases[shardIndex], tabletAlias) } } } if scw.destinationMasterAliases[shardIndex].IsZero() { return fmt.Errorf("no master in destination shard") } } return nil }
// FIXME(msolomon) This validate presumes the master is up and running. // Even when that isn't true, there are validation processes that might be valuable. func (wr *Wrangler) validateShard(ctx context.Context, keyspace, shard string, pingTablets bool, wg *sync.WaitGroup, results chan<- error) { shardInfo, err := wr.ts.GetShard(ctx, keyspace, shard) if err != nil { results <- fmt.Errorf("TopologyServer.GetShard(%v, %v) failed: %v", keyspace, shard, err) return } aliases, err := topo.FindAllTabletAliasesInShard(ctx, wr.ts, keyspace, shard) if err != nil { results <- fmt.Errorf("TopologyServer.FindAllTabletAliasesInShard(%v, %v) failed: %v", keyspace, shard, err) return } tabletMap, _ := topo.GetTabletMap(ctx, wr.ts, aliases) var masterAlias *pb.TabletAlias for _, alias := range aliases { tabletInfo, ok := tabletMap[*alias] if !ok { results <- fmt.Errorf("tablet %v not found in map", alias) continue } if tabletInfo.Type == pb.TabletType_MASTER { if masterAlias != nil { results <- fmt.Errorf("shard %v/%v already has master %v but found other master %v", keyspace, shard, masterAlias, alias) } else { masterAlias = alias } } } if masterAlias == nil { results <- fmt.Errorf("no master for shard %v/%v", keyspace, shard) } else if !topo.TabletAliasEqual(shardInfo.MasterAlias, masterAlias) { results <- fmt.Errorf("master mismatch for shard %v/%v: found %v, expected %v", keyspace, shard, masterAlias, shardInfo.MasterAlias) } for _, alias := range aliases { wg.Add(1) go func(alias *pb.TabletAlias) { defer wg.Done() if err := topo.Validate(ctx, wr.ts, alias); err != nil { results <- fmt.Errorf("Validate(%v) failed: %v", alias, err) } else { wr.Logger().Infof("tablet %v is valid", alias) } }(alias) } if pingTablets { wr.validateReplication(ctx, shardInfo, tabletMap, results) wr.pingTablets(ctx, tabletMap, wg, results) } return }
func (wr *Wrangler) ShardMultiRestore(keyspace, shard string, sources []topo.TabletAlias, tables []string, concurrency, fetchConcurrency, insertTableConcurrency, fetchRetryCount int, strategy string) error { // check parameters if len(tables) > 0 && len(sources) > 1 { return fmt.Errorf("ShardMultiRestore can only handle one source when tables are specified") } // lock the shard to perform the changes we need done actionNode := actionnode.ShardMultiRestore(&actionnode.MultiRestoreArgs{ SrcTabletAliases: sources, Concurrency: concurrency, FetchConcurrency: fetchConcurrency, InsertTableConcurrency: insertTableConcurrency, FetchRetryCount: fetchRetryCount, Strategy: strategy}) lockPath, err := wr.lockShard(keyspace, shard, actionNode) if err != nil { return err } mrErr := wr.SetSourceShards(keyspace, shard, sources, tables) err = wr.unlockShard(keyspace, shard, actionNode, lockPath, mrErr) if err != nil { if mrErr != nil { log.Errorf("unlockShard got error back: %v", err) return mrErr } return err } if mrErr != nil { return mrErr } // find all tablets in the shard destTablets, err := topo.FindAllTabletAliasesInShard(wr.ts, keyspace, shard) if err != nil { return err } // now launch MultiRestore on all tablets we need to do rec := cc.AllErrorRecorder{} wg := sync.WaitGroup{} for _, tabletAlias := range destTablets { wg.Add(1) go func(tabletAlias topo.TabletAlias) { log.Infof("Starting multirestore on tablet %v", tabletAlias) err := wr.MultiRestore(tabletAlias, sources, concurrency, fetchConcurrency, insertTableConcurrency, fetchRetryCount, strategy) log.Infof("Multirestore on tablet %v is done (err=%v)", tabletAlias, err) rec.RecordError(err) wg.Done() }(tabletAlias) } wg.Wait() return rec.Error() }
func (wr *Wrangler) ValidatePermissionsKeyspace(keyspace string) error { // find all the shards shards, err := wr.ts.GetShardNames(keyspace) if err != nil { return err } // corner cases if len(shards) == 0 { return fmt.Errorf("No shards in keyspace %v", keyspace) } sort.Strings(shards) if len(shards) == 1 { return wr.ValidatePermissionsShard(keyspace, shards[0]) } // find the reference permissions using the first shard's master si, err := wr.ts.GetShard(keyspace, shards[0]) if err != nil { return err } if si.MasterAlias.Uid == topo.NO_TABLET { return fmt.Errorf("No master in shard %v/%v", keyspace, shards[0]) } referenceAlias := si.MasterAlias log.Infof("Gathering permissions for reference master %v", referenceAlias) referencePermissions, err := wr.GetPermissions(si.MasterAlias) if err != nil { return err } // then diff with all tablets but master 0 er := concurrency.AllErrorRecorder{} wg := sync.WaitGroup{} for _, shard := range shards { aliases, err := topo.FindAllTabletAliasesInShard(wr.ts, keyspace, shard) if err != nil { er.RecordError(err) continue } for _, alias := range aliases { if alias == si.MasterAlias { continue } wg.Add(1) go wr.diffPermissions(referencePermissions, referenceAlias, alias, &wg, &er) } } wg.Wait() if er.HasErrors() { return fmt.Errorf("Permissions diffs:\n%v", er.Error().Error()) } return nil }
// ValidateVersionKeyspace validates all versions are the same in all // tablets in a keyspace func (wr *Wrangler) ValidateVersionKeyspace(ctx context.Context, keyspace string) error { // find all the shards shards, err := wr.ts.GetShardNames(ctx, keyspace) if err != nil { return err } // corner cases if len(shards) == 0 { return fmt.Errorf("No shards in keyspace %v", keyspace) } sort.Strings(shards) if len(shards) == 1 { return wr.ValidateVersionShard(ctx, keyspace, shards[0]) } // find the reference version using the first shard's master si, err := wr.ts.GetShard(ctx, keyspace, shards[0]) if err != nil { return err } if topo.TabletAliasIsZero(si.MasterAlias) { return fmt.Errorf("No master in shard %v/%v", keyspace, shards[0]) } referenceAlias := si.MasterAlias log.Infof("Gathering version for reference master %v", topo.TabletAliasString(referenceAlias)) referenceVersion, err := wr.GetVersion(ctx, referenceAlias) if err != nil { return err } // then diff with all tablets but master 0 er := concurrency.AllErrorRecorder{} wg := sync.WaitGroup{} for _, shard := range shards { aliases, err := topo.FindAllTabletAliasesInShard(ctx, wr.ts, keyspace, shard) if err != nil { er.RecordError(err) continue } for _, alias := range aliases { if topo.TabletAliasEqual(alias, si.MasterAlias) { continue } wg.Add(1) go wr.diffVersion(ctx, referenceVersion, referenceAlias, alias, &wg, &er) } } wg.Wait() if er.HasErrors() { return fmt.Errorf("Version diffs:\n%v", er.Error().Error()) } return nil }
// FIXME(msolomon) This validate presumes the master is up and running. // Even when that isn't true, there are validation processes that might be valuable. func (wr *Wrangler) validateShard(keyspace, shard string, pingTablets bool, wg *sync.WaitGroup, results chan<- vresult) { shardInfo, err := wr.ts.GetShard(keyspace, shard) if err != nil { results <- vresult{keyspace + "/" + shard, err} return } aliases, err := topo.FindAllTabletAliasesInShard(wr.ts, keyspace, shard) if err != nil { results <- vresult{keyspace + "/" + shard, err} } tabletMap, _ := GetTabletMap(wr.ts, aliases) var masterAlias topo.TabletAlias for _, alias := range aliases { tabletInfo, ok := tabletMap[alias] if !ok { results <- vresult{alias.String(), fmt.Errorf("tablet not found in map")} continue } if tabletInfo.Parent.Uid == topo.NO_TABLET { if masterAlias.Cell != "" { results <- vresult{alias.String(), fmt.Errorf("tablet already has a master %v", masterAlias)} } else { masterAlias = alias } } } if masterAlias.Cell == "" { results <- vresult{keyspace + "/" + shard, fmt.Errorf("no master for shard")} } else if shardInfo.MasterAlias != masterAlias { results <- vresult{keyspace + "/" + shard, fmt.Errorf("master mismatch for shard: found %v, expected %v", masterAlias, shardInfo.MasterAlias)} } for _, alias := range aliases { tabletReplicationPath := masterAlias.String() if alias != masterAlias { tabletReplicationPath += "/" + alias.String() } wg.Add(1) go func(alias topo.TabletAlias) { results <- vresult{tabletReplicationPath, topo.Validate(wr.ts, alias, tabletReplicationPath)} wg.Done() }(alias) } if pingTablets { wr.validateReplication(shardInfo, tabletMap, results) wr.pingTablets(tabletMap, wg, results) } return }
func (wr *Wrangler) applySchemaShard(ctx context.Context, shardInfo *topo.ShardInfo, preflight *myproto.SchemaChangeResult, masterTabletAlias *pb.TabletAlias, change string, newParentTabletAlias *pb.TabletAlias, simple, force bool, waitSlaveTimeout time.Duration) (*myproto.SchemaChangeResult, error) { // find all the shards we need to handle aliases, err := topo.FindAllTabletAliasesInShard(ctx, wr.ts, shardInfo.Keyspace(), shardInfo.ShardName()) if err != nil { return nil, err } // build the array of tabletStatus we're going to use statusArray := make([]*tabletStatus, 0, len(aliases)-1) for _, alias := range aliases { if alias == masterTabletAlias { // we skip the master continue } ti, err := wr.ts.GetTablet(ctx, alias) if err != nil { return nil, err } statusArray = append(statusArray, &tabletStatus{ti: ti}) } // get schema on all tablets. log.Infof("Getting schema on all tablets for shard %v/%v", shardInfo.Keyspace(), shardInfo.ShardName()) wg := &sync.WaitGroup{} for _, status := range statusArray { wg.Add(1) go func(status *tabletStatus) { status.beforeSchema, status.lastError = wr.tmc.GetSchema(ctx, status.ti, nil, nil, false) wg.Done() }(status) } wg.Wait() // quick check for errors for _, status := range statusArray { if status.lastError != nil { return nil, fmt.Errorf("Error getting schema on tablet %v: %v", status.ti.Alias, status.lastError) } } // simple or complex? if simple { return wr.applySchemaShardSimple(ctx, statusArray, preflight, masterTabletAlias, change, force) } return wr.applySchemaShardComplex(ctx, statusArray, shardInfo, preflight, masterTabletAlias, change, newParentTabletAlias, force, waitSlaveTimeout) }
// GetTabletMapForShard returns the tablets for a shard. It can return // topo.ErrPartialResult if it couldn't read all the cells, or all // the individual tablets, in which case the map is valid, but partial. func GetTabletMapForShard(ts topo.Server, keyspace, shard string) (map[topo.TabletAlias]*topo.TabletInfo, error) { // if we get a partial result, we keep going. It most likely means // a cell is out of commission. aliases, err := topo.FindAllTabletAliasesInShard(ts, keyspace, shard) if err != nil && err != topo.ErrPartialResult { return nil, err } // get the tablets for the cells we were able to reach, forward // topo.ErrPartialResult from FindAllTabletAliasesInShard result, gerr := GetTabletMap(ts, aliases) if gerr == nil && err != nil { gerr = err } return result, gerr }
// Validate all tablets in all discoverable cells, even if they are // not in the replication graph. func (wr *Wrangler) validateAllTablets(ctx context.Context, wg *sync.WaitGroup, results chan<- error) { cellSet := make(map[string]bool, 16) keyspaces, err := wr.ts.GetKeyspaces(ctx) if err != nil { results <- fmt.Errorf("TopologyServer.GetKeyspaces failed: %v", err) return } for _, keyspace := range keyspaces { shards, err := wr.ts.GetShardNames(ctx, keyspace) if err != nil { results <- fmt.Errorf("TopologyServer.GetShardNames(%v) failed: %v", keyspace, err) return } for _, shard := range shards { aliases, err := topo.FindAllTabletAliasesInShard(ctx, wr.ts, keyspace, shard) if err != nil { results <- fmt.Errorf("TopologyServer.FindAllTabletAliasesInShard(%v, %v) failed: %v", keyspace, shard, err) return } for _, alias := range aliases { cellSet[alias.Cell] = true } } } for cell := range cellSet { aliases, err := wr.ts.GetTabletsByCell(ctx, cell) if err != nil { results <- fmt.Errorf("TopologyServer.GetTabletsByCell(%v) failed: %v", cell, err) continue } for _, alias := range aliases { wg.Add(1) go func(alias *pb.TabletAlias) { defer wg.Done() if err := topo.Validate(ctx, wr.ts, alias); err != nil { results <- fmt.Errorf("Validate(%v) failed: %v", alias, err) } else { wr.Logger().Infof("tablet %v is valid", alias) } }(alias) } } }
func (wr *Wrangler) ShardMultiRestore(keyspace, shard string, sources []topo.TabletAlias, concurrency, fetchConcurrency, insertTableConcurrency, fetchRetryCount int, strategy string) error { // lock the shard to perform the changes we need done actionNode := wr.ai.ShardMultiRestore(&tm.MultiRestoreArgs{ SrcTabletAliases: sources, Concurrency: concurrency, FetchConcurrency: fetchConcurrency, InsertTableConcurrency: insertTableConcurrency, FetchRetryCount: fetchRetryCount, Strategy: strategy}) lockPath, err := wr.lockShard(keyspace, shard, actionNode) if err != nil { return err } mrErr := wr.shardMultiRestore(keyspace, shard, sources, concurrency, fetchConcurrency, insertTableConcurrency, fetchRetryCount, strategy) err = wr.unlockShard(keyspace, shard, actionNode, lockPath, mrErr) if err != nil { return err } if mrErr != nil { return mrErr } // find all tablets in the shard destTablets, err := topo.FindAllTabletAliasesInShard(wr.ts, keyspace, shard) if err != nil { return err } // now launch MultiRestore on all tablets we need to do rec := cc.AllErrorRecorder{} wg := sync.WaitGroup{} for _, tabletAlias := range destTablets { wg.Add(1) go func(tabletAlias topo.TabletAlias) { log.Infof("Starting multirestore on tablet %v", tabletAlias) err := wr.MultiRestore(tabletAlias, sources, concurrency, fetchConcurrency, insertTableConcurrency, fetchRetryCount, strategy) log.Infof("Multirestore on tablet %v is done (err=%v)", tabletAlias, err) rec.RecordError(err) wg.Done() }(tabletAlias) } wg.Wait() return rec.Error() }
// Validate all tablets in all discoverable cells, even if they are // not in the replication graph. func (wr *Wrangler) validateAllTablets(wg *sync.WaitGroup, results chan<- vresult) { cellSet := make(map[string]bool, 16) keyspaces, err := wr.ts.GetKeyspaces() if err != nil { results <- vresult{"TopologyServer.GetKeyspaces", err} return } for _, keyspace := range keyspaces { shards, err := wr.ts.GetShardNames(keyspace) if err != nil { results <- vresult{"TopologyServer.GetShardNames(" + keyspace + ")", err} return } for _, shard := range shards { aliases, err := topo.FindAllTabletAliasesInShard(wr.ts, keyspace, shard) if err != nil { results <- vresult{"TopologyServer.FindAllTabletAliasesInShard(" + keyspace + "," + shard + ")", err} return } for _, alias := range aliases { cellSet[alias.Cell] = true } } } for cell := range cellSet { aliases, err := wr.ts.GetTabletsByCell(cell) if err != nil { results <- vresult{"GetTabletsByCell(" + cell + ")", err} } else { for _, alias := range aliases { wg.Add(1) go func(alias topo.TabletAlias) { results <- vresult{alias.String(), topo.Validate(wr.ts, alias)} wg.Done() }(alias) } } } }
// Does a topo lookup for a single shard, and returns: // 1. Slice of all tablet aliases for the shard. // 2. Map of tablet alias : tablet record for all tablets. func resolveReloadTabletsForShard(ctx context.Context, keyspace, shard string, wr *wrangler.Wrangler) (reloadAliases []topo.TabletAlias, reloadTablets map[topo.TabletAlias]*topo.TabletInfo, err error) { // Keep a long timeout, because we really don't want the copying to succeed, and then the worker to fail at the end. shortCtx, cancel := context.WithTimeout(ctx, 5*time.Minute) reloadAliases, err = topo.FindAllTabletAliasesInShard(shortCtx, wr.TopoServer(), keyspace, shard) cancel() if err != nil { return nil, nil, fmt.Errorf("cannot find all reload target tablets in %v/%v: %v", keyspace, shard, err) } wr.Logger().Infof("Found %v reload target aliases in shard %v/%v", len(reloadAliases), keyspace, shard) shortCtx, cancel = context.WithTimeout(ctx, 5*time.Minute) reloadTablets, err = topo.GetTabletMap(shortCtx, wr.TopoServer(), reloadAliases) cancel() if err != nil { return nil, nil, fmt.Errorf("cannot read all reload target tablets in %v/%v: %v", keyspace, shard, err) } return reloadAliases, reloadTablets, nil }
// ValidateVersionShard validates all versions are the same in all // tablets in a shard func (wr *Wrangler) ValidateVersionShard(ctx context.Context, keyspace, shard string) error { si, err := wr.ts.GetShard(ctx, keyspace, shard) if err != nil { return err } // get version from the master, or error if topo.TabletAliasIsZero(si.MasterAlias) { return fmt.Errorf("No master in shard %v/%v", keyspace, shard) } log.Infof("Gathering version for master %v", topo.TabletAliasString(si.MasterAlias)) masterVersion, err := wr.GetVersion(ctx, si.MasterAlias) if err != nil { return err } // read all the aliases in the shard, that is all tablets that are // replicating from the master aliases, err := topo.FindAllTabletAliasesInShard(ctx, wr.ts, keyspace, shard) if err != nil { return err } // then diff with all slaves er := concurrency.AllErrorRecorder{} wg := sync.WaitGroup{} for _, alias := range aliases { if topo.TabletAliasEqual(alias, si.MasterAlias) { continue } wg.Add(1) go wr.diffVersion(ctx, masterVersion, si.MasterAlias, alias, &wg, &er) } wg.Wait() if er.HasErrors() { return fmt.Errorf("Version diffs:\n%v", er.Error().Error()) } return nil }
func (wr *Wrangler) ValidateSchemaShard(keyspace, shard string, includeViews bool) error { si, err := wr.ts.GetShard(keyspace, shard) if err != nil { return err } // get schema from the master, or error if si.MasterAlias.Uid == topo.NO_TABLET { return fmt.Errorf("No master in shard %v/%v", keyspace, shard) } relog.Info("Gathering schema for master %v", si.MasterAlias) masterSchema, err := wr.GetSchema(si.MasterAlias, nil, includeViews) if err != nil { return err } // read all the aliases in the shard, that is all tablets that are // replicating from the master aliases, err := topo.FindAllTabletAliasesInShard(wr.ts, keyspace, shard) if err != nil { return err } // then diff with all slaves er := concurrency.AllErrorRecorder{} wg := sync.WaitGroup{} for _, alias := range aliases { if alias == si.MasterAlias { continue } wg.Add(1) go wr.diffSchema(masterSchema, si.MasterAlias, alias, includeViews, &wg, &er) } wg.Wait() if er.HasErrors() { return fmt.Errorf("Schema diffs:\n%v", er.Error().Error()) } return nil }
// ValidatePermissionsShard validates all the permissions are the same // in a shard func (wr *Wrangler) ValidatePermissionsShard(ctx context.Context, keyspace, shard string) error { si, err := wr.ts.GetShard(ctx, keyspace, shard) if err != nil { return err } // get permissions from the master, or error if topo.TabletAliasIsZero(si.MasterAlias) { return fmt.Errorf("No master in shard %v/%v", keyspace, shard) } log.Infof("Gathering permissions for master %v", si.MasterAlias) masterPermissions, err := wr.GetPermissions(ctx, topo.ProtoToTabletAlias(si.MasterAlias)) if err != nil { return err } // read all the aliases in the shard, that is all tablets that are // replicating from the master aliases, err := topo.FindAllTabletAliasesInShard(ctx, wr.ts, keyspace, shard) if err != nil { return err } // then diff all of them, except master er := concurrency.AllErrorRecorder{} wg := sync.WaitGroup{} for _, alias := range aliases { if alias == topo.ProtoToTabletAlias(si.MasterAlias) { continue } wg.Add(1) go wr.diffPermissions(ctx, masterPermissions, topo.ProtoToTabletAlias(si.MasterAlias), alias, &wg, &er) } wg.Wait() if er.HasErrors() { return fmt.Errorf("Permissions diffs:\n%v", er.Error().Error()) } return nil }
func initAPI(ctx context.Context, ts topo.Server, actions *ActionRepository) { tabletHealthCache := newTabletHealthCache(ts) // Cells handleCollection("cells", func(r *http.Request) (interface{}, error) { if getItemPath(r.URL.Path) != "" { return nil, errors.New("cells can only be listed, not retrieved") } return ts.GetKnownCells(ctx) }) // Keyspaces handleCollection("keyspaces", func(r *http.Request) (interface{}, error) { keyspace := getItemPath(r.URL.Path) // List all keyspaces. if keyspace == "" { return ts.GetKeyspaces(ctx) } // Perform an action on a keyspace. if r.Method == "POST" { if err := r.ParseForm(); err != nil { return nil, err } action := r.FormValue("action") if action == "" { return nil, errors.New("must specify action") } return actions.ApplyKeyspaceAction(ctx, action, keyspace, r), nil } // Get the keyspace record. return ts.GetKeyspace(ctx, keyspace) }) // Shards handleCollection("shards", func(r *http.Request) (interface{}, error) { shardPath := getItemPath(r.URL.Path) if !strings.Contains(shardPath, "/") { return nil, fmt.Errorf("invalid shard path: %q", shardPath) } parts := strings.SplitN(shardPath, "/", 2) keyspace := parts[0] shard := parts[1] // List the shards in a keyspace. if shard == "" { return ts.GetShardNames(ctx, keyspace) } // Perform an action on a shard. if r.Method == "POST" { if err := r.ParseForm(); err != nil { return nil, err } action := r.FormValue("action") if action == "" { return nil, errors.New("must specify action") } return actions.ApplyShardAction(ctx, action, keyspace, shard, r), nil } // Get the shard record. return ts.GetShard(ctx, keyspace, shard) }) // Tablets handleCollection("tablets", func(r *http.Request) (interface{}, error) { tabletPath := getItemPath(r.URL.Path) // List tablets based on query params. if tabletPath == "" { if err := r.ParseForm(); err != nil { return nil, err } shardRef := r.FormValue("shard") cell := r.FormValue("cell") if shardRef != "" { // Look up by keyspace/shard, and optionally cell. keyspace, shard, err := topo.ParseKeyspaceShardString(shardRef) if err != nil { return nil, err } if cell != "" { return topo.FindAllTabletAliasesInShardByCell(ctx, ts, keyspace, shard, []string{cell}) } return topo.FindAllTabletAliasesInShard(ctx, ts, keyspace, shard) } // Get all tablets in a cell. if cell == "" { return nil, errors.New("cell param required") } return ts.GetTabletsByCell(ctx, cell) } // Get tablet health. if parts := strings.Split(tabletPath, "/"); len(parts) == 2 && parts[1] == "health" { tabletAlias, err := topo.ParseTabletAliasString(parts[0]) if err != nil { return nil, err } return tabletHealthCache.Get(ctx, tabletAlias) } tabletAlias, err := topo.ParseTabletAliasString(tabletPath) if err != nil { return nil, err } // Perform an action on a tablet. if r.Method == "POST" { if err := r.ParseForm(); err != nil { return nil, err } action := r.FormValue("action") if action == "" { return nil, errors.New("must specify action") } return actions.ApplyTabletAction(ctx, action, tabletAlias, r), nil } // Get the tablet record. return ts.GetTablet(ctx, tabletAlias) }) // EndPoints handleCollection("endpoints", func(r *http.Request) (interface{}, error) { // We expect cell/keyspace/shard/tabletType. epPath := getItemPath(r.URL.Path) parts := strings.Split(epPath, "/") if len(parts) != 4 { return nil, fmt.Errorf("invalid cell/keyspace/shard/tabletType: %q", epPath) } if parts[3] == "" { // tabletType is empty, so list the tablet types. return ts.GetSrvTabletTypesPerShard(ctx, parts[0], parts[1], parts[2]) } // Get the endpoints object for a specific type. ep, _, err := ts.GetEndPoints(ctx, parts[0], parts[1], parts[2], topo.TabletType(parts[3])) return ep, err }) }
func initAPI(ctx context.Context, ts topo.Server) { // Get Cells handleGet("cells", func(r *http.Request) (interface{}, error) { if getItemPath(r.URL.Path) != "" { return nil, errors.New("cells can only be listed, not retrieved") } return ts.GetKnownCells(ctx) }) // Get Keyspaces handleGet("keyspaces", func(r *http.Request) (interface{}, error) { keyspace := getItemPath(r.URL.Path) if keyspace == "" { return ts.GetKeyspaces(ctx) } return ts.GetKeyspace(ctx, keyspace) }) // Get Shards handleGet("shards", func(r *http.Request) (interface{}, error) { shardPath := getItemPath(r.URL.Path) if !strings.Contains(shardPath, "/") { return nil, fmt.Errorf("invalid shard path: %q", shardPath) } parts := strings.SplitN(shardPath, "/", 2) if parts[1] == "" { // It's just a keyspace. List the shards. return ts.GetShardNames(ctx, parts[0]) } // It's a keyspace/shard reference. return ts.GetShard(ctx, parts[0], parts[1]) }) // Get Tablets handleGet("tablets", func(r *http.Request) (interface{}, error) { tabletPath := getItemPath(r.URL.Path) if tabletPath == "" { // List tablets based on query params. if err := r.ParseForm(); err != nil { return nil, err } shardRef := r.FormValue("shard") cell := r.FormValue("cell") if shardRef != "" { // Look up by keyspace/shard, and optionally cell. keyspace, shard, err := topo.ParseKeyspaceShardString(shardRef) if err != nil { return nil, err } if cell != "" { return topo.FindAllTabletAliasesInShardByCell(ctx, ts, keyspace, shard, []string{cell}) } return topo.FindAllTabletAliasesInShard(ctx, ts, keyspace, shard) } // Get all tablets in a cell. if cell == "" { return nil, errors.New("cell param required") } return ts.GetTabletsByCell(ctx, cell) } // Get a specific tablet. tabletAlias, err := topo.ParseTabletAliasString(tabletPath) if err != nil { return nil, err } return ts.GetTablet(ctx, tabletAlias) }) // Get EndPoints handleGet("endpoints", func(r *http.Request) (interface{}, error) { // We expect cell/keyspace/shard/tabletType. epPath := getItemPath(r.URL.Path) parts := strings.Split(epPath, "/") if len(parts) != 4 { return nil, fmt.Errorf("invalid cell/keyspace/shard/tabletType: %q", epPath) } if parts[3] == "" { // tabletType is empty, so list the tablet types. return ts.GetSrvTabletTypesPerShard(ctx, parts[0], parts[1], parts[2]) } // Get the endpoints object for a specific type. ep, _, err := ts.GetEndPoints(ctx, parts[0], parts[1], parts[2], topo.TabletType(parts[3])) return ep, err }) }
// ValidateSchemaKeyspace will diff the schema from all the tablets in // the keyspace. func (wr *Wrangler) ValidateSchemaKeyspace(ctx context.Context, keyspace string, excludeTables []string, includeViews bool) error { // find all the shards shards, err := wr.ts.GetShardNames(ctx, keyspace) if err != nil { return err } // corner cases if len(shards) == 0 { return fmt.Errorf("No shards in keyspace %v", keyspace) } sort.Strings(shards) if len(shards) == 1 { return wr.ValidateSchemaShard(ctx, keyspace, shards[0], excludeTables, includeViews) } // find the reference schema using the first shard's master si, err := wr.ts.GetShard(ctx, keyspace, shards[0]) if err != nil { return err } if topo.TabletAliasIsZero(si.MasterAlias) { return fmt.Errorf("No master in shard %v/%v", keyspace, shards[0]) } referenceAlias := si.MasterAlias log.Infof("Gathering schema for reference master %v", referenceAlias) referenceSchema, err := wr.GetSchema(ctx, referenceAlias, nil, excludeTables, includeViews) if err != nil { return err } // then diff with all other tablets everywhere er := concurrency.AllErrorRecorder{} wg := sync.WaitGroup{} // first diff the slaves in the reference shard 0 aliases, err := topo.FindAllTabletAliasesInShard(ctx, wr.ts, keyspace, shards[0]) if err != nil { return err } for _, alias := range aliases { if topo.TabletAliasEqual(alias, si.MasterAlias) { continue } wg.Add(1) go wr.diffSchema(ctx, referenceSchema, referenceAlias, alias, excludeTables, includeViews, &wg, &er) } // then diffs all tablets in the other shards for _, shard := range shards[1:] { si, err := wr.ts.GetShard(ctx, keyspace, shard) if err != nil { er.RecordError(err) continue } if topo.TabletAliasIsZero(si.MasterAlias) { er.RecordError(fmt.Errorf("No master in shard %v/%v", keyspace, shard)) continue } aliases, err := topo.FindAllTabletAliasesInShard(ctx, wr.ts, keyspace, shard) if err != nil { er.RecordError(err) continue } for _, alias := range aliases { wg.Add(1) go wr.diffSchema(ctx, referenceSchema, referenceAlias, alias, excludeTables, includeViews, &wg, &er) } } wg.Wait() if er.HasErrors() { return fmt.Errorf("Schema diffs:\n%v", er.Error().Error()) } return nil }
func (wr *Wrangler) applySchemaShard(shardInfo *topo.ShardInfo, preflight *mysqlctl.SchemaChangeResult, masterTabletAlias topo.TabletAlias, change string, newParentTabletAlias topo.TabletAlias, simple, force bool) (*mysqlctl.SchemaChangeResult, error) { // find all the shards we need to handle aliases, err := topo.FindAllTabletAliasesInShard(wr.ts, shardInfo.Keyspace(), shardInfo.ShardName()) if err != nil { return nil, err } // build the array of TabletStatus we're going to use statusArray := make([]*TabletStatus, 0, len(aliases)-1) for _, alias := range aliases { if alias == masterTabletAlias { // we skip the master continue } ti, err := wr.ts.GetTablet(alias) if err != nil { return nil, err } if ti.Type == topo.TYPE_LAG { // lag tablets are usually behind, not replicating, // and a general pain. So let's just skip them // all together. // TODO(alainjobart) figure out other types to skip: // ValidateSchemaShard only does the serving types. // We do everything in the replication graph // but LAG. This seems fine for now. relog.Info("Skipping tablet %v as it is LAG", ti.Alias()) continue } statusArray = append(statusArray, &TabletStatus{ti: ti}) } // get schema on all tablets. relog.Info("Getting schema on all tablets for shard %v/%v", shardInfo.Keyspace(), shardInfo.ShardName()) wg := &sync.WaitGroup{} for _, status := range statusArray { wg.Add(1) go func(status *TabletStatus) { status.beforeSchema, status.lastError = wr.GetSchemaTablet(status.ti, nil, false) wg.Done() }(status) } wg.Wait() // quick check for errors for _, status := range statusArray { if status.lastError != nil { return nil, fmt.Errorf("Error getting schema on tablet %v: %v", status.ti.Alias(), status.lastError) } } // simple or complex? if simple { return wr.applySchemaShardSimple(statusArray, preflight, masterTabletAlias, change, force) } return wr.applySchemaShardComplex(statusArray, shardInfo, preflight, masterTabletAlias, change, newParentTabletAlias, force) }
func initAPI(ctx context.Context, ts topo.Server, actions *ActionRepository) { tabletHealthCache := newTabletHealthCache(ts) // Cells handleCollection("cells", func(r *http.Request) (interface{}, error) { if getItemPath(r.URL.Path) != "" { return nil, errors.New("cells can only be listed, not retrieved") } return ts.GetKnownCells(ctx) }) // Keyspaces handleCollection("keyspaces", func(r *http.Request) (interface{}, error) { keyspace := getItemPath(r.URL.Path) // List all keyspaces. if keyspace == "" { return ts.GetKeyspaces(ctx) } // Perform an action on a keyspace. if r.Method == "POST" { if err := r.ParseForm(); err != nil { return nil, err } action := r.FormValue("action") if action == "" { return nil, errors.New("must specify action") } return actions.ApplyKeyspaceAction(ctx, action, keyspace, r), nil } // Get the keyspace record. return ts.GetKeyspace(ctx, keyspace) }) // Shards handleCollection("shards", func(r *http.Request) (interface{}, error) { shardPath := getItemPath(r.URL.Path) if !strings.Contains(shardPath, "/") { return nil, fmt.Errorf("invalid shard path: %q", shardPath) } parts := strings.SplitN(shardPath, "/", 2) keyspace := parts[0] shard := parts[1] // List the shards in a keyspace. if shard == "" { return ts.GetShardNames(ctx, keyspace) } // Perform an action on a shard. if r.Method == "POST" { if err := r.ParseForm(); err != nil { return nil, err } action := r.FormValue("action") if action == "" { return nil, errors.New("must specify action") } return actions.ApplyShardAction(ctx, action, keyspace, shard, r), nil } // Get the shard record. return ts.GetShard(ctx, keyspace, shard) }) // Tablets handleCollection("tablets", func(r *http.Request) (interface{}, error) { tabletPath := getItemPath(r.URL.Path) // List tablets based on query params. if tabletPath == "" { if err := r.ParseForm(); err != nil { return nil, err } shardRef := r.FormValue("shard") cell := r.FormValue("cell") if shardRef != "" { // Look up by keyspace/shard, and optionally cell. keyspace, shard, err := topo.ParseKeyspaceShardString(shardRef) if err != nil { return nil, err } if cell != "" { return topo.FindAllTabletAliasesInShardByCell(ctx, ts, keyspace, shard, []string{cell}) } return topo.FindAllTabletAliasesInShard(ctx, ts, keyspace, shard) } // Get all tablets in a cell. if cell == "" { return nil, errors.New("cell param required") } return ts.GetTabletsByCell(ctx, cell) } // Get tablet health. if parts := strings.Split(tabletPath, "/"); len(parts) == 2 && parts[1] == "health" { tabletAlias, err := topo.ParseTabletAliasString(parts[0]) if err != nil { return nil, err } return tabletHealthCache.Get(ctx, tabletAlias) } tabletAlias, err := topo.ParseTabletAliasString(tabletPath) if err != nil { return nil, err } // Perform an action on a tablet. if r.Method == "POST" { if err := r.ParseForm(); err != nil { return nil, err } action := r.FormValue("action") if action == "" { return nil, errors.New("must specify action") } return actions.ApplyTabletAction(ctx, action, tabletAlias, r), nil } // Get the tablet record. return ts.GetTablet(ctx, tabletAlias) }) // EndPoints handleCollection("endpoints", func(r *http.Request) (interface{}, error) { // We expect cell/keyspace/shard/tabletType. epPath := getItemPath(r.URL.Path) parts := strings.Split(epPath, "/") if len(parts) != 4 { return nil, fmt.Errorf("invalid cell/keyspace/shard/tabletType: %q", epPath) } if parts[3] == "" { // tabletType is empty, so list the tablet types. return ts.GetSrvTabletTypesPerShard(ctx, parts[0], parts[1], parts[2]) } // Get the endpoints object for a specific type. ep, _, err := ts.GetEndPoints(ctx, parts[0], parts[1], parts[2], topo.TabletType(parts[3])) return ep, err }) // Schema Change http.HandleFunc(apiPrefix+"schema/apply", func(w http.ResponseWriter, r *http.Request) { req := struct{ Keyspace, SQL string }{} if err := unmarshalRequest(r, &req); err != nil { httpErrorf(w, r, "can't unmarshal request: %v", err) return } executor := schemamanager.NewTabletExecutor( tmclient.NewTabletManagerClient(), ts) schemamanager.Run(ctx, schemamanager.NewUIController(req.SQL, req.Keyspace, w), executor) }) // VSchema http.HandleFunc(apiPrefix+"vschema/", func(w http.ResponseWriter, r *http.Request) { schemafier, ok := ts.(topo.Schemafier) if !ok { httpErrorf(w, r, "%T doesn't support schemafier API", ts) return } // Save VSchema if r.Method == "POST" { vschema, err := ioutil.ReadAll(r.Body) if err != nil { httpErrorf(w, r, "can't read request body: %v", err) return } if err := schemafier.SaveVSchema(ctx, string(vschema)); err != nil { httpErrorf(w, r, "can't save vschema: %v", err) } return } // Get VSchema vschema, err := schemafier.GetVSchema(ctx) if err != nil { httpErrorf(w, r, "can't get vschema: %v", err) return } w.Header().Set("Content-Type", jsonContentType) w.Write([]byte(vschema)) }) }
func listActionsByShard(wr *wrangler.Wrangler, keyspace, shard string) error { // only works with Server zkts, ok := wr.TopoServer().(*zktopo.Server) if !ok { return fmt.Errorf("listActionsByShard only works with zktopo.Server") } // print the shard action nodes shardActionPath := zkts.ShardActionPath(keyspace, shard) shardActionNodes, err := getActions(wr, zkts.GetZConn(), shardActionPath) if err != nil { return err } for _, shardAction := range shardActionNodes { wr.Logger().Printf("%v\n", fmtAction(shardAction)) } // get and print the tablet action nodes wg := sync.WaitGroup{} mu := sync.Mutex{} actionMap := make(map[string]*actionnode.ActionNode) f := func(actionPath string) { defer wg.Done() actionNodes, err := getActions(wr, zkts.GetZConn(), actionPath) if err != nil { wr.Logger().Warningf("listActionsByShard %v", err) return } mu.Lock() for _, node := range actionNodes { actionMap[node.Path] = node } mu.Unlock() } tabletAliases, err := topo.FindAllTabletAliasesInShard(wr.TopoServer(), keyspace, shard) if err != nil { return err } for _, tabletAlias := range tabletAliases { actionPath := zktopo.TabletActionPathForAlias(tabletAlias) if err != nil { wr.Logger().Warningf("listActionsByShard %v", err) } else { wg.Add(1) go f(actionPath) } } wg.Wait() mu.Lock() defer mu.Unlock() keys := topotools.CopyMapKeys(actionMap, []string{}).([]string) sort.Strings(keys) for _, key := range keys { action := actionMap[key] if action == nil { wr.Logger().Warningf("nil action: %v", key) } else { wr.Logger().Printf("%v\n", fmtAction(action)) } } return nil }
// ExportZknsForKeyspace exports addresses from the VT serving graph to a legacy zkns server. func (wr *Wrangler) ExportZknsForKeyspace(ctx context.Context, keyspace string) error { zkTopo, ok := wr.ts.(*zktopo.Server) if !ok { return fmt.Errorf("ExportZknsForKeyspace only works with zktopo") } zconn := zkTopo.GetZConn() shardNames, err := wr.ts.GetShardNames(ctx, keyspace) if err != nil { return err } // Scan the first shard to discover which cells need local serving data. aliases, err := topo.FindAllTabletAliasesInShard(ctx, wr.ts, keyspace, shardNames[0]) if err != nil { return err } cellMap := make(map[string]bool) for _, alias := range aliases { cellMap[alias.Cell] = true } for cell := range cellMap { vtnsRootPath := fmt.Sprintf("/zk/%v/vt/ns/%v", cell, keyspace) zknsRootPath := fmt.Sprintf("/zk/%v/zkns/vt/%v", cell, keyspace) // Get the existing list of zkns children. If they don't get rewritten, // delete them as stale entries. zknsChildren, err := zk.ChildrenRecursive(zconn, zknsRootPath) if err != nil { if zookeeper.IsError(err, zookeeper.ZNONODE) { zknsChildren = make([]string, 0) } else { return err } } staleZknsPaths := make(map[string]bool) for _, child := range zknsChildren { staleZknsPaths[path.Join(zknsRootPath, child)] = true } vtnsChildren, err := zk.ChildrenRecursive(zconn, vtnsRootPath) if err != nil { if zookeeper.IsError(err, zookeeper.ZNONODE) { vtnsChildren = make([]string, 0) } else { return err } } for _, child := range vtnsChildren { vtnsAddrPath := path.Join(vtnsRootPath, child) zknsAddrPath := path.Join(zknsRootPath, child) _, stat, err := zconn.Get(vtnsAddrPath) if err != nil { return err } // Leaf nodes correspond to zkns vdns files in the old setup. if stat.NumChildren() > 0 { continue } zknsPathsWritten, err := wr.exportVtnsToZkns(ctx, zconn, vtnsAddrPath, zknsAddrPath) if err != nil { return err } log.V(6).Infof("zknsPathsWritten: %v", zknsPathsWritten) for _, zkPath := range zknsPathsWritten { delete(staleZknsPaths, zkPath) } } log.V(6).Infof("staleZknsPaths: %v", staleZknsPaths) prunePaths := make([]string, 0, len(staleZknsPaths)) for prunePath := range staleZknsPaths { prunePaths = append(prunePaths, prunePath) } sort.Strings(prunePaths) // Prune paths in reverse order so we remove children first for i := len(prunePaths) - 1; i >= 0; i-- { log.Infof("prune stale zkns path %v", prunePaths[i]) if err := zconn.Delete(prunePaths[i], -1); err != nil && !zookeeper.IsError(err, zookeeper.ZNOTEMPTY) { return err } } } return nil }
// This function should only be used with an action lock on the keyspace // - otherwise the consistency of the serving graph data can't be // guaranteed. // // Take data from the global keyspace and rebuild the local serving // copies in each cell. func (wr *Wrangler) rebuildKeyspace(keyspace string, cells []string) error { log.Infof("rebuildKeyspace %v", keyspace) ki, err := wr.ts.GetKeyspace(keyspace) if err != nil { // Temporary change: we try to keep going even if node // doesn't exist if err != topo.ErrNoNode { return err } ki = topo.NewKeyspaceInfo(keyspace, &topo.Keyspace{}) } shards, err := wr.ts.GetShardNames(keyspace) if err != nil { return err } // Rebuild all shards in parallel. wg := sync.WaitGroup{} er := concurrency.FirstErrorRecorder{} for _, shard := range shards { wg.Add(1) go func(shard string) { if err := wr.RebuildShardGraph(keyspace, shard, cells); err != nil { er.RecordError(fmt.Errorf("RebuildShardGraph failed: %v/%v %v", keyspace, shard, err)) } wg.Done() }(shard) } wg.Wait() if er.HasErrors() { return er.Error() } // Scan the first shard to discover which cells need local serving data. aliases, err := topo.FindAllTabletAliasesInShard(wr.ts, keyspace, shards[0]) if err != nil { return err } // srvKeyspaceMap is a map: // key: local keyspace {cell,keyspace} // value: topo.SrvKeyspace object being built srvKeyspaceMap := make(map[cellKeyspace]*topo.SrvKeyspace) for _, alias := range aliases { keyspaceLocation := cellKeyspace{alias.Cell, keyspace} if _, ok := srvKeyspaceMap[keyspaceLocation]; !ok { // before adding keyspaceLocation to the map of // of KeyspaceByPath, we check this is a // serving tablet. No serving tablet in shard // 0 means we're not rebuilding the serving // graph in that cell. This is somewhat // expensive, but we only do it on all the // non-serving tablets in a shard before we // find a serving tablet. ti, err := wr.ts.GetTablet(alias) if err != nil { return err } if !ti.IsInServingGraph() { continue } srvKeyspaceMap[keyspaceLocation] = &topo.SrvKeyspace{ Shards: make([]topo.SrvShard, 0, 16), ShardingColumnName: ki.ShardingColumnName, ShardingColumnType: ki.ShardingColumnType, ServedFrom: ki.ServedFrom, } } } // for each entry in the srvKeyspaceMap map, we do the following: // - read the ShardInfo structures for each shard // - compute the union of the db types (replica, master, ...) // - sort the shards in the list by range // - check the ranges are compatible (no hole, covers everything) for ck, srvKeyspace := range srvKeyspaceMap { keyspaceDbTypes := make(map[topo.TabletType]bool) srvKeyspace.Partitions = make(map[topo.TabletType]*topo.KeyspacePartition) for _, shard := range shards { srvShard, err := wr.ts.GetSrvShard(ck.cell, ck.keyspace, shard) if err != nil { return err } for _, tabletType := range srvShard.TabletTypes { keyspaceDbTypes[tabletType] = true } // for each type this shard is supposed to serve, // add it to srvKeyspace.Partitions for _, tabletType := range srvShard.ServedTypes { if _, ok := srvKeyspace.Partitions[tabletType]; !ok { srvKeyspace.Partitions[tabletType] = &topo.KeyspacePartition{ Shards: make([]topo.SrvShard, 0)} } srvKeyspace.Partitions[tabletType].Shards = append(srvKeyspace.Partitions[tabletType].Shards, *srvShard) } } srvKeyspace.TabletTypes = make([]topo.TabletType, 0, len(keyspaceDbTypes)) for dbType := range keyspaceDbTypes { srvKeyspace.TabletTypes = append(srvKeyspace.TabletTypes, dbType) } first := true for tabletType, partition := range srvKeyspace.Partitions { topo.SrvShardArray(partition.Shards).Sort() // check the first Start is MinKey, the last End is MaxKey, // and the values in between match: End[i] == Start[i+1] if partition.Shards[0].KeyRange.Start != key.MinKey { return fmt.Errorf("Keyspace partition for %v does not start with %v", tabletType, key.MinKey) } if partition.Shards[len(partition.Shards)-1].KeyRange.End != key.MaxKey { return fmt.Errorf("Keyspace partition for %v does not end with %v", tabletType, key.MaxKey) } for i := range partition.Shards[0 : len(partition.Shards)-1] { if partition.Shards[i].KeyRange.End != partition.Shards[i+1].KeyRange.Start { return fmt.Errorf("Non-contiguous KeyRange values for %v at shard %v to %v: %v != %v", tabletType, i, i+1, partition.Shards[i].KeyRange.End.Hex(), partition.Shards[i+1].KeyRange.Start.Hex()) } } // backfill Shards if first { first = false srvKeyspace.Shards = partition.Shards } } } // and then finally save the keyspace objects for ck, srvKeyspace := range srvKeyspaceMap { if err := wr.ts.UpdateSrvKeyspace(ck.cell, ck.keyspace, srvKeyspace); err != nil { return fmt.Errorf("writing serving data failed: %v", err) } } return nil }
// This function should only be used with an action lock on the keyspace // - otherwise the consistency of the serving graph data can't be // guaranteed. // // Take data from the global keyspace and rebuild the local serving // copies in each cell. func (wr *Wrangler) rebuildKeyspace(keyspace string, cells []string) error { relog.Info("rebuildKeyspace %v", keyspace) shards, err := wr.ts.GetShardNames(keyspace) if err != nil { return err } // Rebuild all shards in parallel. wg := sync.WaitGroup{} er := concurrency.FirstErrorRecorder{} for _, shard := range shards { wg.Add(1) go func(shard string) { if err := wr.RebuildShardGraph(keyspace, shard, cells); err != nil { er.RecordError(fmt.Errorf("RebuildShardGraph failed: %v/%v %v", keyspace, shard, err)) } wg.Done() }(shard) } wg.Wait() if er.HasErrors() { return er.Error() } // Scan the first shard to discover which cells need local serving data. aliases, err := topo.FindAllTabletAliasesInShard(wr.ts, keyspace, shards[0]) if err != nil { return err } // srvKeyspaceByPath is a map: // key: local keyspace {cell,keyspace} // value: topo.SrvKeyspace object being built srvKeyspaceByPath := make(map[cellKeyspace]*topo.SrvKeyspace) for _, alias := range aliases { keyspaceLocation := cellKeyspace{alias.Cell, keyspace} if _, ok := srvKeyspaceByPath[keyspaceLocation]; !ok { // before adding keyspaceLocation to the map of // of KeyspaceByPath, we check this is a // serving tablet. No serving tablet in shard // 0 means we're not rebuilding the serving // graph in that cell. This is somewhat // expensive, but we only do it on all the // non-serving tablets in a shard before we // find a serving tablet. ti, err := wr.ts.GetTablet(alias) if err != nil { return err } if !ti.IsServingType() { continue } srvKeyspaceByPath[keyspaceLocation] = &topo.SrvKeyspace{Shards: make([]topo.SrvShard, 0, 16)} } } // for each entry in the srvKeyspaceByPath map, we do the following: // - read the ShardInfo structures for each shard // - prune the AddrsByType field, result would be too big // - compute the union of the db types (replica, master, ...) // - sort the shards in the list by range // - check the ranges are compatible (no hole, covers everything) for srvPath, srvKeyspace := range srvKeyspaceByPath { keyspaceDbTypes := make(map[topo.TabletType]bool) for _, shard := range shards { srvShard, err := wr.ts.GetSrvShard(srvPath.cell, srvPath.keyspace, shard) if err != nil { return err } for dbType, _ := range srvShard.AddrsByType { keyspaceDbTypes[topo.TabletType(dbType)] = true } // Prune addrs, this is unnecessarily expensive right now. It is easier to // load on-demand since we have to do that anyway on a reconnect. srvShard.AddrsByType = nil srvKeyspace.Shards = append(srvKeyspace.Shards, *srvShard) } tabletTypes := make([]topo.TabletType, 0, len(keyspaceDbTypes)) for dbType, _ := range keyspaceDbTypes { tabletTypes = append(tabletTypes, dbType) } srvKeyspace.TabletTypes = tabletTypes // FIXME(msolomon) currently this only works when the shards are range-based topo.SrvShardArray(srvKeyspace.Shards).Sort() // check the first Start is MinKey, the last End is MaxKey, // and the values in between match: End[i] == Start[i+1] if srvKeyspace.Shards[0].KeyRange.Start != key.MinKey { return fmt.Errorf("Keyspace does not start with %v", key.MinKey) } if srvKeyspace.Shards[len(srvKeyspace.Shards)-1].KeyRange.End != key.MaxKey { return fmt.Errorf("Keyspace does not end with %v", key.MaxKey) } for i, _ := range srvKeyspace.Shards[0 : len(srvKeyspace.Shards)-1] { if srvKeyspace.Shards[i].KeyRange.End != srvKeyspace.Shards[i+1].KeyRange.Start { return fmt.Errorf("Non-contiguous KeyRange values at shard %v to %v: %v != %v", i, i+1, srvKeyspace.Shards[i].KeyRange.End.Hex(), srvKeyspace.Shards[i+1].KeyRange.Start.Hex()) } } } // and then finally save the keyspace objects for srvPath, srvKeyspace := range srvKeyspaceByPath { if err := wr.ts.UpdateSrvKeyspace(srvPath.cell, srvPath.keyspace, srvKeyspace); err != nil { return fmt.Errorf("writing serving data failed: %v", err) } } return nil }