// FixShardReplication will fix the first problem it encounters within
// a ShardReplication object.
func FixShardReplication(ctx context.Context, ts Server, logger logutil.Logger, cell, keyspace, shard string) error {
	sri, err := ts.GetShardReplication(ctx, cell, keyspace, shard)
	if err != nil {
		return err
	}

	for _, node := range sri.Nodes {
		ti, err := ts.GetTablet(ctx, node.TabletAlias)
		if err == ErrNoNode {
			logger.Warningf("Tablet %v is in the replication graph, but does not exist, removing it", node.TabletAlias)
			return RemoveShardReplicationRecord(ctx, ts, cell, keyspace, shard, node.TabletAlias)
		}
		if err != nil {
			// unknown error, we probably don't want to continue
			return err
		}

		if ti.Type == pb.TabletType_SCRAP {
			logger.Warningf("Tablet %v is in the replication graph, but is scrapped, removing it", node.TabletAlias)
			return RemoveShardReplicationRecord(ctx, ts, cell, keyspace, shard, node.TabletAlias)
		}

		logger.Infof("Keeping tablet %v in the replication graph", node.TabletAlias)
	}

	logger.Infof("All entries in replication graph are valid")
	return nil
}
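// Illustrative usage sketch (not part of the original code): run
// FixShardReplication for one shard across a caller-supplied list of cells.
// Since FixShardReplication stops after the first problem it fixes, a caller
// that wants a fully clean graph may need to invoke it more than once per cell.
func fixShardReplicationInCells(ctx context.Context, ts Server, logger logutil.Logger, keyspace, shard string, cells []string) error {
	for _, cell := range cells {
		if err := FixShardReplication(ctx, ts, logger, cell, keyspace, shard); err != nil {
			return err
		}
	}
	return nil
}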
// RestartSlavesExternal will tell all the slaves in the provided list
// that they have a new master, and also tell all the masters. The
// old masters will be forced to spare if they don't answer.
// We execute all the actions in parallel.
func RestartSlavesExternal(ts topo.Server, log logutil.Logger, slaveTabletMap, masterTabletMap map[topo.TabletAlias]*topo.TabletInfo, masterElectTabletAlias topo.TabletAlias, slaveWasRestarted func(*topo.TabletInfo, *actionnode.SlaveWasRestartedArgs) error) {
	wg := sync.WaitGroup{}
	swrd := actionnode.SlaveWasRestartedArgs{
		Parent: masterElectTabletAlias,
	}

	log.Infof("Updating individual tablets with the right master...")

	// do all the slaves
	for _, ti := range slaveTabletMap {
		wg.Add(1)
		go func(ti *topo.TabletInfo) {
			if err := slaveWasRestarted(ti, &swrd); err != nil {
				log.Warningf("Slave %v had an error: %v", ti.Alias, err)
			}
			wg.Done()
		}(ti)
	}

	// and do the old master and any straggler, if possible.
	for _, ti := range masterTabletMap {
		wg.Add(1)
		go func(ti *topo.TabletInfo) {
			err := slaveWasRestarted(ti, &swrd)
			if err != nil {
				// the old master can be annoying if left
				// around in the replication graph, so if we
				// can't restart it, we just make it spare.
				// We don't rebuild the Shard just yet though.
				log.Warningf("Old master %v is not restarting in time, forcing it to spare: %v", ti.Alias, err)

				ti.Type = topo.TYPE_SPARE
				ti.Parent = masterElectTabletAlias
				if err := topo.UpdateTablet(ts, ti); err != nil {
					log.Warningf("Failed to change old master %v to spare: %v", ti.Alias, err)
				}
			}
			wg.Done()
		}(ti)
	}
	wg.Wait()
}
// RestartSlavesExternal will tell all the slaves in the provided list
// that they have a new master, and also tell all the masters. The
// old masters will be forced to spare if they don't answer.
// We execute all the actions in parallel.
func RestartSlavesExternal(ts topo.Server, log logutil.Logger, slaveTabletMap, masterTabletMap map[topodatapb.TabletAlias]*topo.TabletInfo, masterElectTabletAlias *topodatapb.TabletAlias, slaveWasRestarted func(*topo.TabletInfo, *topodatapb.TabletAlias) error) {
	wg := sync.WaitGroup{}

	log.Infof("Updating individual tablets with the right master...")

	// do all the slaves
	for _, ti := range slaveTabletMap {
		wg.Add(1)
		go func(ti *topo.TabletInfo) {
			if err := slaveWasRestarted(ti, masterElectTabletAlias); err != nil {
				log.Warningf("Slave %v had an error: %v", ti.Alias, err)
			}
			wg.Done()
		}(ti)
	}

	// and do the old master and any straggler, if possible.
	for _, ti := range masterTabletMap {
		wg.Add(1)
		go func(ti *topo.TabletInfo) {
			err := slaveWasRestarted(ti, masterElectTabletAlias)
			if err != nil {
				// the old master can be annoying if left
				// around in the replication graph, so if we
				// can't restart it, we just make it spare.
				log.Warningf("Old master %v is not restarting in time, forcing it to spare: %v", ti.Alias, err)

				ti.Type = topodatapb.TabletType_SPARE
				if err := ts.UpdateTablet(context.TODO(), ti); err != nil {
					log.Warningf("Failed to change old master %v to spare: %v", ti.Alias, err)
				}
			}
			wg.Done()
		}(ti)
	}
	wg.Wait()
}
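// Illustrative usage sketch (not part of the original code): invoke
// RestartSlavesExternal with a hypothetical callback. A real callback would
// send the "slave was restarted" RPC to each tablet; here it only logs so the
// example stays self-contained. All parameters are assumed to be supplied by
// the caller.
func restartSlavesExternalExample(ts topo.Server, log logutil.Logger, slaveTabletMap, masterTabletMap map[topodatapb.TabletAlias]*topo.TabletInfo, masterElect *topodatapb.TabletAlias) {
	RestartSlavesExternal(ts, log, slaveTabletMap, masterTabletMap, masterElect,
		func(ti *topo.TabletInfo, parent *topodatapb.TabletAlias) error {
			log.Infof("would notify tablet %v that its new master is %v", ti.Alias, parent)
			return nil
		})
}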
// rebuildCellSrvShard computes and writes the serving graph data to a
// single cell.
func rebuildCellSrvShard(ctx context.Context, log logutil.Logger, ts topo.Server, si *topo.ShardInfo, cell string) (err error) {
	log.Infof("rebuildCellSrvShard %v/%v in cell %v", si.Keyspace(), si.ShardName(), cell)

	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}

		// Read existing EndPoints node versions, so we know if any
		// changes sneak in after we read the tablets.
		versions, err := getEndPointsVersions(ctx, ts, cell, si.Keyspace(), si.ShardName())
		if err != nil {
			return err
		}

		// Get all tablets in this cell/shard.
		tablets, err := ts.GetTabletMapForShardByCell(ctx, si.Keyspace(), si.ShardName(), []string{cell})
		if err != nil {
			if err != topo.ErrPartialResult {
				return err
			}
			log.Warningf("Got ErrPartialResult from topo.GetTabletMapForShardByCell(%v), some tablets may not be added properly to serving graph", cell)
		}

		// Build up the serving graph from scratch.
		serving := make(map[pb.TabletType]*pb.EndPoints)
		for _, tablet := range tablets {
			// Only add serving types.
			if !tablet.IsInServingGraph() {
				continue
			}

			// Check the Keyspace and Shard for the tablet are right.
			if tablet.Keyspace != si.Keyspace() || tablet.Shard != si.ShardName() {
				return fmt.Errorf("CRITICAL: tablet %v is in replication graph for shard %v/%v but belongs to shard %v:%v", tablet.Alias, si.Keyspace(), si.ShardName(), tablet.Keyspace, tablet.Shard)
			}

			// Add the tablet to the list.
			endpoints, ok := serving[tablet.Type]
			if !ok {
				endpoints = topo.NewEndPoints()
				serving[tablet.Type] = endpoints
			}
			entry, err := topo.TabletEndPoint(tablet.Tablet)
			if err != nil {
				log.Warningf("EndPointForTablet failed for tablet %v: %v", tablet.Alias, err)
				continue
			}
			endpoints.Entries = append(endpoints.Entries, entry)
		}

		wg := sync.WaitGroup{}
		fatalErrs := concurrency.AllErrorRecorder{}
		retryErrs := concurrency.AllErrorRecorder{}

		// Write nodes that should exist.
		for tabletType, endpoints := range serving {
			wg.Add(1)
			go func(tabletType pb.TabletType, endpoints *pb.EndPoints) {
				defer wg.Done()
				log.Infof("saving serving graph for cell %v shard %v/%v tabletType %v", cell, si.Keyspace(), si.ShardName(), tabletType)

				version, ok := versions[tabletType]
				if !ok {
					// This type didn't exist when we first checked.
					// Try to create, but only if it still doesn't exist.
					if err := ts.CreateEndPoints(ctx, cell, si.Keyspace(), si.ShardName(), tabletType, endpoints); err != nil {
						log.Warningf("CreateEndPoints(%v, %v, %v) failed during rebuild: %v", cell, si, tabletType, err)
						switch err {
						case topo.ErrNodeExists:
							retryErrs.RecordError(err)
						default:
							fatalErrs.RecordError(err)
						}
					}
					return
				}

				// Update only if the version matches.
				if err := ts.UpdateEndPoints(ctx, cell, si.Keyspace(), si.ShardName(), tabletType, endpoints, version); err != nil {
					log.Warningf("UpdateEndPoints(%v, %v, %v) failed during rebuild: %v", cell, si, tabletType, err)
					switch err {
					case topo.ErrBadVersion, topo.ErrNoNode:
						retryErrs.RecordError(err)
					default:
						fatalErrs.RecordError(err)
					}
				}
			}(tabletType, endpoints)
		}

		// Delete nodes that shouldn't exist.
		for tabletType, version := range versions {
			if _, ok := serving[tabletType]; !ok {
				wg.Add(1)
				go func(tabletType pb.TabletType, version int64) {
					defer wg.Done()
					log.Infof("removing stale db type from serving graph: %v", tabletType)
					if err := ts.DeleteEndPoints(ctx, cell, si.Keyspace(), si.ShardName(), tabletType, version); err != nil {
						log.Warningf("DeleteEndPoints(%v, %v, %v) failed during rebuild: %v", cell, si, tabletType, err)
						switch err {
						case topo.ErrNoNode:
							// Someone else deleted it, which is fine.
						case topo.ErrBadVersion:
							retryErrs.RecordError(err)
						default:
							fatalErrs.RecordError(err)
						}
					}
				}(tabletType, version)
			}
		}

		// Update srvShard object
		wg.Add(1)
		go func() {
			defer wg.Done()
			log.Infof("updating shard serving graph in cell %v for %v/%v", cell, si.Keyspace(), si.ShardName())
			if err := UpdateSrvShard(ctx, ts, cell, si); err != nil {
				fatalErrs.RecordError(err)
				log.Warningf("writing serving data in cell %v for %v/%v failed: %v", cell, si.Keyspace(), si.ShardName(), err)
			}
		}()

		wg.Wait()

		// If there are any fatal errors, give up.
		if fatalErrs.HasErrors() {
			return fatalErrs.Error()
		}

		// If there are any retry errors, try again.
		if retryErrs.HasErrors() {
			continue
		}

		// Otherwise, success!
		return nil
	}
}
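// The rebuild loop above is an optimistic-concurrency pattern: read the
// current EndPoints versions, write conditioned on those versions, and retry
// the whole pass when a conditional write fails. Below is a minimal sketch
// (not part of the original code) of the same idea, with hypothetical
// readVersion/writeIfVersion callbacks standing in for the topo server calls.
func writeWithVersionRetry(ctx context.Context, readVersion func() (int64, error), writeIfVersion func(version int64) error) error {
	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}
		version, err := readVersion()
		if err != nil {
			return err
		}
		switch err := writeIfVersion(version); err {
		case nil:
			return nil
		case topo.ErrBadVersion:
			// Someone else wrote in between; take another pass.
			continue
		default:
			return err
		}
	}
}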
// RebuildShard rebuilds the serving graph data for a shard (new master,
// replicas, etc).
//
// Re-read from TopologyServer to make sure we are using the side
// effects of all actions.
//
// This function locks individual SrvShard paths, so it doesn't need a lock
// on the shard.
func RebuildShard(log logutil.Logger, ts topo.Server, keyspace, shard string, cells []string, timeout time.Duration, interrupted chan struct{}) error {
	log.Infof("RebuildShard %v/%v", keyspace, shard)

	// read the existing shard info. It has to exist.
	shardInfo, err := ts.GetShard(keyspace, shard)
	if err != nil {
		return err
	}

	// rebuild all cells in parallel
	wg := sync.WaitGroup{}
	rec := concurrency.AllErrorRecorder{}
	for _, cell := range shardInfo.Cells {
		// skip this cell if we shouldn't rebuild it
		if !topo.InCellList(cell, cells) {
			continue
		}

		// start with the master if it's in the current cell
		tabletsAsMap := make(map[topo.TabletAlias]bool)
		if shardInfo.MasterAlias.Cell == cell {
			tabletsAsMap[shardInfo.MasterAlias] = true
		}

		wg.Add(1)
		go func(cell string) {
			defer wg.Done()

			// read the ShardReplication object to find tablets
			sri, err := ts.GetShardReplication(cell, keyspace, shard)
			if err != nil {
				rec.RecordError(fmt.Errorf("GetShardReplication(%v, %v, %v) failed: %v", cell, keyspace, shard, err))
				return
			}

			// add all relevant tablets to the map
			for _, rl := range sri.ReplicationLinks {
				tabletsAsMap[rl.TabletAlias] = true
				if rl.Parent.Cell == cell {
					tabletsAsMap[rl.Parent] = true
				}
			}

			// convert the map to a list
			aliases := make([]topo.TabletAlias, 0, len(tabletsAsMap))
			for a := range tabletsAsMap {
				aliases = append(aliases, a)
			}

			// read all the Tablet records
			tablets, err := topo.GetTabletMap(ts, aliases)
			switch err {
			case nil:
				// keep going, we're good
			case topo.ErrPartialResult:
				log.Warningf("Got ErrPartialResult from topo.GetTabletMap in cell %v, some tablets may not be added properly to serving graph", cell)
			default:
				rec.RecordError(fmt.Errorf("GetTabletMap in cell %v failed: %v", cell, err))
				return
			}

			// Lock the SrvShard so we write a consistent data set.
			actionNode := actionnode.RebuildSrvShard()
			lockPath, err := actionNode.LockSrvShard(ts, cell, keyspace, shard, timeout, interrupted)
			if err != nil {
				rec.RecordError(err)
				return
			}

			// write the data we need to
			rebuildErr := rebuildCellSrvShard(log, ts, shardInfo, cell, tablets)

			// and unlock
			if err := actionNode.UnlockSrvShard(ts, cell, keyspace, shard, lockPath, rebuildErr); err != nil {
				rec.RecordError(err)
			}
		}(cell)
	}
	wg.Wait()

	return rec.Error()
}
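// Illustrative usage sketch (not part of the original code): rebuild the
// serving graph for one shard in all of its cells, with a 30 second lock
// timeout. Passing nil for cells is assumed here to mean "all cells"; the
// interrupted channel lets the caller abort waiting for the SrvShard lock.
func rebuildShardExample(log logutil.Logger, ts topo.Server, keyspace, shard string) error {
	interrupted := make(chan struct{})
	return RebuildShard(log, ts, keyspace, shard, nil, 30*time.Second, interrupted)
}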
// rebuildCellSrvShard computes and writes the serving graph data to a
// single cell.
func rebuildCellSrvShard(log logutil.Logger, ts topo.Server, shardInfo *topo.ShardInfo, cell string, tablets map[topo.TabletAlias]*topo.TabletInfo) error {
	log.Infof("rebuildCellSrvShard %v/%v in cell %v", shardInfo.Keyspace(), shardInfo.ShardName(), cell)

	// Get all existing db types so they can be removed if nothing
	// had been edited.
	existingTabletTypes, err := ts.GetSrvTabletTypesPerShard(cell, shardInfo.Keyspace(), shardInfo.ShardName())
	if err != nil && err != topo.ErrNoNode {
		return err
	}

	// Update db type addresses in the serving graph
	//
	// locationAddrsMap is a map:
	//   key: tabletType
	//   value: EndPoints (list of server records)
	locationAddrsMap := make(map[topo.TabletType]*topo.EndPoints)
	for _, tablet := range tablets {
		if !tablet.IsInReplicationGraph() {
			// only valid case is a scrapped master in the
			// catastrophic reparent case
			if tablet.Parent.Uid != topo.NO_TABLET {
				log.Warningf("Tablet %v should not be in the replication graph, please investigate (it is being ignored in the rebuild)", tablet.Alias)
			}
			continue
		}

		// Check IsInServingGraph, we don't want to add tablets that
		// are not serving
		if !tablet.IsInServingGraph() {
			continue
		}

		// Check the Keyspace and Shard for the tablet are right
		if tablet.Keyspace != shardInfo.Keyspace() || tablet.Shard != shardInfo.ShardName() {
			return fmt.Errorf("CRITICAL: tablet %v is in replication graph for shard %v/%v but belongs to shard %v:%v", tablet.Alias, shardInfo.Keyspace(), shardInfo.ShardName(), tablet.Keyspace, tablet.Shard)
		}

		// Add the tablet to the list
		addrs, ok := locationAddrsMap[tablet.Type]
		if !ok {
			addrs = topo.NewEndPoints()
			locationAddrsMap[tablet.Type] = addrs
		}
		entry, err := tablet.Tablet.EndPoint()
		if err != nil {
			log.Warningf("EndPointForTablet failed for tablet %v: %v", tablet.Alias, err)
			continue
		}
		addrs.Entries = append(addrs.Entries, *entry)
	}

	// we're going to parallelize a lot here:
	// - writing all the tabletTypes records
	// - removing the unused records
	// - writing SrvShard
	rec := concurrency.AllErrorRecorder{}
	wg := sync.WaitGroup{}

	// write all the EndPoints nodes everywhere we want them
	for tabletType, addrs := range locationAddrsMap {
		wg.Add(1)
		go func(tabletType topo.TabletType, addrs *topo.EndPoints) {
			log.Infof("saving serving graph for cell %v shard %v/%v tabletType %v", cell, shardInfo.Keyspace(), shardInfo.ShardName(), tabletType)
			if err := ts.UpdateEndPoints(cell, shardInfo.Keyspace(), shardInfo.ShardName(), tabletType, addrs); err != nil {
				rec.RecordError(fmt.Errorf("writing endpoints for cell %v shard %v/%v tabletType %v failed: %v", cell, shardInfo.Keyspace(), shardInfo.ShardName(), tabletType, err))
			}
			wg.Done()
		}(tabletType, addrs)
	}

	// Delete any pre-existing paths that were not updated by this process.
	// That's the existingTabletTypes - locationAddrsMap
	for _, tabletType := range existingTabletTypes {
		if _, ok := locationAddrsMap[tabletType]; !ok {
			wg.Add(1)
			go func(tabletType topo.TabletType) {
				log.Infof("removing stale db type from serving graph: %v", tabletType)
				if err := ts.DeleteEndPoints(cell, shardInfo.Keyspace(), shardInfo.ShardName(), tabletType); err != nil {
					log.Warningf("unable to remove stale db type %v from serving graph: %v", tabletType, err)
				}
				wg.Done()
			}(tabletType)
		}
	}

	// Update srvShard object
	wg.Add(1)
	go func() {
		log.Infof("updating shard serving graph in cell %v for %v/%v", cell, shardInfo.Keyspace(), shardInfo.ShardName())
		srvShard := &topo.SrvShard{
			Name:        shardInfo.ShardName(),
			KeyRange:    shardInfo.KeyRange,
			ServedTypes: shardInfo.ServedTypes,
			MasterCell:  shardInfo.MasterAlias.Cell,
			TabletTypes: make([]topo.TabletType, 0, len(locationAddrsMap)),
		}
		for tabletType := range locationAddrsMap {
			srvShard.TabletTypes = append(srvShard.TabletTypes, tabletType)
		}
		if err := ts.UpdateSrvShard(cell, shardInfo.Keyspace(), shardInfo.ShardName(), srvShard); err != nil {
			rec.RecordError(fmt.Errorf("writing serving data in cell %v for %v/%v failed: %v", cell, shardInfo.Keyspace(), shardInfo.ShardName(), err))
		}
		wg.Done()
	}()

	wg.Wait()
	return rec.Error()
}