func (wr *Wrangler) validateReplication(shardInfo *topo.ShardInfo, tabletMap map[topo.TabletAlias]*topo.TabletInfo, results chan<- vresult) { _, ok := tabletMap[shardInfo.MasterAlias] if !ok { results <- vresult{shardInfo.MasterAlias.String(), fmt.Errorf("master not in tablet map")} return } actionPath, err := wr.ai.GetSlaves(shardInfo.MasterAlias) if err != nil { results <- vresult{shardInfo.MasterAlias.String(), err} return } sa, err := wr.ai.WaitForCompletionReply(actionPath, wr.actionTimeout()) if err != nil { results <- vresult{shardInfo.MasterAlias.String(), err} return } slaveAddrs := sa.(*tm.SlaveList).Addrs if len(slaveAddrs) == 0 { results <- vresult{shardInfo.MasterAlias.String(), fmt.Errorf("no slaves found")} return } // Some addresses don't resolve in all locations, just use IP address if err != nil { results <- vresult{shardInfo.MasterAlias.String(), fmt.Errorf("resolve slaves failed: %v", err)} return } tabletIpMap := make(map[string]*topo.Tablet) for _, tablet := range tabletMap { ipAddr, _, err := net.SplitHostPort(tablet.MysqlIpAddr) if err != nil { results <- vresult{tablet.Alias().String(), fmt.Errorf("bad mysql addr: %v %v", tablet.MysqlIpAddr, err)} continue } tabletIpMap[ipAddr] = tablet.Tablet } // See if every slave is in the replication graph. for _, slaveAddr := range slaveAddrs { if tabletIpMap[slaveAddr] == nil { results <- vresult{shardInfo.Keyspace() + "/" + shardInfo.ShardName(), fmt.Errorf("slave not in replication graph: %v (mysql instance without vttablet?)", slaveAddr)} } } // See if every entry in the replication graph is connected to the master. for _, tablet := range tabletMap { if !tablet.IsSlaveType() { continue } ipAddr, _, err := net.SplitHostPort(tablet.MysqlIpAddr) if err != nil { results <- vresult{tablet.Alias().String(), fmt.Errorf("bad mysql addr: %v", err)} } else if !strInList(slaveAddrs, ipAddr) { results <- vresult{tablet.Alias().String(), fmt.Errorf("slave not replicating: %v %q", ipAddr, slaveAddrs)} } } }
// RefreshTablesByShard calls RefreshState on all the tables of a // given type in a shard. It would work for the master, but the // discovery wouldn't be very efficient. func (wr *Wrangler) RefreshTablesByShard(si *topo.ShardInfo, tabletType topo.TabletType, cells []string) error { tabletMap, err := topo.GetTabletMapForShardByCell(wr.ts, si.Keyspace(), si.ShardName(), cells) switch err { case nil: // keep going case topo.ErrPartialResult: wr.Logger().Warningf("RefreshTablesByShard: got partial result for shard %v/%v, may not refresh all tablets everywhere", si.Keyspace(), si.ShardName()) default: return err } // ignore errors in this phase wg := sync.WaitGroup{} for _, ti := range tabletMap { if ti.Type != tabletType { continue } wg.Add(1) go func(ti *topo.TabletInfo) { if err := wr.tmc.RefreshState(ti, wr.ActionTimeout()); err != nil { wr.Logger().Warningf("RefreshTablesByShard: failed to refresh %v: %v", ti.Alias, err) } wg.Done() }(ti) } wg.Wait() return nil }
// replicaMigrateServedFrom handles the slave (replica, rdonly) migration. func (wr *Wrangler) replicaMigrateServedFrom(ki *topo.KeyspaceInfo, sourceShard *topo.ShardInfo, destinationShard *topo.ShardInfo, servedType topo.TabletType, reverse bool, tables []string, ev *events.MigrateServedFrom) error { // Save the destination keyspace (its ServedFrom has been changed) event.DispatchUpdate(ev, "updating keyspace") if err := topo.UpdateKeyspace(wr.ts, ki); err != nil { return err } // Save the source shard (its blacklisted tables field has changed) event.DispatchUpdate(ev, "updating source shard") if sourceShard.BlacklistedTablesMap == nil { sourceShard.BlacklistedTablesMap = make(map[topo.TabletType][]string) } if reverse { delete(sourceShard.BlacklistedTablesMap, servedType) } else { sourceShard.BlacklistedTablesMap[servedType] = tables } if err := topo.UpdateShard(wr.ts, sourceShard); err != nil { return err } // Now refresh the source servers so they reload their // blacklisted table list event.DispatchUpdate(ev, "refreshing sources tablets state so they update their blacklisted tables") if err := wr.RefreshTablesByShard(sourceShard.Keyspace(), sourceShard.ShardName(), servedType); err != nil { return err } return nil }
// InitTablet creates or updates a tablet. If no parent is specified // in the tablet, and the tablet has a slave type, we will find the // appropriate parent. If createShardAndKeyspace is true and the // parent keyspace or shard don't exist, they will be created. If // allowUpdate is true, and a tablet with the same ID exists, just update it. // If a tablet is created as master, and there is already a different // master in the shard, allowMasterOverride must be set. func (wr *Wrangler) InitTablet(ctx context.Context, tablet *topodatapb.Tablet, allowMasterOverride, createShardAndKeyspace, allowUpdate bool) error { if err := topo.TabletComplete(tablet); err != nil { return err } // get the shard, possibly creating it var err error var si *topo.ShardInfo if createShardAndKeyspace { // create the parent keyspace and shard if needed si, err = wr.ts.GetOrCreateShard(ctx, tablet.Keyspace, tablet.Shard) } else { si, err = wr.ts.GetShard(ctx, tablet.Keyspace, tablet.Shard) if err == topo.ErrNoNode { return fmt.Errorf("missing parent shard, use -parent option to create it, or CreateKeyspace / CreateShard") } } // get the shard, checks a couple things if err != nil { return fmt.Errorf("cannot get (or create) shard %v/%v: %v", tablet.Keyspace, tablet.Shard, err) } if !key.KeyRangeEqual(si.KeyRange, tablet.KeyRange) { return fmt.Errorf("shard %v/%v has a different KeyRange: %v != %v", tablet.Keyspace, tablet.Shard, si.KeyRange, tablet.KeyRange) } if tablet.Type == topodatapb.TabletType_MASTER && si.HasMaster() && !topoproto.TabletAliasEqual(si.MasterAlias, tablet.Alias) && !allowMasterOverride { return fmt.Errorf("creating this tablet would override old master %v in shard %v/%v, use allow_master_override flag", topoproto.TabletAliasString(si.MasterAlias), tablet.Keyspace, tablet.Shard) } // update the shard record if needed if err := wr.updateShardCellsAndMaster(ctx, si, tablet.Alias, tablet.Type, allowMasterOverride); err != nil { return err } err = wr.ts.CreateTablet(ctx, tablet) if err == topo.ErrNodeExists && allowUpdate { // Try to update then oldTablet, err := wr.ts.GetTablet(ctx, tablet.Alias) if err != nil { return fmt.Errorf("failed reading existing tablet %v: %v", topoproto.TabletAliasString(tablet.Alias), err) } // Check we have the same keyspace / shard, and if not, // require the allowDifferentShard flag. if oldTablet.Keyspace != tablet.Keyspace || oldTablet.Shard != tablet.Shard { return fmt.Errorf("old tablet has shard %v/%v. Cannot override with shard %v/%v. Delete and re-add tablet if you want to change the tablet's keyspace/shard", oldTablet.Keyspace, oldTablet.Shard, tablet.Keyspace, tablet.Shard) } *(oldTablet.Tablet) = *tablet if err := wr.ts.UpdateTablet(ctx, oldTablet); err != nil { return fmt.Errorf("failed updating tablet %v: %v", topoproto.TabletAliasString(tablet.Alias), err) } } return nil }
func (zkts *Server) UpdateShard(si *topo.ShardInfo) error { shardPath := path.Join(globalKeyspacesPath, si.Keyspace(), "shards", si.ShardName()) _, err := zkts.zconn.Set(shardPath, jscfg.ToJson(si.Shard), -1) if err != nil { if zookeeper.IsError(err, zookeeper.ZNONODE) { err = topo.ErrNoNode } } return err }
func (wr *Wrangler) shardReplicationPositions(shardInfo *topo.ShardInfo) ([]*topo.TabletInfo, []*mysqlctl.ReplicationPosition, error) { // FIXME(msolomon) this assumes no hierarchical replication, which is currently the case. tabletMap, err := GetTabletMapForShard(wr.ts, shardInfo.Keyspace(), shardInfo.ShardName()) if err != nil { return nil, nil, err } tablets := CopyMapValues(tabletMap, []*topo.TabletInfo{}).([]*topo.TabletInfo) positions, err := wr.tabletReplicationPositions(tablets) return tablets, positions, err }
func (wr *Wrangler) shardReplicationStatuses(ctx context.Context, shardInfo *topo.ShardInfo) ([]*topo.TabletInfo, []*myproto.ReplicationStatus, error) { // FIXME(msolomon) this assumes no hierarchical replication, which is currently the case. tabletMap, err := wr.ts.GetTabletMapForShard(ctx, shardInfo.Keyspace(), shardInfo.ShardName()) if err != nil { return nil, nil, err } tablets := topotools.CopyMapValues(tabletMap, []*topo.TabletInfo{}).([]*topo.TabletInfo) stats, err := wr.tabletReplicationStatuses(ctx, tablets) return tablets, stats, err }
// updateShardCellsAndMaster will update the 'Cells' and possibly // MasterAlias records for the shard, if needed. func (wr *Wrangler) updateShardCellsAndMaster(ctx context.Context, si *topo.ShardInfo, tabletAlias topo.TabletAlias, tabletType topo.TabletType, force bool) error { // See if we need to update the Shard: // - add the tablet's cell to the shard's Cells if needed // - change the master if needed shardUpdateRequired := false if !si.HasCell(tabletAlias.Cell) { shardUpdateRequired = true } if tabletType == topo.TYPE_MASTER && si.MasterAlias != tabletAlias { shardUpdateRequired = true } if !shardUpdateRequired { return nil } actionNode := actionnode.UpdateShard() keyspace := si.Keyspace() shard := si.ShardName() lockPath, err := wr.lockShard(ctx, keyspace, shard, actionNode) if err != nil { return err } // re-read the shard with the lock si, err = wr.ts.GetShard(ctx, keyspace, shard) if err != nil { return wr.unlockShard(ctx, keyspace, shard, actionNode, lockPath, err) } // update it wasUpdated := false if !si.HasCell(tabletAlias.Cell) { si.Cells = append(si.Cells, tabletAlias.Cell) wasUpdated = true } if tabletType == topo.TYPE_MASTER && si.MasterAlias != tabletAlias { if !si.MasterAlias.IsZero() && !force { return wr.unlockShard(ctx, keyspace, shard, actionNode, lockPath, fmt.Errorf("creating this tablet would override old master %v in shard %v/%v", si.MasterAlias, keyspace, shard)) } si.MasterAlias = tabletAlias wasUpdated = true } if wasUpdated { // write it back if err := topo.UpdateShard(ctx, wr.ts, si); err != nil { return wr.unlockShard(ctx, keyspace, shard, actionNode, lockPath, err) } } // and unlock return wr.unlockShard(ctx, keyspace, shard, actionNode, lockPath, err) }
func (tee *Tee) UpdateShard(si *topo.ShardInfo) error { if err := tee.primary.UpdateShard(si); err != nil { // failed on primary, not updating secondary return err } if err := tee.secondary.UpdateShard(si); err != nil { // not critical enough to fail log.Warningf("secondary.UpdateShard(%v,%v) failed: %v", si.Keyspace(), si.ShardName(), err) } return nil }
// replicaMigrateServedFrom handles the slave (replica, rdonly) migration. func (wr *Wrangler) replicaMigrateServedFrom(ctx context.Context, ki *topo.KeyspaceInfo, sourceShard *topo.ShardInfo, destinationShard *topo.ShardInfo, servedType topodatapb.TabletType, cells []string, reverse bool, tables []string, ev *events.MigrateServedFrom) error { // Save the destination keyspace (its ServedFrom has been changed) event.DispatchUpdate(ev, "updating keyspace") if err := wr.ts.UpdateKeyspace(ctx, ki); err != nil { return err } // Save the source shard (its blacklisted tables field has changed) event.DispatchUpdate(ev, "updating source shard") if err := sourceShard.UpdateSourceBlacklistedTables(servedType, cells, reverse, tables); err != nil { return fmt.Errorf("UpdateSourceBlacklistedTables(%v/%v) failed: %v", sourceShard.Keyspace(), sourceShard.ShardName(), err) } if err := wr.ts.UpdateShard(ctx, sourceShard); err != nil { return fmt.Errorf("UpdateShard(%v/%v) failed: %v", sourceShard.Keyspace(), sourceShard.ShardName(), err) } // Now refresh the source servers so they reload their // blacklisted table list event.DispatchUpdate(ev, "refreshing sources tablets state so they update their blacklisted tables") if err := wr.RefreshTablesByShard(ctx, sourceShard, servedType, cells); err != nil { return err } return nil }
func (wr *Wrangler) validateReplication(shardInfo *topo.ShardInfo, tabletMap map[topo.TabletAlias]*topo.TabletInfo, results chan<- vresult) { masterTablet, ok := tabletMap[shardInfo.MasterAlias] if !ok { results <- vresult{shardInfo.MasterAlias.String(), fmt.Errorf("master not in tablet map")} return } slaveList, err := wr.ai.GetSlaves(masterTablet, wr.ActionTimeout()) if err != nil { results <- vresult{shardInfo.MasterAlias.String(), err} return } if len(slaveList) == 0 { results <- vresult{shardInfo.MasterAlias.String(), fmt.Errorf("no slaves found")} return } // Some addresses don't resolve in all locations, just use IP address if err != nil { results <- vresult{shardInfo.MasterAlias.String(), fmt.Errorf("resolve slaves failed: %v", err)} return } tabletIpMap := make(map[string]*topo.Tablet) slaveIpMap := make(map[string]bool) for _, tablet := range tabletMap { tabletIpMap[normalizeIP(tablet.IPAddr)] = tablet.Tablet } // See if every slave is in the replication graph. for _, slaveAddr := range slaveList { if tabletIpMap[normalizeIP(slaveAddr)] == nil { results <- vresult{shardInfo.Keyspace() + "/" + shardInfo.ShardName(), fmt.Errorf("slave not in replication graph: %v (mysql instance without vttablet?)", slaveAddr)} } slaveIpMap[normalizeIP(slaveAddr)] = true } // See if every entry in the replication graph is connected to the master. for _, tablet := range tabletMap { if !tablet.IsSlaveType() { continue } if !slaveIpMap[normalizeIP(tablet.IPAddr)] { results <- vresult{tablet.Alias.String(), fmt.Errorf("slave not replicating: %v %q", tablet.IPAddr, slaveList)} } } }
func (zkts *Server) UpdateShard(si *topo.ShardInfo) error { shardPath := path.Join(globalKeyspacesPath, si.Keyspace(), "shards", si.ShardName()) _, err := zkts.zconn.Set(shardPath, jscfg.ToJson(si.Shard), -1) if err != nil { if zookeeper.IsError(err, zookeeper.ZNONODE) { err = topo.ErrNoNode } return err } event.Dispatch(&events.ShardChange{ ShardInfo: *si, Status: "updated", }) return nil }
func (wr *Wrangler) finishReparent(si *topo.ShardInfo, masterElect *topo.TabletInfo, majorityRestart, leaveMasterReadOnly bool) error { // If the majority of slaves restarted, move ahead. if majorityRestart { if leaveMasterReadOnly { wr.logger.Warningf("leaving master-elect read-only, change with: vtctl SetReadWrite %v", masterElect.Alias) } else { wr.logger.Infof("marking master-elect read-write %v", masterElect.Alias) if err := wr.tmc.SetReadWrite(masterElect, wr.ActionTimeout()); err != nil { wr.logger.Warningf("master master-elect read-write failed, leaving master-elect read-only, change with: vtctl SetReadWrite %v", masterElect.Alias) } } } else { wr.logger.Warningf("minority reparent, manual fixes are needed, leaving master-elect read-only, change with: vtctl SetReadWrite %v", masterElect.Alias) } // save the new master in the shard info si.MasterAlias = masterElect.Alias if err := topo.UpdateShard(wr.ts, si); err != nil { wr.logger.Errorf("Failed to save new master into shard: %v", err) return err } // We rebuild all the cells, as we may have taken tablets in and // out of the graph. wr.logger.Infof("rebuilding shard serving graph data") _, err := topotools.RebuildShard(wr.logger, wr.ts, masterElect.Keyspace, masterElect.Shard, nil, wr.lockTimeout, interrupted) return err }
func (wr *Wrangler) finishReparent(si *topo.ShardInfo, masterElect *topo.TabletInfo, majorityRestart, leaveMasterReadOnly bool) error { // If the majority of slaves restarted, move ahead. if majorityRestart { if leaveMasterReadOnly { log.Warningf("leaving master-elect read-only, change with: vtctl SetReadWrite %v", masterElect.Alias) } else { log.Infof("marking master-elect read-write %v", masterElect.Alias) actionPath, err := wr.ai.SetReadWrite(masterElect.Alias) if err == nil { err = wr.ai.WaitForCompletion(actionPath, wr.actionTimeout()) } if err != nil { log.Warningf("master master-elect read-write failed, leaving master-elect read-only, change with: vtctl SetReadWrite %v", masterElect.Alias) } } } else { log.Warningf("minority reparent, manual fixes are needed, leaving master-elect read-only, change with: vtctl SetReadWrite %v", masterElect.Alias) } // save the new master in the shard info si.MasterAlias = masterElect.Alias if err := wr.ts.UpdateShard(si); err != nil { log.Errorf("Failed to save new master into shard: %v", err) return err } // We rebuild all the cells, as we may have taken tablets in and // out of the graph. log.Infof("rebuilding shard serving graph data") return topotools.RebuildShard(wr.ts, masterElect.Keyspace, masterElect.Shard, topotools.RebuildShardOptions{IgnorePartialResult: false}, wr.lockTimeout, interrupted) }
// UpdateShard is part of the topo.Server interface func (zkts *Server) UpdateShard(ctx context.Context, si *topo.ShardInfo, existingVersion int64) (int64, error) { shardPath := path.Join(globalKeyspacesPath, si.Keyspace(), "shards", si.ShardName()) stat, err := zkts.zconn.Set(shardPath, jscfg.ToJSON(si.Shard), int(existingVersion)) if err != nil { if zookeeper.IsError(err, zookeeper.ZNONODE) { err = topo.ErrNoNode } return -1, err } event.Dispatch(&events.ShardChange{ ShardInfo: *si, Status: "updated", }) return int64(stat.Version()), nil }
func (wr *Wrangler) validateReplication(ctx context.Context, shardInfo *topo.ShardInfo, tabletMap map[topodatapb.TabletAlias]*topo.TabletInfo, results chan<- error) { if shardInfo.MasterAlias == nil { results <- fmt.Errorf("no master in shard record %v/%v", shardInfo.Keyspace(), shardInfo.ShardName()) return } masterTabletInfo, ok := tabletMap[*shardInfo.MasterAlias] if !ok { results <- fmt.Errorf("master %v not in tablet map", topoproto.TabletAliasString(shardInfo.MasterAlias)) return } slaveList, err := wr.tmc.GetSlaves(ctx, masterTabletInfo.Tablet) if err != nil { results <- fmt.Errorf("GetSlaves(%v) failed: %v", masterTabletInfo, err) return } if len(slaveList) == 0 { results <- fmt.Errorf("no slaves of tablet %v found", topoproto.TabletAliasString(shardInfo.MasterAlias)) return } tabletIPMap := make(map[string]*topodatapb.Tablet) slaveIPMap := make(map[string]bool) for _, tablet := range tabletMap { tabletIPMap[normalizeIP(tablet.Ip)] = tablet.Tablet } // See if every slave is in the replication graph. for _, slaveAddr := range slaveList { if tabletIPMap[normalizeIP(slaveAddr)] == nil { results <- fmt.Errorf("slave %v not in replication graph for shard %v/%v (mysql instance without vttablet?)", slaveAddr, shardInfo.Keyspace(), shardInfo.ShardName()) } slaveIPMap[normalizeIP(slaveAddr)] = true } // See if every entry in the replication graph is connected to the master. for _, tablet := range tabletMap { if !tablet.IsSlaveType() { continue } if !slaveIPMap[normalizeIP(tablet.Ip)] { results <- fmt.Errorf("slave %v not replicating: %v slave list: %q", topoproto.TabletAliasString(tablet.Alias), tablet.Ip, slaveList) } } }
// Update shard file with new master, replicas, etc. // // Re-read from TopologyServer to make sure we are using the side // effects of all actions. // // This function should only be used with an action lock on the shard // - otherwise the consistency of the serving graph data can't be // guaranteed. func (wr *Wrangler) rebuildShard(keyspace, shard string, options rebuildShardOptions) error { log.Infof("rebuildShard %v/%v", keyspace, shard) // read the existing shard info. It has to exist. var ( shardInfo *topo.ShardInfo err error ) if options.Critical { shardInfo, err = wr.ts.GetShardCritical(keyspace, shard) } else { shardInfo, err = wr.ts.GetShard(keyspace, shard) } if err != nil { return err } tabletMap, err := GetTabletMapForShardByCell(wr.ts, keyspace, shard, options.Cells) if err != nil { if options.IgnorePartialResult && err == topo.ErrPartialResult { log.Warningf("rebuildShard: got topo.ErrPartialResult from GetTabletMapForShard, but skipping error as it was expected") } else { return err } } tablets := make([]*topo.TabletInfo, 0, len(tabletMap)) for _, ti := range tabletMap { if ti.Keyspace != shardInfo.Keyspace() || ti.Shard != shardInfo.ShardName() { return fmt.Errorf("CRITICAL: tablet %v is in replication graph for shard %v/%v but belongs to shard %v:%v (maybe remove its replication path in shard %v/%v)", ti.Alias, keyspace, shard, ti.Keyspace, ti.Shard, keyspace, shard) } if !ti.IsInReplicationGraph() { // only valid case is a scrapped master in the // catastrophic reparent case if ti.Parent.Uid != topo.NO_TABLET { log.Warningf("Tablet %v should not be in the replication graph, please investigate (it will be ignored in the rebuild)", ti.Alias) } } tablets = append(tablets, ti) } return wr.rebuildShardSrvGraph(shardInfo, tablets, options.Cells) }
// UpdateShard implements topo.Server. func (s *Server) UpdateShard(ctx context.Context, si *topo.ShardInfo, existingVersion int64) (int64, error) { data := jscfg.ToJSON(si.Shard) resp, err := s.getGlobal().CompareAndSwap(shardFilePath(si.Keyspace(), si.ShardName()), data, 0 /* ttl */, "" /* prevValue */, uint64(existingVersion)) if err != nil { return -1, convertError(err) } if resp.Node == nil { return -1, ErrBadResponse } event.Dispatch(&events.ShardChange{ ShardInfo: *si, Status: "updated", }) return int64(resp.Node.ModifiedIndex), nil }
func (wr *Wrangler) applySchemaShard(ctx context.Context, shardInfo *topo.ShardInfo, preflight *myproto.SchemaChangeResult, masterTabletAlias *pb.TabletAlias, change string, newParentTabletAlias *pb.TabletAlias, simple, force bool, waitSlaveTimeout time.Duration) (*myproto.SchemaChangeResult, error) { // find all the shards we need to handle aliases, err := topo.FindAllTabletAliasesInShard(ctx, wr.ts, shardInfo.Keyspace(), shardInfo.ShardName()) if err != nil { return nil, err } // build the array of tabletStatus we're going to use statusArray := make([]*tabletStatus, 0, len(aliases)-1) for _, alias := range aliases { if alias == masterTabletAlias { // we skip the master continue } ti, err := wr.ts.GetTablet(ctx, alias) if err != nil { return nil, err } statusArray = append(statusArray, &tabletStatus{ti: ti}) } // get schema on all tablets. log.Infof("Getting schema on all tablets for shard %v/%v", shardInfo.Keyspace(), shardInfo.ShardName()) wg := &sync.WaitGroup{} for _, status := range statusArray { wg.Add(1) go func(status *tabletStatus) { status.beforeSchema, status.lastError = wr.tmc.GetSchema(ctx, status.ti, nil, nil, false) wg.Done() }(status) } wg.Wait() // quick check for errors for _, status := range statusArray { if status.lastError != nil { return nil, fmt.Errorf("Error getting schema on tablet %v: %v", status.ti.Alias, status.lastError) } } // simple or complex? if simple { return wr.applySchemaShardSimple(ctx, statusArray, preflight, masterTabletAlias, change, force) } return wr.applySchemaShardComplex(ctx, statusArray, shardInfo, preflight, masterTabletAlias, change, newParentTabletAlias, force, waitSlaveTimeout) }
func (wr *Wrangler) migrateServedFrom(ki *topo.KeyspaceInfo, destinationShard *topo.ShardInfo, servedType topo.TabletType, reverse bool) (err error) { // re-read and update keyspace info record ki, err = wr.ts.GetKeyspace(ki.KeyspaceName()) if err != nil { return err } if reverse { if _, ok := ki.ServedFrom[servedType]; ok { return fmt.Errorf("Destination Keyspace %s is not serving type %v", ki.KeyspaceName(), servedType) } ki.ServedFrom[servedType] = destinationShard.SourceShards[0].Keyspace } else { if _, ok := ki.ServedFrom[servedType]; !ok { return fmt.Errorf("Destination Keyspace %s is already serving type %v", ki.KeyspaceName(), servedType) } delete(ki.ServedFrom, servedType) } // re-read and check the destination shard destinationShard, err = wr.ts.GetShard(destinationShard.Keyspace(), destinationShard.ShardName()) if err != nil { return err } if len(destinationShard.SourceShards) != 1 { return fmt.Errorf("Destination shard %v/%v is not a vertical split target", destinationShard.Keyspace(), destinationShard.ShardName()) } tables := destinationShard.SourceShards[0].Tables // read the source shard, we'll need its master, and we'll need to // update the blacklisted tables. var sourceShard *topo.ShardInfo sourceShard, err = wr.ts.GetShard(destinationShard.SourceShards[0].Keyspace, destinationShard.SourceShards[0].Shard) if err != nil { return err } ev := &events.MigrateServedFrom{ Keyspace: *ki, SourceShard: *sourceShard, DestinationShard: *destinationShard, ServedType: servedType, Reverse: reverse, } event.DispatchUpdate(ev, "start") defer func() { if err != nil { event.DispatchUpdate(ev, "failed: "+err.Error()) } }() if servedType == topo.TYPE_MASTER { err = wr.masterMigrateServedFrom(ki, sourceShard, destinationShard, servedType, tables, ev) } else { err = wr.replicaMigrateServedFrom(ki, sourceShard, destinationShard, servedType, reverse, tables, ev) } event.DispatchUpdate(ev, "finished") return }
// UpdateSrvShard creates the SrvShard object based on the global ShardInfo, // and writes it to the given cell. func UpdateSrvShard(ctx context.Context, ts topo.Server, cell string, si *topo.ShardInfo) error { srvShard := &topo.SrvShard{ Name: si.ShardName(), KeyRange: si.KeyRange, MasterCell: si.MasterAlias.Cell, } return ts.UpdateSrvShard(ctx, cell, si.Keyspace(), si.ShardName(), srvShard) }
func (wr *Wrangler) migrateServedFrom(ctx context.Context, ki *topo.KeyspaceInfo, destinationShard *topo.ShardInfo, servedType topodatapb.TabletType, cells []string, reverse bool, filteredReplicationWaitTime time.Duration) (err error) { // re-read and update keyspace info record ki, err = wr.ts.GetKeyspace(ctx, ki.KeyspaceName()) if err != nil { return err } if reverse { ki.UpdateServedFromMap(servedType, cells, destinationShard.SourceShards[0].Keyspace, false, nil) } else { ki.UpdateServedFromMap(servedType, cells, destinationShard.SourceShards[0].Keyspace, true, destinationShard.Cells) } // re-read and check the destination shard destinationShard, err = wr.ts.GetShard(ctx, destinationShard.Keyspace(), destinationShard.ShardName()) if err != nil { return err } if len(destinationShard.SourceShards) != 1 { return fmt.Errorf("Destination shard %v/%v is not a vertical split target", destinationShard.Keyspace(), destinationShard.ShardName()) } tables := destinationShard.SourceShards[0].Tables // read the source shard, we'll need its master, and we'll need to // update the blacklisted tables. var sourceShard *topo.ShardInfo sourceShard, err = wr.ts.GetShard(ctx, destinationShard.SourceShards[0].Keyspace, destinationShard.SourceShards[0].Shard) if err != nil { return err } ev := &events.MigrateServedFrom{ KeyspaceName: ki.KeyspaceName(), SourceShard: *sourceShard, DestinationShard: *destinationShard, ServedType: servedType, Reverse: reverse, } event.DispatchUpdate(ev, "start") defer func() { if err != nil { event.DispatchUpdate(ev, "failed: "+err.Error()) } }() if servedType == topodatapb.TabletType_MASTER { err = wr.masterMigrateServedFrom(ctx, ki, sourceShard, destinationShard, tables, ev, filteredReplicationWaitTime) } else { err = wr.replicaMigrateServedFrom(ctx, ki, sourceShard, destinationShard, servedType, cells, reverse, tables, ev) } event.DispatchUpdate(ev, "finished") return }
// updateShardCellsAndMaster will update the 'Cells' and possibly // MasterAlias records for the shard, if needed. func (wr *Wrangler) updateShardCellsAndMaster(ctx context.Context, si *topo.ShardInfo, tabletAlias *topodatapb.TabletAlias, tabletType topodatapb.TabletType, allowMasterOverride bool) error { // See if we need to update the Shard: // - add the tablet's cell to the shard's Cells if needed // - change the master if needed shardUpdateRequired := false if !si.HasCell(tabletAlias.Cell) { shardUpdateRequired = true } if tabletType == topodatapb.TabletType_MASTER && !topoproto.TabletAliasEqual(si.MasterAlias, tabletAlias) { shardUpdateRequired = true } if !shardUpdateRequired { return nil } // we do need to update the shard, lock it to not interfere with // reparenting operations. actionNode := actionnode.UpdateShard() keyspace := si.Keyspace() shard := si.ShardName() lockPath, err := wr.lockShard(ctx, keyspace, shard, actionNode) if err != nil { return err } // run the update _, err = wr.ts.UpdateShardFields(ctx, keyspace, shard, func(s *topodatapb.Shard) error { wasUpdated := false if !topoproto.ShardHasCell(s, tabletAlias.Cell) { s.Cells = append(s.Cells, tabletAlias.Cell) wasUpdated = true } if tabletType == topodatapb.TabletType_MASTER && !topoproto.TabletAliasEqual(s.MasterAlias, tabletAlias) { if !topoproto.TabletAliasIsZero(s.MasterAlias) && !allowMasterOverride { return fmt.Errorf("creating this tablet would override old master %v in shard %v/%v", topoproto.TabletAliasString(s.MasterAlias), keyspace, shard) } s.MasterAlias = tabletAlias wasUpdated = true } if !wasUpdated { return topo.ErrNoUpdateNeeded } return nil }) return wr.unlockShard(ctx, keyspace, shard, actionNode, lockPath, err) }
// RefreshTablesByShard calls RefreshState on all the tables of a // given type in a shard. It would work for the master, but the // discovery wouldn't be very efficient. func (wr *Wrangler) RefreshTablesByShard(ctx context.Context, si *topo.ShardInfo, tabletType topodatapb.TabletType, cells []string) error { wr.Logger().Infof("RefreshTablesByShard called on shard %v/%v", si.Keyspace(), si.ShardName()) tabletMap, err := wr.ts.GetTabletMapForShardByCell(ctx, si.Keyspace(), si.ShardName(), cells) switch err { case nil: // keep going case topo.ErrPartialResult: wr.Logger().Warningf("RefreshTablesByShard: got partial result for shard %v/%v, may not refresh all tablets everywhere", si.Keyspace(), si.ShardName()) default: return err } // ignore errors in this phase wg := sync.WaitGroup{} for _, ti := range tabletMap { if ti.Type != tabletType { continue } wg.Add(1) go func(ti *topo.TabletInfo) { wr.Logger().Infof("Calling RefreshState on tablet %v", ti.AliasString()) // Setting an upper bound timeout to fail faster in case of an error. // Using 60 seconds because RefreshState should not take more than 30 seconds. // (RefreshState will restart the tablet's QueryService and most time will be spent on the shutdown, i.e. waiting up to 30 seconds on transactions (see Config.TransactionTimeout)). ctx, cancel := context.WithTimeout(ctx, 60*time.Second) if err := wr.tmc.RefreshState(ctx, ti); err != nil { wr.Logger().Warningf("RefreshTablesByShard: failed to refresh %v: %v", ti.AliasString(), err) } cancel() wg.Done() }(ti) } wg.Wait() return nil }
// updateShardCellsAndMaster will update the 'Cells' and possibly // MasterAlias records for the shard, if needed. func (wr *Wrangler) updateShardCellsAndMaster(ctx context.Context, si *topo.ShardInfo, tabletAlias *topodatapb.TabletAlias, tabletType topodatapb.TabletType, allowMasterOverride bool) error { // See if we need to update the Shard: // - add the tablet's cell to the shard's Cells if needed // - change the master if needed shardUpdateRequired := false if !si.HasCell(tabletAlias.Cell) { shardUpdateRequired = true } if tabletType == topodatapb.TabletType_MASTER && !topoproto.TabletAliasEqual(si.MasterAlias, tabletAlias) { shardUpdateRequired = true } if !shardUpdateRequired { return nil } // run the update _, err := wr.ts.UpdateShardFields(ctx, si.Keyspace(), si.ShardName(), func(s *topo.ShardInfo) error { wasUpdated := false if !s.HasCell(tabletAlias.Cell) { s.Cells = append(s.Cells, tabletAlias.Cell) wasUpdated = true } if tabletType == topodatapb.TabletType_MASTER && !topoproto.TabletAliasEqual(s.MasterAlias, tabletAlias) { if !topoproto.TabletAliasIsZero(s.MasterAlias) && !allowMasterOverride { return fmt.Errorf("creating this tablet would override old master %v in shard %v/%v", topoproto.TabletAliasString(s.MasterAlias), si.Keyspace(), si.ShardName()) } s.MasterAlias = tabletAlias wasUpdated = true } if !wasUpdated { return topo.ErrNoUpdateNeeded } return nil }) return err }
func (wr *Wrangler) applySchemaShardComplex(ctx context.Context, statusArray []*tabletStatus, shardInfo *topo.ShardInfo, preflight *myproto.SchemaChangeResult, masterTabletAlias *pb.TabletAlias, change string, newParentTabletAlias *pb.TabletAlias, force bool, waitSlaveTimeout time.Duration) (*myproto.SchemaChangeResult, error) { // apply the schema change to all replica / slave tablets for _, status := range statusArray { // if already applied, we skip this guy diffs := myproto.DiffSchemaToArray("after", preflight.AfterSchema, topo.TabletAliasString(status.ti.Alias), status.beforeSchema) if len(diffs) == 0 { log.Infof("Tablet %v already has the AfterSchema, skipping", status.ti.Alias) continue } // make sure the before schema matches diffs = myproto.DiffSchemaToArray("master", preflight.BeforeSchema, topo.TabletAliasString(status.ti.Alias), status.beforeSchema) if len(diffs) > 0 { if force { log.Warningf("Tablet %v has inconsistent schema, ignoring: %v", status.ti.Alias, strings.Join(diffs, "\n")) } else { return nil, fmt.Errorf("Tablet %v has inconsistent schema: %v", status.ti.Alias, strings.Join(diffs, "\n")) } } // take this guy out of the serving graph if necessary ti, err := wr.ts.GetTablet(ctx, status.ti.Alias) if err != nil { return nil, err } typeChangeRequired := ti.IsInServingGraph() if typeChangeRequired { // note we want to update the serving graph there err = wr.changeTypeInternal(ctx, ti.Alias, pb.TabletType_SCHEMA_UPGRADE) if err != nil { return nil, err } } // apply the schema change log.Infof("Applying schema change to slave %v in complex mode", status.ti.Alias) sc := &myproto.SchemaChange{Sql: change, Force: force, AllowReplication: false, BeforeSchema: preflight.BeforeSchema, AfterSchema: preflight.AfterSchema} _, err = wr.ApplySchema(ctx, status.ti.Alias, sc) if err != nil { return nil, err } // put this guy back into the serving graph if typeChangeRequired { err = wr.changeTypeInternal(ctx, ti.Alias, ti.Tablet.Type) if err != nil { return nil, err } } } // if newParentTabletAlias is passed in, use that as the new master if !topo.TabletAliasIsZero(newParentTabletAlias) { log.Infof("Reparenting with new master set to %v", newParentTabletAlias) oldMasterAlias := shardInfo.MasterAlias // Create reusable Reparent event with available info ev := &events.Reparent{} if err := wr.plannedReparentShardLocked(ctx, ev, shardInfo.Keyspace(), shardInfo.ShardName(), newParentTabletAlias, waitSlaveTimeout); err != nil { return nil, err } // Here we would apply the schema change to the old // master, but we just scrap it, to be consistent // with the previous implementation of the reparent. // (this code will be refactored at some point anyway) if err := wr.Scrap(ctx, oldMasterAlias, false, false); err != nil { wr.Logger().Warningf("Scrapping old master %v from shard %v/%v failed: %v", oldMasterAlias, shardInfo.Keyspace(), shardInfo.ShardName(), err) } } return &myproto.SchemaChangeResult{BeforeSchema: preflight.BeforeSchema, AfterSchema: preflight.AfterSchema}, nil }
func (wr *Wrangler) migrateServedFrom(ki *topo.KeyspaceInfo, si *topo.ShardInfo, servedType topo.TabletType, reverse bool) (err error) { // re-read and update keyspace info record ki, err = wr.ts.GetKeyspace(ki.KeyspaceName()) if err != nil { return err } if reverse { if _, ok := ki.ServedFrom[servedType]; ok { return fmt.Errorf("Destination Keyspace %s is not serving type %v", ki.KeyspaceName(), servedType) } ki.ServedFrom[servedType] = si.SourceShards[0].Keyspace } else { if _, ok := ki.ServedFrom[servedType]; !ok { return fmt.Errorf("Destination Keyspace %s is already serving type %v", ki.KeyspaceName(), servedType) } delete(ki.ServedFrom, servedType) } // re-read and check the destination shard si, err = wr.ts.GetShard(si.Keyspace(), si.ShardName()) if err != nil { return err } if len(si.SourceShards) != 1 { return fmt.Errorf("Destination shard %v/%v is not a vertical split target", si.Keyspace(), si.ShardName()) } tables := si.SourceShards[0].Tables // read the source shard, we'll need its master sourceShard, err := wr.ts.GetShard(si.SourceShards[0].Keyspace, si.SourceShards[0].Shard) if err != nil { return err } ev := &events.MigrateServedFrom{ Keyspace: *ki, SourceShard: *sourceShard, DestinationShard: *si, ServedType: servedType, Reverse: reverse, } event.DispatchUpdate(ev, "start") defer func() { if err != nil { event.DispatchUpdate(ev, "failed: "+err.Error()) } }() // For master type migration, need to: // - switch the source shard to read-only // - gather the replication point // - wait for filtered replication to catch up before we continue // - disable filtered replication after the fact var sourceMasterTabletInfo *topo.TabletInfo if servedType == topo.TYPE_MASTER { // set master to read-only event.DispatchUpdate(ev, "setting source shard master to read-only") actionPath, err := wr.ai.SetReadOnly(sourceShard.MasterAlias) if err != nil { return err } if err := wr.WaitForCompletion(actionPath); err != nil { return err } // get the position event.DispatchUpdate(ev, "getting master position") sourceMasterTabletInfo, err = wr.ts.GetTablet(sourceShard.MasterAlias) if err != nil { return err } masterPosition, err := wr.ai.MasterPosition(sourceMasterTabletInfo, wr.ActionTimeout()) if err != nil { return err } // wait for it event.DispatchUpdate(ev, "waiting for destination master to catch up to source master") if err := wr.ai.WaitBlpPosition(si.MasterAlias, blproto.BlpPosition{ Uid: 0, Position: masterPosition, }, wr.ActionTimeout()); err != nil { return err } // and clear the shard record si.SourceShards = nil } // All is good, we can save the keyspace and shard (if needed) now event.DispatchUpdate(ev, "updating keyspace") if err = topo.UpdateKeyspace(wr.ts, ki); err != nil { return err } event.DispatchUpdate(ev, "updating destination shard") if servedType == topo.TYPE_MASTER { if err := topo.UpdateShard(wr.ts, si); err != nil { return err } } // Tell the new shards masters they can now be read-write. // Invoking a remote action will also make the tablet stop filtered // replication. event.DispatchUpdate(ev, "setting destination shard masters read-write") if servedType == topo.TYPE_MASTER { if err := wr.makeMastersReadWrite([]*topo.ShardInfo{si}); err != nil { return err } } // Now blacklist the table list on the right servers event.DispatchUpdate(ev, "setting blacklisted tables on source shard") if servedType == topo.TYPE_MASTER { if err := wr.ai.SetBlacklistedTables(sourceMasterTabletInfo, tables, wr.ActionTimeout()); err != nil { return err } } else { // We use the list of tables that are replicating // for the blacklist. In case of a reverse move, we clear the // blacklist. if reverse { tables = nil } if err := wr.SetBlacklistedTablesByShard(sourceShard.Keyspace(), sourceShard.ShardName(), servedType, tables); err != nil { return err } } event.DispatchUpdate(ev, "finished") return nil }
// rebuildCellSrvShard computes and writes the serving graph data to a // single cell func rebuildCellSrvShard(ctx context.Context, log logutil.Logger, ts topo.Server, si *topo.ShardInfo, cell string) (err error) { log.Infof("rebuildCellSrvShard %v/%v in cell %v", si.Keyspace(), si.ShardName(), cell) for { select { case <-ctx.Done(): return ctx.Err() default: } // Read existing EndPoints node versions, so we know if any // changes sneak in after we read the tablets. versions, err := getEndPointsVersions(ctx, ts, cell, si.Keyspace(), si.ShardName()) // Get all tablets in this cell/shard. tablets, err := ts.GetTabletMapForShardByCell(ctx, si.Keyspace(), si.ShardName(), []string{cell}) if err != nil { if err != topo.ErrPartialResult { return err } log.Warningf("Got ErrPartialResult from topo.GetTabletMapForShardByCell(%v), some tablets may not be added properly to serving graph", cell) } // Build up the serving graph from scratch. serving := make(map[pb.TabletType]*pb.EndPoints) for _, tablet := range tablets { // Only add serving types. if !tablet.IsInServingGraph() { continue } // Check the Keyspace and Shard for the tablet are right. if tablet.Keyspace != si.Keyspace() || tablet.Shard != si.ShardName() { return fmt.Errorf("CRITICAL: tablet %v is in replication graph for shard %v/%v but belongs to shard %v:%v", tablet.Alias, si.Keyspace(), si.ShardName(), tablet.Keyspace, tablet.Shard) } // Add the tablet to the list. endpoints, ok := serving[tablet.Type] if !ok { endpoints = topo.NewEndPoints() serving[tablet.Type] = endpoints } entry, err := topo.TabletEndPoint(tablet.Tablet) if err != nil { log.Warningf("EndPointForTablet failed for tablet %v: %v", tablet.Alias, err) continue } endpoints.Entries = append(endpoints.Entries, entry) } wg := sync.WaitGroup{} fatalErrs := concurrency.AllErrorRecorder{} retryErrs := concurrency.AllErrorRecorder{} // Write nodes that should exist. for tabletType, endpoints := range serving { wg.Add(1) go func(tabletType pb.TabletType, endpoints *pb.EndPoints) { defer wg.Done() log.Infof("saving serving graph for cell %v shard %v/%v tabletType %v", cell, si.Keyspace(), si.ShardName(), tabletType) version, ok := versions[tabletType] if !ok { // This type didn't exist when we first checked. // Try to create, but only if it still doesn't exist. if err := ts.CreateEndPoints(ctx, cell, si.Keyspace(), si.ShardName(), tabletType, endpoints); err != nil { log.Warningf("CreateEndPoints(%v, %v, %v) failed during rebuild: %v", cell, si, tabletType, err) switch err { case topo.ErrNodeExists: retryErrs.RecordError(err) default: fatalErrs.RecordError(err) } } return } // Update only if the version matches. if err := ts.UpdateEndPoints(ctx, cell, si.Keyspace(), si.ShardName(), tabletType, endpoints, version); err != nil { log.Warningf("UpdateEndPoints(%v, %v, %v) failed during rebuild: %v", cell, si, tabletType, err) switch err { case topo.ErrBadVersion, topo.ErrNoNode: retryErrs.RecordError(err) default: fatalErrs.RecordError(err) } } }(tabletType, endpoints) } // Delete nodes that shouldn't exist. for tabletType, version := range versions { if _, ok := serving[tabletType]; !ok { wg.Add(1) go func(tabletType pb.TabletType, version int64) { defer wg.Done() log.Infof("removing stale db type from serving graph: %v", tabletType) if err := ts.DeleteEndPoints(ctx, cell, si.Keyspace(), si.ShardName(), tabletType, version); err != nil && err != topo.ErrNoNode { log.Warningf("DeleteEndPoints(%v, %v, %v) failed during rebuild: %v", cell, si, tabletType, err) switch err { case topo.ErrNoNode: // Someone else deleted it, which is fine. case topo.ErrBadVersion: retryErrs.RecordError(err) default: fatalErrs.RecordError(err) } } }(tabletType, version) } } // Update srvShard object wg.Add(1) go func() { defer wg.Done() log.Infof("updating shard serving graph in cell %v for %v/%v", cell, si.Keyspace(), si.ShardName()) if err := UpdateSrvShard(ctx, ts, cell, si); err != nil { fatalErrs.RecordError(err) log.Warningf("writing serving data in cell %v for %v/%v failed: %v", cell, si.Keyspace(), si.ShardName(), err) } }() wg.Wait() // If there are any fatal errors, give up. if fatalErrs.HasErrors() { return fatalErrs.Error() } // If there are any retry errors, try again. if retryErrs.HasErrors() { continue } // Otherwise, success! return nil } }
func (wr *Wrangler) applySchemaShardComplex(statusArray []*TabletStatus, shardInfo *topo.ShardInfo, preflight *mysqlctl.SchemaChangeResult, masterTabletAlias topo.TabletAlias, change string, newParentTabletAlias topo.TabletAlias, force bool) (*mysqlctl.SchemaChangeResult, error) { // apply the schema change to all replica / slave tablets for _, status := range statusArray { // if already applied, we skip this guy diffs := mysqlctl.DiffSchemaToArray("after", preflight.AfterSchema, status.ti.Alias().String(), status.beforeSchema) if len(diffs) == 0 { relog.Info("Tablet %v already has the AfterSchema, skipping", status.ti.Alias()) continue } // make sure the before schema matches diffs = mysqlctl.DiffSchemaToArray("master", preflight.BeforeSchema, status.ti.Alias().String(), status.beforeSchema) if len(diffs) > 0 { if force { relog.Warning("Tablet %v has inconsistent schema, ignoring: %v", status.ti.Alias(), strings.Join(diffs, "\n")) } else { return nil, fmt.Errorf("Tablet %v has inconsistent schema: %v", status.ti.Alias(), strings.Join(diffs, "\n")) } } // take this guy out of the serving graph if necessary ti, err := wr.ts.GetTablet(status.ti.Alias()) if err != nil { return nil, err } typeChangeRequired := ti.Tablet.IsServingType() if typeChangeRequired { // note we want to update the serving graph there err = wr.changeTypeInternal(ti.Alias(), topo.TYPE_SCHEMA_UPGRADE) if err != nil { return nil, err } } // apply the schema change relog.Info("Applying schema change to slave %v in complex mode", status.ti.Alias()) sc := &mysqlctl.SchemaChange{Sql: change, Force: force, AllowReplication: false, BeforeSchema: preflight.BeforeSchema, AfterSchema: preflight.AfterSchema} _, err = wr.ApplySchema(status.ti.Alias(), sc) if err != nil { return nil, err } // put this guy back into the serving graph if typeChangeRequired { err = wr.changeTypeInternal(ti.Alias(), ti.Tablet.Type) if err != nil { return nil, err } } } // if newParentTabletAlias is passed in, use that as the new master if newParentTabletAlias != (topo.TabletAlias{}) { relog.Info("Reparenting with new master set to %v", newParentTabletAlias) tabletMap, err := GetTabletMapForShard(wr.ts, shardInfo.Keyspace(), shardInfo.ShardName()) if err != nil { return nil, err } slaveTabletMap, foundMaster, err := slaveTabletMap(tabletMap) if err != nil { return nil, err } newMasterTablet, err := wr.ts.GetTablet(newParentTabletAlias) if err != nil { return nil, err } err = wr.reparentShardGraceful(slaveTabletMap, foundMaster, newMasterTablet /*leaveMasterReadOnly*/, false) if err != nil { return nil, err } // Here we would apply the schema change to the old // master, but after a reparent it's in Scrap state, // so no need to. When/if reparent leaves the // original master in a different state (like replica // or rdonly), then we should apply the schema there // too. relog.Info("Skipping schema change on old master %v in complex mode, it's been Scrapped", masterTabletAlias) } return &mysqlctl.SchemaChangeResult{BeforeSchema: preflight.BeforeSchema, AfterSchema: preflight.AfterSchema}, nil }
func (wr *Wrangler) applySchemaShard(shardInfo *topo.ShardInfo, preflight *mysqlctl.SchemaChangeResult, masterTabletAlias topo.TabletAlias, change string, newParentTabletAlias topo.TabletAlias, simple, force bool) (*mysqlctl.SchemaChangeResult, error) { // find all the shards we need to handle aliases, err := topo.FindAllTabletAliasesInShard(wr.ts, shardInfo.Keyspace(), shardInfo.ShardName()) if err != nil { return nil, err } // build the array of TabletStatus we're going to use statusArray := make([]*TabletStatus, 0, len(aliases)-1) for _, alias := range aliases { if alias == masterTabletAlias { // we skip the master continue } ti, err := wr.ts.GetTablet(alias) if err != nil { return nil, err } if ti.Type == topo.TYPE_LAG { // lag tablets are usually behind, not replicating, // and a general pain. So let's just skip them // all together. // TODO(alainjobart) figure out other types to skip: // ValidateSchemaShard only does the serving types. // We do everything in the replication graph // but LAG. This seems fine for now. relog.Info("Skipping tablet %v as it is LAG", ti.Alias()) continue } statusArray = append(statusArray, &TabletStatus{ti: ti}) } // get schema on all tablets. relog.Info("Getting schema on all tablets for shard %v/%v", shardInfo.Keyspace(), shardInfo.ShardName()) wg := &sync.WaitGroup{} for _, status := range statusArray { wg.Add(1) go func(status *TabletStatus) { status.beforeSchema, status.lastError = wr.GetSchemaTablet(status.ti, nil, false) wg.Done() }(status) } wg.Wait() // quick check for errors for _, status := range statusArray { if status.lastError != nil { return nil, fmt.Errorf("Error getting schema on tablet %v: %v", status.ti.Alias(), status.lastError) } } // simple or complex? if simple { return wr.applySchemaShardSimple(statusArray, preflight, masterTabletAlias, change, force) } return wr.applySchemaShardComplex(statusArray, shardInfo, preflight, masterTabletAlias, change, newParentTabletAlias, force) }