// updateShardCellsAndMaster will update the 'Cells' and possibly // MasterAlias records for the shard, if needed. func (wr *Wrangler) updateShardCellsAndMaster(ctx context.Context, si *topo.ShardInfo, tabletAlias *topodatapb.TabletAlias, tabletType topodatapb.TabletType, allowMasterOverride bool) error { // See if we need to update the Shard: // - add the tablet's cell to the shard's Cells if needed // - change the master if needed shardUpdateRequired := false if !si.HasCell(tabletAlias.Cell) { shardUpdateRequired = true } if tabletType == topodatapb.TabletType_MASTER && !topoproto.TabletAliasEqual(si.MasterAlias, tabletAlias) { shardUpdateRequired = true } if !shardUpdateRequired { return nil } actionNode := actionnode.UpdateShard() keyspace := si.Keyspace() shard := si.ShardName() lockPath, err := wr.lockShard(ctx, keyspace, shard, actionNode) if err != nil { return err } // re-read the shard with the lock si, err = wr.ts.GetShard(ctx, keyspace, shard) if err != nil { return wr.unlockShard(ctx, keyspace, shard, actionNode, lockPath, err) } // update it wasUpdated := false if !si.HasCell(tabletAlias.Cell) { si.Cells = append(si.Cells, tabletAlias.Cell) wasUpdated = true } if tabletType == topodatapb.TabletType_MASTER && !topoproto.TabletAliasEqual(si.MasterAlias, tabletAlias) { if si.HasMaster() && !allowMasterOverride { return wr.unlockShard(ctx, keyspace, shard, actionNode, lockPath, fmt.Errorf("creating this tablet would override old master %v in shard %v/%v", topoproto.TabletAliasString(si.MasterAlias), keyspace, shard)) } si.MasterAlias = tabletAlias wasUpdated = true } if wasUpdated { // write it back if err := wr.ts.UpdateShard(ctx, si); err != nil { return wr.unlockShard(ctx, keyspace, shard, actionNode, lockPath, err) } } // and unlock return wr.unlockShard(ctx, keyspace, shard, actionNode, lockPath, err) }
// updateShardCellsAndMaster will update the 'Cells' and possibly // MasterAlias records for the shard, if needed. func (wr *Wrangler) updateShardCellsAndMaster(ctx context.Context, si *topo.ShardInfo, tabletAlias *topodatapb.TabletAlias, tabletType topodatapb.TabletType, allowMasterOverride bool) error { // See if we need to update the Shard: // - add the tablet's cell to the shard's Cells if needed // - change the master if needed shardUpdateRequired := false if !si.HasCell(tabletAlias.Cell) { shardUpdateRequired = true } if tabletType == topodatapb.TabletType_MASTER && !topoproto.TabletAliasEqual(si.MasterAlias, tabletAlias) { shardUpdateRequired = true } if !shardUpdateRequired { return nil } // we do need to update the shard, lock it to not interfere with // reparenting operations. actionNode := actionnode.UpdateShard() keyspace := si.Keyspace() shard := si.ShardName() lockPath, err := wr.lockShard(ctx, keyspace, shard, actionNode) if err != nil { return err } // run the update _, err = wr.ts.UpdateShardFields(ctx, keyspace, shard, func(s *topodatapb.Shard) error { wasUpdated := false if !topoproto.ShardHasCell(s, tabletAlias.Cell) { s.Cells = append(s.Cells, tabletAlias.Cell) wasUpdated = true } if tabletType == topodatapb.TabletType_MASTER && !topoproto.TabletAliasEqual(s.MasterAlias, tabletAlias) { if !topoproto.TabletAliasIsZero(s.MasterAlias) && !allowMasterOverride { return fmt.Errorf("creating this tablet would override old master %v in shard %v/%v", topoproto.TabletAliasString(s.MasterAlias), keyspace, shard) } s.MasterAlias = tabletAlias wasUpdated = true } if !wasUpdated { return topo.ErrNoUpdateNeeded } return nil }) return wr.unlockShard(ctx, keyspace, shard, actionNode, lockPath, err) }
// Validate makes sure a tablet is represented correctly in the topology server. func Validate(ctx context.Context, ts Server, tabletAlias *topodatapb.TabletAlias) error { // read the tablet record, make sure it parses tablet, err := ts.GetTablet(ctx, tabletAlias) if err != nil { return err } if !topoproto.TabletAliasEqual(tablet.Alias, tabletAlias) { return fmt.Errorf("bad tablet alias data for tablet %v: %#v", topoproto.TabletAliasString(tabletAlias), tablet.Alias) } // Validate the entry in the shard replication nodes if err = ts.ValidateShard(ctx, tablet.Keyspace, tablet.Shard); err != nil { return err } si, err := ts.GetShardReplication(ctx, tablet.Alias.Cell, tablet.Keyspace, tablet.Shard) if err != nil { return err } if _, err = si.GetShardReplicationNode(tabletAlias); err != nil { return fmt.Errorf("tablet %v not found in cell %v shard replication: %v", tabletAlias, tablet.Alias.Cell, err) } return nil }
// remove takes in an array and returns it with the specified element removed // (leaves the array unchanged if element isn't in the array). func remove(tablets []*discovery.TabletStats, tabletAlias *topodata.TabletAlias) []*discovery.TabletStats { filteredTablets := tablets[:0] for _, tablet := range tablets { if !topoproto.TabletAliasEqual(tablet.Tablet.Alias, tabletAlias) { filteredTablets = append(filteredTablets, tablet) } } return filteredTablets }
// ValidatePermissionsKeyspace validates all the permissions are the same // in a keyspace func (wr *Wrangler) ValidatePermissionsKeyspace(ctx context.Context, keyspace string) error { // find all the shards shards, err := wr.ts.GetShardNames(ctx, keyspace) if err != nil { return err } // corner cases if len(shards) == 0 { return fmt.Errorf("No shards in keyspace %v", keyspace) } sort.Strings(shards) if len(shards) == 1 { return wr.ValidatePermissionsShard(ctx, keyspace, shards[0]) } // find the reference permissions using the first shard's master si, err := wr.ts.GetShard(ctx, keyspace, shards[0]) if err != nil { return err } if !si.HasMaster() { return fmt.Errorf("No master in shard %v/%v", keyspace, shards[0]) } referenceAlias := si.MasterAlias log.Infof("Gathering permissions for reference master %v", topoproto.TabletAliasString(referenceAlias)) referencePermissions, err := wr.GetPermissions(ctx, si.MasterAlias) if err != nil { return err } // then diff with all tablets but master 0 er := concurrency.AllErrorRecorder{} wg := sync.WaitGroup{} for _, shard := range shards { aliases, err := wr.ts.FindAllTabletAliasesInShard(ctx, keyspace, shard) if err != nil { er.RecordError(err) continue } for _, alias := range aliases { if topoproto.TabletAliasEqual(alias, si.MasterAlias) { continue } wg.Add(1) go wr.diffPermissions(ctx, referencePermissions, referenceAlias, alias, &wg, &er) } } wg.Wait() if er.HasErrors() { return fmt.Errorf("Permissions diffs:\n%v", er.Error().Error()) } return nil }
// InitTablet creates or updates a tablet. If no parent is specified // in the tablet, and the tablet has a slave type, we will find the // appropriate parent. If createShardAndKeyspace is true and the // parent keyspace or shard don't exist, they will be created. If // allowUpdate is true, and a tablet with the same ID exists, just update it. // If a tablet is created as master, and there is already a different // master in the shard, allowMasterOverride must be set. func (wr *Wrangler) InitTablet(ctx context.Context, tablet *topodatapb.Tablet, allowMasterOverride, createShardAndKeyspace, allowUpdate bool) error { if err := topo.TabletComplete(tablet); err != nil { return err } // get the shard, possibly creating it var err error var si *topo.ShardInfo if createShardAndKeyspace { // create the parent keyspace and shard if needed si, err = wr.ts.GetOrCreateShard(ctx, tablet.Keyspace, tablet.Shard) } else { si, err = wr.ts.GetShard(ctx, tablet.Keyspace, tablet.Shard) if err == topo.ErrNoNode { return fmt.Errorf("missing parent shard, use -parent option to create it, or CreateKeyspace / CreateShard") } } // get the shard, checks a couple things if err != nil { return fmt.Errorf("cannot get (or create) shard %v/%v: %v", tablet.Keyspace, tablet.Shard, err) } if !key.KeyRangeEqual(si.KeyRange, tablet.KeyRange) { return fmt.Errorf("shard %v/%v has a different KeyRange: %v != %v", tablet.Keyspace, tablet.Shard, si.KeyRange, tablet.KeyRange) } if tablet.Type == topodatapb.TabletType_MASTER && si.HasMaster() && !topoproto.TabletAliasEqual(si.MasterAlias, tablet.Alias) && !allowMasterOverride { return fmt.Errorf("creating this tablet would override old master %v in shard %v/%v, use allow_master_override flag", topoproto.TabletAliasString(si.MasterAlias), tablet.Keyspace, tablet.Shard) } // update the shard record if needed if err := wr.updateShardCellsAndMaster(ctx, si, tablet.Alias, tablet.Type, allowMasterOverride); err != nil { return err } err = wr.ts.CreateTablet(ctx, tablet) if err == topo.ErrNodeExists && allowUpdate { // Try to update then oldTablet, err := wr.ts.GetTablet(ctx, tablet.Alias) if err != nil { return fmt.Errorf("failed reading existing tablet %v: %v", topoproto.TabletAliasString(tablet.Alias), err) } // Check we have the same keyspace / shard, and if not, // require the allowDifferentShard flag. if oldTablet.Keyspace != tablet.Keyspace || oldTablet.Shard != tablet.Shard { return fmt.Errorf("old tablet has shard %v/%v. Cannot override with shard %v/%v. Delete and re-add tablet if you want to change the tablet's keyspace/shard", oldTablet.Keyspace, oldTablet.Shard, tablet.Keyspace, tablet.Shard) } *(oldTablet.Tablet) = *tablet if err := wr.ts.UpdateTablet(ctx, oldTablet); err != nil { return fmt.Errorf("failed updating tablet %v: %v", topoproto.TabletAliasString(tablet.Alias), err) } } return nil }
func (agent *ActionAgent) startReplication(ctx context.Context, pos replication.Position) error { // Set the position at which to resume from the master. cmds, err := agent.MysqlDaemon.SetSlavePositionCommands(pos) if err != nil { return err } if err := agent.MysqlDaemon.ExecuteSuperQueryList(ctx, cmds); err != nil { return fmt.Errorf("failed to set slave position: %v", err) } // Read the shard to find the current master, and its location. tablet := agent.Tablet() si, err := agent.TopoServer.GetShard(ctx, tablet.Keyspace, tablet.Shard) if err != nil { return fmt.Errorf("can't read shard: %v", err) } if si.MasterAlias == nil { // We've restored, but there's no master. This is fine, since we've // already set the position at which to resume when we're later reparented. // If we had instead considered this fatal, all tablets would crash-loop // until a master appears, which would make it impossible to elect a master. log.Warningf("Can't start replication after restore: shard %v/%v has no master.", tablet.Keyspace, tablet.Shard) return nil } if topoproto.TabletAliasEqual(si.MasterAlias, tablet.Alias) { // We used to be the master before we got restarted in an empty data dir, // and no other master has been elected in the meantime. // This shouldn't happen, so we'll let the operator decide which tablet // should actually be promoted to master. log.Warningf("Can't start replication after restore: master record still points to this tablet.") return nil } ti, err := agent.TopoServer.GetTablet(ctx, si.MasterAlias) if err != nil { return fmt.Errorf("Cannot read master tablet %v: %v", si.MasterAlias, err) } // If using semi-sync, we need to enable it before connecting to master. if *enableSemiSync { if err := agent.enableSemiSync(false); err != nil { return err } } // Set master and start slave. cmds, err = agent.MysqlDaemon.SetMasterCommands(ti.Hostname, int(ti.PortMap["mysql"])) if err != nil { return fmt.Errorf("MysqlDaemon.SetMasterCommands failed: %v", err) } cmds = append(cmds, "START SLAVE") if err := agent.MysqlDaemon.ExecuteSuperQueryList(ctx, cmds); err != nil { return fmt.Errorf("failed to start replication: %v", err) } return nil }
// DeleteTablet removes a tablet from a shard. // - if allowMaster is set, we can Delete a master tablet (and clear // its record from the Shard record if it was the master). // - if skipRebuild is set, we do not rebuild the serving graph. func (wr *Wrangler) DeleteTablet(ctx context.Context, tabletAlias *topodatapb.TabletAlias, allowMaster, skipRebuild bool) error { // load the tablet, see if we'll need to rebuild ti, err := wr.ts.GetTablet(ctx, tabletAlias) if err != nil { return err } rebuildRequired := ti.IsInServingGraph() wasMaster := ti.Type == topodatapb.TabletType_MASTER if wasMaster && !allowMaster { return fmt.Errorf("cannot delete tablet %v as it is a master, use allow_master flag", topoproto.TabletAliasString(tabletAlias)) } // remove the record and its replication graph entry if err := topotools.DeleteTablet(ctx, wr.ts, ti.Tablet); err != nil { return err } // update the Shard object if the master was scrapped. // we lock the shard to not conflict with reparent operations. if wasMaster { actionNode := actionnode.UpdateShard() lockPath, err := wr.lockShard(ctx, ti.Keyspace, ti.Shard, actionNode) if err != nil { return err } // update the shard record's master if _, err := wr.ts.UpdateShardFields(ctx, ti.Keyspace, ti.Shard, func(s *topodatapb.Shard) error { if !topoproto.TabletAliasEqual(s.MasterAlias, tabletAlias) { wr.Logger().Warningf("Deleting master %v from shard %v/%v but master in Shard object was %v", topoproto.TabletAliasString(tabletAlias), ti.Keyspace, ti.Shard, topoproto.TabletAliasString(s.MasterAlias)) return topo.ErrNoUpdateNeeded } s.MasterAlias = nil return nil }); err != nil { return wr.unlockShard(ctx, ti.Keyspace, ti.Shard, actionNode, lockPath, err) } // and unlock if err := wr.unlockShard(ctx, ti.Keyspace, ti.Shard, actionNode, lockPath, err); err != nil { return err } } // and rebuild the original shard if needed if !rebuildRequired { wr.Logger().Infof("Rebuild not required") return nil } if skipRebuild { wr.Logger().Warningf("Rebuild required, but skipping it") return nil } _, err = wr.RebuildShardGraph(ctx, ti.Keyspace, ti.Shard, []string{ti.Alias.Cell}) return err }
// FIXME(msolomon) This validate presumes the master is up and running. // Even when that isn't true, there are validation processes that might be valuable. func (wr *Wrangler) validateShard(ctx context.Context, keyspace, shard string, pingTablets bool, wg *sync.WaitGroup, results chan<- error) { shardInfo, err := wr.ts.GetShard(ctx, keyspace, shard) if err != nil { results <- fmt.Errorf("TopologyServer.GetShard(%v, %v) failed: %v", keyspace, shard, err) return } aliases, err := wr.ts.FindAllTabletAliasesInShard(ctx, keyspace, shard) if err != nil { results <- fmt.Errorf("TopologyServer.FindAllTabletAliasesInShard(%v, %v) failed: %v", keyspace, shard, err) return } tabletMap, _ := wr.ts.GetTabletMap(ctx, aliases) var masterAlias *pb.TabletAlias for _, alias := range aliases { tabletInfo, ok := tabletMap[*alias] if !ok { results <- fmt.Errorf("tablet %v not found in map", topoproto.TabletAliasString(alias)) continue } if tabletInfo.Type == pb.TabletType_MASTER { if masterAlias != nil { results <- fmt.Errorf("shard %v/%v already has master %v but found other master %v", keyspace, shard, topoproto.TabletAliasString(masterAlias), topoproto.TabletAliasString(alias)) } else { masterAlias = alias } } } if masterAlias == nil { results <- fmt.Errorf("no master for shard %v/%v", keyspace, shard) } else if !topoproto.TabletAliasEqual(shardInfo.MasterAlias, masterAlias) { results <- fmt.Errorf("master mismatch for shard %v/%v: found %v, expected %v", keyspace, shard, topoproto.TabletAliasString(masterAlias), topoproto.TabletAliasString(shardInfo.MasterAlias)) } for _, alias := range aliases { wg.Add(1) go func(alias *pb.TabletAlias) { defer wg.Done() if err := topo.Validate(ctx, wr.ts, alias); err != nil { results <- fmt.Errorf("Validate(%v) failed: %v", topoproto.TabletAliasString(alias), err) } else { wr.Logger().Infof("tablet %v is valid", topoproto.TabletAliasString(alias)) } }(alias) } if pingTablets { wr.validateReplication(ctx, shardInfo, tabletMap, results) wr.pingTablets(ctx, tabletMap, wg, results) } return }
// Validate makes sure a tablet is represented correctly in the topology server. func Validate(ctx context.Context, ts Server, tabletAlias *pb.TabletAlias) error { // read the tablet record, make sure it parses tablet, err := ts.GetTablet(ctx, tabletAlias) if err != nil { return err } if !topoproto.TabletAliasEqual(tablet.Alias, tabletAlias) { return fmt.Errorf("bad tablet alias data for tablet %v: %#v", topoproto.TabletAliasString(tabletAlias), tablet.Alias) } // Some tablets have no information to generate valid replication paths. // We have three cases to handle: // - we are a tablet in the replication graph, and should have // replication data (first case below) // - we are in scrap mode but used to be assigned in the graph // somewhere (second case below) // Idle tablets are just not in any graph at all, we don't even know // their keyspace / shard to know where to check. if tablet.IsInReplicationGraph() { if err = ts.ValidateShard(ctx, tablet.Keyspace, tablet.Shard); err != nil { return err } si, err := ts.GetShardReplication(ctx, tablet.Alias.Cell, tablet.Keyspace, tablet.Shard) if err != nil { return err } _, err = si.GetShardReplicationNode(tabletAlias) if err != nil { return fmt.Errorf("tablet %v not found in cell %v shard replication: %v", tabletAlias, tablet.Alias.Cell, err) } } else if tablet.IsAssigned() { // this case is to make sure a scrap node that used to be in // a replication graph doesn't leave a node behind. // However, while an action is running, there is some // time where this might be inconsistent. si, err := ts.GetShardReplication(ctx, tablet.Alias.Cell, tablet.Keyspace, tablet.Shard) if err != nil { return err } node, err := si.GetShardReplicationNode(tabletAlias) if err != ErrNoNode { return fmt.Errorf("unexpected replication data found(possible pending action?): %v (%v)", node, tablet.Type) } } return nil }
// updateShardCellsAndMaster will update the 'Cells' and possibly // MasterAlias records for the shard, if needed. func (wr *Wrangler) updateShardCellsAndMaster(ctx context.Context, si *topo.ShardInfo, tabletAlias *topodatapb.TabletAlias, tabletType topodatapb.TabletType, allowMasterOverride bool) error { // See if we need to update the Shard: // - add the tablet's cell to the shard's Cells if needed // - change the master if needed shardUpdateRequired := false if !si.HasCell(tabletAlias.Cell) { shardUpdateRequired = true } if tabletType == topodatapb.TabletType_MASTER && !topoproto.TabletAliasEqual(si.MasterAlias, tabletAlias) { shardUpdateRequired = true } if !shardUpdateRequired { return nil } // run the update _, err := wr.ts.UpdateShardFields(ctx, si.Keyspace(), si.ShardName(), func(s *topo.ShardInfo) error { wasUpdated := false if !s.HasCell(tabletAlias.Cell) { s.Cells = append(s.Cells, tabletAlias.Cell) wasUpdated = true } if tabletType == topodatapb.TabletType_MASTER && !topoproto.TabletAliasEqual(s.MasterAlias, tabletAlias) { if !topoproto.TabletAliasIsZero(s.MasterAlias) && !allowMasterOverride { return fmt.Errorf("creating this tablet would override old master %v in shard %v/%v", topoproto.TabletAliasString(s.MasterAlias), si.Keyspace(), si.ShardName()) } s.MasterAlias = tabletAlias wasUpdated = true } if !wasUpdated { return topo.ErrNoUpdateNeeded } return nil }) return err }
// TabletEquality returns true iff two Tablet are representing the same tablet // process: same uid/cell, running on the same host / ports. func TabletEquality(left, right *topodatapb.Tablet) bool { if !topoproto.TabletAliasEqual(left.Alias, right.Alias) { return false } if left.Hostname != right.Hostname { return false } if len(left.PortMap) != len(right.PortMap) { return false } for key, lvalue := range left.PortMap { rvalue, ok := right.PortMap[key] if !ok { return false } if lvalue != rvalue { return false } } return true }
// ValidateSchemaShard will diff the schema from all the tablets in the shard. func (wr *Wrangler) ValidateSchemaShard(ctx context.Context, keyspace, shard string, excludeTables []string, includeViews bool) error { si, err := wr.ts.GetShard(ctx, keyspace, shard) if err != nil { return err } // get schema from the master, or error if !si.HasMaster() { return fmt.Errorf("No master in shard %v/%v", keyspace, shard) } log.Infof("Gathering schema for master %v", topoproto.TabletAliasString(si.MasterAlias)) masterSchema, err := wr.GetSchema(ctx, si.MasterAlias, nil, excludeTables, includeViews) if err != nil { return err } // read all the aliases in the shard, that is all tablets that are // replicating from the master aliases, err := wr.ts.FindAllTabletAliasesInShard(ctx, keyspace, shard) if err != nil { return err } // then diff with all slaves er := concurrency.AllErrorRecorder{} wg := sync.WaitGroup{} for _, alias := range aliases { if topoproto.TabletAliasEqual(alias, si.MasterAlias) { continue } wg.Add(1) go wr.diffSchema(ctx, masterSchema, si.MasterAlias, alias, excludeTables, includeViews, &wg, &er) } wg.Wait() if er.HasErrors() { return fmt.Errorf("Schema diffs: %v", er.Error().Error()) } return nil }
// DeleteTablet removes a tablet from a shard. // - if allowMaster is set, we can Delete a master tablet (and clear // its record from the Shard record if it was the master). func (wr *Wrangler) DeleteTablet(ctx context.Context, tabletAlias *topodatapb.TabletAlias, allowMaster bool) (err error) { // load the tablet, see if we'll need to rebuild ti, err := wr.ts.GetTablet(ctx, tabletAlias) if err != nil { return err } wasMaster := ti.Type == topodatapb.TabletType_MASTER if wasMaster && !allowMaster { return fmt.Errorf("cannot delete tablet %v as it is a master, use allow_master flag", topoproto.TabletAliasString(tabletAlias)) } // remove the record and its replication graph entry if err := topotools.DeleteTablet(ctx, wr.ts, ti.Tablet); err != nil { return err } // update the Shard object if the master was scrapped. if wasMaster { // We lock the shard to not conflict with reparent operations. ctx, unlock, lockErr := wr.ts.LockShard(ctx, ti.Keyspace, ti.Shard, fmt.Sprintf("DeleteTablet(%v)", topoproto.TabletAliasString(tabletAlias))) if lockErr != nil { return lockErr } defer unlock(&err) // update the shard record's master _, err = wr.ts.UpdateShardFields(ctx, ti.Keyspace, ti.Shard, func(si *topo.ShardInfo) error { if !topoproto.TabletAliasEqual(si.MasterAlias, tabletAlias) { wr.Logger().Warningf("Deleting master %v from shard %v/%v but master in Shard object was %v", topoproto.TabletAliasString(tabletAlias), ti.Keyspace, ti.Shard, topoproto.TabletAliasString(si.MasterAlias)) return topo.ErrNoUpdateNeeded } si.MasterAlias = nil return nil }) return err } return nil }
// ValidatePermissionsShard validates all the permissions are the same // in a shard func (wr *Wrangler) ValidatePermissionsShard(ctx context.Context, keyspace, shard string) error { si, err := wr.ts.GetShard(ctx, keyspace, shard) if err != nil { return err } // get permissions from the master, or error if !si.HasMaster() { return fmt.Errorf("No master in shard %v/%v", keyspace, shard) } log.Infof("Gathering permissions for master %v", topoproto.TabletAliasString(si.MasterAlias)) masterPermissions, err := wr.GetPermissions(ctx, si.MasterAlias) if err != nil { return err } // read all the aliases in the shard, that is all tablets that are // replicating from the master aliases, err := wr.ts.FindAllTabletAliasesInShard(ctx, keyspace, shard) if err != nil { return err } // then diff all of them, except master er := concurrency.AllErrorRecorder{} wg := sync.WaitGroup{} for _, alias := range aliases { if topoproto.TabletAliasEqual(alias, si.MasterAlias) { continue } wg.Add(1) go wr.diffPermissions(ctx, masterPermissions, si.MasterAlias, alias, &wg, &er) } wg.Wait() if er.HasErrors() { return fmt.Errorf("Permissions diffs:\n%v", er.Error().Error()) } return nil }
// chooseNewMaster finds a tablet that is going to become master after reparent. The criterias // for the new master-elect are (preferably) to be in the same cell as the current master, and // to be different from avoidMasterTabletAlias. The tablet with the largest replication // position is chosen to minimize the time of catching up with the master. Note that the search // for largest replication position will race with transactions being executed on the master at // the same time, so when all tablets are roughly at the same position then the choice of the // new master-elect will be somewhat unpredictable. func (wr *Wrangler) chooseNewMaster( ctx context.Context, shardInfo *topo.ShardInfo, tabletMap map[topodatapb.TabletAlias]*topo.TabletInfo, avoidMasterTabletAlias *topodatapb.TabletAlias, waitSlaveTimeout time.Duration) (*topodatapb.TabletAlias, error) { if avoidMasterTabletAlias == nil { return nil, fmt.Errorf("tablet to avoid for reparent is not provided, cannot choose new master") } var masterCell string if shardInfo.MasterAlias != nil { masterCell = shardInfo.MasterAlias.Cell } maxPosSearch := maxReplPosSearch{ wrangler: wr, ctx: ctx, waitSlaveTimeout: waitSlaveTimeout, waitGroup: sync.WaitGroup{}, maxPosLock: sync.Mutex{}, } for tabletAlias, tabletInfo := range tabletMap { if (masterCell != "" && tabletAlias.Cell != masterCell) || topoproto.TabletAliasEqual(&tabletAlias, avoidMasterTabletAlias) { continue } maxPosSearch.waitGroup.Add(1) go maxPosSearch.processTablet(tabletInfo.Tablet) } maxPosSearch.waitGroup.Wait() if maxPosSearch.maxPosTablet == nil { return nil, nil } return maxPosSearch.maxPosTablet.Alias, nil }
func (wr *Wrangler) plannedReparentShardLocked(ctx context.Context, ev *events.Reparent, keyspace, shard string, masterElectTabletAlias *pb.TabletAlias, waitSlaveTimeout time.Duration) error { shardInfo, err := wr.ts.GetShard(ctx, keyspace, shard) if err != nil { return err } ev.ShardInfo = *shardInfo event.DispatchUpdate(ev, "reading tablet map") tabletMap, err := wr.ts.GetTabletMapForShard(ctx, keyspace, shard) if err != nil { return err } // Check corner cases we're going to depend on masterElectTabletInfo, ok := tabletMap[*masterElectTabletAlias] if !ok { return fmt.Errorf("master-elect tablet %v is not in the shard", topoproto.TabletAliasString(masterElectTabletAlias)) } ev.NewMaster = *masterElectTabletInfo.Tablet if topoproto.TabletAliasEqual(shardInfo.MasterAlias, masterElectTabletAlias) { return fmt.Errorf("master-elect tablet %v is already the master", topoproto.TabletAliasString(masterElectTabletAlias)) } oldMasterTabletInfo, ok := tabletMap[*shardInfo.MasterAlias] if !ok { return fmt.Errorf("old master tablet %v is not in the shard", topoproto.TabletAliasString(shardInfo.MasterAlias)) } ev.OldMaster = *oldMasterTabletInfo.Tablet // Demote the current master, get its replication position wr.logger.Infof("demote current master %v", shardInfo.MasterAlias) event.DispatchUpdate(ev, "demoting old master") rp, err := wr.tmc.DemoteMaster(ctx, oldMasterTabletInfo) if err != nil { return fmt.Errorf("old master tablet %v DemoteMaster failed: %v", topoproto.TabletAliasString(shardInfo.MasterAlias), err) } // Wait on the master-elect tablet until it reaches that position, // then promote it wr.logger.Infof("promote slave %v", topoproto.TabletAliasString(masterElectTabletAlias)) event.DispatchUpdate(ev, "promoting slave") rp, err = wr.tmc.PromoteSlaveWhenCaughtUp(ctx, masterElectTabletInfo, rp) if err != nil { return fmt.Errorf("master-elect tablet %v failed to catch up with replication or be upgraded to master: %v", topoproto.TabletAliasString(masterElectTabletAlias), err) } // Go through all the tablets: // - new master: populate the reparent journal // - everybody else: reparent to new master, wait for row event.DispatchUpdate(ev, "reparenting all tablets") now := time.Now().UnixNano() wgMaster := sync.WaitGroup{} wgSlaves := sync.WaitGroup{} rec := concurrency.AllErrorRecorder{} var masterErr error for alias, tabletInfo := range tabletMap { if topoproto.TabletAliasEqual(&alias, masterElectTabletAlias) { wgMaster.Add(1) go func(alias pb.TabletAlias, tabletInfo *topo.TabletInfo) { defer wgMaster.Done() wr.logger.Infof("populating reparent journal on new master %v", topoproto.TabletAliasString(&alias)) masterErr = wr.TabletManagerClient().PopulateReparentJournal(ctx, tabletInfo, now, plannedReparentShardOperation, &alias, rp) }(alias, tabletInfo) } else { wgSlaves.Add(1) go func(alias pb.TabletAlias, tabletInfo *topo.TabletInfo) { defer wgSlaves.Done() wr.logger.Infof("setting new master on slave %v", topoproto.TabletAliasString(&alias)) // also restart replication on old master forceStartSlave := topoproto.TabletAliasEqual(&alias, oldMasterTabletInfo.Alias) if err := wr.TabletManagerClient().SetMaster(ctx, tabletInfo, masterElectTabletAlias, now, forceStartSlave); err != nil { rec.RecordError(fmt.Errorf("Tablet %v SetMaster failed: %v", topoproto.TabletAliasString(&alias), err)) return } }(alias, tabletInfo) } } // After the master is done, we can update the shard record // (note with semi-sync, it also means at least one slave is done) wgMaster.Wait() if masterErr != nil { wgSlaves.Wait() return fmt.Errorf("failed to PopulateReparentJournal on master: %v", masterErr) } wr.logger.Infof("updating shard record with new master %v", masterElectTabletAlias) shardInfo.MasterAlias = masterElectTabletAlias if err := wr.ts.UpdateShard(ctx, shardInfo); err != nil { wgSlaves.Wait() return fmt.Errorf("failed to update shard master record: %v", err) } // Wait for the slaves to complete. If some of them fail, we // will rebuild the shard serving graph anyway wgSlaves.Wait() if err := rec.Error(); err != nil { wr.Logger().Errorf("Some slaves failed to reparent: %v", err) return err } // Then we rebuild the entire serving graph for the shard, // to account for all changes. wr.logger.Infof("rebuilding shard graph") event.DispatchUpdate(ev, "rebuilding shard serving graph") _, err = wr.RebuildShardGraph(ctx, keyspace, shard, nil) return err }
func (wr *Wrangler) initShardMasterLocked(ctx context.Context, ev *events.Reparent, keyspace, shard string, masterElectTabletAlias *pb.TabletAlias, force bool, waitSlaveTimeout time.Duration) error { shardInfo, err := wr.ts.GetShard(ctx, keyspace, shard) if err != nil { return err } ev.ShardInfo = *shardInfo event.DispatchUpdate(ev, "reading tablet map") tabletMap, err := wr.ts.GetTabletMapForShard(ctx, keyspace, shard) if err != nil { return err } // Check the master elect is in tabletMap masterElectTabletInfo, ok := tabletMap[*masterElectTabletAlias] if !ok { return fmt.Errorf("master-elect tablet %v is not in the shard", topoproto.TabletAliasString(masterElectTabletAlias)) } ev.NewMaster = *masterElectTabletInfo.Tablet // Check the master is the only master is the shard, or -force was used. _, masterTabletMap := topotools.SortedTabletMap(tabletMap) if !topoproto.TabletAliasEqual(shardInfo.MasterAlias, masterElectTabletAlias) { if !force { return fmt.Errorf("master-elect tablet %v is not the shard master, use -force to proceed anyway", topoproto.TabletAliasString(masterElectTabletAlias)) } wr.logger.Warningf("master-elect tablet %v is not the shard master, proceeding anyway as -force was used", topoproto.TabletAliasString(masterElectTabletAlias)) } if _, ok := masterTabletMap[*masterElectTabletAlias]; !ok { if !force { return fmt.Errorf("master-elect tablet %v is not a master in the shard, use -force to proceed anyway", topoproto.TabletAliasString(masterElectTabletAlias)) } wr.logger.Warningf("master-elect tablet %v is not a master in the shard, proceeding anyway as -force was used", topoproto.TabletAliasString(masterElectTabletAlias)) } haveOtherMaster := false for alias, ti := range masterTabletMap { if !topoproto.TabletAliasEqual(&alias, masterElectTabletAlias) && ti.Type != pb.TabletType_SCRAP { haveOtherMaster = true } } if haveOtherMaster { if !force { return fmt.Errorf("master-elect tablet %v is not the only master in the shard, use -force to proceed anyway", topoproto.TabletAliasString(masterElectTabletAlias)) } wr.logger.Warningf("master-elect tablet %v is not the only master in the shard, proceeding anyway as -force was used", topoproto.TabletAliasString(masterElectTabletAlias)) } // First phase: reset replication on all tablets. If anyone fails, // we stop. It is probably because it is unreachable, and may leave // an unstable database process in the mix, with a database daemon // at a wrong replication spot. event.DispatchUpdate(ev, "resetting replication on all tablets") wg := sync.WaitGroup{} rec := concurrency.AllErrorRecorder{} for alias, tabletInfo := range tabletMap { wg.Add(1) go func(alias pb.TabletAlias, tabletInfo *topo.TabletInfo) { defer wg.Done() wr.logger.Infof("resetting replication on tablet %v", topoproto.TabletAliasString(&alias)) if err := wr.TabletManagerClient().ResetReplication(ctx, tabletInfo); err != nil { rec.RecordError(fmt.Errorf("Tablet %v ResetReplication failed (either fix it, or Scrap it): %v", topoproto.TabletAliasString(&alias), err)) } }(alias, tabletInfo) } wg.Wait() if err := rec.Error(); err != nil { return err } // Tell the new master to break its slaves, return its replication // position wr.logger.Infof("initializing master on %v", topoproto.TabletAliasString(masterElectTabletAlias)) event.DispatchUpdate(ev, "initializing master") rp, err := wr.TabletManagerClient().InitMaster(ctx, masterElectTabletInfo) if err != nil { return err } // Now tell the new master to insert the reparent_journal row, // and tell everybody else to become a slave of the new master, // and wait for the row in the reparent_journal table. // We start all these in parallel, to handle the semi-sync // case: for the master to be able to commit its row in the // reparent_journal table, it needs connected slaves. event.DispatchUpdate(ev, "reparenting all tablets") now := time.Now().UnixNano() wgMaster := sync.WaitGroup{} wgSlaves := sync.WaitGroup{} var masterErr error for alias, tabletInfo := range tabletMap { if topoproto.TabletAliasEqual(&alias, masterElectTabletAlias) { wgMaster.Add(1) go func(alias pb.TabletAlias, tabletInfo *topo.TabletInfo) { defer wgMaster.Done() wr.logger.Infof("populating reparent journal on new master %v", topoproto.TabletAliasString(&alias)) masterErr = wr.TabletManagerClient().PopulateReparentJournal(ctx, tabletInfo, now, initShardMasterOperation, &alias, rp) }(alias, tabletInfo) } else { wgSlaves.Add(1) go func(alias pb.TabletAlias, tabletInfo *topo.TabletInfo) { defer wgSlaves.Done() wr.logger.Infof("initializing slave %v", topoproto.TabletAliasString(&alias)) if err := wr.TabletManagerClient().InitSlave(ctx, tabletInfo, masterElectTabletAlias, rp, now); err != nil { rec.RecordError(fmt.Errorf("Tablet %v InitSlave failed: %v", topoproto.TabletAliasString(&alias), err)) } }(alias, tabletInfo) } } // After the master is done, we can update the shard record // (note with semi-sync, it also means at least one slave is done) wgMaster.Wait() if masterErr != nil { wgSlaves.Wait() return fmt.Errorf("failed to PopulateReparentJournal on master: %v", masterErr) } if !topoproto.TabletAliasEqual(shardInfo.MasterAlias, masterElectTabletAlias) { shardInfo.MasterAlias = masterElectTabletAlias if err := wr.ts.UpdateShard(ctx, shardInfo); err != nil { wgSlaves.Wait() return fmt.Errorf("failed to update shard master record: %v", err) } } // Wait for the slaves to complete. If some of them fail, we // don't want to rebuild the shard serving graph (the failure // will most likely be a timeout, and our context will be // expired, so the rebuild will fail anyway) wgSlaves.Wait() if err := rec.Error(); err != nil { return err } // Then we rebuild the entire serving graph for the shard, // to account for all changes. event.DispatchUpdate(ev, "rebuilding shard graph") _, err = wr.RebuildShardGraph(ctx, keyspace, shard, nil) return err }
func TestTabletExternallyReparented(t *testing.T) { tabletmanager.SetReparentFlags(time.Minute /* finalizeTimeout */) ctx := context.Background() db := fakesqldb.Register() ts := zktopo.NewTestServer(t, []string{"cell1", "cell2"}) wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient(), time.Second) vp := NewVtctlPipe(t, ts) defer vp.Close() // Create an old master, a new master, two good slaves, one bad slave oldMaster := NewFakeTablet(t, wr, "cell1", 0, pb.TabletType_MASTER, db) newMaster := NewFakeTablet(t, wr, "cell1", 1, pb.TabletType_REPLICA, db) goodSlave1 := NewFakeTablet(t, wr, "cell1", 2, pb.TabletType_REPLICA, db) goodSlave2 := NewFakeTablet(t, wr, "cell2", 3, pb.TabletType_REPLICA, db) badSlave := NewFakeTablet(t, wr, "cell1", 4, pb.TabletType_REPLICA, db) // Add a new Cell to the Shard, that doesn't map to any read topo cell, // to simulate a data center being unreachable. si, err := ts.GetShard(ctx, "test_keyspace", "0") if err != nil { t.Fatalf("GetShard failed: %v", err) } si.Cells = append(si.Cells, "cell666") if err := ts.UpdateShard(ctx, si); err != nil { t.Fatalf("UpdateShard failed: %v", err) } // Slightly unrelated test: make sure we can find the tablets // even with a datacenter being down. tabletMap, err := ts.GetTabletMapForShardByCell(ctx, "test_keyspace", "0", []string{"cell1"}) if err != nil { t.Fatalf("GetTabletMapForShardByCell should have worked but got: %v", err) } master, err := topotools.FindTabletByIPAddrAndPort(tabletMap, oldMaster.Tablet.Ip, "vt", oldMaster.Tablet.PortMap["vt"]) if err != nil || !topoproto.TabletAliasEqual(&master, oldMaster.Tablet.Alias) { t.Fatalf("FindTabletByIPAddrAndPort(master) failed: %v %v", err, master) } slave1, err := topotools.FindTabletByIPAddrAndPort(tabletMap, goodSlave1.Tablet.Ip, "vt", goodSlave1.Tablet.PortMap["vt"]) if err != nil || !topoproto.TabletAliasEqual(&slave1, goodSlave1.Tablet.Alias) { t.Fatalf("FindTabletByIPAddrAndPort(slave1) failed: %v %v", err, master) } slave2, err := topotools.FindTabletByIPAddrAndPort(tabletMap, goodSlave2.Tablet.Ip, "vt", goodSlave2.Tablet.PortMap["vt"]) if err != topo.ErrNoNode { t.Fatalf("FindTabletByIPAddrAndPort(slave2) worked: %v %v", err, slave2) } // Make sure the master is not exported in other cells tabletMap, err = ts.GetTabletMapForShardByCell(ctx, "test_keyspace", "0", []string{"cell2"}) master, err = topotools.FindTabletByIPAddrAndPort(tabletMap, oldMaster.Tablet.Ip, "vt", oldMaster.Tablet.PortMap["vt"]) if err != topo.ErrNoNode { t.Fatalf("FindTabletByIPAddrAndPort(master) worked in cell2: %v %v", err, master) } tabletMap, err = ts.GetTabletMapForShard(ctx, "test_keyspace", "0") if err != topo.ErrPartialResult { t.Fatalf("GetTabletMapForShard should have returned ErrPartialResult but got: %v", err) } master, err = topotools.FindTabletByIPAddrAndPort(tabletMap, oldMaster.Tablet.Ip, "vt", oldMaster.Tablet.PortMap["vt"]) if err != nil || !topoproto.TabletAliasEqual(&master, oldMaster.Tablet.Alias) { t.Fatalf("FindTabletByIPAddrAndPort(master) failed: %v %v", err, master) } // On the elected master, we will respond to // TabletActionSlaveWasPromoted newMaster.StartActionLoop(t, wr) defer newMaster.StopActionLoop(t) // On the old master, we will only respond to // TabletActionSlaveWasRestarted. oldMaster.StartActionLoop(t, wr) defer oldMaster.StopActionLoop(t) // On the good slaves, we will respond to // TabletActionSlaveWasRestarted. goodSlave1.StartActionLoop(t, wr) defer goodSlave1.StopActionLoop(t) goodSlave2.StartActionLoop(t, wr) defer goodSlave2.StopActionLoop(t) // On the bad slave, we will respond to // TabletActionSlaveWasRestarted with bad data. badSlave.StartActionLoop(t, wr) defer badSlave.StopActionLoop(t) // First test: reparent to the same master, make sure it works // as expected. tmc := tmclient.NewTabletManagerClient() ti, err := ts.GetTablet(ctx, oldMaster.Tablet.Alias) if err != nil { t.Fatalf("GetTablet failed: %v", err) } if err := vp.Run([]string{"TabletExternallyReparented", topoproto.TabletAliasString(oldMaster.Tablet.Alias)}); err != nil { t.Fatalf("TabletExternallyReparented(same master) should have worked: %v", err) } // Second test: reparent to a replica, and pretend the old // master is still good to go. // This tests a bad case; the new designated master is a slave, // but we should do what we're told anyway ti, err = ts.GetTablet(ctx, goodSlave1.Tablet.Alias) if err != nil { t.Fatalf("GetTablet failed: %v", err) } waitID := makeWaitID() if err := tmc.TabletExternallyReparented(context.Background(), ti, waitID); err != nil { t.Fatalf("TabletExternallyReparented(slave) error: %v", err) } waitForExternalReparent(t, waitID) // This tests the good case, where everything works as planned t.Logf("TabletExternallyReparented(new master) expecting success") ti, err = ts.GetTablet(ctx, newMaster.Tablet.Alias) if err != nil { t.Fatalf("GetTablet failed: %v", err) } waitID = makeWaitID() if err := tmc.TabletExternallyReparented(context.Background(), ti, waitID); err != nil { t.Fatalf("TabletExternallyReparented(replica) failed: %v", err) } waitForExternalReparent(t, waitID) // Now double-check the serving graph is good. // Should only have one good replica left. addrs, _, err := ts.GetEndPoints(ctx, "cell1", "test_keyspace", "0", pb.TabletType_REPLICA) if err != nil { t.Fatalf("GetEndPoints failed at the end: %v", err) } if len(addrs.Entries) != 1 { t.Fatalf("GetEndPoints has too many entries: %v", addrs) } }
// TestInitMasterShard is the good scenario test, where everything // works as planned func TestInitMasterShard(t *testing.T) { ctx := context.Background() db := fakesqldb.Register() ts := zktestserver.New(t, []string{"cell1", "cell2"}) wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient()) vp := NewVtctlPipe(t, ts) defer vp.Close() db.AddQuery("CREATE DATABASE IF NOT EXISTS `vt_test_keyspace`", &sqltypes.Result{}) // Create a master, a couple good slaves master := NewFakeTablet(t, wr, "cell1", 0, topodatapb.TabletType_MASTER, db) goodSlave1 := NewFakeTablet(t, wr, "cell1", 1, topodatapb.TabletType_REPLICA, db) goodSlave2 := NewFakeTablet(t, wr, "cell2", 2, topodatapb.TabletType_REPLICA, db) // Master: set a plausible ReplicationPosition to return, // and expect to add entry in _vt.reparent_journal master.FakeMysqlDaemon.CurrentMasterPosition = replication.Position{ GTIDSet: replication.MariadbGTID{ Domain: 5, Server: 456, Sequence: 890, }, } master.FakeMysqlDaemon.ReadOnly = true master.FakeMysqlDaemon.ResetReplicationResult = []string{"reset rep 1"} master.FakeMysqlDaemon.SetSlavePositionCommandsResult = []string{"new master shouldn't use this"} master.FakeMysqlDaemon.SetMasterCommandsResult = []string{"new master shouldn't use this"} master.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ "reset rep 1", "CREATE DATABASE IF NOT EXISTS _vt", "SUBCREATE TABLE IF NOT EXISTS _vt.reparent_journal", "CREATE DATABASE IF NOT EXISTS _vt", "SUBCREATE TABLE IF NOT EXISTS _vt.reparent_journal", "SUBINSERT INTO _vt.reparent_journal (time_created_ns, action_name, master_alias, replication_position) VALUES", } master.StartActionLoop(t, wr) defer master.StopActionLoop(t) // Slave1: expect to be reset and re-parented goodSlave1.FakeMysqlDaemon.ReadOnly = true goodSlave1.FakeMysqlDaemon.ResetReplicationResult = []string{"reset rep 1"} goodSlave1.FakeMysqlDaemon.SetSlavePositionCommandsPos = master.FakeMysqlDaemon.CurrentMasterPosition goodSlave1.FakeMysqlDaemon.SetSlavePositionCommandsResult = []string{"cmd1"} goodSlave1.FakeMysqlDaemon.SetMasterCommandsInput = fmt.Sprintf("%v:%v", master.Tablet.Hostname, master.Tablet.PortMap["mysql"]) goodSlave1.FakeMysqlDaemon.SetMasterCommandsResult = []string{"set master cmd 1"} goodSlave1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ "reset rep 1", "cmd1", "set master cmd 1", "START SLAVE", } goodSlave1.StartActionLoop(t, wr) defer goodSlave1.StopActionLoop(t) // Slave2: expect to be re-parented goodSlave2.FakeMysqlDaemon.ReadOnly = true goodSlave2.FakeMysqlDaemon.ResetReplicationResult = []string{"reset rep 2"} goodSlave2.FakeMysqlDaemon.SetSlavePositionCommandsPos = master.FakeMysqlDaemon.CurrentMasterPosition goodSlave2.FakeMysqlDaemon.SetSlavePositionCommandsResult = []string{"cmd1", "cmd2"} goodSlave2.FakeMysqlDaemon.SetMasterCommandsInput = fmt.Sprintf("%v:%v", master.Tablet.Hostname, master.Tablet.PortMap["mysql"]) goodSlave2.FakeMysqlDaemon.SetMasterCommandsResult = []string{"set master cmd 1"} goodSlave2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ "reset rep 2", "cmd1", "cmd2", "set master cmd 1", "START SLAVE", } goodSlave2.StartActionLoop(t, wr) defer goodSlave2.StopActionLoop(t) // run InitShardMaster if err := vp.Run([]string{"InitShardMaster", "-wait_slave_timeout", "10s", master.Tablet.Keyspace + "/" + master.Tablet.Shard, topoproto.TabletAliasString(master.Tablet.Alias)}); err != nil { t.Fatalf("InitShardMaster failed: %v", err) } // check what was run if master.FakeMysqlDaemon.ReadOnly { t.Errorf("master was not turned read-write") } si, err := ts.GetShard(ctx, master.Tablet.Keyspace, master.Tablet.Shard) if err != nil { t.Fatalf("GetShard failed: %v", err) } if !topoproto.TabletAliasEqual(si.MasterAlias, master.Tablet.Alias) { t.Errorf("unexpected shard master alias, got %v expected %v", si.MasterAlias, master.Tablet.Alias) } if err := master.FakeMysqlDaemon.CheckSuperQueryList(); err != nil { t.Fatalf("master.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err) } if err := goodSlave1.FakeMysqlDaemon.CheckSuperQueryList(); err != nil { t.Fatalf("goodSlave1.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err) } if err := goodSlave2.FakeMysqlDaemon.CheckSuperQueryList(); err != nil { t.Fatalf("goodSlave2.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err) } }
// TestInitMasterShardOneSlaveFails makes sure that if one slave fails to // proceed, the action completes anyway func TestInitMasterShardOneSlaveFails(t *testing.T) { ctx := context.Background() db := fakesqldb.Register() ts := zktestserver.New(t, []string{"cell1", "cell2"}) wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient()) // Create a master, a couple slaves master := NewFakeTablet(t, wr, "cell1", 0, topodatapb.TabletType_MASTER, db) goodSlave := NewFakeTablet(t, wr, "cell1", 1, topodatapb.TabletType_REPLICA, db) badSlave := NewFakeTablet(t, wr, "cell2", 2, topodatapb.TabletType_REPLICA, db) // Master: set a plausible ReplicationPosition to return, // and expect to add entry in _vt.reparent_journal master.FakeMysqlDaemon.CurrentMasterPosition = replication.Position{ GTIDSet: replication.MariadbGTID{ Domain: 5, Server: 456, Sequence: 890, }, } master.FakeMysqlDaemon.ReadOnly = true master.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ "CREATE DATABASE IF NOT EXISTS _vt", "SUBCREATE TABLE IF NOT EXISTS _vt.reparent_journal", "CREATE DATABASE IF NOT EXISTS _vt", "SUBCREATE TABLE IF NOT EXISTS _vt.reparent_journal", "SUBINSERT INTO _vt.reparent_journal (time_created_ns, action_name, master_alias, replication_position) VALUES", } master.StartActionLoop(t, wr) defer master.StopActionLoop(t) // goodSlave: expect to be re-parented goodSlave.FakeMysqlDaemon.ReadOnly = true goodSlave.FakeMysqlDaemon.SetSlavePositionCommandsPos = master.FakeMysqlDaemon.CurrentMasterPosition goodSlave.FakeMysqlDaemon.SetSlavePositionCommandsResult = []string{"cmd1"} goodSlave.FakeMysqlDaemon.SetMasterCommandsInput = fmt.Sprintf("%v:%v", master.Tablet.Hostname, master.Tablet.PortMap["mysql"]) goodSlave.FakeMysqlDaemon.SetMasterCommandsResult = []string{"set master cmd 1"} goodSlave.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ "cmd1", "set master cmd 1", "START SLAVE", } goodSlave.StartActionLoop(t, wr) defer goodSlave.StopActionLoop(t) // badSlave: insert an error by failing the master hostname input // on purpose badSlave.FakeMysqlDaemon.ReadOnly = true badSlave.FakeMysqlDaemon.SetSlavePositionCommandsPos = master.FakeMysqlDaemon.CurrentMasterPosition badSlave.FakeMysqlDaemon.SetMasterCommandsInput = fmt.Sprintf("%v:%v", "", master.Tablet.PortMap["mysql"]) badSlave.StartActionLoop(t, wr) defer badSlave.StopActionLoop(t) // also change the master alias in the Shard object, to make sure it // is set back. si, err := ts.GetShard(ctx, master.Tablet.Keyspace, master.Tablet.Shard) if err != nil { t.Fatalf("GetShard failed: %v", err) } si.MasterAlias.Uid++ if err := ts.UpdateShard(ctx, si); err != nil { t.Fatalf("UpdateShard failed: %v", err) } // run InitShardMaster without force, it fails because master is // changing. if err := wr.InitShardMaster(ctx, master.Tablet.Keyspace, master.Tablet.Shard, master.Tablet.Alias, false /*force*/, 10*time.Second); err == nil || !strings.Contains(err.Error(), "is not the shard master") { t.Errorf("InitShardMaster with mismatched new master returned wrong error: %v", err) } // run InitShardMaster if err := wr.InitShardMaster(ctx, master.Tablet.Keyspace, master.Tablet.Shard, master.Tablet.Alias, true /*force*/, 10*time.Second); err == nil || !strings.Contains(err.Error(), "wrong input for SetMasterCommands") { t.Errorf("InitShardMaster with one failed slave returned wrong error: %v", err) } // check what was run: master should still be good if master.FakeMysqlDaemon.ReadOnly { t.Errorf("master was not turned read-write") } si, err = ts.GetShard(ctx, master.Tablet.Keyspace, master.Tablet.Shard) if err != nil { t.Fatalf("GetShard failed: %v", err) } if !topoproto.TabletAliasEqual(si.MasterAlias, master.Tablet.Alias) { t.Errorf("unexpected shard master alias, got %v expected %v", si.MasterAlias, master.Tablet.Alias) } }
// TabletExternallyReparented updates all topo records so the current // tablet is the new master for this shard. // Should be called under RPCWrapLock. func (agent *ActionAgent) TabletExternallyReparented(ctx context.Context, externalID string) error { startTime := time.Now() // If there is a finalize step running, wait for it to finish or time out // before checking the global shard record again. if agent.finalizeReparentCtx != nil { select { case <-agent.finalizeReparentCtx.Done(): agent.finalizeReparentCtx = nil case <-ctx.Done(): return ctx.Err() } } tablet := agent.Tablet() // Check the global shard record. si, err := agent.TopoServer.GetShard(ctx, tablet.Keyspace, tablet.Shard) if err != nil { log.Warningf("fastTabletExternallyReparented: failed to read global shard record for %v/%v: %v", tablet.Keyspace, tablet.Shard, err) return err } if topoproto.TabletAliasEqual(si.MasterAlias, tablet.Alias) { // We may get called on the current master even when nothing has changed. // If the global shard record is already updated, it means we successfully // finished a previous reparent to this tablet. return nil } // Create a reusable Reparent event with available info. ev := &events.Reparent{ ShardInfo: *si, NewMaster: *tablet.Tablet, OldMaster: pb.Tablet{ Alias: si.MasterAlias, Type: pb.TabletType_MASTER, }, ExternalID: externalID, } defer func() { if err != nil { event.DispatchUpdate(ev, "failed: "+err.Error()) } }() event.DispatchUpdate(ev, "starting external from tablet (fast)") // Execute state change to master by force-updating only the local copy of the // tablet record. The actual record in topo will be updated later. log.Infof("fastTabletExternallyReparented: executing change callback for state change to MASTER") oldTablet := *tablet.Tablet newTablet := oldTablet newTablet.Type = pb.TabletType_MASTER newTablet.HealthMap = nil agent.setTablet(topo.NewTabletInfo(&newTablet, -1)) if err := agent.updateState(ctx, &oldTablet, "fastTabletExternallyReparented"); err != nil { return fmt.Errorf("fastTabletExternallyReparented: failed to change tablet state to MASTER: %v", err) } agent.mutex.Lock() agent._tabletExternallyReparentedTime = time.Now() agent.mutex.Unlock() // Directly write the new master endpoint in the serving graph. // We will do a true rebuild in the background soon, but in the meantime, // this will be enough for clients to re-resolve the new master. event.DispatchUpdate(ev, "writing new master endpoint") log.Infof("fastTabletExternallyReparented: writing new master endpoint to serving graph") ep, err := topo.TabletEndPoint(tablet.Tablet) if err != nil { return fmt.Errorf("fastTabletExternallyReparented: failed to generate EndPoint for tablet %v: %v", tablet.Alias, err) } err = topo.UpdateEndPoints(ctx, agent.TopoServer, tablet.Alias.Cell, si.Keyspace(), si.ShardName(), pb.TabletType_MASTER, &pb.EndPoints{Entries: []*pb.EndPoint{ep}}, -1) if err != nil { return fmt.Errorf("fastTabletExternallyReparented: failed to update master endpoint: %v", err) } externalReparentStats.Record("NewMasterVisible", startTime) // Start the finalize stage with a background context, but connect the trace. bgCtx, cancel := context.WithTimeout(agent.batchCtx, *finalizeReparentTimeout) bgCtx = trace.CopySpan(bgCtx, ctx) agent.finalizeReparentCtx = bgCtx go func() { err := agent.finalizeTabletExternallyReparented(bgCtx, si, ev) cancel() if err != nil { log.Warningf("finalizeTabletExternallyReparented error: %v", err) event.DispatchUpdate(ev, "failed: "+err.Error()) return } externalReparentStats.Record("FullRebuild", startTime) }() return nil }
// ValidateSchemaKeyspace will diff the schema from all the tablets in // the keyspace. func (wr *Wrangler) ValidateSchemaKeyspace(ctx context.Context, keyspace string, excludeTables []string, includeViews bool) error { // find all the shards shards, err := wr.ts.GetShardNames(ctx, keyspace) if err != nil { return err } // corner cases if len(shards) == 0 { return fmt.Errorf("No shards in keyspace %v", keyspace) } sort.Strings(shards) if len(shards) == 1 { return wr.ValidateSchemaShard(ctx, keyspace, shards[0], excludeTables, includeViews) } // find the reference schema using the first shard's master si, err := wr.ts.GetShard(ctx, keyspace, shards[0]) if err != nil { return err } if !si.HasMaster() { return fmt.Errorf("No master in shard %v/%v", keyspace, shards[0]) } referenceAlias := si.MasterAlias log.Infof("Gathering schema for reference master %v", topoproto.TabletAliasString(referenceAlias)) referenceSchema, err := wr.GetSchema(ctx, referenceAlias, nil, excludeTables, includeViews) if err != nil { return err } // then diff with all other tablets everywhere er := concurrency.AllErrorRecorder{} wg := sync.WaitGroup{} // first diff the slaves in the reference shard 0 aliases, err := wr.ts.FindAllTabletAliasesInShard(ctx, keyspace, shards[0]) if err != nil { return err } for _, alias := range aliases { if topoproto.TabletAliasEqual(alias, si.MasterAlias) { continue } wg.Add(1) go wr.diffSchema(ctx, referenceSchema, referenceAlias, alias, excludeTables, includeViews, &wg, &er) } // then diffs all tablets in the other shards for _, shard := range shards[1:] { si, err := wr.ts.GetShard(ctx, keyspace, shard) if err != nil { er.RecordError(err) continue } if !si.HasMaster() { er.RecordError(fmt.Errorf("No master in shard %v/%v", keyspace, shard)) continue } aliases, err := wr.ts.FindAllTabletAliasesInShard(ctx, keyspace, shard) if err != nil { er.RecordError(err) continue } for _, alias := range aliases { wg.Add(1) go wr.diffSchema(ctx, referenceSchema, referenceAlias, alias, excludeTables, includeViews, &wg, &er) } } wg.Wait() if er.HasErrors() { return fmt.Errorf("Schema diffs: %v", er.Error().Error()) } return nil }
// InitTablet initializes the tablet record if necessary. func (agent *ActionAgent) InitTablet(port, gRPCPort int32) error { // it should be either we have all three of init_keyspace, // init_shard and init_tablet_type, or none. if *initKeyspace == "" && *initShard == "" && *initTabletType == "" { // not initializing the record return nil } if *initKeyspace == "" || *initShard == "" || *initTabletType == "" { return fmt.Errorf("either need all of init_keyspace, init_shard and init_tablet_type, or none") } // parse init_tablet_type tabletType, err := topoproto.ParseTabletType(*initTabletType) if err != nil { return fmt.Errorf("invalid init_tablet_type %v: %v", *initTabletType, err) } if tabletType == topodatapb.TabletType_MASTER { // We disallow MASTER, so we don't have to change // shard.MasterAlias, and deal with the corner cases. return fmt.Errorf("init_tablet_type cannot be master, use replica instead") } // parse and validate shard name shard, _, err := topo.ValidateShardName(*initShard) if err != nil { return fmt.Errorf("cannot validate shard name %v: %v", *initShard, err) } // create a context for this whole operation ctx, cancel := context.WithTimeout(agent.batchCtx, *initTimeout) defer cancel() // read the shard, create it if necessary log.Infof("Reading shard record %v/%v", *initKeyspace, shard) si, err := agent.TopoServer.GetOrCreateShard(ctx, *initKeyspace, shard) if err != nil { return fmt.Errorf("InitTablet cannot GetOrCreateShard shard: %v", err) } if si.MasterAlias != nil && topoproto.TabletAliasEqual(si.MasterAlias, agent.TabletAlias) { // We're marked as master in the shard record, which could mean the master // tablet process was just restarted. However, we need to check if a new // master is in the process of taking over. In that case, it will let us // know by forcibly updating the old master's tablet record. oldTablet, err := agent.TopoServer.GetTablet(ctx, agent.TabletAlias) switch err { case topo.ErrNoNode: // There's no existing tablet record, so we can assume // no one has left us a message to step down. tabletType = topodatapb.TabletType_MASTER case nil: if oldTablet.Type == topodatapb.TabletType_MASTER { // We're marked as master in the shard record, // and our existing tablet record agrees. tabletType = topodatapb.TabletType_MASTER } default: return fmt.Errorf("InitTablet failed to read existing tablet record: %v", err) } } // See if we need to add the tablet's cell to the shard's cell list. if !si.HasCell(agent.TabletAlias.Cell) { si, err = agent.TopoServer.UpdateShardFields(ctx, *initKeyspace, shard, func(si *topo.ShardInfo) error { if si.HasCell(agent.TabletAlias.Cell) { // Someone else already did it. return topo.ErrNoUpdateNeeded } si.Cells = append(si.Cells, agent.TabletAlias.Cell) return nil }) if err != nil { return fmt.Errorf("couldn't add tablet's cell to shard record: %v", err) } } log.Infof("Initializing the tablet for type %v", tabletType) // figure out the hostname hostname := *tabletHostname if hostname != "" { log.Infof("Using hostname: %v from -tablet_hostname flag.", hostname) } else { hostname, err := netutil.FullyQualifiedHostname() if err != nil { return err } log.Infof("Using detected machine hostname: %v To change this, fix your machine network configuration or override it with -tablet_hostname.", hostname) } // create and populate tablet record tablet := &topodatapb.Tablet{ Alias: agent.TabletAlias, Hostname: hostname, PortMap: make(map[string]int32), Keyspace: *initKeyspace, Shard: *initShard, Type: tabletType, DbNameOverride: *initDbNameOverride, Tags: initTags, } if port != 0 { tablet.PortMap["vt"] = port } if gRPCPort != 0 { tablet.PortMap["grpc"] = gRPCPort } if err := topo.TabletComplete(tablet); err != nil { return fmt.Errorf("InitTablet TabletComplete failed: %v", err) } // Now try to create the record (it will also fix up the // ShardReplication record if necessary). err = agent.TopoServer.CreateTablet(ctx, tablet) switch err { case nil: // It worked, we're good. case topo.ErrNodeExists: // The node already exists, will just try to update // it. So we read it first. oldTablet, err := agent.TopoServer.GetTablet(ctx, tablet.Alias) if err != nil { return fmt.Errorf("InitTablet failed to read existing tablet record: %v", err) } // Sanity check the keyspace and shard if oldTablet.Keyspace != tablet.Keyspace || oldTablet.Shard != tablet.Shard { return fmt.Errorf("InitTablet failed because existing tablet keyspace and shard %v/%v differ from the provided ones %v/%v", oldTablet.Keyspace, oldTablet.Shard, tablet.Keyspace, tablet.Shard) } // Then overwrite everything, ignoring version mismatch. if err := agent.TopoServer.UpdateTablet(ctx, topo.NewTabletInfo(tablet, -1)); err != nil { return fmt.Errorf("UpdateTablet failed: %v", err) } default: return fmt.Errorf("CreateTablet failed: %v", err) } return nil }
func (wr *Wrangler) initShardMasterLocked(ctx context.Context, ev *events.Reparent, keyspace, shard string, masterElectTabletAlias *topodatapb.TabletAlias, force bool, waitSlaveTimeout time.Duration) error { shardInfo, err := wr.ts.GetShard(ctx, keyspace, shard) if err != nil { return err } ev.ShardInfo = *shardInfo event.DispatchUpdate(ev, "reading tablet map") tabletMap, err := wr.ts.GetTabletMapForShard(ctx, keyspace, shard) if err != nil { return err } // Check the master elect is in tabletMap masterElectTabletInfo, ok := tabletMap[*masterElectTabletAlias] if !ok { return fmt.Errorf("master-elect tablet %v is not in the shard", topoproto.TabletAliasString(masterElectTabletAlias)) } ev.NewMaster = *masterElectTabletInfo.Tablet // Check the master is the only master is the shard, or -force was used. _, masterTabletMap := topotools.SortedTabletMap(tabletMap) if !topoproto.TabletAliasEqual(shardInfo.MasterAlias, masterElectTabletAlias) { if !force { return fmt.Errorf("master-elect tablet %v is not the shard master, use -force to proceed anyway", topoproto.TabletAliasString(masterElectTabletAlias)) } wr.logger.Warningf("master-elect tablet %v is not the shard master, proceeding anyway as -force was used", topoproto.TabletAliasString(masterElectTabletAlias)) } if _, ok := masterTabletMap[*masterElectTabletAlias]; !ok { if !force { return fmt.Errorf("master-elect tablet %v is not a master in the shard, use -force to proceed anyway", topoproto.TabletAliasString(masterElectTabletAlias)) } wr.logger.Warningf("master-elect tablet %v is not a master in the shard, proceeding anyway as -force was used", topoproto.TabletAliasString(masterElectTabletAlias)) } haveOtherMaster := false for alias := range masterTabletMap { if !topoproto.TabletAliasEqual(&alias, masterElectTabletAlias) { haveOtherMaster = true } } if haveOtherMaster { if !force { return fmt.Errorf("master-elect tablet %v is not the only master in the shard, use -force to proceed anyway", topoproto.TabletAliasString(masterElectTabletAlias)) } wr.logger.Warningf("master-elect tablet %v is not the only master in the shard, proceeding anyway as -force was used", topoproto.TabletAliasString(masterElectTabletAlias)) } // First phase: reset replication on all tablets. If anyone fails, // we stop. It is probably because it is unreachable, and may leave // an unstable database process in the mix, with a database daemon // at a wrong replication spot. event.DispatchUpdate(ev, "resetting replication on all tablets") wg := sync.WaitGroup{} rec := concurrency.AllErrorRecorder{} for alias, tabletInfo := range tabletMap { wg.Add(1) go func(alias topodatapb.TabletAlias, tabletInfo *topo.TabletInfo) { defer wg.Done() wr.logger.Infof("resetting replication on tablet %v", topoproto.TabletAliasString(&alias)) if err := wr.tmc.ResetReplication(ctx, tabletInfo.Tablet); err != nil { rec.RecordError(fmt.Errorf("Tablet %v ResetReplication failed (either fix it, or Scrap it): %v", topoproto.TabletAliasString(&alias), err)) } }(alias, tabletInfo) } wg.Wait() if err := rec.Error(); err != nil { return err } // Tell the new master to break its slaves, return its replication // position wr.logger.Infof("initializing master on %v", topoproto.TabletAliasString(masterElectTabletAlias)) event.DispatchUpdate(ev, "initializing master") rp, err := wr.tmc.InitMaster(ctx, masterElectTabletInfo.Tablet) if err != nil { return err } // Now tell the new master to insert the reparent_journal row, // and tell everybody else to become a slave of the new master, // and wait for the row in the reparent_journal table. // We start all these in parallel, to handle the semi-sync // case: for the master to be able to commit its row in the // reparent_journal table, it needs connected slaves. event.DispatchUpdate(ev, "reparenting all tablets") now := time.Now().UnixNano() wgMaster := sync.WaitGroup{} wgSlaves := sync.WaitGroup{} var masterErr error for alias, tabletInfo := range tabletMap { if topoproto.TabletAliasEqual(&alias, masterElectTabletAlias) { wgMaster.Add(1) go func(alias topodatapb.TabletAlias, tabletInfo *topo.TabletInfo) { defer wgMaster.Done() wr.logger.Infof("populating reparent journal on new master %v", topoproto.TabletAliasString(&alias)) masterErr = wr.tmc.PopulateReparentJournal(ctx, tabletInfo.Tablet, now, initShardMasterOperation, &alias, rp) }(alias, tabletInfo) } else { wgSlaves.Add(1) go func(alias topodatapb.TabletAlias, tabletInfo *topo.TabletInfo) { defer wgSlaves.Done() wr.logger.Infof("initializing slave %v", topoproto.TabletAliasString(&alias)) if err := wr.tmc.InitSlave(ctx, tabletInfo.Tablet, masterElectTabletAlias, rp, now); err != nil { rec.RecordError(fmt.Errorf("Tablet %v InitSlave failed: %v", topoproto.TabletAliasString(&alias), err)) } }(alias, tabletInfo) } } // After the master is done, we can update the shard record // (note with semi-sync, it also means at least one slave is done) wgMaster.Wait() if masterErr != nil { wgSlaves.Wait() return fmt.Errorf("failed to PopulateReparentJournal on master: %v", masterErr) } if !topoproto.TabletAliasEqual(shardInfo.MasterAlias, masterElectTabletAlias) { if _, err := wr.ts.UpdateShardFields(ctx, keyspace, shard, func(si *topo.ShardInfo) error { si.MasterAlias = masterElectTabletAlias return nil }); err != nil { wgSlaves.Wait() return fmt.Errorf("failed to update shard master record: %v", err) } } // Wait for the slaves to complete. If some of them fail, we // don't want to rebuild the shard serving graph (the failure // will most likely be a timeout, and our context will be // expired, so the rebuild will fail anyway) wgSlaves.Wait() if err := rec.Error(); err != nil { return err } // Create database if necessary on the master. Slaves will get it too through // replication. Since the user called InitShardMaster, they've told us to // assume that whatever data is on all the slaves is what they intended. // If the database doesn't exist, it means the user intends for these tablets // to begin serving with no data (i.e. first time initialization). createDB := fmt.Sprintf("CREATE DATABASE IF NOT EXISTS `%s`", topoproto.TabletDbName(masterElectTabletInfo.Tablet)) if _, err := wr.tmc.ExecuteFetchAsDba(ctx, masterElectTabletInfo.Tablet, false, []byte(createDB), 1, false, true); err != nil { return fmt.Errorf("failed to create database: %v", err) } return nil }
// TabletExternallyReparented updates all topo records so the current // tablet is the new master for this shard. // Should be called under RPCWrapLock. func (agent *ActionAgent) TabletExternallyReparented(ctx context.Context, externalID string) error { startTime := time.Now() // If there is a finalize step running, wait for it to finish or time out // before checking the global shard record again. if agent.finalizeReparentCtx != nil { select { case <-agent.finalizeReparentCtx.Done(): agent.finalizeReparentCtx = nil case <-ctx.Done(): return ctx.Err() } } tablet := agent.Tablet() // Check the global shard record. si, err := agent.TopoServer.GetShard(ctx, tablet.Keyspace, tablet.Shard) if err != nil { log.Warningf("fastTabletExternallyReparented: failed to read global shard record for %v/%v: %v", tablet.Keyspace, tablet.Shard, err) return err } if topoproto.TabletAliasEqual(si.MasterAlias, tablet.Alias) { // We may get called on the current master even when nothing has changed. // If the global shard record is already updated, it means we successfully // finished a previous reparent to this tablet. return nil } // Remember when we were first told we're the master. // If another tablet claims to be master and offers a more recent time, // that tablet will be trusted over us. agent.mutex.Lock() agent._tabletExternallyReparentedTime = startTime agent._replicationDelay = 0 agent.mutex.Unlock() // Create a reusable Reparent event with available info. ev := &events.Reparent{ ShardInfo: *si, NewMaster: *tablet, OldMaster: topodatapb.Tablet{ Alias: si.MasterAlias, Type: topodatapb.TabletType_MASTER, }, ExternalID: externalID, } defer func() { if err != nil { event.DispatchUpdate(ev, "failed: "+err.Error()) } }() event.DispatchUpdate(ev, "starting external from tablet (fast)") var wg sync.WaitGroup var errs concurrency.AllErrorRecorder // Execute state change to master by force-updating only the local copy of the // tablet record. The actual record in topo will be updated later. log.Infof("fastTabletExternallyReparented: executing change callback for state change to MASTER") oldTablet := proto.Clone(tablet).(*topodatapb.Tablet) tablet.Type = topodatapb.TabletType_MASTER tablet.HealthMap = nil agent.setTablet(tablet) wg.Add(1) go func() { defer wg.Done() // This is where updateState will block for gracePeriod, while it gives // vtgate a chance to stop sending replica queries. if err := agent.updateState(ctx, oldTablet, "fastTabletExternallyReparented"); err != nil { errs.RecordError(fmt.Errorf("fastTabletExternallyReparented: failed to change tablet state to MASTER: %v", err)) } }() wg.Add(1) go func() { defer wg.Done() // Directly write the new master endpoint in the serving graph. // We will do a true rebuild in the background soon, but in the meantime, // this will be enough for clients to re-resolve the new master. event.DispatchUpdate(ev, "writing new master endpoint") log.Infof("fastTabletExternallyReparented: writing new master endpoint to serving graph") ep, err := topo.TabletEndPoint(tablet) if err != nil { errs.RecordError(fmt.Errorf("fastTabletExternallyReparented: failed to generate EndPoint for tablet %v: %v", tablet.Alias, err)) return } err = topo.UpdateEndPoints(ctx, agent.TopoServer, tablet.Alias.Cell, si.Keyspace(), si.ShardName(), topodatapb.TabletType_MASTER, &topodatapb.EndPoints{Entries: []*topodatapb.EndPoint{ep}}, -1) if err != nil { errs.RecordError(fmt.Errorf("fastTabletExternallyReparented: failed to update master endpoint: %v", err)) return } externalReparentStats.Record("NewMasterVisible", startTime) }() // Wait for serving state grace period and serving graph update. wg.Wait() if errs.HasErrors() { return errs.Error() } // Start the finalize stage with a background context, but connect the trace. bgCtx, cancel := context.WithTimeout(agent.batchCtx, *finalizeReparentTimeout) bgCtx = trace.CopySpan(bgCtx, ctx) agent.finalizeReparentCtx = bgCtx go func() { err := agent.finalizeTabletExternallyReparented(bgCtx, si, ev) cancel() if err != nil { log.Warningf("finalizeTabletExternallyReparented error: %v", err) event.DispatchUpdate(ev, "failed: "+err.Error()) return } externalReparentStats.Record("FullRebuild", startTime) }() return nil }
func TestShardReplicationStatuses(t *testing.T) { ctx := context.Background() ts := zktopo.NewTestServer(t, []string{"cell1", "cell2"}) wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient(), time.Second) // create shard and tablets if err := ts.CreateShard(ctx, "test_keyspace", "0"); err != nil { t.Fatalf("CreateShard failed: %v", err) } master := NewFakeTablet(t, wr, "cell1", 1, pb.TabletType_MASTER) slave := NewFakeTablet(t, wr, "cell1", 2, pb.TabletType_REPLICA) // mark the master inside the shard si, err := ts.GetShard(ctx, "test_keyspace", "0") if err != nil { t.Fatalf("GetShard failed: %v", err) } si.MasterAlias = master.Tablet.Alias if err := ts.UpdateShard(ctx, si); err != nil { t.Fatalf("UpdateShard failed: %v", err) } // master action loop (to initialize host and port) master.FakeMysqlDaemon.CurrentMasterPosition = myproto.ReplicationPosition{ GTIDSet: myproto.MariadbGTID{ Domain: 5, Server: 456, Sequence: 892, }, } master.StartActionLoop(t, wr) defer master.StopActionLoop(t) // slave loop slave.FakeMysqlDaemon.CurrentMasterPosition = myproto.ReplicationPosition{ GTIDSet: myproto.MariadbGTID{ Domain: 5, Server: 456, Sequence: 890, }, } slave.FakeMysqlDaemon.CurrentMasterHost = master.Tablet.Hostname slave.FakeMysqlDaemon.CurrentMasterPort = int(master.Tablet.PortMap["mysql"]) slave.StartActionLoop(t, wr) defer slave.StopActionLoop(t) // run ShardReplicationStatuses ti, rs, err := wr.ShardReplicationStatuses(ctx, "test_keyspace", "0") if err != nil { t.Fatalf("ShardReplicationStatuses failed: %v", err) } // check result (make master first in the array) if len(ti) != 2 || len(rs) != 2 { t.Fatalf("ShardReplicationStatuses returned wrong results: %v %v", ti, rs) } if topoproto.TabletAliasEqual(ti[0].Alias, slave.Tablet.Alias) { ti[0], ti[1] = ti[1], ti[0] rs[0], rs[1] = rs[1], rs[0] } if !topoproto.TabletAliasEqual(ti[0].Alias, master.Tablet.Alias) || !topoproto.TabletAliasEqual(ti[1].Alias, slave.Tablet.Alias) || rs[0].MasterHost != "" || rs[1].MasterHost != master.Tablet.Hostname { t.Fatalf("ShardReplicationStatuses returend wrong results: %v %v", ti, rs) } }
func (wr *Wrangler) emergencyReparentShardLocked(ctx context.Context, ev *events.Reparent, keyspace, shard string, masterElectTabletAlias *pb.TabletAlias, waitSlaveTimeout time.Duration) error { shardInfo, err := wr.ts.GetShard(ctx, keyspace, shard) if err != nil { return err } ev.ShardInfo = *shardInfo event.DispatchUpdate(ev, "reading all tablets") tabletMap, err := wr.ts.GetTabletMapForShard(ctx, keyspace, shard) if err != nil { return err } // Check corner cases we're going to depend on masterElectTabletInfo, ok := tabletMap[*masterElectTabletAlias] if !ok { return fmt.Errorf("master-elect tablet %v is not in the shard", topoproto.TabletAliasString(masterElectTabletAlias)) } ev.NewMaster = *masterElectTabletInfo.Tablet if topoproto.TabletAliasEqual(shardInfo.MasterAlias, masterElectTabletAlias) { return fmt.Errorf("master-elect tablet %v is already the master", topoproto.TabletAliasString(masterElectTabletAlias)) } // Deal with the old master: try to remote-scrap it, if it's // truely dead we force-scrap it. Remove it from our map in any case. if shardInfo.HasMaster() { scrapOldMaster := true oldMasterTabletInfo, ok := tabletMap[*shardInfo.MasterAlias] if ok { delete(tabletMap, *shardInfo.MasterAlias) } else { oldMasterTabletInfo, err = wr.ts.GetTablet(ctx, shardInfo.MasterAlias) if err != nil { wr.logger.Warningf("cannot read old master tablet %v, won't touch it: %v", topoproto.TabletAliasString(shardInfo.MasterAlias), err) scrapOldMaster = false } } if scrapOldMaster { ev.OldMaster = *oldMasterTabletInfo.Tablet wr.logger.Infof("scrapping old master %v", topoproto.TabletAliasString(shardInfo.MasterAlias)) ctx, cancel := context.WithTimeout(ctx, waitSlaveTimeout) defer cancel() if err := wr.tmc.Scrap(ctx, oldMasterTabletInfo); err != nil { wr.logger.Warningf("remote scrapping failed master failed, will force the scrap: %v", err) if err := topotools.Scrap(ctx, wr.ts, shardInfo.MasterAlias, true); err != nil { wr.logger.Warningf("old master topo scrapping failed, continuing anyway: %v", err) } } } } // Stop replication on all slaves, get their current // replication position event.DispatchUpdate(ev, "stop replication on all slaves") wg := sync.WaitGroup{} mu := sync.Mutex{} statusMap := make(map[pb.TabletAlias]myproto.ReplicationStatus) for alias, tabletInfo := range tabletMap { wg.Add(1) go func(alias pb.TabletAlias, tabletInfo *topo.TabletInfo) { defer wg.Done() wr.logger.Infof("getting replication position from %v", topoproto.TabletAliasString(&alias)) ctx, cancel := context.WithTimeout(ctx, waitSlaveTimeout) defer cancel() rp, err := wr.TabletManagerClient().StopReplicationAndGetStatus(ctx, tabletInfo) if err != nil { wr.logger.Warningf("failed to get replication status from %v, ignoring tablet: %v", topoproto.TabletAliasString(&alias), err) return } mu.Lock() statusMap[alias] = rp mu.Unlock() }(alias, tabletInfo) } wg.Wait() // Verify masterElect is alive and has the most advanced position masterElectStatus, ok := statusMap[*masterElectTabletAlias] if !ok { return fmt.Errorf("couldn't get master elect %v replication position", topoproto.TabletAliasString(masterElectTabletAlias)) } for alias, status := range statusMap { if topoproto.TabletAliasEqual(&alias, masterElectTabletAlias) { continue } if !masterElectStatus.Position.AtLeast(status.Position) { return fmt.Errorf("tablet %v is more advanced than master elect tablet %v: %v > %v", topoproto.TabletAliasString(&alias), topoproto.TabletAliasString(masterElectTabletAlias), status.Position, masterElectStatus) } } // Promote the masterElect wr.logger.Infof("promote slave %v", topoproto.TabletAliasString(masterElectTabletAlias)) event.DispatchUpdate(ev, "promoting slave") rp, err := wr.tmc.PromoteSlave(ctx, masterElectTabletInfo) if err != nil { return fmt.Errorf("master-elect tablet %v failed to be upgraded to master: %v", topoproto.TabletAliasString(masterElectTabletAlias), err) } // Reset replication on all slaves to point to the new master, and // insert test row in the new master. // Go through all the tablets: // - new master: populate the reparent journal // - everybody else: reparent to new master, wait for row event.DispatchUpdate(ev, "reparenting all tablets") now := time.Now().UnixNano() wgMaster := sync.WaitGroup{} wgSlaves := sync.WaitGroup{} rec := concurrency.AllErrorRecorder{} var masterErr error for alias, tabletInfo := range tabletMap { if topoproto.TabletAliasEqual(&alias, masterElectTabletAlias) { wgMaster.Add(1) go func(alias pb.TabletAlias, tabletInfo *topo.TabletInfo) { defer wgMaster.Done() wr.logger.Infof("populating reparent journal on new master %v", topoproto.TabletAliasString(&alias)) masterErr = wr.TabletManagerClient().PopulateReparentJournal(ctx, tabletInfo, now, emergencyReparentShardOperation, &alias, rp) }(alias, tabletInfo) } else { wgSlaves.Add(1) go func(alias pb.TabletAlias, tabletInfo *topo.TabletInfo) { defer wgSlaves.Done() wr.logger.Infof("setting new master on slave %v", topoproto.TabletAliasString(&alias)) forceStartSlave := false if status, ok := statusMap[alias]; ok { forceStartSlave = status.SlaveIORunning || status.SlaveSQLRunning } if err := wr.TabletManagerClient().SetMaster(ctx, tabletInfo, masterElectTabletAlias, now, forceStartSlave); err != nil { rec.RecordError(fmt.Errorf("Tablet %v SetMaster failed: %v", topoproto.TabletAliasString(&alias), err)) } }(alias, tabletInfo) } } // After the master is done, we can update the shard record // (note with semi-sync, it also means at least one slave is done) wgMaster.Wait() if masterErr != nil { wgSlaves.Wait() return fmt.Errorf("failed to PopulateReparentJournal on master: %v", masterErr) } wr.logger.Infof("updating shard record with new master %v", topoproto.TabletAliasString(masterElectTabletAlias)) shardInfo.MasterAlias = masterElectTabletAlias if err := wr.ts.UpdateShard(ctx, shardInfo); err != nil { wgSlaves.Wait() return fmt.Errorf("failed to update shard master record: %v", err) } // Wait for the slaves to complete. If some of them fail, we // will rebuild the shard serving graph anyway wgSlaves.Wait() if err := rec.Error(); err != nil { wr.Logger().Errorf("Some slaves failed to reparent: %v", err) return err } // Then we rebuild the entire serving graph for the shard, // to account for all changes. wr.logger.Infof("rebuilding shard graph") event.DispatchUpdate(ev, "rebuilding shard serving graph") _, err = wr.RebuildShardGraph(ctx, keyspace, shard, nil) return err }
// InitTablet initializes the tablet record if necessary. func (agent *ActionAgent) InitTablet(port, gRPCPort int32) error { // only enabled if one of init_tablet_type (when healthcheck // is disabled) or init_keyspace (when healthcheck is enabled) // is passed in, then check other parameters if *initTabletType == "" && *initKeyspace == "" { return nil } // figure out our default target type var tabletType topodatapb.TabletType if *initTabletType != "" { if *targetTabletType != "" { log.Fatalf("cannot specify both target_tablet_type and init_tablet_type parameters (as they might conflict)") } // use the type specified on the command line var err error tabletType, err = topoproto.ParseTabletType(*initTabletType) if err != nil { log.Fatalf("Invalid init tablet type %v: %v", *initTabletType, err) } if tabletType == topodatapb.TabletType_MASTER { // We disallow MASTER, so we don't have to change // shard.MasterAlias, and deal with the corner cases. log.Fatalf("init_tablet_type cannot be %v", tabletType) } } else if *targetTabletType != "" { if strings.ToUpper(*targetTabletType) == topodatapb.TabletType_name[int32(topodatapb.TabletType_MASTER)] { log.Fatalf("target_tablet_type cannot be '%v'. Use '%v' instead.", tabletType, topodatapb.TabletType_REPLICA) } // use spare, the healthcheck will turn us into what // we need to be eventually tabletType = topodatapb.TabletType_SPARE } else { log.Fatalf("if init tablet is enabled, one of init_tablet_type or target_tablet_type needs to be specified") } // create a context for this whole operation ctx, cancel := context.WithTimeout(agent.batchCtx, *initTimeout) defer cancel() // since we're assigned to a shard, make sure it exists, see if // we are its master, and update its cells list if necessary if *initKeyspace == "" || *initShard == "" { log.Fatalf("if init tablet is enabled and the target type is not idle, init_keyspace and init_shard also need to be specified") } shard, _, err := topo.ValidateShardName(*initShard) if err != nil { log.Fatalf("cannot validate shard name: %v", err) } log.Infof("Reading shard record %v/%v", *initKeyspace, shard) // read the shard, create it if necessary si, err := topotools.GetOrCreateShard(ctx, agent.TopoServer, *initKeyspace, shard) if err != nil { return fmt.Errorf("InitTablet cannot GetOrCreateShard shard: %v", err) } if si.MasterAlias != nil && topoproto.TabletAliasEqual(si.MasterAlias, agent.TabletAlias) { // we are the current master for this shard (probably // means the master tablet process was just restarted), // so InitTablet as master. tabletType = topodatapb.TabletType_MASTER } // See if we need to add the tablet's cell to the shard's cell // list. If we do, it has to be under the shard lock. if !si.HasCell(agent.TabletAlias.Cell) { actionNode := actionnode.UpdateShard() lockPath, err := actionNode.LockShard(ctx, agent.TopoServer, *initKeyspace, shard) if err != nil { return fmt.Errorf("LockShard(%v/%v) failed: %v", *initKeyspace, shard, err) } // re-read the shard with the lock si, err = agent.TopoServer.GetShard(ctx, *initKeyspace, shard) if err != nil { return actionNode.UnlockShard(ctx, agent.TopoServer, *initKeyspace, shard, lockPath, err) } // see if we really need to update it now if !si.HasCell(agent.TabletAlias.Cell) { si.Cells = append(si.Cells, agent.TabletAlias.Cell) // write it back if err := agent.TopoServer.UpdateShard(ctx, si); err != nil { return actionNode.UnlockShard(ctx, agent.TopoServer, *initKeyspace, shard, lockPath, err) } } // and unlock if err := actionNode.UnlockShard(ctx, agent.TopoServer, *initKeyspace, shard, lockPath, nil); err != nil { return err } } log.Infof("Initializing the tablet for type %v", tabletType) // figure out the hostname hostname := *tabletHostname if hostname != "" { log.Infof("Using hostname: %v from -tablet_hostname flag.", hostname) } else { hostname, err := netutil.FullyQualifiedHostname() if err != nil { return err } log.Infof("Using detected machine hostname: %v To change this, fix your machine network configuration or override it with -tablet_hostname.", hostname) } // create and populate tablet record tablet := &topodatapb.Tablet{ Alias: agent.TabletAlias, Hostname: hostname, PortMap: make(map[string]int32), Keyspace: *initKeyspace, Shard: *initShard, Type: tabletType, DbNameOverride: *initDbNameOverride, Tags: initTags, } if port != 0 { tablet.PortMap["vt"] = port } if gRPCPort != 0 { tablet.PortMap["grpc"] = gRPCPort } if err := topo.TabletComplete(tablet); err != nil { return fmt.Errorf("InitTablet TabletComplete failed: %v", err) } // now try to create the record err = agent.TopoServer.CreateTablet(ctx, tablet) switch err { case nil: // it worked, we're good, can update the replication graph if err := topo.UpdateTabletReplicationData(ctx, agent.TopoServer, tablet); err != nil { return fmt.Errorf("UpdateTabletReplicationData failed: %v", err) } case topo.ErrNodeExists: // The node already exists, will just try to update // it. So we read it first. oldTablet, err := agent.TopoServer.GetTablet(ctx, tablet.Alias) if err != nil { return fmt.Errorf("InitTablet failed to read existing tablet record: %v", err) } // Sanity check the keyspace and shard if oldTablet.Keyspace != tablet.Keyspace || oldTablet.Shard != tablet.Shard { return fmt.Errorf("InitTablet failed because existing tablet keyspace and shard %v/%v differ from the provided ones %v/%v", oldTablet.Keyspace, oldTablet.Shard, tablet.Keyspace, tablet.Shard) } // And overwrite the rest *(oldTablet.Tablet) = *tablet if err := agent.TopoServer.UpdateTablet(ctx, oldTablet); err != nil { return fmt.Errorf("UpdateTablet failed: %v", err) } // Note we don't need to UpdateTabletReplicationData // as the tablet already existed with the right data // in the replication graph default: return fmt.Errorf("CreateTablet failed: %v", err) } // and now update the serving graph. Note we do that in any case, // to clean any inaccurate record from any part of the serving graph. if err := topotools.UpdateTabletEndpoints(ctx, agent.TopoServer, tablet); err != nil { return fmt.Errorf("UpdateTabletEndpoints failed: %v", err) } return nil }
// TabletExternallyReparented updates all topo records so the current // tablet is the new master for this shard. func (agent *ActionAgent) TabletExternallyReparented(ctx context.Context, externalID string) error { if err := agent.lock(ctx); err != nil { return err } defer agent.unlock() startTime := time.Now() // If there is a finalize step running, wait for it to finish or time out // before checking the global shard record again. if agent.finalizeReparentCtx != nil { select { case <-agent.finalizeReparentCtx.Done(): agent.finalizeReparentCtx = nil case <-ctx.Done(): return ctx.Err() } } tablet := agent.Tablet() // Check the global shard record. si, err := agent.TopoServer.GetShard(ctx, tablet.Keyspace, tablet.Shard) if err != nil { log.Warningf("fastTabletExternallyReparented: failed to read global shard record for %v/%v: %v", tablet.Keyspace, tablet.Shard, err) return err } if topoproto.TabletAliasEqual(si.MasterAlias, tablet.Alias) { // We may get called on the current master even when nothing has changed. // If the global shard record is already updated, it means we successfully // finished a previous reparent to this tablet. return nil } // Remember when we were first told we're the master. // If another tablet claims to be master and offers a more recent time, // that tablet will be trusted over us. agent.mutex.Lock() agent._tabletExternallyReparentedTime = startTime agent._replicationDelay = 0 agent.mutex.Unlock() // Create a reusable Reparent event with available info. ev := &events.Reparent{ ShardInfo: *si, NewMaster: *tablet, OldMaster: topodatapb.Tablet{ Alias: si.MasterAlias, Type: topodatapb.TabletType_MASTER, }, ExternalID: externalID, } defer func() { if err != nil { event.DispatchUpdate(ev, "failed: "+err.Error()) } }() event.DispatchUpdate(ev, "starting external from tablet (fast)") // Execute state change to master by force-updating only the local copy of the // tablet record. The actual record in topo will be updated later. log.Infof("fastTabletExternallyReparented: executing change callback for state change to MASTER") oldTablet := proto.Clone(tablet).(*topodatapb.Tablet) tablet.Type = topodatapb.TabletType_MASTER agent.setTablet(tablet) // This is where updateState will block for gracePeriod, while it gives // vtgate a chance to stop sending replica queries. agent.updateState(ctx, oldTablet, "fastTabletExternallyReparented") // Start the finalize stage with a background context, but connect the trace. bgCtx, cancel := context.WithTimeout(agent.batchCtx, *finalizeReparentTimeout) bgCtx = trace.CopySpan(bgCtx, ctx) agent.finalizeReparentCtx = bgCtx go func() { err := agent.finalizeTabletExternallyReparented(bgCtx, si, ev) cancel() if err != nil { log.Warningf("finalizeTabletExternallyReparented error: %v", err) event.DispatchUpdate(ev, "failed: "+err.Error()) return } externalReparentStats.Record("FullRebuild", startTime) }() return nil }