// updateReplicationGraphForPromotedSlave makes sure the newly promoted slave // is correctly represented in the replication graph func (agent *ActionAgent) updateReplicationGraphForPromotedSlave(tablet *topo.TabletInfo) error { // Update tablet regardless - trend towards consistency. tablet.State = topo.STATE_READ_WRITE tablet.Type = topo.TYPE_MASTER tablet.Parent.Cell = "" tablet.Parent.Uid = topo.NO_TABLET tablet.Health = nil err := topo.UpdateTablet(agent.TopoServer, tablet) if err != nil { return err } // NOTE(msolomon) A serving graph update is required, but in // order for the shard to be consistent the old master must be // scrapped first. That is externally coordinated by the // wrangler reparent action. // Insert the new tablet location in the replication graph now that // we've updated the tablet. err = topo.UpdateTabletReplicationData(agent.TopoServer, tablet.Tablet) if err != nil && err != topo.ErrNodeExists { return err } return nil }
// SlaveWasRestarted updates the parent record for a tablet. // Should be called under RpcWrapLockAction. func (agent *ActionAgent) SlaveWasRestarted(swrd *actionnode.SlaveWasRestartedArgs) error { tablet, err := agent.TopoServer.GetTablet(agent.TabletAlias) if err != nil { return err } // Once this action completes, update authoritive tablet node first. tablet.Parent = swrd.Parent if tablet.Type == topo.TYPE_MASTER { tablet.Type = topo.TYPE_SPARE tablet.State = topo.STATE_READ_ONLY } err = topo.UpdateTablet(agent.TopoServer, tablet) if err != nil { return err } // Update the new tablet location in the replication graph now that // we've updated the tablet. err = topo.UpdateTabletReplicationData(agent.TopoServer, tablet.Tablet) if err != nil && err != topo.ErrNodeExists { return err } return nil }
// updateReplicationGraphForPromotedSlave makes sure the newly promoted slave // is correctly represented in the replication graph func (agent *ActionAgent) updateReplicationGraphForPromotedSlave(ctx context.Context, tablet *topo.TabletInfo) error { // Update tablet regardless - trend towards consistency. tablet.Type = pb.TabletType_MASTER tablet.HealthMap = nil err := agent.TopoServer.UpdateTablet(ctx, tablet) if err != nil { return err } // NOTE(msolomon) A serving graph update is required, but in // order for the shard to be consistent the old master must be // dealt with first. That is externally coordinated by the // wrangler reparent action. // Insert the new tablet location in the replication graph now that // we've updated the tablet. err = topo.UpdateTabletReplicationData(ctx, agent.TopoServer, tablet.Tablet) if err != nil && err != topo.ErrNodeExists { return err } return nil }
// change a tablet type to RESTORE and set all the other arguments. // from now on, we can go to: // - back to IDLE if we don't use the tablet at all (after for instance // a successful ReserveForRestore but a failed Snapshot) // - to SCRAP if something in the process on the target host fails // - to SPARE if the clone works func (agent *ActionAgent) changeTypeToRestore(tablet, sourceTablet *topo.TabletInfo, parentAlias topo.TabletAlias, keyRange key.KeyRange) error { // run the optional preflight_assigned hook hk := hook.NewSimpleHook("preflight_assigned") topotools.ConfigureTabletHook(hk, agent.TabletAlias) if err := hk.ExecuteOptional(); err != nil { return err } // change the type tablet.Parent = parentAlias tablet.Keyspace = sourceTablet.Keyspace tablet.Shard = sourceTablet.Shard tablet.Type = topo.TYPE_RESTORE tablet.KeyRange = keyRange tablet.DbNameOverride = sourceTablet.DbNameOverride if err := topo.UpdateTablet(agent.TopoServer, tablet); err != nil { return err } // and create the replication graph items return topo.UpdateTabletReplicationData(agent.TopoServer, tablet.Tablet) }
// SlaveWasRestarted updates the parent record for a tablet. // Should be called under RPCWrapLockAction. func (agent *ActionAgent) SlaveWasRestarted(ctx context.Context, swrd *actionnode.SlaveWasRestartedArgs) error { tablet, err := agent.TopoServer.GetTablet(ctx, agent.TabletAlias) if err != nil { return err } // Once this action completes, update authoritative tablet node first. if tablet.Type == pb.TabletType_MASTER { tablet.Type = pb.TabletType_SPARE } err = agent.TopoServer.UpdateTablet(ctx, tablet) if err != nil { return err } // Update the new tablet location in the replication graph now that // we've updated the tablet. err = topo.UpdateTabletReplicationData(ctx, agent.TopoServer, tablet.Tablet) if err != nil && err != topo.ErrNodeExists { return err } return nil }
// InitTablet initializes the tablet record if necessary. func (agent *ActionAgent) InitTablet(port, gRPCPort int32) error { // only enabled if one of init_tablet_type (when healthcheck // is disabled) or init_keyspace (when healthcheck is enabled) // is passed in, then check other parameters if *initTabletType == "" && *initKeyspace == "" { return nil } // figure out our default target type var tabletType topodatapb.TabletType if *initTabletType != "" { if *targetTabletType != "" { log.Fatalf("cannot specify both target_tablet_type and init_tablet_type parameters (as they might conflict)") } // use the type specified on the command line var err error tabletType, err = topoproto.ParseTabletType(*initTabletType) if err != nil { log.Fatalf("Invalid init tablet type %v: %v", *initTabletType, err) } if tabletType == topodatapb.TabletType_MASTER { // We disallow MASTER, so we don't have to change // shard.MasterAlias, and deal with the corner cases. log.Fatalf("init_tablet_type cannot be %v", tabletType) } } else if *targetTabletType != "" { if strings.ToUpper(*targetTabletType) == topodatapb.TabletType_name[int32(topodatapb.TabletType_MASTER)] { log.Fatalf("target_tablet_type cannot be '%v'. Use '%v' instead.", tabletType, topodatapb.TabletType_REPLICA) } // use spare, the healthcheck will turn us into what // we need to be eventually tabletType = topodatapb.TabletType_SPARE } else { log.Fatalf("if init tablet is enabled, one of init_tablet_type or target_tablet_type needs to be specified") } // create a context for this whole operation ctx, cancel := context.WithTimeout(agent.batchCtx, *initTimeout) defer cancel() // since we're assigned to a shard, make sure it exists, see if // we are its master, and update its cells list if necessary if *initKeyspace == "" || *initShard == "" { log.Fatalf("if init tablet is enabled and the target type is not idle, init_keyspace and init_shard also need to be specified") } shard, _, err := topo.ValidateShardName(*initShard) if err != nil { log.Fatalf("cannot validate shard name: %v", err) } log.Infof("Reading shard record %v/%v", *initKeyspace, shard) // read the shard, create it if necessary si, err := topotools.GetOrCreateShard(ctx, agent.TopoServer, *initKeyspace, shard) if err != nil { return fmt.Errorf("InitTablet cannot GetOrCreateShard shard: %v", err) } if si.MasterAlias != nil && topoproto.TabletAliasEqual(si.MasterAlias, agent.TabletAlias) { // we are the current master for this shard (probably // means the master tablet process was just restarted), // so InitTablet as master. tabletType = topodatapb.TabletType_MASTER } // See if we need to add the tablet's cell to the shard's cell // list. If we do, it has to be under the shard lock. if !si.HasCell(agent.TabletAlias.Cell) { actionNode := actionnode.UpdateShard() lockPath, err := actionNode.LockShard(ctx, agent.TopoServer, *initKeyspace, shard) if err != nil { return fmt.Errorf("LockShard(%v/%v) failed: %v", *initKeyspace, shard, err) } // re-read the shard with the lock si, err = agent.TopoServer.GetShard(ctx, *initKeyspace, shard) if err != nil { return actionNode.UnlockShard(ctx, agent.TopoServer, *initKeyspace, shard, lockPath, err) } // see if we really need to update it now if !si.HasCell(agent.TabletAlias.Cell) { si.Cells = append(si.Cells, agent.TabletAlias.Cell) // write it back if err := agent.TopoServer.UpdateShard(ctx, si); err != nil { return actionNode.UnlockShard(ctx, agent.TopoServer, *initKeyspace, shard, lockPath, err) } } // and unlock if err := actionNode.UnlockShard(ctx, agent.TopoServer, *initKeyspace, shard, lockPath, nil); err != nil { return err } } log.Infof("Initializing the tablet for type %v", tabletType) // figure out the hostname hostname := *tabletHostname if hostname != "" { log.Infof("Using hostname: %v from -tablet_hostname flag.", hostname) } else { hostname, err := netutil.FullyQualifiedHostname() if err != nil { return err } log.Infof("Using detected machine hostname: %v To change this, fix your machine network configuration or override it with -tablet_hostname.", hostname) } // create and populate tablet record tablet := &topodatapb.Tablet{ Alias: agent.TabletAlias, Hostname: hostname, PortMap: make(map[string]int32), Keyspace: *initKeyspace, Shard: *initShard, Type: tabletType, DbNameOverride: *initDbNameOverride, Tags: initTags, } if port != 0 { tablet.PortMap["vt"] = port } if gRPCPort != 0 { tablet.PortMap["grpc"] = gRPCPort } if err := topo.TabletComplete(tablet); err != nil { return fmt.Errorf("InitTablet TabletComplete failed: %v", err) } // now try to create the record err = agent.TopoServer.CreateTablet(ctx, tablet) switch err { case nil: // it worked, we're good, can update the replication graph if err := topo.UpdateTabletReplicationData(ctx, agent.TopoServer, tablet); err != nil { return fmt.Errorf("UpdateTabletReplicationData failed: %v", err) } case topo.ErrNodeExists: // The node already exists, will just try to update // it. So we read it first. oldTablet, err := agent.TopoServer.GetTablet(ctx, tablet.Alias) if err != nil { return fmt.Errorf("InitTablet failed to read existing tablet record: %v", err) } // Sanity check the keyspace and shard if oldTablet.Keyspace != tablet.Keyspace || oldTablet.Shard != tablet.Shard { return fmt.Errorf("InitTablet failed because existing tablet keyspace and shard %v/%v differ from the provided ones %v/%v", oldTablet.Keyspace, oldTablet.Shard, tablet.Keyspace, tablet.Shard) } // And overwrite the rest *(oldTablet.Tablet) = *tablet if err := agent.TopoServer.UpdateTablet(ctx, oldTablet); err != nil { return fmt.Errorf("UpdateTablet failed: %v", err) } // Note we don't need to UpdateTabletReplicationData // as the tablet already existed with the right data // in the replication graph default: return fmt.Errorf("CreateTablet failed: %v", err) } // and now update the serving graph. Note we do that in any case, // to clean any inaccurate record from any part of the serving graph. if err := topotools.UpdateTabletEndpoints(ctx, agent.TopoServer, tablet); err != nil { return fmt.Errorf("UpdateTabletEndpoints failed: %v", err) } return nil }
// InitTablet creates or updates a tablet. If no parent is specified // in the tablet, and the tablet has a slave type, we will find the // appropriate parent. If createShardAndKeyspace is true and the // parent keyspace or shard don't exist, they will be created. If // update is true, and a tablet with the same ID exists, update it. // If Force is true, and a tablet with the same ID already exists, it // will be scrapped and deleted, and then recreated. func (wr *Wrangler) InitTablet(ctx context.Context, tablet *topo.Tablet, force, createShardAndKeyspace, update bool) error { if err := topo.TabletComplete(tablet); err != nil { return err } if topo.IsInReplicationGraph(tablet.Type) { // get the shard, possibly creating it var err error var si *topo.ShardInfo if createShardAndKeyspace { // create the parent keyspace and shard if needed si, err = topotools.GetOrCreateShard(ctx, wr.ts, tablet.Keyspace, tablet.Shard) } else { si, err = wr.ts.GetShard(ctx, tablet.Keyspace, tablet.Shard) if err == topo.ErrNoNode { return fmt.Errorf("missing parent shard, use -parent option to create it, or CreateKeyspace / CreateShard") } } // get the shard, checks a couple things if err != nil { return fmt.Errorf("cannot get (or create) shard %v/%v: %v", tablet.Keyspace, tablet.Shard, err) } if key.ProtoToKeyRange(si.KeyRange) != tablet.KeyRange { return fmt.Errorf("shard %v/%v has a different KeyRange: %v != %v", tablet.Keyspace, tablet.Shard, si.KeyRange, tablet.KeyRange) } if tablet.Type == topo.TYPE_MASTER && !topo.TabletAliasIsZero(si.MasterAlias) && topo.ProtoToTabletAlias(si.MasterAlias) != tablet.Alias && !force { return fmt.Errorf("creating this tablet would override old master %v in shard %v/%v", si.MasterAlias, tablet.Keyspace, tablet.Shard) } // update the shard record if needed if err := wr.updateShardCellsAndMaster(ctx, si, topo.TabletAliasToProto(tablet.Alias), topo.TabletTypeToProto(tablet.Type), force); err != nil { return err } } err := topo.CreateTablet(ctx, wr.ts, tablet) if err != nil && err == topo.ErrNodeExists { // Try to update nicely, but if it fails fall back to force behavior. if update || force { oldTablet, err := wr.ts.GetTablet(ctx, tablet.Alias) if err != nil { wr.Logger().Warningf("failed reading tablet %v: %v", tablet.Alias, err) } else { if oldTablet.Keyspace == tablet.Keyspace && oldTablet.Shard == tablet.Shard { *(oldTablet.Tablet) = *tablet if err := topo.UpdateTablet(ctx, wr.ts, oldTablet); err != nil { wr.Logger().Warningf("failed updating tablet %v: %v", tablet.Alias, err) // now fall through the Scrap case } else { if !topo.IsInReplicationGraph(tablet.Type) { return nil } if err := topo.UpdateTabletReplicationData(ctx, wr.ts, tablet); err != nil { wr.Logger().Warningf("failed updating tablet replication data for %v: %v", tablet.Alias, err) // now fall through the Scrap case } else { return nil } } } } } if force { if err = wr.Scrap(ctx, tablet.Alias, force, false); err != nil { wr.Logger().Errorf("failed scrapping tablet %v: %v", tablet.Alias, err) return err } if err := wr.ts.DeleteTablet(ctx, tablet.Alias); err != nil { // we ignore this wr.Logger().Errorf("failed deleting tablet %v: %v", tablet.Alias, err) } return topo.CreateTablet(ctx, wr.ts, tablet) } } return err }
// InitTablet creates or updates a tablet. If no parent is specified // in the tablet, and the tablet has a slave type, we will find the // appropriate parent. If createShardAndKeyspace is true and the // parent keyspace or shard don't exist, they will be created. If // update is true, and a tablet with the same ID exists, update it. // If Force is true, and a tablet with the same ID already exists, it // will be scrapped and deleted, and then recreated. func (wr *Wrangler) InitTablet(tablet *topo.Tablet, force, createShardAndKeyspace, update bool) error { if err := tablet.Complete(); err != nil { return err } if tablet.IsInReplicationGraph() { // create the parent keyspace and shard if needed if createShardAndKeyspace { if err := wr.ts.CreateKeyspace(tablet.Keyspace, &topo.Keyspace{}); err != nil && err != topo.ErrNodeExists { return err } if err := topo.CreateShard(wr.ts, tablet.Keyspace, tablet.Shard); err != nil && err != topo.ErrNodeExists { return err } } // get the shard, checks a couple things si, err := wr.ts.GetShard(tablet.Keyspace, tablet.Shard) if err != nil { return fmt.Errorf("missing parent shard, use -parent option to create it, or CreateKeyspace / CreateShard") } if si.KeyRange != tablet.KeyRange { return fmt.Errorf("shard %v/%v has a different KeyRange: %v != %v", tablet.Keyspace, tablet.Shard, si.KeyRange, tablet.KeyRange) } if tablet.Type == topo.TYPE_MASTER && !si.MasterAlias.IsZero() && si.MasterAlias != tablet.Alias && !force { return fmt.Errorf("creating this tablet would override old master %v in shard %v/%v", si.MasterAlias, tablet.Keyspace, tablet.Shard) } // see if we specified a parent, otherwise get it from the shard if tablet.Parent.IsZero() && tablet.Type.IsSlaveType() { if si.MasterAlias.IsZero() { return fmt.Errorf("trying to create tablet %v in shard %v/%v without a master", tablet.Alias, tablet.Keyspace, tablet.Shard) } tablet.Parent = si.MasterAlias } // update the shard record if needed if err := wr.updateShardCellsAndMaster(si, tablet.Alias, tablet.Type, force); err != nil { return err } } err := topo.CreateTablet(wr.ts, tablet) if err != nil && err == topo.ErrNodeExists { // Try to update nicely, but if it fails fall back to force behavior. if update || force { oldTablet, err := wr.ts.GetTablet(tablet.Alias) if err != nil { log.Warningf("failed reading tablet %v: %v", tablet.Alias, err) } else { if oldTablet.Keyspace == tablet.Keyspace && oldTablet.Shard == tablet.Shard { *(oldTablet.Tablet) = *tablet if err := topo.UpdateTablet(wr.ts, oldTablet); err != nil { log.Warningf("failed updating tablet %v: %v", tablet.Alias, err) // now fall through the Scrap case } else { if !tablet.IsInReplicationGraph() { return nil } if err := topo.UpdateTabletReplicationData(wr.ts, tablet); err != nil { log.Warningf("failed updating tablet replication data for %v: %v", tablet.Alias, err) // now fall through the Scrap case } else { return nil } } } } } if force { if err = wr.Scrap(tablet.Alias, force, false); err != nil { log.Errorf("failed scrapping tablet %v: %v", tablet.Alias, err) return err } if err := wr.ts.DeleteTablet(tablet.Alias); err != nil { // we ignore this log.Errorf("failed deleting tablet %v: %v", tablet.Alias, err) } return topo.CreateTablet(wr.ts, tablet) } } return err }
// RebuildReplicationGraph is a quick and dirty tool to resurrect the TopologyServer data from the // canonical data stored in the tablet nodes. // // cells: local vt cells to scan for all tablets // keyspaces: list of keyspaces to rebuild func (wr *Wrangler) RebuildReplicationGraph(ctx context.Context, cells []string, keyspaces []string) error { if cells == nil || len(cells) == 0 { return fmt.Errorf("must specify cells to rebuild replication graph") } if keyspaces == nil || len(keyspaces) == 0 { return fmt.Errorf("must specify keyspaces to rebuild replication graph") } allTablets := make([]*topo.TabletInfo, 0, 1024) for _, cell := range cells { tablets, err := topotools.GetAllTablets(ctx, wr.ts, cell) if err != nil { return err } allTablets = append(allTablets, tablets...) } for _, keyspace := range keyspaces { wr.logger.Infof("delete keyspace shards: %v", keyspace) if err := wr.ts.DeleteKeyspaceShards(ctx, keyspace); err != nil { return err } } keyspacesToRebuild := make(map[string]bool) shardsCreated := make(map[string]bool) hasErr := false mu := sync.Mutex{} wg := sync.WaitGroup{} for _, ti := range allTablets { wg.Add(1) go func(ti *topo.TabletInfo) { defer wg.Done() if !strInList(keyspaces, ti.Keyspace) { return } mu.Lock() keyspacesToRebuild[ti.Keyspace] = true shardPath := ti.Keyspace + "/" + ti.Shard if !shardsCreated[shardPath] { if err := wr.ts.CreateShard(ctx, ti.Keyspace, ti.Shard); err != nil && err != topo.ErrNodeExists { wr.logger.Warningf("failed re-creating shard %v: %v", shardPath, err) hasErr = true } else { shardsCreated[shardPath] = true } } mu.Unlock() err := topo.UpdateTabletReplicationData(ctx, wr.ts, ti.Tablet) if err != nil { mu.Lock() hasErr = true mu.Unlock() wr.logger.Warningf("failed updating replication data: %v", err) } }(ti) } wg.Wait() for keyspace := range keyspacesToRebuild { wg.Add(1) go func(keyspace string) { defer wg.Done() if err := wr.RebuildKeyspaceGraph(ctx, keyspace, nil); err != nil { mu.Lock() hasErr = true mu.Unlock() wr.logger.Warningf("RebuildKeyspaceGraph(%v) failed: %v", keyspace, err) return } }(keyspace) } wg.Wait() if hasErr { return fmt.Errorf("some errors occurred rebuilding replication graph, consult log") } return nil }
// InitTablet creates or updates a tablet. If no parent is specified // in the tablet, and the tablet has a slave type, we will find the // appropriate parent. If createShardAndKeyspace is true and the // parent keyspace or shard don't exist, they will be created. If // allowUpdate is true, and a tablet with the same ID exists, just update it. // If a tablet already exists, and has a different keyspace / shard, // allowDifferentShard must be set to accept the update. // If a tablet is created as master, and there is already a different // master in the shard, allowMasterOverride must be set. func (wr *Wrangler) InitTablet(ctx context.Context, tablet *topodatapb.Tablet, allowMasterOverride, allowDifferentShard, createShardAndKeyspace, allowUpdate bool) error { if err := topo.TabletComplete(tablet); err != nil { return err } // get the shard, possibly creating it var err error var si *topo.ShardInfo if createShardAndKeyspace { // create the parent keyspace and shard if needed si, err = topotools.GetOrCreateShard(ctx, wr.ts, tablet.Keyspace, tablet.Shard) } else { si, err = wr.ts.GetShard(ctx, tablet.Keyspace, tablet.Shard) if err == topo.ErrNoNode { return fmt.Errorf("missing parent shard, use -parent option to create it, or CreateKeyspace / CreateShard") } } // get the shard, checks a couple things if err != nil { return fmt.Errorf("cannot get (or create) shard %v/%v: %v", tablet.Keyspace, tablet.Shard, err) } if !key.KeyRangeEqual(si.KeyRange, tablet.KeyRange) { return fmt.Errorf("shard %v/%v has a different KeyRange: %v != %v", tablet.Keyspace, tablet.Shard, si.KeyRange, tablet.KeyRange) } if tablet.Type == topodatapb.TabletType_MASTER && si.HasMaster() && !topoproto.TabletAliasEqual(si.MasterAlias, tablet.Alias) && !allowMasterOverride { return fmt.Errorf("creating this tablet would override old master %v in shard %v/%v, use allow_master_override flag", topoproto.TabletAliasString(si.MasterAlias), tablet.Keyspace, tablet.Shard) } // update the shard record if needed if err := wr.updateShardCellsAndMaster(ctx, si, tablet.Alias, tablet.Type, allowMasterOverride); err != nil { return err } err = wr.ts.CreateTablet(ctx, tablet) if err == topo.ErrNodeExists && allowUpdate { // Try to update then oldTablet, err := wr.ts.GetTablet(ctx, tablet.Alias) if err != nil { return fmt.Errorf("failed reading existing tablet %v: %v", topoproto.TabletAliasString(tablet.Alias), err) } // Check we have the same keyspace / shard, and if not, // require the allowDifferentShard flag. if (oldTablet.Keyspace != tablet.Keyspace || oldTablet.Shard != tablet.Shard) && !allowDifferentShard { return fmt.Errorf("old tablet has shard %v/%v, cannot override with shard %v/%v unless allow_different_shard is set", oldTablet.Keyspace, oldTablet.Shard, tablet.Keyspace, tablet.Shard) } *(oldTablet.Tablet) = *tablet if err := wr.ts.UpdateTablet(ctx, oldTablet); err != nil { return fmt.Errorf("failed updating tablet %v: %v", topoproto.TabletAliasString(tablet.Alias), err) } } // now update the replication data if err := topo.UpdateTabletReplicationData(ctx, wr.ts, tablet); err != nil { return fmt.Errorf("failed updating tablet replication data for %v: %v", topoproto.TabletAliasString(tablet.Alias), err) } return nil }
// RestartSlave tells the tablet it has a new master // Should be called under RpcWrapLockAction. func (agent *ActionAgent) RestartSlave(rsd *actionnode.RestartSlaveData) error { tablet, err := agent.TopoServer.GetTablet(agent.TabletAlias) if err != nil { return err } // If this check fails, we seem reparented. The only part that // could have failed is the insert in the replication // graph. Do NOT try to reparent again. That will either wedge // replication or corrupt data. if tablet.Parent != rsd.Parent { log.V(6).Infof("restart with new parent") // Remove tablet from the replication graph. if err = topo.DeleteTabletReplicationData(agent.TopoServer, tablet.Tablet); err != nil && err != topo.ErrNoNode { return err } // Move a lag slave into the orphan lag type so we can safely ignore // this reparenting until replication catches up. if tablet.Type == topo.TYPE_LAG { tablet.Type = topo.TYPE_LAG_ORPHAN } else { err = agent.Mysqld.RestartSlave(rsd.ReplicationStatus, rsd.WaitPosition, rsd.TimePromoted) if err != nil { return err } } // Once this action completes, update authoritive tablet node first. tablet.Parent = rsd.Parent err = topo.UpdateTablet(agent.TopoServer, tablet) if err != nil { return err } } else if rsd.Force { err = agent.Mysqld.RestartSlave(rsd.ReplicationStatus, rsd.WaitPosition, rsd.TimePromoted) if err != nil { return err } // Complete the special orphan accounting. if tablet.Type == topo.TYPE_LAG_ORPHAN { tablet.Type = topo.TYPE_LAG err = topo.UpdateTablet(agent.TopoServer, tablet) if err != nil { return err } } } else { // There is nothing to safely reparent, so check replication. If // either replication thread is not running, report an error. status, err := agent.Mysqld.SlaveStatus() if err != nil { return fmt.Errorf("cannot verify replication for slave: %v", err) } if !status.SlaveRunning() { return fmt.Errorf("replication not running for slave") } } // Insert the new tablet location in the replication graph now that // we've updated the tablet. err = topo.UpdateTabletReplicationData(agent.TopoServer, tablet.Tablet) if err != nil && err != topo.ErrNodeExists { return err } return nil }
// InitTablet initializes the tablet record if necessary. func (agent *ActionAgent) InitTablet(port, gRPCPort int32) error { // it should be either we have all three of init_keyspace, // init_shard and init_tablet_type, or none. if *initKeyspace == "" && *initShard == "" && *initTabletType == "" { // not initializing the record return nil } if *initKeyspace == "" || *initShard == "" || *initTabletType == "" { return fmt.Errorf("either need all of init_keyspace, init_shard and init_tablet_type, or none") } // parse init_tablet_type tabletType, err := topoproto.ParseTabletType(*initTabletType) if err != nil { return fmt.Errorf("invalid init_tablet_type %v: %v", *initTabletType, err) } if tabletType == topodatapb.TabletType_MASTER { // We disallow MASTER, so we don't have to change // shard.MasterAlias, and deal with the corner cases. return fmt.Errorf("init_tablet_type cannot be master, use replica instead") } // parse and validate shard name shard, _, err := topo.ValidateShardName(*initShard) if err != nil { return fmt.Errorf("cannot validate shard name %v: %v", *initShard, err) } // create a context for this whole operation ctx, cancel := context.WithTimeout(agent.batchCtx, *initTimeout) defer cancel() // read the shard, create it if necessary log.Infof("Reading shard record %v/%v", *initKeyspace, shard) si, err := agent.TopoServer.GetOrCreateShard(ctx, *initKeyspace, shard) if err != nil { return fmt.Errorf("InitTablet cannot GetOrCreateShard shard: %v", err) } if si.MasterAlias != nil && topoproto.TabletAliasEqual(si.MasterAlias, agent.TabletAlias) { // We're marked as master in the shard record, which could mean the master // tablet process was just restarted. However, we need to check if a new // master is in the process of taking over. In that case, it will let us // know by forcibly updating the old master's tablet record. oldTablet, err := agent.TopoServer.GetTablet(ctx, agent.TabletAlias) switch err { case topo.ErrNoNode: // There's no existing tablet record, so we can assume // no one has left us a message to step down. tabletType = topodatapb.TabletType_MASTER case nil: if oldTablet.Type == topodatapb.TabletType_MASTER { // We're marked as master in the shard record, // and our existing tablet record agrees. tabletType = topodatapb.TabletType_MASTER } default: return fmt.Errorf("InitTablet failed to read existing tablet record: %v", err) } } // See if we need to add the tablet's cell to the shard's cell list. if !si.HasCell(agent.TabletAlias.Cell) { si, err = agent.TopoServer.UpdateShardFields(ctx, *initKeyspace, shard, func(si *topo.ShardInfo) error { if si.HasCell(agent.TabletAlias.Cell) { // Someone else already did it. return topo.ErrNoUpdateNeeded } si.Cells = append(si.Cells, agent.TabletAlias.Cell) return nil }) if err != nil { return fmt.Errorf("couldn't add tablet's cell to shard record: %v", err) } } log.Infof("Initializing the tablet for type %v", tabletType) // figure out the hostname hostname := *tabletHostname if hostname != "" { log.Infof("Using hostname: %v from -tablet_hostname flag.", hostname) } else { hostname, err := netutil.FullyQualifiedHostname() if err != nil { return err } log.Infof("Using detected machine hostname: %v To change this, fix your machine network configuration or override it with -tablet_hostname.", hostname) } // create and populate tablet record tablet := &topodatapb.Tablet{ Alias: agent.TabletAlias, Hostname: hostname, PortMap: make(map[string]int32), Keyspace: *initKeyspace, Shard: *initShard, Type: tabletType, DbNameOverride: *initDbNameOverride, Tags: initTags, } if port != 0 { tablet.PortMap["vt"] = port } if gRPCPort != 0 { tablet.PortMap["grpc"] = gRPCPort } if err := topo.TabletComplete(tablet); err != nil { return fmt.Errorf("InitTablet TabletComplete failed: %v", err) } // now try to create the record err = agent.TopoServer.CreateTablet(ctx, tablet) switch err { case nil: // it worked, we're good, can update the replication graph if err := topo.UpdateTabletReplicationData(ctx, agent.TopoServer, tablet); err != nil { return fmt.Errorf("UpdateTabletReplicationData failed: %v", err) } case topo.ErrNodeExists: // The node already exists, will just try to update it. So we read it first. oldTablet, err := agent.TopoServer.GetTablet(ctx, tablet.Alias) if err != nil { return fmt.Errorf("InitTablet failed to read existing tablet record: %v", err) } // Sanity check the keyspace and shard if oldTablet.Keyspace != tablet.Keyspace || oldTablet.Shard != tablet.Shard { return fmt.Errorf("InitTablet failed because existing tablet keyspace and shard %v/%v differ from the provided ones %v/%v", oldTablet.Keyspace, oldTablet.Shard, tablet.Keyspace, tablet.Shard) } // Then overwrite everything, ignoring version mismatch. if err := agent.TopoServer.UpdateTablet(ctx, topo.NewTabletInfo(tablet, -1)); err != nil { return fmt.Errorf("UpdateTablet failed: %v", err) } // Note we don't need to UpdateTabletReplicationData // as the tablet already existed with the right data // in the replication graph default: return fmt.Errorf("CreateTablet failed: %v", err) } return nil }