// ChangeTypeNoRebuild changes a tablet's type, and returns whether // there's a shard that should be rebuilt, along with its cell, // keyspace, and shard. If force is true, it will bypass the vtaction // system and make the data change directly, and not run the remote // hooks. // // Note we don't update the master record in the Shard here, as we // can't ChangeType from and out of master anyway. func (wr *Wrangler) ChangeTypeNoRebuild(tabletAlias topo.TabletAlias, tabletType topo.TabletType, force bool) (rebuildRequired bool, cell, keyspace, shard string, err error) { // Load tablet to find keyspace and shard assignment. // Don't load after the ChangeType which might have unassigned // the tablet. ti, err := wr.ts.GetTablet(tabletAlias) if err != nil { return false, "", "", "", err } if force { if err := actionnode.ChangeType(wr.ts, tabletAlias, tabletType, nil, false); err != nil { return false, "", "", "", err } } else { if wr.UseRPCs { if err := wr.ai.RpcChangeType(ti, tabletType, wr.actionTimeout()); err != nil { return false, "", "", "", err } } else { // the remote action will run the hooks actionPath, err := wr.ai.ChangeType(tabletAlias, tabletType) if err != nil { return false, "", "", "", err } // You don't have a choice - you must wait for // completion before rebuilding. if err := wr.ai.WaitForCompletion(actionPath, wr.actionTimeout()); err != nil { return false, "", "", "", err } } } if !ti.Tablet.IsInServingGraph() { // re-read the tablet, see if we become serving ti, err = wr.ts.GetTablet(tabletAlias) if err != nil { return false, "", "", "", err } if !ti.Tablet.IsInServingGraph() { return false, "", "", "", nil } } return true, ti.Alias.Cell, ti.Keyspace, ti.Shard, nil }
// ChangeType is the RPC entry point that changes this tablet's type to
// *args. The change is handed to the agent's RpcWrapLockAction under
// the TABLET_ACTION_CHANGE_TYPE action name and is applied with hooks
// enabled.
func (tm *TabletManager) ChangeType(context *rpcproto.Context, args *topo.TabletType, reply *rpc.UnusedResponse) error {
	remoteAddr := context.RemoteAddr

	// The agent fields and *args are only read when the wrapper
	// invokes the closure.
	applyChange := func() error {
		return actionnode.ChangeType(tm.agent.TopoServer, tm.agent.TabletAlias, *args, nil, true /*runHooks*/)
	}

	return tm.agent.RpcWrapLockAction(remoteAddr, actionnode.TABLET_ACTION_CHANGE_TYPE, args, reply, applyChange)
}
// Operate on restore tablet. // Check that the SnapshotManifest is valid and the master has not changed. // Shutdown mysqld. // Load the snapshot from source tablet. // Restart mysqld and replication. // Put tablet into the replication graph as a spare. func (ta *TabletActor) restore(actionNode *actionnode.ActionNode) error { args := actionNode.Args.(*actionnode.RestoreArgs) // read our current tablet, verify its state tablet, err := ta.ts.GetTablet(ta.tabletAlias) if err != nil { return err } if args.WasReserved { if tablet.Type != topo.TYPE_RESTORE { return fmt.Errorf("expected restore type, not %v: %v", tablet.Type, ta.tabletAlias) } } else { if tablet.Type != topo.TYPE_IDLE { return fmt.Errorf("expected idle type, not %v: %v", tablet.Type, ta.tabletAlias) } } // read the source tablet, compute args.SrcFilePath if default sourceTablet, err := ta.ts.GetTablet(args.SrcTabletAlias) if err != nil { return err } if strings.ToLower(args.SrcFilePath) == "default" { args.SrcFilePath = path.Join(mysqlctl.SnapshotURLPath, mysqlctl.SnapshotManifestFile) } // read the parent tablet, verify its state parentTablet, err := ta.ts.GetTablet(args.ParentAlias) if err != nil { return err } if parentTablet.Type != topo.TYPE_MASTER && parentTablet.Type != topo.TYPE_SNAPSHOT_SOURCE { return fmt.Errorf("restore expected master or snapshot_source parent: %v %v", parentTablet.Type, args.ParentAlias) } // read & unpack the manifest sm := new(mysqlctl.SnapshotManifest) if err := fetchAndParseJsonFile(sourceTablet.GetAddr(), args.SrcFilePath, sm); err != nil { return err } if !args.WasReserved { if err := ta.changeTypeToRestore(tablet, sourceTablet, parentTablet.Alias, sourceTablet.KeyRange); err != nil { return err } } // do the work if err := ta.mysqld.RestoreFromSnapshot(sm, args.FetchConcurrency, args.FetchRetryCount, args.DontWaitForSlaveStart, ta.hookExtraEnv()); err != nil { log.Errorf("RestoreFromSnapshot failed (%v), scrapping", err) if err := actionnode.Scrap(ta.ts, 
ta.tabletAlias, false); err != nil { log.Errorf("Failed to Scrap after failed RestoreFromSnapshot: %v", err) } return err } // change to TYPE_SPARE, we're done! return actionnode.ChangeType(ta.ts, ta.tabletAlias, topo.TYPE_SPARE, nil, true) }
func (ta *TabletActor) changeType(actionNode *actionnode.ActionNode) error { dbType := actionNode.Args.(*topo.TabletType) return actionnode.ChangeType(ta.ts, ta.tabletAlias, *dbType, nil, true /*runHooks*/) }
// RunHealthCheck takes the action mutex, runs the health check, // and if we need to change our state, do it. // If we are the master, we don't change our type, healthy or not. // If we are not the master, we change to spare if not healthy, // or to the passed in targetTabletType if healthy. // // Note we only update the topo record if we need to, that is if our type or // health details changed. func (agent *ActionAgent) RunHealthCheck(targetTabletType topo.TabletType, lockTimeout time.Duration) { agent.actionMutex.Lock() defer agent.actionMutex.Unlock() // read the current tablet record agent.mutex.Lock() tablet := agent._tablet agent.mutex.Unlock() // run the health check typeForHealthCheck := targetTabletType if tablet.Type == topo.TYPE_MASTER { typeForHealthCheck = topo.TYPE_MASTER } health, err := health.Run(typeForHealthCheck) if len(health) == 0 { health = nil } // start with no change newTabletType := tablet.Type if err != nil { if tablet.Type != targetTabletType { log.Infof("Tablet not healthy and in state %v, not changing it: %v", tablet.Type, err) return } log.Infof("Tablet not healthy, converting it from %v to spare: %v", targetTabletType, err) newTabletType = topo.TYPE_SPARE } else { // We are healthy, maybe with health, see if we need // to update the record. We only change from spare to // our target type. 
if tablet.Type == topo.TYPE_SPARE { newTabletType = targetTabletType } if tablet.Type == newTabletType && reflect.DeepEqual(health, tablet.Health) { // no change in health, not logging anything, // and we're done return } // we need to update our state log.Infof("Updating tablet record as healthy type %v -> %v with health details %v -> %v", tablet.Type, newTabletType, tablet.Health, health) } // Change the Type, update the health if err := actionnode.ChangeType(agent.TopoServer, tablet.Alias, newTabletType, health, true /*runHooks*/); err != nil { log.Infof("Error updating tablet record: %v", err) return } // Rebuild the serving graph in our cell, only if we're dealing with // a serving type if topo.IsInServingGraph(targetTabletType) { // TODO: interrupted may need to be a global one closed when we exit interrupted := make(chan struct{}) actionNode := actionnode.RebuildShard() lockPath, err := actionNode.LockShard(agent.TopoServer, tablet.Keyspace, tablet.Shard, lockTimeout, interrupted) if err != nil { log.Warningf("Cannot lock shard for rebuild: %v", err) return } err = topo.RebuildShard(agent.TopoServer, tablet.Keyspace, tablet.Shard, topo.RebuildShardOptions{Cells: []string{tablet.Alias.Cell}, IgnorePartialResult: true}) err = actionNode.UnlockShard(agent.TopoServer, tablet.Keyspace, tablet.Shard, lockPath, err) if err != nil { log.Warningf("UnlockShard returned an error: %v", err) return } } // run the post action callbacks agent.afterAction("healthcheck", false /* reloadSchema */) }