func (ta *TabletActor) multiRestore(actionNode *actionnode.ActionNode) (err error) { args := actionNode.Args.(*actionnode.MultiRestoreArgs) // read our current tablet, verify its state // we only support restoring to the master or spare replicas tablet, err := ta.ts.GetTablet(ta.tabletAlias) if err != nil { return err } if tablet.Type != topo.TYPE_MASTER && tablet.Type != topo.TYPE_SPARE && tablet.Type != topo.TYPE_REPLICA && tablet.Type != topo.TYPE_RDONLY { return fmt.Errorf("expected master, spare replica or rdonly type, not %v: %v", tablet.Type, ta.tabletAlias) } // get source tablets addresses sourceAddrs := make([]*url.URL, len(args.SrcTabletAliases)) keyRanges := make([]key.KeyRange, len(args.SrcTabletAliases)) for i, alias := range args.SrcTabletAliases { t, e := ta.ts.GetTablet(alias) if e != nil { return e } sourceAddrs[i] = &url.URL{Host: t.GetAddr(), Path: "/" + t.DbName()} keyRanges[i], e = key.KeyRangesOverlap(tablet.KeyRange, t.KeyRange) if e != nil { return e } } // change type to restore, no change to replication graph originalType := tablet.Type tablet.Type = topo.TYPE_RESTORE err = topo.UpdateTablet(ta.ts, tablet) if err != nil { return err } // run the action, scrap if it fails if err := ta.mysqld.MultiRestore(tablet.DbName(), keyRanges, sourceAddrs, args.Concurrency, args.FetchConcurrency, args.InsertTableConcurrency, args.FetchRetryCount, args.Strategy); err != nil { if e := topotools.Scrap(ta.ts, ta.tabletAlias, false); e != nil { log.Errorf("Failed to Scrap after failed RestoreFromMultiSnapshot: %v", e) } return err } // restore type back tablet.Type = originalType return topo.UpdateTablet(ta.ts, tablet) }
func (wr *Wrangler) emergencyReparentShardLocked(ctx context.Context, ev *events.Reparent, keyspace, shard string, masterElectTabletAlias *pb.TabletAlias, waitSlaveTimeout time.Duration) error { shardInfo, err := wr.ts.GetShard(ctx, keyspace, shard) if err != nil { return err } ev.ShardInfo = *shardInfo event.DispatchUpdate(ev, "reading all tablets") tabletMap, err := wr.ts.GetTabletMapForShard(ctx, keyspace, shard) if err != nil { return err } // Check corner cases we're going to depend on masterElectTabletInfo, ok := tabletMap[*masterElectTabletAlias] if !ok { return fmt.Errorf("master-elect tablet %v is not in the shard", topoproto.TabletAliasString(masterElectTabletAlias)) } ev.NewMaster = *masterElectTabletInfo.Tablet if topoproto.TabletAliasEqual(shardInfo.MasterAlias, masterElectTabletAlias) { return fmt.Errorf("master-elect tablet %v is already the master", topoproto.TabletAliasString(masterElectTabletAlias)) } // Deal with the old master: try to remote-scrap it, if it's // truely dead we force-scrap it. Remove it from our map in any case. if shardInfo.HasMaster() { scrapOldMaster := true oldMasterTabletInfo, ok := tabletMap[*shardInfo.MasterAlias] if ok { delete(tabletMap, *shardInfo.MasterAlias) } else { oldMasterTabletInfo, err = wr.ts.GetTablet(ctx, shardInfo.MasterAlias) if err != nil { wr.logger.Warningf("cannot read old master tablet %v, won't touch it: %v", topoproto.TabletAliasString(shardInfo.MasterAlias), err) scrapOldMaster = false } } if scrapOldMaster { ev.OldMaster = *oldMasterTabletInfo.Tablet wr.logger.Infof("scrapping old master %v", topoproto.TabletAliasString(shardInfo.MasterAlias)) ctx, cancel := context.WithTimeout(ctx, waitSlaveTimeout) defer cancel() if err := wr.tmc.Scrap(ctx, oldMasterTabletInfo); err != nil { wr.logger.Warningf("remote scrapping failed master failed, will force the scrap: %v", err) if err := topotools.Scrap(ctx, wr.ts, shardInfo.MasterAlias, true); err != nil { wr.logger.Warningf("old master topo scrapping failed, continuing anyway: %v", err) } } } } // Stop replication on all slaves, get their current // replication position event.DispatchUpdate(ev, "stop replication on all slaves") wg := sync.WaitGroup{} mu := sync.Mutex{} statusMap := make(map[pb.TabletAlias]myproto.ReplicationStatus) for alias, tabletInfo := range tabletMap { wg.Add(1) go func(alias pb.TabletAlias, tabletInfo *topo.TabletInfo) { defer wg.Done() wr.logger.Infof("getting replication position from %v", topoproto.TabletAliasString(&alias)) ctx, cancel := context.WithTimeout(ctx, waitSlaveTimeout) defer cancel() rp, err := wr.TabletManagerClient().StopReplicationAndGetStatus(ctx, tabletInfo) if err != nil { wr.logger.Warningf("failed to get replication status from %v, ignoring tablet: %v", topoproto.TabletAliasString(&alias), err) return } mu.Lock() statusMap[alias] = rp mu.Unlock() }(alias, tabletInfo) } wg.Wait() // Verify masterElect is alive and has the most advanced position masterElectStatus, ok := statusMap[*masterElectTabletAlias] if !ok { return fmt.Errorf("couldn't get master elect %v replication position", topoproto.TabletAliasString(masterElectTabletAlias)) } for alias, status := range statusMap { if topoproto.TabletAliasEqual(&alias, masterElectTabletAlias) { continue } if !masterElectStatus.Position.AtLeast(status.Position) { return fmt.Errorf("tablet %v is more advanced than master elect tablet %v: %v > %v", topoproto.TabletAliasString(&alias), topoproto.TabletAliasString(masterElectTabletAlias), status.Position, masterElectStatus) } } // Promote the masterElect wr.logger.Infof("promote slave %v", topoproto.TabletAliasString(masterElectTabletAlias)) event.DispatchUpdate(ev, "promoting slave") rp, err := wr.tmc.PromoteSlave(ctx, masterElectTabletInfo) if err != nil { return fmt.Errorf("master-elect tablet %v failed to be upgraded to master: %v", topoproto.TabletAliasString(masterElectTabletAlias), err) } // Reset replication on all slaves to point to the new master, and // insert test row in the new master. // Go through all the tablets: // - new master: populate the reparent journal // - everybody else: reparent to new master, wait for row event.DispatchUpdate(ev, "reparenting all tablets") now := time.Now().UnixNano() wgMaster := sync.WaitGroup{} wgSlaves := sync.WaitGroup{} rec := concurrency.AllErrorRecorder{} var masterErr error for alias, tabletInfo := range tabletMap { if topoproto.TabletAliasEqual(&alias, masterElectTabletAlias) { wgMaster.Add(1) go func(alias pb.TabletAlias, tabletInfo *topo.TabletInfo) { defer wgMaster.Done() wr.logger.Infof("populating reparent journal on new master %v", topoproto.TabletAliasString(&alias)) masterErr = wr.TabletManagerClient().PopulateReparentJournal(ctx, tabletInfo, now, emergencyReparentShardOperation, &alias, rp) }(alias, tabletInfo) } else { wgSlaves.Add(1) go func(alias pb.TabletAlias, tabletInfo *topo.TabletInfo) { defer wgSlaves.Done() wr.logger.Infof("setting new master on slave %v", topoproto.TabletAliasString(&alias)) forceStartSlave := false if status, ok := statusMap[alias]; ok { forceStartSlave = status.SlaveIORunning || status.SlaveSQLRunning } if err := wr.TabletManagerClient().SetMaster(ctx, tabletInfo, masterElectTabletAlias, now, forceStartSlave); err != nil { rec.RecordError(fmt.Errorf("Tablet %v SetMaster failed: %v", topoproto.TabletAliasString(&alias), err)) } }(alias, tabletInfo) } } // After the master is done, we can update the shard record // (note with semi-sync, it also means at least one slave is done) wgMaster.Wait() if masterErr != nil { wgSlaves.Wait() return fmt.Errorf("failed to PopulateReparentJournal on master: %v", masterErr) } wr.logger.Infof("updating shard record with new master %v", topoproto.TabletAliasString(masterElectTabletAlias)) shardInfo.MasterAlias = masterElectTabletAlias if err := wr.ts.UpdateShard(ctx, shardInfo); err != nil { wgSlaves.Wait() return fmt.Errorf("failed to update shard master record: %v", err) } // Wait for the slaves to complete. If some of them fail, we // will rebuild the shard serving graph anyway wgSlaves.Wait() if err := rec.Error(); err != nil { wr.Logger().Errorf("Some slaves failed to reparent: %v", err) return err } // Then we rebuild the entire serving graph for the shard, // to account for all changes. wr.logger.Infof("rebuilding shard graph") event.DispatchUpdate(ev, "rebuilding shard serving graph") _, err = wr.RebuildShardGraph(ctx, keyspace, shard, nil) return err }
// Scrap a tablet. If force is used, we write to topo.Server // directly and don't remote-execute the command. // // If we scrap the master for a shard, we will clear its record // from the Shard object (only if that was the right master) func (wr *Wrangler) Scrap(ctx context.Context, tabletAlias topo.TabletAlias, force, skipRebuild bool) error { // load the tablet, see if we'll need to rebuild ti, err := wr.ts.GetTablet(ctx, tabletAlias) if err != nil { return err } rebuildRequired := ti.IsInServingGraph() wasMaster := ti.Type == topo.TYPE_MASTER if force { err = topotools.Scrap(ctx, wr.ts, ti.Alias, force) } else { err = wr.tmc.Scrap(ctx, ti) } if err != nil { return err } if !rebuildRequired { wr.Logger().Infof("Rebuild not required") return nil } if skipRebuild { wr.Logger().Warningf("Rebuild required, but skipping it") return nil } // update the Shard object if the master was scrapped if wasMaster { actionNode := actionnode.UpdateShard() lockPath, err := wr.lockShard(ctx, ti.Keyspace, ti.Shard, actionNode) if err != nil { return err } // read the shard with the lock si, err := wr.ts.GetShard(ctx, ti.Keyspace, ti.Shard) if err != nil { return wr.unlockShard(ctx, ti.Keyspace, ti.Shard, actionNode, lockPath, err) } // update it if the right alias is there if topo.TabletAliasEqual(si.MasterAlias, topo.TabletAliasToProto(tabletAlias)) { si.MasterAlias = nil // write it back if err := topo.UpdateShard(ctx, wr.ts, si); err != nil { return wr.unlockShard(ctx, ti.Keyspace, ti.Shard, actionNode, lockPath, err) } } else { wr.Logger().Warningf("Scrapping master %v from shard %v/%v but master in Shard object was %v", tabletAlias, ti.Keyspace, ti.Shard, si.MasterAlias) } // and unlock if err := wr.unlockShard(ctx, ti.Keyspace, ti.Shard, actionNode, lockPath, err); err != nil { return err } } // and rebuild the original shard _, err = wr.RebuildShardGraph(ctx, ti.Keyspace, ti.Shard, []string{ti.Alias.Cell}) return err }
// Scrap a tablet. If force is used, we write to topo.Server // directly and don't remote-execute the command. // // If we scrap the master for a shard, we will clear its record // from the Shard object (only if that was the right master) func (wr *Wrangler) Scrap(tabletAlias topo.TabletAlias, force, skipRebuild bool) (actionPath string, err error) { // load the tablet, see if we'll need to rebuild ti, err := wr.ts.GetTablet(tabletAlias) if err != nil { return "", err } rebuildRequired := ti.Tablet.IsInServingGraph() wasMaster := ti.Type == topo.TYPE_MASTER if force { err = topotools.Scrap(wr.ts, ti.Alias, force) } else { actionPath, err = wr.ai.Scrap(ti.Alias) } if err != nil { return "", err } if !rebuildRequired { log.Infof("Rebuild not required") return } if skipRebuild { log.Warningf("Rebuild required, but skipping it") return } // wait for the remote Scrap if necessary if actionPath != "" { err = wr.WaitForCompletion(actionPath) if err != nil { return "", err } } // update the Shard object if the master was scrapped if wasMaster { actionNode := actionnode.UpdateShard() lockPath, err := wr.lockShard(ti.Keyspace, ti.Shard, actionNode) if err != nil { return "", err } // read the shard with the lock si, err := wr.ts.GetShard(ti.Keyspace, ti.Shard) if err != nil { return "", wr.unlockShard(ti.Keyspace, ti.Shard, actionNode, lockPath, err) } // update it if the right alias is there if si.MasterAlias == tabletAlias { si.MasterAlias = topo.TabletAlias{} // write it back if err := topo.UpdateShard(wr.ts, si); err != nil { return "", wr.unlockShard(ti.Keyspace, ti.Shard, actionNode, lockPath, err) } } else { log.Warningf("Scrapping master %v from shard %v/%v but master in Shard object was %v", tabletAlias, ti.Keyspace, ti.Shard, si.MasterAlias) } // and unlock if err := wr.unlockShard(ti.Keyspace, ti.Shard, actionNode, lockPath, err); err != nil { return "", err } } // and rebuild the original shard / keyspace return "", wr.RebuildShardGraph(ti.Keyspace, ti.Shard, []string{ti.Alias.Cell}) }
func (ta *TabletActor) multiRestore(actionNode *actionnode.ActionNode) (err error) { args := actionNode.Args.(*actionnode.MultiRestoreArgs) // read our current tablet, verify its state // we only support restoring to the master or active replicas tablet, err := ta.ts.GetTablet(ta.tabletAlias) if err != nil { return err } if tablet.Type != topo.TYPE_MASTER && !topo.IsSlaveType(tablet.Type) { return fmt.Errorf("expected master, or slave type, not %v: %v", tablet.Type, ta.tabletAlias) } // get source tablets addresses sourceAddrs := make([]*url.URL, len(args.SrcTabletAliases)) keyRanges := make([]key.KeyRange, len(args.SrcTabletAliases)) fromStoragePaths := make([]string, len(args.SrcTabletAliases)) for i, alias := range args.SrcTabletAliases { t, e := ta.ts.GetTablet(alias) if e != nil { return e } sourceAddrs[i] = &url.URL{ Host: t.Addr(), Path: "/" + t.DbName(), } keyRanges[i], e = key.KeyRangesOverlap(tablet.KeyRange, t.KeyRange) if e != nil { return e } fromStoragePaths[i] = path.Join(ta.mysqld.SnapshotDir, "from-storage", fmt.Sprintf("from-%v-%v", keyRanges[i].Start.Hex(), keyRanges[i].End.Hex())) } // change type to restore, no change to replication graph originalType := tablet.Type tablet.Type = topo.TYPE_RESTORE err = topo.UpdateTablet(ta.ts, tablet) if err != nil { return err } // first try to get the data from a remote storage wg := sync.WaitGroup{} rec := concurrency.AllErrorRecorder{} for i, alias := range args.SrcTabletAliases { wg.Add(1) go func(i int, alias topo.TabletAlias) { defer wg.Done() h := hook.NewSimpleHook("copy_snapshot_from_storage") h.ExtraEnv = make(map[string]string) for k, v := range ta.hookExtraEnv() { h.ExtraEnv[k] = v } h.ExtraEnv["KEYRANGE"] = fmt.Sprintf("%v-%v", keyRanges[i].Start.Hex(), keyRanges[i].End.Hex()) h.ExtraEnv["SNAPSHOT_PATH"] = fromStoragePaths[i] h.ExtraEnv["SOURCE_TABLET_ALIAS"] = alias.String() hr := h.Execute() if hr.ExitStatus != hook.HOOK_SUCCESS { rec.RecordError(fmt.Errorf("%v hook failed(%v): %v", h.Name, hr.ExitStatus, hr.Stderr)) } }(i, alias) } wg.Wait() // run the action, scrap if it fails if rec.HasErrors() { log.Infof("Got errors trying to get snapshots from storage, trying to get them from original tablets: %v", rec.Error()) err = ta.mysqld.MultiRestore(tablet.DbName(), keyRanges, sourceAddrs, nil, args.Concurrency, args.FetchConcurrency, args.InsertTableConcurrency, args.FetchRetryCount, args.Strategy) } else { log.Infof("Got snapshots from storage, reading them from disk directly") err = ta.mysqld.MultiRestore(tablet.DbName(), keyRanges, nil, fromStoragePaths, args.Concurrency, args.FetchConcurrency, args.InsertTableConcurrency, args.FetchRetryCount, args.Strategy) } if err != nil { if e := topotools.Scrap(ta.ts, ta.tabletAlias, false); e != nil { log.Errorf("Failed to Scrap after failed RestoreFromMultiSnapshot: %v", e) } return err } // restore type back tablet.Type = originalType return topo.UpdateTablet(ta.ts, tablet) }
// Operate on restore tablet. // Check that the SnapshotManifest is valid and the master has not changed. // Shutdown mysqld. // Load the snapshot from source tablet. // Restart mysqld and replication. // Put tablet into the replication graph as a spare. func (ta *TabletActor) restore(actionNode *actionnode.ActionNode) error { args := actionNode.Args.(*actionnode.RestoreArgs) // read our current tablet, verify its state tablet, err := ta.ts.GetTablet(ta.tabletAlias) if err != nil { return err } if args.WasReserved { if tablet.Type != topo.TYPE_RESTORE { return fmt.Errorf("expected restore type, not %v: %v", tablet.Type, ta.tabletAlias) } } else { if tablet.Type != topo.TYPE_IDLE { return fmt.Errorf("expected idle type, not %v: %v", tablet.Type, ta.tabletAlias) } } // read the source tablet, compute args.SrcFilePath if default sourceTablet, err := ta.ts.GetTablet(args.SrcTabletAlias) if err != nil { return err } if strings.ToLower(args.SrcFilePath) == "default" { args.SrcFilePath = path.Join(mysqlctl.SnapshotURLPath, mysqlctl.SnapshotManifestFile) } // read the parent tablet, verify its state parentTablet, err := ta.ts.GetTablet(args.ParentAlias) if err != nil { return err } if parentTablet.Type != topo.TYPE_MASTER && parentTablet.Type != topo.TYPE_SNAPSHOT_SOURCE { return fmt.Errorf("restore expected master or snapshot_source parent: %v %v", parentTablet.Type, args.ParentAlias) } // read & unpack the manifest sm := new(mysqlctl.SnapshotManifest) if err := fetchAndParseJsonFile(sourceTablet.Addr(), args.SrcFilePath, sm); err != nil { return err } if !args.WasReserved { if err := ta.changeTypeToRestore(tablet, sourceTablet, parentTablet.Alias, sourceTablet.KeyRange); err != nil { return err } } // do the work if err := ta.mysqld.RestoreFromSnapshot(sm, args.FetchConcurrency, args.FetchRetryCount, args.DontWaitForSlaveStart, ta.hookExtraEnv()); err != nil { log.Errorf("RestoreFromSnapshot failed (%v), scrapping", err) if err := topotools.Scrap(ta.ts, ta.tabletAlias, false); err != nil { log.Errorf("Failed to Scrap after failed RestoreFromSnapshot: %v", err) } return err } // change to TYPE_SPARE, we're done! return topotools.ChangeType(ta.ts, ta.tabletAlias, topo.TYPE_SPARE, nil, true) }
func (ta *TabletActor) scrap() error { return topotools.Scrap(ta.ts, ta.tabletAlias, false) }
// Scrap scraps the live running tablet // Should be called under RPCWrapLockAction. func (agent *ActionAgent) Scrap(ctx context.Context) error { return topotools.Scrap(ctx, agent.TopoServer, agent.TabletAlias, false) }
// reparentShardBrutal executes a brutal reparent. // // Assume the master is dead and not coming back. Just push your way // forward. Force means we are reparenting to the same master // (assuming the data has been externally synched). // // The ev parameter is an event struct prefilled with information that the // caller has on hand, which would be expensive for us to re-query. func (wr *Wrangler) reparentShardBrutal(ev *events.Reparent, si *topo.ShardInfo, slaveTabletMap, masterTabletMap map[topo.TabletAlias]*topo.TabletInfo, masterElectTablet *topo.TabletInfo, leaveMasterReadOnly, force bool) (err error) { event.DispatchUpdate(ev, "starting brutal") defer func() { if err != nil { event.DispatchUpdate(ev, "failed: "+err.Error()) } }() wr.logger.Infof("Skipping ValidateShard - not a graceful situation") if _, ok := slaveTabletMap[masterElectTablet.Alias]; !ok && !force { return fmt.Errorf("master elect tablet not in replication graph %v %v/%v %v", masterElectTablet.Alias, si.Keyspace(), si.ShardName(), topotools.MapKeys(slaveTabletMap)) } // Check the master-elect and slaves are in good shape when the action // has not been forced. if !force { // Make sure all tablets have the right parent and reasonable positions. event.DispatchUpdate(ev, "checking slave replication positions") if err := wr.checkSlaveReplication(slaveTabletMap, topo.NO_TABLET); err != nil { return err } // Check the master-elect is fit for duty - call out for hardware checks. event.DispatchUpdate(ev, "checking that new master is ready to serve") if err := wr.checkMasterElect(masterElectTablet); err != nil { return err } event.DispatchUpdate(ev, "checking slave consistency") wr.logger.Infof("check slaves %v/%v", masterElectTablet.Keyspace, masterElectTablet.Shard) restartableSlaveTabletMap := wr.restartableTabletMap(slaveTabletMap) err = wr.checkSlaveConsistency(restartableSlaveTabletMap, myproto.ReplicationPosition{}) if err != nil { return err } } else { event.DispatchUpdate(ev, "stopping slave replication") wr.logger.Infof("forcing reparent to same master %v", masterElectTablet.Alias) err := wr.breakReplication(slaveTabletMap, masterElectTablet) if err != nil { return err } } event.DispatchUpdate(ev, "promoting new master") rsd, err := wr.promoteSlave(masterElectTablet) if err != nil { // FIXME(msolomon) This suggests that the master-elect is dead. // We need to classify certain errors as temporary and retry. return fmt.Errorf("promote slave failed: %v %v", err, masterElectTablet.Alias) } // Once the slave is promoted, remove it from our maps delete(slaveTabletMap, masterElectTablet.Alias) delete(masterTabletMap, masterElectTablet.Alias) event.DispatchUpdate(ev, "restarting slaves") majorityRestart, restartSlaveErr := wr.restartSlaves(slaveTabletMap, rsd) if !force { for _, failedMaster := range masterTabletMap { event.DispatchUpdate(ev, "scrapping old master") wr.logger.Infof("scrap dead master %v", failedMaster.Alias) // The master is dead so execute the action locally instead of // enqueing the scrap action for an arbitrary amount of time. if scrapErr := topotools.Scrap(wr.ts, failedMaster.Alias, false); scrapErr != nil { wr.logger.Warningf("scrapping failed master failed: %v", scrapErr) } } } event.DispatchUpdate(ev, "rebuilding shard serving graph") err = wr.finishReparent(si, masterElectTablet, majorityRestart, leaveMasterReadOnly) if err != nil { return err } event.DispatchUpdate(ev, "finished") if restartSlaveErr != nil { // This is more of a warning at this point. return restartSlaveErr } return nil }
// Operate on restore tablet. // Check that the SnapshotManifest is valid and the master has not changed. // Shutdown mysqld. // Load the snapshot from source tablet. // Restart mysqld and replication. // Put tablet into the replication graph as a spare. // Should be called under RpcWrapLockAction. func (agent *ActionAgent) Restore(args *actionnode.RestoreArgs, logger logutil.Logger) error { // read our current tablet, verify its state tablet, err := agent.TopoServer.GetTablet(agent.TabletAlias) if err != nil { return err } if args.WasReserved { if tablet.Type != topo.TYPE_RESTORE { return fmt.Errorf("expected restore type, not %v", tablet.Type) } } else { if tablet.Type != topo.TYPE_IDLE { return fmt.Errorf("expected idle type, not %v", tablet.Type) } } // read the source tablet, compute args.SrcFilePath if default sourceTablet, err := agent.TopoServer.GetTablet(args.SrcTabletAlias) if err != nil { return err } if strings.ToLower(args.SrcFilePath) == "default" { args.SrcFilePath = path.Join(mysqlctl.SnapshotURLPath, mysqlctl.SnapshotManifestFile) } // read the parent tablet, verify its state parentTablet, err := agent.TopoServer.GetTablet(args.ParentAlias) if err != nil { return err } if parentTablet.Type != topo.TYPE_MASTER && parentTablet.Type != topo.TYPE_SNAPSHOT_SOURCE { return fmt.Errorf("restore expected master or snapshot_source parent: %v %v", parentTablet.Type, args.ParentAlias) } // read & unpack the manifest sm := new(mysqlctl.SnapshotManifest) if err := fetchAndParseJsonFile(sourceTablet.Addr(), args.SrcFilePath, sm); err != nil { return err } if !args.WasReserved { if err := agent.changeTypeToRestore(tablet, sourceTablet, parentTablet.Alias, sourceTablet.KeyRange); err != nil { return err } } // create the loggers: tee to console and source l := logutil.NewTeeLogger(logutil.NewConsoleLogger(), logger) // do the work if err := agent.Mysqld.RestoreFromSnapshot(l, sm, args.FetchConcurrency, args.FetchRetryCount, args.DontWaitForSlaveStart, agent.hookExtraEnv()); err != nil { log.Errorf("RestoreFromSnapshot failed (%v), scrapping", err) if err := topotools.Scrap(agent.TopoServer, agent.TabletAlias, false); err != nil { log.Errorf("Failed to Scrap after failed RestoreFromSnapshot: %v", err) } return err } // reload the schema agent.ReloadSchema() // change to TYPE_SPARE, we're done! return topotools.ChangeType(agent.TopoServer, agent.TabletAlias, topo.TYPE_SPARE, nil, true) }
// Scrap scraps the live running tablet // Should be called under RpcWrapLockAction. func (agent *ActionAgent) Scrap() error { return topotools.Scrap(agent.TopoServer, agent.TabletAlias, false) }