// RestoreFromBackup is the main entry point for backup restore. // It will either work, fail gracefully, or return // an error in case of a non-recoverable error. // It takes the action lock so no RPC interferes. func (agent *ActionAgent) RestoreFromBackup(ctx context.Context) error { agent.actionMutex.Lock() defer agent.actionMutex.Unlock() // change type to RESTORE (using UpdateTabletFields so it's // always authorized) tablet := agent.Tablet() originalType := tablet.Type if err := agent.TopoServer.UpdateTabletFields(ctx, tablet.Alias, func(tablet *topo.Tablet) error { tablet.Type = topo.TYPE_RESTORE return nil }); err != nil { return fmt.Errorf("Cannot change type to RESTORE: %v", err) } // do the optional restore, if that fails we are in a bad state, // just log.Fatalf out. bucket := fmt.Sprintf("%v/%v", tablet.Keyspace, tablet.Shard) pos, err := mysqlctl.Restore(ctx, agent.MysqlDaemon, bucket, *restoreConcurrency, agent.hookExtraEnv()) if err != nil && err != mysqlctl.ErrNoBackup { return fmt.Errorf("Cannot restore original backup: %v", err) } if err == nil { // now read the shard to find the current master, and its location si, err := agent.TopoServer.GetShard(ctx, tablet.Keyspace, tablet.Shard) if err != nil { return fmt.Errorf("Cannot read shard: %v", err) } ti, err := agent.TopoServer.GetTablet(ctx, si.MasterAlias) if err != nil { return fmt.Errorf("Cannot read master tablet %v: %v", si.MasterAlias, err) } // set replication straight status := &myproto.ReplicationStatus{ Position: pos, MasterHost: ti.Hostname, MasterPort: ti.Portmap["mysql"], } cmds, err := agent.MysqlDaemon.StartReplicationCommands(status) if err != nil { return fmt.Errorf("MysqlDaemon.StartReplicationCommands failed: %v", err) } if err := agent.MysqlDaemon.ExecuteSuperQueryList(cmds); err != nil { return fmt.Errorf("MysqlDaemon.ExecuteSuperQueryList failed: %v", err) } } // change type back to original type if err := agent.TopoServer.UpdateTabletFields(ctx, tablet.Alias, func(tablet *topo.Tablet) error { tablet.Type = originalType return nil }); err != nil { return fmt.Errorf("Cannot change type back to %v: %v", originalType, err) } return nil }
func (agent *ActionAgent) restoreDataLocked(ctx context.Context, logger logutil.Logger, deleteBeforeRestore bool) error { // change type to RESTORE (using UpdateTabletFields so it's // always authorized) tablet := agent.Tablet() originalType := tablet.Type if _, err := agent.TopoServer.UpdateTabletFields(ctx, tablet.Alias, func(tablet *topodatapb.Tablet) error { tablet.Type = topodatapb.TabletType_RESTORE return nil }); err != nil { return fmt.Errorf("Cannot change type to RESTORE: %v", err) } // let's update our internal state (stop query service and other things) if err := agent.refreshTablet(ctx, "restore from backup"); err != nil { return fmt.Errorf("failed to update state before restore: %v", err) } // Try to restore. Depending on the reason for failure, we may be ok. // If we're not ok, return an error and the agent will log.Fatalf, // causing the process to be restarted and the restore retried. dir := fmt.Sprintf("%v/%v", tablet.Keyspace, tablet.Shard) localMetadata, err := agent.getLocalMetadataValues() if err != nil { return err } pos, err := mysqlctl.Restore(ctx, agent.MysqlDaemon, dir, *restoreConcurrency, agent.hookExtraEnv(), localMetadata, logger, deleteBeforeRestore) switch err { case nil: // Reconnect to master. if err := agent.startReplication(ctx, pos); err != nil { return err } case mysqlctl.ErrNoBackup: // No-op, starting with empty database. case mysqlctl.ErrExistingDB: // No-op, assuming we've just restarted. default: return fmt.Errorf("Can't restore backup: %v", err) } // Change type back to original type if we're ok to serve. if _, err := agent.TopoServer.UpdateTabletFields(ctx, tablet.Alias, func(tablet *topodatapb.Tablet) error { tablet.Type = originalType return nil }); err != nil { return fmt.Errorf("Cannot change type back to %v: %v", originalType, err) } // let's update our internal state (start query service and other things) if err := agent.refreshTablet(ctx, "after restore from backup"); err != nil { return fmt.Errorf("failed to update state after backup: %v", err) } return nil }
// RestoreFromBackup is the main entry point for backup restore. // It will either work, fail gracefully, or return // an error in case of a non-recoverable error. // It takes the action lock so no RPC interferes. func (agent *ActionAgent) RestoreFromBackup(ctx context.Context) error { agent.actionMutex.Lock() defer agent.actionMutex.Unlock() // change type to RESTORE (using UpdateTabletFields so it's // always authorized) tablet := agent.Tablet() originalType := tablet.Type if _, err := agent.TopoServer.UpdateTabletFields(ctx, tablet.Alias, func(tablet *topodatapb.Tablet) error { tablet.Type = topodatapb.TabletType_RESTORE return nil }); err != nil { return fmt.Errorf("Cannot change type to RESTORE: %v", err) } // Try to restore. Depending on the reason for failure, we may be ok. // If we're not ok, return an error and the agent will log.Fatalf, // causing the process to be restarted and the restore retried. dir := fmt.Sprintf("%v/%v", tablet.Keyspace, tablet.Shard) pos, err := mysqlctl.Restore(ctx, agent.MysqlDaemon, dir, *restoreConcurrency, agent.hookExtraEnv()) switch err { case nil: // Reconnect to master. if err := agent.startReplication(ctx, pos); err != nil { return err } case mysqlctl.ErrNoBackup: log.Infof("Auto-restore is enabled, but no backups were found. Starting up empty.") case mysqlctl.ErrExistingDB: log.Infof("Auto-restore is enabled, but mysqld already contains data. Assuming vttablet was just restarted.") default: return fmt.Errorf("Can't restore backup: %v", err) } // Change type back to original type if we're ok to serve. if _, err := agent.TopoServer.UpdateTabletFields(ctx, tablet.Alias, func(tablet *topodatapb.Tablet) error { tablet.Type = originalType return nil }); err != nil { return fmt.Errorf("Cannot change type back to %v: %v", originalType, err) } return nil }