// createAndRetrieveFiles attempts to create a migration's state file, // if it does not exist, and retrieve the latest version from the api func (runner *runner) createAndRetrieveFiles(currentMigration *migration.Migration) error { if _, err := os.Stat(currentMigration.FilesDir); err != nil { if os.IsNotExist(err) { err := os.Mkdir(currentMigration.FilesDir, 0777) if err != nil { glog.Errorf("mig_id=%d: error creating pt-osc log directory '%s' (error: %s)", currentMigration.Id, currentMigration.FilesDir, err) return ErrFiles } } else { glog.Errorf("mig_id=%d: error stat-ing pt-osc log directory '%s' (error: %s)", currentMigration.Id, currentMigration.FilesDir, err) return ErrFiles } } // try to retrieve a statefile from the api urlParams := map[string]string{ "migration_id": strconv.Itoa(currentMigration.Id), "file_type": STATE_FILE_TYPE, } stateFile, restErr := runner.RestClient.GetFile(urlParams) if restErr == nil && stateFile["contents"] != nil && len(stateFile["contents"].(string)) > 0 { err := currentMigration.WriteStateFile([]byte(stateFile["contents"].(string))) if err != nil { glog.Errorf("mig_id=%d: error writing to statefile '%s' (error: %s)", currentMigration.Id, currentMigration.StateFile, err) return ErrFiles } } return nil }
func (runner *runner) postStateFile(migration *migration.Migration) { data, err := migration.ReadStateFile() if err != nil { glog.Warningf("Could not read state file (%s)", err) } else { urlParams := make(map[string]string) urlParams["migration_id"] = strconv.Itoa(migration.Id) urlParams["file_type"] = STATE_FILE_TYPE urlParams["contents"] = string(data[:]) _, err = runner.RestClient.WriteFile(urlParams) if err != nil { glog.Errorf("Error POSTing to write_file endpoint (error: %s)", err) } else { glog.Infof("Sent state file to write_file endpoint") } } }
// execPtOsc shells out and uses pt-osc to actually run a migration.
//
// It prepares the migration's files via createAndRetrieveFiles, opens the
// pt-osc log, builds the command line with ptOscOptionGenerator, starts the
// binary at runner.PtOscPath, and then blocks until the stdout/stderr watcher
// goroutines have reported and the process has exited.
//
// Parameters:
//   - currentMigration: the migration to run; its Pid field is set here once
//     the process has started.
//   - ptOscOptionGenerator: produces the pt-osc command line options for the
//     migration.
//   - copyPercentChan: channel that carries copy percentages; it is closed
//     before returning when it is non-nil.
//   - unstageDone: whether unstagedMigrationsWaitGroup has already had .Done()
//     called on it. When false, this function calls Done() exactly once,
//     either after the process has been registered in runningMigrations or,
//     on an early return, from the deferred function.
//
// Returns:
//   - canceled: true only when the process exit signal string equals
//     SIGKILLSignal while the migration status is RunMigrationStatus.
//   - error: the createAndRetrieveFiles error or ErrPtOscExec on setup
//     failures; otherwise ErrUnexpectedExit, then the stderr watcher error,
//     then the stdout watcher error, in that order of preference; nil if none.
func (runner *runner) execPtOsc(currentMigration *migration.Migration, ptOscOptionGenerator commandOptionGenerator, copyPercentChan chan int, unstageDone bool) (bool, error) {
	// unstageDone = whether or not unstagedMigrationsWaitGroup has already had .Done() called on it
	// The closure reads unstageDone at return time, so it sees the update made
	// further down and does not call Done() a second time.
	defer func() {
		if !unstageDone {
			unstagedMigrationsWaitGroup.Done()
		}
	}()

	canceled := false
	// tracks the helper goroutines started below so we can wait for them
	// before returning
	var fileRoutineWaitGroup sync.WaitGroup

	err := runner.createAndRetrieveFiles(currentMigration)
	if err != nil {
		return canceled, err
	}

	ptOscLogFile, ptOscLogWriter, err := setupLogWriter(currentMigration.LogFile)
	if err != nil {
		glog.Errorf("mig_id=%d: error creating pt-osc log file '%s' (error: %s)", currentMigration.Id, currentMigration.LogFile, err)
		return canceled, ErrPtOscExec
	}
	defer ptOscLogFile.Close()

	// generate the pt-osc command to run
	commandOptions := ptOscOptionGenerator(currentMigration)
	glog.Infof("mig_id=%d: Running %s %v", currentMigration.Id, runner.PtOscPath, strings.Join(commandOptions, " "))
	cmd := exec.Command(runner.PtOscPath, commandOptions...)

	// capture stdout and stderr of the command
	stdout, err := cmd.StdoutPipe()
	if err != nil {
		glog.Errorf("mig_id=%d: error getting stdout pipe for pt-osc exec (error: %s)", currentMigration.Id, err)
		return canceled, ErrPtOscExec
	}
	stderr, err := cmd.StderrPipe()
	if err != nil {
		glog.Errorf("mig_id=%d: error getting stderr pipe for pt-osc exec (error: %s)", currentMigration.Id, err)
		return canceled, ErrPtOscExec
	}

	// start the pt-osc command
	if err := cmd.Start(); err != nil {
		glog.Errorf("mig_id=%d: error starting pt-osc exec (error: %s)", currentMigration.Id, err)
		return canceled, ErrPtOscExec
	}

	// From here on there are no early returns: every goroutine started below
	// is waited for (or signalled through a channel) before the function ends.

	// setup a channel and goroutine for logging output of stdout/stderr
	// NOTE(review): fileSyncWaitGroup is incremented here but never
	// decremented in this function; presumably the started goroutine calls
	// Done() on it - confirm.
	ptOscLogChan := make(chan string)
	fileRoutineWaitGroup.Add(1)
	fileSyncWaitGroup.Add(1)
	go runner.writeToPtOscLog(ptOscLogWriter, ptOscLogChan, writeLineToPtOscLog, currentMigration.Id, &fileRoutineWaitGroup)

	// setup a goroutine for sending statefiles to the api
	// Nothing is sent on ptOscStateFileChan in this function; it is only
	// closed once the watchers have finished.
	ptOscStateFileChan := make(chan string)
	fileRoutineWaitGroup.Add(1)
	fileSyncWaitGroup.Add(1)
	go runner.syncStateFile(currentMigration, ptOscStateFileChan, &fileRoutineWaitGroup)

	// setup goroutines for watching stdout/stderr of the command
	stdoutErrChan := make(chan error)
	stderrErrChan := make(chan error)
	go currentMigration.WatchMigrationStdout(stdout, stdoutErrChan, ptOscLogChan)
	if currentMigration.Status == migration.RunMigrationStatus {
		// setup a goroutine to continually update the % copied of the migration
		fileRoutineWaitGroup.Add(1)
		fileSyncWaitGroup.Add(1)
		go runner.updateMigrationCopyPercentage(currentMigration, copyPercentChan, &fileRoutineWaitGroup)
		go currentMigration.WatchMigrationCopyStderr(stderr, copyPercentChan, stderrErrChan, ptOscLogChan)
	} else {
		go currentMigration.WatchMigrationStderr(stderr, stderrErrChan, ptOscLogChan)
	}

	// save the pid of the pt-osc process
	currentMigration.Pid = cmd.Process.Pid
	glog.Infof("mig_id=%d: pt-osc pid for status %d is %d.", currentMigration.Id, currentMigration.Status, currentMigration.Pid)

	// add the migration id and pid to the running migration map
	runningMigMutex.Lock()
	runningMigrations[currentMigration.Id] = currentMigration.Pid
	runningMigMutex.Unlock()

	// the migration is now registered as running, so release the unstaged
	// wait group (unless the caller already did) and flag it for the defer
	if !unstageDone {
		unstageDone = true
		unstagedMigrationsWaitGroup.Done()
	}

	// wait for both stdout and stderr error channels to receive a signal
	// Both channels are unbuffered and are always received in this order
	// (stdout first, then stderr).
	stdoutErr := <-stdoutErrChan
	stderrErr := <-stderrErrChan
	// closed only after both watchers have reported
	// NOTE(review): presumably the watchers no longer send on ptOscLogChan
	// once they have reported - confirm, since a send on a closed channel
	// panics.
	close(ptOscLogChan)
	close(ptOscStateFileChan)

	// remove the migration id from the running migration map
	runningMigMutex.Lock()
	delete(runningMigrations, currentMigration.Id)
	runningMigMutex.Unlock()

	// get the exit status of the command. if it was sent a SIGKILL (most likely
	// by another goroutine) we want to know because we will treat it differently
	// Wait is called only after both watchers have reported.
	// NOTE(review): presumably they have finished reading the pipes by then,
	// as os/exec requires before Wait - confirm.
	failed := false
	if err := cmd.Wait(); err != nil {
		// Only *exec.ExitError values carrying a syscall.WaitStatus are
		// inspected; any other Wait error leaves canceled and failed untouched.
		if exiterr, ok := err.(*exec.ExitError); ok {
			if status, ok := exiterr.Sys().(syscall.WaitStatus); ok {
				exitSignal := status.Signal().String()
				glog.Infof("mig_id=%d: exit signal was %s.", currentMigration.Id, exitSignal)
				if (exitSignal == SIGKILLSignal) && (currentMigration.Status == migration.RunMigrationStatus) {
					// was killed
					glog.Infof("mig_id=%d: migration must have been canceled", currentMigration.Id)
					canceled = true
				} else if currentMigration.Status == migration.RunMigrationStatus {
					// died for an unexpected reason
					glog.Infof("mig_id=%d: migration died for an unexpected reason", currentMigration.Id)
					failed = true
				}
			}
		}
	} else {
		if (stderrErr == nil) && (currentMigration.Status == migration.RunMigrationStatus) {
			// wasn't killed. copy completed 100%
			// NOTE(review): a send on a nil channel blocks forever; the nil
			// check below suggests copyPercentChan may be nil for some
			// callers - confirm it is never nil for RunMigrationStatus.
			glog.Infof("mig_id=%d: updating migration with copy percentage of 100", currentMigration.Id)
			copyPercentChan <- 100
		}
	}

	// close the channel only after the final 100% send above
	if copyPercentChan != nil {
		glog.Infof("Closing copy percent channel")
		close(copyPercentChan)
	}

	fileRoutineWaitGroup.Wait() // wait for go routines to finish

	// favor returning error from unexpected failure, then error from stderr,
	// and lastly error from stdout
	if failed {
		return canceled, ErrUnexpectedExit
	} else if stderrErr != nil {
		return canceled, stderrErr
	} else if stdoutErr != nil {
		return canceled, stdoutErr
	}
	return canceled, nil
}