// killTask kills the running task. A killing event can optionally be passed and // this event is used to mark the task as being killed. It provides a means to // store extra information. func (r *TaskRunner) killTask(killingEvent *structs.TaskEvent) { r.runningLock.Lock() running := r.running r.runningLock.Unlock() if !running { return } // Get the kill timeout timeout := driver.GetKillTimeout(r.task.KillTimeout, r.config.MaxKillTimeout) // Build the event var event *structs.TaskEvent if killingEvent != nil { event = killingEvent event.Type = structs.TaskKilling } else { event = structs.NewTaskEvent(structs.TaskKilling) } event.SetKillTimeout(timeout) // Mark that we received the kill event r.setState(structs.TaskStateRunning, event) // Kill the task using an exponential backoff in-case of failures. destroySuccess, err := r.handleDestroy() if !destroySuccess { // We couldn't successfully destroy the resource created. r.logger.Printf("[ERR] client: failed to kill task %q. Resources may have been leaked: %v", r.task.Name, err) } r.runningLock.Lock() r.running = false r.runningLock.Unlock() // Store that the task has been destroyed and any associated error. r.setState("", structs.NewTaskEvent(structs.TaskKilled).SetKillError(err)) }
func (r *TaskRunner) run() { // Predeclare things so we can jump to the RESTART var handleEmpty bool var stopCollection chan struct{} for { // Download the task's artifacts if !r.artifactsDownloaded && len(r.task.Artifacts) > 0 { r.setState(structs.TaskStatePending, structs.NewTaskEvent(structs.TaskDownloadingArtifacts)) taskDir, ok := r.ctx.AllocDir.TaskDirs[r.task.Name] if !ok { err := fmt.Errorf("task directory couldn't be found") r.setState(structs.TaskStateDead, structs.NewTaskEvent(structs.TaskDriverFailure).SetDriverError(err)) r.logger.Printf("[ERR] client: task directory for alloc %q task %q couldn't be found", r.alloc.ID, r.task.Name) r.restartTracker.SetStartError(err) goto RESTART } for _, artifact := range r.task.Artifacts { if err := getter.GetArtifact(r.taskEnv, artifact, taskDir); err != nil { r.setState(structs.TaskStatePending, structs.NewTaskEvent(structs.TaskArtifactDownloadFailed).SetDownloadError(err)) r.restartTracker.SetStartError(dstructs.NewRecoverableError(err, true)) goto RESTART } } r.artifactsDownloaded = true } // Start the task if not yet started or it is being forced. This logic // is necessary because in the case of a restore the handle already // exists. r.handleLock.Lock() handleEmpty = r.handle == nil r.handleLock.Unlock() if handleEmpty { startErr := r.startTask() r.restartTracker.SetStartError(startErr) if startErr != nil { r.setState(structs.TaskStateDead, structs.NewTaskEvent(structs.TaskDriverFailure).SetDriverError(startErr)) goto RESTART } // Mark the task as started r.setState(structs.TaskStateRunning, structs.NewTaskEvent(structs.TaskStarted)) r.runningLock.Lock() r.running = true r.runningLock.Unlock() } if stopCollection == nil { stopCollection = make(chan struct{}) go r.collectResourceUsageStats(stopCollection) } // Wait for updates WAIT: for { select { case waitRes := <-r.handle.WaitCh(): if waitRes == nil { panic("nil wait") } r.runningLock.Lock() r.running = false r.runningLock.Unlock() // Stop collection of the task's resource usage close(stopCollection) // Log whether the task was successful or not. r.restartTracker.SetWaitResult(waitRes) r.setState(structs.TaskStateDead, r.waitErrorToEvent(waitRes)) if !waitRes.Successful() { r.logger.Printf("[INFO] client: task %q for alloc %q failed: %v", r.task.Name, r.alloc.ID, waitRes) } else { r.logger.Printf("[INFO] client: task %q for alloc %q completed successfully", r.task.Name, r.alloc.ID) } break WAIT case update := <-r.updateCh: if err := r.handleUpdate(update); err != nil { r.logger.Printf("[ERR] client: update to task %q failed: %v", r.task.Name, err) } case <-r.destroyCh: // Mark that we received the kill event timeout := driver.GetKillTimeout(r.task.KillTimeout, r.config.MaxKillTimeout) r.setState(structs.TaskStateRunning, structs.NewTaskEvent(structs.TaskKilling).SetKillTimeout(timeout)) // Kill the task using an exponential backoff in-case of failures. destroySuccess, err := r.handleDestroy() if !destroySuccess { // We couldn't successfully destroy the resource created. r.logger.Printf("[ERR] client: failed to kill task %q. Resources may have been leaked: %v", r.task.Name, err) } // Stop collection of the task's resource usage close(stopCollection) // Store that the task has been destroyed and any associated error. r.setState(structs.TaskStateDead, structs.NewTaskEvent(structs.TaskKilled).SetKillError(err)) // Store the task event that provides context on the task destroy. if r.destroyEvent.Type != structs.TaskKilled { r.setState(structs.TaskStateDead, r.destroyEvent) } r.runningLock.Lock() r.running = false r.runningLock.Unlock() return } } RESTART: state, when := r.restartTracker.GetState() r.restartTracker.SetStartError(nil).SetWaitResult(nil) reason := r.restartTracker.GetReason() switch state { case structs.TaskNotRestarting, structs.TaskTerminated: r.logger.Printf("[INFO] client: Not restarting task: %v for alloc: %v ", r.task.Name, r.alloc.ID) if state == structs.TaskNotRestarting { r.setState(structs.TaskStateDead, structs.NewTaskEvent(structs.TaskNotRestarting). SetRestartReason(reason)) } return case structs.TaskRestarting: r.logger.Printf("[INFO] client: Restarting task %q for alloc %q in %v", r.task.Name, r.alloc.ID, when) r.setState(structs.TaskStatePending, structs.NewTaskEvent(structs.TaskRestarting). SetRestartDelay(when). SetRestartReason(reason)) default: r.logger.Printf("[ERR] client: restart tracker returned unknown state: %q", state) return } // Sleep but watch for destroy events. select { case <-time.After(when): case <-r.destroyCh: } // Destroyed while we were waiting to restart, so abort. r.destroyLock.Lock() destroyed := r.destroy r.destroyLock.Unlock() if destroyed { r.logger.Printf("[DEBUG] client: Not restarting task: %v because it has been destroyed due to: %s", r.task.Name, r.destroyEvent.Message) r.setState(structs.TaskStateDead, r.destroyEvent) return } // Clear the handle so a new driver will be created. r.handleLock.Lock() r.handle = nil stopCollection = nil r.handleLock.Unlock() } }