Example #1
0
// recoverablePullError wraps the error gotten when trying to pull and image if
// the error is recoverable.
func (d *DockerDriver) recoverablePullError(err error, image string) error {
	recoverable := true
	if imageNotFoundMatcher.MatchString(err.Error()) {
		recoverable = false
	}
	return dstructs.NewRecoverableError(fmt.Errorf("Failed to pull `%s`: %s", image, err), recoverable)
}
Example #2
0
func TestClient_RestartTracker_StartError_Recoverable_Fail(t *testing.T) {
	t.Parallel()
	p := testPolicy(true, structs.RestartPolicyModeFail)
	rt := newRestartTracker(p, structs.JobTypeSystem)
	recErr := cstructs.NewRecoverableError(fmt.Errorf("foo"), true)
	for i := 0; i < p.Attempts; i++ {
		state, when := rt.SetStartError(recErr).GetState()
		if state != structs.TaskRestarting {
			t.Fatalf("NextRestart() returned %v, want %v", state, structs.TaskRestarting)
		}
		if !withinJitter(p.Delay, when) {
			t.Fatalf("NextRestart() returned %v; want %v+jitter", when, p.Delay)
		}
	}

	// Next restart should cause fail
	if state, _ := rt.SetStartError(recErr).GetState(); state != structs.TaskNotRestarting {
		t.Fatalf("NextRestart() returned %v; want %v", state, structs.TaskNotRestarting)
	}
}
Example #3
0
func (r *TaskRunner) run() {
	// Predeclare things so we an jump to the RESTART
	var handleEmpty bool

	for {
		// Download the task's artifacts
		if !r.artifactsDownloaded && len(r.task.Artifacts) > 0 {
			r.setState(structs.TaskStatePending, structs.NewTaskEvent(structs.TaskDownloadingArtifacts))
			taskDir, ok := r.ctx.AllocDir.TaskDirs[r.task.Name]
			if !ok {
				err := fmt.Errorf("task directory couldn't be found")
				r.setState(structs.TaskStateDead, structs.NewTaskEvent(structs.TaskDriverFailure).SetDriverError(err))
				r.logger.Printf("[ERR] client: task directory for alloc %q task %q couldn't be found", r.alloc.ID, r.task.Name)
				r.restartTracker.SetStartError(err)
				goto RESTART
			}

			for _, artifact := range r.task.Artifacts {
				if err := getter.GetArtifact(r.taskEnv, artifact, taskDir, r.logger); err != nil {
					r.setState(structs.TaskStateDead,
						structs.NewTaskEvent(structs.TaskArtifactDownloadFailed).SetDownloadError(err))
					r.restartTracker.SetStartError(cstructs.NewRecoverableError(err, true))
					goto RESTART
				}
			}

			r.artifactsDownloaded = true
		}

		// Start the task if not yet started or it is being forced. This logic
		// is necessary because in the case of a restore the handle already
		// exists.
		r.handleLock.Lock()
		handleEmpty = r.handle == nil
		r.handleLock.Unlock()
		if handleEmpty {
			startErr := r.startTask()
			r.restartTracker.SetStartError(startErr)
			if startErr != nil {
				r.setState(structs.TaskStateDead, structs.NewTaskEvent(structs.TaskDriverFailure).SetDriverError(startErr))
				goto RESTART
			}
		}

		// Mark the task as started
		r.setState(structs.TaskStateRunning, structs.NewTaskEvent(structs.TaskStarted))

		// Wait for updates
	WAIT:
		for {
			select {
			case waitRes := <-r.handle.WaitCh():
				if waitRes == nil {
					panic("nil wait")
				}

				// Log whether the task was successful or not.
				r.restartTracker.SetWaitResult(waitRes)
				r.setState(structs.TaskStateDead, r.waitErrorToEvent(waitRes))
				if !waitRes.Successful() {
					r.logger.Printf("[INFO] client: task %q for alloc %q failed: %v", r.task.Name, r.alloc.ID, waitRes)
				} else {
					r.logger.Printf("[INFO] client: task %q for alloc %q completed successfully", r.task.Name, r.alloc.ID)
				}

				break WAIT
			case update := <-r.updateCh:
				if err := r.handleUpdate(update); err != nil {
					r.logger.Printf("[ERR] client: update to task %q failed: %v", r.task.Name, err)
				}
			case <-r.destroyCh:
				// Kill the task using an exponential backoff in-case of failures.
				destroySuccess, err := r.handleDestroy()
				if !destroySuccess {
					// We couldn't successfully destroy the resource created.
					r.logger.Printf("[ERR] client: failed to kill task %q. Resources may have been leaked: %v", r.task.Name, err)
				}

				// Store that the task has been destroyed and any associated error.
				r.setState(structs.TaskStateDead, structs.NewTaskEvent(structs.TaskKilled).SetKillError(err))
				return
			}
		}

	RESTART:
		state, when := r.restartTracker.GetState()
		r.restartTracker.SetStartError(nil).SetWaitResult(nil)
		reason := r.restartTracker.GetReason()
		switch state {
		case structs.TaskNotRestarting, structs.TaskTerminated:
			r.logger.Printf("[INFO] client: Not restarting task: %v for alloc: %v ", r.task.Name, r.alloc.ID)
			if state == structs.TaskNotRestarting {
				r.setState(structs.TaskStateDead,
					structs.NewTaskEvent(structs.TaskNotRestarting).
						SetRestartReason(reason))
			}
			return
		case structs.TaskRestarting:
			r.logger.Printf("[INFO] client: Restarting task %q for alloc %q in %v", r.task.Name, r.alloc.ID, when)
			r.setState(structs.TaskStatePending,
				structs.NewTaskEvent(structs.TaskRestarting).
					SetRestartDelay(when).
					SetRestartReason(reason))
		default:
			r.logger.Printf("[ERR] client: restart tracker returned unknown state: %q", state)
			return
		}

		// Sleep but watch for destroy events.
		select {
		case <-time.After(when):
		case <-r.destroyCh:
		}

		// Destroyed while we were waiting to restart, so abort.
		r.destroyLock.Lock()
		destroyed := r.destroy
		r.destroyLock.Unlock()
		if destroyed {
			r.logger.Printf("[DEBUG] client: Not restarting task: %v because it's destroyed by user", r.task.Name)
			r.setState(structs.TaskStateDead, structs.NewTaskEvent(structs.TaskKilled))
			return
		}

		// Clear the handle so a new driver will be created.
		r.handleLock.Lock()
		r.handle = nil
		r.handleLock.Unlock()
	}
}