Beispiel #1
0
func (h *ContainerHealer) healContainerIfNeeded(cont container.Container) error {
	if cont.LastSuccessStatusUpdate.IsZero() {
		if !cont.MongoID.Time().Before(time.Now().Add(-h.maxUnresponsiveTime)) {
			return nil
		}
	}
	isAsExpected, err := h.isAsExpected(cont)
	if err != nil {
		log.Errorf("Containers healing: couldn't verify running processes in container %q: %s", cont.ID, err)
	}
	if isAsExpected {
		cont.SetStatus(h.provisioner, cont.ExpectedStatus(), true)
		return nil
	}
	locked := h.locker.Lock(cont.AppName)
	if !locked {
		return errors.Errorf("Containers healing: unable to heal %q couldn't lock app %s", cont.ID, cont.AppName)
	}
	defer h.locker.Unlock(cont.AppName)
	// Sanity check, now we have a lock, let's find out if the container still exists
	_, err = h.provisioner.GetContainer(cont.ID)
	if err != nil {
		if _, isNotFound := err.(*provision.UnitNotFoundError); isNotFound {
			return nil
		}
		return errors.Wrapf(err, "Containers healing: unable to heal %q couldn't verify it still exists", cont.ID)
	}
	a, err := app.GetByName(cont.AppName)
	if err != nil {
		return errors.Wrapf(err, "Containers healing: unable to heal %q couldn't get app %q", cont.ID, cont.AppName)
	}
	log.Errorf("Initiating healing process for container %q, unresponsive since %s.", cont.ID, cont.LastSuccessStatusUpdate)
	evt, err := event.NewInternal(&event.Opts{
		Target:       event.Target{Type: event.TargetTypeContainer, Value: cont.ID},
		InternalKind: "healer",
		CustomData:   cont,
		Allowed: event.Allowed(permission.PermAppReadEvents, append(permission.Contexts(permission.CtxTeam, a.Teams),
			permission.Context(permission.CtxApp, a.Name),
			permission.Context(permission.CtxPool, a.Pool),
		)...),
	})
	if err != nil {
		return errors.Wrap(err, "Error trying to insert container healing event, healing aborted")
	}
	newCont, healErr := h.healContainer(cont)
	if healErr != nil {
		healErr = errors.Errorf("Error healing container %q: %s", cont.ID, healErr.Error())
	}
	err = evt.DoneCustomData(healErr, newCont)
	if err != nil {
		log.Errorf("Error trying to update containers healing event: %s", err)
	}
	return healErr
}
Beispiel #2
0
func (h *ContainerHealer) isAsExpected(cont container.Container) (bool, error) {
	container, err := h.provisioner.Cluster().InspectContainer(cont.ID)
	if err != nil {
		return false, err
	}
	if container.State.Dead || container.State.RemovalInProgress {
		return false, nil
	}
	isRunning := container.State.Running || container.State.Restarting
	if cont.ExpectedStatus() == provision.StatusStopped {
		return !isRunning, nil
	}
	return isRunning, nil
}