func (h *ContainerHealer) healContainerIfNeeded(cont container.Container) error { if cont.LastSuccessStatusUpdate.IsZero() { if !cont.MongoID.Time().Before(time.Now().Add(-h.maxUnresponsiveTime)) { return nil } } isAsExpected, err := h.isAsExpected(cont) if err != nil { log.Errorf("Containers healing: couldn't verify running processes in container %q: %s", cont.ID, err) } if isAsExpected { cont.SetStatus(h.provisioner, cont.ExpectedStatus(), true) return nil } locked := h.locker.Lock(cont.AppName) if !locked { return errors.Errorf("Containers healing: unable to heal %q couldn't lock app %s", cont.ID, cont.AppName) } defer h.locker.Unlock(cont.AppName) // Sanity check, now we have a lock, let's find out if the container still exists _, err = h.provisioner.GetContainer(cont.ID) if err != nil { if _, isNotFound := err.(*provision.UnitNotFoundError); isNotFound { return nil } return errors.Wrapf(err, "Containers healing: unable to heal %q couldn't verify it still exists", cont.ID) } a, err := app.GetByName(cont.AppName) if err != nil { return errors.Wrapf(err, "Containers healing: unable to heal %q couldn't get app %q", cont.ID, cont.AppName) } log.Errorf("Initiating healing process for container %q, unresponsive since %s.", cont.ID, cont.LastSuccessStatusUpdate) evt, err := event.NewInternal(&event.Opts{ Target: event.Target{Type: event.TargetTypeContainer, Value: cont.ID}, InternalKind: "healer", CustomData: cont, Allowed: event.Allowed(permission.PermAppReadEvents, append(permission.Contexts(permission.CtxTeam, a.Teams), permission.Context(permission.CtxApp, a.Name), permission.Context(permission.CtxPool, a.Pool), )...), }) if err != nil { return errors.Wrap(err, "Error trying to insert container healing event, healing aborted") } newCont, healErr := h.healContainer(cont) if healErr != nil { healErr = errors.Errorf("Error healing container %q: %s", cont.ID, healErr.Error()) } err = evt.DoneCustomData(healErr, newCont) if err != nil { log.Errorf("Error trying to update containers healing event: %s", err) } return healErr }
func (h *ContainerHealer) isAsExpected(cont container.Container) (bool, error) { container, err := h.provisioner.Cluster().InspectContainer(cont.ID) if err != nil { return false, err } if container.State.Dead || container.State.RemovalInProgress { return false, nil } isRunning := container.State.Running || container.State.Restarting if cont.ExpectedStatus() == provision.StatusStopped { return !isRunning, nil } return isRunning, nil }