func (h *ContainerHealer) healContainerIfNeeded(cont container.Container) error { if cont.LastSuccessStatusUpdate.IsZero() { if !cont.MongoID.Time().Before(time.Now().Add(-h.maxUnresponsiveTime)) { return nil } } isAsExpected, err := h.isAsExpected(cont) if err != nil { log.Errorf("Containers healing: couldn't verify running processes in container %q: %s", cont.ID, err) } if isAsExpected { cont.SetStatus(h.provisioner, cont.ExpectedStatus(), true) return nil } locked := h.locker.Lock(cont.AppName) if !locked { return errors.Errorf("Containers healing: unable to heal %q couldn't lock app %s", cont.ID, cont.AppName) } defer h.locker.Unlock(cont.AppName) // Sanity check, now we have a lock, let's find out if the container still exists _, err = h.provisioner.GetContainer(cont.ID) if err != nil { if _, isNotFound := err.(*provision.UnitNotFoundError); isNotFound { return nil } return errors.Wrapf(err, "Containers healing: unable to heal %q couldn't verify it still exists", cont.ID) } a, err := app.GetByName(cont.AppName) if err != nil { return errors.Wrapf(err, "Containers healing: unable to heal %q couldn't get app %q", cont.ID, cont.AppName) } log.Errorf("Initiating healing process for container %q, unresponsive since %s.", cont.ID, cont.LastSuccessStatusUpdate) evt, err := event.NewInternal(&event.Opts{ Target: event.Target{Type: event.TargetTypeContainer, Value: cont.ID}, InternalKind: "healer", CustomData: cont, Allowed: event.Allowed(permission.PermAppReadEvents, append(permission.Contexts(permission.CtxTeam, a.Teams), permission.Context(permission.CtxApp, a.Name), permission.Context(permission.CtxPool, a.Pool), )...), }) if err != nil { return errors.Wrap(err, "Error trying to insert container healing event, healing aborted") } newCont, healErr := h.healContainer(cont) if healErr != nil { healErr = errors.Errorf("Error healing container %q: %s", cont.ID, healErr.Error()) } err = evt.DoneCustomData(healErr, newCont) if err != nil { log.Errorf("Error trying to update containers healing event: %s", err) } return healErr }
func (h *ContainerHealer) healContainerIfNeeded(cont container.Container) error { if cont.LastSuccessStatusUpdate.IsZero() { if !cont.MongoID.Time().Before(time.Now().Add(-h.maxUnresponsiveTime)) { return nil } } isRunning, err := h.isRunning(cont) if err != nil { log.Errorf("Containers healing: couldn't verify running processes in container %s: %s", cont.ID, err.Error()) } if isRunning { cont.SetStatus(h.provisioner, provision.StatusStarted, true) return nil } healingCounter, err := healingCountFor("container", cont.ID, consecutiveHealingsTimeframe) if err != nil { return fmt.Errorf("Containers healing: couldn't verify number of previous healings for %s: %s", cont.ID, err.Error()) } if healingCounter > consecutiveHealingsLimitInTimeframe { return fmt.Errorf("Containers healing: number of healings for container %s in the last %d minutes exceeds limit of %d: %d", cont.ID, consecutiveHealingsTimeframe/time.Minute, consecutiveHealingsLimitInTimeframe, healingCounter) } locked := h.locker.Lock(cont.AppName) if !locked { return fmt.Errorf("Containers healing: unable to heal %s couldn't lock app %s", cont.ID, cont.AppName) } defer h.locker.Unlock(cont.AppName) // Sanity check, now we have a lock, let's find out if the container still exists _, err = h.provisioner.GetContainer(cont.ID) if err != nil { if err == mgo.ErrNotFound { return nil } return fmt.Errorf("Containers healing: unable to heal %s couldn't verify it still exists.", cont.ID) } log.Errorf("Initiating healing process for container %s, unresponsive since %s.", cont.ID, cont.LastSuccessStatusUpdate) evt, err := NewHealingEvent(cont) if err != nil { return fmt.Errorf("Error trying to insert container healing event, healing aborted: %s", err.Error()) } newCont, healErr := h.healContainer(cont) if healErr != nil { healErr = fmt.Errorf("Error healing container %s: %s", cont.ID, healErr.Error()) } err = evt.Update(newCont, healErr) if err != nil { log.Errorf("Error trying to update containers healing event: %s", err.Error()) } return healErr }
func (s *S) newContainer(opts *newContainerOpts, p *dockerProvisioner) (*container.Container, error) { container := container.Container{ ID: "id", IP: "10.10.10.10", HostPort: "3333", HostAddr: "127.0.0.1", ProcessName: "web", ExposedPort: "8888/tcp", } if p == nil { p = s.p } imageName := "tsuru/python:latest" var customData map[string]interface{} if opts != nil { if opts.Image != "" { imageName = opts.Image } container.AppName = opts.AppName container.ProcessName = opts.ProcessName customData = opts.ImageCustomData if opts.Provisioner != nil { p = opts.Provisioner } container.SetStatus(p, provision.Status(opts.Status), false) } err := s.newFakeImage(p, imageName, customData) if err != nil { return nil, err } if container.AppName == "" { container.AppName = "container" } routertest.FakeRouter.AddBackend(container.AppName) routertest.FakeRouter.AddRoute(container.AppName, container.Address()) ports := map[docker.Port]struct{}{ docker.Port(s.port + "/tcp"): {}, } config := docker.Config{ Image: imageName, Cmd: []string{"ps"}, ExposedPorts: ports, } createOptions := docker.CreateContainerOptions{Config: &config} createOptions.Name = randomString() _, c, err := p.Cluster().CreateContainer(createOptions, net.StreamInactivityTimeout) if err != nil { return nil, err } container.ID = c.ID container.Image = imageName container.Name = createOptions.Name conn, err := db.Conn() if err != nil { return nil, err } defer conn.Close() err = conn.Collection(s.collName).Insert(&container) if err != nil { return nil, err } imageId, err := image.AppCurrentImageName(container.AppName) if err != nil { return nil, err } err = s.newFakeImage(p, imageId, nil) if err != nil { return nil, err } return &container, nil }