// synchronizeState explicitly goes through each docker container stored in
// "state" and updates its KnownStatus appropriately, as well as queueing up
// events to push upstream.
func (engine *DockerTaskEngine) synchronizeState() {
	engine.processTasks.Lock()
	defer engine.processTasks.Unlock()

	imageStates := engine.state.AllImageStates()
	if len(imageStates) != 0 {
		engine.imageManager.AddAllImageStates(imageStates)
	}

	tasks := engine.state.AllTasks()
	for _, task := range tasks {
		conts, ok := engine.state.ContainerMapByArn(task.Arn)
		if !ok {
			engine.startTask(task)
			continue
		}
		for _, cont := range conts {
			if cont.DockerId == "" {
				log.Debug("Found container potentially created while we were down", "name", cont.DockerName)
				// Figure out the dockerid
				describedCont, err := engine.client.InspectContainer(cont.DockerName)
				if err != nil {
					log.Warn("Could not find matching container for expected", "name", cont.DockerName)
				} else {
					cont.DockerId = describedCont.ID
					// update mappings that need dockerid
					engine.state.AddContainer(cont, task)
					engine.imageManager.AddContainerReferenceToImageState(cont.Container)
				}
			}
			if cont.DockerId != "" {
				currentState, metadata := engine.client.DescribeContainer(cont.DockerId)
				if metadata.Error != nil {
					currentState = api.ContainerStopped
					if !cont.Container.KnownTerminal() {
						cont.Container.ApplyingError = api.NewNamedError(&ContainerVanishedError{})
						log.Warn("Could not describe previously known container; assuming dead", "err", metadata.Error, "id", cont.DockerId, "name", cont.DockerName)
						engine.imageManager.RemoveContainerReferenceFromImageState(cont.Container)
					}
				} else {
					engine.imageManager.AddContainerReferenceToImageState(cont.Container)
				}
				if currentState > cont.Container.GetKnownStatus() {
					cont.Container.SetKnownStatus(currentState)
				}
			}
		}
		engine.startTask(task)
	}
	engine.saver.Save()
}
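// Note: the "currentState > cont.Container.GetKnownStatus()" check above works
// because container statuses are ordered values, so the comparison can only
// ever move the known status forward during synchronization. The sketch below
// is a minimal illustration of that idea; the names (sketchContainerStatus,
// advanceKnownStatus, and the sketchStatus* constants) are illustrative
// stand-ins, not the agent's real api.ContainerStatus type.
type sketchContainerStatus int

const (
	sketchStatusNone sketchContainerStatus = iota
	sketchStatusPulled
	sketchStatusCreated
	sketchStatusRunning
	sketchStatusStopped
)

// advanceKnownStatus returns the later of the known and observed statuses, so
// an observed state that is "behind" what is already known is ignored.
func advanceKnownStatus(known, observed sketchContainerStatus) sketchContainerStatus {
	if observed > known {
		return observed
	}
	return known
}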
// handleContainerChange updates a container's known status. In addition, if
// the change mentions interesting information (like exit codes or ports), it
// propagates that as well.
func (mtask *managedTask) handleContainerChange(containerChange dockerContainerChange) {
	llog := log.New("task", mtask.Task)
	container := containerChange.container
	found := false
	for _, c := range mtask.Containers {
		if container == c {
			found = true
		}
	}
	if !found {
		llog.Crit("State error; task manager called with another task's container!", "container", container)
		return
	}
	event := containerChange.event
	llog.Debug("Handling container change", "change", containerChange)

	// Cases: If this is a forward transition (else), update the container to be
	// known to be at that status.
	// If this is a backwards transition stopped->running, the first time set it
	// to be known running so it will be stopped. Subsequently ignore these
	// backward transitions.
	if event.Status <= container.KnownStatus && container.KnownStatus == api.ContainerStopped {
		if event.Status == api.ContainerRunning {
			// If the container becomes running after we've stopped it (possibly
			// because we got an error running it and it ran anyways), the first time
			// update it to 'known running' so that it will be driven back to stopped
			mtask.unexpectedStart.Do(func() {
				llog.Warn("Container that we thought was stopped came back; re-stopping it once")
				go mtask.engine.transitionContainer(mtask.Task, container, api.ContainerStopped)
				// This will not proceed afterwards because status <= knownstatus below
			})
		}
	}
	if event.Status <= container.KnownStatus {
		seelog.Infof("Redundant container state change for task %s: %s to %s, but already %s", mtask.Task, container, event.Status, container.KnownStatus)
		return
	}
	container.KnownStatus = event.Status

	if event.Error != nil {
		if container.ApplyingError == nil {
			container.ApplyingError = api.NewNamedError(event.Error)
		}
		if event.Status == api.ContainerStopped {
			// If we were trying to transition to stopped and had an error, we
			// clearly can't just continue trying to transition it to stopped
			// again and again... In this case, assume it's stopped (or close
			// enough) and get on with it.
			// This actually happens a lot for the case of stopping something
			// that was not running.
			llog.Info("Error for 'docker stop' of container; assuming it's stopped anyways")
			container.KnownStatus = api.ContainerStopped
			container.DesiredStatus = api.ContainerStopped
		} else if event.Status == api.ContainerPulled {
			// Another special case; a failure to pull might not be fatal if e.g. the image already exists.
			llog.Info("Error while pulling container; will try to run anyways", "err", event.Error)
		} else {
			llog.Warn("Error with docker; stopping container", "container", container, "err", event.Error)
			container.DesiredStatus = api.ContainerStopped
			// the above 'knownstatus' is not truthful because of the error
			// No point in emitting it, just continue on to stopped
			return
		}
	}

	if event.ExitCode != nil && event.ExitCode != container.KnownExitCode {
		container.KnownExitCode = event.ExitCode
	}
	if event.PortBindings != nil {
		container.KnownPortBindings = event.PortBindings
	}
	if event.Volumes != nil {
		mtask.UpdateMountPoints(container, event.Volumes)
	}

	mtask.engine.emitContainerEvent(mtask.Task, container, "")
	if mtask.UpdateStatus() {
		llog.Debug("Container change also resulted in task change")
		// If knownStatus changed, let it be known
		mtask.engine.emitTaskEvent(mtask.Task, "")
	}
}
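// Note: mtask.unexpectedStart above is used as a sync.Once, so the re-stop of
// a container that came back after being stopped is queued at most once; any
// later backward transitions simply fall through to the redundant-change check
// and are ignored. The sketch below shows that pattern in isolation; the names
// (sketchTask, handleUnexpectedRunning, stopFn) are illustrative only, and the
// code assumes "sync" is imported.
type sketchTask struct {
	unexpectedStart sync.Once
}

// handleUnexpectedRunning runs stopFn in the background the first time it is
// called and is a no-op on every subsequent call.
func (t *sketchTask) handleUnexpectedRunning(stopFn func()) {
	t.unexpectedStart.Do(func() {
		go stopFn()
	})
}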