// podStatusResultToResp converts the result of a pod status query into the
// gRPC response type, mapping a missing status to codes.NotFound and any
// other error to codes.Unavailable.
func podStatusResultToResp(result podStatusResult) (*podstore_protos.PodStatusResponse, error) {
	if statusstore.IsNoStatus(result.err) {
		return nil, grpc.Errorf(codes.NotFound, "%s", result.err.Error())
	} else if result.err != nil {
		return nil, grpc.Errorf(codes.Unavailable, "%s", result.err.Error())
	}

	var processStatuses []*podstore_protos.ProcessStatus
	for _, processStatus := range result.status.ProcessStatuses {
		processStatuses = append(processStatuses, &podstore_protos.ProcessStatus{
			LaunchableId: processStatus.LaunchableID.String(),
			EntryPoint:   processStatus.EntryPoint,
			LastExit: &podstore_protos.ExitStatus{
				ExitTime:   processStatus.LastExit.ExitTime.Unix(),
				ExitCode:   int64(processStatus.LastExit.ExitCode),
				ExitStatus: int64(processStatus.LastExit.ExitStatus),
			},
		})
	}

	return &podstore_protos.PodStatusResponse{
		Manifest:        result.status.Manifest,
		PodState:        result.status.PodStatus.String(),
		ProcessStatuses: processStatuses,
		LastIndex:       result.queryMeta.LastIndex,
	}, nil
}
// MutateStatus is a convenience function for mutating only part of the status
// structure. First, the status is retrieved and the consul ModifyIndex is
// read. The status is then passed to a mutator function, and the new status
// is written back to consul using a CAS operation, so the write fails if
// anything else changed the status between the read and the write.
func (c *consulStore) MutateStatus(key types.PodUniqueKey, mutator func(PodStatus) (PodStatus, error)) error {
	var lastIndex uint64
	status, queryMeta, err := c.Get(key)
	switch {
	case statusstore.IsNoStatus(err):
		// We just want to make sure the key doesn't exist when we set it, so
		// use an index of 0
		lastIndex = 0
	case err != nil:
		return err
	default:
		lastIndex = queryMeta.LastIndex
	}

	newStatus, err := mutator(status)
	if err != nil {
		return err
	}

	return c.CAS(key, newStatus, lastIndex)
}
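// A minimal sketch of how a caller might use MutateStatus, assuming it lives
// in the same package as consulStore. The recordManifest helper and its
// parameters are hypothetical and do not come from the code above; only the
// MutateStatus signature, the PodStatus type, and its Manifest field are
// taken from it. The mutator changes just the field it cares about and
// returns the rest of the structure untouched, so the CAS inside
// MutateStatus rejects the write if something else modified the status
// concurrently.
func recordManifest(store *consulStore, key types.PodUniqueKey, manifestStr string) error {
	return store.MutateStatus(key, func(status PodStatus) (PodStatus, error) {
		// Only the Manifest field changes; every other field is written back
		// exactly as it was read.
		status.Manifest = manifestStr
		return status, nil
	})
}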
// handlePods installs pods as manifest pairs arrive on podChan. It has no
// return value and no output channels; it should do everything it needs to do
// without outside intervention (other than being signalled to quit).
func (p *Preparer) handlePods(podChan <-chan ManifestPair, quit <-chan struct{}) {
	// install new launchables
	var nextLaunch ManifestPair

	// used to track if we have work to do (i.e. a pod manifest came through
	// the channel and we have yet to operate on it)
	working := false

	var manifestLogger logging.Logger

	// The design of p2-preparer is to continuously retry installation
	// failures, for example downloading of the launchable. An exponential
	// backoff is important to avoid putting undue load on the artifact
	// server, for example. (A short sketch of how this backoff grows follows
	// the function below.)
	backoffTime := minimumBackoffTime
	for {
		select {
		case <-quit:
			return
		case nextLaunch = <-podChan:
			backoffTime = minimumBackoffTime
			var sha string

			// TODO: handle errors appropriately from SHA().
			if nextLaunch.Intent != nil {
				sha, _ = nextLaunch.Intent.SHA()
			} else {
				sha, _ = nextLaunch.Reality.SHA()
			}
			manifestLogger = p.Logger.SubLogger(logrus.Fields{
				"pod":            nextLaunch.ID,
				"sha":            sha,
				"pod_unique_key": nextLaunch.PodUniqueKey,
			})
			manifestLogger.NoFields().Debugln("New manifest received")

			working = true
		case <-time.After(backoffTime):
			if working {
				var pod *pods.Pod
				var err error
				if nextLaunch.PodUniqueKey == "" {
					pod = p.podFactory.NewLegacyPod(nextLaunch.ID)
				} else {
					pod, err = p.podFactory.NewUUIDPod(nextLaunch.ID, nextLaunch.PodUniqueKey)
					if err != nil {
						manifestLogger.WithError(err).Errorln("Could not initialize pod")
						break
					}
				}

				// TODO better solution: force the preparer to have a 0s default timeout, prevent KILLs
				if pod.Id == constants.PreparerPodID {
					pod.DefaultTimeout = time.Duration(0)
				}

				effectiveLogBridgeExec := p.logExec
				// pods that are in the blacklist for this preparer shall not use the
				// preparer's log exec. Instead, they will use the default svlogd logexec.
				for _, podID := range p.logBridgeBlacklist {
					if pod.Id.String() == podID {
						effectiveLogBridgeExec = svlogdExec
						break
					}
				}
				pod.SetLogBridgeExec(effectiveLogBridgeExec)
				pod.SetFinishExec(p.finishExec)

				// podChan is being fed values gathered from a kp.Watch() in
				// WatchForPodManifestsForNode(). If the watch returns a new pair of
				// intent/reality values before the previous change has finished
				// processing in resolvePair(), the reality value will be stale. This
				// leads to a bug where the preparer will appear to update a package
				// and, when that is finished, "update" it again.
				//
				// Example ordering of bad events:
				// 1) update to /intent for pod A comes in, /reality is read and
				//    resolvePair() handles it
				// 2) before resolvePair() finishes, another /intent update comes in,
				//    and /reality is read but hasn't been changed. This update cannot
				//    be processed until the previous resolvePair() call finishes and
				//    updates /reality. Now the reality value used here is stale. We
				//    want to refresh our /reality read so we don't restart the pod if
				//    intent didn't change between updates.
				//
				// The correct solution probably involves watching reality and intent
				// and feeding updated pairs to a control loop.
				//
				// This is a quick fix to ensure that the reality value being used is
				// up-to-date. The de-bouncing logic in this method should ensure that
				// the intent value is fresh (to the extent that Consul is timely).
				// Fetching the reality value again ensures its freshness too.
				if nextLaunch.PodUniqueKey == "" {
					// legacy pod, get reality manifest from the reality tree
					reality, _, err := p.store.Pod(kp.REALITY_TREE, p.node, nextLaunch.ID)
					if err == pods.NoCurrentManifest {
						nextLaunch.Reality = nil
					} else if err != nil {
						manifestLogger.WithError(err).Errorln("Error getting reality manifest")
						break
					} else {
						nextLaunch.Reality = reality
					}
				} else {
					// uuid pod, get reality manifest from pod status.
					// An if/else chain is used rather than a switch so that
					// break exits the select case and skips resolvePair(),
					// matching the legacy branch above; the next backoff tick
					// will retry.
					status, _, err := p.podStatusStore.Get(nextLaunch.PodUniqueKey)
					if err != nil && !statusstore.IsNoStatus(err) {
						manifestLogger.WithError(err).Errorln("Error getting reality manifest from pod status")
						break
					} else if statusstore.IsNoStatus(err) {
						nextLaunch.Reality = nil
					} else {
						realityManifest, err := manifest.FromBytes([]byte(status.Manifest))
						if err != nil {
							manifestLogger.WithError(err).Errorln("Error parsing reality manifest from pod status")
							break
						}
						nextLaunch.Reality = realityManifest
					}
				}

				ok := p.resolvePair(nextLaunch, pod, manifestLogger)
				if ok {
					nextLaunch = ManifestPair{}
					working = false

					// Reset the backoff time
					backoffTime = minimumBackoffTime
				} else {
					// Double the backoff time with a maximum of 1 minute
					backoffTime = backoffTime * 2
					if backoffTime > 1*time.Minute {
						backoffTime = 1 * time.Minute
					}
				}
			}
		}
	}
}
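// A minimal, self-contained sketch of how the backoff in handlePods grows,
// assuming for illustration only that minimumBackoffTime is one second; the
// real constant is defined elsewhere in the preparer package. Each failed
// resolvePair doubles the wait, capped at one minute, so repeated failures
// settle into a once-a-minute retry rather than hammering the artifact server.
func backoffSchedule() []time.Duration {
	var schedule []time.Duration
	backoff := 1 * time.Second // stands in for minimumBackoffTime
	for i := 0; i < 8; i++ {
		schedule = append(schedule, backoff)
		backoff *= 2
		if backoff > time.Minute {
			backoff = time.Minute
		}
	}
	// schedule after 8 consecutive failures: 1s, 2s, 4s, 8s, 16s, 32s, 1m, 1m
	return schedule
}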