Example #1
// ensureInReality polls the reality store until the node reports the target
// manifest SHA, giving up on quit, timeout, or cancellation.
func (r *replication) ensureInReality(
	node types.NodeName,
	timeoutCh <-chan struct{},
	nodeLogger logging.Logger,
	targetSHA string,
) error {
	for {
		select {
		case <-r.quitCh:
			return errQuit
		case <-timeoutCh:
			return errTimeout
		case <-r.replicationCancelledCh:
			return errCancelled
		case <-time.After(5 * time.Second):
			man, err := r.queryReality(node)
			if err == pods.NoCurrentManifest {
				// if the pod key doesn't exist yet, that's okay; just wait longer
			} else if err != nil {
				nodeLogger.WithErrorAndFields(err, logrus.Fields{
					"node": node,
				}).Errorln("Could not read reality for pod manifest")
			} else {
				receivedSHA, _ := man.SHA()
				if receivedSHA == targetSHA {
					nodeLogger.NoFields().Infoln("Node is current")
					return nil
				} else {
					nodeLogger.WithFields(logrus.Fields{"current": receivedSHA, "target": targetSHA}).Infoln("Waiting for current")
				}
			}
		}
	}
}
Example #2
// tryRunHooks runs hooks of the given type, logging a warning instead of
// failing when they error.
func (p *Preparer) tryRunHooks(hookType hooks.HookType, pod hooks.Pod, manifest manifest.Manifest, logger logging.Logger) {
	err := p.hooks.RunHookType(hookType, pod, manifest)
	if err != nil {
		logger.WithErrorAndFields(err, logrus.Fields{
			"hooks": hookType}).Warnln("Could not run hooks")
	}
}
Example #3
// installAndLaunchPod installs and verifies the intended manifest, runs the
// surrounding lifecycle hooks, halts any existing pod, and launches the new
// one, reporting success via its return value.
func (p *Preparer) installAndLaunchPod(pair ManifestPair, pod Pod, logger logging.Logger) bool {
	p.tryRunHooks(hooks.BEFORE_INSTALL, pod, pair.Intent, logger)

	logger.NoFields().Infoln("Installing pod and launchables")

	err := pod.Install(pair.Intent, p.artifactVerifier, p.artifactRegistry)
	if err != nil {
		// install failed, abort and retry
		logger.WithError(err).Errorln("Install failed")
		return false
	}

	err = pod.Verify(pair.Intent, p.authPolicy)
	if err != nil {
		logger.WithError(err).
			Errorln("Pod digest verification failed")
		p.tryRunHooks(hooks.AFTER_AUTH_FAIL, pod, pair.Intent, logger)
		return false
	}

	p.tryRunHooks(hooks.AFTER_INSTALL, pod, pair.Intent, logger)

	if pair.Reality != nil {
		logger.NoFields().Infoln("Invoking the disable hook and halting runit services")
		success, err := pod.Halt(pair.Reality)
		if err != nil {
			logger.WithError(err).
				Errorln("Pod halt failed")
		} else if !success {
			logger.NoFields().Warnln("One or more launchables did not halt successfully")
		}
	}

	p.tryRunHooks(hooks.BEFORE_LAUNCH, pod, pair.Intent, logger)

	logger.NoFields().Infoln("Setting up new runit services and running the enable hook")

	ok, err := pod.Launch(pair.Intent)
	if err != nil {
		logger.WithError(err).
			Errorln("Launch failed")
	} else {
		duration, err := p.store.SetPod(kp.REALITY_TREE, p.node, pair.Intent)
		if err != nil {
			logger.WithErrorAndFields(err, logrus.Fields{
				"duration": duration}).
				Errorln("Could not set pod in reality store")
		}

		p.tryRunHooks(hooks.AFTER_LAUNCH, pod, pair.Intent, logger)

		pod.Prune(p.maxLaunchableDiskUsage, pair.Intent) // errors are logged internally
	}
	return err == nil && ok
}
Example #4
func (p *Preparer) installAndLaunchPod(pair ManifestPair, pod Pod, logger logging.Logger) bool {
	p.tryRunHooks(hooks.BEFORE_INSTALL, pod, pair.Intent, logger)

	err := pod.Install(pair.Intent)
	if err != nil {
		// install failed, abort and retry
		logger.WithError(err).Errorln("Install failed")
		return false
	}

	err = pod.Verify(pair.Intent, p.authPolicy)
	if err != nil {
		logger.WithError(err).
			Errorln("Pod digest verification failed")
		p.tryRunHooks(hooks.AFTER_AUTH_FAIL, pod, pair.Intent, logger)
		return false
	}

	p.tryRunHooks(hooks.AFTER_INSTALL, pod, pair.Intent, logger)

	if pair.Reality != nil {
		success, err := pod.Halt(pair.Reality)
		if err != nil {
			logger.WithError(err).
				Errorln("Pod halt failed")
		} else if !success {
			logger.NoFields().Warnln("One or more launchables did not halt successfully")
		}
	}

	p.tryRunHooks(hooks.BEFORE_LAUNCH, pod, pair.Intent, logger)

	ok, err := pod.Launch(pair.Intent)
	if err != nil {
		logger.WithError(err).
			Errorln("Launch failed")
	} else {
		duration, err := p.store.SetPod(kp.RealityPath(p.node, pair.ID), pair.Intent)
		if err != nil {
			logger.WithErrorAndFields(err, logrus.Fields{
				"duration": duration}).
				Errorln("Could not set pod in reality store")
		}

		p.tryRunHooks(hooks.AFTER_LAUNCH, pod, pair.Intent, logger)
	}
	return err == nil && ok
}
Example #5
// stopAndUninstallPod halts a running pod, runs the before-uninstall hooks,
// uninstalls it, and clears its record from the reality store.
func (p *Preparer) stopAndUninstallPod(pair ManifestPair, pod Pod, logger logging.Logger) bool {
	success, err := pod.Halt(pair.Reality)
	if err != nil {
		logger.WithError(err).Errorln("Pod halt failed")
	} else if !success {
		logger.NoFields().Warnln("One or more launchables did not halt successfully")
	}

	p.tryRunHooks(hooks.BEFORE_UNINSTALL, pod, pair.Reality, logger)

	err = pod.Uninstall()
	if err != nil {
		logger.WithError(err).Errorln("Uninstall failed")
		return false
	}
	logger.NoFields().Infoln("Successfully uninstalled")

	if pair.PodUniqueKey == "" {
		dur, err := p.store.DeletePod(kp.REALITY_TREE, p.node, pair.ID)
		if err != nil {
			logger.WithErrorAndFields(err, logrus.Fields{"duration": dur}).
				Errorln("Could not delete pod from reality store")
		}
	} else {
		// We don't delete so that the exit status of the pod's
		// processes can be viewed for some time after installation.
		// It is the responsibility of external systems to delete pod
		// status entries when they are no longer needed.
		err := p.podStatusStore.MutateStatus(pair.PodUniqueKey, func(podStatus podstatus.PodStatus) (podstatus.PodStatus, error) {
			podStatus.PodStatus = podstatus.PodRemoved
			return podStatus, nil
		})
		if err != nil {
			logger.WithError(err).
				Errorln("Could not update pod status to reflect removal")
		}

		err = p.podStore.DeleteRealityIndex(pair.PodUniqueKey, p.node)
		if err != nil {
			logger.WithError(err).
				Errorln("Could not remove reality index for uninstalled pod")
		}
	}
	return true
}
Example #6
// runDirectory executes all executable files in a given directory path.
func runDirectory(dirpath string, environment []string, logger logging.Logger) error {
	entries, err := ioutil.ReadDir(dirpath)
	if os.IsNotExist(err) {
		logger.WithField("dir", dirpath).Debugln("Hooks not set up")
		return nil
	}
	if err != nil {
		return err
	}

	for _, f := range entries {
		fullpath := path.Join(dirpath, f.Name())
		executable := (f.Mode() & 0111) != 0
		if !executable {
			logger.WithField("path", fullpath).Warnln("Hook is not executable")
			continue
		}
		if f.IsDir() {
			continue
		}

		h := Hook{fullpath, f.Name(), DefaultTimeout, environment, logger}

		err := h.RunWithTimeout()
		if htErr, ok := err.(HookTimeoutError); ok {
			logger.WithErrorAndFields(htErr, logrus.Fields{
				"path":      h.Path,
				"hook_name": h.Name,
				"timeout":   h.Timeout,
			}).Warnln(htErr.Error())
			// we intentionally swallow HookTimeoutErrors
		} else if err != nil {
			logger.WithErrorAndFields(err, logrus.Fields{
				"path":      h.Path,
				"hook_name": h.Name,
			}).Warningf("Unknown error in hook %s: %s", h.Name, err)
		}
	}

	return nil
}
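
A minimal caller sketch for runDirectory follows. The directory path and the extra environment variable are made-up values for illustration, and logging.DefaultLogger is assumed to be the logging package's stock logger:

// Hypothetical: run every hook in the before_install event directory.
env := append(os.Environ(), "HOOKED_POD_ID=mypod") // illustrative variable
err := runDirectory("/usr/local/p2hooks.d/before_install", env, logging.DefaultLogger)
if err != nil {
	logging.DefaultLogger.WithError(err).Errorln("Could not run hook directory")
}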
Example #7
func runDirectory(dirpath string, environment []string, logger logging.Logger) error {
	entries, err := ioutil.ReadDir(dirpath)
	if os.IsNotExist(err) {
		logger.WithField("dir", dirpath).Debugln("Hooks not set up")
		return nil
	}
	if err != nil {
		return err
	}

	for _, f := range entries {
		fullpath := path.Join(dirpath, f.Name())
		executable := (f.Mode() & 0111) != 0
		if !executable {
			logger.WithField("path", fullpath).Warnln("Hook is not executable")
			continue
		}
		if f.IsDir() {
			continue
		}
		cmd := exec.Command(fullpath)
		hookOut := &bytes.Buffer{}
		cmd.Stdout = hookOut
		cmd.Stderr = hookOut
		cmd.Env = environment
		err := cmd.Run()
		if err != nil {
			logger.WithErrorAndFields(err, logrus.Fields{
				"path":   fullpath,
				"output": hookOut.String(),
			}).Warnf("Could not execute hook %s", f.Name())
		} else {
			logger.WithFields(logrus.Fields{
				"path":   fullpath,
				"output": hookOut.String(),
			}).Debugln("Executed hook")
		}
	}

	return nil
}
Example #8
// runHooks builds the hook environment, including a temporary copy of the
// manifest for hooks to read, then executes every hook in dirpath.
func (h *HookDir) runHooks(dirpath string, hType HookType, pod Pod, podManifest manifest.Manifest, logger logging.Logger) error {
	configFileName, err := podManifest.ConfigFileName()
	if err != nil {
		return err
	}

	// Write manifest to a file so hooks can read it.
	tmpManifestFile, err := ioutil.TempFile("", fmt.Sprintf("%s-manifest.yaml", podManifest.ID()))
	if err != nil {
		logger.WithErrorAndFields(err, logrus.Fields{
			"dir": dirpath,
		}).Warnln("Unable to open manifest file for hooks")
		return err
	}
	defer os.Remove(tmpManifestFile.Name())

	err = podManifest.Write(tmpManifestFile)
	if err != nil {
		logger.WithErrorAndFields(err, logrus.Fields{
			"dir": dirpath,
		}).Warnln("Unable to write manifest file for hooks")
		return err
	}

	hookEnvironment := []string{
		fmt.Sprintf("%s=%s", HOOK_ENV_VAR, path.Base(dirpath)),
		fmt.Sprintf("%s=%s", HOOK_EVENT_ENV_VAR, hType.String()),
		fmt.Sprintf("%s=%s", HOOKED_NODE_ENV_VAR, pod.Node()),
		fmt.Sprintf("%s=%s", HOOKED_POD_ID_ENV_VAR, podManifest.ID()),
		fmt.Sprintf("%s=%s", HOOKED_POD_HOME_ENV_VAR, pod.Home()),
		fmt.Sprintf("%s=%s", HOOKED_POD_MANIFEST_ENV_VAR, tmpManifestFile.Name()),
		fmt.Sprintf("%s=%s", HOOKED_CONFIG_PATH_ENV_VAR, path.Join(pod.ConfigDir(), configFileName)),
		fmt.Sprintf("%s=%s", HOOKED_ENV_PATH_ENV_VAR, pod.EnvDir()),
		fmt.Sprintf("%s=%s", HOOKED_CONFIG_DIR_PATH_ENV_VAR, pod.ConfigDir()),
		fmt.Sprintf("%s=%s", HOOKED_SYSTEM_POD_ROOT_ENV_VAR, h.podRoot),
	}

	return runDirectory(dirpath, hookEnvironment, logger)
}
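
On the other side of this contract, a hook reads those variables from its environment. A self-contained sketch of such a hook, assuming the constants above expand to names like HOOK_EVENT, HOOKED_POD_ID, and HOOKED_POD_MANIFEST (the actual values are defined elsewhere in the package):

package main

import (
	"fmt"
	"io/ioutil"
	"log"
	"os"
)

func main() {
	// The preparer exports the event name and a path to a temporary copy
	// of the manifest (see hookEnvironment above).
	event := os.Getenv("HOOK_EVENT")                 // assumed variable name
	manifestPath := os.Getenv("HOOKED_POD_MANIFEST") // assumed variable name
	body, err := ioutil.ReadFile(manifestPath)
	if err != nil {
		log.Fatalf("could not read manifest %s: %v", manifestPath, err)
	}
	fmt.Printf("pod %s: handling %s with a %d-byte manifest\n",
		os.Getenv("HOOKED_POD_ID"), event, len(body))
}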
Example #9
func (p *Preparer) stopAndUninstallPod(pair ManifestPair, pod Pod, logger logging.Logger) bool {
	success, err := pod.Halt(pair.Reality)
	if err != nil {
		logger.WithError(err).Errorln("Pod halt failed")
	} else if !success {
		logger.NoFields().Warnln("One or more launchables did not halt successfully")
	}

	p.tryRunHooks(hooks.BEFORE_UNINSTALL, pod, pair.Reality, logger)

	err = pod.Uninstall()
	if err != nil {
		logger.WithError(err).Errorln("Uninstall failed")
		return false
	}
	logger.NoFields().Infoln("Successfully uninstalled")

	dur, err := p.store.DeletePod(kp.REALITY_TREE, p.node, pair.ID)
	if err != nil {
		logger.WithErrorAndFields(err, logrus.Fields{"duration": dur}).
			Errorln("Could not delete pod from reality store")
	}
	return true
}
Example #10
// processHealthUpdater() runs in a goroutine to keep Consul in sync with the local health
// state. It is written as a non-blocking finite state machine: events arrive and update
// internal state, and after each event, the internal state is examined to see if an
// asynchronous action needs to be taken.
//
// Events come from three different sources:
//   1. App monitors send their periodic health check results here. When the service is no
//      longer being checked, the monitor must close this channel.
//   2. The session manager sends notifications whenever the current Consul session
//      expires or is renewed. When the manager exits, it must close this channel.
//   3. Writes to Consul are performed in a separate goroutine, and when each finishes, it
//      notifies the updater of what it just wrote.
//
// In response to these events, two actions can be taken:
//   A. Exit, once the app monitor has exited and the health check in Consul has been
//      removed.
//   B. Write the recent service state to Consul. At most one outstanding write will be
//      in-flight at any time.
func processHealthUpdater(
	client consulutil.ConsulClient,
	checksStream <-chan WatchResult,
	sessionsStream <-chan string,
	logger logging.Logger,
) {
	var localHealth *WatchResult  // Health last reported by checker
	var remoteHealth *WatchResult // Health last written to Consul
	var session string            // Current session

	var write <-chan writeResult  // Future result of an in-flight write
	var throttle <-chan time.Time // If set, writes are throttled

	// Track and limit all writes to avoid crushing Consul
	bucketRefreshRate := time.Minute / time.Duration(*HealthWritesPerMinute)
	rateLimiter, err := limit.NewTokenBucket(
		*HealthMaxBucketSize,
		*HealthMaxBucketSize,
		bucketRefreshRate,
	)
	if err != nil {
		panic("invalid token bucket parameters")
	}

	logger.NoFields().Debug("starting update loop")
	for {
		// Receive event notification; update internal FSM state
		select {
		case h, ok := <-checksStream:
			// The local health checker sent a new result
			if ok {
				logger.NoFields().Debug("new health status: ", h.Status)
				if !healthEquiv(localHealth, &h) {
					msg := fmt.Sprintf("Service %s is now %s", h.Service, h.Status)
					if health.Passing.Is(h.Status) {
						logger.NoFields().Infoln(msg)
					} else {
						logger.NoFields().Warnln(msg)
					}
				}
				localHealth = &h
			} else {
				logger.NoFields().Debug("check stream closed")
				checksStream = nil
				localHealth = nil
			}
		case s, ok := <-sessionsStream:
			// The active Consul session changed
			if ok {
				logger.NoFields().Debug("new session: ", s)
			} else {
				logger.NoFields().Debug("session stream closed")
				sessionsStream = nil
			}
			session = s
			// The old health result is deleted when its session expires
			remoteHealth = nil
		case result := <-write:
			// The in-flight write completed
			logger.NoFields().Debug("write completed: ", result.OK)
			write = nil
			if result.OK {
				remoteHealth = result.Health
				if result.Throttle && throttle == nil {
					throttle = time.After(time.Duration(*HealthResumeLimit) * bucketRefreshRate)
					logger.NoFields().Warningf("Service %s health is flapping; throttling updates", result.Health.Service)
				}
			}
		case <-throttle:
			throttle = nil
			logger.NoFields().Warning("health is stable; resuming updates")
		}

		// Exit
		if checksStream == nil && remoteHealth == nil && write == nil {
			logger.NoFields().Debug("exiting update loop")
			return
		}

		// Send update to Consul
		if !healthEquiv(localHealth, remoteHealth) && session != "" && write == nil &&
			throttle == nil {
			writeLogger := logger.SubLogger(logrus.Fields{
				"session": session,
			})
			w := make(chan writeResult, 1)
			if localHealth == nil {
				// Don't wait on the rate limiter when removing the health status
				rateLimiter.TryUse(1)
				logger.NoFields().Debug("deleting remote health")
				key := HealthPath(remoteHealth.Service, remoteHealth.Node)
				go sendHealthUpdate(writeLogger, w, nil, false, func() error {
					_, err := client.KV().Delete(key, nil)
					if err != nil {
						return consulutil.NewKVError("delete", key, err)
					}
					return nil
				})
			} else {
				writeHealth := localHealth
				doThrottle := false
				if count, _ := rateLimiter.TryUse(1); count <= 1 {
					// This is the last update before the throttle will be engaged. Write a special
					// message.
					logger.NoFields().Debug("writing throttled health")
					writeHealth = toThrottled(localHealth)
					doThrottle = true
				} else {
					logger.NoFields().Debug("writing remote health")
				}
				kv, err := healthToKV(*writeHealth, session)
				if err != nil {
					// Practically, this should never happen.
					logger.WithErrorAndFields(err, logrus.Fields{
						"health": *writeHealth,
					}).Error("could not serialize health update")
					localHealth = nil
					continue
				}
				if remoteHealth == nil {
					go sendHealthUpdate(writeLogger, w, localHealth, doThrottle, func() error {
						ok, _, err := client.KV().Acquire(kv, nil)
						if err != nil {
							return consulutil.NewKVError("acquire", kv.Key, err)
						}
						if !ok {
							return fmt.Errorf("write denied")
						}
						return nil
					})
				} else {
					go sendHealthUpdate(writeLogger, w, localHealth, doThrottle, func() error {
						_, err := client.KV().Put(kv, nil)
						if err != nil {
							return consulutil.NewKVError("put", kv.Key, err)
						}
						return nil
					})
				}
			}
			write = w
		}
	}
}
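
The idiom this loop leans on is worth isolating: receiving from a nil channel blocks forever, so assigning nil to an exhausted input removes its case from the select. Below is a pared-down, runnable sketch of the same event-then-inspect structure, with synchronous stand-ins for the asynchronous Consul writes; all names and types here are illustrative, not part of the original:

package main

import "fmt"

// fsmLoop mirrors the shape of processHealthUpdater: each select iteration
// consumes one event and updates internal state, then the state is inspected
// to decide whether to act or exit.
func fsmLoop(checks <-chan string, sessions <-chan string) {
	var local, remote *string // last result seen locally / last written remotely
	for {
		select {
		case c, ok := <-checks:
			if ok {
				local = &c
			} else {
				checks = nil // checker exited; nil channel disables this case
				local = nil
			}
		case <-sessions:
			remote = nil // session changed; the remote record is gone
		}
		// Synchronous stand-ins for the async writes; the real loop keeps at
		// most one write in flight and re-inspects when it completes.
		if local == nil && remote != nil {
			remote = nil // delete the remote record
		} else if local != nil {
			remote = local // write the latest local state
		}
		if checks == nil && remote == nil {
			return // input exhausted and remote record removed: exit
		}
	}
}

func main() {
	checks := make(chan string)
	go func() {
		checks <- "passing"
		close(checks)
	}()
	fsmLoop(checks, make(chan string))
	fmt.Println("updater exited cleanly")
}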
Example #11
// Populates the given directory with executor scripts for each launch script of
// the given pod, which must be installed. Any orphaned executor scripts (from a
// past install, but no longer present in this pod) will be cleaned out.
func InstallHookScripts(dir string, hookPod *pods.Pod, manifest pods.Manifest, logger logging.Logger) error {
	err := os.MkdirAll(dir, 0755)
	if err != nil {
		return err
	}

	// TODO: globbing based on the structure of the name is gross, each hook pod
	// should have its own dir of scripts and running hooks should iterate over
	// the directories
	rmPattern := filepath.Join(dir, fmt.Sprintf("%s__*", hookPod.Id))
	matches, err := filepath.Glob(rmPattern)
	if err != nil {
		// error return from filepath.Glob is guaranteed to be ErrBadPattern
		return util.Errorf("error while removing old hook scripts: pattern %q is malformed", rmPattern)
	}
	for _, match := range matches {
		err = os.Remove(match)
		if err != nil {
			logger.WithErrorAndFields(err, logrus.Fields{"script_path": match}).Errorln("Could not remove old hook script")
		}
	}

	launchables, err := hookPod.Launchables(manifest)
	if err != nil {
		return err
	}
	for _, launchable := range launchables {
		if launchable.Type() != "hoist" {
			logger.WithFields(logrus.Fields{
				"id":   launchable.ID(),
				"type": launchable.Type(),
			}).Errorln("hook disabled: unsupported launchable type")
			continue
		}
		executables, err := launchable.Executables(runit.DefaultBuilder)
		if err != nil {
			return err
		}

		for _, executable := range executables {
			// Write a script to the event directory that executes the pod's executables
			// with the correct environment for that pod.
			scriptPath := filepath.Join(dir, executable.Service.Name)
			file, err := os.OpenFile(scriptPath, os.O_TRUNC|os.O_CREATE|os.O_WRONLY, 0744)
			if err != nil {
				logger.WithErrorAndFields(err, logrus.Fields{"script_path": scriptPath}).Errorln("Could not open new hook script")
				continue
			}
			defer file.Close()
			err = executable.WriteExecutor(file)
			if err != nil {
				logger.WithErrorAndFields(err, logrus.Fields{"script_path": scriptPath}).Errorln("Could not write new hook script")
			}
		}
		// for convenience as we do with regular launchables, make these ones
		// current under the launchable directory
		err = launchable.MakeCurrent()
		if err != nil {
			logger.WithErrorAndFields(err, logrus.Fields{"launchable_id": launchable.ID()}).
				Errorln("Could not set hook launchable to current")
		}
	}
	return nil
}
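
The orphan-cleanup step promised by the doc comment is plain standard library. Isolated into a runnable sketch, with an illustrative directory and pod ID standing in for the function's arguments:

package main

import (
	"fmt"
	"log"
	"os"
	"path/filepath"
)

func main() {
	// Illustrative values; the real function derives these from its arguments.
	dir := "/usr/local/p2hooks.d/before_install"
	podID := "mypod"

	// Remove "<podID>__*" scripts left behind by a previous install.
	pattern := filepath.Join(dir, fmt.Sprintf("%s__*", podID))
	matches, err := filepath.Glob(pattern)
	if err != nil {
		// filepath.Glob can only fail with ErrBadPattern
		log.Fatalf("malformed pattern %q: %v", pattern, err)
	}
	for _, match := range matches {
		if err := os.Remove(match); err != nil {
			log.Printf("could not remove old hook script %s: %v", match, err)
		}
	}
}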
Example #12
func (p *Preparer) installAndLaunchPod(pair ManifestPair, pod Pod, logger logging.Logger) bool {
	p.tryRunHooks(hooks.BEFORE_INSTALL, pod, pair.Intent, logger)

	logger.NoFields().Infoln("Installing pod and launchables")

	err := pod.Install(pair.Intent, p.artifactVerifier, p.artifactRegistry)
	if err != nil {
		// install failed, abort and retry
		logger.WithError(err).Errorln("Install failed")
		return false
	}

	err = pod.Verify(pair.Intent, p.authPolicy)
	if err != nil {
		logger.WithError(err).
			Errorln("Pod digest verification failed")
		p.tryRunHooks(hooks.AFTER_AUTH_FAIL, pod, pair.Intent, logger)
		return false
	}

	p.tryRunHooks(hooks.AFTER_INSTALL, pod, pair.Intent, logger)

	if pair.Reality != nil {
		logger.NoFields().Infoln("Invoking the disable hook and halting runit services")
		success, err := pod.Halt(pair.Reality)
		if err != nil {
			logger.WithError(err).
				Errorln("Pod halt failed")
		} else if !success {
			logger.NoFields().Warnln("One or more launchables did not halt successfully")
		}
	}

	p.tryRunHooks(hooks.BEFORE_LAUNCH, pod, pair.Intent, logger)

	logger.NoFields().Infoln("Setting up new runit services and running the enable hook")

	ok, err := pod.Launch(pair.Intent)
	if err != nil {
		logger.WithError(err).
			Errorln("Launch failed")
	} else {
		if pair.PodUniqueKey == "" {
			// legacy pod, write the manifest back to reality tree
			duration, err := p.store.SetPod(kp.REALITY_TREE, p.node, pair.Intent)
			if err != nil {
				logger.WithErrorAndFields(err, logrus.Fields{
					"duration": duration}).
					Errorln("Could not set pod in reality store")
			}
		} else {
			// TODO: do this in a transaction
			err = p.podStore.WriteRealityIndex(pair.PodUniqueKey, p.node)
			if err != nil {
				logger.WithError(err).
					Errorln("Could not write uuid index to reality store")
			}

			// uuid pod, write the manifest to the pod status tree.
			mutator := func(ps podstatus.PodStatus) (podstatus.PodStatus, error) {
				manifestBytes, err := pair.Intent.Marshal()
				if err != nil {
					return ps, util.Errorf("Could not convert manifest to string to update pod status")
				}

				ps.PodStatus = podstatus.PodLaunched
				ps.Manifest = string(manifestBytes)
				return ps, nil
			}
			err := p.podStatusStore.MutateStatus(pair.PodUniqueKey, mutator)
			if err != nil {
				logger.WithError(err).Errorln("Could not update manifest in pod status")
			}
		}

		p.tryRunHooks(hooks.AFTER_LAUNCH, pod, pair.Intent, logger)

		pod.Prune(p.maxLaunchableDiskUsage, pair.Intent) // errors are logged internally
	}
	return err == nil && ok
}