func (r *replication) shouldScheduleForNode(node types.NodeName, logger logging.Logger) bool { nodeReality, err := r.queryReality(node) if err != nil { logger.WithError(err).Errorln("Could not read Reality for this node. Will proceed to schedule onto it.") return true } if err == pods.NoCurrentManifest { logger.Infoln("Nothing installed on this node yet.") return true } if nodeReality != nil { nodeRealitySHA, err := nodeReality.SHA() if err != nil { logger.WithError(err).Errorln("Unable to compute manifest SHA for this node. Attempting to schedule anyway") return true } replicationRealitySHA, err := r.manifest.SHA() if err != nil { logger.WithError(err).Errorln("Unable to compute manifest SHA for this daemon set. Attempting to schedule anyway") return true } if nodeRealitySHA == replicationRealitySHA { logger.Info("Reality for this node matches this DS. No action required.") return false } } return true }
func createHelloUUIDPod(dir string, port int, logger logging.Logger) (types.PodUniqueKey, error) { signedManifestPath, err := writeHelloManifest(dir, fmt.Sprintf("hello-uuid-%d.yaml", port), port) if err != nil { return "", err } logger.Infoln("Scheduling uuid pod") cmd := exec.Command("p2-schedule", "--uuid-pod", signedManifestPath) stdout := bytes.Buffer{} stderr := bytes.Buffer{} cmd.Stdout, cmd.Stderr = &stdout, &stderr err = cmd.Run() if err != nil { fmt.Println(stderr.String()) return "", err } var out schedule.Output err = json.Unmarshal(stdout.Bytes(), &out) if err != nil { return "", util.Errorf("Scheduled uuid pod but couldn't parse uuid from p2-schedule output: %s", err) } logger.Infof("Scheduled uuid pod %s", out.PodUniqueKey) return out.PodUniqueKey, nil }
func verifyUUIDPod(errCh chan error, tempDir string, logger logging.Logger) { defer close(errCh) // Schedule a "uuid" hello pod on a different port podUniqueKey, err := createHelloUUIDPod(tempDir, 43771, logger) if err != nil { errCh <- fmt.Errorf("Could not schedule UUID hello pod: %s", err) return } logger.Infoln("p2-schedule'd another hello instance as a uuid pod running on port 43771") err = verifyHelloRunning(podUniqueKey, logger) if err != nil { errCh <- fmt.Errorf("Couldn't get hello running as a uuid pod: %s", err) return } }
func verifyHelloRunning(podUniqueKey types.PodUniqueKey, logger logging.Logger) error { helloPidAppeared := make(chan struct{}) quit := make(chan struct{}) defer close(quit) serviceDir := "/var/service/hello__hello__launch" if podUniqueKey != "" { serviceDir = fmt.Sprintf("/var/service/hello-%s__hello__launch", podUniqueKey) } go func() { for { time.Sleep(100 * time.Millisecond) res := exec.Command("sudo", "sv", "stat", serviceDir).Run() if res == nil { select { case <-quit: logger.Infoln("got a valid stat after timeout") case helloPidAppeared <- struct{}{}: } return } else { select { case <-quit: return default: } } } }() select { case <-time.After(30 * time.Second): return fmt.Errorf("Couldn't start hello after 30 seconds:\n\n %s", targetLogs()) case <-helloPidAppeared: return nil } }
func verifyProcessExit(errCh chan error, tempDir string, logger logging.Logger) { defer close(errCh) // Schedule a uuid pod podUniqueKey, err := createHelloUUIDPod(tempDir, 43772, logger) if err != nil { errCh <- fmt.Errorf("Could not schedule UUID hello pod: %s", err) return } logger = logger.SubLogger(logrus.Fields{ "pod_unique_key": podUniqueKey, }) logger.Infoln("Scheduled hello instance on port 43772") err = verifyHelloRunning(podUniqueKey, logger) if err != nil { errCh <- fmt.Errorf("Couldn't get hello running as a uuid pod: %s", err) return } logger.Infoln("Hello instance launched") time.Sleep(3 * time.Second) logger.Infoln("Waiting for hello instance to listen on 43772") // now wait for the hello server to start running timeout := time.After(30 * time.Second) for { resp, err := http.Get("http://localhost:43772/") if err == nil { resp.Body.Close() break } select { case <-timeout: errCh <- fmt.Errorf("Hello didn't come up listening on 43772: %s", err) return default: } time.Sleep(1 * time.Second) } exitCode := rand.Intn(100) + 1 logger.Infof("Causing hello on 43772 to exit with status %d", exitCode) // Make an http request to hello to make it exit with exitCode. We expect the http request to fail due // to the server exiting, so don't check for http errors. _, err = http.Get(fmt.Sprintf("http://localhost:43772/exit/%d", exitCode)) if err == nil { // This is bad, it means the hello server didn't die and kill our request // in the middle errCh <- util.Errorf("Couldn't kill hello server with http request") return } urlError, ok := err.(*url.Error) if ok && urlError.Err == io.EOF { // This is good, it means the server died } else { errCh <- fmt.Errorf("Couldn't tell hello to die over http: %s", err) return } logger.Infoln("Checking for exit code in SQL database") finishService, err := podprocess.NewSQLiteFinishService(sqliteFinishDatabasePath, logging.DefaultLogger) if err != nil { errCh <- err return } var finishResult podprocess.FinishOutput timeout = time.After(30 * time.Second) for { finishResult, err = finishService.LastFinishForPodUniqueKey(podUniqueKey) if err == nil { break } select { case <-timeout: // Try to manually run the finish script in order to make debugging the test failure easier output, err := exec.Command("sudo", fmt.Sprintf("/var/service/hello-%s__hello__launch/finish", podUniqueKey), "1", "2").CombinedOutput() if err != nil { logger.WithError(err).Infoln("DEBUG: Debug attempt to run finish script failed") } logger.Infof("DEBUG: Output of direct execution of finish script: %s", string(output)) errCh <- fmt.Errorf("Did not find a finish row by the deadline: %s", err) return default: } } if finishResult.PodUniqueKey != podUniqueKey { errCh <- fmt.Errorf("Expected finish result for '%s' but it was for '%s'", podUniqueKey, finishResult.PodUniqueKey) return } if finishResult.ExitCode != exitCode { errCh <- fmt.Errorf("Exit code for '%s' in the sqlite database was expected to be %d but was %d", podUniqueKey, exitCode, finishResult.ExitCode) return } logger.Infoln("Checking for exit code in consul") timeout = time.After(30 * time.Second) podStatusStore := podstatus.NewConsul(statusstore.NewConsul(kp.NewConsulClient(kp.Options{})), kp.PreparerPodStatusNamespace) for { podStatus, _, err := podStatusStore.Get(podUniqueKey) if err != nil { errCh <- err return } found := false for _, processStatus := range podStatus.ProcessStatuses { if processStatus.LaunchableID == "hello" && processStatus.EntryPoint == "launch" { found = true if processStatus.LastExit == nil { errCh <- fmt.Errorf("Found no last exit in consul pod status for %s", podUniqueKey) return } if processStatus.LastExit.ExitCode != exitCode { errCh <- fmt.Errorf("Exit code for '%s' in consul was expected to be %d but was %d", podUniqueKey, exitCode, finishResult.ExitCode) return } } } if found { logger.Infoln("Successful!") break } select { case <-timeout: errCh <- fmt.Errorf("There was no pod process for hello/launch for %s in consul", podUniqueKey) return default: } } }