Beispiel #1
0
func (r *replication) shouldScheduleForNode(node types.NodeName, logger logging.Logger) bool {
	nodeReality, err := r.queryReality(node)
	if err != nil {
		logger.WithError(err).Errorln("Could not read Reality for this node. Will proceed to schedule onto it.")
		return true
	}
	if err == pods.NoCurrentManifest {
		logger.Infoln("Nothing installed on this node yet.")
		return true
	}

	if nodeReality != nil {
		nodeRealitySHA, err := nodeReality.SHA()
		if err != nil {
			logger.WithError(err).Errorln("Unable to compute manifest SHA for this node. Attempting to schedule anyway")
			return true
		}
		replicationRealitySHA, err := r.manifest.SHA()
		if err != nil {
			logger.WithError(err).Errorln("Unable to compute manifest SHA for this daemon set. Attempting to schedule anyway")
			return true
		}

		if nodeRealitySHA == replicationRealitySHA {
			logger.Info("Reality for this node matches this DS. No action required.")
			return false
		}
	}

	return true
}
Beispiel #2
0
func createHelloUUIDPod(dir string, port int, logger logging.Logger) (types.PodUniqueKey, error) {
	signedManifestPath, err := writeHelloManifest(dir, fmt.Sprintf("hello-uuid-%d.yaml", port), port)
	if err != nil {
		return "", err
	}

	logger.Infoln("Scheduling uuid pod")
	cmd := exec.Command("p2-schedule", "--uuid-pod", signedManifestPath)
	stdout := bytes.Buffer{}
	stderr := bytes.Buffer{}
	cmd.Stdout, cmd.Stderr = &stdout, &stderr
	err = cmd.Run()
	if err != nil {
		fmt.Println(stderr.String())
		return "", err
	}

	var out schedule.Output
	err = json.Unmarshal(stdout.Bytes(), &out)
	if err != nil {
		return "", util.Errorf("Scheduled uuid pod but couldn't parse uuid from p2-schedule output: %s", err)
	}

	logger.Infof("Scheduled uuid pod %s", out.PodUniqueKey)
	return out.PodUniqueKey, nil
}
Beispiel #3
0
func verifyUUIDPod(errCh chan error, tempDir string, logger logging.Logger) {
	defer close(errCh)

	// Schedule a "uuid" hello pod on a different port
	podUniqueKey, err := createHelloUUIDPod(tempDir, 43771, logger)
	if err != nil {
		errCh <- fmt.Errorf("Could not schedule UUID hello pod: %s", err)
		return
	}
	logger.Infoln("p2-schedule'd another hello instance as a uuid pod running on port 43771")

	err = verifyHelloRunning(podUniqueKey, logger)
	if err != nil {
		errCh <- fmt.Errorf("Couldn't get hello running as a uuid pod: %s", err)
		return
	}
}
Beispiel #4
0
func verifyHelloRunning(podUniqueKey types.PodUniqueKey, logger logging.Logger) error {
	helloPidAppeared := make(chan struct{})
	quit := make(chan struct{})
	defer close(quit)

	serviceDir := "/var/service/hello__hello__launch"
	if podUniqueKey != "" {
		serviceDir = fmt.Sprintf("/var/service/hello-%s__hello__launch", podUniqueKey)
	}
	go func() {
		for {
			time.Sleep(100 * time.Millisecond)
			res := exec.Command("sudo", "sv", "stat", serviceDir).Run()
			if res == nil {
				select {
				case <-quit:
					logger.Infoln("got a valid stat after timeout")
				case helloPidAppeared <- struct{}{}:
				}
				return
			} else {
				select {
				case <-quit:
					return
				default:
				}
			}
		}
	}()
	select {
	case <-time.After(30 * time.Second):
		return fmt.Errorf("Couldn't start hello after 30 seconds:\n\n %s", targetLogs())
	case <-helloPidAppeared:
		return nil
	}
}
Beispiel #5
0
func verifyProcessExit(errCh chan error, tempDir string, logger logging.Logger) {
	defer close(errCh)

	// Schedule a uuid pod
	podUniqueKey, err := createHelloUUIDPod(tempDir, 43772, logger)
	if err != nil {
		errCh <- fmt.Errorf("Could not schedule UUID hello pod: %s", err)
		return
	}

	logger = logger.SubLogger(logrus.Fields{
		"pod_unique_key": podUniqueKey,
	})
	logger.Infoln("Scheduled hello instance on port 43772")

	err = verifyHelloRunning(podUniqueKey, logger)
	if err != nil {
		errCh <- fmt.Errorf("Couldn't get hello running as a uuid pod: %s", err)
		return
	}
	logger.Infoln("Hello instance launched")

	time.Sleep(3 * time.Second)

	logger.Infoln("Waiting for hello instance to listen on 43772")
	// now wait for the hello server to start running
	timeout := time.After(30 * time.Second)
	for {
		resp, err := http.Get("http://localhost:43772/")
		if err == nil {
			resp.Body.Close()
			break
		}

		select {
		case <-timeout:
			errCh <- fmt.Errorf("Hello didn't come up listening on 43772: %s", err)
			return
		default:
		}

		time.Sleep(1 * time.Second)
	}

	exitCode := rand.Intn(100) + 1
	logger.Infof("Causing hello on 43772 to exit with status %d", exitCode)
	// Make an http request to hello to make it exit with exitCode. We expect the http request to fail due
	// to the server exiting, so don't check for http errors.
	_, err = http.Get(fmt.Sprintf("http://localhost:43772/exit/%d", exitCode))
	if err == nil {
		// This is bad, it means the hello server didn't die and kill our request
		// in the middle
		errCh <- util.Errorf("Couldn't kill hello server with http request")
		return
	}

	urlError, ok := err.(*url.Error)
	if ok && urlError.Err == io.EOF {
		// This is good, it means the server died
	} else {
		errCh <- fmt.Errorf("Couldn't tell hello to die over http: %s", err)
		return
	}

	logger.Infoln("Checking for exit code in SQL database")
	finishService, err := podprocess.NewSQLiteFinishService(sqliteFinishDatabasePath, logging.DefaultLogger)
	if err != nil {
		errCh <- err
		return
	}

	var finishResult podprocess.FinishOutput
	timeout = time.After(30 * time.Second)
	for {
		finishResult, err = finishService.LastFinishForPodUniqueKey(podUniqueKey)
		if err == nil {
			break
		}

		select {
		case <-timeout:
			// Try to manually run the finish script in order to make debugging the test failure easier
			output, err := exec.Command("sudo", fmt.Sprintf("/var/service/hello-%s__hello__launch/finish", podUniqueKey), "1", "2").CombinedOutput()
			if err != nil {
				logger.WithError(err).Infoln("DEBUG: Debug attempt to run finish script failed")
			}

			logger.Infof("DEBUG: Output of direct execution of finish script: %s", string(output))

			errCh <- fmt.Errorf("Did not find a finish row by the deadline: %s", err)
			return
		default:
		}
	}

	if finishResult.PodUniqueKey != podUniqueKey {
		errCh <- fmt.Errorf("Expected finish result for '%s' but it was for '%s'", podUniqueKey, finishResult.PodUniqueKey)
		return
	}

	if finishResult.ExitCode != exitCode {
		errCh <- fmt.Errorf("Exit code for '%s' in the sqlite database was expected to be %d but was %d", podUniqueKey, exitCode, finishResult.ExitCode)
		return
	}

	logger.Infoln("Checking for exit code in consul")
	timeout = time.After(30 * time.Second)
	podStatusStore := podstatus.NewConsul(statusstore.NewConsul(kp.NewConsulClient(kp.Options{})), kp.PreparerPodStatusNamespace)
	for {

		podStatus, _, err := podStatusStore.Get(podUniqueKey)
		if err != nil {
			errCh <- err
			return
		}

		found := false
		for _, processStatus := range podStatus.ProcessStatuses {
			if processStatus.LaunchableID == "hello" && processStatus.EntryPoint == "launch" {
				found = true
				if processStatus.LastExit == nil {
					errCh <- fmt.Errorf("Found no last exit in consul pod status for %s", podUniqueKey)
					return
				}

				if processStatus.LastExit.ExitCode != exitCode {
					errCh <- fmt.Errorf("Exit code for '%s' in consul was expected to be %d but was %d", podUniqueKey, exitCode, finishResult.ExitCode)
					return
				}
			}
		}

		if found {
			logger.Infoln("Successful!")
			break
		}

		select {
		case <-timeout:
			errCh <- fmt.Errorf("There was no pod process for hello/launch for %s in consul", podUniqueKey)
			return
		default:
		}
	}
}