Beispiel #1
0
// setKeys sets n random keys and values across each machine in a
// cluster and returns these values to later be checked with checkKeys.
// If all the values don't get set due to a machine that is down and
// error is NOT returned. An error is returned if no keys are able to be
// set.
func setKeys(cluster platform.Cluster, n int) (map[string]string, error) {
	var written = map[string]string{}
	for _, m := range cluster.Machines() {
		for i := 0; i < n; i++ {
			// random key and value, may overwrwite previous sets if
			// collision which is fine
			key := strconv.Itoa(rand.Int())[0:3]
			value := strconv.Itoa(rand.Int())[0:3]

			cmd := cluster.NewCommand("curl", "-w", "%{http_code}", "-s", fmt.Sprintf("http://%v:2379/v2/keys/%v", m.IP(), key), "-XPUT", "-d", "value="+value)
			b, err := cmd.Output()
			if err != nil {
				continue
			}

			// check for 201 or 200 resp header
			if !bytes.HasSuffix(b, []byte("200")) && !bytes.HasSuffix(b, []byte("201")) {
				continue
			}

			written[key] = value
		}
	}
	if len(written) == 0 {
		return nil, fmt.Errorf("failed to write any keys")
	}

	plog.Infof("wrote %v keys", len(written))
	return written, nil
}
Beispiel #2
0
// setKeys sets n random keys and values across each machine in a
// cluster and returns these values to later be checked with checkKeys.
// If all the values don't get set due to a machine that is down and
// error is NOT returned. An error is returned if no keys are able to be
// set.
func SetKeys(cluster platform.Cluster, n int) (map[string]string, error) {
	var written = map[string]string{}
	for _, m := range cluster.Machines() {
		for i := 0; i < n; i++ {
			// random key and value, may overwrwite previous sets if
			// collision which is fine
			key := strconv.Itoa(rand.Int())[0:3]
			value := strconv.Itoa(rand.Int())[0:3]

			b, err := m.SSH(fmt.Sprintf("curl -s -w %%{http_code} -s http://127.0.0.1:2379/v2/keys/%v -XPUT -d value=%v", key, value))
			if err != nil {
				return nil, err
			}

			// check for 201 or 200 resp header
			if !bytes.HasSuffix(b, []byte("200")) && !bytes.HasSuffix(b, []byte("201")) {
				continue
			}

			written[key] = value
		}
	}
	if len(written) == 0 {
		return nil, fmt.Errorf("failed to write any keys")
	}

	plog.Infof("wrote %v keys", len(written))
	return written, nil
}
Beispiel #3
0
func checkEtcdVersion(cluster platform.Cluster, m platform.Machine, expected string) error {
	const (
		retries   = 5
		retryWait = 3 * time.Second
	)
	var err error
	var b []byte

	for i := 0; i < retries; i++ {
		cmd := cluster.NewCommand("curl", "-L", fmt.Sprintf("http://%v:2379/version", m.IP()))
		b, err = cmd.Output()
		if err != nil {
			plog.Infof("retrying version check, hit failure %v", err)
			time.Sleep(retryWait)
			continue
		}
		break
	}
	if err != nil {
		return fmt.Errorf("curling version: %v", err)
	}

	plog.Infof("got version: %s", b)

	if string(b) != expected {
		return fmt.Errorf("expected %v, got %s", expected, b)
	}
	return nil
}
Beispiel #4
0
func checkEtcdVersion(cluster platform.Cluster, m platform.Machine, expected string) error {
	var b []byte
	var err error

	checker := func() error {
		cmd := cluster.NewCommand("curl", "-L", fmt.Sprintf("http://%v:2379/version", m.IP()))
		b, err = cmd.Output()
		if err != nil {
			return fmt.Errorf("curl failed: %v", err)
		}

		return nil
	}

	if err := util.Retry(15, 10*time.Second, checker); err != nil {
		return err
	}

	plog.Infof("got version: %s", b)

	if string(b) != expected {
		return fmt.Errorf("expected %v, got %s", expected, b)
	}

	return nil
}
Beispiel #5
0
// checkKeys tests that each node in the cluster has the full provided
// key set in keyMap. Quorum get must be used.
func CheckKeys(cluster platform.Cluster, keyMap map[string]string, quorum bool) error {
	for i, m := range cluster.Machines() {
		for k, v := range keyMap {
			var cmd string
			if quorum {
				cmd = fmt.Sprintf("curl http://127.0.0.1:2379/v2/keys/%v?quorum=true", k)
			} else {
				cmd = fmt.Sprintf("curl http://127.0.0.1:2379/v2/keys/%v", k)
			}
			s, err := m.SSHSession()
			if err != nil {
				return err
			}
			defer s.Close()

			b, err := s.Output(cmd)
			if err != nil {
				return fmt.Errorf("error curling key: %v", err)
			}

			var jsonMap map[string]interface{}
			err = json.Unmarshal(b, &jsonMap)
			if err != nil {
				return err
			}

			// error code?
			errorCode, ok := jsonMap["errorCode"]
			if ok {
				msg := jsonMap["message"]
				return fmt.Errorf("machine %v errorCode %v: %v: %s", i, errorCode, msg, b)
			}

			node, ok := jsonMap["node"]
			if !ok {
				return fmt.Errorf("retrieving key in CheckKeys, no node in resp")
			}

			n := node.(map[string]interface{})
			value, ok := n["value"]
			if !ok {
				return fmt.Errorf("retrieving key in CheckKeys, no value in resp")
			}

			if value != v {
				return fmt.Errorf("checkKeys got incorrect value! expected:%v got: %v", v, value)
			}
		}
	}
	plog.Infof("checked %v keys", len(keyMap))
	return nil
}
Beispiel #6
0
func runBootchart(cmd *cobra.Command, args []string) {
	if len(args) != 0 {
		fmt.Fprintf(os.Stderr, "No args accepted\n")
		os.Exit(2)
	}

	var (
		cluster platform.Cluster
		err     error
	)
	if kolaPlatform == "qemu" {
		cluster, err = platform.NewQemuCluster(kola.QEMUOptions)
	} else if kolaPlatform == "gce" {
		cluster, err = platform.NewGCECluster(kola.GCEOptions)
	} else if kolaPlatform == "aws" {
		cluster, err = platform.NewAWSCluster(kola.AWSOptions)
	} else {
		fmt.Fprintf(os.Stderr, "Invalid platform: %v", kolaPlatform)
	}

	if err != nil {
		fmt.Fprintf(os.Stderr, "Cluster failed: %v\n", err)
		os.Exit(1)
	}
	defer cluster.Destroy()

	m, err := cluster.NewMachine("")
	if err != nil {
		fmt.Fprintf(os.Stderr, "Machine failed: %v\n", err)
		os.Exit(1)
	}
	defer m.Destroy()

	ssh, err := m.SSHSession()
	if err != nil {
		fmt.Fprintf(os.Stderr, "SSH failed: %v\n", err)
		os.Exit(1)
	}

	ssh.Stdout = os.Stdout
	ssh.Stderr = os.Stderr
	if err = ssh.Run("systemd-analyze plot"); err != nil {
		fmt.Fprintf(os.Stderr, "SSH failed: %v\n", err)
		os.Exit(1)
	}
}
Beispiel #7
0
func runSpawn(cmd *cobra.Command, args []string) {
	var userdata []byte
	var err error
	var cluster platform.Cluster

	if spawnUserData != "" {
		userdata, err = ioutil.ReadFile(spawnUserData)
		if err != nil {
			die("Reading userdata failed: %v", err)
		}
	}

	switch kolaPlatform {
	case "qemu":
		cluster, err = platform.NewQemuCluster(kola.QEMUOptions)
	case "gce":
		cluster, err = platform.NewGCECluster(kola.GCEOptions)
	case "aws":
		cluster, err = platform.NewAWSCluster(kola.AWSOptions)
	default:
		err = fmt.Errorf("invalid platform %q", kolaPlatform)
	}

	if err != nil {
		die("Cluster failed: %v", err)
	}

	mach, err := cluster.NewMachine(string(userdata))
	if err != nil {
		die("Spawning instance failed: %v", err)
	}

	if spawnRemove {
		defer mach.Destroy()
	}

	if spawnShell {
		if err := platform.Manhole(mach); err != nil {
			die("Manhole failed: %v", err)
		}
	}
}
Beispiel #8
0
// checkKeys tests that each node in the cluster has the full provided
// key set in keyMap. Quorum get must be used.
func checkKeys(cluster platform.Cluster, keyMap map[string]string) error {
	for i, m := range cluster.Machines() {
		for k, v := range keyMap {
			cmd := cluster.NewCommand("curl", fmt.Sprintf("http://%v:2379/v2/keys/%v?quorum=true", m.IP(), k))
			b, err := cmd.Output()
			if err != nil {
				return fmt.Errorf("error curling key: %v", err)
			}

			var jsonMap map[string]interface{}
			err = json.Unmarshal(b, &jsonMap)
			if err != nil {
				return err
			}

			// error code?
			errorCode, ok := jsonMap["errorCode"]
			if ok {
				msg := jsonMap["message"]
				return fmt.Errorf("machine %v errorCode %v: %v: %s", i, errorCode, msg, b)
			}

			node, ok := jsonMap["node"]
			if !ok {
				return fmt.Errorf("retrieving key in CheckKeys, no node in resp")
			}

			n := node.(map[string]interface{})
			value, ok := n["value"]
			if !ok {
				return fmt.Errorf("retrieving key in CheckKeys, no value in resp")
			}

			if value != v {
				return fmt.Errorf("checkKeys got incorrect value! expected:%v got: %v", v, value)
			}
		}
	}
	plog.Infof("checked %v keys", len(keyMap))
	return nil
}
Beispiel #9
0
// create a cluster and run test
func runTest(t *Test, pltfrm string) error {
	var err error
	var cluster platform.Cluster

	if pltfrm == "qemu" {
		cluster, err = platform.NewQemuCluster(*QemuImage)
	} else if pltfrm == "gce" {
		cluster, err = platform.NewGCECluster(GCEOpts())
	} else {
		plog.Errorf("Invalid platform: %v", pltfrm)
	}

	if err != nil {
		return fmt.Errorf("Cluster failed: %v", err)
	}
	defer func() {
		if err := cluster.Destroy(); err != nil {
			plog.Errorf("cluster.Destroy(): %v", err)
		}
	}()

	url, err := cluster.GetDiscoveryURL(t.ClusterSize)
	if err != nil {
		return fmt.Errorf("Failed to create discovery endpoint: %v", err)
	}

	cfgs := makeConfigs(url, t.CloudConfig, t.ClusterSize)

	for i := 0; i < t.ClusterSize; i++ {
		_, err := cluster.NewMachine(cfgs[i])
		if err != nil {
			return fmt.Errorf("Cluster failed starting machine: %v", err)
		}
		plog.Infof("%v instance up", pltfrm)
	}
	// pass along all registered native functions
	var names []string
	for k := range t.NativeFuncs {
		names = append(names, k)
	}
	// Cluster -> TestCluster
	tcluster := platform.TestCluster{t.Name, names, cluster}

	// drop kolet binary on machines
	if t.NativeFuncs != nil {
		err = scpKolet(tcluster)
		if err != nil {
			return fmt.Errorf("dropping kolet binary: %v", err)
		}
	}

	// run test
	err = t.Run(tcluster)
	return err
}
Beispiel #10
0
func discovery(cluster platform.Cluster, version int) error {
	if plog.LevelAt(capnslog.DEBUG) {
		// get journalctl -f from all machines before starting
		for _, m := range cluster.Machines() {
			if err := platform.StreamJournal(m); err != nil {
				return fmt.Errorf("failed to start journal: %v", err)
			}
		}
	}

	// start etcd on each machine asynchronously.
	for _, m := range cluster.Machines() {
		if err := doStart(m, version, false); err != nil {
			return err
		}
	}

	// block until each instance is reported as started.
	for i, m := range cluster.Machines() {
		if err := doStart(m, version, true); err != nil {
			return err
		}
		plog.Infof("etcd instance%d started", i)
	}

	var keyMap map[string]string
	var retryFuncs []func() error

	retryFuncs = append(retryFuncs, func() error {
		var err error
		keyMap, err = SetKeys(cluster, 5)
		if err != nil {
			return err
		}
		return nil
	})
	retryFuncs = append(retryFuncs, func() error {
		var quorumRead bool
		if version == 2 {
			quorumRead = true
		}
		if err := CheckKeys(cluster, keyMap, quorumRead); err != nil {
			return err
		}
		return nil
	})
	for _, retry := range retryFuncs {
		if err := util.Retry(5, 5*time.Second, retry); err != nil {
			return fmt.Errorf("discovery failed health check: %v", err)
		}
		// NOTE(pb): etcd1 seems to fail in an odd way when I try quorum
		// read, instead just sleep between setting and getting.
		time.Sleep(2 * time.Second)
	}

	return nil
}
Beispiel #11
0
func discovery(cluster platform.Cluster, version int) error {
	csize := len(cluster.Machines())

	if plog.LevelAt(capnslog.DEBUG) {
		// get journalctl -f from all machines before starting
		for _, m := range cluster.Machines() {
			if err := m.StartJournal(); err != nil {
				return fmt.Errorf("failed to start journal: %v", err)
			}
		}
	}

	// point etcd on each machine to discovery
	for i, m := range cluster.Machines() {
		// start etcd instance
		var etcdStart string
		if version == 1 {
			etcdStart = "sudo systemctl start etcd.service"
		} else if version == 2 {
			etcdStart = "sudo systemctl start etcd2.service"
		} else {
			return fmt.Errorf("etcd version unspecified")
		}

		_, err := m.SSH(etcdStart)
		if err != nil {
			return fmt.Errorf("SSH cmd to %v failed: %s", m.IP(), err)
		}
		plog.Infof("etcd instance%d started", i)
	}

	err := getClusterHealth(cluster.Machines()[0], csize)
	if err != nil {
		return fmt.Errorf("discovery failed health check: %v", err)
	}

	return nil
}
Beispiel #12
0
// create a cluster and run test
func RunTest(t *Test, pltfrm string) error {
	var err error
	var cluster platform.Cluster

	switch pltfrm {
	case "qemu":
		cluster, err = platform.NewQemuCluster(QEMUOptions)
	case "gce":
		cluster, err = platform.NewGCECluster(GCEOptions)
	case "aws":
		cluster, err = platform.NewAWSCluster(AWSOptions)
	default:
		err = fmt.Errorf("invalid platform %q", pltfrm)
	}

	if err != nil {
		return fmt.Errorf("Cluster failed: %v", err)
	}
	defer func() {
		if err := cluster.Destroy(); err != nil {
			plog.Errorf("cluster.Destroy(): %v", err)
		}
	}()

	url, err := cluster.GetDiscoveryURL(t.ClusterSize)
	if err != nil {
		return fmt.Errorf("Failed to create discovery endpoint: %v", err)
	}

	cfgs := makeConfigs(url, t.CloudConfig, t.ClusterSize)

	if t.ClusterSize > 0 {
		_, err := platform.NewMachines(cluster, cfgs)
		if err != nil {
			return fmt.Errorf("Cluster failed starting machines: %v", err)
		}
	}

	// pass along all registered native functions
	var names []string
	for k := range t.NativeFuncs {
		names = append(names, k)
	}

	// prevent unsafe access if tests ever become parallel and access
	tempTestOptions := make(map[string]string, 0)
	for k, v := range testOptions {
		tempTestOptions[k] = v
	}

	// Cluster -> TestCluster
	tcluster := platform.TestCluster{
		Name:        t.Name,
		NativeFuncs: names,
		Options:     tempTestOptions,
		Cluster:     cluster,
	}

	// drop kolet binary on machines
	if t.NativeFuncs != nil {
		err = scpKolet(tcluster)
		if err != nil {
			return fmt.Errorf("dropping kolet binary: %v", err)
		}
	}

	// run test
	err = t.Run(tcluster)

	// give some time for the remote journal to be flushed so it can be read
	// before we run the deferred machine destruction
	if err != nil {
		time.Sleep(10 * time.Second)
	}

	return err
}