Esempio n. 1
0
// setKeys sets n random keys and values across each machine in a
// cluster and returns these values to later be checked with checkKeys.
// If all the values don't get set due to a machine that is down and
// error is NOT returned. An error is returned if no keys are able to be
// set.
func SetKeys(cluster platform.Cluster, n int) (map[string]string, error) {
	var written = map[string]string{}
	for _, m := range cluster.Machines() {
		for i := 0; i < n; i++ {
			// random key and value, may overwrwite previous sets if
			// collision which is fine
			key := strconv.Itoa(rand.Int())[0:3]
			value := strconv.Itoa(rand.Int())[0:3]

			b, err := m.SSH(fmt.Sprintf("curl -s -w %%{http_code} -s http://127.0.0.1:2379/v2/keys/%v -XPUT -d value=%v", key, value))
			if err != nil {
				return nil, err
			}

			// check for 201 or 200 resp header
			if !bytes.HasSuffix(b, []byte("200")) && !bytes.HasSuffix(b, []byte("201")) {
				continue
			}

			written[key] = value
		}
	}
	if len(written) == 0 {
		return nil, fmt.Errorf("failed to write any keys")
	}

	plog.Infof("wrote %v keys", len(written))
	return written, nil
}
Esempio n. 2
0
// setKeys sets n random keys and values across each machine in a
// cluster and returns these values to later be checked with checkKeys.
// If all the values don't get set due to a machine that is down and
// error is NOT returned. An error is returned if no keys are able to be
// set.
func setKeys(cluster platform.Cluster, n int) (map[string]string, error) {
	var written = map[string]string{}
	for _, m := range cluster.Machines() {
		for i := 0; i < n; i++ {
			// random key and value, may overwrwite previous sets if
			// collision which is fine
			key := strconv.Itoa(rand.Int())[0:3]
			value := strconv.Itoa(rand.Int())[0:3]

			cmd := cluster.NewCommand("curl", "-w", "%{http_code}", "-s", fmt.Sprintf("http://%v:2379/v2/keys/%v", m.IP(), key), "-XPUT", "-d", "value="+value)
			b, err := cmd.Output()
			if err != nil {
				continue
			}

			// check for 201 or 200 resp header
			if !bytes.HasSuffix(b, []byte("200")) && !bytes.HasSuffix(b, []byte("201")) {
				continue
			}

			written[key] = value
		}
	}
	if len(written) == 0 {
		return nil, fmt.Errorf("failed to write any keys")
	}

	plog.Infof("wrote %v keys", len(written))
	return written, nil
}
Esempio n. 3
0
// checkKeys tests that each node in the cluster has the full provided
// key set in keyMap. Quorum get must be used.
func CheckKeys(cluster platform.Cluster, keyMap map[string]string, quorum bool) error {
	for i, m := range cluster.Machines() {
		for k, v := range keyMap {
			var cmd string
			if quorum {
				cmd = fmt.Sprintf("curl http://127.0.0.1:2379/v2/keys/%v?quorum=true", k)
			} else {
				cmd = fmt.Sprintf("curl http://127.0.0.1:2379/v2/keys/%v", k)
			}
			s, err := m.SSHSession()
			if err != nil {
				return err
			}
			defer s.Close()

			b, err := s.Output(cmd)
			if err != nil {
				return fmt.Errorf("error curling key: %v", err)
			}

			var jsonMap map[string]interface{}
			err = json.Unmarshal(b, &jsonMap)
			if err != nil {
				return err
			}

			// error code?
			errorCode, ok := jsonMap["errorCode"]
			if ok {
				msg := jsonMap["message"]
				return fmt.Errorf("machine %v errorCode %v: %v: %s", i, errorCode, msg, b)
			}

			node, ok := jsonMap["node"]
			if !ok {
				return fmt.Errorf("retrieving key in CheckKeys, no node in resp")
			}

			n := node.(map[string]interface{})
			value, ok := n["value"]
			if !ok {
				return fmt.Errorf("retrieving key in CheckKeys, no value in resp")
			}

			if value != v {
				return fmt.Errorf("checkKeys got incorrect value! expected:%v got: %v", v, value)
			}
		}
	}
	plog.Infof("checked %v keys", len(keyMap))
	return nil
}
Esempio n. 4
0
// checkKeys tests that each node in the cluster has the full provided
// key set in keyMap. Quorum get must be used.
func checkKeys(cluster platform.Cluster, keyMap map[string]string) error {
	for i, m := range cluster.Machines() {
		for k, v := range keyMap {
			cmd := cluster.NewCommand("curl", fmt.Sprintf("http://%v:2379/v2/keys/%v?quorum=true", m.IP(), k))
			b, err := cmd.Output()
			if err != nil {
				return fmt.Errorf("error curling key: %v", err)
			}

			var jsonMap map[string]interface{}
			err = json.Unmarshal(b, &jsonMap)
			if err != nil {
				return err
			}

			// error code?
			errorCode, ok := jsonMap["errorCode"]
			if ok {
				msg := jsonMap["message"]
				return fmt.Errorf("machine %v errorCode %v: %v: %s", i, errorCode, msg, b)
			}

			node, ok := jsonMap["node"]
			if !ok {
				return fmt.Errorf("retrieving key in CheckKeys, no node in resp")
			}

			n := node.(map[string]interface{})
			value, ok := n["value"]
			if !ok {
				return fmt.Errorf("retrieving key in CheckKeys, no value in resp")
			}

			if value != v {
				return fmt.Errorf("checkKeys got incorrect value! expected:%v got: %v", v, value)
			}
		}
	}
	plog.Infof("checked %v keys", len(keyMap))
	return nil
}
Esempio n. 5
0
func discovery(cluster platform.Cluster, version int) error {
	if plog.LevelAt(capnslog.DEBUG) {
		// get journalctl -f from all machines before starting
		for _, m := range cluster.Machines() {
			if err := platform.StreamJournal(m); err != nil {
				return fmt.Errorf("failed to start journal: %v", err)
			}
		}
	}

	// start etcd on each machine asynchronously.
	for _, m := range cluster.Machines() {
		if err := doStart(m, version, false); err != nil {
			return err
		}
	}

	// block until each instance is reported as started.
	for i, m := range cluster.Machines() {
		if err := doStart(m, version, true); err != nil {
			return err
		}
		plog.Infof("etcd instance%d started", i)
	}

	var keyMap map[string]string
	var retryFuncs []func() error

	retryFuncs = append(retryFuncs, func() error {
		var err error
		keyMap, err = SetKeys(cluster, 5)
		if err != nil {
			return err
		}
		return nil
	})
	retryFuncs = append(retryFuncs, func() error {
		var quorumRead bool
		if version == 2 {
			quorumRead = true
		}
		if err := CheckKeys(cluster, keyMap, quorumRead); err != nil {
			return err
		}
		return nil
	})
	for _, retry := range retryFuncs {
		if err := util.Retry(5, 5*time.Second, retry); err != nil {
			return fmt.Errorf("discovery failed health check: %v", err)
		}
		// NOTE(pb): etcd1 seems to fail in an odd way when I try quorum
		// read, instead just sleep between setting and getting.
		time.Sleep(2 * time.Second)
	}

	return nil
}
Esempio n. 6
0
func discovery(cluster platform.Cluster, version int) error {
	csize := len(cluster.Machines())

	if plog.LevelAt(capnslog.DEBUG) {
		// get journalctl -f from all machines before starting
		for _, m := range cluster.Machines() {
			if err := m.StartJournal(); err != nil {
				return fmt.Errorf("failed to start journal: %v", err)
			}
		}
	}

	// point etcd on each machine to discovery
	for i, m := range cluster.Machines() {
		// start etcd instance
		var etcdStart string
		if version == 1 {
			etcdStart = "sudo systemctl start etcd.service"
		} else if version == 2 {
			etcdStart = "sudo systemctl start etcd2.service"
		} else {
			return fmt.Errorf("etcd version unspecified")
		}

		_, err := m.SSH(etcdStart)
		if err != nil {
			return fmt.Errorf("SSH cmd to %v failed: %s", m.IP(), err)
		}
		plog.Infof("etcd instance%d started", i)
	}

	err := getClusterHealth(cluster.Machines()[0], csize)
	if err != nil {
		return fmt.Errorf("discovery failed health check: %v", err)
	}

	return nil
}