func main() {
	flag.Parse()
	glog.Info("QingYuan Elasticsearch logging discovery")

	c, err := client.NewInCluster()
	if err != nil {
		glog.Fatalf("Failed to make client: %v", err)
	}

	var elasticsearch *api.Service
	// Look for endpoints associated with the Elasticsearch loggging service.
	// First wait for the service to become available.
	for t := time.Now(); time.Since(t) < 5*time.Minute; time.Sleep(10 * time.Second) {
		elasticsearch, err = c.Services(api.NamespaceDefault).Get("elasticsearch-logging")
		if err == nil {
			break
		}
	}
	// If we did not find an elasticsearch logging service then log a warning
	// and return without adding any unicast hosts.
	if elasticsearch == nil {
		glog.Warningf("Failed to find the elasticsearch-logging service: %v", err)
		return
	}

	var endpoints *api.Endpoints
	addrs := []string{}
	// Wait for some endpoints.
	count := 0
	for t := time.Now(); time.Since(t) < 5*time.Minute; time.Sleep(10 * time.Second) {
		endpoints, err = c.Endpoints(api.NamespaceDefault).Get("elasticsearch-logging")
		if err != nil {
			continue
		}
		addrs = flattenSubsets(endpoints.Subsets)
		glog.Infof("Found %s", addrs)
		if len(addrs) > 0 && len(addrs) == count {
			break
		}
		count = len(addrs)
	}
	// If there was an error finding endpoints then log a warning and quit.
	if err != nil {
		glog.Warningf("Error finding endpoints: %v", err)
		return
	}

	glog.Infof("Endpoints = %s", addrs)
	fmt.Printf("discovery.zen.ping.unicast.hosts: [%s]\n", strings.Join(addrs, ", "))
}
Beispiel #2
0
// Find all sibling pods in the service and post to their /write handler.
func contactOthers(state *State) {
	defer state.doneContactingPeers()
	client, err := client.NewInCluster()
	if err != nil {
		log.Fatalf("Unable to create client; error: %v\n", err)
	}
	// Double check that that worked by getting the server version.
	if v, err := client.ServerVersion(); err != nil {
		log.Fatalf("Unable to get server version: %v\n", err)
	} else {
		log.Printf("Server version: %#v\n", v)
	}

	// Do this repeatedly, in case there's some propagation delay with getting
	// newly started pods into the endpoints list.
	for i := 0; i < 15; i++ {
		endpoints, err := client.Endpoints(*namespace).Get(*service)
		if err != nil {
			state.Logf("Unable to read the endpoints for %v/%v: %v; will try again.", *namespace, *service, err)
			time.Sleep(time.Duration(1+rand.Intn(10)) * time.Second)
		}

		eps := util.StringSet{}
		for _, ss := range endpoints.Subsets {
			for _, a := range ss.Addresses {
				for _, p := range ss.Ports {
					eps.Insert(fmt.Sprintf("http://%s:%d", a.IP, p.Port))
				}
			}
		}
		for ep := range eps {
			state.Logf("Attempting to contact %s", ep)
			contactSingle(ep, state)
		}

		time.Sleep(5 * time.Second)
	}
}
Beispiel #3
0
func main() {
	flag.Parse()

	glog.Infof("Starting cauldron soak test with queries=%d podsPerNode=%d upTo=%d maxPar=%d",
		*queriesAverage, *podsPerNode, *upTo, *maxPar)

	c, err := client.NewInCluster()
	if err != nil {
		glog.Fatalf("Failed to make client: %v", err)
	}

	var nodes *api.NodeList
	for start := time.Now(); time.Since(start) < nodeListTimeout; time.Sleep(2 * time.Second) {
		nodes, err = c.Nodes().List(labels.Everything(), fields.Everything())
		if err == nil {
			break
		}
		glog.Warningf("Failed to list nodes: %v", err)
	}
	if err != nil {
		glog.Fatalf("Giving up trying to list nodes: %v", err)
	}

	if len(nodes.Items) == 0 {
		glog.Fatalf("Failed to find any nodes.")
	}

	glog.Infof("Found %d nodes on this cluster:", len(nodes.Items))
	for i, node := range nodes.Items {
		glog.Infof("%d: %s", i, node.Name)
	}

	queries := *queriesAverage * len(nodes.Items) * *podsPerNode

	// Create a uniquely named namespace.
	got, err := c.Namespaces().Create(&api.Namespace{ObjectMeta: api.ObjectMeta{GenerateName: "serve-hostnames-"}})
	if err != nil {
		glog.Fatalf("Failed to create namespace: %v", err)
	}
	ns := got.Name
	defer func(ns string) {
		if err := c.Namespaces().Delete(ns); err != nil {
			glog.Warningf("Failed to delete namespace ns: %e", ns, err)
		}
	}(ns)
	glog.Infof("Created namespace %s", ns)

	// Create a service for these pods.
	glog.Infof("Creating service %s/serve-hostnames", ns)
	// Make several attempts to create a service.
	var svc *api.Service
	for start := time.Now(); time.Since(start) < serviceCreateTimeout; time.Sleep(2 * time.Second) {
		t := time.Now()
		svc, err = c.Services(ns).Create(&api.Service{
			ObjectMeta: api.ObjectMeta{
				Name: "serve-hostnames",
				Labels: map[string]string{
					"name": "serve-hostname",
				},
			},
			Spec: api.ServiceSpec{
				Ports: []api.ServicePort{{
					Protocol:   "TCP",
					Port:       9376,
					TargetPort: util.NewIntOrStringFromInt(9376),
				}},
				Selector: map[string]string{
					"name": "serve-hostname",
				},
			},
		})
		glog.V(4).Infof("Service create %s/server-hostnames took %v", ns, time.Since(t))
		if err == nil {
			break
		}
		glog.Warningf("After %v failed to create service %s/serve-hostnames: %v", time.Since(start), ns, err)
	}
	if err != nil {
		glog.Warningf("Unable to create service %s/%s: %v", ns, svc.Name, err)
		return
	}
	// Clean up service
	defer func() {
		glog.Infof("Cleaning up service %s/serve-hostnames", ns)
		// Make several attempts to delete the service.
		for start := time.Now(); time.Since(start) < deleteTimeout; time.Sleep(1 * time.Second) {
			if err := c.Services(ns).Delete(svc.Name); err == nil {
				return
			}
			glog.Warningf("After %v unable to delete service %s/%s: %v", time.Since(start), ns, svc.Name, err)
		}
	}()

	// Put serve-hostname pods on each node.
	podNames := []string{}
	for i, node := range nodes.Items {
		for j := 0; j < *podsPerNode; j++ {
			podName := fmt.Sprintf("serve-hostname-%d-%d", i, j)
			podNames = append(podNames, podName)
			// Make several attempts
			for start := time.Now(); time.Since(start) < podCreateTimeout; time.Sleep(2 * time.Second) {
				glog.Infof("Creating pod %s/%s on node %s", ns, podName, node.Name)
				t := time.Now()
				_, err = c.Pods(ns).Create(&api.Pod{
					ObjectMeta: api.ObjectMeta{
						Name: podName,
						Labels: map[string]string{
							"name": "serve-hostname",
						},
					},
					Spec: api.PodSpec{
						Containers: []api.Container{
							{
								Name:  "serve-hostname",
								Image: "qingyuan/serve_hostname:1.1",
								Ports: []api.ContainerPort{{ContainerPort: 9376}},
							},
						},
						NodeName: node.Name,
					},
				})
				glog.V(4).Infof("Pod create %s/%s request took %v", ns, podName, time.Since(t))
				if err == nil {
					break
				}
				glog.Warningf("After %s failed to create pod %s/%s: %v", time.Since(start), ns, podName, err)
			}
			if err != nil {
				glog.Warningf("Failed to create pod %s/%s: %v", ns, podName, err)
				return
			}
		}
	}
	// Clean up the pods
	defer func() {
		glog.Info("Cleaning up pods")
		// Make several attempts to delete the pods.
		for _, podName := range podNames {
			for start := time.Now(); time.Since(start) < deleteTimeout; time.Sleep(1 * time.Second) {
				if err = c.Pods(ns).Delete(podName, nil); err == nil {
					break
				}
				glog.Warningf("After %v failed to delete pod %s/%s: %v", time.Since(start), ns, podName, err)
			}
		}
	}()

	glog.Info("Waiting for the serve-hostname pods to be ready")
	for _, podName := range podNames {
		var pod *api.Pod
		for start := time.Now(); time.Since(start) < podStartTimeout; time.Sleep(5 * time.Second) {
			pod, err = c.Pods(ns).Get(podName)
			if err != nil {
				glog.Warningf("Get pod %s/%s failed, ignoring for %v: %v", ns, podName, err, podStartTimeout)
				continue
			}
			if pod.Status.Phase == api.PodRunning {
				break
			}
		}
		if pod.Status.Phase != api.PodRunning {
			glog.Warningf("Gave up waiting on pod %s/%s to be running (saw %v)", ns, podName, pod.Status.Phase)
		} else {
			glog.Infof("%s/%s is running", ns, podName)
		}
	}

	// Wait for the endpoints to propagate.
	for start := time.Now(); time.Since(start) < endpointTimeout; time.Sleep(10 * time.Second) {
		_, err = http.Get(fmt.Sprintf("http://serve-hostnames.%s:9376", ns))
		if err == nil {
			break
		}
		glog.Infof("After %v while making a request got error %v", time.Since(start), err)
	}
	if err != nil {
		glog.Errorf("Failed to get a response from service: %v", err)
	}

	// Repeatedly make requests.
	for iteration := 0; iteration != *upTo; iteration++ {
		responseChan := make(chan string, queries)
		// Use a channel of size *maxPar to throttle the number
		// of in-flight requests to avoid overloading the service.
		inFlight := make(chan struct{}, *maxPar)
		start := time.Now()
		for q := 0; q < queries; q++ {
			go func(i int, query int) {
				inFlight <- struct{}{}
				t := time.Now()
				resp, err := http.Get(fmt.Sprintf("http://serve-hostnames.%s:9376", ns))
				glog.V(4).Infof("Call to serve-hostnames in namespace %s took %v", ns, time.Since(t))
				if err != nil {
					glog.Warningf("Call failed during iteration %d query %d : %v", i, query, err)
					// If the query failed return a string which starts with a character
					// that can't be part of a hostname.
					responseChan <- fmt.Sprintf("!failed in iteration %d to issue query %d: %v", i, query, err)
				} else {
					defer resp.Body.Close()
					hostname, err := ioutil.ReadAll(resp.Body)
					if err != nil {
						responseChan <- fmt.Sprintf("!failed in iteration %d to read body of response: %v", i, err)
					} else {
						responseChan <- string(hostname)
					}
				}
				<-inFlight
			}(iteration, q)
		}
		responses := make(map[string]int, *podsPerNode*len(nodes.Items))
		missing := 0
		for q := 0; q < queries; q++ {
			r := <-responseChan
			glog.V(4).Infof("Got response from %s", r)
			responses[r]++
			// If the returned hostname starts with '!' then it indicates
			// an error response.
			if len(r) > 0 && r[0] == '!' {
				glog.V(3).Infof("Got response %s", r)
				missing++
			}
		}
		if missing > 0 {
			glog.Warningf("Missing %d responses out of %d", missing, queries)
		}
		// Report any nodes that did not respond.
		for n, node := range nodes.Items {
			for i := 0; i < *podsPerNode; i++ {
				name := fmt.Sprintf("serve-hostname-%d-%d", n, i)
				if _, ok := responses[name]; !ok {
					glog.Warningf("No response from pod %s on node %s at iteration %d", name, node.Name, iteration)
				}
			}
		}
		glog.Infof("Iteration %d took %v for %d queries (%.2f QPS) with %d missing",
			iteration, time.Since(start), queries-missing, float64(queries-missing)/time.Since(start).Seconds(), missing)
	}
}