Ejemplo n.º 1
0
// authenticateToMaster checks whether the given service account token can be
// used to reach the master at d.MasterUrl and authenticate against it.
// The outcome is recorded on r: an error if the API clients cannot be built
// or the lookup of the current user ("~") fails, a warning if no answer
// arrives within four seconds, and an info message on success.
func (d PodCheckAuth) authenticateToMaster(token string, r types.DiagnosticResult) {
	restCfg := restclient.Config{
		TLSClientConfig: restclient.TLSClientConfig{CAFile: d.MasterCaPath},
		BearerToken:     token,
	}
	cfg := &clientcmd.Config{
		MasterAddr:     flagtypes.Addr{Value: d.MasterUrl}.Default(),
		KubernetesAddr: flagtypes.Addr{Value: d.MasterUrl}.Default(),
		CommonConfig:   restCfg,
	}

	osClient, _, _, clientErr := cfg.Clients()
	if clientErr != nil {
		r.Error("DP1002", clientErr, fmt.Sprintf("could not create API clients from the service account client config: %v", clientErr))
		return
	}

	// Buffered with capacity one so the lookup goroutine can always deliver
	// its result and exit, even after we have stopped waiting for it.
	result := make(chan error, 1)
	go func() {
		_, getErr := osClient.Users().Get("~")
		result <- getErr
	}()

	deadline := time.After(4 * time.Second) // timeout per query
	select {
	case authErr := <-result:
		if authErr == nil {
			r.Info("DP1004", "Service account token successfully authenticated to master")
			return
		}
		r.Error("DP1003", authErr, fmt.Sprintf("Could not authenticate to the master with the service account credentials: %v", authErr))
	case <-deadline:
		r.Warn("DP1005", nil, "A request to the master timed out.\nThis could be temporary but could also indicate network or DNS problems.")
	}
}
Ejemplo n.º 2
0
// verifyRegistryImageStream creates a throwaway ImageStream in the default
// namespace, reads it back, and compares the host part of its
// Status.DockerImageRepository with the registry service's
// "<ClusterIP>:<first port>". A mismatch is reported as an error on r.
// The test ImageStream is deleted before returning; a failure to delete it
// is reported as a warning. Nothing is created when d.PreventModification
// is set.
func (d *ClusterRegistry) verifyRegistryImageStream(service *kapi.Service, r types.DiagnosticResult) {
	if d.PreventModification {
		r.Info("DClu1021", "Skipping creating an ImageStream to test registry service address, because you requested no API modifications.")
		return
	}
	imgStream, err := d.OsClient.ImageStreams(kapi.NamespaceDefault).Create(&osapi.ImageStream{ObjectMeta: kapi.ObjectMeta{GenerateName: "diagnostic-test"}})
	if err != nil {
		r.Error("DClu1015", err, fmt.Sprintf("Creating test ImageStream failed. Error: (%T) %[1]v", err))
		return
	}
	defer func() { // delete what we created, or notify that we couldn't
		if err := d.OsClient.ImageStreams(kapi.NamespaceDefault).Delete(imgStream.ObjectMeta.Name); err != nil {
			r.Warn("DClu1016", err, fmt.Sprintf(clRegISDelFail, imgStream.ObjectMeta.Name, fmt.Sprintf("(%T) %[1]s", err)))
		}
	}()

	// The status is filled in post-create, so read the object back. The result
	// is held in a separate variable until the Get is known to have succeeded:
	// the deferred cleanup reads imgStream, and it must still name the object
	// we created if this call fails.
	fetched, err := d.OsClient.ImageStreams(kapi.NamespaceDefault).Get(imgStream.ObjectMeta.Name)
	if err != nil {
		r.Error("DClu1017", err, fmt.Sprintf("Getting created test ImageStream failed. Error: (%T) %[1]v", err))
		return
	}
	imgStream = fetched
	r.Debug("DClu1018", fmt.Sprintf("Created test ImageStream: %[1]v", imgStream))

	// Everything before the first "/" of the repository is the registry host.
	cacheHost := strings.SplitN(imgStream.Status.DockerImageRepository, "/", 2)[0]
	serviceHost := fmt.Sprintf("%s:%d", service.Spec.ClusterIP, service.Spec.Ports[0].Port)
	if cacheHost != serviceHost {
		r.Error("DClu1019", nil, fmt.Sprintf(clRegISMismatch, registryName, serviceHost, cacheHost))
	}
}
Ejemplo n.º 3
0
// getRegistryPods lists the pods in the default namespace selected by the
// registry service and returns pointers to those in the Running phase.
//
// Problems are reported on r: a failed list call or an empty result are
// errors (and yield an empty slice); pods that are not running produce a
// warning each. When more than one pod is found, the first pod's spec is
// inspected for a registry volume backed by EmptyDir, which is reported as
// an error unless a volume is mounted over the registry configuration path.
// Logs of every running pod are checked via d.checkRegistryLogs.
func (d *ClusterRegistry) getRegistryPods(service *kapi.Service, r types.DiagnosticResult) []*kapi.Pod {
	runningPods := []*kapi.Pod{}
	pods, err := d.KubeClient.Core().Pods(kapi.NamespaceDefault).List(kapi.ListOptions{LabelSelector: labels.SelectorFromSet(service.Spec.Selector)})
	if err != nil {
		r.Error("DClu1005", err, fmt.Sprintf("Finding pods for '%s' service failed. This should never happen. Error: (%T) %[2]v", registryName, err))
		return runningPods
	}
	if len(pods.Items) < 1 {
		r.Error("DClu1006", nil, fmt.Sprintf(clRegNoPods, registryName))
		return runningPods
	}

	if len(pods.Items) > 1 {
		emptyDir := false     // multiple registry pods using EmptyDir will be inconsistent
		customConfig := false // ... unless the user has configured them for e.g. S3
		configPath := "/config.yml"
		// look through the pod volumes to see if that might be a problem
		podSpec := pods.Items[0].Spec
		container := podSpec.Containers[0]
		for _, volume := range podSpec.Volumes {
			if volume.Name == registryVolume && volume.EmptyDir != nil {
				emptyDir = true
			}
		}
		for _, env := range container.Env {
			if env.Name == "REGISTRY_CONFIGURATION_PATH" {
				configPath = env.Value // look for custom config here
			}
		}
		for _, vmount := range container.VolumeMounts {
			if strings.HasPrefix(configPath, vmount.MountPath) {
				customConfig = true // if something's mounted there, assume custom config.
			}
		}
		if emptyDir {
			if customConfig { // assume they know what they're doing
				r.Info("DClu1020", fmt.Sprintf(clRegMultiCustomCfg, registryName, configPath))
			} else { // assume they scaled up with ephemeral storage
				r.Error("DClu1007", nil, fmt.Sprintf(clRegMultiPods, registryName))
			}
		}
	}

	// Iterate by index and take the address of the slice element. Taking the
	// address of a range value variable would, before Go 1.22, make every
	// entry of runningPods point at the same variable (the last pod seen).
	for i := range pods.Items {
		pod := &pods.Items[i]
		r.Debug("DClu1008", fmt.Sprintf("Found %s pod with name %s", registryName, pod.ObjectMeta.Name))
		if pod.Status.Phase != kapi.PodRunning {
			r.Warn("DClu1009", nil, fmt.Sprintf(clRegPodDown, pod.ObjectMeta.Name, registryName))
			continue
		}
		runningPods = append(runningPods, pod)
		// Check the logs for that pod for common issues (credentials, DNS resolution failure)
		d.checkRegistryLogs(pod, r)
	}
	return runningPods
}
Ejemplo n.º 4
0
// retrieveLoggingProject returns the name of the project whose route host
// matches the master config's AssetConfig.LoggingPublicURL, or "" when it
// cannot be determined.
//
// It returns "" (after reporting on r) when the URL is unset or unparsable,
// when routes cannot be listed, when no route matches, or when the matching
// project cannot be retrieved. If several routes match, the first one wins
// and each further match is reported as a warning. A warning is also issued
// when the project's "openshift.io/node-selector" annotation is missing or
// non-empty.
func retrieveLoggingProject(r types.DiagnosticResult, masterCfg *configapi.MasterConfig, osClient *client.Client) string {
	r.Debug("AGL0010", fmt.Sprintf("masterConfig.AssetConfig.LoggingPublicURL: '%s'", masterCfg.AssetConfig.LoggingPublicURL))
	projectName := ""
	if len(masterCfg.AssetConfig.LoggingPublicURL) == 0 {
		r.Debug("AGL0017", "masterConfig.AssetConfig.LoggingPublicURL is empty")
		return projectName
	}

	loggingURL, err := url.Parse(masterCfg.AssetConfig.LoggingPublicURL)
	if err != nil {
		r.Error("AGL0011", err, fmt.Sprintf("Unable to parse the loggingPublicURL from the masterConfig '%s'", masterCfg.AssetConfig.LoggingPublicURL))
		return projectName
	}

	routeList, err := osClient.Routes(kapi.NamespaceAll).List(kapi.ListOptions{LabelSelector: loggingSelector.AsSelector()})
	if err != nil {
		r.Error("AGL0012", err, fmt.Sprintf("There was an error while trying to find the route associated with '%s' which is probably transient: %s", loggingURL, err))
		return projectName
	}

	for _, route := range routeList.Items {
		r.Debug("AGL0013", fmt.Sprintf("Comparing URL to route.Spec.Host: %s", route.Spec.Host))
		if loggingURL.Host != route.Spec.Host {
			continue
		}
		if len(projectName) == 0 {
			projectName = route.ObjectMeta.Namespace
			r.Info("AGL0015", fmt.Sprintf("Found route '%s' matching logging URL '%s' in project: '%s'", route.ObjectMeta.Name, loggingURL.Host, projectName))
			continue
		}
		// Report the namespace of the additional route itself; projectName
		// still holds the project of the first match.
		r.Warn("AGL0019", nil, fmt.Sprintf("Found additional route '%s' matching logging URL '%s' in project: '%s'.  This could mean you have multiple logging deployments.", route.ObjectMeta.Name, loggingURL.Host, route.ObjectMeta.Namespace))
	}
	if len(projectName) == 0 {
		message := fmt.Sprintf("Unable to find a route matching the loggingPublicURL defined in the master config '%s'. Check that the URL is correct and aggregated logging is deployed.", loggingURL)
		r.Error("AGL0014", errors.New(message), message)
		return ""
	}

	project, err := osClient.Projects().Get(projectName)
	if err != nil {
		r.Error("AGL0018", err, fmt.Sprintf("There was an error retrieving project '%s' which is most likely a transient error: %s", projectName, err))
		return ""
	}
	// Warn unless the annotation is present and explicitly empty.
	nodeSelector, ok := project.ObjectMeta.Annotations["openshift.io/node-selector"]
	if !ok || len(nodeSelector) != 0 {
		r.Warn("AGL0030", nil, fmt.Sprintf(projectNodeSelectorWarning, projectName))
	}
	return projectName
}
Ejemplo n.º 5
0
// ----------------------------------------------------------
// canOpenConfigFile reports whether the file at path can be opened, read and
// parsed as a client config.
//
// An empty path yields false without any report. If the file cannot be
// opened and errmsg is empty, only a debug message is recorded; otherwise
// errmsg is used as the prefix of the message describing why opening failed.
// Read and parse failures are always reported as errors on r.
func (d ConfigLoading) canOpenConfigFile(path string, errmsg string, r types.DiagnosticResult) bool {
	if path == "" { // empty param/envvar
		return false
	}

	file, openErr := os.Open(path)
	if openErr != nil {
		switch {
		case errmsg == "":
			r.Debug("DCli1005", fmt.Sprintf("Could not read client config at %s:\n%#v", path, openErr))
		case os.IsNotExist(openErr):
			r.Debug("DCli1006", errmsg+"but that file does not exist.")
		case os.IsPermission(openErr):
			r.Error("DCli1007", openErr, errmsg+"but lack permission to read that file.")
		default:
			r.Error("DCli1008", openErr, fmt.Sprintf("%sbut there was an error opening it:\n%#v", errmsg, openErr))
		}
		return false
	}
	defer file.Close()
	r.Debug("DCli1004", fmt.Sprintf("Reading client config at %s", path))

	contents, readErr := ioutil.ReadAll(file)
	if readErr != nil {
		r.Error("DCli1009", readErr, fmt.Sprintf("Unexpected error while reading client config file (%s): %v", path, readErr))
		return false
	}
	if _, loadErr := clientcmd.Load(contents); loadErr != nil {
		r.Error("DCli1010", loadErr, fmt.Sprintf(`
Error reading YAML from client config file (%s):
  %v
This file may have been truncated or mis-edited.
Please fix, remove, or obtain a new client config`, file.Name(), loadErr))
		return false
	}

	// The parsed config is deliberately discarded. The openshift client code
	// is left to combine flags, env vars and the hierarchy of config files
	// into the configuration actually used; for diagnostic purposes we only
	// record which files were found to be readable.
	r.Info("DCli1011", fmt.Sprintf("Successfully read a client config file at '%s'", path))
	return true
}
Ejemplo n.º 6
0
// processRegistryRequest sends a HEAD request to url using token as the
// basic-auth password and reports the authentication outcome on r.
//
// It returns nil whenever the outcome was reported on r: any HTTP response
// (200 is success, 401/403 mean the token was rejected, anything else is
// unexpected) or a canceled request, which is reported as a timeout. It
// returns a non-nil error when the request cannot be constructed or fails
// for any other reason, leaving the handling to the caller.
func processRegistryRequest(client *http.Client, url string, token string, r types.DiagnosticResult) error {
	req, err := http.NewRequest("HEAD", url, nil)
	if err != nil {
		// An unparsable URL leaves req nil; bail out instead of panicking below.
		return err
	}
	req.SetBasicAuth("anyname", token)

	response, err := client.Do(req)
	if err != nil {
		if strings.Contains(err.Error(), "net/http: request canceled") {
			// (*url.Error) Head https://docker-registry.default.svc.cluster.local:5000/v2/: net/http: request canceled while waiting for connection
			r.Error("DP1014", err, "Request to integrated registry timed out; this typically indicates network or SDN problems.")
			return nil
		}
		return err
	}
	// The body must be closed even for HEAD so the transport can release the connection.
	defer response.Body.Close()

	switch response.StatusCode {
	case http.StatusUnauthorized, http.StatusForbidden:
		r.Error("DP1010", nil, "Service account token was not accepted by the integrated registry for authentication.\nThis indicates possible network problems or misconfiguration of the registry.")
	case http.StatusOK:
		r.Info("DP1011", "Service account token was authenticated by the integrated registry.")
	default:
		r.Error("DP1012", nil, fmt.Sprintf("Unexpected status code from integrated registry authentication:\n%s", response.Status))
	}
	return nil
}
Ejemplo n.º 7
0
// runDiagnosticPod creates a pod in d.Namespace that runs
// "openshift infra diagnostic-pod" from the expanded "deployer" image,
// follows its logs until the stream ends, and reports the result on r.
//
// The pod's log level is d.Level capped at 2. The pod is deleted with a zero
// grace period before returning; a failed delete is reported as an error.
// Obtaining the log stream is retried up to 25 times with a growing delay;
// if no log line can be read by then, a warning is issued and the function
// returns. The "[Note] Errors seen: N" and "[Note] Warnings seen: N" lines in
// the pod output decide whether the collected logs are reported as an error,
// a warning, or plain info. The service parameter is not used here.
func (d *DiagnosticPod) runDiagnosticPod(service *kapi.Service, r types.DiagnosticResult) {
	loglevel := d.Level
	if loglevel > 2 {
		loglevel = 2 // need to show summary at least
	}
	imageName := d.ImageTemplate.ExpandOrDie("deployer")
	pod, err := d.KubeClient.Core().Pods(d.Namespace).Create(&kapi.Pod{
		ObjectMeta: kapi.ObjectMeta{GenerateName: "pod-diagnostic-test-"},
		Spec: kapi.PodSpec{
			RestartPolicy: kapi.RestartPolicyNever,
			Containers: []kapi.Container{
				{
					Name:    "pod-diagnostics",
					Image:   imageName,
					Command: []string{"openshift", "infra", "diagnostic-pod", "-l", strconv.Itoa(loglevel)},
				},
			},
		},
	})
	if err != nil {
		r.Error("DCli2001", err, fmt.Sprintf("Creating diagnostic pod with image %s failed. Error: (%[2]T) %[2]v", imageName, err))
		return
	}
	defer func() { // delete what we created, or notify that we couldn't
		zero := int64(0)
		delOpts := kapi.DeleteOptions{TypeMeta: pod.TypeMeta, GracePeriodSeconds: &zero}
		if err := d.KubeClient.Core().Pods(d.Namespace).Delete(pod.ObjectMeta.Name, &delOpts); err != nil {
			r.Error("DCl2002", err, fmt.Sprintf("Deleting diagnostic pod '%s' failed. Error: %s", pod.ObjectMeta.Name, fmt.Sprintf("(%T) %[1]s", err)))
		}
	}()

	// The status is filled in post-create, so read the pod back. Only replace
	// pod once the Get has succeeded: the deferred cleanup reads pod and must
	// still be able to name the created object if this call fails.
	fetched, err := d.KubeClient.Core().Pods(d.Namespace).Get(pod.ObjectMeta.Name)
	if err != nil {
		r.Error("DCli2003", err, fmt.Sprintf("Retrieving the diagnostic pod definition failed. Error: (%T) %[1]v", err))
		return
	}
	pod = fetched
	r.Debug("DCli2004", fmt.Sprintf("Created diagnostic pod named %v running image %s.", pod.ObjectMeta.Name, imageName))

	bytelim := int64(1024000)
	podLogsOpts := &kapi.PodLogOptions{
		TypeMeta:   pod.TypeMeta,
		Container:  "pod-diagnostics",
		Follow:     true,
		LimitBytes: &bytelim,
	}
	req, err := d.Factory.LogsForObject(pod, podLogsOpts)
	if err != nil {
		r.Error("DCli2005", err, fmt.Sprintf("The request for diagnostic pod logs failed unexpectedly. Error: (%T) %[1]v", err))
		return
	}

	// wait for pod to be started and logs available
	var scanner *bufio.Scanner
	var lastError error
	for times := 1; times <= 25; times++ {
		readCloser, err := req.Stream()
		if err != nil {
			lastError = err
			r.Debug("DCli2010", fmt.Sprintf("Could not get diagnostic pod logs (loop %d): (%[2]T) %[2]v", times, err))
			time.Sleep(time.Duration(times*100) * time.Millisecond)
			continue
		}
		// make sure we can actually get something from the stream before going on.
		// it seems the creation of docker logs can trail the container start a bit.
		lineScanner := bufio.NewScanner(readCloser)
		if lineScanner.Scan() {
			// This is the stream we will keep reading; release it on return.
			defer readCloser.Close()
			scanner = lineScanner
			break // success - drop down to reading the logs.
		}
		// no luck - this stream is abandoned, so close it now rather than
		// keeping every failed attempt open until the function returns.
		readCloser.Close()
		lastError = fmt.Errorf("Diagnostics pod is ready but not its logs (loop %d). Retry.", times)
		r.Debug("DCli2010", lastError.Error())
		time.Sleep(time.Duration(times*100) * time.Millisecond)
	}
	if scanner == nil {
		// after 25 times trying; pass the last retry error, since the outer
		// err is necessarily nil at this point.
		r.Warn("DCli2006", lastError, fmt.Sprintf("Timed out preparing diagnostic pod logs for streaming, so this diagnostic cannot run.\nIt is likely that the image '%s' was not pulled and running yet.\nLast error: (%[2]T) %[2]v", pod.Spec.Containers[0].Image, lastError))
		return
	}

	// then watch logs and wait until it exits
	podLogs, warnCount, errCount := "", 0, 0
	errorRegex := regexp.MustCompile(`^\[Note\]\s+Errors\s+seen:\s+(\d+)`)
	warnRegex := regexp.MustCompile(`^\[Note\]\s+Warnings\s+seen:\s+(\d+)`)
	// keep in mind one test line was already scanned, so scan after the loop runs once
	for scanned := true; scanned; scanned = scanner.Scan() {
		line := scanner.Text()
		podLogs += line + "\n"
		if matches := errorRegex.FindStringSubmatch(line); matches != nil {
			// the pattern only captures digits; a conversion failure leaves 0
			errCount, _ = strconv.Atoi(matches[1])
		} else if matches := warnRegex.FindStringSubmatch(line); matches != nil {
			warnCount, _ = strconv.Atoi(matches[1])
		}
	}
	if err := scanner.Err(); err != nil { // Scan terminated abnormally
		r.Error("DCli2009", err, fmt.Sprintf("Unexpected error reading diagnostic pod logs: (%T) %[1]v\nLogs are:\n%[2]s", err, podLogs))
		return
	}
	switch {
	case errCount > 0:
		r.Error("DCli2012", nil, "See the errors below in the output from the diagnostic pod:\n"+podLogs)
	case warnCount > 0:
		r.Warn("DCli2013", nil, "See the warnings below in the output from the diagnostic pod:\n"+podLogs)
	default:
		r.Info("DCli2008", fmt.Sprintf("Output from the diagnostic pod (image %s):\n", imageName)+podLogs)
	}
}