// authenticateToMaster tests whether we can use the serviceaccount token // to reach the master and authenticate func (d PodCheckAuth) authenticateToMaster(token string, r types.DiagnosticResult) { clientConfig := &clientcmd.Config{ MasterAddr: flagtypes.Addr{Value: d.MasterUrl}.Default(), KubernetesAddr: flagtypes.Addr{Value: d.MasterUrl}.Default(), CommonConfig: restclient.Config{ TLSClientConfig: restclient.TLSClientConfig{CAFile: d.MasterCaPath}, BearerToken: token, }, } oclient, _, _, err := clientConfig.Clients() if err != nil { r.Error("DP1002", err, fmt.Sprintf("could not create API clients from the service account client config: %v", err)) return } rchan := make(chan error, 1) // for concurrency with timeout go func() { _, err := oclient.Users().Get("~") rchan <- err }() select { case <-time.After(time.Second * 4): // timeout per query r.Warn("DP1005", nil, "A request to the master timed out.\nThis could be temporary but could also indicate network or DNS problems.") case err := <-rchan: if err != nil { r.Error("DP1003", err, fmt.Sprintf("Could not authenticate to the master with the service account credentials: %v", err)) } else { r.Info("DP1004", "Service account token successfully authenticated to master") } } return }
func (d *ClusterRegistry) verifyRegistryImageStream(service *kapi.Service, r types.DiagnosticResult) { if d.PreventModification { r.Info("DClu1021", "Skipping creating an ImageStream to test registry service address, because you requested no API modifications.") return } imgStream, err := d.OsClient.ImageStreams(kapi.NamespaceDefault).Create(&osapi.ImageStream{ObjectMeta: kapi.ObjectMeta{GenerateName: "diagnostic-test"}}) if err != nil { r.Error("DClu1015", err, fmt.Sprintf("Creating test ImageStream failed. Error: (%T) %[1]v", err)) return } defer func() { // delete what we created, or notify that we couldn't if err := d.OsClient.ImageStreams(kapi.NamespaceDefault).Delete(imgStream.ObjectMeta.Name); err != nil { r.Warn("DClu1016", err, fmt.Sprintf(clRegISDelFail, imgStream.ObjectMeta.Name, fmt.Sprintf("(%T) %[1]s", err))) } }() imgStream, err = d.OsClient.ImageStreams(kapi.NamespaceDefault).Get(imgStream.ObjectMeta.Name) // status is filled in post-create if err != nil { r.Error("DClu1017", err, fmt.Sprintf("Getting created test ImageStream failed. Error: (%T) %[1]v", err)) return } r.Debug("DClu1018", fmt.Sprintf("Created test ImageStream: %[1]v", imgStream)) cacheHost := strings.SplitN(imgStream.Status.DockerImageRepository, "/", 2)[0] serviceHost := fmt.Sprintf("%s:%d", service.Spec.ClusterIP, service.Spec.Ports[0].Port) if cacheHost != serviceHost { r.Error("DClu1019", nil, fmt.Sprintf(clRegISMismatch, registryName, serviceHost, cacheHost)) } }
func (d *ClusterRegistry) getRegistryPods(service *kapi.Service, r types.DiagnosticResult) []*kapi.Pod { runningPods := []*kapi.Pod{} pods, err := d.KubeClient.Core().Pods(kapi.NamespaceDefault).List(kapi.ListOptions{LabelSelector: labels.SelectorFromSet(service.Spec.Selector)}) if err != nil { r.Error("DClu1005", err, fmt.Sprintf("Finding pods for '%s' service failed. This should never happen. Error: (%T) %[2]v", registryName, err)) return runningPods } else if len(pods.Items) < 1 { r.Error("DClu1006", nil, fmt.Sprintf(clRegNoPods, registryName)) return runningPods } else if len(pods.Items) > 1 { emptyDir := false // multiple registry pods using EmptyDir will be inconsistent customConfig := false // ... unless the user has configured them for e.g. S3 configPath := "/config.yml" // look through the pod volumes to see if that might be a problem podSpec := pods.Items[0].Spec container := podSpec.Containers[0] for _, volume := range podSpec.Volumes { if volume.Name == registryVolume && volume.EmptyDir != nil { emptyDir = true } } for _, env := range container.Env { if env.Name == "REGISTRY_CONFIGURATION_PATH" { configPath = env.Value // look for custom config here } } for _, vmount := range container.VolumeMounts { if strings.HasPrefix(configPath, vmount.MountPath) { customConfig = true // if something's mounted there, assume custom config. } } if emptyDir { if customConfig { // assume they know what they're doing r.Info("DClu1020", fmt.Sprintf(clRegMultiCustomCfg, registryName, configPath)) } else { // assume they scaled up with ephemeral storage r.Error("DClu1007", nil, fmt.Sprintf(clRegMultiPods, registryName)) } } } for _, pod := range pods.Items { r.Debug("DClu1008", fmt.Sprintf("Found %s pod with name %s", registryName, pod.ObjectMeta.Name)) if pod.Status.Phase != kapi.PodRunning { r.Warn("DClu1009", nil, fmt.Sprintf(clRegPodDown, pod.ObjectMeta.Name, registryName)) } else { runningPods = append(runningPods, &pod) // Check the logs for that pod for common issues (credentials, DNS resolution failure) d.checkRegistryLogs(&pod, r) } } return runningPods }
func retrieveLoggingProject(r types.DiagnosticResult, masterCfg *configapi.MasterConfig, osClient *client.Client) string { r.Debug("AGL0010", fmt.Sprintf("masterConfig.AssetConfig.LoggingPublicURL: '%s'", masterCfg.AssetConfig.LoggingPublicURL)) projectName := "" if len(masterCfg.AssetConfig.LoggingPublicURL) == 0 { r.Debug("AGL0017", "masterConfig.AssetConfig.LoggingPublicURL is empty") return projectName } loggingUrl, err := url.Parse(masterCfg.AssetConfig.LoggingPublicURL) if err != nil { r.Error("AGL0011", err, fmt.Sprintf("Unable to parse the loggingPublicURL from the masterConfig '%s'", masterCfg.AssetConfig.LoggingPublicURL)) return projectName } routeList, err := osClient.Routes(kapi.NamespaceAll).List(kapi.ListOptions{LabelSelector: loggingSelector.AsSelector()}) if err != nil { r.Error("AGL0012", err, fmt.Sprintf("There was an error while trying to find the route associated with '%s' which is probably transient: %s", loggingUrl, err)) return projectName } for _, route := range routeList.Items { r.Debug("AGL0013", fmt.Sprintf("Comparing URL to route.Spec.Host: %s", route.Spec.Host)) if loggingUrl.Host == route.Spec.Host { if len(projectName) == 0 { projectName = route.ObjectMeta.Namespace r.Info("AGL0015", fmt.Sprintf("Found route '%s' matching logging URL '%s' in project: '%s'", route.ObjectMeta.Name, loggingUrl.Host, projectName)) } else { r.Warn("AGL0019", nil, fmt.Sprintf("Found additional route '%s' matching logging URL '%s' in project: '%s'. This could mean you have multiple logging deployments.", route.ObjectMeta.Name, loggingUrl.Host, projectName)) } } } if len(projectName) == 0 { message := fmt.Sprintf("Unable to find a route matching the loggingPublicURL defined in the master config '%s'. Check that the URL is correct and aggregated logging is deployed.", loggingUrl) r.Error("AGL0014", errors.New(message), message) return "" } project, err := osClient.Projects().Get(projectName) if err != nil { r.Error("AGL0018", err, fmt.Sprintf("There was an error retrieving project '%s' which is most likely a transient error: %s", projectName, err)) return "" } nodeSelector, ok := project.ObjectMeta.Annotations["openshift.io/node-selector"] if !ok || len(nodeSelector) != 0 { r.Warn("AGL0030", nil, fmt.Sprintf(projectNodeSelectorWarning, projectName)) } return projectName }
// ---------------------------------------------------------- // Attempt to open file at path as client config // If there is a problem and errmsg is set, log an error func (d ConfigLoading) canOpenConfigFile(path string, errmsg string, r types.DiagnosticResult) bool { var file *os.File var err error if path == "" { // empty param/envvar return false } else if file, err = os.Open(path); err == nil { r.Debug("DCli1004", fmt.Sprintf("Reading client config at %s", path)) } else if errmsg == "" { r.Debug("DCli1005", fmt.Sprintf("Could not read client config at %s:\n%#v", path, err)) } else if os.IsNotExist(err) { r.Debug("DCli1006", errmsg+"but that file does not exist.") } else if os.IsPermission(err) { r.Error("DCli1007", err, errmsg+"but lack permission to read that file.") } else { r.Error("DCli1008", err, fmt.Sprintf("%sbut there was an error opening it:\n%#v", errmsg, err)) } if file != nil { // it is open for reading defer file.Close() if buffer, err := ioutil.ReadAll(file); err != nil { r.Error("DCli1009", err, fmt.Sprintf("Unexpected error while reading client config file (%s): %v", path, err)) } else if _, err := clientcmd.Load(buffer); err != nil { r.Error("DCli1010", err, fmt.Sprintf(` Error reading YAML from client config file (%s): %v This file may have been truncated or mis-edited. Please fix, remove, or obtain a new client config`, file.Name(), err)) } else { r.Info("DCli1011", fmt.Sprintf("Successfully read a client config file at '%s'", path)) /* Note, we're not going to use this config file directly. * Instead, we'll defer to the openshift client code to assimilate * flags, env vars, and the potential hierarchy of config files * into an actual configuration that the client uses. * However, for diagnostic purposes, record the files we find. */ return true } } return false }
// makes a request, handles some http/connection errors, returns any others func processRegistryRequest(client *http.Client, url string, token string, r types.DiagnosticResult) error { req, _ := http.NewRequest("HEAD", url, nil) req.SetBasicAuth("anyname", token) response, err := client.Do(req) if err == nil { switch response.StatusCode { case 401, 403: r.Error("DP1010", nil, "Service account token was not accepted by the integrated registry for authentication.\nThis indicates possible network problems or misconfiguration of the registry.") case 200: r.Info("DP1011", "Service account token was authenticated by the integrated registry.") default: r.Error("DP1012", nil, fmt.Sprintf("Unexpected status code from integrated registry authentication:\n%s", response.Status)) } return nil } else if strings.Contains(err.Error(), "net/http: request canceled") { // (*url.Error) Head https://docker-registry.default.svc.cluster.local:5000/v2/: net/http: request canceled while waiting for connection r.Error("DP1014", err, "Request to integrated registry timed out; this typically indicates network or SDN problems.") return nil } return err // fall back to non-secured access }
func (d *DiagnosticPod) runDiagnosticPod(service *kapi.Service, r types.DiagnosticResult) { loglevel := d.Level if loglevel > 2 { loglevel = 2 // need to show summary at least } imageName := d.ImageTemplate.ExpandOrDie("deployer") pod, err := d.KubeClient.Core().Pods(d.Namespace).Create(&kapi.Pod{ ObjectMeta: kapi.ObjectMeta{GenerateName: "pod-diagnostic-test-"}, Spec: kapi.PodSpec{ RestartPolicy: kapi.RestartPolicyNever, Containers: []kapi.Container{ { Name: "pod-diagnostics", Image: imageName, Command: []string{"openshift", "infra", "diagnostic-pod", "-l", strconv.Itoa(loglevel)}, }, }, }, }) if err != nil { r.Error("DCli2001", err, fmt.Sprintf("Creating diagnostic pod with image %s failed. Error: (%[2]T) %[2]v", imageName, err)) return } defer func() { // delete what we created, or notify that we couldn't zero := int64(0) delOpts := kapi.DeleteOptions{TypeMeta: pod.TypeMeta, GracePeriodSeconds: &zero} if err := d.KubeClient.Core().Pods(d.Namespace).Delete(pod.ObjectMeta.Name, &delOpts); err != nil { r.Error("DCl2002", err, fmt.Sprintf("Deleting diagnostic pod '%s' failed. Error: %s", pod.ObjectMeta.Name, fmt.Sprintf("(%T) %[1]s", err))) } }() pod, err = d.KubeClient.Core().Pods(d.Namespace).Get(pod.ObjectMeta.Name) // status is filled in post-create if err != nil { r.Error("DCli2003", err, fmt.Sprintf("Retrieving the diagnostic pod definition failed. Error: (%T) %[1]v", err)) return } r.Debug("DCli2004", fmt.Sprintf("Created diagnostic pod named %v running image %s.", pod.ObjectMeta.Name, imageName)) bytelim := int64(1024000) podLogsOpts := &kapi.PodLogOptions{ TypeMeta: pod.TypeMeta, Container: "pod-diagnostics", Follow: true, LimitBytes: &bytelim, } req, err := d.Factory.LogsForObject(pod, podLogsOpts) if err != nil { r.Error("DCli2005", err, fmt.Sprintf("The request for diagnostic pod logs failed unexpectedly. Error: (%T) %[1]v", err)) return } // wait for pod to be started and logs available var scanner *bufio.Scanner var lastError error for times := 1; true; times++ { if times <= 25 { readCloser, err := req.Stream() if err != nil { lastError = err r.Debug("DCli2010", fmt.Sprintf("Could not get diagnostic pod logs (loop %d): (%T[2]) %[2]v", times, err)) time.Sleep(time.Duration(times*100) * time.Millisecond) continue } defer readCloser.Close() // make sure we can actually get something from the stream before going on. // it seems the creation of docker logs can trail the container start a bit. lineScanner := bufio.NewScanner(readCloser) if lineScanner.Scan() { scanner = lineScanner break // success - drop down to reading the logs. } // no luck - try, try again lastError = fmt.Errorf("Diagnostics pod is ready but not its logs (loop %d). Retry.", times) r.Debug("DCli2010", lastError.Error()) time.Sleep(time.Duration(times*100) * time.Millisecond) continue } // after 25 times trying: r.Warn("DCli2006", err, fmt.Sprintf("Timed out preparing diagnostic pod logs for streaming, so this diagnostic cannot run.\nIt is likely that the image '%s' was not pulled and running yet.\nLast error: (%T[2]) %[2]v", pod.Spec.Containers[0].Image, lastError)) return } // then watch logs and wait until it exits podLogs, warnings, errors := "", 0, 0 errorRegex := regexp.MustCompile(`^\[Note\]\s+Errors\s+seen:\s+(\d+)`) warnRegex := regexp.MustCompile(`^\[Note\]\s+Warnings\s+seen:\s+(\d+)`) // keep in mind one test line was already scanned, so scan after the loop runs once for scanned := true; scanned; scanned = scanner.Scan() { line := scanner.Text() podLogs += line + "\n" if matches := errorRegex.FindStringSubmatch(line); matches != nil { errors, _ = strconv.Atoi(matches[1]) } else if matches := warnRegex.FindStringSubmatch(line); matches != nil { warnings, _ = strconv.Atoi(matches[1]) } } if err := scanner.Err(); err != nil { // Scan terminated abnormally r.Error("DCli2009", err, fmt.Sprintf("Unexpected error reading diagnostic pod logs: (%T) %[1]v\nLogs are:\n%[2]s", err, podLogs)) } else { if errors > 0 { r.Error("DCli2012", nil, "See the errors below in the output from the diagnostic pod:\n"+podLogs) } else if warnings > 0 { r.Warn("DCli2013", nil, "See the warnings below in the output from the diagnostic pod:\n"+podLogs) } else { r.Info("DCli2008", fmt.Sprintf("Output from the diagnostic pod (image %s):\n", imageName)+podLogs) } } }