Example 1
func (d *MetricsApiProxy) Check() types.DiagnosticResult {
	r := types.NewDiagnosticResult(MetricsApiProxyName)

	// see if it has any active endpoints
	if endpoints, err := d.KubeClient.Core().Endpoints(MetricsApiProxyProject).Get(MetricsApiProxyService); err != nil {
		r.Error("DClu4001", err, fmt.Sprintf("Unexpected error while retrieving %[1]s service endpoints: (%[2]T) %[2]v", MetricsApiProxyService, err))
		return r
	} else {
		active := false
		if endpoints.Subsets != nil {
			for _, endpoint := range endpoints.Subsets {
				if len(endpoint.Addresses) > 0 {
					active = true
					break
				}
			}
		}
		if !active {
			r.Error("DClu4002", nil, fmt.Sprintf(errMsgNoHeapsterEndpoints, MetricsApiProxyService, MetricsApiProxyProject))
			return r
		}
	}

	// the service should respond; see if we can reach it via API proxy
	uri := fmt.Sprintf("/api/v1/proxy/namespaces/%[1]s/services/https:%[2]s:/api/v1/model/metrics", MetricsApiProxyProject, MetricsApiProxyService)
	// note in above, project and service name are already URL-safe
	result := d.KubeClient.CoreClient.RESTClient.Get().RequestURI(uri).Do()
	if err := result.Error(); err != nil {
		r.Error("DClu4003", err, fmt.Sprintf(errMsgApiProxyAccess, uri, err))
	}
	return r
}
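
Every example in this collection follows the same shape: a Check() method that returns a types.DiagnosticResult whose entries carry an ID, a severity (Debug/Info/Warn/Error), and a message. As a rough, self-contained illustration of how such diagnostics might be driven, here is a minimal sketch with hypothetical stand-in types; the real openshift/origin Diagnostic interface and DiagnosticResult are richer than this.

package main

import (
	"fmt"
	"os"
)

// Hypothetical stand-ins for the diagnostic framework used in the examples;
// the real types.DiagnosticResult records richer, templated messages.
type entry struct {
	ID, Level, Message string
}

type result struct {
	origin  string
	entries []entry
}

func (r *result) Error(id string, _ error, msg string) { r.entries = append(r.entries, entry{id, "error", msg}) }
func (r *result) Warn(id string, _ error, msg string)  { r.entries = append(r.entries, entry{id, "warn", msg}) }
func (r *result) Info(id, msg string)                  { r.entries = append(r.entries, entry{id, "info", msg}) }

// diagnostic mirrors the Check() shape seen in every example above.
type diagnostic interface {
	Name() string
	Check() *result
}

type alwaysWarns struct{}

func (alwaysWarns) Name() string { return "AlwaysWarns" }
func (alwaysWarns) Check() *result {
	r := &result{origin: "AlwaysWarns"}
	r.Warn("EX0001", nil, "example warning")
	return r
}

func main() {
	failed := false
	for _, d := range []diagnostic{alwaysWarns{}} {
		r := d.Check()
		for _, e := range r.entries {
			fmt.Printf("[%s] %s %s: %s\n", e.Level, d.Name(), e.ID, e.Message)
			if e.Level == "error" {
				failed = true
			}
		}
	}
	if failed {
		os.Exit(1)
	}
}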
Example 2
func (d NodeConfigCheck) Check() types.DiagnosticResult {
	r := types.NewDiagnosticResult(NodeConfigCheckName)
	r.Debug("DH1001", fmt.Sprintf("Looking for node config file at '%s'", d.NodeConfigFile))
	nodeConfig, err := configapilatest.ReadAndResolveNodeConfig(d.NodeConfigFile)
	if err != nil {
		r.Error("DH1002", err, fmt.Sprintf("Could not read node config file '%s':\n(%T) %[2]v", d.NodeConfigFile, err))
		return r
	}

	r.Info("DH1003", fmt.Sprintf("Found a node config file: %[1]s", d.NodeConfigFile))

	results := configvalidation.ValidateNodeConfig(nodeConfig, nil)
	if len(results.Errors) > 0 {
		errText := fmt.Sprintf("Validation of node config file '%s' failed:\n", d.NodeConfigFile)
		for _, err := range results.Errors {
			errText += fmt.Sprintf("%v\n", err)
		}
		r.Error("DH1004", nil, errText)
	}
	if len(results.Warnings) > 0 {
		warnText := fmt.Sprintf("Validation of node config file '%s' warned:\n", d.NodeConfigFile)
		for _, warn := range results.Warnings {
			warnText += fmt.Sprintf("%v\n", warn)
		}
		r.Warn("DH1005", nil, warnText)
	}
	return r
}
Example 3
// Check is part of the Diagnostic interface; it runs the actual diagnostic logic
func (d *NetworkDiagnostic) Check() types.DiagnosticResult {
	d.res = types.NewDiagnosticResult(NetworkDiagnosticName)

	var err error
	var ok bool
	d.pluginName, ok, err = util.GetOpenShiftNetworkPlugin(d.OSClient)
	if err != nil {
		d.res.Error("DNet2001", err, fmt.Sprintf("Checking network plugin failed. Error: %s", err))
		return d.res
	}
	if !ok {
		d.res.Warn("DNet2002", nil, "Skipping network diagnostics check. Reason: Not using openshift network plugin.")
		return d.res
	}

	d.nodes, err = util.GetSchedulableNodes(d.KubeClient)
	if err != nil {
		d.res.Error("DNet2003", err, fmt.Sprintf("Fetching schedulable nodes failed. Error: %s", err))
		return d.res
	}
	if len(d.nodes) == 0 {
		d.res.Warn("DNet2004", nil, "Skipping network checks. Reason: No schedulable/ready nodes found.")
		return d.res
	}

	if len(d.LogDir) == 0 {
		d.LogDir = util.NetworkDiagDefaultLogDir
	}
	d.runNetworkDiagnostic()
	return d.res
}
Example 4
func (d *ClusterRoleBindings) Check() types.DiagnosticResult {
	r := types.NewDiagnosticResult(ClusterRoleBindingsName)

	reconcileOptions := &policycmd.ReconcileClusterRoleBindingsOptions{
		Confirmed:         false,
		Union:             false,
		Out:               ioutil.Discard,
		RoleBindingClient: d.ClusterRoleBindingsClient.ClusterRoleBindings(),
	}

	changedClusterRoleBindings, err := reconcileOptions.ChangedClusterRoleBindings()
	if policycmd.IsClusterRoleBindingLookupError(err) {
		// we got a partial match, so we log the error that stopped us from getting a full match
		// but continue to interpret the partial results that we did get
		r.Warn("CRBD1008", err, fmt.Sprintf("Error finding ClusterRoleBindings: %v", err))
	} else if err != nil {
		r.Error("CRBD1000", err, fmt.Sprintf("Error inspecting ClusterRoleBindings: %v", err))
		return r
	}

	// success
	if len(changedClusterRoleBindings) == 0 {
		return r
	}

	for _, changedClusterRoleBinding := range changedClusterRoleBindings {
		actualClusterRole, err := d.ClusterRoleBindingsClient.ClusterRoleBindings().Get(changedClusterRoleBinding.Name)
		if kerrs.IsNotFound(err) {
			r.Error("CRBD1001", nil, fmt.Sprintf("clusterrolebinding/%s is missing.\n\nUse the `oadm policy reconcile-cluster-role-bindings` command to create the role binding.", changedClusterRoleBinding.Name))
			continue
		}
		if err != nil {
			r.Error("CRBD1002", err, fmt.Sprintf("Unable to get clusterrolebinding/%s: %v", changedClusterRoleBinding.Name, err))
		}

		missingSubjects, extraSubjects := policycmd.DiffObjectReferenceLists(changedClusterRoleBinding.Subjects, actualClusterRole.Subjects)
		switch {
		case len(missingSubjects) > 0:
			// Only a warning, because they can remove things like self-provisioner role from system:unauthenticated, and it's not an error
			r.Warn("CRBD1003", nil, fmt.Sprintf("clusterrolebinding/%s is missing expected subjects.\n\nUse the `oadm policy reconcile-cluster-role-bindings` command to update the role binding to include expected subjects.", changedClusterRoleBinding.Name))
		case len(extraSubjects) > 0:
			// Only info, because it is normal to use policy to grant cluster roles to users
			r.Info("CRBD1004", fmt.Sprintf("clusterrolebinding/%s has more subjects than expected.\n\nUse the `oadm policy reconcile-cluster-role-bindings` command to update the role binding to remove extra subjects.", changedClusterRoleBinding.Name))
		}

		for _, missingSubject := range missingSubjects {
			r.Info("CRBD1005", fmt.Sprintf("clusterrolebinding/%s is missing subject %v.", changedClusterRoleBinding.Name, missingSubject))
		}

		for _, extraSubject := range extraSubjects {
			r.Info("CRBD1006", fmt.Sprintf("clusterrolebinding/%s has extra subject %v.", changedClusterRoleBinding.Name, extraSubject))
		}

		r.Debug("CRBD1007", fmt.Sprintf("clusterrolebinding/%s is now %v.", changedClusterRoleBinding.Name, changedClusterRoleBinding))
	}

	return r
}
Example 5
// Check is part of the Diagnostic interface; it runs the actual diagnostic logic
func (d PodCheckDns) Check() types.DiagnosticResult {
	r := types.NewDiagnosticResult(PodCheckDnsName)

	if resolvConf, err := getResolvConf(r); err == nil {
		connectAndResolve(resolvConf, r)
		resolveSearch(resolvConf, r)
	}
	return r
}
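
Example 5's getResolvConf, connectAndResolve, and resolveSearch helpers are not shown here. The first step, reading the nameservers and search domains from the pod's /etc/resolv.conf, can be sketched with the standard library; this is a rough illustration, not the actual helper.

package main

import (
	"bufio"
	"fmt"
	"os"
	"strings"
)

// parseResolvConf pulls nameserver and search entries out of a resolv.conf file.
func parseResolvConf(path string) (nameservers, search []string, err error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, nil, err
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		fields := strings.Fields(scanner.Text())
		if len(fields) < 2 || strings.HasPrefix(fields[0], "#") {
			continue
		}
		switch fields[0] {
		case "nameserver":
			nameservers = append(nameservers, fields[1])
		case "search":
			search = append(search, fields[1:]...)
		}
	}
	return nameservers, search, scanner.Err()
}

func main() {
	ns, search, err := parseResolvConf("/etc/resolv.conf")
	if err != nil {
		fmt.Println("could not read resolv.conf:", err)
		return
	}
	fmt.Println("nameservers:", ns)
	fmt.Println("search domains:", search)
}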
Example 6
// Check is part of the Diagnostic interface; it runs the actual diagnostic logic
func (d CheckServiceNetwork) Check() types.DiagnosticResult {
	d.res = types.NewDiagnosticResult(CheckServiceNetworkName)

	pluginName, ok, err := util.GetOpenShiftNetworkPlugin(d.OSClient)
	if err != nil {
		d.res.Error("DSvcNet1001", err, fmt.Sprintf("Checking network plugin failed. Error: %s", err))
		return d.res
	}
	if !ok {
		d.res.Warn("DSvcNet1002", nil, "Skipping service connectivity test. Reason: Not using openshift network plugin.")
		return d.res
	}

	services, err := getAllServices(d.KubeClient)
	if err != nil {
		d.res.Error("DSvcNet1003", err, fmt.Sprintf("Getting all services failed. Error: %s", err))
		return d.res
	}
	if len(services) == 0 {
		d.res.Warn("DSvcNet1004", nil, "Skipping service connectivity test. Reason: No services found.")
		return d.res
	}

	localPods, _, err := util.GetLocalAndNonLocalDiagnosticPods(d.KubeClient)
	if err != nil {
		d.res.Error("DSvcNet1005", err, fmt.Sprintf("Getting local and nonlocal pods failed. Error: %s", err))
		return d.res
	}

	if sdnapi.IsOpenShiftMultitenantNetworkPlugin(pluginName) {
		netnsList, err := d.OSClient.NetNamespaces().List(kapi.ListOptions{})
		if err != nil {
			d.res.Error("DSvcNet1006", err, fmt.Sprintf("Getting all network namespaces failed. Error: %s", err))
			return d.res
		}

		d.vnidMap = map[string]uint32{}
		for _, netns := range netnsList.Items {
			d.vnidMap[netns.NetName] = netns.NetID
		}
	}

	localGlobalPods, localNonGlobalPods := util.GetGlobalAndNonGlobalPods(localPods, d.vnidMap)

	// Applicable to flat and multitenant networks
	if len(localGlobalPods) > 0 {
		d.checkConnection(localGlobalPods, services, "Skipping service connectivity test for global projects. Reason: Couldn't find a global pod.")
	}

	// Applicable to multitenant network
	isMultitenant := (d.vnidMap != nil)
	if isMultitenant {
		d.checkConnection(localNonGlobalPods, services, "Skipping service connectivity test for non-global projects. Reason: Couldn't find a non-global pod.")
	}
	return d.res
}
Example 7
// Check is part of the Diagnostic interface; it runs the actual diagnostic logic
func (d CheckExternalNetwork) Check() types.DiagnosticResult {
	r := types.NewDiagnosticResult(CheckExternalNetworkName)

	externalAddress := "www.redhat.com"
	kexecer := kexec.New()
	if _, err := kexecer.Command("ping", "-c1", "-W2", externalAddress).CombinedOutput(); err != nil {
		// Admin may intentionally block access to the external network. If this check fails it doesn't necessarily mean that something is wrong. So just warn in this case.
		r.Warn("DExtNet1001", nil, fmt.Sprintf("Pinging external address %q failed. Check if the admin intentionally blocked access to the external network. Error: %s", externalAddress, err))
	}
	return r
}
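
Example 7 shells out to ping, which assumes the binary is present in the environment and that ICMP is allowed. As an alternative sketch (not what the code above does), a TCP dial with a timeout against a placeholder host gives a coarse external-connectivity probe using only the standard library.

package main

import (
	"fmt"
	"net"
	"time"
)

// checkExternal attempts a TCP connection as a coarse reachability probe.
// The address is a placeholder; any externally reachable host:port would do.
func checkExternal(addr string, timeout time.Duration) error {
	conn, err := net.DialTimeout("tcp", addr, timeout)
	if err != nil {
		return err
	}
	return conn.Close()
}

func main() {
	if err := checkExternal("www.redhat.com:443", 2*time.Second); err != nil {
		// Mirror the example's stance: external access may be blocked on purpose,
		// so treat failure as a warning rather than a hard error.
		fmt.Printf("warning: external connectivity probe failed: %v\n", err)
		return
	}
	fmt.Println("external connectivity probe succeeded")
}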
Example 8
// Check is part of the Diagnostic interface; it runs the actual diagnostic logic
func (d PodCheckAuth) Check() types.DiagnosticResult {
	r := types.NewDiagnosticResult(PodCheckAuthName)
	token, err := ioutil.ReadFile(d.TokenPath)
	if err != nil {
		r.Error("DP1001", err, fmt.Sprintf("could not read the service account token: %v", err))
		return r
	}
	d.authenticateToMaster(string(token), r)
	d.authenticateToRegistry(string(token), r)
	return r
}
Example 9
func (d *ClusterRouter) Check() types.DiagnosticResult {
	r := types.NewDiagnosticResult(ClusterRouterName)
	if dc := d.getRouterDC(r); dc != nil {
		// Check that it actually has running pod(s) selected
		if podList := d.getRouterPods(dc, r); podList != nil {
			for _, pod := range podList.Items {
				// Check the logs for that pod for common issues (credentials, DNS resolution failure)
				d.checkRouterLogs(&pod, r)
			}
		}
	}
	return r
}
Example 10
func (d *ServiceExternalIPs) Check() types.DiagnosticResult {
	r := types.NewDiagnosticResult(ServiceExternalIPsName)
	masterConfig, err := hostdiag.GetMasterConfig(r, d.MasterConfigFile)
	if err != nil {
		r.Info("DH2004", "Unreadable master config; skipping this diagnostic.")
		return r
	}

	admit, reject := []*net.IPNet{}, []*net.IPNet{}
	if cidrs := masterConfig.NetworkConfig.ExternalIPNetworkCIDRs; cidrs != nil {
		reject, admit, err = admission.ParseCIDRRules(cidrs)
		if err != nil {
			r.Error("DH2007", err, fmt.Sprintf("Could not parse master config NetworkConfig.ExternalIPNetworkCIDRs: (%[1]T) %[1]v", err))
			return r
		}
	}
	services, err := d.KclusterClient.Services("").List(kapi.ListOptions{})
	if err != nil {
		r.Error("DH2005", err, fmt.Sprintf("Error while listing cluster services: (%[1]T) %[1]v", err))
		return r
	}

	errList := []string{}
	for _, service := range services.Items {
		if len(service.Spec.ExternalIPs) == 0 {
			continue
		}
		if len(admit) == 0 {
			errList = append(errList, fmt.Sprintf("Service %s.%s specifies ExternalIPs %v, but none are permitted.", service.Namespace, service.Name, service.Spec.ExternalIPs))
			continue
		}
		for _, ipString := range service.Spec.ExternalIPs {
			ip := net.ParseIP(ipString)
			if ip == nil {
				continue // we don't really care for the purposes of this diagnostic
			}
			if admission.NetworkSlice(reject).Contains(ip) || !admission.NetworkSlice(admit).Contains(ip) {
				errList = append(errList, fmt.Sprintf("Service %s.%s specifies ExternalIP %s that is not permitted by the master ExternalIPNetworkCIDRs setting.", service.Namespace, service.Name, ipString))
			}
		}
	}
	if len(errList) > 0 {
		r.Error("DH2006", nil, `The following problems were found with service ExternalIPs in the cluster.
These services were created before the master ExternalIPNetworkCIDRs setting changed to exclude them.
The default ExternalIPNetworkCIDRs now excludes all ExternalIPs on services.
`+strings.Join(errList, "\n"))
	}

	return r
}
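
The admit/reject logic in Example 10 goes through admission.ParseCIDRRules and admission.NetworkSlice, which are OpenShift helpers not shown here. The underlying containment check can be sketched with the standard library alone; the CIDR rules and addresses below are made up for illustration.

package main

import (
	"fmt"
	"net"
)

// contains reports whether ip falls inside any of the given networks.
func contains(nets []*net.IPNet, ip net.IP) bool {
	for _, n := range nets {
		if n.Contains(ip) {
			return true
		}
	}
	return false
}

func mustParseCIDRs(cidrs []string) []*net.IPNet {
	out := make([]*net.IPNet, 0, len(cidrs))
	for _, c := range cidrs {
		_, n, err := net.ParseCIDR(c)
		if err != nil {
			panic(err) // in a diagnostic this would become an Error entry
		}
		out = append(out, n)
	}
	return out
}

func main() {
	// Hypothetical policy: admit one range, carve out one rejected subnet.
	admit := mustParseCIDRs([]string{"172.29.0.0/16"})
	reject := mustParseCIDRs([]string{"172.29.5.0/24"})

	for _, s := range []string{"172.29.1.10", "172.29.5.1", "10.0.0.1"} {
		ip := net.ParseIP(s)
		permitted := ip != nil && !contains(reject, ip) && contains(admit, ip)
		fmt.Printf("ExternalIP %s permitted: %v\n", s, permitted)
	}
}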
Example 11
func (d *ClusterRoles) Check() types.DiagnosticResult {
	r := types.NewDiagnosticResult(ClusterRolesName)

	reconcileOptions := &policycmd.ReconcileClusterRolesOptions{
		Confirmed:  false,
		Union:      false,
		Out:        ioutil.Discard,
		RoleClient: d.ClusterRolesClient.ClusterRoles(),
	}

	changedClusterRoles, err := reconcileOptions.ChangedClusterRoles()
	if err != nil {
		r.Error("CRD1000", err, fmt.Sprintf("Error inspecting ClusterRoles: %v", err))
		return r
	}

	// success
	if len(changedClusterRoles) == 0 {
		return r
	}

	for _, changedClusterRole := range changedClusterRoles {
		actualClusterRole, err := d.ClusterRolesClient.ClusterRoles().Get(changedClusterRole.Name)
		if kerrs.IsNotFound(err) {
			r.Error("CRD1002", nil, fmt.Sprintf("clusterrole/%s is missing.\n\nUse the `oadm policy reconcile-cluster-roles` command to create the role.", changedClusterRole.Name))
			continue
		}
		if err != nil {
			r.Error("CRD1001", err, fmt.Sprintf("Unable to get clusterrole/%s: %v", changedClusterRole.Name, err))
		}

		_, missingRules := rulevalidation.Covers(actualClusterRole.Rules, changedClusterRole.Rules)
		if len(missingRules) == 0 {
			r.Warn("CRD1003", nil, fmt.Sprintf("clusterrole/%s has changed, but the existing role has more permissions than the new role.\n\nUse the `oadm policy reconcile-cluster-roles` command to update the role to reduce permissions.", changedClusterRole.Name))
			_, extraRules := rulevalidation.Covers(changedClusterRole.Rules, actualClusterRole.Rules)
			for _, extraRule := range extraRules {
				r.Info("CRD1008", fmt.Sprintf("clusterrole/%s has extra permission %v.", changedClusterRole.Name, extraRule))
			}
			continue
		}

		r.Error("CRD1005", nil, fmt.Sprintf("clusterrole/%s has changed and the existing role does not have enough permissions.\n\nUse the `oadm policy reconcile-cluster-roles` command to update the role.", changedClusterRole.Name))
		for _, missingRule := range missingRules {
			r.Info("CRD1007", fmt.Sprintf("clusterrole/%s is missing permission %v.", changedClusterRole.Name, missingRule))
		}
		r.Debug("CRD1006", fmt.Sprintf("clusterrole/%s is now %v.", changedClusterRole.Name, changedClusterRole))
	}

	return r
}
Example 12
func (d *ClusterRoles) Check() types.DiagnosticResult {
	r := types.NewDiagnosticResult(ClusterRolesName)

	reconcileOptions := &policycmd.ReconcileClusterRolesOptions{
		Confirmed:  false,
		Union:      false,
		Out:        ioutil.Discard,
		RoleClient: d.ClusterRolesClient.ClusterRoles(),
	}

	changedClusterRoles, _, err := reconcileOptions.ChangedClusterRoles()
	if err != nil {
		r.Error("CRD1000", err, fmt.Sprintf("Error inspecting ClusterRoles: %v", err))
		return r
	}

	// success
	if len(changedClusterRoles) == 0 {
		return r
	}

	for _, changedClusterRole := range changedClusterRoles {
		actualClusterRole, err := d.ClusterRolesClient.ClusterRoles().Get(changedClusterRole.Name)
		if kerrs.IsNotFound(err) {
			r.Error("CRD1002", nil, fmt.Sprintf(clusterRoleMissing, changedClusterRole.Name))
			continue
		}
		if err != nil {
			r.Error("CRD1001", err, fmt.Sprintf("Unable to get clusterrole/%s: %v", changedClusterRole.Name, err))
		}

		_, missingRules := rulevalidation.Covers(actualClusterRole.Rules, changedClusterRole.Rules)
		if len(missingRules) == 0 {
			r.Info("CRD1003", fmt.Sprintf(clusterRoleReduced, changedClusterRole.Name))
			_, extraRules := rulevalidation.Covers(changedClusterRole.Rules, actualClusterRole.Rules)
			for _, extraRule := range extraRules {
				r.Info("CRD1008", fmt.Sprintf("clusterrole/%s has extra permission %v.", changedClusterRole.Name, extraRule))
			}
			continue
		}

		r.Error("CRD1005", nil, fmt.Sprintf(clusterRoleChanged, changedClusterRole.Name))
		for _, missingRule := range missingRules {
			r.Info("CRD1007", fmt.Sprintf("clusterrole/%s is missing permission %v.", changedClusterRole.Name, missingRule))
		}
		r.Debug("CRD1006", fmt.Sprintf("clusterrole/%s is now %v.", changedClusterRole.Name, changedClusterRole))
	}

	return r
}
Example 13
func (d *ClusterRegistry) Check() types.DiagnosticResult {
	r := types.NewDiagnosticResult(ClusterRegistryName)
	if service := d.getRegistryService(r); service != nil {
		// Check that it actually has pod(s) selected and running
		if runningPods := d.getRegistryPods(service, r); len(runningPods) == 0 {
			r.Error("DClu1001", nil, fmt.Sprintf(clRegNoRunningPods, registryName))
			return r
		} else if d.checkRegistryEndpoints(runningPods, r) { // Check that matching endpoint exists on the service
			// attempt to create an imagestream and see if it gets the same registry service IP from the service cache
			d.verifyRegistryImageStream(service, r)
		}
	}
	return r
}
Example 14
// Check is part of the Diagnostic interface; it runs the actual diagnostic logic
func (d CollectNetworkInfo) Check() types.DiagnosticResult {
	r := types.NewDiagnosticResult(CollectNetworkInfoName)

	nodeName, _, err := util.GetLocalNode(d.KubeClient)
	if err != nil {
		r.Error("DColNet1001", err, fmt.Sprintf("Fetching local node info failed: %s", err))
		return r
	}

	l := util.LogInterface{
		Result: r,
		Logdir: filepath.Join(util.NetworkDiagDefaultLogDir, util.NetworkDiagNodeLogDirPrefix, nodeName),
	}
	l.LogNode(d.KubeClient)
	return r
}
Example 15
func (d NodeConfigCheck) Check() types.DiagnosticResult {
	r := types.NewDiagnosticResult(NodeConfigCheckName)
	r.Debug("DH1001", fmt.Sprintf("Looking for node config file at '%s'", d.NodeConfigFile))
	nodeConfig, err := configapilatest.ReadAndResolveNodeConfig(d.NodeConfigFile)
	if err != nil {
		r.Error("DH1002", err, fmt.Sprintf("Could not read node config file '%s':\n(%T) %[2]v", d.NodeConfigFile, err))
		return r
	}

	r.Info("DH1003", fmt.Sprintf("Found a node config file: %[1]s", d.NodeConfigFile))

	for _, err := range configvalidation.ValidateNodeConfig(nodeConfig) {
		r.Error("DH1004", err, fmt.Sprintf("Validation of node config file '%s' failed:\n(%T) %[2]v", d.NodeConfigFile, err))
	}
	return r
}
Example 16
func (d *MasterNode) Check() types.DiagnosticResult {
	r := types.NewDiagnosticResult(MasterNodeName)

	nodes, err := d.KubeClient.Nodes().List(kapi.ListOptions{})
	if err != nil {
		r.Error("DClu3002", err, fmt.Sprintf(clientErrorGettingNodes, err))
		return r
	}

	// Provide the actual net.LookupHost as the DNS resolver:
	serverIps, err := resolveServerIP(d.ServerUrl, net.LookupHost)
	if err != nil {
		r.Error("DClu3007", err, "Error resolving servers IP")
		return r
	}

	return searchNodesForIP(nodes.Items, serverIps)
}
Example 17
func (d UnitStatus) Check() types.DiagnosticResult {
	r := types.NewDiagnosticResult(UnitStatusName)

	unitRequiresUnit(r, d.SystemdUnits["openshift-node"], d.SystemdUnits["iptables"], nodeRequiresIPTables)
	unitRequiresUnit(r, d.SystemdUnits["openshift-node"], d.SystemdUnits["docker"], `Nodes use Docker to run containers.`)
	unitRequiresUnit(r, d.SystemdUnits["openshift-node"], d.SystemdUnits["openvswitch"], sdUnitSDNreqOVS)
	unitRequiresUnit(r, d.SystemdUnits["openshift-master"], d.SystemdUnits["openvswitch"], `Masters use openvswitch for access to cluster SDN networking`)
	// all-in-one networking *could* be simpler, so fewer checks
	unitRequiresUnit(r, d.SystemdUnits["openshift"], d.SystemdUnits["docker"], `Nodes use Docker to run containers.`)

	// Anything that is enabled but not running deserves notice
	for name, unit := range d.SystemdUnits {
		if unit.Enabled && !unit.Active {
			r.Error("DS3001", nil, fmt.Sprintf(sdUnitInactive, name))
		}
	}
	return r
}
Example 18
func (d *NodeDefinitions) Check() types.DiagnosticResult {
	r := types.NewDiagnosticResult("NodeDefinition")

	nodes, err := d.KubeClient.Core().Nodes().List(kapi.ListOptions{})
	if err != nil {
		r.Error("DClu0001", err, fmt.Sprintf(clientErrorGettingNodes, err))
		return r
	}

	anyNodesAvail := false
	for _, node := range nodes.Items {
		var ready *kapi.NodeCondition
		for i, condition := range node.Status.Conditions {
			switch condition.Type {
			// Each condition appears only once. Currently there's only one... used to be more
			case kapi.NodeReady:
				ready = &node.Status.Conditions[i]
			}
		}

		if ready == nil || ready.Status != kapi.ConditionTrue {
			templateData := log.Hash{"node": node.Name}
			if ready == nil {
				templateData["status"] = "None"
				templateData["reason"] = "There is no readiness record."
			} else {
				templateData["status"] = ready.Status
				templateData["reason"] = ready.Reason
			}
			r.Warn("DClu0002", nil, log.EvalTemplate("DClu0002", nodeNotReady, templateData))
		} else if node.Spec.Unschedulable {
			r.Warn("DClu0003", nil, log.EvalTemplate("DClu0003", nodeNotSched, log.Hash{"node": node.Name}))
		} else {
			anyNodesAvail = true
		}
	}
	if !anyNodesAvail {
		r.Error("DClu0004", nil, "There were no nodes available to use. No new pods can be scheduled.")
	}

	return r
}
Example 19
// Check is part of the Diagnostic interface; it runs the actual diagnostic logic
func (d CheckNodeNetwork) Check() types.DiagnosticResult {
	r := types.NewDiagnosticResult(CheckNodeNetworkName)

	_, localIP, err := util.GetLocalNode(d.KubeClient)
	if err != nil {
		r.Error("DNodeNet1001", err, err.Error())
		return r
	}

	localPods, _, err := util.GetLocalAndNonLocalDiagnosticPods(d.KubeClient)
	if err != nil {
		r.Error("DNodeNet1002", err, fmt.Sprintf("Getting local and nonlocal pods failed. Error: %s", err))
		return r
	}

	for _, pod := range localPods {
		checkNodeConnection(&pod, localIP, r)
	}
	return r
}
Example 20
func (d UnitStatus) Check() types.DiagnosticResult {
	r := types.NewDiagnosticResult(UnitStatusName)

	unitRequiresUnit(r, d.SystemdUnits["atomic-openshift-node"], d.SystemdUnits["iptables"], nodeRequiresIPTables)
	unitRequiresUnit(r, d.SystemdUnits["atomic-openshift-node"], d.SystemdUnits["docker"], `Nodes use Docker to run containers.`)
	unitRequiresUnit(r, d.SystemdUnits["atomic-openshift-node"], d.SystemdUnits["openvswitch"], fmt.Sprintf(sdUnitSDNreqOVS, "atomic-openshift-node"))
	unitRequiresUnit(r, d.SystemdUnits["atomic-openshift-master"], d.SystemdUnits["atomic-openshift-node"], `Masters must currently also be nodes for access to cluster SDN networking`)

	unitRequiresUnit(r, d.SystemdUnits["origin-node"], d.SystemdUnits["iptables"], nodeRequiresIPTables)
	unitRequiresUnit(r, d.SystemdUnits["origin-node"], d.SystemdUnits["docker"], `Nodes use Docker to run containers.`)
	unitRequiresUnit(r, d.SystemdUnits["origin-node"], d.SystemdUnits["openvswitch"], fmt.Sprintf(sdUnitSDNreqOVS, "origin-node"))
	unitRequiresUnit(r, d.SystemdUnits["origin-master"], d.SystemdUnits["origin-node"], `Masters must currently also be nodes for access to cluster SDN networking`)

	// Anything that is enabled but not running deserves notice
	for name, unit := range d.SystemdUnits {
		if unit.Enabled && !unit.Active {
			r.Error("DS3001", nil, fmt.Sprintf(sdUnitInactive, name))
		}
	}
	return r
}
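
unitRequiresUnit is referenced in Examples 17 and 20 but not shown. Judging only from the call sites and the Enabled/Active fields used in the final loop, it presumably complains when a unit is running while a unit it depends on is not. A hypothetical, self-contained sketch along those lines follows; the type and field names are assumptions, not the real helper.

package main

import "fmt"

// unitStatus is an assumed shape for the entries in d.SystemdUnits; the real
// openshift/origin type may differ.
type unitStatus struct {
	Name    string
	Exists  bool
	Enabled bool
	Active  bool
}

// unitRequiresUnit returns a complaint when `unit` is running but `requires` is not.
func unitRequiresUnit(unit, requires unitStatus, reason string) (string, bool) {
	switch {
	case !unit.Exists || !unit.Active:
		return "", false // nothing to check if the dependent unit is not running
	case !requires.Exists:
		return fmt.Sprintf("%s is running but %s is not present. %s", unit.Name, requires.Name, reason), true
	case !requires.Active:
		return fmt.Sprintf("%s is running but %s is not running. %s", unit.Name, requires.Name, reason), true
	}
	return "", false
}

func main() {
	node := unitStatus{Name: "origin-node", Exists: true, Enabled: true, Active: true}
	ovs := unitStatus{Name: "openvswitch", Exists: true, Enabled: true, Active: false}
	if msg, bad := unitRequiresUnit(node, ovs, "Nodes require openvswitch for SDN networking."); bad {
		fmt.Println("error:", msg)
	}
}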
Example 21
// Check is part of the Diagnostic interface; it runs the actual diagnostic logic
func (d CheckPodNetwork) Check() types.DiagnosticResult {
	d.res = types.NewDiagnosticResult(CheckPodNetworkName)

	pluginName, ok, err := util.GetOpenShiftNetworkPlugin(d.OSClient)
	if err != nil {
		d.res.Error("DPodNet1001", err, fmt.Sprintf("Checking network plugin failed. Error: %s", err))
		return d.res
	}
	if !ok {
		d.res.Warn("DPodNet1002", nil, "Skipping pod connectivity test. Reason: Not using openshift network plugin.")
		return d.res
	}

	localPods, nonlocalPods, err := util.GetLocalAndNonLocalDiagnosticPods(d.KubeClient)
	if err != nil {
		d.res.Error("DPodNet1003", err, fmt.Sprintf("Getting local and nonlocal pods failed. Error: %s", err))
		return d.res
	}

	if sdnapi.IsOpenShiftMultitenantNetworkPlugin(pluginName) {
		netnsList, err := d.OSClient.NetNamespaces().List(kapi.ListOptions{})
		if err != nil {
			d.res.Error("DPodNet1004", err, fmt.Sprintf("Getting all network namespaces failed. Error: %s", err))
			return d.res
		}

		d.vnidMap = map[string]uint32{}
		for _, netns := range netnsList.Items {
			d.vnidMap[netns.NetName] = netns.NetID
		}
	}

	localGlobalPods, localNonGlobalPods := util.GetGlobalAndNonGlobalPods(localPods, d.vnidMap)
	nonlocalGlobalPods, nonlocalNonGlobalPods := util.GetGlobalAndNonGlobalPods(nonlocalPods, d.vnidMap)

	d.checkSameNodePodToPodConnection(localGlobalPods, localNonGlobalPods)
	d.checkDifferentNodePodToPodConnection(localGlobalPods, localNonGlobalPods, nonlocalGlobalPods, nonlocalNonGlobalPods)
	return d.res
}
Example 22
func searchNodesForIP(nodes []kapi.Node, ips []string) types.DiagnosticResult {
	r := types.NewDiagnosticResult(MasterNodeName)
	r.Debug("DClu3005", fmt.Sprintf("Seaching for a node with master IP: %s", ips))

	// Loops = # of nodes * number of IPs per node (2 commonly) * # of IPs the
	// server hostname resolves to. (should usually be 1)
	for _, node := range nodes {
		for _, address := range node.Status.Addresses {
			for _, ipAddress := range ips {
				r.Debug("DClu3006", fmt.Sprintf("Checking node %s address %s",
					node.ObjectMeta.Name, address.Address))
				if address.Address == ipAddress {
					r.Info("DClu3003", fmt.Sprintf("Found a node with same IP as master: %s",
						node.ObjectMeta.Name))
					return r
				}
			}
		}
	}
	r.Warn("DClu3004", nil, masterNotRunningAsANode)
	return r
}
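
Example 16 resolves the master's server URL to IPs (via resolveServerIP with net.LookupHost injected), and searchNodesForIP then compares those IPs against every node's reported addresses. Below is a minimal, self-contained sketch of that flow using only the standard library; the URL and node addresses are placeholders, and the real resolveServerIP may behave differently.

package main

import (
	"fmt"
	"net"
	"net/url"
)

// resolveHostIPs extracts the host from a server URL and resolves it,
// roughly what resolveServerIP does with net.LookupHost injected.
func resolveHostIPs(serverURL string, lookup func(string) ([]string, error)) ([]string, error) {
	u, err := url.Parse(serverURL)
	if err != nil {
		return nil, err
	}
	host := u.Hostname()
	if ip := net.ParseIP(host); ip != nil {
		return []string{host}, nil // already an IP, no DNS needed
	}
	return lookup(host)
}

func main() {
	// Placeholder URL and node addresses for illustration only.
	ips, err := resolveHostIPs("https://master.example.com:8443", net.LookupHost)
	if err != nil {
		fmt.Println("could not resolve master host:", err)
		return
	}
	nodeAddrs := []string{"10.0.0.5", "10.0.0.6"}
	for _, ip := range ips {
		for _, addr := range nodeAddrs {
			if ip == addr {
				fmt.Println("found a node with the same IP as the master:", addr)
				return
			}
		}
	}
	fmt.Println("master does not appear to be running as a node")
}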
Example 23
func (d MasterConfigCheck) Check() types.DiagnosticResult {
	r := types.NewDiagnosticResult(MasterConfigCheckName)
	masterConfig, err := GetMasterConfig(r, d.MasterConfigFile)
	if err != nil {
		return r
	}

	results := configvalidation.ValidateMasterConfig(masterConfig, nil)
	if len(results.Errors) > 0 {
		errText := fmt.Sprintf("Validation of master config file '%s' failed:\n", d.MasterConfigFile)
		for _, err := range results.Errors {
			errText += fmt.Sprintf("%v\n", err)
		}
		r.Error("DH0004", nil, errText)
	}
	if len(results.Warnings) > 0 {
		warnText := fmt.Sprintf("Validation of master config file '%s' warned:\n", d.MasterConfigFile)
		for _, warn := range results.Warnings {
			warnText += fmt.Sprintf("%v\n", warn)
		}
		r.Warn("DH0005", nil, warnText)
	}
	return r
}
Example 24
// Check is part of the Diagnostic interface; it runs the actual diagnostic logic
func (d *ConfigLoading) Check() types.DiagnosticResult {
	r := types.NewDiagnosticResult("ConfigLoading")
	confFlagValue := d.ClientFlags.Lookup(d.ConfFlagName).Value.String()

	var foundPath string
	rules := config.NewOpenShiftClientConfigLoadingRules()
	paths := append([]string{confFlagValue}, rules.Precedence...)
	for index, path := range paths {
		errmsg := ""
		switch index {
		case 0:
			errmsg = fmt.Sprintf("--%s specified that client config should be at %s\n", d.ConfFlagName, path)
		case len(paths) - 1: // config in ~/.kube
		// no error message indicated if it is not there... user didn't say it would be
		default: // can be multiple paths from the env var in theory; all cases should go here
			if len(os.Getenv(config.OpenShiftConfigPathEnvVar)) != 0 {
				errmsg = fmt.Sprintf("Env var %s specified that client config could be at %s\n", config.OpenShiftConfigPathEnvVar, path)
			}
		}

		if d.canOpenConfigFile(path, errmsg, r) && foundPath == "" {
			d.successfulLoad = true
			foundPath = path
		}
	}
	if foundPath != "" {
		if confFlagValue != "" && confFlagValue != foundPath {
			// found config but not where --config said
			r.Error("DCli1001", nil, fmt.Sprintf(`
The client configuration file was not found where the --%s flag indicated:
  %s
A config file was found at the following location:
  %s
If you wish to use this file for client configuration, you can specify it
with the --%[1]s flag, or just not specify the flag.
			`, d.ConfFlagName, confFlagValue, foundPath))
		}
	} else { // not found, check for master-generated ones to recommend
		if confFlagValue != "" {
			r.Error("DCli1002", nil, fmt.Sprintf("Did not find config file where --%s=%s indicated", d.ConfFlagName, confFlagValue))
		}
		adminWarningF := `
No client config file was available; however, one exists at
    %[2]s
which may have been generated automatically by the master.
If you want to use this config, you should copy it to the
standard location (%[3]s),
or you can set the environment variable %[1]s:
    export %[1]s=%[2]s
If not, obtain a config file and place it in the standard
location for use by the client and diagnostics.
`
		// look for it in auto-generated locations when not found properly
		for _, path := range util.AdminKubeConfigPaths {
			msg := fmt.Sprintf("Looking for a possible client config at %s\n", path)
			if d.canOpenConfigFile(path, msg, r) {
				r.Warn("DCli1003", nil, fmt.Sprintf(adminWarningF, config.OpenShiftConfigPathEnvVar, path, config.RecommendedHomeFile))
				break
			}
		}
	}
	return r
}
Example 25
// Check is part of the Diagnostic interface; it runs the actual diagnostic logic
func (d ConfigContext) Check() types.DiagnosticResult {
	r := types.NewDiagnosticResult(ConfigContextsName)

	isDefaultContext := d.RawConfig.CurrentContext == d.ContextName

	// prepare bad news message
	errorKey := "DCli0001"
	unusableLine := fmt.Sprintf("The client config context '%s' is unusable", d.ContextName)
	if isDefaultContext {
		errorKey = "DCli0002"
		unusableLine = fmt.Sprintf("The current client config context '%s' is unusable", d.ContextName)
	}

	// check that the context and its constituents are defined in the kubeconfig
	context, exists := d.RawConfig.Contexts[d.ContextName]
	if !exists {
		r.Error(errorKey, nil, fmt.Sprintf("%s:\n Client config context '%s' is not defined.", unusableLine, d.ContextName))
		return r
	}
	clusterName := context.Cluster
	cluster, exists := d.RawConfig.Clusters[clusterName]
	if !exists {
		r.Error(errorKey, nil, fmt.Sprintf("%s:\n Client config context '%s' has a cluster '%s' which is not defined.", unusableLine, d.ContextName, clusterName))
		return r
	}
	authName := context.AuthInfo
	if _, exists := d.RawConfig.AuthInfos[authName]; !exists {
		r.Error(errorKey, nil, fmt.Sprintf("%s:\n Client config context '%s' has a user '%s' which is not defined.", unusableLine, d.ContextName, authName))
		return r
	}

	// we found a fully-defined context
	project := context.Namespace
	if project == "" {
		project = kapi.NamespaceDefault // k8s fills this in anyway if missing from the context
	}
	msgText := contextDesc
	if isDefaultContext {
		msgText = currContextDesc
	}
	msgText = fmt.Sprintf(msgText, d.ContextName, cluster.Server, authName, project)

	// Actually send a request to see if context has connectivity.
	// Note: we cannot reuse factories as they cache the clients, so build new factory for each context.
	osClient, _, _, err := osclientcmd.NewFactory(kclientcmd.NewDefaultClientConfig(*d.RawConfig, &kclientcmd.ConfigOverrides{Context: *context})).Clients()
	// client create now *fails* if cannot connect to server; so, address connectivity errors below
	if err == nil {
		if projects, projerr := osClient.Projects().List(kapi.ListOptions{}); projerr != nil {
			err = projerr
		} else { // success!
			list := []string{}
			for i, project := range projects.Items {
				if i > 9 {
					list = append(list, "...")
					break
				}
				list = append(list, project.Name)
			}
			if len(list) == 0 {
				r.Info("DCli0003", msgText+"Successfully requested project list, but it is empty, so user has no access to anything.")
			} else {
				r.Info("DCli0004", msgText+fmt.Sprintf("Successfully requested project list; has access to project(s):\n  %v", list))
			}
			return r
		}
	}

	// something went wrong; couldn't create client or get project list.
	// interpret the terse error messages with helpful info.
	errMsg := err.Error()
	errFull := fmt.Sprintf("(%T) %[1]v\n", err)
	var reason, errId string
	switch {
	case regexp.MustCompile("dial tcp: lookup (\\S+): no such host").MatchString(errMsg):
		errId, reason = "DCli0005", clientNoResolve
	case strings.Contains(errMsg, "x509: certificate signed by unknown authority"):
		errId, reason = "DCli0006", clientUnknownCa
	case strings.Contains(errMsg, "specifying a root certificates file with the insecure flag is not allowed"):
		errId, reason = "DCli0007", clientUnneededCa
	case invalidCertNameRx.MatchString(errMsg):
		match := invalidCertNameRx.FindStringSubmatch(errMsg)
		serverHost := match[len(match)-1]
		errId, reason = "DCli0008", fmt.Sprintf(clientInvCertName, serverHost)
	case regexp.MustCompile("dial tcp (\\S+): connection refused").MatchString(errMsg):
		errId, reason = "DCli0009", clientConnRefused
	case regexp.MustCompile("dial tcp (\\S+): (?:connection timed out|i/o timeout|no route to host)").MatchString(errMsg):
		errId, reason = "DCli0010", clientConnTimeout
	case strings.Contains(errMsg, "malformed HTTP response"):
		errId, reason = "DCli0011", clientMalformedHTTP
	case strings.Contains(errMsg, "tls: oversized record received with length"):
		errId, reason = "DCli0012", clientMalformedTLS
	case strings.Contains(errMsg, `User "system:anonymous" cannot`):
		errId, reason = "DCli0013", clientUnauthn
	case strings.Contains(errMsg, "provide credentials"):
		errId, reason = "DCli0014", clientUnauthz
	default:
		errId, reason = "DCli0015", `Diagnostics does not have an explanation for what this means. Please report this error so one can be added.`
	}
	r.Error(errId, err, msgText+errFull+reason)
	return r
}
Example 26
// NewAggregatedLogging returns the AggregatedLogging Diagnostic
func NewAggregatedLogging(masterConfigFile string, kclient *kclient.Client, osclient *client.Client) *AggregatedLogging {
	return &AggregatedLogging{nil, masterConfigFile, osclient, kclient, types.NewDiagnosticResult(AggregatedLoggingName)}
}
Example 27
// Check is part of the Diagnostic interface; it runs the actual diagnostic logic
func (d *DiagnosticPod) Check() types.DiagnosticResult {
	r := types.NewDiagnosticResult("DiagnosticPod")
	d.runDiagnosticPod(nil, r)
	return r
}
Example 28
func (d AnalyzeLogs) Check() types.DiagnosticResult {
	r := types.NewDiagnosticResult(AnalyzeLogsName)

	for _, unit := range unitLogSpecs {
		if svc := d.SystemdUnits[unit.Name]; svc.Enabled || svc.Active {
			r.Info("DS0001", fmt.Sprintf("Checking journalctl logs for '%s' service", unit.Name))

			cmd := exec.Command("journalctl", "-ru", unit.Name, "--output=json")
			// JSON comes out of journalctl one line per record
			lineReader, reader, err := func(cmd *exec.Cmd) (*bufio.Scanner, io.ReadCloser, error) {
				stdout, err := cmd.StdoutPipe()
				if err == nil {
					lineReader := bufio.NewScanner(stdout)
					if err = cmd.Start(); err == nil {
						return lineReader, stdout, nil
					}
				}
				return nil, nil, err
			}(cmd)

			if err != nil {
				r.Error("DS0002", err, fmt.Sprintf(sdLogReadErr, unit.Name, errStr(err)))
				return r
			}
			defer func() { // close out pipe once done reading
				reader.Close()
				cmd.Wait()
			}()
			timeLimit := time.Now().Add(-time.Hour)                     // if it didn't happen in the last hour, probably not too relevant
			matchCopy := append([]logMatcher(nil), unit.LogMatchers...) // make a copy, will remove matchers after they match something
			lineCount := 0                                              // each log entry is a line
			for lineReader.Scan() {
				lineCount += 1
				if len(matchCopy) == 0 { // if no rules remain to match
					break // don't waste time reading more log entries
				}
				bytes, entry := lineReader.Bytes(), logEntry{}
				if err := json.Unmarshal(bytes, &entry); err != nil {
					r.Debug("DS0003", fmt.Sprintf("Couldn't read the JSON for this log message:\n%s\nGot error %s", string(bytes), errStr(err)))
				} else {
					if lineCount > 500 && stampTooOld(entry.TimeStamp, timeLimit) {
						r.Debug("DS0004", fmt.Sprintf("Stopped reading %s log: timestamp %s too old", unit.Name, entry.TimeStamp))
						break // if we've analyzed at least 500 entries, stop when age limit reached (don't scan days of logs)
					}
					if unit.StartMatch.MatchString(entry.Message) {
						break // saw log message for unit startup; don't analyze previous logs
					}
					for index, match := range matchCopy { // match log message against provided matchers
						if strings := match.Regexp.FindStringSubmatch(entry.Message); strings != nil {
							// if matches: print interpretation, remove from matchCopy, and go on to next log entry
							keep := match.KeepAfterMatch // generic keep logic
							if match.Interpret != nil {  // apply custom match logic
								currKeep := match.Interpret(&entry, strings, r)
								keep = currKeep
							} else { // apply generic match processing
								text := fmt.Sprintf("Found '%s' journald log message:\n  %s\n%s", unit.Name, entry.Message, match.Interpretation)
								switch match.Level {
								case log.DebugLevel:
									r.Debug(match.Id, text)
								case log.InfoLevel:
									r.Info(match.Id, text)
								case log.WarnLevel:
									r.Warn(match.Id, nil, text)
								case log.ErrorLevel:
									r.Error(match.Id, nil, text)
								}
							}

							if !keep { // remove matcher once seen
								matchCopy = append(matchCopy[:index], matchCopy[index+1:]...)
							}
							break
						}
					}
				}
			}

		}
	}

	return r
}
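
Example 28 streams journalctl --output=json, where each record is a single JSON object per line, and decodes only the fields it needs. Below is a stripped-down sketch of just that reading pattern, without the matcher logic; the logEntry tags assume the standard journald JSON keys (MESSAGE, __REALTIME_TIMESTAMP), and the unit name is a placeholder.

package main

import (
	"bufio"
	"encoding/json"
	"fmt"
	"os/exec"
)

// logEntry declares only the two journald JSON fields the example above inspects.
type logEntry struct {
	Message   string `json:"MESSAGE"`
	TimeStamp string `json:"__REALTIME_TIMESTAMP"` // microseconds since epoch, as a string
}

func main() {
	// Assumes journalctl is available; the unit name is a placeholder.
	cmd := exec.Command("journalctl", "-ru", "docker", "--output=json", "-n", "20")
	stdout, err := cmd.StdoutPipe()
	if err != nil {
		fmt.Println("pipe error:", err)
		return
	}
	if err := cmd.Start(); err != nil {
		fmt.Println("start error:", err)
		return
	}
	defer cmd.Wait()

	scanner := bufio.NewScanner(stdout) // journalctl emits one JSON object per line
	for scanner.Scan() {
		var entry logEntry
		if err := json.Unmarshal(scanner.Bytes(), &entry); err != nil {
			fmt.Println("skipping unparsable line:", err)
			continue
		}
		fmt.Printf("%s: %s\n", entry.TimeStamp, entry.Message)
	}
}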