// updateNetworkingCondition sets the NodeNetworkUnavailable condition on the given node:
// ConditionFalse when a cloud route has been created for it, ConditionTrue when route
// creation failed.
func updateNetworkingCondition(node *api.Node, routeCreated bool) {
	_, networkingCondition := api.GetNodeCondition(&node.Status, api.NodeNetworkUnavailable)
	currentTime := unversioned.Now()
	if routeCreated {
		if networkingCondition != nil && networkingCondition.Status != api.ConditionFalse {
			networkingCondition.Status = api.ConditionFalse
			networkingCondition.Reason = "RouteCreated"
			networkingCondition.Message = "RouteController created a route"
			networkingCondition.LastTransitionTime = currentTime
		} else if networkingCondition == nil {
			node.Status.Conditions = append(node.Status.Conditions, api.NodeCondition{
				Type:               api.NodeNetworkUnavailable,
				Status:             api.ConditionFalse,
				Reason:             "RouteCreated",
				Message:            "RouteController created a route",
				LastTransitionTime: currentTime,
			})
		}
	} else {
		if networkingCondition != nil && networkingCondition.Status != api.ConditionTrue {
			networkingCondition.Status = api.ConditionTrue
			networkingCondition.Reason = "NoRouteCreated"
			networkingCondition.Message = "RouteController failed to create a route"
			networkingCondition.LastTransitionTime = currentTime
		} else if networkingCondition == nil {
			node.Status.Conditions = append(node.Status.Conditions, api.NodeCondition{
				Type:               api.NodeNetworkUnavailable,
				Status:             api.ConditionTrue,
				Reason:             "NoRouteCreated",
				Message:            "RouteController failed to create a route",
				LastTransitionTime: currentTime,
			})
		}
	}
}
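
// A minimal usage sketch (the helper below is hypothetical, not part of the
// original file): the condition type is NodeNetworkUnavailable, so a
// successfully created route is recorded as ConditionFalse ("networking is
// not unavailable") and a failed creation as ConditionTrue.
func nodeNetworkingAvailable(node *api.Node, routeErr error) bool {
	// Flip the condition based on whether the cloud route was created.
	updateNetworkingCondition(node, routeErr == nil)
	_, cond := api.GetNodeCondition(&node.Status, api.NodeNetworkUnavailable)
	// ConditionFalse on NodeNetworkUnavailable means networking is available.
	return cond != nil && cond.Status == api.ConditionFalse
}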
func TestReconcile(t *testing.T) {
	cluster := "my-k8s"
	node1 := api.Node{ObjectMeta: api.ObjectMeta{Name: "node-1", UID: "01"}, Spec: api.NodeSpec{PodCIDR: "10.120.0.0/24"}}
	node2 := api.Node{ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: "10.120.1.0/24"}}
	nodeNoCidr := api.Node{ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: ""}}

	testCases := []struct {
		nodes                      []api.Node
		initialRoutes              []*cloudprovider.Route
		expectedRoutes             []*cloudprovider.Route
		expectedNetworkUnavailable []bool // true: the NodeNetworkUnavailable condition is expected to be ConditionFalse, i.e. networking marked available
		clientset                  *fake.Clientset
	}{
		// 2 nodes, routes already there
		{
			nodes: []api.Node{
				node1,
				node2,
			},
			initialRoutes: []*cloudprovider.Route{
				{cluster + "-01", "node-1", "10.120.0.0/24"},
				{cluster + "-02", "node-2", "10.120.1.0/24"},
			},
			expectedRoutes: []*cloudprovider.Route{
				{cluster + "-01", "node-1", "10.120.0.0/24"},
				{cluster + "-02", "node-2", "10.120.1.0/24"},
			},
			expectedNetworkUnavailable: []bool{true, true},
			clientset:                  fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}),
		},
		// 2 nodes, one route already there
		{
			nodes: []api.Node{
				node1,
				node2,
			},
			initialRoutes: []*cloudprovider.Route{
				{cluster + "-01", "node-1", "10.120.0.0/24"},
			},
			expectedRoutes: []*cloudprovider.Route{
				{cluster + "-01", "node-1", "10.120.0.0/24"},
				{cluster + "-02", "node-2", "10.120.1.0/24"},
			},
			expectedNetworkUnavailable: []bool{true, true},
			clientset:                  fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}),
		},
		// 2 nodes, no routes yet
		{
			nodes: []api.Node{
				node1,
				node2,
			},
			initialRoutes: []*cloudprovider.Route{},
			expectedRoutes: []*cloudprovider.Route{
				{cluster + "-01", "node-1", "10.120.0.0/24"},
				{cluster + "-02", "node-2", "10.120.1.0/24"},
			},
			expectedNetworkUnavailable: []bool{true, true},
			clientset:                  fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}),
		},
		// 2 nodes, a few too many routes
		{
			nodes: []api.Node{
				node1,
				node2,
			},
			initialRoutes: []*cloudprovider.Route{
				{cluster + "-01", "node-1", "10.120.0.0/24"},
				{cluster + "-02", "node-2", "10.120.1.0/24"},
				{cluster + "-03", "node-3", "10.120.2.0/24"},
				{cluster + "-04", "node-4", "10.120.3.0/24"},
			},
			expectedRoutes: []*cloudprovider.Route{
				{cluster + "-01", "node-1", "10.120.0.0/24"},
				{cluster + "-02", "node-2", "10.120.1.0/24"},
			},
			expectedNetworkUnavailable: []bool{true, true},
			clientset:                  fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}),
		},
		// 2 nodes, 2 routes, but only 1 is right
		{
			nodes: []api.Node{
				node1,
				node2,
			},
			initialRoutes: []*cloudprovider.Route{
				{cluster + "-01", "node-1", "10.120.0.0/24"},
				{cluster + "-03", "node-3", "10.120.2.0/24"},
			},
			expectedRoutes: []*cloudprovider.Route{
				{cluster + "-01", "node-1", "10.120.0.0/24"},
				{cluster + "-02", "node-2", "10.120.1.0/24"},
			},
			expectedNetworkUnavailable: []bool{true, true},
			clientset:                  fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}),
		},
		// 2 nodes, one node without CIDR assigned.
		{
			nodes: []api.Node{
				node1,
				nodeNoCidr,
			},
			initialRoutes: []*cloudprovider.Route{},
			expectedRoutes: []*cloudprovider.Route{
				{cluster + "-01", "node-1", "10.120.0.0/24"},
			},
			expectedNetworkUnavailable: []bool{true, false},
			clientset:                  fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, nodeNoCidr}}),
		},
	}
	for i, testCase := range testCases {
		cloud := &fakecloud.FakeCloud{RouteMap: make(map[string]*fakecloud.FakeRoute)}
		for _, route := range testCase.initialRoutes {
			fakeRoute := &fakecloud.FakeRoute{}
			fakeRoute.ClusterName = cluster
			fakeRoute.Route = *route
			cloud.RouteMap[route.Name] = fakeRoute
		}
		routes, ok := cloud.Routes()
		if !ok {
			t.Fatal("Error in test: fakecloud doesn't support Routes()")
		}
		_, cidr, _ := net.ParseCIDR("10.120.0.0/16")
		rc := New(routes, testCase.clientset, cluster, cidr)
		if err := rc.reconcile(testCase.nodes, testCase.initialRoutes); err != nil {
			t.Errorf("%d. Error from rc.reconcile(): %v", i, err)
		}
		for _, action := range testCase.clientset.Actions() {
			if action.GetVerb() == "update" && action.GetResource().Resource == "nodes" {
				node := action.(core.UpdateAction).GetObject().(*api.Node)
				_, condition := api.GetNodeCondition(&node.Status, api.NodeNetworkUnavailable)
				if condition == nil {
					t.Errorf("%d. Missing NodeNetworkUnavailable condition for Node %v", i, node.Name)
				} else {
					check := func(index int) bool {
						return (condition.Status == api.ConditionFalse) == testCase.expectedNetworkUnavailable[index]
					}
					index := -1
					for j := range testCase.nodes {
						if testCase.nodes[j].Name == node.Name {
							index = j
						}
					}
					if index == -1 {
						// Something's wrong
						continue
					}
					if !check(index) {
						t.Errorf("%d. Invalid NodeNetworkUnavailable condition for Node %v, expected %v, got %v",
							i, node.Name, testCase.expectedNetworkUnavailable[index], (condition.Status == api.ConditionFalse))
					}
				}
			}
		}
		var finalRoutes []*cloudprovider.Route
		var err error
		timeoutChan := time.After(200 * time.Millisecond)
		tick := time.NewTicker(10 * time.Millisecond)
		defer tick.Stop()
	poll:
		for {
			select {
			case <-tick.C:
				if finalRoutes, err = routes.ListRoutes(cluster); err == nil && routeListEqual(finalRoutes, testCase.expectedRoutes) {
					break poll
				}
			case <-timeoutChan:
				t.Errorf("%d. rc.reconcile() = %v, routes:\n%v\nexpected: nil, routes:\n%v\n", i, err, flatten(finalRoutes), flatten(testCase.expectedRoutes))
				break poll
			}
		}
	}
}
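
// NOTE: TestReconcile relies on two helpers, routeListEqual and flatten, that
// are not included in this excerpt. A minimal sketch consistent with how they
// are called above, assuming order-insensitive comparison by route name:
func routeListEqual(list1, list2 []*cloudprovider.Route) bool {
	if len(list1) != len(list2) {
		return false
	}
	routeMap1 := make(map[string]*cloudprovider.Route)
	for _, route1 := range list1 {
		routeMap1[route1.Name] = route1
	}
	for _, route2 := range list2 {
		route1, exists := routeMap1[route2.Name]
		if !exists || *route1 != *route2 {
			return false
		}
	}
	return true
}

// flatten dereferences the route pointers so that expected and actual route
// lists print readably in test failure messages.
func flatten(list []*cloudprovider.Route) []cloudprovider.Route {
	routes := make([]cloudprovider.Route, 0, len(list))
	for _, route := range list {
		routes = append(routes, *route)
	}
	return routes
}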
Example #3
// tryUpdateNodeStatus checks the conditions of the given node and tries to update it.
// It returns the grace period to which the node is entitled, the last observed and the
// current Ready condition, and an error, if one occurred.
func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, api.NodeCondition, *api.NodeCondition, error) {
	var err error
	var gracePeriod time.Duration
	var observedReadyCondition api.NodeCondition
	_, currentReadyCondition := api.GetNodeCondition(&node.Status, api.NodeReady)
	if currentReadyCondition == nil {
		// If the ready condition is nil, then the kubelet (or nodecontroller) never posted the node status.
		// A fake ready condition is created, with LastHeartbeatTime and LastTransitionTime set
		// to node.CreationTimestamp, to avoid handling that corner case.
		observedReadyCondition = api.NodeCondition{
			Type:               api.NodeReady,
			Status:             api.ConditionUnknown,
			LastHeartbeatTime:  node.CreationTimestamp,
			LastTransitionTime: node.CreationTimestamp,
		}
		gracePeriod = nc.nodeStartupGracePeriod
		nc.nodeStatusMap[node.Name] = nodeStatusData{
			status:                   node.Status,
			probeTimestamp:           node.CreationTimestamp,
			readyTransitionTimestamp: node.CreationTimestamp,
		}
	} else {
		// If ready condition is not nil, make a copy of it, since we may modify it in place later.
		observedReadyCondition = *currentReadyCondition
		gracePeriod = nc.nodeMonitorGracePeriod
	}

	savedNodeStatus, found := nc.nodeStatusMap[node.Name]
	// There are the following cases to check:
	// - both the saved and the new status have no Ready Condition set - we leave everything as it is,
	// - the saved status has no Ready Condition, but the current one does - the NodeController was restarted with Node data already present in etcd,
	// - the saved status has a Ready Condition, but the current one does not - it's an error, but we fill it in because that's probably a good thing to do,
	// - both the saved and the current status have a Ready Condition with the same LastHeartbeatTime - nothing happened on that Node, it may be
	//   unresponsive, so we leave it as it is,
	// - both the saved and the current status have a Ready Condition with different LastHeartbeatTimes but the same state -
	//   everything is in order, no transition occurred, so we update only probeTimestamp,
	// - both the saved and the current status have a Ready Condition with different LastHeartbeatTimes and different states -
	//   the Ready Condition changed its state since we last saw it, so we update both probeTimestamp and readyTransitionTimestamp.
	// TODO: things to consider:
	//   - if LastHeartbeatTime has gone back in time, it's probably an error; currently we ignore it,
	//   - currently the only correct Ready state transition outside of the NodeController is the Kubelet marking the node ready;
	//     we don't check whether that's the case, but it does not seem necessary.
	var savedCondition *api.NodeCondition
	if found {
		_, savedCondition = api.GetNodeCondition(&savedNodeStatus.status, api.NodeReady)
	}
	_, observedCondition := api.GetNodeCondition(&node.Status, api.NodeReady)
	if !found {
		glog.Warningf("Missing timestamp for Node %s. Assuming now as a timestamp.", node.Name)
		savedNodeStatus = nodeStatusData{
			status:                   node.Status,
			probeTimestamp:           nc.now(),
			readyTransitionTimestamp: nc.now(),
		}
	} else if savedCondition == nil && observedCondition != nil {
		glog.V(1).Infof("Creating timestamp entry for newly observed Node %s", node.Name)
		savedNodeStatus = nodeStatusData{
			status:                   node.Status,
			probeTimestamp:           nc.now(),
			readyTransitionTimestamp: nc.now(),
		}
	} else if savedCondition != nil && observedCondition == nil {
		glog.Errorf("ReadyCondition was removed from Status of Node %s", node.Name)
		// TODO: figure out what to do in this case. For now we do the same thing as above.
		savedNodeStatus = nodeStatusData{
			status:                   node.Status,
			probeTimestamp:           nc.now(),
			readyTransitionTimestamp: nc.now(),
		}
	} else if savedCondition != nil && observedCondition != nil && savedCondition.LastHeartbeatTime != observedCondition.LastHeartbeatTime {
		var transitionTime unversioned.Time
		// If ReadyCondition changed since the last time we checked, we update the transition timestamp to "now",
		// otherwise we leave it as it is.
		if savedCondition.LastTransitionTime != observedCondition.LastTransitionTime {
			glog.V(3).Infof("ReadyCondition for Node %s transitioned from %v to %v", node.Name, savedCondition.Status, observedCondition)

			transitionTime = nc.now()
		} else {
			transitionTime = savedNodeStatus.readyTransitionTimestamp
		}
		if glog.V(5) {
			glog.V(5).Infof("Node %s ReadyCondition updated. Updating timestamp: %+v vs %+v.", node.Name, savedNodeStatus.status, node.Status)
		} else {
			glog.V(3).Infof("Node %s ReadyCondition updated. Updating timestamp.", node.Name)
		}
		savedNodeStatus = nodeStatusData{
			status:                   node.Status,
			probeTimestamp:           nc.now(),
			readyTransitionTimestamp: transitionTime,
		}
	}
	nc.nodeStatusMap[node.Name] = savedNodeStatus

	if nc.now().After(savedNodeStatus.probeTimestamp.Add(gracePeriod)) {
		// NodeReady condition was last set longer ago than gracePeriod, so update it to Unknown
		// (regardless of its current value) in the master.
		if currentReadyCondition == nil {
			glog.V(2).Infof("node %v is never updated by kubelet", node.Name)
			node.Status.Conditions = append(node.Status.Conditions, api.NodeCondition{
				Type:               api.NodeReady,
				Status:             api.ConditionUnknown,
				Reason:             "NodeStatusNeverUpdated",
				Message:            "Kubelet never posted node status.",
				LastHeartbeatTime:  node.CreationTimestamp,
				LastTransitionTime: nc.now(),
			})
		} else {
			glog.V(4).Infof("node %v hasn't been updated for %+v. Last ready condition is: %+v",
				node.Name, nc.now().Time.Sub(savedNodeStatus.probeTimestamp.Time), observedReadyCondition)
			if observedReadyCondition.Status != api.ConditionUnknown {
				currentReadyCondition.Status = api.ConditionUnknown
				currentReadyCondition.Reason = "NodeStatusUnknown"
				currentReadyCondition.Message = "Kubelet stopped posting node status."
				// LastProbeTime is the last time we heard from kubelet.
				currentReadyCondition.LastHeartbeatTime = observedReadyCondition.LastHeartbeatTime
				currentReadyCondition.LastTransitionTime = nc.now()
			}
		}

		// Like NodeReady condition, NodeOutOfDisk was last set longer ago than gracePeriod, so update
		// it to Unknown (regardless of its current value) in the master.
		// TODO(madhusudancs): Refactor this with readyCondition to remove duplicated code.
		_, oodCondition := api.GetNodeCondition(&node.Status, api.NodeOutOfDisk)
		if oodCondition == nil {
			glog.V(2).Infof("Out of disk condition of node %v is never updated by kubelet", node.Name)
			node.Status.Conditions = append(node.Status.Conditions, api.NodeCondition{
				Type:               api.NodeOutOfDisk,
				Status:             api.ConditionUnknown,
				Reason:             "NodeStatusNeverUpdated",
				Message:            "Kubelet never posted node status.",
				LastHeartbeatTime:  node.CreationTimestamp,
				LastTransitionTime: nc.now(),
			})
		} else {
			glog.V(4).Infof("node %v hasn't been updated for %+v. Last out of disk condition is: %+v",
				node.Name, nc.now().Time.Sub(savedNodeStatus.probeTimestamp.Time), oodCondition)
			if oodCondition.Status != api.ConditionUnknown {
				oodCondition.Status = api.ConditionUnknown
				oodCondition.Reason = "NodeStatusUnknown"
				oodCondition.Message = "Kubelet stopped posting node status."
				oodCondition.LastTransitionTime = nc.now()
			}
		}

		_, currentCondition := api.GetNodeCondition(&node.Status, api.NodeReady)
		if !api.Semantic.DeepEqual(currentCondition, &observedReadyCondition) {
			if _, err = nc.kubeClient.Core().Nodes().UpdateStatus(node); err != nil {
				glog.Errorf("Error updating node %s: %v", node.Name, err)
				return gracePeriod, observedReadyCondition, currentReadyCondition, err
			} else {
				nc.nodeStatusMap[node.Name] = nodeStatusData{
					status:                   node.Status,
					probeTimestamp:           nc.nodeStatusMap[node.Name].probeTimestamp,
					readyTransitionTimestamp: nc.now(),
				}
				return gracePeriod, observedReadyCondition, currentReadyCondition, nil
			}
		}
	}

	return gracePeriod, observedReadyCondition, currentReadyCondition, err
}
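
// The nodeStatusData type used above is not included in this excerpt. From the
// way it is constructed, a matching definition would look roughly like this
// (field types inferred from nc.now() and node.Status; this is a sketch, not
// necessarily the original declaration):
type nodeStatusData struct {
	probeTimestamp           unversioned.Time
	readyTransitionTimestamp unversioned.Time
	status                   api.NodeStatus
}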