// updateNetworkingCondition sets the NodeNetworkUnavailable condition on the given node
// to reflect whether the RouteController managed to create a route for it, appending the
// condition if it does not exist yet.
func updateNetworkingCondition(node *api.Node, routeCreated bool) {
	_, networkingCondition := api.GetNodeCondition(&node.Status, api.NodeNetworkUnavailable)
	currentTime := unversioned.Now()
	if routeCreated {
		if networkingCondition != nil && networkingCondition.Status != api.ConditionFalse {
			networkingCondition.Status = api.ConditionFalse
			networkingCondition.Reason = "RouteCreated"
			networkingCondition.Message = "RouteController created a route"
			networkingCondition.LastTransitionTime = currentTime
		} else if networkingCondition == nil {
			node.Status.Conditions = append(node.Status.Conditions, api.NodeCondition{
				Type:               api.NodeNetworkUnavailable,
				Status:             api.ConditionFalse,
				Reason:             "RouteCreated",
				Message:            "RouteController created a route",
				LastTransitionTime: currentTime,
			})
		}
	} else {
		if networkingCondition != nil && networkingCondition.Status != api.ConditionTrue {
			networkingCondition.Status = api.ConditionTrue
			networkingCondition.Reason = "NoRouteCreated"
			networkingCondition.Message = "RouteController failed to create a route"
			networkingCondition.LastTransitionTime = currentTime
		} else if networkingCondition == nil {
			node.Status.Conditions = append(node.Status.Conditions, api.NodeCondition{
				Type:               api.NodeNetworkUnavailable,
				Status:             api.ConditionTrue,
				Reason:             "NoRouteCreated",
				Message:            "RouteController failed to create a route",
				LastTransitionTime: currentTime,
			})
		}
	}
}
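// Illustrative sketch (not part of the original controller code): with no existing
// conditions, a successful route creation marks the node as reachable, and a later
// failure flips the same condition in place. The names below simply mirror the
// function above.
//
//	node := &api.Node{}
//	updateNetworkingCondition(node, true)
//	// node.Status.Conditions now holds one NodeNetworkUnavailable condition with
//	// Status=ConditionFalse and Reason="RouteCreated".
//	updateNetworkingCondition(node, false)
//	// The same condition is updated in place to Status=ConditionTrue,
//	// Reason="NoRouteCreated", with a fresh LastTransitionTime.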
func TestReconcile(t *testing.T) {
	cluster := "my-k8s"
	node1 := api.Node{ObjectMeta: api.ObjectMeta{Name: "node-1", UID: "01"}, Spec: api.NodeSpec{PodCIDR: "10.120.0.0/24"}}
	node2 := api.Node{ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: "10.120.1.0/24"}}
	nodeNoCidr := api.Node{ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: ""}}

	testCases := []struct {
		nodes                      []api.Node
		initialRoutes              []*cloudprovider.Route
		expectedRoutes             []*cloudprovider.Route
		expectedNetworkUnavailable []bool
		clientset                  *fake.Clientset
	}{
		// 2 nodes, routes already there
		{
			nodes: []api.Node{
				node1,
				node2,
			},
			initialRoutes: []*cloudprovider.Route{
				{cluster + "-01", "node-1", "10.120.0.0/24"},
				{cluster + "-02", "node-2", "10.120.1.0/24"},
			},
			expectedRoutes: []*cloudprovider.Route{
				{cluster + "-01", "node-1", "10.120.0.0/24"},
				{cluster + "-02", "node-2", "10.120.1.0/24"},
			},
			expectedNetworkUnavailable: []bool{true, true},
			clientset:                  fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}),
		},
		// 2 nodes, one route already there
		{
			nodes: []api.Node{
				node1,
				node2,
			},
			initialRoutes: []*cloudprovider.Route{
				{cluster + "-01", "node-1", "10.120.0.0/24"},
			},
			expectedRoutes: []*cloudprovider.Route{
				{cluster + "-01", "node-1", "10.120.0.0/24"},
				{cluster + "-02", "node-2", "10.120.1.0/24"},
			},
			expectedNetworkUnavailable: []bool{true, true},
			clientset:                  fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}),
		},
		// 2 nodes, no routes yet
		{
			nodes: []api.Node{
				node1,
				node2,
			},
			initialRoutes: []*cloudprovider.Route{},
			expectedRoutes: []*cloudprovider.Route{
				{cluster + "-01", "node-1", "10.120.0.0/24"},
				{cluster + "-02", "node-2", "10.120.1.0/24"},
			},
			expectedNetworkUnavailable: []bool{true, true},
			clientset:                  fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}),
		},
		// 2 nodes, a few too many routes
		{
			nodes: []api.Node{
				node1,
				node2,
			},
			initialRoutes: []*cloudprovider.Route{
				{cluster + "-01", "node-1", "10.120.0.0/24"},
				{cluster + "-02", "node-2", "10.120.1.0/24"},
				{cluster + "-03", "node-3", "10.120.2.0/24"},
				{cluster + "-04", "node-4", "10.120.3.0/24"},
			},
			expectedRoutes: []*cloudprovider.Route{
				{cluster + "-01", "node-1", "10.120.0.0/24"},
				{cluster + "-02", "node-2", "10.120.1.0/24"},
			},
			expectedNetworkUnavailable: []bool{true, true},
			clientset:                  fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}),
		},
		// 2 nodes, 2 routes, but only 1 is right
		{
			nodes: []api.Node{
				node1,
				node2,
			},
			initialRoutes: []*cloudprovider.Route{
				{cluster + "-01", "node-1", "10.120.0.0/24"},
				{cluster + "-03", "node-3", "10.120.2.0/24"},
			},
			expectedRoutes: []*cloudprovider.Route{
				{cluster + "-01", "node-1", "10.120.0.0/24"},
				{cluster + "-02", "node-2", "10.120.1.0/24"},
			},
			expectedNetworkUnavailable: []bool{true, true},
			clientset:                  fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}),
		},
		// 2 nodes, one node without CIDR assigned.
		{
			nodes: []api.Node{
				node1,
				nodeNoCidr,
			},
			initialRoutes: []*cloudprovider.Route{},
			expectedRoutes: []*cloudprovider.Route{
				{cluster + "-01", "node-1", "10.120.0.0/24"},
			},
			expectedNetworkUnavailable: []bool{true, false},
			clientset:                  fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, nodeNoCidr}}),
		},
	}
	for i, testCase := range testCases {
		cloud := &fakecloud.FakeCloud{RouteMap: make(map[string]*fakecloud.FakeRoute)}
		for _, route := range testCase.initialRoutes {
			fakeRoute := &fakecloud.FakeRoute{}
			fakeRoute.ClusterName = cluster
			fakeRoute.Route = *route
			cloud.RouteMap[route.Name] = fakeRoute
		}
		routes, ok := cloud.Routes()
		if !ok {
			t.Error("Error in test: fakecloud doesn't support Routes()")
		}
		_, cidr, _ := net.ParseCIDR("10.120.0.0/16")
		rc := New(routes, testCase.clientset, cluster, cidr)
		if err := rc.reconcile(testCase.nodes, testCase.initialRoutes); err != nil {
			t.Errorf("%d. Error from rc.reconcile(): %v", i, err)
		}
		for _, action := range testCase.clientset.Actions() {
			if action.GetVerb() == "update" && action.GetResource().Resource == "nodes" {
				node := action.(core.UpdateAction).GetObject().(*api.Node)
				_, condition := api.GetNodeCondition(&node.Status, api.NodeNetworkUnavailable)
				if condition == nil {
					t.Errorf("%d. Missing NodeNetworkUnavailable condition for Node %v", i, node.Name)
				} else {
					check := func(index int) bool {
						return (condition.Status == api.ConditionFalse) == testCase.expectedNetworkUnavailable[index]
					}
					index := -1
					for j := range testCase.nodes {
						if testCase.nodes[j].Name == node.Name {
							index = j
						}
					}
					if index == -1 {
						// Something's wrong
						continue
					}
					if !check(index) {
						t.Errorf("%d. Invalid NodeNetworkUnavailable condition for Node %v, expected %v, got %v",
							i, node.Name, testCase.expectedNetworkUnavailable[index], (condition.Status == api.ConditionFalse))
					}
				}
			}
		}
		var finalRoutes []*cloudprovider.Route
		var err error
		timeoutChan := time.After(200 * time.Millisecond)
		tick := time.NewTicker(10 * time.Millisecond)
		defer tick.Stop()
	poll:
		for {
			select {
			case <-tick.C:
				if finalRoutes, err = routes.ListRoutes(cluster); err == nil && routeListEqual(finalRoutes, testCase.expectedRoutes) {
					break poll
				}
			case <-timeoutChan:
				t.Errorf("%d. rc.reconcile() = %v, routes:\n%v\nexpected: nil, routes:\n%v\n", i, err, flatten(finalRoutes), flatten(testCase.expectedRoutes))
				break poll
			}
		}
	}
}
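// routeListEqual and flatten are referenced by TestReconcile but not shown in this
// excerpt. The sketches below are assumptions consistent with that usage, not
// necessarily the original helpers: routeListEqual compares two route lists by route
// name regardless of order, and flatten dereferences a slice of route pointers so the
// routes print readably in the error message.
func routeListEqual(list1, list2 []*cloudprovider.Route) bool {
	if len(list1) != len(list2) {
		return false
	}
	routeMap1 := make(map[string]*cloudprovider.Route)
	for _, route1 := range list1 {
		routeMap1[route1.Name] = route1
	}
	for _, route2 := range list2 {
		if route1, exists := routeMap1[route2.Name]; !exists || *route1 != *route2 {
			return false
		}
	}
	return true
}

func flatten(list []*cloudprovider.Route) []cloudprovider.Route {
	var structList []cloudprovider.Route
	for _, route := range list {
		structList = append(structList, *route)
	}
	return structList
}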
// tryUpdateNodeStatus checks the given node's conditions and tries to update it. It returns
// the grace period to which the node is entitled, the current and last observed Ready
// conditions, and an error if one occurred.
func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, api.NodeCondition, *api.NodeCondition, error) {
	var err error
	var gracePeriod time.Duration
	var observedReadyCondition api.NodeCondition
	_, currentReadyCondition := api.GetNodeCondition(&node.Status, api.NodeReady)
	if currentReadyCondition == nil {
		// If the ready condition is nil, then the kubelet (or nodecontroller) never posted node status.
		// A fake ready condition is created, with LastHeartbeatTime and LastTransitionTime set
		// to node.CreationTimestamp, to avoid handling that corner case below.
		observedReadyCondition = api.NodeCondition{
			Type:               api.NodeReady,
			Status:             api.ConditionUnknown,
			LastHeartbeatTime:  node.CreationTimestamp,
			LastTransitionTime: node.CreationTimestamp,
		}
		gracePeriod = nc.nodeStartupGracePeriod
		nc.nodeStatusMap[node.Name] = nodeStatusData{
			status:                   node.Status,
			probeTimestamp:           node.CreationTimestamp,
			readyTransitionTimestamp: node.CreationTimestamp,
		}
	} else {
		// If the ready condition is not nil, make a copy of it, since we may modify it in place later.
		observedReadyCondition = *currentReadyCondition
		gracePeriod = nc.nodeMonitorGracePeriod
	}

	savedNodeStatus, found := nc.nodeStatusMap[node.Name]
	// There are the following cases to check:
	// - neither the saved nor the new status has a Ready condition set - we leave everything as it is,
	// - the saved status has no Ready condition, but the current one does - the NodeController was restarted with Node data already present in etcd,
	// - the saved status has a Ready condition, but the current one does not - it's an error, but we fill it in because that's probably a good thing to do,
	// - both the saved and current statuses have Ready conditions with the same LastHeartbeatTime - nothing happened on that Node, it may be
	//   unresponsive, so we leave it as it is,
	// - both the saved and current statuses have Ready conditions with different LastHeartbeatTimes but the same Ready condition state -
	//   everything is in order, no transition occurred, we update only probeTimestamp,
	// - both the saved and current statuses have Ready conditions with different LastHeartbeatTimes and different Ready condition states -
	//   the Ready condition changed state since we last saw it, so we update both probeTimestamp and readyTransitionTimestamp.
	// TODO: things to consider:
	// - if 'LastHeartbeatTime' has gone back in time it's probably an error, currently we ignore it,
	// - currently the only correct Ready state transition outside of the NodeController is the kubelet marking the node ready; we don't check
	//   if that's the case, but it does not seem necessary.
	var savedCondition *api.NodeCondition
	if found {
		_, savedCondition = api.GetNodeCondition(&savedNodeStatus.status, api.NodeReady)
	}
	_, observedCondition := api.GetNodeCondition(&node.Status, api.NodeReady)
	if !found {
		glog.Warningf("Missing timestamp for Node %s. Assuming now as a timestamp.", node.Name)
		savedNodeStatus = nodeStatusData{
			status:                   node.Status,
			probeTimestamp:           nc.now(),
			readyTransitionTimestamp: nc.now(),
		}
	} else if savedCondition == nil && observedCondition != nil {
		glog.V(1).Infof("Creating timestamp entry for newly observed Node %s", node.Name)
		savedNodeStatus = nodeStatusData{
			status:                   node.Status,
			probeTimestamp:           nc.now(),
			readyTransitionTimestamp: nc.now(),
		}
	} else if savedCondition != nil && observedCondition == nil {
		glog.Errorf("ReadyCondition was removed from Status of Node %s", node.Name)
		// TODO: figure out what to do in this case. For now we do the same thing as above.
		savedNodeStatus = nodeStatusData{
			status:                   node.Status,
			probeTimestamp:           nc.now(),
			readyTransitionTimestamp: nc.now(),
		}
	} else if savedCondition != nil && observedCondition != nil && savedCondition.LastHeartbeatTime != observedCondition.LastHeartbeatTime {
		var transitionTime unversioned.Time
		// If the ReadyCondition changed since the last time we checked, we update the transition timestamp to "now",
		// otherwise we leave it as it is.
		if savedCondition.LastTransitionTime != observedCondition.LastTransitionTime {
			glog.V(3).Infof("ReadyCondition for Node %s transitioned from %v to %v", node.Name, savedCondition.Status, observedCondition.Status)
			transitionTime = nc.now()
		} else {
			transitionTime = savedNodeStatus.readyTransitionTimestamp
		}
		if glog.V(5) {
			glog.V(5).Infof("Node %s ReadyCondition updated. Updating timestamp: %+v vs %+v.", node.Name, savedNodeStatus.status, node.Status)
		} else {
			glog.V(3).Infof("Node %s ReadyCondition updated. Updating timestamp.", node.Name)
		}
		savedNodeStatus = nodeStatusData{
			status:                   node.Status,
			probeTimestamp:           nc.now(),
			readyTransitionTimestamp: transitionTime,
		}
	}
	nc.nodeStatusMap[node.Name] = savedNodeStatus

	if nc.now().After(savedNodeStatus.probeTimestamp.Add(gracePeriod)) {
		// The NodeReady condition was last set longer ago than gracePeriod, so update it to Unknown
		// (regardless of its current value) in the master.
		if currentReadyCondition == nil {
			glog.V(2).Infof("node %v is never updated by kubelet", node.Name)
			node.Status.Conditions = append(node.Status.Conditions, api.NodeCondition{
				Type:               api.NodeReady,
				Status:             api.ConditionUnknown,
				Reason:             "NodeStatusNeverUpdated",
				Message:            fmt.Sprintf("Kubelet never posted node status."),
				LastHeartbeatTime:  node.CreationTimestamp,
				LastTransitionTime: nc.now(),
			})
		} else {
			glog.V(4).Infof("node %v hasn't been updated for %+v. Last ready condition is: %+v",
				node.Name, nc.now().Time.Sub(savedNodeStatus.probeTimestamp.Time), observedReadyCondition)
			if observedReadyCondition.Status != api.ConditionUnknown {
				currentReadyCondition.Status = api.ConditionUnknown
				currentReadyCondition.Reason = "NodeStatusUnknown"
				currentReadyCondition.Message = fmt.Sprintf("Kubelet stopped posting node status.")
				// LastHeartbeatTime is the last time we heard from the kubelet.
				currentReadyCondition.LastHeartbeatTime = observedReadyCondition.LastHeartbeatTime
				currentReadyCondition.LastTransitionTime = nc.now()
			}
		}

		// Like the NodeReady condition, NodeOutOfDisk was last set longer ago than gracePeriod, so update
		// it to Unknown (regardless of its current value) in the master.
		// TODO(madhusudancs): Refactor this with readyCondition to remove duplicated code.
		_, oodCondition := api.GetNodeCondition(&node.Status, api.NodeOutOfDisk)
		if oodCondition == nil {
			glog.V(2).Infof("Out of disk condition of node %v is never updated by kubelet", node.Name)
			node.Status.Conditions = append(node.Status.Conditions, api.NodeCondition{
				Type:               api.NodeOutOfDisk,
				Status:             api.ConditionUnknown,
				Reason:             "NodeStatusNeverUpdated",
				Message:            fmt.Sprintf("Kubelet never posted node status."),
				LastHeartbeatTime:  node.CreationTimestamp,
				LastTransitionTime: nc.now(),
			})
		} else {
			glog.V(4).Infof("node %v hasn't been updated for %+v. Last out of disk condition is: %+v",
				node.Name, nc.now().Time.Sub(savedNodeStatus.probeTimestamp.Time), oodCondition)
			if oodCondition.Status != api.ConditionUnknown {
				oodCondition.Status = api.ConditionUnknown
				oodCondition.Reason = "NodeStatusUnknown"
				oodCondition.Message = fmt.Sprintf("Kubelet stopped posting node status.")
				oodCondition.LastTransitionTime = nc.now()
			}
		}

		_, currentCondition := api.GetNodeCondition(&node.Status, api.NodeReady)
		if !api.Semantic.DeepEqual(currentCondition, &observedReadyCondition) {
			if _, err = nc.kubeClient.Core().Nodes().UpdateStatus(node); err != nil {
				glog.Errorf("Error updating node %s: %v", node.Name, err)
				return gracePeriod, observedReadyCondition, currentReadyCondition, err
			} else {
				nc.nodeStatusMap[node.Name] = nodeStatusData{
					status:                   node.Status,
					probeTimestamp:           nc.nodeStatusMap[node.Name].probeTimestamp,
					readyTransitionTimestamp: nc.now(),
				}
				return gracePeriod, observedReadyCondition, currentReadyCondition, nil
			}
		}
	}

	return gracePeriod, observedReadyCondition, currentReadyCondition, err
}
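// nodeStatusData is used by tryUpdateNodeStatus but not defined in this excerpt. A
// minimal sketch consistent with that usage (an assumption, not necessarily the original
// definition): the controller remembers, per node, the last observed status, when that
// status was last probed, and when the Ready condition last transitioned.
//
//	type nodeStatusData struct {
//		probeTimestamp           unversioned.Time
//		readyTransitionTimestamp unversioned.Time
//		status                   api.NodeStatus
//	}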