func TestFindFitSomeError(t *testing.T) { nodes := []string{"3", "2", "1"} predicates := map[string]algorithm.FitPredicate{"true": truePredicate, "match": matchesPredicate} pod := &api.Pod{ObjectMeta: api.ObjectMeta{Name: "1"}} nodeNameToInfo := map[string]*schedulercache.NodeInfo{ "3": schedulercache.NewNodeInfo(), "2": schedulercache.NewNodeInfo(), "1": schedulercache.NewNodeInfo(pod), } _, predicateMap, err := findNodesThatFit(pod, nodeNameToInfo, predicates, makeNodeList(nodes), nil) if err != nil && !reflect.DeepEqual(err, algorithmpredicates.ErrFakePredicate) { t.Errorf("unexpected error: %v", err) } if len(predicateMap) != (len(nodes) - 1) { t.Errorf("unexpected failed predicate map: %v", predicateMap) } for _, node := range nodes { if node == pod.Name { continue } failure, found := predicateMap[node] if !found { t.Errorf("failed to find node: %s in %v", node, predicateMap) } if failure != "FakePredicateError" { t.Errorf("unexpected failures: %v", failure) } } }
func TestFindPlaceAllOk(t *testing.T) { pod1 := BuildTestPod("p1", 300, 500000) new1 := BuildTestPod("p2", 600, 500000) new2 := BuildTestPod("p3", 500, 500000) nodeInfos := map[string]*schedulercache.NodeInfo{ "n1": schedulercache.NewNodeInfo(pod1), "n2": schedulercache.NewNodeInfo(), } node1 := BuildTestNode("n1", 1000, 2000000) node2 := BuildTestNode("n2", 1000, 2000000) nodeInfos["n1"].SetNode(node1) nodeInfos["n2"].SetNode(node2) oldHints := make(map[string]string) newHints := make(map[string]string) tracker := NewUsageTracker() err := findPlaceFor( "x", []*apiv1.Pod{new1, new2}, []*apiv1.Node{node1, node2}, nodeInfos, NewTestPredicateChecker(), oldHints, newHints, tracker, time.Now()) assert.Len(t, newHints, 2) assert.Contains(t, newHints, new1.Namespace+"/"+new1.Name) assert.Contains(t, newHints, new2.Namespace+"/"+new2.Name) assert.NoError(t, err) }
func TestFindPlaceAllBas(t *testing.T) { pod1 := BuildTestPod("p1", 300, 500000) new1 := BuildTestPod("p2", 600, 500000) new2 := BuildTestPod("p3", 500, 500000) new3 := BuildTestPod("p4", 700, 500000) nodeInfos := map[string]*schedulercache.NodeInfo{ "n1": schedulercache.NewNodeInfo(pod1), "n2": schedulercache.NewNodeInfo(), "nbad": schedulercache.NewNodeInfo(), } nodebad := BuildTestNode("nbad", 1000, 2000000) node1 := BuildTestNode("n1", 1000, 2000000) node2 := BuildTestNode("n2", 1000, 2000000) nodeInfos["n1"].SetNode(node1) nodeInfos["n2"].SetNode(node2) nodeInfos["nbad"].SetNode(nodebad) oldHints := make(map[string]string) newHints := make(map[string]string) tracker := NewUsageTracker() err := findPlaceFor( "nbad", []*kube_api.Pod{new1, new2, new3}, []*kube_api.Node{nodebad, node1, node2}, nodeInfos, NewTestPredicateChecker(), oldHints, newHints, tracker, time.Now()) assert.Error(t, err) assert.True(t, len(newHints) == 2) assert.Contains(t, newHints, new1.Namespace+"/"+new1.Name) assert.Contains(t, newHints, new2.Namespace+"/"+new2.Name) }
func TestFindFitAllError(t *testing.T) { nodes := []string{"3", "2", "1"} predicates := map[string]algorithm.FitPredicate{"true": truePredicate, "false": falsePredicate} nodeNameToInfo := map[string]*schedulercache.NodeInfo{ "3": schedulercache.NewNodeInfo(), "2": schedulercache.NewNodeInfo(), "1": schedulercache.NewNodeInfo(), } _, predicateMap, err := findNodesThatFit(&api.Pod{}, nodeNameToInfo, makeNodeList(nodes), predicates, nil, algorithm.EmptyMetadataProducer) if err != nil { t.Errorf("unexpected error: %v", err) } if len(predicateMap) != len(nodes) { t.Errorf("unexpected failed predicate map: %v", predicateMap) } for _, node := range nodes { failures, found := predicateMap[node] if !found { t.Errorf("failed to find node: %s in %v", node, predicateMap) } if len(failures) != 1 || failures[0] != algorithmpredicates.ErrFakePredicate { t.Errorf("unexpected failures: %v", failures) } } }
func TestPredicates(t *testing.T) { p1 := BuildTestPod("p1", 450, 500000) p2 := BuildTestPod("p2", 600, 500000) p3 := BuildTestPod("p3", 8000, 0) p4 := BuildTestPod("p4", 500, 500000) ni1 := schedulercache.NewNodeInfo(p1) ni2 := schedulercache.NewNodeInfo() nodeInfos := map[string]*schedulercache.NodeInfo{ "n1": ni1, "n2": ni2, } node1 := BuildTestNode("n1", 1000, 2000000) node2 := BuildTestNode("n2", 1000, 2000000) ni1.SetNode(node1) ni2.SetNode(node2) predicateChecker := NewTestPredicateChecker() r1, err := predicateChecker.FitsAny(p2, nodeInfos) assert.NoError(t, err) assert.Equal(t, "n2", r1) _, err = predicateChecker.FitsAny(p3, nodeInfos) assert.Error(t, err) assert.Error(t, predicateChecker.CheckPredicates(p2, ni1)) assert.NoError(t, predicateChecker.CheckPredicates(p4, ni1)) assert.NoError(t, predicateChecker.CheckPredicates(p2, ni2)) assert.NoError(t, predicateChecker.CheckPredicates(p4, ni2)) assert.Error(t, predicateChecker.CheckPredicates(p3, ni2)) }
func TestFindFitSomeError(t *testing.T) { nodes := []string{"3", "2", "1"} predicates := map[string]algorithm.FitPredicate{"true": truePredicate, "match": matchesPredicate} pod := &api.Pod{ObjectMeta: api.ObjectMeta{Name: "1"}} nodeNameToInfo := map[string]*schedulercache.NodeInfo{ "3": schedulercache.NewNodeInfo(), "2": schedulercache.NewNodeInfo(), "1": schedulercache.NewNodeInfo(pod), } for name := range nodeNameToInfo { nodeNameToInfo[name].SetNode(&api.Node{ObjectMeta: api.ObjectMeta{Name: name}}) } _, predicateMap, err := findNodesThatFit(pod, nodeNameToInfo, makeNodeList(nodes), predicates, nil, algorithm.EmptyMetadataProducer) if err != nil { t.Errorf("unexpected error: %v", err) } if len(predicateMap) != (len(nodes) - 1) { t.Errorf("unexpected failed predicate map: %v", predicateMap) } for _, node := range nodes { if node == pod.Name { continue } failures, found := predicateMap[node] if !found { t.Errorf("failed to find node: %s in %v", node, predicateMap) } if len(failures) != 1 || failures[0] != algorithmpredicates.ErrFakePredicate { t.Errorf("unexpected failures: %v", failures) } } }
func TestRBDDiskConflicts(t *testing.T) { volState := api.PodSpec{ Volumes: []api.Volume{ { VolumeSource: api.VolumeSource{ RBD: &api.RBDVolumeSource{ CephMonitors: []string{"a", "b"}, RBDPool: "foo", RBDImage: "bar", FSType: "ext4", }, }, }, }, } volState2 := api.PodSpec{ Volumes: []api.Volume{ { VolumeSource: api.VolumeSource{ RBD: &api.RBDVolumeSource{ CephMonitors: []string{"c", "d"}, RBDPool: "foo", RBDImage: "bar", FSType: "ext4", }, }, }, }, } tests := []struct { pod *api.Pod nodeInfo *schedulercache.NodeInfo isOk bool test string }{ {&api.Pod{}, schedulercache.NewNodeInfo(), true, "nothing"}, {&api.Pod{}, schedulercache.NewNodeInfo(&api.Pod{Spec: volState}), true, "one state"}, {&api.Pod{Spec: volState}, schedulercache.NewNodeInfo(&api.Pod{Spec: volState}), false, "same state"}, {&api.Pod{Spec: volState2}, schedulercache.NewNodeInfo(&api.Pod{Spec: volState}), true, "different state"}, } for _, test := range tests { ok, err := NoDiskConflict(test.pod, "machine", test.nodeInfo) if !reflect.DeepEqual(err, ErrDiskConflict) && err != nil { t.Errorf("unexpected error: %v", err) } if ok == false && !reflect.DeepEqual(err, ErrDiskConflict) { t.Errorf("unexpected error: %v", err) } if test.isOk && !ok { t.Errorf("expected ok, got none. %v %s %s", test.pod, test.nodeInfo, test.test) } if !test.isOk && ok { t.Errorf("expected no ok, got one. %v %s %s", test.pod, test.nodeInfo, test.test) } } }
func TestPodFitsHostPorts(t *testing.T) { tests := []struct { pod *api.Pod nodeInfo *schedulercache.NodeInfo fits bool test string }{ { pod: &api.Pod{}, nodeInfo: schedulercache.NewNodeInfo(), fits: true, test: "nothing running", }, { pod: newPod("m1", 8080), nodeInfo: schedulercache.NewNodeInfo( newPod("m1", 9090)), fits: true, test: "other port", }, { pod: newPod("m1", 8080), nodeInfo: schedulercache.NewNodeInfo( newPod("m1", 8080)), fits: false, test: "same port", }, { pod: newPod("m1", 8000, 8080), nodeInfo: schedulercache.NewNodeInfo( newPod("m1", 8080)), fits: false, test: "second port", }, { pod: newPod("m1", 8000, 8080), nodeInfo: schedulercache.NewNodeInfo( newPod("m1", 8001, 8080)), fits: false, test: "second port", }, } for _, test := range tests { fits, err := PodFitsHostPorts(test.pod, "machine", test.nodeInfo) if !reflect.DeepEqual(err, ErrPodNotFitsHostPorts) && err != nil { t.Errorf("unexpected error: %v", err) } if fits == false && !reflect.DeepEqual(err, ErrPodNotFitsHostPorts) { t.Errorf("unexpected error: %v", err) } if test.fits != fits { t.Errorf("%s: expected %v, saw %v", test.test, test.fits, fits) } } }
func TestAWSDiskConflicts(t *testing.T) { volState := api.PodSpec{ Volumes: []api.Volume{ { VolumeSource: api.VolumeSource{ AWSElasticBlockStore: &api.AWSElasticBlockStoreVolumeSource{ VolumeID: "foo", }, }, }, }, } volState2 := api.PodSpec{ Volumes: []api.Volume{ { VolumeSource: api.VolumeSource{ AWSElasticBlockStore: &api.AWSElasticBlockStoreVolumeSource{ VolumeID: "bar", }, }, }, }, } tests := []struct { pod *api.Pod nodeInfo *schedulercache.NodeInfo isOk bool test string }{ {&api.Pod{}, schedulercache.NewNodeInfo(), true, "nothing"}, {&api.Pod{}, schedulercache.NewNodeInfo(&api.Pod{Spec: volState}), true, "one state"}, {&api.Pod{Spec: volState}, schedulercache.NewNodeInfo(&api.Pod{Spec: volState}), false, "same state"}, {&api.Pod{Spec: volState2}, schedulercache.NewNodeInfo(&api.Pod{Spec: volState}), true, "different state"}, } for _, test := range tests { ok, err := NoDiskConflict(test.pod, "machine", test.nodeInfo) if err != nil { t.Fatalf("unexpected error: %v", err) } if test.isOk && !ok { t.Errorf("expected ok, got none. %v %s %s", test.pod, test.nodeInfo, test.test) } if !test.isOk && ok { t.Errorf("expected no ok, got one. %v %s %s", test.pod, test.nodeInfo, test.test) } } }
// Estimate implements the First Fit Decreasing bin-packing approximation algorithm.
// See https://en.wikipedia.org/wiki/Bin_packing_problem for more details.
// While this is multi-dimensional bin packing (cpu, mem, ports), in most cases the main
// dimension will be cpu, so the estimated overprovisioning of 11/9 * optimal + 6/9 should
// still be maintained.
// It is assumed that all pods from the given list can fit on nodeTemplate.
// Returns the number of nodes needed to accommodate all pods from the list.
func (estimator *BinpackingNodeEstimator) Estimate(pods []*apiv1.Pod, nodeTemplate *schedulercache.NodeInfo) int {
	podInfos := calculatePodScore(pods, nodeTemplate)
	sort.Sort(byScoreDesc(podInfos))

	// nodeWithPod returns a NodeInfo that is a copy of the nodeInfo argument with one additional pod scheduled on it.
	nodeWithPod := func(nodeInfo *schedulercache.NodeInfo, pod *apiv1.Pod) *schedulercache.NodeInfo {
		podsOnNode := nodeInfo.Pods()
		podsOnNode = append(podsOnNode, pod)
		newNodeInfo := schedulercache.NewNodeInfo(podsOnNode...)
		newNodeInfo.SetNode(nodeInfo.Node())
		return newNodeInfo
	}

	newNodes := make([]*schedulercache.NodeInfo, 0)
	for _, podInfo := range podInfos {
		found := false
		for i, nodeInfo := range newNodes {
			if err := estimator.predicateChecker.CheckPredicates(podInfo.pod, nodeInfo); err == nil {
				found = true
				newNodes[i] = nodeWithPod(nodeInfo, podInfo.pod)
				break
			}
		}
		if !found {
			newNodes = append(newNodes, nodeWithPod(nodeTemplate, podInfo.pod))
		}
	}
	return len(newNodes)
}
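// Illustrative sketch only: the same first-fit-decreasing idea reduced to a single dimension
// (CPU millicores), independent of the estimator's real types and predicates. The request and
// capacity values below are assumptions chosen to mirror TestBinpackingEstimate further down,
// not data taken from the estimator itself.
package main

import (
	"fmt"
	"sort"
)

// nodesNeeded packs CPU requests (in millicores) onto identical nodes using first fit decreasing.
func nodesNeeded(requests []int64, nodeCapacity int64) int {
	// Sort descending, then place each request on the first node with room,
	// opening a new node when none fits.
	sort.Slice(requests, func(i, j int) bool { return requests[i] > requests[j] })
	used := []int64{} // CPU already placed on each opened node
	for _, r := range requests {
		placed := false
		for i := range used {
			if used[i]+r <= nodeCapacity {
				used[i] += r
				placed = true
				break
			}
		}
		if !placed {
			used = append(used, r) // open a new node
		}
	}
	return len(used)
}

func main() {
	requests := make([]int64, 10)
	for i := range requests {
		requests[i] = 350
	}
	// Ten 350m pods on 1000m nodes: two pods fit per node, so five nodes are needed.
	fmt.Println(nodesNeeded(requests, 1000)) // prints 5
}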
func TestFindNone(t *testing.T) { pod1 := buildPod("p1", 300, 500000) nodeInfos := map[string]*schedulercache.NodeInfo{ "n1": schedulercache.NewNodeInfo(pod1), "n2": schedulercache.NewNodeInfo(), } node1 := buildNode("n1", 1000, 2000000) node2 := buildNode("n2", 1000, 2000000) err := findPlaceFor( []*kube_api.Pod{}, []*kube_api.Node{node1, node2}, nodeInfos) assert.NoError(t, err) }
func TestReservation(t *testing.T) { pod := buildPod("p1", 100, 200000) pod2 := &kube_api.Pod{ Spec: kube_api.PodSpec{ Containers: []kube_api.Container{ { Resources: kube_api.ResourceRequirements{ Requests: kube_api.ResourceList{}, }, }, }, }, } nodeInfo := schedulercache.NewNodeInfo(pod, pod, pod2) node := &kube_api.Node{ Status: kube_api.NodeStatus{ Capacity: kube_api.ResourceList{ kube_api.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI), }, }, } reservation, err := calculateReservation(node, nodeInfo, kube_api.ResourceCPU) assert.NoError(t, err) assert.InEpsilon(t, 1.0/10, reservation, 0.01) _, err = calculateReservation(node, nodeInfo, kube_api.ResourceMemory) assert.Error(t, err) }
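// Hedged sketch of the arithmetic the reservation test above expects: the summed CPU requests
// of the pods on the node divided by the node's CPU capacity. calculateReservation's real
// signature and behaviour live in the package under test; this standalone example only mirrors
// the numbers from TestReservation (two 100m pods plus one request-less pod on a 2000m node -> 0.1).
package main

import "fmt"

// cpuReservation returns the fraction of node CPU capacity requested by the given pods.
func cpuReservation(podRequestsMilli []int64, nodeCapacityMilli int64) float64 {
	var sum int64
	for _, r := range podRequestsMilli {
		sum += r
	}
	return float64(sum) / float64(nodeCapacityMilli)
}

func main() {
	// pod "p1" is counted twice in the test's NodeInfo; pod2 requests nothing.
	fmt.Println(cpuReservation([]int64{100, 100, 0}, 2000)) // prints 0.1
}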
// BuildNodeInfoForNode builds a NodeInfo structure for the given node as if the node was just created.
func BuildNodeInfoForNode(nodename string, client *kube_client.Client) (*schedulercache.NodeInfo, error) {
	requiredPods, err := GetRequiredPodsForNode(nodename, client)
	if err != nil {
		return nil, err
	}
	return schedulercache.NewNodeInfo(requiredPods...), nil
}
// TODO: We don't need to pass list of nodes here as they are already available in nodeInfos. func findPlaceFor(pods []*kube_api.Pod, nodes []*kube_api.Node, nodeInfos map[string]*schedulercache.NodeInfo) error { predicateChecker := NewPredicateChecker() for _, pod := range pods { foundPlace := false glog.V(4).Infof("Looking for place for %s/%s", pod.Namespace, pod.Name) // TODO: Sort nodes by reservation nodeloop: for _, node := range nodes { node.Status.Allocatable = node.Status.Capacity if nodeInfo, found := nodeInfos[node.Name]; found { err := predicateChecker.CheckPredicates(pod, nodeInfo) glog.V(4).Infof("Evaluation %s for %s/%s -> %v", node.Name, pod.Namespace, pod.Name, err) if err == nil { foundPlace = true // TODO(mwielgus): Optimize it. podsOnNode := nodeInfo.Pods() podsOnNode = append(podsOnNode, pod) nodeInfos[node.Name] = schedulercache.NewNodeInfo(podsOnNode...) break nodeloop } } } if !foundPlace { return fmt.Errorf("failed to find place for %s/%s", pod.Namespace, pod.Name) } } return nil }
// The caller of this function must remove the taint if this function returns an error.
func prepareNodeForPod(client *kube_client.Client, recorder kube_record.EventRecorder, predicateChecker *ca_simulator.PredicateChecker, originalNode *kube_api.Node, criticalPod *kube_api.Pod) error {
	// Operate on a copy of the node to ensure pods running on the node will pass CheckPredicates below.
	node, err := copyNode(originalNode)
	if err != nil {
		return fmt.Errorf("Error while copying node: %v", err)
	}
	err = addTaint(client, originalNode, podId(criticalPod))
	if err != nil {
		return fmt.Errorf("Error while adding taint: %v", err)
	}

	requiredPods, otherPods, err := groupPods(client, node)
	if err != nil {
		return err
	}

	nodeInfo := schedulercache.NewNodeInfo(requiredPods...)
	nodeInfo.SetNode(node)

	// check whether the critical pod still fits
	if err := predicateChecker.CheckPredicates(criticalPod, nodeInfo); err != nil {
		return fmt.Errorf("Pod %s doesn't fit to node %v: %v", podId(criticalPod), node.Name, err)
	}
	requiredPods = append(requiredPods, criticalPod)
	nodeInfo = schedulercache.NewNodeInfo(requiredPods...)
	nodeInfo.SetNode(node)

	for _, p := range otherPods {
		if err := predicateChecker.CheckPredicates(p, nodeInfo); err != nil {
			glog.Infof("Pod %s will be deleted in order to schedule critical pod %s.", podId(p), podId(criticalPod))
			recorder.Eventf(p, kube_api.EventTypeNormal, "DeletedByRescheduler",
				"Deleted by rescheduler in order to schedule critical pod %s.", podId(criticalPod))
			// TODO(piosz): add better support of graceful deletion
			delErr := client.Pods(p.Namespace).Delete(p.Name, kube_api.NewDeleteOptions(10))
			if delErr != nil {
				return fmt.Errorf("Failed to delete pod %s: %v", podId(p), delErr)
			}
		} else {
			newPods := append(nodeInfo.Pods(), p)
			nodeInfo = schedulercache.NewNodeInfo(newPods...)
			nodeInfo.SetNode(node)
		}
	}

	// TODO(piosz): how to reset scheduler backoff?
	return nil
}
func TestNodeLabelPresence(t *testing.T) { label := map[string]string{"foo": "bar", "bar": "foo"} tests := []struct { pod *api.Pod labels []string presence bool fits bool test string }{ { labels: []string{"baz"}, presence: true, fits: false, test: "label does not match, presence true", }, { labels: []string{"baz"}, presence: false, fits: true, test: "label does not match, presence false", }, { labels: []string{"foo", "baz"}, presence: true, fits: false, test: "one label matches, presence true", }, { labels: []string{"foo", "baz"}, presence: false, fits: false, test: "one label matches, presence false", }, { labels: []string{"foo", "bar"}, presence: true, fits: true, test: "all labels match, presence true", }, { labels: []string{"foo", "bar"}, presence: false, fits: false, test: "all labels match, presence false", }, } for _, test := range tests { node := api.Node{ObjectMeta: api.ObjectMeta{Labels: label}} labelChecker := NodeLabelChecker{FakeNodeInfo(node), test.labels, test.presence} fits, err := labelChecker.CheckNodeLabelPresence(test.pod, "machine", schedulercache.NewNodeInfo()) if err != nil { t.Errorf("unexpected error: %v", err) } if fits != test.fits { t.Errorf("%s: expected: %v got %v", test.test, test.fits, fits) } } }
func TestFindPlaceAllBas(t *testing.T) { pod1 := buildPod("p1", 300, 500000) new1 := buildPod("p2", 600, 500000) new2 := buildPod("p3", 500, 500000) new3 := buildPod("p4", 700, 500000) nodeInfos := map[string]*schedulercache.NodeInfo{ "n1": schedulercache.NewNodeInfo(pod1), "n2": schedulercache.NewNodeInfo(), } node1 := buildNode("n1", 1000, 2000000) node2 := buildNode("n2", 1000, 2000000) err := findPlaceFor( []*kube_api.Pod{new1, new2, new3}, []*kube_api.Node{node1, node2}, nodeInfos) assert.Error(t, err) }
func TestFindNone(t *testing.T) { pod1 := BuildTestPod("p1", 300, 500000) nodeInfos := map[string]*schedulercache.NodeInfo{ "n1": schedulercache.NewNodeInfo(pod1), "n2": schedulercache.NewNodeInfo(), } node1 := BuildTestNode("n1", 1000, 2000000) node2 := BuildTestNode("n2", 1000, 2000000) nodeInfos["n1"].SetNode(node1) nodeInfos["n2"].SetNode(node2) err := findPlaceFor( "x", []*kube_api.Pod{}, []*kube_api.Node{node1, node2}, nodeInfos, NewTestPredicateChecker()) assert.NoError(t, err) }
func (dsc *DaemonSetsController) nodeShouldRunDaemonPod(node *api.Node, ds *extensions.DaemonSet) bool { // If the daemon set specifies a node name, check that it matches with node.Name. if !(ds.Spec.Template.Spec.NodeName == "" || ds.Spec.Template.Spec.NodeName == node.Name) { return false } // TODO: Move it to the predicates for _, c := range node.Status.Conditions { if c.Type == api.NodeOutOfDisk && c.Status == api.ConditionTrue { return false } } newPod := &api.Pod{Spec: ds.Spec.Template.Spec, ObjectMeta: ds.Spec.Template.ObjectMeta} newPod.Spec.NodeName = node.Name pods := []*api.Pod{} for _, m := range dsc.podStore.Indexer.List() { pod := m.(*api.Pod) if pod.Spec.NodeName != node.Name { continue } if pod.Status.Phase == api.PodSucceeded || pod.Status.Phase == api.PodFailed { continue } // ignore pods that belong to the daemonset when taking into account whether // a daemonset should bind to a node. if pds := dsc.getPodDaemonSet(pod); pds != nil && ds.Name == pds.Name { continue } pods = append(pods, pod) } nodeInfo := schedulercache.NewNodeInfo(pods...) nodeInfo.SetNode(node) fit, reasons, err := predicates.GeneralPredicates(newPod, nil, nodeInfo) if err != nil { glog.Warningf("GeneralPredicates failed on pod %s due to unexpected error: %v", newPod.Name, err) } for _, r := range reasons { glog.V(2).Infof("GeneralPredicates failed on pod %s for reason: %v", newPod.Name, r.GetReason()) } if !fit { return false } fit, reasons, err = predicates.PodToleratesNodeTaints(newPod, predicates.PredicateMetadata(newPod, nil), nodeInfo) if err != nil { glog.Warningf("PodToleratesNodeTaints failed on pod %s due to unexpected error: %v", newPod.Name, err) } for _, r := range reasons { glog.V(2).Infof("PodToleratesNodeTaints failed on pod %s for reason: %v", newPod.Name, r.GetReason()) } return fit }
// BuildNodeInfoForNode builds a NodeInfo structure for the given node as if the node was just created.
func BuildNodeInfoForNode(node *apiv1.Node, client kube_client.Interface) (*schedulercache.NodeInfo, error) {
	requiredPods, err := GetRequiredPodsForNode(node.Name, client)
	if err != nil {
		return nil, err
	}
	result := schedulercache.NewNodeInfo(requiredPods...)
	if err := result.SetNode(node); err != nil {
		return nil, err
	}
	return result, nil
}
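// Hedged usage sketch: the NodeInfo built above is typically handed straight to a
// PredicateChecker, mirroring the pattern used by prepareNodeForPod and findNodeForPod in this
// collection. The helper name podFitsFreshNode and its parameter list are illustrative
// assumptions, not part of the package.
func podFitsFreshNode(client kube_client.Interface, checker *PredicateChecker, node *apiv1.Node, pod *apiv1.Pod) error {
	// Build the node state as if only the required (e.g. daemon/system) pods were running on it.
	nodeInfo, err := BuildNodeInfoForNode(node, client)
	if err != nil {
		return err
	}
	// Ask the scheduler predicates whether the pod would fit on that freshly-created node.
	return checker.CheckPredicates(pod, nodeInfo)
}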
func (dsc *DaemonSetsController) nodeShouldRunDaemonPod(node *v1.Node, ds *extensions.DaemonSet) bool { // If the daemon set specifies a node name, check that it matches with node.Name. if !(ds.Spec.Template.Spec.NodeName == "" || ds.Spec.Template.Spec.NodeName == node.Name) { return false } // TODO: Move it to the predicates for _, c := range node.Status.Conditions { if c.Type == v1.NodeOutOfDisk && c.Status == v1.ConditionTrue { return false } } newPod := &v1.Pod{Spec: ds.Spec.Template.Spec, ObjectMeta: ds.Spec.Template.ObjectMeta} newPod.Namespace = ds.Namespace newPod.Spec.NodeName = node.Name pods := []*v1.Pod{} for _, m := range dsc.podStore.Indexer.List() { pod := m.(*v1.Pod) if pod.Spec.NodeName != node.Name { continue } if pod.Status.Phase == v1.PodSucceeded || pod.Status.Phase == v1.PodFailed { continue } // ignore pods that belong to the daemonset when taking into account whether // a daemonset should bind to a node. if pds := dsc.getPodDaemonSet(pod); pds != nil && ds.Name == pds.Name { continue } pods = append(pods, pod) } nodeInfo := schedulercache.NewNodeInfo(pods...) nodeInfo.SetNode(node) fit, reasons, err := predicates.GeneralPredicates(newPod, nil, nodeInfo) if err != nil { glog.Warningf("GeneralPredicates failed on ds '%s/%s' due to unexpected error: %v", ds.ObjectMeta.Namespace, ds.ObjectMeta.Name, err) } for _, r := range reasons { glog.V(4).Infof("GeneralPredicates failed on ds '%s/%s' for reason: %v", ds.ObjectMeta.Namespace, ds.ObjectMeta.Name, r.GetReason()) switch reason := r.(type) { case *predicates.InsufficientResourceError: dsc.eventRecorder.Eventf(ds, v1.EventTypeNormal, "FailedPlacement", "failed to place pod on %q: %s", node.ObjectMeta.Name, reason.Error()) case *predicates.PredicateFailureError: if reason == predicates.ErrPodNotFitsHostPorts { dsc.eventRecorder.Eventf(ds, v1.EventTypeNormal, "FailedPlacement", "failed to place pod on %q: host port conflict", node.ObjectMeta.Name) } } } return fit }
func (dsc *DaemonSetsController) nodeShouldRunDaemonPod(node *api.Node, ds *extensions.DaemonSet) bool {
	// If the daemon set specifies a node name, check that it matches with node.Name.
	if !(ds.Spec.Template.Spec.NodeName == "" || ds.Spec.Template.Spec.NodeName == node.Name) {
		return false
	}

	// TODO: Move it to the predicates
	for _, c := range node.Status.Conditions {
		if c.Type == api.NodeOutOfDisk && c.Status == api.ConditionTrue {
			return false
		}
	}

	newPod := &api.Pod{Spec: ds.Spec.Template.Spec, ObjectMeta: ds.Spec.Template.ObjectMeta}
	newPod.Spec.NodeName = node.Name

	pods := []*api.Pod{}

	for _, m := range dsc.podStore.Indexer.List() {
		pod := m.(*api.Pod)
		if pod.Spec.NodeName != node.Name {
			continue
		}
		if pod.Status.Phase == api.PodSucceeded || pod.Status.Phase == api.PodFailed {
			continue
		}
		// ignore pods that belong to the daemonset when taking into account whether
		// a daemonset should bind to a node.
		if pds := dsc.getPodDaemonSet(pod); pds != nil && ds.Name == pds.Name {
			continue
		}
		pods = append(pods, pod)
	}

	nodeInfo := schedulercache.NewNodeInfo(pods...)
	nodeInfo.SetNode(node)
	fit, err := predicates.GeneralPredicates(newPod, nil, nodeInfo)
	if err != nil {
		if re, ok := err.(*predicates.PredicateFailureError); ok {
			message := re.Error()
			glog.V(2).Infof("Predicate failed on Pod: %s, for reason: %v", newPod.Name, message)
		}
		if re, ok := err.(*predicates.InsufficientResourceError); ok {
			message := re.Error()
			glog.V(2).Infof("Predicate failed on Pod: %s, for reason: %v", newPod.Name, message)
		}
		message := fmt.Sprintf("GeneralPredicates failed due to %v.", err)
		glog.Warningf("Predicate failed on Pod %s - %s", newPod.Name, message)
	}
	return fit
}
// The caller of this function must remove the taint if this function returns an error.
func prepareNodeForPod(client *kube_client.Client, predicateChecker *ca_simulator.PredicateChecker, node *kube_api.Node, criticalPod *kube_api.Pod) error {
	addTaint(client, node, podId(criticalPod))

	requiredPods, otherPods, err := groupPods(client, node)
	if err != nil {
		return err
	}

	nodeInfo := schedulercache.NewNodeInfo(requiredPods...)
	nodeInfo.SetNode(node)

	// check whether the critical pod still fits
	if err := predicateChecker.CheckPredicates(criticalPod, nodeInfo); err != nil {
		return fmt.Errorf("Pod %s doesn't fit to node %v: %v", podId(criticalPod), node.Name, err)
	}
	requiredPods = append(requiredPods, criticalPod)
	nodeInfo = schedulercache.NewNodeInfo(requiredPods...)
	nodeInfo.SetNode(node)

	for _, p := range otherPods {
		if err := predicateChecker.CheckPredicates(p, nodeInfo); err != nil {
			glog.Infof("Pod %s will be deleted in order to schedule critical pod %s.", podId(p), podId(criticalPod))
			// TODO(piosz): add better support of graceful deletion
			delErr := client.Pods(p.Namespace).Delete(p.Name, kube_api.NewDeleteOptions(10))
			if delErr != nil {
				return fmt.Errorf("Failed to delete pod %s: %v", podId(p), delErr)
			}
		} else {
			newPods := append(nodeInfo.Pods(), p)
			nodeInfo = schedulercache.NewNodeInfo(newPods...)
			nodeInfo.SetNode(node)
		}
	}

	// TODO(piosz): how to reset scheduler backoff?
	return nil
}
// TODO: We don't need to pass list of nodes here as they are already available in nodeInfos. func findPlaceFor(bannedNode string, pods []*kube_api.Pod, nodes []*kube_api.Node, nodeInfos map[string]*schedulercache.NodeInfo, predicateChecker *PredicateChecker) error { newNodeInfos := make(map[string]*schedulercache.NodeInfo) for _, podptr := range pods { newpod := *podptr newpod.Spec.NodeName = "" pod := &newpod foundPlace := false glog.V(4).Infof("Looking for place for %s/%s", pod.Namespace, pod.Name) podKey := fmt.Sprintf("%s/%s", pod.Namespace, pod.Name) // TODO: Sort nodes by reservation nodeloop: for _, node := range nodes { if node.Name == bannedNode { continue } node.Status.Allocatable = node.Status.Capacity nodeInfo, found := newNodeInfos[node.Name] if !found { nodeInfo, found = nodeInfos[node.Name] } if found { err := predicateChecker.CheckPredicates(pod, nodeInfo) glog.V(4).Infof("Evaluation %s for %s -> %v", node.Name, podKey, err) if err == nil { foundPlace = true // TODO(mwielgus): Optimize it. podsOnNode := nodeInfo.Pods() podsOnNode = append(podsOnNode, pod) newNodeInfo := schedulercache.NewNodeInfo(podsOnNode...) newNodeInfo.SetNode(node) newNodeInfos[node.Name] = newNodeInfo break nodeloop } } } if !foundPlace { return fmt.Errorf("failed to find place for %s", podKey) } } return nil }
func TestPodFitsHost(t *testing.T) { tests := []struct { pod *api.Pod node string fits bool test string }{ { pod: &api.Pod{}, node: "foo", fits: true, test: "no host specified", }, { pod: &api.Pod{ Spec: api.PodSpec{ NodeName: "foo", }, }, node: "foo", fits: true, test: "host matches", }, { pod: &api.Pod{ Spec: api.PodSpec{ NodeName: "bar", }, }, node: "foo", fits: false, test: "host doesn't match", }, } for _, test := range tests { result, err := PodFitsHost(test.pod, test.node, schedulercache.NewNodeInfo()) if !reflect.DeepEqual(err, ErrPodNotMatchHostName) && err != nil { t.Errorf("unexpected error: %v", err) } if result == false && !reflect.DeepEqual(err, ErrPodNotMatchHostName) { t.Errorf("unexpected error: %v", err) } if result != test.fits { t.Errorf("unexpected difference for %s: expected: %v got %v", test.test, test.fits, result) } } }
func TestUtilization(t *testing.T) { pod := BuildTestPod("p1", 100, 200000) pod2 := BuildTestPod("p2", -1, -1) nodeInfo := schedulercache.NewNodeInfo(pod, pod, pod2) node := BuildTestNode("node1", 2000, 2000000) utilization, err := CalculateUtilization(node, nodeInfo) assert.NoError(t, err) assert.InEpsilon(t, 2.0/10, utilization, 0.01) node2 := BuildTestNode("node1", 2000, -1) _, err = CalculateUtilization(node2, nodeInfo) assert.Error(t, err) }
func TestBinpackingEstimateWithPorts(t *testing.T) { estimator := NewBinpackingNodeEstimator(simulator.NewTestPredicateChecker()) cpuPerPod := int64(200) memoryPerPod := int64(1000 * 1024 * 1024) pod := &kube_api.Pod{ Spec: kube_api.PodSpec{ Containers: []kube_api.Container{ { Resources: kube_api.ResourceRequirements{ Requests: kube_api.ResourceList{ kube_api.ResourceCPU: *resource.NewMilliQuantity(cpuPerPod, resource.DecimalSI), kube_api.ResourceMemory: *resource.NewQuantity(memoryPerPod, resource.DecimalSI), }, }, Ports: []kube_api.ContainerPort{ { HostPort: 5555, }, }, }, }, }, } pods := make([]*kube_api.Pod, 0) for i := 0; i < 8; i++ { pods = append(pods, pod) } node := &kube_api.Node{ Status: kube_api.NodeStatus{ Capacity: kube_api.ResourceList{ kube_api.ResourceCPU: *resource.NewMilliQuantity(5*cpuPerPod, resource.DecimalSI), kube_api.ResourceMemory: *resource.NewQuantity(5*memoryPerPod, resource.DecimalSI), kube_api.ResourcePods: *resource.NewQuantity(10, resource.DecimalSI), }, }, } node.Status.Allocatable = node.Status.Capacity nodeInfo := schedulercache.NewNodeInfo() nodeInfo.SetNode(node) estimate := estimator.Estimate(pods, nodeInfo) assert.Equal(t, 8, estimate) }
func TestBinpackingEstimate(t *testing.T) { estimator := NewBinpackingNodeEstimator(simulator.NewTestPredicateChecker()) cpuPerPod := int64(350) memoryPerPod := int64(1000 * 1024 * 1024) pod := &apiv1.Pod{ Spec: apiv1.PodSpec{ Containers: []apiv1.Container{ { Resources: apiv1.ResourceRequirements{ Requests: apiv1.ResourceList{ apiv1.ResourceCPU: *resource.NewMilliQuantity(cpuPerPod, resource.DecimalSI), apiv1.ResourceMemory: *resource.NewQuantity(memoryPerPod, resource.DecimalSI), }, }, }, }, }, } pods := make([]*apiv1.Pod, 0) for i := 0; i < 10; i++ { pods = append(pods, pod) } node := &apiv1.Node{ Status: apiv1.NodeStatus{ Capacity: apiv1.ResourceList{ apiv1.ResourceCPU: *resource.NewMilliQuantity(cpuPerPod*3-50, resource.DecimalSI), apiv1.ResourceMemory: *resource.NewQuantity(2*memoryPerPod, resource.DecimalSI), apiv1.ResourcePods: *resource.NewQuantity(10, resource.DecimalSI), }, }, } node.Status.Allocatable = node.Status.Capacity nodeInfo := schedulercache.NewNodeInfo() nodeInfo.SetNode(node) estimate := estimator.Estimate(pods, nodeInfo) assert.Equal(t, 5, estimate) }
// Currently the logic chooses a random node which satisfies requirements (a critical pod fits there).
// TODO(piosz): add a prioritization to this logic
func findNodeForPod(client *kube_client.Client, predicateChecker *ca_simulator.PredicateChecker, nodes []*kube_api.Node, pod *kube_api.Pod) *kube_api.Node {
	for _, node := range nodes {
		// ignore nodes with taints
		if err := checkTaints(node); err != nil {
			glog.Warningf("Skipping node %v due to %v", node.Name, err)
			continue
		}

		requiredPods, _, err := groupPods(client, node)
		if err != nil {
			glog.Warningf("Skipping node %v due to error: %v", node.Name, err)
			continue
		}

		nodeInfo := schedulercache.NewNodeInfo(requiredPods...)
		nodeInfo.SetNode(node)

		if err := predicateChecker.CheckPredicates(pod, nodeInfo); err == nil {
			return node
		}
	}
	return nil
}
// nodeShouldRunDaemonPod checks a set of preconditions against a (node,daemonset) and returns a
// summary. Returned booleans are:
// * wantToRun:
//     Returns true when a user would expect a pod to run on this node and ignores conditions
//     such as OutOfDisk or insufficient resource that would cause a daemonset pod not to schedule.
//     This is primarily used to populate daemonset status.
// * shouldSchedule:
//     Returns true when a daemonset should be scheduled to a node if a daemonset pod is not already
//     running on that node.
// * shouldContinueRunning:
//     Returns true when a daemonset should continue running on a node if a daemonset pod is already
//     running on that node.
func (dsc *DaemonSetsController) nodeShouldRunDaemonPod(node *v1.Node, ds *extensions.DaemonSet) (wantToRun, shouldSchedule, shouldContinueRunning bool, err error) {
	// Because these bools require an && of all their required conditions, we start
	// with all bools set to true and set a bool to false if a condition is not met.
	// A bool should probably not be set to true after this line.
	wantToRun, shouldSchedule, shouldContinueRunning = true, true, true
	// If the daemon set specifies a node name, check that it matches with node.Name.
	if !(ds.Spec.Template.Spec.NodeName == "" || ds.Spec.Template.Spec.NodeName == node.Name) {
		return false, false, false, nil
	}

	// TODO: Move it to the predicates
	for _, c := range node.Status.Conditions {
		if c.Type == v1.NodeOutOfDisk && c.Status == v1.ConditionTrue {
			// the kubelet will evict this pod if it needs to. Let kubelet
			// decide whether to continue running this pod so leave shouldContinueRunning
			// set to true
			shouldSchedule = false
		}
	}

	newPod := &v1.Pod{Spec: ds.Spec.Template.Spec, ObjectMeta: ds.Spec.Template.ObjectMeta}
	newPod.Namespace = ds.Namespace
	newPod.Spec.NodeName = node.Name

	pods := []*v1.Pod{}
	for _, m := range dsc.podStore.Indexer.List() {
		pod := m.(*v1.Pod)
		if pod.Spec.NodeName != node.Name {
			continue
		}
		if pod.Status.Phase == v1.PodSucceeded || pod.Status.Phase == v1.PodFailed {
			continue
		}
		// ignore pods that belong to the daemonset when taking into account whether
		// a daemonset should bind to a node.
		if pds := dsc.getPodDaemonSet(pod); pds != nil && ds.Name == pds.Name {
			continue
		}
		pods = append(pods, pod)
	}

	nodeInfo := schedulercache.NewNodeInfo(pods...)
	nodeInfo.SetNode(node)
	_, reasons, err := predicates.GeneralPredicates(newPod, nil, nodeInfo)
	if err != nil {
		glog.Warningf("GeneralPredicates failed on ds '%s/%s' due to unexpected error: %v", ds.ObjectMeta.Namespace, ds.ObjectMeta.Name, err)
		return false, false, false, err
	}
	for _, r := range reasons {
		glog.V(4).Infof("GeneralPredicates failed on ds '%s/%s' for reason: %v", ds.ObjectMeta.Namespace, ds.ObjectMeta.Name, r.GetReason())
		switch reason := r.(type) {
		case *predicates.InsufficientResourceError:
			dsc.eventRecorder.Eventf(ds, v1.EventTypeNormal, "FailedPlacement", "failed to place pod on %q: %s", node.ObjectMeta.Name, reason.Error())
			shouldSchedule = false
		case *predicates.PredicateFailureError:
			var emitEvent bool
			// we try to partition predicates into two partitions here: intentional on the part of the operator and not.
			switch reason {
			// intentional
			case
				predicates.ErrNodeSelectorNotMatch,
				predicates.ErrPodNotMatchHostName,
				predicates.ErrNodeLabelPresenceViolated,
				// this one is probably intentional since it's a workaround for not having
				// pod hard anti affinity.
				predicates.ErrPodNotFitsHostPorts:
				wantToRun, shouldSchedule, shouldContinueRunning = false, false, false
			// unintentional
			case
				predicates.ErrDiskConflict,
				predicates.ErrVolumeZoneConflict,
				predicates.ErrMaxVolumeCountExceeded,
				predicates.ErrNodeUnderMemoryPressure,
				predicates.ErrNodeUnderDiskPressure:
				// wantToRun and shouldContinueRunning are likely true here. They are
				// absolutely true at the time of writing the comment. See first comment
				// of this method.
				shouldSchedule = false
				emitEvent = true
			// unexpected
			case
				predicates.ErrPodAffinityNotMatch,
				predicates.ErrServiceAffinityViolated,
				predicates.ErrTaintsTolerationsNotMatch:
				return false, false, false, fmt.Errorf("unexpected reason: GeneralPredicates should not return reason %s", reason.GetReason())
			default:
				glog.V(4).Infof("unknown predicate failure reason: %s", reason.GetReason())
				wantToRun, shouldSchedule, shouldContinueRunning = false, false, false
				emitEvent = true
			}
			if emitEvent {
				dsc.eventRecorder.Eventf(ds, v1.EventTypeNormal, "FailedPlacement", "failed to place pod on %q: %s", node.ObjectMeta.Name, reason.GetReason())
			}
		}
	}
	return
}
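// Hedged sketch of how a caller might act on the three booleans documented above. The helper
// name, its parameters, and the returned strings are illustrative assumptions; the real
// DaemonSet controller's sync loop is more involved and also handles errors and pod counts.
func decideDaemonPodAction(shouldSchedule, shouldContinueRunning, daemonPodRunning bool) string {
	switch {
	case shouldSchedule && !daemonPodRunning:
		// a daemon pod should run here but none does yet
		return "create a daemon pod on this node"
	case !shouldContinueRunning && daemonPodRunning:
		// a daemon pod is running but should no longer be
		return "delete the daemon pod from this node"
	default:
		return "leave the node as is"
	}
}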