Beispiel #1
0
// In order to meet interface criteria for LeaderElectionConfig we need to
// take stop channell as an argument. However, since we are committing a suicide
// after loosing mastership we can safely ignore it.
func run(_ <-chan struct{}) {
	kubeClient := createKubeClient()

	predicateChecker, err := simulator.NewPredicateChecker(kubeClient)
	if err != nil {
		glog.Fatalf("Failed to create predicate checker: %v", err)
	}
	unschedulablePodLister := kube_util.NewUnschedulablePodLister(kubeClient, kube_api.NamespaceAll)
	scheduledPodLister := kube_util.NewScheduledPodLister(kubeClient)
	nodeLister := kube_util.NewNodeLister(kubeClient)

	lastScaleUpTime := time.Now()
	lastScaleDownFailedTrial := time.Now()
	unneededNodes := make(map[string]time.Time)
	podLocationHints := make(map[string]string)
	usageTracker := simulator.NewUsageTracker()

	recorder := createEventRecorder(kubeClient)

	var cloudProvider cloudprovider.CloudProvider

	if *cloudProviderFlag == "gce" {
		// GCE Manager
		var gceManager *gce.GceManager
		var gceError error
		if *cloudConfig != "" {
			config, fileErr := os.Open(*cloudConfig)
			if fileErr != nil {
				glog.Fatalf("Couldn't open cloud provider configuration %s: %#v", *cloudConfig, err)
			}
			defer config.Close()
			gceManager, gceError = gce.CreateGceManager(config)
		} else {
			gceManager, gceError = gce.CreateGceManager(nil)
		}
		if gceError != nil {
			glog.Fatalf("Failed to create GCE Manager: %v", err)
		}
		cloudProvider, err = gce.BuildGceCloudProvider(gceManager, nodeGroupsFlag)
		if err != nil {
			glog.Fatalf("Failed to create GCE cloud provider: %v", err)
		}
	}

	if *cloudProviderFlag == "aws" {
		var awsManager *aws.AwsManager
		var awsError error
		if *cloudConfig != "" {
			config, fileErr := os.Open(*cloudConfig)
			if fileErr != nil {
				glog.Fatalf("Couldn't open cloud provider configuration %s: %#v", *cloudConfig, err)
			}
			defer config.Close()
			awsManager, awsError = aws.CreateAwsManager(config)
		} else {
			awsManager, awsError = aws.CreateAwsManager(nil)
		}
		if awsError != nil {
			glog.Fatalf("Failed to create AWS Manager: %v", err)
		}
		cloudProvider, err = aws.BuildAwsCloudProvider(awsManager, nodeGroupsFlag)
		if err != nil {
			glog.Fatalf("Failed to create AWS cloud provider: %v", err)
		}
	}

	for {
		select {
		case <-time.After(*scanInterval):
			{
				loopStart := time.Now()
				updateLastTime("main")

				nodes, err := nodeLister.List()
				if err != nil {
					glog.Errorf("Failed to list nodes: %v", err)
					continue
				}
				if len(nodes) == 0 {
					glog.Errorf("No nodes in the cluster")
					continue
				}

				if err := CheckGroupsAndNodes(nodes, cloudProvider); err != nil {
					glog.Warningf("Cluster is not ready for autoscaling: %v", err)
					continue
				}

				allUnschedulablePods, err := unschedulablePodLister.List()
				if err != nil {
					glog.Errorf("Failed to list unscheduled pods: %v", err)
					continue
				}

				allScheduled, err := scheduledPodLister.List()
				if err != nil {
					glog.Errorf("Failed to list scheduled pods: %v", err)
					continue
				}

				// We need to reset all pods that have been marked as unschedulable not after
				// the newest node became available for the scheduler.
				allNodesAvailableTime := GetAllNodesAvailableTime(nodes)
				podsToReset, unschedulablePodsToHelp := SlicePodsByPodScheduledTime(allUnschedulablePods, allNodesAvailableTime)
				ResetPodScheduledCondition(kubeClient, podsToReset)

				// We need to check whether pods marked as unschedulable are actually unschedulable.
				// This should prevent from adding unnecessary nodes. Example of such situation:
				// - CA and Scheduler has slightly different configuration
				// - Scheduler can't schedule a pod and marks it as unschedulable
				// - CA added a node which should help the pod
				// - Scheduler doesn't schedule the pod on the new node
				//   because according to it logic it doesn't fit there
				// - CA see the pod is still unschedulable, so it adds another node to help it
				//
				// With the check enabled the last point won't happen because CA will ignore a pod
				// which is supposed to schedule on an existing node.
				//
				// Without below check cluster might be unnecessary scaled up to the max allowed size
				// in the describe situation.
				schedulablePodsPresent := false
				if *verifyUnschedulablePods {
					newUnschedulablePodsToHelp := FilterOutSchedulable(unschedulablePodsToHelp, nodes, allScheduled, predicateChecker)

					if len(newUnschedulablePodsToHelp) != len(unschedulablePodsToHelp) {
						glog.V(2).Info("Schedulable pods present")
						schedulablePodsPresent = true
					}
					unschedulablePodsToHelp = newUnschedulablePodsToHelp
				}

				if len(unschedulablePodsToHelp) == 0 {
					glog.V(1).Info("No unschedulable pods")
				} else if *maxNodesTotal > 0 && len(nodes) >= *maxNodesTotal {
					glog.V(1).Info("Max total nodes in cluster reached")
				} else {
					scaleUpStart := time.Now()
					updateLastTime("scaleup")
					scaledUp, err := ScaleUp(unschedulablePodsToHelp, nodes, cloudProvider, kubeClient, predicateChecker, recorder,
						*maxNodesTotal)

					updateDuration("scaleup", scaleUpStart)

					if err != nil {
						glog.Errorf("Failed to scale up: %v", err)
						continue
					} else {
						if scaledUp {
							lastScaleUpTime = time.Now()
							// No scale down in this iteration.
							continue
						}
					}
				}

				if *scaleDownEnabled {
					unneededStart := time.Now()

					// In dry run only utilization is updated
					calculateUnneededOnly := lastScaleUpTime.Add(*scaleDownDelay).After(time.Now()) ||
						lastScaleDownFailedTrial.Add(*scaleDownTrialInterval).After(time.Now()) ||
						schedulablePodsPresent

					glog.V(4).Infof("Scale down status: unneededOnly=%v lastScaleUpTime=%s "+
						"lastScaleDownFailedTrail=%s schedulablePodsPresent=%v", calculateUnneededOnly,
						lastScaleUpTime, lastScaleDownFailedTrial, schedulablePodsPresent)

					updateLastTime("findUnneeded")
					glog.V(4).Infof("Calculating unneded nodes")

					usageTracker.CleanUp(time.Now().Add(-(*scaleDownUnneededTime)))
					unneededNodes, podLocationHints = FindUnneededNodes(
						nodes,
						unneededNodes,
						*scaleDownUtilizationThreshold,
						allScheduled,
						predicateChecker,
						podLocationHints,
						usageTracker, time.Now())

					updateDuration("findUnneeded", unneededStart)

					for key, val := range unneededNodes {
						if glog.V(4) {
							glog.V(4).Infof("%s is unneeded since %s duration %s", key, val.String(), time.Now().Sub(val).String())
						}
					}

					if !calculateUnneededOnly {
						glog.V(4).Infof("Starting scale down")

						scaleDownStart := time.Now()
						updateLastTime("scaledown")

						result, err := ScaleDown(
							nodes,
							unneededNodes,
							*scaleDownUnneededTime,
							allScheduled,
							cloudProvider,
							kubeClient,
							predicateChecker,
							podLocationHints,
							usageTracker)

						updateDuration("scaledown", scaleDownStart)

						// TODO: revisit result handling
						if err != nil {
							glog.Errorf("Failed to scale down: %v", err)
						} else {
							if result == ScaleDownError || result == ScaleDownNoNodeDeleted {
								lastScaleDownFailedTrial = time.Now()
							}
						}
					}
				}
				updateDuration("main", loopStart)
			}
		}
	}
}
Beispiel #2
0
func main() {
	glog.Infof("Running Rescheduler")

	flags.Parse(os.Args)

	// TODO(piosz): figure our a better way of verifying cluster stabilization here.
	time.Sleep(*initialDelay)

	kubeClient, err := createKubeClient(flags, *inCluster)
	if err != nil {
		glog.Fatalf("Failed to create kube client: %v", err)
	}

	recorder := createEventRecorder(kubeClient)

	predicateChecker, err := ca_simulator.NewPredicateChecker(kubeClient)
	if err != nil {
		glog.Fatalf("Failed to create predicate checker: %v", err)
	}

	unschedulablePodLister := kube_utils.NewUnschedulablePodLister(kubeClient, *systemNamespace)
	nodeLister := kube_utils.NewNodeLister(kubeClient)

	// TODO(piosz): consider reseting this set once every few hours.
	podsBeingProcessed := NewPodSet()

	releaseAllTaints(kubeClient, nodeLister, podsBeingProcessed)

	for {
		select {
		case <-time.After(*housekeepingInterval):
			{
				allUnschedulablePods, err := unschedulablePodLister.List()
				if err != nil {
					glog.Errorf("Failed to list unscheduled pods: %v", err)
					continue
				}

				criticalPods := filterCriticalPods(allUnschedulablePods, podsBeingProcessed)

				if len(criticalPods) > 0 {
					for _, pod := range criticalPods {
						glog.Infof("Critical pod %s is unschedulable. Trying to find a spot for it.", podId(pod))
						nodes, err := nodeLister.List()
						if err != nil {
							glog.Errorf("Failed to list nodes: %v", err)
							continue
						}

						node := findNodeForPod(kubeClient, predicateChecker, nodes, pod)
						if node == nil {
							glog.Errorf("Pod %s can't be scheduled on any existing node.", podId(pod))
							recorder.Eventf(pod, kube_api.EventTypeNormal, "PodDoestFitAnyNode",
								"Critical pod %s doesn't fit on any node.", podId(pod))
							continue
						}
						glog.Infof("Trying to place the pod on node %v", node.Name)

						err = prepareNodeForPod(kubeClient, recorder, predicateChecker, node, pod)
						if err != nil {
							glog.Warningf("%+v", err)
						} else {
							podsBeingProcessed.Add(pod)
							go waitForScheduled(kubeClient, podsBeingProcessed, pod)
						}
					}
				}

				releaseAllTaints(kubeClient, nodeLister, podsBeingProcessed)
			}
		}
	}
}