Exemplo n.º 1
0
// StartPods check for numPods in TestNS. If they exist, it no-ops, otherwise it starts up
// a temp rc, scales it to match numPods, then deletes the rc leaving behind the pods.
func StartPods(numPods int, host string, restClient *client.Client) error {
	start := time.Now()
	defer func() {
		glog.Infof("StartPods took %v with numPods %d", time.Since(start), numPods)
	}()
	hostField := fields.OneTermEqualSelector(client.PodHost, host)
	pods, err := restClient.Pods(TestNS).List(labels.Everything(), hostField)
	if err != nil || len(pods.Items) == numPods {
		return err
	}
	glog.Infof("Found %d pods that match host %v, require %d", len(pods.Items), hostField, numPods)
	// For the sake of simplicity, assume all pods in TestNS have selectors matching TestRCManifest.
	controller := RCFromManifest(TestRCManifest)

	// Make the rc unique to the given host.
	controller.Spec.Replicas = numPods
	controller.Spec.Template.Spec.NodeName = host
	controller.Name = controller.Name + host
	controller.Spec.Selector["host"] = host
	controller.Spec.Template.Labels["host"] = host

	if rc, err := StartRC(controller, restClient); err != nil {
		return err
	} else {
		// Delete the rc, otherwise when we restart master components for the next benchmark
		// the rc controller will race with the pods controller in the rc manager.
		return restClient.ReplicationControllers(TestNS).Delete(rc.Name)
	}
}
Exemplo n.º 2
0
// deletePods will delete all pods from master running on given node, and return true
// if any pods were deleted.
func (nc *NodeController) deletePods(nodeName string) (bool, error) {
	remaining := false
	pods, err := nc.kubeClient.Pods(api.NamespaceAll).List(labels.Everything(), fields.OneTermEqualSelector(client.PodHost, nodeName))
	if err != nil {
		return remaining, err
	}

	if len(pods.Items) > 0 {
		nc.recordNodeEvent(nodeName, "DeletingAllPods", fmt.Sprintf("Deleting all Pods from Node %v.", nodeName))
	}

	for _, pod := range pods.Items {
		// Defensive check, also needed for tests.
		if pod.Spec.NodeName != nodeName {
			continue
		}
		// if the pod has already been deleted, ignore it
		if pod.DeletionGracePeriodSeconds != nil {
			continue
		}

		glog.V(2).Infof("Starting deletion of pod %v", pod.Name)
		nc.recorder.Eventf(&pod, "NodeControllerEviction", "Marking for deletion Pod %s from Node %s", pod.Name, nodeName)
		if err := nc.kubeClient.Pods(pod.Namespace).Delete(pod.Name, nil); err != nil {
			return false, err
		}
		remaining = true
	}
	return remaining, nil
}
Exemplo n.º 3
0
// returns true if the provided node still has pods scheduled to it, or an error if
// the server could not be contacted.
func (nc *NodeController) hasPods(nodeName string) (bool, error) {
	pods, err := nc.kubeClient.Pods(api.NamespaceAll).List(labels.Everything(), fields.OneTermEqualSelector(client.PodHost, nodeName))
	if err != nil {
		return false, err
	}
	return len(pods.Items) > 0, nil
}
Exemplo n.º 4
0
// terminatePods will ensure all pods on the given node that are in terminating state are eventually
// cleaned up. Returns true if the node has no pods in terminating state, a duration that indicates how
// long before we should check again (the next deadline for a pod to complete), or an error.
func (nc *NodeController) terminatePods(nodeName string, since time.Time) (bool, time.Duration, error) {
	// the time before we should try again
	nextAttempt := time.Duration(0)
	// have we deleted all pods
	complete := true

	pods, err := nc.kubeClient.Pods(api.NamespaceAll).List(labels.Everything(),
		fields.OneTermEqualSelector(client.PodHost, nodeName))
	if err != nil {
		return false, nextAttempt, err
	}

	now := time.Now()
	elapsed := now.Sub(since)
	for _, pod := range pods.Items {
		// Defensive check, also needed for tests.
		if pod.Spec.NodeName != nodeName {
			continue
		}
		// only clean terminated pods
		if pod.DeletionGracePeriodSeconds == nil {
			continue
		}

		// the user's requested grace period
		grace := time.Duration(*pod.DeletionGracePeriodSeconds) * time.Second
		if grace > nc.maximumGracePeriod {
			grace = nc.maximumGracePeriod
		}

		// the time remaining before the pod should have been deleted
		remaining := grace - elapsed
		if remaining < 0 {
			remaining = 0
			glog.V(2).Infof("Removing pod %v after %s grace period", pod.Name, grace)
			nc.recordNodeEvent(nodeName, "TerminatingEvictedPod", fmt.Sprintf("Pod %s has exceeded the grace period for deletion after being evicted from Node %q and is being force killed", pod.Name, nodeName))
			if err := nc.kubeClient.Pods(pod.Namespace).Delete(pod.Name, api.NewDeleteOptions(0)); err != nil {
				glog.Errorf("Error completing deletion of pod %s: %v", pod.Name, err)
				complete = false
			}
		} else {
			glog.V(2).Infof("Pod %v still terminating, requested grace period %s, %s remaining", pod.Name, grace, remaining)
			complete = false
		}

		if nextAttempt < remaining {
			nextAttempt = remaining
		}
	}
	return complete, nextAttempt, nil
}
Exemplo n.º 5
0
// Benchmark pod listing by waiting on `Tasks` listers to list `Pods` pods via `Workers`.
func BenchmarkPodList(b *testing.B) {
	b.StopTimer()
	m := framework.NewMasterComponents(&framework.Config{nil, true, false, 250.0, 500})
	defer m.Stop(true, true)

	numPods, numTasks, iter := getPods(b.N), getTasks(b.N), getIterations(b.N)
	podsPerNode := numPods / numTasks
	if podsPerNode < 1 {
		podsPerNode = 1
	}
	glog.Infof("Starting benchmark: b.N %d, pods %d, workers %d, podsPerNode %d",
		b.N, numPods, numTasks, podsPerNode)

	startPodsOnNodes(numPods, numTasks, m.RestClient)
	// Stop the rc manager so it doesn't steal resources
	m.Stop(false, true)

	b.StartTimer()
	for i := 0; i < iter; i++ {
		framework.RunParallel(func(id int) error {
			host := fmt.Sprintf("host.%d", id)
			now := time.Now()
			defer func() {
				glog.V(3).Infof("Worker %d: Node %v listing pods took %v", id, host, time.Since(now))
			}()
			if pods, err := m.RestClient.Pods(framework.TestNS).List(
				labels.Everything(),
				fields.OneTermEqualSelector(client.PodHost, host)); err != nil {
				return err
			} else if len(pods.Items) < podsPerNode {
				glog.Fatalf("List retrieved %d pods, which is less than %d", len(pods.Items), podsPerNode)
			}
			return nil
		}, numTasks, Workers)
	}
	b.StopTimer()
}
Exemplo n.º 6
0
// NewSourceApiserver creates a config source that watches and pulls from the apiserver.
func NewSourceApiserver(c *client.Client, nodeName string, updates chan<- interface{}) {
	lw := cache.NewListWatchFromClient(c, "pods", api.NamespaceAll, fields.OneTermEqualSelector(client.PodHost, nodeName))
	newSourceApiserverFromLW(lw, updates)
}
Exemplo n.º 7
0
// ListResource returns a function that handles retrieving a list of resources from a rest.Storage object.
func ListResource(r rest.Lister, rw rest.Watcher, scope RequestScope, forceWatch bool, minRequestTimeout time.Duration) restful.RouteFunction {
	return func(req *restful.Request, res *restful.Response) {
		w := res.ResponseWriter

		namespace, err := scope.Namer.Namespace(req)
		if err != nil {
			errorJSON(err, scope.Codec, w)
			return
		}

		// Watches for single objects are routed to this function.
		// Treat a /name parameter the same as a field selector entry.
		hasName := true
		_, name, err := scope.Namer.Name(req)
		if err != nil {
			hasName = false
		}

		ctx := scope.ContextFunc(req)
		ctx = api.WithNamespace(ctx, namespace)

		out, err := queryToObject(req.Request.URL.Query(), scope, "ListOptions")
		if err != nil {
			errorJSON(err, scope.Codec, w)
			return
		}
		opts := *out.(*api.ListOptions)

		// transform fields
		// TODO: queryToObject should do this.
		fn := func(label, value string) (newLabel, newValue string, err error) {
			return scope.Convertor.ConvertFieldLabel(scope.APIVersion, scope.Kind, label, value)
		}
		if opts.FieldSelector, err = opts.FieldSelector.Transform(fn); err != nil {
			// TODO: allow bad request to set field causes based on query parameters
			err = errors.NewBadRequest(err.Error())
			errorJSON(err, scope.Codec, w)
			return
		}

		if hasName {
			// metadata.name is the canonical internal name.
			// generic.SelectionPredicate will notice that this is
			// a request for a single object and optimize the
			// storage query accordingly.
			nameSelector := fields.OneTermEqualSelector("metadata.name", name)
			if opts.FieldSelector != nil && !opts.FieldSelector.Empty() {
				// It doesn't make sense to ask for both a name
				// and a field selector, since just the name is
				// sufficient to narrow down the request to a
				// single object.
				errorJSON(
					errors.NewBadRequest("both a name and a field selector provided; please provide one or the other."),
					scope.Codec,
					w,
				)
				return
			}
			opts.FieldSelector = nameSelector
		}

		if (opts.Watch || forceWatch) && rw != nil {
			watcher, err := rw.Watch(ctx, opts.LabelSelector, opts.FieldSelector, opts.ResourceVersion)
			if err != nil {
				errorJSON(err, scope.Codec, w)
				return
			}
			serveWatch(watcher, scope, w, req, minRequestTimeout)
			return
		}

		result, err := r.List(ctx, opts.LabelSelector, opts.FieldSelector)
		if err != nil {
			errorJSON(err, scope.Codec, w)
			return
		}
		if err := setListSelfLink(result, req, scope.Namer); err != nil {
			errorJSON(err, scope.Codec, w)
			return
		}
		write(http.StatusOK, scope.APIVersion, scope.Codec, result, w, req.Request)
	}
}
Exemplo n.º 8
0
func (ks *KubeletExecutorServer) createAndInitKubelet(
	kc *app.KubeletConfig,
	hks hyperkube.Interface,
	clientConfig *client.Config,
	shutdownCloser io.Closer,
) (app.KubeletBootstrap, *kconfig.PodConfig, error) {

	// TODO(k8s): block until all sources have delivered at least one update to the channel, or break the sync loop
	// up into "per source" synchronizations
	// TODO(k8s): KubeletConfig.KubeClient should be a client interface, but client interface misses certain methods
	// used by kubelet. Since NewMainKubelet expects a client interface, we need to make sure we are not passing
	// a nil pointer to it when what we really want is a nil interface.
	var kubeClient client.Interface
	if kc.KubeClient == nil {
		kubeClient = nil
	} else {
		kubeClient = kc.KubeClient
	}

	gcPolicy := kubelet.ContainerGCPolicy{
		MinAge:             kc.MinimumGCAge,
		MaxPerPodContainer: kc.MaxPerPodContainerCount,
		MaxContainers:      kc.MaxContainerCount,
	}

	pc := kconfig.NewPodConfig(kconfig.PodConfigNotificationIncremental, kc.Recorder)
	updates := pc.Channel(MESOS_CFG_SOURCE)

	klet, err := kubelet.NewMainKubelet(
		kc.Hostname,
		kc.NodeName,
		kc.DockerClient,
		kubeClient,
		kc.RootDirectory,
		kc.PodInfraContainerImage,
		kc.SyncFrequency,
		float32(kc.RegistryPullQPS),
		kc.RegistryBurst,
		kc.EventRecordQPS,
		kc.EventBurst,
		gcPolicy,
		pc.SeenAllSources,
		kc.RegisterNode,
		kc.StandaloneMode,
		kc.ClusterDomain,
		net.IP(kc.ClusterDNS),
		kc.MasterServiceNamespace,
		kc.VolumePlugins,
		kc.NetworkPlugins,
		kc.NetworkPluginName,
		kc.StreamingConnectionIdleTimeout,
		kc.Recorder,
		kc.CAdvisorInterface,
		kc.ImageGCPolicy,
		kc.DiskSpacePolicy,
		kc.Cloud,
		kc.NodeStatusUpdateFrequency,
		kc.ResourceContainer,
		kc.OSInterface,
		kc.CgroupRoot,
		kc.ContainerRuntime,
		kc.RktPath,
		kc.RktStage1Image,
		kc.Mounter,
		kc.Writer,
		kc.DockerDaemonContainer,
		kc.SystemContainer,
		kc.ConfigureCBR0,
		kc.PodCIDR,
		kc.MaxPods,
		kc.DockerExecHandler,
		kc.ResolverConfig,
		kc.CPUCFSQuota,
		&api.NodeDaemonEndpoints{
			KubeletEndpoint: api.DaemonEndpoint{Port: int(kc.Port)},
		},
		true, // Serialize Image pulls
	)
	if err != nil {
		return nil, nil, err
	}

	//TODO(jdef) either configure Watch here with something useful, or else
	// get rid of it from executor.Config
	kubeletFinished := make(chan struct{})
	staticPodsConfigPath := filepath.Join(kc.RootDirectory, "static-pods")
	exec := executor.New(executor.Config{
		Kubelet:         klet,
		Updates:         updates,
		SourceName:      MESOS_CFG_SOURCE,
		APIClient:       kc.KubeClient,
		Docker:          kc.DockerClient,
		SuicideTimeout:  ks.SuicideTimeout,
		KubeletFinished: kubeletFinished,
		ShutdownAlert: func() {
			if shutdownCloser != nil {
				if e := shutdownCloser.Close(); e != nil {
					log.Warningf("failed to signal shutdown to external watcher: %v", e)
				}
			}
		},
		ExitFunc: os.Exit,
		PodStatusFunc: func(_ executor.KubeletInterface, pod *api.Pod) (*api.PodStatus, error) {
			return klet.GetRuntime().GetPodStatus(pod)
		},
		StaticPodsConfigPath: staticPodsConfigPath,
		PodLW:                cache.NewListWatchFromClient(kc.KubeClient, "pods", api.NamespaceAll, fields.OneTermEqualSelector(client.PodHost, kc.NodeName)),
	})

	go exec.InitializeStaticPodsSource(func() {
		// Create file source only when we are called back. Otherwise, it is never marked unseen.
		fileSourceUpdates := pc.Channel(kubelet.FileSource)

		kconfig.NewSourceFile(staticPodsConfigPath, kc.Hostname, kc.FileCheckFrequency, fileSourceUpdates)
	})

	k := &kubeletExecutor{
		Kubelet:         klet,
		address:         ks.Address,
		dockerClient:    kc.DockerClient,
		hks:             hks,
		kubeletFinished: kubeletFinished,
		executorDone:    exec.Done(),
		clientConfig:    clientConfig,
	}

	dconfig := bindings.DriverConfig{
		Executor:         exec,
		HostnameOverride: ks.HostnameOverride,
		BindingAddress:   ks.Address,
	}
	if driver, err := bindings.NewMesosExecutorDriver(dconfig); err != nil {
		log.Fatalf("failed to create executor driver: %v", err)
	} else {
		k.driver = driver
	}

	log.V(2).Infof("Initialize executor driver...")

	k.BirthCry()
	exec.Init(k.driver)

	k.StartGarbageCollection()

	return k, pc, nil
}
Exemplo n.º 9
0
// rebootNode takes node name on provider through the following steps using c:
//  - ensures the node is ready
//  - ensures all pods on the node are running and ready
//  - reboots the node (by executing rebootCmd over ssh)
//  - ensures the node reaches some non-ready state
//  - ensures the node becomes ready again
//  - ensures all pods on the node become running and ready again
//
// It returns true through result only if all of the steps pass; at the first
// failed step, it will return false through result and not run the rest.
func rebootNode(c *client.Client, provider, name, rebootCmd string, result chan bool) {
	// Setup
	ns := api.NamespaceSystem
	ps := newPodStore(c, ns, labels.Everything(), fields.OneTermEqualSelector(client.PodHost, name))
	defer ps.Stop()

	// Get the node initially.
	Logf("Getting %s", name)
	node, err := c.Nodes().Get(name)
	if err != nil {
		Logf("Couldn't get node %s", name)
		result <- false
		return
	}

	// Node sanity check: ensure it is "ready".
	if !waitForNodeToBeReady(c, name, nodeReadyInitialTimeout) {
		result <- false
		return
	}

	// Get all the pods on the node that don't have liveness probe set.
	// Liveness probe may cause restart of a pod during node reboot, and the pod may not be running.
	pods := ps.List()
	podNames := []string{}
	for _, p := range pods {
		probe := false
		for _, c := range p.Spec.Containers {
			if c.LivenessProbe != nil {
				probe = true
				break
			}
		}
		if !probe {
			podNames = append(podNames, p.ObjectMeta.Name)
		}
	}
	Logf("Node %s has %d pods: %v", name, len(podNames), podNames)

	// For each pod, we do a sanity check to ensure it's running / healthy
	// now, as that's what we'll be checking later.
	if !checkPodsRunningReady(c, ns, podNames, podReadyBeforeTimeout) {
		result <- false
		return
	}

	// Reboot the node.
	if err = issueSSHCommand(node, provider, rebootCmd); err != nil {
		Logf("Error while issuing ssh command: %v", err)
		result <- false
		return
	}

	// Wait for some kind of "not ready" status.
	if !waitForNodeToBeNotReady(c, name, rebootNodeNotReadyTimeout) {
		result <- false
		return
	}

	// Wait for some kind of "ready" status.
	if !waitForNodeToBeReady(c, name, rebootNodeReadyAgainTimeout) {
		result <- false
		return
	}

	// Ensure all of the pods that we found on this node before the reboot are
	// running / healthy.
	if !checkPodsRunningReady(c, ns, podNames, rebootPodReadyAgainTimeout) {
		result <- false
		return
	}

	Logf("Reboot successful on node %s", name)
	result <- true
}