Esempio n. 1
0
// Call ScheduleFunc and subtract some resources, returning the name of the machine the task is scheduled on
func (k *kubeScheduler) doSchedule(task *podtask.T, err error) (string, error) {
	var offer offers.Perishable
	if task.HasAcceptedOffer() {
		// verify that the offer is still on the table
		offerId := task.GetOfferId()
		if offer, ok := k.api.offers().Get(offerId); ok && !offer.HasExpired() {
			// skip tasks that have already have assigned offers
			offer = task.Offer
		} else {
			task.Offer.Release()
			task.Reset()
			if err = k.api.tasks().Update(task); err != nil {
				return "", err
			}
		}
	}
	if err == nil && offer == nil {
		offer, err = k.api.algorithm()(k.api.offers(), k.api, task)
	}
	if err != nil {
		return "", err
	}
	details := offer.Details()
	if details == nil {
		return "", fmt.Errorf("offer already invalid/expired for task %v", task.ID)
	}
	slaveId := details.GetSlaveId().GetValue()
	if slave, ok := k.api.slaveFor(slaveId); !ok {
		// not much sense in Release()ing the offer here since its owner died
		offer.Release()
		k.api.offers().Invalidate(details.Id.GetValue())
		return "", fmt.Errorf("Slave disappeared (%v) while scheduling task %v", slaveId, task.ID)
	} else {
		if task.Offer != nil && task.Offer != offer {
			return "", fmt.Errorf("task.offer assignment must be idempotent, task %+v: offer %+v", task, offer)
		}

		// write resource limits into the pod spec which is transfered to the executor. From here
		// on we can expect that the pod spec of a task has proper limits for CPU and memory.
		// TODO(sttts): For a later separation of the kubelet and the executor also patch the pod on the apiserver
		if unlimitedCPU := mresource.LimitPodCPU(&task.Pod, k.defaultContainerCPULimit); unlimitedCPU {
			log.Warningf("Pod %s/%s without cpu limits is admitted %.2f cpu shares", task.Pod.Namespace, task.Pod.Name, mresource.PodCPULimit(&task.Pod))
		}
		if unlimitedMem := mresource.LimitPodMem(&task.Pod, k.defaultContainerMemLimit); unlimitedMem {
			log.Warningf("Pod %s/%s without memory limits is admitted %.2f MB", task.Pod.Namespace, task.Pod.Name, mresource.PodMemLimit(&task.Pod))
		}

		task.Offer = offer
		task.FillFromDetails(details)

		if err := k.api.tasks().Update(task); err != nil {
			offer.Release()
			return "", err
		}
		return slave.HostName, nil
	}
}
Esempio n. 2
0
func TestAcceptOfferPorts(t *testing.T) {
	t.Parallel()
	task, _ := fakePodTask("foo")
	pod := &task.Pod

	offer := &mesos.Offer{
		Resources: []*mesos.Resource{
			mutil.NewScalarResource("cpus", t_min_cpu),
			mutil.NewScalarResource("mem", t_min_mem),
			rangeResource("ports", []uint64{1, 1}),
		},
	}
	if ok := DefaultPredicate(task, offer); !ok {
		t.Fatalf("did not accepted offer %v:", offer)
	}

	pod.Spec = api.PodSpec{
		Containers: []api.Container{{
			Ports: []api.ContainerPort{{
				HostPort: 123,
			}},
		}},
	}

	mresource.LimitPodCPU(&task.Pod, mresource.DefaultDefaultContainerCPULimit)
	mresource.LimitPodMem(&task.Pod, mresource.DefaultDefaultContainerMemLimit)

	if ok := DefaultPredicate(task, offer); ok {
		t.Fatalf("accepted offer %v:", offer)
	}

	pod.Spec.Containers[0].Ports[0].HostPort = 1
	if ok := DefaultPredicate(task, offer); !ok {
		t.Fatalf("did not accepted offer %v:", offer)
	}

	pod.Spec.Containers[0].Ports[0].HostPort = 0
	if ok := DefaultPredicate(task, offer); !ok {
		t.Fatalf("did not accepted offer %v:", offer)
	}

	offer.Resources = []*mesos.Resource{
		mutil.NewScalarResource("cpus", t_min_cpu),
		mutil.NewScalarResource("mem", t_min_mem),
	}
	if ok := DefaultPredicate(task, offer); ok {
		t.Fatalf("accepted offer %v:", offer)
	}

	pod.Spec.Containers[0].Ports[0].HostPort = 1
	if ok := DefaultPredicate(task, offer); ok {
		t.Fatalf("accepted offer %v:", offer)
	}
}
Esempio n. 3
0
func (r *RequireSomePodResources) Procure(t *T, offer *mesos.Offer) error {
	// write resource limits into the pod spec which is transferred to the executor. From here
	// on we can expect that the pod spec of a task has proper limits for CPU and memory.
	// TODO(sttts): For a later separation of the kubelet and the executor also patch the pod on the apiserver
	// TODO(jdef): changing the state of t.Pod here feels dirty, especially since we don't use a kosher
	// method to clone the api.Pod state in T.Clone(). This needs some love.
	if unlimitedCPU := mresource.LimitPodCPU(&t.Pod, r.defaultContainerCPULimit); unlimitedCPU {
		log.V(2).Infof("Pod %s/%s without cpu limits is admitted %.2f cpu shares", t.Pod.Namespace, t.Pod.Name, mresource.PodCPULimit(&t.Pod))
	}
	if unlimitedMem := mresource.LimitPodMem(&t.Pod, r.defaultContainerMemLimit); unlimitedMem {
		log.V(2).Infof("Pod %s/%s without memory limits is admitted %.2f MB", t.Pod.Namespace, t.Pod.Name, mresource.PodMemLimit(&t.Pod))
	}
	return nil
}
Esempio n. 4
0
// limitPod limits the given pod based on the scheduler's default limits.
func (k *schedulerAlgorithm) limitPod(pod *api.Pod) error {
	cpuRequest, cpuLimit, _, err := mresource.LimitPodCPU(pod, k.defaultCpus)
	if err != nil {
		return err
	}

	memRequest, memLimit, _, err := mresource.LimitPodMem(pod, k.defaultMem)
	if err != nil {
		return err
	}

	log.V(3).Infof(
		"setting pod %s/%s resources: requested cpu %.2f mem %.2f MB, limited cpu %.2f mem %.2f MB",
		pod.Namespace, pod.Name, cpuRequest, memRequest, cpuLimit, memLimit,
	)

	return nil
}
Esempio n. 5
0
func TestLimitedResources(t *testing.T) {
	assert := assert.New(t)

	task, _ := fakePodTask("limited")
	pod := &task.Pod
	pod.Spec = api.PodSpec{
		Containers: []api.Container{{
			Name: "a",
			Resources: api.ResourceRequirements{
				Limits: api.ResourceList{
					api.ResourceCPU:    *resource.NewQuantity(1, resource.DecimalSI),
					api.ResourceMemory: *resource.NewQuantity(256*1024*1024, resource.BinarySI),
				},
			},
		}, {
			Name: "b",
			Resources: api.ResourceRequirements{
				Limits: api.ResourceList{
					api.ResourceCPU:    *resource.NewQuantity(2, resource.DecimalSI),
					api.ResourceMemory: *resource.NewQuantity(512*1024*1024, resource.BinarySI),
				},
			},
		}},
	}

	beforeLimitingCPU := mresource.CPUForPod(pod, mresource.DefaultDefaultContainerCPULimit)
	beforeLimitingMem := mresource.MemForPod(pod, mresource.DefaultDefaultContainerMemLimit)

	unboundedCPU := mresource.LimitPodCPU(pod, mresource.DefaultDefaultContainerCPULimit)
	unboundedMem := mresource.LimitPodMem(pod, mresource.DefaultDefaultContainerMemLimit)

	cpu := mresource.PodCPULimit(pod)
	mem := mresource.PodMemLimit(pod)

	assert.False(unboundedCPU, "CPU resources are defined as limited")
	assert.False(unboundedMem, "mem resources are defined as limited")

	assert.Equal(3.0, float64(cpu))
	assert.Equal(768.0, float64(mem))

	assert.Equal(cpu, beforeLimitingCPU)
	assert.Equal(mem, beforeLimitingMem)
}
Esempio n. 6
0
func TestEmptyOffer(t *testing.T) {
	t.Parallel()
	task, err := fakePodTask("foo")
	if err != nil {
		t.Fatal(err)
	}

	task.Pod.Spec = api.PodSpec{
		Containers: []api.Container{{
			Name: "a",
		}},
	}

	mresource.LimitPodCPU(&task.Pod, mresource.DefaultDefaultContainerCPULimit)
	mresource.LimitPodMem(&task.Pod, mresource.DefaultDefaultContainerMemLimit)

	if ok := DefaultPredicate(task, nil); ok {
		t.Fatalf("accepted nil offer")
	}
	if ok := DefaultPredicate(task, &mesos.Offer{}); ok {
		t.Fatalf("accepted empty offer")
	}
}
Esempio n. 7
0
// StaticPodValidator discards a pod if we can't calculate resource limits for it.
func StaticPodValidator(
	defaultContainerCPULimit resource.CPUShares,
	defaultContainerMemLimit resource.MegaBytes,
	accumCPU, accumMem *float64,
) podutil.FilterFunc {
	return podutil.FilterFunc(func(pod *api.Pod) (bool, error) {
		_, cpu, _, err := resource.LimitPodCPU(pod, defaultContainerCPULimit)
		if err != nil {
			return false, err
		}

		_, mem, _, err := resource.LimitPodMem(pod, defaultContainerMemLimit)
		if err != nil {
			return false, err
		}

		log.V(2).Infof("reserving %.2f cpu shares and %.2f MB of memory to static pod %s/%s", cpu, mem, pod.Namespace, pod.Name)

		*accumCPU += float64(cpu)
		*accumMem += float64(mem)
		return true, nil
	})
}
Esempio n. 8
0
func TestNoPortsInPodOrOffer(t *testing.T) {
	t.Parallel()
	task, err := fakePodTask("foo")
	if err != nil || task == nil {
		t.Fatal(err)
	}

	task.Pod.Spec = api.PodSpec{
		Containers: []api.Container{{
			Name: "a",
		}},
	}

	mresource.LimitPodCPU(&task.Pod, mresource.DefaultDefaultContainerCPULimit)
	mresource.LimitPodMem(&task.Pod, mresource.DefaultDefaultContainerMemLimit)

	offer := &mesos.Offer{
		Resources: []*mesos.Resource{
			mutil.NewScalarResource("cpus", 0.001),
			mutil.NewScalarResource("mem", 0.001),
		},
	}
	if ok := DefaultPredicate(task, offer); ok {
		t.Fatalf("accepted offer %v:", offer)
	}

	offer = &mesos.Offer{
		Resources: []*mesos.Resource{
			mutil.NewScalarResource("cpus", t_min_cpu),
			mutil.NewScalarResource("mem", t_min_mem),
		},
	}
	if ok := DefaultPredicate(task, offer); !ok {
		t.Fatalf("did not accepted offer %v:", offer)
	}
}
func (r *RequirePodResources) Procure(t *T, offer *mesos.Offer) error {
	// write resource limits into the pod spec which is transferred to the executor. From here
	// on we can expect that the pod spec of a task has proper limits for CPU and memory.
	// TODO(sttts): For a later separation of the kubelet and the executor also patch the pod on the apiserver
	// TODO(sttts): fall back to requested resources if resource limit cannot be fulfilled by the offer
	// TODO(jdef): changing the state of t.Pod here feels dirty, especially since we don't use a kosher
	// method to clone the api.Pod state in T.Clone(). This needs some love.
	_, cpuLimit, _, err := mresource.LimitPodCPU(&t.Pod, r.defaultContainerCPULimit)
	if err != nil {
		return err
	}

	_, memLimit, _, err := mresource.LimitPodMem(&t.Pod, r.defaultContainerMemLimit)
	if err != nil {
		return err
	}

	log.V(3).Infof("Recording offer(s) %s/%s against pod %v: cpu: %.2f, mem: %.2f MB", offer.Id, t.Pod.Namespace, t.Pod.Name, cpuLimit, memLimit)

	t.Spec.CPU = cpuLimit
	t.Spec.Memory = memLimit

	return nil
}
Esempio n. 10
0
func (s *SchedulerServer) prepareExecutorInfo(hks hyperkube.Interface) (*mesos.ExecutorInfo, *uid.UID, error) {
	ci := &mesos.CommandInfo{
		Shell: proto.Bool(false),
	}

	if s.ExecutorPath != "" {
		uri, executorCmd := s.serveFrameworkArtifact(s.ExecutorPath)
		ci.Uris = append(ci.Uris, &mesos.CommandInfo_URI{Value: proto.String(uri), Executable: proto.Bool(true)})
		ci.Value = proto.String(fmt.Sprintf("./%s", executorCmd))
	} else if !hks.FindServer(hyperkube.CommandMinion) {
		return nil, nil, fmt.Errorf("either run this scheduler via km or else --executor-path is required")
	} else {
		if strings.Index(s.KMPath, "://") > 0 {
			// URI could point directly to executable, e.g. hdfs:///km
			// or else indirectly, e.g. http://acmestorage/tarball.tgz
			// so we assume that for this case the command will always "km"
			ci.Uris = append(ci.Uris, &mesos.CommandInfo_URI{Value: proto.String(s.KMPath), Executable: proto.Bool(true)})
			ci.Value = proto.String("./km") // TODO(jdef) extract constant
		} else if s.KMPath != "" {
			uri, kmCmd := s.serveFrameworkArtifact(s.KMPath)
			ci.Uris = append(ci.Uris, &mesos.CommandInfo_URI{Value: proto.String(uri), Executable: proto.Bool(true)})
			ci.Value = proto.String(fmt.Sprintf("./%s", kmCmd))
		} else {
			uri, kmCmd := s.serveFrameworkArtifact(s.executable)
			ci.Uris = append(ci.Uris, &mesos.CommandInfo_URI{Value: proto.String(uri), Executable: proto.Bool(true)})
			ci.Value = proto.String(fmt.Sprintf("./%s", kmCmd))
		}
		ci.Arguments = append(ci.Arguments, hyperkube.CommandMinion)

		ci.Arguments = append(ci.Arguments, fmt.Sprintf("--run-proxy=%v", s.RunProxy))
		ci.Arguments = append(ci.Arguments, fmt.Sprintf("--proxy-bindall=%v", s.ProxyBindall))
		ci.Arguments = append(ci.Arguments, fmt.Sprintf("--proxy-logv=%d", s.ProxyLogV))

		ci.Arguments = append(ci.Arguments, fmt.Sprintf("--path-override=%s", s.MinionPathOverride))
		ci.Arguments = append(ci.Arguments, fmt.Sprintf("--max-log-size=%v", s.MinionLogMaxSize.String()))
		ci.Arguments = append(ci.Arguments, fmt.Sprintf("--max-log-backups=%d", s.MinionLogMaxBackups))
		ci.Arguments = append(ci.Arguments, fmt.Sprintf("--max-log-age=%d", s.MinionLogMaxAgeInDays))
	}

	if s.DockerCfgPath != "" {
		uri := s.serveFrameworkArtifactWithFilename(s.DockerCfgPath, ".dockercfg")
		ci.Uris = append(ci.Uris, &mesos.CommandInfo_URI{Value: proto.String(uri), Executable: proto.Bool(false), Extract: proto.Bool(false)})
	}

	//TODO(jdef): provide some way (env var?) for users to customize executor config
	//TODO(jdef): set -address to 127.0.0.1 if `address` is 127.0.0.1

	apiServerArgs := strings.Join(s.APIServerList, ",")
	ci.Arguments = append(ci.Arguments, fmt.Sprintf("--api-servers=%s", apiServerArgs))
	ci.Arguments = append(ci.Arguments, fmt.Sprintf("--v=%d", s.ExecutorLogV)) // this also applies to the minion
	ci.Arguments = append(ci.Arguments, fmt.Sprintf("--allow-privileged=%t", s.AllowPrivileged))
	ci.Arguments = append(ci.Arguments, fmt.Sprintf("--suicide-timeout=%v", s.ExecutorSuicideTimeout))
	ci.Arguments = append(ci.Arguments, fmt.Sprintf("--mesos-launch-grace-period=%v", s.LaunchGracePeriod))

	if s.ExecutorBindall {
		//TODO(jdef) determine whether hostname-override is really needed for bindall because
		//it conflicts with kubelet node status checks/updates
		//ci.Arguments = append(ci.Arguments, "--hostname-override=0.0.0.0")
		ci.Arguments = append(ci.Arguments, "--address=0.0.0.0")
	}

	ci.Arguments = append(ci.Arguments, fmt.Sprintf("--mesos-cgroup-prefix=%v", s.MesosCgroupPrefix))
	ci.Arguments = append(ci.Arguments, fmt.Sprintf("--cadvisor-port=%v", s.KubeletCadvisorPort))
	ci.Arguments = append(ci.Arguments, fmt.Sprintf("--sync-frequency=%v", s.KubeletSyncFrequency))
	ci.Arguments = append(ci.Arguments, fmt.Sprintf("--contain-pod-resources=%t", s.ContainPodResources))
	ci.Arguments = append(ci.Arguments, fmt.Sprintf("--enable-debugging-handlers=%t", s.EnableProfiling))

	if s.AuthPath != "" {
		//TODO(jdef) should probably support non-local files, e.g. hdfs:///some/config/file
		uri, basename := s.serveFrameworkArtifact(s.AuthPath)
		ci.Uris = append(ci.Uris, &mesos.CommandInfo_URI{Value: proto.String(uri)})
		ci.Arguments = append(ci.Arguments, fmt.Sprintf("--auth-path=%s", basename))
	}
	appendOptional := func(name string, value string) {
		if value != "" {
			ci.Arguments = append(ci.Arguments, fmt.Sprintf("--%s=%s", name, value))
		}
	}
	if s.ClusterDNS != nil {
		appendOptional("cluster-dns", s.ClusterDNS.String())
	}
	appendOptional("cluster-domain", s.ClusterDomain)
	appendOptional("root-dir", s.KubeletRootDirectory)
	appendOptional("docker-endpoint", s.KubeletDockerEndpoint)
	appendOptional("pod-infra-container-image", s.KubeletPodInfraContainerImage)
	appendOptional("host-network-sources", s.KubeletHostNetworkSources)
	appendOptional("network-plugin", s.KubeletNetworkPluginName)

	log.V(1).Infof("prepared executor command %q with args '%+v'", ci.GetValue(), ci.Arguments)

	// Create mesos scheduler driver.
	execInfo := &mesos.ExecutorInfo{
		Command: ci,
		Name:    proto.String(execcfg.DefaultInfoName),
		Source:  proto.String(execcfg.DefaultInfoSource),
	}

	// Check for staticPods
	var staticPodCPUs, staticPodMem float64
	if s.StaticPodsConfigPath != "" {
		bs, paths, err := archive.ZipDir(s.StaticPodsConfigPath)
		if err != nil {
			return nil, nil, err
		}

		// try to read pod files and sum resources
		// TODO(sttts): don't terminate when static pods are broken, but skip them
		// TODO(sttts): add a directory watch and tell running executors about updates
		for _, podPath := range paths {
			podJson, err := ioutil.ReadFile(podPath)
			if err != nil {
				return nil, nil, fmt.Errorf("error reading static pod spec: %v", err)
			}

			pod := api.Pod{}
			err = json.Unmarshal(podJson, &pod)
			if err != nil {
				return nil, nil, fmt.Errorf("error parsing static pod spec at %v: %v", podPath, err)
			}

			// TODO(sttts): allow unlimited static pods as well and patch in the default resource limits
			unlimitedCPU := mresource.LimitPodCPU(&pod, s.DefaultContainerCPULimit)
			unlimitedMem := mresource.LimitPodMem(&pod, s.DefaultContainerMemLimit)
			if unlimitedCPU {
				return nil, nil, fmt.Errorf("found static pod without limit on cpu resources: %v", podPath)
			}
			if unlimitedMem {
				return nil, nil, fmt.Errorf("found static pod without limit on memory resources: %v", podPath)
			}

			cpu := mresource.PodCPULimit(&pod)
			mem := mresource.PodMemLimit(&pod)
			log.V(2).Infof("reserving %.2f cpu shares and %.2f MB of memory to static pod %s", cpu, mem, pod.Name)

			staticPodCPUs += float64(cpu)
			staticPodMem += float64(mem)
		}

		// pass zipped pod spec to executor
		execInfo.Data = bs
	}

	execInfo.Resources = []*mesos.Resource{
		mutil.NewScalarResource("cpus", float64(s.MesosExecutorCPUs)+staticPodCPUs),
		mutil.NewScalarResource("mem", float64(s.MesosExecutorMem)+staticPodMem),
	}

	// calculate ExecutorInfo hash to be used for validating compatibility
	// of ExecutorInfo's generated by other HA schedulers.
	ehash := hashExecutorInfo(execInfo)
	eid := uid.New(ehash, execcfg.DefaultInfoID)
	execInfo.ExecutorId = &mesos.ExecutorID{Value: proto.String(eid.String())}

	return execInfo, eid, nil
}