func PodFitsResourcesPredicate(t *T, offer *mesos.Offer) bool { // find offered cpu and mem var ( offeredCpus mresource.CPUShares offeredMem mresource.MegaBytes ) for _, resource := range offer.Resources { if resource.GetName() == "cpus" { offeredCpus = mresource.CPUShares(*resource.GetScalar().Value) } if resource.GetName() == "mem" { offeredMem = mresource.MegaBytes(*resource.GetScalar().Value) } } // calculate cpu and mem sum over all containers of the pod // TODO (@sttts): also support pod.spec.resources.limit.request // TODO (@sttts): take into account the executor resources cpu := mresource.PodCPULimit(&t.Pod) mem := mresource.PodMemLimit(&t.Pod) log.V(4).Infof("trying to match offer with pod %v/%v: cpus: %.2f mem: %.2f MB", t.Pod.Namespace, t.Pod.Name, cpu, mem) if (cpu > offeredCpus) || (mem > offeredMem) { log.V(3).Infof("not enough resources for pod %v/%v: cpus: %.2f mem: %.2f MB", t.Pod.Namespace, t.Pod.Name, cpu, mem) return false } return true }
// PodResourcesProcurement converts k8s pod cpu and memory resource requirements into // mesos resource allocations. func PodResourcesProcurement(t *T, offer *mesos.Offer) error { // compute used resources cpu := mresource.PodCPULimit(&t.Pod) mem := mresource.PodMemLimit(&t.Pod) log.V(3).Infof("Recording offer(s) %s/%s against pod %v: cpu: %.2f, mem: %.2f MB", offer.Id, t.Pod.Namespace, t.Pod.Name, cpu, mem) t.Spec.CPU = cpu t.Spec.Memory = mem return nil }
func TestLimitedResources(t *testing.T) { assert := assert.New(t) task, _ := fakePodTask("limited") pod := &task.Pod pod.Spec = api.PodSpec{ Containers: []api.Container{{ Name: "a", Resources: api.ResourceRequirements{ Limits: api.ResourceList{ api.ResourceCPU: *resource.NewQuantity(1, resource.DecimalSI), api.ResourceMemory: *resource.NewQuantity(256*1024*1024, resource.BinarySI), }, }, }, { Name: "b", Resources: api.ResourceRequirements{ Limits: api.ResourceList{ api.ResourceCPU: *resource.NewQuantity(2, resource.DecimalSI), api.ResourceMemory: *resource.NewQuantity(512*1024*1024, resource.BinarySI), }, }, }}, } beforeLimitingCPU := mresource.CPUForPod(pod, mresource.DefaultDefaultContainerCPULimit) beforeLimitingMem := mresource.MemForPod(pod, mresource.DefaultDefaultContainerMemLimit) unboundedCPU := mresource.LimitPodCPU(pod, mresource.DefaultDefaultContainerCPULimit) unboundedMem := mresource.LimitPodMem(pod, mresource.DefaultDefaultContainerMemLimit) cpu := mresource.PodCPULimit(pod) mem := mresource.PodMemLimit(pod) assert.False(unboundedCPU, "CPU resources are defined as limited") assert.False(unboundedMem, "mem resources are defined as limited") assert.Equal(3.0, float64(cpu)) assert.Equal(768.0, float64(mem)) assert.Equal(cpu, beforeLimitingCPU) assert.Equal(mem, beforeLimitingMem) }
func (s *SchedulerServer) prepareExecutorInfo(hks hyperkube.Interface) (*mesos.ExecutorInfo, *uid.UID, error) { ci := &mesos.CommandInfo{ Shell: proto.Bool(false), } if s.ExecutorPath != "" { uri, executorCmd := s.serveFrameworkArtifact(s.ExecutorPath) ci.Uris = append(ci.Uris, &mesos.CommandInfo_URI{Value: proto.String(uri), Executable: proto.Bool(true)}) ci.Value = proto.String(fmt.Sprintf("./%s", executorCmd)) } else if !hks.FindServer(hyperkube.CommandMinion) { return nil, nil, fmt.Errorf("either run this scheduler via km or else --executor-path is required") } else { if strings.Index(s.KMPath, "://") > 0 { // URI could point directly to executable, e.g. hdfs:///km // or else indirectly, e.g. http://acmestorage/tarball.tgz // so we assume that for this case the command will always "km" ci.Uris = append(ci.Uris, &mesos.CommandInfo_URI{Value: proto.String(s.KMPath), Executable: proto.Bool(true)}) ci.Value = proto.String("./km") // TODO(jdef) extract constant } else if s.KMPath != "" { uri, kmCmd := s.serveFrameworkArtifact(s.KMPath) ci.Uris = append(ci.Uris, &mesos.CommandInfo_URI{Value: proto.String(uri), Executable: proto.Bool(true)}) ci.Value = proto.String(fmt.Sprintf("./%s", kmCmd)) } else { uri, kmCmd := s.serveFrameworkArtifact(s.executable) ci.Uris = append(ci.Uris, &mesos.CommandInfo_URI{Value: proto.String(uri), Executable: proto.Bool(true)}) ci.Value = proto.String(fmt.Sprintf("./%s", kmCmd)) } ci.Arguments = append(ci.Arguments, hyperkube.CommandMinion) ci.Arguments = append(ci.Arguments, fmt.Sprintf("--run-proxy=%v", s.RunProxy)) ci.Arguments = append(ci.Arguments, fmt.Sprintf("--proxy-bindall=%v", s.ProxyBindall)) ci.Arguments = append(ci.Arguments, fmt.Sprintf("--proxy-logv=%d", s.ProxyLogV)) ci.Arguments = append(ci.Arguments, fmt.Sprintf("--path-override=%s", s.MinionPathOverride)) ci.Arguments = append(ci.Arguments, fmt.Sprintf("--max-log-size=%v", s.MinionLogMaxSize.String())) ci.Arguments = append(ci.Arguments, fmt.Sprintf("--max-log-backups=%d", s.MinionLogMaxBackups)) ci.Arguments = append(ci.Arguments, fmt.Sprintf("--max-log-age=%d", s.MinionLogMaxAgeInDays)) } if s.DockerCfgPath != "" { uri := s.serveFrameworkArtifactWithFilename(s.DockerCfgPath, ".dockercfg") ci.Uris = append(ci.Uris, &mesos.CommandInfo_URI{Value: proto.String(uri), Executable: proto.Bool(false), Extract: proto.Bool(false)}) } //TODO(jdef): provide some way (env var?) for users to customize executor config //TODO(jdef): set -address to 127.0.0.1 if `address` is 127.0.0.1 apiServerArgs := strings.Join(s.APIServerList, ",") ci.Arguments = append(ci.Arguments, fmt.Sprintf("--api-servers=%s", apiServerArgs)) ci.Arguments = append(ci.Arguments, fmt.Sprintf("--v=%d", s.ExecutorLogV)) // this also applies to the minion ci.Arguments = append(ci.Arguments, fmt.Sprintf("--allow-privileged=%t", s.AllowPrivileged)) ci.Arguments = append(ci.Arguments, fmt.Sprintf("--suicide-timeout=%v", s.ExecutorSuicideTimeout)) if s.ExecutorBindall { //TODO(jdef) determine whether hostname-override is really needed for bindall because //it conflicts with kubelet node status checks/updates //ci.Arguments = append(ci.Arguments, "--hostname-override=0.0.0.0") ci.Arguments = append(ci.Arguments, "--address=0.0.0.0") } ci.Arguments = append(ci.Arguments, fmt.Sprintf("--mesos-cgroup-prefix=%v", s.MesosCgroupPrefix)) ci.Arguments = append(ci.Arguments, fmt.Sprintf("--cadvisor-port=%v", s.KubeletCadvisorPort)) ci.Arguments = append(ci.Arguments, fmt.Sprintf("--sync-frequency=%v", s.KubeletSyncFrequency)) ci.Arguments = append(ci.Arguments, fmt.Sprintf("--contain-pod-resources=%t", s.ContainPodResources)) ci.Arguments = append(ci.Arguments, fmt.Sprintf("--enable-debugging-handlers=%t", s.EnableProfiling)) if s.AuthPath != "" { //TODO(jdef) should probably support non-local files, e.g. hdfs:///some/config/file uri, basename := s.serveFrameworkArtifact(s.AuthPath) ci.Uris = append(ci.Uris, &mesos.CommandInfo_URI{Value: proto.String(uri)}) ci.Arguments = append(ci.Arguments, fmt.Sprintf("--auth-path=%s", basename)) } appendOptional := func(name string, value string) { if value != "" { ci.Arguments = append(ci.Arguments, fmt.Sprintf("--%s=%s", name, value)) } } if s.ClusterDNS != nil { appendOptional("cluster-dns", s.ClusterDNS.String()) } appendOptional("cluster-domain", s.ClusterDomain) appendOptional("root-dir", s.KubeletRootDirectory) appendOptional("docker-endpoint", s.KubeletDockerEndpoint) appendOptional("pod-infra-container-image", s.KubeletPodInfraContainerImage) appendOptional("host-network-sources", s.KubeletHostNetworkSources) appendOptional("network-plugin", s.KubeletNetworkPluginName) log.V(1).Infof("prepared executor command %q with args '%+v'", ci.GetValue(), ci.Arguments) // Create mesos scheduler driver. execInfo := &mesos.ExecutorInfo{ Command: ci, Name: proto.String(execcfg.DefaultInfoName), Source: proto.String(execcfg.DefaultInfoSource), } // Check for staticPods var staticPodCPUs, staticPodMem float64 if s.StaticPodsConfigPath != "" { bs, paths, err := archive.ZipDir(s.StaticPodsConfigPath) if err != nil { return nil, nil, err } // try to read pod files and sum resources // TODO(sttts): don't terminate when static pods are broken, but skip them // TODO(sttts): add a directory watch and tell running executors about updates for _, podPath := range paths { podJson, err := ioutil.ReadFile(podPath) if err != nil { return nil, nil, fmt.Errorf("error reading static pod spec: %v", err) } pod := api.Pod{} err = json.Unmarshal(podJson, &pod) if err != nil { return nil, nil, fmt.Errorf("error parsing static pod spec at %v: %v", podPath, err) } // TODO(sttts): allow unlimited static pods as well and patch in the default resource limits unlimitedCPU := mresource.LimitPodCPU(&pod, s.DefaultContainerCPULimit) unlimitedMem := mresource.LimitPodMem(&pod, s.DefaultContainerMemLimit) if unlimitedCPU { return nil, nil, fmt.Errorf("found static pod without limit on cpu resources: %v", podPath) } if unlimitedMem { return nil, nil, fmt.Errorf("found static pod without limit on memory resources: %v", podPath) } cpu := mresource.PodCPULimit(&pod) mem := mresource.PodMemLimit(&pod) log.V(2).Infof("reserving %.2f cpu shares and %.2f MB of memory to static pod %s", cpu, mem, pod.Name) staticPodCPUs += float64(cpu) staticPodMem += float64(mem) } // pass zipped pod spec to executor execInfo.Data = bs } execInfo.Resources = []*mesos.Resource{ mutil.NewScalarResource("cpus", float64(s.MesosExecutorCPUs)+staticPodCPUs), mutil.NewScalarResource("mem", float64(s.MesosExecutorMem)+staticPodMem), } // calculate ExecutorInfo hash to be used for validating compatibility // of ExecutorInfo's generated by other HA schedulers. ehash := hashExecutorInfo(execInfo) eid := uid.New(ehash, execcfg.DefaultInfoID) execInfo.ExecutorId = &mesos.ExecutorID{Value: proto.String(eid.String())} return execInfo, eid, nil }
func (r *RequireSomePodResources) Procure(t *T, offer *mesos.Offer) error { // write resource limits into the pod spec which is transferred to the executor. From here // on we can expect that the pod spec of a task has proper limits for CPU and memory. // TODO(sttts): For a later separation of the kubelet and the executor also patch the pod on the apiserver // TODO(jdef): changing the state of t.Pod here feels dirty, especially since we don't use a kosher // method to clone the api.Pod state in T.Clone(). This needs some love. if unlimitedCPU := mresource.LimitPodCPU(&t.Pod, r.defaultContainerCPULimit); unlimitedCPU { log.V(2).Infof("Pod %s/%s without cpu limits is admitted %.2f cpu shares", t.Pod.Namespace, t.Pod.Name, mresource.PodCPULimit(&t.Pod)) } if unlimitedMem := mresource.LimitPodMem(&t.Pod, r.defaultContainerMemLimit); unlimitedMem { log.V(2).Infof("Pod %s/%s without memory limits is admitted %.2f MB", t.Pod.Namespace, t.Pod.Name, mresource.PodMemLimit(&t.Pod)) } return nil }