func (s *Scheduler) createExecutor(offer *mesos.Offer, tcpPort uint64, udpPort uint64) *mesos.ExecutorInfo {
    name := fmt.Sprintf("syslog-%s", offer.GetSlaveId().GetValue())
    id := fmt.Sprintf("%s-%s", name, uuid())

    uris := []*mesos.CommandInfo_URI{
        &mesos.CommandInfo_URI{
            Value:      proto.String(fmt.Sprintf("%s/resource/%s", Config.Api, Config.Executor)),
            Executable: proto.Bool(true),
        },
    }

    if Config.ProducerProperties != "" {
        uris = append(uris, &mesos.CommandInfo_URI{
            Value: proto.String(fmt.Sprintf("%s/resource/%s", Config.Api, Config.ProducerProperties)),
        })
    }

    command := fmt.Sprintf("./%s --log.level %s --tcp %d --udp %d --host %s", Config.Executor, Config.LogLevel, tcpPort, udpPort, offer.GetHostname())

    return &mesos.ExecutorInfo{
        ExecutorId: util.NewExecutorID(id),
        Name:       proto.String(name),
        Command: &mesos.CommandInfo{
            Value: proto.String(command),
            Uris:  uris,
        },
    }
}
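// uuid() above (also used by the task builders below) only needs to produce a
// unique suffix for executor and task IDs. A self-contained stand-in is
// sketched here; this is an assumption, the real helper may use a UUID library
// instead (requires crypto/rand and fmt).
func uuid() string {
    b := make([]byte, 16)
    if _, err := rand.Read(b); err != nil {
        panic(err) // out of entropy; should not happen in practice
    }
    return fmt.Sprintf("%x-%x-%x-%x-%x", b[0:4], b[4:6], b[6:8], b[8:10], b[10:])
}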
func (ct *ConsumerTask) NewTaskInfo(offer *mesos.Offer) *mesos.TaskInfo {
    taskName := fmt.Sprintf("consumer-%s", ct.ID)
    taskId := &mesos.TaskID{
        Value: proto.String(fmt.Sprintf("%s-%s", taskName, uuid())),
    }

    data, err := json.Marshal(ct.Config)
    if err != nil {
        panic(err)
    }

    taskInfo := &mesos.TaskInfo{
        Name:     proto.String(taskName),
        TaskId:   taskId,
        SlaveId:  offer.GetSlaveId(),
        Executor: ct.createExecutor(),
        Resources: []*mesos.Resource{
            util.NewScalarResource("cpus", ct.Cpu),
            util.NewScalarResource("mem", ct.Mem),
        },
        Data: data,
    }

    return taskInfo
}
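// The consumer config above travels to the executor as the JSON-encoded
// TaskInfo.Data payload. A minimal sketch of how the executor side might
// decode it, assuming the mesos-go executor callback; ConsumerExecutor and
// ConsumerConfig are hypothetical names, not taken from the code above.
func (e *ConsumerExecutor) LaunchTask(driver executor.ExecutorDriver, task *mesos.TaskInfo) {
    var cfg ConsumerConfig // hypothetical type; must match what the scheduler marshalled
    if err := json.Unmarshal(task.GetData(), &cfg); err != nil {
        // Report the failure back to the scheduler instead of panicking here.
        driver.SendStatusUpdate(&mesos.TaskStatus{
            TaskId:  task.GetTaskId(),
            State:   mesos.TaskState_TASK_FAILED.Enum(),
            Message: proto.String(err.Error()),
        })
        return
    }
    // ... start the consumer using cfg ...
}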
func (s *Scheduler) launchTask(driver scheduler.SchedulerDriver, offer *mesos.Offer) {
    taskName := fmt.Sprintf("statsd-kafka-%s", offer.GetHostname())
    taskId := &mesos.TaskID{
        Value: proto.String(fmt.Sprintf("%s-%s", taskName, uuid())),
    }

    data, err := json.Marshal(Config)
    if err != nil {
        panic(err) //shouldn't happen
    }
    Logger.Debugf("Task data: %s", string(data))

    task := &mesos.TaskInfo{
        Name:     proto.String(taskName),
        TaskId:   taskId,
        SlaveId:  offer.GetSlaveId(),
        Executor: s.createExecutor(offer.GetHostname()),
        Resources: []*mesos.Resource{
            util.NewScalarResource("cpus", Config.Cpus),
            util.NewScalarResource("mem", Config.Mem),
        },
        Data: data,
    }

    s.cluster.Add(offer.GetHostname(), task)

    driver.LaunchTasks([]*mesos.OfferID{offer.GetId()}, []*mesos.TaskInfo{task}, &mesos.Filters{RefuseSeconds: proto.Float64(1)})
}
// NodeProcurement updates t.Spec in preparation for the task to be launched on the
// slave associated with the offer.
func NodeProcurement(t *T, offer *mesos.Offer) error {
    t.Spec.SlaveID = offer.GetSlaveId().GetValue()
    t.Spec.AssignedSlave = offer.GetHostname()

    // the hostname of the executor needs to match that of the offer, otherwise
    // the kubelet node status checker/updater is very unhappy
    setCommandArgument(t.executor, "--hostname-override", offer.GetHostname(), true)

    return nil
}
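// setCommandArgument is not shown above. A minimal sketch of what it might look
// like, assuming it mirrors the inline --hostname-override handling that appears
// in the FillFromDetails and NodeProcurement variants further down: overwrite an
// existing "flag=value" argument, otherwise append one when create is true.
func setCommandArgument(ei *mesos.ExecutorInfo, flag, value string, create bool) {
    prefix := flag + "="
    argv := ei.Command.Arguments
    for i, arg := range argv {
        if strings.HasPrefix(arg, prefix) {
            argv[i] = prefix + value
            return
        }
    }
    if create {
        ei.Command.Arguments = append(argv, prefix+value)
    }
}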
func (s *Scheduler) acceptOffer(driver scheduler.SchedulerDriver, offer *mesos.Offer) string {
    if s.cluster.Exists(offer.GetSlaveId().GetValue()) {
        return fmt.Sprintf("Server on slave %s is already running.", offer.GetSlaveId().GetValue())
    }

    declineReason := s.match(offer)
    if declineReason == "" {
        s.launchTask(driver, offer)
    }
    return declineReason
}
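// One way acceptOffer might be wired into the scheduler's ResourceOffers
// callback; a sketch only, assuming the mesos-go SchedulerDriver API. Offers
// that are not used to launch a task are declined and the reason logged.
func (s *Scheduler) ResourceOffers(driver scheduler.SchedulerDriver, offers []*mesos.Offer) {
    for _, offer := range offers {
        if declineReason := s.acceptOffer(driver, offer); declineReason != "" {
            driver.DeclineOffer(offer.GetId(), &mesos.Filters{RefuseSeconds: proto.Float64(10)})
            Logger.Debugf("Declined offer %s: %s", offer.GetId().GetValue(), declineReason)
        }
    }
}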
// Fill the Spec in the T, should be called during k8s scheduling, before binding.
func (t *T) FillFromDetails(details *mesos.Offer) error {
    if details == nil {
        //programming error
        panic("offer details are nil")
    }

    // compute used resources
    cpu := mresource.PodCPULimit(&t.Pod)
    mem := mresource.PodMemLimit(&t.Pod)
    log.V(3).Infof("Recording offer(s) %s/%s against pod %v: cpu: %.2f, mem: %.2f MB", details.Id, t.Pod.Namespace, t.Pod.Name, cpu, mem)

    t.Spec = Spec{
        SlaveID:       details.GetSlaveId().GetValue(),
        AssignedSlave: details.GetHostname(),
        CPU:           cpu,
        Memory:        mem,
    }

    // fill in port mapping
    if mapping, err := t.mapper.Generate(t, details); err != nil {
        t.Reset()
        return err
    } else {
        ports := []uint64{}
        for _, entry := range mapping {
            ports = append(ports, entry.OfferPort)
        }
        t.Spec.PortMap = mapping
        t.Spec.Ports = ports
    }

    // the hostname of the executor needs to match that of the offer, otherwise
    // the kubelet node status checker/updater is very unhappy
    const HOSTNAME_OVERRIDE_FLAG = "--hostname-override="
    hostname := details.GetHostname() // required field, non-empty
    hostnameOverride := HOSTNAME_OVERRIDE_FLAG + hostname
    argv := t.executor.Command.Arguments
    overwrite := false
    for i, arg := range argv {
        if strings.HasPrefix(arg, HOSTNAME_OVERRIDE_FLAG) {
            overwrite = true
            argv[i] = hostnameOverride
            break
        }
    }
    if !overwrite {
        t.executor.Command.Arguments = append(argv, hostnameOverride)
    }
    return nil
}
// Fill the Spec in the T, should be called during k8s scheduling,
// before binding.
// TODO(jdef): remove hardcoded values and make use of actual pod resource settings
func (t *T) FillFromDetails(details *mesos.Offer) error {
    if details == nil {
        //programming error
        panic("offer details are nil")
    }
    log.V(3).Infof("Recording offer(s) %v against pod %v", details.Id, t.Pod.Name)

    t.Spec = Spec{
        SlaveID: details.GetSlaveId().GetValue(),
        CPU:     DefaultContainerCpus,
        Memory:  DefaultContainerMem,
    }

    if mapping, err := t.mapper.Generate(t, details); err != nil {
        t.Reset()
        return err
    } else {
        ports := []uint64{}
        for _, entry := range mapping {
            ports = append(ports, entry.OfferPort)
        }
        t.Spec.PortMap = mapping
        t.Spec.Ports = ports
    }

    // the hostname of the executor needs to match that of the offer, otherwise
    // the qinglet node status checker/updater is very unhappy
    const HOSTNAME_OVERRIDE_FLAG = "--hostname-override="
    hostname := details.GetHostname() // required field, non-empty
    hostnameOverride := HOSTNAME_OVERRIDE_FLAG + hostname
    argv := t.executor.Command.Arguments
    overwrite := false
    for i, arg := range argv {
        if strings.HasPrefix(arg, HOSTNAME_OVERRIDE_FLAG) {
            overwrite = true
            argv[i] = hostnameOverride
            break
        }
    }
    if !overwrite {
        t.executor.Command.Arguments = append(argv, hostnameOverride)
    }
    return nil
}
func (s *Scheduler) launchTask(driver scheduler.SchedulerDriver, offer *mesos.Offer) {
    taskName := fmt.Sprintf("syslog-%s", offer.GetSlaveId().GetValue())
    taskId := &mesos.TaskID{
        Value: proto.String(fmt.Sprintf("%s-%s", taskName, uuid())),
    }

    data, err := json.Marshal(Config)
    if err != nil {
        panic(err) //shouldn't happen
    }
    Logger.Debugf("Task data: %s", string(data))

    tcpPort := uint64(s.getPort(Config.TcpPort, offer, -1))
    udpPort := uint64(s.getPort(Config.UdpPort, offer, int(tcpPort)))

    task := &mesos.TaskInfo{
        Name:     proto.String(taskName),
        TaskId:   taskId,
        SlaveId:  offer.GetSlaveId(),
        Executor: s.createExecutor(offer, tcpPort, udpPort),
        Resources: []*mesos.Resource{
            util.NewScalarResource("cpus", Config.Cpus),
            util.NewScalarResource("mem", Config.Mem),
            util.NewRangesResource("ports", []*mesos.Value_Range{util.NewValueRange(tcpPort, tcpPort)}),
            util.NewRangesResource("ports", []*mesos.Value_Range{util.NewValueRange(udpPort, udpPort)}),
        },
        Data:   data,
        Labels: utils.StringToLabels(s.labels),
    }

    s.cluster.Add(offer.GetSlaveId().GetValue(), task)

    driver.LaunchTasks([]*mesos.OfferID{offer.GetId()}, []*mesos.TaskInfo{task}, &mesos.Filters{RefuseSeconds: proto.Float64(1)})
}
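// getPort is referenced above but not shown. A rough sketch of what it could
// look like, under the assumption that it returns the configured port when one
// is set, and otherwise the first port from the offer's "ports" ranges that is
// not equal to exclude; the real signature and behavior may differ.
func (s *Scheduler) getPort(configured int, offer *mesos.Offer, exclude int) int {
    if configured > 0 {
        return configured
    }
    for _, resource := range offer.GetResources() {
        if resource.GetName() != "ports" {
            continue
        }
        for _, r := range resource.GetRanges().GetRange() {
            for port := int(r.GetBegin()); port <= int(r.GetEnd()); port++ {
                if port != exclude {
                    return port
                }
            }
        }
    }
    return -1 // no usable port in this offer
}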
// NodeProcurement updates t.Spec in preparation for the task to be launched on the
// slave associated with the offer.
func NodeProcurement(t *T, offer *mesos.Offer) error {
    t.Spec.SlaveID = offer.GetSlaveId().GetValue()
    t.Spec.AssignedSlave = offer.GetHostname()

    // the hostname of the executor needs to match that of the offer, otherwise
    // the kubelet node status checker/updater is very unhappy
    const HOSTNAME_OVERRIDE_FLAG = "--hostname-override="
    hostname := offer.GetHostname() // required field, non-empty
    hostnameOverride := HOSTNAME_OVERRIDE_FLAG + hostname
    argv := t.executor.Command.Arguments
    overwrite := false
    for i, arg := range argv {
        if strings.HasPrefix(arg, HOSTNAME_OVERRIDE_FLAG) {
            overwrite = true
            argv[i] = hostnameOverride
            break
        }
    }
    if !overwrite {
        t.executor.Command.Arguments = append(argv, hostnameOverride)
    }

    return nil
}
func (ctx *RunOnceApplicationContext) newTaskInfo(offer *mesos.Offer) *mesos.TaskInfo {
    taskName := fmt.Sprintf("%s.%s", ctx.Application.ID, offer.GetHostname())
    taskID := util.NewTaskID(fmt.Sprintf("%s|%s|%s", ctx.Application.ID, offer.GetHostname(), framework.UUID()))

    var URIs []*mesos.CommandInfo_URI
    if len(ctx.Application.ArtifactURLs) > 0 || len(ctx.Application.AdditionalArtifacts) > 0 {
        URIs = make([]*mesos.CommandInfo_URI, 0)
        for _, uri := range ctx.Application.ArtifactURLs {
            URIs = append(URIs, &mesos.CommandInfo_URI{
                Value:   proto.String(uri),
                Extract: proto.Bool(true),
            })
        }
        for _, uri := range ctx.Application.AdditionalArtifacts {
            URIs = append(URIs, &mesos.CommandInfo_URI{
                Value:   proto.String(uri),
                Extract: proto.Bool(true),
            })
        }
    }

    return &mesos.TaskInfo{
        Name:    proto.String(taskName),
        TaskId:  taskID,
        SlaveId: offer.GetSlaveId(),
        Resources: []*mesos.Resource{
            util.NewScalarResource("cpus", ctx.Application.Cpu),
            util.NewScalarResource("mem", ctx.Application.Mem),
        },
        Command: &mesos.CommandInfo{
            Shell: proto.Bool(true),
            Value: proto.String(ctx.Application.LaunchCommand),
            Uris:  URIs,
        },
    }
}
// NodeProcurement updates t.Spec in preparation for the task to be launched on the
// slave associated with the offer.
func NodeProcurement(t *T, offer *mesos.Offer) error {
    t.Spec.SlaveID = offer.GetSlaveId().GetValue()
    t.Spec.AssignedSlave = offer.GetHostname()

    return nil
}
// TODO(tyler) split this long function up!
func (s *EtcdScheduler) launchOne(driver scheduler.SchedulerDriver) {
    // Always ensure we've pruned any dead / unmanaged nodes before
    // launching new ones, or we may overconfigure the ensemble such
    // that it can not make progress if the next launch fails.
    err := s.Prune()
    if err != nil {
        log.Errorf("Failed to remove stale cluster members: %s", err)
        return
    }

    if !s.shouldLaunch(driver) {
        log.Infoln("Skipping launch attempt for now.")
        return
    }

    // validOffer filters out offers that are no longer
    // desirable, even though they may have been when
    // they were enqueued.
    validOffer := func(offer *mesos.Offer) bool {
        runningCopy := s.RunningCopy()
        for _, etcdConfig := range runningCopy {
            if etcdConfig.SlaveID == offer.SlaveId.GetValue() {
                if s.singleInstancePerSlave {
                    log.Info("Skipping offer: already running on this slave.")
                    return false
                }
            }
        }
        return true
    }

    // Issue BlockingPop until we get back an offer we can use.
    var offer *mesos.Offer
    for {
        offer = s.offerCache.BlockingPop()
        if validOffer(offer) {
            break
        } else {
            s.decline(driver, offer)
        }
    }

    // Do this again because BlockingPop may have taken a long time.
    if !s.shouldLaunch(driver) {
        log.Infoln("Skipping launch attempt for now.")
        s.decline(driver, offer)
        return
    }

    // TODO(tyler) this is a broken hack
    resources := parseOffer(offer)
    lowest := *resources.ports[0].Begin
    rpcPort := lowest
    clientPort := lowest + 1
    httpPort := lowest + 2

    s.mut.Lock()
    var clusterType string
    if len(s.running) == 0 {
        clusterType = "new"
    } else {
        clusterType = "existing"
    }

    s.highestInstanceID++
    name := "etcd-" + strconv.FormatInt(s.highestInstanceID, 10)

    node := &config.Node{
        Name:       name,
        Host:       *offer.Hostname,
        RPCPort:    rpcPort,
        ClientPort: clientPort,
        ReseedPort: httpPort,
        Type:       clusterType,
        SlaveID:    offer.GetSlaveId().GetValue(),
    }
    running := []*config.Node{node}
    for _, r := range s.running {
        running = append(running, r)
    }

    serializedNodes, err := json.Marshal(running)
    log.Infof("Serialized running: %+v", string(serializedNodes))
    if err != nil {
        log.Errorf("Could not serialize running list: %v", err)
        // This Unlock is not deferred because the test implementation of LaunchTasks
        // calls this scheduler's StatusUpdate method, causing the test to deadlock.
        s.decline(driver, offer)
        s.mut.Unlock()
        return
    }

    configSummary := node.String()
    taskID := &mesos.TaskID{Value: &configSummary}
    executor := s.newExecutorInfo(node, s.executorUris)
    task := &mesos.TaskInfo{
        Data:     serializedNodes,
        Name:     proto.String("etcd-server"),
        TaskId:   taskID,
        SlaveId:  offer.SlaveId,
        Executor: executor,
        Resources: []*mesos.Resource{
            util.NewScalarResource("cpus", s.cpusPerTask),
            util.NewScalarResource("mem", s.memPerTask),
            util.NewScalarResource("disk", s.diskPerTask),
            util.NewRangesResource("ports", []*mesos.Value_Range{
                util.NewValueRange(uint64(rpcPort), uint64(httpPort)),
            }),
        },
        Discovery: &mesos.DiscoveryInfo{
            Visibility: mesos.DiscoveryInfo_EXTERNAL.Enum(),
            Name:       proto.String("etcd-server"),
            Ports: &mesos.Ports{
                Ports: []*mesos.Port{
                    &mesos.Port{
                        Number:   proto.Uint32(uint32(rpcPort)),
                        Protocol: proto.String("tcp"),
                    },
                    // HACK: "client" is not a real SRV protocol. This is so
                    // that we can have etcd proxies use srv discovery on the
                    // above tcp name. Mesos-dns does not yet care about
                    // names for DiscoveryInfo. When it does, we should
                    // create a name for clients to use. We want to keep
                    // the rpcPort accessible at _etcd-server._tcp.<fwname>.mesos
                    &mesos.Port{
                        Number:   proto.Uint32(uint32(clientPort)),
                        Protocol: proto.String("client"),
                    },
                },
            },
        },
    }

    log.Infof(
        "Prepared task: %s with offer %s for launch",
        task.GetName(),
        offer.Id.GetValue(),
    )
    log.Info("Launching etcd node.")

    tasks := []*mesos.TaskInfo{task}

    s.pending[node.Name] = struct{}{}

    // This Unlock is not deferred because the test implementation of LaunchTasks
    // calls this scheduler's StatusUpdate method, causing the test to deadlock.
    s.mut.Unlock()

    atomic.AddUint32(&s.Stats.LaunchedServers, 1)
    driver.LaunchTasks(
        []*mesos.OfferID{offer.Id},
        tasks,
        &mesos.Filters{
            RefuseSeconds: proto.Float64(1),
        },
    )
}
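// parseOffer and the struct it returns are not shown here. A small sketch of
// how the port ranges used by launchOne could be pulled out of an offer,
// assuming only the pieces needed above are collected; offerResources is a
// hypothetical name and the real helper may gather more fields.
type offerResources struct {
    cpus  float64
    mem   float64
    ports []*mesos.Value_Range
}

func parseOffer(offer *mesos.Offer) offerResources {
    parsed := offerResources{}
    for _, resource := range offer.GetResources() {
        switch resource.GetName() {
        case "cpus":
            parsed.cpus += resource.GetScalar().GetValue()
        case "mem":
            parsed.mem += resource.GetScalar().GetValue()
        case "ports":
            // keep the raw ranges so callers can read Begin/End directly
            parsed.ports = append(parsed.ports, resource.GetRanges().GetRange()...)
        }
    }
    return parsed
}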