func (sched *ExampleScheduler) Reregistered(driver sched.SchedulerDriver, masterInfo *mesos.MasterInfo) {
	log.Infoln("Framework Re-Registered with Master ", masterInfo)
	_, err := driver.ReconcileTasks([]*mesos.TaskStatus{})
	if err != nil {
		log.Errorf("failed to request task reconciliation: %v", err)
	}
}
func (sched *MesosRunonceScheduler) StatusUpdate(driver sched.SchedulerDriver, status *mesos.TaskStatus) {
	log.V(1).Infoln("Status update: task", status.TaskId.GetValue(), "is in state", status.State.Enum().String())
	eventCh <- status

	if status.GetState() == mesos.TaskState_TASK_FINISHED {
		sched.tasksFinished++
	}

	if sched.tasksFinished >= sched.totalTasks {
		log.V(1).Infoln("Total tasks completed, stopping framework.")
		driver.Stop(false)
	}

	if status.GetState() == mesos.TaskState_TASK_LOST ||
		status.GetState() == mesos.TaskState_TASK_KILLED ||
		status.GetState() == mesos.TaskState_TASK_FAILED ||
		status.GetState() == mesos.TaskState_TASK_ERROR {
		exitStatus = 1
		log.Warningf("mesos TaskStatus: %v", status)
		driver.Stop(false)
		log.Errorln(
			"Aborting because task", status.TaskId.GetValue(),
			"is in unexpected state", status.State.String(),
			"with message", status.GetMessage(),
		)
	}
}
func (sched *Scheduler) ResourceOffers(driver sched.SchedulerDriver, offers []*mesos.Offer) {
	logOffers(offers)

	jobs, err := getLaunchableJobs()
	if err != nil {
		log.Errorf("Unable to get pending jobs! %s\n", err.Error())
		return
	}

	offersAndTasks, err := packJobsInOffers(jobs, offers)
	if err != nil {
		log.Errorf("Unable to pack jobs into offers! %s\n", err.Error())
		return
	}

	for _, ot := range offersAndTasks {
		if len(ot.Tasks) == 0 {
			log.Infof("Declining unused offer %s", ot.Offer.Id.GetValue())
			driver.DeclineOffer(ot.Offer.Id, &mesos.Filters{RefuseSeconds: proto.Float64(1)})
			continue
		}
		log.Infof("Launching %d tasks for offer %s\n", len(ot.Tasks), ot.Offer.Id.GetValue())
		driver.LaunchTasks([]*mesos.OfferID{ot.Offer.Id}, ot.Tasks, &mesos.Filters{RefuseSeconds: proto.Float64(1)})
		sched.tasksLaunched += len(ot.Tasks)
	}
}
func (sc *SchedulerCore) acceptOffer(driver sched.SchedulerDriver, offer *mesos.Offer, operations []*mesos.Offer_Operation) {
	log.Infof("Accepting OfferID: %+v, Operations: %+v", *offer.Id.Value, operations)

	var status mesos.Status
	var err error

	if sc.compatibilityMode {
		// Older masters only understand LaunchTasks, so pull the TaskInfos out of
		// any LAUNCH operations and use the legacy call.
		tasks := []*mesos.TaskInfo{}
		for _, operation := range operations {
			if *operation.Type == mesos.Offer_Operation_LAUNCH {
				tasks = operation.Launch.TaskInfos
			}
		}
		status, err = driver.LaunchTasks([]*mesos.OfferID{offer.Id}, tasks,
			&mesos.Filters{RefuseSeconds: proto.Float64(OFFER_INTERVAL)})
	} else {
		status, err = driver.AcceptOffers([]*mesos.OfferID{offer.Id}, operations,
			&mesos.Filters{RefuseSeconds: proto.Float64(OFFER_INTERVAL)})
	}

	if status != mesos.Status_DRIVER_RUNNING {
		log.Fatal("Driver not running, while trying to accept offers")
	}
	if err != nil {
		log.Panic("Failed to launch tasks: ", err)
	}
}
// perform one-time initialization actions upon the first registration event received from Mesos.
func (k *framework) onInitialRegistration(driver bindings.SchedulerDriver) {
	defer close(k.registration)

	if k.failoverTimeout > 0 {
		refreshInterval := k.schedulerConfig.FrameworkIdRefreshInterval.Duration
		if k.failoverTimeout < k.schedulerConfig.FrameworkIdRefreshInterval.Duration.Seconds() {
			refreshInterval = time.Duration(math.Max(1, k.failoverTimeout/2)) * time.Second
		}

		// wait until we've written the framework ID at least once before proceeding
		firstStore := make(chan struct{})
		go runtime.Until(func() {
			// only close firstStore once
			select {
			case <-firstStore:
			default:
				defer close(firstStore)
			}
			err := k.storeFrameworkId(context.TODO(), k.frameworkId.GetValue())
			if err != nil {
				log.Errorf("failed to store framework ID: %v", err)
				if err == frameworkid.ErrMismatch {
					// we detected a framework ID in storage that doesn't match what we're trying
					// to save. this is a dangerous state:
					// (1) perhaps we failed to initially recover the framework ID and so mesos
					//     issued us a new one. now that we're trying to save it there's a mismatch.
					// (2) we've somehow bungled the framework ID and we're out of alignment with
					//     what mesos is expecting.
					// (3) multiple schedulers were launched at the same time, and both have
					//     registered with mesos (because when they each checked, there was no ID in
					//     storage, so they asked for a new one). one of them has already written the
					//     ID to storage -- we lose.
					log.Error("aborting due to framework ID mismatch")
					driver.Abort()
				}
			}
		}, refreshInterval, k.terminate)

		// wait for the first store attempt of the framework ID
		select {
		case <-firstStore:
		case <-k.terminate:
		}
	}

	r1 := k.makeTaskRegistryReconciler()
	r2 := k.makePodRegistryReconciler()

	k.tasksReconciler = taskreconciler.New(k.asRegisteredMaster, taskreconciler.MakeComposite(k.terminate, r1, r2),
		k.reconcileCooldown, k.schedulerConfig.ExplicitReconciliationAbortTimeout.Duration, k.terminate)
	go k.tasksReconciler.Run(driver, k.terminate)

	if k.reconcileInterval > 0 {
		ri := time.Duration(k.reconcileInterval) * time.Second
		time.AfterFunc(k.schedulerConfig.InitialImplicitReconciliationDelay.Duration, func() {
			runtime.Until(k.tasksReconciler.RequestImplicit, ri, k.terminate)
		})
		log.Infof("will perform implicit task reconciliation at interval: %v after %v", ri,
			k.schedulerConfig.InitialImplicitReconciliationDelay.Duration)
	}

	k.installDebugHandlers(k.mux)
}
func (s *Scheduler) launchTask(driver scheduler.SchedulerDriver, offer *mesos.Offer) { taskName := fmt.Sprintf("syslog-%s", offer.GetSlaveId().GetValue()) taskId := &mesos.TaskID{ Value: proto.String(fmt.Sprintf("%s-%s", taskName, uuid())), } data, err := json.Marshal(Config) if err != nil { panic(err) //shouldn't happen } Logger.Debugf("Task data: %s", string(data)) tcpPort := uint64(s.getPort(Config.TcpPort, offer, -1)) udpPort := uint64(s.getPort(Config.UdpPort, offer, int(tcpPort))) task := &mesos.TaskInfo{ Name: proto.String(taskName), TaskId: taskId, SlaveId: offer.GetSlaveId(), Executor: s.createExecutor(offer, tcpPort, udpPort), Resources: []*mesos.Resource{ util.NewScalarResource("cpus", Config.Cpus), util.NewScalarResource("mem", Config.Mem), util.NewRangesResource("ports", []*mesos.Value_Range{util.NewValueRange(tcpPort, tcpPort)}), util.NewRangesResource("ports", []*mesos.Value_Range{util.NewValueRange(udpPort, udpPort)}), }, Data: data, Labels: utils.StringToLabels(s.labels), } s.cluster.Add(offer.GetSlaveId().GetValue(), task) driver.LaunchTasks([]*mesos.OfferID{offer.GetId()}, []*mesos.TaskInfo{task}, &mesos.Filters{RefuseSeconds: proto.Float64(1)}) }
func (sched *NoneScheduler) StatusUpdate(driver sched.SchedulerDriver, status *mesos.TaskStatus) {
	taskId := status.GetTaskId().GetValue()
	log.Infoln("Status update: task", taskId, "is in state", status.State.Enum().String())

	c := sched.queue.GetCommandById(taskId)
	if c == nil {
		log.Errorln("Unable to find command for task", taskId)
		driver.Abort()
		return // avoid dereferencing a nil command below
	}
	if c.Status.GetState() == status.GetState() {
		// ignore repeated status updates
		return
	}
	c.Status = status

	// send status update to CommandHandler
	if status.GetState() == mesos.TaskState_TASK_RUNNING {
		sched.handler.CommandRunning(c)
	} else if status.GetState() == mesos.TaskState_TASK_FINISHED {
		sched.handler.CommandEnded(c)
		sched.handler.CommandFinished(c)
	} else if status.GetState() == mesos.TaskState_TASK_FAILED ||
		status.GetState() == mesos.TaskState_TASK_LOST ||
		status.GetState() == mesos.TaskState_TASK_KILLED {
		sched.handler.CommandEnded(c)
		sched.handler.CommandFailed(c)
	}

	// stop if Commands channel was closed and all tasks are finished
	if sched.queue.Closed() && !sched.handler.HasRunningTasks() {
		log.Infoln("All tasks finished, stopping framework.")
		sched.handler.FinishAllCommands()
		driver.Stop(false)
	}
}
func (s *Scheduler) launchTask(driver scheduler.SchedulerDriver, offer *mesos.Offer) { taskName := fmt.Sprintf("syscol-%s", offer.GetSlaveId().GetValue()) taskId := &mesos.TaskID{ Value: proto.String(fmt.Sprintf("%s-%s", taskName, uuid())), } data, err := json.Marshal(Config) if err != nil { panic(err) //shouldn't happen } Logger.Debugf("Task data: %s", string(data)) task := &mesos.TaskInfo{ Name: proto.String(taskName), TaskId: taskId, SlaveId: offer.GetSlaveId(), Executor: s.createExecutor(offer.GetSlaveId().GetValue()), Resources: []*mesos.Resource{ util.NewScalarResource("cpus", Config.Cpus), util.NewScalarResource("mem", Config.Mem), }, Data: data, } s.cluster.Add(offer.GetSlaveId().GetValue(), task) driver.LaunchTasks([]*mesos.OfferID{offer.GetId()}, []*mesos.TaskInfo{task}, &mesos.Filters{RefuseSeconds: proto.Float64(1)}) }
func (sched *SdcScheduler) StatusUpdate(driver sched.SchedulerDriver, status *mesos.TaskStatus) {
	log.Infoln("Status update: task", status.TaskId.GetValue(), "is in state", status.State.Enum().String())

	if status.GetState() == mesos.TaskState_TASK_FINISHED {
		sched.tasksFinished++
		// Calling KillTask here causes a TASK_LOST to be detected, which stops the framework.
		// driver.KillTask(status.TaskId)
		// log.Infoln("!! Status update: task", status.TaskId.GetValue(), " is in state ", status.State.Enum().String())
		// return
	}

	if sched.tasksFinished >= sched.totalTasks {
		// log.Infoln("Total tasks completed, stopping framework.")
		log.Infoln("Total tasks completed.")
		sched.tasksFinished = 0
		sched.totalTasks = 0
		sched.tasksLaunched = 0
		// driver.Stop(false)
	}

	if status.GetState() == mesos.TaskState_TASK_LOST ||
		status.GetState() == mesos.TaskState_TASK_KILLED ||
		status.GetState() == mesos.TaskState_TASK_FAILED ||
		status.GetState() == mesos.TaskState_TASK_ERROR {
		log.Infoln(
			"Aborting because task", status.TaskId.GetValue(),
			"is in unexpected state", status.State.String(),
			"with message", status.GetMessage(),
		)
		driver.Abort()
	}
}
// ResourceOffers handles the Resource Offers
func (s *Scheduler) ResourceOffers(driver mesossched.SchedulerDriver, offers []*mesosproto.Offer) {
	logrus.WithField("offers", len(offers)).Debug("Received offers")
	var offer *mesosproto.Offer

loop:
	for len(offers) > 0 {
		select {
		case <-s.shutdown:
			logrus.Info("Shutting down: declining offers")
			break loop
		case tid := <-s.tasks:
			logrus.WithField("task_id", tid).Debug("Trying to find offer to launch task with")
			t, _ := s.database.ReadUnmaskedTask(tid)

			if t.IsTerminating() {
				logrus.Debug("Dropping terminating task.")
				t.UpdateStatus(eremetic.Status{
					Status: eremetic.TaskKilled,
					Time:   time.Now().Unix(),
				})
				s.database.PutTask(&t)
				continue
			}

			offer, offers = matchOffer(t, offers)
			if offer == nil {
				logrus.WithField("task_id", tid).Warn("Unable to find a matching offer")
				tasksDelayed.Inc()
				go func() { s.tasks <- tid }()
				break loop
			}

			logrus.WithFields(logrus.Fields{
				"task_id":  tid,
				"offer_id": offer.Id.GetValue(),
			}).Debug("Preparing to launch task")

			t, task := createTaskInfo(t, offer)
			t.UpdateStatus(eremetic.Status{
				Status: eremetic.TaskStaging,
				Time:   time.Now().Unix(),
			})
			s.database.PutTask(&t)
			driver.LaunchTasks([]*mesosproto.OfferID{offer.Id}, []*mesosproto.TaskInfo{task}, defaultFilter)
			tasksLaunched.Inc()
			queueSize.Dec()
			continue
		default:
			break loop
		}
	}

	logrus.Debug("No tasks to launch. Declining offers.")
	for _, offer := range offers {
		driver.DeclineOffer(offer.Id, defaultFilter)
	}
}
func (s *SchedulerServer) failover(driver bindings.SchedulerDriver, hks hyperkube.Interface) error {
	if driver != nil {
		stat, err := driver.Stop(true)
		if stat != mesos.Status_DRIVER_STOPPED {
			return fmt.Errorf("failed to stop driver for failover, received unexpected status code: %v", stat)
		} else if err != nil {
			return err
		}
	}

	// there's no guarantee that all goroutines are actually programmed intelligently with 'done'
	// signals, so we'll need to restart if we want to really stop everything

	// run the same command that we were launched with
	//TODO(jdef) assumption here is that the scheduler is the only service running in this process, we should probably validate that somehow
	args := []string{}
	flags := pflag.CommandLine
	if hks != nil {
		args = append(args, hks.Name())
		flags = hks.Flags()
	}
	flags.Visit(func(flag *pflag.Flag) {
		if flag.Name != "api-servers" && flag.Name != "etcd-servers" {
			args = append(args, fmt.Sprintf("--%s=%s", flag.Name, flag.Value.String()))
		}
	})
	if !s.Graceful {
		args = append(args, "--graceful")
	}
	if len(s.APIServerList) > 0 {
		args = append(args, "--api-servers="+strings.Join(s.APIServerList, ","))
	}
	if len(s.EtcdServerList) > 0 {
		args = append(args, "--etcd-servers="+strings.Join(s.EtcdServerList, ","))
	}
	args = append(args, flags.Args()...)

	log.V(1).Infof("spawning scheduler for graceful failover: %s %+v", s.executable, args)

	cmd := exec.Command(s.executable, args...)
	cmd.Stdin = os.Stdin
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	cmd.SysProcAttr = makeDisownedProcAttr()

	// TODO(jdef) pass in a pipe FD so that we can block, waiting for the child proc to be ready
	//cmd.ExtraFiles = []*os.File{}

	exitcode := 0
	log.Flush() // TODO(jdef) it would be really nice to ensure that no one else in our process was still logging
	if err := cmd.Start(); err != nil {
		//log to stdout here to avoid conflicts with normal stderr logging
		fmt.Fprintf(os.Stdout, "failed to spawn failover process: %v\n", err)
		os.Exit(1)
	}
	os.Exit(exitcode)
	select {} // will never reach here
}
// Registered is called when the Scheduler is Registered
func (s *eremeticScheduler) Registered(driver sched.SchedulerDriver, frameworkID *mesos.FrameworkID, masterInfo *mesos.MasterInfo) {
	log.Debugf("Framework %s registered with master %s", frameworkID.GetValue(), masterInfo.GetHostname())
	if !s.initialised {
		driver.ReconcileTasks([]*mesos.TaskStatus{})
		s.initialised = true
	} else {
		s.Reconcile(driver)
	}
}
// Reregistered is called when the Scheduler is Reregistered
func (s *eremeticScheduler) Reregistered(driver sched.SchedulerDriver, masterInfo *mesos.MasterInfo) {
	log.Debugf("Framework re-registered with master %s", masterInfo)
	if !s.initialised {
		driver.ReconcileTasks([]*mesos.TaskStatus{})
		s.initialised = true
	} else {
		s.Reconcile(driver)
	}
}
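// The two callbacks above either request a blanket ("implicit") reconciliation with an empty
// status list, or call a Reconcile helper that is not shown here. The following is a minimal
// sketch of what such an explicit-reconciliation helper could look like; it is an illustration
// only, not the original implementation. The s.database.ListNonTerminalTasks() accessor and the
// ID/SlaveID task fields are hypothetical names introduced for this sketch.
func (s *eremeticScheduler) reconcileKnownTasks(driver sched.SchedulerDriver) {
	tasks, err := s.database.ListNonTerminalTasks() // hypothetical helper
	if err != nil {
		log.Errorf("failed to list tasks for reconciliation: %v", err)
		return
	}
	statuses := make([]*mesos.TaskStatus, 0, len(tasks))
	for _, t := range tasks {
		statuses = append(statuses, &mesos.TaskStatus{
			TaskId:  &mesos.TaskID{Value: proto.String(t.ID)},
			SlaveId: &mesos.SlaveID{Value: proto.String(t.SlaveID)},
			// State is a required field; Mesos replies with the authoritative state.
			State: mesos.TaskState_TASK_RUNNING.Enum(),
		})
	}
	if _, err := driver.ReconcileTasks(statuses); err != nil {
		log.Errorf("explicit reconciliation failed: %v", err)
	}
}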
func (sched *ExampleScheduler) ResourceOffers(driver sched.SchedulerDriver, offers []*mesos.Offer) {
	logOffers(offers)

	for _, offer := range offers {
		remainingCpus := getOfferCpu(offer)
		remainingMems := getOfferMem(offer)

		var tasks []*mesos.TaskInfo
		for sched.cpuPerTask <= remainingCpus &&
			sched.memPerTask <= remainingMems &&
			sched.tasksLaunched < sched.totalTasks {

			fmt.Printf("Tasks launched: %v Total tasks: %v\n", sched.tasksLaunched, sched.totalTasks)
			sched.tasksLaunched++

			taskId := &mesos.TaskID{
				Value: proto.String(strconv.Itoa(sched.tasksLaunched)),
			}

			dockerInfo := &mesos.ContainerInfo_DockerInfo{
				Image:        &sched.DockerImage,
				PortMappings: sched.DockerPorts,
			}

			containerType := mesos.ContainerInfo_DOCKER
			containerInfo := &mesos.ContainerInfo{
				Type:   &containerType,
				Docker: dockerInfo,
			}

			commandInfo := &mesos.CommandInfo{
				Value: &sched.DockerCommand,
			}

			task := &mesos.TaskInfo{
				Name:    proto.String("go-task-" + taskId.GetValue()),
				TaskId:  taskId,
				SlaveId: offer.SlaveId,
				Resources: []*mesos.Resource{
					util.NewScalarResource("cpus", sched.cpuPerTask),
					util.NewScalarResource("mem", sched.memPerTask),
				},
				Container: containerInfo,
				Command:   commandInfo,
			}
			fmt.Printf("Prepared task: %s with offer %s for launch\n", task.GetName(), offer.Id.GetValue())

			tasks = append(tasks, task)
			remainingCpus -= sched.cpuPerTask
			remainingMems -= sched.memPerTask
		}
		// fmt.Println("Launching ", len(tasks), "tasks for offer", offer.Id.GetValue())
		driver.LaunchTasks([]*mesos.OfferID{offer.Id}, tasks, &mesos.Filters{RefuseSeconds: proto.Float64(1)})
	}
}
func (sched *Scheduler) ResourceOffers(driver sched.SchedulerDriver, offers []*mesos.Offer) { for _, offer := range offers { taskId := &mesos.TaskID{ Value: proto.String(fmt.Sprintf("basicdocker-task-%d", time.Now().Unix())), } ports := util.FilterResources( offer.Resources, func(res *mesos.Resource) bool { return res.GetName() == "ports" }, ) if len(ports) > 0 && len(ports[0].GetRanges().GetRange()) > 0 { } else { return } task := &mesos.TaskInfo{ Name: proto.String(taskId.GetValue()), TaskId: taskId, SlaveId: offer.SlaveId, Container: &mesos.ContainerInfo{ Type: mesos.ContainerInfo_DOCKER.Enum(), Volumes: nil, Hostname: nil, Docker: &mesos.ContainerInfo_DockerInfo{ Image: &DOCKER_IMAGE_DEFAULT, Network: mesos.ContainerInfo_DockerInfo_BRIDGE.Enum(), }, }, Command: &mesos.CommandInfo{ Shell: proto.Bool(true), Value: proto.String("set -x ; /bin/date ; /bin/hostname ; sleep 200 ; echo done"), }, Executor: nil, Resources: []*mesos.Resource{ util.NewScalarResource("cpus", getOfferCpu(offer)), util.NewScalarResource("mem", getOfferMem(offer)), util.NewRangesResource("ports", []*mesos.Value_Range{ util.NewValueRange( *ports[0].GetRanges().GetRange()[0].Begin, *ports[0].GetRanges().GetRange()[0].Begin+1, ), }), }, } log.Infof("Prepared task: %s with offer %s for launch\n", task.GetName(), offer.Id.GetValue()) var tasks []*mesos.TaskInfo = []*mesos.TaskInfo{task} log.Infoln("Launching ", len(tasks), " tasks for offer", offer.Id.GetValue()) driver.LaunchTasks([]*mesos.OfferID{offer.Id}, tasks, &mesos.Filters{RefuseSeconds: proto.Float64(1)}) sched.tasksLaunched++ time.Sleep(time.Second) } }
func (this *ElodinaTransportScheduler) tryKillTask(driver scheduler.SchedulerDriver, taskId *mesos.TaskID) error {
	log.Logger.Info("Trying to kill task %s", taskId.GetValue())

	var err error
	// retry the kill up to KillTaskRetries times before giving up
	for i := 0; i <= this.config.KillTaskRetries; i++ {
		if _, err = driver.KillTask(taskId); err == nil {
			return nil
		}
	}
	return err
}
func (sched *ExampleScheduler) ResourceOffers(driver sched.SchedulerDriver, offers []*mesos.Offer) {
	for _, offer := range offers {
		cpuResources := util.FilterResources(offer.Resources, func(res *mesos.Resource) bool {
			return res.GetName() == "cpus"
		})
		cpus := 0.0
		for _, res := range cpuResources {
			cpus += res.GetScalar().GetValue()
		}

		memResources := util.FilterResources(offer.Resources, func(res *mesos.Resource) bool {
			return res.GetName() == "mem"
		})
		mems := 0.0
		for _, res := range memResources {
			mems += res.GetScalar().GetValue()
		}

		log.Infoln("Received Offer <", offer.Id.GetValue(), "> with cpus=", cpus, " mem=", mems)

		remainingCpus := cpus
		remainingMems := mems

		var tasks []*mesos.TaskInfo
		for sched.tasksLaunched < sched.totalTasks &&
			CPUS_PER_TASK <= remainingCpus &&
			MEM_PER_TASK <= remainingMems {

			sched.tasksLaunched++

			taskId := &mesos.TaskID{
				Value: proto.String(strconv.Itoa(sched.tasksLaunched)),
			}

			task := &mesos.TaskInfo{
				Name:     proto.String("go-task-" + taskId.GetValue()),
				TaskId:   taskId,
				SlaveId:  offer.SlaveId,
				Executor: sched.executor,
				Resources: []*mesos.Resource{
					util.NewScalarResource("cpus", CPUS_PER_TASK),
					util.NewScalarResource("mem", MEM_PER_TASK),
				},
			}
			log.Infof("Prepared task: %s with offer %s for launch\n", task.GetName(), offer.Id.GetValue())

			tasks = append(tasks, task)
			remainingCpus -= CPUS_PER_TASK
			remainingMems -= MEM_PER_TASK
		}
		log.Infoln("Launching ", len(tasks), "tasks for offer", offer.Id.GetValue())
		driver.LaunchTasks([]*mesos.OfferID{offer.Id}, tasks, &mesos.Filters{RefuseSeconds: proto.Float64(1)})
	}
}
func (s *StackDeployScheduler) ResourceOffers(driver scheduler.SchedulerDriver, offers []*mesos.Offer) {
	Logger.Debug("[ResourceOffers] %s", pretty.Offers(offers))

	for _, offer := range offers {
		declineReason := s.acceptOffer(driver, offer)
		if declineReason != "" {
			driver.DeclineOffer(offer.GetId(), &mesos.Filters{RefuseSeconds: proto.Float64(10)})
			Logger.Debug("Declined offer %s: %s", pretty.Offer(offer), declineReason)
		}
	}
}
func (ctx *RunOnceApplicationContext) LaunchTask(driver scheduler.SchedulerDriver, offer *mesos.Offer) error {
	ctx.lock.Lock()
	defer ctx.lock.Unlock()

	ctx.InstancesLeftToRun--
	taskInfo := ctx.newTaskInfo(offer)
	ctx.tasks = append(ctx.tasks, newRunOnceTask(offer, taskInfo.GetTaskId().GetValue()))

	_, err := driver.LaunchTasks([]*mesos.OfferID{offer.GetId()}, []*mesos.TaskInfo{taskInfo}, &mesos.Filters{RefuseSeconds: proto.Float64(10)})
	return err
}
func (this *TransformScheduler) tryKillTask(driver scheduler.SchedulerDriver, taskId *mesos.TaskID) error {
	fmt.Printf("Trying to kill task %s\n", taskId.GetValue())

	var err error
	// retry the kill up to KillTaskRetries times before giving up
	for i := 0; i <= this.config.KillTaskRetries; i++ {
		if _, err = driver.KillTask(taskId); err == nil {
			return nil
		}
	}
	return err
}
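// The tryKillTask helpers above retry KillTask back-to-back. A common refinement is to pause
// briefly between attempts so a transient driver or master hiccup has time to clear. The sketch
// below is an illustration only, not from the original source; the receiver, the KillTaskRetries
// config field, and the linear backoff duration are assumptions borrowed from the snippet above.
func (this *TransformScheduler) tryKillTaskWithBackoff(driver scheduler.SchedulerDriver, taskId *mesos.TaskID) error {
	var err error
	for i := 0; i <= this.config.KillTaskRetries; i++ {
		if _, err = driver.KillTask(taskId); err == nil {
			return nil
		}
		// wait a little longer after each failed attempt
		time.Sleep(time.Duration(i+1) * time.Second)
	}
	return err
}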
// mesos.Scheduler interface method.
// Invoked when resources have been offered to this framework.
func (this *ElodinaTransportScheduler) ResourceOffers(driver scheduler.SchedulerDriver, offers []*mesos.Offer) {
	log.Logger.Info("Received offers")
	offersAndTasks := make(map[*mesos.Offer][]*mesos.TaskInfo)

	remainingPartitions, err := this.GetTopicPartitions()
	if err != nil {
		return
	}
	remainingPartitions.RemoveAll(this.TakenTopicPartitions.GetArray())
	log.Logger.Debug("%v", remainingPartitions)
	tps := remainingPartitions.GetArray()
	offersAndResources := this.wrapInOfferAndResources(offers)

	for !remainingPartitions.IsEmpty() {
		log.Logger.Debug("Iteration %v", remainingPartitions)
		if this.hasEnoughInstances() {
			// spread the remaining partitions across already-running tasks
			for _, transfer := range this.taskIdToTaskState {
				if len(transfer.assignment) < this.config.ThreadsPerTask {
					transfer.assignment = append(transfer.assignment, tps[0])
					remainingPartitions.Remove(tps[0])
					this.TakenTopicPartitions.Add(tps[0])
					if len(tps) > 1 {
						tps = tps[1:]
					} else {
						tps = []consumer.TopicAndPartition{}
					}
				}
			}
		} else {
			log.Logger.Debug("Trying to launch new task")
			offer, task := this.launchNewTask(offersAndResources)
			if offer != nil && task != nil {
				offersAndTasks[offer] = append(offersAndTasks[offer], task)
			} else {
				for _, offer := range offers {
					if _, exists := offersAndTasks[offer]; !exists {
						offersAndTasks[offer] = make([]*mesos.TaskInfo, 0)
					}
				}
				break
			}
		}
	}

	this.assignPendingPartitions()

	for _, offer := range offers {
		if tasks, ok := offersAndTasks[offer]; ok {
			driver.LaunchTasks([]*mesos.OfferID{offer.Id}, tasks, &mesos.Filters{RefuseSeconds: proto.Float64(1)})
		} else {
			driver.DeclineOffer(offer.Id, &mesos.Filters{RefuseSeconds: proto.Float64(10)})
		}
	}
}
// Reregistered is called when the Scheduler is Reregistered
func (s *Scheduler) Reregistered(driver mesossched.SchedulerDriver, masterInfo *mesosproto.MasterInfo) {
	logrus.WithFields(logrus.Fields{
		"master_id": masterInfo.GetId(),
		"master":    masterInfo.GetHostname(),
	}).Debug("Framework re-registered with master.")
	if !s.initialised {
		driver.ReconcileTasks([]*mesosproto.TaskStatus{})
		s.initialised = true
	} else {
		s.Reconcile(driver)
	}
}
func (sc *SchedulerCore) ResourceOffers(driver sched.SchedulerDriver, offers []*mesos.Offer) {
	sc.lock.Lock()
	defer sc.lock.Unlock()
	log.Info("Received resource offers: ", offers)

	launchTasks := make(map[string][]*mesos.TaskInfo)
	toBeScheduled := []*FrameworkRiakNode{}
	for _, cluster := range sc.schedulerState.Clusters {
		for _, riakNode := range cluster.Nodes {
			if riakNode.NeedsToBeScheduled() {
				log.Infof("Adding Riak node for scheduling: %+v", riakNode)
				// We need to schedule this task I guess?
				toBeScheduled = append(toBeScheduled, riakNode)
			}
		}
	}

	// Populate a mutable slice of offer resources
	allResources := [][]*mesos.Resource{}
	for _, offer := range offers {
		allResources = append(allResources, offer.Resources)
	}

	launchTasks, err := sc.spreadNodesAcrossOffers(offers, allResources, toBeScheduled, 0, 0, launchTasks)
	if err != nil {
		log.Error(err)
	}

	for _, offer := range offers {
		tasks := launchTasks[*offer.Id.Value]
		if tasks == nil {
			tasks = []*mesos.TaskInfo{}
		}

		// This is somewhat of a hack, to avoid synchronously calling the mesos-go SDK
		// to avoid a deadlock situation.
		// TODO: Fix and make actually queues around driver interactions
		// This is a massive hack
		// -Sargun Dhillon 2015-10-01
		go func(offer *mesos.Offer) {
			log.Infof("Launching Tasks: %v for offer %v", tasks, *offer.Id.Value)
			status, err := driver.LaunchTasks([]*mesos.OfferID{offer.Id}, tasks, &mesos.Filters{RefuseSeconds: proto.Float64(OFFER_INTERVAL)})
			if status != mesos.Status_DRIVER_RUNNING {
				log.Fatal("Driver not running, while trying to launch tasks")
			}
			if err != nil {
				log.Panic("Failed to launch tasks: ", err)
			}
		}(offer)
	}
}
func (s *DiegoScheduler) ResourceOffers(driver sched.SchedulerDriver, offers []*mesos.Offer) {
	logOffers(offers)

	s.offersLock.Lock()
	defer s.offersLock.Unlock()

	if s.holdOffer {
		s.offers = append(s.offers, offers...)
	} else {
		// launching with no tasks effectively declines the offers for 30 seconds
		offerIds := extractOfferIds(offers)
		driver.LaunchTasks(offerIds, nil, &mesos.Filters{RefuseSeconds: proto.Float64(30)})
	}
}
// decline declines an offer.
func (s *EtcdScheduler) decline(
	driver scheduler.SchedulerDriver,
	offer *mesos.Offer,
) {
	log.V(2).Infof("Declining offer %s.", offer.Id.GetValue())
	driver.DeclineOffer(
		offer.Id,
		&mesos.Filters{
			// Decline offers for 5 seconds.
			RefuseSeconds: proto.Float64(float64(5)),
		},
	)
}
// execute an explicit task reconciliation, as per http://mesos.apache.org/documentation/latest/reconciliation/
func (k *KubernetesScheduler) explicitlyReconcileTasks(driver bindings.SchedulerDriver, taskToSlave map[string]string, cancel <-chan struct{}) error {
	log.Info("explicit reconcile tasks")

	// tell mesos to send us the latest status updates for all the non-terminal tasks that we know about
	statusList := []*mesos.TaskStatus{}
	remaining := sets.KeySet(reflect.ValueOf(taskToSlave))
	for taskId, slaveId := range taskToSlave {
		if slaveId == "" {
			delete(taskToSlave, taskId)
			continue
		}
		statusList = append(statusList, &mesos.TaskStatus{
			TaskId:  mutil.NewTaskID(taskId),
			SlaveId: mutil.NewSlaveID(slaveId),
			State:   mesos.TaskState_TASK_RUNNING.Enum(), // req'd field, doesn't have to reflect reality
		})
	}

	select {
	case <-cancel:
		return reconciliationCancelledErr
	default:
		if _, err := driver.ReconcileTasks(statusList); err != nil {
			return err
		}
	}

	start := time.Now()
	first := true
	for backoff := 1 * time.Second; first || remaining.Len() > 0; backoff = backoff * 2 {
		first = false
		// nothing to do here other than wait for status updates..
		if backoff > k.schedcfg.ExplicitReconciliationMaxBackoff.Duration {
			backoff = k.schedcfg.ExplicitReconciliationMaxBackoff.Duration
		}
		select {
		case <-cancel:
			return reconciliationCancelledErr
		case <-time.After(backoff):
			for taskId := range remaining {
				if task, _ := k.taskRegistry.Get(taskId); task != nil && explicitTaskFilter(task) && task.UpdatedTime.Before(start) {
					// keep this task in remaining list
					continue
				}
				remaining.Delete(taskId)
			}
		}
	}
	return nil
}
func (sched *ScraperScheduler) ResourceOffers(driver sched.SchedulerDriver, offers []*mesos.Offer) {
	logOffers(offers)

	for _, offer := range offers {
		if sched.tasksLaunched >= sched.totalTasks || len(sched.urls) == 0 {
			log.Infof("Declining offer %s", offer.Id.GetValue())
			driver.DeclineOffer(offer.Id, &mesos.Filters{})
			continue
		}

		remainingCpus := getOfferCpu(offer)
		remainingMems := getOfferMem(offer)

		var tasks []*mesos.TaskInfo
		for sched.cpuPerTask <= remainingCpus &&
			sched.memPerTask <= remainingMems &&
			sched.tasksLaunched < sched.totalTasks {

			log.Infof("Processing url %v of %v\n", sched.tasksLaunched, sched.totalTasks)
			log.Infof("Total Tasks: %d", sched.totalTasks)
			log.Infof("Tasks Launched: %d", sched.tasksLaunched)
			uri := sched.urls[sched.tasksLaunched]
			log.Infof("URI: %s", uri)
			sched.tasksLaunched++

			taskId := &mesos.TaskID{
				Value: proto.String(strconv.Itoa(sched.tasksLaunched)),
			}

			task := &mesos.TaskInfo{
				Name:     proto.String("go-task-" + taskId.GetValue()),
				TaskId:   taskId,
				SlaveId:  offer.SlaveId,
				Executor: sched.executor,
				Resources: []*mesos.Resource{
					util.NewScalarResource("cpus", sched.cpuPerTask),
					util.NewScalarResource("mem", sched.memPerTask),
				},
				Data: []byte(uri),
			}
			log.Infof("Prepared task: %s with offer %s for launch\n", task.GetName(), offer.Id.GetValue())

			tasks = append(tasks, task)
			remainingCpus -= sched.cpuPerTask
			remainingMems -= sched.memPerTask
		}
		log.Infoln("Launching ", len(tasks), "tasks for offer", offer.Id.GetValue())
		driver.LaunchTasks([]*mesos.OfferID{offer.Id}, tasks, &mesos.Filters{RefuseSeconds: proto.Float64(1)})
	}
}
// Perform implicit reconciliation every 5 minutes
func (s *EtcdScheduler) PeriodicReconciler(driver scheduler.SchedulerDriver) {
	for {
		s.mut.RLock()
		state := s.state
		s.mut.RUnlock()
		if state == Mutable {
			_, err := driver.ReconcileTasks([]*mesos.TaskStatus{})
			if err != nil {
				log.Errorf("Error while calling ReconcileTasks: %s", err)
			}
		}
		time.Sleep(5 * time.Minute)
	}
}
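// PeriodicReconciler above blocks forever and has no way to stop, so it is meant to run on its
// own goroutine. The sketch below is an illustrative variant (not from the original source) that
// accepts a stop channel so the loop can be shut down cleanly; the stop channel and the reuse of
// the 5-minute interval are assumptions made for this example.
func (s *EtcdScheduler) periodicReconcilerWithStop(driver scheduler.SchedulerDriver, stop <-chan struct{}) {
	ticker := time.NewTicker(5 * time.Minute)
	defer ticker.Stop()
	for {
		select {
		case <-stop:
			return
		case <-ticker.C:
			s.mut.RLock()
			state := s.state
			s.mut.RUnlock()
			if state == Mutable {
				// an empty status list asks Mesos for implicit reconciliation of all known tasks
				if _, err := driver.ReconcileTasks([]*mesos.TaskStatus{}); err != nil {
					log.Errorf("Error while calling ReconcileTasks: %s", err)
				}
			}
		}
	}
}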
func (s *Scheduler) ResourceOffers(driver scheduler.SchedulerDriver, offers []*mesos.Offer) {
	Logger.Debugf("[ResourceOffers] %s", pretty.Offers(offers))

	for _, offer := range offers {
		declineReason := s.acceptOffer(driver, offer)
		if declineReason != "" {
			driver.DeclineOffer(offer.GetId(), &mesos.Filters{RefuseSeconds: proto.Float64(10)})
			Logger.Debugf("Declined offer: %s", declineReason)
		}
	}

	s.reconcileTasks(false)
	s.cluster.Save()
}
func (s *StackDeployScheduler) StatusUpdate(driver scheduler.SchedulerDriver, status *mesos.TaskStatus) {
	Logger.Info("[StatusUpdate] %s", pretty.Status(status))

	if status.GetState() == mesos.TaskState_TASK_FINISHED {
		driver.ReviveOffers()
	}

	for _, runner := range MesosTaskRunners {
		if runner.StatusUpdate(driver, status) {
			return
		}
	}

	Logger.Warn("Received status update that was not handled by any Mesos Task Runner: %s", pretty.Status(status))
}