Ejemplo n.º 1
0
func (sched *ExampleScheduler) Reregistered(driver sched.SchedulerDriver, masterInfo *mesos.MasterInfo) {
	log.Infoln("Framework Re-Registered with Master ", masterInfo)
	_, err := driver.ReconcileTasks([]*mesos.TaskStatus{})
	if err != nil {
		log.Errorf("failed to request task reconciliation: %v", err)
	}
}
Ejemplo n.º 2
0
func (sched *MesosRunonceScheduler) StatusUpdate(driver sched.SchedulerDriver, status *mesos.TaskStatus) {
	log.V(1).Infoln("Status update: task", status.TaskId.GetValue(), " is in state ", status.State.Enum().String())
	eventCh <- status

	if status.GetState() == mesos.TaskState_TASK_FINISHED {
		sched.tasksFinished++
	}

	if sched.tasksFinished >= sched.totalTasks {
		log.V(1).Infoln("Total tasks completed, stopping framework.")
		driver.Stop(false)
	}

	if status.GetState() == mesos.TaskState_TASK_LOST ||
		status.GetState() == mesos.TaskState_TASK_KILLED ||
		status.GetState() == mesos.TaskState_TASK_FAILED ||
		status.GetState() == mesos.TaskState_TASK_ERROR {
		exitStatus = 1
		log.Warningf("mesos TaskStatus: %v", status)
		driver.Stop(false)
		log.Errorln(
			"Aborting because task", status.TaskId.GetValue(),
			"is in unexpected state", status.State.String(),
			"with message.", status.GetMessage(),
		)
	}
}
Ejemplo n.º 3
0
func (sched *Scheduler) ResourceOffers(driver sched.SchedulerDriver, offers []*mesos.Offer) {
	logOffers(offers)
	jobs, err := getLaunchableJobs()
	if err != nil {
		log.Errorf("Unable to get pending jobs! %s\n", err.Error())
		return
	}

	offersAndTasks, err := packJobsInOffers(jobs, offers)
	if err != nil {
		log.Errorf("Unable to pack jobs into offers! %s\n", err.Error())
		return
	}

	for _, ot := range offersAndTasks {
		if len(ot.Tasks) == 0 {
			log.Infof("Declining unused offer %s", ot.Offer.Id.GetValue())
			driver.DeclineOffer(ot.Offer.Id, &mesos.Filters{RefuseSeconds: proto.Float64(1)})
			continue
		} else {
			log.Infof("Launching %d tasks for offer %s\n", len(ot.Tasks), ot.Offer.Id.GetValue())
			driver.LaunchTasks([]*mesos.OfferID{ot.Offer.Id}, ot.Tasks, &mesos.Filters{RefuseSeconds: proto.Float64(1)})
			sched.tasksLaunched = sched.tasksLaunched + len(ot.Tasks)
		}
	}

}
Ejemplo n.º 4
0
func (sc *SchedulerCore) acceptOffer(driver sched.SchedulerDriver, offer *mesos.Offer, operations []*mesos.Offer_Operation) {
	log.Infof("Accepting OfferID: %+v, Operations: %+v", *offer.Id.Value, operations)

	var status mesos.Status
	var err error

	if sc.compatibilityMode {
		tasks := []*mesos.TaskInfo{}
		for _, operation := range operations {
			if *operation.Type == mesos.Offer_Operation_LAUNCH {
				tasks = operation.Launch.TaskInfos
			}
		}
		status, err = driver.LaunchTasks([]*mesos.OfferID{offer.Id}, tasks, &mesos.Filters{RefuseSeconds: proto.Float64(OFFER_INTERVAL)})
	} else {
		status, err = driver.AcceptOffers([]*mesos.OfferID{offer.Id}, operations, &mesos.Filters{RefuseSeconds: proto.Float64(OFFER_INTERVAL)})
	}

	if status != mesos.Status_DRIVER_RUNNING {
		log.Fatal("Driver not running, while trying to accept offers")
	}
	if err != nil {
		log.Panic("Failed to launch tasks: ", err)
	}
}
Ejemplo n.º 5
0
// perform one-time initialization actions upon the first registration event received from Mesos.
func (k *framework) onInitialRegistration(driver bindings.SchedulerDriver) {
	defer close(k.registration)

	if k.failoverTimeout > 0 {
		refreshInterval := k.schedulerConfig.FrameworkIdRefreshInterval.Duration
		if k.failoverTimeout < k.schedulerConfig.FrameworkIdRefreshInterval.Duration.Seconds() {
			refreshInterval = time.Duration(math.Max(1, k.failoverTimeout/2)) * time.Second
		}

		// wait until we've written the framework ID at least once before proceeding
		firstStore := make(chan struct{})
		go runtime.Until(func() {
			// only close firstStore once
			select {
			case <-firstStore:
			default:
				defer close(firstStore)
			}
			err := k.storeFrameworkId(context.TODO(), k.frameworkId.GetValue())
			if err != nil {
				log.Errorf("failed to store framework ID: %v", err)
				if err == frameworkid.ErrMismatch {
					// we detected a framework ID in storage that doesn't match what we're trying
					// to save. this is a dangerous state:
					// (1) perhaps we failed to initially recover the framework ID and so mesos
					// issued us a new one. now that we're trying to save it there's a mismatch.
					// (2) we've somehow bungled the framework ID and we're out of alignment with
					// what mesos is expecting.
					// (3) multiple schedulers were launched at the same time, and both have
					// registered with mesos (because when they each checked, there was no ID in
					// storage, so they asked for a new one). one of them has already written the
					// ID to storage -- we lose.
					log.Error("aborting due to framework ID mismatch")
					driver.Abort()
				}
			}
		}, refreshInterval, k.terminate)

		// wait for the first store attempt of the framework ID
		select {
		case <-firstStore:
		case <-k.terminate:
		}
	}

	r1 := k.makeTaskRegistryReconciler()
	r2 := k.makePodRegistryReconciler()

	k.tasksReconciler = taskreconciler.New(k.asRegisteredMaster, taskreconciler.MakeComposite(k.terminate, r1, r2),
		k.reconcileCooldown, k.schedulerConfig.ExplicitReconciliationAbortTimeout.Duration, k.terminate)
	go k.tasksReconciler.Run(driver, k.terminate)

	if k.reconcileInterval > 0 {
		ri := time.Duration(k.reconcileInterval) * time.Second
		time.AfterFunc(k.schedulerConfig.InitialImplicitReconciliationDelay.Duration, func() { runtime.Until(k.tasksReconciler.RequestImplicit, ri, k.terminate) })
		log.Infof("will perform implicit task reconciliation at interval: %v after %v", ri, k.schedulerConfig.InitialImplicitReconciliationDelay.Duration)
	}

	k.installDebugHandlers(k.mux)
}
Ejemplo n.º 6
0
func (s *Scheduler) launchTask(driver scheduler.SchedulerDriver, offer *mesos.Offer) {
	taskName := fmt.Sprintf("syslog-%s", offer.GetSlaveId().GetValue())
	taskId := &mesos.TaskID{
		Value: proto.String(fmt.Sprintf("%s-%s", taskName, uuid())),
	}

	data, err := json.Marshal(Config)
	if err != nil {
		panic(err) //shouldn't happen
	}
	Logger.Debugf("Task data: %s", string(data))

	tcpPort := uint64(s.getPort(Config.TcpPort, offer, -1))
	udpPort := uint64(s.getPort(Config.UdpPort, offer, int(tcpPort)))

	task := &mesos.TaskInfo{
		Name:     proto.String(taskName),
		TaskId:   taskId,
		SlaveId:  offer.GetSlaveId(),
		Executor: s.createExecutor(offer, tcpPort, udpPort),
		Resources: []*mesos.Resource{
			util.NewScalarResource("cpus", Config.Cpus),
			util.NewScalarResource("mem", Config.Mem),
			util.NewRangesResource("ports", []*mesos.Value_Range{util.NewValueRange(tcpPort, tcpPort)}),
			util.NewRangesResource("ports", []*mesos.Value_Range{util.NewValueRange(udpPort, udpPort)}),
		},
		Data:   data,
		Labels: utils.StringToLabels(s.labels),
	}

	s.cluster.Add(offer.GetSlaveId().GetValue(), task)

	driver.LaunchTasks([]*mesos.OfferID{offer.GetId()}, []*mesos.TaskInfo{task}, &mesos.Filters{RefuseSeconds: proto.Float64(1)})
}
Ejemplo n.º 7
0
func (sched *NoneScheduler) StatusUpdate(driver sched.SchedulerDriver, status *mesos.TaskStatus) {
	taskId := status.GetTaskId().GetValue()
	log.Infoln("Status update: task", taskId, "is in state", status.State.Enum().String())

	c := sched.queue.GetCommandById(taskId)
	if c == nil {
		log.Errorln("Unable to find command for task", taskId)
		driver.Abort()
	}
	if c.Status.GetState() == status.GetState() {
		// ignore repeated status updates
		return
	}
	c.Status = status

	// send status update to CommandHandler
	if status.GetState() == mesos.TaskState_TASK_RUNNING {
		sched.handler.CommandRunning(c)
	} else if status.GetState() == mesos.TaskState_TASK_FINISHED {
		sched.handler.CommandEnded(c)
		sched.handler.CommandFinished(c)
	} else if status.GetState() == mesos.TaskState_TASK_FAILED ||
		status.GetState() == mesos.TaskState_TASK_LOST ||
		status.GetState() == mesos.TaskState_TASK_KILLED {
		sched.handler.CommandEnded(c)
		sched.handler.CommandFailed(c)
	}

	// stop if Commands channel was closed and all tasks are finished
	if sched.queue.Closed() && !sched.handler.HasRunningTasks() {
		log.Infoln("All tasks finished, stopping framework.")
		sched.handler.FinishAllCommands()
		driver.Stop(false)
	}
}
Ejemplo n.º 8
0
func (s *Scheduler) launchTask(driver scheduler.SchedulerDriver, offer *mesos.Offer) {
	taskName := fmt.Sprintf("syscol-%s", offer.GetSlaveId().GetValue())
	taskId := &mesos.TaskID{
		Value: proto.String(fmt.Sprintf("%s-%s", taskName, uuid())),
	}

	data, err := json.Marshal(Config)
	if err != nil {
		panic(err) //shouldn't happen
	}
	Logger.Debugf("Task data: %s", string(data))

	task := &mesos.TaskInfo{
		Name:     proto.String(taskName),
		TaskId:   taskId,
		SlaveId:  offer.GetSlaveId(),
		Executor: s.createExecutor(offer.GetSlaveId().GetValue()),
		Resources: []*mesos.Resource{
			util.NewScalarResource("cpus", Config.Cpus),
			util.NewScalarResource("mem", Config.Mem),
		},
		Data: data,
	}

	s.cluster.Add(offer.GetSlaveId().GetValue(), task)

	driver.LaunchTasks([]*mesos.OfferID{offer.GetId()}, []*mesos.TaskInfo{task}, &mesos.Filters{RefuseSeconds: proto.Float64(1)})
}
Ejemplo n.º 9
0
func (sched *SdcScheduler) StatusUpdate(driver sched.SchedulerDriver, status *mesos.TaskStatus) {
	log.Infoln("Status update: task", status.TaskId.GetValue(), " is in state ", status.State.Enum().String())

	if status.GetState() == mesos.TaskState_TASK_FINISHED {
		sched.tasksFinished++
		// KillTaskを実行するとTASK_LOSTが検知され、フレームワークが止まる
		// driver.KillTask(status.TaskId)
		// log.Infoln("!! Status update: task", status.TaskId.GetValue(), " is in state ", status.State.Enum().String())
		// return
	}

	if sched.tasksFinished >= sched.totalTasks {
		// log.Infoln("Total tasks completed, stopping framework.")
		log.Infoln("Total tasks completed.")
		sched.tasksFinished = 0
		sched.totalTasks = 0
		sched.tasksLaunched = 0
		// driver.Stop(false)
	}

	if status.GetState() == mesos.TaskState_TASK_LOST ||
		status.GetState() == mesos.TaskState_TASK_KILLED ||
		status.GetState() == mesos.TaskState_TASK_FAILED ||
		status.GetState() == mesos.TaskState_TASK_ERROR {
		log.Infoln(
			"Aborting because task", status.TaskId.GetValue(),
			"is in unexpected state", status.State.String(),
			"with message", status.GetMessage(),
		)
		driver.Abort()
	}
}
Ejemplo n.º 10
0
// ResourceOffers handles the Resource Offers
func (s *Scheduler) ResourceOffers(driver mesossched.SchedulerDriver, offers []*mesosproto.Offer) {
	logrus.WithField("offers", len(offers)).Debug("Received offers")
	var offer *mesosproto.Offer

loop:
	for len(offers) > 0 {
		select {
		case <-s.shutdown:
			logrus.Info("Shutting down: declining offers")
			break loop
		case tid := <-s.tasks:
			logrus.WithField("task_id", tid).Debug("Trying to find offer to launch task with")
			t, _ := s.database.ReadUnmaskedTask(tid)

			if t.IsTerminating() {
				logrus.Debug("Dropping terminating task.")
				t.UpdateStatus(eremetic.Status{
					Status: eremetic.TaskKilled,
					Time:   time.Now().Unix(),
				})
				s.database.PutTask(&t)

				continue
			}
			offer, offers = matchOffer(t, offers)

			if offer == nil {
				logrus.WithField("task_id", tid).Warn("Unable to find a matching offer")
				tasksDelayed.Inc()
				go func() { s.tasks <- tid }()
				break loop
			}

			logrus.WithFields(logrus.Fields{
				"task_id":  tid,
				"offer_id": offer.Id.GetValue(),
			}).Debug("Preparing to launch task")

			t, task := createTaskInfo(t, offer)
			t.UpdateStatus(eremetic.Status{
				Status: eremetic.TaskStaging,
				Time:   time.Now().Unix(),
			})
			s.database.PutTask(&t)
			driver.LaunchTasks([]*mesosproto.OfferID{offer.Id}, []*mesosproto.TaskInfo{task}, defaultFilter)
			tasksLaunched.Inc()
			queueSize.Dec()

			continue
		default:
			break loop
		}
	}

	logrus.Debug("No tasks to launch. Declining offers.")
	for _, offer := range offers {
		driver.DeclineOffer(offer.Id, defaultFilter)
	}
}
Ejemplo n.º 11
0
func (s *SchedulerServer) failover(driver bindings.SchedulerDriver, hks hyperkube.Interface) error {
	if driver != nil {
		stat, err := driver.Stop(true)
		if stat != mesos.Status_DRIVER_STOPPED {
			return fmt.Errorf("failed to stop driver for failover, received unexpected status code: %v", stat)
		} else if err != nil {
			return err
		}
	}

	// there's no guarantee that all goroutines are actually programmed intelligently with 'done'
	// signals, so we'll need to restart if we want to really stop everything

	// run the same command that we were launched with
	//TODO(jdef) assumption here is that the sheduler is the only service running in this process, we should probably validate that somehow
	args := []string{}
	flags := pflag.CommandLine
	if hks != nil {
		args = append(args, hks.Name())
		flags = hks.Flags()
	}
	flags.Visit(func(flag *pflag.Flag) {
		if flag.Name != "api-servers" && flag.Name != "etcd-servers" {
			args = append(args, fmt.Sprintf("--%s=%s", flag.Name, flag.Value.String()))
		}
	})
	if !s.Graceful {
		args = append(args, "--graceful")
	}
	if len(s.APIServerList) > 0 {
		args = append(args, "--api-servers="+strings.Join(s.APIServerList, ","))
	}
	if len(s.EtcdServerList) > 0 {
		args = append(args, "--etcd-servers="+strings.Join(s.EtcdServerList, ","))
	}
	args = append(args, flags.Args()...)

	log.V(1).Infof("spawning scheduler for graceful failover: %s %+v", s.executable, args)

	cmd := exec.Command(s.executable, args...)
	cmd.Stdin = os.Stdin
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	cmd.SysProcAttr = makeDisownedProcAttr()

	// TODO(jdef) pass in a pipe FD so that we can block, waiting for the child proc to be ready
	//cmd.ExtraFiles = []*os.File{}

	exitcode := 0
	log.Flush() // TODO(jdef) it would be really nice to ensure that no one else in our process was still logging
	if err := cmd.Start(); err != nil {
		//log to stdtout here to avoid conflicts with normal stderr logging
		fmt.Fprintf(os.Stdout, "failed to spawn failover process: %v\n", err)
		os.Exit(1)
	}
	os.Exit(exitcode)
	select {} // will never reach here
}
Ejemplo n.º 12
0
// Registered is called when the Scheduler is Registered
func (s *eremeticScheduler) Registered(driver sched.SchedulerDriver, frameworkID *mesos.FrameworkID, masterInfo *mesos.MasterInfo) {
	log.Debugf("Framework %s registered with master %s", frameworkID.GetValue(), masterInfo.GetHostname())
	if !s.initialised {
		driver.ReconcileTasks([]*mesos.TaskStatus{})
		s.initialised = true
	} else {
		s.Reconcile(driver)
	}
}
Ejemplo n.º 13
0
// Reregistered is called when the Scheduler is Reregistered
func (s *eremeticScheduler) Reregistered(driver sched.SchedulerDriver, masterInfo *mesos.MasterInfo) {
	log.Debugf("Framework re-registered with master %s", masterInfo)
	if !s.initialised {
		driver.ReconcileTasks([]*mesos.TaskStatus{})
		s.initialised = true
	} else {
		s.Reconcile(driver)
	}
}
Ejemplo n.º 14
0
func (sched *ExampleScheduler) ResourceOffers(driver sched.SchedulerDriver, offers []*mesos.Offer) {
	logOffers(offers)
	for _, offer := range offers {
		remainingCpus := getOfferCpu(offer)
		remainingMems := getOfferMem(offer)

		var tasks []*mesos.TaskInfo
		for sched.cpuPerTask <= remainingCpus &&
			sched.memPerTask <= remainingMems &&
			sched.tasksLaunched < sched.totalTasks {

			fmt.Printf("Tasks launched: %v Total tasks: %v\n", sched.tasksLaunched, sched.totalTasks)

			sched.tasksLaunched++

			taskId := &mesos.TaskID{
				Value: proto.String(strconv.Itoa(sched.tasksLaunched)),
			}

			dockerInfo := &mesos.ContainerInfo_DockerInfo{
				Image:        &sched.DockerImage,
				PortMappings: sched.DockerPorts,
			}

			containerType := mesos.ContainerInfo_DOCKER

			containerInfo := &mesos.ContainerInfo{
				Type:   &containerType,
				Docker: dockerInfo,
			}

			commandInfo := &mesos.CommandInfo{
				Value: &sched.DockerCommand,
			}

			task := &mesos.TaskInfo{
				Name:    proto.String("go-task-" + taskId.GetValue()),
				TaskId:  taskId,
				SlaveId: offer.SlaveId,
				Resources: []*mesos.Resource{
					util.NewScalarResource("cpus", sched.cpuPerTask),
					util.NewScalarResource("mem", sched.memPerTask),
				},
				Container: containerInfo,
				Command:   commandInfo,
			}
			fmt.Printf("Prepared task: %s with offer %s for launch\n", task.GetName(), offer.Id.GetValue())

			tasks = append(tasks, task)
			remainingCpus -= sched.cpuPerTask
			remainingMems -= sched.memPerTask
		}
		//		fmt.Println("Launching ", len(tasks), "tasks for offer", offer.Id.GetValue())
		driver.LaunchTasks([]*mesos.OfferID{offer.Id}, tasks, &mesos.Filters{RefuseSeconds: proto.Float64(1)})
	}

}
Ejemplo n.º 15
0
func (sched *Scheduler) ResourceOffers(driver sched.SchedulerDriver, offers []*mesos.Offer) {
	for _, offer := range offers {
		taskId := &mesos.TaskID{
			Value: proto.String(fmt.Sprintf("basicdocker-task-%d", time.Now().Unix())),
		}

		ports := util.FilterResources(
			offer.Resources,
			func(res *mesos.Resource) bool {
				return res.GetName() == "ports"
			},
		)
		if len(ports) > 0 && len(ports[0].GetRanges().GetRange()) > 0 {

		} else {
			return
		}
		task := &mesos.TaskInfo{
			Name:    proto.String(taskId.GetValue()),
			TaskId:  taskId,
			SlaveId: offer.SlaveId,
			Container: &mesos.ContainerInfo{
				Type:     mesos.ContainerInfo_DOCKER.Enum(),
				Volumes:  nil,
				Hostname: nil,
				Docker: &mesos.ContainerInfo_DockerInfo{
					Image:   &DOCKER_IMAGE_DEFAULT,
					Network: mesos.ContainerInfo_DockerInfo_BRIDGE.Enum(),
				},
			},
			Command: &mesos.CommandInfo{
				Shell: proto.Bool(true),
				Value: proto.String("set -x ; /bin/date ; /bin/hostname ; sleep 200 ; echo done"),
			},
			Executor: nil,
			Resources: []*mesos.Resource{
				util.NewScalarResource("cpus", getOfferCpu(offer)),
				util.NewScalarResource("mem", getOfferMem(offer)),
				util.NewRangesResource("ports", []*mesos.Value_Range{
					util.NewValueRange(
						*ports[0].GetRanges().GetRange()[0].Begin,
						*ports[0].GetRanges().GetRange()[0].Begin+1,
					),
				}),
			},
		}

		log.Infof("Prepared task: %s with offer %s for launch\n", task.GetName(), offer.Id.GetValue())

		var tasks []*mesos.TaskInfo = []*mesos.TaskInfo{task}
		log.Infoln("Launching ", len(tasks), " tasks for offer", offer.Id.GetValue())
		driver.LaunchTasks([]*mesos.OfferID{offer.Id}, tasks, &mesos.Filters{RefuseSeconds: proto.Float64(1)})
		sched.tasksLaunched++
		time.Sleep(time.Second)
	}
}
Ejemplo n.º 16
0
func (this *ElodinaTransportScheduler) tryKillTask(driver scheduler.SchedulerDriver, taskId *mesos.TaskID) error {
	log.Logger.Info("Trying to kill task %s", taskId.GetValue())
	var err error
	for i := 0; i <= this.config.KillTaskRetries; i++ {
		if _, err = driver.KillTask(taskId); err == nil {
			return nil
		}
	}
	return err
}
Ejemplo n.º 17
0
func (sched *ExampleScheduler) ResourceOffers(driver sched.SchedulerDriver, offers []*mesos.Offer) {

	for _, offer := range offers {
		cpuResources := util.FilterResources(offer.Resources, func(res *mesos.Resource) bool {
			return res.GetName() == "cpus"
		})
		cpus := 0.0
		for _, res := range cpuResources {
			cpus += res.GetScalar().GetValue()
		}

		memResources := util.FilterResources(offer.Resources, func(res *mesos.Resource) bool {
			return res.GetName() == "mem"
		})
		mems := 0.0
		for _, res := range memResources {
			mems += res.GetScalar().GetValue()
		}

		log.Infoln("Received Offer <", offer.Id.GetValue(), "> with cpus=", cpus, " mem=", mems)

		remainingCpus := cpus
		remainingMems := mems

		var tasks []*mesos.TaskInfo
		for sched.tasksLaunched < sched.totalTasks &&
			CPUS_PER_TASK <= remainingCpus &&
			MEM_PER_TASK <= remainingMems {

			sched.tasksLaunched++

			taskId := &mesos.TaskID{
				Value: proto.String(strconv.Itoa(sched.tasksLaunched)),
			}

			task := &mesos.TaskInfo{
				Name:     proto.String("go-task-" + taskId.GetValue()),
				TaskId:   taskId,
				SlaveId:  offer.SlaveId,
				Executor: sched.executor,
				Resources: []*mesos.Resource{
					util.NewScalarResource("cpus", CPUS_PER_TASK),
					util.NewScalarResource("mem", MEM_PER_TASK),
				},
			}
			log.Infof("Prepared task: %s with offer %s for launch\n", task.GetName(), offer.Id.GetValue())

			tasks = append(tasks, task)
			remainingCpus -= CPUS_PER_TASK
			remainingMems -= MEM_PER_TASK
		}
		log.Infoln("Launching ", len(tasks), "tasks for offer", offer.Id.GetValue())
		driver.LaunchTasks([]*mesos.OfferID{offer.Id}, tasks, &mesos.Filters{RefuseSeconds: proto.Float64(1)})
	}
}
Ejemplo n.º 18
0
func (s *StackDeployScheduler) ResourceOffers(driver scheduler.SchedulerDriver, offers []*mesos.Offer) {
	Logger.Debug("[ResourceOffers] %s", pretty.Offers(offers))

	for _, offer := range offers {
		declineReason := s.acceptOffer(driver, offer)
		if declineReason != "" {
			driver.DeclineOffer(offer.GetId(), &mesos.Filters{RefuseSeconds: proto.Float64(10)})
			Logger.Debug("Declined offer %s: %s", pretty.Offer(offer), declineReason)
		}
	}
}
func (ctx *RunOnceApplicationContext) LaunchTask(driver scheduler.SchedulerDriver, offer *mesos.Offer) error {
	ctx.lock.Lock()
	defer ctx.lock.Unlock()

	ctx.InstancesLeftToRun--
	taskInfo := ctx.newTaskInfo(offer)
	ctx.tasks = append(ctx.tasks, newRunOnceTask(offer, taskInfo.GetTaskId().GetValue()))

	_, err := driver.LaunchTasks([]*mesos.OfferID{offer.GetId()}, []*mesos.TaskInfo{taskInfo}, &mesos.Filters{RefuseSeconds: proto.Float64(10)})
	return err
}
Ejemplo n.º 20
0
func (this *TransformScheduler) tryKillTask(driver scheduler.SchedulerDriver, taskId *mesos.TaskID) error {
	fmt.Printf("Trying to kill task %s\n", taskId.GetValue())

	var err error
	for i := 0; i <= this.config.KillTaskRetries; i++ {
		if _, err = driver.KillTask(taskId); err == nil {
			return nil
		}
	}
	return err
}
Ejemplo n.º 21
0
// mesos.Scheduler interface method.
// Invoked when resources have been offered to this framework.
func (this *ElodinaTransportScheduler) ResourceOffers(driver scheduler.SchedulerDriver, offers []*mesos.Offer) {
	log.Logger.Info("Received offers")
	offersAndTasks := make(map[*mesos.Offer][]*mesos.TaskInfo)
	remainingPartitions, err := this.GetTopicPartitions()
	if err != nil {
		return
	}
	remainingPartitions.RemoveAll(this.TakenTopicPartitions.GetArray())
	log.Logger.Debug("%v", remainingPartitions)
	tps := remainingPartitions.GetArray()

	offersAndResources := this.wrapInOfferAndResources(offers)
	for !remainingPartitions.IsEmpty() {
		log.Logger.Debug("Iteration %v", remainingPartitions)
		if this.hasEnoughInstances() {
			for _, transfer := range this.taskIdToTaskState {
				if len(transfer.assignment) < this.config.ThreadsPerTask {
					transfer.assignment = append(transfer.assignment, tps[0])
					remainingPartitions.Remove(tps[0])
					this.TakenTopicPartitions.Add(tps[0])
					if len(tps) > 1 {
						tps = tps[1:]
					} else {
						tps = []consumer.TopicAndPartition{}
					}
				}
			}
		} else {
			log.Logger.Debug("Trying to launch new task")
			offer, task := this.launchNewTask(offersAndResources)
			if offer != nil && task != nil {
				offersAndTasks[offer] = append(offersAndTasks[offer], task)
			} else {
				for _, offer := range offers {
					if _, exists := offersAndTasks[offer]; !exists {
						offersAndTasks[offer] = make([]*mesos.TaskInfo, 0)
					}
				}
				break
			}
		}
	}

	this.assignPendingPartitions()

	for _, offer := range offers {
		if tasks, ok := offersAndTasks[offer]; ok {
			driver.LaunchTasks([]*mesos.OfferID{offer.Id}, tasks, &mesos.Filters{RefuseSeconds: proto.Float64(1)})
		} else {
			driver.DeclineOffer(offer.Id, &mesos.Filters{RefuseSeconds: proto.Float64(10)})
		}
	}
}
Ejemplo n.º 22
0
// Reregistered is called when the Scheduler is Reregistered
func (s *Scheduler) Reregistered(driver mesossched.SchedulerDriver, masterInfo *mesosproto.MasterInfo) {
	logrus.WithFields(logrus.Fields{
		"master_id": masterInfo.GetId(),
		"master":    masterInfo.GetHostname(),
	}).Debug("Framework re-registered with master.")
	if !s.initialised {
		driver.ReconcileTasks([]*mesosproto.TaskStatus{})
		s.initialised = true
	} else {
		s.Reconcile(driver)
	}
}
Ejemplo n.º 23
0
func (sc *SchedulerCore) ResourceOffers(driver sched.SchedulerDriver, offers []*mesos.Offer) {
	sc.lock.Lock()
	defer sc.lock.Unlock()
	log.Info("Received resource offers: ", offers)
	launchTasks := make(map[string][]*mesos.TaskInfo)
	toBeScheduled := []*FrameworkRiakNode{}
	for _, cluster := range sc.schedulerState.Clusters {
		for _, riakNode := range cluster.Nodes {
			if riakNode.NeedsToBeScheduled() {
				log.Infof("Adding Riak node for scheduling: %+v", riakNode)
				// We need to schedule this task I guess?
				toBeScheduled = append(toBeScheduled, riakNode)
			}
		}
	}

	// Populate a mutable slice of offer resources
	allResources := [][]*mesos.Resource{}
	for _, offer := range offers {
		allResources = append(allResources, offer.Resources)
	}

	launchTasks, err := sc.spreadNodesAcrossOffers(offers, allResources, toBeScheduled, 0, 0, launchTasks)
	if err != nil {
		log.Error(err)
	}

	for _, offer := range offers {
		tasks := launchTasks[*offer.Id.Value]

		if tasks == nil {
			tasks = []*mesos.TaskInfo{}
		}

		// This is somewhat of a hack, to avoid synchronously calling the mesos-go SDK
		// to avoid a deadlock situation.
		// TODO: Fix and make actually queues around driver interactions
		// This is a massive hack
		// -Sargun Dhillon 2015-10-01
		go func(offer *mesos.Offer) {
			log.Infof("Launching Tasks: %v for offer %v", tasks, *offer.Id.Value)
			status, err := driver.LaunchTasks([]*mesos.OfferID{offer.Id}, tasks, &mesos.Filters{RefuseSeconds: proto.Float64(OFFER_INTERVAL)})

			if status != mesos.Status_DRIVER_RUNNING {
				log.Fatal("Driver not running, while trying to launch tasks")
			}
			if err != nil {
				log.Panic("Failed to launch tasks: ", err)
			}
		}(offer)
	}
}
Ejemplo n.º 24
0
func (s *DiegoScheduler) ResourceOffers(driver sched.SchedulerDriver, offers []*mesos.Offer) {
	logOffers(offers)

	s.offersLock.Lock()
	defer s.offersLock.Unlock()

	if s.holdOffer {
		s.offers = append(s.offers, offers...)
	} else {
		offerIds := extractOfferIds(offers)
		driver.LaunchTasks(offerIds, nil, &mesos.Filters{RefuseSeconds: proto.Float64(30)})
	}
}
Ejemplo n.º 25
0
// decline declines an offer.
func (s *EtcdScheduler) decline(
	driver scheduler.SchedulerDriver,
	offer *mesos.Offer,
) {
	log.V(2).Infof("Declining offer %s.", offer.Id.GetValue())
	driver.DeclineOffer(
		offer.Id,
		&mesos.Filters{
			// Decline offers for 5 seconds.
			RefuseSeconds: proto.Float64(float64(5)),
		},
	)
}
Ejemplo n.º 26
0
// execute an explicit task reconciliation, as per http://mesos.apache.org/documentation/latest/reconciliation/
func (k *KubernetesScheduler) explicitlyReconcileTasks(driver bindings.SchedulerDriver, taskToSlave map[string]string, cancel <-chan struct{}) error {
	log.Info("explicit reconcile tasks")

	// tell mesos to send us the latest status updates for all the non-terminal tasks that we know about
	statusList := []*mesos.TaskStatus{}
	remaining := sets.KeySet(reflect.ValueOf(taskToSlave))
	for taskId, slaveId := range taskToSlave {
		if slaveId == "" {
			delete(taskToSlave, taskId)
			continue
		}
		statusList = append(statusList, &mesos.TaskStatus{
			TaskId:  mutil.NewTaskID(taskId),
			SlaveId: mutil.NewSlaveID(slaveId),
			State:   mesos.TaskState_TASK_RUNNING.Enum(), // req'd field, doesn't have to reflect reality
		})
	}

	select {
	case <-cancel:
		return reconciliationCancelledErr
	default:
		if _, err := driver.ReconcileTasks(statusList); err != nil {
			return err
		}
	}

	start := time.Now()
	first := true
	for backoff := 1 * time.Second; first || remaining.Len() > 0; backoff = backoff * 2 {
		first = false
		// nothing to do here other than wait for status updates..
		if backoff > k.schedcfg.ExplicitReconciliationMaxBackoff.Duration {
			backoff = k.schedcfg.ExplicitReconciliationMaxBackoff.Duration
		}
		select {
		case <-cancel:
			return reconciliationCancelledErr
		case <-time.After(backoff):
			for taskId := range remaining {
				if task, _ := k.taskRegistry.Get(taskId); task != nil && explicitTaskFilter(task) && task.UpdatedTime.Before(start) {
					// keep this task in remaining list
					continue
				}
				remaining.Delete(taskId)
			}
		}
	}
	return nil
}
Ejemplo n.º 27
0
func (sched *ScraperScheduler) ResourceOffers(driver sched.SchedulerDriver, offers []*mesos.Offer) {
	logOffers(offers)

	for _, offer := range offers {
		if sched.tasksLaunched >= sched.totalTasks || len(sched.urls) == 0 {
			log.Infof("Declining offer %s", offer.Id.GetValue())
			driver.DeclineOffer(offer.Id, &mesos.Filters{})
			continue
		}
		remainingCpus := getOfferCpu(offer)
		remainingMems := getOfferMem(offer)

		var tasks []*mesos.TaskInfo
		for sched.cpuPerTask <= remainingCpus &&
			sched.memPerTask <= remainingMems &&
			sched.tasksLaunched < sched.totalTasks {

			log.Infof("Processing url %v of %v\n", sched.tasksLaunched, sched.totalTasks)
			log.Infof("Total Tasks: %d", sched.totalTasks)
			log.Infof("Tasks Launched: %d", sched.tasksLaunched)
			uri := sched.urls[sched.tasksLaunched]
			log.Infof("URI: %s", uri)

			sched.tasksLaunched++

			taskId := &mesos.TaskID{
				Value: proto.String(strconv.Itoa(sched.tasksLaunched)),
			}

			task := &mesos.TaskInfo{
				Name:     proto.String("go-task-" + taskId.GetValue()),
				TaskId:   taskId,
				SlaveId:  offer.SlaveId,
				Executor: sched.executor,
				Resources: []*mesos.Resource{
					util.NewScalarResource("cpus", sched.cpuPerTask),
					util.NewScalarResource("mem", sched.memPerTask),
				},
				Data: []byte(uri),
			}
			log.Infof("Prepared task: %s with offer %s for launch\n", task.GetName(), offer.Id.GetValue())

			tasks = append(tasks, task)
			remainingCpus -= sched.cpuPerTask
			remainingMems -= sched.memPerTask
		}
		log.Infoln("Launching ", len(tasks), "tasks for offer", offer.Id.GetValue())
		driver.LaunchTasks([]*mesos.OfferID{offer.Id}, tasks, &mesos.Filters{RefuseSeconds: proto.Float64(1)})
	}
}
Ejemplo n.º 28
0
// Perform implicit reconciliation every 5 minutes
func (s *EtcdScheduler) PeriodicReconciler(driver scheduler.SchedulerDriver) {
	for {
		s.mut.RLock()
		state := s.state
		s.mut.RUnlock()
		if state == Mutable {
			_, err := driver.ReconcileTasks([]*mesos.TaskStatus{})
			if err != nil {
				log.Errorf("Error while calling ReconcileTasks: %s", err)
			}
		}
		time.Sleep(5 * time.Minute)
	}
}
Ejemplo n.º 29
0
func (s *Scheduler) ResourceOffers(driver scheduler.SchedulerDriver, offers []*mesos.Offer) {
	Logger.Debugf("[ResourceOffers] %s", pretty.Offers(offers))

	for _, offer := range offers {
		declineReason := s.acceptOffer(driver, offer)
		if declineReason != "" {
			driver.DeclineOffer(offer.GetId(), &mesos.Filters{RefuseSeconds: proto.Float64(10)})
			Logger.Debugf("Declined offer: %s", declineReason)
		}
	}

	s.reconcileTasks(false)
	s.cluster.Save()
}
Ejemplo n.º 30
0
func (s *StackDeployScheduler) StatusUpdate(driver scheduler.SchedulerDriver, status *mesos.TaskStatus) {
	Logger.Info("[StatusUpdate] %s", pretty.Status(status))

	if status.GetState() == mesos.TaskState_TASK_FINISHED {
		driver.ReviveOffers()
	}

	for _, runner := range MesosTaskRunners {
		if runner.StatusUpdate(driver, status) {
			return
		}
	}

	Logger.Warn("Received status update that was not handled by any Mesos Task Runner: %s", pretty.Status(status))
}