Ejemplo n.º 1
0
// TrackMachine records m in the watcher's machine table and then, for every
// watch whose Count is ScheduleAllMachines, creates a job named
// "<BootId>.<payload name>" for m unless that watch's schedule already holds
// jobs for m. Newly created jobs are added both to the watch's schedule and to
// a partial schedule, which is submitted if it ends up non-empty.
func (self *JobWatcher) TrackMachine(m *machine.Machine) {
	self.machines[m.BootId] = *m

	partial := NewSchedule()
	for _, watch := range self.watches {
		if watch.Count != ScheduleAllMachines {
			continue
		}

		// Create the per-payload schedule on first use so the Add below never
		// targets a missing (zero-value) entry.
		sched, ok := self.schedules[watch.Payload.Name]
		if !ok {
			sched = NewSchedule()
			self.schedules[watch.Payload.Name] = sched
		}

		// Nothing to do if this machine already has jobs for the watch.
		if len(sched.MachineJobs(m)) > 0 {
			continue
		}

		name := fmt.Sprintf("%s.%s", m.BootId, watch.Payload.Name)
		j, err := job.NewJob(name, nil, watch.Payload)
		if err != nil {
			// Skip this watch rather than adding an unusable job.
			log.Infof("Unable to create job=%s machine=%s: %v", name, m.BootId, err)
			continue
		}

		log.Infof("Adding to schedule job=%s machine=%s", name, m.BootId)
		partial.Add(j, m)
		sched.Add(j, m)
	}

	if len(partial) > 0 {
		log.Infof("Submitting schedule: %s", partial.String())
		self.submitSchedule(partial)
	}
}
Ejemplo n.º 2
0
// filterEventJobCreated turns an etcd response into an EventJobCreated event.
//
// It returns nil when the response is not a "set" with an empty previous
// value, when the key's parent directory is not the schedule prefix, when the
// node value cannot be deserialized into a job.JobPayload, or when a Job
// cannot be built from that payload. Otherwise the event carries the Job
// (named after the last key element) and a Machine built from the key element
// directly above the schedule prefix.
func filterEventJobCreated(resp *etcd.Response) *Event {
	if resp.Action != "set" || resp.Node.PrevValue != "" {
		return nil
	}

	// Key layout, read right to left: <job name>, <schedule prefix>, <machine>.
	dir, jobName := path.Split(resp.Node.Key)
	dir = strings.TrimSuffix(dir, "/")
	dir, prefix := path.Split(dir)
	if prefix != strings.Trim(schedulePrefix, "/") {
		return nil
	}

	var jp job.JobPayload
	if err := unmarshal(resp.Node.Value, &jp); err != nil {
		log.V(1).Infof("Failed to deserialize JobPayload: %s", err)
		return nil
	}

	j, err := job.NewJob(jobName, nil, &jp)
	if err != nil {
		// Do not dereference j when construction failed.
		log.V(1).Infof("Failed to create Job(%s): %s", jobName, err)
		return nil
	}

	_, machName := path.Split(strings.TrimSuffix(dir, "/"))
	m := machine.New(machName)

	return &Event{"EventJobCreated", *j, m}
}
Ejemplo n.º 3
0
// GetScheduledJobs lists the jobs a given Machine is scheduled to run, keyed
// by job name.
//
// Any error from the etcd lookup is treated as "key not found" and yields an
// empty map. Entries whose value cannot be deserialized into a
// job.JobPayload, or from which a Job cannot be built, are logged and skipped.
func (r *Registry) GetScheduledJobs(machine *machine.Machine) map[string]job.Job {
	key := path.Join(keyPrefix, machinePrefix, machine.BootId, schedulePrefix)
	resp, err := r.etcd.Get(key, false, true)

	// Assume the error was KeyNotFound and return an empty data structure
	if err != nil {
		return make(map[string]job.Job)
	}

	jobs := make(map[string]job.Job, len(resp.Node.Nodes))
	for _, kv := range resp.Node.Nodes {
		name := path.Base(kv.Key)

		var payload job.JobPayload
		if err := unmarshal(kv.Value, &payload); err != nil {
			log.V(1).Infof("Failed to create Job: %s", err)
			continue
		}

		j, err := job.NewJob(name, nil, &payload)
		if err != nil {
			// Skip the entry instead of dereferencing an unusable job.
			log.V(1).Infof("Failed to create Job(%s): %s", name, err)
			continue
		}

		//FIXME: This will hide duplicate jobs!
		jobs[j.Name] = *j
	}
	return jobs
}
Ejemplo n.º 4
0
// statusUnitsAction prints a status summary for every unit named on the
// command line. For each argument it looks up the job state in the registry
// and prints the job name, its load state, its active and sub states, and one
// "Listen" line per socket. States that are unavailable are shown as "-".
// Arguments for which a Job cannot be built are reported and skipped.
func statusUnitsAction(c *cli.Context) {
	r := registry.New()

	for i, v := range c.Args() {
		// This extra newline here to match systemctl status output
		if i != 0 {
			fmt.Printf("\n")
		}

		name := path.Base(v)
		j, err := job.NewJob(name, nil, nil)
		if err != nil {
			// Report and move on rather than dereferencing an unusable job.
			fmt.Printf("Unable to build Job(%s): %v\n", name, err)
			continue
		}
		j.State = r.GetJobState(j)

		loadState, activeState, subState := "-", "-", "-"
		if j.State != nil {
			loadState = j.State.LoadState
			activeState = j.State.ActiveState
			subState = j.State.SubState
		}

		fmt.Printf("%s\n", j.Name)
		fmt.Printf("\tLoaded: %s\n", loadState)
		fmt.Printf("\tActive: %s (%s)\n", activeState, subState)
		if j.State != nil {
			for _, sock := range j.State.Sockets {
				fmt.Printf("\tListen: %s\n", sock)
			}
		}
		fmt.Print("\n")
	}
}
Ejemplo n.º 5
0
// getJobsFromRequest builds one job.Job per payload carried by req, in payload
// order, using the payload's Name as the job name. The result is nil when req
// has no payloads.
func getJobsFromRequest(req *job.JobRequest) []job.Job {
	var result []job.Job
	for idx := range req.Payloads {
		// Copy the element so every NewJob call receives a pointer to its own
		// payload value rather than to a shared variable.
		p := req.Payloads[idx]
		result = append(result, *job.NewJob(p.Name, nil, &p))
	}
	return result
}
Ejemplo n.º 6
0
// RemoveJobWatch attempts to remove a given JobWatch from coreinit.
//
// It deletes the watch's "object" key and then, for every active machine,
// asks the registry to remove the machine job named "<BootId>.<name>". Both
// steps are best-effort: the result of the etcd delete is not inspected, and
// machines for which the Job cannot be constructed are skipped.
func (r *Registry) RemoveJobWatch(name string) {
	key := path.Join(keyPrefix, jobWatchPrefix, name, "object")
	r.etcd.Delete(key, true)

	// Attempt to stop any Jobs that may have been created by the given JobWatch
	for _, m := range r.GetActiveMachines() {
		// A distinct identifier avoids shadowing the name parameter.
		jobName := fmt.Sprintf("%s.%s", m.BootId, name)
		j, err := job.NewJob(jobName, nil, nil)
		if err != nil {
			// Nothing sensible can be removed without a valid Job; keep going
			// so the remaining machines are still cleaned up.
			continue
		}
		r.RemoveMachineJob(j, &m)
	}
}
Ejemplo n.º 7
0
// GetJobs returns the jobs backing the units of the manager's target, keyed by
// job name. Each job is built from the unit's name and the job state derived
// from that unit. Units for which a Job cannot be constructed are omitted.
func (m *SystemdManager) GetJobs() map[string]job.Job {
	units := m.getUnitsByTarget(m.Target)
	jobs := make(map[string]job.Job, len(units))
	for _, u := range units {
		// Per-iteration copy: guards against every state aliasing the same
		// range variable should getJobStateFromUnit retain the pointer
		// (relevant for toolchains before Go 1.22).
		u := u
		state := m.getJobStateFromUnit(&u)
		j, err := job.NewJob(u.Name(), state, nil)
		if err != nil {
			// Skip the unit instead of dereferencing an unusable job.
			continue
		}
		jobs[j.Name] = *j
	}

	return jobs
}
Ejemplo n.º 8
0
// schedule builds (or extends) the schedule kept for watch's payload and
// submits it.
//
// When watch.Count is ScheduleAllMachines, one job named
// "<BootId>.<payload name>" is added per known machine. Otherwise jobs named
// "1.<payload name>" through "<Count>.<payload name>" are added; a job that
// already has state in the registry keeps that state's machine, the others
// are added without a machine and left for FinalizeSchedule to place. Jobs
// that cannot be constructed are logged and skipped.
func (self *JobWatcher) schedule(watch *job.JobWatch) {
	sched, ok := self.schedules[watch.Payload.Name]
	if !ok {
		sched = NewSchedule()
		self.schedules[watch.Payload.Name] = sched
	}

	if watch.Count == ScheduleAllMachines {
		for idx := range self.machines {
			// Copy the map value so Add receives a pointer to a distinct Machine.
			m := self.machines[idx]
			name := fmt.Sprintf("%s.%s", m.BootId, watch.Payload.Name)
			j, err := job.NewJob(name, nil, watch.Payload)
			if err != nil {
				log.Infof("Unable to create Job(%s) for Machine(%s): %v", name, m.BootId, err)
				continue
			}
			log.Infof("Scheduling Job(%s) to Machine(%s)", name, m.BootId)
			sched.Add(j, &m)
		}
	} else {
		for i := 1; i <= watch.Count; i++ {
			name := fmt.Sprintf("%d.%s", i, watch.Payload.Name)
			j, err := job.NewJob(name, nil, watch.Payload)
			if err != nil {
				log.Infof("Unable to create Job(%s): %v", name, err)
				continue
			}

			var m *machine.Machine
			// Check if this job was scheduled somewhere already
			if state := self.registry.GetJobState(j); state != nil {
				m = state.Machine
			}
			sched.Add(j, m)
		}
	}

	if len(sched) > 0 {
		self.scheduler.FinalizeSchedule(&sched, self.machines, self.registry)
		log.Infof("Submitting schedule: %s", sched.String())
		self.submitSchedule(sched)
	} else {
		log.Infof("No schedule changes made")
	}
}
Ejemplo n.º 9
0
// filterEventJobStateExpired converts an etcd "delete" or "expire" response
// into an EventJobStateExpired event. Any other action, or a node value that
// cannot be deserialized into a job.JobState, yields nil. The event's Job is
// named after the last element of the node key and carries the decoded state.
func filterEventJobStateExpired(resp *etcd.Response) *Event {
	switch resp.Action {
	case "delete", "expire":
		// Relevant actions; fall out of the switch and build the event.
	default:
		return nil
	}

	var state job.JobState
	if decodeErr := unmarshal(resp.Node.Value, &state); decodeErr != nil {
		log.V(1).Infof("Failed to deserialize JobState: %s", decodeErr)
		return nil
	}

	jobName := path.Base(resp.Node.Key)
	expired := job.NewJob(jobName, &state, nil)
	return &Event{"EventJobStateExpired", *expired, nil}
}
Ejemplo n.º 10
0
// GetMachineJob looks up the job called name in the schedule of the given
// machine and returns it, or nil when it cannot be produced.
//
// nil is returned when the etcd lookup fails (assumed to mean the key does
// not exist), when the stored value cannot be deserialized into a
// job.JobPayload, or when a Job cannot be built from that payload.
func (r *Registry) GetMachineJob(name string, machine *machine.Machine) *job.Job {
	key := path.Join(keyPrefix, machinePrefix, machine.BootId, schedulePrefix, name)
	resp, err := r.etcd.Get(key, false, true)

	// Assume the error was KeyNotFound and report that no job exists
	if err != nil {
		return nil
	}

	var jp job.JobPayload
	if err := unmarshal(resp.Node.Value, &jp); err != nil {
		// A payload that cannot be decoded must not be turned into a Job
		// built from zero values.
		return nil
	}

	j, err := job.NewJob(name, nil, &jp)
	if err != nil {
		return nil
	}
	return j
}
Ejemplo n.º 11
0
// filterEventJobStatePublished converts an etcd "set" response into an
// EventJobStatePublished event. It returns nil for any other action, when the
// node value cannot be deserialized into a job.JobState, or when a Job cannot
// be built. The event's Job is named after the last element of the node key
// and carries the decoded state.
func filterEventJobStatePublished(resp *etcd.Response) *Event {
	if resp.Action != "set" {
		return nil
	}

	var js job.JobState
	if err := unmarshal(resp.Node.Value, &js); err != nil {
		log.V(1).Infof("Failed to deserialize JobState: %s", err)
		return nil
	}

	name := path.Base(resp.Node.Key)
	j, err := job.NewJob(name, &js, nil)
	if err != nil {
		// Do not dereference j when construction failed.
		log.V(1).Infof("Failed to create Job(%s): %s", name, err)
		return nil
	}
	return &Event{"EventJobStatePublished", *j, nil}
}
Ejemplo n.º 12
0
// getJobsAtPath is a private helper that reads the Etcd directory at key and
// returns its entries as job.Job values keyed by job name. The job name is the
// last path element of each entry's key and the payload is built from the
// entry's value.
func (r *Registry) getJobsAtPath(key string) map[string]job.Job {
	resp, err := r.Etcd.Get(key, false)
	if err != nil {
		// Assume the error was KeyNotFound and return an empty data structure
		return map[string]job.Job{}
	}

	found := make(map[string]job.Job, len(resp.Kvs))
	for _, entry := range resp.Kvs {
		_, name := path.Split(entry.Key)
		j := job.NewJob(name, nil, job.NewJobPayload(entry.Value))
		found[j.Name] = *j
	}

	return found
}
Ejemplo n.º 13
0
// GetJobs returns the jobs listed under the "Wants" property of the target's
// systemd unit, keyed by unit name with any ".service" suffix removed. Each
// job carries a payload built from the unit's contents and the state reported
// by GetJobState.
//
// It panics if the unit info cannot be fetched, and the "Wants" value is
// asserted to be a []string without a check.
func (t *Target) GetJobs() map[string]job.Job {
	info, err := t.Systemd.GetUnitInfo(unitPath(t.GetSystemdTargetName()))
	if err != nil {
		panic(err)
	}

	wants := info["Wants"].Value().([]string)
	result := make(map[string]job.Job, len(wants))

	for _, unitName := range wants {
		// The unit is read under its full name; the job is keyed by the
		// trimmed one.
		payload := job.NewJobPayload(readUnit(unitName))
		jobName := strings.TrimSuffix(unitName, ".service")
		result[jobName] = *job.NewJob(jobName, t.GetJobState(jobName), payload)
	}

	return result
}
Ejemplo n.º 14
0
// HandleEventJobCancelled reacts to the cancellation of the job named by the
// event payload. It stops the job through the manager and then, while holding
// the agent state lock, drops the job's peer bookkeeping, cancels every job
// recorded as a peer of it in the registry, and finally bids for possible
// offers. Events whose payload is not a string are logged and ignored.
func (a *Agent) HandleEventJobCancelled(event registry.Event) {
	//TODO(bcwaldon): We should check the context of the event before
	// making any changes to local systemd or the registry

	// Checked assertion: an unexpected payload type must not panic the handler.
	jobName, ok := event.Payload.(string)
	if !ok {
		log.Infof("EventJobCancelled: ignoring event with non-string payload of type %T", event.Payload)
		return
	}

	log.Infof("EventJobCancelled(%s): stopping Job", jobName)
	j := job.NewJob(jobName, nil, nil)
	a.Manager.StopJob(j)

	a.state.Lock()
	defer a.state.Unlock()

	// Capture the peers before the bookkeeping for this job is dropped.
	reversePeers := a.state.GetJobsByPeer(jobName)
	a.state.DropPeersJob(jobName)

	for _, peer := range reversePeers {
		log.Infof("EventJobCancelled(%s): cancelling Peer(%s) of Job", jobName, peer)
		a.Registry.CancelJob(peer)
	}

	a.bidForPossibleOffers()
}
Ejemplo n.º 15
0
// FinalizeSchedule assigns a Machine to every Job in schedule whose entry is
// still nil. Entries that already have a Machine are never changed.
//
// A Job of type "systemd-service" is placed on a randomly picked machine. Any
// other Job is placed on the machine of its companion service (the Job's name
// with its trailing extension replaced by ".service"), looked up first in the
// schedule being finalized and then in the registry. Because a companion may
// only become available after an earlier decision, undecided jobs are retried
// in passes until all are placed, a pass places nothing, or len(undecided)+1
// passes have run.
//
// It returns an error if any Job is left without a Machine.
func (scheduler *Scheduler) FinalizeSchedule(schedule *Schedule, machines map[string]machine.Machine, reg *registry.Registry) error {
	// This is intended to match a standard filetype (i.e. '.socket' in 'web.socket').
	// Compiled once per call instead of once per decision.
	extension := regexp.MustCompile("\\.(.[a-z]*)$")

	decide := func(j *job.Job) *machine.Machine {
		var mach *machine.Machine
		// If the Job being scheduled is a systemd service unit, we assume we
		// can put it anywhere. If not, we must find the machine where the
		// Job's related service file is currently scheduled.
		if j.Type == "systemd-service" {
			mach = pickRandomMachine(machines)
		} else {
			serviceName := extension.ReplaceAllString(j.Name, ".service")

			// Check if the corresponding systemd-service job is referenced in the schedule
			// we're actively finalizing
			for j2, m := range *schedule {
				if serviceName == j2.Name {
					mach = m
					break
				}
			}

			if mach == nil {
				service, err := job.NewJob(serviceName, nil, nil)
				if err != nil {
					log.V(1).Infof("Unable to build service Job(%s): %v", serviceName, err)
				} else if state := reg.GetJobState(service); state != nil {
					//TODO: Remove registry access from the scheduler
					mach = state.Machine
				}
			}

			if mach == nil {
				log.Infof("Unable to schedule Job(%s) since corresponding "+
					"service Job(%s) could not be found", j.Name, serviceName)
			}
		}

		if mach == nil {
			log.V(1).Infof("Not scheduling Job(%s)", j.Name)
			return nil
		}
		log.Infof("Scheduling Job(%s) to Machine(%s)", j.Name, mach.BootId)
		return mach
	}

	var undecided []job.Job
	for j, m := range *schedule {
		// The schedule may come in partially-completed. We assume any previous
		// decisions cannot be changed.
		if m == nil {
			undecided = append(undecided, j)
		}
	}

	// Iterate over the submitted set of undecided jobs up to N+1 times where N=len(jobs).
	// We assume that N+1 is the theoretical maximum number of attempts that we could possibly
	// take. This is not proven to be true...
	iterMax := len(undecided) + 1

	for pass := 0; pass < iterMax && len(undecided) > 0; pass++ {
		// Filter in place: jobs that still cannot be placed are compacted to
		// the front of the slice and retried on the next pass. Writes always
		// land at or before the index currently being read.
		remaining := undecided[:0]
		for _, candidate := range undecided {
			// Per-iteration copy so decide and Add get a stable pointer.
			candidate := candidate
			mach := decide(&candidate)
			if mach == nil {
				remaining = append(remaining, candidate)
				continue
			}
			schedule.Add(&candidate, mach)
		}

		if len(remaining) == len(undecided) {
			// Nothing was placed, so the schedule did not change and another
			// pass would only repeat the same lookups.
			break
		}
		undecided = remaining
	}

	if len(undecided) > 0 {
		return errors.New("Unable to decide how to schedule all jobs")
	}

	return nil
}