func (self *JobWatcher) TrackMachine(m *machine.Machine) { self.machines[m.BootId] = *m partial := NewSchedule() for _, watch := range self.watches { if watch.Count == ScheduleAllMachines { sched := self.schedules[watch.Payload.Name] if len(sched.MachineJobs(m)) > 0 { continue } name := fmt.Sprintf("%s.%s", m.BootId, watch.Payload.Name) j, _ := job.NewJob(name, nil, watch.Payload) log.Infof("Adding to schedule job=%s machine=%s", name, m.BootId) partial.Add(j, m) sched.Add(j, m) } } if len(partial) > 0 { log.Infof("Submitting schedule: %s", partial.String()) self.submitSchedule(partial) } }
func filterEventJobCreated(resp *etcd.Response) *Event { if resp.Action != "set" || resp.Node.PrevValue != "" { return nil } dir, jobName := path.Split(resp.Node.Key) dir = strings.TrimSuffix(dir, "/") dir, prefix := path.Split(dir) if prefix != strings.Trim(schedulePrefix, "/") { return nil } var jp job.JobPayload err := unmarshal(resp.Node.Value, &jp) if err != nil { log.V(1).Infof("Failed to deserialize JobPayload: %s", err) return nil } j, _ := job.NewJob(jobName, nil, &jp) dir = strings.TrimSuffix(dir, "/") dir, machName := path.Split(dir) m := machine.New(machName) return &Event{"EventJobCreated", *j, m} }
// List the jobs a given Machine is scheduled to run func (r *Registry) GetScheduledJobs(machine *machine.Machine) map[string]job.Job { key := path.Join(keyPrefix, machinePrefix, machine.BootId, schedulePrefix) resp, err := r.etcd.Get(key, false, true) // Assume the error was KeyNotFound and return an empty data structure if err != nil { return make(map[string]job.Job, 0) } jobs := make(map[string]job.Job, len(resp.Node.Nodes)) for _, kv := range resp.Node.Nodes { name := path.Base(kv.Key) var payload job.JobPayload err := unmarshal(kv.Value, &payload) if err == nil { j, _ := job.NewJob(name, nil, &payload) //FIXME: This will hide duplicate jobs! jobs[j.Name] = *j } else { log.V(1).Infof("Failed to create Job: %s", err) } } return jobs }
func statusUnitsAction(c *cli.Context) { r := registry.New() for i, v := range c.Args() { // This extra newline here to match systemctl status output if i != 0 { fmt.Printf("\n") } name := path.Base(v) j, _ := job.NewJob(name, nil, nil) j.State = r.GetJobState(j) loadState := "-" activeState := "-" subState := "-" if j.State != nil { loadState = j.State.LoadState activeState = j.State.ActiveState subState = j.State.SubState } fmt.Printf("%s\n", j.Name) fmt.Printf("\tLoaded: %s\n", loadState) fmt.Printf("\tActive: %s (%s)\n", activeState, subState) if j.State != nil { for _, sock := range j.State.Sockets { fmt.Printf("\tListen: %s\n", sock) } } fmt.Print("\n") } }
func getJobsFromRequest(req *job.JobRequest) []job.Job { var jobs []job.Job for i := 0; i < len(req.Payloads); i++ { payload := req.Payloads[i] j := job.NewJob(payload.Name, nil, &payload) jobs = append(jobs, *j) } return jobs }
// Attempt to remove a given JobWatch from coreinit func (r *Registry) RemoveJobWatch(name string) { key := path.Join(keyPrefix, jobWatchPrefix, name, "object") r.etcd.Delete(key, true) // Attempt to stop any Jobs that may have been created by the given JobWatch for _, m := range r.GetActiveMachines() { name := fmt.Sprintf("%s.%s", m.BootId, name) j, _ := job.NewJob(name, nil, nil) r.RemoveMachineJob(j, &m) } }
func (m *SystemdManager) GetJobs() map[string]job.Job { units := m.getUnitsByTarget(m.Target) jobs := make(map[string]job.Job, len(units)) for _, u := range units { state := m.getJobStateFromUnit(&u) j, _ := job.NewJob(u.Name(), state, nil) jobs[j.Name] = *j } return jobs }
func (self *JobWatcher) schedule(watch *job.JobWatch) { sched, ok := self.schedules[watch.Payload.Name] if !ok { sched = NewSchedule() self.schedules[watch.Payload.Name] = sched } if watch.Count == ScheduleAllMachines { for idx, _ := range self.machines { m := self.machines[idx] name := fmt.Sprintf("%s.%s", m.BootId, watch.Payload.Name) j, _ := job.NewJob(name, nil, watch.Payload) log.Infof("Scheduling Job(%s) to Machine(%s)", name, m.BootId) sched.Add(j, &m) } } else { for i := 1; i <= watch.Count; i++ { name := fmt.Sprintf("%d.%s", i, watch.Payload.Name) j, _ := job.NewJob(name, nil, watch.Payload) var m *machine.Machine // Check if this job was schedule somewhere already if state := self.registry.GetJobState(j); state != nil { m = state.Machine } sched.Add(j, m) } } if len(sched) > 0 { self.scheduler.FinalizeSchedule(&sched, self.machines, self.registry) log.Infof("Submitting schedule: %s", sched.String()) self.submitSchedule(sched) } else { log.Infof("No schedule changes made") } }
func filterEventJobStateExpired(resp *etcd.Response) *Event { if resp.Action != "delete" && resp.Action != "expire" { return nil } var js job.JobState err := unmarshal(resp.Node.Value, &js) if err != nil { log.V(1).Infof("Failed to deserialize JobState: %s", err) return nil } j := job.NewJob(path.Base(resp.Node.Key), &js, nil) return &Event{"EventJobStateExpired", *j, nil} }
func (r *Registry) GetMachineJob(name string, machine *machine.Machine) *job.Job { key := path.Join(keyPrefix, machinePrefix, machine.BootId, schedulePrefix, name) resp, err := r.etcd.Get(key, false, true) // Assume the error was KeyNotFound and return an empty data structure if err != nil { return nil } var jp job.JobPayload //TODO: Handle the error generated by unmarshal unmarshal(resp.Node.Value, &jp) j, _ := job.NewJob(name, nil, &jp) return j }
func filterEventJobStatePublished(resp *etcd.Response) *Event { if resp.Action != "set" { return nil } var js job.JobState err := unmarshal(resp.Node.Value, &js) if err != nil { log.V(1).Infof("Failed to deserialize JobState: %s", err) return nil } //TODO: handle error returned by NewJob j, _ := job.NewJob(path.Base(resp.Node.Key), &js, nil) return &Event{"EventJobStatePublished", *j, nil} }
// Private helper method that takes a path to an Etcd directory and returns // all of the items as job.Job objects. func (r *Registry) getJobsAtPath(key string) map[string]job.Job { resp, err := r.Etcd.Get(key, false) // Assume the error was KeyNotFound and return an empty data structure if err != nil { return make(map[string]job.Job, 0) } jobs := make(map[string]job.Job, len(resp.Kvs)) for _, kv := range resp.Kvs { _, name := path.Split(kv.Key) payload := job.NewJobPayload(kv.Value) job := job.NewJob(name, nil, payload) jobs[job.Name] = *job } return jobs }
func (t *Target) GetJobs() map[string]job.Job { object := unitPath(t.GetSystemdTargetName()) info, err := t.Systemd.GetUnitInfo(object) if err != nil { panic(err) } names := info["Wants"].Value().([]string) jobs := make(map[string]job.Job, len(names)) for _, name := range names { payload := job.NewJobPayload(readUnit(name)) name = strings.TrimSuffix(name, ".service") state := t.GetJobState(name) jobs[name] = *job.NewJob(name, state, payload) } return jobs }
func (a *Agent) HandleEventJobCancelled(event registry.Event) { //TODO(bcwaldon): We should check the context of the event before // making any changes to local systemd or the registry jobName := event.Payload.(string) log.Infof("EventJobCancelled(%s): stopping Job", jobName) j := job.NewJob(jobName, nil, nil) a.Manager.StopJob(j) a.state.Lock() defer a.state.Unlock() reversePeers := a.state.GetJobsByPeer(jobName) a.state.DropPeersJob(jobName) for _, peer := range reversePeers { log.Infof("EventJobCancelled(%s): cancelling Peer(%s) of Job", jobName, peer) a.Registry.CancelJob(peer) } a.bidForPossibleOffers() }
func (scheduler *Scheduler) FinalizeSchedule(schedule *Schedule, machines map[string]machine.Machine, reg *registry.Registry) error { decide := func(j *job.Job) *machine.Machine { var mach *machine.Machine // If the Job being scheduled is a systemd service unit, we assume we // can put it anywhere. If not, we must find the machine where the // Job's related service file is currently scheduled. if j.Type == "systemd-service" { mach = pickRandomMachine(machines) } else { // This is intended to match a standard filetype (i.e. '.socket' in 'web.socket') re := regexp.MustCompile("\\.(.[a-z]*)$") serviceName := re.ReplaceAllString(j.Name, ".service") // Check if the corresponding systemd-service job is referenced in the schedule // we're actively finalizing for j2, m := range *schedule { if serviceName == j2.Name { mach = m } } if mach == nil { service, _ := job.NewJob(serviceName, nil, nil) //TODO: Remove registry access from the scheduler if state := reg.GetJobState(service); state != nil { mach = state.Machine } } if mach == nil { log.Infof("Unable to schedule Job(%s) since corresponding "+ "service Job(%s) could not be found", j.Name, serviceName) } } if mach == nil { log.V(1).Infof("Not scheduling Job(%s)", j.Name) return nil } else { log.Infof("Scheduling Job(%s) to Machine(%s)", j.Name, mach.BootId) return mach } } var undecided []job.Job for j, m := range *schedule { // The schedule may come in partially-completed. We assume any previous // decisions cannot be changed. if m == nil { undecided = append(undecided, j) } } // Iterate over the submitted set of undecided jobs up to N+1 times where N=len(jobs). // We assume that N+1 is the theoretical maximum number of attempts that we could possibly // take. This is not proven to be true... iterMax := len(undecided) + 1 for i := 0; i < iterMax; i++ { decisions := 0 for i := 0; i < len(undecided); i++ { job := undecided[i-decisions] mach := decide(&job) if mach != nil { schedule.Add(&job, mach) undecided = append(undecided[0:i-decisions], undecided[i-decisions+1:]...) decisions++ } } } if len(undecided) > 0 { return errors.New("Unable to decide how to schedule all jobs") } return nil }