func assertJobState(name string, js job.JobState, out io.Writer) (ret bool) { j, err := cAPI.Job(name) if err != nil { log.Warningf("Error retrieving Job(%s) from Registry: %v", name, err) return } if j == nil || j.State == nil || *(j.State) != js { return } ret = true msg := fmt.Sprintf("Job %s %s", name, *(j.State)) if j.TargetMachineID == "" { return } machines, err := cAPI.Machines() if err != nil { log.Warningf("Failed retrieving list of Machines from Registry: %v", err) } for _, ms := range machines { if ms.ID != j.TargetMachineID { continue } msg = fmt.Sprintf("%s on %s", msg, machineFullLegend(ms, false)) break } fmt.Fprintln(out, msg) return }
func (r *EtcdRegistry) getJobFromModel(jm jobModel) *job.Job { var err error var unit *unit.Unit // New-style Jobs should have a populated UnitHash, and the contents of the Unit are stored separately in the Registry if !jm.UnitHash.Empty() { unit = r.getUnitByHash(jm.UnitHash) if unit == nil { log.Warningf("No Unit found in Registry for Job(%s)", jm.Name) return nil } if unit.Hash() != jm.UnitHash { log.Errorf("Unit Hash %s does not match expected %s for Job(%s)!", unit.Hash(), jm.UnitHash, jm.Name) return nil } } else { // Old-style Jobs had "Payloads" instead of Units, also stored separately in the Registry unit, err = r.getUnitFromLegacyPayload(jm.Name) if err != nil { log.Errorf("Error retrieving legacy payload for Job(%s)", jm.Name) return nil } else if unit == nil { log.Warningf("No Payload found in Registry for Job(%s)", jm.Name) return nil } log.Infof("Migrating legacy Payload(%s)", jm.Name) if err := r.storeOrGetUnit(*unit); err != nil { log.Warningf("Unable to migrate legacy Payload: %v", err) } } return job.NewJob(jm.Name, *unit) }
func assertJobState(name string, js job.JobState, out io.Writer) (ret bool) { j, err := cAPI.Job(name) if err != nil { log.Warningf("Error retrieving Job(%s) from Registry: %v", name, err) return } if j == nil || j.State == nil || *(j.State) != js { return } ret = true msg := fmt.Sprintf("Job %s %s", name, *(j.State)) if j.TargetMachineID == "" { return } ms := cachedMachineState(j.TargetMachineID) if ms != nil { msg = fmt.Sprintf("%s on %s", msg, machineFullLegend(*ms, false)) } fmt.Fprintln(out, msg) return }
func (f *HostKeyFile) GetHostKeys() (map[string][]gossh.PublicKey, error) { in, err := os.Open(f.path) if err != nil { return nil, err } defer in.Close() hostKeys := make(map[string][]gossh.PublicKey) n := 0 s := bufio.NewScanner(in) for s.Scan() { n++ line := s.Bytes() hosts, key, err := parseKnownHostsLine(line) if err != nil { log.Warningf("%v:%d - %v\n", f.path, n, err) continue } if hosts == "" { // Comment/empty line continue } // It is permissible to have several lines for the same host name(s) hostKeys[hosts] = append(hostKeys[hosts], key) } return hostKeys, nil }
func (r *EtcdRegistry) getJobFromObjectNode(node *etcd.Node) (*job.Job, error) { var err error var jm jobModel if err = unmarshal(node.Value, &jm); err != nil { return nil, err } var unit *unit.Unit // New-style Jobs should have a populated UnitHash, and the contents of the Unit are stored separately in the Registry if !jm.UnitHash.Empty() { unit = r.getUnitByHash(jm.UnitHash) if unit == nil { log.Warningf("No Unit found in Registry for Job(%s)", jm.Name) return nil, nil } if unit.Hash() != jm.UnitHash { log.Errorf("Unit Hash %s does not match expected %s for Job(%s)!", unit.Hash(), jm.UnitHash, jm.Name) return nil, nil } } else { // Old-style Jobs had "Payloads" instead of Units, also stored separately in the Registry unit, err = r.getUnitFromLegacyPayload(jm.Name) if err != nil { log.Errorf("Error retrieving legacy payload for Job(%s)", jm.Name) return nil, nil } else if unit == nil { log.Warningf("No Payload found in Registry for Job(%s)", jm.Name) return nil, nil } log.Infof("Migrating legacy Payload(%s)", jm.Name) if err := r.storeOrGetUnit(*unit); err != nil { log.Warningf("Unable to migrate legacy Payload: %v", err) } jm.UnitHash = unit.Hash() log.Infof("Updating Job(%s) with legacy payload Hash(%s)", jm.Name, jm.UnitHash) if err := r.updateJobObjectNode(&jm, node.ModifiedIndex); err != nil { log.Warningf("Unable to update Job(%s) with legacy payload Hash(%s): %v", jm.Name, jm.UnitHash, err) } } return job.NewJob(jm.Name, *unit), nil }
// Units enumerates all files recognized as valid systemd units in // this manager's units directory. func (m *SystemdUnitManager) Units() (units []string, err error) { fis, err := ioutil.ReadDir(m.UnitsDir) if err != nil { return } for _, fi := range fis { name := fi.Name() if !unit.RecognizedUnitType(name) { log.Warningf("Found unrecognized file in %s, ignoring", path.Join(m.UnitsDir, name)) continue } units = append(units, name) } return }
// Initialize prepares the Agent for normal operation by doing three things: // 1. Announce presence to the Registry, tracking the etcd index of the operation // 2. Discover any jobs that are scheduled locally, loading/starting them if they can run locally // 3. Cache all unresolved job offers and bid for any that can be run locally // The returned value is the etcd index at which the agent's presence was announced. func (a *Agent) Initialize() uint64 { log.Infof("Initializing Agent") var idx uint64 wait := time.Second for { var err error if idx, err = a.registry.SetMachineState(a.Machine.State(), a.ttl); err == nil { log.V(1).Infof("Heartbeat succeeded") break } log.V(1).Infof("Failed heartbeat, retrying in %v", wait) time.Sleep(wait) } // Lock the state early so we can decide what the Agent needs to do // without the risk of conflicting with any of its other moving parts a.state.Lock() defer a.state.Unlock() machID := a.Machine.State().ID loaded := map[string]job.Job{} launched := map[string]job.Job{} jobs, _ := a.registry.Jobs() for _, j := range jobs { if j.TargetMachineID == "" || j.TargetMachineID != machID { continue } if !a.ableToRun(&j) { log.Infof("Unable to run Job(%s), unscheduling", j.Name) a.registry.ClearJobTarget(j.Name, machID) continue } if j.TargetState == nil || *j.TargetState == job.JobStateInactive { continue } loaded[j.Name] = j if *j.TargetState != job.JobStateLaunched { continue } launched[j.Name] = j } units, err := a.um.Units() if err != nil { log.Warningf("Failed determining what units are already loaded: %v", err) } for _, name := range units { if _, ok := loaded[name]; !ok { log.Infof("Unit(%s) should not be loaded here, unloading", name) a.um.Stop(name) a.um.Unload(name) } } for _, j := range loaded { a.state.TrackJob(&j) a.loadJob(&j) if _, ok := launched[j.Name]; !ok { continue } a.startJobUnlocked(j.Name) } for _, jo := range a.registry.UnresolvedJobOffers() { // Everything we check against could change over time, so we track // all offers starting here for future bidding even if we are // currently unable to bid a.state.TrackOffer(jo) a.state.TrackJob(&jo.Job) } a.bidForPossibleJobs() return idx }