Beispiel #1
0
// assertJobState reports whether the Job identified by name is currently in
// state js, as returned by cAPI.Job.
//
// When the states match, a one-line summary ("Job <name> <state>") is written
// to out and true is returned. If the Job has a target machine and that
// machine appears in the list returned by cAPI.Machines, the summary is
// suffixed with " on <machine legend>".
//
// Nothing is written and false is returned when the Job cannot be retrieved,
// does not exist, has no state, or is in a different state.
func assertJobState(name string, js job.JobState, out io.Writer) (ret bool) {
	j, err := cAPI.Job(name)
	if err != nil {
		log.Warningf("Error retrieving Job(%s) from Registry: %v", name, err)
		return false
	}
	if j == nil || j.State == nil || *(j.State) != js {
		return false
	}

	msg := fmt.Sprintf("Job %s %s", name, *(j.State))

	// Only consult the machine list when there is a target to look up. A Job
	// without a target machine still gets its state line printed below
	// instead of having the message silently dropped.
	if j.TargetMachineID != "" {
		machines, merr := cAPI.Machines()
		if merr != nil {
			// Best effort: fall back to the summary without a machine legend.
			log.Warningf("Failed retrieving list of Machines from Registry: %v", merr)
		}
		for _, ms := range machines {
			if ms.ID == j.TargetMachineID {
				msg = fmt.Sprintf("%s on %s", msg, machineFullLegend(ms, false))
				break
			}
		}
	}

	fmt.Fprintln(out, msg)
	return true
}
Beispiel #2
0
// getJobFromModel builds a job.Job from jm by resolving the Unit it refers to.
//
// If jm carries a non-empty UnitHash, the Unit is looked up by that hash and
// its own hash is verified against it. Otherwise the Unit is read from the
// legacy payload stored under the Job's name and then written back through
// storeOrGetUnit; a failure of that write is only logged.
//
// It returns nil (after logging the reason) when the Unit cannot be found,
// its hash does not match, or the legacy payload cannot be retrieved.
func (r *EtcdRegistry) getJobFromModel(jm jobModel) *job.Job {
	// Named u rather than unit so the imported unit package is not shadowed.
	var u *unit.Unit

	// New-style Jobs should have a populated UnitHash, and the contents of the Unit are stored separately in the Registry
	if !jm.UnitHash.Empty() {
		u = r.getUnitByHash(jm.UnitHash)
		if u == nil {
			log.Warningf("No Unit found in Registry for Job(%s)", jm.Name)
			return nil
		}
		if u.Hash() != jm.UnitHash {
			log.Errorf("Unit Hash %s does not match expected %s for Job(%s)!", u.Hash(), jm.UnitHash, jm.Name)
			return nil
		}
	} else {
		// Old-style Jobs had "Payloads" instead of Units, also stored separately in the Registry
		var err error
		u, err = r.getUnitFromLegacyPayload(jm.Name)
		if err != nil {
			// Include the underlying error so the failure cause is not lost.
			log.Errorf("Error retrieving legacy payload for Job(%s): %v", jm.Name, err)
			return nil
		}
		if u == nil {
			log.Warningf("No Payload found in Registry for Job(%s)", jm.Name)
			return nil
		}

		log.Infof("Migrating legacy Payload(%s)", jm.Name)
		// Migration is best effort: the Job is still returned if it fails.
		if err := r.storeOrGetUnit(*u); err != nil {
			log.Warningf("Unable to migrate legacy Payload: %v", err)
		}
	}

	return job.NewJob(jm.Name, *u)
}
Beispiel #3
0
// assertJobState reports whether the Job identified by name is currently in
// state js, as returned by cAPI.Job.
//
// When the states match, a one-line summary ("Job <name> <state>") is written
// to out and true is returned. If the Job has a target machine and
// cachedMachineState returns a non-nil state for it, the summary is suffixed
// with " on <machine legend>".
//
// Nothing is written and false is returned when the Job cannot be retrieved,
// does not exist, has no state, or is in a different state.
func assertJobState(name string, js job.JobState, out io.Writer) (ret bool) {
	j, err := cAPI.Job(name)
	if err != nil {
		log.Warningf("Error retrieving Job(%s) from Registry: %v", name, err)
		return false
	}
	if j == nil || j.State == nil || *(j.State) != js {
		return false
	}

	msg := fmt.Sprintf("Job %s %s", name, *(j.State))

	// A Job without a target machine still gets its state line printed below
	// instead of having the message silently dropped.
	if j.TargetMachineID != "" {
		if ms := cachedMachineState(j.TargetMachineID); ms != nil {
			msg = fmt.Sprintf("%s on %s", msg, machineFullLegend(*ms, false))
		}
	}

	fmt.Fprintln(out, msg)
	return true
}
Beispiel #4
0
// GetHostKeys reads the file at f.path line by line and returns the public
// keys it contains, grouped by the hosts field that parseKnownHostsLine
// returns for each line.
//
// Lines that fail to parse are logged with their line number and skipped.
// Lines for which parseKnownHostsLine returns an empty hosts string
// (comments and blank lines) are ignored. Several lines may contribute keys
// to the same hosts entry.
//
// An error is returned if the file cannot be opened or if reading it fails
// part-way through (including a line too long for the scanner); in that case
// no partial result is returned.
func (f *HostKeyFile) GetHostKeys() (map[string][]gossh.PublicKey, error) {
	in, err := os.Open(f.path)
	if err != nil {
		return nil, err
	}
	defer in.Close()

	hostKeys := make(map[string][]gossh.PublicKey)
	n := 0 // 1-based line number, used only in diagnostics
	s := bufio.NewScanner(in)
	for s.Scan() {
		n++
		line := s.Bytes()

		hosts, key, err := parseKnownHostsLine(line)
		if err != nil {
			log.Warningf("%v:%d - %v\n", f.path, n, err)
			continue
		}

		if hosts == "" {
			// Comment/empty line
			continue
		}

		// It is permissible to have several lines for the same host name(s)
		hostKeys[hosts] = append(hostKeys[hosts], key)
	}

	// Scan returns false on both EOF and failure; without this check a read
	// error would silently yield a truncated set of host keys.
	if err := s.Err(); err != nil {
		return nil, err
	}

	return hostKeys, nil
}
Beispiel #5
0
// getJobFromObjectNode decodes the jobModel stored in node.Value and builds
// a job.Job from it by resolving the Unit the model refers to.
//
// If the model carries a non-empty UnitHash, the Unit is looked up by that
// hash and its own hash is verified against it. Otherwise the Unit is read
// from the legacy payload stored under the Job's name, written back through
// storeOrGetUnit, and the Job object is updated with the Unit's hash; failures
// of either migration step are only logged.
//
// A non-nil error is returned only when node.Value cannot be unmarshalled.
// When the Unit cannot be found, its hash does not match, or the legacy
// payload cannot be retrieved, the reason is logged and (nil, nil) is
// returned.
func (r *EtcdRegistry) getJobFromObjectNode(node *etcd.Node) (*job.Job, error) {
	var jm jobModel
	if err := unmarshal(node.Value, &jm); err != nil {
		return nil, err
	}

	// Named u rather than unit so the imported unit package is not shadowed.
	var u *unit.Unit

	// New-style Jobs should have a populated UnitHash, and the contents of the Unit are stored separately in the Registry
	if !jm.UnitHash.Empty() {
		u = r.getUnitByHash(jm.UnitHash)
		if u == nil {
			log.Warningf("No Unit found in Registry for Job(%s)", jm.Name)
			return nil, nil
		}
		if u.Hash() != jm.UnitHash {
			log.Errorf("Unit Hash %s does not match expected %s for Job(%s)!", u.Hash(), jm.UnitHash, jm.Name)
			return nil, nil
		}
	} else {
		// Old-style Jobs had "Payloads" instead of Units, also stored separately in the Registry
		var err error
		u, err = r.getUnitFromLegacyPayload(jm.Name)
		if err != nil {
			// Include the underlying error so the failure cause is not lost.
			log.Errorf("Error retrieving legacy payload for Job(%s): %v", jm.Name, err)
			return nil, nil
		}
		if u == nil {
			log.Warningf("No Payload found in Registry for Job(%s)", jm.Name)
			return nil, nil
		}

		log.Infof("Migrating legacy Payload(%s)", jm.Name)
		// Migration is best effort: the Job is still returned if it fails.
		if err := r.storeOrGetUnit(*u); err != nil {
			log.Warningf("Unable to migrate legacy Payload: %v", err)
		}

		jm.UnitHash = u.Hash()
		log.Infof("Updating Job(%s) with legacy payload Hash(%s)", jm.Name, jm.UnitHash)
		// node.ModifiedIndex is handed to the update; presumably it guards
		// against overwriting a concurrent modification — confirm against
		// updateJobObjectNode.
		if err := r.updateJobObjectNode(&jm, node.ModifiedIndex); err != nil {
			log.Warningf("Unable to update Job(%s) with legacy payload Hash(%s): %v", jm.Name, jm.UnitHash, err)
		}
	}

	return job.NewJob(jm.Name, *u), nil
}
Beispiel #6
0
// Units lists the names of the entries in this manager's units directory
// that unit.RecognizedUnitType accepts as systemd units. Entries it does not
// accept are reported at warning level and left out of the result. If the
// directory cannot be read, the error from ioutil.ReadDir is returned and
// the list is nil.
func (m *SystemdUnitManager) Units() (units []string, err error) {
	entries, readErr := ioutil.ReadDir(m.UnitsDir)
	if readErr != nil {
		return nil, readErr
	}

	for i := range entries {
		filename := entries[i].Name()
		if unit.RecognizedUnitType(filename) {
			units = append(units, filename)
			continue
		}
		log.Warningf("Found unrecognized file in %s, ignoring", path.Join(m.UnitsDir, filename))
	}

	return units, nil
}
Beispiel #7
0
// Initialize prepares the Agent for normal operation by doing three things:
// 1. Announce presence to the Registry, tracking the etcd index of the operation
// 2. Discover any jobs that are scheduled locally, loading/starting them if they can run locally
// 3. Cache all unresolved job offers and bid for any that can be run locally
// The returned value is the etcd index at which the agent's presence was announced.
//
// The presence announcement is retried once per second, without limit, until
// it succeeds. The Agent's state lock is held from that point until
// Initialize returns. Units reported by the unit manager that do not
// correspond to a Job that should be loaded on this machine are stopped and
// unloaded.
func (a *Agent) Initialize() uint64 {
	log.Infof("Initializing Agent")

	var idx uint64
	wait := time.Second
	for {
		var err error
		if idx, err = a.registry.SetMachineState(a.Machine.State(), a.ttl); err == nil {
			log.V(1).Infof("Heartbeat succeeded")
			break
		}
		log.V(1).Infof("Failed heartbeat, retrying in %v", wait)
		time.Sleep(wait)
	}

	// Lock the state early so we can decide what the Agent needs to do
	// without the risk of conflicting with any of its other moving parts
	a.state.Lock()
	defer a.state.Unlock()

	machID := a.Machine.State().ID
	loaded := map[string]job.Job{}
	launched := map[string]job.Job{}
	jobs, err := a.registry.Jobs()
	if err != nil {
		// An empty job list makes the loop below unload every local unit, so
		// at least leave a trace of why that happened.
		log.Warningf("Failed retrieving list of Jobs from Registry: %v", err)
	}
	for _, j := range jobs {
		// Per-iteration copy: &j is handed out below and must not alias the
		// shared range variable (pre-Go 1.22 loop semantics).
		j := j

		if j.TargetMachineID == "" || j.TargetMachineID != machID {
			continue
		}

		if !a.ableToRun(&j) {
			log.Infof("Unable to run Job(%s), unscheduling", j.Name)
			a.registry.ClearJobTarget(j.Name, machID)
			continue
		}

		if j.TargetState == nil || *j.TargetState == job.JobStateInactive {
			continue
		}

		loaded[j.Name] = j

		if *j.TargetState != job.JobStateLaunched {
			continue
		}

		launched[j.Name] = j
	}

	units, err := a.um.Units()
	if err != nil {
		log.Warningf("Failed determining what units are already loaded: %v", err)
	}

	// Anything the unit manager knows about that is not in the loaded set
	// does not belong on this machine.
	for _, name := range units {
		if _, ok := loaded[name]; !ok {
			log.Infof("Unit(%s) should not be loaded here, unloading", name)
			a.um.Stop(name)
			a.um.Unload(name)
		}
	}

	for _, j := range loaded {
		// Per-iteration copy so each tracked/loaded Job gets its own address.
		j := j

		a.state.TrackJob(&j)
		a.loadJob(&j)

		if _, ok := launched[j.Name]; !ok {
			continue
		}

		a.startJobUnlocked(j.Name)
	}

	for _, jo := range a.registry.UnresolvedJobOffers() {
		// Per-iteration copy so &jo.Job does not alias the range variable.
		jo := jo

		// Everything we check against could change over time, so we track
		// all offers starting here for future bidding even if we are
		// currently unable to bid
		a.state.TrackOffer(jo)
		a.state.TrackJob(&jo.Job)
	}

	a.bidForPossibleJobs()

	return idx
}