Beispiel #1
0
// GetReplacedUnit asks hasReplace whether any unit named in j.Replaces() is
// matched for j.Name and returns the name hasReplace reports. An error is
// returned when hasReplace reports no match.
func (as *AgentState) GetReplacedUnit(j *job.Job) (string, error) {
	found, name := as.hasReplace(j.Name, j.Replaces())
	if found {
		return name, nil
	}
	return "", fmt.Errorf("cannot find units to be replaced for Unit(%s)", j.Name)
}
Beispiel #2
0
// mapUnitToJob converts a schema.Unit into a job.Job.
//
// The entity's FileContents are base64-decoded and parsed with unit.NewUnit;
// a failure in either step is returned unchanged together with a nil Job.
// The Job's UnitState is only populated when the entity carries Systemd data.
// The mm argument is not used by this function.
func mapUnitToJob(entity *schema.Unit, mm map[string]*machine.MachineState) (*job.Job, error) {
	raw, err := base64.StdEncoding.DecodeString(entity.FileContents)
	if err != nil {
		return nil, err
	}

	parsed, err := unit.NewUnit(string(raw))
	if err != nil {
		return nil, err
	}

	state := job.JobState(entity.CurrentState)
	result := &job.Job{
		Name:  entity.Name,
		State: &state,
		Unit:  *parsed,
	}

	// Without systemd data there is nothing relevant to report, so the
	// UnitState is left unset.
	sd := entity.Systemd
	if sd == nil {
		return result, nil
	}

	us := &unit.UnitState{
		LoadState:   sd.LoadState,
		ActiveState: sd.ActiveState,
		SubState:    sd.SubState,
	}
	// The machine ID is copied over only when one was reported.
	if len(sd.MachineID) > 0 {
		us.MachineID = sd.MachineID
	}
	result.UnitState = us

	return result, nil
}
Beispiel #3
0
// partitionCluster returns a slice of IDs from a subset of active machines
// that should be considered for scheduling the specified job.
// The returned slice is sorted by ascending lexicographical string value of
// machine ID, except when the job names a required target, in which case
// only that single ID is returned.
//
// An error is returned when the list of machines cannot be fetched from the
// registry; in that case the returned slice is nil.
func (eg *Engine) partitionCluster(j *job.Job) ([]string, error) {
	if machID, ok := j.RequiredTarget(); ok {
		return []string{machID}, nil
	}

	// TODO(uwedeportivo): for now punt on jobs with requirements and offer to all machines
	// because agents are decoding the requirements
	if len(j.Requirements()) > 0 {
		// Surface registry failures instead of silently offering the job to
		// an empty set of machines.
		machines, err := eg.registry.Machines()
		if err != nil {
			return nil, err
		}

		machineIDs := make([]string, len(machines))
		for i, mach := range machines {
			machineIDs[i] = mach.ID
		}
		sort.Strings(machineIDs)
		return machineIDs, nil
	}

	// this is usually a cheap no-op
	eg.refreshCluster(false)

	// as an initial heuristic, choose the k least loaded, with k = partitionSize
	machineIDs := eg.clust.kLeastLoaded(partitionSize)

	sort.Strings(machineIDs)
	return machineIDs, nil
}
Beispiel #4
0
// AbleToRun decides which action should be taken for the provided Job based
// on the Agent's current state. It returns the chosen JobAction together with
// a string explaining the decision; the string is empty when the action is
// JobActionSchedule. The checks are applied in this order:
//   - Agent must meet the Job's machine target requirement (if any)
//   - Agent must have all of the Job's required metadata (if any)
//   - Agent must have all required Peers of the Job scheduled locally (if any)
//   - Job must not conflict with any other Units scheduled to the agent
//   - Job that replaces a locally-scheduled Unit is reported for rescheduling
func (as *AgentState) AbleToRun(j *job.Job) (jobAction job.JobAction, errstr string) {
	target, hasTarget := j.RequiredTarget()
	if hasTarget && !as.MState.MatchID(target) {
		return job.JobActionUnschedule, fmt.Sprintf("agent ID %q does not match required %q", as.MState.ID, target)
	}

	required := j.RequiredTargetMetadata()
	if len(required) != 0 && !machine.HasMetadata(as.MState, required) {
		return job.JobActionUnschedule, "local Machine metadata insufficient"
	}

	// Ranging over an empty peer list is a no-op, so no length guard is needed.
	for _, peerName := range j.Peers() {
		if !as.unitScheduled(peerName) {
			return job.JobActionUnschedule, fmt.Sprintf("required peer Unit(%s) is not scheduled locally", peerName)
		}
	}

	conflicted, conflictName := as.HasConflict(j.Name, j.Conflicts())
	if conflicted {
		return job.JobActionUnschedule, fmt.Sprintf("found conflict with locally-scheduled Unit(%s)", conflictName)
	}

	// Handle Replace option specially for rescheduling the unit
	replaces, replacedName := as.hasReplace(j.Name, j.Replaces())
	if replaces {
		return job.JobActionReschedule, fmt.Sprintf("found replace with locally-scheduled Unit(%s)", replacedName)
	}

	return job.JobActionSchedule, ""
}
Beispiel #5
0
// Determine if the Agent can run the provided Job
func (a *Agent) AbleToRun(j *job.Job) bool {
	if !a.VerifyJob(j) {
		log.V(1).Infof("Failed to verify Job(%s)", j.Name)
		return false
	}

	requirements := j.Requirements()
	if len(requirements) == 0 {
		log.V(1).Infof("Job(%s) has no requirements", j.Name)
		return true
	}

	if log.V(1) {
		var reqString string
		for key, slice := range requirements {
			reqString += fmt.Sprintf("%s = [", key)
			for _, val := range slice {
				reqString += fmt.Sprintf("%s, ", val)
			}
			reqString += fmt.Sprint("] ")
		}

		log.Infof("Job(%s) has requirements: %s", j.Name, reqString)
	}

	metadata := extractMachineMetadata(requirements)
	log.V(1).Infof("Job(%s) requires machine metadata: %v", j.Name, metadata)
	if !a.machine.HasMetadata(metadata) {
		log.V(1).Infof("Unable to run Job(%s), local Machine metadata insufficient", j.Name)
		return false
	}

	bootID, ok := requirements[unit.FleetXConditionMachineBootID]
	if ok && len(bootID) > 0 && !a.machine.State().MatchBootID(bootID[0]) {
		log.V(1).Infof("Agent does not pass MachineBootID condition for Job(%s)", j.Name)
		return false
	}

	peers := j.Payload.Peers()
	if len(peers) > 0 {
		log.V(1).Infof("Asserting required Peers %v of Job(%s) are scheduled locally", peers, j.Name)
		for _, peer := range peers {
			if !a.peerScheduledHere(j.Name, peer) {
				log.V(1).Infof("Required Peer(%s) of Job(%s) is not scheduled locally", peer, j.Name)
				return false
			}
		}
	} else {
		log.V(2).Infof("Job(%s) has no peers to worry about", j.Name)
	}

	if conflicted, conflictedJobName := a.state.HasConflict(j.Name, j.Payload.Conflicts()); conflicted {
		log.V(1).Infof("Job(%s) has conflict with Job(%s)", j.Name, conflictedJobName)
		return false
	}

	return true
}
Beispiel #6
0
// AbleToRun reports whether an Agent can run the provided Job based on the
// Agent's current state. The boolean result is accompanied by a reason
// string, which is empty when the Job can be run. The checks are applied in
// this order:
//   - Agent must meet the Job's machine target requirement (if any)
//   - Agent must have all of the Job's required metadata (if any)
//   - Agent must have all required Peers of the Job scheduled locally (if any)
//   - Job must not conflict with any other Jobs scheduled to the agent
func (as *AgentState) AbleToRun(j *job.Job) (bool, string) {
	target, hasTarget := j.RequiredTarget()
	if hasTarget && !as.MState.MatchID(target) {
		return false, fmt.Sprintf("agent ID %q does not match required %q", as.MState.ID, target)
	}

	required := j.RequiredTargetMetadata()
	if len(required) != 0 && !machine.HasMetadata(as.MState, required) {
		return false, "local Machine metadata insufficient"
	}

	// Ranging over an empty peer list is a no-op, so no length guard is needed.
	for _, peerName := range j.Peers() {
		if !as.jobScheduled(peerName) {
			return false, fmt.Sprintf("required peer Job(%s) is not scheduled locally", peerName)
		}
	}

	conflicted, conflictName := as.hasConflict(j.Name, j.Conflicts())
	if conflicted {
		return false, fmt.Sprintf("found conflict with locally-scheduled Job(%s)", conflictName)
	}

	return true, ""
}
Beispiel #7
0
// hydrateJob fills in the registry-backed fields of j: its target state,
// target machine ID, unit state and job state, all looked up by j.Name.
// It stops and returns the error as soon as the target state or the target
// machine lookup fails; the field being looked up is still assigned whatever
// value that lookup returned.
func (r *EtcdRegistry) hydrateJob(j *job.Job) (err error) {
	if j.TargetState, err = r.jobTargetState(j.Name); err != nil {
		return err
	}

	if j.TargetMachineID, err = r.jobTargetMachine(j.Name); err != nil {
		return err
	}

	j.UnitState = r.getUnitState(j.Name)
	j.State = r.determineJobState(j.Name)

	return nil
}
Beispiel #8
0
// Determine if the Agent can run the provided Job
func (a *Agent) ableToRun(j *job.Job) bool {
	if !a.verifyJob(j) {
		log.V(1).Infof("Failed to verify Job(%s)", j.Name)
		return false
	}

	requirements := j.Requirements()
	if len(requirements) == 0 {
		log.V(1).Infof("Job(%s) has no requirements", j.Name)
	}

	log.Infof("Job(%s) has requirements: %s", j.Name, requirements)

	metadata := j.RequiredTargetMetadata()
	log.V(1).Infof("Job(%s) requires machine metadata: %v", j.Name, metadata)
	ms := a.Machine.State()
	if !machine.HasMetadata(&ms, metadata) {
		log.Infof("Unable to run Job(%s), local Machine metadata insufficient", j.Name)
		return false
	}

	if tgt, ok := j.RequiredTarget(); ok && !a.Machine.State().MatchID(tgt) {
		log.Infof("Agent does not meet machine target requirement for Job(%s)", j.Name)
		return false
	}

	peers := j.Peers()
	if len(peers) > 0 {
		log.V(1).Infof("Asserting required Peers %v of Job(%s) are scheduled locally", peers, j.Name)
		for _, peer := range peers {
			if !a.peerScheduledHere(j.Name, peer) {
				log.Infof("Required Peer(%s) of Job(%s) is not scheduled locally", peer, j.Name)
				return false
			}
		}
	} else {
		log.V(1).Infof("Job(%s) has no peers to worry about", j.Name)
	}

	if conflicted, conflictedJobName := a.HasConflict(j.Name, j.Conflicts()); conflicted {
		log.Infof("Job(%s) has conflict with Job(%s)", j.Name, conflictedJobName)
		return false
	}

	return true
}
Beispiel #9
0
// ableToRun reports whether the Agent is eligible to run the provided Job.
// Five criteria are checked, in this order, and the first one that fails
// makes the function return false:
//   - Job must pass signature verification
//   - agent must have all of the Job's required metadata (if any)
//   - agent must meet the Job's machine target requirement (if any)
//   - agent must have all required Peers of the Job scheduled locally (if any)
//   - Job must not conflict with any other Jobs scheduled to the agent
func (a *Agent) ableToRun(j *job.Job) bool {
	if !a.verifyJobSignature(j) {
		log.V(1).Infof("Failed to verify Job(%s)", j.Name)
		return false
	}

	log.Infof("Job(%s) has requirements: %s", j.Name, j.Requirements())

	required := j.RequiredTargetMetadata()
	if len(required) > 0 {
		log.V(1).Infof("Job(%s) requires machine metadata: %v", j.Name, required)
		state := a.Machine.State()
		if !machine.HasMetadata(&state, required) {
			log.Infof("Unable to run Job(%s): local Machine metadata insufficient", j.Name)
			return false
		}
	} else {
		log.V(1).Infof("Job(%s) has no required machine metadata", j.Name)
	}

	target, hasTarget := j.RequiredTarget()
	if hasTarget && !a.Machine.State().MatchID(target) {
		log.Infof("Unable to run Job(%s): agent does not meet machine target requirement (%s)", j.Name, target)
		return false
	}

	peers := j.Peers()
	if len(peers) > 0 {
		log.V(1).Infof("Job(%s) requires peers: %v", j.Name, peers)
		for _, peerName := range peers {
			if !a.peerScheduledHere(j.Name, peerName) {
				log.Infof("Unable to run Job(%s): required Peer(%s) is not scheduled locally", j.Name, peerName)
				return false
			}
		}
	} else {
		log.V(1).Infof("Job(%s) has no required peers", j.Name)
	}

	conflicted, conflictedJobName := a.HasConflict(j.Name, j.Conflicts())
	if conflicted {
		log.Infof("Unable to run Job(%s): conflict with Job(%s)", j.Name, conflictedJobName)
		return false
	}

	return true
}
Beispiel #10
0
// hydrateJob fills in the registry-backed fields of j: its target state,
// target machine ID, unit state and job state, all looked up by j.Name.
// It returns the error as soon as the target state or the target machine
// lookup fails. TargetState is only assigned after a successful lookup, while
// TargetMachineID receives whatever its lookup returned, even on failure.
func (r *EtcdRegistry) hydrateJob(j *job.Job) error {
	targetState, err := r.jobTargetState(j.Name)
	if err != nil {
		return err
	}
	j.TargetState = targetState

	if j.TargetMachineID, err = r.jobTargetMachine(j.Name); err != nil {
		return err
	}

	j.UnitState = r.getUnitState(j.Name)
	j.State = r.determineJobState(j.Name)

	return nil
}
Beispiel #11
0
// ableToRun reports whether the Agent can run the provided Job, judged
// against the given agent state and machine state. The boolean result is
// accompanied by a reason string, which is empty when the Job can be run.
// The checks are applied in this order:
//   - Agent must meet the Job's machine target requirement (if any)
//   - Job must pass signature verification
//   - Agent must have all of the Job's required metadata (if any)
//   - Agent must have all required Peers of the Job scheduled locally (if any)
//   - Job must not conflict with any other Jobs scheduled to the agent
func (ar *AgentReconciler) ableToRun(as *agentState, ms *machine.MachineState, j *job.Job) (bool, string) {
	log.V(1).Infof("Attempting to determine if able to run Job(%s)", j.Name)

	target, hasTarget := j.RequiredTarget()
	if hasTarget && !ms.MatchID(target) {
		return false, fmt.Sprintf("Agent ID %q does not match required %q", ms.ID, target)
	}

	if !ar.verifyJobSignature(j) {
		return false, "unable to verify signature"
	}

	log.V(1).Infof("Job(%s) has requirements: %s", j.Name, j.Requirements())

	required := j.RequiredTargetMetadata()
	if len(required) > 0 {
		log.V(1).Infof("Job(%s) requires machine metadata: %v", j.Name, required)
		if !machine.HasMetadata(ms, required) {
			return false, "local Machine metadata insufficient"
		}
	} else {
		log.V(1).Infof("Job(%s) has no required machine metadata", j.Name)
	}

	peers := j.Peers()
	if len(peers) > 0 {
		log.V(1).Infof("Job(%s) requires peers: %v", j.Name, peers)
		for _, peerName := range peers {
			if !as.jobScheduled(peerName) {
				return false, fmt.Sprintf("required peer Job(%s) is not scheduled locally", peerName)
			}
		}
	} else {
		log.V(1).Infof("Job(%s) has no required peers", j.Name)
	}

	conflicted, conflictName := as.hasConflict(j.Name, j.Conflicts())
	if conflicted {
		return false, fmt.Sprintf("found conflict with locally-scheduled Job(%s)", conflictName)
	}

	log.V(1).Infof("Determined local Agent is able to run Job(%s)", j.Name)
	return true, ""
}
Beispiel #12
0
// newClusterState builds a clusterState from the given units, scheduled
// units and machines.
//
// Units for which IsGlobal returns true are stored by name in gUnits. Every
// other unit becomes a job.Job stored by name in jobs; when a scheduled unit
// with the same name exists, its TargetMachineID and State are copied onto
// the job. Machines are stored by ID. All map values point at copies made
// here, not at elements of the input slices.
func newClusterState(units []job.Unit, sUnits []job.ScheduledUnit, machines []machine.MachineState) *clusterState {
	scheduledByName := make(map[string]*job.ScheduledUnit)
	for i := range sUnits {
		// Copy the element so the stored pointer is unique per entry.
		scheduled := sUnits[i]
		scheduledByName[scheduled.Name] = &scheduled
	}

	jobsByName := make(map[string]*job.Job)
	globalsByName := make(map[string]*job.Unit)
	for i := range units {
		// Per-iteration copy; its address is retained for global units.
		current := units[i]
		if current.IsGlobal() {
			globalsByName[current.Name] = &current
			continue
		}

		entry := &job.Job{
			Name:        current.Name,
			Unit:        current.Unit,
			TargetState: current.TargetState,
		}
		if scheduled, found := scheduledByName[current.Name]; found {
			entry.TargetMachineID = scheduled.TargetMachineID
			entry.State = scheduled.State
		}
		jobsByName[entry.Name] = entry
	}

	machinesByID := make(map[string]*machine.MachineState, len(machines))
	for i := range machines {
		// Copy the element so the stored pointer is unique per entry.
		state := machines[i]
		machinesByID[state.ID] = &state
	}

	return &clusterState{
		jobs:     jobsByName,
		gUnits:   globalsByName,
		machines: machinesByID,
	}
}
Beispiel #13
0
// parseJobDir populates j from the child nodes of the given etcd directory.
//
// Each child is matched by key against the job's target-state, target-agent
// and heartbeat paths: the target state is parsed with job.ParseJobState
// (a parse error aborts and is returned), the target agent value becomes
// TargetMachineID, and the heartbeat value is remembered. Afterwards the
// unit state is fetched and the job state is derived via determineJobState
// from j and the heartbeat value.
func (r *EtcdRegistry) parseJobDir(j *job.Job, dir *etcd.Node) (err error) {
	heartbeat := ""
	for _, child := range dir.Nodes {
		switch {
		case child.Key == r.jobTargetStatePath(j.Name):
			if j.TargetState, err = job.ParseJobState(child.Value); err != nil {
				return err
			}
		case child.Key == r.jobTargetAgentPath(j.Name):
			j.TargetMachineID = child.Value
		case child.Key == r.jobHeartbeatPath(j.Name):
			heartbeat = child.Value
		}
	}

	j.UnitState = r.getUnitState(j.Name)

	state := determineJobState(j, heartbeat)
	j.State = &state

	return nil
}
Beispiel #14
0
// TrackJob hands the given job's peers and conflicts, keyed by the job's
// name, to the agent state's tracking helpers for later reference.
func (as *AgentState) TrackJob(j *job.Job) {
	peers := j.Peers()
	as.trackJobPeers(j.Name, peers)

	conflicts := j.Conflicts()
	as.trackJobConflicts(j.Name, conflicts)
}
Beispiel #15
0
// requiresMachine looks up the machine boot ID condition in the job's
// requirements. It returns the values stored under that key and true when
// the key is present with at least one value; otherwise the boolean is false.
func (eg *Engine) requiresMachine(j *job.Job) ([]string, bool) {
	bootID, present := j.Requirements()[unit.FleetXConditionMachineBootID]
	if !present || len(bootID) == 0 {
		return bootID, false
	}
	return bootID, true
}