func (as *AgentState) GetReplacedUnit(j *job.Job) (string, error) { cExists, replaced := as.hasReplace(j.Name, j.Replaces()) if !cExists { return "", fmt.Errorf("cannot find units to be replaced for Unit(%s)", j.Name) } return replaced, nil }
func mapUnitToJob(entity *schema.Unit, mm map[string]*machine.MachineState) (*job.Job, error) { contents, err := base64.StdEncoding.DecodeString(entity.FileContents) if err != nil { return nil, err } u, err := unit.NewUnit(string(contents)) if err != nil { return nil, err } js := job.JobState(entity.CurrentState) j := job.Job{ Name: entity.Name, State: &js, Unit: *u, } // populate a UnitState object only if the entity // is actually reporting relevant data if entity.Systemd != nil { j.UnitState = &unit.UnitState{ LoadState: entity.Systemd.LoadState, ActiveState: entity.Systemd.ActiveState, SubState: entity.Systemd.SubState, } if len(entity.Systemd.MachineID) > 0 { j.UnitState.MachineID = entity.Systemd.MachineID } } return &j, nil }
// partitionCluster returns a slice of IDs from a subset of active machines // that should be considered for scheduling the specified job. // The returned slice is sorted by ascending lexicographical string value of machine boot id. func (eg *Engine) partitionCluster(j *job.Job) ([]string, error) { if machID, ok := j.RequiredTarget(); ok { return []string{machID}, nil } // TODO(uwedeportivo): for now punt on jobs with requirements and offer to all machines // because agents are decoding the requirements if len(j.Requirements()) > 0 { machines, _ := eg.registry.Machines() machineIDs := make([]string, len(machines)) for i, mach := range machines { machineIDs[i] = mach.ID } sort.Strings(machineIDs) return machineIDs, nil } // this is usually a cheap no-op eg.refreshCluster(false) // as an initial heuristic, choose the k least loaded, with k = partitionSize machineIDs := eg.clust.kLeastLoaded(partitionSize) sort.Strings(machineIDs) return machineIDs, nil }
// AbleToRun determines if an Agent can run the provided Job based on // the Agent's current state. A boolean indicating whether this is the // case or not is returned. The following criteria is used: // - Agent must meet the Job's machine target requirement (if any) // - Agent must have all of the Job's required metadata (if any) // - Agent must have all required Peers of the Job scheduled locally (if any) // - Job must not conflict with any other Units scheduled to the agent // - Job must specially handle replaced units to be rescheduled func (as *AgentState) AbleToRun(j *job.Job) (jobAction job.JobAction, errstr string) { if tgt, ok := j.RequiredTarget(); ok && !as.MState.MatchID(tgt) { return job.JobActionUnschedule, fmt.Sprintf("agent ID %q does not match required %q", as.MState.ID, tgt) } metadata := j.RequiredTargetMetadata() if len(metadata) != 0 { if !machine.HasMetadata(as.MState, metadata) { return job.JobActionUnschedule, "local Machine metadata insufficient" } } peers := j.Peers() if len(peers) != 0 { for _, peer := range peers { if !as.unitScheduled(peer) { return job.JobActionUnschedule, fmt.Sprintf("required peer Unit(%s) is not scheduled locally", peer) } } } if cExists, cJobName := as.HasConflict(j.Name, j.Conflicts()); cExists { return job.JobActionUnschedule, fmt.Sprintf("found conflict with locally-scheduled Unit(%s)", cJobName) } // Handle Replace option specially for rescheduling the unit if cExists, cJobName := as.hasReplace(j.Name, j.Replaces()); cExists { return job.JobActionReschedule, fmt.Sprintf("found replace with locally-scheduled Unit(%s)", cJobName) } return job.JobActionSchedule, "" }
// Determine if the Agent can run the provided Job func (a *Agent) AbleToRun(j *job.Job) bool { if !a.VerifyJob(j) { log.V(1).Infof("Failed to verify Job(%s)", j.Name) return false } requirements := j.Requirements() if len(requirements) == 0 { log.V(1).Infof("Job(%s) has no requirements", j.Name) return true } if log.V(1) { var reqString string for key, slice := range requirements { reqString += fmt.Sprintf("%s = [", key) for _, val := range slice { reqString += fmt.Sprintf("%s, ", val) } reqString += fmt.Sprint("] ") } log.Infof("Job(%s) has requirements: %s", j.Name, reqString) } metadata := extractMachineMetadata(requirements) log.V(1).Infof("Job(%s) requires machine metadata: %v", j.Name, metadata) if !a.machine.HasMetadata(metadata) { log.V(1).Infof("Unable to run Job(%s), local Machine metadata insufficient", j.Name) return false } bootID, ok := requirements[unit.FleetXConditionMachineBootID] if ok && len(bootID) > 0 && !a.machine.State().MatchBootID(bootID[0]) { log.V(1).Infof("Agent does not pass MachineBootID condition for Job(%s)", j.Name) return false } peers := j.Payload.Peers() if len(peers) > 0 { log.V(1).Infof("Asserting required Peers %v of Job(%s) are scheduled locally", peers, j.Name) for _, peer := range peers { if !a.peerScheduledHere(j.Name, peer) { log.V(1).Infof("Required Peer(%s) of Job(%s) is not scheduled locally", peer, j.Name) return false } } } else { log.V(2).Infof("Job(%s) has no peers to worry about", j.Name) } if conflicted, conflictedJobName := a.state.HasConflict(j.Name, j.Payload.Conflicts()); conflicted { log.V(1).Infof("Job(%s) has conflict with Job(%s)", j.Name, conflictedJobName) return false } return true }
// AbleToRun determines if an Agent can run the provided Job based on // the Agent's current state. A boolean indicating whether this is the // case or not is returned. The following criteria is used: // - Agent must meet the Job's machine target requirement (if any) // - Agent must have all of the Job's required metadata (if any) // - Agent must have all required Peers of the Job scheduled locally (if any) // - Job must not conflict with any other Jobs scheduled to the agent func (as *AgentState) AbleToRun(j *job.Job) (bool, string) { if tgt, ok := j.RequiredTarget(); ok && !as.MState.MatchID(tgt) { return false, fmt.Sprintf("agent ID %q does not match required %q", as.MState.ID, tgt) } metadata := j.RequiredTargetMetadata() if len(metadata) != 0 { if !machine.HasMetadata(as.MState, metadata) { return false, "local Machine metadata insufficient" } } peers := j.Peers() if len(peers) != 0 { for _, peer := range peers { if !as.jobScheduled(peer) { return false, fmt.Sprintf("required peer Job(%s) is not scheduled locally", peer) } } } if cExists, cJobName := as.hasConflict(j.Name, j.Conflicts()); cExists { return false, fmt.Sprintf("found conflict with locally-scheduled Job(%s)", cJobName) } return true, "" }
func (r *EtcdRegistry) hydrateJob(j *job.Job) (err error) { j.TargetState, err = r.jobTargetState(j.Name) if err != nil { return } j.TargetMachineID, err = r.jobTargetMachine(j.Name) if err != nil { return } j.UnitState = r.getUnitState(j.Name) j.State = r.determineJobState(j.Name) return }
// Determine if the Agent can run the provided Job func (a *Agent) ableToRun(j *job.Job) bool { if !a.verifyJob(j) { log.V(1).Infof("Failed to verify Job(%s)", j.Name) return false } requirements := j.Requirements() if len(requirements) == 0 { log.V(1).Infof("Job(%s) has no requirements", j.Name) } log.Infof("Job(%s) has requirements: %s", j.Name, requirements) metadata := j.RequiredTargetMetadata() log.V(1).Infof("Job(%s) requires machine metadata: %v", j.Name, metadata) ms := a.Machine.State() if !machine.HasMetadata(&ms, metadata) { log.Infof("Unable to run Job(%s), local Machine metadata insufficient", j.Name) return false } if tgt, ok := j.RequiredTarget(); ok && !a.Machine.State().MatchID(tgt) { log.Infof("Agent does not meet machine target requirement for Job(%s)", j.Name) return false } peers := j.Peers() if len(peers) > 0 { log.V(1).Infof("Asserting required Peers %v of Job(%s) are scheduled locally", peers, j.Name) for _, peer := range peers { if !a.peerScheduledHere(j.Name, peer) { log.Infof("Required Peer(%s) of Job(%s) is not scheduled locally", peer, j.Name) return false } } } else { log.V(1).Infof("Job(%s) has no peers to worry about", j.Name) } if conflicted, conflictedJobName := a.HasConflict(j.Name, j.Conflicts()); conflicted { log.Infof("Job(%s) has conflict with Job(%s)", j.Name, conflictedJobName) return false } return true }
// ableToRun determines if the Agent can run the provided Job, and returns a boolean indicating // whether this is the case. There are five criteria for an Agent to be eligible to run a Job: // - Job must pass signature verification // - agent must have all of the Job's required metadata (if any) // - agent must meet the Job's machine target requirement (if any) // - agent must have all required Peers of the Job scheduled locally (if any) // - Job must not conflict with any other Jobs scheduled to the agent func (a *Agent) ableToRun(j *job.Job) bool { if !a.verifyJobSignature(j) { log.V(1).Infof("Failed to verify Job(%s)", j.Name) return false } log.Infof("Job(%s) has requirements: %s", j.Name, j.Requirements()) metadata := j.RequiredTargetMetadata() if len(metadata) == 0 { log.V(1).Infof("Job(%s) has no required machine metadata", j.Name) } else { log.V(1).Infof("Job(%s) requires machine metadata: %v", j.Name, metadata) ms := a.Machine.State() if !machine.HasMetadata(&ms, metadata) { log.Infof("Unable to run Job(%s): local Machine metadata insufficient", j.Name) return false } } if tgt, ok := j.RequiredTarget(); ok && !a.Machine.State().MatchID(tgt) { log.Infof("Unable to run Job(%s): agent does not meet machine target requirement (%s)", j.Name, tgt) return false } peers := j.Peers() if len(peers) == 0 { log.V(1).Infof("Job(%s) has no required peers", j.Name) } else { log.V(1).Infof("Job(%s) requires peers: %v", j.Name, peers) for _, peer := range peers { if !a.peerScheduledHere(j.Name, peer) { log.Infof("Unable to run Job(%s): required Peer(%s) is not scheduled locally", j.Name, peer) return false } } } if conflicted, conflictedJobName := a.HasConflict(j.Name, j.Conflicts()); conflicted { log.Infof("Unable to run Job(%s): conflict with Job(%s)", j.Name, conflictedJobName) return false } return true }
func (r *EtcdRegistry) hydrateJob(j *job.Job) error { tgt, err := r.jobTargetState(j.Name) if err != nil { return err } j.TargetState = tgt j.TargetMachineID, err = r.jobTargetMachine(j.Name) if err != nil { return err } j.UnitState = r.getUnitState(j.Name) j.State = r.determineJobState(j.Name) return nil }
// ableToRun determines if the Agent can run the provided Job based on // the Agent's desired state. A boolean indicating whether this is the // case or not is returned. The following criteria is used: // - Agent must meet the Job's machine target requirement (if any) // - Job must pass signature verification // - Agent must have all of the Job's required metadata (if any) // - Agent must have all required Peers of the Job scheduled locally (if any) // - Job must not conflict with any other Jobs scheduled to the agent func (ar *AgentReconciler) ableToRun(as *agentState, ms *machine.MachineState, j *job.Job) (bool, string) { log.V(1).Infof("Attempting to determine if able to run Job(%s)", j.Name) if tgt, ok := j.RequiredTarget(); ok && !ms.MatchID(tgt) { return false, fmt.Sprintf("Agent ID %q does not match required %q", ms.ID, tgt) } if !ar.verifyJobSignature(j) { return false, "unable to verify signature" } log.V(1).Infof("Job(%s) has requirements: %s", j.Name, j.Requirements()) metadata := j.RequiredTargetMetadata() if len(metadata) == 0 { log.V(1).Infof("Job(%s) has no required machine metadata", j.Name) } else { log.V(1).Infof("Job(%s) requires machine metadata: %v", j.Name, metadata) if !machine.HasMetadata(ms, metadata) { return false, "local Machine metadata insufficient" } } peers := j.Peers() if len(peers) == 0 { log.V(1).Infof("Job(%s) has no required peers", j.Name) } else { log.V(1).Infof("Job(%s) requires peers: %v", j.Name, peers) for _, peer := range peers { if !as.jobScheduled(peer) { return false, fmt.Sprintf("required peer Job(%s) is not scheduled locally", peer) } } } if cExists, cJobName := as.hasConflict(j.Name, j.Conflicts()); cExists { return false, fmt.Sprintf("found conflict with locally-scheduled Job(%s)", cJobName) } log.V(1).Infof("Determined local Agent is able to run Job(%s)", j.Name) return true, "" }
func newClusterState(units []job.Unit, sUnits []job.ScheduledUnit, machines []machine.MachineState) *clusterState { sUnitMap := make(map[string]*job.ScheduledUnit) for _, sUnit := range sUnits { sUnit := sUnit sUnitMap[sUnit.Name] = &sUnit } jMap := make(map[string]*job.Job) guMap := make(map[string]*job.Unit) for _, u := range units { if u.IsGlobal() { u := u guMap[u.Name] = &u } else { j := job.Job{ Name: u.Name, Unit: u.Unit, TargetState: u.TargetState, } if sUnit, ok := sUnitMap[u.Name]; ok { j.TargetMachineID = sUnit.TargetMachineID j.State = sUnit.State } jMap[j.Name] = &j } } mMap := make(map[string]*machine.MachineState, len(machines)) for _, ms := range machines { ms := ms mMap[ms.ID] = &ms } return &clusterState{ jobs: jMap, gUnits: guMap, machines: mMap, } }
func (r *EtcdRegistry) parseJobDir(j *job.Job, dir *etcd.Node) (err error) { var heartbeat string for _, node := range dir.Nodes { switch node.Key { case r.jobTargetStatePath(j.Name): j.TargetState, err = job.ParseJobState(node.Value) if err != nil { return } case r.jobTargetAgentPath(j.Name): j.TargetMachineID = node.Value case r.jobHeartbeatPath(j.Name): heartbeat = node.Value } } j.UnitState = r.getUnitState(j.Name) js := determineJobState(j, heartbeat) j.State = &js return }
// TrackJob extracts and stores information about the given job for later reference func (as *AgentState) TrackJob(j *job.Job) { as.trackJobPeers(j.Name, j.Peers()) as.trackJobConflicts(j.Name, j.Conflicts()) }
// requiresMachine returns whether specified job requires a specific machine. func (eg *Engine) requiresMachine(j *job.Job) ([]string, bool) { requirements := j.Requirements() bootID, ok := requirements[unit.FleetXConditionMachineBootID] return bootID, ok && len(bootID) > 0 }