// partitionCluster returns a slice of IDs from a subset of active machines // that should be considered for scheduling the specified job. // The returned slice is sorted by ascending lexicographical string value of machine boot id. func (eg *Engine) partitionCluster(j *job.Job) ([]string, error) { if machID, ok := j.RequiredTarget(); ok { return []string{machID}, nil } // TODO(uwedeportivo): for now punt on jobs with requirements and offer to all machines // because agents are decoding the requirements if len(j.Requirements()) > 0 { machines, _ := eg.registry.Machines() machineIDs := make([]string, len(machines)) for i, mach := range machines { machineIDs[i] = mach.ID } sort.Strings(machineIDs) return machineIDs, nil } // this is usually a cheap no-op eg.refreshCluster(false) // as an initial heuristic, choose the k least loaded, with k = partitionSize machineIDs := eg.clust.kLeastLoaded(partitionSize) sort.Strings(machineIDs) return machineIDs, nil }
// Determine if the Agent can run the provided Job func (a *Agent) AbleToRun(j *job.Job) bool { if !a.VerifyJob(j) { log.V(1).Infof("Failed to verify Job(%s)", j.Name) return false } requirements := j.Requirements() if len(requirements) == 0 { log.V(1).Infof("Job(%s) has no requirements", j.Name) return true } if log.V(1) { var reqString string for key, slice := range requirements { reqString += fmt.Sprintf("%s = [", key) for _, val := range slice { reqString += fmt.Sprintf("%s, ", val) } reqString += fmt.Sprint("] ") } log.Infof("Job(%s) has requirements: %s", j.Name, reqString) } metadata := extractMachineMetadata(requirements) log.V(1).Infof("Job(%s) requires machine metadata: %v", j.Name, metadata) if !a.machine.HasMetadata(metadata) { log.V(1).Infof("Unable to run Job(%s), local Machine metadata insufficient", j.Name) return false } bootID, ok := requirements[unit.FleetXConditionMachineBootID] if ok && len(bootID) > 0 && !a.machine.State().MatchBootID(bootID[0]) { log.V(1).Infof("Agent does not pass MachineBootID condition for Job(%s)", j.Name) return false } peers := j.Payload.Peers() if len(peers) > 0 { log.V(1).Infof("Asserting required Peers %v of Job(%s) are scheduled locally", peers, j.Name) for _, peer := range peers { if !a.peerScheduledHere(j.Name, peer) { log.V(1).Infof("Required Peer(%s) of Job(%s) is not scheduled locally", peer, j.Name) return false } } } else { log.V(2).Infof("Job(%s) has no peers to worry about", j.Name) } if conflicted, conflictedJobName := a.state.HasConflict(j.Name, j.Payload.Conflicts()); conflicted { log.V(1).Infof("Job(%s) has conflict with Job(%s)", j.Name, conflictedJobName) return false } return true }
// Determine if the Agent can run the provided Job func (a *Agent) ableToRun(j *job.Job) bool { if !a.verifyJob(j) { log.V(1).Infof("Failed to verify Job(%s)", j.Name) return false } requirements := j.Requirements() if len(requirements) == 0 { log.V(1).Infof("Job(%s) has no requirements", j.Name) } log.Infof("Job(%s) has requirements: %s", j.Name, requirements) metadata := j.RequiredTargetMetadata() log.V(1).Infof("Job(%s) requires machine metadata: %v", j.Name, metadata) ms := a.Machine.State() if !machine.HasMetadata(&ms, metadata) { log.Infof("Unable to run Job(%s), local Machine metadata insufficient", j.Name) return false } if tgt, ok := j.RequiredTarget(); ok && !a.Machine.State().MatchID(tgt) { log.Infof("Agent does not meet machine target requirement for Job(%s)", j.Name) return false } peers := j.Peers() if len(peers) > 0 { log.V(1).Infof("Asserting required Peers %v of Job(%s) are scheduled locally", peers, j.Name) for _, peer := range peers { if !a.peerScheduledHere(j.Name, peer) { log.Infof("Required Peer(%s) of Job(%s) is not scheduled locally", peer, j.Name) return false } } } else { log.V(1).Infof("Job(%s) has no peers to worry about", j.Name) } if conflicted, conflictedJobName := a.HasConflict(j.Name, j.Conflicts()); conflicted { log.Infof("Job(%s) has conflict with Job(%s)", j.Name, conflictedJobName) return false } return true }
// ableToRun determines if the Agent can run the provided Job, and returns a boolean indicating // whether this is the case. There are five criteria for an Agent to be eligible to run a Job: // - Job must pass signature verification // - agent must have all of the Job's required metadata (if any) // - agent must meet the Job's machine target requirement (if any) // - agent must have all required Peers of the Job scheduled locally (if any) // - Job must not conflict with any other Jobs scheduled to the agent func (a *Agent) ableToRun(j *job.Job) bool { if !a.verifyJobSignature(j) { log.V(1).Infof("Failed to verify Job(%s)", j.Name) return false } log.Infof("Job(%s) has requirements: %s", j.Name, j.Requirements()) metadata := j.RequiredTargetMetadata() if len(metadata) == 0 { log.V(1).Infof("Job(%s) has no required machine metadata", j.Name) } else { log.V(1).Infof("Job(%s) requires machine metadata: %v", j.Name, metadata) ms := a.Machine.State() if !machine.HasMetadata(&ms, metadata) { log.Infof("Unable to run Job(%s): local Machine metadata insufficient", j.Name) return false } } if tgt, ok := j.RequiredTarget(); ok && !a.Machine.State().MatchID(tgt) { log.Infof("Unable to run Job(%s): agent does not meet machine target requirement (%s)", j.Name, tgt) return false } peers := j.Peers() if len(peers) == 0 { log.V(1).Infof("Job(%s) has no required peers", j.Name) } else { log.V(1).Infof("Job(%s) requires peers: %v", j.Name, peers) for _, peer := range peers { if !a.peerScheduledHere(j.Name, peer) { log.Infof("Unable to run Job(%s): required Peer(%s) is not scheduled locally", j.Name, peer) return false } } } if conflicted, conflictedJobName := a.HasConflict(j.Name, j.Conflicts()); conflicted { log.Infof("Unable to run Job(%s): conflict with Job(%s)", j.Name, conflictedJobName) return false } return true }
// ableToRun determines if the Agent can run the provided Job based on // the Agent's desired state. A boolean indicating whether this is the // case or not is returned. The following criteria is used: // - Agent must meet the Job's machine target requirement (if any) // - Job must pass signature verification // - Agent must have all of the Job's required metadata (if any) // - Agent must have all required Peers of the Job scheduled locally (if any) // - Job must not conflict with any other Jobs scheduled to the agent func (ar *AgentReconciler) ableToRun(as *agentState, ms *machine.MachineState, j *job.Job) (bool, string) { log.V(1).Infof("Attempting to determine if able to run Job(%s)", j.Name) if tgt, ok := j.RequiredTarget(); ok && !ms.MatchID(tgt) { return false, fmt.Sprintf("Agent ID %q does not match required %q", ms.ID, tgt) } if !ar.verifyJobSignature(j) { return false, "unable to verify signature" } log.V(1).Infof("Job(%s) has requirements: %s", j.Name, j.Requirements()) metadata := j.RequiredTargetMetadata() if len(metadata) == 0 { log.V(1).Infof("Job(%s) has no required machine metadata", j.Name) } else { log.V(1).Infof("Job(%s) requires machine metadata: %v", j.Name, metadata) if !machine.HasMetadata(ms, metadata) { return false, "local Machine metadata insufficient" } } peers := j.Peers() if len(peers) == 0 { log.V(1).Infof("Job(%s) has no required peers", j.Name) } else { log.V(1).Infof("Job(%s) requires peers: %v", j.Name, peers) for _, peer := range peers { if !as.jobScheduled(peer) { return false, fmt.Sprintf("required peer Job(%s) is not scheduled locally", peer) } } } if cExists, cJobName := as.hasConflict(j.Name, j.Conflicts()); cExists { return false, fmt.Sprintf("found conflict with locally-scheduled Job(%s)", cJobName) } log.V(1).Infof("Determined local Agent is able to run Job(%s)", j.Name) return true, "" }
// requiresMachine returns whether specified job requires a specific machine. func (eg *Engine) requiresMachine(j *job.Job) ([]string, bool) { requirements := j.Requirements() bootID, ok := requirements[unit.FleetXConditionMachineBootID] return bootID, ok && len(bootID) > 0 }