// nodeID restores a persistent unique ID or generates a new one
func (c *Client) nodeID() (string, error) {
	// Do not persist in dev mode
	if c.config.DevMode {
		return structs.GenerateUUID(), nil
	}

	// Attempt to read existing ID
	path := filepath.Join(c.config.StateDir, "client-id")
	buf, err := ioutil.ReadFile(path)
	if err != nil && !os.IsNotExist(err) {
		return "", err
	}

	// Use existing ID if any
	if len(buf) != 0 {
		return string(buf), nil
	}

	// Generate new ID
	id := structs.GenerateUUID()

	// Persist the ID
	if err := ioutil.WriteFile(path, []byte(id), 0700); err != nil {
		return "", err
	}

	return id, nil
}
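// A minimal usage sketch (assumed, not part of the snippet above): with
// DevMode disabled, the first call writes client-id under StateDir and later
// calls read the same value back, so the ID is stable across restarts.
func exampleNodeID(c *Client) {
	first, err := c.nodeID()
	if err != nil {
		panic(err)
	}
	second, _ := c.nodeID()
	fmt.Println(first == second) // true: the ID round-trips through client-id
}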
func TestProposedAllocConstraint_JobDistinctHosts_Infeasible(t *testing.T) {
	_, ctx := testContext(t)
	nodes := []*structs.Node{
		mock.Node(),
		mock.Node(),
	}
	static := NewStaticIterator(ctx, nodes)

	// Create a job with a distinct_hosts constraint and two task groups.
	tg1 := &structs.TaskGroup{Name: "bar"}
	tg2 := &structs.TaskGroup{Name: "baz"}
	job := &structs.Job{
		ID:          "foo",
		Constraints: []*structs.Constraint{{Operand: structs.ConstraintDistinctHosts}},
		TaskGroups:  []*structs.TaskGroup{tg1, tg2},
	}

	// Add allocs placing tg1 on node1 and tg2 on node2. This should make the
	// job unsatisfiable.
	plan := ctx.Plan()
	plan.NodeAllocation[nodes[0].ID] = []*structs.Allocation{
		&structs.Allocation{
			TaskGroup: tg1.Name,
			JobID:     job.ID,
			ID:        structs.GenerateUUID(),
		},

		// Should be ignored as it is a different job.
		&structs.Allocation{
			TaskGroup: tg2.Name,
			JobID:     "ignore 2",
			ID:        structs.GenerateUUID(),
		},
	}
	plan.NodeAllocation[nodes[1].ID] = []*structs.Allocation{
		&structs.Allocation{
			TaskGroup: tg2.Name,
			JobID:     job.ID,
			ID:        structs.GenerateUUID(),
		},

		// Should be ignored as it is a different job.
		&structs.Allocation{
			TaskGroup: tg1.Name,
			JobID:     "ignore 2",
			ID:        structs.GenerateUUID(),
		},
	}

	proposed := NewProposedAllocConstraintIterator(ctx, static)
	proposed.SetTaskGroup(tg1)
	proposed.SetJob(job)

	out := collectFeasible(proposed)
	if len(out) != 0 {
		t.Fatalf("Bad: %#v", out)
	}
}
func VaultAccessor() *structs.VaultAccessor {
	return &structs.VaultAccessor{
		Accessor:    structs.GenerateUUID(),
		NodeID:      structs.GenerateUUID(),
		AllocID:     structs.GenerateUUID(),
		CreationTTL: 86400,
		Task:        "foo",
	}
}
func TestJobAntiAffinity_PlannedAlloc(t *testing.T) {
	_, ctx := testContext(t)
	nodes := []*RankedNode{
		&RankedNode{
			Node: &structs.Node{
				ID: structs.GenerateUUID(),
			},
		},
		&RankedNode{
			Node: &structs.Node{
				ID: structs.GenerateUUID(),
			},
		},
	}
	static := NewStaticRankIterator(ctx, nodes)

	// Add a planned alloc to node1 that fills it
	plan := ctx.Plan()
	plan.NodeAllocation[nodes[0].Node.ID] = []*structs.Allocation{
		&structs.Allocation{
			ID:    structs.GenerateUUID(),
			JobID: "foo",
		},
		&structs.Allocation{
			ID:    structs.GenerateUUID(),
			JobID: "foo",
		},
	}

	// Add a planned alloc to node2 that half fills it
	plan.NodeAllocation[nodes[1].Node.ID] = []*structs.Allocation{
		&structs.Allocation{
			JobID: "bar",
		},
	}

	binp := NewJobAntiAffinityIterator(ctx, static, 5.0, "foo")

	out := collectRanked(binp)
	if len(out) != 2 {
		t.Fatalf("Bad: %#v", out)
	}
	if out[0] != nodes[0] {
		t.Fatalf("Bad: %v", out)
	}
	if out[0].Score != -10.0 {
		t.Fatalf("Bad: %#v", out[0])
	}

	if out[1] != nodes[1] {
		t.Fatalf("Bad: %v", out)
	}
	if out[1].Score != 0.0 {
		t.Fatalf("Bad: %v", out[1])
	}
}
func Eval() *structs.Evaluation {
	eval := &structs.Evaluation{
		ID:       structs.GenerateUUID(),
		Priority: 50,
		Type:     structs.JobTypeService,
		JobID:    structs.GenerateUUID(),
		Status:   structs.EvalStatusPending,
	}
	return eval
}
func Node() *structs.Node {
	node := &structs.Node{
		ID:         structs.GenerateUUID(),
		SecretID:   structs.GenerateUUID(),
		Datacenter: "dc1",
		Name:       "foobar",
		Attributes: map[string]string{
			"kernel.name":   "linux",
			"arch":          "x86",
			"nomad.version": "0.5.0",
			"driver.exec":   "1",
		},
		Resources: &structs.Resources{
			CPU:      4000,
			MemoryMB: 8192,
			DiskMB:   100 * 1024,
			IOPS:     150,
			Networks: []*structs.NetworkResource{
				&structs.NetworkResource{
					Device: "eth0",
					CIDR:   "192.168.0.100/32",
					MBits:  1000,
				},
			},
		},
		Reserved: &structs.Resources{
			CPU:      100,
			MemoryMB: 256,
			DiskMB:   4 * 1024,
			Networks: []*structs.NetworkResource{
				&structs.NetworkResource{
					Device:        "eth0",
					IP:            "192.168.0.100",
					ReservedPorts: []structs.Port{{Label: "main", Value: 22}},
					MBits:         1,
				},
			},
		},
		Links: map[string]string{
			"consul": "foobar.dc1",
		},
		Meta: map[string]string{
			"pci-dss":  "true",
			"database": "mysql",
			"version":  "5.6",
		},
		NodeClass: "linux-medium-pci",
		Status:    structs.NodeStatusReady,
	}
	node.ComputeClass()
	return node
}
func TestSystemSched_ExhaustResources(t *testing.T) {
	h := NewHarness(t)

	// Create a node
	node := mock.Node()
	noErr(t, h.State.UpsertNode(h.NextIndex(), node))

	// Create a service job which consumes most of the system resources
	svcJob := mock.Job()
	svcJob.TaskGroups[0].Count = 1
	svcJob.TaskGroups[0].Tasks[0].Resources.CPU = 3600
	noErr(t, h.State.UpsertJob(h.NextIndex(), svcJob))

	// Create a mock evaluation to register the job
	eval := &structs.Evaluation{
		ID:          structs.GenerateUUID(),
		Priority:    svcJob.Priority,
		TriggeredBy: structs.EvalTriggerJobRegister,
		JobID:       svcJob.ID,
	}

	// Process the evaluation
	err := h.Process(NewServiceScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Create a system job
	job := mock.SystemJob()
	noErr(t, h.State.UpsertJob(h.NextIndex(), job))

	// Create a mock evaluation to register the job
	eval1 := &structs.Evaluation{
		ID:          structs.GenerateUUID(),
		Priority:    job.Priority,
		TriggeredBy: structs.EvalTriggerJobRegister,
		JobID:       job.ID,
	}

	// Process the evaluation
	if err := h.Process(NewSystemScheduler, eval1); err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure that we have one allocation queued from the system job eval
	queued := h.Evals[1].QueuedAllocations["web"]
	if queued != 1 {
		t.Fatalf("expected: %v, actual: %v", 1, queued)
	}
}
// This test ensures that the scheduler doesn't increment the queued allocation
// count for a task group when allocations can't be created on currently
// available nodes because of constraint mismatches.
func TestSystemSched_Queued_With_Constraints(t *testing.T) {
	h := NewHarness(t)

	// Register a node
	node := mock.Node()
	node.Attributes["kernel.name"] = "darwin"
	noErr(t, h.State.UpsertNode(h.NextIndex(), node))

	// Generate a system job which can't be placed on the node
	job := mock.SystemJob()
	noErr(t, h.State.UpsertJob(h.NextIndex(), job))

	// Create a mock evaluation to deal with the node update
	eval := &structs.Evaluation{
		ID:          structs.GenerateUUID(),
		Priority:    50,
		TriggeredBy: structs.EvalTriggerNodeUpdate,
		JobID:       job.ID,
		NodeID:      node.ID,
	}

	// Process the evaluation
	err := h.Process(NewSystemScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure that queued allocations is zero
	if val, ok := h.Evals[0].QueuedAllocations["web"]; !ok || val != 0 {
		t.Fatalf("bad queued allocations: %#v", h.Evals[0].QueuedAllocations)
	}
}
// This test just ensures the scheduler handles the eval type to avoid
// regressions.
func TestServiceSched_EvaluateMaxPlanEval(t *testing.T) {
	h := NewHarness(t)

	// Create a job and set the task group count to zero.
	job := mock.Job()
	job.TaskGroups[0].Count = 0
	noErr(t, h.State.UpsertJob(h.NextIndex(), job))

	// Create a mock blocked evaluation
	eval := &structs.Evaluation{
		ID:          structs.GenerateUUID(),
		Status:      structs.EvalStatusBlocked,
		Priority:    job.Priority,
		TriggeredBy: structs.EvalTriggerMaxPlans,
		JobID:       job.ID,
	}

	// Insert it into the state store
	noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

	// Process the evaluation
	err := h.Process(NewServiceScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure there was no plan
	if len(h.Plans) != 0 {
		t.Fatalf("bad: %#v", h.Plans)
	}

	h.AssertEvalStatus(t, structs.EvalStatusComplete)
}
// dequeueForSched is used to dequeue the next work item for a given scheduler.
// This assumes locks are held and that this scheduler has work
func (b *EvalBroker) dequeueForSched(sched string) (*structs.Evaluation, string, error) {
	// Get the pending queue
	pending := b.ready[sched]
	raw := heap.Pop(&pending)
	b.ready[sched] = pending
	eval := raw.(*structs.Evaluation)

	// Generate a UUID for the token
	token := structs.GenerateUUID()

	// Setup Nack timer
	nackTimer := time.AfterFunc(b.nackTimeout, func() {
		b.Nack(eval.ID, token)
	})

	// Add to the unack queue
	b.unack[eval.ID] = &unackEval{
		Eval:      eval,
		Token:     token,
		NackTimer: nackTimer,
	}

	// Increment the dequeue count
	b.evals[eval.ID] += 1

	// Update the stats
	b.stats.TotalReady -= 1
	b.stats.TotalUnacked += 1
	bySched := b.stats.ByScheduler[sched]
	bySched.Ready -= 1
	bySched.Unacked += 1

	return eval, token, nil
}
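// A minimal consumer sketch (assumed API): callers pair the returned token
// with an explicit Ack so the Nack timer installed above never fires. The Ack
// method and its signature are assumptions for illustration; they are not
// shown in the snippet above.
func exampleProcessEval(b *EvalBroker, eval *structs.Evaluation, token string) error {
	// ... perform the scheduling work for eval ...

	// Acking with the matching token cancels the Nack timer and removes the
	// eval from the unack queue.
	return b.Ack(eval.ID, token)
}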
// computePlacements computes placements for allocations
func (s *SystemScheduler) computePlacements(place []allocTuple) error {
	nodeByID := make(map[string]*structs.Node, len(s.nodes))
	for _, node := range s.nodes {
		nodeByID[node.ID] = node
	}

	nodes := make([]*structs.Node, 1)
	for _, missing := range place {
		node, ok := nodeByID[missing.Alloc.NodeID]
		if !ok {
			return fmt.Errorf("could not find node %q", missing.Alloc.NodeID)
		}

		// Update the set of placement nodes
		nodes[0] = node
		s.stack.SetNodes(nodes)

		// Attempt to match the task group
		option, _ := s.stack.Select(missing.TaskGroup)

		if option == nil {
			// Check if this task group has already failed
			if metric, ok := s.eval.FailedTGAllocs[missing.TaskGroup.Name]; ok {
				metric.CoalescedFailures += 1
				continue
			}
		}

		// Store the available nodes by datacenter
		s.ctx.Metrics().NodesAvailable = s.nodesByDC

		// Set fields based on if we found an allocation option
		if option != nil {
			// Create an allocation for this
			alloc := &structs.Allocation{
				ID:            structs.GenerateUUID(),
				EvalID:        s.eval.ID,
				Name:          missing.Name,
				JobID:         s.job.ID,
				TaskGroup:     missing.TaskGroup.Name,
				Metrics:       s.ctx.Metrics(),
				NodeID:        option.Node.ID,
				TaskResources: option.TaskResources,
				DesiredStatus: structs.AllocDesiredStatusRun,
				ClientStatus:  structs.AllocClientStatusPending,
			}

			s.plan.AppendAlloc(alloc)
		} else {
			// Lazy initialize the failed map
			if s.eval.FailedTGAllocs == nil {
				s.eval.FailedTGAllocs = make(map[string]*structs.AllocMetric)
			}

			s.eval.FailedTGAllocs[missing.TaskGroup.Name] = s.ctx.Metrics()
		}
	}
	return nil
}
// configureCgroups converts a Nomad Resources specification into the equivalent
// cgroup configuration. It returns an error if the resources are invalid.
func (e *UniversalExecutor) configureCgroups(resources *structs.Resources) error {
	e.resConCtx.groups = &cgroupConfig.Cgroup{}
	e.resConCtx.groups.Resources = &cgroupConfig.Resources{}
	cgroupName := structs.GenerateUUID()
	e.resConCtx.groups.Path = filepath.Join("/nomad", cgroupName)

	// TODO: verify this is needed for things like network access
	e.resConCtx.groups.Resources.AllowAllDevices = true

	if resources.MemoryMB > 0 {
		// Total amount of memory allowed to consume
		e.resConCtx.groups.Resources.Memory = int64(resources.MemoryMB * 1024 * 1024)
		// Disable swap to avoid issues on the machine
		e.resConCtx.groups.Resources.MemorySwap = int64(-1)
	}

	if resources.CPU < 2 {
		return fmt.Errorf("resources.CPU must be equal to or greater than 2: %v", resources.CPU)
	}

	// Set the relative CPU shares for this cgroup.
	e.resConCtx.groups.Resources.CpuShares = int64(resources.CPU)

	if resources.IOPS != 0 {
		// Validate it is in an acceptable range.
		if resources.IOPS < 10 || resources.IOPS > 1000 {
			return fmt.Errorf("resources.IOPS must be between 10 and 1000: %d", resources.IOPS)
		}

		e.resConCtx.groups.Resources.BlkioWeight = uint16(resources.IOPS)
	}

	return nil
}
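// A worked example of the conversions configureCgroups applies (values are
// illustrative only): MemoryMB becomes a byte limit, CPU maps directly to
// cgroup shares, and IOPS maps to blkio weight after range validation.
func exampleCgroupValues() {
	resources := &structs.Resources{CPU: 500, MemoryMB: 256, IOPS: 100}

	memoryBytes := int64(resources.MemoryMB * 1024 * 1024) // 268435456 bytes
	cpuShares := int64(resources.CPU)                      // 500 shares
	blkioWeight := uint16(resources.IOPS)                  // 100, within [10, 1000]

	fmt.Println(memoryBytes, cpuShares, blkioWeight)
}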
// DispatchJob creates an evaluation for the passed job and commits both the
// evaluation and the job to the raft log.
func (s *Server) DispatchJob(job *structs.Job) error {
	// Commit this update via Raft
	req := structs.JobRegisterRequest{Job: job}
	_, index, err := s.raftApply(structs.JobRegisterRequestType, req)
	if err != nil {
		return err
	}

	// Create a new evaluation
	eval := &structs.Evaluation{
		ID:             structs.GenerateUUID(),
		Priority:       job.Priority,
		Type:           job.Type,
		TriggeredBy:    structs.EvalTriggerJobRegister,
		JobID:          job.ID,
		JobModifyIndex: index,
		Status:         structs.EvalStatusPending,
	}
	update := &structs.EvalUpdateRequest{
		Evals: []*structs.Evaluation{eval},
	}

	// Commit this evaluation via Raft
	// XXX: There is a risk of partial failure where the JobRegister succeeds
	// but the EvalUpdate does not.
	_, _, err = s.raftApply(structs.EvalUpdateRequestType, update)
	if err != nil {
		return err
	}

	return nil
}
func Job() *structs.Job {
	job := &structs.Job{
		Region:      "global",
		ID:          structs.GenerateUUID(),
		Name:        "my-job",
		Type:        structs.JobTypeService,
		Priority:    50,
		AllAtOnce:   false,
		Datacenters: []string{"dc1"},
		Constraints: []*structs.Constraint{
			&structs.Constraint{
				Hard:    true,
				LTarget: "$attr.kernel.name",
				RTarget: "linux",
				Operand: "=",
			},
		},
		TaskGroups: []*structs.TaskGroup{
			&structs.TaskGroup{
				Name:  "web",
				Count: 10,
				Tasks: []*structs.Task{
					&structs.Task{
						Name:   "web",
						Driver: "exec",
						Config: map[string]string{
							"command": "/bin/date",
							"args":    "+%s",
						},
						Env: map[string]string{
							"FOO": "bar",
						},
						Resources: &structs.Resources{
							CPU:      500,
							MemoryMB: 256,
							Networks: []*structs.NetworkResource{
								&structs.NetworkResource{
									MBits:        50,
									DynamicPorts: []string{"http"},
								},
							},
						},
					},
				},
				Meta: map[string]string{
					"elb_check_type":     "http",
					"elb_check_interval": "30s",
					"elb_check_min":      "3",
				},
			},
		},
		Meta: map[string]string{
			"owner": "armon",
		},
		Status:      structs.JobStatusPending,
		CreateIndex: 42,
		ModifyIndex: 99,
	}
	return job
}
func TestSystemSched_JobRegister_AllocFail(t *testing.T) {
	h := NewHarness(t)

	// Create NO nodes
	// Create a job
	job := mock.SystemJob()
	noErr(t, h.State.UpsertJob(h.NextIndex(), job))

	// Create a mock evaluation to register the job
	eval := &structs.Evaluation{
		ID:          structs.GenerateUUID(),
		Priority:    job.Priority,
		TriggeredBy: structs.EvalTriggerJobRegister,
		JobID:       job.ID,
	}

	// Process the evaluation
	err := h.Process(NewSystemScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure no plan as this should be a no-op.
	if len(h.Plans) != 0 {
		t.Fatalf("bad: %#v", h.Plans)
	}

	h.AssertEvalStatus(t, structs.EvalStatusComplete)
}
// createNodeEvals is used to create evaluations for each alloc on a node.
// Each Eval is scoped to a job, so we need to potentially trigger many evals.
func (n *Node) createNodeEvals(nodeID string, nodeIndex uint64) ([]string, uint64, error) {
	// Snapshot the state
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return nil, 0, fmt.Errorf("failed to snapshot state: %v", err)
	}

	// Find all the allocations for this node
	allocs, err := snap.AllocsByNode(nodeID)
	if err != nil {
		return nil, 0, fmt.Errorf("failed to find allocs for '%s': %v", nodeID, err)
	}

	// Fast-path if nothing to do
	if len(allocs) == 0 {
		return nil, 0, nil
	}

	// Create an eval for each JobID affected
	var evals []*structs.Evaluation
	var evalIDs []string
	jobIDs := make(map[string]struct{})

	for _, alloc := range allocs {
		// Deduplicate on JobID
		if _, ok := jobIDs[alloc.JobID]; ok {
			continue
		}
		jobIDs[alloc.JobID] = struct{}{}

		// Create a new eval
		eval := &structs.Evaluation{
			ID:              structs.GenerateUUID(),
			Priority:        alloc.Job.Priority,
			Type:            alloc.Job.Type,
			TriggeredBy:     structs.EvalTriggerNodeUpdate,
			JobID:           alloc.JobID,
			NodeID:          nodeID,
			NodeModifyIndex: nodeIndex,
			Status:          structs.EvalStatusPending,
		}
		evals = append(evals, eval)
		evalIDs = append(evalIDs, eval.ID)
	}

	// Create the Raft transaction
	update := &structs.EvalUpdateRequest{
		Evals:        evals,
		WriteRequest: structs.WriteRequest{Region: n.srv.config.Region},
	}

	// Commit this evaluation via Raft
	// XXX: There is a risk of partial failure where the node update succeeds
	// but the EvalUpdate does not.
	_, evalIndex, err := n.srv.raftApply(structs.EvalUpdateRequestType, update)
	if err != nil {
		return nil, 0, err
	}

	return evalIDs, evalIndex, nil
}
// nodeID restores the node's persistent unique ID and SecretID, or generates
// new ones
func (c *Client) nodeID() (id string, secret string, err error) {
	// Do not persist in dev mode
	if c.config.DevMode {
		return structs.GenerateUUID(), structs.GenerateUUID(), nil
	}

	// Attempt to read existing ID
	idPath := filepath.Join(c.config.StateDir, "client-id")
	idBuf, err := ioutil.ReadFile(idPath)
	if err != nil && !os.IsNotExist(err) {
		return "", "", err
	}

	// Attempt to read existing secret ID
	secretPath := filepath.Join(c.config.StateDir, "secret-id")
	secretBuf, err := ioutil.ReadFile(secretPath)
	if err != nil && !os.IsNotExist(err) {
		return "", "", err
	}

	// Use existing ID if any
	if len(idBuf) != 0 {
		id = string(idBuf)
	} else {
		// Generate new ID
		id = structs.GenerateUUID()

		// Persist the ID
		if err := ioutil.WriteFile(idPath, []byte(id), 0700); err != nil {
			return "", "", err
		}
	}

	if len(secretBuf) != 0 {
		secret = string(secretBuf)
	} else {
		// Generate new secret ID
		secret = structs.GenerateUUID()

		// Persist the secret ID
		if err := ioutil.WriteFile(secretPath, []byte(secret), 0700); err != nil {
			return "", "", err
		}
	}

	return id, secret, nil
}
// Register is used to upsert a job for scheduling
func (j *Job) Register(args *structs.JobRegisterRequest, reply *structs.JobRegisterResponse) error {
	if done, err := j.srv.forward("Job.Register", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "job", "register"}, time.Now())

	// Validate the arguments
	if args.Job == nil {
		return fmt.Errorf("missing job for registration")
	}
	if err := args.Job.Validate(); err != nil {
		return err
	}

	// Expand the service names
	args.Job.ExpandAllServiceNames()

	if args.Job.Type == structs.JobTypeCore {
		return fmt.Errorf("job type cannot be core")
	}

	// Commit this update via Raft
	_, index, err := j.srv.raftApply(structs.JobRegisterRequestType, args)
	if err != nil {
		j.srv.logger.Printf("[ERR] nomad.job: Register failed: %v", err)
		return err
	}

	// Create a new evaluation
	eval := &structs.Evaluation{
		ID:             structs.GenerateUUID(),
		Priority:       args.Job.Priority,
		Type:           args.Job.Type,
		TriggeredBy:    structs.EvalTriggerJobRegister,
		JobID:          args.Job.ID,
		JobModifyIndex: index,
		Status:         structs.EvalStatusPending,
	}
	update := &structs.EvalUpdateRequest{
		Evals:        []*structs.Evaluation{eval},
		WriteRequest: structs.WriteRequest{Region: args.Region},
	}

	// Commit this evaluation via Raft
	// XXX: There is a risk of partial failure where the JobRegister succeeds
	// but the EvalUpdate does not.
	_, evalIndex, err := j.srv.raftApply(structs.EvalUpdateRequestType, update)
	if err != nil {
		j.srv.logger.Printf("[ERR] nomad.job: Eval create failed: %v", err)
		return err
	}

	// Setup the reply
	reply.EvalID = eval.ID
	reply.EvalCreateIndex = evalIndex
	reply.JobModifyIndex = index
	reply.Index = evalIndex
	return nil
}
// computePlacements computes placements for allocations
func (s *GenericScheduler) computePlacements(place []allocTuple) error {
	// Get the base nodes
	nodes, byDC, err := readyNodesInDCs(s.state, s.job.Datacenters)
	if err != nil {
		return err
	}

	// Update the set of placement nodes
	s.stack.SetNodes(nodes)

	// Track the failed task groups so that we can coalesce
	// the failures together to avoid creating many failed allocs.
	failedTG := make(map[*structs.TaskGroup]*structs.Allocation)

	for _, missing := range place {
		// Check if this task group has already failed
		if alloc, ok := failedTG[missing.TaskGroup]; ok {
			alloc.Metrics.CoalescedFailures += 1
			continue
		}

		// Attempt to match the task group
		option, _ := s.stack.Select(missing.TaskGroup)

		// Create an allocation for this
		alloc := &structs.Allocation{
			ID:        structs.GenerateUUID(),
			EvalID:    s.eval.ID,
			Name:      missing.Name,
			JobID:     s.job.ID,
			TaskGroup: missing.TaskGroup.Name,
			Metrics:   s.ctx.Metrics(),
		}

		// Store the available nodes by datacenter
		s.ctx.Metrics().NodesAvailable = byDC

		// Set fields based on if we found an allocation option
		if option != nil {
			// Generate the service IDs for the tasks in this allocation
			// COMPAT - This is no longer required and would be removed in v0.4
			alloc.PopulateServiceIDs(missing.TaskGroup)

			alloc.NodeID = option.Node.ID
			alloc.TaskResources = option.TaskResources
			alloc.DesiredStatus = structs.AllocDesiredStatusRun
			alloc.ClientStatus = structs.AllocClientStatusPending
			s.plan.AppendAlloc(alloc)
		} else {
			alloc.DesiredStatus = structs.AllocDesiredStatusFailed
			alloc.DesiredDescription = "failed to find a node for placement"
			alloc.ClientStatus = structs.AllocClientStatusFailed
			s.plan.AppendFailed(alloc)
			failedTG[missing.TaskGroup] = alloc
		}
	}
	return nil
}
func TestServiceSched_JobRegister_AllocFail(t *testing.T) {
	h := NewHarness(t)

	// Create NO nodes
	// Create a job
	job := mock.Job()
	noErr(t, h.State.UpsertJob(h.NextIndex(), job))

	// Create a mock evaluation to register the job
	eval := &structs.Evaluation{
		ID:          structs.GenerateUUID(),
		Priority:    job.Priority,
		TriggeredBy: structs.EvalTriggerJobRegister,
		JobID:       job.ID,
	}

	// Process the evaluation
	err := h.Process(NewServiceScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure a single plan
	if len(h.Plans) != 1 {
		t.Fatalf("bad: %#v", h.Plans)
	}
	plan := h.Plans[0]

	// Ensure the plan has created a follow up eval.
	if len(h.CreateEvals) != 1 || h.CreateEvals[0].Status != structs.EvalStatusBlocked {
		t.Fatalf("bad: %#v", h.CreateEvals)
	}

	// Ensure the plan failed to alloc
	if len(plan.FailedAllocs) != 1 {
		t.Fatalf("bad: %#v", plan)
	}

	// Lookup the allocations by JobID
	out, err := h.State.AllocsByJob(job.ID)
	noErr(t, err)

	// Ensure a single failed allocation was created
	if len(out) != 1 {
		t.Fatalf("bad: %#v", out)
	}

	// Check the coalesced failures
	if out[0].Metrics.CoalescedFailures != 9 {
		t.Fatalf("bad: %#v", out[0].Metrics)
	}

	// Check the available nodes
	if count, ok := out[0].Metrics.NodesAvailable["dc1"]; !ok || count != 0 {
		t.Fatalf("bad: %#v", out[0].Metrics)
	}

	h.AssertEvalStatus(t, structs.EvalStatusComplete)
}
func TestServiceSched_JobDeregister(t *testing.T) {
	h := NewHarness(t)

	// Generate a fake job with allocations
	job := mock.Job()

	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		allocs = append(allocs, alloc)
	}
	noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs))

	// Create a mock evaluation to deregister the job
	eval := &structs.Evaluation{
		ID:          structs.GenerateUUID(),
		Priority:    50,
		TriggeredBy: structs.EvalTriggerJobDeregister,
		JobID:       job.ID,
	}

	// Process the evaluation
	err := h.Process(NewServiceScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure a single plan
	if len(h.Plans) != 1 {
		t.Fatalf("bad: %#v", h.Plans)
	}
	plan := h.Plans[0]

	// Ensure the plan evicted all nodes
	if len(plan.NodeUpdate["12345678-abcd-efab-cdef-123456789abc"]) != len(allocs) {
		t.Fatalf("bad: %#v", plan)
	}

	// Lookup the allocations by JobID
	out, err := h.State.AllocsByJob(job.ID)
	noErr(t, err)

	// Ensure that the job field on the allocation is still populated
	for _, alloc := range out {
		if alloc.Job == nil {
			t.Fatalf("bad: %#v", alloc)
		}
	}

	// Ensure no remaining allocations
	out = structs.FilterTerminalAllocs(out)
	if len(out) != 0 {
		t.Fatalf("bad: %#v", out)
	}

	h.AssertEvalStatus(t, structs.EvalStatusComplete)
}
func SystemJob() *structs.Job {
	job := &structs.Job{
		Region:      "global",
		ID:          structs.GenerateUUID(),
		Name:        "my-job",
		Type:        structs.JobTypeSystem,
		Priority:    100,
		AllAtOnce:   false,
		Datacenters: []string{"dc1"},
		Constraints: []*structs.Constraint{
			&structs.Constraint{
				LTarget: "${attr.kernel.name}",
				RTarget: "linux",
				Operand: "=",
			},
		},
		TaskGroups: []*structs.TaskGroup{
			&structs.TaskGroup{
				Name:  "web",
				Count: 1,
				RestartPolicy: &structs.RestartPolicy{
					Attempts: 3,
					Interval: 10 * time.Minute,
					Delay:    1 * time.Minute,
					Mode:     structs.RestartPolicyModeDelay,
				},
				EphemeralDisk: structs.DefaultEphemeralDisk(),
				Tasks: []*structs.Task{
					&structs.Task{
						Name:   "web",
						Driver: "exec",
						Config: map[string]interface{}{
							"command": "/bin/date",
						},
						Env: map[string]string{},
						Resources: &structs.Resources{
							CPU:      500,
							MemoryMB: 256,
							Networks: []*structs.NetworkResource{
								&structs.NetworkResource{
									MBits:        50,
									DynamicPorts: []structs.Port{{Label: "http"}},
								},
							},
						},
						LogConfig: structs.DefaultLogConfig(),
					},
				},
			},
		},
		Meta: map[string]string{
			"owner": "armon",
		},
		Status:      structs.JobStatusPending,
		CreateIndex: 42,
		ModifyIndex: 99,
	}
	return job
}
func Alloc() *structs.Allocation {
	alloc := &structs.Allocation{
		ID:        structs.GenerateUUID(),
		EvalID:    structs.GenerateUUID(),
		NodeID:    "foo",
		TaskGroup: "web",
		Resources: &structs.Resources{
			CPU:      500,
			MemoryMB: 256,
			Networks: []*structs.NetworkResource{
				&structs.NetworkResource{
					Device:        "eth0",
					IP:            "192.168.0.100",
					ReservedPorts: []structs.Port{{Label: "main", Value: 12345}},
					MBits:         100,
					DynamicPorts:  []structs.Port{{Label: "http"}},
				},
			},
		},
		TaskResources: map[string]*structs.Resources{
			"web": &structs.Resources{
				CPU:      500,
				MemoryMB: 256,
				Networks: []*structs.NetworkResource{
					&structs.NetworkResource{
						Device:        "eth0",
						IP:            "192.168.0.100",
						ReservedPorts: []structs.Port{{Label: "main", Value: 5000}},
						MBits:         50,
						DynamicPorts:  []structs.Port{{Label: "http"}},
					},
				},
			},
		},
		TaskStates: map[string]*structs.TaskState{
			"web": &structs.TaskState{
				State: structs.TaskStatePending,
			},
		},
		Job:           Job(),
		DesiredStatus: structs.AllocDesiredStatusRun,
		ClientStatus:  structs.AllocClientStatusPending,
	}
	alloc.JobID = alloc.Job.ID
	return alloc
}
func TestBatchSched_Run_FailedAlloc(t *testing.T) {
	h := NewHarness(t)

	// Create a node
	node := mock.Node()
	noErr(t, h.State.UpsertNode(h.NextIndex(), node))

	// Create a job
	job := mock.Job()
	job.TaskGroups[0].Count = 1
	noErr(t, h.State.UpsertJob(h.NextIndex(), job))

	// Create a failed alloc
	alloc := mock.Alloc()
	alloc.Job = job
	alloc.JobID = job.ID
	alloc.NodeID = node.ID
	alloc.Name = "my-job.web[0]"
	alloc.ClientStatus = structs.AllocClientStatusFailed
	noErr(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc}))

	// Create a mock evaluation to register the job
	eval := &structs.Evaluation{
		ID:          structs.GenerateUUID(),
		Priority:    job.Priority,
		TriggeredBy: structs.EvalTriggerJobRegister,
		JobID:       job.ID,
	}

	// Process the evaluation
	err := h.Process(NewBatchScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure a plan
	if len(h.Plans) != 1 {
		t.Fatalf("bad: %#v", h.Plans)
	}

	// Lookup the allocations by JobID
	out, err := h.State.AllocsByJob(job.ID)
	noErr(t, err)

	// Ensure a replacement alloc was placed.
	if len(out) != 2 {
		t.Fatalf("bad: %#v", out)
	}

	// Ensure that the scheduler is recording the correct number of queued
	// allocations
	queued := h.Evals[0].QueuedAllocations["web"]
	if queued != 0 {
		t.Fatalf("expected: %v, actual: %v", 0, queued)
	}

	h.AssertEvalStatus(t, structs.EvalStatusComplete)
}
func TestSystemSched_JobRegister(t *testing.T) {
	h := NewHarness(t)

	// Create some nodes
	for i := 0; i < 10; i++ {
		node := mock.Node()
		noErr(t, h.State.UpsertNode(h.NextIndex(), node))
	}

	// Create a job
	job := mock.SystemJob()
	noErr(t, h.State.UpsertJob(h.NextIndex(), job))

	// Create a mock evaluation to register the job
	eval := &structs.Evaluation{
		ID:          structs.GenerateUUID(),
		Priority:    job.Priority,
		TriggeredBy: structs.EvalTriggerJobRegister,
		JobID:       job.ID,
	}

	// Process the evaluation
	err := h.Process(NewSystemScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure a single plan
	if len(h.Plans) != 1 {
		t.Fatalf("bad: %#v", h.Plans)
	}
	plan := h.Plans[0]

	// Ensure the plan allocated
	var planned []*structs.Allocation
	for _, allocList := range plan.NodeAllocation {
		planned = append(planned, allocList...)
	}
	if len(planned) != 10 {
		t.Fatalf("bad: %#v", plan)
	}

	// Lookup the allocations by JobID
	out, err := h.State.AllocsByJob(job.ID)
	noErr(t, err)

	// Ensure all allocations placed
	if len(out) != 10 {
		t.Fatalf("bad: %#v", out)
	}

	// Check the available nodes
	if count, ok := out[0].Metrics.NodesAvailable["dc1"]; !ok || count != 10 {
		t.Fatalf("bad: %#v", out[0].Metrics)
	}

	h.AssertEvalStatus(t, structs.EvalStatusComplete)
}
func TestSystemSched_NodeDrain_Down(t *testing.T) {
	h := NewHarness(t)

	// Register a draining node
	node := mock.Node()
	node.Drain = true
	node.Status = structs.NodeStatusDown
	noErr(t, h.State.UpsertNode(h.NextIndex(), node))

	// Generate a fake job allocated on that node.
	job := mock.SystemJob()
	noErr(t, h.State.UpsertJob(h.NextIndex(), job))

	alloc := mock.Alloc()
	alloc.Job = job
	alloc.JobID = job.ID
	alloc.NodeID = node.ID
	alloc.Name = "my-job.web[0]"
	noErr(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc}))

	// Create a mock evaluation to deal with the node update
	eval := &structs.Evaluation{
		ID:          structs.GenerateUUID(),
		Priority:    50,
		TriggeredBy: structs.EvalTriggerNodeUpdate,
		JobID:       job.ID,
		NodeID:      node.ID,
	}

	// Process the evaluation with the system scheduler, matching the job type
	// under test.
	err := h.Process(NewSystemScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure a single plan
	if len(h.Plans) != 1 {
		t.Fatalf("bad: %#v", h.Plans)
	}
	plan := h.Plans[0]

	// Ensure the plan evicted non terminal allocs
	if len(plan.NodeUpdate[node.ID]) != 1 {
		t.Fatalf("bad: %#v", plan)
	}

	// Ensure that the allocation is marked as lost
	var lostAllocs []string
	for _, alloc := range plan.NodeUpdate[node.ID] {
		lostAllocs = append(lostAllocs, alloc.ID)
	}
	expected := []string{alloc.ID}

	if !reflect.DeepEqual(lostAllocs, expected) {
		t.Fatalf("expected: %v, actual: %v", expected, lostAllocs)
	}
	h.AssertEvalStatus(t, structs.EvalStatusComplete)
}
func TestSystemSched_QueuedAllocsMultTG(t *testing.T) {
	h := NewHarness(t)

	// Register two nodes with two different classes
	node := mock.Node()
	node.NodeClass = "green"
	node.ComputeClass()
	noErr(t, h.State.UpsertNode(h.NextIndex(), node))

	node2 := mock.Node()
	node2.NodeClass = "blue"
	node2.ComputeClass()
	noErr(t, h.State.UpsertNode(h.NextIndex(), node2))

	// Create a job with two task groups, each constrained on node class
	job := mock.SystemJob()
	tg1 := job.TaskGroups[0]
	tg1.Constraints = append(tg1.Constraints,
		&structs.Constraint{
			LTarget: "${node.class}",
			RTarget: "green",
			Operand: "==",
		})

	tg2 := tg1.Copy()
	tg2.Name = "web2"
	tg2.Constraints[0].RTarget = "blue"
	job.TaskGroups = append(job.TaskGroups, tg2)
	noErr(t, h.State.UpsertJob(h.NextIndex(), job))

	// Create a mock evaluation to deal with drain
	eval := &structs.Evaluation{
		ID:          structs.GenerateUUID(),
		Priority:    50,
		TriggeredBy: structs.EvalTriggerNodeUpdate,
		JobID:       job.ID,
		NodeID:      node.ID,
	}

	// Process the evaluation
	err := h.Process(NewSystemScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure a single plan
	if len(h.Plans) != 1 {
		t.Fatalf("bad: %#v", h.Plans)
	}

	qa := h.Evals[0].QueuedAllocations
	if qa["web"] != 0 || qa["web2"] != 0 {
		t.Fatalf("bad queued allocations %#v", qa)
	}

	h.AssertEvalStatus(t, structs.EvalStatusComplete)
}
// Evaluate is used to force re-evaluation of a job
func (j *Job) Evaluate(args *structs.JobEvaluateRequest, reply *structs.JobRegisterResponse) error {
	if done, err := j.srv.forward("Job.Evaluate", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "job", "evaluate"}, time.Now())

	// Validate the arguments
	if args.JobID == "" {
		return fmt.Errorf("missing job ID for evaluation")
	}

	// Lookup the job
	snap, err := j.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}
	job, err := snap.JobByID(args.JobID)
	if err != nil {
		return err
	}
	if job == nil {
		return fmt.Errorf("job not found")
	}

	if job.IsPeriodic() {
		return fmt.Errorf("can't evaluate periodic job")
	}

	// Create a new evaluation
	eval := &structs.Evaluation{
		ID:             structs.GenerateUUID(),
		Priority:       job.Priority,
		Type:           job.Type,
		TriggeredBy:    structs.EvalTriggerJobRegister,
		JobID:          job.ID,
		JobModifyIndex: job.ModifyIndex,
		Status:         structs.EvalStatusPending,
	}
	update := &structs.EvalUpdateRequest{
		Evals:        []*structs.Evaluation{eval},
		WriteRequest: structs.WriteRequest{Region: args.Region},
	}

	// Commit this evaluation via Raft
	_, evalIndex, err := j.srv.raftApply(structs.EvalUpdateRequestType, update)
	if err != nil {
		j.srv.logger.Printf("[ERR] nomad.job: Eval create failed: %v", err)
		return err
	}

	// Setup the reply
	reply.EvalID = eval.ID
	reply.EvalCreateIndex = evalIndex
	reply.JobModifyIndex = job.ModifyIndex
	reply.Index = evalIndex
	return nil
}
// computePlacements computes placements for allocations
func (s *GenericScheduler) computePlacements(place []allocTuple) error {
	// Get the base nodes
	nodes, byDC, err := readyNodesInDCs(s.state, s.job.Datacenters)
	if err != nil {
		return err
	}

	// Update the set of placement nodes
	s.stack.SetNodes(nodes)

	for _, missing := range place {
		// Check if this task group has already failed
		if metric, ok := s.failedTGAllocs[missing.TaskGroup.Name]; ok {
			metric.CoalescedFailures += 1
			continue
		}

		// Attempt to match the task group
		option, _ := s.stack.Select(missing.TaskGroup)

		// Store the available nodes by datacenter
		s.ctx.Metrics().NodesAvailable = byDC

		// Set fields based on if we found an allocation option
		if option != nil {
			// Create an allocation for this
			alloc := &structs.Allocation{
				ID:            structs.GenerateUUID(),
				EvalID:        s.eval.ID,
				Name:          missing.Name,
				JobID:         s.job.ID,
				TaskGroup:     missing.TaskGroup.Name,
				Metrics:       s.ctx.Metrics(),
				NodeID:        option.Node.ID,
				TaskResources: option.TaskResources,
				DesiredStatus: structs.AllocDesiredStatusRun,
				ClientStatus:  structs.AllocClientStatusPending,
			}

			// Generate the service IDs for the tasks in this allocation
			// COMPAT - This is no longer required and would be removed in v0.4
			alloc.PopulateServiceIDs(missing.TaskGroup)

			s.plan.AppendAlloc(alloc)
		} else {
			// Lazy initialize the failed map
			if s.failedTGAllocs == nil {
				s.failedTGAllocs = make(map[string]*structs.AllocMetric)
			}

			s.failedTGAllocs[missing.TaskGroup.Name] = s.ctx.Metrics()
		}
	}
	return nil
}
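// A minimal sketch of the failure-coalescing bookkeeping used by the
// computePlacements variants above: the first failure for a task group stores
// its metrics, and every subsequent failure only bumps CoalescedFailures
// instead of recording a new entry. The standalone helper is illustrative;
// the names mirror the snippets.
func exampleRecordFailure(failed map[string]*structs.AllocMetric, tgName string, m *structs.AllocMetric) {
	if metric, ok := failed[tgName]; ok {
		metric.CoalescedFailures += 1
		return
	}
	failed[tgName] = m
}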
// coreJobEval returns an evaluation for a core job
func (s *Server) coreJobEval(job string, modifyIndex uint64) *structs.Evaluation {
	return &structs.Evaluation{
		ID:          structs.GenerateUUID(),
		Priority:    structs.CoreJobPriority,
		Type:        structs.JobTypeCore,
		TriggeredBy: structs.EvalTriggerScheduled,
		JobID:       job,
		Status:      structs.EvalStatusPending,
		ModifyIndex: modifyIndex,
	}
}
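// A usage sketch (assumed, not shown above): periodic housekeeping would
// construct a core-job eval and hand it to the eval broker. The "job-gc" name
// and the evalBroker field with its Enqueue method are assumptions for
// illustration only.
func (s *Server) exampleScheduleCoreJob(index uint64) {
	eval := s.coreJobEval("job-gc", index)
	s.evalBroker.Enqueue(eval)
}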