// nestedUpsertEvaluation is used to nest an evaluation upsert within a transaction func (s *StateStore) nestedUpsertEval(txn *memdb.Txn, index uint64, eval *structs.Evaluation) error { // Lookup the evaluation existing, err := txn.First("evals", "id", eval.ID) if err != nil { return fmt.Errorf("eval lookup failed: %v", err) } // Update the indexes if existing != nil { eval.CreateIndex = existing.(*structs.Evaluation).CreateIndex eval.ModifyIndex = index } else { eval.CreateIndex = index eval.ModifyIndex = index } // Insert the eval if err := txn.Insert("evals", eval); err != nil { return fmt.Errorf("eval insert failed: %v", err) } if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil { return fmt.Errorf("index update failed: %v", err) } return nil }
// gcEval returns whether the eval should be garbage collected given a raft // threshold index. The eval disqualifies for garbage collection if it or its // allocs are not older than the threshold. If the eval should be garbage // collected, the associated alloc ids that should also be removed are also // returned func (c *CoreScheduler) gcEval(eval *structs.Evaluation, thresholdIndex uint64) ( bool, []string, error) { // Ignore non-terminal and new evaluations if !eval.TerminalStatus() || eval.ModifyIndex > thresholdIndex { return false, nil, nil } // Get the allocations by eval allocs, err := c.snap.AllocsByEval(eval.ID) if err != nil { c.srv.logger.Printf("[ERR] sched.core: failed to get allocs for eval %s: %v", eval.ID, err) return false, nil, err } // Scan the allocations to ensure they are terminal and old for _, alloc := range allocs { if !alloc.TerminalStatus() || alloc.ModifyIndex > thresholdIndex { return false, nil, nil } } allocIds := make([]string, len(allocs)) for i, alloc := range allocs { allocIds[i] = alloc.ID } // Evaluation is eligible for garbage collection return true, allocIds, nil }
// nestedUpsertEvaluation is used to nest an evaluation upsert within a transaction func (s *StateStore) nestedUpsertEval(txn *memdb.Txn, index uint64, eval *structs.Evaluation) error { // Lookup the evaluation existing, err := txn.First("evals", "id", eval.ID) if err != nil { return fmt.Errorf("eval lookup failed: %v", err) } // Update the indexes if existing != nil { eval.CreateIndex = existing.(*structs.Evaluation).CreateIndex eval.ModifyIndex = index } else { eval.CreateIndex = index eval.ModifyIndex = index } // Update the job summary summaryRaw, err := txn.First("job_summary", "id", eval.JobID) if err != nil { return fmt.Errorf("job summary lookup failed: %v", err) } if summaryRaw != nil { js := summaryRaw.(structs.JobSummary) var hasSummaryChanged bool for tg, num := range eval.QueuedAllocations { if summary, ok := js.Summary[tg]; ok { if summary.Queued != num { summary.Queued = num js.Summary[tg] = summary hasSummaryChanged = true } } else { s.logger.Printf("[ERR] state_store: unable to update queued for job %q and task group %q", eval.JobID, tg) } } // Insert the job summary if hasSummaryChanged { js.ModifyIndex = index if err := txn.Insert("job_summary", js); err != nil { return fmt.Errorf("job summary insert failed: %v", err) } if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { return fmt.Errorf("index update failed: %v", err) } } } // Insert the eval if err := txn.Insert("evals", eval); err != nil { return fmt.Errorf("eval insert failed: %v", err) } if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil { return fmt.Errorf("index update failed: %v", err) } return nil }
// setStatus is used to update the status of the evaluation func setStatus(logger *log.Logger, planner Planner, eval, nextEval *structs.Evaluation, status, desc string) error { logger.Printf("[DEBUG] sched: %#v: setting status to %s", eval, status) newEval := eval.Copy() newEval.Status = status newEval.StatusDescription = desc if nextEval != nil { newEval.NextEval = nextEval.ID } return planner.UpdateEval(newEval) }
// gcEval returns whether the eval should be garbage collected given a raft // threshold index. The eval disqualifies for garbage collection if it or its // allocs are not older than the threshold. If the eval should be garbage // collected, the associated alloc ids that should also be removed are also // returned func (c *CoreScheduler) gcEval(eval *structs.Evaluation, thresholdIndex uint64, allowBatch bool) ( bool, []string, error) { // Ignore non-terminal and new evaluations if !eval.TerminalStatus() || eval.ModifyIndex > thresholdIndex { return false, nil, nil } // If the eval is from a running "batch" job we don't want to garbage // collect its allocations. If there is a long running batch job and its // terminal allocations get GC'd the scheduler would re-run the // allocations. if eval.Type == structs.JobTypeBatch { if !allowBatch { return false, nil, nil } // Check if the job is running job, err := c.snap.JobByID(eval.JobID) if err != nil { return false, nil, err } // We don't want to gc anything related to a job which is not dead if job != nil && job.Status != structs.JobStatusDead { return false, nil, nil } } // Get the allocations by eval allocs, err := c.snap.AllocsByEval(eval.ID) if err != nil { c.srv.logger.Printf("[ERR] sched.core: failed to get allocs for eval %s: %v", eval.ID, err) return false, nil, err } // Scan the allocations to ensure they are terminal and old gcEval := true var gcAllocIDs []string for _, alloc := range allocs { if !alloc.TerminalStatus() || alloc.ModifyIndex > thresholdIndex { // Can't GC the evaluation since not all of the allocations are // terminal gcEval = false } else { // The allocation is eligible to be GC'd gcAllocIDs = append(gcAllocIDs, alloc.ID) } } return gcEval, gcAllocIDs, nil }
// CreateEval is used to create a new evaluation. This allows // the worker to act as the planner for the scheduler. func (w *Worker) CreateEval(eval *structs.Evaluation) error { // Check for a shutdown before plan submission if w.srv.IsShutdown() { return fmt.Errorf("shutdown while planning") } defer metrics.MeasureSince([]string{"nomad", "worker", "create_eval"}, time.Now()) // Store the snapshot index in the eval eval.SnapshotIndex = w.snapshotIndex // Setup the request req := structs.EvalUpdateRequest{ Evals: []*structs.Evaluation{eval}, EvalToken: w.evalToken, WriteRequest: structs.WriteRequest{ Region: w.srv.config.Region, }, } var resp structs.GenericResponse SUBMIT: // Make the RPC call if err := w.srv.RPC("Eval.Create", &req, &resp); err != nil { w.logger.Printf("[ERR] worker: failed to create evaluation %#v: %v", eval, err) if w.shouldResubmit(err) && !w.backoffErr(backoffBaselineSlow, backoffLimitSlow) { goto SUBMIT } return err } else { w.logger.Printf("[DEBUG] worker: created evaluation %#v", eval) w.backoffReset() } return nil }
// setStatus is used to update the status of the evaluation func setStatus(logger *log.Logger, planner Planner, eval, nextEval, spawnedBlocked *structs.Evaluation, tgMetrics map[string]*structs.AllocMetric, status, desc string) error { logger.Printf("[DEBUG] sched: %#v: setting status to %s", eval, status) newEval := eval.Copy() newEval.Status = status newEval.StatusDescription = desc newEval.FailedTGAllocs = tgMetrics if nextEval != nil { newEval.NextEval = nextEval.ID } if spawnedBlocked != nil { newEval.BlockedEval = spawnedBlocked.ID } return planner.UpdateEval(newEval) }
// ReblockEval is used to reinsert a blocked evaluation into the blocked eval // tracker. This allows the worker to act as the planner for the scheduler. func (w *Worker) ReblockEval(eval *structs.Evaluation) error { // Check for a shutdown before plan submission if w.srv.IsShutdown() { return fmt.Errorf("shutdown while planning") } defer metrics.MeasureSince([]string{"nomad", "worker", "reblock_eval"}, time.Now()) // Update the evaluation if the queued jobs is not same as what is // recorded in the job summary summary, err := w.srv.fsm.state.JobSummaryByID(eval.JobID) if err != nil { return fmt.Errorf("couldn't retreive job summary: %v", err) } if summary != nil { var hasChanged bool for tg, summary := range summary.Summary { if queued, ok := eval.QueuedAllocations[tg]; ok { if queued != summary.Queued { hasChanged = true break } } } if hasChanged { if err := w.UpdateEval(eval); err != nil { return err } } } // Store the snapshot index in the eval eval.SnapshotIndex = w.snapshotIndex // Setup the request req := structs.EvalUpdateRequest{ Evals: []*structs.Evaluation{eval}, EvalToken: w.evalToken, WriteRequest: structs.WriteRequest{ Region: w.srv.config.Region, }, } var resp structs.GenericResponse SUBMIT: // Make the RPC call if err := w.srv.RPC("Eval.Reblock", &req, &resp); err != nil { w.logger.Printf("[ERR] worker: failed to reblock evaluation %#v: %v", eval, err) if w.shouldResubmit(err) && !w.backoffErr(backoffBaselineSlow, backoffLimitSlow) { goto SUBMIT } return err } else { w.logger.Printf("[DEBUG] worker: reblocked evaluation %#v", eval) w.backoffReset() } return nil }
// evalGC is used to garbage collect old evaluations func (c *CoreScheduler) evalGC(eval *structs.Evaluation) error { // Iterate over the evaluations iter, err := c.snap.Evals() if err != nil { return err } // Compute the old threshold limit for GC using the FSM // time table. This is a rough mapping of a time to the // Raft index it belongs to. tt := c.srv.fsm.TimeTable() cutoff := time.Now().UTC().Add(-1 * c.srv.config.EvalGCThreshold) oldThreshold := tt.NearestIndex(cutoff) c.srv.logger.Printf("[DEBUG] sched.core: eval GC: scanning before index %d (%v)", oldThreshold, c.srv.config.EvalGCThreshold) // Collect the allocations and evaluations to GC var gcAlloc, gcEval []string OUTER: for { raw := iter.Next() if raw == nil { break } eval := raw.(*structs.Evaluation) // Ignore non-terminal and new evaluations if !eval.TerminalStatus() || eval.ModifyIndex > oldThreshold { continue } // Get the allocations by eval allocs, err := c.snap.AllocsByEval(eval.ID) if err != nil { c.srv.logger.Printf("[ERR] sched.core: failed to get allocs for eval %s: %v", eval.ID, err) continue } // Scan the allocations to ensure they are terminal and old for _, alloc := range allocs { if !alloc.TerminalStatus() || alloc.ModifyIndex > oldThreshold { continue OUTER } } // Evaluation is eligible for garbage collection gcEval = append(gcEval, eval.ID) for _, alloc := range allocs { gcAlloc = append(gcAlloc, alloc.ID) } } // Fast-path the nothing case if len(gcEval) == 0 && len(gcAlloc) == 0 { return nil } c.srv.logger.Printf("[DEBUG] sched.core: eval GC: %d evaluations, %d allocs eligible", len(gcEval), len(gcAlloc)) // Call to the leader to issue the reap req := structs.EvalDeleteRequest{ Evals: gcEval, Allocs: gcAlloc, WriteRequest: structs.WriteRequest{ Region: c.srv.config.Region, }, } var resp structs.GenericResponse if err := c.srv.RPC("Eval.Reap", &req, &resp); err != nil { c.srv.logger.Printf("[ERR] sched.core: eval reap failed: %v", err) return err } return nil }