// updateSummaryWithJob creates or updates job summaries when new jobs are // upserted or existing ones are updated func (s *StateStore) updateSummaryWithJob(index uint64, job *structs.Job, watcher watch.Items, txn *memdb.Txn) error { existing, err := s.JobSummaryByID(job.ID) if err != nil { return fmt.Errorf("unable to retrieve summary for job: %v", err) } var hasSummaryChanged bool if existing == nil { existing = &structs.JobSummary{ JobID: job.ID, Summary: make(map[string]structs.TaskGroupSummary), CreateIndex: index, } hasSummaryChanged = true } for _, tg := range job.TaskGroups { if _, ok := existing.Summary[tg.Name]; !ok { newSummary := structs.TaskGroupSummary{ Complete: 0, Failed: 0, Running: 0, Starting: 0, } existing.Summary[tg.Name] = newSummary hasSummaryChanged = true } } // The job summary has changed, so add to watcher and update the modify // index. if hasSummaryChanged { existing.ModifyIndex = index watcher.Add(watch.Item{Table: "job_summary"}) watcher.Add(watch.Item{JobSummary: job.ID}) // Update the indexes table for job summary if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { return fmt.Errorf("index update failed: %v", err) } if err := txn.Insert("job_summary", *existing); err != nil { return err } } return nil }
// nestedUpdateAllocFromClient is used to nest an update of an allocation with client status func (s *StateStore) nestedUpdateAllocFromClient(txn *memdb.Txn, watcher watch.Items, index uint64, alloc *structs.Allocation) error { // Look for existing alloc existing, err := txn.First("allocs", "id", alloc.ID) if err != nil { return fmt.Errorf("alloc lookup failed: %v", err) } // Nothing to do if this does not exist if existing == nil { return nil } exist := existing.(*structs.Allocation) // Trigger the watcher watcher.Add(watch.Item{Alloc: alloc.ID}) watcher.Add(watch.Item{AllocEval: exist.EvalID}) watcher.Add(watch.Item{AllocJob: exist.JobID}) watcher.Add(watch.Item{AllocNode: exist.NodeID}) // Copy everything from the existing allocation copyAlloc := new(structs.Allocation) *copyAlloc = *exist // Pull in anything the client is the authority on copyAlloc.ClientStatus = alloc.ClientStatus copyAlloc.ClientDescription = alloc.ClientDescription copyAlloc.TaskStates = alloc.TaskStates // Update the modify index copyAlloc.ModifyIndex = index if err := s.updateSummaryWithAlloc(index, copyAlloc, exist, watcher, txn); err != nil { return fmt.Errorf("error updating job summary: %v", err) } // Update the allocation if err := txn.Insert("allocs", copyAlloc); err != nil { return fmt.Errorf("alloc insert failed: %v", err) } // Set the job's status forceStatus := "" if !copyAlloc.TerminalStatus() { forceStatus = structs.JobStatusRunning } jobs := map[string]string{exist.JobID: forceStatus} if err := s.setJobStatuses(index, watcher, txn, jobs, false); err != nil { return fmt.Errorf("setting job status failed: %v", err) } return nil }
// setJobStatus sets the status of the job by looking up associated evaluations // and allocations. evalDelete should be set to true if setJobStatus is being // called because an evaluation is being deleted (potentially because of garbage // collection). If forceStatus is non-empty, the job's status will be set to the // passed status. func (s *StateStore) setJobStatus(index uint64, watcher watch.Items, txn *memdb.Txn, job *structs.Job, evalDelete bool, forceStatus string) error { // Capture the current status so we can check if there is a change oldStatus := job.Status newStatus := forceStatus // If forceStatus is not set, compute the jobs status. if forceStatus == "" { var err error newStatus, err = s.getJobStatus(txn, job, evalDelete) if err != nil { return err } } // Fast-path if nothing has changed. if oldStatus == newStatus { return nil } // The job has changed, so add to watcher. watcher.Add(watch.Item{Table: "jobs"}) watcher.Add(watch.Item{Job: job.ID}) // Copy and update the existing job updated := job.Copy() updated.Status = newStatus updated.ModifyIndex = index // Insert the job if err := txn.Insert("jobs", updated); err != nil { return fmt.Errorf("job insert failed: %v", err) } if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil { return fmt.Errorf("index update failed: %v", err) } return nil }
// updateSummaryWithAlloc updates the job summary when allocations are updated // or inserted func (s *StateStore) updateSummaryWithAlloc(index uint64, alloc *structs.Allocation, existingAlloc *structs.Allocation, watcher watch.Items, txn *memdb.Txn) error { // We don't have to update the summary if the job is missing if alloc.Job == nil { return nil } summaryRaw, err := txn.First("job_summary", "id", alloc.JobID) if err != nil { return fmt.Errorf("unable to lookup job summary for job id %q: %v", err) } if summaryRaw == nil { // Check if the job is de-registered rawJob, err := txn.First("jobs", "id", alloc.JobID) if err != nil { return fmt.Errorf("unable to query job: %v", err) } // If the job is de-registered then we skip updating it's summary if rawJob == nil { return nil } return fmt.Errorf("job summary for job %q is not present", alloc.JobID) } summary := summaryRaw.(structs.JobSummary) jobSummary := summary.Copy() // Not updating the job summary because the allocation doesn't belong to the // currently registered job if jobSummary.CreateIndex != alloc.Job.CreateIndex { return nil } tgSummary, ok := jobSummary.Summary[alloc.TaskGroup] if !ok { return fmt.Errorf("unable to find task group in the job summary: %v", alloc.TaskGroup) } var summaryChanged bool if existingAlloc == nil { switch alloc.DesiredStatus { case structs.AllocDesiredStatusStop, structs.AllocDesiredStatusEvict: s.logger.Printf("[ERR] state_store: new allocation inserted into state store with id: %v and state: %v", alloc.ID, alloc.DesiredStatus) } switch alloc.ClientStatus { case structs.AllocClientStatusPending: tgSummary.Starting += 1 if tgSummary.Queued > 0 { tgSummary.Queued -= 1 } summaryChanged = true case structs.AllocClientStatusRunning, structs.AllocClientStatusFailed, structs.AllocClientStatusComplete: s.logger.Printf("[ERR] state_store: new allocation inserted into state store with id: %v and state: %v", alloc.ID, alloc.ClientStatus) } } else if existingAlloc.ClientStatus != alloc.ClientStatus { // Incrementing the client of the bin of the current state switch alloc.ClientStatus { case structs.AllocClientStatusRunning: tgSummary.Running += 1 case structs.AllocClientStatusFailed: tgSummary.Failed += 1 case structs.AllocClientStatusPending: tgSummary.Starting += 1 case structs.AllocClientStatusComplete: tgSummary.Complete += 1 case structs.AllocClientStatusLost: tgSummary.Lost += 1 } // Decrementing the count of the bin of the last state switch existingAlloc.ClientStatus { case structs.AllocClientStatusRunning: tgSummary.Running -= 1 case structs.AllocClientStatusPending: tgSummary.Starting -= 1 case structs.AllocClientStatusLost: tgSummary.Lost -= 1 case structs.AllocClientStatusFailed, structs.AllocClientStatusComplete: default: s.logger.Printf("[ERR] state_store: invalid old state of allocation with id: %v, and state: %v", existingAlloc.ID, existingAlloc.ClientStatus) } summaryChanged = true } jobSummary.Summary[alloc.TaskGroup] = tgSummary if summaryChanged { jobSummary.ModifyIndex = index watcher.Add(watch.Item{Table: "job_summary"}) watcher.Add(watch.Item{JobSummary: alloc.JobID}) // Update the indexes table for job summary if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { return fmt.Errorf("index update failed: %v", err) } if err := txn.Insert("job_summary", *jobSummary); err != nil { return fmt.Errorf("updating job summary failed: %v", err) } } return nil }