func (s *Supervisor) ScheduleTask(t *db.Task) { t.TimeoutAt = timestamp.Now().Add(s.Timeout) log.Infof("schedule task %s with deadline %v", t.UUID, t.TimeoutAt) s.schedq = append(s.schedq, t) }
func (s *Supervisor) Run() error { if err := s.Database.Connect(); err != nil { return fmt.Errorf("failed to connect to %s database at %s: %s\n", s.Database.Driver, s.Database.DSN, err) } if err := s.Database.CheckCurrentSchema(); err != nil { return fmt.Errorf("database failed schema version check: %s\n", err) } if err := s.Resync(); err != nil { return err } if err := s.FailUnfinishedTasks(); err != nil { return err } if err := s.ReschedulePendingTasks(); err != nil { return err } for { select { case <-s.resync: if err := s.Resync(); err != nil { log.Errorf("resync error: %s", err) } case <-s.purge.C: s.PurgeArchives() case <-s.tick.C: s.CheckSchedule() // see if any tasks have been running past the timeout period if len(s.runq) > 0 { ok := true lst := make([]*db.Task, 0) now := timestamp.Now() for _, runtask := range s.runq { if now.After(runtask.TimeoutAt) { s.Database.CancelTask(runtask.UUID, now.Time()) log.Errorf("shield timed out task '%s' after running for %v", runtask.UUID, s.Timeout) ok = false } else { lst = append(lst, runtask) } } if !ok { s.runq = lst } } // see if we have anything in the schedule queue SchedQueue: for len(s.schedq) > 0 { select { case s.workers <- s.schedq[0]: s.Database.StartTask(s.schedq[0].UUID, time.Now()) s.schedq[0].Attempts++ log.Infof("sent a task to a worker") s.runq = append(s.runq, s.schedq[0]) log.Debugf("added task to the runq") s.schedq = s.schedq[1:] default: break SchedQueue } } case adhoc := <-s.adhoc: s.ScheduleAdhoc(adhoc) case u := <-s.updates: switch u.Op { case STOPPED: log.Infof(" %s: job stopped at %s", u.Task, u.StoppedAt) s.RemoveTaskFromRunq(u.Task) if err := s.Database.CompleteTask(u.Task, u.StoppedAt); err != nil { log.Errorf(" %s: !! failed to update database - %s", u.Task, err) } case FAILED: log.Warnf(" %s: task failed!", u.Task) s.RemoveTaskFromRunq(u.Task) if err := s.Database.FailTask(u.Task, u.StoppedAt); err != nil { log.Errorf(" %s: !! failed to update database - %s", u.Task, err) } case OUTPUT: log.Infof(" %s> %s", u.Task, strings.Trim(u.Output, "\n")) if err := s.Database.UpdateTaskLog(u.Task, u.Output); err != nil { log.Errorf(" %s: !! failed to update database - %s", u.Task, err) } case RESTORE_KEY: log.Infof(" %s: restore key is %s", u.Task, u.Output) if id, err := s.Database.CreateTaskArchive(u.Task, u.Output, time.Now()); err != nil { log.Errorf(" %s: !! failed to update database - %s", u.Task, err) } else { if !u.TaskSuccess { s.Database.InvalidateArchive(id) } } case PURGE_ARCHIVE: log.Infof(" %s: archive %s purged from storage", u.Task, u.Archive) if err := s.Database.PurgeArchive(u.Archive); err != nil { log.Errorf(" %s: !! failed to update database - %s", u.Task, err) } default: log.Errorf(" %s: !! unrecognized op type", u.Task) } } } }