// tasksEqual returns true if the tasks are functionally equal, ignoring status,
// version and other superfluous fields.
//
// This is used to decide whether or not to propagate a task update to a controller.
func tasksEqual(a, b *api.Task) bool {
	a, b = a.Copy(), b.Copy()

	a.Status, b.Status = api.TaskStatus{}, api.TaskStatus{}
	a.Meta, b.Meta = api.Meta{}, api.Meta{}

	return reflect.DeepEqual(a, b)
}
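// The sketch below is a minimal, hypothetical illustration of the pattern the
// doc comment describes: using tasksEqual to skip propagating no-op updates.
// The propagateUpdate name and its old/new parameters are assumptions for
// illustration, not part of the source.
func propagateUpdate(ctx context.Context, old, updated *api.Task) error {
	if tasksEqual(old, updated) {
		// Nothing functionally changed; do not bother the controller.
		return nil
	}
	// ... forward the updated task to the controller here.
	return nil
}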
// startTask publishes the task on the worker's task event queue and lazily
// creates a taskManager for it as a side effect.
func (w *worker) startTask(ctx context.Context, tx *bolt.Tx, task *api.Task) error {
	w.taskevents.Publish(task.Copy())
	_, err := w.taskManager(ctx, tx, task) // side-effect taskManager creation.
	if err != nil {
		log.G(ctx).WithError(err).Error("failed to start taskManager")
	}

	// TODO(stevvooe): Add start method for taskmanager
	return nil
}
// newTaskManager creates a taskManager for the given task and starts its run
// loop in a background goroutine.
func newTaskManager(ctx context.Context, task *api.Task, ctlr exec.Controller, reporter StatusReporter) *taskManager {
	t := &taskManager{
		task:     task.Copy(),
		ctlr:     ctlr,
		reporter: reporter,
		updateq:  make(chan *api.Task),
		shutdown: make(chan struct{}),
		closed:   make(chan struct{}),
	}
	go t.run(ctx)
	return t
}
// PutTask places the task into the database.
func PutTask(tx *bolt.Tx, task *api.Task) error {
	return withCreateTaskBucketIfNotExists(tx, task.ID, func(bkt *bolt.Bucket) error {
		task = task.Copy()
		task.Status = api.TaskStatus{} // blank out the status.

		p, err := proto.Marshal(task)
		if err != nil {
			return err
		}
		return bkt.Put(bucketKeyData, p)
	})
}
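// A minimal sketch, assuming a *bolt.DB handle is available, showing PutTask
// being called inside a read-write bolt transaction. The saveTask wrapper and
// the db parameter are hypothetical and only illustrate the bolt.Tx flow.
func saveTask(db *bolt.DB, task *api.Task) error {
	return db.Update(func(tx *bolt.Tx) error {
		return PutTask(tx, task)
	})
}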
// tick attempts to schedule the queue.
func (s *Scheduler) tick(ctx context.Context) {
	tasksByCommonSpec := make(map[string]map[string]*api.Task)
	schedulingDecisions := make(map[string]schedulingDecision, s.unassignedTasks.Len())

	var next *list.Element
	for e := s.unassignedTasks.Front(); e != nil; e = next {
		next = e.Next()
		t := s.allTasks[e.Value.(*api.Task).ID]
		if t == nil || t.NodeID != "" {
			// task deleted or already assigned
			s.unassignedTasks.Remove(e)
			continue
		}

		// Group tasks with common specs by marshalling the spec into
		// taskGroupKey and using it as a map key.
		// TODO(aaronl): Once specs are versioned, this will allow a
		// much more efficient fast path.
		fieldsToMarshal := api.Task{
			ServiceID: t.ServiceID,
			Spec:      t.Spec,
		}
		marshalled, err := fieldsToMarshal.Marshal()
		if err != nil {
			panic(err)
		}
		taskGroupKey := string(marshalled)

		if tasksByCommonSpec[taskGroupKey] == nil {
			tasksByCommonSpec[taskGroupKey] = make(map[string]*api.Task)
		}
		tasksByCommonSpec[taskGroupKey][t.ID] = t
		s.unassignedTasks.Remove(e)
	}

	for _, taskGroup := range tasksByCommonSpec {
		s.scheduleTaskGroup(ctx, taskGroup, schedulingDecisions)
	}

	_, failed := s.applySchedulingDecisions(ctx, schedulingDecisions)
	for _, decision := range failed {
		s.allTasks[decision.old.ID] = decision.old

		nodeInfo, err := s.nodeSet.nodeInfo(decision.new.NodeID)
		if err == nil && nodeInfo.removeTask(decision.new) {
			s.nodeSet.updateNode(nodeInfo)
		}

		// enqueue task for next scheduling attempt
		s.enqueue(decision.old)
	}
}
// taskUpdateNetworks replaces the task's network attachments with deep copies
// of the given attachments.
func taskUpdateNetworks(t *api.Task, networks []*api.NetworkAttachment) {
	networksCopy := make([]*api.NetworkAttachment, 0, len(networks))
	for _, n := range networks {
		networksCopy = append(networksCopy, n.Copy())
	}

	t.Networks = networksCopy
}
// newTask creates a new task for the given service, resolving the log driver
// from the task spec or, failing that, from the cluster default.
func newTask(cluster *api.Cluster, service *api.Service, slot uint64, nodeID string) *api.Task {
	var logDriver *api.Driver
	if service.Spec.Task.LogDriver != nil {
		// use the log driver specific to the task, if we have it.
		logDriver = service.Spec.Task.LogDriver
	} else if cluster != nil {
		// pick up the cluster default, if available.
		logDriver = cluster.Spec.TaskDefaults.LogDriver // nil is okay here.
	}

	taskID := identity.NewID()
	task := api.Task{
		ID:                 taskID,
		ServiceAnnotations: service.Spec.Annotations,
		Spec:               service.Spec.Task,
		ServiceID:          service.ID,
		Slot:               slot,
		Status: api.TaskStatus{
			State:     api.TaskStateNew,
			Timestamp: ptypes.MustTimestampProto(time.Now()),
			Message:   "created",
		},
		Endpoint: &api.Endpoint{
			Spec: service.Spec.Endpoint.Copy(),
		},
		DesiredState: api.TaskStateRunning,
		LogDriver:    logDriver,
	}

	// In global mode we also set the NodeID
	if nodeID != "" {
		task.NodeID = nodeID
	}

	// Assign name based on task name schema
	name := store.TaskName(&task)
	task.Annotations = api.Annotations{Name: name}

	return &task
}
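// A hypothetical sketch of the two calling conventions for newTask that the
// restart supervisor below also follows: a replicated task carries a slot
// number and an empty node ID, while a global task carries slot 0 and an
// explicit node ID. The wrapper function and its parameters are placeholders.
func newTasksForModes(cluster *api.Cluster, service *api.Service, slot uint64, nodeID string) (replicated, global *api.Task) {
	replicated = newTask(cluster, service, slot, "") // replicated mode: the scheduler picks the node later
	global = newTask(cluster, service, 0, nodeID)    // global mode: pinned to a specific node
	return replicated, global
}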
func (g *GlobalOrchestrator) removeTask(ctx context.Context, batch *store.Batch, t *api.Task) {
	// set existing task DesiredState to TaskStateShutdown
	// TODO(aaronl): optimistic update?
	err := batch.Update(func(tx store.Tx) error {
		// Shadow t so the outer task is still available for the error log
		// below if the lookup comes back nil.
		t := store.GetTask(tx, t.ID)
		if t != nil {
			t.DesiredState = api.TaskStateShutdown
			return store.UpdateTask(tx, t)
		}
		return nil
	})
	if err != nil {
		log.G(ctx).WithError(err).Errorf("global orchestrator: removeTask failed to remove %s", t.ID)
	}
}
// taskUpdateEndpoint replaces the task's endpoint with a copy of the given
// endpoint.
func taskUpdateEndpoint(t *api.Task, endpoint *api.Endpoint) {
	t.Endpoint = endpoint.Copy()
}
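// A minimal, hypothetical sketch combining the two helpers above: applying an
// updated endpoint and network attachments to a local copy of a task before it
// is persisted. The applyTaskUpdate name and its parameters are assumptions
// for illustration only.
func applyTaskUpdate(task *api.Task, endpoint *api.Endpoint, networks []*api.NetworkAttachment) *api.Task {
	task = task.Copy()
	taskUpdateEndpoint(task, endpoint)
	taskUpdateNetworks(task, networks)
	return task
}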
// Restart initiates a new task to replace t if appropriate under the service's
// restart policy.
func (r *Supervisor) Restart(ctx context.Context, tx store.Tx, cluster *api.Cluster, service *api.Service, t api.Task) error {
	// TODO(aluzzardi): This function should not depend on `service`.

	// Is the old task still in the process of restarting? If so, wait for
	// its restart delay to elapse, to avoid tight restart loops (for
	// example, when the image doesn't exist).
	r.mu.Lock()
	oldDelay, ok := r.delays[t.ID]
	if ok {
		if !oldDelay.waiter {
			oldDelay.waiter = true
			go r.waitRestart(ctx, oldDelay, cluster, t.ID)
		}
		r.mu.Unlock()
		return nil
	}
	r.mu.Unlock()

	// Sanity check: was the task shut down already by a separate call to
	// Restart? If so, we must avoid restarting it, because this will create
	// an extra task. This should never happen unless there is a bug.
	if t.DesiredState > api.TaskStateRunning {
		return errors.New("Restart called on task that was already shut down")
	}

	t.DesiredState = api.TaskStateShutdown
	err := store.UpdateTask(tx, &t)
	if err != nil {
		log.G(ctx).WithError(err).Errorf("failed to set task desired state to dead")
		return err
	}

	if !r.shouldRestart(ctx, &t, service) {
		return nil
	}

	var restartTask *api.Task

	if orchestrator.IsReplicatedService(service) {
		restartTask = orchestrator.NewTask(cluster, service, t.Slot, "")
	} else if orchestrator.IsGlobalService(service) {
		restartTask = orchestrator.NewTask(cluster, service, 0, t.NodeID)
	} else {
		log.G(ctx).Error("service not supported by restart supervisor")
		return nil
	}

	n := store.GetNode(tx, t.NodeID)

	restartTask.DesiredState = api.TaskStateReady

	var restartDelay time.Duration
	// Restart delay is not applied to drained nodes
	if n == nil || n.Spec.Availability != api.NodeAvailabilityDrain {
		if t.Spec.Restart != nil && t.Spec.Restart.Delay != nil {
			var err error
			restartDelay, err = ptypes.Duration(t.Spec.Restart.Delay)
			if err != nil {
				log.G(ctx).WithError(err).Error("invalid restart delay; using default")
				restartDelay = orchestrator.DefaultRestartDelay
			}
		} else {
			restartDelay = orchestrator.DefaultRestartDelay
		}
	}

	waitStop := true

	// Normally we wait for the old task to stop running, but we skip this
	// if the old task is already dead or the node it's assigned to is down.
	if (n != nil && n.Status.State == api.NodeStatus_DOWN) || t.Status.State > api.TaskStateRunning {
		waitStop = false
	}

	if err := store.CreateTask(tx, restartTask); err != nil {
		log.G(ctx).WithError(err).WithField("task.id", restartTask.ID).Error("task create failed")
		return err
	}

	r.recordRestartHistory(restartTask)

	r.DelayStart(ctx, tx, &t, restartTask.ID, restartDelay, waitStop)
	return nil
}
// Restart initiates a new task to replace t if appropriate under the service's
// restart policy.
func (r *RestartSupervisor) Restart(ctx context.Context, tx store.Tx, service *api.Service, t api.Task) error {
	// TODO(aluzzardi): This function should not depend on `service`.

	t.DesiredState = api.TaskStateShutdown
	err := store.UpdateTask(tx, &t)
	if err != nil {
		log.G(ctx).WithError(err).Errorf("failed to set task desired state to dead")
		return err
	}

	if !r.shouldRestart(ctx, &t, service) {
		return nil
	}

	var restartTask *api.Task

	if isReplicatedService(service) {
		restartTask = newTask(service, t.Slot)
	} else if isGlobalService(service) {
		restartTask = newTask(service, 0)
		restartTask.NodeID = t.NodeID
	} else {
		log.G(ctx).Error("service not supported by restart supervisor")
		return nil
	}

	n := store.GetNode(tx, t.NodeID)

	restartTask.DesiredState = api.TaskStateAccepted

	var restartDelay time.Duration
	// Restart delay is not applied to drained nodes
	if n == nil || n.Spec.Availability != api.NodeAvailabilityDrain {
		if t.Spec.Restart != nil && t.Spec.Restart.Delay != nil {
			var err error
			restartDelay, err = ptypes.Duration(t.Spec.Restart.Delay)
			if err != nil {
				log.G(ctx).WithError(err).Error("invalid restart delay; using default")
				restartDelay = defaultRestartDelay
			}
		} else {
			restartDelay = defaultRestartDelay
		}
	}

	waitStop := true

	// Normally we wait for the old task to stop running, but we skip this
	// if the old task is already dead or the node it's assigned to is down.
	if (n != nil && n.Status.State == api.NodeStatus_DOWN) || t.Status.State > api.TaskStateRunning {
		waitStop = false
	}

	if err := store.CreateTask(tx, restartTask); err != nil {
		log.G(ctx).WithError(err).WithField("task.id", restartTask.ID).Error("task create failed")
		return err
	}

	r.recordRestartHistory(restartTask)

	r.DelayStart(ctx, tx, &t, restartTask.ID, restartDelay, waitStop)
	return nil
}